From 550e50cfecd22675cbb9e58100fd8dc49f30bc7e Mon Sep 17 00:00:00 2001 From: kensmith Date: Wed, 12 Aug 2009 07:37:18 +0000 Subject: Prepare for 8.0 package set, adjust for 8-stable, acknowledge 9-current is coming. Approved by: re (implicit) --- usr.sbin/pkg_install/add/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pkg_install/add/main.c b/usr.sbin/pkg_install/add/main.c index b2d511e..eaed0c0 100644 --- a/usr.sbin/pkg_install/add/main.c +++ b/usr.sbin/pkg_install/add/main.c @@ -82,13 +82,15 @@ struct { { 700000, 700099, "/packages-7.0-release" }, { 701000, 701099, "/packages-7.1-release" }, { 702000, 702099, "/packages-7.2-release" }, + { 800000, 800499, "/packages-8.0-release" }, { 300000, 399000, "/packages-3-stable" }, { 400000, 499000, "/packages-4-stable" }, { 502100, 502128, "/packages-5-current" }, { 503100, 599000, "/packages-5-stable" }, { 600100, 699000, "/packages-6-stable" }, { 700100, 799000, "/packages-7-stable" }, - { 800000, 899000, "/packages-8-current" }, + { 800500, 899000, "/packages-8-stable" }, + { 900000, 999000, "/packages-9-current" }, { 0, 9999999, "/packages-current" }, { 0, 0, NULL } }; -- cgit v1.1 From be6265aac0e0644a7ccc8bcfd1d717175d8dbd16 Mon Sep 17 00:00:00 2001 From: cperciva Date: Wed, 12 Aug 2009 12:00:22 +0000 Subject: Merge r196128 to stable/8. Approved by: re (rwatson) --- usr.sbin/ntp/scripts/mkver | 2 -- 1 file changed, 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/ntp/scripts/mkver b/usr.sbin/ntp/scripts/mkver index 02c6924..a8f5175 100755 --- a/usr.sbin/ntp/scripts/mkver +++ b/usr.sbin/ntp/scripts/mkver @@ -23,8 +23,6 @@ case "" in *) ConfStr="${ConfStr}-r" ;; esac -ConfStr="$ConfStr `LC_ALL=C date`" - if [ ! 
-f .version ]; then echo 0 > .version fi -- cgit v1.1 From 1408c3c090097457de28cb0b613c2b61c65336a4 Mon Sep 17 00:00:00 2001 From: bz Date: Wed, 12 Aug 2009 12:31:29 +0000 Subject: MFC r196137: Do not truncate IPv6 addresses when printing them in the jls -av 7.x multi-IP jail backward compat output. Reported by: ed Tested by: ed Reviewed by: rwatson Approved by: re --- usr.sbin/jls/jls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/jls/jls.c b/usr.sbin/jls/jls.c index 8c8b981..0661ee3 100644 --- a/usr.sbin/jls/jls.c +++ b/usr.sbin/jls/jls.c @@ -359,7 +359,7 @@ print_jail(int pflags, int jflags) ipbuf, sizeof(ipbuf)) == NULL) err(1, "inet_ntop"); else - printf("%6s %-15.15s\n", "", ipbuf); + printf("%6s %s\n", "", ipbuf); } } else if (pflags & PRINT_DEFAULT) printf("%6d %-15.15s %-29.29s %.74s\n", -- cgit v1.1 From 8922ad2ae8c61c16d4debd9b64cca88901613810 Mon Sep 17 00:00:00 2001 From: jhb Date: Wed, 12 Aug 2009 14:40:21 +0000 Subject: MFC 196147: Fix references to the kernel distributions to use the correct names (uppercase). Approved by: re (rwatson, kib) --- usr.sbin/sysinstall/install.c | 2 +- usr.sbin/sysinstall/sysinstall.8 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/install.c b/usr.sbin/sysinstall/install.c index d00da3c..a2979c9 100644 --- a/usr.sbin/sysinstall/install.c +++ b/usr.sbin/sysinstall/install.c @@ -939,7 +939,7 @@ installFixupKernel(dialogMenuItem *self, int dists) if (RunningAsInit) { /* * Install something as /boot/kernel. Prefer SMP - * over generic--this should handle the case where + * over GENERIC--this should handle the case where * both SMP and GENERIC are installed (otherwise we * select the one kernel that was installed). 
* diff --git a/usr.sbin/sysinstall/sysinstall.8 b/usr.sbin/sysinstall/sysinstall.8 index 7cb55aa..46a14fc 100644 --- a/usr.sbin/sysinstall/sysinstall.8 +++ b/usr.sbin/sysinstall/sysinstall.8 @@ -411,9 +411,9 @@ Possible distribution values are: .Bl -tag -width indentxx .It Li base The base binary distribution. -.It Li generic +.It Li GENERIC The GENERIC kernel. -.It Li smp +.It Li SMP A kernel suitable for multiple processor systems. .It Li doc Miscellaneous documentation -- cgit v1.1 From d1ee22ed295e48e8afdbde02ad51f47ef26417cd Mon Sep 17 00:00:00 2001 From: cperciva Date: Fri, 14 Aug 2009 13:26:50 +0000 Subject: Merge r196213 to stable/8. Approved by: re (rwatson) --- usr.sbin/sysinstall/devices.c | 1 + 1 file changed, 1 insertion(+) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/devices.c b/usr.sbin/sysinstall/devices.c index 3ba8732..b95fdc5 100644 --- a/usr.sbin/sysinstall/devices.c +++ b/usr.sbin/sysinstall/devices.c @@ -80,6 +80,7 @@ static struct _devname { CDROM("acd%d", "ATAPI/IDE CDROM", 4), DISK("da%d", "SCSI disk device", 16), DISK("ad%d", "ATA/IDE disk device", 16), + DISK("ada%d", "SATA disk device", 16), DISK("ar%d", "ATA/IDE RAID device", 16), DISK("afd%d", "ATAPI/IDE floppy device", 4), DISK("mlxd%d", "Mylex RAID disk", 4), -- cgit v1.1 From 2544a74dea02416ec3a2f0a3e93e1b30c7270c27 Mon Sep 17 00:00:00 2001 From: remko Date: Fri, 14 Aug 2009 19:30:59 +0000 Subject: Remove bogus char cast. 
PR: 118014 Submitted by: Gardner Bell Approved by: re (rwatson), imp (mentor, implicit) MFC after: immediate Approved by: re (rwatson), imp (mentor, implicit) --- usr.sbin/arp/arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/arp/arp.c b/usr.sbin/arp/arp.c index e585ba0..8a3410f 100644 --- a/usr.sbin/arp/arp.c +++ b/usr.sbin/arp/arp.c @@ -120,7 +120,7 @@ main(int argc, char *argv[]) int aflag = 0; /* do it for all entries */ while ((ch = getopt(argc, argv, "andfsSi:")) != -1) - switch((char)ch) { + switch(ch) { case 'a': aflag = 1; break; -- cgit v1.1 From 2a0da3095ae2626f6eb9a8260dfa86c7335b18ff Mon Sep 17 00:00:00 2001 From: stas Date: Sat, 15 Aug 2009 15:12:46 +0000 Subject: - Merge r196244: Avoid overflowing the swap size counters in human-readable mode by introducing the new CONVERT_BLOCKS macro which operates on sizes already converted to number of blocks. With this macro it is no longer needed to perform needless multiplica Approved by: re (kib) --- usr.sbin/pstat/pstat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pstat/pstat.c b/usr.sbin/pstat/pstat.c index bf297f5..5435166 100644 --- a/usr.sbin/pstat/pstat.c +++ b/usr.sbin/pstat/pstat.c @@ -460,6 +460,7 @@ getfiles(struct xfile **abuf, size_t *alen) */ #define CONVERT(v) ((int64_t)(v) * pagesize / blocksize) +#define CONVERT_BLOCKS(v) ((int64_t)(v) * pagesize) static struct kvm_swap swtot; static int nswdev; @@ -492,10 +493,10 @@ print_swap_line(const char *swdevname, intmax_t nblks, intmax_t bused, printf("%-15s %*jd ", swdevname, hlen, CONVERT(nblks)); if (humanflag) { humanize_number(usedbuf, sizeof(usedbuf), - CONVERT(blocksize * bused), "", + CONVERT_BLOCKS(bused), "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); humanize_number(availbuf, sizeof(availbuf), - CONVERT(blocksize * bavail), "", + CONVERT_BLOCKS(bavail), "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); printf("%8s %8s 
%5.0f%%\n", usedbuf, availbuf, bpercent); } else { -- cgit v1.1 From 5124d2867a451ea52d826b91cc2924d3e0a64f75 Mon Sep 17 00:00:00 2001 From: keramida Date: Sat, 15 Aug 2009 18:03:34 +0000 Subject: MFC 196254 - iostat: add a bit of space between tty in/out columns The columns for tty input and output may bump against each other if the tty output needs more than 5 columns. Add a bit of space that pushes everything 1 column to the right, but also avoids the problem. Approved by: re (rwatson) --- usr.sbin/iostat/iostat.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/iostat/iostat.c b/usr.sbin/iostat/iostat.c index 65a0386..9831f84 100644 --- a/usr.sbin/iostat/iostat.c +++ b/usr.sbin/iostat/iostat.c @@ -586,7 +586,7 @@ main(int argc, char **argv) } if (xflag == 0 && Tflag > 0) - printf("%4.0Lf%5.0Lf", cur.tk_nin / etime, + printf("%4.0Lf %5.0Lf", cur.tk_nin / etime, cur.tk_nout / etime); devstats(hflag, etime, havelast); @@ -674,7 +674,7 @@ phdr(void) return; if (Tflag > 0) - (void)printf(" tty"); + (void)printf(" tty"); for (i = 0, printed=0;(i < num_devices) && (printed < maxshowdevs);i++){ int di; if ((dev_select[i].selected != 0) @@ -696,7 +696,7 @@ phdr(void) (void)printf("\n"); if (Tflag > 0) - (void)printf(" tin tout"); + (void)printf(" tin tout"); for (i=0, printed = 0;(i < num_devices) && (printed < maxshowdevs);i++){ if ((dev_select[i].selected != 0) @@ -741,7 +741,7 @@ devstats(int perf_select, long double etime, int havelast) if (xflag > 0) { printf(" extended device statistics "); if (Tflag > 0) - printf(" tty "); + printf(" tty "); if (Cflag > 0) printf(" cpu "); printf("\n"); @@ -754,7 +754,7 @@ devstats(int perf_select, long double etime, int havelast) "device r/i w/i kr/i kw/i wait svc_t %%b " ); if (Tflag > 0) - printf("tin tout "); + printf("tin tout "); if (Cflag > 0) printf("us ni sy in id "); printf("\n"); @@ -895,7 +895,7 @@ devstats(int perf_select, long double etime, int havelast) 
*/ printf("%52s",""); if (Tflag > 0) - printf("%4.0Lf%5.0Lf", cur.tk_nin / etime, + printf("%4.0Lf %5.0Lf", cur.tk_nin / etime, cur.tk_nout / etime); if (Cflag > 0) cpustats(); -- cgit v1.1 From 7d3db4e4d669f46f838cd1d3a97a8cb279552f8f Mon Sep 17 00:00:00 2001 From: rink Date: Sun, 16 Aug 2009 10:25:58 +0000 Subject: MFC r196272 Prevent sysinstall from needlessly waiting for confirmation when using a USB device in non-interactive mode. If there are no USB devices, sysinstall gives an error message, and if there is more than one, it'll ask which one is to be used. This change allows a non-interactive install from USB media to succeed without any user interaction if there is exactly one USB disk device in the system it can use. Submitted by: Daniel O'Connor < doconnor at gsoft dot com dot au > Reviewed by: randi Approved by: re (rwatson) --- usr.sbin/sysinstall/media.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/media.c b/usr.sbin/sysinstall/media.c index e50b47c..b325323 100644 --- a/usr.sbin/sysinstall/media.c +++ b/usr.sbin/sysinstall/media.c @@ -262,7 +262,8 @@ mediaSetUSB(dialogMenuItem *self) mediaDevice = devs[0]; if (mediaDevice) mediaDevice->private = NULL; - msgConfirm("Using USB device: %s", mediaDevice->name); + if (!variable_get(VAR_NONINTERACTIVE)) + msgConfirm("Using USB device: %s", mediaDevice->name); return (mediaDevice ? 
DITEM_LEAVE_MENU : DITEM_FAILURE); } -- cgit v1.1 From ff568ea9d320517bb41adcbd893ed5e500cbf6ad Mon Sep 17 00:00:00 2001 From: scottl Date: Mon, 17 Aug 2009 07:25:12 +0000 Subject: Merge mfiutil Approved by: re --- usr.sbin/Makefile | 1 + usr.sbin/mfiutil/Makefile | 18 + usr.sbin/mfiutil/README | 104 ++++ usr.sbin/mfiutil/mfi_cmd.c | 351 +++++++++++++ usr.sbin/mfiutil/mfi_config.c | 1164 +++++++++++++++++++++++++++++++++++++++++ usr.sbin/mfiutil/mfi_drive.c | 625 ++++++++++++++++++++++ usr.sbin/mfiutil/mfi_evt.c | 667 +++++++++++++++++++++++ usr.sbin/mfiutil/mfi_flash.c | 199 +++++++ usr.sbin/mfiutil/mfi_patrol.c | 305 +++++++++++ usr.sbin/mfiutil/mfi_show.c | 560 ++++++++++++++++++++ usr.sbin/mfiutil/mfi_volume.c | 412 +++++++++++++++ usr.sbin/mfiutil/mfiutil.1 | 574 ++++++++++++++++++++ usr.sbin/mfiutil/mfiutil.8 | 566 ++++++++++++++++++++ usr.sbin/mfiutil/mfiutil.c | 134 +++++ usr.sbin/mfiutil/mfiutil.h | 147 ++++++ 15 files changed, 5827 insertions(+) create mode 100644 usr.sbin/mfiutil/Makefile create mode 100644 usr.sbin/mfiutil/README create mode 100644 usr.sbin/mfiutil/mfi_cmd.c create mode 100644 usr.sbin/mfiutil/mfi_config.c create mode 100644 usr.sbin/mfiutil/mfi_drive.c create mode 100644 usr.sbin/mfiutil/mfi_evt.c create mode 100644 usr.sbin/mfiutil/mfi_flash.c create mode 100644 usr.sbin/mfiutil/mfi_patrol.c create mode 100644 usr.sbin/mfiutil/mfi_show.c create mode 100644 usr.sbin/mfiutil/mfi_volume.c create mode 100644 usr.sbin/mfiutil/mfiutil.1 create mode 100644 usr.sbin/mfiutil/mfiutil.8 create mode 100644 usr.sbin/mfiutil/mfiutil.c create mode 100644 usr.sbin/mfiutil/mfiutil.h (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index f57ff3a..37d24f8 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -94,6 +94,7 @@ SUBDIR= ${_ac} \ manctl \ memcontrol \ mergemaster \ + mfiutil \ mixer \ ${_mld6query} \ mlxcontrol \ diff --git a/usr.sbin/mfiutil/Makefile b/usr.sbin/mfiutil/Makefile new file mode 100644 index 
0000000..03ded03 --- /dev/null +++ b/usr.sbin/mfiutil/Makefile @@ -0,0 +1,18 @@ +# $FreeBSD$ +PROG= mfiutil + +SRCS= mfiutil.c mfi_cmd.c mfi_config.c mfi_drive.c mfi_evt.c mfi_flash.c \ + mfi_patrol.c mfi_show.c mfi_volume.c +MAN8= mfiutil.8 + +CFLAGS+= -fno-builtin-strftime +WARNS?=3 + +LDADD= -lutil + +# Here be dragons +.ifdef DEBUG +CFLAGS+= -DDEBUG +.endif + +.include <bsd.prog.mk> diff --git a/usr.sbin/mfiutil/README b/usr.sbin/mfiutil/README new file mode 100644 index 0000000..15e16bb --- /dev/null +++ b/usr.sbin/mfiutil/README @@ -0,0 +1,104 @@ +# $FreeBSD$ + +This package includes a mfiutil command for administering mfi(4) controllers +on FreeBSD. + +Version 1.0.13 + * Cleaned up warnings in preparation for integration with FreeBSD + +Version 1.0.12 + * Add 'drive clear' command to wipe drives with all 0x00 characters + +Version 1.0.11 + * Display serial number for drives + * Display location info for drives with 'show config' + +Version 1.0.10 + * Display min and max stripe size supported by adapters. + * Added support for examining the controller event log. + +Version 1.0.9 + * Display stripe size for volumes. + * Added support for setting the stripe size for new volumes. + * Fix a regression in 1.0.8 that broke creation of RAID-5 and RAID-50 + arrays. + +Version 1.0.8 + * Added support for RAID-60 arrays. + * Added 'flash' command to support firmware flashing. + +Version 1.0.7 + * Renamed 'clear config' to 'clear', 'create volume' to 'create', + 'delete volume' to 'delete', 'create spare' to 'add', and + 'delete spare' to 'remove'. The old names still work. + * Added support for RAID-6 arrays. + +Version 1.0.6 + * Added 'show patrol', 'patrol', 'start patrol', and 'stop patrol' + commands to manage patrol reads. + +Version 1.0.5 + * Added 'create volume' and 'delete volume' commands to manage volumes. + * Added 'clear config' command to clear entire configuration. + * Added more detailed error reporting based on firmware status codes. 
+ * Renamed 'progress' command to 'drive progress'. + * Added 'volume progress' command to display progress of volume-level + activities such as background inits. + * Fixed 'create spare' to properly add global spares. + +Version 1.0.4 + * Added 'create spare' and 'delete spare' commands to manage hot spares. + * Added 'good' command to mark unconfigured bad drives as good. + * Display more information about hot spares in 'show config' + * Allow physical drives to be specified via Exx:Syy similar to megacli + * Display onboard memory size in 'show adapter' + +Version 1.0.3 + * Added 'cache' command to manage cache settings for volumes. + * Added 'name' command to name volumes. + * Added manpage. + +Version 1.0.2 + * Added 'show adapter' and 'show battery' commands. + * Added RAID level of volumes to 'show config' and 'show volumes'. + * Added drive model info to 'show config' and 'show drives'. + * Added package firmware version to 'show firmware'. + * Added read and write cache status to 'show volumes'. + * Map volume IDs to mfidX device names on newer kernels. + +Version 1.0.1 + * Added 'show firmware' command + +Version 1.0.0 + * Initial release + +usage: mfiutil [-u unit] <command> ... 
+ +Commands include: + version + show adapter - display controller information + show battery - display battery information + show config - display RAID configuration + show drives - list physical drives + show firmware - list firmware images + show volumes - list logical volumes + show patrol - display patrol read status + fail <drive> - fail a physical drive + good <drive> - mark a bad physical drive as good + rebuild <drive> - mark failed drive ready for rebuild + drive progress <drive> - display status of active operations + start rebuild <drive> + abort rebuild <drive> + locate <drive> <on|off> - toggle drive LED + cache <volume> [command [setting]] + name <volume> <name> + volume progress <volume> - display status of active operations + clear - clear volume configuration + create <type> [-v] <drive>[,<drive>[,...]] [<drive>[,<drive>[,...]] + delete <volume> + add <drive> [volume] - add a hot spare + remove <drive> - remove a hot spare + patrol <disable|auto|manual> [interval [start]] + start patrol - start a patrol read + stop patrol - stop a patrol read + flash <firmware> diff --git a/usr.sbin/mfiutil/mfi_cmd.c b/usr.sbin/mfiutil/mfi_cmd.c new file mode 100644 index 0000000..abc8312 --- /dev/null +++ b/usr.sbin/mfiutil/mfi_cmd.c @@ -0,0 +1,351 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "mfiutil.h" +#include + +static const char *mfi_status_codes[] = { + "Command completed succesfully", + "Invalid command", + "Invalid DMCD opcode", + "Invalid parameter", + "Invalid Sequence Number", + "Abort isn't possible for the requested command", + "Application 'host' code not found", + "Application in use", + "Application not initialized", + "Array index invalid", + "Array row not empty", + "Configuration resource conflict", + "Device not found", + "Drive too small", + "Flash memory allocation failed", + "Flash download already in progress", + "Flash operation failed", + "Bad flash image", + "Incomplete flash image", + "Flash not open", + "Flash not started", + "Flush failed", + "Specified application doesn't have host-resident code", + "Volume consistency check in progress", + "Volume initialization in progress", + "Volume LBA out of range", + "Maximum number of volumes are already configured", + "Volume is not OPTIMAL", + "Volume rebuild in progress", + "Volume reconstruction in progress", + "Volume RAID level is wrong for requested 
operation", + "Too many spares assigned", + "Scratch memory not available", + "Error writing MFC data to SEEPROM", + "Required hardware is missing", + "Item not found", + "Volume drives are not within an enclosure", + "Drive clear in progress", + "Drive type mismatch (SATA vs SAS)", + "Patrol read disabled", + "Invalid row index", + "SAS Config - Invalid action", + "SAS Config - Invalid data", + "SAS Config - Invalid page", + "SAS Config - Invalid type", + "SCSI command completed with error", + "SCSI I/O request failed", + "SCSI RESERVATION_CONFLICT", + "One or more flush operations during shutdown failed", + "Firmware time is not set", + "Wrong firmware or drive state", + "Volume is offline", + "Peer controller rejected request", + "Unable to inform peer of communication changes", + "Volume reservation already in progress", + "I2C errors were detected", + "PCI errors occurred during XOR/DMA operation", + "Diagnostics failed", + "Unable to process command as boot messages are pending", + "Foreign configuration is incomplete" +}; + +const char * +mfi_status(u_int status_code) +{ + static char buffer[16]; + + if (status_code == MFI_STAT_INVALID_STATUS) + return ("Invalid status"); + if (status_code < sizeof(mfi_status_codes) / sizeof(char *)) + return (mfi_status_codes[status_code]); + snprintf(buffer, sizeof(buffer), "Status: 0x%02x", status_code); + return (buffer); +} + +const char * +mfi_raid_level(uint8_t primary_level, uint8_t secondary_level) +{ + static char buf[16]; + + switch (primary_level) { + case DDF_RAID0: + return ("RAID-0"); + case DDF_RAID1: + if (secondary_level != 0) + return ("RAID-10"); + else + return ("RAID-1"); + case DDF_RAID1E: + return ("RAID-1E"); + case DDF_RAID3: + return ("RAID-3"); + case DDF_RAID5: + if (secondary_level != 0) + return ("RAID-50"); + else + return ("RAID-5"); + case DDF_RAID5E: + return ("RAID-5E"); + case DDF_RAID5EE: + return ("RAID-5EE"); + case DDF_RAID6: + if (secondary_level != 0) + return ("RAID-60"); + else + 
return ("RAID-6"); + case DDF_JBOD: + return ("JBOD"); + case DDF_CONCAT: + return ("CONCAT"); + default: + sprintf(buf, "LVL 0x%02x", primary_level); + return (buf); + } +} + +static int +mfi_query_disk(int fd, uint8_t target_id, struct mfi_query_disk *info) +{ + + bzero(info, sizeof(*info)); + info->array_id = target_id; + if (ioctl(fd, MFIIO_QUERY_DISK, info) < 0) + return (-1); + if (!info->present) { + errno = ENXIO; + return (-1); + } + return (0); +} + +const char * +mfi_volume_name(int fd, uint8_t target_id) +{ + static struct mfi_query_disk info; + static char buf[4]; + + if (mfi_query_disk(fd, target_id, &info) < 0) { + snprintf(buf, sizeof(buf), "%d", target_id); + return (buf); + } + return (info.devname); +} + +int +mfi_volume_busy(int fd, uint8_t target_id) +{ + struct mfi_query_disk info; + + /* Assume it isn't mounted if we can't get information. */ + if (mfi_query_disk(fd, target_id, &info) < 0) + return (0); + return (info.open != 0); +} + +/* + * Check if the running kernel supports changing the RAID + * configuration of the mfi controller. + */ +int +mfi_reconfig_supported(void) +{ + char mibname[64]; + size_t len; + int dummy; + + len = sizeof(dummy); + snprintf(mibname, sizeof(mibname), "dev.mfi.%d.delete_busy_volumes", + mfi_unit); + return (sysctlbyname(mibname, &dummy, &len, NULL, 0) == 0); +} + +int +mfi_lookup_volume(int fd, const char *name, uint8_t *target_id) +{ + struct mfi_query_disk info; + struct mfi_ld_list list; + char *cp; + long val; + u_int i; + + /* If it's a valid number, treat it as a raw target ID. 
*/ + val = strtol(name, &cp, 0); + if (*cp == '\0') { + *target_id = val; + return (0); + } + + if (mfi_dcmd_command(fd, MFI_DCMD_LD_GET_LIST, &list, sizeof(list), + NULL, 0, NULL) < 0) + return (-1); + + for (i = 0; i < list.ld_count; i++) { + if (mfi_query_disk(fd, list.ld_list[i].ld.v.target_id, + &info) < 0) + continue; + if (strcmp(name, info.devname) == 0) { + *target_id = list.ld_list[i].ld.v.target_id; + return (0); + } + } + errno = EINVAL; + return (-1); +} + +int +mfi_dcmd_command(int fd, uint32_t opcode, void *buf, size_t bufsize, + uint8_t *mbox, size_t mboxlen, uint8_t *statusp) +{ + struct mfi_ioc_passthru ioc; + struct mfi_dcmd_frame *dcmd; + int r; + + if ((mbox != NULL && (mboxlen == 0 || mboxlen > MFI_MBOX_SIZE)) || + (mbox == NULL && mboxlen != 0)) { + errno = EINVAL; + return (-1); + } + + bzero(&ioc, sizeof(ioc)); + dcmd = &ioc.ioc_frame; + if (mbox) + bcopy(mbox, dcmd->mbox, mboxlen); + dcmd->header.cmd = MFI_CMD_DCMD; + dcmd->header.timeout = 0; + dcmd->header.flags = 0; + dcmd->header.data_len = bufsize; + dcmd->opcode = opcode; + + ioc.buf = buf; + ioc.buf_size = bufsize; + r = ioctl(fd, MFIIO_PASSTHRU, &ioc); + if (r < 0) + return (r); + + if (statusp != NULL) + *statusp = dcmd->header.cmd_status; + else if (dcmd->header.cmd_status != MFI_STAT_OK) { + warnx("Command failed: %s", + mfi_status(dcmd->header.cmd_status)); + errno = EIO; + return (-1); + } + return (0); +} + +int +mfi_ctrl_get_info(int fd, struct mfi_ctrl_info *info, uint8_t *statusp) +{ + + return (mfi_dcmd_command(fd, MFI_DCMD_CTRL_GETINFO, info, + sizeof(struct mfi_ctrl_info), NULL, 0, statusp)); +} + +int +mfi_open(int unit) +{ + char path[MAXPATHLEN]; + + snprintf(path, sizeof(path), "/dev/mfi%d", unit); + return (open(path, O_RDWR)); +} + +void +mfi_display_progress(const char *label, struct mfi_progress *prog) +{ + uint seconds; + + printf("%s: %.2f%% complete, after %ds", label, + (float)prog->progress * 100 / 0xffff, prog->elapsed_seconds); + if (prog->elapsed_seconds 
> 10) { + printf(" finished in "); + seconds = (0x10000 * (uint32_t)prog->elapsed_seconds) / + prog->progress - prog->elapsed_seconds; + if (seconds > 3600) + printf("%u:", seconds / 3600); + if (seconds > 60) { + seconds %= 3600; + printf("%02u:%02u", seconds / 60, seconds % 60); + } else + printf("%us", seconds); + } + printf("\n"); +} + +int +mfi_table_handler(struct mfiutil_command **start, struct mfiutil_command **end, + int ac, char **av) +{ + struct mfiutil_command **cmd; + + if (ac < 2) { + warnx("The %s command requires a sub-command.", av[0]); + return (EINVAL); + } + for (cmd = start; cmd < end; cmd++) { + if (strcmp((*cmd)->name, av[1]) == 0) + return ((*cmd)->handler(ac - 1, av + 1)); + } + + warnx("%s is not a valid sub-command of %s.", av[1], av[0]); + return (ENOENT); +} diff --git a/usr.sbin/mfiutil/mfi_config.c b/usr.sbin/mfiutil/mfi_config.c new file mode 100644 index 0000000..6152d21 --- /dev/null +++ b/usr.sbin/mfiutil/mfi_config.c @@ -0,0 +1,1164 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#ifdef DEBUG +#include +#endif +#include +#include +#include +#ifdef DEBUG +#include +#endif +#include +#include +#include +#include +#include "mfiutil.h" + +#ifdef DEBUG +static void dump_config(int fd, struct mfi_config_data *config); +#endif + +static int add_spare(int ac, char **av); +static int remove_spare(int ac, char **av); + +#define powerof2(x) ((((x)-1)&(x))==0) + +static long +dehumanize(const char *value) +{ + char *vtp; + long iv; + + if (value == NULL) + return (0); + iv = strtoq(value, &vtp, 0); + if (vtp == value || (vtp[0] != '\0' && vtp[1] != '\0')) { + return (0); + } + switch (vtp[0]) { + case 't': case 'T': + iv *= 1024; + case 'g': case 'G': + iv *= 1024; + case 'm': case 'M': + iv *= 1024; + case 'k': case 'K': + iv *= 1024; + case '\0': + break; + default: + return (0); + } + return (iv); +} +int +mfi_config_read(int fd, struct mfi_config_data **configp) +{ + struct mfi_config_data *config; + uint32_t config_size; + + /* + * Keep fetching the config in a loop until we have a large enough + * buffer to hold the entire configuration. 
+ */ + config = NULL; + config_size = 1024; +fetch: + config = reallocf(config, config_size); + if (config == NULL) + return (-1); + if (mfi_dcmd_command(fd, MFI_DCMD_CFG_READ, config, + config_size, NULL, 0, NULL) < 0) + return (-1); + + if (config->size > config_size) { + config_size = config->size; + goto fetch; + } + + *configp = config; + return (0); +} + +static struct mfi_array * +mfi_config_lookup_array(struct mfi_config_data *config, uint16_t array_ref) +{ + struct mfi_array *ar; + char *p; + int i; + + p = (char *)config->array; + for (i = 0; i < config->array_count; i++) { + ar = (struct mfi_array *)p; + if (ar->array_ref == array_ref) + return (ar); + p += config->array_size; + } + + return (NULL); +} + +static struct mfi_ld_config * +mfi_config_lookup_volume(struct mfi_config_data *config, uint8_t target_id) +{ + struct mfi_ld_config *ld; + char *p; + int i; + + p = (char *)config->array + config->array_count * config->array_size; + for (i = 0; i < config->log_drv_count; i++) { + ld = (struct mfi_ld_config *)p; + if (ld->properties.ld.v.target_id == target_id) + return (ld); + p += config->log_drv_size; + } + + return (NULL); +} + +static int +clear_config(int ac, char **av) +{ + struct mfi_ld_list list; + int ch, fd; + u_int i; + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (!mfi_reconfig_supported()) { + warnx("The current mfi(4) driver does not support " + "configuration changes."); + return (EOPNOTSUPP); + } + + if (mfi_ld_get_list(fd, &list, NULL) < 0) { + warn("Failed to get volume list"); + return (errno); + } + + for (i = 0; i < list.ld_count; i++) { + if (mfi_volume_busy(fd, list.ld_list[i].ld.v.target_id)) { + warnx("Volume %s is busy and cannot be deleted", + mfi_volume_name(fd, list.ld_list[i].ld.v.target_id)); + return (EBUSY); + } + } + + printf( + "Are you sure you wish to clear the configuration on mfi%u? 
[y/N] ", + mfi_unit); + ch = getchar(); + if (ch != 'y' && ch != 'Y') { + printf("\nAborting\n"); + return (0); + } + + if (mfi_dcmd_command(fd, MFI_DCMD_CFG_CLEAR, NULL, 0, NULL, 0, NULL) < 0) { + warn("Failed to clear configuration"); + return (errno); + } + + printf("mfi%d: Configuration cleared\n", mfi_unit); + close(fd); + + return (0); +} +MFI_COMMAND(top, clear, clear_config); + +#define MFI_ARRAY_SIZE 288 +#define MAX_DRIVES_PER_ARRAY \ + ((MFI_ARRAY_SIZE - sizeof(struct mfi_array)) / 8) + +#define RT_RAID0 0 +#define RT_RAID1 1 +#define RT_RAID5 2 +#define RT_RAID6 3 +#define RT_JBOD 4 +#define RT_CONCAT 5 +#define RT_RAID10 6 +#define RT_RAID50 7 +#define RT_RAID60 8 + +static int +compare_int(const void *one, const void *two) +{ + int first, second; + + first = *(const int *)one; + second = *(const int *)two; + + return (first - second); +} + +static struct raid_type_entry { + const char *name; + int raid_type; +} raid_type_table[] = { + { "raid0", RT_RAID0 }, + { "raid-0", RT_RAID0 }, + { "raid1", RT_RAID1 }, + { "raid-1", RT_RAID1 }, + { "mirror", RT_RAID1 }, + { "raid5", RT_RAID5 }, + { "raid-5", RT_RAID5 }, + { "raid6", RT_RAID6 }, + { "raid-6", RT_RAID6 }, + { "jbod", RT_JBOD }, + { "concat", RT_CONCAT }, + { "raid10", RT_RAID10 }, + { "raid1+0", RT_RAID10 }, + { "raid-10", RT_RAID10 }, + { "raid-1+0", RT_RAID10 }, + { "raid50", RT_RAID50 }, + { "raid5+0", RT_RAID50 }, + { "raid-50", RT_RAID50 }, + { "raid-5+0", RT_RAID50 }, + { "raid60", RT_RAID60 }, + { "raid6+0", RT_RAID60 }, + { "raid-60", RT_RAID60 }, + { "raid-6+0", RT_RAID60 }, + { NULL, 0 }, +}; + +struct config_id_state { + int array_count; + int log_drv_count; + int *arrays; + int *volumes; + uint16_t array_ref; + uint8_t target_id; +}; + +struct array_info { + int drive_count; + struct mfi_pd_info *drives; + struct mfi_array *array; +}; + +/* Parse a comma-separated list of drives for an array. 
*/ +static int +parse_array(int fd, int raid_type, char *array_str, struct array_info *info) +{ + struct mfi_pd_info *pinfo; + uint16_t device_id; + char *cp; + u_int count; + int error; + + cp = array_str; + for (count = 0; cp != NULL; count++) { + cp = strchr(cp, ','); + if (cp != NULL) { + cp++; + if (*cp == ',') { + warnx("Invalid drive list '%s'", array_str); + return (EINVAL); + } + } + } + + /* Validate the number of drives for this array. */ + if (count >= MAX_DRIVES_PER_ARRAY) { + warnx("Too many drives for a single array: max is %zu", + MAX_DRIVES_PER_ARRAY); + return (EINVAL); + } + switch (raid_type) { + case RT_RAID1: + case RT_RAID10: + if (count % 2 != 0) { + warnx("RAID1 and RAID10 require an even number of " + "drives in each array"); + return (EINVAL); + } + break; + case RT_RAID5: + case RT_RAID50: + if (count < 3) { + warnx("RAID5 and RAID50 require at least 3 drives in " + "each array"); + return (EINVAL); + } + break; + case RT_RAID6: + case RT_RAID60: + if (count < 4) { + warnx("RAID6 and RAID60 require at least 4 drives in " + "each array"); + return (EINVAL); + } + break; + } + + /* Validate each drive. */ + info->drives = calloc(count, sizeof(struct mfi_pd_info)); + info->drive_count = count; + for (pinfo = info->drives; (cp = strsep(&array_str, ",")) != NULL; + pinfo++) { + error = mfi_lookup_drive(fd, cp, &device_id); + if (error) + return (error); + + if (mfi_pd_get_info(fd, device_id, pinfo, NULL) < 0) { + warn("Failed to fetch drive info for drive %s", cp); + return (errno); + } + + if (pinfo->fw_state != MFI_PD_STATE_UNCONFIGURED_GOOD) { + warnx("Drive %u is not available", device_id); + return (EINVAL); + } + } + + return (0); +} + +/* + * Find the next free array ref assuming that 'array_ref' is the last + * one used. 'array_ref' should be 0xffff for the initial test. + */ +static uint16_t +find_next_array(struct config_id_state *state) +{ + int i; + + /* Assume the current one is used. 
*/ + state->array_ref++; + + /* Find the next free one. */ + for (i = 0; i < state->array_count; i++) + if (state->arrays[i] == state->array_ref) + state->array_ref++; + return (state->array_ref); +} + +/* + * Find the next free volume ID assuming that 'target_id' is the last + * one used. 'target_id' should be 0xff for the initial test. + */ +static uint8_t +find_next_volume(struct config_id_state *state) +{ + int i; + + /* Assume the current one is used. */ + state->target_id++; + + /* Find the next free one. */ + for (i = 0; i < state->log_drv_count; i++) + if (state->volumes[i] == state->target_id) + state->target_id++; + return (state->target_id); +} + +/* Populate an array with drives. */ +static void +build_array(int fd, char *arrayp, struct array_info *array_info, + struct config_id_state *state, int verbose) +{ + struct mfi_array *ar = (struct mfi_array *)arrayp; + int i; + + ar->size = array_info->drives[0].coerced_size; + ar->num_drives = array_info->drive_count; + ar->array_ref = find_next_array(state); + for (i = 0; i < array_info->drive_count; i++) { + if (verbose) + printf("Adding drive %u to array %u\n", + array_info->drives[i].ref.v.device_id, + ar->array_ref); + if (ar->size > array_info->drives[i].coerced_size) + ar->size = array_info->drives[i].coerced_size; + ar->pd[i].ref = array_info->drives[i].ref; + ar->pd[i].fw_state = MFI_PD_STATE_ONLINE; + } + array_info->array = ar; +} + +/* + * Create a volume that spans one or more arrays. 
+ */ +static void +build_volume(char *volumep, int narrays, struct array_info *arrays, + int raid_type, long stripe_size, struct config_id_state *state, int verbose) +{ + struct mfi_ld_config *ld = (struct mfi_ld_config *)volumep; + struct mfi_array *ar; + int i; + + /* properties */ + ld->properties.ld.v.target_id = find_next_volume(state); + ld->properties.ld.v.seq = 0; + ld->properties.default_cache_policy = MR_LD_CACHE_ALLOW_WRITE_CACHE | + MR_LD_CACHE_WRITE_BACK; + ld->properties.access_policy = MFI_LD_ACCESS_RW; + ld->properties.disk_cache_policy = MR_PD_CACHE_UNCHANGED; + ld->properties.current_cache_policy = MR_LD_CACHE_ALLOW_WRITE_CACHE | + MR_LD_CACHE_WRITE_BACK; + ld->properties.no_bgi = 0; + + /* params */ + switch (raid_type) { + case RT_RAID0: + case RT_JBOD: + ld->params.primary_raid_level = DDF_RAID0; + ld->params.raid_level_qualifier = 0; + ld->params.secondary_raid_level = 0; + break; + case RT_RAID1: + ld->params.primary_raid_level = DDF_RAID1; + ld->params.raid_level_qualifier = 0; + ld->params.secondary_raid_level = 0; + break; + case RT_RAID5: + ld->params.primary_raid_level = DDF_RAID5; + ld->params.raid_level_qualifier = 3; + ld->params.secondary_raid_level = 0; + break; + case RT_RAID6: + ld->params.primary_raid_level = DDF_RAID6; + ld->params.raid_level_qualifier = 3; + ld->params.secondary_raid_level = 0; + break; + case RT_CONCAT: + ld->params.primary_raid_level = DDF_CONCAT; + ld->params.raid_level_qualifier = 0; + ld->params.secondary_raid_level = 0; + break; + case RT_RAID10: + ld->params.primary_raid_level = DDF_RAID1; + ld->params.raid_level_qualifier = 0; + ld->params.secondary_raid_level = 3; /* XXX? */ + break; + case RT_RAID50: + /* + * XXX: This appears to work though the card's BIOS + * complains that the configuration is foreign. The + * BIOS setup does not allow for creation of RAID-50 + * or RAID-60 arrays. The only nested array + * configuration it allows for is RAID-10. 
+ */ + ld->params.primary_raid_level = DDF_RAID5; + ld->params.raid_level_qualifier = 3; + ld->params.secondary_raid_level = 3; /* XXX? */ + break; + case RT_RAID60: + ld->params.primary_raid_level = DDF_RAID6; + ld->params.raid_level_qualifier = 3; + ld->params.secondary_raid_level = 3; /* XXX? */ + break; + } + + /* + * Stripe size is encoded as (2 ^ N) * 512 = stripe_size. Use + * ffs() to simulate log2(stripe_size). + */ + ld->params.stripe_size = ffs(stripe_size) - 1 - 9; + ld->params.num_drives = arrays[0].array->num_drives; + ld->params.span_depth = narrays; + ld->params.state = MFI_LD_STATE_OPTIMAL; + ld->params.init_state = MFI_LD_PARAMS_INIT_NO; + ld->params.is_consistent = 0; + + /* spans */ + for (i = 0; i < narrays; i++) { + ar = arrays[i].array; + if (verbose) + printf("Adding array %u to volume %u\n", ar->array_ref, + ld->properties.ld.v.target_id); + ld->span[i].start_block = 0; + ld->span[i].num_blocks = ar->size; + ld->span[i].array_ref = ar->array_ref; + } +} + +static int +create_volume(int ac, char **av) +{ + struct mfi_config_data *config; + struct mfi_array *ar; + struct mfi_ld_config *ld; + struct config_id_state state; + size_t config_size; + char *p, *cfg_arrays, *cfg_volumes; + int error, fd, i, raid_type; + int narrays, nvolumes, arrays_per_volume; + struct array_info *arrays; + long stripe_size; +#ifdef DEBUG + int dump; +#endif + int ch, verbose; + + /* + * Backwards compat. Map 'create volume' to 'create' and + * 'create spare' to 'add'. 
+ */ + if (ac > 1) { + if (strcmp(av[1], "volume") == 0) { + av++; + ac--; + } else if (strcmp(av[1], "spare") == 0) { + av++; + ac--; + return (add_spare(ac, av)); + } + } + + if (ac < 2) { + warnx("create volume: volume type required"); + return (EINVAL); + } + + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (!mfi_reconfig_supported()) { + warnx("The current mfi(4) driver does not support " + "configuration changes."); + return (EOPNOTSUPP); + } + + /* Lookup the RAID type first. */ + raid_type = -1; + for (i = 0; raid_type_table[i].name != NULL; i++) + if (strcasecmp(raid_type_table[i].name, av[1]) == 0) { + raid_type = raid_type_table[i].raid_type; + break; + } + + if (raid_type == -1) { + warnx("Unknown or unsupported volume type %s", av[1]); + return (EINVAL); + } + + /* Parse any options. */ + optind = 2; +#ifdef DEBUG + dump = 0; +#endif + verbose = 0; + stripe_size = 64 * 1024; + + while ((ch = getopt(ac, av, "ds:v")) != -1) { + switch (ch) { +#ifdef DEBUG + case 'd': + dump = 1; + break; +#endif + case 's': + stripe_size = dehumanize(optarg); + if ((stripe_size < 512) || (!powerof2(stripe_size))) + stripe_size = 64 * 1024; + break; + case 'v': + verbose = 1; + break; + case '?': + default: + return (EINVAL); + } + } + ac -= optind; + av += optind; + + /* Parse all the arrays. 
*/ + narrays = ac; + if (narrays == 0) { + warnx("At least one drive list is required"); + return (EINVAL); + } + switch (raid_type) { + case RT_RAID0: + case RT_RAID1: + case RT_RAID5: + case RT_RAID6: + case RT_CONCAT: + if (narrays != 1) { + warnx("Only one drive list can be specified"); + return (EINVAL); + } + break; + case RT_RAID10: + case RT_RAID50: + case RT_RAID60: + if (narrays < 1) { + warnx("RAID10, RAID50, and RAID60 require at least " + "two drive lists"); + return (EINVAL); + } + if (narrays > MFI_MAX_SPAN_DEPTH) { + warnx("Volume spans more than %d arrays", + MFI_MAX_SPAN_DEPTH); + return (EINVAL); + } + break; + } + arrays = calloc(narrays, sizeof(*arrays)); + for (i = 0; i < narrays; i++) { + error = parse_array(fd, raid_type, av[i], &arrays[i]); + if (error) + return (error); + } + + switch (raid_type) { + case RT_RAID10: + case RT_RAID50: + case RT_RAID60: + for (i = 1; i < narrays; i++) { + if (arrays[i].drive_count != arrays[0].drive_count) { + warnx("All arrays must contain the same " + "number of drives"); + return (EINVAL); + } + } + break; + } + + /* + * Fetch the current config and build sorted lists of existing + * array and volume identifiers. 
+ */ + if (mfi_config_read(fd, &config) < 0) { + warn("Failed to read configuration"); + return (errno); + } + p = (char *)config->array; + state.array_ref = 0xffff; + state.target_id = 0xff; + state.array_count = config->array_count; + if (config->array_count > 0) { + state.arrays = calloc(config->array_count, sizeof(int)); + for (i = 0; i < config->array_count; i++) { + ar = (struct mfi_array *)p; + state.arrays[i] = ar->array_ref; + p += config->array_size; + } + qsort(state.arrays, config->array_count, sizeof(int), + compare_int); + } else + state.arrays = NULL; + state.log_drv_count = config->log_drv_count; + if (config->log_drv_count) { + state.volumes = calloc(config->log_drv_count, sizeof(int)); + for (i = 0; i < config->log_drv_count; i++) { + ld = (struct mfi_ld_config *)p; + state.volumes[i] = ld->properties.ld.v.target_id; + p += config->log_drv_size; + } + qsort(state.volumes, config->log_drv_count, sizeof(int), + compare_int); + } else + state.volumes = NULL; + free(config); + + /* Determine the size of the configuration we will build. */ + switch (raid_type) { + case RT_RAID0: + case RT_RAID1: + case RT_RAID5: + case RT_RAID6: + case RT_CONCAT: + case RT_JBOD: + /* Each volume spans a single array. */ + nvolumes = narrays; + break; + case RT_RAID10: + case RT_RAID50: + case RT_RAID60: + /* A single volume spans multiple arrays. */ + nvolumes = 1; + break; + default: + /* Pacify gcc. 
*/ + abort(); + } + + config_size = sizeof(struct mfi_config_data) + + sizeof(struct mfi_ld_config) * nvolumes + MFI_ARRAY_SIZE * narrays; + config = calloc(1, config_size); + config->size = config_size; + config->array_count = narrays; + config->array_size = MFI_ARRAY_SIZE; /* XXX: Firmware hardcode */ + config->log_drv_count = nvolumes; + config->log_drv_size = sizeof(struct mfi_ld_config); + config->spares_count = 0; + config->spares_size = 40; /* XXX: Firmware hardcode */ + cfg_arrays = (char *)config->array; + cfg_volumes = cfg_arrays + config->array_size * narrays; + + /* Build the arrays. */ + for (i = 0; i < narrays; i++) { + build_array(fd, cfg_arrays, &arrays[i], &state, verbose); + cfg_arrays += config->array_size; + } + + /* Now build the volume(s). */ + arrays_per_volume = narrays / nvolumes; + for (i = 0; i < nvolumes; i++) { + build_volume(cfg_volumes, arrays_per_volume, + &arrays[i * arrays_per_volume], raid_type, stripe_size, + &state, verbose); + cfg_volumes += config->log_drv_size; + } + +#ifdef DEBUG + if (dump) + dump_config(fd, config); + else +#endif + + /* Send the new config to the controller. */ + if (mfi_dcmd_command(fd, MFI_DCMD_CFG_ADD, config, config_size, + NULL, 0, NULL) < 0) { + warn("Failed to add volume"); + return (errno); + } + + /* Clean up. */ + free(config); + if (state.log_drv_count > 0) + free(state.volumes); + if (state.array_count > 0) + free(state.arrays); + for (i = 0; i < narrays; i++) + free(arrays[i].drives); + free(arrays); + close(fd); + + return (0); +} +MFI_COMMAND(top, create, create_volume); + +static int +delete_volume(int ac, char **av) +{ + struct mfi_ld_info info; + int fd; + uint8_t target_id, mbox[4]; + + /* + * Backwards compat. Map 'delete volume' to 'delete' and + * 'delete spare' to 'remove'. 
+ */ + if (ac > 1) { + if (strcmp(av[1], "volume") == 0) { + av++; + ac--; + } else if (strcmp(av[1], "spare") == 0) { + av++; + ac--; + return (remove_spare(ac, av)); + } + } + + if (ac != 2) { + warnx("delete volume: volume required"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (!mfi_reconfig_supported()) { + warnx("The current mfi(4) driver does not support " + "configuration changes."); + return (EOPNOTSUPP); + } + + if (mfi_lookup_volume(fd, av[1], &target_id) < 0) { + warn("Invalid volume %s", av[1]); + return (errno); + } + + if (mfi_ld_get_info(fd, target_id, &info, NULL) < 0) { + warn("Failed to get info for volume %d", target_id); + return (errno); + } + + if (mfi_volume_busy(fd, target_id)) { + warnx("Volume %s is busy and cannot be deleted", + mfi_volume_name(fd, target_id)); + return (EBUSY); + } + + mbox_store_ldref(mbox, &info.ld_config.properties.ld); + if (mfi_dcmd_command(fd, MFI_DCMD_LD_DELETE, NULL, 0, mbox, + sizeof(mbox), NULL) < 0) { + warn("Failed to delete volume"); + return (errno); + } + + close(fd); + + return (0); +} +MFI_COMMAND(top, delete, delete_volume); + +static int +add_spare(int ac, char **av) +{ + struct mfi_pd_info info; + struct mfi_config_data *config; + struct mfi_array *ar; + struct mfi_ld_config *ld; + struct mfi_spare *spare; + uint16_t device_id; + uint8_t target_id; + char *p; + int error, fd, i; + + if (ac < 2) { + warnx("add spare: drive required"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + error = mfi_lookup_drive(fd, av[1], &device_id); + if (error) + return (error); + + if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) { + warn("Failed to fetch drive info"); + return (errno); + } + + if (info.fw_state != MFI_PD_STATE_UNCONFIGURED_GOOD) { + warnx("Drive %u is not available", device_id); + return (EINVAL); + } + + if (ac > 2) { + if (mfi_lookup_volume(fd, av[2], 
&target_id) < 0) { + warn("Invalid volume %s", av[2]); + return (errno); + } + } + + if (mfi_config_read(fd, &config) < 0) { + warn("Failed to read configuration"); + return (errno); + } + + spare = malloc(sizeof(struct mfi_spare) + sizeof(uint16_t) * + config->array_count); + bzero(spare, sizeof(struct mfi_spare)); + spare->ref = info.ref; + + if (ac == 2) { + /* Global spare backs all arrays. */ + p = (char *)config->array; + for (i = 0; i < config->array_count; i++) { + ar = (struct mfi_array *)p; + if (ar->size > info.coerced_size) { + warnx("Spare isn't large enough for array %u", + ar->array_ref); + return (EINVAL); + } + p += config->array_size; + } + spare->array_count = 0; + } else { + /* + * Dedicated spares only back the arrays for a + * specific volume. + */ + ld = mfi_config_lookup_volume(config, target_id); + if (ld == NULL) { + warnx("Did not find volume %d", target_id); + return (EINVAL); + } + + spare->spare_type |= MFI_SPARE_DEDICATED; + spare->array_count = ld->params.span_depth; + for (i = 0; i < ld->params.span_depth; i++) { + ar = mfi_config_lookup_array(config, + ld->span[i].array_ref); + if (ar == NULL) { + warnx("Missing array; inconsistent config?"); + return (ENXIO); + } + if (ar->size > info.coerced_size) { + warnx("Spare isn't large enough for array %u", + ar->array_ref); + return (EINVAL); + } + spare->array_ref[i] = ar->array_ref; + } + } + free(config); + + if (mfi_dcmd_command(fd, MFI_DCMD_CFG_MAKE_SPARE, spare, + sizeof(struct mfi_spare) + sizeof(uint16_t) * spare->array_count, + NULL, 0, NULL) < 0) { + warn("Failed to assign spare"); + return (errno); + } + + close(fd); + + return (0); +} +MFI_COMMAND(top, add, add_spare); + +static int +remove_spare(int ac, char **av) +{ + struct mfi_pd_info info; + int error, fd; + uint16_t device_id; + uint8_t mbox[4]; + + if (ac != 2) { + warnx("remove spare: drive required"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + 
error = mfi_lookup_drive(fd, av[1], &device_id); + if (error) + return (error); + + /* Get the info for this drive. */ + if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) { + warn("Failed to fetch info for drive %u", device_id); + return (errno); + } + + if (info.fw_state != MFI_PD_STATE_HOT_SPARE) { + warnx("Drive %u is not a hot spare", device_id); + return (EINVAL); + } + + mbox_store_pdref(mbox, &info.ref); + if (mfi_dcmd_command(fd, MFI_DCMD_CFG_REMOVE_SPARE, NULL, 0, mbox, + sizeof(mbox), NULL) < 0) { + warn("Failed to delete spare"); + return (errno); + } + + close(fd); + + return (0); +} +MFI_COMMAND(top, remove, remove_spare); + +#ifdef DEBUG +/* Display raw data about a config. */ +static void +dump_config(int fd, struct mfi_config_data *config) +{ + struct mfi_array *ar; + struct mfi_ld_config *ld; + struct mfi_spare *sp; + struct mfi_pd_info pinfo; + uint16_t device_id; + char *p; + int i, j; + + printf( + "mfi%d Configuration (Debug): %d arrays, %d volumes, %d spares\n", + mfi_unit, config->array_count, config->log_drv_count, + config->spares_count); + printf(" array size: %u\n", config->array_size); + printf(" volume size: %u\n", config->log_drv_size); + printf(" spare size: %u\n", config->spares_size); + p = (char *)config->array; + + for (i = 0; i < config->array_count; i++) { + ar = (struct mfi_array *)p; + printf(" array %u of %u drives:\n", ar->array_ref, + ar->num_drives); + printf(" size = %ju\n", (uintmax_t)ar->size); + for (j = 0; j < ar->num_drives; j++) { + device_id = ar->pd[j].ref.device_id; + if (device_id == 0xffff) + printf(" drive MISSING\n"); + else { + printf(" drive %u %s\n", device_id, + mfi_pdstate(ar->pd[j].fw_state)); + if (mfi_pd_get_info(fd, device_id, &pinfo, + NULL) >= 0) { + printf(" raw size: %ju\n", + (uintmax_t)pinfo.raw_size); + printf(" non-coerced size: %ju\n", + (uintmax_t)pinfo.non_coerced_size); + printf(" coerced size: %ju\n", + (uintmax_t)pinfo.coerced_size); + } + } + } + p += config->array_size; + } + + for 
(i = 0; i < config->log_drv_count; i++) { + ld = (struct mfi_ld_config *)p; + printf(" volume %s ", + mfi_volume_name(fd, ld->properties.ld.v.target_id)); + printf("%s %s", + mfi_raid_level(ld->params.primary_raid_level, + ld->params.secondary_raid_level), + mfi_ldstate(ld->params.state)); + if (ld->properties.name[0] != '\0') + printf(" <%s>", ld->properties.name); + printf("\n"); + printf(" primary raid level: %u\n", + ld->params.primary_raid_level); + printf(" raid level qualifier: %u\n", + ld->params.raid_level_qualifier); + printf(" secondary raid level: %u\n", + ld->params.secondary_raid_level); + printf(" stripe size: %u\n", ld->params.stripe_size); + printf(" num drives: %u\n", ld->params.num_drives); + printf(" init state: %u\n", ld->params.init_state); + printf(" consistent: %u\n", ld->params.is_consistent); + printf(" no bgi: %u\n", ld->properties.no_bgi); + printf(" spans:\n"); + for (j = 0; j < ld->params.span_depth; j++) { + printf(" array %u @ ", ld->span[j].array_ref); + printf("%ju : %ju\n", + (uintmax_t)ld->span[j].start_block, + (uintmax_t)ld->span[j].num_blocks); + } + p += config->log_drv_size; + } + + for (i = 0; i < config->spares_count; i++) { + sp = (struct mfi_spare *)p; + printf(" %s spare %u ", + sp->spare_type & MFI_SPARE_DEDICATED ? "dedicated" : + "global", sp->ref.device_id); + printf("%s", mfi_pdstate(MFI_PD_STATE_HOT_SPARE)); + printf(" backs:\n"); + for (j = 0; j < sp->array_count; j++) + printf(" array %u\n", sp->array_ref[j]); + p += config->spares_size; + } +} + +static int +debug_config(int ac, char **av) +{ + struct mfi_config_data *config; + int fd; + + if (ac != 1) { + warnx("debug: extra arguments"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + /* Get the config from the controller. */ + if (mfi_config_read(fd, &config) < 0) { + warn("Failed to get config"); + return (errno); + } + + /* Dump out the configuration. 
*/ + dump_config(fd, config); + free(config); + close(fd); + + return (0); +} +MFI_COMMAND(top, debug, debug_config); + +static int +dump(int ac, char **av) +{ + struct mfi_config_data *config; + char buf[64]; + size_t len; + int fd; + + if (ac != 1) { + warnx("dump: extra arguments"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + /* Get the stashed copy of the last dcmd from the driver. */ + snprintf(buf, sizeof(buf), "dev.mfi.%d.debug_command", mfi_unit); + if (sysctlbyname(buf, NULL, &len, NULL, 0) < 0) { + warn("Failed to read debug command"); + if (errno == ENOENT) + errno = EOPNOTSUPP; + return (errno); + } + + config = malloc(len); + if (sysctlbyname(buf, config, &len, NULL, 0) < 0) { + warn("Failed to read debug command"); + return (errno); + } + dump_config(fd, config); + free(config); + close(fd); + + return (0); +} +MFI_COMMAND(top, dump, dump); +#endif diff --git a/usr.sbin/mfiutil/mfi_drive.c b/usr.sbin/mfiutil/mfi_drive.c new file mode 100644 index 0000000..1044228 --- /dev/null +++ b/usr.sbin/mfiutil/mfi_drive.c @@ -0,0 +1,625 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mfiutil.h" + +MFI_TABLE(top, drive); + +const char * +mfi_pdstate(enum mfi_pd_state state) +{ + static char buf[16]; + + switch (state) { + case MFI_PD_STATE_UNCONFIGURED_GOOD: + return ("UNCONFIGURED GOOD"); + case MFI_PD_STATE_UNCONFIGURED_BAD: + return ("UNCONFIGURED BAD"); + case MFI_PD_STATE_HOT_SPARE: + return ("HOT SPARE"); + case MFI_PD_STATE_OFFLINE: + return ("OFFLINE"); + case MFI_PD_STATE_FAILED: + return ("FAILED"); + case MFI_PD_STATE_REBUILD: + return ("REBUILD"); + case MFI_PD_STATE_ONLINE: + return ("ONLINE"); + default: + sprintf(buf, "PSTATE 0x%04x", state); + return (buf); + } +} + +int +mfi_lookup_drive(int fd, char *drive, uint16_t *device_id) +{ + struct mfi_pd_list *list; + uint8_t encl, slot; + long val; + u_int i; + char *cp; + + /* Look for a raw device id first. */ + val = strtol(drive, &cp, 0); + if (*cp == '\0') { + if (val < 0 || val >= 0xffff) + goto bad; + *device_id = val; + return (0); + } + + /* Support for MegaCli style [Exx]:Syy notation. 
*/ + if (toupper(drive[0]) == 'E' || toupper(drive[0]) == 'S') { + if (drive[1] == '\0') + goto bad; + cp = drive; + if (toupper(drive[0]) == 'E') { + cp++; /* Eat 'E' */ + val = strtol(cp, &cp, 0); + if (val < 0 || val > 0xff || *cp != ':') + goto bad; + encl = val; + cp++; /* Eat ':' */ + if (toupper(*cp) != 'S') + goto bad; + } else + encl = 0xff; + cp++; /* Eat 'S' */ + if (*cp == '\0') + goto bad; + val = strtol(cp, &cp, 0); + if (val < 0 || val > 0xff || *cp != '\0') + goto bad; + slot = val; + + if (mfi_pd_get_list(fd, &list, NULL) < 0) { + warn("Failed to fetch drive list"); + return (errno); + } + + for (i = 0; i < list->count; i++) { + if (list->addr[i].scsi_dev_type != 0) + continue; + + if (((encl == 0xff && + list->addr[i].encl_device_id == 0xffff) || + list->addr[i].encl_index == encl) && + list->addr[i].slot_number == slot) { + *device_id = list->addr[i].device_id; + free(list); + return (0); + } + } + free(list); + warnx("Unknown drive %s", drive); + return (EINVAL); + } + +bad: + warnx("Invalid drive number %s", drive); + return (EINVAL); +} + +static void +mbox_store_device_id(uint8_t *mbox, uint16_t device_id) +{ + + mbox[0] = device_id & 0xff; + mbox[1] = device_id >> 8; +} + +void +mbox_store_pdref(uint8_t *mbox, union mfi_pd_ref *ref) +{ + + mbox[0] = ref->v.device_id & 0xff; + mbox[1] = ref->v.device_id >> 8; + mbox[2] = ref->v.seq_num & 0xff; + mbox[3] = ref->v.seq_num >> 8; +} + +int +mfi_pd_get_list(int fd, struct mfi_pd_list **listp, uint8_t *statusp) +{ + struct mfi_pd_list *list; + uint32_t list_size; + + /* + * Keep fetching the list in a loop until we have a large enough + * buffer to hold the entire list. 
+ */ + list = NULL; + list_size = 1024; +fetch: + list = reallocf(list, list_size); + if (list == NULL) + return (-1); + if (mfi_dcmd_command(fd, MFI_DCMD_PD_GET_LIST, list, list_size, NULL, + 0, statusp) < 0) { + free(list); + return (-1); + } + + if (list->size > list_size) { + list_size = list->size; + goto fetch; + } + + *listp = list; + return (0); +} + +int +mfi_pd_get_info(int fd, uint16_t device_id, struct mfi_pd_info *info, + uint8_t *statusp) +{ + uint8_t mbox[2]; + + mbox_store_device_id(&mbox[0], device_id); + return (mfi_dcmd_command(fd, MFI_DCMD_PD_GET_INFO, info, + sizeof(struct mfi_pd_info), mbox, 2, statusp)); +} + +static void +cam_strvis(char *dst, const char *src, int srclen, int dstlen) +{ + + /* Trim leading/trailing spaces, nulls. */ + while (srclen > 0 && src[0] == ' ') + src++, srclen--; + while (srclen > 0 + && (src[srclen-1] == ' ' || src[srclen-1] == '\0')) + srclen--; + + while (srclen > 0 && dstlen > 1) { + char *cur_pos = dst; + + if (*src < 0x20) { + /* SCSI-II Specifies that these should never occur. */ + /* non-printable character */ + if (dstlen > 4) { + *cur_pos++ = '\\'; + *cur_pos++ = ((*src & 0300) >> 6) + '0'; + *cur_pos++ = ((*src & 0070) >> 3) + '0'; + *cur_pos++ = ((*src & 0007) >> 0) + '0'; + } else { + *cur_pos++ = '?'; + } + } else { + /* normal character */ + *cur_pos++ = *src; + } + src++; + srclen--; + dstlen -= cur_pos - dst; + dst = cur_pos; + } + *dst = '\0'; +} + +/* Borrowed heavily from scsi_all.c:scsi_print_inquiry(). 
*/ +const char * +mfi_pd_inq_string(struct mfi_pd_info *info) +{ + struct scsi_inquiry_data *inq_data; + char vendor[16], product[48], revision[16], rstr[12], serial[SID_VENDOR_SPECIFIC_0_SIZE]; + static char inq_string[64]; + + inq_data = (struct scsi_inquiry_data *)info->inquiry_data; + if (SID_QUAL_IS_VENDOR_UNIQUE(inq_data)) + return (NULL); + if (SID_TYPE(inq_data) != T_DIRECT) + return (NULL); + if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) + return (NULL); + + cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor), + sizeof(vendor)); + cam_strvis(product, inq_data->product, sizeof(inq_data->product), + sizeof(product)); + cam_strvis(revision, inq_data->revision, sizeof(inq_data->revision), + sizeof(revision)); + cam_strvis(serial, (char *)inq_data->vendor_specific0, sizeof(inq_data->vendor_specific0), + sizeof(serial)); + + /* Hack for SATA disks, no idea how to tell speed. */ + if (strcmp(vendor, "ATA") == 0) { + snprintf(inq_string, sizeof(inq_string), "<%s %s serial=%s> SATA", + product, revision, serial); + return (inq_string); + } + + switch (SID_ANSI_REV(inq_data)) { + case SCSI_REV_CCS: + strcpy(rstr, "SCSI-CCS"); + break; + case 5: + strcpy(rstr, "SAS"); + break; + default: + snprintf(rstr, sizeof (rstr), "SCSI-%d", + SID_ANSI_REV(inq_data)); + break; + } + snprintf(inq_string, sizeof(inq_string), "<%s %s %s serial=%s> %s", vendor, + product, revision, serial, rstr); + return (inq_string); +} + +/* Helper function to set a drive to a given state. */ +static int +drive_set_state(char *drive, uint16_t new_state) +{ + struct mfi_pd_info info; + uint16_t device_id; + uint8_t mbox[6]; + int error, fd; + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + error = mfi_lookup_drive(fd, drive, &device_id); + if (error) + return (error); + + /* Get the info for this drive. 
*/ + if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) { + warn("Failed to fetch info for drive %u", device_id); + return (errno); + } + + /* Try to change the state. */ + if (info.fw_state == new_state) { + warnx("Drive %u is already in the desired state", device_id); + return (EINVAL); + } + + mbox_store_pdref(&mbox[0], &info.ref); + mbox[4] = new_state & 0xff; + mbox[5] = new_state >> 8; + if (mfi_dcmd_command(fd, MFI_DCMD_PD_STATE_SET, NULL, 0, mbox, 6, + NULL) < 0) { + warn("Failed to set drive %u to %s", device_id, + mfi_pdstate(new_state)); + return (errno); + } + + close(fd); + + return (0); +} + +static int +fail_drive(int ac, char **av) +{ + + if (ac != 2) { + warnx("fail: %s", ac > 2 ? "extra arguments" : + "drive required"); + return (EINVAL); + } + + return (drive_set_state(av[1], MFI_PD_STATE_FAILED)); +} +MFI_COMMAND(top, fail, fail_drive); + +static int +good_drive(int ac, char **av) +{ + + if (ac != 2) { + warnx("good: %s", ac > 2 ? "extra arguments" : + "drive required"); + return (EINVAL); + } + + return (drive_set_state(av[1], MFI_PD_STATE_UNCONFIGURED_GOOD)); +} +MFI_COMMAND(top, good, good_drive); + +static int +rebuild_drive(int ac, char **av) +{ + + if (ac != 2) { + warnx("rebuild: %s", ac > 2 ? "extra arguments" : + "drive required"); + return (EINVAL); + } + + return (drive_set_state(av[1], MFI_PD_STATE_REBUILD)); +} +MFI_COMMAND(top, rebuild, rebuild_drive); + +static int +start_rebuild(int ac, char **av) +{ + struct mfi_pd_info info; + uint16_t device_id; + uint8_t mbox[4]; + int error, fd; + + if (ac != 2) { + warnx("start rebuild: %s", ac > 2 ? "extra arguments" : + "drive required"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + error = mfi_lookup_drive(fd, av[1], &device_id); + if (error) + return (error); + + /* Get the info for this drive. 
*/ + if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) { + warn("Failed to fetch info for drive %u", device_id); + return (errno); + } + + /* Check the state, must be REBUILD. */ + if (info.fw_state != MFI_PD_STATE_REBUILD) { + warn("Drive %d is not in the REBUILD state", device_id); + return (EINVAL); + } + + /* Start the rebuild. */ + mbox_store_pdref(&mbox[0], &info.ref); + if (mfi_dcmd_command(fd, MFI_DCMD_PD_REBUILD_START, NULL, 0, mbox, 4, + NULL) < 0) { + warn("Failed to start rebuild on drive %u", device_id); + return (errno); + } + close(fd); + + return (0); +} +MFI_COMMAND(start, rebuild, start_rebuild); + +static int +abort_rebuild(int ac, char **av) +{ + struct mfi_pd_info info; + uint16_t device_id; + uint8_t mbox[4]; + int error, fd; + + if (ac != 2) { + warnx("abort rebuild: %s", ac > 2 ? "extra arguments" : + "drive required"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + error = mfi_lookup_drive(fd, av[1], &device_id); + if (error) + return (error); + + /* Get the info for this drive. */ + if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) { + warn("Failed to fetch info for drive %u", device_id); + return (errno); + } + + /* Check the state, must be REBUILD. */ + if (info.fw_state != MFI_PD_STATE_REBUILD) { + warn("Drive %d is not in the REBUILD state", device_id); + return (EINVAL); + } + + /* Abort the rebuild. */ + mbox_store_pdref(&mbox[0], &info.ref); + if (mfi_dcmd_command(fd, MFI_DCMD_PD_REBUILD_ABORT, NULL, 0, mbox, 4, + NULL) < 0) { + warn("Failed to abort rebuild on drive %u", device_id); + return (errno); + } + close(fd); + + return (0); +} +MFI_COMMAND(abort, rebuild, abort_rebuild); + +static int +drive_progress(int ac, char **av) +{ + struct mfi_pd_info info; + uint16_t device_id; + int error, fd; + + if (ac != 2) { + warnx("drive progress: %s", ac > 2 ? 
"extra arguments" : + "drive required"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + error = mfi_lookup_drive(fd, av[1], &device_id); + if (error) + return (error); + + /* Get the info for this drive. */ + if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) { + warn("Failed to fetch info for drive %u", device_id); + return (errno); + } + close(fd); + + /* Display any of the active events. */ + if (info.prog_info.active & MFI_PD_PROGRESS_REBUILD) + mfi_display_progress("Rebuild", &info.prog_info.rbld); + if (info.prog_info.active & MFI_PD_PROGRESS_PATROL) + mfi_display_progress("Patrol Read", &info.prog_info.patrol); + if (info.prog_info.active & MFI_PD_PROGRESS_CLEAR) + mfi_display_progress("Clear", &info.prog_info.clear); + if ((info.prog_info.active & (MFI_PD_PROGRESS_REBUILD | + MFI_PD_PROGRESS_PATROL | MFI_PD_PROGRESS_CLEAR)) == 0) + printf("No activity in progress for drive %u.\n", device_id); + + return (0); +} +MFI_COMMAND(drive, progress, drive_progress); + +static int +drive_clear(int ac, char **av) +{ + struct mfi_pd_info info; + uint32_t opcode; + uint16_t device_id; + uint8_t mbox[4]; + char *s1; + int error, fd; + + if (ac != 3) { + warnx("drive clear: %s", ac > 3 ? "extra arguments" : + "drive and action requires"); + return (EINVAL); + } + + for (s1 = av[2]; *s1 != '\0'; s1++) + *s1 = tolower(*s1); + if (strcmp(av[2], "start") == 0) + opcode = MFI_DCMD_PD_CLEAR_START; + else if ((strcmp(av[2], "stop") == 0) || (strcmp(av[2], "abort") == 0)) + opcode = MFI_DCMD_PD_CLEAR_ABORT; + else { + warnx("drive clear: invalid action, must be 'start' or 'stop'\n"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + error = mfi_lookup_drive(fd, av[1], &device_id); + if (error) + return (error); + + /* Get the info for this drive. 
*/ + if (mfi_pd_get_info(fd, device_id, &info, NULL) < 0) { + warn("Failed to fetch info for drive %u", device_id); + return (errno); + } + + mbox_store_pdref(&mbox[0], &info.ref); + if (mfi_dcmd_command(fd, opcode, NULL, 0, mbox, 4, NULL) < 0) { + warn("Failed to %s clear on drive %u", + opcode == MFI_DCMD_PD_CLEAR_START ? "start" : "stop", + device_id); + return (errno); + } + + close(fd); + return (0); +} +MFI_COMMAND(drive, clear, drive_clear); + +static int +drive_locate(int ac, char **av) +{ + uint16_t device_id; + uint32_t opcode; + int error, fd; + uint8_t mbox[4]; + + if (ac != 3) { + warnx("locate: %s", ac > 3 ? "extra arguments" : + "drive and state required"); + return (EINVAL); + } + + if (strcasecmp(av[2], "on") == 0 || strcasecmp(av[2], "start") == 0) + opcode = MFI_DCMD_PD_LOCATE_START; + else if (strcasecmp(av[2], "off") == 0 || + strcasecmp(av[2], "stop") == 0) + opcode = MFI_DCMD_PD_LOCATE_STOP; + else { + warnx("locate: invalid state %s", av[2]); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + error = mfi_lookup_drive(fd, av[1], &device_id); + if (error) + return (error); + + + mbox_store_device_id(&mbox[0], device_id); + mbox[2] = 0; + mbox[3] = 0; + if (mfi_dcmd_command(fd, opcode, NULL, 0, mbox, 4, NULL) < 0) { + warn("Failed to %s locate on drive %u", + opcode == MFI_DCMD_PD_LOCATE_START ? "start" : "stop", + device_id); + return (errno); + } + close(fd); + + return (0); +} +MFI_COMMAND(top, locate, drive_locate); diff --git a/usr.sbin/mfiutil/mfi_evt.c b/usr.sbin/mfiutil/mfi_evt.c new file mode 100644 index 0000000..d553dae --- /dev/null +++ b/usr.sbin/mfiutil/mfi_evt.c @@ -0,0 +1,667 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include "mfiutil.h" + +static int +mfi_event_get_info(int fd, struct mfi_evt_log_state *info, uint8_t *statusp) +{ + + return (mfi_dcmd_command(fd, MFI_DCMD_CTRL_EVENT_GETINFO, info, + sizeof(struct mfi_evt_log_state), NULL, 0, statusp)); +} + +static int +mfi_get_events(int fd, struct mfi_evt_list *list, int num_events, + union mfi_evt filter, uint32_t start_seq, uint8_t *statusp) +{ + uint32_t mbox[2]; + size_t size; + + mbox[0] = start_seq; + mbox[1] = filter.word; + size = sizeof(struct mfi_evt_list) + sizeof(struct mfi_evt_detail) * + (num_events - 1); + return (mfi_dcmd_command(fd, MFI_DCMD_CTRL_EVENT_GET, list, size, + (uint8_t *)&mbox, sizeof(mbox), statusp)); +} + +static int +show_logstate(int ac, char **av) +{ + struct mfi_evt_log_state info; + int fd; + + if (ac != 1) { + warnx("show logstate: extra arguments"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_event_get_info(fd, &info, NULL) < 0) { + warn("Failed to get event log info"); + return (errno); + } + + printf("mfi%d Event Log Sequence Numbers:\n", mfi_unit); + printf(" Newest Seq #: %u\n", info.newest_seq_num); + printf(" Oldest Seq #: %u\n", info.oldest_seq_num); + printf(" Clear Seq #: %u\n", info.clear_seq_num); + printf("Shutdown Seq #: %u\n", info.shutdown_seq_num); + printf(" Boot Seq #: %u\n", info.boot_seq_num); + + close(fd); + + return (0); +} +MFI_COMMAND(show, logstate, show_logstate); + +static int +parse_seq(struct mfi_evt_log_state *info, char *arg, uint32_t *seq) +{ + char *cp; + long val; + + if (strcasecmp(arg, "newest") == 0) { + *seq = info->newest_seq_num; + return (0); + } + if (strcasecmp(arg, "oldest") == 0) { + *seq = info->oldest_seq_num; + return (0); + } + if (strcasecmp(arg, "clear") == 0) { + *seq = info->clear_seq_num; + return (0); + } + if (strcasecmp(arg, "shutdown") == 
0) { + *seq = info->shutdown_seq_num; + return (0); + } + if (strcasecmp(arg, "boot") == 0) { + *seq = info->boot_seq_num; + return (0); + } + val = strtol(arg, &cp, 0); + if (*cp != '\0' || val < 0) { + errno = EINVAL; + return (-1); + } + *seq = val; + return (0); +} + +static int +parse_locale(char *arg, uint16_t *locale) +{ + char *cp; + long val; + + if (strncasecmp(arg, "vol", 3) == 0 || strcasecmp(arg, "ld") == 0) { + *locale = MFI_EVT_LOCALE_LD; + return (0); + } + if (strncasecmp(arg, "drive", 5) == 0 || strcasecmp(arg, "pd") == 0) { + *locale = MFI_EVT_LOCALE_PD; + return (0); + } + if (strncasecmp(arg, "encl", 4) == 0) { + *locale = MFI_EVT_LOCALE_ENCL; + return (0); + } + if (strncasecmp(arg, "batt", 4) == 0 || + strncasecmp(arg, "bbu", 3) == 0) { + *locale = MFI_EVT_LOCALE_BBU; + return (0); + } + if (strcasecmp(arg, "sas") == 0) { + *locale = MFI_EVT_LOCALE_SAS; + return (0); + } + if (strcasecmp(arg, "ctrl") == 0 || strncasecmp(arg, "cont", 4) == 0) { + *locale = MFI_EVT_LOCALE_CTRL; + return (0); + } + if (strcasecmp(arg, "config") == 0) { + *locale = MFI_EVT_LOCALE_CONFIG; + return (0); + } + if (strcasecmp(arg, "cluster") == 0) { + *locale = MFI_EVT_LOCALE_CLUSTER; + return (0); + } + if (strcasecmp(arg, "all") == 0) { + *locale = MFI_EVT_LOCALE_ALL; + return (0); + } + val = strtol(arg, &cp, 0); + if (*cp != '\0' || val < 0 || val > 0xffff) { + errno = EINVAL; + return (-1); + } + *locale = val; + return (0); +} + +static int +parse_class(char *arg, int8_t *class) +{ + char *cp; + long val; + + if (strcasecmp(arg, "debug") == 0) { + *class = MFI_EVT_CLASS_DEBUG; + return (0); + } + if (strncasecmp(arg, "prog", 4) == 0) { + *class = MFI_EVT_CLASS_PROGRESS; + return (0); + } + if (strncasecmp(arg, "info", 4) == 0) { + *class = MFI_EVT_CLASS_INFO; + return (0); + } + if (strncasecmp(arg, "warn", 4) == 0) { + *class = MFI_EVT_CLASS_WARNING; + return (0); + } + if (strncasecmp(arg, "crit", 4) == 0) { + *class = MFI_EVT_CLASS_CRITICAL; + return (0); + 
} + if (strcasecmp(arg, "fatal") == 0) { + *class = MFI_EVT_CLASS_FATAL; + return (0); + } + if (strcasecmp(arg, "dead") == 0) { + *class = MFI_EVT_CLASS_DEAD; + return (0); + } + val = strtol(arg, &cp, 0); + if (*cp != '\0' || val < -128 || val > 127) { + errno = EINVAL; + return (-1); + } + *class = val; + return (0); +} + +/* + * The timestamp is the number of seconds since 00:00 Jan 1, 2000. If + * the bits in 24-31 are all set, then it is the number of seconds since + * boot. + */ +static const char * +format_timestamp(uint32_t timestamp) +{ + static char buffer[32]; + static time_t base; + time_t t; + struct tm tm; + + if ((timestamp & 0xff000000) == 0xff000000) { + snprintf(buffer, sizeof(buffer), "boot + %us", timestamp & + 0x00ffffff); + return (buffer); + } + + if (base == 0) { + /* Compute 00:00 Jan 1, 2000 offset. */ + bzero(&tm, sizeof(tm)); + tm.tm_mday = 1; + tm.tm_year = (2000 - 1900); + base = mktime(&tm); + } + if (base == -1) { + snprintf(buffer, sizeof(buffer), "%us", timestamp); + return (buffer); + } + t = base + timestamp; + strftime(buffer, sizeof(buffer), "%+", localtime(&t)); + return (buffer); +} + +static const char * +format_locale(uint16_t locale) +{ + static char buffer[8]; + + switch (locale) { + case MFI_EVT_LOCALE_LD: + return ("VOLUME"); + case MFI_EVT_LOCALE_PD: + return ("DRIVE"); + case MFI_EVT_LOCALE_ENCL: + return ("ENCL"); + case MFI_EVT_LOCALE_BBU: + return ("BATTERY"); + case MFI_EVT_LOCALE_SAS: + return ("SAS"); + case MFI_EVT_LOCALE_CTRL: + return ("CTRL"); + case MFI_EVT_LOCALE_CONFIG: + return ("CONFIG"); + case MFI_EVT_LOCALE_CLUSTER: + return ("CLUSTER"); + case MFI_EVT_LOCALE_ALL: + return ("ALL"); + default: + snprintf(buffer, sizeof(buffer), "0x%04x", locale); + return (buffer); + } +} + +static const char * +format_class(int8_t class) +{ + static char buffer[6]; + + switch (class) { + case MFI_EVT_CLASS_DEBUG: + return ("debug"); + case MFI_EVT_CLASS_PROGRESS: + return ("progress"); + case MFI_EVT_CLASS_INFO: + 
return ("info"); + case MFI_EVT_CLASS_WARNING: + return ("WARN"); + case MFI_EVT_CLASS_CRITICAL: + return ("CRIT"); + case MFI_EVT_CLASS_FATAL: + return ("FATAL"); + case MFI_EVT_CLASS_DEAD: + return ("DEAD"); + default: + snprintf(buffer, sizeof(buffer), "%d", class); + return (buffer); + } +} + +/* Simulates %D from kernel printf(9). */ +static void +simple_hex(void *ptr, size_t length, const char *separator) +{ + unsigned char *cp; + u_int i; + + if (length == 0) + return; + cp = ptr; + printf("%02x", cp[0]); + for (i = 1; i < length; i++) + printf("%s%02x", separator, cp[i]); +} + +static const char * +pdrive_location(struct mfi_evt_pd *pd) +{ + static char buffer[16]; + + if (pd->enclosure_index == 0) + snprintf(buffer, sizeof(buffer), "%02d(s%d)", pd->device_id, + pd->slot_number); + else + snprintf(buffer, sizeof(buffer), "%02d(e%d/s%d)", pd->device_id, + pd->enclosure_index, pd->slot_number); + return (buffer); +} + +static const char * +volume_name(int fd, struct mfi_evt_ld *ld) +{ + + return (mfi_volume_name(fd, ld->target_id)); +} + +/* Ripped from sys/dev/mfi/mfi.c. 
*/ +static void +mfi_decode_evt(int fd, struct mfi_evt_detail *detail, int verbose) +{ + + printf("%5d (%s/%s/%s) - ", detail->seq, format_timestamp(detail->time), + format_locale(detail->class.members.locale), + format_class(detail->class.members.class)); + switch (detail->arg_type) { + case MR_EVT_ARGS_NONE: + break; + case MR_EVT_ARGS_CDB_SENSE: + if (verbose) { + printf("PD %s CDB ", + pdrive_location(&detail->args.cdb_sense.pd) + ); + simple_hex(detail->args.cdb_sense.cdb, + detail->args.cdb_sense.cdb_len, ":"); + printf(" Sense "); + simple_hex(detail->args.cdb_sense.sense, + detail->args.cdb_sense.sense_len, ":"); + printf(":\n "); + } + break; + case MR_EVT_ARGS_LD: + printf("VOL %s event: ", volume_name(fd, &detail->args.ld)); + break; + case MR_EVT_ARGS_LD_COUNT: + printf("VOL %s", volume_name(fd, &detail->args.ld_count.ld)); + if (verbose) { + printf(" count %lld: ", + (long long)detail->args.ld_count.count); + } + printf(": "); + break; + case MR_EVT_ARGS_LD_LBA: + printf("VOL %s", volume_name(fd, &detail->args.ld_count.ld)); + if (verbose) { + printf(" lba %lld", + (long long)detail->args.ld_lba.lba); + } + printf(": "); + break; + case MR_EVT_ARGS_LD_OWNER: + printf("VOL %s", volume_name(fd, &detail->args.ld_count.ld)); + if (verbose) { + printf(" owner changed: prior %d, new %d", + detail->args.ld_owner.pre_owner, + detail->args.ld_owner.new_owner); + } + printf(": "); + break; + case MR_EVT_ARGS_LD_LBA_PD_LBA: + printf("VOL %s", volume_name(fd, &detail->args.ld_count.ld)); + if (verbose) { + printf(" lba %lld, physical drive PD %s lba %lld", + (long long)detail->args.ld_lba_pd_lba.ld_lba, + pdrive_location(&detail->args.ld_lba_pd_lba.pd), + (long long)detail->args.ld_lba_pd_lba.pd_lba); + } + printf(": "); + break; + case MR_EVT_ARGS_LD_PROG: + printf("VOL %s", volume_name(fd, &detail->args.ld_prog.ld)); + if (verbose) { + printf(" progress %d%% in %ds", + detail->args.ld_prog.prog.progress/655, + detail->args.ld_prog.prog.elapsed_seconds); + } + 
printf(": "); + break; + case MR_EVT_ARGS_LD_STATE: + printf("VOL %s", volume_name(fd, &detail->args.ld_prog.ld)); + if (verbose) { + printf(" state prior %s new %s", + mfi_ldstate(detail->args.ld_state.prev_state), + mfi_ldstate(detail->args.ld_state.new_state)); + } + printf(": "); + break; + case MR_EVT_ARGS_LD_STRIP: + printf("VOL %s", volume_name(fd, &detail->args.ld_prog.ld)); + if (verbose) { + printf(" strip %lld", + (long long)detail->args.ld_strip.strip); + } + printf(": "); + break; + case MR_EVT_ARGS_PD: + if (verbose) { + printf("PD %s event: ", + pdrive_location(&detail->args.pd)); + } + break; + case MR_EVT_ARGS_PD_ERR: + if (verbose) { + printf("PD %s err %d: ", + pdrive_location(&detail->args.pd_err.pd), + detail->args.pd_err.err); + } + break; + case MR_EVT_ARGS_PD_LBA: + if (verbose) { + printf("PD %s lba %lld: ", + pdrive_location(&detail->args.pd_lba.pd), + (long long)detail->args.pd_lba.lba); + } + break; + case MR_EVT_ARGS_PD_LBA_LD: + if (verbose) { + printf("PD %s lba %lld VOL %s: ", + pdrive_location(&detail->args.pd_lba_ld.pd), + (long long)detail->args.pd_lba.lba, + volume_name(fd, &detail->args.pd_lba_ld.ld)); + } + break; + case MR_EVT_ARGS_PD_PROG: + if (verbose) { + printf("PD %s progress %d%% seconds %ds: ", + pdrive_location(&detail->args.pd_prog.pd), + detail->args.pd_prog.prog.progress/655, + detail->args.pd_prog.prog.elapsed_seconds); + } + break; + case MR_EVT_ARGS_PD_STATE: + if (verbose) { + printf("PD %s state prior %s new %s: ", + pdrive_location(&detail->args.pd_prog.pd), + mfi_pdstate(detail->args.pd_state.prev_state), + mfi_pdstate(detail->args.pd_state.new_state)); + } + break; + case MR_EVT_ARGS_PCI: + if (verbose) { + printf("PCI 0x%04x 0x%04x 0x%04x 0x%04x: ", + detail->args.pci.venderId, + detail->args.pci.deviceId, + detail->args.pci.subVenderId, + detail->args.pci.subDeviceId); + } + break; + case MR_EVT_ARGS_RATE: + if (verbose) { + printf("Rebuild rate %d: ", detail->args.rate); + } + break; + case 
MR_EVT_ARGS_TIME: + if (verbose) { + printf("Adapter time %s; %d seconds since power on: ", + format_timestamp(detail->args.time.rtc), + detail->args.time.elapsedSeconds); + } + break; + case MR_EVT_ARGS_ECC: + if (verbose) { + printf("Adapter ECC %x,%x: %s: ", + detail->args.ecc.ecar, + detail->args.ecc.elog, + detail->args.ecc.str); + } + break; + default: + if (verbose) { + printf("Type %d: ", detail->arg_type); + } + break; + } + printf("%s\n", detail->description); +} + +static int +show_events(int ac, char **av) +{ + struct mfi_evt_log_state info; + struct mfi_evt_list *list; + union mfi_evt filter; + long val; + char *cp; + ssize_t size; + uint32_t seq, start, stop; + uint8_t status; + int ch, fd, num_events, verbose; + u_int i; + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_event_get_info(fd, &info, NULL) < 0) { + warn("Failed to get event log info"); + return (errno); + } + + /* Default settings. */ + num_events = 15; + filter.members.reserved = 0; + filter.members.locale = MFI_EVT_LOCALE_ALL; + filter.members.class = MFI_EVT_CLASS_WARNING; + start = info.boot_seq_num; + stop = info.newest_seq_num; + verbose = 0; + + /* Parse any options. */ + optind = 1; + while ((ch = getopt(ac, av, "c:l:n:v")) != -1) { + switch (ch) { + case 'c': + if (parse_class(optarg, &filter.members.class) < 0) { + warn("Error parsing event class"); + return (errno); + } + break; + case 'l': + if (parse_locale(optarg, &filter.members.locale) < 0) { + warn("Error parsing event locale"); + return (errno); + } + break; + case 'n': + val = strtol(optarg, &cp, 0); + if (*cp != '\0' || val <= 0) { + warnx("Invalid event count"); + return (EINVAL); + } + num_events = val; + break; + case 'v': + verbose = 1; + break; + case '?': + default: + return (EINVAL); + } + } + ac -= optind; + av += optind; + + /* Determine buffer size and validate it. 
*/ + size = sizeof(struct mfi_evt_list) + sizeof(struct mfi_evt_detail) * + (num_events - 1); + if (size > getpagesize()) { + warnx("Event count is too high"); + return (EINVAL); + } + + /* Handle optional start and stop sequence numbers. */ + if (ac > 2) { + warnx("show events: extra arguments"); + return (EINVAL); + } + if (ac > 0 && parse_seq(&info, av[0], &start) < 0) { + warn("Error parsing starting sequence number"); + return (errno); + } + if (ac > 1 && parse_seq(&info, av[1], &stop) < 0) { + warn("Error parsing ending sequence number"); + return (errno); + } + + list = malloc(size); + for (seq = start;;) { + if (mfi_get_events(fd, list, num_events, filter, seq, + &status) < 0) { + warn("Failed to fetch events"); + return (errno); + } + if (status == MFI_STAT_NOT_FOUND) { + if (seq == start) + warnx("No matching events found"); + break; + } + if (status != MFI_STAT_OK) { + warnx("Error fetching events: %s", mfi_status(status)); + return (EIO); + } + + for (i = 0; i < list->count; i++) { + /* + * If this event is newer than 'stop_seq' then + * break out of the loop. Note that the log + * is a circular buffer so we have to handle + * the case that our stop point is earlier in + * the buffer than our start point. + */ + if (list->event[i].seq >= stop) { + if (start <= stop) + break; + else if (list->event[i].seq < start) + break; + } + mfi_decode_evt(fd, &list->event[i], verbose); + } + + /* + * XXX: If the event's seq # is the end of the buffer + * then this probably won't do the right thing. We + * need to know the size of the buffer somehow. + */ + seq = list->event[list->count - 1].seq + 1; + + } + + free(list); + close(fd); + + return (0); +} +MFI_COMMAND(show, events, show_events); diff --git a/usr.sbin/mfiutil/mfi_flash.c b/usr.sbin/mfiutil/mfi_flash.c new file mode 100644 index 0000000..5dd93f1 --- /dev/null +++ b/usr.sbin/mfiutil/mfi_flash.c @@ -0,0 +1,199 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mfiutil.h" + +#define FLASH_BUF_SIZE (64 * 1024) + +int fw_name_width, fw_version_width, fw_date_width, fw_time_width; + +static void +scan_firmware(struct mfi_info_component *comp) +{ + int len; + + len = strlen(comp->name); + if (fw_name_width < len) + fw_name_width = len; + len = strlen(comp->version); + if (fw_version_width < len) + fw_version_width = len; + len = strlen(comp->build_date); + if (fw_date_width < len) + fw_date_width = len; + len = strlen(comp->build_time); + if (fw_time_width < len) + fw_time_width = len; +} + +static void +display_firmware(struct mfi_info_component *comp) +{ + + printf("%-*s %-*s %-*s %-*s\n", fw_name_width, comp->name, + fw_version_width, comp->version, fw_date_width, comp->build_date, + fw_time_width, comp->build_time); +} + +static void +display_pending_firmware(int fd) +{ + struct mfi_ctrl_info info; + struct mfi_info_component header; + u_int i; + + if (mfi_ctrl_get_info(fd, &info, NULL) < 0) { + warn("Failed to get controller info"); + return; + } + + printf("mfi%d Pending Firmware Images:\n", mfi_unit); + strcpy(header.name, "Name"); + strcpy(header.version, "Version"); + strcpy(header.build_date, "Date"); + strcpy(header.build_time, "Time"); + scan_firmware(&header); + if (info.pending_image_component_count > 8) + info.pending_image_component_count = 8; + for (i = 0; i < info.pending_image_component_count; i++) + scan_firmware(&info.pending_image_component[i]); + display_firmware(&header); + for (i = 0; i < info.pending_image_component_count; i++) + display_firmware(&info.pending_image_component[i]); +} + +static void +mbox_store_word(uint8_t *mbox, uint32_t val) +{ + + mbox[0] = val & 0xff; + mbox[1] = val >> 8 & 0xff; + mbox[2] = val >> 16 & 0xff; + mbox[3] = val >> 24; +} + +static int +flash_adapter(int ac, char **av) +{ + struct mfi_progress dummy; + off_t offset; + size_t nread; + char 
*buf; + struct stat sb; + int fd, flash; + uint8_t mbox[4], status; + + if (ac != 2) { + warnx("flash: Firmware file required"); + return (EINVAL); + } + + flash = open(av[1], O_RDONLY); + if (flash < 0) { + warn("flash: Failed to open %s", av[1]); + return (errno); + } + + if (fstat(flash, &sb) < 0) { + warn("fstat(%s)", av[1]); + return (errno); + } + if (sb.st_size % 1024 != 0 || sb.st_size > 0x7fffffff) { + warnx("Invalid flash file size"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + /* First, ask the firmware to allocate space for the flash file. */ + mbox_store_word(mbox, sb.st_size); + mfi_dcmd_command(fd, MFI_DCMD_FLASH_FW_OPEN, NULL, 0, mbox, 4, &status); + if (status != MFI_STAT_OK) { + warnx("Failed to alloc flash memory: %s", mfi_status(status)); + return (EIO); + } + + /* Upload the file 64k at a time. */ + buf = malloc(FLASH_BUF_SIZE); + offset = 0; + while (sb.st_size > 0) { + nread = read(flash, buf, FLASH_BUF_SIZE); + if (nread <= 0 || nread % 1024 != 0) { + warnx("Bad read from flash file"); + mfi_dcmd_command(fd, MFI_DCMD_FLASH_FW_CLOSE, NULL, 0, + NULL, 0, NULL); + return (ENXIO); + } + + mbox_store_word(mbox, offset); + mfi_dcmd_command(fd, MFI_DCMD_FLASH_FW_DOWNLOAD, buf, nread, + mbox, 4, &status); + if (status != MFI_STAT_OK) { + warnx("Flash download failed: %s", mfi_status(status)); + mfi_dcmd_command(fd, MFI_DCMD_FLASH_FW_CLOSE, NULL, 0, + NULL, 0, NULL); + return (ENXIO); + } + sb.st_size -= nread; + offset += nread; + } + close(flash); + + /* Kick off the flash. */ + printf("WARNING: Firmware flash in progress, do not reboot machine... 
"); + fflush(stdout); + mfi_dcmd_command(fd, MFI_DCMD_FLASH_FW_FLASH, &dummy, sizeof(dummy), + NULL, 0, &status); + if (status != MFI_STAT_OK) { + printf("failed:\n\t%s\n", mfi_status(status)); + return (ENXIO); + } + printf("finished\n"); + display_pending_firmware(fd); + + close(fd); + + return (0); +} +MFI_COMMAND(top, flash, flash_adapter); diff --git a/usr.sbin/mfiutil/mfi_patrol.c b/usr.sbin/mfiutil/mfi_patrol.c new file mode 100644 index 0000000..b8da2ae --- /dev/null +++ b/usr.sbin/mfiutil/mfi_patrol.c @@ -0,0 +1,305 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mfiutil.h" + +static char * +adapter_time(time_t now, uint32_t at_now, uint32_t at) +{ + time_t t; + + t = (now - at_now) + at; + return (ctime(&t)); +} + +static void +mfi_get_time(int fd, uint32_t *at) +{ + + if (mfi_dcmd_command(fd, MFI_DCMD_TIME_SECS_GET, at, sizeof(*at), NULL, + 0, NULL) < 0) { + warn("Couldn't fetch adapter time"); + at = 0; + } +} + +static int +patrol_get_props(int fd, struct mfi_pr_properties *prop) +{ + + if (mfi_dcmd_command(fd, MFI_DCMD_PR_GET_PROPERTIES, prop, + sizeof(*prop), NULL, 0, NULL) < 0) { + warn("Failed to get patrol read properties"); + return (-1); + } + return (0); +} + +static int +show_patrol(int ac, char **av) +{ + struct mfi_pr_properties prop; + struct mfi_pr_status status; + struct mfi_pd_list *list; + struct mfi_pd_info info; + char label[16]; + time_t now; + uint32_t at; + int fd; + u_int i; + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + time(&now); + mfi_get_time(fd, &at); + if (patrol_get_props(fd, &prop) < 0) + return (errno); + printf("Operation Mode: "); + switch (prop.op_mode) { + case MFI_PR_OPMODE_AUTO: + printf("auto\n"); + break; + case MFI_PR_OPMODE_MANUAL: + printf("manual\n"); + break; + case MFI_PR_OPMODE_DISABLED: + printf("disabled\n"); + break; + default: + printf("??? 
(%02x)\n", prop.op_mode); + break; + } + if (prop.op_mode == MFI_PR_OPMODE_AUTO) { + if (at != 0 && prop.next_exec) + printf(" Next Run Starts: %s", adapter_time(now, at, + prop.next_exec)); + if (prop.exec_freq == 0xffffffff) + printf(" Runs Execute Continuously\n"); + else if (prop.exec_freq != 0) + printf(" Runs Start Every %u seconds\n", + prop.exec_freq); + } + + if (mfi_dcmd_command(fd, MFI_DCMD_PR_GET_STATUS, &status, + sizeof(status), NULL, 0, NULL) < 0) { + warn("Failed to get patrol read properties"); + return (errno); + } + printf("Runs Completed: %u\n", status.num_iteration); + printf("Current State: "); + switch (status.state) { + case MFI_PR_STATE_STOPPED: + printf("stopped\n"); + break; + case MFI_PR_STATE_READY: + printf("ready\n"); + break; + case MFI_PR_STATE_ACTIVE: + printf("active\n"); + break; + case MFI_PR_STATE_ABORTED: + printf("aborted\n"); + break; + default: + printf("??? (%02x)\n", status.state); + break; + } + if (status.state == MFI_PR_STATE_ACTIVE) { + if (mfi_pd_get_list(fd, &list, NULL) < 0) { + warn("Failed to get drive list"); + return (errno); + } + + for (i = 0; i < list->count; i++) { + if (list->addr[i].scsi_dev_type != 0) + continue; + + if (mfi_pd_get_info(fd, list->addr[i].device_id, &info, + NULL) < 0) { + warn("Failed to fetch info for drive %u", + list->addr[i].device_id); + return (errno); + } + if (info.prog_info.active & MFI_PD_PROGRESS_PATROL) { + snprintf(label, sizeof(label), " Drive %u", + list->addr[i].device_id); + mfi_display_progress(label, + &info.prog_info.patrol); + } + } + } + + close(fd); + + return (0); +} +MFI_COMMAND(show, patrol, show_patrol); + +static int +start_patrol(int ac, char **av) +{ + int fd; + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_dcmd_command(fd, MFI_DCMD_PR_START, NULL, 0, NULL, 0, NULL) < + 0) { + warn("Failed to start patrol read"); + return (errno); + } + + close(fd); + + return (0); +} +MFI_COMMAND(start, patrol, 
start_patrol); + +static int +stop_patrol(int ac, char **av) +{ + int fd; + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_dcmd_command(fd, MFI_DCMD_PR_STOP, NULL, 0, NULL, 0, NULL) < + 0) { + warn("Failed to stop patrol read"); + return (errno); + } + + close(fd); + + return (0); +} +MFI_COMMAND(stop, patrol, stop_patrol); + +static int +patrol_config(int ac, char **av) +{ + struct mfi_pr_properties prop; + long val; + time_t now; + uint32_t at, next_exec, exec_freq; + char *cp; + uint8_t op_mode; + int fd; + + exec_freq = 0; /* GCC too stupid */ + next_exec = 0; + if (ac < 2) { + warnx("patrol: command required"); + return (EINVAL); + } + if (strcasecmp(av[1], "auto") == 0) { + op_mode = MFI_PR_OPMODE_AUTO; + if (ac > 2) { + if (strcasecmp(av[2], "continously") == 0) + exec_freq = 0xffffffff; + else { + val = strtol(av[2], &cp, 0); + if (*cp != '\0') { + warnx("patrol: Invalid interval %s", + av[2]); + return (EINVAL); + } + exec_freq = val; + } + } + if (ac > 3) { + val = strtol(av[3], &cp, 0); + if (*cp != '\0' || val < 0) { + warnx("patrol: Invalid start time %s", av[3]); + return (EINVAL); + } + next_exec = val; + } + } else if (strcasecmp(av[1], "manual") == 0) + op_mode = MFI_PR_OPMODE_MANUAL; + else if (strcasecmp(av[1], "disable") == 0) + op_mode = MFI_PR_OPMODE_DISABLED; + else { + warnx("patrol: Invalid command %s", av[1]); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (patrol_get_props(fd, &prop) < 0) + return (errno); + prop.op_mode = op_mode; + if (op_mode == MFI_PR_OPMODE_AUTO) { + if (ac > 2) + prop.exec_freq = exec_freq; + if (ac > 3) { + time(&now); + mfi_get_time(fd, &at); + if (at == 0) + return (ENXIO); + prop.next_exec = at + next_exec; + printf("Starting next patrol read at %s", + adapter_time(now, at, prop.next_exec)); + } + } + if (mfi_dcmd_command(fd, MFI_DCMD_PR_SET_PROPERTIES, &prop, + sizeof(prop), NULL, 0, NULL) < 
0) { + warn("Failed to set patrol read properties"); + return (errno); + } + + close(fd); + + return (0); +} +MFI_COMMAND(top, patrol, patrol_config); diff --git a/usr.sbin/mfiutil/mfi_show.c b/usr.sbin/mfiutil/mfi_show.c new file mode 100644 index 0000000..abd1110 --- /dev/null +++ b/usr.sbin/mfiutil/mfi_show.c @@ -0,0 +1,560 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mfiutil.h" + +MFI_TABLE(top, show); + +static void +format_stripe(char *buf, size_t buflen, uint8_t stripe) +{ + + humanize_number(buf, buflen, (1 << stripe) * 512, "", HN_AUTOSCALE, + HN_B | HN_NOSPACE); +} + +static int +show_adapter(int ac, char **av) +{ + struct mfi_ctrl_info info; + char stripe[5]; + int fd, comma; + + if (ac != 1) { + warnx("show adapter: extra arguments"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_ctrl_get_info(fd, &info, NULL) < 0) { + warn("Failed to get controller info"); + return (errno); + } + printf("mfi%d Adapter:\n", mfi_unit); + printf(" Product Name: %.80s\n", info.product_name); + printf(" Serial Number: %.32s\n", info.serial_number); + if (info.package_version[0] != '\0') + printf(" Firmware: %s\n", info.package_version); + printf(" RAID Levels:"); +#ifdef DEBUG + printf(" (%#x)", info.raid_levels); +#endif + comma = 0; + if (info.raid_levels & MFI_INFO_RAID_0) { + printf(" JBOD, RAID0"); + comma = 1; + } + if (info.raid_levels & MFI_INFO_RAID_1) { + printf("%s RAID1", comma ? "," : ""); + comma = 1; + } + if (info.raid_levels & MFI_INFO_RAID_5) { + printf("%s RAID5", comma ? "," : ""); + comma = 1; + } + if (info.raid_levels & MFI_INFO_RAID_1E) { + printf("%s RAID1E", comma ? "," : ""); + comma = 1; + } + if (info.raid_levels & MFI_INFO_RAID_6) { + printf("%s RAID6", comma ? "," : ""); + comma = 1; + } + if ((info.raid_levels & (MFI_INFO_RAID_0 | MFI_INFO_RAID_1)) == + (MFI_INFO_RAID_0 | MFI_INFO_RAID_1)) { + printf("%s RAID10", comma ? "," : ""); + comma = 1; + } + if ((info.raid_levels & (MFI_INFO_RAID_0 | MFI_INFO_RAID_5)) == + (MFI_INFO_RAID_0 | MFI_INFO_RAID_5)) { + printf("%s RAID50", comma ? 
"," : ""); + comma = 1; + } + printf("\n"); + printf(" Battery Backup: "); + if (info.hw_present & MFI_INFO_HW_BBU) + printf("present\n"); + else + printf("not present\n"); + if (info.hw_present & MFI_INFO_HW_NVRAM) + printf(" NVRAM: %uK\n", info.nvram_size); + printf(" Onboard Memory: %uM\n", info.memory_size); + format_stripe(stripe, sizeof(stripe), info.stripe_sz_ops.min); + printf(" Minimum Stripe: %s\n", stripe); + format_stripe(stripe, sizeof(stripe), info.stripe_sz_ops.max); + printf(" Maximum Stripe: %s\n", stripe); + + close(fd); + + return (0); +} +MFI_COMMAND(show, adapter, show_adapter); + +static int +show_battery(int ac, char **av) +{ + struct mfi_bbu_capacity_info cap; + struct mfi_bbu_design_info design; + uint8_t status; + int fd; + + if (ac != 1) { + warnx("show battery: extra arguments"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_dcmd_command(fd, MFI_DCMD_BBU_GET_CAPACITY_INFO, &cap, + sizeof(cap), NULL, 0, &status) < 0) { + if (status == MFI_STAT_NO_HW_PRESENT) { + printf("mfi%d: No battery present\n", mfi_unit); + return (0); + } + warn("Failed to get capacity info"); + return (errno); + } + + if (mfi_dcmd_command(fd, MFI_DCMD_BBU_GET_DESIGN_INFO, &design, + sizeof(design), NULL, 0, NULL) < 0) { + warn("Failed to get design info"); + return (errno); + } + + printf("mfi%d: Battery State:\n", mfi_unit); + printf(" Manufacture Date: %d/%d/%d\n", design.mfg_date >> 5 & 0x0f, + design.mfg_date & 0x1f, design.mfg_date >> 9 & 0xffff); + printf(" Serial Number: %d\n", design.serial_number); + printf(" Manufacturer: %s\n", design.mfg_name); + printf(" Model: %s\n", design.device_name); + printf(" Chemistry: %s\n", design.device_chemistry); + printf(" Design Capacity: %d mAh\n", design.design_capacity); + printf(" Design Voltage: %d mV\n", design.design_voltage); + printf(" Current Charge: %d%%\n", cap.relative_charge); + + close(fd); + + return (0); +} +MFI_COMMAND(show, 
battery, show_battery); + +static void +print_ld(struct mfi_ld_info *info, int state_len) +{ + struct mfi_ld_params *params = &info->ld_config.params; + const char *level; + char size[6], stripe[5]; + + humanize_number(size, sizeof(size), info->size * 512, + "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); + format_stripe(stripe, sizeof(stripe), + info->ld_config.params.stripe_size); + level = mfi_raid_level(params->primary_raid_level, + params->secondary_raid_level); + if (state_len > 0) + printf("(%6s) %-8s %6s %-*s", size, level, stripe, state_len, + mfi_ldstate(params->state)); + else + printf("(%s) %s %s %s", size, level, stripe, + mfi_ldstate(params->state)); +} + +static void +print_pd(struct mfi_pd_info *info, int state_len, int location) +{ + const char *s; + char buf[6]; + + humanize_number(buf, sizeof(buf), info->raw_size * 512, "", + HN_AUTOSCALE, HN_B | HN_NOSPACE |HN_DECIMAL); + printf("(%6s) ", buf); + if (state_len > 0) + printf("%-*s", state_len, mfi_pdstate(info->fw_state)); + else + printf("%s", mfi_pdstate(info->fw_state)); + s = mfi_pd_inq_string(info); + if (s != NULL) + printf(" %s", s); + if (!location) + return; + if (info->encl_device_id == 0xffff) + printf(" slot %d", info->slot_number); + else if (info->encl_device_id == info->ref.v.device_id) + printf(" enclosure %d", info->encl_index); + else + printf(" enclosure %d, slot %d", info->encl_index, + info->slot_number); +} + +static int +show_config(int ac, char **av) +{ + struct mfi_config_data *config; + struct mfi_array *ar; + struct mfi_ld_config *ld; + struct mfi_spare *sp; + struct mfi_ld_info linfo; + struct mfi_pd_info pinfo; + uint16_t device_id; + char *p; + int fd, i, j; + + if (ac != 1) { + warnx("show config: extra arguments"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + /* Get the config from the controller. 
*/ + if (mfi_config_read(fd, &config) < 0) { + warn("Failed to get config"); + return (errno); + } + + /* Dump out the configuration. */ + printf("mfi%d Configuration: %d arrays, %d volumes, %d spares\n", + mfi_unit, config->array_count, config->log_drv_count, + config->spares_count); + p = (char *)config->array; + + for (i = 0; i < config->array_count; i++) { + ar = (struct mfi_array *)p; + printf(" array %u of %u drives:\n", ar->array_ref, + ar->num_drives); + for (j = 0; j < ar->num_drives; j++) { + device_id = ar->pd[j].ref.v.device_id; + if (device_id == 0xffff) + printf(" drive MISSING\n"); + else { + printf(" drive %u ", device_id); + if (mfi_pd_get_info(fd, device_id, &pinfo, + NULL) < 0) + printf("%s", + mfi_pdstate(ar->pd[j].fw_state)); + else + print_pd(&pinfo, -1, 1); + printf("\n"); + } + } + p += config->array_size; + } + + for (i = 0; i < config->log_drv_count; i++) { + ld = (struct mfi_ld_config *)p; + printf(" volume %s ", + mfi_volume_name(fd, ld->properties.ld.v.target_id)); + if (mfi_ld_get_info(fd, ld->properties.ld.v.target_id, &linfo, + NULL) < 0) { + printf("%s %s", + mfi_raid_level(ld->params.primary_raid_level, + ld->params.secondary_raid_level), + mfi_ldstate(ld->params.state)); + } else + print_ld(&linfo, -1); + if (ld->properties.name[0] != '\0') + printf(" <%s>", ld->properties.name); + printf(" spans:\n"); + for (j = 0; j < ld->params.span_depth; j++) + printf(" array %u\n", ld->span[j].array_ref); + p += config->log_drv_size; + } + + for (i = 0; i < config->spares_count; i++) { + sp = (struct mfi_spare *)p; + printf(" %s spare %u ", + sp->spare_type & MFI_SPARE_DEDICATED ? 
"dedicated" : + "global", sp->ref.v.device_id); + if (mfi_pd_get_info(fd, sp->ref.v.device_id, &pinfo, NULL) < 0) + printf("%s", mfi_pdstate(MFI_PD_STATE_HOT_SPARE)); + else + print_pd(&pinfo, -1, 1); + if (sp->spare_type & MFI_SPARE_DEDICATED) { + printf(" backs:\n"); + for (j = 0; j < sp->array_count; j++) + printf(" array %u\n", sp->array_ref[j]); + } else + printf("\n"); + p += config->spares_size; + } + close(fd); + + return (0); +} +MFI_COMMAND(show, config, show_config); + +static int +show_volumes(int ac, char **av) +{ + struct mfi_ld_list list; + struct mfi_ld_info info; + u_int i, len, state_len; + int fd; + + if (ac != 1) { + warnx("show volumes: extra arguments"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + /* Get the logical drive list from the controller. */ + if (mfi_ld_get_list(fd, &list, NULL) < 0) { + warn("Failed to get volume list"); + return (errno); + } + + /* List the volumes. */ + printf("mfi%d Volumes:\n", mfi_unit); + state_len = strlen("State"); + for (i = 0; i < list.ld_count; i++) { + len = strlen(mfi_ldstate(list.ld_list[i].state)); + if (len > state_len) + state_len = len; + } + printf(" Id Size Level Stripe "); + len = state_len - strlen("State"); + for (i = 0; i < (len + 1) / 2; i++) + printf(" "); + printf("State"); + for (i = 0; i < len / 2; i++) + printf(" "); + printf(" Cache Name\n"); + for (i = 0; i < list.ld_count; i++) { + if (mfi_ld_get_info(fd, list.ld_list[i].ld.v.target_id, &info, + NULL) < 0) { + warn("Failed to get info for volume %d", + list.ld_list[i].ld.v.target_id); + return (errno); + } + printf("%6s ", + mfi_volume_name(fd, list.ld_list[i].ld.v.target_id)); + print_ld(&info, state_len); + switch (info.ld_config.properties.current_cache_policy & + (MR_LD_CACHE_ALLOW_WRITE_CACHE | + MR_LD_CACHE_ALLOW_READ_CACHE)) { + case 0: + printf(" Disabled"); + break; + case MR_LD_CACHE_ALLOW_READ_CACHE: + printf(" Reads "); + break; + case 
MR_LD_CACHE_ALLOW_WRITE_CACHE: + printf(" Writes "); + break; + case MR_LD_CACHE_ALLOW_WRITE_CACHE | + MR_LD_CACHE_ALLOW_READ_CACHE: + printf(" Enabled "); + break; + } + if (info.ld_config.properties.name[0] != '\0') + printf(" <%s>", info.ld_config.properties.name); + printf("\n"); + } + close(fd); + + return (0); +} +MFI_COMMAND(show, volumes, show_volumes); + +static int +show_drives(int ac, char **av) +{ + struct mfi_pd_list *list; + struct mfi_pd_info info; + u_int i, len, state_len; + int fd; + + if (ac != 1) { + warnx("show drives: extra arguments"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_pd_get_list(fd, &list, NULL) < 0) { + warn("Failed to get drive list"); + return (errno); + } + + /* Walk the list of drives to determine width of state column. */ + state_len = 0; + for (i = 0; i < list->count; i++) { + if (list->addr[i].scsi_dev_type != 0) + continue; + + if (mfi_pd_get_info(fd, list->addr[i].device_id, &info, + NULL) < 0) { + warn("Failed to fetch info for drive %u", + list->addr[i].device_id); + return (errno); + } + len = strlen(mfi_pdstate(info.fw_state)); + if (len > state_len) + state_len = len; + } + + /* List the drives. */ + printf("mfi%d Physical Drives:\n", mfi_unit); + for (i = 0; i < list->count; i++) { + + /* Skip non-hard disks. */ + if (list->addr[i].scsi_dev_type != 0) + continue; + + /* Fetch details for this drive. 
*/ + if (mfi_pd_get_info(fd, list->addr[i].device_id, &info, + NULL) < 0) { + warn("Failed to fetch info for drive %u", + list->addr[i].device_id); + return (errno); + } + + print_pd(&info, state_len, 1); + printf("\n"); + } + close(fd); + + return (0); +} +MFI_COMMAND(show, drives, show_drives); + +int fw_name_width, fw_version_width, fw_date_width, fw_time_width; + +static void +scan_firmware(struct mfi_info_component *comp) +{ + int len; + + len = strlen(comp->name); + if (fw_name_width < len) + fw_name_width = len; + len = strlen(comp->version); + if (fw_version_width < len) + fw_version_width = len; + len = strlen(comp->build_date); + if (fw_date_width < len) + fw_date_width = len; + len = strlen(comp->build_time); + if (fw_time_width < len) + fw_time_width = len; +} + +static void +display_firmware(struct mfi_info_component *comp, const char *tag) +{ + + printf("%-*s %-*s %-*s %-*s %s\n", fw_name_width, comp->name, + fw_version_width, comp->version, fw_date_width, comp->build_date, + fw_time_width, comp->build_time, tag); +} + +static int +show_firmware(int ac, char **av) +{ + struct mfi_ctrl_info info; + struct mfi_info_component header; + int fd; + u_int i; + + if (ac != 1) { + warnx("show drives: extra arguments"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_ctrl_get_info(fd, &info, NULL) < 0) { + warn("Failed to get controller info"); + return (errno); + } + + if (info.package_version[0] != '\0') + printf("mfi%d Firmware Package Version: %s\n", mfi_unit, + info.package_version); + printf("mfi%d Firmware Images:\n", mfi_unit); + strcpy(header.name, "Name"); + strcpy(header.version, "Version"); + strcpy(header.build_date, "Date"); + strcpy(header.build_time, "Time"); + scan_firmware(&header); + if (info.image_component_count > 8) + info.image_component_count = 8; + for (i = 0; i < info.image_component_count; i++) + scan_firmware(&info.image_component[i]); + if 
(info.pending_image_component_count > 8) + info.pending_image_component_count = 8; + for (i = 0; i < info.pending_image_component_count; i++) + scan_firmware(&info.pending_image_component[i]); + display_firmware(&header, "Status"); + for (i = 0; i < info.image_component_count; i++) + display_firmware(&info.image_component[i], "active"); + for (i = 0; i < info.pending_image_component_count; i++) + display_firmware(&info.pending_image_component[i], "pending"); + + close(fd); + + return (0); +} +MFI_COMMAND(show, firmware, show_firmware); diff --git a/usr.sbin/mfiutil/mfi_volume.c b/usr.sbin/mfiutil/mfi_volume.c new file mode 100644 index 0000000..b3f4a76 --- /dev/null +++ b/usr.sbin/mfiutil/mfi_volume.c @@ -0,0 +1,412 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mfiutil.h" + +MFI_TABLE(top, volume); + +const char * +mfi_ldstate(enum mfi_ld_state state) +{ + static char buf[16]; + + switch (state) { + case MFI_LD_STATE_OFFLINE: + return ("OFFLINE"); + case MFI_LD_STATE_PARTIALLY_DEGRADED: + return ("PARTIALLY DEGRADED"); + case MFI_LD_STATE_DEGRADED: + return ("DEGRADED"); + case MFI_LD_STATE_OPTIMAL: + return ("OPTIMAL"); + default: + sprintf(buf, "LSTATE 0x%02x", state); + return (buf); + } +} + +void +mbox_store_ldref(uint8_t *mbox, union mfi_ld_ref *ref) +{ + + mbox[0] = ref->v.target_id; + mbox[1] = ref->v.reserved; + mbox[2] = ref->v.seq & 0xff; + mbox[3] = ref->v.seq >> 8; +} + +int +mfi_ld_get_list(int fd, struct mfi_ld_list *list, uint8_t *statusp) +{ + + return (mfi_dcmd_command(fd, MFI_DCMD_LD_GET_LIST, list, + sizeof(struct mfi_ld_list), NULL, 0, statusp)); +} + +int +mfi_ld_get_info(int fd, uint8_t target_id, struct mfi_ld_info *info, + uint8_t *statusp) +{ + uint8_t mbox[1]; + + mbox[0] = target_id; + return (mfi_dcmd_command(fd, MFI_DCMD_LD_GET_INFO, info, + sizeof(struct mfi_ld_info), mbox, 1, statusp)); +} + +static int +mfi_ld_get_props(int fd, uint8_t target_id, struct mfi_ld_props *props) +{ + uint8_t mbox[1]; + + mbox[0] = target_id; + return (mfi_dcmd_command(fd, MFI_DCMD_LD_GET_PROP, props, + sizeof(struct mfi_ld_props), mbox, 1, NULL)); +} + +static int 
+mfi_ld_set_props(int fd, struct mfi_ld_props *props) +{ + uint8_t mbox[4]; + + mbox_store_ldref(mbox, &props->ld); + return (mfi_dcmd_command(fd, MFI_DCMD_LD_SET_PROP, props, + sizeof(struct mfi_ld_props), mbox, 4, NULL)); +} + +static int +update_cache_policy(int fd, struct mfi_ld_props *props, uint8_t new_policy, + uint8_t mask) +{ + uint8_t changes, policy; + + policy = (props->default_cache_policy & ~mask) | new_policy; + if (policy == props->default_cache_policy) + return (0); + changes = policy ^ props->default_cache_policy; + if (changes & MR_LD_CACHE_ALLOW_WRITE_CACHE) + printf("%s caching of I/O writes\n", + policy & MR_LD_CACHE_ALLOW_WRITE_CACHE ? "Enabling" : + "Disabling"); + if (changes & MR_LD_CACHE_ALLOW_READ_CACHE) + printf("%s caching of I/O reads\n", + policy & MR_LD_CACHE_ALLOW_READ_CACHE ? "Enabling" : + "Disabling"); + if (changes & MR_LD_CACHE_WRITE_BACK) + printf("Setting write cache policy to %s\n", + policy & MR_LD_CACHE_WRITE_BACK ? "write-back" : + "write-through"); + if (changes & (MR_LD_CACHE_READ_AHEAD | MR_LD_CACHE_READ_ADAPTIVE)) + printf("Setting read ahead policy to %s\n", + policy & MR_LD_CACHE_READ_AHEAD ? + (policy & MR_LD_CACHE_READ_ADAPTIVE ? 
+ "adaptive" : "always") : "none"); + + props->default_cache_policy = policy; + if (mfi_ld_set_props(fd, props) < 0) { + warn("Failed to set volume properties"); + return (errno); + } + return (0); +} + +static int +volume_cache(int ac, char **av) +{ + struct mfi_ld_props props; + int error, fd; + uint8_t target_id, policy; + + if (ac < 2) { + warnx("cache: volume required"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_lookup_volume(fd, av[1], &target_id) < 0) { + warn("Invalid volume: %s", av[1]); + return (errno); + } + + if (mfi_ld_get_props(fd, target_id, &props) < 0) { + warn("Failed to fetch volume properties"); + return (errno); + } + + if (ac == 2) { + printf("mfi%u volume %s cache settings:\n", mfi_unit, + mfi_volume_name(fd, target_id)); + printf(" I/O caching: "); + switch (props.default_cache_policy & + (MR_LD_CACHE_ALLOW_WRITE_CACHE | + MR_LD_CACHE_ALLOW_READ_CACHE)) { + case 0: + printf("disabled\n"); + break; + case MR_LD_CACHE_ALLOW_WRITE_CACHE: + printf("writes\n"); + break; + case MR_LD_CACHE_ALLOW_READ_CACHE: + printf("reads\n"); + break; + case MR_LD_CACHE_ALLOW_WRITE_CACHE | + MR_LD_CACHE_ALLOW_READ_CACHE: + printf("writes and reads\n"); + break; + } + printf(" write caching: %s\n", + props.default_cache_policy & MR_LD_CACHE_WRITE_BACK ? + "write-back" : "write-through"); + printf(" read ahead: %s\n", + props.default_cache_policy & MR_LD_CACHE_READ_AHEAD ? + (props.default_cache_policy & MR_LD_CACHE_READ_ADAPTIVE ? + "adaptive" : "always") : "none"); + printf("drive write cache: "); + switch (props.disk_cache_policy) { + case MR_PD_CACHE_UNCHANGED: + printf("default\n"); + break; + case MR_PD_CACHE_ENABLE: + printf("enabled\n"); + break; + case MR_PD_CACHE_DISABLE: + printf("disabled\n"); + break; + default: + printf("??? 
%d\n", props.disk_cache_policy); + break; + } + if (props.default_cache_policy != props.current_cache_policy) + printf("Cache Disabled Due to Dead Battery\n"); + error = 0; + } else { + if (strcmp(av[2], "all") == 0 || strcmp(av[2], "enable") == 0) + error = update_cache_policy(fd, &props, + MR_LD_CACHE_ALLOW_READ_CACHE | + MR_LD_CACHE_ALLOW_WRITE_CACHE, + MR_LD_CACHE_ALLOW_READ_CACHE | + MR_LD_CACHE_ALLOW_WRITE_CACHE); + else if (strcmp(av[2], "none") == 0 || + strcmp(av[2], "disable") == 0) + error = update_cache_policy(fd, &props, 0, + MR_LD_CACHE_ALLOW_READ_CACHE | + MR_LD_CACHE_ALLOW_WRITE_CACHE); + else if (strcmp(av[2], "reads") == 0) + error = update_cache_policy(fd, &props, + MR_LD_CACHE_ALLOW_READ_CACHE, + MR_LD_CACHE_ALLOW_READ_CACHE | + MR_LD_CACHE_ALLOW_WRITE_CACHE); + else if (strcmp(av[2], "writes") == 0) + error = update_cache_policy(fd, &props, + MR_LD_CACHE_ALLOW_WRITE_CACHE, + MR_LD_CACHE_ALLOW_READ_CACHE | + MR_LD_CACHE_ALLOW_WRITE_CACHE); + else if (strcmp(av[2], "write-back") == 0) + error = update_cache_policy(fd, &props, + MR_LD_CACHE_WRITE_BACK, + MR_LD_CACHE_WRITE_BACK); + else if (strcmp(av[2], "write-through") == 0) + error = update_cache_policy(fd, &props, 0, + MR_LD_CACHE_WRITE_BACK); + else if (strcmp(av[2], "read-ahead") == 0) { + if (ac < 4) { + warnx("cache: read-ahead setting required"); + return (EINVAL); + } + if (strcmp(av[3], "none") == 0) + policy = 0; + else if (strcmp(av[3], "always") == 0) + policy = MR_LD_CACHE_READ_AHEAD; + else if (strcmp(av[3], "adaptive") == 0) + policy = MR_LD_CACHE_READ_AHEAD | + MR_LD_CACHE_READ_ADAPTIVE; + else { + warnx("cache: invalid read-ahead setting"); + return (EINVAL); + } + error = update_cache_policy(fd, &props, policy, + MR_LD_CACHE_READ_AHEAD | + MR_LD_CACHE_READ_ADAPTIVE); + } else if (strcmp(av[2], "write-cache") == 0) { + if (ac < 4) { + warnx("cache: write-cache setting required"); + return (EINVAL); + } + if (strcmp(av[3], "enable") == 0) + policy = MR_PD_CACHE_ENABLE; + else if 
(strcmp(av[3], "disable") == 0) + policy = MR_PD_CACHE_DISABLE; + else if (strcmp(av[3], "default") == 0) + policy = MR_PD_CACHE_UNCHANGED; + else { + warnx("cache: invalid write-cache setting"); + return (EINVAL); + } + error = 0; + if (policy != props.disk_cache_policy) { + switch (policy) { + case MR_PD_CACHE_ENABLE: + printf("Enabling write-cache on physical drives\n"); + break; + case MR_PD_CACHE_DISABLE: + printf("Disabling write-cache on physical drives\n"); + break; + case MR_PD_CACHE_UNCHANGED: + printf("Using default write-cache setting on physical drives\n"); + break; + } + props.disk_cache_policy = policy; + if (mfi_ld_set_props(fd, &props) < 0) { + warn("Failed to set volume properties"); + error = errno; + } + } + } else { + warnx("cache: Invalid command"); + return (EINVAL); + } + } + close(fd); + + return (error); +} +MFI_COMMAND(top, cache, volume_cache); + +static int +volume_name(int ac, char **av) +{ + struct mfi_ld_props props; + int fd; + uint8_t target_id; + + if (ac != 3) { + warnx("name: volume and name required"); + return (EINVAL); + } + + if (strlen(av[2]) >= sizeof(props.name)) { + warnx("name: new name is too long"); + return (ENOSPC); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_lookup_volume(fd, av[1], &target_id) < 0) { + warn("Invalid volume: %s", av[1]); + return (errno); + } + + if (mfi_ld_get_props(fd, target_id, &props) < 0) { + warn("Failed to fetch volume properties"); + return (errno); + } + + printf("mfi%u volume %s name changed from \"%s\" to \"%s\"\n", mfi_unit, + mfi_volume_name(fd, target_id), props.name, av[2]); + bzero(props.name, sizeof(props.name)); + strcpy(props.name, av[2]); + if (mfi_ld_set_props(fd, &props) < 0) { + warn("Failed to set volume properties"); + return (errno); + } + + close(fd); + + return (0); +} +MFI_COMMAND(top, name, volume_name); + +static int +volume_progress(int ac, char **av) +{ + struct mfi_ld_info info; + int fd; + uint8_t 
target_id; + + if (ac != 2) { + warnx("volume progress: %s", ac > 2 ? "extra arguments" : + "volume required"); + return (EINVAL); + } + + fd = mfi_open(mfi_unit); + if (fd < 0) { + warn("mfi_open"); + return (errno); + } + + if (mfi_lookup_volume(fd, av[1], &target_id) < 0) { + warn("Invalid volume: %s", av[1]); + return (errno); + } + + /* Get the info for this drive. */ + if (mfi_ld_get_info(fd, target_id, &info, NULL) < 0) { + warn("Failed to fetch info for volume %s", + mfi_volume_name(fd, target_id)); + return (errno); + } + + /* Display any of the active events. */ + if (info.progress.active & MFI_LD_PROGRESS_CC) + mfi_display_progress("Consistency Check", &info.progress.cc); + if (info.progress.active & MFI_LD_PROGRESS_BGI) + mfi_display_progress("Background Init", &info.progress.bgi); + if (info.progress.active & MFI_LD_PROGRESS_FGI) + mfi_display_progress("Foreground Init", &info.progress.fgi); + if (info.progress.active & MFI_LD_PROGRESS_RECON) + mfi_display_progress("Reconstruction", &info.progress.recon); + if ((info.progress.active & (MFI_LD_PROGRESS_CC | MFI_LD_PROGRESS_BGI | + MFI_LD_PROGRESS_FGI | MFI_LD_PROGRESS_RECON)) == 0) + printf("No activity in progress for volume %s.\n", + mfi_volume_name(fd, target_id)); + close(fd); + + return (0); +} +MFI_COMMAND(volume, progress, volume_progress); diff --git a/usr.sbin/mfiutil/mfiutil.1 b/usr.sbin/mfiutil/mfiutil.1 new file mode 100644 index 0000000..d7860ab --- /dev/null +++ b/usr.sbin/mfiutil/mfiutil.1 @@ -0,0 +1,574 @@ +.\" Copyright (c) 2008, 2009 Yahoo!, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. The names of the authors may not be used to endorse or promote +.\" products derived from this software without specific prior written +.\" permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd June 17, 2008 +.Dt MFIUTIL 1 +.Os +.Sh NAME +.Nm mfiutil +.Nd Utility for managing LSI MegaRAID SAS controllers +.Sh SYNOPSIS +.Nm +.Cm version +.Nm +.Op Fl u Ar unit +.Cm show adapter +.Nm +.Op Fl u Ar unit +.Cm show battery +.Nm +.Op Fl u Ar unit +.Cm show config +.Nm +.Op Fl u Ar unit +.Cm show drives +.Nm +.Op Fl u Ar unit +.Cm show events +.Op Fl c Ar class +.Op Fl l Ar locale +.Op Fl n Ar count +.Op Fl v +.Op Ar start Op Ar stop +.Nm +.Op Fl u Ar unit +.Cm show firmware +.Nm +.Op Fl u Ar unit +.Cm show logstate +.Nm +.Op Fl u Ar unit +.Cm show patrol +.Nm +.Op Fl u Ar unit +.Cm show volumes +.Nm +.Op Fl u Ar unit +.Cm fail Ar drive +.Nm +.Op Fl u Ar unit +.Cm good Ar drive +.Nm +.Op Fl u Ar unit +.Cm rebuild Ar drive +.Nm +.Op Fl u Ar unit +.Cm drive progress Ar drive +.Nm +.Op Fl u Ar unit +.Cm drive clear Ar drive Brq "start | stop" +.Nm +.Op Fl u Ar unit +.Cm start rebuild Ar drive +.Nm +.Op Fl u Ar unit +.Cm abort rebuild Ar drive +.Nm +.Op Fl u Ar unit +.Cm locate Ar drive Brq "on | off" +.Nm +.Op Fl u Ar unit +.Cm cache Ar volume Op Ar setting Op Ar value +.Nm +.Op Fl u Ar unit +.Cm name Ar volume Ar name +.Nm +.Op Fl u Ar unit +.Cm volume progress Ar volume +.Nm +.Op Fl u Ar unit +.Cm clear +.Nm +.Op Fl u Ar unit +.Cm create Ar type +.Op Fl v +.Op Fl s Ar stripe_size +.Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Op Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Nm +.Op Fl u Ar unit +.Cm delete Ar volume +.Nm +.Op Fl u Ar unit +.Cm add Ar drive Op Ar volume +.Nm +.Op Fl u Ar unit +.Cm remove Ar drive +.Nm +.Op Fl u Ar unit +.Cm start patrol +.Nm +.Op Fl u Ar unit +.Cm stop patrol +.Nm +.Op Fl u Ar unit +.Cm patrol Ar command Op Ar interval Op Ar start +.Nm +.Op Fl u Ar unit +.Cm flash Ar file +.Sh DESCRIPTION +The +.Nm +utility can be used to display or modify various parameters on LSI +MegaRAID SAS RAID controllers. +Each invocation of +.Nm +consists of zero or more global options followed by a command. 
+Commands may support additional optional or required arguments after the +command. +.Pp +Currently one global option is supported: +.Bl -tag -width indent +.It Fl u Ar unit +.Ar unit +specifies the unit of the controller to work with. +If no unit is specified, +then unit 0 is used. +.El +.Pp +Volumes may be specified in two forms. +First, +a volume may be identified by its target ID. +Second, +a volume may be specified by the corresponding +.Em mfidX +device, +such as +.Em mfid0 . +Note that this second method only works on OS versions +.Dv 6.2-YAHOO-20070510 +and later. +.Pp +Drives may be specified in two forms. +First, +a drive may be identified by its device ID. +The device ID for configured drives can be found in +.Cm show config . +Second, +a drive may be identified by its location as +.Sm off +.Op E Ar xx Ns \&: +.Li S Ns Ar yy +.Sm on +where +.Ar xx +is the enclosure +and +.Ar yy +is the slot for each drive as displayed in +.Cm show drives . +.Pp +The +.Nm +utility supports several different groups of commands. +The first group of commands provide information about the controller, +the volumes it manages, and the drives it controls. +The second group of commands are used to manage the physical drives +attached to the controller. +The third group of commands are used to manage the logical volumes +managed by the controller. +The fourth group of commands are used to manage the drive configuration for +the controller. +The fifth group of commands are used to manage controller-wide operations. +.Pp +The informational commands include: +.Bl -tag -width indent +.It Cm version +Displays the version of +.Nm . +.It Cm show adapter +Displays information about the RAID controller such as the model number. +.It Cm show battery +Displays information about the battery from the battery backup unit. +.It Cm show config +Displays the volume and drive configuration for the controller. +Each array is listed along with the physical drives the array is built from.
+Each volume is listed along with the arrays that the volume spans. +If any hot spare drives are configured, then they are listed as well. +.It Cm show drives +Lists all of the physical drives attached to the controller. +.It Xo Cm show events +.Op Fl c Ar class +.Op Fl l Ar locale +.Op Fl n Ar count +.Op Fl v +.Op Ar start Op Ar stop +.Xc +Display entries from the controller's event log. +The controller maintains a circular buffer of events. +Each event is tagged with a class and locale. +.Pp +The +.Ar class +parameter limits the output to entries at the specified class or higher. +The default class is +.Dq warn . +The available classes from lowest priority to highest are: +.Bl -tag -width -indent +.It Cm debug +Debug messages. +.It Cm progress +Periodic progress updates for long-running operations such as background +initializations, array rebuilds, or patrol reads. +.It Cm info +Informational messages such as drive insertions and volume creations. +.It Cm warn +Indicates that some component may be close to failing. +.It Cm crit +A component has failed, but no data is lost. +For example, a volume becoming degraded due to a drive failure. +.It Cm fatal +A component has failed resulting in data loss. +.It Cm dead +The controller itself has died. +.El +.Pp +The +.Ar locale +parameter limits the output to entries for the specified part of the controller. +The default locale is +.Dq all . +The available locales are +.Dq volume , +.Dq drive , +.Dq enclosure , +.Dq battery , +.Dq sas , +.Dq controller , +.Dq config , +.Dq cluster , +and +.Dq all . +.Pp +The +.Ar count +parameter is a debugging aid that specifies the number of events to fetch from +the controller for each low-level request. +The default is 15 events. +.Pp +By default, matching event log entries from the previous shutdown up to the +present are displayed. This range can be adjusted via the +.Ar start +and +.Ar stop +parameters.
+Each of these parameters can either be specified as a log entry number or as +one of the following aliases: +.Bl -tag -width -indent +.It Cm newest +The newest entry in the event log. +.It Cm oldest +The oldest entry in the event log. +.It Cm clear +The first entry since the event log was cleared. +.It Cm shutdown +The entry in the event log corresponding to the last time the controller was +cleanly shut down. +.It Cm boot +The entry in the event log corresponding to the most recent boot. +.El +.It Cm show firmware +Lists all of the firmware images present on the controller. +.It Cm show logstate +Display the various sequence numbers associated with the event log. +.It Cm show patrol +Display the status of the controller's patrol read operation. +.It Cm show volumes +Lists all of the logical volumes managed by the controller. +.El +.Pp +The physical drive management commands include: +.Bl -tag -width indent +.It Cm fail Ar drive +Mark +.Ar drive +as failed. +.Ar Drive +must be an online drive that is part of an array. +.It Cm good Ar drive +Mark +.Ar drive +as an unconfigured good drive. +.Ar Drive +must not be part of an existing array. +.It Cm rebuild Ar drive +Mark a failed +.Ar drive +that is still part of an array as a good drive suitable for a rebuild. +The firmware should kick off an array rebuild on its own if a failed drive +is marked as a rebuild drive. +.It Cm drive progress Ar drive +Report the current progress and estimated completion time of drive operations +such as rebuilds or patrol reads. +.It Cm drive clear Ar drive Brq "start | stop" +Start or stop the writing of all 0x00 characters to a drive. +.It Cm start rebuild Ar drive +Manually start a rebuild on +.Ar drive . +.It Cm abort rebuild Ar drive +Abort an in-progress rebuild operation on +.Ar drive . +It can be resumed with the +.Cm start rebuild +command. +.It Cm locate Ar drive Brq "on | off" +Change the state of the external LED associated with +.Ar drive . 
+.El +.Pp +The logical volume management commands include: +.Bl -tag -width indent +.It Cm cache Ar volume Op Ar setting Op Ar value +If no +.Ar setting +argument is supplied, then the current cache policy for +.Ar volume +is displayed; +otherwise, +the cache policy for +.Ar volume +is modified. +The optional +.Ar setting +argument can be one of the following values: +.Bl -tag -width indent +.It Cm enable +Enable caching for both read and write I/O operations. +.It Cm disable +Disable caching for both read and write I/O operations. +.It Cm reads +Enable caching only for read I/O operations. +.It Cm writes +Enable caching only for write I/O operations. +.It Cm write-back +Use write-back policy for cached writes. +.It Cm write-through +Use write-through policy for cached writes. +.It Cm read-ahead Op Ar value +Set the read ahead policy for cached reads. +The +.Ar value +argument can be set to either +.Dq none , +.Dq adaptive , +or +.Dq always . +.It Cm write-cache Op Ar value +Control the write caches on the physical drives backing +.Ar volume . +The +.Ar value +argument can be set to either +.Dq disable , +.Dq enable , +or +.Dq default . +.Pp +In general this setting should be left disabled to avoid data loss when the +physical drives lose power. +The battery backup of the RAID controller does not save data in the write +caches of the physical drives. +.El +.It Cm name Ar volume Ar name +Sets the name of +.Ar volume +to +.Ar name . +.It Cm volume progress Ar volume +Report the current progress and estimated completion time of volume operations +such as consistency checks and initializations. +.El +.Pp +The configuration commands include: +.Bl -tag -width indent +.It Cm clear +Delete the entire configuration including all volumes, arrays, and spares. +.It Xo Cm create Ar type +.Op Fl v +.Op Fl s Ar stripe_size +.Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Op Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Xc +Create a new volume. 
+The +.Ar type +specifies the type of volume to create. +Currently supported types include: +.Bl -tag -width indent +.It Cm jbod +Creates a RAID0 volume for each drive specified. +Each drive must be specified as a separate argument. +.It Cm raid0 +Creates one RAID0 volume spanning the drives listed in the single drive list. +.It Cm raid1 +Creates one RAID1 volume spanning the drives listed in the single drive list. +.It Cm raid5 +Creates one RAID5 volume spanning the drives listed in the single drive list. +.It Cm raid6 +Creates one RAID6 volume spanning the drives listed in the single drive list. +.It Cm raid10 +Creates one RAID10 volume spanning multiple RAID1 arrays. +The drives for each RAID1 array are specified as a single drive list. +.It Cm raid50 +Creates one RAID50 volume spanning multiple RAID5 arrays. +The drives for each RAID5 array are specified as a single drive list. +.It Cm raid60 +Creates one RAID60 volume spanning multiple RAID6 arrays. +The drives for each RAID6 array are specified as a single drive list. +.It Cm concat +Creates a single volume by concatenating all of the drives in the single drive +list. +.El +.Pp +.Sy Note: +Not all volume types are supported by all controllers. +.Pp +If the +.Fl v +flag is specified after +.Ar type , +then more verbose output will be enabled. +Currently this just provides notification as drives are added to arrays and +arrays to volumes when building the configuration. +.Pp +The +.Fl s +.Ar stripe_size +parameter allows the stripe size of the array to be set. +By default a stripe size of 64K is used. +Valid values are 512 through 1M, though the MFI firmware may reject some +values. +.It Cm delete Ar volume +Delete the volume +.Ar volume . +.It Cm add Ar drive Op Ar volume +Mark +.Ar drive +as a hot spare. +.Ar Drive +must be in the unconfigured good state. +If +.Ar volume +is specified, +then the hot spare will be dedicated to arrays backing that volume. 
+Otherwise, +.Ar drive +will be used as a global hot spare backing all arrays for this controller. +Note that +.Ar drive +must be as large as the smallest drive in all of the arrays it is going to +back. +.It Cm remove Ar drive +Remove the hot spare +.Ar drive +from service. +It will be placed in the unconfigured good state. +.El +.Pp +The controller management commands include: +.Bl -tag -width indent +.It Cm patrol Ar command Op Ar interval Op Ar start +Set the patrol read operation mode. +The +.Ar command +argument can be one of the following values: +.Bl -tag -width indent +.It Cm disable +Disable patrol reads. +.It Cm auto +Enable periodic patrol reads initiated by the firmware. +The optional +.Ar interval +argument specifies the interval in seconds between patrol reads. +If patrol reads should be run continuously, +then +.Ar interval +should consist of the word +.Dq continuously . +The optional +.Ar start +argument specifies a non-negative, relative start time for the next patrol read. +If an interval or start time is not specified, +then the existing setting will be used. +.It Cm manual +Enable manual patrol reads that are only initiated by the user. +.El +.It Cm start patrol +Start a patrol read operation. +.It Cm stop patrol +Stop a currently running patrol read operation. +.It Cm flash Ar file +Updates the flash on the controller with the firmware stored in +.Ar file . +A reboot is required for the new firmware to take effect.
+.El +.Sh EXAMPLES +Configure the cache for volume mfid0 to cache only writes: +.Pp +.Dl Nm Cm cache mfid0 writes +.Dl Nm Cm cache mfid0 write-back +.Pp +Create a RAID5 array spanning the first four disks in the second enclosure: +.Pp +.Dl Nm Cm create raid5 e1:s0,e1:s1,e1:s2,e1:s4 +.Pp +Configure the first three disks on a controller as JBOD: +.Pp +.Dl Nm Cm create jbod 0 1 2 +.Pp +Create a RAID10 volume that spans two arrays each of which contains two disks +from two different enclosures: +.Pp +.Dl Nm Cm create raid10 e1:s0,e1:s1 e2:s0,e2:s1 +.Pp +Add drive with the device ID of 4 as a global hot spare: +.Pp +.Dl Nm Cm add 4 +.Pp +Add the drive in slot 2 in the main chassis as a hot spare for volume mfid0: +.Pp +.Dl Nm Cm add s2 mfid0 +.Pp +Configure the adapter to run periodic patrol reads once a week with the first +patrol read starting in 5 minutes: +.Pp +.Dl Nm Cm patrol auto 604800 300 +.Pp +.Sh SEE ALSO +.Xr mfi 4 +.Sh BUGS +On 64-bit OS versions +.Dv 6.2-YAHOO-20070514 +and earlier, +the +.Xr mfi 4 +driver does not properly report firmware errors to 32-bit versions of +.Nm . +As a result, +some commands may fail even though they do not report any errors. diff --git a/usr.sbin/mfiutil/mfiutil.8 b/usr.sbin/mfiutil/mfiutil.8 new file mode 100644 index 0000000..e6026d6 --- /dev/null +++ b/usr.sbin/mfiutil/mfiutil.8 @@ -0,0 +1,566 @@ +.\" Copyright (c) 2008, 2009 Yahoo!, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. 
The names of the authors may not be used to endorse or promote +.\" products derived from this software without specific prior written +.\" permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd August 16, 2009 +.Dt MFIUTIL 8 +.Os +.Sh NAME +.Nm mfiutil +.Nd Utility for managing LSI MegaRAID SAS controllers +.Sh SYNOPSIS +.Nm +.Cm version +.Nm +.Op Fl u Ar unit +.Cm show adapter +.Nm +.Op Fl u Ar unit +.Cm show battery +.Nm +.Op Fl u Ar unit +.Cm show config +.Nm +.Op Fl u Ar unit +.Cm show drives +.Nm +.Op Fl u Ar unit +.Cm show events +.Op Fl c Ar class +.Op Fl l Ar locale +.Op Fl n Ar count +.Op Fl v +.Op Ar start Op Ar stop +.Nm +.Op Fl u Ar unit +.Cm show firmware +.Nm +.Op Fl u Ar unit +.Cm show logstate +.Nm +.Op Fl u Ar unit +.Cm show patrol +.Nm +.Op Fl u Ar unit +.Cm show volumes +.Nm +.Op Fl u Ar unit +.Cm fail Ar drive +.Nm +.Op Fl u Ar unit +.Cm good Ar drive +.Nm +.Op Fl u Ar unit +.Cm rebuild Ar drive +.Nm +.Op Fl u Ar unit +.Cm drive progress Ar drive +.Nm +.Op Fl u Ar unit +.Cm drive clear Ar drive Brq "start | stop" +.Nm +.Op Fl u Ar unit +.Cm start rebuild Ar drive +.Nm +.Op Fl u Ar unit +.Cm abort rebuild Ar drive +.Nm +.Op Fl u Ar unit +.Cm locate Ar drive Brq "on | off" +.Nm 
+.Op Fl u Ar unit +.Cm cache Ar volume Op Ar setting Op Ar value +.Nm +.Op Fl u Ar unit +.Cm name Ar volume Ar name +.Nm +.Op Fl u Ar unit +.Cm volume progress Ar volume +.Nm +.Op Fl u Ar unit +.Cm clear +.Nm +.Op Fl u Ar unit +.Cm create Ar type +.Op Fl v +.Op Fl s Ar stripe_size +.Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Op Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Nm +.Op Fl u Ar unit +.Cm delete Ar volume +.Nm +.Op Fl u Ar unit +.Cm add Ar drive Op Ar volume +.Nm +.Op Fl u Ar unit +.Cm remove Ar drive +.Nm +.Op Fl u Ar unit +.Cm start patrol +.Nm +.Op Fl u Ar unit +.Cm stop patrol +.Nm +.Op Fl u Ar unit +.Cm patrol Ar command Op Ar interval Op Ar start +.Nm +.Op Fl u Ar unit +.Cm flash Ar file +.Sh DESCRIPTION +The +.Nm +utility can be used to display or modify various parameters on LSI +MegaRAID SAS RAID controllers. +Each invocation of +.Nm +consists of zero or more global options followed by a command. +Commands may support additional optional or required arguments after the +command. +.Pp +Currently one global option is supported: +.Bl -tag -width indent +.It Fl u Ar unit +.Ar unit +specifies the unit of the controller to work with. +If no unit is specified, +then unit 0 is used. +.El +.Pp +Volumes may be specified in two forms. +First, +a volume may be identified by its target ID. +Second, +the volume may be specified by the corresponding +.Em mfidX +device, +such as +.Em mfid0 . +.Pp +Drives may be specified in two forms. +First, +a drive may be identified by its device ID. +The device ID for configured drives can be found in +.Cm show config . +Second, +a drive may be identified by its location as +.Sm off +.Op E Ar xx Ns \&: +.Li S Ns Ar yy +.Sm on +where +.Ar xx +is the enclosure +and +.Ar yy +is the slot for each drive as displayed in +.Cm show drives . +.Pp +The +.Nm +utility supports several different groups of commands.
+The first group of commands provide information about the controller, +the volumes it manages, and the drives it controls. +The second group of commands are used to manage the physical drives +attached to the controller. +The third group of commands are used to manage the logical volumes +managed by the controller. +The fourth group of commands are used to manage the drive configuration for +the controller. +The fifth group of commands are used to manage controller-wide operations. +.Pp +The informational commands include: +.Bl -tag -width indent +.It Cm version +Displays the version of +.Nm . +.It Cm show adapter +Displays information about the RAID controller such as the model number. +.It Cm show battery +Displays information about the battery from the battery backup unit. +.It Cm show config +Displays the volume and drive configuration for the controller. +Each array is listed along with the physical drives the array is built from. +Each volume is listed along with the arrays that the volume spans. +If any hot spare drives are configured, then they are listed as well. +.It Cm show drives +Lists all of the physical drives attached to the controller. +.It Xo Cm show events +.Op Fl c Ar class +.Op Fl l Ar locale +.Op Fl n Ar count +.Op Fl v +.Op Ar start Op Ar stop +.Xc +Display entries from the controller's event log. +The controller maintains a circular buffer of events. +Each event is tagged with a class and locale. +.Pp +The +.Ar class +parameter limits the output to entries at the specified class or higher. +The default class is +.Dq warn . +The available classes from lowest priority to highest are: +.Bl -tag -width -indent +.It Cm debug +Debug messages. +.It Cm progress +Periodic progress updates for long-running operations such as background +initializations, array rebuilds, or patrol reads. +.It Cm info +Informational messages such as drive insertions and volume creations. +.It Cm warn +Indicates that some component may be close to failing. 
+.It Cm crit +A component has failed, but no data is lost. +For example, a volume becoming degraded due to a drive failure. +.It Cm fatal +A component has failed resulting in data loss. +.It Cm dead +The controller itself has died. +.El +.Pp +The +.Ar locale +parameter limits the output to entries for the specified part of the controller. +The default locale is +.Dq all . +The available locales are +.Dq volume , +.Dq drive , +.Dq enclosure , +.Dq battery , +.Dq sas , +.Dq controller , +.Dq config , +.Dq cluster , +and +.Dq all . +.Pp +The +.Ar count +parameter is a debugging aid that specifies the number of events to fetch from +the controller for each low-level request. +The default is 15 events. +.Pp +By default, matching event log entries from the previous shutdown up to the +present are displayed. This range can be adjusted via the +.Ar start +and +.Ar stop +parameters. +Each of these parameters can either be specified as a log entry number or as +one of the following aliases: +.Bl -tag -width -indent +.It Cm newest +The newest entry in the event log. +.It Cm oldest +The oldest entry in the event log. +.It Cm clear +The first entry since the event log was cleared. +.It Cm shutdown +The entry in the event log corresponding to the last time the controller was +cleanly shut down. +.It Cm boot +The entry in the event log corresponding to the most recent boot. +.El +.It Cm show firmware +Lists all of the firmware images present on the controller. +.It Cm show logstate +Display the various sequence numbers associated with the event log. +.It Cm show patrol +Display the status of the controller's patrol read operation. +.It Cm show volumes +Lists all of the logical volumes managed by the controller. +.El +.Pp +The physical drive management commands include: +.Bl -tag -width indent +.It Cm fail Ar drive +Mark +.Ar drive +as failed. +.Ar Drive +must be an online drive that is part of an array. +.It Cm good Ar drive +Mark +.Ar drive +as an unconfigured good drive.
+.Ar Drive +must not be part of an existing array. +.It Cm rebuild Ar drive +Mark a failed +.Ar drive +that is still part of an array as a good drive suitable for a rebuild. +The firmware should kick off an array rebuild on its own if a failed drive +is marked as a rebuild drive. +.It Cm drive progress Ar drive +Report the current progress and estimated completion time of drive operations +such as rebuilds or patrol reads. +.It Cm drive clear Ar drive Brq "start | stop" +Start or stop the writing of all 0x00 characters to a drive. +.It Cm start rebuild Ar drive +Manually start a rebuild on +.Ar drive . +.It Cm abort rebuild Ar drive +Abort an in-progress rebuild operation on +.Ar drive . +It can be resumed with the +.Cm start rebuild +command. +.It Cm locate Ar drive Brq "on | off" +Change the state of the external LED associated with +.Ar drive . +.El +.Pp +The logical volume management commands include: +.Bl -tag -width indent +.It Cm cache Ar volume Op Ar setting Op Ar value +If no +.Ar setting +argument is supplied, then the current cache policy for +.Ar volume +is displayed; +otherwise, +the cache policy for +.Ar volume +is modified. +The optional +.Ar setting +argument can be one of the following values: +.Bl -tag -width indent +.It Cm enable +Enable caching for both read and write I/O operations. +.It Cm disable +Disable caching for both read and write I/O operations. +.It Cm reads +Enable caching only for read I/O operations. +.It Cm writes +Enable caching only for write I/O operations. +.It Cm write-back +Use write-back policy for cached writes. +.It Cm write-through +Use write-through policy for cached writes. +.It Cm read-ahead Op Ar value +Set the read ahead policy for cached reads. +The +.Ar value +argument can be set to either +.Dq none , +.Dq adaptive , +or +.Dq always . +.It Cm write-cache Op Ar value +Control the write caches on the physical drives backing +.Ar volume . 
+The +.Ar value +argument can be set to either +.Dq disable , +.Dq enable , +or +.Dq default . +.Pp +In general this setting should be left disabled to avoid data loss when the +physical drives lose power. +The battery backup of the RAID controller does not save data in the write +caches of the physical drives. +.El +.It Cm name Ar volume Ar name +Sets the name of +.Ar volume +to +.Ar name . +.It Cm volume progress Ar volume +Report the current progress and estimated completion time of volume operations +such as consistency checks and initializations. +.El +.Pp +The configuration commands include: +.Bl -tag -width indent +.It Cm clear +Delete the entire configuration including all volumes, arrays, and spares. +.It Xo Cm create Ar type +.Op Fl v +.Op Fl s Ar stripe_size +.Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Op Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Xc +Create a new volume. +The +.Ar type +specifies the type of volume to create. +Currently supported types include: +.Bl -tag -width indent +.It Cm jbod +Creates a RAID0 volume for each drive specified. +Each drive must be specified as a separate argument. +.It Cm raid0 +Creates one RAID0 volume spanning the drives listed in the single drive list. +.It Cm raid1 +Creates one RAID1 volume spanning the drives listed in the single drive list. +.It Cm raid5 +Creates one RAID5 volume spanning the drives listed in the single drive list. +.It Cm raid6 +Creates one RAID6 volume spanning the drives listed in the single drive list. +.It Cm raid10 +Creates one RAID10 volume spanning multiple RAID1 arrays. +The drives for each RAID1 array are specified as a single drive list. +.It Cm raid50 +Creates one RAID50 volume spanning multiple RAID5 arrays. +The drives for each RAID5 array are specified as a single drive list. +.It Cm raid60 +Creates one RAID60 volume spanning multiple RAID6 arrays. +The drives for each RAID6 array are specified as a single drive list. 
+.It Cm concat +Creates a single volume by concatenating all of the drives in the single drive +list. +.El +.Pp +.Sy Note: +Not all volume types are supported by all controllers. +.Pp +If the +.Fl v +flag is specified after +.Ar type , +then more verbose output will be enabled. +Currently this just provides notification as drives are added to arrays and +arrays to volumes when building the configuration. +.Pp +The +.Fl s +.Ar stripe_size +parameter allows the stripe size of the array to be set. +By default a stripe size of 64K is used. +Valid values are 512 through 1M, though the MFI firmware may reject some +values. +.It Cm delete Ar volume +Delete the volume +.Ar volume . +.It Cm add Ar drive Op Ar volume +Mark +.Ar drive +as a hot spare. +.Ar Drive +must be in the unconfigured good state. +If +.Ar volume +is specified, +then the hot spare will be dedicated to arrays backing that volume. +Otherwise, +.Ar drive +will be used as a global hot spare backing all arrays for this controller. +Note that +.Ar drive +must be as large as the smallest drive in all of the arrays it is going to +back. +.It Cm remove Ar drive +Remove the hot spare +.Ar drive +from service. +It will be placed in the unconfigured good state. +.El +.Pp +The controller management commands include: +.Bl -tag -width indent +.It Cm patrol Ar command Op Ar interval Op Ar start +Set the patrol read operation mode. +The +.Ar command +argument can be one of the following values: +.Bl -tag -width indent +.It Cm disable +Disable patrol reads. +.It Cm auto +Enable periodic patrol reads initiated by the firmware. +The optional +.Ar interval +argument specifies the interval in seconds between patrol reads. +If patrol reads should be run continuously, +then +.Ar interval +should consist of the word +.Dq continuously . +The optional +.Ar start +argument specifies a non-negative, relative start time for the next patrol read. +If an interval or start time is not specified, +then the existing setting will be used.
+.It Cm manual +Enable manual patrol reads that are only initiated by the user. +.El +.It Cm start patrol +Start a patrol read operation. +.It Cm stop patrol +Stop a currently running patrol read operation. +.It Cm flash Ar file +Updates the flash on the controller with the firmware stored in +.Ar file . +A reboot is required for the new firmware to take effect. +.El +.Sh EXAMPLES +Configure the cache for volume mfid0 to cache only writes: +.Pp +.Dl Nm Cm cache mfid0 writes +.Dl Nm Cm cache mfid0 write-back +.Pp +Create a RAID5 array spanning the first four disks in the second enclosure: +.Pp +.Dl Nm Cm create raid5 e1:s0,e1:s1,e1:s2,e1:s4 +.Pp +Configure the first three disks on a controller as JBOD: +.Pp +.Dl Nm Cm create jbod 0 1 2 +.Pp +Create a RAID10 volume that spans two arrays each of which contains two disks +from two different enclosures: +.Pp +.Dl Nm Cm create raid10 e1:s0,e1:s1 e2:s0,e2:s1 +.Pp +Add drive with the device ID of 4 as a global hot spare: +.Pp +.Dl Nm Cm add 4 +.Pp +Add the drive in slot 2 in the main chassis as a hot spare for volume mfid0: +.Pp +.Dl Nm Cm add s2 mfid0 +.Pp +Configure the adapter to run periodic patrol reads once a week with the first +patrol read starting in 5 minutes: +.Pp +.Dl Nm Cm patrol auto 604800 300 +.Pp +.Sh SEE ALSO +.Xr mfi 4 +.Sh HISTORY +The +.Nm +utility first appeared in +.Fx 8.0 . diff --git a/usr.sbin/mfiutil/mfiutil.c b/usr.sbin/mfiutil/mfiutil.c new file mode 100644 index 0000000..d091fb6 --- /dev/null +++ b/usr.sbin/mfiutil/mfiutil.c @@ -0,0 +1,134 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include "mfiutil.h" + +SET_DECLARE(MFI_DATASET(top), struct mfiutil_command); + +MFI_TABLE(top, start); +MFI_TABLE(top, stop); +MFI_TABLE(top, abort); + +int mfi_unit; + +static void +usage(void) +{ + + fprintf(stderr, "usage: mfiutil [-u unit] ...\n\n"); + fprintf(stderr, "Commands include:\n"); + fprintf(stderr, " version\n"); + fprintf(stderr, " show adapter - display controller information\n"); + fprintf(stderr, " show battery - display battery information\n"); + fprintf(stderr, " show config - display RAID configuration\n"); + fprintf(stderr, " show drives - list physical drives\n"); + fprintf(stderr, " show events - display event log\n"); + fprintf(stderr, " show firmware - list firmware images\n"); + fprintf(stderr, " show volumes - list logical volumes\n"); + fprintf(stderr, " show patrol - display patrol read status\n"); + fprintf(stderr, " fail - fail a physical drive\n"); + fprintf(stderr, " good - mark a bad physical drive as good\n"); + fprintf(stderr, " rebuild - mark failed drive ready for rebuild\n"); + fprintf(stderr, " drive progress - display status of active operations\n"); + fprintf(stderr, " drive clear - clear a drive with all 0x00\n"); + fprintf(stderr, " start rebuild \n"); + fprintf(stderr, " abort rebuild \n"); + fprintf(stderr, " locate - toggle drive LED\n"); + fprintf(stderr, " cache [command [setting]]\n"); + fprintf(stderr, " name \n"); + fprintf(stderr, " volume progress - display status of active operations\n"); + fprintf(stderr, " clear - clear volume configuration\n"); + fprintf(stderr, " create [-v] [,[,...]] [[,[,...]]\n"); + fprintf(stderr, " delete \n"); + fprintf(stderr, " add [volume] - add a hot spare\n"); + fprintf(stderr, " remove - remove a hot spare\n"); + fprintf(stderr, " patrol [interval [start]]\n"); + fprintf(stderr, " start patrol - start a patrol read\n"); + fprintf(stderr, " stop patrol - stop a patrol read\n"); + 
fprintf(stderr, " flash \n"); +#ifdef DEBUG + fprintf(stderr, " debug - debug 'show config'\n"); + fprintf(stderr, " dump - display 'saved' config\n"); +#endif + exit(1); +} + +static int +version(int ac, char **av) +{ + + printf("mfiutil version 1.0.13"); +#ifdef DEBUG + printf(" (DEBUG)"); +#endif + printf("\n"); + return (0); +} +MFI_COMMAND(top, version, version); + +int +main(int ac, char **av) +{ + struct mfiutil_command **cmd; + int ch; + + while ((ch = getopt(ac, av, "u:")) != -1) { + switch (ch) { + case 'u': + mfi_unit = atoi(optarg); + break; + case '?': + usage(); + } + } + + av += optind; + ac -= optind; + + /* getopt() eats av[0], so we can't use mfi_table_handler() directly. */ + if (ac == 0) + usage(); + + SET_FOREACH(cmd, MFI_DATASET(top)) { + if (strcmp((*cmd)->name, av[0]) == 0) { + /* Propagate the handler's result: non-zero return -> exit status 1. */ + return ((*cmd)->handler(ac, av) != 0); + } + } + warnx("Unknown command %s.", av[0]); + return (1); +} diff --git a/usr.sbin/mfiutil/mfiutil.h b/usr.sbin/mfiutil/mfiutil.h new file mode 100644 index 0000000..b080b50 --- /dev/null +++ b/usr.sbin/mfiutil/mfiutil.h @@ -0,0 +1,147 @@ +/*- + * Copyright (c) 2008, 2009 Yahoo!, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __MFIUTIL_H__ +#define __MFIUTIL_H__ + +#include +#include + +#include + +/* 4.x compat */ +#ifndef SET_DECLARE + +/* */ +#define __used +#define __section(x) __attribute__((__section__(x))) + +/* */ +#undef __MAKE_SET +#undef DATA_SET + +#define __MAKE_SET(set, sym) \ + static void const * const __set_##set##_sym_##sym \ + __section("set_" #set) __used = &sym + +#define DATA_SET(set, sym) __MAKE_SET(set, sym) + +#define SET_DECLARE(set, ptype) \ + extern ptype *__CONCAT(__start_set_,set); \ + extern ptype *__CONCAT(__stop_set_,set) + +#define SET_BEGIN(set) \ + (&__CONCAT(__start_set_,set)) +#define SET_LIMIT(set) \ + (&__CONCAT(__stop_set_,set)) + +#define SET_FOREACH(pvar, set) \ + for (pvar = SET_BEGIN(set); pvar < SET_LIMIT(set); pvar++) + +int humanize_number(char *_buf, size_t _len, int64_t _number, + const char *_suffix, int _scale, int _flags); + +/* humanize_number(3) */ +#define HN_DECIMAL 0x01 +#define HN_NOSPACE 0x02 +#define HN_B 0x04 +#define HN_DIVISOR_1000 0x08 + +#define HN_GETSCALE 0x10 +#define HN_AUTOSCALE 0x20 + +#endif + +/* Constants for DDF RAID levels. 
*/ +#define DDF_RAID0 0x00 +#define DDF_RAID1 0x01 +#define DDF_RAID3 0x03 +#define DDF_RAID5 0x05 +#define DDF_RAID6 0x06 +#define DDF_RAID1E 0x11 +#define DDF_JBOD 0x0f +#define DDF_CONCAT 0x1f +#define DDF_RAID5E 0x15 +#define DDF_RAID5EE 0x25 + +struct mfiutil_command { + const char *name; + int (*handler)(int ac, char **av); +}; + +#define MFI_DATASET(name) mfiutil_ ## name ## _table + +#define MFI_COMMAND(set, name, function) \ + static struct mfiutil_command function ## _mfiutil_command = \ + { #name, function }; \ + DATA_SET(MFI_DATASET(set), function ## _mfiutil_command) + +#define MFI_TABLE(set, name) \ + SET_DECLARE(MFI_DATASET(name), struct mfiutil_command); \ + \ + static int \ + mfiutil_ ## name ## _table_handler(int ac, char **av) \ + { \ + return (mfi_table_handler(SET_BEGIN(MFI_DATASET(name)), \ + SET_LIMIT(MFI_DATASET(name)), ac, av)); \ + } \ + MFI_COMMAND(set, name, mfiutil_ ## name ## _table_handler) + +extern int mfi_unit; + +void mbox_store_ldref(uint8_t *mbox, union mfi_ld_ref *ref); +void mbox_store_pdref(uint8_t *mbox, union mfi_pd_ref *ref); +void mfi_display_progress(const char *label, struct mfi_progress *prog); +int mfi_table_handler(struct mfiutil_command **start, + struct mfiutil_command **end, int ac, char **av); +const char *mfi_raid_level(uint8_t primary_level, uint8_t secondary_level); +const char *mfi_ldstate(enum mfi_ld_state state); +const char *mfi_pdstate(enum mfi_pd_state state); +const char *mfi_pd_inq_string(struct mfi_pd_info *info); +const char *mfi_volume_name(int fd, uint8_t target_id); +int mfi_volume_busy(int fd, uint8_t target_id); +int mfi_config_read(int fd, struct mfi_config_data **configp); +int mfi_lookup_drive(int fd, char *drive, uint16_t *device_id); +int mfi_lookup_volume(int fd, const char *name, uint8_t *target_id); +int mfi_dcmd_command(int fd, uint32_t opcode, void *buf, size_t bufsize, + uint8_t *mbox, size_t mboxlen, uint8_t *statusp); +int mfi_open(int unit); +int mfi_ctrl_get_info(int fd, struct 
mfi_ctrl_info *info, uint8_t *statusp); +int mfi_ld_get_info(int fd, uint8_t target_id, struct mfi_ld_info *info, + uint8_t *statusp); +int mfi_ld_get_list(int fd, struct mfi_ld_list *list, uint8_t *statusp); +int mfi_pd_get_info(int fd, uint16_t device_id, struct mfi_pd_info *info, + uint8_t *statusp); +int mfi_pd_get_list(int fd, struct mfi_pd_list **listp, uint8_t *statusp); +int mfi_reconfig_supported(void); +const char *mfi_status(u_int status_code); + +#endif /* !__MFIUTIL_H__ */ -- cgit v1.1 From 4d28a1b892bfbdbe100677b4346de7af4b237f46 Mon Sep 17 00:00:00 2001 From: scottl Date: Mon, 17 Aug 2009 07:30:08 +0000 Subject: Merge mptutil Approved by: re --- usr.sbin/Makefile | 1 + usr.sbin/mptutil/Makefile | 19 + usr.sbin/mptutil/mpt_cam.c | 569 ++++++++++++++++++++ usr.sbin/mptutil/mpt_cmd.c | 639 +++++++++++++++++++++++ usr.sbin/mptutil/mpt_config.c | 1160 +++++++++++++++++++++++++++++++++++++++++ usr.sbin/mptutil/mpt_drive.c | 395 ++++++++++++++ usr.sbin/mptutil/mpt_evt.c | 155 ++++++ usr.sbin/mptutil/mpt_show.c | 559 ++++++++++++++++++++ usr.sbin/mptutil/mpt_volume.c | 248 +++++++++ usr.sbin/mptutil/mptutil.8 | 386 ++++++++++++++ usr.sbin/mptutil/mptutil.c | 123 +++++ usr.sbin/mptutil/mptutil.h | 178 +++++++ 12 files changed, 4432 insertions(+) create mode 100644 usr.sbin/mptutil/Makefile create mode 100644 usr.sbin/mptutil/mpt_cam.c create mode 100644 usr.sbin/mptutil/mpt_cmd.c create mode 100644 usr.sbin/mptutil/mpt_config.c create mode 100644 usr.sbin/mptutil/mpt_drive.c create mode 100644 usr.sbin/mptutil/mpt_evt.c create mode 100644 usr.sbin/mptutil/mpt_show.c create mode 100644 usr.sbin/mptutil/mpt_volume.c create mode 100644 usr.sbin/mptutil/mptutil.8 create mode 100644 usr.sbin/mptutil/mptutil.c create mode 100644 usr.sbin/mptutil/mptutil.h (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index 37d24f8..160b122 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -104,6 +104,7 @@ SUBDIR= ${_ac} \ ${_mount_smbfs} \ 
${_moused} \ ${_mptable} \ + mptutil \ mtest \ mtree \ ${_named} \ diff --git a/usr.sbin/mptutil/Makefile b/usr.sbin/mptutil/Makefile new file mode 100644 index 0000000..4abf66e --- /dev/null +++ b/usr.sbin/mptutil/Makefile @@ -0,0 +1,19 @@ +# $FreeBSD$ + +PROG= mptutil +SRCS= mptutil.c mpt_cam.c mpt_cmd.c mpt_config.c mpt_drive.c mpt_evt.c \ + mpt_show.c mpt_volume.c +# mpt_flash.c +MAN= mptutil.8 + +WARNS?= 3 + +DPADD+= ${LIBCAM} ${LIBUTIL} +LDADD+= -lcam -lutil + +# Here be dragons +.ifdef DEBUG +CFLAGS+= -DDEBUG +.endif + +.include diff --git a/usr.sbin/mptutil/mpt_cam.c b/usr.sbin/mptutil/mpt_cam.c new file mode 100644 index 0000000..0d20c7d --- /dev/null +++ b/usr.sbin/mptutil/mpt_cam.c @@ -0,0 +1,569 @@ +/*- + * Copyright (c) 2008 Yahoo!, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__RCSID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mptutil.h" + +static int xptfd; + +static int +xpt_open(void) +{ + + if (xptfd == 0) + xptfd = open(XPT_DEVICE, O_RDWR); + return (xptfd); +} + +int +mpt_query_disk(U8 VolumeBus, U8 VolumeID, struct mpt_query_disk *qd) +{ + struct bus_match_pattern *b; + struct periph_match_pattern *p; + struct periph_match_result *r; + union ccb ccb; + size_t bufsize; + int i; + + /* mpt(4) only handles devices on bus 0. */ + if (VolumeBus != 0) + return (ENXIO); + + if (xpt_open() < 0) + return (ENXIO); + + bzero(&ccb, sizeof(ccb)); + + ccb.ccb_h.func_code = XPT_DEV_MATCH; + ccb.ccb_h.path_id = CAM_XPT_PATH_ID; + ccb.ccb_h.target_id = CAM_TARGET_WILDCARD; + ccb.ccb_h.target_lun = CAM_LUN_WILDCARD; + + bufsize = sizeof(struct dev_match_result) * 5; + ccb.cdm.num_matches = 0; + ccb.cdm.match_buf_len = bufsize; + ccb.cdm.matches = calloc(1, bufsize); + + bufsize = sizeof(struct dev_match_pattern) * 2; + ccb.cdm.num_patterns = 2; + ccb.cdm.pattern_buf_len = bufsize; + ccb.cdm.patterns = calloc(1, bufsize); + + /* Match mptX bus 0. 
*/ + ccb.cdm.patterns[0].type = DEV_MATCH_BUS; + b = &ccb.cdm.patterns[0].pattern.bus_pattern; + snprintf(b->dev_name, sizeof(b->dev_name), "mpt"); + b->unit_number = mpt_unit; + b->bus_id = 0; + b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID; + + /* Look for a "da" device at the specified target and lun. */ + ccb.cdm.patterns[1].type = DEV_MATCH_PERIPH; + p = &ccb.cdm.patterns[1].pattern.periph_pattern; + snprintf(p->periph_name, sizeof(p->periph_name), "da"); + p->target_id = VolumeID; + p->flags = PERIPH_MATCH_NAME | PERIPH_MATCH_TARGET; + + if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) { + i = errno; + free(ccb.cdm.matches); + free(ccb.cdm.patterns); + return (i); + } + free(ccb.cdm.patterns); + + if (((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) || + (ccb.cdm.status != CAM_DEV_MATCH_LAST)) { + warnx("mpt_query_disk got CAM error %#x, CDM error %d\n", + ccb.ccb_h.status, ccb.cdm.status); + free(ccb.cdm.matches); + return (EIO); + } + + /* + * We should have exactly 2 matches, 1 for the bus and 1 for + * the peripheral. However, if we only have 1 match and it is + * for the bus, don't print an error message and return + * ENOENT. + */ + if (ccb.cdm.num_matches == 1 && + ccb.cdm.matches[0].type == DEV_MATCH_BUS) { + free(ccb.cdm.matches); + return (ENOENT); + } + if (ccb.cdm.num_matches != 2) { + warnx("mpt_query_disk got %d matches, expected 2", + ccb.cdm.num_matches); + free(ccb.cdm.matches); + return (EIO); + } + if (ccb.cdm.matches[0].type != DEV_MATCH_BUS || + ccb.cdm.matches[1].type != DEV_MATCH_PERIPH) { + warnx("mpt_query_disk got wrong CAM matches"); + free(ccb.cdm.matches); + return (EIO); + } + + /* Copy out the data. 
*/ + r = &ccb.cdm.matches[1].result.periph_result; + snprintf(qd->devname, sizeof(qd->devname), "%s%d", r->periph_name, + r->unit_number); + free(ccb.cdm.matches); + + return (0); +} + +static int +periph_is_volume(CONFIG_PAGE_IOC_2 *ioc2, struct periph_match_result *r) +{ + CONFIG_PAGE_IOC_2_RAID_VOL *vol; + int i; + + if (ioc2 == NULL) + return (0); + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) { + if (vol->VolumeBus == 0 && vol->VolumeID == r->target_id) + return (1); + } + return (0); +} + +/* Much borrowed from scsireadcapacity() in src/sbin/camcontrol/camcontrol.c. */ +static int +fetch_scsi_capacity(struct cam_device *dev, struct mpt_standalone_disk *disk) +{ + struct scsi_read_capacity_data rcap; + struct scsi_read_capacity_data_long rcaplong; + union ccb *ccb; + int error; + + ccb = cam_getccb(dev); + if (ccb == NULL) + return (ENOMEM); + + /* Zero the rest of the ccb. */ + bzero(&(&ccb->ccb_h)[1], sizeof(struct ccb_scsiio) - + sizeof(struct ccb_hdr)); + + scsi_read_capacity(&ccb->csio, 1, NULL, MSG_SIMPLE_Q_TAG, &rcap, + SSD_FULL_SIZE, 5000); + + /* Disable freezing the device queue */ + ccb->ccb_h.flags |= CAM_DEV_QFRZDIS; + + if (cam_send_ccb(dev, ccb) < 0) { + error = errno; + cam_freeccb(ccb); + return (error); + } + + if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { + cam_freeccb(ccb); + return (EIO); + } + cam_freeccb(ccb); + + /* + * A last block of 2^32-1 means that the true capacity is over 2TB, + * and we need to issue the long READ CAPACITY to get the real + * capacity. Otherwise, we're all set. + */ + if (scsi_4btoul(rcap.addr) != 0xffffffff) { + disk->maxlba = scsi_4btoul(rcap.addr); + return (0); + } + + /* Zero the rest of the ccb. 
*/ + bzero(&(&ccb->ccb_h)[1], sizeof(struct ccb_scsiio) - + sizeof(struct ccb_hdr)); + + scsi_read_capacity_16(&ccb->csio, 1, NULL, MSG_SIMPLE_Q_TAG, 0, 0, 0, + &rcaplong, SSD_FULL_SIZE, 5000); + + /* Disable freezing the device queue */ + ccb->ccb_h.flags |= CAM_DEV_QFRZDIS; + + if (cam_send_ccb(dev, ccb) < 0) { + error = errno; + cam_freeccb(ccb); + return (error); + } + + if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { + cam_freeccb(ccb); + return (EIO); + } + cam_freeccb(ccb); + + disk->maxlba = scsi_8btou64(rcaplong.addr); + return (0); +} + +/* Borrowed heavily from scsi_all.c:scsi_print_inquiry(). */ +static void +format_scsi_inquiry(struct mpt_standalone_disk *disk, + struct scsi_inquiry_data *inq_data) +{ + char vendor[16], product[48], revision[16], rstr[12]; + + if (SID_QUAL_IS_VENDOR_UNIQUE(inq_data)) + return; + if (SID_TYPE(inq_data) != T_DIRECT) + return; + if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) + return; + + cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor), + sizeof(vendor)); + cam_strvis(product, inq_data->product, sizeof(inq_data->product), + sizeof(product)); + cam_strvis(revision, inq_data->revision, sizeof(inq_data->revision), + sizeof(revision)); + + /* Hack for SATA disks, no idea how to tell speed. */ + if (strcmp(vendor, "ATA") == 0) { + snprintf(disk->inqstring, sizeof(disk->inqstring), + "<%s %s> SATA", product, revision); + return; + } + + switch (SID_ANSI_REV(inq_data)) { + case SCSI_REV_CCS: + strcpy(rstr, "SCSI-CCS"); + break; + case 5: + strcpy(rstr, "SAS"); + break; + default: + snprintf(rstr, sizeof (rstr), "SCSI-%d", + SID_ANSI_REV(inq_data)); + break; + } + snprintf(disk->inqstring, sizeof(disk->inqstring), "<%s %s %s> %s", + vendor, product, revision, rstr); +} + +/* Much borrowed from scsiinquiry() in src/sbin/camcontrol/camcontrol.c. 
*/ +static int +fetch_scsi_inquiry(struct cam_device *dev, struct mpt_standalone_disk *disk) +{ + struct scsi_inquiry_data *inq_buf; + union ccb *ccb; + int error; + + ccb = cam_getccb(dev); + if (ccb == NULL) + return (ENOMEM); + + /* Zero the rest of the ccb. */ + bzero(&(&ccb->ccb_h)[1], sizeof(struct ccb_scsiio) - + sizeof(struct ccb_hdr)); + + inq_buf = calloc(1, sizeof(*inq_buf)); + if (inq_buf == NULL) { + cam_freeccb(ccb); + return (ENOMEM); + } + scsi_inquiry(&ccb->csio, 1, NULL, MSG_SIMPLE_Q_TAG, (void *)inq_buf, + SHORT_INQUIRY_LENGTH, 0, 0, SSD_FULL_SIZE, 5000); + + /* Disable freezing the device queue */ + ccb->ccb_h.flags |= CAM_DEV_QFRZDIS; + + if (cam_send_ccb(dev, ccb) < 0) { + error = errno; + free(inq_buf); + cam_freeccb(ccb); + return (error); + } + + if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { + free(inq_buf); + cam_freeccb(ccb); + return (EIO); + } + + cam_freeccb(ccb); + format_scsi_inquiry(disk, inq_buf); + free(inq_buf); + return (0); +} + +int +mpt_fetch_disks(int fd, int *ndisks, struct mpt_standalone_disk **disksp) +{ + CONFIG_PAGE_IOC_2 *ioc2; + struct mpt_standalone_disk *disks; + struct bus_match_pattern *b; + struct periph_match_pattern *p; + struct periph_match_result *r; + struct cam_device *dev; + union ccb ccb; + size_t bufsize; + u_int i; + int count; + + if (xpt_open() < 0) + return (ENXIO); + + for (count = 100;; count+= 100) { + /* Try to fetch 'count' disks in one go. */ + bzero(&ccb, sizeof(ccb)); + + ccb.ccb_h.func_code = XPT_DEV_MATCH; + + bufsize = sizeof(struct dev_match_result) * (count + 2); + ccb.cdm.num_matches = 0; + ccb.cdm.match_buf_len = bufsize; + ccb.cdm.matches = calloc(1, bufsize); + + bufsize = sizeof(struct dev_match_pattern) * 2; + ccb.cdm.num_patterns = 2; + ccb.cdm.pattern_buf_len = bufsize; + ccb.cdm.patterns = calloc(1, bufsize); + + /* Match mptX bus 0. 
*/ + ccb.cdm.patterns[0].type = DEV_MATCH_BUS; + b = &ccb.cdm.patterns[0].pattern.bus_pattern; + snprintf(b->dev_name, sizeof(b->dev_name), "mpt"); + b->unit_number = mpt_unit; + b->bus_id = 0; + b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID; + + /* Match any "da" peripherals. */ + ccb.cdm.patterns[1].type = DEV_MATCH_PERIPH; + p = &ccb.cdm.patterns[1].pattern.periph_pattern; + snprintf(p->periph_name, sizeof(p->periph_name), "da"); + p->flags = PERIPH_MATCH_NAME; + + if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) { + i = errno; + free(ccb.cdm.matches); + free(ccb.cdm.patterns); + return (i); + } + free(ccb.cdm.patterns); + + /* Check for CCB errors. */ + if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { + free(ccb.cdm.matches); + return (EIO); + } + + /* If we need a longer list, try again. */ + if (ccb.cdm.status == CAM_DEV_MATCH_MORE) { + free(ccb.cdm.matches); + continue; + } + + /* If we got an error, abort. */ + if (ccb.cdm.status != CAM_DEV_MATCH_LAST) { + free(ccb.cdm.matches); + return (EIO); + } + break; + } + + /* + * We should have N + 1 matches, 1 for the bus and 1 for each + * "da" device. + */ + if (ccb.cdm.num_matches < 1) { + warnx("mpt_fetch_disks didn't get any matches"); + free(ccb.cdm.matches); + return (EIO); + } + if (ccb.cdm.matches[0].type != DEV_MATCH_BUS) { + warnx("mpt_fetch_disks got wrong CAM matches"); + free(ccb.cdm.matches); + return (EIO); + } + for (i = 1; i < ccb.cdm.num_matches; i++) { + if (ccb.cdm.matches[i].type != DEV_MATCH_PERIPH) { + warnx("mpt_fetch_disks got wrong CAM matches"); + free(ccb.cdm.matches); + return (EIO); + } + } + + /* Shortcut if we don't have any "da" devices. */ + if (ccb.cdm.num_matches == 1) { + free(ccb.cdm.matches); + *ndisks = 0; + *disksp = NULL; + return (0); + } + + /* + * Some of the "da" peripherals may be for RAID volumes, so + * fetch the IOC 2 page (list of RAID volumes) so we can + * exclude them from the list. 
+ */ + ioc2 = mpt_read_ioc_page(fd, 2, NULL); + disks = calloc(ccb.cdm.num_matches, sizeof(*disks)); + count = 0; + for (i = 1; i < ccb.cdm.num_matches; i++) { + r = &ccb.cdm.matches[i].result.periph_result; + if (periph_is_volume(ioc2, r)) + continue; + disks[count].bus = 0; + disks[count].target = r->target_id; + snprintf(disks[count].devname, sizeof(disks[count].devname), + "%s%d", r->periph_name, r->unit_number); + + dev = cam_open_device(disks[count].devname, O_RDWR); + if (dev != NULL) { + fetch_scsi_capacity(dev, &disks[count]); + fetch_scsi_inquiry(dev, &disks[count]); + cam_close_device(dev); + } + count++; + } + free(ccb.cdm.matches); + free(ioc2); + + *ndisks = count; + *disksp = disks; + return (0); +} + +/* + * Instruct the mpt(4) device to rescan its busses to find new devices + * such as disks whose RAID physdisk page was removed or volumes that + * were created. If id is -1, the entire bus is rescanned. + * Otherwise, only devices at the specified ID are rescanned. If bus + * is -1, then all busses are scanned instead of the specified bus. + * Note that currently, only bus 0 is supported. + */ +int +mpt_rescan_bus(int bus, int id) +{ + struct bus_match_pattern *b; + union ccb ccb; + path_id_t path_id; + size_t bufsize; + + /* mpt(4) only handles devices on bus 0. */ + if (bus != -1 && bus != 0) + return (EINVAL); + + if (xpt_open() < 0) + return (ENXIO); + + /* First, find the path id of bus 0 for this mpt controller. */ + bzero(&ccb, sizeof(ccb)); + + ccb.ccb_h.func_code = XPT_DEV_MATCH; + + bufsize = sizeof(struct dev_match_result) * 1; + ccb.cdm.num_matches = 0; + ccb.cdm.match_buf_len = bufsize; + ccb.cdm.matches = calloc(1, bufsize); + + bufsize = sizeof(struct dev_match_pattern) * 1; + ccb.cdm.num_patterns = 1; + ccb.cdm.pattern_buf_len = bufsize; + ccb.cdm.patterns = calloc(1, bufsize); + + /* Match mptX bus 0. 
*/ + ccb.cdm.patterns[0].type = DEV_MATCH_BUS; + b = &ccb.cdm.patterns[0].pattern.bus_pattern; + snprintf(b->dev_name, sizeof(b->dev_name), "mpt"); + b->unit_number = mpt_unit; + b->bus_id = 0; + b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID; + + if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) { + free(ccb.cdm.matches); + free(ccb.cdm.patterns); + return (errno); + } + free(ccb.cdm.patterns); + + if (((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) || + (ccb.cdm.status != CAM_DEV_MATCH_LAST)) { + warnx("mpt_rescan_bus got CAM error %#x, CDM error %d\n", + ccb.ccb_h.status, ccb.cdm.status); + free(ccb.cdm.matches); + return (EIO); + } + + /* We should have exactly 1 match for the bus. */ + if (ccb.cdm.num_matches != 1 || + ccb.cdm.matches[0].type != DEV_MATCH_BUS) { + free(ccb.cdm.matches); + return (ENOENT); + } + path_id = ccb.cdm.matches[0].result.bus_result.path_id; + free(ccb.cdm.matches); + + /* Now perform the actual rescan. */ + ccb.ccb_h.path_id = path_id; + if (id == -1) { + ccb.ccb_h.func_code = XPT_SCAN_BUS; + ccb.ccb_h.target_id = CAM_TARGET_WILDCARD; + ccb.ccb_h.target_lun = CAM_LUN_WILDCARD; + ccb.ccb_h.timeout = 5000; + } else { + ccb.ccb_h.func_code = XPT_SCAN_LUN; + ccb.ccb_h.target_id = id; + ccb.ccb_h.target_lun = 0; + } + ccb.crcn.flags = CAM_FLAG_NONE; + + /* Run this at a low priority. */ + ccb.ccb_h.pinfo.priority = 5; + + if (ioctl(xptfd, CAMIOCOMMAND, &ccb) == -1) + return (errno); + + if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { + warnx("mpt_rescan_bus rescan got CAM error %#x\n", + ccb.ccb_h.status & CAM_STATUS_MASK); + return (EIO); + } + + return (0); +} diff --git a/usr.sbin/mptutil/mpt_cmd.c b/usr.sbin/mptutil/mpt_cmd.c new file mode 100644 index 0000000..2d6000c --- /dev/null +++ b/usr.sbin/mptutil/mpt_cmd.c @@ -0,0 +1,639 @@ +/*- + * Copyright (c) 2008 Yahoo!, Inc. + * All rights reserved. 
+ * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__RCSID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "mptutil.h" + +static const char *mpt_ioc_status_codes[] = { + "Success", /* 0x0000 */ + "Invalid function", + "Busy", + "Invalid scatter-gather list", + "Internal error", + "Reserved", + "Insufficient resources", + "Invalid field", + "Invalid state", /* 0x0008 */ + "Operation state not supported", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0010 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0018 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Invalid configuration action", /* 0x0020 */ + "Invalid configuration type", + "Invalid configuration page", + "Invalid configuration data", + "No configuration defaults", + "Unable to commit configuration change", + NULL, + NULL, + NULL, /* 0x0028 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0030 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0038 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Recovered SCSI error", /* 0x0040 */ + "Invalid SCSI bus", + "Invalid SCSI target ID", + "SCSI device not there", + "SCSI data overrun", + "SCSI data underrun", + "SCSI I/O error", + "SCSI protocol error", + "SCSI task terminated", /* 0x0048 */ + "SCSI residual mismatch", + "SCSI task management failed", + "SCSI I/O controller terminated", + "SCSI external controller terminated", + "EEDP guard error", + "EEDP reference tag error", + "EEDP application tag error", + NULL, /* 0x0050 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0058 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "SCSI target priority I/O", /* 0x0060 */ + "Invalid SCSI target port", + "Invalid SCSI target I/O index", + "SCSI target aborted", + "No connection retryable", + "No connection", + "FC aborted", + "Invalid FC receive ID", + "FC did 
invalid", /* 0x0068 */ + "FC node logged out", + "Transfer count mismatch", + "STS data not set", + "FC exchange canceled", + "Data offset error", + "Too much write data", + "IU too short", + "ACK NAK timeout", /* 0x0070 */ + "NAK received", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0078 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "LAN device not found", /* 0x0080 */ + "LAN device failure", + "LAN transmit error", + "LAN transmit aborted", + "LAN receive error", + "LAN receive aborted", + "LAN partial packet", + "LAN canceled", + NULL, /* 0x0088 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "SAS SMP request failed", /* 0x0090 */ + "SAS SMP data overrun", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Inband aborted", /* 0x0098 */ + "No inband connection", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Diagnostic released", /* 0x00A0 */ +}; + +static const char *mpt_raid_action_status_codes[] = { + "Success", + "Invalid action", + "Failure", + "Operation in progress", +}; + +const char * +mpt_ioc_status(U16 IOCStatus) +{ + static char buffer[16]; + + IOCStatus &= MPI_IOCSTATUS_MASK; + if (IOCStatus < sizeof(mpt_ioc_status_codes) / sizeof(char *) && + mpt_ioc_status_codes[IOCStatus] != NULL) + return (mpt_ioc_status_codes[IOCStatus]); + snprintf(buffer, sizeof(buffer), "Status: 0x%04x", IOCStatus); + return (buffer); +} + +const char * +mpt_raid_status(U16 ActionStatus) +{ + static char buffer[16]; + + if (ActionStatus < sizeof(mpt_raid_action_status_codes) / + sizeof(char *)) + return (mpt_raid_action_status_codes[ActionStatus]); + snprintf(buffer, sizeof(buffer), "Status: 0x%04x", ActionStatus); + return (buffer); +} + +const char * +mpt_raid_level(U8 VolumeType) +{ + static char buf[16]; + + switch (VolumeType) { + case MPI_RAID_VOL_TYPE_IS: + return ("RAID-0"); + case MPI_RAID_VOL_TYPE_IM: + return ("RAID-1"); + case MPI_RAID_VOL_TYPE_IME: + return ("RAID-1E"); + case MPI_RAID_VOL_TYPE_RAID_5: + return 
("RAID-5"); + case MPI_RAID_VOL_TYPE_RAID_6: + return ("RAID-6"); + case MPI_RAID_VOL_TYPE_RAID_10: + return ("RAID-10"); + case MPI_RAID_VOL_TYPE_RAID_50: + return ("RAID-50"); + default: + sprintf(buf, "LVL 0x%02x", VolumeType); + return (buf); + } +} + +const char * +mpt_volume_name(U8 VolumeBus, U8 VolumeID) +{ + static struct mpt_query_disk info; + static char buf[16]; + + if (mpt_query_disk(VolumeBus, VolumeID, &info) != 0) { + /* + * We only print out the bus number if it is non-zero + * since mpt(4) only supports devices on bus zero + * anyway. + */ + if (VolumeBus == 0) + snprintf(buf, sizeof(buf), "%d", VolumeID); + else + snprintf(buf, sizeof(buf), "%d:%d", VolumeBus, + VolumeID); + return (buf); + } + return (info.devname); +} + +int +mpt_lookup_volume(int fd, const char *name, U8 *VolumeBus, U8 *VolumeID) +{ + CONFIG_PAGE_IOC_2 *ioc2; + CONFIG_PAGE_IOC_2_RAID_VOL *vol; + struct mpt_query_disk info; + char *cp; + long bus, id; + int i; + + /* + * Check for a raw [:] string. If the bus is not + * specified, assume bus 0. 
+ */ + bus = strtol(name, &cp, 0); + if (*cp == ':') { + id = strtol(cp + 1, &cp, 0); + if (*cp == '\0') { + if (bus < 0 || bus > 0xff || id < 0 || id > 0xff) { + errno = EINVAL; + return (-1); + } + *VolumeBus = bus; + *VolumeID = id; + return (0); + } + } else if (*cp == '\0') { + if (bus < 0 || bus > 0xff) { + errno = EINVAL; + return (-1); + } + *VolumeBus = 0; + *VolumeID = bus; + return (0); + } + + ioc2 = mpt_read_ioc_page(fd, 2, NULL); + if (ioc2 == NULL) + return (-1); + + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) { + if (mpt_query_disk(vol->VolumeBus, vol->VolumeID, &info) != 0) + continue; + if (strcmp(name, info.devname) == 0) { + *VolumeBus = vol->VolumeBus; + *VolumeID = vol->VolumeID; + free(ioc2); + return (0); + } + } + free(ioc2); + errno = EINVAL; + return (-1); +} + +int +mpt_read_config_page_header(int fd, U8 PageType, U8 PageNumber, U32 PageAddress, + CONFIG_PAGE_HEADER *header, U16 *IOCStatus) +{ + struct mpt_cfg_page_req req; + + if (IOCStatus != NULL) + *IOCStatus = MPI_IOCSTATUS_SUCCESS; + bzero(&req, sizeof(req)); + req.header.PageType = PageType; + req.header.PageNumber = PageNumber; + req.page_address = PageAddress; + if (ioctl(fd, MPTIO_READ_CFG_HEADER, &req) < 0) + return (-1); + if (!IOC_STATUS_SUCCESS(req.ioc_status)) { + if (IOCStatus != NULL) + *IOCStatus = req.ioc_status; + else + warnx("Reading config page header failed: %s", + mpt_ioc_status(req.ioc_status)); + errno = EIO; + return (-1); + } + *header = req.header; + return (0); +} + +void * +mpt_read_config_page(int fd, U8 PageType, U8 PageNumber, U32 PageAddress, + U16 *IOCStatus) +{ + struct mpt_cfg_page_req req; + void *buf; + int save_errno; + + if (IOCStatus != NULL) + *IOCStatus = MPI_IOCSTATUS_SUCCESS; + bzero(&req, sizeof(req)); + req.header.PageType = PageType; + req.header.PageNumber = PageNumber; + req.page_address = PageAddress; + if (ioctl(fd, MPTIO_READ_CFG_HEADER, &req) < 0) + return (NULL); + if 
(!IOC_STATUS_SUCCESS(req.ioc_status)) { + if (IOCStatus != NULL) + *IOCStatus = req.ioc_status; + else + warnx("Reading config page header failed: %s", + mpt_ioc_status(req.ioc_status)); + errno = EIO; + return (NULL); + } + req.len = req.header.PageLength * 4; + buf = malloc(req.len); + req.buf = buf; + bcopy(&req.header, buf, sizeof(req.header)); + if (ioctl(fd, MPTIO_READ_CFG_PAGE, &req) < 0) { + save_errno = errno; + free(buf); + errno = save_errno; + return (NULL); + } + if (!IOC_STATUS_SUCCESS(req.ioc_status)) { + if (IOCStatus != NULL) + *IOCStatus = req.ioc_status; + else + warnx("Reading config page failed: %s", + mpt_ioc_status(req.ioc_status)); + free(buf); + errno = EIO; + return (NULL); + } + return (buf); +} + +void * +mpt_read_extended_config_page(int fd, U8 ExtPageType, U8 PageVersion, + U8 PageNumber, U32 PageAddress, U16 *IOCStatus) +{ + struct mpt_ext_cfg_page_req req; + void *buf; + int save_errno; + + if (IOCStatus != NULL) + *IOCStatus = MPI_IOCSTATUS_SUCCESS; + bzero(&req, sizeof(req)); + req.header.PageVersion = PageVersion; + req.header.PageNumber = PageNumber; + req.header.ExtPageType = ExtPageType; + req.page_address = PageAddress; + if (ioctl(fd, MPTIO_READ_EXT_CFG_HEADER, &req) < 0) + return (NULL); + if (!IOC_STATUS_SUCCESS(req.ioc_status)) { + if (IOCStatus != NULL) + *IOCStatus = req.ioc_status; + else + warnx("Reading extended config page header failed: %s", + mpt_ioc_status(req.ioc_status)); + errno = EIO; + return (NULL); + } + req.len = req.header.ExtPageLength * 4; + buf = malloc(req.len); + req.buf = buf; + bcopy(&req.header, buf, sizeof(req.header)); + if (ioctl(fd, MPTIO_READ_EXT_CFG_PAGE, &req) < 0) { + save_errno = errno; + free(buf); + errno = save_errno; + return (NULL); + } + if (!IOC_STATUS_SUCCESS(req.ioc_status)) { + if (IOCStatus != NULL) + *IOCStatus = req.ioc_status; + else + warnx("Reading extended config page failed: %s", + mpt_ioc_status(req.ioc_status)); + free(buf); + errno = EIO; + return (NULL); + } + return 
(buf); +} + +int +mpt_write_config_page(int fd, void *buf, U16 *IOCStatus) +{ + CONFIG_PAGE_HEADER *hdr; + struct mpt_cfg_page_req req; + + if (IOCStatus != NULL) + *IOCStatus = MPI_IOCSTATUS_SUCCESS; + bzero(&req, sizeof(req)); + req.buf = buf; + hdr = buf; + req.len = hdr->PageLength * 4; + if (ioctl(fd, MPTIO_WRITE_CFG_PAGE, &req) < 0) + return (-1); + if (!IOC_STATUS_SUCCESS(req.ioc_status)) { + if (IOCStatus != NULL) { + *IOCStatus = req.ioc_status; + return (0); + } + warnx("Writing config page failed: %s", + mpt_ioc_status(req.ioc_status)); + errno = EIO; + return (-1); + } + return (0); +} + +int +mpt_raid_action(int fd, U8 Action, U8 VolumeBus, U8 VolumeID, U8 PhysDiskNum, + U32 ActionDataWord, void *buf, int len, RAID_VOL0_STATUS *VolumeStatus, + U32 *ActionData, int datalen, U16 *IOCStatus, U16 *ActionStatus, int write) +{ + struct mpt_raid_action raid_act; + + if (IOCStatus != NULL) + *IOCStatus = MPI_IOCSTATUS_SUCCESS; + if (datalen < 0 || (unsigned)datalen > sizeof(raid_act.action_data)) { + errno = EINVAL; + return (-1); + } + bzero(&raid_act, sizeof(raid_act)); + raid_act.action = Action; + raid_act.volume_bus = VolumeBus; + raid_act.volume_id = VolumeID; + raid_act.phys_disk_num = PhysDiskNum; + raid_act.action_data_word = ActionDataWord; + if (buf != NULL && len != 0) { + raid_act.buf = buf; + raid_act.len = len; + raid_act.write = write; + } + + if (ioctl(fd, MPTIO_RAID_ACTION, &raid_act) < 0) + return (-1); + + if (!IOC_STATUS_SUCCESS(raid_act.ioc_status)) { + if (IOCStatus != NULL) { + *IOCStatus = raid_act.ioc_status; + return (0); + } + warnx("RAID action failed: %s", + mpt_ioc_status(raid_act.ioc_status)); + errno = EIO; + return (-1); + } + + if (ActionStatus != NULL) + *ActionStatus = raid_act.action_status; + if (raid_act.action_status != MPI_RAID_ACTION_ASTATUS_SUCCESS) { + if (ActionStatus != NULL) + return (0); + warnx("RAID action failed: %s", + mpt_raid_status(raid_act.action_status)); + errno = EIO; + return (-1); + } + + if 
(VolumeStatus != NULL) + *((U32 *)VolumeStatus) = raid_act.volume_status; + if (ActionData != NULL) + bcopy(raid_act.action_data, ActionData, datalen); + return (0); +} + +int +mpt_open(int unit) +{ + char path[MAXPATHLEN]; + + snprintf(path, sizeof(path), "/dev/mpt%d", unit); + return (open(path, O_RDWR)); +} + +int +mpt_table_handler(struct mptutil_command **start, struct mptutil_command **end, + int ac, char **av) +{ + struct mptutil_command **cmd; + + if (ac < 2) { + warnx("The %s command requires a sub-command.", av[0]); + return (EINVAL); + } + for (cmd = start; cmd < end; cmd++) { + if (strcmp((*cmd)->name, av[1]) == 0) + return ((*cmd)->handler(ac - 1, av + 1)); + } + + warnx("%s is not a valid sub-command of %s.", av[1], av[0]); + return (ENOENT); +} + +#ifdef DEBUG +void +hexdump(const void *ptr, int length, const char *hdr, int flags) +{ + int i, j, k; + int cols; + const unsigned char *cp; + char delim; + + if ((flags & HD_DELIM_MASK) != 0) + delim = (flags & HD_DELIM_MASK) >> 8; + else + delim = ' '; + + if ((flags & HD_COLUMN_MASK) != 0) + cols = flags & HD_COLUMN_MASK; + else + cols = 16; + + cp = ptr; + for (i = 0; i < length; i+= cols) { + if (hdr != NULL) + printf("%s", hdr); + + if ((flags & HD_OMIT_COUNT) == 0) + printf("%04x ", i); + + if ((flags & HD_OMIT_HEX) == 0) { + for (j = 0; j < cols; j++) { + k = i + j; + if (k < length) + printf("%c%02x", delim, cp[k]); + else + printf(" "); + } + } + + if ((flags & HD_OMIT_CHARS) == 0) { + printf(" |"); + for (j = 0; j < cols; j++) { + k = i + j; + if (k >= length) + printf(" "); + else if (cp[k] >= ' ' && cp[k] <= '~') + printf("%c", cp[k]); + else + printf("."); + } + printf("|"); + } + printf("\n"); + } +} +#endif diff --git a/usr.sbin/mptutil/mpt_config.c b/usr.sbin/mptutil/mpt_config.c new file mode 100644 index 0000000..3874df3 --- /dev/null +++ b/usr.sbin/mptutil/mpt_config.c @@ -0,0 +1,1160 @@ +/*- + * Copyright (c) 2008 Yahoo!, Inc. + * All rights reserved. 
+ * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__RCSID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#ifdef DEBUG +#include +#endif +#include +#include +#include +#include +#include "mptutil.h" + +#ifdef DEBUG +static void dump_config(CONFIG_PAGE_RAID_VOL_0 *vol); +#endif + +#define powerof2(x) ((((x)-1)&(x))==0) + +static long +dehumanize(const char *value) +{ + char *vtp; + long iv; + + if (value == NULL) + return (0); + iv = strtoq(value, &vtp, 0); + if (vtp == value || (vtp[0] != '\0' && vtp[1] != '\0')) { + return (0); + } + switch (vtp[0]) { + case 't': case 'T': + iv *= 1024; + case 'g': case 'G': + iv *= 1024; + case 'm': case 'M': + iv *= 1024; + case 'k': case 'K': + iv *= 1024; + case '\0': + break; + default: + return (0); + } + return (iv); +} + +/* + * Lock the volume by opening its /dev device read/write. This will + * only work if nothing else has it opened (including mounts). We + * leak the fd on purpose since this application is not long-running. + */ +int +mpt_lock_volume(U8 VolumeBus, U8 VolumeID) +{ + char path[MAXPATHLEN]; + struct mpt_query_disk qd; + int error, vfd; + + error = mpt_query_disk(VolumeBus, VolumeID, &qd); + if (error == ENOENT) + /* + * This means there isn't a CAM device associated with + * the volume, and thus it is already implicitly + * locked, so just return. 
+ */ + return (0); + if (error) { + errno = error; + warn("Unable to lookup volume device name"); + return (-1); + } + snprintf(path, sizeof(path), "%s%s", _PATH_DEV, qd.devname); + vfd = open(path, O_RDWR); + if (vfd < 0) { + warn("Unable to lock volume %s", qd.devname); + return (-1); + } + return (0); +} + +static int +mpt_lock_physdisk(struct mpt_standalone_disk *disk) +{ + char path[MAXPATHLEN]; + int dfd; + + snprintf(path, sizeof(path), "%s%s", _PATH_DEV, disk->devname); + dfd = open(path, O_RDWR); + if (dfd < 0) { + warn("Unable to lock disk %s", disk->devname); + return (-1); + } + return (0); +} + +static int +mpt_lookup_standalone_disk(const char *name, struct mpt_standalone_disk *disks, + int ndisks, int *index) +{ + char *cp; + long bus, id; + int i; + + /* Check for a raw : string. */ + bus = strtol(name, &cp, 0); + if (*cp == ':') { + id = strtol(cp + 1, &cp, 0); + if (*cp == '\0') { + if (bus < 0 || bus > 0xff || id < 0 || id > 0xff) { + errno = EINVAL; + return (-1); + } + for (i = 0; i < ndisks; i++) { + if (disks[i].bus == (U8)bus && + disks[i].target == (U8)id) { + *index = i; + return (0); + } + } + errno = ENOENT; + return (-1); + } + } + + if (name[0] == 'd' && name[1] == 'a') { + for (i = 0; i < ndisks; i++) { + if (strcmp(name, disks[i].devname) == 0) { + *index = i; + return (0); + } + } + errno = ENOENT; + return (-1); + } + + errno = EINVAL; + return (-1); +} + +/* + * Mark a standalone disk as being a physical disk. 
+ */ +static int +mpt_create_physdisk(int fd, struct mpt_standalone_disk *disk, U8 *PhysDiskNum) +{ + CONFIG_PAGE_HEADER header; + CONFIG_PAGE_RAID_PHYS_DISK_0 *config_page; + U32 ActionData; + + if (mpt_read_config_page_header(fd, MPI_CONFIG_PAGETYPE_RAID_PHYSDISK, + 0, 0, &header, NULL) < 0) + return (-1); + if (header.PageVersion > MPI_RAIDPHYSDISKPAGE0_PAGEVERSION) { + warnx("Unsupported RAID physdisk page 0 version %d", + header.PageVersion); + errno = EOPNOTSUPP; + return (-1); + } + config_page = calloc(1, sizeof(CONFIG_PAGE_RAID_PHYS_DISK_0)); + config_page->Header.PageType = MPI_CONFIG_PAGETYPE_RAID_PHYSDISK; + config_page->Header.PageNumber = 0; + config_page->Header.PageLength = sizeof(CONFIG_PAGE_RAID_PHYS_DISK_0) / + 4; + config_page->PhysDiskIOC = 0; /* XXX */ + config_page->PhysDiskBus = disk->bus; + config_page->PhysDiskID = disk->target; + + /* XXX: Enclosure info for PhysDiskSettings? */ + if (mpt_raid_action(fd, MPI_RAID_ACTION_CREATE_PHYSDISK, 0, 0, 0, 0, + config_page, sizeof(CONFIG_PAGE_RAID_PHYS_DISK_0), NULL, + &ActionData, sizeof(ActionData), NULL, NULL, 1) < 0) + return (-1); + *PhysDiskNum = ActionData & 0xff; + return (0); +} + +static int +mpt_delete_physdisk(int fd, U8 PhysDiskNum) +{ + + return (mpt_raid_action(fd, MPI_RAID_ACTION_DELETE_PHYSDISK, 0, 0, + PhysDiskNum, 0, NULL, 0, NULL, NULL, 0, NULL, NULL, 0)); +} + +/* + * MPT's firmware does not have a clear command. Instead, we + * implement it by deleting each array and disk by hand. + */ +static int +clear_config(int ac, char **av) +{ + CONFIG_PAGE_IOC_2 *ioc2; + CONFIG_PAGE_IOC_2_RAID_VOL *vol; + CONFIG_PAGE_IOC_3 *ioc3; + IOC_3_PHYS_DISK *disk; + CONFIG_PAGE_IOC_5 *ioc5; + IOC_5_HOT_SPARE *spare; + int ch, fd, i; + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + ioc2 = mpt_read_ioc_page(fd, 2, NULL); + if (ioc2 == NULL) { + warn("Failed to fetch volume list"); + return (errno); + } + + /* Lock all the volumes first. 
*/ + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) { + if (mpt_lock_volume(vol->VolumeBus, vol->VolumeID) < 0) { + warnx("Volume %s is busy and cannot be deleted", + mpt_volume_name(vol->VolumeBus, vol->VolumeID)); + return (EBUSY); + } + } + + printf( + "Are you sure you wish to clear the configuration on mpt%u? [y/N] ", + mpt_unit); + ch = getchar(); + if (ch != 'y' && ch != 'Y') { + printf("\nAborting\n"); + return (0); + } + + /* Delete all the volumes. */ + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) + if (mpt_raid_action(fd, MPI_RAID_ACTION_DELETE_VOLUME, + vol->VolumeBus, vol->VolumeID, 0, + MPI_RAID_ACTION_ADATA_DEL_PHYS_DISKS | + MPI_RAID_ACTION_ADATA_ZERO_LBA0, NULL, 0, NULL, NULL, 0, + NULL, NULL, 0) < 0) + warn("Failed to delete volume %s", + mpt_volume_name(vol->VolumeBus, vol->VolumeID)); + free(ioc2); + + /* Delete all the spares. */ + ioc5 = mpt_read_ioc_page(fd, 5, NULL); + if (ioc5 == NULL) + warn("Failed to fetch spare list"); + else { + spare = ioc5->HotSpare; + for (i = 0; i < ioc5->NumHotSpares; spare++, i++) + if (mpt_delete_physdisk(fd, spare->PhysDiskNum) < 0) + warn("Failed to delete physical disk %d", + spare->PhysDiskNum); + free(ioc5); + } + + /* Delete any RAID physdisks that may be left. 
*/ + ioc3 = mpt_read_ioc_page(fd, 3, NULL); + if (ioc3 == NULL) + warn("Failed to fetch drive list"); + else { + disk = ioc3->PhysDisk; + for (i = 0; i < ioc3->NumPhysDisks; disk++, i++) + if (mpt_delete_physdisk(fd, disk->PhysDiskNum) < 0) + warn("Failed to delete physical disk %d", + disk->PhysDiskNum); + free(ioc3); + } + + printf("mpt%d: Configuration cleared\n", mpt_unit); + mpt_rescan_bus(-1, -1); + close(fd); + + return (0); +} +MPT_COMMAND(top, clear, clear_config); + +#define RT_RAID0 0 +#define RT_RAID1 1 +#define RT_RAID1E 2 + +static struct raid_type_entry { + const char *name; + int raid_type; +} raid_type_table[] = { + { "raid0", RT_RAID0 }, + { "raid-0", RT_RAID0 }, + { "raid1", RT_RAID1 }, + { "raid-1", RT_RAID1 }, + { "mirror", RT_RAID1 }, + { "raid1e", RT_RAID1E }, + { "raid-1e", RT_RAID1E }, + { NULL, 0 }, +}; + +struct config_id_state { + struct mpt_standalone_disk *sdisks; + struct mpt_drive_list *list; + CONFIG_PAGE_IOC_2 *ioc2; + U8 target_id; + int nsdisks; +}; + +struct drive_info { + CONFIG_PAGE_RAID_PHYS_DISK_0 *info; + struct mpt_standalone_disk *sdisk; +}; + +struct volume_info { + int drive_count; + struct drive_info *drives; +}; + +/* Parse a comma-separated list of drives for a volume. */ +static int +parse_volume(int fd, int raid_type, struct config_id_state *state, + char *volume_str, struct volume_info *info) +{ + struct drive_info *dinfo; + U8 PhysDiskNum; + char *cp; + int count, error, i; + + cp = volume_str; + for (count = 0; cp != NULL; count++) { + cp = strchr(cp, ','); + if (cp != NULL) { + cp++; + if (*cp == ',') { + warnx("Invalid drive list '%s'", volume_str); + return (EINVAL); + } + } + } + + /* Validate the number of drives for this volume. 
*/ + switch (raid_type) { + case RT_RAID0: + if (count < 2) { + warnx("RAID0 requires at least 2 drives in each " + "array"); + return (EINVAL); + } + break; + case RT_RAID1: + if (count != 2) { + warnx("RAID1 requires exactly 2 drives in each " + "array"); + return (EINVAL); + } + break; + case RT_RAID1E: + if (count < 3) { + warnx("RAID1E requires at least 3 drives in each " + "array"); + return (EINVAL); + } + break; + } + + /* Validate each drive. */ + info->drives = calloc(count, sizeof(struct drive_info)); + info->drive_count = count; + for (dinfo = info->drives; (cp = strsep(&volume_str, ",")) != NULL; + dinfo++) { + /* If this drive is already a RAID phys just fetch the info. */ + error = mpt_lookup_drive(state->list, cp, &PhysDiskNum); + if (error == 0) { + dinfo->info = mpt_pd_info(fd, PhysDiskNum, NULL); + if (dinfo->info == NULL) + return (errno); + continue; + } + + /* See if it is a standalone disk. */ + if (mpt_lookup_standalone_disk(cp, state->sdisks, + state->nsdisks, &i) < 0) { + warn("Unable to lookup drive %s", cp); + return (errno); + } + dinfo->sdisk = &state->sdisks[i]; + + /* Lock the disk, we will create phys disk pages later. */ + if (mpt_lock_physdisk(dinfo->sdisk) < 0) + return (errno); + } + + return (0); +} + +/* + * Add RAID physdisk pages for any standalone disks that a volume is + * going to use. 
+ */ +static int +add_drives(int fd, struct volume_info *info, int verbose) +{ + struct drive_info *dinfo; + U8 PhysDiskNum; + int i; + + for (i = 0, dinfo = info->drives; i < info->drive_count; + i++, dinfo++) { + if (dinfo->info == NULL) { + if (mpt_create_physdisk(fd, dinfo->sdisk, + &PhysDiskNum) < 0) { + warn( + "Failed to create physical disk page for %s", + dinfo->sdisk->devname); + return (errno); + } + if (verbose) + printf("Added drive %s with PhysDiskNum %u\n", + dinfo->sdisk->devname, PhysDiskNum); + + dinfo->info = mpt_pd_info(fd, PhysDiskNum, NULL); + if (dinfo->info == NULL) + return (errno); + } + } + return (0); +} + +/* + * Find the next free target ID assuming that 'target_id' is the last + * one used. 'target_id' should be 0xff for the initial test. + */ +static U8 +find_next_volume(struct config_id_state *state) +{ + CONFIG_PAGE_IOC_2_RAID_VOL *vol; + int i; + +restart: + /* Assume the current one is used. */ + state->target_id++; + + /* Search drives first. */ + for (i = 0; i < state->nsdisks; i++) + if (state->sdisks[i].target == state->target_id) + goto restart; + for (i = 0; i < state->list->ndrives; i++) + if (state->list->drives[i]->PhysDiskID == state->target_id) + goto restart; + + /* Seach volumes second. */ + vol = state->ioc2->RaidVolume; + for (i = 0; i < state->ioc2->NumActiveVolumes; vol++, i++) + if (vol->VolumeID == state->target_id) + goto restart; + + return (state->target_id); +} + +/* Create a volume and populate it with drives. 
*/ +static CONFIG_PAGE_RAID_VOL_0 * +build_volume(int fd, struct volume_info *info, int raid_type, long stripe_size, + struct config_id_state *state, int verbose) +{ + CONFIG_PAGE_HEADER header; + CONFIG_PAGE_RAID_VOL_0 *vol; + RAID_VOL0_PHYS_DISK *rdisk; + struct drive_info *dinfo; + U32 MinLBA; + uint64_t MaxLBA; + size_t page_size; + int i; + + if (mpt_read_config_page_header(fd, MPI_CONFIG_PAGETYPE_RAID_VOLUME, + 0, 0, &header, NULL) < 0) + return (NULL); + if (header.PageVersion > MPI_RAIDVOLPAGE0_PAGEVERSION) { + warnx("Unsupported RAID volume page 0 version %d", + header.PageVersion); + errno = EOPNOTSUPP; + return (NULL); + } + page_size = sizeof(CONFIG_PAGE_RAID_VOL_0) + + sizeof(RAID_VOL0_PHYS_DISK) * (info->drive_count - 1); + vol = calloc(1, page_size); + + /* Header */ + vol->Header.PageType = MPI_CONFIG_PAGETYPE_RAID_VOLUME; + vol->Header.PageNumber = 0; + vol->Header.PageLength = page_size / 4; + + /* Properties */ + vol->VolumeID = find_next_volume(state); + vol->VolumeBus = 0; + vol->VolumeIOC = 0; /* XXX */ + vol->VolumeStatus.Flags = MPI_RAIDVOL0_STATUS_FLAG_ENABLED; + vol->VolumeStatus.State = MPI_RAIDVOL0_STATUS_STATE_OPTIMAL; + vol->VolumeSettings.Settings = MPI_RAIDVOL0_SETTING_USE_DEFAULTS; + vol->VolumeSettings.HotSparePool = MPI_RAID_HOT_SPARE_POOL_0; + vol->NumPhysDisks = info->drive_count; + + /* Find the smallest drive. */ + MinLBA = info->drives[0].info->MaxLBA; + for (i = 1; i < info->drive_count; i++) + if (info->drives[i].info->MaxLBA < MinLBA) + MinLBA = info->drives[i].info->MaxLBA; + + /* + * Now chop off 512MB at the end to leave room for the + * metadata. The controller might only use 64MB, but we just + * chop off the max to be simple. 
+ */ + MinLBA -= (512 * 1024 * 1024) / 512; + + switch (raid_type) { + case RT_RAID0: + vol->VolumeType = MPI_RAID_VOL_TYPE_IS; + vol->StripeSize = stripe_size / 512; + MaxLBA = MinLBA * info->drive_count; + break; + case RT_RAID1: + vol->VolumeType = MPI_RAID_VOL_TYPE_IM; + MaxLBA = MinLBA * (info->drive_count / 2); + break; + case RT_RAID1E: + vol->VolumeType = MPI_RAID_VOL_TYPE_IME; + vol->StripeSize = stripe_size / 512; + MaxLBA = MinLBA * info->drive_count / 2; + break; + default: + /* Pacify gcc. */ + abort(); + } + + /* + * If the controller doesn't support 64-bit addressing and the + * new volume is larger than 2^32 blocks, warn the user and + * truncate the volume. + */ + if (MaxLBA >> 32 != 0 && + !(state->ioc2->CapabilitiesFlags & + MPI_IOCPAGE2_CAP_FLAGS_RAID_64_BIT_ADDRESSING)) { + warnx( + "Controller does not support volumes > 2TB, truncating volume."); + MaxLBA = 0xffffffff; + } + vol->MaxLBA = MaxLBA; + vol->MaxLBAHigh = MaxLBA >> 32; + + /* Populate drives. */ + for (i = 0, dinfo = info->drives, rdisk = vol->PhysDisk; + i < info->drive_count; i++, dinfo++, rdisk++) { + if (verbose) + printf("Adding drive %u (%u:%u) to volume %u:%u\n", + dinfo->info->PhysDiskNum, dinfo->info->PhysDiskBus, + dinfo->info->PhysDiskID, vol->VolumeBus, + vol->VolumeID); + if (raid_type == RT_RAID1) { + if (i == 0) + rdisk->PhysDiskMap = + MPI_RAIDVOL0_PHYSDISK_PRIMARY; + else + rdisk->PhysDiskMap = + MPI_RAIDVOL0_PHYSDISK_SECONDARY; + } else + rdisk->PhysDiskMap = i; + rdisk->PhysDiskNum = dinfo->info->PhysDiskNum; + } + + return (vol); +} + +static int +create_volume(int ac, char **av) +{ + CONFIG_PAGE_RAID_VOL_0 *vol; + struct config_id_state state; + struct volume_info *info; + int ch, error, fd, i, raid_type, verbose, quick; + long stripe_size; +#ifdef DEBUG + int dump; +#endif + + if (ac < 2) { + warnx("create: volume type required"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + /* Lookup the RAID 
type first. */ + raid_type = -1; + for (i = 0; raid_type_table[i].name != NULL; i++) + if (strcasecmp(raid_type_table[i].name, av[1]) == 0) { + raid_type = raid_type_table[i].raid_type; + break; + } + + if (raid_type == -1) { + warnx("Unknown or unsupported volume type %s", av[1]); + return (EINVAL); + } + + /* Parse any options. */ + optind = 2; +#ifdef DEBUG + dump = 0; +#endif + quick = 0; + verbose = 0; + stripe_size = 64 * 1024; + + while ((ch = getopt(ac, av, "dqs:v")) != -1) { + switch (ch) { +#ifdef DEBUG + case 'd': + dump = 1; + break; +#endif + case 'q': + quick = 1; + break; + case 's': + stripe_size = dehumanize(optarg); + if ((stripe_size < 512) || (!powerof2(stripe_size))) { + warnx("Invalid stripe size %s", optarg); + return (EINVAL); + } + break; + case 'v': + verbose = 1; + break; + case '?': + default: + return (EINVAL); + } + } + ac -= optind; + av += optind; + + /* Fetch existing config data. */ + state.ioc2 = mpt_read_ioc_page(fd, 2, NULL); + if (state.ioc2 == NULL) { + warn("Failed to read volume list"); + return (errno); + } + state.list = mpt_pd_list(fd); + if (state.list == NULL) + return (errno); + error = mpt_fetch_disks(fd, &state.nsdisks, &state.sdisks); + if (error) { + warn("Failed to fetch standalone disk list"); + return (error); + } + state.target_id = 0xff; + + /* Parse the drive list. */ + if (ac != 1) { + warnx("Exactly one drive list is required"); + return (EINVAL); + } + info = calloc(1, sizeof(*info)); + error = parse_volume(fd, raid_type, &state, av[0], info); + if (error) + return (error); + + /* Create RAID physdisk pages for standalone disks. */ + error = add_drives(fd, info, verbose); + if (error) + return (error); + + /* Build the volume. */ + vol = build_volume(fd, info, raid_type, stripe_size, &state, verbose); + +#ifdef DEBUG + if (dump) { + dump_config(vol); + goto skip; + } +#endif + + /* Send the new volume to the controller. 
*/ + if (mpt_raid_action(fd, MPI_RAID_ACTION_CREATE_VOLUME, vol->VolumeBus, + vol->VolumeID, 0, quick ? MPI_RAID_ACTION_ADATA_DO_NOT_SYNC : 0, + vol, vol->Header.PageLength * 4, NULL, NULL, 0, NULL, NULL, 1) < + 0) { + warn("Failed to add volume"); + return (errno); + } + +#ifdef DEBUG +skip: +#endif + mpt_rescan_bus(vol->VolumeBus, vol->VolumeID); + + /* Clean up. */ + free(vol); + free(info); + free(state.sdisks); + mpt_free_pd_list(state.list); + free(state.ioc2); + close(fd); + + return (0); +} +MPT_COMMAND(top, create, create_volume); + +static int +delete_volume(int ac, char **av) +{ + U8 VolumeBus, VolumeID; + int fd; + + if (ac != 2) { + warnx("delete: volume required"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + if (mpt_lookup_volume(fd, av[1], &VolumeBus, &VolumeID) < 0) { + warn("Invalid volume %s", av[1]); + return (errno); + } + + if (mpt_lock_volume(VolumeBus, VolumeID) < 0) + return (errno); + + if (mpt_raid_action(fd, MPI_RAID_ACTION_DELETE_VOLUME, VolumeBus, + VolumeID, 0, MPI_RAID_ACTION_ADATA_DEL_PHYS_DISKS | + MPI_RAID_ACTION_ADATA_ZERO_LBA0, NULL, 0, NULL, NULL, 0, NULL, + NULL, 0) < 0) { + warn("Failed to delete volume"); + return (errno); + } + + mpt_rescan_bus(-1, -1); + close(fd); + + return (0); +} +MPT_COMMAND(top, delete, delete_volume); + +static int +find_volume_spare_pool(int fd, const char *name, int *pool) +{ + CONFIG_PAGE_RAID_VOL_0 *info; + CONFIG_PAGE_IOC_2 *ioc2; + CONFIG_PAGE_IOC_2_RAID_VOL *vol; + U8 VolumeBus, VolumeID; + int i, j, new_pool, pool_count[7]; + + if (mpt_lookup_volume(fd, name, &VolumeBus, &VolumeID) < 0) { + warn("Invalid volume %s", name); + return (-1); + } + + info = mpt_vol_info(fd, VolumeBus, VolumeID, NULL); + if (info == NULL) + return (-1); + + /* + * Check for an existing pool other than pool 0 (used for + * global spares). 
+ */ + if ((info->VolumeSettings.HotSparePool & ~MPI_RAID_HOT_SPARE_POOL_0) != + 0) { + *pool = 1 << (ffs(info->VolumeSettings.HotSparePool & + ~MPI_RAID_HOT_SPARE_POOL_0) - 1); + return (0); + } + free(info); + + /* + * Try to find a free pool. First, figure out which pools are + * in use. + */ + ioc2 = mpt_read_ioc_page(fd, 2, NULL); + if (ioc2 == NULL) { + warn("Failed to fetch volume list"); + return (-1); + } + bzero(pool_count, sizeof(pool_count)); + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) { + info = mpt_vol_info(fd, vol->VolumeBus, vol->VolumeID, NULL); + if (info == NULL) + return (-1); + for (j = 0; j < 7; j++) + if (info->VolumeSettings.HotSparePool & (1 << (j + 1))) + pool_count[j]++; + free(info); + } + free(ioc2); + + /* Find the pool with the lowest use count. */ + new_pool = 0; + for (i = 1; i < 7; i++) + if (pool_count[i] < pool_count[new_pool]) + new_pool = i; + new_pool++; + + /* Add this pool to the volume. */ + info = mpt_vol_info(fd, VolumeBus, VolumeID, NULL); + if (info == NULL) + return (-1); + info->VolumeSettings.HotSparePool |= (1 << new_pool); + if (mpt_raid_action(fd, MPI_RAID_ACTION_CHANGE_VOLUME_SETTINGS, + VolumeBus, VolumeID, 0, *(U32 *)&info->VolumeSettings, NULL, 0, + NULL, NULL, 0, NULL, NULL, 0) < 0) { + warnx("Failed to add spare pool %d to %s", new_pool, + mpt_volume_name(VolumeBus, VolumeID)); + return (-1); + } + free(info); + + *pool = (1 << new_pool); + return (0); +} + +static int +add_spare(int ac, char **av) +{ + CONFIG_PAGE_RAID_PHYS_DISK_0 *info; + struct mpt_standalone_disk *sdisks; + struct mpt_drive_list *list; + U8 PhysDiskNum; + int error, fd, i, nsdisks, pool; + + if (ac < 2) { + warnx("add spare: drive required"); + return (EINVAL); + } + if (ac > 3) { + warnx("add spare: extra arguments"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + if (ac == 3) { + if (find_volume_spare_pool(fd, av[2], &pool) < 0) + 
return (errno); + } else + pool = MPI_RAID_HOT_SPARE_POOL_0; + + list = mpt_pd_list(fd); + if (list == NULL) + return (errno); + + error = mpt_lookup_drive(list, av[1], &PhysDiskNum); + if (error) { + error = mpt_fetch_disks(fd, &nsdisks, &sdisks); + if (error != 0) { + warn("Failed to fetch standalone disk list"); + return (error); + } + + if (mpt_lookup_standalone_disk(av[1], sdisks, nsdisks, &i) < + 0) { + warn("Unable to lookup drive %s", av[1]); + return (errno); + } + + if (mpt_lock_physdisk(&sdisks[i]) < 0) + return (errno); + + if (mpt_create_physdisk(fd, &sdisks[i], &PhysDiskNum) < 0) { + warn("Failed to create physical disk page"); + return (errno); + } + free(sdisks); + } + mpt_free_pd_list(list); + + info = mpt_pd_info(fd, PhysDiskNum, NULL); + if (info == NULL) { + warn("Failed to fetch drive info"); + return (errno); + } + + info->PhysDiskSettings.HotSparePool = pool; + error = mpt_raid_action(fd, MPI_RAID_ACTION_CHANGE_PHYSDISK_SETTINGS, 0, + 0, PhysDiskNum, *(U32 *)&info->PhysDiskSettings, NULL, 0, NULL, + NULL, 0, NULL, NULL, 0); + if (error) { + warn("Failed to assign spare"); + return (errno); + } + + free(info); + close(fd); + + return (0); +} +MPT_COMMAND(top, add, add_spare); + +static int +remove_spare(int ac, char **av) +{ + CONFIG_PAGE_RAID_PHYS_DISK_0 *info; + struct mpt_drive_list *list; + U8 PhysDiskNum; + int error, fd; + + if (ac != 2) { + warnx("remove spare: drive required"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + list = mpt_pd_list(fd); + if (list == NULL) + return (errno); + + error = mpt_lookup_drive(list, av[1], &PhysDiskNum); + if (error) { + warn("Failed to find drive %s", av[1]); + return (error); + } + mpt_free_pd_list(list); + + + info = mpt_pd_info(fd, PhysDiskNum, NULL); + if (info == NULL) { + warn("Failed to fetch drive info"); + return (errno); + } + + if (info->PhysDiskSettings.HotSparePool == 0) { + warnx("Drive %u is not a hot spare", 
PhysDiskNum); + return (EINVAL); + } + + if (mpt_delete_physdisk(fd, PhysDiskNum) < 0) { + warn("Failed to delete physical disk page"); + return (errno); + } + + mpt_rescan_bus(info->PhysDiskBus, info->PhysDiskID); + free(info); + close(fd); + + return (0); +} +MPT_COMMAND(top, remove, remove_spare); + +#ifdef DEBUG +MPT_TABLE(top, pd); + +static int +pd_create(int ac, char **av) +{ + struct mpt_standalone_disk *disks; + int error, fd, i, ndisks; + U8 PhysDiskNum; + + if (ac != 2) { + warnx("pd create: drive required"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + error = mpt_fetch_disks(fd, &ndisks, &disks); + if (error != 0) { + warn("Failed to fetch standalone disk list"); + return (error); + } + + if (mpt_lookup_standalone_disk(av[1], disks, ndisks, &i) < 0) { + warn("Unable to lookup drive"); + return (errno); + } + + if (mpt_lock_physdisk(&disks[i]) < 0) + return (errno); + + if (mpt_create_physdisk(fd, &disks[i], &PhysDiskNum) < 0) { + warn("Failed to create physical disk page"); + return (errno); + } + free(disks); + + printf("Added drive %s with PhysDiskNum %u\n", av[1], PhysDiskNum); + + close(fd); + + return (0); +} +MPT_COMMAND(pd, create, pd_create); + +static int +pd_delete(int ac, char **av) +{ + CONFIG_PAGE_RAID_PHYS_DISK_0 *info; + struct mpt_drive_list *list; + int fd; + U8 PhysDiskNum; + + if (ac != 2) { + warnx("pd delete: drive required"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + list = mpt_pd_list(fd); + if (list == NULL) + return (errno); + + if (mpt_lookup_drive(list, av[1], &PhysDiskNum) < 0) { + warn("Failed to find drive %s", av[1]); + return (errno); + } + mpt_free_pd_list(list); + + info = mpt_pd_info(fd, PhysDiskNum, NULL); + if (info == NULL) { + warn("Failed to fetch drive info"); + return (errno); + } + + if (mpt_delete_physdisk(fd, PhysDiskNum) < 0) { + warn("Failed to delete physical disk 
page"); + return (errno); + } + + mpt_rescan_bus(info->PhysDiskBus, info->PhysDiskID); + free(info); + close(fd); + + return (0); +} +MPT_COMMAND(pd, delete, pd_delete); + +/* Display raw data about a volume config. */ +static void +dump_config(CONFIG_PAGE_RAID_VOL_0 *vol) +{ + int i; + + printf("Volume Configuration (Debug):\n"); + printf( + " Page Header: Type 0x%02x Number 0x%02x Length 0x%02x(%u) Version 0x%02x\n", + vol->Header.PageType, vol->Header.PageNumber, + vol->Header.PageLength, vol->Header.PageLength * 4, + vol->Header.PageVersion); + printf(" Address: %d:%d IOC %d\n", vol->VolumeBus, vol->VolumeID, + vol->VolumeIOC); + printf(" Type: %d (%s)\n", vol->VolumeType, + mpt_raid_level(vol->VolumeType)); + printf(" Status: %s (Flags 0x%02x)\n", + mpt_volstate(vol->VolumeStatus.State), vol->VolumeStatus.Flags); + printf(" Settings: 0x%04x (Spare Pools 0x%02x)\n", + vol->VolumeSettings.Settings, vol->VolumeSettings.HotSparePool); + printf(" MaxLBA: %ju\n", (uintmax_t)vol->MaxLBAHigh << 32 | + vol->MaxLBA); + printf(" Stripe Size: %ld\n", (long)vol->StripeSize * 512); + printf(" %d Disks:\n", vol->NumPhysDisks); + + for (i = 0; i < vol->NumPhysDisks; i++) + printf(" Disk %d: Num 0x%02x Map 0x%02x\n", i, + vol->PhysDisk[i].PhysDiskNum, vol->PhysDisk[i].PhysDiskMap); +} + +static int +debug_config(int ac, char **av) +{ + CONFIG_PAGE_RAID_VOL_0 *vol; + U8 VolumeBus, VolumeID; + int fd; + + if (ac != 2) { + warnx("debug: volume required"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + if (mpt_lookup_volume(fd, av[1], &VolumeBus, &VolumeID) < 0) { + warn("Invalid volume: %s", av[1]); + return (errno); + } + + vol = mpt_vol_info(fd, VolumeBus, VolumeID, NULL); + if (vol == NULL) { + warn("Failed to get volume info"); + return (errno); + } + + dump_config(vol); + free(vol); + close(fd); + + return (0); +} +MPT_COMMAND(top, debug, debug_config); +#endif diff --git a/usr.sbin/mptutil/mpt_drive.c 
b/usr.sbin/mptutil/mpt_drive.c new file mode 100644 index 0000000..e941b96 --- /dev/null +++ b/usr.sbin/mptutil/mpt_drive.c @@ -0,0 +1,395 @@ +/*- + * Copyright (c) 2008 Yahoo!, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__RCSID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mptutil.h" + +const char * +mpt_pdstate(CONFIG_PAGE_RAID_PHYS_DISK_0 *info) +{ + static char buf[16]; + + switch (info->PhysDiskStatus.State) { + case MPI_PHYSDISK0_STATUS_ONLINE: + if ((info->PhysDiskStatus.Flags & + MPI_PHYSDISK0_STATUS_FLAG_OUT_OF_SYNC) && + info->PhysDiskSettings.HotSparePool == 0) + return ("REBUILD"); + else + return ("ONLINE"); + case MPI_PHYSDISK0_STATUS_MISSING: + return ("MISSING"); + case MPI_PHYSDISK0_STATUS_NOT_COMPATIBLE: + return ("NOT COMPATIBLE"); + case MPI_PHYSDISK0_STATUS_FAILED: + return ("FAILED"); + case MPI_PHYSDISK0_STATUS_INITIALIZING: + return ("INITIALIZING"); + case MPI_PHYSDISK0_STATUS_OFFLINE_REQUESTED: + return ("OFFLINE REQUESTED"); + case MPI_PHYSDISK0_STATUS_FAILED_REQUESTED: + return ("FAILED REQUESTED"); + case MPI_PHYSDISK0_STATUS_OTHER_OFFLINE: + return ("OTHER OFFLINE"); + default: + sprintf(buf, "PSTATE 0x%02x", info->PhysDiskStatus.State); + return (buf); + } +} + +/* + * There are several ways to enumerate physical disks. Unfortunately, + * none of them are truly complete, so we have to build a union of all of + * them. Specifically: + * + * - IOC2 : This gives us a list of volumes, and by walking the volumes we + * can enumerate all of the drives attached to volumes including + * online drives and failed drives. + * - IOC3 : This gives us a list of all online physical drives including + * drives that are not part of a volume nor a spare drive. It + * does not include any failed drives. + * - IOC5 : This gives us a list of all spare drives including failed + * spares. + * + * The specific edge cases are that 1) a failed volume member can only be + * found via IOC2, 2) a drive that is neither a volume member nor a spare + * can only be found via IOC3, and 3) a failed spare can only be found via + * IOC5. 
+ * + * To handle this, walk all of the three lists and use the following + * routine to add each drive encountered. It quietly succeeds if the + * drive is already present in the list. It also sorts the list as it + * inserts new drives. + */ +static int +mpt_pd_insert(int fd, struct mpt_drive_list *list, U8 PhysDiskNum) +{ + int i, j; + + /* + * First, do a simple linear search to see if we have already + * seen this drive. + */ + for (i = 0; i < list->ndrives; i++) { + if (list->drives[i]->PhysDiskNum == PhysDiskNum) + return (0); + if (list->drives[i]->PhysDiskNum > PhysDiskNum) + break; + } + + /* + * 'i' is our slot for the 'new' drive. Make room and then + * read the drive info. + */ + for (j = list->ndrives - 1; j >= i; j--) + list->drives[j + 1] = list->drives[j]; + list->drives[i] = mpt_pd_info(fd, PhysDiskNum, NULL); + if (list->drives[i] == NULL) + return (-1); + list->ndrives++; + return (0); +} + +struct mpt_drive_list * +mpt_pd_list(int fd) +{ + CONFIG_PAGE_IOC_2 *ioc2; + CONFIG_PAGE_IOC_2_RAID_VOL *vol; + CONFIG_PAGE_RAID_VOL_0 **volumes; + RAID_VOL0_PHYS_DISK *rdisk; + CONFIG_PAGE_IOC_3 *ioc3; + IOC_3_PHYS_DISK *disk; + CONFIG_PAGE_IOC_5 *ioc5; + IOC_5_HOT_SPARE *spare; + struct mpt_drive_list *list; + int count, i, j; + + ioc2 = mpt_read_ioc_page(fd, 2, NULL); + if (ioc2 == NULL) { + warn("Failed to fetch volume list"); + return (NULL); + } + + ioc3 = mpt_read_ioc_page(fd, 3, NULL); + if (ioc3 == NULL) { + warn("Failed to fetch drive list"); + free(ioc2); + return (NULL); + } + + ioc5 = mpt_read_ioc_page(fd, 5, NULL); + if (ioc5 == NULL) { + warn("Failed to fetch spare list"); + free(ioc3); + free(ioc2); + return (NULL); + } + + /* + * Go ahead and read the info for all the volumes. For this + * pass we figure out how many physical drives there are. 
+ */ + volumes = malloc(sizeof(*volumes) * ioc2->NumActiveVolumes); + count = 0; + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) { + volumes[i] = mpt_vol_info(fd, vol->VolumeBus, vol->VolumeID, + NULL); + if (volumes[i] == NULL) { + warn("Failed to read volume info"); + return (NULL); + } + count += volumes[i]->NumPhysDisks; + } + count += ioc3->NumPhysDisks; + count += ioc5->NumHotSpares; + + /* Walk the various lists enumerating drives. */ + list = malloc(sizeof(*list) + sizeof(CONFIG_PAGE_RAID_PHYS_DISK_0) * + count); + list->ndrives = 0; + + for (i = 0; i < ioc2->NumActiveVolumes; i++) { + rdisk = volumes[i]->PhysDisk; + for (j = 0; j < volumes[i]->NumPhysDisks; rdisk++, j++) + if (mpt_pd_insert(fd, list, rdisk->PhysDiskNum) < 0) + return (NULL); + free(volumes[i]); + } + free(ioc2); + free(volumes); + + spare = ioc5->HotSpare; + for (i = 0; i < ioc5->NumHotSpares; spare++, i++) + if (mpt_pd_insert(fd, list, spare->PhysDiskNum) < 0) + return (NULL); + free(ioc5); + + disk = ioc3->PhysDisk; + for (i = 0; i < ioc3->NumPhysDisks; disk++, i++) + if (mpt_pd_insert(fd, list, disk->PhysDiskNum) < 0) + return (NULL); + free(ioc3); + + return (list); +} + +void +mpt_free_pd_list(struct mpt_drive_list *list) +{ + int i; + + for (i = 0; i < list->ndrives; i++) + free(list->drives[i]); + free(list); +} + +int +mpt_lookup_drive(struct mpt_drive_list *list, const char *drive, + U8 *PhysDiskNum) +{ + long val; + uint8_t bus, id; + char *cp; + + /* Look for a raw device id first. */ + val = strtol(drive, &cp, 0); + if (*cp == '\0') { + if (val < 0 || val > 0xff) + goto bad; + *PhysDiskNum = val; + return (0); + } + + /* Look for a : string. 
*/ + if (*cp == ':') { + if (val < 0 || val > 0xff) + goto bad; + bus = val; + val = strtol(cp + 1, &cp, 0); + if (*cp != '\0') + goto bad; + if (val < 0 || val > 0xff) + goto bad; + id = val; + + for (val = 0; val < list->ndrives; val++) { + if (list->drives[val]->PhysDiskBus == bus && + list->drives[val]->PhysDiskID == id) { + *PhysDiskNum = list->drives[val]->PhysDiskNum; + return (0); + } + } + errno = ENOENT; + return (-1); + } + +bad: + errno = EINVAL; + return (-1); +} + +/* Borrowed heavily from scsi_all.c:scsi_print_inquiry(). */ +const char * +mpt_pd_inq_string(CONFIG_PAGE_RAID_PHYS_DISK_0 *pd_info) +{ + RAID_PHYS_DISK0_INQUIRY_DATA *inq_data; + u_char vendor[9], product[17], revision[5]; + static char inq_string[64]; + + inq_data = &pd_info->InquiryData; + cam_strvis(vendor, inq_data->VendorID, sizeof(inq_data->VendorID), + sizeof(vendor)); + cam_strvis(product, inq_data->ProductID, sizeof(inq_data->ProductID), + sizeof(product)); + cam_strvis(revision, inq_data->ProductRevLevel, + sizeof(inq_data->ProductRevLevel), sizeof(revision)); + + /* Total hack. */ + if (strcmp(vendor, "ATA") == 0) + snprintf(inq_string, sizeof(inq_string), "<%s %s> SATA", + product, revision); + else + snprintf(inq_string, sizeof(inq_string), "<%s %s %s> SAS", + vendor, product, revision); + return (inq_string); +} + +/* Helper function to set a drive to a given state. */ +static int +drive_set_state(char *drive, U8 Action, U8 State, const char *name) +{ + CONFIG_PAGE_RAID_PHYS_DISK_0 *info; + struct mpt_drive_list *list; + U8 PhysDiskNum; + int fd; + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + list = mpt_pd_list(fd); + if (list == NULL) + return (errno); + + if (mpt_lookup_drive(list, drive, &PhysDiskNum) < 0) { + warn("Failed to find drive %s", drive); + return (errno); + } + mpt_free_pd_list(list); + + /* Get the info for this drive. 
*/ + info = mpt_pd_info(fd, PhysDiskNum, NULL); + if (info == NULL) { + warn("Failed to fetch info for drive %u", PhysDiskNum); + return (errno); + } + + /* Try to change the state. */ + if (info->PhysDiskStatus.State == State) { + warnx("Drive %u is already in the desired state", PhysDiskNum); + return (EINVAL); + } + + if (mpt_raid_action(fd, Action, 0, 0, PhysDiskNum, 0, NULL, 0, NULL, + NULL, 0, NULL, NULL, 0) < 0) { + warn("Failed to set drive %u to %s", PhysDiskNum, name); + return (errno); + } + + free(info); + close(fd); + + return (0); +} + +static int +fail_drive(int ac, char **av) +{ + + if (ac != 2) { + warnx("fail: %s", ac > 2 ? "extra arguments" : + "drive required"); + return (EINVAL); + } + + return (drive_set_state(av[1], MPI_RAID_ACTION_FAIL_PHYSDISK, + MPI_PHYSDISK0_STATUS_FAILED_REQUESTED, "FAILED")); +} +MPT_COMMAND(top, fail, fail_drive); + +static int +online_drive(int ac, char **av) +{ + + if (ac != 2) { + warnx("online: %s", ac > 2 ? "extra arguments" : + "drive required"); + return (EINVAL); + } + + return (drive_set_state(av[1], MPI_RAID_ACTION_PHYSDISK_ONLINE, + MPI_PHYSDISK0_STATUS_ONLINE, "ONLINE")); +} +MPT_COMMAND(top, online, online_drive); + +static int +offline_drive(int ac, char **av) +{ + + if (ac != 2) { + warnx("offline: %s", ac > 2 ? "extra arguments" : + "drive required"); + return (EINVAL); + } + + return (drive_set_state(av[1], MPI_RAID_ACTION_PHYSDISK_OFFLINE, + MPI_PHYSDISK0_STATUS_OFFLINE_REQUESTED, "OFFLINE")); +} +MPT_COMMAND(top, offline, offline_drive); diff --git a/usr.sbin/mptutil/mpt_evt.c b/usr.sbin/mptutil/mpt_evt.c new file mode 100644 index 0000000..4b64352 --- /dev/null +++ b/usr.sbin/mptutil/mpt_evt.c @@ -0,0 +1,155 @@ +/*- + * Copyright (c) 2008 Yahoo!, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__RCSID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include "mptutil.h" + +static CONFIG_PAGE_LOG_0 * +mpt_get_events(int fd, U16 *IOCStatus) +{ + + return (mpt_read_extended_config_page(fd, MPI_CONFIG_EXTPAGETYPE_LOG, + 0, 0, 0, IOCStatus)); +} + +/* + * 1 2 3 4 5 6 7 + * 1234567890123456789012345678901234567890123456789012345678901234567890 + * < ID> < time > LogSequence, entry->TimeStamp, + entry->LogEntryQualifier); + for (i = 0; i < 14; i++) + printf("%02x ", entry->LogData[i]); + printf("|"); + for (i = 0; i < 14; i++) + printf("%c", isprint(entry->LogData[i]) ? 
entry->LogData[i] : + '.'); + printf("|\n"); + printf(" "); + for (i = 0; i < 14; i++) + printf("%02x ", entry->LogData[i + 14]); + printf("|"); + for (i = 0; i < 14; i++) + printf("%c", isprint(entry->LogData[i + 14]) ? + entry->LogData[i + 14] : '.'); + printf("|\n"); +} + +static int +event_compare(const void *first, const void *second) +{ + MPI_LOG_0_ENTRY * const *one; + MPI_LOG_0_ENTRY * const *two; + + one = first; + two = second; + return ((*one)->LogSequence - ((*two)->LogSequence)); +} + +static int +show_events(int ac, char **av) +{ + CONFIG_PAGE_LOG_0 *log; + MPI_LOG_0_ENTRY **entries; + int ch, fd, i, num_events, verbose; + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + log = mpt_get_events(fd, NULL); + if (log == NULL) { + warn("Failed to get event log info"); + return (errno); + } + + /* Default settings. */ + verbose = 0; + + /* Parse any options. */ + optind = 1; + while ((ch = getopt(ac, av, "v")) != -1) { + switch (ch) { + case 'v': + verbose = 1; + break; + case '?': + default: + return (EINVAL); + } + } + ac -= optind; + av += optind; + + /* Build a list of valid entries and sort them by sequence. 
*/ + entries = malloc(sizeof(MPI_LOG_0_ENTRY *) * log->NumLogEntries); + num_events = 0; + for (i = 0; i < log->NumLogEntries; i++) { + if (log->LogEntry[i].LogEntryQualifier == + MPI_LOG_0_ENTRY_QUAL_ENTRY_UNUSED) + continue; + entries[num_events] = &log->LogEntry[i]; + num_events++; + } + + qsort(entries, num_events, sizeof(MPI_LOG_0_ENTRY *), event_compare); + + if (num_events == 0) + printf("Event log is empty\n"); + else { + printf(" ID Time Type Log Data\n"); + for (i = 0; i < num_events; i++) + mpt_print_event(entries[i], verbose); + } + + free(entries); + close(fd); + + return (0); +} +MPT_COMMAND(show, events, show_events); diff --git a/usr.sbin/mptutil/mpt_show.c b/usr.sbin/mptutil/mpt_show.c new file mode 100644 index 0000000..e0b6b74 --- /dev/null +++ b/usr.sbin/mptutil/mpt_show.c @@ -0,0 +1,559 @@ +/*- + * Copyright (c) 2008 Yahoo!, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__RCSID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mptutil.h" + +MPT_TABLE(top, show); + +#define STANDALONE_STATE "ONLINE" + +static void +format_stripe(char *buf, size_t buflen, U32 stripe) +{ + + humanize_number(buf, buflen, stripe * 512, "", HN_AUTOSCALE, + HN_B | HN_NOSPACE); +} + +static void +display_stripe_map(const char *label, U32 StripeMap) +{ + char stripe[5]; + int comma, i; + + comma = 0; + printf("%s: ", label); + for (i = 0; StripeMap != 0; i++, StripeMap >>= 1) + if (StripeMap & 1) { + format_stripe(stripe, sizeof(stripe), 1 << i); + if (comma) + printf(", "); + printf("%s", stripe); + comma = 1; + } + printf("\n"); +} + +static int +show_adapter(int ac, char **av) +{ + CONFIG_PAGE_MANUFACTURING_0 *man0; + CONFIG_PAGE_IOC_2 *ioc2; + CONFIG_PAGE_IOC_6 *ioc6; + int fd, comma; + + if (ac != 1) { + warnx("show adapter: extra arguments"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + man0 = mpt_read_man_page(fd, 0, NULL); + if (man0 == NULL) { + warn("Failed to get controller info"); + return (errno); + } + if (man0->Header.PageLength < sizeof(*man0) / 4) { + warn("Invalid controller info"); + return (EINVAL); + } + printf("mpt%d Adapter:\n", mpt_unit); + printf(" Board Name: %.16s\n", man0->BoardName); + printf(" Board Assembly: %.16s\n", man0->BoardAssembly); + printf(" Chip Name: 
%.16s\n", man0->ChipName); + printf(" Chip Revision: %.16s\n", man0->ChipRevision); + + free(man0); + + ioc2 = mpt_read_ioc_page(fd, 2, NULL); + if (ioc2 != NULL) { + printf(" RAID Levels:"); + comma = 0; + if (ioc2->CapabilitiesFlags & + MPI_IOCPAGE2_CAP_FLAGS_IS_SUPPORT) { + printf(" RAID0"); + comma = 1; + } + if (ioc2->CapabilitiesFlags & + MPI_IOCPAGE2_CAP_FLAGS_IM_SUPPORT) { + printf("%s RAID1", comma ? "," : ""); + comma = 1; + } + if (ioc2->CapabilitiesFlags & + MPI_IOCPAGE2_CAP_FLAGS_IME_SUPPORT) { + printf("%s RAID1E", comma ? "," : ""); + comma = 1; + } + if (ioc2->CapabilitiesFlags & + MPI_IOCPAGE2_CAP_FLAGS_RAID_5_SUPPORT) { + printf("%s RAID5", comma ? "," : ""); + comma = 1; + } + if (ioc2->CapabilitiesFlags & + MPI_IOCPAGE2_CAP_FLAGS_RAID_6_SUPPORT) { + printf("%s RAID6", comma ? "," : ""); + comma = 1; + } + if (ioc2->CapabilitiesFlags & + MPI_IOCPAGE2_CAP_FLAGS_RAID_10_SUPPORT) { + printf("%s RAID10", comma ? "," : ""); + comma = 1; + } + if (ioc2->CapabilitiesFlags & + MPI_IOCPAGE2_CAP_FLAGS_RAID_50_SUPPORT) { + printf("%s RAID50", comma ? "," : ""); + comma = 1; + } + if (!comma) + printf(" none"); + printf("\n"); + free(ioc2); + } + + ioc6 = mpt_read_ioc_page(fd, 6, NULL); + if (ioc6 != NULL) { + display_stripe_map(" RAID0 Stripes", + ioc6->SupportedStripeSizeMapIS); + display_stripe_map(" RAID1E Stripes", + ioc6->SupportedStripeSizeMapIME); + printf(" RAID0 Drives/Vol: %u", ioc6->MinDrivesIS); + if (ioc6->MinDrivesIS != ioc6->MaxDrivesIS) + printf("-%u", ioc6->MaxDrivesIS); + printf("\n"); + printf(" RAID1 Drives/Vol: %u", ioc6->MinDrivesIM); + if (ioc6->MinDrivesIM != ioc6->MaxDrivesIM) + printf("-%u", ioc6->MaxDrivesIM); + printf("\n"); + printf("RAID1E Drives/Vol: %u", ioc6->MinDrivesIME); + if (ioc6->MinDrivesIME != ioc6->MaxDrivesIME) + printf("-%u", ioc6->MaxDrivesIME); + printf("\n"); + free(ioc6); + } + + /* TODO: Add an ioctl to fetch IOC_FACTS and print firmware version. 
*/ + + close(fd); + + return (0); +} +MPT_COMMAND(show, adapter, show_adapter); + +static void +print_vol(CONFIG_PAGE_RAID_VOL_0 *info, int state_len) +{ + uint64_t size; + const char *level, *state; + char buf[6], stripe[5]; + + size = ((uint64_t)info->MaxLBAHigh << 32) | info->MaxLBA; + humanize_number(buf, sizeof(buf), (size + 1) * 512, "", HN_AUTOSCALE, + HN_B | HN_NOSPACE | HN_DECIMAL); + if (info->VolumeType == MPI_RAID_VOL_TYPE_IM) + stripe[0] = '\0'; + else + format_stripe(stripe, sizeof(stripe), info->StripeSize); + level = mpt_raid_level(info->VolumeType); + state = mpt_volstate(info->VolumeStatus.State); + if (state_len > 0) + printf("(%6s) %-8s %6s %-*s", buf, level, stripe, state_len, + state); + else if (stripe[0] != '\0') + printf("(%s) %s %s %s", buf, level, stripe, state); + else + printf("(%s) %s %s", buf, level, state); +} + +static void +print_pd(CONFIG_PAGE_RAID_PHYS_DISK_0 *info, int state_len, int location) +{ + const char *inq, *state; + char buf[6]; + + humanize_number(buf, sizeof(buf), ((uint64_t)info->MaxLBA + 1) * 512, + "", HN_AUTOSCALE, HN_B | HN_NOSPACE |HN_DECIMAL); + state = mpt_pdstate(info); + if (state_len > 0) + printf("(%6s) %-*s", buf, state_len, state); + else + printf("(%s) %s", buf, state); + inq = mpt_pd_inq_string(info); + if (inq != NULL) + printf(" %s", inq); + if (!location) + return; + printf(" bus %d id %d", info->PhysDiskBus, info->PhysDiskID); +} + +static void +print_standalone(struct mpt_standalone_disk *disk, int state_len, int location) +{ + char buf[6]; + + humanize_number(buf, sizeof(buf), (disk->maxlba + 1) * 512, + "", HN_AUTOSCALE, HN_B | HN_NOSPACE |HN_DECIMAL); + if (state_len > 0) + printf("(%6s) %-*s", buf, state_len, STANDALONE_STATE); + else + printf("(%s) %s", buf, STANDALONE_STATE); + if (disk->inqstring[0] != '\0') + printf(" %s", disk->inqstring); + if (!location) + return; + printf(" bus %d id %d", disk->bus, disk->target); +} + +static void +print_spare_pools(U8 HotSparePool) +{ + int i; + + if 
(HotSparePool == 0) { + printf("none"); + return; + } + for (i = 0; HotSparePool != 0; i++) { + if (HotSparePool & 1) { + printf("%d", i); + if (HotSparePool == 1) + break; + printf(", "); + } + HotSparePool >>= 1; + } +} + +static int +show_config(int ac, char **av) +{ + CONFIG_PAGE_IOC_2 *ioc2; + CONFIG_PAGE_IOC_2_RAID_VOL *vol; + CONFIG_PAGE_IOC_5 *ioc5; + IOC_5_HOT_SPARE *spare; + CONFIG_PAGE_RAID_VOL_0 *vinfo; + RAID_VOL0_PHYS_DISK *disk; + CONFIG_PAGE_RAID_VOL_1 *vnames; + CONFIG_PAGE_RAID_PHYS_DISK_0 *pinfo; + struct mpt_standalone_disk *sdisks; + int fd, i, j, nsdisks; + + if (ac != 1) { + warnx("show config: extra arguments"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + /* Get the config from the controller. */ + ioc2 = mpt_read_ioc_page(fd, 2, NULL); + ioc5 = mpt_read_ioc_page(fd, 5, NULL); + if (ioc2 == NULL || ioc5 == NULL) { + warn("Failed to get config"); + return (errno); + } + if (mpt_fetch_disks(fd, &nsdisks, &sdisks) < 0) { + warn("Failed to get standalone drive list"); + return (errno); + } + + /* Dump out the configuration. 
*/ + printf("mpt%d Configuration: %d volumes, %d drives\n", + mpt_unit, ioc2->NumActiveVolumes, ioc2->NumActivePhysDisks + + nsdisks); + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) { + printf(" volume %s ", mpt_volume_name(vol->VolumeBus, + vol->VolumeID)); + vinfo = mpt_vol_info(fd, vol->VolumeBus, vol->VolumeID, NULL); + if (vinfo == NULL) { + printf("%s UNKNOWN", mpt_raid_level(vol->VolumeType)); + } else + print_vol(vinfo, -1); + vnames = mpt_vol_names(fd, vol->VolumeBus, vol->VolumeID, NULL); + if (vnames != NULL) { + if (vnames->Name[0] != '\0') + printf(" <%s>", vnames->Name); + free(vnames); + } + if (vinfo == NULL) { + printf("\n"); + continue; + } + printf(" spans:\n"); + disk = vinfo->PhysDisk; + for (j = 0; j < vinfo->NumPhysDisks; disk++, j++) { + printf(" drive %u ", disk->PhysDiskNum); + pinfo = mpt_pd_info(fd, disk->PhysDiskNum, NULL); + if (pinfo != NULL) { + print_pd(pinfo, -1, 0); + free(pinfo); + } + printf("\n"); + } + if (vinfo->VolumeSettings.HotSparePool != 0) { + printf(" spare pools: "); + print_spare_pools(vinfo->VolumeSettings.HotSparePool); + printf("\n"); + } + free(vinfo); + } + + spare = ioc5->HotSpare; + for (i = 0; i < ioc5->NumHotSpares; spare++, i++) { + printf(" spare %u ", spare->PhysDiskNum); + pinfo = mpt_pd_info(fd, spare->PhysDiskNum, NULL); + if (pinfo != NULL) { + print_pd(pinfo, -1, 0); + free(pinfo); + } + printf(" backs pool %d\n", ffs(spare->HotSparePool) - 1); + } + for (i = 0; i < nsdisks; i++) { + printf(" drive %s ", sdisks[i].devname); + print_standalone(&sdisks[i], -1, 0); + printf("\n"); + } + free(ioc2); + free(ioc5); + free(sdisks); + close(fd); + + return (0); +} +MPT_COMMAND(show, config, show_config); + +static int +show_volumes(int ac, char **av) +{ + CONFIG_PAGE_IOC_2 *ioc2; + CONFIG_PAGE_IOC_2_RAID_VOL *vol; + CONFIG_PAGE_RAID_VOL_0 **volumes; + CONFIG_PAGE_RAID_VOL_1 *vnames; + int fd, i, len, state_len; + + if (ac != 1) { + warnx("show volumes: extra arguments"); + 
return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + /* Get the volume list from the controller. */ + ioc2 = mpt_read_ioc_page(fd, 2, NULL); + if (ioc2 == NULL) { + warn("Failed to get volume list"); + return (errno); + } + + /* + * Go ahead and read the info for all the volumes and figure + * out the maximum width of the state field. + */ + volumes = malloc(sizeof(*volumes) * ioc2->NumActiveVolumes); + state_len = strlen("State"); + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) { + volumes[i] = mpt_vol_info(fd, vol->VolumeBus, vol->VolumeID, + NULL); + if (volumes[i] == NULL) + len = strlen("UNKNOWN"); + else + len = strlen(mpt_volstate( + volumes[i]->VolumeStatus.State)); + if (len > state_len) + state_len = len; + } + printf("mpt%d Volumes:\n", mpt_unit); + printf(" Id Size Level Stripe "); + len = state_len - strlen("State"); + for (i = 0; i < (len + 1) / 2; i++) + printf(" "); + printf("State"); + for (i = 0; i < len / 2; i++) + printf(" "); + printf(" Write-Cache Name\n"); + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) { + printf("%6s ", mpt_volume_name(vol->VolumeBus, vol->VolumeID)); + if (volumes[i] != NULL) + print_vol(volumes[i], state_len); + else + printf(" %-8s %-*s", + mpt_raid_level(vol->VolumeType), state_len, + "UNKNOWN"); + if (volumes[i] != NULL) { + if (volumes[i]->VolumeSettings.Settings & + MPI_RAIDVOL0_SETTING_WRITE_CACHING_ENABLE) + printf(" Enabled "); + else + printf(" Disabled "); + } else + printf(" "); + free(volumes[i]); + vnames = mpt_vol_names(fd, vol->VolumeBus, vol->VolumeID, NULL); + if (vnames != NULL) { + if (vnames->Name[0] != '\0') + printf(" <%s>", vnames->Name); + free(vnames); + } + printf("\n"); + } + free(ioc2); + close(fd); + + return (0); +} +MPT_COMMAND(show, volumes, show_volumes); + +static int +show_drives(int ac, char **av) +{ + struct mpt_drive_list *list; + struct mpt_standalone_disk 
*sdisks; + int fd, i, len, nsdisks, state_len; + + if (ac != 1) { + warnx("show drives: extra arguments"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + /* Get the drive list. */ + list = mpt_pd_list(fd); + if (list == NULL) { + warn("Failed to get drive list"); + return (errno); + } + + /* Fetch the list of standalone disks for this controller. */ + state_len = 0; + if (mpt_fetch_disks(fd, &nsdisks, &sdisks) != 0) { + nsdisks = 0; + sdisks = NULL; + } + if (nsdisks != 0) + state_len = strlen(STANDALONE_STATE); + + /* Walk the drive list to determine width of state column. */ + for (i = 0; i < list->ndrives; i++) { + len = strlen(mpt_pdstate(list->drives[i])); + if (len > state_len) + state_len = len; + } + + /* List the drives. */ + printf("mpt%d Physical Drives:\n", mpt_unit); + for (i = 0; i < list->ndrives; i++) { + printf("%4u ", list->drives[i]->PhysDiskNum); + print_pd(list->drives[i], state_len, 1); + printf("\n"); + } + mpt_free_pd_list(list); + for (i = 0; i < nsdisks; i++) { + printf("%4s ", sdisks[i].devname); + print_standalone(&sdisks[i], state_len, 1); + printf("\n"); + } + free(sdisks); + + close(fd); + + return (0); +} +MPT_COMMAND(show, drives, show_drives); + +#ifdef DEBUG +static int +show_physdisks(int ac, char **av) +{ + CONFIG_PAGE_RAID_PHYS_DISK_0 *pinfo; + U16 IOCStatus; + int fd, i; + + if (ac != 1) { + warnx("show drives: extra arguments"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + /* Try to find each possible phys disk page. 
*/ + for (i = 0; i <= 0xff; i++) { + pinfo = mpt_pd_info(fd, i, &IOCStatus); + if (pinfo == NULL) { + if (IOCStatus != MPI_IOCSTATUS_CONFIG_INVALID_PAGE) + warnx("mpt_pd_info(%d): %s", i, + mpt_ioc_status(IOCStatus)); + continue; + } + printf("%3u ", i); + print_pd(pinfo, -1, 1); + printf("\n"); + } + + close(fd); + + return (0); +} +MPT_COMMAND(show, pd, show_physdisks); +#endif diff --git a/usr.sbin/mptutil/mpt_volume.c b/usr.sbin/mptutil/mpt_volume.c new file mode 100644 index 0000000..04adcb7 --- /dev/null +++ b/usr.sbin/mptutil/mpt_volume.c @@ -0,0 +1,248 @@ +/*- + * Copyright (c) 2008 Yahoo!, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__RCSID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mptutil.h" + +MPT_TABLE(top, volume); + +const char * +mpt_volstate(U8 State) +{ + static char buf[16]; + + switch (State) { + case MPI_RAIDVOL0_STATUS_STATE_OPTIMAL: + return ("OPTIMAL"); + case MPI_RAIDVOL0_STATUS_STATE_DEGRADED: + return ("DEGRADED"); + case MPI_RAIDVOL0_STATUS_STATE_FAILED: + return ("FAILED"); + case MPI_RAIDVOL0_STATUS_STATE_MISSING: + return ("MISSING"); + default: + sprintf(buf, "VSTATE 0x%02x", State); + return (buf); + } +} + +static int +volume_name(int ac, char **av) +{ + CONFIG_PAGE_RAID_VOL_1 *vnames; + U8 VolumeBus, VolumeID; + int fd; + + if (ac != 3) { + warnx("name: volume and name required"); + return (EINVAL); + } + + if (strlen(av[2]) >= sizeof(vnames->Name)) { + warnx("name: new name is too long"); + return (ENOSPC); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + if (mpt_lookup_volume(fd, av[1], &VolumeBus, &VolumeID) < 0) { + warn("Invalid volume: %s", av[1]); + return (errno); + } + + vnames = mpt_vol_names(fd, VolumeBus, VolumeID, NULL); + if (vnames == NULL) { + warn("Failed to fetch volume names"); + return (errno); + } + + if (vnames->Header.PageType != MPI_CONFIG_PAGEATTR_CHANGEABLE) { + warnx("Volume name is read only"); + return (EOPNOTSUPP); + } + printf("mpt%u changing volume %s name from \"%s\" to 
\"%s\"\n", + mpt_unit, mpt_volume_name(VolumeBus, VolumeID), vnames->Name, + av[2]); + bzero(vnames->Name, sizeof(vnames->Name)); + strcpy(vnames->Name, av[2]); + + if (mpt_write_config_page(fd, vnames, NULL) < 0) { + warn("Failed to set volume name"); + return (errno); + } + + free(vnames); + close(fd); + + return (0); +} +MPT_COMMAND(top, name, volume_name); + +static int +volume_status(int ac, char **av) +{ + MPI_RAID_VOL_INDICATOR prog; + RAID_VOL0_STATUS VolumeStatus; + uint64_t total, remaining; + float pct; + U8 VolumeBus, VolumeID; + int fd; + + if (ac != 2) { + warnx("volume status: %s", ac > 2 ? "extra arguments" : + "volume required"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + if (mpt_lookup_volume(fd, av[1], &VolumeBus, &VolumeID) < 0) { + warn("Invalid volume: %s", av[1]); + return (errno); + } + + if (mpt_raid_action(fd, MPI_RAID_ACTION_INDICATOR_STRUCT, VolumeBus, + VolumeID, 0, 0, NULL, 0, &VolumeStatus, (U32 *)&prog, sizeof(prog), + NULL, NULL, 0) < 0) { + warn("Fetching volume status failed"); + return (errno); + } + + printf("Volume %s status:\n", mpt_volume_name(VolumeBus, VolumeID)); + printf(" state: %s\n", mpt_volstate(VolumeStatus.State)); + printf(" flags:"); + if (VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_ENABLED) + printf(" ENABLED"); + else + printf(" DISABLED"); + if (VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_QUIESCED) + printf(", QUIESCED"); + if (VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS) + printf(", REBUILDING"); + if (VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_VOLUME_INACTIVE) + printf(", INACTIVE"); + if (VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_BAD_BLOCK_TABLE_FULL) + printf(", BAD BLOCK TABLE FULL"); + printf("\n"); + if (VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS) { + total = (uint64_t)prog.TotalBlocks.High << 32 | + prog.TotalBlocks.Low; + remaining = (uint64_t)prog.BlocksRemaining.High << 32 | + 
prog.BlocksRemaining.Low; + pct = (float)(total - remaining) * 100 / total; + printf(" resync: %.2f%% complete\n", pct); + } + + close(fd); + return (0); +} +MPT_COMMAND(volume, status, volume_status); + +static int +volume_cache(int ac, char **av) +{ + CONFIG_PAGE_RAID_VOL_0 *volume; + U32 Settings, NewSettings; + U8 VolumeBus, VolumeID; + char *s1; + int fd; + + if (ac != 3) { + warnx("volume cache: %s", ac > 3 ? "extra arguments" : + "volume required"); + return (EINVAL); + } + + for (s1 = av[2]; *s1 != '\0'; s1++) + *s1 = tolower(*s1); + if ((strcmp(av[2], "enable")) && (strcmp(av[2], "enabled")) && + (strcmp(av[2], "disable")) && (strcmp(av[2], "disabled"))) { + warnx("volume cache: invalid flag, must be 'enable' or 'disable'\n"); + return (EINVAL); + } + + fd = mpt_open(mpt_unit); + if (fd < 0) { + warn("mpt_open"); + return (errno); + } + + if (mpt_lookup_volume(fd, av[1], &VolumeBus, &VolumeID) < 0) { + warn("Invalid volume: %s", av[1]); + return (errno); + } + + volume = mpt_vol_info(fd, VolumeBus, VolumeID, NULL); + if (volume == NULL) + return (-1); + + Settings = volume->VolumeSettings.Settings; + + NewSettings = Settings; + if (strncmp(av[2], "enable", sizeof("enable")) == 0) + NewSettings |= 0x01; + if (strncmp(av[2], "disable", sizeof("disable")) == 0) + NewSettings &= ~0x01; + + if (NewSettings == Settings) { + warnx("volume cache unchanged\n"); + close(fd); + return (0); + } + + volume->VolumeSettings.Settings = NewSettings; + if (mpt_raid_action(fd, MPI_RAID_ACTION_CHANGE_VOLUME_SETTINGS, + VolumeBus, VolumeID, 0, *(U32 *)&volume->VolumeSettings, NULL, 0, + NULL, NULL, 0, NULL, NULL, 0) < 0) + warnx("volume cache change failed, errno= %d\n", errno); + + close(fd); + return (0); +} +MPT_COMMAND(volume, cache, volume_cache); diff --git a/usr.sbin/mptutil/mptutil.8 b/usr.sbin/mptutil/mptutil.8 new file mode 100644 index 0000000..7acaf3e --- /dev/null +++ b/usr.sbin/mptutil/mptutil.8 @@ -0,0 +1,386 @@ +.\" +.\" Copyright (c) 2008 Yahoo!, Inc. 
+.\" All rights reserved. +.\" Written by: John Baldwin +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the author nor the names of any co-contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd August 16, 2009 +.Dt MPTUTIL 8 +.Os +.Sh NAME +.Nm mptutil +.Nd Utility for managing LSI Fusion-MPT controllers +.Sh SYNOPSIS +.Nm +.Cm version +.Nm +.Op Fl u Ar unit +.Cm show adapter +.Nm +.Op Fl u Ar unit +.Cm show config +.Nm +.Op Fl u Ar unit +.Cm show drives +.Nm +.Op Fl u Ar unit +.Cm show events +.Nm +.Op Fl u Ar unit +.Cm show volumes +.Nm +.Op Fl u Ar unit +.Cm fail Ar drive +.Nm +.Op Fl u Ar unit +.Cm online Ar drive +.Nm +.Op Fl u Ar unit +.Cm offline Ar drive +.Nm +.Op Fl u Ar unit +.Cm name Ar volume Ar name +.Nm +.Op Fl u Ar unit +.Cm volume status Ar volume +.Nm +.Op Fl u Ar unit +.Cm volume cache Ar volume +.Ar enable|disable +.Nm +.Op Fl u Ar unit +.Cm clear +.Nm +.Op Fl u Ar unit +.Cm create Ar type +.Op Fl q +.Op Fl v +.Op Fl s Ar stripe_size +.Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Nm +.Op Fl u Ar unit +.Cm delete Ar volume +.Nm +.Op Fl u Ar unit +.Cm add Ar drive Op Ar volume +.Nm +.Op Fl u Ar unit +.Cm remove Ar drive +.Sh DESCRIPTION +The +.Nm +utility can be used to display or modify various parameters on LSI +Fusion-MPT controllers. +Each invocation of +.Nm +consists of zero or more global options followed by a command. +Commands may support additional optional or required arguments after the +command. +.Pp +Currently one global option is supported: +.Bl -tag -width indent +.It Fl u Ar unit +.Ar unit +specifies the unit of the controller to work with. +If no unit is specified, +then unit 0 is used. +.El +.Pp +Volumes may be specified in two forms. +First, +a volume may be identified by its location as +.Sm off +.Op Ar xx Ns \&: +.Ar yy +.Sm on +where +.Ar xx +is the bus ID and +.Ar yy +is the target ID. +If the bus ID is omitted, +the volume is assumed to be on bus 0. +Second, +the volume may be specified by the corresponding +.Em daX +device, +such as +.Em da0 . +.Pp +The +.Xr mpt 4 +controller divides drives up into two categories.
+Configured drives belong to a RAID volume either as a member drive or as a hot +spare. +Each configured drive is assigned a unique device ID such as 0 or 1 that is +shown in +.Cm show config , +and in the first column of +.Cm show drives . +Any drive not associated with a RAID volume as either a member or a hot spare +is a standalone drive. +Standalone drives are visible to the operating system as SCSI disk devices. +As a result, drives may be specified in three forms. +First, +a configured drive may be identified by its device ID. +Second, +any drive may be identified by its location as +.Sm off +.Ar xx Ns \&: +.Ar yy +.Sm on +where +.Ar xx +is the bus ID and +.Ar yy +is the target ID for each drive as displayed in +.Cm show drives . +Note that unlike volumes, +a drive location always requires the bus ID to avoid confusion with device IDs. +Third, +a standalone drive that is not part of a volume may be identified by its +corresponding +.Em daX +device as displayed in +.Cm show drives . +.Pp +The +.Nm +utility supports several different groups of commands. +The first group of commands provide information about the controller, +the volumes it manages, and the drives it controls. +The second group of commands are used to manage the physical drives +attached to the controller. +The third group of commands are used to manage the logical volumes +managed by the controller. +The fourth group of commands are used to manage the drive configuration for +the controller. +.Pp +The informational commands include: +.Bl -tag -width indent +.It Cm version +Displays the version of +.Nm . +.It Cm show adapter +Displays information about the RAID controller such as the model number. +.It Cm show config +Displays the volume and drive configuration for the controller. +Each volume is listed along with the physical drives that the volume spans. +If any hot spare drives are configured, then they are listed as well.
+.It Cm show drives +Lists all of the physical drives attached to the controller. +.It Cm show events +Display all the entries from the controller's event log. +Due to lack of documentation this command isn't very useful currently and +just dumps each log entry in hex. +.It Cm show volumes +Lists all of the logical volumes managed by the controller. +.El +.Pp +The physical drive management commands include: +.Bl -tag -width indent +.It Cm fail Ar drive +Mark +.Ar drive +as +.Dq failed requested . +Note that this state is different from the +.Dq failed +state that is used when the firmware fails a drive. +.Ar Drive +must be a configured drive. +.It Cm online Ar drive +Mark +.Ar drive +as an online drive. +.Ar Drive +must be a configured drive in either the +.Dq offline +or +.Dq failed requested +states. +.It Cm offline Ar drive +Mark +.Ar drive +as offline. +.Ar Drive +must be a configured, online drive. +.El +.Pp +The logical volume management commands include: +.Bl -tag -width indent +.It Cm name Ar volume Ar name +Sets the name of +.Ar volume +to +.Ar name . +.It Cm volume cache Ar volume Ar enable|disable +Enables or disables the drive write cache for the member drives of +.Ar volume . +.It Cm volume status Ar volume +Display more detailed status about a single volume including the current +progress of a rebuild operation if one is being performed. +.El +.Pp +The configuration commands include: +.Bl -tag -width indent +.It Cm clear +Delete the entire configuration including all volumes and spares. +All drives will become standalone drives. +.It Xo Cm create Ar type +.Op Fl q +.Op Fl v +.Op Fl s Ar stripe_size +.Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." +.Xc +Create a new volume. +The +.Ar type +specifies the type of volume to create. +Currently supported types include: +.Bl -tag -width indent +.It Cm raid0 +Creates one RAID0 volume spanning the drives listed in the single drive list.
+.It Cm raid1 +Creates one RAID1 volume spanning the drives listed in the single drive list. +.It Cm raid1e +Creates one RAID1E volume spanning the drives listed in the single drive list. +.El +.Pp +.Sy Note: +Not all volume types are supported by all controllers. +.Pp +If the +.Fl q +flag is specified after +.Ar type , +then a +.Dq quick +initialization of the volume will be done. +This is useful when the drives do not contain any existing data that need +to be preserved. +.Pp +If the +.Fl v +flag is specified after +.Ar type , +then more verbose output will be enabled. +Currently this just provides notification as drives are added to volumes +when building the configuration. +.Pp +The +.Fl s +.Ar stripe_size +parameter allows the stripe size of the array to be set. +By default a stripe size of 64K is used. +The list of valid values for a given +.Ar type +are listed in the output of +.Cm show adapter . +.It Cm delete Ar volume +Delete the volume +.Ar volume . +Member drives will become standalone drives. +.It Cm add Ar drive Op Ar volume +Mark +.Ar drive +as a hot spare. +.Ar Drive +must not be a member of a volume. +If +.Ar volume +is specified, +then the hot spare will be dedicated to that volume. +Otherwise, +.Ar drive +will be used as a global hot spare backing all volumes for this controller. +Note that +.Ar drive +must be as large as the smallest drive in all of the volumes it is going to +back. +.It Cm remove Ar drive +Remove the hot spare +.Ar drive +from service. +It will become a standalone drive. +.El +.Sh EXAMPLES +Mark the drive at bus 0 target 4 as offline: +.Pp +.Dl Nm Cm offline 0:4 +.Pp +Create a RAID1 array from the two standalone drives +.Va da1 +and +.Va da2 : +.Pp +.Dl Nm Cm create raid1 da1,da2 +.Pp +Mark standalone drive +.Va da3 +as a global hot spare: +.Pp +.Dl Nm Cm add da3 +.Sh SEE ALSO +.Xr mpt 4 +.Sh BUGS +.Pp +The handling of spare drives appears to be unreliable. +The +.Xr mpt 4 +firmware manages spares via spare drive +.Dq pools . 
+There are eight pools numbered 0 through 7. +Each spare drive can only be assigned to a single pool. +Each volume can be backed by any combination of zero or more spare pools. +The +.Nm +utility attempts to use the following algorithm for managing spares. +Global spares are always assigned to pool 0, +and all volumes are always backed by pool 0. +For dedicated spares, +.Nm +assigns one of the remaining 7 pools to each volume and +assigns dedicated drives to that pool. +In practice however, it seems that assigning a drive as a spare does not +take effect until the box has been rebooted. +Also, the firmware renumbers the spare pool assignments after a reboot +which undoes the effects of the algorithm above. +Simple cases such as assigning global spares seem to work ok +.Pq albeit requiring a reboot to take effect +but more +.Dq exotic +configurations may not work reliably. +.Pp +Drive configuration commands result in an excessive flood of messages on the +console. +.Sh HISTORY +The +.Nm +utility first appeared in +.Fx 8.0 . diff --git a/usr.sbin/mptutil/mptutil.c b/usr.sbin/mptutil/mptutil.c new file mode 100644 index 0000000..e7b5806 --- /dev/null +++ b/usr.sbin/mptutil/mptutil.c @@ -0,0 +1,123 @@ +/*- + * Copyright (c) 2008 Yahoo!, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__RCSID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include "mptutil.h" + +SET_DECLARE(MPT_DATASET(top), struct mptutil_command); + +int mpt_unit; + +static void +usage(void) +{ + + fprintf(stderr, "usage: mptutil [-u unit] ...\n\n"); + fprintf(stderr, "Commands include:\n"); + fprintf(stderr, " version\n"); + fprintf(stderr, " show adapter - display controller information\n"); + fprintf(stderr, " show config - display RAID configuration\n"); + fprintf(stderr, " show drives - list physical drives\n"); + fprintf(stderr, " show events - display event log\n"); + fprintf(stderr, " show volumes - list logical volumes\n"); + fprintf(stderr, " fail - fail a physical drive\n"); + fprintf(stderr, " online - bring an offline physical drive online\n"); + fprintf(stderr, " offline - mark a physical drive offline\n"); + fprintf(stderr, " name \n"); + fprintf(stderr, " volume status - display volume status\n"); + fprintf(stderr, " volume cache \n"); + 
fprintf(stderr, " - Enable or disable the volume drive caches\n"); + fprintf(stderr, " clear - clear volume configuration\n"); + fprintf(stderr, " create [-vq] [-s stripe] [,[,...]]\n"); + fprintf(stderr, " delete \n"); + fprintf(stderr, " add [volume] - add a hot spare\n"); + fprintf(stderr, " remove - remove a hot spare\n"); +#ifdef DEBUG + fprintf(stderr, " pd create - create RAID physdisk\n"); + fprintf(stderr, " pd delete - delete RAID physdisk\n"); + fprintf(stderr, " debug - debug 'show config'\n"); +#endif + exit(1); +} + +static int +version(int ac, char **av) +{ + + printf("mptutil version 1.0.3"); +#ifdef DEBUG + printf(" (DEBUG)"); +#endif + printf("\n"); + return (0); +} +MPT_COMMAND(top, version, version); + +int +main(int ac, char **av) +{ + struct mptutil_command **cmd; + int ch; + + while ((ch = getopt(ac, av, "u:")) != -1) { + switch (ch) { + case 'u': + mpt_unit = atoi(optarg); + break; + case '?': + usage(); + } + } + + av += optind; + ac -= optind; + + /* getopt() eats av[0], so we can't use mpt_table_handler() directly. */ + if (ac == 0) + usage(); + + SET_FOREACH(cmd, MPT_DATASET(top)) { + if (strcmp((*cmd)->name, av[0]) == 0) { + (*cmd)->handler(ac, av); + return (0); + } + } + warnx("Unknown command %s.", av[0]); + return (0); +} diff --git a/usr.sbin/mptutil/mptutil.h b/usr.sbin/mptutil/mptutil.h new file mode 100644 index 0000000..3951235 --- /dev/null +++ b/usr.sbin/mptutil/mptutil.h @@ -0,0 +1,178 @@ +/*- + * Copyright (c) 2008 Yahoo!, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef __MPTUTIL_H__ +#define __MPTUTIL_H__ + +#include +#include + +#include +#include +#include +#include + +#define IOC_STATUS_SUCCESS(status) \ + (((status) & MPI_IOCSTATUS_MASK) == MPI_IOCSTATUS_SUCCESS) + +struct mpt_query_disk { + char devname[SPECNAMELEN + 1]; +}; + +struct mpt_standalone_disk { + uint64_t maxlba; + char inqstring[64]; + char devname[SPECNAMELEN + 1]; + u_int bus; + u_int target; +}; + +struct mpt_drive_list { + int ndrives; + CONFIG_PAGE_RAID_PHYS_DISK_0 *drives[0]; +}; + +struct mptutil_command { + const char *name; + int (*handler)(int ac, char **av); +}; + +#define MPT_DATASET(name) mptutil_ ## name ## _table + +#define MPT_COMMAND(set, name, function) \ + static struct mptutil_command function ## _mptutil_command = \ + { #name, function }; \ + DATA_SET(MPT_DATASET(set), function ## _mptutil_command) + +#define MPT_TABLE(set, name) \ + SET_DECLARE(MPT_DATASET(name), struct mptutil_command); \ + \ + static int \ + mptutil_ ## name ## _table_handler(int ac, char **av) \ + { \ + return (mpt_table_handler(SET_BEGIN(MPT_DATASET(name)), \ + SET_LIMIT(MPT_DATASET(name)), ac, av)); \ + } \ + MPT_COMMAND(set, name, mptutil_ ## name ## _table_handler) + +extern int mpt_unit; + +#ifdef DEBUG +void hexdump(const void *ptr, int length, const char *hdr, int flags); +#define HD_COLUMN_MASK 0xff +#define HD_DELIM_MASK 0xff00 +#define HD_OMIT_COUNT (1 << 16) +#define HD_OMIT_HEX (1 << 17) +#define HD_OMIT_CHARS (1 << 18) +#endif + +int mpt_table_handler(struct mptutil_command **start, + struct mptutil_command **end, int ac, char **av); +int mpt_read_config_page_header(int fd, U8 PageType, U8 PageNumber, + U32 PageAddress, CONFIG_PAGE_HEADER *header, U16 *IOCStatus); +void *mpt_read_config_page(int fd, U8 PageType, U8 PageNumber, + U32 PageAddress, U16 *IOCStatus); +void *mpt_read_extended_config_page(int fd, U8 ExtPageType, U8 PageVersion, + U8 PageNumber, U32 PageAddress, U16 *IOCStatus); +int mpt_write_config_page(int fd, 
void *buf, U16 *IOCStatus); +const char *mpt_ioc_status(U16 IOCStatus); +int mpt_raid_action(int fd, U8 Action, U8 VolumeBus, U8 VolumeID, + U8 PhysDiskNum, U32 ActionDataWord, void *buf, int len, + RAID_VOL0_STATUS *VolumeStatus, U32 *ActionData, int datalen, + U16 *IOCStatus, U16 *ActionStatus, int write); +const char *mpt_raid_status(U16 ActionStatus); +int mpt_open(int unit); +const char *mpt_raid_level(U8 VolumeType); +const char *mpt_volstate(U8 State); +const char *mpt_pdstate(CONFIG_PAGE_RAID_PHYS_DISK_0 *info); +const char *mpt_pd_inq_string(CONFIG_PAGE_RAID_PHYS_DISK_0 *pd_info); +struct mpt_drive_list *mpt_pd_list(int fd); +void mpt_free_pd_list(struct mpt_drive_list *list); +int mpt_query_disk(U8 VolumeBus, U8 VolumeID, struct mpt_query_disk *qd); +const char *mpt_volume_name(U8 VolumeBus, U8 VolumeID); +int mpt_fetch_disks(int fd, int *ndisks, + struct mpt_standalone_disk **disksp); +int mpt_lock_volume(U8 VolumeBus, U8 VolumeID); +int mpt_lookup_drive(struct mpt_drive_list *list, const char *drive, + U8 *PhysDiskNum); +int mpt_lookup_volume(int fd, const char *name, U8 *VolumeBus, + U8 *VolumeID); +int mpt_rescan_bus(int bus, int id); + +static __inline void * +mpt_read_man_page(int fd, U8 PageNumber, U16 *IOCStatus) +{ + + return (mpt_read_config_page(fd, MPI_CONFIG_PAGETYPE_MANUFACTURING, + PageNumber, 0, IOCStatus)); +} + +static __inline void * +mpt_read_ioc_page(int fd, U8 PageNumber, U16 *IOCStatus) +{ + + return (mpt_read_config_page(fd, MPI_CONFIG_PAGETYPE_IOC, PageNumber, + 0, IOCStatus)); +} + +static __inline U32 +mpt_vol_pageaddr(U8 VolumeBus, U8 VolumeID) +{ + + return (VolumeBus << 8 | VolumeID); +} + +static __inline CONFIG_PAGE_RAID_VOL_0 * +mpt_vol_info(int fd, U8 VolumeBus, U8 VolumeID, U16 *IOCStatus) +{ + + return (mpt_read_config_page(fd, MPI_CONFIG_PAGETYPE_RAID_VOLUME, 0, + mpt_vol_pageaddr(VolumeBus, VolumeID), IOCStatus)); +} + +static __inline CONFIG_PAGE_RAID_VOL_1 * +mpt_vol_names(int fd, U8 VolumeBus, U8 VolumeID, U16 
*IOCStatus) +{ + + return (mpt_read_config_page(fd, MPI_CONFIG_PAGETYPE_RAID_VOLUME, 1, + mpt_vol_pageaddr(VolumeBus, VolumeID), IOCStatus)); +} + +static __inline CONFIG_PAGE_RAID_PHYS_DISK_0 * +mpt_pd_info(int fd, U8 PhysDiskNum, U16 *IOCStatus) +{ + + return (mpt_read_config_page(fd, MPI_CONFIG_PAGETYPE_RAID_PHYSDISK, 0, + PhysDiskNum, IOCStatus)); +} + +#endif /* !__MPTUTIL_H__ */ -- cgit v1.1 From 31e949ad31327c0225bdcc0c9c4612c1332e4af6 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 19 Aug 2009 21:01:32 +0000 Subject: MFC 196392: Add support for backing up the old kernel when installing a new kernel using freebsd-update. This applies to using freebsd-update in "upgrade mode" and normal freebsd-update on a security branch. The backup kernel will be written to /boot/kernel.old, if the directory does not exist, or the directory was created by freebsd-update in a previous backup. Otherwise freebsd-update will generate a new directory name for use by the backup. By default symbol files are not backed up to save diskspace and avoid filling up the root partition. This feature is fully configurable in the freebsd-update config file, but defaults to enabled. Reviewed by: cperciva Approved by: re (kib) --- usr.sbin/freebsd-update/freebsd-update.sh | 162 +++++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/freebsd-update/freebsd-update.sh b/usr.sbin/freebsd-update/freebsd-update.sh index 331ef10..2eacca8 100644 --- a/usr.sbin/freebsd-update/freebsd-update.sh +++ b/usr.sbin/freebsd-update/freebsd-update.sh @@ -88,7 +88,7 @@ EOF CONFIGOPTIONS="KEYPRINT WORKDIR SERVERNAME MAILTO ALLOWADD ALLOWDELETE KEEPMODIFIEDMETADATA COMPONENTS IGNOREPATHS UPDATEIFUNMODIFIED BASEDIR VERBOSELEVEL TARGETRELEASE STRICTCOMPONENTS MERGECHANGES - IDSIGNOREPATHS" + IDSIGNOREPATHS BACKUPKERNEL BACKUPKERNELDIR BACKUPKERNELSYMBOLFILES" # Set all the configuration options to "". 
nullconfig () { @@ -308,6 +308,70 @@ config_VerboseLevel () { fi } +config_BackupKernel () { + if [ -z ${BACKUPKERNEL} ]; then + case $1 in + [Yy][Ee][Ss]) + BACKUPKERNEL=yes + ;; + [Nn][Oo]) + BACKUPKERNEL=no + ;; + *) + return 1 + ;; + esac + else + return 1 + fi +} + +config_BackupKernelDir () { + if [ -z ${BACKUPKERNELDIR} ]; then + if [ -z "$1" ]; then + echo "BackupKernelDir set to empty dir" + return 1 + fi + + # We check for some paths which would be extremely odd + # to use, but which could cause a lot of problems if + # used. + case $1 in + /|/bin|/boot|/etc|/lib|/libexec|/sbin|/usr|/var) + echo "BackupKernelDir set to invalid path $1" + return 1 + ;; + /*) + BACKUPKERNELDIR=$1 + ;; + *) + echo "BackupKernelDir ($1) is not an absolute path" + return 1 + ;; + esac + else + return 1 + fi +} + +config_BackupKernelSymbolFiles () { + if [ -z ${BACKUPKERNELSYMBOLFILES} ]; then + case $1 in + [Yy][Ee][Ss]) + BACKUPKERNELSYMBOLFILES=yes + ;; + [Nn][Oo]) + BACKUPKERNELSYMBOLFILES=no + ;; + *) + return 1 + ;; + esac + else + return 1 + fi +} + # Handle one line of configuration configline () { if [ $# -eq 0 ]; then @@ -461,6 +525,9 @@ default_params () { config_BaseDir / config_VerboseLevel stats config_StrictComponents no + config_BackupKernel yes + config_BackupKernelDir /boot/kernel.old + config_BackupKernelSymbolFiles no # Merge these defaults into the earlier-configured settings mergeconfig @@ -665,6 +732,14 @@ install_check_params () { echo "Re-run '$0 fetch'." exit 1 fi + + # Figure out what directory contains the running kernel + BOOTFILE=`sysctl -n kern.bootfile` + KERNELDIR=${BOOTFILE%/kernel} + if ! [ -d ${KERNELDIR} ]; then + echo "Cannot identify running kernel" + exit 1 + fi } # Perform sanity checks and set some final parameters in @@ -2494,6 +2569,88 @@ install_unschg () { rm filelist } +# Decide which directory name to use for kernel backups. 
+backup_kernel_finddir () { + CNT=0 + while true ; do + # Pathname does not exist, so it is OK use that name + # for backup directory. + if [ ! -e $BACKUPKERNELDIR ]; then + return 0 + fi + + # If directory do exist, we only use if it has our + # marker file. + if [ -d $BACKUPKERNELDIR -a \ + -e $BACKUPKERNELDIR/.freebsd-update ]; then + return 0 + fi + + # We could not use current directory name, so add counter to + # the end and try again. + CNT=$((CNT + 1)) + if [ $CNT -gt 9 ]; then + echo "Could not find valid backup dir ($BACKUPKERNELDIR)" + exit 1 + fi + BACKUPKERNELDIR="`echo $BACKUPKERNELDIR | sed -Ee 's/[0-9]\$//'`" + BACKUPKERNELDIR="${BACKUPKERNELDIR}${CNT}" + done +} + +# Backup the current kernel using hardlinks, if not disabled by user. +# Since we delete all files in the directory used for previous backups +# we create a marker file called ".freebsd-update" in the directory so +# we can determine on the next run that the directory was created by +# freebsd-update and we then do not accidentally remove user files in +# the unlikely case that the user has created a directory with a +# conflicting name. +backup_kernel () { + # Only make kernel backup is so configured. + if [ $BACKUPKERNEL != yes ]; then + return 0 + fi + + # Decide which directory name to use for kernel backups. + backup_kernel_finddir + + # Remove old kernel backup files. If $BACKUPKERNELDIR was + # "not ours", backup_kernel_finddir would have exited, so + # deleting the directory content is as safe as we can make it. + if [ -d $BACKUPKERNELDIR ]; then + rm -f $BACKUPKERNELDIR/* + fi + + # Create directory for backup if it doesn't exist. + mkdir -p $BACKUPKERNELDIR + + # Mark the directory as having been created by freebsd-update. + touch $BACKUPKERNELDIR/.freebsd-update + if [ $? -ne 0 ]; then + echo "Could not create kernel backup directory" + exit 1 + fi + + # Disable pathname expansion to be sure *.symbols is not + # expanded. 
+ set -f + + # Use find to ignore symbol files, unless disabled by user. + if [ $BACKUPKERNELSYMBOLFILES = yes ]; then + FINDFILTER="" + else + FINDFILTER=-"a ! -name *.symbols" + fi + + # Backup all the kernel files using hardlinks. + find $KERNELDIR -type f $FINDFILTER | \ + sed -Ee "s,($KERNELDIR)/?(.*),\1/\2 ${BACKUPKERNELDIR}/\2," | \ + xargs -n 2 cp -pl + + # Re-enable patchname expansion. + set +f +} + # Install new files install_from_index () { # First pass: Do everything apart from setting file flags. We @@ -2575,6 +2732,9 @@ install_files () { grep -E '^/boot/' $1/INDEX-OLD > INDEX-OLD grep -E '^/boot/' $1/INDEX-NEW > INDEX-NEW + # Backup current kernel before installing a new one + backup_kernel || return 1 + # Install new files install_from_index INDEX-NEW || return 1 -- cgit v1.1 From 3035528d02691a66223ae2957934a6b3dd43a8b9 Mon Sep 17 00:00:00 2001 From: sam Date: Sun, 23 Aug 2009 16:21:49 +0000 Subject: MFC r196472: Enable _DIRENT_HAVE_D_TYPE so wpa_cli scans directories properly for it's unix domain socket. Before this change wpa_cli would take the first file in the directory that was not "." or "..". Approved by: re (rwatson) --- usr.sbin/wpa/wpa_cli/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/wpa/wpa_cli/Makefile b/usr.sbin/wpa/wpa_cli/Makefile index ae16a72..123485a 100644 --- a/usr.sbin/wpa/wpa_cli/Makefile +++ b/usr.sbin/wpa/wpa_cli/Makefile @@ -11,6 +11,8 @@ MAN= wpa_cli.8 CFLAGS+= -DCONFIG_CTRL_IFACE CFLAGS+= -DCONFIG_CTRL_IFACE_UNIX +# enable use of d_type to identify unix domain sockets +CFLAGS+= -D_DIRENT_HAVE_D_TYPE #CFLAGS+= -DCONFIG_READLINE #LDADD+= -ledit -ltermcap -- cgit v1.1 From e2be57e7d5075e788e4b84a38ee91b60616a9023 Mon Sep 17 00:00:00 2001 From: brian Date: Thu, 27 Aug 2009 07:05:46 +0000 Subject: MFC: When realloc()ing device memory for transfer to another ppp process, don't continue to use the realloc()d pointer - it might have changed! 
Remove some stray diagnostics while I'm here. Approved by: re (kib) --- usr.sbin/ppp/ether.c | 7 ++++--- usr.sbin/ppp/netgraph.c | 8 ++++---- usr.sbin/ppp/tty.c | 9 +++++---- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/ppp/ether.c b/usr.sbin/ppp/ether.c index e4e154a..a86f3bd 100644 --- a/usr.sbin/ppp/ether.c +++ b/usr.sbin/ppp/ether.c @@ -193,17 +193,18 @@ static void ether_device2iov(struct device *d, struct iovec *iov, int *niov, int maxiov __unused, int *auxfd, int *nauxfd) { - struct etherdevice *dev = device2ether(d); + struct etherdevice *dev; int sz = physical_MaxDeviceSize(); - iov[*niov].iov_base = realloc(d, sz); - if (iov[*niov].iov_base == NULL) { + iov[*niov].iov_base = d = realloc(d, sz); + if (d == NULL) { log_Printf(LogALERT, "Failed to allocate memory: %d\n", sz); AbortProgram(EX_OSERR); } iov[*niov].iov_len = sz; (*niov)++; + dev = device2ether(d); if (dev->cs >= 0) { *auxfd = dev->cs; (*nauxfd)++; diff --git a/usr.sbin/ppp/netgraph.c b/usr.sbin/ppp/netgraph.c index 471dc0d..23a575b 100644 --- a/usr.sbin/ppp/netgraph.c +++ b/usr.sbin/ppp/netgraph.c @@ -235,7 +235,6 @@ ng_Read(struct physical *p, void *v, size_t n) { char hook[NG_HOOKSIZ]; -log_Printf(LogDEBUG, "ng_Read\n"); switch (p->dl->state) { case DATALINK_DIAL: case DATALINK_LOGIN: @@ -282,17 +281,18 @@ static void ng_device2iov(struct device *d, struct iovec *iov, int *niov, int maxiov __unused, int *auxfd, int *nauxfd) { - struct ngdevice *dev = device2ng(d); + struct ngdevice *dev; int sz = physical_MaxDeviceSize(); - iov[*niov].iov_base = realloc(d, sz); - if (iov[*niov].iov_base == NULL) { + iov[*niov].iov_base = d = realloc(d, sz); + if (d == NULL) { log_Printf(LogALERT, "Failed to allocate memory: %d\n", sz); AbortProgram(EX_OSERR); } iov[*niov].iov_len = sz; (*niov)++; + dev = device2ng(d); *auxfd = dev->cs; (*nauxfd)++; } diff --git a/usr.sbin/ppp/tty.c b/usr.sbin/ppp/tty.c index 8d5d58b..ab984cc 100644 --- a/usr.sbin/ppp/tty.c 
+++ b/usr.sbin/ppp/tty.c @@ -384,7 +384,6 @@ UnloadLineDiscipline(struct physical *p) struct ttydevice *dev = device2tty(p->handler); if (isngtty(dev)) { -log_Printf(LogPHASE, "back to speed %d\n", dev->real.speed); if (!physical_SetSpeed(p, dev->real.speed)) log_Printf(LogWARN, "Couldn't reset tty speed to %d\n", dev->real.speed); dev->real.speed = 0; @@ -582,17 +581,19 @@ tty_device2iov(struct device *d, struct iovec *iov, int *niov, #endif ) { - struct ttydevice *dev = device2tty(d); + struct ttydevice *dev; int sz = physical_MaxDeviceSize(); - iov[*niov].iov_base = realloc(d, sz); - if (iov[*niov].iov_base == NULL) { + iov[*niov].iov_base = d = realloc(d, sz); + if (d == NULL) { log_Printf(LogALERT, "Failed to allocate memory: %d\n", sz); AbortProgram(EX_OSERR); } iov[*niov].iov_len = sz; (*niov)++; + dev = device2tty(d); + #ifndef NONETGRAPH if (dev->cs >= 0) { *auxfd = dev->cs; -- cgit v1.1 From 9e9a81fc35d1a5cf36349b82e8fab976fcb18b76 Mon Sep 17 00:00:00 2001 From: brian Date: Thu, 27 Aug 2009 07:07:38 +0000 Subject: MFC: When ``ppp -direct'' is invoked by a program that uses pipe(2) to create stdin and stdout, don't blindly try to use stdin as a bi-directional channel. Instead, detect the pipe and set up a special exec handler that indirects write() calls through stdout. This fixes the problem where ``set device "!ssh -e none host ppp -direct label"'' no longer works with an openssh-5.2 server side as that version of openssh ignores the USE_PIPES config setting and *always* uses pipes (rather than socketpair) for stdin/stdout channels. 
Approved by: re (kib) --- usr.sbin/ppp/exec.c | 422 ++++++++++++++++++++++++++++++++++-------------- usr.sbin/ppp/exec.h | 2 +- usr.sbin/ppp/main.c | 6 +- usr.sbin/ppp/physical.c | 1 + 4 files changed, 305 insertions(+), 126 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/ppp/exec.c b/usr.sbin/ppp/exec.c index 7c80034..a53db37 100644 --- a/usr.sbin/ppp/exec.c +++ b/usr.sbin/ppp/exec.c @@ -35,7 +35,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -63,24 +65,106 @@ #include "cbcp.h" #include "datalink.h" #include "id.h" +#include "main.h" #include "exec.h" -static struct device execdevice = { + +struct execdevice { + struct device dev; /* What struct physical knows about */ + int fd_out; /* output descriptor */ +}; + +#define device2exec(d) ((d)->type == EXEC_DEVICE ? (struct execdevice *)d : NULL) + +unsigned +exec_DeviceSize(void) +{ + return sizeof(struct execdevice); +} + +static void +exec_Free(struct physical *p) +{ + struct execdevice *dev = device2exec(p->handler); + + if (dev->fd_out != -1) + close(dev->fd_out); + free(dev); +} + +static void +exec_device2iov(struct device *d, struct iovec *iov, int *niov, + int maxiov __unused, int *auxfd, int *nauxfd) +{ + struct execdevice *dev; + int sz = physical_MaxDeviceSize(); + + iov[*niov].iov_base = d = realloc(d, sz); + if (d == NULL) { + log_Printf(LogALERT, "Failed to allocate memory: %d\n", sz); + AbortProgram(EX_OSERR); + } + iov[*niov].iov_len = sz; + (*niov)++; + + dev = device2exec(d); + if (dev->fd_out >= 0) { + *auxfd = dev->fd_out; + (*nauxfd)++; + } +} + +static int +exec_RemoveFromSet(struct physical *p, fd_set *r, fd_set *w, fd_set *e) +{ + struct execdevice *dev = device2exec(p->handler); + int sets; + + p->handler->removefromset = NULL; + sets = physical_RemoveFromSet(p, r, w, e); + p->handler->removefromset = exec_RemoveFromSet; + + if (dev->fd_out >= 0) { + if (w && FD_ISSET(dev->fd_out, w)) { + FD_CLR(dev->fd_out, w); + log_Printf(LogTIMER, 
"%s: fdunset(w) %d\n", p->link.name, dev->fd_out); + sets++; + } + if (e && FD_ISSET(dev->fd_out, e)) { + FD_CLR(dev->fd_out, e); + log_Printf(LogTIMER, "%s: fdunset(e) %d\n", p->link.name, dev->fd_out); + sets++; + } + } + + return sets; +} + +static ssize_t +exec_Write(struct physical *p, const void *v, size_t n) +{ + struct execdevice *dev = device2exec(p->handler); + int fd = dev->fd_out == -1 ? p->fd : dev->fd_out; + + return write(fd, v, n); +} + +static struct device baseexecdevice = { EXEC_DEVICE, "exec", 0, { CD_NOTREQUIRED, 0 }, NULL, + exec_RemoveFromSet, NULL, NULL, NULL, NULL, NULL, + exec_Free, NULL, - NULL, - NULL, - NULL, - NULL, + exec_Write, + exec_device2iov, NULL, NULL, NULL @@ -88,146 +172,238 @@ static struct device execdevice = { struct device * exec_iov2device(int type, struct physical *p, struct iovec *iov, - int *niov, int maxiov __unused, int *auxfd __unused, - int *nauxfd __unused) + int *niov, int maxiov __unused, int *auxfd, int *nauxfd) { if (type == EXEC_DEVICE) { - free(iov[(*niov)++].iov_base); - physical_SetupStack(p, execdevice.name, PHYSICAL_NOFORCE); - return &execdevice; + struct execdevice *dev = (struct execdevice *)iov[(*niov)++].iov_base; + + dev = realloc(dev, sizeof *dev); /* Reduce to the correct size */ + if (dev == NULL) { + log_Printf(LogALERT, "Failed to allocate memory: %d\n", + (int)(sizeof *dev)); + AbortProgram(EX_OSERR); + } + + if (*nauxfd) { + dev->fd_out = *auxfd; + (*nauxfd)--; + } else + dev->fd_out = -1; + + /* Refresh function pointers etc */ + memcpy(&dev->dev, &baseexecdevice, sizeof dev->dev); + + physical_SetupStack(p, dev->dev.name, PHYSICAL_NOFORCE); + return &dev->dev; } return NULL; } +static int +exec_UpdateSet(struct fdescriptor *d, fd_set *r, fd_set *w, fd_set *e, int *n) +{ + struct physical *p = descriptor2physical(d); + struct execdevice *dev = device2exec(p->handler); + int result = 0; + + if (w && dev->fd_out >= 0) { + FD_SET(dev->fd_out, w); + log_Printf(LogTIMER, "%s: fdset(w) %d\n", 
p->link.name, dev->fd_out); + result++; + w = NULL; + } + + if (e && dev->fd_out >= 0) { + FD_SET(dev->fd_out, e); + log_Printf(LogTIMER, "%s: fdset(e) %d\n", p->link.name, dev->fd_out); + result++; + } + + if (result && *n <= dev->fd_out) + *n = dev->fd_out + 1; + + return result + physical_doUpdateSet(d, r, w, e, n, 0); +} + +static int +exec_IsSet(struct fdescriptor *d, const fd_set *fdset) +{ + struct physical *p = descriptor2physical(d); + struct execdevice *dev = device2exec(p->handler); + int result = dev->fd_out >= 0 && FD_ISSET(dev->fd_out, fdset); + result += physical_IsSet(d, fdset); + + return result; +} + struct device * exec_Create(struct physical *p) { - if (p->fd < 0 && *p->name.full == '!') { - int fids[2], type; - - p->fd--; /* We own the device but maybe can't use it - change fd */ - type = physical_IsSync(p) ? SOCK_DGRAM : SOCK_STREAM; - - if (socketpair(AF_UNIX, type, PF_UNSPEC, fids) < 0) - log_Printf(LogPHASE, "Unable to create pipe for line exec: %s\n", - strerror(errno)); - else { - static int child_status; /* This variable is abused ! */ - int stat, argc, i, ret, wret, pidpipe[2]; - pid_t pid, realpid; - char *argv[MAXARGS]; - - stat = fcntl(fids[0], F_GETFL, 0); - if (stat > 0) { - stat |= O_NONBLOCK; - fcntl(fids[0], F_SETFL, stat); + struct execdevice *dev; + + dev = NULL; + if (p->fd < 0) { + if (*p->name.full == '!') { + int fids[2], type; + + if ((dev = malloc(sizeof *dev)) == NULL) { + log_Printf(LogWARN, "%s: Cannot allocate an exec device: %s\n", + p->link.name, strerror(errno)); + return NULL; } - realpid = getpid(); - if (pipe(pidpipe) == -1) { - log_Printf(LogPHASE, "Unable to pipe for line exec: %s\n", + dev->fd_out = -1; + + p->fd--; /* We own the device but maybe can't use it - change fd */ + type = physical_IsSync(p) ? 
SOCK_DGRAM : SOCK_STREAM; + + if (socketpair(AF_UNIX, type, PF_UNSPEC, fids) < 0) { + log_Printf(LogPHASE, "Unable to create pipe for line exec: %s\n", strerror(errno)); - close(fids[1]); - } else switch ((pid = fork())) { - case -1: - log_Printf(LogPHASE, "Unable to fork for line exec: %s\n", + free(dev); + dev = NULL; + } else { + static int child_status; /* This variable is abused ! */ + int stat, argc, i, ret, wret, pidpipe[2]; + pid_t pid, realpid; + char *argv[MAXARGS]; + + stat = fcntl(fids[0], F_GETFL, 0); + if (stat > 0) { + stat |= O_NONBLOCK; + fcntl(fids[0], F_SETFL, stat); + } + realpid = getpid(); + if (pipe(pidpipe) == -1) { + log_Printf(LogPHASE, "Unable to pipe for line exec: %s\n", strerror(errno)); - close(pidpipe[0]); - close(pidpipe[1]); close(fids[1]); - break; - - case 0: - close(pidpipe[0]); close(fids[0]); - timer_TermService(); -#ifndef NOSUID - setuid(ID0realuid()); -#endif - - child_status = 0; - switch ((pid = vfork())) { - case 0: - close(pidpipe[1]); - break; - - case -1: - ret = errno; - log_Printf(LogPHASE, "Unable to vfork to drop parent: %s\n", - strerror(errno)); - close(pidpipe[1]); - _exit(ret); - - default: - write(pidpipe[1], &pid, sizeof pid); - close(pidpipe[1]); - _exit(child_status); /* The error from exec() ! 
*/ - } - - log_Printf(LogDEBUG, "Exec'ing ``%s''\n", p->name.base); - - if ((argc = MakeArgs(p->name.base, argv, VECSIZE(argv), - PARSE_REDUCE|PARSE_NOHASH)) < 0) { - log_Printf(LogWARN, "Syntax error in exec command\n"); - _exit(ESRCH); - } - - command_Expand(argv, argc, (char const *const *)argv, - p->dl->bundle, 0, realpid); - - dup2(fids[1], STDIN_FILENO); - dup2(fids[1], STDOUT_FILENO); - dup2(fids[1], STDERR_FILENO); - for (i = getdtablesize(); i > STDERR_FILENO; i--) - fcntl(i, F_SETFD, 1); - - execvp(*argv, argv); - child_status = errno; /* Only works for vfork() */ - printf("execvp failed: %s: %s\r\n", *argv, strerror(child_status)); - _exit(child_status); - break; - - default: - close(pidpipe[1]); - close(fids[1]); - if (read(pidpipe[0], &p->session_owner, sizeof p->session_owner) != - sizeof p->session_owner) - p->session_owner = (pid_t)-1; - close(pidpipe[0]); - while ((wret = waitpid(pid, &stat, 0)) == -1 && errno == EINTR) - ; - if (wret == -1) { - log_Printf(LogWARN, "Waiting for child process: %s\n", + free(dev); + dev = NULL; + } else switch ((pid = fork())) { + case -1: + log_Printf(LogPHASE, "Unable to fork for line exec: %s\n", strerror(errno)); + close(pidpipe[0]); + close(pidpipe[1]); + close(fids[1]); close(fids[0]); - p->session_owner = (pid_t)-1; - break; - } else if (WIFSIGNALED(stat)) { - log_Printf(LogWARN, "Child process received sig %d !\n", - WTERMSIG(stat)); - close(fids[0]); - p->session_owner = (pid_t)-1; break; - } else if (WIFSTOPPED(stat)) { - log_Printf(LogWARN, "Child process received stop sig %d !\n", - WSTOPSIG(stat)); - /* I guess that's ok.... 
*/ - } else if ((ret = WEXITSTATUS(stat))) { - log_Printf(LogWARN, "Cannot exec \"%s\": %s\n", p->name.base, - strerror(ret)); + + case 0: + close(pidpipe[0]); close(fids[0]); - p->session_owner = (pid_t)-1; + timer_TermService(); + #ifndef NOSUID + setuid(ID0realuid()); + #endif + + child_status = 0; + switch ((pid = vfork())) { + case 0: + close(pidpipe[1]); + break; + + case -1: + ret = errno; + log_Printf(LogPHASE, "Unable to vfork to drop parent: %s\n", + strerror(errno)); + close(pidpipe[1]); + _exit(ret); + + default: + write(pidpipe[1], &pid, sizeof pid); + close(pidpipe[1]); + _exit(child_status); /* The error from exec() ! */ + } + + log_Printf(LogDEBUG, "Exec'ing ``%s''\n", p->name.base); + + if ((argc = MakeArgs(p->name.base, argv, VECSIZE(argv), + PARSE_REDUCE|PARSE_NOHASH)) < 0) { + log_Printf(LogWARN, "Syntax error in exec command\n"); + _exit(ESRCH); + } + + command_Expand(argv, argc, (char const *const *)argv, + p->dl->bundle, 0, realpid); + + dup2(fids[1], STDIN_FILENO); + dup2(fids[1], STDOUT_FILENO); + dup2(fids[1], STDERR_FILENO); + for (i = getdtablesize(); i > STDERR_FILENO; i--) + fcntl(i, F_SETFD, 1); + + execvp(*argv, argv); + child_status = errno; /* Only works for vfork() */ + printf("execvp failed: %s: %s\r\n", *argv, strerror(child_status)); + _exit(child_status); break; - } - p->fd = fids[0]; - log_Printf(LogDEBUG, "Using descriptor %d for child\n", p->fd); - physical_SetupStack(p, execdevice.name, PHYSICAL_NOFORCE); - if (p->cfg.cd.necessity != CD_DEFAULT) - log_Printf(LogWARN, "Carrier settings ignored\n"); - return &execdevice; + + default: + close(pidpipe[1]); + close(fids[1]); + if (read(pidpipe[0], &p->session_owner, sizeof p->session_owner) != + sizeof p->session_owner) + p->session_owner = (pid_t)-1; + close(pidpipe[0]); + while ((wret = waitpid(pid, &stat, 0)) == -1 && errno == EINTR) + ; + if (wret == -1) { + log_Printf(LogWARN, "Waiting for child process: %s\n", + strerror(errno)); + close(fids[0]); + p->session_owner = 
(pid_t)-1; + break; + } else if (WIFSIGNALED(stat)) { + log_Printf(LogWARN, "Child process received sig %d !\n", + WTERMSIG(stat)); + close(fids[0]); + p->session_owner = (pid_t)-1; + break; + } else if (WIFSTOPPED(stat)) { + log_Printf(LogWARN, "Child process received stop sig %d !\n", + WSTOPSIG(stat)); + /* I guess that's ok.... */ + } else if ((ret = WEXITSTATUS(stat))) { + log_Printf(LogWARN, "Cannot exec \"%s\": %s\n", p->name.base, + strerror(ret)); + close(fids[0]); + p->session_owner = (pid_t)-1; + break; + } + p->fd = fids[0]; + log_Printf(LogDEBUG, "Using descriptor %d for child\n", p->fd); + } } - close(fids[0]); } + } else { + struct stat st; + + if (fstat(p->fd, &st) != -1 && (st.st_mode & S_IFIFO)) { + if ((dev = malloc(sizeof *dev)) == NULL) + log_Printf(LogWARN, "%s: Cannot allocate an exec device: %s\n", + p->link.name, strerror(errno)); + else if (p->fd == STDIN_FILENO) { + log_Printf(LogPHASE, "%s: Using stdin/stdout to communicate with " + "parent (pipe mode)\n", p->link.name); + dev->fd_out = dup(STDOUT_FILENO); + + /* Hook things up so that we monitor dev->fd_out */ + p->desc.UpdateSet = exec_UpdateSet; + p->desc.IsSet = exec_IsSet; + } else + dev->fd_out = -1; + } + } + + if (dev) { + memcpy(&dev->dev, &baseexecdevice, sizeof dev->dev); + physical_SetupStack(p, dev->dev.name, PHYSICAL_NOFORCE); + if (p->cfg.cd.necessity != CD_DEFAULT) + log_Printf(LogWARN, "Carrier settings ignored\n"); + return &dev->dev; } return NULL; diff --git a/usr.sbin/ppp/exec.h b/usr.sbin/ppp/exec.h index d4b3387..32bd748 100644 --- a/usr.sbin/ppp/exec.h +++ b/usr.sbin/ppp/exec.h @@ -32,4 +32,4 @@ struct device; extern struct device *exec_Create(struct physical *); extern struct device *exec_iov2device(int, struct physical *, struct iovec *, int *, int, int *, int *); -#define exec_DeviceSize physical_DeviceSize +extern unsigned exec_DeviceSize(void); diff --git a/usr.sbin/ppp/main.c b/usr.sbin/ppp/main.c index b4d5e29..fd826d0 100644 --- a/usr.sbin/ppp/main.c +++ 
b/usr.sbin/ppp/main.c @@ -509,9 +509,11 @@ main(int argc, char **argv) if (!sw.fg) setsid(); } else { - /* -direct - STDIN_FILENO gets used by physical_Open */ + /* + * -direct - STDIN_FILENO gets used by physical_Open. STDOUT_FILENO + * *may* get used in exec/pipe mode. + */ prompt_TtyInit(NULL); - close(STDOUT_FILENO); close(STDERR_FILENO); } } else { diff --git a/usr.sbin/ppp/physical.c b/usr.sbin/ppp/physical.c index ed3ab9c..0377800 100644 --- a/usr.sbin/ppp/physical.c +++ b/usr.sbin/ppp/physical.c @@ -1017,6 +1017,7 @@ physical_Open(struct physical *p) p->fd = STDIN_FILENO; for (h = 0; h < NDEVICES && p->handler == NULL && p->fd >= 0; h++) p->handler = (*devices[h].create)(p); + close(STDOUT_FILENO); if (p->fd >= 0) { if (p->handler == NULL) { physical_SetupStack(p, "unknown", PHYSICAL_NOFORCE); -- cgit v1.1 From 02bf3e241c1682c2e7acf0853087423915d64fe9 Mon Sep 17 00:00:00 2001 From: brian Date: Sat, 29 Aug 2009 04:15:37 +0000 Subject: MFC r196530: Document that ppp handles pipe(2) descriptors specially in -direct mode. Approved by: re (kib) --- usr.sbin/ppp/ppp.8.m4 | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/ppp/ppp.8.m4 b/usr.sbin/ppp/ppp.8.m4 index 2fc84e4..6663759 100644 --- a/usr.sbin/ppp/ppp.8.m4 +++ b/usr.sbin/ppp/ppp.8.m4 @@ -27,7 +27,7 @@ changecom(,)dnl .\" .\" $FreeBSD$ .\" -.Dd May 24, 2007 +.Dd August 25, 2009 .Dt PPP 8 .Os .Sh NAME @@ -171,6 +171,17 @@ If callback is configured, will use the .Dq set device information when dialing back. +.Pp +When run in +.Fl direct +mode, +.Nm +will behave slightly differently if descriptor 0 was created by +.Xr pipe 2 . +As pipes are not bi-directional, ppp will redirect all writes to descriptor +1 (standard output), leaving only reads acting on descriptor 0. +No special action is taken if descriptor 0 was created by +.Xr socketpair 2 . .It Fl dedicated This option is designed for machines connected with a dedicated wire. 
@@ -6055,6 +6066,8 @@ This socket is used to pass links between different instances of .Xr tcpdump 1 , .Xr telnet 1 , .Xr kldload 2 , +.Xr pipe 2 , +.Xr socketpair 2 , ifdef({LOCALNAT},{},{.Xr libalias 3 , })dnl ifdef({LOCALRAD},{},{.Xr libradius 3 , -- cgit v1.1 From 66f76404df6720f0882ae280efe98a144122d75e Mon Sep 17 00:00:00 2001 From: bz Date: Sat, 5 Sep 2009 17:29:08 +0000 Subject: MFC r196866: In the NEXTADDR macro use SA_SIZE() rather than directly using sizeof(), as introduced in r186119, for advancing the current position into the buffer. See comment in net/route.h for a description of the difference. This makes ndp -s work again. Reviewed by: qingli Approved by: re (kib) --- usr.sbin/ndp/ndp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/ndp/ndp.c b/usr.sbin/ndp/ndp.c index 570961a..17e439c 100644 --- a/usr.sbin/ndp/ndp.c +++ b/usr.sbin/ndp/ndp.c @@ -116,7 +116,7 @@ #define NEXTADDR(w, s) \ if (rtm->rtm_addrs & (w)) { \ - bcopy((char *)&s, cp, sizeof(s)); cp += sizeof(s);} + bcopy((char *)&s, cp, sizeof(s)); cp += SA_SIZE(&s);} static pid_t pid; -- cgit v1.1 From 9a11e310006fb7e9bc4ef7590cd176a3e72ea9d2 Mon Sep 17 00:00:00 2001 From: ume Date: Sun, 13 Sep 2009 11:34:33 +0000 Subject: MFC r196475: - Add AS lookup functionality to traceroute6(8) as well. - Support for IPv6 transport for AS lookup. - Introduce $RA_SERVER to set whois server. - Support for 4 byte ASN. - ANSIfy function declaration in as.c. Approved by: re (kib) --- usr.sbin/traceroute6/Makefile | 5 +++++ usr.sbin/traceroute6/traceroute6.8 | 11 +++++++++-- usr.sbin/traceroute6/traceroute6.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 42 insertions(+), 3 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/traceroute6/Makefile b/usr.sbin/traceroute6/Makefile index 6ff72d2..60618a2 100644 --- a/usr.sbin/traceroute6/Makefile +++ b/usr.sbin/traceroute6/Makefile @@ -13,12 +13,17 @@ # A PARTICULAR PURPOSE. 
# $FreeBSD$ +TRACEROUTE_DISTDIR?= ${.CURDIR}/../../contrib/traceroute +.PATH: ${TRACEROUTE_DISTDIR} + PROG= traceroute6 MAN= traceroute6.8 +SRCS= as.c traceroute6.c BINOWN= root BINMODE= 4555 CFLAGS+= -DIPSEC -DUSE_RFC2292BIS -DHAVE_POLL +CFLAGS+= -I${.CURDIR} -I${TRACEROUTE_DISTDIR} -I. DPADD= ${LIBIPSEC} LDADD= -lipsec diff --git a/usr.sbin/traceroute6/traceroute6.8 b/usr.sbin/traceroute6/traceroute6.8 index b6116f0..884ad6d 100644 --- a/usr.sbin/traceroute6/traceroute6.8 +++ b/usr.sbin/traceroute6/traceroute6.8 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 17, 1998 +.Dd August 24, 2009 .Dt TRACEROUTE6 8 .Os .\" @@ -40,7 +40,7 @@ .Sh SYNOPSIS .Nm .Bk -words -.Op Fl dIlnNrvU +.Op Fl adIlnNrvU .Ek .Bk -words .Op Fl f Ar firsthop @@ -64,6 +64,9 @@ .Op Fl w Ar waittime .Ek .Bk -words +.Op Fl A Ar as_server +.Ek +.Bk -words .Ar target .Op Ar datalen .Ek @@ -84,6 +87,10 @@ after the destination host name. .Pp Other options are: .Bl -tag -width Ds +.It Fl a +Turn on AS# lookups for each hop encountered. +.It Fl A Ar as_server +Turn on AS# lookups and use the given server instead of the default. .It Fl d Debug mode. 
.It Fl f Ar firsthop diff --git a/usr.sbin/traceroute6/traceroute6.c b/usr.sbin/traceroute6/traceroute6.c index d06502f..699af68 100644 --- a/usr.sbin/traceroute6/traceroute6.c +++ b/usr.sbin/traceroute6/traceroute6.c @@ -282,6 +282,8 @@ static const char rcsid[] = #include #endif +#include "as.h" + #define DUMMY_PORT 10010 #define MAXPACKET 65535 /* max ip packet size */ @@ -359,6 +361,9 @@ int waittime = 5; /* time to wait for response (in seconds) */ int nflag; /* print addresses numerically */ int useproto = IPPROTO_UDP; /* protocol to use to send packet */ int lflag; /* print both numerical address & hostname */ +int as_path; /* print as numbers for each hop */ +char *as_server = NULL; +void *asn; int main(argc, argv) @@ -411,8 +416,15 @@ main(argc, argv) seq = 0; - while ((ch = getopt(argc, argv, "df:g:Ilm:nNp:q:rs:Uvw:")) != -1) + while ((ch = getopt(argc, argv, "aA:df:g:Ilm:nNp:q:rs:Uvw:")) != -1) switch (ch) { + case 'a': + as_path = 1; + break; + case 'A': + as_path = 1; + as_server = optarg; + break; case 'd': options |= SO_DEBUG; break; @@ -867,6 +879,17 @@ main(argc, argv) srcport = ntohs(Src.sin6_port); } + if (as_path) { + asn = as_setup(as_server); + if (asn == NULL) { + fprintf(stderr, + "traceroute6: as_setup failed, AS# lookups" + " disabled\n"); + (void)fflush(stderr); + as_path = 0; + } + } + /* * Message to users */ @@ -948,6 +971,8 @@ main(argc, argv) exit(0); } } + if (as_path) + as_shutdown(asn); exit(0); } @@ -1361,6 +1386,8 @@ print(mhdr, cc) if (getnameinfo((struct sockaddr *)from, from->sin6_len, hbuf, sizeof(hbuf), NULL, 0, NI_NUMERICHOST) != 0) strlcpy(hbuf, "invalid", sizeof(hbuf)); + if (as_path) + printf(" [AS%u]", as_lookup(asn, hbuf, AF_INET6)); if (nflag) printf(" %s", hbuf); else if (lflag) -- cgit v1.1 From 919b95389dff318724dca86d1bf976860d92b724 Mon Sep 17 00:00:00 2001 From: jilles Date: Tue, 6 Oct 2009 21:23:49 +0000 Subject: MFC r197625: Fix using lp(1) without the new -t option after r194171. 
Approved by: re (kib) --- usr.sbin/lpr/lp/lp.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/lpr/lp/lp.sh b/usr.sbin/lpr/lp/lp.sh index 03ad887..dbec053 100644 --- a/usr.sbin/lpr/lp/lp.sh +++ b/usr.sbin/lpr/lp/lp.sh @@ -70,7 +70,7 @@ do s) # (silent option) : ;; t) # title for banner page - title="-J${OPTARG}";; + title="${OPTARG}";; *) # (error msg printed by getopts) exit 2;; esac @@ -78,4 +78,4 @@ done shift $(($OPTIND - 1)) -exec /usr/bin/lpr "-P${dest}" ${symlink} ${ncopies} ${mailafter} "${title}" "$@" +exec /usr/bin/lpr "-P${dest}" ${symlink} ${ncopies} ${mailafter} ${title:+-J"${title}"} "$@" -- cgit v1.1 From 9dae859f9ca26acc7f84a1235362d24e3102bf93 Mon Sep 17 00:00:00 2001 From: jhb Date: Wed, 28 Oct 2009 21:08:20 +0000 Subject: MFC: Remove spurious README and an old version of the manpage. --- usr.sbin/mfiutil/README | 104 -------- usr.sbin/mfiutil/mfiutil.1 | 574 --------------------------------------------- 2 files changed, 678 deletions(-) delete mode 100644 usr.sbin/mfiutil/README delete mode 100644 usr.sbin/mfiutil/mfiutil.1 (limited to 'usr.sbin') diff --git a/usr.sbin/mfiutil/README b/usr.sbin/mfiutil/README deleted file mode 100644 index 15e16bb..0000000 --- a/usr.sbin/mfiutil/README +++ /dev/null @@ -1,104 +0,0 @@ -# $FreeBSD$ - -This package includes a mfiutil command for administering mfi(4) controllers -on FreeBSD. - -Version 1.0.13 - * Cleaned up warnings in preparation for integration with FreeBSD - -Version 1.0.12 - * Add 'drive clear' command to wipe drives with all 0x00 characters - -Version 1.0.11 - * Display serial number for drives - * Display location info for drives with 'show config' - -Version 1.0.10 - * Display min and max stripe size supported by adapters. - * Added support for examining the controller event log. - -Version 1.0.9 - * Display stripe size for volumes. - * Added support for setting the stripe size for new volumes. 
- * Fix a regression in 1.0.8 that broke creation of RAID-5 and RAID-50 - arrays. - -Version 1.0.8 - * Added support for RAID-60 arrays. - * Added 'flash' command to support firmware flashing. - -Version 1.0.7 - * Renamed 'clear config' to 'clear, 'create volume' to 'create', - 'delete volume' to 'delete', 'create spare' to 'add', and - 'delete spare' to 'remove'. The old names still work. - * Added support for RAID-6 arrays. - -Version 1.0.6 - * Added 'show patrol', 'patrol', 'start patrol', and 'stop patrol' - commands to manage patrol reads. - -Version 1.0.5 - * Added 'create volume' and 'delete volume' commands to manage volumes. - * Added 'clear config' command to clear entire configuration. - * Added more detailed error reporting based on firmware status codes. - * Renamed 'progress' command to 'drive progress'. - * Added 'volume progress' command to display progress of volume-level - activites such as background inits. - * Fixed 'create spare' to properly add global spares. - -Version 1.0.4 - * Added 'create spare' and 'delete spare' commands to manage hot spares. - * Added 'good' command to mark unconfigured bad drives as good. - * Display more information about hot spares in 'show config' - * Allow physical drives to be specified via Exx:Syy similar to megacli - * Display onboard memory size in 'show adapter' - -Version 1.0.3 - * Added 'cache' command to manage cache settings for volumes. - * Added 'name' command to name volumes. - * Added manpage. - -Version 1.0.2 - * Added 'show adapter' and 'show battery' commands. - * Added RAID level of volumes to 'show config' and 'show volumes'. - * Added drive model info to 'show config' and 'show drives'. - * Added package firmware version to 'show firmware'. - * Added read and write cache status to 'show volumes'. - * Map volume IDs to mfidX device names on newer kernels. - -Version 1.0.1 - * Added 'show firmware' command - -Version 1.0.0 - * Initial release - -usage: mfiutil [-u unit] ... 
- -Commands include: - version - show adapter - display controller information - show battery - display battery information - show config - display RAID configuration - show drives - list physical drives - show firmware - list firmware images - show volumes - list logical volumes - show patrol - display patrol read status - fail - fail a physical drive - good - mark a bad physical drive as good - rebuild - mark failed drive ready for rebuild - drive progress - display status of active operations - start rebuild - abort rebuild - locate - toggle drive LED - cache [command [setting]] - name - volume progress - display status of active operations - clear - clear volume configuration - create [-v] [,[,...]] [[,[,...]] - delete - add [volume] - add a hot spare - remove - remove a hot spare - patrol [interval [start]] - start patrol - start a patrol read - stop patrol - stop a patrol read - flash diff --git a/usr.sbin/mfiutil/mfiutil.1 b/usr.sbin/mfiutil/mfiutil.1 deleted file mode 100644 index d7860ab..0000000 --- a/usr.sbin/mfiutil/mfiutil.1 +++ /dev/null @@ -1,574 +0,0 @@ -.\" Copyright (c) 2008, 2009 Yahoo!, Inc. -.\" All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. The names of the authors may not be used to endorse or promote -.\" products derived from this software without specific prior written -.\" permission. 
-.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" $FreeBSD$ -.\" -.Dd June 17, 2008 -.Dt MFIUTIL 1 -.Os -.Sh NAME -.Nm mfiutil -.Nd Utility for managing LSI MegaRAID SAS controllers -.Sh SYNOPSIS -.Nm -.Cm version -.Nm -.Op Fl u Ar unit -.Cm show adapter -.Nm -.Op Fl u Ar unit -.Cm show battery -.Nm -.Op Fl u Ar unit -.Cm show config -.Nm -.Op Fl u Ar unit -.Cm show drives -.Nm -.Op Fl u Ar unit -.Cm show events -.Op Fl c Ar class -.Op Fl l Ar locale -.Op Fl n Ar count -.Op Fl v -.Op Ar start Op Ar stop -.Nm -.Op Fl u Ar unit -.Cm show firmware -.Nm -.Op Fl u Ar unit -.Cm show logstate -.Nm -.Op Fl u Ar unit -.Cm show patrol -.Nm -.Op Fl u Ar unit -.Cm show volumes -.Nm -.Op Fl u Ar unit -.Cm fail Ar drive -.Nm -.Op Fl u Ar unit -.Cm good Ar drive -.Nm -.Op Fl u Ar unit -.Cm rebuild Ar drive -.Nm -.Op Fl u Ar unit -.Cm drive progress Ar drive -.Nm -.Op Fl u Ar unit -.Cm drive clear Ar drive Brq "start | stop" -.Nm -.Op Fl u Ar unit -.Cm start rebuild Ar drive -.Nm -.Op Fl u Ar unit -.Cm abort rebuild Ar drive -.Nm -.Op Fl u Ar unit -.Cm locate Ar drive Brq "on | off" -.Nm -.Op Fl u Ar unit -.Cm cache Ar volume Op Ar setting Op Ar value -.Nm -.Op Fl u Ar unit -.Cm name Ar volume Ar name -.Nm -.Op Fl u Ar unit -.Cm volume 
progress Ar volume -.Nm -.Op Fl u Ar unit -.Cm clear -.Nm -.Op Fl u Ar unit -.Cm create Ar type -.Op Fl v -.Op Fl s Ar stripe_size -.Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." -.Op Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." -.Nm -.Op Fl u Ar unit -.Cm delete Ar volume -.Nm -.Op Fl u Ar unit -.Cm add Ar drive Op Ar volume -.Nm -.Op Fl u Ar unit -.Cm remove Ar drive -.Nm -.Op Fl u Ar unit -.Cm start patrol -.Nm -.Op Fl u Ar unit -.Cm stop patrol -.Nm -.Op Fl u Ar unit -.Cm patrol Ar command Op Ar interval Op Ar start -.Nm -.Op Fl u Ar unit -.Cm flash Ar file -.Sh DESCRIPTION -The -.Nm -utility can be used to display or modify various parameters on LSI -MegaRAID SAS RAID controllers. -Each invocation of -.Nm -consists of zero or more global options followed by a command. -Commands may support additional optional or required arguments after the -command. -.Pp -Currently one global option is supported: -.Bl -tag -width indent -.It Fl u Ar unit -.Ar unit -specifies the unit of the controller to work with. -If no unit is specified, -then unit 0 is used. -.El -.Pp -Volumes may be specified in two forms. -First, -a volume may be identified by its target ID. -Second, -on the volume may be specified by the corresponding -.Em mfidX -device, -such as -.Em mfid0 . -Note that this second method only works on OS versions -.Dv 6.2-YAHOO-20070510 -and later. -.Pp -Drives may be specified in two forms. -First, -a drive may be identified by its device ID. -The device ID for configured drives can be found in -.Cm show config . -Second, -a drive may be identified by its location as -.Sm off -.Op E Ar xx Ns \&: -.Li S Ns Ar yy -.Sm on -where -.Ar xx -is the enclosure -and -.Ar yy -is the slot for each drive as displayed in -.Cm show drives . -.Pp -The -.Nm -utility supports several different groups of commands. -The first group of commands provide information about the controller, -the volumes it manages, and the drives it controls. 
-The second group of commands are used to manage the physical drives -attached to the controller. -The third group of commands are used to manage the logical volumes -managed by the controller. -The fourth group of commands are used to manage the drive configuration for -the controller. -The fifth group of commands are used to manage controller-wide operations. -.Pp -The informational commands include: -.Bl -tag -width indent -.It Cm version -Displays the version of -.Nm . -.It Cm show adapter -Displays information about the RAID controller such as the model number. -.It Cm show battery -Displays information about the battery from the battery backup unit. -.It Cm show config -Displays the volume and drive configuration for the controller. -Each array is listed along with the physical drives the array is built from. -Each volume is listed along with the arrays that the volume spans. -If any hot spare drives are configured, then they are listed as well. -.It Cm show drives -Lists all of the physical drives attached to the controller. -.It Xo Cm show events -.Op Fl c Ar class -.Op Fl l Ar locale -.Op Fl n Ar count -.Op Fl v -.Op Ar start Op Ar stop -.Xc -Display entries from the controller's event log. -The controller maintains a circular buffer of events. -Each event is tagged with a class and locale. -.Pp -The -.Ar class -parameter limits the output to entries at the specified class or higher. -The default class is -.Dq warn . -The available classes from lowest priority to highest are: -.Bl -tag -width -indent -.It Cm debug -Debug messages. -.It Cm progress -Periodic progress updates for long-running operations such as background -initializations, array rebuilds, or patrol reads. -.It Cm info -Informational messages such as drive insertions and volume creations. -.It Cm warn -Indicates that some component may be close to failing. -.It Cm crit -A component has failed, but no data is lost. -For example, a volume becoming degraded due to a drive failure. 
-.It Cm fatal -A component has failed resulting in data loss. -.It Cm dead -The controller itself has died. -.El -.Pp -The -.Ar locale -parameter limits the output to entries for the specified part of the controller. -The default locale is -.Dq all . -The available locales are -.Dq volume , -.Dq drive , -.Dq enclousure , -.Dq battery , -.Dq sas , -.Dq controller , -.Dq config , -.Dq cluster , -and -.Dq all . -.Pp -The -.Ar count -parameter is a debugging aid that specifies the number of events to fetch from -the controller for each low-level request. -The default is 15 events. -.Pp -By default, matching event log entries from the previous shutdown up to the -present are displayed. This range can be adjusted via the -.Ar start -and -.Ar stop -parameters. -Each of these parameters can either be specified as a log entry number or as -one of the following aliases: -.Bl -tag -width -indent -.It Cm newest -The newest entry in the event log. -.It Cm oldest -The oldest entry in the event log. -.It Cm clear -The first entry since the event log was cleared. -.It Cm shutdown -The entry in the event log corresponding to the last time the controller was -cleanly shut down. -.It Cm boot -The entry in the event log corresponding to the most recent boot. -.El -.It Cm show firmware -Lists all of the firmware images present on the controller. -.It Cm show logstate -Display the various sequence numbers associated with the event log. -.It Cm show patrol -Display the status of the controller's patrol read operation. -.It Cm show volumes -Lists all of the logical volumes managed by the controller. -.El -.Pp -The physical drive management commands include: -.Bl -tag -width indent -.It Cm fail Ar drive -Mark -.Ar drive -as failed. -.Ar Drive -must be an online drive that is part of an array. -.It Cm good Ar drive -Mark -.Ar drive -as an unconfigured good drive. -.Ar Drive -must not be part of an existing array. 
-.It Cm rebuild Ar drive -Mark a failed -.Ar drive -that is still part of an array as a good drive suitable for a rebuild. -The firmware should kick off an array rebuild on its own if a failed drive -is marked as a rebuild drive. -.It Cm drive progress Ar drive -Report the current progress and estimated completion time of drive operations -such as rebuilds or patrol reads. -.It Cm drive clear Ar drive Brq "start | stop" -Start or stop the writing of all 0x00 characters to a drive. -.It Cm start rebuild Ar drive -Manually start a rebuild on -.Ar drive . -.It Cm abort rebuild Ar drive -Abort an in-progress rebuild operation on -.Ar drive . -It can be resumed with the -.Cm start rebuild -command. -.It Cm locate Ar drive Brq "on | off" -Change the state of the external LED associated with -.Ar drive . -.El -.Pp -The logical volume management commands include: -.Bl -tag -width indent -.It Cm cache Ar volume Op Ar setting Op Ar value -If no -.Ar setting -argument is supplied, then the current cache policy for -.Ar volume -is displayed; -otherwise, -the cache policy for -.Ar volume -is modified. -The optional -.Ar setting -argument can be one of the following values: -.Bl -tag -width indent -.It Cm enable -Enable caching for both read and write I/O operations. -.It Cm disable -Disable caching for both read and write I/O operations. -.It Cm reads -Enable caching only for read I/O operations. -.It Cm writes -Enable caching only for write I/O operations. -.It Cm write-back -Use write-back policy for cached writes. -.It Cm write-through -Use write-through policy for cached writes. -.It Cm read-ahead Op Ar value -Set the read ahead policy for cached reads. -The -.Ar value -argument can be set to either -.Dq none , -.Dq adaptive , -or -.Dq always . -.It Cm write-cache Op Ar value -Control the write caches on the physical drives backing -.Ar volume . -The -.Ar value -argument can be set to either -.Dq disable , -.Dq enable , -or -.Dq default . 
-.Pp -In general this setting should be left disabled to avoid data loss when the -physical drives lose power. -The battery backup of the RAID controller does not save data in the write -caches of the physical drives. -.El -.It Cm name Ar volume Ar name -Sets the name of -.Ar volume -to -.Ar name . -.It Cm volume progress Ar volume -Report the current progress and estimated completion time of volume operations -such as consistency checks and initializations. -.El -.Pp -The configuration commands include: -.Bl -tag -width indent -.It Cm clear -Delete the entire configuration including all volumes, arrays, and spares. -.It Xo Cm create Ar type -.Op Fl v -.Op Fl s Ar stripe_size -.Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." -.Op Ar drive Ns Op \&, Ns Ar drive Ns Op ",..." -.Xc -Create a new volume. -The -.Ar type -specifies the type of volume to create. -Currently supported types include: -.Bl -tag -width indent -.It Cm jbod -Creates a RAID0 volume for each drive specified. -Each drive must be specified as a separate argument. -.It Cm raid0 -Creates one RAID0 volume spanning the drives listed in the single drive list. -.It Cm raid1 -Creates one RAID1 volume spanning the drives listed in the single drive list. -.It Cm raid5 -Creates one RAID5 volume spanning the drives listed in the single drive list. -.It Cm raid6 -Creates one RAID6 volume spanning the drives listed in the single drive list. -.It Cm raid10 -Creates one RAID10 volume spanning multiple RAID1 arrays. -The drives for each RAID1 array are specified as a single drive list. -.It Cm raid50 -Creates one RAID50 volume spanning multiple RAID5 arrays. -The drives for each RAID5 array are specified as a single drive list. -.It Cm raid60 -Creates one RAID60 volume spanning multiple RAID6 arrays. -The drives for each RAID6 array are specified as a single drive list. -.It Cm concat -Creates a single volume by concatenating all of the drives in the single drive -list. 
-.El -.Pp -.Sy Note: -Not all volume types are supported by all controllers. -.Pp -If the -.Fl v -flag is specified after -.Ar type , -then more verbose output will be enabled. -Currently this just provides notification as drives are added to arrays and -arrays to volumes when building the configuration. -.Pp -The -.Fl s -.Ar stripe_size -parameter allows the stripe size of the array to be set. -By default a stripe size of 64K is used. -Valid values are 512 through 1M, though the MFI firmware may reject some -values. -.It Cm delete Ar volume -Delete the volume -.Ar volume . -.It Cm add Ar drive Op Ar volume -Mark -.Ar drive -as a hot spare. -.Ar Drive -must be in the unconfigured good state. -If -.Ar volume -is specified, -then the hot spare will be dedicated to arrays backing that volume. -Otherwise, -.Ar drive -will be used as a global hot spare backing all arrays for this controller. -Note that -.Ar drive -must be as large as the smallest drive in all of the arrays it is going to -back. -.It Cm remove Ar drive -Remove the hot spare -.Ar drive -from service. -It will be placed in the unconfigured good state. -.El -.Pp -The controller management commands include: -.Bl -tag -width indent -.It Cm patrol Ar command Op Ar interval Op Ar start -Set the patrol read operation mode. -The -.Ar command -argument can be one of the following values: -.Bl -tag -width indent -.It Cm disable -Disable patrol reads. -.It Cm auto -Enable periodic patrol reads initiated by the firmware. -The optional -.Ar interval -argument specifies the interval in seconds between patrol reads. -If patrol reads should be run continuously, -then -.Ar interval -should consist of the word -.Dq continuously . -The optional -.Ar start -argument specifies a non-negative, relative start time for the next patrol read. -If an interval or start time is not specified, -then the existing setting will be used. -.It Cm manual -Enable manual patrol reads that are only initiated by the user. 
-.El -.It Cm start patrol -Start a patrol read operation. -.It Cm stop patrol -Stop a currently running patrol read operation. -.It Cm flash Ar file -Updates the flash on the controller with the firmware stored in -.Ar file . -A reboot is required for the new firmware to take effect. -.El -.Sh EXAMPLES -Configure the cache for volume mfid0 to cache only writes: -.Pp -.Dl Nm Cm cache mfid0 writes -.Dl Nm Cm cache mfid0 write-back -.Pp -Create a RAID5 array spanning the first four disks in the second enclosure: -.Pp -.Dl Nm Cm create raid5 e1:s0,e1:s1,e1:s2,e1:s4 -.Pp -Configure the first three disks on a controller as JBOD: -.Pp -.Dl Nm Cm create jbod 0 1 2 -.Pp -Create a RAID10 volume that spans two arrays each of which contains two disks -from two different enclosures: -.Pp -.Dl Nm Cm create raid10 e1:s0,e1:s1 e2:s0,e2:s1 -.Pp -Add drive with the device ID of 4 as a global hot spare: -.Pp -.Dl Nm Cm add 4 -.Pp -Add the drive in slot 2 in the main chassis as a hot spare for volume mfid0: -.Pp -.Dl Nm Cm add s2 mfid0 -.Pp -Configure the adapter to run periodic patrol reads once a week with the first -patrol read starting in 5 minutes: -.Pp -.Dl Nm Cm patrol auto 604800 300 -.Pp -.Sh SEE ALSO -.Xr mfi 4 -.Sh BUGS -On 64-bit OS versions -.Dv 6.2-YAHOO-20070514 -and earlier, -the -.Xr mfi 4 -driver does not properly report firmware errors to 32-bit versions of -.Nm . -As a result, -some commands may fail even though they do not report any errors. -- cgit v1.1 From 90b355517cbdf332c7444cb98976e6a147247cac Mon Sep 17 00:00:00 2001 From: jhb Date: Thu, 29 Oct 2009 14:40:21 +0000 Subject: MFC 196555: - Use the headers from ACPI-CA to define various constants and structures for table layouts, etc. rather than homerolling our own structures and constants in acpidump.h. - Verify the extended checksum on the RSDP. - Handle new ACPI 3.0 fields in MADT including X2APIC entries and UIDs for local SAPICs. - Add handling for new ACPI 3.0 flags in the FADT. 
--- usr.sbin/acpi/acpidump/acpi.c | 828 ++++++++++++++++++++----------------- usr.sbin/acpi/acpidump/acpi_user.c | 48 ++- usr.sbin/acpi/acpidump/acpidump.c | 6 +- usr.sbin/acpi/acpidump/acpidump.h | 381 ++--------------- 4 files changed, 523 insertions(+), 740 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/acpi/acpidump/acpi.c b/usr.sbin/acpi/acpidump/acpi.c index 800655c..8a89f10 100644 --- a/usr.sbin/acpi/acpidump/acpi.c +++ b/usr.sbin/acpi/acpidump/acpi.c @@ -47,31 +47,36 @@ #define END_COMMENT " */\n" static void acpi_print_string(char *s, size_t length); -static void acpi_print_gas(struct ACPIgas *gas); -static int acpi_get_fadt_revision(struct FADTbody *fadt); -static void acpi_handle_fadt(struct ACPIsdt *fadt); +static void acpi_print_gas(ACPI_GENERIC_ADDRESS *gas); +static int acpi_get_fadt_revision(ACPI_TABLE_FADT *fadt); +static void acpi_handle_fadt(ACPI_TABLE_HEADER *fadt); static void acpi_print_cpu(u_char cpu_id); -static void acpi_print_local_apic(u_char cpu_id, u_char apic_id, - u_int32_t flags); -static void acpi_print_io_apic(u_char apic_id, u_int32_t int_base, - u_int64_t apic_addr); -static void acpi_print_mps_flags(u_int16_t flags); -static void acpi_print_intr(u_int32_t intr, u_int16_t mps_flags); -static void acpi_print_apic(struct MADT_APIC *mp); -static void acpi_handle_apic(struct ACPIsdt *sdp); -static void acpi_handle_hpet(struct ACPIsdt *sdp); +static void acpi_print_cpu_uid(uint32_t uid, char *uid_string); +static void acpi_print_local_apic(uint32_t apic_id, uint32_t flags); +static void acpi_print_io_apic(uint32_t apic_id, uint32_t int_base, + uint64_t apic_addr); +static void acpi_print_mps_flags(uint16_t flags); +static void acpi_print_intr(uint32_t intr, uint16_t mps_flags); +static void acpi_print_local_nmi(u_int lint, uint16_t mps_flags); +static void acpi_print_madt(ACPI_SUBTABLE_HEADER *mp); +static void acpi_handle_madt(ACPI_TABLE_HEADER *sdp); +static void acpi_handle_ecdt(ACPI_TABLE_HEADER *sdp); +static void 
acpi_handle_hpet(ACPI_TABLE_HEADER *sdp); +static void acpi_handle_mcfg(ACPI_TABLE_HEADER *sdp); static void acpi_print_srat_cpu(uint32_t apic_id, uint32_t proximity_domain, uint32_t flags); -static void acpi_print_srat_memory(struct SRAT_memory *mp); -static void acpi_print_srat(struct SRATentry *srat); -static void acpi_handle_srat(struct ACPIsdt *sdp); -static void acpi_print_sdt(struct ACPIsdt *sdp); -static void acpi_print_fadt(struct ACPIsdt *sdp); -static void acpi_print_facs(struct FACSbody *facs); -static void acpi_print_dsdt(struct ACPIsdt *dsdp); -static struct ACPIsdt *acpi_map_sdt(vm_offset_t pa); -static void acpi_print_rsd_ptr(struct ACPIrsdp *rp); -static void acpi_handle_rsdt(struct ACPIsdt *rsdp); +static void acpi_print_srat_memory(ACPI_SRAT_MEM_AFFINITY *mp); +static void acpi_print_srat(ACPI_SUBTABLE_HEADER *srat); +static void acpi_handle_srat(ACPI_TABLE_HEADER *sdp); +static void acpi_print_sdt(ACPI_TABLE_HEADER *sdp); +static void acpi_print_fadt(ACPI_TABLE_HEADER *sdp); +static void acpi_print_facs(ACPI_TABLE_FACS *facs); +static void acpi_print_dsdt(ACPI_TABLE_HEADER *dsdp); +static ACPI_TABLE_HEADER *acpi_map_sdt(vm_offset_t pa); +static void acpi_print_rsd_ptr(ACPI_TABLE_RSDP *rp); +static void acpi_handle_rsdt(ACPI_TABLE_HEADER *rsdp); +static void acpi_walk_subtables(ACPI_TABLE_HEADER *table, void *first, + void (*action)(ACPI_SUBTABLE_HEADER *)); /* Size of an address. 32-bit for ACPI 1.0, 64-bit for ACPI 2.0 and up. 
*/ static int addr_size; @@ -92,41 +97,44 @@ acpi_print_string(char *s, size_t length) } static void -acpi_print_gas(struct ACPIgas *gas) +acpi_print_gas(ACPI_GENERIC_ADDRESS *gas) { - switch(gas->address_space_id) { + switch(gas->SpaceId) { case ACPI_GAS_MEMORY: - printf("0x%08lx:%u[%u] (Memory)", (u_long)gas->address, - gas->bit_offset, gas->bit_width); + printf("0x%08lx:%u[%u] (Memory)", (u_long)gas->Address, + gas->BitOffset, gas->BitWidth); break; case ACPI_GAS_IO: - printf("0x%02lx:%u[%u] (IO)", (u_long)gas->address, - gas->bit_offset, gas->bit_width); + printf("0x%02lx:%u[%u] (IO)", (u_long)gas->Address, + gas->BitOffset, gas->BitWidth); break; case ACPI_GAS_PCI: - printf("%x:%x+0x%x (PCI)", (uint16_t)(gas->address >> 32), - (uint16_t)((gas->address >> 16) & 0xffff), - (uint16_t)gas->address); + printf("%x:%x+0x%x (PCI)", (uint16_t)(gas->Address >> 32), + (uint16_t)((gas->Address >> 16) & 0xffff), + (uint16_t)gas->Address); break; /* XXX How to handle these below? */ case ACPI_GAS_EMBEDDED: - printf("0x%x:%u[%u] (EC)", (uint16_t)gas->address, - gas->bit_offset, gas->bit_width); + printf("0x%x:%u[%u] (EC)", (uint16_t)gas->Address, + gas->BitOffset, gas->BitWidth); break; case ACPI_GAS_SMBUS: - printf("0x%x:%u[%u] (SMBus)", (uint16_t)gas->address, - gas->bit_offset, gas->bit_width); + printf("0x%x:%u[%u] (SMBus)", (uint16_t)gas->Address, + gas->BitOffset, gas->BitWidth); break; + case ACPI_GAS_CMOS: + case ACPI_GAS_PCIBAR: + case ACPI_GAS_DATATABLE: case ACPI_GAS_FIXED: default: - printf("0x%08lx (?)", (u_long)gas->address); + printf("0x%08lx (?)", (u_long)gas->Address); break; } } /* The FADT revision indicates whether we use the DSDT or X_DSDT addresses. */ static int -acpi_get_fadt_revision(struct FADTbody *fadt) +acpi_get_fadt_revision(ACPI_TABLE_FADT *fadt) { int fadt_revision; @@ -141,8 +149,8 @@ acpi_get_fadt_revision(struct FADTbody *fadt) * 32 and 64 bit versions don't match, prefer the 32 bit * version for all subsequent tables. 
*/ - if (fadt->facs_ptr != 0 && - (fadt->x_facs_ptr & 0xffffffff) != fadt->facs_ptr) + if (fadt->Facs != 0 && + (fadt->XFacs & 0xffffffff) != fadt->Facs) fadt_revision = 1; } else fadt_revision = 1; @@ -150,35 +158,52 @@ acpi_get_fadt_revision(struct FADTbody *fadt) } static void -acpi_handle_fadt(struct ACPIsdt *sdp) +acpi_handle_fadt(ACPI_TABLE_HEADER *sdp) { - struct ACPIsdt *dsdp; - struct FACSbody *facs; - struct FADTbody *fadt; + ACPI_TABLE_HEADER *dsdp; + ACPI_TABLE_FACS *facs; + ACPI_TABLE_FADT *fadt; int fadt_revision; - fadt = (struct FADTbody *)sdp->body; + fadt = (ACPI_TABLE_FADT *)sdp; acpi_print_fadt(sdp); fadt_revision = acpi_get_fadt_revision(fadt); if (fadt_revision == 1) - facs = (struct FACSbody *)acpi_map_sdt(fadt->facs_ptr); + facs = (ACPI_TABLE_FACS *)acpi_map_sdt(fadt->Facs); else - facs = (struct FACSbody *)acpi_map_sdt(fadt->x_facs_ptr); - if (memcmp(facs->signature, "FACS", 4) != 0 || facs->len < 64) + facs = (ACPI_TABLE_FACS *)acpi_map_sdt(fadt->XFacs); + if (memcmp(facs->Signature, ACPI_SIG_FACS, 4) != 0 || facs->Length < 64) errx(1, "FACS is corrupt"); acpi_print_facs(facs); if (fadt_revision == 1) - dsdp = (struct ACPIsdt *)acpi_map_sdt(fadt->dsdt_ptr); + dsdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(fadt->Dsdt); else - dsdp = (struct ACPIsdt *)acpi_map_sdt(fadt->x_dsdt_ptr); - if (acpi_checksum(dsdp, dsdp->len)) + dsdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(fadt->XDsdt); + if (acpi_checksum(dsdp, dsdp->Length)) errx(1, "DSDT is corrupt"); acpi_print_dsdt(dsdp); } static void +acpi_walk_subtables(ACPI_TABLE_HEADER *table, void *first, + void (*action)(ACPI_SUBTABLE_HEADER *)) +{ + ACPI_SUBTABLE_HEADER *subtable; + char *end; + + subtable = first; + end = (char *)table + table->Length; + while ((char *)subtable < end) { + printf("\n"); + action(subtable); + subtable = (ACPI_SUBTABLE_HEADER *)((char *)subtable + + subtable->Length); + } +} + +static void acpi_print_cpu(u_char cpu_id) { @@ -190,219 +215,264 @@ acpi_print_cpu(u_char cpu_id) } 
static void -acpi_print_local_apic(u_char cpu_id, u_char apic_id, u_int32_t flags) +acpi_print_cpu_uid(uint32_t uid, char *uid_string) { - acpi_print_cpu(cpu_id); + + printf("\tUID=%d", uid); + if (uid_string != NULL) + printf(" (%s)", uid_string); + printf("\n"); +} + +static void +acpi_print_local_apic(uint32_t apic_id, uint32_t flags) +{ + printf("\tFlags={"); - if (flags & ACPI_MADT_APIC_LOCAL_FLAG_ENABLED) + if (flags & ACPI_MADT_ENABLED) printf("ENABLED"); else printf("DISABLED"); printf("}\n"); - printf("\tAPIC ID=%d\n", (u_int)apic_id); + printf("\tAPIC ID=%d\n", apic_id); } static void -acpi_print_io_apic(u_char apic_id, u_int32_t int_base, u_int64_t apic_addr) +acpi_print_io_apic(uint32_t apic_id, uint32_t int_base, uint64_t apic_addr) { - printf("\tAPIC ID=%d\n", (u_int)apic_id); + + printf("\tAPIC ID=%d\n", apic_id); printf("\tINT BASE=%d\n", int_base); - printf("\tADDR=0x%016jx\n", apic_addr); + printf("\tADDR=0x%016jx\n", (uintmax_t)apic_addr); } static void -acpi_print_mps_flags(u_int16_t flags) +acpi_print_mps_flags(uint16_t flags) { printf("\tFlags={Polarity="); - switch (flags & MPS_INT_FLAG_POLARITY_MASK) { - case MPS_INT_FLAG_POLARITY_CONFORM: + switch (flags & ACPI_MADT_POLARITY_MASK) { + case ACPI_MADT_POLARITY_CONFORMS: printf("conforming"); break; - case MPS_INT_FLAG_POLARITY_HIGH: + case ACPI_MADT_POLARITY_ACTIVE_HIGH: printf("active-hi"); break; - case MPS_INT_FLAG_POLARITY_LOW: + case ACPI_MADT_POLARITY_ACTIVE_LOW: printf("active-lo"); break; default: - printf("0x%x", flags & MPS_INT_FLAG_POLARITY_MASK); + printf("0x%x", flags & ACPI_MADT_POLARITY_MASK); break; } printf(", Trigger="); - switch (flags & MPS_INT_FLAG_TRIGGER_MASK) { - case MPS_INT_FLAG_TRIGGER_CONFORM: + switch (flags & ACPI_MADT_TRIGGER_MASK) { + case ACPI_MADT_TRIGGER_CONFORMS: printf("conforming"); break; - case MPS_INT_FLAG_TRIGGER_EDGE: + case ACPI_MADT_TRIGGER_EDGE: printf("edge"); break; - case MPS_INT_FLAG_TRIGGER_LEVEL: + case ACPI_MADT_TRIGGER_LEVEL: 
printf("level"); break; default: - printf("0x%x", (flags & MPS_INT_FLAG_TRIGGER_MASK) >> 2); + printf("0x%x", (flags & ACPI_MADT_TRIGGER_MASK) >> 2); } printf("}\n"); } static void -acpi_print_intr(u_int32_t intr, u_int16_t mps_flags) +acpi_print_intr(uint32_t intr, uint16_t mps_flags) { - printf("\tINTR=%d\n", (u_int)intr); + printf("\tINTR=%d\n", intr); + acpi_print_mps_flags(mps_flags); +} + +static void +acpi_print_local_nmi(u_int lint, uint16_t mps_flags) +{ + + printf("\tLINT Pin=%d\n", lint); acpi_print_mps_flags(mps_flags); } const char *apic_types[] = { "Local APIC", "IO APIC", "INT Override", "NMI", - "Local NMI", "Local APIC Override", "IO SAPIC", - "Local SAPIC", "Platform Interrupt" }; -const char *platform_int_types[] = { "PMI", "INIT", + "Local APIC NMI", "Local APIC Override", + "IO SAPIC", "Local SAPIC", "Platform Interrupt", + "Local X2APIC", "Local X2APIC NMI" }; +const char *platform_int_types[] = { "0 (unknown)", "PMI", "INIT", "Corrected Platform Error" }; static void -acpi_print_apic(struct MADT_APIC *mp) +acpi_print_madt(ACPI_SUBTABLE_HEADER *mp) { - - if (mp->type < sizeof(apic_types) / sizeof(apic_types[0])) - printf("\tType=%s\n", apic_types[mp->type]); + ACPI_MADT_LOCAL_APIC *lapic; + ACPI_MADT_IO_APIC *ioapic; + ACPI_MADT_INTERRUPT_OVERRIDE *over; + ACPI_MADT_NMI_SOURCE *nmi; + ACPI_MADT_LOCAL_APIC_NMI *lapic_nmi; + ACPI_MADT_LOCAL_APIC_OVERRIDE *lapic_over; + ACPI_MADT_IO_SAPIC *iosapic; + ACPI_MADT_LOCAL_SAPIC *lsapic; + ACPI_MADT_INTERRUPT_SOURCE *isrc; + ACPI_MADT_LOCAL_X2APIC *x2apic; + ACPI_MADT_LOCAL_X2APIC_NMI *x2apic_nmi; + + if (mp->Type < sizeof(apic_types) / sizeof(apic_types[0])) + printf("\tType=%s\n", apic_types[mp->Type]); else - printf("\tType=%d (unknown)\n", mp->type); - switch (mp->type) { - case ACPI_MADT_APIC_TYPE_LOCAL_APIC: - acpi_print_local_apic(mp->body.local_apic.cpu_id, - mp->body.local_apic.apic_id, mp->body.local_apic.flags); + printf("\tType=%d (unknown)\n", mp->Type); + switch (mp->Type) { + case 
ACPI_MADT_TYPE_LOCAL_APIC: + lapic = (ACPI_MADT_LOCAL_APIC *)mp; + acpi_print_cpu(lapic->ProcessorId); + acpi_print_local_apic(lapic->Id, lapic->LapicFlags); break; - case ACPI_MADT_APIC_TYPE_IO_APIC: - acpi_print_io_apic(mp->body.io_apic.apic_id, - mp->body.io_apic.int_base, - mp->body.io_apic.apic_addr); + case ACPI_MADT_TYPE_IO_APIC: + ioapic = (ACPI_MADT_IO_APIC *)mp; + acpi_print_io_apic(ioapic->Id, ioapic->GlobalIrqBase, + ioapic->Address); break; - case ACPI_MADT_APIC_TYPE_INT_OVERRIDE: - printf("\tBUS=%d\n", (u_int)mp->body.int_override.bus); - printf("\tIRQ=%d\n", (u_int)mp->body.int_override.source); - acpi_print_intr(mp->body.int_override.intr, - mp->body.int_override.mps_flags); + case ACPI_MADT_TYPE_INTERRUPT_OVERRIDE: + over = (ACPI_MADT_INTERRUPT_OVERRIDE *)mp; + printf("\tBUS=%d\n", (u_int)over->Bus); + printf("\tIRQ=%d\n", (u_int)over->SourceIrq); + acpi_print_intr(over->GlobalIrq, over->IntiFlags); break; - case ACPI_MADT_APIC_TYPE_NMI: - acpi_print_intr(mp->body.nmi.intr, mp->body.nmi.mps_flags); + case ACPI_MADT_TYPE_NMI_SOURCE: + nmi = (ACPI_MADT_NMI_SOURCE *)mp; + acpi_print_intr(nmi->GlobalIrq, nmi->IntiFlags); break; - case ACPI_MADT_APIC_TYPE_LOCAL_NMI: - acpi_print_cpu(mp->body.local_nmi.cpu_id); - printf("\tLINT Pin=%d\n", mp->body.local_nmi.lintpin); - acpi_print_mps_flags(mp->body.local_nmi.mps_flags); + case ACPI_MADT_TYPE_LOCAL_APIC_NMI: + lapic_nmi = (ACPI_MADT_LOCAL_APIC_NMI *)mp; + acpi_print_cpu(lapic_nmi->ProcessorId); + acpi_print_local_nmi(lapic_nmi->Lint, lapic_nmi->IntiFlags); break; - case ACPI_MADT_APIC_TYPE_LOCAL_OVERRIDE: + case ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE: + lapic_over = (ACPI_MADT_LOCAL_APIC_OVERRIDE *)mp; printf("\tLocal APIC ADDR=0x%016jx\n", - mp->body.local_apic_override.apic_addr); + (uintmax_t)lapic_over->Address); + break; + case ACPI_MADT_TYPE_IO_SAPIC: + iosapic = (ACPI_MADT_IO_SAPIC *)mp; + acpi_print_io_apic(iosapic->Id, iosapic->GlobalIrqBase, + iosapic->Address); + break; + case 
ACPI_MADT_TYPE_LOCAL_SAPIC: + lsapic = (ACPI_MADT_LOCAL_SAPIC *)mp; + acpi_print_cpu(lsapic->ProcessorId); + acpi_print_local_apic(lsapic->Id, lsapic->LapicFlags); + printf("\tAPIC EID=%d\n", (u_int)lsapic->Eid); + if (mp->Length > __offsetof(ACPI_MADT_LOCAL_SAPIC, Uid)) + acpi_print_cpu_uid(lsapic->Uid, lsapic->UidString); break; - case ACPI_MADT_APIC_TYPE_IO_SAPIC: - acpi_print_io_apic(mp->body.io_sapic.apic_id, - mp->body.io_sapic.int_base, - mp->body.io_sapic.apic_addr); + case ACPI_MADT_TYPE_INTERRUPT_SOURCE: + isrc = (ACPI_MADT_INTERRUPT_SOURCE *)mp; + if (isrc->Type < sizeof(platform_int_types) / + sizeof(platform_int_types[0])) + printf("\tType=%s\n", platform_int_types[isrc->Type]); + else + printf("\tType=%d (unknown)\n", isrc->Type); + printf("\tAPIC ID=%d\n", (u_int)isrc->Id); + printf("\tAPIC EID=%d\n", (u_int)isrc->Eid); + printf("\tSAPIC Vector=%d\n", (u_int)isrc->IoSapicVector); + acpi_print_intr(isrc->GlobalIrq, isrc->IntiFlags); break; - case ACPI_MADT_APIC_TYPE_LOCAL_SAPIC: - acpi_print_local_apic(mp->body.local_sapic.cpu_id, - mp->body.local_sapic.apic_id, mp->body.local_sapic.flags); - printf("\tAPIC EID=%d\n", (u_int)mp->body.local_sapic.apic_eid); + case ACPI_MADT_TYPE_LOCAL_X2APIC: + x2apic = (ACPI_MADT_LOCAL_X2APIC *)mp; + acpi_print_cpu_uid(x2apic->Uid, NULL); + acpi_print_local_apic(x2apic->LocalApicId, x2apic->LapicFlags); break; - case ACPI_MADT_APIC_TYPE_INT_SRC: - printf("\tType=%s\n", - platform_int_types[mp->body.int_src.type]); - printf("\tCPU ID=%d\n", (u_int)mp->body.int_src.cpu_id); - printf("\tCPU EID=%d\n", (u_int)mp->body.int_src.cpu_id); - printf("\tSAPIC Vector=%d\n", - (u_int)mp->body.int_src.sapic_vector); - acpi_print_intr(mp->body.int_src.intr, - mp->body.int_src.mps_flags); + case ACPI_MADT_TYPE_LOCAL_X2APIC_NMI: + x2apic_nmi = (ACPI_MADT_LOCAL_X2APIC_NMI *)mp; + acpi_print_cpu_uid(x2apic_nmi->Uid, NULL); + acpi_print_local_nmi(x2apic_nmi->Lint, x2apic_nmi->IntiFlags); break; } } static void -acpi_handle_apic(struct 
ACPIsdt *sdp) +acpi_handle_madt(ACPI_TABLE_HEADER *sdp) { - struct MADTbody *madtp; - struct MADT_APIC *madt_apicp; + ACPI_TABLE_MADT *madt; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); - madtp = (struct MADTbody *) sdp->body; - printf("\tLocal APIC ADDR=0x%08x\n", madtp->lapic_addr); + madt = (ACPI_TABLE_MADT *)sdp; + printf("\tLocal APIC ADDR=0x%08x\n", madt->Address); printf("\tFlags={"); - if (madtp->flags & ACPI_APIC_FLAG_PCAT_COMPAT) + if (madt->Flags & ACPI_MADT_PCAT_COMPAT) printf("PC-AT"); printf("}\n"); - madt_apicp = (struct MADT_APIC *)madtp->body; - while (((uintptr_t)madt_apicp) - ((uintptr_t)sdp) < sdp->len) { - printf("\n"); - acpi_print_apic(madt_apicp); - madt_apicp = (struct MADT_APIC *) ((char *)madt_apicp + - madt_apicp->len); - } + acpi_walk_subtables(sdp, (madt + 1), acpi_print_madt); printf(END_COMMENT); } static void -acpi_handle_hpet(struct ACPIsdt *sdp) +acpi_handle_hpet(ACPI_TABLE_HEADER *sdp) { - struct HPETbody *hpetp; + ACPI_TABLE_HPET *hpet; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); - hpetp = (struct HPETbody *) sdp->body; - printf("\tHPET Number=%d\n", hpetp->hpet_number); + hpet = (ACPI_TABLE_HPET *)sdp; + printf("\tHPET Number=%d\n", hpet->Sequence); printf("\tADDR="); - acpi_print_gas(&hpetp->genaddr); - printf("\tHW Rev=0x%x\n", hpetp->block_hwrev); - printf("\tComparitors=%d\n", hpetp->block_comparitors); - printf("\tCounter Size=%d\n", hpetp->block_counter_size); + acpi_print_gas(&hpet->Address); + printf("\tHW Rev=0x%x\n", hpet->Id & ACPI_HPET_ID_HARDWARE_REV_ID); + printf("\tComparators=%d\n", (hpet->Id & ACPI_HPET_ID_COMPARATORS) >> + 8); + printf("\tCounter Size=%d\n", hpet->Id & ACPI_HPET_ID_COUNT_SIZE_CAP ? 
+ 1 : 0); printf("\tLegacy IRQ routing capable={"); - if (hpetp->block_legacy_capable) + if (hpet->Id & ACPI_HPET_ID_LEGACY_CAPABLE) printf("TRUE}\n"); else printf("FALSE}\n"); - printf("\tPCI Vendor ID=0x%04x\n", hpetp->block_pcivendor); - printf("\tMinimal Tick=%d\n", hpetp->clock_tick); + printf("\tPCI Vendor ID=0x%04x\n", hpet->Id >> 16); + printf("\tMinimal Tick=%d\n", hpet->MinimumTick); printf(END_COMMENT); } static void -acpi_handle_ecdt(struct ACPIsdt *sdp) +acpi_handle_ecdt(ACPI_TABLE_HEADER *sdp) { - struct ECDTbody *ecdt; + ACPI_TABLE_ECDT *ecdt; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); - ecdt = (struct ECDTbody *) sdp->body; + ecdt = (ACPI_TABLE_ECDT *)sdp; printf("\tEC_CONTROL="); - acpi_print_gas(&ecdt->ec_control); + acpi_print_gas(&ecdt->Control); printf("\n\tEC_DATA="); - acpi_print_gas(&ecdt->ec_data); - printf("\n\tUID=%#x, ", ecdt->uid); - printf("GPE_BIT=%#x\n", ecdt->gpe_bit); - printf("\tEC_ID=%s\n", ecdt->ec_id); + acpi_print_gas(&ecdt->Data); + printf("\n\tUID=%#x, ", ecdt->Uid); + printf("GPE_BIT=%#x\n", ecdt->Gpe); + printf("\tEC_ID=%s\n", ecdt->Id); printf(END_COMMENT); } static void -acpi_handle_mcfg(struct ACPIsdt *sdp) +acpi_handle_mcfg(ACPI_TABLE_HEADER *sdp) { - struct MCFGbody *mcfg; - u_int i, e; + ACPI_TABLE_MCFG *mcfg; + ACPI_MCFG_ALLOCATION *alloc; + u_int i, entries; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); - mcfg = (struct MCFGbody *) sdp->body; - - e = (sdp->len - ((caddr_t)&mcfg->s[0] - (caddr_t)sdp)) / - sizeof(*mcfg->s); - for (i = 0; i < e; i++, mcfg++) { + mcfg = (ACPI_TABLE_MCFG *)sdp; + entries = (sdp->Length - sizeof(ACPI_TABLE_MCFG)) / + sizeof(ACPI_MCFG_ALLOCATION); + alloc = (ACPI_MCFG_ALLOCATION *)(mcfg + 1); + for (i = 0; i < entries; i++, alloc++) { printf("\n"); - printf("\tBase Address=0x%016jx\n", mcfg->s[i].baseaddr); - printf("\tSegment Group=0x%04x\n", mcfg->s[i].seg_grp); - printf("\tStart Bus=%d\n", mcfg->s[i].start); - printf("\tEnd Bus=%d\n", mcfg->s[i].end); + printf("\tBase 
Address=0x%016jx\n", alloc->Address); + printf("\tSegment Group=0x%04x\n", alloc->PciSegment); + printf("\tStart Bus=%d\n", alloc->StartBusNumber); + printf("\tEnd Bus=%d\n", alloc->EndBusNumber); } printf(END_COMMENT); } @@ -423,108 +493,109 @@ acpi_print_srat_cpu(uint32_t apic_id, uint32_t proximity_domain, } static void -acpi_print_srat_memory(struct SRAT_memory *mp) +acpi_print_srat_memory(ACPI_SRAT_MEM_AFFINITY *mp) { printf("\tFlags={"); - if (mp->flags & ACPI_SRAT_MEM_ENABLED) + if (mp->Flags & ACPI_SRAT_MEM_ENABLED) printf("ENABLED"); else printf("DISABLED"); - if (mp->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) + if (mp->Flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) printf(",HOT_PLUGGABLE"); - if (mp->flags & ACPI_SRAT_MEM_NON_VOLATILE) + if (mp->Flags & ACPI_SRAT_MEM_NON_VOLATILE) printf(",NON_VOLATILE"); printf("}\n"); - printf("\tBase Address=0x%016jx\n", (uintmax_t)mp->base_address); - printf("\tLength=0x%016jx\n", (uintmax_t)mp->length); - printf("\tProximity Domain=%d\n", mp->proximity_domain); + printf("\tBase Address=0x%016jx\n", (uintmax_t)mp->BaseAddress); + printf("\tLength=0x%016jx\n", (uintmax_t)mp->Length); + printf("\tProximity Domain=%d\n", mp->ProximityDomain); } const char *srat_types[] = { "CPU", "Memory", "X2APIC" }; static void -acpi_print_srat(struct SRATentry *srat) +acpi_print_srat(ACPI_SUBTABLE_HEADER *srat) { + ACPI_SRAT_CPU_AFFINITY *cpu; + ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic; - if (srat->type < sizeof(srat_types) / sizeof(srat_types[0])) - printf("\tType=%s\n", srat_types[srat->type]); + if (srat->Type < sizeof(srat_types) / sizeof(srat_types[0])) + printf("\tType=%s\n", srat_types[srat->Type]); else - printf("\tType=%d (unknown)\n", srat->type); - switch (srat->type) { + printf("\tType=%d (unknown)\n", srat->Type); + switch (srat->Type) { case ACPI_SRAT_TYPE_CPU_AFFINITY: - acpi_print_srat_cpu(srat->body.cpu.apic_id, - srat->body.cpu.proximity_domain_hi[2] << 24 | - srat->body.cpu.proximity_domain_hi[1] << 16 | - 
srat->body.cpu.proximity_domain_hi[0] << 0 | - srat->body.cpu.proximity_domain_lo, srat->body.cpu.flags); + cpu = (ACPI_SRAT_CPU_AFFINITY *)srat; + acpi_print_srat_cpu(cpu->ApicId, + cpu->ProximityDomainHi[2] << 24 | + cpu->ProximityDomainHi[1] << 16 | + cpu->ProximityDomainHi[0] << 0 | + cpu->ProximityDomainLo, cpu->Flags); break; case ACPI_SRAT_TYPE_MEMORY_AFFINITY: - acpi_print_srat_memory(&srat->body.mem); + acpi_print_srat_memory((ACPI_SRAT_MEM_AFFINITY *)srat); break; case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: - acpi_print_srat_cpu(srat->body.x2apic.apic_id, - srat->body.x2apic.proximity_domain, - srat->body.x2apic.flags); + x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)srat; + acpi_print_srat_cpu(x2apic->ApicId, x2apic->ProximityDomain, + x2apic->Flags); break; } } static void -acpi_handle_srat(struct ACPIsdt *sdp) +acpi_handle_srat(ACPI_TABLE_HEADER *sdp) { - struct SRATbody *sratp; - struct SRATentry *entry; + ACPI_TABLE_SRAT *srat; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); - sratp = (struct SRATbody *)sdp->body; - printf("\tTable Revision=%d\n", sratp->table_revision); - entry = sratp->body; - while (((uintptr_t)entry) - ((uintptr_t)sdp) < sdp->len) { - printf("\n"); - acpi_print_srat(entry); - entry = (struct SRATentry *)((char *)entry + entry->len); - } + srat = (ACPI_TABLE_SRAT *)sdp; + printf("\tTable Revision=%d\n", srat->TableRevision); + acpi_walk_subtables(sdp, (srat + 1), acpi_print_srat); printf(END_COMMENT); } static void -acpi_print_sdt(struct ACPIsdt *sdp) +acpi_print_sdt(ACPI_TABLE_HEADER *sdp) { printf(" "); - acpi_print_string(sdp->signature, 4); + acpi_print_string(sdp->Signature, ACPI_NAME_SIZE); printf(": Length=%d, Revision=%d, Checksum=%d,\n", - sdp->len, sdp->rev, sdp->check); + sdp->Length, sdp->Revision, sdp->Checksum); printf("\tOEMID="); - acpi_print_string(sdp->oemid, 6); + acpi_print_string(sdp->OemId, ACPI_OEM_ID_SIZE); printf(", OEM Table ID="); - acpi_print_string(sdp->oemtblid, 8); - printf(", OEM Revision=0x%x,\n", 
sdp->oemrev); + acpi_print_string(sdp->OemTableId, ACPI_OEM_TABLE_ID_SIZE); + printf(", OEM Revision=0x%x,\n", sdp->OemRevision); printf("\tCreator ID="); - acpi_print_string(sdp->creator, 4); - printf(", Creator Revision=0x%x\n", sdp->crerev); + acpi_print_string(sdp->AslCompilerId, ACPI_NAME_SIZE); + printf(", Creator Revision=0x%x\n", sdp->AslCompilerRevision); } static void -acpi_print_rsdt(struct ACPIsdt *rsdp) +acpi_print_rsdt(ACPI_TABLE_HEADER *rsdp) { + ACPI_TABLE_RSDT *rsdt; + ACPI_TABLE_XSDT *xsdt; int i, entries; u_long addr; + rsdt = (ACPI_TABLE_RSDT *)rsdp; + xsdt = (ACPI_TABLE_XSDT *)rsdp; printf(BEGIN_COMMENT); acpi_print_sdt(rsdp); - entries = (rsdp->len - SIZEOF_SDT_HDR) / addr_size; + entries = (rsdp->Length - sizeof(ACPI_TABLE_HEADER)) / addr_size; printf("\tEntries={ "); for (i = 0; i < entries; i++) { if (i > 0) printf(", "); switch (addr_size) { case 4: - addr = le32dec((char*)rsdp->body + i * addr_size); + addr = le32toh(rsdt->TableOffsetEntry[i]); break; case 8: - addr = le64dec((char*)rsdp->body + i * addr_size); + addr = le64toh(xsdt->TableOffsetEntry[i]); break; default: addr = 0; @@ -542,138 +613,147 @@ static const char *acpi_pm_profiles[] = { }; static void -acpi_print_fadt(struct ACPIsdt *sdp) +acpi_print_fadt(ACPI_TABLE_HEADER *sdp) { - struct FADTbody *fadt; + ACPI_TABLE_FADT *fadt; const char *pm; char sep; - fadt = (struct FADTbody *)sdp->body; + fadt = (ACPI_TABLE_FADT *)sdp; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); - printf(" \tFACS=0x%x, DSDT=0x%x\n", fadt->facs_ptr, - fadt->dsdt_ptr); - printf("\tINT_MODEL=%s\n", fadt->int_model ? "APIC" : "PIC"); - if (fadt->pm_profile >= sizeof(acpi_pm_profiles) / sizeof(char *)) + printf(" \tFACS=0x%x, DSDT=0x%x\n", fadt->Facs, + fadt->Dsdt); + printf("\tINT_MODEL=%s\n", fadt->Model ? 
"APIC" : "PIC"); + if (fadt->PreferredProfile >= sizeof(acpi_pm_profiles) / sizeof(char *)) pm = "Reserved"; else - pm = acpi_pm_profiles[fadt->pm_profile]; - printf("\tPreferred_PM_Profile=%s (%d)\n", pm, fadt->pm_profile); - printf("\tSCI_INT=%d\n", fadt->sci_int); - printf("\tSMI_CMD=0x%x, ", fadt->smi_cmd); - printf("ACPI_ENABLE=0x%x, ", fadt->acpi_enable); - printf("ACPI_DISABLE=0x%x, ", fadt->acpi_disable); - printf("S4BIOS_REQ=0x%x\n", fadt->s4biosreq); - printf("\tPSTATE_CNT=0x%x\n", fadt->pstate_cnt); + pm = acpi_pm_profiles[fadt->PreferredProfile]; + printf("\tPreferred_PM_Profile=%s (%d)\n", pm, fadt->PreferredProfile); + printf("\tSCI_INT=%d\n", fadt->SciInterrupt); + printf("\tSMI_CMD=0x%x, ", fadt->SmiCommand); + printf("ACPI_ENABLE=0x%x, ", fadt->AcpiEnable); + printf("ACPI_DISABLE=0x%x, ", fadt->AcpiDisable); + printf("S4BIOS_REQ=0x%x\n", fadt->S4BiosRequest); + printf("\tPSTATE_CNT=0x%x\n", fadt->PstateControl); printf("\tPM1a_EVT_BLK=0x%x-0x%x\n", - fadt->pm1a_evt_blk, - fadt->pm1a_evt_blk + fadt->pm1_evt_len - 1); - if (fadt->pm1b_evt_blk != 0) + fadt->Pm1aEventBlock, + fadt->Pm1aEventBlock + fadt->Pm1EventLength - 1); + if (fadt->Pm1bEventBlock != 0) printf("\tPM1b_EVT_BLK=0x%x-0x%x\n", - fadt->pm1b_evt_blk, - fadt->pm1b_evt_blk + fadt->pm1_evt_len - 1); + fadt->Pm1bEventBlock, + fadt->Pm1bEventBlock + fadt->Pm1EventLength - 1); printf("\tPM1a_CNT_BLK=0x%x-0x%x\n", - fadt->pm1a_cnt_blk, - fadt->pm1a_cnt_blk + fadt->pm1_cnt_len - 1); - if (fadt->pm1b_cnt_blk != 0) + fadt->Pm1aControlBlock, + fadt->Pm1aControlBlock + fadt->Pm1ControlLength - 1); + if (fadt->Pm1bControlBlock != 0) printf("\tPM1b_CNT_BLK=0x%x-0x%x\n", - fadt->pm1b_cnt_blk, - fadt->pm1b_cnt_blk + fadt->pm1_cnt_len - 1); - if (fadt->pm2_cnt_blk != 0) + fadt->Pm1bControlBlock, + fadt->Pm1bControlBlock + fadt->Pm1ControlLength - 1); + if (fadt->Pm2ControlBlock != 0) printf("\tPM2_CNT_BLK=0x%x-0x%x\n", - fadt->pm2_cnt_blk, - fadt->pm2_cnt_blk + fadt->pm2_cnt_len - 1); + 
fadt->Pm2ControlBlock, + fadt->Pm2ControlBlock + fadt->Pm2ControlLength - 1); printf("\tPM_TMR_BLK=0x%x-0x%x\n", - fadt->pm_tmr_blk, - fadt->pm_tmr_blk + fadt->pm_tmr_len - 1); - if (fadt->gpe0_blk != 0) + fadt->PmTimerBlock, + fadt->PmTimerBlock + fadt->PmTimerLength - 1); + if (fadt->Gpe0Block != 0) printf("\tGPE0_BLK=0x%x-0x%x\n", - fadt->gpe0_blk, - fadt->gpe0_blk + fadt->gpe0_len - 1); - if (fadt->gpe1_blk != 0) + fadt->Gpe0Block, + fadt->Gpe0Block + fadt->Gpe0BlockLength - 1); + if (fadt->Gpe1Block != 0) printf("\tGPE1_BLK=0x%x-0x%x, GPE1_BASE=%d\n", - fadt->gpe1_blk, - fadt->gpe1_blk + fadt->gpe1_len - 1, - fadt->gpe1_base); - if (fadt->cst_cnt != 0) - printf("\tCST_CNT=0x%x\n", fadt->cst_cnt); + fadt->Gpe1Block, + fadt->Gpe1Block + fadt->Gpe1BlockLength - 1, + fadt->Gpe1Base); + if (fadt->CstControl != 0) + printf("\tCST_CNT=0x%x\n", fadt->CstControl); printf("\tP_LVL2_LAT=%d us, P_LVL3_LAT=%d us\n", - fadt->p_lvl2_lat, fadt->p_lvl3_lat); + fadt->C2Latency, fadt->C3Latency); printf("\tFLUSH_SIZE=%d, FLUSH_STRIDE=%d\n", - fadt->flush_size, fadt->flush_stride); + fadt->FlushSize, fadt->FlushStride); printf("\tDUTY_OFFSET=%d, DUTY_WIDTH=%d\n", - fadt->duty_off, fadt->duty_width); + fadt->DutyOffset, fadt->DutyWidth); printf("\tDAY_ALRM=%d, MON_ALRM=%d, CENTURY=%d\n", - fadt->day_alrm, fadt->mon_alrm, fadt->century); + fadt->DayAlarm, fadt->MonthAlarm, fadt->Century); #define PRINTFLAG(var, flag) do { \ - if ((var) & FADT_FLAG_## flag) { \ + if ((var) & ACPI_FADT_## flag) { \ printf("%c%s", sep, #flag); sep = ','; \ } \ } while (0) printf("\tIAPC_BOOT_ARCH="); sep = '{'; - PRINTFLAG(fadt->iapc_boot_arch, LEGACY_DEV); - PRINTFLAG(fadt->iapc_boot_arch, 8042); - if (fadt->iapc_boot_arch != 0) + PRINTFLAG(fadt->BootFlags, LEGACY_DEVICES); + PRINTFLAG(fadt->BootFlags, 8042); + PRINTFLAG(fadt->BootFlags, NO_VGA); + PRINTFLAG(fadt->BootFlags, NO_MSI); + PRINTFLAG(fadt->BootFlags, NO_ASPM); + if (fadt->BootFlags != 0) printf("}"); printf("\n"); printf("\tFlags="); sep 
= '{'; - PRINTFLAG(fadt->flags, WBINVD); - PRINTFLAG(fadt->flags, WBINVD_FLUSH); - PRINTFLAG(fadt->flags, PROC_C1); - PRINTFLAG(fadt->flags, P_LVL2_UP); - PRINTFLAG(fadt->flags, PWR_BUTTON); - PRINTFLAG(fadt->flags, SLP_BUTTON); - PRINTFLAG(fadt->flags, FIX_RTC); - PRINTFLAG(fadt->flags, RTC_S4); - PRINTFLAG(fadt->flags, TMR_VAL_EXT); - PRINTFLAG(fadt->flags, DCK_CAP); - PRINTFLAG(fadt->flags, RESET_REG); - PRINTFLAG(fadt->flags, SEALED_CASE); - PRINTFLAG(fadt->flags, HEADLESS); - PRINTFLAG(fadt->flags, CPU_SW_SLP); - if (fadt->flags != 0) + PRINTFLAG(fadt->Flags, WBINVD); + PRINTFLAG(fadt->Flags, WBINVD_FLUSH); + PRINTFLAG(fadt->Flags, C1_SUPPORTED); + PRINTFLAG(fadt->Flags, C2_MP_SUPPORTED); + PRINTFLAG(fadt->Flags, POWER_BUTTON); + PRINTFLAG(fadt->Flags, SLEEP_BUTTON); + PRINTFLAG(fadt->Flags, FIXED_RTC); + PRINTFLAG(fadt->Flags, S4_RTC_WAKE); + PRINTFLAG(fadt->Flags, 32BIT_TIMER); + PRINTFLAG(fadt->Flags, DOCKING_SUPPORTED); + PRINTFLAG(fadt->Flags, RESET_REGISTER); + PRINTFLAG(fadt->Flags, SEALED_CASE); + PRINTFLAG(fadt->Flags, HEADLESS); + PRINTFLAG(fadt->Flags, SLEEP_TYPE); + PRINTFLAG(fadt->Flags, PCI_EXPRESS_WAKE); + PRINTFLAG(fadt->Flags, PLATFORM_CLOCK); + PRINTFLAG(fadt->Flags, S4_RTC_VALID); + PRINTFLAG(fadt->Flags, REMOTE_POWER_ON); + PRINTFLAG(fadt->Flags, APIC_CLUSTER); + PRINTFLAG(fadt->Flags, APIC_PHYSICAL); + if (fadt->Flags != 0) printf("}\n"); #undef PRINTFLAG - if (fadt->flags & FADT_FLAG_RESET_REG) { + if (fadt->Flags & ACPI_FADT_RESET_REGISTER) { printf("\tRESET_REG="); - acpi_print_gas(&fadt->reset_reg); - printf(", RESET_VALUE=%#x\n", fadt->reset_value); + acpi_print_gas(&fadt->ResetRegister); + printf(", RESET_VALUE=%#x\n", fadt->ResetValue); } if (acpi_get_fadt_revision(fadt) > 1) { - printf("\tX_FACS=0x%08lx, ", (u_long)fadt->x_facs_ptr); - printf("X_DSDT=0x%08lx\n", (u_long)fadt->x_dsdt_ptr); + printf("\tX_FACS=0x%08lx, ", (u_long)fadt->XFacs); + printf("X_DSDT=0x%08lx\n", (u_long)fadt->XDsdt); printf("\tX_PM1a_EVT_BLK="); - 
acpi_print_gas(&fadt->x_pm1a_evt_blk); - if (fadt->x_pm1b_evt_blk.address != 0) { + acpi_print_gas(&fadt->XPm1aEventBlock); + if (fadt->XPm1bEventBlock.Address != 0) { printf("\n\tX_PM1b_EVT_BLK="); - acpi_print_gas(&fadt->x_pm1b_evt_blk); + acpi_print_gas(&fadt->XPm1bEventBlock); } printf("\n\tX_PM1a_CNT_BLK="); - acpi_print_gas(&fadt->x_pm1a_cnt_blk); - if (fadt->x_pm1b_cnt_blk.address != 0) { + acpi_print_gas(&fadt->XPm1aControlBlock); + if (fadt->XPm1bControlBlock.Address != 0) { printf("\n\tX_PM1b_CNT_BLK="); - acpi_print_gas(&fadt->x_pm1b_cnt_blk); + acpi_print_gas(&fadt->XPm1bControlBlock); } - if (fadt->x_pm1b_cnt_blk.address != 0) { + if (fadt->XPm2ControlBlock.Address != 0) { printf("\n\tX_PM2_CNT_BLK="); - acpi_print_gas(&fadt->x_pm2_cnt_blk); + acpi_print_gas(&fadt->XPm2ControlBlock); } printf("\n\tX_PM_TMR_BLK="); - acpi_print_gas(&fadt->x_pm_tmr_blk); - if (fadt->x_gpe0_blk.address != 0) { + acpi_print_gas(&fadt->XPmTimerBlock); + if (fadt->XGpe0Block.Address != 0) { printf("\n\tX_GPE0_BLK="); - acpi_print_gas(&fadt->x_gpe0_blk); + acpi_print_gas(&fadt->XGpe0Block); } - if (fadt->x_gpe1_blk.address != 0) { + if (fadt->XGpe1Block.Address != 0) { printf("\n\tX_GPE1_BLK="); - acpi_print_gas(&fadt->x_gpe1_blk); + acpi_print_gas(&fadt->XGpe1Block); } printf("\n"); } @@ -682,38 +762,38 @@ acpi_print_fadt(struct ACPIsdt *sdp) } static void -acpi_print_facs(struct FACSbody *facs) +acpi_print_facs(ACPI_TABLE_FACS *facs) { printf(BEGIN_COMMENT); - printf(" FACS:\tLength=%u, ", facs->len); - printf("HwSig=0x%08x, ", facs->hw_sig); - printf("Firm_Wake_Vec=0x%08x\n", facs->firm_wake_vec); + printf(" FACS:\tLength=%u, ", facs->Length); + printf("HwSig=0x%08x, ", facs->HardwareSignature); + printf("Firm_Wake_Vec=0x%08x\n", facs->FirmwareWakingVector); printf("\tGlobal_Lock="); - if (facs->global_lock != 0) { - if (facs->global_lock & FACS_FLAG_LOCK_PENDING) + if (facs->GlobalLock != 0) { + if (facs->GlobalLock & ACPI_GLOCK_PENDING) printf("PENDING,"); - if 
(facs->global_lock & FACS_FLAG_LOCK_OWNED) + if (facs->GlobalLock & ACPI_GLOCK_OWNED) printf("OWNED"); } printf("\n"); printf("\tFlags="); - if (facs->flags & FACS_FLAG_S4BIOS_F) + if (facs->Flags & ACPI_FACS_S4_BIOS_PRESENT) printf("S4BIOS"); printf("\n"); - if (facs->x_firm_wake_vec != 0) { + if (facs->XFirmwareWakingVector != 0) { printf("\tX_Firm_Wake_Vec=%08lx\n", - (u_long)facs->x_firm_wake_vec); + (u_long)facs->XFirmwareWakingVector); } - printf("\tVersion=%u\n", facs->version); + printf("\tVersion=%u\n", facs->Version); printf(END_COMMENT); } static void -acpi_print_dsdt(struct ACPIsdt *dsdp) +acpi_print_dsdt(ACPI_TABLE_HEADER *dsdp) { printf(BEGIN_COMMENT); acpi_print_sdt(dsdp); @@ -723,8 +803,8 @@ acpi_print_dsdt(struct ACPIsdt *dsdp) int acpi_checksum(void *p, size_t length) { - u_int8_t *bp; - u_int8_t sum; + uint8_t *bp; + uint8_t sum; bp = p; sum = 0; @@ -734,71 +814,77 @@ acpi_checksum(void *p, size_t length) return (sum); } -static struct ACPIsdt * +static ACPI_TABLE_HEADER * acpi_map_sdt(vm_offset_t pa) { - struct ACPIsdt *sp; + ACPI_TABLE_HEADER *sp; - sp = acpi_map_physical(pa, sizeof(struct ACPIsdt)); - sp = acpi_map_physical(pa, sp->len); + sp = acpi_map_physical(pa, sizeof(ACPI_TABLE_HEADER)); + sp = acpi_map_physical(pa, sp->Length); return (sp); } static void -acpi_print_rsd_ptr(struct ACPIrsdp *rp) +acpi_print_rsd_ptr(ACPI_TABLE_RSDP *rp) { printf(BEGIN_COMMENT); printf(" RSD PTR: OEM="); - acpi_print_string(rp->oem, 6); - printf(", ACPI_Rev=%s (%d)\n", rp->revision < 2 ? "1.0x" : "2.0x", - rp->revision); - if (rp->revision < 2) { - printf("\tRSDT=0x%08x, cksum=%u\n", rp->rsdt_addr, rp->sum); + acpi_print_string(rp->OemId, ACPI_OEM_ID_SIZE); + printf(", ACPI_Rev=%s (%d)\n", rp->Revision < 2 ? 
"1.0x" : "2.0x", + rp->Revision); + if (rp->Revision < 2) { + printf("\tRSDT=0x%08x, cksum=%u\n", rp->RsdtPhysicalAddress, + rp->Checksum); } else { printf("\tXSDT=0x%08lx, length=%u, cksum=%u\n", - (u_long)rp->xsdt_addr, rp->length, rp->xsum); + (u_long)rp->XsdtPhysicalAddress, rp->Length, + rp->ExtendedChecksum); } printf(END_COMMENT); } static void -acpi_handle_rsdt(struct ACPIsdt *rsdp) +acpi_handle_rsdt(ACPI_TABLE_HEADER *rsdp) { - struct ACPIsdt *sdp; + ACPI_TABLE_HEADER *sdp; + ACPI_TABLE_RSDT *rsdt; + ACPI_TABLE_XSDT *xsdt; vm_offset_t addr; int entries, i; acpi_print_rsdt(rsdp); - entries = (rsdp->len - SIZEOF_SDT_HDR) / addr_size; + rsdt = (ACPI_TABLE_RSDT *)rsdp; + xsdt = (ACPI_TABLE_XSDT *)rsdp; + entries = (rsdp->Length - sizeof(ACPI_TABLE_HEADER)) / addr_size; for (i = 0; i < entries; i++) { switch (addr_size) { case 4: - addr = le32dec((char*)rsdp->body + i * addr_size); + addr = le32toh(rsdt->TableOffsetEntry[i]); break; case 8: - addr = le64dec((char*)rsdp->body + i * addr_size); + addr = le64toh(xsdt->TableOffsetEntry[i]); break; default: assert((addr = 0)); } - sdp = (struct ACPIsdt *)acpi_map_sdt(addr); - if (acpi_checksum(sdp, sdp->len)) { + sdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(addr); + if (acpi_checksum(sdp, sdp->Length)) { warnx("RSDT entry %d (sig %.4s) is corrupt", i, - sdp->signature); + sdp->Signature); continue; } - if (!memcmp(sdp->signature, "FACP", 4)) + if (!memcmp(sdp->Signature, ACPI_SIG_FADT, 4)) acpi_handle_fadt(sdp); - else if (!memcmp(sdp->signature, "APIC", 4)) - acpi_handle_apic(sdp); - else if (!memcmp(sdp->signature, "HPET", 4)) + else if (!memcmp(sdp->Signature, ACPI_SIG_MADT, 4)) + acpi_handle_madt(sdp); + else if (!memcmp(sdp->Signature, ACPI_SIG_HPET, 4)) acpi_handle_hpet(sdp); - else if (!memcmp(sdp->signature, "ECDT", 4)) + else if (!memcmp(sdp->Signature, ACPI_SIG_ECDT, 4)) acpi_handle_ecdt(sdp); - else if (!memcmp(sdp->signature, "MCFG", 4)) + else if (!memcmp(sdp->Signature, ACPI_SIG_MCFG, 4)) 
acpi_handle_mcfg(sdp); - else if (!memcmp(sdp->signature, "SRAT", 4)) + else if (!memcmp(sdp->Signature, ACPI_SIG_SRAT, 4)) acpi_handle_srat(sdp); else { printf(BEGIN_COMMENT); @@ -808,11 +894,11 @@ acpi_handle_rsdt(struct ACPIsdt *rsdp) } } -struct ACPIsdt * +ACPI_TABLE_HEADER * sdt_load_devmem(void) { - struct ACPIrsdp *rp; - struct ACPIsdt *rsdp; + ACPI_TABLE_RSDP *rp; + ACPI_TABLE_HEADER *rsdp; rp = acpi_find_rsd_ptr(); if (!rp) @@ -820,16 +906,16 @@ sdt_load_devmem(void) if (tflag) acpi_print_rsd_ptr(rp); - if (rp->revision < 2) { - rsdp = (struct ACPIsdt *)acpi_map_sdt(rp->rsdt_addr); - if (memcmp(rsdp->signature, "RSDT", 4) != 0 || - acpi_checksum(rsdp, rsdp->len) != 0) + if (rp->Revision < 2) { + rsdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(rp->RsdtPhysicalAddress); + if (memcmp(rsdp->Signature, "RSDT", 4) != 0 || + acpi_checksum(rsdp, rsdp->Length) != 0) errx(1, "RSDT is corrupted"); addr_size = sizeof(uint32_t); } else { - rsdp = (struct ACPIsdt *)acpi_map_sdt(rp->xsdt_addr); - if (memcmp(rsdp->signature, "XSDT", 4) != 0 || - acpi_checksum(rsdp, rsdp->len) != 0) + rsdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(rp->XsdtPhysicalAddress); + if (memcmp(rsdp->Signature, "XSDT", 4) != 0 || + acpi_checksum(rsdp, rsdp->Length) != 0) errx(1, "XSDT is corrupted"); addr_size = sizeof(uint64_t); } @@ -838,37 +924,39 @@ sdt_load_devmem(void) /* Write the DSDT to a file, concatenating any SSDTs (if present). */ static int -write_dsdt(int fd, struct ACPIsdt *rsdt, struct ACPIsdt *dsdt) +write_dsdt(int fd, ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdt) { - struct ACPIsdt sdt; - struct ACPIsdt *ssdt; + ACPI_TABLE_HEADER sdt; + ACPI_TABLE_HEADER *ssdt; uint8_t sum; /* Create a new checksum to account for the DSDT and any SSDTs. 
*/ sdt = *dsdt; if (rsdt != NULL) { - sdt.check = 0; - sum = acpi_checksum(dsdt->body, dsdt->len - SIZEOF_SDT_HDR); - ssdt = sdt_from_rsdt(rsdt, "SSDT", NULL); + sdt.Checksum = 0; + sum = acpi_checksum(dsdt + 1, dsdt->Length - + sizeof(ACPI_TABLE_HEADER)); + ssdt = sdt_from_rsdt(rsdt, ACPI_SIG_SSDT, NULL); while (ssdt != NULL) { - sdt.len += ssdt->len - SIZEOF_SDT_HDR; - sum += acpi_checksum(ssdt->body, - ssdt->len - SIZEOF_SDT_HDR); - ssdt = sdt_from_rsdt(rsdt, "SSDT", ssdt); + sdt.Length += ssdt->Length - sizeof(ACPI_TABLE_HEADER); + sum += acpi_checksum(ssdt + 1, + ssdt->Length - sizeof(ACPI_TABLE_HEADER)); + ssdt = sdt_from_rsdt(rsdt, ACPI_SIG_SSDT, ssdt); } - sum += acpi_checksum(&sdt, SIZEOF_SDT_HDR); - sdt.check -= sum; + sum += acpi_checksum(&sdt, sizeof(ACPI_TABLE_HEADER)); + sdt.Checksum -= sum; } /* Write out the DSDT header and body. */ - write(fd, &sdt, SIZEOF_SDT_HDR); - write(fd, dsdt->body, dsdt->len - SIZEOF_SDT_HDR); + write(fd, &sdt, sizeof(ACPI_TABLE_HEADER)); + write(fd, dsdt + 1, dsdt->Length - sizeof(ACPI_TABLE_HEADER)); /* Write out any SSDTs (if present.) 
*/ if (rsdt != NULL) { ssdt = sdt_from_rsdt(rsdt, "SSDT", NULL); while (ssdt != NULL) { - write(fd, ssdt->body, ssdt->len - SIZEOF_SDT_HDR); + write(fd, ssdt + 1, ssdt->Length - + sizeof(ACPI_TABLE_HEADER)); ssdt = sdt_from_rsdt(rsdt, "SSDT", ssdt); } } @@ -876,7 +964,7 @@ write_dsdt(int fd, struct ACPIsdt *rsdt, struct ACPIsdt *dsdt) } void -dsdt_save_file(char *outfile, struct ACPIsdt *rsdt, struct ACPIsdt *dsdp) +dsdt_save_file(char *outfile, ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdp) { int fd; mode_t mode; @@ -893,7 +981,7 @@ dsdt_save_file(char *outfile, struct ACPIsdt *rsdt, struct ACPIsdt *dsdp) } void -aml_disassemble(struct ACPIsdt *rsdt, struct ACPIsdt *dsdp) +aml_disassemble(ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdp) { char buf[PATH_MAX], tmpstr[PATH_MAX]; const char *tmpdir; @@ -949,40 +1037,44 @@ aml_disassemble(struct ACPIsdt *rsdt, struct ACPIsdt *dsdp) } void -sdt_print_all(struct ACPIsdt *rsdp) +sdt_print_all(ACPI_TABLE_HEADER *rsdp) { acpi_handle_rsdt(rsdp); } /* Fetch a table matching the given signature via the RSDT. 
*/ -struct ACPIsdt * -sdt_from_rsdt(struct ACPIsdt *rsdt, const char *sig, struct ACPIsdt *last) +ACPI_TABLE_HEADER * +sdt_from_rsdt(ACPI_TABLE_HEADER *rsdp, const char *sig, ACPI_TABLE_HEADER *last) { - struct ACPIsdt *sdt; + ACPI_TABLE_HEADER *sdt; + ACPI_TABLE_RSDT *rsdt; + ACPI_TABLE_XSDT *xsdt; vm_offset_t addr; int entries, i; - entries = (rsdt->len - SIZEOF_SDT_HDR) / addr_size; + rsdt = (ACPI_TABLE_RSDT *)rsdp; + xsdt = (ACPI_TABLE_XSDT *)rsdp; + entries = (rsdp->Length - sizeof(ACPI_TABLE_HEADER)) / addr_size; for (i = 0; i < entries; i++) { switch (addr_size) { case 4: - addr = le32dec((char*)rsdt->body + i * addr_size); + addr = le32toh(rsdt->TableOffsetEntry[i]); break; case 8: - addr = le64dec((char*)rsdt->body + i * addr_size); + addr = le64toh(xsdt->TableOffsetEntry[i]); break; default: assert((addr = 0)); } - sdt = (struct ACPIsdt *)acpi_map_sdt(addr); + sdt = (ACPI_TABLE_HEADER *)acpi_map_sdt(addr); if (last != NULL) { if (sdt == last) last = NULL; continue; } - if (memcmp(sdt->signature, sig, strlen(sig))) + if (memcmp(sdt->Signature, sig, strlen(sig))) continue; - if (acpi_checksum(sdt, sdt->len)) + if (acpi_checksum(sdt, sdt->Length)) errx(1, "RSDT entry %d is corrupt", i); return (sdt); } @@ -990,17 +1082,17 @@ sdt_from_rsdt(struct ACPIsdt *rsdt, const char *sig, struct ACPIsdt *last) return (NULL); } -struct ACPIsdt * -dsdt_from_fadt(struct FADTbody *fadt) +ACPI_TABLE_HEADER * +dsdt_from_fadt(ACPI_TABLE_FADT *fadt) { - struct ACPIsdt *sdt; + ACPI_TABLE_HEADER *sdt; - /* Use the DSDT address if it is version 1, otherwise use X_DSDT. */ + /* Use the DSDT address if it is version 1, otherwise use XDSDT. 
*/ if (acpi_get_fadt_revision(fadt) == 1) - sdt = (struct ACPIsdt *)acpi_map_sdt(fadt->dsdt_ptr); + sdt = (ACPI_TABLE_HEADER *)acpi_map_sdt(fadt->Dsdt); else - sdt = (struct ACPIsdt *)acpi_map_sdt(fadt->x_dsdt_ptr); - if (acpi_checksum(sdt, sdt->len)) + sdt = (ACPI_TABLE_HEADER *)acpi_map_sdt(fadt->XDsdt); + if (acpi_checksum(sdt, sdt->Length)) errx(1, "DSDT is corrupt\n"); return (sdt); } diff --git a/usr.sbin/acpi/acpidump/acpi_user.c b/usr.sbin/acpi/acpidump/acpi_user.c index d9d9c24..f0c47b4 100644 --- a/usr.sbin/acpi/acpidump/acpi_user.c +++ b/usr.sbin/acpi/acpidump/acpi_user.c @@ -94,40 +94,43 @@ acpi_user_find_mapping(vm_offset_t pa, size_t size) return (map); } -static struct ACPIrsdp * +static ACPI_TABLE_RSDP * acpi_get_rsdp(u_long addr) { - struct ACPIrsdp rsdp; + ACPI_TABLE_RSDP rsdp; size_t len; /* Read in the table signature and check it. */ pread(acpi_mem_fd, &rsdp, 8, addr); - if (memcmp(rsdp.signature, "RSD PTR ", 8)) + if (memcmp(rsdp.Signature, "RSD PTR ", 8)) return (NULL); /* Read the entire table. */ pread(acpi_mem_fd, &rsdp, sizeof(rsdp), addr); - /* Run the checksum only over the version 1 header. */ - if (acpi_checksum(&rsdp, 20)) + /* Check the standard checksum. */ + if (acpi_checksum(&rsdp, ACPI_RSDP_CHECKSUM_LENGTH) != 0) + return (NULL); + + /* Check extended checksum if table version >= 2. */ + if (rsdp.Revision >= 2 && + acpi_checksum(&rsdp, ACPI_RSDP_XCHECKSUM_LENGTH) != 0) return (NULL); /* If the revision is 0, assume a version 1 length. */ - if (rsdp.revision == 0) - len = 20; + if (rsdp.Revision == 0) + len = ACPI_RSDP_REV0_SIZE; else - len = rsdp.length; - - /* XXX Should handle ACPI 2.0 RSDP extended checksum here. */ + len = rsdp.Length; return (acpi_map_physical(addr, len)); } -static struct ACPIrsdp * +static ACPI_TABLE_RSDP * acpi_scan_rsd_ptr(void) { #if defined(__amd64__) || defined(__i386__) - struct ACPIrsdp *rsdp; + ACPI_TABLE_RSDP *rsdp; u_long addr, end; /* @@ -137,15 +140,15 @@ acpi_scan_rsd_ptr(void) * 1. 
EBDA (1 KB area addressed by the 16 bit pointer at 0x40E * 2. High memory (0xE0000 - 0xFFFFF) */ - addr = RSDP_EBDA_PTR; + addr = ACPI_EBDA_PTR_LOCATION; pread(acpi_mem_fd, &addr, sizeof(uint16_t), addr); addr <<= 4; - end = addr + RSDP_EBDA_SIZE; + end = addr + ACPI_EBDA_WINDOW_SIZE; for (; addr < end; addr += 16) if ((rsdp = acpi_get_rsdp(addr)) != NULL) return (rsdp); - addr = RSDP_HI_START; - end = addr + RSDP_HI_SIZE; + addr = ACPI_HI_RSDP_WINDOW_BASE; + end = addr + ACPI_HI_RSDP_WINDOW_SIZE; for (; addr < end; addr += 16) if ((rsdp = acpi_get_rsdp(addr)) != NULL) return (rsdp); @@ -156,10 +159,10 @@ acpi_scan_rsd_ptr(void) /* * Public interfaces */ -struct ACPIrsdp * +ACPI_TABLE_RSDP * acpi_find_rsd_ptr(void) { - struct ACPIrsdp *rsdp; + ACPI_TABLE_RSDP *rsdp; char buf[20]; u_long addr; size_t len; @@ -191,10 +194,10 @@ acpi_map_physical(vm_offset_t pa, size_t size) return (map->va + (pa - map->pa)); } -struct ACPIsdt * +ACPI_TABLE_HEADER * dsdt_load_file(char *infile) { - struct ACPIsdt *sdt; + ACPI_TABLE_HEADER *sdt; uint8_t *dp; struct stat sb; @@ -210,8 +213,9 @@ dsdt_load_file(char *infile) if (dp == NULL) errx(1, "mmap %s", infile); - sdt = (struct ACPIsdt *)dp; - if (strncmp(dp, "DSDT", 4) != 0 || acpi_checksum(sdt, sdt->len) != 0) + sdt = (ACPI_TABLE_HEADER *)dp; + if (strncmp(dp, ACPI_SIG_DSDT, 4) != 0 || + acpi_checksum(sdt, sdt->Length) != 0) return (NULL); return (sdt); diff --git a/usr.sbin/acpi/acpidump/acpidump.c b/usr.sbin/acpi/acpidump/acpidump.c index a601ac2..38844b6 100644 --- a/usr.sbin/acpi/acpidump/acpidump.c +++ b/usr.sbin/acpi/acpidump/acpidump.c @@ -54,9 +54,9 @@ usage(const char *progname) int main(int argc, char *argv[]) { + ACPI_TABLE_HEADER *rsdt, *sdt; char c, *progname; char *dsdt_input_file, *dsdt_output_file; - struct ACPIsdt *rsdt, *sdt; dsdt_input_file = dsdt_output_file = NULL; progname = argv[0]; @@ -117,8 +117,8 @@ main(int argc, char *argv[]) /* Translate RSDT to DSDT pointer */ if (dsdt_input_file == NULL) { - sdt = 
sdt_from_rsdt(rsdt, "FACP", NULL); - sdt = dsdt_from_fadt((struct FADTbody *)sdt->body); + sdt = sdt_from_rsdt(rsdt, ACPI_SIG_FADT, NULL); + sdt = dsdt_from_fadt((ACPI_TABLE_FADT *)sdt); } else { sdt = rsdt; rsdt = NULL; diff --git a/usr.sbin/acpi/acpidump/acpidump.h b/usr.sbin/acpi/acpidump/acpidump.h index 8eca6a8b..c75cabd 100644 --- a/usr.sbin/acpi/acpidump/acpidump.h +++ b/usr.sbin/acpi/acpidump/acpidump.h @@ -28,368 +28,55 @@ */ #ifndef _ACPIDUMP_H_ -#define _ACPIDUMP_H_ - -/* Generic Address structure */ -struct ACPIgas { - u_int8_t address_space_id; -#define ACPI_GAS_MEMORY 0 -#define ACPI_GAS_IO 1 -#define ACPI_GAS_PCI 2 -#define ACPI_GAS_EMBEDDED 3 -#define ACPI_GAS_SMBUS 4 -#define ACPI_GAS_FIXED 0x7f - u_int8_t bit_width; - u_int8_t bit_offset; - u_int8_t _reserved; - u_int64_t address; -} __packed; - -/* Root System Description Pointer */ -struct ACPIrsdp { - u_char signature[8]; - u_char sum; - u_char oem[6]; - u_char revision; - u_int32_t rsdt_addr; - u_int32_t length; - u_int64_t xsdt_addr; - u_char xsum; - u_char _reserved_[3]; -} __packed; - -/* System Description Table */ -struct ACPIsdt { - u_char signature[4]; - u_int32_t len; - u_char rev; - u_char check; - u_char oemid[6]; - u_char oemtblid[8]; - u_int32_t oemrev; - u_char creator[4]; - u_int32_t crerev; -#define SIZEOF_SDT_HDR 36 /* struct size except body */ - u_int32_t body[1];/* This member should be casted */ -} __packed; - -/* Fixed ACPI Description Table (body) */ -struct FADTbody { - u_int32_t facs_ptr; - u_int32_t dsdt_ptr; - u_int8_t int_model; -#define ACPI_FADT_INTMODEL_PIC 0 /* Standard PC-AT PIC */ -#define ACPI_FADT_INTMODEL_APIC 1 /* Multiple APIC */ - u_int8_t pm_profile; - u_int16_t sci_int; - u_int32_t smi_cmd; - u_int8_t acpi_enable; - u_int8_t acpi_disable; - u_int8_t s4biosreq; - u_int8_t pstate_cnt; - u_int32_t pm1a_evt_blk; - u_int32_t pm1b_evt_blk; - u_int32_t pm1a_cnt_blk; - u_int32_t pm1b_cnt_blk; - u_int32_t pm2_cnt_blk; - u_int32_t pm_tmr_blk; - u_int32_t 
gpe0_blk; - u_int32_t gpe1_blk; - u_int8_t pm1_evt_len; - u_int8_t pm1_cnt_len; - u_int8_t pm2_cnt_len; - u_int8_t pm_tmr_len; - u_int8_t gpe0_len; - u_int8_t gpe1_len; - u_int8_t gpe1_base; - u_int8_t cst_cnt; - u_int16_t p_lvl2_lat; - u_int16_t p_lvl3_lat; - u_int16_t flush_size; - u_int16_t flush_stride; - u_int8_t duty_off; - u_int8_t duty_width; - u_int8_t day_alrm; - u_int8_t mon_alrm; - u_int8_t century; - u_int16_t iapc_boot_arch; -#define FADT_FLAG_LEGACY_DEV 1 /* System has legacy devices */ -#define FADT_FLAG_8042 2 /* 8042 keyboard controller */ - u_char reserved4[1]; - u_int32_t flags; -#define FADT_FLAG_WBINVD 1 /* WBINVD is correctly supported */ -#define FADT_FLAG_WBINVD_FLUSH 2 /* WBINVD flushes caches */ -#define FADT_FLAG_PROC_C1 4 /* C1 power state supported */ -#define FADT_FLAG_P_LVL2_UP 8 /* C2 power state works on SMP */ -#define FADT_FLAG_PWR_BUTTON 16 /* Power button uses control method */ -#define FADT_FLAG_SLP_BUTTON 32 /* Sleep button uses control method */ -#define FADT_FLAG_FIX_RTC 64 /* RTC wakeup not supported */ -#define FADT_FLAG_RTC_S4 128 /* RTC can wakeup from S4 state */ -#define FADT_FLAG_TMR_VAL_EXT 256 /* TMR_VAL is 32bit */ -#define FADT_FLAG_DCK_CAP 512 /* Can support docking */ -#define FADT_FLAG_RESET_REG 1024 /* Supports RESET_REG */ -#define FADT_FLAG_SEALED_CASE 2048 /* Case cannot be opened */ -#define FADT_FLAG_HEADLESS 4096 /* No monitor */ -#define FADT_FLAG_CPU_SW_SLP 8192 /* Supports CPU software sleep */ - struct ACPIgas reset_reg; - u_int8_t reset_value; - u_int8_t reserved5[3]; - u_int64_t x_facs_ptr; - u_int64_t x_dsdt_ptr; - struct ACPIgas x_pm1a_evt_blk; - struct ACPIgas x_pm1b_evt_blk; - struct ACPIgas x_pm1a_cnt_blk; - struct ACPIgas x_pm1b_cnt_blk; - struct ACPIgas x_pm2_cnt_blk; - struct ACPIgas x_pm_tmr_blk; - struct ACPIgas x_gpe0_blk; - struct ACPIgas x_gpe1_blk; -} __packed; - -/* Firmware ACPI Control Structure */ -struct FACSbody { - u_char signature[4]; - u_int32_t len; - u_int32_t hw_sig; - /* 
- * NOTE This should be filled with physical address below 1MB!! - * sigh.... - */ - u_int32_t firm_wake_vec; - u_int32_t global_lock; -#define FACS_FLAG_LOCK_PENDING 1 /* 5.2.6.1 Global Lock */ -#define FACS_FLAG_LOCK_OWNED 2 - u_int32_t flags; -#define FACS_FLAG_S4BIOS_F 1 /* Supports S4BIOS_SEQ */ - u_int64_t x_firm_wake_vec; - u_int8_t version; - char reserved[31]; -} __packed; - -struct MADT_local_apic { - u_char cpu_id; - u_char apic_id; - u_int32_t flags; -#define ACPI_MADT_APIC_LOCAL_FLAG_ENABLED 1 -} __packed; - -struct MADT_io_apic { - u_char apic_id; - u_char reserved; - u_int32_t apic_addr; - u_int32_t int_base; -} __packed; - -struct MADT_int_override { - u_char bus; - u_char source; - u_int32_t intr; - u_int16_t mps_flags; -#define MPS_INT_FLAG_POLARITY_MASK 0x3 -#define MPS_INT_FLAG_POLARITY_CONFORM 0x0 -#define MPS_INT_FLAG_POLARITY_HIGH 0x1 -#define MPS_INT_FLAG_POLARITY_LOW 0x3 -#define MPS_INT_FLAG_TRIGGER_MASK 0xc -#define MPS_INT_FLAG_TRIGGER_CONFORM 0x0 -#define MPS_INT_FLAG_TRIGGER_EDGE 0x4 -#define MPS_INT_FLAG_TRIGGER_LEVEL 0xc -} __packed; - -struct MADT_nmi { - u_int16_t mps_flags; - u_int32_t intr; -} __packed; - -struct MADT_local_nmi { - u_char cpu_id; - u_int16_t mps_flags; - u_char lintpin; -} __packed; - -struct MADT_local_apic_override { - u_char reserved[2]; - u_int64_t apic_addr; -} __packed; - -struct MADT_io_sapic { - u_char apic_id; - u_char reserved; - u_int32_t int_base; - u_int64_t apic_addr; -} __packed; - -struct MADT_local_sapic { - u_char cpu_id; - u_char apic_id; - u_char apic_eid; - u_char reserved[3]; - u_int32_t flags; -} __packed; - -struct MADT_int_src { - u_int16_t mps_flags; - u_char type; -#define ACPI_MADT_APIC_INT_SOURCE_PMI 1 -#define ACPI_MADT_APIC_INT_SOURCE_INIT 2 -#define ACPI_MADT_APIC_INT_SOURCE_CPEI 3 /* Corrected Platform Error */ - u_char cpu_id; - u_char cpu_eid; - u_char sapic_vector; - u_int32_t intr; - u_char reserved[4]; -} __packed; - -struct MADT_APIC { - u_char type; -#define 
ACPI_MADT_APIC_TYPE_LOCAL_APIC 0 -#define ACPI_MADT_APIC_TYPE_IO_APIC 1 -#define ACPI_MADT_APIC_TYPE_INT_OVERRIDE 2 -#define ACPI_MADT_APIC_TYPE_NMI 3 -#define ACPI_MADT_APIC_TYPE_LOCAL_NMI 4 -#define ACPI_MADT_APIC_TYPE_LOCAL_OVERRIDE 5 -#define ACPI_MADT_APIC_TYPE_IO_SAPIC 6 -#define ACPI_MADT_APIC_TYPE_LOCAL_SAPIC 7 -#define ACPI_MADT_APIC_TYPE_INT_SRC 8 - u_char len; - union { - struct MADT_local_apic local_apic; - struct MADT_io_apic io_apic; - struct MADT_int_override int_override; - struct MADT_nmi nmi; - struct MADT_local_nmi local_nmi; - struct MADT_local_apic_override local_apic_override; - struct MADT_io_sapic io_sapic; - struct MADT_local_sapic local_sapic; - struct MADT_int_src int_src; - } body; -} __packed; - -struct MADTbody { - u_int32_t lapic_addr; - u_int32_t flags; -#define ACPI_APIC_FLAG_PCAT_COMPAT 1 /* System has dual-8259 setup. */ - u_char body[1]; -} __packed; - -struct HPETbody { - u_int32_t block_hwrev:8, - block_comparitors:5, - block_counter_size:1, - :1, - block_legacy_capable:1, - block_pcivendor:16; - struct ACPIgas genaddr; - u_int8_t hpet_number; - u_int16_t clock_tick __packed; -} __packed; - -/* Embedded Controller Description Table */ -struct ECDTbody { - struct ACPIgas ec_control; /* Control register */ - struct ACPIgas ec_data; /* Data register */ - uint32_t uid; /* Same value as _UID in namespace */ - uint8_t gpe_bit; /* GPE bit for the EC */ - u_char ec_id[1]; /* Variable length name string */ -} __packed; - -/* Memory Mapped PCI config space base allocation structure */ -struct MCFGbody { - uint8_t rsvd[8]; - struct { - uint64_t baseaddr; /* Base Address */ - uint16_t seg_grp; /* Segment group number */ - uint8_t start; /* Starting bus number */ - uint8_t end; /* Ending bus number */ - uint8_t rsvd[4]; /* Reserved */ - } s[1] __packed; -} __packed; - -/* System Resource Affinity Table */ -struct SRAT_cpu { - uint8_t proximity_domain_lo; - uint8_t apic_id; - uint32_t flags; -#define ACPI_SRAT_CPU_ENABLED 0x00000001 - 
uint8_t sapic_eid; - uint8_t proximity_domain_hi[3]; - uint32_t reserved; -} __packed; - -struct SRAT_memory { - uint32_t proximity_domain; - uint16_t reserved; - uint64_t base_address; - uint64_t length; - uint32_t reserved1; - uint32_t flags; -#define ACPI_SRAT_MEM_ENABLED 0x00000001 -#define ACPI_SRAT_MEM_HOT_PLUGGABLE 0x00000002 -#define ACPI_SRAT_MEM_NON_VOLATILE 0x00000002 - uint64_t reserved2; -} __packed; - -struct SRAT_x2apic { - uint16_t reserved; - uint32_t proximity_domain; - uint32_t apic_id; - uint32_t flags; -} __packed; - -struct SRATentry { - uint8_t type; -#define ACPI_SRAT_TYPE_CPU_AFFINITY 0 -#define ACPI_SRAT_TYPE_MEMORY_AFFINITY 1 -#define ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY 2 - uint8_t len; - union { - struct SRAT_cpu cpu; - struct SRAT_memory mem; - struct SRAT_x2apic x2apic; - } body; -} __packed; - -struct SRATbody { - uint32_t table_revision; - uint64_t reserved; - struct SRATentry body[0]; -} __packed; - -/* - * Addresses to scan on ia32 for the RSD PTR. According to section 5.2.2 - * of the ACPI spec, we only consider two regions for the base address: - * 1. EBDA (1 KB area addressed to by 16 bit pointer at 0x40E) - * 2. High memory (0xE0000 - 0xFFFFF) - */ -#define RSDP_EBDA_PTR 0x40E -#define RSDP_EBDA_SIZE 0x400 -#define RSDP_HI_START 0xE0000 -#define RSDP_HI_SIZE 0x20000 +#define _ACPIDUMP_H_ + +#include +#include +#include + +/* GAS address space ID constants. */ +#define ACPI_GAS_MEMORY 0 +#define ACPI_GAS_IO 1 +#define ACPI_GAS_PCI 2 +#define ACPI_GAS_EMBEDDED 3 +#define ACPI_GAS_SMBUS 4 +#define ACPI_GAS_CMOS 5 +#define ACPI_GAS_PCIBAR 6 +#define ACPI_GAS_DATATABLE 7 +#define ACPI_GAS_FIXED 0x7f + +/* Subfields in the HPET Id member. 
*/ +#define ACPI_HPET_ID_HARDWARE_REV_ID 0x000000ff +#define ACPI_HPET_ID_COMPARATORS 0x00001f00 +#define ACPI_HPET_ID_COUNT_SIZE_CAP 0x00002000 +#define ACPI_HPET_ID_LEGACY_CAPABLE 0x00008000 +#define ACPI_HPET_ID_PCI_VENDOR_ID 0xffff0000 /* Find and map the RSD PTR structure and return it for parsing */ -struct ACPIsdt *sdt_load_devmem(void); +ACPI_TABLE_HEADER *sdt_load_devmem(void); /* * Load the DSDT from a previous save file. Note that other tables are * not saved (i.e. FADT) */ -struct ACPIsdt *dsdt_load_file(char *); +ACPI_TABLE_HEADER *dsdt_load_file(char *); /* Save the DSDT to a file */ -void dsdt_save_file(char *, struct ACPIsdt *, struct ACPIsdt *); +void dsdt_save_file(char *, ACPI_TABLE_HEADER *, ACPI_TABLE_HEADER *); /* Print out as many fixed tables as possible, given the RSD PTR */ -void sdt_print_all(struct ACPIsdt *); +void sdt_print_all(ACPI_TABLE_HEADER *); /* Disassemble the AML in the DSDT */ -void aml_disassemble(struct ACPIsdt *, struct ACPIsdt *); +void aml_disassemble(ACPI_TABLE_HEADER *, ACPI_TABLE_HEADER *); /* Routines for accessing tables in physical memory */ -struct ACPIrsdp *acpi_find_rsd_ptr(void); -void *acpi_map_physical(vm_offset_t, size_t); -struct ACPIsdt *sdt_from_rsdt(struct ACPIsdt *, const char *, - struct ACPIsdt *); -struct ACPIsdt *dsdt_from_fadt(struct FADTbody *); -int acpi_checksum(void *, size_t); +ACPI_TABLE_RSDP *acpi_find_rsd_ptr(void); +void *acpi_map_physical(vm_offset_t, size_t); +ACPI_TABLE_HEADER *sdt_from_rsdt(ACPI_TABLE_HEADER *, const char *, + ACPI_TABLE_HEADER *); +ACPI_TABLE_HEADER *dsdt_from_fadt(ACPI_TABLE_FADT *); +int acpi_checksum(void *, size_t); /* Command line flags */ extern int dflag; -- cgit v1.1 From 87adb90059f2f16ea105618be06b1df00b4e4940 Mon Sep 17 00:00:00 2001 From: ed Date: Sun, 1 Nov 2009 10:30:30 +0000 Subject: MFC various commits back to stable/8: SVN r197174: Make sure we never place the cursor outside the screen. 
For some vague reason, it may be possible that scp->cursor_pos exceeds scp->ysize * scp->xsize. This means that teken_set_cursor() may get called with an invalid position. Just ignore the old cursor position in this case. Reported by: Paul B. Mahol SVN r198213: Make lock devices work properly. It turned out I did add the code to use the init state devices to set the termios structure when opening the device, but it seems I totally forgot to add the bits required to force the actual locking of flags through the lock state devices. Reported by: ru SVN r198215, r198217: Fix a typo in the jail(8) manpage. Submitted by: Jille Timmermans SVN r198216: Fix qouting in a comment, to make it look more consistent Submitted by: Jille Timmermans SVN r198223: Properly set the low watermarks when reducing the baud rate. Now that buffers are deallocated lazily, we should not use tty*q_getsize() to obtain the buffer size to calculate the low watermarks. Doing this may cause the watermark to be placed outside the typical buffer size. This caused some regressions after my previous commit to the TTY code, which allows pseudo-devices to resize the buffers as well. Reported by: yongari, dougb --- usr.sbin/jail/jail.8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/jail/jail.8 b/usr.sbin/jail/jail.8 index f3340bd..4c1a963 100644 --- a/usr.sbin/jail/jail.8 +++ b/usr.sbin/jail/jail.8 @@ -34,7 +34,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 25, 2009 +.Dd October 18, 2009 .Dt JAIL 8 .Os .Sh NAME @@ -377,7 +377,7 @@ Since raw sockets can be used to configure and interact with various network subsystems, extra caution should be used where privileged access to jails is given out to untrusted parties. .It Va allow.chflags -Normally, priveleged users inside a jail are treated as unprivileged by +Normally, privileged users inside a jail are treated as unprivileged by .Xr chflags 2 . 
When this parameter is set, such users are treated as privileged, and may manipulate system file flags subject to the usual constraints on -- cgit v1.1 From c179d4e7c36aa5fdcaffc598cd75e6b5c755c2b1 Mon Sep 17 00:00:00 2001 From: emaste Date: Mon, 2 Nov 2009 16:27:34 +0000 Subject: MFC r198029: Correct typo: thetime -> the time PR: docs/139447 Submitted by: Guido Falsi mad at madpilot dot net --- usr.sbin/ntp/doc/ntpd.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/ntp/doc/ntpd.8 b/usr.sbin/ntp/doc/ntpd.8 index 5194b75..a55fe43 100644 --- a/usr.sbin/ntp/doc/ntpd.8 +++ b/usr.sbin/ntp/doc/ntpd.8 @@ -121,7 +121,7 @@ Normally, .Nm exits with a message to the system log if the offset exceeds the panic threshold, which is 1000 s by default. -This option allows thetime to be set to any value without restriction; +This option allows the time to be set to any value without restriction; however, this can happen only once. If the threshold is exceeded after that, .Nm -- cgit v1.1 From c3498f81a66ccf76700d59a25b33de86bc23d981 Mon Sep 17 00:00:00 2001 From: jhb Date: Wed, 4 Nov 2009 16:05:09 +0000 Subject: MFC 198586: Include the output of the ddb(4) capture buffer. 
--- usr.sbin/crashinfo/crashinfo.sh | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/crashinfo/crashinfo.sh b/usr.sbin/crashinfo/crashinfo.sh index cd41009..886affd 100755 --- a/usr.sbin/crashinfo/crashinfo.sh +++ b/usr.sbin/crashinfo/crashinfo.sh @@ -304,3 +304,10 @@ echo "------------------------------------------------------------------------" echo "kernel config" echo config -x $KERNEL + +echo +echo "------------------------------------------------------------------------" +echo "ddb capture buffer" +echo + +ddb capture -M $VMCORE -N $KERNEL print -- cgit v1.1 From 876c22287f42d0f7f39118133bdc2a98390de6ce Mon Sep 17 00:00:00 2001 From: brueffer Date: Sun, 8 Nov 2009 12:08:42 +0000 Subject: MFC: r198750 Expand DESCRIPTION and add a basic EXAMPLES section. --- usr.sbin/usbconfig/usbconfig.8 | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/usbconfig/usbconfig.8 b/usr.sbin/usbconfig/usbconfig.8 index 07dbed2..0914d9e 100644 --- a/usr.sbin/usbconfig/usbconfig.8 +++ b/usr.sbin/usbconfig/usbconfig.8 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd Sep 28, 2008 +.Dd November 1, 2009 .Dt USBCONFIG 8 .Os .Sh NAME @@ -49,5 +49,13 @@ Should only be used in conjunction with the unit argument. .It Fl h Show help and available commands. .El +.Pp +When called without options, +.Nm +prints a list of all available USB devices. +.Sh EXAMPLES +Show information about the device on USB bus 1 at address 2: +.Pp +.Dl usbconfig -u 1 -a 2 dump_info .Sh SEE ALSO .Xr usb 4 -- cgit v1.1 From e2307867a0f1550aed25f436b7086898fa587047 Mon Sep 17 00:00:00 2001 From: edwin Date: Mon, 9 Nov 2009 11:32:18 +0000 Subject: MFC of 198831, tzcode2009q - Cleanup unnecessary local variables in zdump. 
- Fix man-page --- usr.sbin/zic/zdump.c | 10 +++++----- usr.sbin/zic/zic.8 | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/zic/zdump.c b/usr.sbin/zic/zdump.c index 263a592..0f61125 100644 --- a/usr.sbin/zic/zdump.c +++ b/usr.sbin/zic/zdump.c @@ -6,7 +6,7 @@ #ifndef lint static const char rcsid[] = "$FreeBSD$"; -static char elsieid[] = "@(#)zdump.c 8.9"; +static char elsieid[] = "@(#)zdump.c 8.10"; #endif /* not lint */ /* @@ -152,7 +152,7 @@ static size_t longest; static char * progname; static int warned; -static void usage(const char *progname, FILE *stream, int status); +static void usage(FILE *stream, int status); static char * abbr(struct tm * tmp); static void abbrok(const char * abbrp, const char * zone); static long delta(struct tm * newp, struct tm * oldp); @@ -273,7 +273,7 @@ char * argv[]; if (strcmp(argv[i], "--version") == 0) { errx(EXIT_SUCCESS, "%s", elsieid); } else if (strcmp(argv[i], "--help") == 0) { - usage(progname, stdout, EXIT_SUCCESS); + usage(stdout, EXIT_SUCCESS); } vflag = 0; cutarg = NULL; @@ -283,7 +283,7 @@ char * argv[]; else cutarg = optarg; if ((c != -1) || (optind == argc - 1 && strcmp(argv[optind], "=") == 0)) { - usage(progname, stderr, EXIT_FAILURE); + usage(stderr, EXIT_FAILURE); } if (vflag) { if (cutarg != NULL) { @@ -468,7 +468,7 @@ const long y; } static void -usage(const char *progname, FILE *stream, int status) +usage(FILE *stream, int status) { fprintf(stream, _("usage: %s [--version] [-v] [--help] [-c [loyear,]hiyear] zonename ...\n\ diff --git a/usr.sbin/zic/zic.8 b/usr.sbin/zic/zic.8 index 082ff25..f7ff815b 100644 --- a/usr.sbin/zic/zic.8 +++ b/usr.sbin/zic/zic.8 @@ -260,7 +260,7 @@ the variable part is null. 
.El .Pp A zone line has the form: -.Dl "Zone NAME GMTOFF RULES/SAVE FORMAT [UNTILYEAR [MONTH [DAY [TIME]]]] +.Dl "Zone NAME GMTOFF RULES/SAVE FORMAT [UNTILYEAR [MONTH [DAY [TIME]]]]" For example: .Dl "Zone Australia/Adelaide 9:30 Aus CST 1971 Oct 31 2:00 The fields that make up a zone line are: -- cgit v1.1 From 1e4d801926749dc991f39df4ad3c4f418d7fb7e7 Mon Sep 17 00:00:00 2001 From: delphij Date: Tue, 10 Nov 2009 00:41:22 +0000 Subject: MFC r198846: Set umask to 077 instead of the default. This prevents non-root users from reading crashinfo output, which could contain some sensitive information. --- usr.sbin/crashinfo/crashinfo.sh | 2 ++ 1 file changed, 2 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/crashinfo/crashinfo.sh b/usr.sbin/crashinfo/crashinfo.sh index 886affd..60f0360 100755 --- a/usr.sbin/crashinfo/crashinfo.sh +++ b/usr.sbin/crashinfo/crashinfo.sh @@ -147,6 +147,8 @@ fi echo "Writing crash summary to $FILE." +umask 077 + # Simulate uname ostype=$(echo -e printf '"%s", ostype' | gdb -x /dev/stdin -batch $KERNEL) osrelease=$(echo -e printf '"%s", osrelease' | gdb -x /dev/stdin -batch $KERNEL) -- cgit v1.1 From f740bf386db919250c43449fcc60f497479881f6 Mon Sep 17 00:00:00 2001 From: jh Date: Wed, 11 Nov 2009 19:50:52 +0000 Subject: MFC r197833: When run() returns an error, print the error message also in non-interactive mode. Previously error messages were printed only in interactive mode.
PR: bin/124517 Approved by: trasz (mentor) --- usr.sbin/cdcontrol/cdcontrol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/cdcontrol/cdcontrol.c b/usr.sbin/cdcontrol/cdcontrol.c index ede5904..a4b13d8 100644 --- a/usr.sbin/cdcontrol/cdcontrol.c +++ b/usr.sbin/cdcontrol/cdcontrol.c @@ -241,7 +241,7 @@ int main (int argc, char **argv) if (argc > 0) { char buf[80], *p; - int len; + int len, rc; for (p=buf; argc-->0; ++argv) { len = strlen (*argv); @@ -257,7 +257,11 @@ int main (int argc, char **argv) } *p = 0; arg = parse (buf, &cmd); - return (run (cmd, arg)); + rc = run (cmd, arg); + if (rc < 0 && verbose) + warn(NULL); + + return (rc); } if (verbose == 1) -- cgit v1.1 From 0de84c4cd85a8bd69e98a705548993a95bd3930d Mon Sep 17 00:00:00 2001 From: cperciva Date: Thu, 3 Dec 2009 09:18:40 +0000 Subject: Disable SSL renegotiation in order to protect against a serious protocol flaw. [09:15] Correctly handle failures from unsetenv resulting from a corrupt environment in rtld-elf. [09:16] Fix permissions in freebsd-update in order to prevent leakage of sensitive files. [09:17] Approved by: so (cperciva) Security: FreeBSD-SA-09:15.ssl Security: FreeBSD-SA-09:16.rtld Security: FreeBSD-SA-09:17.freebsd-update --- usr.sbin/freebsd-update/freebsd-update.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'usr.sbin') diff --git a/usr.sbin/freebsd-update/freebsd-update.sh b/usr.sbin/freebsd-update/freebsd-update.sh index 2eacca8..de0c1a5 100644 --- a/usr.sbin/freebsd-update/freebsd-update.sh +++ b/usr.sbin/freebsd-update/freebsd-update.sh @@ -600,6 +600,7 @@ fetch_check_params () { echo ${WORKDIR} exit 1 fi + chmod 700 ${WORKDIR} cd ${WORKDIR} || exit 1 # Generate release number.
The s/SECURITY/RELEASE/ bit exists -- cgit v1.1 From 978399517368c0c5ffb85ac5a5707f2d4896df50 Mon Sep 17 00:00:00 2001 From: thompsa Date: Wed, 9 Dec 2009 23:14:53 +0000 Subject: MFC r198833 Add more verbose output when dumping the configuration descriptor. --- usr.sbin/usbconfig/dump.c | 69 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 12 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/usbconfig/dump.c b/usr.sbin/usbconfig/dump.c index 6a2fd02..6dfc6ec 100644 --- a/usr.sbin/usbconfig/dump.c +++ b/usr.sbin/usbconfig/dump.c @@ -100,21 +100,66 @@ dump_field(struct libusb20_device *pdev, const char *plevel, printf("%s%s = 0x%04x ", plevel, field, value); - if ((field[0] != 'i') || (field[1] == 'd')) { - printf("\n"); - return; - } - if (value == 0) { - printf(" \n"); - return; + if (strlen(plevel) == 8) { + /* Endpoint Descriptor */ + + if (strcmp(field, "bEndpointAddress") == 0) { + if (value & 0x80) + printf(" \n"); + else + printf(" \n"); + return; + } + + if (strcmp(field, "bmAttributes") == 0) { + switch (value & 0x03) { + case 0: + printf(" \n"); + break; + case 1: + switch (value & 0x0C) { + case 0x00: + printf(" \n"); + break; + case 0x04: + printf(" \n"); + break; + case 0x08: + printf(" \n"); + break; + default: + printf(" \n"); + break; + } + break; + case 2: + printf(" \n"); + break; + default: + printf(" \n"); + break; + } + return; + } } - if (libusb20_dev_req_string_simple_sync(pdev, value, - temp_string, sizeof(temp_string))) { - printf(" \n"); + + if ((field[0] == 'i') && (field[1] != 'd')) { + /* Indirect String Descriptor */ + if (value == 0) { + printf(" \n"); + return; + } + if (libusb20_dev_req_string_simple_sync(pdev, value, + temp_string, sizeof(temp_string))) { + printf(" \n"); + return; + } + printf(" <%s>\n", temp_string); return; } - printf(" <%s>\n", temp_string); - return; + + /* No additional information */ + printf("\n"); } static void -- cgit v1.1 From 
67756fdf3415ae53dc300e0bd039c18d8a601909 Mon Sep 17 00:00:00 2001 From: syrinx Date: Sat, 12 Dec 2009 20:26:11 +0000 Subject: MFC r200122 Make sure enough memory is allocated for a struct pft_entry when refreshing the list of pf tables. --- usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c b/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c index 919e5d4..4e554c0 100644 --- a/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c +++ b/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c @@ -1104,7 +1104,7 @@ pft_refresh(void) } for (i = 0; i < numtbls; i++) { - e = malloc(sizeof(struct pfr_tstats)); + e = malloc(sizeof(struct pft_entry)); if (e == NULL) goto err1; e->index = i + 1; -- cgit v1.1 From 11e4f3cd81934965eb062bc9bfefe40c76eaf8aa Mon Sep 17 00:00:00 2001 From: attilio Date: Sat, 19 Dec 2009 19:35:53 +0000 Subject: MFC r199804: Avoid sshd, crond, inetd and syslogd to be killed in an high-pressure swapping environment. 
Sponsored by: Sandvine Incorporated --- usr.sbin/cron/cron/cron.c | 4 ++++ usr.sbin/inetd/inetd.c | 4 ++++ usr.sbin/syslogd/syslogd.c | 4 ++++ 3 files changed, 12 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/cron/cron/cron.c b/usr.sbin/cron/cron/cron.c index 101989c..52cdcc8 100644 --- a/usr.sbin/cron/cron/cron.c +++ b/usr.sbin/cron/cron/cron.c @@ -24,6 +24,7 @@ static const char rcsid[] = #include "cron.h" +#include #include #if SYS_TIME_H # include @@ -134,6 +135,9 @@ main(argc, argv) } } + if (madvise(NULL, 0, MADV_PROTECT) != 0) + log_it("CRON", getpid(), "WARNING", "madvise() failed"); + pidfile_write(pfh); database.head = NULL; database.tail = NULL; diff --git a/usr.sbin/inetd/inetd.c b/usr.sbin/inetd/inetd.c index 6bcbac6..e8d34f4 100644 --- a/usr.sbin/inetd/inetd.c +++ b/usr.sbin/inetd/inetd.c @@ -110,6 +110,7 @@ __FBSDID("$FreeBSD$"); */ #include #include +#include #include #include #include @@ -497,6 +498,9 @@ main(int argc, char **argv) } } + if (madvise(NULL, 0, MADV_PROTECT) != 0) + syslog(LOG_WARNING, "madvise() failed: %s", strerror(errno)); + for (i = 0; i < PERIPSIZE; ++i) LIST_INIT(&proctable[i]); diff --git a/usr.sbin/syslogd/syslogd.c b/usr.sbin/syslogd/syslogd.c index 3e628a2..5cb0af9 100644 --- a/usr.sbin/syslogd/syslogd.c +++ b/usr.sbin/syslogd/syslogd.c @@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -352,6 +353,9 @@ main(int argc, char *argv[]) pid_t ppid = 1, spid; socklen_t len; + if (madvise(NULL, 0, MADV_PROTECT) != 0) + dprintf("madvise() failed: %s\n", strerror(errno)); + bindhostname = NULL; while ((ch = getopt(argc, argv, "468Aa:b:cCdf:kl:m:nop:P:sS:Tuv")) != -1) -- cgit v1.1 From 9495529b908a24314a8a60de5e15c3a06a2a46f2 Mon Sep 17 00:00:00 2001 From: jamie Date: Sun, 20 Dec 2009 04:45:32 +0000 Subject: MFC r200449: Don't free jail parameter values after printing them - jail_param_get expects them to be there for the next jail in the list. 
PR: bin/141359 --- usr.sbin/jls/jls.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/jls/jls.c b/usr.sbin/jls/jls.c index 0661ee3..3f4800c 100644 --- a/usr.sbin/jls/jls.c +++ b/usr.sbin/jls/jls.c @@ -425,11 +425,6 @@ print_jail(int pflags, int jflags) if (params[i].jp_flags & JP_USER) free(param_values[i]); } - for (i = 0; i < nparams; i++) - if (!(params[i].jp_flags & JP_RAWVALUE)) { - free(params[i].jp_value); - params[i].jp_value = NULL; - } return (jid); } -- cgit v1.1 From 145c1109517ba8110c826af4a7bfcc913d13e413 Mon Sep 17 00:00:00 2001 From: edwin Date: Tue, 29 Dec 2009 10:05:20 +0000 Subject: MFC of r200832, tzdata2009t zic: - Fix URL / reference to Calendrical Calculations: Third Edition libc/stdtime: - Fix typo in tzfile.5 (no changes in our part) --- usr.sbin/zic/Theory | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/zic/Theory b/usr.sbin/zic/Theory index 8e93365..752125c 100644 --- a/usr.sbin/zic/Theory +++ b/usr.sbin/zic/Theory @@ -1,4 +1,4 @@ -@(#)Theory 8.3 +@(#)Theory 8.4 This file is in the public domain, so clarified as of 2009-05-17 by Arthur David Olson. $FreeBSD$ @@ -361,10 +361,10 @@ abbreviations like `EST'; this avoids the ambiguity. Calendrical issues are a bit out of scope for a time zone database, but they indicate the sort of problems that we would run into if we extended the time zone database further into the past. An excellent -resource in this area is Edward M. Reingold and Nachum Dershowitz, - -Calendrical Calculations: The Millennium Edition -, Cambridge University Press (2001). Other information and +resource in this area is Nachum Dershowitz and Edward M. Reingold, + +Calendrical Calculations: Third Edition +, Cambridge University Press (2008). Other information and sources are given below. They sometimes disagree. 
-- cgit v1.1 From 19daa7badb12d50ab733b4d68d59ed2a61ee5a22 Mon Sep 17 00:00:00 2001 From: dougb Date: Wed, 30 Dec 2009 19:36:40 +0000 Subject: MFC r200743: The service command is an easy interface to the rc.d system. Its primary purpose is to start and stop services provided by the rc.d scripts, however it can also be used to list the scripts using various criteria. MFC r200809, r200980, r201035, r201069, 201070: Updates/fixes suggested after the initial introduction --- usr.sbin/Makefile | 1 + usr.sbin/service/Makefile | 7 +++ usr.sbin/service/service.8 | 131 ++++++++++++++++++++++++++++++++++++++++++++ usr.sbin/service/service.sh | 120 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 usr.sbin/service/Makefile create mode 100644 usr.sbin/service/service.8 create mode 100755 usr.sbin/service/service.sh (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index 160b122..99a0620 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -167,6 +167,7 @@ SUBDIR= ${_ac} \ ${_sa} \ ${_sade} \ ${_sendmail} \ + service \ setfib \ setfmac \ setpmac \ diff --git a/usr.sbin/service/Makefile b/usr.sbin/service/Makefile new file mode 100644 index 0000000..a03714d --- /dev/null +++ b/usr.sbin/service/Makefile @@ -0,0 +1,7 @@ +# $FreeBSD$ + +SCRIPTS=service.sh +MAN= service.8 + +.include + diff --git a/usr.sbin/service/service.8 b/usr.sbin/service/service.8 new file mode 100644 index 0000000..a7f3135 --- /dev/null +++ b/usr.sbin/service/service.8 @@ -0,0 +1,131 @@ +.\" Copyright (c) 2009 Douglas Barton +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2.
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd December 26, 2009 +.Dt service 8 +.Os +.Sh NAME +.Nm service +.Nd "control (start/stop/etc.) or list system services" +.Sh SYNOPSIS +.Nm +.Fl e +.Nm +.Op Fl v +.Fl l | r +.Nm +.Op Fl v +.Ar start|stop|etc. +.Sh DESCRIPTION +The +.Nm +command is an easy interface to the rc.d system. +Its primary purpose is to start and stop services provided +by the rc.d scripts. +When used for this purpose it will set the same restricted +environment that is in use at boot time (see below). +It can also be used to list +the scripts using various criteria. +.Pp +The options are as follows: +.Bl -tag -width F1 +.It Fl e +List services that are enabled. +The list of scripts to check is compiled using +.Xr rcorder 8 +the same way that it is done in +.Xr rc 8 , +then that list of scripts is checked for an +.Qq rcvar +assignment. +If present the script is checked to see if it is enabled. +.It Fl l +List all files in +.Pa /etc/rc.d +and the local startup directories. 
+As described in +.Xr rc.conf 5 +this is usually +.Pa /usr/local/etc/rc.d . +All files will be listed whether they are an actual +rc.d script or not. +.It Fl r +Generate the +.Xr rcorder 8 +as in +.Fl e +above, but list all of the files, not just what is enabled. +.It Fl v +Be slightly more verbose +.El +.Sh ENVIRONMENT +When used to run rc.d scripts the +.Nm +command sets +.Ev HOME +to +.Pa / +and +.Ev PATH +to +.Pa /sbin:/bin:/usr/sbin:/usr/bin +which is how they are set in +.Pa /etc/rc +at boot time. +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +The following are examples of typical usage of the +.Nm +command: +.Pp +.Dl "service named status" +.Dl "service -rv" +.Pp +The following programmable completion entry can be use in +.Xr bash 1 +for the names of the rc.d scripts: +.Dl "_service () {" +.Dl " local cur" +.Dl " cur=${COMP_WORDS[COMP_CWORD]}" +.Dl " COMPREPLY=( $( compgen -W '$( service -l )' -- $cur ) )" +.Dl " return 0" +.Dl "}" +.Dl "complete -F _service service" +.Sh SEE ALSO +.Xr bash 1 Pq Pa ports/shells/bash , +.Xr rc.conf 5 , +.Xr rc 8 , +.Xr rcorder 8 +.Sh HISTORY +The +.Nm +manual page service first appeared in +.Fx 7.3 . +.Sh AUTHORS +This +manual page was written by +.An Douglas Barton . diff --git a/usr.sbin/service/service.sh b/usr.sbin/service/service.sh new file mode 100755 index 0000000..1794315 --- /dev/null +++ b/usr.sbin/service/service.sh @@ -0,0 +1,120 @@ +#!/bin/sh + +# $FreeBSD$ + +# Copyright (c) 2009 Douglas Barton +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +. /etc/rc.subr +load_rc_config 'XXX' + +usage () { + echo '' + echo 'Usage:' + echo "${0##*/} -e" + echo "${0##*/} [-v] -l | -r" + echo "${0##*/} [-v] start|stop|etc." + echo "${0##*/} -h" + echo '' + echo '-e Show services that are enabled' + echo "-l List all scripts in /etc/rc.d and $local_startup" + echo '-r Show the results of boot time rcorder' + echo '-v Verbose' + echo '' +} + +while getopts 'ehlrv' COMMAND_LINE_ARGUMENT ; do + case "${COMMAND_LINE_ARGUMENT}" in + e) ENABLED=eopt ;; + h) usage ; exit 0 ;; + l) LIST=lopt ;; + r) RCORDER=ropt ;; + v) VERBOSE=vopt ;; + *) usage ; exit 1 ;; + esac +done +shift $(( $OPTIND - 1 )) + +if [ -n "$ENABLED" -o -n "$RCORDER" ]; then + # Copied from /etc/rc + skip="-s nostart" + if [ `/sbin/sysctl -n security.jail.jailed` -eq 1 ]; then + skip="$skip -s nojail" + fi + [ -n "$local_startup" ] && find_local_scripts_new + files=`rcorder ${skip} /etc/rc.d/* ${local_rc} 2>/dev/null` +fi + +if [ -n "$ENABLED" ]; then + for file in $files; do + if grep -q ^rcvar $file; then + eval `grep ^name= $file` + eval `grep ^rcvar $file` + checkyesno $rcvar 2>/dev/null && echo $file + fi + done + exit 0 +fi + +if [ -n "$LIST" ]; then + for dir in /etc/rc.d $local_startup; do + 
[ -n "$VERBOSE" ] && echo "From ${dir}:" + cd $dir && for file in *; do echo $file; done + done + exit 0 +fi + +if [ -n "$RCORDER" ]; then + for file in $files; do + echo $file + if [ -n "$VERBOSE" ]; then + case "$file" in + */${early_late_divider}) + echo '========= Early/Late Divider =========' ;; + esac + fi + done + exit 0 +fi + +if [ $# -gt 1 ]; then + script=$1 + shift +else + usage + exit 1 +fi + +cd / +for dir in /etc/rc.d $local_startup; do + if [ -x "$dir/$script" ]; then + [ -n "$VERBOSE" ] && echo "$script is located in $dir" + exec env -i HOME=/ PATH=/sbin:/bin:/usr/sbin:/usr/bin $dir/$script $* + fi +done + +# If the script was not found +echo "$script does not exist in /etc/rc.d or the local startup" +echo "directories (${local_startup})" +exit 1 -- cgit v1.1 From a031d7ce23fe46471eadeb80cd4f0594dcf8abaf Mon Sep 17 00:00:00 2001 From: mav Date: Tue, 5 Jan 2010 13:55:49 +0000 Subject: MFC r196799: Don't bother obtaining the ident if we are not going to print it. --- usr.sbin/diskinfo/diskinfo.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/diskinfo/diskinfo.c b/usr.sbin/diskinfo/diskinfo.c index 48b019b..7ecf7cb 100644 --- a/usr.sbin/diskinfo/diskinfo.c +++ b/usr.sbin/diskinfo/diskinfo.c @@ -104,9 +104,6 @@ main(int argc, char **argv) error = ioctl(fd, DIOCGFWHEADS, &fwheads); if (error) fwheads = 0; - error = ioctl(fd, DIOCGIDENT, ident); - if (error) - ident[0] = '\0'; if (!opt_v) { printf("%s", argv[i]); printf("\t%u", sectorsize); @@ -133,7 +130,7 @@ main(int argc, char **argv) printf("\t%-12u\t# Heads according to firmware.\n", fwheads); printf("\t%-12u\t# Sectors according to firmware.\n", fwsectors); } - if (ident[0] != '\0') + if (ioctl(fd, DIOCGIDENT, ident) == 0) printf("\t%-12s\t# Disk ident.\n", ident); } printf("\n"); -- cgit v1.1 From 7a9bcff0747e4bce3b347000ded01bf7130f4c7c Mon Sep 17 00:00:00 2001 From: mav Date: Tue, 5 Jan 2010 13:56:58 +0000 Subject: MFC 200968: Make diskinfo 
report disk stripe size and offset. It should help users to make file systems optimally aligned and tuned for better performance. --- usr.sbin/diskinfo/diskinfo.8 | 3 ++- usr.sbin/diskinfo/diskinfo.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/diskinfo/diskinfo.8 b/usr.sbin/diskinfo/diskinfo.8 index dfc7040..f68d426 100644 --- a/usr.sbin/diskinfo/diskinfo.8 +++ b/usr.sbin/diskinfo/diskinfo.8 @@ -46,7 +46,8 @@ and optionally runs a naive performance test on the device. .Pp If given no arguments, the output will be a single line per specified device with the following fields: device name, sectorsize, media size in bytes, -media size in sectors, firmware cylinders, firmware heads, and firmware sectors. +media size in sectors, stripe size, stripe offset, firmware cylinders, +firmware heads, and firmware sectors. The last three fields are only present if the information is available. .Pp If given the diff --git a/usr.sbin/diskinfo/diskinfo.c b/usr.sbin/diskinfo/diskinfo.c index 7ecf7cb..1f967b7 100644 --- a/usr.sbin/diskinfo/diskinfo.c +++ b/usr.sbin/diskinfo/diskinfo.c @@ -58,7 +58,7 @@ main(int argc, char **argv) { int i, ch, fd, error; char buf[BUFSIZ], ident[DISK_IDENT_SIZE]; - off_t mediasize; + off_t mediasize, stripesize, stripeoffset; u_int sectorsize, fwsectors, fwheads; while ((ch = getopt(argc, argv, "ctv")) != -1) { @@ -104,11 +104,19 @@ main(int argc, char **argv) error = ioctl(fd, DIOCGFWHEADS, &fwheads); if (error) fwheads = 0; + error = ioctl(fd, DIOCGSTRIPESIZE, &stripesize); + if (error) + stripesize = 0; + error = ioctl(fd, DIOCGSTRIPEOFFSET, &stripeoffset); + if (error) + stripeoffset = 0; if (!opt_v) { printf("%s", argv[i]); printf("\t%u", sectorsize); printf("\t%jd", (intmax_t)mediasize); printf("\t%jd", (intmax_t)mediasize/sectorsize); + printf("\t%jd", (intmax_t)stripesize); + printf("\t%jd", (intmax_t)stripeoffset); if (fwsectors != 0 && fwheads != 0) { printf("\t%jd", 
(intmax_t)mediasize / (fwsectors * fwheads * sectorsize)); @@ -124,6 +132,8 @@ main(int argc, char **argv) (intmax_t)mediasize, buf); printf("\t%-12jd\t# mediasize in sectors\n", (intmax_t)mediasize/sectorsize); + printf("\t%-12jd\t# stripesize\n", stripesize); + printf("\t%-12jd\t# stripeoffset\n", stripeoffset); if (fwsectors != 0 && fwheads != 0) { printf("\t%-12jd\t# Cylinders according to firmware.\n", (intmax_t)mediasize / (fwsectors * fwheads * sectorsize)); -- cgit v1.1 From ea5192e625ebf087e54f747a66b38f4034431708 Mon Sep 17 00:00:00 2001 From: qingli Date: Tue, 5 Jan 2010 22:14:55 +0000 Subject: MFC r201282, r201543 r201282 ------- The proxy arp entries could not be added into the system over the IFF_POINTOPOINT link types. The reason was due to the routing entry returned from the kernel covering the remote end is of an interface type that does not support ARP. This patch fixes this problem by providing a hint to the kernel routing code, which indicates the prefix route instead of the PPP host route should be returned to the caller. Since a host route to the local end point is also added into the routing table, and there could be multiple such instantiations due to multiple PPP links can be created with the same local end IP address, this patch also fixes the loopback route installation failure problem observed prior to this patch. The reference count of loopback route to local end would be either incremented or decremented. The first instantiation would create the entry and the last removal would delete the route entry. r201543 ------- The IFA_RTSELF address flag marks a loopback route has been installed for the interface address. This marker is necessary to properly support PPP types of links where multiple links can have the same local end IP address. The IFA_RTSELF flag bit maps to the RTF_HOST value, which was combined into the route flag bits during prefix installation in IPv6. This inclusion causing the prefix route to be unusable. 
This patch fixes this bug by excluding the IFA_RTSELF flag during route installation. PR: ports/141342, kern/141134 --- usr.sbin/arp/arp.c | 64 +++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 29 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/arp/arp.c b/usr.sbin/arp/arp.c index 8a3410f..3142658 100644 --- a/usr.sbin/arp/arp.c +++ b/usr.sbin/arp/arp.c @@ -326,7 +326,6 @@ set(int argc, char **argv) doing_proxy = 1; if (argc && strncmp(argv[1], "only", 3) == 0) { proxy_only = 1; - dst->sin_other = SIN_PROXY; argc--; argv++; } } else if (strncmp(argv[0], "blackhole", 9) == 0) { @@ -365,33 +364,30 @@ set(int argc, char **argv) sdl_m.sdl_alen = ETHER_ADDR_LEN; } } - for (;;) { /* try at most twice */ - rtm = rtmsg(RTM_GET, dst, &sdl_m); - if (rtm == NULL) { - warn("%s", host); - return (1); - } - addr = (struct sockaddr_inarp *)(rtm + 1); - sdl = (struct sockaddr_dl *)(SA_SIZE(addr) + (char *)addr); - if (addr->sin_addr.s_addr != dst->sin_addr.s_addr) - break; - if (sdl->sdl_family == AF_LINK && - !(rtm->rtm_flags & RTF_GATEWAY) && - valid_type(sdl->sdl_type) ) - break; - if (doing_proxy == 0) { - printf("set: can only proxy for %s\n", host); - return (1); - } - if (dst->sin_other & SIN_PROXY) { - printf("set: proxy entry exists for non 802 device\n"); - return (1); - } - dst->sin_other = SIN_PROXY; - proxy_only = 1; + + /* + * In the case a proxy-arp entry is being added for + * a remote end point, the RTF_ANNOUNCE flag in the + * RTM_GET command is an indication to the kernel + * routing code that the interface associated with + * the prefix route covering the local end of the + * PPP link should be returned, on which ARP applies. 
+ */ + rtm = rtmsg(RTM_GET, dst, &sdl_m); + if (rtm == NULL) { + warn("%s", host); + return (1); + } + addr = (struct sockaddr_inarp *)(rtm + 1); + sdl = (struct sockaddr_dl *)(SA_SIZE(addr) + (char *)addr); + if (addr->sin_addr.s_addr == dst->sin_addr.s_addr) { + printf("set: proxy entry exists for non 802 device\n"); + return (1); } - if (sdl->sdl_family != AF_LINK) { + if ((sdl->sdl_family != AF_LINK) || + (rtm->rtm_flags & RTF_GATEWAY) || + !valid_type(sdl->sdl_type)) { printf("cannot intuit interface index and type for %s\n", host); return (1); } @@ -436,7 +432,11 @@ delete(char *host, int do_proxy) dst = getaddr(host); if (dst == NULL) return (1); - dst->sin_other = do_proxy; + + /* + * Perform a regular entry delete first. + */ + flags &= ~RTF_ANNOUNCE; /* * setup the data structure to notify the kernel @@ -471,11 +471,16 @@ delete(char *host, int do_proxy) break; } - if (dst->sin_other & SIN_PROXY) { + /* + * Regualar entry delete failed, now check if there + * is a proxy-arp entry to remove. + */ + if (flags & RTF_ANNOUNCE) { fprintf(stderr, "delete: cannot locate %s\n",host); return (1); } - dst->sin_other = SIN_PROXY; + + flags |= RTF_ANNOUNCE; } rtm->rtm_flags |= RTF_LLDATA; if (rtmsg(RTM_DELETE, dst, NULL) != NULL) { @@ -485,6 +490,7 @@ delete(char *host, int do_proxy) return (1); } + /* * Search the arp table and do some action on matching entries */ -- cgit v1.1 From bff2fcd685c5e3bc06f62453cb455da887c770e2 Mon Sep 17 00:00:00 2001 From: gavin Date: Wed, 6 Jan 2010 20:54:04 +0000 Subject: MFC r200819: Grammar and minor tweaks to powerd(8) man page. 
PR: docs/133186 Approved by: ed (mentor, implicit) --- usr.sbin/powerd/powerd.8 | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/powerd/powerd.8 b/usr.sbin/powerd/powerd.8 index 7465410..5808862 100644 --- a/usr.sbin/powerd/powerd.8 +++ b/usr.sbin/powerd/powerd.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 24, 2008 +.Dd December 21, 2009 .Dt POWERD 8 .Os .Sh NAME @@ -45,7 +45,7 @@ The .Nm utility monitors the system state and sets various power control options accordingly. -It offers three modes (maximum, minimum, and adaptive) that can be +It offers four modes (maximum, minimum, adaptive and hiadaptive) that can be individually selected while on AC power or batteries. The modes maximum, minimum, adaptive and hiadaptive may be abbreviated max, min, adp, hadp. @@ -57,9 +57,10 @@ Adaptive mode attempts to strike a balance by degrading performance when the system appears idle and increasing it when the system is busy. It offers a good balance between a small performance loss for greatly increased power savings. -Hiadaptive mode is alike adaptive mode, but tuned for systems where +Hiadaptive mode is like adaptive mode, but tuned for systems where performance and interactivity are more important then power consumption. -It rises frequency faster, drops slower and keeps twice lower CPU load. +It increases frequency faster, reduces the frequency less aggressively and +will maintain full frequency for longer. The default mode is adaptive for battery power and hiadaptive for the rest. .Pp The -- cgit v1.1 From 82ce19b08666aae906c476cce056728b9c11a348 Mon Sep 17 00:00:00 2001 From: jhb Date: Tue, 12 Jan 2010 18:47:40 +0000 Subject: MFC 201202: Use reallocf() to simplify some logic. 
--- usr.sbin/arp/arp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/arp/arp.c b/usr.sbin/arp/arp.c index 3142658..61427d3 100644 --- a/usr.sbin/arp/arp.c +++ b/usr.sbin/arp/arp.c @@ -499,7 +499,7 @@ search(u_long addr, action_fn *action) { int mib[6]; size_t needed; - char *lim, *buf, *newbuf, *next; + char *lim, *buf, *next; struct rt_msghdr *rtm; struct sockaddr_inarp *sin2; struct sockaddr_dl *sdl; @@ -522,13 +522,9 @@ search(u_long addr, action_fn *action) return 0; buf = NULL; for (;;) { - newbuf = realloc(buf, needed); - if (newbuf == NULL) { - if (buf != NULL) - free(buf); + buf = reallocf(buf, needed); + if (buf == NULL) errx(1, "could not reallocate memory"); - } - buf = newbuf; st = sysctl(mib, 6, buf, &needed, NULL, 0); if (st == 0 || errno != ENOMEM) break; -- cgit v1.1 From e81c61af7c971e97226d982f36b822048d20a8bd Mon Sep 17 00:00:00 2001 From: dougb Date: Fri, 15 Jan 2010 03:28:46 +0000 Subject: MFC r200416: Simplify handling of MTREEFILE relative to DESTDIR Make the message about a missing MTREEFILE combined with -U more informative MFC r200425: Over time things that used to be files/directories/links can change to something else. So add code to detect when things don't match and give the user choices about how to fix it. If we're using -P and something in the above check needs to be moved we need to have the directory there for it, so create it at the beginning and delete empty versions of it at the end. The case where something used to be a file or link and now is supposed to be a directory (e.g., /etc/security) is especially dangerous, so make failure to install a necessary directory in $DESTDIR a fatal error. MFC r200700: In the places where find is used that the user may see the results, first pipe it to sort so that order of processing will be deterministic and like things will be grouped together. 
MFC r200701: Fix an indentation issue, no functional changes MFC r200708: Fix a problem with how mergemaster handles the hard links for /.cshrc and /.profile. The problem is that install(1) will unlink the old file before it installs the new one, which means that in the best case we have to compare the changes for the old file twice. So, change the logic to first test to see if the link exists, then install the file. Then if the link was there and we're using -i, just create the link in /root and be done with it. Otherwise display the message to the user and give them the option. Because we are now sorting things before doing the comparison we can know conclusively that the files in / should be the sources, and the files in /root will be the targets, so adjust the paths accordingly. While I'm here, split a too-long error message into two lines and just return at the end of handling these files instead of setting the variable that says "do nothing" and then returning at the end of the function anyway. MFC r201291: Add some patches contributed by jhb: 1. Don't prompt the user for "-U but no db" error if we're using -a 2. Add an option to delete stale rc.d files automatically if the user has DELETE_STALE_RC_FILES in their rc file. Lack of command line option for this is not an oversight. 3. Add []'s around the terminal $ for the $FreeBSD$ test for -F For one bug raised by jhb I did a more thorough solution: There were a lot of things that "snuck in" between the end of the test for -r and the start of the comparison. One of them is the creation of the mtree db, as pointed out by jhb. Fix this problem more thoroughly by moving the end of the test down to where it should/used to be, right before the comparison. As a result, indent the interloping code to match. MFC r201292: Document the DELETE_STALE_RC_FILES option introduced in r201291. This is an "rc file only" option by design. 
While I'm here, update the comments in the example rc file to indicate which command line options they relate to, and correct the defaults for a couple of options. MFC r201293: It's not necessary to include both Op and Fl for command line options included in the text, so use only the latter. Clarify that using -U doesn't make sense in combination with -a MFC r201323: If we are using -p it does not make any sense to even create the MTREENEW file since it will never be used. MFC r201765: Update copyright date Update delete_temproot() to include the error message if it fails, and clean up the places where it's called. If there are no files left in temproot when the comparison is done delete it without prompting. This should make "automated" runs of mergemaster without -a a little easier. Document the new behavior in the man page. --- usr.sbin/mergemaster/mergemaster.8 | 65 ++++++---- usr.sbin/mergemaster/mergemaster.sh | 246 +++++++++++++++++++++++------------- 2 files changed, 199 insertions(+), 112 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mergemaster/mergemaster.8 b/usr.sbin/mergemaster/mergemaster.8 index 2bff4be..c9217b3 100644 --- a/usr.sbin/mergemaster/mergemaster.8 +++ b/usr.sbin/mergemaster/mergemaster.8 @@ -1,4 +1,4 @@ -.\" Copyright (c) 1998-2009 Douglas Barton +.\" Copyright (c) 1998-2010 Douglas Barton .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 18, 2009 +.Dd January 7, 2010 .Dt MERGEMASTER 8 .Os .Sh NAME @@ -61,10 +61,10 @@ down, populating that environment with the various files. You can specify a different source directory with the -.Op Fl m +.Fl m command line option, or specify the destination directory with the -.Op Fl D +.Fl D option. It then compares each file in that environment to its installed counterpart. @@ -91,10 +91,10 @@ compares the files themselves. 
You can also specify that the script ignore the Id strings and compare every file with the -.Op Fl s +.Fl s option. Using the -.Op Fl F +.Fl F option .Nm will install the new file for you if they differ only by @@ -126,7 +126,7 @@ installing an updated version of this file you should probably run .Xr pwd_mkdb 8 with the -.Op Fl p +.Fl p option to rebuild your password databases and recreate .Pa /etc/passwd . @@ -165,6 +165,14 @@ which will be read before Options specified on the command line are updated last, and therefore can override both files. .Pp +When the comparison is done if there are any files remaining +in the temproot directory they will be listed, and if the +.Fl a +option is not in use the user will be given the option of +deleting the temproot directory. +If there are no files remaining in the temproot directory +it will be deleted. +.Pp The options are as follows: .Bl -tag -width Fl .It Fl s @@ -198,11 +206,13 @@ If the directory exists, it creates a new one in a previously non-existent directory. This option unsets the verbose flag, -but is compatible with all other options. +but other than +.Fl U +it is compatible with all other options. Setting -.Op Fl a +.Fl a makes -.Op Fl w +.Fl w superfluous. .It Fl h Display usage and help information. @@ -253,7 +263,7 @@ instead of the default Add the date and time to the name of the temporary root directory. If -.Op Fl t +.Fl t is specified, this option must follow it if you want the date added too. 
.It Fl u Ar N @@ -320,44 +330,44 @@ with all values commented out: # These are options for mergemaster, with their default values listed # The following options have command line overrides # -# The target architecture (unset by default) +# The target architecture (-A, unset by default) #ARCHSTRING='TARGET_ARCH=' # -# Sourcedir is the directory to do the 'make' in (where the new files are) +# Sourcedir is the directory to do the 'make' in (-m) #SOURCEDIR='/usr/src' # -# Directory to install the temporary root environment into +# Directory to install the temporary root environment into (-t) #TEMPROOT='/var/tmp/temproot' # -# Specify the destination directory for the installed files +# Specify the destination directory for the installed files (-D) #DESTDIR= # -# Strict comparison skips the VCS Id test and compares every file +# Strict comparison skips the VCS Id test and compares every file (-s) #STRICT=no # -# Type of diff, such as unified, context, etc. +# Type of diff, such as unified, context, etc. 
(-c) #DIFF_FLAG='-u' # -# Install the new file if it differs only by VCS Id ($FreeBSD) +# Install the new file if it differs only by VCS Id ($FreeBSD, -F) #FREEBSD_ID= # -# Verbose mode includes more details and additional checks +# Verbose mode includes more details and additional checks (-v) #VERBOSE= # -# Automatically install files that do not exist on the system already +# Automatically install files that do not exist on the system already (-i) #AUTO_INSTALL= # -# Automatically upgrade files that have not been user modified +# Automatically upgrade files that have not been user modified (-U) #AUTO_UPGRADE= # -# Compare /etc/rc.conf[.local] to /etc/defaults/rc.conf -#COMP_CONFS=yes +# Compare /etc/rc.conf[.local] to /etc/defaults/rc.conf (-C) +#COMP_CONFS= # -# Preserve files that you replace -#PRESERVE_FILES=yes +# Preserve files that you replace (-P) +#PRESERVE_FILES= #PRESERVE_FILES_DIR=/var/tmp/mergemaster/preserved-files-`date +%y%m%d-%H%M%S` # -# The umask for mergemaster to compare the default file's modes to +# The umask for mergemaster to compare the default file's modes to (-u) #NEW_UMASK=022 # # The following options have no command line overrides @@ -377,6 +387,9 @@ with all values commented out: # If you set 'yes' above, make sure to include the PATH to your pager #PATH=/bin:/usr/bin:/usr/sbin # +# Delete stale files in /etc/rc.d without prompting +#DELETE_STALE_RC_FILES= +# # Specify the path to scripts to run before the comparison starts, # and/or after the script has finished its work #MM_PRE_COMPARE_SCRIPT= diff --git a/usr.sbin/mergemaster/mergemaster.sh b/usr.sbin/mergemaster/mergemaster.sh index 4d2ffc0..1b0753b 100755 --- a/usr.sbin/mergemaster/mergemaster.sh +++ b/usr.sbin/mergemaster/mergemaster.sh @@ -5,7 +5,7 @@ # Compare files created by /usr/src/etc/Makefile (or the directory # the user specifies) with the currently installed copies. 
-# Copyright 1998-2009 Douglas Barton +# Copyright 1998-2010 Douglas Barton # DougB@FreeBSD.org # $FreeBSD$ @@ -263,7 +263,7 @@ fi # Assign the location of the mtree database # -MTREEDB=${MTREEDB:-/var/db} +MTREEDB=${MTREEDB:-${DESTDIR}/var/db} MTREEFILE="${MTREEDB}/mergemaster.mtree" # Check the command line options @@ -345,18 +345,24 @@ done # Don't force the user to set this in the mergemaster rc file if [ -n "${PRESERVE_FILES}" -a -z "${PRESERVE_FILES_DIR}" ]; then PRESERVE_FILES_DIR=/var/tmp/mergemaster/preserved-files-`date +%y%m%d-%H%M%S` + mkdir -p ${PRESERVE_FILES_DIR} fi # Check for the mtree database in DESTDIR case "${AUTO_UPGRADE}" in '') ;; # If the option is not set no need to run the test or warn the user *) - if [ ! -s "${DESTDIR}${MTREEFILE}" ]; then + if [ ! -s "${MTREEFILE}" ]; then echo '' - echo "*** Unable to find mtree database. Skipping auto-upgrade on this run." + echo "*** Unable to find mtree database (${MTREEFILE})." + echo " Skipping auto-upgrade on this run." echo " It will be created for the next run when this one is complete." echo '' - press_to_continue + case "${AUTO_RUN}" in + '') + press_to_continue + ;; + esac unset AUTO_UPGRADE fi ;; @@ -463,9 +469,9 @@ MM_MAKE="make ${ARCHSTRING} -m ${SOURCEDIR}/share/mk" # Check DESTDIR against the mergemaster mtree database to see what # files the user changed from the reference files. 
# -if [ -n "${AUTO_UPGRADE}" -a -s "${DESTDIR}${MTREEFILE}" ]; then +if [ -n "${AUTO_UPGRADE}" -a -s "${MTREEFILE}" ]; then CHANGED=: - for file in `mtree -eqL -f ${DESTDIR}${MTREEFILE} -p ${DESTDIR}/ \ + for file in `mtree -eqL -f ${MTREEFILE} -p ${DESTDIR}/ \ 2>/dev/null | awk '($2 == "changed") {print $1}'`; do if [ -f "${DESTDIR}/$file" ]; then CHANGED="${CHANGED}${DESTDIR}/${file}:" @@ -505,7 +511,7 @@ CVS_ID_TAG=FreeBSD delete_temproot () { rm -rf "${TEMPROOT}" 2>/dev/null chflags -R 0 "${TEMPROOT}" 2>/dev/null - rm -rf "${TEMPROOT}" || exit 1 + rm -rf "${TEMPROOT}" || { echo "*** Unable to delete ${TEMPROOT}"; exit 1; } } case "${RERUN}" in @@ -536,7 +542,7 @@ case "${RERUN}" in echo '' echo " *** Deleting the old ${TEMPROOT}" echo '' - delete_temproot || exit 1 + delete_temproot unset TEST_TEMP_ROOT ;; [tT]) @@ -663,31 +669,32 @@ case "${RERUN}" in for file in ${IGNORE_FILES}; do test -e ${TEMPROOT}/${file} && unlink ${TEMPROOT}/${file} done - ;; # End of the "RERUN" test -esac -# We really don't want to have to deal with files like login.conf.db, pwd.db, -# or spwd.db. Instead, we want to compare the text versions, and run *_mkdb. -# Prompt the user to do so below, as needed. -# -rm -f ${TEMPROOT}/etc/*.db ${TEMPROOT}/etc/passwd + # We really don't want to have to deal with files like login.conf.db, pwd.db, + # or spwd.db. Instead, we want to compare the text versions, and run *_mkdb. + # Prompt the user to do so below, as needed. + # + rm -f ${TEMPROOT}/etc/*.db ${TEMPROOT}/etc/passwd -# We only need to compare things like freebsd.cf once -find ${TEMPROOT}/usr/obj -type f -delete 2>/dev/null + # We only need to compare things like freebsd.cf once + find ${TEMPROOT}/usr/obj -type f -delete 2>/dev/null -# Delete stuff we do not need to keep the mtree database small, -# and to make the actual comparison faster. 
-find ${TEMPROOT}/usr -type l -delete 2>/dev/null -find ${TEMPROOT} -type f -size 0 -delete 2>/dev/null -find -d ${TEMPROOT} -type d -empty -delete 2>/dev/null + # Delete stuff we do not need to keep the mtree database small, + # and to make the actual comparison faster. + find ${TEMPROOT}/usr -type l -delete 2>/dev/null + find ${TEMPROOT} -type f -size 0 -delete 2>/dev/null + find -d ${TEMPROOT} -type d -empty -delete 2>/dev/null -# Build the mtree database in a temporary location. -MTREENEW=`mktemp -t mergemaster.mtree` -case "${PRE_WORLD}" in -'') mtree -ci -p ${TEMPROOT} -k size,md5digest > ${MTREENEW} 2>/dev/null - ;; -*) # We don't want to mess with the mtree database on a pre-world run. - ;; + # Build the mtree database in a temporary location. + case "${PRE_WORLD}" in + '') MTREENEW=`mktemp -t mergemaster.mtree` + mtree -ci -p ${TEMPROOT} -k size,md5digest > ${MTREENEW} 2>/dev/null + ;; + *) # We don't want to mess with the mtree database on a pre-world run or + # when re-scanning a previously-built tree. + ;; + esac + ;; # End of the "RERUN" test esac # Get ready to start comparing files @@ -818,7 +825,8 @@ mm_install () { if [ -n "${DESTDIR}${INSTALL_DIR}" -a ! -d "${DESTDIR}${INSTALL_DIR}" ]; then DIR_MODE=`find_mode "${TEMPROOT}/${INSTALL_DIR}"` - install -d -o root -g wheel -m "${DIR_MODE}" "${DESTDIR}${INSTALL_DIR}" + install -d -o root -g wheel -m "${DIR_MODE}" "${DESTDIR}${INSTALL_DIR}" || + install_error $1 ${DESTDIR}${INSTALL_DIR} fi FILE_MODE=`find_mode "${1}"` @@ -837,32 +845,39 @@ mm_install () { DONT_INSTALL=yes ;; /.cshrc | /.profile) - case "${AUTO_INSTALL}" in - '') - case "${LINK_EXPLAINED}" in - '') - echo " *** Historically BSD derived systems have had a" - echo " hard link from /.cshrc and /.profile to" - echo " their namesakes in /root. Please indicate" - echo " your preference below for bringing your" - echo " installed files up to date." 
- echo '' - LINK_EXPLAINED=yes - ;; - esac + local st_nlink - echo " Use 'd' to delete the temporary ${COMPFILE}" - echo " Use 'l' to delete the existing ${DESTDIR}${COMPFILE#.} and create the link" - echo '' - echo " Default is to leave the temporary file to deal with by hand" - echo '' - echo -n " How should I handle ${COMPFILE}? [Leave it to install later] " - read HANDLE_LINK - ;; - *) # Part of AUTO_INSTALL - HANDLE_LINK=l - ;; - esac + # install will unlink the file before it installs the new one, + # so we have to restore/create the link afterwards. + # + st_nlink=0 # In case the file does not yet exist + eval $(stat -s ${DESTDIR}${COMPFILE#.} 2>/dev/null) + + do_install_and_rm "${FILE_MODE}" "${1}" "${DESTDIR}${INSTALL_DIR}" + + if [ -n "${AUTO_INSTALL}" -a $st_nlink -gt 1 ]; then + HANDLE_LINK=l + else + case "${LINK_EXPLAINED}" in + '') + echo " *** Historically BSD derived systems have had a" + echo " hard link from /.cshrc and /.profile to" + echo " their namesakes in /root. Please indicate" + echo " your preference below for bringing your" + echo " installed files up to date." + echo '' + LINK_EXPLAINED=yes + ;; + esac + + echo " Use 'd' to delete the temporary ${COMPFILE}" + echo " Use 'l' to delete the existing ${DESTDIR}/root/${COMPFILE##*/} and create the link" + echo '' + echo " Default is to leave the temporary file to deal with by hand" + echo '' + echo -n " How should I handle ${COMPFILE}? 
[Leave it to install later] " + read HANDLE_LINK + fi case "${HANDLE_LINK}" in [dD]*) @@ -872,19 +887,19 @@ mm_install () { ;; [lL]*) echo '' - rm -f "${DESTDIR}${COMPFILE#.}" - if ln "${DESTDIR}/root/${COMPFILE##*/}" "${DESTDIR}${COMPFILE#.}"; then + unlink ${DESTDIR}/root/${COMPFILE##*/} + if ln ${DESTDIR}${COMPFILE#.} ${DESTDIR}/root/${COMPFILE##*/}; then echo " *** Link from ${DESTDIR}${COMPFILE#.} to ${DESTDIR}/root/${COMPFILE##*/} installed successfully" - rm "${COMPFILE}" else - echo " *** Error linking ${DESTDIR}${COMPFILE#.} to ${DESTDIR}/root/${COMPFILE##*/}, ${COMPFILE} will remain to install by hand" + echo " *** Error linking ${DESTDIR}${COMPFILE#.} to ${DESTDIR}/root/${COMPFILE##*/}" + echo " *** ${COMPFILE} will remain for your consideration" fi ;; *) echo " *** ${COMPFILE} will remain for your consideration" ;; esac - DONT_INSTALL=yes + return ;; esac @@ -955,6 +970,12 @@ if [ -z "${PRE_WORLD}" -a -z "${RERUN}" ]; then esac sleep 2 ;; + *) + if [ -n "${DELETE_STALE_RC_FILES}" ]; then + echo ' *** Deleting ... ' + rm ${STALE_RC_FILES} + echo ' done.' + fi esac ;; esac @@ -967,7 +988,58 @@ if [ -r "${MM_PRE_COMPARE_SCRIPT}" ]; then . "${MM_PRE_COMPARE_SCRIPT}" fi -for COMPFILE in `find . -type f`; do +# Things that were files/directories/links in one version can sometimes +# change to something else in a newer version. So we need to explicitly +# test for this, and warn the user if what we find does not match. +# +for COMPFILE in `find . | sort` ; do + if [ -e "${DESTDIR}${COMPFILE#.}" ]; then + INSTALLED_TYPE=`stat -f '%HT' ${DESTDIR}${COMPFILE#.}` + else + continue + fi + TEMPROOT_TYPE=`stat -f '%HT' $COMPFILE` + + if [ ! "$TEMPROOT_TYPE" = "$INSTALLED_TYPE" ]; then + [ "$COMPFILE" = '.' 
] && continue + TEMPROOT_TYPE=`echo $TEMPROOT_TYPE | tr [:upper:] [:lower:]` + INSTALLED_TYPE=`echo $INSTALLED_TYPE | tr [:upper:] [:lower:]` + + echo "*** The installed file ${DESTDIR}${COMPFILE#.} has the type \"$INSTALLED_TYPE\"" + echo " but the new version has the type \"$TEMPROOT_TYPE\"" + echo '' + echo " How would you like to handle this?" + echo '' + echo " Use 'r' to remove ${DESTDIR}${COMPFILE#.}" + case "$TEMPROOT_TYPE" in + 'symbolic link') + TARGET=`readlink $COMPFILE` + echo " and create a link to $TARGET in its place" ;; + *) echo " You will be able to install it as a \"$TEMPROOT_TYPE\"" ;; + esac + echo '' + echo " Use 'i' to ignore this" + echo '' + echo -n " How to proceed? [i] " + read ANSWER + case "$ANSWER" in + [rR]) case "${PRESERVE_FILES}" in + [Yy][Ee][Ss]) + mv ${DESTDIR}${COMPFILE#.} ${PRESERVE_FILES_DIR}/ || exit 1 ;; + *) rm -rf ${DESTDIR}${COMPFILE#.} ;; + esac + case "$TEMPROOT_TYPE" in + 'symbolic link') ln -sf $TARGET ${DESTDIR}${COMPFILE#.} ;; + esac ;; + *) echo '' + echo "*** See the man page about adding ${COMPFILE#.} to the list of IGNORE_FILES" + press_to_continue ;; + esac + echo '' + fi +done + +for COMPFILE in `find . -type f | sort`; do # First, check to see if the file exists in DESTDIR. If not, the # diff_loop function knows how to handle it. @@ -1032,7 +1104,7 @@ for COMPFILE in `find . 
-type f`; do # If the user chose the -F option, test for that before proceeding # if [ -n "$FREEBSD_ID" ]; then - if diff -q -I'[$]FreeBSD:.*$' "${DESTDIR}${COMPFILE#.}" "${COMPFILE}" > \ + if diff -q -I'[$]FreeBSD.*[$]' "${DESTDIR}${COMPFILE#.}" "${COMPFILE}" > \ /dev/null 2>&1; then if mm_install "${COMPFILE}"; then echo "*** Updated revision control Id for ${DESTDIR}${COMPFILE#.}" @@ -1061,8 +1133,8 @@ echo "*** Comparison complete" if [ -s "${MTREENEW}" ]; then echo "*** Saving mtree database for future upgrades" - test -e "${DESTDIR}${MTREEFILE}" && unlink ${DESTDIR}${MTREEFILE} - mv ${MTREENEW} ${DESTDIR}${MTREEFILE} + test -e "${MTREEFILE}" && unlink ${MTREEFILE} + mv ${MTREENEW} ${MTREEFILE} fi echo '' @@ -1070,30 +1142,28 @@ echo '' TEST_FOR_FILES=`find ${TEMPROOT} -type f -size +0 2>/dev/null` if [ -n "${TEST_FOR_FILES}" ]; then echo "*** Files that remain for you to merge by hand:" - find "${TEMPROOT}" -type f -size +0 + find "${TEMPROOT}" -type f -size +0 | sort echo '' -fi - -case "${AUTO_RUN}" in -'') - echo -n "Do you wish to delete what is left of ${TEMPROOT}? [no] " - read DEL_TEMPROOT - case "${DEL_TEMPROOT}" in - [yY]*) - if delete_temproot; then - echo " *** ${TEMPROOT} has been deleted" - else - echo " *** Unable to delete ${TEMPROOT}" - fi - ;; - *) - echo " *** ${TEMPROOT} will remain" + case "${AUTO_RUN}" in + '') + echo -n "Do you wish to delete what is left of ${TEMPROOT}? 
[no] " + read DEL_TEMPROOT + case "${DEL_TEMPROOT}" in + [yY]*) + delete_temproot + ;; + *) + echo " *** ${TEMPROOT} will remain" + ;; + esac ;; + *) ;; esac - ;; -*) ;; -esac +else + echo "*** ${TEMPROOT} is empty, deleting" + delete_temproot +fi case "${AUTO_INSTALLED_FILES}" in '') ;; @@ -1268,5 +1338,9 @@ case "${PRE_WORLD}" in ;; esac -exit 0 +if [ -n "${PRESERVE_FILES}" ]; then + find -d $PRESERVE_FILES_DIR -type d -empty -delete 2>/dev/null + rmdir $PRESERVE_FILES_DIR 2>/dev/null +fi +exit 0 -- cgit v1.1 From 03e5768749a2a74d505e014d4ea8cf992afd1dee Mon Sep 17 00:00:00 2001 From: marck Date: Fri, 15 Jan 2010 12:02:22 +0000 Subject: MFH r201051: To remove a server, one should use double backslash, and half of them are eaten by shell. Fix this. --- usr.sbin/ypserv/ypinit.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/ypserv/ypinit.sh b/usr.sbin/ypserv/ypinit.sh index 1be7e0e..374308e 100644 --- a/usr.sbin/ypserv/ypinit.sh +++ b/usr.sbin/ypserv/ypinit.sh @@ -298,7 +298,7 @@ do echo "Update the list of hosts running YP servers in domain ${DOMAIN}." echo "Master for this domain is ${MASTER_NAME}." echo "" - echo "First verify old servers, type \\ to remove a server." + echo "First verify old servers, type \\\\ to remove a server." echo "Then add new servers, one per line. When done type a ." echo "" echo " master server : ${HOST}" -- cgit v1.1 From d6a771ac989f3aa00ab1bb6efc0eafca07d36f3c Mon Sep 17 00:00:00 2001 From: gavin Date: Sun, 17 Jan 2010 11:20:53 +0000 Subject: Merge r197308 from head (originally by ed): Spell Israel correctly. 
PR: bin/138580 Submitted by: Alexey Savartsov Approved by: ed (mentor, implicit) --- usr.sbin/sysinstall/menus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/menus.c b/usr.sbin/sysinstall/menus.c index 9376ed1..8ea7f98 100644 --- a/usr.sbin/sysinstall/menus.c +++ b/usr.sbin/sysinstall/menus.c @@ -691,7 +691,7 @@ DMenu MenuMediaFTP = { { " Ireland #3", "ftp3.ie.freebsd.org", NULL, dmenuSetVariable, NULL, VAR_FTP_PATH "=ftp://ftp3.ie.freebsd.org" }, - { "Isreal", "ftp.il.freebsd.org", NULL, dmenuSetVariable, NULL, + { "Israel", "ftp.il.freebsd.org", NULL, dmenuSetVariable, NULL, VAR_FTP_PATH "=ftp://ftp.il.freebsd.org" }, { "Italy", "ftp.it.freebsd.org", NULL, dmenuSetVariable, NULL, -- cgit v1.1 From 43c4cb0d736fc21594886905e74e4431058bd2e9 Mon Sep 17 00:00:00 2001 From: bz Date: Sun, 17 Jan 2010 15:20:34 +0000 Subject: MFC r201806: Switch traceroute over to make use of proper in-kernel source address selection. Reviewed by: rwatson, fenner PR: kern/139454 Tested by: Frank Steinborn (steinex nognu.de) MFC r201897: Correct spelling. 
Submitted by: (pluknet gmail.com) --- usr.sbin/traceroute/Makefile | 4 +- usr.sbin/traceroute/findsaddr-udp.c | 94 +++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 usr.sbin/traceroute/findsaddr-udp.c (limited to 'usr.sbin') diff --git a/usr.sbin/traceroute/Makefile b/usr.sbin/traceroute/Makefile index 489c862..3898af8 100644 --- a/usr.sbin/traceroute/Makefile +++ b/usr.sbin/traceroute/Makefile @@ -5,7 +5,7 @@ TRACEROUTE_DISTDIR?= ${.CURDIR}/../../contrib/traceroute PROG= traceroute MAN= traceroute.8 -SRCS= as.c version.c traceroute.c ifaddrlist.c findsaddr-socket.c +SRCS= as.c version.c traceroute.c ifaddrlist.c findsaddr-udp.c BINOWN= root BINMODE=4555 CLEANFILES= version.c @@ -29,7 +29,7 @@ DPADD= ${LIBIPSEC} LDADD= -lipsec .endif -CFLAGS+= -I${TRACEROUTE_DISTDIR}/lbl +CFLAGS+= -I${TRACEROUTE_DISTDIR}/lbl -I${TRACEROUTE_DISTDIR} version.c: ${TRACEROUTE_DISTDIR}/VERSION @rm -f ${.TARGET} diff --git a/usr.sbin/traceroute/findsaddr-udp.c b/usr.sbin/traceroute/findsaddr-udp.c new file mode 100644 index 0000000..3da72c7 --- /dev/null +++ b/usr.sbin/traceroute/findsaddr-udp.c @@ -0,0 +1,94 @@ +/*- + * Copyright (c) 2010 Bjoern A. Zeeb + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include + +#include +#include + +#include + +#include "findsaddr.h" +#include "traceroute.h" + +/* + * Return the source address for the given destination address. + * + * This makes use of proper source address selection in the FreeBSD kernel + * even taking jails into account (sys/netinet/in_pcb.c:in_pcbladdr()). + * We open a UDP socket, and connect to the destination, letting the kernel + * do the bind and then read the source IPv4 address using getsockname(2). + * This has multiple advantages: no need to do PF_ROUTE operations possibly + * needing special privileges, jails properly taken into account and most + * important - getting the result the kernel would give us rather than + * best-guessing ourselves. + */ +const char * +findsaddr(register const struct sockaddr_in *to, + register struct sockaddr_in *from) +{ + const char *errstr; + struct sockaddr_in cto, cfrom; + int s; + socklen_t len; + + s = socket(AF_INET, SOCK_DGRAM, 0); + if (s == -1) + return ("failed to open DGRAM socket for src addr selection."); + + errstr = NULL; + len = sizeof(struct sockaddr_in); + memcpy(&cto, to, len); + cto.sin_port = htons(65535); /* Dummy port for connect(2). 
*/ + if (connect(s, (struct sockaddr *)&cto, len) == -1) { + errstr = "failed to connect to peer for src addr selection."; + goto err; + } + + if (getsockname(s, (struct sockaddr *)&cfrom, &len) == -1) { + errstr = "failed to get socket name for src addr selection."; + goto err; + } + + if (len != sizeof(struct sockaddr_in) || cfrom.sin_family != AF_INET) { + errstr = "unexpected address family in src addr selection."; + goto err; + } + + /* Update source address for traceroute. */ + setsin(from, cfrom.sin_addr.s_addr); + +err: + (void) close(s); + + /* No error (string) to return. */ + return (errstr); +} + +/* end */ -- cgit v1.1 From 6ebbadd626328cce7cbf94c168b2811417846d06 Mon Sep 17 00:00:00 2001 From: brueffer Date: Sun, 17 Jan 2010 17:31:53 +0000 Subject: MFC: r201870, r201889 Various language fixes. Also fixed the URL to totd, obtained from NetBSD. --- usr.sbin/faithd/faithd.8 | 87 +++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 45 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/faithd/faithd.8 b/usr.sbin/faithd/faithd.8 index 5d16989..eef3ed9 100644 --- a/usr.sbin/faithd/faithd.8 +++ b/usr.sbin/faithd/faithd.8 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 17, 1998 +.Dd January 9, 2010 .Dt FAITHD 8 .Os .Sh NAME @@ -44,20 +44,18 @@ .Sh DESCRIPTION The .Nm -utility provides IPv6-to-IPv4 TCP relay. -It must be used on an IPv4/v6 dual stack router. +utility provides IPv6-to-IPv4 TCP relaying. +It can only be used on an IPv4/v6 dual stack router. .Pp When .Nm receives .Tn TCPv6 -traffic, -.Nm -will relay the +traffic, it will relay the .Tn TCPv6 traffic to .Tn TCPv4 . -Destination for relayed +The destination for the relayed .Tn TCPv4 connection will be determined by the last 4 octets of the original .Tn IPv6 @@ -73,14 +71,14 @@ destination address is the traffic will be relayed to IPv4 destination .Li 10.1.1.1 . 
.Pp -To use +To use the .Nm translation service, an IPv6 address prefix must be reserved for mapping IPv4 addresses into. -Kernel must be properly configured to route all the TCP connection +The kernel must be properly configured to route all the TCP connections toward the reserved IPv6 address prefix into the .Xr faith 4 -pseudo interface, by using +pseudo interface, using the .Xr route 8 command. Also, @@ -91,7 +89,7 @@ to .Dv 1 . .Pp The router must be configured to capture all the TCP traffic -toward reserved +for the reserved .Tn IPv6 address prefix, by using .Xr route 8 @@ -101,21 +99,20 @@ commands. .Pp The .Nm -utility needs a special name-to-address translation logic, so that -hostnames gets resolved into special +utility needs special name-to-address translation logic, so that +hostnames get resolved into the special .Tn IPv6 address prefix. -For small-scale installation, use -.Xr hosts 5 . -For large-scale installation, it is useful to have +For small-scale installations, use +.Xr hosts 5 ; +for large-scale installations, it is useful to have a DNS server with special address translation support. An implementation called .Nm totd -is available -at -.Pa http://www.vermicelli.pasta.cs.uit.no/ipv6/software.html . -Make sure you do not propagate translated DNS records to normal DNS cloud, -it is highly harmful. +is available at +.Pa http://www.vermicelli.pasta.cs.uit.no/software/totd.html . +Make sure you do not propagate translated DNS records over to normal +DNS, as it can cause severe problems. .Ss Daemon mode When .Nm @@ -150,9 +147,9 @@ to you can run local daemons on the router. The .Nm -utility will invoke local daemon at +utility will invoke a local daemon at .Ar serverpath -if the destination address is local interface address, +if the destination address is a local interface address, and will perform translation to IPv4 TCP in other cases.
You can also specify .Ar serverargs @@ -182,7 +179,7 @@ The .Nm utility includes special support for protocols used by .Xr ftp 1 . -When translating FTP protocol, +When translating the FTP protocol, .Nm translates network level addresses in .Li PORT/LPRT/EPRT @@ -191,8 +188,8 @@ and commands. .Pp Inactive sessions will be disconnected in 30 minutes, -to avoid stale sessions from chewing up resources. -This may be inappropriate for some of the services +to prevent stale sessions from chewing up resources. +This may be inappropriate for some services (should this be configurable?). .Ss inetd mode When @@ -200,13 +197,13 @@ When is invoked via .Xr inetd 8 , .Nm -will handle connection passed from standard input. +will handle connections passed from standard input. If the connection endpoint is in the reserved IPv6 address prefix, .Nm will relay the connection. Otherwise, .Nm -will invoke service-specific daemon like +will invoke a service-specific daemon like .Xr telnetd 8 , by using the command argument passed from .Xr inetd 8 . @@ -219,16 +216,16 @@ For example, if .Nm is invoked via .Xr inetd 8 -on FTP port, it will operate as a FTP relay. +on the FTP port, it will operate as an FTP relay. .Pp The operation mode requires special support for .Nm in .Xr inetd 8 . .Ss Access control -To prevent malicious accesses, +To prevent malicious access, .Nm -implements a simple address-based access control. +implements simple address-based access control. With .Pa /etc/faithd.conf (or @@ -239,7 +236,7 @@ specified by will avoid relaying unwanted traffic. The .Pa faithd.conf -contains directives with the following format: +configuration file contains directives of the following format: .Bl -bullet .It .Ar src Ns / Ns Ar slen Cm deny Ar dst Ns / Ns Ar dlen @@ -281,6 +278,7 @@ on error. .Sh EXAMPLES Before invoking .Nm , +the .Xr faith 4 interface has to be configured properly. 
.Bd -literal -offset @@ -334,12 +332,12 @@ ssh stream tcp6/faith nowait root faithd /usr/sbin/sshd -i .Ed .Pp .Xr inetd 8 -will open listening sockets with enabling kernel TCP relay support. -Whenever connection comes in, +will open listening sockets with kernel TCP relay support enabled. +Whenever a connection comes in, .Nm will be invoked by .Xr inetd 8 . -If it the connection endpoint is in the reserved IPv6 address prefix. +If the connection endpoint is in the reserved IPv6 address prefix. The .Nm utility will relay the connection. @@ -377,12 +375,12 @@ setting. .Sh HISTORY The .Nm -utility first appeared in WIDE Hydrangea IPv6 protocol stack kit. +utility first appeared in the WIDE Hydrangea IPv6 protocol stack kit. .\" .Pp IPv6 and IPsec support based on the KAME Project (http://www.kame.net/) stack was initially integrated into -.Fx 4.0 +.Fx 4.0 . .Sh SECURITY CONSIDERATIONS It is very insecure to use IP-address based authentication, for connections relayed by .Nm , @@ -392,16 +390,15 @@ Administrators are advised to limit accesses to .Nm using .Pa faithd.conf , -or by using IPv6 packet filters. -It is to protect +or by using IPv6 packet filters, to protect the .Nm -service from malicious parties and avoid theft of service/bandwidth. -IPv6 destination address can be limited by -carefully configuring routing entries that points to +service from malicious parties, and to avoid theft of service/bandwidth. +IPv6 destination addresses can be limited by +carefully configuring routing entries that point to .Xr faith 4 , using .Xr route 8 . -IPv6 source address needs to be filtered by using packet filters. -Documents listed in +The IPv6 source address needs to be filtered using packet filters. +The documents listed in .Sx SEE ALSO -have more discussions on this topic. +have more information on this topic. 
-- cgit v1.1 From 2e685ed5b8aaeaba261d057d2655b69822965dba Mon Sep 17 00:00:00 2001 From: thompsa Date: Sun, 17 Jan 2010 18:22:42 +0000 Subject: MFC r202181,202243,202270 Add a driver by Fredrik Lindberg for Option HSDPA USB devices. These differ from standard 3G wireless units by supplying a raw IP/IPv6 endpoint rather than using PPP over serial. uhsoctl(1) is used to initiate and close the WAN connection. Obtained from: Fredrik Lindberg --- usr.sbin/Makefile | 2 + usr.sbin/uhsoctl/Makefile | 10 + usr.sbin/uhsoctl/uhsoctl.1 | 104 +++ usr.sbin/uhsoctl/uhsoctl.c | 1532 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1648 insertions(+) create mode 100644 usr.sbin/uhsoctl/Makefile create mode 100644 usr.sbin/uhsoctl/uhsoctl.1 create mode 100644 usr.sbin/uhsoctl/uhsoctl.c (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index 99a0620..5df3ca2 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -189,6 +189,7 @@ SUBDIR= ${_ac} \ tzsetup \ ${_uathload} \ ugidfw \ + ${_uhsoctl} \ ${_usbdevs} \ ${_usbconfig} \ ${_vidcontrol} \ @@ -410,6 +411,7 @@ _crunch= crunch .if ${MACHINE_ARCH} != "ia64" _uathload= uathload .endif +_uhsoctl= uhsoctl #_usbdevs= usbdevs _usbconfig= usbconfig .endif diff --git a/usr.sbin/uhsoctl/Makefile b/usr.sbin/uhsoctl/Makefile new file mode 100644 index 0000000..9704923 --- /dev/null +++ b/usr.sbin/uhsoctl/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +PROG= uhsoctl +MAN= uhsoctl.1 +WARNS= 1 + +DPADD= ${LIBUTIL} +LDADD= -lutil + +.include diff --git a/usr.sbin/uhsoctl/uhsoctl.1 b/usr.sbin/uhsoctl/uhsoctl.1 new file mode 100644 index 0000000..932cefa --- /dev/null +++ b/usr.sbin/uhsoctl/uhsoctl.1 @@ -0,0 +1,104 @@ +.\" Copyright (c) 2008-2009 Fredrik Lindberg +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd Aug 12, 2009 +.Os +.Dt UHSOCTL 1 +.Sh NAME +.Nm uhsoctl +.Nd connection utility for Option based devices +.Sh SYNOPSIS +.Nm +.Op Fl a Ar apn +.Op Fl c Ar cid +.Op Fl p Ar pin +.Op Fl u Ar username +.Op Fl k Ar password +.Op Fl r Ar path +.Op Fl f Ar path +.Op Fl b | n +.Ar interface +.Nm +.Fl d +.Ar interface +.Nm +.Fl h +.Sh DESCRIPTION +.Nm +is a small connection utility for Option N.V. devices that are based on Options +packet interface and uses proprietary AT_* calls to establish connections. +The utility (tries to) configure both default route and name servers +(/etc/resolv.conf). +.Pp +By default +.Nm +detaches from the terminal upon on a successful connection, a few command-line +options exists that allows this behavior to be changed. +.Pp +.Nm +attempts to find a usable controlling serial port based on the provided network +interface. 
+If this fails you might specify a serial port manually. +.Sh OPTIONS +.Bl -tag -width XXXX +.It Fl a Ar apn +Specify APN to connect to. +.It Fl c Ar cid +Specify CID (Context ID) to use, by default CID 1 is used. +If an APN has been configured once, it's enough to specify the CID used for +further accesses. +.It Fl p Ar pin +Specify SIM PIN. +.It Fl u Ar username +Specify username. +.It Fl k Ar password +Specify username. +.It Fl r Ar path +Path to resolv.conf, default /etc/resolv.conf. +Use /dev/null to disable updating of name servers. +.It Fl f Ar path +Explicitly set the serial port to use as controlling terminal. +Might be needed if the automatic detection fails. +.It Fl b +Fork into background directly, before a connection has been established. +.It Fl n +Never fork into background, run entirely in foreground. +.El +.Sh EXAMPLES +Connect to +.Dq Li apn.example.com +on interface +.Dq Li uhso0 +and use PIN +.Dq 1234 +to enable the SIM card. + +.Dl "uhsoctl -a apn.example.com -p 1234 uhso0" + +Disconnect from a previously established connection + +.Dl "uhsoctl -d uhso0" +.Sh SEE ALSO +.Xr uhso 4 diff --git a/usr.sbin/uhsoctl/uhsoctl.c b/usr.sbin/uhsoctl/uhsoctl.c new file mode 100644 index 0000000..6ed5245 --- /dev/null +++ b/usr.sbin/uhsoctl/uhsoctl.c @@ -0,0 +1,1532 @@ +/*- + * Copyright (c) 2008-2009 Fredrik Lindberg + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Connection utility to ease connectivity using the raw IP packet interface + * available on uhso(4) devices. 
+ */ + +#define TTY_NAME "/dev/%s" +#define SYSCTL_TEST "dev.uhso.%d.%%driver" +#define SYSCTL_PORTS "dev.uhso.%d.ports" +#define SYSCTL_NETIF "dev.uhso.%d.netif" +#define SYSCTL_NAME_TTY "dev.uhso.%d.port.%s.tty" +#define SYSCTL_NAME_DESC "dev.uhso.%d.port.%s.desc" +#define RESOLV_PATH "/etc/resolv.conf" +#define PIDFILE "/var/run/uhsoctl.%s.pid" + +static const char *network_access_type[] = { + "GSM", + "Compact GSM", + "UMTS", + "GSM (EGPRS)", + "HSDPA", + "HSUPA", + "HSDPA/HSUPA" +}; + +static const char *network_reg_status[] = { + "Not registered", + "Registered", + "Searching for network", + "Network registration denied", + "Unknown", + "Registered (roaming)" +}; + +struct ctx { + int fd; + int flags; +#define IPASSIGNED 0x01 +#define FLG_NODAEMON 0x02 /* Don't detach from terminal */ +#define FLG_DAEMON 0x04 /* Running as daemon */ +#define FLG_DELAYED 0x08 /* Fork into background after connect */ +#define FLG_NEWDATA 0x10 +#define FLG_WATCHDOG 0x20 /* Watchdog enabled */ +#define FLG_WDEXP 0x40 /* Watchdog expired */ + const char *ifnam; + const char *pin; /* device PIN */ + + char pidfile[128]; + struct pidfh *pfh; + + time_t watchdog; + + /* PDP context settings */ + int pdp_ctx; + const char *pdp_apn; + const char *pdp_user; + const char *pdp_pwd; + + /* Connection status */ + int con_status; /* Connected? */ + char *con_apn; /* Connected APN */ + char *con_oper; /* Operator name */ + int con_net_stat; /* Network connection status */ + int con_net_type; /* Network connection type */ + + /* Misc. 
status */ + int dbm; + + /* IP and nameserver settings */ + struct in_addr ip; + char **ns; + const char *resolv_path; + char *resolv; /* Old resolv.conf */ + size_t resolv_sz; +}; + +static int readline_buf(const char *, const char *, char *, size_t); +static int readline(int, char *, size_t); +static void daemonize(struct ctx *); + +static int at_cmd_async(int, const char *, ...); + +typedef union { + void *ptr; + uint32_t int32; +} resp_data; +typedef struct { + resp_data val[2]; +} resp_arg; +typedef void (*resp_cb)(resp_arg *, const char *, const char *); + +typedef void (*async_cb)(void *, const char *); +struct async_handle { + const char *cmd; + async_cb func; +}; + +static void at_async_creg(void *, const char *); +static void at_async_cgreg(void *, const char *); +static void at_async_cops(void *, const char *); +static void at_async_owancall(void *, const char *); +static void at_async_owandata(void *, const char *); +static void at_async_csq(void *, const char *); + +static struct async_handle async_cmd[] = { + { "+CREG", at_async_creg }, + { "+CGREG", at_async_cgreg }, + { "+COPS", at_async_cops }, + { "+CSQ", at_async_csq }, + { "_OWANCALL", at_async_owancall }, + { "_OWANDATA", at_async_owandata }, + { NULL, NULL } +}; + +struct timer_entry; +struct timers { + TAILQ_HEAD(, timer_entry) head; + int res; +}; + +typedef void (*tmr_cb)(int, void *); +struct timer_entry { + TAILQ_ENTRY(timer_entry) next; + int id; + int timeout; + tmr_cb func; + void *arg; +}; + + +static struct timers timers; +static volatile int running = 1; +static int syslog_open = 0; +static char syslog_title[64]; + +/* Periodic timer, runs ready timer tasks every tick */ +static void +tmr_run(struct timers *tmrs) +{ + struct timer_entry *te, *te2; + + te = TAILQ_FIRST(&tmrs->head); + if (te == NULL) + return; + + te->timeout -= tmrs->res; + while (te->timeout <= 0) { + te2 = TAILQ_NEXT(te, next); + TAILQ_REMOVE(&tmrs->head, te, next); + if (te2 != NULL) + te2->timeout -= tmrs->res; 
+ + te->func(te->id, te->arg); + free(te); + te = te2; + if (te == NULL) + break; + } +} + +/* Add a new timer */ +static void +tmr_add(struct timers *tmrs, int id, int timeout, tmr_cb func, void *arg) +{ + struct timer_entry *te, *te2, *te3; + + te = malloc(sizeof(struct timer_entry)); + memset(te, 0, sizeof(struct timer_entry)); + + te->timeout = timeout; + te->func = func; + te->arg = arg; + te->id = id; + + te2 = TAILQ_FIRST(&tmrs->head); + + if (TAILQ_EMPTY(&tmrs->head)) { + TAILQ_INSERT_HEAD(&tmrs->head, te, next); + } else if (te->timeout < te2->timeout) { + te2->timeout -= te->timeout; + TAILQ_INSERT_HEAD(&tmrs->head, te, next); + } else { + while (te->timeout >= te2->timeout) { + te->timeout -= te2->timeout; + te3 = TAILQ_NEXT(te2, next); + if (te3 == NULL || te3->timeout > te->timeout) + break; + te2 = te3; + } + TAILQ_INSERT_AFTER(&tmrs->head, te2, te, next); + } +} + +#define watchdog_enable(ctx) (ctx)->flags |= FLG_WATCHDOG +#define watchdog_disable(ctx) (ctx)->flags &= ~FLG_WATCHDOG + +static void +watchdog_reset(struct ctx *ctx, int timeout) +{ + struct timespec tp; + + clock_gettime(CLOCK_MONOTONIC, &tp), + ctx->watchdog = tp.tv_sec + timeout; + + watchdog_enable(ctx); +} + +static void +tmr_creg(int id, void *arg) +{ + struct ctx *ctx = arg; + + at_cmd_async(ctx->fd, "AT+CREG?\r\n"); + watchdog_reset(ctx, 10); +} + +static void +tmr_cgreg(int id, void *arg) +{ + struct ctx *ctx = arg; + + at_cmd_async(ctx->fd, "AT+CGREG?\r\n"); + watchdog_reset(ctx, 10); +} + +static void +tmr_status(int id, void *arg) +{ + struct ctx *ctx = arg; + + at_cmd_async(ctx->fd, "AT+CSQ\r\n"); + watchdog_reset(ctx, 10); +} + +static void +tmr_watchdog(int id, void *arg) +{ + struct ctx *ctx = arg; + pid_t self; + struct timespec tp; + + tmr_add(&timers, 1, 5, tmr_watchdog, ctx); + + if (!(ctx->flags & FLG_WATCHDOG)) + return; + + clock_gettime(CLOCK_MONOTONIC, &tp); + + if (tp.tv_sec >= ctx->watchdog) { +#ifdef DEBUG + fprintf(stderr, "Watchdog expired\n"); +#endif + 
ctx->flags |= FLG_WDEXP; + self = getpid(); + kill(self, SIGHUP); + } +} + +static void +sig_handle(int sig) +{ + + switch (sig) { + case SIGHUP: + case SIGINT: + case SIGQUIT: + case SIGTERM: + running = 0; + break; + case SIGALRM: + tmr_run(&timers); + break; + } +} + +static void +logger(int pri, const char *fmt, ...) +{ + char *buf; + va_list ap; + + va_start(ap, fmt); + vasprintf(&buf, fmt, ap); + if (syslog_open) + syslog(pri, buf); + else { + switch (pri) { + case LOG_INFO: + case LOG_NOTICE: + printf("%s\n", buf); + break; + default: + fprintf(stderr, "%s: %s\n", getprogname(), buf); + break; + } + } + + free(buf); + va_end(ap); +} + +/* Add/remove IP address from an interface */ +static int +ifaddr_ad(int d, const char *ifnam, struct sockaddr *sa, struct sockaddr *mask) +{ + struct ifaliasreq req; + int fd, error; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return (-1); + + memset(&req, 0, sizeof(struct ifaliasreq)); + strlcpy(req.ifra_name, ifnam, sizeof(req.ifra_name)); + memcpy(&req.ifra_addr, sa, sa->sa_len); + memcpy(&req.ifra_mask, mask, mask->sa_len); + + error = ioctl(fd, d, (char *)&req); + close(fd); + return (error); +} + +#define if_ifup(ifnam) if_setflags(ifnam, IFF_UP) +#define if_ifdown(ifnam) if_setflags(ifnam, -IFF_UP) + +static int +if_setflags(const char *ifnam, int flags) +{ + struct ifreq ifr; + int fd, error; + unsigned int oflags = 0; + + memset(&ifr, 0, sizeof(struct ifreq)); + strlcpy(ifr.ifr_name, ifnam, sizeof(ifr.ifr_name)); + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return (-1); + + error = ioctl(fd, SIOCGIFFLAGS, &ifr); + if (error == 0) { + oflags = (ifr.ifr_flags & 0xffff) | (ifr.ifr_flagshigh << 16); + } + + if (flags < 0) + oflags &= ~(-flags); + else + oflags |= flags; + + ifr.ifr_flags = oflags & 0xffff; + ifr.ifr_flagshigh = oflags >> 16; + + error = ioctl(fd, SIOCSIFFLAGS, &ifr); + if (error != 0) + warn("ioctl SIOCSIFFLAGS"); + + close(fd); + return (error); +} + +static int 
+ifaddr_add(const char *ifnam, struct sockaddr *sa, struct sockaddr *mask) +{ + int error; + + error = ifaddr_ad(SIOCAIFADDR, ifnam, sa, mask); + if (error != 0) + warn("ioctl SIOCAIFADDR"); + return (error); +} + +static int +ifaddr_del(const char *ifnam, struct sockaddr *sa, struct sockaddr *mask) +{ + int error; + + error = ifaddr_ad(SIOCDIFADDR, ifnam, sa, mask); + if (error != 0) + warn("ioctl SIOCDIFADDR"); + return (error); +} + +static int +set_nameservers(struct ctx *ctx, const char *respath, int ns, ...) +{ + int i, n, fd; + FILE *fp; + char *p; + va_list ap; + struct stat sb; + char buf[512]; + + if (ctx->ns != NULL) { + for (i = 0; ctx->ns[i] != NULL; i++) { + free(ctx->ns[i]); + } + free(ctx->ns); + } + + fd = open(respath, O_RDWR | O_CREAT | O_NOFOLLOW); + if (fd < 0) + return (-1); + + if (ns == 0) { + /* Attempt to restore old resolv.conf */ + if (ctx->resolv != NULL) { + ftruncate(fd, 0); + lseek(fd, 0, SEEK_SET); + write(fd, ctx->resolv, ctx->resolv_sz); + free(ctx->resolv); + ctx->resolv = NULL; + ctx->resolv_sz = 0; + } + close(fd); + return (0); + } + + + ctx->ns = malloc(sizeof(char *) * (ns + 1)); + if (ctx->ns == NULL) { + close(fd); + return (-1); + } + + va_start(ap, ns); + for (i = 0; i < ns; i++) { + p = va_arg(ap, char *); + ctx->ns[i] = strdup(p); + } + ctx->ns[i] = NULL; + va_end(ap); + + /* Attempt to backup the old resolv.conf */ + if (ctx->resolv == NULL) { + i = fstat(fd, &sb); + if (i == 0 && sb.st_size != 0) { + ctx->resolv_sz = sb.st_size; + ctx->resolv = malloc(sb.st_size); + if (ctx->resolv != NULL) { + n = read(fd, ctx->resolv, sb.st_size); + if (n != sb.st_size) { + free(ctx->resolv); + ctx->resolv = NULL; + } + } + } + } + + + ftruncate(fd, 0); + lseek(fd, 0, SEEK_SET); + fp = fdopen(fd, "w"); + + /* + * Write back everything other than nameserver entries to the + * new resolv.conf + */ + if (ctx->resolv != NULL) { + p = ctx->resolv; + while ((i = readline_buf(p, ctx->resolv + ctx->resolv_sz, buf, + sizeof(buf))) > 0) { + 
p += i; + if (strncasecmp(buf, "nameserver", 10) == 0) + continue; + fprintf(fp, "%s", buf); + } + } + + for (i = 0; ctx->ns[i] != NULL; i++) { + fprintf(fp, "nameserver %s\n", ctx->ns[i]); + } + fclose(fp); + return (0); +} + +/* Read a \n-terminated line from buffer */ +static int +readline_buf(const char *s, const char *e, char *buf, size_t bufsz) +{ + int pos = 0; + char *p = buf; + + for (; s < e; s++) { + *p = *s; + pos++; + if (pos >= (bufsz - 1)) + break; + if (*p++ == '\n') + break; + } + *p = '\0'; + return (pos); +} + +/* Read a \n-terminated line from file */ +static int +readline(int fd, char *buf, size_t bufsz) +{ + int n = 0, pos = 0; + char *p = buf; + + for (;;) { + n = read(fd, p, 1); + if (n <= 0) + break; + pos++; + if (pos >= (bufsz - 1)) + break; + if (*p++ == '\n') + break; + } + *p = '\0'; + return (n <= 0 ? n : pos); +} + +/* + * Synchronous AT command + */ +static int +at_cmd(struct ctx *ctx, const char *resp, resp_cb cb, resp_arg *ra, const char *cf, ...) +{ + size_t l; + int n, error, retval = 0; + va_list ap; + fd_set set; + char buf[512]; + char cmd[64]; + + va_start(ap, cf); + vsnprintf(cmd, sizeof(cmd), cf, ap); + va_end(ap); + +#ifdef DEBUG + fprintf(stderr, "SYNC_CMD: %s", cmd); +#endif + + l = strlen(cmd); + n = write(ctx->fd, cmd, l); + if (n <= 0) + return (-1); + + if (resp != NULL) { + l = strlen(resp); +#ifdef DEBUG + fprintf(stderr, "SYNC_EXP: %s (%d)\n", resp, l); +#endif + } + + for (;;) { + bzero(buf, sizeof(buf)); + + FD_ZERO(&set); + watchdog_reset(ctx, 5); + do { + FD_SET(ctx->fd, &set); + error = select(ctx->fd + 1, &set, NULL, NULL, NULL); + if (error < 0 && errno == EINTR && ctx->flags & FLG_WDEXP) { + watchdog_disable(ctx); + retval = -2; + break; + } + } while (error <= 0 && errno == EINTR); + + if (error <= 0) { + retval = -2; + break; + } + + n = readline(ctx->fd, buf, sizeof(buf)); + if (n <= 0) { + retval = -2; + break; + } + + if (strcmp(buf, "\r\n") == 0 || strcmp(buf, "\n") == 0) + continue; + +#ifdef DEBUG 
+ fprintf(stderr, "SYNC_RESP: %s", buf); +#endif + + if (strncmp(buf, "OK", 2) == 0) { + break; + } + else if (strncmp(buf, "ERROR", 5) == 0) { + retval = -1; + break; + } + + if (resp != NULL) { + retval = strncmp(resp, buf, l); + if (retval == 0 && cb != NULL) { + cb(ra, cmd, buf); + } + } + } +#ifdef DEBUG + fprintf(stderr, "SYNC_RETVAL=%d\n", retval); +#endif + return (retval); +} + +static int +at_cmd_async(int fd, const char *cf, ...) +{ + size_t l; + va_list ap; + char cmd[64]; + + va_start(ap, cf); + vsnprintf(cmd, sizeof(cmd), cf, ap); + va_end(ap); + +#ifdef DEBUG + fprintf(stderr, "CMD: %s", cmd); +#endif + l = strlen(cmd); + return (write(fd, cmd, l)); +} + +static void +saveresp(resp_arg *ra, const char *cmd, const char *resp) +{ + char **buf; + int i = ra->val[1].int32; + + buf = realloc(ra->val[0].ptr, sizeof(char *) * (i + 1)); + if (buf == NULL) + return; + + buf[i] = strdup(resp); + + ra->val[0].ptr = buf; + ra->val[1].int32 = i + 1; +} + +static void +at_async_creg(void *arg, const char *resp) +{ + struct ctx *ctx = arg; + int n, reg; + + n = sscanf(resp, "+CREG: %*d,%d", ®); + if (n != 1) { + n = sscanf(resp, "+CREG: %d", ®); + if (n != 1) + return; + } + + if (ctx->con_net_stat != 1 && ctx->con_net_stat != 5) { + tmr_add(&timers, 1, 1, tmr_creg, ctx); + } + else { + tmr_add(&timers, 1, 30, tmr_creg, ctx); + } + + if (ctx->con_net_stat == reg) + return; + + ctx->con_net_stat = reg; + at_cmd_async(ctx->fd, "AT+COPS?\r\n"); +} + +static void +at_async_cgreg(void *arg, const char *resp) +{ + struct ctx *ctx = arg; + int n, reg; + + n = sscanf(resp, "+CGREG: %*d,%d", ®); + if (n != 1) { + n = sscanf(resp, "+CGREG: %d", ®); + if (n != 1) + return; + } + + if (ctx->con_net_stat != 1 && ctx->con_net_stat != 5) { + tmr_add(&timers, 1, 1, tmr_cgreg, ctx); + } + else { + tmr_add(&timers, 1, 30, tmr_cgreg, ctx); + } + + if (ctx->con_net_stat == reg) + return; + + ctx->con_net_stat = reg; + at_cmd_async(ctx->fd, "AT+COPS?\r\n"); +} + + +static void 
+at_async_cops(void *arg, const char *resp) +{ + struct ctx *ctx = arg; + int n, at; + char opr[64]; + + n = sscanf(resp, "+COPS: %*d,%*d,\"%[^\"]\",%d", + opr, &at); + if (n != 2) + return; + + if (ctx->con_oper != NULL) { + if (ctx->con_net_type == at && + strcasecmp(opr, ctx->con_oper) == 0) + return; + free(ctx->con_oper); + } + + ctx->con_oper = strdup(opr); + ctx->con_net_type = at; + + if (ctx->con_net_stat == 1 || ctx->con_net_stat == 5) { + logger(LOG_NOTICE, "%s to \"%s\" (%s)", + network_reg_status[ctx->con_net_stat], + ctx->con_oper, network_access_type[ctx->con_net_type]); + if (ctx->con_status != 1) { + at_cmd_async(ctx->fd, "AT_OWANCALL=%d,1,1\r\n", + ctx->pdp_ctx); + } + } + else { + logger(LOG_NOTICE, "%s (%s)", + network_reg_status[ctx->con_net_stat], + network_access_type[ctx->con_net_type]); + } +} + +/* + * Signal strength for pretty console output + * + * From 3GPP TS 27.007 V8.3.0, Section 8.5 + * 0 = -113 dBm or less + * 1 = -111 dBm + * 2...30 = -109...-53 dBm + * 31 = -51 dBm or greater + * + * So, dbm = (rssi * 2) - 113 +*/ +static void +at_async_csq(void *arg, const char *resp) +{ + struct ctx *ctx = arg; + int n, rssi; + + n = sscanf(resp, "+CSQ: %d,%*d", &rssi); + if (n != 1) + return; + if (rssi == 99) + ctx->dbm = 0; + else { + ctx->dbm = (rssi * 2) - 113; + tmr_add(&timers, 1, 15, tmr_status, ctx); + } + + ctx->flags |= FLG_NEWDATA; +} + +static void +at_async_owancall(void *arg, const char *resp) +{ + struct ctx *ctx = arg; + int n, i; + + n = sscanf(resp, "_OWANCALL: %*d,%d", &i); + if (n != 1) + return; + + if (i == ctx->con_status) + return; + + at_cmd_async(ctx->fd, "AT_OWANDATA=%d\r\n", ctx->pdp_ctx); + + ctx->con_status = i; + if (ctx->con_status == 1) { + logger(LOG_NOTICE, "Connected to \"%s\" (%s), %s", + ctx->con_oper, ctx->con_apn, + network_access_type[ctx->con_net_type]); + } + else { + logger(LOG_NOTICE, "Disconnected from \"%s\" (%s)", + ctx->con_oper, ctx->con_apn); + } +} + +static void +at_async_owandata(void 
*arg, const char *resp) +{ + struct ctx *ctx = arg; + char ip[40], ns1[40], ns2[40]; + int n, error, rs; + struct ifaddrs *ifap, *ifa; + struct sockaddr_in sin, mask; + struct sockaddr_dl sdl; + struct { + struct rt_msghdr rtm; + char buf[512]; + } r; + char *cp = r.buf; + + n = sscanf(resp, "_OWANDATA: %*d, %[^,], %*[^,], %[^,], %[^,]", + ip, ns1, ns2); + if (n != 3) + return; + + /* XXX: AF_INET assumption */ + + logger(LOG_NOTICE, "IP address: %s, Nameservers: %s, %s", ip, ns1, ns2); + + sin.sin_len = mask.sin_len = sizeof(struct sockaddr_in); + memset(&mask.sin_addr.s_addr, 0xff, sizeof(mask.sin_addr.s_addr)); + sin.sin_family = mask.sin_family = AF_INET; + + if (ctx->flags & IPASSIGNED) { + memcpy(&sin.sin_addr.s_addr, &ctx->ip.s_addr, + sizeof(sin.sin_addr.s_addr)); + ifaddr_del(ctx->ifnam, (struct sockaddr *)&sin, + (struct sockaddr *)&mask); + } + inet_pton(AF_INET, ip, &ctx->ip.s_addr); + memcpy(&sin.sin_addr.s_addr, &ctx->ip.s_addr, + sizeof(sin.sin_addr.s_addr)); + + error = ifaddr_add(ctx->ifnam, (struct sockaddr *)&sin, + (struct sockaddr *)&mask); + if (error != 0) { + logger(LOG_ERR, "failed to set ip-address"); + return; + } + + if_ifup(ctx->ifnam); + + ctx->flags |= IPASSIGNED; + + set_nameservers(ctx, ctx->resolv_path, 0); + error = set_nameservers(ctx, ctx->resolv_path, 2, ns1, ns2); + if (error != 0) { + logger(LOG_ERR, "failed to set nameservers"); + } + + error = getifaddrs(&ifap); + if (error != 0) { + logger(LOG_ERR, "getifaddrs: %s", strerror(errno)); + return; + } + + for (ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr->sa_family != AF_LINK) + continue; + if (strcmp(ctx->ifnam, ifa->ifa_name) == 0) { + memcpy(&sdl, (struct sockaddr_dl *)ifa->ifa_addr, + sizeof(struct sockaddr_dl)); + break; + } + } + if (ifa == NULL) + return; + + rs = socket(PF_ROUTE, SOCK_RAW, 0); + if (rs < 0) { + logger(LOG_ERR, "socket PF_ROUTE: %s", strerror(errno)); + return; + } + + memset(&r, 0, sizeof(r)); + + r.rtm.rtm_version = RTM_VERSION; + 
r.rtm.rtm_type = RTM_ADD; + r.rtm.rtm_flags = RTF_UP | RTF_STATIC; + r.rtm.rtm_pid = getpid(); + memset(&sin, 0, sizeof(struct sockaddr_in)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + + memcpy(cp, &sin, sin.sin_len); + cp += SA_SIZE(&sin); + memcpy(cp, &sdl, sdl.sdl_len); + cp += SA_SIZE(&sdl); + memcpy(cp, &sin, sin.sin_len); + r.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY | RTA_NETMASK; + r.rtm.rtm_msglen = sizeof(r); + + n = write(rs, &r, r.rtm.rtm_msglen); + if (n != r.rtm.rtm_msglen) { + r.rtm.rtm_type = RTM_DELETE; + n = write(rs, &r, r.rtm.rtm_msglen); + r.rtm.rtm_type = RTM_ADD; + n = write(rs, &r, r.rtm.rtm_msglen); + } + + if (n != r.rtm.rtm_msglen) { + logger(LOG_ERR, "failed to set default route: %s", + strerror(errno)); + } + close(rs); + + /* Delayed daemonization */ + if ((ctx->flags & FLG_DELAYED) && !(ctx->flags & FLG_NODAEMON)) + daemonize(ctx); +} + +static int +at_async(struct ctx *ctx, void *arg) +{ + int n, i; + size_t l; + char buf[512]; + + watchdog_reset(ctx, 15); + + bzero(buf, sizeof(buf)); + n = readline(ctx->fd, buf, sizeof(buf)); + if (n <= 0) + return (n <= 0 ? -1 : 0); + +#ifdef DEBUG + fprintf(stderr, "AT_ASYNC_RESP: %s", buf); +#endif + for (i = 0; async_cmd[i].cmd != NULL; i++) { + l = strlen(async_cmd[i].cmd); + if (strncmp(buf, async_cmd[i].cmd, l) == 0) { + async_cmd[i].func(arg, buf); + } + } + return (0); +} + +static const char *port_type_list[] = { + "control", "application", "application2", NULL +}; + +/* + * Attempts to find a list of control tty for the interface + * FreeBSD attaches USb devices per interface so we have to go through + * hoops to find which ttys that belong to our network interface. 
+ */ +static char ** +get_tty(struct ctx *ctx) +{ + char buf[64]; + char data[128]; + size_t len; + int error; + unsigned int i; + char **list = NULL; + int list_size = 0; + const char **p; + + for (i = 0; ; i++) { + /* Basic test to check if we're even in the right ballpark */ + snprintf(buf, 64, SYSCTL_TEST, i); + len = 127; + error = sysctlbyname(buf, data, &len, NULL, 0); +#ifdef DEBUG + fprintf(stderr, "sysctl %s returned(%d): %s\n", + buf, error, error == 0 ? data : "FAILED"); +#endif + if (error < 0) + return NULL; + if (strcasecmp(data, "uhso") != 0) + return NULL; + + /* Check for interface */ + snprintf(buf, 64, SYSCTL_NETIF, i); + len = 127; + error = sysctlbyname(buf, data, &len, NULL, 0); +#ifdef DEBUG + fprintf(stderr, "sysctl %s returned(%d): %s\n", + buf, error, error == 0 ? data : "FAILED"); +#endif + if (error < 0) + continue; + + if (strcasecmp(data, ctx->ifnam) != 0) + continue; +#ifdef DEBUG + fprintf(stderr, "Found %s at %s\n", ctx->ifnam, buf); +#endif + break; + } + + /* Add multiplexed ports */ + for (p = port_type_list; *p != NULL; p++) { + snprintf(buf, 64, SYSCTL_NAME_TTY, i, *p); + len = 127; + error = sysctlbyname(buf, data, &len, NULL, 0); +#ifdef DEBUG + fprintf(stderr, "sysctl %s returned(%d): %s\n", + buf, error, error == 0 ? data : "FAILED"); +#endif + if (error == 0) { + list = realloc(list, (list_size + 1) * sizeof(char *)); + list[list_size] = malloc(strlen(data) + strlen(TTY_NAME)); + sprintf(list[list_size], TTY_NAME, data); + list_size++; + } + } + + /* + * We can return directly if we found multiplexed serial ports because + * devices with these ports only have additional diagnostic ports (useless) + * and modem ports (for used with pppd). + */ + if (list_size > 0) { + list = realloc(list, (list_size + 1) * sizeof(char *)); + list[list_size] = NULL; + return list; + } + + /* + * The network port is on a high numbered interface so we walk backwards until + * we hit anything other than application/control. 
+ */ + + for (--i; i >= 0; i--) { + /* Basic test to check if we're even in the right ballpark */ + snprintf(buf, 64, SYSCTL_TEST, i); + len = 127; + error = sysctlbyname(buf, data, &len, NULL, 0); +#ifdef DEBUG + fprintf(stderr, "sysctl %s returned(%d): %s\n", + buf, error, error == 0 ? data : "FAILED"); +#endif + if (error < 0) + break; + if (strcasecmp(data, "uhso") != 0) + break; + + /* Test for useable ports */ + for (p = port_type_list; *p != NULL; p++) { + snprintf(buf, 64, SYSCTL_NAME_TTY, i, p); + len = 127; + error = sysctlbyname(buf, data, &len, NULL, 0); + if (error == 0) { + list = realloc(list, (list_size + 1) * sizeof(char *)); + list[list_size] = malloc(strlen(data) + strlen(TTY_NAME)); + sprintf(list[list_size], TTY_NAME, data); + list_size++; + } + } + + /* HACK! first port is a diagnostic port, we abort here */ + snprintf(buf, 64, SYSCTL_NAME_TTY, i, "diagnostic"); + len = 127; + error = sysctlbyname(buf, data, &len, NULL, 0); +#ifdef DEBUG + fprintf(stderr, "sysctl %s returned(%d): %s\n", + buf, error, error == 0 ? 
data : "FAILED"); +#endif + if (error == 0) + break; + } + + list = realloc(list, (list_size + 1) * sizeof(char *)); + list[list_size] = NULL; + return list; +} + +static int +do_connect(struct ctx *ctx, const char *tty) +{ + int i, error, needcfg; + resp_arg ra; + struct termios t; + char **buf; + +#ifdef DEBUG + fprintf(stderr, "Attempting to open %s\n", tty); +#endif + + ctx->fd = open(tty, O_RDWR); + if (ctx->fd < 0) { +#ifdef DEBUG + fprintf(stderr, "Failed to open %s\n", tty); +#endif + return (-1); + } + + tcgetattr(ctx->fd, &t); + t.c_oflag = 0; + t.c_iflag = 0; + t.c_cflag = CLOCAL | CREAD; + t.c_lflag = 0; + tcsetattr(ctx->fd, TCSAFLUSH, &t); + + error = at_cmd(ctx, NULL, NULL, NULL, "AT\r\n"); + if (error == -2) { + warnx("failed to read from device"); + return (-1); + } + + /* Check for PIN */ + error = at_cmd(ctx, "+CPIN: READY", NULL, NULL, "AT+CPIN?\r\n"); + if (error != 0) { + if (ctx->pin == NULL) { + errx(1, "device requires PIN"); + } + + error = at_cmd(ctx, NULL, NULL, NULL, "AT+CPIN=\"%s\"\r\n", + ctx->pin); + if (error != 0) { + errx(1, "wrong PIN"); + } + } + + /* + * Check if a PDP context has been configured and configure one + * if needed. 
+ */ + ra.val[0].ptr = NULL; + ra.val[1].int32 = 0; + error = at_cmd(ctx, "+CGDCONT", saveresp, &ra, "AT+CGDCONT?\r\n"); + buf = ra.val[0].ptr; + needcfg = 1; + for (i = 0; i < ra.val[1].int32; i++) { + char apn[256]; + int cid; + error = sscanf(buf[i], "+CGDCONT: %d,\"%*[^\"]\",\"%[^\"]\"", + &cid, apn); + if (error != 2) { + free(buf[i]); + continue; + } + + if (cid == ctx->pdp_ctx) { + ctx->con_apn = strdup(apn); + if (ctx->pdp_apn != NULL) { + if (strcmp(apn, ctx->pdp_apn) == 0) + needcfg = 0; + } + else { + needcfg = 0; + } + } + free(buf[i]); + } + free(buf); + + if (needcfg) { + if (ctx->pdp_apn == NULL) + errx(1, "device is not configured and no APN given"); + + error = at_cmd(ctx, NULL, NULL, NULL, + "AT+CGDCONT=%d,,\"%s\"\r\n", ctx->pdp_ctx, ctx->pdp_apn); + if (error != 0) { + errx(1, "failed to configure device"); + } + ctx->con_apn = strdup(ctx->pdp_apn); + } + + if (ctx->pdp_user != NULL || ctx->pdp_pwd != NULL) { + at_cmd(ctx, NULL, NULL, NULL, + "AT$QCPDPP=%d,1,\"%s\",\"%s\"\r\n", ctx->pdp_ctx, + (ctx->pdp_user != NULL) ? ctx->pdp_user : "", + (ctx->pdp_pwd != NULL) ? 
ctx->pdp_pwd : ""); + } + + error = at_cmd(ctx, NULL, NULL, NULL, "AT_OWANCALL=%d,0,0\r\n", + ctx->pdp_ctx); + if (error != 0) + return (-1); + + at_cmd_async(ctx->fd, "AT+CGREG?\r\n"); + at_cmd_async(ctx->fd, "AT+CREG?\r\n"); + + tmr_add(&timers, 1, 5, tmr_status, ctx); + return (0); +} + +static void +do_disconnect(struct ctx *ctx) +{ + struct sockaddr_in sin, mask; + + /* Disconnect */ + at_cmd(ctx, NULL, NULL, NULL, "AT_OWANCALL=%d,0,0\r\n", + ctx->pdp_ctx); + close(ctx->fd); + + /* Remove ip-address from interface */ + if (ctx->flags & IPASSIGNED) { + sin.sin_len = mask.sin_len = sizeof(struct sockaddr_in); + memset(&mask.sin_addr.s_addr, 0xff, + sizeof(mask.sin_addr.s_addr)); + sin.sin_family = mask.sin_family = AF_INET; + memcpy(&sin.sin_addr.s_addr, &ctx->ip.s_addr, + sizeof(sin.sin_addr.s_addr)); + ifaddr_del(ctx->ifnam, (struct sockaddr *)&sin, + (struct sockaddr *)&mask); + + if_ifdown(ctx->ifnam); + ctx->flags &= ~IPASSIGNED; + } + + /* Attempt to reset resolv.conf */ + set_nameservers(ctx, ctx->resolv_path, 0); +} + +static void +daemonize(struct ctx *ctx) +{ + struct pidfh *pfh; + pid_t opid; + + snprintf(ctx->pidfile, 127, PIDFILE, ctx->ifnam); + + pfh = pidfile_open(ctx->pidfile, 0600, &opid); + if (pfh == NULL) { + warn("Cannot create pidfile %s", ctx->pidfile); + return; + } + + if (daemon(0, 0) == -1) { + warn("Cannot daemonize"); + pidfile_remove(pfh); + return; + } + + pidfile_write(pfh); + ctx->pfh = pfh; + ctx->flags |= FLG_DAEMON; + + snprintf(syslog_title, 63, "%s:%s", getprogname(), ctx->ifnam); + openlog(syslog_title, LOG_PID, LOG_USER); + syslog_open = 1; +} + +static void +send_disconnect(const char *ifnam) +{ + char pidfile[128]; + FILE *fp; + pid_t pid; + int n; + + snprintf(pidfile, 127, PIDFILE, ifnam); + fp = fopen(pidfile, "r"); + if (fp == NULL) { + warn("Cannot open %s", pidfile); + return; + } + + n = fscanf(fp, "%d", &pid); + fclose(fp); + if (n != 1) { + warnx("unable to read daemon pid"); + return; + } +#ifdef DEBUG + 
fprintf(stderr, "Sending SIGTERM to %d\n", pid); +#endif + kill(pid, SIGTERM); +} + +static void +usage(const char *exec) +{ + + printf("usage %s [-b] [-n] [-a apn] [-c cid] [-p pin] [-u username] " + "[-k password] [-r resolvpath] [-f tty] interface\n", exec); + printf("usage %s -d interface\n", exec); +} + +enum { + MODE_CONN, + MODE_DISC +}; + +int +main(int argc, char *argv[]) +{ + int ch, error, mode; + const char *ifnam = NULL; + char *tty = NULL; + char **p, **tty_list; + fd_set set; + struct ctx ctx; + struct itimerval it; + + TAILQ_INIT(&timers.head); + timers.res = 1; + + ctx.pdp_ctx = 1; + ctx.pdp_apn = ctx.pdp_user = ctx.pdp_pwd = NULL; + ctx.pin = NULL; + + ctx.con_status = 0; + ctx.con_apn = NULL; + ctx.con_oper = NULL; + ctx.con_net_stat = 0; + ctx.con_net_type = -1; + ctx.flags = 0; + ctx.resolv_path = RESOLV_PATH; + ctx.resolv = NULL; + ctx.ns = NULL; + ctx.dbm = 0; + + mode = MODE_CONN; + ctx.flags |= FLG_DELAYED; + + while ((ch = getopt(argc, argv, "?ha:p:c:u:k:r:f:dbn")) != -1) { + switch (ch) { + case 'a': + ctx.pdp_apn = argv[optind - 1]; + break; + case 'c': + ctx.pdp_ctx = strtol(argv[optind - 1], NULL, 10); + if (ctx.pdp_ctx < 1) { + warnx("Invalid context ID, defaulting to 1"); + ctx.pdp_ctx = 1; + } + break; + case 'p': + ctx.pin = argv[optind - 1]; + break; + case 'u': + ctx.pdp_user = argv[optind - 1]; + break; + case 'k': + ctx.pdp_pwd = argv[optind - 1]; + break; + case 'r': + ctx.resolv_path = argv[optind - 1]; + break; + case 'd': + mode = MODE_DISC; + break; + case 'b': + ctx.flags &= ~FLG_DELAYED; + break; + case 'n': + ctx.flags |= FLG_NODAEMON; + break; + case 'f': + tty = argv[optind - 1]; + break; + case 'h': + case '?': + default: + usage(argv[0]); + exit(EXIT_SUCCESS); + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) + errx(1, "no interface given"); + + ifnam = argv[argc - 1]; + ctx.ifnam = strdup(ifnam); + + switch (mode) { + case MODE_DISC: + printf("Disconnecting %s\n", ifnam); + send_disconnect(ifnam); + 
exit(EXIT_SUCCESS); + default: + break; + } + + signal(SIGHUP, sig_handle); + signal(SIGINT, sig_handle); + signal(SIGQUIT, sig_handle); + signal(SIGTERM, sig_handle); + signal(SIGALRM, sig_handle); + + it.it_interval.tv_sec = 1; + it.it_interval.tv_usec = 0; + it.it_value.tv_sec = 1; + it.it_value.tv_usec = 0; + error = setitimer(ITIMER_REAL, &it, NULL); + if (error != 0) + errx(1, "setitimer"); + + tmr_add(&timers, 1, 5, &tmr_watchdog, &ctx); + watchdog_reset(&ctx, 15); + + if (tty != NULL) { + error = do_connect(&ctx, tty); + if (error != 0) + errx(1, "Failed to open %s", tty); + } + else { + tty_list = get_tty(&ctx); +#ifdef DEBUG + if (tty_list == NULL) { + fprintf(stderr, "get_tty returned empty list\n"); + } else { + fprintf(stderr, "tty list:\n"); + for (p = tty_list; *p != NULL; p++) { + fprintf(stderr, "\t %s\n", *p); + } + } +#endif + for (p = tty_list; *p != NULL; p++) { + error = do_connect(&ctx, *p); + if (error == 0) { + tty = *p; + break; + } + } + if (*p == NULL) + errx(1, "Failed to obtain a control port, " + "try specifying one manually"); + } + + if (!(ctx.flags & FLG_DELAYED) && !(ctx.flags & FLG_NODAEMON)) + daemonize(&ctx); + + + FD_ZERO(&set); + FD_SET(ctx.fd, &set); + for (;;) { + + watchdog_disable(&ctx); + error = select(ctx.fd + 1, &set, NULL, NULL, NULL); + if (error <= 0) { + if (running && errno == EINTR) + continue; + if (ctx.flags & FLG_WDEXP) { + ctx.flags &= ~FLG_WDEXP; + watchdog_reset(&ctx, 5); + do_disconnect(&ctx); + watchdog_reset(&ctx, 15); + do_connect(&ctx, tty); + running = 1; + continue; + } + + break; + } + + if (FD_ISSET(ctx.fd, &set)) { + watchdog_reset(&ctx, 15); + error = at_async(&ctx, &ctx); + if (error != 0) + break; + } + FD_SET(ctx.fd, &set); + + if (!(ctx.flags & FLG_DAEMON) && (ctx.flags & IPASSIGNED)) { + printf("Status: %s (%s)", + ctx.con_status ? 
"connected" : "disconnected", + network_access_type[ctx.con_net_type]); + if (ctx.dbm < 0) + printf(", signal: %d dBm", ctx.dbm); + printf("\r"); + fflush(stdout); + } + } + if (!(ctx.flags & FLG_DAEMON) && (ctx.flags & IPASSIGNED)) + printf("\n"); + + signal(SIGHUP, SIG_DFL); + signal(SIGINT, SIG_DFL); + signal(SIGQUIT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + signal(SIGALRM, SIG_IGN); + + do_disconnect(&ctx); + + if (ctx.flags & FLG_DAEMON) { + pidfile_remove(ctx.pfh); + if (syslog_open) + closelog(); + } + + return (0); +} -- cgit v1.1 From b9f7b6d00de66c1d4478faa0773b5fe511672064 Mon Sep 17 00:00:00 2001 From: thompsa Date: Sun, 17 Jan 2010 18:26:21 +0000 Subject: MFC r201705 Sync to p4 - Add new quirks commands and the '-d' option optionally to specify the ugen device. --- usr.sbin/usbconfig/dump.c | 37 ++++++++++ usr.sbin/usbconfig/dump.h | 6 ++ usr.sbin/usbconfig/usbconfig.8 | 39 ++++++++++- usr.sbin/usbconfig/usbconfig.c | 155 +++++++++++++++++++++++++++++++++++------ 4 files changed, 213 insertions(+), 24 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/usbconfig/dump.c b/usr.sbin/usbconfig/dump.c index 6dfc6ec..4e527ea 100644 --- a/usr.sbin/usbconfig/dump.c +++ b/usr.sbin/usbconfig/dump.c @@ -365,3 +365,40 @@ dump_config(struct libusb20_device *pdev, uint8_t all_cfg) } return; } + +void +dump_string_by_index(struct libusb20_device *pdev, uint8_t str_index) +{ + char *pbuf; + uint8_t n; + uint8_t len; + + pbuf = malloc(256); + if (pbuf == NULL) + err(1, "out of memory"); + + if (str_index == 0) { + /* language table */ + if (libusb20_dev_req_string_sync(pdev, + str_index, 0, pbuf, 256)) { + printf("STRING_0x%02x = \n", str_index); + } else { + printf("STRING_0x%02x = ", str_index); + len = (uint8_t)pbuf[0]; + for (n = 0; n != len; n++) { + printf("0x%02x%s", (uint8_t)pbuf[n], + (n != (len-1)) ? 
", " : ""); + } + printf("\n"); + } + } else { + /* ordinary string */ + if (libusb20_dev_req_string_simple_sync(pdev, + str_index, pbuf, 256)) { + printf("STRING_0x%02x = \n", str_index); + } else { + printf("STRING_0x%02x = <%s>\n", str_index, pbuf); + } + } + free(pbuf); +} diff --git a/usr.sbin/usbconfig/dump.h b/usr.sbin/usbconfig/dump.h index 7fafdfd..581684a 100644 --- a/usr.sbin/usbconfig/dump.h +++ b/usr.sbin/usbconfig/dump.h @@ -24,11 +24,17 @@ * SUCH DAMAGE. */ +#ifndef _DUMP_H_ +#define _DUMP_H_ + const char *dump_mode(uint8_t value); const char *dump_speed(uint8_t value); const char *dump_power_mode(uint8_t value); +void dump_string_by_index(struct libusb20_device *pdev, uint8_t index); void dump_device_info(struct libusb20_device *pdev, uint8_t show_drv); void dump_be_quirk_names(struct libusb20_backend *pbe); void dump_be_dev_quirks(struct libusb20_backend *pbe); void dump_device_desc(struct libusb20_device *pdev); void dump_config(struct libusb20_device *pdev, uint8_t all_cfg); + +#endif /* _DUMP_H_ */ diff --git a/usr.sbin/usbconfig/usbconfig.8 b/usr.sbin/usbconfig/usbconfig.8 index 0914d9e..971367d 100644 --- a/usr.sbin/usbconfig/usbconfig.8 +++ b/usr.sbin/usbconfig/usbconfig.8 @@ -1,6 +1,6 @@ .\" $FreeBSD$ .\" -.\" Copyright (c) 2008 Hans Petter Selasky. All rights reserved. +.\" Copyright (c) 2008-2010 Hans Petter Selasky. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd November 1, 2009 +.Dd January 6, 2010 .Dt USBCONFIG 8 .Os .Sh NAME @@ -34,6 +34,9 @@ .Op Fl u Ar unit .Op Fl a Ar addr .Op cmds... +.Nm +.Op Fl d Ar [ugen]. +.Op cmds... .Sh DESCRIPTION The .Nm @@ -46,6 +49,9 @@ Limit device range to USB devices connected to the given USBUS unit. .It Fl a Ar addr Limit device range to the given USB device index. 
Should only be used in conjunction with the unit argument. +.It Fl d Ar [ugen]. +Limit device range to USB devices connected to the given unit and address. +The unit and address coordinates may be prefixed by the lowercased word "ugen". .It Fl h Show help and available commands. .El @@ -57,5 +63,34 @@ prints a list of all available USB devices. Show information about the device on USB bus 1 at address 2: .Pp .Dl usbconfig -u 1 -a 2 dump_info +.Pp +Dump HID descriptor for device on USB bus 1 at address 2: +.Pp +.Dl usbconfig -u 1 -a 2 do_request 0x81 0x06 0x2200 0 0x100 +.Pp +Dump string descriptor at index Z for device on USB bus 1 at address 2: +.Pp +.Dl usbconfig -u 1 -a 2 dump_string Z +.Pp +Dump current configuration descriptor for device on USB bus 1 at address 2: +.Pp +.Dl usbconfig -u 1 -a 2 dump_curr_config_desc +.Pp +Dump device descriptor for device on USB bus 1 at address 2: +.Pp +.Dl usbconfig -u 1 -a 2 dump_device_desc +.Pp +Program the device on USB bus 1 at address 2 to suspend, resume, power off, go into power save, or power on: +.Pp +.Dl usbconfig -u 1 -a 2 suspend +.Dl usbconfig -u 1 -a 2 resume +.Dl usbconfig -u 1 -a 2 power_off +.Dl usbconfig -u 1 -a 2 power_save +.Dl usbconfig -u 1 -a 2 power_on +.Pp +Display a list of available quirk names: +.Pp +.Dl usbconfig dump_quirk_names +.Pp .Sh SEE ALSO .Xr usb 4 diff --git a/usr.sbin/usbconfig/usbconfig.c b/usr.sbin/usbconfig/usbconfig.c index 810e183..fd701a7 100644 --- a/usr.sbin/usbconfig/usbconfig.c +++ b/usr.sbin/usbconfig/usbconfig.c @@ -1,6 +1,6 @@ /* $FreeBSD$ */ /*- - * Copyright (c) 2008 Hans Petter Selasky. All rights reserved. + * Copyright (c) 2008-2009 Hans Petter Selasky. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -81,6 +81,8 @@ struct options { uint8_t got_show_iface_driver:1; uint8_t got_remove_device_quirk:1; uint8_t got_add_device_quirk:1; + uint8_t got_remove_quirk:1; + uint8_t got_add_quirk:1; uint8_t got_dump_string:1; uint8_t got_do_request:1; }; @@ -94,6 +96,7 @@ struct token { enum { T_UNIT, T_ADDR, + T_UGEN, T_IFACE, T_SET_CONFIG, T_SET_ALT, @@ -101,6 +104,8 @@ enum { T_GET_TEMPLATE, T_ADD_DEVICE_QUIRK, T_REMOVE_DEVICE_QUIRK, + T_ADD_QUIRK, + T_REMOVE_QUIRK, T_SHOW_IFACE_DRIVER, T_DUMP_QUIRK_NAMES, T_DUMP_DEVICE_QUIRKS, @@ -124,6 +129,7 @@ static struct options options; static const struct token token[] = { {"-u", T_UNIT, 1}, {"-a", T_ADDR, 1}, + {"-d", T_UGEN, 1}, {"-i", T_IFACE, 1}, {"set_config", T_SET_CONFIG, 1}, {"set_alt", T_SET_ALT, 1}, @@ -131,6 +137,8 @@ static const struct token token[] = { {"get_template", T_GET_TEMPLATE, 0}, {"add_dev_quirk_vplh", T_ADD_DEVICE_QUIRK, 5}, {"remove_dev_quirk_vplh", T_REMOVE_DEVICE_QUIRK, 5}, + {"add_quirk", T_ADD_QUIRK, 1}, + {"remove_quirk", T_REMOVE_QUIRK, 1}, {"dump_quirk_names", T_DUMP_QUIRK_NAMES, 0}, {"dump_device_quirks", T_DUMP_DEVICE_QUIRKS, 0}, {"dump_device_desc", T_DUMP_DEVICE_DESC, 0}, @@ -247,11 +255,20 @@ get_int(const char *s) } static void +duplicate_option(const char *ptr) +{ + printf("Syntax error: " + "Duplicate option: '%s'\n", ptr); + exit(1); +} + +static void usage(void) { printf("" "usbconfig - configure the USB subsystem" "\n" "usage: usbconfig -u -a -i [cmds...]" "\n" + "usage: usbconfig -d [ugen]. 
-i [cmds...]" "\n" "commands:" "\n" " set_config " "\n" " set_alt " "\n" @@ -259,6 +276,8 @@ usage(void) " get_template" "\n" " add_dev_quirk_vplh " "\n" " remove_dev_quirk_vplh " "\n" + " add_quirk " "\n" + " remove_quirk " "\n" " dump_quirk_names" "\n" " dump_device_quirks" "\n" " dump_device_desc" "\n" @@ -360,25 +379,33 @@ flush_command(struct libusb20_backend *pbe, struct options *opt) } matches++; + if (opt->got_remove_quirk) { + struct LIBUSB20_DEVICE_DESC_DECODED *ddesc; + + ddesc = libusb20_dev_get_device_desc(pdev); + + be_dev_remove_quirk(pbe, + ddesc->idVendor, ddesc->idProduct, + ddesc->bcdDevice, ddesc->bcdDevice, + opt->quirkname); + } + + if (opt->got_add_quirk) { + struct LIBUSB20_DEVICE_DESC_DECODED *ddesc; + + ddesc = libusb20_dev_get_device_desc(pdev); + + be_dev_add_quirk(pbe, + ddesc->idVendor, ddesc->idProduct, + ddesc->bcdDevice, ddesc->bcdDevice, + opt->quirkname); + } + if (libusb20_dev_open(pdev, 0)) { err(1, "could not open device"); } if (opt->got_dump_string) { - char *pbuf; - - pbuf = malloc(256); - if (pbuf == NULL) { - err(1, "out of memory"); - } - if (libusb20_dev_req_string_simple_sync(pdev, - opt->string_index, pbuf, 256)) { - printf("STRING_0x%02x = \n", - opt->string_index); - } else { - printf("STRING_0x%02x = <%s>\n", - opt->string_index, pbuf); - } - free(pbuf); + dump_string_by_index(pdev, opt->string_index); } if (opt->got_do_request) { uint16_t actlen; @@ -501,6 +528,9 @@ main(int argc, char **argv) { struct libusb20_backend *pbe; struct options *opt = &options; + const char *ptr; + int unit; + int addr; int n; int t; @@ -518,6 +548,28 @@ main(int argc, char **argv) if (t > 255) t = 255; switch (get_token(argv[n], t)) { + case T_ADD_QUIRK: + if (opt->got_add_quirk) { + flush_command(pbe, opt); + } + opt->quirkname = argv[n + 1]; + n++; + + opt->got_add_quirk = 1; + opt->got_any++; + break; + + case T_REMOVE_QUIRK: + if (opt->got_remove_quirk) { + flush_command(pbe, opt); + } + opt->quirkname = argv[n + 1]; + n++; + + 
opt->got_remove_quirk = 1; + opt->got_any++; + break; + case T_ADD_DEVICE_QUIRK: if (opt->got_add_device_quirk) { flush_command(pbe, opt); @@ -548,11 +600,15 @@ main(int argc, char **argv) break; case T_DUMP_QUIRK_NAMES: + if (opt->got_dump_quirk_names) + duplicate_option(argv[n]); opt->got_dump_quirk_names = 1; opt->got_any++; break; case T_DUMP_DEVICE_QUIRKS: + if (opt->got_dump_device_quirks) + duplicate_option(argv[n]); opt->got_dump_device_quirks = 1; opt->got_any++; break; @@ -561,6 +617,33 @@ main(int argc, char **argv) opt->got_show_iface_driver = 1; break; + case T_UGEN: + if (opt->got_any) { + /* allow multiple commands on the same line */ + flush_command(pbe, opt); + } + ptr = argv[n + 1]; + + if ((ptr[0] == 'u') && + (ptr[1] == 'g') && + (ptr[2] == 'e') && + (ptr[3] == 'n')) + ptr += 4; + + if ((sscanf(ptr, "%d.%d", + &unit, &addr) != 2) || + (unit < 0) || (unit > 65535) || + (addr < 0) || (addr > 65535)) { + errx(1, "cannot " + "parse '%s'", argv[n + 1]); + } + opt->bus = unit; + opt->addr = addr; + opt->got_bus = 1; +; opt->got_addr = 1; + n++; + break; + case T_UNIT: if (opt->got_any) { /* allow multiple commands on the same line */ @@ -581,84 +664,112 @@ main(int argc, char **argv) n++; break; case T_SET_CONFIG: + if (opt->got_set_config) + duplicate_option(argv[n]); opt->config_index = num_id(argv[n + 1], "cfg_index"); opt->got_set_config = 1; opt->got_any++; n++; break; case T_SET_ALT: + if (opt->got_set_alt) + duplicate_option(argv[n]); opt->alt_index = num_id(argv[n + 1], "cfg_index"); opt->got_set_alt = 1; opt->got_any++; n++; break; case T_SET_TEMPLATE: + if (opt->got_set_template) + duplicate_option(argv[n]); opt->template = get_int(argv[n + 1]); opt->got_set_template = 1; opt->got_any++; n++; break; case T_GET_TEMPLATE: + if (opt->got_get_template) + duplicate_option(argv[n]); opt->got_get_template = 1; opt->got_any++; break; case T_DUMP_DEVICE_DESC: + if (opt->got_dump_device_desc) + duplicate_option(argv[n]); opt->got_dump_device_desc = 1; 
opt->got_any++; break; case T_DUMP_CURR_CONFIG_DESC: + if (opt->got_dump_curr_config) + duplicate_option(argv[n]); opt->got_dump_curr_config = 1; opt->got_any++; break; case T_DUMP_ALL_CONFIG_DESC: + if (opt->got_dump_all_config) + duplicate_option(argv[n]); opt->got_dump_all_config = 1; opt->got_any++; break; case T_DUMP_INFO: + if (opt->got_dump_info) + duplicate_option(argv[n]); opt->got_dump_info = 1; opt->got_any++; break; case T_DUMP_STRING: - if (opt->got_dump_string) { - flush_command(pbe, opt); - } + if (opt->got_dump_string) + duplicate_option(argv[n]); opt->string_index = num_id(argv[n + 1], "str_index"); opt->got_dump_string = 1; opt->got_any++; n++; break; case T_SUSPEND: + if (opt->got_suspend) + duplicate_option(argv[n]); opt->got_suspend = 1; opt->got_any++; break; case T_RESUME: + if (opt->got_resume) + duplicate_option(argv[n]); opt->got_resume = 1; opt->got_any++; break; case T_POWER_OFF: + if (opt->got_power_off) + duplicate_option(argv[n]); opt->got_power_off = 1; opt->got_any++; break; case T_POWER_SAVE: + if (opt->got_power_save) + duplicate_option(argv[n]); opt->got_power_save = 1; opt->got_any++; break; case T_POWER_ON: + if (opt->got_power_on) + duplicate_option(argv[n]); opt->got_power_on = 1; opt->got_any++; break; case T_RESET: + if (opt->got_reset) + duplicate_option(argv[n]); opt->got_reset = 1; opt->got_any++; break; case T_LIST: + if (opt->got_list) + duplicate_option(argv[n]); opt->got_list = 1; opt->got_any++; break; case T_DO_REQUEST: - if (opt->got_do_request) { - flush_command(pbe, opt); - } + if (opt->got_do_request) + duplicate_option(argv[n]); LIBUSB20_INIT(LIBUSB20_CONTROL_SETUP, &opt->setup); opt->setup.bmRequestType = num_id(argv[n + 1], "bmReqTyp"); opt->setup.bRequest = num_id(argv[n + 2], "bReq"); -- cgit v1.1 From 789f1d904d8b4ca27f8828baa66760ac0f46a646 Mon Sep 17 00:00:00 2001 From: thompsa Date: Sun, 17 Jan 2010 18:27:13 +0000 Subject: MFC r201922 Add missing library dependency. 
--- usr.sbin/usbconfig/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'usr.sbin') diff --git a/usr.sbin/usbconfig/Makefile b/usr.sbin/usbconfig/Makefile index 1cd0928..0aa51fa 100644 --- a/usr.sbin/usbconfig/Makefile +++ b/usr.sbin/usbconfig/Makefile @@ -4,6 +4,7 @@ PROG= usbconfig MAN= usbconfig.8 SRCS= usbconfig.c dump.c +DPADD+= ${LIBUSB} LDADD+= -lusb .include -- cgit v1.1 From 828edc278bcc198ab9dd18f7c525c3ffdaecdf19 Mon Sep 17 00:00:00 2001 From: thompsa Date: Sun, 17 Jan 2010 18:27:53 +0000 Subject: MFC r202026 Print error messages to stderr. --- usr.sbin/usbconfig/usbconfig.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/usbconfig/usbconfig.c b/usr.sbin/usbconfig/usbconfig.c index fd701a7..2bd3668 100644 --- a/usr.sbin/usbconfig/usbconfig.c +++ b/usr.sbin/usbconfig/usbconfig.c @@ -175,7 +175,7 @@ be_dev_remove_quirk(struct libusb20_backend *pbe, error = libusb20_be_remove_dev_quirk(pbe, &q); if (error) { - printf("Removing quirk '%s' failed, continuing.\n", str); + fprintf(stderr, "Removing quirk '%s' failed, continuing.\n", str); } return; } @@ -198,7 +198,7 @@ be_dev_add_quirk(struct libusb20_backend *pbe, error = libusb20_be_add_dev_quirk(pbe, &q); if (error) { - printf("Adding quirk '%s' failed, continuing.\n", str); + fprintf(stderr, "Adding quirk '%s' failed, continuing.\n", str); } return; } @@ -257,7 +257,7 @@ get_int(const char *s) static void duplicate_option(const char *ptr) { - printf("Syntax error: " + fprintf(stderr, "Syntax error: " "Duplicate option: '%s'\n", ptr); exit(1); } @@ -265,7 +265,7 @@ duplicate_option(const char *ptr) static void usage(void) { - printf("" + fprintf(stderr, "" "usbconfig - configure the USB subsystem" "\n" "usage: usbconfig -u -a -i [cmds...]" "\n" "usage: usbconfig -d [ugen]. 
-i [cmds...]" "\n" @@ -349,7 +349,7 @@ flush_command(struct libusb20_backend *pbe, struct options *opt) if (opt->got_set_template) { opt->got_any--; if (libusb20_be_set_template(pbe, opt->template)) { - printf("Setting USB template %u failed, " + fprintf(stderr, "Setting USB template %u failed, " "continuing.\n", opt->template); } } -- cgit v1.1 From 7dac57c0ec71d902040c050389e26e9d0d7f7421 Mon Sep 17 00:00:00 2001 From: delphij Date: Mon, 18 Jan 2010 04:58:14 +0000 Subject: MFC r210520: Test index value is within the range before using it to reference array member. PR: bin/141838 Submitted by: Henning Petersen --- usr.sbin/rtsold/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/rtsold/probe.c b/usr.sbin/rtsold/probe.c index 61c47bc..6d0ea79 100644 --- a/usr.sbin/rtsold/probe.c +++ b/usr.sbin/rtsold/probe.c @@ -118,7 +118,7 @@ defrouter_probe(struct ifinfo *ifinfo) goto closeandend; } - for (i = 0; dr.defrouter[i].if_index && i < PRLSTSIZ; i++) { + for (i = 0; i < DRLSTSIZ && dr.defrouter[i].if_index; i++) { if (ifindex && dr.defrouter[i].if_index == ifindex) { /* sanity check */ if (!IN6_IS_ADDR_LINKLOCAL(&dr.defrouter[i].rtaddr)) { -- cgit v1.1 From 1cf7f0292a4c79bb9f40b9f71ec2124ad737cd91 Mon Sep 17 00:00:00 2001 From: nyan Date: Mon, 18 Jan 2010 10:55:29 +0000 Subject: MFC: revision 201392 Use UFS2 as default filesystem on pc98. Now pc98's boot2 works for UFS2. 
--- usr.sbin/sysinstall/label.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/label.c b/usr.sbin/sysinstall/label.c index 2ca947a..08cd3df 100644 --- a/usr.sbin/sysinstall/label.c +++ b/usr.sbin/sysinstall/label.c @@ -384,11 +384,7 @@ new_part(PartType type, char *mpoint, Boolean newfs) pi->newfs_data.newfs_ufs.acls = FALSE; pi->newfs_data.newfs_ufs.multilabel = FALSE; pi->newfs_data.newfs_ufs.softupdates = strcmp(mpoint, "/"); -#ifdef PC98 - pi->newfs_data.newfs_ufs.ufs1 = TRUE; -#else pi->newfs_data.newfs_ufs.ufs1 = FALSE; -#endif } return pi; -- cgit v1.1 From cb4a455b341a19a215c8366d1efe38690bbaae5e Mon Sep 17 00:00:00 2001 From: delphij Date: Wed, 20 Jan 2010 00:43:15 +0000 Subject: MFC r200795: Add support of using environment variable BURNCD_SPEED to specify recording speed. PR: bin/140530 Submitted by: Alexander Best --- usr.sbin/burncd/burncd.8 | 8 ++++++-- usr.sbin/burncd/burncd.c | 20 +++++++++++++------- 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/burncd/burncd.8 b/usr.sbin/burncd/burncd.8 index 18c1bc4..2501e72 100644 --- a/usr.sbin/burncd/burncd.8 +++ b/usr.sbin/burncd/burncd.8 @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 2, 2005 +.Dd December 21, 2009 .Os .Dt BURNCD 8 .Sh NAME @@ -158,7 +158,11 @@ refers to stdin, and can only be used once. .Sh ENVIRONMENT The following environment variables affect the execution of .Nm : -.Bl -tag -width ".Ev CDROM" +.Bl -tag -width ".Ev BURNCD_SPEED" +.It Ev BURNCD_SPEED +The write speed to use if one is not specified with the +.Fl s +flag.
.It Ev CDROM The CD device to use if one is not specified with the .Fl f diff --git a/usr.sbin/burncd/burncd.c b/usr.sbin/burncd/burncd.c index c4d1648..2931cf8 100644 --- a/usr.sbin/burncd/burncd.c +++ b/usr.sbin/burncd/burncd.c @@ -80,11 +80,13 @@ main(int argc, char **argv) int dao = 0, eject = 0, fixate = 0, list = 0, multi = 0, preemp = 0; int nogap = 0, speed = 4 * 177, test_write = 0, force = 0; int block_size = 0, block_type = 0, cdopen = 0, dvdrw = 0; - const char *dev; + const char *dev, *env_speed; if ((dev = getenv("CDROM")) == NULL) dev = "/dev/acd0"; + env_speed = getenv("BURNCD_SPEED"); + while ((ch = getopt(argc, argv, "def:Flmnpqs:tv")) != -1) { switch (ch) { case 'd': @@ -124,12 +126,7 @@ main(int argc, char **argv) break; case 's': - if (strcasecmp("max", optarg) == 0) - speed = CDR_MAX_SPEED; - else - speed = atoi(optarg) * 177; - if (speed <= 0) - errx(EX_USAGE, "Invalid speed: %s", optarg); + env_speed = optarg; break; case 't': @@ -147,6 +144,15 @@ main(int argc, char **argv) argc -= optind; argv += optind; + if (env_speed == NULL) + ; + else if (strcasecmp("max", env_speed) == 0) + speed = CDR_MAX_SPEED; + else + speed = atoi(env_speed) * 177; + if (speed <= 0) + errx(EX_USAGE, "Invalid speed: %s", optarg); + if (argc == 0) usage(); -- cgit v1.1 From df84beebd6755b924add8db85c6f21e02174cce8 Mon Sep 17 00:00:00 2001 From: flz Date: Fri, 22 Jan 2010 23:13:46 +0000 Subject: Synchronize pkg_install with HEAD. 
--- usr.sbin/pkg_install/add/main.c | 2 +- usr.sbin/pkg_install/create/main.c | 2 +- usr.sbin/pkg_install/delete/Makefile | 1 - usr.sbin/pkg_install/delete/main.c | 2 +- usr.sbin/pkg_install/info/Makefile | 1 - usr.sbin/pkg_install/info/main.c | 2 +- usr.sbin/pkg_install/lib/lib.h | 12 ++++++------ usr.sbin/pkg_install/updating/Makefile | 1 - usr.sbin/pkg_install/version/Makefile | 1 - usr.sbin/pkg_install/version/main.c | 2 +- 10 files changed, 11 insertions(+), 15 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pkg_install/add/main.c b/usr.sbin/pkg_install/add/main.c index eaed0c0..d9b381b 100644 --- a/usr.sbin/pkg_install/add/main.c +++ b/usr.sbin/pkg_install/add/main.c @@ -344,7 +344,7 @@ getpackagesite(void) } static void -usage() +usage(void) { fprintf(stderr, "%s\n%s\n", "usage: pkg_add [-viInfFrRMSK] [-t template] [-p prefix] [-P prefix] [-C chrootdir]", diff --git a/usr.sbin/pkg_install/create/main.c b/usr.sbin/pkg_install/create/main.c index b98b21e..d85392d 100644 --- a/usr.sbin/pkg_install/create/main.c +++ b/usr.sbin/pkg_install/create/main.c @@ -249,7 +249,7 @@ main(int argc, char **argv) } static void -usage() +usage(void) { fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n%s\n", "usage: pkg_create [-YNOhjnvyz] [-C conflicts] [-P pkgs] [-p prefix]", diff --git a/usr.sbin/pkg_install/delete/Makefile b/usr.sbin/pkg_install/delete/Makefile index c346ea7..c9a0fde 100644 --- a/usr.sbin/pkg_install/delete/Makefile +++ b/usr.sbin/pkg_install/delete/Makefile @@ -5,7 +5,6 @@ SRCS= main.c perform.c CFLAGS+= -I${.CURDIR}/../lib -WARNS?= 6 WFORMAT?= 1 DPADD= ${LIBINSTALL} ${LIBMD} diff --git a/usr.sbin/pkg_install/delete/main.c b/usr.sbin/pkg_install/delete/main.c index 6075f72..f09a432 100644 --- a/usr.sbin/pkg_install/delete/main.c +++ b/usr.sbin/pkg_install/delete/main.c @@ -170,7 +170,7 @@ main(int argc, char **argv) } static void -usage() +usage(void) { fprintf(stderr, "%s\n%s\n", "usage: pkg_delete [-dDfGinrvxX] [-p prefix] pkg-name ...", diff --git 
a/usr.sbin/pkg_install/info/Makefile b/usr.sbin/pkg_install/info/Makefile index 675f7ca..485cb22 100644 --- a/usr.sbin/pkg_install/info/Makefile +++ b/usr.sbin/pkg_install/info/Makefile @@ -5,7 +5,6 @@ SRCS= main.c perform.c show.c CFLAGS+= -I${.CURDIR}/../lib -WARNS?= 6 WFORMAT?= 1 DPADD= ${LIBINSTALL} ${LIBFETCH} ${LIBMD} diff --git a/usr.sbin/pkg_install/info/main.c b/usr.sbin/pkg_install/info/main.c index 08ab23b..2de638e 100644 --- a/usr.sbin/pkg_install/info/main.c +++ b/usr.sbin/pkg_install/info/main.c @@ -282,7 +282,7 @@ main(int argc, char **argv) } static void -usage() +usage(void) { fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n", "usage: pkg_info [-bcdDEfgGiIjkKLmopPqQrRsvVxX] [-e package] [-l prefix]", diff --git a/usr.sbin/pkg_install/lib/lib.h b/usr.sbin/pkg_install/lib/lib.h index acbd017..a3d0413 100644 --- a/usr.sbin/pkg_install/lib/lib.h +++ b/usr.sbin/pkg_install/lib/lib.h @@ -84,14 +84,14 @@ #define DISPLAY_FNAME "+DISPLAY" #define MTREE_FNAME "+MTREE_DIRS" -#if defined(__FreeBSD_version) && __FreeBSD_version >= 800000 +#if defined(__FreeBSD_version) && __FreeBSD_version >= 900000 +#define INDEX_FNAME "INDEX-9" +#elif defined(__FreeBSD_version) && __FreeBSD_version >= 800000 #define INDEX_FNAME "INDEX-8" #elif defined(__FreeBSD_version) && __FreeBSD_version >= 700000 #define INDEX_FNAME "INDEX-7" #elif defined(__FreeBSD_version) && __FreeBSD_version >= 600000 #define INDEX_FNAME "INDEX-6" -#elif defined(__FreeBSD_version) && __FreeBSD_version >= 500036 -#define INDEX_FNAME "INDEX-5" #else #define INDEX_FNAME "INDEX" #endif @@ -102,10 +102,10 @@ #define PKG_PREFIX_VNAME "PKG_PREFIX" /* - * Version of the package tools - increase only when some - * functionality used by bsd.port.mk is changed, added or removed + * Version of the package tools - increase whenever you make a change + * in the code that is not cosmetic only. 
*/ -#define PKG_INSTALL_VERSION 20090519 +#define PKG_INSTALL_VERSION 20090902 #define PKG_WRAPCONF_FNAME "/var/db/pkg_install.conf" #define main(argc, argv) real_main(argc, argv) diff --git a/usr.sbin/pkg_install/updating/Makefile b/usr.sbin/pkg_install/updating/Makefile index cf8bfc8..b0d3689 100644 --- a/usr.sbin/pkg_install/updating/Makefile +++ b/usr.sbin/pkg_install/updating/Makefile @@ -5,7 +5,6 @@ SRCS= main.c CFLAGS+= -I${.CURDIR}/../lib -WARNS?= 6 WFORMAT?= 1 DPADD= ${LIBINSTALL} ${LIBFETCH} ${LIBMD} diff --git a/usr.sbin/pkg_install/version/Makefile b/usr.sbin/pkg_install/version/Makefile index 71168ee..3e1d7a5 100644 --- a/usr.sbin/pkg_install/version/Makefile +++ b/usr.sbin/pkg_install/version/Makefile @@ -5,7 +5,6 @@ SRCS= main.c perform.c CFLAGS+= -I${.CURDIR}/../lib -WARNS?= 6 WFORMAT?= 1 DPADD= ${LIBINSTALL} ${LIBFETCH} ${LIBMD} diff --git a/usr.sbin/pkg_install/version/main.c b/usr.sbin/pkg_install/version/main.c index f46d945..cad8583 100644 --- a/usr.sbin/pkg_install/version/main.c +++ b/usr.sbin/pkg_install/version/main.c @@ -127,7 +127,7 @@ main(int argc, char **argv) } static void -usage() +usage(void) { fprintf(stderr, "%s\n%s\n%s\n", "usage: pkg_version [-hIoqv] [-l limchar] [-L limchar] [[-X] -s string] [-O origin] [index]", -- cgit v1.1 From 6ce2179f0194bba955e9fc269c00825c2a1d32db Mon Sep 17 00:00:00 2001 From: delphij Date: Sat, 23 Jan 2010 00:52:32 +0000 Subject: MFC r202709: Give the right value when complaining it being wrong. 
Reported by: danfe --- usr.sbin/burncd/burncd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/burncd/burncd.c b/usr.sbin/burncd/burncd.c index 2931cf8..43a0a09 100644 --- a/usr.sbin/burncd/burncd.c +++ b/usr.sbin/burncd/burncd.c @@ -151,7 +151,7 @@ main(int argc, char **argv) else speed = atoi(env_speed) * 177; if (speed <= 0) - errx(EX_USAGE, "Invalid speed: %s", optarg); + errx(EX_USAGE, "Invalid speed: %s", env_speed); if (argc == 0) usage(); -- cgit v1.1 From 09676aea9b40baf57b26a0872995575cef72eed3 Mon Sep 17 00:00:00 2001 From: bz Date: Sat, 23 Jan 2010 16:40:35 +0000 Subject: MFC r202468: Add ip4.saddrsel/ip4.nosaddrsel (and equivalent for ip6) to control whether to use source address selection (default) or the primary jail address for unbound outgoing connections. This is intended to be used by people upgrading from single-IP jails to multi-IP jails but not having to change firewall rules, application ACLs, ... but to force their connections (unless otherwise changed) to the primry jail IP they had been used for years, as well as for people prefering to implement similar policies. Note that for IPv6, if configured incorrectly, this might lead to scope violations, which single-IPv6 jails could as well, as by the design of jails. [1] Reviewed by: jamie, hrs (ipv6 part) Pointed out by: hrs [1] --- usr.sbin/jail/jail.8 | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/jail/jail.8 b/usr.sbin/jail/jail.8 index 4c1a963..f09157f 100644 --- a/usr.sbin/jail/jail.8 +++ b/usr.sbin/jail/jail.8 @@ -34,7 +34,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 18, 2009 +.Dd January 17, 2010 .Dt JAIL 8 .Os .Sh NAME @@ -252,6 +252,13 @@ match. It is only possible to start multiple jails with the same IP address, if none of the jails has more than this single overlapping IP address assigned to itself. 
+.It Va ip4.saddrsel +A boolean option to change the formerly mentioned behaviour and disable +IPv4 source address selection for the prison in favour of the primary +IPv4 address of the jail. +Source address selection is enabled by default for all jails and a +.Va ip4.nosaddrsel +setting of a parent jail is not inherited for any child jails. .It Va ip4 Control the availablity of IPv4 addresses. Possible values are @@ -267,9 +274,10 @@ Setting the .Va ip4.addr parameter implies a value of .Dq new . -.It Va ip6.addr , Va ip6 -A list of IPv6 addresses assigned to the prison, the counterpart to -.Va ip4.addr +.It Va ip6.addr , Va ip6.saddrsel , Va ip6 +A set of IPv6 options for the prison, the counterparts to +.Va ip4.addr , +.Va ip4.saddrsel and .Va ip4 above. -- cgit v1.1 From 9fefaa1da5ac95af3adaeb90fa4952d512222d64 Mon Sep 17 00:00:00 2001 From: kib Date: Sun, 24 Jan 2010 12:35:36 +0000 Subject: Merge scandir(3) interface update to stable/8. MFC r201512: Modernize scandir(3) and alphasort(3) interfaces according to the IEEE Std 1003.1-2008. MFC r201602: Move scandir(3) and alphasort(3) into XSI namespace. MFC r201604: Use thunks to adapt alphasort-like interface to the comparision function required by qsort() and qsort_r(). MFC r202556 (by ache): Use strcoll() in opendir() and alphasort(). Remove some comments. MFC r202572 (by ache): Revert to using strcmp() for opendir(). MFC r202677 (by ache): Style. MFC r202679 (by ache): Style: rename internal function to opendir_compar(). MFC r202691 (by ache): For alphasort(3) add reference to strcoll(3). MFC r202693 (by ache): Style: reword comment. 
--- usr.sbin/lpr/common_source/lp.h | 2 +- usr.sbin/lpr/common_source/rmjob.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/lpr/common_source/lp.h b/usr.sbin/lpr/common_source/lp.h index 891ed2f..63f2ff1 100644 --- a/usr.sbin/lpr/common_source/lp.h +++ b/usr.sbin/lpr/common_source/lp.h @@ -280,7 +280,7 @@ void inform(const struct printer *_pp, char *_cf); void init_printer(struct printer *_pp); void init_request(struct request *_rp); int inlist(char *_uname, char *_cfile); -int iscf(struct dirent *_d); +int iscf(const struct dirent *_d); void ldump(const char *_nfile, const char *_datafile, int _copies); void lastprinter(void); int lockchk(struct printer *_pp, char *_slockf); diff --git a/usr.sbin/lpr/common_source/rmjob.c b/usr.sbin/lpr/common_source/rmjob.c index d6fd614..33fcdac 100644 --- a/usr.sbin/lpr/common_source/rmjob.c +++ b/usr.sbin/lpr/common_source/rmjob.c @@ -384,7 +384,7 @@ rmremote(const struct printer *pp) * Return 1 if the filename begins with 'cf' */ int -iscf(struct dirent *d) +iscf(const struct dirent *d) { return(d->d_name[0] == 'c' && d->d_name[1] == 'f'); } -- cgit v1.1 From 9b55384fa0f5a69cc3fd2170dce5224916b1eb9e Mon Sep 17 00:00:00 2001 From: dougb Date: Tue, 26 Jan 2010 18:24:13 +0000 Subject: MFC r202817: Make -U once again honor -D after my change to consolidate setting of MTREEDB with DESTDIR. PR: bin/143089 Submitted by: Anton Yuzhaninov --- usr.sbin/mergemaster/mergemaster.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mergemaster/mergemaster.sh b/usr.sbin/mergemaster/mergemaster.sh index 1b0753b..d21d80b 100755 --- a/usr.sbin/mergemaster/mergemaster.sh +++ b/usr.sbin/mergemaster/mergemaster.sh @@ -261,11 +261,6 @@ if [ -r "$HOME/.mergemasterrc" ]; then . 
"$HOME/.mergemasterrc" fi -# Assign the location of the mtree database -# -MTREEDB=${MTREEDB:-${DESTDIR}/var/db} -MTREEFILE="${MTREEDB}/mergemaster.mtree" - # Check the command line options # while getopts ":ascrvhipCPm:t:du:w:D:A:FU" COMMAND_LINE_ARGUMENT ; do @@ -342,6 +337,11 @@ while getopts ":ascrvhipCPm:t:du:w:D:A:FU" COMMAND_LINE_ARGUMENT ; do esac done +# Assign the location of the mtree database +# +MTREEDB=${MTREEDB:-${DESTDIR}/var/db} +MTREEFILE="${MTREEDB}/mergemaster.mtree" + # Don't force the user to set this in the mergemaster rc file if [ -n "${PRESERVE_FILES}" -a -z "${PRESERVE_FILES_DIR}" ]; then PRESERVE_FILES_DIR=/var/tmp/mergemaster/preserved-files-`date +%y%m%d-%H%M%S` -- cgit v1.1 From 880dc001b5df23d619456a7c32792c8e0fdb483a Mon Sep 17 00:00:00 2001 From: delphij Date: Thu, 28 Jan 2010 02:33:20 +0000 Subject: MFC r200135: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make umount(8) WARNS=6 clean: - Cast delimiter width to integer [1] - Solve name conflicts against system header - Constify parameters to avoid qualifier conflict PR: bin/140017 [1] Submitted by: Ulrich Spörlein [1] Sponsored by: iXsystems, Inc --- usr.sbin/rpc.umntall/mounttab.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/rpc.umntall/mounttab.c b/usr.sbin/rpc.umntall/mounttab.c index 9d7fe2f..9765f4f 100644 --- a/usr.sbin/rpc.umntall/mounttab.c +++ b/usr.sbin/rpc.umntall/mounttab.c @@ -45,7 +45,7 @@ __FBSDID("$FreeBSD$"); struct mtablist *mtabhead; -static void badline(char *field, char *bad); +static void badline(const char *field, const char *bad); /* * Add an entry to PATH_MOUNTTAB for each mounted NFS filesystem, @@ -69,12 +69,12 @@ add_mtab(char *hostp, char *dirp) { * Read mounttab line for line and return struct mtablist. 
*/ int -read_mtab() { +read_mtab(void) { struct mtablist **mtabpp, *mtabp; char *hostp, *dirp, *cp; char str[STRSIZ]; char *timep, *endp; - time_t time; + time_t actiontime; u_long ultmp; FILE *mtabfile; @@ -86,7 +86,7 @@ read_mtab() { return (0); } } - time = 0; + actiontime = 0; mtabpp = &mtabhead; while (fgets(str, STRSIZ, mtabfile) != NULL) { cp = str; @@ -113,13 +113,13 @@ read_mtab() { badline("time", timep); continue; } - time = ultmp; + actiontime = ultmp; if ((mtabp = malloc(sizeof (struct mtablist))) == NULL) { syslog(LOG_ERR, "malloc"); fclose(mtabfile); return (0); } - mtabp->mtab_time = time; + mtabp->mtab_time = actiontime; memmove(mtabp->mtab_host, hostp, MNTNAMLEN); mtabp->mtab_host[MNTNAMLEN - 1] = '\0'; memmove(mtabp->mtab_dirp, dirp, MNTPATHLEN); @@ -218,7 +218,7 @@ free_mtab() { * Print bad lines to syslog. */ static void -badline(char *field, char *bad) { +badline(const char *field, const char *bad) { syslog(LOG_ERR, "bad mounttab %s field '%s'", field, (bad == NULL) ? "" : bad); } -- cgit v1.1 From ade3e629421f12a6799785d918a77632dfae157a Mon Sep 17 00:00:00 2001 From: antoine Date: Sat, 30 Jan 2010 12:11:21 +0000 Subject: MFC r201145 to stable/8: (S)LIST_HEAD_INITIALIZER takes a (S)LIST_HEAD as an argument. Fix some wrong usages. Note: this does not affect generated binaries as this argument is not used. 
PR: 137213 Submitted by: Eygene Ryabinkin (initial version) --- usr.sbin/cpucontrol/cpucontrol.c | 2 +- usr.sbin/pmcstat/pmcstat_log.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/cpucontrol/cpucontrol.c b/usr.sbin/cpucontrol/cpucontrol.c index 8b3ca1d..b0ef9a3 100644 --- a/usr.sbin/cpucontrol/cpucontrol.c +++ b/usr.sbin/cpucontrol/cpucontrol.c @@ -83,7 +83,7 @@ struct datadir { const char *path; SLIST_ENTRY(datadir) next; }; -static SLIST_HEAD(, datadir) datadirs = SLIST_HEAD_INITIALIZER(&datadirs); +static SLIST_HEAD(, datadir) datadirs = SLIST_HEAD_INITIALIZER(datadirs); struct ucode_handler { ucode_probe_t *probe; diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c index 9814126..a403852 100644 --- a/usr.sbin/pmcstat/pmcstat_log.c +++ b/usr.sbin/pmcstat/pmcstat_log.c @@ -141,7 +141,7 @@ struct pmcstat_pmcrecord { }; static LIST_HEAD(,pmcstat_pmcrecord) pmcstat_pmcs = - LIST_HEAD_INITIALIZER(&pmcstat_pmcs); + LIST_HEAD_INITIALIZER(pmcstat_pmcs); /* -- cgit v1.1 From 8f8fe6b19ecf4a741ffec800090a762d8fca0815 Mon Sep 17 00:00:00 2001 From: antoine Date: Sat, 30 Jan 2010 15:40:00 +0000 Subject: MFC r202440 to stable/8: Unbreak world WITHOUT_NETGRAPH. PR: 137487 Submitted by: bf (previous version) No objections: net@ --- usr.sbin/ppp/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/ppp/Makefile b/usr.sbin/ppp/Makefile index e8ae0e3..11003d7 100644 --- a/usr.sbin/ppp/Makefile +++ b/usr.sbin/ppp/Makefile @@ -25,6 +25,9 @@ PPP_NO_SUID= .if ${MK_ATM} == "no" PPP_NO_ATM= .endif +.if ${MK_NETGRAPH} == "no" +PPP_NO_NETGRAPH= +.endif .if ${MK_PAM_SUPPORT} == "no" PPP_NO_PAM= .endif -- cgit v1.1 From 9aeeda725e7b4847e54934c695b8f815f2b4d060 Mon Sep 17 00:00:00 2001 From: jhb Date: Tue, 2 Feb 2010 18:50:02 +0000 Subject: MFC 203032: Don't pop up the menu to select a documentation language for non-interactive installs. 
Default to not installing any documentation in that case. --- usr.sbin/sysinstall/dist.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/dist.c b/usr.sbin/sysinstall/dist.c index 3c84a1d..cf5566f 100644 --- a/usr.sbin/sysinstall/dist.c +++ b/usr.sbin/sysinstall/dist.c @@ -783,6 +783,10 @@ distSetDoc(dialogMenuItem *self) { int i; + /* Assume no docs for non-interactive installs. */ + if (variable_get(VAR_NONINTERACTIVE)) + return DITEM_SUCCESS | DITEM_RESTORE; + dialog_clear_norefresh(); if (!dmenuOpenSimple(&MenuDocInstall, FALSE)) i = DITEM_FAILURE; -- cgit v1.1 From 4a32e61c7194d6066d53ab2352ca5967efc78173 Mon Sep 17 00:00:00 2001 From: delphij Date: Wed, 3 Feb 2010 18:42:14 +0000 Subject: MFC r202668+r200806: Don't consider non-existence of a PID file an error, we should be able to proceed anyway as this most likely mean that the process has been terminated. [1] Add a new option, -P, which reverts newsyslog(8) to the old behavior, which stops to proceed further, as it is possible that processes which fails to create PID file get screwed by rotation. [2] PR: bin/140397 Submitted by: Dan Lukes [1] Requested by: stas [2] --- usr.sbin/newsyslog/newsyslog.8 | 8 ++++++-- usr.sbin/newsyslog/newsyslog.c | 21 ++++++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/newsyslog/newsyslog.8 b/usr.sbin/newsyslog/newsyslog.8 index 3db563f..06714aa 100644 --- a/usr.sbin/newsyslog/newsyslog.8 +++ b/usr.sbin/newsyslog/newsyslog.8 @@ -17,7 +17,7 @@ .\" the suitability of this software for any purpose. It is .\" provided "as is" without express or implied warranty. 
.\" -.Dd February 24, 2005 +.Dd January 19, 2010 .Dt NEWSYSLOG 8 .Os .Sh NAME @@ -25,7 +25,7 @@ .Nd maintain system log files to manageable sizes .Sh SYNOPSIS .Nm -.Op Fl CFNnrsv +.Op Fl CFNPnrsv .Op Fl R Ar tagname .Op Fl a Ar directory .Op Fl d Ar directory @@ -169,6 +169,10 @@ This option is intended to be used with the or .Fl CC options when creating log files is the only objective. +.It Fl P +Prevent further action if we should send signal but the +.Dq pidfile +is empty or does not exist. .It Fl R Ar tagname Specify that .Nm diff --git a/usr.sbin/newsyslog/newsyslog.c b/usr.sbin/newsyslog/newsyslog.c index ef74f23..4cadda4 100644 --- a/usr.sbin/newsyslog/newsyslog.c +++ b/usr.sbin/newsyslog/newsyslog.c @@ -167,6 +167,7 @@ int needroot = 1; /* Root privs are necessary */ int noaction = 0; /* Don't do anything, just show it */ int norotate = 0; /* Don't rotate */ int nosignal; /* Do not send any signals */ +int enforcepid = 0; /* If PID file does not exist or empty, do nothing */ int force = 0; /* Force the trim no matter what */ int rotatereq = 0; /* -R = Always rotate the file(s) as given */ /* on the command (this also requires */ @@ -580,7 +581,7 @@ parse_args(int argc, char **argv) *p = '\0'; /* Parse command line options. */ - while ((ch = getopt(argc, argv, "a:d:f:nrsvCD:FNR:")) != -1) + while ((ch = getopt(argc, argv, "a:d:f:nrsvCD:FNPR:")) != -1) switch (ch) { case 'a': archtodir++; @@ -624,6 +625,9 @@ parse_args(int argc, char **argv) case 'N': norotate++; break; + case 'P': + enforcepid++; + break; case 'R': rotatereq++; requestor = strdup(optarg); @@ -1779,7 +1783,18 @@ set_swpid(struct sigwork_entry *swork, const struct conf_entry *ent) f = fopen(ent->pid_file, "r"); if (f == NULL) { - warn("can't open pid file: %s", ent->pid_file); + if (errno == ENOENT && enforcepid == 0) { + /* + * Warn if the PID file doesn't exist, but do + * not consider it an error. 
Most likely it + * means the process has been terminated, + * so it should be safe to rotate any log + * files that the process would have been using. + */ + swork->sw_pidok = 1; + warnx("pid file doesn't exist: %s", ent->pid_file); + } else + warn("can't open pid file: %s", ent->pid_file); return; } @@ -1790,7 +1805,7 @@ set_swpid(struct sigwork_entry *swork, const struct conf_entry *ent) * has terminated, so it should be safe to rotate any * log files that the process would have been using. */ - if (feof(f)) { + if (feof(f) && enforcepid == 0) { swork->sw_pidok = 1; warnx("pid file is empty: %s", ent->pid_file); } else -- cgit v1.1 From 63ac3cf8c47ea847e5eb490a164494f9a7eedbb7 Mon Sep 17 00:00:00 2001 From: avg Date: Sat, 6 Feb 2010 12:03:25 +0000 Subject: MFC r197104,197105,197106,197107,197688,198237,199337,199338,200553,200554, 202771,202773: bring acpica version to 20100121 MFC details: r197104 | jkim | 2009-09-12 01:48:53 +0300 (Sat, 12 Sep 2009) | 4 lines MFV: r196804 Import ACPICA 20090903 r197105 | jkim | 2009-09-12 01:49:34 +0300 (Sat, 12 Sep 2009) | 2 lines Catch up with ACPICA 20090903. r197106 | jkim | 2009-09-12 01:50:15 +0300 (Sat, 12 Sep 2009) | 2 lines Catch up with ACPICA 20090903. r197107 | jkim | 2009-09-12 01:56:08 +0300 (Sat, 12 Sep 2009) | 2 lines Canonify include paths for newly added files. r197688 | jkim | 2009-10-01 23:56:15 +0300 (Thu, 01 Oct 2009) | 4 lines Compile ACPI debugger and disassembler for kernel modules unconditionally. These files will generate almost empty object files without ACPI_DEBUG/DDB options. As a result, size of acpi.ko will increase slightly. r198237 | jkim | 2009-10-19 19:12:58 +0300 (Mon, 19 Oct 2009) | 2 lines Merge ACPICA 20091013. r199337 | jkim | 2009-11-16 23:47:12 +0200 (Mon, 16 Nov 2009) | 2 lines Merge ACPICA 20091112. r199338 | jkim | 2009-11-16 23:53:56 +0200 (Mon, 16 Nov 2009) | 2 lines Add a forgotten module Makefile change from the previous commit. 
r200553 | jkim | 2009-12-15 00:24:04 +0200 (Tue, 15 Dec 2009) | 2 lines Merge ACPICA 20091214. r200554 | jkim | 2009-12-15 00:28:32 +0200 (Tue, 15 Dec 2009) | 3 lines Remove _FDE quirk handling as these quirks are automatically repaired by ACPICA layer since ACPICA 20091214. r202771 | jkim | 2010-01-21 23:14:28 +0200 (Thu, 21 Jan 2010) | 2 lines Merge ACPICA 20100121. r202773 | jkim | 2010-01-21 23:31:39 +0200 (Thu, 21 Jan 2010) | 2 lines Fix a new header inclusion. Discussed with: jkim, jhb No objections from: acpi@ --- usr.sbin/acpi/acpidb/Makefile | 10 +++++----- usr.sbin/acpi/acpidb/acpidb.c | 14 +++++++------- usr.sbin/acpi/iasl/Makefile | 3 ++- 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/acpi/acpidb/Makefile b/usr.sbin/acpi/acpidb/Makefile index 1694867..5981f38 100644 --- a/usr.sbin/acpi/acpidb/Makefile +++ b/usr.sbin/acpi/acpidb/Makefile @@ -38,8 +38,8 @@ SRCS+= psargs.c psloop.c psopcode.c psparse.c psscope.c \ # namespace SRCS+= nsaccess.c nsalloc.c nsdump.c nseval.c nsinit.c \ nsload.c nsnames.c nsobject.c nsparse.c nspredef.c \ - nssearch.c nsutils.c nswalk.c nsxfeval.c nsxfname.c \ - nsxfobj.c + nsrepair.c nsrepair2.c nssearch.c nsutils.c nswalk.c \ + nsxfeval.c nsxfname.c nsxfobj.c # resources SRCS+= rsaddr.c rscalc.c rscreate.c rsdump.c rsinfo.c \ @@ -52,9 +52,9 @@ SRCS+= tbfadt.c tbfind.c tbinstal.c tbutils.c tbxface.c \ # utilities SRCS+= utalloc.c utcache.c utcopy.c utdebug.c utdelete.c \ - uteval.c utglobal.c utinit.c utlock.c utmath.c utmisc.c \ - utmutex.c utobject.c utresrc.c utstate.c uttrack.c \ - utxface.c + uteval.c utglobal.c utids.c utinit.c utlock.c utmath.c \ + utmisc.c utmutex.c utobject.c utresrc.c utstate.c \ + uttrack.c utxface.c MAN= acpidb.8 WARNS?= 2 diff --git a/usr.sbin/acpi/acpidb/acpidb.c b/usr.sbin/acpi/acpidb/acpidb.c index 53d9db1..325dfae 100644 --- a/usr.sbin/acpi/acpidb/acpidb.c +++ b/usr.sbin/acpi/acpidb/acpidb.c @@ -81,7 +81,7 @@ static int 
aml_simulate_regcontent_read(int regtype, static int aml_simulate_regcontent_write(int regtype, ACPI_PHYSICAL_ADDRESS addr, UINT8 *valuep); -static ACPI_INTEGER aml_simulate_prompt(char *msg, ACPI_INTEGER def_val); +static UINT64 aml_simulate_prompt(char *msg, UINT64 def_val); static void aml_simulation_regload(const char *dumpfile); static void aml_simulation_regdump(const char *dumpfile); @@ -161,11 +161,11 @@ aml_simulate_regcontent_write(int regtype, ACPI_PHYSICAL_ADDRESS addr, UINT8 *va return (aml_simulate_regcontent_add(regtype, addr, *valuep)); } -static ACPI_INTEGER -aml_simulate_prompt(char *msg, ACPI_INTEGER def_val) +static UINT64 +aml_simulate_prompt(char *msg, UINT64 def_val) { char buf[16], *ep; - ACPI_INTEGER val; + UINT64 val; val = def_val; printf("DEBUG"); @@ -271,12 +271,12 @@ aml_vm_space_handler( UINT32 Function, ACPI_PHYSICAL_ADDRESS Address, UINT32 BitWidth, - ACPI_INTEGER *Value, + UINT64 *Value, int Prompt) { int state; UINT8 val; - ACPI_INTEGER value, i; + UINT64 value, i; char msg[256]; static const char *space_names[] = { "SYSTEM_MEMORY", "SYSTEM_IO", "PCI_CONFIG", @@ -336,7 +336,7 @@ aml_vm_space_handler_##name ( \ UINT32 Function, \ ACPI_PHYSICAL_ADDRESS Address, \ UINT32 BitWidth, \ - ACPI_INTEGER *Value) \ + UINT64 *Value) \ { \ return (aml_vm_space_handler(id, Function, Address, \ BitWidth, Value, aml_debug_prompt)); \ diff --git a/usr.sbin/acpi/iasl/Makefile b/usr.sbin/acpi/iasl/Makefile index 551bfc6..ff6ebd7 100644 --- a/usr.sbin/acpi/iasl/Makefile +++ b/usr.sbin/acpi/iasl/Makefile @@ -5,7 +5,8 @@ SRCS= adfile.c adisasm.c adwalk.c SRCS+= osunixxf.c # common -SRCS+= dmrestag.c dmtable.c dmtbdump.c dmtbinfo.c getopt.c +SRCS+= dmextern.c dmrestag.c dmtable.c dmtbdump.c dmtbinfo.c \ + getopt.c # compiler SRCS+= aslanalyze.c aslcodegen.c aslcompile.c aslcompiler.y.h \ -- cgit v1.1 From c93877c1bb1c338a7014ffd155638f1efba80362 Mon Sep 17 00:00:00 2001 From: ume Date: Sat, 13 Feb 2010 16:25:33 +0000 Subject: MFC r203378: Make -a option 
actually work. --- usr.sbin/rtsold/rtsold.c | 1 - 1 file changed, 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/rtsold/rtsold.c b/usr.sbin/rtsold/rtsold.c index f0493fbd..fd08954 100644 --- a/usr.sbin/rtsold/rtsold.c +++ b/usr.sbin/rtsold/rtsold.c @@ -841,7 +841,6 @@ autoifprobe(void) if (!argv[n]) err(1, "malloc"); n++; - argv[n] = NULL; } if (n) { -- cgit v1.1 From b1b494074aed19230e17a74f24a84023336c7b98 Mon Sep 17 00:00:00 2001 From: ume Date: Sat, 13 Feb 2010 16:28:25 +0000 Subject: MFC r203387: Exclude the interfaces which IPv6 and/or accepting RA is disabled from the auto probed interface list. --- usr.sbin/rtsold/rtsold.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/rtsold/rtsold.c b/usr.sbin/rtsold/rtsold.c index fd08954..b2031c8 100644 --- a/usr.sbin/rtsold/rtsold.c +++ b/usr.sbin/rtsold/rtsold.c @@ -32,15 +32,20 @@ */ #include +#include #include #include #include #include #include +#include #include #include +#include + +#include #include #include @@ -789,8 +794,9 @@ autoifprobe(void) static char **argv = NULL; static int n = 0; char **a; - int i, found; + int s, i, found; struct ifaddrs *ifap, *ifa, *target; + struct in6_ndireq nd; /* initialize */ while (n--) @@ -804,6 +810,11 @@ autoifprobe(void) if (getifaddrs(&ifap) != 0) return NULL; + if (!Fflag && (s = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) { + err(1, "socket"); + /* NOTREACHED */ + } + target = NULL; /* find an ethernet */ for (ifa = ifap; ifa; ifa = ifa->ifa_next) { @@ -829,6 +840,23 @@ autoifprobe(void) if (found) continue; + /* + * Skip the interfaces which IPv6 and/or accepting RA + * is disabled. 
+ */ + if (!Fflag) { + memset(&nd, 0, sizeof(nd)); + strlcpy(nd.ifname, ifa->ifa_name, sizeof(nd.ifname)); + if (ioctl(s, SIOCGIFINFO_IN6, (caddr_t)&nd) < 0) { + err(1, "ioctl(SIOCGIFINFO_IN6)"); + /* NOTREACHED */ + } + if ((nd.ndi.flags & ND6_IFF_IFDISABLED)) + continue; + if (!(nd.ndi.flags & ND6_IFF_ACCEPT_RTADV)) + continue; + } + /* if we find multiple candidates, just warn. */ if (n != 0 && dflag > 1) warnx("multiple interfaces found"); @@ -855,6 +883,8 @@ autoifprobe(void) warnx("probing %s", argv[i]); } } + if (!Fflag) + close(s); freeifaddrs(ifap); return argv; } -- cgit v1.1 From 3b412f120a5ecda5b055bc54454cf9ebc9f86ba8 Mon Sep 17 00:00:00 2001 From: brucec Date: Thu, 18 Feb 2010 10:48:37 +0000 Subject: MFC r203690: Xorg isn't treated as a distribution, so /usr/X11R6/lib shouldn't be configured when running ldconfig. PR: bin/138945 Approved by: rrs (mentor) --- usr.sbin/sysinstall/package.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/package.c b/usr.sbin/sysinstall/package.c index 212b654..9aa4589 100644 --- a/usr.sbin/sysinstall/package.c +++ b/usr.sbin/sysinstall/package.c @@ -139,7 +139,7 @@ package_extract(Device *dev, char *name, Boolean depended) /* If necessary, initialize the ldconfig hints */ if (!file_readable("/var/run/ld-elf.so.hints")) - vsystem("ldconfig /usr/lib /usr/lib/compat /usr/local/lib /usr/X11R6/lib"); + vsystem("ldconfig /usr/lib /usr/lib/compat /usr/local/lib"); /* Be initially optimistic */ ret = DITEM_SUCCESS; -- cgit v1.1 From d499b4f50e5e556508b5bde30542f8ddc7f222bc Mon Sep 17 00:00:00 2001 From: ru Date: Mon, 22 Feb 2010 16:00:55 +0000 Subject: MFC: r203919: Show when an ARP entry expires (now that this info cannot be obtained with netstat(1)). 
--- usr.sbin/arp/arp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/arp/arp.c b/usr.sbin/arp/arp.c index 61427d3..a0e228c 100644 --- a/usr.sbin/arp/arp.c +++ b/usr.sbin/arp/arp.c @@ -101,7 +101,8 @@ static int valid_type(int type); static int nflag; /* no reverse dns lookups */ static char *rifname; -static int expire_time, flags, doing_proxy, proxy_only; +static time_t expire_time; +static int flags, doing_proxy, proxy_only; /* which function we're supposed to do */ #define F_GET 1 @@ -594,6 +595,15 @@ print_entry(struct sockaddr_dl *sdl, printf(" on %s", ifname); if (rtm->rtm_rmx.rmx_expire == 0) printf(" permanent"); + else { + static struct timeval tv; + if (tv.tv_sec == 0) + gettimeofday(&tv, 0); + if ((expire_time = rtm->rtm_rmx.rmx_expire - tv.tv_sec) > 0) + printf(" expires in %d seconds", (int)expire_time); + else + printf(" expired"); + } if (addr->sin_other & SIN_PROXY) printf(" published (proxy only)"); if (rtm->rtm_flags & RTF_ANNOUNCE) -- cgit v1.1 From 37147abaa37031640b114ed8df4ad2581fc80fc2 Mon Sep 17 00:00:00 2001 From: jhb Date: Wed, 24 Feb 2010 21:20:25 +0000 Subject: MFC 204086: - Don't emit a warning in 'show adapter' if the IOC2 or IOC6 pages are not present. mpt(4) controllers that do not support RAID do not have an IOC6 page, for example. - Correct a check for a missing page error in a debug function. 
--- usr.sbin/mptutil/mpt_show.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mptutil/mpt_show.c b/usr.sbin/mptutil/mpt_show.c index e0b6b74..5a2a946 100644 --- a/usr.sbin/mptutil/mpt_show.c +++ b/usr.sbin/mptutil/mpt_show.c @@ -78,6 +78,7 @@ show_adapter(int ac, char **av) CONFIG_PAGE_MANUFACTURING_0 *man0; CONFIG_PAGE_IOC_2 *ioc2; CONFIG_PAGE_IOC_6 *ioc6; + U16 IOCStatus; int fd, comma; if (ac != 1) { @@ -108,7 +109,7 @@ show_adapter(int ac, char **av) free(man0); - ioc2 = mpt_read_ioc_page(fd, 2, NULL); + ioc2 = mpt_read_ioc_page(fd, 2, &IOCStatus); if (ioc2 != NULL) { printf(" RAID Levels:"); comma = 0; @@ -151,9 +152,11 @@ show_adapter(int ac, char **av) printf(" none"); printf("\n"); free(ioc2); - } + } else if ((IOCStatus & MPI_IOCSTATUS_MASK) != + MPI_IOCSTATUS_CONFIG_INVALID_PAGE) + warnx("mpt_read_ioc_page(2): %s", mpt_ioc_status(IOCStatus)); - ioc6 = mpt_read_ioc_page(fd, 6, NULL); + ioc6 = mpt_read_ioc_page(fd, 6, &IOCStatus); if (ioc6 != NULL) { display_stripe_map(" RAID0 Stripes", ioc6->SupportedStripeSizeMapIS); @@ -172,7 +175,9 @@ show_adapter(int ac, char **av) printf("-%u", ioc6->MaxDrivesIME); printf("\n"); free(ioc6); - } + } else if ((IOCStatus & MPI_IOCSTATUS_MASK) != + MPI_IOCSTATUS_CONFIG_INVALID_PAGE) + warnx("mpt_read_ioc_page(6): %s", mpt_ioc_status(IOCStatus)); /* TODO: Add an ioctl to fetch IOC_FACTS and print firmware version. 
*/ @@ -541,7 +546,8 @@ show_physdisks(int ac, char **av) for (i = 0; i <= 0xff; i++) { pinfo = mpt_pd_info(fd, i, &IOCStatus); if (pinfo == NULL) { - if (IOCStatus != MPI_IOCSTATUS_CONFIG_INVALID_PAGE) + if ((IOCStatus & MPI_IOCSTATUS_MASK) != + MPI_IOCSTATUS_CONFIG_INVALID_PAGE) warnx("mpt_pd_info(%d): %s", i, mpt_ioc_status(IOCStatus)); continue; -- cgit v1.1 From c2f20743f2974f04797ee4f1a0a42140fa403804 Mon Sep 17 00:00:00 2001 From: jhb Date: Wed, 24 Feb 2010 21:29:18 +0000 Subject: MFC 204090: Fix mptutil's method for locating disk devices attached to a specific mpt(4) controller. Previously, the code assumed that multiple match patterns provided to an XPT_DEV_MATCH request were ANDed together. Instead, they are ORed. Instead, to match peripherals for a specific bus, one query needs to be performed to lookup the path ID of the bus. A second query can then be performed matching peripherals attached to that path. This approach also makes the code a bit cleaner as the returned match results do not mix buses and perphierals. --- usr.sbin/mptutil/mpt_cam.c | 222 ++++++++++++++++++++++----------------------- 1 file changed, 109 insertions(+), 113 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mptutil/mpt_cam.c b/usr.sbin/mptutil/mpt_cam.c index 0d20c7d..b14451a 100644 --- a/usr.sbin/mptutil/mpt_cam.c +++ b/usr.sbin/mptutil/mpt_cam.c @@ -56,15 +56,75 @@ xpt_open(void) return (xptfd); } +/* Fetch the path id of bus 0 for the opened mpt controller. */ +static int +fetch_path_id(path_id_t *path_id) +{ + struct bus_match_pattern *b; + union ccb ccb; + size_t bufsize; + + if (xpt_open() < 0) + return (ENXIO); + + /* First, find the path id of bus 0 for this mpt controller. 
*/ + bzero(&ccb, sizeof(ccb)); + + ccb.ccb_h.func_code = XPT_DEV_MATCH; + + bufsize = sizeof(struct dev_match_result) * 1; + ccb.cdm.num_matches = 0; + ccb.cdm.match_buf_len = bufsize; + ccb.cdm.matches = calloc(1, bufsize); + + bufsize = sizeof(struct dev_match_pattern) * 1; + ccb.cdm.num_patterns = 1; + ccb.cdm.pattern_buf_len = bufsize; + ccb.cdm.patterns = calloc(1, bufsize); + + /* Match mptX bus 0. */ + ccb.cdm.patterns[0].type = DEV_MATCH_BUS; + b = &ccb.cdm.patterns[0].pattern.bus_pattern; + snprintf(b->dev_name, sizeof(b->dev_name), "mpt"); + b->unit_number = mpt_unit; + b->bus_id = 0; + b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID; + + if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) { + free(ccb.cdm.matches); + free(ccb.cdm.patterns); + return (errno); + } + free(ccb.cdm.patterns); + + if (((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) || + (ccb.cdm.status != CAM_DEV_MATCH_LAST)) { + warnx("fetch_path_id got CAM error %#x, CDM error %d\n", + ccb.ccb_h.status, ccb.cdm.status); + free(ccb.cdm.matches); + return (EIO); + } + + /* We should have exactly 1 match for the bus. */ + if (ccb.cdm.num_matches != 1 || + ccb.cdm.matches[0].type != DEV_MATCH_BUS) { + free(ccb.cdm.matches); + return (ENOENT); + } + *path_id = ccb.cdm.matches[0].result.bus_result.path_id; + free(ccb.cdm.matches); + return (0); +} + int mpt_query_disk(U8 VolumeBus, U8 VolumeID, struct mpt_query_disk *qd) { - struct bus_match_pattern *b; struct periph_match_pattern *p; struct periph_match_result *r; union ccb ccb; + path_id_t path_id; size_t bufsize; - int i; + int error, i; /* mpt(4) only handles devices on bus 0. */ if (VolumeBus != 0) @@ -73,6 +133,11 @@ mpt_query_disk(U8 VolumeBus, U8 VolumeID, struct mpt_query_disk *qd) if (xpt_open() < 0) return (ENXIO); + /* Find the path ID of bus 0. 
*/ + error = fetch_path_id(&path_id); + if (error) + return (error); + bzero(&ccb, sizeof(ccb)); ccb.ccb_h.func_code = XPT_DEV_MATCH; @@ -85,25 +150,18 @@ mpt_query_disk(U8 VolumeBus, U8 VolumeID, struct mpt_query_disk *qd) ccb.cdm.match_buf_len = bufsize; ccb.cdm.matches = calloc(1, bufsize); - bufsize = sizeof(struct dev_match_pattern) * 2; - ccb.cdm.num_patterns = 2; + bufsize = sizeof(struct dev_match_pattern) * 1; + ccb.cdm.num_patterns = 1; ccb.cdm.pattern_buf_len = bufsize; ccb.cdm.patterns = calloc(1, bufsize); - /* Match mptX bus 0. */ - ccb.cdm.patterns[0].type = DEV_MATCH_BUS; - b = &ccb.cdm.patterns[0].pattern.bus_pattern; - snprintf(b->dev_name, sizeof(b->dev_name), "mpt"); - b->unit_number = mpt_unit; - b->bus_id = 0; - b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID; - /* Look for a "da" device at the specified target and lun. */ - ccb.cdm.patterns[1].type = DEV_MATCH_PERIPH; - p = &ccb.cdm.patterns[1].pattern.periph_pattern; + ccb.cdm.patterns[0].type = DEV_MATCH_PERIPH; + p = &ccb.cdm.patterns[0].pattern.periph_pattern; + p->path_id = path_id; snprintf(p->periph_name, sizeof(p->periph_name), "da"); p->target_id = VolumeID; - p->flags = PERIPH_MATCH_NAME | PERIPH_MATCH_TARGET; + p->flags = PERIPH_MATCH_PATH | PERIPH_MATCH_NAME | PERIPH_MATCH_TARGET; if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) { i = errno; @@ -122,25 +180,22 @@ mpt_query_disk(U8 VolumeBus, U8 VolumeID, struct mpt_query_disk *qd) } /* - * We should have exactly 2 matches, 1 for the bus and 1 for - * the peripheral. However, if we only have 1 match and it is - * for the bus, don't print an error message and return - * ENOENT. + * We should have exactly 1 match for the peripheral. + * However, if we don't get a match, don't print an error + * message and return ENOENT. 
*/ - if (ccb.cdm.num_matches == 1 && - ccb.cdm.matches[0].type == DEV_MATCH_BUS) { + if (ccb.cdm.num_matches == 0) { free(ccb.cdm.matches); return (ENOENT); } - if (ccb.cdm.num_matches != 2) { - warnx("mpt_query_disk got %d matches, expected 2", + if (ccb.cdm.num_matches != 1) { + warnx("mpt_query_disk got %d matches, expected 1", ccb.cdm.num_matches); free(ccb.cdm.matches); return (EIO); } - if (ccb.cdm.matches[0].type != DEV_MATCH_BUS || - ccb.cdm.matches[1].type != DEV_MATCH_PERIPH) { - warnx("mpt_query_disk got wrong CAM matches"); + if (ccb.cdm.matches[0].type != DEV_MATCH_PERIPH) { + warnx("mpt_query_disk got wrong CAM match"); free(ccb.cdm.matches); return (EIO); } @@ -336,47 +391,44 @@ mpt_fetch_disks(int fd, int *ndisks, struct mpt_standalone_disk **disksp) { CONFIG_PAGE_IOC_2 *ioc2; struct mpt_standalone_disk *disks; - struct bus_match_pattern *b; struct periph_match_pattern *p; struct periph_match_result *r; struct cam_device *dev; union ccb ccb; + path_id_t path_id; size_t bufsize; u_int i; - int count; + int count, error; if (xpt_open() < 0) return (ENXIO); + error = fetch_path_id(&path_id); + if (error) + return (error); + for (count = 100;; count+= 100) { /* Try to fetch 'count' disks in one go. */ bzero(&ccb, sizeof(ccb)); ccb.ccb_h.func_code = XPT_DEV_MATCH; - bufsize = sizeof(struct dev_match_result) * (count + 2); + bufsize = sizeof(struct dev_match_result) * (count + 1); ccb.cdm.num_matches = 0; ccb.cdm.match_buf_len = bufsize; ccb.cdm.matches = calloc(1, bufsize); - bufsize = sizeof(struct dev_match_pattern) * 2; - ccb.cdm.num_patterns = 2; + bufsize = sizeof(struct dev_match_pattern) * 1; + ccb.cdm.num_patterns = 1; ccb.cdm.pattern_buf_len = bufsize; ccb.cdm.patterns = calloc(1, bufsize); - /* Match mptX bus 0. 
*/ - ccb.cdm.patterns[0].type = DEV_MATCH_BUS; - b = &ccb.cdm.patterns[0].pattern.bus_pattern; - snprintf(b->dev_name, sizeof(b->dev_name), "mpt"); - b->unit_number = mpt_unit; - b->bus_id = 0; - b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID; - /* Match any "da" peripherals. */ - ccb.cdm.patterns[1].type = DEV_MATCH_PERIPH; - p = &ccb.cdm.patterns[1].pattern.periph_pattern; + ccb.cdm.patterns[0].type = DEV_MATCH_PERIPH; + p = &ccb.cdm.patterns[0].pattern.periph_pattern; + p->path_id = path_id; snprintf(p->periph_name, sizeof(p->periph_name), "da"); - p->flags = PERIPH_MATCH_NAME; + p->flags = PERIPH_MATCH_PATH | PERIPH_MATCH_NAME; if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) { i = errno; @@ -406,21 +458,16 @@ mpt_fetch_disks(int fd, int *ndisks, struct mpt_standalone_disk **disksp) break; } - /* - * We should have N + 1 matches, 1 for the bus and 1 for each - * "da" device. - */ - if (ccb.cdm.num_matches < 1) { - warnx("mpt_fetch_disks didn't get any matches"); - free(ccb.cdm.matches); - return (EIO); - } - if (ccb.cdm.matches[0].type != DEV_MATCH_BUS) { - warnx("mpt_fetch_disks got wrong CAM matches"); + /* Shortcut if we don't have any "da" devices. */ + if (ccb.cdm.num_matches == 0) { free(ccb.cdm.matches); - return (EIO); + *ndisks = 0; + *disksp = NULL; + return (0); } - for (i = 1; i < ccb.cdm.num_matches; i++) { + + /* We should have N matches, 1 for each "da" device. */ + for (i = 0; i < ccb.cdm.num_matches; i++) { if (ccb.cdm.matches[i].type != DEV_MATCH_PERIPH) { warnx("mpt_fetch_disks got wrong CAM matches"); free(ccb.cdm.matches); @@ -428,14 +475,6 @@ mpt_fetch_disks(int fd, int *ndisks, struct mpt_standalone_disk **disksp) } } - /* Shortcut if we don't have any "da" devices. 
*/ - if (ccb.cdm.num_matches == 1) { - free(ccb.cdm.matches); - *ndisks = 0; - *disksp = NULL; - return (0); - } - /* * Some of the "da" peripherals may be for RAID volumes, so * fetch the IOC 2 page (list of RAID volumes) so we can @@ -444,7 +483,7 @@ mpt_fetch_disks(int fd, int *ndisks, struct mpt_standalone_disk **disksp) ioc2 = mpt_read_ioc_page(fd, 2, NULL); disks = calloc(ccb.cdm.num_matches, sizeof(*disks)); count = 0; - for (i = 1; i < ccb.cdm.num_matches; i++) { + for (i = 0; i < ccb.cdm.num_matches; i++) { r = &ccb.cdm.matches[i].result.periph_result; if (periph_is_volume(ioc2, r)) continue; @@ -480,10 +519,9 @@ mpt_fetch_disks(int fd, int *ndisks, struct mpt_standalone_disk **disksp) int mpt_rescan_bus(int bus, int id) { - struct bus_match_pattern *b; union ccb ccb; path_id_t path_id; - size_t bufsize; + int error; /* mpt(4) only handles devices on bus 0. */ if (bus != -1 && bus != 0) @@ -492,54 +530,12 @@ mpt_rescan_bus(int bus, int id) if (xpt_open() < 0) return (ENXIO); - /* First, find the path id of bus 0 for this mpt controller. */ - bzero(&ccb, sizeof(ccb)); - - ccb.ccb_h.func_code = XPT_DEV_MATCH; - - bufsize = sizeof(struct dev_match_result) * 1; - ccb.cdm.num_matches = 0; - ccb.cdm.match_buf_len = bufsize; - ccb.cdm.matches = calloc(1, bufsize); - - bufsize = sizeof(struct dev_match_pattern) * 1; - ccb.cdm.num_patterns = 1; - ccb.cdm.pattern_buf_len = bufsize; - ccb.cdm.patterns = calloc(1, bufsize); - - /* Match mptX bus 0. 
*/ - ccb.cdm.patterns[0].type = DEV_MATCH_BUS; - b = &ccb.cdm.patterns[0].pattern.bus_pattern; - snprintf(b->dev_name, sizeof(b->dev_name), "mpt"); - b->unit_number = mpt_unit; - b->bus_id = 0; - b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID; - - if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) { - free(ccb.cdm.matches); - free(ccb.cdm.patterns); - return (errno); - } - free(ccb.cdm.patterns); - - if (((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) || - (ccb.cdm.status != CAM_DEV_MATCH_LAST)) { - warnx("mpt_rescan_bus got CAM error %#x, CDM error %d\n", - ccb.ccb_h.status, ccb.cdm.status); - free(ccb.cdm.matches); - return (EIO); - } - - /* We should have exactly 1 match for the bus. */ - if (ccb.cdm.num_matches != 1 || - ccb.cdm.matches[0].type != DEV_MATCH_BUS) { - free(ccb.cdm.matches); - return (ENOENT); - } - path_id = ccb.cdm.matches[0].result.bus_result.path_id; - free(ccb.cdm.matches); + error = fetch_path_id(&path_id); + if (error) + return (error); - /* Now perform the actual rescan. */ + /* Perform the actual rescan. */ + bzero(&ccb, sizeof(ccb)); ccb.ccb_h.path_id = path_id; if (id == -1) { ccb.ccb_h.func_code = XPT_SCAN_BUS; -- cgit v1.1 From 5f3a0f7a25dae325c3692c4a5abd22d5dbda04c7 Mon Sep 17 00:00:00 2001 From: fabient Date: Fri, 5 Mar 2010 22:40:31 +0000 Subject: MFC 203790: - Reorganize code in 'plugin' to share log processing. - Kcachegrind (calltree) support with assembly/source code mapping and call count estimator (-F). - Top mode for calltree and callgraph plugin (-T). 
--- usr.sbin/pmcstat/Makefile | 5 +- usr.sbin/pmcstat/pmcpl_annotate.c | 111 +++ usr.sbin/pmcstat/pmcpl_annotate.h | 41 + usr.sbin/pmcstat/pmcpl_callgraph.c | 682 ++++++++++++++ usr.sbin/pmcstat/pmcpl_callgraph.h | 67 ++ usr.sbin/pmcstat/pmcpl_calltree.c | 1000 +++++++++++++++++++++ usr.sbin/pmcstat/pmcpl_calltree.h | 42 + usr.sbin/pmcstat/pmcpl_gprof.c | 533 +++++++++++ usr.sbin/pmcstat/pmcpl_gprof.h | 47 + usr.sbin/pmcstat/pmcstat.8 | 28 +- usr.sbin/pmcstat/pmcstat.c | 286 ++++-- usr.sbin/pmcstat/pmcstat.h | 75 +- usr.sbin/pmcstat/pmcstat_log.c | 1732 +++++++++++++----------------------- usr.sbin/pmcstat/pmcstat_log.h | 196 ++++ usr.sbin/pmcstat/pmcstat_top.h | 75 ++ 15 files changed, 3685 insertions(+), 1235 deletions(-) create mode 100644 usr.sbin/pmcstat/pmcpl_annotate.c create mode 100644 usr.sbin/pmcstat/pmcpl_annotate.h create mode 100644 usr.sbin/pmcstat/pmcpl_callgraph.c create mode 100644 usr.sbin/pmcstat/pmcpl_callgraph.h create mode 100644 usr.sbin/pmcstat/pmcpl_calltree.c create mode 100644 usr.sbin/pmcstat/pmcpl_calltree.h create mode 100644 usr.sbin/pmcstat/pmcpl_gprof.c create mode 100644 usr.sbin/pmcstat/pmcpl_gprof.h create mode 100644 usr.sbin/pmcstat/pmcstat_log.h create mode 100644 usr.sbin/pmcstat/pmcstat_top.h (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/Makefile b/usr.sbin/pmcstat/Makefile index 6b11d8d..4412e24 100644 --- a/usr.sbin/pmcstat/Makefile +++ b/usr.sbin/pmcstat/Makefile @@ -6,10 +6,11 @@ PROG= pmcstat MAN= pmcstat.8 DPADD= ${LIBELF} ${LIBKVM} ${LIBPMC} ${LIBM} -LDADD= -lelf -lkvm -lpmc -lm +LDADD= -lelf -lkvm -lpmc -lm -lncurses WARNS?= 6 -SRCS= pmcstat.c pmcstat.h pmcstat_log.c +SRCS= pmcstat.c pmcstat.h pmcstat_log.c \ +pmcpl_callgraph.c pmcpl_gprof.c pmcpl_annotate.c pmcpl_calltree.c .include diff --git a/usr.sbin/pmcstat/pmcpl_annotate.c b/usr.sbin/pmcstat/pmcpl_annotate.c new file mode 100644 index 0000000..802983c --- /dev/null +++ b/usr.sbin/pmcstat/pmcpl_annotate.c @@ -0,0 +1,111 @@ +/*- + * Copyright (c) 
2005-2007, Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by A. Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Transform a hwpmc(4) log into human readable form, and into + * gprof(1) compatible profiles. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmcstat.h" +#include "pmcstat_log.h" +#include "pmcpl_annotate.h" + +/* + * Record a callchain. + */ + +void +pmcpl_annotate_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, + uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu) +{ + struct pmcstat_pcmap *map; + struct pmcstat_symbol *sym; + uintfptr_t newpc; + struct pmcstat_image *image; + + (void) pmcr; (void) nsamples; (void) usermode; (void) cpu; + + map = pmcstat_process_find_map(usermode ? pp : pmcstat_kernproc, cc[0]); + if (map == NULL) { + /* Unknown offset. */ + pmcstat_stats.ps_samples_unknown_offset++; + return; + } + + assert(cc[0] >= map->ppm_lowpc && cc[0] < map->ppm_highpc); + + image = map->ppm_image; + newpc = cc[0] - (map->ppm_lowpc + + (image->pi_vaddr - image->pi_start)); + sym = pmcstat_symbol_search(image, newpc); + if (sym == NULL) + return; + + fprintf(args.pa_graphfile, "%p %s 0x%jx 0x%jx\n", + (void *)cc[0], + pmcstat_string_unintern(sym->ps_name), + (uintmax_t)(sym->ps_start + + image->pi_vaddr), (uintmax_t)(sym->ps_end + + image->pi_vaddr)); +} diff --git a/usr.sbin/pmcstat/pmcpl_annotate.h b/usr.sbin/pmcstat/pmcpl_annotate.h new file mode 100644 index 0000000..482bcd4 --- /dev/null +++ b/usr.sbin/pmcstat/pmcpl_annotate.h @@ -0,0 +1,41 @@ +/*- + * Copyright (c) 2005-2007, Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by A. Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _PMCSTAT_PL_ANNOTATE_H_ +#define _PMCSTAT_PL_ANNOTATE_H_ + +/* Function prototypes */ +void pmcpl_annotate_process( + struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, + uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu); + +#endif /* _PMCSTAT_PL_ANNOTATE_H_ */ diff --git a/usr.sbin/pmcstat/pmcpl_callgraph.c b/usr.sbin/pmcstat/pmcpl_callgraph.c new file mode 100644 index 0000000..d6f1a9d --- /dev/null +++ b/usr.sbin/pmcstat/pmcpl_callgraph.c @@ -0,0 +1,682 @@ +/*- + * Copyright (c) 2005-2007, Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by A. 
Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Transform a hwpmc(4) log into human readable form, and into + * gprof(1) compatible profiles. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmcstat.h" +#include "pmcstat_log.h" +#include "pmcstat_top.h" +#include "pmcpl_callgraph.h" + +/* Get the sample value in percent related to nsamples. 
*/ +#define PMCPL_CG_COUNTP(a) \ + ((a)->pcg_count * 100.0 / nsamples) + +/* + * The toplevel CG nodes (i.e., with rank == 0) are placed in a hash table. + */ + +struct pmcstat_cgnode_hash_list pmcstat_cgnode_hash[PMCSTAT_NHASH]; +int pmcstat_cgnode_hash_count; + +static pmcstat_interned_string pmcstat_previous_filename_printed; + +static struct pmcstat_cgnode * +pmcstat_cgnode_allocate(struct pmcstat_image *image, uintfptr_t pc) +{ + struct pmcstat_cgnode *cg; + + if ((cg = malloc(sizeof(*cg))) == NULL) + err(EX_OSERR, "ERROR: Cannot allocate callgraph node"); + + cg->pcg_image = image; + cg->pcg_func = pc; + + cg->pcg_count = 0; + cg->pcg_nchildren = 0; + LIST_INIT(&cg->pcg_children); + + return (cg); +} + +/* + * Free a node and its children. + */ +static void +pmcstat_cgnode_free(struct pmcstat_cgnode *cg) +{ + struct pmcstat_cgnode *cgc, *cgtmp; + + LIST_FOREACH_SAFE(cgc, &cg->pcg_children, pcg_sibling, cgtmp) + pmcstat_cgnode_free(cgc); + free(cg); +} + +/* + * Look for a callgraph node associated with pmc `pmcid' in the global + * hash table that corresponds to the given `pc' value in the process + * `pp'. + */ +static struct pmcstat_cgnode * +pmcstat_cgnode_hash_lookup_pc(struct pmcstat_process *pp, pmc_id_t pmcid, + uintfptr_t pc, int usermode) +{ + struct pmcstat_pcmap *ppm; + struct pmcstat_symbol *sym; + struct pmcstat_image *image; + struct pmcstat_cgnode *cg; + struct pmcstat_cgnode_hash *h; + uintfptr_t loadaddress; + unsigned int i, hash; + + ppm = pmcstat_process_find_map(usermode ? pp : pmcstat_kernproc, pc); + if (ppm == NULL) + return (NULL); + + image = ppm->ppm_image; + + loadaddress = ppm->ppm_lowpc + image->pi_vaddr - image->pi_start; + pc -= loadaddress; /* Convert to an offset in the image. */ + + /* + * Try to determine the function at this offset. If we can't + * find a function there, leave the `pc' value alone. 
+ */ + if ((sym = pmcstat_symbol_search(image, pc)) != NULL) + pc = sym->ps_start; + + for (hash = i = 0; i < sizeof(uintfptr_t); i++) + hash += (pc >> i) & 0xFF; + + hash &= PMCSTAT_HASH_MASK; + + cg = NULL; + LIST_FOREACH(h, &pmcstat_cgnode_hash[hash], pch_next) + { + if (h->pch_pmcid != pmcid) + continue; + + cg = h->pch_cgnode; + + assert(cg != NULL); + + if (cg->pcg_image == image && cg->pcg_func == pc) + return (cg); + } + + /* + * We haven't seen this (pmcid, pc) tuple yet, so allocate a + * new callgraph node and a new hash table entry for it. + */ + cg = pmcstat_cgnode_allocate(image, pc); + if ((h = malloc(sizeof(*h))) == NULL) + err(EX_OSERR, "ERROR: Could not allocate callgraph node"); + + h->pch_pmcid = pmcid; + h->pch_cgnode = cg; + LIST_INSERT_HEAD(&pmcstat_cgnode_hash[hash], h, pch_next); + + pmcstat_cgnode_hash_count++; + + return (cg); +} + +/* + * Compare two callgraph nodes for sorting. + */ +static int +pmcstat_cgnode_compare(const void *a, const void *b) +{ + const struct pmcstat_cgnode *const *pcg1, *const *pcg2, *cg1, *cg2; + + pcg1 = (const struct pmcstat_cgnode *const *) a; + cg1 = *pcg1; + pcg2 = (const struct pmcstat_cgnode *const *) b; + cg2 = *pcg2; + + /* Sort in reverse order */ + if (cg1->pcg_count < cg2->pcg_count) + return (1); + if (cg1->pcg_count > cg2->pcg_count) + return (-1); + return (0); +} + +/* + * Find (allocating if needed) a callgraph node in the given + * parent with the same (image, pcoffset) pair. + */ + +static struct pmcstat_cgnode * +pmcstat_cgnode_find(struct pmcstat_cgnode *parent, struct pmcstat_image *image, + uintfptr_t pcoffset) +{ + struct pmcstat_cgnode *child; + + LIST_FOREACH(child, &parent->pcg_children, pcg_sibling) { + if (child->pcg_image == image && + child->pcg_func == pcoffset) + return (child); + } + + /* + * Allocate a new structure. + */ + + child = pmcstat_cgnode_allocate(image, pcoffset); + + /* + * Link it into the parent. 
+ */ + LIST_INSERT_HEAD(&parent->pcg_children, child, pcg_sibling); + parent->pcg_nchildren++; + + return (child); +} + +/* + * Print one callgraph node. The output format is: + * + * indentation %(parent's samples) #nsamples function@object + */ +static void +pmcstat_cgnode_print(struct pmcstat_cgnode *cg, int depth, uint32_t total) +{ + uint32_t n; + const char *space; + struct pmcstat_symbol *sym; + struct pmcstat_cgnode **sortbuffer, **cgn, *pcg; + + space = " "; + + if (depth > 0) + (void) fprintf(args.pa_graphfile, "%*s", depth, space); + + if (cg->pcg_count == total) + (void) fprintf(args.pa_graphfile, "100.0%% "); + else + (void) fprintf(args.pa_graphfile, "%05.2f%% ", + 100.0 * cg->pcg_count / total); + + n = fprintf(args.pa_graphfile, " [%u] ", cg->pcg_count); + + /* #samples is a 12 character wide field. */ + if (n < 12) + (void) fprintf(args.pa_graphfile, "%*s", 12 - n, space); + + if (depth > 0) + (void) fprintf(args.pa_graphfile, "%*s", depth, space); + + sym = pmcstat_symbol_search(cg->pcg_image, cg->pcg_func); + if (sym) + (void) fprintf(args.pa_graphfile, "%s", + pmcstat_string_unintern(sym->ps_name)); + else + (void) fprintf(args.pa_graphfile, "%p", + (void *) (cg->pcg_image->pi_vaddr + cg->pcg_func)); + + if (pmcstat_previous_filename_printed != + cg->pcg_image->pi_fullpath) { + pmcstat_previous_filename_printed = cg->pcg_image->pi_fullpath; + (void) fprintf(args.pa_graphfile, " @ %s\n", + pmcstat_string_unintern( + pmcstat_previous_filename_printed)); + } else + (void) fprintf(args.pa_graphfile, "\n"); + + if (cg->pcg_nchildren == 0) + return; + + if ((sortbuffer = (struct pmcstat_cgnode **) + malloc(sizeof(struct pmcstat_cgnode *) * + cg->pcg_nchildren)) == NULL) + err(EX_OSERR, "ERROR: Cannot print callgraph"); + cgn = sortbuffer; + + LIST_FOREACH(pcg, &cg->pcg_children, pcg_sibling) + *cgn++ = pcg; + + assert(cgn - sortbuffer == (int) cg->pcg_nchildren); + + qsort(sortbuffer, cg->pcg_nchildren, sizeof(struct pmcstat_cgnode *), + 
pmcstat_cgnode_compare); + + for (cgn = sortbuffer, n = 0; n < cg->pcg_nchildren; n++, cgn++) + pmcstat_cgnode_print(*cgn, depth+1, cg->pcg_count); + + free(sortbuffer); +} + +/* + * Record a callchain. + */ + +void +pmcpl_cg_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, + uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu) +{ + uintfptr_t pc, loadaddress; + uint32_t n; + struct pmcstat_image *image; + struct pmcstat_pcmap *ppm; + struct pmcstat_symbol *sym; + struct pmcstat_cgnode *parent, *child; + struct pmcstat_process *km; + pmc_id_t pmcid; + + (void) cpu; + + /* + * Find the callgraph node recorded in the global hash table + * for this (pmcid, pc). + */ + + pc = cc[0]; + pmcid = pmcr->pr_pmcid; + parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode); + if (parent == NULL) { + pmcstat_stats.ps_callchain_dubious_frames++; + return; + } + + parent->pcg_count++; + + /* + * For each return address in the call chain record, subject + * to the maximum depth desired. + * - Find the image associated with the sample. Stop if there + * is no valid image at that address. + * - Find the function that overlaps the return address. + * - If found: use the start address of the function. + * If not found (say an object's symbol table is not present or + * is incomplete), round down to the gprof bucket granularity. + * - Convert return virtual address to an offset in the image. + * - Look for a child with the same {offset,image} tuple, + * inserting one if needed. + * - Increment the count of occurrences of the child. + */ + km = pmcstat_kernproc; + + for (n = 1; n < (uint32_t) args.pa_graphdepth && n < nsamples; n++, + parent = child) { + pc = cc[n]; + + ppm = pmcstat_process_find_map(usermode ? pp : km, pc); + if (ppm == NULL) { + /* Detect full frame capture (kernel + user). 
*/ + if (!usermode) { + ppm = pmcstat_process_find_map(pp, pc); + if (ppm != NULL) + km = pp; + } + } + if (ppm == NULL) + return; + + image = ppm->ppm_image; + loadaddress = ppm->ppm_lowpc + image->pi_vaddr - + image->pi_start; + pc -= loadaddress; + + if ((sym = pmcstat_symbol_search(image, pc)) != NULL) + pc = sym->ps_start; + + child = pmcstat_cgnode_find(parent, image, pc); + child->pcg_count++; + } +} + +/* + * Printing a callgraph for a PMC. + */ +static void +pmcstat_callgraph_print_for_pmcid(struct pmcstat_pmcrecord *pmcr) +{ + int n, nentries; + uint32_t nsamples; + pmc_id_t pmcid; + struct pmcstat_cgnode **sortbuffer, **cgn; + struct pmcstat_cgnode_hash *pch; + + /* + * We pull out all callgraph nodes in the top-level hash table + * with a matching PMC id. We then sort these based on the + * frequency of occurrence. Each callgraph node is then + * printed. + */ + + nsamples = 0; + pmcid = pmcr->pr_pmcid; + if ((sortbuffer = (struct pmcstat_cgnode **) + malloc(sizeof(struct pmcstat_cgnode *) * + pmcstat_cgnode_hash_count)) == NULL) + err(EX_OSERR, "ERROR: Cannot sort callgraph"); + cgn = sortbuffer; + + for (n = 0; n < PMCSTAT_NHASH; n++) + LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next) + if (pch->pch_pmcid == pmcid) { + nsamples += pch->pch_cgnode->pcg_count; + *cgn++ = pch->pch_cgnode; + } + + nentries = cgn - sortbuffer; + assert(nentries <= pmcstat_cgnode_hash_count); + + if (nentries == 0) { + free(sortbuffer); + return; + } + + qsort(sortbuffer, nentries, sizeof(struct pmcstat_cgnode *), + pmcstat_cgnode_compare); + + (void) fprintf(args.pa_graphfile, + "@ %s [%u samples]\n\n", + pmcstat_string_unintern(pmcr->pr_pmcname), + nsamples); + + for (cgn = sortbuffer, n = 0; n < nentries; n++, cgn++) { + pmcstat_previous_filename_printed = NULL; + pmcstat_cgnode_print(*cgn, 0, nsamples); + (void) fprintf(args.pa_graphfile, "\n"); + } + + free(sortbuffer); +} + +/* + * Print out callgraphs. 
+ */ + +static void +pmcstat_callgraph_print(void) +{ + struct pmcstat_pmcrecord *pmcr; + + LIST_FOREACH(pmcr, &pmcstat_pmcs, pr_next) + pmcstat_callgraph_print_for_pmcid(pmcr); +} + +static void +pmcstat_cgnode_topprint(struct pmcstat_cgnode *cg, + int depth, uint32_t nsamples) +{ + int v_attrs, vs_len, ns_len, width, len, n, nchildren; + float v; + char ns[30], vs[10]; + struct pmcstat_symbol *sym; + struct pmcstat_cgnode **sortbuffer, **cgn, *pcg; + + (void) depth; + + /* Format value. */ + v = PMCPL_CG_COUNTP(cg); + snprintf(vs, sizeof(vs), "%.1f", v); + v_attrs = PMCSTAT_ATTRPERCENT(v); + + /* Format name. */ + sym = pmcstat_symbol_search(cg->pcg_image, cg->pcg_func); + if (sym != NULL) { + snprintf(ns, sizeof(ns), "%s", + pmcstat_string_unintern(sym->ps_name)); + } else + snprintf(ns, sizeof(ns), "%p", + (void *)cg->pcg_func); + + PMCSTAT_ATTRON(v_attrs); + PMCSTAT_PRINTW("%5.5s", vs); + PMCSTAT_ATTROFF(v_attrs); + PMCSTAT_PRINTW(" %-10.10s %-20.20s", + pmcstat_string_unintern(cg->pcg_image->pi_name), + ns); + + nchildren = cg->pcg_nchildren; + if (nchildren == 0) { + PMCSTAT_PRINTW("\n"); + return; + } + + width = pmcstat_displaywidth - 40; + + if ((sortbuffer = (struct pmcstat_cgnode **) + malloc(sizeof(struct pmcstat_cgnode *) * + nchildren)) == NULL) + err(EX_OSERR, "ERROR: Cannot print callgraph"); + cgn = sortbuffer; + + LIST_FOREACH(pcg, &cg->pcg_children, pcg_sibling) + *cgn++ = pcg; + + assert(cgn - sortbuffer == (int)nchildren); + + qsort(sortbuffer, nchildren, sizeof(struct pmcstat_cgnode *), + pmcstat_cgnode_compare); + + /* Count how many callers. */ + for (cgn = sortbuffer, n = 0; n < nchildren; n++, cgn++) { + pcg = *cgn; + + v = PMCPL_CG_COUNTP(pcg); + if (v < pmcstat_threshold) + break; + } + nchildren = n; + + for (cgn = sortbuffer, n = 0; n < nchildren; n++, cgn++) { + pcg = *cgn; + + /* Format value. 
*/ + if (nchildren > 1) { + v = PMCPL_CG_COUNTP(pcg); + vs_len = snprintf(vs, sizeof(vs), ":%.1f", v); + v_attrs = PMCSTAT_ATTRPERCENT(v); + } else + vs_len = 0; + + /* Format name. */ + sym = pmcstat_symbol_search(pcg->pcg_image, pcg->pcg_func); + if (sym != NULL) { + ns_len = snprintf(ns, sizeof(ns), "%s", + pmcstat_string_unintern(sym->ps_name)); + } else + ns_len = snprintf(ns, sizeof(ns), "%p", + (void *)pcg->pcg_func); + + len = ns_len + vs_len + 1; + if (width - len < 0) { + PMCSTAT_PRINTW("..."); + break; + } + width -= len; + + PMCSTAT_PRINTW(" %s", ns); + if (nchildren > 1) { + PMCSTAT_ATTRON(v_attrs); + PMCSTAT_PRINTW("%s", vs); + PMCSTAT_ATTROFF(v_attrs); + } + } + PMCSTAT_PRINTW("\n"); + free(sortbuffer); +} + +/* + * Top mode display. + */ + +void +pmcpl_cg_topdisplay(void) +{ + int n, nentries; + uint32_t nsamples; + struct pmcstat_cgnode **sortbuffer, **cgn; + struct pmcstat_cgnode_hash *pch; + struct pmcstat_pmcrecord *pmcr; + + pmcr = pmcstat_pmcindex_to_pmcr(pmcstat_pmcinfilter); + + /* + * We pull out all callgraph nodes in the top-level hash table + * with a matching PMC index. We then sort these based on the + * frequency of occurrence. Each callgraph node is then + * printed. 
+ */ + + nsamples = 0; + + if ((sortbuffer = (struct pmcstat_cgnode **) + malloc(sizeof(struct pmcstat_cgnode *) * + pmcstat_cgnode_hash_count)) == NULL) + err(EX_OSERR, "ERROR: Cannot sort callgraph"); + cgn = sortbuffer; + + for (n = 0; n < PMCSTAT_NHASH; n++) + LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next) + if (pmcr == NULL || pch->pch_pmcid == pmcr->pr_pmcid) { + nsamples += pch->pch_cgnode->pcg_count; + *cgn++ = pch->pch_cgnode; + } + + nentries = cgn - sortbuffer; + assert(nentries <= pmcstat_cgnode_hash_count); + + if (nentries == 0) { + free(sortbuffer); + return; + } + + qsort(sortbuffer, nentries, sizeof(struct pmcstat_cgnode *), + pmcstat_cgnode_compare); + + PMCSTAT_PRINTW("%5.5s %-10.10s %-20.20s %s\n", + "%SAMP", "IMAGE", "FUNCTION", "CALLERS"); + + nentries = min(pmcstat_displayheight - 2, nentries); + + for (cgn = sortbuffer, n = 0; n < nentries; n++, cgn++) { + if (PMCPL_CG_COUNTP(*cgn) < pmcstat_threshold) + break; + pmcstat_cgnode_topprint(*cgn, 0, nsamples); + } + + free(sortbuffer); +} + +/* + * Handle top mode keypress. + */ + +int +pmcpl_cg_topkeypress(int c, WINDOW *w) +{ + + (void) c; (void) w; + + return 0; +} + +int +pmcpl_cg_init(void) +{ + int i; + + pmcstat_cgnode_hash_count = 0; + pmcstat_previous_filename_printed = NULL; + + for (i = 0; i < PMCSTAT_NHASH; i++) { + LIST_INIT(&pmcstat_cgnode_hash[i]); + } + + return (0); +} + +void +pmcpl_cg_shutdown(FILE *mf) +{ + int i; + struct pmcstat_cgnode_hash *pch, *pchtmp; + + (void) mf; + + if (args.pa_flags & FLAG_DO_CALLGRAPHS) + pmcstat_callgraph_print(); + + /* + * Free memory. 
+ */ + for (i = 0; i < PMCSTAT_NHASH; i++) { + LIST_FOREACH_SAFE(pch, &pmcstat_cgnode_hash[i], pch_next, + pchtmp) { + pmcstat_cgnode_free(pch->pch_cgnode); + LIST_REMOVE(pch, pch_next); + free(pch); + } + } +} + diff --git a/usr.sbin/pmcstat/pmcpl_callgraph.h b/usr.sbin/pmcstat/pmcpl_callgraph.h new file mode 100644 index 0000000..aaf0e1b --- /dev/null +++ b/usr.sbin/pmcstat/pmcpl_callgraph.h @@ -0,0 +1,67 @@ +/*- + * Copyright (c) 2005-2007, Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by A. Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _PMCSTAT_PL_CALLGRAPH_H_ +#define _PMCSTAT_PL_CALLGRAPH_H_ + +/* + * Each call graph node is tracked by a pmcstat_cgnode struct. + */ + +struct pmcstat_cgnode { + struct pmcstat_image *pcg_image; + uintfptr_t pcg_func; + uint32_t pcg_count; + uint32_t pcg_nchildren; + LIST_ENTRY(pmcstat_cgnode) pcg_sibling; + LIST_HEAD(,pmcstat_cgnode) pcg_children; +}; + +struct pmcstat_cgnode_hash { + struct pmcstat_cgnode *pch_cgnode; + pmc_id_t pch_pmcid; + LIST_ENTRY(pmcstat_cgnode_hash) pch_next; +}; +extern LIST_HEAD(pmcstat_cgnode_hash_list, pmcstat_cgnode_hash) pmcstat_cgnode_hash[PMCSTAT_NHASH]; +extern int pmcstat_cgnode_hash_count; + +/* Function prototypes */ +int pmcpl_cg_init(void); +void pmcpl_cg_shutdown(FILE *mf); +void pmcpl_cg_process( + struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, + uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu); +int pmcpl_cg_topkeypress(int c, WINDOW *w); +void pmcpl_cg_topdisplay(void); +void pmcpl_cg_configure(char *opt); + +#endif /* _PMCSTAT_PL_CALLGRAPH_H_ */ diff --git a/usr.sbin/pmcstat/pmcpl_calltree.c b/usr.sbin/pmcstat/pmcpl_calltree.c new file mode 100644 index 0000000..498092d --- /dev/null +++ b/usr.sbin/pmcstat/pmcpl_calltree.c @@ -0,0 +1,1000 @@ +/*- + * Copyright (c) 2009, Fabien Thomas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Process hwpmc(4) samples as calltree. + * + * Output file format compatible with Kcachegrind (kdesdk). + * Handle top mode with a sorted tree display. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmcstat.h" +#include "pmcstat_log.h" +#include "pmcstat_top.h" +#include "pmcpl_calltree.h" + +#define PMCPL_CT_GROWSIZE 4 + +static pmcstat_interned_string pmcpl_ct_prevfn; + +static int pmcstat_skiplink = 0; + +struct pmcpl_ct_node; + +/* Get the sample value for PMC a. */ +#define PMCPL_CT_SAMPLE(a, b) \ + ((a) < (b)->npmcs ? (b)->sb[a] : 0) + +/* Get the sample value in percent related to rsamples. */ +#define PMCPL_CT_SAMPLEP(a, b) \ + (PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a]) + +struct pmcpl_ct_sample { + int npmcs; /* Max pmc index available. */ + unsigned *sb; /* Sample buffer for 0..npmcs. 
*/ +}; + +struct pmcpl_ct_arc { + struct pmcpl_ct_sample pcta_samples; + struct pmcpl_ct_sample pcta_callid; + unsigned pcta_call; + struct pmcpl_ct_node *pcta_child; +}; + +struct pmcpl_ct_instr { + uintfptr_t pctf_func; + struct pmcpl_ct_sample pctf_samples; +}; + +/* + * Each calltree node is tracked by a pmcpl_ct_node struct. + */ +struct pmcpl_ct_node { +#define PMCPL_PCT_TAG 0x00000001 /* Loop detection. */ + uint32_t pct_flags; + struct pmcstat_image *pct_image; + uintfptr_t pct_func; + struct pmcpl_ct_sample pct_samples; + + int pct_narc; + int pct_arc_c; + struct pmcpl_ct_arc *pct_arc; + + /* TODO: optimize for large number of items. */ + int pct_ninstr; + int pct_instr_c; + struct pmcpl_ct_instr *pct_instr; +}; + +struct pmcpl_ct_node_hash { + struct pmcpl_ct_node *pch_ctnode; + LIST_ENTRY(pmcpl_ct_node_hash) pch_next; +}; + +struct pmcpl_ct_sample pmcpl_ct_callid; + +#define PMCPL_CT_MAXCOL PMC_CALLCHAIN_DEPTH_MAX +#define PMCPL_CT_MAXLINE 256 +struct pmcpl_ct_node *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL][PMCPL_CT_MAXLINE]; + +/* + * All nodes indexed by function/image name are placed in a hash table. + */ +static LIST_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH]; + +/* + * Root node for the graph. + */ +static struct pmcpl_ct_node *pmcpl_ct_root; + +/* + * Prototypes + */ + +/* + * Initialize a samples. + */ + +static void +pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples) +{ + + samples->npmcs = 0; + samples->sb = NULL; +} + +/* + * Free a samples. + */ + +static void +pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples) +{ + + samples->npmcs = 0; + free(samples->sb); + samples->sb = NULL; +} + +/* + * Grow a sample block to store pmcstat_npmcs PMCs. + */ + +static void +pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples) +{ + int npmcs; + + /* Enough storage. 
*/ + if (pmcstat_npmcs <= samples->npmcs) + return; + + npmcs = samples->npmcs + + max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE); + samples->sb = realloc(samples->sb, npmcs * sizeof(unsigned)); + if (samples->sb == NULL) + errx(EX_SOFTWARE, "ERROR: out of memory"); + bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned), + (npmcs - samples->npmcs) * sizeof(unsigned)); + samples->npmcs = npmcs; +} + +/* + * Compute the sum of all root arcs. + */ + +static void +pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples) +{ + int i, pmcin; + + pmcpl_ct_samples_init(samples); + pmcpl_ct_samples_grow(samples); + + for (i = 0; i < pmcpl_ct_root->pct_narc; i++) + for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++) + samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin, + &pmcpl_ct_root->pct_arc[i].pcta_samples); +} + +/* + * Grow the arc table. + */ + +static void +pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items) +{ + int nmaxsize; + + if (cursize < *maxsize) + return; + + nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE); + *items = realloc(*items, nmaxsize * sizeof(struct pmcpl_ct_arc)); + if (*items == NULL) + errx(EX_SOFTWARE, "ERROR: out of memory"); + bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc), + (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc)); + *maxsize = nmaxsize; +} + +/* + * Compare two arc by samples value. + */ +static int +pmcpl_ct_arc_compare(void *thunk, const void *a, const void *b) +{ + const struct pmcpl_ct_arc *ct1, *ct2; + int pmcin = *(int *)thunk; + + ct1 = (const struct pmcpl_ct_arc *) a; + ct2 = (const struct pmcpl_ct_arc *) b; + + /* Sort in reverse order */ + if (PMCPL_CT_SAMPLE(pmcin, &ct1->pcta_samples) < + PMCPL_CT_SAMPLE(pmcin, &ct2->pcta_samples)) + return (1); + if (PMCPL_CT_SAMPLE(pmcin, &ct1->pcta_samples) > + PMCPL_CT_SAMPLE(pmcin, &ct2->pcta_samples)) + return (-1); + return (0); +} + +/* + * Grow the instr table. 
+ */ + +static void +pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items) +{ + int nmaxsize; + + if (cursize < *maxsize) + return; + + nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE); + *items = realloc(*items, nmaxsize * sizeof(struct pmcpl_ct_instr)); + if (*items == NULL) + errx(EX_SOFTWARE, "ERROR: out of memory"); + bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr), + (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr)); + *maxsize = nmaxsize; +} + +/* + * Add a new instruction sample to given node. + */ + +static void +pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin, uintfptr_t pc) +{ + int i; + struct pmcpl_ct_instr *in; + + for (i = 0; ipct_ninstr; i++) { + if (ct->pct_instr[i].pctf_func == pc) { + in = &ct->pct_instr[i]; + pmcpl_ct_samples_grow(&in->pctf_samples); + in->pctf_samples.sb[pmcin]++; + return; + } + } + + pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr); + in = &ct->pct_instr[ct->pct_ninstr]; + in->pctf_func = pc; + pmcpl_ct_samples_init(&in->pctf_samples); + pmcpl_ct_samples_grow(&in->pctf_samples); + in->pctf_samples.sb[pmcin] = 1; + ct->pct_ninstr++; +} + +/* + * Allocate a new node. + */ + +static struct pmcpl_ct_node * +pmcpl_ct_node_allocate(struct pmcstat_image *image, uintfptr_t pc) +{ + struct pmcpl_ct_node *ct; + + if ((ct = malloc(sizeof(*ct))) == NULL) + err(EX_OSERR, "ERROR: Cannot allocate callgraph node"); + + ct->pct_flags = 0; + ct->pct_image = image; + ct->pct_func = pc; + + pmcpl_ct_samples_init(&ct->pct_samples); + + ct->pct_narc = 0; + ct->pct_arc_c = 0; + ct->pct_arc = NULL; + + ct->pct_ninstr = 0; + ct->pct_instr_c = 0; + ct->pct_instr = NULL; + + return (ct); +} + +/* + * Free a node. 
+ */ + +static void +pmcpl_ct_node_free(struct pmcpl_ct_node *ct) +{ + int i; + + for (i = 0; i < ct->pct_narc; i++) { + pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples); + pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid); + } + + pmcpl_ct_samples_free(&ct->pct_samples); + free(ct->pct_arc); + free(ct->pct_instr); + free(ct); +} + +/* + * Clear the graph tag on each node. + */ +static void +pmcpl_ct_node_cleartag(void) +{ + int i; + struct pmcpl_ct_node_hash *pch; + + for (i = 0; i < PMCSTAT_NHASH; i++) + LIST_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next) + pch->pch_ctnode->pct_flags &= ~PMCPL_PCT_TAG; + + pmcpl_ct_root->pct_flags &= ~PMCPL_PCT_TAG; +} + +/* + * Print the callchain line by line with maximum cost at top. + */ + +static int +pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, + struct pmcpl_ct_sample *rsamples, int x, int *y) +{ + int i; + + if (ct->pct_flags & PMCPL_PCT_TAG) + return 0; + + ct->pct_flags |= PMCPL_PCT_TAG; + + if (x >= PMCPL_CT_MAXCOL) { + pmcpl_ct_topscreen[x][*y] = NULL; + return 1; + } + pmcpl_ct_topscreen[x][*y] = ct; + + /* + * This is a terminal node + */ + if (ct->pct_narc == 0) { + pmcpl_ct_topscreen[x+1][*y] = NULL; + if (*y >= PMCPL_CT_MAXLINE || + *y >= pmcstat_displaywidth) + return 1; + *y = *y + 1; + for (i=0; i < x; i++) + pmcpl_ct_topscreen[i][*y] = + pmcpl_ct_topscreen[i][*y - 1]; + return 0; + } + + /* + * Quicksort the arcs. + */ + qsort_r(ct->pct_arc, ct->pct_narc, sizeof(struct pmcpl_ct_arc), + &pmcin, pmcpl_ct_arc_compare); + + for (i = 0; i < ct->pct_narc; i++) { + if (PMCPL_CT_SAMPLEP(pmcin, + &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) { + if (pmcpl_ct_node_dumptop(pmcin, + ct->pct_arc[i].pcta_child, + rsamples, x+1, y)) + return 1; + } + } + + return 0; +} + +/* + * Format and display given PMC index. 
+ */ + +static void +pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) +{ + int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y; + float v; + char ns[30], vs[10], is[20]; + struct pmcpl_ct_node *ct; + struct pmcstat_symbol *sym; + const char *space = " "; + + for (y = 0; y < maxy; y++) { + /* Output image. */ + ct = pmcpl_ct_topscreen[0][y]; + snprintf(is, sizeof(is), "%-10.10s", + pmcstat_string_unintern(ct->pct_image->pi_name)); + PMCSTAT_PRINTW("%s ", is); + width = indentwidth = 11; + + for (x = 0; pmcpl_ct_topscreen[x][y] !=NULL; x++) { + + ct = pmcpl_ct_topscreen[x][y]; + + ns[0] = '\0'; ns_len = 0; + vs[0] = '\0'; vs_len = 0; + is[0] = '\0'; is_len = 0; + + /* Format value. */ + v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples); + if (v > pmcstat_threshold) + vs_len = snprintf(vs, sizeof(vs), "(%.1f%%)", v); + v_attrs = PMCSTAT_ATTRPERCENT(v); + + if (pmcstat_skiplink && v <= pmcstat_threshold) { + PMCSTAT_PRINTW(". "); + width += 2; + continue; + } + sym = pmcstat_symbol_search(ct->pct_image, ct->pct_func); + if (sym != NULL) { + ns_len = snprintf(ns, sizeof(ns), "%s", + pmcstat_string_unintern(sym->ps_name)); + } else + ns_len = snprintf(ns, sizeof(ns), "%p", + (void *)ct->pct_func); + + /* Format image. */ + if (x > 0 && pmcpl_ct_topscreen[x-1][y]->pct_image != ct->pct_image) + is_len = snprintf(is, sizeof(is), "@%s", + pmcstat_string_unintern(ct->pct_image->pi_name)); + + /* Check for line wrap. */ + width += ns_len + is_len + vs_len + 1; + if (width >= pmcstat_displaywidth) { + PMCSTAT_PRINTW("\n%*s", indentwidth, space); + width = indentwidth + ns_len + is_len + vs_len; + } + + PMCSTAT_ATTRON(v_attrs); + PMCSTAT_PRINTW("%s%s%s ", ns, is, vs); + PMCSTAT_ATTROFF(v_attrs); + } + PMCSTAT_PRINTW("\n"); + } +} + +/* + * Output top mode snapshot. 
+ */ + +void +pmcpl_ct_topdisplay(void) +{ + int i, x, y, pmcin; + struct pmcpl_ct_sample rsamples; + + pmcpl_ct_samples_root(&rsamples); + + PMCSTAT_PRINTW("%-10.10s %s\n", "IMAGE", "CALLTREE"); + + for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++) { + /* Filter PMCs. */ + if (pmcstat_pmcinfilter != pmcin) + continue; + + pmcpl_ct_node_cleartag(); + + /* Quicksort the arcs. */ + qsort_r(pmcpl_ct_root->pct_arc, + pmcpl_ct_root->pct_narc, + sizeof(struct pmcpl_ct_arc), + &pmcin, pmcpl_ct_arc_compare); + + x = y = 0; + for (i = 0; i < pmcpl_ct_root->pct_narc; i++) { + if (pmcpl_ct_node_dumptop(pmcin, + pmcpl_ct_root->pct_arc[i].pcta_child, + &rsamples, x, &y)) { + break; + } + } + + pmcpl_ct_node_printtop(&rsamples, pmcin, y); + } + pmcpl_ct_samples_free(&rsamples); +} + +/* + * Handle top mode keypress. + */ + +int +pmcpl_ct_topkeypress(int c, WINDOW *w) +{ + + switch (c) { + case 'f': + pmcstat_skiplink = !pmcstat_skiplink; + wprintw(w, "skip empty link %s", pmcstat_skiplink ? "on" : "off"); + break; + } + + return 0; +} + +/* + * Look for a callgraph node associated with pmc `pmcid' in the global + * hash table that corresponds to the given `pc' value in the process map + * `ppm'. + */ + +static struct pmcpl_ct_node * +pmcpl_ct_node_hash_lookup_pc(struct pmcpl_ct_node *parent, + struct pmcstat_pcmap *ppm, uintfptr_t pc, int pmcin) +{ + struct pmcstat_symbol *sym; + struct pmcstat_image *image; + struct pmcpl_ct_node *ct; + struct pmcpl_ct_node_hash *h; + struct pmcpl_ct_arc *arc; + uintfptr_t loadaddress; + int i; + unsigned int hash; + + assert(parent != NULL); + + image = ppm->ppm_image; + + loadaddress = ppm->ppm_lowpc + image->pi_vaddr - image->pi_start; + pc -= loadaddress; /* Convert to an offset in the image. */ + + /* + * Try determine the function at this offset. If we can't + * find a function round leave the `pc' value alone. 
+ */ + if ((sym = pmcstat_symbol_search(image, pc)) != NULL) + pc = sym->ps_start; + + for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++) + hash += (pc >> i) & 0xFF; + + hash &= PMCSTAT_HASH_MASK; + + ct = NULL; + LIST_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) { + ct = h->pch_ctnode; + + assert(ct != NULL); + + if (ct->pct_image == image && ct->pct_func == pc) { + /* + * Find related arc in parent node and + * increment the sample count. + */ + for (i = 0; i < parent->pct_narc; i++) { + if (parent->pct_arc[i].pcta_child == ct) { + arc = &parent->pct_arc[i]; + pmcpl_ct_samples_grow(&arc->pcta_samples); + arc->pcta_samples.sb[pmcin]++; + /* Estimate call count. */ + pmcpl_ct_samples_grow(&arc->pcta_callid); + if (pmcpl_ct_callid.sb[pmcin] - + arc->pcta_callid.sb[pmcin] > 1) + arc->pcta_call++; + arc->pcta_callid.sb[pmcin] = + pmcpl_ct_callid.sb[pmcin]; + return (ct); + } + } + + /* + * No arc found for us, add ourself to the parent. + */ + pmcpl_ct_arc_grow(parent->pct_narc, + &parent->pct_arc_c, &parent->pct_arc); + arc = &parent->pct_arc[parent->pct_narc]; + pmcpl_ct_samples_grow(&arc->pcta_samples); + arc->pcta_samples.sb[pmcin] = 1; + arc->pcta_call = 1; + pmcpl_ct_samples_grow(&arc->pcta_callid); + arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin]; + arc->pcta_child = ct; + parent->pct_narc++; + return (ct); + } + } + + /* + * We haven't seen this (pmcid, pc) tuple yet, so allocate a + * new callgraph node and a new hash table entry for it. 
+ */ + ct = pmcpl_ct_node_allocate(image, pc); + if ((h = malloc(sizeof(*h))) == NULL) + err(EX_OSERR, "ERROR: Could not allocate callgraph node"); + + h->pch_ctnode = ct; + LIST_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next); + + pmcpl_ct_arc_grow(parent->pct_narc, + &parent->pct_arc_c, &parent->pct_arc); + arc = &parent->pct_arc[parent->pct_narc]; + pmcpl_ct_samples_grow(&arc->pcta_samples); + arc->pcta_samples.sb[pmcin] = 1; + arc->pcta_call = 1; + pmcpl_ct_samples_grow(&arc->pcta_callid); + arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin]; + arc->pcta_child = ct; + parent->pct_narc++; + return (ct); +} + +/* + * Record a callchain. + */ + +void +pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, + uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu) +{ + int n, pmcin; + struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX]; + struct pmcstat_process *km; + struct pmcpl_ct_node *parent, *child; + + (void) cpu; + + assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX); + + /* Get the PMC index. */ + pmcin = pmcr->pr_pmcin; + + /* + * Validate mapping for the callchain. + * Go from bottom to first invalid entry. + */ + km = pmcstat_kernproc; + for (n = 0; n < (int)nsamples; n++) { + ppm[n] = pmcstat_process_find_map(usermode ? + pp : km, cc[n]); + if (ppm[n] == NULL) { + /* Detect full frame capture (kernel + user). */ + if (!usermode) { + ppm[n] = pmcstat_process_find_map(pp, cc[n]); + if (ppm[n] != NULL) + km = pp; + } + } + if (ppm[n] == NULL) + break; + } + if (n-- == 0) { + pmcstat_stats.ps_callchain_dubious_frames++; + return; + } + + /* Increase the call generation counter. */ + pmcpl_ct_samples_grow(&pmcpl_ct_callid); + pmcpl_ct_callid.sb[pmcin]++; + + /* + * Iterate remaining addresses. 
+ */ + for (parent = pmcpl_ct_root, child = NULL; n >= 0; n--) { + child = pmcpl_ct_node_hash_lookup_pc(parent, ppm[n], cc[n], + pmcin); + if (child == NULL) { + pmcstat_stats.ps_callchain_dubious_frames++; + continue; + } + parent = child; + } + + /* + * Increment the sample count for this PMC. + */ + if (child != NULL) { + pmcpl_ct_samples_grow(&child->pct_samples); + child->pct_samples.sb[pmcin]++; + + /* Update per instruction sample if required. */ + if (args.pa_ctdumpinstr) + pmcpl_ct_instr_add(child, pmcin, cc[0] - + (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr - + ppm[0]->ppm_image->pi_start)); + } +} + +/* + * Print node self cost. + */ + +static void +pmcpl_ct_node_printself(struct pmcpl_ct_node *ct) +{ + int i, j, line; + uintptr_t addr; + struct pmcstat_symbol *sym; + char sourcefile[PATH_MAX]; + char funcname[PATH_MAX]; + + /* + * Object binary. + */ +#ifdef PMCPL_CT_OPTIMIZEFN + if (pmcpl_ct_prevfn != ct->pct_image->pi_fullpath) { +#endif + pmcpl_ct_prevfn = ct->pct_image->pi_fullpath; + fprintf(args.pa_graphfile, "ob=%s\n", + pmcstat_string_unintern(pmcpl_ct_prevfn)); +#ifdef PMCPL_CT_OPTIMIZEFN + } +#endif + + /* + * Function name. + */ + if (pmcstat_image_addr2line(ct->pct_image, ct->pct_func, + sourcefile, sizeof(sourcefile), &line, + funcname, sizeof(funcname))) { + fprintf(args.pa_graphfile, "fn=%s\n", + funcname); + } else { + sym = pmcstat_symbol_search(ct->pct_image, ct->pct_func); + if (sym != NULL) + fprintf(args.pa_graphfile, "fn=%s\n", + pmcstat_string_unintern(sym->ps_name)); + else + fprintf(args.pa_graphfile, "fn=%p\n", + (void *)(ct->pct_image->pi_vaddr + ct->pct_func)); + } + + /* + * Self cost. 
+ */ + if (ct->pct_ninstr > 0) { + for (i = 0; i < ct->pct_ninstr; i++) { + addr = ct->pct_image->pi_vaddr + + ct->pct_instr[i].pctf_func; + line = 0; + if (pmcstat_image_addr2line(ct->pct_image, addr, + sourcefile, sizeof(sourcefile), &line, + funcname, sizeof(funcname))) + fprintf(args.pa_graphfile, "fl=%s\n", sourcefile); + fprintf(args.pa_graphfile, "%p %u", (void *)addr, line); + for (j = 0; jpct_instr[i].pctf_samples)); + fprintf(args.pa_graphfile, "\n"); + } + } else { + addr = ct->pct_image->pi_vaddr + ct->pct_func; + line = 0; + if (pmcstat_image_addr2line(ct->pct_image, addr, + sourcefile, sizeof(sourcefile), &line, + funcname, sizeof(funcname))) + fprintf(args.pa_graphfile, "fl=%s\n", sourcefile); + fprintf(args.pa_graphfile, "* *"); + for (i = 0; ipct_samples)); + fprintf(args.pa_graphfile, "\n"); + } +} + +/* + * Print node child cost. + */ + +static void +pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct) +{ + int i, j, line; + uintptr_t addr; + struct pmcstat_symbol *sym; + struct pmcpl_ct_node *child; + char sourcefile[PATH_MAX]; + char funcname[PATH_MAX]; + + /* + * Child cost. + * TODO: attach child cost to the real position in the funtion. + * TODO: cfn= / call addr() / addr(call ) + */ + for (i=0 ; ipct_narc; i++) { + child = ct->pct_arc[i].pcta_child; + + /* Object binary. */ +#ifdef PMCPL_CT_OPTIMIZEFN + if (pmcpl_ct_prevfn != child->pct_image->pi_fullpath) { +#endif + pmcpl_ct_prevfn = child->pct_image->pi_fullpath; + fprintf(args.pa_graphfile, "cob=%s\n", + pmcstat_string_unintern(pmcpl_ct_prevfn)); +#if PMCPL_CT_OPTIMIZEFN + } +#endif + /* Child function name. */ + addr = child->pct_image->pi_vaddr + child->pct_func; + /* Child function source file. 
*/ + if (pmcstat_image_addr2line(child->pct_image, addr, + sourcefile, sizeof(sourcefile), &line, + funcname, sizeof(funcname))) { + fprintf(args.pa_graphfile, "cfn=%s\n", funcname); + fprintf(args.pa_graphfile, "cfl=%s\n", sourcefile); + } else { + sym = pmcstat_symbol_search(child->pct_image, + child->pct_func); + if (sym != NULL) + fprintf(args.pa_graphfile, "cfn=%s\n", + pmcstat_string_unintern(sym->ps_name)); + else + fprintf(args.pa_graphfile, "cfn=%p\n", (void *)addr); + } + + /* Child function address, line and call count. */ + fprintf(args.pa_graphfile, "calls=%u %p %u\n", + ct->pct_arc[i].pcta_call, (void *)addr, line); + + if (ct->pct_image != NULL) { + /* Call address, line, sample. */ + addr = ct->pct_image->pi_vaddr + ct->pct_func; + line = 0; + pmcstat_image_addr2line(ct->pct_image, addr, sourcefile, + sizeof(sourcefile), &line, + funcname, sizeof(funcname)); + fprintf(args.pa_graphfile, "%p %u", (void *)addr, line); + } + else + fprintf(args.pa_graphfile, "* *"); + for (j = 0; jpct_arc[i].pcta_samples)); + fprintf(args.pa_graphfile, "\n"); + } +} + +/* + * Clean the PMC name for Kcachegrind formula + */ + +static void +pmcpl_ct_fixup_pmcname(char *s) +{ + char *p; + + for (p = s; *p; p++) + if (!isalnum(*p)) + *p = '_'; +} + +/* + * Print a calltree (KCachegrind) for all PMCs. 
+ */ + +static void +pmcpl_ct_print(void) +{ + int n, i; + struct pmcpl_ct_node_hash *pch; + struct pmcpl_ct_sample rsamples; + char name[40]; + + pmcpl_ct_samples_root(&rsamples); + pmcpl_ct_prevfn = NULL; + + fprintf(args.pa_graphfile, + "version: 1\n" + "creator: pmcstat\n" + "positions: instr line\n" + "events:"); + for (i=0; ipch_ctnode); + pmcpl_ct_node_printchild(pch->pch_ctnode); + } + + pmcpl_ct_samples_free(&rsamples); +} + +int +pmcpl_ct_configure(char *opt) +{ + + if (strncmp(opt, "skiplink=", 9) == 0) { + pmcstat_skiplink = atoi(opt+9); + } else + return (0); + + return (1); +} + +int +pmcpl_ct_init(void) +{ + int i; + + pmcpl_ct_prevfn = NULL; + pmcpl_ct_root = pmcpl_ct_node_allocate(NULL, 0); + + for (i = 0; i < PMCSTAT_NHASH; i++) + LIST_INIT(&pmcpl_ct_node_hash[i]); + + pmcpl_ct_samples_init(&pmcpl_ct_callid); + + return (0); +} + +void +pmcpl_ct_shutdown(FILE *mf) +{ + int i; + struct pmcpl_ct_node_hash *pch, *pchtmp; + + (void) mf; + + if (args.pa_flags & FLAG_DO_CALLGRAPHS) + pmcpl_ct_print(); + + /* + * Free memory. + */ + + for (i = 0; i < PMCSTAT_NHASH; i++) { + LIST_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next, + pchtmp) { + pmcpl_ct_node_free(pch->pch_ctnode); + free(pch); + } + } + + pmcpl_ct_node_free(pmcpl_ct_root); + pmcpl_ct_root = NULL; + + pmcpl_ct_samples_free(&pmcpl_ct_callid); +} + diff --git a/usr.sbin/pmcstat/pmcpl_calltree.h b/usr.sbin/pmcstat/pmcpl_calltree.h new file mode 100644 index 0000000..f54957f --- /dev/null +++ b/usr.sbin/pmcstat/pmcpl_calltree.h @@ -0,0 +1,42 @@ +/*- + * Copyright (c) 2009, Fabien Thomas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _PMCSTAT_PL_CALLTREE_H_ +#define _PMCSTAT_PL_CALLTREE_H_ + +/* Function prototypes */ +int pmcpl_ct_init(void); +void pmcpl_ct_shutdown(FILE *mf); +void pmcpl_ct_process( + struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, + uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu); +int pmcpl_ct_topkeypress(int c, WINDOW *w); +void pmcpl_ct_topdisplay(void); +int pmcpl_ct_configure(char *opt); + +#endif /* _PMCSTAT_PL_CALLTREE_H_ */ diff --git a/usr.sbin/pmcstat/pmcpl_gprof.c b/usr.sbin/pmcstat/pmcpl_gprof.c new file mode 100644 index 0000000..9327eb9 --- /dev/null +++ b/usr.sbin/pmcstat/pmcpl_gprof.c @@ -0,0 +1,533 @@ +/*- + * Copyright (c) 2005-2007, Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation + * Copyright (c) 2009, Fabien Thomas + * All rights reserved. + * + * Portions of this software were developed by A. Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Transform a hwpmc(4) log into human readable form, and into + * gprof(1) compatible profiles. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmcstat.h" +#include "pmcstat_log.h" +#include "pmcpl_callgraph.h" +#include "pmcpl_gprof.h" + +/* + * struct pmcstat_gmonfile tracks a given 'gmon.out' file. These + * files are mmap()'ed in as needed. 
+ */ + +struct pmcstat_gmonfile { + LIST_ENTRY(pmcstat_gmonfile) pgf_next; /* list of entries */ + int pgf_overflow; /* whether a count overflowed */ + pmc_id_t pgf_pmcid; /* id of the associated pmc */ + size_t pgf_nbuckets; /* #buckets in this gmon.out */ + unsigned int pgf_nsamples; /* #samples in this gmon.out */ + pmcstat_interned_string pgf_name; /* pathname of gmon.out file */ + size_t pgf_ndatabytes; /* number of bytes mapped */ + void *pgf_gmondata; /* pointer to mmap'ed data */ + FILE *pgf_file; /* used when writing gmon arcs */ +}; + +/* + * Prototypes + */ + +static void pmcstat_gmon_create_file(struct pmcstat_gmonfile *_pgf, + struct pmcstat_image *_image); +static pmcstat_interned_string pmcstat_gmon_create_name(const char *_sd, + struct pmcstat_image *_img, pmc_id_t _pmcid); +static void pmcstat_gmon_map_file(struct pmcstat_gmonfile *_pgf); +static void pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *_pgf); + +static struct pmcstat_gmonfile *pmcstat_image_find_gmonfile(struct + pmcstat_image *_i, pmc_id_t _id); + +/* + * Create a gmon.out file and size it. 
+ */ + +static void +pmcstat_gmon_create_file(struct pmcstat_gmonfile *pgf, + struct pmcstat_image *image) +{ + int fd; + size_t count; + struct gmonhdr gm; + const char *pathname; + char buffer[DEFAULT_BUFFER_SIZE]; + + pathname = pmcstat_string_unintern(pgf->pgf_name); + if ((fd = open(pathname, O_RDWR|O_NOFOLLOW|O_CREAT, + S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0) + err(EX_OSERR, "ERROR: Cannot open \"%s\"", pathname); + + gm.lpc = image->pi_start; + gm.hpc = image->pi_end; + gm.ncnt = (pgf->pgf_nbuckets * sizeof(HISTCOUNTER)) + + sizeof(struct gmonhdr); + gm.version = GMONVERSION; + gm.profrate = 0; /* use ticks */ + gm.histcounter_type = 0; /* compatibility with moncontrol() */ + gm.spare[0] = gm.spare[1] = 0; + + /* Write out the gmon header */ + if (write(fd, &gm, sizeof(gm)) < 0) + goto error; + + /* Zero fill the samples[] array */ + (void) memset(buffer, 0, sizeof(buffer)); + + count = pgf->pgf_ndatabytes - sizeof(struct gmonhdr); + while (count > sizeof(buffer)) { + if (write(fd, &buffer, sizeof(buffer)) < 0) + goto error; + count -= sizeof(buffer); + } + + if (write(fd, &buffer, count) < 0) + goto error; + + (void) close(fd); + + return; + + error: + err(EX_OSERR, "ERROR: Cannot write \"%s\"", pathname); +} + +/* + * Determine the full pathname of a gmon.out file for a given + * (image,pmcid) combination. Return the interned string. + */ + +pmcstat_interned_string +pmcstat_gmon_create_name(const char *samplesdir, struct pmcstat_image *image, + pmc_id_t pmcid) +{ + const char *pmcname; + char fullpath[PATH_MAX]; + + pmcname = pmcstat_pmcid_to_name(pmcid); + + (void) snprintf(fullpath, sizeof(fullpath), + "%s/%s/%s", samplesdir, pmcname, + pmcstat_string_unintern(image->pi_samplename)); + + return (pmcstat_string_intern(fullpath)); +} + + +/* + * Mmap in a gmon.out file for processing. 
+ */ + +static void +pmcstat_gmon_map_file(struct pmcstat_gmonfile *pgf) +{ + int fd; + const char *pathname; + + pathname = pmcstat_string_unintern(pgf->pgf_name); + + /* the gmon.out file must already exist */ + if ((fd = open(pathname, O_RDWR | O_NOFOLLOW, 0)) < 0) + err(EX_OSERR, "ERROR: cannot open \"%s\"", pathname); + + pgf->pgf_gmondata = mmap(NULL, pgf->pgf_ndatabytes, + PROT_READ|PROT_WRITE, MAP_NOSYNC|MAP_SHARED, fd, 0); + + if (pgf->pgf_gmondata == MAP_FAILED) + err(EX_OSERR, "ERROR: cannot map \"%s\"", pathname); + + (void) close(fd); +} + +/* + * Unmap a gmon.out file after sync'ing its data to disk. + */ + +static void +pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *pgf) +{ + (void) msync(pgf->pgf_gmondata, pgf->pgf_ndatabytes, + MS_SYNC); + (void) munmap(pgf->pgf_gmondata, pgf->pgf_ndatabytes); + pgf->pgf_gmondata = NULL; +} + +static void +pmcstat_gmon_append_arc(struct pmcstat_image *image, pmc_id_t pmcid, + uintptr_t rawfrom, uintptr_t rawto, uint32_t count) +{ + struct rawarc arc; /* from */ + const char *pathname; + struct pmcstat_gmonfile *pgf; + + if ((pgf = pmcstat_image_find_gmonfile(image, pmcid)) == NULL) + return; + + if (pgf->pgf_file == NULL) { + pathname = pmcstat_string_unintern(pgf->pgf_name); + if ((pgf->pgf_file = fopen(pathname, "a")) == NULL) + return; + } + + arc.raw_frompc = rawfrom + image->pi_vaddr; + arc.raw_selfpc = rawto + image->pi_vaddr; + arc.raw_count = count; + + (void) fwrite(&arc, sizeof(arc), 1, pgf->pgf_file); + +} + +static struct pmcstat_gmonfile * +pmcstat_image_find_gmonfile(struct pmcstat_image *image, pmc_id_t pmcid) +{ + struct pmcstat_gmonfile *pgf; + LIST_FOREACH(pgf, &image->pi_gmlist, pgf_next) + if (pgf->pgf_pmcid == pmcid) + return (pgf); + return (NULL); +} + +static void +pmcstat_cgnode_do_gmon_arcs(struct pmcstat_cgnode *cg, pmc_id_t pmcid) +{ + struct pmcstat_cgnode *cgc; + + /* + * Look for child nodes that belong to the same image. 
+ */ + + LIST_FOREACH(cgc, &cg->pcg_children, pcg_sibling) { + if (cgc->pcg_image == cg->pcg_image) + pmcstat_gmon_append_arc(cg->pcg_image, pmcid, + cgc->pcg_func, cg->pcg_func, cgc->pcg_count); + if (cgc->pcg_nchildren > 0) + pmcstat_cgnode_do_gmon_arcs(cgc, pmcid); + } +} + +static void +pmcstat_callgraph_do_gmon_arcs_for_pmcid(pmc_id_t pmcid) +{ + int n; + struct pmcstat_cgnode_hash *pch; + + for (n = 0; n < PMCSTAT_NHASH; n++) + LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next) + if (pch->pch_pmcid == pmcid && + pch->pch_cgnode->pcg_nchildren > 1) + pmcstat_cgnode_do_gmon_arcs(pch->pch_cgnode, + pmcid); +} + + +static void +pmcstat_callgraph_do_gmon_arcs(void) +{ + struct pmcstat_pmcrecord *pmcr; + + LIST_FOREACH(pmcr, &pmcstat_pmcs, pr_next) + pmcstat_callgraph_do_gmon_arcs_for_pmcid(pmcr->pr_pmcid); +} + +void +pmcpl_gmon_initimage(struct pmcstat_image *pi) +{ + int count, nlen; + char *sn; + char name[NAME_MAX]; + + /* + * Look for a suitable name for the sample files associated + * with this image: if `basename(path)`+".gmon" is available, + * we use that, otherwise we try iterating through + * `basename(path)`+ "~" + NNN + ".gmon" till we get a free + * entry. + */ + if ((sn = basename(pmcstat_string_unintern(pi->pi_execpath))) == NULL) + err(EX_OSERR, "ERROR: Cannot process \"%s\"", + pmcstat_string_unintern(pi->pi_execpath)); + + nlen = strlen(sn); + nlen = min(nlen, (int) (sizeof(name) - sizeof(".gmon"))); + + snprintf(name, sizeof(name), "%.*s.gmon", nlen, sn); + + /* try use the unabridged name first */ + if (pmcstat_string_lookup(name) == NULL) + pi->pi_samplename = pmcstat_string_intern(name); + else { + /* + * Otherwise use a prefix from the original name and + * upto 3 digits. 
+ */ + nlen = strlen(sn); + nlen = min(nlen, (int) (sizeof(name)-sizeof("~NNN.gmon"))); + count = 0; + do { + if (++count > 999) + errx(EX_CANTCREAT, "ERROR: cannot create a " + "gmon file for \"%s\"", name); + snprintf(name, sizeof(name), "%.*s~%3.3d.gmon", + nlen, sn, count); + if (pmcstat_string_lookup(name) == NULL) { + pi->pi_samplename = + pmcstat_string_intern(name); + count = 0; + } + } while (count > 0); + } + + LIST_INIT(&pi->pi_gmlist); +} + +void +pmcpl_gmon_shutdownimage(struct pmcstat_image *pi) +{ + struct pmcstat_gmonfile *pgf, *pgftmp; + + LIST_FOREACH_SAFE(pgf, &pi->pi_gmlist, pgf_next, pgftmp) { + if (pgf->pgf_file) + (void) fclose(pgf->pgf_file); + LIST_REMOVE(pgf, pgf_next); + free(pgf); + } +} + +void +pmcpl_gmon_newpmc(pmcstat_interned_string ps, struct pmcstat_pmcrecord *pr) +{ + struct stat st; + char fullpath[PATH_MAX]; + + (void) pr; + + /* + * Create the appropriate directory to hold gmon.out files. + */ + + (void) snprintf(fullpath, sizeof(fullpath), "%s/%s", args.pa_samplesdir, + pmcstat_string_unintern(ps)); + + /* If the path name exists, it should be a directory */ + if (stat(fullpath, &st) == 0 && S_ISDIR(st.st_mode)) + return; + + if (mkdir(fullpath, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) < 0) + err(EX_OSERR, "ERROR: Cannot create directory \"%s\"", + fullpath); +} + +/* + * Increment the bucket in the gmon.out file corresponding to 'pmcid' + * and 'pc'. + */ + +void +pmcpl_gmon_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, + uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu) +{ + struct pmcstat_pcmap *map; + struct pmcstat_image *image; + struct pmcstat_gmonfile *pgf; + uintfptr_t bucket; + HISTCOUNTER *hc; + pmc_id_t pmcid; + + (void) nsamples; (void) usermode; (void) cpu; + + map = pmcstat_process_find_map(usermode ? pp : pmcstat_kernproc, cc[0]); + if (map == NULL) { + /* Unknown offset. 
*/ + pmcstat_stats.ps_samples_unknown_offset++; + return; + } + + assert(cc[0] >= map->ppm_lowpc && cc[0] < map->ppm_highpc); + + image = map->ppm_image; + pmcid = pmcr->pr_pmcid; + + /* + * If this is the first time we are seeing a sample for + * this executable image, try determine its parameters. + */ + if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) + pmcstat_image_determine_type(image); + + assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN); + + /* Ignore samples in images that we know nothing about. */ + if (image->pi_type == PMCSTAT_IMAGE_INDETERMINABLE) { + pmcstat_stats.ps_samples_indeterminable++; + return; + } + + /* + * Find the gmon file corresponding to 'pmcid', creating it if + * needed. + */ + pgf = pmcstat_image_find_gmonfile(image, pmcid); + if (pgf == NULL) { + if ((pgf = calloc(1, sizeof(*pgf))) == NULL) + err(EX_OSERR, "ERROR:"); + + pgf->pgf_gmondata = NULL; /* mark as unmapped */ + pgf->pgf_name = pmcstat_gmon_create_name(args.pa_samplesdir, + image, pmcid); + pgf->pgf_pmcid = pmcid; + assert(image->pi_end > image->pi_start); + pgf->pgf_nbuckets = (image->pi_end - image->pi_start) / + FUNCTION_ALIGNMENT; /* see */ + pgf->pgf_ndatabytes = sizeof(struct gmonhdr) + + pgf->pgf_nbuckets * sizeof(HISTCOUNTER); + pgf->pgf_nsamples = 0; + pgf->pgf_file = NULL; + + pmcstat_gmon_create_file(pgf, image); + + LIST_INSERT_HEAD(&image->pi_gmlist, pgf, pgf_next); + } + + /* + * Map the gmon file in if needed. It may have been mapped + * out under memory pressure. 
+ */ + if (pgf->pgf_gmondata == NULL) + pmcstat_gmon_map_file(pgf); + + assert(pgf->pgf_gmondata != NULL); + + /* + * + */ + + bucket = (cc[0] - map->ppm_lowpc) / FUNCTION_ALIGNMENT; + + assert(bucket < pgf->pgf_nbuckets); + + hc = (HISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata + + sizeof(struct gmonhdr)); + + /* saturating add */ + if (hc[bucket] < 0xFFFFU) /* XXX tie this to sizeof(HISTCOUNTER) */ + hc[bucket]++; + else /* mark that an overflow occurred */ + pgf->pgf_overflow = 1; + + pgf->pgf_nsamples++; +} + +/* + * Shutdown module. + */ + +void +pmcpl_gmon_shutdown(FILE *mf) +{ + int i; + struct pmcstat_gmonfile *pgf; + struct pmcstat_image *pi; + + /* + * Sync back all gprof flat profile data. + */ + for (i = 0; i < PMCSTAT_NHASH; i++) { + LIST_FOREACH(pi, &pmcstat_image_hash[i], pi_next) { + if (mf) + (void) fprintf(mf, " \"%s\" => \"%s\"", + pmcstat_string_unintern(pi->pi_execpath), + pmcstat_string_unintern( + pi->pi_samplename)); + + /* flush gmon.out data to disk */ + LIST_FOREACH(pgf, &pi->pi_gmlist, pgf_next) { + pmcstat_gmon_unmap_file(pgf); + if (mf) + (void) fprintf(mf, " %s/%d", + pmcstat_pmcid_to_name( + pgf->pgf_pmcid), + pgf->pgf_nsamples); + if (pgf->pgf_overflow && args.pa_verbosity >= 1) + warnx("WARNING: profile \"%s\" " + "overflowed.", + pmcstat_string_unintern( + pgf->pgf_name)); + } + + if (mf) + (void) fprintf(mf, "\n"); + } + } + + /* + * Compute arcs and add these to the gprof files. + */ + if (args.pa_flags & FLAG_DO_GPROF && args.pa_graphdepth > 1) + pmcstat_callgraph_do_gmon_arcs(); +} diff --git a/usr.sbin/pmcstat/pmcpl_gprof.h b/usr.sbin/pmcstat/pmcpl_gprof.h new file mode 100644 index 0000000..069082f --- /dev/null +++ b/usr.sbin/pmcstat/pmcpl_gprof.h @@ -0,0 +1,47 @@ +/*- + * Copyright (c) 2005-2007, Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation + * Copyright (c) 2009, Fabien Thomas + * All rights reserved. + * + * Portions of this software were developed by A. 
Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _PMCSTAT_PL_GPROF_H_ +#define _PMCSTAT_PL_GPROF_H_ + +/* Function prototypes */ +void pmcpl_gmon_shutdown(FILE *mf); +void pmcpl_gmon_process( + struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, + uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu); +void pmcpl_gmon_initimage(struct pmcstat_image *pi); +void pmcpl_gmon_shutdownimage(struct pmcstat_image *pi); +void pmcpl_gmon_newpmc(pmcstat_interned_string ps, + struct pmcstat_pmcrecord *pr); + +#endif /* _PMCSTAT_PL_GPROF_H_ */ diff --git a/usr.sbin/pmcstat/pmcstat.8 b/usr.sbin/pmcstat/pmcstat.8 index a4e6f1f..309eb3e 100644 --- a/usr.sbin/pmcstat/pmcstat.8 +++ b/usr.sbin/pmcstat/pmcstat.8 @@ -36,6 +36,7 @@ .Op Fl C .Op Fl D Ar pathname .Op Fl E +.Op Fl F Ar pathname .Op Fl G Ar pathname .Op Fl M Ar mapfilename .Op Fl N @@ -43,9 +44,11 @@ .Op Fl P Ar event-spec .Op Fl R Ar logfilename .Op Fl S Ar event-spec +.Op Fl T .Op Fl W .Op Fl c Ar cpu-spec .Op Fl d +.Op Fl f Ar pluginopt .Op Fl g .Op Fl k Ar kerneldir .Op Fl m Ar pathname @@ -129,6 +132,16 @@ complex pipeline of processes when used in conjunction with the .Fl d option. The default is not to enable per-process tracking. +.It Fl F Ar pathname +Print calltree (Kcachegrind) information to file +.Ar pathname . +If argument +.Ar pathname +is a +.Dq Li - +this information is sent to the output file specified by the +.Fl o +option. .It Fl G Ar pathname Print callchain information to file .Ar pathname . @@ -195,6 +208,12 @@ Perform offline analysis using sampling data in file Allocate a system mode sampling PMC measuring hardware events specified in .Ar event-spec . +.It Fl T +Use a top-like mode for sampling PMCs. The following hotkeys +can be used: 'c+a' switch to accumulative mode, 'c+d' switch +to delta mode, 'm' merge PMCs, 'n' change view, 'p' show next +PMC, ' ' pause, 'q' quit. Calltree only: 'f' cost under threshold +is seen as a dot.
.It Fl W Toggle logging the incremental counts seen by the threads of a tracked process each time they are scheduled on a CPU. @@ -218,6 +237,12 @@ Toggle between process mode PMCs measuring events for the target process' current and future children or only measuring events for the target process. The default is to measure events for the target process alone. +.It Fl f Ar pluginopt +Pass option string to the active plugin. +.br +threshold=<float> do not display cost under specified value (Top). +.br +skiplink=0|1 replace node with cost under threshold by a dot (Top). .It Fl g Produce profiles in a format compatible with .Xr gprof 1 . @@ -286,7 +311,8 @@ regular expression for selecting processes based on their command names. .It Fl v Increase verbosity. .It Fl w Ar secs -Print the values of all counting mode PMCs every +Print the values of all counting mode PMCs or sampling mode PMCs +for top mode every .Ar secs seconds. The argument diff --git a/usr.sbin/pmcstat/pmcstat.c b/usr.sbin/pmcstat/pmcstat.c index 6496ddb..a73d293 100644 --- a/usr.sbin/pmcstat/pmcstat.c +++ b/usr.sbin/pmcstat/pmcstat.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -106,13 +107,15 @@ __FBSDID("$FreeBSD$"); int pmcstat_interrupt = 0; int pmcstat_displayheight = DEFAULT_DISPLAY_HEIGHT; +int pmcstat_displaywidth = DEFAULT_DISPLAY_WIDTH; int pmcstat_sockpair[NSOCKPAIRFD]; int pmcstat_kq; kvm_t *pmcstat_kvm; struct kinfo_proc *pmcstat_plist; +struct pmcstat_args args; void -pmcstat_attach_pmcs(struct pmcstat_args *a) +pmcstat_attach_pmcs(void) { struct pmcstat_ev *ev; struct pmcstat_target *pt; @@ -120,10 +123,10 @@ pmcstat_attach_pmcs(struct pmcstat_args *a) /* Attach all process PMCs to target processes.
*/ count = 0; - STAILQ_FOREACH(ev, &a->pa_events, ev_next) { + STAILQ_FOREACH(ev, &args.pa_events, ev_next) { if (PMC_IS_SYSTEM_MODE(ev->ev_mode)) continue; - SLIST_FOREACH(pt, &a->pa_targets, pt_next) + SLIST_FOREACH(pt, &args.pa_targets, pt_next) if (pmc_attach(ev->ev_pmcid, pt->pt_pid) == 0) count++; else if (errno != ESRCH) @@ -138,12 +141,12 @@ pmcstat_attach_pmcs(struct pmcstat_args *a) void -pmcstat_cleanup(struct pmcstat_args *a) +pmcstat_cleanup(void) { struct pmcstat_ev *ev, *tmp; /* release allocated PMCs. */ - STAILQ_FOREACH_SAFE(ev, &a->pa_events, ev_next, tmp) + STAILQ_FOREACH_SAFE(ev, &args.pa_events, ev_next, tmp) if (ev->ev_pmcid != PMC_ID_INVALID) { if (pmc_stop(ev->ev_pmcid) < 0) err(EX_OSERR, "ERROR: cannot stop pmc 0x%x " @@ -153,25 +156,25 @@ pmcstat_cleanup(struct pmcstat_args *a) "0x%x \"%s\"", ev->ev_pmcid, ev->ev_name); free(ev->ev_name); free(ev->ev_spec); - STAILQ_REMOVE(&a->pa_events, ev, pmcstat_ev, ev_next); + STAILQ_REMOVE(&args.pa_events, ev, pmcstat_ev, ev_next); free(ev); } /* de-configure the log file if present. 
*/ - if (a->pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE)) + if (args.pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE)) (void) pmc_configure_logfile(-1); - if (a->pa_logparser) { - pmclog_close(a->pa_logparser); - a->pa_logparser = NULL; + if (args.pa_logparser) { + pmclog_close(args.pa_logparser); + args.pa_logparser = NULL; } - if (a->pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE)) - pmcstat_shutdown_logging(a); + if (args.pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE)) + pmcstat_shutdown_logging(); } void -pmcstat_clone_event_descriptor(struct pmcstat_args *a, struct pmcstat_ev *ev, +pmcstat_clone_event_descriptor(struct pmcstat_ev *ev, uint32_t cpumask) { int cpu; @@ -194,14 +197,14 @@ pmcstat_clone_event_descriptor(struct pmcstat_args *a, struct pmcstat_ev *ev, ev_clone->ev_saved = ev->ev_saved; ev_clone->ev_spec = strdup(ev->ev_spec); - STAILQ_INSERT_TAIL(&a->pa_events, ev_clone, ev_next); + STAILQ_INSERT_TAIL(&args.pa_events, ev_clone, ev_next); cpumask &= ~(1 << cpu); } } void -pmcstat_create_process(struct pmcstat_args *a) +pmcstat_create_process(void) { char token; pid_t pid; @@ -229,10 +232,10 @@ pmcstat_create_process(struct pmcstat_args *a) (void) close(pmcstat_sockpair[CHILDSOCKET]); /* exec() the program requested */ - execvp(*a->pa_argv, a->pa_argv); + execvp(*args.pa_argv, args.pa_argv); /* and if that fails, notify the parent */ kill(getppid(), SIGCHLD); - err(EX_OSERR, "ERROR: execvp \"%s\" failed", *a->pa_argv); + err(EX_OSERR, "ERROR: execvp \"%s\" failed", *args.pa_argv); /*NOTREACHED*/ default: /* parent */ @@ -250,7 +253,7 @@ pmcstat_create_process(struct pmcstat_args *a) errx(EX_SOFTWARE, "ERROR: Out of memory."); pt->pt_pid = pid; - SLIST_INSERT_HEAD(&a->pa_targets, pt, pt_next); + SLIST_INSERT_HEAD(&args.pa_targets, pt, pt_next); /* Wait for the child to signal that its ready to go. 
*/ if (read(pmcstat_sockpair[PARENTSOCKET], &token, 1) < 0) @@ -260,7 +263,7 @@ pmcstat_create_process(struct pmcstat_args *a) } void -pmcstat_find_targets(struct pmcstat_args *a, const char *spec) +pmcstat_find_targets(const char *spec) { int n, nproc, pid, rv; struct pmcstat_target *pt; @@ -275,7 +278,7 @@ pmcstat_find_targets(struct pmcstat_args *a, const char *spec) if ((pt = malloc(sizeof(*pt))) == NULL) goto outofmemory; pt->pt_pid = pid; - SLIST_INSERT_HEAD(&a->pa_targets, pt, pt_next); + SLIST_INSERT_HEAD(&args.pa_targets, pt, pt_next); return; } @@ -302,7 +305,7 @@ pmcstat_find_targets(struct pmcstat_args *a, const char *spec) if ((pt = malloc(sizeof(*pt))) == NULL) goto outofmemory; pt->pt_pid = kp->ki_pid; - SLIST_INSERT_HEAD(&a->pa_targets, pt, pt_next); + SLIST_INSERT_HEAD(&args.pa_targets, pt, pt_next); } else if (rv != REG_NOMATCH) { regerror(rv, &reg, errbuf, sizeof(errbuf)); errx(EX_SOFTWARE, "ERROR: Regex evalation failed: %s", @@ -343,17 +346,17 @@ pmcstat_get_cpumask(const char *cpuspec) } void -pmcstat_kill_process(struct pmcstat_args *a) +pmcstat_kill_process(void) { struct pmcstat_target *pt; - assert(a->pa_flags & FLAG_HAS_COMMANDLINE); + assert(args.pa_flags & FLAG_HAS_COMMANDLINE); /* * If a command line was specified, it would be the very first * in the list, before any other processes specified by -t.
*/ - pt = SLIST_FIRST(&a->pa_targets); + pt = SLIST_FIRST(&args.pa_targets); assert(pt != NULL); if (kill(pt->pt_pid, SIGINT) != 0) @@ -361,7 +364,7 @@ pmcstat_kill_process(struct pmcstat_args *a) } void -pmcstat_start_pmcs(struct pmcstat_args *a) +pmcstat_start_pmcs(void) { struct pmcstat_ev *ev; @@ -372,7 +375,7 @@ pmcstat_start_pmcs(struct pmcstat_args *a) if (pmc_start(ev->ev_pmcid) < 0) { warn("ERROR: Cannot start pmc 0x%x \"%s\"", ev->ev_pmcid, ev->ev_name); - pmcstat_cleanup(a); + pmcstat_cleanup(); exit(EX_OSERR); } } @@ -380,37 +383,37 @@ pmcstat_start_pmcs(struct pmcstat_args *a) } void -pmcstat_print_headers(struct pmcstat_args *a) +pmcstat_print_headers(void) { struct pmcstat_ev *ev; int c, w; - (void) fprintf(a->pa_printfile, PRINT_HEADER_PREFIX); + (void) fprintf(args.pa_printfile, PRINT_HEADER_PREFIX); - STAILQ_FOREACH(ev, &a->pa_events, ev_next) { + STAILQ_FOREACH(ev, &args.pa_events, ev_next) { if (PMC_IS_SAMPLING_MODE(ev->ev_mode)) continue; c = PMC_IS_SYSTEM_MODE(ev->ev_mode) ? 
's' : 'p'; if (ev->ev_fieldskip != 0) - (void) fprintf(a->pa_printfile, "%*s", + (void) fprintf(args.pa_printfile, "%*s", ev->ev_fieldskip, ""); w = ev->ev_fieldwidth - ev->ev_fieldskip - 2; if (c == 's') - (void) fprintf(a->pa_printfile, "s/%02d/%-*s ", + (void) fprintf(args.pa_printfile, "s/%02d/%-*s ", ev->ev_cpu, w-3, ev->ev_name); else - (void) fprintf(a->pa_printfile, "p/%*s ", w, + (void) fprintf(args.pa_printfile, "p/%*s ", w, ev->ev_name); } - (void) fflush(a->pa_printfile); + (void) fflush(args.pa_printfile); } void -pmcstat_print_counters(struct pmcstat_args *a) +pmcstat_print_counters(void) { int extra_width; struct pmcstat_ev *ev; @@ -418,7 +421,7 @@ pmcstat_print_counters(struct pmcstat_args *a) extra_width = sizeof(PRINT_HEADER_PREFIX) - 1; - STAILQ_FOREACH(ev, &a->pa_events, ev_next) { + STAILQ_FOREACH(ev, &args.pa_events, ev_next) { /* skip sampling mode counters */ if (PMC_IS_SAMPLING_MODE(ev->ev_mode)) @@ -428,7 +431,7 @@ pmcstat_print_counters(struct pmcstat_args *a) err(EX_OSERR, "ERROR: Cannot read pmc " "\"%s\"", ev->ev_name); - (void) fprintf(a->pa_printfile, "%*ju ", + (void) fprintf(args.pa_printfile, "%*ju ", ev->ev_fieldwidth + extra_width, (uintmax_t) ev->ev_cumulative ? 
value : (value - ev->ev_saved)); @@ -438,7 +441,7 @@ pmcstat_print_counters(struct pmcstat_args *a) extra_width = 0; } - (void) fflush(a->pa_printfile); + (void) fflush(args.pa_printfile); } /* @@ -446,20 +449,20 @@ pmcstat_print_counters(struct pmcstat_args *a) */ void -pmcstat_print_pmcs(struct pmcstat_args *a) +pmcstat_print_pmcs(void) { static int linecount = 0; /* check if we need to print a header line */ if (++linecount > pmcstat_displayheight) { - (void) fprintf(a->pa_printfile, "\n"); + (void) fprintf(args.pa_printfile, "\n"); linecount = 1; } if (linecount == 1) - pmcstat_print_headers(a); - (void) fprintf(a->pa_printfile, "\n"); + pmcstat_print_headers(); + (void) fprintf(args.pa_printfile, "\n"); - pmcstat_print_counters(a); + pmcstat_print_counters(); return; } @@ -493,6 +496,8 @@ pmcstat_show_usage(void) "\t -C\t\t (toggle) show cumulative counts\n" "\t -D path\t create profiles in directory \"path\"\n" "\t -E\t\t (toggle) show counts at process exit\n" + "\t -F file\t write a system-wide callgraph (Kcachegrind format)" + " to \"file\"\n" "\t -G file\t write a system-wide callgraph to \"file\"\n" "\t -M file\t print executable/gmon file map to \"file\"\n" "\t -N\t\t (toggle) capture callchains\n" @@ -500,9 +505,11 @@ pmcstat_show_usage(void) "\t -P spec\t allocate a process-private sampling PMC\n" "\t -R file\t read events from \"file\"\n" "\t -S spec\t allocate a system-wide sampling PMC\n" + "\t -T\t\t start in top mode\n" "\t -W\t\t (toggle) show counts per context switch\n" "\t -c cpu-list\t set cpus for subsequent system-wide PMCs\n" "\t -d\t\t (toggle) track descendants\n" + "\t -f spec\t pass \"spec\" to as plugin option\n" "\t -g\t\t produce gprof(1) compatible profiles\n" "\t -k dir\t\t set the path to the kernel\n" "\t -n rate\t set sampling rate\n" @@ -520,6 +527,24 @@ pmcstat_show_usage(void) } /* + * At exit handler for top mode + */ + +void +pmcstat_topexit(void) +{ + if (!args.pa_toptty) + return; + + /* + * Shutdown ncurses. 
+ */ + clrtoeol(); + refresh(); + endwin(); +} + +/* * Main */ @@ -535,6 +560,7 @@ main(int argc, char **argv) int graphdepth; int pipefd[2]; int use_cumulative_counts; + short cf, cb; uint32_t cpumask; char *end, *tmp; const char *errmsg, *graphfilename; @@ -570,6 +596,13 @@ main(int argc, char **argv) args.pa_mapfilename = NULL; args.pa_inputpath = NULL; args.pa_outputpath = NULL; + args.pa_pplugin = PMCSTAT_PL_NONE; + args.pa_plugin = PMCSTAT_PL_NONE; + args.pa_ctdumpinstr = 1; + args.pa_topmode = PMCSTAT_TOP_DELTA; + args.pa_toptty = 0; + args.pa_topcolor = 0; + args.pa_mergepmc = 0; STAILQ_INIT(&args.pa_events); SLIST_INIT(&args.pa_targets); bzero(&ds_start, sizeof(ds_start)); @@ -594,7 +627,7 @@ main(int argc, char **argv) } while ((option = getopt(argc, argv, - "CD:EG:M:NO:P:R:S:Wc:dgk:m:n:o:p:qr:s:t:vw:z:")) != -1) + "CD:EF:G:M:NO:P:R:S:TWc:df:gk:m:n:o:p:qr:s:t:vw:z:")) != -1) switch (option) { case 'C': /* cumulative values */ use_cumulative_counts = !use_cumulative_counts; @@ -628,13 +661,28 @@ main(int argc, char **argv) args.pa_required |= FLAG_HAS_PROCESS_PMCS; break; + case 'F': /* produce a system-wide calltree */ + args.pa_flags |= FLAG_DO_CALLGRAPHS; + args.pa_plugin = PMCSTAT_PL_CALLTREE; + graphfilename = optarg; + break; + + case 'f': /* plugins options */ + if (args.pa_plugin == PMCSTAT_PL_NONE) + err(EX_USAGE, "ERROR: Need -g/-G/-m/-T."); + pmcstat_pluginconfigure_log(optarg); + break; + case 'G': /* produce a system-wide callgraph */ args.pa_flags |= FLAG_DO_CALLGRAPHS; + args.pa_plugin = PMCSTAT_PL_CALLGRAPH; graphfilename = optarg; break; case 'g': /* produce gprof compatible profiles */ args.pa_flags |= FLAG_DO_GPROF; + args.pa_pplugin = PMCSTAT_PL_CALLGRAPH; + args.pa_plugin = PMCSTAT_PL_GPROF; break; case 'k': /* pathname to the kernel */ @@ -645,8 +693,9 @@ main(int argc, char **argv) break; case 'm': - args.pa_flags |= FLAG_WANTS_MAPPINGS; - graphfilename = optarg; + args.pa_flags |= FLAG_DO_ANNOTATE; + args.pa_plugin = 
PMCSTAT_PL_ANNOTATE; + graphfilename = optarg; break; case 'E': /* log process exit */ @@ -732,7 +781,7 @@ main(int argc, char **argv) STAILQ_INSERT_TAIL(&args.pa_events, ev, ev_next); if (option == 's' || option == 'S') - pmcstat_clone_event_descriptor(&args, ev, + pmcstat_clone_event_descriptor(ev, cpumask & ~(1 << ev->ev_cpu)); break; @@ -782,12 +831,21 @@ main(int argc, char **argv) break; case 't': /* target pid or process name */ - pmcstat_find_targets(&args, optarg); + pmcstat_find_targets(optarg); args.pa_flags |= FLAG_HAS_TARGET; args.pa_required |= FLAG_HAS_PROCESS_PMCS; break; + case 'T': /* top mode */ + args.pa_flags |= FLAG_DO_TOP; + args.pa_plugin = PMCSTAT_PL_CALLGRAPH; + args.pa_ctdumpinstr = 0; + args.pa_mergepmc = 1; + if (args.pa_printfile == stderr) + args.pa_printfile = stdout; + break; + case 'v': /* verbose */ args.pa_verbosity++; break; @@ -798,7 +856,6 @@ main(int argc, char **argv) errx(EX_USAGE, "ERROR: Illegal wait interval " "value \"%s\".", optarg); args.pa_flags |= FLAG_HAS_WAIT_INTERVAL; - args.pa_required |= FLAG_HAS_COUNTING_PMCS; args.pa_interval = interval; break; @@ -833,7 +890,7 @@ main(int argc, char **argv) args.pa_flags |= FLAG_HAS_COMMANDLINE; if (args.pa_flags & (FLAG_DO_GPROF | FLAG_DO_CALLGRAPHS | - FLAG_WANTS_MAPPINGS)) + FLAG_DO_ANNOTATE | FLAG_DO_TOP)) args.pa_flags |= FLAG_DO_ANALYSIS; /* @@ -846,11 +903,11 @@ main(int argc, char **argv) "exclusive."); /* -m option is allowed with -R only. */ - if (args.pa_flags & FLAG_WANTS_MAPPINGS && args.pa_inputpath == NULL) + if (args.pa_flags & FLAG_DO_ANNOTATE && args.pa_inputpath == NULL) errx(EX_USAGE, "ERROR: option -m requires an input file"); /* -m option is not allowed combined with -g or -G. 
*/ - if (args.pa_flags & FLAG_WANTS_MAPPINGS && + if (args.pa_flags & FLAG_DO_ANNOTATE && args.pa_flags & (FLAG_DO_GPROF | FLAG_DO_CALLGRAPHS)) errx(EX_USAGE, "ERROR: option -m and -g | -G are mutually " "exclusive"); @@ -904,7 +961,7 @@ main(int argc, char **argv) /* check for counting mode options without a counting PMC */ if ((args.pa_required & FLAG_HAS_COUNTING_PMCS) && (args.pa_flags & FLAG_HAS_COUNTING_PMCS) == 0) - errx(EX_USAGE, "ERROR: options -C, -W, -o and -w require at " + errx(EX_USAGE, "ERROR: options -C, -W and -o require at " "least one counting mode PMC to be specified."); /* check for sampling mode options without a sampling PMC spec */ @@ -913,10 +970,10 @@ main(int argc, char **argv) errx(EX_USAGE, "ERROR: options -N, -n and -O require at " "least one sampling mode PMC to be specified."); - /* check if -g/-G are being used correctly */ + /* check if -g/-G/-m/-T are being used correctly */ if ((args.pa_flags & FLAG_DO_ANALYSIS) && !(args.pa_flags & (FLAG_HAS_SAMPLING_PMCS|FLAG_READ_LOGFILE))) - errx(EX_USAGE, "ERROR: options -g/-G require sampling PMCs " + errx(EX_USAGE, "ERROR: options -g/-G/-m/-T require sampling PMCs " "or -R to be specified."); /* check if -O was spuriously specified */ @@ -926,11 +983,11 @@ main(int argc, char **argv) "ERROR: option -O is used only with options " "-E, -P, -S and -W."); - /* -k kernel path require -g/-G or -R */ + /* -k kernel path require -g/-G/-m/-T or -R */ if ((args.pa_flags & FLAG_HAS_KERNELPATH) && (args.pa_flags & FLAG_DO_ANALYSIS) == 0 && (args.pa_flags & FLAG_READ_LOGFILE) == 0) - errx(EX_USAGE, "ERROR: option -k is only used with -g/-R."); + errx(EX_USAGE, "ERROR: option -k is only used with -g/-R/-m/-T."); /* -D only applies to gprof output mode (-g) */ if ((args.pa_flags & FLAG_HAS_SAMPLESDIR) && @@ -943,6 +1000,11 @@ main(int argc, char **argv) (args.pa_flags & FLAG_READ_LOGFILE) == 0) errx(EX_USAGE, "ERROR: option -M is only used with -g/-R."); + /* -T is incompatible with -R (replay logfile is 
a TODO) */ + if ((args.pa_flags & FLAG_DO_TOP) && + (args.pa_flags & FLAG_READ_LOGFILE)) + errx(EX_USAGE, "ERROR: option -T is incompatible with -R."); + /* * Disallow textual output of sampling PMCs if counting PMCs * have also been asked for, mostly because the combined output @@ -996,7 +1058,7 @@ main(int argc, char **argv) "for writing", graphfilename); } } - if (args.pa_flags & FLAG_WANTS_MAPPINGS) { + if (args.pa_flags & FLAG_DO_ANNOTATE) { args.pa_graphfile = fopen(graphfilename, "w"); if (args.pa_graphfile == NULL) err(EX_OSERR, "ERROR: cannot open \"%s\" for writing", @@ -1012,13 +1074,13 @@ main(int argc, char **argv) if ((args.pa_flags & FLAG_DO_ANALYSIS) == 0) args.pa_flags |= FLAG_DO_PRINT; - pmcstat_initialize_logging(&args); + pmcstat_initialize_logging(); args.pa_logfd = pmcstat_open_log(args.pa_inputpath, PMCSTAT_OPEN_FOR_READ); if ((args.pa_logparser = pmclog_open(args.pa_logfd)) == NULL) err(EX_OSERR, "ERROR: Cannot create parser"); - pmcstat_process_log(&args); - pmcstat_shutdown_logging(&args); + pmcstat_process_log(); + pmcstat_shutdown_logging(); exit(EX_OK); } @@ -1062,7 +1124,9 @@ main(int argc, char **argv) args.pa_logfd = pipefd[WRITEPIPEFD]; - args.pa_flags |= (FLAG_HAS_PIPE | FLAG_DO_PRINT); + args.pa_flags |= FLAG_HAS_PIPE; + if ((args.pa_flags & FLAG_DO_TOP) == 0) + args.pa_flags |= FLAG_DO_PRINT; args.pa_logparser = pmclog_open(pipefd[READPIPEFD]); } @@ -1126,12 +1190,24 @@ main(int argc, char **argv) err(EX_OSERR, "ERROR: Cannot determine window size"); pmcstat_displayheight = ws.ws_row - 1; + pmcstat_displaywidth = ws.ws_col - 1; EV_SET(&kev, SIGWINCH, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for " "SIGWINCH"); + + args.pa_toptty = 1; + } + + /* + * Listen to key input in top mode. 
+ */ + if (args.pa_flags & FLAG_DO_TOP) { + EV_SET(&kev, fileno(stdin), EVFILT_READ, EV_ADD, 0, 0, NULL); + if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) + err(EX_OSERR, "ERROR: Cannot register kevent"); } EV_SET(&kev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); @@ -1152,9 +1228,13 @@ main(int argc, char **argv) if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0) err(EX_OSERR, "ERROR: Cannot register kevent for SIGCHLD"); - /* setup a timer if we have counting mode PMCs needing to be printed */ - if ((args.pa_flags & FLAG_HAS_COUNTING_PMCS) && - (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) { + /* + * Setup a timer if we have counting mode PMCs needing to be printed or + * top mode plugin is active. + */ + if (((args.pa_flags & FLAG_HAS_COUNTING_PMCS) && + (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) || + (args.pa_flags & FLAG_DO_TOP)) { EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0, args.pa_interval * 1000, NULL); @@ -1165,7 +1245,7 @@ main(int argc, char **argv) /* attach PMCs to the target process, starting it if specified */ if (args.pa_flags & FLAG_HAS_COMMANDLINE) - pmcstat_create_process(&args); + pmcstat_create_process(); if (check_driver_stats && pmc_get_driver_stats(&ds_start) < 0) err(EX_OSERR, "ERROR: Cannot retrieve driver statistics"); @@ -1176,7 +1256,7 @@ main(int argc, char **argv) errx(EX_DATAERR, "ERROR: No matching target " "processes."); if (args.pa_flags & FLAG_HAS_PROCESS_PMCS) - pmcstat_attach_pmcs(&args); + pmcstat_attach_pmcs(); if (pmcstat_kvm) { kvm_close(pmcstat_kvm); @@ -1185,16 +1265,16 @@ main(int argc, char **argv) } /* start the pmcs */ - pmcstat_start_pmcs(&args); + pmcstat_start_pmcs(); /* start the (commandline) process if needed */ if (args.pa_flags & FLAG_HAS_COMMANDLINE) pmcstat_start_process(); /* initialize logging if printing the configured log */ - if ((args.pa_flags & FLAG_DO_PRINT) && + if ((args.pa_flags & (FLAG_DO_PRINT | FLAG_DO_TOP)) && (args.pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE))) - 
pmcstat_initialize_logging(&args); + pmcstat_initialize_logging(); /* Handle SIGINT using the kqueue loop */ sa.sa_handler = SIG_IGN; @@ -1205,6 +1285,37 @@ main(int argc, char **argv) err(EX_OSERR, "ERROR: Cannot install signal handler"); /* + * Setup the top mode display. + */ + if (args.pa_flags & FLAG_DO_TOP) { + args.pa_flags &= ~FLAG_DO_PRINT; + + if (args.pa_toptty) { + /* + * Init ncurses. + */ + initscr(); + if(has_colors() == TRUE) { + args.pa_topcolor = 1; + start_color(); + use_default_colors(); + pair_content(0, &cf, &cb); + init_pair(1, COLOR_RED, cb); + init_pair(2, COLOR_YELLOW, cb); + init_pair(3, COLOR_GREEN, cb); + } + cbreak(); + noecho(); + nonl(); + nodelay(stdscr, 1); + intrflush(stdscr, FALSE); + keypad(stdscr, TRUE); + clear(); + atexit(pmcstat_topexit); + } + } + + /* * loop till either the target process (if any) exits, or we * are killed by a SIGINT. */ @@ -1225,14 +1336,18 @@ main(int argc, char **argv) case EVFILT_PROC: /* target has exited */ if (args.pa_flags & (FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE)) - runstate = pmcstat_close_log(&args); + runstate = pmcstat_close_log(); else runstate = PMCSTAT_FINISHED; do_print = 1; break; case EVFILT_READ: /* log file data is present */ - runstate = pmcstat_process_log(&args); + if (kev.ident == (unsigned)fileno(stdin)) { + if (pmcstat_keypress_log()) + runstate = pmcstat_close_log(); + } else + runstate = pmcstat_process_log(); break; case EVFILT_SIGNAL: @@ -1253,17 +1368,17 @@ main(int argc, char **argv) */ if (args.pa_flags & (FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE)) { - runstate = pmcstat_close_log(&args); + runstate = pmcstat_close_log(); if (args.pa_flags & (FLAG_DO_PRINT|FLAG_DO_ANALYSIS)) - pmcstat_process_log(&args); + pmcstat_process_log(); } do_print = 1; /* print PMCs at exit */ runstate = PMCSTAT_FINISHED; } else if (kev.ident == SIGINT) { /* Kill the child process if we started it */ if (args.pa_flags & FLAG_HAS_COMMANDLINE) - pmcstat_kill_process(&args); + 
pmcstat_kill_process(); /* Close the pipe to self, if present. */ if (args.pa_flags & FLAG_HAS_PIPE) (void) close(pipefd[READPIPEFD]); @@ -1274,6 +1389,7 @@ main(int argc, char **argv) err(EX_OSERR, "ERROR: Cannot determine " "window size"); pmcstat_displayheight = ws.ws_row - 1; + pmcstat_displaywidth = ws.ws_col - 1; } else assert(0); @@ -1285,22 +1401,30 @@ main(int argc, char **argv) } - if (do_print && - (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) { - pmcstat_print_pmcs(&args); - if (runstate == PMCSTAT_FINISHED && /* final newline */ - (args.pa_flags & FLAG_DO_PRINT) == 0) - (void) fprintf(args.pa_printfile, "\n"); + if (do_print) { + if ((args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) { + pmcstat_print_pmcs(); + if (runstate == PMCSTAT_FINISHED && /* final newline */ + (args.pa_flags & FLAG_DO_PRINT) == 0) + (void) fprintf(args.pa_printfile, "\n"); + } + if (args.pa_flags & FLAG_DO_TOP) + pmcstat_display_log(); do_print = 0; } } while (runstate != PMCSTAT_FINISHED); + if ((args.pa_flags & FLAG_DO_TOP) && args.pa_toptty) { + pmcstat_topexit(); + args.pa_toptty = 0; + } + /* flush any pending log entries */ if (args.pa_flags & (FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE)) pmc_flush_logfile(); - pmcstat_cleanup(&args); + pmcstat_cleanup(); free(args.pa_kernel); diff --git a/usr.sbin/pmcstat/pmcstat.h b/usr.sbin/pmcstat/pmcstat.h index 5d6f5f2..1bb7862 100644 --- a/usr.sbin/pmcstat/pmcstat.h +++ b/usr.sbin/pmcstat/pmcstat.h @@ -47,13 +47,15 @@ #define FLAG_HAS_SAMPLESDIR 0x00000800 /* -D dir */ #define FLAG_HAS_KERNELPATH 0x00001000 /* -k kernel */ #define FLAG_DO_PRINT 0x00002000 /* -o */ -#define FLAG_DO_CALLGRAPHS 0x00004000 /* -G */ -#define FLAG_DO_ANALYSIS 0x00008000 /* -g or -G */ -#define FLAG_WANTS_MAPPINGS 0x00010000 /* -m */ +#define FLAG_DO_CALLGRAPHS 0x00004000 /* -G or -F */ +#define FLAG_DO_ANNOTATE 0x00008000 /* -m */ +#define FLAG_DO_TOP 0x00010000 /* -T */ +#define FLAG_DO_ANALYSIS 0x00020000 /* -g or -G or -m or -T */ #define 
DEFAULT_SAMPLE_COUNT 65536 #define DEFAULT_WAIT_INTERVAL 5.0 -#define DEFAULT_DISPLAY_HEIGHT 23 +#define DEFAULT_DISPLAY_HEIGHT 256 /* file virtual height */ +#define DEFAULT_DISPLAY_WIDTH 1024 /* file virtual width */ #define DEFAULT_BUFFER_SIZE 4096 #define DEFAULT_CALLGRAPH_DEPTH 4 @@ -75,12 +77,24 @@ #define PMCSTAT_LDD_COMMAND "/usr/bin/ldd" -#define PMCSTAT_PRINT_ENTRY(A,T,...) do { \ - (void) fprintf((A)->pa_printfile, "%-9s", T); \ - (void) fprintf((A)->pa_printfile, " " __VA_ARGS__); \ - (void) fprintf((A)->pa_printfile, "\n"); \ +#define PMCSTAT_PRINT_ENTRY(T,...) do { \ + (void) fprintf(args.pa_printfile, "%-9s", T); \ + (void) fprintf(args.pa_printfile, " " __VA_ARGS__); \ + (void) fprintf(args.pa_printfile, "\n"); \ } while (0) +#define PMCSTAT_PL_NONE 0 +#define PMCSTAT_PL_CALLGRAPH 1 +#define PMCSTAT_PL_GPROF 2 +#define PMCSTAT_PL_ANNOTATE 3 +#define PMCSTAT_PL_CALLTREE 4 + +#define PMCSTAT_TOP_DELTA 0 +#define PMCSTAT_TOP_ACCUM 1 + +#define min(A,B) ((A) < (B) ? (A) : (B)) +#define max(A,B) ((A) > (B) ? 
(A) : (B)) + enum pmcstat_state { PMCSTAT_FINISHED = 0, PMCSTAT_EXITING = 1, @@ -110,6 +124,8 @@ struct pmcstat_target { struct pmcstat_args { int pa_flags; /* argument flags */ int pa_required; /* required features */ + int pa_pplugin; /* pre-processing plugin */ + int pa_plugin; /* analysis plugin */ int pa_verbosity; /* verbosity level */ FILE *pa_printfile; /* where to send printed output */ int pa_logfd; /* output log file */ @@ -124,31 +140,44 @@ struct pmcstat_args { int pa_graphdepth; /* print depth for callgraphs */ double pa_interval; /* printing interval in seconds */ uint32_t pa_cpumask; /* filter for CPUs analysed */ + int pa_ctdumpinstr; /* dump instructions with calltree */ + int pa_topmode; /* delta or accumulative */ + int pa_toptty; /* output to tty or file */ + int pa_topcolor; /* terminal support color */ + int pa_mergepmc; /* merge PMC with same name */ int pa_argc; char **pa_argv; STAILQ_HEAD(, pmcstat_ev) pa_events; SLIST_HEAD(, pmcstat_target) pa_targets; -} args; +}; + +extern int pmcstat_displayheight; /* current terminal height */ +extern int pmcstat_displaywidth; /* current terminal width */ +extern struct pmcstat_args args; /* command line args */ /* Function prototypes */ -void pmcstat_attach_pmcs(struct pmcstat_args *_a); -void pmcstat_cleanup(struct pmcstat_args *_a); -void pmcstat_clone_event_descriptor(struct pmcstat_args *_a, +void pmcstat_attach_pmcs(void); +void pmcstat_cleanup(void); +void pmcstat_clone_event_descriptor( struct pmcstat_ev *_ev, uint32_t _cpumask); -int pmcstat_close_log(struct pmcstat_args *_a); -void pmcstat_create_process(struct pmcstat_args *_a); -void pmcstat_find_targets(struct pmcstat_args *_a, const char *_arg); -void pmcstat_initialize_logging(struct pmcstat_args *_a); -void pmcstat_kill_process(struct pmcstat_args *_a); +int pmcstat_close_log(void); +void pmcstat_create_process(void); +void pmcstat_find_targets(const char *_arg); +void pmcstat_initialize_logging(void); +void pmcstat_kill_process(void); 
int pmcstat_open_log(const char *_p, int _mode); -void pmcstat_print_counters(struct pmcstat_args *_a); -void pmcstat_print_headers(struct pmcstat_args *_a); -void pmcstat_print_pmcs(struct pmcstat_args *_a); +void pmcstat_print_counters(void); +void pmcstat_print_headers(void); +void pmcstat_print_pmcs(void); void pmcstat_show_usage(void); -void pmcstat_shutdown_logging(struct pmcstat_args *_a); -void pmcstat_start_pmcs(struct pmcstat_args *_a); +void pmcstat_shutdown_logging(void); +void pmcstat_start_pmcs(void); void pmcstat_start_process(void); -int pmcstat_process_log(struct pmcstat_args *_a); +int pmcstat_process_log(void); +int pmcstat_keypress_log(void); +void pmcstat_display_log(void); +void pmcstat_pluginconfigure_log(char *_opt); uint32_t pmcstat_get_cpumask(const char *_a); +void pmcstat_topexit(void); #endif /* _PMCSTAT_H_ */ diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c index a403852..5811af3 100644 --- a/usr.sbin/pmcstat/pmcstat_log.c +++ b/usr.sbin/pmcstat/pmcstat_log.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -68,9 +69,8 @@ __FBSDID("$FreeBSD$"); #include #include "pmcstat.h" - -#define min(A,B) ((A) < (B) ? (A) : (B)) -#define max(A,B) ((A) > (B) ? (A) : (B)) +#include "pmcstat_log.h" +#include "pmcstat_top.h" #define PMCSTAT_ALLOCATE 1 @@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$"); * pmcstat_shutdown_logging() orderly shutdown, called last * pmcstat_open_log() open an eventlog for processing * pmcstat_process_log() print/convert an event log + * pmcstat_display_log() top mode display for the log * pmcstat_close_log() finish processing an event log * * IMPLEMENTATION NOTES @@ -127,236 +128,125 @@ __FBSDID("$FreeBSD$"); * also given a 'rank' that reflects its depth in the call stack. */ -typedef const void *pmcstat_interned_string; - -/* - * 'pmcstat_pmcrecord' is a mapping from PMC ids to human-readable - * names. 
- */ - -struct pmcstat_pmcrecord { - LIST_ENTRY(pmcstat_pmcrecord) pr_next; - pmc_id_t pr_pmcid; - pmcstat_interned_string pr_pmcname; -}; - -static LIST_HEAD(,pmcstat_pmcrecord) pmcstat_pmcs = - LIST_HEAD_INITIALIZER(pmcstat_pmcs); - - -/* - * struct pmcstat_gmonfile tracks a given 'gmon.out' file. These - * files are mmap()'ed in as needed. - */ - -struct pmcstat_gmonfile { - LIST_ENTRY(pmcstat_gmonfile) pgf_next; /* list of entries */ - int pgf_overflow; /* whether a count overflowed */ - pmc_id_t pgf_pmcid; /* id of the associated pmc */ - size_t pgf_nbuckets; /* #buckets in this gmon.out */ - unsigned int pgf_nsamples; /* #samples in this gmon.out */ - pmcstat_interned_string pgf_name; /* pathname of gmon.out file */ - size_t pgf_ndatabytes; /* number of bytes mapped */ - void *pgf_gmondata; /* pointer to mmap'ed data */ - FILE *pgf_file; /* used when writing gmon arcs */ -}; - -/* - * A 'pmcstat_image' structure describes an executable program on - * disk. 'pi_execpath' is a cookie representing the pathname of - * the executable. 'pi_start' and 'pi_end' are the least and greatest - * virtual addresses for the text segments in the executable. - * 'pi_gmonlist' contains a linked list of gmon.out files associated - * with this image. 
- */ - -enum pmcstat_image_type { - PMCSTAT_IMAGE_UNKNOWN = 0, /* never looked at the image */ - PMCSTAT_IMAGE_INDETERMINABLE, /* can't tell what the image is */ - PMCSTAT_IMAGE_ELF32, /* ELF 32 bit object */ - PMCSTAT_IMAGE_ELF64, /* ELF 64 bit object */ - PMCSTAT_IMAGE_AOUT /* AOUT object */ -}; - -struct pmcstat_image { - LIST_ENTRY(pmcstat_image) pi_next; /* hash link */ - TAILQ_ENTRY(pmcstat_image) pi_lru; /* LRU list */ - pmcstat_interned_string pi_execpath; /* cookie */ - pmcstat_interned_string pi_samplename; /* sample path name */ - pmcstat_interned_string pi_fullpath; /* path to FS object */ - - enum pmcstat_image_type pi_type; /* executable type */ - - /* - * Executables have pi_start and pi_end; these are zero - * for shared libraries. - */ - uintfptr_t pi_start; /* start address (inclusive) */ - uintfptr_t pi_end; /* end address (exclusive) */ - uintfptr_t pi_entry; /* entry address */ - uintfptr_t pi_vaddr; /* virtual address where loaded */ - int pi_isdynamic; /* whether a dynamic object */ - int pi_iskernelmodule; - pmcstat_interned_string pi_dynlinkerpath; /* path in .interp */ - - /* All symbols associated with this object. */ - struct pmcstat_symbol *pi_symbols; - size_t pi_symcount; - - /* - * An image can be associated with one or more gmon.out files; - * one per PMC. - */ - LIST_HEAD(,pmcstat_gmonfile) pi_gmlist; -}; +struct pmcstat_pmcs pmcstat_pmcs = LIST_HEAD_INITIALIZER(pmcstat_pmcs); /* * All image descriptors are kept in a hash table. */ -static LIST_HEAD(,pmcstat_image) pmcstat_image_hash[PMCSTAT_NHASH]; - -/* - * A 'pmcstat_pcmap' structure maps a virtual address range to an - * underlying 'pmcstat_image' descriptor. - */ -struct pmcstat_pcmap { - TAILQ_ENTRY(pmcstat_pcmap) ppm_next; - uintfptr_t ppm_lowpc; - uintfptr_t ppm_highpc; - struct pmcstat_image *ppm_image; -}; +struct pmcstat_image_hash_list pmcstat_image_hash[PMCSTAT_NHASH]; /* - * A 'pmcstat_process' structure models processes. 
Each process is - * associated with a set of pmcstat_pcmap structures that map - * addresses inside it to executable objects. This set is implemented - * as a list, kept sorted in ascending order of mapped addresses. - * - * 'pp_pid' holds the pid of the process. When a process exits, the - * 'pp_isactive' field is set to zero, but the process structure is - * not immediately reclaimed because there may still be samples in the - * log for this process. + * All process descriptors are kept in a hash table. */ +struct pmcstat_process_hash_list pmcstat_process_hash[PMCSTAT_NHASH]; -struct pmcstat_process { - LIST_ENTRY(pmcstat_process) pp_next; /* hash-next */ - pid_t pp_pid; /* associated pid */ - int pp_isactive; /* whether active */ - uintfptr_t pp_entryaddr; /* entry address */ - TAILQ_HEAD(,pmcstat_pcmap) pp_map; /* address range map */ -}; +struct pmcstat_stats pmcstat_stats; /* statistics */ -/* - * All process descriptors are kept in a hash table. - */ -static LIST_HEAD(,pmcstat_process) pmcstat_process_hash[PMCSTAT_NHASH]; +struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */ -static struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */ +#include "pmcpl_gprof.h" +#include "pmcpl_callgraph.h" +#include "pmcpl_annotate.h" +#include "pmcpl_calltree.h" -/* - * Each function symbol tracked by pmcstat(8). - */ +struct pmc_plugins { + const char *pl_name; /* name */ -struct pmcstat_symbol { - pmcstat_interned_string ps_name; - uint64_t ps_start; - uint64_t ps_end; -}; + /* configure */ + int (*pl_configure)(char *opt); -/* - * Each call graph node is tracked by a pmcstat_cgnode struct. 
- */ + /* init and shutdown */ + int (*pl_init)(void); + void (*pl_shutdown)(FILE *mf); -struct pmcstat_cgnode { - struct pmcstat_image *pcg_image; - uintfptr_t pcg_func; - uint32_t pcg_count; - uint32_t pcg_nchildren; - LIST_ENTRY(pmcstat_cgnode) pcg_sibling; - LIST_HEAD(,pmcstat_cgnode) pcg_children; -}; + /* sample processing */ + void (*pl_process)(struct pmcstat_process *pp, + struct pmcstat_pmcrecord *pmcr, uint32_t nsamples, + uintfptr_t *cc, int usermode, uint32_t cpu); -struct pmcstat_cgnode_hash { - struct pmcstat_cgnode *pch_cgnode; - uint32_t pch_pmcid; - LIST_ENTRY(pmcstat_cgnode_hash) pch_next; -}; + /* image */ + void (*pl_initimage)(struct pmcstat_image *pi); + void (*pl_shutdownimage)(struct pmcstat_image *pi); -static int pmcstat_cgnode_hash_count; -static pmcstat_interned_string pmcstat_previous_filename_printed; + /* pmc */ + void (*pl_newpmc)(pmcstat_interned_string ps, + struct pmcstat_pmcrecord *pr); + + /* top display */ + void (*pl_topdisplay)(void); -/* - * The toplevel CG nodes (i.e., with rank == 0) are placed in a hash table. 
- */ + /* top keypress */ + int (*pl_topkeypress)(int c, WINDOW *w); -static LIST_HEAD(,pmcstat_cgnode_hash) pmcstat_cgnode_hash[PMCSTAT_NHASH]; +} plugins[] = { + { + .pl_name = "none", + }, + { + .pl_name = "callgraph", + .pl_init = pmcpl_cg_init, + .pl_shutdown = pmcpl_cg_shutdown, + .pl_process = pmcpl_cg_process, + .pl_topkeypress = pmcpl_cg_topkeypress, + .pl_topdisplay = pmcpl_cg_topdisplay + }, + { + .pl_name = "gprof", + .pl_shutdown = pmcpl_gmon_shutdown, + .pl_process = pmcpl_gmon_process, + .pl_initimage = pmcpl_gmon_initimage, + .pl_shutdownimage = pmcpl_gmon_shutdownimage, + .pl_newpmc = pmcpl_gmon_newpmc + }, + { + .pl_name = "annotate", + .pl_process = pmcpl_annotate_process + }, + { + .pl_name = "calltree", + .pl_configure = pmcpl_ct_configure, + .pl_init = pmcpl_ct_init, + .pl_shutdown = pmcpl_ct_shutdown, + .pl_process = pmcpl_ct_process, + .pl_topkeypress = pmcpl_ct_topkeypress, + .pl_topdisplay = pmcpl_ct_topdisplay + }, + { + .pl_name = NULL + } +}; -/* Misc. statistics */ -static struct pmcstat_stats { - int ps_exec_aout; /* # a.out executables seen */ - int ps_exec_elf; /* # elf executables seen */ - int ps_exec_errors; /* # errors processing executables */ - int ps_exec_indeterminable; /* # unknown executables seen */ - int ps_samples_total; /* total number of samples processed */ - int ps_samples_skipped; /* #samples filtered out for any reason */ - int ps_samples_unknown_offset; /* #samples of rank 0 not in a map */ - int ps_samples_indeterminable; /* #samples in indeterminable images */ - int ps_callchain_dubious_frames;/* #dubious frame pointers seen */ -} pmcstat_stats; +int pmcstat_mergepmc; +int pmcstat_pmcinfilter = 0; /* PMC filter for top mode. */ +float pmcstat_threshold = 0.5; /* Cost filter for top mode. 
*/ /* * Prototypes */ -static void pmcstat_gmon_create_file(struct pmcstat_gmonfile *_pgf, - struct pmcstat_image *_image); -static pmcstat_interned_string pmcstat_gmon_create_name(const char *_sd, - struct pmcstat_image *_img, pmc_id_t _pmcid); -static void pmcstat_gmon_map_file(struct pmcstat_gmonfile *_pgf); -static void pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *_pgf); - -static void pmcstat_image_determine_type(struct pmcstat_image *_image, - struct pmcstat_args *_a); -static struct pmcstat_gmonfile *pmcstat_image_find_gmonfile(struct - pmcstat_image *_i, pmc_id_t _id); static struct pmcstat_image *pmcstat_image_from_path(pmcstat_interned_string _path, int _iskernelmodule); -static void pmcstat_image_get_aout_params(struct pmcstat_image *_image, - struct pmcstat_args *_a); -static void pmcstat_image_get_elf_params(struct pmcstat_image *_image, - struct pmcstat_args *_a); -static void pmcstat_image_increment_bucket(struct pmcstat_pcmap *_pcm, - uintfptr_t _pc, pmc_id_t _pmcid, struct pmcstat_args *_a); +static void pmcstat_image_get_aout_params(struct pmcstat_image *_image); +static void pmcstat_image_get_elf_params(struct pmcstat_image *_image); static void pmcstat_image_link(struct pmcstat_process *_pp, struct pmcstat_image *_i, uintfptr_t _lpc); static void pmcstat_pmcid_add(pmc_id_t _pmcid, - pmcstat_interned_string _name, struct pmcstat_args *_a); -static const char *pmcstat_pmcid_to_name(pmc_id_t _pmcid); + pmcstat_interned_string _name); static void pmcstat_process_aout_exec(struct pmcstat_process *_pp, - struct pmcstat_image *_image, uintfptr_t _entryaddr, - struct pmcstat_args *_a); + struct pmcstat_image *_image, uintfptr_t _entryaddr); static void pmcstat_process_elf_exec(struct pmcstat_process *_pp, - struct pmcstat_image *_image, uintfptr_t _entryaddr, - struct pmcstat_args *_a); + struct pmcstat_image *_image, uintfptr_t _entryaddr); static void pmcstat_process_exec(struct pmcstat_process *_pp, - pmcstat_interned_string _path, uintfptr_t 
_entryaddr, - struct pmcstat_args *_ao); + pmcstat_interned_string _path, uintfptr_t _entryaddr); static struct pmcstat_process *pmcstat_process_lookup(pid_t _pid, int _allocate); -static struct pmcstat_pcmap *pmcstat_process_find_map( - struct pmcstat_process *_p, uintfptr_t _pc); - static int pmcstat_string_compute_hash(const char *_string); static void pmcstat_string_initialize(void); -static pmcstat_interned_string pmcstat_string_intern(const char *_s); -static pmcstat_interned_string pmcstat_string_lookup(const char *_s); static int pmcstat_string_lookup_hash(pmcstat_interned_string _is); static void pmcstat_string_shutdown(void); -static const char *pmcstat_string_unintern(pmcstat_interned_string _is); - /* * A simple implementation of interned strings. Each interned string @@ -375,6 +265,16 @@ struct pmcstat_string { static LIST_HEAD(,pmcstat_string) pmcstat_string_hash[PMCSTAT_NHASH]; /* + * PMC count. + */ +int pmcstat_npmcs; + +/* + * PMC Top mode pause state. + */ +int pmcstat_pause; + +/* * Compute a 'hash' value for a string. */ @@ -394,7 +294,7 @@ pmcstat_string_compute_hash(const char *s) * interned structure. */ -static pmcstat_interned_string +pmcstat_interned_string pmcstat_string_intern(const char *s) { struct pmcstat_string *ps; @@ -416,7 +316,7 @@ pmcstat_string_intern(const char *s) return ((pmcstat_interned_string) ps); } -static const char * +const char * pmcstat_string_unintern(pmcstat_interned_string str) { const char *s; @@ -425,7 +325,7 @@ pmcstat_string_unintern(pmcstat_interned_string str) return (s); } -static pmcstat_interned_string +pmcstat_interned_string pmcstat_string_lookup(const char *s) { struct pmcstat_string *ps; @@ -483,163 +383,13 @@ pmcstat_string_shutdown(void) } /* - * Create a gmon.out file and size it. 
- */ - -static void -pmcstat_gmon_create_file(struct pmcstat_gmonfile *pgf, - struct pmcstat_image *image) -{ - int fd; - size_t count; - struct gmonhdr gm; - const char *pathname; - char buffer[DEFAULT_BUFFER_SIZE]; - - pathname = pmcstat_string_unintern(pgf->pgf_name); - if ((fd = open(pathname, O_RDWR|O_NOFOLLOW|O_CREAT, - S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0) - err(EX_OSERR, "ERROR: Cannot open \"%s\"", pathname); - - gm.lpc = image->pi_start; - gm.hpc = image->pi_end; - gm.ncnt = (pgf->pgf_nbuckets * sizeof(HISTCOUNTER)) + - sizeof(struct gmonhdr); - gm.version = GMONVERSION; - gm.profrate = 0; /* use ticks */ - gm.histcounter_type = 0; /* compatibility with moncontrol() */ - gm.spare[0] = gm.spare[1] = 0; - - /* Write out the gmon header */ - if (write(fd, &gm, sizeof(gm)) < 0) - goto error; - - /* Zero fill the samples[] array */ - (void) memset(buffer, 0, sizeof(buffer)); - - count = pgf->pgf_ndatabytes - sizeof(struct gmonhdr); - while (count > sizeof(buffer)) { - if (write(fd, &buffer, sizeof(buffer)) < 0) - goto error; - count -= sizeof(buffer); - } - - if (write(fd, &buffer, count) < 0) - goto error; - - (void) close(fd); - - return; - - error: - err(EX_OSERR, "ERROR: Cannot write \"%s\"", pathname); -} - -/* - * Determine the full pathname of a gmon.out file for a given - * (image,pmcid) combination. Return the interned string. - */ - -pmcstat_interned_string -pmcstat_gmon_create_name(const char *samplesdir, struct pmcstat_image *image, - pmc_id_t pmcid) -{ - const char *pmcname; - char fullpath[PATH_MAX]; - - pmcname = pmcstat_pmcid_to_name(pmcid); - - (void) snprintf(fullpath, sizeof(fullpath), - "%s/%s/%s", samplesdir, pmcname, - pmcstat_string_unintern(image->pi_samplename)); - - return (pmcstat_string_intern(fullpath)); -} - - -/* - * Mmap in a gmon.out file for processing. 
- */ - -static void -pmcstat_gmon_map_file(struct pmcstat_gmonfile *pgf) -{ - int fd; - const char *pathname; - - pathname = pmcstat_string_unintern(pgf->pgf_name); - - /* the gmon.out file must already exist */ - if ((fd = open(pathname, O_RDWR | O_NOFOLLOW, 0)) < 0) - err(EX_OSERR, "ERROR: cannot open \"%s\"", pathname); - - pgf->pgf_gmondata = mmap(NULL, pgf->pgf_ndatabytes, - PROT_READ|PROT_WRITE, MAP_NOSYNC|MAP_SHARED, fd, 0); - - if (pgf->pgf_gmondata == MAP_FAILED) - err(EX_OSERR, "ERROR: cannot map \"%s\"", pathname); - - (void) close(fd); -} - -/* - * Unmap a gmon.out file after sync'ing its data to disk. - */ - -static void -pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *pgf) -{ - (void) msync(pgf->pgf_gmondata, pgf->pgf_ndatabytes, - MS_SYNC); - (void) munmap(pgf->pgf_gmondata, pgf->pgf_ndatabytes); - pgf->pgf_gmondata = NULL; -} - -static void -pmcstat_gmon_append_arc(struct pmcstat_image *image, pmc_id_t pmcid, - uintptr_t rawfrom, uintptr_t rawto, uint32_t count) -{ - struct rawarc arc; /* from */ - const char *pathname; - struct pmcstat_gmonfile *pgf; - - if ((pgf = pmcstat_image_find_gmonfile(image, pmcid)) == NULL) - return; - - if (pgf->pgf_file == NULL) { - pathname = pmcstat_string_unintern(pgf->pgf_name); - if ((pgf->pgf_file = fopen(pathname, "a")) == NULL) - return; - } - - arc.raw_frompc = rawfrom + image->pi_vaddr; - arc.raw_selfpc = rawto + image->pi_vaddr; - arc.raw_count = count; - - (void) fwrite(&arc, sizeof(arc), 1, pgf->pgf_file); - -} - -static struct pmcstat_gmonfile * -pmcstat_image_find_gmonfile(struct pmcstat_image *image, pmc_id_t pmcid) -{ - struct pmcstat_gmonfile *pgf; - LIST_FOREACH(pgf, &image->pi_gmlist, pgf_next) - if (pgf->pgf_pmcid == pmcid) - return (pgf); - return (NULL); -} - - -/* * Determine whether a given executable image is an A.OUT object, and * if so, fill in its parameters from the text file. * Sets image->pi_type. 
*/ static void -pmcstat_image_get_aout_params(struct pmcstat_image *image, - struct pmcstat_args *a) +pmcstat_image_get_aout_params(struct pmcstat_image *image) { int fd; ssize_t nbytes; @@ -655,7 +405,7 @@ pmcstat_image_get_aout_params(struct pmcstat_image *image, "unsupported \"%s\"", path); (void) snprintf(buffer, sizeof(buffer), "%s%s", - a->pa_fsroot, path); + args.pa_fsroot, path); if ((fd = open(buffer, O_RDONLY, 0)) < 0 || (nbytes = read(fd, &ex, sizeof(ex))) < 0) { @@ -702,7 +452,7 @@ pmcstat_symbol_compare(const void *a, const void *b) * Map an address to a symbol in an image. */ -static struct pmcstat_symbol * +struct pmcstat_symbol * pmcstat_symbol_search(struct pmcstat_image *image, uintfptr_t addr) { struct pmcstat_symbol sym; @@ -825,12 +575,12 @@ pmcstat_image_add_symbols(struct pmcstat_image *image, Elf *e, */ static void -pmcstat_image_get_elf_params(struct pmcstat_image *image, - struct pmcstat_args *a) +pmcstat_image_get_elf_params(struct pmcstat_image *image) { int fd; size_t i, nph, nsh; const char *path, *elfbase; + char *p, *endp; uintfptr_t minva, maxva; Elf *e; Elf_Scn *scn; @@ -858,10 +608,10 @@ pmcstat_image_get_elf_params(struct pmcstat_image *image, */ if (image->pi_iskernelmodule) (void) snprintf(buffer, sizeof(buffer), "%s%s/%s", - a->pa_fsroot, a->pa_kernel, path); + args.pa_fsroot, args.pa_kernel, path); else (void) snprintf(buffer, sizeof(buffer), "%s%s", - a->pa_fsroot, path); + args.pa_fsroot, path); e = NULL; if ((fd = open(buffer, O_RDONLY, 0)) < 0 || @@ -960,6 +710,14 @@ pmcstat_image_get_elf_params(struct pmcstat_image *image, image->pi_type = image_type; image->pi_fullpath = pmcstat_string_intern(buffer); + /* Build display name + */ + endp = buffer; + for (p = buffer; *p; p++) + if (*p == '/') + endp = p+1; + image->pi_name = pmcstat_string_intern(endp); + done: (void) elf_end(e); if (fd >= 0) @@ -972,17 +730,16 @@ pmcstat_image_get_elf_params(struct pmcstat_image *image, * If no handler claims the image, set its type to 
'INDETERMINABLE'. */ -static void -pmcstat_image_determine_type(struct pmcstat_image *image, - struct pmcstat_args *a) +void +pmcstat_image_determine_type(struct pmcstat_image *image) { assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN); /* Try each kind of handler in turn */ if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) - pmcstat_image_get_elf_params(image, a); + pmcstat_image_get_elf_params(image); if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) - pmcstat_image_get_aout_params(image, a); + pmcstat_image_get_aout_params(image); /* * Otherwise, remember that we tried to determine @@ -1006,10 +763,8 @@ static struct pmcstat_image * pmcstat_image_from_path(pmcstat_interned_string internedpath, int iskernelmodule) { - int count, hash, nlen; + int hash; struct pmcstat_image *pi; - char *sn; - char name[NAME_MAX]; hash = pmcstat_string_lookup_hash(internedpath); @@ -1038,50 +793,12 @@ pmcstat_image_from_path(pmcstat_interned_string internedpath, pi->pi_dynlinkerpath = NULL; pi->pi_symbols = NULL; pi->pi_symcount = 0; + pi->pi_addr2line = NULL; - /* - * Look for a suitable name for the sample files associated - * with this image: if `basename(path)`+".gmon" is available, - * we use that, otherwise we try iterating through - * `basename(path)`+ "~" + NNN + ".gmon" till we get a free - * entry. - */ - if ((sn = basename(pmcstat_string_unintern(internedpath))) == NULL) - err(EX_OSERR, "ERROR: Cannot process \"%s\"", - pmcstat_string_unintern(internedpath)); - - nlen = strlen(sn); - nlen = min(nlen, (int) (sizeof(name) - sizeof(".gmon"))); - - snprintf(name, sizeof(name), "%.*s.gmon", nlen, sn); - - /* try use the unabridged name first */ - if (pmcstat_string_lookup(name) == NULL) - pi->pi_samplename = pmcstat_string_intern(name); - else { - /* - * Otherwise use a prefix from the original name and - * upto 3 digits. 
- */ - nlen = strlen(sn); - nlen = min(nlen, (int) (sizeof(name)-sizeof("~NNN.gmon"))); - count = 0; - do { - if (++count > 999) - errx(EX_CANTCREAT, "ERROR: cannot create a " - "gmon file for \"%s\"", name); - snprintf(name, sizeof(name), "%.*s~%3.3d.gmon", - nlen, sn, count); - if (pmcstat_string_lookup(name) == NULL) { - pi->pi_samplename = - pmcstat_string_intern(name); - count = 0; - } - } while (count > 0); - } - - - LIST_INIT(&pi->pi_gmlist); + if (plugins[args.pa_pplugin].pl_initimage != NULL) + plugins[args.pa_pplugin].pl_initimage(pi); + if (plugins[args.pa_plugin].pl_initimage != NULL) + plugins[args.pa_plugin].pl_initimage(pi); LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next); @@ -1089,94 +806,6 @@ pmcstat_image_from_path(pmcstat_interned_string internedpath, } /* - * Increment the bucket in the gmon.out file corresponding to 'pmcid' - * and 'pc'. - */ - -static void -pmcstat_image_increment_bucket(struct pmcstat_pcmap *map, uintfptr_t pc, - pmc_id_t pmcid, struct pmcstat_args *a) -{ - struct pmcstat_image *image; - struct pmcstat_gmonfile *pgf; - uintfptr_t bucket; - HISTCOUNTER *hc; - - assert(pc >= map->ppm_lowpc && pc < map->ppm_highpc); - - image = map->ppm_image; - - /* - * If this is the first time we are seeing a sample for - * this executable image, try determine its parameters. - */ - if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) - pmcstat_image_determine_type(image, a); - - assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN); - - /* Ignore samples in images that we know nothing about. */ - if (image->pi_type == PMCSTAT_IMAGE_INDETERMINABLE) { - pmcstat_stats.ps_samples_indeterminable++; - return; - } - - /* - * Find the gmon file corresponding to 'pmcid', creating it if - * needed. 
- */ - pgf = pmcstat_image_find_gmonfile(image, pmcid); - if (pgf == NULL) { - if ((pgf = calloc(1, sizeof(*pgf))) == NULL) - err(EX_OSERR, "ERROR:"); - - pgf->pgf_gmondata = NULL; /* mark as unmapped */ - pgf->pgf_name = pmcstat_gmon_create_name(a->pa_samplesdir, - image, pmcid); - pgf->pgf_pmcid = pmcid; - assert(image->pi_end > image->pi_start); - pgf->pgf_nbuckets = (image->pi_end - image->pi_start) / - FUNCTION_ALIGNMENT; /* see */ - pgf->pgf_ndatabytes = sizeof(struct gmonhdr) + - pgf->pgf_nbuckets * sizeof(HISTCOUNTER); - pgf->pgf_nsamples = 0; - pgf->pgf_file = NULL; - - pmcstat_gmon_create_file(pgf, image); - - LIST_INSERT_HEAD(&image->pi_gmlist, pgf, pgf_next); - } - - /* - * Map the gmon file in if needed. It may have been mapped - * out under memory pressure. - */ - if (pgf->pgf_gmondata == NULL) - pmcstat_gmon_map_file(pgf); - - assert(pgf->pgf_gmondata != NULL); - - /* - * - */ - - bucket = (pc - map->ppm_lowpc) / FUNCTION_ALIGNMENT; - - assert(bucket < pgf->pgf_nbuckets); - - hc = (HISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata + - sizeof(struct gmonhdr)); - - /* saturating add */ - if (hc[bucket] < 0xFFFFU) /* XXX tie this to sizeof(HISTCOUNTER) */ - hc[bucket]++; - else /* mark that an overflow occurred */ - pgf->pgf_overflow = 1; - - pgf->pgf_nsamples++; -} - -/* * Record the fact that PC values from 'start' to 'end' come from * image 'image'. */ @@ -1284,72 +913,181 @@ pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start, } /* + * Resolve file name and line number for the given address. 
+ */ +int +pmcstat_image_addr2line(struct pmcstat_image *image, uintfptr_t addr, + char *sourcefile, size_t sourcefile_len, unsigned *sourceline, + char *funcname, size_t funcname_len) +{ + static int addr2line_warn = 0; + + char *sep, cmdline[PATH_MAX], imagepath[PATH_MAX]; + int fd; + + if (image->pi_addr2line == NULL) { + snprintf(imagepath, sizeof(imagepath), "%s.symbols", + pmcstat_string_unintern(image->pi_fullpath)); + fd = open(imagepath, O_RDONLY); + if (fd < 0) { + snprintf(imagepath, sizeof(imagepath), "%s", + pmcstat_string_unintern(image->pi_fullpath)); + } else + close(fd); + snprintf(cmdline, sizeof(cmdline), "addr2line -Cfe \"%s\"", + imagepath); + image->pi_addr2line = popen(cmdline, "r+"); + if (image->pi_addr2line == NULL) { + if (!addr2line_warn) { + addr2line_warn = 1; + warnx("WARNING: addr2line is needed" + "for source code information."); + } + return (0); + } + } + + if (feof(image->pi_addr2line) || ferror(image->pi_addr2line)) { + warnx("WARNING: addr2line pipe error"); + pclose(image->pi_addr2line); + image->pi_addr2line = NULL; + return (0); + } + + fprintf(image->pi_addr2line, "%p\n", (void *)addr); + + if (fgets(funcname, funcname_len, image->pi_addr2line) == NULL) { + warnx("WARNING: addr2line function name read error"); + return (0); + } + sep = strchr(funcname, '\n'); + if (sep != NULL) + *sep = '\0'; + + if (fgets(sourcefile, sourcefile_len, image->pi_addr2line) == NULL) { + warnx("WARNING: addr2line source file read error"); + return (0); + } + sep = strchr(sourcefile, ':'); + if (sep == NULL) { + warnx("WARNING: addr2line source line separator missing"); + return (0); + } + *sep = '\0'; + *sourceline = atoi(sep+1); + if (*sourceline == 0) + return (0); + + return (1); +} + +/* * Add a {pmcid,name} mapping. 
*/ static void -pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps, - struct pmcstat_args *a) +pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps) { - struct pmcstat_pmcrecord *pr; - struct stat st; - char fullpath[PATH_MAX]; + struct pmcstat_pmcrecord *pr, *prm; /* Replace an existing name for the PMC. */ + prm = NULL; LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) - if (pr->pr_pmcid == pmcid) { - pr->pr_pmcname = ps; - return; - } + if (pr->pr_pmcid == pmcid) { + pr->pr_pmcname = ps; + return; + } else if (pr->pr_pmcname == ps) + prm = pr; /* - * Otherwise, allocate a new descriptor and create the - * appropriate directory to hold gmon.out files. + * Otherwise, allocate a new descriptor and call the + * plugins hook. */ if ((pr = malloc(sizeof(*pr))) == NULL) err(EX_OSERR, "ERROR: Cannot allocate pmc record"); pr->pr_pmcid = pmcid; pr->pr_pmcname = ps; - LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next); + pr->pr_pmcin = pmcstat_npmcs++; + pr->pr_merge = prm == NULL ? pr : prm; - (void) snprintf(fullpath, sizeof(fullpath), "%s/%s", a->pa_samplesdir, - pmcstat_string_unintern(ps)); - - /* If the path name exists, it should be a directory */ - if (stat(fullpath, &st) == 0 && S_ISDIR(st.st_mode)) - return; + LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next); - if (mkdir(fullpath, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) < 0) - err(EX_OSERR, "ERROR: Cannot create directory \"%s\"", - fullpath); + if (plugins[args.pa_pplugin].pl_newpmc != NULL) + plugins[args.pa_pplugin].pl_newpmc(ps, pr); + if (plugins[args.pa_plugin].pl_newpmc != NULL) + plugins[args.pa_plugin].pl_newpmc(ps, pr); } /* * Given a pmcid in use, find its human-readable name. 
*/ -static const char * +const char * pmcstat_pmcid_to_name(pmc_id_t pmcid) { struct pmcstat_pmcrecord *pr; - char fullpath[PATH_MAX]; LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) if (pr->pr_pmcid == pmcid) return (pmcstat_string_unintern(pr->pr_pmcname)); - /* create a default name and add this entry */ - if ((pr = malloc(sizeof(*pr))) == NULL) - err(EX_OSERR, "ERROR: "); - pr->pr_pmcid = pmcid; + err(EX_SOFTWARE, "ERROR: cannot find pmcid"); + return NULL; +} - (void) snprintf(fullpath, sizeof(fullpath), "%X", (unsigned int) pmcid); - pr->pr_pmcname = pmcstat_string_intern(fullpath); +/* + * Convert PMC index to name. + */ - LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next); +const char * +pmcstat_pmcindex_to_name(int pmcin) +{ + struct pmcstat_pmcrecord *pr; + + LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) + if (pr->pr_pmcin == pmcin) + return pmcstat_string_unintern(pr->pr_pmcname); + + err(EX_SOFTWARE, "ERROR: cannot find pmcid name"); + return NULL; +} + +/* + * Return PMC record with given index. + */ - return (pmcstat_string_unintern(pr->pr_pmcname)); +struct pmcstat_pmcrecord * +pmcstat_pmcindex_to_pmcr(int pmcin) +{ + struct pmcstat_pmcrecord *pr; + + LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) + if (pr->pr_pmcin == pmcin) + return pr; + + err(EX_SOFTWARE, "ERROR: invalid pmcindex"); + return NULL; +} + +/* + * Get PMC record by id, apply merge policy. 
+ */ + +static struct pmcstat_pmcrecord * +pmcstat_lookup_pmcid(pmc_id_t pmcid) +{ + struct pmcstat_pmcrecord *pr; + + LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) { + if (pr->pr_pmcid == pmcid) { + if (pmcstat_mergepmc) + return pr->pr_merge; + return pr; + } + } + + return NULL; } /* @@ -1358,13 +1096,11 @@ pmcstat_pmcid_to_name(pmc_id_t pmcid) static void pmcstat_process_aout_exec(struct pmcstat_process *pp, - struct pmcstat_image *image, uintfptr_t entryaddr, - struct pmcstat_args *a) + struct pmcstat_image *image, uintfptr_t entryaddr) { (void) pp; (void) image; (void) entryaddr; - (void) a; /* TODO Implement a.out handling */ } @@ -1374,8 +1110,7 @@ pmcstat_process_aout_exec(struct pmcstat_process *pp, static void pmcstat_process_elf_exec(struct pmcstat_process *pp, - struct pmcstat_image *image, uintfptr_t entryaddr, - struct pmcstat_args *a) + struct pmcstat_image *image, uintfptr_t entryaddr) { uintmax_t libstart; struct pmcstat_image *rtldimage; @@ -1414,8 +1149,7 @@ pmcstat_process_elf_exec(struct pmcstat_process *pp, * this we can figure out the address where the * runtime loader's file object had been mapped to. 
*/ - rtldimage = pmcstat_image_from_path(image->pi_dynlinkerpath, - 0); + rtldimage = pmcstat_image_from_path(image->pi_dynlinkerpath, 0); if (rtldimage == NULL) { warnx("WARNING: Cannot find image for \"%s\".", pmcstat_string_unintern(image->pi_dynlinkerpath)); @@ -1424,7 +1158,7 @@ pmcstat_process_elf_exec(struct pmcstat_process *pp, } if (rtldimage->pi_type == PMCSTAT_IMAGE_UNKNOWN) - pmcstat_image_get_elf_params(rtldimage, a); + pmcstat_image_get_elf_params(rtldimage); if (rtldimage->pi_type != PMCSTAT_IMAGE_ELF32 && rtldimage->pi_type != PMCSTAT_IMAGE_ELF64) { @@ -1495,8 +1229,7 @@ pmcstat_process_lookup(pid_t pid, int allocate) static void pmcstat_process_exec(struct pmcstat_process *pp, - pmcstat_interned_string path, uintfptr_t entryaddr, - struct pmcstat_args *a) + pmcstat_interned_string path, uintfptr_t entryaddr) { struct pmcstat_image *image; @@ -1506,7 +1239,7 @@ pmcstat_process_exec(struct pmcstat_process *pp, } if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) - pmcstat_image_determine_type(image, a); + pmcstat_image_determine_type(image); assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN); @@ -1514,12 +1247,12 @@ pmcstat_process_exec(struct pmcstat_process *pp, case PMCSTAT_IMAGE_ELF32: case PMCSTAT_IMAGE_ELF64: pmcstat_stats.ps_exec_elf++; - pmcstat_process_elf_exec(pp, image, entryaddr, a); + pmcstat_process_elf_exec(pp, image, entryaddr); break; case PMCSTAT_IMAGE_AOUT: pmcstat_stats.ps_exec_aout++; - pmcstat_process_aout_exec(pp, image, entryaddr, a); + pmcstat_process_aout_exec(pp, image, entryaddr); break; case PMCSTAT_IMAGE_INDETERMINABLE: @@ -1537,7 +1270,7 @@ pmcstat_process_exec(struct pmcstat_process *pp, * Find the map entry associated with process 'p' at PC value 'pc'. 
*/ -static struct pmcstat_pcmap * +struct pmcstat_pcmap * pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc) { struct pmcstat_pcmap *ppm; @@ -1552,444 +1285,36 @@ pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc) return (NULL); } -static struct pmcstat_cgnode * -pmcstat_cgnode_allocate(struct pmcstat_image *image, uintfptr_t pc) +/* + * Convert a hwpmc(4) log to profile information. A system-wide + * callgraph is generated if FLAG_DO_CALLGRAPHS is set. gmon.out + * files usable by gprof(1) are created if FLAG_DO_GPROF is set. + */ +static int +pmcstat_analyze_log(void) { - struct pmcstat_cgnode *cg; + uint32_t cpu, cpuflags; + uintfptr_t pc; + pid_t pid; + struct pmcstat_image *image; + struct pmcstat_process *pp, *ppnew; + struct pmcstat_pcmap *ppm, *ppmtmp; + struct pmclog_ev ev; + struct pmcstat_pmcrecord *pmcr; + pmcstat_interned_string image_path; - if ((cg = malloc(sizeof(*cg))) == NULL) - err(EX_OSERR, "ERROR: Cannot allocate callgraph node"); + assert(args.pa_flags & FLAG_DO_ANALYSIS); - cg->pcg_image = image; - cg->pcg_func = pc; + if (elf_version(EV_CURRENT) == EV_NONE) + err(EX_UNAVAILABLE, "Elf library intialization failed"); - cg->pcg_count = 0; - cg->pcg_nchildren = 0; - LIST_INIT(&cg->pcg_children); - - return (cg); -} - -/* - * Free a node and its children. - */ -static void -pmcstat_cgnode_free(struct pmcstat_cgnode *cg) -{ - struct pmcstat_cgnode *cgc, *cgtmp; - - LIST_FOREACH_SAFE(cgc, &cg->pcg_children, pcg_sibling, cgtmp) - pmcstat_cgnode_free(cgc); - free(cg); -} - -/* - * Look for a callgraph node associated with pmc `pmcid' in the global - * hash table that corresponds to the given `pc' value in the process - * `pp'. 
- */ -static struct pmcstat_cgnode * -pmcstat_cgnode_hash_lookup_pc(struct pmcstat_process *pp, uint32_t pmcid, - uintfptr_t pc, int usermode) -{ - struct pmcstat_pcmap *ppm; - struct pmcstat_symbol *sym; - struct pmcstat_image *image; - struct pmcstat_cgnode *cg; - struct pmcstat_cgnode_hash *h; - uintfptr_t loadaddress; - unsigned int i, hash; - - ppm = pmcstat_process_find_map(usermode ? pp : pmcstat_kernproc, pc); - if (ppm == NULL) - return (NULL); - - image = ppm->ppm_image; - - loadaddress = ppm->ppm_lowpc + image->pi_vaddr - image->pi_start; - pc -= loadaddress; /* Convert to an offset in the image. */ - - /* - * Try determine the function at this offset. If we can't - * find a function round leave the `pc' value alone. - */ - if ((sym = pmcstat_symbol_search(image, pc)) != NULL) - pc = sym->ps_start; - - for (hash = i = 0; i < sizeof(uintfptr_t); i++) - hash += (pc >> i) & 0xFF; - - hash &= PMCSTAT_HASH_MASK; - - cg = NULL; - LIST_FOREACH(h, &pmcstat_cgnode_hash[hash], pch_next) - { - if (h->pch_pmcid != pmcid) - continue; - - cg = h->pch_cgnode; - - assert(cg != NULL); - - if (cg->pcg_image == image && cg->pcg_func == pc) - return (cg); - } - - /* - * We haven't seen this (pmcid, pc) tuple yet, so allocate a - * new callgraph node and a new hash table entry for it. - */ - cg = pmcstat_cgnode_allocate(image, pc); - if ((h = malloc(sizeof(*h))) == NULL) - err(EX_OSERR, "ERROR: Could not allocate callgraph node"); - - h->pch_pmcid = pmcid; - h->pch_cgnode = cg; - LIST_INSERT_HEAD(&pmcstat_cgnode_hash[hash], h, pch_next); - - pmcstat_cgnode_hash_count++; - - return (cg); -} - -/* - * Compare two callgraph nodes for sorting. 
- */ -static int -pmcstat_cgnode_compare(const void *a, const void *b) -{ - const struct pmcstat_cgnode *const *pcg1, *const *pcg2, *cg1, *cg2; - - pcg1 = (const struct pmcstat_cgnode *const *) a; - cg1 = *pcg1; - pcg2 = (const struct pmcstat_cgnode *const *) b; - cg2 = *pcg2; - - /* Sort in reverse order */ - if (cg1->pcg_count < cg2->pcg_count) - return (1); - if (cg1->pcg_count > cg2->pcg_count) - return (-1); - return (0); -} - -/* - * Find (allocating if a needed) a callgraph node in the given - * parent with the same (image, pcoffset) pair. - */ - -static struct pmcstat_cgnode * -pmcstat_cgnode_find(struct pmcstat_cgnode *parent, struct pmcstat_image *image, - uintfptr_t pcoffset) -{ - struct pmcstat_cgnode *child; - - LIST_FOREACH(child, &parent->pcg_children, pcg_sibling) { - if (child->pcg_image == image && - child->pcg_func == pcoffset) - return (child); - } - - /* - * Allocate a new structure. - */ - - child = pmcstat_cgnode_allocate(image, pcoffset); - - /* - * Link it into the parent. - */ - LIST_INSERT_HEAD(&parent->pcg_children, child, pcg_sibling); - parent->pcg_nchildren++; - - return (child); -} - -/* - * Print one callgraph node. The output format is: - * - * indentation %(parent's samples) #nsamples function@object - */ -static void -pmcstat_cgnode_print(struct pmcstat_args *a, struct pmcstat_cgnode *cg, - int depth, uint32_t total) -{ - uint32_t n; - const char *space; - struct pmcstat_symbol *sym; - struct pmcstat_cgnode **sortbuffer, **cgn, *pcg; - - space = " "; - - if (depth > 0) - (void) fprintf(a->pa_graphfile, "%*s", depth, space); - - if (cg->pcg_count == total) - (void) fprintf(a->pa_graphfile, "100.0%% "); - else - (void) fprintf(a->pa_graphfile, "%05.2f%% ", - 100.0 * cg->pcg_count / total); - - n = fprintf(a->pa_graphfile, " [%u] ", cg->pcg_count); - - /* #samples is a 12 character wide field. 
*/ - if (n < 12) - (void) fprintf(a->pa_graphfile, "%*s", 12 - n, space); - - if (depth > 0) - (void) fprintf(a->pa_graphfile, "%*s", depth, space); - - sym = pmcstat_symbol_search(cg->pcg_image, cg->pcg_func); - if (sym) - (void) fprintf(a->pa_graphfile, "%s", - pmcstat_string_unintern(sym->ps_name)); - else - (void) fprintf(a->pa_graphfile, "%p", - (void *) (cg->pcg_image->pi_vaddr + cg->pcg_func)); - - if (pmcstat_previous_filename_printed != - cg->pcg_image->pi_fullpath) { - pmcstat_previous_filename_printed = cg->pcg_image->pi_fullpath; - (void) fprintf(a->pa_graphfile, " @ %s\n", - pmcstat_string_unintern( - pmcstat_previous_filename_printed)); - } else - (void) fprintf(a->pa_graphfile, "\n"); - - if (cg->pcg_nchildren == 0) - return; - - if ((sortbuffer = (struct pmcstat_cgnode **) - malloc(sizeof(struct pmcstat_cgnode *) * - cg->pcg_nchildren)) == NULL) - err(EX_OSERR, "ERROR: Cannot print callgraph"); - cgn = sortbuffer; - - LIST_FOREACH(pcg, &cg->pcg_children, pcg_sibling) - *cgn++ = pcg; - - assert(cgn - sortbuffer == (int) cg->pcg_nchildren); - - qsort(sortbuffer, cg->pcg_nchildren, sizeof(struct pmcstat_cgnode *), - pmcstat_cgnode_compare); - - for (cgn = sortbuffer, n = 0; n < cg->pcg_nchildren; n++, cgn++) - pmcstat_cgnode_print(a, *cgn, depth+1, cg->pcg_count); - - free(sortbuffer); -} - -/* - * Record a callchain. - */ - -static void -pmcstat_record_callchain(struct pmcstat_process *pp, uint32_t pmcid, - uint32_t nsamples, uintfptr_t *cc, int usermode, struct pmcstat_args *a) -{ - uintfptr_t pc, loadaddress; - uint32_t n; - struct pmcstat_image *image; - struct pmcstat_pcmap *ppm; - struct pmcstat_symbol *sym; - struct pmcstat_cgnode *parent, *child; - - /* - * Find the callgraph node recorded in the global hash table - * for this (pmcid, pc). 
- */ - - pc = cc[0]; - parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode); - if (parent == NULL) { - pmcstat_stats.ps_callchain_dubious_frames++; - return; - } - - parent->pcg_count++; - - /* - * For each return address in the call chain record, subject - * to the maximum depth desired. - * - Find the image associated with the sample. Stop if there - * there is no valid image at that address. - * - Find the function that overlaps the return address. - * - If found: use the start address of the function. - * If not found (say an object's symbol table is not present or - * is incomplete), round down to th gprof bucket granularity. - * - Convert return virtual address to an offset in the image. - * - Look for a child with the same {offset,image} tuple, - * inserting one if needed. - * - Increment the count of occurrences of the child. - */ - - for (n = 1; n < (uint32_t) a->pa_graphdepth && n < nsamples; n++, - parent = child) { - pc = cc[n]; - - ppm = pmcstat_process_find_map(usermode ? pp : - pmcstat_kernproc, pc); - if (ppm == NULL) - return; - - image = ppm->ppm_image; - loadaddress = ppm->ppm_lowpc + image->pi_vaddr - - image->pi_start; - pc -= loadaddress; - - if ((sym = pmcstat_symbol_search(image, pc)) != NULL) - pc = sym->ps_start; - - child = pmcstat_cgnode_find(parent, image, pc); - child->pcg_count++; - } -} - -/* - * Printing a callgraph for a PMC. - */ -static void -pmcstat_callgraph_print_for_pmcid(struct pmcstat_args *a, - struct pmcstat_pmcrecord *pmcr) -{ - int n, nentries; - uint32_t nsamples, pmcid; - struct pmcstat_cgnode **sortbuffer, **cgn; - struct pmcstat_cgnode_hash *pch; - - /* - * We pull out all callgraph nodes in the top-level hash table - * with a matching PMC id. We then sort these based on the - * frequency of occurrence. Each callgraph node is then - * printed. 
- */ - - nsamples = 0; - pmcid = pmcr->pr_pmcid; - if ((sortbuffer = (struct pmcstat_cgnode **) - malloc(sizeof(struct pmcstat_cgnode *) * - pmcstat_cgnode_hash_count)) == NULL) - err(EX_OSERR, "ERROR: Cannot sort callgraph"); - cgn = sortbuffer; - - memset(sortbuffer, 0xFF, pmcstat_cgnode_hash_count * - sizeof(struct pmcstat_cgnode **)); - - for (n = 0; n < PMCSTAT_NHASH; n++) - LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next) - if (pch->pch_pmcid == pmcid) { - nsamples += pch->pch_cgnode->pcg_count; - *cgn++ = pch->pch_cgnode; - } - - nentries = cgn - sortbuffer; - assert(nentries <= pmcstat_cgnode_hash_count); - - if (nentries == 0) - return; - - qsort(sortbuffer, nentries, sizeof(struct pmcstat_cgnode *), - pmcstat_cgnode_compare); - - (void) fprintf(a->pa_graphfile, - "@ %s [%u samples]\n\n", - pmcstat_string_unintern(pmcr->pr_pmcname), - nsamples); - - for (cgn = sortbuffer, n = 0; n < nentries; n++, cgn++) { - pmcstat_previous_filename_printed = NULL; - pmcstat_cgnode_print(a, *cgn, 0, nsamples); - (void) fprintf(a->pa_graphfile, "\n"); - } - - free(sortbuffer); -} - -/* - * Print out callgraphs. - */ - -static void -pmcstat_callgraph_print(struct pmcstat_args *a) -{ - struct pmcstat_pmcrecord *pmcr; - - LIST_FOREACH(pmcr, &pmcstat_pmcs, pr_next) - pmcstat_callgraph_print_for_pmcid(a, pmcr); -} - -static void -pmcstat_cgnode_do_gmon_arcs(struct pmcstat_cgnode *cg, pmc_id_t pmcid) -{ - struct pmcstat_cgnode *cgc; - - /* - * Look for child nodes that belong to the same image. 
- */ - - LIST_FOREACH(cgc, &cg->pcg_children, pcg_sibling) { - if (cgc->pcg_image == cg->pcg_image) - pmcstat_gmon_append_arc(cg->pcg_image, pmcid, - cgc->pcg_func, cg->pcg_func, cgc->pcg_count); - if (cgc->pcg_nchildren > 0) - pmcstat_cgnode_do_gmon_arcs(cgc, pmcid); - } -} - -static void -pmcstat_callgraph_do_gmon_arcs_for_pmcid(pmc_id_t pmcid) -{ - int n; - struct pmcstat_cgnode_hash *pch; - - for (n = 0; n < PMCSTAT_NHASH; n++) - LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next) - if (pch->pch_pmcid == pmcid && - pch->pch_cgnode->pcg_nchildren > 1) - pmcstat_cgnode_do_gmon_arcs(pch->pch_cgnode, - pmcid); -} - - -static void -pmcstat_callgraph_do_gmon_arcs(void) -{ - struct pmcstat_pmcrecord *pmcr; - - LIST_FOREACH(pmcr, &pmcstat_pmcs, pr_next) - pmcstat_callgraph_do_gmon_arcs_for_pmcid(pmcr->pr_pmcid); -} - -/* - * Convert a hwpmc(4) log to profile information. A system-wide - * callgraph is generated if FLAG_DO_CALLGRAPHS is set. gmon.out - * files usable by gprof(1) are created if FLAG_DO_GPROF is set. 
- */ -static int -pmcstat_analyze_log(struct pmcstat_args *a) -{ - uint32_t cpu, cpuflags; - uintfptr_t pc, newpc; - pid_t pid; - struct pmcstat_image *image; - struct pmcstat_symbol *sym; - struct pmcstat_process *pp, *ppnew; - struct pmcstat_pcmap *ppm, *ppmtmp; - struct pmclog_ev ev; - pmcstat_interned_string image_path; - - assert(a->pa_flags & FLAG_DO_ANALYSIS); - - if (elf_version(EV_CURRENT) == EV_NONE) - err(EX_UNAVAILABLE, "Elf library intialization failed"); - - while (pmclog_read(a->pa_logparser, &ev) == 0) { + while (pmclog_read(args.pa_logparser, &ev) == 0) { assert(ev.pl_state == PMCLOG_OK); switch (ev.pl_type) { case PMCLOG_TYPE_INITIALIZE: if ((ev.pl_u.pl_i.pl_version & 0xFF000000) != - PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0) + PMC_VERSION_MAJOR << 24 && args.pa_verbosity > 0) warnx("WARNING: Log version 0x%x does not " "match compiled version 0x%x.", ev.pl_u.pl_i.pl_version, @@ -2019,7 +1344,7 @@ pmcstat_analyze_log(struct pmcstat_args *a) pl_pathname); image = pmcstat_image_from_path(image_path, pid == -1); if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) - pmcstat_image_determine_type(image, a); + pmcstat_image_determine_type(image); if (image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE) pmcstat_image_link(pp, image, ev.pl_u.pl_mi.pl_start); @@ -2059,16 +1384,23 @@ pmcstat_analyze_log(struct pmcstat_args *a) pc = ev.pl_u.pl_s.pl_pc; pp = pmcstat_process_lookup(ev.pl_u.pl_s.pl_pid, PMCSTAT_ALLOCATE); - if ((ppm = pmcstat_process_find_map(pp, pc)) == NULL && - (ppm = pmcstat_process_find_map(pmcstat_kernproc, - pc)) == NULL) { /* unknown process,offset pair */ - pmcstat_stats.ps_samples_unknown_offset++; - break; - } - pmcstat_image_increment_bucket(ppm, pc, - ev.pl_u.pl_s.pl_pmcid, a); + /* Get PMC record. 
*/ + pmcr = pmcstat_lookup_pmcid(ev.pl_u.pl_s.pl_pmcid); + assert(pmcr != NULL); + + /* + * Call the plugins processing + * TODO: move pmcstat_process_find_map inside plugins + */ + if (plugins[args.pa_pplugin].pl_process != NULL) + plugins[args.pa_pplugin].pl_process( + pp, pmcr, 1, &pc, + pmcstat_process_find_map(pp, pc) != NULL, 0); + plugins[args.pa_plugin].pl_process( + pp, pmcr, 1, &pc, + pmcstat_process_find_map(pp, pc) != NULL, 0); break; case PMCLOG_TYPE_CALLCHAIN: @@ -2078,7 +1410,7 @@ pmcstat_analyze_log(struct pmcstat_args *a) cpu = PMC_CALLCHAIN_CPUFLAGS_TO_CPU(cpuflags); /* Filter on the CPU id. */ - if ((a->pa_cpumask & (1 << cpu)) == 0) { + if ((args.pa_cpumask & (1 << cpu)) == 0) { pmcstat_stats.ps_samples_skipped++; break; } @@ -2086,45 +1418,27 @@ pmcstat_analyze_log(struct pmcstat_args *a) pp = pmcstat_process_lookup(ev.pl_u.pl_cc.pl_pid, PMCSTAT_ALLOCATE); - if ((a->pa_flags & FLAG_WANTS_MAPPINGS) == 0) - pmcstat_record_callchain(pp, - ev.pl_u.pl_cc.pl_pmcid, - ev.pl_u.pl_cc.pl_npc, ev.pl_u.pl_cc.pl_pc, - PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags), a); - - if ((a->pa_flags & - (FLAG_DO_GPROF | FLAG_WANTS_MAPPINGS)) == 0) - break; - - pc = ev.pl_u.pl_cc.pl_pc[0]; - if (PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags) == 0) - pp = pmcstat_kernproc; - ppm = pmcstat_process_find_map(pp, pc); - if (ppm == NULL) { - - /* Unknown offset. */ - pmcstat_stats.ps_samples_unknown_offset++; - break; - } - if (a->pa_flags & FLAG_WANTS_MAPPINGS) { - image = ppm->ppm_image; - newpc = pc - (ppm->ppm_lowpc + - (image->pi_vaddr - image->pi_start)); - sym = pmcstat_symbol_search(image, newpc); - if (sym == NULL) - break; - fprintf(a->pa_graphfile, "%p %s 0x%jx 0x%jx\n", - (void *)pc, - pmcstat_string_unintern(sym->ps_name), - (uintmax_t)(sym->ps_start + - image->pi_vaddr), (uintmax_t)(sym->ps_end + - image->pi_vaddr)); - break; - } + /* Get PMC record. 
*/ + pmcr = pmcstat_lookup_pmcid(ev.pl_u.pl_cc.pl_pmcid); + assert(pmcr != NULL); - pmcstat_image_increment_bucket(ppm, pc, - ev.pl_u.pl_cc.pl_pmcid, a); + /* + * Call the plugins processing + */ + if (plugins[args.pa_pplugin].pl_process != NULL) + plugins[args.pa_pplugin].pl_process( + pp, pmcr, + ev.pl_u.pl_cc.pl_npc, + ev.pl_u.pl_cc.pl_pc, + PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags), + cpu); + plugins[args.pa_plugin].pl_process( + pp, pmcr, + ev.pl_u.pl_cc.pl_npc, + ev.pl_u.pl_cc.pl_pc, + PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags), + cpu); break; case PMCLOG_TYPE_PMCALLOCATE: @@ -2133,7 +1447,7 @@ pmcstat_analyze_log(struct pmcstat_args *a) * PMC and its name. */ pmcstat_pmcid_add(ev.pl_u.pl_a.pl_pmcid, - pmcstat_string_intern(ev.pl_u.pl_a.pl_evname), a); + pmcstat_string_intern(ev.pl_u.pl_a.pl_evname)); break; case PMCLOG_TYPE_PROCEXEC: @@ -2156,7 +1470,7 @@ pmcstat_analyze_log(struct pmcstat_args *a) ev.pl_u.pl_x.pl_pathname); assert(image_path != NULL); pmcstat_process_exec(pp, image_path, - ev.pl_u.pl_x.pl_entryaddr, a); + ev.pl_u.pl_x.pl_entryaddr); break; case PMCLOG_TYPE_PROCEXIT: @@ -2224,16 +1538,16 @@ pmcstat_analyze_log(struct pmcstat_args *a) */ static int -pmcstat_print_log(struct pmcstat_args *a) +pmcstat_print_log(void) { struct pmclog_ev ev; uint32_t npc; - while (pmclog_read(a->pa_logparser, &ev) == 0) { + while (pmclog_read(args.pa_logparser, &ev) == 0) { assert(ev.pl_state == PMCLOG_OK); switch (ev.pl_type) { case PMCLOG_TYPE_CALLCHAIN: - PMCSTAT_PRINT_ENTRY(a, "callchain", + PMCSTAT_PRINT_ENTRY("callchain", "%d 0x%x %d %d %c", ev.pl_u.pl_cc.pl_pid, ev.pl_u.pl_cc.pl_pmcid, PMC_CALLCHAIN_CPUFLAGS_TO_CPU(ev.pl_u.pl_cc. \ @@ -2241,95 +1555,95 @@ pmcstat_print_log(struct pmcstat_args *a) PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(ev.pl_u.pl_cc.\ pl_cpuflags) ? 
'u' : 's'); for (npc = 0; npc < ev.pl_u.pl_cc.pl_npc; npc++) - PMCSTAT_PRINT_ENTRY(a, "...", "%p", + PMCSTAT_PRINT_ENTRY("...", "%p", (void *) ev.pl_u.pl_cc.pl_pc[npc]); break; case PMCLOG_TYPE_CLOSELOG: - PMCSTAT_PRINT_ENTRY(a,"closelog",); + PMCSTAT_PRINT_ENTRY("closelog",); break; case PMCLOG_TYPE_DROPNOTIFY: - PMCSTAT_PRINT_ENTRY(a,"drop",); + PMCSTAT_PRINT_ENTRY("drop",); break; case PMCLOG_TYPE_INITIALIZE: - PMCSTAT_PRINT_ENTRY(a,"initlog","0x%x \"%s\"", + PMCSTAT_PRINT_ENTRY("initlog","0x%x \"%s\"", ev.pl_u.pl_i.pl_version, pmc_name_of_cputype(ev.pl_u.pl_i.pl_arch)); if ((ev.pl_u.pl_i.pl_version & 0xFF000000) != - PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0) + PMC_VERSION_MAJOR << 24 && args.pa_verbosity > 0) warnx("WARNING: Log version 0x%x != expected " "version 0x%x.", ev.pl_u.pl_i.pl_version, PMC_VERSION); break; case PMCLOG_TYPE_MAP_IN: - PMCSTAT_PRINT_ENTRY(a,"map-in","%d %p \"%s\"", + PMCSTAT_PRINT_ENTRY("map-in","%d %p \"%s\"", ev.pl_u.pl_mi.pl_pid, (void *) ev.pl_u.pl_mi.pl_start, ev.pl_u.pl_mi.pl_pathname); break; case PMCLOG_TYPE_MAP_OUT: - PMCSTAT_PRINT_ENTRY(a,"map-out","%d %p %p", + PMCSTAT_PRINT_ENTRY("map-out","%d %p %p", ev.pl_u.pl_mo.pl_pid, (void *) ev.pl_u.pl_mo.pl_start, (void *) ev.pl_u.pl_mo.pl_end); break; case PMCLOG_TYPE_PCSAMPLE: - PMCSTAT_PRINT_ENTRY(a,"sample","0x%x %d %p %c", + PMCSTAT_PRINT_ENTRY("sample","0x%x %d %p %c", ev.pl_u.pl_s.pl_pmcid, ev.pl_u.pl_s.pl_pid, (void *) ev.pl_u.pl_s.pl_pc, ev.pl_u.pl_s.pl_usermode ? 
'u' : 's'); break; case PMCLOG_TYPE_PMCALLOCATE: - PMCSTAT_PRINT_ENTRY(a,"allocate","0x%x \"%s\" 0x%x", + PMCSTAT_PRINT_ENTRY("allocate","0x%x \"%s\" 0x%x", ev.pl_u.pl_a.pl_pmcid, ev.pl_u.pl_a.pl_evname, ev.pl_u.pl_a.pl_flags); break; case PMCLOG_TYPE_PMCATTACH: - PMCSTAT_PRINT_ENTRY(a,"attach","0x%x %d \"%s\"", + PMCSTAT_PRINT_ENTRY("attach","0x%x %d \"%s\"", ev.pl_u.pl_t.pl_pmcid, ev.pl_u.pl_t.pl_pid, ev.pl_u.pl_t.pl_pathname); break; case PMCLOG_TYPE_PMCDETACH: - PMCSTAT_PRINT_ENTRY(a,"detach","0x%x %d", + PMCSTAT_PRINT_ENTRY("detach","0x%x %d", ev.pl_u.pl_d.pl_pmcid, ev.pl_u.pl_d.pl_pid); break; case PMCLOG_TYPE_PROCCSW: - PMCSTAT_PRINT_ENTRY(a,"cswval","0x%x %d %jd", + PMCSTAT_PRINT_ENTRY("cswval","0x%x %d %jd", ev.pl_u.pl_c.pl_pmcid, ev.pl_u.pl_c.pl_pid, ev.pl_u.pl_c.pl_value); break; case PMCLOG_TYPE_PROCEXEC: - PMCSTAT_PRINT_ENTRY(a,"exec","0x%x %d %p \"%s\"", + PMCSTAT_PRINT_ENTRY("exec","0x%x %d %p \"%s\"", ev.pl_u.pl_x.pl_pmcid, ev.pl_u.pl_x.pl_pid, (void *) ev.pl_u.pl_x.pl_entryaddr, ev.pl_u.pl_x.pl_pathname); break; case PMCLOG_TYPE_PROCEXIT: - PMCSTAT_PRINT_ENTRY(a,"exitval","0x%x %d %jd", + PMCSTAT_PRINT_ENTRY("exitval","0x%x %d %jd", ev.pl_u.pl_e.pl_pmcid, ev.pl_u.pl_e.pl_pid, ev.pl_u.pl_e.pl_value); break; case PMCLOG_TYPE_PROCFORK: - PMCSTAT_PRINT_ENTRY(a,"fork","%d %d", + PMCSTAT_PRINT_ENTRY("fork","%d %d", ev.pl_u.pl_f.pl_oldpid, ev.pl_u.pl_f.pl_newpid); break; case PMCLOG_TYPE_USERDATA: - PMCSTAT_PRINT_ENTRY(a,"userdata","0x%x", + PMCSTAT_PRINT_ENTRY("userdata","0x%x", ev.pl_u.pl_u.pl_userdata); break; case PMCLOG_TYPE_SYSEXIT: - PMCSTAT_PRINT_ENTRY(a,"exit","%d", + PMCSTAT_PRINT_ENTRY("exit","%d", ev.pl_u.pl_se.pl_pid); break; default: - fprintf(a->pa_printfile, "unknown event (type %d).\n", + fprintf(args.pa_printfile, "unknown event (type %d).\n", ev.pl_type); } } @@ -2354,13 +1668,13 @@ pmcstat_print_log(struct pmcstat_args *a) */ int -pmcstat_close_log(struct pmcstat_args *a) +pmcstat_close_log(void) { if (pmc_flush_logfile() < 0 || 
pmc_configure_logfile(-1) < 0) err(EX_OSERR, "ERROR: logging failed"); - a->pa_flags &= ~(FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE); - return (a->pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING : + args.pa_flags &= ~(FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE); + return (args.pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING : PMCSTAT_FINISHED); } @@ -2456,17 +1770,208 @@ pmcstat_open_log(const char *path, int mode) */ int -pmcstat_process_log(struct pmcstat_args *a) +pmcstat_process_log(void) { /* * If analysis has not been asked for, just print the log to * the current output file. */ - if (a->pa_flags & FLAG_DO_PRINT) - return (pmcstat_print_log(a)); + if (args.pa_flags & FLAG_DO_PRINT) + return (pmcstat_print_log()); else - return (pmcstat_analyze_log(a)); + return (pmcstat_analyze_log()); +} + +/* + * Refresh top display. + */ + +static void +pmcstat_refresh_top(void) +{ + char pmcname[40]; + + /* If in pause mode do not refresh display. */ + if (pmcstat_pause) + return; + + /* Format PMC name. */ + if (pmcstat_mergepmc) + snprintf(pmcname, sizeof(pmcname), "[%s]", + pmcstat_pmcindex_to_name(pmcstat_pmcinfilter)); + else + snprintf(pmcname, sizeof(pmcname), "%s.%d", + pmcstat_pmcindex_to_name(pmcstat_pmcinfilter), + pmcstat_pmcinfilter); + + PMCSTAT_PRINTBEGIN(); + PMCSTAT_PRINTW("PMC: %s Samples: %u processed, %u invalid\n\n", + pmcname, + pmcstat_stats.ps_samples_total, + pmcstat_stats.ps_samples_unknown_offset + + pmcstat_stats.ps_samples_indeterminable + + pmcstat_stats.ps_callchain_dubious_frames); + if (plugins[args.pa_plugin].pl_topdisplay != NULL) + plugins[args.pa_plugin].pl_topdisplay(); + PMCSTAT_PRINTEND(); +} + +/* + * Find the next pmc index to display. + */ + +static void +pmcstat_changefilter(void) +{ + int pmcin; + struct pmcstat_pmcrecord *pmcr; + + /* + * Find the next merge target. 
+ */ + if (pmcstat_mergepmc) { + pmcin = pmcstat_pmcinfilter; + + do { + pmcr = pmcstat_pmcindex_to_pmcr(pmcstat_pmcinfilter); + if (pmcr == pmcr->pr_merge) + break; + + pmcstat_pmcinfilter++; + if (pmcstat_pmcinfilter >= pmcstat_npmcs) + pmcstat_pmcinfilter = 0; + + } while (pmcstat_pmcinfilter != pmcin); + } +} + +/* + * Top mode keypress. + */ + +int +pmcstat_keypress_log(void) +{ + int c, ret = 0; + WINDOW *w; + + w = newwin(1, 0, 1, 0); + c = wgetch(w); + wprintw(w, "Key: %c => ", c); + switch (c) { + case 'c': + wprintw(w, "enter mode 'd' or 'a' => "); + c = wgetch(w); + if (c == 'd') { + args.pa_topmode = PMCSTAT_TOP_DELTA; + wprintw(w, "switching to delta mode"); + } else { + args.pa_topmode = PMCSTAT_TOP_ACCUM; + wprintw(w, "switching to accumulation mode"); + } + break; + case 'm': + pmcstat_mergepmc = !pmcstat_mergepmc; + /* + * Changing merge state requires a data reset. + */ + if (plugins[args.pa_plugin].pl_shutdown != NULL) + plugins[args.pa_plugin].pl_shutdown(NULL); + bzero(&pmcstat_stats, sizeof(struct pmcstat_stats)); + if (plugins[args.pa_plugin].pl_init != NULL) + plugins[args.pa_plugin].pl_init(); + + /* Update filter to be on a merge target. */ + pmcstat_changefilter(); + wprintw(w, "merge PMC %s", pmcstat_mergepmc ? "on" : "off"); + break; + case 'n': + /* Close current plugin. */ + if (plugins[args.pa_plugin].pl_shutdown != NULL) + plugins[args.pa_plugin].pl_shutdown(NULL); + + /* Find next top display available. */ + do { + args.pa_plugin++; + if (plugins[args.pa_plugin].pl_name == NULL) + args.pa_plugin = 0; + } while (plugins[args.pa_plugin].pl_topdisplay == NULL); + + /* Open new plugin.
*/ + bzero(&pmcstat_stats, sizeof(struct pmcstat_stats)); + if (plugins[args.pa_plugin].pl_init != NULL) + plugins[args.pa_plugin].pl_init(); + wprintw(w, "switching to plugin %s", + plugins[args.pa_plugin].pl_name); + break; + case 'p': + pmcstat_pmcinfilter++; + if (pmcstat_pmcinfilter >= pmcstat_npmcs) + pmcstat_pmcinfilter = 0; + pmcstat_changefilter(); + wprintw(w, "switching to PMC %s.%d", + pmcstat_pmcindex_to_name(pmcstat_pmcinfilter), + pmcstat_pmcinfilter); + break; + case ' ': + pmcstat_pause = !pmcstat_pause; + if (pmcstat_pause) + wprintw(w, "pause => press space again to continue"); + break; + case 'q': + wprintw(w, "exiting..."); + ret = 1; + default: + if (plugins[args.pa_plugin].pl_topkeypress != NULL) + if (plugins[args.pa_plugin].pl_topkeypress(c, w)) + ret = 1; + } + + wrefresh(w); + delwin(w); + return ret; +} + + +/* + * Top mode display. + */ + +void +pmcstat_display_log(void) +{ + + pmcstat_refresh_top(); + + /* Reset everything if in delta mode. */ + if (args.pa_topmode == PMCSTAT_TOP_DELTA) { + if (plugins[args.pa_plugin].pl_shutdown != NULL) + plugins[args.pa_plugin].pl_shutdown(NULL); + bzero(&pmcstat_stats, sizeof(struct pmcstat_stats)); + if (plugins[args.pa_plugin].pl_init != NULL) + plugins[args.pa_plugin].pl_init(); + } + +} + +/* + * Configure a plugin.
+ */ + +void +pmcstat_pluginconfigure_log(char *opt) +{ + + if (strncmp(opt, "threshold=", 10) == 0) { + pmcstat_threshold = atof(opt+10); + } else { + if (plugins[args.pa_plugin].pl_configure != NULL) { + if (!plugins[args.pa_plugin].pl_configure(opt)) + err(EX_USAGE, + "ERROR: unknown option <%s>.", opt); + } + } } /* @@ -2474,12 +1979,10 @@ pmcstat_process_log(struct pmcstat_args *a) */ void -pmcstat_initialize_logging(struct pmcstat_args *a) +pmcstat_initialize_logging(void) { int i; - (void) a; - /* use a convenient format for 'ldd' output */ if (setenv("LD_TRACE_LOADED_OBJECTS_FMT1","%o \"%p\" %x\n",1) != 0) err(EX_OSERR, "ERROR: Cannot setenv"); @@ -2499,6 +2002,21 @@ pmcstat_initialize_logging(struct pmcstat_args *a) if ((pmcstat_kernproc = pmcstat_process_lookup((pid_t) -1, PMCSTAT_ALLOCATE)) == NULL) err(EX_OSERR, "ERROR: Cannot initialize logging"); + + /* PMC count. */ + pmcstat_npmcs = 0; + + /* Merge PMC with same name. */ + pmcstat_mergepmc = args.pa_mergepmc; + + /* + * Initialize plugins + */ + + if (plugins[args.pa_pplugin].pl_init != NULL) + plugins[args.pa_pplugin].pl_init(); + if (plugins[args.pa_plugin].pl_init != NULL) + plugins[args.pa_plugin].pl_init(); } /* @@ -2506,99 +2024,57 @@ pmcstat_initialize_logging(struct pmcstat_args *a) */ void -pmcstat_shutdown_logging(struct pmcstat_args *a) +pmcstat_shutdown_logging(void) { int i; FILE *mf; - struct pmcstat_gmonfile *pgf, *pgftmp; struct pmcstat_image *pi, *pitmp; struct pmcstat_process *pp, *pptmp; - struct pmcstat_cgnode_hash *pch, *pchtmp; + struct pmcstat_pcmap *ppm, *ppmtmp; /* determine where to send the map file */ mf = NULL; - if (a->pa_mapfilename != NULL) - mf = (strcmp(a->pa_mapfilename, "-") == 0) ? - a->pa_printfile : fopen(a->pa_mapfilename, "w"); + if (args.pa_mapfilename != NULL) + mf = (strcmp(args.pa_mapfilename, "-") == 0) ? 
+ args.pa_printfile : fopen(args.pa_mapfilename, "w"); - if (mf == NULL && a->pa_flags & FLAG_DO_GPROF && - a->pa_verbosity >= 2) - mf = a->pa_printfile; + if (mf == NULL && args.pa_flags & FLAG_DO_GPROF && + args.pa_verbosity >= 2) + mf = args.pa_printfile; if (mf) (void) fprintf(mf, "MAP:\n"); - - if (a->pa_flags & FLAG_DO_CALLGRAPHS) - pmcstat_callgraph_print(a); - /* - * Sync back all gprof flat profile data. + * Shutdown the plugins */ - for (i = 0; i < PMCSTAT_NHASH; i++) { - LIST_FOREACH(pi, &pmcstat_image_hash[i], pi_next) { - if (mf) - (void) fprintf(mf, " \"%s\" => \"%s\"", - pmcstat_string_unintern(pi->pi_execpath), - pmcstat_string_unintern( - pi->pi_samplename)); - - /* flush gmon.out data to disk */ - LIST_FOREACH(pgf, &pi->pi_gmlist, pgf_next) { - pmcstat_gmon_unmap_file(pgf); - if (mf) - (void) fprintf(mf, " %s/%d", - pmcstat_pmcid_to_name( - pgf->pgf_pmcid), - pgf->pgf_nsamples); - if (pgf->pgf_overflow && a->pa_verbosity >= 1) - warnx("WARNING: profile \"%s\" " - "overflowed.", - pmcstat_string_unintern( - pgf->pgf_name)); - } - if (mf) - (void) fprintf(mf, "\n"); - } - } - - /* - * Compute arcs and add these to the gprof files. - */ - if (a->pa_flags & FLAG_DO_GPROF && a->pa_graphdepth > 1) - pmcstat_callgraph_do_gmon_arcs(); - - /* - * Free memory. 
- */ - for (i = 0; i < PMCSTAT_NHASH; i++) { - LIST_FOREACH_SAFE(pch, &pmcstat_cgnode_hash[i], pch_next, - pchtmp) { - pmcstat_cgnode_free(pch->pch_cgnode); - free(pch); - } - } + if (plugins[args.pa_plugin].pl_shutdown != NULL) + plugins[args.pa_plugin].pl_shutdown(mf); + if (plugins[args.pa_pplugin].pl_shutdown != NULL) + plugins[args.pa_pplugin].pl_shutdown(mf); for (i = 0; i < PMCSTAT_NHASH; i++) { - LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, pitmp) - { - LIST_FOREACH_SAFE(pgf, &pi->pi_gmlist, pgf_next, - pgftmp) { - if (pgf->pgf_file) - (void) fclose(pgf->pgf_file); - LIST_REMOVE(pgf, pgf_next); - free(pgf); - } - if (pi->pi_symbols) - free(pi->pi_symbols); - + LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, + pitmp) { + if (plugins[args.pa_plugin].pl_shutdownimage != NULL) + plugins[args.pa_plugin].pl_shutdownimage(pi); + if (plugins[args.pa_pplugin].pl_shutdownimage != NULL) + plugins[args.pa_pplugin].pl_shutdownimage(pi); + + free(pi->pi_symbols); + if (pi->pi_addr2line != NULL) + pclose(pi->pi_addr2line); LIST_REMOVE(pi, pi_next); free(pi); } LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next, pptmp) { + TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) { + TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next); + free(ppm); + } LIST_REMOVE(pp, pp_next); free(pp); } @@ -2610,23 +2086,23 @@ pmcstat_shutdown_logging(struct pmcstat_args *a) * Print errors unless -q was specified. Print all statistics * if verbosity > 1. 
*/ -#define PRINT(N,V,A) do { \ - if (pmcstat_stats.ps_##V || (A)->pa_verbosity >= 2) \ - (void) fprintf((A)->pa_printfile, " %-40s %d\n",\ +#define PRINT(N,V) do { \ + if (pmcstat_stats.ps_##V || args.pa_verbosity >= 2) \ + (void) fprintf(args.pa_printfile, " %-40s %d\n",\ N, pmcstat_stats.ps_##V); \ } while (0) - if (a->pa_verbosity >= 1 && a->pa_flags & FLAG_DO_GPROF) { - (void) fprintf(a->pa_printfile, "CONVERSION STATISTICS:\n"); - PRINT("#exec/a.out", exec_aout, a); - PRINT("#exec/elf", exec_elf, a); - PRINT("#exec/unknown", exec_indeterminable, a); - PRINT("#exec handling errors", exec_errors, a); - PRINT("#samples/total", samples_total, a); - PRINT("#samples/unclaimed", samples_unknown_offset, a); - PRINT("#samples/unknown-object", samples_indeterminable, a); - PRINT("#callchain/dubious-frames", callchain_dubious_frames, - a); + if (args.pa_verbosity >= 1 && (args.pa_flags & FLAG_DO_ANALYSIS) && + (args.pa_flags & FLAG_DO_TOP) == 0) { + (void) fprintf(args.pa_printfile, "CONVERSION STATISTICS:\n"); + PRINT("#exec/a.out", exec_aout); + PRINT("#exec/elf", exec_elf); + PRINT("#exec/unknown", exec_indeterminable); + PRINT("#exec handling errors", exec_errors); + PRINT("#samples/total", samples_total); + PRINT("#samples/unclaimed", samples_unknown_offset); + PRINT("#samples/unknown-object", samples_indeterminable); + PRINT("#callchain/dubious-frames", callchain_dubious_frames); } if (mf) diff --git a/usr.sbin/pmcstat/pmcstat_log.h b/usr.sbin/pmcstat/pmcstat_log.h new file mode 100644 index 0000000..de92649 --- /dev/null +++ b/usr.sbin/pmcstat/pmcstat_log.h @@ -0,0 +1,196 @@ +/*- + * Copyright (c) 2005-2007, Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation + * Copyright (c) 2009, Fabien Thomas + * All rights reserved. + * + * Portions of this software were developed by A. Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _PMCSTAT_LOG_H_ +#define _PMCSTAT_LOG_H_ + +typedef const void *pmcstat_interned_string; + +/* + * A 'pmcstat_process' structure models processes. Each process is + * associated with a set of pmcstat_pcmap structures that map + * addresses inside it to executable objects. This set is implemented + * as a list, kept sorted in ascending order of mapped addresses. + * + * 'pp_pid' holds the pid of the process. When a process exits, the + * 'pp_isactive' field is set to zero, but the process structure is + * not immediately reclaimed because there may still be samples in the + * log for this process. 
+ */ + +struct pmcstat_process { + LIST_ENTRY(pmcstat_process) pp_next; /* hash-next */ + pid_t pp_pid; /* associated pid */ + int pp_isactive; /* whether active */ + uintfptr_t pp_entryaddr; /* entry address */ + TAILQ_HEAD(,pmcstat_pcmap) pp_map; /* address range map */ +}; +extern LIST_HEAD(pmcstat_process_hash_list, pmcstat_process) pmcstat_process_hash[PMCSTAT_NHASH]; + +/* + * A 'pmcstat_image' structure describes an executable program on + * disk. 'pi_execpath' is a cookie representing the pathname of + * the executable. 'pi_start' and 'pi_end' are the least and greatest + * virtual addresses for the text segments in the executable. + * 'pi_gmlist' contains a linked list of gmon.out files associated + * with this image. + */ + +enum pmcstat_image_type { + PMCSTAT_IMAGE_UNKNOWN = 0, /* never looked at the image */ + PMCSTAT_IMAGE_INDETERMINABLE, /* can't tell what the image is */ + PMCSTAT_IMAGE_ELF32, /* ELF 32 bit object */ + PMCSTAT_IMAGE_ELF64, /* ELF 64 bit object */ + PMCSTAT_IMAGE_AOUT /* AOUT object */ +}; + +struct pmcstat_image { + LIST_ENTRY(pmcstat_image) pi_next; /* hash link */ + TAILQ_ENTRY(pmcstat_image) pi_lru; /* LRU list */ + pmcstat_interned_string pi_execpath; /* cookie */ + pmcstat_interned_string pi_samplename; /* sample path name */ + pmcstat_interned_string pi_fullpath; /* path to FS object */ + pmcstat_interned_string pi_name; /* display name */ + + enum pmcstat_image_type pi_type; /* executable type */ + + /* + * Executables have pi_start and pi_end; these are zero + * for shared libraries. + */ + uintfptr_t pi_start; /* start address (inclusive) */ + uintfptr_t pi_end; /* end address (exclusive) */ + uintfptr_t pi_entry; /* entry address */ + uintfptr_t pi_vaddr; /* virtual address where loaded */ + int pi_isdynamic; /* whether a dynamic object */ + int pi_iskernelmodule; + pmcstat_interned_string pi_dynlinkerpath; /* path in .interp */ + + /* All symbols associated with this object.
*/ + struct pmcstat_symbol *pi_symbols; + size_t pi_symcount; + + /* Handle to addr2line for this image. */ + FILE *pi_addr2line; + + /* + * Plugins private data + */ + + /* gprof: + * An image can be associated with one or more gmon.out files; + * one per PMC. + */ + LIST_HEAD(,pmcstat_gmonfile) pi_gmlist; +}; +extern LIST_HEAD(pmcstat_image_hash_list, pmcstat_image) pmcstat_image_hash[PMCSTAT_NHASH]; + +/* + * A 'pmcstat_pcmap' structure maps a virtual address range to an + * underlying 'pmcstat_image' descriptor. + */ +struct pmcstat_pcmap { + TAILQ_ENTRY(pmcstat_pcmap) ppm_next; + uintfptr_t ppm_lowpc; + uintfptr_t ppm_highpc; + struct pmcstat_image *ppm_image; +}; + +/* + * Each function symbol tracked by pmcstat(8). + */ + +struct pmcstat_symbol { + pmcstat_interned_string ps_name; + uint64_t ps_start; + uint64_t ps_end; +}; + +/* + * 'pmcstat_pmcrecord' is a mapping from PMC ids to human-readable + * names. + */ + +struct pmcstat_pmcrecord { + LIST_ENTRY(pmcstat_pmcrecord) pr_next; + pmc_id_t pr_pmcid; + int pr_pmcin; + pmcstat_interned_string pr_pmcname; + struct pmcstat_pmcrecord *pr_merge; +}; +extern LIST_HEAD(pmcstat_pmcs, pmcstat_pmcrecord) pmcstat_pmcs; /* PMC list */ + +/* + * Misc. 
statistics + */ +struct pmcstat_stats { + int ps_exec_aout; /* # a.out executables seen */ + int ps_exec_elf; /* # elf executables seen */ + int ps_exec_errors; /* # errors processing executables */ + int ps_exec_indeterminable; /* # unknown executables seen */ + int ps_samples_total; /* total number of samples processed */ + int ps_samples_skipped; /* #samples filtered out for any reason */ + int ps_samples_unknown_offset; /* #samples of rank 0 not in a map */ + int ps_samples_indeterminable; /* #samples in indeterminable images */ + int ps_callchain_dubious_frames;/* #dubious frame pointers seen */ +}; +extern struct pmcstat_stats pmcstat_stats; /* statistics */ + +extern struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */ + +extern int pmcstat_npmcs; /* PMC count. */ + +/* + * Top mode global options. + */ +float pmcstat_threshold; /* Threshold to filter node. */ +int pmcstat_pmcinfilter; /* PMC index displayed. */ + +/* Function prototypes */ +const char *pmcstat_pmcid_to_name(pmc_id_t _pmcid); +const char *pmcstat_pmcindex_to_name(int pmcin); +struct pmcstat_pmcrecord *pmcstat_pmcindex_to_pmcr(int pmcin); +struct pmcstat_pcmap *pmcstat_process_find_map(struct pmcstat_process *_p, + uintfptr_t _pc); +struct pmcstat_symbol *pmcstat_symbol_search(struct pmcstat_image *image, + uintfptr_t addr); +const char *pmcstat_string_unintern(pmcstat_interned_string _is); +pmcstat_interned_string pmcstat_string_intern(const char *_s); +void pmcstat_image_determine_type(struct pmcstat_image *_image); +pmcstat_interned_string pmcstat_string_lookup(const char *_s); +int pmcstat_image_addr2line(struct pmcstat_image *image, uintfptr_t addr, + char *sourcefile, size_t sourcefile_len, unsigned *sourceline, + char *funcname, size_t funcname_len); + +#endif /* _PMCSTAT_LOG_H_ */ + diff --git a/usr.sbin/pmcstat/pmcstat_top.h b/usr.sbin/pmcstat/pmcstat_top.h new file mode 100644 index 0000000..281410b --- /dev/null +++ b/usr.sbin/pmcstat/pmcstat_top.h @@ -0,0 +1,75 @@ +/*- 
+ * Copyright (c) 2009, Fabien Thomas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _PMCSTAT_TOP_H_ +#define _PMCSTAT_TOP_H_ + +/* Return the ncurses attributes for the given value. */ +#define PMCSTAT_ATTRPERCENT(b) \ + ((b) > 10.0 ? (args.pa_topcolor ? COLOR_PAIR(1) : A_BOLD) : \ + ((b) > 5.0 ? (args.pa_topcolor ? COLOR_PAIR(2) : 0) : \ + ((b) > 2.5 ? (args.pa_topcolor ? COLOR_PAIR(3) : 0) : 0))) + +/* Print to the default ncurse windows if on a terminal or to the file. */ +#define PMCSTAT_PRINTW(...) do { \ + if (args.pa_toptty) \ + printw(__VA_ARGS__); \ + else \ + fprintf(args.pa_printfile, __VA_ARGS__);\ +} while (0) + +/* If ncurses mode active set attributes. 
*/ +#define PMCSTAT_ATTRON(b) do { \ + if (args.pa_toptty) \ + attron(b); \ +} while (0) + +/* If ncurses mode active unset attributes. */ +#define PMCSTAT_ATTROFF(b) do { \ + if (args.pa_toptty) \ + attroff(b); \ +} while (0) + +/* Erase screen and set cursor to top left. */ +#define PMCSTAT_PRINTBEGIN() do { \ + if (args.pa_toptty) \ + clear(); \ +} while (0) + +/* Flush buffer to backend. */ +#define PMCSTAT_PRINTEND() do { \ + if (!args.pa_toptty) { \ + PMCSTAT_PRINTW("---\n"); \ + fflush(args.pa_printfile); \ + } else \ + refresh(); \ +} while (0) + +/* Function prototypes */ + +#endif /* _PMCSTAT_TOP_H_ */ -- cgit v1.1 From 47627f3cde9fe68e66869f9d184729218ba667bf Mon Sep 17 00:00:00 2001 From: fabient Date: Fri, 5 Mar 2010 22:52:41 +0000 Subject: MFC 204329 partially: Fixed dependencies (make checkdpadd). --- usr.sbin/pmcstat/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/Makefile b/usr.sbin/pmcstat/Makefile index 4412e24..2bd8133 100644 --- a/usr.sbin/pmcstat/Makefile +++ b/usr.sbin/pmcstat/Makefile @@ -5,7 +5,7 @@ PROG= pmcstat MAN= pmcstat.8 -DPADD= ${LIBELF} ${LIBKVM} ${LIBPMC} ${LIBM} +DPADD= ${LIBELF} ${LIBKVM} ${LIBPMC} ${LIBM} ${LIBNCURSES} LDADD= -lelf -lkvm -lpmc -lm -lncurses WARNS?= 6 -- cgit v1.1 From e7448a3e1deabc16833dd8098bd6b680787b4df6 Mon Sep 17 00:00:00 2001 From: fabient Date: Mon, 8 Mar 2010 07:53:44 +0000 Subject: MFC r204783: Bug fixed: - no display on serial terminal in top mode. - display alignment for continuation string. - correct invalid value used for display limit. 
--- usr.sbin/pmcstat/pmcpl_callgraph.c | 2 +- usr.sbin/pmcstat/pmcpl_calltree.c | 2 +- usr.sbin/pmcstat/pmcstat.c | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/pmcpl_callgraph.c b/usr.sbin/pmcstat/pmcpl_callgraph.c index d6f1a9d..84e2819 100644 --- a/usr.sbin/pmcstat/pmcpl_callgraph.c +++ b/usr.sbin/pmcstat/pmcpl_callgraph.c @@ -550,7 +550,7 @@ pmcstat_cgnode_topprint(struct pmcstat_cgnode *cg, len = ns_len + vs_len + 1; if (width - len < 0) { - PMCSTAT_PRINTW("..."); + PMCSTAT_PRINTW(" ..."); break; } width -= len; diff --git a/usr.sbin/pmcstat/pmcpl_calltree.c b/usr.sbin/pmcstat/pmcpl_calltree.c index 498092d..404653e 100644 --- a/usr.sbin/pmcstat/pmcpl_calltree.c +++ b/usr.sbin/pmcstat/pmcpl_calltree.c @@ -387,7 +387,7 @@ pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, if (ct->pct_narc == 0) { pmcpl_ct_topscreen[x+1][*y] = NULL; if (*y >= PMCPL_CT_MAXLINE || - *y >= pmcstat_displaywidth) + *y >= pmcstat_displayheight) return 1; *y = *y + 1; for (i=0; i < x; i++) diff --git a/usr.sbin/pmcstat/pmcstat.c b/usr.sbin/pmcstat/pmcstat.c index a73d293..89ec8f0 100644 --- a/usr.sbin/pmcstat/pmcstat.c +++ b/usr.sbin/pmcstat/pmcstat.c @@ -1311,6 +1311,9 @@ main(int argc, char **argv) intrflush(stdscr, FALSE); keypad(stdscr, TRUE); clear(); + /* Get terminal width / height with ncurses. */ + getmaxyx(stdscr, pmcstat_displayheight, pmcstat_displaywidth); + pmcstat_displayheight--; pmcstat_displaywidth--; atexit(pmcstat_topexit); } } -- cgit v1.1 From 4abad1cd79a7ea67ffa645d7a07cd9f8f21137cc Mon Sep 17 00:00:00 2001 From: fabient Date: Thu, 11 Mar 2010 07:36:45 +0000 Subject: MFC r204878: Change the way shutdown is handled for log file. pmc_flush_logfile is now non-blocking and just ask the kernel to shutdown the file. From that point, no more data is accepted by the log thread and when the last buffer is flushed the file is closed. 
This will remove a deadlock between pmcstat asking for flush while it cannot flush the pipe itself. --- usr.sbin/pmcstat/pmcstat_log.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c index 5811af3..fc26001 100644 --- a/usr.sbin/pmcstat/pmcstat_log.c +++ b/usr.sbin/pmcstat/pmcstat_log.c @@ -1670,10 +1670,8 @@ pmcstat_print_log(void) int pmcstat_close_log(void) { - if (pmc_flush_logfile() < 0 || - pmc_configure_logfile(-1) < 0) + if (pmc_flush_logfile() < 0) err(EX_OSERR, "ERROR: logging failed"); - args.pa_flags &= ~(FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE); return (args.pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING : PMCSTAT_FINISHED); } -- cgit v1.1 From a08df8acc9cf8fa82aa9758edb9c57e8570a8ced Mon Sep 17 00:00:00 2001 From: joerg Date: Tue, 16 Mar 2010 05:13:20 +0000 Subject: r205170: then -> than --- usr.sbin/powerd/powerd.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/powerd/powerd.8 b/usr.sbin/powerd/powerd.8 index 5808862..2d6acff 100644 --- a/usr.sbin/powerd/powerd.8 +++ b/usr.sbin/powerd/powerd.8 @@ -58,7 +58,7 @@ the system appears idle and increasing it when the system is busy. It offers a good balance between a small performance loss for greatly increased power savings. Hiadaptive mode is like adaptive mode, but tuned for systems where -performance and interactivity are more important then power consumption. +performance and interactivity are more important than power consumption. It increases frequency faster, reduces the frequency less aggressively and will maintain full frequency for longer. The default mode is adaptive for battery power and hiadaptive for the rest. 
-- cgit v1.1 From 21a8fe512b800288e4f0ca316e4f8652b6b5a2aa Mon Sep 17 00:00:00 2001 From: dougb Date: Wed, 17 Mar 2010 07:26:00 +0000 Subject: MFC r205145: Make it more clear in the docs that -a is not compatible with -iFU, and enforce this in the code. Apparently a lot of users mistakenly combine -a with these flags and are then mystified that no changes were made. While I'm here, fix a trailing space in mergemaster.8 --- usr.sbin/mergemaster/mergemaster.8 | 15 +++++++++------ usr.sbin/mergemaster/mergemaster.sh | 14 +++++++++++++- 2 files changed, 22 insertions(+), 7 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mergemaster/mergemaster.8 b/usr.sbin/mergemaster/mergemaster.8 index c9217b3..a8bafb7 100644 --- a/usr.sbin/mergemaster/mergemaster.8 +++ b/usr.sbin/mergemaster/mergemaster.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 7, 2010 +.Dd March 13, 2010 .Dt MERGEMASTER 8 .Os .Sh NAME @@ -32,7 +32,8 @@ .Nd merge configuration files, et al during an upgrade .Sh SYNOPSIS .Nm -.Op Fl scrvahipFCPU +.Op Fl scrvhpCP +.Op Fl a|iFU .Op Fl m Ar /path/to/sources .Op Fl t Ar /path/to/temp/root .Op Fl d @@ -168,7 +169,7 @@ and therefore can override both files. When the comparison is done if there are any files remaining in the temproot directory they will be listed, and if the .Fl a -option is not in use the user will be given the option of +option is not in use the user will be given the option of deleting the temproot directory. If there are no files remaining in the temproot directory it will be deleted. @@ -206,9 +207,11 @@ If the directory exists, it creates a new one in a previously non-existent directory. This option unsets the verbose flag, -but other than -.Fl U -it is compatible with all other options. +and is not compatible with +.Fl i , +.Fl F , +or +.Fl U . 
Setting .Fl a makes diff --git a/usr.sbin/mergemaster/mergemaster.sh b/usr.sbin/mergemaster/mergemaster.sh index d21d80b..43126ae 100755 --- a/usr.sbin/mergemaster/mergemaster.sh +++ b/usr.sbin/mergemaster/mergemaster.sh @@ -15,7 +15,7 @@ PATH=/bin:/usr/bin:/usr/sbin display_usage () { VERSION_NUMBER=`grep "[$]FreeBSD:" $0 | cut -d ' ' -f 4` echo "mergemaster version ${VERSION_NUMBER}" - echo 'Usage: mergemaster [-scrvahipFCPU]' + echo 'Usage: mergemaster [-scrvhpCP] [-a|[-iFU]]' echo ' [-m /path] [-t /path] [-d] [-u N] [-w N] [-A arch] [-D /path]' echo "Options:" echo " -s Strict comparison (diff every pair of files)" @@ -337,6 +337,18 @@ while getopts ":ascrvhipCPm:t:du:w:D:A:FU" COMMAND_LINE_ARGUMENT ; do esac done +if [ -n "$AUTO_RUN" ]; then + if [ -n "$FREEBSD_ID" -o -n "$AUTO_UPGRADE" -o -n "$AUTO_INSTALL" ]; then + echo '' + echo "*** You have included the -a option along with one or more options" + echo ' that indicate that you wish mergemaster to actually make updates' + echo ' (-F, -U, or -i), however these options are not compatible.' + echo ' Please read mergemaster(8) for more information.' + echo '' + exit 1 + fi +fi + # Assign the location of the mtree database # MTREEDB=${MTREEDB:-${DESTDIR}/var/db} -- cgit v1.1 From 237d6f0e4a51d1e1fc9572f2c25fb79712126d1f Mon Sep 17 00:00:00 2001 From: qingli Date: Mon, 22 Mar 2010 23:33:40 +0000 Subject: MFC r205272 Need to set the proper flag bit when inserting ARP entries into the kernel. --- usr.sbin/ppp/arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/ppp/arp.c b/usr.sbin/ppp/arp.c index 02dce51..e67f89b 100644 --- a/usr.sbin/ppp/arp.c +++ b/usr.sbin/ppp/arp.c @@ -119,7 +119,7 @@ arp_ProxySub(struct bundle *bundle, struct in_addr addr, int add) return 0; } arpmsg.hdr.rtm_type = add ? 
RTM_ADD : RTM_DELETE; - arpmsg.hdr.rtm_flags = RTF_ANNOUNCE | RTF_HOST | RTF_STATIC; + arpmsg.hdr.rtm_flags = RTF_ANNOUNCE | RTF_HOST | RTF_STATIC | RTF_LLDATA; arpmsg.hdr.rtm_version = RTM_VERSION; arpmsg.hdr.rtm_seq = ++bundle->routing_seq; arpmsg.hdr.rtm_addrs = RTA_DST | RTA_GATEWAY; -- cgit v1.1 From c6ee0bbd3bad420718c2502836809db8bc41ee39 Mon Sep 17 00:00:00 2001 From: ed Date: Wed, 24 Mar 2010 12:11:59 +0000 Subject: MFC r205296: Properly progress through the list of IPv6 addresses using in6_addr size. Right now if a jail has multiple IPv6 addresses, it will print them shifting only 4 bytes at a time. Example: 2001:4dd0:ff41::b23f:a9 2001:4dd0:ff41::b23f:aa Becomes: 2001:4dd0:ff41::b23f:a9 ff41::b23f:a9:2001:4dd0 By casting to in6_addr, it uses the correct offsets. --- usr.sbin/jls/jls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/jls/jls.c b/usr.sbin/jls/jls.c index 3f4800c..a3a23b7 100644 --- a/usr.sbin/jls/jls.c +++ b/usr.sbin/jls/jls.c @@ -355,7 +355,7 @@ print_jail(int pflags, int jflags) count = params[7].jp_valuelen / sizeof(struct in6_addr); for (ai = 0; ai < count; ai++) if (inet_ntop(AF_INET6, - &((struct in_addr *)params[7].jp_value)[ai], + &((struct in6_addr *)params[7].jp_value)[ai], ipbuf, sizeof(ipbuf)) == NULL) err(1, "inet_ntop"); else -- cgit v1.1 From 8892b138cd33cfb6259be43fdd8377faaa820dfe Mon Sep 17 00:00:00 2001 From: gavin Date: Thu, 25 Mar 2010 12:56:20 +0000 Subject: Merge r204165 from head: Add a "-x" option to chown(8)/chgrp(1) similar to the same option in du(1), cp(1) etc, to prevent the crossing of mountpoints whilst using the commands recursively. 
PR: bin/130855 Submitted by: keramida --- usr.sbin/chown/chgrp.1 | 10 +++++++--- usr.sbin/chown/chown.8 | 12 ++++++++---- usr.sbin/chown/chown.c | 17 +++++++++++------ 3 files changed, 26 insertions(+), 13 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/chown/chgrp.1 b/usr.sbin/chown/chgrp.1 index 71b6806..8a4c271 100644 --- a/usr.sbin/chown/chgrp.1 +++ b/usr.sbin/chown/chgrp.1 @@ -31,7 +31,7 @@ .\" @(#)chgrp.1 8.3 (Berkeley) 3/31/94 .\" $FreeBSD$ .\" -.Dd April 25, 2003 +.Dd February 21, 2010 .Dt CHGRP 1 .Os .Sh NAME @@ -39,7 +39,7 @@ .Nd change group .Sh SYNOPSIS .Nm -.Op Fl fhv +.Op Fl fhvx .Oo .Fl R .Op Fl H | Fl L | Fl P @@ -89,6 +89,8 @@ If the flag is specified more than once, .Nm will print the filename, followed by the old and new numeric group ID. +.It Fl x +File system mount points are not traversed. .El .Pp The @@ -125,7 +127,9 @@ In previous versions of this system, symbolic links did not have groups. .Pp The .Fl v -option is non-standard and its use in scripts is not recommended. +and +.Fl x +options are non-standard and their use in scripts is not recommended. .Sh SEE ALSO .Xr chown 2 , .Xr fts 3 , diff --git a/usr.sbin/chown/chown.8 b/usr.sbin/chown/chown.8 index f617f73..b5882a3 100644 --- a/usr.sbin/chown/chown.8 +++ b/usr.sbin/chown/chown.8 @@ -28,7 +28,7 @@ .\" @(#)chown.8 8.3 (Berkeley) 3/31/94 .\" $FreeBSD$ .\" -.Dd April 25, 2003 +.Dd February 21, 2010 .Dt CHOWN 8 .Os .Sh NAME @@ -36,7 +36,7 @@ .Nd change file owner and group .Sh SYNOPSIS .Nm -.Op Fl fhv +.Op Fl fhvx .Oo .Fl R .Op Fl H | Fl L | Fl P @@ -44,7 +44,7 @@ .Ar owner Ns Op : Ns Ar group .Ar .Nm -.Op Fl fhv +.Op Fl fhvx .Oo .Fl R .Op Fl H | Fl L | Fl P @@ -97,6 +97,8 @@ If the flag is specified more than once, .Nm will print the filename, followed by the old and new numeric user/group ID. +.It Fl x +File system mount points are not traversed. .El .Pp The @@ -146,7 +148,9 @@ owners. .Pp The .Fl v -option is non-standard and its use in scripts is not recommended. 
+and +.Fl x +options are non-standard and their use in scripts is not recommended. .Sh SEE ALSO .Xr chgrp 1 , .Xr find 1 , diff --git a/usr.sbin/chown/chown.c b/usr.sbin/chown/chown.c index b79deca..7a22292 100644 --- a/usr.sbin/chown/chown.c +++ b/usr.sbin/chown/chown.c @@ -73,14 +73,14 @@ main(int argc, char **argv) { FTS *ftsp; FTSENT *p; - int Hflag, Lflag, Rflag, fflag, hflag, vflag; + int Hflag, Lflag, Rflag, fflag, hflag, vflag, xflag; int ch, fts_options, rval; char *cp; ischown = (strcmp(basename(argv[0]), "chown") == 0); - Hflag = Lflag = Rflag = fflag = hflag = vflag = 0; - while ((ch = getopt(argc, argv, "HLPRfhv")) != -1) + Hflag = Lflag = Rflag = fflag = hflag = vflag = xflag = 0; + while ((ch = getopt(argc, argv, "HLPRfhvx")) != -1) switch (ch) { case 'H': Hflag = 1; @@ -105,6 +105,9 @@ main(int argc, char **argv) case 'v': vflag++; break; + case 'x': + xflag = 1; + break; case '?': default: usage(); @@ -128,6 +131,8 @@ main(int argc, char **argv) } } else fts_options = hflag ? FTS_PHYSICAL : FTS_LOGICAL; + if (xflag) + fts_options |= FTS_XDEV; uid = (uid_t)-1; gid = (gid_t)-1; @@ -301,11 +306,11 @@ usage(void) if (ischown) (void)fprintf(stderr, "%s\n%s\n", - "usage: chown [-fhv] [-R [-H | -L | -P]] owner[:group]" + "usage: chown [-fhvx] [-R [-H | -L | -P]] owner[:group]" " file ...", - " chown [-fhv] [-R [-H | -L | -P]] :group file ..."); + " chown [-fhvx] [-R [-H | -L | -P]] :group file ..."); else (void)fprintf(stderr, "%s\n", - "usage: chgrp [-fhv] [-R [-H | -L | -P]] group file ..."); + "usage: chgrp [-fhvx] [-R [-H | -L | -P]] group file ..."); exit(1); } -- cgit v1.1 From 6330fbd3f2df13a1e7bdf2e591b70e291e1eac9b Mon Sep 17 00:00:00 2001 From: fabient Date: Mon, 29 Mar 2010 06:57:43 +0000 Subject: MFC r205693: Do not overflow the term in the case of multi-line display. 
--- usr.sbin/pmcstat/pmcpl_calltree.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/pmcpl_calltree.c b/usr.sbin/pmcstat/pmcpl_calltree.c index 404653e..aac7913 100644 --- a/usr.sbin/pmcstat/pmcpl_calltree.c +++ b/usr.sbin/pmcstat/pmcpl_calltree.c @@ -366,7 +366,7 @@ pmcpl_ct_node_cleartag(void) static int pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, - struct pmcpl_ct_sample *rsamples, int x, int *y) + struct pmcpl_ct_sample *rsamples, int x, int *y, int maxy) { int i; @@ -387,7 +387,7 @@ pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, if (ct->pct_narc == 0) { pmcpl_ct_topscreen[x+1][*y] = NULL; if (*y >= PMCPL_CT_MAXLINE || - *y >= pmcstat_displayheight) + *y >= maxy) return 1; *y = *y + 1; for (i=0; i < x; i++) @@ -407,7 +407,7 @@ pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) { if (pmcpl_ct_node_dumptop(pmcin, ct->pct_arc[i].pcta_child, - rsamples, x+1, y)) + rsamples, x+1, y, maxy)) return 1; } } @@ -472,6 +472,9 @@ pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) /* Check for line wrap. */ width += ns_len + is_len + vs_len + 1; if (width >= pmcstat_displaywidth) { + maxy--; + if (y >= maxy) + break; PMCSTAT_PRINTW("\n%*s", indentwidth, space); width = indentwidth + ns_len + is_len + vs_len; } @@ -515,7 +518,7 @@ pmcpl_ct_topdisplay(void) for (i = 0; i < pmcpl_ct_root->pct_narc; i++) { if (pmcpl_ct_node_dumptop(pmcin, pmcpl_ct_root->pct_arc[i].pcta_child, - &rsamples, x, &y)) { + &rsamples, x, &y, pmcstat_displayheight - 2)) { break; } } -- cgit v1.1 From 483b284ddbac32116f5db84d4d3c6e61c8efd5b0 Mon Sep 17 00:00:00 2001 From: fabient Date: Wed, 31 Mar 2010 07:10:40 +0000 Subject: MFC r205809: Wait for pmc name in the log before displaying data. This will solve an abort in case of low throughput PMCs. 
--- usr.sbin/pmcstat/pmcstat_log.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c index fc26001..9b524ff 100644 --- a/usr.sbin/pmcstat/pmcstat_log.c +++ b/usr.sbin/pmcstat/pmcstat_log.c @@ -1049,7 +1049,6 @@ pmcstat_pmcindex_to_name(int pmcin) if (pr->pr_pmcin == pmcin) return pmcstat_string_unintern(pr->pr_pmcname); - err(EX_SOFTWARE, "ERROR: cannot find pmcid name"); return NULL; } @@ -1789,19 +1788,23 @@ static void pmcstat_refresh_top(void) { char pmcname[40]; + const char *s; /* If in pause mode do not refresh display. */ if (pmcstat_pause) return; + /* Wait until PMC pop in the log. */ + s = pmcstat_pmcindex_to_name(pmcstat_pmcinfilter); + if (s == NULL) + return; + /* Format PMC name. */ if (pmcstat_mergepmc) - snprintf(pmcname, sizeof(pmcname), "[%s]", - pmcstat_pmcindex_to_name(pmcstat_pmcinfilter)); + snprintf(pmcname, sizeof(pmcname), "[%s]", s); else snprintf(pmcname, sizeof(pmcname), "%s.%d", - pmcstat_pmcindex_to_name(pmcstat_pmcinfilter), - pmcstat_pmcinfilter); + s, pmcstat_pmcinfilter); PMCSTAT_PRINTBEGIN(); PMCSTAT_PRINTW("PMC: %s Samples: %u processed, %u invalid\n\n", -- cgit v1.1 From 68a882bd5a8526479bf8001d0b3403ee7a2bfaaf Mon Sep 17 00:00:00 2001 From: jkim Date: Wed, 31 Mar 2010 16:01:48 +0000 Subject: MFC: r205855 Print memory model of the video mode except for planar memory model. 'P', 'D', 'C', 'H', and 'V' mean packed pixel, direct color, CGA, Hercules, and VGA X memory models respectively where they have fixed number of planes. 
--- usr.sbin/vidcontrol/vidcontrol.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/vidcontrol/vidcontrol.c b/usr.sbin/vidcontrol/vidcontrol.c index 847ff3e..c419c14 100644 --- a/usr.sbin/vidcontrol/vidcontrol.c +++ b/usr.sbin/vidcontrol/vidcontrol.c @@ -950,10 +950,11 @@ show_adapter_info(void) static void show_mode_info(void) { - struct video_info _info; char buf[80]; - int mode; + struct video_info _info; int c; + int mm; + int mode; printf(" mode# flags type size " "font window linear buffer\n"); @@ -972,9 +973,35 @@ show_mode_info(void) if (_info.vi_flags & V_INFO_GRAPHICS) { c = 'G'; - snprintf(buf, sizeof(buf), "%dx%dx%d %d", - _info.vi_width, _info.vi_height, - _info.vi_depth, _info.vi_planes); + if (_info.vi_mem_model == V_INFO_MM_PLANAR) + snprintf(buf, sizeof(buf), "%dx%dx%d %d", + _info.vi_width, _info.vi_height, + _info.vi_depth, _info.vi_planes); + else { + switch (_info.vi_mem_model) { + case V_INFO_MM_PACKED: + mm = 'P'; + break; + case V_INFO_MM_DIRECT: + mm = 'D'; + break; + case V_INFO_MM_CGA: + mm = 'C'; + break; + case V_INFO_MM_HGC: + mm = 'H'; + break; + case V_INFO_MM_VGAX: + mm = 'V'; + break; + default: + mm = ' '; + break; + } + snprintf(buf, sizeof(buf), "%dx%dx%d %c", + _info.vi_width, _info.vi_height, + _info.vi_depth, mm); + } } else { c = 'T'; -- cgit v1.1 From 82edcf996485c20678054c8958d204c7e46fb35f Mon Sep 17 00:00:00 2001 From: np Date: Mon, 5 Apr 2010 23:55:04 +0000 Subject: MFC r204267: Allow cxgbtool to build with WARNS=6 --- usr.sbin/cxgbtool/cxgbtool.c | 205 ++++++++++++++++++++++++--------------- usr.sbin/cxgbtool/reg_defs.c | 22 ++--- usr.sbin/cxgbtool/reg_defs_t3.c | 48 ++++----- usr.sbin/cxgbtool/reg_defs_t3b.c | 48 ++++----- usr.sbin/cxgbtool/reg_defs_t3c.c | 48 ++++----- 5 files changed, 211 insertions(+), 160 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/cxgbtool/cxgbtool.c b/usr.sbin/cxgbtool/cxgbtool.c index 
8d7e677..28d91f9 100644 --- a/usr.sbin/cxgbtool/cxgbtool.c +++ b/usr.sbin/cxgbtool/cxgbtool.c @@ -85,7 +85,8 @@ struct reg_info { static const char *progname; -static void __attribute__((noreturn)) usage(FILE *fp) +static void +usage(FILE *fp) { fprintf(fp, "Usage: %s [operation]\n", progname); fprintf(fp, @@ -136,7 +137,8 @@ doit(const char *iff_name, unsigned long cmd, void *data) return ioctl(fd, cmd, data) < 0 ? -1 : 0; } -static int get_int_arg(const char *s, uint32_t *valp) +static int +get_int_arg(const char *s, uint32_t *valp) { char *p; @@ -172,11 +174,12 @@ write_reg(const char *iff_name, uint32_t addr, uint32_t val) err(1, "register write"); } -static int register_io(int argc, char *argv[], int start_arg, +static int +register_io(int argc, char *argv[], int start_arg, const char *iff_name) { char *p; - uint32_t addr, val = 0, write = 0; + uint32_t addr, val = 0, w = 0; if (argc != start_arg + 1) return -1; @@ -184,14 +187,14 @@ static int register_io(int argc, char *argv[], int start_arg, if (p == argv[start_arg]) return -1; if (*p == '=' && p[1]) { val = strtoul(p + 1, &p, 0); - write = 1; + w = 1; } if (*p) { warnx("bad parameter \"%s\"", argv[start_arg]); return -1; } - if (write) + if (w) write_reg(iff_name, addr, val); else { val = read_reg(iff_name, addr); @@ -200,9 +203,9 @@ static int register_io(int argc, char *argv[], int start_arg, return 0; } -static int mdio_io(int argc, char *argv[], int start_arg, const char *iff_name) +static int +mdio_io(int argc, char *argv[], int start_arg, const char *iff_name) { - struct ifreq ifr; struct ch_mii_data p; unsigned int cmd, phy_addr, reg, mmd, val; @@ -230,12 +233,14 @@ static int mdio_io(int argc, char *argv[], int start_arg, const char *iff_name) return 0; } -static inline uint32_t xtract(uint32_t val, int shift, int len) +static inline +uint32_t xtract(uint32_t val, int shift, int len) { return (val >> shift) & ((1 << len) - 1); } -static int dump_block_regs(const struct reg_info *reg_array, uint32_t 
*regs) +static int +dump_block_regs(const struct reg_info *reg_array, uint32_t *regs) { uint32_t reg_val = 0; // silence compiler warning @@ -254,7 +259,8 @@ static int dump_block_regs(const struct reg_info *reg_array, uint32_t *regs) return 1; } -static int dump_regs_t2(int argc, char *argv[], int start_arg, uint32_t *regs) +static int +dump_regs_t2(int argc, char *argv[], int start_arg, uint32_t *regs) { int match = 0; char *block_name = NULL; @@ -292,8 +298,8 @@ static int dump_regs_t2(int argc, char *argv[], int start_arg, uint32_t *regs) } #if defined(CONFIG_T3_REGS) -static int dump_regs_t3(int argc, char *argv[], int start_arg, uint32_t *regs, - int is_pcie) +static int +dump_regs_t3(int argc, char *argv[], int start_arg, uint32_t *regs, int is_pcie) { int match = 0; char *block_name = NULL; @@ -353,8 +359,9 @@ static int dump_regs_t3(int argc, char *argv[], int start_arg, uint32_t *regs, return 0; } -static int dump_regs_t3b(int argc, char *argv[], int start_arg, uint32_t *regs, - int is_pcie) +static int +dump_regs_t3b(int argc, char *argv[], int start_arg, uint32_t *regs, + int is_pcie) { int match = 0; char *block_name = NULL; @@ -414,8 +421,9 @@ static int dump_regs_t3b(int argc, char *argv[], int start_arg, uint32_t *regs, return 0; } -static int dump_regs_t3c(int argc, char *argv[], int start_arg, uint32_t *regs, - int is_pcie) +static int +dump_regs_t3c(int argc, char *argv[], int start_arg, uint32_t *regs, + int is_pcie) { int match = 0; char *block_name = NULL; @@ -479,7 +487,7 @@ static int dump_regs_t3c(int argc, char *argv[], int start_arg, uint32_t *regs, static int dump_regs(int argc, char *argv[], int start_arg, const char *iff_name) { - int i, vers, revision, is_pcie; + int vers, revision, is_pcie; struct ch_ifconf_regs regs; regs.len = REGDUMP_SIZE; @@ -514,7 +522,8 @@ dump_regs(int argc, char *argv[], int start_arg, const char *iff_name) return 0; } -static int t3_meminfo(const uint32_t *regs) +static int +t3_meminfo(const uint32_t *regs) 
{ enum { SG_EGR_CNTX_BADDR = 0x58, @@ -592,11 +601,16 @@ static int t3_meminfo(const uint32_t *regs) return 0; } -static int meminfo(int argc, char *argv[], int start_arg, const char *iff_name) +static int +meminfo(int argc, char *argv[], int start_arg, const char *iff_name) { int vers; struct ch_ifconf_regs regs; + (void) argc; + (void) argv; + (void) start_arg; + regs.len = REGDUMP_SIZE; if ((regs.data = malloc(regs.len)) == NULL) err(1, "can't malloc"); @@ -612,11 +626,11 @@ static int meminfo(int argc, char *argv[], int start_arg, const char *iff_name) return 0; } -static int mtu_tab_op(int argc, char *argv[], int start_arg, - const char *iff_name) +static int +mtu_tab_op(int argc, char *argv[], int start_arg, const char *iff_name) { struct ch_mtus m; - int i; + unsigned int i; if (argc == start_arg) { if (doit(iff_name, CHELSIO_GETMTUTAB, &m) < 0) @@ -649,13 +663,14 @@ static int mtu_tab_op(int argc, char *argv[], int start_arg, } #ifdef CHELSIO_INTERNAL -static void show_egress_cntxt(uint32_t data[]) +static void +show_egress_cntxt(uint32_t data[]) { printf("credits: %u\n", data[0] & 0x7fff); printf("GTS: %u\n", (data[0] >> 15) & 1); printf("index: %u\n", data[0] >> 16); printf("queue size: %u\n", data[1] & 0xffff); - printf("base address: 0x%llx\n", + printf("base address: 0x%" PRIx64 "\n", ((data[1] >> 16) | ((uint64_t)data[2] << 16) | (((uint64_t)data[3] & 0xf) << 48)) << 12); printf("rsp queue #: %u\n", (data[3] >> 4) & 7); @@ -667,9 +682,10 @@ static void show_egress_cntxt(uint32_t data[]) printf("valid: %u\n", (data[3] >> 31) & 1); } -static void show_fl_cntxt(uint32_t data[]) +static void +show_fl_cntxt(uint32_t data[]) { - printf("base address: 0x%llx\n", + printf("base address: 0x%" PRIx64 "\n", ((uint64_t)data[0] | ((uint64_t)data[1] & 0xfffff) << 32) << 12); printf("index: %u\n", (data[1] >> 20) | ((data[2] & 0xf) << 12)); printf("queue size: %u\n", (data[2] >> 4) & 0xffff); @@ -680,11 +696,12 @@ static void show_fl_cntxt(uint32_t data[]) 
printf("GTS: %u\n", (data[3] >> 31) & 1); } -static void show_response_cntxt(uint32_t data[]) +static void +show_response_cntxt(uint32_t data[]) { printf("index: %u\n", data[0] & 0xffff); printf("size: %u\n", data[0] >> 16); - printf("base address: 0x%llx\n", + printf("base address: 0x%" PRIx64 "\n", ((uint64_t)data[1] | ((uint64_t)data[2] & 0xfffff) << 32) << 12); printf("MSI-X/RspQ: %u\n", (data[2] >> 20) & 0x3f); printf("intr enable: %u\n", (data[2] >> 26) & 1); @@ -694,11 +711,12 @@ static void show_response_cntxt(uint32_t data[]) printf("FL threshold: %u\n", data[3]); } -static void show_cq_cntxt(uint32_t data[]) +static void +show_cq_cntxt(uint32_t data[]) { printf("index: %u\n", data[0] & 0xffff); printf("size: %u\n", data[0] >> 16); - printf("base address: 0x%llx\n", + printf("base address: 0x%" PRIx64 "\n", ((uint64_t)data[1] | ((uint64_t)data[2] & 0xfffff) << 32) << 12); printf("rsp queue #: %u\n", (data[2] >> 20) & 0x3f); printf("AN: %u\n", (data[2] >> 26) & 1); @@ -710,8 +728,8 @@ static void show_cq_cntxt(uint32_t data[]) printf("credit threshold: %u\n", data[3] >> 16); } -static int get_sge_context(int argc, char *argv[], int start_arg, - const char *iff_name) +static int +get_sge_context(int argc, char *argv[], int start_arg, const char *iff_name) { struct ch_cntxt ctx; @@ -750,8 +768,8 @@ static int get_sge_context(int argc, char *argv[], int start_arg, #define ntohll(x) be64toh((x)) -static int get_sge_desc(int argc, char *argv[], int start_arg, - const char *iff_name) +static int +get_sge_desc(int argc, char *argv[], int start_arg, const char *iff_name) { uint64_t *p, wr_hdr; unsigned int n = 1, qset, qnum; @@ -796,7 +814,8 @@ static int get_sge_desc(int argc, char *argv[], int start_arg, } #endif -static int get_tcb2(int argc, char *argv[], int start_arg, const char *iff_name) +static int +get_tcb2(int argc, char *argv[], int start_arg, const char *iff_name) { uint64_t *d; unsigned int i; @@ -835,8 +854,9 @@ static int get_tcb2(int argc, char 
*argv[], int start_arg, const char *iff_name) return 0; } -static int get_pm_page_spec(const char *s, unsigned int *page_size, - unsigned int *num_pages) +static int +get_pm_page_spec(const char *s, unsigned int *page_size, + unsigned int *num_pages) { char *p; unsigned long val; @@ -854,7 +874,8 @@ static int get_pm_page_spec(const char *s, unsigned int *page_size, return *p; } -static int conf_pm(int argc, char *argv[], int start_arg, const char *iff_name) +static int +conf_pm(int argc, char *argv[], int start_arg, const char *iff_name) { struct ch_pm pm; @@ -884,8 +905,8 @@ static int conf_pm(int argc, char *argv[], int start_arg, const char *iff_name) } #ifdef CHELSIO_INTERNAL -static int dump_tcam(int argc, char *argv[], int start_arg, - const char *iff_name) +static int +dump_tcam(int argc, char *argv[], int start_arg, const char *iff_name) { unsigned int nwords; struct ch_tcam_word op; @@ -907,7 +928,8 @@ static int dump_tcam(int argc, char *argv[], int start_arg, return 0; } -static void hexdump_8b(unsigned int start, uint64_t *data, unsigned int len) +static void +hexdump_8b(unsigned int start, uint64_t *data, unsigned int len) { int i; @@ -920,8 +942,8 @@ static void hexdump_8b(unsigned int start, uint64_t *data, unsigned int len) } } -static int dump_mc7(int argc, char *argv[], int start_arg, - const char *iff_name) +static int +dump_mc7(int argc, char *argv[], int start_arg, const char *iff_name) { struct ch_mem_range mem; unsigned int mem_id, addr, len; @@ -959,10 +981,11 @@ static int dump_mc7(int argc, char *argv[], int start_arg, } #endif -/* Max FW size is 32K including version, +4 bytes for the checksum. */ +/* Max FW size is 64K including version, +4 bytes for the checksum. 
*/ #define MAX_FW_IMAGE_SIZE (64 * 1024) -static int load_fw(int argc, char *argv[], int start_arg, const char *iff_name) +static int +load_fw(int argc, char *argv[], int start_arg, const char *iff_name) { int fd, len; struct ch_mem_range op; @@ -979,12 +1002,13 @@ static int load_fw(int argc, char *argv[], int start_arg, const char *iff_name) if (!op.buf) err(1, "load firmware"); - op.len = read(fd, op.buf, MAX_FW_IMAGE_SIZE + 1); - if (op.len < 0) + len = read(fd, op.buf, MAX_FW_IMAGE_SIZE + 1); + if (len < 0) err(1, "load firmware"); - if (op.len > MAX_FW_IMAGE_SIZE) + if (len > MAX_FW_IMAGE_SIZE) errx(1, "FW image too large"); + op.len = len; if (doit(iff_name, CHELSIO_LOAD_FW, &op) < 0) err(1, "load firmware"); return 0; @@ -993,8 +1017,8 @@ static int load_fw(int argc, char *argv[], int start_arg, const char *iff_name) /* Max BOOT size is 255*512 bytes including the BIOS boot ROM basic header */ #define MAX_BOOT_IMAGE_SIZE (0xff * 512) -static int load_boot(int argc, char *argv[], - int start_arg, const char *iff_name) +static int +load_boot(int argc, char *argv[], int start_arg, const char *iff_name) { int fd, len; struct ch_mem_range op; @@ -1024,7 +1048,8 @@ static int load_boot(int argc, char *argv[], return 0; } -static int dump_proto_sram(const char *iff_name) +static int +dump_proto_sram(const char *iff_name) { int i, j; uint8_t buf[PROTO_SRAM_SIZE]; @@ -1054,15 +1079,20 @@ static int dump_proto_sram(const char *iff_name) return 0; } -static int proto_sram_op(int argc, char *argv[], int start_arg, +static int +proto_sram_op(int argc, char *argv[], int start_arg, const char *iff_name) { + (void) argv; + (void) start_arg; + if (argc == start_arg) return dump_proto_sram(iff_name); return -1; } -static int dump_qset_params(const char *iff_name) +static int +dump_qset_params(const char *iff_name) { struct ch_qset_params qp; @@ -1084,10 +1114,10 @@ static int dump_qset_params(const char *iff_name) return 0; } -static int qset_config(int argc, char *argv[], 
int start_arg, - const char *iff_name) +static int +qset_config(int argc, char *argv[], int start_arg, const char *iff_name) { - struct ch_qset_params qp; + (void) argv; if (argc == start_arg) return dump_qset_params(iff_name); @@ -1095,11 +1125,13 @@ static int qset_config(int argc, char *argv[], int start_arg, return -1; } -static int qset_num_config(int argc, char *argv[], int start_arg, - const char *iff_name) +static int +qset_num_config(int argc, char *argv[], int start_arg, const char *iff_name) { struct ch_reg reg; + (void) argv; + if (argc == start_arg) { if (doit(iff_name, CHELSIO_GET_QSET_NUM, ®) < 0) err(1, "get qsets"); @@ -1113,7 +1145,8 @@ static int qset_num_config(int argc, char *argv[], int start_arg, /* * Parse a string containing an IP address with an optional network prefix. */ -static int parse_ipaddr(const char *s, uint32_t *addr, uint32_t *mask) +static int +parse_ipaddr(const char *s, uint32_t *addr, uint32_t *mask) { char *p, *slash; struct in_addr ia; @@ -1143,7 +1176,8 @@ static int parse_ipaddr(const char *s, uint32_t *addr, uint32_t *mask) /* * Parse a string containing a value and an optional colon separated mask. */ -static int parse_val_mask_param(const char *s, uint32_t *val, uint32_t *mask) +static int +parse_val_mask_param(const char *s, uint32_t *val, uint32_t *mask) { char *p; @@ -1156,14 +1190,15 @@ static int parse_val_mask_param(const char *s, uint32_t *val, uint32_t *mask) return *p ? -1 : 0; } -static int parse_trace_param(const char *s, uint32_t *val, uint32_t *mask) +static int +parse_trace_param(const char *s, uint32_t *val, uint32_t *mask) { return strchr(s, '.') ? 
parse_ipaddr(s, val, mask) : parse_val_mask_param(s, val, mask); } -static int trace_config(int argc, char *argv[], int start_arg, - const char *iff_name) +static int +trace_config(int argc, char *argv[], int start_arg, const char *iff_name) { uint32_t val, mask; struct ch_trace trace; @@ -1238,7 +1273,8 @@ static int trace_config(int argc, char *argv[], int start_arg, return 0; } -static int get_sched_param(int argc, char *argv[], int pos, unsigned int *valp) +static int +get_sched_param(int argc, char *argv[], int pos, unsigned int *valp) { if (pos + 1 >= argc) errx(1, "missing value for %s", argv[pos]); @@ -1247,7 +1283,8 @@ static int get_sched_param(int argc, char *argv[], int pos, unsigned int *valp) return 0; } -static int tx_sched(int argc, char *argv[], int start_arg, const char *iff_name) +static int +tx_sched(int argc, char *argv[], int start_arg, const char *iff_name) { struct ch_hw_sched op; unsigned int idx, val; @@ -1293,7 +1330,8 @@ static int tx_sched(int argc, char *argv[], int start_arg, const char *iff_name) return 0; } -static int pktsched(int argc, char *argv[], int start_arg, const char *iff_name) +static int +pktsched(int argc, char *argv[], int start_arg, const char *iff_name) { struct ch_pktsched_params op; unsigned int idx, min = -1, max, binding = -1; @@ -1333,20 +1371,29 @@ static int pktsched(int argc, char *argv[], int start_arg, const char *iff_name) return 0; } -static int clear_stats(int argc, char *argv[], int start_arg, - const char *iff_name) +static int +clear_stats(int argc, char *argv[], int start_arg, const char *iff_name) { + (void) argc; + (void) argv; + (void) start_arg; + if (doit(iff_name, CHELSIO_CLEAR_STATS, NULL) < 0) err(1, "clearstats"); return 0; } -static int get_up_la(int argc, char *argv[], int start_arg, const char *iff_name) +static int +get_up_la(int argc, char *argv[], int start_arg, const char *iff_name) { struct ch_up_la la; int i, idx, max_idx, entries; + (void) argc; + (void) argv; + (void) start_arg; + 
la.stopped = 0; la.idx = -1; la.bufsize = LA_BUFSIZE; @@ -1372,11 +1419,16 @@ static int get_up_la(int argc, char *argv[], int start_arg, const char *iff_name return 0; } -static int get_up_ioqs(int argc, char *argv[], int start_arg, const char *iff_name) +static int +get_up_ioqs(int argc, char *argv[], int start_arg, const char *iff_name) { struct ch_up_ioqs ioqs; int i, entries; + (void) argc; + (void) argv; + (void) start_arg; + bzero(&ioqs, sizeof(ioqs)); ioqs.bufsize = IOQS_BUFSIZE; ioqs.data = malloc(IOQS_BUFSIZE); @@ -1465,10 +1517,12 @@ run_cmd(int argc, char *argv[], const char *iff_name) static int run_cmd_loop(int argc, char *argv[], const char *iff_name) { - int n, i; + int n; + unsigned int i; char buf[64]; char *args[8], *s; + (void) argc; args[0] = argv[0]; args[1] = argv[1]; @@ -1481,11 +1535,8 @@ run_cmd_loop(int argc, char *argv[], const char *iff_name) for (;;) { fprintf(stdout, "> "); fflush(stdout); - n = read(STDIN_FILENO, buf, sizeof(buf)); - if (n > sizeof(buf) - 1) { - fprintf(stdout, "too much input.\n"); - return (0); - } else if (n <= 0) + n = read(STDIN_FILENO, buf, sizeof(buf) - 1); + if (n <= 0) return (0); if (buf[--n] != '\n') diff --git a/usr.sbin/cxgbtool/reg_defs.c b/usr.sbin/cxgbtool/reg_defs.c index 734061f..687bb75 100644 --- a/usr.sbin/cxgbtool/reg_defs.c +++ b/usr.sbin/cxgbtool/reg_defs.c @@ -106,7 +106,7 @@ struct reg_info sge_regs[] = { { "Packet_Too_Big", 3, 1 }, { "Packet_Mismatch", 4, 1 }, { "SG_RESPACCUTIMER", 0xc0, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info mc3_regs[] = { @@ -196,7 +196,7 @@ struct reg_info mc3_regs[] = { { "MC3_Uncorr_Err", 1, 1 }, { "MC3_Parity_Err", 2, 8 }, { "MC3_Addr_Err", 10, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info mc4_regs[] = { @@ -276,7 +276,7 @@ struct reg_info mc4_regs[] = { { "MC4_Corr_Err", 0, 1 }, { "MC4_Uncorr_Err", 1, 1 }, { "MC4_Addr_Err", 2, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info tpi_regs[] = { @@ -290,7 +290,7 @@ struct reg_info tpi_regs[] = { { 
"INT_DIR", 31, 1 }, { "TPI_PAR", 0x29c, 0 }, { "TPIPAR", 0, 7 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info tp_regs[] = { @@ -509,7 +509,7 @@ struct reg_info tp_regs[] = { { "DROP_TICKS_CNT", 4, 26 }, { "NUM_PKTS_DROPPED", 0, 4 }, { "TP_TX_DROP_COUNT", 0x4bc, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info rat_regs[] = { @@ -532,7 +532,7 @@ struct reg_info rat_regs[] = { { "CspiFramingError", 1, 1 }, { "SgeFramingError", 2, 1 }, { "TpFramingError", 3, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info cspi_regs[] = { @@ -560,7 +560,7 @@ struct reg_info cspi_regs[] = { { "TXDrop", 2, 1 }, { "RXOverflow", 3, 1 }, { "RAMParityErr", 4, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info espi_regs[] = { @@ -660,7 +660,7 @@ struct reg_info espi_regs[] = { { "Error_Ack", 9, 1 }, { "Unmapped_Err", 10, 1 }, { "Transaction_Timer", 16, 8 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info ulp_regs[] = { @@ -682,7 +682,7 @@ struct reg_info ulp_regs[] = { { "Pm_E2C_Wrt_Full", 24, 1 }, { "Pm_C2E_Wrt_Full", 25, 1 }, { "ULP_PIO_CTRL", 0x998, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info pl_regs[] = { @@ -712,7 +712,7 @@ struct reg_info pl_regs[] = { { "PL_Intr_CSPI", 9, 1 }, { "PL_Intr_PCIX", 10, 1 }, { "PL_Intr_EXT", 11, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info mc5_regs[] = { @@ -833,5 +833,5 @@ struct reg_info mc5_regs[] = { { "MC5_DATA_WRITE_CMD", 0xcf4, 0 }, { "MC5_DATA_READ_CMD", 0xcf8, 0 }, { "MC5_MASK_WRITE_CMD", 0xcfc, 0 }, - { NULL } + { NULL, 0, 0 } }; diff --git a/usr.sbin/cxgbtool/reg_defs_t3.c b/usr.sbin/cxgbtool/reg_defs_t3.c index ffa4aef..6e9b8b1 100644 --- a/usr.sbin/cxgbtool/reg_defs_t3.c +++ b/usr.sbin/cxgbtool/reg_defs_t3.c @@ -140,7 +140,7 @@ struct reg_info sge3_regs[] = { { "DrbPriThrsh", 0, 16 }, { "SG_DEBUG_INDEX", 0x78, 0 }, { "SG_DEBUG_DATA", 0x7c, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info pcix1_regs[] = { @@ -212,7 +212,7 @@ struct reg_info pcix1_regs[] = { { "WakeUp0", 2, 1 }, { "SleepMode1", 1, 1 }, { "SleepMode0", 0, 
1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info pcie0_regs[] = { @@ -411,7 +411,7 @@ struct reg_info pcie0_regs[] = { { "BISTDone", 24, 8 }, { "BISTCycleThresh", 3, 16 }, { "BISTMode", 0, 3 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3dbg_regs[] = { @@ -557,7 +557,7 @@ struct reg_info t3dbg_regs[] = { { "PMON_CDEL_MANUAL", 4, 1 }, { "PMON_MANUAL", 1, 1 }, { "PMON_AUTO", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info mc7_pmrx_regs[] = { @@ -674,7 +674,7 @@ struct reg_info mc7_pmrx_regs[] = { { "PE", 2, 15 }, { "UE", 1, 1 }, { "CE", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info mc7_pmtx_regs[] = { @@ -791,7 +791,7 @@ struct reg_info mc7_pmtx_regs[] = { { "PE", 2, 15 }, { "UE", 1, 1 }, { "CE", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info mc7_cm_regs[] = { @@ -908,7 +908,7 @@ struct reg_info mc7_cm_regs[] = { { "PE", 2, 15 }, { "UE", 1, 1 }, { "CE", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info cim_regs[] = { @@ -1024,7 +1024,7 @@ struct reg_info cim_regs[] = { { "CIM_CDEBUGDATA", 0x2d0, 0 }, { "CDebugDataH", 16, 16 }, { "CDebugDataL", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info tp1_regs[] = { @@ -1384,7 +1384,7 @@ struct reg_info tp1_regs[] = { { "TP_EMBED_OP_FIELD3", 0x4f4, 0 }, { "TP_EMBED_OP_FIELD4", 0x4f8, 0 }, { "TP_EMBED_OP_FIELD5", 0x4fc, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info ulp2_rx_regs[] = { @@ -1428,7 +1428,7 @@ struct reg_info ulp2_rx_regs[] = { { "ULPRX_RQ_ULIMIT", 0x538, 0 }, { "ULPRX_PBL_LLIMIT", 0x53c, 0 }, { "ULPRX_PBL_ULIMIT", 0x540, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info ulp2_tx_regs[] = { @@ -1456,7 +1456,7 @@ struct reg_info ulp2_tx_regs[] = { { "ULPTX_DMA_WEIGHT", 0x5ac, 0 }, { "D1_WEIGHT", 16, 16 }, { "D0_WEIGHT", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info pm1_rx_regs[] = { @@ -1500,7 +1500,7 @@ struct reg_info pm1_rx_regs[] = { { "ocspi1_ofifo2x_Tx_framing_error", 6, 1 }, { "iespi_par_error", 3, 3 }, { "ocspi_par_error", 0, 3 }, - { NULL } + { 
NULL, 0, 0 } }; struct reg_info pm1_tx_regs[] = { @@ -1544,7 +1544,7 @@ struct reg_info pm1_tx_regs[] = { { "oespi1_ofifo2x_Tx_framing_error", 6, 1 }, { "icspi_par_error", 3, 3 }, { "oespi_par_error", 0, 3 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info mps0_regs[] = { @@ -1585,7 +1585,7 @@ struct reg_info mps0_regs[] = { { "RXTpParErr", 4, 2 }, { "TX1TpParErr", 2, 2 }, { "TX0TpParErr", 0, 2 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info cpl_switch_regs[] = { @@ -1616,7 +1616,7 @@ struct reg_info cpl_switch_regs[] = { { "cpl_map_tbl_idx", 0, 8 }, { "CPL_MAP_TBL_DATA", 0x65c, 0 }, { "cpl_map_tbl_data", 0, 8 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info smb0_regs[] = { @@ -1682,7 +1682,7 @@ struct reg_info smb0_regs[] = { { "DebugDataL", 0, 16 }, { "SMB_DEBUG_LA", 0x69c, 0 }, { "DebugLAReqAddr", 0, 10 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info i2cm0_regs[] = { @@ -1695,7 +1695,7 @@ struct reg_info i2cm0_regs[] = { { "Ack", 30, 1 }, { "Cont", 1, 1 }, { "Op", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info mi1_regs[] = { @@ -1714,7 +1714,7 @@ struct reg_info mi1_regs[] = { { "Busy", 31, 1 }, { "Inc", 2, 1 }, { "Op", 0, 2 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info jm1_regs[] = { @@ -1727,7 +1727,7 @@ struct reg_info jm1_regs[] = { { "JM_OP", 0x6cc, 0 }, { "Busy", 31, 1 }, { "Cnt", 0, 5 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info sf1_regs[] = { @@ -1737,7 +1737,7 @@ struct reg_info sf1_regs[] = { { "Cont", 3, 1 }, { "ByteCnt", 1, 2 }, { "Op", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info pl3_regs[] = { @@ -1839,7 +1839,7 @@ struct reg_info pl3_regs[] = { { "PL_REV", 0x6f4, 0 }, { "Rev", 0, 4 }, { "PL_CLI", 0x6f8, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info mc5a_regs[] = { @@ -2010,7 +2010,7 @@ struct reg_info mc5a_regs[] = { { "ReadCmd", 0, 20 }, { "MC5_DB_MASK_WRITE_CMD", 0x7fc, 0 }, { "MaskWr", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info xgmac0_0_regs[] = { @@ -2341,7 +2341,7 @@ struct reg_info 
xgmac0_0_regs[] = { { "XGM_RX_SPI4_SOP_EOP_CNT", 0x9ac, 0 }, { "RxSPI4SopCnt", 16, 16 }, { "RxSPI4EopCnt", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info xgmac0_1_regs[] = { @@ -2672,5 +2672,5 @@ struct reg_info xgmac0_1_regs[] = { { "XGM_RX_SPI4_SOP_EOP_CNT", 0xbac, 0 }, { "RxSPI4SopCnt", 16, 16 }, { "RxSPI4EopCnt", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; diff --git a/usr.sbin/cxgbtool/reg_defs_t3b.c b/usr.sbin/cxgbtool/reg_defs_t3b.c index 539742c..cd85d84 100644 --- a/usr.sbin/cxgbtool/reg_defs_t3b.c +++ b/usr.sbin/cxgbtool/reg_defs_t3b.c @@ -150,7 +150,7 @@ struct reg_info t3b_sge3_regs[] = { { "DrbPriThrsh", 0, 16 }, { "SG_DEBUG_INDEX", 0x78, 0 }, { "SG_DEBUG_DATA", 0x7c, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_pcix1_regs[] = { @@ -222,7 +222,7 @@ struct reg_info t3b_pcix1_regs[] = { { "WakeUp0", 2, 1 }, { "SleepMode1", 1, 1 }, { "SleepMode0", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_pcie0_regs[] = { @@ -376,7 +376,7 @@ struct reg_info t3b_pcie0_regs[] = { { "BeaconDetect", 2, 1 }, { "RxDetect", 1, 1 }, { "TxIdleDetect", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_t3dbg_regs[] = { @@ -557,7 +557,7 @@ struct reg_info t3b_t3dbg_regs[] = { { "BSEnLane1", 4, 1 }, { "BSInSelLane0", 1, 2 }, { "BSEnLane0", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_mc7_pmrx_regs[] = { @@ -678,7 +678,7 @@ struct reg_info t3b_mc7_pmrx_regs[] = { { "PE", 2, 15 }, { "UE", 1, 1 }, { "CE", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_mc7_pmtx_regs[] = { @@ -799,7 +799,7 @@ struct reg_info t3b_mc7_pmtx_regs[] = { { "PE", 2, 15 }, { "UE", 1, 1 }, { "CE", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_mc7_cm_regs[] = { @@ -920,7 +920,7 @@ struct reg_info t3b_mc7_cm_regs[] = { { "PE", 2, 15 }, { "UE", 1, 1 }, { "CE", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_cim_regs[] = { @@ -1047,7 +1047,7 @@ struct reg_info t3b_cim_regs[] = { { "PILADbgWrPtr", 0, 9 }, { "CIM_PO_LA_DEBUGDATA", 
0x2e8, 0 }, { "CIM_PI_LA_DEBUGDATA", 0x2ec, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_tp1_regs[] = { @@ -1453,7 +1453,7 @@ struct reg_info t3b_tp1_regs[] = { { "TP_EMBED_OP_FIELD3", 0x4f4, 0 }, { "TP_EMBED_OP_FIELD4", 0x4f8, 0 }, { "TP_EMBED_OP_FIELD5", 0x4fc, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_ulp2_rx_regs[] = { @@ -1497,7 +1497,7 @@ struct reg_info t3b_ulp2_rx_regs[] = { { "ULPRX_RQ_ULIMIT", 0x538, 0 }, { "ULPRX_PBL_LLIMIT", 0x53c, 0 }, { "ULPRX_PBL_ULIMIT", 0x540, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_ulp2_tx_regs[] = { @@ -1525,7 +1525,7 @@ struct reg_info t3b_ulp2_tx_regs[] = { { "ULPTX_DMA_WEIGHT", 0x5ac, 0 }, { "D1_WEIGHT", 16, 16 }, { "D0_WEIGHT", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_pm1_rx_regs[] = { @@ -1569,7 +1569,7 @@ struct reg_info t3b_pm1_rx_regs[] = { { "ocspi1_ofifo2x_Tx_framing_error", 6, 1 }, { "iespi_par_error", 3, 3 }, { "ocspi_par_error", 0, 3 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_pm1_tx_regs[] = { @@ -1613,7 +1613,7 @@ struct reg_info t3b_pm1_tx_regs[] = { { "oespi1_ofifo2x_Tx_framing_error", 6, 1 }, { "icspi_par_error", 3, 3 }, { "oespi_par_error", 0, 3 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_mps0_regs[] = { @@ -1655,7 +1655,7 @@ struct reg_info t3b_mps0_regs[] = { { "RXTpParErr", 4, 2 }, { "TX1TpParErr", 2, 2 }, { "TX0TpParErr", 0, 2 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_cpl_switch_regs[] = { @@ -1686,7 +1686,7 @@ struct reg_info t3b_cpl_switch_regs[] = { { "cpl_map_tbl_idx", 0, 8 }, { "CPL_MAP_TBL_DATA", 0x65c, 0 }, { "cpl_map_tbl_data", 0, 8 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_smb0_regs[] = { @@ -1752,7 +1752,7 @@ struct reg_info t3b_smb0_regs[] = { { "DebugDataL", 0, 16 }, { "SMB_DEBUG_LA", 0x69c, 0 }, { "DebugLAReqAddr", 0, 10 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_i2cm0_regs[] = { @@ -1765,7 +1765,7 @@ struct reg_info t3b_i2cm0_regs[] = { { "Ack", 30, 1 }, { "Cont", 1, 1 }, { "Op", 
0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_mi1_regs[] = { @@ -1784,7 +1784,7 @@ struct reg_info t3b_mi1_regs[] = { { "Busy", 31, 1 }, { "Inc", 2, 1 }, { "Op", 0, 2 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_jm1_regs[] = { @@ -1797,7 +1797,7 @@ struct reg_info t3b_jm1_regs[] = { { "JM_OP", 0x6cc, 0 }, { "Busy", 31, 1 }, { "Cnt", 0, 5 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_sf1_regs[] = { @@ -1807,7 +1807,7 @@ struct reg_info t3b_sf1_regs[] = { { "Cont", 3, 1 }, { "ByteCnt", 1, 2 }, { "Op", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_pl3_regs[] = { @@ -1917,7 +1917,7 @@ struct reg_info t3b_pl3_regs[] = { { "PL_CLI", 0x6f8, 0 }, { "PL_LCK", 0x6fc, 0 }, { "Lck", 0, 2 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_mc5a_regs[] = { @@ -2100,7 +2100,7 @@ struct reg_info t3b_mc5a_regs[] = { { "ReadCmd", 0, 20 }, { "MC5_DB_MASK_WRITE_CMD", 0x7fc, 0 }, { "MaskWr", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_xgmac0_0_regs[] = { @@ -2464,7 +2464,7 @@ struct reg_info t3b_xgmac0_0_regs[] = { { "XGM_RX_SPI4_SOP_EOP_CNT", 0x9ac, 0 }, { "RxSPI4SopCnt", 16, 16 }, { "RxSPI4EopCnt", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3b_xgmac0_1_regs[] = { @@ -2828,5 +2828,5 @@ struct reg_info t3b_xgmac0_1_regs[] = { { "XGM_RX_SPI4_SOP_EOP_CNT", 0xbac, 0 }, { "RxSPI4SopCnt", 16, 16 }, { "RxSPI4EopCnt", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; diff --git a/usr.sbin/cxgbtool/reg_defs_t3c.c b/usr.sbin/cxgbtool/reg_defs_t3c.c index 6127fa4..b9181b6 100644 --- a/usr.sbin/cxgbtool/reg_defs_t3c.c +++ b/usr.sbin/cxgbtool/reg_defs_t3c.c @@ -177,7 +177,7 @@ struct reg_info t3c_sge3_regs[] = { { "DrbPriThrsh", 0, 16 }, { "SG_DEBUG_INDEX", 0x78, 0 }, { "SG_DEBUG_DATA", 0x7c, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_pcix1_regs[] = { @@ -282,7 +282,7 @@ struct reg_info t3c_pcix1_regs[] = { { "IntSt", 4, 3 }, { "PIOSt", 2, 2 }, { "RFReqRdSt", 0, 2 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info 
t3c_pcie0_regs[] = { @@ -475,7 +475,7 @@ struct reg_info t3c_pcie0_regs[] = { { "P_WMark", 18, 11 }, { "NP_WMark", 11, 7 }, { "CPL_WMark", 0, 11 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_t3dbg_regs[] = { @@ -656,7 +656,7 @@ struct reg_info t3c_t3dbg_regs[] = { { "BSEnLane1", 4, 1 }, { "BSInSelLane0", 1, 2 }, { "BSEnLane0", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_mc7_pmrx_regs[] = { @@ -777,7 +777,7 @@ struct reg_info t3c_mc7_pmrx_regs[] = { { "PE", 2, 15 }, { "UE", 1, 1 }, { "CE", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_mc7_pmtx_regs[] = { @@ -898,7 +898,7 @@ struct reg_info t3c_mc7_pmtx_regs[] = { { "PE", 2, 15 }, { "UE", 1, 1 }, { "CE", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_mc7_cm_regs[] = { @@ -1019,7 +1019,7 @@ struct reg_info t3c_mc7_cm_regs[] = { { "PE", 2, 15 }, { "UE", 1, 1 }, { "CE", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_cim_regs[] = { @@ -1194,7 +1194,7 @@ struct reg_info t3c_cim_regs[] = { { "PILADbgWrPtr", 0, 9 }, { "CIM_PO_LA_DEBUGDATA", 0x2e8, 0 }, { "CIM_PI_LA_DEBUGDATA", 0x2ec, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_tp1_regs[] = { @@ -1667,7 +1667,7 @@ struct reg_info t3c_tp1_regs[] = { { "TP_EMBED_OP_FIELD3", 0x4f4, 0 }, { "TP_EMBED_OP_FIELD4", 0x4f8, 0 }, { "TP_EMBED_OP_FIELD5", 0x4fc, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_ulp2_rx_regs[] = { @@ -1725,7 +1725,7 @@ struct reg_info t3c_ulp2_rx_regs[] = { { "ULPRX_RQ_ULIMIT", 0x538, 0 }, { "ULPRX_PBL_LLIMIT", 0x53c, 0 }, { "ULPRX_PBL_ULIMIT", 0x540, 0 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_ulp2_tx_regs[] = { @@ -1766,7 +1766,7 @@ struct reg_info t3c_ulp2_tx_regs[] = { { "ULPTX_DMA_WEIGHT", 0x5ac, 0 }, { "D1_WEIGHT", 16, 16 }, { "D0_WEIGHT", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_pm1_rx_regs[] = { @@ -1810,7 +1810,7 @@ struct reg_info t3c_pm1_rx_regs[] = { { "ocspi1_ofifo2x_Tx_framing_error", 6, 1 }, { "iespi_par_error", 3, 3 }, { 
"ocspi_par_error", 0, 3 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_pm1_tx_regs[] = { @@ -1854,7 +1854,7 @@ struct reg_info t3c_pm1_tx_regs[] = { { "oespi1_ofifo2x_Tx_framing_error", 6, 1 }, { "icspi_par_error", 3, 3 }, { "oespi_par_error", 0, 3 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_mps0_regs[] = { @@ -1896,7 +1896,7 @@ struct reg_info t3c_mps0_regs[] = { { "RXTpParErr", 4, 2 }, { "TX1TpParErr", 2, 2 }, { "TX0TpParErr", 0, 2 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_cpl_switch_regs[] = { @@ -1930,7 +1930,7 @@ struct reg_info t3c_cpl_switch_regs[] = { { "cpl_map_tbl_idx", 0, 8 }, { "CPL_MAP_TBL_DATA", 0x65c, 0 }, { "cpl_map_tbl_data", 0, 8 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_smb0_regs[] = { @@ -1996,7 +1996,7 @@ struct reg_info t3c_smb0_regs[] = { { "DebugDataL", 0, 16 }, { "SMB_DEBUG_LA", 0x69c, 0 }, { "DebugLAReqAddr", 0, 10 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_i2cm0_regs[] = { @@ -2009,7 +2009,7 @@ struct reg_info t3c_i2cm0_regs[] = { { "Ack", 30, 1 }, { "Cont", 1, 1 }, { "Op", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_mi1_regs[] = { @@ -2028,7 +2028,7 @@ struct reg_info t3c_mi1_regs[] = { { "Busy", 31, 1 }, { "Inc", 2, 1 }, { "Op", 0, 2 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_jm1_regs[] = { @@ -2041,7 +2041,7 @@ struct reg_info t3c_jm1_regs[] = { { "JM_OP", 0x6cc, 0 }, { "Busy", 31, 1 }, { "Cnt", 0, 5 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_sf1_regs[] = { @@ -2051,7 +2051,7 @@ struct reg_info t3c_sf1_regs[] = { { "Cont", 3, 1 }, { "ByteCnt", 1, 2 }, { "Op", 0, 1 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_pl3_regs[] = { @@ -2162,7 +2162,7 @@ struct reg_info t3c_pl3_regs[] = { { "PL_CLI", 0x6f8, 0 }, { "PL_LCK", 0x6fc, 0 }, { "Lck", 0, 2 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_mc5a_regs[] = { @@ -2346,7 +2346,7 @@ struct reg_info t3c_mc5a_regs[] = { { "ReadCmd", 0, 20 }, { "MC5_DB_MASK_WRITE_CMD", 0x7fc, 0 }, { "MaskWr", 0, 
16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_xgmac0_0_regs[] = { @@ -2730,7 +2730,7 @@ struct reg_info t3c_xgmac0_0_regs[] = { { "XGM_RX_SPI4_SOP_EOP_CNT", 0x9ac, 0 }, { "RxSPI4SopCnt", 16, 16 }, { "RxSPI4EopCnt", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; struct reg_info t3c_xgmac0_1_regs[] = { @@ -3114,6 +3114,6 @@ struct reg_info t3c_xgmac0_1_regs[] = { { "XGM_RX_SPI4_SOP_EOP_CNT", 0xbac, 0 }, { "RxSPI4SopCnt", 16, 16 }, { "RxSPI4EopCnt", 0, 16 }, - { NULL } + { NULL, 0, 0 } }; -- cgit v1.1 From 90c13151289bd7d7e6cdf2458565706430098a13 Mon Sep 17 00:00:00 2001 From: trasz Date: Tue, 6 Apr 2010 10:34:15 +0000 Subject: MFC r201211: Remove pppd and SLIP-related stuff. --- usr.sbin/crunch/examples/really-big.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/crunch/examples/really-big.conf b/usr.sbin/crunch/examples/really-big.conf index 45a10de..bc692c6 100644 --- a/usr.sbin/crunch/examples/really-big.conf +++ b/usr.sbin/crunch/examples/really-big.conf @@ -27,7 +27,7 @@ progs badsect bim clri disklabel dmesg dump dumpfs fdisk fsck halt progs ifconfig init mknod modload modunload mount mount_isofs progs mount_lofs mount_msdosfs mount_portalfs mountd progs newfs nfsd nfsiod ping quotacheck reboot restore route routed savecore -progs shutdown slattach swapon ttyflags tunefs umount +progs shutdown swapon ttyflags tunefs umount # shell scripts: fastboot ln dump rdump @@ -71,7 +71,7 @@ progs ac accton amd arp bad144 catman chown chroot config config.new cron progs dev_mkdb diskpart edquota flcopy gettable grfinfo hilinfo htable inetd progs iostat iteconfig kvm_mkdb mtree named portmap pppd progs pstat pwd_mkdb quot quotaon rarpd rbootd repquota rmt rpc.bootparamd -progs rwhod sa sliplogin slstats spray sysctl syslogd tcpdump +progs rwhod sa spray sysctl syslogd tcpdump progs traceroute trpt trsp update vipw vnconfig ypbind yppoll ypset special amd srcdir /usr/src/usr.sbin/amd/amd -- cgit v1.1 From 
439a7cb804223ba6784e9acb0b6e706774c9fe22 Mon Sep 17 00:00:00 2001 From: fabient Date: Mon, 12 Apr 2010 21:37:28 +0000 Subject: MFC r206090: Improve "top" header by: - Display sample received per PMCs (or merged PMCs). - Display percentage vs all samples --- usr.sbin/pmcstat/pmcpl_callgraph.c | 1 + usr.sbin/pmcstat/pmcpl_calltree.c | 9 ++++++ usr.sbin/pmcstat/pmcstat_log.c | 60 +++++++++++++++++++++++++++++--------- usr.sbin/pmcstat/pmcstat_log.h | 2 ++ 4 files changed, 59 insertions(+), 13 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/pmcpl_callgraph.c b/usr.sbin/pmcstat/pmcpl_callgraph.c index 84e2819..53d342b 100644 --- a/usr.sbin/pmcstat/pmcpl_callgraph.c +++ b/usr.sbin/pmcstat/pmcpl_callgraph.c @@ -341,6 +341,7 @@ pmcpl_cg_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode); if (parent == NULL) { pmcstat_stats.ps_callchain_dubious_frames++; + pmcr->pr_dubious_frames++; return; } diff --git a/usr.sbin/pmcstat/pmcpl_calltree.c b/usr.sbin/pmcstat/pmcpl_calltree.c index aac7913..4f62c62 100644 --- a/usr.sbin/pmcstat/pmcpl_calltree.c +++ b/usr.sbin/pmcstat/pmcpl_calltree.c @@ -403,6 +403,10 @@ pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, &pmcin, pmcpl_ct_arc_compare); for (i = 0; i < ct->pct_narc; i++) { + /* Skip this arc if there is no sample at all. */ + if (PMCPL_CT_SAMPLE(pmcin, + &ct->pct_arc[i].pcta_samples) == 0) + continue; if (PMCPL_CT_SAMPLEP(pmcin, &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) { if (pmcpl_ct_node_dumptop(pmcin, @@ -516,6 +520,10 @@ pmcpl_ct_topdisplay(void) x = y = 0; for (i = 0; i < pmcpl_ct_root->pct_narc; i++) { + /* Skip this arc if there is no sample at all. 
*/ + if (PMCPL_CT_SAMPLE(pmcin, + &pmcpl_ct_root->pct_arc[i].pcta_samples) == 0) + continue; if (pmcpl_ct_node_dumptop(pmcin, pmcpl_ct_root->pct_arc[i].pcta_child, &rsamples, x, &y, pmcstat_displayheight - 2)) { @@ -693,6 +701,7 @@ pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, } if (n-- == 0) { pmcstat_stats.ps_callchain_dubious_frames++; + pmcr->pr_dubious_frames++; return; } diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c index 9b524ff..51f66ca 100644 --- a/usr.sbin/pmcstat/pmcstat_log.c +++ b/usr.sbin/pmcstat/pmcstat_log.c @@ -247,6 +247,7 @@ static int pmcstat_string_compute_hash(const char *_string); static void pmcstat_string_initialize(void); static int pmcstat_string_lookup_hash(pmcstat_interned_string _is); static void pmcstat_string_shutdown(void); +static void pmcstat_stats_reset(void); /* * A simple implementation of interned strings. Each interned string @@ -274,6 +275,21 @@ int pmcstat_npmcs; */ int pmcstat_pause; +static void +pmcstat_stats_reset(void) +{ + struct pmcstat_pmcrecord *pr; + + /* Flush PMCs stats. */ + LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) { + pr->pr_samples = 0; + pr->pr_dubious_frames = 0; + } + + /* Flush global stats. */ + bzero(&pmcstat_stats, sizeof(struct pmcstat_stats)); +} + /* * Compute a 'hash' value for a string. */ @@ -1009,6 +1025,8 @@ pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps) pr->pr_pmcid = pmcid; pr->pr_pmcname = ps; pr->pr_pmcin = pmcstat_npmcs++; + pr->pr_samples = 0; + pr->pr_dubious_frames = 0; pr->pr_merge = prm == NULL ? pr : prm; LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next); @@ -1387,6 +1405,7 @@ pmcstat_analyze_log(void) /* Get PMC record. */ pmcr = pmcstat_lookup_pmcid(ev.pl_u.pl_s.pl_pmcid); assert(pmcr != NULL); + pmcr->pr_samples++; /* * Call the plugins processing @@ -1420,6 +1439,7 @@ pmcstat_analyze_log(void) /* Get PMC record. 
*/ pmcr = pmcstat_lookup_pmcid(ev.pl_u.pl_cc.pl_pmcid); assert(pmcr != NULL); + pmcr->pr_samples++; /* * Call the plugins processing @@ -1787,32 +1807,46 @@ pmcstat_process_log(void) static void pmcstat_refresh_top(void) { + int v_attrs; + float v; char pmcname[40]; - const char *s; + struct pmcstat_pmcrecord *pmcpr; /* If in pause mode do not refresh display. */ if (pmcstat_pause) return; /* Wait until PMC pop in the log. */ - s = pmcstat_pmcindex_to_name(pmcstat_pmcinfilter); - if (s == NULL) + pmcpr = pmcstat_pmcindex_to_pmcr(pmcstat_pmcinfilter); + if (pmcpr == NULL) return; /* Format PMC name. */ if (pmcstat_mergepmc) - snprintf(pmcname, sizeof(pmcname), "[%s]", s); + snprintf(pmcname, sizeof(pmcname), "[%s]", + pmcstat_string_unintern(pmcpr->pr_pmcname)); else snprintf(pmcname, sizeof(pmcname), "%s.%d", - s, pmcstat_pmcinfilter); + pmcstat_string_unintern(pmcpr->pr_pmcname), + pmcstat_pmcinfilter); + + /* Format samples count. */ + if (pmcstat_stats.ps_samples_total > 0) + v = (pmcpr->pr_samples * 100.0) / + pmcstat_stats.ps_samples_total; + else + v = 0.; + v_attrs = PMCSTAT_ATTRPERCENT(v); PMCSTAT_PRINTBEGIN(); - PMCSTAT_PRINTW("PMC: %s Samples: %u processed, %u invalid\n\n", + PMCSTAT_PRINTW("PMC: %s Samples: %u ", pmcname, - pmcstat_stats.ps_samples_total, - pmcstat_stats.ps_samples_unknown_offset + - pmcstat_stats.ps_samples_indeterminable + - pmcstat_stats.ps_callchain_dubious_frames); + pmcpr->pr_samples); + PMCSTAT_ATTRON(v_attrs); + PMCSTAT_PRINTW("(%.1f%%) ", v); + PMCSTAT_ATTROFF(v_attrs); + PMCSTAT_PRINTW(", %u unresolved\n\n", + pmcpr->pr_dubious_frames); if (plugins[args.pa_plugin].pl_topdisplay != NULL) plugins[args.pa_plugin].pl_topdisplay(); PMCSTAT_PRINTEND(); @@ -1879,7 +1913,7 @@ pmcstat_keypress_log(void) */ if (plugins[args.pa_plugin].pl_shutdown != NULL) plugins[args.pa_plugin].pl_shutdown(NULL); - bzero(&pmcstat_stats, sizeof(struct pmcstat_stats)); + pmcstat_stats_reset(); if (plugins[args.pa_plugin].pl_init != NULL) 
plugins[args.pa_plugin].pl_init(); @@ -1900,7 +1934,7 @@ pmcstat_keypress_log(void) } while (plugins[args.pa_plugin].pl_topdisplay == NULL); /* Open new plugin. */ - bzero(&pmcstat_stats, sizeof(struct pmcstat_stats)); + pmcstat_stats_reset(); if (plugins[args.pa_plugin].pl_init != NULL) plugins[args.pa_plugin].pl_init(); wprintw(w, "switching to plugin %s", @@ -1949,7 +1983,7 @@ pmcstat_display_log(void) if (args.pa_topmode == PMCSTAT_TOP_DELTA) { if (plugins[args.pa_plugin].pl_shutdown != NULL) plugins[args.pa_plugin].pl_shutdown(NULL); - bzero(&pmcstat_stats, sizeof(struct pmcstat_stats)); + pmcstat_stats_reset(); if (plugins[args.pa_plugin].pl_init != NULL) plugins[args.pa_plugin].pl_init(); } diff --git a/usr.sbin/pmcstat/pmcstat_log.h b/usr.sbin/pmcstat/pmcstat_log.h index de92649..8936fad 100644 --- a/usr.sbin/pmcstat/pmcstat_log.h +++ b/usr.sbin/pmcstat/pmcstat_log.h @@ -146,6 +146,8 @@ struct pmcstat_pmcrecord { pmc_id_t pr_pmcid; int pr_pmcin; pmcstat_interned_string pr_pmcname; + int pr_samples; + int pr_dubious_frames; struct pmcstat_pmcrecord *pr_merge; }; extern LIST_HEAD(pmcstat_pmcs, pmcstat_pmcrecord) pmcstat_pmcs; /* PMC list */ -- cgit v1.1 From ef6247d8377b73b56a5684aa98440127479c8717 Mon Sep 17 00:00:00 2001 From: imp Date: Tue, 13 Apr 2010 00:48:54 +0000 Subject: MFC r203710: When you have multiple addresses on the same network on different interfaces (such as when you are part of a carp pool), and you run rpcbind -h to restrict which interfaces have rpc services, rpcbind can none-the-less return addresses that aren't in the -h list. This patch enforces the rule that when you specify -h on the command line, then services returned from rpcbind must be to one of the addresses listed in -h, or be a loopback address (since localhost is implicit when running -h). The root cause of this is the assumption in addrmerge that there can be only one interface that matches a given network IP address. This turns out not to be the case. 
To retain historical behavior, I didn't try to fix the routine to prefer the address that the request came into, since I didn't know the side effects that might cause in the normal case. My quick analysis suggests that it wouldn't be a problem, but since this code is tricky I opted for the more conservative patch of only restricting the reply when -h is in effect. Hence, this change will have no effect when you are running rpcbind without -h. Reviewed by: alfred@ Sponsored by: iX Systems MFC after: 2 weeks --- usr.sbin/rpcbind/rpcbind.c | 76 ++++++++++++++++++++++++++++++++++++++++++++-- usr.sbin/rpcbind/rpcbind.h | 9 ++++++ usr.sbin/rpcbind/util.c | 20 ++++++------ 3 files changed, 93 insertions(+), 12 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/rpcbind/rpcbind.c b/usr.sbin/rpcbind/rpcbind.c index b601da5..5a76a68 100644 --- a/usr.sbin/rpcbind/rpcbind.c +++ b/usr.sbin/rpcbind/rpcbind.c @@ -92,6 +92,7 @@ int oldstyle_local = 0; int verboselog = 0; char **hosts = NULL; +struct sockaddr **bound_sa; int ipv6_only = 0; int nhosts = 0; int on = 1; @@ -119,6 +120,7 @@ static void rbllist_add(rpcprog_t, rpcvers_t, struct netconfig *, struct netbuf *); static void terminate(int); static void parseargs(int, char *[]); +static void update_bound_sa(void); int main(int argc, char *argv[]) @@ -130,6 +132,8 @@ main(int argc, char *argv[]) parseargs(argc, argv); + update_bound_sa(); + /* Check that another rpcbind isn't already running. */ if ((rpcbindlockfd = (open(RPCBINDDLOCK, O_RDONLY|O_CREAT, 0444))) == -1) @@ -323,8 +327,7 @@ init_transport(struct netconfig *nconf) * If no hosts were specified, just bind to INADDR_ANY. * Otherwise make sure 127.0.0.1 is added to the list. */ - nhostsbak = nhosts; - nhostsbak++; + nhostsbak = nhosts + 1; hosts = realloc(hosts, nhostsbak * sizeof(char *)); if (nhostsbak == 1) hosts[0] = "*"; @@ -657,6 +660,75 @@ error: return (1); } +/* + * Create the list of addresses that we're bound to. 
Normally, this + * list is empty because we're listening on the wildcard address + * (nhosts == 0). If -h is specified on the command line, then + * bound_sa will have a list of the addresses that the program binds + * to specifically. This function takes that list and converts them to + * struct sockaddr * and stores them in bound_sa. + */ +static void +update_bound_sa(void) +{ + struct addrinfo hints, *res = NULL; + int i; + + if (nhosts == 0) + return; + bound_sa = malloc(sizeof(*bound_sa) * nhosts); + memset(&hints, 0, sizeof(hints)); + hints.ai_family = PF_UNSPEC; + for (i = 0; i < nhosts; i++) { + if (getaddrinfo(hosts[i], NULL, &hints, &res) != 0) + continue; + bound_sa[i] = malloc(res->ai_addrlen); + memcpy(bound_sa[i], res->ai_addr, res->ai_addrlen); + } +} + +/* + * Match the sa against the list of addresses we've bound to. If + * we've not specifically bound to anything, we match everything. + * Otherwise, if the IPv4 or IPv6 address matches one of the addresses + * in bound_sa, we return true. If not, we return false. + */ +int +listen_addr(const struct sockaddr *sa) +{ + int i; + + /* + * If nhosts == 0, then there were no -h options on the + * command line, so all addresses are addresses we're + * listening to.
+ */ + if (nhosts == 0) + return 1; + for (i = 0; i < nhosts; i++) { + if (bound_sa[i] == NULL || + sa->sa_family != bound_sa[i]->sa_family) + continue; + switch (sa->sa_family) { + case AF_INET: + if (memcmp(&SA2SINADDR(sa), &SA2SINADDR(bound_sa[i]), + sizeof(struct in_addr)) == 0) + return (1); + break; +#ifdef INET6 + case AF_INET6: + if (memcmp(&SA2SIN6ADDR(sa), &SA2SIN6ADDR(bound_sa[i]), + sizeof(struct in6_addr)) == 0) + return (1); + break; +#endif + default: + break; + } + } + return (0); +} + static void rbllist_add(rpcprog_t prog, rpcvers_t vers, struct netconfig *nconf, struct netbuf *addr) diff --git a/usr.sbin/rpcbind/rpcbind.h b/usr.sbin/rpcbind/rpcbind.h index 5537ce4..717bb3b 100644 --- a/usr.sbin/rpcbind/rpcbind.h +++ b/usr.sbin/rpcbind/rpcbind.h @@ -134,6 +134,7 @@ void read_warmstart(void); char *addrmerge(struct netbuf *caller, char *serv_uaddr, char *clnt_uaddr, char *netid); +int listen_addr(const struct sockaddr *sa); void network_init(void); struct sockaddr *local_sa(int); @@ -141,4 +142,12 @@ struct sockaddr *local_sa(int); #define RPCB_ALLVERS 0 #define RPCB_ONEVERS 1 +/* To convert a struct sockaddr to IPv4 or IPv6 address */ +#define SA2SIN(sa) ((struct sockaddr_in *)(sa)) +#define SA2SINADDR(sa) (SA2SIN(sa)->sin_addr) +#ifdef INET6 +#define SA2SIN6(sa) ((struct sockaddr_in6 *)(sa)) +#define SA2SIN6ADDR(sa) (SA2SIN6(sa)->sin6_addr) +#endif + #endif /* rpcbind_h */ diff --git a/usr.sbin/rpcbind/util.c b/usr.sbin/rpcbind/util.c index 66797a7..9cdfa70 100644 --- a/usr.sbin/rpcbind/util.c +++ b/usr.sbin/rpcbind/util.c @@ -58,13 +58,6 @@ #include "rpcbind.h" -#define SA2SIN(sa) ((struct sockaddr_in *)(sa)) -#define SA2SINADDR(sa) (SA2SIN(sa)->sin_addr) -#ifdef INET6 -#define SA2SIN6(sa) ((struct sockaddr_in6 *)(sa)) -#define SA2SIN6ADDR(sa) (SA2SIN6(sa)->sin6_addr) -#endif - static struct sockaddr_in *local_in4; #ifdef INET6 static struct sockaddr_in6 *local_in6; @@ -176,9 +169,13 @@ addrmerge(struct netbuf *caller, char *serv_uaddr, char 
*clnt_uaddr, goto freeit; /* - * Loop through all interfaces. For each interface, see if the - * network portion of its address is equal to that of the client. - * If so, we have found the interface that we want to use. + * Loop through all interfaces. For each interface, see if it + * is either the loopback interface (which we always listen + * on) or is one of the addresses the program bound to (the + * wildcard by default, or a subset if -h is specified) and + * the network portion of its address is equal to that of the + * client. If so, we have found the interface that we want to + * use. */ bestif = NULL; for (ifap = ifp; ifap != NULL; ifap = ifap->ifa_next) { @@ -189,6 +186,9 @@ addrmerge(struct netbuf *caller, char *serv_uaddr, char *clnt_uaddr, !(ifap->ifa_flags & IFF_UP)) continue; + if (!(ifap->ifa_flags & IFF_LOOPBACK) && !listen_addr(ifsa)) + continue; + switch (hint_sa->sa_family) { case AF_INET: /* -- cgit v1.1 From 3d28eb4488b3d038e08f7a076b1d2dfda3fc9966 Mon Sep 17 00:00:00 2001 From: emaste Date: Tue, 13 Apr 2010 18:46:18 +0000 Subject: MFC r205880 by ru: - Handle calloc() allocation failures. - Fixed a comment. - 2 -> EXIT_FAILURE in some places. - errx() -> err() where appropriate. PR: 144644 Submitted by: Garrett Cooper Also fix endinclude() prototype to avoid compiler warning. 
--- usr.sbin/config/config.y | 10 ++++++++++ usr.sbin/config/lang.l | 5 ++++- usr.sbin/config/main.c | 20 +++++++++++--------- usr.sbin/config/mkmakefile.c | 2 ++ usr.sbin/config/mkoptions.c | 10 ++++++++++ 5 files changed, 37 insertions(+), 10 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/config/config.y b/usr.sbin/config/config.y index 9425daf..19f2779 100644 --- a/usr.sbin/config/config.y +++ b/usr.sbin/config/config.y @@ -166,6 +166,8 @@ Config_spec: CPU Save_id { struct cputype *cp = (struct cputype *)calloc(1, sizeof (struct cputype)); + if (cp == NULL) + err(EXIT_FAILURE, "calloc"); cp->cpu_name = $2; SLIST_INSERT_HEAD(&cputype, cp, cpu_next); } | @@ -197,6 +199,8 @@ Config_spec: struct hint *hint; hint = (struct hint *)calloc(1, sizeof (struct hint)); + if (hint == NULL) + err(EXIT_FAILURE, "calloc"); hint->hint_name = $2; STAILQ_INSERT_TAIL(&hints, hint, hint_next); hintmode = 1; @@ -331,6 +335,8 @@ newfile(char *name) struct files_name *nl; nl = (struct files_name *) calloc(1, sizeof *nl); + if (nl == NULL) + err(EXIT_FAILURE, "calloc"); nl->f_name = name; STAILQ_INSERT_TAIL(&fntab, nl, f_next); } @@ -364,6 +370,8 @@ newdev(char *name) } np = (struct device *) calloc(1, sizeof *np); + if (np == NULL) + err(EXIT_FAILURE, "calloc"); np->d_name = name; STAILQ_INSERT_TAIL(&dtab, np, d_next); } @@ -422,6 +430,8 @@ newopt(struct opt_head *list, char *name, char *value, int append) } op = (struct opt *)calloc(1, sizeof (struct opt)); + if (op == NULL) + err(EXIT_FAILURE, "calloc"); op->op_name = name; op->op_ownfile = 0; op->op_value = value; diff --git a/usr.sbin/config/lang.l b/usr.sbin/config/lang.l index 075f21f..4eb94da 100644 --- a/usr.sbin/config/lang.l +++ b/usr.sbin/config/lang.l @@ -33,6 +33,7 @@ #include #include +#include #include #include "y.tab.h" #include "config.h" @@ -220,6 +221,8 @@ cfgfile_add(const char *fname) struct cfgfile *cf; cf = calloc(1, sizeof(*cf)); + if (cf == NULL) + err(EXIT_FAILURE, "calloc"); assert(cf != NULL); 
asprintf(&cf->cfg_path, "%s", fname); STAILQ_INSERT_TAIL(&cfgfiles, cf, cfg_next); @@ -285,7 +288,7 @@ include(const char *fname, int ateof) * Terminate the most recent inclusion. */ static int -endinclude() +endinclude(void) { struct incl *in; int ateof; diff --git a/usr.sbin/config/main.c b/usr.sbin/config/main.c index 34806ab..2b5e055 100644 --- a/usr.sbin/config/main.c +++ b/usr.sbin/config/main.c @@ -120,7 +120,7 @@ main(int argc, char **argv) if (*destdir == '\0') strlcpy(destdir, optarg, sizeof(destdir)); else - errx(2, "directory already set"); + errx(EXIT_FAILURE, "directory already set"); break; case 'g': debugging++; @@ -175,7 +175,7 @@ main(int argc, char **argv) if (mkdir(p, 0777)) err(2, "%s", p); } else if (!S_ISDIR(buf.st_mode)) - errx(2, "%s isn't a directory", p); + errx(EXIT_FAILURE, "%s isn't a directory", p); SLIST_INIT(&cputype); SLIST_INIT(&mkopt); @@ -256,7 +256,7 @@ get_srcdir(void) int i; if (realpath("../..", srcdir) == NULL) - errx(2, "Unable to find root of source tree"); + err(EXIT_FAILURE, "Unable to find root of source tree"); if ((pwd = getenv("PWD")) != NULL && *pwd == '/' && (pwd = strdup(pwd)) != NULL) { /* Remove the last two path components. */ @@ -513,7 +513,7 @@ configfile(void) } sbuf_finish(sb); /* - * We print first part of the tamplate, replace our tag with + * We print first part of the template, replace our tag with * configuration files content and later continue writing our * template. 
*/ @@ -650,6 +650,8 @@ remember(const char *file) } } hl = calloc(1, sizeof(*hl)); + if (hl == NULL) + err(EXIT_FAILURE, "calloc"); hl->h_name = s; hl->h_next = htab; htab = hl; @@ -671,19 +673,19 @@ kernconfdump(const char *file) r = open(file, O_RDONLY); if (r == -1) - errx(EXIT_FAILURE, "Couldn't open file '%s'", file); + err(EXIT_FAILURE, "Couldn't open file '%s'", file); error = fstat(r, &st); if (error == -1) - errx(EXIT_FAILURE, "fstat() failed"); + err(EXIT_FAILURE, "fstat() failed"); if (S_ISDIR(st.st_mode)) errx(EXIT_FAILURE, "'%s' is a directory", file); fp = fdopen(r, "r"); if (fp == NULL) - errx(EXIT_FAILURE, "fdopen() failed"); + err(EXIT_FAILURE, "fdopen() failed"); osz = 1024; o = calloc(1, osz); if (o == NULL) - errx(EXIT_FAILURE, "Couldn't allocate memory"); + err(EXIT_FAILURE, "Couldn't allocate memory"); /* ELF note section header. */ asprintf(&cmd, "/usr/bin/elfdump -c %s | grep -A 5 kern_conf" "| tail -2 | cut -d ' ' -f 2 | paste - - -", file); @@ -703,7 +705,7 @@ kernconfdump(const char *file) "INCLUDE_CONFIG_FILE", file); r = fseek(fp, off, SEEK_CUR); if (r != 0) - errx(EXIT_FAILURE, "fseek() failed"); + err(EXIT_FAILURE, "fseek() failed"); for (i = 0; i < size - 1; i++) { r = fgetc(fp); if (r == EOF) diff --git a/usr.sbin/config/mkmakefile.c b/usr.sbin/config/mkmakefile.c index a89db8e..f0d37fa 100644 --- a/usr.sbin/config/mkmakefile.c +++ b/usr.sbin/config/mkmakefile.c @@ -98,6 +98,8 @@ new_fent(void) struct file_list *fp; fp = (struct file_list *) calloc(1, sizeof *fp); + if (fp == NULL) + err(EXIT_FAILURE, "calloc"); STAILQ_INSERT_TAIL(&ftab, fp, f_next); return (fp); } diff --git a/usr.sbin/config/mkoptions.c b/usr.sbin/config/mkoptions.c index c4bd624..5cd9d61 100644 --- a/usr.sbin/config/mkoptions.c +++ b/usr.sbin/config/mkoptions.c @@ -70,6 +70,8 @@ options(void) /* Fake the cpu types as options. 
*/ SLIST_FOREACH(cp, &cputype, cpu_next) { op = (struct opt *)calloc(1, sizeof(*op)); + if (op == NULL) + err(EXIT_FAILURE, "calloc"); op->op_name = ns(cp->cpu_name); SLIST_INSERT_HEAD(&opt, op, op_next); } @@ -84,6 +86,8 @@ options(void) /* Fake MAXUSERS as an option. */ op = (struct opt *)calloc(1, sizeof(*op)); + if (op == NULL) + err(EXIT_FAILURE, "calloc"); op->op_name = ns("MAXUSERS"); snprintf(buf, sizeof(buf), "%d", maxusers); op->op_value = ns(buf); @@ -199,6 +203,8 @@ do_option(char *name) tidy++; } else { op = (struct opt *) calloc(1, sizeof *op); + if (op == NULL) + err(EXIT_FAILURE, "calloc"); op->op_name = inw; op->op_value = invalue; SLIST_INSERT_HEAD(&op_head, op, op_next); @@ -225,6 +231,8 @@ do_option(char *name) if (value && !seen) { /* New option appears */ op = (struct opt *) calloc(1, sizeof *op); + if (op == NULL) + err(EXIT_FAILURE, "calloc"); op->op_name = ns(name); op->op_value = value ? ns(value) : NULL; SLIST_INSERT_HEAD(&op_head, op, op_next); @@ -336,6 +344,8 @@ next: } po = (struct opt_list *) calloc(1, sizeof *po); + if (po == NULL) + err(EXIT_FAILURE, "calloc"); po->o_name = this; po->o_file = val; SLIST_INSERT_HEAD(&otab, po, o_next); -- cgit v1.1 From 9b85332a7712aa8858207f79bf399b2695fb2f72 Mon Sep 17 00:00:00 2001 From: imp Date: Sun, 18 Apr 2010 00:57:30 +0000 Subject: MFC r206664: Allow option aliasing. Lines of the form: OLD_OPT = NEW_OPT in options* files will now map OLD_OPT to NEW_OPT with a friendly message. This is intended for situations where we need to preserve an interface in the config file in an upwards compatible fashion on a stable branch.
Reviewed by: nwhitehorn@ MFC after: 3 days --- usr.sbin/config/config.h | 2 ++ usr.sbin/config/mkoptions.c | 27 ++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/config/config.h b/usr.sbin/config/config.h index d55c96b..ec19986 100644 --- a/usr.sbin/config/config.h +++ b/usr.sbin/config/config.h @@ -129,6 +129,8 @@ SLIST_HEAD(opt_head, opt) opt, mkopt, rmopts; struct opt_list { char *o_name; char *o_file; + int o_flags; +#define OL_ALIAS 1 SLIST_ENTRY(opt_list) o_next; }; diff --git a/usr.sbin/config/mkoptions.c b/usr.sbin/config/mkoptions.c index 5cd9d61..1a6ccc8 100644 --- a/usr.sbin/config/mkoptions.c +++ b/usr.sbin/config/mkoptions.c @@ -94,6 +94,17 @@ options(void) SLIST_INSERT_HEAD(&opt, op, op_next); read_options(); + SLIST_FOREACH(op, &opt, op_next) { + SLIST_FOREACH(ol, &otab, o_next) { + if (eq(op->op_name, ol->o_name) && + (ol->o_flags & OL_ALIAS)) { + printf("Mapping option %s to %s.\n", + op->op_name, ol->o_file); + op->op_name = ol->o_file; + break; + } + } + } SLIST_FOREACH(ol, &otab, o_next) do_option(ol->o_name); SLIST_FOREACH(op, &opt, op_next) { @@ -124,7 +135,6 @@ do_option(char *name) int tidy; file = tooption(name); - /* * Check to see if the option was specified.. 
*/ @@ -292,6 +302,7 @@ read_options(void) struct opt_list *po; int first = 1; char genopt[MAXPATHLEN]; + int flags = 0; SLIST_INIT(&otab); (void) snprintf(fname, sizeof(fname), "../../conf/options"); @@ -301,6 +312,7 @@ openit: return; } next: + flags = 0; wd = get_word(fp); if (wd == (char *)EOF) { (void) fclose(fp); @@ -332,6 +344,18 @@ next: (void) snprintf(genopt, sizeof(genopt), "opt_%s.h", lower(s)); val = genopt; free(s); + } else if (eq(val, "=")) { + val = get_word(fp); + if (val == (char *)EOF) { + printf("%s: unexpected end of file\n", fname); + exit(1); + } + if (val == 0) { + printf("%s: Expected a right hand side at %s\n", fname, + this); + exit(1); + } + flags |= OL_ALIAS; } val = ns(val); @@ -348,6 +372,7 @@ next: err(EXIT_FAILURE, "calloc"); po->o_name = this; po->o_file = val; + po->o_flags = flags; SLIST_INSERT_HEAD(&otab, po, o_next); goto next; -- cgit v1.1 From d3b88ea404bdea318bcfe7e6cbf61db919c62c6a Mon Sep 17 00:00:00 2001 From: ume Date: Sun, 18 Apr 2010 04:15:21 +0000 Subject: MFC r206156, r206159, r206163: services_mkdb; generate db file from services(5) to increase speed of getserv*() --- usr.sbin/Makefile | 1 + usr.sbin/services_mkdb/Makefile | 10 + usr.sbin/services_mkdb/services_mkdb.8 | 97 ++++++++ usr.sbin/services_mkdb/services_mkdb.c | 429 +++++++++++++++++++++++++++++++++ usr.sbin/services_mkdb/uniq.c | 159 ++++++++++++ 5 files changed, 696 insertions(+) create mode 100644 usr.sbin/services_mkdb/Makefile create mode 100644 usr.sbin/services_mkdb/services_mkdb.8 create mode 100644 usr.sbin/services_mkdb/services_mkdb.c create mode 100644 usr.sbin/services_mkdb/uniq.c (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index 5df3ca2..4cb5709 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -168,6 +168,7 @@ SUBDIR= ${_ac} \ ${_sade} \ ${_sendmail} \ service \ + services_mkdb \ setfib \ setfmac \ setpmac \ diff --git a/usr.sbin/services_mkdb/Makefile b/usr.sbin/services_mkdb/Makefile new file mode 
100644 index 0000000..659cdb8 --- /dev/null +++ b/usr.sbin/services_mkdb/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +PROG= services_mkdb +MAN= services_mkdb.8 +SRCS= services_mkdb.c uniq.c + +DPADD+= ${LIBUTIL} +LDADD+= -lutil + +.include diff --git a/usr.sbin/services_mkdb/services_mkdb.8 b/usr.sbin/services_mkdb/services_mkdb.8 new file mode 100644 index 0000000..6468fd2 --- /dev/null +++ b/usr.sbin/services_mkdb/services_mkdb.8 @@ -0,0 +1,97 @@ +.\" $NetBSD: services_mkdb.8,v 1.9 2009/05/13 22:36:39 wiz Exp $ +.\" +.\" Copyright (c) 1999 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Luke Mewburn. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd April 4, 2010 +.Dt SERVICES_MKDB 8 +.Os +.Sh NAME +.Nm services_mkdb +.Nd generate the services databases +.Sh SYNOPSIS +.Nm +.Op Fl q +.Op Fl o Ar database +.Op file +.Nm +.Fl u +.Op file +.Sh DESCRIPTION +.Nm +creates a +.Xr db 3 +database for the specified file. +If no file is specified, then +.Pa /etc/services +is used. +The database is installed into +.Pa /var/db/services.db . +The file must be in the correct format (see +.Xr services 5 ) . +.Pp +The options are as follows: +.Bl -tag -width indent +.It Fl o Ar database +Put the output databases in the named file. +.It Fl q +Don't warn about duplicate services. +.It Fl u +Print the services file to stdout, omitting duplicate entries and comments. +.El +.Pp +The databases are used by the C library services routines (see +.Xr getservent 3 ) . +.Pp +.Nm +exits zero on success, non-zero on failure. +.Sh FILES +.Bl -tag -width 24n -compact +.It Pa /var/db/services.db +The current services database. +.It Pa /var/db/services.db.tmp +A temporary file. +.It Pa /etc/services +The current services file. +.El +.Sh SEE ALSO +.Xr db 3 , +.Xr getservent 3 , +.Xr services 5 +.Sh BUGS +Because +.Nm +guarantees not to install a partial destination file it must +build a temporary file in the same file system and if successful use +.Xr rename 2 +to install over the destination file. 
+.Pp +If +.Nm +fails it will leave the previous version of the destination file intact. diff --git a/usr.sbin/services_mkdb/services_mkdb.c b/usr.sbin/services_mkdb/services_mkdb.c new file mode 100644 index 0000000..f4cf62a --- /dev/null +++ b/usr.sbin/services_mkdb/services_mkdb.c @@ -0,0 +1,429 @@ +/* $NetBSD: services_mkdb.c,v 1.14 2008/04/28 20:24:17 martin Exp $ */ + +/*- + * Copyright (c) 1999 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Luke Mewburn and Christos Zoulas. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static char tname[MAXPATHLEN]; + +#define PMASK 0xffff +#define PROTOMAX 5 + +extern void uniq(const char *); + +static void add(DB *, StringList *, size_t, const char *, size_t *, int); +static StringList ***parseservices(const char *, StringList *); +static void cleanup(void); +static void store(DB *, DBT *, DBT *, int); +static void killproto(DBT *); +static char *getstring(const char *, size_t, char **, const char *); +static size_t getprotoindex(StringList *, const char *); +static const char *getprotostr(StringList *, size_t); +static const char *mkaliases(StringList *, char *, size_t); +static void usage(void); + +const HASHINFO hinfo = { + .bsize = 256, + .ffactor = 4, + .nelem = 32768, + .cachesize = 1024, + .hash = NULL, + .lorder = 0 +}; + + +int +main(int argc, char *argv[]) +{ + DB *db; + int ch; + const char *fname = _PATH_SERVICES; + const char *dbname = _PATH_SERVICES_DB; + int warndup = 1; + int unique = 0; + int otherflag = 0; + size_t cnt = 0; + StringList *sl, ***svc; + size_t port, proto; + + setprogname(argv[0]); + + while ((ch = getopt(argc, argv, "qo:u")) != -1) + switch (ch) { + case 'q': + otherflag = 1; + warndup = 0; + break; + case 'o': + otherflag = 1; + dbname = optarg; + break; + case 'u': + unique++; + break; + case '?': + default: + usage(); + } + + argc -= optind; + argv += optind; + + if (argc > 1 || (unique && otherflag)) + usage(); + if (argc == 1) + fname = argv[0]; + + if (unique) + uniq(fname); + + svc = parseservices(fname, sl = sl_init()); + + if (atexit(cleanup)) + err(1, "Cannot install exit handler"); + + (void)snprintf(tname, sizeof(tname), "%s.tmp", dbname); + db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL, + (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH), DB_HASH, &hinfo); + if (!db) + err(1, "Error opening temporary database 
`%s'", tname); + + + for (port = 0; port < PMASK + 1; port++) { + if (svc[port] == NULL) + continue; + + for (proto = 0; proto < PROTOMAX; proto++) { + StringList *s; + if ((s = svc[port][proto]) == NULL) + continue; + add(db, s, port, getprotostr(sl, proto), &cnt, warndup); + } + + free(svc[port]); + } + + free(svc); + sl_free(sl, 1); + + if ((db->close)(db)) + err(1, "Error closing temporary database `%s'", tname); + + if (rename(tname, dbname) == -1) + err(1, "Cannot rename `%s' to `%s'", tname, dbname); + + return 0; +} + +static void +add(DB *db, StringList *sl, size_t port, const char *proto, size_t *cnt, + int warndup) +{ + size_t i; + char keyb[BUFSIZ], datab[BUFSIZ], abuf[BUFSIZ]; + DBT data, key; + key.data = keyb; + data.data = datab; + +#ifdef DEBUG + (void)printf("add %s %zu %s [ ", sl->sl_str[0], port, proto); + for (i = 1; i < sl->sl_cur; i++) + (void)printf("%s ", sl->sl_str[i]); + (void)printf("]\n"); +#endif + + /* key `indirect key', data `full line' */ + data.size = snprintf(datab, sizeof(datab), "%zu", (*cnt)++) + 1; + key.size = snprintf(keyb, sizeof(keyb), "%s %zu/%s %s", + sl->sl_str[0], port, proto, mkaliases(sl, abuf, sizeof(abuf))) + 1; + store(db, &data, &key, warndup); + + /* key `\377port/proto', data = `indirect key' */ + key.size = snprintf(keyb, sizeof(keyb), "\377%zu/%s", + port, proto) + 1; + store(db, &key, &data, warndup); + + /* key `\377port', data = `indirect key' */ + killproto(&key); + store(db, &key, &data, warndup); + + /* add references for service and all aliases */ + for (i = 0; i < sl->sl_cur; i++) { + /* key `\376service/proto', data = `indirect key' */ + key.size = snprintf(keyb, sizeof(keyb), "\376%s/%s", + sl->sl_str[i], proto) + 1; + store(db, &key, &data, warndup); + + /* key `\376service', data = `indirect key' */ + killproto(&key); + store(db, &key, &data, warndup); + } + sl_free(sl, 1); +} + +static StringList *** +parseservices(const char *fname, StringList *sl) +{ + size_t len, line, pindex; + FILE *fp; + 
StringList ***svc, *s; + char *p, *ep; + + if ((fp = fopen(fname, "r")) == NULL) + err(1, "Cannot open `%s'", fname); + + line = 0; + if ((svc = calloc(PMASK + 1, sizeof(StringList **))) == NULL) + err(1, "Cannot allocate %zu bytes", (size_t)(PMASK + 1)); + + /* XXX: change NULL to "\0\0#" when fparseln fixed */ + for (; (p = fparseln(fp, &len, &line, NULL, 0)) != NULL; free(p)) { + char *name, *port, *proto, *aliases, *cp, *alias; + unsigned long pnum; + + if (len == 0) + continue; + + for (cp = p; *cp && isspace((unsigned char)*cp); cp++) + continue; + + if (*cp == '\0' || *cp == '#') + continue; + + if ((name = getstring(fname, line, &cp, "name")) == NULL) + continue; + + if ((port = getstring(fname, line, &cp, "port")) == NULL) + continue; + + if (cp) { + for (aliases = cp; *cp && *cp != '#'; cp++) + continue; + + if (*cp) + *cp = '\0'; + } else + aliases = NULL; + + proto = strchr(port, '/'); + if (proto == NULL || proto[1] == '\0') { + warnx("%s, %zu: no protocol found", fname, line); + continue; + } + *proto++ = '\0'; + + errno = 0; + pnum = strtoul(port, &ep, 0); + if (*port == '\0' || *ep != '\0') { + warnx("%s, %zu: invalid port `%s'", fname, line, port); + continue; + } + if ((errno == ERANGE && pnum == ULONG_MAX) || pnum > PMASK) { + warnx("%s, %zu: port too big `%s'", fname, line, port); + continue; + } + + if (svc[pnum] == NULL) { + svc[pnum] = calloc(PROTOMAX, sizeof(StringList *)); + if (svc[pnum] == NULL) + err(1, "Cannot allocate %zu bytes", + (size_t)PROTOMAX); + } + + pindex = getprotoindex(sl, proto); + if (svc[pnum][pindex] == NULL) + s = svc[pnum][pindex] = sl_init(); + else + s = svc[pnum][pindex]; + + /* build list of aliases */ + if (sl_find(s, name) == NULL) { + char *p2; + + if ((p2 = strdup(name)) == NULL) + err(1, "Cannot copy string"); + (void)sl_add(s, p2); + } + + if (aliases) { + while ((alias = strsep(&aliases, " \t")) != NULL) { + if (alias[0] == '\0') + continue; + if (sl_find(s, alias) == NULL) { + char *p2; + + if ((p2 = 
strdup(alias)) == NULL) + err(1, "Cannot copy string"); + (void)sl_add(s, p2); + } + } + } + } + (void)fclose(fp); + return svc; +} + +/* + * cleanup(): Remove temporary files upon exit + */ +static void +cleanup(void) +{ + if (tname[0]) + (void)unlink(tname); +} + +static char * +getstring(const char *fname, size_t line, char **cp, const char *tag) +{ + char *str; + + while ((str = strsep(cp, " \t")) != NULL && *str == '\0') + continue; + + if (str == NULL) + warnx("%s, %zu: no %s found", fname, line, tag); + + return str; +} + +static void +killproto(DBT *key) +{ + char *p, *d = key->data; + + if ((p = strchr(d, '/')) == NULL) + abort(); + *p++ = '\0'; + key->size = p - d; +} + +static void +store(DB *db, DBT *key, DBT *data, int warndup) +{ +#ifdef DEBUG + int k = key->size - 1; + int d = data->size - 1; + (void)printf("store [%*.*s] [%*.*s]\n", + k, k, (char *)key->data + 1, + d, d, (char *)data->data + 1); +#endif + switch ((db->put)(db, key, data, R_NOOVERWRITE)) { + case 0: + break; + case 1: + if (warndup) + warnx("duplicate service `%s'", + &((char *)key->data)[1]); + break; + case -1: + err(1, "put"); + break; + default: + abort(); + break; + } +} + +static size_t +getprotoindex(StringList *sl, const char *str) +{ + size_t i; + char *p; + + for (i= 0; i < sl->sl_cur; i++) + if (strcmp(sl->sl_str[i], str) == 0) + return i; + + if (i == PROTOMAX) + errx(1, "Ran out of protocols adding `%s';" + " recompile with larger PROTOMAX", str); + if ((p = strdup(str)) == NULL) + err(1, "Cannot copy string"); + (void)sl_add(sl, p); + return i; +} + +static const char * +getprotostr(StringList *sl, size_t i) +{ + assert(i < sl->sl_cur); + return sl->sl_str[i]; +} + +static const char * +mkaliases(StringList *sl, char *buf, size_t len) +{ + size_t nc, i, pos; + + buf[0] = 0; + for (i = 1, pos = 0; i < sl->sl_cur; i++) { + nc = strlcpy(buf + pos, sl->sl_str[i], len); + if (nc >= len) + goto out; + pos += nc; + len -= nc; + nc = strlcpy(buf + pos, " ", len); + if (nc >= 
len) + goto out; + pos += nc; + len -= nc; + } + return buf; +out: + warn("aliases for `%s' truncated", sl->sl_str[0]); + return buf; +} + +static void +usage(void) +{ + (void)fprintf(stderr, "Usage:\t%s [-q] [-o ] []\n" + "\t%s -u []\n", getprogname(), getprogname()); + exit(1); +} diff --git a/usr.sbin/services_mkdb/uniq.c b/usr.sbin/services_mkdb/uniq.c new file mode 100644 index 0000000..0674b4b --- /dev/null +++ b/usr.sbin/services_mkdb/uniq.c @@ -0,0 +1,159 @@ +/* $NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $ */ + +/*- + * Copyright (c) 2007 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christos Zoulas. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include + +extern const HASHINFO hinfo; + +void uniq(const char *); +static int comp(const char *, char **, size_t *); + +/* + * Preserve only unique content lines in a file. Input lines that have + * content [alphanumeric characters before a comment] are white-space + * normalized and have their comments removed. Then they are placed + * in a hash table, and only the first instance of them is printed. + * Comment lines without any alphanumeric content are always printed + * since they are there to make the file "pretty". Comment lines with + * alphanumeric content are also placed into the hash table and only + * printed once. 
+ */ +void +uniq(const char *fname) +{ + DB *db; + DBT key; + static const DBT data = { NULL, 0 }; + FILE *fp; + char *line; + size_t len; + + if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL) + err(1, "Cannot create in memory database"); + + if ((fp = fopen(fname, "r")) == NULL) + err(1, "Cannot open `%s'", fname); + while ((line = fgetln(fp, &len)) != NULL) { + size_t complen = len; + char *compline; + if (!comp(line, &compline, &complen)) { + (void)fprintf(stdout, "%*.*s", (int)len, (int)len, + line); + continue; + } + key.data = compline; + key.size = complen; + switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) { + case 0: + (void)fprintf(stdout, "%*.*s", (int)len, (int)len, + line); + break; + case 1: + break; + case -1: + err(1, "put"); + default: + abort(); + break; + } + } + (void)fflush(stdout); + exit(0); +} + +/* + * normalize whitespace in the original line and place a new string + * with whitespace converted to a single space in compline. If the line + * contains just comments, we preserve them. If it contains data and + * comments, we kill the comments. Return 1 if the line had actual + * contents, or 0 if it was just a comment without alphanumeric characters. 
+ */ +static int +comp(const char *origline, char **compline, size_t *len) +{ + const unsigned char *p; + unsigned char *q; + char *cline; + size_t l = *len, complen; + int hasalnum, iscomment; + + /* Eat leading space */ + for (p = (const unsigned char *)origline; l && *p && isspace(*p); + p++, l--) + continue; + if ((cline = malloc(l + 1)) == NULL) + err(1, "Cannot allocate %zu bytes", l + 1); + (void)memcpy(cline, p, l); + cline[l] = '\0'; + if (*cline == '\0') + return 0; + + complen = 0; + hasalnum = 0; + iscomment = 0; + + for (q = (unsigned char *)cline; l && *p; p++, l--) { + if (isspace(*p)) { + if (complen && isspace(q[-1])) + continue; + *q++ = ' '; + complen++; + } else { + if (!iscomment && *p == '#') { + if (hasalnum) + break; + iscomment = 1; + } else + hasalnum |= isalnum(*p); + *q++ = *p; + complen++; + } + } + + /* Eat trailing space */ + while (complen && isspace(q[-1])) { + --q; + --complen; + } + *q = '\0'; + *compline = cline; + *len = complen; + return hasalnum; +} -- cgit v1.1 From a3b0dae83c586623a33e678409ac4f933e6c7f6b Mon Sep 17 00:00:00 2001 From: weongyo Date: Tue, 20 Apr 2010 22:55:07 +0000 Subject: MFC r204328: Add bwn(4) driver. 
--- usr.sbin/sysinstall/devices.c | 1 + 1 file changed, 1 insertion(+) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/devices.c b/usr.sbin/sysinstall/devices.c index b95fdc5..a2b7939 100644 --- a/usr.sbin/sysinstall/devices.c +++ b/usr.sbin/sysinstall/devices.c @@ -105,6 +105,7 @@ static struct _devname { NETWORK("bfe", "Broadcom BCM440x PCI Ethernet card"), NETWORK("bge", "Broadcom BCM570x PCI Gigabit Ethernet card"), NETWORK("bm", "Apple BMAC Built-in Ethernet"), + NETWORK("bwn", "Broadcom BCM43xx IEEE 802.11 wireless adapter"), NETWORK("cas", "Sun Cassini/Cassini+ or NS DP83065 Saturn Ethernet"), NETWORK("cue", "CATC USB Ethernet adapter"), NETWORK("cxgb", "Chelsio T3 10Gb Ethernet card"), -- cgit v1.1 From 58a69d8476b15590c0dcfe37a06a5004b3babf4b Mon Sep 17 00:00:00 2001 From: maxim Date: Wed, 21 Apr 2010 05:35:06 +0000 Subject: MFC r205671: trim leading w/space. --- usr.sbin/sysinstall/sysinstall.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/sysinstall.8 b/usr.sbin/sysinstall/sysinstall.8 index 46a14fc..66ab552 100644 --- a/usr.sbin/sysinstall/sysinstall.8 +++ b/usr.sbin/sysinstall/sysinstall.8 @@ -543,7 +543,7 @@ Commit any rc.conf changes to disk. Preserve existing rc.conf parameters. This is useful if you have a post-install script which modifies rc.conf. .El - .It installExpress +.It installExpress Start an "express" installation, asking few questions of the user. .Pp -- cgit v1.1 From 8c76fc5afe55b8f9d7e494362d08b3e45f669f1f Mon Sep 17 00:00:00 2001 From: maxim Date: Wed, 21 Apr 2010 05:38:16 +0000 Subject: MFC r205873: remove duplication, improve wording. 
--- usr.sbin/mtree/mtree.8 | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mtree/mtree.8 b/usr.sbin/mtree/mtree.8 index f0a93ed..119481b 100644 --- a/usr.sbin/mtree/mtree.8 +++ b/usr.sbin/mtree/mtree.8 @@ -41,9 +41,6 @@ .Op Fl f Ar spec .Ek .Bk -words -.Op Fl f Ar spec -.Ek -.Bk -words .Op Fl K Ar keywords .Ek .Bk -words @@ -119,7 +116,7 @@ Same as except a status of 2 is returned if the file hierarchy did not match the specification. .It Fl w -Make some errorconditions non-fatal warnings. +Make some errors non-fatal warnings. .It Fl x Do not descend below mount points in the file hierarchy. .It Fl f Ar file -- cgit v1.1 From 13325206c6f1da85d4846aa8ff4aa531a3a16409 Mon Sep 17 00:00:00 2001 From: fabient Date: Sun, 25 Apr 2010 16:16:43 +0000 Subject: MFC r206994: Apply threshold filter to root node in calltree view. --- usr.sbin/pmcstat/pmcpl_calltree.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/pmcpl_calltree.c b/usr.sbin/pmcstat/pmcpl_calltree.c index 4f62c62..f8ceece 100644 --- a/usr.sbin/pmcstat/pmcpl_calltree.c +++ b/usr.sbin/pmcstat/pmcpl_calltree.c @@ -499,9 +499,10 @@ void pmcpl_ct_topdisplay(void) { int i, x, y, pmcin; - struct pmcpl_ct_sample rsamples; + struct pmcpl_ct_sample r, *rsamples; - pmcpl_ct_samples_root(&rsamples); + rsamples = &r; + pmcpl_ct_samples_root(rsamples); PMCSTAT_PRINTW("%-10.10s %s\n", "IMAGE", "CALLTREE"); @@ -524,16 +525,20 @@ pmcpl_ct_topdisplay(void) if (PMCPL_CT_SAMPLE(pmcin, &pmcpl_ct_root->pct_arc[i].pcta_samples) == 0) continue; + if (PMCPL_CT_SAMPLEP(pmcin, + &pmcpl_ct_root->pct_arc[i].pcta_samples) <= + pmcstat_threshold) + continue; if (pmcpl_ct_node_dumptop(pmcin, pmcpl_ct_root->pct_arc[i].pcta_child, - &rsamples, x, &y, pmcstat_displayheight - 2)) { + rsamples, x, &y, pmcstat_displayheight - 2)) { break; } } - pmcpl_ct_node_printtop(&rsamples, pmcin, y); + 
pmcpl_ct_node_printtop(rsamples, pmcin, y); } - pmcpl_ct_samples_free(&rsamples); + pmcpl_ct_samples_free(rsamples); } /* -- cgit v1.1 From a6f6a37fc5f8652b5aff4d80b6543a4eafd5b944 Mon Sep 17 00:00:00 2001 From: imp Date: Sun, 25 Apr 2010 19:21:19 +0000 Subject: MFC r206915 Bump minor version of config to reflect the new option remapping feature. The kernel makefiles have specifically not been bumped because nothing uses this new feature and doing so forces everybody to recompile for no good reason. This change will be MFC'd where the kernel version numbers for amd64 and ia64 will be bumped, since those are the only two that use the option remapping feature. Once merged, this will give a better error message to folks that are using buildkernel without buildworld or kernel-toolchain to update their kernels. MFC after: 3 days --- usr.sbin/config/configvers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/config/configvers.h b/usr.sbin/config/configvers.h index 28451585..73310bd 100644 --- a/usr.sbin/config/configvers.h +++ b/usr.sbin/config/configvers.h @@ -49,5 +49,5 @@ * * $FreeBSD$ */ -#define CONFIGVERS 600007 +#define CONFIGVERS 600008 #define MAJOR_VERS(x) ((x) / 100000) -- cgit v1.1 From d5f7aeaa6649a4adafad439e30fac6277206a68f Mon Sep 17 00:00:00 2001 From: yongari Date: Mon, 26 Apr 2010 17:27:08 +0000 Subject: MFC r206628: Add sge(4) to the list of supported network interface.
--- usr.sbin/sysinstall/devices.c | 1 + 1 file changed, 1 insertion(+) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/devices.c b/usr.sbin/sysinstall/devices.c index a2b7939..8cf1fb3 100644 --- a/usr.sbin/sysinstall/devices.c +++ b/usr.sbin/sysinstall/devices.c @@ -150,6 +150,7 @@ static struct _devname { NETWORK("rue", "RealTek USB Ethernet card"), NETWORK("rum", "Ralink Technology USB IEEE 802.11 wireless adapter"), NETWORK("sf", "Adaptec AIC-6915 PCI Ethernet card"), + NETWORK("sge", "Silicon Integrated Systems SiS190/191 Ethernet"), NETWORK("sis", "SiS 900/SiS 7016 PCI Ethernet card"), #ifdef PC98 NETWORK("snc", "SONIC Ethernet card"), -- cgit v1.1 From 48dc09c70af0ed8a9d864482356a61d9a9a7f2f9 Mon Sep 17 00:00:00 2001 From: jkim Date: Mon, 26 Apr 2010 20:55:03 +0000 Subject: MFC: r204773 Merge ACPICA 20100304. MFC: r204874 Update module Makefile for ACPICA 20100304. MFC: r204877 Allow ACPI module build on amd64. Although we strongly recommend building it into kernel, there is no need to prevent it from building at all. MFC: r204916 - Allow users to enable dumping Debug objects without ACPI debugger. Setting the new sysctl MIB "debug.acpi.enable_debug_objects" to a non-zero value enables us to print Debug object when something is written to it. - Allow users to disable interpreter slack mode. Setting the new tunable "debug.acpi.interpreter_slack" to zero disables some workarounds for common BIOS mistakes and enables strict ACPI implementations by the specification. MFC: r204920 Since the interpreter slack mode is a tunable now, enable a local hack only when it is set. Note the default behaviour does not change by this change. MFC: r204965 Fix white spaces. MFC: r206117 Merge ACPICA 20100331 (and four additional upstream patches). 
--- usr.sbin/acpi/acpidb/Makefile | 10 +++++----- usr.sbin/acpi/iasl/Makefile | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/acpi/acpidb/Makefile b/usr.sbin/acpi/acpidb/Makefile index 5981f38..c2fe844 100644 --- a/usr.sbin/acpi/acpidb/Makefile +++ b/usr.sbin/acpi/acpidb/Makefile @@ -25,11 +25,11 @@ SRCS+= dsfield.c dsinit.c dsmethod.c dsmthdat.c dsobject.c \ dswstate.c # interpreter/executer -SRCS+= exconfig.c exconvrt.c excreate.c exdump.c exfield.c \ - exfldio.c exmisc.c exmutex.c exnames.c exoparg1.c \ - exoparg2.c exoparg3.c exoparg6.c exprep.c exregion.c \ - exresnte.c exresolv.c exresop.c exstore.c exstoren.c \ - exstorob.c exsystem.c exutils.c +SRCS+= exconfig.c exconvrt.c excreate.c exdebug.c exdump.c \ + exfield.c exfldio.c exmisc.c exmutex.c exnames.c \ + exoparg1.c exoparg2.c exoparg3.c exoparg6.c exprep.c \ + exregion.c exresnte.c exresolv.c exresop.c exstore.c \ + exstoren.c exstorob.c exsystem.c exutils.c # interpreter/parser SRCS+= psargs.c psloop.c psopcode.c psparse.c psscope.c \ diff --git a/usr.sbin/acpi/iasl/Makefile b/usr.sbin/acpi/iasl/Makefile index ff6ebd7..98b0c23 100644 --- a/usr.sbin/acpi/iasl/Makefile +++ b/usr.sbin/acpi/iasl/Makefile @@ -13,9 +13,9 @@ SRCS+= aslanalyze.c aslcodegen.c aslcompile.c aslcompiler.y.h \ aslcompilerlex.l aslcompilerparse.y aslerror.c \ aslfiles.c aslfold.c asllength.c asllisting.c \ aslload.c asllookup.c aslmain.c aslmap.c aslopcodes.c \ - asloperands.c aslopt.c aslresource.c aslrestype1.c \ - aslrestype2.c aslstartup.c aslstubs.c asltransform.c \ - asltree.c aslutils.c + asloperands.c aslopt.c aslpredef.c aslresource.c \ + aslrestype1.c aslrestype2.c aslstartup.c aslstubs.c \ + asltransform.c asltree.c aslutils.c # debugger SRCS+= dbfileio.c -- cgit v1.1 From fcff1c0ce3ea2794d587009e3d79129cf152cd13 Mon Sep 17 00:00:00 2001 From: delphij Date: Thu, 29 Apr 2010 00:28:34 +0000 Subject: MFC r205938: Sync with OpenBSD: - avoid coredump when there's only 
one token on a line; - Use calloc(); - Remove a line inherited from example mdoc. Obtained from: OpenBSD --- usr.sbin/mailwrapper/mailwrapper.8 | 3 +-- usr.sbin/mailwrapper/mailwrapper.c | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mailwrapper/mailwrapper.8 b/usr.sbin/mailwrapper/mailwrapper.8 index 587826b..6e498f6 100644 --- a/usr.sbin/mailwrapper/mailwrapper.8 +++ b/usr.sbin/mailwrapper/mailwrapper.8 @@ -1,5 +1,5 @@ +.\" $OpenBSD: mailwrapper.8,v 1.10 2009/02/07 16:58:23 martynas Exp $ .\" $NetBSD: mailwrapper.8,v 1.11 2002/02/08 01:38:50 ross Exp $ -.\" $OpenBSD: mailwrapper.8,v 1.8 2003/06/12 12:59:51 jmc Exp $ .\" $FreeBSD$ .\" .\" Copyright (c) 1998 @@ -31,7 +31,6 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.\" The following requests are required for all man pages. .Dd August 7, 2006 .Dt MAILWRAPPER 8 .Os diff --git a/usr.sbin/mailwrapper/mailwrapper.c b/usr.sbin/mailwrapper/mailwrapper.c index d696d25..1b52a64 100644 --- a/usr.sbin/mailwrapper/mailwrapper.c +++ b/usr.sbin/mailwrapper/mailwrapper.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mailwrapper.c,v 1.16 2004/07/06 03:38:14 millert Exp $ */ +/* $OpenBSD: mailwrapper.c,v 1.18 2007/11/06 14:39:19 otto Exp $ */ /* $NetBSD: mailwrapper.c,v 1.9 2003/03/09 08:10:43 mjl Exp $ */ /* @@ -61,8 +61,8 @@ initarg(struct arglist *al) { al->argc = 0; al->maxc = 10; - if ((al->argv = malloc(al->maxc * sizeof(char *))) == NULL) - err(EX_TEMPFAIL, "malloc"); + if ((al->argv = calloc(al->maxc, sizeof(char *))) == NULL) + err(EX_TEMPFAIL, "calloc"); } static void @@ -126,7 +126,7 @@ main(int argc, char *argv[], char *envp[]) continue; } - if ((from = strsep(&cp, WS)) == NULL) + if ((from = strsep(&cp, WS)) == NULL || cp == NULL) goto parse_error; cp += strspn(cp, WS); -- cgit v1.1 From 31fdf3079cf534f6870332a1f2180eb525f07ca2 Mon Sep 17 00:00:00 2001 From: imp 
Date: Sun, 2 May 2010 06:14:36 +0000 Subject: MFC r207260: Move checking the version up from Makefile generation to just after we've parsed the config file. Makefile generation is too late if we've introduced changes to the syntax of the metafiles to warn about version skew, since we have to try to parse them and we get a parse error that's rather baffling to the user rather than a 'your config is too old, upgrade' which we should get. We have to defer doing it until after we've read the user's config file because we define machinename there. The version required to compile the kernel is encoded in Makefile.machinename. There's no real reason for this to be the case, but changing it now would introduce some logistical issues that I'd rather avoid for the moment. I intend to revisit this if we're still using config in FreeBSD 10. This also means that we cannot introduce any config metafile changes that result in a syntax error or other error for the user until 9.0 is released. Otherwise, we break the upgrade path, or at least reduce the usefulness of the error messages we generate. # This implies that the config file option mapping will need to be redone.
--- usr.sbin/config/config.h | 1 + usr.sbin/config/main.c | 40 +++++++++++++++++++++++++++++++++++++++ usr.sbin/config/mkmakefile.c | 45 +++++++++++++++++++++----------------------- 3 files changed, 62 insertions(+), 24 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/config/config.h b/usr.sbin/config/config.h index ec19986..4abb567 100644 --- a/usr.sbin/config/config.h +++ b/usr.sbin/config/config.h @@ -179,6 +179,7 @@ void makehints(void); void headers(void); void cfgfile_add(const char *); void cfgfile_removeall(void); +FILE *open_makefile_template(void); extern STAILQ_HEAD(device_head, device) dtab; diff --git a/usr.sbin/config/main.c b/usr.sbin/config/main.c index 2b5e055..5cffed2 100644 --- a/usr.sbin/config/main.c +++ b/usr.sbin/config/main.c @@ -90,6 +90,7 @@ static void get_srcdir(void); static void usage(void); static void cleanheaders(char *); static void kernconfdump(const char *); +static void checkversion(void); struct hdr_list { char *h_name; @@ -204,6 +205,7 @@ main(int argc, char **argv) printf("cpu type must be specified\n"); exit(1); } + checkversion(); /* * make symbolic links in compilation directory @@ -721,3 +723,41 @@ kernconfdump(const char *file) } fclose(fp); } + +static void +badversion(int versreq) +{ + fprintf(stderr, "ERROR: version of config(8) does not match kernel!\n"); + fprintf(stderr, "config version = %d, ", CONFIGVERS); + fprintf(stderr, "version required = %d\n\n", versreq); + fprintf(stderr, "Make sure that /usr/src/usr.sbin/config is in sync\n"); + fprintf(stderr, "with your /usr/src/sys and install a new config binary\n"); + fprintf(stderr, "before trying this again.\n\n"); + fprintf(stderr, "If running the new config fails check your config\n"); + fprintf(stderr, "file against the GENERIC or LINT config files for\n"); + fprintf(stderr, "changes in config syntax, or option/device naming\n"); + fprintf(stderr, "conventions\n\n"); + exit(1); +} + +static void +checkversion(void) +{ + FILE *ifp; + char line[BUFSIZ]; 
+ int versreq; + + ifp = open_makefile_template(); + while (fgets(line, BUFSIZ, ifp) != 0) { + if (*line != '%') + continue; + if (strncmp(line, "%VERSREQ=", 9) != 0) + continue; + versreq = atoi(line + 9); + if (MAJOR_VERS(versreq) == MAJOR_VERS(CONFIGVERS) && + versreq <= CONFIGVERS) + continue; + badversion(versreq); + } + fclose(ifp); +} diff --git a/usr.sbin/config/mkmakefile.c b/usr.sbin/config/mkmakefile.c index f0d37fa..844364c 100644 --- a/usr.sbin/config/mkmakefile.c +++ b/usr.sbin/config/mkmakefile.c @@ -105,17 +105,14 @@ new_fent(void) } /* - * Build the makefile from the skeleton + * Open the correct Makefile and return it, or error out. */ -void -makefile(void) +FILE * +open_makefile_template(void) { - FILE *ifp, *ofp; + FILE *ifp; char line[BUFSIZ]; - struct opt *op, *t; - int versreq; - read_files(); snprintf(line, sizeof(line), "../../conf/Makefile.%s", machinename); ifp = fopen(line, "r"); if (ifp == 0) { @@ -124,7 +121,21 @@ makefile(void) } if (ifp == 0) err(1, "%s", line); + return (ifp); +} +/* + * Build the makefile from the skeleton + */ +void +makefile(void) +{ + FILE *ifp, *ofp; + char line[BUFSIZ]; + struct opt *op, *t; + + read_files(); + ifp = open_makefile_template(); ofp = fopen(path("Makefile.new"), "w"); if (ofp == 0) err(1, "%s", path("Makefile.new")); @@ -156,23 +167,9 @@ makefile(void) do_rules(ofp); else if (eq(line, "%CLEAN\n")) do_clean(ofp); - else if (strncmp(line, "%VERSREQ=", sizeof("%VERSREQ=") - 1) == 0) { - versreq = atoi(line + sizeof("%VERSREQ=") - 1); - if (MAJOR_VERS(versreq) != MAJOR_VERS(CONFIGVERS) || - versreq > CONFIGVERS) { - fprintf(stderr, "ERROR: version of config(8) does not match kernel!\n"); - fprintf(stderr, "config version = %d, ", CONFIGVERS); - fprintf(stderr, "version required = %d\n\n", versreq); - fprintf(stderr, "Make sure that /usr/src/usr.sbin/config is in sync\n"); - fprintf(stderr, "with your /usr/src/sys and install a new config binary\n"); - fprintf(stderr, "before trying this again.\n\n"); 
- fprintf(stderr, "file against the GENERIC or LINT config files for\n"); - fprintf(stderr, "changes in config syntax, or option/device naming\n"); - fprintf(stderr, "conventions\n\n"); - exit(1); - } - } else + else if (strncmp(line, "%VERSREQ=", 9) == 0) + line[0] = '\0'; /* handled elsewhere */ + else fprintf(stderr, "Unknown %% construct in generic makefile: %s", line); -- cgit v1.1 From 2e57b92d26240c087250419358b425a8a8ca89bf Mon Sep 17 00:00:00 2001 From: imp Date: Sun, 2 May 2010 06:18:08 +0000 Subject: MFC r207263: Redo how we add compat options so as to be compatible with old versions of config. Remove support for the syntax OLD = NEW from the options file, and instead have a new file $S/conf/options-compat. This file will be parsed as OLD NEW on each line. Bump version of config. Since nothing in -current ever used this, there are no hazards for current users, so I'm not bumping the version in the Makefiles.$MACHINE. No need, really, for this version bump in -current, but this was introduced into -stable before I realized the version check was ineffective there, so the version bump doesn't hurt here and keeps the two branches in sync, versionwise, after the MFC.
--- usr.sbin/config/configvers.h | 2 +- usr.sbin/config/mkoptions.c | 157 ++++++++++++++++++++++--------------------- 2 files changed, 81 insertions(+), 78 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/config/configvers.h b/usr.sbin/config/configvers.h index 73310bd..2d43329 100644 --- a/usr.sbin/config/configvers.h +++ b/usr.sbin/config/configvers.h @@ -49,5 +49,5 @@ * * $FreeBSD$ */ -#define CONFIGVERS 600008 +#define CONFIGVERS 600009 #define MAJOR_VERS(x) ((x) / 100000) diff --git a/usr.sbin/config/mkoptions.c b/usr.sbin/config/mkoptions.c index 1a6ccc8..846e028 100644 --- a/usr.sbin/config/mkoptions.c +++ b/usr.sbin/config/mkoptions.c @@ -166,7 +166,7 @@ do_option(char *name) fprintf(outf, "#define %s %s\n", name, value); } /* else empty file */ - (void) fclose(outf); + (void)fclose(outf); return; } basefile = ""; @@ -225,7 +225,7 @@ do_option(char *name) if (cp == (char *)EOF) break; } - (void) fclose(inf); + (void)fclose(inf); if (!tidy && ((value == NULL && oldvalue == NULL) || (value && oldvalue && eq(value, oldvalue)))) { while (!SLIST_EMPTY(&op_head)) { @@ -263,7 +263,7 @@ do_option(char *name) free(op->op_value); free(op); } - (void) fclose(outf); + (void)fclose(outf); } /* @@ -277,7 +277,7 @@ tooption(char *name) struct opt_list *po; /* "cannot happen"? the otab list should be complete.. */ - (void) strlcpy(nbuf, "options.h", sizeof(nbuf)); + (void)strlcpy(nbuf, "options.h", sizeof(nbuf)); SLIST_FOREACH(po, &otab, o_next) { if (eq(po->o_name, name)) { @@ -286,96 +286,99 @@ tooption(char *name) } } - (void) strlcpy(hbuf, path(nbuf), sizeof(hbuf)); + (void)strlcpy(hbuf, path(nbuf), sizeof(hbuf)); return (hbuf); } -/* - * read the options and options. 
files - */ + static void -read_options(void) +insert_option(char *this, char *val, int flags) +{ + struct opt_list *po; + + po = (struct opt_list *) calloc(1, sizeof *po); + if (po == NULL) + err(EXIT_FAILURE, "calloc"); + po->o_name = this; + po->o_file = val; + po->o_flags = flags; + SLIST_INSERT_HEAD(&otab, po, o_next); +} + + +static void +check_duplicate(const char *fname, const char *this) +{ + struct opt_list *po; + + SLIST_FOREACH(po, &otab, o_next) { + if (eq(po->o_name, this)) { + printf("%s: Duplicate option %s.\n", + fname, this); + exit(1); + } + } +} + +static int +read_option_file(const char *fname, int flags) { FILE *fp; - char fname[MAXPATHLEN]; char *wd, *this, *val; - struct opt_list *po; - int first = 1; char genopt[MAXPATHLEN]; - int flags = 0; - SLIST_INIT(&otab); - (void) snprintf(fname, sizeof(fname), "../../conf/options"); -openit: fp = fopen(fname, "r"); - if (fp == 0) { - return; - } -next: - flags = 0; - wd = get_word(fp); - if (wd == (char *)EOF) { - (void) fclose(fp); - if (first == 1) { - first++; - (void) snprintf(fname, sizeof fname, "../../conf/options.%s", machinename); - fp = fopen(fname, "r"); - if (fp != 0) - goto next; - (void) snprintf(fname, sizeof fname, "options.%s", machinename); - goto openit; + if (fp == 0) + return (0); + while ((wd = get_word(fp)) != (char *)EOF) { + if (wd == 0) + continue; + if (wd[0] == '#') { + while (((wd = get_word(fp)) != (char *)EOF) && wd) + continue; + continue; } - return; - } - if (wd == 0) - goto next; - if (wd[0] == '#') - { - while (((wd = get_word(fp)) != (char *)EOF) && wd) - ; - goto next; - } - this = ns(wd); - val = get_word(fp); - if (val == (char *)EOF) - return; - if (val == 0) { - char *s = ns(this); - (void) snprintf(genopt, sizeof(genopt), "opt_%s.h", lower(s)); - val = genopt; - free(s); - } else if (eq(val, "=")) { + this = ns(wd); val = get_word(fp); - if (val == (char *)EOF) { - printf("%s: unexpected end of file\n", fname); - exit(1); - } + if (val == (char *)EOF) + 
return (1); if (val == 0) { - printf("%s: Expected a right hand side at %s\n", fname, - this); - exit(1); + if (flags) { + printf("%s: compat file requires two words " + "per line at %s\n", fname, this); + exit(1); + } + char *s = ns(this); + (void)snprintf(genopt, sizeof(genopt), "opt_%s.h", + lower(s)); + val = genopt; + free(s); } - flags |= OL_ALIAS; + val = ns(val); + check_duplicate(fname, this); + insert_option(this, val, flags); } - val = ns(val); + (void)fclose(fp); + return (1); +} - SLIST_FOREACH(po, &otab, o_next) { - if (eq(po->o_name, this)) { - printf("%s: Duplicate option %s.\n", - fname, this); - exit(1); - } - } - - po = (struct opt_list *) calloc(1, sizeof *po); - if (po == NULL) - err(EXIT_FAILURE, "calloc"); - po->o_name = this; - po->o_file = val; - po->o_flags = flags; - SLIST_INSERT_HEAD(&otab, po, o_next); +/* + * read the options and options. files + */ +static void +read_options(void) +{ + char fname[MAXPATHLEN]; - goto next; + SLIST_INIT(&otab); + read_option_file("../../conf/options", 0); + (void)snprintf(fname, sizeof fname, "../../conf/options.%s", + machinename); + if (!read_option_file(fname, 0)) { + (void)snprintf(fname, sizeof fname, "options.%s", machinename); + read_option_file(fname, 0); + } + read_option_file("../../conf/options-compat", OL_ALIAS); } static char * -- cgit v1.1 From 56a6fa1b6103a97e72dea2456a399126e07e6b52 Mon Sep 17 00:00:00 2001 From: imp Date: Sun, 2 May 2010 06:18:57 +0000 Subject: MFC r207265: Require the option that's mapped be listed in the options file. This will allow people with old config options to either have it just work (if config is new enough), or get a version error (if their config is about 7.0 or newer) rather than getting a cryptic error about duplicated options in the options file, or getting an error about an unknown option, at which point they'd update their config file only to learn they need a new config, only to learn they didn't really need to update their config file... 
All this because our version checking was in the wrong place for the past decade... # hopefully this is the last change, and we'll be able to config with an # 8.0 GENERIC file on stable/8 after I merge this change and add the # compat options. --- usr.sbin/config/mkoptions.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/config/mkoptions.c b/usr.sbin/config/mkoptions.c index 846e028..044b669 100644 --- a/usr.sbin/config/mkoptions.c +++ b/usr.sbin/config/mkoptions.c @@ -292,32 +292,49 @@ tooption(char *name) static void -insert_option(char *this, char *val, int flags) +check_duplicate(const char *fname, const char *this) { struct opt_list *po; + SLIST_FOREACH(po, &otab, o_next) { + if (eq(po->o_name, this)) { + printf("%s: Duplicate option %s.\n", + fname, this); + exit(1); + } + } +} + +static void +insert_option(const char *fname, char *this, char *val) +{ + struct opt_list *po; + + check_duplicate(fname, this); po = (struct opt_list *) calloc(1, sizeof *po); if (po == NULL) err(EXIT_FAILURE, "calloc"); po->o_name = this; po->o_file = val; - po->o_flags = flags; + po->o_flags = 0; SLIST_INSERT_HEAD(&otab, po, o_next); } - static void -check_duplicate(const char *fname, const char *this) +update_option(const char *this, char *val, int flags) { struct opt_list *po; SLIST_FOREACH(po, &otab, o_next) { if (eq(po->o_name, this)) { - printf("%s: Duplicate option %s.\n", - fname, this); - exit(1); + free(po->o_file); + po->o_file = val; + po->o_flags = flags; + return; } } + printf("Compat option %s not listed in options file.\n", this); + exit(1); } static int @@ -355,8 +372,10 @@ read_option_file(const char *fname, int flags) free(s); } val = ns(val); - check_duplicate(fname, this); - insert_option(this, val, flags); + if (flags == 0) + insert_option(fname, this, val); + else + update_option(this, val, flags); } (void)fclose(fp); return (1); -- cgit v1.1 From 
5626f96be273b1b91dbed367a7f5e070c972345b Mon Sep 17 00:00:00 2001 From: imp Date: Tue, 4 May 2010 03:56:25 +0000 Subject: MFC: r207461 sparc64, and possibly other architectures, pads the length of the section holding the config file to sh_addralign bytes using NULs. This bogusly triggers an assert. Break out of the loop when we hit an NUL within that many bytes of the end. --- usr.sbin/config/main.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/config/main.c b/usr.sbin/config/main.c index 5cffed2..e63e6ab 100644 --- a/usr.sbin/config/main.c +++ b/usr.sbin/config/main.c @@ -670,7 +670,7 @@ kernconfdump(const char *file) struct stat st; FILE *fp, *pp; int error, len, osz, r; - unsigned int i, off, size; + unsigned int i, off, size, t1, t2, align; char *cmd, *o; r = open(file, O_RDONLY); @@ -689,8 +689,8 @@ kernconfdump(const char *file) if (o == NULL) err(EXIT_FAILURE, "Couldn't allocate memory"); /* ELF note section header. */ - asprintf(&cmd, "/usr/bin/elfdump -c %s | grep -A 5 kern_conf" - "| tail -2 | cut -d ' ' -f 2 | paste - - -", file); + asprintf(&cmd, "/usr/bin/elfdump -c %s | grep -A 8 kern_conf" + "| tail -5 | cut -d ' ' -f 2 | paste - - - - -", file); if (cmd == NULL) errx(EXIT_FAILURE, "asprintf() failed"); pp = popen(cmd, "r"); @@ -699,24 +699,28 @@ kernconfdump(const char *file) free(cmd); len = fread(o, osz, 1, pp); pclose(pp); - r = sscanf(o, "%d\t%d", &off, &size); + r = sscanf(o, "%d%d%d%d%d", &off, &size, &t1, &t2, &align); free(o); - if (r != 2) + if (r != 5) errx(EXIT_FAILURE, "File %s doesn't contain configuration " "file. 
Either unsupported, or not compiled with " "INCLUDE_CONFIG_FILE", file); r = fseek(fp, off, SEEK_CUR); if (r != 0) err(EXIT_FAILURE, "fseek() failed"); - for (i = 0; i < size - 1; i++) { + for (i = 0; i < size; i++) { r = fgetc(fp); if (r == EOF) break; /* * If '\0' is present in the middle of the configuration * string, this means something very weird is happening. - * Make such case very visible. + * Make such case very visible. However, some architectures + * pad the length of the section with NULs to a multiple of + * sh_addralign, allow a NUL in that part of the section. */ + if (r == '\0' && (size - i) < align) + break; assert(r != '\0' && ("Char present in the configuration " "string mustn't be equal to 0")); fputc(r, stdout); -- cgit v1.1 From 8ae2425c9da1c9617bfe989e82059822368acb4a Mon Sep 17 00:00:00 2001 From: fabient Date: Sun, 9 May 2010 15:39:49 +0000 Subject: MFC r207731: Exclude undefined symbol from ELF file when doing function resolve. --- usr.sbin/pmcstat/pmcstat_log.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c index 51f66ca..d98e834 100644 --- a/usr.sbin/pmcstat/pmcstat_log.c +++ b/usr.sbin/pmcstat/pmcstat_log.c @@ -539,6 +539,8 @@ pmcstat_image_add_symbols(struct pmcstat_image *image, Elf *e, return; if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) continue; + if (sym.st_shndx == STN_UNDEF) + continue; if (!firsttime && pmcstat_symbol_search(image, sym.st_value)) continue; /* We've seen this symbol already. */ -- cgit v1.1 From 7e663c88a37acf47f4a7353a2c7e3a6380160444 Mon Sep 17 00:00:00 2001 From: emaste Date: Mon, 10 May 2010 01:22:29 +0000 Subject: MFC r207590: Update GRANDPARENTED text to match the contents of tzdata/factory r19879. This eliminates "warning: time zone abbreviation differs from POSIX" on installworld. 
--- usr.sbin/zic/private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/zic/private.h b/usr.sbin/zic/private.h index ecbf612..d00bf9b 100644 --- a/usr.sbin/zic/private.h +++ b/usr.sbin/zic/private.h @@ -34,7 +34,7 @@ static const char privatehid[] = "@(#)private.h 8.6"; #endif /* !defined NOID */ #endif /* !defined lint */ -#define GRANDPARENTED "Local time zone must be set--see zic manual page" +#define GRANDPARENTED "Local time zone must be set--use tzsetup" /* ** Defaults for preprocessor symbols. -- cgit v1.1 From c7f68c4e8fe616d00bf515a6e516e9a32389780a Mon Sep 17 00:00:00 2001 From: fabient Date: Fri, 14 May 2010 13:42:17 +0000 Subject: MFC r207755: Rework the calltree top view by critical callchain. The percentage shown is the sum of the cost for the codepath. --- usr.sbin/pmcstat/pmcpl_calltree.c | 190 +++++++++++++++++++++----------------- 1 file changed, 104 insertions(+), 86 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/pmcstat/pmcpl_calltree.c b/usr.sbin/pmcstat/pmcpl_calltree.c index f8ceece..0955133 100644 --- a/usr.sbin/pmcstat/pmcpl_calltree.c +++ b/usr.sbin/pmcstat/pmcpl_calltree.c @@ -120,8 +120,15 @@ struct pmcpl_ct_node_hash { struct pmcpl_ct_sample pmcpl_ct_callid; #define PMCPL_CT_MAXCOL PMC_CALLCHAIN_DEPTH_MAX -#define PMCPL_CT_MAXLINE 256 -struct pmcpl_ct_node *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL][PMCPL_CT_MAXLINE]; +#define PMCPL_CT_MAXLINE 1024 /* TODO: dynamic. */ + +struct pmcpl_ct_line { + unsigned ln_sum; + unsigned ln_index; +}; + +struct pmcpl_ct_line pmcpl_ct_topmax[PMCPL_CT_MAXLINE+1]; +struct pmcpl_ct_node *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL+1][PMCPL_CT_MAXLINE+1]; /* * All nodes indexed by function/image name are placed in a hash table. @@ -225,28 +232,6 @@ pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items) } /* - * Compare two arc by samples value. 
- */ -static int -pmcpl_ct_arc_compare(void *thunk, const void *a, const void *b) -{ - const struct pmcpl_ct_arc *ct1, *ct2; - int pmcin = *(int *)thunk; - - ct1 = (const struct pmcpl_ct_arc *) a; - ct2 = (const struct pmcpl_ct_arc *) b; - - /* Sort in reverse order */ - if (PMCPL_CT_SAMPLE(pmcin, &ct1->pcta_samples) < - PMCPL_CT_SAMPLE(pmcin, &ct2->pcta_samples)) - return (1); - if (PMCPL_CT_SAMPLE(pmcin, &ct1->pcta_samples) > - PMCPL_CT_SAMPLE(pmcin, &ct2->pcta_samples)) - return (-1); - return (0); -} - -/* * Grow the instr table. */ @@ -366,9 +351,9 @@ pmcpl_ct_node_cleartag(void) static int pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, - struct pmcpl_ct_sample *rsamples, int x, int *y, int maxy) + struct pmcpl_ct_sample *rsamples, int x, int *y) { - int i; + int i, terminal; if (ct->pct_flags & PMCPL_PCT_TAG) return 0; @@ -382,12 +367,21 @@ pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, pmcpl_ct_topscreen[x][*y] = ct; /* - * This is a terminal node + * Check if this is a terminal node. + * We need to check that some samples exist + * for at least one arc for that PMC. */ - if (ct->pct_narc == 0) { + terminal = 1; + for (i = 0; i < ct->pct_narc; i++) + if (PMCPL_CT_SAMPLE(pmcin, + &ct->pct_arc[i].pcta_samples) != 0) { + terminal = 0; + break; + } + + if (ct->pct_narc == 0 || terminal) { pmcpl_ct_topscreen[x+1][*y] = NULL; - if (*y >= PMCPL_CT_MAXLINE || - *y >= maxy) + if (*y >= PMCPL_CT_MAXLINE) return 1; *y = *y + 1; for (i=0; i < x; i++) @@ -396,14 +390,7 @@ pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, return 0; } - /* - * Quicksort the arcs. - */ - qsort_r(ct->pct_arc, ct->pct_narc, sizeof(struct pmcpl_ct_arc), - &pmcin, pmcpl_ct_arc_compare); - for (i = 0; i < ct->pct_narc; i++) { - /* Skip this arc if there is no sample at all. 
*/ if (PMCPL_CT_SAMPLE(pmcin, &ct->pct_arc[i].pcta_samples) == 0) continue; @@ -411,7 +398,7 @@ pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) { if (pmcpl_ct_node_dumptop(pmcin, ct->pct_arc[i].pcta_child, - rsamples, x+1, y, maxy)) + rsamples, x+1, y)) return 1; } } @@ -420,12 +407,36 @@ pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, } /* + * Compare two top line by sum. + */ +static int +pmcpl_ct_line_compare(const void *a, const void *b) +{ + const struct pmcpl_ct_line *ct1, *ct2; + + ct1 = (const struct pmcpl_ct_line *) a; + ct2 = (const struct pmcpl_ct_line *) b; + + /* Sort in reverse order */ + if (ct1->ln_sum < ct2->ln_sum) + return (1); + if (ct1->ln_sum > ct2->ln_sum) + return (-1); + return (0); +} + +/* * Format and display given PMC index. */ static void pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) { +#undef TS +#undef TSI +#define TS(x, y) (pmcpl_ct_topscreen[x][y]) +#define TSI(x, y) (pmcpl_ct_topscreen[x][pmcpl_ct_topmax[y].ln_index]) + int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y; float v; char ns[30], vs[10], is[20]; @@ -433,33 +444,60 @@ pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) struct pmcstat_symbol *sym; const char *space = " "; + /* + * Sort by line cost. + */ + for (y = 0; ; y++) { + ct = TS(1, y); + if (ct == NULL) + break; + + pmcpl_ct_topmax[y].ln_sum = 0; + pmcpl_ct_topmax[y].ln_index = y; + for (x = 1; TS(x, y) != NULL; x++) { + pmcpl_ct_topmax[y].ln_sum += + PMCPL_CT_SAMPLE(pmcin, &TS(x, y)->pct_samples); + } + } + qsort(pmcpl_ct_topmax, y, sizeof(pmcpl_ct_topmax[0]), + pmcpl_ct_line_compare); + pmcpl_ct_topmax[y].ln_index = y; + for (y = 0; y < maxy; y++) { - /* Output image. 
*/ - ct = pmcpl_ct_topscreen[0][y]; - snprintf(is, sizeof(is), "%-10.10s", - pmcstat_string_unintern(ct->pct_image->pi_name)); - PMCSTAT_PRINTW("%s ", is); - width = indentwidth = 11; + ct = TSI(1, y); + if (ct == NULL) + break; + + if (y > 0) + PMCSTAT_PRINTW("\n"); - for (x = 0; pmcpl_ct_topscreen[x][y] !=NULL; x++) { + /* Output sum. */ + v = pmcpl_ct_topmax[y].ln_sum * 100.0 / + rsamples->sb[pmcin]; + snprintf(vs, sizeof(vs), "%.1f", v); + v_attrs = PMCSTAT_ATTRPERCENT(v); + PMCSTAT_ATTRON(v_attrs); + PMCSTAT_PRINTW("%5.5s ", vs); + PMCSTAT_ATTROFF(v_attrs); - ct = pmcpl_ct_topscreen[x][y]; + width = indentwidth = 5 + 1; + + for (x = 1; (ct = TSI(x, y)) != NULL; x++) { - ns[0] = '\0'; ns_len = 0; vs[0] = '\0'; vs_len = 0; is[0] = '\0'; is_len = 0; /* Format value. */ v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples); if (v > pmcstat_threshold) - vs_len = snprintf(vs, sizeof(vs), "(%.1f%%)", v); + vs_len = snprintf(vs, sizeof(vs), + "(%.1f%%)", v); v_attrs = PMCSTAT_ATTRPERCENT(v); if (pmcstat_skiplink && v <= pmcstat_threshold) { - PMCSTAT_PRINTW(". "); - width += 2; - continue; - } + strlcpy(ns, ".", sizeof(ns)); + ns_len = 1; + } else { sym = pmcstat_symbol_search(ct->pct_image, ct->pct_func); if (sym != NULL) { ns_len = snprintf(ns, sizeof(ns), "%s", @@ -469,12 +507,14 @@ pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) (void *)ct->pct_func); /* Format image. */ - if (x > 0 && pmcpl_ct_topscreen[x-1][y]->pct_image != ct->pct_image) + if (x == 1 || + TSI(x-1, y)->pct_image != ct->pct_image) is_len = snprintf(is, sizeof(is), "@%s", pmcstat_string_unintern(ct->pct_image->pi_name)); /* Check for line wrap. 
*/ width += ns_len + is_len + vs_len + 1; + } if (width >= pmcstat_displaywidth) { maxy--; if (y >= maxy) @@ -487,7 +527,6 @@ pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) PMCSTAT_PRINTW("%s%s%s ", ns, is, vs); PMCSTAT_ATTROFF(v_attrs); } - PMCSTAT_PRINTW("\n"); } } @@ -498,46 +537,25 @@ pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) void pmcpl_ct_topdisplay(void) { - int i, x, y, pmcin; + int y; struct pmcpl_ct_sample r, *rsamples; rsamples = &r; pmcpl_ct_samples_root(rsamples); - PMCSTAT_PRINTW("%-10.10s %s\n", "IMAGE", "CALLTREE"); + pmcpl_ct_node_cleartag(); - for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++) { - /* Filter PMCs. */ - if (pmcstat_pmcinfilter != pmcin) - continue; + PMCSTAT_PRINTW("%5.5s %s\n", "%SAMP", "CALLTREE"); - pmcpl_ct_node_cleartag(); - - /* Quicksort the arcs. */ - qsort_r(pmcpl_ct_root->pct_arc, - pmcpl_ct_root->pct_narc, - sizeof(struct pmcpl_ct_arc), - &pmcin, pmcpl_ct_arc_compare); - - x = y = 0; - for (i = 0; i < pmcpl_ct_root->pct_narc; i++) { - /* Skip this arc if there is no sample at all. 
*/ - if (PMCPL_CT_SAMPLE(pmcin, - &pmcpl_ct_root->pct_arc[i].pcta_samples) == 0) - continue; - if (PMCPL_CT_SAMPLEP(pmcin, - &pmcpl_ct_root->pct_arc[i].pcta_samples) <= - pmcstat_threshold) - continue; - if (pmcpl_ct_node_dumptop(pmcin, - pmcpl_ct_root->pct_arc[i].pcta_child, - rsamples, x, &y, pmcstat_displayheight - 2)) { - break; - } - } + y = 0; + if (pmcpl_ct_node_dumptop(pmcstat_pmcinfilter, + pmcpl_ct_root, rsamples, 0, &y)) + PMCSTAT_PRINTW("...\n"); + pmcpl_ct_topscreen[1][y] = NULL; + + pmcpl_ct_node_printtop(rsamples, + pmcstat_pmcinfilter, pmcstat_displayheight - 2); - pmcpl_ct_node_printtop(rsamples, pmcin, y); - } pmcpl_ct_samples_free(rsamples); } -- cgit v1.1 From 7f3db98fba4e727a5c118996b65ca379e4977488 Mon Sep 17 00:00:00 2001 From: rmacklem Date: Mon, 17 May 2010 01:18:12 +0000 Subject: MFC: r207689 If the "-alldirs" export option was used for the V4: line, mountd would crash in check_options() since dp == NULL for the V4: line. This patch moves the check for options allowed on the V4: line to ahead of where dp is used to avoid this crash. 
--- usr.sbin/mountd/mountd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mountd/mountd.c b/usr.sbin/mountd/mountd.c index 340afd3..41a845f 100644 --- a/usr.sbin/mountd/mountd.c +++ b/usr.sbin/mountd/mountd.c @@ -2881,16 +2881,16 @@ check_options(dp) syslog(LOG_ERR, "-mask and /masklen are mutually exclusive"); return (1); } - if ((opt_flags & OP_ALLDIRS) && dp->dp_left) { - syslog(LOG_ERR, "-alldirs has multiple directories"); - return (1); - } if (v4root_phase > 0 && (opt_flags & ~(OP_SEC | OP_MASK | OP_NET | OP_HAVEMASK | OP_MASKLEN)) != 0) { syslog(LOG_ERR,"only -sec,-net,-mask options allowed on V4:"); return (1); } + if ((opt_flags & OP_ALLDIRS) && dp->dp_left) { + syslog(LOG_ERR, "-alldirs has multiple directories"); + return (1); + } return (0); } -- cgit v1.1 From a7f3a8c6d48cb1d490b3b669727689f38d10de9a Mon Sep 17 00:00:00 2001 From: dougb Date: Tue, 18 May 2010 02:17:40 +0000 Subject: MFC r208088: Hide the creation and population of the temproot --- usr.sbin/mergemaster/mergemaster.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mergemaster/mergemaster.sh b/usr.sbin/mergemaster/mergemaster.sh index 43126ae..311f226 100755 --- a/usr.sbin/mergemaster/mergemaster.sh +++ b/usr.sbin/mergemaster/mergemaster.sh @@ -617,14 +617,14 @@ case "${RERUN}" in case "${DESTDIR}" in '') ;; *) - ${MM_MAKE} DESTDIR=${DESTDIR} distrib-dirs + ${MM_MAKE} DESTDIR=${DESTDIR} distrib-dirs >/dev/null ;; esac od=${TEMPROOT}/usr/obj - ${MM_MAKE} DESTDIR=${TEMPROOT} distrib-dirs && - MAKEOBJDIRPREFIX=$od ${MM_MAKE} _obj SUBDIR_OVERRIDE=etc && - MAKEOBJDIRPREFIX=$od ${MM_MAKE} everything SUBDIR_OVERRIDE=etc && - MAKEOBJDIRPREFIX=$od ${MM_MAKE} DESTDIR=${TEMPROOT} distribution;} || + ${MM_MAKE} DESTDIR=${TEMPROOT} distrib-dirs >/dev/null && + MAKEOBJDIRPREFIX=$od ${MM_MAKE} _obj SUBDIR_OVERRIDE=etc >/dev/null && + MAKEOBJDIRPREFIX=$od ${MM_MAKE} everything 
SUBDIR_OVERRIDE=etc >/dev/null && + MAKEOBJDIRPREFIX=$od ${MM_MAKE} DESTDIR=${TEMPROOT} distribution >/dev/null;} || { echo ''; echo " *** FATAL ERROR: Cannot 'cd' to ${SOURCEDIR} and install files to"; echo " the temproot environment"; -- cgit v1.1 From c00023af29c999b7ae1d2d84a75d96141fb83b1f Mon Sep 17 00:00:00 2001 From: kaiw Date: Tue, 18 May 2010 10:32:20 +0000 Subject: MFC r205728 Merge improvements from kernel HID parser to the userland usbhid(3) parser. This merge does not change any API and should not break any native or third-party applications. Changes include: * Merge multiple report ID support and other improvements from kernel HID parser. * Ignore rid argument in hid_start_parser, parse all the report items since we now support multiple report IDs. * Skip report ID byte in hid_get_data() and set report ID byte in hid_set_data(), if report ID is non-zero. * Reimplement hid_get_report_id: instead of getting the report id from the uhid device (which is always 0), try parsing the report descriptor and return the first report ID encountered. MFC r207812 hid_get_data() now expects that the hid data passed in always contains the report ID byte. Thus we should not skip the report ID byte in hid_interrupt(). Also, if HUP_KEYBOARD usage is an array, do not try to modify the 'data' pointer; instead, increase the hid_item_t field 'pos' by 'report_size' before calling hid_get_data() during each iteration.
--- usr.sbin/bluetooth/bthidd/hid.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bluetooth/bthidd/hid.c b/usr.sbin/bluetooth/bthidd/hid.c index 9b5e5ec..e944849 100644 --- a/usr.sbin/bluetooth/bthidd/hid.c +++ b/usr.sbin/bluetooth/bthidd/hid.c @@ -130,7 +130,7 @@ hid_interrupt(bthid_session_p s, uint8_t *data, int32_t len) hid_item_t h; int32_t report_id, usage, page, val, mouse_x, mouse_y, mouse_z, mouse_butt, - mevents, kevents; + mevents, kevents, i; assert(s != NULL); assert(s->srv != NULL); @@ -150,8 +150,8 @@ hid_interrupt(bthid_session_p s, uint8_t *data, int32_t len) } report_id = data[1]; - data += 2; - len -= 2; + data ++; + len --; hid_device = get_hid_device(&s->bdaddr); assert(hid_device != NULL); @@ -202,17 +202,11 @@ hid_interrupt(bthid_session_p s, uint8_t *data, int32_t len) if (val && val < kbd_maxkey()) bit_set(s->keys1, val); - data ++; - len --; - - len = min(len, h.report_size); - while (len > 0) { + for (i = 1; i < h.report_count; i++) { + h.pos += h.report_size; val = hid_get_data(data, &h); if (val && val < kbd_maxkey()) bit_set(s->keys1, val); - - data ++; - len --; } } break; -- cgit v1.1 From fb5a301096ade09947da4800c900cee1f71f4164 Mon Sep 17 00:00:00 2001 From: brucec Date: Tue, 18 May 2010 18:19:06 +0000 Subject: MFC r207006: Remove the reference to DD mode, and replace "Wizard" mode with "Expert" mode. Also, make sure the "Q = Finish" text is visible. Reword the boot manager screen to try and avoid confusion, and make the order of the menu items match that in sysinstall. 
PR: bin/142916 Submitted by: Jeremy Chadwick Approved by: rrs (mentor) --- usr.sbin/sade/disks.c | 7 +++---- usr.sbin/sade/menus.c | 23 +++++++++++------------ 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sade/disks.c b/usr.sbin/sade/disks.c index b802e12..3158ff3 100644 --- a/usr.sbin/sade/disks.c +++ b/usr.sbin/sade/disks.c @@ -202,10 +202,9 @@ static void print_command_summary(void) { mvprintw(14, 0, "The following commands are supported (in upper or lower case):"); - mvprintw(16, 0, "A = Use Entire Disk G = set Drive Geometry C = Create Slice F = `DD' mode"); - mvprintw(17, 0, "D = Delete Slice Z = Toggle Size Units S = Set Bootable | = Wizard m."); - mvprintw(18, 0, "T = Change Type U = Undo All Changes Q = Finish"); - mvprintw(18, 47, "W = Write Changes"); + mvprintw(16, 0, "A = Use Entire Disk G = set Drive Geometry C = Create Slice"); + mvprintw(17, 0, "D = Delete Slice Z = Toggle Size Units S = Set Bootable | = Expert m."); + mvprintw(18, 0, "T = Change Type U = Undo All Changes W = Write Changes Q = Finish"); mvprintw(21, 0, "Use F1 or ? to get more help, arrow keys to select."); move(0, 0); } diff --git a/usr.sbin/sade/menus.c b/usr.sbin/sade/menus.c index f48c772..df51804 100644 --- a/usr.sbin/sade/menus.c +++ b/usr.sbin/sade/menus.c @@ -92,24 +92,23 @@ DMenu MenuIPLType = { DMenu MenuMBRType = { DMENU_NORMAL_TYPE | DMENU_SELECTION_RETURNS, "overwrite me", /* will be disk specific label */ - "FreeBSD comes with a boot selector that allows you to easily\n" + "FreeBSD comes with a boot manager that allows you to easily\n" "select between FreeBSD and any other operating systems on your machine\n" "at boot time. 
If you have more than one drive and want to boot\n" - "from the second one, the boot selector will also make it possible\n" + "from the second one, the boot manager will also make it possible\n" "to do so (limitations in the PC BIOS usually prevent this otherwise).\n" - "If you do not want a boot selector, or wish to replace an existing\n" - "one, select \"standard\". If you would prefer your Master Boot\n" - "Record to remain untouched then select \"None\".\n\n" - " NOTE: PC-DOS users will almost certainly require \"None\"!", - "Press F1 to read about drive setup", + "If you have other operating systems installed and would like a choice when\n" + "booting, choose \"BootMgr\". If you would prefer to keep your existing\n" + "boot manager, select \"None\".\n\n", + "", "drives", - { { "BootMgr", "Install the FreeBSD Boot Manager", - dmenuRadioCheck, dmenuSetValue, NULL, &BootMgr, 0, 0, 0, 0 }, - { "Standard", "Install a standard MBR (no boot manager)", + { { "Standard", "Install a standard MBR (non-interactive boot manager)", dmenuRadioCheck, dmenuSetValue, NULL, &BootMgr, '(', '*', ')', 1 }, - { "None", "Leave the Master Boot Record untouched", + { "BootMgr", "Install the FreeBSD boot manager", + dmenuRadioCheck, dmenuSetValue, NULL, &BootMgr, '(', '*', ')', 0 }, + { "None", "Do not install a boot manager", dmenuRadioCheck, dmenuSetValue, NULL, &BootMgr, '(', '*', ')', 2 }, - { NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0 } }, + { NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0 } } }; #endif /* PC98 */ #endif /* __i386__ */ -- cgit v1.1 From 571f93bdfa288b0d0e34692ad658c685a4772de1 Mon Sep 17 00:00:00 2001 From: brucec Date: Tue, 18 May 2010 18:20:11 +0000 Subject: MFC r207005: Make the "Q = Finish" text visible when running sysinstall as a normal application. Reword the boot manager screen to try and avoid confusion. 
Approved by: rrs (mentor) --- usr.sbin/sysinstall/disks.c | 7 +++++-- usr.sbin/sysinstall/menus.c | 16 ++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/disks.c b/usr.sbin/sysinstall/disks.c index 919b97d..934ab85 100644 --- a/usr.sbin/sysinstall/disks.c +++ b/usr.sbin/sysinstall/disks.c @@ -207,9 +207,12 @@ print_command_summary(void) mvprintw(14, 0, "The following commands are supported (in upper or lower case):"); mvprintw(16, 0, "A = Use Entire Disk G = set Drive Geometry C = Create Slice"); mvprintw(17, 0, "D = Delete Slice Z = Toggle Size Units S = Set Bootable | = Expert m."); - mvprintw(18, 0, "T = Change Type U = Undo All Changes Q = Finish"); + mvprintw(18, 0, "T = Change Type U = Undo All Changes"); + if (!RunningAsInit) - mvprintw(18, 47, "W = Write Changes"); + mvprintw(18, 47, "W = Write Changes Q = Finish"); + else + mvprintw(18, 47, "Q = Finish"); mvprintw(21, 0, "Use F1 or ? to get more help, arrow keys to select."); move(0, 0); } diff --git a/usr.sbin/sysinstall/menus.c b/usr.sbin/sysinstall/menus.c index 8ea7f98..32b4ed0 100644 --- a/usr.sbin/sysinstall/menus.c +++ b/usr.sbin/sysinstall/menus.c @@ -1179,20 +1179,16 @@ DMenu MenuMBRType = { "at boot time. If you have more than one drive and want to boot\n" "from the second one, the boot manager will also make it possible\n" "to do so (limitations in the PC BIOS usually prevent this otherwise).\n" - "If you will only have FreeBSD on the machine the boot manager is\n" - "not needed and it slows down the boot while offering you the choice\n" - "of which operating system to boot. 
If you do not want a boot\n" - "manager, or wish to replace an existing one, select \"standard\".\n" - "If you would prefer your Master Boot Record remain untouched then\n" - "select \"None\".\n\n" - " NOTE: PC-DOS users will almost certainly require \"None\"!", - "Press F1 to read about drive setup", + "If you have other operating systems installed and would like a choice when\n" + "booting, choose \"BootMgr\". If you would prefer to keep your existing\n" + "boot manager, select \"None\".\n\n", + "", "drives", - { { "Standard", "Install a standard MBR (no boot manager)", + { { "Standard", "Install a standard MBR (non-interactive boot manager)", dmenuRadioCheck, dmenuSetValue, NULL, &BootMgr, '(', '*', ')', 1 }, { "BootMgr", "Install the FreeBSD Boot Manager", dmenuRadioCheck, dmenuSetValue, NULL, &BootMgr, '(', '*', ')', 0 }, - { "None", "Leave the Master Boot Record untouched", + { "None", "Do not install a boot manager", dmenuRadioCheck, dmenuSetValue, NULL, &BootMgr, '(', '*', ')', 2 }, { NULL } }, }; -- cgit v1.1 From f387526e7d0a2d4f5d69e3faa1473bc51dcd8b45 Mon Sep 17 00:00:00 2001 From: nork Date: Thu, 20 May 2010 22:12:36 +0000 Subject: MFC r207612: Add support run services_mkdb(8). 
Approved by: dougb, imp (mentor) Reviewed by: ume --- usr.sbin/mergemaster/mergemaster.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/mergemaster/mergemaster.sh b/usr.sbin/mergemaster/mergemaster.sh index 311f226..19381b7 100755 --- a/usr.sbin/mergemaster/mergemaster.sh +++ b/usr.sbin/mergemaster/mergemaster.sh @@ -851,6 +851,9 @@ mm_install () { /etc/login.conf) NEED_CAP_MKDB=yes ;; + /etc/services) + NEED_SERVICES_MKDB=yes + ;; /etc/master.passwd) do_install_and_rm 600 "${1}" "${DESTDIR}${INSTALL_DIR}" NEED_PWD_MKDB=yes @@ -1280,6 +1283,17 @@ case "${NEED_CAP_MKDB}" in ;; esac +case "${NEED_SERVICES_MKDB}" in +'') ;; +*) + echo '' + echo "*** You installed a services file, so make sure that you run" + echo " '/usr/sbin/services_mkdb -q -o ${DESTDIR}/var/db/services.db ${DESTDIR}/etc/services'" + echo " to rebuild your services database" + run_it_now "/usr/sbin/services_mkdb -q -o ${DESTDIR}/var/db/services.db ${DESTDIR}/etc/services" + ;; +esac + case "${NEED_PWD_MKDB}" in '') ;; *) -- cgit v1.1 From e28b5a1908c723c94d41b8161b37493edaaa07b4 Mon Sep 17 00:00:00 2001 From: randi Date: Fri, 21 May 2010 23:08:53 +0000 Subject: MFC r198317,206995: Introduce 'netDev=ANY' support for scripted (install.cfg) installs, which results in the first ethernet interface with physical link being selected. 
Approved by: cperciva (mentor) --- usr.sbin/sysinstall/tcpip.c | 85 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 10 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/tcpip.c b/usr.sbin/sysinstall/tcpip.c index f505a05..1c1d676 100644 --- a/usr.sbin/sysinstall/tcpip.c +++ b/usr.sbin/sysinstall/tcpip.c @@ -40,10 +40,17 @@ #include "sysinstall.h" #include #include +#include #include +#include + #include +#include +#include + #include #include +#include /* The help file for the TCP/IP setup screen */ #define TCP_HELPFILE "tcp" @@ -636,6 +643,53 @@ netHook(dialogMenuItem *self) return devs ? DITEM_LEAVE_MENU : DITEM_FAILURE; } +static char * +tcpDeviceScan(void) +{ + int s; + struct ifmediareq ifmr; + struct ifaddrs *ifap, *ifa; + struct if_data *ifd; + char *network_dev; + + if ((s = socket(AF_LOCAL, SOCK_DGRAM, 0)) < 0) + return (NULL); + + if (getifaddrs(&ifap) < 0) + return (NULL); + + for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { + memset(&ifmr, 0, sizeof(ifmr)); + strlcpy(ifmr.ifm_name, ifa->ifa_name, sizeof(ifmr.ifm_name)); + + if (ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0) + continue; /* some devices don't support this */ + + if ((ifmr.ifm_status & IFM_AVALID) == 0) + continue; /* not active */ + + if (IFM_TYPE(ifmr.ifm_active) != IFM_ETHER) + continue; /* not an ethernet device */ + + if (ifmr.ifm_status & IFM_ACTIVE) { + network_dev = strdup(ifa->ifa_name); + freeifaddrs(ifap); + + if (!variable_get(VAR_NONINTERACTIVE)) + msgConfirm("Using interface %s", network_dev); + + msgDebug("tcpDeviceScan found %s", network_dev); + return (network_dev); + } + } + + close(s); + + freeifaddrs(ifap); + + return (NULL); +} + /* Get a network device */ Device * tcpDeviceSelect(void) @@ -647,27 +701,38 @@ tcpDeviceSelect(void) rval = NULL; - if (variable_get(VAR_NONINTERACTIVE) && variable_get(VAR_NETWORK_DEVICE)) { + if (variable_get(VAR_NETWORK_DEVICE)) { network_dev = variable_get(VAR_NETWORK_DEVICE); + /* + * 
netDev can be set to several types of values. + * If netDev is set to ANY, scan all network devices + * looking for a valid link, and go with the first + * device found. netDev can also be specified as a + * comma delimited list, with each network device + * tried in order. netDev can also be set to a single + * network device. + */ + if (!strcmp(network_dev, "ANY")) + network_dev = strdup(tcpDeviceScan()); + while ((dev = strsep(&network_dev, ",")) != NULL) { devs = deviceFind(dev, DEVICE_TYPE_NETWORK); cnt = deviceCount(devs); + if (cnt) { - if (DITEM_STATUS(tcpOpenDialog(devs[0]) == DITEM_SUCCESS)) - return(devs[0]); + if (DITEM_STATUS(tcpOpenDialog(devs[0])) == DITEM_SUCCESS) + return (devs[0]); } } - } - devs = deviceFind(variable_get(VAR_NETWORK_DEVICE), DEVICE_TYPE_NETWORK); - cnt = deviceCount(devs); + if (!variable_get(VAR_NONINTERACTIVE)) + msgConfirm("No network devices available!"); - if (!cnt) { - msgConfirm("No network devices available!"); - return NULL; + return (NULL); } - else if ((!RunningAsInit) && (variable_check("NETWORK_CONFIGURED=NO") != TRUE)) { + + if ((!RunningAsInit) && (variable_check("NETWORK_CONFIGURED=NO") != TRUE)) { if (!msgYesNo("Running multi-user, assume that the network is already configured?")) return devs[0]; } -- cgit v1.1 From 865c7cd5ce5ad04dd249f2f815816bb4a3214aba Mon Sep 17 00:00:00 2001 From: bcr Date: Sun, 23 May 2010 11:26:43 +0000 Subject: MFC r208273: Add a sentence to the man page explaining that the -d option can only be used when ntpd is compiled with DEBUG support. 
PR: docs/138206 Submitted by: Oliver Pinter (oliver dot pntr at gmail dot com) Approved by: roberto --- usr.sbin/ntp/doc/ntpd.8 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/ntp/doc/ntpd.8 b/usr.sbin/ntp/doc/ntpd.8 index a55fe43..069ef42 100644 --- a/usr.sbin/ntp/doc/ntpd.8 +++ b/usr.sbin/ntp/doc/ntpd.8 @@ -1,7 +1,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 17, 2006 +.Dd May 18, 2010 .Dt NTPD 8 .Os .Sh NAME @@ -108,6 +108,9 @@ Specify the name and path of the configuration file, default Specify debugging mode. This option may occur more than once, with each occurrence indicating greater detail of display. +You need to compile +.Nm +with DEBUG in order to use this. .It Fl D Ar level Specify debugging level directly. .It Fl f Ar driftfile -- cgit v1.1 From 865091bcc53c79c57deedf70d6125a4bd1813e62 Mon Sep 17 00:00:00 2001 From: brueffer Date: Sun, 23 May 2010 20:18:51 +0000 Subject: MFC: r208056 Wording fixes. --- usr.sbin/mtree/mtree.8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/mtree/mtree.8 b/usr.sbin/mtree/mtree.8 index 119481b..c5908f5 100644 --- a/usr.sbin/mtree/mtree.8 +++ b/usr.sbin/mtree/mtree.8 @@ -88,7 +88,7 @@ Do not complain about files that are in the file hierarchy, but not in the specification. .It Fl i Indent the output 4 spaces each time a directory level is descended when -create a specification with the +creating a specification with the .Fl c option. This does not affect either the /set statements or the comment before each @@ -126,7 +126,7 @@ instead of from the standard input. .Pp If this option is specified twice, the two specifications are compared to each other rather than to the file hierarchy. -The specifications be sorted like output generated using +The specifications will be sorted like output generated using .Fl c . 
The output format in this case is somewhat remniscent of .Xr comm 1 , -- cgit v1.1 From f48c65526f78bda7d3c30dcabf0d5e5987c1bc39 Mon Sep 17 00:00:00 2001 From: brueffer Date: Sun, 23 May 2010 20:23:34 +0000 Subject: MFC: r208054 List /var/cron/tabs in FILES and add descriptions for the other entries. --- usr.sbin/cron/crontab/crontab.1 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/cron/crontab/crontab.1 b/usr.sbin/cron/crontab/crontab.1 index b634d31..eed515e 100644 --- a/usr.sbin/cron/crontab/crontab.1 +++ b/usr.sbin/cron/crontab/crontab.1 @@ -17,7 +17,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 29, 1993 +.Dd May 13, 2010 .Dt CRONTAB 1 .Os .Sh NAME @@ -114,7 +114,11 @@ from the editor, the modified crontab will be installed automatically. .Sh FILES .Bl -tag -width /var/cron/allow -compact .It Pa /var/cron/allow +List of users allowed to use crontab .It Pa /var/cron/deny +List of users prohibited from using crontab +.It Pa /var/cron/tabs +Directory for personal crontab files .El .Sh DIAGNOSTICS A fairly informative usage message appears if you run it with a bad command -- cgit v1.1 From e26081689e950ffd1ac4b4eaaf5a215ab4c38e60 Mon Sep 17 00:00:00 2001 From: kensmith Date: Mon, 24 May 2010 15:26:40 +0000 Subject: Merge r208110: > Do not attempt to render a logrecord with length byte, until we have > decompressed all the bytes required. 
Requested by: phk --- usr.sbin/fifolog/lib/fifolog_reader.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/fifolog/lib/fifolog_reader.c b/usr.sbin/fifolog/lib/fifolog_reader.c index 77e586f..6f18fbb 100644 --- a/usr.sbin/fifolog/lib/fifolog_reader.c +++ b/usr.sbin/fifolog/lib/fifolog_reader.c @@ -225,6 +225,8 @@ fifolog_reader_chop(struct fifolog_reader *fr, fifolog_reader_render_t *func, vo if (u & FIFOLOG_LENGTH) { v = p[w]; w++; + if (p + w + v >= q) + return (p); } else { for (v = 0; p + v + w < q && p[v + w] != '\0'; v++) continue; -- cgit v1.1 From a069bb9fd35a154850547bdb408aa48b20eac7fb Mon Sep 17 00:00:00 2001 From: randi Date: Tue, 25 May 2010 03:08:47 +0000 Subject: MFC r208407: Flush all routes before adding the default route in order to allow sysinstall to cleanly re-initialize the network. PR: bin/145553 Submitted by: gcooper Approved by: cperciva (mentor) --- usr.sbin/sysinstall/network.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/network.c b/usr.sbin/sysinstall/network.c index 20a0430..a06c927 100644 --- a/usr.sbin/sysinstall/network.c +++ b/usr.sbin/sysinstall/network.c @@ -141,12 +141,22 @@ mediaInitNetwork(Device *dev) } rp = variable_get(VAR_GATEWAY); if (!rp || *rp == '0') { - msgConfirm("No gateway has been set. You may be unable to access hosts\n" + msgConfirm("No gateway has been set. You will be unable to access hosts\n" "not on your local network"); } else { + /* + * Explicitly flush all routes to get back to a known sane + * state. We don't need to check this exit code because if + * anything fails it will show up in the route add below. 
+ */ + system("route -n flush"); msgDebug("Adding default route to %s.\n", rp); - vsystem("route -n add default %s", rp); + if (vsystem("route -n add default %s", rp) != 0) { + msgConfirm("Failed to add a default route; please check " + "your network configuration"); + return FALSE; + } } } else { msgDebug("A DHCP interface. Should already be up.\n"); -- cgit v1.1 From 7304692e617d7bac26ec574745838213980a320e Mon Sep 17 00:00:00 2001 From: randi Date: Tue, 25 May 2010 03:15:09 +0000 Subject: MFC r198477,198503: Eject CDROM after installation if used as source media. Approved by: cperciva (mentor) --- usr.sbin/sysinstall/cdrom.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/cdrom.c b/usr.sbin/sysinstall/cdrom.c index a5029ec..6bc72d8 100644 --- a/usr.sbin/sysinstall/cdrom.c +++ b/usr.sbin/sysinstall/cdrom.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,8 @@ static Boolean previouslyMounted; /* Was the disc already mounted? */ static char mountpoint[MAXPATHLEN] = "/dist"; int CDROMInitQuiet; +static void mediaEjectCDROM(Device *dev); + static properties read_props(char *name) { @@ -218,4 +221,24 @@ mediaShutdownCDROM(Device *dev) msgConfirm("Could not unmount the CDROM/DVD from %s: %s", mountpoint, strerror(errno)); else cdromMounted = FALSE; + + mediaEjectCDROM(dev); +} + +static void +mediaEjectCDROM(Device *dev) +{ + int fd = -1; + + msgDebug("Ejecting CDROM/DVD at %s", dev->devname); + + fd = open(dev->devname, O_RDONLY); + + if (fd < 0) + msgDebug("Could not eject the CDROM/DVD from %s: %s", dev->devname, strerror(errno)); + else { + ioctl(fd, CDIOCALLOW); + ioctl(fd, CDIOCEJECT); + close(fd); + } } -- cgit v1.1 From aec939697631599c4eed310c30d19b6c8de568e3 Mon Sep 17 00:00:00 2001 From: jkim Date: Tue, 25 May 2010 20:16:36 +0000 Subject: MFC: r208320 Add a new build option, MAN_UTILS. 
This option lets you control building utilities and related support files for manual pages, which were previously controlled by MAN. For POLA, the default depends on MAN, i.e., WITHOUT_MAN implies WITHOUT_MAN_UTILS and WITH_MAN implies WITH_MAN_UTILS. Note this patch implicitly fixes a documentation bug of src.conf(5), which says WITHOUT_MAN may be used to not build manual pages while it was also disabling some utilities for manual pages. Approved by: re (kib) --- usr.sbin/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index 4cb5709..44f20a4 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -91,7 +91,7 @@ SUBDIR= ${_ac} \ mailwrapper \ makefs \ ${_makemap} \ - manctl \ + ${_manctl} \ memcontrol \ mergemaster \ mfiutil \ @@ -324,6 +324,10 @@ _nscd= nscd _lpr= lpr .endif +.if ${MK_MAN_UTILS} != "no" +_manctl= manctl +.endif + .if ${MK_NETGRAPH} != "no" _flowctl= flowctl _lmcconfig= lmcconfig -- cgit v1.1 From 87354a5115c84359d532c5e24b556be8c6d5e399 Mon Sep 17 00:00:00 2001 From: kensmith Date: Wed, 26 May 2010 17:18:32 +0000 Subject: Add packages-8.1-release directory. While here add packages-7.3-release. 
Approved by: re (implicit) --- usr.sbin/pkg_install/add/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/pkg_install/add/main.c b/usr.sbin/pkg_install/add/main.c index d9b381b..d300ce8 100644 --- a/usr.sbin/pkg_install/add/main.c +++ b/usr.sbin/pkg_install/add/main.c @@ -82,7 +82,9 @@ struct { { 700000, 700099, "/packages-7.0-release" }, { 701000, 701099, "/packages-7.1-release" }, { 702000, 702099, "/packages-7.2-release" }, + { 703000, 703099, "/packages-7.3-release" }, { 800000, 800499, "/packages-8.0-release" }, + { 801000, 801499, "/packages-8.1-release" }, { 300000, 399000, "/packages-3-stable" }, { 400000, 499000, "/packages-4-stable" }, { 502100, 502128, "/packages-5-current" }, -- cgit v1.1 From cecdfcbeea0822ed4b1ce847da15efb757dfa5d5 Mon Sep 17 00:00:00 2001 From: cperciva Date: Thu, 27 May 2010 03:15:04 +0000 Subject: Change the current working directory to be inside the jail created by the jail(8) command. [10:04] Fix a one-NUL-byte buffer overflow in libopie. [10:05] Correctly sanity-check a buffer length in nfs mount. [10:06] Approved by: so (cperciva) Approved by: re (kensmith) Security: FreeBSD-SA-10:04.jail Security: FreeBSD-SA-10:05.opie Security: FreeBSD-SA-10:06.nfsclient --- usr.sbin/jail/jail.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/jail/jail.c b/usr.sbin/jail/jail.c index ca87796..0722bfd 100644 --- a/usr.sbin/jail/jail.c +++ b/usr.sbin/jail/jail.c @@ -511,6 +511,10 @@ set_param(const char *name, char *value) *value++ = '\0'; } + /* jail_set won't chdir along with its chroot, so do it here. 
*/ + if (!strcmp(name, "path") && chdir(value) < 0) + err(1, "chdir: %s", value); + /* Check for repeat parameters */ for (i = 0; i < nparams; i++) if (!strcmp(name, params[i].jp_name)) { -- cgit v1.1 From 6fb5326831321db7d1ad5cb56cb125da582d72c5 Mon Sep 17 00:00:00 2001 From: bz Date: Sun, 30 May 2010 11:28:01 +0000 Subject: MFC r199818: New style of jail(8) usage requires "-c" argument to create a jail. Approved by: re (hrs) --- usr.sbin/jail/jail.8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/jail/jail.8 b/usr.sbin/jail/jail.8 index f09157f..77f1b95 100644 --- a/usr.sbin/jail/jail.8 +++ b/usr.sbin/jail/jail.8 @@ -569,7 +569,7 @@ or for running a virtual server. Start a shell in the jail: .Pp .Bd -literal -offset indent -jail path=/data/jail/192.0.2.100 host.hostname=testhostname \\ +jail -c path=/data/jail/192.0.2.100 host.hostname=testhostname \\ ip4.addr=192.0.2.100 command=/bin/sh .Ed .Pp @@ -644,7 +644,7 @@ script from within the jail. .Bd -literal -offset indent ifconfig ed0 inet alias 192.0.2.100/32 mount -t procfs proc /data/jail/192.0.2.100/proc -jail path=/data/jail/192.0.2.100 host.hostname=testhostname \\ +jail -c path=/data/jail/192.0.2.100 host.hostname=testhostname \\ ip4.addr=192.0.2.100 command=/bin/sh /etc/rc .Ed .Pp -- cgit v1.1 From 322c673680e7cf7828ca2ee1ccc2a09ecee1d0d4 Mon Sep 17 00:00:00 2001 From: randi Date: Fri, 18 Jun 2010 21:10:56 +0000 Subject: MFC r209273: Fix uninitialized variables that cause a crash when the network is initialized and sysinstall is not running as init. 
Submitted by: Nick Mills Approved by: cperciva (mentor) Approved by: re (kensmith) --- usr.sbin/sysinstall/tcpip.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/sysinstall/tcpip.c b/usr.sbin/sysinstall/tcpip.c index 1c1d676..4537815 100644 --- a/usr.sbin/sysinstall/tcpip.c +++ b/usr.sbin/sysinstall/tcpip.c @@ -732,6 +732,9 @@ tcpDeviceSelect(void) return (NULL); } + devs = deviceFind(NULL, DEVICE_TYPE_NETWORK); + cnt = deviceCount(devs); + if ((!RunningAsInit) && (variable_check("NETWORK_CONFIGURED=NO") != TRUE)) { if (!msgYesNo("Running multi-user, assume that the network is already configured?")) return devs[0]; -- cgit v1.1 From d45b7f14ae6fa78882fa9ec3be976733ca4767b4 Mon Sep 17 00:00:00 2001 From: grehan Date: Fri, 13 May 2011 04:54:01 +0000 Subject: Import of bhyve hypervisor and utilities, part 1. vmm.ko - kernel module for VT-x, VT-d and hypervisor control bhyve - user-space sequencer and i/o emulation vmmctl - dump of hypervisor register state libvmm - front-end to vmm.ko chardev interface bhyve was designed and implemented by Neel Natu. 
Thanks to the following folk from NetApp who helped to make this available: Joe CaraDonna Peter Snyder Jeff Heller Sandeep Mann Steve Miller Brian Pawlowski --- usr.sbin/Makefile | 4 + usr.sbin/bhyve/Makefile | 18 + usr.sbin/bhyve/atpic.c | 68 ++ usr.sbin/bhyve/consport.c | 121 +++ usr.sbin/bhyve/dbgport.c | 124 ++++ usr.sbin/bhyve/dbgport.h | 36 + usr.sbin/bhyve/elcr.c | 65 ++ usr.sbin/bhyve/fbsdrun.c | 650 ++++++++++++++++ usr.sbin/bhyve/fbsdrun.h | 53 ++ usr.sbin/bhyve/inout.c | 98 +++ usr.sbin/bhyve/inout.h | 64 ++ usr.sbin/bhyve/mevent.c | 419 +++++++++++ usr.sbin/bhyve/mevent.h | 49 ++ usr.sbin/bhyve/mevent_test.c | 180 +++++ usr.sbin/bhyve/pci_emul.c | 976 ++++++++++++++++++++++++ usr.sbin/bhyve/pci_emul.h | 171 +++++ usr.sbin/bhyve/pci_hostbridge.c | 52 ++ usr.sbin/bhyve/pci_passthru.c | 508 +++++++++++++ usr.sbin/bhyve/pci_virtio_block.c | 502 +++++++++++++ usr.sbin/bhyve/pci_virtio_net.c | 739 ++++++++++++++++++ usr.sbin/bhyve/pit_8254.c | 196 +++++ usr.sbin/bhyve/pit_8254.h | 45 ++ usr.sbin/bhyve/post.c | 51 ++ usr.sbin/bhyve/rtc.c | 268 +++++++ usr.sbin/bhyve/uart.c | 60 ++ usr.sbin/bhyve/virtio.h | 85 +++ usr.sbin/bhyve/xmsr.c | 261 +++++++ usr.sbin/bhyve/xmsr.h | 34 + usr.sbin/vmmctl/Makefile | 15 + usr.sbin/vmmctl/sample.sh | 75 ++ usr.sbin/vmmctl/vmmctl.c | 1485 +++++++++++++++++++++++++++++++++++++ 31 files changed, 7472 insertions(+) create mode 100644 usr.sbin/bhyve/Makefile create mode 100644 usr.sbin/bhyve/atpic.c create mode 100644 usr.sbin/bhyve/consport.c create mode 100644 usr.sbin/bhyve/dbgport.c create mode 100644 usr.sbin/bhyve/dbgport.h create mode 100644 usr.sbin/bhyve/elcr.c create mode 100644 usr.sbin/bhyve/fbsdrun.c create mode 100644 usr.sbin/bhyve/fbsdrun.h create mode 100644 usr.sbin/bhyve/inout.c create mode 100644 usr.sbin/bhyve/inout.h create mode 100644 usr.sbin/bhyve/mevent.c create mode 100644 usr.sbin/bhyve/mevent.h create mode 100644 usr.sbin/bhyve/mevent_test.c create mode 100644 usr.sbin/bhyve/pci_emul.c create mode 
100644 usr.sbin/bhyve/pci_emul.h create mode 100644 usr.sbin/bhyve/pci_hostbridge.c create mode 100644 usr.sbin/bhyve/pci_passthru.c create mode 100644 usr.sbin/bhyve/pci_virtio_block.c create mode 100644 usr.sbin/bhyve/pci_virtio_net.c create mode 100644 usr.sbin/bhyve/pit_8254.c create mode 100644 usr.sbin/bhyve/pit_8254.h create mode 100644 usr.sbin/bhyve/post.c create mode 100644 usr.sbin/bhyve/rtc.c create mode 100644 usr.sbin/bhyve/uart.c create mode 100644 usr.sbin/bhyve/virtio.h create mode 100644 usr.sbin/bhyve/xmsr.c create mode 100644 usr.sbin/bhyve/xmsr.h create mode 100644 usr.sbin/vmmctl/Makefile create mode 100755 usr.sbin/vmmctl/sample.sh create mode 100644 usr.sbin/vmmctl/vmmctl.c (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index 44f20a4..fc527b7 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -19,6 +19,7 @@ SUBDIR= ${_ac} \ ${_auditd} \ ${_auditreduce} \ ${_authpf} \ + ${_bhyve} \ ${_bluetooth} \ ${_boot0cfg} \ ${_boot98cfg} \ @@ -194,6 +195,7 @@ SUBDIR= ${_ac} \ ${_usbdevs} \ ${_usbconfig} \ ${_vidcontrol} \ + ${_vmmctl} \ vipw \ wake \ watch \ @@ -477,6 +479,7 @@ _boot98cfg= boot98cfg _acpi= acpi .endif _asf= asf +_bhyve= bhyve _boot0cfg= boot0cfg .if ${MK_TOOLCHAIN} != "no" _btxld= btxld @@ -494,6 +497,7 @@ _ndiscvt= ndiscvt .endif _sicontrol= sicontrol _spkrtest= spkrtest +_vmmctl= vmmctl _zzz= zzz .endif diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile new file mode 100644 index 0000000..71df082 --- /dev/null +++ b/usr.sbin/bhyve/Makefile @@ -0,0 +1,18 @@ +# +# $FreeBSD$ +# + +PROG= bhyve + +SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c mevent.c +SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c +SRCS+= pci_virtio_net.c pit_8254.c post.c rtc.c uart.c xmsr.c + +NO_MAN= + +DPADD= ${LIBVMMAPI} ${LIBMD} ${LIBPTHREAD} +LDADD= -lvmmapi -lmd -lpthread + +CFLAGS+= -I${.CURDIR}/../../sys + +.include <bsd.prog.mk> diff --git a/usr.sbin/bhyve/atpic.c b/usr.sbin/bhyve/atpic.c new
file mode 100644 index 0000000..a9fb084 --- /dev/null +++ b/usr.sbin/bhyve/atpic.c @@ -0,0 +1,68 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include + +#include "inout.h" + +/* + * FreeBSD only writes to the 8259 interrupt controllers to put them in a + * shutdown state. + * + * So, we just ignore the writes. 
+ */ + +#define IO_ICU1 0x20 +#define IO_ICU2 0xA0 +#define ICU_IMR_OFFSET 1 + +static int +atpic_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + if (bytes != 1) + return (-1); + + if (in) + return (-1); + + /* Pretend all writes to the 8259 are alright */ + return (0); +} + +INOUT_PORT(atpic, IO_ICU1, IOPORT_F_INOUT, atpic_handler); +INOUT_PORT(atpic, IO_ICU1 + ICU_IMR_OFFSET, IOPORT_F_INOUT, atpic_handler); +INOUT_PORT(atpic, IO_ICU2, IOPORT_F_INOUT, atpic_handler); +INOUT_PORT(atpic, IO_ICU2 + ICU_IMR_OFFSET, IOPORT_F_INOUT, atpic_handler); diff --git a/usr.sbin/bhyve/consport.c b/usr.sbin/bhyve/consport.c new file mode 100644 index 0000000..34f94a6 --- /dev/null +++ b/usr.sbin/bhyve/consport.c @@ -0,0 +1,121 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include + +#include "inout.h" + +#define BVM_CONSOLE_PORT 0x220 + +static struct termios tio_orig, tio_new; + +static void +ttyclose(void) +{ + tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig); +} + +static void +ttyopen(void) +{ + tcgetattr(STDIN_FILENO, &tio_orig); + + cfmakeraw(&tio_new); + tcsetattr(STDIN_FILENO, TCSANOW, &tio_new); + + atexit(ttyclose); +} + +static bool +tty_char_available(void) +{ + fd_set rfds; + struct timeval tv; + + FD_ZERO(&rfds); + FD_SET(STDIN_FILENO, &rfds); + tv.tv_sec = 0; + tv.tv_usec = 0; + if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) { + return (true); + } else { + return (false); + } +} + +static int +ttyread(void) +{ + char rb; + + if (tty_char_available()) { + read(STDIN_FILENO, &rb, 1); + return (rb & 0xff); + } else { + return (-1); + } +} + +static void +ttywrite(unsigned char wb) +{ + (void) write(STDOUT_FILENO, &wb, 1); +} + +static int +console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + static int opened; + + if (bytes != 4) + return (-1); + + if (!opened) { + ttyopen(); + opened = 1; + } + + if (in) + *eax = ttyread(); + else + ttywrite(*eax); + + return (0); +} +INOUT_PORT(console, BVM_CONSOLE_PORT, IOPORT_F_INOUT, console_handler); diff --git a/usr.sbin/bhyve/dbgport.c b/usr.sbin/bhyve/dbgport.c new file 
mode 100644 index 0000000..be919e1 --- /dev/null +++ b/usr.sbin/bhyve/dbgport.c @@ -0,0 +1,124 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "inout.h" + +#define BVM_DBG_PORT 0x224 + +static int listen_fd, conn_fd; + +static struct sockaddr_in sin; + +void +init_dbgport(int sport) +{ + conn_fd = -1; + + if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + perror("socket"); + exit(1); + } + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(sport); + + if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) { + perror("bind"); + exit(1); + } + + if (listen(listen_fd, 1) < 0) { + perror("listen"); + exit(1); + } +} + +static int +dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + char ch; + int nwritten, nread, printonce; + + if (bytes != 4) + return (-1); + +again: + printonce = 0; + while (conn_fd < 0) { + if (!printonce) { + printf("Waiting for connection from gdb\r\n"); + printonce = 1; + } + conn_fd = accept(listen_fd, NULL, NULL); + if (conn_fd >= 0) + fcntl(conn_fd, F_SETFL, O_NONBLOCK); + else if (errno != EINTR) + perror("accept"); + } + + if (in) { + nread = read(conn_fd, &ch, 1); + if (nread == -1 && errno == EAGAIN) + *eax = -1; + else if (nread == 1) + *eax = ch; + else { + close(conn_fd); + conn_fd = -1; + goto again; + } + } else { + ch = *eax; + nwritten = write(conn_fd, &ch, 1); + if (nwritten != 1) { + close(conn_fd); + conn_fd = -1; + goto again; + } + } + return (0); +} + +INOUT_PORT(dbg, BVM_DBG_PORT, IOPORT_F_INOUT, dbg_handler); diff --git a/usr.sbin/bhyve/dbgport.h b/usr.sbin/bhyve/dbgport.h new file mode 100644 index 0000000..8c7dab7 --- /dev/null +++ b/usr.sbin/bhyve/dbgport.h @@ -0,0 +1,36 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _DBGPORT_H_ +#define _DBGPORT_H_ + +#define DEFAULT_GDB_PORT 6466 + +void init_dbgport(int port); + +#endif diff --git a/usr.sbin/bhyve/elcr.c b/usr.sbin/bhyve/elcr.c new file mode 100644 index 0000000..2417ae1 --- /dev/null +++ b/usr.sbin/bhyve/elcr.c @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include "inout.h" + +/* + * EISA interrupt Level Control Register. + * + * This is a 16-bit register with one bit for each of the IRQ0 through IRQ15. + * A level triggered irq is indicated by setting the corresponding bit to '1'. + */ +#define ELCR_PORT 0x4d0 + +static uint8_t elcr[2] = { 0x00, 0x00 }; + +static int +elcr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + int idx; + + if (bytes != 1) + return (-1); + + idx = port - ELCR_PORT; + + if (in) + *eax = elcr[idx]; + else + elcr[idx] = *eax; + + return (0); +} +INOUT_PORT(elcr, ELCR_PORT + 0, IOPORT_F_INOUT, elcr_handler); +INOUT_PORT(elcr, ELCR_PORT + 1, IOPORT_F_INOUT, elcr_handler); diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c new file mode 100644 index 0000000..ddbe709b --- /dev/null +++ b/usr.sbin/bhyve/fbsdrun.c @@ -0,0 +1,650 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fbsdrun.h" +#include "inout.h" +#include "dbgport.h" +#include "mevent.h" +#include "pci_emul.h" +#include "xmsr.h" + +#define DEFAULT_GUEST_HZ 100 +#define DEFAULT_GUEST_TSLICE 200 + +#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ + +#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ +#define VMEXIT_CONTINUE 1 /* continue from next instruction */ +#define VMEXIT_RESTART 2 /* restart current instruction */ +#define VMEXIT_ABORT 3 /* abort the vm run loop */ +#define VMEXIT_RESET 4 /* guest machine has reset */ + +#define MB (1024UL * 1024) +#define GB (1024UL * MB) + +typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); + +int guest_tslice = DEFAULT_GUEST_TSLICE; +int guest_hz = DEFAULT_GUEST_HZ; +char *vmname; + +u_long lomem_sz; +u_long himem_sz; + +int guest_ncpus; + +static int pincpu = -1; +static int guest_vcpu_mux; +static int guest_vmexit_on_hlt, guest_vmexit_on_pause; + +static int foundcpus; + +static char *lomem_addr; +static char *himem_addr; + +static char *progname; +static const int BSP = 0; + +static int cpumask; + +static void *oem_tbl_start; +static int oem_tbl_size; + +static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); + +struct vm_exit vmexit[VM_MAXCPU]; + +struct fbsdstats { + uint64_t vmexit_bogus; + uint64_t vmexit_bogus_switch; + uint64_t vmexit_hlt; + uint64_t vmexit_pause; + uint64_t vmexit_mtrap; + uint64_t cpu_switch_rotate; + uint64_t cpu_switch_direct; + int io_reset; +} stats; + +struct mt_vmm_info { + pthread_t mt_thr; + struct vmctx *mt_ctx; + int mt_vcpu; +} mt_vmm_info[VM_MAXCPU]; + +static void +usage(int code) +{ + + fprintf(stderr, + "Usage: %s [-hBHP][-g ][-z ][-s ][-p pincpu]" + "[-n ][-m lowmem][-M highmem] \n" + " -g: gdb port (default is %d and 0 
means don't open)\n" + " -c: # cpus (default 1)\n" + " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" + " -B: inject breakpoint exception on vm entry\n" + " -H: vmexit from the guest on hlt\n" + " -P: vmexit from the guest on pause\n" + " -h: help\n" + " -z: guest hz (default is %d)\n" + " -s: PCI slot config\n" + " -n: PCI slot naming\n" + " -m: lowmem in MB\n" + " -M: highmem in MB\n" + " -x: mux vcpus to 1 hcpu\n" + " -t: mux vcpu timeslice hz (default %d)\n", + progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ, + DEFAULT_GUEST_TSLICE); + exit(code); +} + +void * +paddr_guest2host(uintptr_t gaddr) +{ + if (lomem_sz == 0) + return (NULL); + + if (gaddr < lomem_sz) { + return ((void *)(lomem_addr + gaddr)); + } else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) { + return ((void *)(himem_addr + gaddr - 4*GB)); + } else + return (NULL); +} + +void +fbsdrun_add_oemtbl(void *tbl, int tblsz) +{ + oem_tbl_start = tbl; + oem_tbl_size = tblsz; +} + +int +fbsdrun_vmexit_on_pause(void) +{ + + return (guest_vmexit_on_pause); +} + +int +fbsdrun_vmexit_on_hlt(void) +{ + + return (guest_vmexit_on_hlt); +} + +int +fbsdrun_muxed(void) +{ + + return (guest_vcpu_mux); +} + +void * +fbsdrun_start_thread(void *param) +{ + int vcpu; + struct mt_vmm_info *mtp = param; + + vcpu = mtp->mt_vcpu; + vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); + + /* not reached */ + exit(1); + return (NULL); +} + +void +fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) +{ + int error; + + if (cpumask & (1 << vcpu)) { + printf("addcpu: attempting to add existing cpu %d\n", vcpu); + exit(1); + } + + cpumask |= 1 << vcpu; + foundcpus++; + + /* + * Set up the vmexit struct to allow execution to start + * at the given RIP + */ + vmexit[vcpu].rip = rip; + vmexit[vcpu].inst_length = 0; + + if (vcpu == BSP || !guest_vcpu_mux){ + mt_vmm_info[vcpu].mt_ctx = ctx; + mt_vmm_info[vcpu].mt_vcpu = vcpu; + + error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, + fbsdrun_start_thread, &mt_vmm_info[vcpu]); + 
assert(error == 0); + } +} + +static int +fbsdrun_get_next_cpu(int curcpu) +{ + + /* + * Get the next available CPU. Assumes they arrive + * in ascending order with no gaps. + */ + return ((curcpu + 1) % foundcpus); +} + +int +vmexit_catch_reset(void) +{ + stats.io_reset++; + return (VMEXIT_RESET); +} + +int +vmexit_catch_inout(void) +{ + return (VMEXIT_ABORT); +} + +int +vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, + uint32_t eax) +{ +#if PG_DEBUG /* put all types of debug here */ + if (eax == 0) { + pause_noswitch = 1; + } else if (eax == 1) { + pause_noswitch = 0; + } else { + pause_noswitch = 0; + if (eax == 5) { + vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1); + } + } +#endif + return (VMEXIT_CONTINUE); +} + +static int +vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) +{ + int error; + int bytes, port, in, out; + uint32_t eax; + int vcpu; + + vcpu = *pvcpu; + + port = vme->u.inout.port; + bytes = vme->u.inout.bytes; + eax = vme->u.inout.eax; + in = vme->u.inout.in; + out = !in; + + /* We don't deal with these */ + if (vme->u.inout.string || vme->u.inout.rep) + return (VMEXIT_ABORT); + + /* Special case of guest reset */ + if (out && port == 0x64 && (uint8_t)eax == 0xFE) + return (vmexit_catch_reset()); + + /* Extra-special case of host notifications */ + if (out && port == GUEST_NIO_PORT) + return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); + + error = emulate_inout(ctx, vcpu, in, port, bytes, &eax); + if (error == 0 && in) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); + + if (error == 0) + return (VMEXIT_CONTINUE); + else { + fprintf(stderr, "Unhandled %s%c 0x%04x\n", + in ? "in" : "out", + bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), port); + return (vmexit_catch_inout()); + } +} + +static int +vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) +{ + printf("vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, *pvcpu); + return (VMEXIT_ABORT); +} + +static int +vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) +{ + int newcpu; + int retval = VMEXIT_CONTINUE; + + newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); + + if (guest_vcpu_mux && *pvcpu != newcpu) { + retval = VMEXIT_SWITCH; + *pvcpu = newcpu; + } + + return (retval); +} + +static int +vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + printf("vm exit[%d]\n", *pvcpu); + printf("\treason\t\tVMX\n"); + printf("\trip\t\t0x%016lx\n", vmexit->rip); + printf("\tinst_length\t%d\n", vmexit->inst_length); + printf("\terror\t\t%d\n", vmexit->u.vmx.error); + printf("\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); + printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); + + return (VMEXIT_ABORT); +} + +static int bogus_noswitch = 1; + +static int +vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + stats.vmexit_bogus++; + + if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) { + return (VMEXIT_RESTART); + } else { + stats.vmexit_bogus_switch++; + vmexit->inst_length = 0; + *pvcpu = -1; + return (VMEXIT_SWITCH); + } +} + +static int +vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + stats.vmexit_hlt++; + if (fbsdrun_muxed()) { + *pvcpu = -1; + return (VMEXIT_SWITCH); + } else { + /* + * Just continue execution with the next instruction. We use + * the HLT VM exit as a way to be friendly with the host + * scheduler. 
+ */ + return (VMEXIT_CONTINUE); + } +} + +static int pause_noswitch; + +static int +vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + stats.vmexit_pause++; + + if (fbsdrun_muxed() && !pause_noswitch) { + *pvcpu = -1; + return (VMEXIT_SWITCH); + } else { + return (VMEXIT_CONTINUE); + } +} + +static int +vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + stats.vmexit_mtrap++; + + return (VMEXIT_RESTART); +} + +static void +sigalrm(int sig) +{ + return; +} + +static void +setup_timeslice(void) +{ + struct sigaction sa; + struct itimerval itv; + int error; + + /* + * Setup a realtime timer to generate a SIGALRM at a + * frequency of 'guest_tslice' ticks per second. + */ + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = sigalrm; + + error = sigaction(SIGALRM, &sa, NULL); + assert(error == 0); + + itv.it_interval.tv_sec = 0; + itv.it_interval.tv_usec = 1000000 / guest_tslice; + itv.it_value.tv_sec = 0; + itv.it_value.tv_usec = 1000000 / guest_tslice; + + error = setitimer(ITIMER_REAL, &itv, NULL); + assert(error == 0); +} + +static vmexit_handler_t handler[VM_EXITCODE_MAX] = { + [VM_EXITCODE_INOUT] = vmexit_inout, + [VM_EXITCODE_VMX] = vmexit_vmx, + [VM_EXITCODE_BOGUS] = vmexit_bogus, + [VM_EXITCODE_RDMSR] = vmexit_rdmsr, + [VM_EXITCODE_WRMSR] = vmexit_wrmsr, + [VM_EXITCODE_MTRAP] = vmexit_mtrap, +}; + +static void +vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) +{ + int error, rc, prevcpu; + + if (guest_vcpu_mux) + setup_timeslice(); + + if (pincpu >= 0) { + error = vm_set_pinning(ctx, vcpu, pincpu + vcpu); + assert(error == 0); + } + + while (1) { + error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); + if (error != 0) + break; + + prevcpu = vcpu; + rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], + &vcpu); + switch (rc) { + case VMEXIT_SWITCH: + assert(guest_vcpu_mux); + if (vcpu == -1) { + stats.cpu_switch_rotate++; + vcpu = fbsdrun_get_next_cpu(prevcpu); + } else { + stats.cpu_switch_direct++; + } 
+ /* fall through */ + case VMEXIT_CONTINUE: + rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; + break; + case VMEXIT_RESTART: + rip = vmexit[vcpu].rip; + break; + case VMEXIT_RESET: + exit(0); + default: + exit(1); + } + } + fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); +} + + +int +main(int argc, char *argv[]) +{ + int c, error, gdb_port, inject_bkpt, tmp, err; + struct vmctx *ctx; + uint64_t rip; + + inject_bkpt = 0; + progname = basename(argv[0]); + gdb_port = DEFAULT_GDB_PORT; + guest_ncpus = 1; + + while ((c = getopt(argc, argv, "hBHPxp:g:c:z:s:n:m:M:")) != -1) { + switch (c) { + case 'B': + inject_bkpt = 1; + break; + case 'x': + guest_vcpu_mux = 1; + break; + case 'p': + pincpu = atoi(optarg); + break; + case 'c': + guest_ncpus = atoi(optarg); + break; + case 'g': + gdb_port = atoi(optarg); + break; + case 'z': + guest_hz = atoi(optarg); + break; + case 't': + guest_tslice = atoi(optarg); + break; + case 's': + pci_parse_slot(optarg); + break; + case 'n': + pci_parse_name(optarg); + break; + case 'm': + lomem_sz = strtoul(optarg, NULL, 0) * MB; + break; + case 'M': + himem_sz = strtoul(optarg, NULL, 0) * MB; + break; + case 'H': + guest_vmexit_on_hlt = 1; + break; + case 'P': + guest_vmexit_on_pause = 1; + break; + case 'h': + usage(0); + default: + usage(1); + } + } + argc -= optind; + argv += optind; + + if (argc != 1) + usage(1); + + /* No need to mux if guest is uni-processor */ + if (guest_ncpus <= 1) + guest_vcpu_mux = 0; + + /* vmexit on hlt if guest is muxed */ + if (guest_vcpu_mux) { + guest_vmexit_on_hlt = 1; + guest_vmexit_on_pause = 1; + } + + vmname = argv[0]; + + ctx = vm_open(vmname); + if (ctx == NULL) { + perror("vm_open"); + exit(1); + } + + if (fbsdrun_vmexit_on_hlt()) { + err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); + if (err < 0) { + printf("VM exit on HLT not supported\n"); + exit(1); + } + vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); + handler[VM_EXITCODE_HLT] = vmexit_hlt; + } + + if 
(fbsdrun_vmexit_on_pause()) { + /* + * pause exit support required for this mode + */ + err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp); + if (err < 0) { + printf("SMP mux requested, no pause support\n"); + exit(1); + } + vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); + handler[VM_EXITCODE_PAUSE] = vmexit_pause; + } + + if (lomem_sz != 0) { + lomem_addr = vm_map_memory(ctx, 0, lomem_sz); + if (lomem_addr == (char *) MAP_FAILED) { + lomem_sz = 0; + } else if (himem_sz != 0) { + himem_addr = vm_map_memory(ctx, 4*GB, himem_sz); + if (himem_addr == (char *) MAP_FAILED) { + lomem_sz = 0; + himem_sz = 0; + } + } + } + + init_inout(); + init_pci(ctx); + + if (gdb_port != 0) + init_dbgport(gdb_port); + + error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); + assert(error == 0); + + if (inject_bkpt) { + error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP); + assert(error == 0); + } + + /* + * build the guest tables, MP etc. + */ + vm_build_tables(ctx, guest_ncpus, oem_tbl_start, oem_tbl_size); + + /* + * Add CPU 0 + */ + fbsdrun_addcpu(ctx, BSP, rip); + + /* + * Head off to the main event dispatch loop + */ + mevent_dispatch(); + + exit(1); +} diff --git a/usr.sbin/bhyve/fbsdrun.h b/usr.sbin/bhyve/fbsdrun.h new file mode 100644 index 0000000..8106122 --- /dev/null +++ b/usr.sbin/bhyve/fbsdrun.h @@ -0,0 +1,53 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _FBSDRUN_H_ +#define _FBSDRUN_H_ + +#ifndef CTASSERT /* Allow lint to override */ +#define CTASSERT(x) _CTASSERT(x, __LINE__) +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] +#endif + +struct vmctx; +extern int guest_hz; +extern int guest_tslice; +extern int guest_ncpus; +extern char *vmname; + +extern u_long lomem_sz, himem_sz; + +void *paddr_guest2host(uintptr_t); + +void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip); +void fbsdrun_add_oemtbl(void *tbl, int tblsz); +int fbsdrun_muxed(void); +int fbsdrun_vmexit_on_hlt(void); +int fbsdrun_vmexit_on_pause(void); +#endif diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c new file mode 100644 index 0000000..84445b1 --- /dev/null +++ b/usr.sbin/bhyve/inout.c @@ -0,0 +1,98 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include + +#include "inout.h" + +SET_DECLARE(inout_port_set, struct inout_port); + +#define MAX_IOPORTS (1 << 16) + +static struct { + const char *name; + int flags; + inout_func_t handler; + void *arg; +} inout_handlers[MAX_IOPORTS]; + +int +emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax) +{ + int flags; + inout_func_t handler; + void *arg; + + assert(port < MAX_IOPORTS); + + if ((handler = inout_handlers[port].handler) == NULL) + return (-1); + + flags = inout_handlers[port].flags; + arg = inout_handlers[port].arg; + + if ((in && (flags & IOPORT_F_IN)) || (!in && (flags & IOPORT_F_OUT))) + return ((*handler)(ctx, vcpu, in, port, bytes, eax, arg)); + else + return (-1); +} + +void +init_inout(void) +{ + struct inout_port **iopp, *iop; + + SET_FOREACH(iopp, inout_port_set) { + iop = *iopp; + assert(iop->port < MAX_IOPORTS); + inout_handlers[iop->port].name = iop->name; 
+ inout_handlers[iop->port].flags = iop->flags; + inout_handlers[iop->port].handler = iop->handler; + inout_handlers[iop->port].arg = NULL; + } +} + +int +register_inout(struct inout_port *iop) +{ + assert(iop->port < MAX_IOPORTS); + inout_handlers[iop->port].name = iop->name; + inout_handlers[iop->port].flags = iop->flags; + inout_handlers[iop->port].handler = iop->handler; + inout_handlers[iop->port].arg = iop->arg; + + return (0); +} diff --git a/usr.sbin/bhyve/inout.h b/usr.sbin/bhyve/inout.h new file mode 100644 index 0000000..7b8a4a6 --- /dev/null +++ b/usr.sbin/bhyve/inout.h @@ -0,0 +1,64 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _INOUT_H_ +#define _INOUT_H_ + +#include + +struct vmctx; + +typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port, + int bytes, uint32_t *eax, void *arg); + +struct inout_port { + const char *name; + int port; + int flags; + inout_func_t handler; + void *arg; +}; +#define IOPORT_F_IN 0x1 +#define IOPORT_F_OUT 0x2 +#define IOPORT_F_INOUT 0x3 + +#define INOUT_PORT(name, port, flags, handler) \ + static struct inout_port __CONCAT(__inout_port, __LINE__) = { \ + #name, \ + (port), \ + (flags), \ + (handler) \ + }; \ + DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__)) + +void init_inout(void); +int emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes, + uint32_t *eax); +int register_inout(struct inout_port *iop); + +#endif /* _INOUT_H_ */ diff --git a/usr.sbin/bhyve/mevent.c b/usr.sbin/bhyve/mevent.c new file mode 100644 index 0000000..0d3b287 --- /dev/null +++ b/usr.sbin/bhyve/mevent.c @@ -0,0 +1,419 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Micro event library for FreeBSD, designed for a single i/o thread + * using kqueue, and having events be persistent by default. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "mevent.h" + +#define MEVENT_MAX 64 + +#define MEV_ENABLE 1 +#define MEV_DISABLE 2 +#define MEV_DEL_PENDING 3 + +static pthread_t mevent_tid; +static int mevent_pipefd[2]; +static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; + +struct mevent { + void (*me_func)(int, enum ev_type, void *); + int me_fd; + enum ev_type me_type; + void *me_param; + int me_cq; + int me_state; + int me_closefd; + LIST_ENTRY(mevent) me_list; +}; + +static LIST_HEAD(listhead, mevent) global_head, change_head; + +static void +mevent_qlock(void) +{ + pthread_mutex_lock(&mevent_lmutex); +} + +static void +mevent_qunlock(void) +{ + pthread_mutex_unlock(&mevent_lmutex); +} + +static void +mevent_pipe_read(int fd, enum ev_type type, void *param) +{ + char buf[MEVENT_MAX]; + int status; + + /* + * Drain the pipe read side. The fd is non-blocking so this is + * safe to do. + */ + do { + status = read(fd, buf, sizeof(buf)); + } while (status == MEVENT_MAX); +} + +static void +mevent_notify(void) +{ + char c; + + /* + * If calling from outside the i/o thread, write a byte on the + * pipe to force the i/o thread to exit the blocking kevent call. 
+ */ + if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { + write(mevent_pipefd[1], &c, 1); + } +} + +static int +mevent_kq_filter(struct mevent *mevp) +{ + int retval; + + retval = 0; + + if (mevp->me_type == EVF_READ) + retval = EVFILT_READ; + + if (mevp->me_type == EVF_WRITE) + retval = EVFILT_WRITE; + + return (retval); +} + +static int +mevent_kq_flags(struct mevent *mevp) +{ + int ret; + + switch (mevp->me_state) { + case MEV_ENABLE: + ret = EV_ADD; + break; + case MEV_DISABLE: + ret = EV_DISABLE; + break; + case MEV_DEL_PENDING: + ret = EV_DELETE; + break; + } + + return (ret); +} + +static int +mevent_kq_fflags(struct mevent *mevp) +{ + /* XXX nothing yet, perhaps EV_EOF for reads ? */ + return (0); +} + +static int +mevent_build(int mfd, struct kevent *kev) +{ + struct mevent *mevp, *tmpp; + int i; + + i = 0; + + mevent_qlock(); + + LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { + if (mevp->me_closefd) { + /* + * A close of the file descriptor will remove the + * event + */ + close(mevp->me_fd); + } else { + kev[i].ident = mevp->me_fd; + kev[i].filter = mevent_kq_filter(mevp); + kev[i].flags = mevent_kq_flags(mevp); + kev[i].fflags = mevent_kq_fflags(mevp); + kev[i].data = 0; + kev[i].udata = mevp; + i++; + } + + mevp->me_cq = 0; + LIST_REMOVE(mevp, me_list); + + if (mevp->me_state == MEV_DEL_PENDING) { + free(mevp); + } else { + LIST_INSERT_HEAD(&global_head, mevp, me_list); + } + + assert(i < MEVENT_MAX); + } + + mevent_qunlock(); + + return (i); +} + +static void +mevent_handle(struct kevent *kev, int numev) +{ + struct mevent *mevp; + int i; + + for (i = 0; i < numev; i++) { + mevp = kev[i].udata; + + /* XXX check for EV_ERROR ? 
*/ + + (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); + } +} + +struct mevent * +mevent_add(int fd, enum ev_type type, + void (*func)(int, enum ev_type, void *), void *param) +{ + struct mevent *lp, *mevp; + + if (fd < 0 || func == NULL) { + return (NULL); + } + + mevp = NULL; + + mevent_qlock(); + + /* + * Verify that the fd/type tuple is not present in any list + */ + LIST_FOREACH(lp, &global_head, me_list) { + if (lp->me_fd == fd && lp->me_type == type) { + goto exit; + } + } + + LIST_FOREACH(lp, &change_head, me_list) { + if (lp->me_fd == fd && lp->me_type == type) { + goto exit; + } + } + + /* + * Allocate an entry, populate it, and add it to the change list. + */ + mevp = malloc(sizeof(struct mevent)); + if (mevp == NULL) { + goto exit; + } + + memset(mevp, 0, sizeof(struct mevent)); + mevp->me_fd = fd; + mevp->me_type = type; + mevp->me_func = func; + mevp->me_param = param; + + LIST_INSERT_HEAD(&change_head, mevp, me_list); + mevp->me_cq = 1; + mevp->me_state = MEV_ENABLE; + mevent_notify(); + +exit: + mevent_qunlock(); + + return (mevp); +} + +static int +mevent_update(struct mevent *evp, int newstate) +{ + /* + * It's not possible to enable/disable a deleted event + */ + if (evp->me_state == MEV_DEL_PENDING) + return (EINVAL); + + /* + * No update needed if state isn't changing + */ + if (evp->me_state == newstate) + return (0); + + mevent_qlock(); + + evp->me_state = newstate; + + /* + * Place the entry onto the changed list if not already there. 
+ */ + if (evp->me_cq == 0) { + evp->me_cq = 1; + LIST_REMOVE(evp, me_list); + LIST_INSERT_HEAD(&change_head, evp, me_list); + mevent_notify(); + } + + mevent_qunlock(); + + return (0); +} + +int +mevent_enable(struct mevent *evp) +{ + + return (mevent_update(evp, MEV_ENABLE)); +} + +int +mevent_disable(struct mevent *evp) +{ + + return (mevent_update(evp, MEV_DISABLE)); +} + +static int +mevent_delete_event(struct mevent *evp, int closefd) +{ + mevent_qlock(); + + /* + * Place the entry onto the changed list if not already there, and + * mark as to be deleted. + */ + if (evp->me_cq == 0) { + evp->me_cq = 1; + LIST_REMOVE(evp, me_list); + LIST_INSERT_HEAD(&change_head, evp, me_list); + mevent_notify(); + } + evp->me_state = MEV_DEL_PENDING; + + if (closefd) + evp->me_closefd = 1; + + mevent_qunlock(); + + return (0); +} + +int +mevent_delete(struct mevent *evp) +{ + + return (mevent_delete_event(evp, 0)); +} + +int +mevent_delete_close(struct mevent *evp) +{ + + return (mevent_delete_event(evp, 1)); +} + +void +mevent_dispatch(void) +{ + struct kevent changelist[MEVENT_MAX]; + struct kevent eventlist[MEVENT_MAX]; + struct mevent *pipev; + int mfd; + int numev; + int ret; + + mevent_tid = pthread_self(); + + mfd = kqueue(); + assert(mfd > 0); + + /* + * Open the pipe that will be used for other threads to force + * the blocking kqueue call to exit by writing to it. Set the + * descriptor to non-blocking. + */ + ret = pipe(mevent_pipefd); + if (ret < 0) { + perror("pipe"); + exit(0); + } + + /* + * Add internal event handler for the pipe write fd + */ + pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); + assert(pipev != NULL); + + for (;;) { + /* + * Build changelist if required. + * XXX the changelist can be put into the blocking call + * to eliminate the extra syscall. Currently better for + * debug. 
+ */ + numev = mevent_build(mfd, changelist); + if (numev) { + ret = kevent(mfd, changelist, numev, NULL, 0, NULL); + if (ret == -1) { + perror("Error return from kevent change"); + } + } + + /* + * Block awaiting events + */ + ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); + if (ret == -1) { + perror("Error return from kevent monitor"); + } + + /* + * Handle reported events + */ + mevent_handle(eventlist, ret); + } +} diff --git a/usr.sbin/bhyve/mevent.h b/usr.sbin/bhyve/mevent.h new file mode 100644 index 0000000..32a9d74 --- /dev/null +++ b/usr.sbin/bhyve/mevent.h @@ -0,0 +1,49 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _MEVENT_H_ +#define _MEVENT_H_ + +enum ev_type { + EVF_READ, + EVF_WRITE +}; + +struct mevent; + +struct mevent *mevent_add(int fd, enum ev_type type, + void (*func)(int, enum ev_type, void *), + void *param); +int mevent_enable(struct mevent *evp); +int mevent_disable(struct mevent *evp); +int mevent_delete(struct mevent *evp); +int mevent_delete_close(struct mevent *evp); + +void mevent_dispatch(void); + +#endif /* _MEVENT_H_ */ diff --git a/usr.sbin/bhyve/mevent_test.c b/usr.sbin/bhyve/mevent_test.c new file mode 100644 index 0000000..c72a497 --- /dev/null +++ b/usr.sbin/bhyve/mevent_test.c @@ -0,0 +1,180 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Test program for the micro event library. Set up a simple TCP echo + * service. + * + * cc mevent_test.c mevent.c -lpthread + */ + +#include +#include +#include + +#include +#include +#include + +#include "mevent.h" + +#define TEST_PORT 4321 + +static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER; + +#define MEVENT_ECHO + +#ifdef MEVENT_ECHO +struct esync { + pthread_mutex_t e_mt; + pthread_cond_t e_cond; +}; + +static void +echoer_callback(int fd, enum ev_type type, void *param) +{ + struct esync *sync = param; + + pthread_mutex_lock(&sync->e_mt); + pthread_cond_signal(&sync->e_cond); + pthread_mutex_unlock(&sync->e_mt); +} + +static void * +echoer(void *param) +{ + struct esync sync; + struct mevent *mev; + char buf[128]; + int fd = (int)(uintptr_t) param; + int len; + + pthread_mutex_init(&sync.e_mt, NULL); + pthread_cond_init(&sync.e_cond, NULL); + + pthread_mutex_lock(&sync.e_mt); + + mev = mevent_add(fd, EVF_READ, echoer_callback, &sync); + if (mev == NULL) { + printf("Could not allocate echoer event\n"); + exit(1); + } + + while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) { + len = read(fd, buf, sizeof(buf)); + if (len > 0) { + write(fd, buf, len); + write(0, buf, len); + } else { + break; + } + } + + mevent_delete_close(mev); + + pthread_mutex_unlock(&sync.e_mt); + pthread_mutex_destroy(&sync.e_mt); + 
pthread_cond_destroy(&sync.e_cond); +} + +#else + +static void * +echoer(void *param) +{ + char buf[128]; + int fd = (int)(uintptr_t) param; + int len; + + while ((len = read(fd, buf, sizeof(buf))) > 0) { + write(1, buf, len); + } +} +#endif /* MEVENT_ECHO */ + +static void +acceptor_callback(int fd, enum ev_type type, void *param) +{ + pthread_mutex_lock(&accept_mutex); + pthread_cond_signal(&accept_condvar); + pthread_mutex_unlock(&accept_mutex); +} + +static void * +acceptor(void *param) +{ + struct sockaddr_in sin; + pthread_t tid; + int news; + int s; + + if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + perror("socket"); + exit(1); + } + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(TEST_PORT); + + if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) { + perror("bind"); + exit(1); + } + + if (listen(s, 1) < 0) { + perror("listen"); + exit(1); + } + + (void) mevent_add(s, EVF_READ, acceptor_callback, NULL); + + pthread_mutex_lock(&accept_mutex); + + while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) { + news = accept(s, NULL, NULL); + if (news < 0) { + perror("accept error"); + } else { + printf("incoming connection, spawning thread\n"); + pthread_create(&tid, NULL, echoer, + (void *)(uintptr_t)news); + } + } +} + +main() +{ + pthread_t tid; + + pthread_create(&tid, NULL, acceptor, NULL); + + mevent_dispatch(); +} diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c new file mode 100644 index 0000000..273c6a2 --- /dev/null +++ b/usr.sbin/bhyve/pci_emul.c @@ -0,0 +1,976 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fbsdrun.h" +#include "inout.h" +#include "pci_emul.h" + +#define CONF1_ADDR_PORT 0x0cf8 +#define CONF1_DATA_PORT 0x0cfc + +#define CFGWRITE(pi,off,val,b) \ +do { \ + if ((b) == 1) { \ + pci_set_cfgdata8((pi),(off),(val)); \ + } else if ((b) == 2) { \ + pci_set_cfgdata16((pi),(off),(val)); \ + } else { \ + pci_set_cfgdata32((pi),(off),(val)); \ + } \ +} while (0) + +#define MAXSLOTS 32 + +static struct slotinfo { + char *si_name; + char *si_param; + struct pci_devinst *si_devi; + int si_titled; + int si_pslot; + char si_prefix; + char si_suffix; +} pci_slotinfo[MAXSLOTS]; + +/* + * NetApp specific: + * struct used to build an in-core OEM table to supply device names + * to driver instances + */ +static struct mptable_pci_devnames { +#define MPT_HDR_BASE 0 +#define MPT_HDR_NAME 2 + uint16_t md_hdrtype; + uint16_t md_entries; + 
uint16_t md_cksum; + uint16_t md_pad; +#define MPT_NTAP_SIG \ + ((uint32_t)(('P' << 24) | ('A' << 16) | ('T' << 8) | 'N')) + uint32_t md_sig; + uint32_t md_rsvd; + struct mptable_pci_slotinfo { + uint16_t mds_type; + uint16_t mds_phys_slot; + uint8_t mds_bus; + uint8_t mds_slot; + uint8_t mds_func; + uint8_t mds_pad; + uint16_t mds_vid; + uint16_t mds_did; + uint8_t mds_suffix[4]; + uint8_t mds_prefix[4]; + uint32_t mds_rsvd[3]; + } md_slotinfo[MAXSLOTS]; +} pci_devnames; + +SET_DECLARE(pci_devemu_set, struct pci_devemu); + +static uint64_t pci_emul_iobase; +static uint64_t pci_emul_membase32; +static uint64_t pci_emul_membase64; + +#define PCI_EMUL_IOBASE 0x2000 +#define PCI_EMUL_IOLIMIT 0x10000 + +#define PCI_EMUL_MEMBASE32 (lomem_sz) +#define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ + +#define PCI_EMUL_MEMBASE64 0xD000000000UL +#define PCI_EMUL_MEMLIMIT64 0xFD00000000UL + +static int pci_emul_devices; +static int devname_elems; + +/* + * I/O access + */ + +/* + * Slot options are in the form: + * + * ,[,] + * + * slot is 0..31 + * emul is a string describing the type of PCI device e.g. virtio-net + * config is an optional string, depending on the device, that can be + * used for configuration. 
+ * Examples are: + * 1,virtio-net,tap0 + * 3,dummy + */ +static void +pci_parse_slot_usage(char *aopt) +{ + printf("Invalid PCI slot info field \"%s\"\n", aopt); + free(aopt); +} + +void +pci_parse_slot(char *opt) +{ + char *slot, *emul, *config; + char *str, *cpy; + int snum; + + str = cpy = strdup(opt); + config = NULL; + + slot = strsep(&str, ","); + emul = strsep(&str, ","); + if (str != NULL) { + config = strsep(&str, ","); + } + + if (emul == NULL) { + pci_parse_slot_usage(cpy); + return; + } + + snum = 255; + snum = atoi(slot); + if (snum < 0 || snum >= MAXSLOTS) { + pci_parse_slot_usage(cpy); + } else { + pci_slotinfo[snum].si_name = emul; + pci_slotinfo[snum].si_param = config; + } +} + + +/* + * + * PCI MPTable names are of the form: + * + * ,[prefix] + * + * .. with an alphabetic char, a 1 or 2-digit string, + * and a single char. + * + * Examples: + * 1,e0c + * 4,e0P + * 6,43a + * 7,0f + * 10,1 + * 12,e0M + * 2,12a + * + * Note that this is NetApp-specific, but is ignored on other o/s's. + */ +static void +pci_parse_name_usage(char *aopt) +{ + printf("Invalid PCI slot name field \"%s\"\n", aopt); +} + +void +pci_parse_name(char *opt) +{ + char csnum[4]; + char *namestr; + char *slotend; + char prefix, suffix; + int i; + int pslot; + int snum; + + pslot = -1; + prefix = suffix = 0; + slotend = strchr(opt, ','); + + /* + * A comma must be present, and can't be the first character + * or no slot would be present. Also, the slot number can't be + * more than 2 characters. 
+ */ + if (slotend == NULL || slotend == opt || (slotend - opt > 2)) { + pci_parse_name_usage(opt); + return; + } + + for (i = 0; i < (slotend - opt); i++) { + csnum[i] = opt[i]; + } + csnum[i] = '\0'; + + snum = 255; + snum = atoi(csnum); + if (snum < 0 || snum >= MAXSLOTS) { + pci_parse_name_usage(opt); + return; + } + + namestr = slotend + 1; + + if (strlen(namestr) > 3) { + pci_parse_name_usage(opt); + return; + } + + if (isalpha(*namestr)) { + prefix = *namestr++; + } + + if (!isdigit(*namestr)) { + pci_parse_name_usage(opt); + } else { + pslot = *namestr++ - '0'; + if (isnumber(*namestr)) { + pslot = 10*pslot + *namestr++ - '0'; + + } + if (isalpha(*namestr) && *(namestr + 1) == 0) { + suffix = *namestr; + pci_slotinfo[snum].si_titled = 1; + pci_slotinfo[snum].si_pslot = pslot; + pci_slotinfo[snum].si_prefix = prefix; + pci_slotinfo[snum].si_suffix = suffix; + + } else { + pci_parse_name_usage(opt); + } + } +} + +static void +pci_add_mptable_name(struct slotinfo *si) +{ + struct mptable_pci_slotinfo *ms; + + /* + * If naming information has been supplied for this slot, populate + * the next available mptable OEM entry + */ + if (si->si_titled) { + ms = &pci_devnames.md_slotinfo[devname_elems]; + + ms->mds_type = MPT_HDR_NAME; + ms->mds_phys_slot = si->si_pslot; + ms->mds_bus = si->si_devi->pi_bus; + ms->mds_slot = si->si_devi->pi_slot; + ms->mds_func = si->si_devi->pi_func; + ms->mds_vid = pci_get_cfgdata16(si->si_devi, PCIR_VENDOR); + ms->mds_did = pci_get_cfgdata16(si->si_devi, PCIR_DEVICE); + ms->mds_suffix[0] = si->si_suffix; + ms->mds_prefix[0] = si->si_prefix; + + devname_elems++; + } +} + +static void +pci_finish_mptable_names(void) +{ + int size; + + if (devname_elems) { + pci_devnames.md_hdrtype = MPT_HDR_BASE; + pci_devnames.md_entries = devname_elems; + pci_devnames.md_cksum = 0; /* XXX */ + pci_devnames.md_sig = MPT_NTAP_SIG; + + size = (uintptr_t)&pci_devnames.md_slotinfo[devname_elems] - + (uintptr_t)&pci_devnames; + + 
fbsdrun_add_oemtbl(&pci_devnames, size); + } +} + +static int +pci_emul_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + struct pci_devinst *pdi = arg; + struct pci_devemu *pe = pdi->pi_d; + int offset, i; + + for (i = 0; i <= PCI_BARMAX; i++) { + if (pdi->pi_bar[i].type == PCIBAR_IO && + port >= pdi->pi_bar[i].addr && + port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { + offset = port - pdi->pi_bar[i].addr; + if (in) + *eax = (*pe->pe_ior)(pdi, i, offset, bytes); + else + (*pe->pe_iow)(pdi, i, offset, bytes, *eax); + return (0); + } + } + return (-1); +} + +static int +pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, + uint64_t *addr) +{ + uint64_t base; + + assert((size & (size - 1)) == 0); /* must be a power of 2 */ + + base = roundup2(*baseptr, size); + + if (base + size <= limit) { + *addr = base; + *baseptr = base + size; + return (0); + } else + return (-1); +} + +int +pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, + enum pcibar_type type, uint64_t size) +{ + int i, error; + uint64_t *baseptr, limit, addr, mask, lobits, bar; + struct inout_port iop; + + assert(idx >= 0 && idx <= PCI_BARMAX); + + if ((size & (size - 1)) != 0) + size = 1UL << flsl(size); /* round up to a power of 2 */ + + switch (type) { + case PCIBAR_NONE: + baseptr = NULL; + addr = mask = lobits = 0; + break; + case PCIBAR_IO: + baseptr = &pci_emul_iobase; + limit = PCI_EMUL_IOLIMIT; + mask = PCIM_BAR_IO_BASE; + lobits = PCIM_BAR_IO_SPACE; + break; + case PCIBAR_MEM64: + /* + * XXX + * Some drivers do not work well if the 64-bit BAR is allocated + * above 4GB. Allow for this by allocating small requests under + * 4GB unless then allocation size is larger than some arbitrary + * number (32MB currently). 
+ */ + if (size > 32 * 1024 * 1024) { + /* + * XXX special case for device requiring peer-peer DMA + */ + if (size == 0x100000000UL) + baseptr = &hostbase; + else + baseptr = &pci_emul_membase64; + limit = PCI_EMUL_MEMLIMIT64; + mask = PCIM_BAR_MEM_BASE; + lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | + PCIM_BAR_MEM_PREFETCH; + break; + } + /* fallthrough */ + case PCIBAR_MEM32: + baseptr = &pci_emul_membase32; + limit = PCI_EMUL_MEMLIMIT32; + mask = PCIM_BAR_MEM_BASE; + lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; + break; + default: + printf("pci_emul_alloc_base: invalid bar type %d\n", type); + assert(0); + } + + if (baseptr != NULL) { + error = pci_emul_alloc_resource(baseptr, limit, size, &addr); + if (error != 0) + return (error); + } + + pdi->pi_bar[idx].type = type; + pdi->pi_bar[idx].addr = addr; + pdi->pi_bar[idx].size = size; + + /* Initialize the BAR register in config space */ + bar = (addr & mask) | lobits; + pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); + + if (type == PCIBAR_MEM64) { + assert(idx + 1 <= PCI_BARMAX); + pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; + pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); + } + + /* add a handler to intercept accesses to the I/O bar */ + if (type == PCIBAR_IO) { + iop.name = pdi->pi_name; + iop.flags = IOPORT_F_INOUT; + iop.handler = pci_emul_handler; + iop.arg = pdi; + + for (i = 0; i < size; i++) { + iop.port = addr + i; + register_inout(&iop); + } + } + + return (0); +} + +#define CAP_START_OFFSET 0x40 +static int +pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) +{ + int i, capoff, capid, reallen; + uint16_t sts; + + static u_char endofcap[4] = { + PCIY_RESERVED, 0, 0, 0 + }; + + assert(caplen > 0 && capdata[0] != PCIY_RESERVED); + + reallen = roundup2(caplen, 4); /* dword aligned */ + + sts = pci_get_cfgdata16(pi, PCIR_STATUS); + if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { + capoff = CAP_START_OFFSET; + pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); + pci_set_cfgdata16(pi, 
PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); + } else { + capoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR); + while (1) { + assert((capoff & 0x3) == 0); + capid = pci_get_cfgdata8(pi, capoff); + if (capid == PCIY_RESERVED) + break; + capoff = pci_get_cfgdata8(pi, capoff + 1); + } + } + + /* Check if we have enough space */ + if (capoff + reallen + sizeof(endofcap) > PCI_REGMAX + 1) + return (-1); + + /* Copy the capability */ + for (i = 0; i < caplen; i++) + pci_set_cfgdata8(pi, capoff + i, capdata[i]); + + /* Set the next capability pointer */ + pci_set_cfgdata8(pi, capoff + 1, capoff + reallen); + + /* Copy of the reserved capability which serves as the end marker */ + for (i = 0; i < sizeof(endofcap); i++) + pci_set_cfgdata8(pi, capoff + reallen + i, endofcap[i]); + + return (0); +} + +static struct pci_devemu * +pci_emul_finddev(char *name) +{ + struct pci_devemu **pdpp, *pdp; + + SET_FOREACH(pdpp, pci_devemu_set) { + pdp = *pdpp; + if (!strcmp(pdp->pe_emu, name)) { + return (pdp); + } + } + + return (NULL); +} + +static void +pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, char *params) +{ + struct pci_devinst *pdi; + pdi = malloc(sizeof(struct pci_devinst)); + bzero(pdi, sizeof(*pdi)); + + pdi->pi_vmctx = ctx; + pdi->pi_bus = 0; + pdi->pi_slot = slot; + pdi->pi_func = 0; + pdi->pi_d = pde; + snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); + + /* Disable legacy interrupts */ + pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); + pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); + + pci_set_cfgdata8(pdi, PCIR_COMMAND, + PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); + + if ((*pde->pe_init)(ctx, pdi, params) != 0) { + free(pdi); + } else { + pci_emul_devices++; + pci_slotinfo[slot].si_devi = pdi; + } +} + +void +pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) +{ + int mmc; + + CTASSERT(sizeof(struct msicap) == 14); + + /* Number of msi messages must be a power of 2 between 1 and 32 */ + assert((msgnum & (msgnum - 1)) == 0 
&& msgnum >= 1 && msgnum <= 32); + mmc = ffs(msgnum) - 1; + + bzero(msicap, sizeof(struct msicap)); + msicap->capid = PCIY_MSI; + msicap->nextptr = nextptr; + msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); +} + +int +pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) +{ + struct msicap msicap; + + pci_populate_msicap(&msicap, msgnum, 0); + + return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); +} + +void +msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, + int bytes, uint32_t val) +{ + uint16_t msgctrl, rwmask, msgdata, mme; + uint32_t addrlo; + + /* + * If guest is writing to the message control register make sure + * we do not overwrite read-only fields. + */ + if ((offset - capoff) == 2 && bytes == 2) { + rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; + msgctrl = pci_get_cfgdata16(pi, offset); + msgctrl &= ~rwmask; + msgctrl |= val & rwmask; + val = msgctrl; + + addrlo = pci_get_cfgdata32(pi, capoff + 4); + if (msgctrl & PCIM_MSICTRL_64BIT) + msgdata = pci_get_cfgdata16(pi, capoff + 12); + else + msgdata = pci_get_cfgdata16(pi, capoff + 8); + + /* + * XXX check delivery mode, destination mode etc + */ + mme = msgctrl & PCIM_MSICTRL_MME_MASK; + pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; + if (pi->pi_msi.enabled) { + pi->pi_msi.cpu = (addrlo >> 12) & 0xff; + pi->pi_msi.vector = msgdata & 0xff; + pi->pi_msi.msgnum = 1 << (mme >> 4); + } else { + pi->pi_msi.cpu = 0; + pi->pi_msi.vector = 0; + pi->pi_msi.msgnum = 0; + } + } + + CFGWRITE(pi, offset, val, bytes); +} + +/* + * This function assumes that 'coff' is in the capabilities region of the + * config space. 
+ */
+static void
+pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
+{
+	int capid;
+	uint8_t capoff, nextoff;
+
+	/* Do not allow un-aligned writes */
+	if ((offset & (bytes - 1)) != 0)
+		return;
+
+	/* Find the capability that we want to update */
+	capoff = CAP_START_OFFSET;
+	while (1) {
+		capid = pci_get_cfgdata8(pi, capoff);
+		if (capid == PCIY_RESERVED)
+			break;
+
+		nextoff = pci_get_cfgdata8(pi, capoff + 1);
+		if (offset >= capoff && offset < nextoff)
+			break;
+
+		capoff = nextoff;
+	}
+	assert(offset >= capoff);
+
+	/*
+	 * Capability ID and Next Capability Pointer are readonly
+	 */
+	if (offset == capoff || offset == capoff + 1)
+		return;
+
+	switch (capid) {
+	case PCIY_MSI:
+		msicap_cfgwrite(pi, capoff, offset, bytes, val);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Return 1 if 'offset' lies between CAP_START_OFFSET and the offset of
+ * the PCIY_RESERVED end marker (inclusive) of a device whose status
+ * register advertises a capability list, 0 otherwise.
+ */
+static int
+pci_emul_iscap(struct pci_devinst *pi, int offset)
+{
+	int found;
+	uint16_t sts;
+	uint8_t capid, lastoff;
+
+	found = 0;
+	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
+	if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
+		/* Walk the list until the end marker is reached */
+		lastoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR);
+		while (1) {
+			assert((lastoff & 0x3) == 0);
+			capid = pci_get_cfgdata8(pi, lastoff);
+			if (capid == PCIY_RESERVED)
+				break;
+			lastoff = pci_get_cfgdata8(pi, lastoff + 1);
+		}
+		if (offset >= CAP_START_OFFSET && offset <= lastoff)
+			found = 1;
+	}
+	return (found);
+}
+
+/*
+ * Reset the I/O and memory allocation cursors, instantiate the device
+ * emulation configured for every slot that has a name, and publish the
+ * mptable naming entries collected along the way.
+ */
+void
+init_pci(struct vmctx *ctx)
+{
+	struct pci_devemu *pde;
+	struct slotinfo *si;
+	int i;
+
+	pci_emul_iobase = PCI_EMUL_IOBASE;
+	pci_emul_membase32 = PCI_EMUL_MEMBASE32;
+	pci_emul_membase64 = PCI_EMUL_MEMBASE64;
+
+	si = pci_slotinfo;
+
+	for (i = 0; i < MAXSLOTS; i++, si++) {
+		if (si->si_name == NULL)
+			continue;
+
+		pde = pci_emul_finddev(si->si_name);
+		if (pde == NULL) {
+			/* Tell the user instead of dropping the slot silently */
+			printf("PCI slot %d: unknown device emulation \"%s\"\n",
+			    i, si->si_name);
+			continue;
+		}
+
+		pci_emul_init(ctx, pde, i, si->si_param);
+
+		/*
+		 * pci_emul_init() only stores a device instance in si_devi
+		 * when the emulation's init callback succeeds, and the
+		 * naming entry is built by dereferencing si_devi, so skip
+		 * it for a slot that failed to initialize.
+		 */
+		if (si->si_devi != NULL)
+			pci_add_mptable_name(si);
+	}
+	pci_finish_mptable_names();
+}
+
+/* Return the cached MSI enable state of the device (non-zero if enabled). */
+int
+pci_msi_enabled(struct pci_devinst *pi)
+{
+	return (pi->pi_msi.enabled);
+}
+
+int
+pci_msi_msgnum(struct pci_devinst *pi)
+{
+	if 
(pi->pi_msi.enabled)
+		return (pi->pi_msi.msgnum);
+	else
+		return (0);
+}
+
+/*
+ * Raise MSI message number 'msg' for the device: an interrupt with vector
+ * (base vector + msg) is sent to the cpu recorded in pi_msi.  Nothing is
+ * sent when MSI is disabled or 'msg' is outside [0, msgnum).
+ */
+void
+pci_generate_msi(struct pci_devinst *pi, int msg)
+{
+
+	if (pci_msi_enabled(pi) && msg >= 0 && msg < pci_msi_msgnum(pi)) {
+		vm_lapic_irq(pi->pi_vmctx,
+			     pi->pi_msi.cpu,
+			     pi->pi_msi.vector + msg);
+	}
+}
+
+/* Target latched by the last write to the config address port */
+static int cfgbus, cfgslot, cfgfunc, cfgoff;
+
+/*
+ * Handler for writes to CONF1_ADDR_PORT: split the 32-bit value into
+ * the bus, slot, function and register offset used by later accesses to
+ * the config data ports.  Only 4-byte writes are accepted.
+ */
+static int
+pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	uint32_t x;
+
+	assert(!in);
+
+	if (bytes != 4)
+		return (-1);
+
+	x = *eax;
+	cfgoff = x & PCI_REGMAX;
+	cfgfunc = (x >> 8) & PCI_FUNCMAX;
+	cfgslot = (x >> 11) & PCI_SLOTMAX;
+	cfgbus = (x >> 16) & PCI_BUSMAX;
+
+	return (0);
+}
+INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_OUT, pci_emul_cfgaddr);
+
+/*
+ * Handler for the four config data ports.  Reads and writes are applied
+ * to the config space of the device selected through the address port.
+ *
+ * Accesses that cannot be satisfied read as all ones and ignore writes:
+ * no device in the slot, a bus other than 0, a function other than 0, an
+ * offset that is not naturally aligned for the access size, or an access
+ * that would run past the end of config space.  The alignment and range
+ * checks matter because the pci_{get,set}_cfgdata*() accessors assert on
+ * both, and the offset here is fully controlled by the guest.
+ */
+static int
+pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	struct pci_devinst *pi;
+	struct pci_devemu *pe;
+	int coff, idx;
+	uint64_t mask, bar;
+
+	assert(bytes == 1 || bytes == 2 || bytes == 4);
+
+	/* Emulated devices are all created on bus 0 */
+	if (cfgbus == 0 && cfgslot < MAXSLOTS)
+		pi = pci_slotinfo[cfgslot].si_devi;
+	else
+		pi = NULL;
+	coff = cfgoff + (port - CONF1_DATA_PORT);
+
+#if 0
+	printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r",
+		in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc);
+#endif
+
+	if (pi == NULL || cfgfunc != 0 ||
+	    (coff & (bytes - 1)) != 0 || coff + bytes > PCI_REGMAX + 1) {
+		if (in)
+			*eax = 0xffffffff;
+		return (0);
+	}
+
+	pe = pi->pi_d;
+
+	/*
+	 * Config read
+	 */
+	if (in) {
+		/* Let the device emulation override the default handler */
+		if (pe->pe_cfgread != NULL &&
+		    (*pe->pe_cfgread)(ctx, vcpu, pi, coff, bytes, eax) == 0)
+			return (0);
+
+		if (bytes == 1)
+			*eax = pci_get_cfgdata8(pi, coff);
+		else if (bytes == 2)
+			*eax = pci_get_cfgdata16(pi, coff);
+		else
+			*eax = pci_get_cfgdata32(pi, coff);
+	} else {
+		/* Let the device emulation override the default handler */
+		if (pe->pe_cfgwrite != NULL &&
+		    (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
+			return (0);
+
+		/*
+		 * Special handling for write to BAR registers
+		 */
+		if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
+			/*
+			 * Ignore writes to BAR registers that are not
+			 * 4-byte aligned.
+			 */
+			if (bytes != 4 || (coff & 0x3) != 0)
+				return (0);
+			idx = (coff - PCIR_BAR(0)) / 4;
+			/*
+			 * Keep only the address bits above the BAR size
+			 * writable and force the type bits back in.
+			 */
+			switch (pi->pi_bar[idx].type) {
+			case PCIBAR_NONE:
+				bar = 0;
+				break;
+			case PCIBAR_IO:
+				mask = ~(pi->pi_bar[idx].size - 1);
+				mask &= PCIM_BAR_IO_BASE;
+				bar = (*eax & mask) | PCIM_BAR_IO_SPACE;
+				break;
+			case PCIBAR_MEM32:
+				mask = ~(pi->pi_bar[idx].size - 1);
+				mask &= PCIM_BAR_MEM_BASE;
+				bar = *eax & mask;
+				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
+				break;
+			case PCIBAR_MEM64:
+				mask = ~(pi->pi_bar[idx].size - 1);
+				mask &= PCIM_BAR_MEM_BASE;
+				bar = *eax & mask;
+				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
+				       PCIM_BAR_MEM_PREFETCH;
+				break;
+			case PCIBAR_MEMHI64:
+				/* upper half: size lives in the previous BAR */
+				mask = ~(pi->pi_bar[idx - 1].size - 1);
+				mask &= PCIM_BAR_MEM_BASE;
+				bar = ((uint64_t)*eax << 32) & mask;
+				bar = bar >> 32;
+				break;
+			default:
+				assert(0);
+			}
+			pci_set_cfgdata32(pi, coff, bar);
+		} else if (pci_emul_iscap(pi, coff)) {
+			pci_emul_capwrite(pi, coff, bytes, *eax);
+		} else {
+			CFGWRITE(pi, coff, *eax, bytes);
+		}
+	}
+
+	return (0);
+}
+
+INOUT_PORT(pci_cfgdata,
CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); +INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); +INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); +INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); + +/* + * I/O ports to configure PCI IRQ routing. We ignore all writes to it. + */ +static int +pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + assert(in == 0); + return (0); +} +INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler); +INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler); + +#define PCI_EMUL_TEST +#ifdef PCI_EMUL_TEST +/* + * Define a dummy test device + */ +#define DREGSZ 20 +struct pci_emul_dsoftc { + uint8_t regs[DREGSZ]; +}; + +#define PCI_EMUL_MSGS 4 + +int +pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) +{ + int error; + struct pci_emul_dsoftc *sc; + + sc = malloc(sizeof(struct pci_emul_dsoftc)); + memset(sc, 0, sizeof(struct pci_emul_dsoftc)); + + pi->pi_arg = sc; + + pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); + pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); + pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); + + error = pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, DREGSZ); + assert(error == 0); + + error = pci_emul_add_msicap(pi, PCI_EMUL_MSGS); + assert(error == 0); + + return (0); +} + +void +pci_emul_diow(struct pci_devinst *pi, int baridx, int offset, int size, + uint32_t value) +{ + int i; + struct pci_emul_dsoftc *sc = pi->pi_arg; + + if (offset + size > DREGSZ) { + printf("diow: too large, offset %d size %d\n", offset, size); + return; + } + + if (size == 1) { + sc->regs[offset] = value & 0xff; + } else if (size == 2) { + *(uint16_t *)&sc->regs[offset] = value & 0xffff; + } else { + *(uint32_t *)&sc->regs[offset] = value; + } + + /* + * Special magic value to generate an interrupt + */ + if (offset == 4 && size == 4 && pci_msi_enabled(pi)) + pci_generate_msi(pi, 
value % pci_msi_msgnum(pi)); + + if (value == 0xabcdef) { + for (i = 0; i < pci_msi_msgnum(pi); i++) + pci_generate_msi(pi, i); + } +} + +uint32_t +pci_emul_dior(struct pci_devinst *pi, int baridx, int offset, int size) +{ + struct pci_emul_dsoftc *sc = pi->pi_arg; + uint32_t value; + + if (offset + size > DREGSZ) { + printf("dior: too large, offset %d size %d\n", offset, size); + return (0); + } + + if (size == 1) { + value = sc->regs[offset]; + } else if (size == 2) { + value = *(uint16_t *) &sc->regs[offset]; + } else { + value = *(uint32_t *) &sc->regs[offset]; + } + + return (value); +} + +struct pci_devemu pci_dummy = { + .pe_emu = "dummy", + .pe_init = pci_emul_dinit, + .pe_iow = pci_emul_diow, + .pe_ior = pci_emul_dior +}; +PCI_EMUL_SET(pci_dummy); + +#endif /* PCI_EMUL_TEST */ diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h new file mode 100644 index 0000000..f5f8e22 --- /dev/null +++ b/usr.sbin/bhyve/pci_emul.h @@ -0,0 +1,171 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _PCI_EMUL_H_ +#define _PCI_EMUL_H_ + +#include +#include +#include + +#include + +#include + +#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */ +#define PCIY_RESERVED 0x00 + +struct vmctx; +struct pci_devinst; + +struct pci_devemu { + char *pe_emu; /* Name of device emulation */ + + /* instance creation */ + int (*pe_init)(struct vmctx *, struct pci_devinst *, char *opts); + + /* config space read/write callbacks */ + int (*pe_cfgwrite)(struct vmctx *ctx, int vcpu, + struct pci_devinst *pi, int offset, + int bytes, uint32_t val); + int (*pe_cfgread)(struct vmctx *ctx, int vcpu, + struct pci_devinst *pi, int offset, + int bytes, uint32_t *retval); + + /* I/O space read/write callbacks */ + void (*pe_iow)(struct pci_devinst *pi, int baridx, + int offset, int size, uint32_t value); + uint32_t (*pe_ior)(struct pci_devinst *pi, int baridx, + int offset, int size); +}; +#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x); + +enum pcibar_type { + PCIBAR_NONE, + PCIBAR_IO, + PCIBAR_MEM32, + PCIBAR_MEM64, + PCIBAR_MEMHI64 +}; + +struct pcibar { + enum pcibar_type type; /* io or memory */ + uint64_t size; + uint64_t addr; +}; + +#define PI_NAMESZ 40 + +struct pci_devinst { + struct pci_devemu *pi_d; + struct vmctx *pi_vmctx; + uint8_t pi_bus, pi_slot, pi_func; + char pi_name[PI_NAMESZ]; + uint16_t pi_iobase; + int pi_bar_getsize; + + struct { + int enabled; + int cpu; + int vector; + int msgnum; 
+ } pi_msi; + + void *pi_arg; /* devemu-private data */ + + u_char pi_cfgdata[PCI_REGMAX + 1]; + struct pcibar pi_bar[PCI_BARMAX + 1]; +}; + +struct msicap { + uint8_t capid; + uint8_t nextptr; + uint16_t msgctrl; + uint32_t addrlo; + uint32_t addrhi; + uint16_t msgdata; +} __packed; + +void init_pci(struct vmctx *ctx); +void pci_parse_slot(char *opt); +void pci_parse_name(char *opt); +void pci_callback(void); +int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, + enum pcibar_type type, uint64_t size); +int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); +void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, + int bytes, uint32_t val); + +void pci_generate_msi(struct pci_devinst *pi, int msgnum); +int pci_msi_enabled(struct pci_devinst *pi); +int pci_msi_msgnum(struct pci_devinst *pi); +void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr); + +static __inline void +pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val) +{ + assert(offset <= PCI_REGMAX); + *(uint8_t *)(pi->pi_cfgdata + offset) = val; +} + +static __inline void +pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val) +{ + assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0); + *(uint16_t *)(pi->pi_cfgdata + offset) = val; +} + +static __inline void +pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val) +{ + assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0); + *(uint32_t *)(pi->pi_cfgdata + offset) = val; +} + +static __inline uint8_t +pci_get_cfgdata8(struct pci_devinst *pi, int offset) +{ + assert(offset <= PCI_REGMAX); + return (*(uint8_t *)(pi->pi_cfgdata + offset)); +} + +static __inline uint16_t +pci_get_cfgdata16(struct pci_devinst *pi, int offset) +{ + assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0); + return (*(uint16_t *)(pi->pi_cfgdata + offset)); +} + +static __inline uint32_t +pci_get_cfgdata32(struct pci_devinst *pi, int offset) +{ + assert(offset <= (PCI_REGMAX - 3) && 
(offset & 3) == 0); + return (*(uint32_t *)(pi->pi_cfgdata + offset)); +} + +#endif /* _PCI_EMUL_H_ */ diff --git a/usr.sbin/bhyve/pci_hostbridge.c b/usr.sbin/bhyve/pci_hostbridge.c new file mode 100644 index 0000000..c77762d --- /dev/null +++ b/usr.sbin/bhyve/pci_hostbridge.c @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "pci_emul.h" + +static int +pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) +{ + + /* config space */ + pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275); /* NetApp */ + pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275); /* NetApp */ + pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_BRIDGE); + pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE); + pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST); + + return (0); +} + +struct pci_devemu pci_de_hostbridge = { + .pe_emu = "hostbridge", + .pe_init = pci_hostbridge_init, +}; +PCI_EMUL_SET(pci_de_hostbridge); diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c new file mode 100644 index 0000000..1c417fd --- /dev/null +++ b/usr.sbin/bhyve/pci_passthru.c @@ -0,0 +1,508 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "pci_emul.h" + +#ifndef _PATH_DEVPCI +#define _PATH_DEVPCI "/dev/pci" +#endif + +#ifndef _PATH_DEVIO +#define _PATH_DEVIO "/dev/io" +#endif + +#define LEGACY_SUPPORT 1 + +static int pcifd = -1; +static int iofd = -1; + +struct passthru_softc { + struct pci_devinst *psc_pi; + struct pcibar psc_bar[PCI_BARMAX + 1]; + struct { + int capoff; + int msgctrl; + int emulated; + } psc_msi; + struct pcisel psc_sel; +}; + +static int +msi_caplen(int msgctrl) +{ + int len; + + len = 10; /* minimum length of msi capability */ + + if (msgctrl & PCIM_MSICTRL_64BIT) + len += 4; + +#if 0 + /* + * Ignore the 'mask' and 'pending' bits in the MSI capability. + * We'll let the guest manipulate them directly. 
+ */ + if (msgctrl & PCIM_MSICTRL_VECTOR) + len += 10; +#endif + + return (len); +} + +static uint32_t +read_config(const struct pcisel *sel, long reg, int width) +{ + struct pci_io pi; + + bzero(&pi, sizeof(pi)); + pi.pi_sel = *sel; + pi.pi_reg = reg; + pi.pi_width = width; + + if (ioctl(pcifd, PCIOCREAD, &pi) < 0) + return (0); /* XXX */ + else + return (pi.pi_data); +} + +static void +write_config(const struct pcisel *sel, long reg, int width, uint32_t data) +{ + struct pci_io pi; + + bzero(&pi, sizeof(pi)); + pi.pi_sel = *sel; + pi.pi_reg = reg; + pi.pi_width = width; + pi.pi_data = data; + + (void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */ +} + +#ifdef LEGACY_SUPPORT +static int +passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) +{ + int capoff, i; + struct msicap msicap; + u_char *capdata; + + pci_populate_msicap(&msicap, msgnum, nextptr); + + /* + * XXX + * Copy the msi capability structure in the last 16 bytes of the + * config space. This is wrong because it could shadow something + * useful to the device. + */ + capoff = 256 - roundup(sizeof(msicap), 4); + capdata = (u_char *)&msicap; + for (i = 0; i < sizeof(msicap); i++) + pci_set_cfgdata8(pi, capoff + i, capdata[i]); + + return (capoff); +} +#endif /* LEGACY_SUPPORT */ + +static int +cfginitmsi(struct passthru_softc *sc) +{ + int ptr, cap, sts, caplen; + uint32_t u32; + struct pcisel sel; + struct pci_devinst *pi; + + pi = sc->psc_pi; + sel = sc->psc_sel; + + /* + * Parse the capabilities and cache the location of the MSI + * capability. 
+ */ + sts = read_config(&sel, PCIR_STATUS, 2); + if (sts & PCIM_STATUS_CAPPRESENT) { + ptr = read_config(&sel, PCIR_CAP_PTR, 1); + while (ptr != 0 && ptr != 0xff) { + cap = read_config(&sel, ptr + PCICAP_ID, 1); + if (cap == PCIY_MSI) { + /* + * Copy the MSI capability into the config + * space of the emulated pci device + */ + sc->psc_msi.capoff = ptr; + sc->psc_msi.msgctrl = read_config(&sel, + ptr + 2, 2); + sc->psc_msi.emulated = 0; + caplen = msi_caplen(sc->psc_msi.msgctrl); + while (caplen > 0) { + u32 = read_config(&sel, ptr, 4); + pci_set_cfgdata32(pi, ptr, u32); + caplen -= 4; + ptr += 4; + } + break; + } + ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1); + } + } + +#ifdef LEGACY_SUPPORT + /* + * If the passthrough device does not support MSI then craft a + * MSI capability for it. We link the new MSI capability at the + * head of the list of capabilities. + */ + if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) { + int origptr, msiptr; + origptr = read_config(&sel, PCIR_CAP_PTR, 1); + msiptr = passthru_add_msicap(pi, 1, origptr); + sc->psc_msi.capoff = msiptr; + sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2); + sc->psc_msi.emulated = 1; + pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr); + } +#endif + + if (sc->psc_msi.capoff == 0) /* MSI or bust */ + return (-1); + else + return (0); +} + +static int +cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) +{ + int i, error; + struct pci_devinst *pi; + struct pci_bar_io bar; + enum pcibar_type bartype; + uint64_t base; + + pi = sc->psc_pi; + + /* + * Initialize BAR registers + */ + for (i = 0; i <= PCI_BARMAX; i++) { + bzero(&bar, sizeof(bar)); + bar.pbi_sel = sc->psc_sel; + bar.pbi_reg = PCIR_BAR(i); + + if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0) + continue; + + if (PCI_BAR_IO(bar.pbi_base)) { + bartype = PCIBAR_IO; + base = bar.pbi_base & PCIM_BAR_IO_BASE; + } else { + switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) { + case PCIM_BAR_MEM_64: + bartype = PCIBAR_MEM64; + break; + default: 
+				bartype = PCIBAR_MEM32;
+				break;
+			}
+			base = bar.pbi_base & PCIM_BAR_MEM_BASE;
+		}
+
+		/* Cache information about the "real" BAR */
+		sc->psc_bar[i].type = bartype;
+		sc->psc_bar[i].size = bar.pbi_length;
+		sc->psc_bar[i].addr = base;
+
+		/* Allocate the BAR in the guest I/O or MMIO space */
+		error = pci_emul_alloc_bar(pi, i, base, bartype,
+					   bar.pbi_length);
+		if (error)
+			return (-1);
+
+		/*
+		 * Map the physical MMIO space in the guest MMIO space
+		 */
+		if (bartype != PCIBAR_IO) {
+			error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
+				sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
+				pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
+			if (error)
+				return (-1);
+		}
+
+		/*
+		 * 64-bit BAR takes up two slots so skip the next one.
+		 */
+		if (bartype == PCIBAR_MEM64) {
+			i++;
+			assert(i <= PCI_BARMAX);
+			sc->psc_bar[i].type = PCIBAR_MEMHI64;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Record the host bus/slot/function in the softc and set up the BARs and
+ * the MSI capability of the passthrough device.  Returns 0 on success and
+ * 1 if either step fails.
+ */
+static int
+cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
+{
+	int error;
+	struct passthru_softc *sc;
+
+	error = 1;
+	sc = pi->pi_arg;
+
+	bzero(&sc->psc_sel, sizeof(struct pcisel));
+	sc->psc_sel.pc_bus = bus;
+	sc->psc_sel.pc_dev = slot;
+	sc->psc_sel.pc_func = func;
+
+	if (cfginitbar(ctx, sc) != 0)
+		goto done;
+
+	if (cfginitmsi(sc) != 0)
+		goto done;
+
+	error = 0;				/* success */
+done:
+	return (error);
+}
+
+/*
+ * Instance-creation callback of the "passthru" emulation.
+ *
+ * 'opts' names the host device as "bus/slot/func".  The PCI and I/O
+ * device nodes are opened on first use, the device is assigned to the
+ * guest, and its config space is initialized.
+ *
+ * Returns 0 on success and 1 on failure.  On failure the softc is
+ * released and the device is unassigned again, but only if this call
+ * had actually assigned it.
+ */
+static int
+passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	int bus, slot, func, error, assigned;
+	struct passthru_softc *sc;
+
+	sc = NULL;
+	error = 1;
+	assigned = 0;
+
+	if (pcifd < 0) {
+		pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+		if (pcifd < 0)
+			goto done;
+	}
+
+	if (iofd < 0) {
+		iofd = open(_PATH_DEVIO, O_RDWR, 0);
+		if (iofd < 0)
+			goto done;
+	}
+
+	if (opts == NULL || sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3)
+		goto done;
+
+	if (vm_assign_pptdev(ctx, bus, slot, func) != 0)
+		goto done;
+	assigned = 1;		/* bus/slot/func are valid from here on */
+
+	sc = calloc(1, sizeof(*sc));
+	if (sc == NULL)
+		goto done;
+
+	pi->pi_arg = sc;
+	sc->psc_pi = pi;
+
+	/* initialize config space */
+	if (cfginit(ctx, pi, bus, slot, func) != 0)
+		goto done;
+
+	error = 0;				/* success */
+done:
+	if (error) {
+		/* do not leave a pointer to the freed softc behind */
+		if (sc != NULL) {
+			pi->pi_arg = NULL;
+			free(sc);
+		}
+		/*
+		 * The earlier exits happen before bus/slot/func have been
+		 * parsed, so undo the assignment only when it was made.
+		 */
+		if (assigned)
+			vm_unassign_pptdev(ctx, bus, slot, func);
+	}
+	return (error);
+}
+
+/* Return 1 if config offset 'coff' falls inside the BAR registers. */
+static int
+bar_access(int coff)
+{
+	if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
+		return (1);
+	else
+		return (0);
+}
+
+/*
+ * Return 1 if config offset 'coff' falls inside the cached MSI
+ * capability, 0 if it does not or if no MSI capability was found.
+ */
+static int
+msicap_access(struct passthru_softc *sc, int coff)
+{
+	int caplen;
+
+	if (sc->psc_msi.capoff == 0)
+		return (0);
+
+	caplen = msi_caplen(sc->psc_msi.msgctrl);
+
+	if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
+		return (1);
+	else
+		return (0);
+}
+
+static int
+passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
+		 int bytes, uint32_t *rv)
+{
+	struct passthru_softc *sc;
+
+	sc = pi->pi_arg;
+
+	/*
+	 * PCI BARs and MSI capability is emulated.
+	 */
+	if (bar_access(coff) || msicap_access(sc, coff))
+		return (-1);
+
+#ifdef LEGACY_SUPPORT
+	/*
+	 * Emulate PCIR_CAP_PTR if this device does not support MSI capability
+	 * natively.
+ */ + if (sc->psc_msi.emulated) { + if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4) + return (-1); + } +#endif + + /* Everything else just read from the device's config space */ + *rv = read_config(&sc->psc_sel, coff, bytes); + + return (0); +} + +static int +passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, + int bytes, uint32_t val) +{ + int error; + struct passthru_softc *sc; + + sc = pi->pi_arg; + + /* + * PCI BARs are emulated + */ + if (bar_access(coff)) + return (-1); + + /* + * MSI capability is emulated + */ + if (msicap_access(sc, coff)) { + msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val); + + error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus, + sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu, + pi->pi_msi.vector, pi->pi_msi.msgnum); + if (error != 0) { + printf("vm_setup_msi returned error %d\r\n", errno); + exit(1); + } + return (0); + } + +#ifdef LEGACY_SUPPORT + /* + * If this device does not support MSI natively then we cannot let + * the guest disable legacy interrupts from the device. It is the + * legacy interrupt that is triggering the virtual MSI to the guest. 
+ */ + if (sc->psc_msi.emulated && pci_msi_enabled(pi)) { + if (coff == PCIR_COMMAND && bytes == 2) + val &= ~PCIM_CMD_INTxDIS; + } +#endif + + write_config(&sc->psc_sel, coff, bytes, val); + + return (0); +} + +static void +passthru_iow(struct pci_devinst *pi, int baridx, int offset, int size, + uint32_t value) +{ + struct passthru_softc *sc; + struct iodev_pio_req pio; + + sc = pi->pi_arg; + + bzero(&pio, sizeof(struct iodev_pio_req)); + pio.access = IODEV_PIO_WRITE; + pio.port = sc->psc_bar[baridx].addr + offset; + pio.width = size; + pio.val = value; + + (void)ioctl(iofd, IODEV_PIO, &pio); +} + +static uint32_t +passthru_ior(struct pci_devinst *pi, int baridx, int offset, int size) +{ + struct passthru_softc *sc; + struct iodev_pio_req pio; + + sc = pi->pi_arg; + + bzero(&pio, sizeof(struct iodev_pio_req)); + pio.access = IODEV_PIO_READ; + pio.port = sc->psc_bar[baridx].addr + offset; + pio.width = size; + pio.val = 0; + + (void)ioctl(iofd, IODEV_PIO, &pio); + + return (pio.val); +} + +struct pci_devemu passthru = { + .pe_emu = "passthru", + .pe_init = passthru_init, + .pe_cfgwrite = passthru_cfgwrite, + .pe_cfgread = passthru_cfgread, + .pe_iow = passthru_iow, + .pe_ior = passthru_ior, +}; +PCI_EMUL_SET(passthru); diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c new file mode 100644 index 0000000..b86e21d --- /dev/null +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -0,0 +1,502 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "fbsdrun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+
+/* Number of entries in the single virtqueue */
+#define VTBLK_RINGSZ	64
+
+/* Size in bytes of the device-specific config area (struct vtblk_config) */
+#define VTBLK_CFGSZ	28
+
+/*
+ * Register offsets of the device-specific config area.  The derived
+ * values are parenthesized so they expand safely inside any expression.
+ */
+#define VTBLK_R_CFG	VTCFG_R_CFG0
+#define VTBLK_R_CFG_END	(VTBLK_R_CFG + VTBLK_CFGSZ - 1)
+#define VTBLK_R_MAX	VTBLK_R_CFG_END
+
+/* Total size of the register window exposed through BAR 0 */
+#define VTBLK_REGSZ	(VTBLK_R_MAX + 1)
+
+/* Maximum number of data segments per request (advertised as seg_max) */
+#define VTBLK_MAXSEGS	32
+
+/* Values written to the request's status byte */
+#define VTBLK_S_OK	0
+#define VTBLK_S_IOERR	1
+
+/*
+ * Host capabilities
+ */
+#define VTBLK_S_HOSTCAPS      \
+  ( 0x00000004 |	/* host maximum request segments */ \
+    0x10000000 )	/* supports indirect descriptors */
+
+struct vring_hqueue {
+	/* Internal state */
+	uint16_t	hq_size;
+	uint16_t	hq_cur_aidx;		/* trails behind 'avail_idx' */
+
+	 /* Host-context pointers to the queue */
+	struct virtio_desc *hq_dtable;
+	uint16_t	*hq_avail_flags;
+	uint16_t	*hq_avail_idx;		/* monotonically increasing */
+	uint16_t	*hq_avail_ring;
+
+	uint16_t	*hq_used_flags;
+	uint16_t	*hq_used_idx;		/* monotonically increasing */
+	struct virtio_used *hq_used_ring;
+};
+
+/*
+ * Config space
+ */
+struct vtblk_config {
+	uint64_t	vbc_capacity;
+	uint32_t	vbc_size_max;
+	uint32_t	vbc_seg_max;
+	uint16_t	vbc_geom_c;
+	uint8_t		vbc_geom_h;
+	uint8_t		vbc_geom_s;
+	uint32_t	vbc_blk_size;
+	uint32_t	vbc_sectors_max;
+} __packed;
+CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
+
+/*
+ * Fixed-size block header
+ */
+struct virtio_blk_hdr {
+#define VBH_OP_READ	0
+#define VBH_OP_WRITE	1
+	uint32_t	vbh_type;
+	uint32_t	vbh_ioprio;
+	uint64_t	vbh_sector;
+} __packed;
+
+/*
+ * Debug printf.  DPRINTF is wrapped in do/while so that it behaves as
+ * a single statement and cannot capture a following 'else'.
+ */
+static int pci_vtblk_debug;
+#define DPRINTF(params) do { if (pci_vtblk_debug) printf params; } while (0)
+#define WPRINTF(params) printf params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtblk_softc {
+	struct pci_devinst *vbsc_pi;
+	int		vbsc_fd;
+	int		vbsc_status;
+	int		vbsc_isr;
+	int		vbsc_lastq;
+	uint32_t	vbsc_features;
+	uint64_t	vbsc_pfn;
+	struct vring_hqueue vbsc_q;
+	struct vtblk_config vbsc_cfg;
+};
+
+/*
+ * Return the number of available descriptors in the vring taking care
+ * of the 16-bit index wraparound.
+ */
+static int
+hq_num_avail(struct vring_hqueue *hq)
+{
+	int ndesc;
+
+	if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
+		ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
+	else
+		ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;
+
+	assert(ndesc >= 0 && ndesc <= hq->hq_size);
+
+	return (ndesc);
+}
+
+/*
+ * Record the device status written by the guest.  A value of 0 is
+ * only logged as a reset request; no other state is cleared here.
+ */
+static void
+pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
+{
+	if (value == 0) {
+		DPRINTF(("vtblk: device reset requested !\n"));
+	}
+
+	sc->vbsc_status = value;
+}
+
+/*
+ * Process one request from the available ring: decode the indirect
+ * descriptor table, perform the read or write against the backing
+ * file, store the completion status and post the descriptor on the
+ * used ring.  Malformed requests trip an assert.
+ */
+static void
+pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
+{
+	struct iovec iov[VTBLK_MAXSEGS];
+	struct virtio_blk_hdr *vbh;
+	struct virtio_desc *vd, *vid;
+	struct virtio_used *vu;
+	uint8_t *status;
+	int i;
+	int err;
+	int iolen;
+	int nsegs;
+	int uidx, aidx, didx;
+	int writeop;
+	off_t offset;
+
+	uidx = *hq->hq_used_idx;
+	aidx = hq->hq_cur_aidx;
+	didx = hq->hq_avail_ring[aidx % hq->hq_size];
+	assert(didx >= 0 && didx < hq->hq_size);
+
+	vd = &hq->hq_dtable[didx];
+
+	/*
+	 * Verify that the descriptor is indirect, and obtain
+	 * the pointer to the indirect descriptor.
+	 * There has to be space for at least 3 descriptors
+	 * in the indirect descriptor array: the block header,
+	 * 1 or more data descriptors, and a status byte.
+	 */
+	assert(vd->vd_flags & VRING_DESC_F_INDIRECT);
+
+	nsegs = vd->vd_len / sizeof(struct virtio_desc);
+	assert(nsegs >= 3);
+	/*
+	 * Header + status account for 2 descriptors, which leaves up to
+	 * VTBLK_MAXSEGS data descriptors: exactly the size of 'iov' and
+	 * the seg_max value advertised in the config space.
+	 */
+	assert(nsegs <= VTBLK_MAXSEGS + 2);
+
+	vid = paddr_guest2host(vd->vd_addr);
+	assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0);
+
+	/*
+	 * The first descriptor will be the read-only fixed header
+	 */
+	vbh = paddr_guest2host(vid[0].vd_addr);
+	assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr));
+	assert(vid[0].vd_flags & VRING_DESC_F_NEXT);
+	assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0);
+
+	writeop = (vbh->vbh_type == VBH_OP_WRITE);
+
+	offset = vbh->vbh_sector * DEV_BSIZE;
+
+	/*
+	 * Build up the iovec based on the guest's data descriptors
+	 */
+	for (i = 1, iolen = 0; i < nsegs - 1; i++) {
+		iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr);
+		iov[i-1].iov_len = vid[i].vd_len;
+		iolen += vid[i].vd_len;
+
+		assert(vid[i].vd_flags & VRING_DESC_F_NEXT);
+		assert((vid[i].vd_flags & VRING_DESC_F_INDIRECT) == 0);
+
+		/*
+		 * - write op implies read-only descriptor,
+		 * - read op implies write-only descriptor,
+		 * therefore test the inverse of the descriptor bit
+		 * to the op.
+		 */
+		assert(((vid[i].vd_flags & VRING_DESC_F_WRITE) == 0) ==
+		       writeop);
+	}
+
+	/* Lastly, get the address of the status byte */
+	status = paddr_guest2host(vid[nsegs - 1].vd_addr);
+	assert(vid[nsegs - 1].vd_len == 1);
+	assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0);
+	assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE);
+
+	DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
+		 writeop ? "write" : "read", iolen, nsegs - 2, offset));
+
+	if (writeop){
+		err = pwritev(sc->vbsc_fd, iov, nsegs - 2, offset);
+	} else {
+		err = preadv(sc->vbsc_fd, iov, nsegs - 2, offset);
+	}
+
+	*status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;
+
+	/*
+	 * Return the single indirect descriptor back to the host
+	 */
+	vu = &hq->hq_used_ring[uidx % hq->hq_size];
+	vu->vu_idx = didx;
+	vu->vu_tlen = 1;
+	hq->hq_cur_aidx++;
+	*hq->hq_used_idx += 1;
+}
+
+/*
+ * Queue-notify handler: process every request currently on the
+ * available ring, then raise an MSI unless the guest suppressed
+ * interrupts or an interrupt is already pending in the ISR.
+ */
+static void
+pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
+{
+	struct vring_hqueue *hq = &sc->vbsc_q;
+	int i;
+	int ndescs;
+
+	/*
+	 * Calculate number of ring entries to process
+	 */
+	ndescs = hq_num_avail(hq);
+
+	if (ndescs == 0)
+		return;
+
+	/*
+	 * Run through all the entries, placing them into iovecs and
+	 * sending when an end-of-packet is found
+	 */
+	for (i = 0; i < ndescs; i++)
+		pci_vtblk_proc(sc, hq);
+
+	/*
+	 * Generate an interrupt if able
+	 */
+	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0 &&
+		sc->vbsc_isr == 0) {
+		sc->vbsc_isr = 1;
+		pci_generate_msi(sc->vbsc_pi, 0);
+	}
+
+}
+
+/*
+ * Set up the host-side view of the virtqueue located at guest page
+ * frame 'pfn': descriptor table, available ring and used ring.
+ */
+static void
+pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
+{
+	struct vring_hqueue *hq;
+
+	sc->vbsc_pfn = pfn << VRING_PFN;
+
+	/*
+	 * Set up host pointers to the various parts of the
+	 * queue
+	 */
+	hq = &sc->vbsc_q;
+	hq->hq_size = VTBLK_RINGSZ;
+
+	hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
+	hq->hq_avail_flags =  (uint16_t *)(hq->hq_dtable + hq->hq_size);
+	hq->hq_avail_idx = hq->hq_avail_flags + 1;
+	hq->hq_avail_ring = hq->hq_avail_flags + 2;
+	hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
+						 VRING_ALIGN);
+	hq->hq_used_idx = hq->hq_used_flags + 1;
+	hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
+
+	/*
+	 * Initialize queue indexes
+	 */
+	hq->hq_cur_aidx = 0;
+}
+
+/*
+ * Device init: 'opts' names the backing file.  Opens it, allocates
+ * the softc, fills in the virtio block config and the PCI config
+ * space.  Returns 0 on success and 1 on failure; on failure the
+ * backing file descriptor is closed again.
+ */
+static int
+pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	struct stat sbuf;
+	struct pci_vtblk_softc *sc;
+	int fd;
+
+	if (opts == NULL) {
+		printf("virtio-block: backing device required\n");
+		return (1);
+	}
+
+	/*
+	 * Access to guest memory is required. Fail if
+	 * memory not mapped
+	 */
+	if (paddr_guest2host(0) == NULL)
+		return (1);
+
+	/*
+	 * The supplied backing file has to exist
+	 */
+	fd = open(opts, O_RDWR);
+	if (fd < 0) {
+		perror("Could not open backing file");
+		return (1);
+	}
+
+	if (fstat(fd, &sbuf) < 0) {
+		perror("Could not stat backing file");
+		close(fd);
+		return (1);
+	}
+
+	sc = malloc(sizeof(struct pci_vtblk_softc));
+	if (sc == NULL) {
+		/* Do not hand NULL to memset, and do not leak 'fd' */
+		perror("Could not allocate virtio-block softc");
+		close(fd);
+		return (1);
+	}
+	memset(sc, 0, sizeof(struct pci_vtblk_softc));
+
+	pi->pi_arg = sc;
+	sc->vbsc_pi = pi;
+	sc->vbsc_fd = fd;
+
+	/* setup virtio block config space */
+	sc->vbsc_cfg.vbc_capacity = sbuf.st_size / DEV_BSIZE;
+	sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
+	sc->vbsc_cfg.vbc_blk_size = DEV_BSIZE;
+	sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */
+	sc->vbsc_cfg.vbc_geom_c = 0;	/* no geometry */
+	sc->vbsc_cfg.vbc_geom_h = 0;
+	sc->vbsc_cfg.vbc_geom_s = 0;
+	sc->vbsc_cfg.vbc_sectors_max = 0;
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
+	pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTBLK_REGSZ);
+	pci_emul_add_msicap(pi, 1);
+
+	return (0);
+}
+
+/*
+ * BAR 0 write handler.  Accesses that run past the register window
+ * are dropped; writes to read-only or unknown registers are only
+ * logged.  Access sizes are enforced with asserts.
+ */
+static void
+pci_vtblk_write(struct pci_devinst *pi, int baridx, int offset, int size,
+		uint32_t value)
+{
+	struct pci_vtblk_softc *sc = pi->pi_arg;
+
+	if (offset + size > VTBLK_REGSZ) {
+		DPRINTF(("vtblk_write: 2big, offset %d size %d\n",
+			 offset, size));
+		return;
+	}
+
+	switch (offset) {
+	case VTCFG_R_GUESTCAP:
+		assert(size == 4);
+		sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
+		break;
+	case VTCFG_R_PFN:
+		assert(size == 4);
+		pci_vtblk_ring_init(sc, value);
+		break;
+	case VTCFG_R_QSEL:
+		assert(size == 2);
+		sc->vbsc_lastq = value;
+		break;
+	case VTCFG_R_QNOTIFY:
+		assert(size == 2);
+		assert(value == 0);
+		pci_vtblk_qnotify(sc);
+		break;
+	case VTCFG_R_STATUS:
+		assert(size == 1);
+		pci_vtblk_update_status(sc, value);
+		break;
+	case VTCFG_R_HOSTCAP:
+	case VTCFG_R_QNUM:
+	case VTCFG_R_ISR:
+	case VTBLK_R_CFG ... VTBLK_R_CFG_END:
+		DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
+		break;
+	default:
+		DPRINTF(("vtblk: unknown i/o write offset %d\n\r", offset));
+		break;
+	}
+}
+
+/*
+ * BAR 0 read handler.  Returns 0 for accesses that run past the
+ * register window and for unknown registers.  Reading the ISR
+ * register clears it.
+ */
+uint32_t
+pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size)
+{
+	struct pci_vtblk_softc *sc = pi->pi_arg;
+	uint32_t value;
+
+	if (offset + size > VTBLK_REGSZ) {
+		DPRINTF(("vtblk_read: 2big, offset %d size %d\n",
+			 offset, size));
+		return (0);
+	}
+
+	switch (offset) {
+	case VTCFG_R_HOSTCAP:
+		assert(size == 4);
+		value = VTBLK_S_HOSTCAPS;
+		break;
+	case VTCFG_R_GUESTCAP:
+		assert(size == 4);
+		value = sc->vbsc_features; /* XXX never read ? */
+		break;
+	case VTCFG_R_PFN:
+		assert(size == 4);
+		value = sc->vbsc_pfn >> VRING_PFN;
+		break;
+	case VTCFG_R_QNUM:
+		value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
+		break;
+	case VTCFG_R_QSEL:
+		assert(size == 2);
+		value = sc->vbsc_lastq; /* XXX never read ? */
+		break;
+	case VTCFG_R_QNOTIFY:
+		assert(size == 2);
+		value = 0; /* XXX never read ? */
+		break;
+	case VTCFG_R_STATUS:
+		assert(size == 1);
+		value = sc->vbsc_status;
+		break;
+	case VTCFG_R_ISR:
+		assert(size == 1);
+		value = sc->vbsc_isr;
+		sc->vbsc_isr = 0;     /* a read clears this flag */
+		break;
+	case VTBLK_R_CFG ... VTBLK_R_CFG_END:
+		assert(size == 1);
+		value = *((uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG);
+		break;
+	default:
+		DPRINTF(("vtblk: unknown i/o read offset %d\n\r", offset));
+		value = 0;
+		break;
+	}
+
+	return (value);
+}
+
+struct pci_devemu pci_de_vblk = {
+	.pe_emu =	"virtio-blk",
+	.pe_init =	pci_vtblk_init,
+	.pe_iow =	pci_vtblk_write,
+	.pe_ior =	pci_vtblk_read,
+};
+PCI_EMUL_SET(pci_de_vblk);
diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
new file mode 100644
index 0000000..5db1eb7
--- /dev/null
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -0,0 +1,739 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "fbsdrun.h"
+#include "pci_emul.h"
+#include "mevent.h"
+#include "virtio.h"
+
+/* Number of entries in each rx/tx virtqueue */
+#define VTNET_RINGSZ	256
+
+/* Maximum number of data segments in one transmit chain */
+#define VTNET_MAXSEGS	32
+
+/*
+ * PCI config-space register offsets
+ */
+#define VTNET_R_CFG0	       20
+#define VTNET_R_CFG1	       21
+#define VTNET_R_CFG2	       22
+#define VTNET_R_CFG3	       23
+#define VTNET_R_CFG4	       24
+#define VTNET_R_CFG5	       25
+#define VTNET_R_CFG6	       26
+#define VTNET_R_CFG7	       27
+#define VTNET_R_MAX	       27
+
+/*
+ * Total size of the register window exposed through BAR 0.
+ * Parenthesized so it expands safely inside any expression.
+ */
+#define VTNET_REGSZ		(VTNET_R_MAX + 1)
+
+/*
+ * Host capabilities
+ */
+#define VTNET_S_HOSTCAPS      \
+  ( 0x00000020 |	/* host supplies MAC */ \
+    0x00008000 |	/* host can merge Rx buffers */ \
+    0x00010000 )	/* config status available */
+
+/*
+ * Queue definitions.
+ */
+#define VTNET_RXQ	0
+#define VTNET_TXQ	1
+#define VTNET_CTLQ	2
+
+#define VTNET_MAXQ	3
+
+struct vring_hqueue {
+	/* Internal state */
+	uint16_t	hq_size;
+	uint16_t	hq_cur_aidx;		/* trails behind 'avail_idx' */
+
+	 /* Host-context pointers to the queue */
+	struct virtio_desc *hq_dtable;
+	uint16_t	*hq_avail_flags;
+	uint16_t	*hq_avail_idx;		/* monotonically increasing */
+	uint16_t	*hq_avail_ring;
+
+	uint16_t	*hq_used_flags;
+	uint16_t	*hq_used_idx;		/* monotonically increasing */
+	struct virtio_used *hq_used_ring;
+};
+
+/*
+ * Fixed network header size
+ */
+struct virtio_net_rxhdr {
+	uint8_t		vrh_flags;
+	uint8_t		vrh_gso_type;
+	uint16_t	vrh_hdr_len;
+	uint16_t	vrh_gso_size;
+	uint16_t	vrh_csum_start;
+	uint16_t	vrh_csum_offset;
+	uint16_t	vrh_bufs;
+} __packed;
+
+/*
+ * Debug printf.  DPRINTF is wrapped in do/while so that it behaves as
+ * a single statement and cannot capture a following 'else'.
+ */
+static int pci_vtnet_debug;
+#define DPRINTF(params) do { if (pci_vtnet_debug) printf params; } while (0)
+#define WPRINTF(params) printf params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtnet_softc {
+	struct pci_devinst *vsc_pi;
+	pthread_mutex_t vsc_mtx;
+	struct mevent	*vsc_mevp;
+
+	int		vsc_curq;
+	int		vsc_status;
+	int		vsc_isr;
+	int		vsc_tapfd;
+	int		vsc_rx_ready;
+	int		vsc_rxpend;
+
+	uint32_t	vsc_features;
+	uint8_t		vsc_macaddr[6];
+
+	uint64_t	vsc_pfn[VTNET_MAXQ];
+	struct	vring_hqueue vsc_hq[VTNET_MAXQ];
+};
+
+/*
+ * Return the number of available descriptors in the vring taking care
+ * of the 16-bit index wraparound.
+ */
+static int
+hq_num_avail(struct vring_hqueue *hq)
+{
+	int ndesc;
+
+	if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
+		ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
+	else
+		ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;
+
+	assert(ndesc >= 0 && ndesc <= hq->hq_size);
+
+	return (ndesc);
+}
+
+/*
+ * Return the ring size reported for queue 'qnum': 0 for the control
+ * queue, VTNET_RINGSZ for every other queue.
+ */
+static uint16_t
+pci_vtnet_qsize(int qnum)
+{
+	/* XXX no ctl queue currently */
+	if (qnum == VTNET_CTLQ) {
+		return (0);
+	}
+
+	/* XXX fixed currently. Maybe different for tx/rx/ctl */
+	return (VTNET_RINGSZ);
+}
+
+/*
+ * Record the device status written by the guest.  A value of 0 is
+ * only logged as a reset request; no other state is cleared here.
+ */
+static void
+pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value)
+{
+	if (value == 0) {
+		DPRINTF(("vtnet: device reset requested !\n"));
+	}
+
+	sc->vsc_status = value;
+}
+
+/*
+ * Called to send a buffer chain out to the tap device
+ */
+static void
+pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+		 int len)
+{
+	char pad[60];
+
+	if (sc->vsc_tapfd == -1)
+		return;
+
+	/*
+	 * If the length is < 60, pad out to that and add the
+	 * extra zero'd segment to the iov. It is guaranteed that
+	 * there is always an extra iov available by the caller.
+	 */
+	if (len < 60) {
+		memset(pad, 0, 60 - len);
+		iov[iovcnt].iov_base = pad;
+		iov[iovcnt].iov_len = 60 - len;
+		iovcnt++;
+	}
+	(void) writev(sc->vsc_tapfd, iov, iovcnt);
+}
+
+/*
+ *  Called when there is read activity on the tap file descriptor.
+ * Each buffer posted by the guest is assumed to be able to contain
+ * an entire ethernet frame + rx header.
+ *  MP note: the dummybuf is only used for discarding frames, so there
+ * is no need for it to be per-vtnet or locked.
+ */
+static uint8_t dummybuf[2048];
+
+/*
+ * Receive path: move frames from the tap device into guest rx
+ * buffers, one frame per available descriptor, and post each filled
+ * buffer on the used ring.  Frames are read into 'dummybuf' and
+ * dropped while the rx ring is not ready or has no buffers.
+ */
+static void
+pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
+{
+	struct virtio_desc *vd;
+	struct virtio_used *vu;
+	struct vring_hqueue *hq;
+	struct virtio_net_rxhdr *vrx;
+	uint8_t *buf;
+	int i;
+	int len;
+	int ndescs;
+	int didx, uidx, aidx;	/* descriptor, avail and used index */
+
+	/*
+	 * Should never be called without a valid tap fd
+	 */
+	assert(sc->vsc_tapfd != -1);
+
+	/*
+	 * But, will be called when the rx ring hasn't yet
+	 * been set up.
+	 */
+	if (sc->vsc_rx_ready == 0) {
+		/*
+		 * Drop the packet and try later.
+		 */
+		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
+		return;
+	}
+
+	/*
+	 * Calculate the number of available rx buffers
+	 */
+	hq = &sc->vsc_hq[VTNET_RXQ];
+
+	ndescs = hq_num_avail(hq);
+
+	if (ndescs == 0) {
+		/*
+		 * Need to wait for host notification to read
+		 */
+		if (sc->vsc_rxpend == 0) {
+			WPRINTF(("vtnet: no rx descriptors !\n"));
+			sc->vsc_rxpend = 1;
+		}
+
+		/*
+		 * Drop the packet and try later
+		 */
+		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
+		return;
+	}
+
+	aidx = hq->hq_cur_aidx;
+	uidx = *hq->hq_used_idx;
+	for (i = 0; i < ndescs; i++) {
+		/*
+		 * 'aidx' indexes into the an array of descriptor indexes
+		 */
+		didx = hq->hq_avail_ring[aidx % hq->hq_size];
+		assert(didx >= 0 && didx < hq->hq_size);
+
+		vd = &hq->hq_dtable[didx];
+
+		/*
+		 * Get a pointer to the rx header, and use the
+		 * data immediately following it for the packet buffer.
+		 */
+		vrx = (struct virtio_net_rxhdr *)paddr_guest2host(vd->vd_addr);
+		buf = (uint8_t *)(vrx + 1);
+
+		len = read(sc->vsc_tapfd, buf,
+			   vd->vd_len - sizeof(struct virtio_net_rxhdr));
+
+		/*
+		 * Stop on EWOULDBLOCK (no more frames) and on any other
+		 * read error: a negative 'len' must never be turned into
+		 * a used-ring length, and the descriptor stays available
+		 * for the next frame.
+		 */
+		if (len < 0) {
+			break;
+		}
+
+		/*
+		 * The only valid field in the rx packet header is the
+		 * number of buffers, which is always 1 without TSO
+		 * support.
+		 */
+		memset(vrx, 0, sizeof(struct virtio_net_rxhdr));
+		vrx->vrh_bufs = 1;
+
+		/*
+		 * Write this descriptor into the used ring
+		 */
+		vu = &hq->hq_used_ring[uidx % hq->hq_size];
+		vu->vu_idx = didx;
+		vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr);
+		uidx++;
+		aidx++;
+	}
+
+	/*
+	 * Update the used pointer, and signal an interrupt if allowed
+	 */
+	*hq->hq_used_idx = uidx;
+	hq->hq_cur_aidx = aidx;
+
+	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
+		sc->vsc_isr |= 1;
+		pci_generate_msi(sc->vsc_pi, 0);
+	}
+}
+
+/*
+ * Event callback registered for the tap fd: run the receive path
+ * with the softc mutex held.
+ */
+static void
+pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
+{
+	struct pci_vtnet_softc *sc = param;
+
+	pthread_mutex_lock(&sc->vsc_mtx);
+	pci_vtnet_tap_rx(sc);
+	pthread_mutex_unlock(&sc->vsc_mtx);
+
+}
+
+/*
+ * Notification on the rx queue: mark the rx ring as ready and, if a
+ * receive was previously blocked for lack of buffers, retry it.
+ */
+static void
+pci_vtnet_ping_rxq(struct pci_vtnet_softc *sc)
+{
+	/*
+	 * A qnotify means that the rx process can now begin
+	 */
+	if (sc->vsc_rx_ready == 0) {
+		sc->vsc_rx_ready = 1;
+	}
+
+	/*
+	 * If the rx queue was empty, attempt to receive a
+	 * packet that was previously blocked due to no rx bufs
+	 * available
+	 */
+	if (sc->vsc_rxpend) {
+		WPRINTF(("vtnet: rx resumed\n\r"));
+		sc->vsc_rxpend = 0;
+		pci_vtnet_tap_rx(sc);
+	}
+}
+
+/*
+ * Transmit one descriptor chain from the tx queue: gather the data
+ * descriptors into an iovec, send them to the tap device, post the
+ * chain on the used ring and raise an MSI if the guest allows it.
+ */
+static void
+pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq)
+{
+	/* One extra entry: pci_vtnet_tap_tx() may append a pad segment */
+	struct iovec iov[VTNET_MAXSEGS + 1];
+	struct virtio_desc *vd;
+	struct virtio_used *vu;
+	int i;
+	int plen;
+	int tlen;
+	int uidx, aidx, didx;
+
+	uidx = *hq->hq_used_idx;
+	aidx = hq->hq_cur_aidx;
+	didx = hq->hq_avail_ring[aidx % hq->hq_size];
+	assert(didx >= 0 && didx < hq->hq_size);
+
+	vd = &hq->hq_dtable[didx];
+
+	/*
+	 * Run through the chain of descriptors, ignoring the
+	 * first header descriptor. However, include the header
+	 * length in the total length that will be put into the
+	 * used queue.
+	 */
+	tlen = vd->vd_len;
+	vd = &hq->hq_dtable[vd->vd_next];
+
+	for (i = 0, plen = 0;
+	     i < VTNET_MAXSEGS;
+	     i++, vd = &hq->hq_dtable[vd->vd_next]) {
+		iov[i].iov_base = paddr_guest2host(vd->vd_addr);
+		iov[i].iov_len = vd->vd_len;
+		plen += vd->vd_len;
+		tlen += vd->vd_len;
+
+		if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
+			break;
+	}
+	assert(i < VTNET_MAXSEGS);
+
+	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1));
+	pci_vtnet_tap_tx(sc, iov, i + 1, plen);
+
+	/*
+	 * Return this chain back to the host
+	 */
+	vu = &hq->hq_used_ring[uidx % hq->hq_size];
+	vu->vu_idx = didx;
+	vu->vu_tlen = tlen;
+	hq->hq_cur_aidx = aidx + 1;
+	*hq->hq_used_idx = uidx + 1;
+
+	/*
+	 * Generate an interrupt if able
+	 */
+	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
+		sc->vsc_isr |= 1;
+		pci_generate_msi(sc->vsc_pi, 0);
+	}
+}
+
+/*
+ * Notification on the tx queue: transmit every chain currently on
+ * the available ring.
+ */
+static void
+pci_vtnet_ping_txq(struct pci_vtnet_softc *sc)
+{
+	struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ];
+	int i;
+	int ndescs;
+
+	/*
+	 * Calculate number of ring entries to process
+	 */
+	ndescs = hq_num_avail(hq);
+
+	if (ndescs == 0)
+		return;
+
+	/*
+	 * Run through all the entries, placing them into iovecs and
+	 * sending when an end-of-packet is found
+	 */
+	for (i = 0; i < ndescs; i++)
+		pci_vtnet_proctx(sc, hq);
+}
+
+/* Notification on the control queue: only logged. */
+static void
+pci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc)
+{
+
+	DPRINTF(("vtnet: control qnotify!\n\r"));
+}
+
+/*
+ * Set up the host-side view of the currently selected virtqueue
+ * located at guest page frame 'pfn': descriptor table, available
+ * ring and used ring.
+ */
+static void
+pci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn)
+{
+	struct vring_hqueue *hq;
+	int qnum = sc->vsc_curq;
+
+	assert(qnum < VTNET_MAXQ);
+
+	sc->vsc_pfn[qnum] = pfn << VRING_PFN;
+
+	/*
+	 * Set up host pointers to the various parts of the
+	 * queue
+	 */
+	hq = &sc->vsc_hq[qnum];
+	hq->hq_size = pci_vtnet_qsize(qnum);
+
+	hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
+	hq->hq_avail_flags =  (uint16_t *)(hq->hq_dtable + hq->hq_size);
+	hq->hq_avail_idx = hq->hq_avail_flags + 1;
+	hq->hq_avail_ring = hq->hq_avail_flags + 2;
+	hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
+						 VRING_ALIGN);
+	hq->hq_used_idx = hq->hq_used_flags + 1;
+	hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
+
+	/*
+	 * Initialize queue indexes
+	 */
+	hq->hq_cur_aidx = 0;
+}
+
+/*
+ * Device init: 'opts', when given, names a tap device under /dev.
+ * Allocates the softc, opens and registers the tap device (failure
+ * to do so leaves the device without a backend, vsc_tapfd == -1),
+ * derives the MAC address and fills in the PCI config space.
+ * Returns 0 on success, 1 when guest memory is not mapped or the
+ * softc cannot be allocated.
+ */
+static int
+pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	MD5_CTX mdctx;
+	unsigned char digest[16];
+	char nstr[80];
+	struct pci_vtnet_softc *sc;
+
+	/*
+	 * Access to guest memory is required. Fail if
+	 * memory not mapped
+	 */
+	if (paddr_guest2host(0) == NULL)
+		return (1);
+
+	sc = malloc(sizeof(struct pci_vtnet_softc));
+	if (sc == NULL)
+		return (1);
+	memset(sc, 0, sizeof(struct pci_vtnet_softc));
+
+	pi->pi_arg = sc;
+	sc->vsc_pi = pi;
+
+	pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+	/*
+	 * Attempt to open the tap device
+	 */
+	sc->vsc_tapfd = -1;
+	if (opts != NULL) {
+		char tbuf[80];
+
+		/*
+		 * snprintf always terminates and never writes past the
+		 * buffer; an over-long name is truncated.
+		 */
+		snprintf(tbuf, sizeof(tbuf), "/dev/%s", opts);
+
+		sc->vsc_tapfd = open(tbuf, O_RDWR);
+		if (sc->vsc_tapfd == -1) {
+			WPRINTF(("open of tap device %s failed\n", tbuf));
+		} else {
+			/*
+			 * Set non-blocking and register for read
+			 * notifications with the event loop
+			 */
+			int opt = 1;
+			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
+				WPRINTF(("tap device O_NONBLOCK failed\n"));
+				close(sc->vsc_tapfd);
+				sc->vsc_tapfd = -1;
+			}
+		}
+
+		/* Only register the fd if it survived the setup above */
+		if (sc->vsc_tapfd != -1) {
+			sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
+						  EVF_READ,
+						  pci_vtnet_tap_callback,
+						  sc);
+			if (sc->vsc_mevp == NULL) {
+				WPRINTF(("Could not register event\n"));
+				close(sc->vsc_tapfd);
+				sc->vsc_tapfd = -1;
+			}
+		}
+	}
+
+	/*
+	 * The MAC address is the standard NetApp OUI of 00-a0-98,
+	 * followed by an MD5 of the vm name. The slot number is
+	 * prepended to this for slots other than 1, so that
+	 * CFE can netboot from the equivalent of slot 1.
+	 */
+	if (pi->pi_slot == 1) {
+		/* snprintf guarantees termination for the strlen below */
+		snprintf(nstr, sizeof(nstr), "%s", vmname);
+	} else {
+		snprintf(nstr, sizeof(nstr), "%d-%s", pi->pi_slot, vmname);
+	}
+
+	MD5Init(&mdctx);
+	MD5Update(&mdctx, nstr, strlen(nstr));
+	MD5Final(digest, &mdctx);
+
+	sc->vsc_macaddr[0] = 0x00;
+	sc->vsc_macaddr[1] = 0xa0;
+	sc->vsc_macaddr[2] = 0x98;
+	sc->vsc_macaddr[3] = digest[0];
+	sc->vsc_macaddr[4] = digest[1];
+	sc->vsc_macaddr[5] = digest[2];
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
+	pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTNET_REGSZ);
+	pci_emul_add_msicap(pi, 1);
+
+	return (0);
+}
+
+/*
+ * Function pointer array to handle queue notifications
+ */
+static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = {
+	pci_vtnet_ping_rxq,
+	pci_vtnet_ping_txq,
+	pci_vtnet_ping_ctlq
+};
+
+/*
+ * BAR 0 write handler, serialized by the softc mutex.  Accesses that
+ * run past the register window are dropped; writes to read-only or
+ * unknown registers are only logged.  Access sizes and queue numbers
+ * are enforced with asserts.
+ */
+static void
+pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size,
+		uint32_t value)
+{
+	struct pci_vtnet_softc *sc = pi->pi_arg;
+
+	if (offset + size > VTNET_REGSZ) {
+		DPRINTF(("vtnet_write: 2big, offset %d size %d\n",
+			 offset, size));
+		return;
+	}
+
+	pthread_mutex_lock(&sc->vsc_mtx);
+
+	switch (offset) {
+	case VTCFG_R_GUESTCAP:
+		assert(size == 4);
+		sc->vsc_features = value & VTNET_S_HOSTCAPS;
+		break;
+	case VTCFG_R_PFN:
+		assert(size == 4);
+		pci_vtnet_ring_init(sc, value);
+		break;
+	case VTCFG_R_QSEL:
+		assert(size == 2);
+		assert(value < VTNET_MAXQ);
+		sc->vsc_curq = value;
+		break;
+	case VTCFG_R_QNOTIFY:
+		assert(size == 2);
+		assert(value < VTNET_MAXQ);
+		(*pci_vtnet_qnotify[value])(sc);
+		break;
+	case VTCFG_R_STATUS:
+		assert(size == 1);
+		pci_vtnet_update_status(sc, value);
+		break;
+	case VTNET_R_CFG0:
+	case VTNET_R_CFG1:
+	case VTNET_R_CFG2:
+	case VTNET_R_CFG3:
+	case VTNET_R_CFG4:
+	case VTNET_R_CFG5:
+		/*
+		 * The driver is allowed to change the MAC address
+		 */
+		assert(size == 1);
+		sc->vsc_macaddr[offset - VTNET_R_CFG0] = value;
+		break;
+	case VTCFG_R_HOSTCAP:
+	case VTCFG_R_QNUM:
+	case VTCFG_R_ISR:
+	case VTNET_R_CFG6:
+	case VTNET_R_CFG7:
+		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
+		break;
+	default:
+		DPRINTF(("vtnet: unknown i/o write offset %d\n\r", offset));
+		break;
+	}
+
+	pthread_mutex_unlock(&sc->vsc_mtx);
+}
+
+/*
+ * BAR 0 read handler, serialized by the softc mutex.  Returns 0 for
+ * accesses that run past the register window and for unknown
+ * registers.  Reading the ISR register clears it.
+ */
+uint32_t
+pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size)
+{
+	struct pci_vtnet_softc *sc = pi->pi_arg;
+	uint32_t value;
+
+	if (offset + size > VTNET_REGSZ) {
+		DPRINTF(("vtnet_read: 2big, offset %d size %d\n",
+			 offset, size));
+		return (0);
+	}
+
+	pthread_mutex_lock(&sc->vsc_mtx);
+
+	switch (offset) {
+	case VTCFG_R_HOSTCAP:
+		assert(size == 4);
+		value = VTNET_S_HOSTCAPS;
+		break;
+	case VTCFG_R_GUESTCAP:
+		assert(size == 4);
+		value = sc->vsc_features; /* XXX never read ? */
+		break;
+	case VTCFG_R_PFN:
+		assert(size == 4);
+		value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN;
+		break;
+	case VTCFG_R_QNUM:
+		assert(size == 2);
+		value = pci_vtnet_qsize(sc->vsc_curq);
+		break;
+	case VTCFG_R_QSEL:
+		assert(size == 2);
+		value = sc->vsc_curq;  /* XXX never read ? */
+		break;
+	case VTCFG_R_QNOTIFY:
+		assert(size == 2);
+		value = sc->vsc_curq;  /* XXX never read ? */
+		break;
+	case VTCFG_R_STATUS:
+		assert(size == 1);
+		value = sc->vsc_status;
+		break;
+	case VTCFG_R_ISR:
+		assert(size == 1);
+		value = sc->vsc_isr;
+		sc->vsc_isr = 0;     /* a read clears this flag */
+		break;
+	case VTNET_R_CFG0:
+	case VTNET_R_CFG1:
+	case VTNET_R_CFG2:
+	case VTNET_R_CFG3:
+	case VTNET_R_CFG4:
+	case VTNET_R_CFG5:
+		assert(size == 1);
+		value = sc->vsc_macaddr[offset - VTNET_R_CFG0];
+		break;
+	case VTNET_R_CFG6:
+		assert(size == 1);
+		value = 0x01;	/* XXX link always up */
+		break;
+	case VTNET_R_CFG7:
+		assert(size == 1);
+		value = 0;	/* link status is in the LSB */
+		break;
+	default:
+		DPRINTF(("vtnet: unknown i/o read offset %d\n\r", offset));
+		value = 0;
+		break;
+	}
+
+	pthread_mutex_unlock(&sc->vsc_mtx);
+
+	return (value);
+}
+
+struct pci_devemu pci_de_vnet = {
+	.pe_emu =	"virtio-net",
+	.pe_init =	pci_vtnet_init,
+	.pe_iow =	pci_vtnet_write,
+	.pe_ior =	pci_vtnet_read,
+};
+PCI_EMUL_SET(pci_de_vnet);
diff --git a/usr.sbin/bhyve/pit_8254.c b/usr.sbin/bhyve/pit_8254.c
new file mode 100644
index 0000000..b510161
--- /dev/null
+++ b/usr.sbin/bhyve/pit_8254.c
@@ -0,0 +1,196 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+
+#include
+
+#include
+#include
+
+#include "fbsdrun.h"
+#include "inout.h"
+#include "pit_8254.h"
+
+/* Fields of the byte written to the TIMER_MODE port */
+#define	TIMER_SEL_MASK		0xc0
+#define	TIMER_RW_MASK		0x30
+#define	TIMER_MODE_MASK		0x0f
+#define	TIMER_SEL_READBACK	0xc0
+
+/* 'freq' divided by 'hz', rounded to the nearest integer */
+#define	TIMER_DIV(freq, hz)	(((freq) + (hz) / 2) / (hz))
+
+/* Counter input frequency in Hz used for all tick calculations */
+#define	PIT_8254_FREQ		1193182
+
+struct counter {
+	struct timeval	tv;		/* uptime when counter was loaded */
+	uint16_t	initial;	/* initial counter value */
+	uint8_t		cr[2];
+	uint8_t		ol[2];
+	int		crbyte;
+	int		olbyte;
+};
+
+/*
+ * Normalize 't1' so that tv_usec lies in [0, 1000000), assuming it is
+ * off by at most one second in either direction.
+ */
+static void
+timevalfix(struct timeval *t1)
+{
+
+	if (t1->tv_usec < 0) {
+		t1->tv_sec--;
+		t1->tv_usec += 1000000;
+	}
+	if (t1->tv_usec >= 1000000) {
+		t1->tv_sec++;
+		t1->tv_usec -= 1000000;
+	}
+}
+
+/* Compute t1 -= t2 and normalize the result. */
+static void
+timevalsub(struct timeval *t1, const struct timeval *t2)
+{
+
+	t1->tv_sec -= t2->tv_sec;
+	t1->tv_usec -= t2->tv_usec;
+	timevalfix(t1);
+}
+
+/*
+ * Latch the current value of counter 'c' into its output latch.
+ * The value is derived from the wall-clock time elapsed since the
+ * counter was loaded.  Does nothing while a previously latched value
+ * has not been fully read.
+ */
+static void
+latch(struct counter *c)
+{
+	struct timeval tv2;
+	uint16_t lval;
+	uint64_t delta_ticks;
+
+	/* cannot latch a new value until the old one has been consumed */
+	if (c->olbyte != 0)
+		return;
+
+	if (c->initial == 0 || c->initial == 1) {
+		/*
+		 * XXX the program that runs the VM can be stopped and
+		 * restarted at any time. This means that state that was
+		 * created by the guest is destroyed between invocations
+		 * of the program.
+		 *
+		 * If the counter's initial value is not programmed we
+		 * assume a value that would be set to generate 'guest_hz'
+		 * interrupts per second.
+		 */
+		c->initial = TIMER_DIV(PIT_8254_FREQ, guest_hz);
+		gettimeofday(&c->tv, NULL);
+	}
+
+	(void)gettimeofday(&tv2, NULL);
+	timevalsub(&tv2, &c->tv);
+
+	/*
+	 * Convert the elapsed time straight into PIT ticks.  Going
+	 * through an integer "nanoseconds per tick" constant would
+	 * truncate 838.096ns to 838ns and make the counter run fast;
+	 * splitting seconds and microseconds also keeps the 64-bit
+	 * intermediate products small.
+	 */
+	delta_ticks = (uint64_t)tv2.tv_sec * PIT_8254_FREQ +
+	    (uint64_t)tv2.tv_usec * PIT_8254_FREQ / 1000000;
+
+	lval = c->initial - delta_ticks % c->initial;
+	c->olbyte = 2;
+	c->ol[1] = lval;		/* LSB */
+	c->ol[0] = lval >> 8;		/* MSB */
+}
+
+/*
+ * I/O port handler for the mode port and the three counter ports.
+ * Only single-byte accesses are accepted.  Returns 0 when the access
+ * was handled and -1 for unsupported accesses (wrong size, read-back
+ * command, unsupported access or counter mode).
+ */
+static int
+pit_8254_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	int sel, rw, mode;
+	uint8_t val;
+	struct counter *c;
+
+	static struct counter counter[3];
+
+	if (bytes != 1)
+		return (-1);
+
+	val = *eax;
+
+	if (port == TIMER_MODE) {
+		assert(in == 0);
+		sel = val & TIMER_SEL_MASK;
+		rw = val & TIMER_RW_MASK;
+		mode = val & TIMER_MODE_MASK;
+
+		if (sel == TIMER_SEL_READBACK)
+			return (-1);
+		if (rw != TIMER_LATCH && rw != TIMER_16BIT)
+			return (-1);
+
+		if (rw != TIMER_LATCH) {
+			/*
+			 * Counter mode is not affected when issuing a
+			 * latch command.
+			 */
+			if (mode != TIMER_RATEGEN && mode != TIMER_SQWAVE)
+				return (-1);
+		}
+
+		/* The counter number lives in the top two bits */
+		c = &counter[sel >> 6];
+		if (rw == TIMER_LATCH)
+			latch(c);
+		else
+			c->olbyte = 0;	/* reset latch after reprogramming */
+
+		return (0);
+	}
+
+	/* counter ports */
+	assert(port >= TIMER_CNTR0 && port <= TIMER_CNTR2);
+	c = &counter[port - TIMER_CNTR0];
+
+	if (in) {
+		/*
+		 * XXX
+		 * The spec says that once the output latch is completely
+		 * read it should revert to "following" the counter. We don't
+		 * do this because it is hard and any reasonable OS should
+		 * always latch the counter before trying to read it.
+		 */
+		if (c->olbyte == 0)
+			c->olbyte = 2;
+		*eax = c->ol[--c->olbyte];
+	} else {
+		/* Two byte writes (LSB, then MSB) load the initial count */
+		c->cr[c->crbyte++] = *eax;
+		if (c->crbyte == 2) {
+			c->crbyte = 0;
+			c->initial = c->cr[0] | (uint16_t)c->cr[1] << 8;
+			gettimeofday(&c->tv, NULL);
+		}
+	}
+
+	return (0);
+}
+
+INOUT_PORT(8254, TIMER_MODE, IOPORT_F_OUT, pit_8254_handler);
+INOUT_PORT(8254, TIMER_CNTR0, IOPORT_F_INOUT, pit_8254_handler);
+INOUT_PORT(8254, TIMER_CNTR1, IOPORT_F_INOUT, pit_8254_handler);
+INOUT_PORT(8254, TIMER_CNTR2, IOPORT_F_INOUT, pit_8254_handler);
diff --git a/usr.sbin/bhyve/pit_8254.h b/usr.sbin/bhyve/pit_8254.h
new file mode 100644
index 0000000..61bd15d
--- /dev/null
+++ b/usr.sbin/bhyve/pit_8254.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _PIT_8254_H_ +#define _PIT_8254_H_ + +/* + * Borrowed from amd64/include/timerreg.h because in that file it is + * conditionally compiled for #ifdef _KERNEL only. + */ + +#include + +#define IO_TIMER1 0x40 /* 8253 Timer #1 */ +#define TIMER_CNTR0 (IO_TIMER1 + TIMER_REG_CNTR0) +#define TIMER_CNTR1 (IO_TIMER1 + TIMER_REG_CNTR1) +#define TIMER_CNTR2 (IO_TIMER1 + TIMER_REG_CNTR2) +#define TIMER_MODE (IO_TIMER1 + TIMER_REG_MODE) + +#endif /* _PIT_8254_H_ */ diff --git a/usr.sbin/bhyve/post.c b/usr.sbin/bhyve/post.c new file mode 100644 index 0000000..092a551 --- /dev/null +++ b/usr.sbin/bhyve/post.c @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include + +#include "inout.h" + +static int +post_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + assert(in == 1); + + if (bytes != 1) + return (-1); + + *eax = 0xff; /* return some garbage */ + return (0); +} + +INOUT_PORT(post, 0x84, IOPORT_F_IN, post_data_handler); diff --git a/usr.sbin/bhyve/rtc.c b/usr.sbin/bhyve/rtc.c new file mode 100644 index 0000000..a6f44e0 --- /dev/null +++ b/usr.sbin/bhyve/rtc.c @@ -0,0 +1,268 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include + +#include "inout.h" + +#define IO_RTC 0x70 + +#define RTC_SEC 0x00 /* seconds */ +#define RTC_MIN 0x02 +#define RTC_HRS 0x04 +#define RTC_WDAY 0x06 +#define RTC_DAY 0x07 +#define RTC_MONTH 0x08 +#define RTC_YEAR 0x09 +#define RTC_CENTURY 0x32 /* current century */ + +#define RTC_STATUSA 0xA +#define RTCSA_TUP 0x80 /* time update, don't look now */ + +#define RTC_STATUSB 0xB +#define RTCSB_DST 0x01 +#define RTCSB_24HR 0x02 +#define RTCSB_BIN 0x04 /* 0 = BCD, 1 = Binary */ +#define RTCSB_PINTR 0x40 /* 1 = enable periodic clock interrupt */ +#define RTCSB_HALT 0x80 /* stop clock updates */ + +#define RTC_INTR 0x0c /* status register C (R) interrupt source */ + +#define RTC_STATUSD 0x0d /* status register D (R) Lost Power */ +#define RTCSD_PWR 0x80 /* clock power OK */ + +#define RTC_DIAG 0x0e + +#define RTC_RSTCODE 0x0f + +static int addr; + +/* XXX initialize these to default values as they would be from BIOS */ +static uint8_t status_a, status_b, rstcode; + +static u_char const bin2bcd_data[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 
0x16, 0x17, 0x18, 0x19, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99 +}; +#define bin2bcd(bin) (bin2bcd_data[bin]) + +#define rtcout(val) ((status_b & RTCSB_BIN) ? (val) : bin2bcd((val))) + +static void +timevalfix(struct timeval *t1) +{ + /* Carry one second of tv_usec underflow or overflow into tv_sec. */ + if (t1->tv_usec < 0) { + t1->tv_sec--; + t1->tv_usec += 1000000; + } + if (t1->tv_usec >= 1000000) { + t1->tv_sec++; + t1->tv_usec -= 1000000; + } +} + +static void +timevalsub(struct timeval *t1, const struct timeval *t2) +{ + /* t1 -= t2, then let timevalfix() renormalize tv_usec. */ + t1->tv_sec -= t2->tv_sec; + t1->tv_usec -= t2->tv_usec; + timevalfix(t1); +} +/* Index port (IO_RTC): validate and remember which RTC register the next data-port access refers to. */ +static int +rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + assert(in == 0); /* the index port is registered with IOPORT_F_OUT only */ + + if (bytes != 1) + return (-1); + + switch (*eax) { /* accept only the registers the data handler knows about */ + case RTC_SEC: + case RTC_MIN: + case RTC_HRS: + case RTC_WDAY: + case RTC_DAY: + case RTC_MONTH: + case RTC_YEAR: + case RTC_CENTURY: + case RTC_STATUSA: + case RTC_STATUSB: + case RTC_INTR: + case RTC_STATUSD: + case RTC_DIAG: + case RTC_RSTCODE: + break; + default: + return (-1); + } + + addr = *eax; /* consumed by rtc_data_handler() */ + return (0); +} +/* Data port (IO_RTC + 1): reads return the register selected through the index port, taken from a snapshot of local time that is refreshed at most once per second; writes update status A/B and the reset code and ignore the time-of-day registers. Returns 0 on success and -1 for an unsupported access. */ +static int +rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + int hour; + time_t t; + struct timeval cur, delta; + + static struct timeval last; + static struct tm tm; + + if (bytes != 1) + return (-1); + + gettimeofday(&cur, NULL); + + /* + * Increment the cached time only once per second so we can guarantee + * that the guest has at least one second to read the hour:min:sec + * separately and still get a coherent
view of the time. + */ + delta = cur; + timevalsub(&delta, &last); + if (delta.tv_sec >= 1 && (status_b & RTCSB_HALT) == 0) { /* RTCSB_HALT freezes the snapshot */ + t = cur.tv_sec; + localtime_r(&t, &tm); + last = cur; + } + + if (in) { + switch (addr) { + case RTC_SEC: + *eax = rtcout(tm.tm_sec); + return (0); + case RTC_MIN: + *eax = rtcout(tm.tm_min); + return (0); + case RTC_HRS: + if (status_b & RTCSB_24HR) + hour = tm.tm_hour; + else + hour = ((tm.tm_hour + 11) % 12) + 1; /* 0 -> 12, 1..11 -> 1..11, 12 -> 12, 13..23 -> 1..11 */ + + *eax = rtcout(hour); + + /* + * If we are representing time in the 12-hour format + * then set the MSB to indicate PM. + */ + if ((status_b & RTCSB_24HR) == 0 && tm.tm_hour >= 12) + *eax |= 0x80; + + return (0); + case RTC_WDAY: + *eax = rtcout(tm.tm_wday + 1); + return (0); + case RTC_DAY: + *eax = rtcout(tm.tm_mday); + return (0); + case RTC_MONTH: + *eax = rtcout(tm.tm_mon + 1); + return (0); + case RTC_YEAR: + *eax = rtcout(tm.tm_year % 100); + return (0); + case RTC_CENTURY: + *eax = rtcout((tm.tm_year + 1900) / 100); /* tm_year counts years since 1900 */ + return (0); + case RTC_STATUSA: + *eax = status_a; + return (0); + case RTC_INTR: + *eax = 0; + return (0); + case RTC_STATUSD: + *eax = RTCSD_PWR; + return (0); + case RTC_DIAG: + *eax = 0; + return (0); + case RTC_RSTCODE: + *eax = rstcode; + return (0); + default: /* NOTE(review): RTC_STATUSB can be selected and written but has no read case, so reading it fails here */ + return (-1); + } + } + + switch (addr) { + case RTC_STATUSA: + status_a = *eax & ~RTCSA_TUP; + break; + case RTC_STATUSB: + /* XXX not implemented yet XXX */ + if (*eax & RTCSB_PINTR) + return (-1); + status_b = *eax; + break; + case RTC_RSTCODE: + rstcode = *eax; + break; + case RTC_SEC: + case RTC_MIN: + case RTC_HRS: + case RTC_WDAY: + case RTC_DAY: + case RTC_MONTH: + case RTC_YEAR: + case RTC_CENTURY: + /* + * Ignore writes to the time of day registers + */ + break; + default: + return (-1); + } + return (0); +} + +INOUT_PORT(rtc, IO_RTC, IOPORT_F_OUT, rtc_addr_handler); +INOUT_PORT(rtc, IO_RTC + 1, IOPORT_F_INOUT, rtc_data_handler); diff --git a/usr.sbin/bhyve/uart.c b/usr.sbin/bhyve/uart.c new file mode 100644 index 0000000..640f3bf --- /dev/null +++ 
b/usr.sbin/bhyve/uart.c @@ -0,0 +1,60 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include "inout.h" + +#define COM1 0x3F8 +#define COM2 0x2F8 + +#define REG_IIR 2 + +static int +com_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + assert(in); + + if (bytes != 1) + return (-1); + + /* + * COM port is not implemented so we return 0xFF for all registers + */ + *eax = 0xFF; + + return (0); +} + +INOUT_PORT(uart, COM1 + REG_IIR, IOPORT_F_IN, com_handler); +INOUT_PORT(uart, COM2 + REG_IIR, IOPORT_F_IN, com_handler); diff --git a/usr.sbin/bhyve/virtio.h b/usr.sbin/bhyve/virtio.h new file mode 100644 index 0000000..474e244 --- /dev/null +++ b/usr.sbin/bhyve/virtio.h @@ -0,0 +1,85 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VIRTIO_H_ +#define _VIRTIO_H_ + +#define VRING_ALIGN 4096 + +#define VRING_DESC_F_NEXT (1 << 0) +#define VRING_DESC_F_WRITE (1 << 1) +#define VRING_DESC_F_INDIRECT (1 << 2) + +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +struct virtio_desc { + uint64_t vd_addr; + uint32_t vd_len; + uint16_t vd_flags; + uint16_t vd_next; +} __packed; + +struct virtio_used { + uint32_t vu_idx; + uint32_t vu_tlen; +} __packed; + +/* + * PFN register shift amount + */ +#define VRING_PFN 12 + +/* + * Virtio device types + */ +#define VIRTIO_TYPE_NET 1 +#define VIRTIO_TYPE_BLOCK 2 + +/* + * PCI vendor/device IDs + */ +#define VIRTIO_VENDOR 0x1AF4 +#define VIRTIO_DEV_NET 0x1000 +#define VIRTIO_DEV_BLOCK 0x1001 + +/* + * PCI config space constants + */ +#define VTCFG_R_HOSTCAP 0 +#define VTCFG_R_GUESTCAP 4 +#define VTCFG_R_PFN 8 +#define VTCFG_R_QNUM 12 +#define VTCFG_R_QSEL 14 +#define VTCFG_R_QNOTIFY 16 +#define VTCFG_R_STATUS 18 +#define VTCFG_R_ISR 19 +#define VTCFG_R_CFG0 20 /* No MSI-X */ +#define VTCFG_R_CFG1 24 /* With MSI-X */ +#define VTCFG_R_MSIX 20 + +#endif /* _VIRTIO_H_ */ diff --git a/usr.sbin/bhyve/xmsr.c b/usr.sbin/bhyve/xmsr.c new file mode 100644 index 0000000..931b7d7 --- /dev/null +++ b/usr.sbin/bhyve/xmsr.c @@ -0,0 +1,261 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include + +#include "fbsdrun.h" +#include "xmsr.h" + +/* + * Trampoline for hypervisor direct 64-bit jump. 
+ * + * 0 - signature for guest->host verification + * 8 - kernel virtual address of trampoline + * 16 - instruction virtual address + * 24 - stack pointer virtual address + * 32 - CR3, physical address of kernel page table + * 40 - 24-byte area for null/code/data GDT entries + */ +#define MP_V64T_SIG 0xcafebabecafebabeULL +struct mp_v64tramp { + uint64_t mt_sig; + uint64_t mt_virt; + uint64_t mt_eip; + uint64_t mt_rsp; + uint64_t mt_cr3; + uint64_t mt_gdtr[3]; +}; + +/* + * CPU 0 is considered to be the BSP and is set to the RUNNING state. + * All other CPUs are set up in the INIT state. + */ +#define BSP 0 +enum cpu_bstate { + CPU_S_INIT, + CPU_S_SIPI, + CPU_S_RUNNING +} static cpu_b[VM_MAXCPU] = { [BSP] = CPU_S_RUNNING }; + +static void spinup_ap(struct vmctx *, int, int, uint64_t *); +static void spinup_ap_direct64(struct vmctx *, int, uintptr_t, uint64_t *); +/* Emulate a guest WRMSR. Only MSR 0x830 is handled, and only INIT and STARTUP delivery modes; anything else prints a message and exits. Returns vcpu, or the destination vcpu once a STARTUP IPI has brought it up. */ +int +emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val) +{ + int dest; + int mode; + int thiscpu; + int vec; + int error, retval; + uint64_t rip; + + retval = vcpu; + thiscpu = 1 << vcpu; /* NOTE(review): computed but never read */ + + /* + * The only MSR value handled is the x2apic ICR (interrupt command) register + */ + if (code != 0x830) { + printf("Unknown WRMSR code %x, val %lx, cpu %d\n", + code, val, vcpu); + exit(1); + } + + /* + * The value written to the MSR will generate an IPI to + * a set of CPUs. If this is a SIPI, create the initial + * state for the CPU and switch to it.
NOTE(review): other + * delivery modes are not injected as an interrupt here; they reach + * the default case below, which prints an error and exits. + */ + dest = val >> 32; + vec = val & APIC_VECTOR_MASK; + mode = val & APIC_DELMODE_MASK; + + switch (mode) { + case APIC_DELMODE_INIT: + assert(dest > 0); /* also rejects negative ids, e.g. a 0xffffffff destination, before cpu_b[] is indexed */ + assert(dest < guest_ncpus); + + /* + * Ignore legacy de-assert INITs in x2apic mode + */ + if ((val & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) { + break; + } + assert(cpu_b[dest] == CPU_S_INIT); + + /* + * Move CPU to wait-for-SIPI state + */ + error = vcpu_reset(ctx, dest); + assert(error == 0); + + cpu_b[dest] = CPU_S_SIPI; + break; + + case APIC_DELMODE_STARTUP: + assert(dest > 0); /* same bound as for INIT: never the BSP, never negative */ + assert(dest < guest_ncpus); + /* + * Ignore SIPIs in any state other than wait-for-SIPI + */ + if (cpu_b[dest] != CPU_S_SIPI) { + break; + } + + /* + * Bring up the AP and signal the main loop that it is + * available and to switch to it. + */ + spinup_ap(ctx, dest, vec, &rip); + cpu_b[dest] = CPU_S_RUNNING; + fbsdrun_addcpu(ctx, dest, rip); + retval = dest; + break; + + default: + printf("APIC delivery mode %lx not supported!\n", + val & APIC_DELMODE_MASK); + exit(1); + } + + return (retval); +} + +/* + * There are 2 startup modes possible here: + * - if the CPU supports 'unrestricted guest' mode, the spinup can + * set up the processor state in power-on 16-bit mode, with the CS:IP + * init'd to the specified low-mem 4K page. + * - if the guest has requested a 64-bit trampoline in the low-mem 4K + * page by placing in the specified signature, set up the register + * state using register state in the signature. Note that this + * requires accessing guest physical memory to read the signature + * while 'unrestricted mode' does not.
+ */ +static void +spinup_ap(struct vmctx *ctx, int newcpu, int vector, uint64_t *rip) +{ + int error; + uint16_t cs; + uint64_t desc_base; + uint32_t desc_limit, desc_access; + + if (fbsdrun_vmexit_on_hlt()) { + error = vm_set_capability(ctx, newcpu, VM_CAP_HALT_EXIT, 1); + assert(error == 0); + } + + if (fbsdrun_vmexit_on_pause()) { + error = vm_set_capability(ctx, newcpu, VM_CAP_PAUSE_EXIT, 1); + assert(error == 0); + } + + error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1); + if (error) { + /* + * If the guest does not support real-mode execution then + * we will bring up the AP directly in 64-bit mode. + */ + spinup_ap_direct64(ctx, newcpu, vector << PAGE_SHIFT, rip); + } else { + /* + * Update the %cs and %rip of the guest so that it starts + * executing real mode code at 'vector << 12'. + */ + *rip = 0; /* offset 0 within the code segment set up below */ + error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip); + assert(error == 0); + + error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base, + &desc_limit, &desc_access); + assert(error == 0); + + desc_base = vector << PAGE_SHIFT; /* only the base changes; limit and access are written back as read */ + error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS, + desc_base, desc_limit, desc_access); + assert(error == 0); + + cs = (vector << PAGE_SHIFT) >> 4; /* selector is the segment base divided by 16 */ + error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs); + assert(error == 0); + } +} +/* Start an AP directly in 64-bit mode from the mp_v64tramp structure found at guest-physical address gaddr; *rip receives the entry point. Prints a message and exits if the register setup fails. */ +static void +spinup_ap_direct64(struct vmctx *ctx, int newcpu, uintptr_t gaddr, + uint64_t *rip) +{ + struct mp_v64tramp *mvt; + char *errstr; + int error; + uint64_t gdtbase; + + mvt = paddr_guest2host(gaddr); + + assert(mvt->mt_sig == MP_V64T_SIG); + + /* + * Set up the 3-entry GDT using memory supplied in the + * guest's trampoline structure.
+ */ + vm_setup_freebsd_gdt(mvt->mt_gdtr); + +#define CHECK_ERROR(msg) \ + if (error != 0) { \ + errstr = msg; \ + goto err_exit; \ + } + + /* entry point */ + *rip = mvt->mt_eip; + + /* Get the guest virtual address of the GDT */ + gdtbase = mvt->mt_virt + __offsetof(struct mp_v64tramp, mt_gdtr); + + error = vm_setup_freebsd_registers(ctx, newcpu, mvt->mt_eip, + mvt->mt_cr3, gdtbase, mvt->mt_rsp); + CHECK_ERROR("vm_setup_freebsd_registers"); + + return; +err_exit: + printf("spinup_ap_direct64: machine state error: %s\n", errstr); + exit(1); +} diff --git a/usr.sbin/bhyve/xmsr.h b/usr.sbin/bhyve/xmsr.h new file mode 100644 index 0000000..8cebcea --- /dev/null +++ b/usr.sbin/bhyve/xmsr.h @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _XMSR_H_ +#define _XMSR_H_ + +int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val); + +#endif diff --git a/usr.sbin/vmmctl/Makefile b/usr.sbin/vmmctl/Makefile new file mode 100644 index 0000000..1f529b5 --- /dev/null +++ b/usr.sbin/vmmctl/Makefile @@ -0,0 +1,15 @@ +# +# $FreeBSD$ +# + +PROG= vmmctl +SRCS= vmmctl.c + +NO_MAN= + +DPADD= ${LIBVMMAPI} +LDADD= -lvmmapi + +CFLAGS+= -I${.CURDIR}/../../sys/amd64/vmm + +.include diff --git a/usr.sbin/vmmctl/sample.sh b/usr.sbin/vmmctl/sample.sh new file mode 100755 index 0000000..f38d0da --- /dev/null +++ b/usr.sbin/vmmctl/sample.sh @@ -0,0 +1,75 @@ +#!/bin/sh + +# $FreeBSD$ + +VMMCTL="sudo ./vmmctl" +VMNAME=sample + +${VMMCTL} --vm=${VMNAME} --create +${VMMCTL} --vm=${VMNAME} --set-lowmem=128 --set-highmem=256 +${VMMCTL} --vm=${VMNAME} --get-lowmem --get-highmem + +CR0_PE=$((1 << 0)) +CR0_PG=$((1 << 31)) +CR0=$(($CR0_PE | $CR0_PG)) +${VMMCTL} --vm=${VMNAME} --set-cr0=${CR0} --get-cr0 + +# XXX this is bogus the value of %cr3 should come from the loader +CR3=0 +${VMMCTL} --vm=${VMNAME} --set-cr3=${CR3} --get-cr3 + +CR4_PAE=$((1 << 5)) +CR4=$((${CR4_PAE})) +${VMMCTL} --vm=${VMNAME} --set-cr4=${CR4} --get-cr4 + +DR7=0x00000400 # Table 9-1 from Intel Architecture Manual 3A +${VMMCTL} --vm=${VMNAME} --set-dr7=${DR7} --get-dr7 + +# +# XXX the values of rsp and rip are bogus and should come from the loader. 
+# +RSP=0xa5a5a5a5 +RIP=0x0000bfbfbfbf0000 +RFLAGS=0x2 +${VMMCTL} --vm=${VMNAME} --set-rsp=${RSP} --get-rsp +${VMMCTL} --vm=${VMNAME} --set-rip=${RIP} --get-rip +${VMMCTL} --vm=${VMNAME} --set-rflags=${RFLAGS} --get-rflags + +# Set "hidden" state of %cs descriptor to indicate long mode code segment. +# +# Note that this should match the contents of the entry pointed to by the +# segment selector in the GDTR. +# +${VMMCTL} --vm=${VMNAME} --set-desc-cs --desc-access=0x00002098 --get-desc-cs + +# Set "hidden" state of all data descriptors to indicate a usable segment. +# The only useful fields are the "Present" and "Descriptor Type" bits. +${VMMCTL} --vm=${VMNAME} --set-desc-ds --desc-access=0x00000090 --get-desc-ds +${VMMCTL} --vm=${VMNAME} --set-desc-es --desc-access=0x00000090 --get-desc-es +${VMMCTL} --vm=${VMNAME} --set-desc-fs --desc-access=0x00000090 --get-desc-fs +${VMMCTL} --vm=${VMNAME} --set-desc-gs --desc-access=0x00000090 --get-desc-gs +${VMMCTL} --vm=${VMNAME} --set-desc-ss --desc-access=0x00000090 --get-desc-ss + +# +# Set the code segment selector to point to entry at offset 8 in the GDTR. +# +${VMMCTL} --vm=${VMNAME} --set-cs=0x0008 --get-cs + +# Set all the remaining data segment selectors to point to entry at offset +# 16 in the GDTR. +${VMMCTL} --vm=${VMNAME} --set-ds=0x0010 --get-ds +${VMMCTL} --vm=${VMNAME} --set-es=0x0010 --get-es +${VMMCTL} --vm=${VMNAME} --set-fs=0x0010 --get-fs +${VMMCTL} --vm=${VMNAME} --set-gs=0x0010 --get-gs +${VMMCTL} --vm=${VMNAME} --set-ss=0x0010 --get-ss + +# XXX the value of the GDTR should come from the loader. 
+# Set the GDTR +GDTR_BASE=0xffff0000 +GDTR_LIMIT=0x10 +${VMMCTL} --vm=${VMNAME} --set-desc-gdtr --desc-base=${GDTR_BASE} --desc-limit=${GDTR_LIMIT} --get-desc-gdtr + +${VMMCTL} --vm=${VMNAME} --set-pinning=0 --get-pinning +${VMMCTL} --vm=${VMNAME} --set-pinning=-1 --get-pinning + +${VMMCTL} --vm=${VMNAME} --destroy diff --git a/usr.sbin/vmmctl/vmmctl.c b/usr.sbin/vmmctl/vmmctl.c new file mode 100644 index 0000000..678f98b --- /dev/null +++ b/usr.sbin/vmmctl/vmmctl.c @@ -0,0 +1,1485 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "intel/vmcs.h" + +#define MB (1UL << 20) +#define GB (1UL << 30) + +#define REQ_ARG required_argument +#define NO_ARG no_argument +#define OPT_ARG optional_argument + +static const char *progname; + +static void +usage(void) +{ + /* Print the complete option summary to stderr and exit with status 1. */ + (void)fprintf(stderr, + "Usage: %s --vm=\n" + " [--cpu=]\n" + " [--create]\n" + " [--destroy]\n" + " [--get-stats]\n" + " [--set-desc-ds]\n" + " [--get-desc-ds]\n" + " [--set-desc-es]\n" + " [--get-desc-es]\n" + " [--set-desc-gs]\n" + " [--get-desc-gs]\n" + " [--set-desc-fs]\n" + " [--get-desc-fs]\n" + " [--set-desc-cs]\n" + " [--get-desc-cs]\n" + " [--set-desc-ss]\n" + " [--get-desc-ss]\n" + " [--set-desc-tr]\n" + " [--get-desc-tr]\n" + " [--set-desc-ldtr]\n" + " [--get-desc-ldtr]\n" + " [--set-desc-gdtr]\n" + " [--get-desc-gdtr]\n" + " [--set-desc-idtr]\n" + " [--get-desc-idtr]\n" + " [--run]\n" + " [--capname=]\n" + " [--getcap]\n" + " [--setcap=<0|1>]\n" + " [--desc-base=]\n" + " [--desc-limit=]\n" + " [--desc-access=]\n" + " [--set-cr0=]\n" + " [--get-cr0]\n" + " [--set-cr3=]\n" + " [--get-cr3]\n" + " [--set-cr4=]\n" + " [--get-cr4]\n" + " [--set-dr7=]\n" + " [--get-dr7]\n" + " [--set-rsp=]\n" + " [--get-rsp]\n" + " [--set-rip=]\n" + " [--get-rip]\n" + " [--get-rax]\n" + " [--set-rax=]\n" + " [--get-rbx]\n" + " [--get-rcx]\n" + " [--get-rdx]\n" + " [--get-rsi]\n" + " [--get-rdi]\n" + " [--get-rbp]\n" + " [--get-r8]\n" + " [--get-r9]\n" + " [--get-r10]\n" + " [--get-r11]\n" + " [--get-r12]\n" + " [--get-r13]\n" + " [--get-r14]\n" + " [--get-r15]\n" + " [--set-rflags=]\n" + " [--get-rflags]\n" + " [--set-cs]\n" + " [--get-cs]\n" + " [--set-ds]\n" + " [--get-ds]\n" + " [--set-es]\n" + " [--get-es]\n" + " [--set-fs]\n" + " [--get-fs]\n" + " [--set-gs]\n" + " [--get-gs]\n" + " [--set-ss]\n" + " 
[--get-ss]\n" + " [--get-tr]\n" + " [--get-ldtr]\n" + " [--get-vmcs-pinbased-ctls]\n" + " [--get-vmcs-procbased-ctls]\n" + " [--get-vmcs-procbased-ctls2]\n" + " [--get-vmcs-entry-interruption-info]\n" + " [--set-vmcs-entry-interruption-info=]\n" + " [--get-vmcs-eptp]\n" + " [--get-vmcs-guest-physical-address]\n" + " [--get-vmcs-guest-linear-address]\n" + " [--set-vmcs-exception-bitmap]\n" + " [--get-vmcs-exception-bitmap]\n" + " [--get-vmcs-io-bitmap-address]\n" + " [--get-vmcs-tsc-offset]\n" + " [--get-vmcs-guest-pat]\n" + " [--get-vmcs-host-pat]\n" + " [--get-vmcs-host-cr0]\n" + " [--get-vmcs-host-cr3]\n" + " [--get-vmcs-host-cr4]\n" + " [--get-vmcs-host-rip]\n" + " [--get-vmcs-host-rsp]\n" + " [--get-vmcs-cr0-mask]\n" + " [--get-vmcs-cr0-shadow]\n" + " [--get-vmcs-cr4-mask]\n" + " [--get-vmcs-cr4-shadow]\n" + " [--get-vmcs-cr3-targets]\n" + " [--get-vmcs-apic-access-address]\n" + " [--get-vmcs-virtual-apic-address]\n" + " [--get-vmcs-tpr-threshold]\n" + " [--get-vmcs-msr-bitmap]\n" + " [--get-vmcs-msr-bitmap-address]\n" + " [--get-vmcs-vpid]\n" + " [--get-vmcs-ple-gap]\n" + " [--get-vmcs-ple-window]\n" + " [--get-vmcs-instruction-error]\n" + " [--get-vmcs-exit-ctls]\n" + " [--get-vmcs-entry-ctls]\n" + " [--get-vmcs-guest-sysenter]\n" + " [--get-vmcs-link]\n" + " [--get-vmcs-exit-reason]\n" + " [--get-vmcs-exit-qualification]\n" + " [--get-vmcs-exit-interruption-info]\n" + " [--get-vmcs-exit-interruption-error]\n" + " [--get-vmcs-interruptibility]\n" + " [--set-pinning=]\n" + " [--get-pinning]\n" + " [--set-lowmem=]\n" + " [--get-lowmem]\n" + " [--set-highmem=]\n" + " [--get-highmem]\n", + progname); + exit(1); +} + +static int get_stats, getcap, setcap, capval; +static const char *capname; +static int create, destroy, get_lowmem, get_highmem; +static uint64_t lowmem, highmem; +static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4; +static int set_efer, get_efer; +static int set_dr7, get_dr7; +static int set_rsp, get_rsp, set_rip, get_rip, set_rflags, 
get_rflags; +static int set_rax, get_rax; +static int get_rbx, get_rcx, get_rdx, get_rsi, get_rdi, get_rbp; +static int get_r8, get_r9, get_r10, get_r11, get_r12, get_r13, get_r14, get_r15; +static int set_desc_ds, get_desc_ds; +static int set_desc_es, get_desc_es; +static int set_desc_fs, get_desc_fs; +static int set_desc_gs, get_desc_gs; +static int set_desc_cs, get_desc_cs; +static int set_desc_ss, get_desc_ss; +static int set_desc_gdtr, get_desc_gdtr; +static int set_desc_idtr, get_desc_idtr; +static int set_desc_tr, get_desc_tr; +static int set_desc_ldtr, get_desc_ldtr; +static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; +static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; +static int set_pinning, get_pinning, pincpu; +static int run; + +/* + * VMCS-specific fields + */ +static int get_pinbased_ctls, get_procbased_ctls, get_procbased_ctls2; +static int get_eptp, get_io_bitmap, get_tsc_offset; +static int get_vmcs_entry_interruption_info, set_vmcs_entry_interruption_info; +static int get_vmcs_interruptibility; +uint32_t vmcs_entry_interruption_info; +static int get_vmcs_gpa, get_vmcs_gla; +static int get_exception_bitmap, set_exception_bitmap, exception_bitmap; +static int get_cr0_mask, get_cr0_shadow; +static int get_cr4_mask, get_cr4_shadow; +static int get_cr3_targets; +static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold; +static int get_msr_bitmap, get_msr_bitmap_address; +static int get_vpid, get_ple_gap, get_ple_window; +static int get_inst_err, get_exit_ctls, get_entry_ctls; +static int get_host_cr0, get_host_cr3, get_host_cr4; +static int get_host_rip, get_host_rsp; +static int get_guest_pat, get_host_pat; +static int get_guest_sysenter, get_vmcs_link; +static int get_vmcs_exit_reason, get_vmcs_exit_qualification; +static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error; + +static uint64_t desc_base; +static uint32_t desc_limit, desc_access; + +static void 
+dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) +{ + printf("vm exit[%d]\n", vcpu); + printf("\trip\t\t0x%016lx\n", vmexit->rip); + printf("\tinst_length\t%d\n", vmexit->inst_length); + switch (vmexit->exitcode) { + case VM_EXITCODE_INOUT: + printf("\treason\t\tINOUT\n"); + printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT"); + printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes); + printf("\tflags\t\t%s%s\n", + vmexit->u.inout.string ? "STRING " : "", + vmexit->u.inout.rep ? "REP " : ""); + printf("\tport\t\t0x%04x\n", vmexit->u.inout.port); + printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax); + break; + case VM_EXITCODE_VMX: + printf("\treason\t\tVMX\n"); + printf("\terror\t\t%d\n", vmexit->u.vmx.error); + printf("\texit_reason\t0x%08x (%u)\n", + vmexit->u.vmx.exit_reason, vmexit->u.vmx.exit_reason); + printf("\tqualification\t0x%016lx\n", + vmexit->u.vmx.exit_qualification); + break; + default: + printf("*** unknown vm run exitcode %d\n", vmexit->exitcode); + break; + } +} + +static int +dump_vmcs_msr_bitmap(int vcpu, u_long addr) +{ + int error, fd, byte, bit, readable, writeable; + u_int msr; + const char *bitmap; + + error = -1; + bitmap = MAP_FAILED; + + fd = open("/dev/mem", O_RDONLY, 0); + if (fd < 0) + goto done; + + bitmap = mmap(NULL, PAGE_SIZE, PROT_READ, 0, fd, addr); + if (bitmap == MAP_FAILED) + goto done; + + for (msr = 0; msr < 0x2000; msr++) { + byte = msr / 8; + bit = msr & 0x7; + + /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ + readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; + writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; + if (readable || writeable) { + printf("msr 0x%08x[%d]\t\t%c%c\n", msr, vcpu, + readable ? 'R' : '-', + writeable ? 'W' : '-'); + } + + /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ + byte += 1024; + readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; + writeable = (bitmap[2048 + byte] & (1 << bit)) ? 
0 : 1; + if (readable || writeable) { + printf("msr 0x%08x[%d]\t\t%c%c\n", + 0xc0000000 + msr, vcpu, + readable ? 'R' : '-', + writeable ? 'W' : '-'); + } + } + + error = 0; +done: + if (bitmap != MAP_FAILED) + munmap((void *)bitmap, PAGE_SIZE); + if (fd >= 0) + close(fd); + return (error); +} + +static int +vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) +{ + + return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val)); +} + +static int +vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) +{ + + return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val)); +} + +enum { + VMNAME = 1000, /* avoid collision with return values from getopt */ + VCPU, + SET_LOWMEM, + SET_HIGHMEM, + SET_EFER, + SET_CR0, + SET_CR3, + SET_CR4, + SET_DR7, + SET_RSP, + SET_RIP, + SET_RAX, + SET_RFLAGS, + DESC_BASE, + DESC_LIMIT, + DESC_ACCESS, + SET_CS, + SET_DS, + SET_ES, + SET_FS, + SET_GS, + SET_SS, + SET_TR, + SET_LDTR, + SET_PINNING, + SET_VMCS_EXCEPTION_BITMAP, + SET_VMCS_ENTRY_INTERRUPTION_INFO, + SET_CAP, + CAPNAME, +}; + +int +main(int argc, char *argv[]) +{ + char *vmname; + int error, ch, vcpu; + vm_paddr_t hpa; + size_t len; + struct vm_exit vmexit; + uint64_t ctl, eptp, bm, tsc_off, addr, u64; + struct vmctx *ctx; + + uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat; + uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp; + uint64_t r8, r9, r10, r11, r12, r13, r14, r15; + uint64_t cs, ds, es, fs, gs, ss, tr, ldtr; + + struct option opts[] = { + { "vm", REQ_ARG, 0, VMNAME }, + { "cpu", REQ_ARG, 0, VCPU }, + { "set-lowmem", REQ_ARG, 0, SET_LOWMEM }, + { "set-highmem",REQ_ARG, 0, SET_HIGHMEM }, + { "set-efer", REQ_ARG, 0, SET_EFER }, + { "set-cr0", REQ_ARG, 0, SET_CR0 }, + { "set-cr3", REQ_ARG, 0, SET_CR3 }, + { "set-cr4", REQ_ARG, 0, SET_CR4 }, + { "set-dr7", REQ_ARG, 0, SET_DR7 }, + { "set-rsp", REQ_ARG, 0, SET_RSP }, + { "set-rip", REQ_ARG, 0, SET_RIP }, + { "set-rax", REQ_ARG, 0, SET_RAX }, + { "set-rflags", REQ_ARG, 0, 
SET_RFLAGS }, + { "desc-base", REQ_ARG, 0, DESC_BASE }, + { "desc-limit", REQ_ARG, 0, DESC_LIMIT }, + { "desc-access",REQ_ARG, 0, DESC_ACCESS }, + { "set-cs", REQ_ARG, 0, SET_CS }, + { "set-ds", REQ_ARG, 0, SET_DS }, + { "set-es", REQ_ARG, 0, SET_ES }, + { "set-fs", REQ_ARG, 0, SET_FS }, + { "set-gs", REQ_ARG, 0, SET_GS }, + { "set-ss", REQ_ARG, 0, SET_SS }, + { "set-tr", REQ_ARG, 0, SET_TR }, + { "set-ldtr", REQ_ARG, 0, SET_LDTR }, + { "set-pinning",REQ_ARG, 0, SET_PINNING }, + { "set-vmcs-exception-bitmap", + REQ_ARG, 0, SET_VMCS_EXCEPTION_BITMAP }, + { "set-vmcs-entry-interruption-info", + REQ_ARG, 0, SET_VMCS_ENTRY_INTERRUPTION_INFO }, + { "capname", REQ_ARG, 0, CAPNAME }, + { "setcap", REQ_ARG, 0, SET_CAP }, + { "getcap", NO_ARG, &getcap, 1 }, + { "get-stats", NO_ARG, &get_stats, 1 }, + { "get-desc-ds",NO_ARG, &get_desc_ds, 1 }, + { "set-desc-ds",NO_ARG, &set_desc_ds, 1 }, + { "get-desc-es",NO_ARG, &get_desc_es, 1 }, + { "set-desc-es",NO_ARG, &set_desc_es, 1 }, + { "get-desc-ss",NO_ARG, &get_desc_ss, 1 }, + { "set-desc-ss",NO_ARG, &set_desc_ss, 1 }, + { "get-desc-cs",NO_ARG, &get_desc_cs, 1 }, + { "set-desc-cs",NO_ARG, &set_desc_cs, 1 }, + { "get-desc-fs",NO_ARG, &get_desc_fs, 1 }, + { "set-desc-fs",NO_ARG, &set_desc_fs, 1 }, + { "get-desc-gs",NO_ARG, &get_desc_gs, 1 }, + { "set-desc-gs",NO_ARG, &set_desc_gs, 1 }, + { "get-desc-tr",NO_ARG, &get_desc_tr, 1 }, + { "set-desc-tr",NO_ARG, &set_desc_tr, 1 }, + { "set-desc-ldtr", NO_ARG, &set_desc_ldtr, 1 }, + { "get-desc-ldtr", NO_ARG, &get_desc_ldtr, 1 }, + { "set-desc-gdtr", NO_ARG, &set_desc_gdtr, 1 }, + { "get-desc-gdtr", NO_ARG, &get_desc_gdtr, 1 }, + { "set-desc-idtr", NO_ARG, &set_desc_idtr, 1 }, + { "get-desc-idtr", NO_ARG, &get_desc_idtr, 1 }, + { "get-lowmem", NO_ARG, &get_lowmem, 1 }, + { "get-highmem",NO_ARG, &get_highmem, 1 }, + { "get-efer", NO_ARG, &get_efer, 1 }, + { "get-cr0", NO_ARG, &get_cr0, 1 }, + { "get-cr3", NO_ARG, &get_cr3, 1 }, + { "get-cr4", NO_ARG, &get_cr4, 1 }, + { "get-dr7", NO_ARG, 
&get_dr7, 1 }, + { "get-rsp", NO_ARG, &get_rsp, 1 }, + { "get-rip", NO_ARG, &get_rip, 1 }, + { "get-rax", NO_ARG, &get_rax, 1 }, + { "get-rbx", NO_ARG, &get_rbx, 1 }, + { "get-rcx", NO_ARG, &get_rcx, 1 }, + { "get-rdx", NO_ARG, &get_rdx, 1 }, + { "get-rsi", NO_ARG, &get_rsi, 1 }, + { "get-rdi", NO_ARG, &get_rdi, 1 }, + { "get-rbp", NO_ARG, &get_rbp, 1 }, + { "get-r8", NO_ARG, &get_r8, 1 }, + { "get-r9", NO_ARG, &get_r9, 1 }, + { "get-r10", NO_ARG, &get_r10, 1 }, + { "get-r11", NO_ARG, &get_r11, 1 }, + { "get-r12", NO_ARG, &get_r12, 1 }, + { "get-r13", NO_ARG, &get_r13, 1 }, + { "get-r14", NO_ARG, &get_r14, 1 }, + { "get-r15", NO_ARG, &get_r15, 1 }, + { "get-rflags", NO_ARG, &get_rflags, 1 }, + { "get-cs", NO_ARG, &get_cs, 1 }, + { "get-ds", NO_ARG, &get_ds, 1 }, + { "get-es", NO_ARG, &get_es, 1 }, + { "get-fs", NO_ARG, &get_fs, 1 }, + { "get-gs", NO_ARG, &get_gs, 1 }, + { "get-ss", NO_ARG, &get_ss, 1 }, + { "get-tr", NO_ARG, &get_tr, 1 }, + { "get-ldtr", NO_ARG, &get_ldtr, 1 }, + { "get-vmcs-pinbased-ctls", + NO_ARG, &get_pinbased_ctls, 1 }, + { "get-vmcs-procbased-ctls", + NO_ARG, &get_procbased_ctls, 1 }, + { "get-vmcs-procbased-ctls2", + NO_ARG, &get_procbased_ctls2, 1 }, + { "get-vmcs-guest-linear-address", + NO_ARG, &get_vmcs_gla, 1 }, + { "get-vmcs-guest-physical-address", + NO_ARG, &get_vmcs_gpa, 1 }, + { "get-vmcs-entry-interruption-info", + NO_ARG, &get_vmcs_entry_interruption_info, 1}, + { "get-vmcs-eptp", NO_ARG, &get_eptp, 1 }, + { "get-vmcs-exception-bitmap", + NO_ARG, &get_exception_bitmap, 1 }, + { "get-vmcs-io-bitmap-address", + NO_ARG, &get_io_bitmap, 1 }, + { "get-vmcs-tsc-offset", NO_ARG,&get_tsc_offset, 1 }, + { "get-vmcs-cr0-mask", NO_ARG, &get_cr0_mask, 1 }, + { "get-vmcs-cr0-shadow", NO_ARG,&get_cr0_shadow, 1 }, + { "get-vmcs-cr4-mask", NO_ARG, &get_cr4_mask, 1 }, + { "get-vmcs-cr4-shadow", NO_ARG,&get_cr4_shadow, 1 }, + { "get-vmcs-cr3-targets", NO_ARG, &get_cr3_targets, 1}, + { "get-vmcs-apic-access-address", + NO_ARG, 
&get_apic_access_addr, 1}, + { "get-vmcs-virtual-apic-address", + NO_ARG, &get_virtual_apic_addr, 1}, + { "get-vmcs-tpr-threshold", + NO_ARG, &get_tpr_threshold, 1 }, + { "get-vmcs-msr-bitmap", + NO_ARG, &get_msr_bitmap, 1 }, + { "get-vmcs-msr-bitmap-address", + NO_ARG, &get_msr_bitmap_address, 1 }, + { "get-vmcs-vpid", NO_ARG, &get_vpid, 1 }, + { "get-vmcs-ple-gap", NO_ARG, &get_ple_gap, 1 }, + { "get-vmcs-ple-window", NO_ARG,&get_ple_window,1 }, + { "get-vmcs-instruction-error", + NO_ARG, &get_inst_err, 1 }, + { "get-vmcs-exit-ctls", NO_ARG, &get_exit_ctls, 1 }, + { "get-vmcs-entry-ctls", + NO_ARG, &get_entry_ctls, 1 }, + { "get-vmcs-guest-pat", NO_ARG, &get_guest_pat, 1 }, + { "get-vmcs-host-pat", NO_ARG, &get_host_pat, 1 }, + { "get-vmcs-host-cr0", + NO_ARG, &get_host_cr0, 1 }, + { "get-vmcs-host-cr3", + NO_ARG, &get_host_cr3, 1 }, + { "get-vmcs-host-cr4", + NO_ARG, &get_host_cr4, 1 }, + { "get-vmcs-host-rip", + NO_ARG, &get_host_rip, 1 }, + { "get-vmcs-host-rsp", + NO_ARG, &get_host_rsp, 1 }, + { "get-vmcs-guest-sysenter", + NO_ARG, &get_guest_sysenter, 1 }, + { "get-vmcs-link", NO_ARG, &get_vmcs_link, 1 }, + { "get-vmcs-exit-reason", + NO_ARG, &get_vmcs_exit_reason, 1 }, + { "get-vmcs-exit-qualification", + NO_ARG, &get_vmcs_exit_qualification, 1 }, + { "get-vmcs-exit-interruption-info", + NO_ARG, &get_vmcs_exit_interruption_info, 1}, + { "get-vmcs-exit-interruption-error", + NO_ARG, &get_vmcs_exit_interruption_error, 1}, + { "get-vmcs-interruptibility", + NO_ARG, &get_vmcs_interruptibility, 1 }, + { "get-pinning",NO_ARG, &get_pinning, 1 }, + { "run", NO_ARG, &run, 1 }, + { "create", NO_ARG, &create, 1 }, + { "destroy", NO_ARG, &destroy, 1 }, + { NULL, 0, NULL, 0 } + }; + + vcpu = 0; + progname = basename(argv[0]); + + while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) { + switch (ch) { + case 0: + break; + case VMNAME: + vmname = optarg; + break; + case VCPU: + vcpu = atoi(optarg); + break; + case SET_LOWMEM: + lowmem = atoi(optarg) * MB; + lowmem 
= roundup(lowmem, 2 * MB); + break; + case SET_HIGHMEM: + highmem = atoi(optarg) * MB; + highmem = roundup(highmem, 2 * MB); + break; + case SET_EFER: + efer = strtoul(optarg, NULL, 0); + set_efer = 1; + break; + case SET_CR0: + cr0 = strtoul(optarg, NULL, 0); + set_cr0 = 1; + break; + case SET_CR3: + cr3 = strtoul(optarg, NULL, 0); + set_cr3 = 1; + break; + case SET_CR4: + cr4 = strtoul(optarg, NULL, 0); + set_cr4 = 1; + break; + case SET_DR7: + dr7 = strtoul(optarg, NULL, 0); + set_dr7 = 1; + break; + case SET_RSP: + rsp = strtoul(optarg, NULL, 0); + set_rsp = 1; + break; + case SET_RIP: + rip = strtoul(optarg, NULL, 0); + set_rip = 1; + break; + case SET_RAX: + rax = strtoul(optarg, NULL, 0); + set_rax = 1; + break; + case SET_RFLAGS: + rflags = strtoul(optarg, NULL, 0); + set_rflags = 1; + break; + case DESC_BASE: + desc_base = strtoul(optarg, NULL, 0); + break; + case DESC_LIMIT: + desc_limit = strtoul(optarg, NULL, 0); + break; + case DESC_ACCESS: + desc_access = strtoul(optarg, NULL, 0); + break; + case SET_CS: + cs = strtoul(optarg, NULL, 0); + set_cs = 1; + break; + case SET_DS: + ds = strtoul(optarg, NULL, 0); + set_ds = 1; + break; + case SET_ES: + es = strtoul(optarg, NULL, 0); + set_es = 1; + break; + case SET_FS: + fs = strtoul(optarg, NULL, 0); + set_fs = 1; + break; + case SET_GS: + gs = strtoul(optarg, NULL, 0); + set_gs = 1; + break; + case SET_SS: + ss = strtoul(optarg, NULL, 0); + set_ss = 1; + break; + case SET_TR: + tr = strtoul(optarg, NULL, 0); + set_tr = 1; + break; + case SET_LDTR: + ldtr = strtoul(optarg, NULL, 0); + set_ldtr = 1; + break; + case SET_PINNING: + pincpu = strtol(optarg, NULL, 0); + set_pinning = 1; + break; + case SET_VMCS_EXCEPTION_BITMAP: + exception_bitmap = strtoul(optarg, NULL, 0); + set_exception_bitmap = 1; + break; + case SET_VMCS_ENTRY_INTERRUPTION_INFO: + vmcs_entry_interruption_info = strtoul(optarg, NULL, 0); + set_vmcs_entry_interruption_info = 1; + break; + case SET_CAP: + capval = strtoul(optarg, NULL, 0); + 
setcap = 1; + break; + case CAPNAME: + capname = optarg; + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (vmname == NULL) + usage(); + + error = 0; + + if (!error && create) + error = vm_create(vmname); + + if (!error) { + ctx = vm_open(vmname); + if (ctx == NULL) + error = -1; + } + + if (!error && lowmem) + error = vm_setup_memory(ctx, 0, lowmem, NULL); + + if (!error && highmem) + error = vm_setup_memory(ctx, 4 * GB, highmem, NULL); + + if (!error && set_efer) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer); + + if (!error && set_cr0) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR0, cr0); + + if (!error && set_cr3) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR3, cr3); + + if (!error && set_cr4) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR4, cr4); + + if (!error && set_dr7) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); + + if (!error && set_rsp) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RSP, rsp); + + if (!error && set_rip) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, rip); + + if (!error && set_rax) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, rax); + + if (!error && set_rflags) { + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, + rflags); + } + + if (!error && set_desc_ds) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_es) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_ES, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_ss) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_cs) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_fs) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_FS, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_gs) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GS, 
+ desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_tr) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_ldtr) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_LDTR, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_gdtr) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, + desc_base, desc_limit, 0); + } + + if (!error && set_desc_idtr) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, + desc_base, desc_limit, 0); + } + + if (!error && set_cs) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CS, cs); + + if (!error && set_ds) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DS, ds); + + if (!error && set_es) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_ES, es); + + if (!error && set_fs) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_FS, fs); + + if (!error && set_gs) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_GS, gs); + + if (!error && set_ss) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_SS, ss); + + if (!error && set_tr) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_TR, tr); + + if (!error && set_ldtr) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr); + + if (!error && set_pinning) + error = vm_set_pinning(ctx, vcpu, pincpu); + + if (!error && set_exception_bitmap) { + error = vm_set_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, + exception_bitmap); + } + + if (!error && set_vmcs_entry_interruption_info) { + error = vm_set_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO, + vmcs_entry_interruption_info); + } + + if (!error && get_lowmem) { + error = vm_get_memory_seg(ctx, 0, &hpa, &len); + if (error == 0) + printf("lowmem\t\t0x%016lx/%ld\n", hpa, len); + } + + if (!error && get_highmem) { + error = vm_get_memory_seg(ctx, 4 * GB, &hpa, &len); + if (error == 0) + printf("highmem\t\t0x%016lx/%ld\n", hpa, len); + } + + if (!error && get_efer) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer); + if (error == 
0) + printf("efer[%d]\t\t0x%016lx\n", vcpu, efer); + } + + if (!error && get_cr0) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0); + if (error == 0) + printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0); + } + + if (!error && get_cr3) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3); + if (error == 0) + printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3); + } + + if (!error && get_cr4) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4); + if (error == 0) + printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4); + } + + if (!error && get_dr7) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); + if (error == 0) + printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7); + } + + if (!error && get_rsp) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp); + if (error == 0) + printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp); + } + + if (!error && get_rip) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); + if (error == 0) + printf("rip[%d]\t\t0x%016lx\n", vcpu, rip); + } + + if (!error && get_rax) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax); + if (error == 0) + printf("rax[%d]\t\t0x%016lx\n", vcpu, rax); + } + + if (!error && get_rbx) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx); + if (error == 0) + printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx); + } + + if (!error && get_rcx) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx); + if (error == 0) + printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx); + } + + if (!error && get_rdx) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx); + if (error == 0) + printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx); + } + + if (!error && get_rsi) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi); + if (error == 0) + printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi); + } + + if (!error && get_rdi) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi); + if (error == 0) + printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi); + } + + if (!error && get_rbp) { + error 
= vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp); + if (error == 0) + printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp); + } + + if (!error && get_r8) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8); + if (error == 0) + printf("r8[%d]\t\t0x%016lx\n", vcpu, r8); + } + + if (!error && get_r9) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9); + if (error == 0) + printf("r9[%d]\t\t0x%016lx\n", vcpu, r9); + } + + if (!error && get_r10) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10); + if (error == 0) + printf("r10[%d]\t\t0x%016lx\n", vcpu, r10); + } + + if (!error && get_r11) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11); + if (error == 0) + printf("r11[%d]\t\t0x%016lx\n", vcpu, r11); + } + + if (!error && get_r12) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12); + if (error == 0) + printf("r12[%d]\t\t0x%016lx\n", vcpu, r12); + } + + if (!error && get_r13) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13); + if (error == 0) + printf("r13[%d]\t\t0x%016lx\n", vcpu, r13); + } + + if (!error && get_r14) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14); + if (error == 0) + printf("r14[%d]\t\t0x%016lx\n", vcpu, r14); + } + + if (!error && get_r15) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15); + if (error == 0) + printf("r15[%d]\t\t0x%016lx\n", vcpu, r15); + } + + if (!error && get_rflags) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, + &rflags); + if (error == 0) + printf("rflags[%d]\t0x%016lx\n", vcpu, rflags); + } + + if (!error && get_stats) { + int i, num_stats; + uint64_t *stats; + struct timeval tv; + const char *desc; + + stats = vm_get_stats(ctx, vcpu, &tv, &num_stats); + if (stats != NULL) { + printf("vcpu%d\n", vcpu); + for (i = 0; i < num_stats; i++) { + desc = vm_get_stat_desc(ctx, i); + printf("%-32s\t%ld\n", desc, stats[i]); + } + } + } + + if (!error && get_desc_ds) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS, + &desc_base, 
&desc_limit, &desc_access); + if (error == 0) { + printf("ds desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && get_desc_es) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("es desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && get_desc_fs) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("fs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && get_desc_gs) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("gs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && get_desc_ss) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("ss desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && get_desc_cs) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("cs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && get_desc_tr) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("tr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && get_desc_ldtr) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("ldtr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && get_desc_gdtr) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR, + &desc_base, 
&desc_limit, &desc_access); + if (error == 0) { + printf("gdtr[%d]\t\t0x%016lx/0x%08x\n", + vcpu, desc_base, desc_limit); + } + } + + if (!error && get_desc_idtr) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("idtr[%d]\t\t0x%016lx/0x%08x\n", + vcpu, desc_base, desc_limit); + } + } + + if (!error && get_cs) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs); + if (error == 0) + printf("cs[%d]\t\t0x%04lx\n", vcpu, cs); + } + + if (!error && get_ds) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds); + if (error == 0) + printf("ds[%d]\t\t0x%04lx\n", vcpu, ds); + } + + if (!error && get_es) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es); + if (error == 0) + printf("es[%d]\t\t0x%04lx\n", vcpu, es); + } + + if (!error && get_fs) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_FS, &fs); + if (error == 0) + printf("fs[%d]\t\t0x%04lx\n", vcpu, fs); + } + + if (!error && get_gs) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs); + if (error == 0) + printf("gs[%d]\t\t0x%04lx\n", vcpu, gs); + } + + if (!error && get_ss) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss); + if (error == 0) + printf("ss[%d]\t\t0x%04lx\n", vcpu, ss); + } + + if (!error && get_tr) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr); + if (error == 0) + printf("tr[%d]\t\t0x%04lx\n", vcpu, tr); + } + + if (!error && get_ldtr) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr); + if (error == 0) + printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr); + } + + if (!error && get_pinning) { + error = vm_get_pinning(ctx, vcpu, &pincpu); + if (error == 0) { + if (pincpu < 0) + printf("pincpu[%d]\tunpinned\n", vcpu); + else + printf("pincpu[%d]\t%d\n", vcpu, pincpu); + } + } + + if (!error && get_pinbased_ctls) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl); + if (error == 0) + printf("pinbased_ctls[%d]\t0x%08x\n", vcpu, ctl); + } + + if 
(!error && get_procbased_ctls) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_PRI_PROC_BASED_CTLS, &ctl); + if (error == 0) + printf("procbased_ctls[%d]\t0x%08x\n", vcpu, ctl); + } + + if (!error && get_procbased_ctls2) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_SEC_PROC_BASED_CTLS, &ctl); + if (error == 0) + printf("procbased_ctls2[%d]\t0x%08x\n", vcpu, ctl); + } + + if (!error && get_vmcs_gla) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_LINEAR_ADDRESS, &u64); + if (error == 0) + printf("gla[%d]\t\t0x%016lx\n", vcpu, u64); + } + + if (!error && get_vmcs_gpa) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_PHYSICAL_ADDRESS, &u64); + if (error == 0) + printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64); + } + + if (!error && get_vmcs_entry_interruption_info) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64); + if (error == 0) { + printf("entry_interruption_info[%d]\t0x%08x\n", + vcpu, u64); + } + } + + if (!error && get_eptp) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp); + if (error == 0) + printf("eptp[%d]\t\t0x%016lx\n", vcpu, eptp); + } + + if (!error && get_exception_bitmap) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, + &bm); + if (error == 0) + printf("exception_bitmap[%d]\t0x%08x\n", vcpu, bm); + } + + if (!error && get_io_bitmap) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A, &bm); + if (error == 0) + printf("io_bitmap_a[%d]\t0x%08x\n", vcpu, bm); + error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_B, &bm); + if (error == 0) + printf("io_bitmap_b[%d]\t0x%08x\n", vcpu, bm); + } + + if (!error && get_tsc_offset) { + uint64_t tscoff; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET, &tscoff); + if (error == 0) + printf("tsc_offset[%d]\t0x%016lx\n", tscoff); + } + + if (!error && get_cr0_mask) { + uint64_t cr0mask; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask); + if (error == 0) + printf("cr0_mask[%d]\t\t0x%016lx\n", cr0mask); + } + + if (!error && 
get_cr0_shadow) { + uint64_t cr0shadow; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW, + &cr0shadow); + if (error == 0) + printf("cr0_shadow[%d]\t\t0x%016lx\n", cr0shadow); + } + + if (!error && get_cr4_mask) { + uint64_t cr4mask; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask); + if (error == 0) + printf("cr4_mask[%d]\t\t0x%016lx\n", cr4mask); + } + + if (!error && get_cr4_shadow) { + uint64_t cr4shadow; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW, + &cr4shadow); + if (error == 0) + printf("cr4_shadow[%d]\t\t0x%016lx\n", cr4shadow); + } + + if (!error && get_cr3_targets) { + uint64_t target_count, target_addr; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT, + &target_count); + if (error == 0) { + printf("cr3_target_count[%d]\t0x%08x\n", + vcpu, target_count); + } + + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET0, + &target_addr); + if (error == 0) { + printf("cr3_target0[%d]\t\t0x%016lx\n", + vcpu, target_addr); + } + + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET1, + &target_addr); + if (error == 0) { + printf("cr3_target1[%d]\t\t0x%016lx\n", + vcpu, target_addr); + } + + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET2, + &target_addr); + if (error == 0) { + printf("cr3_target2[%d]\t\t0x%016lx\n", + vcpu, target_addr); + } + + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET3, + &target_addr); + if (error == 0) { + printf("cr3_target3[%d]\t\t0x%016lx\n", + vcpu, target_addr); + } + } + + if (!error && get_apic_access_addr) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_APIC_ACCESS, &addr); + if (error == 0) + printf("apic_access_addr[%d]\t0x%016lx\n", vcpu, addr); + } + + if (!error && get_virtual_apic_addr) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_VIRTUAL_APIC, &addr); + if (error == 0) + printf("virtual_apic_addr[%d]\t0x%016lx\n", vcpu, addr); + } + + if (!error && get_tpr_threshold) { + uint64_t threshold; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD, + &threshold); + 
if (error == 0) + printf("tpr_threshold[%d]\t0x%08x\n", vcpu, threshold); + } + + if (!error && get_msr_bitmap_address) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); + if (error == 0) + printf("msr_bitmap[%d]\t\t0x%016lx\n", vcpu, addr); + } + + if (!error && get_msr_bitmap) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); + if (error == 0) + error = dump_vmcs_msr_bitmap(vcpu, addr); + } + + if (!error && get_vpid) { + uint64_t vpid; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid); + if (error == 0) + printf("vpid[%d]\t\t0x%04x\n", vcpu, vpid); + } + + if (!error && get_ple_window) { + uint64_t window; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_WINDOW, &window); + if (error == 0) + printf("ple_window[%d]\t\t0x%08x\n", vcpu, window); + } + + if (!error && get_ple_gap) { + uint64_t gap; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_GAP, &gap); + if (error == 0) + printf("ple_gap[%d]\t\t0x%08x\n", vcpu, gap); + } + + if (!error && get_inst_err) { + uint64_t insterr; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR, + &insterr); + if (error == 0) { + printf("instruction_error[%d]\t0x%08x\n", + vcpu, insterr); + } + } + + if (!error && get_exit_ctls) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl); + if (error == 0) + printf("exit_ctls[%d]\t\t0x%08x\n", vcpu, ctl); + } + + if (!error && get_entry_ctls) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl); + if (error == 0) + printf("entry_ctls[%d]\t\t0x%08x\n", vcpu, ctl); + } + + if (!error && get_host_pat) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat); + if (error == 0) + printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat); + } + + if (!error && get_guest_pat) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_PAT, &pat); + if (error == 0) + printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat); + } + + if (!error && get_host_cr0) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0); + if (error == 0) 
+ printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0); + } + + if (!error && get_host_cr3) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3); + if (error == 0) + printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3); + } + + if (!error && get_host_cr4) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4); + if (error == 0) + printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4); + } + + if (!error && get_host_rip) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip); + if (error == 0) + printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip); + } + + if (!error && get_host_rsp) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp); + if (error == 0) + printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rsp); + } + + if (!error && get_guest_sysenter) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_IA32_SYSENTER_CS, &cs); + if (error == 0) + printf("guest_sysenter_cs[%d]\t0x%08x\n", vcpu, cs); + + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_IA32_SYSENTER_ESP, &rsp); + if (error == 0) + printf("guest_sysenter_sp[%d]\t0x%016lx\n", vcpu, rsp); + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_IA32_SYSENTER_EIP, &rip); + if (error == 0) + printf("guest_sysenter_ip[%d]\t0x%016lx\n", vcpu, rip); + } + + if (!error && get_vmcs_link) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr); + if (error == 0) + printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr); + } + + if (!error && get_vmcs_exit_reason) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64); + if (error == 0) + printf("vmcs_exit_reason[%d]\t0x%016lx\n", vcpu, u64); + } + + if (!error && get_vmcs_exit_qualification) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION, + &u64); + if (error == 0) + printf("vmcs_exit_qualification[%d]\t0x%016lx\n", + vcpu, u64); + } + + if (!error && get_vmcs_exit_interruption_info) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_EXIT_INTERRUPTION_INFO, &u64); + if (error == 0) { + 
printf("vmcs_exit_interruption_info[%d]\t0x%08x\n", + vcpu, u64); + } + } + + if (!error && get_vmcs_exit_interruption_error) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_EXIT_INTERRUPTION_ERROR, &u64); + if (error == 0) { + printf("vmcs_exit_interruption_error[%d]\t0x%08x\n", + vcpu, u64); + } + } + + if (!error && get_vmcs_interruptibility) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_INTERRUPTIBILITY, &u64); + if (error == 0) { + printf("vmcs_guest_interruptibility[%d]\t0x%08x\n", + vcpu, u64); + } + } + + if (!error && setcap) { + int captype; + captype = vm_capability_name2type(capname); + error = vm_set_capability(ctx, vcpu, captype, capval); + if (error != 0 && errno == ENOENT) + printf("Capability \"%s\" is not available\n", capname); + } + + if (!error && getcap) { + int captype, val; + captype = vm_capability_name2type(capname); + error = vm_get_capability(ctx, vcpu, captype, &val); + if (error == 0) { + printf("Capability \"%s\" is %s on vcpu %d\n", capname, + val ? "set" : "not set", vcpu); + } else if (errno == ENOENT) { + printf("Capability \"%s\" is not available\n", capname); + } + } + + if (!error && run) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); + assert(error == 0); + + error = vm_run(ctx, vcpu, rip, &vmexit); + if (error == 0) + dump_vm_run_exitcode(&vmexit, vcpu); + else + printf("vm_run error %d\n", error); + } + + if (error) + printf("errno = %d\n", errno); + + if (!error && destroy) + vm_destroy(ctx); + + exit(error); +} -- cgit v1.1 From 5a44aef8a38d88c0d5b265a72c5d90ef2d73cf95 Mon Sep 17 00:00:00 2001 From: grehan Date: Thu, 19 May 2011 21:53:25 +0000 Subject: Changes to allow the GENERIC+bhye kernel built from this branch to run as a 1/2 CPU guest on an 8.1 bhyve host. bhyve/inout.c inout.h fbsdrun.c - Rather than exiting on accesses to unhandled i/o ports, emulate hardware by returning -1 on reads and ignoring writes to unhandled ports. 
Support the previous mode by allowing a 'strict' parameter to be set from the command line. The 8.1 guest kernel was vastly cut down from GENERIC and had no ISA devices. Booting GENERIC exposes a massive amount of random touching of i/o ports (hello syscons/vga/atkbdc). bhyve/consport.c dev/bvm/bvm_console.c - implement a simplistic signature for the bvm console by returning 'bv' for an inw on the port. Also, set the priority of the console to CN_REMOTE if the signature was returned. This works better in an environment where multiple consoles are in the kernel (hello syscons) bhyve/rtc.c - return 0 for the access to RTC_EQUIPMENT (yes, you syscons) amd64/vmm/x86.c x86.h - hide a bunch more CPUID leaf 1 bits from the guest to prevent cpufreq drivers from probing. The next step will be to move CPUID handling completely into user-space. This will allow the full spectrum of changes from presenting a lowest-common-denominator CPU type/feature set, to exposing (almost) everything that the host can support. 
Reviewed by: neel Obtained from: NetApp --- usr.sbin/bhyve/consport.c | 6 ++++++ usr.sbin/bhyve/fbsdrun.c | 12 +++++++++--- usr.sbin/bhyve/inout.c | 41 +++++++++++++++++++++++++++++++++++++++-- usr.sbin/bhyve/inout.h | 2 +- usr.sbin/bhyve/rtc.c | 6 ++++++ 5 files changed, 61 insertions(+), 6 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/consport.c b/usr.sbin/bhyve/consport.c index 34f94a6..8e2156a 100644 --- a/usr.sbin/bhyve/consport.c +++ b/usr.sbin/bhyve/consport.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include "inout.h" #define BVM_CONSOLE_PORT 0x220 +#define BVM_CONS_SIG ('b' << 8 | 'v') static struct termios tio_orig, tio_new; @@ -103,6 +104,11 @@ console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, { static int opened; + if (bytes == 2 && in) { + *eax = BVM_CONS_SIG; + return (0); + } + if (bytes != 4) return (-1); diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index 6234958..6f009b5 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -85,6 +85,8 @@ static int guest_vmexit_on_hlt, guest_vmexit_on_pause; static int foundcpus; +static int strictio; + static char *lomem_addr; static char *himem_addr; @@ -122,7 +124,7 @@ usage(int code) { fprintf(stderr, - "Usage: %s [-hBHP][-g ][-z ][-s ][-p pincpu]" + "Usage: %s [-ehBHP][-g ][-z ][-s ][-p pincpu]" "[-n ][-m lowmem][-M highmem] \n" " -g: gdb port (default is %d and 0 means don't open)\n" " -c: # cpus (default 1)\n" @@ -130,6 +132,7 @@ usage(int code) " -B: inject breakpoint exception on vm entry\n" " -H: vmexit from the guest on hlt\n" " -P: vmexit from the guest on pause\n" + " -e: exit on unhandled i/o access\n" " -h: help\n" " -z: guest hz (default is %d)\n" " -s: PCI slot config\n" @@ -300,7 +303,7 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) if (out && port == GUEST_NIO_PORT) return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); - error = emulate_inout(ctx, vcpu, in, port, bytes, &eax); + error = 
emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); if (error == 0 && in) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); @@ -510,7 +513,7 @@ main(int argc, char *argv[]) gdb_port = DEFAULT_GDB_PORT; guest_ncpus = 1; - while ((c = getopt(argc, argv, "hBHPxp:g:c:z:s:n:m:M:")) != -1) { + while ((c = getopt(argc, argv, "ehBHPxp:g:c:z:s:n:m:M:")) != -1) { switch (c) { case 'B': inject_bkpt = 1; @@ -551,6 +554,9 @@ main(int argc, char *argv[]) case 'P': guest_vmexit_on_pause = 1; break; + case 'e': + strictio = 1; + break; case 'h': usage(0); default: diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index 84445b1..bf9c5f7 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -48,9 +48,30 @@ static struct { void *arg; } inout_handlers[MAX_IOPORTS]; +static int +default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + if (in) { + switch (bytes) { + case 4: + *eax = 0xffffffff; + break; + case 2: + *eax = 0xffff; + break; + case 1: + *eax = 0xff; + break; + } + } + + return (0); +} + int emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax) + uint32_t *eax, int strict) { int flags; inout_func_t handler; @@ -58,7 +79,9 @@ emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, assert(port < MAX_IOPORTS); - if ((handler = inout_handlers[port].handler) == NULL) + handler = inout_handlers[port].handler; + + if (strict && handler == default_inout) return (-1); flags = inout_handlers[port].flags; @@ -74,7 +97,21 @@ void init_inout(void) { struct inout_port **iopp, *iop; + int i; + + /* + * Set up the default handler for all ports + */ + for (i = 0; i < MAX_IOPORTS; i++) { + inout_handlers[i].name = "default"; + inout_handlers[i].flags = IOPORT_F_IN | IOPORT_F_OUT; + inout_handlers[i].handler = default_inout; + inout_handlers[i].arg = NULL; + } + /* + * Overwrite with specified handlers + */ SET_FOREACH(iopp, inout_port_set) { iop = *iopp; 
assert(iop->port < MAX_IOPORTS); diff --git a/usr.sbin/bhyve/inout.h b/usr.sbin/bhyve/inout.h index b15011d..f6a8db2 100644 --- a/usr.sbin/bhyve/inout.h +++ b/usr.sbin/bhyve/inout.h @@ -59,7 +59,7 @@ struct inout_port { void init_inout(void); int emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes, - uint32_t *eax); + uint32_t *eax, int strict); int register_inout(struct inout_port *iop); #endif /* _INOUT_H_ */ diff --git a/usr.sbin/bhyve/rtc.c b/usr.sbin/bhyve/rtc.c index a6f44e0..f8b894e 100644 --- a/usr.sbin/bhyve/rtc.c +++ b/usr.sbin/bhyve/rtc.c @@ -68,6 +68,8 @@ __FBSDID("$FreeBSD$"); #define RTC_RSTCODE 0x0f +#define RTC_EQUIPMENT 0x14 + static int addr; /* XXX initialize these to default values as they would be from BIOS */ @@ -136,6 +138,7 @@ rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, case RTC_STATUSD: case RTC_DIAG: case RTC_RSTCODE: + case RTC_EQUIPMENT: break; default: return (-1); @@ -228,6 +231,9 @@ rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, case RTC_RSTCODE: *eax = rstcode; return (0); + case RTC_EQUIPMENT: + *eax = 0; + return (0); default: return (-1); } -- cgit v1.1 From 949e126edfdb544c4b351ec5726c8dc3ff848dda Mon Sep 17 00:00:00 2001 From: grehan Date: Tue, 24 May 2011 01:08:53 +0000 Subject: Catch up with CURRENTs different timer usage compared to 8.1. A counter value of 0 in rategen mode is equivalent to a max initial value. The TSC is now correctly calibrated on a 9.0 guest. 
Obtained from: NetApp --- usr.sbin/bhyve/pit_8254.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pit_8254.c b/usr.sbin/bhyve/pit_8254.c index b510161..d9b6d55 100644 --- a/usr.sbin/bhyve/pit_8254.c +++ b/usr.sbin/bhyve/pit_8254.c @@ -183,6 +183,8 @@ pit_8254_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, if (c->crbyte == 2) { c->crbyte = 0; c->initial = c->cr[0] | (uint16_t)c->cr[1] << 8; + if (c->initial == 0) + c->initial = 0xffff; gettimeofday(&c->tv, NULL); } } -- cgit v1.1 From 3c35264f695e0a1f8a04dbcca1c93bb5159b2274 Mon Sep 17 00:00:00 2001 From: grehan Date: Tue, 7 Jun 2011 18:35:45 +0000 Subject: Allow access to the device's config area with any size i/o access at any offset. This is now spec-compliant. --- usr.sbin/bhyve/pci_virtio_block.c | 12 ++++++++++-- usr.sbin/bhyve/pci_virtio_net.c | 31 ++++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 9 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c index b86e21d..98c0695 100644 --- a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -439,6 +439,7 @@ uint32_t pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size) { struct pci_vtblk_softc *sc = pi->pi_arg; + void *ptr; uint32_t value; if (offset + size > VTBLK_REGSZ) { @@ -481,8 +482,15 @@ pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size) sc->vbsc_isr = 0; /* a read clears this flag */ break; case VTBLK_R_CFG ... 
VTBLK_R_CFG_END: - assert(size == 1); - value = *((uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG); + assert(size + offset <= (VTBLK_R_CFG_END + 1)); + ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG; + if (size == 1) { + value = *(uint8_t *) ptr; + } else if (size == 2) { + value = *(uint16_t *) ptr; + } else { + value = *(uint32_t *) ptr; + } break; default: DPRINTF(("vtblk: unknown i/o read offset %d\n\r", offset)); diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index 5db1eb7..6160b14 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -594,7 +594,8 @@ pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size, uint32_t value) { struct pci_vtnet_softc *sc = pi->pi_arg; - + void *ptr; + if (offset + size > VTNET_REGSZ) { DPRINTF(("vtnet_write: 2big, offset %d size %d\n", offset, size)); @@ -632,11 +633,19 @@ pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size, case VTNET_R_CFG3: case VTNET_R_CFG4: case VTNET_R_CFG5: + assert((size + offset) <= (VTNET_R_CFG5 + 1)); + ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; /* * The driver is allowed to change the MAC address */ - assert(size == 1); sc->vsc_macaddr[offset - VTNET_R_CFG0] = value; + if (size == 1) { + *(uint8_t *) ptr = value; + } else if (size == 2) { + *(uint16_t *) ptr = value; + } else { + *(uint32_t *) ptr = value; + } break; case VTCFG_R_HOSTCAP: case VTCFG_R_QNUM: @@ -658,6 +667,7 @@ uint32_t pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size) { struct pci_vtnet_softc *sc = pi->pi_arg; + void *ptr; uint32_t value; if (offset + size > VTNET_REGSZ) { @@ -708,16 +718,23 @@ pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size) case VTNET_R_CFG3: case VTNET_R_CFG4: case VTNET_R_CFG5: - assert(size == 1); - value = sc->vsc_macaddr[offset - VTNET_R_CFG0]; + assert((size + offset) <= (VTNET_R_CFG5 + 1)); + ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; + if (size == 
1) { + value = *(uint8_t *) ptr; + } else if (size == 2) { + value = *(uint16_t *) ptr; + } else { + value = *(uint32_t *) ptr; + } break; case VTNET_R_CFG6: - assert(size == 1); - value = 0x01; /* XXX link always up */ + assert(size != 4); + value = 0x01; /* XXX link always up */ break; case VTNET_R_CFG7: assert(size == 1); - value = 0; /* link status is in the LSB */ + value = 0; /* XXX link status in LSB */ break; default: DPRINTF(("vtnet: unknown i/o read offset %d\n\r", offset)); -- cgit v1.1 From cba736c8eee565e8ba803c9ef7c7f06aefb162ed Mon Sep 17 00:00:00 2001 From: neel Date: Wed, 6 Jul 2011 22:38:09 +0000 Subject: 'bhyveload' is a userspace FreeBSD loader that can load the kernel + metadata inside a BHyVe-based virtual machine. It is a thin wrapper on top of userboot.so which is a variant of the FreeBSD loader packaged as a shared library. 'bhyveload' provides callbacks that are utilized by userboot.so to do things like console i/o, disk i/o, set virtual machine registers etc. Thanks for Doug Rabson (dfr@) for making this happen. 
--- usr.sbin/Makefile.amd64 | 1 + usr.sbin/bhyveload/Makefile | 14 + usr.sbin/bhyveload/bhyveload.c | 604 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 619 insertions(+) create mode 100644 usr.sbin/bhyveload/Makefile create mode 100644 usr.sbin/bhyveload/bhyveload.c (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile.amd64 b/usr.sbin/Makefile.amd64 index 811202b..e8f3bc0 100644 --- a/usr.sbin/Makefile.amd64 +++ b/usr.sbin/Makefile.amd64 @@ -11,6 +11,7 @@ SUBDIR+= apm .endif SUBDIR+= asf SUBDIR+= bhyve +SUBDIR+= bhyveload SUBDIR+= boot0cfg .if ${MK_TOOLCHAIN} != "no" SUBDIR+= btxld diff --git a/usr.sbin/bhyveload/Makefile b/usr.sbin/bhyveload/Makefile new file mode 100644 index 0000000..7d0dd92 --- /dev/null +++ b/usr.sbin/bhyveload/Makefile @@ -0,0 +1,14 @@ +# $FreeBSD$ + +PROG= bhyveload +SRCS= bhyveload.c +NO_MAN= + +DPADD+= ${LIBVMMAPI} +LDADD+= -lvmmapi + +WARNS?= 3 + +CFLAGS+=-I${.CURDIR}/../../sys/boot/userboot + +.include diff --git a/usr.sbin/bhyveload/bhyveload.c b/usr.sbin/bhyveload/bhyveload.c new file mode 100644 index 0000000..3af024b --- /dev/null +++ b/usr.sbin/bhyveload/bhyveload.c @@ -0,0 +1,604 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/*- + * Copyright (c) 2011 Google, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "userboot.h" + +#define MB (1024 * 1024UL) +#define GB (1024 * 1024 * 1024UL) +#define BSP 0 + +static char *host_base = "/"; +static struct termios term, oldterm; +static int disk_fd = -1; + +static char *vmname, *progname, *membase; +static uint64_t lowmem, highmem; +static struct vmctx *ctx; + +static uint64_t gdtbase, cr3, rsp; + +static void cb_exit(void *arg, int v); + +/* + * Console i/o callbacks + */ + +static void +cb_putc(void *arg, int ch) +{ + char c = ch; + + write(1, &c, 1); +} + +static int +cb_getc(void *arg) +{ + char c; + + if (read(0, &c, 1) == 1) + return (c); + return (-1); +} + +static int +cb_poll(void *arg) +{ + int n; + + if (ioctl(0, FIONREAD, &n) >= 0) + return (n > 0); + return (0); +} + +/* + * Host filesystem i/o callbacks + */ + +struct cb_file { + int cf_isdir; + size_t cf_size; + struct stat cf_stat; + union { + int fd; + DIR *dir; + } cf_u; +}; +/* Open host_base + filename (directory or regular file only); on success store a cb_file handle in *hp and return 0, otherwise return an errno value. */ +static int +cb_open(void *arg, const char *filename, void **hp) +{ + struct stat st; + struct cb_file *cf; + char path[PATH_MAX]; + + if (!host_base) + return (ENOENT); + + strlcpy(path, host_base, PATH_MAX); + if (path[strlen(path) - 1] == '/') + path[strlen(path) - 1] = 0; + strlcat(path, filename, PATH_MAX); + cf = malloc(sizeof(struct cb_file)); + if (cf == NULL || stat(path, &st) < 0) { /* errno was set by the failing malloc() or stat() */ + free(cf); + return (errno); + } + cf->cf_stat = st; /* cb_stat() reports from the copy kept in the handle */ + cf->cf_size = st.st_size; + if (S_ISDIR(cf->cf_stat.st_mode)) { + cf->cf_isdir = 1; + cf->cf_u.dir = opendir(path); + if (!cf->cf_u.dir) + goto out; + *hp = cf; + return (0); + } + if (S_ISREG(cf->cf_stat.st_mode)) { + cf->cf_isdir = 0; + cf->cf_u.fd = open(path, O_RDONLY); + if (cf->cf_u.fd < 0) + goto out; + *hp = cf; + return (0); + } + +out: + free(cf); + return (EINVAL); +} + +static int +cb_close(void *arg, void *h) +{ + struct
cb_file *cf = h; + + if (cf->cf_isdir) + closedir(cf->cf_u.dir); + else + close(cf->cf_u.fd); + free(cf); + + return (0); +} + +static int +cb_isdir(void *arg, void *h) +{ + struct cb_file *cf = h; + + return (cf->cf_isdir); +} + +static int +cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid) +{ + struct cb_file *cf = h; + ssize_t sz; + + if (cf->cf_isdir) + return (EINVAL); + sz = read(cf->cf_u.fd, buf, size); + if (sz < 0) + return (EINVAL); + *resid = size - sz; + return (0); +} + +static int +cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return, + size_t *namelen_return, char *name) +{ + struct cb_file *cf = h; + struct dirent *dp; + + if (!cf->cf_isdir) + return (EINVAL); + + dp = readdir(cf->cf_u.dir); + if (!dp) + return (ENOENT); + + /* + * Note: d_namlen is in the range 0..255 and therefore less + * than PATH_MAX so we don't need to test before copying. + */ + *fileno_return = dp->d_fileno; + *type_return = dp->d_type; + *namelen_return = dp->d_namlen; + memcpy(name, dp->d_name, dp->d_namlen); + name[dp->d_namlen] = 0; + + return (0); +} + +static int +cb_seek(void *arg, void *h, uint64_t offset, int whence) +{ + struct cb_file *cf = h; + + if (cf->cf_isdir) + return (EINVAL); + if (lseek(cf->cf_u.fd, offset, whence) < 0) + return (errno); + return (0); +} + +static int +cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size) +{ + struct cb_file *cf = h; + + *mode = cf->cf_stat.st_mode; + *uid = cf->cf_stat.st_uid; + *gid = cf->cf_stat.st_gid; + *size = cf->cf_stat.st_size; + return (0); +} + +/* + * Disk image i/o callbacks + */ + +static int +cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size, + size_t *resid) +{ + ssize_t n; + + if (unit != 0 || disk_fd == -1) + return (EIO); + n = pread(disk_fd, to, size, from); + if (n < 0) + return (errno); + *resid = size - n; + return (0); +} + +/* + * Guest virtual machine i/o callbacks + */ +static int +cb_copyin(void *arg, const void 
*from, uint64_t to, size_t size) +{ + + to &= 0x7fffffff; + if (to > lowmem) + return (EFAULT); + if (to + size > lowmem) + size = lowmem - to; + + memcpy(&membase[to], from, size); + + return (0); +} + +static int +cb_copyout(void *arg, uint64_t from, void *to, size_t size) +{ + + from &= 0x7fffffff; + if (from > lowmem) + return (EFAULT); + if (from + size > lowmem) + size = lowmem - from; + + memcpy(to, &membase[from], size); + + return (0); +} + +static void +cb_setreg(void *arg, int r, uint64_t v) +{ + int error; + enum vm_reg_name vmreg; + + vmreg = VM_REG_LAST; + + switch (r) { + case 4: + vmreg = VM_REG_GUEST_RSP; + rsp = v; + break; + default: + break; + } + + if (vmreg == VM_REG_LAST) { + printf("test_setreg(%d): not implemented\n", r); + cb_exit(NULL, USERBOOT_EXIT_QUIT); + } + + error = vm_set_register(ctx, BSP, vmreg, v); + if (error) { + perror("vm_set_register"); + cb_exit(NULL, USERBOOT_EXIT_QUIT); + } +} + +static void +cb_setmsr(void *arg, int r, uint64_t v) +{ + int error; + enum vm_reg_name vmreg; + + vmreg = VM_REG_LAST; + + switch (r) { + case MSR_EFER: + vmreg = VM_REG_GUEST_EFER; + break; + default: + break; + } + + if (vmreg == VM_REG_LAST) { + printf("test_setmsr(%d): not implemented\n", r); + cb_exit(NULL, USERBOOT_EXIT_QUIT); + } + + error = vm_set_register(ctx, BSP, vmreg, v); + if (error) { + perror("vm_set_msr"); + cb_exit(NULL, USERBOOT_EXIT_QUIT); + } +} + +static void +cb_setcr(void *arg, int r, uint64_t v) +{ + int error; + enum vm_reg_name vmreg; + + vmreg = VM_REG_LAST; + + switch (r) { + case 0: + vmreg = VM_REG_GUEST_CR0; + break; + case 3: + vmreg = VM_REG_GUEST_CR3; + cr3 = v; + break; + case 4: + vmreg = VM_REG_GUEST_CR4; + break; + default: + break; + } + + if (vmreg == VM_REG_LAST) { + printf("test_setcr(%d): not implemented\n", r); + cb_exit(NULL, USERBOOT_EXIT_QUIT); + } + + error = vm_set_register(ctx, BSP, vmreg, v); + if (error) { + perror("vm_set_cr"); + cb_exit(NULL, USERBOOT_EXIT_QUIT); + } +} + +static void 
+cb_setgdt(void *arg, uint64_t base, size_t size) +{ + int error; + + error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0); + if (error != 0) { + perror("vm_set_desc(gdt)"); + cb_exit(NULL, USERBOOT_EXIT_QUIT); + } + + gdtbase = base; +} + +static void +cb_exec(void *arg, uint64_t rip) +{ + int error; + + error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, rsp); + if (error) { + perror("vm_setup_freebsd_registers"); + cb_exit(NULL, USERBOOT_EXIT_QUIT); + } + + cb_exit(NULL, 0); +} + +/* + * Misc + */ + +static void +cb_delay(void *arg, int usec) +{ + + usleep(usec); +} + +static void +cb_exit(void *arg, int v) +{ + + tcsetattr(0, TCSAFLUSH, &oldterm); + exit(v); +} + +static void +cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem) +{ + + *ret_lowmem = lowmem; + *ret_highmem = highmem; +} + +static struct loader_callbacks_v1 cb = { + .getc = cb_getc, + .putc = cb_putc, + .poll = cb_poll, + + .open = cb_open, + .close = cb_close, + .isdir = cb_isdir, + .read = cb_read, + .readdir = cb_readdir, + .seek = cb_seek, + .stat = cb_stat, + + .diskread = cb_diskread, + + .copyin = cb_copyin, + .copyout = cb_copyout, + .setreg = cb_setreg, + .setmsr = cb_setmsr, + .setcr = cb_setcr, + .setgdt = cb_setgdt, + .exec = cb_exec, + + .delay = cb_delay, + .exit = cb_exit, + .getmem = cb_getmem, +}; + +static void +usage(void) +{ + + printf("usage: %s [-d ] [-h ] " + "[-m ][-M ] " + "\n", progname); + exit(1); +} + +int +main(int argc, char** argv) +{ + void *h; + void (*func)(struct loader_callbacks_v1 *, void *, int, int); + int opt, error; + char *disk_image; + + progname = argv[0]; + + lowmem = 768 * MB; + highmem = 0; + disk_image = NULL; + + while ((opt = getopt(argc, argv, "d:h:m:M:")) != -1) { + switch (opt) { + case 'd': + disk_image = optarg; + break; + + case 'h': + host_base = optarg; + break; + + case 'm': + lowmem = strtoul(optarg, NULL, 0) * MB; + break; + + case 'M': + highmem = strtoul(optarg, NULL, 0) * MB; + break; + + case 
'?': + usage(); + } + } + + argc -= optind; + argv += optind; + + if (argc != 1) + usage(); + + vmname = argv[0]; + + error = vm_create(vmname); + if (error != 0 && errno != EEXIST) { + perror("vm_create"); + exit(1); + + } + + ctx = vm_open(vmname); + if (ctx == NULL) { + perror("vm_open"); + exit(1); + } + + error = vm_setup_memory(ctx, 0, lowmem, &membase); + if (error) { + perror("vm_setup_memory(lowmem)"); + exit(1); + } + + if (highmem != 0) { + error = vm_setup_memory(ctx, 4 * GB, highmem, NULL); + if (error) { + perror("vm_setup_memory(highmem)"); + exit(1); + } + } + + tcgetattr(0, &term); + oldterm = term; + term.c_lflag &= ~(ICANON|ECHO); + term.c_iflag &= ~ICRNL; + tcsetattr(0, TCSAFLUSH, &term); + h = dlopen("./userboot.so", RTLD_LOCAL); + if (!h) { + printf("%s\n", dlerror()); + return (1); + } + func = dlsym(h, "loader_main"); + if (!func) { + printf("%s\n", dlerror()); + return (1); + } + + if (disk_image) { + disk_fd = open(disk_image, O_RDONLY); + } + func(&cb, NULL, USERBOOT_VERSION_1, disk_fd >= 0); +} -- cgit v1.1 From 9de15373c371786da8c7814387bad682698eb9bd Mon Sep 17 00:00:00 2001 From: grehan Date: Tue, 18 Oct 2011 18:52:22 +0000 Subject: Ignore legacy INIT de-asserts in x2apic mode before verifying the contents of the IPI. Uncovered by jhb's x2apic patch. 
Obtained from: NetApp --- usr.sbin/bhyve/xmsr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/xmsr.c b/usr.sbin/bhyve/xmsr.c index 676b5df..cc148d6 100644 --- a/usr.sbin/bhyve/xmsr.c +++ b/usr.sbin/bhyve/xmsr.c @@ -110,15 +110,15 @@ emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val) switch (mode) { case APIC_DELMODE_INIT: - assert(dest != 0); - assert(dest < guest_ncpus); - /* * Ignore legacy de-assert INITs in x2apic mode */ if ((val & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) { break; } + + assert(dest != 0); + assert(dest < guest_ncpus); assert(cpu_b[dest] == CPU_S_INIT); /* -- cgit v1.1 From 9f0c999f8126597eb572b80056df88335dbd0070 Mon Sep 17 00:00:00 2001 From: grehan Date: Sat, 28 Apr 2012 16:28:00 +0000 Subject: MSI-x interrupt support for PCI pass-thru devices. Includes instruction emulation for memory r/w access. This opens the door for io-apic, local apic, hpet timer, and legacy device emulation. 
Submitted by: ryan dot berryhill at sandvine dot com Reviewed by: grehan Obtained from: Sandvine --- usr.sbin/bhyve/Makefile | 3 +- usr.sbin/bhyve/fbsdrun.c | 29 +- usr.sbin/bhyve/instruction_emul.c | 555 ++++++++++++++++++++++++++++++++++++++ usr.sbin/bhyve/instruction_emul.h | 47 ++++ usr.sbin/bhyve/pci_emul.c | 29 ++ usr.sbin/bhyve/pci_emul.h | 40 +++ usr.sbin/bhyve/pci_passthru.c | 270 ++++++++++++++++++- 7 files changed, 953 insertions(+), 20 deletions(-) create mode 100644 usr.sbin/bhyve/instruction_emul.c create mode 100644 usr.sbin/bhyve/instruction_emul.h (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index b0398ed..f64e579 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -4,7 +4,8 @@ PROG= bhyve -SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c mevent.c +SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c +SRCS+= instruction_emul.c mevent.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c SRCS+= pci_virtio_net.c pit_8254.c post.c rtc.c uart.c xmsr.c diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index 6f009b5..c2295ea 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include "mevent.h" #include "pci_emul.h" #include "xmsr.h" +#include "instruction_emul.h" #define DEFAULT_GUEST_HZ 100 #define DEFAULT_GUEST_TSLICE 200 @@ -108,6 +109,7 @@ struct fbsdstats { uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; + uint64_t vmexit_paging; uint64_t cpu_switch_rotate; uint64_t cpu_switch_direct; int io_reset; @@ -412,6 +414,20 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) return (VMEXIT_RESTART); } +static int +vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + stats.vmexit_paging++; + + if (emulate_instruction(ctx, *pvcpu, vmexit->rip, vmexit->u.paging.cr3) != 0) { + printf("Failed to emulate instruction at 0x%lx\n", 
vmexit->rip); + return (VMEXIT_ABORT); + } + + return (VMEXIT_CONTINUE); +} + static void sigalrm(int sig) { @@ -446,12 +462,13 @@ setup_timeslice(void) } static vmexit_handler_t handler[VM_EXITCODE_MAX] = { - [VM_EXITCODE_INOUT] = vmexit_inout, - [VM_EXITCODE_VMX] = vmexit_vmx, - [VM_EXITCODE_BOGUS] = vmexit_bogus, - [VM_EXITCODE_RDMSR] = vmexit_rdmsr, - [VM_EXITCODE_WRMSR] = vmexit_wrmsr, - [VM_EXITCODE_MTRAP] = vmexit_mtrap, + [VM_EXITCODE_INOUT] = vmexit_inout, + [VM_EXITCODE_VMX] = vmexit_vmx, + [VM_EXITCODE_BOGUS] = vmexit_bogus, + [VM_EXITCODE_RDMSR] = vmexit_rdmsr, + [VM_EXITCODE_WRMSR] = vmexit_wrmsr, + [VM_EXITCODE_MTRAP] = vmexit_mtrap, + [VM_EXITCODE_PAGING] = vmexit_paging }; static void diff --git a/usr.sbin/bhyve/instruction_emul.c b/usr.sbin/bhyve/instruction_emul.c new file mode 100644 index 0000000..8c99194 --- /dev/null +++ b/usr.sbin/bhyve/instruction_emul.c @@ -0,0 +1,555 @@ +/*- + * Copyright (c) 2012 Sandvine, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include + +#include "fbsdrun.h" +#include "instruction_emul.h" + +#define PREFIX_LOCK 0xF0 +#define PREFIX_REPNE 0xF2 +#define PREFIX_REPE 0xF3 +#define PREFIX_CS_OVERRIDE 0x2E +#define PREFIX_SS_OVERRIDE 0x36 +#define PREFIX_DS_OVERRIDE 0x3E +#define PREFIX_ES_OVERRIDE 0x26 +#define PREFIX_FS_OVERRIDE 0x64 +#define PREFIX_GS_OVERRIDE 0x65 +#define PREFIX_BRANCH_NOT_TAKEN 0x2E +#define PREFIX_BRANCH_TAKEN 0x3E +#define PREFIX_OPSIZE 0x66 +#define PREFIX_ADDRSIZE 0x67 + +#define OPCODE_2BYTE_ESCAPE 0x0F +#define OPCODE_3BYTE_ESCAPE 0x38 + +#define MODRM_MOD_MASK 0xC0 +#define MODRM_MOD_SHIFT 6 +#define MODRM_RM_MASK 0x07 +#define MODRM_RM_SHIFT 0 +#define MODRM_REG_MASK 0x38 +#define MODRM_REG_SHIFT 3 + +#define MOD_INDIRECT 0x0 +#define MOD_INDIRECT_DISP8 0x1 +#define MOD_INDIRECT_DISP32 0x2 +#define MOD_DIRECT 0x3 + +#define RM_EAX 0x0 +#define RM_ECX 0x1 +#define RM_EDX 0x2 +#define RM_EBX 0x3 +#define RM_SIB 0x4 +#define RM_DISP32 0x5 +#define RM_EBP RM_DISP32 +#define RM_ESI 0x6 +#define RM_EDI 0x7 + +#define REG_EAX 0x0 +#define REG_ECX 0x1 +#define REG_EDX 0x2 +#define REG_EBX 0x3 +#define REG_ESP 0x4 +#define REG_EBP 0x5 +#define REG_ESI 0x6 +#define REG_EDI 0x7 +#define REG_R8 0x8 +#define REG_R9 0x9 +#define REG_R10 0xA +#define REG_R11 0xB +#define REG_R12 0xC +#define REG_R13 0xD +#define REG_R14 0xE +#define REG_R15 0xF + +#define HAS_MODRM 1 +#define FROM_RM (1<<1) 
+#define FROM_REG (1<<2) +#define TO_RM (1<<3) +#define TO_REG (1<<4) + +#define REX_MASK 0xF0 +#define REX_PREFIX 0x40 +#define is_rex_prefix(x) ( ((x) & REX_MASK) == REX_PREFIX ) +#define REX_W_MASK 0x8 +#define REX_R_MASK 0x4 +#define REX_X_MASK 0x2 +#define REX_B_MASK 0x1 + +#define is_prefix(x) ((x) == PREFIX_LOCK || (x) == PREFIX_REPNE || \ + (x) == PREFIX_REPE || (x) == PREFIX_CS_OVERRIDE || \ + (x) == PREFIX_SS_OVERRIDE || (x) == PREFIX_DS_OVERRIDE || \ + (x) == PREFIX_ES_OVERRIDE || (x) == PREFIX_FS_OVERRIDE || \ + (x) == PREFIX_GS_OVERRIDE || (x) == PREFIX_BRANCH_NOT_TAKEN || \ + (x) == PREFIX_BRANCH_TAKEN || (x) == PREFIX_OPSIZE || \ + (x) == PREFIX_ADDRSIZE || is_rex_prefix((x))) + +#define PAGE_FRAME_MASK 0x80 +#define PAGE_OFFSET_MASK 0xFFF +#define PAGE_TABLE_ENTRY_MASK (~PAGE_OFFSET_MASK) +#define PML4E_OFFSET_MASK 0x0000FF8000000000 +#define PML4E_SHIFT 39 + +#define MAX_EMULATED_REGIONS 8 +int registered_regions = 0; +struct memory_region +{ + uintptr_t start; + uintptr_t end; + emulated_read_func_t memread; + emulated_write_func_t memwrite; + void *arg; +} emulated_regions[MAX_EMULATED_REGIONS]; + +struct decoded_instruction +{ + void *instruction; + uint8_t *opcode; + uint8_t *modrm; + uint8_t *sib; + uint8_t *displacement; + uint8_t *immediate; + + uint8_t opcode_flags; + + uint8_t addressing_mode; + uint8_t rm; + uint8_t reg; + uint8_t rex_r; + uint8_t rex_w; + uint8_t rex_b; + uint8_t rex_x; + + int32_t disp; +}; + +static enum vm_reg_name vm_reg_name_mappings[] = { + [REG_EAX] = VM_REG_GUEST_RAX, + [REG_EBX] = VM_REG_GUEST_RBX, + [REG_ECX] = VM_REG_GUEST_RCX, + [REG_EDX] = VM_REG_GUEST_RDX, + [REG_ESP] = VM_REG_GUEST_RSP, + [REG_EBP] = VM_REG_GUEST_RBP, + [REG_ESI] = VM_REG_GUEST_RSI, + [REG_EDI] = VM_REG_GUEST_RDI, + [REG_R8] = VM_REG_GUEST_R8, + [REG_R9] = VM_REG_GUEST_R9, + [REG_R10] = VM_REG_GUEST_R10, + [REG_R11] = VM_REG_GUEST_R11, + [REG_R12] = VM_REG_GUEST_R12, + [REG_R13] = VM_REG_GUEST_R13, + [REG_R14] = VM_REG_GUEST_R14, + 
[REG_R15] = VM_REG_GUEST_R15 +}; + +uint8_t one_byte_opcodes[256] = { + [0x89] = HAS_MODRM | FROM_REG | TO_RM, + [0x8B] = HAS_MODRM | FROM_RM | TO_REG, +}; + +static uintptr_t +gla2gpa(uint64_t gla, uint64_t guest_cr3) +{ + uint64_t *table; + uint64_t mask, entry; + int level, shift; + uintptr_t page_frame; + + table = paddr_guest2host(guest_cr3 & PAGE_TABLE_ENTRY_MASK); + mask = PML4E_OFFSET_MASK; + shift = PML4E_SHIFT; + for (level = 0; level < 4; ++level) + { + entry = table[(gla & mask) >> shift]; + table = (uint64_t*)(entry & PAGE_TABLE_ENTRY_MASK); + + /* This entry does not point to another page table */ + if (entry & PAGE_FRAME_MASK || level >= 3) + break; + + table = paddr_guest2host((uintptr_t)table); + mask >>= 9; + shift -= 9; + } + + mask = (1 << shift) - 1; + page_frame = ((uintptr_t)table & ~mask); + return (page_frame | (gla & mask)); +} + +static void * +gla2hla(uint64_t gla, uint64_t guest_cr3) +{ + uintptr_t gpa; + + gpa = gla2gpa(gla, guest_cr3); + return paddr_guest2host(gpa); +} + +/* + * Decodes all of the prefixes of the instruction. Only a subset of REX + * prefixes are currently supported. If any unsupported prefix is + * encountered, returns -1. + */ +static int +decode_prefixes(struct decoded_instruction *decoded) +{ + uint8_t *current_prefix; + + current_prefix = decoded->instruction; + + if (is_rex_prefix(*current_prefix)) { + decoded->rex_w = *current_prefix & REX_W_MASK; + decoded->rex_r = *current_prefix & REX_R_MASK; + decoded->rex_x = *current_prefix & REX_X_MASK; + decoded->rex_b = *current_prefix & REX_B_MASK; + current_prefix++; + } else if (is_prefix(*current_prefix)) { + return (-1); + } + + decoded->opcode = current_prefix; + return (0); +} + +/* + * Decodes the instruction's opcode. If the opcode is not understood, returns + * -1 indicating an error. Sets the instruction's mod_rm pointer to the + * location of the ModR/M field. 
+ */ +static int +decode_opcode(struct decoded_instruction *decoded) +{ + uint8_t opcode, flags; + + opcode = *decoded->opcode; + flags = one_byte_opcodes[opcode]; + + if (!flags) + return (-1); + + if (flags & HAS_MODRM) { + decoded->modrm = decoded->opcode + 1; + } + + decoded->opcode_flags = flags; + + return (0); +} + +/* + * Decodes the instruction's ModR/M field. Sets the instruction's sib pointer + * to the location of the SIB if one is expected to be present, or 0 if not. + */ +static int +decode_mod_rm(struct decoded_instruction *decoded) +{ + uint8_t modrm; + uint8_t *extension_operands; + + if (decoded->modrm) { + modrm = *decoded->modrm; + + decoded->addressing_mode = (modrm & MODRM_MOD_MASK) >> MODRM_MOD_SHIFT; + decoded->rm = (modrm & MODRM_RM_MASK) >> MODRM_RM_SHIFT; + decoded->reg = (modrm & MODRM_REG_MASK) >> MODRM_REG_SHIFT; + + if (decoded->rex_b) + decoded->rm |= (1<<3); + + if (decoded->rex_r) + decoded->reg |= (1<<3); + + extension_operands = decoded->modrm + 1; + + if (decoded->rm == RM_SIB) { + decoded->sib = decoded->modrm + 1; + extension_operands = decoded->sib + 1; + } + + switch (decoded->addressing_mode) { + case MOD_INDIRECT: + case MOD_DIRECT: + decoded->displacement = 0; + break; + case MOD_INDIRECT_DISP8: + decoded->displacement = extension_operands; + break; + case MOD_INDIRECT_DISP32: + decoded->displacement = extension_operands; + break; + } + } + + return (0); +} + +/* + * Decodes the instruction's SIB field. No such instructions are currently + * supported, so do nothing and return -1 if there is a SIB field, 0 otherwise. + */ +static int +decode_sib(struct decoded_instruction *decoded) +{ + + if (decoded->sib) + return (-1); + + return (0); +} + +/* + * Grabs and saves the instruction's immediate operand and displacement if + * they are present. Immediates are not currently supported, so if an + * immediate is present it will return -1 indicating an error. 
+ */ +static int +decode_extension_operands(struct decoded_instruction *decoded) +{ + + if (decoded->displacement) { + if (decoded->addressing_mode == MOD_INDIRECT_DISP8) { + decoded->disp = (int32_t)*decoded->displacement; + } else if (decoded->addressing_mode == MOD_INDIRECT_DISP32) { + decoded->disp = *((int32_t*)decoded->displacement); + } + } + + if (decoded->immediate) { + return (-1); + } + + return (0); +} + +static int +decode_instruction(void *instr, struct decoded_instruction *decoded) +{ + int error; + + bzero(decoded, sizeof(*decoded)); + decoded->instruction = instr; + + error = decode_prefixes(decoded); + if (error) + return (error); + + error = decode_opcode(decoded); + if (error) + return (error); + + error = decode_mod_rm(decoded); + if (error) + return (error); + + error = decode_sib(decoded); + if (error) + return (error); + + error = decode_extension_operands(decoded); + if (error) + return (error); + + return (0); +} + +static struct memory_region * +find_region(uintptr_t addr) +{ + int i; + + for (i = 0; i < registered_regions; ++i) { + if (emulated_regions[i].start <= addr && + emulated_regions[i].end >= addr) { + return &emulated_regions[i]; + } + } + + return (0); +} + +static enum vm_reg_name +get_vm_reg_name(uint8_t reg) +{ + return vm_reg_name_mappings[reg]; +} + +static int +get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3, + const struct decoded_instruction *instruction, uint64_t *operand) +{ + enum vm_reg_name regname; + uint64_t reg; + uintptr_t target; + int error; + uint8_t rm, addressing_mode; + struct memory_region *emulated_memory; + + if (instruction->opcode_flags & FROM_RM) { + rm = instruction->rm; + addressing_mode = instruction->addressing_mode; + } else if (instruction->opcode_flags & FROM_REG) { + rm = instruction->reg; + addressing_mode = MOD_DIRECT; + } else + return (-1); + + regname = get_vm_reg_name(rm); + error = vm_get_register(vm, vcpu, regname, ®); + if (error) + return (error); + + switch 
(addressing_mode) { + case MOD_DIRECT: + *operand = reg; + return (0); + case MOD_INDIRECT: + target = gla2gpa(reg, guest_cr3); + emulated_memory = find_region(target); + if (emulated_memory) { + return emulated_memory->memread(vm, vcpu, target, + 4, operand, + emulated_memory->arg); + } + return (-1); + case MOD_INDIRECT_DISP8: + case MOD_INDIRECT_DISP32: + target = gla2gpa(reg, guest_cr3); + target += instruction->disp; + emulated_memory = find_region(target); + if (emulated_memory) { + return emulated_memory->memread(vm, vcpu, target, + 4, operand, + emulated_memory->arg); + } + return (-1); + default: + return (-1); + } +} + +static int +perform_write(struct vmctx *vm, int vcpu, uint64_t guest_cr3, + const struct decoded_instruction *instruction, uint64_t operand) +{ + enum vm_reg_name regname; + uintptr_t target; + int error; + uint64_t reg; + struct memory_region *emulated_memory; + uint8_t addressing_mode; + + if (instruction->opcode_flags & TO_RM) { + reg = instruction->rm; + addressing_mode = instruction->addressing_mode; + } else if (instruction->opcode_flags & TO_REG) { + reg = instruction->reg; + addressing_mode = MOD_DIRECT; + } else + return (-1); + + regname = get_vm_reg_name(reg); + error = vm_get_register(vm, vcpu, regname, ®); + if (error) + return (error); + + switch(addressing_mode) { + case MOD_DIRECT: + return vm_set_register(vm, vcpu, regname, operand); + case MOD_INDIRECT: + target = gla2gpa(reg, guest_cr3); + emulated_memory = find_region(target); + if (emulated_memory) { + return emulated_memory->memwrite(vm, vcpu, target, + 4, operand, + emulated_memory->arg); + } + return (-1); + default: + return (-1); + } +} + +static int +emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t cr3, + const struct decoded_instruction *instruction) +{ + uint64_t operand; + int error; + + error = get_operand(vm, vcpu, cr3, instruction, &operand); + if (error) + return (error); + + return perform_write(vm, vcpu, cr3, instruction, operand); +} + 
+int +emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3) +{ + struct decoded_instruction instr; + int error; + void *instruction = gla2hla(rip, cr3); + + if ((error = decode_instruction(instruction, &instr)) != 0) + return (error); + + return emulate_decoded_instruction(vm, vcpu, cr3, &instr); +} + +struct memory_region * +register_emulated_memory(uintptr_t start, size_t len, emulated_read_func_t memread, + emulated_write_func_t memwrite, void *arg) +{ + if (registered_regions > MAX_EMULATED_REGIONS) + return (NULL); + + struct memory_region *region = &emulated_regions[registered_regions]; + region->start = start; + region->end = start + len; + region->memread = memread; + region->memwrite = memwrite; + region->arg = arg; + + registered_regions++; + return (region); +} + +void +move_memory_region(struct memory_region *region, uintptr_t start) +{ + size_t len; + + len = region->end - region->start; + region->start = start; + region->end = start + len; +} + diff --git a/usr.sbin/bhyve/instruction_emul.h b/usr.sbin/bhyve/instruction_emul.h new file mode 100644 index 0000000..e7b6bff --- /dev/null +++ b/usr.sbin/bhyve/instruction_emul.h @@ -0,0 +1,47 @@ +/*- + * Copyright (c) 2012 Sandvine, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _INSTRUCTION_EMUL_H_ +#define _INSTRUCTION_EMUL_H_ + +struct memory_region; + +typedef int (*emulated_read_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr, + int size, uint64_t *data, void *arg); +typedef int (*emulated_write_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr, + int size, uint64_t data, void *arg); + +int emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, + uint64_t cr3); +struct memory_region *register_emulated_memory(uintptr_t start, size_t len, + emulated_read_func_t memread, + emulated_write_func_t memwrite, + void *arg); +void move_memory_region(struct memory_region *memory_region, uintptr_t start); + +#endif diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 650c4de..9de87ad 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include "fbsdrun.h" #include "inout.h" #include "pci_emul.h" +#include "instruction_emul.h" #define CONF1_ADDR_PORT 0x0cf8 #define CONF1_DATA_PORT 0x0cfc @@ -572,6 +573,29 @@ pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) } void +msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, + int bytes, uint32_t val) +{ + 
uint16_t msgctrl, rwmask; + int off, table_bar; + + off = offset - capoff; + table_bar = pi->pi_msix.table_bar; + /* Message Control Register */ + if (off == 2 && bytes == 2) { + rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; + msgctrl = pci_get_cfgdata16(pi, offset); + msgctrl &= ~rwmask; + msgctrl |= val & rwmask; + val = msgctrl; + + pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; + } + + CFGWRITE(pi, offset, val, bytes); +} + +void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val) { @@ -847,6 +871,11 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, assert(0); } pci_set_cfgdata32(pi, coff, bar); + + if (pi->pi_bar[idx].handler) { + pi->pi_bar[idx].handler(pi, idx, bar); + } + } else if (pci_emul_iscap(pi, coff)) { pci_emul_capwrite(pi, coff, bytes, *eax); } else { diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h index f5f8e22..588e5ba 100644 --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -42,6 +42,7 @@ struct vmctx; struct pci_devinst; +struct memory_region; struct pci_devemu { char *pe_emu; /* Name of device emulation */ @@ -73,14 +74,30 @@ enum pcibar_type { PCIBAR_MEMHI64 }; +typedef int (*bar_write_func_t)(struct pci_devinst *pdi, int idx, uint64_t bar); + struct pcibar { enum pcibar_type type; /* io or memory */ uint64_t size; uint64_t addr; + bar_write_func_t handler; }; #define PI_NAMESZ 40 +struct msix_table_entry { + uint64_t addr; + uint32_t msg_data; + uint32_t vector_control; +} __packed; + +/* + * In case the structure is modified to hold extra information, use a define + * for the size that should be emulated. 
+ */ +#define MSIX_TABLE_ENTRY_SIZE 16 +#define MAX_MSIX_TABLE_SIZE 2048 + struct pci_devinst { struct pci_devemu *pi_d; struct vmctx *pi_vmctx; @@ -96,6 +113,19 @@ struct pci_devinst { int msgnum; } pi_msi; + struct { + int enabled; + int table_bar; + int pba_bar; + size_t table_offset; + uintptr_t table_gpa; + size_t table_size; + int table_count; + size_t pba_offset; + struct memory_region *table_bar_region; + struct msix_table_entry table[MAX_MSIX_TABLE_SIZE]; + } pi_msix; + void *pi_arg; /* devemu-private data */ u_char pi_cfgdata[PCI_REGMAX + 1]; @@ -111,6 +141,14 @@ struct msicap { uint16_t msgdata; } __packed; +struct msixcap { + uint8_t capid; + uint8_t nextptr; + uint16_t msgctrl; + uint32_t table_offset; + uint32_t pba_offset; +} __packed; + void init_pci(struct vmctx *ctx); void pci_parse_slot(char *opt); void pci_parse_name(char *opt); @@ -120,6 +158,8 @@ int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val); +void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, + int bytes, uint32_t val); void pci_generate_msi(struct pci_devinst *pi, int msgnum); int pci_msi_enabled(struct pci_devinst *pi); diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index 1c417fd..a6f1f63 100644 --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include "pci_emul.h" +#include "instruction_emul.h" #ifndef _PATH_DEVPCI #define _PATH_DEVPCI "/dev/pci" @@ -58,6 +59,11 @@ __FBSDID("$FreeBSD$"); #define LEGACY_SUPPORT 1 +#define MSIX_TABLE_BIR_MASK 7 +#define MSIX_TABLE_OFFSET_MASK (~MSIX_TABLE_BIR_MASK); +#define MSIX_TABLE_COUNT(x) (((x) & 0x7FF) + 1) +#define MSIX_CAPLEN 12 + static int pcifd = -1; static int iofd = -1; @@ -69,6 +75,9 @@ struct passthru_softc { int msgctrl; int 
emulated; } psc_msi; + struct { + int capoff; + } psc_msix; struct pcisel psc_sel; }; @@ -152,17 +161,19 @@ passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) static int cfginitmsi(struct passthru_softc *sc) { - int ptr, cap, sts, caplen; + int ptr, capptr, cap, sts, caplen; uint32_t u32; struct pcisel sel; struct pci_devinst *pi; + struct msixcap msixcap; + uint32_t *msixcap_ptr; pi = sc->psc_pi; sel = sc->psc_sel; /* * Parse the capabilities and cache the location of the MSI - * capability. + * and MSI-X capabilities. */ sts = read_config(&sel, PCIR_STATUS, 2); if (sts & PCIM_STATUS_CAPPRESENT) { @@ -179,18 +190,44 @@ cfginitmsi(struct passthru_softc *sc) ptr + 2, 2); sc->psc_msi.emulated = 0; caplen = msi_caplen(sc->psc_msi.msgctrl); + capptr = ptr; while (caplen > 0) { - u32 = read_config(&sel, ptr, 4); - pci_set_cfgdata32(pi, ptr, u32); + u32 = read_config(&sel, capptr, 4); + pci_set_cfgdata32(pi, capptr, u32); caplen -= 4; - ptr += 4; + capptr += 4; + } + } else if (cap == PCIY_MSIX) { + /* + * Copy the MSI-X capability + */ + sc->psc_msix.capoff = ptr; + caplen = 12; + msixcap_ptr = (uint32_t*) &msixcap; + capptr = ptr; + while (caplen > 0) { + u32 = read_config(&sel, capptr, 4); + *msixcap_ptr = u32; + pci_set_cfgdata32(pi, capptr, u32); + caplen -= 4; + capptr += 4; + msixcap_ptr++; } - break; } ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1); } } + if (sc->psc_msix.capoff == 0) + return (-1); + + pi->pi_msix.pba_bar = msixcap.pba_offset & MSIX_TABLE_BIR_MASK; + pi->pi_msix.pba_offset = msixcap.pba_offset & MSIX_TABLE_OFFSET_MASK; + pi->pi_msix.table_bar = msixcap.table_offset & MSIX_TABLE_BIR_MASK; + pi->pi_msix.table_offset = msixcap.table_offset & MSIX_TABLE_OFFSET_MASK; + + pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); + #ifdef LEGACY_SUPPORT /* * If the passthrough device does not support MSI then craft a @@ -208,12 +245,182 @@ cfginitmsi(struct passthru_softc *sc) } #endif - if (sc->psc_msi.capoff == 0) /* MSI or 
bust */ + /* Make sure one of the capabilities is present */ + if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) return (-1); else return (0); } +static int +msix_table_read(struct vmctx *vm, int vcpu, uintptr_t addr, + int size, uint64_t *data, void *arg) +{ + struct passthru_softc *sc; + struct pci_devinst *pi; + int index; + size_t offset, entry_offset; + uint8_t *src8; + uint16_t *src16; + uint32_t *src32; + uint64_t *src64; + struct msix_table_entry *entry; + + sc = arg; + pi = sc->psc_pi; + offset = addr - pi->pi_msix.table_gpa; + entry_offset = addr % MSIX_TABLE_ENTRY_SIZE; + index = offset / MSIX_TABLE_ENTRY_SIZE; + entry = &pi->pi_msix.table[index]; + + switch(size) { + case 1: + src8 = (uint8_t*)((void*)entry + entry_offset); + *data = *src8; + break; + case 2: + src16 = (uint16_t*)((void*)entry + entry_offset); + *data = *src16; + break; + case 4: + src32 = (uint32_t*)((void*)entry + entry_offset); + *data = *src32; + break; + case 8: + src64 = (uint64_t*)((void*)entry + entry_offset); + *data = *src64; + break; + default: + return (-1); + } + + return (0); +} + +static int +msix_table_write(struct vmctx *vm, int vcpu, uintptr_t addr, + int size, uint64_t data, void *arg) +{ + struct passthru_softc *sc; + struct pci_devinst *pi; + int error, index; + size_t offset, entry_offset; + uint32_t *dest; + struct msix_table_entry *entry; + uint32_t vector_control; + + sc = arg; + pi = sc->psc_pi; + offset = addr - pi->pi_msix.table_gpa; + entry_offset = addr % MSIX_TABLE_ENTRY_SIZE; + index = offset / MSIX_TABLE_ENTRY_SIZE; + entry = &pi->pi_msix.table[index]; + + /* Only 4 byte naturally-aligned writes are supported */ + if (size == 4 && entry_offset % 4 == 0) { + vector_control = entry->vector_control; + dest = (uint32_t*)((void*)entry + entry_offset); + *dest = data; + /* If MSI-X hasn't been enabled, do nothing */ + if (pi->pi_msix.enabled) { + /* If the entry is masked, don't set it up */ + if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || + 
(vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { + error = vm_setup_msix(vm, vcpu, sc->psc_sel.pc_bus, + sc->psc_sel.pc_dev, + sc->psc_sel.pc_func, + index, entry->msg_data, + entry->vector_control, + entry->addr); + if (error) + return (-1); + } + } + } else { + printf("Unsupported unaligned or non-4-byte write to MSI-X table\n"); + return (-1); + } + return (0); +} + +static int +msix_bar_handler(struct pci_devinst *pdi, int idx, uint64_t bar) +{ + uintptr_t start; + + start = (bar & PCIM_BAR_MEM_BASE) + pdi->pi_msix.table_offset; + move_memory_region(pdi->pi_msix.table_bar_region, start); + pdi->pi_msix.table_gpa = start; + return (0); +} + +static int +init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) +{ + int idx; + size_t table_size; + vm_paddr_t start; + size_t len; + struct pci_devinst *pi = sc->psc_pi; + + /* + * If the MSI-X table BAR maps memory intended for + * other uses, it is at least assured that the table + * either resides in its own page within the region, + * or it resides in a page shared with only the PBA. 
+ */ + if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar && + ((pi->pi_msix.pba_offset - pi->pi_msix.table_offset) < 4096)) { + /* Need to also emulate the PBA, not supported yet */ + printf("Unsupported MSI-X table and PBA in same page\n"); + return (-1); + } + /* + * May need to split the BAR into 3 regions: + * Before the MSI-X table, the MSI-X table, and after it + * XXX for now, assume that the table is not in the middle + */ + table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; + pi->pi_msix.table_size = table_size; + idx = pi->pi_msix.table_bar; + + /* Round up to page size */ + table_size = (table_size + 0x1000) & ~0xFFF; + if (pi->pi_msix.table_offset == 0) { + /* Map everything after the MSI-X table */ + start = pi->pi_bar[idx].addr + table_size; + len = pi->pi_bar[idx].size - table_size; + } else { + /* Map everything before the MSI-X table */ + start = pi->pi_bar[idx].addr; + len = pi->pi_msix.table_offset; + } + return vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, + sc->psc_sel.pc_dev, sc->psc_sel.pc_func, + start, len, base + table_size); +} + +static int +cfginitmsix(struct passthru_softc *sc) +{ + int table_bar; + struct pci_devinst *pi; + + pi = sc->psc_pi; + table_bar = pi->pi_msix.table_bar; + pi->pi_msix.table_gpa = sc->psc_bar[table_bar].addr + pi->pi_msix.table_offset; + pi->pi_msix.table_bar_region = register_emulated_memory(pi->pi_msix.table_gpa, + pi->pi_msix.table_size, + msix_table_read, + msix_table_write, sc); + if (!pi->pi_msix.table_bar_region) + return (-1); + + pi->pi_bar[table_bar].handler = msix_bar_handler; + + return (0); +} + static int cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) { @@ -262,10 +469,13 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) if (error) return (-1); - /* - * Map the physical MMIO space in the guest MMIO space - */ - if (bartype != PCIBAR_IO) { + /* The MSI-X table needs special handling */ + if (i == pi->pi_msix.table_bar) { + error = init_msix_table(ctx, sc, base); + if 
(error) + return (-1); + } else if (bartype != PCIBAR_IO) { + /* Map the physical MMIO space in the guest MMIO space */ error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_bar[i].addr, pi->pi_bar[i].size, base); @@ -299,10 +509,13 @@ cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func) sc->psc_sel.pc_dev = slot; sc->psc_sel.pc_func = func; + if (cfginitmsi(sc) != 0) + goto done; + if (cfginitbar(ctx, sc) != 0) goto done; - if (cfginitmsi(sc) != 0) + if (cfginitmsix(sc) != 0) goto done; error = 0; /* success */ @@ -381,6 +594,16 @@ msicap_access(struct passthru_softc *sc, int coff) return (0); } +static int +msixcap_access(struct passthru_softc *sc, int coff) +{ + if (sc->psc_msix.capoff == 0) + return (0); + + return (coff >= sc->psc_msix.capoff && + coff < sc->psc_msix.capoff + MSIX_CAPLEN); +} + static int passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, int bytes, uint32_t *rv) @@ -416,7 +639,7 @@ static int passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, int bytes, uint32_t val) { - int error; + int error, msix_table_entries, i; struct passthru_softc *sc; sc = pi->pi_arg; @@ -443,6 +666,27 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, return (0); } + if (msixcap_access(sc, coff)) { + msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val); + if (pi->pi_msix.enabled) { + msix_table_entries = pi->pi_msix.table_count; + for (i = 0; i < msix_table_entries; i++) { + error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus, + sc->psc_sel.pc_dev, + sc->psc_sel.pc_func, i, + pi->pi_msix.table[i].msg_data, + pi->pi_msix.table[i].vector_control, + pi->pi_msix.table[i].addr); + + if (error) { + printf("vm_setup_msix returned error %d\r\n", errno); + exit(1); + } + } + } + return (0); + } + #ifdef LEGACY_SUPPORT /* * If this device does not support MSI natively then we cannot let -- cgit v1.1 From 
a913fe5916677166a9ec486f83637d7b78eb4ec6 Mon Sep 17 00:00:00 2001 From: grehan Date: Thu, 3 May 2012 03:11:27 +0000 Subject: Add 16550 uart emulation as a PCI device. This allows it to be activated as part of the slot config options. The syntax is: -s ,uart[,stdio] The stdio parameter instructs the code to perform i/o using stdin/stdout. It can only be used for one instance. To allow legacy i/o ports/irqs to be used, a new variant of the slot command, -S, is introduced. When used to specify a slot, the device will use legacy resources if it supports them; otherwise it will be treated the same as the '-s' option. Specifying the -S option with the uart will first use the 0x3f8/irq 4 config, and the second -S will use 0x2F8/irq 3. Interrupt delivery is awaiting the arrival of the i/o apic code, but this works fine in uart(4)'s polled mode. This code was written by Cynthia Lu @ MIT while an intern at NetApp, with further work from neel@ and grehan@. Obtained from: NetApp --- usr.sbin/bhyve/Makefile | 2 +- usr.sbin/bhyve/fbsdrun.c | 8 +- usr.sbin/bhyve/pci_emul.c | 107 ++++++++- usr.sbin/bhyve/pci_emul.h | 36 +-- usr.sbin/bhyve/pci_uart.c | 599 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 725 insertions(+), 27 deletions(-) create mode 100644 usr.sbin/bhyve/pci_uart.c (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index f64e579..b381b8c 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -7,7 +7,7 @@ PROG= bhyve SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c SRCS+= instruction_emul.c mevent.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c -SRCS+= pci_virtio_net.c pit_8254.c post.c rtc.c uart.c xmsr.c +SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c NO_MAN= diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index c2295ea..f5a7efc 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -138,6 +138,7 @@ 
usage(int code) " -h: help\n" " -z: guest hz (default is %d)\n" " -s: PCI slot config\n" + " -S: legacy PCI slot config\n" " -n: PCI slot naming\n" " -m: lowmem in MB\n" " -M: highmem in MB\n" @@ -530,7 +531,7 @@ main(int argc, char *argv[]) gdb_port = DEFAULT_GDB_PORT; guest_ncpus = 1; - while ((c = getopt(argc, argv, "ehBHPxp:g:c:z:s:n:m:M:")) != -1) { + while ((c = getopt(argc, argv, "ehBHPxp:g:c:z:s:S:n:m:M:")) != -1) { switch (c) { case 'B': inject_bkpt = 1; @@ -554,7 +555,10 @@ main(int argc, char *argv[]) guest_tslice = atoi(optarg); break; case 's': - pci_parse_slot(optarg); + pci_parse_slot(optarg, 0); + break; + case 'S': + pci_parse_slot(optarg, 1); break; case 'n': pci_parse_name(optarg); diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 9de87ad..eabfb89 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -64,16 +64,28 @@ do { \ #define MAXSLOTS 32 static struct slotinfo { - char *si_name; - char *si_param; + char *si_name; + char *si_param; struct pci_devinst *si_devi; - int si_titled; - int si_pslot; - char si_prefix; - char si_suffix; + int si_titled; + int si_pslot; + char si_prefix; + char si_suffix; + int si_legacy; } pci_slotinfo[MAXSLOTS]; /* + * Used to keep track of legacy interrupt owners/requestors + */ +#define NLIRQ 16 + +static struct lirqinfo { + int li_generic; + int li_acount; + struct pci_devinst *li_owner; /* XXX should be a list */ +} lirq[NLIRQ]; + +/* * NetApp specific: * struct used to build an in-core OEM table to supply device names * to driver instances @@ -147,7 +159,7 @@ pci_parse_slot_usage(char *aopt) } void -pci_parse_slot(char *opt) +pci_parse_slot(char *opt, int legacy) { char *slot, *emul, *config; char *str, *cpy; @@ -174,6 +186,7 @@ pci_parse_slot(char *opt) } else { pci_slotinfo[snum].si_name = emul; pci_slotinfo[snum].si_param = config; + pci_slotinfo[snum].si_legacy = legacy; } } @@ -377,7 +390,12 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t 
hostbase, addr = mask = lobits = 0; break; case PCIBAR_IO: - baseptr = &pci_emul_iobase; + if (hostbase && pci_slotinfo[pdi->pi_slot].si_legacy) { + assert(hostbase < PCI_EMUL_IOBASE); + baseptr = &hostbase; + } else { + baseptr = &pci_emul_iobase; + } limit = PCI_EMUL_IOLIMIT; mask = PCIM_BAR_IO_BASE; lobits = PCIM_BAR_IO_SPACE; @@ -729,6 +747,16 @@ init_pci(struct vmctx *ctx) } } pci_finish_mptable_names(); + + /* + * Allow ISA IRQs 5,10,11,12, and 15 to be available for + * generic use + */ + lirq[5].li_generic = 1; + lirq[10].li_generic = 1; + lirq[11].li_generic = 1; + lirq[12].li_generic = 1; + lirq[15].li_generic = 1; } int @@ -757,6 +785,69 @@ pci_generate_msi(struct pci_devinst *pi, int msg) } } +int +pci_is_legacy(struct pci_devinst *pi) +{ + + return (pci_slotinfo[pi->pi_slot].si_legacy); +} + +static int +pci_lintr_alloc(struct pci_devinst *pi, int vec) +{ + int i; + + assert(vec < NLIRQ); + + if (vec == -1) { + for (i = 0; i < NLIRQ; i++) { + if (lirq[i].li_generic && + lirq[i].li_owner == NULL) { + vec = i; + break; + } + } + } else { + if (lirq[i].li_owner != NULL) { + vec = -1; + } + } + assert(vec != -1); + + lirq[vec].li_owner = pi; + pi->pi_lintr_pin = vec; + + return (vec); +} + +int +pci_lintr_request(struct pci_devinst *pi, int vec) +{ + + vec = pci_lintr_alloc(pi, vec); + pci_set_cfgdata8(pi, PCIR_INTLINE, vec); + pci_set_cfgdata8(pi, PCIR_INTPIN, 1); + return (0); +} + +void +pci_lintr_assert(struct pci_devinst *pi) +{ + + assert(pi->pi_lintr_pin); + /* ioapic_assert_pin(pi->pi_vmctx, pi->pi_lintr_pin); */ +} + +void +pci_lintr_deassert(struct pci_devinst *pi) +{ + + assert(pi->pi_lintr_pin); + /* ioapic_deassert_pin(pi->pi_vmctx, pi->pi_lintr_pin); */ +} + + + static int cfgbus, cfgslot, cfgfunc, cfgoff; static int diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h index 588e5ba..19dba99 100644 --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -102,6 +102,7 @@ struct pci_devinst { struct pci_devemu *pi_d; 
struct vmctx *pi_vmctx; uint8_t pi_bus, pi_slot, pi_func; + uint8_t pi_lintr_pin; char pi_name[PI_NAMESZ]; uint16_t pi_iobase; int pi_bar_getsize; @@ -149,22 +150,25 @@ struct msixcap { uint32_t pba_offset; } __packed; -void init_pci(struct vmctx *ctx); -void pci_parse_slot(char *opt); -void pci_parse_name(char *opt); -void pci_callback(void); -int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, - enum pcibar_type type, uint64_t size); -int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); -void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, - int bytes, uint32_t val); -void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, - int bytes, uint32_t val); - -void pci_generate_msi(struct pci_devinst *pi, int msgnum); -int pci_msi_enabled(struct pci_devinst *pi); -int pci_msi_msgnum(struct pci_devinst *pi); -void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr); +void init_pci(struct vmctx *ctx); +void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, + int bytes, uint32_t val); +void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, + int bytes, uint32_t val); +void pci_callback(void); +int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, + enum pcibar_type type, uint64_t size); +int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); +int pci_is_legacy(struct pci_devinst *pi); +void pci_generate_msi(struct pci_devinst *pi, int msgnum); +void pci_lintr_assert(struct pci_devinst *pi); +void pci_lintr_deassert(struct pci_devinst *pi); +int pci_lintr_request(struct pci_devinst *pi, int ivec); +int pci_msi_enabled(struct pci_devinst *pi); +int pci_msi_msgnum(struct pci_devinst *pi); +void pci_parse_name(char *opt); +void pci_parse_slot(char *opt, int legacy); +void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr); static __inline void pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val) diff --git 
a/usr.sbin/bhyve/pci_uart.c b/usr.sbin/bhyve/pci_uart.c new file mode 100644 index 0000000..1cb485e --- /dev/null +++ b/usr.sbin/bhyve/pci_uart.c @@ -0,0 +1,599 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "fbsdrun.h" +#include "pci_emul.h" +#include "mevent.h" + +#define COM1_BASE 0x3F8 +#define COM1_IRQ 4 +#define COM2_BASE 0x2F8 +#define COM2_IRQ 3 + +#define DEFAULT_RCLK 1843200 +#define DEFAULT_BAUD 9600 + +#define FCR_RX_MASK 0xC0 + +#define MCR_OUT1 0x04 +#define MCR_OUT2 0x08 + +#define MSR_DELTA_MASK 0x0f + +#ifndef REG_SCR +#define REG_SCR com_scr +#endif + +#define FIFOSZ 16 + +/* + * Pick a PCI vid/did of a chip with a single uart at + * BAR0, that most versions of FreeBSD can understand: + * Siig CyberSerial 1-port. + */ +#define COM_VENDOR 0x131f +#define COM_DEV 0x2000 + +static int pci_uart_stdio; /* stdio in use for i/o */ + +static int pci_uart_nldevs; /* number of legacy devices - 2 max */ + +static struct { + uint64_t baddr; + int vector; +} pci_uart_lres[] = { + { COM1_BASE, COM1_IRQ}, + { COM2_BASE, COM2_IRQ}, + { 0, 0 } +}; + +struct fifo { + uint8_t buf[FIFOSZ]; + int rindex; /* index to read from */ + int windex; /* index to write to */ + int num; /* number of characters in the fifo */ + int size; /* size of the fifo */ +}; + +struct pci_uart_softc { + struct pci_devinst *pi; + uint8_t data; /* Data register (R/W) */ + uint8_t ier; /* Interrupt enable register (R/W) */ + uint8_t lcr; /* Line control register (R/W) */ + uint8_t mcr; /* Modem control register (R/W) */ + uint8_t lsr; /* Line status register (R/W) */ + uint8_t msr; /* Modem status register (R/W) */ + uint8_t fcr; /* FIFO control register (W) */ + uint8_t scr; /* Scratch register (R/W) */ + + uint8_t dll; /* Baudrate divisor latch LSB */ + uint8_t dlh; /* Baudrate divisor latch MSB */ + + struct fifo rxfifo; + + int opened; + int stdio; + bool thre_int_pending; /* THRE interrupt pending */ +}; + +static void pci_uart_drain(int fd, enum ev_type ev, void *arg); + +static struct termios tio_orig, tio_new; /* I/O 
Terminals */ + +static void +ttyclose(void) +{ + tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig); +} + +static void +ttyopen(void) +{ + tcgetattr(STDIN_FILENO, &tio_orig); + + cfmakeraw(&tio_new); + tcsetattr(STDIN_FILENO, TCSANOW, &tio_new); + + atexit(ttyclose); +} + +static bool +tty_char_available(void) +{ + fd_set rfds; + struct timeval tv; + + FD_ZERO(&rfds); + FD_SET(STDIN_FILENO, &rfds); + tv.tv_sec = 0; + tv.tv_usec = 0; + if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0 ) { + return (true); + } else { + return (false); + } +} + +static int +ttyread(void) +{ + char rb; + + if (tty_char_available()) { + read(STDIN_FILENO, &rb, 1); + return (rb & 0xff); + } else { + return (-1); + } +} + +static void +ttywrite(unsigned char wb) +{ + (void) write(STDIN_FILENO, &wb, 1); +} + +static void +fifo_reset(struct fifo *fifo, int size) +{ + bzero(fifo, sizeof(struct fifo)); + fifo->size = size; +} + +static int +fifo_putchar(struct fifo *fifo, uint8_t ch) +{ + + if (fifo->num < fifo->size) { + fifo->buf[fifo->windex] = ch; + fifo->windex = (fifo->windex + 1) % fifo->size; + fifo->num++; + return (0); + } else + return (-1); +} + +static int +fifo_getchar(struct fifo *fifo) +{ + int c; + + if (fifo->num > 0) { + c = fifo->buf[fifo->rindex]; + fifo->rindex = (fifo->rindex + 1) % fifo->size; + fifo->num--; + return (c); + } else + return (-1); +} + +static int +fifo_numchars(struct fifo *fifo) +{ + + return (fifo->num); +} + +static void +pci_uart_opentty(struct pci_uart_softc *sc) +{ + struct mevent *mev; + + assert(sc->opened == 0); + assert(sc->stdio); + + ttyopen(); + mev = mevent_add(STDIN_FILENO, EVF_READ, pci_uart_drain, sc); + assert(mev); +} + +static void +pci_uart_legacy_res(uint64_t *bar, int *ivec) +{ + if (pci_uart_lres[pci_uart_nldevs].baddr != 0) { + *bar = pci_uart_lres[pci_uart_nldevs].baddr; + *ivec = pci_uart_lres[pci_uart_nldevs].vector; + pci_uart_nldevs++; + } else { + /* TODO: print warning ? 
*/ + *bar = 0; + *ivec= -1; + } +} + +/* + * The IIR returns a prioritized interrupt reason: + * - receive data available + * - transmit holding register empty + * - modem status change + * + * Return an interrupt reason if one is available. + */ +static int +pci_uart_intr_reason(struct pci_uart_softc *sc) +{ + + if ((sc->lsr & LSR_OE) != 0 && (sc->ier & IER_ERLS) != 0) + return (IIR_RLS); + else if (fifo_numchars(&sc->rxfifo) > 0 && (sc->ier & IER_ERXRDY) != 0) + return (IIR_RXTOUT); + else if (sc->thre_int_pending && (sc->ier & IER_ETXRDY) != 0) + return (IIR_TXRDY); + else if ((sc->msr & MSR_DELTA_MASK) != 0 && (sc->ier & IER_EMSC) != 0) + return (IIR_MLSC); + else + return (IIR_NOPEND); +} + +static void +pci_uart_reset(struct pci_uart_softc *sc) +{ + uint16_t divisor; + + divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16; + sc->dll = divisor; + sc->dlh = divisor >> 16; + + fifo_reset(&sc->rxfifo, 1); /* no fifo until enabled by software */ +} + +/* + * Toggle the COM port's intr pin depending on whether or not we have an + * interrupt condition to report to the processor. + */ +static void +pci_uart_toggle_intr(struct pci_uart_softc *sc) +{ + uint8_t intr_reason; + + intr_reason = pci_uart_intr_reason(sc); + + if (intr_reason == IIR_NOPEND) + pci_lintr_deassert(sc->pi); + else + pci_lintr_assert(sc->pi); +} + +static void +pci_uart_drain(int fd, enum ev_type ev, void *arg) +{ + struct pci_uart_softc *sc; + int ch; + + sc = arg; + + assert(fd == STDIN_FILENO); + assert(ev == EVF_READ); + + while ((ch = ttyread()) != -1) { + /* + * If we are in loopback mode then drop this character. 
+ */ + if ((sc->mcr & MCR_LOOPBACK) != 0) + continue; + + if (fifo_putchar(&sc->rxfifo, ch) != 0) + sc->lsr |= LSR_OE; + } + + pci_uart_toggle_intr(sc); +} + +static void +pci_uart_write(struct pci_devinst *pi, int baridx, int offset, int size, + uint32_t value) +{ + struct pci_uart_softc *sc; + int fifosz; + uint8_t msr; + + sc = pi->pi_arg; + + assert(size == 1); + + /* Open terminal */ + if (!sc->opened && sc->stdio) { + pci_uart_opentty(sc); + sc->opened = 1; + } + + /* + * Take care of the special case DLAB accesses first + */ + if ((sc->lcr & LCR_DLAB) != 0) { + if (offset == REG_DLL) { + sc->dll = value; + goto done; + } + + if (offset == REG_DLH) { + sc->dlh = value; + goto done; + } + } + + switch (offset) { + case REG_DATA: + if (sc->mcr & MCR_LOOPBACK) { + if (fifo_putchar(&sc->rxfifo, value) != 0) + sc->lsr |= LSR_OE; + } else if (sc->stdio) { + ttywrite(value); + } /* else drop on floor */ + sc->thre_int_pending = true; + break; + case REG_IER: + /* + * Apply mask so that bits 4-7 are 0 + * Also enables bits 0-3 only if they're 1 + */ + sc->ier = value & 0x0F; + break; + case REG_FCR: + /* + * When moving from FIFO and 16450 mode and vice versa, + * the FIFO contents are reset. + */ + if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) { + fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1; + fifo_reset(&sc->rxfifo, fifosz); + } + + /* + * The FCR_ENABLE bit must be '1' for the programming + * of other FCR bits to be effective. 
+ */ + if ((value & FCR_ENABLE) == 0) { + sc->fcr = 0; + } else { + if ((value & FCR_RCV_RST) != 0) + fifo_reset(&sc->rxfifo, FIFOSZ); + + sc->fcr = value & + (FCR_ENABLE | FCR_DMA | FCR_RX_MASK); + } + break; + case REG_LCR: + sc->lcr = value; + break; + case REG_MCR: + /* Apply mask so that bits 5-7 are 0 */ + sc->mcr = value & 0x1F; + + msr = 0; + if (sc->mcr & MCR_LOOPBACK) { + /* + * In the loopback mode certain bits from the + * MCR are reflected back into MSR + */ + if (sc->mcr & MCR_RTS) + msr |= MSR_CTS; + if (sc->mcr & MCR_DTR) + msr |= MSR_DSR; + if (sc->mcr & MCR_OUT1) + msr |= MSR_RI; + if (sc->mcr & MCR_OUT2) + msr |= MSR_DCD; + } + + /* + * Detect if there has been any change between the + * previous and the new value of MSR. If there is + * then assert the appropriate MSR delta bit. + */ + if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS)) + sc->msr |= MSR_DCTS; + if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR)) + sc->msr |= MSR_DDSR; + if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD)) + sc->msr |= MSR_DDCD; + if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0) + sc->msr |= MSR_TERI; + + /* + * Update the value of MSR while retaining the delta + * bits. + */ + sc->msr &= MSR_DELTA_MASK; + sc->msr |= msr; + break; + case REG_LSR: + /* + * Line status register is not meant to be written to + * during normal operation. + */ + break; + case REG_MSR: + /* + * As far as I can tell MSR is a read-only register. 
+ */ + break; + case REG_SCR: + sc->scr = value; + break; + default: + break; + } + +done: + pci_uart_toggle_intr(sc); +} + +uint32_t +pci_uart_read(struct pci_devinst *pi, int baridx, int offset, int size) +{ + struct pci_uart_softc *sc; + uint8_t iir, intr_reason; + uint32_t reg; + + sc = pi->pi_arg; + + assert(size == 1); + + /* Open terminal */ + if (!sc->opened && sc->stdio) { + pci_uart_opentty(sc); + sc->opened = 1; + } + + /* + * Take care of the special case DLAB accesses first + */ + if ((sc->lcr & LCR_DLAB) != 0) { + if (offset == REG_DLL) { + reg = sc->dll; + goto done; + } + + if (offset == REG_DLH) { + reg = sc->dlh; + goto done; + } + } + + switch (offset) { + case REG_DATA: + reg = fifo_getchar(&sc->rxfifo); + break; + case REG_IER: + reg = sc->ier; + break; + case REG_IIR: + iir = (sc->fcr & FCR_ENABLE) ? IIR_FIFO_MASK : 0; + + intr_reason = pci_uart_intr_reason(sc); + + /* + * Deal with side effects of reading the IIR register + */ + if (intr_reason == IIR_TXRDY) + sc->thre_int_pending = false; + + iir |= intr_reason; + + reg = iir; + break; + case REG_LCR: + reg = sc->lcr; + break; + case REG_MCR: + reg = sc->mcr; + break; + case REG_LSR: + /* Transmitter is always ready for more data */ + sc->lsr |= LSR_TEMT | LSR_THRE; + + /* Check for new receive data */ + if (fifo_numchars(&sc->rxfifo) > 0) + sc->lsr |= LSR_RXRDY; + else + sc->lsr &= ~LSR_RXRDY; + + reg = sc->lsr; + + /* The LSR_OE bit is cleared on LSR read */ + sc->lsr &= ~LSR_OE; + break; + case REG_MSR: + /* + * MSR delta bits are cleared on read + */ + reg = sc->msr; + sc->msr &= ~MSR_DELTA_MASK; + break; + case REG_SCR: + reg = sc->scr; + break; + default: + reg = 0xFF; + break; + } + +done: + pci_uart_toggle_intr(sc); + return (reg); +} + +static int +pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) +{ + struct pci_uart_softc *sc; + uint64_t bar; + int ivec; + + sc = malloc(sizeof(struct pci_uart_softc)); + memset(sc, 0, sizeof(struct pci_uart_softc)); + + 
pi->pi_arg = sc; + sc->pi = pi; + + /* initialize config space */ + pci_set_cfgdata16(pi, PCIR_DEVICE, COM_DEV); + pci_set_cfgdata16(pi, PCIR_VENDOR, COM_VENDOR); + pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM); + if (pci_is_legacy(pi)) { + pci_uart_legacy_res(&bar, &ivec); + } else { + bar = 0; + ivec = -1; + } + pci_emul_alloc_bar(pi, 0, bar, PCIBAR_IO, 8); + pci_lintr_request(pi, ivec); + + if (!strcmp("stdio", opts) && !pci_uart_stdio) { + pci_uart_stdio = 1; + sc->stdio = 1; + } + + pci_uart_reset(sc); + + return (0); +} + +struct pci_devemu pci_de_com = { + .pe_emu = "uart", + .pe_init = pci_uart_init, + .pe_iow = pci_uart_write, + .pe_ior = pci_uart_read, +}; +PCI_EMUL_SET(pci_de_com); -- cgit v1.1 From 32b8268faaa2a9f5334563bbd7936b6f0362874a Mon Sep 17 00:00:00 2001 From: grehan Date: Thu, 12 Jul 2012 00:27:33 +0000 Subject: Add --get-all option to dump all readable parameters. Submitted by: Takuya Asada syuu at dokukino dot com Reviewed by: grehan, neel Obtained from: GSoC 12 bhyve project --- usr.sbin/vmmctl/vmmctl.c | 176 ++++++++++++++++++++++++----------------------- 1 file changed, 90 insertions(+), 86 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/vmmctl/vmmctl.c b/usr.sbin/vmmctl/vmmctl.c index 346cde1..c0e0402 100644 --- a/usr.sbin/vmmctl/vmmctl.c +++ b/usr.sbin/vmmctl/vmmctl.c @@ -68,6 +68,7 @@ usage(void) " [--cpu=]\n" " [--create]\n" " [--destroy]\n" + " [--get-all]\n" " [--get-stats]\n" " [--set-desc-ds]\n" " [--get-desc-ds]\n" @@ -245,6 +246,8 @@ static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error; static uint64_t desc_base; static uint32_t desc_limit, desc_access; +static int get_all; + static void dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) { @@ -544,6 +547,7 @@ main(int argc, char *argv[]) { "get-vmcs-interruptibility", NO_ARG, &get_vmcs_interruptibility, 1 }, { "get-pinning",NO_ARG, &get_pinning, 1 }, + { "get-all", NO_ARG, &get_all, 1 }, { "run", NO_ARG, &run, 1 }, { "create", NO_ARG, 
&create, 1 }, { "destroy", NO_ARG, &destroy, 1 }, @@ -810,158 +814,158 @@ main(int argc, char *argv[]) vmcs_entry_interruption_info); } - if (!error && get_lowmem) { + if (!error && (get_lowmem || get_all)) { error = vm_get_memory_seg(ctx, 0, &hpa, &len); if (error == 0) printf("lowmem\t\t0x%016lx/%ld\n", hpa, len); } - if (!error && get_highmem) { + if (!error && (get_highmem || get_all)) { error = vm_get_memory_seg(ctx, 4 * GB, &hpa, &len); if (error == 0) printf("highmem\t\t0x%016lx/%ld\n", hpa, len); } - if (!error && get_efer) { + if (!error && (get_efer || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer); if (error == 0) printf("efer[%d]\t\t0x%016lx\n", vcpu, efer); } - if (!error && get_cr0) { + if (!error && (get_cr0 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0); if (error == 0) printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0); } - if (!error && get_cr3) { + if (!error && (get_cr3 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3); if (error == 0) printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3); } - if (!error && get_cr4) { + if (!error && (get_cr4 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4); if (error == 0) printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4); } - if (!error && get_dr7) { + if (!error && (get_dr7 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); if (error == 0) printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7); } - if (!error && get_rsp) { + if (!error && (get_rsp || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp); if (error == 0) printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp); } - if (!error && get_rip) { + if (!error && (get_rip || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); if (error == 0) printf("rip[%d]\t\t0x%016lx\n", vcpu, rip); } - if (!error && get_rax) { + if (!error && (get_rax || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax); if (error == 0) 
printf("rax[%d]\t\t0x%016lx\n", vcpu, rax); } - if (!error && get_rbx) { + if (!error && (get_rbx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx); if (error == 0) printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx); } - if (!error && get_rcx) { + if (!error && (get_rcx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx); if (error == 0) printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx); } - if (!error && get_rdx) { + if (!error && (get_rdx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx); if (error == 0) printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx); } - if (!error && get_rsi) { + if (!error && (get_rsi || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi); if (error == 0) printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi); } - if (!error && get_rdi) { + if (!error && (get_rdi || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi); if (error == 0) printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi); } - if (!error && get_rbp) { + if (!error && (get_rbp || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp); if (error == 0) printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp); } - if (!error && get_r8) { + if (!error && (get_r8 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8); if (error == 0) printf("r8[%d]\t\t0x%016lx\n", vcpu, r8); } - if (!error && get_r9) { + if (!error && (get_r9 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9); if (error == 0) printf("r9[%d]\t\t0x%016lx\n", vcpu, r9); } - if (!error && get_r10) { + if (!error && (get_r10 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10); if (error == 0) printf("r10[%d]\t\t0x%016lx\n", vcpu, r10); } - if (!error && get_r11) { + if (!error && (get_r11 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11); if (error == 0) printf("r11[%d]\t\t0x%016lx\n", vcpu, r11); } - if (!error && get_r12) { + if (!error && (get_r12 || get_all)) { error = 
vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12); if (error == 0) printf("r12[%d]\t\t0x%016lx\n", vcpu, r12); } - if (!error && get_r13) { + if (!error && (get_r13 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13); if (error == 0) printf("r13[%d]\t\t0x%016lx\n", vcpu, r13); } - if (!error && get_r14) { + if (!error && (get_r14 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14); if (error == 0) printf("r14[%d]\t\t0x%016lx\n", vcpu, r14); } - if (!error && get_r15) { + if (!error && (get_r15 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15); if (error == 0) printf("r15[%d]\t\t0x%016lx\n", vcpu, r15); } - if (!error && get_rflags) { + if (!error && (get_rflags || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error == 0) printf("rflags[%d]\t0x%016lx\n", vcpu, rflags); } - if (!error && get_stats) { + if (!error && (get_stats || get_all)) { int i, num_stats; uint64_t *stats; struct timeval tv; @@ -977,7 +981,7 @@ main(int argc, char *argv[]) } } - if (!error && get_desc_ds) { + if (!error && (get_desc_ds || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -986,7 +990,7 @@ main(int argc, char *argv[]) } } - if (!error && get_desc_es) { + if (!error && (get_desc_es || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -995,7 +999,7 @@ main(int argc, char *argv[]) } } - if (!error && get_desc_fs) { + if (!error && (get_desc_fs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -1004,7 +1008,7 @@ main(int argc, char *argv[]) } } - if (!error && get_desc_gs) { + if (!error && (get_desc_gs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -1013,7 +1017,7 @@ main(int argc, char *argv[]) 
} } - if (!error && get_desc_ss) { + if (!error && (get_desc_ss || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -1022,7 +1026,7 @@ main(int argc, char *argv[]) } } - if (!error && get_desc_cs) { + if (!error && (get_desc_cs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -1031,7 +1035,7 @@ main(int argc, char *argv[]) } } - if (!error && get_desc_tr) { + if (!error && (get_desc_tr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -1040,7 +1044,7 @@ main(int argc, char *argv[]) } } - if (!error && get_desc_ldtr) { + if (!error && (get_desc_ldtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -1049,7 +1053,7 @@ main(int argc, char *argv[]) } } - if (!error && get_desc_gdtr) { + if (!error && (get_desc_gdtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -1058,7 +1062,7 @@ main(int argc, char *argv[]) } } - if (!error && get_desc_idtr) { + if (!error && (get_desc_idtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { @@ -1067,55 +1071,55 @@ main(int argc, char *argv[]) } } - if (!error && get_cs) { + if (!error && (get_cs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs); if (error == 0) printf("cs[%d]\t\t0x%04lx\n", vcpu, cs); } - if (!error && get_ds) { + if (!error && (get_ds || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds); if (error == 0) printf("ds[%d]\t\t0x%04lx\n", vcpu, ds); } - if (!error && get_es) { + if (!error && (get_es || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es); if (error == 0) printf("es[%d]\t\t0x%04lx\n", vcpu, es); } - if (!error 
&& get_fs) { + if (!error && (get_fs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_FS, &fs); if (error == 0) printf("fs[%d]\t\t0x%04lx\n", vcpu, fs); } - if (!error && get_gs) { + if (!error && (get_gs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs); if (error == 0) printf("gs[%d]\t\t0x%04lx\n", vcpu, gs); } - if (!error && get_ss) { + if (!error && (get_ss || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss); if (error == 0) printf("ss[%d]\t\t0x%04lx\n", vcpu, ss); } - if (!error && get_tr) { + if (!error && (get_tr || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr); if (error == 0) printf("tr[%d]\t\t0x%04lx\n", vcpu, tr); } - if (!error && get_ldtr) { + if (!error && (get_ldtr || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr); if (error == 0) printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr); } - if (!error && get_pinning) { + if (!error && (get_pinning || get_all)) { error = vm_get_pinning(ctx, vcpu, &pincpu); if (error == 0) { if (pincpu < 0) @@ -1125,41 +1129,41 @@ main(int argc, char *argv[]) } } - if (!error && get_pinbased_ctls) { + if (!error && (get_pinbased_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl); if (error == 0) printf("pinbased_ctls[%d]\t0x%08lx\n", vcpu, ctl); } - if (!error && get_procbased_ctls) { + if (!error && (get_procbased_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_PRI_PROC_BASED_CTLS, &ctl); if (error == 0) printf("procbased_ctls[%d]\t0x%08lx\n", vcpu, ctl); } - if (!error && get_procbased_ctls2) { + if (!error && (get_procbased_ctls2 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_SEC_PROC_BASED_CTLS, &ctl); if (error == 0) printf("procbased_ctls2[%d]\t0x%08lx\n", vcpu, ctl); } - if (!error && get_vmcs_gla) { + if (!error && (get_vmcs_gla || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_LINEAR_ADDRESS, &u64); if (error == 0) printf("gla[%d]\t\t0x%016lx\n", 
vcpu, u64); } - if (!error && get_vmcs_gpa) { + if (!error && (get_vmcs_gpa || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_PHYSICAL_ADDRESS, &u64); if (error == 0) printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64); } - if (!error && get_vmcs_entry_interruption_info) { + if (!error && (get_vmcs_entry_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64); if (error == 0) { printf("entry_interruption_info[%d]\t0x%08lx\n", @@ -1167,20 +1171,20 @@ main(int argc, char *argv[]) } } - if (!error && get_eptp) { + if (!error && (get_eptp || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp); if (error == 0) printf("eptp[%d]\t\t0x%016lx\n", vcpu, eptp); } - if (!error && get_exception_bitmap) { + if (!error && (get_exception_bitmap || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, &bm); if (error == 0) printf("exception_bitmap[%d]\t0x%08lx\n", vcpu, bm); } - if (!error && get_io_bitmap) { + if (!error && (get_io_bitmap || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A, &bm); if (error == 0) printf("io_bitmap_a[%d]\t0x%08lx\n", vcpu, bm); @@ -1189,21 +1193,21 @@ main(int argc, char *argv[]) printf("io_bitmap_b[%d]\t0x%08lx\n", vcpu, bm); } - if (!error && get_tsc_offset) { + if (!error && (get_tsc_offset || get_all)) { uint64_t tscoff; error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET, &tscoff); if (error == 0) printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff); } - if (!error && get_cr0_mask) { + if (!error && (get_cr0_mask || get_all)) { uint64_t cr0mask; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask); if (error == 0) printf("cr0_mask[%d]\t\t0x%016lx\n", vcpu, cr0mask); } - if (!error && get_cr0_shadow) { + if (!error && (get_cr0_shadow || get_all)) { uint64_t cr0shadow; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW, &cr0shadow); @@ -1211,14 +1215,14 @@ main(int argc, char *argv[]) printf("cr0_shadow[%d]\t\t0x%016lx\n", vcpu, cr0shadow); } 
- if (!error && get_cr4_mask) { + if (!error && (get_cr4_mask || get_all)) { uint64_t cr4mask; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask); if (error == 0) printf("cr4_mask[%d]\t\t0x%016lx\n", vcpu, cr4mask); } - if (!error && get_cr4_shadow) { + if (!error && (get_cr4_shadow || get_all)) { uint64_t cr4shadow; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW, &cr4shadow); @@ -1226,7 +1230,7 @@ main(int argc, char *argv[]) printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow); } - if (!error && get_cr3_targets) { + if (!error && (get_cr3_targets || get_all)) { uint64_t target_count, target_addr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT, &target_count); @@ -1264,19 +1268,19 @@ main(int argc, char *argv[]) } } - if (!error && get_apic_access_addr) { + if (!error && (get_apic_access_addr || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_APIC_ACCESS, &addr); if (error == 0) printf("apic_access_addr[%d]\t0x%016lx\n", vcpu, addr); } - if (!error && get_virtual_apic_addr) { + if (!error && (get_virtual_apic_addr || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_VIRTUAL_APIC, &addr); if (error == 0) printf("virtual_apic_addr[%d]\t0x%016lx\n", vcpu, addr); } - if (!error && get_tpr_threshold) { + if (!error && (get_tpr_threshold || get_all)) { uint64_t threshold; error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD, &threshold); @@ -1284,40 +1288,40 @@ main(int argc, char *argv[]) printf("tpr_threshold[%d]\t0x%08lx\n", vcpu, threshold); } - if (!error && get_msr_bitmap_address) { + if (!error && (get_msr_bitmap_address || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); if (error == 0) printf("msr_bitmap[%d]\t\t0x%016lx\n", vcpu, addr); } - if (!error && get_msr_bitmap) { + if (!error && (get_msr_bitmap || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); if (error == 0) error = dump_vmcs_msr_bitmap(vcpu, addr); } - if (!error && get_vpid) { + if (!error && (get_vpid 
|| get_all)) { uint64_t vpid; error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid); if (error == 0) printf("vpid[%d]\t\t0x%04lx\n", vcpu, vpid); } - if (!error && get_ple_window) { + if (!error && (get_ple_window || get_all)) { uint64_t window; error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_WINDOW, &window); if (error == 0) printf("ple_window[%d]\t\t0x%08lx\n", vcpu, window); } - if (!error && get_ple_gap) { + if (!error && (get_ple_gap || get_all)) { uint64_t gap; error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_GAP, &gap); if (error == 0) printf("ple_gap[%d]\t\t0x%08lx\n", vcpu, gap); } - if (!error && get_inst_err) { + if (!error && (get_inst_err || get_all)) { uint64_t insterr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR, &insterr); @@ -1327,61 +1331,61 @@ main(int argc, char *argv[]) } } - if (!error && get_exit_ctls) { + if (!error && (get_exit_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl); if (error == 0) printf("exit_ctls[%d]\t\t0x%08lx\n", vcpu, ctl); } - if (!error && get_entry_ctls) { + if (!error && (get_entry_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl); if (error == 0) printf("entry_ctls[%d]\t\t0x%08lx\n", vcpu, ctl); } - if (!error && get_host_pat) { + if (!error && (get_host_pat || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat); if (error == 0) printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat); } - if (!error && get_guest_pat) { + if (!error && (get_guest_pat || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_PAT, &pat); if (error == 0) printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat); } - if (!error && get_host_cr0) { + if (!error && (get_host_cr0 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0); if (error == 0) printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0); } - if (!error && get_host_cr3) { + if (!error && (get_host_cr3 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3); if 
(error == 0) printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3); } - if (!error && get_host_cr4) { + if (!error && (get_host_cr4 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4); if (error == 0) printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4); } - if (!error && get_host_rip) { + if (!error && (get_host_rip || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip); if (error == 0) printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip); } - if (!error && get_host_rsp) { + if (!error && (get_host_rsp || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp); if (error == 0) printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rsp); } - if (!error && get_guest_sysenter) { + if (!error && (get_guest_sysenter || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_CS, &cs); if (error == 0) @@ -1397,19 +1401,19 @@ main(int argc, char *argv[]) printf("guest_sysenter_ip[%d]\t0x%016lx\n", vcpu, rip); } - if (!error && get_vmcs_link) { + if (!error && (get_vmcs_link || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr); if (error == 0) printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr); } - if (!error && get_vmcs_exit_reason) { + if (!error && (get_vmcs_exit_reason || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64); if (error == 0) printf("vmcs_exit_reason[%d]\t0x%016lx\n", vcpu, u64); } - if (!error && get_vmcs_exit_qualification) { + if (!error && (get_vmcs_exit_qualification || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION, &u64); if (error == 0) @@ -1417,7 +1421,7 @@ main(int argc, char *argv[]) vcpu, u64); } - if (!error && get_vmcs_exit_interruption_info) { + if (!error && (get_vmcs_exit_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTERRUPTION_INFO, &u64); if (error == 0) { @@ -1426,7 +1430,7 @@ main(int argc, char *argv[]) } } - if (!error && get_vmcs_exit_interruption_error) { + if (!error && 
(get_vmcs_exit_interruption_error || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTERRUPTION_ERROR, &u64); if (error == 0) { @@ -1435,7 +1439,7 @@ main(int argc, char *argv[]) } } - if (!error && get_vmcs_interruptibility) { + if (!error && (get_vmcs_interruptibility || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_INTERRUPTIBILITY, &u64); if (error == 0) { @@ -1452,7 +1456,7 @@ main(int argc, char *argv[]) printf("Capability \"%s\" is not available\n", capname); } - if (!error && getcap) { + if (!error && (getcap || get_all)) { int captype, val; captype = vm_capability_name2type(capname); error = vm_get_capability(ctx, vcpu, captype, &val); -- cgit v1.1 From 9e8084ff867746055e5b7f4290819f36ab3461a3 Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 4 Aug 2012 04:24:41 +0000 Subject: Check that 'opts' is actually not NULL before dereferencing it. It is expected that 'opts' will be NULL for the second serial port (-S ,uart) --- usr.sbin/bhyve/pci_uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_uart.c b/usr.sbin/bhyve/pci_uart.c index 1cb485e..0f8a281 100644 --- a/usr.sbin/bhyve/pci_uart.c +++ b/usr.sbin/bhyve/pci_uart.c @@ -580,7 +580,7 @@ pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_emul_alloc_bar(pi, 0, bar, PCIBAR_IO, 8); pci_lintr_request(pi, ivec); - if (!strcmp("stdio", opts) && !pci_uart_stdio) { + if (opts != NULL && !strcmp("stdio", opts) && !pci_uart_stdio) { pci_uart_stdio = 1; sc->stdio = 1; } -- cgit v1.1 From 5b0a5213170e5aaff34703c1e244b4b983c70962 Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 4 Aug 2012 04:26:17 +0000 Subject: Use the correct variable to index into the 'lirq[]' array to check the legacy IRQ ownership. 
--- usr.sbin/bhyve/pci_emul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index eabfb89..faaea73 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -808,7 +808,7 @@ pci_lintr_alloc(struct pci_devinst *pi, int vec) } } } else { - if (lirq[i].li_owner != NULL) { + if (lirq[vec].li_owner != NULL) { vec = -1; } } -- cgit v1.1 From 72240bed65f0a58f17a96e6a3931f12e92eb37f1 Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 4 Aug 2012 22:48:04 +0000 Subject: Add the "-I" option to control whether or not an ioapic is visible to the guest. Obtained from: NetApp --- usr.sbin/bhyve/fbsdrun.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index f5a7efc..cb34477 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -126,13 +126,14 @@ usage(int code) { fprintf(stderr, - "Usage: %s [-ehBHP][-g ][-z ][-s ][-p pincpu]" - "[-n ][-m lowmem][-M highmem] \n" + "Usage: %s [-ehBHIP][-g ][-z ][-s ]" + "[-S ][-p pincpu][-n ][-m lowmem][-M highmem] \n" " -g: gdb port (default is %d and 0 means don't open)\n" " -c: # cpus (default 1)\n" " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" " -B: inject breakpoint exception on vm entry\n" " -H: vmexit from the guest on hlt\n" + " -I: present an ioapic to the guest\n" " -P: vmexit from the guest on pause\n" " -e: exit on unhandled i/o access\n" " -h: help\n" @@ -522,7 +523,7 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) int main(int argc, char *argv[]) { - int c, error, gdb_port, inject_bkpt, tmp, err; + int c, error, gdb_port, inject_bkpt, tmp, err, ioapic; struct vmctx *ctx; uint64_t rip; @@ -530,8 +531,9 @@ main(int argc, char *argv[]) progname = basename(argv[0]); gdb_port = DEFAULT_GDB_PORT; guest_ncpus = 1; + ioapic = 0; - while ((c = getopt(argc, argv, "ehBHPxp:g:c:z:s:S:n:m:M:")) != -1) { + 
while ((c = getopt(argc, argv, "ehBHIPxp:g:c:z:s:S:n:m:M:")) != -1) { switch (c) { case 'B': inject_bkpt = 1; @@ -572,6 +574,9 @@ main(int argc, char *argv[]) case 'H': guest_vmexit_on_hlt = 1; break; + case 'I': + ioapic = 1; + break; case 'P': guest_vmexit_on_pause = 1; break; @@ -661,7 +666,7 @@ main(int argc, char *argv[]) /* * build the guest tables, MP etc. */ - vm_build_tables(ctx, guest_ncpus, oem_tbl_start, oem_tbl_size); + vm_build_tables(ctx, guest_ncpus, ioapic, oem_tbl_start, oem_tbl_size); /* * Add CPU 0 -- cgit v1.1 From 9f954dc59906b0a4f03d285e1c4b6a0fc4e36c40 Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 4 Aug 2012 23:51:21 +0000 Subject: The displacement field in the decoded instruction should be treated as a 8-bit or 32-bit signed integer. Simplify the handling of indirect addressing with displacement by unconditionally adding the 'instruction->disp' to the target address. This is alright since 'instruction->disp' is non-zero only for the addressing modes that specify a displacement. 
Obtained from: NetApp --- usr.sbin/bhyve/instruction_emul.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/instruction_emul.c b/usr.sbin/bhyve/instruction_emul.c index 8c99194..790c5ff 100644 --- a/usr.sbin/bhyve/instruction_emul.c +++ b/usr.sbin/bhyve/instruction_emul.c @@ -135,7 +135,7 @@ struct decoded_instruction uint8_t *opcode; uint8_t *modrm; uint8_t *sib; - uint8_t *displacement; + uint8_t *displacement; uint8_t *immediate; uint8_t opcode_flags; @@ -337,9 +337,9 @@ decode_extension_operands(struct decoded_instruction *decoded) if (decoded->displacement) { if (decoded->addressing_mode == MOD_INDIRECT_DISP8) { - decoded->disp = (int32_t)*decoded->displacement; + decoded->disp = *((int8_t *)decoded->displacement); } else if (decoded->addressing_mode == MOD_INDIRECT_DISP32) { - decoded->disp = *((int32_t*)decoded->displacement); + decoded->disp = *((int32_t *)decoded->displacement); } } @@ -432,14 +432,6 @@ get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3, *operand = reg; return (0); case MOD_INDIRECT: - target = gla2gpa(reg, guest_cr3); - emulated_memory = find_region(target); - if (emulated_memory) { - return emulated_memory->memread(vm, vcpu, target, - 4, operand, - emulated_memory->arg); - } - return (-1); case MOD_INDIRECT_DISP8: case MOD_INDIRECT_DISP32: target = gla2gpa(reg, guest_cr3); @@ -450,7 +442,7 @@ get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3, 4, operand, emulated_memory->arg); } - return (-1); + return (-1); default: return (-1); } @@ -473,19 +465,22 @@ perform_write(struct vmctx *vm, int vcpu, uint64_t guest_cr3, } else if (instruction->opcode_flags & TO_REG) { reg = instruction->reg; addressing_mode = MOD_DIRECT; - } else + } else return (-1); regname = get_vm_reg_name(reg); error = vm_get_register(vm, vcpu, regname, ®); - if (error) + if (error) return (error); switch(addressing_mode) { case MOD_DIRECT: return 
vm_set_register(vm, vcpu, regname, operand); case MOD_INDIRECT: + case MOD_INDIRECT_DISP8: + case MOD_INDIRECT_DISP32: target = gla2gpa(reg, guest_cr3); + target += instruction->disp; emulated_memory = find_region(target); if (emulated_memory) { return emulated_memory->memwrite(vm, vcpu, target, @@ -506,7 +501,7 @@ emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t cr3, int error; error = get_operand(vm, vcpu, cr3, instruction, &operand); - if (error) + if (error) return (error); return perform_write(vm, vcpu, cr3, instruction, operand); @@ -519,17 +514,17 @@ emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3) int error; void *instruction = gla2hla(rip, cr3); - if ((error = decode_instruction(instruction, &instr)) != 0) + if ((error = decode_instruction(instruction, &instr)) != 0) return (error); - + return emulate_decoded_instruction(vm, vcpu, cr3, &instr); } struct memory_region * -register_emulated_memory(uintptr_t start, size_t len, emulated_read_func_t memread, +register_emulated_memory(uintptr_t start, size_t len, emulated_read_func_t memread, emulated_write_func_t memwrite, void *arg) { - if (registered_regions > MAX_EMULATED_REGIONS) + if (registered_regions >= MAX_EMULATED_REGIONS) return (NULL); struct memory_region *region = &emulated_regions[registered_regions]; @@ -552,4 +547,3 @@ move_memory_region(struct memory_region *region, uintptr_t start) region->start = start; region->end = start + len; } - -- cgit v1.1 From 5bfba16f732de1d8697ff76a41623687b4d8f156 Mon Sep 17 00:00:00 2001 From: neel Date: Sun, 5 Aug 2012 00:00:52 +0000 Subject: Device model for ioapic emulation. With this change the uart emulation is entirely interrupt driven. 
Obtained from: NetApp --- usr.sbin/bhyve/Makefile | 2 +- usr.sbin/bhyve/fbsdrun.c | 3 + usr.sbin/bhyve/ioapic.c | 302 ++++++++++++++++++++++++++++++++++++++++++++++ usr.sbin/bhyve/ioapic.h | 38 ++++++ usr.sbin/bhyve/pci_emul.c | 5 +- 5 files changed, 347 insertions(+), 3 deletions(-) create mode 100644 usr.sbin/bhyve/ioapic.c create mode 100644 usr.sbin/bhyve/ioapic.h (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index b381b8c..7f4806a 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -5,7 +5,7 @@ PROG= bhyve SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c -SRCS+= instruction_emul.c mevent.c +SRCS+= instruction_emul.c ioapic.c mevent.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index cb34477..b988b1f 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include "pci_emul.h" #include "xmsr.h" #include "instruction_emul.h" +#include "ioapic.h" #define DEFAULT_GUEST_HZ 100 #define DEFAULT_GUEST_TSLICE 200 @@ -651,6 +652,8 @@ main(int argc, char *argv[]) init_inout(); init_pci(ctx); + if (ioapic) + ioapic_init(0); if (gdb_port != 0) init_dbgport(gdb_port); diff --git a/usr.sbin/bhyve/ioapic.c b/usr.sbin/bhyve/ioapic.c new file mode 100644 index 0000000..dc74cfa --- /dev/null +++ b/usr.sbin/bhyve/ioapic.c @@ -0,0 +1,302 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include + +#include +#include +#include + +#include + +#include "inout.h" +#include "instruction_emul.h" +#include "fbsdrun.h" + +#include + +#define IOAPIC_PADDR 0xFEC00000 + +#define IOREGSEL 0x00 +#define IOWIN 0x10 + +#define REDIR_ENTRIES 16 +#define INTR_ASSERTED(ioapic, pin) ((ioapic)->pinstate[(pin)] == true) + +struct ioapic { + int inited; + uint32_t id; + uint64_t redtbl[REDIR_ENTRIES]; + bool pinstate[REDIR_ENTRIES]; + + uintptr_t paddr; /* gpa where the ioapic is mapped */ + uint32_t ioregsel; + struct memory_region *region; +}; + +static struct ioapic ioapics[1]; /* only a single ioapic for now */ + +static int ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, + int size, uint64_t *data, void *arg); +static int ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, + int size, uint64_t data, void *arg); + +static void +ioapic_set_pinstate(struct vmctx *ctx, int pin, bool newstate) 
+{ + int vector, apicid, vcpu; + uint32_t low, high; + struct ioapic *ioapic; + + ioapic = &ioapics[0]; /* assume a single ioapic */ + + if (pin < 0 || pin >= REDIR_ENTRIES) + return; + + /* Nothing to do if interrupt pin has not changed state */ + if (ioapic->pinstate[pin] == newstate) + return; + + ioapic->pinstate[pin] = newstate; /* record it */ + + /* Nothing to do if interrupt pin is deasserted */ + if (!INTR_ASSERTED(ioapic, pin)) + return; + + /* + * XXX + * We only deal with: + * - edge triggered interrupts + * - physical destination mode + * - fixed delivery mode + */ + low = ioapic->redtbl[pin]; + high = ioapic->redtbl[pin] >> 32; + if ((low & IOART_INTMASK) == IOART_INTMCLR && + (low & IOART_TRGRMOD) == IOART_TRGREDG && + (low & IOART_DESTMOD) == IOART_DESTPHY && + (low & IOART_DELMOD) == IOART_DELFIXED) { + vector = low & IOART_INTVEC; + apicid = high >> APIC_ID_SHIFT; + if (apicid != 0xff) { + /* unicast */ + vcpu = vm_apicid2vcpu(ctx, apicid); + vm_lapic_irq(ctx, vcpu, vector); + } else { + /* broadcast */ + vcpu = 0; + while (vcpu < guest_ncpus) { + vm_lapic_irq(ctx, vcpu, vector); + vcpu++; + } + } + } +} + +void +ioapic_deassert_pin(struct vmctx *ctx, int pin) +{ + ioapic_set_pinstate(ctx, pin, false); +} + +void +ioapic_assert_pin(struct vmctx *ctx, int pin) +{ + ioapic_set_pinstate(ctx, pin, true); +} + +void +ioapic_init(int which) +{ + int i; + struct ioapic *ioapic; + + assert(which == 0); + + ioapic = &ioapics[which]; + assert(ioapic->inited == 0); + + bzero(ioapic, sizeof(struct ioapic)); + + /* Initialize all redirection entries to mask all interrupts */ + for (i = 0; i < REDIR_ENTRIES; i++) + ioapic->redtbl[i] = 0x0001000000010000UL; + + /* Register emulated memory region */ + ioapic->paddr = IOAPIC_PADDR; + ioapic->region = register_emulated_memory(ioapic->paddr, + sizeof(struct IOAPIC), + ioapic_region_read, + ioapic_region_write, + (void *)(uintptr_t)which); + assert(ioapic->region != NULL); + + ioapic->inited = 1; +} + +static 
uint32_t +ioapic_read(struct ioapic *ioapic, uint32_t addr) +{ + int regnum, pin, rshift; + + assert(ioapic->inited); + + regnum = addr & 0xff; + switch (regnum) { + case IOAPIC_ID: + return (ioapic->id); + break; + case IOAPIC_VER: + return ((REDIR_ENTRIES << MAXREDIRSHIFT) | 0x11); + break; + case IOAPIC_ARB: + return (ioapic->id); + break; + default: + break; + } + + /* redirection table entries */ + if (regnum >= IOAPIC_REDTBL && + regnum < IOAPIC_REDTBL + REDIR_ENTRIES * 2) { + pin = (regnum - IOAPIC_REDTBL) / 2; + if ((regnum - IOAPIC_REDTBL) % 2) + rshift = 32; + else + rshift = 0; + + return (ioapic->redtbl[pin] >> rshift); + } + + return (0); +} + +static void +ioapic_write(struct ioapic *ioapic, uint32_t addr, uint32_t data) +{ + int regnum, pin, lshift; + + assert(ioapic->inited); + + regnum = addr & 0xff; + switch (regnum) { + case IOAPIC_ID: + ioapic->id = data & APIC_ID_MASK; + break; + case IOAPIC_VER: + case IOAPIC_ARB: + /* readonly */ + break; + default: + break; + } + + /* redirection table entries */ + if (regnum >= IOAPIC_REDTBL && + regnum < IOAPIC_REDTBL + REDIR_ENTRIES * 2) { + pin = (regnum - IOAPIC_REDTBL) / 2; + if ((regnum - IOAPIC_REDTBL) % 2) + lshift = 32; + else + lshift = 0; + + ioapic->redtbl[pin] &= ~((uint64_t)0xffffffff << lshift); + ioapic->redtbl[pin] |= ((uint64_t)data << lshift); + } +} + +static int +ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, + uint64_t *data, void *arg) +{ + int which, offset; + struct ioapic *ioapic; + + which = (uintptr_t)arg; + + ioapic = &ioapics[which]; + offset = paddr - ioapic->paddr; + + /* + * The IOAPIC specification allows 32-bit wide accesses to the + * IOREGSEL (offset 0) and IOWIN (offset 16) registers. 
+ */ + if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) { +#if 1 + printf("invalid access to ioapic%d: size %d, offset %d\n", + which, size, offset); +#endif + *data = 0; + return (0); + } + + if (offset == IOREGSEL) + *data = ioapic->ioregsel; + else + *data = ioapic_read(ioapic, ioapic->ioregsel); + + return (0); +} + +static int +ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, + uint64_t data, void *arg) +{ + int which, offset; + struct ioapic *ioapic; + + which = (uintptr_t)arg; + + ioapic = &ioapics[which]; + offset = paddr - ioapic->paddr; + + /* + * The ioapic specification allows 32-bit wide accesses to the + * IOREGSEL (offset 0) and IOWIN (offset 16) registers. + */ + if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) { +#if 1 + printf("invalid access to ioapic%d: size %d, offset %d\n", + which, size, offset); +#endif + return (0); + } + + if (offset == IOREGSEL) + ioapic->ioregsel = data; + else + ioapic_write(ioapic, ioapic->ioregsel, data); + + return (0); +} diff --git a/usr.sbin/bhyve/ioapic.h b/usr.sbin/bhyve/ioapic.h new file mode 100644 index 0000000..4696f9a --- /dev/null +++ b/usr.sbin/bhyve/ioapic.h @@ -0,0 +1,38 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IOAPIC_H_ +#define _IOAPIC_H_ + +struct vmctx; + +void ioapic_init(int num); +void ioapic_deassert_pin(struct vmctx *ctx, int pin); +void ioapic_assert_pin(struct vmctx *ctx, int pin); + +#endif diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index faaea73..5f8436c 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include "inout.h" #include "pci_emul.h" #include "instruction_emul.h" +#include "ioapic.h" #define CONF1_ADDR_PORT 0x0cf8 #define CONF1_DATA_PORT 0x0cfc @@ -835,7 +836,7 @@ pci_lintr_assert(struct pci_devinst *pi) { assert(pi->pi_lintr_pin); - /* ioapic_assert_pin(pi->pi_vmctx, pi->pi_lintr_pin); */ + ioapic_assert_pin(pi->pi_vmctx, pi->pi_lintr_pin); } void @@ -843,7 +844,7 @@ pci_lintr_deassert(struct pci_devinst *pi) { assert(pi->pi_lintr_pin); - /* ioapic_deassert_pin(pi->pi_vmctx, pi->pi_lintr_pin); */ + ioapic_deassert_pin(pi->pi_vmctx, pi->pi_lintr_pin); } -- cgit v1.1 From d4dec7419041f5934130a79960faf398446e95dd Mon Sep 17 00:00:00 2001 From: neel Date: Mon, 6 Aug 2012 06:51:27 +0000 Subject: Add support for emulating PCI multi-function devices. 
The function number is specified by an optional [:<func>] after the slot number: -s 1:0,virtio-net,tap0 Ditto for the mptable naming: -n 1:0,e0a Obtained from: NetApp --- usr.sbin/bhyve/pci_emul.c | 200 +++++++++++++++++++++++++++++++++------------- 1 file changed, 146 insertions(+), 54 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 5f8436c..92c8715 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -45,7 +45,6 @@ __FBSDID("$FreeBSD$"); #include "fbsdrun.h" #include "inout.h" #include "pci_emul.h" -#include "instruction_emul.h" #include "ioapic.h" #define CONF1_ADDR_PORT 0x0cf8 @@ -62,7 +61,8 @@ do { \ } \ } while (0) -#define MAXSLOTS 32 +#define MAXSLOTS (PCI_SLOTMAX + 1) +#define MAXFUNCS (PCI_FUNCMAX + 1) static struct slotinfo { char *si_name; @@ -73,7 +73,7 @@ static struct slotinfo { char si_prefix; char si_suffix; int si_legacy; -} pci_slotinfo[MAXSLOTS]; +} pci_slotinfo[MAXSLOTS][MAXFUNCS]; /* * Used to keep track of legacy interrupt owners/requestors @@ -114,7 +114,7 @@ static struct mptable_pci_devnames { uint8_t mds_suffix[4]; uint8_t mds_prefix[4]; uint32_t mds_rsvd[3]; - } md_slotinfo[MAXSLOTS]; + } md_slotinfo[MAXSLOTS * MAXFUNCS]; } pci_devnames; SET_DECLARE(pci_devemu_set, struct pci_devemu); @@ -142,15 +142,16 @@ static int devname_elems; /* * Slot options are in the form: * - * ,[,] + * [:],[,] * * slot is 0..31 + * func is 0..7 * emul is a string describing the type of PCI device e.g. virtio-net * config is an optional string, depending on the device, that can be * used for configuration.
* Examples are: * 1,virtio-net,tap0 - * 3,dummy + * 3:0,dummy */ static void pci_parse_slot_usage(char *aopt) @@ -162,14 +163,22 @@ pci_parse_slot_usage(char *aopt) void pci_parse_slot(char *opt, int legacy) { - char *slot, *emul, *config; + char *slot, *func, *emul, *config; char *str, *cpy; - int snum; + int snum, fnum; str = cpy = strdup(opt); + config = NULL; - slot = strsep(&str, ","); + if (strchr(str, ':') != NULL) { + slot = strsep(&str, ":"); + func = strsep(&str, ","); + } else { + slot = strsep(&str, ","); + func = NULL; + } + emul = strsep(&str, ","); if (str != NULL) { config = strsep(&str, ","); @@ -180,34 +189,33 @@ pci_parse_slot(char *opt, int legacy) return; } - snum = 255; snum = atoi(slot); - if (snum < 0 || snum >= MAXSLOTS) { + fnum = func ? atoi(func) : 0; + if (snum < 0 || snum >= MAXSLOTS || fnum < 0 || fnum >= MAXFUNCS) { pci_parse_slot_usage(cpy); } else { - pci_slotinfo[snum].si_name = emul; - pci_slotinfo[snum].si_param = config; - pci_slotinfo[snum].si_legacy = legacy; + pci_slotinfo[snum][fnum].si_name = emul; + pci_slotinfo[snum][fnum].si_param = config; + pci_slotinfo[snum][fnum].si_legacy = legacy; } } - /* * * PCI MPTable names are of the form: * - * ,[prefix] + * [:],[prefix] * * .. with an alphabetic char, a 1 or 2-digit string, * and a single char. * * Examples: * 1,e0c - * 4,e0P + * 4:0,e0P + * 4:1,e0M * 6,43a * 7,0f * 10,1 - * 12,e0M * 2,12a * * Note that this is NetApp-specific, but is ignored on other o/s's. 
@@ -223,15 +231,23 @@ pci_parse_name(char *opt) { char csnum[4]; char *namestr; - char *slotend; + char *slotend, *funcend, *funcstart; char prefix, suffix; int i; int pslot; - int snum; + int snum, fnum; pslot = -1; prefix = suffix = 0; - slotend = strchr(opt, ','); + + slotend = strchr(opt, ':'); + if (slotend != NULL) { + funcstart = slotend + 1; + funcend = strchr(funcstart, ','); + } else { + slotend = strchr(opt, ','); + funcstart = funcend = NULL; + } /* * A comma must be present, and can't be the first character @@ -248,14 +264,31 @@ pci_parse_name(char *opt) } csnum[i] = '\0'; - snum = 255; snum = atoi(csnum); if (snum < 0 || snum >= MAXSLOTS) { pci_parse_name_usage(opt); return; } - namestr = slotend + 1; + /* + * Parse the function number (if provided) + * + * A comma must be present and can't be the first character. + * The function cannot be greater than a single character and + * must be between '0' and '7' inclusive. + */ + if (funcstart != NULL) { + if (funcend == NULL || funcend != funcstart + 1 || + *funcstart < '0' || *funcstart > '7') { + pci_parse_name_usage(opt); + return; + } + fnum = *funcstart - '0'; + } else { + fnum = 0; + } + + namestr = funcend ? 
funcend + 1 : slotend + 1; if (strlen(namestr) > 3) { pci_parse_name_usage(opt); @@ -276,11 +309,10 @@ pci_parse_name(char *opt) } if (isalpha(*namestr) && *(namestr + 1) == 0) { suffix = *namestr; - pci_slotinfo[snum].si_titled = 1; - pci_slotinfo[snum].si_pslot = pslot; - pci_slotinfo[snum].si_prefix = prefix; - pci_slotinfo[snum].si_suffix = suffix; - + pci_slotinfo[snum][fnum].si_titled = 1; + pci_slotinfo[snum][fnum].si_pslot = pslot; + pci_slotinfo[snum][fnum].si_prefix = prefix; + pci_slotinfo[snum][fnum].si_suffix = suffix; } else { pci_parse_name_usage(opt); } @@ -391,7 +423,8 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, addr = mask = lobits = 0; break; case PCIBAR_IO: - if (hostbase && pci_slotinfo[pdi->pi_slot].si_legacy) { + if (hostbase && + pci_slotinfo[pdi->pi_slot][pdi->pi_func].si_legacy) { assert(hostbase < PCI_EMUL_IOBASE); baseptr = &hostbase; } else { @@ -536,7 +569,8 @@ pci_emul_finddev(char *name) } static void -pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, char *params) +pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, int func, + char *params) { struct pci_devinst *pdi; pdi = malloc(sizeof(struct pci_devinst)); @@ -545,7 +579,7 @@ pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, char *params) pdi->pi_vmctx = ctx; pdi->pi_bus = 0; pdi->pi_slot = slot; - pdi->pi_func = 0; + pdi->pi_func = func; pdi->pi_d = pde; snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); @@ -560,7 +594,7 @@ pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, char *params) free(pdi); } else { pci_emul_devices++; - pci_slotinfo[slot].si_devi = pdi; + pci_slotinfo[slot][func].si_devi = pdi; } } @@ -730,20 +764,22 @@ init_pci(struct vmctx *ctx) { struct pci_devemu *pde; struct slotinfo *si; - int i; + int slot, func; pci_emul_iobase = PCI_EMUL_IOBASE; pci_emul_membase32 = PCI_EMUL_MEMBASE32; pci_emul_membase64 = PCI_EMUL_MEMBASE64; - si = pci_slotinfo; - - 
for (i = 0; i < MAXSLOTS; i++, si++) { - if (si->si_name != NULL) { - pde = pci_emul_finddev(si->si_name); - if (pde != NULL) { - pci_emul_init(ctx, pde, i, si->si_param); - pci_add_mptable_name(si); + for (slot = 0; slot < MAXSLOTS; slot++) { + for (func = 0; func < MAXFUNCS; func++) { + si = &pci_slotinfo[slot][func]; + if (si->si_name != NULL) { + pde = pci_emul_finddev(si->si_name); + if (pde != NULL) { + pci_emul_init(ctx, pde, slot, func, + si->si_param); + pci_add_mptable_name(si); + } } } } @@ -790,7 +826,7 @@ int pci_is_legacy(struct pci_devinst *pi) { - return (pci_slotinfo[pi->pi_slot].si_legacy); + return (pci_slotinfo[pi->pi_slot][pi->pi_func].si_legacy); } static int @@ -847,7 +883,52 @@ pci_lintr_deassert(struct pci_devinst *pi) ioapic_deassert_pin(pi->pi_vmctx, pi->pi_lintr_pin); } +/* + * Return 1 if the emulated device in 'slot' is a multi-function device. + * Return 0 otherwise. + */ +static int +pci_emul_is_mfdev(int slot) +{ + int f, numfuncs; + numfuncs = 0; + for (f = 0; f < MAXFUNCS; f++) { + if (pci_slotinfo[slot][f].si_devi != NULL) { + numfuncs++; + } + } + return (numfuncs > 1); +} + +/* + * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on + * whether or not is a multi-function being emulated in the pci 'slot'. 
+ */ +static void +pci_emul_hdrtype_fixup(int slot, int off, int bytes, uint32_t *rv) +{ + int mfdev; + + if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { + mfdev = pci_emul_is_mfdev(slot); + switch (bytes) { + case 1: + case 2: + *rv &= ~PCIM_MFDEV; + if (mfdev) { + *rv |= PCIM_MFDEV; + } + break; + case 4: + *rv &= ~(PCIM_MFDEV << 16); + if (mfdev) { + *rv |= (PCIM_MFDEV << 16); + } + break; + } + } +} static int cfgbus, cfgslot, cfgfunc, cfgoff; @@ -878,12 +959,12 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, { struct pci_devinst *pi; struct pci_devemu *pe; - int coff, idx; + int coff, idx, needcfg; uint64_t mask, bar; assert(bytes == 1 || bytes == 2 || bytes == 4); - pi = pci_slotinfo[cfgslot].si_devi; + pi = pci_slotinfo[cfgslot][cfgfunc].si_devi; coff = cfgoff + (port - CONF1_DATA_PORT); #if 0 @@ -891,7 +972,11 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc); #endif - if (pi == NULL || cfgfunc != 0) { + /* + * Just return if there is no device at this cfgslot:cfgfunc or + * if the guest is doing an un-aligned access + */ + if (pi == NULL || (coff & (bytes - 1)) != 0) { if (in) *eax = 0xffffffff; return (0); @@ -904,16 +989,23 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, */ if (in) { /* Let the device emulation override the default handler */ - if (pe->pe_cfgread != NULL && - (*pe->pe_cfgread)(ctx, vcpu, pi, coff, bytes, eax) == 0) - return (0); + if (pe->pe_cfgread != NULL) { + needcfg = pe->pe_cfgread(ctx, vcpu, pi, + coff, bytes, eax); + } else { + needcfg = 1; + } - if (bytes == 1) - *eax = pci_get_cfgdata8(pi, coff); - else if (bytes == 2) - *eax = pci_get_cfgdata16(pi, coff); - else - *eax = pci_get_cfgdata32(pi, coff); + if (needcfg) { + if (bytes == 1) + *eax = pci_get_cfgdata8(pi, coff); + else if (bytes == 2) + *eax = pci_get_cfgdata16(pi, coff); + else + *eax = pci_get_cfgdata32(pi, 
coff); + } + + pci_emul_hdrtype_fixup(cfgslot, coff, bytes, eax); } else { /* Let the device emulation override the default handler */ if (pe->pe_cfgwrite != NULL && -- cgit v1.1 From 75106bd2988aa440d8beba6c6a92b55742c691b7 Mon Sep 17 00:00:00 2001 From: neel Date: Mon, 6 Aug 2012 07:20:25 +0000 Subject: Fix a bug in how a 64-bit bar in a pci passthru device would be presented to the guest. Prior to the fix it was possible for such a bar to appear as a 32-bit bar as long as it was allocated from the region below 4GB. This had the potential to confuse some drivers that were particular about the size of the bars. Obtained from: NetApp --- usr.sbin/bhyve/pci_emul.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 92c8715..96667d5 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -455,8 +455,13 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | PCIM_BAR_MEM_PREFETCH; break; + } else { + baseptr = &pci_emul_membase32; + limit = PCI_EMUL_MEMLIMIT32; + mask = PCIM_BAR_MEM_BASE; + lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; } - /* fallthrough */ + break; case PCIBAR_MEM32: baseptr = &pci_emul_membase32; limit = PCI_EMUL_MEMLIMIT32; -- cgit v1.1 From c34be7b811ad199e64f66db339e7f64c773ca0a7 Mon Sep 17 00:00:00 2001 From: neel Date: Tue, 25 Sep 2012 02:33:25 +0000 Subject: Add an explicit exit code 'SPINUP_AP' to tell the controlling process that an AP needs to be activated by spinning up an execution context for it. The local apic emulation is now completely done in the hypervisor and it will detect writes to the ICR_LO register that try to bring up the AP. In response to such writes it will return to userspace with an exit code of SPINUP_AP. 
Reviewed by: grehan --- usr.sbin/bhyve/Makefile | 1 + usr.sbin/bhyve/fbsdrun.c | 21 ++++- usr.sbin/bhyve/spinup_ap.c | 180 ++++++++++++++++++++++++++++++++++++ usr.sbin/bhyve/spinup_ap.h | 34 +++++++ usr.sbin/bhyve/xmsr.c | 223 +-------------------------------------------- 5 files changed, 240 insertions(+), 219 deletions(-) create mode 100644 usr.sbin/bhyve/spinup_ap.c create mode 100644 usr.sbin/bhyve/spinup_ap.h (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index 7f4806a..f2e49ca 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -8,6 +8,7 @@ SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c SRCS+= instruction_emul.c ioapic.c mevent.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c +SRCS+= spinup_ap.c NO_MAN= diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index b988b1f..df2238a 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include "xmsr.h" #include "instruction_emul.h" #include "ioapic.h" +#include "spinup_ap.h" #define DEFAULT_GUEST_HZ 100 #define DEFAULT_GUEST_TSLICE 200 @@ -346,6 +347,23 @@ vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) } static int +vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) +{ + int newcpu; + int retval = VMEXIT_CONTINUE; + + newcpu = spinup_ap(ctx, *pvcpu, + vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); + + if (guest_vcpu_mux && *pvcpu != newcpu) { + retval = VMEXIT_SWITCH; + *pvcpu = newcpu; + } + + return (retval); +} + +static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -471,7 +489,8 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, - [VM_EXITCODE_PAGING] = vmexit_paging + [VM_EXITCODE_PAGING] = 
vmexit_paging, + [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, }; static void diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c new file mode 100644 index 0000000..e845515 --- /dev/null +++ b/usr.sbin/bhyve/spinup_ap.c @@ -0,0 +1,180 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include + +#include +#include +#include + +#include "fbsdrun.h" +#include "spinup_ap.h" + +/* + * Trampoline for hypervisor direct 64-bit jump. 
+ * + * 0 - signature for guest->host verification + * 8 - kernel virtual address of trampoline + * 16 - instruction virtual address + * 24 - stack pointer virtual address + * 32 - CR3, physical address of kernel page table + * 40 - 24-byte area for null/code/data GDT entries + */ +#define MP_V64T_SIG 0xcafebabecafebabeULL +struct mp_v64tramp { + uint64_t mt_sig; + uint64_t mt_virt; + uint64_t mt_eip; + uint64_t mt_rsp; + uint64_t mt_cr3; + uint64_t mt_gdtr[3]; +}; + +static void +spinup_ap_realmode(struct vmctx *ctx, int newcpu, uint64_t *rip) +{ + int vector, error; + uint16_t cs; + uint64_t desc_base; + uint32_t desc_limit, desc_access; + + vector = *rip >> PAGE_SHIFT; + *rip = 0; + + /* + * Update the %cs and %rip of the guest so that it starts + * executing real mode code at at 'vector << 12'. + */ + error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip); + assert(error == 0); + + error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base, + &desc_limit, &desc_access); + assert(error == 0); + + desc_base = vector << PAGE_SHIFT; + error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS, + desc_base, desc_limit, desc_access); + assert(error == 0); + + cs = (vector << PAGE_SHIFT) >> 4; + error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs); + assert(error == 0); +} + +static void +spinup_ap_direct64(struct vmctx *ctx, int newcpu, uint64_t *rip) +{ + struct mp_v64tramp *mvt; + char *errstr; + int error; + uint64_t gdtbase; + + mvt = paddr_guest2host(*rip); + + assert(mvt->mt_sig == MP_V64T_SIG); + + /* + * Set up the 3-entry GDT using memory supplied in the + * guest's trampoline structure. 
+ */ + vm_setup_freebsd_gdt(mvt->mt_gdtr); + +#define CHECK_ERROR(msg) \ + if (error != 0) { \ + errstr = msg; \ + goto err_exit; \ + } + + /* entry point */ + *rip = mvt->mt_eip; + + /* Get the guest virtual address of the GDT */ + gdtbase = mvt->mt_virt + __offsetof(struct mp_v64tramp, mt_gdtr); + + error = vm_setup_freebsd_registers(ctx, newcpu, mvt->mt_eip, + mvt->mt_cr3, gdtbase, mvt->mt_rsp); + CHECK_ERROR("vm_setup_freebsd_registers"); + + return; +err_exit: + printf("spinup_ap_direct64: machine state error: %s", errstr); + exit(1); +} + +int +spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip) +{ + int error; + + assert(newcpu != 0); + assert(newcpu < guest_ncpus); + + error = vcpu_reset(ctx, newcpu); + assert(error == 0); + + /* Set up capabilities */ + if (fbsdrun_vmexit_on_hlt()) { + error = vm_set_capability(ctx, newcpu, VM_CAP_HALT_EXIT, 1); + assert(error == 0); + } + + if (fbsdrun_vmexit_on_pause()) { + error = vm_set_capability(ctx, newcpu, VM_CAP_PAUSE_EXIT, 1); + assert(error == 0); + } + + /* + * There are 2 startup modes possible here: + * - if the CPU supports 'unrestricted guest' mode, the spinup can + * set up the processor state in power-on 16-bit mode, with the CS:IP + * init'd to the specified low-mem 4K page. + * - if the guest has requested a 64-bit trampoline in the low-mem 4K + * page by placing in the specified signature, set up the register + * state using register state in the signature. Note that this + * requires accessing guest physical memory to read the signature + * while 'unrestricted mode' does not. 
+ */ + error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1); + if (error) { + spinup_ap_direct64(ctx, newcpu, &rip); + } else { + spinup_ap_realmode(ctx, newcpu, &rip); + } + + fbsdrun_addcpu(ctx, newcpu, rip); + + return (newcpu); +} diff --git a/usr.sbin/bhyve/spinup_ap.h b/usr.sbin/bhyve/spinup_ap.h new file mode 100644 index 0000000..2749ee9 --- /dev/null +++ b/usr.sbin/bhyve/spinup_ap.h @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _SPINUP_AP_H_ +#define _SPINUP_AP_H_ + +int spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip); + +#endif diff --git a/usr.sbin/bhyve/xmsr.c b/usr.sbin/bhyve/xmsr.c index cc148d6..9c05f02 100644 --- a/usr.sbin/bhyve/xmsr.c +++ b/usr.sbin/bhyve/xmsr.c @@ -29,233 +29,20 @@ #include __FBSDID("$FreeBSD$"); -#include -#include +#include -#include -#include -#include #include #include -#include "fbsdrun.h" -#include "xmsr.h" - -/* - * Trampoline for hypervisor direct 64-bit jump. - * - * 0 - signature for guest->host verification - * 8 - kernel virtual address of trampoline - * 16 - instruction virtual address - * 24 - stack pointer virtual address - * 32 - CR3, physical address of kernel page table - * 40 - 24-byte area for null/code/data GDT entries - */ -#define MP_V64T_SIG 0xcafebabecafebabeULL -struct mp_v64tramp { - uint64_t mt_sig; - uint64_t mt_virt; - uint64_t mt_eip; - uint64_t mt_rsp; - uint64_t mt_cr3; - uint64_t mt_gdtr[3]; -}; - -/* - * CPU 0 is considered to be the BSP and is set to the RUNNING state. - * All other CPUs are set up in the INIT state. - */ -#define BSP 0 -enum cpu_bstate { - CPU_S_INIT, - CPU_S_SIPI, - CPU_S_RUNNING -} static cpu_b[VM_MAXCPU] = { [BSP] = CPU_S_RUNNING }; +#include +#include -static void spinup_ap(struct vmctx *, int, int, uint64_t *); -static void spinup_ap_direct64(struct vmctx *, int, uintptr_t, uint64_t *); +#include "xmsr.h" int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val) { - int dest; - int mode; - int thiscpu; - int vec; - int error, retval; - uint64_t rip; - - retval = vcpu; - thiscpu = 1 << vcpu; - - /* - * The only MSR value handled is the x2apic CR register - */ - if (code != 0x830) { - printf("Unknown WRMSR code %x, val %lx, cpu %d\n", - code, val, vcpu); - exit(1); - } - - /* - * The value written to the MSR will generate an IPI to - * a set of CPUs. If this is a SIPI, create the initial - * state for the CPU and switch to it. 
Otherwise, inject - * an interrupt for the destination CPU(s), and request - * a switch to the next available one by returning -1 - */ - dest = val >> 32; - vec = val & APIC_VECTOR_MASK; - mode = val & APIC_DELMODE_MASK; - - switch (mode) { - case APIC_DELMODE_INIT: - /* - * Ignore legacy de-assert INITs in x2apic mode - */ - if ((val & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) { - break; - } - - assert(dest != 0); - assert(dest < guest_ncpus); - assert(cpu_b[dest] == CPU_S_INIT); - - /* - * Move CPU to wait-for-SIPI state - */ - error = vcpu_reset(ctx, dest); - assert(error == 0); - - cpu_b[dest] = CPU_S_SIPI; - break; - - case APIC_DELMODE_STARTUP: - assert(dest != 0); - assert(dest < guest_ncpus); - /* - * Ignore SIPIs in any state other than wait-for-SIPI - */ - if (cpu_b[dest] != CPU_S_SIPI) { - break; - } - - /* - * Bring up the AP and signal the main loop that it is - * available and to switch to it. - */ - spinup_ap(ctx, dest, vec, &rip); - cpu_b[dest] = CPU_S_RUNNING; - fbsdrun_addcpu(ctx, dest, rip); - retval = dest; - break; - - default: - printf("APIC delivery mode %lx not supported!\n", - val & APIC_DELMODE_MASK); - exit(1); - } - - return (retval); -} - -/* - * There are 2 startup modes possible here: - * - if the CPU supports 'unrestricted guest' mode, the spinup can - * set up the processor state in power-on 16-bit mode, with the CS:IP - * init'd to the specified low-mem 4K page. - * - if the guest has requested a 64-bit trampoline in the low-mem 4K - * page by placing in the specified signature, set up the register - * state using register state in the signature. Note that this - * requires accessing guest physical memory to read the signature - * while 'unrestricted mode' does not. 
- */ -static void -spinup_ap(struct vmctx *ctx, int newcpu, int vector, uint64_t *rip) -{ - int error; - uint16_t cs; - uint64_t desc_base; - uint32_t desc_limit, desc_access; - - if (fbsdrun_vmexit_on_hlt()) { - error = vm_set_capability(ctx, newcpu, VM_CAP_HALT_EXIT, 1); - assert(error == 0); - } - - if (fbsdrun_vmexit_on_pause()) { - error = vm_set_capability(ctx, newcpu, VM_CAP_PAUSE_EXIT, 1); - assert(error == 0); - } - - error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1); - if (error) { - /* - * If the guest does not support real-mode execution then - * we will bring up the AP directly in 64-bit mode. - */ - spinup_ap_direct64(ctx, newcpu, vector << PAGE_SHIFT, rip); - } else { - /* - * Update the %cs and %rip of the guest so that it starts - * executing real mode code at at 'vector << 12'. - */ - *rip = 0; - error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip); - assert(error == 0); - - error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base, - &desc_limit, &desc_access); - assert(error == 0); - - desc_base = vector << PAGE_SHIFT; - error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS, - desc_base, desc_limit, desc_access); - assert(error == 0); - - cs = (vector << PAGE_SHIFT) >> 4; - error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs); - assert(error == 0); - } -} - -static void -spinup_ap_direct64(struct vmctx *ctx, int newcpu, uintptr_t gaddr, - uint64_t *rip) -{ - struct mp_v64tramp *mvt; - char *errstr; - int error; - uint64_t gdtbase; - - mvt = paddr_guest2host(gaddr); - - assert(mvt->mt_sig == MP_V64T_SIG); - - /* - * Set up the 3-entry GDT using memory supplied in the - * guest's trampoline structure. 
- */ - vm_setup_freebsd_gdt(mvt->mt_gdtr); - -#define CHECK_ERROR(msg) \ - if (error != 0) { \ - errstr = msg; \ - goto err_exit; \ - } - - /* entry point */ - *rip = mvt->mt_eip; - - /* Get the guest virtual address of the GDT */ - gdtbase = mvt->mt_virt + __offsetof(struct mp_v64tramp, mt_gdtr); - - error = vm_setup_freebsd_registers(ctx, newcpu, mvt->mt_eip, - mvt->mt_cr3, gdtbase, mvt->mt_rsp); - CHECK_ERROR("vm_setup_freebsd_registers"); - return; -err_exit: - printf("spinup_ap_direct64: machine state error: %s", errstr); + printf("Unknown WRMSR code %x, val %lx, cpu %d\n", code, val, vcpu); exit(1); } -- cgit v1.1 From ebdd69568d7fa97153aa47a86afe367476a0a1de Mon Sep 17 00:00:00 2001 From: neel Date: Tue, 25 Sep 2012 19:08:51 +0000 Subject: Add ioctls to control the X2APIC capability exposed by the virtual machine to the guest. At the moment this simply sets the state in the 'vcpu' instance but there is no code that acts upon these settings. --- usr.sbin/vmmctl/vmmctl.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/vmmctl/vmmctl.c b/usr.sbin/vmmctl/vmmctl.c index c0e0402..b205c7b 100644 --- a/usr.sbin/vmmctl/vmmctl.c +++ b/usr.sbin/vmmctl/vmmctl.c @@ -185,6 +185,8 @@ usage(void) " [--get-vmcs-interruptibility]\n" " [--set-pinning=]\n" " [--get-pinning]\n" + " [--set-x2apic-state=]\n" + " [--get-x2apic-state]\n" " [--set-lowmem=]\n" " [--get-lowmem]\n" " [--set-highmem=]\n" @@ -217,6 +219,8 @@ static int set_desc_ldtr, get_desc_ldtr; static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; static int set_pinning, get_pinning, pincpu; +static int set_x2apic_state, get_x2apic_state; +enum x2apic_state x2apic_state; static int run; /* @@ -371,6 +375,7 @@ enum { SET_TR, SET_LDTR, SET_PINNING, + SET_X2APIC_STATE, SET_VMCS_EXCEPTION_BITMAP, SET_VMCS_ENTRY_INTERRUPTION_INFO, SET_CAP, @@ -419,6 +424,7 @@ main(int 
argc, char *argv[]) { "set-tr", REQ_ARG, 0, SET_TR }, { "set-ldtr", REQ_ARG, 0, SET_LDTR }, { "set-pinning",REQ_ARG, 0, SET_PINNING }, + { "set-x2apic-state",REQ_ARG, 0, SET_X2APIC_STATE }, { "set-vmcs-exception-bitmap", REQ_ARG, 0, SET_VMCS_EXCEPTION_BITMAP }, { "set-vmcs-entry-interruption-info", @@ -547,6 +553,7 @@ main(int argc, char *argv[]) { "get-vmcs-interruptibility", NO_ARG, &get_vmcs_interruptibility, 1 }, { "get-pinning",NO_ARG, &get_pinning, 1 }, + { "get-x2apic-state",NO_ARG, &get_x2apic_state, 1 }, { "get-all", NO_ARG, &get_all, 1 }, { "run", NO_ARG, &run, 1 }, { "create", NO_ARG, &create, 1 }, @@ -656,6 +663,10 @@ main(int argc, char *argv[]) pincpu = strtol(optarg, NULL, 0); set_pinning = 1; break; + case SET_X2APIC_STATE: + x2apic_state = strtol(optarg, NULL, 0); + set_x2apic_state = 1; + break; case SET_VMCS_EXCEPTION_BITMAP: exception_bitmap = strtoul(optarg, NULL, 0); set_exception_bitmap = 1; @@ -804,6 +815,9 @@ main(int argc, char *argv[]) if (!error && set_pinning) error = vm_set_pinning(ctx, vcpu, pincpu); + if (!error && set_x2apic_state) + error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); + if (!error && set_exception_bitmap) { error = vm_set_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, exception_bitmap); @@ -1129,6 +1143,12 @@ main(int argc, char *argv[]) } } + if (!error && (get_x2apic_state || get_all)) { + error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state); + if (error == 0) + printf("x2apic_state[%d]\t%d\n", vcpu, x2apic_state); + } + if (!error && (get_pinbased_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl); if (error == 0) -- cgit v1.1 From 5dbc1ca26acaa3175dae7b9d0c45151fba0275ab Mon Sep 17 00:00:00 2001 From: neel Date: Wed, 26 Sep 2012 00:06:17 +0000 Subject: Add an option "-a" to present the local apic in the XAPIC mode instead of the default X2APIC mode to the guest. 
--- usr.sbin/bhyve/fbsdrun.c | 27 ++++++++++++++++++++++++--- usr.sbin/bhyve/fbsdrun.h | 1 + usr.sbin/bhyve/spinup_ap.c | 6 ++++++ 3 files changed, 31 insertions(+), 3 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index df2238a..b8bebba 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -84,7 +84,7 @@ int guest_ncpus; static int pincpu = -1; static int guest_vcpu_mux; -static int guest_vmexit_on_hlt, guest_vmexit_on_pause; +static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; static int foundcpus; @@ -128,8 +128,9 @@ usage(int code) { fprintf(stderr, - "Usage: %s [-ehBHIP][-g ][-z ][-s ]" + "Usage: %s [-aehBHIP][-g ][-z ][-s ]" "[-S ][-p pincpu][-n ][-m lowmem][-M highmem] \n" + " -a: local apic is in XAPIC mode (default is X2APIC)\n" " -g: gdb port (default is %d and 0 means don't open)\n" " -c: # cpus (default 1)\n" " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" @@ -174,6 +175,13 @@ fbsdrun_add_oemtbl(void *tbl, int tblsz) } int +fbsdrun_disable_x2apic(void) +{ + + return (disable_x2apic); +} + +int fbsdrun_vmexit_on_pause(void) { @@ -553,8 +561,11 @@ main(int argc, char *argv[]) guest_ncpus = 1; ioapic = 0; - while ((c = getopt(argc, argv, "ehBHIPxp:g:c:z:s:S:n:m:M:")) != -1) { + while ((c = getopt(argc, argv, "aehBHIPxp:g:c:z:s:S:n:m:M:")) != -1) { switch (c) { + case 'a': + disable_x2apic = 1; + break; case 'B': inject_bkpt = 1; break; @@ -656,6 +667,16 @@ main(int argc, char *argv[]) handler[VM_EXITCODE_PAUSE] = vmexit_pause; } + if (fbsdrun_disable_x2apic()) + err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED); + else + err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); + + if (err) { + printf("Unable to set x2apic state (%d)\n", err); + exit(1); + } + if (lomem_sz != 0) { lomem_addr = vm_map_memory(ctx, 0, lomem_sz); if (lomem_addr == (char *) MAP_FAILED) { diff --git a/usr.sbin/bhyve/fbsdrun.h b/usr.sbin/bhyve/fbsdrun.h index 8106122..0b9fe7d 100644 --- 
a/usr.sbin/bhyve/fbsdrun.h +++ b/usr.sbin/bhyve/fbsdrun.h @@ -50,4 +50,5 @@ void fbsdrun_add_oemtbl(void *tbl, int tblsz); int fbsdrun_muxed(void); int fbsdrun_vmexit_on_hlt(void); int fbsdrun_vmexit_on_pause(void); +int fbsdrun_disable_x2apic(void); #endif diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c index e845515..3aa3b65 100644 --- a/usr.sbin/bhyve/spinup_ap.c +++ b/usr.sbin/bhyve/spinup_ap.c @@ -156,6 +156,12 @@ spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip) assert(error == 0); } + if (fbsdrun_disable_x2apic()) + error = vm_set_x2apic_state(ctx, newcpu, X2APIC_DISABLED); + else + error = vm_set_x2apic_state(ctx, newcpu, X2APIC_ENABLED); + assert(error == 0); + /* * There are 2 startup modes possible here: * - if the CPU supports 'unrestricted guest' mode, the spinup can -- cgit v1.1 From 09939583a708e1199d76573009937c49606d19e5 Mon Sep 17 00:00:00 2001 From: neel Date: Thu, 4 Oct 2012 03:07:05 +0000 Subject: The ioctl VM_GET_MEMORY_SEG is no longer able to return the host physical address associated with the guest memory segment. This is because there is no longer a 1:1 mapping between GPA and HPA. As a result 'vmmctl' can only display the guest physical address and the length of the lowmem and highmem segments. 
--- usr.sbin/vmmctl/vmmctl.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/vmmctl/vmmctl.c b/usr.sbin/vmmctl/vmmctl.c index b205c7b..acd7338 100644 --- a/usr.sbin/vmmctl/vmmctl.c +++ b/usr.sbin/vmmctl/vmmctl.c @@ -387,7 +387,7 @@ main(int argc, char *argv[]) { char *vmname; int error, ch, vcpu; - vm_paddr_t hpa; + vm_paddr_t gpa; size_t len; struct vm_exit vmexit; uint64_t ctl, eptp, bm, addr, u64; @@ -829,15 +829,17 @@ main(int argc, char *argv[]) } if (!error && (get_lowmem || get_all)) { - error = vm_get_memory_seg(ctx, 0, &hpa, &len); + gpa = 0; + error = vm_get_memory_seg(ctx, gpa, &len); if (error == 0) - printf("lowmem\t\t0x%016lx/%ld\n", hpa, len); + printf("lowmem\t\t0x%016lx/%ld\n", gpa, len); } if (!error && (get_highmem || get_all)) { - error = vm_get_memory_seg(ctx, 4 * GB, &hpa, &len); + gpa = 4 * GB; + error = vm_get_memory_seg(ctx, gpa, &len); if (error == 0) - printf("highmem\t\t0x%016lx/%ld\n", hpa, len); + printf("highmem\t\t0x%016lx/%ld\n", gpa, len); } if (!error && (get_efer || get_all)) { -- cgit v1.1 From 4829fce72f3af3b96bc018ac20f556ee33b7f44f Mon Sep 17 00:00:00 2001 From: neel Date: Fri, 12 Oct 2012 18:14:54 +0000 Subject: Output the value of all capabilities when the "--getcap" option is used without a "--capname=". Do the same for the "--get-all" option. --- usr.sbin/vmmctl/vmmctl.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/vmmctl/vmmctl.c b/usr.sbin/vmmctl/vmmctl.c index acd7338..331bd728 100644 --- a/usr.sbin/vmmctl/vmmctl.c +++ b/usr.sbin/vmmctl/vmmctl.c @@ -1479,14 +1479,27 @@ main(int argc, char *argv[]) } if (!error && (getcap || get_all)) { - int captype, val; - captype = vm_capability_name2type(capname); - error = vm_get_capability(ctx, vcpu, captype, &val); - if (error == 0) { - printf("Capability \"%s\" is %s on vcpu %d\n", capname, - val ? 
"set" : "not set", vcpu); - } else if (errno == ENOENT) { - printf("Capability \"%s\" is not available\n", capname); + int captype, val, getcaptype; + + if (getcap && capname) + getcaptype = vm_capability_name2type(capname); + else + getcaptype = -1; + + for (captype = 0; captype < VM_CAP_MAX; captype++) { + if (getcaptype >= 0 && captype != getcaptype) + continue; + error = vm_get_capability(ctx, vcpu, captype, &val); + if (error == 0) { + printf("Capability \"%s\" is %s on vcpu %d\n", + vm_capability_type2name(captype), + val ? "set" : "not set", vcpu); + } else if (errno == ENOENT) { + printf("Capability \"%s\" is not available\n", + vm_capability_type2name(captype)); + } else { + break; + } } } -- cgit v1.1 From 4650e5d7766fa77e268a3c84cc8bd08857bb0287 Mon Sep 17 00:00:00 2001 From: neel Date: Fri, 12 Oct 2012 18:49:07 +0000 Subject: Deal with transient EBUSY error return from vm_run() by retrying the operation. --- usr.sbin/bhyve/fbsdrun.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index b8bebba..1d59425 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -516,8 +516,19 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) while (1) { error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); - if (error != 0) - break; + if (error != 0) { + /* + * It is possible that 'vmmctl' or some other process + * has transitioned the vcpu to CANNOT_RUN state right + * before we tried to transition it to RUNNING. + * + * This is expected to be temporary so just retry. + */ + if (errno == EBUSY) + continue; + else + break; + } prevcpu = vcpu; rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], -- cgit v1.1 From beaad57fa07508b2383454e13f0a26824ada6328 Mon Sep 17 00:00:00 2001 From: grehan Date: Fri, 19 Oct 2012 18:11:17 +0000 Subject: Rework how guest MMIO regions are dealt with. - New memory region interface. 
An RB tree holds the regions, with a last-found per-vCPU cache to deal with the common case of repeated guest accesses to MMIO registers in the same page. - Support memory-mapped BARs in PCI emulation. mem.c/h - memory region interface instruction_emul.c/h - remove old region interface. Use gpa from EPT exit to avoid a tablewalk to determine operand address. Determine operand size and use when calling through to region handler. fbsdrun.c - call into region interface on paging exit. Distinguish between instruction emul error and region not found pci_emul.c/h - implement new BAR callback api. Split BAR alloc routine into routines that require/don't require the BAR phys address. ioapic.c pci_passthru.c pci_virtio_block.c pci_virtio_net.c pci_uart.c - update to new BAR callback i/f Reviewed by: neel Obtained from: NetApp --- usr.sbin/bhyve/Makefile | 2 +- usr.sbin/bhyve/fbsdrun.c | 17 ++- usr.sbin/bhyve/instruction_emul.c | 290 +++++++++++++++++++++++++------------- usr.sbin/bhyve/instruction_emul.h | 15 +- usr.sbin/bhyve/ioapic.c | 79 +++++++---- usr.sbin/bhyve/mem.c | 196 ++++++++++++++++++++++++++ usr.sbin/bhyve/mem.h | 58 ++++++++ usr.sbin/bhyve/pci_emul.c | 221 ++++++++++++++++++++++------- usr.sbin/bhyve/pci_emul.h | 26 ++-- usr.sbin/bhyve/pci_passthru.c | 225 +++++++++++++---------------- usr.sbin/bhyve/pci_uart.c | 25 ++-- usr.sbin/bhyve/pci_virtio_block.c | 35 +++-- usr.sbin/bhyve/pci_virtio_net.c | 35 +++-- 13 files changed, 850 insertions(+), 374 deletions(-) create mode 100644 usr.sbin/bhyve/mem.c create mode 100644 usr.sbin/bhyve/mem.h (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index f2e49ca..72d60ae 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -5,7 +5,7 @@ PROG= bhyve SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c -SRCS+= instruction_emul.c ioapic.c mevent.c +SRCS+= instruction_emul.c ioapic.c mem.c mevent.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c 
pci_virtio_block.c SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c SRCS+= spinup_ap.c diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index 1d59425..d7061f9 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include "fbsdrun.h" #include "inout.h" #include "dbgport.h" +#include "mem.h" #include "mevent.h" #include "pci_emul.h" #include "xmsr.h" @@ -446,11 +447,21 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) static int vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { - + int err; stats.vmexit_paging++; - if (emulate_instruction(ctx, *pvcpu, vmexit->rip, vmexit->u.paging.cr3) != 0) { - printf("Failed to emulate instruction at 0x%lx\n", vmexit->rip); + err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip, + vmexit->u.paging.cr3, vmexit->u.paging.rwx); + + if (err) { + if (err == EINVAL) { + printf("Failed to emulate instruction at 0x%lx\n", + vmexit->rip); + } else if (err == ESRCH) { + printf("Unhandled memory access to 0x%lx\n", + vmexit->u.paging.gpa); + } + return (VMEXIT_ABORT); } diff --git a/usr.sbin/bhyve/instruction_emul.c b/usr.sbin/bhyve/instruction_emul.c index 790c5ff..78c3608 100644 --- a/usr.sbin/bhyve/instruction_emul.c +++ b/usr.sbin/bhyve/instruction_emul.c @@ -28,10 +28,12 @@ #include #include +#include #include #include #include "fbsdrun.h" +#include "mem.h" #include "instruction_emul.h" #define PREFIX_LOCK 0xF0 @@ -46,6 +48,7 @@ #define PREFIX_BRANCH_NOT_TAKEN 0x2E #define PREFIX_BRANCH_TAKEN 0x3E #define PREFIX_OPSIZE 0x66 +#define is_opsz_prefix(x) ((x) == PREFIX_OPSIZE) #define PREFIX_ADDRSIZE 0x67 #define OPCODE_2BYTE_ESCAPE 0x0F @@ -95,6 +98,11 @@ #define FROM_REG (1<<2) #define TO_RM (1<<3) #define TO_REG (1<<4) +#define ZEXT (1<<5) +#define FROM_8 (1<<6) +#define FROM_16 (1<<7) +#define TO_8 (1<<8) +#define TO_16 (1<<9) #define REX_MASK 0xF0 #define REX_PREFIX 0x40 @@ -118,16 +126,7 
@@ #define PML4E_OFFSET_MASK 0x0000FF8000000000 #define PML4E_SHIFT 39 -#define MAX_EMULATED_REGIONS 8 -int registered_regions = 0; -struct memory_region -{ - uintptr_t start; - uintptr_t end; - emulated_read_func_t memread; - emulated_write_func_t memwrite; - void *arg; -} emulated_regions[MAX_EMULATED_REGIONS]; +#define INSTR_VERIFY struct decoded_instruction { @@ -138,11 +137,12 @@ struct decoded_instruction uint8_t *displacement; uint8_t *immediate; - uint8_t opcode_flags; + uint16_t opcode_flags; uint8_t addressing_mode; uint8_t rm; uint8_t reg; + uint8_t opsz; uint8_t rex_r; uint8_t rex_w; uint8_t rex_b; @@ -170,11 +170,17 @@ static enum vm_reg_name vm_reg_name_mappings[] = { [REG_R15] = VM_REG_GUEST_R15 }; -uint8_t one_byte_opcodes[256] = { - [0x89] = HAS_MODRM | FROM_REG | TO_RM, +uint16_t one_byte_opcodes[256] = { + [0x88] = HAS_MODRM | FROM_REG | TO_RM | TO_8 | FROM_8, + [0x89] = HAS_MODRM | FROM_REG | TO_RM, [0x8B] = HAS_MODRM | FROM_RM | TO_REG, }; +uint16_t two_byte_opcodes[256] = { + [0xB6] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_8, + [0xB7] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_16, +}; + static uintptr_t gla2gpa(uint64_t gla, uint64_t guest_cr3) { @@ -211,7 +217,8 @@ gla2hla(uint64_t gla, uint64_t guest_cr3) uintptr_t gpa; gpa = gla2gpa(gla, guest_cr3); - return paddr_guest2host(gpa); + + return (paddr_guest2host(gpa)); } /* @@ -232,6 +239,9 @@ decode_prefixes(struct decoded_instruction *decoded) decoded->rex_x = *current_prefix & REX_X_MASK; decoded->rex_b = *current_prefix & REX_B_MASK; current_prefix++; + } else if (is_opsz_prefix(*current_prefix)) { + decoded->opsz = 1; + current_prefix++; } else if (is_prefix(*current_prefix)) { return (-1); } @@ -248,16 +258,26 @@ decode_prefixes(struct decoded_instruction *decoded) static int decode_opcode(struct decoded_instruction *decoded) { - uint8_t opcode, flags; + uint8_t opcode; + uint16_t flags; + int extra; opcode = *decoded->opcode; - flags = one_byte_opcodes[opcode]; - + extra = 0; + + 
if (opcode != 0xf) + flags = one_byte_opcodes[opcode]; + else { + opcode = *(decoded->opcode + 1); + flags = two_byte_opcodes[opcode]; + extra = 1; + } + if (!flags) return (-1); if (flags & HAS_MODRM) { - decoded->modrm = decoded->opcode + 1; + decoded->modrm = decoded->opcode + 1 + extra; } decoded->opcode_flags = flags; @@ -381,37 +401,70 @@ decode_instruction(void *instr, struct decoded_instruction *decoded) return (0); } -static struct memory_region * -find_region(uintptr_t addr) +static enum vm_reg_name +get_vm_reg_name(uint8_t reg) { - int i; - - for (i = 0; i < registered_regions; ++i) { - if (emulated_regions[i].start <= addr && - emulated_regions[i].end >= addr) { - return &emulated_regions[i]; - } - } - return (0); + return (vm_reg_name_mappings[reg]); } -static enum vm_reg_name -get_vm_reg_name(uint8_t reg) +static uint64_t +adjust_operand(const struct decoded_instruction *instruction, uint64_t val, + int size) { - return vm_reg_name_mappings[reg]; + uint64_t ret; + + if (instruction->opcode_flags & ZEXT) { + switch (size) { + case 1: + ret = val & 0xff; + break; + case 2: + ret = val & 0xffff; + break; + case 4: + ret = val & 0xffffffff; + break; + case 8: + ret = val; + break; + default: + break; + } + } else { + /* + * Extend the sign + */ + switch (size) { + case 1: + ret = (int8_t)(val & 0xff); + break; + case 2: + ret = (int16_t)(val & 0xffff); + break; + case 4: + ret = (int32_t)(val & 0xffffffff); + break; + case 8: + ret = val; + break; + default: + break; + } + } + + return (ret); } static int -get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3, - const struct decoded_instruction *instruction, uint64_t *operand) +get_operand(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3, + const struct decoded_instruction *instruction, uint64_t *operand, + struct mem_range *mr) { enum vm_reg_name regname; uint64_t reg; - uintptr_t target; int error; - uint8_t rm, addressing_mode; - struct memory_region *emulated_memory; + uint8_t rm, 
addressing_mode, size; if (instruction->opcode_flags & FROM_RM) { rm = instruction->rm; @@ -422,6 +475,17 @@ get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3, } else return (-1); + /* + * Determine size of operand + */ + size = 4; + if (instruction->opcode_flags & FROM_8) { + size = 1; + } else if (instruction->opcode_flags & FROM_16 || + instruction->opsz) { + size = 2; + } + regname = get_vm_reg_name(rm); error = vm_get_register(vm, vcpu, regname, &reg); if (error) @@ -430,33 +494,67 @@ get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3, switch (addressing_mode) { case MOD_DIRECT: *operand = reg; - return (0); + error = 0; + break; case MOD_INDIRECT: case MOD_INDIRECT_DISP8: case MOD_INDIRECT_DISP32: +#ifdef INSTR_VERIFY + { + uintptr_t target; + target = gla2gpa(reg, guest_cr3); target += instruction->disp; - emulated_memory = find_region(target); - if (emulated_memory) { - return emulated_memory->memread(vm, vcpu, target, - 4, operand, - emulated_memory->arg); - } - return (-1); + assert(gpa == target); + } +#endif + error = (*mr->handler)(vm, vcpu, MEM_F_READ, gpa, size, + operand, mr->arg1, mr->arg2); + break; default: return (-1); } + + if (!error) + *operand = adjust_operand(instruction, *operand, size); + + return (error); +} + +static uint64_t +adjust_write(uint64_t reg, uint64_t operand, int size) +{ + uint64_t val; + + switch (size) { + case 1: + val = (reg & ~0xff) | (operand & 0xff); + break; + case 2: + val = (reg & ~0xffff) | (operand & 0xffff); + break; + case 4: + val = (reg & ~0xffffffff) | (operand & 0xffffffff); + break; + case 8: + val = operand; + default: + break; + } + + return (val); } static int -perform_write(struct vmctx *vm, int vcpu, uint64_t guest_cr3, - const struct decoded_instruction *instruction, uint64_t operand) +perform_write(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3, + const struct decoded_instruction *instruction, uint64_t operand, + struct mem_range *mr) { enum vm_reg_name regname;
uintptr_t target; int error; + int size; uint64_t reg; - struct memory_region *emulated_memory; uint8_t addressing_mode; if (instruction->opcode_flags & TO_RM) { @@ -467,83 +565,77 @@ perform_write(struct vmctx *vm, int vcpu, uint64_t guest_cr3, addressing_mode = MOD_DIRECT; } else return (-1); - - regname = get_vm_reg_name(reg); - error = vm_get_register(vm, vcpu, regname, ®); - if (error) - return (error); - + + /* + * Determine the operand size. rex.w has priority + */ + size = 4; + if (instruction->rex_w) { + size = 8; + } else if (instruction->opcode_flags & TO_8) { + size = 1; + } else if (instruction->opsz) { + size = 2; + }; + switch(addressing_mode) { case MOD_DIRECT: - return vm_set_register(vm, vcpu, regname, operand); + regname = get_vm_reg_name(reg); + error = vm_get_register(vm, vcpu, regname, ®); + if (error) + return (error); + operand = adjust_write(reg, operand, size); + + return (vm_set_register(vm, vcpu, regname, operand)); case MOD_INDIRECT: case MOD_INDIRECT_DISP8: case MOD_INDIRECT_DISP32: +#ifdef INSTR_VERIFY + regname = get_vm_reg_name(reg); + error = vm_get_register(vm, vcpu, regname, ®); + assert(!error); target = gla2gpa(reg, guest_cr3); target += instruction->disp; - emulated_memory = find_region(target); - if (emulated_memory) { - return emulated_memory->memwrite(vm, vcpu, target, - 4, operand, - emulated_memory->arg); - } - return (-1); + assert(gpa == target); +#endif + error = (*mr->handler)(vm, vcpu, MEM_F_WRITE, gpa, size, + &operand, mr->arg1, mr->arg2); + return (error); default: return (-1); } } static int -emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t cr3, - const struct decoded_instruction *instruction) +emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t gpa, + uint64_t cr3, + const struct decoded_instruction *instruction, + struct mem_range *mr) { uint64_t operand; int error; - error = get_operand(vm, vcpu, cr3, instruction, &operand); + error = get_operand(vm, vcpu, gpa, cr3, instruction, 
&operand, mr); if (error) return (error); - return perform_write(vm, vcpu, cr3, instruction, operand); + return perform_write(vm, vcpu, gpa, cr3, instruction, operand, mr); } -int -emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3) +int +emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3, + uint64_t gpa, int flags, struct mem_range *mr) { struct decoded_instruction instr; int error; - void *instruction = gla2hla(rip, cr3); - - if ((error = decode_instruction(instruction, &instr)) != 0) - return (error); - - return emulate_decoded_instruction(vm, vcpu, cr3, &instr); -} + void *instruction; -struct memory_region * -register_emulated_memory(uintptr_t start, size_t len, emulated_read_func_t memread, - emulated_write_func_t memwrite, void *arg) -{ - if (registered_regions >= MAX_EMULATED_REGIONS) - return (NULL); - - struct memory_region *region = &emulated_regions[registered_regions]; - region->start = start; - region->end = start + len; - region->memread = memread; - region->memwrite = memwrite; - region->arg = arg; - - registered_regions++; - return (region); -} + instruction = gla2hla(rip, cr3); -void -move_memory_region(struct memory_region *region, uintptr_t start) -{ - size_t len; + error = decode_instruction(instruction, &instr); + if (!error) + error = emulate_decoded_instruction(vm, vcpu, gpa, cr3, + &instr, mr); - len = region->end - region->start; - region->start = start; - region->end = start + len; + return (error); } diff --git a/usr.sbin/bhyve/instruction_emul.h b/usr.sbin/bhyve/instruction_emul.h index e7b6bff..ef85796 100644 --- a/usr.sbin/bhyve/instruction_emul.h +++ b/usr.sbin/bhyve/instruction_emul.h @@ -29,19 +29,8 @@ #ifndef _INSTRUCTION_EMUL_H_ #define _INSTRUCTION_EMUL_H_ -struct memory_region; - -typedef int (*emulated_read_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr, - int size, uint64_t *data, void *arg); -typedef int (*emulated_write_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr, - 
int size, uint64_t data, void *arg); - int emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, - uint64_t cr3); -struct memory_region *register_emulated_memory(uintptr_t start, size_t len, - emulated_read_func_t memread, - emulated_write_func_t memwrite, - void *arg); -void move_memory_region(struct memory_region *memory_region, uintptr_t start); + uint64_t cr3, uint64_t gpa, int flags, + struct mem_range *mr); #endif diff --git a/usr.sbin/bhyve/ioapic.c b/usr.sbin/bhyve/ioapic.c index dc74cfa..ea6e47c 100644 --- a/usr.sbin/bhyve/ioapic.c +++ b/usr.sbin/bhyve/ioapic.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include #include "inout.h" +#include "mem.h" #include "instruction_emul.h" #include "fbsdrun.h" @@ -67,10 +68,13 @@ struct ioapic { static struct ioapic ioapics[1]; /* only a single ioapic for now */ -static int ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, - int size, uint64_t *data, void *arg); -static int ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, - int size, uint64_t data, void *arg); +static int ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr, + int size, uint64_t *data); +static int ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr, + int size, uint64_t data); +static int ioapic_region_handler(struct vmctx *vm, int vcpu, int dir, + uintptr_t paddr, int size, uint64_t *val, + void *arg1, long arg2); static void ioapic_set_pinstate(struct vmctx *ctx, int pin, bool newstate) @@ -139,8 +143,10 @@ ioapic_assert_pin(struct vmctx *ctx, int pin) void ioapic_init(int which) { - int i; + struct mem_range memp; struct ioapic *ioapic; + int error; + int i; assert(which == 0); @@ -153,14 +159,19 @@ ioapic_init(int which) for (i = 0; i < REDIR_ENTRIES; i++) ioapic->redtbl[i] = 0x0001000000010000UL; - /* Register emulated memory region */ ioapic->paddr = IOAPIC_PADDR; - ioapic->region = register_emulated_memory(ioapic->paddr, - sizeof(struct IOAPIC), - ioapic_region_read, - ioapic_region_write, - 
(void *)(uintptr_t)which); - assert(ioapic->region != NULL); + + /* Register emulated memory region */ + memp.name = "ioapic"; + memp.flags = MEM_F_RW; + memp.handler = ioapic_region_handler; + memp.arg1 = ioapic; + memp.arg2 = which; + memp.base = ioapic->paddr; + memp.size = sizeof(struct IOAPIC); + error = register_mem(&memp); + + assert (error == 0); ioapic->inited = 1; } @@ -237,15 +248,11 @@ ioapic_write(struct ioapic *ioapic, uint32_t addr, uint32_t data) } static int -ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, - uint64_t *data, void *arg) +ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr, int size, + uint64_t *data) { - int which, offset; - struct ioapic *ioapic; - - which = (uintptr_t)arg; + int offset; - ioapic = &ioapics[which]; offset = paddr - ioapic->paddr; /* @@ -255,7 +262,7 @@ ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) { #if 1 printf("invalid access to ioapic%d: size %d, offset %d\n", - which, size, offset); + (int)(ioapic - ioapics), size, offset); #endif *data = 0; return (0); @@ -270,15 +277,11 @@ ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, } static int -ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, - uint64_t data, void *arg) +ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr, int size, + uint64_t data) { - int which, offset; - struct ioapic *ioapic; - - which = (uintptr_t)arg; + int offset; - ioapic = &ioapics[which]; offset = paddr - ioapic->paddr; /* @@ -288,7 +291,7 @@ ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) { #if 1 printf("invalid access to ioapic%d: size %d, offset %d\n", - which, size, offset); + (int)(ioapic - ioapics), size, offset); #endif return (0); } @@ -300,3 +303,23 @@ ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, 
return (0); } + +static int +ioapic_region_handler(struct vmctx *vm, int vcpu, int dir, uintptr_t paddr, + int size, uint64_t *val, void *arg1, long arg2) +{ + struct ioapic *ioapic; + int which; + + ioapic = arg1; + which = arg2; + + assert(ioapic == &ioapics[which]); + + if (dir == MEM_F_READ) + ioapic_region_read(ioapic, paddr, size, val); + else + ioapic_region_write(ioapic, paddr, size, *val); + + return (0); +} diff --git a/usr.sbin/bhyve/mem.c b/usr.sbin/bhyve/mem.c new file mode 100644 index 0000000..deb91dc --- /dev/null +++ b/usr.sbin/bhyve/mem.c @@ -0,0 +1,196 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +/* + * Memory ranges are represented with an RB tree. On insertion, the range + * is checked for overlaps. On lookup, the key has the same base and limit + * so it can be searched within the range. + * + * It is assumed that all setup of ranges takes place in single-threaded + * mode before vCPUs have been started. As such, no locks are used on the + * RB tree. If this is no longer the case, then a r/w lock could be used, + * with readers on the lookup and a writer if the tree needs to be changed + * (and per vCPU caches flushed) + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include + +#include "mem.h" +#include "instruction_emul.h" + +struct mmio_rb_range { + RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */ + struct mem_range mr_param; + uint64_t mr_base; + uint64_t mr_end; +}; + +struct mmio_rb_tree; +RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); + +RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rbroot; + +/* + * Per-vCPU cache. Since most accesses from a vCPU will be to + * consecutive addresses in a range, it makes sense to cache the + * result of a lookup. 
+ */ +static struct mmio_rb_range *mmio_hint[VM_MAXCPU]; + +static int +mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b) +{ + if (a->mr_end < b->mr_base) + return (-1); + else if (a->mr_base > b->mr_end) + return (1); + return (0); +} + +static int +mmio_rb_lookup(uint64_t addr, struct mmio_rb_range **entry) +{ + struct mmio_rb_range find, *res; + + find.mr_base = find.mr_end = addr; + + res = RB_FIND(mmio_rb_tree, &mmio_rbroot, &find); + + if (res != NULL) { + *entry = res; + return (0); + } + + return (ENOENT); +} + +static int +mmio_rb_add(struct mmio_rb_range *new) +{ + struct mmio_rb_range *overlap; + + overlap = RB_INSERT(mmio_rb_tree, &mmio_rbroot, new); + + if (overlap != NULL) { +#ifdef RB_DEBUG + printf("overlap detected: new %lx:%lx, tree %lx:%lx\n", + new->mr_base, new->mr_end, + overlap->mr_base, overlap->mr_end); +#endif + + return (EEXIST); + } + + return (0); +} + +#if 0 +static void +mmio_rb_dump(void) +{ + struct mmio_rb_range *np; + + RB_FOREACH(np, mmio_rb_tree, &mmio_rbroot) { + printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end, + np->mr_param.name); + } +} +#endif + +RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); + +int +emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, uint64_t rip, + uint64_t cr3, int mode) +{ + struct mmio_rb_range *entry; + int err; + + err = 0; + + /* + * First check the per-vCPU cache + */ + if (mmio_hint[vcpu] && + paddr >= mmio_hint[vcpu]->mr_base && + paddr <= mmio_hint[vcpu]->mr_end) { + err = emulate_instruction(ctx, vcpu, rip, cr3, paddr, mode, + &mmio_hint[vcpu]->mr_param); + } else { + if (mmio_rb_lookup(paddr, &entry)) { + err = ENOENT; + } else { + mmio_hint[vcpu] = entry; + err = emulate_instruction(ctx, vcpu, rip, cr3, paddr, + mode, &entry->mr_param); + } + } + + return (err); +} + +int +register_mem(struct mem_range *memp) +{ + struct mmio_rb_range *mrp; + int err; + + err = 0; + + mrp = malloc(sizeof(struct mmio_rb_range)); + + if (mrp != NULL) { + 
mrp->mr_param = *memp; + mrp->mr_base = memp->base; + mrp->mr_end = memp->base + memp->size - 1; + + err = mmio_rb_add(mrp); + if (err) + free(mrp); + } else + err = ENOMEM; + + return (err); +} + +void +init_mem(void) +{ + + RB_INIT(&mmio_rbroot); +} diff --git a/usr.sbin/bhyve/mem.h b/usr.sbin/bhyve/mem.h new file mode 100644 index 0000000..53c4f72 --- /dev/null +++ b/usr.sbin/bhyve/mem.h @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _MEM_H_ +#define _MEM_H_ + +#include + +struct vmctx; + +typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, + int size, uint64_t *val, void *arg1, long arg2); + +struct mem_range { + const char *name; + int flags; + mem_func_t handler; + void *arg1; + long arg2; + uint64_t base; + uint64_t size; +}; +#define MEM_F_READ 0x1 +#define MEM_F_WRITE 0x2 +#define MEM_F_RW 0x3 + +void init_mem(void); +int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, uint64_t rip, + uint64_t cr3, int mode); + +int register_mem(struct mem_range *memp); + +#endif /* _MEM_H_ */ diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 96667d5..06fbcc8 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include "fbsdrun.h" #include "inout.h" +#include "mem.h" #include "pci_emul.h" #include "ioapic.h" @@ -364,22 +365,26 @@ pci_finish_mptable_names(void) } static int -pci_emul_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) +pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) { struct pci_devinst *pdi = arg; struct pci_devemu *pe = pdi->pi_d; - int offset, i; + uint64_t offset; + int i; for (i = 0; i <= PCI_BARMAX; i++) { if (pdi->pi_bar[i].type == PCIBAR_IO && port >= pdi->pi_bar[i].addr && - port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { + port + bytes <= + pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { offset = port - pdi->pi_bar[i].addr; if (in) - *eax = (*pe->pe_ior)(pdi, i, offset, bytes); + *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, + offset, bytes); else - (*pe->pe_iow)(pdi, i, offset, bytes, *eax); + (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, + bytes, *eax); return (0); } } @@ -387,6 +392,32 @@ pci_emul_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, } static int +pci_emul_mem_handler(struct vmctx *ctx, int 
vcpu, int dir, uint64_t addr, + int size, uint64_t *val, void *arg1, long arg2) +{ + struct pci_devinst *pdi = arg1; + struct pci_devemu *pe = pdi->pi_d; + uint64_t offset; + int bidx = (int) arg2; + + assert(bidx <= PCI_BARMAX); + assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || + pdi->pi_bar[bidx].type == PCIBAR_MEM64); + assert(addr >= pdi->pi_bar[bidx].addr && + addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); + + offset = addr - pdi->pi_bar[bidx].addr; + + if (dir == MEM_F_WRITE) + (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val); + else + *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size); + + return (0); +} + + +static int pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, uint64_t *addr) { @@ -405,12 +436,21 @@ pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, } int -pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, - enum pcibar_type type, uint64_t size) +pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, + uint64_t size) +{ + + return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); +} + +int +pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, + enum pcibar_type type, uint64_t size) { int i, error; uint64_t *baseptr, limit, addr, mask, lobits, bar; struct inout_port iop; + struct mem_range memp; assert(idx >= 0 && idx <= PCI_BARMAX); @@ -497,13 +537,25 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, if (type == PCIBAR_IO) { iop.name = pdi->pi_name; iop.flags = IOPORT_F_INOUT; - iop.handler = pci_emul_handler; + iop.handler = pci_emul_io_handler; iop.arg = pdi; for (i = 0; i < size; i++) { iop.port = addr + i; register_inout(&iop); } + } else if (type == PCIBAR_MEM32 || type == PCIBAR_MEM64) { + /* add memory bar intercept handler */ + memp.name = pdi->pi_name; + memp.flags = MEM_F_RW; + memp.base = addr; + memp.size = size; + memp.handler = pci_emul_mem_handler; + memp.arg1 = 
pdi; + memp.arg2 = idx; + + error = register_mem(&memp); + assert(error == 0); } return (0); @@ -1061,10 +1113,6 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, } pci_set_cfgdata32(pi, coff, bar); - if (pi->pi_bar[idx].handler) { - pi->pi_bar[idx].handler(pi, idx, bar); - } - } else if (pci_emul_iscap(pi, coff)) { pci_emul_capwrite(pi, coff, bytes, *eax); } else { @@ -1098,12 +1146,15 @@ INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler); /* * Define a dummy test device */ -#define DREGSZ 20 +#define DIOSZ 20 +#define DMEMSZ 4096 struct pci_emul_dsoftc { - uint8_t regs[DREGSZ]; + uint8_t ioregs[DIOSZ]; + uint8_t memregs[DMEMSZ]; }; -#define PCI_EMUL_MSGS 4 +#define PCI_EMUL_MSI_MSGS 4 +#define PCI_EMUL_MSIX_MSGS 16 static int pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) @@ -1120,64 +1171,132 @@ pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); - error = pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, DREGSZ); + error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); + assert(error == 0); + + error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); assert(error == 0); - error = pci_emul_add_msicap(pi, PCI_EMUL_MSGS); + error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); assert(error == 0); return (0); } static void -pci_emul_diow(struct pci_devinst *pi, int baridx, int offset, int size, - uint32_t value) +pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, + uint64_t offset, int size, uint64_t value) { int i; struct pci_emul_dsoftc *sc = pi->pi_arg; - if (offset + size > DREGSZ) { - printf("diow: too large, offset %d size %d\n", offset, size); - return; - } + if (baridx == 0) { + if (offset + size > DIOSZ) { + printf("diow: iow too large, offset %ld size %d\n", + offset, size); + return; + } - if (size == 1) { - sc->regs[offset] = value & 0xff; - } else if (size == 2) { - *(uint16_t 
*)&sc->regs[offset] = value & 0xffff; - } else { - *(uint32_t *)&sc->regs[offset] = value; + if (size == 1) { + sc->ioregs[offset] = value & 0xff; + } else if (size == 2) { + *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; + } else if (size == 4) { + *(uint32_t *)&sc->ioregs[offset] = value; + } else { + printf("diow: iow unknown size %d\n", size); + } + + /* + * Special magic value to generate an interrupt + */ + if (offset == 4 && size == 4 && pci_msi_enabled(pi)) + pci_generate_msi(pi, value % pci_msi_msgnum(pi)); + + if (value == 0xabcdef) { + for (i = 0; i < pci_msi_msgnum(pi); i++) + pci_generate_msi(pi, i); + } } - /* - * Special magic value to generate an interrupt - */ - if (offset == 4 && size == 4 && pci_msi_enabled(pi)) - pci_generate_msi(pi, value % pci_msi_msgnum(pi)); + if (baridx == 1) { + if (offset + size > DMEMSZ) { + printf("diow: memw too large, offset %ld size %d\n", + offset, size); + return; + } - if (value == 0xabcdef) { - for (i = 0; i < pci_msi_msgnum(pi); i++) - pci_generate_msi(pi, i); + if (size == 1) { + sc->memregs[offset] = value; + } else if (size == 2) { + *(uint16_t *)&sc->memregs[offset] = value; + } else if (size == 4) { + *(uint32_t *)&sc->memregs[offset] = value; + } else if (size == 8) { + *(uint64_t *)&sc->memregs[offset] = value; + } else { + printf("diow: memw unknown size %d\n", size); + } + + /* + * magic interrupt ?? 
+ */ + } + + if (baridx > 1) { + printf("diow: unknown bar idx %d\n", baridx); } } -static uint32_t -pci_emul_dior(struct pci_devinst *pi, int baridx, int offset, int size) +static uint64_t +pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, + uint64_t offset, int size) { struct pci_emul_dsoftc *sc = pi->pi_arg; uint32_t value; - if (offset + size > DREGSZ) { - printf("dior: too large, offset %d size %d\n", offset, size); - return (0); + if (baridx == 0) { + if (offset + size > DIOSZ) { + printf("dior: ior too large, offset %ld size %d\n", + offset, size); + return (0); + } + + if (size == 1) { + value = sc->ioregs[offset]; + } else if (size == 2) { + value = *(uint16_t *) &sc->ioregs[offset]; + } else if (size == 4) { + value = *(uint32_t *) &sc->ioregs[offset]; + } else { + printf("dior: ior unknown size %d\n", size); + } } - if (size == 1) { - value = sc->regs[offset]; - } else if (size == 2) { - value = *(uint16_t *) &sc->regs[offset]; - } else { - value = *(uint32_t *) &sc->regs[offset]; + if (baridx == 1) { + if (offset + size > DMEMSZ) { + printf("dior: memr too large, offset %ld size %d\n", + offset, size); + return (0); + } + + if (size == 1) { + value = sc->memregs[offset]; + } else if (size == 2) { + value = *(uint16_t *) &sc->memregs[offset]; + } else if (size == 4) { + value = *(uint32_t *) &sc->memregs[offset]; + } else if (size == 8) { + value = *(uint64_t *) &sc->memregs[offset]; + } else { + printf("dior: ior unknown size %d\n", size); + } + } + + + if (baridx > 1) { + printf("dior: unknown bar idx %d\n", baridx); + return (0); } return (value); @@ -1186,8 +1305,8 @@ pci_emul_dior(struct pci_devinst *pi, int baridx, int offset, int size) struct pci_devemu pci_dummy = { .pe_emu = "dummy", .pe_init = pci_emul_dinit, - .pe_iow = pci_emul_diow, - .pe_ior = pci_emul_dior + .pe_barwrite = pci_emul_diow, + .pe_barread = pci_emul_dior }; PCI_EMUL_SET(pci_dummy); diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h 
index 19dba99..79b86d1 100644 --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -48,7 +48,8 @@ struct pci_devemu { char *pe_emu; /* Name of device emulation */ /* instance creation */ - int (*pe_init)(struct vmctx *, struct pci_devinst *, char *opts); + int (*pe_init)(struct vmctx *, struct pci_devinst *, + char *opts); /* config space read/write callbacks */ int (*pe_cfgwrite)(struct vmctx *ctx, int vcpu, @@ -58,11 +59,13 @@ struct pci_devemu { struct pci_devinst *pi, int offset, int bytes, uint32_t *retval); - /* I/O space read/write callbacks */ - void (*pe_iow)(struct pci_devinst *pi, int baridx, - int offset, int size, uint32_t value); - uint32_t (*pe_ior)(struct pci_devinst *pi, int baridx, - int offset, int size); + /* BAR read/write callbacks */ + void (*pe_barwrite)(struct vmctx *ctx, int vcpu, + struct pci_devinst *pi, int baridx, + uint64_t offset, int size, uint64_t value); + uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu, + struct pci_devinst *pi, int baridx, + uint64_t offset, int size); }; #define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x); @@ -74,13 +77,10 @@ enum pcibar_type { PCIBAR_MEMHI64 }; -typedef int (*bar_write_func_t)(struct pci_devinst *pdi, int idx, uint64_t bar); - struct pcibar { enum pcibar_type type; /* io or memory */ uint64_t size; uint64_t addr; - bar_write_func_t handler; }; #define PI_NAMESZ 40 @@ -119,11 +119,9 @@ struct pci_devinst { int table_bar; int pba_bar; size_t table_offset; - uintptr_t table_gpa; size_t table_size; int table_count; size_t pba_offset; - struct memory_region *table_bar_region; struct msix_table_entry table[MAX_MSIX_TABLE_SIZE]; } pi_msix; @@ -156,15 +154,19 @@ void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val); void pci_callback(void); -int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase, +int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, 
enum pcibar_type type, uint64_t size); +int pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, + uint64_t hostbase, enum pcibar_type type, uint64_t size); int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); int pci_is_legacy(struct pci_devinst *pi); void pci_generate_msi(struct pci_devinst *pi, int msgnum); +void pci_generate_msix(struct pci_devinst *pi, int msgnum); void pci_lintr_assert(struct pci_devinst *pi); void pci_lintr_deassert(struct pci_devinst *pi); int pci_lintr_request(struct pci_devinst *pi, int ivec); int pci_msi_enabled(struct pci_devinst *pi); +int pci_msix_enabled(struct pci_devinst *pi); int pci_msi_msgnum(struct pci_devinst *pi); void pci_parse_name(char *opt); void pci_parse_slot(char *opt, int legacy); diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index a6f1f63..94e4416 100644 --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include "pci_emul.h" +#include "mem.h" #include "instruction_emul.h" #ifndef _PATH_DEVPCI @@ -218,15 +219,17 @@ cfginitmsi(struct passthru_softc *sc) } } - if (sc->psc_msix.capoff == 0) - return (-1); - - pi->pi_msix.pba_bar = msixcap.pba_offset & MSIX_TABLE_BIR_MASK; - pi->pi_msix.pba_offset = msixcap.pba_offset & MSIX_TABLE_OFFSET_MASK; - pi->pi_msix.table_bar = msixcap.table_offset & MSIX_TABLE_BIR_MASK; - pi->pi_msix.table_offset = msixcap.table_offset & MSIX_TABLE_OFFSET_MASK; - - pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); + if (sc->psc_msix.capoff != 0) { + pi->pi_msix.pba_bar = + msixcap.pba_offset & MSIX_TABLE_BIR_MASK; + pi->pi_msix.pba_offset = + msixcap.pba_offset & MSIX_TABLE_OFFSET_MASK; + pi->pi_msix.table_bar = + msixcap.table_offset & MSIX_TABLE_BIR_MASK; + pi->pi_msix.table_offset = + msixcap.table_offset & MSIX_TABLE_OFFSET_MASK; + pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); + } #ifdef LEGACY_SUPPORT /* @@ -252,106 +255,84 @@ cfginitmsi(struct 
passthru_softc *sc) return (0); } -static int -msix_table_read(struct vmctx *vm, int vcpu, uintptr_t addr, - int size, uint64_t *data, void *arg) +static uint64_t +msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) { - struct passthru_softc *sc; struct pci_devinst *pi; - int index; - size_t offset, entry_offset; + struct msix_table_entry *entry; uint8_t *src8; uint16_t *src16; uint32_t *src32; uint64_t *src64; - struct msix_table_entry *entry; + uint64_t data; + size_t entry_offset; + int index; - sc = arg; pi = sc->psc_pi; - offset = addr - pi->pi_msix.table_gpa; - entry_offset = addr % MSIX_TABLE_ENTRY_SIZE; + entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; index = offset / MSIX_TABLE_ENTRY_SIZE; entry = &pi->pi_msix.table[index]; switch(size) { case 1: - src8 = (uint8_t*)((void*)entry + entry_offset); - *data = *src8; + src8 = (uint8_t *)((void *)entry + entry_offset); + data = *src8; break; case 2: - src16 = (uint16_t*)((void*)entry + entry_offset); - *data = *src16; + src16 = (uint16_t *)((void *)entry + entry_offset); + data = *src16; break; case 4: - src32 = (uint32_t*)((void*)entry + entry_offset); - *data = *src32; + src32 = (uint32_t *)((void *)entry + entry_offset); + data = *src32; break; case 8: - src64 = (uint64_t*)((void*)entry + entry_offset); - *data = *src64; + src64 = (uint64_t *)((void *)entry + entry_offset); + data = *src64; break; default: return (-1); } - return (0); + return (data); } -static int -msix_table_write(struct vmctx *vm, int vcpu, uintptr_t addr, - int size, uint64_t data, void *arg) +static void +msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, + uint64_t offset, int size, uint64_t data) { - struct passthru_softc *sc; struct pci_devinst *pi; - int error, index; - size_t offset, entry_offset; - uint32_t *dest; struct msix_table_entry *entry; + uint32_t *dest; + size_t entry_offset; uint32_t vector_control; + int error, index; - sc = arg; pi = sc->psc_pi; - offset = addr - 
pi->pi_msix.table_gpa; - entry_offset = addr % MSIX_TABLE_ENTRY_SIZE; + entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; index = offset / MSIX_TABLE_ENTRY_SIZE; entry = &pi->pi_msix.table[index]; /* Only 4 byte naturally-aligned writes are supported */ - if (size == 4 && entry_offset % 4 == 0) { - vector_control = entry->vector_control; - dest = (uint32_t*)((void*)entry + entry_offset); - *dest = data; - /* If MSI-X hasn't been enabled, do nothing */ - if (pi->pi_msix.enabled) { - /* If the entry is masked, don't set it up */ - if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || - (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { - error = vm_setup_msix(vm, vcpu, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, - index, entry->msg_data, - entry->vector_control, - entry->addr); - if (error) - return (-1); - } + assert(size == 4); + assert(entry_offset % 4 == 0); + + vector_control = entry->vector_control; + dest = (uint32_t *)((void *)entry + entry_offset); + *dest = data; + /* If MSI-X hasn't been enabled, do nothing */ + if (pi->pi_msix.enabled) { + /* If the entry is masked, don't set it up */ + if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || + (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { + error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus, + sc->psc_sel.pc_dev, + sc->psc_sel.pc_func, + index, entry->msg_data, + entry->vector_control, + entry->addr); } - } else { - printf("Unsupported unaligned or non-4-byte write to MSI-X table\n"); - return (-1); } - return (0); -} - -static int -msix_bar_handler(struct pci_devinst *pdi, int idx, uint64_t bar) -{ - uintptr_t start; - - start = (bar & PCIM_BAR_MEM_BASE) + pdi->pi_msix.table_offset; - move_memory_region(pdi->pi_msix.table_bar_region, start); - pdi->pi_msix.table_gpa = start; - return (0); } static int @@ -375,6 +356,7 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) printf("Unsupported MSI-X table and PBA in same page\n"); return (-1); } + /* * May 
need to split the BAR into 3 regions: * Before the MSI-X table, the MSI-X table, and after it @@ -395,30 +377,9 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) start = pi->pi_bar[idx].addr; len = pi->pi_msix.table_offset; } - return vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, sc->psc_sel.pc_func, - start, len, base + table_size); -} - -static int -cfginitmsix(struct passthru_softc *sc) -{ - int table_bar; - struct pci_devinst *pi; - - pi = sc->psc_pi; - table_bar = pi->pi_msix.table_bar; - pi->pi_msix.table_gpa = sc->psc_bar[table_bar].addr + pi->pi_msix.table_offset; - pi->pi_msix.table_bar_region = register_emulated_memory(pi->pi_msix.table_gpa, - pi->pi_msix.table_size, - msix_table_read, - msix_table_write, sc); - if (!pi->pi_msix.table_bar_region) - return (-1); - - pi->pi_bar[table_bar].handler = msix_bar_handler; - - return (0); + return (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, + sc->psc_sel.pc_dev, sc->psc_sel.pc_func, + start, len, base + table_size)); } static int @@ -464,8 +425,8 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) sc->psc_bar[i].addr = base; /* Allocate the BAR in the guest I/O or MMIO space */ - error = pci_emul_alloc_bar(pi, i, base, bartype, - bar.pbi_length); + error = pci_emul_alloc_pbar(pi, i, base, bartype, + bar.pbi_length); if (error) return (-1); @@ -515,9 +476,6 @@ cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func) if (cfginitbar(ctx, sc) != 0) goto done; - if (cfginitmsix(sc) != 0) - goto done; - error = 0; /* success */ done: return (error); @@ -544,7 +502,8 @@ passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) goto done; } - if (opts == NULL || sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) + if (opts == NULL || + sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) goto done; if (vm_assign_pptdev(ctx, bus, slot, func) != 0) @@ -557,7 +516,7 @@ passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char 
*opts) sc->psc_pi = pi; /* initialize config space */ - if (cfginit(ctx, pi, bus, slot, func) != 0) + if ((error = cfginit(ctx, pi, bus, slot, func)) != 0) goto done; error = 0; /* success */ @@ -605,8 +564,8 @@ msixcap_access(struct passthru_softc *sc, int coff) } static int -passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, - int bytes, uint32_t *rv) +passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int coff, int bytes, uint32_t *rv) { struct passthru_softc *sc; @@ -636,8 +595,8 @@ passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, } static int -passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, - int bytes, uint32_t val) +passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int coff, int bytes, uint32_t val) { int error, msix_table_entries, i; struct passthru_softc *sc; @@ -705,40 +664,54 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, } static void -passthru_iow(struct pci_devinst *pi, int baridx, int offset, int size, - uint32_t value) +passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, + uint64_t offset, int size, uint64_t value) { struct passthru_softc *sc; struct iodev_pio_req pio; sc = pi->pi_arg; - bzero(&pio, sizeof(struct iodev_pio_req)); - pio.access = IODEV_PIO_WRITE; - pio.port = sc->psc_bar[baridx].addr + offset; - pio.width = size; - pio.val = value; - - (void)ioctl(iofd, IODEV_PIO, &pio); + if (pi->pi_msix.enabled && pi->pi_msix.table_bar == baridx) { + msix_table_write(ctx, vcpu, sc, offset, size, value); + } else { + assert(pi->pi_bar[baridx].type == PCIBAR_IO); + bzero(&pio, sizeof(struct iodev_pio_req)); + pio.access = IODEV_PIO_WRITE; + pio.port = sc->psc_bar[baridx].addr + offset; + pio.width = size; + pio.val = value; + + (void)ioctl(iofd, IODEV_PIO, &pio); + } } -static uint32_t -passthru_ior(struct pci_devinst *pi, int baridx, int offset, int 
size) +static uint64_t +passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, + uint64_t offset, int size) { struct passthru_softc *sc; struct iodev_pio_req pio; + uint64_t val; sc = pi->pi_arg; - bzero(&pio, sizeof(struct iodev_pio_req)); - pio.access = IODEV_PIO_READ; - pio.port = sc->psc_bar[baridx].addr + offset; - pio.width = size; - pio.val = 0; + if (pi->pi_msix.enabled && pi->pi_msix.table_bar == baridx) { + val = msix_table_read(sc, offset, size); + } else { + assert(pi->pi_bar[baridx].type == PCIBAR_IO); + bzero(&pio, sizeof(struct iodev_pio_req)); + pio.access = IODEV_PIO_READ; + pio.port = sc->psc_bar[baridx].addr + offset; + pio.width = size; + pio.val = 0; + + (void)ioctl(iofd, IODEV_PIO, &pio); - (void)ioctl(iofd, IODEV_PIO, &pio); + val = pio.val; + } - return (pio.val); + return (val); } struct pci_devemu passthru = { @@ -746,7 +719,7 @@ struct pci_devemu passthru = { .pe_init = passthru_init, .pe_cfgwrite = passthru_cfgwrite, .pe_cfgread = passthru_cfgread, - .pe_iow = passthru_iow, - .pe_ior = passthru_ior, + .pe_barwrite = passthru_write, + .pe_barread = passthru_read, }; PCI_EMUL_SET(passthru); diff --git a/usr.sbin/bhyve/pci_uart.c b/usr.sbin/bhyve/pci_uart.c index 0f8a281..51876f5 100644 --- a/usr.sbin/bhyve/pci_uart.c +++ b/usr.sbin/bhyve/pci_uart.c @@ -320,8 +320,8 @@ pci_uart_drain(int fd, enum ev_type ev, void *arg) } static void -pci_uart_write(struct pci_devinst *pi, int baridx, int offset, int size, - uint32_t value) +pci_uart_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int baridx, uint64_t offset, int size, uint64_t value) { struct pci_uart_softc *sc; int fifosz; @@ -329,6 +329,7 @@ pci_uart_write(struct pci_devinst *pi, int baridx, int offset, int size, sc = pi->pi_arg; + assert(baridx == 0); assert(size == 1); /* Open terminal */ @@ -459,15 +460,17 @@ done: pci_uart_toggle_intr(sc); } -uint32_t -pci_uart_read(struct pci_devinst *pi, int baridx, int offset, int size) +uint64_t 
+pci_uart_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int baridx, uint64_t offset, int size) { struct pci_uart_softc *sc; uint8_t iir, intr_reason; - uint32_t reg; + uint64_t reg; sc = pi->pi_arg; + assert(baridx == 0); assert(size == 1); /* Open terminal */ @@ -573,11 +576,11 @@ pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM); if (pci_is_legacy(pi)) { pci_uart_legacy_res(&bar, &ivec); + pci_emul_alloc_pbar(pi, 0, bar, PCIBAR_IO, 8); } else { - bar = 0; ivec = -1; + pci_emul_alloc_bar(pi, 0, PCIBAR_IO, 8); } - pci_emul_alloc_bar(pi, 0, bar, PCIBAR_IO, 8); pci_lintr_request(pi, ivec); if (opts != NULL && !strcmp("stdio", opts) && !pci_uart_stdio) { @@ -591,9 +594,9 @@ pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) } struct pci_devemu pci_de_com = { - .pe_emu = "uart", - .pe_init = pci_uart_init, - .pe_iow = pci_uart_write, - .pe_ior = pci_uart_read, + .pe_emu = "uart", + .pe_init = pci_uart_init, + .pe_barwrite = pci_uart_write, + .pe_barread = pci_uart_read }; PCI_EMUL_SET(pci_de_com); diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c index 98c0695..916c7c3 100644 --- a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -382,20 +382,22 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); - pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTBLK_REGSZ); pci_emul_add_msicap(pi, 1); + pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ); return (0); } static void -pci_vtblk_write(struct pci_devinst *pi, int baridx, int offset, int size, - uint32_t value) +pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int baridx, uint64_t offset, int size, uint64_t value) { struct pci_vtblk_softc *sc = pi->pi_arg; - + + assert(baridx == 
0); + if (offset + size > VTBLK_REGSZ) { - DPRINTF(("vtblk_write: 2big, offset %d size %d\n", + DPRINTF(("vtblk_write: 2big, offset %ld size %d\n", offset, size)); return; } @@ -426,24 +428,27 @@ pci_vtblk_write(struct pci_devinst *pi, int baridx, int offset, int size, case VTCFG_R_QNUM: case VTCFG_R_ISR: case VTBLK_R_CFG ... VTBLK_R_CFG_END: - DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); + DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset)); break; default: - DPRINTF(("vtblk: unknown i/o write offset %d\n\r", offset)); + DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset)); value = 0; break; } } -uint32_t -pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size) +uint64_t +pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int baridx, uint64_t offset, int size) { struct pci_vtblk_softc *sc = pi->pi_arg; void *ptr; uint32_t value; + assert(baridx == 0); + if (offset + size > VTBLK_REGSZ) { - DPRINTF(("vtblk_read: 2big, offset %d size %d\n", + DPRINTF(("vtblk_read: 2big, offset %ld size %d\n", offset, size)); return (0); } @@ -493,7 +498,7 @@ pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size) } break; default: - DPRINTF(("vtblk: unknown i/o read offset %d\n\r", offset)); + DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset)); value = 0; break; } @@ -502,9 +507,9 @@ pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size) } struct pci_devemu pci_de_vblk = { - .pe_emu = "virtio-blk", - .pe_init = pci_vtblk_init, - .pe_iow = pci_vtblk_write, - .pe_ior = pci_vtblk_read, + .pe_emu = "virtio-blk", + .pe_init = pci_vtblk_init, + .pe_barwrite = pci_vtblk_write, + .pe_barread = pci_vtblk_read }; PCI_EMUL_SET(pci_de_vblk); diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index 6160b14..26c8ee8 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -574,8 +574,8 @@ pci_vtnet_init(struct vmctx *ctx, struct 
pci_devinst *pi, char *opts) pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); - pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTNET_REGSZ); pci_emul_add_msicap(pi, 1); + pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ); return (0); } @@ -590,14 +590,16 @@ static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = { }; static void -pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size, - uint32_t value) +pci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int baridx, uint64_t offset, int size, uint64_t value) { struct pci_vtnet_softc *sc = pi->pi_arg; void *ptr; + assert(baridx == 0); + if (offset + size > VTNET_REGSZ) { - DPRINTF(("vtnet_write: 2big, offset %d size %d\n", + DPRINTF(("vtnet_write: 2big, offset %ld size %d\n", offset, size)); return; } @@ -652,10 +654,10 @@ pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size, case VTCFG_R_ISR: case VTNET_R_CFG6: case VTNET_R_CFG7: - DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); + DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset)); break; default: - DPRINTF(("vtnet: unknown i/o write offset %d\n\r", offset)); + DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset)); value = 0; break; } @@ -663,15 +665,18 @@ pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size, pthread_mutex_unlock(&sc->vsc_mtx); } -uint32_t -pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size) +uint64_t +pci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int baridx, uint64_t offset, int size) { struct pci_vtnet_softc *sc = pi->pi_arg; void *ptr; - uint32_t value; + uint64_t value; + + assert(baridx == 0); if (offset + size > VTNET_REGSZ) { - DPRINTF(("vtnet_read: 2big, offset %d size %d\n", + DPRINTF(("vtnet_read: 2big, offset %ld size %d\n", offset, size)); return (0); } @@ -737,7 +742,7 @@ 
pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size) value = 0; /* XXX link status in LSB */ break; default: - DPRINTF(("vtnet: unknown i/o read offset %d\n\r", offset)); + DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset)); value = 0; break; } @@ -748,9 +753,9 @@ pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size) } struct pci_devemu pci_de_vnet = { - .pe_emu = "virtio-net", - .pe_init = pci_vtnet_init, - .pe_iow = pci_vtnet_write, - .pe_ior = pci_vtnet_read, + .pe_emu = "virtio-net", + .pe_init = pci_vtnet_init, + .pe_barwrite = pci_vtnet_write, + .pe_barread = pci_vtnet_read }; PCI_EMUL_SET(pci_de_vnet); -- cgit v1.1 From f5d9223df5e3346dffa11447825ecfb306814df6 Mon Sep 17 00:00:00 2001 From: neel Date: Thu, 25 Oct 2012 03:39:36 +0000 Subject: Fix typo: host_rip -> host_rsp Obtained from: NetApp --- usr.sbin/vmmctl/vmmctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/vmmctl/vmmctl.c b/usr.sbin/vmmctl/vmmctl.c index 331bd728..d5e0503 100644 --- a/usr.sbin/vmmctl/vmmctl.c +++ b/usr.sbin/vmmctl/vmmctl.c @@ -1404,7 +1404,7 @@ main(int argc, char *argv[]) if (!error && (get_host_rsp || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp); if (error == 0) - printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rsp); + printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp); } if (!error && (get_guest_sysenter || get_all)) { -- cgit v1.1 From 1372a368e0c75317b5fcf37426faaabc995735c9 Mon Sep 17 00:00:00 2001 From: grehan Date: Fri, 26 Oct 2012 13:40:12 +0000 Subject: Remove mptable generation code from libvmmapi and move it to bhyve. Firmware tables require too much knowledge of system configuration, and it's difficult to pass that information in general terms to a library. The upcoming ACPI work exposed this - it will also livein bhyve. Also, remove code specific to NetApp from the mptable name, and remove the -n option from bhyve. 
Reviewed by: neel Obtained from: NetApp --- usr.sbin/bhyve/Makefile | 2 +- usr.sbin/bhyve/fbsdrun.c | 17 +- usr.sbin/bhyve/fbsdrun.h | 1 - usr.sbin/bhyve/mptbl.c | 398 ++++++++++++++++++++++++++++++++++++++++++++++ usr.sbin/bhyve/mptbl.h | 35 ++++ usr.sbin/bhyve/pci_emul.c | 202 +---------------------- usr.sbin/bhyve/pci_emul.h | 1 - 7 files changed, 437 insertions(+), 219 deletions(-) create mode 100644 usr.sbin/bhyve/mptbl.c create mode 100644 usr.sbin/bhyve/mptbl.h (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index 72d60ae..3cae422 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -5,7 +5,7 @@ PROG= bhyve SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c -SRCS+= instruction_emul.c ioapic.c mem.c mevent.c +SRCS+= instruction_emul.c ioapic.c mem.c mevent.c mptbl.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c SRCS+= spinup_ap.c diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index d7061f9..9905f19 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include "dbgport.h" #include "mem.h" #include "mevent.h" +#include "mptbl.h" #include "pci_emul.h" #include "xmsr.h" #include "instruction_emul.h" @@ -99,9 +100,6 @@ static const int BSP = 0; static int cpumask; -static void *oem_tbl_start; -static int oem_tbl_size; - static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); struct vm_exit vmexit[VM_MAXCPU]; @@ -144,7 +142,6 @@ usage(int code) " -z: guest hz (default is %d)\n" " -s: PCI slot config\n" " -S: legacy PCI slot config\n" - " -n: PCI slot naming\n" " -m: lowmem in MB\n" " -M: highmem in MB\n" " -x: mux vcpus to 1 hcpu\n" @@ -168,13 +165,6 @@ paddr_guest2host(uintptr_t gaddr) return (NULL); } -void -fbsdrun_add_oemtbl(void *tbl, int tblsz) -{ - oem_tbl_start = tbl; - oem_tbl_size = tblsz; -} - int 
fbsdrun_disable_x2apic(void) { @@ -615,9 +605,6 @@ main(int argc, char *argv[]) case 'S': pci_parse_slot(optarg, 1); break; - case 'n': - pci_parse_name(optarg); - break; case 'm': lomem_sz = strtoul(optarg, NULL, 0) * MB; break; @@ -731,7 +718,7 @@ main(int argc, char *argv[]) /* * build the guest tables, MP etc. */ - vm_build_tables(ctx, guest_ncpus, ioapic, oem_tbl_start, oem_tbl_size); + mptable_build(ctx, guest_ncpus, ioapic); /* * Add CPU 0 diff --git a/usr.sbin/bhyve/fbsdrun.h b/usr.sbin/bhyve/fbsdrun.h index 0b9fe7d..45033b8 100644 --- a/usr.sbin/bhyve/fbsdrun.h +++ b/usr.sbin/bhyve/fbsdrun.h @@ -46,7 +46,6 @@ extern u_long lomem_sz, himem_sz; void *paddr_guest2host(uintptr_t); void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip); -void fbsdrun_add_oemtbl(void *tbl, int tblsz); int fbsdrun_muxed(void); int fbsdrun_vmexit_on_hlt(void); int fbsdrun_vmexit_on_pause(void); diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c new file mode 100644 index 0000000..03aaaee --- /dev/null +++ b/usr.sbin/bhyve/mptbl.c @@ -0,0 +1,398 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include + +#include "fbsdrun.h" +#include "mptbl.h" + +#define MPTABLE_BASE 0xF0000 + +#define LAPIC_PADDR 0xFEE00000 +#define LAPIC_VERSION 16 + +#define IOAPIC_PADDR 0xFEC00000 +#define IOAPIC_VERSION 0x11 + +#define MP_SPECREV 4 +#define MPFP_SIG "_MP_" + +/* Configuration header defines */ +#define MPCH_SIG "PCMP" +#define MPCH_OEMID "BHyVe " +#define MPCH_OEMID_LEN 8 +#define MPCH_PRODID "Hypervisor " +#define MPCH_PRODID_LEN 12 + +/* Processor entry defines */ +#define MPEP_SIG_FAMILY 6 /* XXX bhyve should supply this */ +#define MPEP_SIG_MODEL 26 +#define MPEP_SIG_STEPPING 5 +#define MPEP_SIG \ + ((MPEP_SIG_FAMILY << 8) | \ + (MPEP_SIG_MODEL << 4) | \ + (MPEP_SIG_STEPPING)) + +#define MPEP_FEATURES (0xBFEBFBFF) /* XXX Intel i7 */ + +/* Define processor entry struct since gets it wrong */ +typedef struct BPROCENTRY { + u_char type; + u_char apic_id; + u_char apic_version; + u_char cpu_flags; + uint32_t cpu_signature; + uint32_t feature_flags; + uint32_t reserved1; + uint32_t reserved2; +} *bproc_entry_ptr; +CTASSERT(sizeof(struct BPROCENTRY) == 20); + +/* Bus entry defines */ +#define MPE_NUM_BUSES 2 +#define MPE_BUSNAME_LEN 6 +#define MPE_BUSNAME_ISA "ISA " +#define MPE_BUSNAME_PCI "PCI " + +static void *oem_tbl_start; +static int oem_tbl_size; + +static uint8_t +mpt_compute_checksum(void *base, size_t len) +{ + uint8_t *bytes; + 
uint8_t sum; + + for(bytes = base, sum = 0; len > 0; len--) { + sum += *bytes++; + } + + return (256 - sum); +} + +static void +mpt_build_mpfp(mpfps_t mpfp, vm_paddr_t gpa) +{ + + memset(mpfp, 0, sizeof(*mpfp)); + memcpy(mpfp->signature, MPFP_SIG, 4); + mpfp->pap = gpa + sizeof(*mpfp); + mpfp->length = 1; + mpfp->spec_rev = MP_SPECREV; + mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp)); +} + +static void +mpt_build_mpch(mpcth_t mpch) +{ + + memset(mpch, 0, sizeof(*mpch)); + memcpy(mpch->signature, MPCH_SIG, 4); + mpch->spec_rev = MP_SPECREV; + memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN); + memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN); + mpch->apic_address = LAPIC_PADDR; +} + +static void +mpt_build_proc_entries(bproc_entry_ptr mpep, int ncpu) +{ + int i; + + for (i = 0; i < ncpu; i++) { + memset(mpep, 0, sizeof(*mpep)); + mpep->type = MPCT_ENTRY_PROCESSOR; + mpep->apic_id = i; // XXX + mpep->apic_version = LAPIC_VERSION; + mpep->cpu_flags = PROCENTRY_FLAG_EN; + if (i == 0) + mpep->cpu_flags |= PROCENTRY_FLAG_BP; + mpep->cpu_signature = MPEP_SIG; + mpep->feature_flags = MPEP_FEATURES; + mpep++; + } +} + +static void +mpt_build_bus_entries(bus_entry_ptr mpeb) +{ + + memset(mpeb, 0, sizeof(*mpeb)); + mpeb->type = MPCT_ENTRY_BUS; + mpeb->bus_id = ISA; + memcpy(mpeb->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN); + mpeb++; + + memset(mpeb, 0, sizeof(*mpeb)); + mpeb->type = MPCT_ENTRY_BUS; + mpeb->bus_id = PCI; + memcpy(mpeb->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN); +} + +static void +mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id) +{ + + memset(mpei, 0, sizeof(*mpei)); + mpei->type = MPCT_ENTRY_IOAPIC; + mpei->apic_id = id; + mpei->apic_version = IOAPIC_VERSION; + mpei->apic_flags = IOAPICENTRY_FLAG_EN; + mpei->apic_address = IOAPIC_PADDR; +} + +#ifdef notyet +static void +mpt_build_ioint_entries(struct mpe_ioint *mpeii, int num_pins, int id) +{ + int pin; + + /* + * The following config is taken from kernel mptable.c + * 
mptable_parse_default_config_ints(...), for now + * just use the default config, tweek later if needed. + */ + + + /* Run through all 16 pins. */ + for (pin = 0; pin < num_pins; pin++) { + memset(mpeii, 0, sizeof(*mpeii)); + mpeii->entry_type = MP_ENTRY_IOINT; + mpeii->src_bus_id = MPE_BUSID_ISA; + mpeii->dst_apic_id = id; + + /* + * All default configs route IRQs from bus 0 to the first 16 + * pins of the first I/O APIC with an APIC ID of 2. + */ + mpeii->dst_apic_intin = pin; + switch (pin) { + case 0: + /* Pin 0 is an ExtINT pin. */ + mpeii->intr_type = MPEII_INTR_EXTINT; + break; + case 2: + /* IRQ 0 is routed to pin 2. */ + mpeii->intr_type = MPEII_INTR_INT; + mpeii->src_bus_irq = 0; + break; + case 5: + case 10: + case 11: + /* + * PCI Irqs set to level triggered. + */ + mpeii->intr_flags = MPEII_FLAGS_TRIGMODE_LEVEL; + mpeii->src_bus_id = MPE_BUSID_PCI; + default: + /* All other pins are identity mapped. */ + mpeii->intr_type = MPEII_INTR_INT; + mpeii->src_bus_irq = pin; + break; + } + mpeii++; + } + +} + +#define COPYSTR(dest, src, bytes) \ + memcpy(dest, src, bytes); \ + str[bytes] = 0; + +static void +mptable_dump(struct mp_floating_pointer *mpfp, struct mp_config_hdr *mpch) +{ + static char str[16]; + int i; + char *cur; + + union mpe { + struct mpe_proc *proc; + struct mpe_bus *bus; + struct mpe_ioapic *ioapic; + struct mpe_ioint *ioint; + struct mpe_lint *lnit; + char *p; + }; + + union mpe mpe; + + printf(" MP Floating Pointer :\n"); + COPYSTR(str, mpfp->signature, 4); + printf("\tsignature:\t%s\n", str); + printf("\tmpch paddr:\t%x\n", mpfp->mptable_paddr); + printf("\tlength:\t%x\n", mpfp->length); + printf("\tspecrec:\t%x\n", mpfp->specrev); + printf("\tchecksum:\t%x\n", mpfp->checksum); + printf("\tfeature1:\t%x\n", mpfp->feature1); + printf("\tfeature2:\t%x\n", mpfp->feature2); + printf("\tfeature3:\t%x\n", mpfp->feature3); + printf("\tfeature4:\t%x\n", mpfp->feature4); + + printf(" MP Configuration Header :\n"); + COPYSTR(str, mpch->signature, 
4); + printf(" signature: %s\n", str); + printf(" length: %x\n", mpch->length); + printf(" specrec: %x\n", mpch->specrev); + printf(" checksum: %x\n", mpch->checksum); + COPYSTR(str, mpch->oemid, MPCH_OEMID_LEN); + printf(" oemid: %s\n", str); + COPYSTR(str, mpch->prodid, MPCH_PRODID_LEN); + printf(" prodid: %s\n", str); + printf(" oem_ptr: %x\n", mpch->oem_ptr); + printf(" oem_sz: %x\n", mpch->oem_sz); + printf(" nr_entries: %x\n", mpch->nr_entries); + printf(" apic paddr: %x\n", mpch->lapic_paddr); + printf(" ext_length: %x\n", mpch->ext_length); + printf(" ext_checksum: %x\n", mpch->ext_checksum); + + cur = (char *)mpch + sizeof(*mpch); + for (i = 0; i < mpch->nr_entries; i++) { + mpe.p = cur; + switch(*mpe.p) { + case MP_ENTRY_PROC: + printf(" MP Processor Entry :\n"); + printf(" lapic_id: %x\n", mpe.proc->lapic_id); + printf(" lapic_version: %x\n", mpe.proc->lapic_version); + printf(" proc_flags: %x\n", mpe.proc->proc_flags); + printf(" proc_signature: %x\n", mpe.proc->proc_signature); + printf(" feature_flags: %x\n", mpe.proc->feature_flags); + cur += sizeof(struct mpe_proc); + break; + case MP_ENTRY_BUS: + printf(" MP Bus Entry :\n"); + printf(" busid: %x\n", mpe.bus->busid); + COPYSTR(str, mpe.bus->busname, MPE_BUSNAME_LEN); + printf(" busname: %s\n", str); + cur += sizeof(struct mpe_bus); + break; + case MP_ENTRY_IOAPIC: + printf(" MP IOAPIC Entry :\n"); + printf(" ioapi_id: %x\n", mpe.ioapic->ioapic_id); + printf(" ioapi_version: %x\n", mpe.ioapic->ioapic_version); + printf(" ioapi_flags: %x\n", mpe.ioapic->ioapic_flags); + printf(" ioapi_paddr: %x\n", mpe.ioapic->ioapic_paddr); + cur += sizeof(struct mpe_ioapic); + break; + case MP_ENTRY_IOINT: + printf(" MP IO Interrupt Entry :\n"); + printf(" intr_type: %x\n", mpe.ioint->intr_type); + printf(" intr_flags: %x\n", mpe.ioint->intr_flags); + printf(" src_bus_id: %x\n", mpe.ioint->src_bus_id); + printf(" src_bus_irq: %x\n", mpe.ioint->src_bus_irq); + printf(" dst_apic_id: %x\n", mpe.ioint->dst_apic_id); + 
printf(" dst_apic_intin: %x\n", mpe.ioint->dst_apic_intin); + cur += sizeof(struct mpe_ioint); + break; + case MP_ENTRY_LINT: + printf(" MP Local Interrupt Entry :\n"); + cur += sizeof(struct mpe_lint); + break; + } + + } +} +#endif + +void +mptable_add_oemtbl(void *tbl, int tblsz) +{ + + oem_tbl_start = tbl; + oem_tbl_size = tblsz; +} + +int +mptable_build(struct vmctx *ctx, int ncpu, int ioapic) +{ + mpcth_t mpch; + bus_entry_ptr mpeb; + io_apic_entry_ptr mpei; + bproc_entry_ptr mpep; + mpfps_t mpfp; + char *curraddr; + char *startaddr; + + if (paddr_guest2host(0) == NULL) { + printf("mptable requires mapped mem\n"); + return (ENOMEM); + } + + startaddr = curraddr = paddr_guest2host(MPTABLE_BASE); + + mpfp = (mpfps_t)curraddr; + mpt_build_mpfp(mpfp, MPTABLE_BASE); + curraddr += sizeof(*mpfp); + + mpch = (mpcth_t)curraddr; + mpt_build_mpch(mpch); + curraddr += sizeof(*mpch); + + mpep = (bproc_entry_ptr)curraddr; + mpt_build_proc_entries(mpep, ncpu); + curraddr += sizeof(*mpep) * ncpu; + mpch->entry_count += ncpu; + + mpeb = (bus_entry_ptr) curraddr; + mpt_build_bus_entries(mpeb); + curraddr += sizeof(*mpeb) * MPE_NUM_BUSES; + mpch->entry_count += MPE_NUM_BUSES; + + if (ioapic) { + mpei = (io_apic_entry_ptr)curraddr; + mpt_build_ioapic_entries(mpei, ncpu + 1); + curraddr += sizeof(*mpei); + mpch->entry_count++; + } + +#ifdef notyet + mpt_build_ioint_entries((struct mpe_ioint*)curraddr, MPEII_MAX_IRQ, + ncpu + 1); + curraddr += sizeof(struct mpe_ioint) * MPEII_MAX_IRQ; + mpch->entry_count += MPEII_MAX_IRQ; +#endif + + if (oem_tbl_start) { + mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE; + mpch->oem_table_size = oem_tbl_size; + memcpy(curraddr, oem_tbl_start, oem_tbl_size); + } + + mpch->base_table_length = curraddr - (char *)mpch; + mpch->checksum = mpt_compute_checksum(mpch, sizeof(*mpch)); + + return (0); +} diff --git a/usr.sbin/bhyve/mptbl.h b/usr.sbin/bhyve/mptbl.h new file mode 100644 index 0000000..3c4c527 --- /dev/null +++ 
b/usr.sbin/bhyve/mptbl.h @@ -0,0 +1,35 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _MPTBL_H_ +#define _MPTBL_H_ + +int mptable_build(struct vmctx *ctx, int ncpu, int ioapic); +void mptable_add_oemtbl(void *tbl, int tblsz); + +#endif /* _MPTBL_H_ */ diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 06fbcc8..f9d75e3 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include "fbsdrun.h" #include "inout.h" #include "mem.h" +#include "mptbl.h" #include "pci_emul.h" #include "ioapic.h" @@ -69,10 +70,6 @@ static struct slotinfo { char *si_name; char *si_param; struct pci_devinst *si_devi; - int si_titled; - int si_pslot; - char si_prefix; - char si_suffix; int si_legacy; } pci_slotinfo[MAXSLOTS][MAXFUNCS]; @@ -87,37 +84,6 @@ static struct lirqinfo { struct pci_devinst *li_owner; /* XXX should be a list */ } lirq[NLIRQ]; -/* - * NetApp specific: - * struct used to build an in-core OEM table to supply device names - * to driver instances - */ -static struct mptable_pci_devnames { -#define MPT_HDR_BASE 0 -#define MPT_HDR_NAME 2 - uint16_t md_hdrtype; - uint16_t md_entries; - uint16_t md_cksum; - uint16_t md_pad; -#define MPT_NTAP_SIG \ - ((uint32_t)(('P' << 24) | ('A' << 16) | ('T' << 8) | 'N')) - uint32_t md_sig; - uint32_t md_rsvd; - struct mptable_pci_slotinfo { - uint16_t mds_type; - uint16_t mds_phys_slot; - uint8_t mds_bus; - uint8_t mds_slot; - uint8_t mds_func; - uint8_t mds_pad; - uint16_t mds_vid; - uint16_t mds_did; - uint8_t mds_suffix[4]; - uint8_t mds_prefix[4]; - uint32_t mds_rsvd[3]; - } md_slotinfo[MAXSLOTS * MAXFUNCS]; -} pci_devnames; - SET_DECLARE(pci_devemu_set, struct pci_devemu); static uint64_t pci_emul_iobase; @@ -134,7 +100,6 @@ static uint64_t pci_emul_membase64; #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL static int pci_emul_devices; -static int devname_elems; /* * I/O access @@ -201,169 +166,6 @@ pci_parse_slot(char *opt, int legacy) } } -/* - * - * PCI MPTable names are of the form: - * - * [:],[prefix] - * - * 
.. with an alphabetic char, a 1 or 2-digit string, - * and a single char. - * - * Examples: - * 1,e0c - * 4:0,e0P - * 4:1,e0M - * 6,43a - * 7,0f - * 10,1 - * 2,12a - * - * Note that this is NetApp-specific, but is ignored on other o/s's. - */ -static void -pci_parse_name_usage(char *aopt) -{ - printf("Invalid PCI slot name field \"%s\"\n", aopt); -} - -void -pci_parse_name(char *opt) -{ - char csnum[4]; - char *namestr; - char *slotend, *funcend, *funcstart; - char prefix, suffix; - int i; - int pslot; - int snum, fnum; - - pslot = -1; - prefix = suffix = 0; - - slotend = strchr(opt, ':'); - if (slotend != NULL) { - funcstart = slotend + 1; - funcend = strchr(funcstart, ','); - } else { - slotend = strchr(opt, ','); - funcstart = funcend = NULL; - } - - /* - * A comma must be present, and can't be the first character - * or no slot would be present. Also, the slot number can't be - * more than 2 characters. - */ - if (slotend == NULL || slotend == opt || (slotend - opt > 2)) { - pci_parse_name_usage(opt); - return; - } - - for (i = 0; i < (slotend - opt); i++) { - csnum[i] = opt[i]; - } - csnum[i] = '\0'; - - snum = atoi(csnum); - if (snum < 0 || snum >= MAXSLOTS) { - pci_parse_name_usage(opt); - return; - } - - /* - * Parse the function number (if provided) - * - * A comma must be present and can't be the first character. - * The function cannot be greater than a single character and - * must be between '0' and '7' inclusive. - */ - if (funcstart != NULL) { - if (funcend == NULL || funcend != funcstart + 1 || - *funcstart < '0' || *funcstart > '7') { - pci_parse_name_usage(opt); - return; - } - fnum = *funcstart - '0'; - } else { - fnum = 0; - } - - namestr = funcend ? 
funcend + 1 : slotend + 1; - - if (strlen(namestr) > 3) { - pci_parse_name_usage(opt); - return; - } - - if (isalpha(*namestr)) { - prefix = *namestr++; - } - - if (!isdigit(*namestr)) { - pci_parse_name_usage(opt); - } else { - pslot = *namestr++ - '0'; - if (isnumber(*namestr)) { - pslot = 10*pslot + *namestr++ - '0'; - - } - if (isalpha(*namestr) && *(namestr + 1) == 0) { - suffix = *namestr; - pci_slotinfo[snum][fnum].si_titled = 1; - pci_slotinfo[snum][fnum].si_pslot = pslot; - pci_slotinfo[snum][fnum].si_prefix = prefix; - pci_slotinfo[snum][fnum].si_suffix = suffix; - } else { - pci_parse_name_usage(opt); - } - } -} - -static void -pci_add_mptable_name(struct slotinfo *si) -{ - struct mptable_pci_slotinfo *ms; - - /* - * If naming information has been supplied for this slot, populate - * the next available mptable OEM entry - */ - if (si->si_titled) { - ms = &pci_devnames.md_slotinfo[devname_elems]; - - ms->mds_type = MPT_HDR_NAME; - ms->mds_phys_slot = si->si_pslot; - ms->mds_bus = si->si_devi->pi_bus; - ms->mds_slot = si->si_devi->pi_slot; - ms->mds_func = si->si_devi->pi_func; - ms->mds_vid = pci_get_cfgdata16(si->si_devi, PCIR_VENDOR); - ms->mds_did = pci_get_cfgdata16(si->si_devi, PCIR_DEVICE); - ms->mds_suffix[0] = si->si_suffix; - ms->mds_prefix[0] = si->si_prefix; - - devname_elems++; - } -} - -static void -pci_finish_mptable_names(void) -{ - int size; - - if (devname_elems) { - pci_devnames.md_hdrtype = MPT_HDR_BASE; - pci_devnames.md_entries = devname_elems; - pci_devnames.md_cksum = 0; /* XXX */ - pci_devnames.md_sig = MPT_NTAP_SIG; - - size = (uintptr_t)&pci_devnames.md_slotinfo[devname_elems] - - (uintptr_t)&pci_devnames; - - fbsdrun_add_oemtbl(&pci_devnames, size); - } -} - static int pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) @@ -835,12 +637,10 @@ init_pci(struct vmctx *ctx) if (pde != NULL) { pci_emul_init(ctx, pde, slot, func, si->si_param); - pci_add_mptable_name(si); } } } } - 
pci_finish_mptable_names(); /* * Allow ISA IRQs 5,10,11,12, and 15 to be available for diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h index 79b86d1..e924475 100644 --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -168,7 +168,6 @@ int pci_lintr_request(struct pci_devinst *pi, int ivec); int pci_msi_enabled(struct pci_devinst *pi); int pci_msix_enabled(struct pci_devinst *pi); int pci_msi_msgnum(struct pci_devinst *pi); -void pci_parse_name(char *opt); void pci_parse_slot(char *opt, int legacy); void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr); -- cgit v1.1 From ed9d132c4e9238eb85db69fb4223ddea58b61fed Mon Sep 17 00:00:00 2001 From: grehan Date: Sat, 27 Oct 2012 02:10:45 +0000 Subject: Rename vmmctl to bhyvectl. 'vmmctl' came from a pre-bhyve internal codebase at NetApp. No need for it to have an unrelated name to the other userspace utils. Reviewed by: neel Obtained from: NetApp --- usr.sbin/Makefile.amd64 | 2 +- usr.sbin/bhyvectl/Makefile | 17 + usr.sbin/bhyvectl/bhyvectl.c | 1524 ++++++++++++++++++++++++++++++++++++++++++ usr.sbin/bhyvectl/sample.sh | 75 +++ usr.sbin/vmmctl/Makefile | 17 - usr.sbin/vmmctl/sample.sh | 75 --- usr.sbin/vmmctl/vmmctl.c | 1524 ------------------------------------------ 7 files changed, 1617 insertions(+), 1617 deletions(-) create mode 100644 usr.sbin/bhyvectl/Makefile create mode 100644 usr.sbin/bhyvectl/bhyvectl.c create mode 100755 usr.sbin/bhyvectl/sample.sh delete mode 100644 usr.sbin/vmmctl/Makefile delete mode 100755 usr.sbin/vmmctl/sample.sh delete mode 100644 usr.sbin/vmmctl/vmmctl.c (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile.amd64 b/usr.sbin/Makefile.amd64 index e8f3bc0..5fbf86d 100644 --- a/usr.sbin/Makefile.amd64 +++ b/usr.sbin/Makefile.amd64 @@ -32,5 +32,5 @@ SUBDIR+= spkrtest .if ${MK_SYSINSTALL} != "no" SUBDIR+= sade .endif -SUBDIR+= vmmctl +SUBDIR+= bhyvectl SUBDIR+= zzz diff --git a/usr.sbin/bhyvectl/Makefile b/usr.sbin/bhyvectl/Makefile new file mode 
100644 index 0000000..9fde12c --- /dev/null +++ b/usr.sbin/bhyvectl/Makefile @@ -0,0 +1,17 @@ +# +# $FreeBSD$ +# + +PROG= bhyvectl +SRCS= bhyvectl.c + +NO_MAN= + +DPADD= ${LIBVMMAPI} +LDADD= -lvmmapi + +WARNS?= 3 + +CFLAGS+= -I${.CURDIR}/../../sys/amd64/vmm + +.include diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c new file mode 100644 index 0000000..d5e0503 --- /dev/null +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -0,0 +1,1524 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "intel/vmcs.h" + +#define MB (1UL << 20) +#define GB (1UL << 30) + +#define REQ_ARG required_argument +#define NO_ARG no_argument +#define OPT_ARG optional_argument + +static const char *progname; + +static void +usage(void) +{ + + (void)fprintf(stderr, + "Usage: %s --vm=\n" + " [--cpu=]\n" + " [--create]\n" + " [--destroy]\n" + " [--get-all]\n" + " [--get-stats]\n" + " [--set-desc-ds]\n" + " [--get-desc-ds]\n" + " [--set-desc-es]\n" + " [--get-desc-es]\n" + " [--set-desc-gs]\n" + " [--get-desc-gs]\n" + " [--set-desc-fs]\n" + " [--get-desc-fs]\n" + " [--set-desc-cs]\n" + " [--get-desc-cs]\n" + " [--set-desc-ss]\n" + " [--get-desc-ss]\n" + " [--set-desc-tr]\n" + " [--get-desc-tr]\n" + " [--set-desc-ldtr]\n" + " [--get-desc-ldtr]\n" + " [--set-desc-gdtr]\n" + " [--get-desc-gdtr]\n" + " [--set-desc-idtr]\n" + " [--get-desc-idtr]\n" + " [--run]\n" + " [--capname=]\n" + " [--getcap]\n" + " [--setcap=<0|1>]\n" + " [--desc-base=]\n" + " [--desc-limit=]\n" + " [--desc-access=]\n" + " [--set-cr0=]\n" + " [--get-cr0]\n" + " [--set-cr3=]\n" + " [--get-cr3]\n" + " [--set-cr4=]\n" + " [--get-cr4]\n" + " [--set-dr7=]\n" + " [--get-dr7]\n" + " [--set-rsp=]\n" + " [--get-rsp]\n" + " [--set-rip=]\n" + " [--get-rip]\n" + " [--get-rax]\n" + " [--set-rax=]\n" + " [--get-rbx]\n" + " [--get-rcx]\n" + " [--get-rdx]\n" + " [--get-rsi]\n" + " [--get-rdi]\n" + " [--get-rbp]\n" + " [--get-r8]\n" + " [--get-r9]\n" + " [--get-r10]\n" + " [--get-r11]\n" + " [--get-r12]\n" + " [--get-r13]\n" + " [--get-r14]\n" + " [--get-r15]\n" + " [--set-rflags=]\n" + " [--get-rflags]\n" + " [--set-cs]\n" + " [--get-cs]\n" + " [--set-ds]\n" + " [--get-ds]\n" + " [--set-es]\n" + " [--get-es]\n" + " [--set-fs]\n" + " [--get-fs]\n" + " [--set-gs]\n" + " [--get-gs]\n" + " 
[--set-ss]\n" + " [--get-ss]\n" + " [--get-tr]\n" + " [--get-ldtr]\n" + " [--get-vmcs-pinbased-ctls]\n" + " [--get-vmcs-procbased-ctls]\n" + " [--get-vmcs-procbased-ctls2]\n" + " [--get-vmcs-entry-interruption-info]\n" + " [--set-vmcs-entry-interruption-info=]\n" + " [--get-vmcs-eptp]\n" + " [--get-vmcs-guest-physical-address\n" + " [--get-vmcs-guest-linear-address\n" + " [--set-vmcs-exception-bitmap]\n" + " [--get-vmcs-exception-bitmap]\n" + " [--get-vmcs-io-bitmap-address]\n" + " [--get-vmcs-tsc-offset]\n" + " [--get-vmcs-guest-pat]\n" + " [--get-vmcs-host-pat]\n" + " [--get-vmcs-host-cr0]\n" + " [--get-vmcs-host-cr3]\n" + " [--get-vmcs-host-cr4]\n" + " [--get-vmcs-host-rip]\n" + " [--get-vmcs-host-rsp]\n" + " [--get-vmcs-cr0-mask]\n" + " [--get-vmcs-cr0-shadow]\n" + " [--get-vmcs-cr4-mask]\n" + " [--get-vmcs-cr4-shadow]\n" + " [--get-vmcs-cr3-targets]\n" + " [--get-vmcs-apic-access-address]\n" + " [--get-vmcs-virtual-apic-address]\n" + " [--get-vmcs-tpr-threshold]\n" + " [--get-vmcs-msr-bitmap]\n" + " [--get-vmcs-msr-bitmap-address]\n" + " [--get-vmcs-vpid]\n" + " [--get-vmcs-ple-gap]\n" + " [--get-vmcs-ple-window]\n" + " [--get-vmcs-instruction-error]\n" + " [--get-vmcs-exit-ctls]\n" + " [--get-vmcs-entry-ctls]\n" + " [--get-vmcs-guest-sysenter]\n" + " [--get-vmcs-link]\n" + " [--get-vmcs-exit-reason]\n" + " [--get-vmcs-exit-qualification]\n" + " [--get-vmcs-exit-interruption-info]\n" + " [--get-vmcs-exit-interruption-error]\n" + " [--get-vmcs-interruptibility]\n" + " [--set-pinning=]\n" + " [--get-pinning]\n" + " [--set-x2apic-state=]\n" + " [--get-x2apic-state]\n" + " [--set-lowmem=]\n" + " [--get-lowmem]\n" + " [--set-highmem=]\n" + " [--get-highmem]\n", + progname); + exit(1); +} + +static int get_stats, getcap, setcap, capval; +static const char *capname; +static int create, destroy, get_lowmem, get_highmem; +static uint64_t lowmem, highmem; +static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4; +static int set_efer, get_efer; +static int 
set_dr7, get_dr7; +static int set_rsp, get_rsp, set_rip, get_rip, set_rflags, get_rflags; +static int set_rax, get_rax; +static int get_rbx, get_rcx, get_rdx, get_rsi, get_rdi, get_rbp; +static int get_r8, get_r9, get_r10, get_r11, get_r12, get_r13, get_r14, get_r15; +static int set_desc_ds, get_desc_ds; +static int set_desc_es, get_desc_es; +static int set_desc_fs, get_desc_fs; +static int set_desc_gs, get_desc_gs; +static int set_desc_cs, get_desc_cs; +static int set_desc_ss, get_desc_ss; +static int set_desc_gdtr, get_desc_gdtr; +static int set_desc_idtr, get_desc_idtr; +static int set_desc_tr, get_desc_tr; +static int set_desc_ldtr, get_desc_ldtr; +static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; +static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; +static int set_pinning, get_pinning, pincpu; +static int set_x2apic_state, get_x2apic_state; +enum x2apic_state x2apic_state; +static int run; + +/* + * VMCS-specific fields + */ +static int get_pinbased_ctls, get_procbased_ctls, get_procbased_ctls2; +static int get_eptp, get_io_bitmap, get_tsc_offset; +static int get_vmcs_entry_interruption_info, set_vmcs_entry_interruption_info; +static int get_vmcs_interruptibility; +uint32_t vmcs_entry_interruption_info; +static int get_vmcs_gpa, get_vmcs_gla; +static int get_exception_bitmap, set_exception_bitmap, exception_bitmap; +static int get_cr0_mask, get_cr0_shadow; +static int get_cr4_mask, get_cr4_shadow; +static int get_cr3_targets; +static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold; +static int get_msr_bitmap, get_msr_bitmap_address; +static int get_vpid, get_ple_gap, get_ple_window; +static int get_inst_err, get_exit_ctls, get_entry_ctls; +static int get_host_cr0, get_host_cr3, get_host_cr4; +static int get_host_rip, get_host_rsp; +static int get_guest_pat, get_host_pat; +static int get_guest_sysenter, get_vmcs_link; +static int get_vmcs_exit_reason, get_vmcs_exit_qualification; +static int 
get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error; + +static uint64_t desc_base; +static uint32_t desc_limit, desc_access; + +static int get_all; + +static void +dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) +{ + printf("vm exit[%d]\n", vcpu); + printf("\trip\t\t0x%016lx\n", vmexit->rip); + printf("\tinst_length\t%d\n", vmexit->inst_length); + switch (vmexit->exitcode) { + case VM_EXITCODE_INOUT: + printf("\treason\t\tINOUT\n"); + printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT"); + printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes); + printf("\tflags\t\t%s%s\n", + vmexit->u.inout.string ? "STRING " : "", + vmexit->u.inout.rep ? "REP " : ""); + printf("\tport\t\t0x%04x\n", vmexit->u.inout.port); + printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax); + break; + case VM_EXITCODE_VMX: + printf("\treason\t\tVMX\n"); + printf("\terror\t\t%d\n", vmexit->u.vmx.error); + printf("\texit_reason\t0x%08x (%u)\n", + vmexit->u.vmx.exit_reason, vmexit->u.vmx.exit_reason); + printf("\tqualification\t0x%016lx\n", + vmexit->u.vmx.exit_qualification); + break; + default: + printf("*** unknown vm run exitcode %d\n", vmexit->exitcode); + break; + } +} + +static int +dump_vmcs_msr_bitmap(int vcpu, u_long addr) +{ + int error, fd, byte, bit, readable, writeable; + u_int msr; + const char *bitmap; + + error = -1; + bitmap = MAP_FAILED; + + fd = open("/dev/mem", O_RDONLY, 0); + if (fd < 0) + goto done; + + bitmap = mmap(NULL, PAGE_SIZE, PROT_READ, 0, fd, addr); + if (bitmap == MAP_FAILED) + goto done; + + for (msr = 0; msr < 0x2000; msr++) { + byte = msr / 8; + bit = msr & 0x7; + + /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ + readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; + writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; + if (readable || writeable) { + printf("msr 0x%08x[%d]\t\t%c%c\n", msr, vcpu, + readable ? 'R' : '-', + writeable ? 
'W' : '-'); + } + + /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ + byte += 1024; + readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; + writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; + if (readable || writeable) { + printf("msr 0x%08x[%d]\t\t%c%c\n", + 0xc0000000 + msr, vcpu, + readable ? 'R' : '-', + writeable ? 'W' : '-'); + } + } + + error = 0; +done: + if (bitmap != MAP_FAILED) + munmap((void *)bitmap, PAGE_SIZE); + if (fd >= 0) + close(fd); + return (error); +} + +static int +vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) +{ + + return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val)); +} + +static int +vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) +{ + + return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val)); +} + +enum { + VMNAME = 1000, /* avoid collision with return values from getopt */ + VCPU, + SET_LOWMEM, + SET_HIGHMEM, + SET_EFER, + SET_CR0, + SET_CR3, + SET_CR4, + SET_DR7, + SET_RSP, + SET_RIP, + SET_RAX, + SET_RFLAGS, + DESC_BASE, + DESC_LIMIT, + DESC_ACCESS, + SET_CS, + SET_DS, + SET_ES, + SET_FS, + SET_GS, + SET_SS, + SET_TR, + SET_LDTR, + SET_PINNING, + SET_X2APIC_STATE, + SET_VMCS_EXCEPTION_BITMAP, + SET_VMCS_ENTRY_INTERRUPTION_INFO, + SET_CAP, + CAPNAME, +}; + +int +main(int argc, char *argv[]) +{ + char *vmname; + int error, ch, vcpu; + vm_paddr_t gpa; + size_t len; + struct vm_exit vmexit; + uint64_t ctl, eptp, bm, addr, u64; + struct vmctx *ctx; + + uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat; + uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp; + uint64_t r8, r9, r10, r11, r12, r13, r14, r15; + uint64_t cs, ds, es, fs, gs, ss, tr, ldtr; + + struct option opts[] = { + { "vm", REQ_ARG, 0, VMNAME }, + { "cpu", REQ_ARG, 0, VCPU }, + { "set-lowmem", REQ_ARG, 0, SET_LOWMEM }, + { "set-highmem",REQ_ARG, 0, SET_HIGHMEM }, + { "set-efer", REQ_ARG, 0, SET_EFER }, + { "set-cr0", REQ_ARG, 0, SET_CR0 }, + { "set-cr3", REQ_ARG, 0, SET_CR3 }, + { "set-cr4", 
REQ_ARG, 0, SET_CR4 }, + { "set-dr7", REQ_ARG, 0, SET_DR7 }, + { "set-rsp", REQ_ARG, 0, SET_RSP }, + { "set-rip", REQ_ARG, 0, SET_RIP }, + { "set-rax", REQ_ARG, 0, SET_RAX }, + { "set-rflags", REQ_ARG, 0, SET_RFLAGS }, + { "desc-base", REQ_ARG, 0, DESC_BASE }, + { "desc-limit", REQ_ARG, 0, DESC_LIMIT }, + { "desc-access",REQ_ARG, 0, DESC_ACCESS }, + { "set-cs", REQ_ARG, 0, SET_CS }, + { "set-ds", REQ_ARG, 0, SET_DS }, + { "set-es", REQ_ARG, 0, SET_ES }, + { "set-fs", REQ_ARG, 0, SET_FS }, + { "set-gs", REQ_ARG, 0, SET_GS }, + { "set-ss", REQ_ARG, 0, SET_SS }, + { "set-tr", REQ_ARG, 0, SET_TR }, + { "set-ldtr", REQ_ARG, 0, SET_LDTR }, + { "set-pinning",REQ_ARG, 0, SET_PINNING }, + { "set-x2apic-state",REQ_ARG, 0, SET_X2APIC_STATE }, + { "set-vmcs-exception-bitmap", + REQ_ARG, 0, SET_VMCS_EXCEPTION_BITMAP }, + { "set-vmcs-entry-interruption-info", + REQ_ARG, 0, SET_VMCS_ENTRY_INTERRUPTION_INFO }, + { "capname", REQ_ARG, 0, CAPNAME }, + { "setcap", REQ_ARG, 0, SET_CAP }, + { "getcap", NO_ARG, &getcap, 1 }, + { "get-stats", NO_ARG, &get_stats, 1 }, + { "get-desc-ds",NO_ARG, &get_desc_ds, 1 }, + { "set-desc-ds",NO_ARG, &set_desc_ds, 1 }, + { "get-desc-es",NO_ARG, &get_desc_es, 1 }, + { "set-desc-es",NO_ARG, &set_desc_es, 1 }, + { "get-desc-ss",NO_ARG, &get_desc_ss, 1 }, + { "set-desc-ss",NO_ARG, &set_desc_ss, 1 }, + { "get-desc-cs",NO_ARG, &get_desc_cs, 1 }, + { "set-desc-cs",NO_ARG, &set_desc_cs, 1 }, + { "get-desc-fs",NO_ARG, &get_desc_fs, 1 }, + { "set-desc-fs",NO_ARG, &set_desc_fs, 1 }, + { "get-desc-gs",NO_ARG, &get_desc_gs, 1 }, + { "set-desc-gs",NO_ARG, &set_desc_gs, 1 }, + { "get-desc-tr",NO_ARG, &get_desc_tr, 1 }, + { "set-desc-tr",NO_ARG, &set_desc_tr, 1 }, + { "set-desc-ldtr", NO_ARG, &set_desc_ldtr, 1 }, + { "get-desc-ldtr", NO_ARG, &get_desc_ldtr, 1 }, + { "set-desc-gdtr", NO_ARG, &set_desc_gdtr, 1 }, + { "get-desc-gdtr", NO_ARG, &get_desc_gdtr, 1 }, + { "set-desc-idtr", NO_ARG, &set_desc_idtr, 1 }, + { "get-desc-idtr", NO_ARG, &get_desc_idtr, 1 }, + { 
"get-lowmem", NO_ARG, &get_lowmem, 1 }, + { "get-highmem",NO_ARG, &get_highmem, 1 }, + { "get-efer", NO_ARG, &get_efer, 1 }, + { "get-cr0", NO_ARG, &get_cr0, 1 }, + { "get-cr3", NO_ARG, &get_cr3, 1 }, + { "get-cr4", NO_ARG, &get_cr4, 1 }, + { "get-dr7", NO_ARG, &get_dr7, 1 }, + { "get-rsp", NO_ARG, &get_rsp, 1 }, + { "get-rip", NO_ARG, &get_rip, 1 }, + { "get-rax", NO_ARG, &get_rax, 1 }, + { "get-rbx", NO_ARG, &get_rbx, 1 }, + { "get-rcx", NO_ARG, &get_rcx, 1 }, + { "get-rdx", NO_ARG, &get_rdx, 1 }, + { "get-rsi", NO_ARG, &get_rsi, 1 }, + { "get-rdi", NO_ARG, &get_rdi, 1 }, + { "get-rbp", NO_ARG, &get_rbp, 1 }, + { "get-r8", NO_ARG, &get_r8, 1 }, + { "get-r9", NO_ARG, &get_r9, 1 }, + { "get-r10", NO_ARG, &get_r10, 1 }, + { "get-r11", NO_ARG, &get_r11, 1 }, + { "get-r12", NO_ARG, &get_r12, 1 }, + { "get-r13", NO_ARG, &get_r13, 1 }, + { "get-r14", NO_ARG, &get_r14, 1 }, + { "get-r15", NO_ARG, &get_r15, 1 }, + { "get-rflags", NO_ARG, &get_rflags, 1 }, + { "get-cs", NO_ARG, &get_cs, 1 }, + { "get-ds", NO_ARG, &get_ds, 1 }, + { "get-es", NO_ARG, &get_es, 1 }, + { "get-fs", NO_ARG, &get_fs, 1 }, + { "get-gs", NO_ARG, &get_gs, 1 }, + { "get-ss", NO_ARG, &get_ss, 1 }, + { "get-tr", NO_ARG, &get_tr, 1 }, + { "get-ldtr", NO_ARG, &get_ldtr, 1 }, + { "get-vmcs-pinbased-ctls", + NO_ARG, &get_pinbased_ctls, 1 }, + { "get-vmcs-procbased-ctls", + NO_ARG, &get_procbased_ctls, 1 }, + { "get-vmcs-procbased-ctls2", + NO_ARG, &get_procbased_ctls2, 1 }, + { "get-vmcs-guest-linear-address", + NO_ARG, &get_vmcs_gla, 1 }, + { "get-vmcs-guest-physical-address", + NO_ARG, &get_vmcs_gpa, 1 }, + { "get-vmcs-entry-interruption-info", + NO_ARG, &get_vmcs_entry_interruption_info, 1}, + { "get-vmcs-eptp", NO_ARG, &get_eptp, 1 }, + { "get-vmcs-exception-bitmap", + NO_ARG, &get_exception_bitmap, 1 }, + { "get-vmcs-io-bitmap-address", + NO_ARG, &get_io_bitmap, 1 }, + { "get-vmcs-tsc-offset", NO_ARG,&get_tsc_offset, 1 }, + { "get-vmcs-cr0-mask", NO_ARG, &get_cr0_mask, 1 }, + { "get-vmcs-cr0-shadow", 
NO_ARG,&get_cr0_shadow, 1 }, + { "get-vmcs-cr4-mask", NO_ARG, &get_cr4_mask, 1 }, + { "get-vmcs-cr4-shadow", NO_ARG,&get_cr4_shadow, 1 }, + { "get-vmcs-cr3-targets", NO_ARG, &get_cr3_targets, 1}, + { "get-vmcs-apic-access-address", + NO_ARG, &get_apic_access_addr, 1}, + { "get-vmcs-virtual-apic-address", + NO_ARG, &get_virtual_apic_addr, 1}, + { "get-vmcs-tpr-threshold", + NO_ARG, &get_tpr_threshold, 1 }, + { "get-vmcs-msr-bitmap", + NO_ARG, &get_msr_bitmap, 1 }, + { "get-vmcs-msr-bitmap-address", + NO_ARG, &get_msr_bitmap_address, 1 }, + { "get-vmcs-vpid", NO_ARG, &get_vpid, 1 }, + { "get-vmcs-ple-gap", NO_ARG, &get_ple_gap, 1 }, + { "get-vmcs-ple-window", NO_ARG,&get_ple_window,1 }, + { "get-vmcs-instruction-error", + NO_ARG, &get_inst_err, 1 }, + { "get-vmcs-exit-ctls", NO_ARG, &get_exit_ctls, 1 }, + { "get-vmcs-entry-ctls", + NO_ARG, &get_entry_ctls, 1 }, + { "get-vmcs-guest-pat", NO_ARG, &get_guest_pat, 1 }, + { "get-vmcs-host-pat", NO_ARG, &get_host_pat, 1 }, + { "get-vmcs-host-cr0", + NO_ARG, &get_host_cr0, 1 }, + { "get-vmcs-host-cr3", + NO_ARG, &get_host_cr3, 1 }, + { "get-vmcs-host-cr4", + NO_ARG, &get_host_cr4, 1 }, + { "get-vmcs-host-rip", + NO_ARG, &get_host_rip, 1 }, + { "get-vmcs-host-rsp", + NO_ARG, &get_host_rsp, 1 }, + { "get-vmcs-guest-sysenter", + NO_ARG, &get_guest_sysenter, 1 }, + { "get-vmcs-link", NO_ARG, &get_vmcs_link, 1 }, + { "get-vmcs-exit-reason", + NO_ARG, &get_vmcs_exit_reason, 1 }, + { "get-vmcs-exit-qualification", + NO_ARG, &get_vmcs_exit_qualification, 1 }, + { "get-vmcs-exit-interruption-info", + NO_ARG, &get_vmcs_exit_interruption_info, 1}, + { "get-vmcs-exit-interruption-error", + NO_ARG, &get_vmcs_exit_interruption_error, 1}, + { "get-vmcs-interruptibility", + NO_ARG, &get_vmcs_interruptibility, 1 }, + { "get-pinning",NO_ARG, &get_pinning, 1 }, + { "get-x2apic-state",NO_ARG, &get_x2apic_state, 1 }, + { "get-all", NO_ARG, &get_all, 1 }, + { "run", NO_ARG, &run, 1 }, + { "create", NO_ARG, &create, 1 }, + { "destroy", NO_ARG, 
&destroy, 1 }, + { NULL, 0, NULL, 0 } + }; + + vcpu = 0; + progname = basename(argv[0]); + + while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) { + switch (ch) { + case 0: + break; + case VMNAME: + vmname = optarg; + break; + case VCPU: + vcpu = atoi(optarg); + break; + case SET_LOWMEM: + lowmem = atoi(optarg) * MB; + lowmem = roundup(lowmem, 2 * MB); + break; + case SET_HIGHMEM: + highmem = atoi(optarg) * MB; + highmem = roundup(highmem, 2 * MB); + break; + case SET_EFER: + efer = strtoul(optarg, NULL, 0); + set_efer = 1; + break; + case SET_CR0: + cr0 = strtoul(optarg, NULL, 0); + set_cr0 = 1; + break; + case SET_CR3: + cr3 = strtoul(optarg, NULL, 0); + set_cr3 = 1; + break; + case SET_CR4: + cr4 = strtoul(optarg, NULL, 0); + set_cr4 = 1; + break; + case SET_DR7: + dr7 = strtoul(optarg, NULL, 0); + set_dr7 = 1; + break; + case SET_RSP: + rsp = strtoul(optarg, NULL, 0); + set_rsp = 1; + break; + case SET_RIP: + rip = strtoul(optarg, NULL, 0); + set_rip = 1; + break; + case SET_RAX: + rax = strtoul(optarg, NULL, 0); + set_rax = 1; + break; + case SET_RFLAGS: + rflags = strtoul(optarg, NULL, 0); + set_rflags = 1; + break; + case DESC_BASE: + desc_base = strtoul(optarg, NULL, 0); + break; + case DESC_LIMIT: + desc_limit = strtoul(optarg, NULL, 0); + break; + case DESC_ACCESS: + desc_access = strtoul(optarg, NULL, 0); + break; + case SET_CS: + cs = strtoul(optarg, NULL, 0); + set_cs = 1; + break; + case SET_DS: + ds = strtoul(optarg, NULL, 0); + set_ds = 1; + break; + case SET_ES: + es = strtoul(optarg, NULL, 0); + set_es = 1; + break; + case SET_FS: + fs = strtoul(optarg, NULL, 0); + set_fs = 1; + break; + case SET_GS: + gs = strtoul(optarg, NULL, 0); + set_gs = 1; + break; + case SET_SS: + ss = strtoul(optarg, NULL, 0); + set_ss = 1; + break; + case SET_TR: + tr = strtoul(optarg, NULL, 0); + set_tr = 1; + break; + case SET_LDTR: + ldtr = strtoul(optarg, NULL, 0); + set_ldtr = 1; + break; + case SET_PINNING: + pincpu = strtol(optarg, NULL, 0); + set_pinning 
= 1; + break; + case SET_X2APIC_STATE: + x2apic_state = strtol(optarg, NULL, 0); + set_x2apic_state = 1; + break; + case SET_VMCS_EXCEPTION_BITMAP: + exception_bitmap = strtoul(optarg, NULL, 0); + set_exception_bitmap = 1; + break; + case SET_VMCS_ENTRY_INTERRUPTION_INFO: + vmcs_entry_interruption_info = strtoul(optarg, NULL, 0); + set_vmcs_entry_interruption_info = 1; + break; + case SET_CAP: + capval = strtoul(optarg, NULL, 0); + setcap = 1; + break; + case CAPNAME: + capname = optarg; + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (vmname == NULL) + usage(); + + error = 0; + + if (!error && create) + error = vm_create(vmname); + + if (!error) { + ctx = vm_open(vmname); + if (ctx == NULL) + error = -1; + } + + if (!error && lowmem) + error = vm_setup_memory(ctx, 0, lowmem, NULL); + + if (!error && highmem) + error = vm_setup_memory(ctx, 4 * GB, highmem, NULL); + + if (!error && set_efer) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer); + + if (!error && set_cr0) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR0, cr0); + + if (!error && set_cr3) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR3, cr3); + + if (!error && set_cr4) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR4, cr4); + + if (!error && set_dr7) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); + + if (!error && set_rsp) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RSP, rsp); + + if (!error && set_rip) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, rip); + + if (!error && set_rax) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, rax); + + if (!error && set_rflags) { + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, + rflags); + } + + if (!error && set_desc_ds) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_es) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_ES, + desc_base, desc_limit, desc_access); + } + + if (!error && 
set_desc_ss) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_cs) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_fs) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_FS, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_gs) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GS, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_tr) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_ldtr) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_LDTR, + desc_base, desc_limit, desc_access); + } + + if (!error && set_desc_gdtr) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, + desc_base, desc_limit, 0); + } + + if (!error && set_desc_idtr) { + error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, + desc_base, desc_limit, 0); + } + + if (!error && set_cs) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CS, cs); + + if (!error && set_ds) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DS, ds); + + if (!error && set_es) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_ES, es); + + if (!error && set_fs) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_FS, fs); + + if (!error && set_gs) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_GS, gs); + + if (!error && set_ss) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_SS, ss); + + if (!error && set_tr) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_TR, tr); + + if (!error && set_ldtr) + error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr); + + if (!error && set_pinning) + error = vm_set_pinning(ctx, vcpu, pincpu); + + if (!error && set_x2apic_state) + error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); + + if (!error && set_exception_bitmap) { + error = vm_set_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, + exception_bitmap); + } + + if (!error && 
set_vmcs_entry_interruption_info) { + error = vm_set_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO, + vmcs_entry_interruption_info); + } + + if (!error && (get_lowmem || get_all)) { + gpa = 0; + error = vm_get_memory_seg(ctx, gpa, &len); + if (error == 0) + printf("lowmem\t\t0x%016lx/%ld\n", gpa, len); + } + + if (!error && (get_highmem || get_all)) { + gpa = 4 * GB; + error = vm_get_memory_seg(ctx, gpa, &len); + if (error == 0) + printf("highmem\t\t0x%016lx/%ld\n", gpa, len); + } + + if (!error && (get_efer || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer); + if (error == 0) + printf("efer[%d]\t\t0x%016lx\n", vcpu, efer); + } + + if (!error && (get_cr0 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0); + if (error == 0) + printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0); + } + + if (!error && (get_cr3 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3); + if (error == 0) + printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3); + } + + if (!error && (get_cr4 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4); + if (error == 0) + printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4); + } + + if (!error && (get_dr7 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); + if (error == 0) + printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7); + } + + if (!error && (get_rsp || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp); + if (error == 0) + printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp); + } + + if (!error && (get_rip || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); + if (error == 0) + printf("rip[%d]\t\t0x%016lx\n", vcpu, rip); + } + + if (!error && (get_rax || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax); + if (error == 0) + printf("rax[%d]\t\t0x%016lx\n", vcpu, rax); + } + + if (!error && (get_rbx || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx); + if (error == 0) + 
printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx); + } + + if (!error && (get_rcx || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx); + if (error == 0) + printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx); + } + + if (!error && (get_rdx || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx); + if (error == 0) + printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx); + } + + if (!error && (get_rsi || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi); + if (error == 0) + printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi); + } + + if (!error && (get_rdi || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi); + if (error == 0) + printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi); + } + + if (!error && (get_rbp || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp); + if (error == 0) + printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp); + } + + if (!error && (get_r8 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8); + if (error == 0) + printf("r8[%d]\t\t0x%016lx\n", vcpu, r8); + } + + if (!error && (get_r9 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9); + if (error == 0) + printf("r9[%d]\t\t0x%016lx\n", vcpu, r9); + } + + if (!error && (get_r10 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10); + if (error == 0) + printf("r10[%d]\t\t0x%016lx\n", vcpu, r10); + } + + if (!error && (get_r11 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11); + if (error == 0) + printf("r11[%d]\t\t0x%016lx\n", vcpu, r11); + } + + if (!error && (get_r12 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12); + if (error == 0) + printf("r12[%d]\t\t0x%016lx\n", vcpu, r12); + } + + if (!error && (get_r13 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13); + if (error == 0) + printf("r13[%d]\t\t0x%016lx\n", vcpu, r13); + } + + if (!error && (get_r14 || get_all)) { + error = 
vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14); + if (error == 0) + printf("r14[%d]\t\t0x%016lx\n", vcpu, r14); + } + + if (!error && (get_r15 || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15); + if (error == 0) + printf("r15[%d]\t\t0x%016lx\n", vcpu, r15); + } + + if (!error && (get_rflags || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, + &rflags); + if (error == 0) + printf("rflags[%d]\t0x%016lx\n", vcpu, rflags); + } + + if (!error && (get_stats || get_all)) { + int i, num_stats; + uint64_t *stats; + struct timeval tv; + const char *desc; + + stats = vm_get_stats(ctx, vcpu, &tv, &num_stats); + if (stats != NULL) { + printf("vcpu%d\n", vcpu); + for (i = 0; i < num_stats; i++) { + desc = vm_get_stat_desc(ctx, i); + printf("%-32s\t%ld\n", desc, stats[i]); + } + } + } + + if (!error && (get_desc_ds || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("ds desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && (get_desc_es || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("es desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && (get_desc_fs || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("fs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && (get_desc_gs || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("gs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && (get_desc_ss || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, + &desc_base, &desc_limit, 
&desc_access); + if (error == 0) { + printf("ss desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && (get_desc_cs || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("cs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && (get_desc_tr || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("tr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && (get_desc_ldtr || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("ldtr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", + vcpu, desc_base, desc_limit, desc_access); + } + } + + if (!error && (get_desc_gdtr || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("gdtr[%d]\t\t0x%016lx/0x%08x\n", + vcpu, desc_base, desc_limit); + } + } + + if (!error && (get_desc_idtr || get_all)) { + error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR, + &desc_base, &desc_limit, &desc_access); + if (error == 0) { + printf("idtr[%d]\t\t0x%016lx/0x%08x\n", + vcpu, desc_base, desc_limit); + } + } + + if (!error && (get_cs || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs); + if (error == 0) + printf("cs[%d]\t\t0x%04lx\n", vcpu, cs); + } + + if (!error && (get_ds || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds); + if (error == 0) + printf("ds[%d]\t\t0x%04lx\n", vcpu, ds); + } + + if (!error && (get_es || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es); + if (error == 0) + printf("es[%d]\t\t0x%04lx\n", vcpu, es); + } + + if (!error && (get_fs || get_all)) { + error = vm_get_register(ctx, vcpu, 
VM_REG_GUEST_FS, &fs); + if (error == 0) + printf("fs[%d]\t\t0x%04lx\n", vcpu, fs); + } + + if (!error && (get_gs || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs); + if (error == 0) + printf("gs[%d]\t\t0x%04lx\n", vcpu, gs); + } + + if (!error && (get_ss || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss); + if (error == 0) + printf("ss[%d]\t\t0x%04lx\n", vcpu, ss); + } + + if (!error && (get_tr || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr); + if (error == 0) + printf("tr[%d]\t\t0x%04lx\n", vcpu, tr); + } + + if (!error && (get_ldtr || get_all)) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr); + if (error == 0) + printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr); + } + + if (!error && (get_pinning || get_all)) { + error = vm_get_pinning(ctx, vcpu, &pincpu); + if (error == 0) { + if (pincpu < 0) + printf("pincpu[%d]\tunpinned\n", vcpu); + else + printf("pincpu[%d]\t%d\n", vcpu, pincpu); + } + } + + if (!error && (get_x2apic_state || get_all)) { + error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state); + if (error == 0) + printf("x2apic_state[%d]\t%d\n", vcpu, x2apic_state); + } + + if (!error && (get_pinbased_ctls || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl); + if (error == 0) + printf("pinbased_ctls[%d]\t0x%08lx\n", vcpu, ctl); + } + + if (!error && (get_procbased_ctls || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_PRI_PROC_BASED_CTLS, &ctl); + if (error == 0) + printf("procbased_ctls[%d]\t0x%08lx\n", vcpu, ctl); + } + + if (!error && (get_procbased_ctls2 || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_SEC_PROC_BASED_CTLS, &ctl); + if (error == 0) + printf("procbased_ctls2[%d]\t0x%08lx\n", vcpu, ctl); + } + + if (!error && (get_vmcs_gla || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_LINEAR_ADDRESS, &u64); + if (error == 0) + printf("gla[%d]\t\t0x%016lx\n", vcpu, u64); + } + + if (!error && 
(get_vmcs_gpa || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_PHYSICAL_ADDRESS, &u64); + if (error == 0) + printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64); + } + + if (!error && (get_vmcs_entry_interruption_info || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64); + if (error == 0) { + printf("entry_interruption_info[%d]\t0x%08lx\n", + vcpu, u64); + } + } + + if (!error && (get_eptp || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp); + if (error == 0) + printf("eptp[%d]\t\t0x%016lx\n", vcpu, eptp); + } + + if (!error && (get_exception_bitmap || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, + &bm); + if (error == 0) + printf("exception_bitmap[%d]\t0x%08lx\n", vcpu, bm); + } + + if (!error && (get_io_bitmap || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A, &bm); + if (error == 0) + printf("io_bitmap_a[%d]\t0x%08lx\n", vcpu, bm); + error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_B, &bm); + if (error == 0) + printf("io_bitmap_b[%d]\t0x%08lx\n", vcpu, bm); + } + + if (!error && (get_tsc_offset || get_all)) { + uint64_t tscoff; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET, &tscoff); + if (error == 0) + printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff); + } + + if (!error && (get_cr0_mask || get_all)) { + uint64_t cr0mask; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask); + if (error == 0) + printf("cr0_mask[%d]\t\t0x%016lx\n", vcpu, cr0mask); + } + + if (!error && (get_cr0_shadow || get_all)) { + uint64_t cr0shadow; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW, + &cr0shadow); + if (error == 0) + printf("cr0_shadow[%d]\t\t0x%016lx\n", vcpu, cr0shadow); + } + + if (!error && (get_cr4_mask || get_all)) { + uint64_t cr4mask; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask); + if (error == 0) + printf("cr4_mask[%d]\t\t0x%016lx\n", vcpu, cr4mask); + } + + if (!error && (get_cr4_shadow || get_all)) { + uint64_t 
cr4shadow; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW, + &cr4shadow); + if (error == 0) + printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow); + } + + if (!error && (get_cr3_targets || get_all)) { + uint64_t target_count, target_addr; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT, + &target_count); + if (error == 0) { + printf("cr3_target_count[%d]\t0x%08lx\n", + vcpu, target_count); + } + + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET0, + &target_addr); + if (error == 0) { + printf("cr3_target0[%d]\t\t0x%016lx\n", + vcpu, target_addr); + } + + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET1, + &target_addr); + if (error == 0) { + printf("cr3_target1[%d]\t\t0x%016lx\n", + vcpu, target_addr); + } + + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET2, + &target_addr); + if (error == 0) { + printf("cr3_target2[%d]\t\t0x%016lx\n", + vcpu, target_addr); + } + + error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET3, + &target_addr); + if (error == 0) { + printf("cr3_target3[%d]\t\t0x%016lx\n", + vcpu, target_addr); + } + } + + if (!error && (get_apic_access_addr || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_APIC_ACCESS, &addr); + if (error == 0) + printf("apic_access_addr[%d]\t0x%016lx\n", vcpu, addr); + } + + if (!error && (get_virtual_apic_addr || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_VIRTUAL_APIC, &addr); + if (error == 0) + printf("virtual_apic_addr[%d]\t0x%016lx\n", vcpu, addr); + } + + if (!error && (get_tpr_threshold || get_all)) { + uint64_t threshold; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD, + &threshold); + if (error == 0) + printf("tpr_threshold[%d]\t0x%08lx\n", vcpu, threshold); + } + + if (!error && (get_msr_bitmap_address || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); + if (error == 0) + printf("msr_bitmap[%d]\t\t0x%016lx\n", vcpu, addr); + } + + if (!error && (get_msr_bitmap || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, 
VMCS_MSR_BITMAP, &addr); + if (error == 0) + error = dump_vmcs_msr_bitmap(vcpu, addr); + } + + if (!error && (get_vpid || get_all)) { + uint64_t vpid; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid); + if (error == 0) + printf("vpid[%d]\t\t0x%04lx\n", vcpu, vpid); + } + + if (!error && (get_ple_window || get_all)) { + uint64_t window; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_WINDOW, &window); + if (error == 0) + printf("ple_window[%d]\t\t0x%08lx\n", vcpu, window); + } + + if (!error && (get_ple_gap || get_all)) { + uint64_t gap; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_GAP, &gap); + if (error == 0) + printf("ple_gap[%d]\t\t0x%08lx\n", vcpu, gap); + } + + if (!error && (get_inst_err || get_all)) { + uint64_t insterr; + error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR, + &insterr); + if (error == 0) { + printf("instruction_error[%d]\t0x%08lx\n", + vcpu, insterr); + } + } + + if (!error && (get_exit_ctls || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl); + if (error == 0) + printf("exit_ctls[%d]\t\t0x%08lx\n", vcpu, ctl); + } + + if (!error && (get_entry_ctls || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl); + if (error == 0) + printf("entry_ctls[%d]\t\t0x%08lx\n", vcpu, ctl); + } + + if (!error && (get_host_pat || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat); + if (error == 0) + printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat); + } + + if (!error && (get_guest_pat || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_PAT, &pat); + if (error == 0) + printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat); + } + + if (!error && (get_host_cr0 || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0); + if (error == 0) + printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0); + } + + if (!error && (get_host_cr3 || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3); + if (error == 0) + 
printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3); + } + + if (!error && (get_host_cr4 || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4); + if (error == 0) + printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4); + } + + if (!error && (get_host_rip || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip); + if (error == 0) + printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip); + } + + if (!error && (get_host_rsp || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp); + if (error == 0) + printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp); + } + + if (!error && (get_guest_sysenter || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_IA32_SYSENTER_CS, &cs); + if (error == 0) + printf("guest_sysenter_cs[%d]\t0x%08lx\n", vcpu, cs); + + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_IA32_SYSENTER_ESP, &rsp); + if (error == 0) + printf("guest_sysenter_sp[%d]\t0x%016lx\n", vcpu, rsp); + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_IA32_SYSENTER_EIP, &rip); + if (error == 0) + printf("guest_sysenter_ip[%d]\t0x%016lx\n", vcpu, rip); + } + + if (!error && (get_vmcs_link || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr); + if (error == 0) + printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr); + } + + if (!error && (get_vmcs_exit_reason || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64); + if (error == 0) + printf("vmcs_exit_reason[%d]\t0x%016lx\n", vcpu, u64); + } + + if (!error && (get_vmcs_exit_qualification || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION, + &u64); + if (error == 0) + printf("vmcs_exit_qualification[%d]\t0x%016lx\n", + vcpu, u64); + } + + if (!error && (get_vmcs_exit_interruption_info || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_EXIT_INTERRUPTION_INFO, &u64); + if (error == 0) { + printf("vmcs_exit_interruption_info[%d]\t0x%08lx\n", + vcpu, u64); + } + } + + if (!error && 
(get_vmcs_exit_interruption_error || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_EXIT_INTERRUPTION_ERROR, &u64); + if (error == 0) { + printf("vmcs_exit_interruption_error[%d]\t0x%08lx\n", + vcpu, u64); + } + } + + if (!error && (get_vmcs_interruptibility || get_all)) { + error = vm_get_vmcs_field(ctx, vcpu, + VMCS_GUEST_INTERRUPTIBILITY, &u64); + if (error == 0) { + printf("vmcs_guest_interruptibility[%d]\t0x%08lx\n", + vcpu, u64); + } + } + + if (!error && setcap) { + int captype; + captype = vm_capability_name2type(capname); + error = vm_set_capability(ctx, vcpu, captype, capval); + if (error != 0 && errno == ENOENT) + printf("Capability \"%s\" is not available\n", capname); + } + + if (!error && (getcap || get_all)) { + int captype, val, getcaptype; + + if (getcap && capname) + getcaptype = vm_capability_name2type(capname); + else + getcaptype = -1; + + for (captype = 0; captype < VM_CAP_MAX; captype++) { + if (getcaptype >= 0 && captype != getcaptype) + continue; + error = vm_get_capability(ctx, vcpu, captype, &val); + if (error == 0) { + printf("Capability \"%s\" is %s on vcpu %d\n", + vm_capability_type2name(captype), + val ? 
"set" : "not set", vcpu); + } else if (errno == ENOENT) { + printf("Capability \"%s\" is not available\n", + vm_capability_type2name(captype)); + } else { + break; + } + } + } + + if (!error && run) { + error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); + assert(error == 0); + + error = vm_run(ctx, vcpu, rip, &vmexit); + if (error == 0) + dump_vm_run_exitcode(&vmexit, vcpu); + else + printf("vm_run error %d\n", error); + } + + if (error) + printf("errno = %d\n", errno); + + if (!error && destroy) + vm_destroy(ctx); + + exit(error); +} diff --git a/usr.sbin/bhyvectl/sample.sh b/usr.sbin/bhyvectl/sample.sh new file mode 100755 index 0000000..356bd5f --- /dev/null +++ b/usr.sbin/bhyvectl/sample.sh @@ -0,0 +1,75 @@ +#!/bin/sh + +# $FreeBSD$ + +BHYVECTL="sudo ./bhyvectl" +VMNAME=sample + +${BHYVECTL} --vm=${VMNAME} --create +${BHYVECTL} --vm=${VMNAME} --set-lowmem=128 --set-highmem=256 +${BHYVECTL} --vm=${VMNAME} --get-lowmem --get-highmem + +CR0_PE=$((1 << 0)) +CR0_PG=$((1 << 31)) +CR0=$(($CR0_PE | $CR0_PG)) +${BHYVECTL} --vm=${VMNAME} --set-cr0=${CR0} --get-cr0 + +# XXX this is bogus the value of %cr3 should come from the loader +CR3=0 +${BHYVECTL} --vm=${VMNAME} --set-cr3=${CR3} --get-cr3 + +CR4_PAE=$((1 << 5)) +CR4=$((${CR4_PAE})) +${BHYVECTL} --vm=${VMNAME} --set-cr4=${CR4} --get-cr4 + +DR7=0x00000400 # Table 9-1 from Intel Architecture Manual 3A +${BHYVECTL} --vm=${VMNAME} --set-dr7=${DR7} --get-dr7 + +# +# XXX the values of rsp and rip are bogus and should come from the loader. +# +RSP=0xa5a5a5a5 +RIP=0x0000bfbfbfbf0000 +RFLAGS=0x2 +${BHYVECTL} --vm=${VMNAME} --set-rsp=${RSP} --get-rsp +${BHYVECTL} --vm=${VMNAME} --set-rip=${RIP} --get-rip +${BHYVECTL} --vm=${VMNAME} --set-rflags=${RFLAGS} --get-rflags + +# Set "hidden" state of %cs descriptor to indicate long mode code segment. +# +# Note that this should match the contents of the entry pointed to by the +# segment selector in the GDTR. 
+# +${BHYVECTL} --vm=${VMNAME} --set-desc-cs --desc-access=0x00002098 --get-desc-cs + +# Set "hidden" state of all data descriptors to indicate a usable segment. +# The only useful fields are the "Present" and "Descriptor Type" bits. +${BHYVECTL} --vm=${VMNAME} --set-desc-ds --desc-access=0x00000090 --get-desc-ds +${BHYVECTL} --vm=${VMNAME} --set-desc-es --desc-access=0x00000090 --get-desc-es +${BHYVECTL} --vm=${VMNAME} --set-desc-fs --desc-access=0x00000090 --get-desc-fs +${BHYVECTL} --vm=${VMNAME} --set-desc-gs --desc-access=0x00000090 --get-desc-gs +${BHYVECTL} --vm=${VMNAME} --set-desc-ss --desc-access=0x00000090 --get-desc-ss + +# +# Set the code segment selector to point to entry at offset 8 in the GDTR. +# +${BHYVECTL} --vm=${VMNAME} --set-cs=0x0008 --get-cs + +# Set all the remaining data segment selectors to point to entry at offset +# 16 in the GDTR. +${BHYVECTL} --vm=${VMNAME} --set-ds=0x0010 --get-ds +${BHYVECTL} --vm=${VMNAME} --set-es=0x0010 --get-es +${BHYVECTL} --vm=${VMNAME} --set-fs=0x0010 --get-fs +${BHYVECTL} --vm=${VMNAME} --set-gs=0x0010 --get-gs +${BHYVECTL} --vm=${VMNAME} --set-ss=0x0010 --get-ss + +# XXX the value of the GDTR should come from the loader. 
+# Set the GDTR +GDTR_BASE=0xffff0000 +GDTR_LIMIT=0x10 +${BHYVECTL} --vm=${VMNAME} --set-desc-gdtr --desc-base=${GDTR_BASE} --desc-limit=${GDTR_LIMIT} --get-desc-gdtr + +${BHYVECTL} --vm=${VMNAME} --set-pinning=0 --get-pinning +${BHYVECTL} --vm=${VMNAME} --set-pinning=-1 --get-pinning + +${BHYVECTL} --vm=${VMNAME} --destroy diff --git a/usr.sbin/vmmctl/Makefile b/usr.sbin/vmmctl/Makefile deleted file mode 100644 index a1654df..0000000 --- a/usr.sbin/vmmctl/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -# -# $FreeBSD$ -# - -PROG= vmmctl -SRCS= vmmctl.c - -NO_MAN= - -DPADD= ${LIBVMMAPI} -LDADD= -lvmmapi - -WARNS?= 3 - -CFLAGS+= -I${.CURDIR}/../../sys/amd64/vmm - -.include diff --git a/usr.sbin/vmmctl/sample.sh b/usr.sbin/vmmctl/sample.sh deleted file mode 100755 index f38d0da..0000000 --- a/usr.sbin/vmmctl/sample.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/sh - -# $FreeBSD$ - -VMMCTL="sudo ./vmmctl" -VMNAME=sample - -${VMMCTL} --vm=${VMNAME} --create -${VMMCTL} --vm=${VMNAME} --set-lowmem=128 --set-highmem=256 -${VMMCTL} --vm=${VMNAME} --get-lowmem --get-highmem - -CR0_PE=$((1 << 0)) -CR0_PG=$((1 << 31)) -CR0=$(($CR0_PE | $CR0_PG)) -${VMMCTL} --vm=${VMNAME} --set-cr0=${CR0} --get-cr0 - -# XXX this is bogus the value of %cr3 should come from the loader -CR3=0 -${VMMCTL} --vm=${VMNAME} --set-cr3=${CR3} --get-cr3 - -CR4_PAE=$((1 << 5)) -CR4=$((${CR4_PAE})) -${VMMCTL} --vm=${VMNAME} --set-cr4=${CR4} --get-cr4 - -DR7=0x00000400 # Table 9-1 from Intel Architecture Manual 3A -${VMMCTL} --vm=${VMNAME} --set-dr7=${DR7} --get-dr7 - -# -# XXX the values of rsp and rip are bogus and should come from the loader. -# -RSP=0xa5a5a5a5 -RIP=0x0000bfbfbfbf0000 -RFLAGS=0x2 -${VMMCTL} --vm=${VMNAME} --set-rsp=${RSP} --get-rsp -${VMMCTL} --vm=${VMNAME} --set-rip=${RIP} --get-rip -${VMMCTL} --vm=${VMNAME} --set-rflags=${RFLAGS} --get-rflags - -# Set "hidden" state of %cs descriptor to indicate long mode code segment. 
-# -# Note that this should match the contents of the entry pointed to by the -# segment selector in the GDTR. -# -${VMMCTL} --vm=${VMNAME} --set-desc-cs --desc-access=0x00002098 --get-desc-cs - -# Set "hidden" state of all data descriptors to indicate a usable segment. -# The only useful fields are the "Present" and "Descriptor Type" bits. -${VMMCTL} --vm=${VMNAME} --set-desc-ds --desc-access=0x00000090 --get-desc-ds -${VMMCTL} --vm=${VMNAME} --set-desc-es --desc-access=0x00000090 --get-desc-es -${VMMCTL} --vm=${VMNAME} --set-desc-fs --desc-access=0x00000090 --get-desc-fs -${VMMCTL} --vm=${VMNAME} --set-desc-gs --desc-access=0x00000090 --get-desc-gs -${VMMCTL} --vm=${VMNAME} --set-desc-ss --desc-access=0x00000090 --get-desc-ss - -# -# Set the code segment selector to point to entry at offset 8 in the GDTR. -# -${VMMCTL} --vm=${VMNAME} --set-cs=0x0008 --get-cs - -# Set all the remaining data segment selectors to point to entry at offset -# 16 in the GDTR. -${VMMCTL} --vm=${VMNAME} --set-ds=0x0010 --get-ds -${VMMCTL} --vm=${VMNAME} --set-es=0x0010 --get-es -${VMMCTL} --vm=${VMNAME} --set-fs=0x0010 --get-fs -${VMMCTL} --vm=${VMNAME} --set-gs=0x0010 --get-gs -${VMMCTL} --vm=${VMNAME} --set-ss=0x0010 --get-ss - -# XXX the value of the GDTR should come from the loader. -# Set the GDTR -GDTR_BASE=0xffff0000 -GDTR_LIMIT=0x10 -${VMMCTL} --vm=${VMNAME} --set-desc-gdtr --desc-base=${GDTR_BASE} --desc-limit=${GDTR_LIMIT} --get-desc-gdtr - -${VMMCTL} --vm=${VMNAME} --set-pinning=0 --get-pinning -${VMMCTL} --vm=${VMNAME} --set-pinning=-1 --get-pinning - -${VMMCTL} --vm=${VMNAME} --destroy diff --git a/usr.sbin/vmmctl/vmmctl.c b/usr.sbin/vmmctl/vmmctl.c deleted file mode 100644 index d5e0503..0000000 --- a/usr.sbin/vmmctl/vmmctl.c +++ /dev/null @@ -1,1524 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "intel/vmcs.h" - -#define MB (1UL << 20) -#define GB (1UL << 30) - -#define REQ_ARG required_argument -#define NO_ARG no_argument -#define OPT_ARG optional_argument - -static const char *progname; - -static void -usage(void) -{ - - (void)fprintf(stderr, - "Usage: %s --vm=\n" - " [--cpu=]\n" - " [--create]\n" - " [--destroy]\n" - " [--get-all]\n" - " [--get-stats]\n" - " [--set-desc-ds]\n" - " [--get-desc-ds]\n" - " [--set-desc-es]\n" - " [--get-desc-es]\n" - " [--set-desc-gs]\n" - " [--get-desc-gs]\n" - " [--set-desc-fs]\n" - " [--get-desc-fs]\n" - " [--set-desc-cs]\n" - " [--get-desc-cs]\n" - " [--set-desc-ss]\n" - " [--get-desc-ss]\n" - " [--set-desc-tr]\n" - " [--get-desc-tr]\n" - " [--set-desc-ldtr]\n" - " [--get-desc-ldtr]\n" - " [--set-desc-gdtr]\n" - " [--get-desc-gdtr]\n" - " [--set-desc-idtr]\n" - " [--get-desc-idtr]\n" - " [--run]\n" - " [--capname=]\n" - " [--getcap]\n" - " [--setcap=<0|1>]\n" - " [--desc-base=]\n" - " [--desc-limit=]\n" - " [--desc-access=]\n" - " [--set-cr0=]\n" - " [--get-cr0]\n" - " [--set-cr3=]\n" - " [--get-cr3]\n" - " [--set-cr4=]\n" - " [--get-cr4]\n" - " [--set-dr7=]\n" - " [--get-dr7]\n" - " [--set-rsp=]\n" - " [--get-rsp]\n" - " [--set-rip=]\n" - " [--get-rip]\n" - " [--get-rax]\n" - " [--set-rax=]\n" - " [--get-rbx]\n" - " [--get-rcx]\n" - " [--get-rdx]\n" - " [--get-rsi]\n" - " [--get-rdi]\n" - " [--get-rbp]\n" - " [--get-r8]\n" - " [--get-r9]\n" - " [--get-r10]\n" - " [--get-r11]\n" - " [--get-r12]\n" - " [--get-r13]\n" - " [--get-r14]\n" - " [--get-r15]\n" - " [--set-rflags=]\n" - " [--get-rflags]\n" - " [--set-cs]\n" - " [--get-cs]\n" - " [--set-ds]\n" - " [--get-ds]\n" - " [--set-es]\n" - " [--get-es]\n" - " [--set-fs]\n" - " [--get-fs]\n" - " [--set-gs]\n" - " [--get-gs]\n" - " 
[--set-ss]\n" - " [--get-ss]\n" - " [--get-tr]\n" - " [--get-ldtr]\n" - " [--get-vmcs-pinbased-ctls]\n" - " [--get-vmcs-procbased-ctls]\n" - " [--get-vmcs-procbased-ctls2]\n" - " [--get-vmcs-entry-interruption-info]\n" - " [--set-vmcs-entry-interruption-info=]\n" - " [--get-vmcs-eptp]\n" - " [--get-vmcs-guest-physical-address\n" - " [--get-vmcs-guest-linear-address\n" - " [--set-vmcs-exception-bitmap]\n" - " [--get-vmcs-exception-bitmap]\n" - " [--get-vmcs-io-bitmap-address]\n" - " [--get-vmcs-tsc-offset]\n" - " [--get-vmcs-guest-pat]\n" - " [--get-vmcs-host-pat]\n" - " [--get-vmcs-host-cr0]\n" - " [--get-vmcs-host-cr3]\n" - " [--get-vmcs-host-cr4]\n" - " [--get-vmcs-host-rip]\n" - " [--get-vmcs-host-rsp]\n" - " [--get-vmcs-cr0-mask]\n" - " [--get-vmcs-cr0-shadow]\n" - " [--get-vmcs-cr4-mask]\n" - " [--get-vmcs-cr4-shadow]\n" - " [--get-vmcs-cr3-targets]\n" - " [--get-vmcs-apic-access-address]\n" - " [--get-vmcs-virtual-apic-address]\n" - " [--get-vmcs-tpr-threshold]\n" - " [--get-vmcs-msr-bitmap]\n" - " [--get-vmcs-msr-bitmap-address]\n" - " [--get-vmcs-vpid]\n" - " [--get-vmcs-ple-gap]\n" - " [--get-vmcs-ple-window]\n" - " [--get-vmcs-instruction-error]\n" - " [--get-vmcs-exit-ctls]\n" - " [--get-vmcs-entry-ctls]\n" - " [--get-vmcs-guest-sysenter]\n" - " [--get-vmcs-link]\n" - " [--get-vmcs-exit-reason]\n" - " [--get-vmcs-exit-qualification]\n" - " [--get-vmcs-exit-interruption-info]\n" - " [--get-vmcs-exit-interruption-error]\n" - " [--get-vmcs-interruptibility]\n" - " [--set-pinning=]\n" - " [--get-pinning]\n" - " [--set-x2apic-state=]\n" - " [--get-x2apic-state]\n" - " [--set-lowmem=]\n" - " [--get-lowmem]\n" - " [--set-highmem=]\n" - " [--get-highmem]\n", - progname); - exit(1); -} - -static int get_stats, getcap, setcap, capval; -static const char *capname; -static int create, destroy, get_lowmem, get_highmem; -static uint64_t lowmem, highmem; -static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4; -static int set_efer, get_efer; -static int 
set_dr7, get_dr7; -static int set_rsp, get_rsp, set_rip, get_rip, set_rflags, get_rflags; -static int set_rax, get_rax; -static int get_rbx, get_rcx, get_rdx, get_rsi, get_rdi, get_rbp; -static int get_r8, get_r9, get_r10, get_r11, get_r12, get_r13, get_r14, get_r15; -static int set_desc_ds, get_desc_ds; -static int set_desc_es, get_desc_es; -static int set_desc_fs, get_desc_fs; -static int set_desc_gs, get_desc_gs; -static int set_desc_cs, get_desc_cs; -static int set_desc_ss, get_desc_ss; -static int set_desc_gdtr, get_desc_gdtr; -static int set_desc_idtr, get_desc_idtr; -static int set_desc_tr, get_desc_tr; -static int set_desc_ldtr, get_desc_ldtr; -static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; -static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; -static int set_pinning, get_pinning, pincpu; -static int set_x2apic_state, get_x2apic_state; -enum x2apic_state x2apic_state; -static int run; - -/* - * VMCS-specific fields - */ -static int get_pinbased_ctls, get_procbased_ctls, get_procbased_ctls2; -static int get_eptp, get_io_bitmap, get_tsc_offset; -static int get_vmcs_entry_interruption_info, set_vmcs_entry_interruption_info; -static int get_vmcs_interruptibility; -uint32_t vmcs_entry_interruption_info; -static int get_vmcs_gpa, get_vmcs_gla; -static int get_exception_bitmap, set_exception_bitmap, exception_bitmap; -static int get_cr0_mask, get_cr0_shadow; -static int get_cr4_mask, get_cr4_shadow; -static int get_cr3_targets; -static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold; -static int get_msr_bitmap, get_msr_bitmap_address; -static int get_vpid, get_ple_gap, get_ple_window; -static int get_inst_err, get_exit_ctls, get_entry_ctls; -static int get_host_cr0, get_host_cr3, get_host_cr4; -static int get_host_rip, get_host_rsp; -static int get_guest_pat, get_host_pat; -static int get_guest_sysenter, get_vmcs_link; -static int get_vmcs_exit_reason, get_vmcs_exit_qualification; -static int 
get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error; - -static uint64_t desc_base; -static uint32_t desc_limit, desc_access; - -static int get_all; - -static void -dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) -{ - printf("vm exit[%d]\n", vcpu); - printf("\trip\t\t0x%016lx\n", vmexit->rip); - printf("\tinst_length\t%d\n", vmexit->inst_length); - switch (vmexit->exitcode) { - case VM_EXITCODE_INOUT: - printf("\treason\t\tINOUT\n"); - printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT"); - printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes); - printf("\tflags\t\t%s%s\n", - vmexit->u.inout.string ? "STRING " : "", - vmexit->u.inout.rep ? "REP " : ""); - printf("\tport\t\t0x%04x\n", vmexit->u.inout.port); - printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax); - break; - case VM_EXITCODE_VMX: - printf("\treason\t\tVMX\n"); - printf("\terror\t\t%d\n", vmexit->u.vmx.error); - printf("\texit_reason\t0x%08x (%u)\n", - vmexit->u.vmx.exit_reason, vmexit->u.vmx.exit_reason); - printf("\tqualification\t0x%016lx\n", - vmexit->u.vmx.exit_qualification); - break; - default: - printf("*** unknown vm run exitcode %d\n", vmexit->exitcode); - break; - } -} - -static int -dump_vmcs_msr_bitmap(int vcpu, u_long addr) -{ - int error, fd, byte, bit, readable, writeable; - u_int msr; - const char *bitmap; - - error = -1; - bitmap = MAP_FAILED; - - fd = open("/dev/mem", O_RDONLY, 0); - if (fd < 0) - goto done; - - bitmap = mmap(NULL, PAGE_SIZE, PROT_READ, 0, fd, addr); - if (bitmap == MAP_FAILED) - goto done; - - for (msr = 0; msr < 0x2000; msr++) { - byte = msr / 8; - bit = msr & 0x7; - - /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; - if (readable || writeable) { - printf("msr 0x%08x[%d]\t\t%c%c\n", msr, vcpu, - readable ? 'R' : '-', - writeable ? 
'W' : '-'); - } - - /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ - byte += 1024; - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; - if (readable || writeable) { - printf("msr 0x%08x[%d]\t\t%c%c\n", - 0xc0000000 + msr, vcpu, - readable ? 'R' : '-', - writeable ? 'W' : '-'); - } - } - - error = 0; -done: - if (bitmap != MAP_FAILED) - munmap((void *)bitmap, PAGE_SIZE); - if (fd >= 0) - close(fd); - return (error); -} - -static int -vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) -{ - - return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val)); -} - -static int -vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) -{ - - return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val)); -} - -enum { - VMNAME = 1000, /* avoid collision with return values from getopt */ - VCPU, - SET_LOWMEM, - SET_HIGHMEM, - SET_EFER, - SET_CR0, - SET_CR3, - SET_CR4, - SET_DR7, - SET_RSP, - SET_RIP, - SET_RAX, - SET_RFLAGS, - DESC_BASE, - DESC_LIMIT, - DESC_ACCESS, - SET_CS, - SET_DS, - SET_ES, - SET_FS, - SET_GS, - SET_SS, - SET_TR, - SET_LDTR, - SET_PINNING, - SET_X2APIC_STATE, - SET_VMCS_EXCEPTION_BITMAP, - SET_VMCS_ENTRY_INTERRUPTION_INFO, - SET_CAP, - CAPNAME, -}; - -int -main(int argc, char *argv[]) -{ - char *vmname; - int error, ch, vcpu; - vm_paddr_t gpa; - size_t len; - struct vm_exit vmexit; - uint64_t ctl, eptp, bm, addr, u64; - struct vmctx *ctx; - - uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat; - uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp; - uint64_t r8, r9, r10, r11, r12, r13, r14, r15; - uint64_t cs, ds, es, fs, gs, ss, tr, ldtr; - - struct option opts[] = { - { "vm", REQ_ARG, 0, VMNAME }, - { "cpu", REQ_ARG, 0, VCPU }, - { "set-lowmem", REQ_ARG, 0, SET_LOWMEM }, - { "set-highmem",REQ_ARG, 0, SET_HIGHMEM }, - { "set-efer", REQ_ARG, 0, SET_EFER }, - { "set-cr0", REQ_ARG, 0, SET_CR0 }, - { "set-cr3", REQ_ARG, 0, SET_CR3 }, - { "set-cr4", 
REQ_ARG, 0, SET_CR4 }, - { "set-dr7", REQ_ARG, 0, SET_DR7 }, - { "set-rsp", REQ_ARG, 0, SET_RSP }, - { "set-rip", REQ_ARG, 0, SET_RIP }, - { "set-rax", REQ_ARG, 0, SET_RAX }, - { "set-rflags", REQ_ARG, 0, SET_RFLAGS }, - { "desc-base", REQ_ARG, 0, DESC_BASE }, - { "desc-limit", REQ_ARG, 0, DESC_LIMIT }, - { "desc-access",REQ_ARG, 0, DESC_ACCESS }, - { "set-cs", REQ_ARG, 0, SET_CS }, - { "set-ds", REQ_ARG, 0, SET_DS }, - { "set-es", REQ_ARG, 0, SET_ES }, - { "set-fs", REQ_ARG, 0, SET_FS }, - { "set-gs", REQ_ARG, 0, SET_GS }, - { "set-ss", REQ_ARG, 0, SET_SS }, - { "set-tr", REQ_ARG, 0, SET_TR }, - { "set-ldtr", REQ_ARG, 0, SET_LDTR }, - { "set-pinning",REQ_ARG, 0, SET_PINNING }, - { "set-x2apic-state",REQ_ARG, 0, SET_X2APIC_STATE }, - { "set-vmcs-exception-bitmap", - REQ_ARG, 0, SET_VMCS_EXCEPTION_BITMAP }, - { "set-vmcs-entry-interruption-info", - REQ_ARG, 0, SET_VMCS_ENTRY_INTERRUPTION_INFO }, - { "capname", REQ_ARG, 0, CAPNAME }, - { "setcap", REQ_ARG, 0, SET_CAP }, - { "getcap", NO_ARG, &getcap, 1 }, - { "get-stats", NO_ARG, &get_stats, 1 }, - { "get-desc-ds",NO_ARG, &get_desc_ds, 1 }, - { "set-desc-ds",NO_ARG, &set_desc_ds, 1 }, - { "get-desc-es",NO_ARG, &get_desc_es, 1 }, - { "set-desc-es",NO_ARG, &set_desc_es, 1 }, - { "get-desc-ss",NO_ARG, &get_desc_ss, 1 }, - { "set-desc-ss",NO_ARG, &set_desc_ss, 1 }, - { "get-desc-cs",NO_ARG, &get_desc_cs, 1 }, - { "set-desc-cs",NO_ARG, &set_desc_cs, 1 }, - { "get-desc-fs",NO_ARG, &get_desc_fs, 1 }, - { "set-desc-fs",NO_ARG, &set_desc_fs, 1 }, - { "get-desc-gs",NO_ARG, &get_desc_gs, 1 }, - { "set-desc-gs",NO_ARG, &set_desc_gs, 1 }, - { "get-desc-tr",NO_ARG, &get_desc_tr, 1 }, - { "set-desc-tr",NO_ARG, &set_desc_tr, 1 }, - { "set-desc-ldtr", NO_ARG, &set_desc_ldtr, 1 }, - { "get-desc-ldtr", NO_ARG, &get_desc_ldtr, 1 }, - { "set-desc-gdtr", NO_ARG, &set_desc_gdtr, 1 }, - { "get-desc-gdtr", NO_ARG, &get_desc_gdtr, 1 }, - { "set-desc-idtr", NO_ARG, &set_desc_idtr, 1 }, - { "get-desc-idtr", NO_ARG, &get_desc_idtr, 1 }, - { 
"get-lowmem", NO_ARG, &get_lowmem, 1 }, - { "get-highmem",NO_ARG, &get_highmem, 1 }, - { "get-efer", NO_ARG, &get_efer, 1 }, - { "get-cr0", NO_ARG, &get_cr0, 1 }, - { "get-cr3", NO_ARG, &get_cr3, 1 }, - { "get-cr4", NO_ARG, &get_cr4, 1 }, - { "get-dr7", NO_ARG, &get_dr7, 1 }, - { "get-rsp", NO_ARG, &get_rsp, 1 }, - { "get-rip", NO_ARG, &get_rip, 1 }, - { "get-rax", NO_ARG, &get_rax, 1 }, - { "get-rbx", NO_ARG, &get_rbx, 1 }, - { "get-rcx", NO_ARG, &get_rcx, 1 }, - { "get-rdx", NO_ARG, &get_rdx, 1 }, - { "get-rsi", NO_ARG, &get_rsi, 1 }, - { "get-rdi", NO_ARG, &get_rdi, 1 }, - { "get-rbp", NO_ARG, &get_rbp, 1 }, - { "get-r8", NO_ARG, &get_r8, 1 }, - { "get-r9", NO_ARG, &get_r9, 1 }, - { "get-r10", NO_ARG, &get_r10, 1 }, - { "get-r11", NO_ARG, &get_r11, 1 }, - { "get-r12", NO_ARG, &get_r12, 1 }, - { "get-r13", NO_ARG, &get_r13, 1 }, - { "get-r14", NO_ARG, &get_r14, 1 }, - { "get-r15", NO_ARG, &get_r15, 1 }, - { "get-rflags", NO_ARG, &get_rflags, 1 }, - { "get-cs", NO_ARG, &get_cs, 1 }, - { "get-ds", NO_ARG, &get_ds, 1 }, - { "get-es", NO_ARG, &get_es, 1 }, - { "get-fs", NO_ARG, &get_fs, 1 }, - { "get-gs", NO_ARG, &get_gs, 1 }, - { "get-ss", NO_ARG, &get_ss, 1 }, - { "get-tr", NO_ARG, &get_tr, 1 }, - { "get-ldtr", NO_ARG, &get_ldtr, 1 }, - { "get-vmcs-pinbased-ctls", - NO_ARG, &get_pinbased_ctls, 1 }, - { "get-vmcs-procbased-ctls", - NO_ARG, &get_procbased_ctls, 1 }, - { "get-vmcs-procbased-ctls2", - NO_ARG, &get_procbased_ctls2, 1 }, - { "get-vmcs-guest-linear-address", - NO_ARG, &get_vmcs_gla, 1 }, - { "get-vmcs-guest-physical-address", - NO_ARG, &get_vmcs_gpa, 1 }, - { "get-vmcs-entry-interruption-info", - NO_ARG, &get_vmcs_entry_interruption_info, 1}, - { "get-vmcs-eptp", NO_ARG, &get_eptp, 1 }, - { "get-vmcs-exception-bitmap", - NO_ARG, &get_exception_bitmap, 1 }, - { "get-vmcs-io-bitmap-address", - NO_ARG, &get_io_bitmap, 1 }, - { "get-vmcs-tsc-offset", NO_ARG,&get_tsc_offset, 1 }, - { "get-vmcs-cr0-mask", NO_ARG, &get_cr0_mask, 1 }, - { "get-vmcs-cr0-shadow", 
NO_ARG,&get_cr0_shadow, 1 }, - { "get-vmcs-cr4-mask", NO_ARG, &get_cr4_mask, 1 }, - { "get-vmcs-cr4-shadow", NO_ARG,&get_cr4_shadow, 1 }, - { "get-vmcs-cr3-targets", NO_ARG, &get_cr3_targets, 1}, - { "get-vmcs-apic-access-address", - NO_ARG, &get_apic_access_addr, 1}, - { "get-vmcs-virtual-apic-address", - NO_ARG, &get_virtual_apic_addr, 1}, - { "get-vmcs-tpr-threshold", - NO_ARG, &get_tpr_threshold, 1 }, - { "get-vmcs-msr-bitmap", - NO_ARG, &get_msr_bitmap, 1 }, - { "get-vmcs-msr-bitmap-address", - NO_ARG, &get_msr_bitmap_address, 1 }, - { "get-vmcs-vpid", NO_ARG, &get_vpid, 1 }, - { "get-vmcs-ple-gap", NO_ARG, &get_ple_gap, 1 }, - { "get-vmcs-ple-window", NO_ARG,&get_ple_window,1 }, - { "get-vmcs-instruction-error", - NO_ARG, &get_inst_err, 1 }, - { "get-vmcs-exit-ctls", NO_ARG, &get_exit_ctls, 1 }, - { "get-vmcs-entry-ctls", - NO_ARG, &get_entry_ctls, 1 }, - { "get-vmcs-guest-pat", NO_ARG, &get_guest_pat, 1 }, - { "get-vmcs-host-pat", NO_ARG, &get_host_pat, 1 }, - { "get-vmcs-host-cr0", - NO_ARG, &get_host_cr0, 1 }, - { "get-vmcs-host-cr3", - NO_ARG, &get_host_cr3, 1 }, - { "get-vmcs-host-cr4", - NO_ARG, &get_host_cr4, 1 }, - { "get-vmcs-host-rip", - NO_ARG, &get_host_rip, 1 }, - { "get-vmcs-host-rsp", - NO_ARG, &get_host_rsp, 1 }, - { "get-vmcs-guest-sysenter", - NO_ARG, &get_guest_sysenter, 1 }, - { "get-vmcs-link", NO_ARG, &get_vmcs_link, 1 }, - { "get-vmcs-exit-reason", - NO_ARG, &get_vmcs_exit_reason, 1 }, - { "get-vmcs-exit-qualification", - NO_ARG, &get_vmcs_exit_qualification, 1 }, - { "get-vmcs-exit-interruption-info", - NO_ARG, &get_vmcs_exit_interruption_info, 1}, - { "get-vmcs-exit-interruption-error", - NO_ARG, &get_vmcs_exit_interruption_error, 1}, - { "get-vmcs-interruptibility", - NO_ARG, &get_vmcs_interruptibility, 1 }, - { "get-pinning",NO_ARG, &get_pinning, 1 }, - { "get-x2apic-state",NO_ARG, &get_x2apic_state, 1 }, - { "get-all", NO_ARG, &get_all, 1 }, - { "run", NO_ARG, &run, 1 }, - { "create", NO_ARG, &create, 1 }, - { "destroy", NO_ARG, 
&destroy, 1 }, - { NULL, 0, NULL, 0 } - }; - - vcpu = 0; - progname = basename(argv[0]); - - while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) { - switch (ch) { - case 0: - break; - case VMNAME: - vmname = optarg; - break; - case VCPU: - vcpu = atoi(optarg); - break; - case SET_LOWMEM: - lowmem = atoi(optarg) * MB; - lowmem = roundup(lowmem, 2 * MB); - break; - case SET_HIGHMEM: - highmem = atoi(optarg) * MB; - highmem = roundup(highmem, 2 * MB); - break; - case SET_EFER: - efer = strtoul(optarg, NULL, 0); - set_efer = 1; - break; - case SET_CR0: - cr0 = strtoul(optarg, NULL, 0); - set_cr0 = 1; - break; - case SET_CR3: - cr3 = strtoul(optarg, NULL, 0); - set_cr3 = 1; - break; - case SET_CR4: - cr4 = strtoul(optarg, NULL, 0); - set_cr4 = 1; - break; - case SET_DR7: - dr7 = strtoul(optarg, NULL, 0); - set_dr7 = 1; - break; - case SET_RSP: - rsp = strtoul(optarg, NULL, 0); - set_rsp = 1; - break; - case SET_RIP: - rip = strtoul(optarg, NULL, 0); - set_rip = 1; - break; - case SET_RAX: - rax = strtoul(optarg, NULL, 0); - set_rax = 1; - break; - case SET_RFLAGS: - rflags = strtoul(optarg, NULL, 0); - set_rflags = 1; - break; - case DESC_BASE: - desc_base = strtoul(optarg, NULL, 0); - break; - case DESC_LIMIT: - desc_limit = strtoul(optarg, NULL, 0); - break; - case DESC_ACCESS: - desc_access = strtoul(optarg, NULL, 0); - break; - case SET_CS: - cs = strtoul(optarg, NULL, 0); - set_cs = 1; - break; - case SET_DS: - ds = strtoul(optarg, NULL, 0); - set_ds = 1; - break; - case SET_ES: - es = strtoul(optarg, NULL, 0); - set_es = 1; - break; - case SET_FS: - fs = strtoul(optarg, NULL, 0); - set_fs = 1; - break; - case SET_GS: - gs = strtoul(optarg, NULL, 0); - set_gs = 1; - break; - case SET_SS: - ss = strtoul(optarg, NULL, 0); - set_ss = 1; - break; - case SET_TR: - tr = strtoul(optarg, NULL, 0); - set_tr = 1; - break; - case SET_LDTR: - ldtr = strtoul(optarg, NULL, 0); - set_ldtr = 1; - break; - case SET_PINNING: - pincpu = strtol(optarg, NULL, 0); - set_pinning 
= 1; - break; - case SET_X2APIC_STATE: - x2apic_state = strtol(optarg, NULL, 0); - set_x2apic_state = 1; - break; - case SET_VMCS_EXCEPTION_BITMAP: - exception_bitmap = strtoul(optarg, NULL, 0); - set_exception_bitmap = 1; - break; - case SET_VMCS_ENTRY_INTERRUPTION_INFO: - vmcs_entry_interruption_info = strtoul(optarg, NULL, 0); - set_vmcs_entry_interruption_info = 1; - break; - case SET_CAP: - capval = strtoul(optarg, NULL, 0); - setcap = 1; - break; - case CAPNAME: - capname = optarg; - break; - default: - usage(); - } - } - argc -= optind; - argv += optind; - - if (vmname == NULL) - usage(); - - error = 0; - - if (!error && create) - error = vm_create(vmname); - - if (!error) { - ctx = vm_open(vmname); - if (ctx == NULL) - error = -1; - } - - if (!error && lowmem) - error = vm_setup_memory(ctx, 0, lowmem, NULL); - - if (!error && highmem) - error = vm_setup_memory(ctx, 4 * GB, highmem, NULL); - - if (!error && set_efer) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer); - - if (!error && set_cr0) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR0, cr0); - - if (!error && set_cr3) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR3, cr3); - - if (!error && set_cr4) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR4, cr4); - - if (!error && set_dr7) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); - - if (!error && set_rsp) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RSP, rsp); - - if (!error && set_rip) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, rip); - - if (!error && set_rax) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, rax); - - if (!error && set_rflags) { - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, - rflags); - } - - if (!error && set_desc_ds) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, - desc_base, desc_limit, desc_access); - } - - if (!error && set_desc_es) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_ES, - desc_base, desc_limit, desc_access); - } - - if (!error && 
set_desc_ss) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, - desc_base, desc_limit, desc_access); - } - - if (!error && set_desc_cs) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, - desc_base, desc_limit, desc_access); - } - - if (!error && set_desc_fs) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_FS, - desc_base, desc_limit, desc_access); - } - - if (!error && set_desc_gs) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GS, - desc_base, desc_limit, desc_access); - } - - if (!error && set_desc_tr) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, - desc_base, desc_limit, desc_access); - } - - if (!error && set_desc_ldtr) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_LDTR, - desc_base, desc_limit, desc_access); - } - - if (!error && set_desc_gdtr) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, - desc_base, desc_limit, 0); - } - - if (!error && set_desc_idtr) { - error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, - desc_base, desc_limit, 0); - } - - if (!error && set_cs) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CS, cs); - - if (!error && set_ds) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DS, ds); - - if (!error && set_es) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_ES, es); - - if (!error && set_fs) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_FS, fs); - - if (!error && set_gs) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_GS, gs); - - if (!error && set_ss) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_SS, ss); - - if (!error && set_tr) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_TR, tr); - - if (!error && set_ldtr) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr); - - if (!error && set_pinning) - error = vm_set_pinning(ctx, vcpu, pincpu); - - if (!error && set_x2apic_state) - error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); - - if (!error && set_exception_bitmap) { - error = vm_set_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, - exception_bitmap); - } - - if (!error && 
set_vmcs_entry_interruption_info) { - error = vm_set_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO, - vmcs_entry_interruption_info); - } - - if (!error && (get_lowmem || get_all)) { - gpa = 0; - error = vm_get_memory_seg(ctx, gpa, &len); - if (error == 0) - printf("lowmem\t\t0x%016lx/%ld\n", gpa, len); - } - - if (!error && (get_highmem || get_all)) { - gpa = 4 * GB; - error = vm_get_memory_seg(ctx, gpa, &len); - if (error == 0) - printf("highmem\t\t0x%016lx/%ld\n", gpa, len); - } - - if (!error && (get_efer || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer); - if (error == 0) - printf("efer[%d]\t\t0x%016lx\n", vcpu, efer); - } - - if (!error && (get_cr0 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0); - if (error == 0) - printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0); - } - - if (!error && (get_cr3 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3); - if (error == 0) - printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3); - } - - if (!error && (get_cr4 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4); - if (error == 0) - printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4); - } - - if (!error && (get_dr7 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); - if (error == 0) - printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7); - } - - if (!error && (get_rsp || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp); - if (error == 0) - printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp); - } - - if (!error && (get_rip || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); - if (error == 0) - printf("rip[%d]\t\t0x%016lx\n", vcpu, rip); - } - - if (!error && (get_rax || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax); - if (error == 0) - printf("rax[%d]\t\t0x%016lx\n", vcpu, rax); - } - - if (!error && (get_rbx || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx); - if (error == 0) - 
printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx); - } - - if (!error && (get_rcx || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx); - if (error == 0) - printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx); - } - - if (!error && (get_rdx || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx); - if (error == 0) - printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx); - } - - if (!error && (get_rsi || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi); - if (error == 0) - printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi); - } - - if (!error && (get_rdi || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi); - if (error == 0) - printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi); - } - - if (!error && (get_rbp || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp); - if (error == 0) - printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp); - } - - if (!error && (get_r8 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8); - if (error == 0) - printf("r8[%d]\t\t0x%016lx\n", vcpu, r8); - } - - if (!error && (get_r9 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9); - if (error == 0) - printf("r9[%d]\t\t0x%016lx\n", vcpu, r9); - } - - if (!error && (get_r10 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10); - if (error == 0) - printf("r10[%d]\t\t0x%016lx\n", vcpu, r10); - } - - if (!error && (get_r11 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11); - if (error == 0) - printf("r11[%d]\t\t0x%016lx\n", vcpu, r11); - } - - if (!error && (get_r12 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12); - if (error == 0) - printf("r12[%d]\t\t0x%016lx\n", vcpu, r12); - } - - if (!error && (get_r13 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13); - if (error == 0) - printf("r13[%d]\t\t0x%016lx\n", vcpu, r13); - } - - if (!error && (get_r14 || get_all)) { - error = 
vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14); - if (error == 0) - printf("r14[%d]\t\t0x%016lx\n", vcpu, r14); - } - - if (!error && (get_r15 || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15); - if (error == 0) - printf("r15[%d]\t\t0x%016lx\n", vcpu, r15); - } - - if (!error && (get_rflags || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, - &rflags); - if (error == 0) - printf("rflags[%d]\t0x%016lx\n", vcpu, rflags); - } - - if (!error && (get_stats || get_all)) { - int i, num_stats; - uint64_t *stats; - struct timeval tv; - const char *desc; - - stats = vm_get_stats(ctx, vcpu, &tv, &num_stats); - if (stats != NULL) { - printf("vcpu%d\n", vcpu); - for (i = 0; i < num_stats; i++) { - desc = vm_get_stat_desc(ctx, i); - printf("%-32s\t%ld\n", desc, stats[i]); - } - } - } - - if (!error && (get_desc_ds || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS, - &desc_base, &desc_limit, &desc_access); - if (error == 0) { - printf("ds desc[%d]\t0x%016lx/0x%08x/0x%08x\n", - vcpu, desc_base, desc_limit, desc_access); - } - } - - if (!error && (get_desc_es || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES, - &desc_base, &desc_limit, &desc_access); - if (error == 0) { - printf("es desc[%d]\t0x%016lx/0x%08x/0x%08x\n", - vcpu, desc_base, desc_limit, desc_access); - } - } - - if (!error && (get_desc_fs || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS, - &desc_base, &desc_limit, &desc_access); - if (error == 0) { - printf("fs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", - vcpu, desc_base, desc_limit, desc_access); - } - } - - if (!error && (get_desc_gs || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS, - &desc_base, &desc_limit, &desc_access); - if (error == 0) { - printf("gs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", - vcpu, desc_base, desc_limit, desc_access); - } - } - - if (!error && (get_desc_ss || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, - &desc_base, &desc_limit, 
&desc_access); - if (error == 0) { - printf("ss desc[%d]\t0x%016lx/0x%08x/0x%08x\n", - vcpu, desc_base, desc_limit, desc_access); - } - } - - if (!error && (get_desc_cs || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS, - &desc_base, &desc_limit, &desc_access); - if (error == 0) { - printf("cs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", - vcpu, desc_base, desc_limit, desc_access); - } - } - - if (!error && (get_desc_tr || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, - &desc_base, &desc_limit, &desc_access); - if (error == 0) { - printf("tr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", - vcpu, desc_base, desc_limit, desc_access); - } - } - - if (!error && (get_desc_ldtr || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR, - &desc_base, &desc_limit, &desc_access); - if (error == 0) { - printf("ldtr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", - vcpu, desc_base, desc_limit, desc_access); - } - } - - if (!error && (get_desc_gdtr || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR, - &desc_base, &desc_limit, &desc_access); - if (error == 0) { - printf("gdtr[%d]\t\t0x%016lx/0x%08x\n", - vcpu, desc_base, desc_limit); - } - } - - if (!error && (get_desc_idtr || get_all)) { - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR, - &desc_base, &desc_limit, &desc_access); - if (error == 0) { - printf("idtr[%d]\t\t0x%016lx/0x%08x\n", - vcpu, desc_base, desc_limit); - } - } - - if (!error && (get_cs || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs); - if (error == 0) - printf("cs[%d]\t\t0x%04lx\n", vcpu, cs); - } - - if (!error && (get_ds || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds); - if (error == 0) - printf("ds[%d]\t\t0x%04lx\n", vcpu, ds); - } - - if (!error && (get_es || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es); - if (error == 0) - printf("es[%d]\t\t0x%04lx\n", vcpu, es); - } - - if (!error && (get_fs || get_all)) { - error = vm_get_register(ctx, vcpu, 
VM_REG_GUEST_FS, &fs); - if (error == 0) - printf("fs[%d]\t\t0x%04lx\n", vcpu, fs); - } - - if (!error && (get_gs || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs); - if (error == 0) - printf("gs[%d]\t\t0x%04lx\n", vcpu, gs); - } - - if (!error && (get_ss || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss); - if (error == 0) - printf("ss[%d]\t\t0x%04lx\n", vcpu, ss); - } - - if (!error && (get_tr || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr); - if (error == 0) - printf("tr[%d]\t\t0x%04lx\n", vcpu, tr); - } - - if (!error && (get_ldtr || get_all)) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr); - if (error == 0) - printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr); - } - - if (!error && (get_pinning || get_all)) { - error = vm_get_pinning(ctx, vcpu, &pincpu); - if (error == 0) { - if (pincpu < 0) - printf("pincpu[%d]\tunpinned\n", vcpu); - else - printf("pincpu[%d]\t%d\n", vcpu, pincpu); - } - } - - if (!error && (get_x2apic_state || get_all)) { - error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state); - if (error == 0) - printf("x2apic_state[%d]\t%d\n", vcpu, x2apic_state); - } - - if (!error && (get_pinbased_ctls || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl); - if (error == 0) - printf("pinbased_ctls[%d]\t0x%08lx\n", vcpu, ctl); - } - - if (!error && (get_procbased_ctls || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_PRI_PROC_BASED_CTLS, &ctl); - if (error == 0) - printf("procbased_ctls[%d]\t0x%08lx\n", vcpu, ctl); - } - - if (!error && (get_procbased_ctls2 || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_SEC_PROC_BASED_CTLS, &ctl); - if (error == 0) - printf("procbased_ctls2[%d]\t0x%08lx\n", vcpu, ctl); - } - - if (!error && (get_vmcs_gla || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_LINEAR_ADDRESS, &u64); - if (error == 0) - printf("gla[%d]\t\t0x%016lx\n", vcpu, u64); - } - - if (!error && 
(get_vmcs_gpa || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_PHYSICAL_ADDRESS, &u64); - if (error == 0) - printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64); - } - - if (!error && (get_vmcs_entry_interruption_info || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64); - if (error == 0) { - printf("entry_interruption_info[%d]\t0x%08lx\n", - vcpu, u64); - } - } - - if (!error && (get_eptp || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp); - if (error == 0) - printf("eptp[%d]\t\t0x%016lx\n", vcpu, eptp); - } - - if (!error && (get_exception_bitmap || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, - &bm); - if (error == 0) - printf("exception_bitmap[%d]\t0x%08lx\n", vcpu, bm); - } - - if (!error && (get_io_bitmap || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A, &bm); - if (error == 0) - printf("io_bitmap_a[%d]\t0x%08lx\n", vcpu, bm); - error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_B, &bm); - if (error == 0) - printf("io_bitmap_b[%d]\t0x%08lx\n", vcpu, bm); - } - - if (!error && (get_tsc_offset || get_all)) { - uint64_t tscoff; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET, &tscoff); - if (error == 0) - printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff); - } - - if (!error && (get_cr0_mask || get_all)) { - uint64_t cr0mask; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask); - if (error == 0) - printf("cr0_mask[%d]\t\t0x%016lx\n", vcpu, cr0mask); - } - - if (!error && (get_cr0_shadow || get_all)) { - uint64_t cr0shadow; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW, - &cr0shadow); - if (error == 0) - printf("cr0_shadow[%d]\t\t0x%016lx\n", vcpu, cr0shadow); - } - - if (!error && (get_cr4_mask || get_all)) { - uint64_t cr4mask; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask); - if (error == 0) - printf("cr4_mask[%d]\t\t0x%016lx\n", vcpu, cr4mask); - } - - if (!error && (get_cr4_shadow || get_all)) { - uint64_t 
cr4shadow; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW, - &cr4shadow); - if (error == 0) - printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow); - } - - if (!error && (get_cr3_targets || get_all)) { - uint64_t target_count, target_addr; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT, - &target_count); - if (error == 0) { - printf("cr3_target_count[%d]\t0x%08lx\n", - vcpu, target_count); - } - - error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET0, - &target_addr); - if (error == 0) { - printf("cr3_target0[%d]\t\t0x%016lx\n", - vcpu, target_addr); - } - - error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET1, - &target_addr); - if (error == 0) { - printf("cr3_target1[%d]\t\t0x%016lx\n", - vcpu, target_addr); - } - - error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET2, - &target_addr); - if (error == 0) { - printf("cr3_target2[%d]\t\t0x%016lx\n", - vcpu, target_addr); - } - - error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET3, - &target_addr); - if (error == 0) { - printf("cr3_target3[%d]\t\t0x%016lx\n", - vcpu, target_addr); - } - } - - if (!error && (get_apic_access_addr || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_APIC_ACCESS, &addr); - if (error == 0) - printf("apic_access_addr[%d]\t0x%016lx\n", vcpu, addr); - } - - if (!error && (get_virtual_apic_addr || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_VIRTUAL_APIC, &addr); - if (error == 0) - printf("virtual_apic_addr[%d]\t0x%016lx\n", vcpu, addr); - } - - if (!error && (get_tpr_threshold || get_all)) { - uint64_t threshold; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD, - &threshold); - if (error == 0) - printf("tpr_threshold[%d]\t0x%08lx\n", vcpu, threshold); - } - - if (!error && (get_msr_bitmap_address || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); - if (error == 0) - printf("msr_bitmap[%d]\t\t0x%016lx\n", vcpu, addr); - } - - if (!error && (get_msr_bitmap || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, 
VMCS_MSR_BITMAP, &addr); - if (error == 0) - error = dump_vmcs_msr_bitmap(vcpu, addr); - } - - if (!error && (get_vpid || get_all)) { - uint64_t vpid; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid); - if (error == 0) - printf("vpid[%d]\t\t0x%04lx\n", vcpu, vpid); - } - - if (!error && (get_ple_window || get_all)) { - uint64_t window; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_WINDOW, &window); - if (error == 0) - printf("ple_window[%d]\t\t0x%08lx\n", vcpu, window); - } - - if (!error && (get_ple_gap || get_all)) { - uint64_t gap; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_GAP, &gap); - if (error == 0) - printf("ple_gap[%d]\t\t0x%08lx\n", vcpu, gap); - } - - if (!error && (get_inst_err || get_all)) { - uint64_t insterr; - error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR, - &insterr); - if (error == 0) { - printf("instruction_error[%d]\t0x%08lx\n", - vcpu, insterr); - } - } - - if (!error && (get_exit_ctls || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl); - if (error == 0) - printf("exit_ctls[%d]\t\t0x%08lx\n", vcpu, ctl); - } - - if (!error && (get_entry_ctls || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl); - if (error == 0) - printf("entry_ctls[%d]\t\t0x%08lx\n", vcpu, ctl); - } - - if (!error && (get_host_pat || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat); - if (error == 0) - printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat); - } - - if (!error && (get_guest_pat || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_PAT, &pat); - if (error == 0) - printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat); - } - - if (!error && (get_host_cr0 || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0); - if (error == 0) - printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0); - } - - if (!error && (get_host_cr3 || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3); - if (error == 0) - 
printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3); - } - - if (!error && (get_host_cr4 || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4); - if (error == 0) - printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4); - } - - if (!error && (get_host_rip || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip); - if (error == 0) - printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip); - } - - if (!error && (get_host_rsp || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp); - if (error == 0) - printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp); - } - - if (!error && (get_guest_sysenter || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_SYSENTER_CS, &cs); - if (error == 0) - printf("guest_sysenter_cs[%d]\t0x%08lx\n", vcpu, cs); - - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_SYSENTER_ESP, &rsp); - if (error == 0) - printf("guest_sysenter_sp[%d]\t0x%016lx\n", vcpu, rsp); - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_SYSENTER_EIP, &rip); - if (error == 0) - printf("guest_sysenter_ip[%d]\t0x%016lx\n", vcpu, rip); - } - - if (!error && (get_vmcs_link || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr); - if (error == 0) - printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr); - } - - if (!error && (get_vmcs_exit_reason || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64); - if (error == 0) - printf("vmcs_exit_reason[%d]\t0x%016lx\n", vcpu, u64); - } - - if (!error && (get_vmcs_exit_qualification || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION, - &u64); - if (error == 0) - printf("vmcs_exit_qualification[%d]\t0x%016lx\n", - vcpu, u64); - } - - if (!error && (get_vmcs_exit_interruption_info || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_EXIT_INTERRUPTION_INFO, &u64); - if (error == 0) { - printf("vmcs_exit_interruption_info[%d]\t0x%08lx\n", - vcpu, u64); - } - } - - if (!error && 
(get_vmcs_exit_interruption_error || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_EXIT_INTERRUPTION_ERROR, &u64); - if (error == 0) { - printf("vmcs_exit_interruption_error[%d]\t0x%08lx\n", - vcpu, u64); - } - } - - if (!error && (get_vmcs_interruptibility || get_all)) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_INTERRUPTIBILITY, &u64); - if (error == 0) { - printf("vmcs_guest_interruptibility[%d]\t0x%08lx\n", - vcpu, u64); - } - } - - if (!error && setcap) { - int captype; - captype = vm_capability_name2type(capname); - error = vm_set_capability(ctx, vcpu, captype, capval); - if (error != 0 && errno == ENOENT) - printf("Capability \"%s\" is not available\n", capname); - } - - if (!error && (getcap || get_all)) { - int captype, val, getcaptype; - - if (getcap && capname) - getcaptype = vm_capability_name2type(capname); - else - getcaptype = -1; - - for (captype = 0; captype < VM_CAP_MAX; captype++) { - if (getcaptype >= 0 && captype != getcaptype) - continue; - error = vm_get_capability(ctx, vcpu, captype, &val); - if (error == 0) { - printf("Capability \"%s\" is %s on vcpu %d\n", - vm_capability_type2name(captype), - val ? "set" : "not set", vcpu); - } else if (errno == ENOENT) { - printf("Capability \"%s\" is not available\n", - vm_capability_type2name(captype)); - } else { - break; - } - } - } - - if (!error && run) { - error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); - assert(error == 0); - - error = vm_run(ctx, vcpu, rip, &vmexit); - if (error == 0) - dump_vm_run_exitcode(&vmexit, vcpu); - else - printf("vm_run error %d\n", error); - } - - if (error) - printf("errno = %d\n", errno); - - if (!error && destroy) - vm_destroy(ctx); - - exit(error); -} -- cgit v1.1 From bd0ca87b04ee4089d4f0c5b34c5791f80c353d21 Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 27 Oct 2012 02:39:08 +0000 Subject: Ignore PCI configuration accesses to all bus numbers other than PCI bus 0. 
Obtained from: NetApp --- usr.sbin/bhyve/pci_emul.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index f9d75e3..1836bc9 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -821,7 +821,11 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, assert(bytes == 1 || bytes == 2 || bytes == 4); - pi = pci_slotinfo[cfgslot][cfgfunc].si_devi; + if (cfgbus == 0) + pi = pci_slotinfo[cfgslot][cfgfunc].si_devi; + else + pi = NULL; + coff = cfgoff + (port - CONF1_DATA_PORT); #if 0 -- cgit v1.1 From 86d868af7f0eb18ca829186c66cecc7117189f41 Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 27 Oct 2012 22:33:23 +0000 Subject: Present the bvm console device to the guest only when explicitly requested via the "-b" command line option. Reviewed by: grehan Obtained from: NetApp --- usr.sbin/bhyve/consport.c | 15 ++++++++++++++- usr.sbin/bhyve/fbsdrun.c | 11 +++++++++-- usr.sbin/bhyve/inout.h | 2 ++ 3 files changed, 25 insertions(+), 3 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/consport.c b/usr.sbin/bhyve/consport.c index 8e2156a..3915b6d 100644 --- a/usr.sbin/bhyve/consport.c +++ b/usr.sbin/bhyve/consport.c @@ -124,4 +124,17 @@ console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, return (0); } -INOUT_PORT(console, BVM_CONSOLE_PORT, IOPORT_F_INOUT, console_handler); + +static struct inout_port consport = { + "bvmcons", + BVM_CONSOLE_PORT, + IOPORT_F_INOUT, + console_handler +}; + +void +init_bvmcons(void) +{ + + register_inout(&consport); +} diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index 9905f19..9d604ce 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -563,21 +563,25 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) int main(int argc, char *argv[]) { - int c, error, gdb_port, inject_bkpt, tmp, err, ioapic; + int c, error, gdb_port, inject_bkpt, tmp, 
err, ioapic, bvmcons; struct vmctx *ctx; uint64_t rip; + bvmcons = 0; inject_bkpt = 0; progname = basename(argv[0]); gdb_port = DEFAULT_GDB_PORT; guest_ncpus = 1; ioapic = 0; - while ((c = getopt(argc, argv, "aehBHIPxp:g:c:z:s:S:n:m:M:")) != -1) { + while ((c = getopt(argc, argv, "abehBHIPxp:g:c:z:s:S:n:m:M:")) != -1) { switch (c) { case 'a': disable_x2apic = 1; break; + case 'b': + bvmcons = 1; + break; case 'B': inject_bkpt = 1; break; @@ -707,6 +711,9 @@ main(int argc, char *argv[]) if (gdb_port != 0) init_dbgport(gdb_port); + if (bvmcons) + init_bvmcons(); + error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); assert(error == 0); diff --git a/usr.sbin/bhyve/inout.h b/usr.sbin/bhyve/inout.h index f6a8db2..a73b78d 100644 --- a/usr.sbin/bhyve/inout.h +++ b/usr.sbin/bhyve/inout.h @@ -62,4 +62,6 @@ int emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes, uint32_t *eax, int strict); int register_inout(struct inout_port *iop); +void init_bvmcons(void); + #endif /* _INOUT_H_ */ -- cgit v1.1 From 6a8b1eb58304d3599b0ddcb57d8340deac56f139 Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 27 Oct 2012 22:58:02 +0000 Subject: Present the bvm dbgport to the guest only when explicitly requested via the "-g" command line option. 
Suggested by: grehan Obtained from: NetApp --- usr.sbin/bhyve/dbgport.c | 67 +++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/dbgport.c b/usr.sbin/bhyve/dbgport.c index 55efdad..034531c 100644 --- a/usr.sbin/bhyve/dbgport.c +++ b/usr.sbin/bhyve/dbgport.c @@ -44,37 +44,12 @@ __FBSDID("$FreeBSD$"); #include "dbgport.h" #define BVM_DBG_PORT 0x224 +#define BVM_DBG_SIG ('B' << 8 | 'V') static int listen_fd, conn_fd; static struct sockaddr_in sin; -void -init_dbgport(int sport) -{ - conn_fd = -1; - - if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("socket"); - exit(1); - } - - sin.sin_len = sizeof(sin); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = htonl(INADDR_ANY); - sin.sin_port = htons(sport); - - if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) { - perror("bind"); - exit(1); - } - - if (listen(listen_fd, 1) < 0) { - perror("listen"); - exit(1); - } -} - static int dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) @@ -82,6 +57,11 @@ dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, char ch; int nwritten, nread, printonce; + if (bytes == 2 && in) { + *eax = BVM_DBG_SIG; + return (0); + } + if (bytes != 4) return (-1); @@ -122,4 +102,37 @@ again: return (0); } -INOUT_PORT(dbg, BVM_DBG_PORT, IOPORT_F_INOUT, dbg_handler); +static struct inout_port dbgport = { + "bvmdbg", + BVM_DBG_PORT, + IOPORT_F_INOUT, + dbg_handler +}; + +void +init_dbgport(int sport) +{ + conn_fd = -1; + + if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + perror("socket"); + exit(1); + } + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(sport); + + if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) { + perror("bind"); + exit(1); + } + + if (listen(listen_fd, 1) < 0) { + perror("listen"); + exit(1); + } + 
+ register_inout(&dbgport); +} -- cgit v1.1 From 36e2485283941cef37e24de91d997041ad325573 Mon Sep 17 00:00:00 2001 From: grehan Date: Wed, 31 Oct 2012 03:29:52 +0000 Subject: Exit if the requested num vCPUs exceeds the maximum rather than waiting until AP bringup detects an out-of-range vCPU. While here, fix all error output to use fprintf(stderr, ... Reviewed by: neel Reported by: @allanjude --- usr.sbin/bhyve/fbsdrun.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index 9d604ce..418faa7 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -213,7 +213,8 @@ fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) int error; if (cpumask & (1 << vcpu)) { - printf("addcpu: attempting to add existing cpu %d\n", vcpu); + fprintf(stderr, "addcpu: attempting to add existing cpu %d\n", + vcpu); exit(1); } @@ -325,7 +326,8 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) static int vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { - printf("vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, *pvcpu); + fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, + *pvcpu); return (VMEXIT_ABORT); } @@ -366,13 +368,14 @@ static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { - printf("vm exit[%d]\n", *pvcpu); - printf("\treason\t\tVMX\n"); - printf("\trip\t\t0x%016lx\n", vmexit->rip); - printf("\tinst_length\t%d\n", vmexit->inst_length); - printf("\terror\t\t%d\n", vmexit->u.vmx.error); - printf("\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); - printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); + fprintf(stderr, "vm exit[%d]\n", *pvcpu); + fprintf(stderr, "\treason\t\tVMX\n"); + fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); + fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); + fprintf(stderr, "\terror\t\t%d\n", 
vmexit->u.vmx.error); + fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); + fprintf(stderr, "\tqualification\t0x%016lx\n", + vmexit->u.vmx.exit_qualification); return (VMEXIT_ABORT); } @@ -445,11 +448,12 @@ vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) if (err) { if (err == EINVAL) { - printf("Failed to emulate instruction at 0x%lx\n", - vmexit->rip); + fprintf(stderr, + "Failed to emulate instruction at 0x%lx\n", + vmexit->rip); } else if (err == ESRCH) { - printf("Unhandled memory access to 0x%lx\n", - vmexit->u.paging.gpa); + fprintf(stderr, "Unhandled memory access to 0x%lx\n", + vmexit->u.paging.gpa); } return (VMEXIT_ABORT); @@ -643,6 +647,12 @@ main(int argc, char *argv[]) if (guest_ncpus <= 1) guest_vcpu_mux = 0; + if (guest_ncpus > VM_MAXCPU) { + fprintf(stderr, "%d vCPUs requested, max %d\n", + guest_ncpus, VM_MAXCPU); + exit(1); + } + /* vmexit on hlt if guest is muxed */ if (guest_vcpu_mux) { guest_vmexit_on_hlt = 1; @@ -660,7 +670,7 @@ main(int argc, char *argv[]) if (fbsdrun_vmexit_on_hlt()) { err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { - printf("VM exit on HLT not supported\n"); + fprintf(stderr, "VM exit on HLT not supported\n"); exit(1); } vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); @@ -673,7 +683,8 @@ main(int argc, char *argv[]) */ err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp); if (err < 0) { - printf("SMP mux requested, no pause support\n"); + fprintf(stderr, + "SMP mux requested, no pause support\n"); exit(1); } vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); @@ -686,7 +697,7 @@ main(int argc, char *argv[]) err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); if (err) { - printf("Unable to set x2apic state (%d)\n", err); + fprintf(stderr, "Unable to set x2apic state (%d)\n", err); exit(1); } -- cgit v1.1 From f31f52c1871f8a0551f877e5645b20d2629f84cd Mon Sep 17 00:00:00 2001 From: grehan Date: Wed, 31 Oct 2012 19:17:55 +0000 Subject: Change the thread 
name of the vCPU threads to contain the name of the VM and the vCPU number. This helps hugely when using top -H to identify what a VM is doing. Reviewed by: neel Obtained from: NetApp --- usr.sbin/bhyve/fbsdrun.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index 418faa7..d8e4a4e 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -196,10 +197,16 @@ fbsdrun_muxed(void) static void * fbsdrun_start_thread(void *param) { + char tname[MAXCOMLEN + 1]; + struct mt_vmm_info *mtp; int vcpu; - struct mt_vmm_info *mtp = param; + mtp = param; vcpu = mtp->mt_vcpu; + + snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu); + pthread_set_name_np(mtp->mt_thr, tname); + vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); /* not reached */ -- cgit v1.1 From 263c4acf84c3be71025f3484c0378a83cd668e15 Mon Sep 17 00:00:00 2001 From: neel Date: Tue, 6 Nov 2012 21:48:45 +0000 Subject: Use the new userboot 'getenv' callback to set a couple of environment variables in the guest. The variables are: smbios.bios.vendor=BHYVE and boot_serial=1 The FreeBSD guest uses the "smbios.bios.vendor" environment variable to detect whether or not it is running as a guest inside a hypervisor. The "boot_serial=1" is temporary and will be dropped when bhyve can do VGA emulation. 
Obtained from: NetApp --- usr.sbin/bhyveload/bhyveload.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyveload/bhyveload.c b/usr.sbin/bhyveload/bhyveload.c index b8be5e3..3b7fad4 100644 --- a/usr.sbin/bhyveload/bhyveload.c +++ b/usr.sbin/bhyveload/bhyveload.c @@ -471,6 +471,25 @@ cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem) *ret_highmem = highmem; } +static const char * +cb_getenv(void *arg, int num) +{ + int max; + + static const char * var[] = { + "smbios.bios.vendor=BHYVE", + "boot_serial=1", + NULL + }; + + max = sizeof(var) / sizeof(var[0]); + + if (num < max) + return (var[num]); + else + return (NULL); +} + static struct loader_callbacks_v1 cb = { .getc = cb_getc, .putc = cb_putc, @@ -497,6 +516,8 @@ static struct loader_callbacks_v1 cb = { .delay = cb_delay, .exit = cb_exit, .getmem = cb_getmem, + + .getenv = cb_getenv, }; static void @@ -600,5 +621,5 @@ main(int argc, char** argv) if (disk_image) { disk_fd = open(disk_image, O_RDONLY); } - func(&cb, NULL, USERBOOT_VERSION_1, disk_fd >= 0); + func(&cb, NULL, USERBOOT_VERSION_3, disk_fd >= 0); } -- cgit v1.1 From 6f600a733a70db5bd790d25fb885e348be51f240 Mon Sep 17 00:00:00 2001 From: grehan Date: Tue, 20 Nov 2012 07:01:26 +0000 Subject: ACPI support for bhyve. The -A option will create the minimal set of required ACPI tables in guest memory. Since ACPI mandates an IOAPIC, the -I option must also be used. Template ASL files are created, and then passed to the iasl compiler to generate AML files. These are then loaded into guest physical mem. In support of this, the ACPI PM timer is implemented, in 32-bit mode. Tested on 7.4/8.*/9.*/10-CURRENT. 
Reviewed by: neel Obtained from: NetApp Discussed with: jhb (a long while back) --- usr.sbin/bhyve/Makefile | 8 +- usr.sbin/bhyve/acpi.c | 804 +++++++++++++++++++++++++++++++++++++++++++++++ usr.sbin/bhyve/acpi.h | 34 ++ usr.sbin/bhyve/fbsdrun.c | 16 +- usr.sbin/bhyve/pmtmr.c | 101 ++++++ 5 files changed, 958 insertions(+), 5 deletions(-) create mode 100644 usr.sbin/bhyve/acpi.c create mode 100644 usr.sbin/bhyve/acpi.h create mode 100644 usr.sbin/bhyve/pmtmr.c (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index 3cae422..9dc7a53 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -4,11 +4,13 @@ PROG= bhyve -SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c +DEBUG_FLAGS= -g -O0 + +SRCS= acpi.c atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c SRCS+= instruction_emul.c ioapic.c mem.c mevent.c mptbl.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c -SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c -SRCS+= spinup_ap.c +SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c pmtmr.c post.c rtc.c uart.c +SRCS+= xmsr.c spinup_ap.c NO_MAN= diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c new file mode 100644 index 0000000..1fb553e --- /dev/null +++ b/usr.sbin/bhyve/acpi.c @@ -0,0 +1,804 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * bhyve ACPI table generator. + * + * Create the minimal set of ACPI tables required to boot FreeBSD (and + * hopefully other o/s's) by writing out ASL template files for each of + * the tables and then compiling them to AML with the Intel iasl compiler. + * The AML files are then read into guest memory. + * + * The tables are placed in the guest's ROM area just below 1MB physical, + * above the MPTable. 
+ * + * Layout + * ------ + * RSDP -> 0xf0400 (36 bytes fixed) + * RSDT -> 0xf0440 (36 bytes + 4*N table addrs, 2 used) + * MADT -> 0xf04a0 (depends on #CPUs) + * FADT -> 0xf0600 (268 bytes) + * FACS -> 0xf0780 (64 bytes) + * DSDT -> 0xf0800 (variable - can go up to 0x100000) + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "fbsdrun.h" +#include "acpi.h" + +/* + * Define the base address of the ACPI tables, and the offsets to + * the individual tables + */ +#define BHYVE_ACPI_BASE 0xf0400 +#define RSDT_OFFSET 0x040 +#define MADT_OFFSET 0x0a0 +#define FADT_OFFSET 0x200 +#define FACS_OFFSET 0x380 +#define DSDT_OFFSET 0x400 + +#define BHYVE_ASL_TEMPLATE "bhyve.XXXXXXX" +#define BHYVE_ASL_SUFFIX ".aml" +#define BHYVE_ASL_COMPILER "/usr/sbin/iasl" + +#define BHYVE_PM_TIMER_ADDR 0x408 + +static int basl_keep_temps; +static int basl_verbose_iasl; +static int basl_ncpu; +static uint32_t basl_acpi_base = BHYVE_ACPI_BASE; + +/* + * Contains the full pathname of the template to be passed + * to mkstemp/mktemps(3) + */ +static char basl_template[MAXPATHLEN]; +static char basl_stemplate[MAXPATHLEN]; + +struct basl_fio { + int fd; + FILE *fp; + char f_name[MAXPATHLEN]; +}; + +#define EFPRINTF(...) 
\ + err = fprintf(__VA_ARGS__); if (err < 0) goto err_exit; + +#define EFFLUSH(x) \ + err = fflush(x); if (err != 0) goto err_exit; + +static int +basl_fwrite_rsdp(FILE *fp) +{ + int err; + + err = 0; + + EFPRINTF(fp, "/*\n"); + EFPRINTF(fp, " * bhyve RSDP template\n"); + EFPRINTF(fp, " */\n"); + EFPRINTF(fp, "[0008]\t\tSignature : \"RSD PTR \"\n"); + EFPRINTF(fp, "[0001]\t\tChecksum : 43\n"); + EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); + EFPRINTF(fp, "[0001]\t\tRevision : 02\n"); + EFPRINTF(fp, "[0004]\t\tRSDT Address : %08X\n", + basl_acpi_base + RSDT_OFFSET); + EFPRINTF(fp, "[0004]\t\tLength : 00000024\n"); + EFPRINTF(fp, "[0008]\t\tXSDT Address : 0000000000000000\n"); + EFPRINTF(fp, "[0001]\t\tExtended Checksum : 00\n"); + EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); + + EFFLUSH(fp); + + return (0); + +err_exit: + return (errno); +} + +static int +basl_fwrite_rsdt(FILE *fp) +{ + int err; + + err = 0; + + EFPRINTF(fp, "/*\n"); + EFPRINTF(fp, " * bhyve RSDT template\n"); + EFPRINTF(fp, " */\n"); + EFPRINTF(fp, "[0004]\t\tSignature : \"RSDT\"\n"); + EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); + EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); + EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); + EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); + EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVRSDT \"\n"); + EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); + /* iasl will fill in the compiler ID/revision fields */ + EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); + EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); + EFPRINTF(fp, "\n"); + + /* Add in pointers to the MADT and FADT */ + EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : %08X\n", + basl_acpi_base + MADT_OFFSET); + EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : %08X\n", + basl_acpi_base + FADT_OFFSET); + + EFFLUSH(fp); + + return (0); + +err_exit: + return (errno); +} + +static int +basl_fwrite_madt(FILE *fp) +{ + int err; + int i; + + err = 0; + + EFPRINTF(fp, "/*\n"); + EFPRINTF(fp, 
" * bhyve MADT template\n"); + EFPRINTF(fp, " */\n"); + EFPRINTF(fp, "[0004]\t\tSignature : \"APIC\"\n"); + EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); + EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); + EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); + EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); + EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMADT \"\n"); + EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); + + /* iasl will fill in the compiler ID/revision fields */ + EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); + EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, "[0004]\t\tLocal Apic Address : FEE00000\n"); + EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); + EFPRINTF(fp, "\t\t\tPC-AT Compatibility : 1\n"); + EFPRINTF(fp, "\n"); + + /* Add a Processor Local APIC entry for each CPU */ + for (i = 0; i < basl_ncpu; i++) { + EFPRINTF(fp, "[0001]\t\tSubtable Type : 00\n"); + EFPRINTF(fp, "[0001]\t\tLength : 08\n"); + EFPRINTF(fp, "[0001]\t\tProcessor ID : %02d\n", i); + EFPRINTF(fp, "[0001]\t\tLocal Apic ID : %02d\n", i); + EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); + EFPRINTF(fp, "\t\t\tProcessor Enabled : 1\n"); + EFPRINTF(fp, "\n"); + } + + /* Always a single IOAPIC entry, with ID ncpu+1 */ + EFPRINTF(fp, "[0001]\t\tSubtable Type : 01\n"); + EFPRINTF(fp, "[0001]\t\tLength : 0C\n"); + EFPRINTF(fp, "[0001]\t\tI/O Apic ID : %02d\n", basl_ncpu); + EFPRINTF(fp, "[0001]\t\tReserved : 00\n"); + EFPRINTF(fp, "[0004]\t\tAddress : fec00000\n"); + EFPRINTF(fp, "[0004]\t\tInterrupt : 00000000\n"); + EFPRINTF(fp, "\n"); + + /* Override the 8259 chained vector. 
XXX maybe not needed */ + EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n"); + EFPRINTF(fp, "[0001]\t\tLength : 0A\n"); + EFPRINTF(fp, "[0001]\t\tBus : 00\n"); + EFPRINTF(fp, "[0001]\t\tSource : 09\n"); + EFPRINTF(fp, "[0004]\t\tInterrupt : 00000009\n"); + EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0000\n"); + EFPRINTF(fp, "\t\t\tPolarity : 0\n"); + EFPRINTF(fp, "\t\t\tTrigger Mode : 0\n"); + EFPRINTF(fp, "\n"); + + EFFLUSH(fp); + + return (0); + +err_exit: + return (errno); +} + +static int +basl_fwrite_fadt(FILE *fp) +{ + int err; + + err = 0; + + EFPRINTF(fp, "/*\n"); + EFPRINTF(fp, " * bhyve FADT template\n"); + EFPRINTF(fp, " */\n"); + EFPRINTF(fp, "[0004]\t\tSignature : \"FACP\"\n"); + EFPRINTF(fp, "[0004]\t\tTable Length : 0000010C\n"); + EFPRINTF(fp, "[0001]\t\tRevision : 05\n"); + EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); + EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); + EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVFACP \"\n"); + EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); + /* iasl will fill in the compiler ID/revision fields */ + EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); + EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, "[0004]\t\tFACS Address : %08X\n", + basl_acpi_base + FACS_OFFSET); + EFPRINTF(fp, "[0004]\t\tDSDT Address : %08X\n", + basl_acpi_base + DSDT_OFFSET); + EFPRINTF(fp, "[0001]\t\tModel : 00\n"); + EFPRINTF(fp, "[0001]\t\tPM Profile : 00 [Unspecified]\n"); + EFPRINTF(fp, "[0002]\t\tSCI Interrupt : 0009\n"); + EFPRINTF(fp, "[0004]\t\tSMI Command Port : 00000000\n"); + EFPRINTF(fp, "[0001]\t\tACPI Enable Value : 00\n"); + EFPRINTF(fp, "[0001]\t\tACPI Disable Value : 00\n"); + EFPRINTF(fp, "[0001]\t\tS4BIOS Command : 00\n"); + EFPRINTF(fp, "[0001]\t\tP-State Control : 00\n"); + EFPRINTF(fp, "[0004]\t\tPM1A Event Block Address : 00000000\n"); + EFPRINTF(fp, "[0004]\t\tPM1B Event Block Address : 00000000\n"); + EFPRINTF(fp, "[0004]\t\tPM1A Control Block Address : 
00000000\n"); + EFPRINTF(fp, "[0004]\t\tPM1B Control Block Address : 00000000\n"); + EFPRINTF(fp, "[0004]\t\tPM2 Control Block Address : 00000000\n"); + EFPRINTF(fp, "[0004]\t\tPM Timer Block Address : %08X\n", + BHYVE_PM_TIMER_ADDR); + EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : 00000000\n"); + EFPRINTF(fp, "[0004]\t\tGPE1 Block Address : 00000000\n"); + EFPRINTF(fp, "[0001]\t\tPM1 Event Block Length : 04\n"); + EFPRINTF(fp, "[0001]\t\tPM1 Control Block Length : 02\n"); + EFPRINTF(fp, "[0001]\t\tPM2 Control Block Length : 00\n"); + EFPRINTF(fp, "[0001]\t\tPM Timer Block Length : 04\n"); + EFPRINTF(fp, "[0001]\t\tGPE0 Block Length : 00\n"); + EFPRINTF(fp, "[0001]\t\tGPE1 Block Length : 00\n"); + EFPRINTF(fp, "[0001]\t\tGPE1 Base Offset : 00\n"); + EFPRINTF(fp, "[0001]\t\t_CST Support : 00\n"); + EFPRINTF(fp, "[0002]\t\tC2 Latency : 0000\n"); + EFPRINTF(fp, "[0002]\t\tC3 Latency : 0000\n"); + EFPRINTF(fp, "[0002]\t\tCPU Cache Size : 0000\n"); + EFPRINTF(fp, "[0002]\t\tCache Flush Stride : 0000\n"); + EFPRINTF(fp, "[0001]\t\tDuty Cycle Offset : 00\n"); + EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n"); + EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n"); + EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n"); + EFPRINTF(fp, "[0001]\t\tRTC Century Index : 00\n"); + EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n"); + EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n"); + EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n"); + EFPRINTF(fp, "\t\t\tVGA Not Present (V4) : 1\n"); + EFPRINTF(fp, "\t\t\tMSI Not Supported (V4) : 0\n"); + EFPRINTF(fp, "\t\t\tPCIe ASPM Not Supported (V4) : 1\n"); + EFPRINTF(fp, "\t\t\tCMOS RTC Not Present (V5) : 0\n"); + EFPRINTF(fp, "[0001]\t\tReserved : 00\n"); + EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n"); + EFPRINTF(fp, "\t\t\tWBINVD instruction is operational (V1) : 1\n"); + EFPRINTF(fp, "\t\t\tWBINVD flushes all caches (V1) : 0\n"); + EFPRINTF(fp, "\t\t\tAll CPUs support C1 (V1) : 0\n"); 
+ EFPRINTF(fp, "\t\t\tC2 works on MP system (V1) : 0\n"); + EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 1\n"); + EFPRINTF(fp, "\t\t\tControl Method Sleep Button (V1) : 1\n"); + EFPRINTF(fp, "\t\t\tRTC wake not in fixed reg space (V1) : 0\n"); + EFPRINTF(fp, "\t\t\tRTC can wake system from S4 (V1) : 0\n"); + EFPRINTF(fp, "\t\t\t32-bit PM Timer (V1) : 1\n"); + EFPRINTF(fp, "\t\t\tDocking Supported (V1) : 0\n"); + EFPRINTF(fp, "\t\t\tReset Register Supported (V2) : 0\n"); + EFPRINTF(fp, "\t\t\tSealed Case (V3) : 0\n"); + EFPRINTF(fp, "\t\t\tHeadless - No Video (V3) : 1\n"); + EFPRINTF(fp, "\t\t\tUse native instr after SLP_TYPx (V3) : 0\n"); + EFPRINTF(fp, "\t\t\tPCIEXP_WAK Bits Supported (V4) : 0\n"); + EFPRINTF(fp, "\t\t\tUse Platform Timer (V4) : 0\n"); + EFPRINTF(fp, "\t\t\tRTC_STS valid on S4 wake (V4) : 0\n"); + EFPRINTF(fp, "\t\t\tRemote Power-on capable (V4) : 0\n"); + EFPRINTF(fp, "\t\t\tUse APIC Cluster Model (V4) : 0\n"); + EFPRINTF(fp, "\t\t\tUse APIC Physical Destination Mode (V4) : 1\n"); + EFPRINTF(fp, "\t\t\tHardware Reduced (V5) : 0\n"); + EFPRINTF(fp, "\t\t\tLow Power S0 Idle (V5) : 0\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, + "[0012]\t\tReset Register : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000001\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, "[0001]\t\tValue to cause reset : 00\n"); + EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); + EFPRINTF(fp, "[0008]\t\tFACS Address : 00000000%08X\n", + basl_acpi_base + FACS_OFFSET); + EFPRINTF(fp, "[0008]\t\tDSDT Address : 00000000%08X\n", + basl_acpi_base + DSDT_OFFSET); + EFPRINTF(fp, + "[0012]\t\tPM1A Event Block : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 
20\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000001\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, + "[0012]\t\tPM1B Event Block : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, + "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, + "[0012]\t\tPM1A Control Block : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 10\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000001\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, + "[0012]\t\tPM1B Control Block : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, + "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, + "[0012]\t\tPM2 Control Block : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, + "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); + EFPRINTF(fp, "\n"); + + /* Valid for bhyve */ + EFPRINTF(fp, + "[0012]\t\tPM Timer Block : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 32\n"); + EFPRINTF(fp, 
"[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, + "[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n", + BHYVE_PM_TIMER_ADDR); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 80\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, "[0012]\t\tGPE1 Block : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, + "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, + "[0012]\t\tSleep Control Register : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, + "[0012]\t\tSleep Status Register : [Generic Address Structure]\n"); + EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); + EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); + EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); + EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); + + EFFLUSH(fp); + + return (0); + +err_exit: + return (errno); +} + +static int +basl_fwrite_facs(FILE *fp) +{ + int err; + + err = 0; + + EFPRINTF(fp, "/*\n"); + EFPRINTF(fp, " * bhyve FACS template\n"); + EFPRINTF(fp, " */\n"); + EFPRINTF(fp, "[0004]\t\tSignature : 
\"FACS\"\n"); + EFPRINTF(fp, "[0004]\t\tLength : 00000040\n"); + EFPRINTF(fp, "[0004]\t\tHardware Signature : 00000000\n"); + EFPRINTF(fp, "[0004]\t\t32 Firmware Waking Vector : 00000000\n"); + EFPRINTF(fp, "[0004]\t\tGlobal Lock : 00000000\n"); + EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n"); + EFPRINTF(fp, "\t\t\tS4BIOS Support Present : 0\n"); + EFPRINTF(fp, "\t\t\t64-bit Wake Supported (V2) : 0\n"); + EFPRINTF(fp, + "[0008]\t\t64 Firmware Waking Vector : 0000000000000000\n"); + EFPRINTF(fp, "[0001]\t\tVersion : 02\n"); + EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); + EFPRINTF(fp, "[0004]\t\tOspmFlags (decoded below) : 00000000\n"); + EFPRINTF(fp, "\t\t\t64-bit Wake Env Required (V2) : 0\n"); + + EFFLUSH(fp); + + return (0); + +err_exit: + return (errno); +} + +static int +basl_fwrite_dsdt(FILE *fp) +{ + int err; + + err = 0; + + EFPRINTF(fp, "/*\n"); + EFPRINTF(fp, " * bhyve DSDT template\n"); + EFPRINTF(fp, " */\n"); + EFPRINTF(fp, "DefinitionBlock (\"bhyve_dsdt.aml\", \"DSDT\", 2," + "\"BHYV\", \"BVDSDT\", 0x00000001)\n"); + EFPRINTF(fp, "{\n"); + EFPRINTF(fp, " Scope (_SB)\n"); + EFPRINTF(fp, " {\n"); + EFPRINTF(fp, " Device (PCI0)\n"); + EFPRINTF(fp, " {\n"); + EFPRINTF(fp, " Name (_HID, EisaId (\"PNP0A03\"))\n"); + EFPRINTF(fp, " Name (_ADR, Zero)\n"); + EFPRINTF(fp, " Name (_UID, One)\n"); + EFPRINTF(fp, " Name (_CRS, ResourceTemplate ()\n"); + EFPRINTF(fp, " {\n"); + EFPRINTF(fp, " WordBusNumber (ResourceProducer, MinFixed," + "MaxFixed, PosDecode,\n"); + EFPRINTF(fp, " 0x0000, // Granularity\n"); + EFPRINTF(fp, " 0x0000, // Range Minimum\n"); + EFPRINTF(fp, " 0x00FF, // Range Maximum\n"); + EFPRINTF(fp, " 0x0000, // Transl Offset\n"); + EFPRINTF(fp, " 0x0100, // Length\n"); + EFPRINTF(fp, " ,, )\n"); + EFPRINTF(fp, " IO (Decode16,\n"); + EFPRINTF(fp, " 0x0CF8, // Range Minimum\n"); + EFPRINTF(fp, " 0x0CF8, // Range Maximum\n"); + EFPRINTF(fp, " 0x01, // Alignment\n"); + EFPRINTF(fp, " 0x08, // Length\n"); + EFPRINTF(fp, " )\n"); + 
EFPRINTF(fp, " WordIO (ResourceProducer, MinFixed, MaxFixed," + "PosDecode, EntireRange,\n"); + EFPRINTF(fp, " 0x0000, // Granularity\n"); + EFPRINTF(fp, " 0x0000, // Range Minimum\n"); + EFPRINTF(fp, " 0x0CF7, // Range Maximum\n"); + EFPRINTF(fp, " 0x0000, // Transl Offset\n"); + EFPRINTF(fp, " 0x0CF8, // Length\n"); + EFPRINTF(fp, " ,, , TypeStatic)\n"); + EFPRINTF(fp, " WordIO (ResourceProducer, MinFixed, MaxFixed," + "PosDecode, EntireRange,\n"); + EFPRINTF(fp, " 0x0000, // Granularity\n"); + EFPRINTF(fp, " 0x0D00, // Range Minimum\n"); + EFPRINTF(fp, " 0xFFFF, // Range Maximum\n"); + EFPRINTF(fp, " 0x0000, // Transl Offset\n"); + EFPRINTF(fp, " 0xF300, // Length\n"); + EFPRINTF(fp, " ,, , TypeStatic)\n"); + EFPRINTF(fp, " })\n"); + EFPRINTF(fp, " }\n"); + EFPRINTF(fp, " }\n"); + EFPRINTF(fp, "\n"); + EFPRINTF(fp, " Scope (_SB.PCI0)\n"); + EFPRINTF(fp, " {\n"); + EFPRINTF(fp, " Device (ISA)\n"); + EFPRINTF(fp, " {\n"); + EFPRINTF(fp, " Name (_ADR, 0x00010000)\n"); + EFPRINTF(fp, " OperationRegion (P40C, PCI_Config, 0x60, 0x04)\n"); + EFPRINTF(fp, " }\n"); + EFPRINTF(fp, " }\n"); + EFPRINTF(fp, "\n"); + EFPRINTF(fp, " Scope (_SB.PCI0.ISA)\n"); + EFPRINTF(fp, " {\n"); + EFPRINTF(fp, " Device (RTC)\n"); + EFPRINTF(fp, " {\n"); + EFPRINTF(fp, " Name (_HID, EisaId (\"PNP0B00\"))\n"); + EFPRINTF(fp, " Name (_CRS, ResourceTemplate ()\n"); + EFPRINTF(fp, " {\n"); + EFPRINTF(fp, " IO (Decode16,\n"); + EFPRINTF(fp, " 0x0070, // Range Minimum\n"); + EFPRINTF(fp, " 0x0070, // Range Maximum\n"); + EFPRINTF(fp, " 0x10, // Alignment\n"); + EFPRINTF(fp, " 0x02, // Length\n"); + EFPRINTF(fp, " )\n"); + EFPRINTF(fp, " IRQNoFlags ()\n"); + EFPRINTF(fp, " {8}\n"); + EFPRINTF(fp, " IO (Decode16,\n"); + EFPRINTF(fp, " 0x0072, // Range Minimum\n"); + EFPRINTF(fp, " 0x0072, // Range Maximum\n"); + EFPRINTF(fp, " 0x02, // Alignment\n"); + EFPRINTF(fp, " 0x06, // Length\n"); + EFPRINTF(fp, " )\n"); + EFPRINTF(fp, " })\n"); + EFPRINTF(fp, " }\n"); + EFPRINTF(fp, " }\n"); + EFPRINTF(fp, 
"}\n"); + + EFFLUSH(fp); + + return (0); + +err_exit: + return (errno); +} + +static int +basl_open(struct basl_fio *bf, int suffix) +{ + int err; + + err = 0; + + if (suffix) { + strncpy(bf->f_name, basl_stemplate, MAXPATHLEN); + bf->fd = mkstemps(bf->f_name, strlen(BHYVE_ASL_SUFFIX)); + } else { + strncpy(bf->f_name, basl_template, MAXPATHLEN); + bf->fd = mkstemp(bf->f_name); + } + + if (bf->fd > 0) { + bf->fp = fdopen(bf->fd, "w+"); + if (bf->fp == NULL) { + unlink(bf->f_name); + close(bf->fd); + } + } else { + err = 1; + } + + return (err); +} + +static void +basl_close(struct basl_fio *bf) +{ + + if (!basl_keep_temps) + unlink(bf->f_name); + fclose(bf->fp); +} + +static int +basl_start(struct basl_fio *in, struct basl_fio *out) +{ + int err; + + err = basl_open(in, 0); + if (!err) { + err = basl_open(out, 1); + if (err) { + basl_close(in); + } + } + + return (err); +} + +static void +basl_end(struct basl_fio *in, struct basl_fio *out) +{ + + basl_close(in); + basl_close(out); +} + +static int +basl_load(int fd, uint64_t off) +{ + struct stat sb; + int err; + + err = 0; + + if (fstat(fd, &sb) < 0 || + read(fd, paddr_guest2host(basl_acpi_base + off), sb.st_size) < 0) + err = errno; + + return (err); +} + +static int +basl_compile(int (*fwrite_section)(FILE *fp), uint64_t offset) +{ + struct basl_fio io[2]; + static char iaslbuf[3*MAXPATHLEN + 10]; + char *fmt; + int err; + + err = basl_start(&io[0], &io[1]); + if (!err) { + err = (*fwrite_section)(io[0].fp); + + if (!err) { + /* + * iasl sends the results of the compilation to + * stdout. Shut this down by using the shell to + * redirect stdout to /dev/null, unless the user + * has requested verbose output for debugging + * purposes + */ + fmt = basl_verbose_iasl ? 
+ "%s -p %s %s" : + "/bin/sh -c \"%s -p %s %s\" 1> /dev/null"; + + snprintf(iaslbuf, sizeof(iaslbuf), + fmt, + BHYVE_ASL_COMPILER, + io[1].f_name, io[0].f_name); + err = system(iaslbuf); + + if (!err) { + /* + * Copy the aml output file into guest + * memory at the specified location + */ + err = basl_load(io[1].fd, offset); + } + } + basl_end(&io[0], &io[1]); + } + + return (err); +} + +static int +basl_make_templates(void) +{ + const char *tmpdir; + int err; + int len; + + err = 0; + + /* + * + */ + if ((tmpdir = getenv("BHYVE_TMPDIR")) == NULL || *tmpdir == '\0' || + (tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') { + tmpdir = _PATH_TMP; + } + + len = strlen(tmpdir); + + if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1) < MAXPATHLEN) { + strcpy(basl_template, tmpdir); + while (len > 0 && basl_template[len - 1] == '/') + len--; + basl_template[len] = '/'; + strcpy(&basl_template[len + 1], BHYVE_ASL_TEMPLATE); + } else + err = E2BIG; + + if (!err) { + /* + * len has been intialized (and maybe adjusted) above + */ + if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1 + + sizeof(BHYVE_ASL_SUFFIX)) < MAXPATHLEN) { + strcpy(basl_stemplate, tmpdir); + basl_stemplate[len] = '/'; + strcpy(&basl_stemplate[len + 1], BHYVE_ASL_TEMPLATE); + len = strlen(basl_stemplate); + strcpy(&basl_stemplate[len], BHYVE_ASL_SUFFIX); + } else + err = E2BIG; + } + + return (err); +} + +static struct { + int (*wsect)(FILE *fp); + uint64_t offset; +} basl_ftables[] = +{ + { basl_fwrite_rsdp, 0}, + { basl_fwrite_rsdt, RSDT_OFFSET }, + { basl_fwrite_madt, MADT_OFFSET }, + { basl_fwrite_fadt, FADT_OFFSET }, + { basl_fwrite_facs, FACS_OFFSET }, + { basl_fwrite_dsdt, DSDT_OFFSET }, + { NULL } +}; + +int +acpi_build(struct vmctx *ctx, int ncpu, int ioapic) +{ + int err; + int i; + + err = 0; + basl_ncpu = ncpu; + + if (!ioapic) { + fprintf(stderr, "ACPI tables require an ioapic\n"); + return (EINVAL); + } + + /* + * For debug, allow the user to have iasl compiler output sent + * to stdout rather than 
/dev/null + */ + if (getenv("BHYVE_ACPI_VERBOSE_IASL")) + basl_verbose_iasl = 1; + + /* + * Allow the user to keep the generated ASL files for debugging + * instead of deleting them following use + */ + if (getenv("BHYVE_ACPI_KEEPTMPS")) + basl_keep_temps = 1; + + i = 0; + err = basl_make_templates(); + + /* + * Run through all the ASL files, compiling them and + * copying them into guest memory + */ + while (!err && basl_ftables[i].wsect != NULL) { + err = basl_compile(basl_ftables[i].wsect, + basl_ftables[i].offset); + i++; + } + + return (err); +} diff --git a/usr.sbin/bhyve/acpi.h b/usr.sbin/bhyve/acpi.h new file mode 100644 index 0000000..fec6c9d --- /dev/null +++ b/usr.sbin/bhyve/acpi.h @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _ACPI_H_ +#define _ACPI_H_ + +int acpi_build(struct vmctx *ctx, int ncpu, int ioapic); + +#endif /* _ACPI_H_ */ diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index d8e4a4e..43fa797 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include "fbsdrun.h" +#include "acpi.h" #include "inout.h" #include "dbgport.h" #include "mem.h" @@ -93,6 +94,8 @@ static int foundcpus; static int strictio; +static int acpi; + static char *lomem_addr; static char *himem_addr; @@ -128,9 +131,10 @@ usage(int code) { fprintf(stderr, - "Usage: %s [-aehBHIP][-g ][-z ][-s ]" + "Usage: %s [-aehABHIP][-g ][-z ][-s ]" "[-S ][-p pincpu][-n ][-m lowmem][-M highmem] \n" " -a: local apic is in XAPIC mode (default is X2APIC)\n" + " -A: create an ACPI table\n" " -g: gdb port (default is %d and 0 means don't open)\n" " -c: # cpus (default 1)\n" " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" @@ -585,11 +589,14 @@ main(int argc, char *argv[]) guest_ncpus = 1; ioapic = 0; - while ((c = getopt(argc, argv, "abehBHIPxp:g:c:z:s:S:n:m:M:")) != -1) { + while ((c = getopt(argc, argv, "abehABHIPxp:g:c:z:s:S:n:m:M:")) != -1) { switch (c) { case 'a': disable_x2apic = 1; break; + case 'A': + acpi = 1; + break; case 'b': bvmcons = 1; break; @@ -745,6 +752,11 @@ main(int argc, char *argv[]) */ mptable_build(ctx, guest_ncpus, ioapic); + if (acpi) { + error = acpi_build(ctx, 
guest_ncpus, ioapic); + assert(error == 0); + } + /* * Add CPU 0 */ diff --git a/usr.sbin/bhyve/pmtmr.c b/usr.sbin/bhyve/pmtmr.c new file mode 100644 index 0000000..11e7ae0 --- /dev/null +++ b/usr.sbin/bhyve/pmtmr.c @@ -0,0 +1,101 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include + +#include "inout.h" + +/* + * The ACPI Power Management timer is a free-running 24- or 32-bit + * timer with a frequency of 3.579545MHz + * + * This implementation will be 32-bits + */ + +#define IO_PMTMR 0x408 /* 4-byte i/o port for the timer */ + +#define PMTMR_FREQ 3579545 /* 3.579545MHz */ + +static uint32_t pmtmr_tscf; +static uint32_t pmtmr_old; +static uint64_t pmtmr_tsc_old; + +static uint32_t +pmtmr_val(void) +{ + uint64_t pmtmr_tsc_new; + uint32_t pmtmr_new; + static int inited = 0; + + if (!inited) { + size_t len; + + inited = 1; + len = sizeof(pmtmr_tscf); + sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len, + NULL, 0); + pmtmr_tsc_old = rdtsc(); + pmtmr_old = pmtmr_tsc_old / pmtmr_tscf * PMTMR_FREQ; + return (pmtmr_old); + } + + pmtmr_tsc_new = rdtsc(); + pmtmr_new = (pmtmr_tsc_new - pmtmr_tsc_old) * PMTMR_FREQ / pmtmr_tscf + + pmtmr_old; + pmtmr_old = pmtmr_new; + pmtmr_tsc_old = pmtmr_tsc_new; + + return (pmtmr_old); +} + +static int +pmtmr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + assert(in == 1); + + if (bytes != 4) + return (-1); + + *eax = pmtmr_val(); + + return (0); +} + +INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler); + -- cgit v1.1 From 6f5296796595116674fad5b9f4be7d773f048e22 Mon Sep 17 00:00:00 2001 From: neel Date: Wed, 21 Nov 2012 00:14:03 +0000 Subject: Mask the %eax register properly based on whether the "out" instruction is operating on 1, 2 or 4 bytes. There could be garbage in the unused bytes so zero them off. 
Obtained from: NetApp --- usr.sbin/bhyve/inout.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index bf9c5f7..5f47a89f 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -74,6 +74,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, int strict) { int flags; + uint32_t mask; inout_func_t handler; void *arg; @@ -84,6 +85,21 @@ emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, if (strict && handler == default_inout) return (-1); + if (!in) { + switch (bytes) { + case 1: + mask = 0xff; + break; + case 2: + mask = 0xffff; + break; + default: + mask = 0xffffffff; + break; + } + *eax = *eax & mask; + } + flags = inout_handlers[port].flags; arg = inout_handlers[port].arg; -- cgit v1.1 From 76430d4106c5cb90837c5f728272d9f249e085c4 Mon Sep 17 00:00:00 2001 From: neel Date: Thu, 22 Nov 2012 04:17:32 +0000 Subject: MSI-X does not need to be enabled in the message control register for the guest to access the MSI-x tables. 
Obtained from: NetApp --- usr.sbin/bhyve/pci_passthru.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index 94e4416..7ac6e3d 100644 --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -672,7 +672,7 @@ passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, sc = pi->pi_arg; - if (pi->pi_msix.enabled && pi->pi_msix.table_bar == baridx) { + if (pi->pi_msix.table_bar == baridx) { msix_table_write(ctx, vcpu, sc, offset, size, value); } else { assert(pi->pi_bar[baridx].type == PCIBAR_IO); @@ -696,7 +696,7 @@ passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, sc = pi->pi_arg; - if (pi->pi_msix.enabled && pi->pi_msix.table_bar == baridx) { + if (pi->pi_msix.table_bar == baridx) { val = msix_table_read(sc, offset, size); } else { assert(pi->pi_bar[baridx].type == PCIBAR_IO); -- cgit v1.1 From 36ab9a2e1ab7d2b1884270275584f989cfd65e2b Mon Sep 17 00:00:00 2001 From: neel Date: Wed, 28 Nov 2012 00:02:17 +0000 Subject: Revamp the x86 instruction emulation in bhyve. On a nested page table fault the hypervisor will: - fetch the instruction using the guest %rip and %cr3 - decode the instruction in 'struct vie' - emulate the instruction in host kernel context for local apic accesses - any other type of mmio access is punted up to user-space (e.g. ioapic) The decoded instruction is passed as collateral to the user-space process that is handling the PAGING exit. The emulation code is fleshed out to include more addressing modes (e.g. SIB) and more types of operands (e.g. imm8). The source code is unified into a single file (vmm_instruction_emul.c) that is compiled into vmm.ko as well as /usr/sbin/bhyve. 
Reviewed by: grehan Obtained from: NetApp --- usr.sbin/bhyve/Makefile | 5 +- usr.sbin/bhyve/fbsdrun.c | 4 +- usr.sbin/bhyve/instruction_emul.c | 641 -------------------------------------- usr.sbin/bhyve/instruction_emul.h | 36 --- usr.sbin/bhyve/ioapic.c | 1 - usr.sbin/bhyve/mem.c | 51 ++- usr.sbin/bhyve/mem.h | 2 +- usr.sbin/bhyve/pci_passthru.c | 1 - 8 files changed, 44 insertions(+), 697 deletions(-) delete mode 100644 usr.sbin/bhyve/instruction_emul.c delete mode 100644 usr.sbin/bhyve/instruction_emul.h (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index 9dc7a53..c45b904 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -7,11 +7,14 @@ PROG= bhyve DEBUG_FLAGS= -g -O0 SRCS= acpi.c atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c -SRCS+= instruction_emul.c ioapic.c mem.c mevent.c mptbl.c +SRCS+= ioapic.c mem.c mevent.c mptbl.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c pmtmr.c post.c rtc.c uart.c SRCS+= xmsr.c spinup_ap.c +.PATH: ${.CURDIR}/../../sys/amd64/vmm +SRCS+= vmm_instruction_emul.c + NO_MAN= DPADD= ${LIBVMMAPI} ${LIBMD} ${LIBPTHREAD} diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index 43fa797..b1c7098 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -57,7 +57,6 @@ __FBSDID("$FreeBSD$"); #include "mptbl.h" #include "pci_emul.h" #include "xmsr.h" -#include "instruction_emul.h" #include "ioapic.h" #include "spinup_ap.h" @@ -455,7 +454,8 @@ vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) stats.vmexit_paging++; err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip, - vmexit->u.paging.cr3, vmexit->u.paging.rwx); + vmexit->u.paging.cr3, vmexit->u.paging.rwx, + &vmexit->u.paging.vie); if (err) { if (err == EINVAL) { diff --git a/usr.sbin/bhyve/instruction_emul.c b/usr.sbin/bhyve/instruction_emul.c deleted file mode 100644 index 78c3608..0000000 --- 
a/usr.sbin/bhyve/instruction_emul.c +++ /dev/null @@ -1,641 +0,0 @@ -/*- - * Copyright (c) 2012 Sandvine, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include -#include -#include -#include -#include - -#include "fbsdrun.h" -#include "mem.h" -#include "instruction_emul.h" - -#define PREFIX_LOCK 0xF0 -#define PREFIX_REPNE 0xF2 -#define PREFIX_REPE 0xF3 -#define PREFIX_CS_OVERRIDE 0x2E -#define PREFIX_SS_OVERRIDE 0x36 -#define PREFIX_DS_OVERRIDE 0x3E -#define PREFIX_ES_OVERRIDE 0x26 -#define PREFIX_FS_OVERRIDE 0x64 -#define PREFIX_GS_OVERRIDE 0x65 -#define PREFIX_BRANCH_NOT_TAKEN 0x2E -#define PREFIX_BRANCH_TAKEN 0x3E -#define PREFIX_OPSIZE 0x66 -#define is_opsz_prefix(x) ((x) == PREFIX_OPSIZE) -#define PREFIX_ADDRSIZE 0x67 - -#define OPCODE_2BYTE_ESCAPE 0x0F -#define OPCODE_3BYTE_ESCAPE 0x38 - -#define MODRM_MOD_MASK 0xC0 -#define MODRM_MOD_SHIFT 6 -#define MODRM_RM_MASK 0x07 -#define MODRM_RM_SHIFT 0 -#define MODRM_REG_MASK 0x38 -#define MODRM_REG_SHIFT 3 - -#define MOD_INDIRECT 0x0 -#define MOD_INDIRECT_DISP8 0x1 -#define MOD_INDIRECT_DISP32 0x2 -#define MOD_DIRECT 0x3 - -#define RM_EAX 0x0 -#define RM_ECX 0x1 -#define RM_EDX 0x2 -#define RM_EBX 0x3 -#define RM_SIB 0x4 -#define RM_DISP32 0x5 -#define RM_EBP RM_DISP32 -#define RM_ESI 0x6 -#define RM_EDI 0x7 - -#define REG_EAX 0x0 -#define REG_ECX 0x1 -#define REG_EDX 0x2 -#define REG_EBX 0x3 -#define REG_ESP 0x4 -#define REG_EBP 0x5 -#define REG_ESI 0x6 -#define REG_EDI 0x7 -#define REG_R8 0x8 -#define REG_R9 0x9 -#define REG_R10 0xA -#define REG_R11 0xB -#define REG_R12 0xC -#define REG_R13 0xD -#define REG_R14 0xE -#define REG_R15 0xF - -#define HAS_MODRM 1 -#define FROM_RM (1<<1) -#define FROM_REG (1<<2) -#define TO_RM (1<<3) -#define TO_REG (1<<4) -#define ZEXT (1<<5) -#define FROM_8 (1<<6) -#define FROM_16 (1<<7) -#define TO_8 (1<<8) -#define TO_16 (1<<9) - -#define REX_MASK 0xF0 -#define REX_PREFIX 0x40 -#define is_rex_prefix(x) ( ((x) & REX_MASK) == REX_PREFIX ) -#define REX_W_MASK 0x8 -#define REX_R_MASK 0x4 -#define REX_X_MASK 0x2 -#define REX_B_MASK 0x1 - -#define is_prefix(x) ((x) == PREFIX_LOCK || (x) == PREFIX_REPNE || \ - 
(x) == PREFIX_REPE || (x) == PREFIX_CS_OVERRIDE || \ - (x) == PREFIX_SS_OVERRIDE || (x) == PREFIX_DS_OVERRIDE || \ - (x) == PREFIX_ES_OVERRIDE || (x) == PREFIX_FS_OVERRIDE || \ - (x) == PREFIX_GS_OVERRIDE || (x) == PREFIX_BRANCH_NOT_TAKEN || \ - (x) == PREFIX_BRANCH_TAKEN || (x) == PREFIX_OPSIZE || \ - (x) == PREFIX_ADDRSIZE || is_rex_prefix((x))) - -#define PAGE_FRAME_MASK 0x80 -#define PAGE_OFFSET_MASK 0xFFF -#define PAGE_TABLE_ENTRY_MASK (~PAGE_OFFSET_MASK) -#define PML4E_OFFSET_MASK 0x0000FF8000000000 -#define PML4E_SHIFT 39 - -#define INSTR_VERIFY - -struct decoded_instruction -{ - void *instruction; - uint8_t *opcode; - uint8_t *modrm; - uint8_t *sib; - uint8_t *displacement; - uint8_t *immediate; - - uint16_t opcode_flags; - - uint8_t addressing_mode; - uint8_t rm; - uint8_t reg; - uint8_t opsz; - uint8_t rex_r; - uint8_t rex_w; - uint8_t rex_b; - uint8_t rex_x; - - int32_t disp; -}; - -static enum vm_reg_name vm_reg_name_mappings[] = { - [REG_EAX] = VM_REG_GUEST_RAX, - [REG_EBX] = VM_REG_GUEST_RBX, - [REG_ECX] = VM_REG_GUEST_RCX, - [REG_EDX] = VM_REG_GUEST_RDX, - [REG_ESP] = VM_REG_GUEST_RSP, - [REG_EBP] = VM_REG_GUEST_RBP, - [REG_ESI] = VM_REG_GUEST_RSI, - [REG_EDI] = VM_REG_GUEST_RDI, - [REG_R8] = VM_REG_GUEST_R8, - [REG_R9] = VM_REG_GUEST_R9, - [REG_R10] = VM_REG_GUEST_R10, - [REG_R11] = VM_REG_GUEST_R11, - [REG_R12] = VM_REG_GUEST_R12, - [REG_R13] = VM_REG_GUEST_R13, - [REG_R14] = VM_REG_GUEST_R14, - [REG_R15] = VM_REG_GUEST_R15 -}; - -uint16_t one_byte_opcodes[256] = { - [0x88] = HAS_MODRM | FROM_REG | TO_RM | TO_8 | FROM_8, - [0x89] = HAS_MODRM | FROM_REG | TO_RM, - [0x8B] = HAS_MODRM | FROM_RM | TO_REG, -}; - -uint16_t two_byte_opcodes[256] = { - [0xB6] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_8, - [0xB7] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_16, -}; - -static uintptr_t -gla2gpa(uint64_t gla, uint64_t guest_cr3) -{ - uint64_t *table; - uint64_t mask, entry; - int level, shift; - uintptr_t page_frame; - - table = paddr_guest2host(guest_cr3 
& PAGE_TABLE_ENTRY_MASK); - mask = PML4E_OFFSET_MASK; - shift = PML4E_SHIFT; - for (level = 0; level < 4; ++level) - { - entry = table[(gla & mask) >> shift]; - table = (uint64_t*)(entry & PAGE_TABLE_ENTRY_MASK); - - /* This entry does not point to another page table */ - if (entry & PAGE_FRAME_MASK || level >= 3) - break; - - table = paddr_guest2host((uintptr_t)table); - mask >>= 9; - shift -= 9; - } - - mask = (1 << shift) - 1; - page_frame = ((uintptr_t)table & ~mask); - return (page_frame | (gla & mask)); -} - -static void * -gla2hla(uint64_t gla, uint64_t guest_cr3) -{ - uintptr_t gpa; - - gpa = gla2gpa(gla, guest_cr3); - - return (paddr_guest2host(gpa)); -} - -/* - * Decodes all of the prefixes of the instruction. Only a subset of REX - * prefixes are currently supported. If any unsupported prefix is - * encountered, returns -1. - */ -static int -decode_prefixes(struct decoded_instruction *decoded) -{ - uint8_t *current_prefix; - - current_prefix = decoded->instruction; - - if (is_rex_prefix(*current_prefix)) { - decoded->rex_w = *current_prefix & REX_W_MASK; - decoded->rex_r = *current_prefix & REX_R_MASK; - decoded->rex_x = *current_prefix & REX_X_MASK; - decoded->rex_b = *current_prefix & REX_B_MASK; - current_prefix++; - } else if (is_opsz_prefix(*current_prefix)) { - decoded->opsz = 1; - current_prefix++; - } else if (is_prefix(*current_prefix)) { - return (-1); - } - - decoded->opcode = current_prefix; - return (0); -} - -/* - * Decodes the instruction's opcode. If the opcode is not understood, returns - * -1 indicating an error. Sets the instruction's mod_rm pointer to the - * location of the ModR/M field. 
- */ -static int -decode_opcode(struct decoded_instruction *decoded) -{ - uint8_t opcode; - uint16_t flags; - int extra; - - opcode = *decoded->opcode; - extra = 0; - - if (opcode != 0xf) - flags = one_byte_opcodes[opcode]; - else { - opcode = *(decoded->opcode + 1); - flags = two_byte_opcodes[opcode]; - extra = 1; - } - - if (!flags) - return (-1); - - if (flags & HAS_MODRM) { - decoded->modrm = decoded->opcode + 1 + extra; - } - - decoded->opcode_flags = flags; - - return (0); -} - -/* - * Decodes the instruction's ModR/M field. Sets the instruction's sib pointer - * to the location of the SIB if one is expected to be present, or 0 if not. - */ -static int -decode_mod_rm(struct decoded_instruction *decoded) -{ - uint8_t modrm; - uint8_t *extension_operands; - - if (decoded->modrm) { - modrm = *decoded->modrm; - - decoded->addressing_mode = (modrm & MODRM_MOD_MASK) >> MODRM_MOD_SHIFT; - decoded->rm = (modrm & MODRM_RM_MASK) >> MODRM_RM_SHIFT; - decoded->reg = (modrm & MODRM_REG_MASK) >> MODRM_REG_SHIFT; - - if (decoded->rex_b) - decoded->rm |= (1<<3); - - if (decoded->rex_r) - decoded->reg |= (1<<3); - - extension_operands = decoded->modrm + 1; - - if (decoded->rm == RM_SIB) { - decoded->sib = decoded->modrm + 1; - extension_operands = decoded->sib + 1; - } - - switch (decoded->addressing_mode) { - case MOD_INDIRECT: - case MOD_DIRECT: - decoded->displacement = 0; - break; - case MOD_INDIRECT_DISP8: - decoded->displacement = extension_operands; - break; - case MOD_INDIRECT_DISP32: - decoded->displacement = extension_operands; - break; - } - } - - return (0); -} - -/* - * Decodes the instruction's SIB field. No such instructions are currently - * supported, so do nothing and return -1 if there is a SIB field, 0 otherwise. - */ -static int -decode_sib(struct decoded_instruction *decoded) -{ - - if (decoded->sib) - return (-1); - - return (0); -} - -/* - * Grabs and saves the instruction's immediate operand and displacement if - * they are present. 
Immediates are not currently supported, so if an - * immediate is present it will return -1 indicating an error. - */ -static int -decode_extension_operands(struct decoded_instruction *decoded) -{ - - if (decoded->displacement) { - if (decoded->addressing_mode == MOD_INDIRECT_DISP8) { - decoded->disp = *((int8_t *)decoded->displacement); - } else if (decoded->addressing_mode == MOD_INDIRECT_DISP32) { - decoded->disp = *((int32_t *)decoded->displacement); - } - } - - if (decoded->immediate) { - return (-1); - } - - return (0); -} - -static int -decode_instruction(void *instr, struct decoded_instruction *decoded) -{ - int error; - - bzero(decoded, sizeof(*decoded)); - decoded->instruction = instr; - - error = decode_prefixes(decoded); - if (error) - return (error); - - error = decode_opcode(decoded); - if (error) - return (error); - - error = decode_mod_rm(decoded); - if (error) - return (error); - - error = decode_sib(decoded); - if (error) - return (error); - - error = decode_extension_operands(decoded); - if (error) - return (error); - - return (0); -} - -static enum vm_reg_name -get_vm_reg_name(uint8_t reg) -{ - - return (vm_reg_name_mappings[reg]); -} - -static uint64_t -adjust_operand(const struct decoded_instruction *instruction, uint64_t val, - int size) -{ - uint64_t ret; - - if (instruction->opcode_flags & ZEXT) { - switch (size) { - case 1: - ret = val & 0xff; - break; - case 2: - ret = val & 0xffff; - break; - case 4: - ret = val & 0xffffffff; - break; - case 8: - ret = val; - break; - default: - break; - } - } else { - /* - * Extend the sign - */ - switch (size) { - case 1: - ret = (int8_t)(val & 0xff); - break; - case 2: - ret = (int16_t)(val & 0xffff); - break; - case 4: - ret = (int32_t)(val & 0xffffffff); - break; - case 8: - ret = val; - break; - default: - break; - } - } - - return (ret); -} - -static int -get_operand(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3, - const struct decoded_instruction *instruction, uint64_t *operand, - 
struct mem_range *mr) -{ - enum vm_reg_name regname; - uint64_t reg; - int error; - uint8_t rm, addressing_mode, size; - - if (instruction->opcode_flags & FROM_RM) { - rm = instruction->rm; - addressing_mode = instruction->addressing_mode; - } else if (instruction->opcode_flags & FROM_REG) { - rm = instruction->reg; - addressing_mode = MOD_DIRECT; - } else - return (-1); - - /* - * Determine size of operand - */ - size = 4; - if (instruction->opcode_flags & FROM_8) { - size = 1; - } else if (instruction->opcode_flags & FROM_16 || - instruction->opsz) { - size = 2; - } - - regname = get_vm_reg_name(rm); - error = vm_get_register(vm, vcpu, regname, &reg); - if (error) - return (error); - - switch (addressing_mode) { - case MOD_DIRECT: - *operand = reg; - error = 0; - break; - case MOD_INDIRECT: - case MOD_INDIRECT_DISP8: - case MOD_INDIRECT_DISP32: -#ifdef INSTR_VERIFY - { - uintptr_t target; - - target = gla2gpa(reg, guest_cr3); - target += instruction->disp; - assert(gpa == target); - } -#endif - error = (*mr->handler)(vm, vcpu, MEM_F_READ, gpa, size, - operand, mr->arg1, mr->arg2); - break; - default: - return (-1); - } - - if (!error) - *operand = adjust_operand(instruction, *operand, size); - - return (error); -} - -static uint64_t -adjust_write(uint64_t reg, uint64_t operand, int size) -{ - uint64_t val; - - switch (size) { - case 1: - val = (reg & ~0xff) | (operand & 0xff); - break; - case 2: - val = (reg & ~0xffff) | (operand & 0xffff); - break; - case 4: - val = (reg & ~0xffffffff) | (operand & 0xffffffff); - break; - case 8: - val = operand; - default: - break; - } - - return (val); -} - -static int -perform_write(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3, - const struct decoded_instruction *instruction, uint64_t operand, - struct mem_range *mr) -{ - enum vm_reg_name regname; - uintptr_t target; - int error; - int size; - uint64_t reg; - uint8_t addressing_mode; - - if (instruction->opcode_flags & TO_RM) { - reg = instruction->rm; - 
addressing_mode = instruction->addressing_mode; - } else if (instruction->opcode_flags & TO_REG) { - reg = instruction->reg; - addressing_mode = MOD_DIRECT; - } else - return (-1); - - /* - * Determine the operand size. rex.w has priority - */ - size = 4; - if (instruction->rex_w) { - size = 8; - } else if (instruction->opcode_flags & TO_8) { - size = 1; - } else if (instruction->opsz) { - size = 2; - }; - - switch(addressing_mode) { - case MOD_DIRECT: - regname = get_vm_reg_name(reg); - error = vm_get_register(vm, vcpu, regname, &reg); - if (error) - return (error); - operand = adjust_write(reg, operand, size); - - return (vm_set_register(vm, vcpu, regname, operand)); - case MOD_INDIRECT: - case MOD_INDIRECT_DISP8: - case MOD_INDIRECT_DISP32: -#ifdef INSTR_VERIFY - regname = get_vm_reg_name(reg); - error = vm_get_register(vm, vcpu, regname, &reg); - assert(!error); - target = gla2gpa(reg, guest_cr3); - target += instruction->disp; - assert(gpa == target); -#endif - error = (*mr->handler)(vm, vcpu, MEM_F_WRITE, gpa, size, - &operand, mr->arg1, mr->arg2); - return (error); - default: - return (-1); - } -} - -static int -emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t gpa, - uint64_t cr3, - const struct decoded_instruction *instruction, - struct mem_range *mr) -{ - uint64_t operand; - int error; - - error = get_operand(vm, vcpu, gpa, cr3, instruction, &operand, mr); - if (error) - return (error); - - return perform_write(vm, vcpu, gpa, cr3, instruction, operand, mr); -} - -int -emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3, - uint64_t gpa, int flags, struct mem_range *mr) -{ - struct decoded_instruction instr; - int error; - void *instruction; - - instruction = gla2hla(rip, cr3); - - error = decode_instruction(instruction, &instr); - if (!error) - error = emulate_decoded_instruction(vm, vcpu, gpa, cr3, - &instr, mr); - - return (error); -} diff --git a/usr.sbin/bhyve/instruction_emul.h b/usr.sbin/bhyve/instruction_emul.h 
deleted file mode 100644 index ef85796..0000000 --- a/usr.sbin/bhyve/instruction_emul.h +++ /dev/null @@ -1,36 +0,0 @@ -/*- - * Copyright (c) 2012 Sandvine, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _INSTRUCTION_EMUL_H_ -#define _INSTRUCTION_EMUL_H_ - -int emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, - uint64_t cr3, uint64_t gpa, int flags, - struct mem_range *mr); - -#endif diff --git a/usr.sbin/bhyve/ioapic.c b/usr.sbin/bhyve/ioapic.c index ea6e47c..47dd833 100644 --- a/usr.sbin/bhyve/ioapic.c +++ b/usr.sbin/bhyve/ioapic.c @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include "inout.h" #include "mem.h" -#include "instruction_emul.h" #include "fbsdrun.h" #include diff --git a/usr.sbin/bhyve/mem.c b/usr.sbin/bhyve/mem.c index deb91dc..dc43ff4 100644 --- a/usr.sbin/bhyve/mem.c +++ b/usr.sbin/bhyve/mem.c @@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$"); #include #include "mem.h" -#include "instruction_emul.h" struct mmio_rb_range { RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */ @@ -134,33 +133,57 @@ mmio_rb_dump(void) RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); +static int +mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) +{ + int error; + struct mem_range *mr = arg; + + error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size, + rval, mr->arg1, mr->arg2); + return (error); +} + +static int +mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) +{ + int error; + struct mem_range *mr = arg; + + error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size, + &wval, mr->arg1, mr->arg2); + return (error); +} + int emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, uint64_t rip, - uint64_t cr3, int mode) + uint64_t cr3, int mode, struct vie *vie) { struct mmio_rb_range *entry; int err; - err = 0; - /* * First check the per-vCPU cache */ if (mmio_hint[vcpu] && paddr >= mmio_hint[vcpu]->mr_base && paddr <= mmio_hint[vcpu]->mr_end) { - err = emulate_instruction(ctx, vcpu, rip, cr3, paddr, mode, - &mmio_hint[vcpu]->mr_param); - } else { - if (mmio_rb_lookup(paddr, &entry)) { - err = ENOENT; - } else { - mmio_hint[vcpu] = entry; - err = 
emulate_instruction(ctx, vcpu, rip, cr3, paddr, - mode, &entry->mr_param); - } + entry = mmio_hint[vcpu]; + } else + entry = NULL; + + if (entry == NULL) { + if (mmio_rb_lookup(paddr, &entry)) + return (ESRCH); + + /* Update the per-vCPU cache */ + mmio_hint[vcpu] = entry; } + assert(entry != NULL && entry == mmio_hint[vcpu]); + + err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, + mem_read, mem_write, &entry->mr_param); return (err); } diff --git a/usr.sbin/bhyve/mem.h b/usr.sbin/bhyve/mem.h index 53c4f72..b34c1fd 100644 --- a/usr.sbin/bhyve/mem.h +++ b/usr.sbin/bhyve/mem.h @@ -51,7 +51,7 @@ struct mem_range { void init_mem(void); int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, uint64_t rip, - uint64_t cr3, int mode); + uint64_t cr3, int mode, struct vie *vie); int register_mem(struct mem_range *memp); diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index 7ac6e3d..28abb6b 100644 --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -48,7 +48,6 @@ __FBSDID("$FreeBSD$"); #include #include "pci_emul.h" #include "mem.h" -#include "instruction_emul.h" #ifndef _PATH_DEVPCI #define _PATH_DEVPCI "/dev/pci" -- cgit v1.1 From da4e87dfd614fffb88e5a93c988e1caec9c9efe7 Mon Sep 17 00:00:00 2001 From: neel Date: Wed, 28 Nov 2012 13:34:44 +0000 Subject: Cleanup the user-space paging exit handler now that the unified instruction emulation is in place. 
Obtained from: NetApp --- usr.sbin/bhyve/fbsdrun.c | 3 +-- usr.sbin/bhyve/mem.c | 3 +-- usr.sbin/bhyve/mem.h | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c index b1c7098..06e043c 100644 --- a/usr.sbin/bhyve/fbsdrun.c +++ b/usr.sbin/bhyve/fbsdrun.c @@ -453,8 +453,7 @@ vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) int err; stats.vmexit_paging++; - err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip, - vmexit->u.paging.cr3, vmexit->u.paging.rwx, + err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, &vmexit->u.paging.vie); if (err) { diff --git a/usr.sbin/bhyve/mem.c b/usr.sbin/bhyve/mem.c index dc43ff4..27f4782 100644 --- a/usr.sbin/bhyve/mem.c +++ b/usr.sbin/bhyve/mem.c @@ -156,8 +156,7 @@ mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) } int -emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, uint64_t rip, - uint64_t cr3, int mode, struct vie *vie) +emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie) { struct mmio_rb_range *entry; int err; diff --git a/usr.sbin/bhyve/mem.h b/usr.sbin/bhyve/mem.h index b34c1fd..88fafe1 100644 --- a/usr.sbin/bhyve/mem.h +++ b/usr.sbin/bhyve/mem.h @@ -50,8 +50,7 @@ struct mem_range { #define MEM_F_RW 0x3 void init_mem(void); -int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, uint64_t rip, - uint64_t cr3, int mode, struct vie *vie); +int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie); int register_mem(struct mem_range *memp); -- cgit v1.1 From d45d8a8d668a9ee12073939bf775c404be8cf175 Mon Sep 17 00:00:00 2001 From: grehan Date: Fri, 30 Nov 2012 07:00:14 +0000 Subject: - Add in an XSDT to stop acpidump from exiting with a 'XSDT corrupted' error - Fix up OEMID/OEM Table ID string padding in the DSDT. Output on a verbose boot now looks like ... 
ACPI: RSDP 0xf0400 00024 (v02 BHYVE ) ACPI: XSDT 0xf0480 00034 (v01 BHYVE BVXSDT 00000001 INTL 20120320) ACPI: APIC 0xf0500 0004A (v01 BHYVE BVMADT 00000001 INTL 20120320) ACPI: FACP 0xf0600 0010C (v05 BHYVE BVFACP 00000001 INTL 20120320) ACPI: DSDT 0xf0800 000F2 (v02 BHYVE BVDSDT 00000001 INTL 20120320) ACPI: FACS 0xf0780 00040 ... Obtained from: NetApp --- usr.sbin/bhyve/acpi.c | 56 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 8 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index 1fb553e..1671550 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -40,11 +40,12 @@ * Layout * ------ * RSDP -> 0xf0400 (36 bytes fixed) - * RSDT -> 0xf0440 (36 bytes + 4*N table addrs, 2 used) - * MADT -> 0xf04a0 (depends on #CPUs) - * FADT -> 0xf0600 (268 bytes) - * FACS -> 0xf0780 (64 bytes) - * DSDT -> 0xf0800 (variable - can go up to 0x100000) + * RSDT -> 0xf0440 (36 bytes + 4*N table addrs, 2 used) + * XSDT -> 0xf0480 (36 bytes + 8*N table addrs, 2 used) + * MADT -> 0xf0500 (depends on #CPUs) + * FADT -> 0xf0600 (268 bytes) + * FACS -> 0xf0780 (64 bytes) + * DSDT -> 0xf0800 (variable - can go up to 0x100000) */ #include @@ -69,7 +70,8 @@ __FBSDID("$FreeBSD$"); */ #define BHYVE_ACPI_BASE 0xf0400 #define RSDT_OFFSET 0x040 -#define MADT_OFFSET 0x0a0 +#define XSDT_OFFSET 0x080 +#define MADT_OFFSET 0x100 #define FADT_OFFSET 0x200 #define FACS_OFFSET 0x380 #define DSDT_OFFSET 0x400 @@ -121,7 +123,8 @@ basl_fwrite_rsdp(FILE *fp) EFPRINTF(fp, "[0004]\t\tRSDT Address : %08X\n", basl_acpi_base + RSDT_OFFSET); EFPRINTF(fp, "[0004]\t\tLength : 00000024\n"); - EFPRINTF(fp, "[0008]\t\tXSDT Address : 0000000000000000\n"); + EFPRINTF(fp, "[0008]\t\tXSDT Address : 00000000%08X\n", + basl_acpi_base + XSDT_OFFSET); EFPRINTF(fp, "[0001]\t\tExtended Checksum : 00\n"); EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); @@ -170,6 +173,42 @@ err_exit: } static int +basl_fwrite_xsdt(FILE *fp) +{ + int 
err; + + err = 0; + + EFPRINTF(fp, "/*\n"); + EFPRINTF(fp, " * bhyve XSDT template\n"); + EFPRINTF(fp, " */\n"); + EFPRINTF(fp, "[0004]\t\tSignature : \"XSDT\"\n"); + EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); + EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); + EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); + EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); + EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVXSDT \"\n"); + EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); + /* iasl will fill in the compiler ID/revision fields */ + EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); + EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); + EFPRINTF(fp, "\n"); + + /* Add in pointers to the MADT and FADT */ + EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : 00000000%08X\n", + basl_acpi_base + MADT_OFFSET); + EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : 00000000%08X\n", + basl_acpi_base + FADT_OFFSET); + + EFFLUSH(fp); + + return (0); + +err_exit: + return (errno); +} + +static int basl_fwrite_madt(FILE *fp) { int err; @@ -491,7 +530,7 @@ basl_fwrite_dsdt(FILE *fp) EFPRINTF(fp, " * bhyve DSDT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "DefinitionBlock (\"bhyve_dsdt.aml\", \"DSDT\", 2," - "\"BHYV\", \"BVDSDT\", 0x00000001)\n"); + "\"BHYVE \", \"BVDSDT \", 0x00000001)\n"); EFPRINTF(fp, "{\n"); EFPRINTF(fp, " Scope (_SB)\n"); EFPRINTF(fp, " {\n"); @@ -752,6 +791,7 @@ static struct { { { basl_fwrite_rsdp, 0}, { basl_fwrite_rsdt, RSDT_OFFSET }, + { basl_fwrite_xsdt, XSDT_OFFSET }, { basl_fwrite_madt, MADT_OFFSET }, { basl_fwrite_fadt, FADT_OFFSET }, { basl_fwrite_facs, FACS_OFFSET }, -- cgit v1.1 From ad362d1440f4ef139f2108cda83df3bded25f5f7 Mon Sep 17 00:00:00 2001 From: grehan Date: Sat, 8 Dec 2012 02:37:18 +0000 Subject: Determine the correct length and sector size for raw devices. 
Obtained from: NetApp Tested by: Michael Dexter with iscsi LUNs --- usr.sbin/bhyve/pci_virtio_block.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c index 916c7c3..8c98615 100644 --- a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -331,7 +332,9 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct stat sbuf; struct pci_vtblk_softc *sc; + off_t size; int fd; + int sectsz; if (opts == NULL) { printf("virtio-block: backing device required\n"); @@ -359,7 +362,23 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) close(fd); return (1); } - + + /* + * Deal with raw devices + */ + size = sbuf.st_size; + sectsz = DEV_BSIZE; + if (S_ISCHR(sbuf.st_mode)) { + if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || + ioctl(fd, DIOCGSECTORSIZE, &sectsz)) { + perror("Could not fetch dev blk/sector size"); + close(fd); + return (1); + } + assert(size != 0); + assert(sectsz != 0); + } + sc = malloc(sizeof(struct pci_vtblk_softc)); memset(sc, 0, sizeof(struct pci_vtblk_softc)); @@ -368,9 +387,9 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) sc->vbsc_fd = fd; /* setup virtio block config space */ - sc->vbsc_cfg.vbc_capacity = sbuf.st_size / DEV_BSIZE; + sc->vbsc_cfg.vbc_capacity = size / sectsz; sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS; - sc->vbsc_cfg.vbc_blk_size = DEV_BSIZE; + sc->vbsc_cfg.vbc_blk_size = sectsz; sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */ sc->vbsc_cfg.vbc_geom_h = 0; -- cgit v1.1 From da369593398d675703857dee838cef0fe00d4082 Mon Sep 17 00:00:00 2001 From: grehan Date: Wed, 12 Dec 2012 19:25:48 +0000 Subject: Create unique MAC addresses for virtio devices that are created with non-zero PCI 
function numbers. Remove obsolete reference to CFE. Obtained from: NetApp --- usr.sbin/bhyve/pci_virtio_net.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index e3ef573..be848c6 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -548,14 +548,15 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* * The MAC address is the standard NetApp OUI of 00-a0-98, - * followed by an MD5 of the vm name. The slot number is - * prepended to this for slots other than 1, so that - * CFE can netboot from the equivalent of slot 1. + * followed by an MD5 of the vm name. The slot/func number is + * prepended to this for slots other than 1:0, so that + * a bootloader can netboot from the equivalent of slot 1. */ - if (pi->pi_slot == 1) { + if (pi->pi_slot == 1 && pi->pi_func == 0) { strncpy(nstr, vmname, sizeof(nstr)); } else { - snprintf(nstr, sizeof(nstr), "%d-%s", pi->pi_slot, vmname); + snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, + pi->pi_func, vmname); } MD5Init(&mdctx); -- cgit v1.1 From 56990e914687fa80fb72b9d4dbbfc174ede6bc6d Mon Sep 17 00:00:00 2001 From: grehan Date: Wed, 12 Dec 2012 19:45:36 +0000 Subject: Properly reset the tx/rx rings when a guest requests a device reset. 
Obtained from: NetApp --- usr.sbin/bhyve/pci_virtio_net.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index be848c6..9e7a1c7 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -178,10 +178,29 @@ pci_vtnet_qsize(int qnum) } static void +pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring) +{ + struct vring_hqueue *hq; + + assert(ring < VTNET_MAXQ); + + hq = &sc->vsc_hq[ring]; + + /* + * Reset all soft state + */ + hq->hq_cur_aidx = 0; +} + +static void pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value) { + if (value == 0) { DPRINTF(("vtnet: device reset requested !\n")); + pci_vtnet_ring_reset(sc, VTNET_RXQ); + pci_vtnet_ring_reset(sc, VTNET_TXQ); + sc->vsc_rx_ready = 0; } sc->vsc_status = value; -- cgit v1.1 From 70b50f16469543eddc3a9958b4a973bec1e41f52 Mon Sep 17 00:00:00 2001 From: grehan Date: Thu, 13 Dec 2012 01:58:11 +0000 Subject: Rename fbsdrun.* -> bhyverun.* bhyve is intended to be a generic hypervisor, and not FreeBSD-specific. 
(renaming internal routines will come later) Reviewed by: neel Obtained from: NetApp --- usr.sbin/bhyve/Makefile | 2 +- usr.sbin/bhyve/acpi.c | 2 +- usr.sbin/bhyve/bhyverun.c | 770 ++++++++++++++++++++++++++++++++++++++ usr.sbin/bhyve/bhyverun.h | 53 +++ usr.sbin/bhyve/fbsdrun.c | 770 -------------------------------------- usr.sbin/bhyve/fbsdrun.h | 53 --- usr.sbin/bhyve/ioapic.c | 2 +- usr.sbin/bhyve/mptbl.c | 2 +- usr.sbin/bhyve/pci_emul.c | 2 +- usr.sbin/bhyve/pci_uart.c | 2 +- usr.sbin/bhyve/pci_virtio_block.c | 2 +- usr.sbin/bhyve/pci_virtio_net.c | 2 +- usr.sbin/bhyve/pit_8254.c | 2 +- usr.sbin/bhyve/spinup_ap.c | 2 +- 14 files changed, 833 insertions(+), 833 deletions(-) create mode 100644 usr.sbin/bhyve/bhyverun.c create mode 100644 usr.sbin/bhyve/bhyverun.h delete mode 100644 usr.sbin/bhyve/fbsdrun.c delete mode 100644 usr.sbin/bhyve/fbsdrun.h (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index c45b904..078ef9a 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -6,7 +6,7 @@ PROG= bhyve DEBUG_FLAGS= -g -O0 -SRCS= acpi.c atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c +SRCS= acpi.c atpic.c bhyverun.c consport.c dbgport.c elcr.c inout.c SRCS+= ioapic.c mem.c mevent.c mptbl.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c pmtmr.c post.c rtc.c uart.c diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index 1671550..32effdc 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -61,7 +61,7 @@ __FBSDID("$FreeBSD$"); #include #include -#include "fbsdrun.h" +#include "bhyverun.h" #include "acpi.h" /* diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c new file mode 100644 index 0000000..fd3d8a2 --- /dev/null +++ b/usr.sbin/bhyve/bhyverun.c @@ -0,0 +1,770 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bhyverun.h" +#include "acpi.h" +#include "inout.h" +#include "dbgport.h" +#include "mem.h" +#include "mevent.h" +#include "mptbl.h" +#include "pci_emul.h" +#include "xmsr.h" +#include "ioapic.h" +#include "spinup_ap.h" + +#define DEFAULT_GUEST_HZ 100 +#define DEFAULT_GUEST_TSLICE 200 + +#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ + +#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ +#define VMEXIT_CONTINUE 1 /* continue from next instruction */ +#define VMEXIT_RESTART 2 /* restart current instruction */ +#define VMEXIT_ABORT 3 /* abort the vm run loop */ +#define VMEXIT_RESET 4 /* guest machine has reset */ + +#define MB (1024UL * 1024) +#define GB (1024UL * MB) + +typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); + +int guest_tslice = DEFAULT_GUEST_TSLICE; +int guest_hz = DEFAULT_GUEST_HZ; +char *vmname; + +u_long lomem_sz; +u_long himem_sz; + +int guest_ncpus; + +static int pincpu = -1; +static int guest_vcpu_mux; +static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; + +static int foundcpus; + +static int strictio; + +static int acpi; + +static char *lomem_addr; +static char *himem_addr; + +static char *progname; +static const int BSP = 0; + +static int cpumask; + +static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); + +struct vm_exit vmexit[VM_MAXCPU]; + +struct fbsdstats { + uint64_t vmexit_bogus; + uint64_t vmexit_bogus_switch; + uint64_t vmexit_hlt; + uint64_t vmexit_pause; + uint64_t vmexit_mtrap; + uint64_t vmexit_paging; + uint64_t cpu_switch_rotate; + uint64_t cpu_switch_direct; + int io_reset; +} stats; + +struct mt_vmm_info { + pthread_t mt_thr; + struct vmctx *mt_ctx; + int mt_vcpu; +} mt_vmm_info[VM_MAXCPU]; + +static void +usage(int code) +{ 
+ + fprintf(stderr, + "Usage: %s [-aehABHIP][-g ][-z ][-s ]" + "[-S ][-p pincpu][-n ][-m lowmem][-M highmem] \n" + " -a: local apic is in XAPIC mode (default is X2APIC)\n" + " -A: create an ACPI table\n" + " -g: gdb port (default is %d and 0 means don't open)\n" + " -c: # cpus (default 1)\n" + " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" + " -B: inject breakpoint exception on vm entry\n" + " -H: vmexit from the guest on hlt\n" + " -I: present an ioapic to the guest\n" + " -P: vmexit from the guest on pause\n" + " -e: exit on unhandled i/o access\n" + " -h: help\n" + " -z: guest hz (default is %d)\n" + " -s: PCI slot config\n" + " -S: legacy PCI slot config\n" + " -m: lowmem in MB\n" + " -M: highmem in MB\n" + " -x: mux vcpus to 1 hcpu\n" + " -t: mux vcpu timeslice hz (default %d)\n", + progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ, + DEFAULT_GUEST_TSLICE); + exit(code); +} + +void * +paddr_guest2host(uintptr_t gaddr) +{ + if (lomem_sz == 0) + return (NULL); + + if (gaddr < lomem_sz) { + return ((void *)(lomem_addr + gaddr)); + } else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) { + return ((void *)(himem_addr + gaddr - 4*GB)); + } else + return (NULL); +} + +int +fbsdrun_disable_x2apic(void) +{ + + return (disable_x2apic); +} + +int +fbsdrun_vmexit_on_pause(void) +{ + + return (guest_vmexit_on_pause); +} + +int +fbsdrun_vmexit_on_hlt(void) +{ + + return (guest_vmexit_on_hlt); +} + +int +fbsdrun_muxed(void) +{ + + return (guest_vcpu_mux); +} + +static void * +fbsdrun_start_thread(void *param) +{ + char tname[MAXCOMLEN + 1]; + struct mt_vmm_info *mtp; + int vcpu; + + mtp = param; + vcpu = mtp->mt_vcpu; + + snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu); + pthread_set_name_np(mtp->mt_thr, tname); + + vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); + + /* not reached */ + exit(1); + return (NULL); +} + +void +fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) +{ + int error; + + if (cpumask & (1 << vcpu)) { + fprintf(stderr, "addcpu: attempting to 
add existing cpu %d\n", + vcpu); + exit(1); + } + + cpumask |= 1 << vcpu; + foundcpus++; + + /* + * Set up the vmexit struct to allow execution to start + * at the given RIP + */ + vmexit[vcpu].rip = rip; + vmexit[vcpu].inst_length = 0; + + if (vcpu == BSP || !guest_vcpu_mux){ + mt_vmm_info[vcpu].mt_ctx = ctx; + mt_vmm_info[vcpu].mt_vcpu = vcpu; + + error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, + fbsdrun_start_thread, &mt_vmm_info[vcpu]); + assert(error == 0); + } +} + +static int +fbsdrun_get_next_cpu(int curcpu) +{ + + /* + * Get the next available CPU. Assumes they arrive + * in ascending order with no gaps. + */ + return ((curcpu + 1) % foundcpus); +} + +static int +vmexit_catch_reset(void) +{ + stats.io_reset++; + return (VMEXIT_RESET); +} + +static int +vmexit_catch_inout(void) +{ + return (VMEXIT_ABORT); +} + +static int +vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, + uint32_t eax) +{ +#if PG_DEBUG /* put all types of debug here */ + if (eax == 0) { + pause_noswitch = 1; + } else if (eax == 1) { + pause_noswitch = 0; + } else { + pause_noswitch = 0; + if (eax == 5) { + vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1); + } + } +#endif + return (VMEXIT_CONTINUE); +} + +static int +vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) +{ + int error; + int bytes, port, in, out; + uint32_t eax; + int vcpu; + + vcpu = *pvcpu; + + port = vme->u.inout.port; + bytes = vme->u.inout.bytes; + eax = vme->u.inout.eax; + in = vme->u.inout.in; + out = !in; + + /* We don't deal with these */ + if (vme->u.inout.string || vme->u.inout.rep) + return (VMEXIT_ABORT); + + /* Special case of guest reset */ + if (out && port == 0x64 && (uint8_t)eax == 0xFE) + return (vmexit_catch_reset()); + + /* Extra-special case of host notifications */ + if (out && port == GUEST_NIO_PORT) + return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); + + error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); + if (error == 0 && in) + 
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); + + if (error == 0) + return (VMEXIT_CONTINUE); + else { + fprintf(stderr, "Unhandled %s%c 0x%04x\n", + in ? "in" : "out", + bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); + return (vmexit_catch_inout()); + } +} + +static int +vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) +{ + fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, + *pvcpu); + return (VMEXIT_ABORT); +} + +static int +vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) +{ + int newcpu; + int retval = VMEXIT_CONTINUE; + + newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); + + if (guest_vcpu_mux && *pvcpu != newcpu) { + retval = VMEXIT_SWITCH; + *pvcpu = newcpu; + } + + return (retval); +} + +static int +vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) +{ + int newcpu; + int retval = VMEXIT_CONTINUE; + + newcpu = spinup_ap(ctx, *pvcpu, + vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); + + if (guest_vcpu_mux && *pvcpu != newcpu) { + retval = VMEXIT_SWITCH; + *pvcpu = newcpu; + } + + return (retval); +} + +static int +vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + fprintf(stderr, "vm exit[%d]\n", *pvcpu); + fprintf(stderr, "\treason\t\tVMX\n"); + fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); + fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); + fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error); + fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); + fprintf(stderr, "\tqualification\t0x%016lx\n", + vmexit->u.vmx.exit_qualification); + + return (VMEXIT_ABORT); +} + +static int bogus_noswitch = 1; + +static int +vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + stats.vmexit_bogus++; + + if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) { + return (VMEXIT_RESTART); + } else { + stats.vmexit_bogus_switch++; + vmexit->inst_length = 0; + *pvcpu = -1; + return 
(VMEXIT_SWITCH); + } +} + +static int +vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + stats.vmexit_hlt++; + if (fbsdrun_muxed()) { + *pvcpu = -1; + return (VMEXIT_SWITCH); + } else { + /* + * Just continue execution with the next instruction. We use + * the HLT VM exit as a way to be friendly with the host + * scheduler. + */ + return (VMEXIT_CONTINUE); + } +} + +static int pause_noswitch; + +static int +vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + stats.vmexit_pause++; + + if (fbsdrun_muxed() && !pause_noswitch) { + *pvcpu = -1; + return (VMEXIT_SWITCH); + } else { + return (VMEXIT_CONTINUE); + } +} + +static int +vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + stats.vmexit_mtrap++; + + return (VMEXIT_RESTART); +} + +static int +vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + int err; + stats.vmexit_paging++; + + err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, + &vmexit->u.paging.vie); + + if (err) { + if (err == EINVAL) { + fprintf(stderr, + "Failed to emulate instruction at 0x%lx\n", + vmexit->rip); + } else if (err == ESRCH) { + fprintf(stderr, "Unhandled memory access to 0x%lx\n", + vmexit->u.paging.gpa); + } + + return (VMEXIT_ABORT); + } + + return (VMEXIT_CONTINUE); +} + +static void +sigalrm(int sig) +{ + return; +} + +static void +setup_timeslice(void) +{ + struct sigaction sa; + struct itimerval itv; + int error; + + /* + * Setup a realtime timer to generate a SIGALRM at a + * frequency of 'guest_tslice' ticks per second. 
+ */ + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = sigalrm; + + error = sigaction(SIGALRM, &sa, NULL); + assert(error == 0); + + itv.it_interval.tv_sec = 0; + itv.it_interval.tv_usec = 1000000 / guest_tslice; + itv.it_value.tv_sec = 0; + itv.it_value.tv_usec = 1000000 / guest_tslice; + + error = setitimer(ITIMER_REAL, &itv, NULL); + assert(error == 0); +} + +static vmexit_handler_t handler[VM_EXITCODE_MAX] = { + [VM_EXITCODE_INOUT] = vmexit_inout, + [VM_EXITCODE_VMX] = vmexit_vmx, + [VM_EXITCODE_BOGUS] = vmexit_bogus, + [VM_EXITCODE_RDMSR] = vmexit_rdmsr, + [VM_EXITCODE_WRMSR] = vmexit_wrmsr, + [VM_EXITCODE_MTRAP] = vmexit_mtrap, + [VM_EXITCODE_PAGING] = vmexit_paging, + [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, +}; + +static void +vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) +{ + int error, rc, prevcpu; + + if (guest_vcpu_mux) + setup_timeslice(); + + if (pincpu >= 0) { + error = vm_set_pinning(ctx, vcpu, pincpu + vcpu); + assert(error == 0); + } + + while (1) { + error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); + if (error != 0) { + /* + * It is possible that 'vmmctl' or some other process + * has transitioned the vcpu to CANNOT_RUN state right + * before we tried to transition it to RUNNING. + * + * This is expected to be temporary so just retry. 
+ */ + if (errno == EBUSY) + continue; + else + break; + } + + prevcpu = vcpu; + rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], + &vcpu); + switch (rc) { + case VMEXIT_SWITCH: + assert(guest_vcpu_mux); + if (vcpu == -1) { + stats.cpu_switch_rotate++; + vcpu = fbsdrun_get_next_cpu(prevcpu); + } else { + stats.cpu_switch_direct++; + } + /* fall through */ + case VMEXIT_CONTINUE: + rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; + break; + case VMEXIT_RESTART: + rip = vmexit[vcpu].rip; + break; + case VMEXIT_RESET: + exit(0); + default: + exit(1); + } + } + fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); +} + + +int +main(int argc, char *argv[]) +{ + int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons; + struct vmctx *ctx; + uint64_t rip; + + bvmcons = 0; + inject_bkpt = 0; + progname = basename(argv[0]); + gdb_port = DEFAULT_GDB_PORT; + guest_ncpus = 1; + ioapic = 0; + + while ((c = getopt(argc, argv, "abehABHIPxp:g:c:z:s:S:n:m:M:")) != -1) { + switch (c) { + case 'a': + disable_x2apic = 1; + break; + case 'A': + acpi = 1; + break; + case 'b': + bvmcons = 1; + break; + case 'B': + inject_bkpt = 1; + break; + case 'x': + guest_vcpu_mux = 1; + break; + case 'p': + pincpu = atoi(optarg); + break; + case 'c': + guest_ncpus = atoi(optarg); + break; + case 'g': + gdb_port = atoi(optarg); + break; + case 'z': + guest_hz = atoi(optarg); + break; + case 't': + guest_tslice = atoi(optarg); + break; + case 's': + pci_parse_slot(optarg, 0); + break; + case 'S': + pci_parse_slot(optarg, 1); + break; + case 'm': + lomem_sz = strtoul(optarg, NULL, 0) * MB; + break; + case 'M': + himem_sz = strtoul(optarg, NULL, 0) * MB; + break; + case 'H': + guest_vmexit_on_hlt = 1; + break; + case 'I': + ioapic = 1; + break; + case 'P': + guest_vmexit_on_pause = 1; + break; + case 'e': + strictio = 1; + break; + case 'h': + usage(0); + default: + usage(1); + } + } + argc -= optind; + argv += optind; + + if (argc != 1) + usage(1); + + /* No need to mux if 
guest is uni-processor */ + if (guest_ncpus <= 1) + guest_vcpu_mux = 0; + + if (guest_ncpus > VM_MAXCPU) { + fprintf(stderr, "%d vCPUs requested, max %d\n", + guest_ncpus, VM_MAXCPU); + exit(1); + } + + /* vmexit on hlt if guest is muxed */ + if (guest_vcpu_mux) { + guest_vmexit_on_hlt = 1; + guest_vmexit_on_pause = 1; + } + + vmname = argv[0]; + + ctx = vm_open(vmname); + if (ctx == NULL) { + perror("vm_open"); + exit(1); + } + + if (fbsdrun_vmexit_on_hlt()) { + err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); + if (err < 0) { + fprintf(stderr, "VM exit on HLT not supported\n"); + exit(1); + } + vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); + handler[VM_EXITCODE_HLT] = vmexit_hlt; + } + + if (fbsdrun_vmexit_on_pause()) { + /* + * pause exit support required for this mode + */ + err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp); + if (err < 0) { + fprintf(stderr, + "SMP mux requested, no pause support\n"); + exit(1); + } + vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); + handler[VM_EXITCODE_PAUSE] = vmexit_pause; + } + + if (fbsdrun_disable_x2apic()) + err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED); + else + err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); + + if (err) { + fprintf(stderr, "Unable to set x2apic state (%d)\n", err); + exit(1); + } + + if (lomem_sz != 0) { + lomem_addr = vm_map_memory(ctx, 0, lomem_sz); + if (lomem_addr == (char *) MAP_FAILED) { + lomem_sz = 0; + } else if (himem_sz != 0) { + himem_addr = vm_map_memory(ctx, 4*GB, himem_sz); + if (himem_addr == (char *) MAP_FAILED) { + lomem_sz = 0; + himem_sz = 0; + } + } + } + + init_inout(); + init_pci(ctx); + if (ioapic) + ioapic_init(0); + + if (gdb_port != 0) + init_dbgport(gdb_port); + + if (bvmcons) + init_bvmcons(); + + error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); + assert(error == 0); + + if (inject_bkpt) { + error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP); + assert(error == 0); + } + + /* + * build the guest tables, MP etc. 
+ */ + mptable_build(ctx, guest_ncpus, ioapic); + + if (acpi) { + error = acpi_build(ctx, guest_ncpus, ioapic); + assert(error == 0); + } + + /* + * Add CPU 0 + */ + fbsdrun_addcpu(ctx, BSP, rip); + + /* + * Head off to the main event dispatch loop + */ + mevent_dispatch(); + + exit(1); +} diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h new file mode 100644 index 0000000..45033b8 --- /dev/null +++ b/usr.sbin/bhyve/bhyverun.h @@ -0,0 +1,53 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _FBSDRUN_H_ +#define _FBSDRUN_H_ + +#ifndef CTASSERT /* Allow lint to override */ +#define CTASSERT(x) _CTASSERT(x, __LINE__) +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] +#endif + +struct vmctx; +extern int guest_hz; +extern int guest_tslice; +extern int guest_ncpus; +extern char *vmname; + +extern u_long lomem_sz, himem_sz; + +void *paddr_guest2host(uintptr_t); + +void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip); +int fbsdrun_muxed(void); +int fbsdrun_vmexit_on_hlt(void); +int fbsdrun_vmexit_on_pause(void); +int fbsdrun_disable_x2apic(void); +#endif diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c deleted file mode 100644 index 06e043c..0000000 --- a/usr.sbin/bhyve/fbsdrun.c +++ /dev/null @@ -1,770 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "fbsdrun.h" -#include "acpi.h" -#include "inout.h" -#include "dbgport.h" -#include "mem.h" -#include "mevent.h" -#include "mptbl.h" -#include "pci_emul.h" -#include "xmsr.h" -#include "ioapic.h" -#include "spinup_ap.h" - -#define DEFAULT_GUEST_HZ 100 -#define DEFAULT_GUEST_TSLICE 200 - -#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ - -#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ -#define VMEXIT_CONTINUE 1 /* continue from next instruction */ -#define VMEXIT_RESTART 2 /* restart current instruction */ -#define VMEXIT_ABORT 3 /* abort the vm run loop */ -#define VMEXIT_RESET 4 /* guest machine has reset */ - -#define MB (1024UL * 1024) -#define GB (1024UL * MB) - -typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); - -int guest_tslice = DEFAULT_GUEST_TSLICE; -int guest_hz = DEFAULT_GUEST_HZ; -char *vmname; - -u_long lomem_sz; -u_long himem_sz; - -int guest_ncpus; - -static int pincpu = -1; -static int guest_vcpu_mux; -static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; - -static int foundcpus; - -static int strictio; - -static int acpi; - -static char *lomem_addr; -static char *himem_addr; - -static char *progname; -static const int BSP = 0; - 
-static int cpumask; - -static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); - -struct vm_exit vmexit[VM_MAXCPU]; - -struct fbsdstats { - uint64_t vmexit_bogus; - uint64_t vmexit_bogus_switch; - uint64_t vmexit_hlt; - uint64_t vmexit_pause; - uint64_t vmexit_mtrap; - uint64_t vmexit_paging; - uint64_t cpu_switch_rotate; - uint64_t cpu_switch_direct; - int io_reset; -} stats; - -struct mt_vmm_info { - pthread_t mt_thr; - struct vmctx *mt_ctx; - int mt_vcpu; -} mt_vmm_info[VM_MAXCPU]; - -static void -usage(int code) -{ - - fprintf(stderr, - "Usage: %s [-aehABHIP][-g ][-z ][-s ]" - "[-S ][-p pincpu][-n ][-m lowmem][-M highmem] \n" - " -a: local apic is in XAPIC mode (default is X2APIC)\n" - " -A: create an ACPI table\n" - " -g: gdb port (default is %d and 0 means don't open)\n" - " -c: # cpus (default 1)\n" - " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" - " -B: inject breakpoint exception on vm entry\n" - " -H: vmexit from the guest on hlt\n" - " -I: present an ioapic to the guest\n" - " -P: vmexit from the guest on pause\n" - " -e: exit on unhandled i/o access\n" - " -h: help\n" - " -z: guest hz (default is %d)\n" - " -s: PCI slot config\n" - " -S: legacy PCI slot config\n" - " -m: lowmem in MB\n" - " -M: highmem in MB\n" - " -x: mux vcpus to 1 hcpu\n" - " -t: mux vcpu timeslice hz (default %d)\n", - progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ, - DEFAULT_GUEST_TSLICE); - exit(code); -} - -void * -paddr_guest2host(uintptr_t gaddr) -{ - if (lomem_sz == 0) - return (NULL); - - if (gaddr < lomem_sz) { - return ((void *)(lomem_addr + gaddr)); - } else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) { - return ((void *)(himem_addr + gaddr - 4*GB)); - } else - return (NULL); -} - -int -fbsdrun_disable_x2apic(void) -{ - - return (disable_x2apic); -} - -int -fbsdrun_vmexit_on_pause(void) -{ - - return (guest_vmexit_on_pause); -} - -int -fbsdrun_vmexit_on_hlt(void) -{ - - return (guest_vmexit_on_hlt); -} - -int -fbsdrun_muxed(void) -{ - - return (guest_vcpu_mux); 
-} - -static void * -fbsdrun_start_thread(void *param) -{ - char tname[MAXCOMLEN + 1]; - struct mt_vmm_info *mtp; - int vcpu; - - mtp = param; - vcpu = mtp->mt_vcpu; - - snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu); - pthread_set_name_np(mtp->mt_thr, tname); - - vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); - - /* not reached */ - exit(1); - return (NULL); -} - -void -fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) -{ - int error; - - if (cpumask & (1 << vcpu)) { - fprintf(stderr, "addcpu: attempting to add existing cpu %d\n", - vcpu); - exit(1); - } - - cpumask |= 1 << vcpu; - foundcpus++; - - /* - * Set up the vmexit struct to allow execution to start - * at the given RIP - */ - vmexit[vcpu].rip = rip; - vmexit[vcpu].inst_length = 0; - - if (vcpu == BSP || !guest_vcpu_mux){ - mt_vmm_info[vcpu].mt_ctx = ctx; - mt_vmm_info[vcpu].mt_vcpu = vcpu; - - error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, - fbsdrun_start_thread, &mt_vmm_info[vcpu]); - assert(error == 0); - } -} - -static int -fbsdrun_get_next_cpu(int curcpu) -{ - - /* - * Get the next available CPU. Assumes they arrive - * in ascending order with no gaps. 
- */ - return ((curcpu + 1) % foundcpus); -} - -static int -vmexit_catch_reset(void) -{ - stats.io_reset++; - return (VMEXIT_RESET); -} - -static int -vmexit_catch_inout(void) -{ - return (VMEXIT_ABORT); -} - -static int -vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, - uint32_t eax) -{ -#if PG_DEBUG /* put all types of debug here */ - if (eax == 0) { - pause_noswitch = 1; - } else if (eax == 1) { - pause_noswitch = 0; - } else { - pause_noswitch = 0; - if (eax == 5) { - vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1); - } - } -#endif - return (VMEXIT_CONTINUE); -} - -static int -vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) -{ - int error; - int bytes, port, in, out; - uint32_t eax; - int vcpu; - - vcpu = *pvcpu; - - port = vme->u.inout.port; - bytes = vme->u.inout.bytes; - eax = vme->u.inout.eax; - in = vme->u.inout.in; - out = !in; - - /* We don't deal with these */ - if (vme->u.inout.string || vme->u.inout.rep) - return (VMEXIT_ABORT); - - /* Special case of guest reset */ - if (out && port == 0x64 && (uint8_t)eax == 0xFE) - return (vmexit_catch_reset()); - - /* Extra-special case of host notifications */ - if (out && port == GUEST_NIO_PORT) - return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); - - error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); - if (error == 0 && in) - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); - - if (error == 0) - return (VMEXIT_CONTINUE); - else { - fprintf(stderr, "Unhandled %s%c 0x%04x\n", - in ? "in" : "out", - bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), port); - return (vmexit_catch_inout()); - } -} - -static int -vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) -{ - fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, - *pvcpu); - return (VMEXIT_ABORT); -} - -static int -vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) -{ - int newcpu; - int retval = VMEXIT_CONTINUE; - - newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); - - if (guest_vcpu_mux && *pvcpu != newcpu) { - retval = VMEXIT_SWITCH; - *pvcpu = newcpu; - } - - return (retval); -} - -static int -vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) -{ - int newcpu; - int retval = VMEXIT_CONTINUE; - - newcpu = spinup_ap(ctx, *pvcpu, - vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); - - if (guest_vcpu_mux && *pvcpu != newcpu) { - retval = VMEXIT_SWITCH; - *pvcpu = newcpu; - } - - return (retval); -} - -static int -vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - - fprintf(stderr, "vm exit[%d]\n", *pvcpu); - fprintf(stderr, "\treason\t\tVMX\n"); - fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); - fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); - fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error); - fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); - fprintf(stderr, "\tqualification\t0x%016lx\n", - vmexit->u.vmx.exit_qualification); - - return (VMEXIT_ABORT); -} - -static int bogus_noswitch = 1; - -static int -vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - stats.vmexit_bogus++; - - if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) { - return (VMEXIT_RESTART); - } else { - stats.vmexit_bogus_switch++; - vmexit->inst_length = 0; - *pvcpu = -1; - return (VMEXIT_SWITCH); - } -} - -static int -vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - stats.vmexit_hlt++; - if (fbsdrun_muxed()) { - *pvcpu = -1; - return (VMEXIT_SWITCH); - } else { - /* - * Just continue 
execution with the next instruction. We use - * the HLT VM exit as a way to be friendly with the host - * scheduler. - */ - return (VMEXIT_CONTINUE); - } -} - -static int pause_noswitch; - -static int -vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - stats.vmexit_pause++; - - if (fbsdrun_muxed() && !pause_noswitch) { - *pvcpu = -1; - return (VMEXIT_SWITCH); - } else { - return (VMEXIT_CONTINUE); - } -} - -static int -vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - stats.vmexit_mtrap++; - - return (VMEXIT_RESTART); -} - -static int -vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - int err; - stats.vmexit_paging++; - - err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, - &vmexit->u.paging.vie); - - if (err) { - if (err == EINVAL) { - fprintf(stderr, - "Failed to emulate instruction at 0x%lx\n", - vmexit->rip); - } else if (err == ESRCH) { - fprintf(stderr, "Unhandled memory access to 0x%lx\n", - vmexit->u.paging.gpa); - } - - return (VMEXIT_ABORT); - } - - return (VMEXIT_CONTINUE); -} - -static void -sigalrm(int sig) -{ - return; -} - -static void -setup_timeslice(void) -{ - struct sigaction sa; - struct itimerval itv; - int error; - - /* - * Setup a realtime timer to generate a SIGALRM at a - * frequency of 'guest_tslice' ticks per second. 
- */ - sigemptyset(&sa.sa_mask); - sa.sa_flags = 0; - sa.sa_handler = sigalrm; - - error = sigaction(SIGALRM, &sa, NULL); - assert(error == 0); - - itv.it_interval.tv_sec = 0; - itv.it_interval.tv_usec = 1000000 / guest_tslice; - itv.it_value.tv_sec = 0; - itv.it_value.tv_usec = 1000000 / guest_tslice; - - error = setitimer(ITIMER_REAL, &itv, NULL); - assert(error == 0); -} - -static vmexit_handler_t handler[VM_EXITCODE_MAX] = { - [VM_EXITCODE_INOUT] = vmexit_inout, - [VM_EXITCODE_VMX] = vmexit_vmx, - [VM_EXITCODE_BOGUS] = vmexit_bogus, - [VM_EXITCODE_RDMSR] = vmexit_rdmsr, - [VM_EXITCODE_WRMSR] = vmexit_wrmsr, - [VM_EXITCODE_MTRAP] = vmexit_mtrap, - [VM_EXITCODE_PAGING] = vmexit_paging, - [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, -}; - -static void -vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) -{ - int error, rc, prevcpu; - - if (guest_vcpu_mux) - setup_timeslice(); - - if (pincpu >= 0) { - error = vm_set_pinning(ctx, vcpu, pincpu + vcpu); - assert(error == 0); - } - - while (1) { - error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); - if (error != 0) { - /* - * It is possible that 'vmmctl' or some other process - * has transitioned the vcpu to CANNOT_RUN state right - * before we tried to transition it to RUNNING. - * - * This is expected to be temporary so just retry. 
- */ - if (errno == EBUSY) - continue; - else - break; - } - - prevcpu = vcpu; - rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu], - &vcpu); - switch (rc) { - case VMEXIT_SWITCH: - assert(guest_vcpu_mux); - if (vcpu == -1) { - stats.cpu_switch_rotate++; - vcpu = fbsdrun_get_next_cpu(prevcpu); - } else { - stats.cpu_switch_direct++; - } - /* fall through */ - case VMEXIT_CONTINUE: - rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; - break; - case VMEXIT_RESTART: - rip = vmexit[vcpu].rip; - break; - case VMEXIT_RESET: - exit(0); - default: - exit(1); - } - } - fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); -} - - -int -main(int argc, char *argv[]) -{ - int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons; - struct vmctx *ctx; - uint64_t rip; - - bvmcons = 0; - inject_bkpt = 0; - progname = basename(argv[0]); - gdb_port = DEFAULT_GDB_PORT; - guest_ncpus = 1; - ioapic = 0; - - while ((c = getopt(argc, argv, "abehABHIPxp:g:c:z:s:S:n:m:M:")) != -1) { - switch (c) { - case 'a': - disable_x2apic = 1; - break; - case 'A': - acpi = 1; - break; - case 'b': - bvmcons = 1; - break; - case 'B': - inject_bkpt = 1; - break; - case 'x': - guest_vcpu_mux = 1; - break; - case 'p': - pincpu = atoi(optarg); - break; - case 'c': - guest_ncpus = atoi(optarg); - break; - case 'g': - gdb_port = atoi(optarg); - break; - case 'z': - guest_hz = atoi(optarg); - break; - case 't': - guest_tslice = atoi(optarg); - break; - case 's': - pci_parse_slot(optarg, 0); - break; - case 'S': - pci_parse_slot(optarg, 1); - break; - case 'm': - lomem_sz = strtoul(optarg, NULL, 0) * MB; - break; - case 'M': - himem_sz = strtoul(optarg, NULL, 0) * MB; - break; - case 'H': - guest_vmexit_on_hlt = 1; - break; - case 'I': - ioapic = 1; - break; - case 'P': - guest_vmexit_on_pause = 1; - break; - case 'e': - strictio = 1; - break; - case 'h': - usage(0); - default: - usage(1); - } - } - argc -= optind; - argv += optind; - - if (argc != 1) - usage(1); - - /* No need to mux if 
guest is uni-processor */ - if (guest_ncpus <= 1) - guest_vcpu_mux = 0; - - if (guest_ncpus > VM_MAXCPU) { - fprintf(stderr, "%d vCPUs requested, max %d\n", - guest_ncpus, VM_MAXCPU); - exit(1); - } - - /* vmexit on hlt if guest is muxed */ - if (guest_vcpu_mux) { - guest_vmexit_on_hlt = 1; - guest_vmexit_on_pause = 1; - } - - vmname = argv[0]; - - ctx = vm_open(vmname); - if (ctx == NULL) { - perror("vm_open"); - exit(1); - } - - if (fbsdrun_vmexit_on_hlt()) { - err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); - if (err < 0) { - fprintf(stderr, "VM exit on HLT not supported\n"); - exit(1); - } - vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1); - handler[VM_EXITCODE_HLT] = vmexit_hlt; - } - - if (fbsdrun_vmexit_on_pause()) { - /* - * pause exit support required for this mode - */ - err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp); - if (err < 0) { - fprintf(stderr, - "SMP mux requested, no pause support\n"); - exit(1); - } - vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1); - handler[VM_EXITCODE_PAUSE] = vmexit_pause; - } - - if (fbsdrun_disable_x2apic()) - err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED); - else - err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED); - - if (err) { - fprintf(stderr, "Unable to set x2apic state (%d)\n", err); - exit(1); - } - - if (lomem_sz != 0) { - lomem_addr = vm_map_memory(ctx, 0, lomem_sz); - if (lomem_addr == (char *) MAP_FAILED) { - lomem_sz = 0; - } else if (himem_sz != 0) { - himem_addr = vm_map_memory(ctx, 4*GB, himem_sz); - if (himem_addr == (char *) MAP_FAILED) { - lomem_sz = 0; - himem_sz = 0; - } - } - } - - init_inout(); - init_pci(ctx); - if (ioapic) - ioapic_init(0); - - if (gdb_port != 0) - init_dbgport(gdb_port); - - if (bvmcons) - init_bvmcons(); - - error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); - assert(error == 0); - - if (inject_bkpt) { - error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP); - assert(error == 0); - } - - /* - * build the guest tables, MP etc. 
- */ - mptable_build(ctx, guest_ncpus, ioapic); - - if (acpi) { - error = acpi_build(ctx, guest_ncpus, ioapic); - assert(error == 0); - } - - /* - * Add CPU 0 - */ - fbsdrun_addcpu(ctx, BSP, rip); - - /* - * Head off to the main event dispatch loop - */ - mevent_dispatch(); - - exit(1); -} diff --git a/usr.sbin/bhyve/fbsdrun.h b/usr.sbin/bhyve/fbsdrun.h deleted file mode 100644 index 45033b8..0000000 --- a/usr.sbin/bhyve/fbsdrun.h +++ /dev/null @@ -1,53 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _FBSDRUN_H_ -#define _FBSDRUN_H_ - -#ifndef CTASSERT /* Allow lint to override */ -#define CTASSERT(x) _CTASSERT(x, __LINE__) -#define _CTASSERT(x, y) __CTASSERT(x, y) -#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] -#endif - -struct vmctx; -extern int guest_hz; -extern int guest_tslice; -extern int guest_ncpus; -extern char *vmname; - -extern u_long lomem_sz, himem_sz; - -void *paddr_guest2host(uintptr_t); - -void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip); -int fbsdrun_muxed(void); -int fbsdrun_vmexit_on_hlt(void); -int fbsdrun_vmexit_on_pause(void); -int fbsdrun_disable_x2apic(void); -#endif diff --git a/usr.sbin/bhyve/ioapic.c b/usr.sbin/bhyve/ioapic.c index 47dd833..c712692 100644 --- a/usr.sbin/bhyve/ioapic.c +++ b/usr.sbin/bhyve/ioapic.c @@ -42,7 +42,7 @@ __FBSDID("$FreeBSD$"); #include "inout.h" #include "mem.h" -#include "fbsdrun.h" +#include "bhyverun.h" #include diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c index 03aaaee..52790f3 100644 --- a/usr.sbin/bhyve/mptbl.c +++ b/usr.sbin/bhyve/mptbl.c @@ -36,7 +36,7 @@ __FBSDID("$FreeBSD$"); #include #include -#include "fbsdrun.h" +#include "bhyverun.h" #include "mptbl.h" #define MPTABLE_BASE 0xF0000 diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 1836bc9..e086aeb 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -42,7 +42,7 @@ __FBSDID("$FreeBSD$"); #include #include -#include "fbsdrun.h" +#include "bhyverun.h" #include "inout.h" #include "mem.h" #include "mptbl.h" diff --git a/usr.sbin/bhyve/pci_uart.c b/usr.sbin/bhyve/pci_uart.c index 51876f5..3f192e1 100644 --- a/usr.sbin/bhyve/pci_uart.c +++ b/usr.sbin/bhyve/pci_uart.c @@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$"); #include #include -#include "fbsdrun.h" +#include "bhyverun.h" #include "pci_emul.h" #include "mevent.h" diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c index 8c98615..3382097 100644 --- 
a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -47,7 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include -#include "fbsdrun.h" +#include "bhyverun.h" #include "pci_emul.h" #include "virtio.h" diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index 9e7a1c7..3f6f88a 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -47,7 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include -#include "fbsdrun.h" +#include "bhyverun.h" #include "pci_emul.h" #include "mevent.h" #include "virtio.h" diff --git a/usr.sbin/bhyve/pit_8254.c b/usr.sbin/bhyve/pit_8254.c index d9b6d55..c96596a 100644 --- a/usr.sbin/bhyve/pit_8254.c +++ b/usr.sbin/bhyve/pit_8254.c @@ -36,7 +36,7 @@ __FBSDID("$FreeBSD$"); #include #include -#include "fbsdrun.h" +#include "bhyverun.h" #include "inout.h" #include "pit_8254.h" diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c index 3aa3b65..b5e553f 100644 --- a/usr.sbin/bhyve/spinup_ap.c +++ b/usr.sbin/bhyve/spinup_ap.c @@ -39,7 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include -#include "fbsdrun.h" +#include "bhyverun.h" #include "spinup_ap.h" /* -- cgit v1.1 From 5bbdfeda581630ee4ec73f726ddec4465782fe22 Mon Sep 17 00:00:00 2001 From: grehan Date: Thu, 20 Dec 2012 23:01:53 +0000 Subject: Change thread name for the main kqueue event loop to "<vmname> mevent" so it can be easily distinguished from other non-vCPU threads in forthcoming changes.
Obtained from: NetApp --- usr.sbin/bhyve/mevent.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/mevent.c b/usr.sbin/bhyve/mevent.c index 0d3b287..a6109db 100644 --- a/usr.sbin/bhyve/mevent.c +++ b/usr.sbin/bhyve/mevent.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include "mevent.h" @@ -55,6 +56,8 @@ __FBSDID("$FreeBSD$"); #define MEV_DISABLE 2 #define MEV_DEL_PENDING 3 +extern char *vmname; + static pthread_t mevent_tid; static int mevent_pipefd[2]; static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; @@ -356,6 +359,15 @@ mevent_delete_close(struct mevent *evp) return (mevent_delete_event(evp, 1)); } +static void +mevent_set_name(void) +{ + char tname[MAXCOMLEN + 1]; + + snprintf(tname, sizeof(tname), "%s mevent", vmname); + pthread_set_name_np(mevent_tid, tname); +} + void mevent_dispatch(void) { @@ -367,6 +379,7 @@ mevent_dispatch(void) int ret; mevent_tid = pthread_self(); + mevent_set_name(); mfd = kqueue(); assert(mfd > 0); -- cgit v1.1 From 01173b0b4a9b00c153489a51f2cba1b3d0cfc119 Mon Sep 17 00:00:00 2001 From: neel Date: Fri, 4 Jan 2013 02:04:41 +0000 Subject: The "unrestricted guest" capability is a feature of Intel VT-x that allows the guest to execute real or unpaged protected mode code - bhyve relies on this feature to execute the AP bootstrap code. Get rid of the hack that allowed bhyve to support SMP guests on processors that do not have the "unrestricted guest" capability. This hack was entirely FreeBSD-specific and would not work with any other guest OS. Instead, limit the number of vcpus to 1 when executing on processors without "unrestricted guest" capability. 
Suggested by: grehan Obtained from: NetApp --- usr.sbin/bhyve/bhyverun.c | 30 +++++++++++++---- usr.sbin/bhyve/spinup_ap.c | 81 ++++------------------------------------------ 2 files changed, 31 insertions(+), 80 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index fd3d8a2..999040f 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -573,11 +573,28 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); } +static int +num_vcpus_allowed(struct vmctx *ctx) +{ + int tmp, error; + + error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); + + /* + * The guest is allowed to spinup more than one processor only if the + * UNRESTRICTED_GUEST capability is available. + */ + if (error == 0) + return (VM_MAXCPU); + else + return (1); +} int main(int argc, char *argv[]) { int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons; + int max_vcpus; struct vmctx *ctx; uint64_t rip; @@ -660,12 +677,6 @@ main(int argc, char *argv[]) if (guest_ncpus <= 1) guest_vcpu_mux = 0; - if (guest_ncpus > VM_MAXCPU) { - fprintf(stderr, "%d vCPUs requested, max %d\n", - guest_ncpus, VM_MAXCPU); - exit(1); - } - /* vmexit on hlt if guest is muxed */ if (guest_vcpu_mux) { guest_vmexit_on_hlt = 1; @@ -680,6 +691,13 @@ main(int argc, char *argv[]) exit(1); } + max_vcpus = num_vcpus_allowed(ctx); + if (guest_ncpus > max_vcpus) { + fprintf(stderr, "%d vCPUs requested but only %d available\n", + guest_ncpus, max_vcpus); + exit(1); + } + if (fbsdrun_vmexit_on_hlt()) { err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c index b5e553f..2632aed 100644 --- a/usr.sbin/bhyve/spinup_ap.c +++ b/usr.sbin/bhyve/spinup_ap.c @@ -42,26 +42,6 @@ __FBSDID("$FreeBSD$"); #include "bhyverun.h" #include "spinup_ap.h" -/* - * Trampoline for hypervisor direct 64-bit jump. 
- * - * 0 - signature for guest->host verification - * 8 - kernel virtual address of trampoline - * 16 - instruction virtual address - * 24 - stack pointer virtual address - * 32 - CR3, physical address of kernel page table - * 40 - 24-byte area for null/code/data GDT entries - */ -#define MP_V64T_SIG 0xcafebabecafebabeULL -struct mp_v64tramp { - uint64_t mt_sig; - uint64_t mt_virt; - uint64_t mt_eip; - uint64_t mt_rsp; - uint64_t mt_cr3; - uint64_t mt_gdtr[3]; -}; - static void spinup_ap_realmode(struct vmctx *ctx, int newcpu, uint64_t *rip) { @@ -94,46 +74,6 @@ spinup_ap_realmode(struct vmctx *ctx, int newcpu, uint64_t *rip) assert(error == 0); } -static void -spinup_ap_direct64(struct vmctx *ctx, int newcpu, uint64_t *rip) -{ - struct mp_v64tramp *mvt; - char *errstr; - int error; - uint64_t gdtbase; - - mvt = paddr_guest2host(*rip); - - assert(mvt->mt_sig == MP_V64T_SIG); - - /* - * Set up the 3-entry GDT using memory supplied in the - * guest's trampoline structure. - */ - vm_setup_freebsd_gdt(mvt->mt_gdtr); - -#define CHECK_ERROR(msg) \ - if (error != 0) { \ - errstr = msg; \ - goto err_exit; \ - } - - /* entry point */ - *rip = mvt->mt_eip; - - /* Get the guest virtual address of the GDT */ - gdtbase = mvt->mt_virt + __offsetof(struct mp_v64tramp, mt_gdtr); - - error = vm_setup_freebsd_registers(ctx, newcpu, mvt->mt_eip, - mvt->mt_cr3, gdtbase, mvt->mt_rsp); - CHECK_ERROR("vm_setup_freebsd_registers"); - - return; -err_exit: - printf("spinup_ap_direct64: machine state error: %s", errstr); - exit(1); -} - int spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip) { @@ -163,22 +103,15 @@ spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip) assert(error == 0); /* - * There are 2 startup modes possible here: - * - if the CPU supports 'unrestricted guest' mode, the spinup can - * set up the processor state in power-on 16-bit mode, with the CS:IP - * init'd to the specified low-mem 4K page. 
- * - if the guest has requested a 64-bit trampoline in the low-mem 4K - * page by placing in the specified signature, set up the register - * state using register state in the signature. Note that this - * requires accessing guest physical memory to read the signature - * while 'unrestricted mode' does not. + * Enable the 'unrestricted guest' mode for 'newcpu'. + * + * Set up the processor state in power-on 16-bit mode, with the CS:IP + * init'd to the specified low-mem 4K page. */ error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1); - if (error) { - spinup_ap_direct64(ctx, newcpu, &rip); - } else { - spinup_ap_realmode(ctx, newcpu, &rip); - } + assert(error == 0); + + spinup_ap_realmode(ctx, newcpu, &rip); fbsdrun_addcpu(ctx, newcpu, rip); -- cgit v1.1 From 540789547feb13a366239f027262bcbdf3ae479a Mon Sep 17 00:00:00 2001 From: grehan Date: Mon, 7 Jan 2013 04:51:43 +0000 Subject: Use 64-bit arithmetic throughout, and lock accesses to globals. With this change, dbench with >= 4 processes runs without getting weird jumps forward in time when the ACPI pmtimer is the default timecounter. 
Obtained from: NetApp --- usr.sbin/bhyve/pmtmr.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pmtmr.c b/usr.sbin/bhyve/pmtmr.c index 11e7ae0..78d14eb 100644 --- a/usr.sbin/bhyve/pmtmr.c +++ b/usr.sbin/bhyve/pmtmr.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "inout.h" @@ -51,36 +52,42 @@ __FBSDID("$FreeBSD$"); #define PMTMR_FREQ 3579545 /* 3.579545MHz */ -static uint32_t pmtmr_tscf; -static uint32_t pmtmr_old; +static pthread_mutex_t pmtmr_mtx; +static uint64_t pmtmr_tscf; +static uint64_t pmtmr_old; static uint64_t pmtmr_tsc_old; static uint32_t pmtmr_val(void) { uint64_t pmtmr_tsc_new; - uint32_t pmtmr_new; + uint64_t pmtmr_new; static int inited = 0; if (!inited) { size_t len; + uint32_t tmpf; inited = 1; - len = sizeof(pmtmr_tscf); - sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len, + pthread_mutex_init(&pmtmr_mtx, NULL); + len = sizeof(tmpf); + sysctlbyname("machdep.tsc_freq", &tmpf, &len, NULL, 0); + pmtmr_tscf = tmpf; pmtmr_tsc_old = rdtsc(); pmtmr_old = pmtmr_tsc_old / pmtmr_tscf * PMTMR_FREQ; return (pmtmr_old); } + pthread_mutex_lock(&pmtmr_mtx); pmtmr_tsc_new = rdtsc(); pmtmr_new = (pmtmr_tsc_new - pmtmr_tsc_old) * PMTMR_FREQ / pmtmr_tscf + pmtmr_old; pmtmr_old = pmtmr_new; pmtmr_tsc_old = pmtmr_tsc_new; + pthread_mutex_unlock(&pmtmr_mtx); - return (pmtmr_old); + return (pmtmr_new); } static int -- cgit v1.1 From 6b4735f33a04654288e66e77e13b9ec13c620488 Mon Sep 17 00:00:00 2001 From: grehan Date: Mon, 7 Jan 2013 07:33:48 +0000 Subject: Don't completely drain the read file descriptor. Instead, only fill up to the uart's rx fifo size, and leave any remaining input for when the rx fifo is read. This allows cut'n'paste of long lines to be done into the bhyve console without truncation. Also, introduce a mutex since the file input will run in the mevent thread context and may corrupt state accessed by a vCPU thread. 
Reviewed by: neel Approved by: NetApp --- usr.sbin/bhyve/pci_uart.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyve/pci_uart.c b/usr.sbin/bhyve/pci_uart.c index 3f192e1..dd30551 100644 --- a/usr.sbin/bhyve/pci_uart.c +++ b/usr.sbin/bhyve/pci_uart.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "bhyverun.h" #include "pci_emul.h" @@ -97,6 +98,7 @@ struct fifo { struct pci_uart_softc { struct pci_devinst *pi; + pthread_mutex_t mtx; /* protects all softc elements */ uint8_t data; /* Data register (R/W) */ uint8_t ier; /* Interrupt enable register (R/W) */ uint8_t lcr; /* Line control register (R/W) */ @@ -214,6 +216,13 @@ fifo_numchars(struct fifo *fifo) return (fifo->num); } +static int +fifo_available(struct fifo *fifo) +{ + + return (fifo->num < fifo->size); +} + static void pci_uart_opentty(struct pci_uart_softc *sc) { @@ -304,19 +313,25 @@ pci_uart_drain(int fd, enum ev_type ev, void *arg) assert(fd == STDIN_FILENO); assert(ev == EVF_READ); + + /* + * This routine is called in the context of the mevent thread + * so take out the softc lock to protect against concurrent + * access from a vCPU i/o exit + */ + pthread_mutex_lock(&sc->mtx); - while ((ch = ttyread()) != -1) { - /* - * If we are in loopback mode then drop this character. 
- */ - if ((sc->mcr & MCR_LOOPBACK) != 0) - continue; - - if (fifo_putchar(&sc->rxfifo, ch) != 0) - sc->lsr |= LSR_OE; + if ((sc->mcr & MCR_LOOPBACK) != 0) { + (void) ttyread(); + } else { + while (fifo_available(&sc->rxfifo) && + ((ch = ttyread()) != -1)) { + fifo_putchar(&sc->rxfifo, ch); + } + pci_uart_toggle_intr(sc); } - pci_uart_toggle_intr(sc); + pthread_mutex_unlock(&sc->mtx); } static void @@ -337,6 +352,8 @@ pci_uart_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, pci_uart_opentty(sc); sc->opened = 1; } + + pthread_mutex_lock(&sc->mtx); /* * Take care of the special case DLAB accesses first @@ -458,6 +475,7 @@ pci_uart_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, done: pci_uart_toggle_intr(sc); + pthread_mutex_unlock(&sc->mtx); } uint64_t @@ -479,6 +497,8 @@ pci_uart_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, sc->opened = 1; } + pthread_mutex_lock(&sc->mtx); + /* * Take care of the special case DLAB accesses first */ @@ -554,6 +574,8 @@ pci_uart_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, done: pci_uart_toggle_intr(sc); + pthread_mutex_unlock(&sc->mtx); + return (reg); } @@ -570,6 +592,8 @@ pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pi->pi_arg = sc; sc->pi = pi; + pthread_mutex_init(&sc->mtx, NULL); + /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, COM_DEV); pci_set_cfgdata16(pi, PCIR_VENDOR, COM_VENDOR); -- cgit v1.1 From 999dfea6a9e2fadae2d07ec5ae1749927f7c55e6 Mon Sep 17 00:00:00 2001 From: neel Date: Tue, 8 Jan 2013 01:56:05 +0000 Subject: Reduce the default memory allocation for a VM from 768MB to 128MB. 
Obtained from: NetApp --- usr.sbin/bhyveload/bhyveload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/bhyveload/bhyveload.c b/usr.sbin/bhyveload/bhyveload.c index 12f7c4b..ef12d9f 100644 --- a/usr.sbin/bhyveload/bhyveload.c +++ b/usr.sbin/bhyveload/bhyveload.c @@ -567,7 +567,7 @@ main(int argc, char** argv) progname = argv[0]; - lowmem = 768 * MB; + lowmem = 128 * MB; highmem = 0; disk_image = NULL; -- cgit v1.1 From 3690b68127142ebb2d5a307cb397988a0090adae Mon Sep 17 00:00:00 2001 From: neel Date: Tue, 8 Jan 2013 06:00:32 +0000 Subject: Add the 'bhyveload(8)' man page. Obtained from: NetApp Reviewed by: grehan --- usr.sbin/bhyveload/Makefile | 2 +- usr.sbin/bhyveload/bhyveload.8 | 130 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 usr.sbin/bhyveload/bhyveload.8 (limited to 'usr.sbin') diff --git a/usr.sbin/bhyveload/Makefile b/usr.sbin/bhyveload/Makefile index 7d0dd92..7b00818 100644 --- a/usr.sbin/bhyveload/Makefile +++ b/usr.sbin/bhyveload/Makefile @@ -2,7 +2,7 @@ PROG= bhyveload SRCS= bhyveload.c -NO_MAN= +MAN= bhyveload.8 DPADD+= ${LIBVMMAPI} LDADD+= -lvmmapi diff --git a/usr.sbin/bhyveload/bhyveload.8 b/usr.sbin/bhyveload/bhyveload.8 new file mode 100644 index 0000000..2918c4c --- /dev/null +++ b/usr.sbin/bhyveload/bhyveload.8 @@ -0,0 +1,130 @@ +.\" +.\" Copyright (c) 2012 NetApp Inc +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd January 7, 2012 +.Dt BHYVELOAD 8 +.Os +.Sh NAME +.Nm bhyveload +.Nd load a +.Fx +guest inside a bhyve virtual machine +.Sh SYNOPSIS +.Nm +.Op Fl m Ar lowmem +.Op Fl M Ar highmem +.Op Fl d Ar disk-path +.Op Fl h Ar host-path +.Ar vmname +.Sh DESCRIPTION +.Nm +is used to load a +.Fx +guest inside a +.Xr bhyve 4 +virtual machine. +.Pp +.Nm +is based on +.Xr loader 8 +and will present an interface identical to +.Fx +loader on the user's terminal. +.Pp +The virtual machine is identified as +.Ar vmname +and will be created if it does not already exist. +.Sh OPTIONS +The following options are available: +.Bl -tag -width indent +.It Fl m Ar lowmem +.Ar lowmem +is the amount of memory allocated below 4GB in the guest's physical address +space. +.Pp +The default value of +.Ar lowmem +is 256MB. +.It Fl M Ar highmem +.Ar highmem +is the amount of memory allocated above 4GB in the guest's physical address +space. +.Pp +The default value of +.Ar highmem +is 0MB. +.It Fl d Ar disk-path +The +.Ar disk-path +is the pathname of the guest's boot disk image. +.It Fl h Ar host-path +The +.Ar host-path +is the directory at the top of the guest's boot filesystem. 
+ +.Sh EXAMPLES +To create a virtual machine named +.Ar freebsd-vm +that boots off the ISO image +.Pa /freebsd/release.iso +and has 1GB memory allocated to it: + +.Dl "bhyveload -m 256 -M 768 -d /freebsd/release.iso freebsd-vm + +In the example above the 1GB allocation is split in two segments: +.Bl -dash -compact +.It +256MB below the 4GB boundary (0MB - 256MB) +.It +768MB above the 4GB boundary (4096MB - 4864MB) +.El + +.Sh SEE ALSO +.Xr bhyve 4 , +.Xr bhyve 8 , +.Xr loader 8 , +.Xr vmm 4 + +.Sh HISTORY +.Nm +first appeared in +.Fx 10.0 , +and was developed at NetApp Inc. + +.Sh AUTHORS +.Nm +was developed by +.An -nosplit +.An "Neel Natu" Aq neel@FreeBSD.org +at NetApp Inc with a lot of help from +.An Doug Rabson Aq dfr@FreeBSD.org + +.Sh BUGS +.Nm +can load only +.Fx +as a guest. -- cgit v1.1 From 2212c8eaabe9c172dbb42f512a932d0d60e80f28 Mon Sep 17 00:00:00 2001 From: neel Date: Wed, 9 Jan 2013 04:02:23 +0000 Subject: Sort the 'bhyvectl' subdir alphabetically. Obtained from: NetApp --- usr.sbin/Makefile.amd64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'usr.sbin') diff --git a/usr.sbin/Makefile.amd64 b/usr.sbin/Makefile.amd64 index e1ccf49..5ee2165 100644 --- a/usr.sbin/Makefile.amd64 +++ b/usr.sbin/Makefile.amd64 @@ -11,6 +11,7 @@ SUBDIR+= apm .endif SUBDIR+= asf SUBDIR+= bhyve +SUBDIR+= bhyvectl SUBDIR+= bhyveload SUBDIR+= boot0cfg .if ${MK_TOOLCHAIN} != "no" @@ -25,5 +26,4 @@ SUBDIR+= ndiscvt .endif SUBDIR+= sicontrol SUBDIR+= spkrtest -SUBDIR+= bhyvectl SUBDIR+= zzz -- cgit v1.1 From dee2455ab597b46c857f50c31c4ae88b56c694d4 Mon Sep 17 00:00:00 2001 From: neel Date: Wed, 9 Jan 2013 04:04:30 +0000 Subject: Get rid of 'sample.sh' from here - it belongs in the /usr/share/examples directory. 
Obtained from: NetApp --- usr.sbin/bhyvectl/sample.sh | 75 --------------------------------------------- 1 file changed, 75 deletions(-) delete mode 100755 usr.sbin/bhyvectl/sample.sh (limited to 'usr.sbin') diff --git a/usr.sbin/bhyvectl/sample.sh b/usr.sbin/bhyvectl/sample.sh deleted file mode 100755 index 356bd5f..0000000 --- a/usr.sbin/bhyvectl/sample.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/sh - -# $FreeBSD$ - -BHYVECTL="sudo ./bhyvectl" -VMNAME=sample - -${BHYVECTL} --vm=${VMNAME} --create -${BHYVECTL} --vm=${VMNAME} --set-lowmem=128 --set-highmem=256 -${BHYVECTL} --vm=${VMNAME} --get-lowmem --get-highmem - -CR0_PE=$((1 << 0)) -CR0_PG=$((1 << 31)) -CR0=$(($CR0_PE | $CR0_PG)) -${BHYVECTL} --vm=${VMNAME} --set-cr0=${CR0} --get-cr0 - -# XXX this is bogus the value of %cr3 should come from the loader -CR3=0 -${BHYVECTL} --vm=${VMNAME} --set-cr3=${CR3} --get-cr3 - -CR4_PAE=$((1 << 5)) -CR4=$((${CR4_PAE})) -${BHYVECTL} --vm=${VMNAME} --set-cr4=${CR4} --get-cr4 - -DR7=0x00000400 # Table 9-1 from Intel Architecture Manual 3A -${BHYVECTL} --vm=${VMNAME} --set-dr7=${DR7} --get-dr7 - -# -# XXX the values of rsp and rip are bogus and should come from the loader. -# -RSP=0xa5a5a5a5 -RIP=0x0000bfbfbfbf0000 -RFLAGS=0x2 -${BHYVECTL} --vm=${VMNAME} --set-rsp=${RSP} --get-rsp -${BHYVECTL} --vm=${VMNAME} --set-rip=${RIP} --get-rip -${BHYVECTL} --vm=${VMNAME} --set-rflags=${RFLAGS} --get-rflags - -# Set "hidden" state of %cs descriptor to indicate long mode code segment. -# -# Note that this should match the contents of the entry pointed to by the -# segment selector in the GDTR. -# -${BHYVECTL} --vm=${VMNAME} --set-desc-cs --desc-access=0x00002098 --get-desc-cs - -# Set "hidden" state of all data descriptors to indicate a usable segment. -# The only useful fields are the "Present" and "Descriptor Type" bits. 
-${BHYVECTL} --vm=${VMNAME} --set-desc-ds --desc-access=0x00000090 --get-desc-ds -${BHYVECTL} --vm=${VMNAME} --set-desc-es --desc-access=0x00000090 --get-desc-es -${BHYVECTL} --vm=${VMNAME} --set-desc-fs --desc-access=0x00000090 --get-desc-fs -${BHYVECTL} --vm=${VMNAME} --set-desc-gs --desc-access=0x00000090 --get-desc-gs -${BHYVECTL} --vm=${VMNAME} --set-desc-ss --desc-access=0x00000090 --get-desc-ss - -# -# Set the code segment selector to point to entry at offset 8 in the GDTR. -# -${BHYVECTL} --vm=${VMNAME} --set-cs=0x0008 --get-cs - -# Set all the remaining data segment selectors to point to entry at offset -# 16 in the GDTR. -${BHYVECTL} --vm=${VMNAME} --set-ds=0x0010 --get-ds -${BHYVECTL} --vm=${VMNAME} --set-es=0x0010 --get-es -${BHYVECTL} --vm=${VMNAME} --set-fs=0x0010 --get-fs -${BHYVECTL} --vm=${VMNAME} --set-gs=0x0010 --get-gs -${BHYVECTL} --vm=${VMNAME} --set-ss=0x0010 --get-ss - -# XXX the value of the GDTR should come from the loader. -# Set the GDTR -GDTR_BASE=0xffff0000 -GDTR_LIMIT=0x10 -${BHYVECTL} --vm=${VMNAME} --set-desc-gdtr --desc-base=${GDTR_BASE} --desc-limit=${GDTR_LIMIT} --get-desc-gdtr - -${BHYVECTL} --vm=${VMNAME} --set-pinning=0 --get-pinning -${BHYVECTL} --vm=${VMNAME} --set-pinning=-1 --get-pinning - -${BHYVECTL} --vm=${VMNAME} --destroy -- cgit v1.1