diff options
189 files changed, 6335 insertions, 3319 deletions
diff --git a/contrib/libarchive/NEWS b/contrib/libarchive/NEWS index 5bf8776..f672d3d 100644 --- a/contrib/libarchive/NEWS +++ b/contrib/libarchive/NEWS @@ -1,3 +1,8 @@ +Jun 20, 2016: libarchive 3.2.1 released + This fixes a handful of security and other critical issues with 3.2.0 + +May 01, 2016: libarchive 3.2.0 released + Apr 09, 2016: libarchive 3.1.901a released Another test release in preparation for 3.2.0 diff --git a/contrib/libarchive/cat/test/main.c b/contrib/libarchive/cat/test/main.c index 319f68c..0aa1deb 100644 --- a/contrib/libarchive/cat/test/main.c +++ b/contrib/libarchive/cat/test/main.c @@ -2534,18 +2534,36 @@ usage(const char *program) static char * get_refdir(const char *d) { - char tried[512] = { '\0' }; - char buff[128]; - char *pwd, *p; + size_t tried_size, buff_size; + char *buff, *tried, *pwd = NULL, *p = NULL; + +#ifdef PATH_MAX + buff_size = PATH_MAX; +#else + buff_size = 8192; +#endif + buff = calloc(buff_size, 1); + if (buff == NULL) { + fprintf(stderr, "Unable to allocate memory\n"); + exit(1); + } + + /* Allocate a buffer to hold the various directories we checked. */ + tried_size = buff_size * 2; + tried = calloc(tried_size, 1); + if (tried == NULL) { + fprintf(stderr, "Unable to allocate memory\n"); + exit(1); + } /* If a dir was specified, try that */ if (d != NULL) { pwd = NULL; - snprintf(buff, sizeof(buff), "%s", d); + snprintf(buff, buff_size, "%s", d); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); goto failure; } @@ -2559,48 +2577,48 @@ get_refdir(const char *d) pwd[strlen(pwd) - 1] = '\0'; /* Look for a known file. 
*/ - snprintf(buff, sizeof(buff), "%s", pwd); + snprintf(buff, buff_size, "%s", pwd); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); - snprintf(buff, sizeof(buff), "%s/test", pwd); + snprintf(buff, buff_size, "%s/test", pwd); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #if defined(LIBRARY) - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, LIBRARY); + snprintf(buff, buff_size, "%s/%s/test", pwd, LIBRARY); #else - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, PROGRAM); + snprintf(buff, buff_size, "%s/%s/test", pwd, PROGRAM); #endif p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #if defined(PROGRAM_ALIAS) - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, PROGRAM_ALIAS); + snprintf(buff, buff_size, "%s/%s/test", pwd, PROGRAM_ALIAS); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #endif if (memcmp(pwd, "/usr/obj", 8) == 0) { - snprintf(buff, sizeof(buff), "%s", pwd + 8); + snprintf(buff, buff_size, "%s", pwd + 8); p = slurpfile(NULL, "%s/%s", buff, 
KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); - snprintf(buff, sizeof(buff), "%s/test", pwd + 8); + snprintf(buff, buff_size, "%s/test", pwd + 8); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); } failure: @@ -2615,7 +2633,12 @@ failure: success: free(p); free(pwd); - return strdup(buff); + free(tried); + + /* Copy result into a fresh buffer to reduce memory usage. */ + p = strdup(buff); + free(buff); + return p; } int diff --git a/contrib/libarchive/cpio/cpio.c b/contrib/libarchive/cpio/cpio.c index 2addb9a..72fde41 100644 --- a/contrib/libarchive/cpio/cpio.c +++ b/contrib/libarchive/cpio/cpio.c @@ -498,7 +498,7 @@ long_help(void) static void version(void) { - fprintf(stdout,"bsdcpio %s -- %s\n", + fprintf(stdout,"bsdcpio %s - %s\n", BSDCPIO_VERSION_STRING, archive_version_details()); exit(0); diff --git a/contrib/libarchive/cpio/test/main.c b/contrib/libarchive/cpio/test/main.c index 2afeae7..00813f8 100644 --- a/contrib/libarchive/cpio/test/main.c +++ b/contrib/libarchive/cpio/test/main.c @@ -2535,18 +2535,36 @@ usage(const char *program) static char * get_refdir(const char *d) { - char tried[512] = { '\0' }; - char buff[128]; - char *pwd, *p; + size_t tried_size, buff_size; + char *buff, *tried, *pwd = NULL, *p = NULL; + +#ifdef PATH_MAX + buff_size = PATH_MAX; +#else + buff_size = 8192; +#endif + buff = calloc(buff_size, 1); + if (buff == NULL) { + fprintf(stderr, "Unable to allocate memory\n"); + exit(1); + } + + /* Allocate a buffer to hold the various directories we checked. 
*/ + tried_size = buff_size * 2; + tried = calloc(tried_size, 1); + if (tried == NULL) { + fprintf(stderr, "Unable to allocate memory\n"); + exit(1); + } /* If a dir was specified, try that */ if (d != NULL) { pwd = NULL; - snprintf(buff, sizeof(buff), "%s", d); + snprintf(buff, buff_size, "%s", d); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); goto failure; } @@ -2560,48 +2578,48 @@ get_refdir(const char *d) pwd[strlen(pwd) - 1] = '\0'; /* Look for a known file. */ - snprintf(buff, sizeof(buff), "%s", pwd); + snprintf(buff, buff_size, "%s", pwd); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); - snprintf(buff, sizeof(buff), "%s/test", pwd); + snprintf(buff, buff_size, "%s/test", pwd); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #if defined(LIBRARY) - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, LIBRARY); + snprintf(buff, buff_size, "%s/%s/test", pwd, LIBRARY); #else - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, PROGRAM); + snprintf(buff, buff_size, "%s/%s/test", pwd, PROGRAM); #endif p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, 
tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #if defined(PROGRAM_ALIAS) - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, PROGRAM_ALIAS); + snprintf(buff, buff_size, "%s/%s/test", pwd, PROGRAM_ALIAS); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #endif if (memcmp(pwd, "/usr/obj", 8) == 0) { - snprintf(buff, sizeof(buff), "%s", pwd + 8); + snprintf(buff, buff_size, "%s", pwd + 8); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); - snprintf(buff, sizeof(buff), "%s/test", pwd + 8); + snprintf(buff, buff_size, "%s/test", pwd + 8); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); } failure: @@ -2616,7 +2634,12 @@ failure: success: free(p); free(pwd); - return strdup(buff); + free(tried); + + /* Copy result into a fresh buffer to reduce memory usage. */ + p = strdup(buff); + free(buff); + return p; } int diff --git a/contrib/libarchive/cpio/test/test_missing_file.c b/contrib/libarchive/cpio/test/test_missing_file.c new file mode 100644 index 0000000..a908f53 --- /dev/null +++ b/contrib/libarchive/cpio/test/test_missing_file.c @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2016 Tim Kientzle + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "test.h" +__FBSDID("$FreeBSD$"); + +DEFINE_TEST(test_missing_file) +{ + int r; + + assertMakeFile("file1", 0644, "file1"); + assertMakeFile("file2", 0644, "file2"); + + assertMakeFile("filelist1", 0644, "file1\nfile2\n"); + r = systemf("%s -o <filelist1 >stdout1 2>stderr1", testprog); + assertEqualInt(r, 0); + assertTextFileContents("1 block\n", "stderr1"); + + assertMakeFile("filelist2", 0644, "file1\nfile2\nfile3\n"); + r = systemf("%s -o <filelist2 >stdout2 2>stderr2", testprog); + assert(r != 0); + + assertMakeFile("filelist3", 0644, ""); + r = systemf("%s -o <filelist3 >stdout3 2>stderr3", testprog); + assertEqualInt(r, 0); + assertTextFileContents("1 block\n", "stderr3"); + + assertMakeFile("filelist4", 0644, "file3\n"); + r = systemf("%s -o <filelist4 >stdout4 2>stderr4", testprog); + assert(r != 0); +} diff --git a/contrib/libarchive/cpio/test/test_option_version.c b/contrib/libarchive/cpio/test/test_option_version.c index 2f2c409..32ba300 100644 --- a/contrib/libarchive/cpio/test/test_option_version.c +++ b/contrib/libarchive/cpio/test/test_option_version.c @@ -59,8 +59,8 @@ verify(const char *p, size_t s) ++q; --s; /* Separator. */ failure("Version: %s", p); - assertEqualMem(q, "-- ", 3); - q += 3; s -= 3; + assertEqualMem(q, "- ", 2); + q += 2; s -= 2; /* libarchive name and version number */ assert(s > 11); failure("Version: %s", p); diff --git a/contrib/libarchive/libarchive/archive.h b/contrib/libarchive/libarchive/archive.h index c57c6a1..013eee8 100644 --- a/contrib/libarchive/libarchive/archive.h +++ b/contrib/libarchive/libarchive/archive.h @@ -36,7 +36,7 @@ * assert that ARCHIVE_VERSION_NUMBER >= 2012108. */ /* Note: Compiler will complain if this does not match archive_entry.h! 
*/ -#define ARCHIVE_VERSION_NUMBER 3002000 +#define ARCHIVE_VERSION_NUMBER 3002001 #include <sys/stat.h> #include <stddef.h> /* for wchar_t */ @@ -155,7 +155,7 @@ __LA_DECL int archive_version_number(void); /* * Textual name/version of the library, useful for version displays. */ -#define ARCHIVE_VERSION_ONLY_STRING "3.2.0" +#define ARCHIVE_VERSION_ONLY_STRING "3.2.1" #define ARCHIVE_VERSION_STRING "libarchive " ARCHIVE_VERSION_ONLY_STRING __LA_DECL const char * archive_version_string(void); diff --git a/contrib/libarchive/libarchive/archive_entry.h b/contrib/libarchive/libarchive/archive_entry.h index e7c0025..3a90ac7 100644 --- a/contrib/libarchive/libarchive/archive_entry.h +++ b/contrib/libarchive/libarchive/archive_entry.h @@ -29,7 +29,7 @@ #define ARCHIVE_ENTRY_H_INCLUDED /* Note: Compiler will complain if this does not match archive.h! */ -#define ARCHIVE_VERSION_NUMBER 3002000 +#define ARCHIVE_VERSION_NUMBER 3002001 /* * Note: archive_entry.h is for use outside of libarchive; the diff --git a/contrib/libarchive/libarchive/archive_entry_xattr.c b/contrib/libarchive/libarchive/archive_entry_xattr.c index cab420b..f9e7236 100644 --- a/contrib/libarchive/libarchive/archive_entry_xattr.c +++ b/contrib/libarchive/libarchive/archive_entry_xattr.c @@ -91,16 +91,11 @@ archive_entry_xattr_add_entry(struct archive_entry *entry, { struct ae_xattr *xp; - for (xp = entry->xattr_head; xp != NULL; xp = xp->next) - ; - if ((xp = (struct ae_xattr *)malloc(sizeof(struct ae_xattr))) == NULL) - /* XXX Error XXX */ - return; + __archive_errx(1, "Out of memory"); if ((xp->name = strdup(name)) == NULL) - /* XXX Error XXX */ - return; + __archive_errx(1, "Out of memory"); if ((xp->value = malloc(size)) != NULL) { memcpy(xp->value, value, size); diff --git a/contrib/libarchive/libarchive/archive_ppmd7.c b/contrib/libarchive/libarchive/archive_ppmd7.c index fe0b031..1aed922 100644 --- a/contrib/libarchive/libarchive/archive_ppmd7.c +++ b/contrib/libarchive/libarchive/archive_ppmd7.c 
@@ -126,6 +126,11 @@ static Bool Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAlloc *alloc) { if (p->Base == 0 || p->Size != size) { + /* RestartModel() below assumes that p->Size >= UNIT_SIZE + (see the calculation of m->MinContext). */ + if (size < UNIT_SIZE) { + return False; + } Ppmd7_Free(p, alloc); p->AlignOffset = #ifdef PPMD_32BIT diff --git a/contrib/libarchive/libarchive/archive_read_support_format_7zip.c b/contrib/libarchive/libarchive/archive_read_support_format_7zip.c index 90901ac..1dfe52b 100644 --- a/contrib/libarchive/libarchive/archive_read_support_format_7zip.c +++ b/contrib/libarchive/libarchive/archive_read_support_format_7zip.c @@ -2153,6 +2153,9 @@ read_SubStreamsInfo(struct archive_read *a, struct _7z_substream_info *ss, return (-1); if (UMAX_ENTRY < f[i].numUnpackStreams) return (-1); + if (unpack_streams > SIZE_MAX - UMAX_ENTRY) { + return (-1); + } unpack_streams += (size_t)f[i].numUnpackStreams; } if ((p = header_bytes(a, 1)) == NULL) diff --git a/contrib/libarchive/libarchive/archive_read_support_format_iso9660.c b/contrib/libarchive/libarchive/archive_read_support_format_iso9660.c index 3139cf5..3807abd 100644 --- a/contrib/libarchive/libarchive/archive_read_support_format_iso9660.c +++ b/contrib/libarchive/libarchive/archive_read_support_format_iso9660.c @@ -1091,7 +1091,7 @@ choose_volume(struct archive_read *a, struct iso9660 *iso9660) /* This condition is unlikely; by way of caution. */ vd = &(iso9660->joliet); - skipsize = LOGICAL_BLOCK_SIZE * vd->location; + skipsize = LOGICAL_BLOCK_SIZE * (int64_t)vd->location; skipsize = __archive_read_consume(a, skipsize); if (skipsize < 0) return ((int)skipsize); @@ -1129,7 +1129,7 @@ choose_volume(struct archive_read *a, struct iso9660 *iso9660) && iso9660->seenJoliet) { /* Switch reading data from primary to joliet. 
*/ vd = &(iso9660->joliet); - skipsize = LOGICAL_BLOCK_SIZE * vd->location; + skipsize = LOGICAL_BLOCK_SIZE * (int64_t)vd->location; skipsize -= iso9660->current_position; skipsize = __archive_read_consume(a, skipsize); if (skipsize < 0) diff --git a/contrib/libarchive/libarchive/archive_read_support_format_lha.c b/contrib/libarchive/libarchive/archive_read_support_format_lha.c index c359d83..dbfc1cd 100644 --- a/contrib/libarchive/libarchive/archive_read_support_format_lha.c +++ b/contrib/libarchive/libarchive/archive_read_support_format_lha.c @@ -1712,6 +1712,7 @@ lha_crc16(uint16_t crc, const void *pp, size_t len) for (;len >= 8; len -= 8) { /* This if statement expects compiler optimization will * remove the stament which will not be executed. */ +#undef bswap16 #if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */ # define bswap16(x) _byteswap_ushort(x) #elif (defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8) \ diff --git a/contrib/libarchive/libarchive/archive_read_support_format_mtree.c b/contrib/libarchive/libarchive/archive_read_support_format_mtree.c index ec9d1b6..f8d5574 100644 --- a/contrib/libarchive/libarchive/archive_read_support_format_mtree.c +++ b/contrib/libarchive/libarchive/archive_read_support_format_mtree.c @@ -1385,12 +1385,12 @@ parse_device(dev_t *pdev, struct archive *a, char *val) "Missing number"); return ARCHIVE_WARN; } - numbers[argc++] = (unsigned long)mtree_atol(&p); - if (argc > MAX_PACK_ARGS) { + if (argc >= MAX_PACK_ARGS) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, "Too many arguments"); return ARCHIVE_WARN; } + numbers[argc++] = (unsigned long)mtree_atol(&p); } if (argc < 2) { archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, diff --git a/contrib/libarchive/libarchive/archive_read_support_format_rar.c b/contrib/libarchive/libarchive/archive_read_support_format_rar.c index 6450aac..f729f17 100644 --- a/contrib/libarchive/libarchive/archive_read_support_format_rar.c +++ 
b/contrib/libarchive/libarchive/archive_read_support_format_rar.c @@ -2127,6 +2127,12 @@ parse_codes(struct archive_read *a) rar->range_dec.Stream = &rar->bytein; __archive_ppmd7_functions.Ppmd7_Construct(&rar->ppmd7_context); + if (rar->dictionary_size == 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid zero dictionary size"); + return (ARCHIVE_FATAL); + } + if (!__archive_ppmd7_functions.Ppmd7_Alloc(&rar->ppmd7_context, rar->dictionary_size, &g_szalloc)) { @@ -2884,11 +2890,10 @@ copy_from_lzss_window(struct archive_read *a, const void **buffer, } windowoffs = lzss_offset_for_position(&rar->lzss, startpos); - if(windowoffs + length <= lzss_size(&rar->lzss)) + if(windowoffs + length <= lzss_size(&rar->lzss)) { memcpy(&rar->unp_buffer[rar->unp_offset], &rar->lzss.window[windowoffs], length); - else - { + } else if (length <= lzss_size(&rar->lzss)) { firstpart = lzss_size(&rar->lzss) - windowoffs; if (firstpart < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, @@ -2900,9 +2905,14 @@ copy_from_lzss_window(struct archive_read *a, const void **buffer, &rar->lzss.window[windowoffs], firstpart); memcpy(&rar->unp_buffer[rar->unp_offset + firstpart], &rar->lzss.window[0], length - firstpart); - } else + } else { memcpy(&rar->unp_buffer[rar->unp_offset], &rar->lzss.window[windowoffs], length); + } + } else { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Bad RAR file data"); + return (ARCHIVE_FATAL); } rar->unp_offset += length; if (rar->unp_offset >= rar->unp_buffer_size) diff --git a/contrib/libarchive/libarchive/archive_read_support_format_warc.c b/contrib/libarchive/libarchive/archive_read_support_format_warc.c index 46a59ea..9d80132 100644 --- a/contrib/libarchive/libarchive/archive_read_support_format_warc.c +++ b/contrib/libarchive/libarchive/archive_read_support_format_warc.c @@ -535,7 +535,8 @@ xstrpisotime(const char *s, char **endptr) /* as a courtesy to our callers, and since this is a non-standard * 
routine, we skip leading whitespace */ - for (; isspace(*s); s++); + while (isspace((unsigned char)*s)) + ++s; /* read year */ if ((tm.tm_year = strtoi_lim(s, &s, 1583, 4095)) < 0 || *s++ != '-') { @@ -639,7 +640,9 @@ _warc_rdtyp(const char *buf, size_t bsz) return WT_NONE; } /* overread whitespace */ - for (val += sizeof(_key) - 1U; val < eob && isspace(*val); val++); + val += sizeof(_key) - 1U; + while (val < eob && isspace((unsigned char)*val)) + ++val; if (val + 8U > eob) { ; @@ -676,7 +679,9 @@ _warc_rduri(const char *buf, size_t bsz) return res; } /* overread whitespace */ - for (val += sizeof(_key) - 1U; val < eob && isspace(*val); val++); + val += sizeof(_key) - 1U; + while (val < eob && isspace((unsigned char)*val)) + ++val; /* overread URL designators */ if ((uri = xmemmem(val, eob - val, "://", 3U)) == NULL) { @@ -692,7 +697,8 @@ _warc_rduri(const char *buf, size_t bsz) /* also massage eol to point to the first whitespace * after the last non-whitespace character before * the end of the line */ - for (; eol > uri && isspace(eol[-1]); eol--); + while (eol > uri && isspace((unsigned char)eol[-1])) + --eol; /* now then, inspect the URI */ if (memcmp(val, "file", 4U) == 0) { @@ -727,7 +733,7 @@ _warc_rdlen(const char *buf, size_t bsz) /* strtol kindly overreads whitespace for us, so use that */ val += sizeof(_key) - 1U; len = strtol(val, &on, 10); - if (on == NULL || !isspace(*on)) { + if (on == NULL || !isspace((unsigned char)*on)) { /* hm, can we trust that number? Best not. */ return -1; } @@ -750,7 +756,7 @@ _warc_rdrtm(const char *buf, size_t bsz) /* xstrpisotime() kindly overreads whitespace for us, so use that */ val += sizeof(_key) - 1U; res = xstrpisotime(val, &on); - if (on == NULL || !isspace(*on)) { + if (on == NULL || !isspace((unsigned char)*on)) { /* hm, can we trust that number? Best not. 
*/ return (time_t)-1; } @@ -773,7 +779,7 @@ _warc_rdmtm(const char *buf, size_t bsz) /* xstrpisotime() kindly overreads whitespace for us, so use that */ val += sizeof(_key) - 1U; res = xstrpisotime(val, &on); - if (on == NULL || !isspace(*on)) { + if (on == NULL || !isspace((unsigned char)*on)) { /* hm, can we trust that number? Best not. */ return (time_t)-1; } diff --git a/contrib/libarchive/libarchive/archive_read_support_format_zip.c b/contrib/libarchive/libarchive/archive_read_support_format_zip.c index 2973678..2b025cb 100644 --- a/contrib/libarchive/libarchive/archive_read_support_format_zip.c +++ b/contrib/libarchive/libarchive/archive_read_support_format_zip.c @@ -181,6 +181,14 @@ struct zip { char init_decryption; /* Decryption buffer. */ + /* + * The decrypted data starts at decrypted_ptr and + * extends for decrypted_bytes_remaining. Decryption + * adds new data to the end of this block, data is returned + * to clients from the beginning. When the block hits the + * end of decrypted_buffer, it has to be shuffled back to + * the beginning of the buffer. 
+ */ unsigned char *decrypted_buffer; unsigned char *decrypted_ptr; size_t decrypted_buffer_size; @@ -1293,8 +1301,9 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, if (zip->tctx_valid || zip->cctx_valid) { if (zip->decrypted_bytes_remaining < (size_t)bytes_avail) { - size_t buff_remaining = zip->decrypted_buffer_size - - (zip->decrypted_ptr - zip->decrypted_buffer); + size_t buff_remaining = + (zip->decrypted_buffer + zip->decrypted_buffer_size) + - (zip->decrypted_ptr + zip->decrypted_bytes_remaining); if (buff_remaining > (size_t)bytes_avail) buff_remaining = (size_t)bytes_avail; diff --git a/contrib/libarchive/libarchive/archive_write_filter.3 b/contrib/libarchive/libarchive/archive_write_filter.3 index 869dc46..e1d1891 100644 --- a/contrib/libarchive/libarchive/archive_write_filter.3 +++ b/contrib/libarchive/libarchive/archive_write_filter.3 @@ -43,6 +43,7 @@ .Nm archive_write_add_filter_program , .Nm archive_write_add_filter_uuencode , .Nm archive_write_add_filter_xz +.Nd functions enabling output filters .Sh LIBRARY Streaming Archive Library (libarchive, -larchive) .Sh SYNOPSIS diff --git a/contrib/libarchive/libarchive/archive_write_set_format_gnutar.c b/contrib/libarchive/libarchive/archive_write_set_format_gnutar.c index 647079d..1d635d2 100644 --- a/contrib/libarchive/libarchive/archive_write_set_format_gnutar.c +++ b/contrib/libarchive/libarchive/archive_write_set_format_gnutar.c @@ -467,7 +467,7 @@ archive_write_gnutar_header(struct archive_write *a, } } if (gnutar->linkname_length > GNUTAR_linkname_size) { - size_t todo = gnutar->linkname_length; + size_t length = gnutar->linkname_length + 1; struct archive_entry *temp = archive_entry_new2(&a->archive); /* Uname/gname here don't really matter since no one reads them; @@ -476,7 +476,7 @@ archive_write_gnutar_header(struct archive_write *a, archive_entry_set_gname(temp, "wheel"); archive_entry_set_pathname(temp, "././@LongLink"); - archive_entry_set_size(temp, gnutar->linkname_length 
+ 1); + archive_entry_set_size(temp, length); ret = archive_format_gnutar_header(a, buff, temp, 'K'); if (ret < ARCHIVE_WARN) goto exit_write_header; @@ -484,11 +484,12 @@ archive_write_gnutar_header(struct archive_write *a, if(ret < ARCHIVE_WARN) goto exit_write_header; archive_entry_free(temp); - /* Write as many 512 bytes blocks as needed to write full name. */ - ret = __archive_write_output(a, gnutar->linkname, todo); + /* Write name and trailing null byte. */ + ret = __archive_write_output(a, gnutar->linkname, length); if(ret < ARCHIVE_WARN) goto exit_write_header; - ret = __archive_write_nulls(a, 0x1ff & (-(ssize_t)todo)); + /* Pad to 512 bytes */ + ret = __archive_write_nulls(a, 0x1ff & (-(ssize_t)length)); if (ret < ARCHIVE_WARN) goto exit_write_header; } @@ -496,7 +497,7 @@ archive_write_gnutar_header(struct archive_write *a, /* If pathname is longer than 100 chars we need to add an 'L' header. */ if (gnutar->pathname_length > GNUTAR_name_size) { const char *pathname = gnutar->pathname; - size_t todo = gnutar->pathname_length; + size_t length = gnutar->pathname_length + 1; struct archive_entry *temp = archive_entry_new2(&a->archive); /* Uname/gname here don't really matter since no one reads them; @@ -505,7 +506,7 @@ archive_write_gnutar_header(struct archive_write *a, archive_entry_set_gname(temp, "wheel"); archive_entry_set_pathname(temp, "././@LongLink"); - archive_entry_set_size(temp, gnutar->pathname_length + 1); + archive_entry_set_size(temp, length); ret = archive_format_gnutar_header(a, buff, temp, 'L'); if (ret < ARCHIVE_WARN) goto exit_write_header; @@ -513,11 +514,12 @@ archive_write_gnutar_header(struct archive_write *a, if(ret < ARCHIVE_WARN) goto exit_write_header; archive_entry_free(temp); - /* Write as many 512 bytes blocks as needed to write full name. */ - ret = __archive_write_output(a, pathname, todo); + /* Write pathname + trailing null byte. 
*/ + ret = __archive_write_output(a, pathname, length); if(ret < ARCHIVE_WARN) goto exit_write_header; - ret = __archive_write_nulls(a, 0x1ff & (-(ssize_t)todo)); + /* Pad to multiple of 512 bytes. */ + ret = __archive_write_nulls(a, 0x1ff & (-(ssize_t)length)); if (ret < ARCHIVE_WARN) goto exit_write_header; } diff --git a/contrib/libarchive/libarchive/archive_write_set_format_iso9660.c b/contrib/libarchive/libarchive/archive_write_set_format_iso9660.c index 4d832fb..cb3e54e 100644 --- a/contrib/libarchive/libarchive/archive_write_set_format_iso9660.c +++ b/contrib/libarchive/libarchive/archive_write_set_format_iso9660.c @@ -6225,7 +6225,7 @@ isoent_gen_joliet_identifier(struct archive_write *a, struct isoent *isoent, unsigned char *p; size_t l; int r; - int ffmax, parent_len; + size_t ffmax, parent_len; static const struct archive_rb_tree_ops rb_ops = { isoent_cmp_node_joliet, isoent_cmp_key_joliet }; @@ -6239,7 +6239,7 @@ isoent_gen_joliet_identifier(struct archive_write *a, struct isoent *isoent, else ffmax = 128; - r = idr_start(a, idr, isoent->children.cnt, ffmax, 6, 2, &rb_ops); + r = idr_start(a, idr, isoent->children.cnt, (int)ffmax, 6, 2, &rb_ops); if (r < 0) return (r); @@ -6252,7 +6252,7 @@ isoent_gen_joliet_identifier(struct archive_write *a, struct isoent *isoent, int ext_off, noff, weight; size_t lt; - if ((int)(l = np->file->basename_utf16.length) > ffmax) + if ((l = np->file->basename_utf16.length) > ffmax) l = ffmax; p = malloc((l+1)*2); @@ -6285,7 +6285,7 @@ isoent_gen_joliet_identifier(struct archive_write *a, struct isoent *isoent, /* * Get a length of MBS of a full-pathname. 
*/ - if ((int)np->file->basename_utf16.length > ffmax) { + if (np->file->basename_utf16.length > ffmax) { if (archive_strncpy_l(&iso9660->mbs, (const char *)np->identifier, l, iso9660->sconv_from_utf16be) != 0 && @@ -6302,7 +6302,9 @@ isoent_gen_joliet_identifier(struct archive_write *a, struct isoent *isoent, /* If a length of full-pathname is longer than 240 bytes, * it violates Joliet extensions regulation. */ - if (parent_len + np->mb_len > 240) { + if (parent_len > 240 + || np->mb_len > 240 + || parent_len + np->mb_len > 240) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "The regulation of Joliet extensions;" " A length of a full-pathname of `%s' is " @@ -6314,11 +6316,11 @@ isoent_gen_joliet_identifier(struct archive_write *a, struct isoent *isoent, /* Make an offset of the number which is used to be set * hexadecimal number to avoid duplicate identifier. */ - if ((int)l == ffmax) + if (l == ffmax) noff = ext_off - 6; - else if ((int)l == ffmax-2) + else if (l == ffmax-2) noff = ext_off - 4; - else if ((int)l == ffmax-4) + else if (l == ffmax-4) noff = ext_off - 2; else noff = ext_off; diff --git a/contrib/libarchive/libarchive/archive_write_set_options.3 b/contrib/libarchive/libarchive/archive_write_set_options.3 index ce7ed89..aeb7a18 100644 --- a/contrib/libarchive/libarchive/archive_write_set_options.3 +++ b/contrib/libarchive/libarchive/archive_write_set_options.3 @@ -32,7 +32,7 @@ .Nm archive_write_set_format_option , .Nm archive_write_set_option , .Nm archive_write_set_options -.Nd functions controlling options for reading archives +.Nd functions controlling options for writing archives .Sh LIBRARY Streaming Archive Library (libarchive, -larchive) .Sh SYNOPSIS diff --git a/contrib/libarchive/libarchive/libarchive-formats.5 b/contrib/libarchive/libarchive/libarchive-formats.5 index e619fe5..9cec760 100644 --- a/contrib/libarchive/libarchive/libarchive-formats.5 +++ b/contrib/libarchive/libarchive/libarchive-formats.5 @@ -65,7 +65,6 @@ Later 
variants have extended this by either appropriating undefined areas of the header record, extending the header to multiple records, or by storing special entries that modify the interpretation of subsequent entries. -.Pp .Bl -tag -width indent .It Cm gnutar The diff --git a/contrib/libarchive/libarchive/libarchive_changes.3 b/contrib/libarchive/libarchive/libarchive_changes.3 index bacd6e1..881a67c 100644 --- a/contrib/libarchive/libarchive/libarchive_changes.3 +++ b/contrib/libarchive/libarchive/libarchive_changes.3 @@ -28,7 +28,7 @@ .Dt LIBARCHIVE_CHANGES 3 .Os .Sh NAME -.Nm changes in libarchive interface +.Nd changes in libarchive interface .\" .Sh CHANGES IN LIBARCHIVE 3 This page describes user-visible changes in libarchive3, and lists diff --git a/contrib/libarchive/libarchive/test/main.c b/contrib/libarchive/libarchive/test/main.c index 5eb18bd..49ad7f6 100644 --- a/contrib/libarchive/libarchive/test/main.c +++ b/contrib/libarchive/libarchive/test/main.c @@ -2533,18 +2533,36 @@ usage(const char *program) static char * get_refdir(const char *d) { - char tried[512] = { '\0' }; - char buff[128]; - char *pwd, *p; + size_t tried_size, buff_size; + char *buff, *tried, *pwd = NULL, *p = NULL; + +#ifdef PATH_MAX + buff_size = PATH_MAX; +#else + buff_size = 8192; +#endif + buff = calloc(buff_size, 1); + if (buff == NULL) { + fprintf(stderr, "Unable to allocate memory\n"); + exit(1); + } + + /* Allocate a buffer to hold the various directories we checked. 
*/ + tried_size = buff_size * 2; + tried = calloc(tried_size, 1); + if (tried == NULL) { + fprintf(stderr, "Unable to allocate memory\n"); + exit(1); + } /* If a dir was specified, try that */ if (d != NULL) { pwd = NULL; - snprintf(buff, sizeof(buff), "%s", d); + snprintf(buff, buff_size, "%s", d); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); goto failure; } @@ -2558,48 +2576,48 @@ get_refdir(const char *d) pwd[strlen(pwd) - 1] = '\0'; /* Look for a known file. */ - snprintf(buff, sizeof(buff), "%s", pwd); + snprintf(buff, buff_size, "%s", pwd); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); - snprintf(buff, sizeof(buff), "%s/test", pwd); + snprintf(buff, buff_size, "%s/test", pwd); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #if defined(LIBRARY) - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, LIBRARY); + snprintf(buff, buff_size, "%s/%s/test", pwd, LIBRARY); #else - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, PROGRAM); + snprintf(buff, buff_size, "%s/%s/test", pwd, PROGRAM); #endif p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, 
tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #if defined(PROGRAM_ALIAS) - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, PROGRAM_ALIAS); + snprintf(buff, buff_size, "%s/%s/test", pwd, PROGRAM_ALIAS); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #endif if (memcmp(pwd, "/usr/obj", 8) == 0) { - snprintf(buff, sizeof(buff), "%s", pwd + 8); + snprintf(buff, buff_size, "%s", pwd + 8); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); - snprintf(buff, sizeof(buff), "%s/test", pwd + 8); + snprintf(buff, buff_size, "%s/test", pwd + 8); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); } failure: @@ -2614,7 +2632,12 @@ failure: success: free(p); free(pwd); - return strdup(buff); + free(tried); + + /* Copy result into a fresh buffer to reduce memory usage. */ + p = strdup(buff); + free(buff); + return p; } int diff --git a/contrib/libarchive/libarchive/test/test_read_format_rar_invalid1.c b/contrib/libarchive/libarchive/test/test_read_format_rar_invalid1.c new file mode 100644 index 0000000..61dea16 --- /dev/null +++ b/contrib/libarchive/libarchive/test/test_read_format_rar_invalid1.c @@ -0,0 +1,44 @@ +/*- + * Copyright (c) 2003-2016 Tim Kientzle + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "test.h" +__FBSDID("$FreeBSD$"); + +DEFINE_TEST(test_read_format_rar_invalid1) +{ + const char *refname = "test_read_format_rar_invalid1.rar"; + struct archive *a; + struct archive_entry *ae; + char *buff[100]; + + extract_reference_file(refname); + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 10240)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_FATAL, archive_read_data(a, buff, 99)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); +} diff --git a/contrib/libarchive/libarchive/test/test_read_format_rar_invalid1.rar.uu b/contrib/libarchive/libarchive/test/test_read_format_rar_invalid1.rar.uu new file mode 100644 index 0000000..2380399 --- /dev/null +++ b/contrib/libarchive/libarchive/test/test_read_format_rar_invalid1.rar.uu @@ -0,0 +1,5 @@ +begin 644 test_read_format_rar_invalid1.rar +M4F%R(1H'`,^0<P``#0````````"9SG0@D"8`#`````,````#+7,'\(^>B$4= +2,P0`I($``'1E<W0`P/\````) +` +end diff --git a/contrib/libarchive/libarchive/test/test_write_format_gnutar_filenames.c b/contrib/libarchive/libarchive/test/test_write_format_gnutar_filenames.c new file mode 100644 index 0000000..38b4ca9 --- /dev/null +++ b/contrib/libarchive/libarchive/test/test_write_format_gnutar_filenames.c @@ -0,0 +1,145 @@ +/*- + * Copyright (c) 2016 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" +__FBSDID("$FreeBSD$"); + +/* + * Inspired by Github issue #682, which reported that gnutar filenames + * of exactly 512 bytes weren't getting written correctly. + * + * This writes a filename of every length from 1 to 2000 bytes and + * reads back to verify it. + */ + +static char filename[2048]; + +DEFINE_TEST(test_write_format_gnutar_filenames) +{ + size_t buffsize = 1000000; + char *buff; + struct archive_entry *ae, *template; + struct archive *a; + size_t used; + + buff = malloc(buffsize); /* million bytes of work area */ + assert(buff != NULL); + + /* Create a template entry. 
*/ + assert((template = archive_entry_new()) != NULL); + archive_entry_set_atime(template, 2, 20); + archive_entry_set_birthtime(template, 3, 30); + archive_entry_set_ctime(template, 4, 40); + archive_entry_set_mtime(template, 5, 50); + archive_entry_set_mode(template, S_IFREG | 0755); + archive_entry_set_size(template, 8); + + for (int i = 0; i < 2000; ++i) { + filename[i] = 'a'; + filename[i + 1] = '\0'; + archive_entry_copy_pathname(template, filename); + + /* Write a one-item gnutar format archive. */ + assert((a = archive_write_new()) != NULL); + assertA(0 == archive_write_set_format_gnutar(a)); + assertA(0 == archive_write_add_filter_none(a)); + assertA(0 == archive_write_open_memory(a, buff, buffsize, &used)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_header(a, template)); + assertEqualIntA(a, 8, archive_write_data(a, "12345678", 9)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_free(a)); + + + /* Read back and verify the filename. */ + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, 0, archive_read_support_format_all(a)); + assertEqualIntA(a, 0, archive_read_support_filter_all(a)); + assertEqualIntA(a, 0, archive_read_open_memory(a, buff, used)); + + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualString(filename, archive_entry_pathname(ae)); + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); + } + + archive_entry_free(template); + + free(buff); +} + + +DEFINE_TEST(test_write_format_gnutar_linknames) +{ + size_t buffsize = 1000000; + char *buff; + struct archive_entry *ae, *template; + struct archive *a; + size_t used; + + buff = malloc(buffsize); /* million bytes of work area */ + assert(buff != NULL); + + /* Create a template entry. 
*/ + assert((template = archive_entry_new()) != NULL); + archive_entry_set_atime(template, 2, 20); + archive_entry_set_birthtime(template, 3, 30); + archive_entry_set_ctime(template, 4, 40); + archive_entry_set_mtime(template, 5, 50); + archive_entry_set_mode(template, S_IFLNK | 0755); + archive_entry_copy_pathname(template, "link"); + + for (int i = 0; i < 2000; ++i) { + filename[i] = 'a'; + filename[i + 1] = '\0'; + archive_entry_copy_symlink(template, filename); + + /* Write a one-item gnutar format archive. */ + assert((a = archive_write_new()) != NULL); + assertA(0 == archive_write_set_format_gnutar(a)); + assertA(0 == archive_write_add_filter_none(a)); + assertA(0 == archive_write_open_memory(a, buff, buffsize, &used)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_header(a, template)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_free(a)); + + + /* Read back and verify the filename. */ + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, 0, archive_read_support_format_all(a)); + assertEqualIntA(a, 0, archive_read_support_filter_all(a)); + assertEqualIntA(a, 0, archive_read_open_memory(a, buff, used)); + + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualString("link", archive_entry_pathname(ae)); + assertEqualString(filename, archive_entry_symlink(ae)); + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); + } + + archive_entry_free(template); + + free(buff); +} diff --git a/contrib/libarchive/libarchive_fe/passphrase.c b/contrib/libarchive/libarchive_fe/passphrase.c index 1eae0b8..3322437 100644 --- a/contrib/libarchive/libarchive_fe/passphrase.c +++ b/contrib/libarchive/libarchive_fe/passphrase.c @@ -121,14 +121,15 @@ readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags) #else /* _WIN32 && !__CYGWIN__ */ -#include 
<termios.h> -#include <signal.h> +#include <assert.h> #include <ctype.h> #include <fcntl.h> #ifdef HAVE_PATHS_H #include <paths.h> #endif +#include <signal.h> #include <string.h> +#include <termios.h> #include <unistd.h> #ifdef TCSASOFT @@ -142,11 +143,18 @@ readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags) # define _POSIX_VDISABLE VDISABLE #endif -static volatile sig_atomic_t *signo; +#define M(a,b) (a > b ? a : b) +#define MAX_SIGNO M(M(M(SIGALRM, SIGHUP), \ + M(SIGINT, SIGPIPE)), \ + M(M(SIGQUIT, SIGTERM), \ + M(M(SIGTSTP, SIGTTIN), SIGTTOU))) + +static volatile sig_atomic_t signo[MAX_SIGNO + 1]; static void handler(int s) { + assert(s <= MAX_SIGNO); signo[s] = 1; } @@ -166,12 +174,8 @@ readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags) return(NULL); } - if (signo == NULL) { - signo = calloc(SIGRTMAX, sizeof(sig_atomic_t)); - } - restart: - for (i = 0; i < SIGRTMAX; i++) + for (i = 0; i <= MAX_SIGNO; i++) signo[i] = 0; nr = -1; save_errno = 0; @@ -198,6 +202,7 @@ restart: sigemptyset(&sa.sa_mask); sa.sa_flags = 0; /* don't restart system calls */ sa.sa_handler = handler; + /* Keep this list in sync with MAX_SIGNO! */ (void)sigaction(SIGALRM, &sa, &savealrm); (void)sigaction(SIGHUP, &sa, &savehup); (void)sigaction(SIGINT, &sa, &saveint); @@ -237,11 +242,11 @@ restart: if (p < end) { if ((flags & RPP_SEVENBIT)) ch &= 0x7f; - if (isalpha(ch)) { + if (isalpha((unsigned char)ch)) { if ((flags & RPP_FORCELOWER)) - ch = (char)tolower(ch); + ch = (char)tolower((unsigned char)ch); if ((flags & RPP_FORCEUPPER)) - ch = (char)toupper(ch); + ch = (char)toupper((unsigned char)ch); } *p++ = ch; } @@ -276,7 +281,7 @@ restart: * If we were interrupted by a signal, resend it to ourselves * now that we have restored the signal handlers. 
*/ - for (i = 0; i < SIGRTMAX; i++) { + for (i = 0; i <= MAX_SIGNO; i++) { if (signo[i]) { kill(getpid(), i); switch (i) { diff --git a/contrib/libarchive/tar/test/main.c b/contrib/libarchive/tar/test/main.c index 7a71b5dc..6ebe10e 100644 --- a/contrib/libarchive/tar/test/main.c +++ b/contrib/libarchive/tar/test/main.c @@ -2535,18 +2535,36 @@ usage(const char *program) static char * get_refdir(const char *d) { - char tried[512] = { '\0' }; - char buff[128]; - char *pwd, *p; + size_t tried_size, buff_size; + char *buff, *tried, *pwd = NULL, *p = NULL; + +#ifdef PATH_MAX + buff_size = PATH_MAX; +#else + buff_size = 8192; +#endif + buff = calloc(buff_size, 1); + if (buff == NULL) { + fprintf(stderr, "Unable to allocate memory\n"); + exit(1); + } + + /* Allocate a buffer to hold the various directories we checked. */ + tried_size = buff_size * 2; + tried = calloc(tried_size, 1); + if (tried == NULL) { + fprintf(stderr, "Unable to allocate memory\n"); + exit(1); + } /* If a dir was specified, try that */ if (d != NULL) { pwd = NULL; - snprintf(buff, sizeof(buff), "%s", d); + snprintf(buff, buff_size, "%s", d); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); goto failure; } @@ -2560,48 +2578,48 @@ get_refdir(const char *d) pwd[strlen(pwd) - 1] = '\0'; /* Look for a known file. 
*/ - snprintf(buff, sizeof(buff), "%s", pwd); + snprintf(buff, buff_size, "%s", pwd); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); - snprintf(buff, sizeof(buff), "%s/test", pwd); + snprintf(buff, buff_size, "%s/test", pwd); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #if defined(LIBRARY) - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, LIBRARY); + snprintf(buff, buff_size, "%s/%s/test", pwd, LIBRARY); #else - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, PROGRAM); + snprintf(buff, buff_size, "%s/%s/test", pwd, PROGRAM); #endif p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #if defined(PROGRAM_ALIAS) - snprintf(buff, sizeof(buff), "%s/%s/test", pwd, PROGRAM_ALIAS); + snprintf(buff, buff_size, "%s/%s/test", pwd, PROGRAM_ALIAS); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); #endif if (memcmp(pwd, "/usr/obj", 8) == 0) { - snprintf(buff, sizeof(buff), "%s", pwd + 8); + snprintf(buff, buff_size, "%s", pwd + 8); p = slurpfile(NULL, "%s/%s", buff, 
KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); - snprintf(buff, sizeof(buff), "%s/test", pwd + 8); + snprintf(buff, buff_size, "%s/test", pwd + 8); p = slurpfile(NULL, "%s/%s", buff, KNOWNREF); if (p != NULL) goto success; - strncat(tried, buff, sizeof(tried) - strlen(tried) - 1); - strncat(tried, "\n", sizeof(tried) - strlen(tried) - 1); + strncat(tried, buff, tried_size - strlen(tried) - 1); + strncat(tried, "\n", tried_size - strlen(tried) - 1); } failure: @@ -2616,7 +2634,12 @@ failure: success: free(p); free(pwd); - return strdup(buff); + free(tried); + + /* Copy result into a fresh buffer to reduce memory usage. */ + p = strdup(buff); + free(buff); + return p; } int diff --git a/contrib/libarchive/tar/test/test_missing_file.c b/contrib/libarchive/tar/test/test_missing_file.c new file mode 100644 index 0000000..e2e5da5 --- /dev/null +++ b/contrib/libarchive/tar/test/test_missing_file.c @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2016 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" +__FBSDID("$FreeBSD$"); + +DEFINE_TEST(test_missing_file) +{ + assertMakeFile("file1", 0644, "file1"); + assertMakeFile("file2", 0644, "file2"); + assert(0 == systemf("%s -cf archive.tar file1 file2 2>stderr1", testprog)); + assertEmptyFile("stderr1"); + assert(0 != systemf("%s -cf archive.tar file1 file2 file3 2>stderr2", testprog)); + assert(0 != systemf("%s -cf archive.tar 2>stderr3", testprog)); + assert(0 != systemf("%s -cf archive.tar file3 2>stderr4", testprog)); +} diff --git a/contrib/libarchive/tar/write.c b/contrib/libarchive/tar/write.c index 430c97c..30fb224 100644 --- a/contrib/libarchive/tar/write.c +++ b/contrib/libarchive/tar/write.c @@ -884,7 +884,7 @@ write_hierarchy(struct bsdtar *bsdtar, struct archive *a, const char *path) else if (r != ARCHIVE_OK) { lafe_warnc(archive_errno(disk), "%s", archive_error_string(disk)); - if (r == ARCHIVE_FATAL) { + if (r == ARCHIVE_FATAL || r == ARCHIVE_FAILED) { bsdtar->return_value = 1; return; } else if (r < ARCHIVE_WARN) diff --git a/lib/libarchive/tests/Makefile b/lib/libarchive/tests/Makefile index b8c2f20..b29dcf4 100644 --- a/lib/libarchive/tests/Makefile +++ b/lib/libarchive/tests/Makefile @@ -155,6 +155,7 @@ TESTS_SRCS= \ test_read_format_rar_encryption_data.c \ test_read_format_rar_encryption_header.c \ test_read_format_rar_encryption_partially.c \ + test_read_format_rar_invalid1.c \ test_read_format_raw.c \ test_read_format_tar.c \ test_read_format_tar_concatenated.c 
\ @@ -234,6 +235,7 @@ TESTS_SRCS= \ test_write_format_cpio_newc.c \ test_write_format_cpio_odc.c \ test_write_format_gnutar.c \ + test_write_format_gnutar_filenames.c \ test_write_format_iso9660.c \ test_write_format_iso9660_boot.c \ test_write_format_iso9660_empty.c \ @@ -470,6 +472,7 @@ FILES+= test_read_format_rar_compress_normal.rar.uu FILES+= test_read_format_rar_encryption_data.rar.uu FILES+= test_read_format_rar_encryption_header.rar.uu FILES+= test_read_format_rar_encryption_partially.rar.uu +FILES+= test_read_format_rar_invalid1.rar.uu FILES+= test_read_format_rar_multi_lzss_blocks.rar.uu FILES+= test_read_format_rar_multivolume.part0001.rar.uu FILES+= test_read_format_rar_multivolume.part0002.rar.uu diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c index 2ca971b..a756bba 100644 --- a/lib/libc/regex/engine.c +++ b/lib/libc/regex/engine.c @@ -786,7 +786,7 @@ fast( struct match *m, ASSIGN(fresh, st); SP("start", st, *p); coldp = NULL; - if (start == m->beginp) + if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL))) c = OUT; else { /* @@ -891,7 +891,7 @@ slow( struct match *m, SP("sstart", st, *p); st = step(m->g, startst, stopst, st, NOTHING, st); matchp = NULL; - if (start == m->beginp) + if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL))) c = OUT; else { /* diff --git a/lib/libc/regex/regex.3 b/lib/libc/regex/regex.3 index ea1ba25..70be400 100644 --- a/lib/libc/regex/regex.3 +++ b/lib/libc/regex/regex.3 @@ -32,7 +32,7 @@ .\" @(#)regex.3 8.4 (Berkeley) 3/20/94 .\" $FreeBSD$ .\" -.Dd August 17, 2005 +.Dd May 25, 2016 .Dt REGEX 3 .Os .Sh NAME @@ -235,11 +235,16 @@ The argument is the bitwise OR of zero or more of the following flags: .Bl -tag -width REG_STARTEND .It Dv REG_NOTBOL -The first character of -the string -is not the beginning of a line, so the -.Ql ^\& -anchor should not match before it. +The first character of the string is treated as the continuation +of a line. 
+This means that the anchors +.Ql ^\& , +.Ql [[:<:]] , +and +.Ql \e< +do not match before it; but see +.Dv REG_STARTEND +below. This does not affect the behavior of newlines under .Dv REG_NEWLINE . .It Dv REG_NOTEOL @@ -247,19 +252,16 @@ The NUL terminating the string does not end a line, so the .Ql $\& -anchor should not match before it. +anchor does not match before it. This does not affect the behavior of newlines under .Dv REG_NEWLINE . .It Dv REG_STARTEND The string is considered to start at -.Fa string -+ -.Fa pmatch Ns [0]. Ns Va rm_so -and to have a terminating NUL located at -.Fa string -+ -.Fa pmatch Ns [0]. Ns Va rm_eo -(there need not actually be a NUL at that location), +.Fa string No + +.Fa pmatch Ns [0]. Ns Fa rm_so +and to end before the byte located at +.Fa string No + +.Fa pmatch Ns [0]. Ns Fa rm_eo , regardless of the value of .Fa nmatch . See below for the definition of @@ -271,13 +273,37 @@ compatible with but not specified by .St -p1003.2 , and should be used with caution in software intended to be portable to other systems. -Note that a non-zero -.Va rm_so -does not imply -.Dv REG_NOTBOL ; -.Dv REG_STARTEND -affects only the location of the string, -not how it is matched. +.Pp +Without +.Dv REG_NOTBOL , +the position +.Fa rm_so +is considered the beginning of a line, such that +.Ql ^ +matches before it, and the beginning of a word if there is a word +character at this position, such that +.Ql [[:<:]] +and +.Ql \e< +match before it. +.Pp +With +.Dv REG_NOTBOL , +the character at position +.Fa rm_so +is treated as the continuation of a line, and if +.Fa rm_so +is greater than 0, the preceding character is taken into consideration. +If the preceding character is a newline and the regular expression was compiled +with +.Dv REG_NEWLINE , +.Ql ^ +matches before the string; if the preceding character is not a word character +but the string starts with a word character, +.Ql [[:<:]] +and +.Ql \e< +match before the string. 
.El .Pp See diff --git a/lib/libc/stdlib/l64a.c b/lib/libc/stdlib/l64a.c index bc10553..c281d7d 100644 --- a/lib/libc/stdlib/l64a.c +++ b/lib/libc/stdlib/l64a.c @@ -12,18 +12,13 @@ __RCSID("$NetBSD: l64a.c,v 1.13 2003/07/26 19:24:54 salo Exp $"); #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <stdint.h> #include <stdlib.h> -#define ADOT 46 /* ASCII '.' */ -#define ASLASH ADOT + 1 /* ASCII '/' */ -#define A0 48 /* ASCII '0' */ -#define AA 65 /* ASCII 'A' */ -#define Aa 97 /* ASCII 'a' */ - char * l64a(long value) { - static char buf[8]; + static char buf[7]; (void)l64a_r(value, buf, sizeof(buf)); return (buf); @@ -32,21 +27,18 @@ l64a(long value) int l64a_r(long value, char *buffer, int buflen) { - long v; - int digit; - - v = value & (long)0xffffffff; - for (; v != 0 && buflen > 1; buffer++, buflen--) { - digit = v & 0x3f; - if (digit < 2) - *buffer = digit + ADOT; - else if (digit < 12) - *buffer = digit + A0 - 2; - else if (digit < 38) - *buffer = digit + AA - 12; - else - *buffer = digit + Aa - 38; + static const char chars[] = + "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + uint32_t v; + + v = value; + while (buflen-- > 0) { + if (v == 0) { + *buffer = '\0'; + return (0); + } + *buffer++ = chars[v & 0x3f]; v >>= 6; } - return (v == 0 ? 
0 : -1); + return (-1); } diff --git a/lib/libusb/Makefile b/lib/libusb/Makefile index 15b2533..680c44b 100644 --- a/lib/libusb/Makefile +++ b/lib/libusb/Makefile @@ -32,6 +32,7 @@ SRCS+= libusb01.c INCS+= libusb.h SRCS+= libusb10.c SRCS+= libusb10_desc.c +SRCS+= libusb10_hotplug.c SRCS+= libusb10_io.c .if defined(COMPAT_32BIT) @@ -67,6 +68,7 @@ CFLAGS+= -I ../../sys .include <bsd.lib.mk> # LibUSB v1.0 +MLINKS += libusb.3 libusb_get_version.3 MLINKS += libusb.3 libusb_init.3 MLINKS += libusb.3 libusb_exit.3 MLINKS += libusb.3 libusb_strerror.3 @@ -75,6 +77,7 @@ MLINKS += libusb.3 libusb_set_debug.3 MLINKS += libusb.3 libusb_get_device_list.3 MLINKS += libusb.3 libusb_free_device_list.3 MLINKS += libusb.3 libusb_get_bus_number.3 +MLINKS += libusb.3 libusb_get_port_number.3 MLINKS += libusb.3 libusb_get_device_address.3 MLINKS += libusb.3 libusb_get_device_speed.3 MLINKS += libusb.3 libusb_get_max_packet_size.3 @@ -99,6 +102,7 @@ MLINKS += libusb.3 libusb_get_driver_np.3 MLINKS += libusb.3 libusb_detach_kernel_driver.3 MLINKS += libusb.3 libusb_detach_kernel_driver_np.3 MLINKS += libusb.3 libusb_attach_kernel_driver.3 +MLINKS += libusb.3 libusb_set_auto_detach_kernel_driver.3 MLINKS += libusb.3 libusb_get_device_descriptor.3 MLINKS += libusb.3 libusb_get_active_config_descriptor.3 MLINKS += libusb.3 libusb_get_config_descriptor.3 @@ -108,10 +112,22 @@ MLINKS += libusb.3 libusb_get_string_descriptor.3 MLINKS += libusb.3 libusb_get_string_descriptor_ascii.3 MLINKS += libusb.3 libusb_parse_ss_endpoint_comp.3 MLINKS += libusb.3 libusb_free_ss_endpoint_comp.3 +MLINKS += libusb.3 libusb_get_ss_endpoint_companion_descriptor.3 +MLINKS += libusb.3 libusb_free_ss_endpoint_companion_descriptor.3 MLINKS += libusb.3 libusb_parse_bos_descriptor.3 MLINKS += libusb.3 libusb_free_bos_descriptor.3 +MLINKS += libusb.3 libusb_get_usb_2_0_extension_descriptor.3 +MLINKS += libusb.3 libusb_free_usb_2_0_extension_descriptor.3 +MLINKS += libusb.3 
libusb_get_ss_usb_device_capability_descriptor.3 +MLINKS += libusb.3 libusb_free_ss_usb_device_capability_descriptor.3 +MLINKS += libusb.3 libusb_get_container_id_descriptor.3 +MLINKS += libusb.3 libusb_free_container_id_descriptor.3 +MLINKS += libusb.3 libusb_alloc_streams.3 +MLINKS += libusb.3 libusb_free_streams.3 MLINKS += libusb.3 libusb_alloc_transfer.3 MLINKS += libusb.3 libusb_free_transfer.3 +MLINKS += libusb.3 libusb_transfer_set_stream_id.3 +MLINKS += libusb.3 libusb_transfer_get_stream_id.3 MLINKS += libusb.3 libusb_submit_transfer.3 MLINKS += libusb.3 libusb_cancel_transfer.3 MLINKS += libusb.3 libusb_control_transfer.3 @@ -133,6 +149,8 @@ MLINKS += libusb.3 libusb_handle_events_locked.3 MLINKS += libusb.3 libusb_get_next_timeout.3 MLINKS += libusb.3 libusb_set_pollfd_notifiers.3 MLINKS += libusb.3 libusb_get_pollfds.3 +MLINKS += libusb.3 libusb_hotplug_register_callback.3 +MLINKS += libusb.3 libusb_hotplug_deregister_callback.3 # LibUSB v0.1 MLINKS += libusb.3 usb_open.3 diff --git a/lib/libusb/libusb.3 b/lib/libusb/libusb.3 index 785c8c4..c6f4082 100644 --- a/lib/libusb/libusb.3 +++ b/lib/libusb/libusb.3 @@ -26,7 +26,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 5, 2014 +.Dd June 23, 2016 .Dt LIBUSB 3 .Os .Sh NAME @@ -43,6 +43,10 @@ The library contains interfaces for directly managing a usb device. The current implementation supports v1.0 of the libusb API. .Sh LIBRARY INITIALISATION / DEINITIALISATION +.Ft "const struct libusb_version *" +.Fn libusb_get_version "void" +This function returns version information about LibUSB. +.Pp .Ft int .Fn libusb_init libusb_context **ctx This function initialises libusb. @@ -102,6 +106,12 @@ counter decremented once. Returns the number of the bus contained by the device .Fa dev . .Pp +.Ft uint8_t +.Fn libusb_get_port_number "libusb_device *dev" +Returns the port number which the device given by +.Fa dev +is attached to. 
+.Pp .Ft int .Fn libusb_get_port_numbers "libusb_device *dev" "uint8_t *buf" "uint8_t bufsize" Stores, in the buffer @@ -288,6 +298,18 @@ LIBUSB_ERROR_NO_DEVICE if the device has been disconnected, LIBUSB_ERROR_BUSY if the driver cannot be attached because the interface is claimed by a program or driver and a LIBUSB_ERROR code on failure. +.Pp +.Ft int +.Fn libusb_set_auto_detach_kernel_driver "libusb_device_handle *devh" "int enable" +This function enables automatic kernel interface driver detach when an +interface is claimed. +When the interface is restored the kernel driver is allowed to be re-attached. +If the +.Fa enable +argument is non-zero the feature is enabled. +Else disabled. +Returns 0 on success and a LIBUSB_ERROR code on +failure. .Sh USB DESCRIPTORS .Ft int .Fn libusb_get_device_descriptor "libusb_device *dev" "libusb_device_descriptor *desc" @@ -354,7 +376,31 @@ freed using the libusb_free_ss_endpoint_comp function. .Pp .Ft void .Fn libusb_free_ss_endpoint_comp "libusb_ss_endpoint_companion_descriptor *ep_comp" -This function is NULL safe and frees a parsed USB 3.0 endpoint companion descriptor. +This function is NULL safe and frees a parsed USB 3.0 endpoint companion descriptor given by +.Fa ep_comp . +.Pp +.Ft int +.Fn libusb_get_ss_endpoint_companion_descriptor "struct libusb_context *ctx" "const struct libusb_endpoint_descriptor *endpoint" "struct libusb_ss_endpoint_companion_descriptor **ep_comp" +This function finds and parses the USB 3.0 endpoint companion descriptor given by +.Fa endpoint . +Returns zero on success and a LIBUSB_ERROR code on failure. +On success the parsed USB 3.0 endpoint companion descriptor must be +freed using the libusb_free_ss_endpoint_companion_descriptor function. +.Pp +.Ft void +.Fn libusb_free_ss_endpoint_companion_descriptor "struct libusb_ss_endpoint_companion_descriptor *ep_comp" +This function is NULL safe and frees a parsed USB 3.0 endpoint companion descriptor given by +.Fa ep_comp . 
+.Pp +.Ft int +.Fn libusb_get_bos_descriptor "libusb_device_handle *handle" "struct libusb_bos_descriptor **bos" +This function queries the USB device given by +.Fa handle +and stores a pointer to a parsed BOS descriptor into +.Fa bos . +Returns zero on success and a LIBUSB_ERROR code on failure. +On success the parsed BOS descriptor must be +freed using the libusb_free_bos_descriptor function. .Pp .Ft int .Fn libusb_parse_bos_descriptor "const void *buf" "int len" "libusb_bos_descriptor **bos" @@ -370,7 +416,53 @@ libusb_free_bos_descriptor function. .Pp .Ft void .Fn libusb_free_bos_descriptor "libusb_bos_descriptor *bos" -This function is NULL safe and frees a parsed BOS descriptor. +This function is NULL safe and frees a parsed BOS descriptor given by +.Fa bos . +.Pp +.Ft int +.Fn libusb_get_usb_2_0_extension_descriptor "struct libusb_context *ctx" "struct libusb_bos_dev_capability_descriptor *dev_cap" "struct libusb_usb_2_0_extension_descriptor **usb_2_0_extension" +This function parses the USB 2.0 extension descriptor from the descriptor given by +.Fa dev_cap +and stores a pointer to the parsed descriptor into +.Fa usb_2_0_extension . +Returns zero on success and a LIBUSB_ERROR code on failure. +On success the parsed USB 2.0 extension descriptor must be freed using the +libusb_free_usb_2_0_extension_descriptor function. +.Pp +.Ft void +.Fn libusb_free_usb_2_0_extension_descriptor "struct libusb_usb_2_0_extension_descriptor *usb_2_0_extension" +This function is NULL safe and frees a parsed USB 2.0 extension descriptor given by +.Fa usb_2_0_extension . 
+.Pp +.Ft int +.Fn libusb_get_ss_usb_device_capability_descriptor "struct libusb_context *ctx" "struct libusb_bos_dev_capability_descriptor *dev_cap" "struct libusb_ss_usb_device_capability_descriptor **ss_usb_device_capability" +This function parses the SuperSpeed device capability descriptor from the descriptor given by +.Fa dev_cap +and stores a pointer to the parsed descriptor into +.Fa ss_usb_device_capability . +Returns zero on success and a LIBUSB_ERROR code on failure. +On success the parsed SuperSpeed device capability descriptor must be freed using the +libusb_free_ss_usb_device_capability_descriptor function. +.Pp +.Ft void +.Fn libusb_free_ss_usb_device_capability_descriptor "struct libusb_ss_usb_device_capability_descriptor *ss_usb_device_capability" +This function is NULL safe and frees a parsed SuperSpeed device capability descriptor given by +.Fa ss_usb_device_capability . +.Pp +.Ft int +.Fn libusb_get_container_id_descriptor "struct libusb_context *ctx" "struct libusb_bos_dev_capability_descriptor *dev_cap" "struct libusb_container_id_descriptor **container_id" +This function parses the container ID descriptor from the descriptor given by +.Fa dev_cap +and stores a pointer to the parsed descriptor into +.Fa container_id . +Returns zero on success and a LIBUSB_ERROR code on failure. +On success the parsed container ID descriptor must be freed using the +libusb_free_container_id_descriptor function. +.Pp +.Ft void +.Fn libusb_free_container_id_descriptor "struct libusb_container_id_descriptor *container_id" +This function is NULL safe and frees a parsed container ID descriptor given by +.Fa container_id . 
.Sh USB ASYNCHRONOUS I/O .Ft struct libusb_transfer * .Fn libusb_alloc_transfer "int iso_packets" @@ -429,6 +521,29 @@ if the transfer timed out, LIBUSB_ERROR_PIPE if the control request was not supported, LIBUSB_ERROR_OVERFLOW if the device offered more data, LIBUSB_ERROR_NO_DEVICE if the device has been disconnected and a LIBUSB_ERROR code on other failure. +.Sh USB STREAMS SUPPORT +.Ft int +.Fn libusb_alloc_streams "libusb_device_handle *dev" "uint32_t num_streams" "unsigned char *endpoints" "int num_endpoints" +This function verifies that the given number of streams using the +given number of endpoints is allowed and allocates the resources +needed to use so-called USB streams. +Currently only a single stream per endpoint is supported to simplify +the internals of LibUSB. +This function returns 0 on success or a LIBUSB_ERROR code on failure. +.Pp +.Ft int +.Fn libusb_free_streams "libusb_device_handle *dev" "unsigned char *endpoints" "int num_endpoints" +This function releases resources needed for streams usage. +Returns 0 on success or a LIBUSB_ERROR code on failure. +.Pp +.Ft void +.Fn libusb_transfer_set_stream_id "struct libusb_transfer *transfer" "uint32_t stream_id" +This function sets the stream ID for the given USB transfer. +.Pp +.Ft uint32_t +.Fn libusb_transfer_get_stream_id "struct libusb_transfer *transfer" +This function returns the stream ID for the given USB transfer. +If no stream ID is used a value of zero is returned. .Sh USB EVENTS .Ft int .Fn libusb_try_lock_events "libusb_context *ctx" @@ -550,6 +665,47 @@ that libusb uses as an event source. Retrive a list of file descriptors that should be polled by your main loop as libusb event sources. Returns a NULL-terminated list on success or NULL on failure. 
+.Pp +.Ft int +.Fn libusb_hotplug_register_callback "libusb_context *ctx" "libusb_hotplug_event events" "libusb_hotplug_flag flags" "int vendor_id" "int product_id" "int dev_class" "libusb_hotplug_callback_fn cb_fn" "void *user_data" "libusb_hotplug_callback_handle *handle" +This function registers a hotplug filter. +The +.Fa events +argument selects which events make the hotplug filter trigger. +Available event values are LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED and LIBUSB_HOTPLUG_EVENT_DEVICE_LEFT. +One or more events must be specified. +The +.Fa vendor_id , +.Fa product_id +and +.Fa dev_class +arguments can be set to LIBUSB_HOTPLUG_MATCH_ANY to match any value in the USB device descriptor. +Else the specified value is used for matching. +If the +.Fa flags +argument is set to LIBUSB_HOTPLUG_ENUMERATE, all currently attached and matching USB devices will be passed to the hotplug filter, given by the +.Fa cb_fn +argument. +Else the +.Fa flags +argument should be set to LIBUSB_HOTPLUG_NO_FLAGS. +This function returns 0 upon success or a LIBUSB_ERROR code on failure. +.Pp +.Ft int +.Fn libusb_hotplug_callback_fn "libusb_context *ctx" "libusb_device *device" "libusb_hotplug_event event" "void *user_data" +The hotplug filter function. +If this function returns non-zero, the filter is removed. +Else the filter is kept and can receive more events. +The +.Fa user_data +argument is the same as given when the filter was registered. +The +.Fa event +argument can be either of LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED or LIBUSB_HOTPLUG_EVENT_DEVICE_LEFT. +.Pp +.Ft void +.Fn libusb_hotplug_deregister_callback "libusb_context *ctx" "libusb_hotplug_callback_handle handle" +This function unregisters a hotplug filter. .Sh LIBUSB VERSION 0.1 COMPATIBILITY The library is also compliant with LibUSB version 0.1.12. 
.Pp diff --git a/lib/libusb/libusb.h b/lib/libusb/libusb.h index ab20e92..44bdb4b 100644 --- a/lib/libusb/libusb.h +++ b/lib/libusb/libusb.h @@ -33,6 +33,8 @@ #include <sys/types.h> #endif +#define LIBUSB_API_VERSION 0x01000102 + #define LIBUSB_CALL #ifdef __cplusplus @@ -99,6 +101,10 @@ enum libusb_device_capability_type { #define LIBUSB_USB_2_0_EXTENSION_DEVICE_CAPABILITY_SIZE 7 #define LIBUSB_SS_USB_DEVICE_CAPABILITY_SIZE 10 +#define LIBUSB_BT_USB_2_0_EXTENSION_SIZE 7 +#define LIBUSB_BT_SS_USB_DEVICE_CAPABILITY_SIZE 10 +#define LIBUSB_BT_CONTAINER_ID_SIZE 20 + #define LIBUSB_ENDPOINT_ADDRESS_MASK 0x0f #define LIBUSB_ENDPOINT_DIR_MASK 0x80 @@ -163,6 +169,13 @@ enum libusb_iso_usage_type { LIBUSB_ISO_USAGE_TYPE_IMPLICIT = 2, }; +enum libusb_bos_type { + LIBUSB_BT_WIRELESS_USB_DEVICE_CAPABILITY = 1, + LIBUSB_BT_USB_2_0_EXTENSION = 2, + LIBUSB_BT_SS_USB_DEVICE_CAPABILITY = 3, + LIBUSB_BT_CONTAINER_ID = 4, +}; + enum libusb_error { LIBUSB_SUCCESS = 0, LIBUSB_ERROR_IO = -1, @@ -223,24 +236,47 @@ enum libusb_debug_level { LIBUSB_DEBUG_TRANSFER=2, }; +#define LIBUSB_HOTPLUG_MATCH_ANY -1 + +typedef enum { + LIBUSB_HOTPLUG_NO_FLAGS = 0, + LIBUSB_HOTPLUG_ENUMERATE = 1 << 0, +} libusb_hotplug_flag; + +typedef enum { + LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED = 1, + LIBUSB_HOTPLUG_EVENT_DEVICE_LEFT = 2, +} libusb_hotplug_event; + /* libusb structures */ struct libusb_context; struct libusb_device; struct libusb_transfer; struct libusb_device_handle; +struct libusb_hotplug_callback_handle_struct; struct libusb_pollfd { int fd; short events; }; +struct libusb_version { + const uint16_t major; + const uint16_t minor; + const uint16_t micro; + const uint16_t nano; + const char *rc; + const char *describe; +}; + typedef struct libusb_context libusb_context; typedef struct libusb_device libusb_device; typedef struct libusb_device_handle libusb_device_handle; typedef struct libusb_pollfd libusb_pollfd; typedef void (*libusb_pollfd_added_cb) (int fd, short events, void *user_data); 
typedef void (*libusb_pollfd_removed_cb) (int fd, void *user_data); +typedef struct libusb_hotplug_callback_handle_struct *libusb_hotplug_callback_handle; typedef struct libusb_device_descriptor { uint8_t bLength; @@ -338,6 +374,13 @@ typedef struct libusb_ss_usb_device_capability_descriptor { uint16_t wU2DevExitLat; } libusb_ss_usb_device_capability_descriptor __aligned(sizeof(void *)); +typedef struct libusb_bos_dev_capability_descriptor { + uint8_t bLength; + uint8_t bDescriptorType; + uint8_t bDevCapabilityType; + uint8_t dev_capability_data[0]; +} libusb_bos_dev_capability_descriptor __aligned(sizeof(void *)); + typedef struct libusb_bos_descriptor { uint8_t bLength; uint8_t bDescriptorType; @@ -347,6 +390,21 @@ typedef struct libusb_bos_descriptor { struct libusb_ss_usb_device_capability_descriptor *ss_usb_cap; } libusb_bos_descriptor __aligned(sizeof(void *)); +typedef struct libusb_usb_2_0_extension_descriptor { + uint8_t bLength; + uint8_t bDescriptorType; + uint8_t bDevCapabilityType; + uint32_t bmAttributes; +} libusb_usb_2_0_extension_descriptor __aligned(sizeof(void *)); + +typedef struct libusb_container_id_descriptor { + uint8_t bLength; + uint8_t bDescriptorType; + uint8_t bDevCapabilityType; + uint8_t bReserved; + uint8_t ContainerID[16]; +} libusb_container_id_descriptor __aligned(sizeof(void *)); + typedef struct libusb_control_setup { uint8_t bmRequestType; uint8_t bRequest; @@ -384,6 +442,7 @@ typedef struct libusb_transfer { /* Library initialisation */ void libusb_set_debug(libusb_context * ctx, int level); +const struct libusb_version *libusb_get_version(void); const char *libusb_strerror(int code); const char *libusb_error_name(int code); int libusb_init(libusb_context ** context); @@ -394,6 +453,7 @@ void libusb_exit(struct libusb_context *ctx); ssize_t libusb_get_device_list(libusb_context * ctx, libusb_device *** list); void libusb_free_device_list(libusb_device ** list, int unref_devices); uint8_t libusb_get_bus_number(libusb_device * 
dev); +uint8_t libusb_get_port_number(libusb_device * dev); int libusb_get_port_numbers(libusb_device *dev, uint8_t *buf, uint8_t bufsize); int libusb_get_port_path(libusb_context *ctx, libusb_device *dev, uint8_t *buf, uint8_t bufsize); uint8_t libusb_get_device_address(libusb_device * dev); @@ -419,6 +479,7 @@ int libusb_get_driver(libusb_device_handle * devh, int interface, char *name, in int libusb_detach_kernel_driver_np(libusb_device_handle * devh, int interface); int libusb_detach_kernel_driver(libusb_device_handle * devh, int interface); int libusb_attach_kernel_driver(libusb_device_handle * devh, int interface); +int libusb_set_auto_detach_kernel_driver(libusb_device_handle *dev, int enable); int libusb_set_interface_alt_setting(libusb_device_handle * devh, int interface_number, int alternate_setting); /* USB Descriptors */ @@ -428,6 +489,8 @@ int libusb_get_active_config_descriptor(libusb_device * dev, struct libusb_confi int libusb_get_config_descriptor(libusb_device * dev, uint8_t config_index, struct libusb_config_descriptor **config); int libusb_get_config_descriptor_by_value(libusb_device * dev, uint8_t bConfigurationValue, struct libusb_config_descriptor **config); void libusb_free_config_descriptor(struct libusb_config_descriptor *config); +int libusb_get_ss_endpoint_companion_descriptor(struct libusb_context *ctx, const struct libusb_endpoint_descriptor *endpoint, struct libusb_ss_endpoint_companion_descriptor **ep_comp); +void libusb_free_ss_endpoint_companion_descriptor(struct libusb_ss_endpoint_companion_descriptor *ep_comp); int libusb_get_string_descriptor(libusb_device_handle * devh, uint8_t desc_index, uint16_t langid, unsigned char *data, int length); int libusb_get_string_descriptor_ascii(libusb_device_handle * devh, uint8_t desc_index, uint8_t *data, int length); int libusb_get_descriptor(libusb_device_handle * devh, uint8_t desc_type, uint8_t desc_index, uint8_t *data, int length); @@ -435,6 +498,13 @@ int 
libusb_parse_ss_endpoint_comp(const void *buf, int len, struct libusb_ss_end void libusb_free_ss_endpoint_comp(struct libusb_ss_endpoint_companion_descriptor *ep_comp); int libusb_parse_bos_descriptor(const void *buf, int len, struct libusb_bos_descriptor **bos); void libusb_free_bos_descriptor(struct libusb_bos_descriptor *bos); +int libusb_get_bos_descriptor(libusb_device_handle *handle, struct libusb_bos_descriptor **bos); +int libusb_get_usb_2_0_extension_descriptor(struct libusb_context *ctx, struct libusb_bos_dev_capability_descriptor *dev_cap, struct libusb_usb_2_0_extension_descriptor **usb_2_0_extension); +void libusb_free_usb_2_0_extension_descriptor(struct libusb_usb_2_0_extension_descriptor *usb_2_0_extension); +int libusb_get_ss_usb_device_capability_descriptor(struct libusb_context *ctx, struct libusb_bos_dev_capability_descriptor *dev_cap, struct libusb_ss_usb_device_capability_descriptor **ss_usb_device_capability); +void libusb_free_ss_usb_device_capability_descriptor(struct libusb_ss_usb_device_capability_descriptor *ss_usb_device_capability); +int libusb_get_container_id_descriptor(struct libusb_context *ctx, struct libusb_bos_dev_capability_descriptor *dev_cap, struct libusb_container_id_descriptor **container_id); +void libusb_free_container_id_descriptor(struct libusb_container_id_descriptor *container_id); /* Asynchronous device I/O */ @@ -483,6 +553,21 @@ int libusb_interrupt_transfer(libusb_device_handle * devh, uint8_t endpoint, uin uint16_t libusb_cpu_to_le16(uint16_t x); uint16_t libusb_le16_to_cpu(uint16_t x); +/* Hotplug support */ + +typedef int (*libusb_hotplug_callback_fn)(libusb_context *ctx, + libusb_device *device, libusb_hotplug_event event, void *user_data); + +int libusb_hotplug_register_callback(libusb_context *ctx, libusb_hotplug_event events, libusb_hotplug_flag flags, int vendor_id, int product_id, int dev_class, libusb_hotplug_callback_fn cb_fn, void *user_data, libusb_hotplug_callback_handle *handle); +void 
libusb_hotplug_deregister_callback(libusb_context *ctx, libusb_hotplug_callback_handle handle); + +/* Streams support */ + +int libusb_alloc_streams(libusb_device_handle *dev, uint32_t num_streams, unsigned char *endpoints, int num_endpoints); +int libusb_free_streams(libusb_device_handle *dev, unsigned char *endpoints, int num_endpoints); +void libusb_transfer_set_stream_id(struct libusb_transfer *transfer, uint32_t stream_id); +uint32_t libusb_transfer_get_stream_id(struct libusb_transfer *transfer); + #if 0 { /* indent fix */ #endif diff --git a/lib/libusb/libusb10.c b/lib/libusb/libusb10.c index e8f9314..341b9fd 100644 --- a/lib/libusb/libusb10.c +++ b/lib/libusb/libusb10.c @@ -51,6 +51,8 @@ #include "libusb.h" #include "libusb10.h" +#define LIBUSB_NUM_SW_ENDPOINTS (16 * 4) + static pthread_mutex_t default_context_lock = PTHREAD_MUTEX_INITIALIZER; struct libusb_context *usbi_default_context = NULL; @@ -67,6 +69,22 @@ static void libusb10_submit_transfer_sub(struct libusb20_device *, uint8_t); /* Library initialisation / deinitialisation */ +static const struct libusb_version libusb_version = { + .major = 1, + .minor = 0, + .micro = 0, + .nano = 2016, + .rc = "", + .describe = "http://www.freebsd.org" +}; + +const struct libusb_version * +libusb_get_version(void) +{ + + return (&libusb_version); +} + void libusb_set_debug(libusb_context *ctx, int level) { @@ -116,24 +134,34 @@ libusb_init(libusb_context **context) } TAILQ_INIT(&ctx->pollfds); TAILQ_INIT(&ctx->tr_done); + TAILQ_INIT(&ctx->hotplug_cbh); + TAILQ_INIT(&ctx->hotplug_devs); if (pthread_mutex_init(&ctx->ctx_lock, NULL) != 0) { free(ctx); return (LIBUSB_ERROR_NO_MEM); } + if (pthread_mutex_init(&ctx->hotplug_lock, NULL) != 0) { + pthread_mutex_destroy(&ctx->ctx_lock); + free(ctx); + return (LIBUSB_ERROR_NO_MEM); + } if (pthread_condattr_init(&attr) != 0) { pthread_mutex_destroy(&ctx->ctx_lock); + pthread_mutex_destroy(&ctx->hotplug_lock); free(ctx); return (LIBUSB_ERROR_NO_MEM); } if 
(pthread_condattr_setclock(&attr, CLOCK_MONOTONIC) != 0) { pthread_mutex_destroy(&ctx->ctx_lock); + pthread_mutex_destroy(&ctx->hotplug_lock); pthread_condattr_destroy(&attr); free(ctx); return (LIBUSB_ERROR_OTHER); } if (pthread_cond_init(&ctx->ctx_cond, &attr) != 0) { pthread_mutex_destroy(&ctx->ctx_lock); + pthread_mutex_destroy(&ctx->hotplug_lock); pthread_condattr_destroy(&attr); free(ctx); return (LIBUSB_ERROR_NO_MEM); @@ -141,10 +169,12 @@ libusb_init(libusb_context **context) pthread_condattr_destroy(&attr); ctx->ctx_handler = NO_THREAD; + ctx->hotplug_handler = NO_THREAD; ret = pipe(ctx->ctrl_pipe); if (ret < 0) { pthread_mutex_destroy(&ctx->ctx_lock); + pthread_mutex_destroy(&ctx->hotplug_lock); pthread_cond_destroy(&ctx->ctx_cond); free(ctx); return (LIBUSB_ERROR_OTHER); @@ -177,12 +207,27 @@ libusb_exit(libusb_context *ctx) if (ctx == NULL) return; + /* stop hotplug thread, if any */ + + if (ctx->hotplug_handler != NO_THREAD) { + pthread_t td; + void *ptr; + + HOTPLUG_LOCK(ctx); + td = ctx->hotplug_handler; + ctx->hotplug_handler = NO_THREAD; + HOTPLUG_UNLOCK(ctx); + + pthread_join(td, &ptr); + } + /* XXX cleanup devices */ libusb10_remove_pollfd(ctx, &ctx->ctx_poll); close(ctx->ctrl_pipe[0]); close(ctx->ctrl_pipe[1]); pthread_mutex_destroy(&ctx->ctx_lock); + pthread_mutex_destroy(&ctx->hotplug_lock); pthread_cond_destroy(&ctx->ctx_cond); pthread_mutex_lock(&default_context_lock); @@ -290,6 +335,14 @@ libusb_get_bus_number(libusb_device *dev) return (libusb20_dev_get_bus_number(dev->os_priv)); } +uint8_t +libusb_get_port_number(libusb_device *dev) +{ + if (dev == NULL) + return (0); /* should not happen */ + return (libusb20_dev_get_parent_port(dev->os_priv)); +} + int libusb_get_port_numbers(libusb_device *dev, uint8_t *buf, uint8_t bufsize) { @@ -442,7 +495,7 @@ libusb_open(libusb_device *dev, libusb_device_handle **devh) if (dev == NULL) return (LIBUSB_ERROR_INVALID_PARAM); - err = libusb20_dev_open(pdev, 16 * 4 /* number of endpoints */ ); + err = 
libusb20_dev_open(pdev, LIBUSB_NUM_SW_ENDPOINTS); if (err) { libusb_unref_device(dev); return (LIBUSB_ERROR_NO_MEM); @@ -611,6 +664,7 @@ int libusb_claim_interface(struct libusb20_device *pdev, int interface_number) { libusb_device *dev; + int err = 0; dev = libusb_get_device(pdev); if (dev == NULL) @@ -619,11 +673,17 @@ libusb_claim_interface(struct libusb20_device *pdev, int interface_number) if (interface_number < 0 || interface_number > 31) return (LIBUSB_ERROR_INVALID_PARAM); + if (pdev->auto_detach != 0) { + err = libusb_detach_kernel_driver(pdev, interface_number); + if (err != 0) + goto done; + } + CTX_LOCK(dev->ctx); dev->claimed_interfaces |= (1 << interface_number); CTX_UNLOCK(dev->ctx); - - return (0); +done: + return (err); } int @@ -639,13 +699,19 @@ libusb_release_interface(struct libusb20_device *pdev, int interface_number) if (interface_number < 0 || interface_number > 31) return (LIBUSB_ERROR_INVALID_PARAM); + if (pdev->auto_detach != 0) { + err = libusb_attach_kernel_driver(pdev, interface_number); + if (err != 0) + goto done; + } + CTX_LOCK(dev->ctx); if (!(dev->claimed_interfaces & (1 << interface_number))) err = LIBUSB_ERROR_NOT_FOUND; - - if (!err) + else dev->claimed_interfaces &= ~(1 << interface_number); CTX_UNLOCK(dev->ctx); +done: return (err); } @@ -847,6 +913,13 @@ libusb_attach_kernel_driver(struct libusb20_device *pdev, int interface) return (0); } +int +libusb_set_auto_detach_kernel_driver(libusb_device_handle *dev, int enable) +{ + dev->auto_detach = (enable ? 
1 : 0); + return (0); +} + /* Asynchronous device I/O */ struct libusb_transfer * @@ -1337,7 +1410,8 @@ found: maxframe = libusb10_get_maxframe(pdev, uxfer); /* make sure the transfer is opened */ - err = libusb20_tr_open(pxfer0, buffsize, maxframe, endpoint); + err = libusb20_tr_open_stream(pxfer0, buffsize, maxframe, + endpoint, sxfer->stream_id); if (err && (err != LIBUSB20_ERROR_BUSY)) { goto failure; } @@ -1489,7 +1563,17 @@ libusb_cancel_transfer(struct libusb_transfer *uxfer) UNEXPORTED void libusb10_cancel_all_transfer(libusb_device *dev) { - /* TODO */ + struct libusb20_device *pdev = dev->os_priv; + unsigned x; + + for (x = 0; x != LIBUSB_NUM_SW_ENDPOINTS; x++) { + struct libusb20_transfer *xfer; + + xfer = libusb20_tr_get_pointer(pdev, x); + if (xfer == NULL) + continue; + libusb20_tr_close(xfer); + } } uint16_t diff --git a/lib/libusb/libusb10.h b/lib/libusb/libusb10.h index f1e5460..86bf5e3 100644 --- a/lib/libusb/libusb10.h +++ b/lib/libusb/libusb10.h @@ -36,6 +36,8 @@ #define CTX_LOCK(ctx) pthread_mutex_lock(&(ctx)->ctx_lock) #define CTX_TRYLOCK(ctx) pthread_mutex_trylock(&(ctx)->ctx_lock) #define CTX_UNLOCK(ctx) pthread_mutex_unlock(&(ctx)->ctx_lock) +#define HOTPLUG_LOCK(ctx) pthread_mutex_lock(&(ctx)->hotplug_lock) +#define HOTPLUG_UNLOCK(ctx) pthread_mutex_unlock(&(ctx)->hotplug_lock) #define DPRINTF(ctx, dbg, format, args...) 
do { \ if ((ctx)->debug == dbg) { \ @@ -67,11 +69,22 @@ struct libusb_super_transfer { uint8_t *curr_data; uint32_t rem_len; uint32_t last_len; + uint32_t stream_id; uint8_t state; #define LIBUSB_SUPER_XFER_ST_NONE 0 #define LIBUSB_SUPER_XFER_ST_PEND 1 }; +struct libusb_hotplug_callback_handle_struct { + TAILQ_ENTRY(libusb_hotplug_callback_handle_struct) entry; + int events; + int vendor; + int product; + int devclass; + libusb_hotplug_callback_fn fn; + void *user_data; +}; + struct libusb_context { int debug; int debug_fixed; @@ -80,12 +93,16 @@ struct libusb_context { int tr_done_gen; pthread_mutex_t ctx_lock; + pthread_mutex_t hotplug_lock; pthread_cond_t ctx_cond; + pthread_t hotplug_handler; pthread_t ctx_handler; #define NO_THREAD ((pthread_t)-1) TAILQ_HEAD(, libusb_super_pollfd) pollfds; TAILQ_HEAD(, libusb_super_transfer) tr_done; + TAILQ_HEAD(, libusb_hotplug_callback_handle_struct) hotplug_cbh; + TAILQ_HEAD(, libusb_device) hotplug_devs; struct libusb_super_pollfd ctx_poll; @@ -103,6 +120,8 @@ struct libusb_device { struct libusb_context *ctx; + TAILQ_ENTRY(libusb_device) hotplug_entry; + TAILQ_HEAD(, libusb_super_transfer) tr_head; struct libusb20_device *os_priv; diff --git a/lib/libusb/libusb10_desc.c b/lib/libusb/libusb10_desc.c index d2f3f53..87492da 100644 --- a/lib/libusb/libusb10_desc.c +++ b/lib/libusb/libusb10_desc.c @@ -410,6 +410,23 @@ libusb_free_ss_endpoint_comp(struct libusb_ss_endpoint_companion_descriptor *ep_ } int +libusb_get_ss_endpoint_companion_descriptor(struct libusb_context *ctx, + const struct libusb_endpoint_descriptor *endpoint, + struct libusb_ss_endpoint_companion_descriptor **ep_comp) +{ + if (endpoint == NULL) + return (LIBUSB_ERROR_INVALID_PARAM); + return (libusb_parse_ss_endpoint_comp(endpoint->extra, endpoint->extra_length, ep_comp)); +} + +void +libusb_free_ss_endpoint_companion_descriptor(struct libusb_ss_endpoint_companion_descriptor *ep_comp) +{ + + libusb_free_ss_endpoint_comp(ep_comp); +} + +int 
libusb_parse_bos_descriptor(const void *buf, int len, struct libusb_bos_descriptor **bos) { @@ -520,3 +537,154 @@ libusb_free_bos_descriptor(struct libusb_bos_descriptor *bos) free(bos); } + +int +libusb_get_bos_descriptor(libusb_device_handle *handle, + struct libusb_bos_descriptor **bos) +{ + uint8_t bos_header[LIBUSB_DT_BOS_SIZE] = {0}; + uint16_t wTotalLength; + uint8_t *bos_data; + int err; + + err = libusb_get_descriptor(handle, LIBUSB_DT_BOS, 0, + bos_header, sizeof(bos_header)); + if (err < 0) + return (err); + + wTotalLength = bos_header[2] | (bos_header[3] << 8); + if (wTotalLength < LIBUSB_DT_BOS_SIZE) + return (LIBUSB_ERROR_INVALID_PARAM); + + bos_data = calloc(wTotalLength, 1); + if (bos_data == NULL) + return (LIBUSB_ERROR_NO_MEM); + + err = libusb_get_descriptor(handle, LIBUSB_DT_BOS, 0, + bos_data, wTotalLength); + if (err < 0) + goto done; + + /* avoid descriptor length mismatches */ + bos_data[2] = (wTotalLength & 0xFF); + bos_data[3] = (wTotalLength >> 8); + + err = libusb_parse_bos_descriptor(bos_data, wTotalLength, bos); +done: + free(bos_data); + return (err); +} + +int +libusb_get_usb_2_0_extension_descriptor(struct libusb_context *ctx, + struct libusb_bos_dev_capability_descriptor *dev_cap, + struct libusb_usb_2_0_extension_descriptor **usb_2_0_extension) +{ + struct libusb_usb_2_0_extension_descriptor *desc; + + if (dev_cap == NULL || usb_2_0_extension == NULL || + dev_cap->bDevCapabilityType != LIBUSB_BT_USB_2_0_EXTENSION) + return (LIBUSB_ERROR_INVALID_PARAM); + if (dev_cap->bLength < LIBUSB_BT_USB_2_0_EXTENSION_SIZE) + return (LIBUSB_ERROR_IO); + + desc = malloc(sizeof(*desc)); + if (desc == NULL) + return (LIBUSB_ERROR_NO_MEM); + + desc->bLength = LIBUSB_BT_USB_2_0_EXTENSION_SIZE; + desc->bDescriptorType = dev_cap->bDescriptorType; + desc->bDevCapabilityType = dev_cap->bDevCapabilityType; + desc->bmAttributes = + (dev_cap->dev_capability_data[0]) | + (dev_cap->dev_capability_data[1] << 8) | + (dev_cap->dev_capability_data[2] << 16) | + 
(dev_cap->dev_capability_data[3] << 24); + + *usb_2_0_extension = desc; + return (0); +} + +void +libusb_free_usb_2_0_extension_descriptor( + struct libusb_usb_2_0_extension_descriptor *usb_2_0_extension) +{ + + free(usb_2_0_extension); +} + +int +libusb_get_ss_usb_device_capability_descriptor(struct libusb_context *ctx, + struct libusb_bos_dev_capability_descriptor *dev_cap, + struct libusb_ss_usb_device_capability_descriptor **ss_usb_device_capability) +{ + struct libusb_ss_usb_device_capability_descriptor *desc; + + if (dev_cap == NULL || ss_usb_device_capability == NULL || + dev_cap->bDevCapabilityType != LIBUSB_BT_SS_USB_DEVICE_CAPABILITY) + return (LIBUSB_ERROR_INVALID_PARAM); + if (dev_cap->bLength < LIBUSB_BT_SS_USB_DEVICE_CAPABILITY_SIZE) + return (LIBUSB_ERROR_IO); + + desc = malloc(sizeof(*desc)); + if (desc == NULL) + return (LIBUSB_ERROR_NO_MEM); + + desc->bLength = LIBUSB_BT_SS_USB_DEVICE_CAPABILITY_SIZE; + desc->bDescriptorType = dev_cap->bDescriptorType; + desc->bDevCapabilityType = dev_cap->bDevCapabilityType; + desc->bmAttributes = dev_cap->dev_capability_data[0]; + desc->wSpeedSupported = dev_cap->dev_capability_data[1] | + (dev_cap->dev_capability_data[2] << 8); + desc->bFunctionalitySupport = dev_cap->dev_capability_data[3]; + desc->bU1DevExitLat = dev_cap->dev_capability_data[4]; + desc->wU2DevExitLat = dev_cap->dev_capability_data[5] | + (dev_cap->dev_capability_data[6] << 8); + + *ss_usb_device_capability = desc; + return (0); +} + +void +libusb_free_ss_usb_device_capability_descriptor( + struct libusb_ss_usb_device_capability_descriptor *ss_usb_device_capability) +{ + + free(ss_usb_device_capability); +} + +int +libusb_get_container_id_descriptor(struct libusb_context *ctx, + struct libusb_bos_dev_capability_descriptor *dev_cap, + struct libusb_container_id_descriptor **container_id) +{ + struct libusb_container_id_descriptor *desc; + + if (dev_cap == NULL || container_id == NULL || + dev_cap->bDevCapabilityType != LIBUSB_BT_CONTAINER_ID) + 
return (LIBUSB_ERROR_INVALID_PARAM); + if (dev_cap->bLength < LIBUSB_BT_CONTAINER_ID_SIZE) + return (LIBUSB_ERROR_IO); + + desc = malloc(sizeof(*desc)); + if (desc == NULL) + return (LIBUSB_ERROR_NO_MEM); + + desc->bLength = LIBUSB_BT_CONTAINER_ID_SIZE; + desc->bDescriptorType = dev_cap->bDescriptorType; + desc->bDevCapabilityType = dev_cap->bDevCapabilityType; + desc->bReserved = dev_cap->dev_capability_data[0]; + memcpy(desc->ContainerID, dev_cap->dev_capability_data + 1, + sizeof(desc->ContainerID)); + + *container_id = desc; + return (0); +} + +void +libusb_free_container_id_descriptor( + struct libusb_container_id_descriptor *container_id) +{ + + free(container_id); +} diff --git a/lib/libusb/libusb10_hotplug.c b/lib/libusb/libusb10_hotplug.c new file mode 100644 index 0000000..162cf2b --- /dev/null +++ b/lib/libusb/libusb10_hotplug.c @@ -0,0 +1,237 @@ +/* $FreeBSD$ */ +/*- + * Copyright (c) 2016 Hans Petter Selasky. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef LIBUSB_GLOBAL_INCLUDE_FILE +#include LIBUSB_GLOBAL_INCLUDE_FILE +#else +#include <assert.h> +#include <errno.h> +#include <poll.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <time.h> +#include <sys/fcntl.h> +#include <sys/ioctl.h> +#include <sys/queue.h> +#include <sys/endian.h> +#endif + +#define libusb_device_handle libusb20_device + +#include "libusb20.h" +#include "libusb20_desc.h" +#include "libusb20_int.h" +#include "libusb.h" +#include "libusb10.h" + +static int +libusb_hotplug_equal(libusb_device *_adev, libusb_device *_bdev) +{ + struct libusb20_device *adev = _adev->os_priv; + struct libusb20_device *bdev = _bdev->os_priv; + + if (adev->bus_number != bdev->bus_number) + return (0); + if (adev->device_address != bdev->device_address) + return (0); + if (memcmp(&adev->ddesc, &bdev->ddesc, sizeof(adev->ddesc))) + return (0); + if (memcmp(&adev->session_data, &bdev->session_data, sizeof(adev->session_data))) + return (0); + return (1); +} + +static int +libusb_hotplug_filter(libusb_context *ctx, libusb_hotplug_callback_handle pcbh, + libusb_device *dev, libusb_hotplug_event event) +{ + if (!(pcbh->events & event)) + return (0); + if (pcbh->vendor != LIBUSB_HOTPLUG_MATCH_ANY && + pcbh->vendor != libusb20_dev_get_device_desc(dev->os_priv)->idVendor) + return (0); + if (pcbh->product != LIBUSB_HOTPLUG_MATCH_ANY && + pcbh->product != 
libusb20_dev_get_device_desc(dev->os_priv)->idProduct) + return (0); + if (pcbh->devclass != LIBUSB_HOTPLUG_MATCH_ANY && + pcbh->devclass != libusb20_dev_get_device_desc(dev->os_priv)->bDeviceClass) + return (0); + return (pcbh->fn(ctx, dev, event, pcbh->user_data)); +} + +static void * +libusb_hotplug_scan(void *arg) +{ + TAILQ_HEAD(, libusb_device) hotplug_devs; + libusb_hotplug_callback_handle acbh; + libusb_hotplug_callback_handle bcbh; + libusb_context *ctx = arg; + libusb_device **ppdev; + libusb_device *temp; + libusb_device *adev; + libusb_device *bdev; + unsigned do_loop = 1; + ssize_t count; + ssize_t x; + + while (do_loop) { + usleep(4000000); + + HOTPLUG_LOCK(ctx); + + TAILQ_INIT(&hotplug_devs); + + if (ctx->hotplug_handler != NO_THREAD) { + count = libusb_get_device_list(ctx, &ppdev); + if (count < 0) + continue; + for (x = 0; x != count; x++) { + TAILQ_INSERT_TAIL(&hotplug_devs, ppdev[x], + hotplug_entry); + } + libusb_free_device_list(ppdev, 0); + } else { + do_loop = 0; + } + + /* figure out which devices are gone */ + TAILQ_FOREACH_SAFE(adev, &ctx->hotplug_devs, hotplug_entry, temp) { + TAILQ_FOREACH(bdev, &hotplug_devs, hotplug_entry) { + if (libusb_hotplug_equal(adev, bdev)) + break; + } + if (bdev == NULL) { + TAILQ_REMOVE(&ctx->hotplug_devs, adev, hotplug_entry); + TAILQ_FOREACH_SAFE(acbh, &ctx->hotplug_cbh, entry, bcbh) { + if (libusb_hotplug_filter(ctx, acbh, adev, + LIBUSB_HOTPLUG_EVENT_DEVICE_LEFT) == 0) + continue; + TAILQ_REMOVE(&ctx->hotplug_cbh, acbh, entry); + free(acbh); + } + libusb_unref_device(adev); + } + } + + /* figure out which devices are new */ + TAILQ_FOREACH_SAFE(adev, &hotplug_devs, hotplug_entry, temp) { + TAILQ_FOREACH(bdev, &ctx->hotplug_devs, hotplug_entry) { + if (libusb_hotplug_equal(adev, bdev)) + break; + } + if (bdev == NULL) { + TAILQ_REMOVE(&hotplug_devs, adev, hotplug_entry); + TAILQ_INSERT_TAIL(&ctx->hotplug_devs, adev, hotplug_entry); + TAILQ_FOREACH_SAFE(acbh, &ctx->hotplug_cbh, entry, bcbh) { + if 
(libusb_hotplug_filter(ctx, acbh, adev, + LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED) == 0) + continue; + TAILQ_REMOVE(&ctx->hotplug_cbh, acbh, entry); + free(acbh); + } + } + } + HOTPLUG_UNLOCK(ctx); + + /* unref remaining devices */ + while ((adev = TAILQ_FIRST(&hotplug_devs)) != NULL) { + TAILQ_REMOVE(&hotplug_devs, adev, hotplug_entry); + libusb_unref_device(adev); + } + } + return (NULL); +} + +int libusb_hotplug_register_callback(libusb_context *ctx, + libusb_hotplug_event events, libusb_hotplug_flag flags, + int vendor_id, int product_id, int dev_class, + libusb_hotplug_callback_fn cb_fn, void *user_data, + libusb_hotplug_callback_handle *phandle) +{ + libusb_hotplug_callback_handle handle; + struct libusb_device *adev; + + ctx = GET_CONTEXT(ctx); + + if (ctx == NULL || cb_fn == NULL || events == 0 || + vendor_id < -1 || vendor_id > 0xffff || + product_id < -1 || product_id > 0xffff || + dev_class < -1 || dev_class > 0xff) + return (LIBUSB_ERROR_INVALID_PARAM); + + handle = malloc(sizeof(*handle)); + if (handle == NULL) + return (LIBUSB_ERROR_NO_MEM); + + HOTPLUG_LOCK(ctx); + if (ctx->hotplug_handler == NO_THREAD) { + if (pthread_create(&ctx->hotplug_handler, NULL, + &libusb_hotplug_scan, ctx) != 0) + ctx->hotplug_handler = NO_THREAD; + } + handle->events = events; + handle->vendor = vendor_id; + handle->product = product_id; + handle->devclass = dev_class; + handle->fn = cb_fn; + handle->user_data = user_data; + + if (flags & LIBUSB_HOTPLUG_ENUMERATE) { + TAILQ_FOREACH(adev, &ctx->hotplug_devs, hotplug_entry) { + if (libusb_hotplug_filter(ctx, handle, adev, + LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED) == 0) + continue; + free(handle); + handle = NULL; + break; + } + } + if (handle != NULL) + TAILQ_INSERT_TAIL(&ctx->hotplug_cbh, handle, entry); + HOTPLUG_UNLOCK(ctx); + + if (phandle != NULL) + *phandle = handle; + return (LIBUSB_SUCCESS); +} + +void libusb_hotplug_deregister_callback(libusb_context *ctx, + libusb_hotplug_callback_handle handle) +{ + ctx = 
GET_CONTEXT(ctx); + + if (ctx == NULL || handle == NULL) + return; + + HOTPLUG_LOCK(ctx); + TAILQ_REMOVE(&ctx->hotplug_cbh, handle, entry); + HOTPLUG_UNLOCK(ctx); + + free(handle); +} diff --git a/lib/libusb/libusb10_io.c b/lib/libusb/libusb10_io.c index e27bbba..a276231 100644 --- a/lib/libusb/libusb10_io.c +++ b/lib/libusb/libusb10_io.c @@ -767,3 +767,48 @@ libusb_fill_iso_transfer(struct libusb_transfer *transfer, transfer->callback = callback; } +int +libusb_alloc_streams(libusb_device_handle *dev, uint32_t num_streams, + unsigned char *endpoints, int num_endpoints) +{ + if (num_streams > 1) + return (LIBUSB_ERROR_INVALID_PARAM); + return (0); +} + +int +libusb_free_streams(libusb_device_handle *dev, unsigned char *endpoints, int num_endpoints) +{ + + return (0); +} + +void +libusb_transfer_set_stream_id(struct libusb_transfer *transfer, uint32_t stream_id) +{ + struct libusb_super_transfer *sxfer; + + if (transfer == NULL) + return; + + sxfer = (struct libusb_super_transfer *)( + ((uint8_t *)transfer) - sizeof(*sxfer)); + + /* set stream ID */ + sxfer->stream_id = stream_id; +} + +uint32_t +libusb_transfer_get_stream_id(struct libusb_transfer *transfer) +{ + struct libusb_super_transfer *sxfer; + + if (transfer == NULL) + return (0); + + sxfer = (struct libusb_super_transfer *)( + ((uint8_t *)transfer) - sizeof(*sxfer)); + + /* get stream ID */ + return (sxfer->stream_id); +} diff --git a/lib/libusb/libusb20.c b/lib/libusb/libusb20.c index 1de3a26..5ab6c86 100644 --- a/lib/libusb/libusb20.c +++ b/lib/libusb/libusb20.c @@ -601,6 +601,12 @@ libusb20_dev_close(struct libusb20_device *pdev) */ pdev->claimed_interface = 0; + /* + * The following variable is only used by the libusb v1.0 + * compat layer: + */ + pdev->auto_detach = 0; + return (error); } diff --git a/lib/libusb/libusb20_int.h b/lib/libusb/libusb20_int.h index 27adf00..7a1c515 100644 --- a/lib/libusb/libusb20_int.h +++ b/lib/libusb/libusb20_int.h @@ -213,6 +213,9 @@ struct libusb20_device { /* claimed 
interface */ uint8_t claimed_interface; + /* auto detach kernel driver */ + uint8_t auto_detach; + /* device file handle */ int file; diff --git a/share/man/man4/filemon.4 b/share/man/man4/filemon.4 index 92876a3..6ab3e9e 100644 --- a/share/man/man4/filemon.4 +++ b/share/man/man4/filemon.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 9, 2016 +.Dd June 21, 2016 .Dt FILEMON 4 .Os .Sh NAME @@ -51,7 +51,7 @@ calls. .Pp .Nm is not intended to be a security auditing tool. -Many syscalls are not tracked and binaries of foreign ABI will not be fully +Many system calls are not tracked and binaries of foreign ABI will not be fully audited. It is intended for auditing of processes for the purpose of determining its dependencies in an efficient and easily parsable format. @@ -64,6 +64,9 @@ to handle incremental builds more smartly. System calls are denoted using the following single letters: .Pp .Bl -tag -width indent -compact +.It Ql A +.Xr openat 2 . +The next log entry may be lacking an absolute path or be inaccurate. .It Ql C .Xr chdir 2 .It Ql D @@ -82,11 +85,13 @@ System calls are denoted using the following single letters: .Xr rename 2 .It Ql R .Xr open 2 +or +.Xr openat 2 for read -.It Ql S -.Xr stat 2 .It Ql W .Xr open 2 +or +.Xr openat 2 for write .It Ql X .Xr _exit 2 @@ -116,6 +121,10 @@ Each takes a single argument. Write the internal tracing buffer to the supplied open file descriptor. .It Dv FILEMON_SET_PID Child process ID to trace. +This should normally be done under the control of a parent in the child after +.Xr fork 2 +but before anything else. +See the example below. .El .Sh RETURN VALUES .\" .Rv -std ioctl @@ -138,6 +147,35 @@ The .Nm handle is already associated with a file descriptor. .El +.Pp +The +.Fn ioctl +system call +with +.Dv FILEMON_SET_PID +will fail if: +.Bl -tag -width Er +.It Bq Er ESRCH +No process having the specified process ID exists. 
+.It Bq Er EBUSY +The process ID specified is already being traced and was not the current +process. +.El +.Pp +The +.Fn close +system call on the filemon file descriptor may fail with the errors from +.Xr write 2 +if any error is encountered while writing the log. +It may also fail if: +.Bl -tag -width Er +.It Bq Er EFAULT +An invalid address was used for a traced system call argument, resulting in +no log entry for the system call. +.It Bq Er ENAMETOOLONG +An argument for a traced system call was too long, resulting in +no log entry for the system call. +.El .Sh FILES .Bl -tag -width ".Pa /dev/filemon" .It Pa /dev/filemon @@ -190,6 +228,7 @@ buffer contents to it. .Sh SEE ALSO .Xr dtrace 1 , .Xr ktrace 1 , +.Xr script 1 , .Xr truss 1 , .Xr ioctl 2 .Sh HISTORY @@ -198,14 +237,5 @@ A device appeared in .Fx 9.1 . .Sh BUGS -Loading -.Nm -may reduce system performance for the noted syscalls. -.Pp -Only children of the set process are logged. -Processes can escape being traced by double forking. -This is not seen as a problem as the intended use is build monitoring, which -does not make sense to have daemons for. -.Pp Unloading the module may panic the system, thus requires using .Ic kldunload -f . diff --git a/share/man/man4/ng_mppc.4 b/share/man/man4/ng_mppc.4 index e7eeedf..94a0f4a 100644 --- a/share/man/man4/ng_mppc.4 +++ b/share/man/man4/ng_mppc.4 @@ -35,7 +35,7 @@ .\" $Whistle: ng_mppc.8,v 1.1 1999/12/08 20:20:39 archie Exp $ .\" $FreeBSD$ .\" -.Dd December 8, 1999 +.Dd June 7, 2016 .Dt NG_MPPC 4 .Os .Sh NAME @@ -153,12 +153,6 @@ and are supplied to selectively compile in either or both capabilities. At least one of these must be defined, or else this node type is useless. .Pp -The MPPC protocol requires proprietary compression code available -from Hi/Fn (formerly STAC). -These files must be obtained elsewhere and added to the kernel -sources before this node type will compile with the -.Dv NETGRAPH_MPPC_COMPRESSION -option. 
.Sh SEE ALSO .Xr netgraph 4 , .Xr ng_ppp 4 , diff --git a/share/man/man9/atomic.9 b/share/man/man9/atomic.9 index 5939b9c..fdeb0d3 100644 --- a/share/man/man9/atomic.9 +++ b/share/man/man9/atomic.9 @@ -23,7 +23,7 @@ .\" .\" $FreeBSD$ .\" -.Dd August 14, 2015 +.Dd May 12, 2016 .Dt ATOMIC 9 .Os .Sh NAME @@ -65,6 +65,8 @@ .Ft <type> .Fn atomic_swap_<type> "volatile <type> *p" "<type> v" .Ft int +.Fn atomic_testandclear_<type> "volatile <type> *p" "u_int v" +.Ft int .Fn atomic_testandset_<type> "volatile <type> *p" "u_int v" .Sh DESCRIPTION Each of the atomic operations is guaranteed to be atomic across multiple @@ -313,6 +315,15 @@ and .Dq Li 16 and do not have any variants with memory barriers at this time. .Bl -hang +.It Fn atomic_testandclear p v +.Bd -literal -compact +bit = 1 << (v % (sizeof(*p) * NBBY)); +tmp = (*p & bit) != 0; +*p &= ~bit; +return (tmp); +.Ed +.El +.Bl -hang .It Fn atomic_testandset p v .Bd -literal -compact bit = 1 << (v % (sizeof(*p) * NBBY)); @@ -324,6 +335,8 @@ return (tmp); .Pp The .Fn atomic_testandset +and +.Fn atomic_testandclear functions are only implemented for the types .Dq Li int , .Dq Li long @@ -352,6 +365,8 @@ and functions return the value at the specified address. The .Fn atomic_testandset +and +.Fn atomic_testandclear function returns the result of the test operation. .Sh EXAMPLES This example uses the @@ -429,3 +444,6 @@ and .Fn atomic_testandset operations were added in .Fx 10.0 . +.Fn atomic_testandclear +operation was added in +.Fx 11.0 . 
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h index 2073fa5..62e5ff2 100644 --- a/sys/amd64/include/apicvar.h +++ b/sys/amd64/include/apicvar.h @@ -216,7 +216,6 @@ int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, void lapic_set_tpr(u_int vector); void lapic_setup(int boot); void xen_intr_handle_upcall(struct trapframe *frame); -void hv_vector_handler(struct trapframe *frame); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index bb93835..cdcc7b8 100644 --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -84,6 +84,8 @@ u_int atomic_fetchadd_int(volatile u_int *p, u_int v); u_long atomic_fetchadd_long(volatile u_long *p, u_long v); int atomic_testandset_int(volatile u_int *p, u_int v); int atomic_testandset_long(volatile u_long *p, u_int v); +int atomic_testandclear_int(volatile u_int *p, u_int v); +int atomic_testandclear_long(volatile u_long *p, u_int v); #define ATOMIC_LOAD(TYPE, LOP) \ u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) @@ -241,6 +243,40 @@ atomic_testandset_long(volatile u_long *p, u_int v) return (res); } +static __inline int +atomic_testandclear_int(volatile u_int *p, u_int v) +{ + u_char res; + + __asm __volatile( + " " MPLOCKED " " + " btrl %2,%1 ; " + " setc %0 ; " + "# atomic_testandclear_int" + : "=q" (res), /* 0 */ + "+m" (*p) /* 1 */ + : "Ir" (v & 0x1f) /* 2 */ + : "cc"); + return (res); +} + +static __inline int +atomic_testandclear_long(volatile u_long *p, u_int v) +{ + u_char res; + + __asm __volatile( + " " MPLOCKED " " + " btrq %2,%1 ; " + " setc %0 ; " + "# atomic_testandclear_long" + : "=q" (res), /* 0 */ + "+m" (*p) /* 1 */ + : "Jr" ((u_long)(v & 0x3f)) /* 2 */ + : "cc"); + return (res); +} + /* * We assume that a = b will do atomic loads and stores. Due to the * IA32 memory model, a simple store guarantees release semantics. 
@@ -462,6 +498,7 @@ u_long atomic_swap_long(volatile u_long *p, u_long v); #define atomic_readandclear_32 atomic_readandclear_int #define atomic_fetchadd_32 atomic_fetchadd_int #define atomic_testandset_32 atomic_testandset_int +#define atomic_testandclear_32 atomic_testandclear_int /* Operations on 64-bit quad words. */ #define atomic_set_64 atomic_set_long @@ -485,6 +522,7 @@ u_long atomic_swap_long(volatile u_long *p, u_long v); #define atomic_readandclear_64 atomic_readandclear_long #define atomic_fetchadd_64 atomic_fetchadd_long #define atomic_testandset_64 atomic_testandset_long +#define atomic_testandclear_64 atomic_testandclear_long /* Operations on pointers. */ #define atomic_set_ptr atomic_set_long diff --git a/sys/cam/ctl/ctl_backend_block.c b/sys/cam/ctl/ctl_backend_block.c index 5b86356..924455d 100644 --- a/sys/cam/ctl/ctl_backend_block.c +++ b/sys/cam/ctl/ctl_backend_block.c @@ -613,10 +613,10 @@ ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, ctl_complete_beio(beio); } -SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t"); -SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t"); -SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t"); -SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t"); +SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t"); +SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t"); +SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t"); +SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t"); static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, @@ -641,10 +641,10 @@ ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, bzero(&xuio, sizeof(xuio)); if (beio->bio_cmd == BIO_READ) { - SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , read, file_start); xuio.uio_rw = UIO_READ; } else { - SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , write, file_start); xuio.uio_rw = UIO_WRITE; } xuio.uio_offset = beio->io_offset; @@ 
-687,7 +687,7 @@ ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); VOP_UNLOCK(be_lun->vn, 0); - SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , read, file_done); if (error == 0 && xuio.uio_resid > 0) { /* * If we red less then requested (EOF), then @@ -736,7 +736,7 @@ ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, VOP_UNLOCK(be_lun->vn, 0); vn_finished_write(mountpoint); - SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , write, file_done); } mtx_lock(&be_lun->io_lock); @@ -872,10 +872,10 @@ ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, bzero(&xuio, sizeof(xuio)); if (beio->bio_cmd == BIO_READ) { - SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , read, file_start); xuio.uio_rw = UIO_READ; } else { - SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , write, file_start); xuio.uio_rw = UIO_WRITE; } xuio.uio_offset = beio->io_offset; @@ -906,9 +906,9 @@ ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, error = ENXIO; if (beio->bio_cmd == BIO_READ) - SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , read, file_done); else - SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , write, file_done); mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, @@ -1504,10 +1504,10 @@ ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, } } -SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t"); -SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t"); -SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t"); -SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t"); +SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t"); +SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t"); +SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t"); +SDT_PROBE_DEFINE1(cbb, , write, 
alloc_done, "uint64_t"); static void ctl_be_block_next(struct ctl_be_block_io *beio) @@ -1552,9 +1552,9 @@ ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, lbalen = ARGS(io); if (lbalen->flags & CTL_LLF_WRITE) { - SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , write, start); } else { - SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , read, start); } beio = ctl_alloc_beio(softc); @@ -1641,10 +1641,10 @@ ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, * need to get the data from the user first. */ if (beio->bio_cmd == BIO_READ) { - SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , read, alloc_done); be_lun->dispatch(be_lun, beio); } else { - SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0); + SDT_PROBE0(cbb, , write, alloc_done); #ifdef CTL_TIME_IO getbinuptime(&io->io_hdr.dma_start_bt); #endif diff --git a/sys/compat/linux/linux_dtrace.h b/sys/compat/linux/linux_dtrace.h index c446b3e..6e91327 100644 --- a/sys/compat/linux/linux_dtrace.h +++ b/sys/compat/linux/linux_dtrace.h @@ -72,8 +72,8 @@ #define LIN_SDT_PROBE_DEFINE5(a, b, c, d, e, f, g, h) _LIN_SDT_PROBE_DEFINE5(\ LINUX_DTRACE, a, b, c, d, e, f, g, h) -#define LIN_SDT_PROBE0(a, b, c) SDT_PROBE1(LINUX_DTRACE, a, b, \ - c, 0) +#define LIN_SDT_PROBE0(a, b, c) SDT_PROBE0(LINUX_DTRACE, a, b, \ + c) #define LIN_SDT_PROBE1(a, b, c, d) SDT_PROBE1(LINUX_DTRACE, a, b, \ c, d) #define LIN_SDT_PROBE2(a, b, c, d, e) SDT_PROBE2(LINUX_DTRACE, a, b, \ diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index db7488b..cb2379a5 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -1338,7 +1338,7 @@ linux_setgroups(struct thread *td, struct linux_setgroups_args *args) newcred->cr_ngroups = 1; setsugid(p); - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c 
index bc543c7..e340873 100644 --- a/sys/compat/linux/linux_socket.c +++ b/sys/compat/linux/linux_socket.c @@ -1040,18 +1040,16 @@ linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args) { struct msghdr msg; struct iovec aiov; - int error; + int error, fromlen; if (PTRIN(args->fromlen) != NULL) { - error = copyin(PTRIN(args->fromlen), &msg.msg_namelen, - sizeof(msg.msg_namelen)); - if (error != 0) - return (error); - - error = linux_to_bsd_sockaddr((struct sockaddr *)PTRIN(args->from), - msg.msg_namelen); + error = copyin(PTRIN(args->fromlen), &fromlen, + sizeof(fromlen)); if (error != 0) return (error); + if (fromlen < 0) + return (EINVAL); + msg.msg_namelen = fromlen; } else msg.msg_namelen = 0; diff --git a/sys/compat/linux/linux_uid16.c b/sys/compat/linux/linux_uid16.c index 9acc047..5e6b3df 100644 --- a/sys/compat/linux/linux_uid16.c +++ b/sys/compat/linux/linux_uid16.c @@ -214,7 +214,7 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args) newcred->cr_ngroups = 1; setsugid(td->td_proc); - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 13289ec..798d105 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -279,11 +279,13 @@ dev/hyperv/utilities/hv_util.c optional hyperv dev/hyperv/vmbus/hv_channel.c optional hyperv dev/hyperv/vmbus/hv_channel_mgmt.c optional hyperv dev/hyperv/vmbus/hv_connection.c optional hyperv -dev/hyperv/vmbus/hv_hv.c optional hyperv -dev/hyperv/vmbus/hv_et.c optional hyperv dev/hyperv/vmbus/hv_ring_buffer.c optional hyperv -dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c optional hyperv -dev/hyperv/vmbus/amd64/hv_vector.S optional hyperv +dev/hyperv/vmbus/hyperv.c optional hyperv +dev/hyperv/vmbus/hyperv_busdma.c optional hyperv +dev/hyperv/vmbus/vmbus.c optional hyperv +dev/hyperv/vmbus/vmbus_et.c optional hyperv +dev/hyperv/vmbus/amd64/hyperv_machdep.c optional hyperv 
+dev/hyperv/vmbus/amd64/vmbus_vector.S optional hyperv dev/kbd/kbd.c optional atkbd | sc | ukbd | vt dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 97ffd8f..72686d9 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -255,11 +255,13 @@ dev/hyperv/utilities/hv_util.c optional hyperv dev/hyperv/vmbus/hv_channel.c optional hyperv dev/hyperv/vmbus/hv_channel_mgmt.c optional hyperv dev/hyperv/vmbus/hv_connection.c optional hyperv -dev/hyperv/vmbus/hv_hv.c optional hyperv -dev/hyperv/vmbus/hv_et.c optional hyperv dev/hyperv/vmbus/hv_ring_buffer.c optional hyperv -dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c optional hyperv -dev/hyperv/vmbus/i386/hv_vector.S optional hyperv +dev/hyperv/vmbus/hyperv.c optional hyperv +dev/hyperv/vmbus/hyperv_busdma.c optional hyperv +dev/hyperv/vmbus/vmbus.c optional hyperv +dev/hyperv/vmbus/vmbus_et.c optional hyperv +dev/hyperv/vmbus/i386/hyperv_machdep.c optional hyperv +dev/hyperv/vmbus/i386/vmbus_vector.S optional hyperv dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard diff --git a/sys/dev/cpuctl/cpuctl.c b/sys/dev/cpuctl/cpuctl.c index ec41a2e..af92bf5 100644 --- a/sys/dev/cpuctl/cpuctl.c +++ b/sys/dev/cpuctl/cpuctl.c @@ -67,9 +67,9 @@ static d_ioctl_t cpuctl_ioctl; static int cpuctl_do_msr(int cpu, cpuctl_msr_args_t *data, u_long cmd, struct thread *td); -static void cpuctl_do_cpuid(int cpu, cpuctl_cpuid_args_t *data, +static int cpuctl_do_cpuid(int cpu, cpuctl_cpuid_args_t *data, struct thread *td); -static void cpuctl_do_cpuid_count(int cpu, cpuctl_cpuid_count_args_t *data, +static int cpuctl_do_cpuid_count(int cpu, cpuctl_cpuid_count_args_t *data, struct thread *td); static int cpuctl_do_update(int cpu, cpuctl_update_args_t *data, struct thread *td); @@ -171,8 +171,7 @@ cpuctl_ioctl(struct cdev *dev, u_long cmd, caddr_t data, ret = cpuctl_do_msr(cpu, (cpuctl_msr_args_t 
*)data, cmd, td); break; case CPUCTL_CPUID: - cpuctl_do_cpuid(cpu, (cpuctl_cpuid_args_t *)data, td); - ret = 0; + ret = cpuctl_do_cpuid(cpu, (cpuctl_cpuid_args_t *)data, td); break; case CPUCTL_UPDATE: ret = priv_check(td, PRIV_CPUCTL_UPDATE); @@ -181,9 +180,8 @@ cpuctl_ioctl(struct cdev *dev, u_long cmd, caddr_t data, ret = cpuctl_do_update(cpu, (cpuctl_update_args_t *)data, td); break; case CPUCTL_CPUID_COUNT: - cpuctl_do_cpuid_count(cpu, (cpuctl_cpuid_count_args_t *)data, - td); - ret = 0; + ret = cpuctl_do_cpuid_count(cpu, + (cpuctl_cpuid_count_args_t *)data, td); break; default: ret = EINVAL; @@ -196,7 +194,7 @@ fail: /* * Actually perform cpuid operation. */ -static void +static int cpuctl_do_cpuid_count(int cpu, cpuctl_cpuid_count_args_t *data, struct thread *td) { @@ -210,23 +208,30 @@ cpuctl_do_cpuid_count(int cpu, cpuctl_cpuid_count_args_t *data, bzero(data->data, sizeof(data->data)); DPRINTF("[cpuctl,%d]: retrieving cpuid lev %#0x type %#0x for %d cpu\n", __LINE__, data->level, data->level_type, cpu); +#ifdef __i386__ + if (cpu_id == 0) + return (ENODEV); +#endif oldcpu = td->td_oncpu; is_bound = cpu_sched_is_bound(td); set_cpu(cpu, td); cpuid_count(data->level, data->level_type, data->data); restore_cpu(oldcpu, is_bound, td); + return (0); } -static void +static int cpuctl_do_cpuid(int cpu, cpuctl_cpuid_args_t *data, struct thread *td) { cpuctl_cpuid_count_args_t cdata; + int error; cdata.level = data->level; /* Override the level type. 
*/ cdata.level_type = 0; - cpuctl_do_cpuid_count(cpu, &cdata, td); + error = cpuctl_do_cpuid_count(cpu, &cdata, td); bcopy(cdata.data, data->data, sizeof(data->data)); /* Ignore error */ + return (error); } /* @@ -249,6 +254,10 @@ cpuctl_do_msr(int cpu, cpuctl_msr_args_t *data, u_long cmd, struct thread *td) */ DPRINTF("[cpuctl,%d]: operating on MSR %#0x for %d cpu\n", __LINE__, data->msr, cpu); +#ifdef __i386__ + if ((cpu_feature & CPUID_MSR) == 0) + return (ENODEV); +#endif oldcpu = td->td_oncpu; is_bound = cpu_sched_is_bound(td); set_cpu(cpu, td); @@ -291,7 +300,9 @@ cpuctl_do_update(int cpu, cpuctl_update_args_t *data, struct thread *td) ("[cpuctl,%d]: bad cpu number %d", __LINE__, cpu)); DPRINTF("[cpuctl,%d]: XXX %d", __LINE__, cpu); - cpuctl_do_cpuid(cpu, &args, td); + ret = cpuctl_do_cpuid(cpu, &args, td); + if (ret != 0) + return (ret); ((uint32_t *)vendor)[0] = args.data[1]; ((uint32_t *)vendor)[1] = args.data[3]; ((uint32_t *)vendor)[2] = args.data[2]; @@ -518,11 +529,6 @@ cpuctl_modevent(module_t mod __unused, int type, void *data __unused) switch(type) { case MOD_LOAD: - if ((cpu_feature & CPUID_MSR) == 0) { - if (bootverbose) - printf("cpuctl: not available.\n"); - return (ENODEV); - } if (bootverbose) printf("cpuctl: access to MSR registers/cpuid info.\n"); cpuctl_devs = malloc(sizeof(*cpuctl_devs) * mp_ncpus, M_CPUCTL, diff --git a/sys/dev/filemon/filemon.c b/sys/dev/filemon/filemon.c index cd40c5a..919af9d 100644 --- a/sys/dev/filemon/filemon.c +++ b/sys/dev/filemon/filemon.c @@ -1,7 +1,7 @@ /*- * Copyright (c) 2011, David E. O'Brien. * Copyright (c) 2009-2011, Juniper Networks, Inc. - * Copyright (c) 2015, EMC Corp. + * Copyright (c) 2015-2016, EMC Corp. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -46,7 +46,6 @@ __FBSDID("$FreeBSD$"); #include <sys/module.h> #include <sys/poll.h> #include <sys/proc.h> -#include <sys/queue.h> #include <sys/sx.h> #include <sys/syscall.h> #include <sys/sysent.h> @@ -55,16 +54,12 @@ __FBSDID("$FreeBSD$"); #include "filemon.h" -#if defined(COMPAT_IA32) || defined(COMPAT_FREEBSD32) || defined(COMPAT_ARCH32) +#if defined(COMPAT_FREEBSD32) #include <compat/freebsd32/freebsd32_syscall.h> #include <compat/freebsd32/freebsd32_proto.h> - -extern struct sysentvec ia32_freebsd_sysvec; +#include <compat/freebsd32/freebsd32_util.h> #endif -extern struct sysentvec elf32_freebsd_sysvec; -extern struct sysentvec elf64_freebsd_sysvec; - static d_close_t filemon_close; static d_ioctl_t filemon_ioctl; static d_open_t filemon_open; @@ -80,27 +75,119 @@ static struct cdevsw filemon_cdevsw = { MALLOC_DECLARE(M_FILEMON); MALLOC_DEFINE(M_FILEMON, "filemon", "File access monitor"); +/* + * The filemon->lock protects several things currently: + * - fname1/fname2/msgbufr are pre-allocated and used per syscall + * for logging and copyins rather than stack variables. + * - Serializing the filemon's log output. + * - Preventing inheritance or removal of the filemon into proc.p_filemon. + */ struct filemon { - TAILQ_ENTRY(filemon) link; /* Link into the in-use list. */ - struct sx lock; /* Lock mutex for this filemon. */ + struct sx lock; /* Lock for this filemon. */ struct file *fp; /* Output file pointer. */ - struct proc *p; /* The process being monitored. */ + struct ucred *cred; /* Credential of tracer. */ char fname1[MAXPATHLEN]; /* Temporary filename buffer. */ char fname2[MAXPATHLEN]; /* Temporary filename buffer. */ char msgbufr[1024]; /* Output message buffer. */ + int error; /* Log write error, returned on close(2). */ + u_int refcnt; /* Pointer reference count. */ + u_int proccnt; /* Process count. 
*/ }; -static TAILQ_HEAD(, filemon) filemons_inuse = TAILQ_HEAD_INITIALIZER(filemons_inuse); -static TAILQ_HEAD(, filemon) filemons_free = TAILQ_HEAD_INITIALIZER(filemons_free); -static struct sx access_lock; - static struct cdev *filemon_dev; +static void filemon_output(struct filemon *filemon, char *msg, size_t len); + +static __inline struct filemon * +filemon_acquire(struct filemon *filemon) +{ + + if (filemon != NULL) + refcount_acquire(&filemon->refcnt); + return (filemon); +} + +/* + * Release a reference and free on the last one. + */ +static void +filemon_release(struct filemon *filemon) +{ + + if (refcount_release(&filemon->refcnt) == 0) + return; + /* + * There are valid cases of releasing while locked, such as in + * filemon_untrack_processes, but none which are done where there + * is not at least 1 reference remaining. + */ + sx_assert(&filemon->lock, SA_UNLOCKED); + + if (filemon->cred != NULL) + crfree(filemon->cred); + sx_destroy(&filemon->lock); + free(filemon, M_FILEMON); +} + +/* + * Acquire the proc's p_filemon reference and lock the filemon. + * The proc's p_filemon may not match this filemon on return. + */ +static struct filemon * +filemon_proc_get(struct proc *p) +{ + struct filemon *filemon; + + PROC_LOCK(p); + filemon = filemon_acquire(p->p_filemon); + PROC_UNLOCK(p); + + if (filemon == NULL) + return (NULL); + /* + * The p->p_filemon may have changed by now. That case is handled + * by the exit and fork hooks and filemon_attach_proc specially. + */ + sx_xlock(&filemon->lock); + return (filemon); +} + +/* Remove and release the filemon on the given process. */ +static void +filemon_proc_drop(struct proc *p) +{ + struct filemon *filemon; + + KASSERT(p->p_filemon != NULL, ("%s: proc %p NULL p_filemon", + __func__, p)); + sx_assert(&p->p_filemon->lock, SA_XLOCKED); + PROC_LOCK(p); + filemon = p->p_filemon; + p->p_filemon = NULL; + --filemon->proccnt; + PROC_UNLOCK(p); + /* + * This should not be the last reference yet. 
filemon_release() + * cannot be called with filemon locked, which the caller expects + * will stay locked. + */ + KASSERT(filemon->refcnt > 1, ("%s: proc %p dropping filemon %p " + "with last reference", __func__, p, filemon)); + filemon_release(filemon); +} + +/* Unlock and release the filemon. */ +static __inline void +filemon_drop(struct filemon *filemon) +{ + + sx_xunlock(&filemon->lock); + filemon_release(filemon); +} -#include "filemon_lock.c" #include "filemon_wrapper.c" static void -filemon_comment(struct filemon *filemon) +filemon_write_header(struct filemon *filemon) { int len; struct timeval now; @@ -115,35 +202,154 @@ filemon_comment(struct filemon *filemon) filemon_output(filemon, filemon->msgbufr, len); } +/* + * Invalidate the passed filemon in all processes. + */ static void -filemon_dtr(void *data) +filemon_untrack_processes(struct filemon *filemon) { - struct filemon *filemon = data; + struct proc *p; - if (filemon != NULL) { - struct file *fp; + sx_assert(&filemon->lock, SA_XLOCKED); - /* Follow same locking order as filemon_pid_check. */ - filemon_lock_write(); - sx_xlock(&filemon->lock); + /* Avoid allproc loop if there is no need. */ + if (filemon->proccnt == 0) + return; - /* Remove from the in-use list. */ - TAILQ_REMOVE(&filemons_inuse, filemon, link); + /* + * Processes in this list won't go away while here since + * filemon_event_process_exit() will lock on filemon->lock + * which we hold. + */ + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + /* + * No PROC_LOCK is needed to compare here since it is + * guaranteed to not change since we have its filemon + * locked. Everything that changes this p_filemon will + * be locked on it. + */ + if (p->p_filemon == filemon) + filemon_proc_drop(p); + } + sx_sunlock(&allproc_lock); + + /* + * It's possible some references were acquired but will be + * dropped shortly as they are restricted from being + * inherited. There is at least the reference in cdevpriv remaining. 
+ */ + KASSERT(filemon->refcnt > 0, ("%s: filemon %p should have " + "references still.", __func__, filemon)); + KASSERT(filemon->proccnt == 0, ("%s: filemon %p should not have " + "attached procs still.", __func__, filemon)); +} - fp = filemon->fp; - filemon->fp = NULL; - filemon->p = NULL; +/* + * Close out the log. + */ +static void +filemon_close_log(struct filemon *filemon) +{ + struct file *fp; + struct timeval now; + size_t len; - /* Add to the free list. */ - TAILQ_INSERT_TAIL(&filemons_free, filemon, link); + sx_assert(&filemon->lock, SA_XLOCKED); + if (filemon->fp == NULL) + return; - /* Give up write access. */ - sx_xunlock(&filemon->lock); - filemon_unlock_write(); + getmicrotime(&now); + + len = snprintf(filemon->msgbufr, + sizeof(filemon->msgbufr), + "# Stop %ju.%06ju\n# Bye bye\n", + (uintmax_t)now.tv_sec, (uintmax_t)now.tv_usec); + + filemon_output(filemon, filemon->msgbufr, len); + fp = filemon->fp; + filemon->fp = NULL; + + sx_xunlock(&filemon->lock); + fdrop(fp, curthread); + sx_xlock(&filemon->lock); +} + +/* + * The devfs file is being closed. Untrace all processes. It is possible + * filemon_close/close(2) was not called. + */ +static void +filemon_dtr(void *data) +{ + struct filemon *filemon = data; - if (fp != NULL) - fdrop(fp, curthread); + if (filemon == NULL) + return; + + sx_xlock(&filemon->lock); + /* + * Detach the filemon. It cannot be inherited after this. + */ + filemon_untrack_processes(filemon); + filemon_close_log(filemon); + filemon_drop(filemon); +} + +/* Attach the filemon to the process. 
*/ +static int +filemon_attach_proc(struct filemon *filemon, struct proc *p) +{ + struct filemon *filemon2; + + sx_assert(&filemon->lock, SA_XLOCKED); + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT((p->p_flag & P_WEXIT) == 0, + ("%s: filemon %p attaching to exiting process %p", + __func__, filemon, p)); + KASSERT((p->p_flag & P_INEXEC) == 0, + ("%s: filemon %p attaching to execing process %p", + __func__, filemon, p)); + + if (p->p_filemon == filemon) + return (0); + /* + * Don't allow truncating other process traces. It is + * not really intended to trace procs other than curproc + * anyhow. + */ + if (p->p_filemon != NULL && p != curproc) + return (EBUSY); + /* + * Historic behavior of filemon has been to let a child initiate + * tracing on itself and cease existing tracing. Bmake + * .META + .MAKE relies on this. It is only relevant for attaching to + * curproc. + */ + while (p->p_filemon != NULL) { + PROC_UNLOCK(p); + sx_xunlock(&filemon->lock); + while ((filemon2 = filemon_proc_get(p)) != NULL) { + /* It may have changed. */ + if (p->p_filemon == filemon2) + filemon_proc_drop(p); + filemon_drop(filemon2); + } + sx_xlock(&filemon->lock); + PROC_LOCK(p); + /* + * It may have been attached to, though unlikely. + * Try again if needed. + */ } + + KASSERT(p->p_filemon == NULL, + ("%s: proc %p didn't detach filemon %p", __func__, p, + p->p_filemon)); + p->p_filemon = filemon_acquire(filemon); + ++filemon->proccnt; + + return (0); } static int @@ -173,15 +379,21 @@ filemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag __unused, &filemon->fp); if (error == 0) /* Write the file header. */ - filemon_comment(filemon); + filemon_write_header(filemon); break; /* Set the monitored process ID. */ case FILEMON_SET_PID: - error = pget(*((pid_t *)data), PGET_CANDEBUG | PGET_NOTWEXIT, - &p); + /* Invalidate any existing processes already set. 
*/ + filemon_untrack_processes(filemon); + + error = pget(*((pid_t *)data), + PGET_CANDEBUG | PGET_NOTWEXIT | PGET_NOTINEXEC, &p); if (error == 0) { - filemon->p = p; + KASSERT(p->p_filemon != filemon, + ("%s: proc %p didn't untrack filemon %p", + __func__, p, filemon)); + error = filemon_attach_proc(filemon, p); PROC_UNLOCK(p); } break; @@ -197,51 +409,51 @@ filemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag __unused, static int filemon_open(struct cdev *dev, int oflags __unused, int devtype __unused, - struct thread *td __unused) + struct thread *td) { + int error; struct filemon *filemon; - /* Get exclusive write access. */ - filemon_lock_write(); - - if ((filemon = TAILQ_FIRST(&filemons_free)) != NULL) - TAILQ_REMOVE(&filemons_free, filemon, link); - - /* Give up write access. */ - filemon_unlock_write(); - - if (filemon == NULL) { - filemon = malloc(sizeof(struct filemon), M_FILEMON, - M_WAITOK | M_ZERO); - sx_init(&filemon->lock, "filemon"); - } - - devfs_set_cdevpriv(filemon, filemon_dtr); - - /* Get exclusive write access. */ - filemon_lock_write(); - - /* Add to the in-use list. */ - TAILQ_INSERT_TAIL(&filemons_inuse, filemon, link); + filemon = malloc(sizeof(*filemon), M_FILEMON, + M_WAITOK | M_ZERO); + sx_init(&filemon->lock, "filemon"); + refcount_init(&filemon->refcnt, 1); + filemon->cred = crhold(td->td_ucred); - /* Give up write access. */ - filemon_unlock_write(); + error = devfs_set_cdevpriv(filemon, filemon_dtr); + if (error != 0) + filemon_release(filemon); - return (0); + return (error); } +/* Called on close of last devfs file handle, before filemon_dtr(). 
*/ static int filemon_close(struct cdev *dev __unused, int flag __unused, int fmt __unused, struct thread *td __unused) { + struct filemon *filemon; + int error; - return (0); + if ((error = devfs_get_cdevpriv((void **) &filemon)) != 0) + return (error); + + sx_xlock(&filemon->lock); + filemon_close_log(filemon); + error = filemon->error; + sx_xunlock(&filemon->lock); + /* + * Processes are still being traced but won't log anything + * now. After this call returns filemon_dtr() is called which + * will detach processes. + */ + + return (error); } static void filemon_load(void *dummy __unused) { - sx_init(&access_lock, "filemons_inuse"); /* Install the syscall wrappers. */ filemon_wrapper_install(); @@ -253,38 +465,11 @@ filemon_load(void *dummy __unused) static int filemon_unload(void) { - struct filemon *filemon; - int error = 0; - - /* Get exclusive write access. */ - filemon_lock_write(); - - if (TAILQ_FIRST(&filemons_inuse) != NULL) - error = EBUSY; - else { - destroy_dev(filemon_dev); - - /* Deinstall the syscall wrappers. */ - filemon_wrapper_deinstall(); - } - /* Give up write access. */ - filemon_unlock_write(); + destroy_dev(filemon_dev); + filemon_wrapper_deinstall(); - if (error == 0) { - /* free() filemon structs free list. */ - filemon_lock_write(); - while ((filemon = TAILQ_FIRST(&filemons_free)) != NULL) { - TAILQ_REMOVE(&filemons_free, filemon, link); - sx_destroy(&filemon->lock); - free(filemon, M_FILEMON); - } - filemon_unlock_write(); - - sx_destroy(&access_lock); - } - - return (error); + return (0); } static int diff --git a/sys/dev/filemon/filemon_lock.c b/sys/dev/filemon/filemon_lock.c deleted file mode 100644 index 5cac47c..0000000 --- a/sys/dev/filemon/filemon_lock.c +++ /dev/null @@ -1,57 +0,0 @@ -/*- - * Copyright (c) 2009-2011, Juniper Networks, Inc. - * Copyright (c) 2015, EMC Corp. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY JUNIPER NETWORKS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL JUNIPER NETWORKS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -static __inline void -filemon_lock_read(void) -{ - - sx_slock(&access_lock); -} - -static __inline void -filemon_unlock_read(void) -{ - - sx_sunlock(&access_lock); -} - -static __inline void -filemon_lock_write(void) -{ - - sx_xlock(&access_lock); -} - -static __inline void -filemon_unlock_write(void) -{ - - sx_xunlock(&access_lock); -} diff --git a/sys/dev/filemon/filemon_wrapper.c b/sys/dev/filemon/filemon_wrapper.c index ccadbce..4bea025 100644 --- a/sys/dev/filemon/filemon_wrapper.c +++ b/sys/dev/filemon/filemon_wrapper.c @@ -1,7 +1,7 @@ /*- * Copyright (c) 2011, David E. O'Brien. 
* Copyright (c) 2009-2011, Juniper Networks, Inc. - * Copyright (c) 2015, EMC Corp. + * Copyright (c) 2015-2016, EMC Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,9 +29,12 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include <sys/imgact.h> #include <sys/eventhandler.h> +#include <sys/filedesc.h> +#include <sys/imgact.h> +#include <sys/priv.h> #include <sys/sx.h> +#include <sys/sysent.h> #include <sys/vnode.h> #include "opt_compat.h" @@ -45,6 +48,7 @@ filemon_output(struct filemon *filemon, char *msg, size_t len) { struct uio auio; struct iovec aiov; + int error; if (filemon->fp == NULL) return; @@ -62,56 +66,33 @@ filemon_output(struct filemon *filemon, char *msg, size_t len) if (filemon->fp->f_type == DTYPE_VNODE) bwillwrite(); - fo_write(filemon->fp, &auio, curthread->td_ucred, 0, curthread); -} - -static struct filemon * -filemon_pid_check(struct proc *p) -{ - struct filemon *filemon; - - filemon_lock_read(); - if (TAILQ_EMPTY(&filemons_inuse)) { - filemon_unlock_read(); - return (NULL); - } - sx_slock(&proctree_lock); - while (p->p_pid != 0) { - TAILQ_FOREACH(filemon, &filemons_inuse, link) { - if (p == filemon->p) { - sx_sunlock(&proctree_lock); - sx_xlock(&filemon->lock); - filemon_unlock_read(); - return (filemon); - } - } - p = proc_realparent(p); - } - sx_sunlock(&proctree_lock); - filemon_unlock_read(); - return (NULL); + error = fo_write(filemon->fp, &auio, filemon->cred, 0, curthread); + if (error != 0 && filemon->error == 0) + filemon->error = error; } static int filemon_wrapper_chdir(struct thread *td, struct chdir_args *uap) { - int ret; - size_t done; + int error, ret; size_t len; struct filemon *filemon; if ((ret = sys_chdir(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->path, filemon->fname1, - sizeof(filemon->fname1), &done); + if ((filemon = filemon_proc_get(curproc)) != NULL) { + if ((error = copyinstr(uap->path, filemon->fname1, + 
sizeof(filemon->fname1), NULL)) != 0) { + filemon->error = error; + goto copyfail; + } len = snprintf(filemon->msgbufr, sizeof(filemon->msgbufr), "C %d %s\n", curproc->p_pid, filemon->fname1); filemon_output(filemon, filemon->msgbufr, len); - - sx_xunlock(&filemon->lock); +copyfail: + filemon_drop(filemon); } } @@ -123,337 +104,270 @@ filemon_event_process_exec(void *arg __unused, struct proc *p, struct image_params *imgp) { struct filemon *filemon; - char *fullpath, *freepath; size_t len; - if ((filemon = filemon_pid_check(p)) != NULL) { - fullpath = "<unknown>"; - freepath = NULL; - - vn_fullpath(FIRST_THREAD_IN_PROC(p), imgp->vp, &fullpath, - &freepath); - + if ((filemon = filemon_proc_get(p)) != NULL) { len = snprintf(filemon->msgbufr, sizeof(filemon->msgbufr), "E %d %s\n", - p->p_pid, fullpath); + p->p_pid, + imgp->execpath != NULL ? imgp->execpath : "<unknown>"); filemon_output(filemon, filemon->msgbufr, len); - sx_xunlock(&filemon->lock); + /* If the credentials changed then cease tracing. */ + if (imgp->newcred != NULL && + imgp->credential_setid && + priv_check_cred(filemon->cred, + PRIV_DEBUG_DIFFCRED, 0) != 0) { + /* + * It may have changed to NULL already, but + * will not be re-attached by anything else. 
+ */ + if (p->p_filemon != NULL) { + KASSERT(p->p_filemon == filemon, + ("%s: proc %p didn't have expected" + " filemon %p", __func__, p, filemon)); + filemon_proc_drop(p); + } + } - free(freepath, M_TEMP); + + filemon_drop(filemon); } } -static int -filemon_wrapper_open(struct thread *td, struct open_args *uap) +static void +_filemon_wrapper_openat(struct thread *td, char *upath, int flags, int fd) { - int ret; - size_t done; + int error; size_t len; + struct file *fp; struct filemon *filemon; + char *atpath, *freepath; + cap_rights_t rights; - if ((ret = sys_open(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->path, filemon->fname1, - sizeof(filemon->fname1), &done); - - if (uap->flags & O_RDWR) { - /* - * We'll get the W record below, but need - * to also output an R to distingish from - * O_WRONLY. - */ - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "R %d %s\n", - curproc->p_pid, filemon->fname1); - filemon_output(filemon, filemon->msgbufr, len); - } + if ((filemon = filemon_proc_get(curproc)) != NULL) { + atpath = ""; + freepath = NULL; + fp = NULL; + if ((error = copyinstr(upath, filemon->fname1, + sizeof(filemon->fname1), NULL)) != 0) { + filemon->error = error; + goto copyfail; + } + if (filemon->fname1[0] != '/' && fd != AT_FDCWD) { + /* + * rats - we cannot do too much about this. + * the trace should show a dir we read + * recently.. output an A record as a clue + * until we can do better. + * XXX: This may be able to come out with + * the namecache lookup now. + */ len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "%c %d %s\n", - (uap->flags & O_ACCMODE) ? 'W':'R', + sizeof(filemon->msgbufr), "A %d %s\n", curproc->p_pid, filemon->fname1); filemon_output(filemon, filemon->msgbufr, len); - - sx_xunlock(&filemon->lock); + /* + * Try to resolve the path from the vnode using the + * namecache. It may be inaccurate, but better + * than nothing. 
+ */ + if (getvnode(td->td_proc->p_fd, fd, + cap_rights_init(&rights, CAP_LOOKUP), &fp) == 0) { + vn_fullpath(td, fp->f_vnode, &atpath, + &freepath); + } + } + if (flags & O_RDWR) { + /* + * We'll get the W record below, but need + * to also output an R to distinguish from + * O_WRONLY. + */ + len = snprintf(filemon->msgbufr, + sizeof(filemon->msgbufr), "R %d %s%s%s\n", + curproc->p_pid, atpath, + atpath[0] != '\0' ? "/" : "", filemon->fname1); + filemon_output(filemon, filemon->msgbufr, len); } - } - return (ret); + len = snprintf(filemon->msgbufr, + sizeof(filemon->msgbufr), "%c %d %s%s%s\n", + (flags & O_ACCMODE) ? 'W':'R', + curproc->p_pid, atpath, + atpath[0] != '\0' ? "/" : "", filemon->fname1); + filemon_output(filemon, filemon->msgbufr, len); +copyfail: + filemon_drop(filemon); + if (fp != NULL) + fdrop(fp, td); + free(freepath, M_TEMP); + } } static int -filemon_wrapper_openat(struct thread *td, struct openat_args *uap) +filemon_wrapper_open(struct thread *td, struct open_args *uap) { int ret; - size_t done; - size_t len; - struct filemon *filemon; - if ((ret = sys_openat(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->path, filemon->fname1, - sizeof(filemon->fname1), &done); - - filemon->fname2[0] = '\0'; - if (filemon->fname1[0] != '/' && uap->fd != AT_FDCWD) { - /* - * rats - we cannot do too much about this. - * the trace should show a dir we read - * recently.. output an A record as a clue - * until we can do better. - */ - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "A %d %s\n", - curproc->p_pid, filemon->fname1); - filemon_output(filemon, filemon->msgbufr, len); - } - if (uap->flag & O_RDWR) { - /* - * We'll get the W record below, but need - * to also output an R to distingish from - * O_WRONLY. 
- */ - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "R %d %s%s\n", - curproc->p_pid, filemon->fname2, filemon->fname1); - filemon_output(filemon, filemon->msgbufr, len); - } - - - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "%c %d %s%s\n", - (uap->flag & O_ACCMODE) ? 'W':'R', - curproc->p_pid, filemon->fname2, filemon->fname1); - filemon_output(filemon, filemon->msgbufr, len); - - sx_xunlock(&filemon->lock); - } - } + if ((ret = sys_open(td, uap)) == 0) + _filemon_wrapper_openat(td, uap->path, uap->flags, AT_FDCWD); return (ret); } static int -filemon_wrapper_rename(struct thread *td, struct rename_args *uap) +filemon_wrapper_openat(struct thread *td, struct openat_args *uap) { int ret; - size_t done; - size_t len; - struct filemon *filemon; - if ((ret = sys_rename(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->from, filemon->fname1, - sizeof(filemon->fname1), &done); - copyinstr(uap->to, filemon->fname2, - sizeof(filemon->fname2), &done); - - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "M %d '%s' '%s'\n", - curproc->p_pid, filemon->fname1, filemon->fname2); - - filemon_output(filemon, filemon->msgbufr, len); - - sx_xunlock(&filemon->lock); - } - } + if ((ret = sys_openat(td, uap)) == 0) + _filemon_wrapper_openat(td, uap->path, uap->flag, uap->fd); return (ret); } static int -filemon_wrapper_link(struct thread *td, struct link_args *uap) +filemon_wrapper_rename(struct thread *td, struct rename_args *uap) { - int ret; - size_t done; + int error, ret; size_t len; struct filemon *filemon; - if ((ret = sys_link(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->path, filemon->fname1, - sizeof(filemon->fname1), &done); - copyinstr(uap->link, filemon->fname2, - sizeof(filemon->fname2), &done); + if ((ret = sys_rename(td, uap)) == 0) { + if ((filemon = filemon_proc_get(curproc)) != NULL) { + if (((error = copyinstr(uap->from, 
filemon->fname1, + sizeof(filemon->fname1), NULL)) != 0) || + ((error = copyinstr(uap->to, filemon->fname2, + sizeof(filemon->fname2), NULL)) != 0)) { + filemon->error = error; + goto copyfail; + } len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "L %d '%s' '%s'\n", + sizeof(filemon->msgbufr), "M %d '%s' '%s'\n", curproc->p_pid, filemon->fname1, filemon->fname2); filemon_output(filemon, filemon->msgbufr, len); - - sx_xunlock(&filemon->lock); +copyfail: + filemon_drop(filemon); } } return (ret); } -static int -filemon_wrapper_symlink(struct thread *td, struct symlink_args *uap) +static void +_filemon_wrapper_link(struct thread *td, char *upath1, char *upath2) { - int ret; - size_t done; - size_t len; struct filemon *filemon; + size_t len; + int error; + + if ((filemon = filemon_proc_get(curproc)) != NULL) { + if (((error = copyinstr(upath1, filemon->fname1, + sizeof(filemon->fname1), NULL)) != 0) || + ((error = copyinstr(upath2, filemon->fname2, + sizeof(filemon->fname2), NULL)) != 0)) { + filemon->error = error; + goto copyfail; + } - if ((ret = sys_symlink(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->path, filemon->fname1, - sizeof(filemon->fname1), &done); - copyinstr(uap->link, filemon->fname2, - sizeof(filemon->fname2), &done); - - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "L %d '%s' '%s'\n", - curproc->p_pid, filemon->fname1, filemon->fname2); - - filemon_output(filemon, filemon->msgbufr, len); + len = snprintf(filemon->msgbufr, + sizeof(filemon->msgbufr), "L %d '%s' '%s'\n", + curproc->p_pid, filemon->fname1, filemon->fname2); - sx_xunlock(&filemon->lock); - } + filemon_output(filemon, filemon->msgbufr, len); +copyfail: + filemon_drop(filemon); } - - return (ret); } static int -filemon_wrapper_linkat(struct thread *td, struct linkat_args *uap) +filemon_wrapper_link(struct thread *td, struct link_args *uap) { int ret; - size_t done; - size_t len; - struct filemon *filemon; - if ((ret 
= sys_linkat(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->path1, filemon->fname1, - sizeof(filemon->fname1), &done); - copyinstr(uap->path2, filemon->fname2, - sizeof(filemon->fname2), &done); - - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "L %d '%s' '%s'\n", - curproc->p_pid, filemon->fname1, filemon->fname2); - - filemon_output(filemon, filemon->msgbufr, len); - - sx_xunlock(&filemon->lock); - } - } + if ((ret = sys_link(td, uap)) == 0) + _filemon_wrapper_link(td, uap->path, uap->link); return (ret); } static int -filemon_wrapper_stat(struct thread *td, struct stat_args *uap) +filemon_wrapper_symlink(struct thread *td, struct symlink_args *uap) { int ret; - size_t done; - size_t len; - struct filemon *filemon; - - if ((ret = sys_stat(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->path, filemon->fname1, - sizeof(filemon->fname1), &done); - - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "S %d %s\n", - curproc->p_pid, filemon->fname1); - filemon_output(filemon, filemon->msgbufr, len); - - sx_xunlock(&filemon->lock); - } - } + if ((ret = sys_symlink(td, uap)) == 0) + _filemon_wrapper_link(td, uap->path, uap->link); return (ret); } -#if defined(COMPAT_IA32) || defined(COMPAT_FREEBSD32) || defined(COMPAT_ARCH32) static int -filemon_wrapper_freebsd32_stat(struct thread *td, - struct freebsd32_stat_args *uap) +filemon_wrapper_linkat(struct thread *td, struct linkat_args *uap) { int ret; - size_t done; - size_t len; - struct filemon *filemon; - - if ((ret = freebsd32_stat(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->path, filemon->fname1, - sizeof(filemon->fname1), &done); - - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), "S %d %s\n", - curproc->p_pid, filemon->fname1); - - filemon_output(filemon, filemon->msgbufr, len); - sx_xunlock(&filemon->lock); - } - } + if ((ret = sys_linkat(td, 
uap)) == 0) + _filemon_wrapper_link(td, uap->path1, uap->path2); return (ret); } -#endif static void filemon_event_process_exit(void *arg __unused, struct proc *p) { size_t len; struct filemon *filemon; - struct timeval now; - /* Get timestamp before locking. */ - getmicrotime(&now); - - if ((filemon = filemon_pid_check(p)) != NULL) { + if ((filemon = filemon_proc_get(p)) != NULL) { len = snprintf(filemon->msgbufr, sizeof(filemon->msgbufr), "X %d %d\n", p->p_pid, W_EXITCODE(p->p_xstat, 0)); filemon_output(filemon, filemon->msgbufr, len); - /* Check if the monitored process is about to exit. */ - if (filemon->p == p) { - len = snprintf(filemon->msgbufr, - sizeof(filemon->msgbufr), - "# Stop %ju.%06ju\n# Bye bye\n", - (uintmax_t)now.tv_sec, (uintmax_t)now.tv_usec); - - filemon_output(filemon, filemon->msgbufr, len); - filemon->p = NULL; - } - - sx_xunlock(&filemon->lock); + /* + * filemon_untrack_processes() may have dropped this p_filemon + * already while in filemon_proc_get() before acquiring the + * filemon lock. 
+ */ + KASSERT(p->p_filemon == NULL || p->p_filemon == filemon, + ("%s: p %p was attached while exiting, expected " + "filemon %p or NULL", __func__, p, filemon)); + if (p->p_filemon == filemon) + filemon_proc_drop(p); + + filemon_drop(filemon); } } static int filemon_wrapper_unlink(struct thread *td, struct unlink_args *uap) { - int ret; - size_t done; + int error, ret; size_t len; struct filemon *filemon; if ((ret = sys_unlink(td, uap)) == 0) { - if ((filemon = filemon_pid_check(curproc)) != NULL) { - copyinstr(uap->path, filemon->fname1, - sizeof(filemon->fname1), &done); + if ((filemon = filemon_proc_get(curproc)) != NULL) { + if ((error = copyinstr(uap->path, filemon->fname1, + sizeof(filemon->fname1), NULL)) != 0) { + filemon->error = error; + goto copyfail; + } len = snprintf(filemon->msgbufr, sizeof(filemon->msgbufr), "D %d %s\n", curproc->p_pid, filemon->fname1); filemon_output(filemon, filemon->msgbufr, len); - - sx_xunlock(&filemon->lock); +copyfail: + filemon_drop(filemon); } } @@ -467,49 +381,60 @@ filemon_event_process_fork(void *arg __unused, struct proc *p1, size_t len; struct filemon *filemon; - if ((filemon = filemon_pid_check(p1)) != NULL) { + if ((filemon = filemon_proc_get(p1)) != NULL) { len = snprintf(filemon->msgbufr, sizeof(filemon->msgbufr), "F %d %d\n", p1->p_pid, p2->p_pid); filemon_output(filemon, filemon->msgbufr, len); - sx_xunlock(&filemon->lock); + /* + * filemon_untrack_processes() or + * filemon_ioctl(FILEMON_SET_PID) may have changed the parent's + * p_filemon while in filemon_proc_get() before acquiring the + * filemon lock. Only inherit if the parent is still traced by + * this filemon. + */ + if (p1->p_filemon == filemon) { + PROC_LOCK(p2); + /* + * It may have been attached to already by a new + * filemon. 
+ */ + if (p2->p_filemon == NULL) { + p2->p_filemon = filemon_acquire(filemon); + ++filemon->proccnt; + } + PROC_UNLOCK(p2); + } + + filemon_drop(filemon); } } static void filemon_wrapper_install(void) { -#if defined(__LP64__) - struct sysent *sv_table = elf64_freebsd_sysvec.sv_table; -#else - struct sysent *sv_table = elf32_freebsd_sysvec.sv_table; -#endif - - sv_table[SYS_chdir].sy_call = (sy_call_t *) filemon_wrapper_chdir; - sv_table[SYS_open].sy_call = (sy_call_t *) filemon_wrapper_open; - sv_table[SYS_openat].sy_call = (sy_call_t *) filemon_wrapper_openat; - sv_table[SYS_rename].sy_call = (sy_call_t *) filemon_wrapper_rename; - sv_table[SYS_stat].sy_call = (sy_call_t *) filemon_wrapper_stat; - sv_table[SYS_unlink].sy_call = (sy_call_t *) filemon_wrapper_unlink; - sv_table[SYS_link].sy_call = (sy_call_t *) filemon_wrapper_link; - sv_table[SYS_symlink].sy_call = (sy_call_t *) filemon_wrapper_symlink; - sv_table[SYS_linkat].sy_call = (sy_call_t *) filemon_wrapper_linkat; - -#if defined(COMPAT_IA32) || defined(COMPAT_FREEBSD32) || defined(COMPAT_ARCH32) - sv_table = ia32_freebsd_sysvec.sv_table; - - sv_table[FREEBSD32_SYS_chdir].sy_call = (sy_call_t *) filemon_wrapper_chdir; - sv_table[FREEBSD32_SYS_open].sy_call = (sy_call_t *) filemon_wrapper_open; - sv_table[FREEBSD32_SYS_openat].sy_call = (sy_call_t *) filemon_wrapper_openat; - sv_table[FREEBSD32_SYS_rename].sy_call = (sy_call_t *) filemon_wrapper_rename; - sv_table[FREEBSD32_SYS_freebsd32_stat].sy_call = (sy_call_t *) filemon_wrapper_freebsd32_stat; - sv_table[FREEBSD32_SYS_unlink].sy_call = (sy_call_t *) filemon_wrapper_unlink; - sv_table[FREEBSD32_SYS_link].sy_call = (sy_call_t *) filemon_wrapper_link; - sv_table[FREEBSD32_SYS_symlink].sy_call = (sy_call_t *) filemon_wrapper_symlink; - sv_table[FREEBSD32_SYS_linkat].sy_call = (sy_call_t *) filemon_wrapper_linkat; -#endif /* COMPAT_ARCH32 */ + + sysent[SYS_chdir].sy_call = (sy_call_t *) filemon_wrapper_chdir; + sysent[SYS_open].sy_call = (sy_call_t *) 
filemon_wrapper_open; + sysent[SYS_openat].sy_call = (sy_call_t *) filemon_wrapper_openat; + sysent[SYS_rename].sy_call = (sy_call_t *) filemon_wrapper_rename; + sysent[SYS_unlink].sy_call = (sy_call_t *) filemon_wrapper_unlink; + sysent[SYS_link].sy_call = (sy_call_t *) filemon_wrapper_link; + sysent[SYS_symlink].sy_call = (sy_call_t *) filemon_wrapper_symlink; + sysent[SYS_linkat].sy_call = (sy_call_t *) filemon_wrapper_linkat; + +#if defined(COMPAT_FREEBSD32) + freebsd32_sysent[FREEBSD32_SYS_chdir].sy_call = (sy_call_t *) filemon_wrapper_chdir; + freebsd32_sysent[FREEBSD32_SYS_open].sy_call = (sy_call_t *) filemon_wrapper_open; + freebsd32_sysent[FREEBSD32_SYS_openat].sy_call = (sy_call_t *) filemon_wrapper_openat; + freebsd32_sysent[FREEBSD32_SYS_rename].sy_call = (sy_call_t *) filemon_wrapper_rename; + freebsd32_sysent[FREEBSD32_SYS_unlink].sy_call = (sy_call_t *) filemon_wrapper_unlink; + freebsd32_sysent[FREEBSD32_SYS_link].sy_call = (sy_call_t *) filemon_wrapper_link; + freebsd32_sysent[FREEBSD32_SYS_symlink].sy_call = (sy_call_t *) filemon_wrapper_symlink; + freebsd32_sysent[FREEBSD32_SYS_linkat].sy_call = (sy_call_t *) filemon_wrapper_linkat; +#endif /* COMPAT_FREEBSD32 */ filemon_exec_tag = EVENTHANDLER_REGISTER(process_exec, filemon_event_process_exec, NULL, EVENTHANDLER_PRI_LAST); @@ -522,35 +447,26 @@ filemon_wrapper_install(void) static void filemon_wrapper_deinstall(void) { -#if defined(__LP64__) - struct sysent *sv_table = elf64_freebsd_sysvec.sv_table; -#else - struct sysent *sv_table = elf32_freebsd_sysvec.sv_table; -#endif - - sv_table[SYS_chdir].sy_call = (sy_call_t *)sys_chdir; - sv_table[SYS_open].sy_call = (sy_call_t *)sys_open; - sv_table[SYS_openat].sy_call = (sy_call_t *)sys_openat; - sv_table[SYS_rename].sy_call = (sy_call_t *)sys_rename; - sv_table[SYS_stat].sy_call = (sy_call_t *)sys_stat; - sv_table[SYS_unlink].sy_call = (sy_call_t *)sys_unlink; - sv_table[SYS_link].sy_call = (sy_call_t *)sys_link; - sv_table[SYS_symlink].sy_call = 
(sy_call_t *)sys_symlink; - sv_table[SYS_linkat].sy_call = (sy_call_t *)sys_linkat; - -#if defined(COMPAT_IA32) || defined(COMPAT_FREEBSD32) || defined(COMPAT_ARCH32) - sv_table = ia32_freebsd_sysvec.sv_table; - - sv_table[FREEBSD32_SYS_chdir].sy_call = (sy_call_t *)sys_chdir; - sv_table[FREEBSD32_SYS_open].sy_call = (sy_call_t *)sys_open; - sv_table[FREEBSD32_SYS_openat].sy_call = (sy_call_t *)sys_openat; - sv_table[FREEBSD32_SYS_rename].sy_call = (sy_call_t *)sys_rename; - sv_table[FREEBSD32_SYS_freebsd32_stat].sy_call = (sy_call_t *)freebsd32_stat; - sv_table[FREEBSD32_SYS_unlink].sy_call = (sy_call_t *)sys_unlink; - sv_table[FREEBSD32_SYS_link].sy_call = (sy_call_t *)sys_link; - sv_table[FREEBSD32_SYS_symlink].sy_call = (sy_call_t *)sys_symlink; - sv_table[FREEBSD32_SYS_linkat].sy_call = (sy_call_t *)sys_linkat; -#endif /* COMPAT_ARCH32 */ + + sysent[SYS_chdir].sy_call = (sy_call_t *)sys_chdir; + sysent[SYS_open].sy_call = (sy_call_t *)sys_open; + sysent[SYS_openat].sy_call = (sy_call_t *)sys_openat; + sysent[SYS_rename].sy_call = (sy_call_t *)sys_rename; + sysent[SYS_unlink].sy_call = (sy_call_t *)sys_unlink; + sysent[SYS_link].sy_call = (sy_call_t *)sys_link; + sysent[SYS_symlink].sy_call = (sy_call_t *)sys_symlink; + sysent[SYS_linkat].sy_call = (sy_call_t *)sys_linkat; + +#if defined(COMPAT_FREEBSD32) + freebsd32_sysent[FREEBSD32_SYS_chdir].sy_call = (sy_call_t *)sys_chdir; + freebsd32_sysent[FREEBSD32_SYS_open].sy_call = (sy_call_t *)sys_open; + freebsd32_sysent[FREEBSD32_SYS_openat].sy_call = (sy_call_t *)sys_openat; + freebsd32_sysent[FREEBSD32_SYS_rename].sy_call = (sy_call_t *)sys_rename; + freebsd32_sysent[FREEBSD32_SYS_unlink].sy_call = (sy_call_t *)sys_unlink; + freebsd32_sysent[FREEBSD32_SYS_link].sy_call = (sy_call_t *)sys_link; + freebsd32_sysent[FREEBSD32_SYS_symlink].sy_call = (sy_call_t *)sys_symlink; + freebsd32_sysent[FREEBSD32_SYS_linkat].sy_call = (sy_call_t *)sys_linkat; +#endif /* COMPAT_FREEBSD32 */ EVENTHANDLER_DEREGISTER(process_exec, 
filemon_exec_tag); EVENTHANDLER_DEREGISTER(process_exit, filemon_exit_tag); diff --git a/sys/dev/hyperv/include/hyperv.h b/sys/dev/hyperv/include/hyperv.h index aeec8ec..44683d8 100644 --- a/sys/dev/hyperv/include/hyperv.h +++ b/sys/dev/hyperv/include/hyperv.h @@ -121,10 +121,12 @@ typedef uint8_t hv_bool_uint8_t; HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT ) typedef struct hv_guid { - unsigned char data[16]; + uint8_t data[16]; } __packed hv_guid; -int snprintf_hv_guid(char *, size_t, const hv_guid *); +#define HYPERV_GUID_STRLEN 40 + +int hyperv_guid2str(const struct hv_guid *, char *, size_t); #define HV_NIC_GUID \ .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, \ diff --git a/sys/dev/hyperv/include/hyperv_busdma.h b/sys/dev/hyperv/include/hyperv_busdma.h new file mode 100644 index 0000000..a27d2db --- /dev/null +++ b/sys/dev/hyperv/include/hyperv_busdma.h @@ -0,0 +1,49 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _HYPERV_BUSDMA_H_ +#define _HYPERV_BUSDMA_H_ + +#include <sys/param.h> +#include <sys/bus.h> +#include <machine/bus.h> + +struct hyperv_dma { + bus_addr_t hv_paddr; + bus_dma_tag_t hv_dtag; + bus_dmamap_t hv_dmap; +}; + +void hyperv_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, + int error); +void *hyperv_dmamem_alloc(bus_dma_tag_t parent_dtag, bus_size_t alignment, + bus_addr_t boundary, bus_size_t size, struct hyperv_dma *dma, + int flags); +void hyperv_dmamem_free(struct hyperv_dma *dma, void *ptr); + +#endif /* !_HYPERV_BUSDMA_H_ */ diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index f670c12..520cbf7 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$"); #include <machine/in_cksum.h> #include <dev/hyperv/include/hyperv.h> +#include <dev/hyperv/include/hyperv_busdma.h> #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" @@ -2217,18 +2218,6 @@ hn_check_iplen(const struct mbuf *m, int hoff) } static void -hn_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error) -{ - bus_addr_t *paddr = arg; - - if (error) - return; - - KASSERT(nseg == 1, ("too many segments %d!", nseg)); - *paddr = segs->ds_addr; -} - -static void hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; @@ -2528,7 
+2517,7 @@ hn_create_tx_ring(struct hn_softc *sc, int id) error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap, txd->rndis_msg, HN_RNDIS_MSG_LEN, - hn_dma_map_paddr, &txd->rndis_msg_paddr, + hyperv_dma_map_paddr, &txd->rndis_msg_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(sc->hn_dev, diff --git a/sys/dev/hyperv/utilities/hv_kvp.c b/sys/dev/hyperv/utilities/hv_kvp.c index b1f6ec1..ebf948c 100644 --- a/sys/dev/hyperv/utilities/hv_kvp.c +++ b/sys/dev/hyperv/utilities/hv_kvp.c @@ -58,7 +58,10 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #include <sys/systm.h> #include <sys/mutex.h> + +#include <net/if.h> #include <net/if_arp.h> +#include <net/if_var.h> #include <dev/hyperv/include/hyperv.h> #include <dev/hyperv/netvsc/hv_net_vsc.h> @@ -306,8 +309,7 @@ hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg, int UNUSED_FLAG = 1; struct hv_device *hv_dev; /* GUID Data Structure */ hn_softc_t *sc; /* hn softc structure */ - char if_name[4]; - char buf[39]; + char buf[HYPERV_GUID_STRLEN]; device_t *devs; int devcnt; @@ -335,11 +337,12 @@ hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg, /* Trying to find GUID of Network Device */ hv_dev = sc->hn_dev_obj; - snprintf_hv_guid(buf, sizeof(buf), &hv_dev->device_id); - sprintf(if_name, "%s%d", "hn", device_get_unit(devs[devcnt])); + hyperv_guid2str(&hv_dev->device_id, buf, sizeof(buf)); - if (strncmp(buf, (char *)umsg->body.kvp_ip_val.adapter_id, 39) == 0) { - strcpy((char *)umsg->body.kvp_ip_val.adapter_id, if_name); + if (strncmp(buf, (char *)umsg->body.kvp_ip_val.adapter_id, + HYPERV_GUID_STRLEN - 1) == 0) { + strlcpy((char *)umsg->body.kvp_ip_val.adapter_id, + sc->hn_ifp->if_xname, MAX_ADAPTER_ID_SIZE); break; } } diff --git a/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c b/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c new file mode 100644 index 0000000..5b5f205 --- /dev/null +++ b/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2016 
Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <dev/hyperv/vmbus/hyperv_machdep.h> + +uint64_t +hypercall_md(volatile void *hc_addr, uint64_t in_val, + uint64_t in_paddr, uint64_t out_paddr) +{ + uint64_t status; + + __asm__ __volatile__ ("mov %0, %%r8" : : "r" (out_paddr): "r8"); + __asm__ __volatile__ ("call *%3" : "=a" (status) : + "c" (in_val), "d" (in_paddr), "m" (hc_addr)); + return (status); +} diff --git a/sys/dev/hyperv/vmbus/amd64/hv_vector.S b/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S index 2594483..8d09e24 100644 --- a/sys/dev/hyperv/vmbus/amd64/hv_vector.S +++ b/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S @@ -37,10 +37,10 @@ */ .text SUPERALIGN_TEXT -IDTVEC(hv_vmbus_callback) +IDTVEC(vmbus_isr) PUSH_FRAME FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp, %rdi - call hv_vector_handler + call vmbus_handle_intr MEXITCOUNT jmp doreti diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c index 6da0643..88ba7ca 100644 --- a/sys/dev/hyperv/vmbus/hv_channel.c +++ b/sys/dev/hyperv/vmbus/hv_channel.c @@ -37,12 +37,17 @@ __FBSDID("$FreeBSD$"); #include <sys/lock.h> #include <sys/mutex.h> #include <sys/sysctl.h> + +#include <machine/atomic.h> #include <machine/bus.h> + #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/pmap.h> -#include "hv_vmbus_priv.h" +#include <dev/hyperv/vmbus/hv_vmbus_priv.h> +#include <dev/hyperv/vmbus/vmbus_reg.h> +#include <dev/hyperv/vmbus/vmbus_var.h> static int vmbus_channel_create_gpadl_header( /* must be phys and virt contiguous*/ @@ -61,17 +66,16 @@ static void VmbusProcessChannelEvent(void* channel, int pending); static void vmbus_channel_set_event(hv_vmbus_channel *channel) { - hv_vmbus_monitor_page *monitor_page; - if (channel->offer_msg.monitor_allocated) { - /* Each uint32_t represents 32 channels */ - synch_set_bit((channel->offer_msg.child_rel_id & 31), - ((uint32_t *)hv_vmbus_g_connection.send_interrupt_page - + ((channel->offer_msg.child_rel_id >> 5)))); + 
struct vmbus_softc *sc = vmbus_get_softc(); + hv_vmbus_monitor_page *monitor_page; + uint32_t chanid = channel->offer_msg.child_rel_id; - monitor_page = (hv_vmbus_monitor_page *) - hv_vmbus_g_connection.monitor_page_2; + atomic_set_long( + &sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT], + 1UL << (chanid & VMBUS_EVTFLAG_MASK)); + monitor_page = sc->vmbus_mnf2; synch_set_bit(channel->monitor_bit, (uint32_t *)&monitor_page-> trigger_group[channel->monitor_group].u.pending); @@ -199,7 +203,10 @@ hv_vmbus_channel_open( new_channel->on_channel_callback = pfn_on_channel_callback; new_channel->channel_callback_context = context; - new_channel->rxq = hv_vmbus_g_context.hv_event_queue[new_channel->target_cpu]; + vmbus_on_channel_open(new_channel); + + new_channel->rxq = VMBUS_PCPU_GET(vmbus_get_softc(), event_tq, + new_channel->target_cpu); TASK_INIT(&new_channel->channel_task, 0, VmbusProcessChannelEvent, new_channel); /* Allocate the ring buffer */ diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c index 00b54ed..e97c5d1 100644 --- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c +++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c @@ -35,26 +35,42 @@ __FBSDID("$FreeBSD$"); #include <sys/mbuf.h> #include <sys/mutex.h> -#include "hv_vmbus_priv.h" +#include <dev/hyperv/vmbus/hv_vmbus_priv.h> +#include <dev/hyperv/vmbus/vmbus_reg.h> +#include <dev/hyperv/vmbus/vmbus_var.h> /* * Internal functions */ -static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr); -static void vmbus_channel_on_offer_internal(void* context); -static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr); -static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr); -static void vmbus_channel_on_offer_rescind_internal(void* context); -static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr); -static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); -static void 
vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); -static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); +typedef void (*vmbus_msg_handler)(const hv_vmbus_channel_msg_header *msg); + +typedef struct hv_vmbus_channel_msg_table_entry { + hv_vmbus_channel_msg_type messageType; + vmbus_msg_handler messageHandler; +} hv_vmbus_channel_msg_table_entry; + +static void vmbus_channel_on_offer_internal(void *context); +static void vmbus_channel_on_offer_rescind_internal(void *context); + +static void vmbus_channel_on_offer(const hv_vmbus_channel_msg_header *hdr); +static void vmbus_channel_on_open_result( + const hv_vmbus_channel_msg_header *hdr); +static void vmbus_channel_on_offer_rescind( + const hv_vmbus_channel_msg_header *hdr); +static void vmbus_channel_on_gpadl_created( + const hv_vmbus_channel_msg_header *hdr); +static void vmbus_channel_on_gpadl_torndown( + const hv_vmbus_channel_msg_header *hdr); +static void vmbus_channel_on_offers_delivered( + const hv_vmbus_channel_msg_header *hdr); +static void vmbus_channel_on_version_response( + const hv_vmbus_channel_msg_header *hdr); /** * Channel message dispatch table */ -hv_vmbus_channel_msg_table_entry +static const hv_vmbus_channel_msg_table_entry g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = { { HV_CHANNEL_MESSAGE_INVALID, NULL }, @@ -186,7 +202,14 @@ vmbus_channel_process_offer(hv_vmbus_channel *new_channel) * Make sure this is a new offer */ mtx_lock(&hv_vmbus_g_connection.channel_lock); - hv_vmbus_g_connection.channels[relid] = new_channel; + if (relid == 0) { + /* + * XXX channel0 will not be processed; skip it. 
+ */ + printf("VMBUS: got channel0 offer\n"); + } else { + hv_vmbus_g_connection.channels[relid] = new_channel; + } TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { @@ -303,7 +326,7 @@ vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu) } chan->target_cpu = cpu; - chan->target_vcpu = hv_vmbus_g_context.hv_vcpu_index[cpu]; + chan->target_vcpu = VMBUS_PCPU_GET(vmbus_get_softc(), vcpuid, cpu); if (bootverbose) { printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n", @@ -378,12 +401,12 @@ vmbus_channel_select_defcpu(struct hv_vmbus_channel *channel) * object to process the offer synchronously */ static void -vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr) +vmbus_channel_on_offer(const hv_vmbus_channel_msg_header *hdr) { - hv_vmbus_channel_offer_channel* offer; - hv_vmbus_channel_offer_channel* copied; + const hv_vmbus_channel_offer_channel *offer; + hv_vmbus_channel_offer_channel *copied; - offer = (hv_vmbus_channel_offer_channel*) hdr; + offer = (const hv_vmbus_channel_offer_channel *)hdr; // copy offer data copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT); @@ -456,12 +479,12 @@ vmbus_channel_on_offer_internal(void* context) * synchronously */ static void -vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr) +vmbus_channel_on_offer_rescind(const hv_vmbus_channel_msg_header *hdr) { - hv_vmbus_channel_rescind_offer* rescind; + const hv_vmbus_channel_rescind_offer *rescind; hv_vmbus_channel* channel; - rescind = (hv_vmbus_channel_rescind_offer*) hdr; + rescind = (const hv_vmbus_channel_rescind_offer *)hdr; channel = hv_vmbus_g_connection.channels[rescind->child_rel_id]; if (channel == NULL) @@ -488,7 +511,8 @@ vmbus_channel_on_offer_rescind_internal(void *context) * @brief Invoked when all offers have been delivered. 
*/ static void -vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr) +vmbus_channel_on_offers_delivered( + const hv_vmbus_channel_msg_header *hdr __unused) { mtx_lock(&vmbus_chwait_lock); @@ -505,14 +529,14 @@ vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr) * response and signal the requesting thread. */ static void -vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr) +vmbus_channel_on_open_result(const hv_vmbus_channel_msg_header *hdr) { - hv_vmbus_channel_open_result* result; + const hv_vmbus_channel_open_result *result; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_open_channel* openMsg; - result = (hv_vmbus_channel_open_result*) hdr; + result = (const hv_vmbus_channel_open_result *)hdr; /* * Find the open msg, copy the result and signal/unblock the wait event @@ -547,14 +571,14 @@ vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr) * response and signal the requesting thread. 
*/ static void -vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr) +vmbus_channel_on_gpadl_created(const hv_vmbus_channel_msg_header *hdr) { - hv_vmbus_channel_gpadl_created* gpadl_created; + const hv_vmbus_channel_gpadl_created *gpadl_created; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* request_header; hv_vmbus_channel_gpadl_header* gpadl_header; - gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr; + gpadl_created = (const hv_vmbus_channel_gpadl_created *)hdr; /* Find the establish msg, copy the result and signal/unblock * the wait event @@ -589,14 +613,14 @@ vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr) * response and signal the requesting thread */ static void -vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr) +vmbus_channel_on_gpadl_torndown(const hv_vmbus_channel_msg_header *hdr) { - hv_vmbus_channel_gpadl_torndown* gpadl_torndown; + const hv_vmbus_channel_gpadl_torndown *gpadl_torndown; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_gpadl_teardown* gpadlTeardown; - gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr; + gpadl_torndown = (const hv_vmbus_channel_gpadl_torndown *)hdr; /* * Find the open msg, copy the result and signal/unblock the @@ -634,14 +658,14 @@ vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr) * response and signal the requesting thread. 
*/ static void -vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr) +vmbus_channel_on_version_response(const hv_vmbus_channel_msg_header *hdr) { hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_initiate_contact* initiate; - hv_vmbus_channel_version_response* versionResponse; + const hv_vmbus_channel_version_response *versionResponse; - versionResponse = (hv_vmbus_channel_version_response*)hdr; + versionResponse = (const hv_vmbus_channel_version_response *)hdr; mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, @@ -712,8 +736,8 @@ hv_vmbus_release_unattached_channels(void) } hv_vmbus_free_vmbus_channel(channel); } - bzero(hv_vmbus_g_connection.channels, - sizeof(hv_vmbus_channel*) * HV_CHANNEL_MAX_COUNT); + bzero(hv_vmbus_g_connection.channels, + sizeof(hv_vmbus_channel*) * VMBUS_CHAN_MAX); mtx_unlock(&hv_vmbus_g_connection.channel_lock); } @@ -744,7 +768,7 @@ vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) return outgoing_channel; } - cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id]; + cur_vcpu = VMBUS_PCPU_GET(vmbus_get_softc(), vcpuid, smp_pro_id); TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) { if (new_channel->state != HV_CHANNEL_OPENED_STATE){ @@ -825,3 +849,24 @@ vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused) free(subchan, M_TEMP); } + +void +vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg) +{ + const hv_vmbus_channel_msg_table_entry *entry; + const hv_vmbus_channel_msg_header *hdr; + hv_vmbus_channel_msg_type msg_type; + + hdr = (const hv_vmbus_channel_msg_header *)msg->msg_data; + msg_type = hdr->message_type; + + if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) { + device_printf(sc->vmbus_dev, "unknown message type 0x%x\n", + msg_type); + return; + } + + entry = &g_channel_message_table[msg_type]; + if (entry->messageHandler) + 
entry->messageHandler(hdr); +} diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c index 0424b47..84d966c 100644 --- a/sys/dev/hyperv/vmbus/hv_connection.c +++ b/sys/dev/hyperv/vmbus/hv_connection.c @@ -36,11 +36,14 @@ __FBSDID("$FreeBSD$"); #include <sys/lock.h> #include <sys/mutex.h> #include <machine/bus.h> +#include <machine/atomic.h> #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/pmap.h> -#include "hv_vmbus_priv.h" +#include <dev/hyperv/vmbus/hv_vmbus_priv.h> +#include <dev/hyperv/vmbus/vmbus_reg.h> +#include <dev/hyperv/vmbus/vmbus_var.h> /* * Globals @@ -74,8 +77,8 @@ hv_vmbus_get_next_version(uint32_t current_ver) * Negotiate the highest supported hypervisor version. */ static int -hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, - uint32_t version) +hv_vmbus_negotiate_version(struct vmbus_softc *sc, + hv_vmbus_channel_msg_info *msg_info, uint32_t version) { int ret = 0; hv_vmbus_channel_initiate_contact *msg; @@ -86,14 +89,9 @@ hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; msg->vmbus_version_requested = version; - msg->interrupt_page = hv_get_phys_addr( - hv_vmbus_g_connection.interrupt_page); - - msg->monitor_page_1 = hv_get_phys_addr( - hv_vmbus_g_connection.monitor_page_1); - - msg->monitor_page_2 = hv_get_phys_addr( - hv_vmbus_g_connection.monitor_page_2); + msg->interrupt_page = sc->vmbus_evtflags_dma.hv_paddr; + msg->monitor_page_1 = sc->vmbus_mnf1_dma.hv_paddr; + msg->monitor_page_2 = sc->vmbus_mnf2_dma.hv_paddr; /** * Add to list before we send the request since we may receive the @@ -150,7 +148,8 @@ hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, * Send a connect request on the partition service connection */ int -hv_vmbus_connect(void) { +hv_vmbus_connect(struct vmbus_softc *sc) +{ int ret = 0; uint32_t version; hv_vmbus_channel_msg_info* msg_info = NULL; @@ -175,49 +174,20 @@ 
hv_vmbus_connect(void) { mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel", NULL, MTX_DEF); - /** - * Setup the vmbus event connection for channel interrupt abstraction - * stuff - */ - hv_vmbus_g_connection.interrupt_page = malloc( - PAGE_SIZE, M_DEVBUF, - M_WAITOK | M_ZERO); - - hv_vmbus_g_connection.recv_interrupt_page = - hv_vmbus_g_connection.interrupt_page; - - hv_vmbus_g_connection.send_interrupt_page = - ((uint8_t *) hv_vmbus_g_connection.interrupt_page + - (PAGE_SIZE >> 1)); - - /** - * Set up the monitor notification facility. The 1st page for - * parent->child and the 2nd page for child->parent - */ - hv_vmbus_g_connection.monitor_page_1 = malloc( - PAGE_SIZE, - M_DEVBUF, - M_WAITOK | M_ZERO); - hv_vmbus_g_connection.monitor_page_2 = malloc( - PAGE_SIZE, - M_DEVBUF, - M_WAITOK | M_ZERO); - msg_info = (hv_vmbus_channel_msg_info*) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_initiate_contact), M_DEVBUF, M_WAITOK | M_ZERO); hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) * - HV_CHANNEL_MAX_COUNT, - M_DEVBUF, M_WAITOK | M_ZERO); + VMBUS_CHAN_MAX, M_DEVBUF, M_WAITOK | M_ZERO); /* * Find the highest vmbus version number we can support. */ version = HV_VMBUS_VERSION_CURRENT; do { - ret = hv_vmbus_negotiate_version(msg_info, version); + ret = hv_vmbus_negotiate_version(sc, msg_info, version); if (ret == EWOULDBLOCK) { /* * We timed out. 
@@ -251,14 +221,6 @@ hv_vmbus_connect(void) { mtx_destroy(&hv_vmbus_g_connection.channel_lock); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); - if (hv_vmbus_g_connection.interrupt_page != NULL) { - free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); - hv_vmbus_g_connection.interrupt_page = NULL; - } - - free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF); - free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF); - if (msg_info) { sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); @@ -272,7 +234,8 @@ hv_vmbus_connect(void) { * Send a disconnect request on the partition service connection */ int -hv_vmbus_disconnect(void) { +hv_vmbus_disconnect(void) +{ int ret = 0; hv_vmbus_channel_unload msg; @@ -280,8 +243,6 @@ hv_vmbus_disconnect(void) { ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload)); - free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); - mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); free(hv_vmbus_g_connection.channels, M_DEVBUF); @@ -290,79 +251,67 @@ hv_vmbus_disconnect(void) { return (ret); } -/** - * Handler for events - */ -void -hv_vmbus_on_events(int cpu) +static __inline void +vmbus_event_flags_proc(volatile u_long *event_flags, int flag_cnt) { - int bit; - int dword; - void *page_addr; - uint32_t* recv_interrupt_page = NULL; - int rel_id; - int maxdword; - hv_vmbus_synic_event_flags *event; - /* int maxdword = PAGE_SIZE >> 3; */ - - KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: " - "cpu out of range!")); - - page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; - event = (hv_vmbus_synic_event_flags *) - page_addr + HV_VMBUS_MESSAGE_SINT; - if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || - (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { - maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; - /* - * receive size is 1/2 page and divide that by 4 bytes - */ - if (synch_test_and_clear_bit(0, &event->flags32[0])) - recv_interrupt_page = - hv_vmbus_g_connection.recv_interrupt_page; - } 
else { - /* - * On Host with Win8 or above, the event page can be - * checked directly to get the id of the channel - * that has the pending interrupt. - */ - maxdword = HV_EVENT_FLAGS_DWORD_COUNT; - recv_interrupt_page = event->flags32; - } + int f; - /* - * Check events - */ - if (recv_interrupt_page != NULL) { - for (dword = 0; dword < maxdword; dword++) { - if (recv_interrupt_page[dword]) { - for (bit = 0; bit < HV_CHANNEL_DWORD_LEN; bit++) { - if (synch_test_and_clear_bit(bit, - (uint32_t *) &recv_interrupt_page[dword])) { - rel_id = (dword << 5) + bit; - if (rel_id == 0) { - /* - * Special case - - * vmbus channel protocol msg. - */ + for (f = 0; f < flag_cnt; ++f) { + uint32_t rel_id_base; + u_long flags; + int bit; + + if (event_flags[f] == 0) + continue; + + flags = atomic_swap_long(&event_flags[f], 0); + rel_id_base = f << VMBUS_EVTFLAG_SHIFT; + + while ((bit = ffsl(flags)) != 0) { + struct hv_vmbus_channel *channel; + uint32_t rel_id; + + --bit; /* NOTE: ffsl is 1-based */ + flags &= ~(1UL << bit); + + rel_id = rel_id_base + bit; + channel = hv_vmbus_g_connection.channels[rel_id]; + + /* if channel is closed or closing */ + if (channel == NULL || channel->rxq == NULL) continue; - } else { - hv_vmbus_channel * channel = hv_vmbus_g_connection.channels[rel_id]; - /* if channel is closed or closing */ - if (channel == NULL || channel->rxq == NULL) - continue; - - if (channel->batched_reading) - hv_ring_buffer_read_begin(&channel->inbound); - taskqueue_enqueue_fast(channel->rxq, &channel->channel_task); - } - } - } + + if (channel->batched_reading) + hv_ring_buffer_read_begin(&channel->inbound); + taskqueue_enqueue(channel->rxq, &channel->channel_task); } - } } +} + +void +vmbus_event_proc(struct vmbus_softc *sc, int cpu) +{ + struct vmbus_evtflags *eventf; + + /* + * On Host with Win8 or above, the event page can be checked directly + * to get the id of the channel that has the pending interrupt. 
+ */ + eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; + vmbus_event_flags_proc(eventf->evt_flags, + VMBUS_PCPU_GET(sc, event_flags_cnt, cpu)); +} - return; +void +vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) +{ + struct vmbus_evtflags *eventf; + + eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; + if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) { + vmbus_event_flags_proc(sc->vmbus_rx_evtflags, + VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); + } } /** @@ -383,8 +332,8 @@ int hv_vmbus_post_message(void *buffer, size_t bufferLen) * insufficient resources. 20 times should suffice in practice. */ for (retries = 0; retries < 20; retries++) { - ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, - bufferLen); + ret = hv_vmbus_post_msg_via_msg_ipc(connId, + VMBUS_MSGTYPE_CHANNEL, buffer, bufferLen); if (ret == HV_STATUS_SUCCESS) return (0); @@ -403,16 +352,43 @@ int hv_vmbus_post_message(void *buffer, size_t bufferLen) * Send an event notification to the parent */ int -hv_vmbus_set_event(hv_vmbus_channel *channel) { +hv_vmbus_set_event(hv_vmbus_channel *channel) +{ + struct vmbus_softc *sc = vmbus_get_softc(); int ret = 0; - uint32_t child_rel_id = channel->offer_msg.child_rel_id; - - /* Each uint32_t represents 32 channels */ + uint32_t chanid = channel->offer_msg.child_rel_id; - synch_set_bit(child_rel_id & 31, - (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page - + (child_rel_id >> 5)))); + atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT], + 1UL << (chanid & VMBUS_EVTFLAG_MASK)); ret = hv_vmbus_signal_event(channel->signal_event_param); return (ret); } + +void +vmbus_on_channel_open(const struct hv_vmbus_channel *chan) +{ + volatile int *flag_cnt_ptr; + int flag_cnt; + + flag_cnt = (chan->offer_msg.child_rel_id / VMBUS_EVTFLAG_LEN) + 1; + flag_cnt_ptr = VMBUS_PCPU_PTR(vmbus_get_softc(), event_flags_cnt, + chan->target_cpu); + + for (;;) { + int old_flag_cnt; + + old_flag_cnt = 
*flag_cnt_ptr; + if (old_flag_cnt >= flag_cnt) + break; + if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) { + if (bootverbose) { + printf("VMBUS: channel%u update " + "cpu%d flag_cnt to %d\n", + chan->offer_msg.child_rel_id, + chan->target_cpu, flag_cnt); + } + break; + } + } +} diff --git a/sys/dev/hyperv/vmbus/hv_et.c b/sys/dev/hyperv/vmbus/hv_et.c deleted file mode 100644 index 440b514..0000000 --- a/sys/dev/hyperv/vmbus/hv_et.c +++ /dev/null @@ -1,161 +0,0 @@ -/*- - * Copyright (c) 2015,2016 Microsoft Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/bus.h> -#include <sys/kernel.h> -#include <sys/module.h> -#include <sys/proc.h> -#include <sys/systm.h> -#include <sys/smp.h> -#include <sys/time.h> -#include <sys/timeet.h> - -#include "hv_vmbus_priv.h" - -#define HV_TIMER_FREQUENCY (10 * 1000 * 1000LL) /* 100ns period */ -#define HV_MAX_DELTA_TICKS 0xffffffffLL -#define HV_MIN_DELTA_TICKS 1LL - -static struct eventtimer *et; - -static inline uint64_t -sbintime2tick(sbintime_t time) -{ - struct timespec val; - - val = sbttots(time); - return val.tv_sec * HV_TIMER_FREQUENCY + val.tv_nsec / 100; -} - -static int -hv_et_start(struct eventtimer *et, sbintime_t firsttime, sbintime_t periodtime) -{ - union hv_timer_config timer_cfg; - uint64_t current; - - timer_cfg.as_uint64 = 0; - timer_cfg.auto_enable = 1; - timer_cfg.sintx = HV_VMBUS_TIMER_SINT; - - current = rdmsr(HV_X64_MSR_TIME_REF_COUNT); - current += sbintime2tick(firsttime); - - wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); - wrmsr(HV_X64_MSR_STIMER0_COUNT, current); - - return (0); -} - -static int -hv_et_stop(struct eventtimer *et) -{ - wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); - wrmsr(HV_X64_MSR_STIMER0_COUNT, 0); - - return (0); -} - -void -hv_et_intr(struct trapframe *frame) -{ - struct trapframe *oldframe; - struct thread *td; - - if (et->et_active) { - td = curthread; - td->td_intr_nesting_level++; - oldframe = td->td_intr_frame; - td->td_intr_frame = frame; - et->et_event_cb(et, et->et_arg); - td->td_intr_frame = oldframe; - td->td_intr_nesting_level--; - } -} - -static void -hv_et_identify(driver_t *driver, device_t parent) -{ - if (device_find_child(parent, "hv_et", -1) != NULL) - return; - - device_add_child(parent, "hv_et", -1); -} - -static int -hv_et_probe(device_t dev) -{ - device_set_desc(dev, "Hyper-V event timer"); - - return (BUS_PROBE_NOWILDCARD); -} - -static int -hv_et_attach(device_t dev) -{ - /* XXX: need allocate SINT and remove global 
et */ - et = device_get_softc(dev); - - et->et_name = "Hyper-V"; - et->et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; - et->et_quality = 1000; - et->et_frequency = HV_TIMER_FREQUENCY; - et->et_min_period = HV_MIN_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY); - et->et_max_period = HV_MAX_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY); - et->et_start = hv_et_start; - et->et_stop = hv_et_stop; - et->et_priv = dev; - - return (et_register(et)); -} - -static int -hv_et_detach(device_t dev) -{ - return (et_deregister(et)); -} - -static device_method_t hv_et_methods[] = { - DEVMETHOD(device_identify, hv_et_identify), - DEVMETHOD(device_probe, hv_et_probe), - DEVMETHOD(device_attach, hv_et_attach), - DEVMETHOD(device_detach, hv_et_detach), - - DEVMETHOD_END -}; - -static driver_t hv_et_driver = { - "hv_et", - hv_et_methods, - sizeof(struct eventtimer) -}; - -static devclass_t hv_et_devclass; -DRIVER_MODULE(hv_et, vmbus, hv_et_driver, hv_et_devclass, NULL, 0); -MODULE_VERSION(hv_et, 1); diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c deleted file mode 100644 index 70a5608..0000000 --- a/sys/dev/hyperv/vmbus/hv_hv.c +++ /dev/null @@ -1,513 +0,0 @@ -/*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. - * Copyright (c) 2012 NetApp Inc. - * Copyright (c) 2012 Citrix Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * Implements low-level interactions with Hypver-V/Azure - */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/malloc.h> -#include <sys/pcpu.h> -#include <sys/timetc.h> -#include <machine/bus.h> -#include <machine/md_var.h> -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/pmap.h> - - -#include "hv_vmbus_priv.h" - -#define HV_NANOSECONDS_PER_SEC 1000000000L - -#define HYPERV_INTERFACE 0x31237648 /* HV#1 */ - -static u_int hv_get_timecount(struct timecounter *tc); - -u_int hyperv_features; -u_int hyperv_recommends; - -static u_int hyperv_pm_features; -static u_int hyperv_features3; - -/** - * Globals - */ -hv_vmbus_context hv_vmbus_g_context = { - .syn_ic_initialized = FALSE, - .hypercall_page = NULL, -}; - -static struct timecounter hv_timecounter = { - hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V", HV_NANOSECONDS_PER_SEC/100 -}; - -static u_int -hv_get_timecount(struct timecounter *tc) -{ - u_int now = rdmsr(HV_X64_MSR_TIME_REF_COUNT); - return (now); -} - -/** - * @brief Invoke the specified hypercall - */ -static uint64_t -hv_vmbus_do_hypercall(uint64_t control, void* input, void* output) -{ -#ifdef __x86_64__ - uint64_t 
hv_status = 0; - uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; - uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0; - volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; - - __asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8"); - __asm__ __volatile__ ("call *%3" : "=a"(hv_status): - "c" (control), "d" (input_address), - "m" (hypercall_page)); - return (hv_status); -#else - uint32_t control_high = control >> 32; - uint32_t control_low = control & 0xFFFFFFFF; - uint32_t hv_status_high = 1; - uint32_t hv_status_low = 1; - uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; - uint32_t input_address_high = input_address >> 32; - uint32_t input_address_low = input_address & 0xFFFFFFFF; - uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0; - uint32_t output_address_high = output_address >> 32; - uint32_t output_address_low = output_address & 0xFFFFFFFF; - volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; - - __asm__ __volatile__ ("call *%8" : "=d"(hv_status_high), - "=a"(hv_status_low) : "d" (control_high), - "a" (control_low), "b" (input_address_high), - "c" (input_address_low), - "D"(output_address_high), - "S"(output_address_low), "m" (hypercall_page)); - return (hv_status_low | ((uint64_t)hv_status_high << 32)); -#endif /* __x86_64__ */ -} - -/** - * @brief Main initialization routine. 
- * - * This routine must be called - * before any other routines in here are called - */ -int -hv_vmbus_init(void) -{ - hv_vmbus_x64_msr_hypercall_contents hypercall_msr; - void* virt_addr = NULL; - - memset( - hv_vmbus_g_context.syn_ic_event_page, - 0, - sizeof(hv_vmbus_handle) * MAXCPU); - - memset( - hv_vmbus_g_context.syn_ic_msg_page, - 0, - sizeof(hv_vmbus_handle) * MAXCPU); - - if (vm_guest != VM_GUEST_HV) - goto cleanup; - - /* - * Write our OS info - */ - uint64_t os_guest_info = HV_FREEBSD_GUEST_ID; - wrmsr(HV_X64_MSR_GUEST_OS_ID, os_guest_info); - hv_vmbus_g_context.guest_id = os_guest_info; - - /* - * See if the hypercall page is already set - */ - hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); - virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); - - hypercall_msr.u.enable = 1; - hypercall_msr.u.guest_physical_address = - (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT); - wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); - - /* - * Confirm that hypercall page did get set up - */ - hypercall_msr.as_uint64_t = 0; - hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); - - if (!hypercall_msr.u.enable) - goto cleanup; - - hv_vmbus_g_context.hypercall_page = virt_addr; - - return (0); - - cleanup: - if (virt_addr != NULL) { - if (hypercall_msr.u.enable) { - hypercall_msr.as_uint64_t = 0; - wrmsr(HV_X64_MSR_HYPERCALL, - hypercall_msr.as_uint64_t); - } - - free(virt_addr, M_DEVBUF); - } - return (ENOTSUP); -} - -/** - * @brief Cleanup routine, called normally during driver unloading or exiting - */ -void -hv_vmbus_cleanup(void) -{ - hv_vmbus_x64_msr_hypercall_contents hypercall_msr; - - if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) { - if (hv_vmbus_g_context.hypercall_page != NULL) { - hypercall_msr.as_uint64_t = 0; - wrmsr(HV_X64_MSR_HYPERCALL, - hypercall_msr.as_uint64_t); - free(hv_vmbus_g_context.hypercall_page, M_DEVBUF); - hv_vmbus_g_context.hypercall_page = NULL; - } - } -} - -/** - * @brief Post a message using the 
hypervisor message IPC. - * (This involves a hypercall.) - */ -hv_vmbus_status -hv_vmbus_post_msg_via_msg_ipc( - hv_vmbus_connection_id connection_id, - hv_vmbus_msg_type message_type, - void* payload, - size_t payload_size) -{ - struct alignedinput { - uint64_t alignment8; - hv_vmbus_input_post_message msg; - }; - - hv_vmbus_input_post_message* aligned_msg; - hv_vmbus_status status; - size_t addr; - - if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) - return (EMSGSIZE); - - addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF, - M_ZERO | M_NOWAIT); - KASSERT(addr != 0, - ("Error VMBUS: malloc failed to allocate message buffer!")); - if (addr == 0) - return (ENOMEM); - - aligned_msg = (hv_vmbus_input_post_message*) - (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN)); - - aligned_msg->connection_id = connection_id; - aligned_msg->message_type = message_type; - aligned_msg->payload_size = payload_size; - memcpy((void*) aligned_msg->payload, payload, payload_size); - - status = hv_vmbus_do_hypercall( - HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF; - - free((void *) addr, M_DEVBUF); - return (status); -} - -/** - * @brief Signal an event on the specified connection using the hypervisor - * event IPC. (This involves a hypercall.) 
- */ -hv_vmbus_status -hv_vmbus_signal_event(void *con_id) -{ - hv_vmbus_status status; - - status = hv_vmbus_do_hypercall( - HV_CALL_SIGNAL_EVENT, - con_id, - 0) & 0xFFFF; - - return (status); -} - -/** - * @brief hv_vmbus_synic_init - */ -void -hv_vmbus_synic_init(void *arg) - -{ - int cpu; - uint64_t hv_vcpu_index; - hv_vmbus_synic_simp simp; - hv_vmbus_synic_siefp siefp; - hv_vmbus_synic_scontrol sctrl; - hv_vmbus_synic_sint shared_sint; - uint64_t version; - hv_setup_args* setup_args = (hv_setup_args *)arg; - - cpu = PCPU_GET(cpuid); - - if (hv_vmbus_g_context.hypercall_page == NULL) - return; - - /* - * TODO: Check the version - */ - version = rdmsr(HV_X64_MSR_SVERSION); - - hv_vmbus_g_context.syn_ic_msg_page[cpu] = - setup_args->page_buffers[2 * cpu]; - hv_vmbus_g_context.syn_ic_event_page[cpu] = - setup_args->page_buffers[2 * cpu + 1]; - - /* - * Setup the Synic's message page - */ - - simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); - simp.u.simp_enabled = 1; - simp.u.base_simp_gpa = ((hv_get_phys_addr( - hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT); - - wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); - - /* - * Setup the Synic's event page - */ - siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); - siefp.u.siefp_enabled = 1; - siefp.u.base_siefp_gpa = ((hv_get_phys_addr( - hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT); - - wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); - - /*HV_SHARED_SINT_IDT_VECTOR + 0x20; */ - shared_sint.as_uint64_t = 0; - shared_sint.u.vector = setup_args->vector; - shared_sint.u.masked = FALSE; - shared_sint.u.auto_eoi = TRUE; - - wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, - shared_sint.as_uint64_t); - - wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT, - shared_sint.as_uint64_t); - - /* Enable the global synic bit */ - sctrl.as_uint64_t = rdmsr(HV_X64_MSR_SCONTROL); - sctrl.u.enable = 1; - - wrmsr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t); - - hv_vmbus_g_context.syn_ic_initialized = TRUE; - - /* - * Set up the cpuid mapping from 
Hyper-V to FreeBSD. - * The array is indexed using FreeBSD cpuid. - */ - hv_vcpu_index = rdmsr(HV_X64_MSR_VP_INDEX); - hv_vmbus_g_context.hv_vcpu_index[cpu] = (uint32_t)hv_vcpu_index; - - return; -} - -/** - * @brief Cleanup routine for hv_vmbus_synic_init() - */ -void hv_vmbus_synic_cleanup(void *arg) -{ - hv_vmbus_synic_sint shared_sint; - hv_vmbus_synic_simp simp; - hv_vmbus_synic_siefp siefp; - - if (!hv_vmbus_g_context.syn_ic_initialized) - return; - - shared_sint.as_uint64_t = rdmsr( - HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT); - - shared_sint.u.masked = 1; - - /* - * Disable the interrupt 0 - */ - wrmsr( - HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, - shared_sint.as_uint64_t); - - shared_sint.as_uint64_t = rdmsr( - HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT); - - shared_sint.u.masked = 1; - - /* - * Disable the interrupt 1 - */ - wrmsr( - HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT, - shared_sint.as_uint64_t); - simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); - simp.u.simp_enabled = 0; - simp.u.base_simp_gpa = 0; - - wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); - - siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); - siefp.u.siefp_enabled = 0; - siefp.u.base_siefp_gpa = 0; - - wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); -} - -static bool -hyperv_identify(void) -{ - u_int regs[4]; - unsigned int maxLeaf; - unsigned int op; - - if (vm_guest != VM_GUEST_HV) - return (false); - - op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION; - do_cpuid(op, regs); - maxLeaf = regs[0]; - if (maxLeaf < HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS) - return (false); - - op = HV_CPU_ID_FUNCTION_HV_INTERFACE; - do_cpuid(op, regs); - if (regs[0] != HYPERV_INTERFACE) - return (false); - - op = HV_CPU_ID_FUNCTION_MS_HV_FEATURES; - do_cpuid(op, regs); - if ((regs[0] & HV_FEATURE_MSR_HYPERCALL) == 0) { - /* - * Hyper-V w/o Hypercall is impossible; someone - * is faking Hyper-V. 
- */ - return (false); - } - hyperv_features = regs[0]; - hyperv_pm_features = regs[2]; - hyperv_features3 = regs[3]; - - op = HV_CPU_ID_FUNCTION_MS_HV_VERSION; - do_cpuid(op, regs); - printf("Hyper-V Version: %d.%d.%d [SP%d]\n", - regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]); - - printf(" Features=0x%b\n", hyperv_features, - "\020" - "\001VPRUNTIME" /* MSR_VP_RUNTIME */ - "\002TMREFCNT" /* MSR_TIME_REF_COUNT */ - "\003SYNIC" /* MSRs for SynIC */ - "\004SYNTM" /* MSRs for SynTimer */ - "\005APIC" /* MSR_{EOI,ICR,TPR} */ - "\006HYPERCALL" /* MSR_{GUEST_OS_ID,HYPERCALL} */ - "\007VPINDEX" /* MSR_VP_INDEX */ - "\010RESET" /* MSR_RESET */ - "\011STATS" /* MSR_STATS_ */ - "\012REFTSC" /* MSR_REFERENCE_TSC */ - "\013IDLE" /* MSR_GUEST_IDLE */ - "\014TMFREQ" /* MSR_{TSC,APIC}_FREQUENCY */ - "\015DEBUG"); /* MSR_SYNTH_DEBUG_ */ - printf(" PM Features=max C%u, 0x%b\n", - HV_PM_FEATURE_CSTATE(hyperv_pm_features), - (hyperv_pm_features & ~HV_PM_FEATURE_CSTATE_MASK), - "\020" - "\005C3HPET"); /* HPET is required for C3 state */ - printf(" Features3=0x%b\n", hyperv_features3, - "\020" - "\001MWAIT" /* MWAIT */ - "\002DEBUG" /* guest debug support */ - "\003PERFMON" /* performance monitor */ - "\004PCPUDPE" /* physical CPU dynamic partition event */ - "\005XMMHC" /* hypercall input through XMM regs */ - "\006IDLE" /* guest idle support */ - "\007SLEEP" /* hypervisor sleep support */ - "\010NUMA" /* NUMA distance query support */ - "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */ - "\012SYNCMC" /* inject synthetic machine checks */ - "\013CRASH" /* MSRs for guest crash */ - "\014DEBUGMSR" /* MSRs for guest debug */ - "\015NPIEP" /* NPIEP */ - "\016HVDIS"); /* disabling hypervisor */ - - op = HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION; - do_cpuid(op, regs); - hyperv_recommends = regs[0]; - if (bootverbose) - printf(" Recommends: %08x %08x\n", regs[0], regs[1]); - - op = HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS; - do_cpuid(op, regs); - if (bootverbose) { - 
printf(" Limits: Vcpu:%d Lcpu:%d Int:%d\n", - regs[0], regs[1], regs[2]); - } - - if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE) { - op = HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE; - do_cpuid(op, regs); - if (bootverbose) { - printf(" HW Features: %08x AMD: %08x\n", - regs[0], regs[3]); - } - } - - return (true); -} - -static void -hyperv_init(void *dummy __unused) -{ - if (!hyperv_identify()) - return; - - if (hyperv_features & HV_FEATURE_MSR_TIME_REFCNT) { - /* Register virtual timecount */ - tc_init(&hv_timecounter); - } -} -SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, - NULL); diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c deleted file mode 100644 index e274d59..0000000 --- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c +++ /dev/null @@ -1,735 +0,0 @@ -/*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. - * Copyright (c) 2012 NetApp Inc. - * Copyright (c) 2012 Citrix Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * VM Bus Driver Implementation - */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/bus.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/module.h> -#include <sys/proc.h> -#include <sys/sysctl.h> -#include <sys/syslog.h> -#include <sys/systm.h> -#include <sys/rtprio.h> -#include <sys/interrupt.h> -#include <sys/sx.h> -#include <sys/taskqueue.h> -#include <sys/mutex.h> -#include <sys/smp.h> - -#include <machine/resource.h> -#include <sys/rman.h> - -#include <machine/stdarg.h> -#include <machine/intr_machdep.h> -#include <machine/md_var.h> -#include <machine/segments.h> -#include <sys/pcpu.h> -#include <machine/apicvar.h> - -#include <dev/hyperv/include/hyperv.h> -#include "hv_vmbus_priv.h" - -#include <contrib/dev/acpica/include/acpi.h> -#include "acpi_if.h" - -static device_t vmbus_devp; -static int vmbus_inited; -static hv_setup_args setup_args; /* only CPU 0 supported at this time */ - -static char *vmbus_ids[] = { "VMBUS", NULL }; - -/** - * @brief Software interrupt thread routine to handle channel messages from - * the hypervisor. 
- */ -static void -vmbus_msg_swintr(void *arg, int pending __unused) -{ - int cpu; - void* page_addr; - hv_vmbus_channel_msg_header *hdr; - hv_vmbus_channel_msg_table_entry *entry; - hv_vmbus_channel_msg_type msg_type; - hv_vmbus_message* msg; - - cpu = (int)(long)arg; - KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: " - "cpu out of range!")); - - page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; - msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; - - for (;;) { - if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) - break; /* no message */ - - hdr = (hv_vmbus_channel_msg_header *)msg->u.payload; - msg_type = hdr->message_type; - - if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) { - printf("VMBUS: unknown message type = %d\n", msg_type); - goto handled; - } - - entry = &g_channel_message_table[msg_type]; - - if (entry->messageHandler) - entry->messageHandler(hdr); -handled: - msg->header.message_type = HV_MESSAGE_TYPE_NONE; - - /* - * Make sure the write to message_type (ie set to - * HV_MESSAGE_TYPE_NONE) happens before we read the - * message_pending and EOMing. Otherwise, the EOMing will - * not deliver any more messages - * since there is no empty slot - * - * NOTE: - * mb() is used here, since atomic_thread_fence_seq_cst() - * will become compiler fence on UP kernel. - */ - mb(); - - if (msg->header.message_flags.u.message_pending) { - /* - * This will cause message queue rescan to possibly - * deliver another msg from the hypervisor - */ - wrmsr(HV_X64_MSR_EOM, 0); - } - } -} - -/** - * @brief Interrupt filter routine for VMBUS. - * - * The purpose of this routine is to determine the type of VMBUS protocol - * message to process - an event or a channel message. - */ -static inline int -hv_vmbus_isr(struct trapframe *frame) -{ - int cpu; - hv_vmbus_message* msg; - void* page_addr; - - cpu = PCPU_GET(cpuid); - - /* - * The Windows team has advised that we check for events - * before checking for messages. 
This is the way they do it - * in Windows when running as a guest in Hyper-V - */ - - hv_vmbus_on_events(cpu); - - /* Check if there are actual msgs to be process */ - page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; - msg = (hv_vmbus_message*) page_addr + HV_VMBUS_TIMER_SINT; - - /* we call eventtimer process the message */ - if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) { - msg->header.message_type = HV_MESSAGE_TYPE_NONE; - - /* call intrrupt handler of event timer */ - hv_et_intr(frame); - - /* - * Make sure the write to message_type (ie set to - * HV_MESSAGE_TYPE_NONE) happens before we read the - * message_pending and EOMing. Otherwise, the EOMing will - * not deliver any more messages - * since there is no empty slot - * - * NOTE: - * mb() is used here, since atomic_thread_fence_seq_cst() - * will become compiler fence on UP kernel. - */ - mb(); - - if (msg->header.message_flags.u.message_pending) { - /* - * This will cause message queue rescan to possibly - * deliver another msg from the hypervisor - */ - wrmsr(HV_X64_MSR_EOM, 0); - } - } - - msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; - if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { - taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[cpu], - &hv_vmbus_g_context.hv_msg_task[cpu]); - } - - return (FILTER_HANDLED); -} - -u_long *hv_vmbus_intr_cpu[MAXCPU]; - -void -hv_vector_handler(struct trapframe *trap_frame) -{ - int cpu; - - /* - * Disable preemption. - */ - critical_enter(); - - /* - * Do a little interrupt counting. - */ - cpu = PCPU_GET(cpuid); - (*hv_vmbus_intr_cpu[cpu])++; - - hv_vmbus_isr(trap_frame); - - /* - * Enable preemption. 
- */ - critical_exit(); -} - -static int -vmbus_read_ivar( - device_t dev, - device_t child, - int index, - uintptr_t* result) -{ - struct hv_device *child_dev_ctx = device_get_ivars(child); - - switch (index) { - - case HV_VMBUS_IVAR_TYPE: - *result = (uintptr_t) &child_dev_ctx->class_id; - return (0); - case HV_VMBUS_IVAR_INSTANCE: - *result = (uintptr_t) &child_dev_ctx->device_id; - return (0); - case HV_VMBUS_IVAR_DEVCTX: - *result = (uintptr_t) child_dev_ctx; - return (0); - case HV_VMBUS_IVAR_NODE: - *result = (uintptr_t) child_dev_ctx->device; - return (0); - } - return (ENOENT); -} - -static int -vmbus_write_ivar( - device_t dev, - device_t child, - int index, - uintptr_t value) -{ - switch (index) { - - case HV_VMBUS_IVAR_TYPE: - case HV_VMBUS_IVAR_INSTANCE: - case HV_VMBUS_IVAR_DEVCTX: - case HV_VMBUS_IVAR_NODE: - /* read-only */ - return (EINVAL); - } - return (ENOENT); -} - -static int -vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen) -{ - char guidbuf[40]; - struct hv_device *dev_ctx = device_get_ivars(child); - - if (dev_ctx == NULL) - return (0); - - strlcat(buf, "classid=", buflen); - snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id); - strlcat(buf, guidbuf, buflen); - - strlcat(buf, " deviceid=", buflen); - snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id); - strlcat(buf, guidbuf, buflen); - - return (0); -} - -struct hv_device* -hv_vmbus_child_device_create( - hv_guid type, - hv_guid instance, - hv_vmbus_channel* channel) -{ - hv_device* child_dev; - - /* - * Allocate the new child device - */ - child_dev = malloc(sizeof(hv_device), M_DEVBUF, - M_WAITOK | M_ZERO); - - child_dev->channel = channel; - memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); - memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); - - return (child_dev); -} - -int -snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid) -{ - int cnt; - const unsigned char *d = guid->data; - - cnt = snprintf(buf, sz, - 
"%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", - d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6], - d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); - return (cnt); -} - -int -hv_vmbus_child_device_register(struct hv_device *child_dev) -{ - device_t child; - - if (bootverbose) { - char name[40]; - snprintf_hv_guid(name, sizeof(name), &child_dev->class_id); - printf("VMBUS: Class ID: %s\n", name); - } - - child = device_add_child(vmbus_devp, NULL, -1); - child_dev->device = child; - device_set_ivars(child, child_dev); - - return (0); -} - -int -hv_vmbus_child_device_unregister(struct hv_device *child_dev) -{ - int ret = 0; - /* - * XXXKYS: Ensure that this is the opposite of - * device_add_child() - */ - mtx_lock(&Giant); - ret = device_delete_child(vmbus_devp, child_dev->device); - mtx_unlock(&Giant); - return(ret); -} - -static int -vmbus_probe(device_t dev) { - if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL || - device_get_unit(dev) != 0) - return (ENXIO); - - device_set_desc(dev, "Vmbus Devices"); - - return (BUS_PROBE_DEFAULT); -} - -extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback); - -/** - * @brief Find a free IDT slot and setup the interrupt handler. - */ -static int -vmbus_vector_alloc(void) -{ - int vector; - uintptr_t func; - struct gate_descriptor *ip; - - /* - * Search backwards form the highest IDT vector available for use - * as vmbus channel callback vector. We install 'hv_vmbus_callback' - * handler at that vector and use it to interrupt vcpus. 
- */ - vector = APIC_SPURIOUS_INT; - while (--vector >= APIC_IPI_INTS) { - ip = &idt[vector]; - func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); - if (func == (uintptr_t)&IDTVEC(rsvd)) { -#ifdef __i386__ - setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT, - SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#else - setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT, - SEL_KPL, 0); -#endif - - return (vector); - } - } - return (0); -} - -/** - * @brief Restore the IDT slot to rsvd. - */ -static void -vmbus_vector_free(int vector) -{ - uintptr_t func; - struct gate_descriptor *ip; - - if (vector == 0) - return; - - KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT, - ("invalid vector %d", vector)); - - ip = &idt[vector]; - func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); - KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback), - ("invalid vector %d", vector)); - - setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); -} - -static void -vmbus_cpuset_setthread_task(void *xmask, int pending __unused) -{ - cpuset_t *mask = xmask; - int error; - - error = cpuset_setthread(curthread->td_tid, mask); - if (error) { - panic("curthread=%ju: can't pin; error=%d", - (uintmax_t)curthread->td_tid, error); - } -} - -/** - * @brief Main vmbus driver initialization routine. - * - * Here, we - * - initialize the vmbus driver context - * - setup various driver entry points - * - invoke the vmbus hv main init routine - * - get the irq resource - * - invoke the vmbus to add the vmbus root device - * - setup the vmbus root device - * - retrieve the channel offers - */ -static int -vmbus_bus_init(void) -{ - int i, j, n, ret; - char buf[MAXCOMLEN + 1]; - cpuset_t cpu_mask; - - if (vmbus_inited) - return (0); - - vmbus_inited = 1; - - ret = hv_vmbus_init(); - - if (ret) { - if(bootverbose) - printf("Error VMBUS: Hypervisor Initialization Failed!\n"); - return (ret); - } - - /* - * Find a free IDT slot for vmbus callback. 
- */ - hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc(); - - if (hv_vmbus_g_context.hv_cb_vector == 0) { - if(bootverbose) - printf("Error VMBUS: Cannot find free IDT slot for " - "vmbus callback!\n"); - goto cleanup; - } - - if(bootverbose) - printf("VMBUS: vmbus callback vector %d\n", - hv_vmbus_g_context.hv_cb_vector); - - /* - * Notify the hypervisor of our vector. - */ - setup_args.vector = hv_vmbus_g_context.hv_cb_vector; - - CPU_FOREACH(j) { - snprintf(buf, sizeof(buf), "cpu%d:hyperv", j); - intrcnt_add(buf, &hv_vmbus_intr_cpu[j]); - - for (i = 0; i < 2; i++) - setup_args.page_buffers[2 * j + i] = NULL; - } - - /* - * Per cpu setup. - */ - CPU_FOREACH(j) { - struct task cpuset_task; - - /* - * Setup taskqueue to handle events - */ - hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK, - taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]); - taskqueue_start_threads(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET, - "hvevent%d", j); - - CPU_SETOF(j, &cpu_mask); - TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, &cpu_mask); - taskqueue_enqueue(hv_vmbus_g_context.hv_event_queue[j], &cpuset_task); - taskqueue_drain(hv_vmbus_g_context.hv_event_queue[j], &cpuset_task); - - /* - * Setup per-cpu tasks and taskqueues to handle msg. - */ - hv_vmbus_g_context.hv_msg_tq[j] = taskqueue_create_fast( - "hyperv msg", M_WAITOK, taskqueue_thread_enqueue, - &hv_vmbus_g_context.hv_msg_tq[j]); - taskqueue_start_threads(&hv_vmbus_g_context.hv_msg_tq[j], 1, PI_NET, - "hvmsg%d", j); - TASK_INIT(&hv_vmbus_g_context.hv_msg_task[j], 0, - vmbus_msg_swintr, (void *)(long)j); - - CPU_SETOF(j, &cpu_mask); - TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, &cpu_mask); - taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[j], &cpuset_task); - taskqueue_drain(hv_vmbus_g_context.hv_msg_tq[j], &cpuset_task); - - /* - * Prepare the per cpu msg and event pages to be called on each cpu. 
- */ - for(i = 0; i < 2; i++) { - setup_args.page_buffers[2 * j + i] = - malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); - } - } - - if (bootverbose) - printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n", - smp_started); - - smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); - - /* - * Connect to VMBus in the root partition - */ - ret = hv_vmbus_connect(); - - if (ret != 0) - goto cleanup1; - - hv_vmbus_request_channel_offers(); - - vmbus_scan(); - bus_generic_attach(vmbus_devp); - device_printf(vmbus_devp, "device scan, probe and attach done\n"); - - return (ret); - - cleanup1: - /* - * Free pages alloc'ed - */ - for (n = 0; n < 2 * MAXCPU; n++) - if (setup_args.page_buffers[n] != NULL) - free(setup_args.page_buffers[n], M_DEVBUF); - - /* - * remove swi and vmbus callback vector; - */ - CPU_FOREACH(j) { - if (hv_vmbus_g_context.hv_event_queue[j] != NULL) { - taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]); - hv_vmbus_g_context.hv_event_queue[j] = NULL; - } - } - - vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); - - cleanup: - hv_vmbus_cleanup(); - - return (ret); -} - -static int -vmbus_attach(device_t dev) -{ - if(bootverbose) - device_printf(dev, "VMBUS: attach dev: %p\n", dev); - vmbus_devp = dev; - - /* - * If the system has already booted and thread - * scheduling is possible indicated by the global - * cold set to zero, we just call the driver - * initialization directly. - */ - if (!cold) - vmbus_bus_init(); - - bus_generic_probe(dev); - return (0); -} - -static void -vmbus_init(void) -{ - if (vm_guest != VM_GUEST_HV) - return; - - /* - * If the system has already booted and thread - * scheduling is possible, as indicated by the - * global cold set to zero, we just call the driver - * initialization directly. 
- */ - if (!cold) - vmbus_bus_init(); -} - -static void -vmbus_bus_exit(void) -{ - int i; - - hv_vmbus_release_unattached_channels(); - hv_vmbus_disconnect(); - - smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); - - for(i = 0; i < 2 * MAXCPU; i++) { - if (setup_args.page_buffers[i] != NULL) - free(setup_args.page_buffers[i], M_DEVBUF); - } - - hv_vmbus_cleanup(); - - /* remove swi */ - CPU_FOREACH(i) { - if (hv_vmbus_g_context.hv_event_queue[i] != NULL) { - taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]); - hv_vmbus_g_context.hv_event_queue[i] = NULL; - } - } - - vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); - - return; -} - -static void -vmbus_exit(void) -{ - vmbus_bus_exit(); -} - -static int -vmbus_detach(device_t dev) -{ - vmbus_exit(); - return (0); -} - -static void -vmbus_mod_load(void) -{ - if(bootverbose) - printf("VMBUS: load\n"); -} - -static void -vmbus_mod_unload(void) -{ - if(bootverbose) - printf("VMBUS: unload\n"); -} - -static int -vmbus_modevent(module_t mod, int what, void *arg) -{ - switch (what) { - - case MOD_LOAD: - vmbus_mod_load(); - break; - case MOD_UNLOAD: - vmbus_mod_unload(); - break; - } - - return (0); -} - -static device_method_t vmbus_methods[] = { - /** Device interface */ - DEVMETHOD(device_probe, vmbus_probe), - DEVMETHOD(device_attach, vmbus_attach), - DEVMETHOD(device_detach, vmbus_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - - /** Bus interface */ - DEVMETHOD(bus_add_child, bus_generic_add_child), - DEVMETHOD(bus_print_child, bus_generic_print_child), - DEVMETHOD(bus_read_ivar, vmbus_read_ivar), - DEVMETHOD(bus_write_ivar, vmbus_write_ivar), - DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str), - - { 0, 0 } }; - -static char driver_name[] = "vmbus"; -static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; - - -devclass_t vmbus_devclass; - -DRIVER_MODULE(vmbus, acpi, 
vmbus_driver, vmbus_devclass, vmbus_modevent, 0); -MODULE_DEPEND(vmbus, acpi, 1, 1, 1); -MODULE_VERSION(vmbus, 1); - -/* We want to be started after SMP is initialized */ -SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL); - diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h index f83102a..0008226 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h +++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h @@ -51,20 +51,6 @@ typedef uint16_t hv_vmbus_status; #define HV_ANY_VP (0xFFFFFFFF) /* - * Synthetic interrupt controller flag constants. - */ - -#define HV_EVENT_FLAGS_COUNT (256 * 8) -#define HV_EVENT_FLAGS_BYTE_COUNT (256) -#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t)) - -/** - * max channel count <== event_flags_dword_count * bit_of_dword - */ -#define HV_CHANNEL_DWORD_LEN (32) -#define HV_CHANNEL_MAX_COUNT \ - ((HV_EVENT_FLAGS_DWORD_COUNT) * HV_CHANNEL_DWORD_LEN) -/* * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS * MessageText: * You did not supply enough message buffers to send a message. @@ -181,39 +167,12 @@ enum { HV_VMBUS_EVENT_PORT_ID = 2, HV_VMBUS_MONITOR_CONNECTION_ID = 3, HV_VMBUS_MONITOR_PORT_ID = 3, - HV_VMBUS_MESSAGE_SINT = 2, - HV_VMBUS_TIMER_SINT = 4, }; #define HV_PRESENT_BIT 0x80000000 #define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t) -typedef struct { - uint64_t guest_id; - void* hypercall_page; - hv_bool_uint8_t syn_ic_initialized; - - hv_vmbus_handle syn_ic_msg_page[MAXCPU]; - hv_vmbus_handle syn_ic_event_page[MAXCPU]; - /* - * For FreeBSD cpuid to Hyper-V vcpuid mapping. - */ - uint32_t hv_vcpu_index[MAXCPU]; - /* - * Each cpu has its own software interrupt handler for channel - * event and msg handling. - */ - struct taskqueue *hv_event_queue[MAXCPU]; - struct taskqueue *hv_msg_tq[MAXCPU]; - struct task hv_msg_task[MAXCPU]; - /* - * Host use this vector to intrrupt guest for vmbus channel - * event and msg. 
- */ - unsigned int hv_cb_vector; -} hv_vmbus_context; - /* * Define hypervisor message types */ @@ -267,59 +226,9 @@ typedef union _hv_vmbus_port_id { } u ; } hv_vmbus_port_id; -/* - * Define synthetic interrupt controller message flag - */ -typedef union { - uint8_t as_uint8_t; - struct { - uint8_t message_pending:1; - uint8_t reserved:7; - } u; -} hv_vmbus_msg_flags; - typedef uint64_t hv_vmbus_partition_id; /* - * Define synthetic interrupt controller message header - */ -typedef struct { - hv_vmbus_msg_type message_type; - uint8_t payload_size; - hv_vmbus_msg_flags message_flags; - uint8_t reserved[2]; - union { - hv_vmbus_partition_id sender; - hv_vmbus_port_id port; - } u; -} hv_vmbus_msg_header; - -/* - * Define synthetic interrupt controller message format - */ -typedef struct { - hv_vmbus_msg_header header; - union { - uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u ; -} hv_vmbus_message; - -/* - * Maximum channels is determined by the size of the interrupt - * page which is PAGE_SIZE. 1/2 of PAGE_SIZE is for - * send endpoint interrupt and the other is receive - * endpoint interrupt. - * - * Note: (PAGE_SIZE >> 1) << 3 allocates 16348 channels - */ -#define HV_MAX_NUM_CHANNELS (PAGE_SIZE >> 1) << 3 - -/* - * (The value here must be in multiple of 32) - */ -#define HV_MAX_NUM_CHANNELS_SUPPORTED 256 - -/* * VM Bus connection states */ typedef enum { @@ -335,25 +244,7 @@ typedef enum { typedef struct { hv_vmbus_connect_state connect_state; uint32_t next_gpadl_handle; - /** - * Represents channel interrupts. Each bit position - * represents a channel. - * When a channel sends an interrupt via VMBUS, it - * finds its bit in the send_interrupt_page, set it and - * calls Hv to generate a port event. 
The other end - * receives the port event and parse the - * recv_interrupt_page to see which bit is set - */ - void *interrupt_page; - void *send_interrupt_page; - void *recv_interrupt_page; - /* - * 2 pages - 1st page for parent->child - * notification and 2nd is child->parent - * notification - */ - void *monitor_page_1; - void *monitor_page_2; + TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; struct mtx channel_msg_lock; /** @@ -370,32 +261,6 @@ typedef struct { } hv_vmbus_connection; typedef union { - uint64_t as_uint64_t; - struct { - uint64_t build_number : 16; - uint64_t service_version : 8; /* Service Pack, etc. */ - uint64_t minor_version : 8; - uint64_t major_version : 8; - /* - * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS) - * HV_GUEST_OS_VENDOR - */ - uint64_t os_id : 8; - uint64_t vendor_id : 16; - } u; -} hv_vmbus_x64_msr_guest_os_id_contents; - - -typedef union { - uint64_t as_uint64_t; - struct { - uint64_t enable :1; - uint64_t reserved :11; - uint64_t guest_physical_address :52; - } u; -} hv_vmbus_x64_msr_hypercall_contents; - -typedef union { uint32_t as_uint32_t; struct { uint32_t group_enable :4; @@ -457,108 +322,6 @@ typedef struct { } hv_vmbus_monitor_page; /* - * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent - * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES). 
- */ -typedef enum { - HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001, - HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000, - HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001, - /* - * The remaining functions depend on the value - * of hv_cpu_id_function_interface - */ - HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002, - HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003, - HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004, - HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005, - HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE = 0x40000006 -} hv_vmbus_cpuid_function; - -#define HV_FEATURE_MSR_TIME_REFCNT 0x0002 /* MSR_TIME_REF_COUNT */ -#define HV_FEATURE_MSR_SYNIC 0x0004 /* MSRs for SynIC */ -#define HV_FEATURE_MSR_SYNTIMER 0x0008 /* MSRs for SynTimer */ -#define HV_FEATURE_MSR_APIC 0x0010 /* MSR_{EOI,ICR,TPR} */ -#define HV_FEATURE_MSR_HYPERCALL 0x0020 /* MSR_{GUEST_OS_ID,HYPERCALL} */ -#define HV_FEATURE_MSR_GUEST_IDLE 0x0400 /* MSR_GUEST_IDLE */ - -#define HV_PM_FEATURE_CSTATE_MASK 0x000f -#define HV_PM_FEATURE_C3_HPET 0x0010 /* C3 requires HPET */ -#define HV_PM_FEATURE_CSTATE(f) ((f) & HV_PM_FEATURE_CSTATE_MASK) - -#define HV_FEATURE3_MWAIT 0x0001 /* MWAIT */ -#define HV_FEATURE3_XMM_HYPERCALL 0x0010 /* hypercall input through XMM regs */ -#define HV_FEATURE3_GUEST_IDLE 0x0020 /* guest idle support */ -#define HV_FEATURE3_NUMA 0x0080 /* NUMA distance query support */ -#define HV_FEATURE3_TIME_FREQ 0x0100 /* timer frequency query (TSC, LAPIC) */ -#define HV_FEATURE3_MSR_CRASH 0x0400 /* MSRs for guest crash */ - -/* - * Define the format of the SIMP register - */ -typedef union { - uint64_t as_uint64_t; - struct { - uint64_t simp_enabled : 1; - uint64_t preserved : 11; - uint64_t base_simp_gpa : 52; - } u; -} hv_vmbus_synic_simp; - -/* - * Define the format of the SIEFP register - */ -typedef union { - uint64_t as_uint64_t; - struct { - uint64_t siefp_enabled : 1; - uint64_t preserved : 11; - uint64_t base_siefp_gpa : 52; - } u; -} 
hv_vmbus_synic_siefp; - -/* - * Define synthetic interrupt source - */ -typedef union { - uint64_t as_uint64_t; - struct { - uint64_t vector : 8; - uint64_t reserved1 : 8; - uint64_t masked : 1; - uint64_t auto_eoi : 1; - uint64_t reserved2 : 46; - } u; -} hv_vmbus_synic_sint; - -/* - * Timer configuration register. - */ -union hv_timer_config { - uint64_t as_uint64; - struct { - uint64_t enable:1; - uint64_t periodic:1; - uint64_t lazy:1; - uint64_t auto_enable:1; - uint64_t reserved_z0:12; - uint64_t sintx:4; - uint64_t reserved_z1:44; - }; -}; - -/* - * Define syn_ic control register - */ -typedef union _hv_vmbus_synic_scontrol { - uint64_t as_uint64_t; - struct { - uint64_t enable : 1; - uint64_t reserved : 63; - } u; -} hv_vmbus_synic_scontrol; - -/* * Define the hv_vmbus_post_message hypercall input structure */ typedef struct { @@ -570,68 +333,6 @@ typedef struct { } hv_vmbus_input_post_message; /* - * Define the synthetic interrupt controller event flags format - */ -typedef union { - uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT]; - uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; -} hv_vmbus_synic_event_flags; - -#define HV_X64_CPUID_MIN (0x40000005) -#define HV_X64_CPUID_MAX (0x4000ffff) - -/* - * Declare the MSR used to identify the guest OS - */ -#define HV_X64_MSR_GUEST_OS_ID (0x40000000) -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor - */ -#define HV_X64_MSR_HYPERCALL (0x40000001) -/* MSR used to provide vcpu index */ -#define HV_X64_MSR_VP_INDEX (0x40000002) - -#define HV_X64_MSR_TIME_REF_COUNT (0x40000020) - -/* - * Define synthetic interrupt controller model specific registers - */ -#define HV_X64_MSR_SCONTROL (0x40000080) -#define HV_X64_MSR_SVERSION (0x40000081) -#define HV_X64_MSR_SIEFP (0x40000082) -#define HV_X64_MSR_SIMP (0x40000083) -#define HV_X64_MSR_EOM (0x40000084) - -#define HV_X64_MSR_SINT0 (0x40000090) -#define HV_X64_MSR_SINT1 (0x40000091) -#define HV_X64_MSR_SINT2 (0x40000092) -#define HV_X64_MSR_SINT3 
(0x40000093) -#define HV_X64_MSR_SINT4 (0x40000094) -#define HV_X64_MSR_SINT5 (0x40000095) -#define HV_X64_MSR_SINT6 (0x40000096) -#define HV_X64_MSR_SINT7 (0x40000097) -#define HV_X64_MSR_SINT8 (0x40000098) -#define HV_X64_MSR_SINT9 (0x40000099) -#define HV_X64_MSR_SINT10 (0x4000009A) -#define HV_X64_MSR_SINT11 (0x4000009B) -#define HV_X64_MSR_SINT12 (0x4000009C) -#define HV_X64_MSR_SINT13 (0x4000009D) -#define HV_X64_MSR_SINT14 (0x4000009E) -#define HV_X64_MSR_SINT15 (0x4000009F) - -/* - * Synthetic Timer MSRs. Four timers per vcpu. - */ -#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 -#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 -#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 -#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 -#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 -#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 -#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 -#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 - -/* * Declare the various hypercall operations */ typedef enum { @@ -643,22 +344,8 @@ typedef enum { * Global variables */ -extern hv_vmbus_context hv_vmbus_g_context; extern hv_vmbus_connection hv_vmbus_g_connection; -extern u_int hyperv_features; -extern u_int hyperv_recommends; - -typedef void (*vmbus_msg_handler)(hv_vmbus_channel_msg_header *msg); - -typedef struct hv_vmbus_channel_msg_table_entry { - hv_vmbus_channel_msg_type messageType; - - vmbus_msg_handler messageHandler; -} hv_vmbus_channel_msg_table_entry; - -extern hv_vmbus_channel_msg_table_entry g_channel_message_table[]; - /* * Private, VM Bus functions */ @@ -713,8 +400,6 @@ hv_vmbus_channel* hv_vmbus_allocate_channel(void); void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel); int hv_vmbus_request_channel_offers(void); void hv_vmbus_release_unattached_channels(void); -int hv_vmbus_init(void); -void hv_vmbus_cleanup(void); uint16_t hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, @@ -723,8 +408,6 @@ uint16_t hv_vmbus_post_msg_via_msg_ipc( size_t payload_size); uint16_t 
hv_vmbus_signal_event(void *con_id); -void hv_vmbus_synic_init(void *irq_arg); -void hv_vmbus_synic_cleanup(void *arg); struct hv_device* hv_vmbus_child_device_create( hv_guid device_type, @@ -739,62 +422,13 @@ int hv_vmbus_child_device_unregister( /** * Connection interfaces */ -int hv_vmbus_connect(void); +struct vmbus_softc; +int hv_vmbus_connect(struct vmbus_softc *); int hv_vmbus_disconnect(void); int hv_vmbus_post_message(void *buffer, size_t buf_size); int hv_vmbus_set_event(hv_vmbus_channel *channel); -void hv_vmbus_on_events(int cpu); - -/** - * Event Timer interfaces - */ -void hv_et_init(void); -void hv_et_intr(struct trapframe*); /* Wait for device creation */ void vmbus_scan(void); -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * http://msdn.microsoft.com/en-us/library/windows/ - * hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how FreeBSD guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * FreeBSD and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. 
- * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100, FreeBSD is 0x200 - * 55:48 - Distro specific identification - * 47:16 - FreeBSD kernel version number - * 15:0 - Distro specific identification - * - */ - -#define HV_FREEBSD_VENDOR_ID 0x8200 -#define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0) - -static inline uint64_t hv_generate_guest_id( - uint8_t distro_id_part1, - uint16_t distro_id_part2) -{ - uint64_t guest_id; - guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48); - guest_id |= (((uint64_t)(distro_id_part1)) << 48); - guest_id |= (((uint64_t)(__FreeBSD_version)) << 16); /* in param.h */ - guest_id |= ((uint64_t)(distro_id_part2)); - return guest_id; -} - -typedef struct { - unsigned int vector; - void *page_buffers[2 * MAXCPU]; -} hv_setup_args; - #endif /* __HYPERV_PRIV_H__ */ diff --git a/sys/dev/hyperv/vmbus/hyperv.c b/sys/dev/hyperv/vmbus/hyperv.c new file mode 100644 index 0000000..c49f966 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hyperv.c @@ -0,0 +1,382 @@ +/*- + * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Implements low-level interactions with Hypver-V/Azure + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/pcpu.h> +#include <sys/timetc.h> +#include <machine/bus.h> +#include <machine/md_var.h> +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> + +#include <dev/hyperv/include/hyperv_busdma.h> +#include <dev/hyperv/vmbus/hv_vmbus_priv.h> +#include <dev/hyperv/vmbus/hyperv_machdep.h> +#include <dev/hyperv/vmbus/hyperv_reg.h> +#include <dev/hyperv/vmbus/hyperv_var.h> +#include <dev/hyperv/vmbus/vmbus_var.h> + +#define HYPERV_FREEBSD_BUILD 0ULL +#define HYPERV_FREEBSD_VERSION ((uint64_t)__FreeBSD_version) +#define HYPERV_FREEBSD_OSID 0ULL + +#define MSR_HV_GUESTID_BUILD_FREEBSD \ + (HYPERV_FREEBSD_BUILD & MSR_HV_GUESTID_BUILD_MASK) +#define MSR_HV_GUESTID_VERSION_FREEBSD \ + ((HYPERV_FREEBSD_VERSION << MSR_HV_GUESTID_VERSION_SHIFT) & \ + MSR_HV_GUESTID_VERSION_MASK) +#define MSR_HV_GUESTID_OSID_FREEBSD \ + ((HYPERV_FREEBSD_OSID << MSR_HV_GUESTID_OSID_SHIFT) & \ + MSR_HV_GUESTID_OSID_MASK) + +#define MSR_HV_GUESTID_FREEBSD \ + (MSR_HV_GUESTID_BUILD_FREEBSD | \ + MSR_HV_GUESTID_VERSION_FREEBSD | \ + MSR_HV_GUESTID_OSID_FREEBSD | \ + MSR_HV_GUESTID_OSTYPE_FREEBSD) + +struct hypercall_ctx { + void *hc_addr; + struct hyperv_dma hc_dma; +}; + +static u_int hyperv_get_timecount(struct timecounter *tc); + +u_int hyperv_features; +u_int 
hyperv_recommends; + +static u_int hyperv_pm_features; +static u_int hyperv_features3; + +static struct timecounter hyperv_timecounter = { + .tc_get_timecount = hyperv_get_timecount, + .tc_poll_pps = NULL, + .tc_counter_mask = 0xffffffff, + .tc_frequency = HYPERV_TIMER_FREQ, + .tc_name = "Hyper-V", + .tc_quality = 2000, + .tc_flags = 0, + .tc_priv = NULL +}; + +static struct hypercall_ctx hypercall_context; + +static u_int +hyperv_get_timecount(struct timecounter *tc __unused) +{ + return rdmsr(MSR_HV_TIME_REF_COUNT); +} + +/** + * @brief Invoke the specified hypercall + */ +static uint64_t +hv_vmbus_do_hypercall(uint64_t value, void *input, void *output) +{ + uint64_t in_paddr = 0, out_paddr = 0; + + if (input != NULL) + in_paddr = hv_get_phys_addr(input); + if (output != NULL) + out_paddr = hv_get_phys_addr(output); + + return hypercall_md(hypercall_context.hc_addr, value, + in_paddr, out_paddr); +} + +/** + * @brief Post a message using the hypervisor message IPC. + * (This involves a hypercall.) 
+ */ +hv_vmbus_status +hv_vmbus_post_msg_via_msg_ipc( + hv_vmbus_connection_id connection_id, + hv_vmbus_msg_type message_type, + void* payload, + size_t payload_size) +{ + struct alignedinput { + uint64_t alignment8; + hv_vmbus_input_post_message msg; + }; + + hv_vmbus_input_post_message* aligned_msg; + hv_vmbus_status status; + size_t addr; + + if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) + return (EMSGSIZE); + + addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF, + M_ZERO | M_NOWAIT); + KASSERT(addr != 0, + ("Error VMBUS: malloc failed to allocate message buffer!")); + if (addr == 0) + return (ENOMEM); + + aligned_msg = (hv_vmbus_input_post_message*) + (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN)); + + aligned_msg->connection_id = connection_id; + aligned_msg->message_type = message_type; + aligned_msg->payload_size = payload_size; + memcpy((void*) aligned_msg->payload, payload, payload_size); + + status = hv_vmbus_do_hypercall( + HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF; + + free((void *) addr, M_DEVBUF); + return (status); +} + +/** + * @brief Signal an event on the specified connection using the hypervisor + * event IPC. (This involves a hypercall.) 
+ */ +hv_vmbus_status +hv_vmbus_signal_event(void *con_id) +{ + hv_vmbus_status status; + + status = hv_vmbus_do_hypercall( + HV_CALL_SIGNAL_EVENT, + con_id, + 0) & 0xFFFF; + + return (status); +} + +int +hyperv_guid2str(const struct hv_guid *guid, char *buf, size_t sz) +{ + const uint8_t *d = guid->data; + + return snprintf(buf, sz, "%02x%02x%02x%02x-" + "%02x%02x-%02x%02x-%02x%02x-" + "%02x%02x%02x%02x%02x%02x", + d[3], d[2], d[1], d[0], + d[5], d[4], d[7], d[6], d[8], d[9], + d[10], d[11], d[12], d[13], d[14], d[15]); +} + +static bool +hyperv_identify(void) +{ + u_int regs[4]; + unsigned int maxleaf; + + if (vm_guest != VM_GUEST_HV) + return (false); + + do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs); + maxleaf = regs[0]; + if (maxleaf < CPUID_LEAF_HV_LIMITS) + return (false); + + do_cpuid(CPUID_LEAF_HV_INTERFACE, regs); + if (regs[0] != CPUID_HV_IFACE_HYPERV) + return (false); + + do_cpuid(CPUID_LEAF_HV_FEATURES, regs); + if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) { + /* + * Hyper-V w/o Hypercall is impossible; someone + * is faking Hyper-V. 
+ */ + return (false); + } + hyperv_features = regs[0]; + hyperv_pm_features = regs[2]; + hyperv_features3 = regs[3]; + + do_cpuid(CPUID_LEAF_HV_IDENTITY, regs); + printf("Hyper-V Version: %d.%d.%d [SP%d]\n", + regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]); + + printf(" Features=0x%b\n", hyperv_features, + "\020" + "\001VPRUNTIME" /* MSR_HV_VP_RUNTIME */ + "\002TMREFCNT" /* MSR_HV_TIME_REF_COUNT */ + "\003SYNIC" /* MSRs for SynIC */ + "\004SYNTM" /* MSRs for SynTimer */ + "\005APIC" /* MSR_HV_{EOI,ICR,TPR} */ + "\006HYPERCALL" /* MSR_HV_{GUEST_OS_ID,HYPERCALL} */ + "\007VPINDEX" /* MSR_HV_VP_INDEX */ + "\010RESET" /* MSR_HV_RESET */ + "\011STATS" /* MSR_HV_STATS_ */ + "\012REFTSC" /* MSR_HV_REFERENCE_TSC */ + "\013IDLE" /* MSR_HV_GUEST_IDLE */ + "\014TMFREQ" /* MSR_HV_{TSC,APIC}_FREQUENCY */ + "\015DEBUG"); /* MSR_HV_SYNTH_DEBUG_ */ + printf(" PM Features=0x%b [C%u]\n", + (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK), + "\020" + "\005C3HPET", /* HPET is required for C3 state */ + CPUPM_HV_CSTATE(hyperv_pm_features)); + printf(" Features3=0x%b\n", hyperv_features3, + "\020" + "\001MWAIT" /* MWAIT */ + "\002DEBUG" /* guest debug support */ + "\003PERFMON" /* performance monitor */ + "\004PCPUDPE" /* physical CPU dynamic partition event */ + "\005XMMHC" /* hypercall input through XMM regs */ + "\006IDLE" /* guest idle support */ + "\007SLEEP" /* hypervisor sleep support */ + "\010NUMA" /* NUMA distance query support */ + "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */ + "\012SYNCMC" /* inject synthetic machine checks */ + "\013CRASH" /* MSRs for guest crash */ + "\014DEBUGMSR" /* MSRs for guest debug */ + "\015NPIEP" /* NPIEP */ + "\016HVDIS"); /* disabling hypervisor */ + + do_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs); + hyperv_recommends = regs[0]; + if (bootverbose) + printf(" Recommends: %08x %08x\n", regs[0], regs[1]); + + do_cpuid(CPUID_LEAF_HV_LIMITS, regs); + if (bootverbose) { + printf(" Limits: Vcpu:%d Lcpu:%d Int:%d\n", + regs[0], regs[1], regs[2]); + 
} + + if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) { + do_cpuid(CPUID_LEAF_HV_HWFEATURES, regs); + if (bootverbose) { + printf(" HW Features: %08x, AMD: %08x\n", + regs[0], regs[3]); + } + } + + return (true); +} + +static void +hyperv_init(void *dummy __unused) +{ + if (!hyperv_identify()) { + /* Not Hyper-V; reset guest id to the generic one. */ + if (vm_guest == VM_GUEST_HV) + vm_guest = VM_GUEST_VM; + return; + } + + /* Set guest id */ + wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_FREEBSD); + + if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) { + /* Register Hyper-V timecounter */ + tc_init(&hyperv_timecounter); + } +} +SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, + NULL); + +static void +hypercall_memfree(void) +{ + hyperv_dmamem_free(&hypercall_context.hc_dma, + hypercall_context.hc_addr); + hypercall_context.hc_addr = NULL; +} + +static void +hypercall_create(void *arg __unused) +{ + uint64_t hc, hc_orig; + + if (vm_guest != VM_GUEST_HV) + return; + + hypercall_context.hc_addr = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0, + PAGE_SIZE, &hypercall_context.hc_dma, BUS_DMA_WAITOK); + if (hypercall_context.hc_addr == NULL) { + printf("hyperv: Hypercall page allocation failed\n"); + /* Can't perform any Hyper-V specific actions */ + vm_guest = VM_GUEST_VM; + return; + } + + /* Get the 'reserved' bits, which requires preservation. */ + hc_orig = rdmsr(MSR_HV_HYPERCALL); + + /* + * Setup the Hypercall page. + * + * NOTE: 'reserved' bits MUST be preserved. + */ + hc = ((hypercall_context.hc_dma.hv_paddr >> PAGE_SHIFT) << + MSR_HV_HYPERCALL_PGSHIFT) | + (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) | + MSR_HV_HYPERCALL_ENABLE; + wrmsr(MSR_HV_HYPERCALL, hc); + + /* + * Confirm that Hypercall page did get setup. 
+ */ + hc = rdmsr(MSR_HV_HYPERCALL); + if ((hc & MSR_HV_HYPERCALL_ENABLE) == 0) { + printf("hyperv: Hypercall setup failed\n"); + hypercall_memfree(); + /* Can't perform any Hyper-V specific actions */ + vm_guest = VM_GUEST_VM; + return; + } + if (bootverbose) + printf("hyperv: Hypercall created\n"); +} +SYSINIT(hypercall_ctor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_create, NULL); + +static void +hypercall_destroy(void *arg __unused) +{ + uint64_t hc; + + if (hypercall_context.hc_addr == NULL) + return; + + /* Disable Hypercall */ + hc = rdmsr(MSR_HV_HYPERCALL); + wrmsr(MSR_HV_HYPERCALL, (hc & MSR_HV_HYPERCALL_RSVD_MASK)); + hypercall_memfree(); + + if (bootverbose) + printf("hyperv: Hypercall destroyed\n"); +} +SYSUNINIT(hypercall_dtor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_destroy, + NULL); diff --git a/sys/dev/hyperv/vmbus/hyperv_busdma.c b/sys/dev/hyperv/vmbus/hyperv_busdma.c new file mode 100644 index 0000000..9550540 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hyperv_busdma.c @@ -0,0 +1,98 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> + +#include <machine/bus.h> + +#include <dev/hyperv/include/hyperv_busdma.h> + +#define HYPERV_DMA_MASK (BUS_DMA_WAITOK | BUS_DMA_NOWAIT | BUS_DMA_ZERO) + +void +hyperv_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error) +{ + bus_addr_t *paddr = arg; + + if (error) + return; + + KASSERT(nseg == 1, ("too many segments %d!", nseg)); + *paddr = segs->ds_addr; +} + +void * +hyperv_dmamem_alloc(bus_dma_tag_t parent_dtag, bus_size_t alignment, + bus_addr_t boundary, bus_size_t size, struct hyperv_dma *dma, int flags) +{ + void *ret; + int error; + + error = bus_dma_tag_create(parent_dtag, /* parent */ + alignment, /* alignment */ + boundary, /* boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + size, /* maxsize */ + 1, /* nsegments */ + size, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockfuncarg */ + &dma->hv_dtag); + if (error) + return NULL; + + error = bus_dmamem_alloc(dma->hv_dtag, &ret, + (flags & HYPERV_DMA_MASK) | BUS_DMA_COHERENT, &dma->hv_dmap); + if (error) { + bus_dma_tag_destroy(dma->hv_dtag); + return NULL; + } + + error = bus_dmamap_load(dma->hv_dtag, dma->hv_dmap, ret, size, + hyperv_dma_map_paddr, &dma->hv_paddr, BUS_DMA_NOWAIT); + if (error) { + bus_dmamem_free(dma->hv_dtag, ret, dma->hv_dmap); + 
bus_dma_tag_destroy(dma->hv_dtag); + return NULL; + } + return ret; +} + +void +hyperv_dmamem_free(struct hyperv_dma *dma, void *ptr) +{ + bus_dmamap_unload(dma->hv_dtag, dma->hv_dmap); + bus_dmamem_free(dma->hv_dtag, ptr, dma->hv_dmap); + bus_dma_tag_destroy(dma->hv_dtag); +} diff --git a/sys/dev/hyperv/vmbus/hyperv_machdep.h b/sys/dev/hyperv/vmbus/hyperv_machdep.h new file mode 100644 index 0000000..48cf5b7 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hyperv_machdep.h @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _HYPERV_MACHDEP_H_ +#define _HYPERV_MACHDEP_H_ + +#include <sys/param.h> + +uint64_t hypercall_md(volatile void *hc_addr, uint64_t in_val, + uint64_t in_paddr, uint64_t out_paddr); + +#endif /* !_HYPERV_MACHDEP_H_ */ diff --git a/sys/dev/hyperv/vmbus/hyperv_reg.h b/sys/dev/hyperv/vmbus/hyperv_reg.h new file mode 100644 index 0000000..ae39b95 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hyperv_reg.h @@ -0,0 +1,133 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _HYPERV_REG_H_ +#define _HYPERV_REG_H_ + +/* + * Hyper-V Synthetic MSRs + */ + +#define MSR_HV_GUEST_OS_ID 0x40000000 +#define MSR_HV_GUESTID_BUILD_MASK 0xffffULL +#define MSR_HV_GUESTID_VERSION_MASK 0x0000ffffffff0000ULL +#define MSR_HV_GUESTID_VERSION_SHIFT 16 +#define MSR_HV_GUESTID_OSID_MASK 0x00ff000000000000ULL +#define MSR_HV_GUESTID_OSID_SHIFT 48 +#define MSR_HV_GUESTID_OSTYPE_MASK 0x7f00000000000000ULL +#define MSR_HV_GUESTID_OSTYPE_SHIFT 56 +#define MSR_HV_GUESTID_OPENSRC 0x8000000000000000ULL +#define MSR_HV_GUESTID_OSTYPE_LINUX \ + ((0x01ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC) +#define MSR_HV_GUESTID_OSTYPE_FREEBSD \ + ((0x02ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC) + +#define MSR_HV_HYPERCALL 0x40000001 +#define MSR_HV_HYPERCALL_ENABLE 0x0001ULL +#define MSR_HV_HYPERCALL_RSVD_MASK 0x0ffeULL +#define MSR_HV_HYPERCALL_PGSHIFT 12 + +#define MSR_HV_VP_INDEX 0x40000002 + +#define MSR_HV_TIME_REF_COUNT 0x40000020 + +#define MSR_HV_SCONTROL 0x40000080 +#define MSR_HV_SCTRL_ENABLE 0x0001ULL +#define MSR_HV_SCTRL_RSVD_MASK 0xfffffffffffffffeULL + +#define MSR_HV_SIEFP 0x40000082 +#define MSR_HV_SIEFP_ENABLE 0x0001ULL +#define MSR_HV_SIEFP_RSVD_MASK 0x0ffeULL +#define MSR_HV_SIEFP_PGSHIFT 12 + +#define MSR_HV_SIMP 0x40000083 +#define MSR_HV_SIMP_ENABLE 0x0001ULL +#define MSR_HV_SIMP_RSVD_MASK 0x0ffeULL +#define MSR_HV_SIMP_PGSHIFT 12 + +#define MSR_HV_EOM 0x40000084 + +#define MSR_HV_SINT0 0x40000090 +#define MSR_HV_SINT_VECTOR_MASK 0x00ffULL +#define MSR_HV_SINT_RSVD1_MASK 0xff00ULL +#define MSR_HV_SINT_MASKED 0x00010000ULL +#define MSR_HV_SINT_AUTOEOI 0x00020000ULL +#define MSR_HV_SINT_RSVD2_MASK 0xfffffffffffc0000ULL +#define MSR_HV_SINT_RSVD_MASK (MSR_HV_SINT_RSVD1_MASK | \ + MSR_HV_SINT_RSVD2_MASK) + +#define MSR_HV_STIMER0_CONFIG 0x400000b0 +#define MSR_HV_STIMER_CFG_ENABLE 0x0001ULL +#define MSR_HV_STIMER_CFG_PERIODIC 0x0002ULL +#define MSR_HV_STIMER_CFG_LAZY 0x0004ULL +#define 
MSR_HV_STIMER_CFG_AUTOEN 0x0008ULL +#define MSR_HV_STIMER_CFG_SINT_MASK 0x000f0000ULL +#define MSR_HV_STIMER_CFG_SINT_SHIFT 16 + +#define MSR_HV_STIMER0_COUNT 0x400000b1 + +/* + * CPUID leaves + */ + +#define CPUID_LEAF_HV_MAXLEAF 0x40000000 + +#define CPUID_LEAF_HV_INTERFACE 0x40000001 +#define CPUID_HV_IFACE_HYPERV 0x31237648 /* HV#1 */ + +#define CPUID_LEAF_HV_IDENTITY 0x40000002 + +#define CPUID_LEAF_HV_FEATURES 0x40000003 +/* EAX: features */ +#define CPUID_HV_MSR_TIME_REFCNT 0x0002 /* MSR_HV_TIME_REF_COUNT */ +#define CPUID_HV_MSR_SYNIC 0x0004 /* MSRs for SynIC */ +#define CPUID_HV_MSR_SYNTIMER 0x0008 /* MSRs for SynTimer */ +#define CPUID_HV_MSR_APIC 0x0010 /* MSR_HV_{EOI,ICR,TPR} */ +#define CPUID_HV_MSR_HYPERCALL 0x0020 /* MSR_HV_GUEST_OS_ID + * MSR_HV_HYPERCALL */ +#define CPUID_HV_MSR_VP_INDEX 0x0040 /* MSR_HV_VP_INDEX */ +#define CPUID_HV_MSR_GUEST_IDLE 0x0400 /* MSR_HV_GUEST_IDLE */ +/* ECX: power management features */ +#define CPUPM_HV_CSTATE_MASK 0x000f /* deepest C-state */ +#define CPUPM_HV_C3_HPET 0x0010 /* C3 requires HPET */ +#define CPUPM_HV_CSTATE(f) ((f) & CPUPM_HV_CSTATE_MASK) +/* EDX: features3 */ +#define CPUID3_HV_MWAIT 0x0001 /* MWAIT */ +#define CPUID3_HV_XMM_HYPERCALL 0x0010 /* Hypercall input through + * XMM regs */ +#define CPUID3_HV_GUEST_IDLE 0x0020 /* guest idle */ +#define CPUID3_HV_NUMA 0x0080 /* NUMA distance query */ +#define CPUID3_HV_TIME_FREQ 0x0100 /* timer frequency query + * (TSC, LAPIC) */ +#define CPUID3_HV_MSR_CRASH 0x0400 /* MSRs for guest crash */ + +#define CPUID_LEAF_HV_RECOMMENDS 0x40000004 +#define CPUID_LEAF_HV_LIMITS 0x40000005 +#define CPUID_LEAF_HV_HWFEATURES 0x40000006 + +#endif /* !_HYPERV_REG_H_ */ diff --git a/sys/dev/hyperv/vmbus/hyperv_var.h b/sys/dev/hyperv/vmbus/hyperv_var.h new file mode 100644 index 0000000..9d7a877 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hyperv_var.h @@ -0,0 +1,41 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _HYPERV_VAR_H_ +#define _HYPERV_VAR_H_ + +#ifndef NANOSEC +#define NANOSEC 1000000000ULL +#endif +#define HYPERV_TIMER_NS_FACTOR 100ULL +#define HYPERV_TIMER_FREQ (NANOSEC / HYPERV_TIMER_NS_FACTOR) + +extern u_int hyperv_features; +extern u_int hyperv_recommends; + +#endif /* !_HYPERV_VAR_H_ */ diff --git a/sys/dev/hyperv/vmbus/i386/hyperv_machdep.c b/sys/dev/hyperv/vmbus/i386/hyperv_machdep.c new file mode 100644 index 0000000..b12bff8 --- /dev/null +++ b/sys/dev/hyperv/vmbus/i386/hyperv_machdep.c @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <dev/hyperv/vmbus/hyperv_machdep.h> + +uint64_t +hypercall_md(volatile void *hc_addr, uint64_t in_val, + uint64_t in_paddr, uint64_t out_paddr) +{ + uint32_t in_val_hi = in_val >> 32; + uint32_t in_val_lo = in_val & 0xFFFFFFFF; + uint32_t status_hi, status_lo; + uint32_t in_paddr_hi = in_paddr >> 32; + uint32_t in_paddr_lo = in_paddr & 0xFFFFFFFF; + uint32_t out_paddr_hi = out_paddr >> 32; + uint32_t out_paddr_lo = out_paddr & 0xFFFFFFFF; + + __asm__ __volatile__ ("call *%8" : "=d"(status_hi), "=a"(status_lo) : + "d" (in_val_hi), "a" (in_val_lo), + "b" (in_paddr_hi), "c" (in_paddr_lo), + "D"(out_paddr_hi), "S"(out_paddr_lo), + "m" (hc_addr)); + return (status_lo | ((uint64_t)status_hi << 32)); +} diff --git a/sys/dev/hyperv/vmbus/i386/hv_vector.S b/sys/dev/hyperv/vmbus/i386/vmbus_vector.S index 55a2613..b9ea849 100644 --- a/sys/dev/hyperv/vmbus/i386/hv_vector.S +++ b/sys/dev/hyperv/vmbus/i386/vmbus_vector.S @@ -35,15 +35,15 @@ * This is the Hyper-V vmbus channel direct callback interrupt. * Only used when it is running on Hyper-V. */ - .text - SUPERALIGN_TEXT -IDTVEC(hv_vmbus_callback) - PUSH_FRAME - SET_KERNEL_SREGS - cld - FAKE_MCOUNT(TF_EIP(%esp)) - pushl %esp - call hv_vector_handler - add $4, %esp - MEXITCOUNT - jmp doreti + .text + SUPERALIGN_TEXT +IDTVEC(vmbus_isr) + PUSH_FRAME + SET_KERNEL_SREGS + cld + FAKE_MCOUNT(TF_EIP(%esp)) + pushl %esp + call vmbus_handle_intr + add $4, %esp + MEXITCOUNT + jmp doreti diff --git a/sys/dev/hyperv/vmbus/vmbus.c b/sys/dev/hyperv/vmbus/vmbus.c new file mode 100644 index 0000000..4ad96d7 --- /dev/null +++ b/sys/dev/hyperv/vmbus/vmbus.c @@ -0,0 +1,855 @@ +/*- + * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * VM Bus Driver Implementation + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/proc.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/rtprio.h> +#include <sys/interrupt.h> +#include <sys/sx.h> +#include <sys/taskqueue.h> +#include <sys/mutex.h> +#include <sys/smp.h> + +#include <machine/resource.h> +#include <sys/rman.h> + +#include <machine/stdarg.h> +#include <machine/intr_machdep.h> +#include <machine/md_var.h> +#include <machine/segments.h> +#include <sys/pcpu.h> +#include <machine/apicvar.h> + +#include <dev/hyperv/include/hyperv.h> +#include <dev/hyperv/vmbus/hv_vmbus_priv.h> +#include <dev/hyperv/vmbus/hyperv_reg.h> +#include <dev/hyperv/vmbus/hyperv_var.h> +#include <dev/hyperv/vmbus/vmbus_reg.h> +#include <dev/hyperv/vmbus/vmbus_var.h> + +#include <contrib/dev/acpica/include/acpi.h> +#include "acpi_if.h" + +struct vmbus_softc *vmbus_sc; + +extern inthand_t IDTVEC(rsvd), IDTVEC(vmbus_isr); + +static void +vmbus_msg_task(void *xsc, int pending __unused) +{ + struct vmbus_softc *sc = xsc; + volatile struct vmbus_message *msg; + + msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE; + for (;;) { + if (msg->msg_type == VMBUS_MSGTYPE_NONE) { + /* No message */ + break; + } else if (msg->msg_type == VMBUS_MSGTYPE_CHANNEL) { + /* Channel message */ + vmbus_chan_msgproc(sc, + __DEVOLATILE(const struct vmbus_message *, msg)); + } + + msg->msg_type = VMBUS_MSGTYPE_NONE; + /* + * Make sure the write to msg_type (i.e. set to + * VMBUS_MSGTYPE_NONE) happens before we read the + * msg_flags and EOMing. Otherwise, the EOMing will + * not deliver any more messages since there is no + * empty slot + * + * NOTE: + * mb() is used here, since atomic_thread_fence_seq_cst() + * will become compiler fence on UP kernel. 
+ */ + mb(); + if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { + /* + * This will cause message queue rescan to possibly + * deliver another msg from the hypervisor + */ + wrmsr(MSR_HV_EOM, 0); + } + } +} + +static __inline int +vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu) +{ + volatile struct vmbus_message *msg; + struct vmbus_message *msg_base; + + msg_base = VMBUS_PCPU_GET(sc, message, cpu); + + /* + * Check event timer. + * + * TODO: move this to independent IDT vector. + */ + msg = msg_base + VMBUS_SINT_TIMER; + if (msg->msg_type == VMBUS_MSGTYPE_TIMER_EXPIRED) { + msg->msg_type = VMBUS_MSGTYPE_NONE; + + vmbus_et_intr(frame); + + /* + * Make sure the write to msg_type (i.e. set to + * VMBUS_MSGTYPE_NONE) happens before we read the + * msg_flags and EOMing. Otherwise, the EOMing will + * not deliver any more messages since there is no + * empty slot + * + * NOTE: + * mb() is used here, since atomic_thread_fence_seq_cst() + * will become compiler fence on UP kernel. + */ + mb(); + if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { + /* + * This will cause message queue rescan to possibly + * deliver another msg from the hypervisor + */ + wrmsr(MSR_HV_EOM, 0); + } + } + + /* + * Check events. Hot path for network and storage I/O data; high rate. + * + * NOTE: + * As recommended by the Windows guest fellows, we check events before + * checking messages. + */ + sc->vmbus_event_proc(sc, cpu); + + /* + * Check messages. Mainly management stuffs; ultra low rate. + */ + msg = msg_base + VMBUS_SINT_MESSAGE; + if (__predict_false(msg->msg_type != VMBUS_MSGTYPE_NONE)) { + taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu), + VMBUS_PCPU_PTR(sc, message_task, cpu)); + } + + return (FILTER_HANDLED); +} + +void +vmbus_handle_intr(struct trapframe *trap_frame) +{ + struct vmbus_softc *sc = vmbus_get_softc(); + int cpu = curcpu; + + /* + * Disable preemption. + */ + critical_enter(); + + /* + * Do a little interrupt counting. 
+ */ + (*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++; + + vmbus_handle_intr1(sc, trap_frame, cpu); + + /* + * Enable preemption. + */ + critical_exit(); +} + +static void +vmbus_synic_setup(void *xsc) +{ + struct vmbus_softc *sc = xsc; + int cpu = curcpu; + uint64_t val, orig; + uint32_t sint; + + if (hyperv_features & CPUID_HV_MSR_VP_INDEX) { + /* + * Save virtual processor id. + */ + VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX); + } else { + /* + * XXX + * Virtual processor id is only used by a pretty broken + * channel selection code from storvsc. It's nothing + * critical even if CPUID_HV_MSR_VP_INDEX is not set; fall + * back to curcpu and keep moving on. + */ + VMBUS_PCPU_GET(sc, vcpuid, cpu) = cpu; + } + + /* + * Setup the SynIC message page: enable SIMP and point it at this + * CPU's message DMA page, keeping the reserved bits as read. + */ + orig = rdmsr(MSR_HV_SIMP); + val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) | + ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) << + MSR_HV_SIMP_PGSHIFT); + wrmsr(MSR_HV_SIMP, val); + + /* + * Setup the SynIC event flags page: enable SIEFP and point it at + * this CPU's event flags DMA page, keeping the reserved bits as read. + */ + orig = rdmsr(MSR_HV_SIEFP); + val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) | + ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu) + >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT); + wrmsr(MSR_HV_SIEFP, val); + + + /* + * Configure and unmask SINT for message and event flags. The SINT + * is programmed with sc->vmbus_idtvec and auto-EOI; reserved bits + * are kept as read. + */ + sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; + orig = rdmsr(sint); + val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | + (orig & MSR_HV_SINT_RSVD_MASK); + wrmsr(sint, val); + + /* + * Configure and unmask SINT for timer; same vector and flags as + * the message SINT above. + */ + sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; + orig = rdmsr(sint); + val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | + (orig & MSR_HV_SINT_RSVD_MASK); + wrmsr(sint, val); + + /* + * All done; enable SynIC. + */ + orig = rdmsr(MSR_HV_SCONTROL); + val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK); + wrmsr(MSR_HV_SCONTROL, val); +} + +static void +vmbus_synic_teardown(void *arg) +{ + uint64_t orig; + uint32_t sint; + + /* + * Disable SynIC.
+ */ + orig = rdmsr(MSR_HV_SCONTROL); + wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK)); + + /* + * Mask message and event flags SINT. + */ + sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; + orig = rdmsr(sint); + wrmsr(sint, orig | MSR_HV_SINT_MASKED); + + /* + * Mask timer SINT. + */ + sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; + orig = rdmsr(sint); + wrmsr(sint, orig | MSR_HV_SINT_MASKED); + + /* + * Teardown SynIC message. + */ + orig = rdmsr(MSR_HV_SIMP); + wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK)); + + /* + * Teardown SynIC event flags. + */ + orig = rdmsr(MSR_HV_SIEFP); + wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK)); +} + +static int +vmbus_dma_alloc(struct vmbus_softc *sc) +{ + bus_dma_tag_t parent_dtag; + uint8_t *evtflags; + int cpu; + + parent_dtag = bus_get_dma_tag(sc->vmbus_dev); + CPU_FOREACH(cpu) { + void *ptr; + + /* + * Per-cpu messages and event flags. + */ + ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu), + BUS_DMA_WAITOK | BUS_DMA_ZERO); + if (ptr == NULL) + return ENOMEM; + VMBUS_PCPU_GET(sc, message, cpu) = ptr; + + ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), + BUS_DMA_WAITOK | BUS_DMA_ZERO); + if (ptr == NULL) + return ENOMEM; + VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr; + } + + evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); + if (evtflags == NULL) + return ENOMEM; + sc->vmbus_rx_evtflags = (u_long *)evtflags; + sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2)); + sc->vmbus_evtflags = evtflags; + + sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); + if (sc->vmbus_mnf1 == NULL) + return ENOMEM; + + sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); + if 
(sc->vmbus_mnf2 == NULL) + return ENOMEM; + + return 0; +} + +static void +vmbus_dma_free(struct vmbus_softc *sc) +{ + int cpu; + + if (sc->vmbus_evtflags != NULL) { + hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags); + sc->vmbus_evtflags = NULL; + sc->vmbus_rx_evtflags = NULL; + sc->vmbus_tx_evtflags = NULL; + } + if (sc->vmbus_mnf1 != NULL) { + hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1); + sc->vmbus_mnf1 = NULL; + } + if (sc->vmbus_mnf2 != NULL) { + hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2); + sc->vmbus_mnf2 = NULL; + } + + CPU_FOREACH(cpu) { + if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) { + hyperv_dmamem_free( + VMBUS_PCPU_PTR(sc, message_dma, cpu), + VMBUS_PCPU_GET(sc, message, cpu)); + VMBUS_PCPU_GET(sc, message, cpu) = NULL; + } + if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) { + hyperv_dmamem_free( + VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), + VMBUS_PCPU_GET(sc, event_flags, cpu)); + VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL; + } + } +} + +/** + * @brief Find a free IDT slot and set up the interrupt handler. + * + * @return the vector at which vmbus_isr was installed, or 0 if no slot + * still holding the rsvd handler was found. + */ +static int +vmbus_vector_alloc(void) +{ + int vector; + uintptr_t func; + struct gate_descriptor *ip; + + /* + * Search backwards from the highest IDT vector available for use + * as vmbus channel callback vector (APIC_SPURIOUS_INT - 1 down to + * APIC_IPI_INTS). We install 'vmbus_isr' + * handler at that vector and use it to interrupt vcpus. + */ + vector = APIC_SPURIOUS_INT; + while (--vector >= APIC_IPI_INTS) { + ip = &idt[vector]; + func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); + if (func == (uintptr_t)&IDTVEC(rsvd)) { +#ifdef __i386__ + setidt(vector , IDTVEC(vmbus_isr), SDT_SYS386IGT, + SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#else + setidt(vector , IDTVEC(vmbus_isr), SDT_SYSIGT, + SEL_KPL, 0); +#endif + + return (vector); + } + } + return (0); +} + +/** + * @brief Restore the IDT slot to rsvd.
+ */ +static void +vmbus_vector_free(int vector) +{ + uintptr_t func; + struct gate_descriptor *ip; + + if (vector == 0) + return; + + KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT, + ("invalid vector %d", vector)); + + ip = &idt[vector]; + func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); + KASSERT(func == (uintptr_t)&IDTVEC(vmbus_isr), + ("invalid vector %d", vector)); + + setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); +} + +static void +vmbus_cpuset_setthread_task(void *xmask, int pending __unused) +{ + cpuset_t *mask = xmask; + int error; + + error = cpuset_setthread(curthread->td_tid, mask); + if (error) { + panic("curthread=%ju: can't pin; error=%d", + (uintmax_t)curthread->td_tid, error); + } +} + +static int +vmbus_intr_setup(struct vmbus_softc *sc) +{ + int cpu; + + CPU_FOREACH(cpu) { + struct task cpuset_task; + char buf[MAXCOMLEN + 1]; + cpuset_t cpu_mask; + + /* Allocate an interrupt counter for Hyper-V interrupt */ + snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu); + intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu)); + + /* + * Setup taskqueue to handle events. Task will be per- + * channel. + */ + VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast( + "hyperv event", M_WAITOK, taskqueue_thread_enqueue, + VMBUS_PCPU_PTR(sc, event_tq, cpu)); + taskqueue_start_threads(VMBUS_PCPU_PTR(sc, event_tq, cpu), + 1, PI_NET, "hvevent%d", cpu); + + CPU_SETOF(cpu, &cpu_mask); + TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, + &cpu_mask); + taskqueue_enqueue(VMBUS_PCPU_GET(sc, event_tq, cpu), + &cpuset_task); + taskqueue_drain(VMBUS_PCPU_GET(sc, event_tq, cpu), + &cpuset_task); + + /* + * Setup tasks and taskqueues to handle messages. 
+ */ + VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast( + "hyperv msg", M_WAITOK, taskqueue_thread_enqueue, + VMBUS_PCPU_PTR(sc, message_tq, cpu)); + taskqueue_start_threads(VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, + PI_NET, "hvmsg%d", cpu); + TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0, + vmbus_msg_task, sc); + + CPU_SETOF(cpu, &cpu_mask); + TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, + &cpu_mask); + taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu), + &cpuset_task); + taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu), + &cpuset_task); + } + + /* + * All Hyper-V ISR required resources are setup, now let's find a + * free IDT vector for Hyper-V ISR and set it up. + */ + sc->vmbus_idtvec = vmbus_vector_alloc(); + if (sc->vmbus_idtvec == 0) { + device_printf(sc->vmbus_dev, "cannot find free IDT vector\n"); + return ENXIO; + } + if(bootverbose) { + device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n", + sc->vmbus_idtvec); + } + return 0; +} + +static void +vmbus_intr_teardown(struct vmbus_softc *sc) +{ + int cpu; + + vmbus_vector_free(sc->vmbus_idtvec); + + CPU_FOREACH(cpu) { + if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) { + taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu)); + VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL; + } + if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) { + taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu), + VMBUS_PCPU_PTR(sc, message_task, cpu)); + taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu)); + VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL; + } + } +} + +static int +vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) +{ + struct hv_device *child_dev_ctx = device_get_ivars(child); + + switch (index) { + case HV_VMBUS_IVAR_TYPE: + *result = (uintptr_t)&child_dev_ctx->class_id; + return (0); + + case HV_VMBUS_IVAR_INSTANCE: + *result = (uintptr_t)&child_dev_ctx->device_id; + return (0); + + case HV_VMBUS_IVAR_DEVCTX: + *result = (uintptr_t)child_dev_ctx; + return (0); + + case 
HV_VMBUS_IVAR_NODE: + *result = (uintptr_t)child_dev_ctx->device; + return (0); + } + return (ENOENT); +} + +static int +vmbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value) +{ + switch (index) { + case HV_VMBUS_IVAR_TYPE: + case HV_VMBUS_IVAR_INSTANCE: + case HV_VMBUS_IVAR_DEVCTX: + case HV_VMBUS_IVAR_NODE: + /* read-only */ + return (EINVAL); + } + return (ENOENT); +} + +static int +vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen) +{ + struct hv_device *dev_ctx = device_get_ivars(child); + char guidbuf[HYPERV_GUID_STRLEN]; + + if (dev_ctx == NULL) + return (0); + + strlcat(buf, "classid=", buflen); + hyperv_guid2str(&dev_ctx->class_id, guidbuf, sizeof(guidbuf)); + strlcat(buf, guidbuf, buflen); + + strlcat(buf, " deviceid=", buflen); + hyperv_guid2str(&dev_ctx->device_id, guidbuf, sizeof(guidbuf)); + strlcat(buf, guidbuf, buflen); + + return (0); +} + +struct hv_device * +hv_vmbus_child_device_create(hv_guid type, hv_guid instance, + hv_vmbus_channel *channel) +{ + hv_device *child_dev; + + /* + * Allocate the new child device + */ + child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_WAITOK | M_ZERO); + + child_dev->channel = channel; + memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); + memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); + + return (child_dev); +} + +int +hv_vmbus_child_device_register(struct hv_device *child_dev) +{ + device_t child, parent; + + parent = vmbus_get_device(); + if (bootverbose) { + char name[HYPERV_GUID_STRLEN]; + + hyperv_guid2str(&child_dev->class_id, name, sizeof(name)); + device_printf(parent, "add device, classid: %s\n", name); + } + + child = device_add_child(parent, NULL, -1); + child_dev->device = child; + device_set_ivars(child, child_dev); + + return (0); +} + +int +hv_vmbus_child_device_unregister(struct hv_device *child_dev) +{ + int ret = 0; + /* + * XXXKYS: Ensure that this is the opposite of + * device_add_child() + */ + mtx_lock(&Giant); + ret = 
device_delete_child(vmbus_get_device(), child_dev->device); + mtx_unlock(&Giant); + return(ret); +} + +static int +vmbus_probe(device_t dev) +{ + char *id[] = { "VMBUS", NULL }; + + if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL || + device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV || + (hyperv_features & CPUID_HV_MSR_SYNIC) == 0) + return (ENXIO); + + device_set_desc(dev, "Hyper-V Vmbus"); + + return (BUS_PROBE_DEFAULT); +} + +/** + * @brief Main vmbus driver initialization routine. + * + * Here, we + * - initialize the vmbus driver context + * - setup various driver entry points + * - invoke the vmbus hv main init routine + * - get the irq resource + * - invoke the vmbus to add the vmbus root device + * - setup the vmbus root device + * - retrieve the channel offers + */ +static int +vmbus_bus_init(void) +{ + struct vmbus_softc *sc = vmbus_get_softc(); + int ret; + + if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED) + return (0); + sc->vmbus_flags |= VMBUS_FLAG_ATTACHED; + + /* + * Allocate DMA stuffs. + */ + ret = vmbus_dma_alloc(sc); + if (ret != 0) + goto cleanup; + + /* + * Setup interrupt. + */ + ret = vmbus_intr_setup(sc); + if (ret != 0) + goto cleanup; + + /* + * Setup SynIC. 
+ */ + if (bootverbose) + device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started); + smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc); + sc->vmbus_flags |= VMBUS_FLAG_SYNIC; + + /* + * Connect to VMBus in the root partition + */ + ret = hv_vmbus_connect(sc); + if (ret != 0) + goto cleanup; + + if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 || + hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) + sc->vmbus_event_proc = vmbus_event_proc_compat; + else + sc->vmbus_event_proc = vmbus_event_proc; + + hv_vmbus_request_channel_offers(); + + vmbus_scan(); + bus_generic_attach(sc->vmbus_dev); + device_printf(sc->vmbus_dev, "device scan, probe and attach done\n"); + + return (ret); + +cleanup: + /* + * SynIC may already be enabled on every CPU (hv_vmbus_connect() + * failed after vmbus_synic_setup() ran). Disable it before the IDT + * vector and the per-cpu message/event flags pages it references + * are released below; this mirrors vmbus_detach(). + */ + if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) { + sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC; + smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL); + } + vmbus_intr_teardown(sc); + vmbus_dma_free(sc); + + return (ret); +} + +static void +vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused) +{ +} + +static int +vmbus_attach(device_t dev) +{ + vmbus_sc = device_get_softc(dev); + vmbus_sc->vmbus_dev = dev; + + /* + * Event processing logic will be configured: + * - After the vmbus protocol version negotiation. + * - Before we request channel offers. + */ + vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy; + + /* + * If the system has already booted and thread + * scheduling is possible indicated by the global + * cold set to zero, we just call the driver + * initialization directly. + */ + if (!cold) + vmbus_bus_init(); + + bus_generic_probe(dev); + return (0); +} + +static void +vmbus_sysinit(void *arg __unused) +{ + if (vm_guest != VM_GUEST_HV || vmbus_get_softc() == NULL) + return; + + /* + * If the system has already booted and thread + * scheduling is possible, as indicated by the + * global cold set to zero, we just call the driver + * initialization directly.
+ */ + if (!cold) + vmbus_bus_init(); +} + +static int +vmbus_detach(device_t dev) +{ + struct vmbus_softc *sc = device_get_softc(dev); + + hv_vmbus_release_unattached_channels(); + hv_vmbus_disconnect(); + + if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) { + sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC; + smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL); + } + + vmbus_intr_teardown(sc); + vmbus_dma_free(sc); + + return (0); +} + +static device_method_t vmbus_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, vmbus_probe), + DEVMETHOD(device_attach, vmbus_attach), + DEVMETHOD(device_detach, vmbus_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + /* Bus interface */ + DEVMETHOD(bus_add_child, bus_generic_add_child), + DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_read_ivar, vmbus_read_ivar), + DEVMETHOD(bus_write_ivar, vmbus_write_ivar), + DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str), + + DEVMETHOD_END +}; + +static driver_t vmbus_driver = { + "vmbus", + vmbus_methods, + sizeof(struct vmbus_softc) +}; + +static devclass_t vmbus_devclass; + +DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL); +MODULE_DEPEND(vmbus, acpi, 1, 1, 1); +MODULE_VERSION(vmbus, 1); + +/* + * NOTE: + * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is + * initialized. + */ +SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL); + diff --git a/sys/dev/hyperv/vmbus/vmbus_et.c b/sys/dev/hyperv/vmbus/vmbus_et.c new file mode 100644 index 0000000..391fa1f --- /dev/null +++ b/sys/dev/hyperv/vmbus/vmbus_et.c @@ -0,0 +1,198 @@ +/*- + * Copyright (c) 2015,2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/smp.h> +#include <sys/time.h> +#include <sys/timeet.h> + +#include <machine/cpu.h> + +#include <dev/hyperv/vmbus/hyperv_reg.h> +#include <dev/hyperv/vmbus/hyperv_var.h> +#include <dev/hyperv/vmbus/vmbus_var.h> + +#define VMBUS_ET_NAME "hvet" + +#define MSR_HV_STIMER0_CFG_SINT \ + ((((uint64_t)VMBUS_SINT_TIMER) << MSR_HV_STIMER_CFG_SINT_SHIFT) & \ + MSR_HV_STIMER_CFG_SINT_MASK) + +/* + * Two additionally required features: + * - SynIC is needed for interrupt generation. + * - Time reference counter is needed to set ABS reference count to + * STIMER0_COUNT. 
+ */ +#define CPUID_HV_ET_MASK (CPUID_HV_MSR_TIME_REFCNT | \ + CPUID_HV_MSR_SYNIC | \ + CPUID_HV_MSR_SYNTIMER) + +static struct eventtimer vmbus_et; + +static __inline uint64_t +hyperv_sbintime2count(sbintime_t time) +{ + struct timespec val; + + val = sbttots(time); + return (val.tv_sec * HYPERV_TIMER_FREQ) + + (val.tv_nsec / HYPERV_TIMER_NS_FACTOR); +} + +static int +vmbus_et_start(struct eventtimer *et __unused, sbintime_t first, + sbintime_t period __unused) +{ + uint64_t current; + + current = rdmsr(MSR_HV_TIME_REF_COUNT); + current += hyperv_sbintime2count(first); + wrmsr(MSR_HV_STIMER0_COUNT, current); + + return (0); +} + +void +vmbus_et_intr(struct trapframe *frame) +{ + struct trapframe *oldframe; + struct thread *td; + + if (vmbus_et.et_active) { + td = curthread; + td->td_intr_nesting_level++; + oldframe = td->td_intr_frame; + td->td_intr_frame = frame; + vmbus_et.et_event_cb(&vmbus_et, vmbus_et.et_arg); + td->td_intr_frame = oldframe; + td->td_intr_nesting_level--; + } +} + +static void +vmbus_et_identify(driver_t *driver, device_t parent) +{ + if (device_get_unit(parent) != 0 || + device_find_child(parent, VMBUS_ET_NAME, -1) != NULL || + (hyperv_features & CPUID_HV_ET_MASK) != CPUID_HV_ET_MASK) + return; + + device_add_child(parent, VMBUS_ET_NAME, -1); +} + +static int +vmbus_et_probe(device_t dev) +{ + if (resource_disabled(VMBUS_ET_NAME, 0)) + return (ENXIO); + + device_set_desc(dev, "Hyper-V event timer"); + + return (BUS_PROBE_NOWILDCARD); +} + +static void +vmbus_et_config(void *arg __unused) +{ + /* + * Make sure that STIMER0 is really disabled before writing + * to STIMER0_CONFIG. + * + * "Writing to the configuration register of a timer that + * is already enabled may result in undefined behaviour." 
+ */ + for (;;) { + uint64_t val; + + /* Stop counting, and this also implies disabling STIMER0 */ + wrmsr(MSR_HV_STIMER0_COUNT, 0); + + val = rdmsr(MSR_HV_STIMER0_CONFIG); + if ((val & MSR_HV_STIMER_CFG_ENABLE) == 0) + break; + cpu_spinwait(); + } + wrmsr(MSR_HV_STIMER0_CONFIG, + MSR_HV_STIMER_CFG_AUTOEN | MSR_HV_STIMER0_CFG_SINT); +} + +static int +vmbus_et_attach(device_t dev) +{ + /* TODO: use independent IDT vector */ + + vmbus_et.et_name = "Hyper-V"; + vmbus_et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; + vmbus_et.et_quality = 1000; + vmbus_et.et_frequency = HYPERV_TIMER_FREQ; + vmbus_et.et_min_period = (0x00000001ULL << 32) / HYPERV_TIMER_FREQ; + vmbus_et.et_max_period = (0xfffffffeULL << 32) / HYPERV_TIMER_FREQ; + vmbus_et.et_start = vmbus_et_start; + + /* + * Delay a bit to make sure that MSR_HV_TIME_REF_COUNT will + * not return 0, since writing 0 to STIMER0_COUNT will disable + * STIMER0. + */ + DELAY(100); + smp_rendezvous(NULL, vmbus_et_config, NULL, NULL); + + return (et_register(&vmbus_et)); +} + +static int +vmbus_et_detach(device_t dev) +{ + return (et_deregister(&vmbus_et)); +} + +static device_method_t vmbus_et_methods[] = { + DEVMETHOD(device_identify, vmbus_et_identify), + DEVMETHOD(device_probe, vmbus_et_probe), + DEVMETHOD(device_attach, vmbus_et_attach), + DEVMETHOD(device_detach, vmbus_et_detach), + + DEVMETHOD_END +}; + +static driver_t vmbus_et_driver = { + VMBUS_ET_NAME, + vmbus_et_methods, + 0 +}; + +static devclass_t vmbus_et_devclass; +DRIVER_MODULE(hv_et, vmbus, vmbus_et_driver, vmbus_et_devclass, NULL, NULL); +MODULE_VERSION(hv_et, 1); diff --git a/sys/dev/hyperv/vmbus/vmbus_reg.h b/sys/dev/hyperv/vmbus/vmbus_reg.h new file mode 100644 index 0000000..4be8614 --- /dev/null +++ b/sys/dev/hyperv/vmbus/vmbus_reg.h @@ -0,0 +1,84 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VMBUS_REG_H_ +#define _VMBUS_REG_H_ + +#include <sys/param.h> + +/* + * Hyper-V SynIC message format. 
+ */ + +#define VMBUS_MSG_DSIZE_MAX 240 +#define VMBUS_MSG_SIZE 256 + +struct vmbus_message { + uint32_t msg_type; /* VMBUS_MSGTYPE_ */ + uint8_t msg_dsize; /* data size */ + uint8_t msg_flags; /* VMBUS_MSGFLAG_ */ + uint16_t msg_rsvd; + uint64_t msg_id; + uint8_t msg_data[VMBUS_MSG_DSIZE_MAX]; +} __packed; +CTASSERT(sizeof(struct vmbus_message) == VMBUS_MSG_SIZE); + +#define VMBUS_MSGTYPE_NONE 0 +#define VMBUS_MSGTYPE_CHANNEL 1 +#define VMBUS_MSGTYPE_TIMER_EXPIRED 0x80000010 + +#define VMBUS_MSGFLAG_PENDING 0x01 + +/* + * Hyper-V SynIC event flags + */ + +#ifdef __LP64__ +#define VMBUS_EVTFLAGS_MAX 32 +#define VMBUS_EVTFLAG_SHIFT 6 +#else +#define VMBUS_EVTFLAGS_MAX 64 +#define VMBUS_EVTFLAG_SHIFT 5 +#endif +#define VMBUS_EVTFLAG_LEN (1 << VMBUS_EVTFLAG_SHIFT) +#define VMBUS_EVTFLAG_MASK (VMBUS_EVTFLAG_LEN - 1) +#define VMBUS_EVTFLAGS_SIZE 256 + +struct vmbus_evtflags { + u_long evt_flags[VMBUS_EVTFLAGS_MAX]; +} __packed; +CTASSERT(sizeof(struct vmbus_evtflags) == VMBUS_EVTFLAGS_SIZE); + +/* + * Channel + */ + +#define VMBUS_CHAN_MAX_COMPAT 256 +#define VMBUS_CHAN_MAX (VMBUS_EVTFLAG_LEN * VMBUS_EVTFLAGS_MAX) + +#endif /* !_VMBUS_REG_H_ */ diff --git a/sys/dev/hyperv/vmbus/vmbus_var.h b/sys/dev/hyperv/vmbus/vmbus_var.h new file mode 100644 index 0000000..efc6f03 --- /dev/null +++ b/sys/dev/hyperv/vmbus/vmbus_var.h @@ -0,0 +1,120 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VMBUS_VAR_H_ +#define _VMBUS_VAR_H_ + +#include <sys/param.h> +#include <sys/taskqueue.h> + +#include <dev/hyperv/include/hyperv_busdma.h> + +/* + * NOTE: DO NOT CHANGE THIS. + */ +#define VMBUS_SINT_MESSAGE 2 +/* + * NOTE: + * - DO NOT set it to the same value as VMBUS_SINT_MESSAGE. + * - DO NOT set it to 0. 
+ */ +#define VMBUS_SINT_TIMER 4 + +struct vmbus_pcpu_data { + u_long *intr_cnt; /* Hyper-V interrupt counter */ + struct vmbus_message *message; /* shared messages */ + uint32_t vcpuid; /* virtual cpuid */ + int event_flags_cnt;/* # of event flags */ + struct vmbus_evtflags *event_flags; /* event flags from host */ + + /* Rarely used fields */ + struct hyperv_dma message_dma; /* busdma glue */ + struct hyperv_dma event_flags_dma;/* busdma glue */ + struct taskqueue *event_tq; /* event taskq */ + struct taskqueue *message_tq; /* message taskq */ + struct task message_task; /* message task */ +} __aligned(CACHE_LINE_SIZE); + +struct vmbus_softc { + void (*vmbus_event_proc)(struct vmbus_softc *, int); + u_long *vmbus_tx_evtflags; + /* event flags to host */ + void *vmbus_mnf2; /* monitored by host */ + + u_long *vmbus_rx_evtflags; + /* compat evtflgs from host */ + struct vmbus_pcpu_data vmbus_pcpu[MAXCPU]; + + /* Rarely used fields */ + device_t vmbus_dev; + int vmbus_idtvec; + uint32_t vmbus_flags; /* see VMBUS_FLAG_ */ + + /* Shared memory for vmbus_{rx,tx}_evtflags */ + void *vmbus_evtflags; + struct hyperv_dma vmbus_evtflags_dma; + + void *vmbus_mnf1; /* monitored by VM, unused */ + struct hyperv_dma vmbus_mnf1_dma; + struct hyperv_dma vmbus_mnf2_dma; +}; + +#define VMBUS_FLAG_ATTACHED 0x0001 /* vmbus was attached */ +#define VMBUS_FLAG_SYNIC 0x0002 /* SynIC was setup */ + +extern struct vmbus_softc *vmbus_sc; + +static __inline struct vmbus_softc * +vmbus_get_softc(void) +{ + return vmbus_sc; +} + +static __inline device_t +vmbus_get_device(void) +{ + return vmbus_sc->vmbus_dev; +} + +#define VMBUS_PCPU_GET(sc, field, cpu) (sc)->vmbus_pcpu[(cpu)].field +#define VMBUS_PCPU_PTR(sc, field, cpu) &(sc)->vmbus_pcpu[(cpu)].field + +struct hv_vmbus_channel; +struct trapframe; +struct vmbus_message; + +void vmbus_on_channel_open(const struct hv_vmbus_channel *); +void vmbus_event_proc(struct vmbus_softc *, int); +void vmbus_event_proc_compat(struct vmbus_softc *, int); 
+void vmbus_handle_intr(struct trapframe *); + +void vmbus_et_intr(struct trapframe *); + +void vmbus_chan_msgproc(struct vmbus_softc *, const struct vmbus_message *); + +#endif /* !_VMBUS_VAR_H_ */ diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h index 8136e57..7f64850 100644 --- a/sys/dev/mlx5/driver.h +++ b/sys/dev/mlx5/driver.h @@ -33,6 +33,7 @@ #include <linux/pci.h> #include <linux/cache.h> #include <linux/rbtree.h> +#include <linux/if_ether.h> #include <linux/semaphore.h> #include <linux/slab.h> #include <linux/vmalloc.h> diff --git a/sys/dev/mlx5/mlx5_core/mlx5_vport.c b/sys/dev/mlx5/mlx5_core/mlx5_vport.c index 5c8626b..a3e1751 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_vport.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_vport.c @@ -71,7 +71,7 @@ static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u32 vport, int mlx5_vport_alloc_q_counter(struct mlx5_core_dev *mdev, int *counter_set_id) { u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]; - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)]; int err; memset(in, 0, sizeof(in)); @@ -471,6 +471,241 @@ int mlx5_set_nic_vport_promisc(struct mlx5_core_dev *mdev, int vport, return mlx5_modify_nic_vport_context(mdev, in, sizeof(in)); } EXPORT_SYMBOL_GPL(mlx5_set_nic_vport_promisc); + +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + u8 *mac_addr; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC) ? 
+ 1 << MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN_MAX(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NIC_VPORT_LIST_TYPE_UC ? 
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + +int mlx5_query_nic_vport_vlan_list(struct mlx5_core_dev *dev, + u32 vport, + u16 *vlan_list, + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + void *vlan_addr; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = 1 << MLX5_CAP_GEN_MAX(dev, log_max_vlan_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + 
MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, + MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_VLAN_LIST); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + vlan_addr = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + current_uc_mac_address[i]); + vlan_list[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlan_list); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NIC_VPORT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + 
current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); + int mlx5_set_nic_vport_permanent_mac(struct mlx5_core_dev *mdev, int vport, u8 *addr) { @@ -785,6 +1020,129 @@ int mlx5_set_eswitch_cvlan_info(struct mlx5_core_dev *mdev, u8 vport, } EXPORT_SYMBOL_GPL(mlx5_set_eswitch_cvlan_info); +int mlx5_arm_vport_context_events(struct mlx5_core_dev *mdev, + u8 vport, + u32 events_mask) +{ + u32 *in; + u32 inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_ctx; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, + in, + opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, + in, + field_select.change_event, + 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & MLX5_UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_uc_address_change, + 1); + if (events_mask & MLX5_MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_mc_address_change, + 1); + if (events_mask & MLX5_VLAN_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_vlan_change, + 1); + if (events_mask & MLX5_PROMISC_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_promisc_change, + 1); + if (events_mask & MLX5_MTU_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_mtu, + 1); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_arm_vport_context_events); + +int mlx5_query_vport_promisc(struct mlx5_core_dev 
*mdev, + u32 vport, + u8 *promisc_uc, + u8 *promisc_mc, + u8 *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_err(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); + int mlx5_query_vport_counter(struct mlx5_core_dev *dev, u8 port_num, u16 vport_num, void *out, int out_size) diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c index c618c92..a0dcd2d 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c @@ -390,6 +390,49 @@ add_eth_addr_rule_out: return (err); } +static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) +{ + struct ifnet *ifp = priv->ifp; + 
int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + if_printf(ifp, + "ifnet vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + if (i >= list_size) + break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); + if (err) + if_printf(ifp, "Failed to modify vport vlans list err(%d)\n", + err); + + kfree(vlans); + return err; +} + enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, MLX5E_VLAN_RULE_TYPE_ANY_VID, @@ -448,6 +491,7 @@ mlx5e_add_vlan_rule(struct mlx5e_priv *priv, outer_headers.first_vid); MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, vid); + mlx5e_vport_context_update_vlans(priv); break; } @@ -478,6 +522,7 @@ mlx5e_del_vlan_rule(struct mlx5e_priv *priv, case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5_del_flow_table_entry(priv->ft.vlan, priv->vlan.active_vlans_ft_ix[vid]); + mlx5e_vport_context_update_vlans(priv); break; } } @@ -628,6 +673,91 @@ mlx5e_sync_ifp_addr(struct mlx5e_priv *priv) if_maddr_runlock(ifp); } +static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC); + struct ifnet *ifp = priv->ifp; + struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_eth_addr_hash_head *addr_list; + struct mlx5e_eth_addr_hash_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? 
priv->eth_addr.if_uc : priv->eth_addr.if_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], IF_LLADDR(ifp)); + else if (priv->eth_addr.broadcast_enabled) + ether_addr_copy(addr_array[i++], ifp->if_broadcastaddr); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(IF_LLADDR(ifp), hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, + int list_type) +{ + bool is_uc = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC); + struct mlx5e_eth_addr_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct mlx5e_eth_addr_hash_head *addr_list; + struct mlx5e_eth_addr_hash_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); + + addr_list = is_uc ? priv->eth_addr.if_uc : priv->eth_addr.if_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + if_printf(priv->ifp, + "ifp %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? "UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(priv, list_type, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); +out: + if (err) + if_printf(priv->ifp, + "Failed to modify vport %s list err(%d)\n", + is_uc ? 
"UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + + mlx5e_vport_context_update_addr_list(priv, MLX5_NIC_VPORT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(priv, MLX5_NIC_VPORT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(priv->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + static void mlx5e_apply_ifp_addr(struct mlx5e_priv *priv) { @@ -701,6 +831,8 @@ mlx5e_set_rx_mode_core(struct mlx5e_priv *priv) ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(priv); } void diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index d71cbb3..4a8029b 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -3001,6 +3001,13 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev) } mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr); + /* check if we should generate a random MAC address */ + if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 && + is_zero_ether_addr(dev_addr)) { + random_ether_addr(dev_addr); + if_printf(ifp, "Assigned random MAC address\n"); + } + /* set default MTU */ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu); diff --git a/sys/dev/mlx5/vport.h b/sys/dev/mlx5/vport.h index c5948e7..cf52785 100644 --- a/sys/dev/mlx5/vport.h +++ b/sys/dev/mlx5/vport.h @@ -38,6 +38,18 @@ int mlx5_vport_query_out_of_rx_buffer(struct mlx5_core_dev *mdev, u32 *out_of_rx_buffer); u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); +int mlx5_arm_vport_context_events(struct mlx5_core_dev *mdev, + u8 vport, + u32 events_mask); +int mlx5_query_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + u8 *promisc_uc, + u8 *promisc_mc, + u8 *promisc_all); +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all); 
int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u32 vport, u8 *addr); int mlx5_set_nic_vport_current_mac(struct mlx5_core_dev *mdev, int vport, @@ -49,6 +61,22 @@ int mlx5_set_nic_vport_mc_list(struct mlx5_core_dev *mdev, int vport, int mlx5_set_nic_vport_promisc(struct mlx5_core_dev *mdev, int vport, bool promisc_mc, bool promisc_uc, bool promisc_all); +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size); +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size); +int mlx5_query_nic_vport_vlan_list(struct mlx5_core_dev *dev, + u32 vport, + u16 *vlan_list, + int *list_size); +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size); int mlx5_set_nic_vport_permanent_mac(struct mlx5_core_dev *mdev, int vport, u8 *addr); int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); diff --git a/sys/dev/mps/mps.c b/sys/dev/mps/mps.c index d4fbda1..2cdc39d 100644 --- a/sys/dev/mps/mps.c +++ b/sys/dev/mps/mps.c @@ -1916,9 +1916,10 @@ mps_intr_locked(void *data) */ rel_rep = (MPI2_DIAG_RELEASE_REPLY *)reply; - if (le16toh(rel_rep->IOCStatus) == + if ((le16toh(rel_rep->IOCStatus) & + MPI2_IOCSTATUS_MASK) == MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED) - { + { pBuffer = &sc->fw_diag_buffer_list[ rel_rep->BufferType]; diff --git a/sys/dev/mps/mps_config.c b/sys/dev/mps/mps_config.c index e3bebaa..9bee773 100644 --- a/sys/dev/mps/mps_config.c +++ b/sys/dev/mps/mps_config.c @@ -499,7 +499,8 @@ mps_wd_config_pages(struct mps_softc *sc) */ if (mps_config_get_raid_volume_pg0(sc, &mpi_reply, raid_vol_pg0, (u32)raid_vol_pg0->DevHandle)) { - if (mpi_reply.IOCStatus != + if ((le16toh(mpi_reply.IOCStatus) & + MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) { mps_dprint(sc, MPS_FAULT, "Multiple RAID Volume Page0! 
Direct Drive " diff --git a/sys/dev/mps/mps_sas.c b/sys/dev/mps/mps_sas.c index f3ea8d3..0ab0889 100644 --- a/sys/dev/mps/mps_sas.c +++ b/sys/dev/mps/mps_sas.c @@ -241,6 +241,8 @@ mpssas_alloc_tm(struct mps_softc *sc) void mpssas_free_tm(struct mps_softc *sc, struct mps_command *tm) { + int target_id = 0xFFFFFFFF; + if (tm == NULL) return; @@ -251,10 +253,11 @@ mpssas_free_tm(struct mps_softc *sc, struct mps_command *tm) */ if (tm->cm_targ != NULL) { tm->cm_targ->flags &= ~MPSSAS_TARGET_INRESET; + target_id = tm->cm_targ->tid; } if (tm->cm_ccb) { mps_dprint(sc, MPS_INFO, "Unfreezing devq for target ID %d\n", - tm->cm_targ->tid); + target_id); xpt_release_devq(tm->cm_ccb->ccb_h.path, 1, TRUE); xpt_free_path(tm->cm_ccb->ccb_h.path); xpt_free_ccb(tm->cm_ccb); @@ -372,12 +375,11 @@ mpssas_remove_volume(struct mps_softc *sc, struct mps_command *tm) return; } - if (reply->IOCStatus != MPI2_IOCSTATUS_SUCCESS) { - mps_dprint(sc, MPS_FAULT, + if ((le16toh(reply->IOCStatus) & MPI2_IOCSTATUS_MASK) != + MPI2_IOCSTATUS_SUCCESS) { + mps_dprint(sc, MPS_ERROR, "IOCStatus = 0x%x while resetting device 0x%x\n", - reply->IOCStatus, handle); - mpssas_free_tm(sc, tm); - return; + le16toh(reply->IOCStatus), handle); } mps_dprint(sc, MPS_XINFO, @@ -394,7 +396,8 @@ mpssas_remove_volume(struct mps_softc *sc, struct mps_command *tm) * this target id if possible, and so we can assign the same target id * to this device if it comes back in the future. */ - if (reply->IOCStatus == MPI2_IOCSTATUS_SUCCESS) { + if ((le16toh(reply->IOCStatus) & MPI2_IOCSTATUS_MASK) == + MPI2_IOCSTATUS_SUCCESS) { targ = tm->cm_targ; targ->handle = 0x0; targ->encl_handle = 0x0; @@ -567,24 +570,22 @@ mpssas_remove_device(struct mps_softc *sc, struct mps_command *tm) "%s: cm_flags = %#x for remove of handle %#04x! " "This should not happen!\n", __func__, tm->cm_flags, handle); - mpssas_free_tm(sc, tm); - return; } if (reply == NULL) { /* XXX retry the remove after the diag reset completes? 
*/ mps_dprint(sc, MPS_FAULT, - "%s NULL reply reseting device 0x%04x\n", __func__, handle); + "%s NULL reply resetting device 0x%04x\n", __func__, + handle); mpssas_free_tm(sc, tm); return; } - if (le16toh(reply->IOCStatus) != MPI2_IOCSTATUS_SUCCESS) { - mps_dprint(sc, MPS_FAULT, + if ((le16toh(reply->IOCStatus) & MPI2_IOCSTATUS_MASK) != + MPI2_IOCSTATUS_SUCCESS) { + mps_dprint(sc, MPS_ERROR, "IOCStatus = 0x%x while resetting device 0x%x\n", le16toh(reply->IOCStatus), handle); - mpssas_free_tm(sc, tm); - return; } mps_dprint(sc, MPS_XINFO, "Reset aborted %u commands\n", @@ -662,7 +663,8 @@ mpssas_remove_complete(struct mps_softc *sc, struct mps_command *tm) * this target id if possible, and so we can assign the same target id * to this device if it comes back in the future. */ - if (le16toh(reply->IOCStatus) == MPI2_IOCSTATUS_SUCCESS) { + if ((le16toh(reply->IOCStatus) & MPI2_IOCSTATUS_MASK) == + MPI2_IOCSTATUS_SUCCESS) { targ = tm->cm_targ; targ->handle = 0x0; targ->encl_handle = 0x0; @@ -880,7 +882,6 @@ mps_detach_sas(struct mps_softc *sc) cam_sim_free(sassc->sim, FALSE); } - sassc->flags |= MPSSAS_SHUTDOWN; mps_unlock(sc); if (sassc->devq != NULL) diff --git a/sys/dev/mps/mps_sas_lsi.c b/sys/dev/mps/mps_sas_lsi.c index 268383a..7cbfea5 100644 --- a/sys/dev/mps/mps_sas_lsi.c +++ b/sys/dev/mps/mps_sas_lsi.c @@ -1161,15 +1161,15 @@ mpssas_stop_unit_done(struct cam_periph *periph, union ccb *done_ccb) struct mpssas_softc *sassc; char path_str[64]; + if (done_ccb == NULL) + return; + sassc = (struct mpssas_softc *)done_ccb->ccb_h.ppriv_ptr1; xpt_path_string(done_ccb->ccb_h.path, path_str, sizeof(path_str)); mps_dprint(sassc->sc, MPS_INFO, "Completing stop unit for %s\n", path_str); - if (done_ccb == NULL) - return; - /* * Nothing more to do except free the CCB and path. 
If the command * timed out, an abort reset, then target reset will be issued during diff --git a/sys/dev/mps/mps_user.c b/sys/dev/mps/mps_user.c index 71a3b9b..bd749f0 100644 --- a/sys/dev/mps/mps_user.c +++ b/sys/dev/mps/mps_user.c @@ -1230,12 +1230,14 @@ mps_post_fw_diag_buffer(struct mps_softc *sc, * Process POST reply. */ reply = (MPI2_DIAG_BUFFER_POST_REPLY *)cm->cm_reply; - if (reply->IOCStatus != MPI2_IOCSTATUS_SUCCESS) { + if ((le16toh(reply->IOCStatus) & MPI2_IOCSTATUS_MASK) != + MPI2_IOCSTATUS_SUCCESS) { status = MPS_DIAG_FAILURE; mps_dprint(sc, MPS_FAULT, "%s: post of FW Diag Buffer failed " "with IOCStatus = 0x%x, IOCLogInfo = 0x%x and " - "TransferLength = 0x%x\n", __func__, reply->IOCStatus, - reply->IOCLogInfo, reply->TransferLength); + "TransferLength = 0x%x\n", __func__, + le16toh(reply->IOCStatus), le32toh(reply->IOCLogInfo), + le32toh(reply->TransferLength)); goto done; } @@ -1314,12 +1316,13 @@ mps_release_fw_diag_buffer(struct mps_softc *sc, * Process RELEASE reply. 
*/ reply = (MPI2_DIAG_RELEASE_REPLY *)cm->cm_reply; - if ((reply->IOCStatus != MPI2_IOCSTATUS_SUCCESS) || - pBuffer->owned_by_firmware) { + if (((le16toh(reply->IOCStatus) & MPI2_IOCSTATUS_MASK) != + MPI2_IOCSTATUS_SUCCESS) || pBuffer->owned_by_firmware) { status = MPS_DIAG_FAILURE; mps_dprint(sc, MPS_FAULT, "%s: release of FW Diag Buffer " "failed with IOCStatus = 0x%x and IOCLogInfo = 0x%x\n", - __func__, reply->IOCStatus, reply->IOCLogInfo); + __func__, le16toh(reply->IOCStatus), + le32toh(reply->IOCLogInfo)); goto done; } diff --git a/sys/dev/mps/mpsvar.h b/sys/dev/mps/mpsvar.h index 0a82034..62778c0 100644 --- a/sys/dev/mps/mpsvar.h +++ b/sys/dev/mps/mpsvar.h @@ -33,7 +33,7 @@ #ifndef _MPSVAR_H #define _MPSVAR_H -#define MPS_DRIVER_VERSION "20.00.00.00-fbsd" +#define MPS_DRIVER_VERSION "21.00.00.00-fbsd" #define MPS_DB_MAX_WAIT 2500 diff --git a/sys/dev/usb/controller/xhci.c b/sys/dev/usb/controller/xhci.c index 18f871e..0b708b8 100644 --- a/sys/dev/usb/controller/xhci.c +++ b/sys/dev/usb/controller/xhci.c @@ -215,7 +215,7 @@ static void xhci_iterate_hw_softc(struct usb_bus *bus, usb_bus_mem_sub_cb_t *cb) { struct xhci_softc *sc = XHCI_BUS2SC(bus); - uint8_t i; + uint16_t i; cb(bus, &sc->sc_hw.root_pc, &sc->sc_hw.root_pg, sizeof(struct xhci_hw_root), XHCI_PAGE_SIZE); @@ -223,7 +223,7 @@ xhci_iterate_hw_softc(struct usb_bus *bus, usb_bus_mem_sub_cb_t *cb) cb(bus, &sc->sc_hw.ctx_pc, &sc->sc_hw.ctx_pg, sizeof(struct xhci_dev_ctx_addr), XHCI_PAGE_SIZE); - for (i = 0; i != XHCI_MAX_SCRATCHPADS; i++) { + for (i = 0; i != sc->sc_noscratch; i++) { cb(bus, &sc->sc_hw.scratch_pc[i], &sc->sc_hw.scratch_pg[i], XHCI_PAGE_SIZE, XHCI_PAGE_SIZE); } diff --git a/sys/dev/usb/controller/xhci.h b/sys/dev/usb/controller/xhci.h index af5b913..d4ed740 100644 --- a/sys/dev/usb/controller/xhci.h +++ b/sys/dev/usb/controller/xhci.h @@ -30,7 +30,7 @@ #define XHCI_MAX_DEVICES MIN(USB_MAX_DEVICES, 128) #define XHCI_MAX_ENDPOINTS 32 /* hardcoded - do not change */ -#define 
XHCI_MAX_SCRATCHPADS 32 +#define XHCI_MAX_SCRATCHPADS 1024 #define XHCI_MAX_EVENTS (16 * 13) #define XHCI_MAX_COMMANDS (16 * 1) #define XHCI_MAX_RSEG 1 @@ -495,14 +495,15 @@ struct xhci_softc { uint16_t sc_command_idx; uint16_t sc_imod_default; + /* number of scratch pages */ + uint16_t sc_noscratch; + uint8_t sc_event_ccs; uint8_t sc_command_ccs; /* number of XHCI device slots */ uint8_t sc_noslot; /* number of ports on root HUB */ uint8_t sc_noport; - /* number of scratch pages */ - uint8_t sc_noscratch; /* root HUB device configuration */ uint8_t sc_conf; /* root HUB port event bitmap, max 256 ports */ diff --git a/sys/dev/usb/controller/xhcireg.h b/sys/dev/usb/controller/xhcireg.h index a0b7397..32288c0 100644 --- a/sys/dev/usb/controller/xhcireg.h +++ b/sys/dev/usb/controller/xhcireg.h @@ -52,8 +52,8 @@ #define XHCI_HCSPARAMS2 0x08 /* RO structual parameters 2 */ #define XHCI_HCS2_IST(x) ((x) & 0xF) #define XHCI_HCS2_ERST_MAX(x) (((x) >> 4) & 0xF) -#define XHCI_HCS2_SPR(x) (((x) >> 24) & 0x1) -#define XHCI_HCS2_SPB_MAX(x) (((x) >> 27) & 0x7F) +#define XHCI_HCS2_SPR(x) (((x) >> 26) & 0x1) +#define XHCI_HCS2_SPB_MAX(x) ((((x) >> 16) & 0x3E0) | (((x) >> 27) & 0x1F)) #define XHCI_HCSPARAMS3 0x0C /* RO structual parameters 3 */ #define XHCI_HCS3_U1_DEL(x) ((x) & 0xFF) #define XHCI_HCS3_U2_DEL(x) (((x) >> 16) & 0xFFFF) diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index c0d135d..e1a0719d 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -1325,10 +1325,10 @@ devfs_readlink(struct vop_readlink_args *ap) static int devfs_reclaim(struct vop_reclaim_args *ap) { - struct vnode *vp = ap->a_vp; + struct vnode *vp; struct devfs_dirent *de; - struct cdev *dev; + vp = ap->a_vp; mtx_lock(&devfs_de_interlock); de = vp->v_data; if (de != NULL) { @@ -1336,24 +1336,31 @@ devfs_reclaim(struct vop_reclaim_args *ap) vp->v_data = NULL; } mtx_unlock(&devfs_de_interlock); - vnode_destroy_vobject(vp); + return (0); +} + +static int 
+devfs_reclaim_vchr(struct vop_reclaim_args *ap) +{ + struct vnode *vp; + struct cdev *dev; + + vp = ap->a_vp; + MPASS(vp->v_type == VCHR); + + devfs_reclaim(ap); VI_LOCK(vp); dev_lock(); dev = vp->v_rdev; vp->v_rdev = NULL; - - if (dev == NULL) { - dev_unlock(); - VI_UNLOCK(vp); - return (0); - } - - dev->si_usecount -= vp->v_usecount; + if (dev != NULL) + dev->si_usecount -= vp->v_usecount; dev_unlock(); VI_UNLOCK(vp); - dev_rel(dev); + if (dev != NULL) + dev_rel(dev); return (0); } @@ -1791,7 +1798,7 @@ static struct vop_vector devfs_specops = { .vop_readdir = VOP_PANIC, .vop_readlink = VOP_PANIC, .vop_reallocblks = VOP_PANIC, - .vop_reclaim = devfs_reclaim, + .vop_reclaim = devfs_reclaim_vchr, .vop_remove = devfs_remove, .vop_rename = VOP_PANIC, .vop_revoke = devfs_revoke, diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index ac0d179..faff121 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -3098,10 +3098,14 @@ nfs_advlock(struct vop_advlock_args *ap) } } if (error == 0 && ap->a_op == F_SETLK) { - /* Mark that a file lock has been acquired. */ - mtx_lock(&np->n_mtx); - np->n_flag |= NHASBEENLOCKED; - mtx_unlock(&np->n_mtx); + error = NFSVOPLOCK(vp, LK_SHARED); + if (error == 0) { + /* Mark that a file lock has been acquired. 
*/ + mtx_lock(&np->n_mtx); + np->n_flag |= NHASBEENLOCKED; + mtx_unlock(&np->n_mtx); + NFSVOPUNLOCK(vp, 0); + } } } return (error); diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h index afb8639..425601e 100644 --- a/sys/i386/include/apicvar.h +++ b/sys/i386/include/apicvar.h @@ -215,7 +215,6 @@ int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, void lapic_set_tpr(u_int vector); void lapic_setup(int boot); void xen_intr_handle_upcall(struct trapframe *frame); -void hv_vector_handler(struct trapframe *frame); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h index 0156b5b..6e71b05 100644 --- a/sys/i386/include/atomic.h +++ b/sys/i386/include/atomic.h @@ -86,6 +86,7 @@ void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v) int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src); u_int atomic_fetchadd_int(volatile u_int *p, u_int v); int atomic_testandset_int(volatile u_int *p, u_int v); +int atomic_testandclear_int(volatile u_int *p, u_int v); #define ATOMIC_LOAD(TYPE, LOP) \ u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) @@ -224,6 +225,23 @@ atomic_testandset_int(volatile u_int *p, u_int v) return (res); } +static __inline int +atomic_testandclear_int(volatile u_int *p, u_int v) +{ + u_char res; + + __asm __volatile( + " " MPLOCKED " " + " btrl %2,%1 ; " + " setc %0 ; " + "# atomic_testandclear_int" + : "=q" (res), /* 0 */ + "+m" (*p) /* 1 */ + : "Ir" (v & 0x1f) /* 2 */ + : "cc"); + return (res); +} + /* * We assume that a = b will do atomic loads and stores. Due to the * IA32 memory model, a simple store guarantees release semantics. 
@@ -549,6 +567,13 @@ atomic_testandset_long(volatile u_long *p, u_int v) return (atomic_testandset_int((volatile u_int *)p, v)); } +static __inline int +atomic_testandclear_long(volatile u_long *p, u_int v) +{ + + return (atomic_testandclear_int((volatile u_int *)p, v)); +} + /* Read the current value and store a new value in the destination. */ #ifdef __GNUCLIKE_ASM @@ -675,6 +700,7 @@ u_long atomic_swap_long(volatile u_long *p, u_long v); #define atomic_readandclear_32 atomic_readandclear_int #define atomic_fetchadd_32 atomic_fetchadd_int #define atomic_testandset_32 atomic_testandset_int +#define atomic_testandclear_32 atomic_testandclear_int /* Operations on pointers. */ #define atomic_set_ptr(p, v) \ diff --git a/sys/kern/bus_if.m b/sys/kern/bus_if.m index af39036..f1fd64b 100644 --- a/sys/kern/bus_if.m +++ b/sys/kern/bus_if.m @@ -121,7 +121,7 @@ METHOD void probe_nomatch { * @param _child the child device whose instance variable is * being read * @param _index the instance variable to read - * @param _result a loction to recieve the instance variable + * @param _result a location to receive the instance variable * value * * @retval 0 success @@ -374,7 +374,7 @@ METHOD int release_resource { * triggers * @param _arg a value to use as the single argument in calls * to @p _intr - * @param _cookiep a pointer to a location to recieve a cookie + * @param _cookiep a pointer to a location to receive a cookie * value that may be used to remove the interrupt * handler */ @@ -445,9 +445,9 @@ METHOD int set_resource { * @param _child the device which owns the resource * @param _type the type of resource * @param _rid the resource identifier - * @param _start the address of a location to recieve the start + * @param _start the address of a location to receive the start * index of the resource range - * @param _count the address of a location to recieve the size + * @param _count the address of a location to receive the size * of the resource range */ METHOD int 
get_resource { diff --git a/sys/kern/imgact_binmisc.c b/sys/kern/imgact_binmisc.c index 5712838..0fceb13 100644 --- a/sys/kern/imgact_binmisc.c +++ b/sys/kern/imgact_binmisc.c @@ -708,7 +708,7 @@ imgact_binmisc_exec(struct image_params *imgp) break; case ' ': - /* Replace space with NUL to seperate arguments. */ + /* Replace space with NUL to separate arguments. */ *d++ = '\0'; break; diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index e3c08de..1e237c2 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1325,10 +1325,6 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) * and write it out following the notes. */ hdr = malloc(hdrsize, M_TEMP, M_WAITOK); - if (hdr == NULL) { - error = EINVAL; - goto done; - } error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize, ¬elst, notesz, gzfile); diff --git a/sys/kern/inflate.c b/sys/kern/inflate.c index 383ebc4..8fde5cb 100644 --- a/sys/kern/inflate.c +++ b/sys/kern/inflate.c @@ -206,7 +206,7 @@ extern void kzipfree (void*); end-of-block. Note however that the static length tree defines 288 codes just to fill out the Huffman codes. Codes 286 and 287 cannot be used though, since there is no length base or extra bits - defined for them. Similarily, there are up to 30 distance codes. + defined for them. Similarly, there are up to 30 distance codes. However, static trees define 32 codes (all 5 bits) to fill out the Huffman codes, but the last two had better not show up in the data. 7. Unzip can check dynamic Huffman blocks for complete code sets. @@ -335,7 +335,7 @@ static const ush mask[] = { where NEEDBITS makes sure that b has at least j bits in it, and DUMPBITS removes the bits from b. The macros use the variable k for the number of bits in b. 
Normally, b and k are register - variables for speed, and are initialized at the begining of a + variables for speed, and are initialized at the beginning of a routine that uses these macros from a global bit buffer and count. In order to not ask for more bits than there are in the compressed diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index ae6bd3a..03a3d9e 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -437,6 +437,7 @@ proc0_init(void *dummy __unused) { struct proc *p; struct thread *td; + struct ucred *newcred; vm_paddr_t pageablemem; int i; @@ -513,19 +514,20 @@ proc0_init(void *dummy __unused) callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); /* Create credentials. */ - p->p_ucred = crget(); - p->p_ucred->cr_ngroups = 1; /* group 0 */ - p->p_ucred->cr_uidinfo = uifind(0); - p->p_ucred->cr_ruidinfo = uifind(0); - p->p_ucred->cr_prison = &prison0; - p->p_ucred->cr_loginclass = loginclass_find("default"); + newcred = crget(); + newcred->cr_ngroups = 1; /* group 0 */ + newcred->cr_uidinfo = uifind(0); + newcred->cr_ruidinfo = uifind(0); + newcred->cr_prison = &prison0; + newcred->cr_loginclass = loginclass_find("default"); + proc_set_cred(p, newcred); #ifdef AUDIT - audit_cred_kproc0(p->p_ucred); + audit_cred_kproc0(newcred); #endif #ifdef MAC - mac_cred_create_swapper(p->p_ucred); + mac_cred_create_swapper(newcred); #endif - td->td_ucred = crhold(p->p_ucred); + td->td_ucred = crhold(newcred); /* Create sigacts. 
*/ p->p_sigacts = sigacts_alloc(); @@ -844,7 +846,7 @@ create_init(const void *udata __unused) #ifdef AUDIT audit_cred_proc1(newcred); #endif - initproc->p_ucred = newcred; + proc_set_cred(initproc, newcred); PROC_UNLOCK(initproc); sx_xunlock(&proctree_lock); crfree(oldcred); diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c index 95a6d09..60895c9 100644 --- a/sys/kern/kern_condvar.c +++ b/sys/kern/kern_condvar.c @@ -163,7 +163,7 @@ _cv_wait(struct cv *cvp, struct lock_object *lock) /* * Wait on a condition variable. This function differs from cv_wait by - * not aquiring the mutex after condition variable was signaled. + * not acquiring the mutex after condition variable was signaled. */ void _cv_wait_unlock(struct cv *cvp, struct lock_object *lock) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 7561368..24381a9 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -1525,7 +1525,7 @@ fdgrowtable_exp(struct filedesc *fdp, int nfd) } /* - * Grow the file table to accomodate (at least) nfd descriptors. + * Grow the file table to accommodate (at least) nfd descriptors. */ static void fdgrowtable(struct filedesc *fdp, int nfd) @@ -1730,7 +1730,7 @@ fdavail(struct thread *td, int n) } /* - * Create a new open file structure and allocate a file decriptor for the + * Create a new open file structure and allocate a file descriptor for the * process that refers to it. We add one reference to the file for the * descriptor table and one reference for resultfp. This is to prevent us * being preempted and the entry in the descriptor table closed after we @@ -2448,7 +2448,7 @@ fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, * * File's rights will be checked against the capability rights mask. * - * If an error occured the non-zero error is returned and *fpp is set to + * If an error occurred the non-zero error is returned and *fpp is set to * NULL. Otherwise *fpp is held and set and zero is returned. 
Caller is * responsible for fdrop(). */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 47ea9b0..eeb8369 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -96,9 +96,9 @@ dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); -SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *"); -SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int"); -SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *"); +SDT_PROBE_DEFINE1(proc, , , exec, "char *"); +SDT_PROBE_DEFINE1(proc, , , exec__failure, "int"); +SDT_PROBE_DEFINE1(proc, , , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); @@ -359,7 +359,7 @@ do_execve(td, args, mac_p) { struct proc *p = td->td_proc; struct nameidata nd; - struct ucred *newcred = NULL, *oldcred; + struct ucred *oldcred; struct uidinfo *euip = NULL; register_t *stack_base; int error, i; @@ -367,12 +367,12 @@ do_execve(td, args, mac_p) struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; - struct sigacts *oldsigacts, *newsigacts; + struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE struct vnode *tracevp = NULL; struct ucred *tracecred = NULL; #endif - struct vnode *textvp = NULL, *binvp = NULL; + struct vnode *oldtextvp = NULL, *newtextvp; cap_rights_t rights; int credential_changing; int textset; @@ -407,6 +407,7 @@ do_execve(td, args, mac_p) imgp->proc = p; imgp->attr = &attr; imgp->args = args; + oldcred = p->p_ucred; #ifdef MAC error = mac_execve_enter(imgp, mac_p); @@ -416,7 +417,7 @@ do_execve(td, args, mac_p) /* * Translate the file name. namei() returns a vnode pointer - * in ni_vp amoung other things. + * in ni_vp among other things. * * XXXAUDIT: It would be desirable to also audit the name of the * interpreter if this is an interpreted binary. 
@@ -426,7 +427,7 @@ do_execve(td, args, mac_p) | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); } - SDT_PROBE1(proc, kernel, , exec, args->fname); + SDT_PROBE1(proc, , , exec, args->fname); interpret: if (args->fname != NULL) { @@ -446,20 +447,20 @@ interpret: if (error) goto exec_fail; - binvp = nd.ni_vp; - imgp->vp = binvp; + newtextvp = nd.ni_vp; + imgp->vp = newtextvp; } else { AUDIT_ARG_FD(args->fd); /* * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, - cap_rights_init(&rights, CAP_FEXECVE), &binvp); + cap_rights_init(&rights, CAP_FEXECVE), &newtextvp); if (error) goto exec_fail; - vn_lock(binvp, LK_EXCLUSIVE | LK_RETRY); - AUDIT_ARG_VNODE1(binvp); - imgp->vp = binvp; + vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY); + AUDIT_ARG_VNODE1(newtextvp); + imgp->vp = newtextvp; } /* @@ -488,6 +489,100 @@ interpret: goto exec_fail_dealloc; imgp->proc->p_osrel = 0; + + /* + * Implement image setuid/setgid. + * + * Determine new credentials before attempting image activators + * so that it can be used by process_exec handlers to determine + * credential/setid changes. + * + * Don't honor setuid/setgid if the filesystem prohibits it or if + * the process is being traced. + * + * We disable setuid/setgid/etc in capability mode on the basis + * that most setugid applications are not written with that + * environment in mind, and will therefore almost certainly operate + * incorrectly. In principle there's no reason that setugid + * applications might not be useful in capability mode, so we may want + * to reconsider this conservative design choice in the future. + * + * XXXMAC: For the time being, use NOSUID to also prohibit + * transitions on the file system. 
+ */ + credential_changing = 0; + credential_changing |= (attr.va_mode & S_ISUID) && + oldcred->cr_uid != attr.va_uid; + credential_changing |= (attr.va_mode & S_ISGID) && + oldcred->cr_gid != attr.va_gid; +#ifdef MAC + will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, + interpvplabel, imgp); + credential_changing |= will_transition; +#endif + + if (credential_changing && +#ifdef CAPABILITY_MODE + ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && +#endif + (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && + (p->p_flag & P_TRACED) == 0) { + imgp->credential_setid = true; + VOP_UNLOCK(imgp->vp, 0); + imgp->newcred = crdup(oldcred); + if (attr.va_mode & S_ISUID) { + euip = uifind(attr.va_uid); + change_euid(imgp->newcred, euip); + } + vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); + if (attr.va_mode & S_ISGID) + change_egid(imgp->newcred, attr.va_gid); + /* + * Implement correct POSIX saved-id behavior. + * + * XXXMAC: Note that the current logic will save the + * uid and gid if a MAC domain transition occurs, even + * though maybe it shouldn't. + */ + change_svuid(imgp->newcred, imgp->newcred->cr_uid); + change_svgid(imgp->newcred, imgp->newcred->cr_gid); + } else { + /* + * Implement correct POSIX saved-id behavior. + * + * XXX: It's not clear that the existing behavior is + * POSIX-compliant. A number of sources indicate that the + * saved uid/gid should only be updated if the new ruid is + * not equal to the old ruid, or the new euid is not equal + * to the old euid and the new euid is not equal to the old + * ruid. The FreeBSD code always updates the saved uid/gid. + * Also, this code uses the new (replaced) euid and egid as + * the source, which may or may not be the right ones to use. 
+ */ + if (oldcred->cr_svuid != oldcred->cr_uid || + oldcred->cr_svgid != oldcred->cr_gid) { + VOP_UNLOCK(imgp->vp, 0); + imgp->newcred = crdup(oldcred); + vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); + change_svuid(imgp->newcred, imgp->newcred->cr_uid); + change_svgid(imgp->newcred, imgp->newcred->cr_gid); + } + } + /* The new credentials are installed into the process later. */ + + /* + * Do the best to calculate the full path to the image file. + */ + if (args->fname != NULL && args->fname[0] == '/') + imgp->execpath = args->fname; + else { + VOP_UNLOCK(imgp->vp, 0); + if (vn_fullpath(td, imgp->vp, &imgp->execpath, + &imgp->freepath) != 0) + imgp->execpath = args->fname; + vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); + } + /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell @@ -536,15 +631,23 @@ interpret: if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC - mac_execve_interpreter_enter(binvp, &interpvplabel); + mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { - VOP_CLOSE(binvp, FREAD, td->td_ucred, td); + VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = 0; } - vput(binvp); + vput(newtextvp); vm_object_deallocate(imgp->object); imgp->object = NULL; + imgp->credential_setid = false; + if (imgp->newcred != NULL) { + crfree(imgp->newcred); + imgp->newcred = NULL; + } + imgp->execpath = NULL; + free(imgp->freepath, M_TEMP); + imgp->freepath = NULL; /* set new name to that of the interpreter */ NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, td); @@ -558,14 +661,6 @@ interpret: */ VOP_UNLOCK(imgp->vp, 0); - /* - * Do the best to calculate the full path to the image file. 
- */ - if (imgp->auxargs != NULL && - ((args->fname != NULL && args->fname[0] == '/') || - vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0)) - imgp->execpath = args->fname; - if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; @@ -611,11 +706,6 @@ interpret: bcopy(imgp->args->begin_argv, newargs->ar_args, i); } - vn_lock(imgp->vp, LK_SHARED | LK_RETRY); - - /* Get a reference to the vnode prior to locking the proc */ - VREF(binvp); - /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old @@ -626,15 +716,13 @@ interpret: oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); - } else { - oldsigacts = NULL; - newsigacts = NULL; /* satisfy gcc */ } + vn_lock(imgp->vp, LK_SHARED | LK_RETRY); + PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; - oldcred = p->p_ucred; /* Stop profiling */ stopprofclock(p); @@ -646,7 +734,7 @@ interpret: if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); - else if (vn_commname(binvp, p->p_comm, sizeof(p->p_comm)) != 0) + else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR @@ -666,38 +754,9 @@ interpret: } /* - * Implement image setuid/setgid. - * - * Don't honor setuid/setgid if the filesystem prohibits it or if - * the process is being traced. - * - * We disable setuid/setgid/etc in compatibility mode on the basis - * that most setugid applications are not written with that - * environment in mind, and will therefore almost certainly operate - * incorrectly. In principle there's no reason that setugid - * applications might not be useful in capability mode, so we may want - * to reconsider this conservative design choice in the future. 
- * - * XXXMAC: For the time being, use NOSUID to also prohibit - * transitions on the file system. + * Implement image setuid/setgid installation. */ - credential_changing = 0; - credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != - attr.va_uid; - credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != - attr.va_gid; -#ifdef MAC - will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, - interpvplabel, imgp); - credential_changing |= will_transition; -#endif - - if (credential_changing && -#ifdef CAPABILITY_MODE - ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && -#endif - (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && - (p->p_flag & P_TRACED) == 0) { + if (imgp->credential_setid) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that @@ -723,70 +782,36 @@ interpret: VOP_UNLOCK(imgp->vp, 0); setugidsafety(td); error = fdcheckstd(td); - if (error != 0) - goto done1; - newcred = crdup(oldcred); - euip = uifind(attr.va_uid); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); + if (error != 0) + goto exec_fail_dealloc; PROC_LOCK(p); - /* - * Set the new credentials. - */ - if (attr.va_mode & S_ISUID) - change_euid(newcred, euip); - if (attr.va_mode & S_ISGID) - change_egid(newcred, attr.va_gid); #ifdef MAC if (will_transition) { - mac_vnode_execve_transition(oldcred, newcred, imgp->vp, - interpvplabel, imgp); + mac_vnode_execve_transition(oldcred, imgp->newcred, + imgp->vp, interpvplabel, imgp); } #endif - /* - * Implement correct POSIX saved-id behavior. - * - * XXXMAC: Note that the current logic will save the - * uid and gid if a MAC domain transition occurs, even - * though maybe it shouldn't. 
- */ - change_svuid(newcred, newcred->cr_uid); - change_svgid(newcred, newcred->cr_gid); - p->p_ucred = newcred; } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; - /* - * Implement correct POSIX saved-id behavior. - * - * XXX: It's not clear that the existing behavior is - * POSIX-compliant. A number of sources indicate that the - * saved uid/gid should only be updated if the new ruid is - * not equal to the old ruid, or the new euid is not equal - * to the old euid and the new euid is not equal to the old - * ruid. The FreeBSD code always updates the saved uid/gid. - * Also, this code uses the new (replaced) euid and egid as - * the source, which may or may not be the right ones to use. - */ - if (oldcred->cr_svuid != oldcred->cr_uid || - oldcred->cr_svgid != oldcred->cr_gid) { - PROC_UNLOCK(p); - VOP_UNLOCK(imgp->vp, 0); - newcred = crdup(oldcred); - vn_lock(imgp->vp, LK_SHARED | LK_RETRY); - PROC_LOCK(p); - change_svuid(newcred, newcred->cr_uid); - change_svgid(newcred, newcred->cr_gid); - p->p_ucred = newcred; - } + } + /* + * Set the new credentials. + */ + if (imgp->newcred != NULL) { + proc_set_cred(p, imgp->newcred); + crfree(oldcred); + oldcred = NULL; } /* - * Store the vp for use in procfs. This vnode was referenced prior - * to locking the proc lock. + * Store the vp for use in procfs. This vnode was referenced by namei + * or fgetvp_exec. */ - textvp = p->p_textvp; - p->p_textvp = binvp; + oldtextvp = p->p_textvp; + p->p_textvp = newtextvp; #ifdef KDTRACE_HOOKS /* @@ -848,42 +873,9 @@ interpret: vfs_mark_atime(imgp->vp, td->td_ucred); - SDT_PROBE1(proc, kernel, , exec__success, args->fname); - - VOP_UNLOCK(imgp->vp, 0); -done1: - /* - * Free any resources malloc'd earlier that we didn't use. - */ - if (euip != NULL) - uifree(euip); - if (newcred != NULL) - crfree(oldcred); - - /* - * Handle deferred decrement of ref counts. 
- */ - if (textvp != NULL) - vrele(textvp); - if (binvp && error != 0) - vrele(binvp); -#ifdef KTRACE - if (tracevp != NULL) - vrele(tracevp); - if (tracecred != NULL) - crfree(tracecred); -#endif - vn_lock(imgp->vp, LK_SHARED | LK_RETRY); - pargs_drop(oldargs); - pargs_drop(newargs); - if (oldsigacts != NULL) - sigacts_free(oldsigacts); + SDT_PROBE1(proc, , , exec__success, args->fname); exec_fail_dealloc: - - /* - * free various allocated resources - */ if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); @@ -892,7 +884,10 @@ exec_fail_dealloc: NDFREE(&nd, NDF_ONLY_PNBUF); if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); - vput(imgp->vp); + if (error != 0) + vput(imgp->vp); + else + VOP_UNLOCK(imgp->vp, 0); } if (imgp->object != NULL) @@ -910,24 +905,43 @@ exec_fail_dealloc: * the S_EXEC bit set. */ STOPEVENT(p, S_EXEC, 0); - goto done2; - } - + } else { exec_fail: - /* we're done here, clear P_INEXEC */ - PROC_LOCK(p); - p->p_flag &= ~P_INEXEC; - PROC_UNLOCK(p); + /* we're done here, clear P_INEXEC */ + PROC_LOCK(p); + p->p_flag &= ~P_INEXEC; + PROC_UNLOCK(p); - SDT_PROBE1(proc, kernel, , exec__failure, error); + SDT_PROBE1(proc, , , exec__failure, error); + } + + if (imgp->newcred != NULL && oldcred != NULL) + crfree(imgp->newcred); -done2: #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); + /* + * Handle deferred decrement of ref counts. + */ + if (oldtextvp != NULL) + vrele(oldtextvp); +#ifdef KTRACE + if (tracevp != NULL) + vrele(tracevp); + if (tracecred != NULL) + crfree(tracecred); +#endif + pargs_drop(oldargs); + pargs_drop(newargs); + if (oldsigacts != NULL) + sigacts_free(oldsigacts); + if (euip != NULL) + uifree(euip); + if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. 
exit gracefully */ exit1(td, W_EXITCODE(0, SIGABRT)); @@ -1500,8 +1514,6 @@ exec_register(execsw_arg) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); - if (newexecsw == NULL) - return (ENOMEM); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) @@ -1534,8 +1546,6 @@ exec_unregister(execsw_arg) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); - if (newexecsw == NULL) - return (ENOMEM); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 76b4427..f9244af 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -95,7 +95,7 @@ dtrace_execexit_func_t dtrace_fasttrap_exit; #endif SDT_PROVIDER_DECLARE(proc); -SDT_PROBE_DEFINE1(proc, kernel, , exit, "int"); +SDT_PROBE_DEFINE1(proc, , , exit, "int"); /* Hook for NFS teardown procedure. */ void (*nlminfo_release_p)(struct proc *p); @@ -564,7 +564,7 @@ exit1(struct thread *td, int rv) reason = CLD_DUMPED; else if (WIFSIGNALED(rv)) reason = CLD_KILLED; - SDT_PROBE1(proc, kernel, , exit, reason); + SDT_PROBE1(proc, , , exit, reason); #endif /* @@ -937,7 +937,7 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options) * Free credentials, arguments, and sigacts. 
*/ crfree(p->p_ucred); - p->p_ucred = NULL; + proc_set_cred(p, NULL); pargs_drop(p->p_args); p->p_args = NULL; sigacts_free(p->p_sigacts); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index f24ba20..d50db75 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -90,8 +90,7 @@ dtrace_fork_func_t dtrace_fasttrap_fork; #endif SDT_PROVIDER_DECLARE(proc); -SDT_PROBE_DEFINE3(proc, kernel, , create, "struct proc *", - "struct proc *", "int"); +SDT_PROBE_DEFINE3(proc, , , create, "struct proc *", "struct proc *", "int"); #ifndef _SYS_SYSPROTO_H_ struct fork_args { @@ -411,8 +410,10 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, bzero(&p2->p_startzero, __rangeof(struct proc, p_startzero, p_endzero)); p2->p_treeflag = 0; + p2->p_filemon = NULL; - p2->p_ucred = crhold(td->td_ucred); + crhold(td->td_ucred); + proc_set_cred(p2, td->td_ucred); /* Tell the prison that we exist. */ prison_proc_hold(p2->p_ucred->cr_prison); @@ -753,7 +754,7 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, * Tell any interested parties about the new process. */ knote_fork(&p1->p_klist, p2->p_pid); - SDT_PROBE3(proc, kernel, , create, p2, p1, flags); + SDT_PROBE3(proc, , , create, p2, p1, flags); /* * Wait until debugger is attached to child. @@ -898,7 +899,7 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp, /* * The swap reservation failed. The accounting * from the entries of the copied vm2 will be - * substracted in vmspace_free(), so force the + * subtracted in vmspace_free(), so force the * reservation there. */ swap_reserve_force(mem_charged); @@ -912,7 +913,7 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp, * XXX: This is ugly; when we copy resource usage, we need to bump * per-cred resource counters. */ - newproc->p_ucred = p1->p_ucred; + proc_set_cred(newproc, p1->p_ucred); /* * Initialize resource accounting for the child process. 
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index a0e4c61..f0a046e 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -2476,7 +2476,7 @@ do_jail_attach(struct thread *td, struct prison *pr) PROC_LOCK(p); oldcred = crcopysafe(p, newcred); newcred->cr_prison = pr; - p->p_ucred = newcred; + proc_set_cred(p, newcred); setsugid(p); PROC_UNLOCK(p); #ifdef RACCT @@ -4072,7 +4072,7 @@ prison_priv_check(struct ucred *cred, int priv) return (0); /* - * Allow jailed root to set certian IPv4/6 (option) headers. + * Allow jailed root to set certain IPv4/6 (option) headers. */ case PRIV_NETINET_SETHDROPTS: return (0); @@ -4313,7 +4313,7 @@ SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, #endif /* - * Default parameters for jail(2) compatability. For historical reasons, + * Default parameters for jail(2) compatibility. For historical reasons, * the sysctl names have varying similarity to the parameter names. Prisons * just see their own parameters, and can't change them. */ diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c index 78eab87..ca8c10a 100644 --- a/sys/kern/kern_linker.c +++ b/sys/kern/kern_linker.c @@ -948,7 +948,7 @@ linker_debug_search_symbol_name(caddr_t value, char *buf, u_int buflen, * * Note that we do not obey list locking protocols here. We really don't need * DDB to hang because somebody's got the lock held. We'll take the chance - * that the files list is inconsistant instead. + * that the files list is inconsistent instead. */ #ifdef DDB int @@ -1762,8 +1762,6 @@ linker_hints_lookup(const char *path, int pathlen, const char *modname, goto bad; } hints = malloc(vattr.va_size, M_TEMP, M_WAITOK); - if (hints == NULL) - goto bad; error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)hints, vattr.va_size, 0, UIO_SYSSPACE, IO_NODELOCKED, cred, NOCRED, &reclen, td); if (error) @@ -2038,7 +2036,7 @@ linker_load_dependencies(linker_file_t lf) int ver, error = 0, count; /* - * All files are dependant on /kernel. 
+ * All files are dependent on /kernel. */ sx_assert(&kld_sx, SA_XLOCKED); if (linker_kernel_file) { diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c index ca2a359..e3e946e 100644 --- a/sys/kern/kern_lock.c +++ b/sys/kern/kern_lock.c @@ -286,7 +286,7 @@ wakeupshlk(struct lock *lk, const char *file, int line) * exclusive waiters bit anyway. * Please note that lk_exslpfail count may be lying about * the real number of waiters with the LK_SLEEPFAIL flag on - * because they may be used in conjuction with interruptible + * because they may be used in conjunction with interruptible * sleeps so lk_exslpfail might be considered an 'upper limit' * bound, including the edge cases. */ @@ -1063,7 +1063,7 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk, * Please note that lk_exslpfail count may be lying * about the real number of waiters with the * LK_SLEEPFAIL flag on because they may be used in - * conjuction with interruptible sleeps so + * conjunction with interruptible sleeps so * lk_exslpfail might be considered an 'upper limit' * bound, including the edge cases. */ @@ -1176,7 +1176,7 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk, * Please note that lk_exslpfail count may be * lying about the real number of waiters with * the LK_SLEEPFAIL flag on because they may - * be used in conjuction with interruptible + * be used in conjunction with interruptible * sleeps so lk_exslpfail might be considered * an 'upper limit' bound, including the edge * cases. 
diff --git a/sys/kern/kern_lockf.c b/sys/kern/kern_lockf.c index 91eae09..a0a3789 100644 --- a/sys/kern/kern_lockf.c +++ b/sys/kern/kern_lockf.c @@ -362,7 +362,7 @@ lf_free_lock(struct lockf_entry *lock) struct lock_owner *lo = lock->lf_owner; if (lo) { KASSERT(LIST_EMPTY(&lock->lf_outedges), - ("freeing lock with dependancies")); + ("freeing lock with dependencies")); KASSERT(LIST_EMPTY(&lock->lf_inedges), ("freeing lock with dependants")); sx_xlock(&lf_lock_owners_lock); @@ -827,7 +827,7 @@ lf_purgelocks(struct vnode *vp, struct lockf **statep) /* * We can just free all the active locks since they - * will have no dependancies (we removed them all + * will have no dependencies (we removed them all * above). We don't need to bother locking since we * are the last thread using this state structure. */ @@ -1112,7 +1112,7 @@ lf_insert_lock(struct lockf *state, struct lockf_entry *lock) /* * Wake up a sleeping lock and remove it from the pending list now - * that all its dependancies have been resolved. The caller should + * that all its dependencies have been resolved. The caller should * arrange for the lock to be added to the active list, adjusting any * existing locks for the same owner as needed. */ @@ -1137,9 +1137,9 @@ lf_wakeup_lock(struct lockf *state, struct lockf_entry *wakelock) } /* - * Re-check all dependant locks and remove edges to locks that we no + * Re-check all dependent locks and remove edges to locks that we no * longer block. If 'all' is non-zero, the lock has been removed and - * we must remove all the dependancies, otherwise it has simply been + * we must remove all the dependencies, otherwise it has simply been * reduced but remains active. 
Any pending locks which have been been * unblocked are added to 'granted' */ @@ -1165,7 +1165,7 @@ lf_update_dependancies(struct lockf *state, struct lockf_entry *lock, int all, } /* - * Set the start of an existing active lock, updating dependancies and + * Set the start of an existing active lock, updating dependencies and * adding any newly woken locks to 'granted'. */ static void @@ -1181,7 +1181,7 @@ lf_set_start(struct lockf *state, struct lockf_entry *lock, off_t new_start, } /* - * Set the end of an existing active lock, updating dependancies and + * Set the end of an existing active lock, updating dependencies and * adding any newly woken locks to 'granted'. */ static void @@ -1204,7 +1204,7 @@ lf_set_end(struct lockf *state, struct lockf_entry *lock, off_t new_end, * pending locks as a result of downgrading/unlocking. We simply * activate the newly granted locks by looping. * - * Since the new lock already has its dependancies set up, we always + * Since the new lock already has its dependencies set up, we always * add it to the list (unless its an unlock request). This may * fragment the lock list in some pathological cases but its probably * not a real problem. @@ -1332,7 +1332,7 @@ lf_cancel_lock(struct lockf *state, struct lockf_entry *lock) * may allow some other pending lock to become * active. Consider this case: * - * Owner Action Result Dependancies + * Owner Action Result Dependencies * * A: lock [0..0] succeeds * B: lock [2..2] succeeds @@ -1840,7 +1840,7 @@ lf_split(struct lockf *state, struct lockf_entry *lock1, /* * This cannot cause a deadlock since any edges we would add * to splitlock already exist in lock1. We must be sure to add - * necessary dependancies to splitlock before we reduce lock1 + * necessary dependencies to splitlock before we reduce lock1 * otherwise we may accidentally grant a pending lock that * was blocked by the tail end of lock1. 
*/ diff --git a/sys/kern/kern_loginclass.c b/sys/kern/kern_loginclass.c index b20f60b..04f6809 100644 --- a/sys/kern/kern_loginclass.c +++ b/sys/kern/kern_loginclass.c @@ -205,7 +205,7 @@ sys_setloginclass(struct thread *td, struct setloginclass_args *uap) PROC_LOCK(p); oldcred = crcopysafe(p, newcred); newcred->cr_loginclass = newlc; - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index 7f9f666..c610f54 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -92,7 +92,7 @@ __FBSDID("$FreeBSD$"); * * Whenever an object is allocated from the underlying global * memory pool it gets pre-initialized with the _zinit_ functions. - * When the Keg's are overfull objects get decomissioned with + * When the Keg's are overfull objects get decommissioned with * _zfini_ functions and free'd back to the global memory pool. * */ diff --git a/sys/kern/kern_mtxpool.c b/sys/kern/kern_mtxpool.c index 23b41bb..6bfe611 100644 --- a/sys/kern/kern_mtxpool.c +++ b/sys/kern/kern_mtxpool.c @@ -39,7 +39,7 @@ * * Disadvantages: * - should generally only be used as leaf mutexes. - * - pool/pool dependancy ordering cannot be depended on. + * - pool/pool dependency ordering cannot be depended on. * - possible L1 cache mastersip contention between cpus. 
*/ diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index ac375a4..0f60553 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -92,18 +92,15 @@ __FBSDID("$FreeBSD$"); #endif SDT_PROVIDER_DEFINE(proc); -SDT_PROBE_DEFINE4(proc, kernel, ctor, entry, "struct proc *", "int", - "void *", "int"); -SDT_PROBE_DEFINE4(proc, kernel, ctor, return, "struct proc *", "int", - "void *", "int"); -SDT_PROBE_DEFINE4(proc, kernel, dtor, entry, "struct proc *", "int", - "void *", "struct thread *"); -SDT_PROBE_DEFINE3(proc, kernel, dtor, return, "struct proc *", "int", - "void *"); -SDT_PROBE_DEFINE3(proc, kernel, init, entry, "struct proc *", "int", +SDT_PROBE_DEFINE4(proc, , ctor, entry, "struct proc *", "int", "void *", "int"); -SDT_PROBE_DEFINE3(proc, kernel, init, return, "struct proc *", "int", +SDT_PROBE_DEFINE4(proc, , ctor, return, "struct proc *", "int", "void *", "int"); +SDT_PROBE_DEFINE4(proc, , dtor, entry, "struct proc *", "int", "void *", + "struct thread *"); +SDT_PROBE_DEFINE3(proc, , dtor, return, "struct proc *", "int", "void *"); +SDT_PROBE_DEFINE3(proc, , init, entry, "struct proc *", "int", "int"); +SDT_PROBE_DEFINE3(proc, , init, return, "struct proc *", "int", "int"); MALLOC_DEFINE(M_PGRP, "pgrp", "process group header"); MALLOC_DEFINE(M_SESSION, "session", "session header"); @@ -196,9 +193,9 @@ proc_ctor(void *mem, int size, void *arg, int flags) struct proc *p; p = (struct proc *)mem; - SDT_PROBE4(proc, kernel, ctor , entry, p, size, arg, flags); + SDT_PROBE4(proc, , ctor , entry, p, size, arg, flags); EVENTHANDLER_INVOKE(process_ctor, p); - SDT_PROBE4(proc, kernel, ctor , return, p, size, arg, flags); + SDT_PROBE4(proc, , ctor , return, p, size, arg, flags); return (0); } @@ -214,7 +211,7 @@ proc_dtor(void *mem, int size, void *arg) /* INVARIANTS checks go here */ p = (struct proc *)mem; td = FIRST_THREAD_IN_PROC(p); - SDT_PROBE4(proc, kernel, dtor, entry, p, size, arg, td); + SDT_PROBE4(proc, , dtor, entry, p, size, arg, td); if (td != 
NULL) { #ifdef INVARIANTS KASSERT((p->p_numthreads == 1), @@ -227,7 +224,7 @@ proc_dtor(void *mem, int size, void *arg) EVENTHANDLER_INVOKE(process_dtor, p); if (p->p_ksi != NULL) KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue")); - SDT_PROBE3(proc, kernel, dtor, return, p, size, arg); + SDT_PROBE3(proc, , dtor, return, p, size, arg); } /* @@ -239,7 +236,7 @@ proc_init(void *mem, int size, int flags) struct proc *p; p = (struct proc *)mem; - SDT_PROBE3(proc, kernel, init, entry, p, size, flags); + SDT_PROBE3(proc, , init, entry, p, size, flags); p->p_sched = (struct p_sched *)&p[1]; bzero(&p->p_mtx, sizeof(struct mtx)); mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); @@ -250,7 +247,7 @@ proc_init(void *mem, int size, int flags) EVENTHANDLER_INVOKE(process_init, p); p->p_stats = pstats_alloc(); p->p_pgrp = NULL; - SDT_PROBE3(proc, kernel, init, return, p, size, flags); + SDT_PROBE3(proc, , init, return, p, size, flags); return (0); } diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index 03b9001..8235a1a 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -147,7 +147,7 @@ sys_getpgrp(struct thread *td, struct getpgrp_args *uap) return (0); } -/* Get an arbitary pid's process group id */ +/* Get an arbitrary pid's process group id */ #ifndef _SYS_SYSPROTO_H_ struct getpgid_args { pid_t pid; @@ -178,7 +178,7 @@ sys_getpgid(struct thread *td, struct getpgid_args *uap) } /* - * Get an arbitary pid's session id. + * Get an arbitrary pid's session id. 
*/ #ifndef _SYS_SYSPROTO_H_ struct getsid_args { @@ -582,7 +582,7 @@ sys_setuid(struct thread *td, struct setuid_args *uap) change_euid(newcred, uip); setsugid(p); } - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); @@ -641,7 +641,7 @@ sys_seteuid(struct thread *td, struct seteuid_args *uap) change_euid(newcred, euip); setsugid(p); } - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); uifree(euip); crfree(oldcred); @@ -741,7 +741,7 @@ sys_setgid(struct thread *td, struct setgid_args *uap) change_egid(newcred, gid); setsugid(p); } - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); @@ -787,7 +787,7 @@ sys_setegid(struct thread *td, struct setegid_args *uap) change_egid(newcred, egid); setsugid(p); } - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); @@ -860,7 +860,7 @@ kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups) crsetgroups_locked(newcred, ngrp, groups); } setsugid(p); - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); @@ -923,7 +923,7 @@ sys_setreuid(register struct thread *td, struct setreuid_args *uap) change_svuid(newcred, newcred->cr_uid); setsugid(p); } - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); @@ -990,7 +990,7 @@ sys_setregid(register struct thread *td, struct setregid_args *uap) change_svgid(newcred, newcred->cr_groups[0]); setsugid(p); } - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); @@ -1064,7 +1064,7 @@ sys_setresuid(register struct thread *td, struct setresuid_args *uap) change_svuid(newcred, suid); setsugid(p); } - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); @@ -1143,7 +1143,7 @@ 
sys_setresgid(register struct thread *td, struct setresgid_args *uap) change_svgid(newcred, sgid); setsugid(p); } - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); @@ -1956,6 +1956,31 @@ cred_update_thread(struct thread *td) crfree(cred); } +/* + * Change process credentials. + * Callers are responsible for providing the reference for current credentials + * and for freeing old ones. + * + * Process has to be locked except when it does not have credentials (as it + * should not be visible just yet) or when newcred is NULL (as this can be + * only used when the process is about to be freed, at which point it should + * not be visible anymore). + */ +struct ucred * +proc_set_cred(struct proc *p, struct ucred *newcred) +{ + struct ucred *oldcred; + + if (newcred == NULL) + MPASS(p->p_state == PRS_ZOMBIE); + else if (p->p_ucred != NULL) + PROC_LOCK_ASSERT(p, MA_OWNED); + + oldcred = p->p_ucred; + p->p_ucred = newcred; + return (oldcred); +} + struct ucred * crcopysafe(struct proc *p, struct ucred *cr) { diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c index 9e67034..eaf93fc 100644 --- a/sys/kern/kern_racct.c +++ b/sys/kern/kern_racct.c @@ -104,30 +104,32 @@ static void racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount); SDT_PROVIDER_DEFINE(racct); -SDT_PROBE_DEFINE3(racct, kernel, rusage, add, "struct proc *", "int", - "uint64_t"); -SDT_PROBE_DEFINE3(racct, kernel, rusage, add__failure, +SDT_PROBE_DEFINE3(racct, , rusage, add, "struct proc *", "int", "uint64_t"); -SDT_PROBE_DEFINE3(racct, kernel, rusage, add__cred, "struct ucred *", - "int", "uint64_t"); -SDT_PROBE_DEFINE3(racct, kernel, rusage, add__force, "struct proc *", - "int", "uint64_t"); -SDT_PROBE_DEFINE3(racct, kernel, rusage, set, "struct proc *", "int", - "uint64_t"); -SDT_PROBE_DEFINE3(racct, kernel, rusage, set__failure, +SDT_PROBE_DEFINE3(racct, , rusage, add__failure, "struct proc *", "int", "uint64_t"); 
-SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, "struct proc *", "int", - "uint64_t"); -SDT_PROBE_DEFINE3(racct, kernel, rusage, sub__cred, "struct ucred *", - "int", "uint64_t"); -SDT_PROBE_DEFINE1(racct, kernel, racct, create, "struct racct *"); -SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, "struct racct *"); -SDT_PROBE_DEFINE2(racct, kernel, racct, join, "struct racct *", +SDT_PROBE_DEFINE3(racct, , rusage, add__cred, + "struct ucred *", "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, , rusage, add__force, + "struct proc *", "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, , rusage, set, + "struct proc *", "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, , rusage, set__failure, + "struct proc *", "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, , rusage, sub, + "struct proc *", "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, , rusage, sub__cred, + "struct ucred *", "int", "uint64_t"); +SDT_PROBE_DEFINE1(racct, , racct, create, "struct racct *"); -SDT_PROBE_DEFINE2(racct, kernel, racct, join__failure, - "struct racct *", "struct racct *"); -SDT_PROBE_DEFINE2(racct, kernel, racct, leave, "struct racct *", +SDT_PROBE_DEFINE1(racct, , racct, destroy, "struct racct *"); +SDT_PROBE_DEFINE2(racct, , racct, join, + "struct racct *", "struct racct *"); +SDT_PROBE_DEFINE2(racct, , racct, join__failure, + "struct racct *", "struct racct *"); +SDT_PROBE_DEFINE2(racct, , racct, leave, + "struct racct *", "struct racct *"); int racct_types[] = { [RACCT_CPU] = @@ -447,7 +449,7 @@ racct_create(struct racct **racctp) if (!racct_enable) return; - SDT_PROBE1(racct, kernel, racct, create, racctp); + SDT_PROBE1(racct, , racct, create, racctp); KASSERT(*racctp == NULL, ("racct already allocated")); @@ -462,7 +464,7 @@ racct_destroy_locked(struct racct **racctp) ASSERT_RACCT_ENABLED(); - SDT_PROBE1(racct, kernel, racct, destroy, racctp); + SDT_PROBE1(racct, , racct, destroy, racctp); mtx_assert(&racct_lock, MA_OWNED); KASSERT(racctp != NULL, ("NULL racctp")); @@ -540,7 +542,7 @@ 
racct_add_locked(struct proc *p, int resource, uint64_t amount) ASSERT_RACCT_ENABLED(); - SDT_PROBE3(racct, kernel, rusage, add, p, resource, amount); + SDT_PROBE3(racct, , rusage, add, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. @@ -550,8 +552,7 @@ racct_add_locked(struct proc *p, int resource, uint64_t amount) #ifdef RCTL error = rctl_enforce(p, resource, amount); if (error && RACCT_IS_DENIABLE(resource)) { - SDT_PROBE3(racct, kernel, rusage, add__failure, p, resource, - amount); + SDT_PROBE3(racct, , rusage, add__failure, p, resource, amount); return (error); } #endif @@ -586,7 +587,7 @@ racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) ASSERT_RACCT_ENABLED(); - SDT_PROBE3(racct, kernel, rusage, add__cred, cred, resource, amount); + SDT_PROBE3(racct, , rusage, add__cred, cred, resource, amount); racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount); for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) @@ -624,7 +625,7 @@ racct_add_force(struct proc *p, int resource, uint64_t amount) if (!racct_enable) return; - SDT_PROBE3(racct, kernel, rusage, add__force, p, resource, amount); + SDT_PROBE3(racct, , rusage, add__force, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. @@ -648,7 +649,7 @@ racct_set_locked(struct proc *p, int resource, uint64_t amount) ASSERT_RACCT_ENABLED(); - SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount); + SDT_PROBE3(racct, , rusage, set, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. 
@@ -680,8 +681,8 @@ racct_set_locked(struct proc *p, int resource, uint64_t amount) if (diff_proc > 0) { error = rctl_enforce(p, resource, diff_proc); if (error && RACCT_IS_DENIABLE(resource)) { - SDT_PROBE3(racct, kernel, rusage, set__failure, p, - resource, amount); + SDT_PROBE3(racct, , rusage, set__failure, p, resource, + amount); return (error); } } @@ -724,7 +725,7 @@ racct_set_force_locked(struct proc *p, int resource, uint64_t amount) ASSERT_RACCT_ENABLED(); - SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount); + SDT_PROBE3(racct, , rusage, set, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. @@ -835,7 +836,7 @@ racct_sub(struct proc *p, int resource, uint64_t amount) if (!racct_enable) return; - SDT_PROBE3(racct, kernel, rusage, sub, p, resource, amount); + SDT_PROBE3(racct, , rusage, sub, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. @@ -862,7 +863,7 @@ racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) ASSERT_RACCT_ENABLED(); - SDT_PROBE3(racct, kernel, rusage, sub__cred, cred, resource, amount); + SDT_PROBE3(racct, , rusage, sub__cred, cred, resource, amount); #ifdef notyet KASSERT(RACCT_CAN_DROP(resource), diff --git a/sys/kern/kern_rctl.c b/sys/kern/kern_rctl.c index c43b83d..2aa55e2 100644 --- a/sys/kern/kern_rctl.c +++ b/sys/kern/kern_rctl.c @@ -310,7 +310,7 @@ rctl_pcpu_available(const struct proc *p) { /* * Return slightly less than actual value of the available - * %cpu resource. This makes %cpu throttling more agressive + * %cpu resource. This makes %cpu throttling more aggressive * and lets us act sooner than the limits are already exceeded. 
*/ if (limit != 0) { diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c index 194c603..aa8d4b0 100644 --- a/sys/kern/kern_rmlock.c +++ b/sys/kern/kern_rmlock.c @@ -370,7 +370,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) } /* - * We allow readers to aquire a lock even if a writer is blocked if + * We allow readers to acquire a lock even if a writer is blocked if * the lock is recursive and the reader already holds the lock. */ if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) { diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 841fc49..29783f8 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -94,11 +94,11 @@ __FBSDID("$FreeBSD$"); #define ONSIG 32 /* NSIG for osig* syscalls. XXX. */ SDT_PROVIDER_DECLARE(proc); -SDT_PROBE_DEFINE3(proc, kernel, , signal__send, "struct thread *", - "struct proc *", "int"); -SDT_PROBE_DEFINE2(proc, kernel, , signal__clear, "int", - "ksiginfo_t *"); -SDT_PROBE_DEFINE3(proc, kernel, , signal__discard, +SDT_PROBE_DEFINE3(proc, , , signal__send, + "struct thread *", "struct proc *", "int"); +SDT_PROBE_DEFINE2(proc, , , signal__clear, + "int", "ksiginfo_t *"); +SDT_PROBE_DEFINE3(proc, , , signal__discard, "struct thread *", "struct proc *", "int"); static int coredump(struct thread *); @@ -1291,7 +1291,7 @@ kern_sigtimedwait(struct thread *td, sigset_t waitset, ksiginfo_t *ksi, reschedule_signals(p, new_block, 0); if (error == 0) { - SDT_PROBE2(proc, kernel, , signal__clear, sig, ksi); + SDT_PROBE2(proc, , , signal__clear, sig, ksi); if (ksi->ksi_code == SI_TIMER) itimer_accept(p, ksi->ksi_timerid, ksi); @@ -2108,7 +2108,7 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi) } else sigqueue = &td->td_sigqueue; - SDT_PROBE3(proc, kernel, , signal__send, td, p, sig); + SDT_PROBE3(proc, , , signal__send, td, p, sig); /* * If the signal is being ignored, @@ -2119,7 +2119,7 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t 
*ksi) */ mtx_lock(&ps->ps_mtx); if (SIGISMEMBER(ps->ps_sigignore, sig)) { - SDT_PROBE3(proc, kernel, , signal__discard, td, p, sig); + SDT_PROBE3(proc, , , signal__discard, td, p, sig); mtx_unlock(&ps->ps_mtx); if (ksi && (ksi->ksi_flags & KSI_INS)) diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index ddc3369..af7ac77 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -172,7 +172,7 @@ sysctl_register_oid(struct sysctl_oid *oidp) * * NOTE: DO NOT change the starting value here, change it in * <sys/sysctl.h>, and make sure it is at least 256 to - * accomodate e.g. net.inet.raw as a static sysctl node. + * accommodate e.g. net.inet.raw as a static sysctl node. */ if (oid_number < 0) { static int newoid; @@ -295,7 +295,7 @@ sysctl_ctx_free(struct sysctl_ctx_list *clist) } /* * Restore deregistered entries, either from the end, - * or from the place where error occured. + * or from the place where error occurred. * e contains the entry that was not unregistered */ if (error) diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index e9240ce..eabdd54 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -1855,7 +1855,7 @@ inittimecounter(void *dummy) * Set the initial timeout to * max(1, <approx. number of hardclock ticks in a millisecond>). * People should probably not use the sysctl to set the timeout - * to smaller than its inital value, since that value is the + * to smaller than its initial value, since that value is the * smallest reasonable one. If they want better timestamps they * should use the non-"get"* functions. */ diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index c85813b..2f8382c 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -424,7 +424,7 @@ thread_exit(void) * architecture specific resources that * would not be on a new untouched process. 
*/ - cpu_thread_exit(td); /* XXXSMP */ + cpu_thread_exit(td); /* * The last thread is left attached to the process @@ -613,11 +613,6 @@ weed_inhib(int mode, struct thread *td2, struct proc *p) wakeup_swapper |= sleepq_abort(td2, EINTR); break; case SINGLE_BOUNDARY: - if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0) - wakeup_swapper |= thread_unsuspend_one(td2, p, false); - if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) - wakeup_swapper |= sleepq_abort(td2, ERESTART); - break; case SINGLE_NO_EXIT: if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0) wakeup_swapper |= thread_unsuspend_one(td2, p, false); @@ -856,8 +851,8 @@ thread_suspend_check(int return_instead) /* * The only suspension in action is a * single-threading. Single threader need not stop. - * XXX Should be safe to access unlocked - * as it can only be set to be true by us. + * It is safe to access p->p_singlethread unlocked + * because it can only be set to our address by us. */ if (p->p_singlethread == td) return (0); /* Exempt from stopping. 
*/ diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index a3402f8..9aa11ba 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -69,10 +69,8 @@ DPCPU_DECLARE(sbintime_t, hardclocktime); #endif SDT_PROVIDER_DEFINE(callout_execute); -SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start, - "struct callout *"); -SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end, - "struct callout *"); +SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *"); +SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *"); #ifdef CALLOUT_PROFILING static int avg_depth; @@ -681,9 +679,9 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, sbt1 = sbinuptime(); #endif THREAD_NO_SLEEPING(); - SDT_PROBE1(callout_execute, kernel, , callout__start, c); + SDT_PROBE1(callout_execute, , , callout__start, c); c_func(c_arg); - SDT_PROBE1(callout_execute, kernel, , callout__end, c); + SDT_PROBE1(callout_execute, , , callout__end, c); THREAD_SLEEPING_OK(); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt2 = sbinuptime(); @@ -1397,7 +1395,7 @@ _callout_init_lock(c, lock, flags) * which set the timer can do the maintanence the timer was for as close * as possible to the originally intended time. Testing this code for a * week showed that resuming from a suspend resulted in 22 to 25 timers - * firing, which seemed independant on whether the suspend was 2 hours or + * firing, which seemed independent on whether the suspend was 2 hours or * 2 days. Your milage may vary. - Ken Key <key@cs.utk.edu> */ void diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c index a578049..b7610b7 100644 --- a/sys/kern/link_elf_obj.c +++ b/sys/kern/link_elf_obj.c @@ -1099,7 +1099,7 @@ relocate_file(elf_file_t ef) } /* - * Only clean SHN_FBSD_CACHED for successfull return. If we + * Only clean SHN_FBSD_CACHED for successful return. 
If we * modified symbol table for the object but found an * unresolved symbol, there is no reason to roll back. */ diff --git a/sys/kern/linker_if.m b/sys/kern/linker_if.m index 3df592c..a583a03 100644 --- a/sys/kern/linker_if.m +++ b/sys/kern/linker_if.m @@ -89,7 +89,7 @@ METHOD int lookup_set { }; # -# Unload a file, releasing dependancies and freeing storage. +# Unload a file, releasing dependencies and freeing storage. # METHOD void unload { linker_file_t file; diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index 6498ae7..676bd35 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -1393,7 +1393,7 @@ sched_add(struct thread *td, int flags) * or kicking off another CPU as it won't help and may hinder. * In the YIEDLING case, we are about to run whoever is being * put in the queue anyhow, and in the OURSELF case, we are - * puting ourself on the run queue which also only happens + * putting ourself on the run queue which also only happens * when we are about to yield. */ if ((flags & SRQ_YIELDING) == 0) { diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c index 5c45b81..fb3526b 100644 --- a/sys/kern/subr_blist.c +++ b/sys/kern/subr_blist.c @@ -57,8 +57,8 @@ * The non-blocking features of the blist code are used in the swap code * (vm/swap_pager.c). * - * LAYOUT: The radix tree is layed out recursively using a - * linear array. Each meta node is immediately followed (layed out + * LAYOUT: The radix tree is laid out recursively using a + * linear array. Each meta node is immediately followed (laid out * sequentially in memory) by BLIST_META_RADIX lower level nodes. This * is a recursive structure but one that can be easily scanned through * a very simple 'skip' calculation. 
In order to support large radixes, diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c index faf4aa0..aba4364 100644 --- a/sys/kern/subr_bus.c +++ b/sys/kern/subr_bus.c @@ -4564,7 +4564,7 @@ root_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, } /* - * If we get here, assume that the device is permanant and really is + * If we get here, assume that the device is permanent and really is * present in the system. Removable bus drivers are expected to intercept * this call long before it gets here. We return -1 so that drivers that * really care can check vs -1 or some ERRNO returned higher in the food diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c index 84696c2..84305db 100644 --- a/sys/kern/subr_devstat.c +++ b/sys/kern/subr_devstat.c @@ -391,7 +391,7 @@ sysctl_devstat(SYSCTL_HANDLER_ARGS) * XXX devstat_generation should really be "volatile" but that * XXX freaks out the sysctl macro below. The places where we * XXX change it and inspect it are bracketed in the mutex which - * XXX guarantees us proper write barriers. I don't belive the + * XXX guarantees us proper write barriers. I don't believe the * XXX compiler is allowed to optimize mygen away across calls * XXX to other functions, so the following is belived to be safe. */ diff --git a/sys/kern/subr_mbpool.c b/sys/kern/subr_mbpool.c index f0b44d2..acc73ef 100644 --- a/sys/kern/subr_mbpool.c +++ b/sys/kern/subr_mbpool.c @@ -289,7 +289,7 @@ mbp_ext_free(struct mbuf *m, void *buf, void *arg) } /* - * Free all buffers that are marked as beeing on the card + * Free all buffers that are marked as being on the card */ void mbp_card_free(struct mbpool *p) diff --git a/sys/kern/subr_mchain.c b/sys/kern/subr_mchain.c index e9d7d22..204a27c 100644 --- a/sys/kern/subr_mchain.c +++ b/sys/kern/subr_mchain.c @@ -101,7 +101,7 @@ mb_fixhdr(struct mbchain *mbp) /* * Check if object of size 'size' fit to the current position and * allocate new mbuf if not. 
Advance pointers and increase length of mbuf(s). - * Return pointer to the object placeholder or NULL if any error occured. + * Return pointer to the object placeholder or NULL if any error occurred. * Note: size should be <= MLEN */ caddr_t diff --git a/sys/kern/subr_msgbuf.c b/sys/kern/subr_msgbuf.c index ecdbe72..10096c9 100644 --- a/sys/kern/subr_msgbuf.c +++ b/sys/kern/subr_msgbuf.c @@ -50,7 +50,7 @@ static u_int msgbuf_cksum(struct msgbuf *mbp); /* * Timestamps in msgbuf are useful when trying to diagnose when core dumps - * or other actions occured. + * or other actions occurred. */ static int msgbuf_show_timestamp = 0; SYSCTL_INT(_kern, OID_AUTO, msgbuf_show_timestamp, CTLFLAG_RW | CTLFLAG_TUN, diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index 3d4e86a..dc2b91b 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -269,7 +269,7 @@ kmstartup(dummy) * without much risk of reducing the profiling times below what they * would be when profiling is not configured. Abbreviate: * ab = minimum time between MC1 and MC3 - * a = minumum time between MC1 and MC2 + * a = minimum time between MC1 and MC2 * b = minimum time between MC2 and MC3 * cd = minimum time between ME1 and ME3 * c = minimum time between ME1 and ME2 diff --git a/sys/kern/subr_scanf.c b/sys/kern/subr_scanf.c index 824e392..806ca4f 100644 --- a/sys/kern/subr_scanf.c +++ b/sys/kern/subr_scanf.c @@ -603,7 +603,7 @@ doswitch: * z', but treats `a-a' as `the letter a, the * character -, and the letter a'. 
* - * For compatibility, the `-' is not considerd + * For compatibility, the `-' is not considered * to define a range if the character following * it is either a close bracket (required by ANSI) * or is not numerically greater than the character diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c index 3712f92..8141958 100644 --- a/sys/kern/subr_uio.c +++ b/sys/kern/subr_uio.c @@ -468,7 +468,7 @@ copyout_map(struct thread *td, vm_offset_t *addr, size_t sz) lim_max(td->td_proc, RLIMIT_DATA)); PROC_UNLOCK(td->td_proc); - /* round size up to page boundry */ + /* round size up to page boundary */ size = (vm_size_t)round_page(sz); error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE, diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index bd54f57..b767103 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -2898,7 +2898,7 @@ witness_lock_order_add(struct witness *parent, struct witness *child) return (1); } -/* Call this whenver the structure of the witness graph changes. */ +/* Call this whenever the structure of the witness graph changes. */ static void witness_increment_graph_generation(void) { diff --git a/sys/kern/sys_capability.c b/sys/kern/sys_capability.c index b309905..b3dc1b5 100644 --- a/sys/kern/sys_capability.c +++ b/sys/kern/sys_capability.c @@ -105,7 +105,7 @@ sys_cap_enter(struct thread *td, struct cap_enter_args *uap) oldcred = p->p_ucred; crcopy(newcred, oldcred); newcred->cr_flags |= CRED_FLAG_CAPMODE; - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c index c39d93d..7d27b48 100644 --- a/sys/kern/sysv_sem.c +++ b/sys/kern/sysv_sem.c @@ -1153,7 +1153,7 @@ sys_semop(struct thread *td, struct semop_args *uap) if ((error = sem_prison_cansee(rpr, semakptr)) != 0) goto done2; /* - * Initial pass thru sops to see what permissions are needed. + * Initial pass through sops to see what permissions are needed. 
* Also perform any checks that don't need repeating on each * attempt to satisfy the request vector. */ diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 0e352a6..e977150 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -397,7 +397,7 @@ tty_wait_background(struct tty *tp, struct thread *td, int sig) PROC_LOCK(p); /* * The process should only sleep, when: - * - This terminal is the controling terminal + * - This terminal is the controlling terminal * - Its process group is not the foreground process * group * - The parent process isn't waiting for the child to diff --git a/sys/kern/tty_pts.c b/sys/kern/tty_pts.c index 3d2d745..5de9f5e 100644 --- a/sys/kern/tty_pts.c +++ b/sys/kern/tty_pts.c @@ -123,7 +123,7 @@ ptsdev_read(struct file *fp, struct uio *uio, struct ucred *active_cred, /* * Implement packet mode. When packet mode is turned on, * the first byte contains a bitmask of events that - * occured (start, stop, flush, window size, etc). + * occurred (start, stop, flush, window size, etc). */ if (psc->pts_flags & PTS_PKT && psc->pts_pkt) { pkt = psc->pts_pkt; diff --git a/sys/kern/uipc_mbuf2.c b/sys/kern/uipc_mbuf2.c index e32e2a1..00472d3 100644 --- a/sys/kern/uipc_mbuf2.c +++ b/sys/kern/uipc_mbuf2.c @@ -141,7 +141,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp) * Ideally, the requirement should only be (iii). * * If we're writable, we're sure we're writable, because the ref. count - * cannot increase from 1, as that would require posession of mbuf + * cannot increase from 1, as that would require possession of mbuf * n by someone else (which is impossible). However, if we're _not_ * writable, we may eventually become writable )if the ref. 
count drops * to 1), but we'll fail to notice it unless we re-evaluate diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 141e268..23acdf7 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -186,7 +186,7 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); /* * Limit on the number of connections in the listen queue waiting * for accept(2). - * NB: The orginal sysctl somaxconn is still available but hidden + * NB: The original sysctl somaxconn is still available but hidden * to prevent confusion about the actual purpose of this number. */ static int somaxconn = SOMAXCONN; @@ -1091,7 +1091,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, } /* * XXX all the SBS_CANTSENDMORE checks previously done could be out - * of date. We could have recieved a reset packet in an interrupt or + * of date. We could have received a reset packet in an interrupt or * maybe we slept while doing page faults in uiomove() etc. We could * probably recheck again inside the locking protection here, but * there are probably other places that this also happens. We must @@ -1271,7 +1271,7 @@ restart: } /* * XXX all the SBS_CANTSENDMORE checks previously - * done could be out of date. We could have recieved + * done could be out of date. We could have received * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. We * could probably recheck again inside the locking diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 311271b..4c123fd 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -150,7 +150,7 @@ struct namecache_ts { */ /* - * Structures associated with name cacheing. + * Structures associated with name caching. 
*/ #define NCHHASH(hash) \ (&nchashtbl[(hash) & nchash]) @@ -418,7 +418,6 @@ cache_zap(ncp) rw_assert(&cache_lock, RA_WLOCKED); CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp); -#ifdef KDTRACE_HOOKS if (ncp->nc_vp != NULL) { SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp, nc_get_name(ncp), ncp->nc_vp); @@ -426,7 +425,6 @@ cache_zap(ncp) SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp, nc_get_name(ncp)); } -#endif vp = NULL; LIST_REMOVE(ncp, nc_hash); if (ncp->nc_flag & NCF_ISDOTDOT) { @@ -460,7 +458,7 @@ cache_zap(ncp) * cnp pointing to the name of the entry being sought. If the lookup * succeeds, the vnode is returned in *vpp, and a status of -1 is * returned. If the lookup determines that the name does not exist - * (negative cacheing), a status of ENOENT is returned. If the lookup + * (negative caching), a status of ENOENT is returned. If the lookup * fails, a status of zero is returned. If the directory vnode is * recycled out from under us due to a forced unmount, a status of * ENOENT is returned. diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 362ebb7b..fe3d5db 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -540,7 +540,7 @@ cluster_callback(bp) int error = 0; /* - * Must propogate errors to all the components. + * Must propagate errors to all the components. */ if (bp->b_ioflags & BIO_ERROR) error = bp->b_error; diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 8236f32..123ef54 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -1127,7 +1127,7 @@ NDFREE(struct nameidata *ndp, const u_int flags) * Determine if there is a suitable alternate filename under the specified * prefix for the specified path. If the create flag is set, then the * alternate prefix will be used so long as the parent directory exists. 
- * This is used by the various compatiblity ABIs so that Linux binaries prefer + * This is used by the various compatibility ABIs so that Linux binaries prefer * files under /compat/linux for example. The chosen path (whether under * the prefix or under /) is returned in a kernel malloc'd buffer pointed * to by pathbuf. The caller is responsible for free'ing the buffer from diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 3ca995f..f5f522e 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -1222,7 +1222,6 @@ dounmount(struct mount *mp, int flags, struct thread *td) VI_LOCK(coveredvp); vholdl(coveredvp); vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY); - vdrop(coveredvp); /* * Check for mp being unmounted while waiting for the * covered vnode lock. @@ -1230,18 +1229,22 @@ dounmount(struct mount *mp, int flags, struct thread *td) if (coveredvp->v_mountedhere != mp || coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) { VOP_UNLOCK(coveredvp, 0); + vdrop(coveredvp); vfs_rel(mp); return (EBUSY); } } + /* * Only privileged root, or (if MNT_USER is set) the user that did the * original mount is permitted to unmount this filesystem. 
*/ error = vfs_suser(mp, td); if (error != 0) { - if (coveredvp) + if (coveredvp != NULL) { VOP_UNLOCK(coveredvp, 0); + vdrop(coveredvp); + } vfs_rel(mp); return (error); } @@ -1251,8 +1254,10 @@ dounmount(struct mount *mp, int flags, struct thread *td) if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 || !TAILQ_EMPTY(&mp->mnt_uppers)) { MNT_IUNLOCK(mp); - if (coveredvp) + if (coveredvp != NULL) { VOP_UNLOCK(coveredvp, 0); + vdrop(coveredvp); + } vn_finished_write(mp); return (EBUSY); } @@ -1285,6 +1290,16 @@ dounmount(struct mount *mp, int flags, struct thread *td) if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); + /* + * From now, we can claim that the use reference on the + * coveredvp is ours, and the ref can be released only by + * successful unmount by us, or left for later unmount + * attempt. The previously acquired hold reference is no + * longer needed to protect the vnode from reuse. + */ + if (coveredvp != NULL) + vdrop(coveredvp); + vfs_msync(mp, MNT_WAIT); MNT_ILOCK(mp); async_flag = mp->mnt_flag & MNT_ASYNC; diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c index 184976a..12cddff 100644 --- a/sys/kern/vfs_mountroot.c +++ b/sys/kern/vfs_mountroot.c @@ -79,7 +79,7 @@ __FBSDID("$FreeBSD$"); * * If the environment variable vfs.root.mountfrom is a space separated list, * each list element is tried in turn and the root filesystem will be mounted - * from the first one that suceeds. + * from the first one that succeeds. * * The environment variable vfs.root.mountfrom.options is a comma delimited * set of string mount options. 
These mount options must be parseable diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index a137217..e0e7205 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -501,7 +501,7 @@ vfs_busy(struct mount *mp, int flags) MNT_ILOCK(mp); MNT_REF(mp); /* - * If mount point is currenly being unmounted, sleep until the + * If mount point is currently being unmounted, sleep until the * mount point fate is decided. If thread doing the unmounting fails, * it will clear MNTK_UNMOUNT flag before waking us up, indicating * that this mount point has survived the unmount attempt and vfs_busy @@ -797,7 +797,7 @@ vattr_null(struct vattr *vap) * the buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not - * desireable to reuse such vnodes. These conditions may cause the + * desirable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. */ @@ -3669,7 +3669,7 @@ destroy_vpollinfo(struct vpollinfo *vi) } /* - * Initalize per-vnode helper structure to hold poll-related state. + * Initialize per-vnode helper structure to hold poll-related state. */ void v_addpollinfo(struct vnode *vp) @@ -4080,7 +4080,7 @@ extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, #ifdef DEBUG_VFS_LOCKS /* - * This only exists to supress warnings from unlocked specfs accesses. It is + * This only exists to suppress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) (panicstr != NULL || (vp) == NULL || \ diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index fbffffe..e571a5f 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -35,7 +35,7 @@ # is a specification of the locking protocol used by each vop call. 
# The first column is the name of the variable, the remaining three # columns are in, out and error respectively. The "in" column defines -# the lock state on input, the "out" column defines the state on succesful +# the lock state on input, the "out" column defines the state on successful # return, and the "error" column defines the locking state on error exit. # # The locking value can take the following values: diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 9d075d1..c26e560 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -110,7 +110,7 @@ SUBDIR= \ fdc \ fdescfs \ ${_fe} \ - ${_filemon} \ + filemon \ firewire \ firmware \ fuse \ @@ -388,7 +388,6 @@ SUBDIR= \ zlib \ .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" -_filemon= filemon _imgact_binmisc= imgact_binmisc _vmware= vmware .endif diff --git a/sys/modules/hyperv/vmbus/Makefile b/sys/modules/hyperv/vmbus/Makefile index 8187146..6dfec94 100644 --- a/sys/modules/hyperv/vmbus/Makefile +++ b/sys/modules/hyperv/vmbus/Makefile @@ -7,20 +7,21 @@ KMOD= hv_vmbus SRCS= hv_channel.c \ hv_channel_mgmt.c \ hv_connection.c \ - hv_et.c \ - hv_hv.c \ hv_ring_buffer.c \ - hv_vmbus_drv_freebsd.c \ - hv_vmbus_priv.h + hyperv.c \ + hyperv_busdma.c \ + hyperv_machdep.c \ + vmbus.c \ + vmbus_et.c SRCS+= acpi_if.h bus_if.h device_if.h opt_acpi.h # XXX: for assym.s SRCS+= opt_kstack_pages.h opt_nfs.h opt_apic.h opt_hwpmc_hooks.h opt_compat.h SRCS+= assym.s \ - hv_vector.S + vmbus_vector.S -hv_vector.o: +vmbus_vector.o: ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} diff --git a/sys/modules/netgraph/mppc/Makefile b/sys/modules/netgraph/mppc/Makefile index 40b3717..1ab69e6 100644 --- a/sys/modules/netgraph/mppc/Makefile +++ b/sys/modules/netgraph/mppc/Makefile @@ -4,11 +4,10 @@ KMOD= ng_mppc SRCS= ng_mppc.c opt_netgraph.h -NETGRAPH_MPPC_COMPRESSION?= 0 +NETGRAPH_MPPC_COMPRESSION?= 1 NETGRAPH_MPPC_ENCRYPTION?= 1 .if ${NETGRAPH_MPPC_COMPRESSION} > 0 -# XXX 
These files don't exist yet, but hopefully someday they will... .PATH: ${.CURDIR}/../../../net SRCS+= mppcc.c mppcd.c .endif diff --git a/sys/net/mppc.h b/sys/net/mppc.h new file mode 100644 index 0000000..29b5113 --- /dev/null +++ b/sys/net/mppc.h @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2007 Alexander Motin <mav@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * MPPC decompression library. + * Version 1.0 + * + * Note that Hi/Fn (later acquired by Exar Corporation) held US patents + * on some implementation-critical aspects of MPPC compression. + * These patents lapsed due to non-payment of fees in 2007 and by 2015 + * expired altogether. 
+ */ + +#ifndef _NET_MPPC_H_ +#define _NET_MPPC_H_ + +#define MPPC_MANDATORY_COMPRESS_FLAGS 0 +#define MPPC_MANDATORY_DECOMPRESS_FLAGS 0 + +#define MPPC_SAVE_HISTORY 1 + +#define MPPC_OK 5 +#define MPPC_EXPANDED 8 +#define MPPC_RESTART_HISTORY 16 +#define MPPC_DEST_EXHAUSTED 32 + +extern size_t MPPC_SizeOfCompressionHistory(void); +extern size_t MPPC_SizeOfDecompressionHistory(void); + +extern void MPPC_InitCompressionHistory(char *history); +extern void MPPC_InitDecompressionHistory(char *history); + +extern int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags, int undef); +extern int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags); + +#endif diff --git a/sys/net/mppcc.c b/sys/net/mppcc.c new file mode 100644 index 0000000..01ce3ff --- /dev/null +++ b/sys/net/mppcc.c @@ -0,0 +1,299 @@ +/*- + * Copyright (c) 2002-2004 Jan Dubiec <jdx@slackware.pl> + * Copyright (c) 2007 Alexander Motin <mav@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * MPPC compression library. + * Version 1.0 + * + * Note that Hi/Fn (later acquired by Exar Corporation) held US patents + * on some implementation-critical aspects of MPPC compression. + * These patents lapsed due to non-payment of fees in 2007 and by 2015 + * expired altogether. + */ + +#include <sys/param.h> +#include <sys/systm.h> + +#include <net/mppc.h> + +#define MPPE_HIST_LEN 8192 + +#define HASH(x) (((40543*(((((x)[0]<<4)^(x)[1])<<4)^(x)[2]))>>4) & 0x1fff) + +struct MPPC_comp_state { + uint8_t hist[2*MPPE_HIST_LEN]; + uint16_t histptr; + uint16_t hash[MPPE_HIST_LEN]; +}; + +/* Inserts 1 to 8 bits into the output buffer. */ +static void __inline +putbits8(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l) +{ + buf += *i; + if (*l >= n) { + *l = (*l) - n; + val <<= *l; + *buf = *buf | (val & 0xff); + if (*l == 0) { + *l = 8; + (*i)++; + *(++buf) = 0; + } + } else { + (*i)++; + *l = 8 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 8) & 0xff); + *(++buf) = val & 0xff; + } +} + +/* Inserts 9 to 16 bits into the output buffer. 
*/ +static void __inline +putbits16(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l) +{ + buf += *i; + if (*l >= n - 8) { + (*i)++; + *l = 8 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 8) & 0xff); + *(++buf) = val & 0xff; + if (*l == 0) { + *l = 8; + (*i)++; + *(++buf) = 0; + } + } else { + (*i)++; (*i)++; + *l = 16 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 16) & 0xff); + *(++buf) = (val >> 8) & 0xff; + *(++buf) = val & 0xff; + } +} + +/* Inserts 17 to 24 bits into the output buffer. */ +static void __inline +putbits24(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l) +{ + buf += *i; + if (*l >= n - 16) { + (*i)++; (*i)++; + *l = 16 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 16) & 0xff); + *(++buf) = (val >> 8) & 0xff; + *(++buf) = val & 0xff; + if (*l == 0) { + *l = 8; + (*i)++; + *(++buf) = 0; + } + } else { + (*i)++; (*i)++; (*i)++; + *l = 24 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 24) & 0xff); + *(++buf) = (val >> 16) & 0xff; + *(++buf) = (val >> 8) & 0xff; + *(++buf) = val & 0xff; + } +} + +size_t MPPC_SizeOfCompressionHistory(void) +{ + return (sizeof(struct MPPC_comp_state)); +} + +void MPPC_InitCompressionHistory(char *history) +{ + struct MPPC_comp_state *state = (struct MPPC_comp_state*)history; + + bzero(history, sizeof(struct MPPC_comp_state)); + state->histptr = MPPE_HIST_LEN; +} + +int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags, int undef) +{ + struct MPPC_comp_state *state = (struct MPPC_comp_state*)history; + uint32_t olen, off, len, idx, i, l; + uint8_t *hist, *sbuf, *p, *q, *r, *s; + int rtn = MPPC_OK; + + /* + * At this point, to avoid possible buffer overflow caused by packet + * expansion during/after compression, we should make sure we have + * space for the worst case. + + * Maximum MPPC packet expansion is 12.5%. 
This is the worst case when + * all octets in the input buffer are >= 0x80 and we cannot find any + * repeated tokens. + */ + if (*dstCnt < (*srcCnt * 9 / 8 + 2)) { + rtn &= ~MPPC_OK; + return (rtn); + } + + /* We can't compress more than MPPE_HIST_LEN bytes in a call. */ + if (*srcCnt > MPPE_HIST_LEN) { + rtn &= ~MPPC_OK; + return (rtn); + } + + hist = state->hist + MPPE_HIST_LEN; + /* check if there is enough room at the end of the history */ + if (state->histptr + *srcCnt >= 2*MPPE_HIST_LEN) { + rtn |= MPPC_RESTART_HISTORY; + state->histptr = MPPE_HIST_LEN; + memcpy(state->hist, hist, MPPE_HIST_LEN); + } + /* Add packet to the history. */ + sbuf = state->hist + state->histptr; + memcpy(sbuf, *src, *srcCnt); + state->histptr += *srcCnt; + + /* compress data */ + r = sbuf + *srcCnt; + **dst = olen = i = 0; + l = 8; + while (i < *srcCnt - 2) { + s = q = sbuf + i; + + /* Prognose matching position using hash function. */ + idx = HASH(s); + p = hist + state->hash[idx]; + state->hash[idx] = (uint16_t) (s - hist); + if (p > s) /* It was before MPPC_RESTART_HISTORY. */ + p -= MPPE_HIST_LEN; /* Try previous history buffer. */ + off = s - p; + + /* Check our prognosis. */ + if (off > MPPE_HIST_LEN - 1 || off < 1 || *p++ != *s++ || + *p++ != *s++ || *p++ != *s++) { + /* No match found; encode literal byte. */ + if ((*src)[i] < 0x80) { /* literal byte < 0x80 */ + putbits8(*dst, (uint32_t) (*src)[i], 8, &olen, &l); + } else { /* literal byte >= 0x80 */ + putbits16(*dst, (uint32_t) (0x100|((*src)[i]&0x7f)), 9, + &olen, &l); + } + ++i; + continue; + } + + /* Find length of the matching fragment */ +#if defined(__amd64__) || defined(__i386__) + /* Optimization for CPUs without strict data aligning requirements */ + while ((*((uint32_t*)p) == *((uint32_t*)s)) && (s < (r - 3))) { + p+=4; + s+=4; + } +#endif + while((*p++ == *s++) && (s <= r)); + len = s - q - 1; + i += len; + + /* At least 3 character match found; code data. */ + /* Encode offset. 
*/ + if (off < 64) { /* 10-bit offset; 0 <= offset < 64 */ + putbits16(*dst, 0x3c0|off, 10, &olen, &l); + } else if (off < 320) { /* 12-bit offset; 64 <= offset < 320 */ + putbits16(*dst, 0xe00|(off-64), 12, &olen, &l); + } else if (off < 8192) { /* 16-bit offset; 320 <= offset < 8192 */ + putbits16(*dst, 0xc000|(off-320), 16, &olen, &l); + } else { /* NOTREACHED */ + rtn &= ~MPPC_OK; + return rtn; + } + + /* Encode length of match. */ + if (len < 4) { /* length = 3 */ + putbits8(*dst, 0, 1, &olen, &l); + } else if (len < 8) { /* 4 <= length < 8 */ + putbits8(*dst, 0x08|(len&0x03), 4, &olen, &l); + } else if (len < 16) { /* 8 <= length < 16 */ + putbits8(*dst, 0x30|(len&0x07), 6, &olen, &l); + } else if (len < 32) { /* 16 <= length < 32 */ + putbits8(*dst, 0xe0|(len&0x0f), 8, &olen, &l); + } else if (len < 64) { /* 32 <= length < 64 */ + putbits16(*dst, 0x3c0|(len&0x1f), 10, &olen, &l); + } else if (len < 128) { /* 64 <= length < 128 */ + putbits16(*dst, 0xf80|(len&0x3f), 12, &olen, &l); + } else if (len < 256) { /* 128 <= length < 256 */ + putbits16(*dst, 0x3f00|(len&0x7f), 14, &olen, &l); + } else if (len < 512) { /* 256 <= length < 512 */ + putbits16(*dst, 0xfe00|(len&0xff), 16, &olen, &l); + } else if (len < 1024) { /* 512 <= length < 1024 */ + putbits24(*dst, 0x3fc00|(len&0x1ff), 18, &olen, &l); + } else if (len < 2048) { /* 1024 <= length < 2048 */ + putbits24(*dst, 0xff800|(len&0x3ff), 20, &olen, &l); + } else if (len < 4096) { /* 2048 <= length < 4096 */ + putbits24(*dst, 0x3ff000|(len&0x7ff), 22, &olen, &l); + } else if (len < 8192) { /* 4096 <= length < 8192 */ + putbits24(*dst, 0xffe000|(len&0xfff), 24, &olen, &l); + } else { /* NOTREACHED */ + rtn &= ~MPPC_OK; + return (rtn); + } + } + + /* Add remaining octets to the output. 
*/ + while(*srcCnt - i > 0) { + if ((*src)[i] < 0x80) { /* literal byte < 0x80 */ + putbits8(*dst, (uint32_t) (*src)[i++], 8, &olen, &l); + } else { /* literal byte >= 0x80 */ + putbits16(*dst, (uint32_t) (0x100|((*src)[i++]&0x7f)), 9, &olen, + &l); + } + } + + /* Reset unused bits of the last output octet. */ + if ((l != 0) && (l != 8)) { + putbits8(*dst, 0, l, &olen, &l); + } + + /* If result is bigger than original, set flag and flush history. */ + if ((*srcCnt < olen) || ((flags & MPPC_SAVE_HISTORY) == 0)) { + if (*srcCnt < olen) + rtn |= MPPC_EXPANDED; + bzero(history, sizeof(struct MPPC_comp_state)); + state->histptr = MPPE_HIST_LEN; + } + + *src += *srcCnt; + *srcCnt = 0; + *dst += olen; + *dstCnt -= olen; + + return (rtn); +} diff --git a/sys/net/mppcd.c b/sys/net/mppcd.c new file mode 100644 index 0000000..c1730e5 --- /dev/null +++ b/sys/net/mppcd.c @@ -0,0 +1,284 @@ +/*- + * Copyright (c) 2002-2004 Jan Dubiec <jdx@slackware.pl> + * Copyright (c) 2007 Alexander Motin <mav@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * MPPC decompression library. + * Version 1.0 + * + * Note that Hi/Fn (later acquired by Exar Corporation) held US patents + * on some implementation-critical aspects of MPPC compression. + * These patents lapsed due to non-payment of fees in 2007 and by 2015 + * expired altogether. + */ + +#include <sys/param.h> +#include <sys/systm.h> + +#include <net/mppc.h> + +#define MPPE_HIST_LEN 8192 + +struct MPPC_decomp_state { + uint8_t hist[2*MPPE_HIST_LEN]; + uint16_t histptr; +}; + +static uint32_t __inline +getbits(const uint8_t *buf, const uint32_t n, uint32_t *i, uint32_t *l) +{ + static const uint32_t m[] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; + uint32_t res, ol; + + ol = *l; + if (*l >= n) { + *l = (*l) - n; + res = (buf[*i] & m[ol]) >> (*l); + if (*l == 0) { + *l = 8; + (*i)++; + } + } else { + *l = 8 - n + (*l); + res = (buf[(*i)++] & m[ol]) << 8; + res = (res | buf[*i]) >> (*l); + } + + return (res); +} + +static uint32_t __inline +getbyte(const uint8_t *buf, const uint32_t i, const uint32_t l) +{ + if (l == 8) { + return (buf[i]); + } else { + return ((((buf[i] << 8) | buf[i+1]) >> l) & 0xff); + } +} + +static void __inline +lamecopy(uint8_t *dst, uint8_t *src, uint32_t len) +{ + while (len--) + *dst++ = *src++; +} + +size_t MPPC_SizeOfDecompressionHistory(void) +{ + return (sizeof(struct MPPC_decomp_state)); +} + +void MPPC_InitDecompressionHistory(char *history) +{ + struct 
MPPC_decomp_state *state = (struct MPPC_decomp_state*)history; + + bzero(history, sizeof(struct MPPC_decomp_state)); + state->histptr = MPPE_HIST_LEN; +} + +int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags) +{ + struct MPPC_decomp_state *state = (struct MPPC_decomp_state*)history; + uint32_t olen, off, len, bits, val, sig, i, l; + uint8_t *hist, *s; + u_char *isrc = *src; + int rtn = MPPC_OK; + + if ((flags & MPPC_RESTART_HISTORY) != 0) { + memcpy(state->hist, state->hist + MPPE_HIST_LEN, MPPE_HIST_LEN); + state->histptr = MPPE_HIST_LEN; + } + + hist = state->hist + state->histptr; + olen = len = i = 0; + l = 8; + bits = *srcCnt * 8; + while (bits >= 8) { + val = getbyte(isrc, i++, l); + if (val < 0x80) { /* literal byte < 0x80 */ + if (state->histptr < 2*MPPE_HIST_LEN) { + /* Copy uncompressed byte to the history. */ + (state->hist)[(state->histptr)++] = (uint8_t) val; + } else { + /* Buffer overflow; drop packet. */ + rtn &= ~MPPC_OK; + return rtn; + } + olen++; + bits -= 8; + continue; + } + + sig = val & 0xc0; + if (sig == 0x80) { /* literal byte >= 0x80 */ + if (state->histptr < 2*MPPE_HIST_LEN) { + /* Copy uncompressed byte to the history. 
*/ + (state->hist)[(state->histptr)++] = + (uint8_t) (0x80|((val&0x3f)<<1)|getbits(isrc, 1 , &i ,&l)); + } else { + /* buffer overflow; drop packet */ + rtn &= ~MPPC_OK; + return (rtn); + } + olen++; + bits -= 9; + continue; + } + + /* Not a literal byte so it must be an (offset,length) pair */ + /* decode offset */ + sig = val & 0xf0; + if (sig == 0xf0) { /* 10-bit offset; 0 <= offset < 64 */ + off = (((val&0x0f)<<2)|getbits(isrc, 2 , &i ,&l)); + bits -= 10; + } else { + if (sig == 0xe0) { /* 12-bit offset; 64 <= offset < 320 */ + off = ((((val&0x0f)<<4)|getbits(isrc, 4 , &i ,&l))+64); + bits -= 12; + } else { + if ((sig&0xe0) == 0xc0) {/* 16-bit offset; 320 <= offset < 8192 */ + off = ((((val&0x1f)<<8)|getbyte(isrc, i++, l))+320); + bits -= 16; + if (off > MPPE_HIST_LEN - 1) { + rtn &= ~MPPC_OK; + return (rtn); + } + } else { /* NOTREACHED */ + rtn &= ~MPPC_OK; + return (rtn); + } + } + } + /* Decode length of match. */ + val = getbyte(isrc, i, l); + if ((val & 0x80) == 0x00) { /* len = 3 */ + len = 3; + bits--; + getbits(isrc, 1 , &i ,&l); + } else if ((val & 0xc0) == 0x80) { /* 4 <= len < 8 */ + len = 0x04 | ((val>>4) & 0x03); + bits -= 4; + getbits(isrc, 4 , &i ,&l); + } else if ((val & 0xe0) == 0xc0) { /* 8 <= len < 16 */ + len = 0x08 | ((val>>2) & 0x07); + bits -= 6; + getbits(isrc, 6 , &i ,&l); + } else if ((val & 0xf0) == 0xe0) { /* 16 <= len < 32 */ + len = 0x10 | (val & 0x0f); + bits -= 8; + i++; + } else { + bits -= 8; + val = (val << 8) | getbyte(isrc, ++i, l); + if ((val & 0xf800) == 0xf000) { /* 32 <= len < 64 */ + len = 0x0020 | ((val >> 6) & 0x001f); + bits -= 2; + getbits(isrc, 2 , &i ,&l); + } else if ((val & 0xfc00) == 0xf800) { /* 64 <= len < 128 */ + len = 0x0040 | ((val >> 4) & 0x003f); + bits -= 4; + getbits(isrc, 4 , &i ,&l); + } else if ((val & 0xfe00) == 0xfc00) { /* 128 <= len < 256 */ + len = 0x0080 | ((val >> 2) & 0x007f); + bits -= 6; + getbits(isrc, 6 , &i ,&l); + } else if ((val & 0xff00) == 0xfe00) { /* 256 <= len < 512 */ + len = 
0x0100 | (val & 0x00ff); + bits -= 8; + i++; + } else { + bits -= 8; + val = (val << 8) | getbyte(isrc, ++i, l); + if ((val & 0xff8000) == 0xff0000) { /* 512 <= len < 1024 */ + len = 0x000200 | ((val >> 6) & 0x0001ff); + bits -= 2; + getbits(isrc, 2 , &i ,&l); + } else if ((val & 0xffc000) == 0xff8000) {/* 1024 <= len < 2048 */ + len = 0x000400 | ((val >> 4) & 0x0003ff); + bits -= 4; + getbits(isrc, 4 , &i ,&l); + } else if ((val & 0xffe000) == 0xffc000) {/* 2048 <= len < 4096 */ + len = 0x000800 | ((val >> 2) & 0x0007ff); + bits -= 6; + getbits(isrc, 6 , &i ,&l); + } else if ((val & 0xfff000) == 0xffe000) {/* 4096 <= len < 8192 */ + len = 0x001000 | (val & 0x000fff); + bits -= 8; + i++; + } else { /* NOTREACHED */ + rtn &= ~MPPC_OK; + return (rtn); + } + } + } + + s = state->hist + state->histptr; + state->histptr += len; + olen += len; + if (state->histptr < 2*MPPE_HIST_LEN) { + /* Copy uncompressed bytes to the history. */ + + /* + * In some cases len may be greater than off. It means that memory + * areas pointed by s and s-off overlap. To decode that strange case + * data should be copied exactly by address increasing to make + * some data repeated. + */ + lamecopy(s, s - off, len); + } else { + /* Buffer overflow; drop packet. */ + rtn &= ~MPPC_OK; + return (rtn); + } + } + + /* Do PFC decompression. */ + len = olen; + if ((hist[0] & 0x01) != 0) { + (*dst)[0] = 0; + (*dst)++; + len++; + } + + if (len <= *dstCnt) { + /* Copy uncompressed packet to the output buffer. */ + memcpy(*dst, hist, olen); + } else { + /* Buffer overflow; drop packet. 
*/ + rtn |= MPPC_DEST_EXHAUSTED; + } + + *src += *srcCnt; + *srcCnt = 0; + *dst += len; + *dstCnt -= len; + + return (rtn); +} diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 1979ea0..cdabb3e 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -301,8 +301,6 @@ rts_attach(struct socket *so, int proto, struct thread *td) /* XXX */ rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO); - if (rp == NULL) - return ENOBUFS; so->so_pcb = (caddr_t)rp; so->so_fibnum = td->td_proc->p_fibnum; diff --git a/sys/netinet/sctp_cc_functions.c b/sys/netinet/sctp_cc_functions.c index d616d19..0bddcfd 100644 --- a/sys/netinet/sctp_cc_functions.c +++ b/sys/netinet/sctp_cc_functions.c @@ -95,7 +95,7 @@ sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) } sctp_enforce_cwnd_limit(assoc, net); net->ssthresh = assoc->peers_rwnd; - SDT_PROBE(sctp, cwnd, net, init, + SDT_PROBE5(sctp, cwnd, net, init, stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, 0, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & @@ -193,7 +193,7 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, } net->cwnd = net->ssthresh; sctp_enforce_cwnd_limit(asoc, net); - SDT_PROBE(sctp, cwnd, net, fr, + SDT_PROBE5(sctp, cwnd, net, fr, stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { @@ -261,7 +261,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, */ /* Probe point 5 */ probepoint |= ((5 << 16) | 1); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -282,7 +282,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, oth |= net->cc_mod.rtcc.step_cnt; oth <<= 16; oth |= net->cc_mod.rtcc.last_step_state; - SDT_PROBE(sctp, cwnd, net, rttstep, + SDT_PROBE5(sctp, cwnd, net, rttstep, vtag, 
((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -306,7 +306,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, */ /* Probe point 6 */ probepoint |= ((6 << 16) | 0); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -318,7 +318,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, oth |= net->cc_mod.rtcc.step_cnt; oth <<= 16; oth |= net->cc_mod.rtcc.last_step_state; - SDT_PROBE(sctp, cwnd, net, rttstep, + SDT_PROBE5(sctp, cwnd, net, rttstep, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -349,7 +349,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, */ /* Probe point 7 */ probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -398,7 +398,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6 /* We caused it maybe.. back off? 
*/ /* PROBE POINT 1 */ probepoint |= ((1 << 16) | 1); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -416,7 +416,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6 } /* Probe point 2 */ probepoint |= ((2 << 16) | 0); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -429,7 +429,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6 oth |= net->cc_mod.rtcc.step_cnt; oth <<= 16; oth |= net->cc_mod.rtcc.last_step_state; - SDT_PROBE(sctp, cwnd, net, rttstep, + SDT_PROBE5(sctp, cwnd, net, rttstep, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -453,7 +453,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6 /* bw & rtt decreased */ /* Probe point 3 */ probepoint |= ((3 << 16) | 0); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -465,7 +465,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6 oth |= net->cc_mod.rtcc.step_cnt; oth <<= 16; oth |= net->cc_mod.rtcc.last_step_state; - SDT_PROBE(sctp, cwnd, net, rttstep, + SDT_PROBE5(sctp, cwnd, net, rttstep, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -485,7 +485,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6 /* The bw decreased but rtt stayed the same */ /* Probe point 4 */ probepoint |= ((4 << 16) | 0); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -497,7 +497,7 @@ 
cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6 oth |= net->cc_mod.rtcc.step_cnt; oth <<= 16; oth |= net->cc_mod.rtcc.last_step_state; - SDT_PROBE(sctp, cwnd, net, rttstep, + SDT_PROBE5(sctp, cwnd, net, rttstep, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -535,7 +535,7 @@ cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6 */ /* PROBE POINT 0 */ probepoint = (((uint64_t) net->cwnd) << 32); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -547,7 +547,7 @@ cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6 oth |= net->cc_mod.rtcc.step_cnt; oth <<= 16; oth |= net->cc_mod.rtcc.last_step_state; - SDT_PROBE(sctp, cwnd, net, rttstep, + SDT_PROBE5(sctp, cwnd, net, rttstep, vtag, ((net->cc_mod.rtcc.lbw << 32) | nbw), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -647,7 +647,7 @@ cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw) /* Can't determine do not change */ probepoint |= ((0xd << 16) | inst_ind); } - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((nbw << 32) | inst_bw), ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt), @@ -807,7 +807,7 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb, (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, nbw, ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -906,7 +906,7 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb, sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS); } - SDT_PROBE(sctp, cwnd, net, ack, + SDT_PROBE5(sctp, cwnd, net, ack, stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, @@ -969,7 +969,7 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb, } net->cwnd += 
incr; sctp_enforce_cwnd_limit(asoc, net); - SDT_PROBE(sctp, cwnd, net, ack, + SDT_PROBE5(sctp, cwnd, net, ack, stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, @@ -1001,7 +1001,7 @@ sctp_cwnd_update_exit_pf_common(struct sctp_tcb *stcb, struct sctp_nets *net) old_cwnd = net->cwnd; net->cwnd = net->mtu; - SDT_PROBE(sctp, cwnd, net, ack, + SDT_PROBE5(sctp, cwnd, net, ack, stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, old_cwnd, net->cwnd); SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", @@ -1072,7 +1072,7 @@ sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net) } net->cwnd = net->mtu; net->partial_bytes_acked = 0; - SDT_PROBE(sctp, cwnd, net, to, + SDT_PROBE5(sctp, cwnd, net, to, stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, @@ -1132,7 +1132,7 @@ sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets * net->RTO <<= 1; } net->cwnd = net->ssthresh; - SDT_PROBE(sctp, cwnd, net, ecn, + SDT_PROBE5(sctp, cwnd, net, ecn, stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, @@ -1251,7 +1251,7 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, sctp_enforce_cwnd_limit(&stcb->asoc, net); if (net->cwnd - old_cwnd != 0) { /* log only changes */ - SDT_PROBE(sctp, cwnd, net, pd, + SDT_PROBE5(sctp, cwnd, net, pd, stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, @@ -1274,7 +1274,7 @@ sctp_cwnd_update_after_output(struct sctp_tcb *stcb, if (burst_limit) { net->cwnd = (net->flight_size + (burst_limit * net->mtu)); sctp_enforce_cwnd_limit(&stcb->asoc, net); - SDT_PROBE(sctp, cwnd, net, bl, + SDT_PROBE5(sctp, cwnd, net, bl, stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, @@ -1350,7 +1350,7 @@ sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb, probepoint = (((uint64_t) net->cwnd) << 32); /* Probe point 8 
*/ probepoint |= ((8 << 16) | 0); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, ((net->cc_mod.rtcc.lbw << 32) | 0), ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt), @@ -1413,7 +1413,7 @@ sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb, vtag = (net->rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport); - SDT_PROBE(sctp, cwnd, net, rttvar, + SDT_PROBE5(sctp, cwnd, net, rttvar, vtag, 0, 0, diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 57332be..b5781d3 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -127,6 +127,16 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_max), 0, "Max size of automatic send buffer"); +/* + * Make sure that either retransmit or persist timer is set for SYN, FIN and + * non-ACK. + */ +#define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \ + KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\ + tcp_timer_active((tp), TT_REXMT) || \ + tcp_timer_active((tp), TT_PERSIST), \ + ("neither rexmt nor persist timer is set")) + static void inline hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, long len, int tso); @@ -1531,9 +1541,7 @@ timer: tp->t_softerror = error; return (error); case ENOBUFS: - if (!tcp_timer_active(tp, TT_REXMT) && - !tcp_timer_active(tp, TT_PERSIST)) - tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); + TCP_XMIT_TIMER_ASSERT(tp, len, flags); tp->snd_cwnd = tp->t_maxseg; return (0); case EMSGSIZE: diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 9fe6eac..8afb05f 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1285,8 +1285,6 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); - if (inp_list == NULL) - return (ENOMEM); INP_INFO_RLOCK(&V_tcbinfo); for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; diff --git a/sys/nlm/nlm_advlock.c 
b/sys/nlm/nlm_advlock.c index 003a43d..72702de 100644 --- a/sys/nlm/nlm_advlock.c +++ b/sys/nlm/nlm_advlock.c @@ -210,7 +210,7 @@ nlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl, struct rpc_callextra ext; struct nlm_feedback_arg nf; AUTH *auth; - struct ucred *cred; + struct ucred *cred, *cred1; struct nlm_file_svid *ns; int svid; int error; @@ -240,15 +240,17 @@ nlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl, else retries = INT_MAX; - if (unlock_vp) - VOP_UNLOCK(vp, 0); - /* * We need to switch to mount-point creds so that we can send - * packets from a privileged port. + * packets from a privileged port. Reference mnt_cred and + * switch to them before unlocking the vnode, since mount + * point could be unmounted right after unlock. */ cred = td->td_ucred; td->td_ucred = vp->v_mount->mnt_cred; + crhold(td->td_ucred); + if (unlock_vp) + VOP_UNLOCK(vp, 0); host = nlm_find_host_by_name(servername, sa, vers); auth = authunix_create(cred); @@ -373,7 +375,9 @@ nlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl, if (ns) nlm_free_svid(ns); + cred1 = td->td_ucred; td->td_ucred = cred; + crfree(cred1); AUTH_DESTROY(auth); nlm_host_release(host); @@ -709,7 +713,37 @@ nlm_record_lock(struct vnode *vp, int op, struct flock *fl, newfl.l_pid = svid; newfl.l_sysid = NLM_SYSID_CLIENT | sysid; - error = lf_advlockasync(&a, &vp->v_lockf, size); + for (;;) { + error = lf_advlockasync(&a, &vp->v_lockf, size); + if (error == EDEADLK) { + /* + * Locks are associated with the processes and + * not with threads. Suppose we have two + * threads A1 A2 in one process, A1 locked + * file f1, A2 is locking file f2, and A1 is + * unlocking f1. Then remote server may + * already unlocked f1, while local still not + * yet scheduled A1 to make the call to local + * advlock manager. The process B owns lock on + * f2 and issued the lock on f1. Remote would + * grant B the request on f1, but local would + * return EDEADLK. 
+ */ + pause("nlmdlk", 1); + /* XXXKIB allow suspend */ + } else if (error == EINTR) { + /* + * lf_purgelocks() might wake up the lock + * waiter and removed our lock graph edges. + * There is no sense in re-trying recording + * the lock to the local manager after + * reclaim. + */ + error = 0; + break; + } else + break; + } KASSERT(error == 0 || error == ENOENT, ("Failed to register NFS lock locally - error=%d", error)); } diff --git a/sys/ofed/include/linux/etherdevice.h b/sys/ofed/include/linux/etherdevice.h index a975bd0..ca8aef7 100644 --- a/sys/ofed/include/linux/etherdevice.h +++ b/sys/ofed/include/linux/etherdevice.h @@ -137,7 +137,8 @@ eth_broadcast_addr(u8 *pa) static inline void random_ether_addr(u8 * dst) { - read_random(dst, 6); + if (read_random(dst, 6) == 0) + arc4rand(dst, 6, 0); dst[0] &= 0xfe; dst[0] |= 0x02; diff --git a/sys/ofed/include/linux/random.h b/sys/ofed/include/linux/random.h index 0dac9fa..9c0a681 100644 --- a/sys/ofed/include/linux/random.h +++ b/sys/ofed/include/linux/random.h @@ -2,7 +2,7 @@ * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -31,11 +31,13 @@ #define _LINUX_RANDOM_H_ #include <sys/random.h> +#include <sys/libkern.h> static inline void get_random_bytes(void *buf, int nbytes) { - read_random(buf, nbytes); + if (read_random(buf, nbytes) == 0) + arc4rand(buf, nbytes, 0); } #endif /* _LINUX_RANDOM_H_ */ diff --git a/sys/rpc/rpc_generic.c b/sys/rpc/rpc_generic.c index 28aa849..80d658d 100644 --- a/sys/rpc/rpc_generic.c +++ b/sys/rpc/rpc_generic.c @@ -390,15 +390,11 @@ __rpc_uaddr2taddr_af(int af, const char *uaddr) } ret = (struct netbuf *)malloc(sizeof *ret, M_RPC, M_WAITOK); - if (ret == NULL) - goto out; switch (af) { case AF_INET: sin = (struct sockaddr_in *)malloc(sizeof *sin, M_RPC, M_WAITOK); - if (sin == NULL) - goto out; memset(sin, 0, sizeof *sin); sin->sin_family = AF_INET; sin->sin_port = htons(port); @@ -415,8 +411,6 @@ __rpc_uaddr2taddr_af(int af, const char *uaddr) case AF_INET6: sin6 = (struct sockaddr_in6 *)malloc(sizeof *sin6, M_RPC, M_WAITOK); - if (sin6 == NULL) - goto out; memset(sin6, 0, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(port); @@ -433,8 +427,6 @@ __rpc_uaddr2taddr_af(int af, const char *uaddr) case AF_LOCAL: sun = (struct sockaddr_un *)malloc(sizeof *sun, M_RPC, M_WAITOK); - if (sun == NULL) - goto out; memset(sun, 0, sizeof *sun); sun->sun_family = AF_LOCAL; strncpy(sun->sun_path, addrstr, sizeof(sun->sun_path) - 1); diff --git a/sys/security/audit/audit_syscalls.c b/sys/security/audit/audit_syscalls.c index acf005e..90d811d 100644 --- a/sys/security/audit/audit_syscalls.c +++ b/sys/security/audit/audit_syscalls.c @@ -461,7 +461,7 @@ sys_auditon(struct thread *td, struct auditon_args *uap) udata.au_aupinfo.ap_mask.am_success; newcred->cr_audit.ai_mask.am_failure = udata.au_aupinfo.ap_mask.am_failure; - td->td_proc->p_ucred = newcred; + proc_set_cred(tp, newcred); PROC_UNLOCK(tp); crfree(oldcred); break; @@ -600,7 +600,7 @@ sys_setauid(struct thread *td, struct 
setauid_args *uap) if (error) goto fail; newcred->cr_audit.ai_auid = id; - td->td_proc->p_ucred = newcred; + proc_set_cred(td->td_proc, newcred); PROC_UNLOCK(td->td_proc); crfree(oldcred); return (0); @@ -671,7 +671,7 @@ sys_setaudit(struct thread *td, struct setaudit_args *uap) newcred->cr_audit.ai_termid.at_addr[0] = ai.ai_termid.machine; newcred->cr_audit.ai_termid.at_port = ai.ai_termid.port; newcred->cr_audit.ai_termid.at_type = AU_IPv4; - td->td_proc->p_ucred = newcred; + proc_set_cred(td->td_proc, newcred); PROC_UNLOCK(td->td_proc); crfree(oldcred); return (0); @@ -728,7 +728,7 @@ sys_setaudit_addr(struct thread *td, struct setaudit_addr_args *uap) if (error) goto fail; newcred->cr_audit = aia; - td->td_proc->p_ucred = newcred; + proc_set_cred(td->td_proc, newcred); PROC_UNLOCK(td->td_proc); crfree(oldcred); return (0); diff --git a/sys/security/mac/mac_framework.c b/sys/security/mac/mac_framework.c index 0c64414..8554ba2 100644 --- a/sys/security/mac/mac_framework.c +++ b/sys/security/mac/mac_framework.c @@ -94,11 +94,11 @@ __FBSDID("$FreeBSD$"); SDT_PROVIDER_DEFINE(mac); SDT_PROVIDER_DEFINE(mac_framework); -SDT_PROBE_DEFINE2(mac, kernel, policy, modevent, "int", +SDT_PROBE_DEFINE2(mac, , policy, modevent, "int", "struct mac_policy_conf *"); -SDT_PROBE_DEFINE1(mac, kernel, policy, register, +SDT_PROBE_DEFINE1(mac, , policy, register, "struct mac_policy_conf *"); -SDT_PROBE_DEFINE1(mac, kernel, policy, unregister, +SDT_PROBE_DEFINE1(mac, , policy, unregister, "struct mac_policy_conf *"); /* @@ -445,7 +445,7 @@ mac_policy_register(struct mac_policy_conf *mpc) (*(mpc->mpc_ops->mpo_init))(mpc); mac_policy_update(); - SDT_PROBE(mac, kernel, policy, register, mpc, 0, 0, 0, 0); + SDT_PROBE1(mac, , policy, register, mpc); printf("Security policy loaded: %s (%s)\n", mpc->mpc_fullname, mpc->mpc_name); @@ -492,7 +492,7 @@ mac_policy_unregister(struct mac_policy_conf *mpc) mac_policy_update(); mac_policy_xunlock(); - SDT_PROBE(mac, kernel, policy, unregister, mpc, 0, 
0, 0, 0); + SDT_PROBE1(mac, , policy, unregister, mpc); printf("Security policy unload: %s (%s)\n", mpc->mpc_fullname, mpc->mpc_name); @@ -518,7 +518,7 @@ mac_policy_modevent(module_t mod, int type, void *data) } #endif - SDT_PROBE(mac, kernel, policy, modevent, type, mpc, 0, 0, 0); + SDT_PROBE2(mac, , policy, modevent, type, mpc); switch (type) { case MOD_LOAD: if (mpc->mpc_loadtime_flags & MPC_LOADTIME_FLAG_NOTLATE && diff --git a/sys/security/mac/mac_internal.h b/sys/security/mac/mac_internal.h index ceb1e87..2ecffc6 100644 --- a/sys/security/mac/mac_internal.h +++ b/sys/security/mac/mac_internal.h @@ -74,35 +74,35 @@ SDT_PROVIDER_DECLARE(mac); /* MAC Framework-level events. */ SDT_PROVIDER_DECLARE(mac_framework); /* Entry points to MAC. */ #define MAC_CHECK_PROBE_DEFINE4(name, arg0, arg1, arg2, arg3) \ - SDT_PROBE_DEFINE5(mac_framework, kernel, name, mac__check__err, \ + SDT_PROBE_DEFINE5(mac_framework, , name, mac__check__err, \ "int", arg0, arg1, arg2, arg3); \ - SDT_PROBE_DEFINE5(mac_framework, kernel, name, mac__check__ok, \ + SDT_PROBE_DEFINE5(mac_framework, , name, mac__check__ok, \ "int", arg0, arg1, arg2, arg3); #define MAC_CHECK_PROBE_DEFINE3(name, arg0, arg1, arg2) \ - SDT_PROBE_DEFINE4(mac_framework, kernel, name, mac__check__err, \ + SDT_PROBE_DEFINE4(mac_framework, , name, mac__check__err, \ "int", arg0, arg1, arg2); \ - SDT_PROBE_DEFINE4(mac_framework, kernel, name, mac__check__ok, \ + SDT_PROBE_DEFINE4(mac_framework, , name, mac__check__ok, \ "int", arg0, arg1, arg2); #define MAC_CHECK_PROBE_DEFINE2(name, arg0, arg1) \ - SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac__check__err, \ + SDT_PROBE_DEFINE3(mac_framework, , name, mac__check__err, \ "int", arg0, arg1); \ - SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac__check__ok, \ + SDT_PROBE_DEFINE3(mac_framework, , name, mac__check__ok, \ "int", arg0, arg1); #define MAC_CHECK_PROBE_DEFINE1(name, arg0) \ - SDT_PROBE_DEFINE2(mac_framework, kernel, name, mac__check__err, \ + 
SDT_PROBE_DEFINE2(mac_framework, , name, mac__check__err, \ "int", arg0); \ - SDT_PROBE_DEFINE2(mac_framework, kernel, name, mac__check__ok, \ + SDT_PROBE_DEFINE2(mac_framework, , name, mac__check__ok, \ "int", arg0); #define MAC_CHECK_PROBE4(name, error, arg0, arg1, arg2, arg3) do { \ if (error) { \ - SDT_PROBE(mac_framework, kernel, name, mac__check__err, \ + SDT_PROBE5(mac_framework, , name, mac__check__err, \ error, arg0, arg1, arg2, arg3); \ } else { \ - SDT_PROBE(mac_framework, kernel, name, mac__check__ok, \ + SDT_PROBE5(mac_framework, , name, mac__check__ok, \ 0, arg0, arg1, arg2, arg3); \ } \ } while (0) @@ -116,18 +116,18 @@ SDT_PROVIDER_DECLARE(mac_framework); /* Entry points to MAC. */ #endif #define MAC_GRANT_PROBE_DEFINE2(name, arg0, arg1) \ - SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac__grant__err, \ + SDT_PROBE_DEFINE3(mac_framework, , name, mac__grant__err, \ "int", arg0, arg1); \ - SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac__grant__ok, \ + SDT_PROBE_DEFINE3(mac_framework, , name, mac__grant__ok, \ "int", arg0, arg1); #define MAC_GRANT_PROBE2(name, error, arg0, arg1) do { \ if (error) { \ - SDT_PROBE(mac_framework, kernel, name, mac__grant__err, \ - error, arg0, arg1, 0, 0); \ + SDT_PROBE3(mac_framework, , name, mac__grant__err, \ + error, arg0, arg1); \ } else { \ - SDT_PROBE(mac_framework, kernel, name, mac__grant__ok, \ - error, arg0, arg1, 0, 0); \ + SDT_PROBE3(mac_framework, , name, mac__grant__ok, \ + error, arg0, arg1); \ } \ } while (0) diff --git a/sys/security/mac/mac_syscalls.c b/sys/security/mac/mac_syscalls.c index f084ea4..304d313 100644 --- a/sys/security/mac/mac_syscalls.c +++ b/sys/security/mac/mac_syscalls.c @@ -208,7 +208,7 @@ sys___mac_set_proc(struct thread *td, struct __mac_set_proc_args *uap) setsugid(p); crcopy(newcred, oldcred); mac_cred_relabel(newcred, intlabel); - p->p_ucred = newcred; + proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); diff --git a/sys/security/mac_lomac/mac_lomac.c 
b/sys/security/mac_lomac/mac_lomac.c index 40c9c2f..67f3a36 100644 --- a/sys/security/mac_lomac/mac_lomac.c +++ b/sys/security/mac_lomac/mac_lomac.c @@ -2275,7 +2275,7 @@ lomac_thread_userret(struct thread *td) crcopy(newcred, oldcred); crhold(newcred); lomac_copy(&subj->mac_lomac, SLOT(newcred->cr_label)); - p->p_ucred = newcred; + proc_set_cred(p, newcred); crfree(oldcred); dodrop = 1; out: diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h index 4a5ce9c..a893b7b 100644 --- a/sys/sys/imgact.h +++ b/sys/sys/imgact.h @@ -38,6 +38,8 @@ #define MAXSHELLCMDLEN PAGE_SIZE +struct ucred; + struct image_args { char *buf; /* pointer to string buffer */ char *begin_argv; /* beginning of argv in buf */ @@ -81,6 +83,8 @@ struct image_params { int pagesizeslen; vm_prot_t stack_prot; u_long stack_sz; + struct ucred *newcred; /* new credentials if changing */ + bool credential_setid; /* true if becoming setid */ }; #ifdef _KERNEL diff --git a/sys/sys/param.h b/sys/sys/param.h index 023747d..4b6c601 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1003503 /* Master, propagated to newvers */ +#define __FreeBSD_version 1003505 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 836e123..ef60b89 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -158,6 +158,7 @@ struct pargs { * for write access. */ struct cpuset; +struct filemon; struct kaioinfo; struct kaudit_record; struct kdtrace_proc; @@ -606,6 +607,7 @@ struct proc { reaper which spawned our subtree. */ struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ + struct filemon *p_filemon; /* (c) filemon-specific data. 
*/ }; #define p_session p_pgrp->pg_session diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h index 45ed0d5..b3d6f52 100644 --- a/sys/sys/ucred.h +++ b/sys/sys/ucred.h @@ -106,6 +106,7 @@ struct ucred *crcopysafe(struct proc *p, struct ucred *cr); struct ucred *crdup(struct ucred *cr); void crextend(struct ucred *cr, int n); void cred_update_thread(struct thread *td); +struct ucred *proc_set_cred(struct proc *p, struct ucred *cr); void crfree(struct ucred *cr); struct ucred *crget(void); struct ucred *crhold(struct ucred *cr); diff --git a/sys/ufs/ufs/ufs_extattr.c b/sys/ufs/ufs/ufs_extattr.c index 2313d68..d01320a 100644 --- a/sys/ufs/ufs/ufs_extattr.c +++ b/sys/ufs/ufs/ufs_extattr.c @@ -599,8 +599,6 @@ ufs_extattr_enable(struct ufsmount *ump, int attrnamespace, attribute = malloc(sizeof(struct ufs_extattr_list_entry), M_UFS_EXTATTR, M_WAITOK); - if (attribute == NULL) - return (ENOMEM); if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) { error = EOPNOTSUPP; diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 75974c6..3f8ba38 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -93,29 +93,31 @@ SYSCTL_PROC(_vm, VM_LOADAVG, loadavg, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_loadavg, "S,loadavg", "Machine loadaverage history"); +/* + * This function aims to determine if the object is mapped, + * specifically, if it is referenced by a vm_map_entry. Because + * objects occasionally acquire transient references that do not + * represent a mapping, the method used here is inexact. However, it + * has very low overhead and is good enough for the advisory + * vm.vmtotal sysctl. 
+ */ +static bool +is_object_active(vm_object_t obj) +{ + + return (obj->ref_count > obj->shadow_count); +} + static int vmtotal(SYSCTL_HANDLER_ARGS) { - struct proc *p; struct vmtotal total; - vm_map_entry_t entry; vm_object_t object; - vm_map_t map; - int paging; + struct proc *p; struct thread *td; - struct vmspace *vm; bzero(&total, sizeof(total)); - /* - * Mark all objects as inactive. - */ - mtx_lock(&vm_object_list_mtx); - TAILQ_FOREACH(object, &vm_object_list, object_list) { - VM_OBJECT_WLOCK(object); - vm_object_clear_flag(object, OBJ_ACTIVE); - VM_OBJECT_WUNLOCK(object); - } - mtx_unlock(&vm_object_list_mtx); + /* * Calculate process statistics. */ @@ -136,11 +138,15 @@ vmtotal(SYSCTL_HANDLER_ARGS) case TDS_INHIBITED: if (TD_IS_SWAPPED(td)) total.t_sw++; - else if (TD_IS_SLEEPING(td) && - td->td_priority <= PZERO) - total.t_dw++; - else - total.t_sl++; + else if (TD_IS_SLEEPING(td)) { + if (td->td_priority <= PZERO) + total.t_dw++; + else + total.t_sl++; + if (td->td_wchan == + &cnt.v_free_count) + total.t_pw++; + } break; case TDS_CAN_RUN: @@ -158,29 +164,6 @@ vmtotal(SYSCTL_HANDLER_ARGS) } } PROC_UNLOCK(p); - /* - * Note active objects. - */ - paging = 0; - vm = vmspace_acquire_ref(p); - if (vm == NULL) - continue; - map = &vm->vm_map; - vm_map_lock_read(map); - for (entry = map->header.next; - entry != &map->header; entry = entry->next) { - if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || - (object = entry->object.vm_object) == NULL) - continue; - VM_OBJECT_WLOCK(object); - vm_object_set_flag(object, OBJ_ACTIVE); - paging |= object->paging_in_progress; - VM_OBJECT_WUNLOCK(object); - } - vm_map_unlock_read(map); - vmspace_free(vm); - if (paging) - total.t_pw++; } sx_sunlock(&allproc_lock); /* @@ -206,9 +189,18 @@ vmtotal(SYSCTL_HANDLER_ARGS) */ continue; } + if (object->ref_count == 1 && + (object->flags & OBJ_NOSPLIT) != 0) { + /* + * Also skip otherwise unreferenced swap + * objects backing tmpfs vnodes, and POSIX or + * SysV shared memory. 
+ */ + continue; + } total.t_vm += object->size; total.t_rm += object->resident_page_count; - if (object->flags & OBJ_ACTIVE) { + if (is_object_active(object)) { total.t_avm += object->size; total.t_arm += object->resident_page_count; } @@ -216,7 +208,7 @@ vmtotal(SYSCTL_HANDLER_ARGS) /* shared object */ total.t_vmshr += object->size; total.t_rmshr += object->resident_page_count; - if (object->flags & OBJ_ACTIVE) { + if (is_object_active(object)) { total.t_avmshr += object->size; total.t_armshr += object->resident_page_count; } diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index ac8feae..f45aa9e 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -181,7 +181,6 @@ struct vm_object { */ #define OBJ_FICTITIOUS 0x0001 /* (c) contains fictitious pages */ #define OBJ_UNMANAGED 0x0002 /* (c) contains unmanaged pages */ -#define OBJ_ACTIVE 0x0004 /* active objects */ #define OBJ_DEAD 0x0008 /* dead objects (during rundown) */ #define OBJ_NOSPLIT 0x0010 /* dont split this object */ #define OBJ_PIPWNT 0x0040 /* paging in progress wanted */ diff --git a/usr.bin/bsdcat/Makefile b/usr.bin/bsdcat/Makefile index edbb212..93c1b71 100644 --- a/usr.bin/bsdcat/Makefile +++ b/usr.bin/bsdcat/Makefile @@ -6,7 +6,7 @@ _LIBARCHIVEDIR= ${.CURDIR}/../../contrib/libarchive _LIBARCHIVECONFDIR= ${.CURDIR}/../../lib/libarchive PROG= bsdcat -BSDCAT_VERSION_STRING= 3.2.0 +BSDCAT_VERSION_STRING= 3.2.1 .PATH: ${_LIBARCHIVEDIR}/cat SRCS= bsdcat.c cmdline.c diff --git a/usr.bin/cpio/Makefile b/usr.bin/cpio/Makefile index ed259f2..0283c5f 100644 --- a/usr.bin/cpio/Makefile +++ b/usr.bin/cpio/Makefile @@ -6,7 +6,7 @@ LIBARCHIVEDIR= ${.CURDIR}/../../contrib/libarchive LIBARCHIVECONFDIR= ${.CURDIR}/../../lib/libarchive PROG= bsdcpio -BSDCPIO_VERSION_STRING= 3.2.0 +BSDCPIO_VERSION_STRING= 3.2.1 .PATH: ${LIBARCHIVEDIR}/cpio SRCS= cpio.c cmdline.c diff --git a/usr.bin/cpio/tests/Makefile b/usr.bin/cpio/tests/Makefile index 3159cf0..38e7f20 100644 --- a/usr.bin/cpio/tests/Makefile +++ 
b/usr.bin/cpio/tests/Makefile @@ -45,6 +45,7 @@ TESTS_SRCS= \ test_extract_cpio_xz.c \ test_format_newc.c \ test_gcpio_compat.c \ + test_missing_file.c \ test_option_0.c \ test_option_B_upper.c \ test_option_C_upper.c \ diff --git a/usr.bin/sed/process.c b/usr.bin/sed/process.c index 950e30d..891a7ce 100644 --- a/usr.bin/sed/process.c +++ b/usr.bin/sed/process.c @@ -70,7 +70,8 @@ static inline int applies(struct s_command *); static void do_tr(struct s_tr *); static void flush_appends(void); static void lputs(char *, size_t); -static int regexec_e(regex_t *, const char *, int, int, size_t); +static int regexec_e(regex_t *, const char *, int, int, size_t, + size_t); static void regsub(SPACE *, char *, char *); static int substitute(struct s_command *); @@ -271,7 +272,7 @@ new: if (!nflag && !pd) * (lastline, linenumber, ps). */ #define MATCH(a) \ - ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ + ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) : \ (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()) /* @@ -371,6 +372,7 @@ substitute(struct s_command *cp) regex_t *re; regoff_t slen; int lastempty, n; + size_t le = 0; char *s; s = ps; @@ -382,7 +384,7 @@ substitute(struct s_command *cp) linenum, fname, cp->u.s->maxbref); } } - if (!regexec_e(re, s, 0, 0, psl)) + if (!regexec_e(re, ps, 0, 0, 0, psl)) return (0); SS.len = 0; /* Clean substitute space. */ @@ -392,28 +394,30 @@ substitute(struct s_command *cp) do { /* Copy the leading retained string. */ - if (n <= 1 && match[0].rm_so) - cspace(&SS, s, match[0].rm_so, APPEND); + if (n <= 1 && (match[0].rm_so > le)) + cspace(&SS, s, match[0].rm_so - le, APPEND); /* Skip zero-length matches right after other matches. */ - if (lastempty || match[0].rm_so || + if (lastempty || (match[0].rm_so - le) || match[0].rm_so != match[0].rm_eo) { if (n <= 1) { /* Want this match: append replacement. 
*/ - regsub(&SS, s, cp->u.s->new); + regsub(&SS, ps, cp->u.s->new); if (n == 1) n = -1; } else { /* Want a later match: append original. */ - if (match[0].rm_eo) - cspace(&SS, s, match[0].rm_eo, APPEND); + if (match[0].rm_eo - le) + cspace(&SS, s, match[0].rm_eo - le, + APPEND); n--; } } /* Move past this match. */ - s += match[0].rm_eo; - slen -= match[0].rm_eo; + s = ps + match[0].rm_eo; + slen = psl - match[0].rm_eo; + le = match[0].rm_eo; /* * After a zero-length match, advance one byte, @@ -424,13 +428,16 @@ substitute(struct s_command *cp) slen = -1; else slen--; - if (*s != '\0') + if (*s != '\0') { cspace(&SS, s++, 1, APPEND); + le++; + } lastempty = 1; } else lastempty = 0; - } while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); + } while (n >= 0 && slen >= 0 && + regexec_e(re, ps, REG_NOTBOL, 0, le, psl)); /* Did not find the requested number of matches. */ if (n > 1) @@ -640,7 +647,7 @@ lputs(char *s, size_t len) static int regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, - size_t slen) + size_t start, size_t stop) { int eval; @@ -651,8 +658,8 @@ regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, defpreg = preg; /* Set anchors */ - match[0].rm_so = 0; - match[0].rm_eo = slen; + match[0].rm_so = start; + match[0].rm_eo = stop; eval = regexec(defpreg, string, nomatch ? 
0 : maxnsub + 1, match, eflags | REG_STARTEND); diff --git a/usr.bin/tar/Makefile b/usr.bin/tar/Makefile index 8b0ebc2..42a6f0c 100644 --- a/usr.bin/tar/Makefile +++ b/usr.bin/tar/Makefile @@ -4,7 +4,7 @@ LIBARCHIVEDIR= ${.CURDIR}/../../contrib/libarchive PROG= bsdtar -BSDTAR_VERSION_STRING= 3.2.0 +BSDTAR_VERSION_STRING= 3.2.1 .PATH: ${LIBARCHIVEDIR}/tar SRCS= bsdtar.c \ diff --git a/usr.bin/tar/tests/Makefile b/usr.bin/tar/tests/Makefile index e57cb3e..059ee50 100644 --- a/usr.bin/tar/tests/Makefile +++ b/usr.bin/tar/tests/Makefile @@ -38,6 +38,7 @@ TESTS_SRCS= \ test_format_newc.c \ test_help.c \ test_leading_slash.c \ + test_missing_file.c \ test_option_C_upper.c \ test_option_H_upper.c \ test_option_L_upper.c \ diff --git a/usr.sbin/cpucontrol/cpucontrol.c b/usr.sbin/cpucontrol/cpucontrol.c index 69fdf3a..504ac50 100644 --- a/usr.sbin/cpucontrol/cpucontrol.c +++ b/usr.sbin/cpucontrol/cpucontrol.c @@ -481,5 +481,5 @@ main(int argc, char *argv[]) usage(); /* Only one command can be selected. */ } SLIST_FREE(&datadirs, next, free); - return (error); + return (error == 0 ? 0 : 1); } |