diff options
author | Renato Botelho <renato@netgate.com> | 2016-11-02 10:47:11 -0200 |
---|---|---|
committer | Renato Botelho <renato@netgate.com> | 2016-11-02 10:47:11 -0200 |
commit | 3c84ef4df7f599fdf9db66521c3d33c167458687 (patch) | |
tree | 95795d4a6304e89128b277db8769df4a7d074fa4 | |
parent | eb2387fb3f25275b8ca223e3c464563294982bde (diff) | |
parent | 3944e88fda9dc9f4f391a06b18cd7583f783e8ec (diff) | |
download | FreeBSD-src-3c84ef4df7f599fdf9db66521c3d33c167458687.zip FreeBSD-src-3c84ef4df7f599fdf9db66521c3d33c167458687.tar.gz |
Merge remote-tracking branch 'origin/stable/10' into devel
124 files changed, 3544 insertions, 1956 deletions
diff --git a/contrib/libarchive/NEWS b/contrib/libarchive/NEWS index f672d3d..dd4cfd5 100644 --- a/contrib/libarchive/NEWS +++ b/contrib/libarchive/NEWS @@ -1,3 +1,6 @@ +Oct 23, 2016: libarchive 3.2.2 released + Security release + Jun 20, 2016: libarchive 3.2.1 released This fixes a handful of security and other critical issues with 3.2.0 diff --git a/contrib/libarchive/cat/test/main.c b/contrib/libarchive/cat/test/main.c index 29f7b14..a579842 100644 --- a/contrib/libarchive/cat/test/main.c +++ b/contrib/libarchive/cat/test/main.c @@ -129,6 +129,13 @@ # include <crtdbg.h> #endif +mode_t umasked(mode_t expected_mode) +{ + mode_t mode = umask(0); + umask(mode); + return expected_mode & ~mode; +} + /* Path to working directory for current test */ const char *testworkdir; #ifdef PROGRAM @@ -1156,6 +1163,35 @@ assertion_file_contains_lines_any_order(const char *file, int line, return (0); } +/* Verify that a text file does not contains the specified strings */ +int +assertion_file_contains_no_invalid_strings(const char *file, int line, + const char *pathname, const char *strings[]) +{ + char *buff; + int i; + + buff = slurpfile(NULL, "%s", pathname); + if (buff == NULL) { + failure_start(file, line, "Can't read file: %s", pathname); + failure_finish(NULL); + return (0); + } + + for (i = 0; strings[i] != NULL; ++i) { + if (strstr(buff, strings[i]) != NULL) { + failure_start(file, line, "Invalid string in %s: %s", pathname, + strings[i]); + failure_finish(NULL); + free(buff); + return(0); + } + } + + free(buff); + return (0); +} + /* Test that two paths point to the same file. */ /* As a side-effect, asserts that both files exist. */ static int @@ -1293,6 +1329,11 @@ assertion_file_time(const char *file, int line, switch (type) { case 'a': filet_nsec = st.st_atimespec.tv_nsec; break; case 'b': filet = st.st_birthtime; + /* FreeBSD filesystems that don't support birthtime + * (e.g., UFS1) always return -1 here. 
*/ + if (filet == -1) { + return (1); + } filet_nsec = st.st_birthtimespec.tv_nsec; break; case 'm': filet_nsec = st.st_mtimespec.tv_nsec; break; default: fprintf(stderr, "INTERNAL: Bad type %c for file time", type); @@ -1370,6 +1411,8 @@ assertion_file_mode(const char *file, int line, const char *pathname, int expect assertion_count(file, line); #if defined(_WIN32) && !defined(__CYGWIN__) failure_start(file, line, "assertFileMode not yet implemented for Windows"); + (void)mode; /* UNUSED */ + (void)r; /* UNUSED */ #else { struct stat st; @@ -1424,7 +1467,7 @@ assertion_file_nlinks(const char *file, int line, assertion_count(file, line); r = lstat(pathname, &st); if (r == 0 && (int)st.st_nlink == nlinks) - return (1); + return (1); failure_start(file, line, "File %s has %d links, expected %d", pathname, st.st_nlink, nlinks); failure_finish(NULL); @@ -1660,6 +1703,7 @@ assertion_make_file(const char *file, int line, if (0 != chmod(path, mode)) { failure_start(file, line, "Could not chmod %s", path); failure_finish(NULL); + close(fd); return (0); } if (contents != NULL) { @@ -1674,6 +1718,7 @@ assertion_make_file(const char *file, int line, failure_start(file, line, "Could not write to %s", path); failure_finish(NULL); + close(fd); return (0); } } diff --git a/contrib/libarchive/cat/test/test.h b/contrib/libarchive/cat/test/test.h index 1d21964..c002a2c 100644 --- a/contrib/libarchive/cat/test/test.h +++ b/contrib/libarchive/cat/test/test.h @@ -174,6 +174,9 @@ /* Assert that file contents match a string. 
*/ #define assertFileContents(data, data_size, pathname) \ assertion_file_contents(__FILE__, __LINE__, data, data_size, pathname) +/* Verify that a file does not contain invalid strings */ +#define assertFileContainsNoInvalidStrings(pathname, strings) \ + assertion_file_contains_no_invalid_strings(__FILE__, __LINE__, pathname, strings) #define assertFileMtime(pathname, sec, nsec) \ assertion_file_mtime(__FILE__, __LINE__, pathname, sec, nsec) #define assertFileMtimeRecent(pathname) \ @@ -182,6 +185,8 @@ assertion_file_nlinks(__FILE__, __LINE__, pathname, nlinks) #define assertFileSize(pathname, size) \ assertion_file_size(__FILE__, __LINE__, pathname, size) +#define assertFileMode(pathname, mode) \ + assertion_file_mode(__FILE__, __LINE__, pathname, mode) #define assertTextFileContents(text, pathname) \ assertion_text_file_contents(__FILE__, __LINE__, text, pathname) #define assertFileContainsLinesAnyOrder(pathname, lines) \ @@ -239,6 +244,7 @@ int assertion_file_atime_recent(const char *, int, const char *); int assertion_file_birthtime(const char *, int, const char *, long, long); int assertion_file_birthtime_recent(const char *, int, const char *); int assertion_file_contains_lines_any_order(const char *, int, const char *, const char **); +int assertion_file_contains_no_invalid_strings(const char *, int, const char *, const char **); int assertion_file_contents(const char *, int, const void *, int, const char *); int assertion_file_exists(const char *, int, const char *); int assertion_file_mode(const char *, int, const char *, int); @@ -327,6 +333,9 @@ void copy_reference_file(const char *); */ void extract_reference_files(const char **); +/* Subtract umask from mode */ +mode_t umasked(mode_t expected_mode); + /* Path to working directory for current test */ extern const char *testworkdir; diff --git a/contrib/libarchive/cpio/test/main.c b/contrib/libarchive/cpio/test/main.c index 541a550..d6842f5 100644 --- a/contrib/libarchive/cpio/test/main.c +++ 
b/contrib/libarchive/cpio/test/main.c @@ -130,6 +130,13 @@ __FBSDID("$FreeBSD$"); # include <crtdbg.h> #endif +mode_t umasked(mode_t expected_mode) +{ + mode_t mode = umask(0); + umask(mode); + return expected_mode & ~mode; +} + /* Path to working directory for current test */ const char *testworkdir; #ifdef PROGRAM @@ -1157,6 +1164,35 @@ assertion_file_contains_lines_any_order(const char *file, int line, return (0); } +/* Verify that a text file does not contains the specified strings */ +int +assertion_file_contains_no_invalid_strings(const char *file, int line, + const char *pathname, const char *strings[]) +{ + char *buff; + int i; + + buff = slurpfile(NULL, "%s", pathname); + if (buff == NULL) { + failure_start(file, line, "Can't read file: %s", pathname); + failure_finish(NULL); + return (0); + } + + for (i = 0; strings[i] != NULL; ++i) { + if (strstr(buff, strings[i]) != NULL) { + failure_start(file, line, "Invalid string in %s: %s", pathname, + strings[i]); + failure_finish(NULL); + free(buff); + return(0); + } + } + + free(buff); + return (0); +} + /* Test that two paths point to the same file. */ /* As a side-effect, asserts that both files exist. */ static int @@ -1294,6 +1330,11 @@ assertion_file_time(const char *file, int line, switch (type) { case 'a': filet_nsec = st.st_atimespec.tv_nsec; break; case 'b': filet = st.st_birthtime; + /* FreeBSD filesystems that don't support birthtime + * (e.g., UFS1) always return -1 here. 
*/ + if (filet == -1) { + return (1); + } filet_nsec = st.st_birthtimespec.tv_nsec; break; case 'm': filet_nsec = st.st_mtimespec.tv_nsec; break; default: fprintf(stderr, "INTERNAL: Bad type %c for file time", type); @@ -1371,6 +1412,8 @@ assertion_file_mode(const char *file, int line, const char *pathname, int expect assertion_count(file, line); #if defined(_WIN32) && !defined(__CYGWIN__) failure_start(file, line, "assertFileMode not yet implemented for Windows"); + (void)mode; /* UNUSED */ + (void)r; /* UNUSED */ #else { struct stat st; @@ -1425,7 +1468,7 @@ assertion_file_nlinks(const char *file, int line, assertion_count(file, line); r = lstat(pathname, &st); if (r == 0 && (int)st.st_nlink == nlinks) - return (1); + return (1); failure_start(file, line, "File %s has %d links, expected %d", pathname, st.st_nlink, nlinks); failure_finish(NULL); @@ -1661,6 +1704,7 @@ assertion_make_file(const char *file, int line, if (0 != chmod(path, mode)) { failure_start(file, line, "Could not chmod %s", path); failure_finish(NULL); + close(fd); return (0); } if (contents != NULL) { @@ -1675,6 +1719,7 @@ assertion_make_file(const char *file, int line, failure_start(file, line, "Could not write to %s", path); failure_finish(NULL); + close(fd); return (0); } } diff --git a/contrib/libarchive/cpio/test/test.h b/contrib/libarchive/cpio/test/test.h index 1d21964..c002a2c 100644 --- a/contrib/libarchive/cpio/test/test.h +++ b/contrib/libarchive/cpio/test/test.h @@ -174,6 +174,9 @@ /* Assert that file contents match a string. 
*/ #define assertFileContents(data, data_size, pathname) \ assertion_file_contents(__FILE__, __LINE__, data, data_size, pathname) +/* Verify that a file does not contain invalid strings */ +#define assertFileContainsNoInvalidStrings(pathname, strings) \ + assertion_file_contains_no_invalid_strings(__FILE__, __LINE__, pathname, strings) #define assertFileMtime(pathname, sec, nsec) \ assertion_file_mtime(__FILE__, __LINE__, pathname, sec, nsec) #define assertFileMtimeRecent(pathname) \ @@ -182,6 +185,8 @@ assertion_file_nlinks(__FILE__, __LINE__, pathname, nlinks) #define assertFileSize(pathname, size) \ assertion_file_size(__FILE__, __LINE__, pathname, size) +#define assertFileMode(pathname, mode) \ + assertion_file_mode(__FILE__, __LINE__, pathname, mode) #define assertTextFileContents(text, pathname) \ assertion_text_file_contents(__FILE__, __LINE__, text, pathname) #define assertFileContainsLinesAnyOrder(pathname, lines) \ @@ -239,6 +244,7 @@ int assertion_file_atime_recent(const char *, int, const char *); int assertion_file_birthtime(const char *, int, const char *, long, long); int assertion_file_birthtime_recent(const char *, int, const char *); int assertion_file_contains_lines_any_order(const char *, int, const char *, const char **); +int assertion_file_contains_no_invalid_strings(const char *, int, const char *, const char **); int assertion_file_contents(const char *, int, const void *, int, const char *); int assertion_file_exists(const char *, int, const char *); int assertion_file_mode(const char *, int, const char *, int); @@ -327,6 +333,9 @@ void copy_reference_file(const char *); */ void extract_reference_files(const char **); +/* Subtract umask from mode */ +mode_t umasked(mode_t expected_mode); + /* Path to working directory for current test */ extern const char *testworkdir; diff --git a/contrib/libarchive/libarchive/archive.h b/contrib/libarchive/libarchive/archive.h index 013eee8..18462bb 100644 --- a/contrib/libarchive/libarchive/archive.h 
+++ b/contrib/libarchive/libarchive/archive.h @@ -36,7 +36,7 @@ * assert that ARCHIVE_VERSION_NUMBER >= 2012108. */ /* Note: Compiler will complain if this does not match archive_entry.h! */ -#define ARCHIVE_VERSION_NUMBER 3002001 +#define ARCHIVE_VERSION_NUMBER 3002002 #include <sys/stat.h> #include <stddef.h> /* for wchar_t */ @@ -155,7 +155,7 @@ __LA_DECL int archive_version_number(void); /* * Textual name/version of the library, useful for version displays. */ -#define ARCHIVE_VERSION_ONLY_STRING "3.2.1" +#define ARCHIVE_VERSION_ONLY_STRING "3.2.2" #define ARCHIVE_VERSION_STRING "libarchive " ARCHIVE_VERSION_ONLY_STRING __LA_DECL const char * archive_version_string(void); diff --git a/contrib/libarchive/libarchive/archive_entry.h b/contrib/libarchive/libarchive/archive_entry.h index 3a90ac7..1150889 100644 --- a/contrib/libarchive/libarchive/archive_entry.h +++ b/contrib/libarchive/libarchive/archive_entry.h @@ -29,7 +29,7 @@ #define ARCHIVE_ENTRY_H_INCLUDED /* Note: Compiler will complain if this does not match archive.h! 
*/ -#define ARCHIVE_VERSION_NUMBER 3002001 +#define ARCHIVE_VERSION_NUMBER 3002002 /* * Note: archive_entry.h is for use outside of libarchive; the diff --git a/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c b/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c index 4cffdeb..f4a861c 100644 --- a/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c +++ b/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c @@ -627,7 +627,6 @@ translate_acl(struct archive_read_disk *a, archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Unknown ACL brand"); return (ARCHIVE_WARN); - break; } #endif diff --git a/contrib/libarchive/libarchive/archive_read_disk_set_standard_lookup.c b/contrib/libarchive/libarchive/archive_read_disk_set_standard_lookup.c index 60a9376..7191969 100644 --- a/contrib/libarchive/libarchive/archive_read_disk_set_standard_lookup.c +++ b/contrib/libarchive/libarchive/archive_read_disk_set_standard_lookup.c @@ -232,6 +232,7 @@ static const char * lookup_uname_helper(struct name_cache *cache, id_t id) { struct passwd *result; + (void)cache; /* UNUSED */ result = getpwuid((uid_t)id); @@ -298,6 +299,7 @@ static const char * lookup_gname_helper(struct name_cache *cache, id_t id) { struct group *result; + (void)cache; /* UNUSED */ result = getgrgid((gid_t)id); diff --git a/contrib/libarchive/libarchive/test/main.c b/contrib/libarchive/libarchive/test/main.c index 0f9c64b..304c7f6 100644 --- a/contrib/libarchive/libarchive/test/main.c +++ b/contrib/libarchive/libarchive/test/main.c @@ -128,6 +128,13 @@ __FBSDID("$FreeBSD$"); # include <crtdbg.h> #endif +mode_t umasked(mode_t expected_mode) +{ + mode_t mode = umask(0); + umask(mode); + return expected_mode & ~mode; +} + /* Path to working directory for current test */ const char *testworkdir; #ifdef PROGRAM @@ -1155,6 +1162,35 @@ assertion_file_contains_lines_any_order(const char *file, int line, return (0); } +/* Verify that a text file does not contains the 
specified strings */ +int +assertion_file_contains_no_invalid_strings(const char *file, int line, + const char *pathname, const char *strings[]) +{ + char *buff; + int i; + + buff = slurpfile(NULL, "%s", pathname); + if (buff == NULL) { + failure_start(file, line, "Can't read file: %s", pathname); + failure_finish(NULL); + return (0); + } + + for (i = 0; strings[i] != NULL; ++i) { + if (strstr(buff, strings[i]) != NULL) { + failure_start(file, line, "Invalid string in %s: %s", pathname, + strings[i]); + failure_finish(NULL); + free(buff); + return(0); + } + } + + free(buff); + return (0); +} + /* Test that two paths point to the same file. */ /* As a side-effect, asserts that both files exist. */ static int @@ -1364,6 +1400,33 @@ assertion_file_birthtime_recent(const char *file, int line, return assertion_file_time(file, line, pathname, 0, 0, 'b', 1); } +/* Verify mode of 'pathname'. */ +int +assertion_file_mode(const char *file, int line, const char *pathname, int expected_mode) +{ + int mode; + int r; + + assertion_count(file, line); +#if defined(_WIN32) && !defined(__CYGWIN__) + failure_start(file, line, "assertFileMode not yet implemented for Windows"); + (void)mode; /* UNUSED */ + (void)r; /* UNUSED */ +#else + { + struct stat st; + r = lstat(pathname, &st); + mode = (int)(st.st_mode & 0777); + } + if (r == 0 && mode == expected_mode) + return (1); + failure_start(file, line, "File %s has mode %o, expected %o", + pathname, mode, expected_mode); +#endif + failure_finish(NULL); + return (0); +} + /* Verify mtime of 'pathname'. 
*/ int assertion_file_mtime(const char *file, int line, @@ -1403,7 +1466,7 @@ assertion_file_nlinks(const char *file, int line, assertion_count(file, line); r = lstat(pathname, &st); if (r == 0 && (int)st.st_nlink == nlinks) - return (1); + return (1); failure_start(file, line, "File %s has %d links, expected %d", pathname, st.st_nlink, nlinks); failure_finish(NULL); @@ -1440,31 +1503,6 @@ assertion_file_size(const char *file, int line, const char *pathname, long size) return (0); } -/* Verify mode of 'pathname'. */ -int -assertion_file_mode(const char *file, int line, const char *pathname, int expected_mode) -{ - int mode; - int r; - - assertion_count(file, line); -#if defined(_WIN32) && !defined(__CYGWIN__) - failure_start(file, line, "assertFileMode not yet implemented for Windows"); -#else - { - struct stat st; - r = lstat(pathname, &st); - mode = (int)(st.st_mode & 0777); - } - if (r == 0 && mode == expected_mode) - return (1); - failure_start(file, line, "File %s has mode %o, expected %o", - pathname, mode, expected_mode); -#endif - failure_finish(NULL); - return (0); -} - /* Assert that 'pathname' is a dir. If mode >= 0, verify that too. */ int assertion_is_dir(const char *file, int line, const char *pathname, int mode) @@ -1664,6 +1702,7 @@ assertion_make_file(const char *file, int line, if (0 != chmod(path, mode)) { failure_start(file, line, "Could not chmod %s", path); failure_finish(NULL); + close(fd); return (0); } if (contents != NULL) { @@ -1678,6 +1717,7 @@ assertion_make_file(const char *file, int line, failure_start(file, line, "Could not write to %s", path); failure_finish(NULL); + close(fd); return (0); } } diff --git a/contrib/libarchive/libarchive/test/test.h b/contrib/libarchive/libarchive/test/test.h index bcf6869..f7ec59f 100644 --- a/contrib/libarchive/libarchive/test/test.h +++ b/contrib/libarchive/libarchive/test/test.h @@ -174,6 +174,9 @@ /* Assert that file contents match a string. 
*/ #define assertFileContents(data, data_size, pathname) \ assertion_file_contents(__FILE__, __LINE__, data, data_size, pathname) +/* Verify that a file does not contain invalid strings */ +#define assertFileContainsNoInvalidStrings(pathname, strings) \ + assertion_file_contains_no_invalid_strings(__FILE__, __LINE__, pathname, strings) #define assertFileMtime(pathname, sec, nsec) \ assertion_file_mtime(__FILE__, __LINE__, pathname, sec, nsec) #define assertFileMtimeRecent(pathname) \ @@ -241,14 +244,15 @@ int assertion_file_atime_recent(const char *, int, const char *); int assertion_file_birthtime(const char *, int, const char *, long, long); int assertion_file_birthtime_recent(const char *, int, const char *); int assertion_file_contains_lines_any_order(const char *, int, const char *, const char **); +int assertion_file_contains_no_invalid_strings(const char *, int, const char *, const char **); int assertion_file_contents(const char *, int, const void *, int, const char *); int assertion_file_exists(const char *, int, const char *); +int assertion_file_mode(const char *, int, const char *, int); int assertion_file_mtime(const char *, int, const char *, long, long); int assertion_file_mtime_recent(const char *, int, const char *); int assertion_file_nlinks(const char *, int, const char *, int); int assertion_file_not_exists(const char *, int, const char *); int assertion_file_size(const char *, int, const char *, long); -int assertion_file_mode(const char *, int, const char *, int); int assertion_is_dir(const char *, int, const char *, int); int assertion_is_hardlink(const char *, int, const char *, const char *); int assertion_is_not_hardlink(const char *, int, const char *, const char *); @@ -329,6 +333,9 @@ void copy_reference_file(const char *); */ void extract_reference_files(const char **); +/* Subtract umask from mode */ +mode_t umasked(mode_t expected_mode); + /* Path to working directory for current test */ extern const char *testworkdir; diff --git 
a/contrib/libarchive/libarchive/test/test_read_format_mtree_crash747.c b/contrib/libarchive/libarchive/test/test_read_format_mtree_crash747.c index c082845..9500bba 100644 --- a/contrib/libarchive/libarchive/test/test_read_format_mtree_crash747.c +++ b/contrib/libarchive/libarchive/test/test_read_format_mtree_crash747.c @@ -33,6 +33,11 @@ DEFINE_TEST(test_read_format_mtree_crash747) const char *reffile = "test_read_format_mtree_crash747.mtree.bz2"; struct archive *a; + if (archive_bzlib_version() == NULL) { + skipping("This test requires bzlib"); + return; + } + extract_reference_file(reffile); assert((a = archive_read_new()) != NULL); diff --git a/contrib/libarchive/libarchive/test/test_read_format_zip_high_compression.c b/contrib/libarchive/libarchive/test/test_read_format_zip_high_compression.c index 6c8aa8e..42faed3 100644 --- a/contrib/libarchive/libarchive/test/test_read_format_zip_high_compression.c +++ b/contrib/libarchive/libarchive/test/test_read_format_zip_high_compression.c @@ -50,6 +50,11 @@ DEFINE_TEST(test_read_format_zip_high_compression) size_t s; int64_t o; + if (archive_zlib_version() == NULL) { + skipping("Zip compression test requires zlib"); + return; + } + extract_reference_file(refname); p = slurpfile(&archive_size, refname); @@ -82,6 +87,11 @@ DEFINE_TEST(test_read_format_zip_high_compression2) char *body, *body_read, *buff; int n; + if (archive_zlib_version() == NULL) { + skipping("Zip compression test requires zlib"); + return; + } + assert((body = malloc(body_size)) != NULL); assert((body_read = malloc(body_size)) != NULL); assert((buff = malloc(buff_size)) != NULL); diff --git a/contrib/libarchive/libarchive/test/test_read_set_format.c b/contrib/libarchive/libarchive/test/test_read_set_format.c index f9be978..fb5e004 100644 --- a/contrib/libarchive/libarchive/test/test_read_set_format.c +++ b/contrib/libarchive/libarchive/test/test_read_set_format.c @@ -133,11 +133,12 @@ DEFINE_TEST(test_read_append_filter) assert((a = 
archive_read_new()) != NULL); assertA(0 == archive_read_set_format(a, ARCHIVE_FORMAT_TAR)); r = archive_read_append_filter(a, ARCHIVE_FILTER_GZIP); - if (r == ARCHIVE_WARN && !canGzip()) { - skipping("gzip reading not fully supported on this platform"); + if (r != ARCHIVE_OK && archive_zlib_version() == NULL && !canGzip()) { + skipping("gzip tests require zlib or working gzip command"); assertEqualInt(ARCHIVE_OK, archive_read_free(a)); return; } + assertEqualIntA(a, ARCHIVE_OK, r); assertEqualInt(ARCHIVE_OK, archive_read_open_memory(a, archive, sizeof(archive))); assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &ae)); @@ -200,6 +201,11 @@ DEFINE_TEST(test_read_append_filter_wrong_program) { struct archive_entry *ae; struct archive *a; +#if !defined(_WIN32) || defined(__CYGWIN__) + FILE * fp; + int fd; + fpos_t pos; +#endif /* * If we have "bunzip2 -q", try using that. @@ -208,6 +214,15 @@ DEFINE_TEST(test_read_append_filter_wrong_program) skipping("Can't run bunzip2 program on this platform"); return; } + +#if !defined(_WIN32) || defined(__CYGWIN__) + /* bunzip2 will write to stderr, redirect it to a file */ + fflush(stderr); + fgetpos(stderr, &pos); + fd = dup(fileno(stderr)); + fp = freopen("stderr1", "w", stderr); +#endif + assert((a = archive_read_new()) != NULL); assertA(0 == archive_read_set_format(a, ARCHIVE_FORMAT_TAR)); assertEqualIntA(a, ARCHIVE_OK, @@ -217,4 +232,16 @@ DEFINE_TEST(test_read_append_filter_wrong_program) assertA(archive_read_next_header(a, &ae) < (ARCHIVE_WARN)); assertEqualIntA(a, ARCHIVE_WARN, archive_read_close(a)); assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + +#if !defined(_WIN32) || defined(__CYGWIN__) + /* restore stderr and verify results */ + if (fp != NULL) { + fflush(stderr); + dup2(fd, fileno(stderr)); + close(fd); + clearerr(stderr); + fsetpos(stderr, &pos); + } + assertTextFileContents("bunzip2: (stdin) is not a bzip2 file.\n", "stderr1"); +#endif } diff --git 
a/contrib/libarchive/libarchive/test/test_write_format_iso9660.c b/contrib/libarchive/libarchive/test/test_write_format_iso9660.c index 1ea69a1..ee6db6f 100644 --- a/contrib/libarchive/libarchive/test/test_write_format_iso9660.c +++ b/contrib/libarchive/libarchive/test/test_write_format_iso9660.c @@ -117,8 +117,8 @@ DEFINE_TEST(test_write_format_iso9660) */ dirname[0] = '\0'; strcpy(dir, "/dir0"); - for (i = 0; i < 10; i++) { - dir[4] = '0' + i; + for (i = 0; i < 13; i++) { + dir[4] = "0123456789ABCDEF"[i]; if (i == 0) strcat(dirname, dir+1); else @@ -134,6 +134,19 @@ DEFINE_TEST(test_write_format_iso9660) archive_entry_free(ae); } + strcat(dirname, "/file"); + assert((ae = archive_entry_new()) != NULL); + archive_entry_set_atime(ae, 2, 20); + archive_entry_set_birthtime(ae, 3, 30); + archive_entry_set_ctime(ae, 4, 40); + archive_entry_set_mtime(ae, 5, 50); + archive_entry_copy_pathname(ae, dirname); + archive_entry_set_mode(ae, S_IFREG | 0755); + archive_entry_set_size(ae, 8); + assertEqualIntA(a, ARCHIVE_OK, archive_write_header(a, ae)); + archive_entry_free(ae); + assertEqualIntA(a, 8, archive_write_data(a, "12345678", 9)); + /* * "dir0/dir1/file1" has 8 bytes of data. 
*/ @@ -333,6 +346,45 @@ DEFINE_TEST(test_write_format_iso9660) assertEqualInt(2048, archive_entry_size(ae)); /* + * Read "dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(2, archive_entry_atime(ae)); + assertEqualInt(3, archive_entry_birthtime(ae)); + assertEqualInt(4, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA", + archive_entry_pathname(ae)); + assert((S_IFDIR | 0555) == archive_entry_mode(ae)); + assertEqualInt(2048, archive_entry_size(ae)); + + /* + * Read "dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(2, archive_entry_atime(ae)); + assertEqualInt(3, archive_entry_birthtime(ae)); + assertEqualInt(4, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB", + archive_entry_pathname(ae)); + assert((S_IFDIR | 0555) == archive_entry_mode(ae)); + assertEqualInt(2048, archive_entry_size(ae)); + + /* + * Read "dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB/dirC" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(2, archive_entry_atime(ae)); + assertEqualInt(3, archive_entry_birthtime(ae)); + assertEqualInt(4, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB/dirC", + archive_entry_pathname(ae)); + assert((S_IFDIR | 0555) == archive_entry_mode(ae)); + assertEqualInt(2048, archive_entry_size(ae)); + + /* * Read "hardlnk" */ assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); @@ -386,6 +438,21 @@ DEFINE_TEST(test_write_format_iso9660) assertEqualMem(buff2, "12345678", 8); /* + * Read 
"dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB/dirC/file" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(2, archive_entry_atime(ae)); + assertEqualInt(3, archive_entry_birthtime(ae)); + assertEqualInt(4, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB/dirC/file", archive_entry_pathname(ae)); + assert((AE_IFREG | 0555) == archive_entry_mode(ae)); + assertEqualInt(1, archive_entry_nlink(ae)); + assertEqualInt(8, archive_entry_size(ae)); + assertEqualIntA(a, 8, archive_read_data(a, buff2, 10)); + assertEqualMem(buff2, "12345678", 8); + + /* * Read "dir0/dir1/file1" */ assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); @@ -580,29 +647,65 @@ DEFINE_TEST(test_write_format_iso9660) assertEqualInt(2048, archive_entry_size(ae)); /* - * Read "hardlnk" + * Read "dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA" */ assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); assertEqualInt(5, archive_entry_atime(ae)); assertEqualInt(5, archive_entry_ctime(ae)); assertEqualInt(5, archive_entry_mtime(ae)); - assertEqualString("hardlnk", archive_entry_pathname(ae)); + assertEqualString("dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA", + archive_entry_pathname(ae)); + assert((S_IFDIR | 0700) == archive_entry_mode(ae)); + assertEqualInt(2048, archive_entry_size(ae)); + + /* + * Read "dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(5, archive_entry_atime(ae)); + assertEqualInt(5, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB", + archive_entry_pathname(ae)); + assert((S_IFDIR | 0700) == archive_entry_mode(ae)); + assertEqualInt(2048, archive_entry_size(ae)); + + /* + * Read 
"dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB/dirC" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(5, archive_entry_atime(ae)); + assertEqualInt(5, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB/dirC", + archive_entry_pathname(ae)); + assert((S_IFDIR | 0700) == archive_entry_mode(ae)); + assertEqualInt(2048, archive_entry_size(ae)); + + /* + * Read "file" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(5, archive_entry_atime(ae)); + assertEqualInt(5, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("file", archive_entry_pathname(ae)); assert((AE_IFREG | 0400) == archive_entry_mode(ae)); - assertEqualInt(2, archive_entry_nlink(ae)); assertEqualInt(8, archive_entry_size(ae)); assertEqualIntA(a, 8, archive_read_data(a, buff2, 10)); assertEqualMem(buff2, "12345678", 8); /* - * Read "file" + * Read "hardlnk" */ assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); assertEqualInt(5, archive_entry_atime(ae)); assertEqualInt(5, archive_entry_ctime(ae)); assertEqualInt(5, archive_entry_mtime(ae)); - assertEqualString("file", archive_entry_pathname(ae)); - assertEqualString("hardlnk", archive_entry_hardlink(ae)); + assertEqualString("hardlnk", archive_entry_pathname(ae)); + assertEqualString("file", archive_entry_hardlink(ae)); assert((AE_IFREG | 0400) == archive_entry_mode(ae)); + assertEqualInt(2, archive_entry_nlink(ae)); assertEqualInt(0, archive_entry_size(ae)); assertEqualIntA(a, 0, archive_read_data(a, buff2, 10)); @@ -625,6 +728,22 @@ DEFINE_TEST(test_write_format_iso9660) assertEqualMem(buff2, "12345678", 8); /* + * Read "dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB/dirC/file" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(5, archive_entry_atime(ae)); + assertEqualInt(5, 
archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString( + "dir0/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dirA/dirB/dirC/file", + archive_entry_pathname(ae)); + assert((AE_IFREG | 0400) == archive_entry_mode(ae)); + assertEqualInt(1, archive_entry_nlink(ae)); + assertEqualInt(8, archive_entry_size(ae)); + assertEqualIntA(a, 8, archive_read_data(a, buff2, 10)); + assertEqualMem(buff2, "12345678", 8); + + /* * Read "dir0/dir1/file1" */ assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); @@ -746,6 +865,42 @@ DEFINE_TEST(test_write_format_iso9660) assertEqualInt(2048, archive_entry_size(ae)); /* + * Read "rr_moved/dir7/dir8/dir9/dira" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(5, archive_entry_atime(ae)); + assertEqualInt(5, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("RR_MOVED/DIR7/DIR8/DIR9/DIRA", + archive_entry_pathname(ae)); + assert((S_IFDIR | 0700) == archive_entry_mode(ae)); + assertEqualInt(2048, archive_entry_size(ae)); + + /* + * Read "rr_moved/dir7/dir8/dir9/dira/dirB" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(5, archive_entry_atime(ae)); + assertEqualInt(5, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("RR_MOVED/DIR7/DIR8/DIR9/DIRA/DIRB", + archive_entry_pathname(ae)); + assert((S_IFDIR | 0700) == archive_entry_mode(ae)); + assertEqualInt(2048, archive_entry_size(ae)); + + /* + * Read "rr_moved/dir7/dir8/dir9/dirA/dirB/dirC" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(5, archive_entry_atime(ae)); + assertEqualInt(5, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString("RR_MOVED/DIR7/DIR8/DIR9/DIRA/DIRB/DIRC", + archive_entry_pathname(ae)); + assert((S_IFDIR | 0700) == archive_entry_mode(ae)); + assertEqualInt(2048, archive_entry_size(ae)); + + /* * Read "dir0" */ 
assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); @@ -827,33 +982,35 @@ DEFINE_TEST(test_write_format_iso9660) assertEqualInt(2048, archive_entry_size(ae)); /* - * Read "file" + * Read "hardlink" */ assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); assertEqualInt(5, archive_entry_atime(ae)); - assertEqualInt(0, archive_entry_birthtime(ae)); assertEqualInt(5, archive_entry_ctime(ae)); assertEqualInt(5, archive_entry_mtime(ae)); - assertEqualString("FILE", archive_entry_pathname(ae)); + assertEqualString("HARDLNK", archive_entry_pathname(ae)); + assertEqualString(NULL, archive_entry_hardlink(ae)); assert((AE_IFREG | 0400) == archive_entry_mode(ae)); - assertEqualInt(2, archive_entry_nlink(ae)); assertEqualInt(8, archive_entry_size(ae)); assertEqualIntA(a, 8, archive_read_data(a, buff2, 10)); assertEqualMem(buff2, "12345678", 8); /* - * Read "hardlink" + * Read "file" */ assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); assertEqualInt(5, archive_entry_atime(ae)); + assertEqualInt(0, archive_entry_birthtime(ae)); assertEqualInt(5, archive_entry_ctime(ae)); assertEqualInt(5, archive_entry_mtime(ae)); - assertEqualString("HARDLNK", archive_entry_pathname(ae)); - assertEqualString("FILE", archive_entry_hardlink(ae)); + assertEqualString("FILE", archive_entry_pathname(ae)); + assertEqualString("HARDLNK", archive_entry_hardlink(ae)); assert((AE_IFREG | 0400) == archive_entry_mode(ae)); + assertEqualInt(2, archive_entry_nlink(ae)); assertEqualInt(0, archive_entry_size(ae)); assertEqualIntA(a, 0, archive_read_data(a, buff2, 10)); + /* * Read longname */ @@ -871,6 +1028,22 @@ DEFINE_TEST(test_write_format_iso9660) assertEqualMem(buff2, "12345678", 8); /* + * Read "rr_moved/dir7/dir8/dir9/dirA/dirB/dirC/file" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(5, archive_entry_atime(ae)); + assertEqualInt(5, archive_entry_ctime(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualString( + 
"RR_MOVED/DIR7/DIR8/DIR9/DIRA/DIRB/DIRC/FILE", + archive_entry_pathname(ae)); + assert((AE_IFREG | 0400) == archive_entry_mode(ae)); + assertEqualInt(1, archive_entry_nlink(ae)); + assertEqualInt(8, archive_entry_size(ae)); + assertEqualIntA(a, 8, archive_read_data(a, buff2, 10)); + assertEqualMem(buff2, "12345678", 8); + + /* * Read "dir0/dir1/file1" */ assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); diff --git a/contrib/libarchive/tar/test/main.c b/contrib/libarchive/tar/test/main.c index 8bf1d36..6bffee2 100644 --- a/contrib/libarchive/tar/test/main.c +++ b/contrib/libarchive/tar/test/main.c @@ -1164,6 +1164,35 @@ assertion_file_contains_lines_any_order(const char *file, int line, return (0); } +/* Verify that a text file does not contains the specified strings */ +int +assertion_file_contains_no_invalid_strings(const char *file, int line, + const char *pathname, const char *strings[]) +{ + char *buff; + int i; + + buff = slurpfile(NULL, "%s", pathname); + if (buff == NULL) { + failure_start(file, line, "Can't read file: %s", pathname); + failure_finish(NULL); + return (0); + } + + for (i = 0; strings[i] != NULL; ++i) { + if (strstr(buff, strings[i]) != NULL) { + failure_start(file, line, "Invalid string in %s: %s", pathname, + strings[i]); + failure_finish(NULL); + free(buff); + return(0); + } + } + + free(buff); + return (0); +} + /* Test that two paths point to the same file. */ /* As a side-effect, asserts that both files exist. */ static int @@ -1301,6 +1330,11 @@ assertion_file_time(const char *file, int line, switch (type) { case 'a': filet_nsec = st.st_atimespec.tv_nsec; break; case 'b': filet = st.st_birthtime; + /* FreeBSD filesystems that don't support birthtime + * (e.g., UFS1) always return -1 here. 
*/ + if (filet == -1) { + return (1); + } filet_nsec = st.st_birthtimespec.tv_nsec; break; case 'm': filet_nsec = st.st_mtimespec.tv_nsec; break; default: fprintf(stderr, "INTERNAL: Bad type %c for file time", type); @@ -1378,6 +1412,8 @@ assertion_file_mode(const char *file, int line, const char *pathname, int expect assertion_count(file, line); #if defined(_WIN32) && !defined(__CYGWIN__) failure_start(file, line, "assertFileMode not yet implemented for Windows"); + (void)mode; /* UNUSED */ + (void)r; /* UNUSED */ #else { struct stat st; @@ -1432,7 +1468,7 @@ assertion_file_nlinks(const char *file, int line, assertion_count(file, line); r = lstat(pathname, &st); if (r == 0 && (int)st.st_nlink == nlinks) - return (1); + return (1); failure_start(file, line, "File %s has %d links, expected %d", pathname, st.st_nlink, nlinks); failure_finish(NULL); @@ -1668,6 +1704,7 @@ assertion_make_file(const char *file, int line, if (0 != chmod(path, mode)) { failure_start(file, line, "Could not chmod %s", path); failure_finish(NULL); + close(fd); return (0); } if (contents != NULL) { @@ -1682,6 +1719,7 @@ assertion_make_file(const char *file, int line, failure_start(file, line, "Could not write to %s", path); failure_finish(NULL); + close(fd); return (0); } } diff --git a/contrib/libarchive/tar/test/test.h b/contrib/libarchive/tar/test/test.h index 340eadc..c002a2c 100644 --- a/contrib/libarchive/tar/test/test.h +++ b/contrib/libarchive/tar/test/test.h @@ -174,6 +174,9 @@ /* Assert that file contents match a string. 
*/ #define assertFileContents(data, data_size, pathname) \ assertion_file_contents(__FILE__, __LINE__, data, data_size, pathname) +/* Verify that a file does not contain invalid strings */ +#define assertFileContainsNoInvalidStrings(pathname, strings) \ + assertion_file_contains_no_invalid_strings(__FILE__, __LINE__, pathname, strings) #define assertFileMtime(pathname, sec, nsec) \ assertion_file_mtime(__FILE__, __LINE__, pathname, sec, nsec) #define assertFileMtimeRecent(pathname) \ @@ -182,6 +185,8 @@ assertion_file_nlinks(__FILE__, __LINE__, pathname, nlinks) #define assertFileSize(pathname, size) \ assertion_file_size(__FILE__, __LINE__, pathname, size) +#define assertFileMode(pathname, mode) \ + assertion_file_mode(__FILE__, __LINE__, pathname, mode) #define assertTextFileContents(text, pathname) \ assertion_text_file_contents(__FILE__, __LINE__, text, pathname) #define assertFileContainsLinesAnyOrder(pathname, lines) \ @@ -239,6 +244,7 @@ int assertion_file_atime_recent(const char *, int, const char *); int assertion_file_birthtime(const char *, int, const char *, long, long); int assertion_file_birthtime_recent(const char *, int, const char *); int assertion_file_contains_lines_any_order(const char *, int, const char *, const char **); +int assertion_file_contains_no_invalid_strings(const char *, int, const char *, const char **); int assertion_file_contents(const char *, int, const void *, int, const char *); int assertion_file_exists(const char *, int, const char *); int assertion_file_mode(const char *, int, const char *, int); diff --git a/contrib/libarchive/tar/test/test_missing_file.c b/contrib/libarchive/tar/test/test_missing_file.c index e2e5da5..808e384 100644 --- a/contrib/libarchive/tar/test/test_missing_file.c +++ b/contrib/libarchive/tar/test/test_missing_file.c @@ -27,11 +27,15 @@ __FBSDID("$FreeBSD$"); DEFINE_TEST(test_missing_file) { + const char * invalid_stderr[] = { "INTERNAL ERROR", NULL }; assertMakeFile("file1", 0644, "file1"); 
assertMakeFile("file2", 0644, "file2"); assert(0 == systemf("%s -cf archive.tar file1 file2 2>stderr1", testprog)); assertEmptyFile("stderr1"); assert(0 != systemf("%s -cf archive.tar file1 file2 file3 2>stderr2", testprog)); + assertFileContainsNoInvalidStrings("stderr2", invalid_stderr); assert(0 != systemf("%s -cf archive.tar 2>stderr3", testprog)); - assert(0 != systemf("%s -cf archive.tar file3 2>stderr4", testprog)); + assertFileContainsNoInvalidStrings("stderr3", invalid_stderr); + assert(0 != systemf("%s -cf archive.tar file3 file4 2>stderr4", testprog)); + assertFileContainsNoInvalidStrings("stderr4", invalid_stderr); } diff --git a/contrib/libarchive/tar/test/test_option_b.c b/contrib/libarchive/tar/test/test_option_b.c index 81f50be..7c2f604 100644 --- a/contrib/libarchive/tar/test/test_option_b.c +++ b/contrib/libarchive/tar/test/test_option_b.c @@ -33,7 +33,7 @@ DEFINE_TEST(test_option_b) assertMakeFile("file1", 0644, "file1"); if (systemf("cat file1 > test_cat.out 2> test_cat.err") != 0) { - skipping("Platform doesn't have cat"); + skipping("This test requires a `cat` program"); return; } testprog_ustar = malloc(strlen(testprog) + sizeof(USTAR_OPT) + 1); diff --git a/contrib/libarchive/tar/test/test_symlink_dir.c b/contrib/libarchive/tar/test/test_symlink_dir.c index 96bc3a4..ba643c2 100644 --- a/contrib/libarchive/tar/test/test_symlink_dir.c +++ b/contrib/libarchive/tar/test/test_symlink_dir.c @@ -63,7 +63,7 @@ DEFINE_TEST(test_symlink_dir) /* "dir2" is a symlink to a non-existing "real_dir2" */ assertMakeSymlink("dest1/dir2", "real_dir2"); } else { - skipping("some symlink checks"); + skipping("Symlinks are not supported on this platform"); } /* "dir3" is a symlink to an existing "non_dir3" */ assertMakeFile("dest1/non_dir3", 0755, "abcdef"); diff --git a/contrib/libc++/include/__functional_03 b/contrib/libc++/include/__functional_03 index d8a9f05..157d6bf 100644 --- a/contrib/libc++/include/__functional_03 +++ 
b/contrib/libc++/include/__functional_03 @@ -369,7 +369,8 @@ template<class _Fp, class _Alloc, class _Rp> _Rp __func<_Fp, _Alloc, _Rp()>::operator()() { - return __invoke(__f_.first()); + typedef __invoke_void_return_wrapper<_Rp> _Invoker; + return _Invoker::__call(__f_.first()); } #ifndef _LIBCPP_NO_RTTI @@ -452,7 +453,8 @@ template<class _Fp, class _Alloc, class _Rp, class _A0> _Rp __func<_Fp, _Alloc, _Rp(_A0)>::operator()(_A0 __a0) { - return __invoke(__f_.first(), __a0); + typedef __invoke_void_return_wrapper<_Rp> _Invoker; + return _Invoker::__call(__f_.first(), __a0); } #ifndef _LIBCPP_NO_RTTI @@ -535,7 +537,8 @@ template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1> _Rp __func<_Fp, _Alloc, _Rp(_A0, _A1)>::operator()(_A0 __a0, _A1 __a1) { - return __invoke(__f_.first(), __a0, __a1); + typedef __invoke_void_return_wrapper<_Rp> _Invoker; + return _Invoker::__call(__f_.first(), __a0, __a1); } #ifndef _LIBCPP_NO_RTTI @@ -618,7 +621,8 @@ template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2> _Rp __func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::operator()(_A0 __a0, _A1 __a1, _A2 __a2) { - return __invoke(__f_.first(), __a0, __a1, __a2); + typedef __invoke_void_return_wrapper<_Rp> _Invoker; + return _Invoker::__call(__f_.first(), __a0, __a1, __a2); } #ifndef _LIBCPP_NO_RTTI diff --git a/contrib/libc++/include/__functional_base b/contrib/libc++/include/__functional_base index 6766793..e174e0c 100644 --- a/contrib/libc++/include/__functional_base +++ b/contrib/libc++/include/__functional_base @@ -419,6 +419,26 @@ struct __invoke_return typedef decltype(__invoke(_VSTD::declval<_Tp>(), _VSTD::declval<_Args>()...)) type; }; +template <class _Ret> +struct __invoke_void_return_wrapper +{ + template <class ..._Args> + static _Ret __call(_Args&&... __args) + { + return __invoke(_VSTD::forward<_Args>(__args)...); + } +}; + +template <> +struct __invoke_void_return_wrapper<void> +{ + template <class ..._Args> + static void __call(_Args&&... 
__args) + { + __invoke(_VSTD::forward<_Args>(__args)...); + } +}; + template <class _Tp> class _LIBCPP_TYPE_VIS_ONLY reference_wrapper : public __weak_result_type<_Tp> diff --git a/contrib/libc++/include/__functional_base_03 b/contrib/libc++/include/__functional_base_03 index 22c06ad..6550277 100644 --- a/contrib/libc++/include/__functional_base_03 +++ b/contrib/libc++/include/__functional_base_03 @@ -995,6 +995,63 @@ struct __invoke_return2 _VSTD::declval<_A2>())) type; }; +template <class _Ret> +struct __invoke_void_return_wrapper +{ + template <class _Fn> + static _Ret __call(_Fn __f) + { + return __invoke(__f); + } + + template <class _Fn, class _A0> + static _Ret __call(_Fn __f, _A0& __a0) + { + return __invoke(__f, __a0); + } + + template <class _Fn, class _A0, class _A1> + static _Ret __call(_Fn __f, _A0& __a0, _A1& __a1) + { + return __invoke(__f, __a0, __a1); + } + + template <class _Fn, class _A0, class _A1, class _A2> + static _Ret __call(_Fn __f, _A0& __a0, _A1& __a1, _A2& __a2) + { + return __invoke(__f, __a0, __a1, __a2); + } +}; + + +template <> +struct __invoke_void_return_wrapper<void> +{ + template <class _Fn> + static void __call(_Fn __f) + { + __invoke(__f); + } + + template <class _Fn, class _A0> + static void __call(_Fn __f, _A0& __a0) + { + __invoke(__f, __a0); + } + + template <class _Fn, class _A0, class _A1> + static void __call(_Fn __f, _A0& __a0, _A1& __a1) + { + __invoke(__f, __a0, __a1); + } + + template <class _Fn, class _A0, class _A1, class _A2> + static void __call(_Fn __f, _A0& __a0, _A1& __a1, _A2& __a2) + { + __invoke(__f, __a0, __a1, __a2); + } +}; + template <class _Tp> class _LIBCPP_TYPE_VIS_ONLY reference_wrapper : public __weak_result_type<_Tp> diff --git a/contrib/libc++/include/functional b/contrib/libc++/include/functional index d14b46b..36d422c 100644 --- a/contrib/libc++/include/functional +++ b/contrib/libc++/include/functional @@ -1367,7 +1367,8 @@ template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes> _Rp 
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::operator()(_ArgTypes&& ... __arg) { - return __invoke(__f_.first(), _VSTD::forward<_ArgTypes>(__arg)...); + typedef __invoke_void_return_wrapper<_Rp> _Invoker; + return _Invoker::__call(__f_.first(), _VSTD::forward<_ArgTypes>(__arg)...); } #ifndef _LIBCPP_NO_RTTI @@ -1429,7 +1430,7 @@ class _LIBCPP_TYPE_VIS_ONLY function<_Rp(_ArgTypes...)> template <class _Fp> struct __callable<_Fp, true> { - static const bool value = + static const bool value = is_same<void, _Rp>::value || is_convertible<typename __invoke_of<_Fp&, _ArgTypes...>::type, _Rp>::value; }; diff --git a/contrib/tzdata/asia b/contrib/tzdata/asia index 71ef878..b2c9930 100644 --- a/contrib/tzdata/asia +++ b/contrib/tzdata/asia @@ -2544,11 +2544,6 @@ Zone Asia/Karachi 4:28:12 - LMT 1907 # From Paul Eggert (2015-03-03): # http://www.timeanddate.com/time/change/west-bank/ramallah?year=2014 # says that the fall 2014 transition was Oct 23 at 24:00. -# For future dates, guess the last Friday in March at 24:00 through -# the first Friday on or after October 21 at 00:00. This is consistent with -# the predictions in today's editions of the following URLs: -# http://www.timeanddate.com/time/change/gaza-strip/gaza -# http://www.timeanddate.com/time/change/west-bank/hebron # From Hannah Kreitem (2016-03-09): # http://www.palestinecabinet.gov.ps/WebSite/ar/ViewDetails?ID=31728 @@ -2558,7 +2553,21 @@ Zone Asia/Karachi 4:28:12 - LMT 1907 # # From Paul Eggert (2016-03-12): # Predict spring transitions on March's last Saturday at 01:00 from now on. -# Leave fall predictions alone for now. + +# From Sharef Mustafa (2016-10-19): +# [T]he Palestinian cabinet decision (Mar 8th 2016) published on +# http://www.palestinecabinet.gov.ps/WebSite/Upload/Decree/GOV_17/16032016134830.pdf +# states that summer time will end on Oct 29th at 01:00. +# +# From Tim Parenti (2016-10-19): +# Predict fall transitions on October's last Saturday at 01:00 from now on. 
+# This is consistent with the 2016 transition as well as our spring +# predictions. +# +# From Paul Eggert (2016-10-19): +# It's also consistent with predictions in the following URLs today: +# http://www.timeanddate.com/time/change/gaza-strip/gaza +# http://www.timeanddate.com/time/change/west-bank/hebron # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S Rule EgyptAsia 1957 only - May 10 0:00 1:00 S @@ -2587,9 +2596,10 @@ Rule Palestine 2011 only - Sep 30 0:00 0 - Rule Palestine 2012 2014 - Mar lastThu 24:00 1:00 S Rule Palestine 2012 only - Sep 21 1:00 0 - Rule Palestine 2013 only - Sep Fri>=21 0:00 0 - -Rule Palestine 2014 max - Oct Fri>=21 0:00 0 - +Rule Palestine 2014 2015 - Oct Fri>=21 0:00 0 - Rule Palestine 2015 only - Mar lastFri 24:00 1:00 S Rule Palestine 2016 max - Mar lastSat 1:00 1:00 S +Rule Palestine 2016 max - Oct lastSat 1:00 0 - # Zone NAME GMTOFF RULES FORMAT [UNTIL] Zone Asia/Gaza 2:17:52 - LMT 1900 Oct @@ -2739,45 +2749,31 @@ Zone Asia/Singapore 6:55:25 - LMT 1901 Jan 1 # People who live in regions under Tamil control can use [TZ='Asia/Kolkata'], # as that zone has agreed with the Tamil areas since our cutoff date of 1970. -# From K Sethu (2006-04-25): -# I think the abbreviation LKT originated from the world of computers at -# the time of or subsequent to the time zone changes by SL Government -# twice in 1996 and probably SL Government or its standardization -# agencies never declared an abbreviation as a national standard. -# -# I recollect before the recent change the government announcements -# mentioning it as simply changing Sri Lanka Standard Time or Sri Lanka -# Time and no mention was made about the abbreviation. -# -# If we look at Sri Lanka Department of Government's "Official News -# Website of Sri Lanka" ... http://www.news.lk/ we can see that they -# use SLT as abbreviation in time stamp at the beginning of each news -# item.... -# -# Within Sri Lanka I think LKT is well known among computer users and -# administrators. 
In my opinion SLT may not be a good choice because the -# nation's largest telcom / internet operator Sri Lanka Telcom is well -# known by that abbreviation - simply as SLT (there IP domains are -# slt.lk and sltnet.lk). -# -# But if indeed our government has adopted SLT as standard abbreviation -# (that we have not known so far) then it is better that it be used for -# all computers. - -# From Paul Eggert (2006-04-25): -# One possibility is that we wait for a bit for the dust to settle down -# and then see what people actually say in practice. +# From Sadika Sumanapala (2016-10-19): +# According to http://www.sltime.org (maintained by Measurement Units, +# Standards & Services Department, Sri Lanka) abbreviation for Sri Lanka +# standard time is SLST. +# +# From Paul Eggert (2016-10-18): +# "SLST" seems to be reasonably recent and rarely-used outside time +# zone nerd sources. I searched Google News and found three uses of +# it in the International Business Times of India in February and +# March of this year when discussing cricket match times, but nothing +# since then (though there has been a lot of cricket) and nothing in +# other English-language news sources. Our old abbreviation "LKT" is +# even worse. For now, let's use a numeric abbreviation; we can +# switch to "SLST" if it catches on. 
# Zone NAME GMTOFF RULES FORMAT [UNTIL] Zone Asia/Colombo 5:19:24 - LMT 1880 5:19:32 - MMT 1906 # Moratuwa Mean Time - 5:30 - IST 1942 Jan 5 - 5:30 0:30 IHST 1942 Sep - 5:30 1:00 IST 1945 Oct 16 2:00 - 5:30 - IST 1996 May 25 0:00 - 6:30 - LKT 1996 Oct 26 0:30 - 6:00 - LKT 2006 Apr 15 0:30 - 5:30 - IST + 5:30 - +0530 1942 Jan 5 + 5:30 0:30 +0530/+06 1942 Sep + 5:30 1:00 +0530/+0630 1945 Oct 16 2:00 + 5:30 - +0530 1996 May 25 0:00 + 6:30 - +0630 1996 Oct 26 0:30 + 6:00 - +06 2006 Apr 15 0:30 + 5:30 - +0530 # Syria # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S diff --git a/contrib/tzdata/australasia b/contrib/tzdata/australasia index f49df1d..85d3632 100644 --- a/contrib/tzdata/australasia +++ b/contrib/tzdata/australasia @@ -350,7 +350,13 @@ Zone Indian/Cocos 6:27:40 - LMT 1900 # commencing at 2.00 am on Sunday 1st November, 2015 and ending at # 3.00 am on Sunday 17th January, 2016. -# From Paul Eggert (2015-09-01): +# From Raymond Kumar (2016-10-04): +# http://www.fiji.gov.fj/Media-Center/Press-Releases/DAYLIGHT-SAVING-STARTS-ON-6th-NOVEMBER,-2016.aspx +# "Fiji's daylight savings will begin on Sunday, 6 November 2016, when +# clocks go forward an hour at 2am to 3am.... Daylight Saving will +# end at 3.00am on Sunday 15th January 2017." + +# From Paul Eggert (2016-10-03): # For now, guess DST from 02:00 the first Sunday in November to # 03:00 the third Sunday in January. Although ad hoc, it matches # transitions since late 2014 and seems more likely to match future diff --git a/contrib/tzdata/europe b/contrib/tzdata/europe index 6020059..a7dc350 100644 --- a/contrib/tzdata/europe +++ b/contrib/tzdata/europe @@ -1908,7 +1908,7 @@ Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15 # Amsterdam mean time. 
# The data entries before 1945 are taken from -# http://www.phys.uu.nl/~vgent/wettijd/wettijd.htm +# http://www.staff.science.uu.nl/~gent0113/idl/idl.htm # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S Rule Neth 1916 only - May 1 0:00 1:00 NST # Netherlands Summer Time @@ -3427,22 +3427,24 @@ Zone Europe/Zurich 0:34:08 - LMT 1853 Jul 16 # See above comment. # Turkey -# From Amar Devegowda (2007-01-03): -# The time zone rules for Istanbul, Turkey have not been changed for years now. -# ... The latest rules are available at: -# http://www.timeanddate.com/worldclock/timezone.html?n=107 -# From Steffen Thorsen (2007-01-03): -# I have been able to find press records back to 1996 which all say that -# DST started 01:00 local time and end at 02:00 local time. I am not sure -# what happened before that. One example for each year from 1996 to 2001: -# http://newspot.byegm.gov.tr/arsiv/1996/21/N4.htm -# http://www.byegm.gov.tr/YAYINLARIMIZ/CHR/ING97/03/97X03X25.TXT -# http://www.byegm.gov.tr/YAYINLARIMIZ/CHR/ING98/03/98X03X02.HTM -# http://www.byegm.gov.tr/YAYINLARIMIZ/CHR/ING99/10/99X10X26.HTM#%2016 -# http://www.byegm.gov.tr/YAYINLARIMIZ/CHR/ING2000/03/00X03X06.HTM#%2021 -# http://www.byegm.gov.tr/YAYINLARIMIZ/CHR/ING2001/03/23x03x01.HTM#%2027 -# From Paul Eggert (2007-01-03): -# Prefer the above source to Shanks & Pottenger for time stamps after 1990. +# From Kıvanç Yazan (2016-09-25): +# 1) For 1986-2006, DST started at 01:00 local and ended at 02:00 local, with +# no exceptions. +# 2) 1994's lastSun was overridden with Mar 20 ... 
+# Here are official papers: +# http://www.resmigazete.gov.tr/arsiv/19032.pdf - page 2 for 1986 +# http://www.resmigazete.gov.tr/arsiv/19400.pdf - page 4 for 1987 +# http://www.resmigazete.gov.tr/arsiv/19752.pdf - page 15 for 1988 +# http://www.resmigazete.gov.tr/arsiv/20102.pdf - page 6 for 1989 +# http://www.resmigazete.gov.tr/arsiv/20464.pdf - page 1 for 1990 - 1992 +# http://www.resmigazete.gov.tr/arsiv/21531.pdf - page 15 for 1993 - 1995 +# http://www.resmigazete.gov.tr/arsiv/21879.pdf - page 1 for overriding 1994 +# http://www.resmigazete.gov.tr/arsiv/22588.pdf - page 1 for 1996, 1997 +# http://www.resmigazete.gov.tr/arsiv/23286.pdf - page 10 for 1998 - 2000 +# http://www.resmigazete.gov.tr/eskiler/2001/03/20010324.htm#2 - for 2001 +# http://www.resmigazete.gov.tr/eskiler/2002/03/20020316.htm#2 - for 2002-2006 +# From Paul Eggert (2016-09-25): +# Prefer the above sources to Shanks & Pottenger for time stamps after 1985. # From Steffen Thorsen (2007-03-09): # Starting 2007 though, it seems that they are adopting EU's 1:00 UTC @@ -3551,10 +3553,10 @@ Rule Turkey 1983 only - Jul 31 0:00 1:00 S Rule Turkey 1983 only - Oct 2 0:00 0 - Rule Turkey 1985 only - Apr 20 0:00 1:00 S Rule Turkey 1985 only - Sep 28 0:00 0 - -Rule Turkey 1986 1990 - Mar lastSun 2:00s 1:00 S -Rule Turkey 1986 1990 - Sep lastSun 2:00s 0 - -Rule Turkey 1991 2006 - Mar lastSun 1:00s 1:00 S -Rule Turkey 1991 1995 - Sep lastSun 1:00s 0 - +Rule Turkey 1986 1993 - Mar lastSun 1:00s 1:00 S +Rule Turkey 1986 1995 - Sep lastSun 1:00s 0 - +Rule Turkey 1994 only - Mar 20 1:00s 1:00 S +Rule Turkey 1995 2006 - Mar lastSun 1:00s 1:00 S Rule Turkey 1996 2006 - Oct lastSun 1:00s 0 - # Zone NAME GMTOFF RULES FORMAT [UNTIL] Zone Europe/Istanbul 1:55:52 - LMT 1880 diff --git a/contrib/tzdata/northamerica b/contrib/tzdata/northamerica index 0bafb00..e1ed9e4 100644 --- a/contrib/tzdata/northamerica +++ b/contrib/tzdata/northamerica @@ -24,8 +24,32 @@ # was the result of his proposals at the Convention of Railroad 
Trunk Lines # in New York City (1869-10). His 1870 proposal was based on Washington, DC, # but in 1872-05 he moved the proposed origin to Greenwich. -# His proposal was adopted by the railroads on 1883-11-18 at 12:00, -# and the most of the country soon followed suit. + +# From Paul Eggert (2016-09-21): +# Dowd's proposal left many details unresolved, such as where to draw +# lines between time zones. The key individual who made time zones +# work in the US was William Frederick Allen - railway engineer, +# managing editor of the Travelers' Guide, and secretary of the +# General Time Convention, a railway standardization group. Allen +# spent months in dialogs with scientific and railway leaders, +# developed a workable plan to institute time zones, and presented it +# to the General Time Convention on 1883-04-11, saying that his plan +# meant "local time would be practically abolished" - a plus for +# railway scheduling. By the next convention on 1883-10-11 nearly all +# railroads had agreed and it took effect on 1883-11-18 at 12:00. +# That Sunday was called the "day of two noons", as the eastern parts +# of the new zones observed noon twice. Allen witnessed the +# transition in New York City, writing: +# +# I heard the bells of St. Paul's strike on the old time. Four +# minutes later, obedient to the electrical signal from the Naval +# Observatory ... the time-ball made its rapid descent, the chimes +# of old Trinity rang twelve measured strokes, and local time was +# abandoned, probably forever. +# +# Most of the US soon followed suit. See: +# Bartky IR. The adoption of standard time. Technol Cult 1989 Jan;30(1):25-56. +# http://dx.doi.org/10.2307/3105430 # From Paul Eggert (2005-04-16): # That 1883 transition occurred at 12:00 new time, not at 12:00 old time. 
diff --git a/crypto/openssh/kex.c b/crypto/openssh/kex.c index d371f47..9c9f562 100644 --- a/crypto/openssh/kex.c +++ b/crypto/openssh/kex.c @@ -468,6 +468,7 @@ kex_input_kexinit(int type, u_int32_t seq, void *ctxt) if (kex == NULL) return SSH_ERR_INVALID_ARGUMENT; + ssh_dispatch_set(ssh, SSH2_MSG_KEXINIT, NULL); ptr = sshpkt_ptr(ssh, &dlen); if ((r = sshbuf_put(kex->peer, ptr, dlen)) != 0) return r; diff --git a/crypto/openssl/ssl/d1_pkt.c b/crypto/openssl/ssl/d1_pkt.c index 086ee98..8689583 100644 --- a/crypto/openssl/ssl/d1_pkt.c +++ b/crypto/openssl/ssl/d1_pkt.c @@ -924,6 +924,13 @@ int dtls1_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) goto start; } + /* + * Reset the count of consecutive warning alerts if we've got a non-empty + * record that isn't an alert. + */ + if (rr->type != SSL3_RT_ALERT && rr->length != 0) + s->s3->alert_count = 0; + /* we now have a packet which can be read and processed */ if (s->s3->change_cipher_spec /* set when we receive ChangeCipherSpec, @@ -1190,6 +1197,14 @@ int dtls1_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) if (alert_level == SSL3_AL_WARNING) { s->s3->warn_alert = alert_descr; + + s->s3->alert_count++; + if (s->s3->alert_count == MAX_WARN_ALERT_COUNT) { + al = SSL_AD_UNEXPECTED_MESSAGE; + SSLerr(SSL_F_DTLS1_READ_BYTES, SSL_R_TOO_MANY_WARN_ALERTS); + goto f_err; + } + if (alert_descr == SSL_AD_CLOSE_NOTIFY) { #ifndef OPENSSL_NO_SCTP /* diff --git a/crypto/openssl/ssl/s3_pkt.c b/crypto/openssl/ssl/s3_pkt.c index 25cf929..cbc348a 100644 --- a/crypto/openssl/ssl/s3_pkt.c +++ b/crypto/openssl/ssl/s3_pkt.c @@ -1057,6 +1057,13 @@ int ssl3_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) return (ret); } + /* + * Reset the count of consecutive warning alerts if we've got a non-empty + * record that isn't an alert. 
+ */ + if (rr->type != SSL3_RT_ALERT && rr->length != 0) + s->s3->alert_count = 0; + /* we now have a packet which can be read and processed */ if (s->s3->change_cipher_spec /* set when we receive ChangeCipherSpec, @@ -1271,6 +1278,14 @@ int ssl3_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) if (alert_level == SSL3_AL_WARNING) { s->s3->warn_alert = alert_descr; + + s->s3->alert_count++; + if (s->s3->alert_count == MAX_WARN_ALERT_COUNT) { + al = SSL_AD_UNEXPECTED_MESSAGE; + SSLerr(SSL_F_SSL3_READ_BYTES, SSL_R_TOO_MANY_WARN_ALERTS); + goto f_err; + } + if (alert_descr == SSL_AD_CLOSE_NOTIFY) { s->shutdown |= SSL_RECEIVED_SHUTDOWN; return (0); diff --git a/crypto/openssl/ssl/ssl.h b/crypto/openssl/ssl/ssl.h index 114ee97..c150672 100644 --- a/crypto/openssl/ssl/ssl.h +++ b/crypto/openssl/ssl/ssl.h @@ -2717,6 +2717,7 @@ void ERR_load_SSL_strings(void); # define SSL_R_TLS_HEARTBEAT_PENDING 366 # define SSL_R_TLS_ILLEGAL_EXPORTER_LABEL 367 # define SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST 157 +# define SSL_R_TOO_MANY_WARN_ALERTS 409 # define SSL_R_TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST 233 # define SSL_R_TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG 234 # define SSL_R_TRIED_TO_USE_UNSUPPORTED_CIPHER 235 diff --git a/crypto/openssl/ssl/ssl3.h b/crypto/openssl/ssl/ssl3.h index e9b1170..1d50f66 100644 --- a/crypto/openssl/ssl/ssl3.h +++ b/crypto/openssl/ssl/ssl3.h @@ -587,6 +587,8 @@ typedef struct ssl3_state_st { char is_probably_safari; # endif /* !OPENSSL_NO_EC */ # endif /* !OPENSSL_NO_TLSEXT */ + /* Count of the number of consecutive warning alerts received */ + unsigned int alert_count; } SSL3_STATE; # endif diff --git a/crypto/openssl/ssl/ssl_locl.h b/crypto/openssl/ssl/ssl_locl.h index 7b1fd1f..8ab2755 100644 --- a/crypto/openssl/ssl/ssl_locl.h +++ b/crypto/openssl/ssl/ssl_locl.h @@ -389,6 +389,8 @@ */ # define SSL_MAX_DIGEST 6 +# define MAX_WARN_ALERT_COUNT 5 + # define TLS1_PRF_DGST_MASK (0xff << TLS1_PRF_DGST_SHIFT) # define 
TLS1_PRF_DGST_SHIFT 10 diff --git a/include/Makefile b/include/Makefile index 1c2bf39..449cdf4 100644 --- a/include/Makefile +++ b/include/Makefile @@ -236,6 +236,17 @@ copies: cd ${.CURDIR}/../sys/rpc; \ ${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 types.h \ ${DESTDIR}${INCLUDEDIR}/rpc +.if ${MK_CDDL} != "no" + cd ${.CURDIR}/../cddl/contrib/opensolaris/lib/libzfs_core/common; \ + ${INSTALL} -C ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 444 libzfs_core.h \ + ${DESTDIR}${INCLUDEDIR} + cd ${.CURDIR}/../cddl/contrib/opensolaris/lib/libnvpair; \ + ${INSTALL} -C ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 444 libnvpair.h \ + ${DESTDIR}${INCLUDEDIR} + cd ${.CURDIR}/../sys/cddl/contrib/opensolaris/uts/common/sys; \ + ${INSTALL} -C ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 444 nvpair.h \ + ${DESTDIR}${INCLUDEDIR}/sys +.endif symlinks: @${ECHO} "Setting up symlinks to kernel source tree..." diff --git a/include/search.h b/include/search.h index 4e4606f..40b736a 100644 --- a/include/search.h +++ b/include/search.h @@ -43,6 +43,8 @@ struct que_elem { struct que_elem *next; struct que_elem *prev; }; +#else +typedef void posix_tnode; #endif __BEGIN_DECLS diff --git a/lib/libc/db/btree/bt_close.c b/lib/libc/db/btree/bt_close.c index 1f85992..f1e7c8d 100644 --- a/lib/libc/db/btree/bt_close.c +++ b/lib/libc/db/btree/bt_close.c @@ -134,7 +134,8 @@ __bt_sync(const DB *dbp, u_int flags) return (RET_ERROR); } - if (F_ISSET(t, B_INMEM | B_RDONLY) || !F_ISSET(t, B_MODIFIED)) + if (F_ISSET(t, B_INMEM | B_RDONLY) || + !F_ISSET(t, B_MODIFIED | B_METADIRTY)) return (RET_SUCCESS); if (F_ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR) diff --git a/lib/libc/tests/stdio/printbasic_test.c b/lib/libc/tests/stdio/printbasic_test.c index 376981e..bebb734 100644 --- a/lib/libc/tests/stdio/printbasic_test.c +++ b/lib/libc/tests/stdio/printbasic_test.c @@ -80,7 +80,7 @@ _testfmt(const char *result, const char *argstr, const char *fmt,...) 
vsnprintf(s, sizeof(s), fmt, ap); if (strcmp(result, s) != 0) { atf_tc_fail( - "printf(\"%s\", %s) ==> [%s], expected [%s]\n", + "printf(\"%s\", %s) ==> [%s], expected [%s]", fmt, argstr, s, result); } @@ -91,7 +91,7 @@ _testfmt(const char *result, const char *argstr, const char *fmt,...) vswprintf(ws, sizeof(ws) / sizeof(ws[0]), wfmt, ap2); if (wcscmp(wresult, ws) != 0) { atf_tc_fail( - "wprintf(\"%ls\", %s) ==> [%ls], expected [%ls]\n", + "wprintf(\"%ls\", %s) ==> [%ls], expected [%ls]", wfmt, argstr, ws, wresult); } va_end(ap); diff --git a/lib/libc/tests/stdio/printfloat_test.c b/lib/libc/tests/stdio/printfloat_test.c index 259c5df..8c02bec 100644 --- a/lib/libc/tests/stdio/printfloat_test.c +++ b/lib/libc/tests/stdio/printfloat_test.c @@ -72,7 +72,7 @@ _testfmt(const char *result, const char *argstr, const char *fmt,...) vsnprintf(s, sizeof(s), fmt, ap); if (strcmp(result, s) != 0) { atf_tc_fail( - "printf(\"%s\", %s) ==> [%s], expected [%s]\n", + "printf(\"%s\", %s) ==> [%s], expected [%s]", fmt, argstr, s, result); } @@ -83,7 +83,7 @@ _testfmt(const char *result, const char *argstr, const char *fmt,...) 
vswprintf(ws, sizeof(ws) / sizeof(ws[0]), wfmt, ap2); if (wcscmp(wresult, ws) != 0) { atf_tc_fail( - "wprintf(\"%ls\", %s) ==> [%ls], expected [%ls]\n", + "wprintf(\"%ls\", %s) ==> [%ls], expected [%ls]", wfmt, argstr, ws, wresult); } va_end(ap); diff --git a/libexec/atf/atf-check/Makefile b/libexec/atf/atf-check/Makefile index 0792c03..f445dba 100644 --- a/libexec/atf/atf-check/Makefile +++ b/libexec/atf/atf-check/Makefile @@ -27,7 +27,7 @@ .include <bsd.init.mk> -ATF= ${.CURDIR:H:H:H}/contrib/atf +ATF= ${SRCTOP}/contrib/atf .PATH: ${ATF}/atf-sh PROG_CXX= atf-check diff --git a/libexec/atf/atf-sh/Makefile b/libexec/atf/atf-sh/Makefile index 8a044b2..5a90771 100644 --- a/libexec/atf/atf-sh/Makefile +++ b/libexec/atf/atf-sh/Makefile @@ -27,7 +27,7 @@ .include <bsd.init.mk> -ATF= ${.CURDIR:H:H:H}/contrib/atf +ATF= ${SRCTOP}/contrib/atf .PATH: ${ATF}/atf-sh PROG_CXX= atf-sh diff --git a/release/doc/share/xml/errata.xml b/release/doc/share/xml/errata.xml index a1453e9..a46ec30 100644 --- a/release/doc/share/xml/errata.xml +++ b/release/doc/share/xml/errata.xml @@ -104,6 +104,13 @@ <entry><para>Fix SCSI INQUIRY checks and error handling</para></entry> </row> + + <row> + <entry><link + xlink:href="&security.url;/FreeBSD-EN-16:17.vm.asc">FreeBSD-EN-16:17.vm</link></entry> + <entry>25 October 2016</entry> + <entry><para>Several virtual memory issues</para></entry> + </row> </tbody> </tgroup> </informaltable> diff --git a/release/doc/share/xml/security.xml b/release/doc/share/xml/security.xml index c253b76..eb5ab2a 100644 --- a/release/doc/share/xml/security.xml +++ b/release/doc/share/xml/security.xml @@ -95,6 +95,42 @@ <entry>25 July 2016</entry> <entry><para>heap overflow vulnerability</para></entry> </row> + + <row> + <entry><link + xlink:href="&security.url;/FreeBSD-SA-16:26.openssl.asc">FreeBSD-SA-16:26.openssl</link></entry> + <entry>23 September 2016</entry> + <entry><para>Multiple vulnerabilities</para></entry> + </row> + + <row> + <entry><link + 
xlink:href="&security.url;/FreeBSD-SA-16:27.openssl.asc">FreeBSD-SA-16:27.openssl</link></entry> + <entry>26 September 2016</entry> + <entry><para>Regression in <application>OpenSSL</application> + suite</para></entry> + </row> + + <row> + <entry><link + xlink:href="&security.url;/FreeBSD-SA-16:29.bspatch.asc">FreeBSD-SA-16:29.bspatch</link></entry> + <entry>10 October 2016</entry> + <entry><para>Heap overflow vulnerability</para></entry> + </row> + + <row> + <entry><link + xlink:href="&security.url;/FreeBSD-SA-16:30.portsnap.asc">FreeBSD-SA-16:30.portsnap</link></entry> + <entry>10 October 2016</entry> + <entry><para>Multiple vulnerabilities</para></entry> + </row> + + <row> + <entry><link + xlink:href="&security.url;/FreeBSD-SA-16:31.libarchive.asc">FreeBSD-SA-16:31.libarchive</link></entry> + <entry>10 October 2016</entry> + <entry><para>Multiple vulnerabilities</para></entry> + </row> </tbody> </tgroup> </informaltable> diff --git a/sbin/init/init.c b/sbin/init/init.c index af672a9..7c571d5 100644 --- a/sbin/init/init.c +++ b/sbin/init/init.c @@ -876,6 +876,7 @@ single_user(void) sigset_t mask; const char *shell; char *argv[2]; + struct timeval tv, tn; #ifdef SECURE struct ttyent *typ; struct passwd *pp; @@ -890,8 +891,13 @@ single_user(void) if (Reboot) { /* Instead of going single user, let's reboot the machine */ sync(); - reboot(howto); - _exit(0); + if (reboot(howto) == -1) { + emergency("reboot(%#x) failed, %s", howto, + strerror(errno)); + _exit(1); /* panic and reboot */ + } + warning("reboot(%#x) returned", howto); + _exit(0); /* panic as well */ } shell = get_shell(); @@ -1008,7 +1014,14 @@ single_user(void) * reboot(8) killed shell? 
*/ warning("single user shell terminated."); - sleep(STALL_TIMEOUT); + gettimeofday(&tv, NULL); + tn = tv; + tv.tv_sec += STALL_TIMEOUT; + while (tv.tv_sec > tn.tv_sec || (tv.tv_sec == + tn.tv_sec && tv.tv_usec > tn.tv_usec)) { + sleep(1); + gettimeofday(&tn, NULL); + } _exit(0); } else { warning("single user shell terminated, restarting"); diff --git a/sbin/mount_msdosfs/mount_msdosfs.8 b/sbin/mount_msdosfs/mount_msdosfs.8 index 3e4b08d..9f39ffb 100644 --- a/sbin/mount_msdosfs/mount_msdosfs.8 +++ b/sbin/mount_msdosfs/mount_msdosfs.8 @@ -142,15 +142,8 @@ If neither nor .Fl l are given, -.Nm -searches the root directory of the file system to -be mounted for any existing Win'95 long filenames. -If no such entries are found, but short DOS filenames are found, -.Fl s -is the default. -Otherwise .Fl l -is assumed. +is the default. .It Fl 9 Ignore the special Win'95 directory entries even if deleting or renaming a file. diff --git a/share/man/man4/cxgbe.4 b/share/man/man4/cxgbe.4 index 148bb1d..247e57a 100644 --- a/share/man/man4/cxgbe.4 +++ b/share/man/man4/cxgbe.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 20, 2014 +.Dd December 2, 2015 .Dt CXGBE 4 .Os .Sh NAME @@ -171,6 +171,16 @@ number of CPU cores in the system, whichever is less. .It Va hw.cxgbe.nofldrxq1g The number of TOE rx queues to use for a 1Gb port. The default is 1. +.It Va hw.cxgbe.num_vis +The number of virtual interfaces (VIs) created for each port. +Each virtual interface creates a separate network interface. +The first virtual interface on each port is required and represents +the primary network interface on the port. +Additional virtual interfaces on a port are named vcxgbe (T4) or +vcxl (T5) and only use a single rx and tx queue. +Additional virtual interfaces use a single pair of queues +for rx and tx as well an additional pair of queues for TOE rx and tx. +The default is 1. 
.It Va hw.cxgbe.holdoff_timer_idx_10G .It Va hw.cxgbe.holdoff_timer_idx_1G The timer index value to use to delay interrupts. diff --git a/share/misc/pci_vendors b/share/misc/pci_vendors index 2bc34d3..3d9c83c 100644 --- a/share/misc/pci_vendors +++ b/share/misc/pci_vendors @@ -3,8 +3,8 @@ # # List of PCI ID's # -# Version: 2016.10.03 -# Date: 2016-10-03 03:15:01 +# Version: 2016.10.20 +# Date: 2016-10-20 03:15:02 # # Maintained by Albert Pool, Martin Mares, and other volunteers from # the PCI ID Project at http://pci-ids.ucw.cz/. @@ -245,8 +245,13 @@ 0013 53c875a 1000 1000 LSI53C875A PCI to Ultra SCSI Controller 0014 MegaRAID Tri-Mode SAS3516 + 1028 1fd4 PERC H745P MX 1d49 0602 ThinkSystem RAID 930-16i 4GB Flash PCIe 12Gb Adapter 0016 MegaRAID Tri-Mode SAS3508 + 1028 1fc9 PERC H840 Adapter + 1028 1fcb PERC H740P Adapter + 1028 1fcd PERC H740P Mini + 1028 1fcf PERC H740P Mini 1d49 0601 ThinkSystem RAID 930-8i 2GB Flash PCIe 12Gb Adapter 1d49 0603 ThinkSystem RAID 930-24i 4GB Flash PCIe 12Gb Adapter 1d49 0604 ThinkSystem RAID 930-8e 4GB Flash PCIe 12Gb Adapter @@ -376,6 +381,7 @@ 1028 1f4d PERC FD33xS 1028 1f4f PERC H730P Slim 1028 1f54 PERC FD33xD + 1028 1fd1 PERC H730P MX 17aa 1052 ThinkServer RAID 720i 17aa 1053 ThinkServer RAID 720ix 1d49 0600 ThinkSystem RAID 730-8i 1GB Cache PCIe 12Gb Adapter @@ -535,8 +541,11 @@ 0097 SAS3008 PCI-Express Fusion-MPT SAS-3 1000 3090 SAS9311-8i 1000 30e0 SAS9300-8i - 1028 1f45 12GB/s HBA internal + 1028 1f45 HBA330 Adapter 1028 1f46 12Gbps HBA + 1028 1f53 HBA330 Mini + 1028 1fd2 HBA330 MX + 1028 1fd3 HBA330 MMZ 00ab SAS3516 Fusion-MPT Tri-Mode RAID On Chip (ROC) 00ac SAS3416 Fusion-MPT Tri-Mode I/O Controller Chip (IOC) 1d49 0201 ThinkSystem 9400-16i PCIe 12Gb HBA @@ -1588,7 +1597,7 @@ 1462 2938 Radeon R9 360 OEM 1462 3271 Radeon R9 360 OEM 1682 7360 Radeon R7 360 - 6660 Sun XT [Radeon HD 8670A/8670M/8690M / R5 M330] + 6660 Sun XT [Radeon HD 8670A/8670M/8690M / R5 M330 / M430] 1028 05ea Radeon HD 8670M 1028 06bf Radeon R5 M335 
103c 1970 Radeon HD 8670M @@ -1596,6 +1605,7 @@ 103c 8136 Radeon R5 M330 17aa 3804 Radeon R5 M330 17aa 3809 Radeon R5 M330 + 17aa 381a Radeon R5 M430 17aa 390c Radeon R5 M330 6663 Sun PRO [Radeon HD 8570A/8570M] 1025 0846 Radeon HD 8570A @@ -2866,11 +2876,12 @@ 174b e180 Radeon HD 7350 17af 3015 Radeon HD 7350 68fe Cedar LE - 6900 Topaz XT [Radeon R7 M260/M265 / M340/M360] + 6900 Topaz XT [Radeon R7 M260/M265 / M340/M360 / M440/M445] 1025 1056 Radeon R7 M360 / R8 M365DX 1028 0640 Radeon R7 M260/M265 1028 0643 Radeon R7 M260/M265 1028 067f Radeon R7 M260 + 1028 0767 Radeon R7 M445 1028 130a Radeon R7 M260 103c 2263 Radeon R7 M260 103c 2269 Radeon R7 M260 @@ -2881,6 +2892,7 @@ 103c 80b5 Radeon R7 M360 103c 80b9 Radeon R7 M360 103c 811c Radeon R7 M340 + 103c 8226 Radeon R7 M440 10cf 1906 Radeon R7 M260 1170 9979 Radeon R7 M360 1179 f903 Radeon R7 M260 @@ -2892,6 +2904,7 @@ 17aa 5021 Radeon R7 M260 6901 Topaz PRO [Radeon R5 M255] 103c 1318 Radeon R6 M255DX + 6907 Meso XT [Radeon R5 M315] 6921 Amethyst XT [Radeon R9 M295X] 6929 Tonga XT GL [FirePro S7150] 692b Tonga PRO GL [FirePro W7100] @@ -3349,7 +3362,7 @@ 99a4 Trinity [Radeon HD 7400G] aa00 R600 HDMI Audio [Radeon HD 2900 GT/PRO/XT] aa01 RV635 HDMI Audio [Radeon HD 3650/3730/3750] - aa08 RV630 HDMI Audio [Radeon HD 2600 Series] + aa08 RV630 HDMI Audio [Radeon HD 2600 PRO/XT / HD 3610] aa10 RV610 HDMI Audio [Radeon HD 2350 PRO / 2400 PRO/XT / HD 3410] 174b aa10 Radeon HD 2400 PRO 18bc aa10 Radeon HD 2400 PRO @@ -3369,10 +3382,10 @@ aa68 Cedar HDMI Audio [Radeon HD 5400/6300/7300 Series] 1028 aa68 XPS 8300 aa80 Cayman/Antilles HDMI Audio [Radeon HD 6930/6950/6970/6990] - aa88 Barts HDMI Audio [Radeon HD 6800 Series] + aa88 Barts HDMI Audio [Radeon HD 6790/6850/6870 / 7720 OEM] aa90 Turks HDMI Audio [Radeon HD 6500/6600 / 6700M Series] 1028 04a3 Precision M4600 - aa98 Caicos HDMI Audio [Radeon HD 6400 Series] + aa98 Caicos HDMI Audio [Radeon HD 6450 / 7450/8450/8490 OEM / R5 230/235/235X OEM] 174b aa98 Radeon HD 6450 
1GB DDR3 aaa0 Tahiti HDMI Audio [Radeon HD 7870 XT / 7950/7970] aab0 Cape Verde/Pitcairn HDMI Audio [Radeon HD 7700/7800 Series] @@ -7347,6 +7360,7 @@ 8533 PEX 8533 32-lane, 6-port PCI Express Switch 8547 PEX 8547 48-lane, 3-port PCI Express Switch 8548 PEX 8548 48-lane, 9-port PCI Express Switch + 8603 PEX 8603 3-lane, 3-Port PCI Express Gen 2 (5.0 GT/s) Switch 8604 PEX 8604 4-lane, 4-Port PCI Express Gen 2 (5.0 GT/s) Switch 8605 PEX 8605 PCI Express 4-port Gen2 Switch 8606 PEX 8606 6 Lane, 6 Port PCI Express Gen 2 (5.0 GT/s) Switch @@ -17921,7 +17935,7 @@ 1015 MT27710 Family [ConnectX-4 Lx] 1016 MT27710 Family [ConnectX-4 Lx Virtual Function] 1017 MT27800 Family [ConnectX-5] - 1018 MT28800 Family [ConnectX-5 Virtual Function] + 1018 MT27800 Family [ConnectX-5 Virtual Function] 1019 MT28800 Family [ConnectX-5 Ex] 101a MT28800 Family [ConnectX-5 Ex Virtual Function] 101b MT28831 @@ -17932,6 +17946,7 @@ 1020 MT28860 1021 MT28861 1974 MT28800 Family [ConnectX-5 PCIe Bridge] + 1975 MT416842 Family [BlueField SoC PCIe Bridge] 5274 MT21108 InfiniBridge 5a44 MT23108 InfiniHost 5a45 MT23108 [Infinihost HCA Flash Recovery] @@ -17966,6 +17981,9 @@ 7121 NPS-600 configuration and management interface 7122 NPS-600 network interface PF 7123 NPS-600 network interface VF + a2d0 MT416842 + a2d1 MT416842 + a2d3 MT416842 BlueField multicore SoC family VF # SwitchX-2, 40GbE switch c738 MT51136 c739 MT51136 GW @@ -18638,7 +18656,12 @@ 7018 AP408: 32-Channel Digital I/O Module 701a AP220-16 12-Bit, 16-Channel Analog Output Module 701b AP231-16 16-Bit, 16-Channel Analog Output Module + 7021 APA7-201 Reconfigurable Artix-7 FPGA module 48 TTL channels + 7022 APA7-202 Reconfigurable Artix-7 FPGA module 24 RS485 channels + 7023 APA7-203 Reconfigurable Artix-7 FPGA module 24 TTL & 12 RS485 channels + 7024 APA7-204 Reconfigurable Artix-7 FPGA module 24 LVDS channels 7042 AP482 Counter Timer Module with TTL Level Input/Output + 7043 AP483 Counter Timer Module with TTL Level and RS422 
Input/Output 7044 AP484 Counter Timer Module with RS422 Input/Output 16da Advantech Co., Ltd. 0011 INES GPIB-PCI @@ -18881,6 +18904,7 @@ 0401 Datacenter Technologies QDF2400 PCI Express Root Port 17cc NetChip Technology, Inc 2280 USB 2.0 +17cd Cadence Design Systems, Inc. 17cf Z-Com, Inc. 17d3 Areca Technology Corp. 1110 ARC-1110 4-Port PCI-X to SATA RAID Controller @@ -20290,7 +20314,8 @@ 1432 8102 EN-8102P 10GbE Ethernet Adapter 1fc9 3015 Ethernet Adapter 4026 TN9610 10GbE SFP+ Ethernet Adapter - 4027 TN9710 10GBase-T/NBASE-T Ethernet Adapter + 4027 TN9710P 10GBase-T/NBASE-T Ethernet Adapter + 4527 TN9710Q 5GBase-T/NBASE-T Ethernet Adapter 1fcc StreamLabs f416 MS416 fb01 MH4LM @@ -22295,6 +22320,7 @@ 17aa 4007 82599ES 10-Gigabit SFI/SFP+ Network Connection 17aa 402b 82599ES 10Gb 2-port Server Adapter X520-DA2 17aa 402f FPGA Card XC7VX690T-3FFG1157E + 18d4 0c09 82599ES 10Gb 2-port SFP+ OCP Mezz Card MOP81-I-10GS2 1bd4 001b 10G SFP+ DP ER102Fi4 Rack Adapter 1bd4 002f 10G SFP+ DP EP102Fi4A Adapter 1bd4 0032 10G SFP+ DP EP102Fi4 Adapter @@ -22570,8 +22596,15 @@ 1520 I350 Ethernet Controller Virtual Function 1521 I350 Gigabit Network Connection 1028 0602 Gigabit 2P I350-t LOM + 1028 0693 Gigabit 2P I350-t LOM + 1028 06e2 Gigabit 2P I350-t LOM + 1028 0757 Gigabit I350-t LOM + 1028 075a Gigabit I350-t LOM 1028 1f60 Gigabit 4P I350-t rNDC 1028 1f62 Gigabit 4P X540/I350 rNDC + 1028 1fa8 Ethernet 10G 4P X550/I350 rNDC + 1028 1fa9 Ethernet 10G 4P X550 rNDC + 1028 1faa Gigabit 4P X550/I350 rNDC 1028 ff9a Gigabit 4P X710/I350 rNDC 103c 17d1 Ethernet 1Gb 4-port 366FLR Adapter 103c 2003 Ethernet 1Gb 2-port 367i Adapter @@ -22590,6 +22623,7 @@ 15d9 0652 Dual Port i350 GbE MicroLP [AOC-CGP-i2] 17aa 1074 ThinkServer I350-T4 AnyFabric 17aa 4005 I350 Gigabit Network Connection + 18d4 0c07 I350 1Gb 2-port RJ45 OCP Mezz Card MOP41-I-1GT2 1bd4 001d 1G base-T QP EP014Ti1 Adapter 1bd4 0035 1G base-T QP EP014Ti1 Adapter 8086 0001 Ethernet Server Adapter I350-T4 @@ -22698,6 +22732,7 @@ 
1028 1fa9 Ethernet 10G 4P X550 rNDC 1590 00d1 Ethernet 10Gb 2-port 562T Adapter 1590 00d2 Ethernet 10Gb 2-port 562FLR-T Adapter + 18d4 0c08 X550 10Gb 2-port RJ45 OCP Mezz Card MOP81-I-10GT2 8086 0001 Ethernet Converged Network Adapter X550-T2 8086 001a Ethernet Converged Network Adapter X550-T2 8086 0022 Ethernet Converged Network Adapter X550-T2 @@ -22782,11 +22817,11 @@ 108e 0000 Ethernet Controller X710 for 10GBASE-T 108e 4857 Ethernet Controller X710 for 10GBASE-T 1587 Ethernet Controller XL710 for 20GbE backplane - 103c 0000 HP Flex-20 20Gb 2-port 660FLB Adapter - 103c 22fe HP Flex-20 20Gb 2-port 660FLB Adapter + 103c 0000 HPE Ethernet 10/20Gb 2-port 660FLB Adapter + 103c 22fe HPE Ethernet 10/20Gb 2-port 660FLB Adapter 1588 Ethernet Controller XL710 for 20GbE backplane - 103c 0000 HP Flex-20 20Gb 2-port 660M Adapter - 103c 22ff HP Flex-20 20Gb 2-port 660M Adapter + 103c 0000 HPE Ethernet 10/20Gb 2-port 660M Adapter + 103c 22ff HPE Ethernet 10/20Gb 2-port 660M Adapter 1589 Ethernet Controller X710/X557-AT 10GBASE-T 108e 0000 Quad Port 10GBase-T Adapter 108e 7b1c Quad Port 10GBase-T Adapter @@ -23169,7 +23204,7 @@ 1e09 7 Series Chipset Family 2-port SATA Controller [IDE mode] 144d c652 NP300E5C series laptop 1e0e 7 Series/C210 Series Chipset Family SATA Controller [RAID mode] - 1e10 7 Series/C210 Series Chipset Family PCI Express Root Port 1 + 1e10 7 Series/C216 Chipset Family PCI Express Root Port 1 1043 108d VivoBook X202EV 1043 1477 N56VZ 1043 1517 Zenbook Prime UX31A @@ -23181,7 +23216,7 @@ 1043 1477 N56VZ 1043 1517 Zenbook Prime UX31A 1e14 7 Series/C210 Series Chipset Family PCI Express Root Port 3 - 1e16 7 Series/C210 Series Chipset Family PCI Express Root Port 4 + 1e16 7 Series/C216 Chipset Family PCI Express Root Port 4 1043 108d VivoBook X202EV 1043 1477 N56VZ 144d c652 NP300E5C series laptop @@ -23194,7 +23229,7 @@ 1e1c 7 Series/C210 Series Chipset Family PCI Express Root Port 7 1e1e 7 Series/C210 Series Chipset Family PCI Express Root Port 8 1849 1e1e 
Motherboard - 1e20 7 Series/C210 Series Chipset Family High Definition Audio Controller + 1e20 7 Series/C216 Chipset Family High Definition Audio Controller 1028 054b Dell XPS One 2710 1043 108d VivoBook X202EV 1043 1477 N56VZ @@ -23203,7 +23238,7 @@ 1043 8445 ASUS P8Z77-V LX Motherboard 144d c652 NP300E5C series laptop 1849 1898 Z77 Extreme4 motherboard - 1e22 7 Series/C210 Series Chipset Family SMBus Controller + 1e22 7 Series/C216 Chipset Family SMBus Controller 1043 108d VivoBook X202EV 1043 1477 N56VZ 1043 1517 Zenbook Prime UX31A @@ -23213,14 +23248,14 @@ 1e24 7 Series/C210 Series Chipset Family Thermal Management Controller 1043 1517 Zenbook Prime UX31A 1e25 7 Series/C210 Series Chipset Family DMI to PCI Bridge - 1e26 7 Series/C210 Series Chipset Family USB Enhanced Host Controller #1 + 1e26 7 Series/C216 Chipset Family USB Enhanced Host Controller #1 1043 108d VivoBook X202EV 1043 1477 N56VZ 1043 1517 Zenbook Prime UX31A 1043 84ca P8 series motherboard 144d c652 NP300E5C series laptop 1849 1e26 Motherboard - 1e2d 7 Series/C210 Series Chipset Family USB Enhanced Host Controller #2 + 1e2d 7 Series/C216 Chipset Family USB Enhanced Host Controller #2 1043 108d VivoBook X202EV 1043 1477 N56VZ 1043 1517 Zenbook Prime UX31A @@ -23235,7 +23270,7 @@ 1043 84ca P8 series motherboard 1849 1e31 Motherboard 1e33 7 Series/C210 Series Chipset Family LAN Controller - 1e3a 7 Series/C210 Series Chipset Family MEI Controller #1 + 1e3a 7 Series/C216 Chipset Family MEI Controller #1 1043 108d VivoBook X202EV 1043 1477 N56VZ 1043 1517 Zenbook Prime UX31A @@ -26234,10 +26269,21 @@ 37cd X722 Virtual Function 37ce Ethernet Connection X722 for 10GbE backplane 1590 0215 Ethernet 10Gb 2-port 568i Adapter + 17aa 4023 Intel Ethernet Connection X722 for 10GbE backplane 37cf Ethernet Connection X722 for 10GbE QSFP+ 37d0 Ethernet Connection X722 for 10GbE SFP+ 37d1 Ethernet Connection X722 for 1GbE + 17aa 4020 Intel Ethernet Connection X722 for 1GbE + 17aa 4021 Intel Ethernet Connection 
X722 for 1GbE + 17aa 4022 Intel Ethernet Connection X722 for 1GbE + 8086 4020 Ethernet Connection X722 for 1GbE + 8086 4021 Ethernet Connection X722 for 1GbE + 8086 4022 Ethernet Connection X722 for 1GbE 37d2 Ethernet Connection X722 for 10GBASE-T + 17aa 4020 Intel Ethernet Connection X722 for 10GBASE + 17aa 4021 Intel Ethernet Connection X722 for 10GBASE + 8086 4020 Ethernet Connection X722 for 10GBASE + 8086 4021 Ethernet Connection X722 for 10GBASE 37d3 Ethernet Connection X722 for 10GbE SFP+ 37d4 Ethernet Connection X722 for 10GbE QSFP+ 37d9 X722 Hyper-V Virtual Function diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c index e20a271..66e465e 100644 --- a/sys/amd64/amd64/initcpu.c +++ b/sys/amd64/amd64/initcpu.c @@ -139,6 +139,20 @@ init_amd(void) wrmsr(0xc001102a, msr); } } + + /* + * Work around Erratum 793: Specific Combination of Writes to Write + * Combined Memory Types and Locked Instructions May Cause Core Hang. + * See Revision Guide for AMD Family 16h Models 00h-0Fh Processors, + * revision 3.04 or later, publication 51810. 
+ */ + if (CPUID_TO_FAMILY(cpu_id) == 0x16 && CPUID_TO_MODEL(cpu_id) <= 0xf) { + if ((cpu_feature2 & CPUID2_HV) == 0) { + msr = rdmsr(0xc0011020); + msr |= (uint64_t)1 << 15; + wrmsr(0xc0011020, msr); + } + } } /* diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index 77abfe8..fb984f5 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -619,6 +619,8 @@ amd64_set_ldt(td, uap, descs) largest_ld = uap->start + uap->num; if (largest_ld > max_ldt_segment) largest_ld = max_ldt_segment; + if (largest_ld < uap->start) + return (EINVAL); i = largest_ld - uap->start; mtx_lock(&dt_lock); bzero(&((struct user_segment_descriptor *)(pldt->ldt_base)) @@ -631,7 +633,8 @@ amd64_set_ldt(td, uap, descs) /* verify range of descriptors to modify */ largest_ld = uap->start + uap->num; if (uap->start >= max_ldt_segment || - largest_ld > max_ldt_segment) + largest_ld > max_ldt_segment || + largest_ld < uap->start) return (EINVAL); } diff --git a/sys/cam/ata/ata_all.c b/sys/cam/ata/ata_all.c index f0d6202..c732854 100644 --- a/sys/cam/ata/ata_all.c +++ b/sys/cam/ata/ata_all.c @@ -88,6 +88,7 @@ ata_op_string(struct ata_cmd *cmd) } return "DSM"; case 0x08: return ("DEVICE_RESET"); + case 0x0b: return ("REQUEST_SENSE_DATA_EXT"); case 0x20: return ("READ"); case 0x24: return ("READ48"); case 0x25: return ("READ_DMA48"); @@ -117,6 +118,12 @@ ata_op_string(struct ata_cmd *cmd) } return "WRITE_UNCORRECTABLE48"; case 0x51: return ("CONFIGURE_STREAM"); + case 0x57: return ("WRITE_LOG_DMA_EXT"); + case 0x5b: return ("TRUSTED_NON_DATA"); + case 0x5c: return ("TRUSTED_RECEIVE"); + case 0x5d: return ("TRUSTED_RECEIVE_DMA"); + case 0x5e: return ("TRUSTED_SEND"); + case 0x5f: return ("TRUSTED_SEND_DMA"); case 0x60: return ("READ_FPDMA_QUEUED"); case 0x61: return ("WRITE_FPDMA_QUEUED"); case 0x63: return ("NCQ_NON_DATA"); @@ -133,9 +140,13 @@ ata_op_string(struct ata_cmd *cmd) } return ("SEP_ATTN"); case 0x70: return ("SEEK"); + case 0x77: return 
("SET_DATE_TIME_EXT"); + case 0x78: return ("ACCESSIBLE_MAX_ADDRESS_CONFIGURATION"); case 0x87: return ("CFA_TRANSLATE_SECTOR"); case 0x90: return ("EXECUTE_DEVICE_DIAGNOSTIC"); case 0x92: return ("DOWNLOAD_MICROCODE"); + case 0x93: return ("DOWNLOAD_MICROCODE_DMA"); + case 0x9a: return ("ZAC_MANAGEMENT_OUT"); case 0xa0: return ("PACKET"); case 0xa1: return ("ATAPI_IDENTIFY"); case 0xa2: return ("SERVICE"); @@ -152,6 +163,7 @@ ata_op_string(struct ata_cmd *cmd) } return ("SMART"); case 0xb1: return ("DEVICE CONFIGURATION"); + case 0xb4: return ("SANITIZE_DEVICE"); case 0xc0: return ("CFA_ERASE"); case 0xc4: return ("READ_MUL"); case 0xc5: return ("WRITE_MUL"); diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index a374a16..daa2ebf 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -707,8 +707,7 @@ adaclose(struct disk *dp) if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); - else - softc->flags &= ~ADA_FLAG_DIRTY; + softc->flags &= ~ADA_FLAG_DIRTY; xpt_release_ccb(ccb); cam_periph_unhold(periph); } diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c index 8ec048b..e5c12c1 100644 --- a/sys/cam/ctl/ctl.c +++ b/sys/cam/ctl/ctl.c @@ -9907,6 +9907,7 @@ ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_block_limits *bl_ptr; struct ctl_lun *lun; + uint64_t ival; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; @@ -9945,8 +9946,14 @@ ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len) if (lun != NULL) { scsi_ulto4b(lun->be_lun->opttxferlen, bl_ptr->opt_txfer_len); if (lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) { - scsi_ulto4b(0xffffffff, bl_ptr->max_unmap_lba_cnt); - scsi_ulto4b(0xffffffff, bl_ptr->max_unmap_blk_cnt); + ival = 0xffffffff; + ctl_get_opt_number(&lun->be_lun->options, + "unmap_max_lba", &ival); + scsi_ulto4b(ival, bl_ptr->max_unmap_lba_cnt); + ival = 0xffffffff; + ctl_get_opt_number(&lun->be_lun->options, + "unmap_max_descr", &ival); + 
scsi_ulto4b(ival, bl_ptr->max_unmap_blk_cnt); if (lun->be_lun->ublockexp != 0) { scsi_ulto4b((1 << lun->be_lun->ublockexp), bl_ptr->opt_unmap_grain); @@ -9960,8 +9967,10 @@ ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len) scsi_ulto4b(0, bl_ptr->atomic_transfer_length_granularity); scsi_ulto4b(0, bl_ptr->max_atomic_transfer_length_with_atomic_boundary); scsi_ulto4b(0, bl_ptr->max_atomic_boundary_size); + ival = UINT64_MAX; + ctl_get_opt_number(&lun->be_lun->options, "write_same_max_lba", &ival); + scsi_u64to8b(ival, bl_ptr->max_write_same_length); } - scsi_u64to8b(UINT64_MAX, bl_ptr->max_write_same_length); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; @@ -10035,6 +10044,7 @@ ctl_inquiry_evpd_lbp(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_logical_block_prov *lbp_ptr; struct ctl_lun *lun; + const char *value; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; @@ -10072,7 +10082,14 @@ ctl_inquiry_evpd_lbp(struct ctl_scsiio *ctsio, int alloc_len) if (lun != NULL && lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) { lbp_ptr->flags = SVPD_LBP_UNMAP | SVPD_LBP_WS16 | SVPD_LBP_WS10 | SVPD_LBP_RZ | SVPD_LBP_ANC_SUP; - lbp_ptr->prov_type = SVPD_LBP_THIN; + value = ctl_get_opt(&lun->be_lun->options, "provisioning_type"); + if (value != NULL) { + if (strcmp(value, "resource") == 0) + lbp_ptr->prov_type = SVPD_LBP_RESOURCE; + else if (strcmp(value, "thin") == 0) + lbp_ptr->prov_type = SVPD_LBP_THIN; + } else + lbp_ptr->prov_type = SVPD_LBP_THIN; } ctl_set_success(ctsio); diff --git a/sys/cam/ctl/ctl.h b/sys/cam/ctl/ctl.h index ba78ff0..3c47a6f 100644 --- a/sys/cam/ctl/ctl.h +++ b/sys/cam/ctl/ctl.h @@ -217,6 +217,7 @@ void ctl_update_opts(ctl_options_t *opts, int num_args, struct ctl_be_arg *args); void ctl_free_opts(ctl_options_t *opts); char * ctl_get_opt(ctl_options_t *opts, const char *name); +int ctl_get_opt_number(ctl_options_t *opts, const char *name, uint64_t *num); int ctl_expand_number(const char 
*buf, uint64_t *num); #endif /* _KERNEL */ diff --git a/sys/cam/ctl/ctl_backend.c b/sys/cam/ctl/ctl_backend.c index 91576c4..86f7d3c 100644 --- a/sys/cam/ctl/ctl_backend.c +++ b/sys/cam/ctl/ctl_backend.c @@ -243,3 +243,14 @@ ctl_get_opt(ctl_options_t *opts, const char *name) } return (NULL); } + +int +ctl_get_opt_number(ctl_options_t *opts, const char *name, uint64_t *val) +{ + const char *value; + + value = ctl_get_opt(opts, name); + if (value == NULL) + return (-2); + return (ctl_expand_number(value, val)); +} diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index c85f9f3..8874285 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -1375,8 +1375,7 @@ daclose(struct disk *dp) error = cam_periph_runccb(ccb, daerror, /*cam_flags*/0, /*sense_flags*/SF_RETRY_UA | SF_QUIET_IR, softc->disk->d_devstat); - if (error == 0) - softc->flags &= ~DA_FLAG_DIRTY; + softc->flags &= ~DA_FLAG_DIRTY; xpt_release_ccb(ccb); } diff --git a/sys/cam/scsi/scsi_enc_ses.c b/sys/cam/scsi/scsi_enc_ses.c index 8387ef2..2c3f07e 100644 --- a/sys/cam/scsi/scsi_enc_ses.c +++ b/sys/cam/scsi/scsi_enc_ses.c @@ -2675,13 +2675,13 @@ ses_get_elm_devnames(enc_softc_t *enc, encioc_elm_devnames_t *elmdn) if (len < 0) return (EINVAL); - sbuf_new(&sb, elmdn->elm_devnames, len, 0); - cam_periph_unlock(enc->periph); + sbuf_new(&sb, NULL, len, SBUF_FIXEDLEN); ses_paths_iter(enc, &enc->enc_cache.elm_map[elmdn->elm_idx], ses_elmdevname_callback, &sb); sbuf_finish(&sb); elmdn->elm_names_len = sbuf_len(&sb); + copyout(sbuf_data(&sb), elmdn->elm_devnames, elmdn->elm_names_len + 1); cam_periph_lock(enc->periph); return (elmdn->elm_names_len > 0 ? 
0 : ENODEV); } diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c index c0c313e..09694c5 100644 --- a/sys/cam/scsi/scsi_pass.c +++ b/sys/cam/scsi/scsi_pass.c @@ -1881,6 +1881,18 @@ passdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread break; } + if (ccb->ccb_h.flags & CAM_CDB_POINTER) { + if (ccb->csio.cdb_len > IOCDBLEN) { + error = EINVAL; + break; + } + error = copyin(ccb->csio.cdb_io.cdb_ptr, + ccb->csio.cdb_io.cdb_bytes, ccb->csio.cdb_len); + if (error) + break; + ccb->ccb_h.flags &= ~CAM_CDB_POINTER; + } + /* * Some CCB types, like scan bus and scan lun can only go * through the transport layer device. @@ -2148,6 +2160,7 @@ passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb) { struct pass_softc *softc; struct cam_periph_map_info mapinfo; + uint8_t *cmd; xpt_opcode fc; int error; @@ -2159,6 +2172,14 @@ passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb) */ xpt_merge_ccb(ccb, inccb); + if (ccb->ccb_h.flags & CAM_CDB_POINTER) { + cmd = __builtin_alloca(ccb->csio.cdb_len); + error = copyin(ccb->csio.cdb_io.cdb_ptr, cmd, ccb->csio.cdb_len); + if (error) + return (error); + ccb->csio.cdb_io.cdb_ptr = cmd; + } + /* */ ccb->ccb_h.cbfcnp = passdone; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c index e27aa18..7c51399 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c @@ -1482,10 +1482,18 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * objects may be dirtied in syncing context, but only if they * were already pre-dirtied in open context. 
*/ +#ifdef DEBUG + if (dn->dn_objset->os_dsl_dataset != NULL) { + rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, + RW_READER, FTAG); + } ASSERT(!dmu_tx_is_syncing(tx) || BP_IS_HOLE(dn->dn_objset->os_rootbp) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_objset->os_dsl_dataset == NULL); + if (dn->dn_objset->os_dsl_dataset != NULL) + rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG); +#endif /* * We make this assert for private objects as well, but after we * check if we're already dirty. They are allowed to re-dirty @@ -1510,12 +1518,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * Don't set dirtyctx to SYNC if we're just modifying this as we * initialize the objset. */ - if (dn->dn_dirtyctx == DN_UNDIRTIED && - !BP_IS_HOLE(dn->dn_objset->os_rootbp)) { - dn->dn_dirtyctx = - (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN); - ASSERT(dn->dn_dirtyctx_firstset == NULL); - dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP); + if (dn->dn_dirtyctx == DN_UNDIRTIED) { + if (dn->dn_objset->os_dsl_dataset != NULL) { + rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, + RW_READER, FTAG); + } + if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) { + dn->dn_dirtyctx = (dmu_tx_is_syncing(tx) ? + DN_DIRTY_SYNC : DN_DIRTY_OPEN); + ASSERT(dn->dn_dirtyctx_firstset == NULL); + dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP); + } + if (dn->dn_objset->os_dsl_dataset != NULL) { + rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, + FTAG); + } } mutex_exit(&dn->dn_mtx); @@ -1560,8 +1577,14 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * this assertion only if we're not already dirty. 
*/ os = dn->dn_objset; +#ifdef DEBUG + if (dn->dn_objset->os_dsl_dataset != NULL) + rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_READER, FTAG); ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) || os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp)); + if (dn->dn_objset->os_dsl_dataset != NULL) + rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG); +#endif ASSERT(db->db.db_size != 0); dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c index b6ae968..f31ad2f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c @@ -489,8 +489,10 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) mutex_enter(&ds->ds_opening_lock); if (ds->ds_objset == NULL) { objset_t *os; + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), ds, dsl_dataset_get_blkptr(ds), &os); + rrw_exit(&ds->ds_bp_rwlock, FTAG); if (err == 0) { mutex_enter(&ds->ds_lock); @@ -876,9 +878,11 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) doca->doca_cred, tx); VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); bp = dsl_dataset_get_blkptr(ds); os = dmu_objset_create_impl(pdd->dd_pool->dp_spa, ds, bp, doca->doca_type, tx); + rrw_exit(&ds->ds_bp_rwlock, FTAG); if (doca->doca_userfunc != NULL) { doca->doca_userfunc(os, doca->doca_userarg, @@ -1051,7 +1055,6 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; ASSERT(!BP_IS_EMBEDDED(bp)); - ASSERT3P(bp, ==, os->os_rootbp); ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); ASSERT0(BP_GET_LEVEL(bp)); @@ -1064,6 +1067,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) bp->blk_fill = 0; for (int i = 0; i < dnp->dn_nblkptr; i++) 
bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]); + if (os->os_dsl_dataset != NULL) + rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG); + *os->os_rootbp = *bp; + if (os->os_dsl_dataset != NULL) + rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG); } /* ARGSUSED */ @@ -1083,6 +1091,7 @@ dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg) (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE); dsl_dataset_block_born(ds, bp, tx); } + kmem_free(bp, sizeof (*bp)); } /* called from dsl */ @@ -1096,6 +1105,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) list_t *list; list_t *newlist = NULL; dbuf_dirty_record_t *dr; + blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP); + *blkptr_copy = *os->os_rootbp; dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); @@ -1123,7 +1134,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) dmu_write_policy(os, NULL, 0, 0, &zp); zio = arc_write(pio, os->os_spa, tx->tx_txg, - os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), + blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c index 21ea6ef..28c6c48 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c @@ -1510,10 +1510,12 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) * If we actually created a non-clone, we need to create the * objset in our new dataset. 
*/ + rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG); if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { (void) dmu_objset_create_impl(dp->dp_spa, newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); } + rrw_exit(&newds->ds_bp_rwlock, FTAG); drba->drba_cookie->drc_ds = newds; @@ -1656,7 +1658,9 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds))); + rrw_exit(&ds->ds_bp_rwlock, FTAG); drba->drba_cookie->drc_ds = ds; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c index a76e74b8..fcd704f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c @@ -40,6 +40,13 @@ #include <sys/zfeature.h> int32_t zfs_pd_bytes_max = 50 * 1024 * 1024; /* 50MB */ +boolean_t send_holes_without_birth_time = B_TRUE; + +#ifdef _KERNEL +SYSCTL_DECL(_vfs_zfs); +SYSCTL_UINT(_vfs_zfs, OID_AUTO, send_holes_without_birth_time, CTLFLAG_RWTUN, + &send_holes_without_birth_time, 0, "Send holes without birth time"); +#endif typedef struct prefetch_data { kmutex_t pd_mtx; @@ -254,7 +261,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, * * Note that the meta-dnode cannot be reallocated. 
*/ - if ((!td->td_realloc_possible || + if (!send_holes_without_birth_time && + (!td->td_realloc_possible || zb->zb_object == DMU_META_DNODE_OBJECT) && td->td_hole_birth_enabled_txg <= td->td_min_txg) return (0); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c index eb203fe..3b7131e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c @@ -312,6 +312,7 @@ dsl_dataset_evict(void *dbu) mutex_destroy(&ds->ds_opening_lock); mutex_destroy(&ds->ds_sendstream_lock); refcount_destroy(&ds->ds_longholds); + rrw_destroy(&ds->ds_bp_rwlock); kmem_free(ds, sizeof (dsl_dataset_t)); } @@ -441,6 +442,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); + rrw_init(&ds->ds_bp_rwlock, B_FALSE); refcount_create(&ds->ds_longholds); bplist_create(&ds->ds_pending_deadlist); @@ -831,7 +833,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dsl_dataset_phys(origin)->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = dsl_dataset_phys(origin)->ds_uncompressed_bytes; + rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG); dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp; + rrw_exit(&origin->ds_bp_rwlock, FTAG); /* * Inherit flags that describe the dataset's contents @@ -1389,7 +1393,9 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, dsphys->ds_uncompressed_bytes = dsl_dataset_phys(ds)->ds_uncompressed_bytes; dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags; + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp; + rrw_exit(&ds->ds_bp_rwlock, FTAG); dmu_buf_rele(dbuf, FTAG); for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { @@ -1981,7 +1987,9 @@ 
dsl_dataset_space(dsl_dataset_t *ds, else *availbytesp = 0; } + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); *usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp); + rrw_exit(&ds->ds_bp_rwlock, FTAG); *availobjsp = DN_MAX_OBJECT - *usedobjsp; } @@ -1989,12 +1997,15 @@ boolean_t dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) { dsl_pool_t *dp = ds->ds_dir->dd_pool; + uint64_t birth; ASSERT(dsl_pool_config_held(dp)); if (snap == NULL) return (B_FALSE); - if (dsl_dataset_phys(ds)->ds_bp.blk_birth > - dsl_dataset_phys(snap)->ds_creation_txg) { + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); + birth = dsl_dataset_get_blkptr(ds)->blk_birth; + rrw_exit(&ds->ds_bp_rwlock, FTAG); + if (birth > dsl_dataset_phys(snap)->ds_creation_txg) { objset_t *os, *os_snap; /* * It may be that only the ZIL differs, because it was @@ -3058,11 +3069,15 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, /* swap blkptrs */ { + rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG); + rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG); blkptr_t tmp; tmp = dsl_dataset_phys(origin_head)->ds_bp; dsl_dataset_phys(origin_head)->ds_bp = dsl_dataset_phys(clone)->ds_bp; dsl_dataset_phys(clone)->ds_bp = tmp; + rrw_exit(&origin_head->ds_bp_rwlock, FTAG); + rrw_exit(&clone->ds_bp_rwlock, FTAG); } /* set dd_*_bytes */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c index b897176..09fa540 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c @@ -246,7 +246,9 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) uint64_t obj; ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); + rrw_exit(&ds->ds_bp_rwlock, FTAG); ASSERT(refcount_is_zero(&ds->ds_longholds)); if 
(defer && @@ -720,7 +722,9 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); ASSERT(ds->ds_prev == NULL || dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); + rrw_exit(&ds->ds_bp_rwlock, FTAG); ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); /* We need to log before removing it from the namespace. */ @@ -812,10 +816,12 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || dsl_dataset_phys(ds)->ds_unique_bytes == used); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); bptree_add(mos, dp->dp_bptree_obj, &dsl_dataset_phys(ds)->ds_bp, dsl_dataset_phys(ds)->ds_prev_snap_txg, used, comp, uncomp, tx); + rrw_exit(&ds->ds_bp_rwlock, FTAG); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, -used, -comp, -uncomp, tx); dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c index 9c34f45..4765359 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 
* Copyright (c) 2014 Integros [integros.com] @@ -497,8 +497,10 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); + rrw_exit(&ds->ds_bp_rwlock, FTAG); #ifdef _KERNEL zfs_create_fs(os, kcred, zplprops, tx); #endif @@ -811,7 +813,9 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) * The $ORIGIN can't have any data, or the accounting * will be wrong. */ + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth); + rrw_exit(&ds->ds_bp_rwlock, FTAG); /* The origin doesn't get attached to itself */ if (ds->ds_object == prev->ds_object) { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c index fee1eac..2d8bf01 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c @@ -1122,7 +1122,9 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) * Iterate over the bps in this ds. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx); + rrw_exit(&ds->ds_bp_rwlock, FTAG); char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h index f05b2cf..977232d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. 
All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Integros [integros.com] @@ -101,9 +101,14 @@ struct objset { zfs_redundant_metadata_type_t os_redundant_metadata; int os_recordsize; + /* + * Pointer is constant; the blkptr it points to is protected by + * os_dsl_dataset->ds_bp_rwlock + */ + blkptr_t *os_rootbp; + /* no lock needed: */ struct dmu_tx *os_synctx; /* XXX sketchy */ - blkptr_t *os_rootbp; zil_header_t os_zil_header; list_t os_synced_dnodes; uint64_t os_flags; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h index 54c63b9..18466bb 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h @@ -39,6 +39,7 @@ #include <sys/zfs_context.h> #include <sys/dsl_deadlist.h> #include <sys/refcount.h> +#include <sys/rrwlock.h> #include <zfeature_common.h> #ifdef __cplusplus @@ -141,6 +142,7 @@ typedef struct dsl_dataset_phys { typedef struct dsl_dataset { dmu_buf_user_t ds_dbu; + rrwlock_t ds_bp_rwlock; /* Protects ds_phys->ds_bp */ /* Immutable: */ struct dsl_dir *ds_dir; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h index 6f3013d..dc5b5be 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h @@ -381,6 +381,7 @@ extern void vdev_set_min_asize(vdev_t *vd); */ /* zdb uses this tunable, so it must be declared here to make lint happy. 
*/ extern int zfs_vdev_cache_size; +extern uint_t zfs_geom_probe_vdev_key; #ifdef illumos /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h index 79728c1..c127788 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h @@ -355,6 +355,8 @@ extern zil_get_data_t zfs_get_data; extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE]; extern int zfsfstype; +extern int zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf); + #endif /* _KERNEL */ extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index ac994f5..fa38d08 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -63,6 +63,16 @@ TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); +/* Declare local functions */ +static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read); + +/* + * Thread local storage used to indicate when a thread is probing geoms + * for their guids. If NULL, this thread is not tasting geoms. If non NULL, + * it is looking for a replacement for the vdev_t* that is its value. 
+ */ +uint_t zfs_geom_probe_vdev_key; + static void vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) { @@ -77,32 +87,17 @@ vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) } static void -vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) +vdev_geom_set_physpath(struct g_consumer *cp, boolean_t do_null_update) { + boolean_t needs_update = B_FALSE; vdev_t *vd; - spa_t *spa; char *physpath; int error, physpath_len; - vd = cp->private; - if (vd == NULL) - return; - - if (strcmp(attr, "GEOM::rotation_rate") == 0) { - vdev_geom_set_rotation_rate(vd, cp); - return; - } - - if (strcmp(attr, "GEOM::physpath") != 0) - return; - if (g_access(cp, 1, 0, 0) != 0) return; - /* - * Record/Update physical path information for this device. - */ - spa = vd->vdev_spa; + vd = cp->private; physpath_len = MAXPATHLEN; physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); @@ -114,12 +109,46 @@ vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) g_topology_assert(); old_physpath = vd->vdev_physpath; vd->vdev_physpath = spa_strdup(physpath); - spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); - if (old_physpath != NULL) + if (old_physpath != NULL) { + needs_update = (strcmp(old_physpath, + vd->vdev_physpath) != 0); spa_strfree(old_physpath); + } else + needs_update = do_null_update; } g_free(physpath); + + /* + * If the physical path changed, update the config. + * Only request an update for previously unset physpaths if + * requested by the caller. 
+ */ + if (needs_update) + spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE); + +} + +static void +vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) +{ + vdev_t *vd; + char *old_physpath; + int error; + + vd = cp->private; + if (vd == NULL) + return; + + if (strcmp(attr, "GEOM::rotation_rate") == 0) { + vdev_geom_set_rotation_rate(vd, cp); + return; + } + + if (strcmp(attr, "GEOM::physpath") == 0) { + vdev_geom_set_physpath(cp, /*do_null_update*/B_TRUE); + return; + } } static void @@ -163,6 +192,17 @@ vdev_geom_attach(struct g_provider *pp, vdev_t *vd) g_topology_assert(); ZFS_LOG(1, "Attaching to %s.", pp->name); + + if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) { + ZFS_LOG(1, "Failing attach of %s. Incompatible sectorsize %d\n", + pp->name, pp->sectorsize); + return (NULL); + } else if (pp->mediasize < SPA_MINDEVSIZE) { + ZFS_LOG(1, "Failing attach of %s. Incompatible mediasize %ju\n", + pp->name, pp->mediasize); + return (NULL); + } + /* Do we have geom already? No? Create one. 
*/ LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { if (gp->flags & G_GEOM_WITHER) @@ -180,14 +220,14 @@ vdev_geom_attach(struct g_provider *pp, vdev_t *vd) if (error != 0) { ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__, __LINE__, error); - g_wither_geom(gp, ENXIO); + vdev_geom_detach(cp, B_FALSE); return (NULL); } error = g_access(cp, 1, 0, 1); if (error != 0) { ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__, __LINE__, error); - g_wither_geom(gp, ENXIO); + vdev_geom_detach(cp, B_FALSE); return (NULL); } ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); @@ -205,15 +245,14 @@ vdev_geom_attach(struct g_provider *pp, vdev_t *vd) if (error != 0) { ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__, __LINE__, error); - g_destroy_consumer(cp); + vdev_geom_detach(cp, B_FALSE); return (NULL); } error = g_access(cp, 1, 0, 1); if (error != 0) { ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__, __LINE__, error); - g_detach(cp); - g_destroy_consumer(cp); + vdev_geom_detach(cp, B_FALSE); return (NULL); } ZFS_LOG(1, "Created consumer for %s.", pp->name); @@ -239,39 +278,39 @@ vdev_geom_attach(struct g_provider *pp, vdev_t *vd) * 2) Set it to a linked list of vdevs, not just a single vdev */ cp->private = vd; - vd->vdev_tsd = cp; + if (vd != NULL) { + vd->vdev_tsd = cp; + vdev_geom_set_physpath(cp, /*do_null_update*/B_FALSE); + } cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; return (cp); } static void -vdev_geom_close_locked(vdev_t *vd) +vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read) { struct g_geom *gp; - struct g_consumer *cp; + vdev_t *vd; g_topology_assert(); - cp = vd->vdev_tsd; - if (cp == NULL) - return; + ZFS_LOG(1, "Detaching consumer. Provider %s.", + cp->provider && cp->provider->name ? 
cp->provider->name : "NULL"); - ZFS_LOG(1, "Closing access to %s.", cp->provider->name); - KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__)); - vd->vdev_tsd = NULL; - vd->vdev_delayed_close = B_FALSE; + vd = cp->private; cp->private = NULL; gp = cp->geom; - g_access(cp, -1, 0, -1); + if (open_for_read) + g_access(cp, -1, 0, -1); /* Destroy consumer on last close. */ if (cp->acr == 0 && cp->ace == 0) { if (cp->acw > 0) g_access(cp, 0, -cp->acw, 0); if (cp->provider != NULL) { - ZFS_LOG(1, "Destroyed consumer to %s.", - cp->provider->name); + ZFS_LOG(1, "Destroying consumer to %s.", + cp->provider->name ? cp->provider->name : "NULL"); g_detach(cp); } g_destroy_consumer(cp); @@ -284,6 +323,24 @@ vdev_geom_close_locked(vdev_t *vd) } static void +vdev_geom_close_locked(vdev_t *vd) +{ + struct g_consumer *cp; + + g_topology_assert(); + + cp = vd->vdev_tsd; + vd->vdev_tsd = NULL; + vd->vdev_delayed_close = B_FALSE; + if (cp == NULL) + return; + + ZFS_LOG(1, "Closing access to %s.", cp->provider->name); + + vdev_geom_detach(cp, B_TRUE); +} + +static void nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) { @@ -291,60 +348,82 @@ nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); } -static int -vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) +/* + * Issue one or more bios to the vdev in parallel + * cmds, datas, offsets, errors, and sizes are arrays of length ncmds. Each IO + * operation is described by parallel entries from each array. 
There may be + * more bios actually issued than entries in the array + */ +static void +vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets, + off_t *sizes, int *errors, int ncmds) { - struct bio *bp; + struct bio **bios; u_char *p; - off_t off, maxio; - int error; - - ASSERT((offset % cp->provider->sectorsize) == 0); - ASSERT((size % cp->provider->sectorsize) == 0); + off_t off, maxio, s, end; + int i, n_bios, j; + size_t bios_size; - bp = g_alloc_bio(); - off = offset; - offset += size; - p = data; maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); - error = 0; - - for (; off < offset; off += maxio, p += maxio, size -= maxio) { - bzero(bp, sizeof(*bp)); - bp->bio_cmd = cmd; - bp->bio_done = NULL; - bp->bio_offset = off; - bp->bio_length = MIN(size, maxio); - bp->bio_data = p; - g_io_request(bp, cp); - error = biowait(bp, "vdev_geom_io"); - if (error != 0) - break; + n_bios = 0; + + /* How many bios are required for all commands ? */ + for (i = 0; i < ncmds; i++) + n_bios += (sizes[i] + maxio - 1) / maxio; + + /* Allocate memory for the bios */ + bios_size = n_bios * sizeof(struct bio*); + bios = kmem_zalloc(bios_size, KM_SLEEP); + + /* Prepare and issue all of the bios */ + for (i = j = 0; i < ncmds; i++) { + off = offsets[i]; + p = datas[i]; + s = sizes[i]; + end = off + s; + ASSERT((off % cp->provider->sectorsize) == 0); + ASSERT((s % cp->provider->sectorsize) == 0); + + for (; off < end; off += maxio, p += maxio, s -= maxio, j++) { + bios[j] = g_alloc_bio(); + bios[j]->bio_cmd = cmds[i]; + bios[j]->bio_done = NULL; + bios[j]->bio_offset = off; + bios[j]->bio_length = MIN(s, maxio); + bios[j]->bio_data = p; + g_io_request(bios[j], cp); + } } + ASSERT(j == n_bios); - g_destroy_bio(bp); - return (error); -} - -static void -vdev_geom_taste_orphan(struct g_consumer *cp) -{ + /* Wait for all of the bios to complete, and clean them up */ + for (i = j = 0; i < ncmds; i++) { + off = offsets[i]; + s = sizes[i]; + end = off + s; - KASSERT(1 
== 0, ("%s called while tasting %s.", __func__, - cp->provider->name)); + for (; off < end; off += maxio, s -= maxio, j++) { + errors[i] = biowait(bios[j], "vdev_geom_io") || errors[i]; + g_destroy_bio(bios[j]); + } + } + kmem_free(bios, bios_size); } static int vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) { struct g_provider *pp; - vdev_label_t *label; + vdev_phys_t *vdev_lists[VDEV_LABELS]; char *p, *buf; size_t buflen; - uint64_t psize; - off_t offset, size; - uint64_t state, txg; - int error, l, len; + uint64_t psize, state, txg; + off_t offsets[VDEV_LABELS]; + off_t size; + off_t sizes[VDEV_LABELS]; + int cmds[VDEV_LABELS]; + int errors[VDEV_LABELS]; + int l, len; g_topology_assert_not(); @@ -354,22 +433,32 @@ vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) psize = pp->mediasize; psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); - size = sizeof(*label) + pp->sectorsize - - ((sizeof(*label) - 1) % pp->sectorsize) - 1; + size = sizeof(*vdev_lists[0]) + pp->sectorsize - + ((sizeof(*vdev_lists[0]) - 1) % pp->sectorsize) - 1; - label = kmem_alloc(size, KM_SLEEP); - buflen = sizeof(label->vl_vdev_phys.vp_nvlist); + buflen = sizeof(vdev_lists[0]->vp_nvlist); *config = NULL; + /* Create all of the IO requests */ for (l = 0; l < VDEV_LABELS; l++) { + cmds[l] = BIO_READ; + vdev_lists[l] = kmem_alloc(size, KM_SLEEP); + offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE; + sizes[l] = size; + errors[l] = 0; + ASSERT(offsets[l] % pp->sectorsize == 0); + } - offset = vdev_label_offset(psize, l, 0); - if ((offset % pp->sectorsize) != 0) - continue; + /* Issue the IO requests */ + vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors, + VDEV_LABELS); - if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) + /* Parse the labels */ + for (l = 0; l < VDEV_LABELS; l++) { + if (errors[l] != 0) continue; - buf = label->vl_vdev_phys.vp_nvlist; + + buf = vdev_lists[l]->vp_nvlist; if (nvlist_unpack(buf, buflen, config, 
0) != 0) continue; @@ -381,7 +470,8 @@ vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) continue; } - if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && + if (state != POOL_STATE_SPARE && + state != POOL_STATE_L2CACHE && (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, &txg) != 0 || txg == 0)) { nvlist_free(*config); @@ -392,7 +482,10 @@ vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) break; } - kmem_free(label, size); + /* Free the label storage */ + for (l = 0; l < VDEV_LABELS; l++) + kmem_free(vdev_lists[l], size); + return (*config == NULL ? ENOENT : 0); } @@ -466,41 +559,12 @@ ignore: nvlist_free(cfg); } -static int -vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp) -{ - int error; - - if (pp->flags & G_PF_WITHER) - return (EINVAL); - g_attach(cp, pp); - error = g_access(cp, 1, 0, 0); - if (error == 0) { - if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) - error = EINVAL; - else if (pp->mediasize < SPA_MINDEVSIZE) - error = EINVAL; - if (error != 0) - g_access(cp, -1, 0, 0); - } - if (error != 0) - g_detach(cp); - return (error); -} - -static void -vdev_geom_detach_taster(struct g_consumer *cp) -{ - g_access(cp, -1, 0, 0); - g_detach(cp); -} - int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs, uint64_t *count) { struct g_class *mp; - struct g_geom *gp, *zgp; + struct g_geom *gp; struct g_provider *pp; struct g_consumer *zcp; nvlist_t *vdev_cfg; @@ -510,11 +574,6 @@ vdev_geom_read_pool_label(const char *name, DROP_GIANT(); g_topology_lock(); - zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); - /* This orphan function should be never called. 
*/ - zgp->orphan = vdev_geom_taste_orphan; - zcp = g_new_consumer(zgp); - *configs = NULL; *count = 0; pool_guid = 0; @@ -527,12 +586,13 @@ vdev_geom_read_pool_label(const char *name, LIST_FOREACH(pp, &gp->provider, provider) { if (pp->flags & G_PF_WITHER) continue; - if (vdev_geom_attach_taster(zcp, pp) != 0) + zcp = vdev_geom_attach(pp, NULL); + if (zcp == NULL) continue; g_topology_unlock(); error = vdev_geom_read_config(zcp, &vdev_cfg); g_topology_lock(); - vdev_geom_detach_taster(zcp); + vdev_geom_detach(zcp, B_TRUE); if (error) continue; ZFS_LOG(1, "successfully read vdev config"); @@ -542,9 +602,6 @@ vdev_geom_read_pool_label(const char *name, } } } - - g_destroy_consumer(zcp); - g_destroy_geom(zgp); g_topology_unlock(); PICKUP_GIANT(); @@ -566,22 +623,55 @@ vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) } } +static boolean_t +vdev_attach_ok(vdev_t *vd, struct g_provider *pp) +{ + uint64_t pool_guid; + uint64_t vdev_guid; + struct g_consumer *zcp; + boolean_t pool_ok; + boolean_t vdev_ok; + + zcp = vdev_geom_attach(pp, NULL); + if (zcp == NULL) { + ZFS_LOG(1, "Unable to attach tasting instance to %s.", + pp->name); + return (B_FALSE); + } + g_topology_unlock(); + vdev_geom_read_guids(zcp, &pool_guid, &vdev_guid); + g_topology_lock(); + vdev_geom_detach(zcp, B_TRUE); + + /* + * Check that the label's vdev guid matches the desired guid. If the + * label has a pool guid, check that it matches too. (Inactive spares + * and L2ARCs do not have any pool guid in the label.) 
+ */ + if ((pool_guid == 0 || pool_guid == spa_guid(vd->vdev_spa)) && + vdev_guid == vd->vdev_guid) { + ZFS_LOG(1, "guids match for provider %s.", vd->vdev_path); + return (B_TRUE); + } else { + ZFS_LOG(1, "guid mismatch for provider %s: " + "%ju:%ju != %ju:%ju.", vd->vdev_path, + (uintmax_t)spa_guid(vd->vdev_spa), + (uintmax_t)vd->vdev_guid, + (uintmax_t)pool_guid, (uintmax_t)vdev_guid); + return (B_FALSE); + } +} + static struct g_consumer * vdev_geom_attach_by_guids(vdev_t *vd) { struct g_class *mp; - struct g_geom *gp, *zgp; + struct g_geom *gp; struct g_provider *pp; - struct g_consumer *cp, *zcp; - uint64_t pguid, vguid; + struct g_consumer *cp; g_topology_assert(); - zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); - /* This orphan function should be never called. */ - zgp->orphan = vdev_geom_taste_orphan; - zcp = g_new_consumer(zgp); - cp = NULL; LIST_FOREACH(mp, &g_classes, class) { if (mp == &zfs_vdev_class) @@ -590,22 +680,7 @@ vdev_geom_attach_by_guids(vdev_t *vd) if (gp->flags & G_GEOM_WITHER) continue; LIST_FOREACH(pp, &gp->provider, provider) { - if (vdev_geom_attach_taster(zcp, pp) != 0) - continue; - g_topology_unlock(); - vdev_geom_read_guids(zcp, &pguid, &vguid); - g_topology_lock(); - vdev_geom_detach_taster(zcp); - /* - * Check that the label's vdev guid matches the - * desired guid. If the label has a pool guid, - * check that it matches too. (Inactive spares - * and L2ARCs do not have any pool guid in the - * label.) 
- */ - if ((pguid != 0 && - pguid != spa_guid(vd->vdev_spa)) || - vguid != vd->vdev_guid) + if (!vdev_attach_ok(vd, pp)) continue; cp = vdev_geom_attach(pp, vd); if (cp == NULL) { @@ -622,8 +697,6 @@ vdev_geom_attach_by_guids(vdev_t *vd) break; } end: - g_destroy_consumer(zcp); - g_destroy_geom(zgp); return (cp); } @@ -664,7 +737,6 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid) { struct g_provider *pp; struct g_consumer *cp; - uint64_t pguid, vguid; g_topology_assert(); @@ -672,34 +744,8 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid) pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); if (pp != NULL) { ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); - cp = vdev_geom_attach(pp, vd); - if (cp != NULL && check_guid && ISP2(pp->sectorsize) && - pp->sectorsize <= VDEV_PAD_SIZE) { - g_topology_unlock(); - vdev_geom_read_guids(cp, &pguid, &vguid); - g_topology_lock(); - /* - * Check that the label's vdev guid matches the - * desired guid. If the label has a pool guid, - * check that it matches too. (Inactive spares - * and L2ARCs do not have any pool guid in the - * label.) - */ - if ((pguid != 0 && - pguid != spa_guid(vd->vdev_spa)) || - vguid != vd->vdev_guid) { - vdev_geom_close_locked(vd); - cp = NULL; - ZFS_LOG(1, "guid mismatch for provider %s: " - "%ju:%ju != %ju:%ju.", vd->vdev_path, - (uintmax_t)spa_guid(vd->vdev_spa), - (uintmax_t)vd->vdev_guid, - (uintmax_t)pguid, (uintmax_t)vguid); - } else { - ZFS_LOG(1, "guid match for provider %s.", - vd->vdev_path); - } - } + if (!check_guid || vdev_attach_ok(vd, pp)) + cp = vdev_geom_attach(pp, vd); } return (cp); @@ -714,6 +760,9 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, size_t bufsize; int error; + /* Set the TLS to indicate downstack that we should not access zvols*/ + VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0); + /* * We must have a pathname, and it must be absolute. 
*/ @@ -764,6 +813,9 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, } } + /* Clear the TLS now that tasting is done */ + VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0); + if (cp == NULL) { ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); error = ENOENT; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index aea3c9e..d76ba07 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -207,6 +207,7 @@ extern void zfs_fini(void); uint_t zfs_fsyncer_key; extern uint_t rrw_tsd_key; static uint_t zfs_allow_log_key; +extern uint_t zfs_geom_probe_vdev_key; typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *); typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *); @@ -6735,6 +6736,7 @@ zfs__init(void) tsd_create(&zfs_fsyncer_key, NULL); tsd_create(&rrw_tsd_key, rrw_tsd_destroy); tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy); + tsd_create(&zfs_geom_probe_vdev_key, NULL); printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n"); root_mount_rel(zfs_root_token); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 23778fb..dcfa70f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -421,7 +421,7 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) vm_page_reference(pp); vm_page_lock(pp); zfs_vmobject_wunlock(obj); - vm_page_busy_sleep(pp, "zfsmwb"); + vm_page_busy_sleep(pp, "zfsmwb", true); zfs_vmobject_wlock(obj); continue; } @@ -476,7 +476,7 @@ page_hold(vnode_t *vp, int64_t start) vm_page_reference(pp); vm_page_lock(pp); zfs_vmobject_wunlock(obj); - vm_page_busy_sleep(pp, "zfsmwb"); + vm_page_busy_sleep(pp, "zfsmwb", true); zfs_vmobject_wlock(obj); continue; } @@ 
-5977,8 +5977,19 @@ zfs_vptocnp(struct vop_vptocnp_args *ap) } if (zp->z_id != parent || zfsvfs->z_parent == zfsvfs) { + char name[MAXNAMLEN + 1]; + znode_t *dzp; + size_t len; + + error = zfs_znode_parent_and_name(zp, &dzp, name); + if (error == 0) { + len = strlen(name); + *ap->a_buflen -= len; + bcopy(name, ap->a_buf + *ap->a_buflen, len); + *ap->a_vpp = ZTOV(dzp); + } ZFS_EXIT(zfsvfs); - return (vop_stdvptocnp(ap)); + return (error); } ZFS_EXIT(zfsvfs); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index c947e54..224e829 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -1936,7 +1936,6 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) mutex_destroy(&zfsvfs->z_hold_mtx[i]); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } - #endif /* _KERNEL */ static int @@ -2192,3 +2191,35 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, zfs_release_sa_handle(hdl, db, FTAG); return (error); } + +#ifdef _KERNEL +int +zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + uint64_t parent; + int is_xattrdir; + int err; + + /* Extended attributes should not be visible as regular files. */ + if ((zp->z_pflags & ZFS_XATTR) != 0) + return (SET_ERROR(EINVAL)); + + err = zfs_obj_to_pobj(zfsvfs->z_os, zp->z_sa_hdl, zfsvfs->z_attr_table, + &parent, &is_xattrdir); + if (err != 0) + return (err); + ASSERT0(is_xattrdir); + + /* No name as this is a root object. 
*/ + if (parent == zp->z_id) + return (SET_ERROR(EINVAL)); + + err = zap_value_search(zfsvfs->z_os, parent, zp->z_id, + ZFS_DIRENT_OBJ(-1ULL), buf); + if (err != 0) + return (err); + err = zfs_zget(zfsvfs, parent, dzpp); + return (err); +} +#endif /* _KERNEL */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c index 77d951e..d1427b0 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c @@ -1123,36 +1123,30 @@ zvol_open(struct g_provider *pp, int flag, int count) return (err); } #else /* !illumos */ - boolean_t locked = B_FALSE; - - /* - * Protect against recursively entering spa_namespace_lock - * when spa_open() is used for a pool on a (local) ZVOL(s). - * This is needed since we replaced upstream zfsdev_state_lock - * with spa_namespace_lock in the ZVOL code. - * We are using the same trick as spa_open(). - * Note that calls in zvol_first_open which need to resolve - * pool name to a spa object will enter spa_open() - * recursively, but that function already has all the - * necessary protection. - */ - if (!MUTEX_HELD(&zfsdev_state_lock)) { - mutex_enter(&zfsdev_state_lock); - locked = B_TRUE; + if (tsd_get(zfs_geom_probe_vdev_key) != NULL) { + /* + * if zfs_geom_probe_vdev_key is set, that means that zfs is + * attempting to probe geom providers while looking for a + * replacement for a missing VDEV. In this case, the + * spa_namespace_lock will not be held, but it is still illegal + * to use a zvol as a vdev. 
Deadlocks can result if another + * thread has spa_namespace_lock + */ + return (EOPNOTSUPP); } + mutex_enter(&zfsdev_state_lock); + zv = pp->private; if (zv == NULL) { - if (locked) - mutex_exit(&zfsdev_state_lock); + mutex_exit(&zfsdev_state_lock); return (SET_ERROR(ENXIO)); } if (zv->zv_total_opens == 0) { err = zvol_first_open(zv); if (err) { - if (locked) - mutex_exit(&zfsdev_state_lock); + mutex_exit(&zfsdev_state_lock); return (err); } pp->mediasize = zv->zv_volsize; @@ -1186,8 +1180,7 @@ zvol_open(struct g_provider *pp, int flag, int count) mutex_exit(&zfsdev_state_lock); #else zv->zv_total_opens += count; - if (locked) - mutex_exit(&zfsdev_state_lock); + mutex_exit(&zfsdev_state_lock); #endif return (err); @@ -1197,8 +1190,7 @@ out: #ifdef illumos mutex_exit(&zfsdev_state_lock); #else - if (locked) - mutex_exit(&zfsdev_state_lock); + mutex_exit(&zfsdev_state_lock); #endif return (err); } diff --git a/sys/dev/bxe/bxe.c b/sys/dev/bxe/bxe.c index b37f6c8..0c3d1d6 100644 --- a/sys/dev/bxe/bxe.c +++ b/sys/dev/bxe/bxe.c @@ -27,7 +27,7 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#define BXE_DRIVER_VERSION "1.78.81" +#define BXE_DRIVER_VERSION "1.78.89" #include "bxe.h" #include "ecore_sp.h" @@ -500,7 +500,14 @@ static const struct { { STATS_OFFSET32(mbuf_alloc_tpa), 4, STATS_FLAGS_FUNC, "mbuf_alloc_tpa"}, { STATS_OFFSET32(tx_queue_full_return), - 4, STATS_FLAGS_FUNC, "tx_queue_full_return"} + 4, STATS_FLAGS_FUNC, "tx_queue_full_return"}, + { STATS_OFFSET32(tx_request_link_down_failures), + 4, STATS_FLAGS_FUNC, "tx_request_link_down_failures"}, + { STATS_OFFSET32(bd_avail_too_less_failures), + 4, STATS_FLAGS_FUNC, "bd_avail_too_less_failures"}, + { STATS_OFFSET32(tx_mq_not_empty), + 4, STATS_FLAGS_FUNC, "tx_mq_not_empty"} + }; static const struct { @@ -613,7 +620,14 @@ static const struct { { Q_STATS_OFFSET32(mbuf_alloc_tpa), 4, "mbuf_alloc_tpa"}, { Q_STATS_OFFSET32(tx_queue_full_return), - 4, "tx_queue_full_return"} + 4, "tx_queue_full_return"}, + { 
Q_STATS_OFFSET32(tx_request_link_down_failures), + 4, "tx_request_link_down_failures"}, + { Q_STATS_OFFSET32(bd_avail_too_less_failures), + 4, "bd_avail_too_less_failures"}, + { Q_STATS_OFFSET32(tx_mq_not_empty), + 4, "tx_mq_not_empty"} + }; #define BXE_NUM_ETH_STATS ARRAY_SIZE(bxe_eth_stats_arr) @@ -5612,7 +5626,7 @@ bxe_tx_start(struct ifnet *ifp) BXE_FP_TX_UNLOCK(fp); } -#if __FreeBSD_version >= 800000 +#if __FreeBSD_version >= 901504 static int bxe_tx_mq_start_locked(struct bxe_softc *sc, @@ -5634,10 +5648,16 @@ bxe_tx_mq_start_locked(struct bxe_softc *sc, return (EINVAL); } - if (!sc->link_vars.link_up || - (ifp->if_drv_flags & - (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { + if (m != NULL) { rc = drbr_enqueue(ifp, tx_br, m); + if (rc != 0) { + fp->eth_q_stats.tx_soft_errors++; + goto bxe_tx_mq_start_locked_exit; + } + } + + if (!sc->link_vars.link_up || !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { + fp->eth_q_stats.tx_request_link_down_failures++; goto bxe_tx_mq_start_locked_exit; } @@ -5647,24 +5667,22 @@ bxe_tx_mq_start_locked(struct bxe_softc *sc, fp->eth_q_stats.tx_max_drbr_queue_depth = depth; } - if (m == NULL) { - /* no new work, check for pending frames */ - next = drbr_dequeue(ifp, tx_br); - } else if (drbr_needs_enqueue(ifp, tx_br)) { - /* have both new and pending work, maintain packet order */ - rc = drbr_enqueue(ifp, tx_br, m); - if (rc != 0) { - fp->eth_q_stats.tx_soft_errors++; - goto bxe_tx_mq_start_locked_exit; - } - next = drbr_dequeue(ifp, tx_br); - } else { - /* new work only and nothing pending */ - next = m; - } - /* keep adding entries while there are frames to send */ - while (next != NULL) { + while ((next = drbr_peek(ifp, tx_br)) != NULL) { + /* handle any completions if we're running low */ + tx_bd_avail = bxe_tx_avail(sc, fp); + if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { + /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */ + bxe_txeof(sc, fp); + tx_bd_avail = bxe_tx_avail(sc, fp); + if (tx_bd_avail < 
(BXE_TSO_MAX_SEGMENTS + 1)) { + fp->eth_q_stats.bd_avail_too_less_failures++; + m_freem(next); + drbr_advance(ifp, tx_br); + rc = ENOBUFS; + break; + } + } /* the mbuf now belongs to us */ fp->eth_q_stats.mbuf_alloc_tx++; @@ -5680,11 +5698,11 @@ bxe_tx_mq_start_locked(struct bxe_softc *sc, if (next != NULL) { /* mark the TX queue as full and save the frame */ ifp->if_drv_flags |= IFF_DRV_OACTIVE; - /* XXX this may reorder the frame */ - rc = drbr_enqueue(ifp, tx_br, next); + drbr_putback(ifp, tx_br, next); fp->eth_q_stats.mbuf_alloc_tx--; fp->eth_q_stats.tx_frames_deferred++; - } + } else + drbr_advance(ifp, tx_br); /* stop looking for more work */ break; @@ -5696,18 +5714,7 @@ bxe_tx_mq_start_locked(struct bxe_softc *sc, /* send a copy of the frame to any BPF listeners */ BPF_MTAP(ifp, next); - tx_bd_avail = bxe_tx_avail(sc, fp); - - /* handle any completions if we're running low */ - if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { - /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */ - bxe_txeof(sc, fp); - if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { - break; - } - } - - next = drbr_dequeue(ifp, tx_br); + drbr_advance(ifp, tx_br); } /* all TX packets were dequeued and/or the tx ring is full */ @@ -5717,10 +5724,28 @@ bxe_tx_mq_start_locked(struct bxe_softc *sc, } bxe_tx_mq_start_locked_exit: + /* If we didn't drain the drbr, enqueue a task in the future to do it. */ + if (!drbr_empty(ifp, tx_br)) { + fp->eth_q_stats.tx_mq_not_empty++; + taskqueue_enqueue_timeout(fp->tq, &fp->tx_timeout_task, 1); + } return (rc); } +static void +bxe_tx_mq_start_deferred(void *arg, + int pending) +{ + struct bxe_fastpath *fp = (struct bxe_fastpath *)arg; + struct bxe_softc *sc = fp->sc; + struct ifnet *ifp = sc->ifnet; + + BXE_FP_TX_LOCK(fp); + bxe_tx_mq_start_locked(sc, ifp, fp, NULL); + BXE_FP_TX_UNLOCK(fp); +} + /* Multiqueue (TSS) dispatch routine. 
*/ static int bxe_tx_mq_start(struct ifnet *ifp, @@ -5742,8 +5767,10 @@ bxe_tx_mq_start(struct ifnet *ifp, if (BXE_FP_TX_TRYLOCK(fp)) { rc = bxe_tx_mq_start_locked(sc, ifp, fp, m); BXE_FP_TX_UNLOCK(fp); - } else + } else { rc = drbr_enqueue(ifp, fp->tx_br, m); + taskqueue_enqueue(fp->tq, &fp->tx_task); + } return (rc); } @@ -5778,7 +5805,7 @@ bxe_mq_flush(struct ifnet *ifp) if_qflush(ifp); } -#endif /* FreeBSD_version >= 800000 */ +#endif /* FreeBSD_version >= 901504 */ static uint16_t bxe_cid_ilt_lines(struct bxe_softc *sc) @@ -6138,7 +6165,7 @@ bxe_free_fp_buffers(struct bxe_softc *sc) for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; -#if __FreeBSD_version >= 800000 +#if __FreeBSD_version >= 901504 if (fp->tx_br != NULL) { /* just in case bxe_mq_flush() wasn't called */ if (mtx_initialized(&fp->tx_mtx)) { @@ -6965,6 +6992,8 @@ bxe_link_attn(struct bxe_softc *sc) uint32_t pause_enabled = 0; struct host_port_stats *pstats; int cmng_fns; + struct bxe_fastpath *fp; + int i; /* Make sure that we are synced with the current statistics */ bxe_stats_handle(sc, STATS_EVENT_STOP); @@ -6996,6 +7025,12 @@ bxe_link_attn(struct bxe_softc *sc) if (sc->state == BXE_STATE_OPEN) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } + + /* Restart tx when the link comes back. 
*/ + FOR_EACH_ETH_QUEUE(sc, i) { + fp = &sc->fp[i]; + taskqueue_enqueue(fp->tq, &fp->tx_task); + } } if (sc->link_vars.link_up && sc->link_vars.line_speed) { @@ -9047,6 +9082,10 @@ bxe_interrupt_detach(struct bxe_softc *sc) fp = &sc->fp[i]; if (fp->tq) { taskqueue_drain(fp->tq, &fp->tq_task); + taskqueue_drain(fp->tq, &fp->tx_task); + while (taskqueue_cancel_timeout(fp->tq, &fp->tx_timeout_task, + NULL)) + taskqueue_drain_timeout(fp->tq, &fp->tx_timeout_task); taskqueue_free(fp->tq); fp->tq = NULL; } @@ -9079,9 +9118,9 @@ bxe_interrupt_attach(struct bxe_softc *sc) snprintf(sc->sp_tq_name, sizeof(sc->sp_tq_name), "bxe%d_sp_tq", sc->unit); TASK_INIT(&sc->sp_tq_task, 0, bxe_handle_sp_tq, sc); - sc->sp_tq = taskqueue_create_fast(sc->sp_tq_name, M_NOWAIT, - taskqueue_thread_enqueue, - &sc->sp_tq); + sc->sp_tq = taskqueue_create(sc->sp_tq_name, M_NOWAIT, + taskqueue_thread_enqueue, + &sc->sp_tq); taskqueue_start_threads(&sc->sp_tq, 1, PWAIT, /* lower priority */ "%s", sc->sp_tq_name); @@ -9091,9 +9130,12 @@ bxe_interrupt_attach(struct bxe_softc *sc) snprintf(fp->tq_name, sizeof(fp->tq_name), "bxe%d_fp%d_tq", sc->unit, i); TASK_INIT(&fp->tq_task, 0, bxe_handle_fp_tq, fp); - fp->tq = taskqueue_create_fast(fp->tq_name, M_NOWAIT, - taskqueue_thread_enqueue, - &fp->tq); + TASK_INIT(&fp->tx_task, 0, bxe_tx_mq_start_deferred, fp); + fp->tq = taskqueue_create(fp->tq_name, M_NOWAIT, + taskqueue_thread_enqueue, + &fp->tq); + TIMEOUT_TASK_INIT(fp->tq, &fp->tx_timeout_task, 0, + bxe_tx_mq_start_deferred, fp); taskqueue_start_threads(&fp->tq, 1, PI_NET, /* higher priority */ "%s", fp->tq_name); } @@ -12150,8 +12192,6 @@ static void bxe_periodic_callout_func(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; - struct bxe_fastpath *fp; - uint16_t tx_bd_avail; int i; if (!BXE_CORE_TRYLOCK(sc)) { @@ -12174,47 +12214,6 @@ bxe_periodic_callout_func(void *xsc) return; } -#if __FreeBSD_version >= 800000 - - FOR_EACH_QUEUE(sc, i) { - fp = &sc->fp[i]; - - if (BXE_FP_TX_TRYLOCK(fp)) { 
- struct ifnet *ifp = sc->ifnet; - /* - * If interface was stopped due to unavailable - * bds, try to process some tx completions - */ - (void) bxe_txeof(sc, fp); - - tx_bd_avail = bxe_tx_avail(sc, fp); - if (tx_bd_avail >= BXE_TX_CLEANUP_THRESHOLD) { - bxe_tx_mq_start_locked(sc, ifp, fp, NULL); - } - BXE_FP_TX_UNLOCK(fp); - } - } - -#else - - fp = &sc->fp[0]; - if (BXE_FP_TX_TRYLOCK(fp)) { - struct ifnet *ifp = sc->ifnet; - /* - * If interface was stopped due to unavailable - * bds, try to process some tx completions - */ - (void) bxe_txeof(sc, fp); - - tx_bd_avail = bxe_tx_avail(sc, fp); - if (tx_bd_avail >= BXE_TX_CLEANUP_THRESHOLD) { - bxe_tx_start_locked(sc, ifp, fp); - } - - BXE_FP_TX_UNLOCK(fp); - } - -#endif /* #if __FreeBSD_version >= 800000 */ /* Check for TX timeouts on any fastpath. */ FOR_EACH_QUEUE(sc, i) { @@ -12690,7 +12689,7 @@ bxe_init_ifnet(struct bxe_softc *sc) ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ifp->if_ioctl = bxe_ioctl; ifp->if_start = bxe_tx_start; -#if __FreeBSD_version >= 800000 +#if __FreeBSD_version >= 901504 ifp->if_transmit = bxe_tx_mq_start; ifp->if_qflush = bxe_mq_flush; #endif @@ -15737,7 +15736,7 @@ bxe_add_sysctls(struct bxe_softc *sc) static int bxe_alloc_buf_rings(struct bxe_softc *sc) { -#if __FreeBSD_version >= 800000 +#if __FreeBSD_version >= 901504 int i; struct bxe_fastpath *fp; @@ -15758,7 +15757,7 @@ bxe_alloc_buf_rings(struct bxe_softc *sc) static void bxe_free_buf_rings(struct bxe_softc *sc) { -#if __FreeBSD_version >= 800000 +#if __FreeBSD_version >= 901504 int i; struct bxe_fastpath *fp; diff --git a/sys/dev/bxe/bxe.h b/sys/dev/bxe/bxe.h index ae98e9c..6b30bde 100644 --- a/sys/dev/bxe/bxe.h +++ b/sys/dev/bxe/bxe.h @@ -644,6 +644,9 @@ struct bxe_fastpath { struct taskqueue *tq; char tq_name[32]; + struct task tx_task; + struct timeout_task tx_timeout_task; + /* ethernet client ID (each fastpath set of RX/TX/CQE is a client) */ uint8_t cl_id; #define FP_CL_ID(fp) (fp->cl_id) @@ -2300,7 +2303,8 
@@ void bxe_dump_mbuf_data(struct bxe_softc *sc, char *pTag, extern int bxe_grc_dump(struct bxe_softc *sc); #if __FreeBSD_version >= 800000 -#if __FreeBSD_version >= 1000000 +#if (__FreeBSD_version >= 1001513 && __FreeBSD_version < 1100000) ||\ + __FreeBSD_version >= 1100048 #define BXE_SET_FLOWID(m) M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE) #define BXE_VALID_FLOWID(m) (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) #else diff --git a/sys/dev/bxe/bxe_stats.h b/sys/dev/bxe/bxe_stats.h index 31f4c83..915e359 100644 --- a/sys/dev/bxe/bxe_stats.h +++ b/sys/dev/bxe/bxe_stats.h @@ -266,6 +266,10 @@ struct bxe_eth_stats { /* num. of times tx queue full occured */ uint32_t tx_queue_full_return; + /* debug stats */ + uint32_t tx_request_link_down_failures; + uint32_t bd_avail_too_less_failures; + uint32_t tx_mq_not_empty; }; @@ -372,6 +376,11 @@ struct bxe_eth_q_stats { /* num. of times tx queue full occured */ uint32_t tx_queue_full_return; + + /* debug stats */ + uint32_t tx_request_link_down_failures; + uint32_t bd_avail_too_less_failures; + uint32_t tx_mq_not_empty; }; struct bxe_eth_stats_old { diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index ceba9e0..60c4ddb 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -192,49 +192,46 @@ enum { CXGBE_BUSY = (1 << 9), /* port flags */ - DOOMED = (1 << 0), - PORT_INIT_DONE = (1 << 1), - PORT_SYSCTL_CTX = (1 << 2), HAS_TRACEQ = (1 << 3), + + /* VI flags */ + DOOMED = (1 << 0), + VI_INIT_DONE = (1 << 1), + VI_SYSCTL_CTX = (1 << 2), INTR_RXQ = (1 << 4), /* All NIC rxq's take interrupts */ INTR_OFLD_RXQ = (1 << 5), /* All TOE rxq's take interrupts */ - INTR_NM_RXQ = (1 << 6), /* All netmap rxq's take interrupts */ - INTR_ALL = (INTR_RXQ | INTR_OFLD_RXQ | INTR_NM_RXQ), + INTR_ALL = (INTR_RXQ | INTR_OFLD_RXQ), /* adapter debug_flags */ DF_DUMP_MBOX = (1 << 0), }; -#define IS_DOOMED(pi) ((pi)->flags & DOOMED) -#define SET_DOOMED(pi) do {(pi)->flags |= DOOMED;} while (0) +#define IS_DOOMED(vi) ((vi)->flags & 
DOOMED) +#define SET_DOOMED(vi) do {(vi)->flags |= DOOMED;} while (0) #define IS_BUSY(sc) ((sc)->flags & CXGBE_BUSY) #define SET_BUSY(sc) do {(sc)->flags |= CXGBE_BUSY;} while (0) #define CLR_BUSY(sc) do {(sc)->flags &= ~CXGBE_BUSY;} while (0) -struct port_info { +struct vi_info { device_t dev; - struct adapter *adapter; + struct port_info *pi; struct ifnet *ifp; struct ifmedia media; - struct mtx pi_lock; - char lockname[16]; unsigned long flags; int if_flags; - uint16_t *rss; + uint16_t *rss, *nm_rss; uint16_t viid; int16_t xact_addr_filt;/* index of exact MAC address filter */ uint16_t rss_size; /* size of VI's RSS table slice */ uint16_t rss_base; /* start of VI's RSS table slice */ - uint8_t lport; /* associated offload logical port */ - int8_t mdio_addr; - uint8_t port_type; - uint8_t mod_type; - uint8_t port_id; - uint8_t tx_chan; - uint8_t rx_chan_map; /* rx MPS channel bitmap */ + + eventhandler_tag vlan_c; + + int nintr; + int first_intr; /* These need to be int as they are used in sysctl */ int ntxq; /* # of tx queues */ @@ -242,30 +239,49 @@ struct port_info { int rsrv_noflowq; /* Reserve queue 0 for non-flowid packets */ int nrxq; /* # of rx queues */ int first_rxq; /* index of first rx queue */ -#ifdef TCP_OFFLOAD int nofldtxq; /* # of offload tx queues */ int first_ofld_txq; /* index of first offload tx queue */ int nofldrxq; /* # of offload rx queues */ int first_ofld_rxq; /* index of first offload rx queue */ -#endif -#ifdef DEV_NETMAP - int nnmtxq; /* # of netmap tx queues */ - int first_nm_txq; /* index of first netmap tx queue */ - int nnmrxq; /* # of netmap rx queues */ - int first_nm_rxq; /* index of first netmap rx queue */ - - struct ifnet *nm_ifp; - struct ifmedia nm_media; - int nmif_flags; - uint16_t nm_viid; - int16_t nm_xact_addr_filt; - uint16_t nm_rss_size; /* size of netmap VI's RSS table slice */ -#endif + int nnmtxq; + int first_nm_txq; + int nnmrxq; + int first_nm_rxq; int tmr_idx; int pktc_idx; int qsize_rxq; int qsize_txq; + 
struct timeval last_refreshed; + struct fw_vi_stats_vf stats; + + struct callout tick; + struct sysctl_ctx_list ctx; /* from ifconfig up to driver detach */ + + uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ +}; + +struct port_info { + device_t dev; + struct adapter *adapter; + + struct vi_info *vi; + int nvi; + int up_vis; + int uld_vis; + + struct mtx pi_lock; + char lockname[16]; + unsigned long flags; + + uint8_t lport; /* associated offload logical port */ + int8_t mdio_addr; + uint8_t port_type; + uint8_t mod_type; + uint8_t port_id; + uint8_t tx_chan; + uint8_t rx_chan_map; /* rx MPS channel bitmap */ + int linkdnrc; struct link_config link_cfg; @@ -273,14 +289,11 @@ struct port_info { struct port_stats stats; u_int tx_parse_error; - eventhandler_tag vlan_c; - struct callout tick; - struct sysctl_ctx_list ctx; /* from ifconfig up to driver detach */ - - uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ }; +#define IS_MAIN_VI(vi) ((vi) == &((vi)->pi->vi[0])) + /* Where the cluster came from, how it has been carved up. */ struct cluster_layout { int8_t zidx; @@ -292,9 +305,7 @@ struct cluster_layout { struct cluster_metadata { u_int refcount; -#ifdef INVARIANTS struct fl_sdesc *sd; /* For debug only. 
Could easily be stale */ -#endif }; struct fl_sdesc { @@ -333,6 +344,11 @@ enum { IQS_DISABLED = 0, IQS_BUSY = 1, IQS_IDLE = 2, + + /* netmap related flags */ + NM_OFF = 0, + NM_ON = 1, + NM_BUSY = 2, }; /* @@ -529,7 +545,6 @@ iq_to_rxq(struct sge_iq *iq) } -#ifdef TCP_OFFLOAD /* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_ofld_rxq { struct sge_iq iq; /* MUST be first */ @@ -542,7 +557,6 @@ iq_to_ofld_rxq(struct sge_iq *iq) return (__containerof(iq, struct sge_ofld_rxq, iq)); } -#endif struct wrqe { STAILQ_ENTRY(wrqe) link; @@ -594,9 +608,8 @@ struct sge_wrq { } __aligned(CACHE_LINE_SIZE); -#ifdef DEV_NETMAP struct sge_nm_rxq { - struct port_info *pi; + struct vi_info *vi; struct iq_desc *iq_desc; uint16_t iq_abs_id; @@ -649,7 +662,6 @@ struct sge_nm_txq { bus_addr_t ba; int iqidx; } __aligned(CACHE_LINE_SIZE); -#endif struct sge { int timer_val[SGE_NTIMERS]; @@ -661,14 +673,10 @@ struct sge { int nrxq; /* total # of Ethernet rx queues */ int ntxq; /* total # of Ethernet tx tx queues */ -#ifdef TCP_OFFLOAD int nofldrxq; /* total # of TOE rx queues */ int nofldtxq; /* total # of TOE tx queues */ -#endif -#ifdef DEV_NETMAP int nnmrxq; /* total # of netmap rx queues */ int nnmtxq; /* total # of netmap tx queues */ -#endif int niq; /* total # of ingress queues */ int neq; /* total # of egress queues */ @@ -677,14 +685,10 @@ struct sge { struct sge_wrq *ctrlq; /* Control queues */ struct sge_txq *txq; /* NIC tx queues */ struct sge_rxq *rxq; /* NIC rx queues */ -#ifdef TCP_OFFLOAD struct sge_wrq *ofld_txq; /* TOE tx queues */ struct sge_ofld_rxq *ofld_rxq; /* TOE rx queues */ -#endif -#ifdef DEV_NETMAP struct sge_nm_txq *nm_txq; /* netmap tx queues */ struct sge_nm_rxq *nm_rxq; /* netmap rx queues */ -#endif uint16_t iq_start; int eq_start; @@ -731,8 +735,11 @@ struct adapter { struct irq { struct resource *res; int rid; + volatile int nm_state; /* NM_OFF, NM_ON, or NM_BUSY */ void *tag; - } *irq; + struct sge_rxq *rxq; + struct 
sge_nm_rxq *nm_rxq; + } __aligned(CACHE_LINE_SIZE) *irq; bus_dma_tag_t dmat; /* Parent DMA tag */ @@ -743,21 +750,16 @@ struct adapter { struct port_info *port[MAX_NPORTS]; uint8_t chan_map[NCHAN]; -#ifdef TCP_OFFLOAD void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; void *iwarp_softc; /* (struct c4iw_dev *) */ void *iscsi_softc; -#endif struct l2t_data *l2t; /* L2 table */ struct tid_info tids; uint16_t doorbells; - int open_device_map; -#ifdef TCP_OFFLOAD int offload_map; /* ports with IFCAP_TOE enabled */ int active_ulds; /* ULDs activated on this adapter */ -#endif int flags; int debug_flags; @@ -798,11 +800,9 @@ struct adapter { fw_msg_handler_t fw_msg_handler[5]; /* NUM_FW6_TYPES */ cpl_handler_t cpl_handler[0xef]; /* NUM_CPL_CMDS */ -#ifdef INVARIANTS const char *last_op; const void *last_op_thr; int last_op_flags; -#endif int sc_do_rxcopy; }; @@ -863,24 +863,27 @@ struct adapter { } \ } while (0) -#define for_each_txq(pi, iter, q) \ - for (q = &pi->adapter->sge.txq[pi->first_txq], iter = 0; \ - iter < pi->ntxq; ++iter, ++q) -#define for_each_rxq(pi, iter, q) \ - for (q = &pi->adapter->sge.rxq[pi->first_rxq], iter = 0; \ - iter < pi->nrxq; ++iter, ++q) -#define for_each_ofld_txq(pi, iter, q) \ - for (q = &pi->adapter->sge.ofld_txq[pi->first_ofld_txq], iter = 0; \ - iter < pi->nofldtxq; ++iter, ++q) -#define for_each_ofld_rxq(pi, iter, q) \ - for (q = &pi->adapter->sge.ofld_rxq[pi->first_ofld_rxq], iter = 0; \ - iter < pi->nofldrxq; ++iter, ++q) -#define for_each_nm_txq(pi, iter, q) \ - for (q = &pi->adapter->sge.nm_txq[pi->first_nm_txq], iter = 0; \ - iter < pi->nnmtxq; ++iter, ++q) -#define for_each_nm_rxq(pi, iter, q) \ - for (q = &pi->adapter->sge.nm_rxq[pi->first_nm_rxq], iter = 0; \ - iter < pi->nnmrxq; ++iter, ++q) +#define for_each_txq(vi, iter, q) \ + for (q = &vi->pi->adapter->sge.txq[vi->first_txq], iter = 0; \ + iter < vi->ntxq; ++iter, ++q) +#define for_each_rxq(vi, iter, q) \ + for (q = &vi->pi->adapter->sge.rxq[vi->first_rxq], 
iter = 0; \ + iter < vi->nrxq; ++iter, ++q) +#define for_each_ofld_txq(vi, iter, q) \ + for (q = &vi->pi->adapter->sge.ofld_txq[vi->first_ofld_txq], iter = 0; \ + iter < vi->nofldtxq; ++iter, ++q) +#define for_each_ofld_rxq(vi, iter, q) \ + for (q = &vi->pi->adapter->sge.ofld_rxq[vi->first_ofld_rxq], iter = 0; \ + iter < vi->nofldrxq; ++iter, ++q) +#define for_each_nm_txq(vi, iter, q) \ + for (q = &vi->pi->adapter->sge.nm_txq[vi->first_nm_txq], iter = 0; \ + iter < vi->nnmtxq; ++iter, ++q) +#define for_each_nm_rxq(vi, iter, q) \ + for (q = &vi->pi->adapter->sge.nm_rxq[vi->first_nm_rxq], iter = 0; \ + iter < vi->nnmrxq; ++iter, ++q) +#define for_each_vi(_pi, _iter, _vi) \ + for ((_vi) = (_pi)->vi, (_iter) = 0; (_iter) < (_pi)->nvi; \ + ++(_iter), ++(_vi)) #define IDXINCR(idx, incr, wrap) do { \ idx = wrap - idx > incr ? idx + incr : incr - (wrap - idx); \ @@ -972,7 +975,7 @@ static inline void t4_os_set_hw_addr(struct adapter *sc, int idx, uint8_t hw_addr[]) { - bcopy(hw_addr, sc->port[idx]->hw_addr, ETHER_ADDR_LEN); + bcopy(hw_addr, sc->port[idx]->vi[0].hw_addr, ETHER_ADDR_LEN); } static inline bool @@ -1008,18 +1011,22 @@ int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t); int t4_register_an_handler(struct adapter *, an_handler_t); int t4_register_fw_msg_handler(struct adapter *, int, fw_msg_handler_t); int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); -int begin_synchronized_op(struct adapter *, struct port_info *, int, char *); +int begin_synchronized_op(struct adapter *, struct vi_info *, int, char *); +void doom_vi(struct adapter *, struct vi_info *); void end_synchronized_op(struct adapter *, int); int update_mac_settings(struct ifnet *, int); int adapter_full_init(struct adapter *); int adapter_full_uninit(struct adapter *); -int port_full_init(struct port_info *); -int port_full_uninit(struct port_info *); +uint64_t cxgbe_get_counter(struct ifnet *, ift_counter); +int vi_full_init(struct vi_info *); +int 
vi_full_uninit(struct vi_info *); +void vi_sysctls(struct vi_info *); +void vi_tick(void *); #ifdef DEV_NETMAP /* t4_netmap.c */ -int create_netmap_ifnet(struct port_info *); -int destroy_netmap_ifnet(struct port_info *); +void cxgbe_nm_attach(struct vi_info *); +void cxgbe_nm_detach(struct vi_info *); void t4_nm_intr(void *); #endif @@ -1036,10 +1043,11 @@ void t4_sge_sysctls(struct adapter *, struct sysctl_ctx_list *, int t4_destroy_dma_tag(struct adapter *); int t4_setup_adapter_queues(struct adapter *); int t4_teardown_adapter_queues(struct adapter *); -int t4_setup_port_queues(struct port_info *); -int t4_teardown_port_queues(struct port_info *); +int t4_setup_vi_queues(struct vi_info *); +int t4_teardown_vi_queues(struct vi_info *); void t4_intr_all(void *); void t4_intr(void *); +void t4_vi_intr(void *); void t4_intr_err(void *); void t4_intr_evt(void *); void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *); diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c index cac1c9c..31e8668 100644 --- a/sys/dev/cxgbe/common/t4_hw.c +++ b/sys/dev/cxgbe/common/t4_hw.c @@ -5720,11 +5720,11 @@ int __devinit t4_port_init(struct port_info *p, int mbox, int pf, int vf) if (ret < 0) return ret; - p->viid = ret; + p->vi[0].viid = ret; p->tx_chan = j; p->rx_chan_map = get_mps_bg_map(adap, j); p->lport = j; - p->rss_size = rss_size; + p->vi[0].rss_size = rss_size; t4_os_set_hw_addr(adap, p->port_id, addr); ret = ntohl(c.u.info.lstatus_to_modtype); @@ -5737,13 +5737,13 @@ int __devinit t4_port_init(struct port_info *p, int mbox, int pf, int vf) param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | - V_FW_PARAMS_PARAM_YZ(p->viid); + V_FW_PARAMS_PARAM_YZ(p->vi[0].viid); ret = t4_query_params(adap, mbox, pf, vf, 1, ¶m, &val); if (ret) - p->rss_base = 0xffff; + p->vi[0].rss_base = 0xffff; else { /* MPASS((val >> 16) == rss_size); */ - p->rss_base = val & 0xffff; + p->vi[0].rss_base = val & 0xffff; } 
return 0; diff --git a/sys/dev/cxgbe/iw_cxgbe/provider.c b/sys/dev/cxgbe/iw_cxgbe/provider.c index 6b1dfa1..d7ce079 100644 --- a/sys/dev/cxgbe/iw_cxgbe/provider.c +++ b/sys/dev/cxgbe/iw_cxgbe/provider.c @@ -296,7 +296,7 @@ c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) if (port == 0 || port > sc->params.nports) return (-EINVAL); pi = sc->port[port - 1]; - memcpy(&gid->raw[0], pi->hw_addr, sizeof(pi->hw_addr)); + memcpy(&gid->raw[0], pi->vi[0].hw_addr, ETHER_ADDR_LEN); return (0); } @@ -309,7 +309,8 @@ c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props) CTR3(KTR_IW_CXGBE, "%s ibdev %p, props %p", __func__, ibdev, props); memset(props, 0, sizeof *props); - memcpy(&props->sys_image_guid, sc->port[0]->hw_addr, 6); + memcpy(&props->sys_image_guid, sc->port[0]->vi[0].hw_addr, + ETHER_ADDR_LEN); props->hw_ver = sc->params.chipid; props->fw_ver = sc->params.fw_vers; props->device_cap_flags = dev->device_cap_flags; @@ -352,7 +353,7 @@ c4iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) if (port > sc->params.nports) return (-EINVAL); pi = sc->port[port - 1]; - ifp = pi->ifp; + ifp = pi->vi[0].ifp; memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; @@ -397,7 +398,7 @@ c4iw_register_device(struct c4iw_dev *dev) BUG_ON(!sc->port[0]); strlcpy(ibdev->name, device_get_nameunit(sc->dev), sizeof(ibdev->name)); memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid)); - memcpy(&ibdev->node_guid, sc->port[0]->hw_addr, 6); + memcpy(&ibdev->node_guid, sc->port[0]->vi[0].hw_addr, ETHER_ADDR_LEN); ibdev->owner = THIS_MODULE; dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; if (fastreg_support) diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h index 2a12283..bebaad9 100644 --- a/sys/dev/cxgbe/offload.h +++ b/sys/dev/cxgbe/offload.h @@ -125,7 +125,6 @@ struct t4_virt_res { /* virtualized HW resources */ struct t4_range l2t; }; -#ifdef TCP_OFFLOAD enum { 
ULD_TOM = 0, ULD_IWARP, @@ -152,6 +151,7 @@ struct tom_tunables { int tx_align; }; +#ifdef TCP_OFFLOAD int t4_register_uld(struct uld_info *); int t4_unregister_uld(struct uld_info *); int t4_activate_uld(struct adapter *, int); diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 3dfb53f..392cd0c 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -57,6 +57,9 @@ __FBSDID("$FreeBSD$"); #include <net/if_types.h> #include <net/if_dl.h> #include <net/if_vlan_var.h> +#ifdef RSS +#include <net/rss_config.h> +#endif #if defined(__i386__) || defined(__amd64__) #include <vm/vm.h> #include <vm/pmap.h> @@ -104,6 +107,22 @@ static driver_t cxgbe_driver = { sizeof(struct port_info) }; +/* T4 VI (vcxgbe) interface */ +static int vcxgbe_probe(device_t); +static int vcxgbe_attach(device_t); +static int vcxgbe_detach(device_t); +static device_method_t vcxgbe_methods[] = { + DEVMETHOD(device_probe, vcxgbe_probe), + DEVMETHOD(device_attach, vcxgbe_attach), + DEVMETHOD(device_detach, vcxgbe_detach), + { 0, 0 } +}; +static driver_t vcxgbe_driver = { + "vcxgbe", + vcxgbe_methods, + sizeof(struct vi_info) +}; + static d_ioctl_t t4_ioctl; static d_open_t t4_open; static d_close_t t4_close; @@ -140,6 +159,13 @@ static driver_t cxl_driver = { sizeof(struct port_info) }; +/* T5 VI (vcxl) interface */ +static driver_t vcxl_driver = { + "vcxl", + vcxgbe_methods, + sizeof(struct vi_info) +}; + static struct cdevsw t5_cdevsw = { .d_version = D_VERSION, .d_flags = 0, @@ -200,6 +226,14 @@ TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g); static int t4_nrxq1g = -1; TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g); +#define NTXQ_VI 1 +static int t4_ntxq_vi = -1; +TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi); + +#define NRXQ_VI 1 +static int t4_nrxq_vi = -1; +TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi); + static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); @@ -219,24 +253,24 @@ TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g); #define 
NOFLDRXQ_1G 1 static int t4_nofldrxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g); -#endif -#ifdef DEV_NETMAP -#define NNMTXQ_10G 2 -static int t4_nnmtxq10g = -1; -TUNABLE_INT("hw.cxgbe.nnmtxq10g", &t4_nnmtxq10g); +#define NOFLDTXQ_VI 1 +static int t4_nofldtxq_vi = -1; +TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi); -#define NNMRXQ_10G 2 -static int t4_nnmrxq10g = -1; -TUNABLE_INT("hw.cxgbe.nnmrxq10g", &t4_nnmrxq10g); +#define NOFLDRXQ_VI 1 +static int t4_nofldrxq_vi = -1; +TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi); +#endif -#define NNMTXQ_1G 1 -static int t4_nnmtxq1g = -1; -TUNABLE_INT("hw.cxgbe.nnmtxq1g", &t4_nnmtxq1g); +#ifdef DEV_NETMAP +#define NNMTXQ_VI 2 +static int t4_nnmtxq_vi = -1; +TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi); -#define NNMRXQ_1G 1 -static int t4_nnmrxq1g = -1; -TUNABLE_INT("hw.cxgbe.nnmrxq1g", &t4_nnmrxq1g); +#define NNMRXQ_VI 2 +static int t4_nnmrxq_vi = -1; +TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi); #endif /* @@ -324,6 +358,19 @@ TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 0; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); +static int t4_num_vis = 1; +TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis); + +/* Functions used by extra VIs to obtain unique MAC addresses for each VI. 
*/ +static int vi_mac_funcs[] = { + FW_VI_FUNC_OFLD, + FW_VI_FUNC_IWARP, + FW_VI_FUNC_OPENISCSI, + FW_VI_FUNC_OPENFCOE, + FW_VI_FUNC_FOISCSI, + FW_VI_FUNC_FOFCOE, +}; + struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t nirq; /* Total # of vectors */ @@ -334,18 +381,18 @@ struct intrs_and_queues { uint16_t ntxq1g; /* # of NIC txq's for each 1G port */ uint16_t nrxq1g; /* # of NIC rxq's for each 1G port */ uint16_t rsrv_noflowq; /* Flag whether to reserve queue 0 */ -#ifdef TCP_OFFLOAD uint16_t nofldtxq10g; /* # of TOE txq's for each 10G port */ uint16_t nofldrxq10g; /* # of TOE rxq's for each 10G port */ uint16_t nofldtxq1g; /* # of TOE txq's for each 1G port */ uint16_t nofldrxq1g; /* # of TOE rxq's for each 1G port */ -#endif -#ifdef DEV_NETMAP - uint16_t nnmtxq10g; /* # of netmap txq's for each 10G port */ - uint16_t nnmrxq10g; /* # of netmap rxq's for each 10G port */ - uint16_t nnmtxq1g; /* # of netmap txq's for each 1G port */ - uint16_t nnmrxq1g; /* # of netmap rxq's for each 1G port */ -#endif + + /* The vcxgbe/vcxl interfaces use these and not the ones above. 
*/ + uint16_t ntxq_vi; /* # of NIC txq's */ + uint16_t nrxq_vi; /* # of NIC rxq's */ + uint16_t nofldtxq_vi; /* # of TOE txq's */ + uint16_t nofldrxq_vi; /* # of TOE rxq's */ + uint16_t nnmtxq_vi; /* # of netmap txq's */ + uint16_t nnmrxq_vi; /* # of netmap rxq's */ }; struct filter_entry { @@ -367,7 +414,7 @@ static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static void memwin_info(struct adapter *, int, uint32_t *, uint32_t *); static uint32_t position_memwin(struct adapter *, int, uint32_t); -static int cfg_itype_and_nqueues(struct adapter *, int, int, +static int cfg_itype_and_nqueues(struct adapter *, int, int, int, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, @@ -377,8 +424,8 @@ static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *, struct ifmedia *); -static int cxgbe_init_synchronized(struct port_info *); -static int cxgbe_uninit_synchronized(struct port_info *); +static int cxgbe_init_synchronized(struct vi_info *); +static int cxgbe_uninit_synchronized(struct vi_info *); static int setup_intr_handlers(struct adapter *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); @@ -390,6 +437,7 @@ static int t4_free_irq(struct adapter *, struct irq *); static void reg_block_dump(struct adapter *, uint8_t *, unsigned int, unsigned int); static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *); +static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); @@ -397,8 +445,8 @@ static int cpl_not_handled(struct sge_iq *, const struct rss_header 
*, struct mbuf *); static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *); static int fw_msg_not_handled(struct adapter *, const __be64 *); -static int t4_sysctls(struct adapter *); -static int cxgbe_sysctls(struct port_info *); +static void t4_sysctls(struct adapter *); +static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); @@ -456,7 +504,7 @@ static int read_i2c(struct adapter *, struct t4_i2c_data *); static int set_sched_class(struct adapter *, struct t4_sched_params *); static int set_sched_queue(struct adapter *, struct t4_sched_queue *); #ifdef TCP_OFFLOAD -static int toe_capability(struct port_info *, int); +static int toe_capability(struct vi_info *, int); #endif static int mod_event(module_t, int, void *); @@ -601,7 +649,7 @@ static int t4_attach(device_t dev) { struct adapter *sc; - int rc = 0, i, n10g, n1g, rqidx, tqidx; + int rc = 0, i, j, n10g, n1g, rqidx, tqidx; struct intrs_and_queues iaq; struct sge *s; #ifdef TCP_OFFLOAD @@ -610,6 +658,7 @@ t4_attach(device_t dev) #ifdef DEV_NETMAP int nm_rqidx, nm_tqidx; #endif + int num_vis; sc = device_get_softc(dev); sc->dev = dev; @@ -643,7 +692,7 @@ t4_attach(device_t dev) mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); - callout_init(&sc->sfl_callout, CALLOUT_MPSAFE); + callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); mtx_init(&sc->regwin_lock, "register and memory window", 0, MTX_DEF); @@ -728,6 +777,24 @@ t4_attach(device_t dev) goto done; /* error message displayed already */ /* + * Number of VIs to create per-port. The first VI is the "main" regular + * VI for the port. The rest are additional virtual interfaces on the + * same physical port. Note that the main VI does not have native + * netmap support but the extra VIs do. 
+ * + * Limit the number of VIs per port to the number of available + * MAC addresses per port. + */ + if (t4_num_vis >= 1) + num_vis = t4_num_vis; + else + num_vis = 1; + if (num_vis > nitems(vi_mac_funcs)) { + num_vis = nitems(vi_mac_funcs); + device_printf(dev, "Number of VIs limited to %d\n", num_vis); + } + + /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. We also figure * out whether a port is 10G or 1G and use that information when @@ -743,12 +810,22 @@ t4_attach(device_t dev) /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; + /* + * XXX: vi[0] is special so we can't delay this allocation until + * pi->nvi's final value is known. + */ + pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE, + M_ZERO | M_WAITOK); - /* Allocate the vi and initialize parameters like mac addr */ + /* + * Allocate the "main" VI and initialize parameters + * like mac addr. + */ rc = -t4_port_init(pi, sc->mbox, sc->pf, 0); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); + free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; @@ -762,6 +839,7 @@ t4_attach(device_t dev) rc = -t4_link_start(sc, sc->mbox, pi->tx_chan, &pi->link_cfg); if (rc != 0) { device_printf(dev, "port %d l1cfg failed: %d\n", i, rc); + free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; @@ -774,20 +852,12 @@ t4_attach(device_t dev) if (is_10G_port(pi) || is_40G_port(pi)) { n10g++; - pi->tmr_idx = t4_tmr_idx_10g; - pi->pktc_idx = t4_pktc_idx_10g; } else { n1g++; - pi->tmr_idx = t4_tmr_idx_1g; - pi->pktc_idx = t4_pktc_idx_1g; } - pi->xact_addr_filt = -1; pi->linkdnrc = -1; - pi->qsize_rxq = t4_qsize_rxq; - pi->qsize_txq = t4_qsize_txq; - pi->dev = device_add_child(dev, is_t4(sc) ? 
"cxgbe" : "cxl", -1); if (pi->dev == NULL) { device_printf(dev, @@ -795,15 +865,18 @@ t4_attach(device_t dev) rc = ENXIO; goto done; } + pi->vi[0].dev = pi->dev; device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. */ - rc = cfg_itype_and_nqueues(sc, n10g, n1g, &iaq); + rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq); if (rc != 0) goto done; /* error message displayed already */ + if (iaq.nrxq_vi + iaq.nofldrxq_vi + iaq.nnmrxq_vi == 0) + num_vis = 1; sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; @@ -811,6 +884,10 @@ t4_attach(device_t dev) s = &sc->sge; s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; + if (num_vis > 1) { + s->nrxq += (n10g + n1g) * (num_vis - 1) * iaq.nrxq_vi; + s->ntxq += (n10g + n1g) * (num_vis - 1) * iaq.ntxq_vi; + } s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ @@ -818,6 +895,12 @@ t4_attach(device_t dev) if (is_offload(sc)) { s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g; s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g; + if (num_vis > 1) { + s->nofldrxq += (n10g + n1g) * (num_vis - 1) * + iaq.nofldrxq_vi; + s->nofldtxq += (n10g + n1g) * (num_vis - 1) * + iaq.nofldtxq_vi; + } s->neq += s->nofldtxq + s->nofldrxq; s->niq += s->nofldrxq; @@ -828,8 +911,10 @@ t4_attach(device_t dev) } #endif #ifdef DEV_NETMAP - s->nnmrxq = n10g * iaq.nnmrxq10g + n1g * iaq.nnmrxq1g; - s->nnmtxq = n10g * iaq.nnmtxq10g + n1g * iaq.nnmtxq1g; + if (num_vis > 1) { + s->nnmrxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmrxq_vi; + s->nnmtxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmtxq_vi; + } s->neq += s->nnmtxq + s->nnmrxq; s->niq += s->nnmrxq; @@ -868,57 +953,70 @@ t4_attach(device_t dev) #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; + struct vi_info *vi; if (pi == 
NULL) continue; - pi->first_rxq = rqidx; - pi->first_txq = tqidx; - if (is_10G_port(pi) || is_40G_port(pi)) { - pi->flags |= iaq.intr_flags_10g; - pi->nrxq = iaq.nrxq10g; - pi->ntxq = iaq.ntxq10g; - } else { - pi->flags |= iaq.intr_flags_1g; - pi->nrxq = iaq.nrxq1g; - pi->ntxq = iaq.ntxq1g; - } + pi->nvi = num_vis; + for_each_vi(pi, j, vi) { + vi->pi = pi; + vi->qsize_rxq = t4_qsize_rxq; + vi->qsize_txq = t4_qsize_txq; - if (pi->ntxq > 1) - pi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0; - else - pi->rsrv_noflowq = 0; + vi->first_rxq = rqidx; + vi->first_txq = tqidx; + if (is_10G_port(pi) || is_40G_port(pi)) { + vi->tmr_idx = t4_tmr_idx_10g; + vi->pktc_idx = t4_pktc_idx_10g; + vi->flags |= iaq.intr_flags_10g & INTR_RXQ; + vi->nrxq = j == 0 ? iaq.nrxq10g : iaq.nrxq_vi; + vi->ntxq = j == 0 ? iaq.ntxq10g : iaq.ntxq_vi; + } else { + vi->tmr_idx = t4_tmr_idx_1g; + vi->pktc_idx = t4_pktc_idx_1g; + vi->flags |= iaq.intr_flags_1g & INTR_RXQ; + vi->nrxq = j == 0 ? iaq.nrxq1g : iaq.nrxq_vi; + vi->ntxq = j == 0 ? iaq.ntxq1g : iaq.ntxq_vi; + } + rqidx += vi->nrxq; + tqidx += vi->ntxq; + + if (j == 0 && vi->ntxq > 1) + vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0; + else + vi->rsrv_noflowq = 0; - rqidx += pi->nrxq; - tqidx += pi->ntxq; #ifdef TCP_OFFLOAD - if (is_offload(sc)) { - pi->first_ofld_rxq = ofld_rqidx; - pi->first_ofld_txq = ofld_tqidx; + vi->first_ofld_rxq = ofld_rqidx; + vi->first_ofld_txq = ofld_tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { - pi->nofldrxq = iaq.nofldrxq10g; - pi->nofldtxq = iaq.nofldtxq10g; + vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ; + vi->nofldrxq = j == 0 ? iaq.nofldrxq10g : + iaq.nofldrxq_vi; + vi->nofldtxq = j == 0 ? iaq.nofldtxq10g : + iaq.nofldtxq_vi; } else { - pi->nofldrxq = iaq.nofldrxq1g; - pi->nofldtxq = iaq.nofldtxq1g; + vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ; + vi->nofldrxq = j == 0 ? iaq.nofldrxq1g : + iaq.nofldrxq_vi; + vi->nofldtxq = j == 0 ? 
iaq.nofldtxq1g : + iaq.nofldtxq_vi; } - ofld_rqidx += pi->nofldrxq; - ofld_tqidx += pi->nofldtxq; - } + ofld_rqidx += vi->nofldrxq; + ofld_tqidx += vi->nofldtxq; #endif #ifdef DEV_NETMAP - pi->first_nm_rxq = nm_rqidx; - pi->first_nm_txq = nm_tqidx; - if (is_10G_port(pi) || is_40G_port(pi)) { - pi->nnmrxq = iaq.nnmrxq10g; - pi->nnmtxq = iaq.nnmtxq10g; - } else { - pi->nnmrxq = iaq.nnmrxq1g; - pi->nnmtxq = iaq.nnmtxq1g; - } - nm_rqidx += pi->nnmrxq; - nm_tqidx += pi->nnmtxq; + if (j > 0) { + vi->first_nm_rxq = nm_rqidx; + vi->first_nm_txq = nm_tqidx; + vi->nnmrxq = iaq.nnmrxq_vi; + vi->nnmtxq = iaq.nnmtxq_vi; + nm_rqidx += vi->nnmrxq; + nm_tqidx += vi->nnmtxq; + } #endif + } } rc = setup_intr_handlers(sc); @@ -993,11 +1091,12 @@ t4_detach(device_t dev) for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { - t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->viid); + t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); + free(pi->vi, M_CXGBE); free(pi, M_CXGBE); } } @@ -1049,6 +1148,7 @@ t4_detach(device_t dev) mtx_destroy(&sc->sc_lock); } + callout_drain(&sc->sfl_callout); if (mtx_initialized(&sc->tids.ftid_lock)) mtx_destroy(&sc->tids.ftid_lock); if (mtx_initialized(&sc->sfl_lock)) @@ -1081,12 +1181,13 @@ cxgbe_probe(device_t dev) #define T4_CAP_ENABLE (T4_CAP) static int -cxgbe_attach(device_t dev) +cxgbe_vi_attach(device_t dev, struct vi_info *vi) { - struct port_info *pi = device_get_softc(dev); struct ifnet *ifp; - char *s; - int n, o; + struct sbuf *sb; + + vi->xact_addr_filt = -1; + callout_init(&vi->tick, 1); /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); @@ -1094,10 +1195,8 @@ cxgbe_attach(device_t dev) device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } - pi->ifp = ifp; - ifp->if_softc = pi; - - callout_init(&pi->tick, CALLOUT_MPSAFE); + vi->ifp = ifp; + ifp->if_softc = vi; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = 
IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; @@ -1109,7 +1208,7 @@ cxgbe_attach(device_t dev) ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD - if (is_offload(pi->adapter)) + if (vi->nofldrxq != 0) ifp->if_capabilities |= IFCAP_TOE; #endif ifp->if_capenable = T4_CAP_ENABLE; @@ -1120,99 +1219,120 @@ cxgbe_attach(device_t dev) ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS; ifp->if_hw_tsomaxsegsize = 65536; - /* Initialize ifmedia for this port */ - ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change, + /* Initialize ifmedia for this VI */ + ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); - build_medialist(pi, &pi->media); + build_medialist(vi->pi, &vi->media); - pi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, + vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); - ether_ifattach(ifp, pi->hw_addr); - - n = 128; - s = malloc(n, M_CXGBE, M_WAITOK); - o = snprintf(s, n, "%d txq, %d rxq (NIC)", pi->ntxq, pi->nrxq); - MPASS(n > o); + ether_ifattach(ifp, vi->hw_addr); +#ifdef DEV_NETMAP + if (vi->nnmrxq != 0) + cxgbe_nm_attach(vi); +#endif + sb = sbuf_new_auto(); + sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq); #ifdef TCP_OFFLOAD - if (is_offload(pi->adapter)) { - o += snprintf(s + o, n - o, "; %d txq, %d rxq (TOE)", - pi->nofldtxq, pi->nofldrxq); - MPASS(n > o); - } + if (ifp->if_capabilities & IFCAP_TOE) + sbuf_printf(sb, "; %d txq, %d rxq (TOE)", + vi->nofldtxq, vi->nofldrxq); #endif #ifdef DEV_NETMAP - o += snprintf(s + o, n - o, "; %d txq, %d rxq (netmap)", pi->nnmtxq, - pi->nnmrxq); - MPASS(n > o); + if (ifp->if_capabilities & IFCAP_NETMAP) + sbuf_printf(sb, "; %d txq, %d rxq (netmap)", + vi->nnmtxq, vi->nnmrxq); #endif - device_printf(dev, "%s\n", s); - free(s, M_CXGBE); + sbuf_finish(sb); + device_printf(dev, "%s\n", sbuf_data(sb)); + sbuf_delete(sb); + + vi_sysctls(vi); + + return (0); +} + +static int +cxgbe_attach(device_t dev) +{ + struct port_info *pi = 
device_get_softc(dev); + struct vi_info *vi; + int i, rc; + + callout_init_mtx(&pi->tick, &pi->pi_lock, 0); + + rc = cxgbe_vi_attach(dev, &pi->vi[0]); + if (rc) + return (rc); + + for_each_vi(pi, i, vi) { + if (i == 0) + continue; + vi->dev = device_add_child(dev, is_t4(pi->adapter) ? + "vcxgbe" : "vcxl", -1); + if (vi->dev == NULL) { + device_printf(dev, "failed to add VI %d\n", i); + continue; + } + device_set_softc(vi->dev, vi); + } -#ifdef DEV_NETMAP - /* nm_media handled here to keep implementation private to this file */ - ifmedia_init(&pi->nm_media, IFM_IMASK, cxgbe_media_change, - cxgbe_media_status); - build_medialist(pi, &pi->nm_media); - create_netmap_ifnet(pi); /* logs errors it something fails */ -#endif cxgbe_sysctls(pi); + bus_generic_attach(dev); + return (0); } +static void +cxgbe_vi_detach(struct vi_info *vi) +{ + struct ifnet *ifp = vi->ifp; + + ether_ifdetach(ifp); + + if (vi->vlan_c) + EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); + + /* Let detach proceed even if these fail. */ +#ifdef DEV_NETMAP + if (ifp->if_capabilities & IFCAP_NETMAP) + cxgbe_nm_detach(vi); +#endif + cxgbe_uninit_synchronized(vi); + callout_drain(&vi->tick); + vi_full_uninit(vi); + + ifmedia_removeall(&vi->media); + if_free(vi->ifp); + vi->ifp = NULL; +} + static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; - struct ifnet *ifp = pi->ifp; + int rc; - /* Tell if_ioctl and if_init that the port is going away */ - ADAPTER_LOCK(sc); - SET_DOOMED(pi); - wakeup(&sc->flags); - while (IS_BUSY(sc)) - mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); - SET_BUSY(sc); -#ifdef INVARIANTS - sc->last_op = "t4detach"; - sc->last_op_thr = curthread; - sc->last_op_flags = 0; -#endif - ADAPTER_UNLOCK(sc); + /* Detach the extra VIs first. 
*/ + rc = bus_generic_detach(dev); + if (rc) + return (rc); + device_delete_children(dev); + + doom_vi(sc, &pi->vi[0]); if (pi->flags & HAS_TRACEQ) { sc->traceq = -1; /* cloner should not create ifnet */ t4_tracer_port_detach(sc); } - if (pi->vlan_c) - EVENTHANDLER_DEREGISTER(vlan_config, pi->vlan_c); - - PORT_LOCK(pi); - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - callout_stop(&pi->tick); - PORT_UNLOCK(pi); + cxgbe_vi_detach(&pi->vi[0]); callout_drain(&pi->tick); - /* Let detach proceed even if these fail. */ - cxgbe_uninit_synchronized(pi); - port_full_uninit(pi); - - ifmedia_removeall(&pi->media); - ether_ifdetach(pi->ifp); - if_free(pi->ifp); - -#ifdef DEV_NETMAP - /* XXXNM: equivalent of cxgbe_uninit_synchronized to ifdown nm_ifp */ - destroy_netmap_ifnet(pi); -#endif - - ADAPTER_LOCK(sc); - CLR_BUSY(sc); - wakeup(&sc->flags); - ADAPTER_UNLOCK(sc); + end_synchronized_op(sc, 0); return (0); } @@ -1220,12 +1340,12 @@ cxgbe_detach(device_t dev) static void cxgbe_init(void *arg) { - struct port_info *pi = arg; - struct adapter *sc = pi->adapter; + struct vi_info *vi = arg; + struct adapter *sc = vi->pi->adapter; - if (begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4init") != 0) + if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0) return; - cxgbe_init_synchronized(pi); + cxgbe_init_synchronized(vi); end_synchronized_op(sc, 0); } @@ -1233,8 +1353,8 @@ static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags, can_sleep; - struct port_info *pi = ifp->if_softc; - struct adapter *sc = pi->adapter; + struct vi_info *vi = ifp->if_softc; + struct adapter *sc = vi->pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; @@ -1244,11 +1364,11 @@ cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) return (EINVAL); - rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4mtu"); + rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, 
"t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; - if (pi->flags & PORT_INIT_DONE) { + if (vi->flags & VI_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MTU); @@ -1259,14 +1379,14 @@ cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) case SIOCSIFFLAGS: can_sleep = 0; redo_sifflags: - rc = begin_synchronized_op(sc, pi, + rc = begin_synchronized_op(sc, vi, can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg"); if (rc) return (rc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - flags = pi->if_flags; + flags = vi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (can_sleep == 1) { @@ -1283,23 +1403,23 @@ redo_sifflags: can_sleep = 1; goto redo_sifflags; } - rc = cxgbe_init_synchronized(pi); + rc = cxgbe_init_synchronized(vi); } - pi->if_flags = ifp->if_flags; + vi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } - rc = cxgbe_uninit_synchronized(pi); + rc = cxgbe_uninit_synchronized(vi); } end_synchronized_op(sc, can_sleep ? 
0 : LOCK_HELD); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two are called with a mutex held :-( */ - rc = begin_synchronized_op(sc, pi, HOLD_LOCK, "t4multi"); + rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) @@ -1308,7 +1428,7 @@ redo_sifflags: break; case SIOCSIFCAP: - rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4cap"); + rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); @@ -1370,7 +1490,7 @@ redo_sifflags: struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; - for_each_rxq(pi, i, rxq) { + for_each_rxq(vi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; else @@ -1382,7 +1502,7 @@ redo_sifflags: if (mask & IFCAP_TOE) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; - rc = toe_capability(pi, enable); + rc = toe_capability(vi, enable); if (rc != 0) goto fail; @@ -1413,7 +1533,7 @@ fail: case SIOCSIFMEDIA: case SIOCGIFMEDIA: - ifmedia_ioctl(ifp, ifr, &pi->media, cmd); + ifmedia_ioctl(ifp, ifr, &vi->media, cmd); break; case SIOCGI2C: { @@ -1430,10 +1550,10 @@ fail: rc = EINVAL; break; } - rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4i2c"); + rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c"); if (rc) return (rc); - rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr, + rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); end_synchronized_op(sc, 0); if (rc == 0) @@ -1451,7 +1571,8 @@ fail: static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { - struct port_info *pi = ifp->if_softc; + struct vi_info *vi = ifp->if_softc; + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; void *items[1]; @@ -1473,10 +1594,10 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) } /* Select a txq. 
*/ - txq = &sc->sge.txq[pi->first_txq]; + txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) - txq += ((m->m_pkthdr.flowid % (pi->ntxq - pi->rsrv_noflowq)) + - pi->rsrv_noflowq); + txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + + vi->rsrv_noflowq); items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 4096); @@ -1489,13 +1610,13 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) static void cxgbe_qflush(struct ifnet *ifp) { - struct port_info *pi = ifp->if_softc; + struct vi_info *vi = ifp->if_softc; struct sge_txq *txq; int i; - /* queues do not exist if !PORT_INIT_DONE. */ - if (pi->flags & PORT_INIT_DONE) { - for_each_txq(pi, i, txq) { + /* queues do not exist if !VI_INIT_DONE. */ + if (vi->flags & VI_INIT_DONE) { + for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); @@ -1511,9 +1632,9 @@ cxgbe_qflush(struct ifnet *ifp) static int cxgbe_media_change(struct ifnet *ifp) { - struct port_info *pi = ifp->if_softc; + struct vi_info *vi = ifp->if_softc; - device_printf(pi->dev, "%s unimplemented.\n", __func__); + device_printf(vi->dev, "%s unimplemented.\n", __func__); return (EOPNOTSUPP); } @@ -1521,24 +1642,12 @@ cxgbe_media_change(struct ifnet *ifp) static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { - struct port_info *pi = ifp->if_softc; - struct ifmedia *media = NULL; + struct vi_info *vi = ifp->if_softc; + struct port_info *pi = vi->pi; struct ifmedia_entry *cur; int speed = pi->link_cfg.speed; -#ifdef INVARIANTS - int data = (pi->port_type << 8) | pi->mod_type; -#endif - if (ifp == pi->ifp) - media = &pi->media; -#ifdef DEV_NETMAP - else if (ifp == pi->nm_ifp) - media = &pi->nm_media; -#endif - MPASS(media != NULL); - - cur = media->ifm_cur; - MPASS(cur->ifm_data == data); + cur = vi->media.ifm_cur; ifmr->ifm_status = IFM_AVALID; if (!pi->link_cfg.link_ok) @@ -1564,6 +1673,84 @@ cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) speed)); } 
+static int +vcxgbe_probe(device_t dev) +{ + char buf[128]; + struct vi_info *vi = device_get_softc(dev); + + snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id, + vi - vi->pi->vi); + device_set_desc_copy(dev, buf); + + return (BUS_PROBE_DEFAULT); +} + +static int +vcxgbe_attach(device_t dev) +{ + struct vi_info *vi; + struct port_info *pi; + struct adapter *sc; + int func, index, rc; + u32 param, val; + + vi = device_get_softc(dev); + pi = vi->pi; + sc = pi->adapter; + + index = vi - pi->vi; + KASSERT(index < nitems(vi_mac_funcs), + ("%s: VI %s doesn't have a MAC func", __func__, + device_get_nameunit(dev))); + func = vi_mac_funcs[index]; + rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, + vi->hw_addr, &vi->rss_size, func, 0); + if (rc < 0) { + device_printf(dev, "Failed to allocate virtual interface " + "for port %d: %d\n", pi->port_id, -rc); + return (-rc); + } + vi->viid = rc; + + param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | + V_FW_PARAMS_PARAM_YZ(vi->viid); + rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); + if (rc) + vi->rss_base = 0xffff; + else { + /* MPASS((val >> 16) == rss_size); */ + vi->rss_base = val & 0xffff; + } + + rc = cxgbe_vi_attach(dev, vi); + if (rc) { + t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); + return (rc); + } + return (0); +} + +static int +vcxgbe_detach(device_t dev) +{ + struct vi_info *vi; + struct adapter *sc; + + vi = device_get_softc(dev); + sc = vi->pi->adapter; + + doom_vi(sc, vi); + + cxgbe_vi_detach(vi); + t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); + + end_synchronized_op(sc, 0); + + return (0); +} + void t4_fatal_err(struct adapter *sc) { @@ -1885,33 +2072,34 @@ position_memwin(struct adapter *sc, int n, uint32_t addr) } static int -cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, +cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis, struct intrs_and_queues *iaq) { int rc, itype, navail, nrxq10g, 
nrxq1g, n; int nofldrxq10g = 0, nofldrxq1g = 0; - int nnmrxq10g = 0, nnmrxq1g = 0; bzero(iaq, sizeof(*iaq)); iaq->ntxq10g = t4_ntxq10g; iaq->ntxq1g = t4_ntxq1g; + iaq->ntxq_vi = t4_ntxq_vi; iaq->nrxq10g = nrxq10g = t4_nrxq10g; iaq->nrxq1g = nrxq1g = t4_nrxq1g; + iaq->nrxq_vi = t4_nrxq_vi; iaq->rsrv_noflowq = t4_rsrv_noflowq; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq10g = t4_nofldtxq10g; iaq->nofldtxq1g = t4_nofldtxq1g; + iaq->nofldtxq_vi = t4_nofldtxq_vi; iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g; iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g; + iaq->nofldrxq_vi = t4_nofldrxq_vi; } #endif #ifdef DEV_NETMAP - iaq->nnmtxq10g = t4_nnmtxq10g; - iaq->nnmtxq1g = t4_nnmtxq1g; - iaq->nnmrxq10g = nnmrxq10g = t4_nnmrxq10g; - iaq->nnmrxq1g = nnmrxq1g = t4_nnmrxq1g; + iaq->nnmtxq_vi = t4_nnmtxq_vi; + iaq->nnmrxq_vi = t4_nnmrxq_vi; #endif for (itype = INTR_MSIX; itype; itype >>= 1) { @@ -1935,12 +2123,17 @@ restart: /* * Best option: an interrupt vector for errors, one for the - * firmware event queue, and one for every rxq (NIC, TOE, and - * netmap). + * firmware event queue, and one for every rxq (NIC and TOE) of + * every VI. The VIs that support netmap use the same + * interrupts for the NIC rx queues and the netmap rx queues + * because only one set of queues is active at a time. */ iaq->nirq = T4_EXTRA_INTR; - iaq->nirq += n10g * (nrxq10g + nofldrxq10g + nnmrxq10g); - iaq->nirq += n1g * (nrxq1g + nofldrxq1g + nnmrxq1g); + iaq->nirq += n10g * (nrxq10g + nofldrxq10g); + iaq->nirq += n1g * (nrxq1g + nofldrxq1g); + iaq->nirq += (n10g + n1g) * (num_vis - 1) * + max(iaq->nrxq_vi, iaq->nnmrxq_vi); /* See comment above. 
*/ + iaq->nirq += (n10g + n1g) * (num_vis - 1) * iaq->nofldrxq_vi; if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { iaq->intr_flags_10g = INTR_ALL; @@ -1948,41 +2141,44 @@ restart: goto allocate; } + /* Disable the VIs (and netmap) if there aren't enough intrs */ + if (num_vis > 1) { + device_printf(sc->dev, "virtual interfaces disabled " + "because num_vis=%u with current settings " + "(nrxq10g=%u, nrxq1g=%u, nofldrxq10g=%u, " + "nofldrxq1g=%u, nrxq_vi=%u nofldrxq_vi=%u, " + "nnmrxq_vi=%u) would need %u interrupts but " + "only %u are available.\n", num_vis, nrxq10g, + nrxq1g, nofldrxq10g, nofldrxq1g, iaq->nrxq_vi, + iaq->nofldrxq_vi, iaq->nnmrxq_vi, iaq->nirq, + navail); + num_vis = 1; + iaq->ntxq_vi = iaq->nrxq_vi = 0; + iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; + iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; + goto restart; + } + /* * Second best option: a vector for errors, one for the firmware * event queue, and vectors for either all the NIC rx queues or * all the TOE rx queues. The queues that don't get vectors * will forward their interrupts to those that do. - * - * Note: netmap rx queues cannot be created early and so they - * can't be setup to receive forwarded interrupts for others. 
*/ iaq->nirq = T4_EXTRA_INTR; if (nrxq10g >= nofldrxq10g) { iaq->intr_flags_10g = INTR_RXQ; iaq->nirq += n10g * nrxq10g; -#ifdef DEV_NETMAP - iaq->nnmrxq10g = min(nnmrxq10g, nrxq10g); -#endif } else { iaq->intr_flags_10g = INTR_OFLD_RXQ; iaq->nirq += n10g * nofldrxq10g; -#ifdef DEV_NETMAP - iaq->nnmrxq10g = min(nnmrxq10g, nofldrxq10g); -#endif } if (nrxq1g >= nofldrxq1g) { iaq->intr_flags_1g = INTR_RXQ; iaq->nirq += n1g * nrxq1g; -#ifdef DEV_NETMAP - iaq->nnmrxq1g = min(nnmrxq1g, nrxq1g); -#endif } else { iaq->intr_flags_1g = INTR_OFLD_RXQ; iaq->nirq += n1g * nofldrxq1g; -#ifdef DEV_NETMAP - iaq->nnmrxq1g = min(nnmrxq1g, nofldrxq1g); -#endif } if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) @@ -1990,9 +2186,9 @@ restart: /* * Next best option: an interrupt vector for errors, one for the - * firmware event queue, and at least one per port. At this - * point we know we'll have to downsize nrxq and/or nofldrxq - * and/or nnmrxq to fit what's available to us. + * firmware event queue, and at least one per main-VI. At this + * point we know we'll have to downsize nrxq and/or nofldrxq to + * fit what's available to us. 
*/ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g + n1g; @@ -2015,9 +2211,6 @@ restart: #ifdef TCP_OFFLOAD iaq->nofldrxq10g = min(n, nofldrxq10g); #endif -#ifdef DEV_NETMAP - iaq->nnmrxq10g = min(n, nnmrxq10g); -#endif } if (n1g > 0) { @@ -2036,9 +2229,6 @@ restart: #ifdef TCP_OFFLOAD iaq->nofldrxq1g = min(n, nofldrxq1g); #endif -#ifdef DEV_NETMAP - iaq->nnmrxq1g = min(n, nnmrxq1g); -#endif } if (itype != INTR_MSI || powerof2(iaq->nirq)) @@ -2054,10 +2244,6 @@ restart: if (is_offload(sc)) iaq->nofldrxq10g = iaq->nofldrxq1g = 1; #endif -#ifdef DEV_NETMAP - iaq->nnmrxq10g = iaq->nnmrxq1g = 1; -#endif - allocate: navail = iaq->nirq; rc = 0; @@ -2838,30 +3024,29 @@ t4_set_desc(struct adapter *sc) static void build_medialist(struct port_info *pi, struct ifmedia *media) { - int data, m; + int m; PORT_LOCK(pi); ifmedia_removeall(media); m = IFM_ETHER | IFM_FDX; - data = (pi->port_type << 8) | pi->mod_type; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI: - ifmedia_add(media, m | IFM_10G_T, data, NULL); + ifmedia_add(media, m | IFM_10G_T, 0, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: - ifmedia_add(media, m | IFM_1000_T, data, NULL); - ifmedia_add(media, m | IFM_100_TX, data, NULL); - ifmedia_add(media, IFM_ETHER | IFM_AUTO, data, NULL); + ifmedia_add(media, m | IFM_1000_T, 0, NULL); + ifmedia_add(media, m | IFM_100_TX, 0, NULL); + ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: - ifmedia_add(media, m | IFM_10G_CX4, data, NULL); + ifmedia_add(media, m | IFM_10G_CX4, 0, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; @@ -2872,29 +3057,29 @@ build_medialist(struct port_info *pi, struct ifmedia *media) switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: - ifmedia_add(media, m | IFM_10G_LR, data, NULL); + ifmedia_add(media, m | IFM_10G_LR, 0, NULL); ifmedia_set(media, m | IFM_10G_LR); break; case FW_PORT_MOD_TYPE_SR: - ifmedia_add(media, m | IFM_10G_SR, data, NULL); 
+ ifmedia_add(media, m | IFM_10G_SR, 0, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: - ifmedia_add(media, m | IFM_10G_LRM, data, NULL); + ifmedia_add(media, m | IFM_10G_LRM, 0, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: - ifmedia_add(media, m | IFM_10G_TWINAX, data, NULL); + ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; - ifmedia_add(media, m | IFM_NONE, data, NULL); + ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; @@ -2904,7 +3089,7 @@ build_medialist(struct port_info *pi, struct ifmedia *media) device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); - ifmedia_add(media, m | IFM_UNKNOWN, data, NULL); + ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } @@ -2914,24 +3099,24 @@ build_medialist(struct port_info *pi, struct ifmedia *media) switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: - ifmedia_add(media, m | IFM_40G_LR4, data, NULL); + ifmedia_add(media, m | IFM_40G_LR4, 0, NULL); ifmedia_set(media, m | IFM_40G_LR4); break; case FW_PORT_MOD_TYPE_SR: - ifmedia_add(media, m | IFM_40G_SR4, data, NULL); + ifmedia_add(media, m | IFM_40G_SR4, 0, NULL); ifmedia_set(media, m | IFM_40G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: - ifmedia_add(media, m | IFM_40G_CR4, data, NULL); + ifmedia_add(media, m | IFM_40G_CR4, 0, NULL); ifmedia_set(media, m | IFM_40G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; - ifmedia_add(media, m | IFM_NONE, data, NULL); + ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; @@ -2939,7 +3124,7 @@ build_medialist(struct port_info *pi, struct ifmedia *media) device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, 
pi->mod_type); - ifmedia_add(media, m | IFM_UNKNOWN, data, NULL); + ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } @@ -2949,7 +3134,7 @@ build_medialist(struct port_info *pi, struct ifmedia *media) device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); - ifmedia_add(media, m | IFM_UNKNOWN, data, NULL); + ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } @@ -2967,25 +3152,14 @@ int update_mac_settings(struct ifnet *ifp, int flags) { int rc = 0; - struct port_info *pi = ifp->if_softc; + struct vi_info *vi = ifp->if_softc; + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; - uint16_t viid = 0xffff; - int16_t *xact_addr_filt = NULL; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); - if (ifp == pi->ifp) { - viid = pi->viid; - xact_addr_filt = &pi->xact_addr_filt; - } -#ifdef DEV_NETMAP - else if (ifp == pi->nm_ifp) { - viid = pi->nm_viid; - xact_addr_filt = &pi->nm_xact_addr_filt; - } -#endif if (flags & XGMAC_MTU) mtu = ifp->if_mtu; @@ -2999,8 +3173,8 @@ update_mac_settings(struct ifnet *ifp, int flags) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 
1 : 0; if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) { - rc = -t4_set_rxmode(sc, sc->mbox, viid, mtu, promisc, allmulti, - 1, vlanex, false); + rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc, + allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); @@ -3012,14 +3186,14 @@ update_mac_settings(struct ifnet *ifp, int flags) uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); - rc = t4_change_mac(sc, sc->mbox, viid, *xact_addr_filt, ucaddr, - true, true); + rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, + ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { - *xact_addr_filt = rc; + vi->xact_addr_filt = rc; rc = 0; } } @@ -3041,8 +3215,8 @@ update_mac_settings(struct ifnet *ifp, int flags) i++; if (i == FW_MAC_EXACT_CHUNK) { - rc = t4_alloc_mac_filt(sc, sc->mbox, viid, del, - i, mcaddr, NULL, &hash, 0); + rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, + del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { @@ -3062,7 +3236,7 @@ update_mac_settings(struct ifnet *ifp, int flags) } } if (i > 0) { - rc = t4_alloc_mac_filt(sc, sc->mbox, viid, del, i, + rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; @@ -3080,7 +3254,7 @@ update_mac_settings(struct ifnet *ifp, int flags) } } - rc = -t4_set_addr_hash(sc, sc->mbox, viid, 0, hash, 0); + rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: @@ -3094,7 +3268,7 @@ mcfail: * {begin|end}_synchronized_op must be called from the same thread. 
*/ int -begin_synchronized_op(struct adapter *sc, struct port_info *pi, int flags, +begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags, char *wmesg) { int rc, pri; @@ -3102,7 +3276,8 @@ begin_synchronized_op(struct adapter *sc, struct port_info *pi, int flags, #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? */ if (flags & SLEEP_OK) - pause("t4slptst", 1); + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "begin_synchronized_op"); #endif if (INTR_OK) @@ -3113,7 +3288,7 @@ begin_synchronized_op(struct adapter *sc, struct port_info *pi, int flags, ADAPTER_LOCK(sc); for (;;) { - if (pi && IS_DOOMED(pi)) { + if (vi && IS_DOOMED(vi)) { rc = ENXIO; goto done; } @@ -3150,6 +3325,29 @@ done: } /* + * Tell if_ioctl and if_init that the VI is going away. This is + * special variant of begin_synchronized_op and must be paired with a + * call to end_synchronized_op. + */ +void +doom_vi(struct adapter *sc, struct vi_info *vi) +{ + + ADAPTER_LOCK(sc); + SET_DOOMED(vi); + wakeup(&sc->flags); + while (IS_BUSY(sc)) + mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); + SET_BUSY(sc); +#ifdef INVARIANTS + sc->last_op = "t4detach"; + sc->last_op_thr = curthread; + sc->last_op_flags = 0; +#endif + ADAPTER_UNLOCK(sc); +} + +/* * {begin|end}_synchronized_op must be called from the same thread. 
*/ void @@ -3168,34 +3366,32 @@ end_synchronized_op(struct adapter *sc, int flags) } static int -cxgbe_init_synchronized(struct port_info *pi) +cxgbe_init_synchronized(struct vi_info *vi) { + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; - struct ifnet *ifp = pi->ifp; + struct ifnet *ifp = vi->ifp; int rc = 0, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); - if (isset(&sc->open_device_map, pi->port_id)) { - KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING, - ("mismatch between open_device_map and if_drv_flags")); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); /* already running */ - } if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ - if (!(pi->flags & PORT_INIT_DONE) && - ((rc = port_full_init(pi)) != 0)) + if (!(vi->flags & VI_INIT_DONE) && + ((rc = vi_full_init(vi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(ifp, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ - rc = -t4_enable_vi(sc, sc->mbox, pi->viid, true, true); + rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; @@ -3206,7 +3402,7 @@ cxgbe_init_synchronized(struct port_info *pi) * if this changes. */ - for_each_txq(pi, i, txq) { + for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_ENABLED; TXQ_UNLOCK(txq); @@ -3215,8 +3411,8 @@ cxgbe_init_synchronized(struct port_info *pi) /* * The first iq of the first port to come up is used for tracing. */ - if (sc->traceq < 0) { - sc->traceq = sc->sge.rxq[pi->first_rxq].iq.abs_id; + if (sc->traceq < 0 && IS_MAIN_VI(vi)) { + sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id; t4_write_reg(sc, is_t4(sc) ? 
A_MPS_TRC_RSS_CONTROL : A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) | V_QUEUENUMBER(sc->traceq)); @@ -3224,15 +3420,18 @@ cxgbe_init_synchronized(struct port_info *pi) } /* all ok */ - setbit(&sc->open_device_map, pi->port_id); PORT_LOCK(pi); ifp->if_drv_flags |= IFF_DRV_RUNNING; - PORT_UNLOCK(pi); + pi->up_vis++; - callout_reset(&pi->tick, hz, cxgbe_tick, pi); + if (pi->nvi > 1) + callout_reset(&vi->tick, hz, vi_tick, vi); + else + callout_reset(&pi->tick, hz, cxgbe_tick, pi); + PORT_UNLOCK(pi); done: if (rc != 0) - cxgbe_uninit_synchronized(pi); + cxgbe_uninit_synchronized(vi); return (rc); } @@ -3241,18 +3440,19 @@ done: * Idempotent. */ static int -cxgbe_uninit_synchronized(struct port_info *pi) +cxgbe_uninit_synchronized(struct vi_info *vi) { + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; - struct ifnet *ifp = pi->ifp; + struct ifnet *ifp = vi->ifp; int rc, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); - if (!(pi->flags & PORT_INIT_DONE)) { + if (!(vi->flags & VI_INIT_DONE)) { KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING), - ("uninited port is running")); + ("uninited VI is running")); return (0); } @@ -3263,21 +3463,33 @@ cxgbe_uninit_synchronized(struct port_info *pi) * holding in its RAM (for an offloaded connection) even after the VI is * disabled. 
*/ - rc = -t4_enable_vi(sc, sc->mbox, pi->viid, false, false); + rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } - for_each_txq(pi, i, txq) { + for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); } - clrbit(&sc->open_device_map, pi->port_id); PORT_LOCK(pi); + if (pi->nvi == 1) + callout_stop(&pi->tick); + else + callout_stop(&vi->tick); + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { + PORT_UNLOCK(pi); + return (0); + } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + pi->up_vis--; + if (pi->up_vis > 0) { + PORT_UNLOCK(pi); + return (0); + } PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; @@ -3295,10 +3507,12 @@ cxgbe_uninit_synchronized(struct port_info *pi) static int setup_intr_handlers(struct adapter *sc) { - int rc, rid, p, q; + int rc, rid, p, q, v; char s[8]; struct irq *irq; struct port_info *pi; + struct vi_info *vi; + struct sge *sge = &sc->sge; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; @@ -3327,7 +3541,7 @@ setup_intr_handlers(struct adapter *sc) rid++; /* The second one is always the firmware event queue */ - rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sc->sge.fwq, "evt"); + rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt"); if (rc != 0) return (rc); irq++; @@ -3335,44 +3549,64 @@ setup_intr_handlers(struct adapter *sc) for_each_port(sc, p) { pi = sc->port[p]; + for_each_vi(pi, v, vi) { + vi->first_intr = rid - 1; - if (pi->flags & INTR_RXQ) { - for_each_rxq(pi, q, rxq) { - snprintf(s, sizeof(s), "%d.%d", p, q); - rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, - s); - if (rc != 0) - return (rc); - irq++; - rid++; + if (vi->nnmrxq > 0) { + int n = max(vi->nrxq, vi->nnmrxq); + + MPASS(vi->flags & INTR_RXQ); + + rxq = &sge->rxq[vi->first_rxq]; +#ifdef DEV_NETMAP + nm_rxq = &sge->nm_rxq[vi->first_nm_rxq]; +#endif + for (q = 0; q < n; q++) { + snprintf(s, sizeof(s), "%x%c%x", p, + 'a' + v, q); + if (q < 
vi->nrxq) + irq->rxq = rxq++; +#ifdef DEV_NETMAP + if (q < vi->nnmrxq) + irq->nm_rxq = nm_rxq++; +#endif + rc = t4_alloc_irq(sc, irq, rid, + t4_vi_intr, irq, s); + if (rc != 0) + return (rc); + irq++; + rid++; + vi->nintr++; + } + } else if (vi->flags & INTR_RXQ) { + for_each_rxq(vi, q, rxq) { + snprintf(s, sizeof(s), "%x%c%x", p, + 'a' + v, q); + rc = t4_alloc_irq(sc, irq, rid, + t4_intr, rxq, s); + if (rc != 0) + return (rc); + irq++; + rid++; + vi->nintr++; + } } - } #ifdef TCP_OFFLOAD - if (pi->flags & INTR_OFLD_RXQ) { - for_each_ofld_rxq(pi, q, ofld_rxq) { - snprintf(s, sizeof(s), "%d,%d", p, q); - rc = t4_alloc_irq(sc, irq, rid, t4_intr, - ofld_rxq, s); - if (rc != 0) - return (rc); - irq++; - rid++; + if (vi->flags & INTR_OFLD_RXQ) { + for_each_ofld_rxq(vi, q, ofld_rxq) { + snprintf(s, sizeof(s), "%x%c%x", p, + 'A' + v, q); + rc = t4_alloc_irq(sc, irq, rid, + t4_intr, ofld_rxq, s); + if (rc != 0) + return (rc); + irq++; + rid++; + vi->nintr++; + } } - } #endif -#ifdef DEV_NETMAP - if (pi->flags & INTR_NM_RXQ) { - for_each_nm_rxq(pi, q, nm_rxq) { - snprintf(s, sizeof(s), "%d-%d", p, q); - rc = t4_alloc_irq(sc, irq, rid, t4_nm_intr, - nm_rxq, s); - if (rc != 0) - return (rc); - irq++; - rid++; - } } -#endif } MPASS(irq == &sc->irq[sc->intr_count]); @@ -3437,53 +3671,202 @@ adapter_full_uninit(struct adapter *sc) return (0); } +#ifdef RSS +#define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \ + RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \ + RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \ + RSS_HASHTYPE_RSS_UDP_IPV6) + +/* Translates kernel hash types to hardware. 
*/ +static int +hashconfig_to_hashen(int hashconfig) +{ + int hashen = 0; + + if (hashconfig & RSS_HASHTYPE_RSS_IPV4) + hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN; + if (hashconfig & RSS_HASHTYPE_RSS_IPV6) + hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN; + if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) { + hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | + F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; + } + if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) { + hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | + F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; + } + if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4) + hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; + if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6) + hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; + + return (hashen); +} + +/* Translates hardware hash types to kernel. */ +static int +hashen_to_hashconfig(int hashen) +{ + int hashconfig = 0; + + if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) { + /* + * If UDP hashing was enabled it must have been enabled for + * either IPv4 or IPv6 (inclusive or). Enabling UDP without + * enabling any 4-tuple hash is nonsense configuration. 
+ */ + MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | + F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)); + + if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) + hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4; + if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) + hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6; + } + if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) + hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4; + if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) + hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6; + if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) + hashconfig |= RSS_HASHTYPE_RSS_IPV4; + if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) + hashconfig |= RSS_HASHTYPE_RSS_IPV6; + + return (hashconfig); +} +#endif + int -port_full_init(struct port_info *pi) +vi_full_init(struct vi_info *vi) { - struct adapter *sc = pi->adapter; - struct ifnet *ifp = pi->ifp; + struct adapter *sc = vi->pi->adapter; + struct ifnet *ifp = vi->ifp; uint16_t *rss; struct sge_rxq *rxq; - int rc, i, j; + int rc, i, j, hashen; +#ifdef RSS + int nbuckets = rss_getnumbuckets(); + int hashconfig = rss_gethashconfig(); + int extra; + uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; + uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; +#endif ASSERT_SYNCHRONIZED_OP(sc); - KASSERT((pi->flags & PORT_INIT_DONE) == 0, - ("%s: PORT_INIT_DONE already", __func__)); + KASSERT((vi->flags & VI_INIT_DONE) == 0, + ("%s: VI_INIT_DONE already", __func__)); - sysctl_ctx_init(&pi->ctx); - pi->flags |= PORT_SYSCTL_CTX; + sysctl_ctx_init(&vi->ctx); + vi->flags |= VI_SYSCTL_CTX; /* - * Allocate tx/rx/fl queues for this port. + * Allocate tx/rx/fl queues for this VI. */ - rc = t4_setup_port_queues(pi); + rc = t4_setup_vi_queues(vi); if (rc != 0) goto done; /* error message displayed already */ /* - * Setup RSS for this port. Save a copy of the RSS table for later use. + * Setup RSS for this VI. Save a copy of the RSS table for later use. 
*/ - rss = malloc(pi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); - for (i = 0; i < pi->rss_size;) { - for_each_rxq(pi, j, rxq) { + if (vi->nrxq > vi->rss_size) { + if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); " + "some queues will never receive traffic.\n", vi->nrxq, + vi->rss_size); + } else if (vi->rss_size % vi->nrxq) { + if_printf(ifp, "nrxq (%d), hw RSS table size (%d); " + "expect uneven traffic distribution.\n", vi->nrxq, + vi->rss_size); + } +#ifdef RSS + MPASS(RSS_KEYSIZE == 40); + if (vi->nrxq != nbuckets) { + if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);" + "performance will be impacted.\n", vi->nrxq, nbuckets); + } + + rss_getkey((void *)&raw_rss_key[0]); + for (i = 0; i < nitems(rss_key); i++) { + rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]); + } + t4_write_rss_key(sc, (void *)&rss_key[0], -1); +#endif + rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); + for (i = 0; i < vi->rss_size;) { +#ifdef RSS + j = rss_get_indirection_to_bucket(i); + j %= vi->nrxq; + rxq = &sc->sge.rxq[vi->first_rxq + j]; + rss[i++] = rxq->iq.abs_id; +#else + for_each_rxq(vi, j, rxq) { rss[i++] = rxq->iq.abs_id; - if (i == pi->rss_size) + if (i == vi->rss_size) break; } +#endif } - rc = -t4_config_rss_range(sc, sc->mbox, pi->viid, 0, pi->rss_size, rss, - pi->rss_size); + rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, + vi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } - pi->rss = rss; - pi->flags |= PORT_INIT_DONE; +#ifdef RSS + hashen = hashconfig_to_hashen(hashconfig); + + /* + * We may have had to enable some hashes even though the global config + * wants them disabled. This is a potential problem that must be + * reported to the user. + */ + extra = hashen_to_hashconfig(hashen) ^ hashconfig; + + /* + * If we consider only the supported hash types, then the enabled hashes + * are a superset of the requested hashes. 
In other words, there cannot + * be any supported hash that was requested but not enabled, but there + * can be hashes that were not requested but had to be enabled. + */ + extra &= SUPPORTED_RSS_HASHTYPES; + MPASS((extra & hashconfig) == 0); + + if (extra) { + if_printf(ifp, + "global RSS config (0x%x) cannot be accomodated.\n", + hashconfig); + } + if (extra & RSS_HASHTYPE_RSS_IPV4) + if_printf(ifp, "IPv4 2-tuple hashing forced on.\n"); + if (extra & RSS_HASHTYPE_RSS_TCP_IPV4) + if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n"); + if (extra & RSS_HASHTYPE_RSS_IPV6) + if_printf(ifp, "IPv6 2-tuple hashing forced on.\n"); + if (extra & RSS_HASHTYPE_RSS_TCP_IPV6) + if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n"); + if (extra & RSS_HASHTYPE_RSS_UDP_IPV4) + if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n"); + if (extra & RSS_HASHTYPE_RSS_UDP_IPV6) + if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n"); +#else + hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN | + F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN | + F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | + F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN; +#endif + rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0]); + if (rc != 0) { + if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc); + goto done; + } + + vi->rss = rss; + vi->flags |= VI_INIT_DONE; done: if (rc != 0) - port_full_uninit(pi); + vi_full_uninit(vi); return (rc); } @@ -3492,8 +3875,9 @@ done: * Idempotent. */ int -port_full_uninit(struct port_info *pi) +vi_full_uninit(struct vi_info *vi) { + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int i; struct sge_rxq *rxq; @@ -3503,38 +3887,41 @@ port_full_uninit(struct port_info *pi) struct sge_wrq *ofld_txq; #endif - if (pi->flags & PORT_INIT_DONE) { + if (vi->flags & VI_INIT_DONE) { /* Need to quiesce queues. */ - quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); + /* XXX: Only for the first VI? 
*/ + if (IS_MAIN_VI(vi)) + quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); - for_each_txq(pi, i, txq) { + for_each_txq(vi, i, txq) { quiesce_txq(sc, txq); } #ifdef TCP_OFFLOAD - for_each_ofld_txq(pi, i, ofld_txq) { + for_each_ofld_txq(vi, i, ofld_txq) { quiesce_wrq(sc, ofld_txq); } #endif - for_each_rxq(pi, i, rxq) { + for_each_rxq(vi, i, rxq) { quiesce_iq(sc, &rxq->iq); quiesce_fl(sc, &rxq->fl); } #ifdef TCP_OFFLOAD - for_each_ofld_rxq(pi, i, ofld_rxq) { + for_each_ofld_rxq(vi, i, ofld_rxq) { quiesce_iq(sc, &ofld_rxq->iq); quiesce_fl(sc, &ofld_rxq->fl); } #endif - free(pi->rss, M_CXGBE); + free(vi->rss, M_CXGBE); + free(vi->nm_rss, M_CXGBE); } - t4_teardown_port_queues(pi); - pi->flags &= ~PORT_INIT_DONE; + t4_teardown_vi_queues(vi); + vi->flags &= ~VI_INIT_DONE; return (0); } @@ -3592,9 +3979,9 @@ quiesce_fl(struct adapter *sc, struct sge_fl *fl) FL_LOCK(fl); fl->flags |= FL_DOOMED; FL_UNLOCK(fl); + callout_stop(&sc->sfl_callout); mtx_unlock(&sc->sfl_lock); - callout_drain(&sc->sfl_callout); KASSERT((fl->flags & FL_STARVING) == 0, ("%s: still starving", __func__)); } @@ -4327,10 +4714,127 @@ t4_get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) reg_block_dump(sc, buf, reg_ranges[i], reg_ranges[i + 1]); } +#define A_PL_INDIR_CMD 0x1f8 + +#define S_PL_AUTOINC 31 +#define M_PL_AUTOINC 0x1U +#define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC) +#define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC) + +#define S_PL_VFID 20 +#define M_PL_VFID 0xffU +#define V_PL_VFID(x) ((x) << S_PL_VFID) +#define G_PL_VFID(x) (((x) >> S_PL_VFID) & M_PL_VFID) + +#define S_PL_ADDR 0 +#define M_PL_ADDR 0xfffffU +#define V_PL_ADDR(x) ((x) << S_PL_ADDR) +#define G_PL_ADDR(x) (((x) >> S_PL_ADDR) & M_PL_ADDR) + +#define A_PL_INDIR_DATA 0x1fc + +static uint64_t +read_vf_stat(struct adapter *sc, unsigned int viid, int reg) +{ + u32 stats[2]; + + mtx_assert(&sc->regwin_lock, MA_OWNED); + t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | + V_PL_VFID(G_FW_VIID_VIN(viid)) | 
V_PL_ADDR(VF_MPS_REG(reg))); + stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); + stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); + return (((uint64_t)stats[1]) << 32 | stats[0]); +} + +static void +t4_get_vi_stats(struct adapter *sc, unsigned int viid, + struct fw_vi_stats_vf *stats) +{ + +#define GET_STAT(name) \ + read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L) + + stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); + stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); + stats->tx_mcast_bytes = GET_STAT(TX_VF_MCAST_BYTES); + stats->tx_mcast_frames = GET_STAT(TX_VF_MCAST_FRAMES); + stats->tx_ucast_bytes = GET_STAT(TX_VF_UCAST_BYTES); + stats->tx_ucast_frames = GET_STAT(TX_VF_UCAST_FRAMES); + stats->tx_drop_frames = GET_STAT(TX_VF_DROP_FRAMES); + stats->tx_offload_bytes = GET_STAT(TX_VF_OFFLOAD_BYTES); + stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES); + stats->rx_bcast_bytes = GET_STAT(RX_VF_BCAST_BYTES); + stats->rx_bcast_frames = GET_STAT(RX_VF_BCAST_FRAMES); + stats->rx_mcast_bytes = GET_STAT(RX_VF_MCAST_BYTES); + stats->rx_mcast_frames = GET_STAT(RX_VF_MCAST_FRAMES); + stats->rx_ucast_bytes = GET_STAT(RX_VF_UCAST_BYTES); + stats->rx_ucast_frames = GET_STAT(RX_VF_UCAST_FRAMES); + stats->rx_err_frames = GET_STAT(RX_VF_ERR_FRAMES); + +#undef GET_STAT +} + +static void +t4_clr_vi_stats(struct adapter *sc, unsigned int viid) +{ + int reg; + + t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | + V_PL_VFID(G_FW_VIID_VIN(viid)) | + V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); + for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; + reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) + t4_write_reg(sc, A_PL_INDIR_DATA, 0); +} + +static void +vi_refresh_stats(struct adapter *sc, struct vi_info *vi) +{ + struct ifnet *ifp = vi->ifp; + struct sge_txq *txq; + int i, drops; + struct fw_vi_stats_vf *s = &vi->stats; + struct timeval tv; + const struct timeval interval = {0, 250000}; /* 250ms */ + + if (!(vi->flags & VI_INIT_DONE)) + return; + + 
getmicrotime(&tv); + timevalsub(&tv, &interval); + if (timevalcmp(&tv, &vi->last_refreshed, <)) + return; + + mtx_lock(&sc->regwin_lock); + t4_get_vi_stats(sc, vi->viid, &vi->stats); + + ifp->if_ipackets = s->rx_bcast_frames + s->rx_mcast_frames + + s->rx_ucast_frames; + ifp->if_ierrors = s->rx_err_frames; + ifp->if_opackets = s->tx_bcast_frames + s->tx_mcast_frames + + s->tx_ucast_frames + s->tx_offload_frames; + ifp->if_oerrors = s->tx_drop_frames; + ifp->if_ibytes = s->rx_bcast_bytes + s->rx_mcast_bytes + + s->rx_ucast_bytes; + ifp->if_obytes = s->tx_bcast_bytes + s->tx_mcast_bytes + + s->tx_ucast_bytes + s->tx_offload_bytes; + ifp->if_imcasts = s->rx_mcast_frames; + ifp->if_omcasts = s->tx_mcast_frames; + + drops = 0; + for_each_txq(vi, i, txq) + drops += counter_u64_fetch(txq->r->drops); + ifp->if_snd.ifq_drops = drops; + + getmicrotime(&vi->last_refreshed); + mtx_unlock(&sc->regwin_lock); +} + static void cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) { - struct ifnet *ifp = pi->ifp; + struct vi_info *vi = &pi->vi[0]; + struct ifnet *ifp = vi->ifp; struct sge_txq *txq; int i, drops; struct port_stats *s = &pi->stats; @@ -4366,7 +4870,7 @@ cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) } drops = s->tx_drop; - for_each_txq(pi, i, txq) + for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); ifp->if_snd.ifq_drops = drops; @@ -4382,18 +4886,22 @@ cxgbe_tick(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; - struct ifnet *ifp = pi->ifp; - - PORT_LOCK(pi); - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { - PORT_UNLOCK(pi); - return; /* without scheduling another callout */ - } + PORT_LOCK_ASSERT_OWNED(pi); cxgbe_refresh_stats(sc, pi); callout_schedule(&pi->tick, hz); - PORT_UNLOCK(pi); +} + +void +vi_tick(void *arg) +{ + struct vi_info *vi = arg; + struct adapter *sc = vi->pi->adapter; + + vi_refresh_stats(sc, vi); + + callout_schedule(&vi->tick, hz); } static void @@ -4500,7 +5008,7 @@ 
t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h) return (0); } -static int +static void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; @@ -4805,91 +5313,112 @@ t4_sysctls(struct adapter *sc) CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload"); } #endif - - - return (0); } -static int -cxgbe_sysctls(struct port_info *pi) +void +vi_sysctls(struct vi_info *vi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; - struct adapter *sc = pi->adapter; - ctx = device_get_sysctl_ctx(pi->dev); + ctx = device_get_sysctl_ctx(vi->dev); /* - * dev.cxgbe.X. + * dev.v?(cxgbe|cxl).X. */ - oid = device_get_sysctl_tree(pi->dev); + oid = device_get_sysctl_tree(vi->dev); children = SYSCTL_CHILDREN(oid); - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | - CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); - if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", - CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", - "PHY temperature (in Celsius)"); - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", - CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", - "PHY firmware version"); - } + SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL, + vi->viid, "VI identifer"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, - &pi->nrxq, 0, "# of rx queues"); + &vi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, - &pi->ntxq, 0, "# of tx queues"); + &vi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, - &pi->first_rxq, 0, "index of first rx queue"); + &vi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, - &pi->first_txq, 0, "index of first tx queue"); - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT | - CTLFLAG_RW, pi, 0, 
sysctl_noflowq, "IU", - "Reserve queue 0 for non-flowid packets"); + &vi->first_txq, 0, "index of first tx queue"); + + if (IS_MAIN_VI(vi)) { + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", + CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU", + "Reserve queue 0 for non-flowid packets"); + } #ifdef TCP_OFFLOAD - if (is_offload(sc)) { + if (vi->nofldrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, - &pi->nofldrxq, 0, + &vi->nofldrxq, 0, "# of rx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, - &pi->nofldtxq, 0, + &vi->nofldtxq, 0, "# of tx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", - CTLFLAG_RD, &pi->first_ofld_rxq, 0, + CTLFLAG_RD, &vi->first_ofld_rxq, 0, "index of first TOE rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", - CTLFLAG_RD, &pi->first_ofld_txq, 0, + CTLFLAG_RD, &vi->first_ofld_txq, 0, "index of first TOE tx queue"); } #endif #ifdef DEV_NETMAP - SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD, - &pi->nnmrxq, 0, "# of rx queues for netmap"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD, - &pi->nnmtxq, 0, "# of tx queues for netmap"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq", - CTLFLAG_RD, &pi->first_nm_rxq, 0, - "index of first netmap rx queue"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq", - CTLFLAG_RD, &pi->first_nm_txq, 0, - "index of first netmap tx queue"); + if (vi->nnmrxq != 0) { + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD, + &vi->nnmrxq, 0, "# of netmap rx queues"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD, + &vi->nnmtxq, 0, "# of netmap tx queues"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq", + CTLFLAG_RD, &vi->first_nm_rxq, 0, + "index of first netmap rx queue"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq", + CTLFLAG_RD, &vi->first_nm_txq, 0, + 
"index of first netmap tx queue"); + } #endif SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", - CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_tmr_idx, "I", + CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", - CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_pktc_idx, "I", + CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq", - CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_qsize_rxq, "I", + CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", - CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_qsize_txq, "I", + CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I", "tx queue size"); +} + +static void +cxgbe_sysctls(struct port_info *pi) +{ + struct sysctl_ctx_list *ctx; + struct sysctl_oid *oid; + struct sysctl_oid_list *children; + struct adapter *sc = pi->adapter; + + ctx = device_get_sysctl_ctx(pi->dev); + + /* + * dev.cxgbe.X. 
+ */ + oid = device_get_sysctl_tree(pi->dev); + children = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | + CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); + if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", + CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", + "PHY temperature (in Celsius)"); + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", + CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", + "PHY firmware version"); + } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings", CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings, @@ -5053,8 +5582,6 @@ cxgbe_sysctls(struct port_info *pi) "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT - - return (0); } static int @@ -5106,7 +5633,7 @@ sysctl_btphy(SYSCTL_HANDLER_ARGS) u_int v; int rc; - rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4btt"); + rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt"); if (rc) return (rc); /* XXX: magic numbers */ @@ -5125,18 +5652,18 @@ sysctl_btphy(SYSCTL_HANDLER_ARGS) static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { - struct port_info *pi = arg1; + struct vi_info *vi = arg1; int rc, val; - val = pi->rsrv_noflowq; + val = vi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); - if ((val >= 1) && (pi->ntxq > 1)) - pi->rsrv_noflowq = 1; + if ((val >= 1) && (vi->ntxq > 1)) + vi->rsrv_noflowq = 1; else - pi->rsrv_noflowq = 0; + vi->rsrv_noflowq = 0; return (rc); } @@ -5144,8 +5671,8 @@ sysctl_noflowq(SYSCTL_HANDLER_ARGS) static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { - struct port_info *pi = arg1; - struct adapter *sc = pi->adapter; + struct vi_info *vi = arg1; + struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD @@ -5153,7 +5680,7 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) #endif 
uint8_t v; - idx = pi->tmr_idx; + idx = vi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) @@ -5162,13 +5689,13 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); - rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, + rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); - v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1); - for_each_rxq(pi, i, rxq) { + v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1); + for_each_rxq(vi, i, rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&rxq->iq.intr_params, v); #else @@ -5176,7 +5703,7 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) #endif } #ifdef TCP_OFFLOAD - for_each_ofld_rxq(pi, i, ofld_rxq) { + for_each_ofld_rxq(vi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else @@ -5184,7 +5711,7 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) #endif } #endif - pi->tmr_idx = idx; + vi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); @@ -5193,11 +5720,11 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { - struct port_info *pi = arg1; - struct adapter *sc = pi->adapter; + struct vi_info *vi = arg1; + struct adapter *sc = vi->pi->adapter; int idx, rc; - idx = pi->pktc_idx; + idx = vi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) @@ -5206,15 +5733,15 @@ sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); - rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, + rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); - if (pi->flags & PORT_INIT_DONE) + if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else - pi->pktc_idx = idx; + vi->pktc_idx = idx; 
end_synchronized_op(sc, LOCK_HELD); return (rc); @@ -5223,11 +5750,11 @@ sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { - struct port_info *pi = arg1; - struct adapter *sc = pi->adapter; + struct vi_info *vi = arg1; + struct adapter *sc = vi->pi->adapter; int qsize, rc; - qsize = pi->qsize_rxq; + qsize = vi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) @@ -5236,15 +5763,15 @@ sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) if (qsize < 128 || (qsize & 7)) return (EINVAL); - rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, + rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); - if (pi->flags & PORT_INIT_DONE) + if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else - pi->qsize_rxq = qsize; + vi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); @@ -5253,11 +5780,11 @@ sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { - struct port_info *pi = arg1; - struct adapter *sc = pi->adapter; + struct vi_info *vi = arg1; + struct adapter *sc = vi->pi->adapter; int qsize, rc; - qsize = pi->qsize_txq; + qsize = vi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) @@ -5266,15 +5793,15 @@ sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) if (qsize < 128 || qsize > 65536) return (EINVAL); - rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, + rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); - if (pi->flags & PORT_INIT_DONE) + if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else - pi->qsize_txq = qsize; + vi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); @@ -5322,7 +5849,8 @@ sysctl_pause_settings(SYSCTL_HANDLER_ARGS) if (n & ~(PAUSE_TX | PAUSE_RX)) return (EINVAL); /* 
some other bit is set too */ - rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4PAUSE"); + rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, + "t4PAUSE"); if (rc) return (rc); if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) { @@ -7838,6 +8366,7 @@ static int set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) { struct port_info *pi = NULL; + struct vi_info *vi; struct sge_txq *txq; uint32_t fw_mnem, fw_queue, fw_class; int i, rc; @@ -7856,8 +8385,10 @@ set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) goto done; } + /* XXX: Only supported for the main VI. */ pi = sc->port[p->port]; - if (!in_range(p->queue, 0, pi->ntxq - 1) || !in_range(p->cl, 0, 7)) { + vi = &pi->vi[0]; + if (!in_range(p->queue, 0, vi->ntxq - 1) || !in_range(p->cl, 0, 7)) { rc = EINVAL; goto done; } @@ -7875,7 +8406,7 @@ set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) * on a single specified TX queue. */ if (p->queue >= 0) { - txq = &sc->sge.txq[pi->first_txq + p->queue]; + txq = &sc->sge.txq[vi->first_txq + p->queue]; fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); @@ -7886,7 +8417,7 @@ set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) * Change the scheduling on all the TX queues for the * interface. 
*/ - for_each_txq(pi, i, txq) { + for_each_txq(vi, i, txq) { fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); @@ -7938,26 +8469,29 @@ void t4_os_portmod_changed(const struct adapter *sc, int idx) { struct port_info *pi = sc->port[idx]; + struct vi_info *vi; + struct ifnet *ifp; + int v; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; - build_medialist(pi, &pi->media); -#ifdef DEV_NETMAP - build_medialist(pi, &pi->nm_media); -#endif + for_each_vi(pi, v, vi) { + build_medialist(pi, &vi->media); + } + ifp = pi->vi[0].ifp; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) - if_printf(pi->ifp, "transceiver unplugged.\n"); + if_printf(ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) - if_printf(pi->ifp, "unknown transceiver inserted.\n"); + if_printf(ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) - if_printf(pi->ifp, "unsupported transceiver inserted.\n"); + if_printf(ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) { - if_printf(pi->ifp, "%s transceiver inserted.\n", + if_printf(ifp, "%s transceiver inserted.\n", mod_str[pi->mod_type]); } else { - if_printf(pi->ifp, "transceiver (type %d) inserted.\n", + if_printf(ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } @@ -7966,16 +8500,27 @@ void t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason) { struct port_info *pi = sc->port[idx]; - struct ifnet *ifp = pi->ifp; + struct vi_info *vi; + struct ifnet *ifp; + int v; - if (link_stat) { + if (link_stat) pi->linkdnrc = -1; - ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed); - if_link_state_change(ifp, LINK_STATE_UP); - } else { + else { if (reason >= 0) pi->linkdnrc = reason; - if_link_state_change(ifp, LINK_STATE_DOWN); + } + for_each_vi(pi, v, vi) { + ifp = vi->ifp; + if (ifp == 
NULL) + continue; + + if (link_stat) { + ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed); + if_link_state_change(ifp, LINK_STATE_UP); + } else { + if_link_state_change(ifp, LINK_STATE_DOWN); + } } } @@ -8095,9 +8640,10 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, rc = read_i2c(sc, (struct t4_i2c_data *)data); break; case CHELSIO_T4_CLEAR_STATS: { - int i; + int i, v; u_int port_id = *(uint32_t *)data; struct port_info *pi; + struct vi_info *vi; if (port_id >= sc->params.nports) return (EINVAL); @@ -8106,46 +8652,61 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, /* MAC stats */ t4_clr_port_stats(sc, pi->tx_chan); pi->tx_parse_error = 0; + mtx_lock(&sc->regwin_lock); + for_each_vi(pi, v, vi) { + if (vi->flags & VI_INIT_DONE) + t4_clr_vi_stats(sc, vi->viid); + } + mtx_unlock(&sc->regwin_lock); - if (pi->flags & PORT_INIT_DONE) { - struct sge_rxq *rxq; - struct sge_txq *txq; - struct sge_wrq *wrq; + /* + * Since this command accepts a port, clear stats for + * all VIs on this port. 
+ */ + for_each_vi(pi, v, vi) { + if (vi->flags & VI_INIT_DONE) { + struct sge_rxq *rxq; + struct sge_txq *txq; + struct sge_wrq *wrq; - for_each_rxq(pi, i, rxq) { + for_each_rxq(vi, i, rxq) { #if defined(INET) || defined(INET6) - rxq->lro.lro_queued = 0; - rxq->lro.lro_flushed = 0; + rxq->lro.lro_queued = 0; + rxq->lro.lro_flushed = 0; #endif - rxq->rxcsum = 0; - rxq->vlan_extraction = 0; - } + rxq->rxcsum = 0; + rxq->vlan_extraction = 0; + } - for_each_txq(pi, i, txq) { - txq->txcsum = 0; - txq->tso_wrs = 0; - txq->vlan_insertion = 0; - txq->imm_wrs = 0; - txq->sgl_wrs = 0; - txq->txpkt_wrs = 0; - txq->txpkts0_wrs = 0; - txq->txpkts1_wrs = 0; - txq->txpkts0_pkts = 0; - txq->txpkts1_pkts = 0; - mp_ring_reset_stats(txq->r); - } + for_each_txq(vi, i, txq) { + txq->txcsum = 0; + txq->tso_wrs = 0; + txq->vlan_insertion = 0; + txq->imm_wrs = 0; + txq->sgl_wrs = 0; + txq->txpkt_wrs = 0; + txq->txpkts0_wrs = 0; + txq->txpkts1_wrs = 0; + txq->txpkts0_pkts = 0; + txq->txpkts1_pkts = 0; + mp_ring_reset_stats(txq->r); + } #ifdef TCP_OFFLOAD - /* nothing to clear for each ofld_rxq */ + /* nothing to clear for each ofld_rxq */ - for_each_ofld_txq(pi, i, wrq) { - wrq->tx_wrs_direct = 0; - wrq->tx_wrs_copied = 0; - } + for_each_ofld_txq(vi, i, wrq) { + wrq->tx_wrs_direct = 0; + wrq->tx_wrs_copied = 0; + } #endif - wrq = &sc->sge.ctrlq[pi->port_id]; - wrq->tx_wrs_direct = 0; - wrq->tx_wrs_copied = 0; + + if (IS_MAIN_VI(vi)) { + wrq = &sc->sge.ctrlq[pi->port_id]; + wrq->tx_wrs_direct = 0; + wrq->tx_wrs_copied = 0; + } + } } break; } @@ -8173,8 +8734,8 @@ void t4_iscsi_init(struct ifnet *ifp, unsigned int tag_mask, const unsigned int *pgsz_order) { - struct port_info *pi = ifp->if_softc; - struct adapter *sc = pi->adapter; + struct vi_info *vi = ifp->if_softc; + struct adapter *sc = vi->pi->adapter; t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask); t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) | @@ -8183,9 +8744,10 @@ t4_iscsi_init(struct ifnet *ifp, unsigned int 
tag_mask, } static int -toe_capability(struct port_info *pi, int enable) +toe_capability(struct vi_info *vi, int enable) { int rc; + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; ASSERT_SYNCHRONIZED_OP(sc); @@ -8194,19 +8756,32 @@ toe_capability(struct port_info *pi, int enable) return (ENODEV); if (enable) { + if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) { + /* TOE is already enabled. */ + return (0); + } + /* * We need the port's queues around so that we're able to send * and receive CPLs to/from the TOE even if the ifnet for this * port has never been UP'd administratively. */ - if (!(pi->flags & PORT_INIT_DONE)) { - rc = cxgbe_init_synchronized(pi); + if (!(vi->flags & VI_INIT_DONE)) { + rc = vi_full_init(vi); + if (rc) + return (rc); + } + if (!(pi->vi[0].flags & VI_INIT_DONE)) { + rc = vi_full_init(&pi->vi[0]); if (rc) return (rc); } - if (isset(&sc->offload_map, pi->port_id)) + if (isset(&sc->offload_map, pi->port_id)) { + /* TOE is enabled on another VI of this port. */ + pi->uld_vis++; return (0); + } if (!uld_active(sc, ULD_TOM)) { rc = t4_activate_uld(sc, ULD_TOM); @@ -8229,9 +8804,12 @@ toe_capability(struct port_info *pi, int enable) if (!uld_active(sc, ULD_ISCSI)) (void) t4_activate_uld(sc, ULD_ISCSI); + pi->uld_vis++; setbit(&sc->offload_map, pi->port_id); } else { - if (!isset(&sc->offload_map, pi->port_id)) + pi->uld_vis--; + + if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0) return (0); KASSERT(uld_active(sc, ULD_TOM), @@ -8376,17 +8954,45 @@ tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ - if (t4_ntxq10g < 1) + if (t4_ntxq10g < 1) { +#ifdef RSS + t4_ntxq10g = rss_getnumbuckets(); +#else t4_ntxq10g = min(nc, NTXQ_10G); +#endif + } - if (t4_ntxq1g < 1) + if (t4_ntxq1g < 1) { +#ifdef RSS + /* XXX: way too many for 1GbE? 
*/ + t4_ntxq1g = rss_getnumbuckets(); +#else t4_ntxq1g = min(nc, NTXQ_1G); +#endif + } - if (t4_nrxq10g < 1) + if (t4_ntxq_vi < 1) + t4_ntxq_vi = min(nc, NTXQ_VI); + + if (t4_nrxq10g < 1) { +#ifdef RSS + t4_nrxq10g = rss_getnumbuckets(); +#else t4_nrxq10g = min(nc, NRXQ_10G); +#endif + } - if (t4_nrxq1g < 1) + if (t4_nrxq1g < 1) { +#ifdef RSS + /* XXX: way too many for 1GbE? */ + t4_nrxq1g = rss_getnumbuckets(); +#else t4_nrxq1g = min(nc, NRXQ_1G); +#endif + } + + if (t4_nrxq_vi < 1) + t4_nrxq_vi = min(nc, NRXQ_VI); #ifdef TCP_OFFLOAD if (t4_nofldtxq10g < 1) @@ -8395,12 +9001,18 @@ tweak_tunables(void) if (t4_nofldtxq1g < 1) t4_nofldtxq1g = min(nc, NOFLDTXQ_1G); + if (t4_nofldtxq_vi < 1) + t4_nofldtxq_vi = min(nc, NOFLDTXQ_VI); + if (t4_nofldrxq10g < 1) t4_nofldrxq10g = min(nc, NOFLDRXQ_10G); if (t4_nofldrxq1g < 1) t4_nofldrxq1g = min(nc, NOFLDRXQ_1G); + if (t4_nofldrxq_vi < 1) + t4_nofldrxq_vi = min(nc, NOFLDRXQ_VI); + if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; #else @@ -8409,17 +9021,11 @@ tweak_tunables(void) #endif #ifdef DEV_NETMAP - if (t4_nnmtxq10g < 1) - t4_nnmtxq10g = min(nc, NNMTXQ_10G); - - if (t4_nnmtxq1g < 1) - t4_nnmtxq1g = min(nc, NNMTXQ_1G); + if (t4_nnmtxq_vi < 1) + t4_nnmtxq_vi = min(nc, NNMTXQ_VI); - if (t4_nnmrxq10g < 1) - t4_nnmrxq10g = min(nc, NNMRXQ_10G); - - if (t4_nnmrxq1g < 1) - t4_nnmrxq1g = min(nc, NNMRXQ_1G); + if (t4_nnmrxq_vi < 1) + t4_nnmrxq_vi = min(nc, NNMRXQ_VI); #endif if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS) @@ -8525,6 +9131,7 @@ done_unload: static devclass_t t4_devclass, t5_devclass; static devclass_t cxgbe_devclass, cxl_devclass; +static devclass_t vcxgbe_devclass, vcxl_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); @@ -8539,3 +9146,9 @@ MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); + +DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0); 
+MODULE_VERSION(vcxgbe, 1); + +DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0); +MODULE_VERSION(vcxl, 1); diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c index f54a67f..80ec2f9 100644 --- a/sys/dev/cxgbe/t4_netmap.c +++ b/sys/dev/cxgbe/t4_netmap.c @@ -33,10 +33,11 @@ __FBSDID("$FreeBSD$"); #ifdef DEV_NETMAP #include <sys/param.h> +#include <sys/bus.h> #include <sys/eventhandler.h> #include <sys/lock.h> -#include <sys/types.h> #include <sys/mbuf.h> +#include <sys/module.h> #include <sys/selinfo.h> #include <sys/socket.h> #include <sys/sockio.h> @@ -86,187 +87,20 @@ SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_holdoff_tmr_idx, CTLFLAG_RWTUN, static int nm_cong_drop = 1; TUNABLE_INT("hw.cxgbe.nm_cong_drop", &nm_cong_drop); -/* netmap ifnet routines */ -static void cxgbe_nm_init(void *); -static int cxgbe_nm_ioctl(struct ifnet *, unsigned long, caddr_t); -static int cxgbe_nm_transmit(struct ifnet *, struct mbuf *); -static void cxgbe_nm_qflush(struct ifnet *); - -static int cxgbe_nm_init_synchronized(struct port_info *); -static int cxgbe_nm_uninit_synchronized(struct port_info *); - -static void -cxgbe_nm_init(void *arg) -{ - struct port_info *pi = arg; - struct adapter *sc = pi->adapter; - - if (begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4nminit") != 0) - return; - cxgbe_nm_init_synchronized(pi); - end_synchronized_op(sc, 0); - - return; -} - -static int -cxgbe_nm_init_synchronized(struct port_info *pi) -{ - struct adapter *sc = pi->adapter; - struct ifnet *ifp = pi->nm_ifp; - int rc = 0; - - ASSERT_SYNCHRONIZED_OP(sc); - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - return (0); /* already running */ - - if (!(sc->flags & FULL_INIT_DONE) && - ((rc = adapter_full_init(sc)) != 0)) - return (rc); /* error message displayed already */ - - if (!(pi->flags & PORT_INIT_DONE) && - ((rc = port_full_init(pi)) != 0)) - return (rc); /* error message displayed already */ - - rc = update_mac_settings(ifp, XGMAC_ALL); - if (rc) - return (rc); /* error 
message displayed already */ - - ifp->if_drv_flags |= IFF_DRV_RUNNING; - - return (rc); -} - -static int -cxgbe_nm_uninit_synchronized(struct port_info *pi) -{ -#ifdef INVARIANTS - struct adapter *sc = pi->adapter; -#endif - struct ifnet *ifp = pi->nm_ifp; - - ASSERT_SYNCHRONIZED_OP(sc); - - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - - return (0); -} - -static int -cxgbe_nm_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) -{ - int rc = 0, mtu, flags; - struct port_info *pi = ifp->if_softc; - struct adapter *sc = pi->adapter; - struct ifreq *ifr = (struct ifreq *)data; - uint32_t mask; - - MPASS(pi->nm_ifp == ifp); - - switch (cmd) { - case SIOCSIFMTU: - mtu = ifr->ifr_mtu; - if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) - return (EINVAL); - - rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4nmtu"); - if (rc) - return (rc); - ifp->if_mtu = mtu; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - rc = update_mac_settings(ifp, XGMAC_MTU); - end_synchronized_op(sc, 0); - break; - - case SIOCSIFFLAGS: - rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4nflg"); - if (rc) - return (rc); - - if (ifp->if_flags & IFF_UP) { - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - flags = pi->nmif_flags; - if ((ifp->if_flags ^ flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - rc = update_mac_settings(ifp, - XGMAC_PROMISC | XGMAC_ALLMULTI); - } - } else - rc = cxgbe_nm_init_synchronized(pi); - pi->nmif_flags = ifp->if_flags; - } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) - rc = cxgbe_nm_uninit_synchronized(pi); - end_synchronized_op(sc, 0); - break; - - case SIOCADDMULTI: - case SIOCDELMULTI: /* these two are called with a mutex held :-( */ - rc = begin_synchronized_op(sc, pi, HOLD_LOCK, "t4nmulti"); - if (rc) - return (rc); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - rc = update_mac_settings(ifp, XGMAC_MCADDRS); - end_synchronized_op(sc, LOCK_HELD); - break; - - case SIOCSIFCAP: - mask = ifr->ifr_reqcap ^ ifp->if_capenable; - if (mask & IFCAP_TXCSUM) { - 
ifp->if_capenable ^= IFCAP_TXCSUM; - ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); - } - if (mask & IFCAP_TXCSUM_IPV6) { - ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; - ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); - } - if (mask & IFCAP_RXCSUM) - ifp->if_capenable ^= IFCAP_RXCSUM; - if (mask & IFCAP_RXCSUM_IPV6) - ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; - break; - - case SIOCSIFMEDIA: - case SIOCGIFMEDIA: - ifmedia_ioctl(ifp, ifr, &pi->nm_media, cmd); - break; - - default: - rc = ether_ioctl(ifp, cmd, data); - } - - return (rc); -} - -static int -cxgbe_nm_transmit(struct ifnet *ifp, struct mbuf *m) -{ - - m_freem(m); - return (0); -} - -static void -cxgbe_nm_qflush(struct ifnet *ifp) -{ - - return; -} - static int -alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int cong) +alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong) { int rc, cntxt_id, i; __be32 v; - struct adapter *sc = pi->adapter; - struct netmap_adapter *na = NA(pi->nm_ifp); + struct adapter *sc = vi->pi->adapter; + struct netmap_adapter *na = NA(vi->ifp); struct fw_iq_cmd c; MPASS(na != NULL); MPASS(nm_rxq->iq_desc != NULL); MPASS(nm_rxq->fl_desc != NULL); - bzero(nm_rxq->iq_desc, pi->qsize_rxq * IQ_ESIZE); + bzero(nm_rxq->iq_desc, vi->qsize_rxq * IQ_ESIZE); bzero(nm_rxq->fl_desc, na->num_rx_desc * EQ_ESIZE + spg_len); bzero(&c, sizeof(c)); @@ -275,7 +109,7 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int cong) V_FW_IQ_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | FW_LEN16(c)); - if (pi->flags & INTR_NM_RXQ) { + if (vi->flags & INTR_RXQ) { KASSERT(nm_rxq->intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, nm_rxq->intr_idx)); @@ -287,13 +121,13 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int cong) } c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | - V_FW_IQ_CMD_VIID(pi->nm_viid) | + V_FW_IQ_CMD_VIID(vi->viid) | 
V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); - c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | + c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) | F_FW_IQ_CMD_IQGTSMODE | V_FW_IQ_CMD_IQINTCNTTHRESH(0) | V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); - c.iqsize = htobe16(pi->qsize_rxq); + c.iqsize = htobe16(vi->qsize_rxq); c.iqaddr = htobe64(nm_rxq->iq_ba); if (cong >= 0) { c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN | @@ -319,7 +153,7 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int cong) } nm_rxq->iq_cidx = 0; - MPASS(nm_rxq->iq_sidx == pi->qsize_rxq - spg_len / IQ_ESIZE); + MPASS(nm_rxq->iq_sidx == vi->qsize_rxq - spg_len / IQ_ESIZE); nm_rxq->iq_gen = F_RSPD_GEN; nm_rxq->iq_cntxt_id = be16toh(c.iqid); nm_rxq->iq_abs_id = be16toh(c.physiqid); @@ -380,9 +214,9 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int cong) } static int -free_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq) +free_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) { - struct adapter *sc = pi->adapter; + struct adapter *sc = vi->pi->adapter; int rc; rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, @@ -394,12 +228,12 @@ free_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq) } static int -alloc_nm_txq_hwq(struct port_info *pi, struct sge_nm_txq *nm_txq) +alloc_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) { int rc, cntxt_id; size_t len; - struct adapter *sc = pi->adapter; - struct netmap_adapter *na = NA(pi->nm_ifp); + struct adapter *sc = vi->pi->adapter; + struct netmap_adapter *na = NA(vi->ifp); struct fw_eq_eth_cmd c; MPASS(na != NULL); @@ -415,10 +249,10 @@ alloc_nm_txq_hwq(struct port_info *pi, struct sge_nm_txq *nm_txq) c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | - F_FW_EQ_ETH_CMD_AUTOEQUEQE | 
V_FW_EQ_ETH_CMD_VIID(pi->nm_viid)); + F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); c.fetchszm_to_iqid = htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | - V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | + V_FW_EQ_ETH_CMD_PCIECHN(vi->pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | V_FW_EQ_ETH_CMD_IQID(sc->sge.nm_rxq[nm_txq->iqidx].iq_cntxt_id)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | @@ -427,7 +261,7 @@ alloc_nm_txq_hwq(struct port_info *pi, struct sge_nm_txq *nm_txq) rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { - device_printf(pi->dev, + device_printf(vi->dev, "failed to create netmap egress queue: %d\n", rc); return (rc); } @@ -467,9 +301,9 @@ alloc_nm_txq_hwq(struct port_info *pi, struct sge_nm_txq *nm_txq) } static int -free_nm_txq_hwq(struct port_info *pi, struct sge_nm_txq *nm_txq) +free_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) { - struct adapter *sc = pi->adapter; + struct adapter *sc = vi->pi->adapter; int rc; rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, nm_txq->cntxt_id); @@ -480,7 +314,7 @@ free_nm_txq_hwq(struct port_info *pi, struct sge_nm_txq *nm_txq) } static int -cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, +cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, struct netmap_adapter *na) { struct netmap_slot *slot; @@ -488,11 +322,10 @@ cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, struct sge_nm_txq *nm_txq; int rc, i, j, hwidx; struct hw_buf_info *hwb; - uint16_t *rss; ASSERT_SYNCHRONIZED_OP(sc); - if ((pi->flags & PORT_INIT_DONE) == 0 || + if ((vi->flags & VI_INIT_DONE) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (EAGAIN); @@ -511,8 +344,10 @@ cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, /* Must set caps before calling netmap_reset */ nm_set_native_flags(na); - 
for_each_nm_rxq(pi, i, nm_rxq) { - alloc_nm_rxq_hwq(pi, nm_rxq, tnl_cong(pi, nm_cong_drop)); + for_each_nm_rxq(vi, i, nm_rxq) { + struct irq *irq = &sc->irq[vi->first_intr + i]; + + alloc_nm_rxq_hwq(vi, nm_rxq, tnl_cong(vi->pi, nm_cong_drop)); nm_rxq->fl_hwidx = hwidx; slot = netmap_reset(na, NR_RX, i, 0); MPASS(slot != NULL); /* XXXNM: error check, not assert */ @@ -533,38 +368,37 @@ cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, wmb(); t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), nm_rxq->fl_db_val | V_PIDX(j)); + + atomic_cmpset_int(&irq->nm_state, NM_OFF, NM_ON); } - for_each_nm_txq(pi, i, nm_txq) { - alloc_nm_txq_hwq(pi, nm_txq); + for_each_nm_txq(vi, i, nm_txq) { + alloc_nm_txq_hwq(vi, nm_txq); slot = netmap_reset(na, NR_TX, i, 0); MPASS(slot != NULL); /* XXXNM: error check, not assert */ } - rss = malloc(pi->nm_rss_size * sizeof (*rss), M_CXGBE, M_ZERO | - M_WAITOK); - for (i = 0; i < pi->nm_rss_size;) { - for_each_nm_rxq(pi, j, nm_rxq) { - rss[i++] = nm_rxq->iq_abs_id; - if (i == pi->nm_rss_size) + if (vi->nm_rss == NULL) { + vi->nm_rss = malloc(vi->rss_size * sizeof(uint16_t), M_CXGBE, + M_ZERO | M_WAITOK); + } + for (i = 0; i < vi->rss_size;) { + for_each_nm_rxq(vi, j, nm_rxq) { + vi->nm_rss[i++] = nm_rxq->iq_abs_id; + if (i == vi->rss_size) break; } } - rc = -t4_config_rss_range(sc, sc->mbox, pi->nm_viid, 0, pi->nm_rss_size, - rss, pi->nm_rss_size); + rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, + vi->nm_rss, vi->rss_size); if (rc != 0) if_printf(ifp, "netmap rss_config failed: %d\n", rc); - free(rss, M_CXGBE); - - rc = -t4_enable_vi(sc, sc->mbox, pi->nm_viid, true, true); - if (rc != 0) - if_printf(ifp, "netmap enable_vi failed: %d\n", rc); return (rc); } static int -cxgbe_netmap_off(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, +cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, struct netmap_adapter *na) { int rc, i; @@ -573,12 +407,16 @@ 
cxgbe_netmap_off(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, ASSERT_SYNCHRONIZED_OP(sc); - rc = -t4_enable_vi(sc, sc->mbox, pi->nm_viid, false, false); + if ((vi->flags & VI_INIT_DONE) == 0) + return (0); + + rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, + vi->rss, vi->rss_size); if (rc != 0) - if_printf(ifp, "netmap disable_vi failed: %d\n", rc); + if_printf(ifp, "failed to restore RSS config: %d\n", rc); nm_clear_native_flags(na); - for_each_nm_txq(pi, i, nm_txq) { + for_each_nm_txq(vi, i, nm_txq) { struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx]; /* Wait for hw pidx to catch up ... */ @@ -589,10 +427,15 @@ cxgbe_netmap_off(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, while (spg->pidx != spg->cidx) pause("nmcidx", 1); - free_nm_txq_hwq(pi, nm_txq); + free_nm_txq_hwq(vi, nm_txq); } - for_each_nm_rxq(pi, i, nm_rxq) { - free_nm_rxq_hwq(pi, nm_rxq); + for_each_nm_rxq(vi, i, nm_rxq) { + struct irq *irq = &sc->irq[vi->first_intr + i]; + + while (!atomic_cmpset_int(&irq->nm_state, NM_ON, NM_OFF)) + pause("nmst", 1); + + free_nm_rxq_hwq(vi, nm_rxq); } return (rc); @@ -602,17 +445,17 @@ static int cxgbe_netmap_reg(struct netmap_adapter *na, int on) { struct ifnet *ifp = na->ifp; - struct port_info *pi = ifp->if_softc; - struct adapter *sc = pi->adapter; + struct vi_info *vi = ifp->if_softc; + struct adapter *sc = vi->pi->adapter; int rc; - rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4nmreg"); + rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nmreg"); if (rc != 0) return (rc); if (on) - rc = cxgbe_netmap_on(sc, pi, ifp, na); + rc = cxgbe_netmap_on(sc, vi, ifp, na); else - rc = cxgbe_netmap_off(sc, pi, ifp, na); + rc = cxgbe_netmap_off(sc, vi, ifp, na); end_synchronized_op(sc, 0); return (rc); @@ -861,9 +704,9 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; - struct port_info *pi = ifp->if_softc; - 
struct adapter *sc = pi->adapter; - struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[pi->first_nm_txq + kring->ring_id]; + struct vi_info *vi = ifp->if_softc; + struct adapter *sc = vi->pi->adapter; + struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[vi->first_nm_txq + kring->ring_id]; const u_int head = kring->rhead; u_int reclaimed = 0; int n, d, npkt_remaining, ndesc_remaining, txcsum; @@ -928,9 +771,9 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; struct ifnet *ifp = na->ifp; - struct port_info *pi = ifp->if_softc; - struct adapter *sc = pi->adapter; - struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[pi->first_nm_rxq + kring->ring_id]; + struct vi_info *vi = ifp->if_softc; + struct adapter *sc = vi->pi->adapter; + struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq + kring->ring_id]; u_int const head = nm_rxsync_prologue(kring); u_int n; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; @@ -998,83 +841,26 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) return (0); } -/* - * Create an ifnet solely for netmap use and register it with the kernel. - */ -int -create_netmap_ifnet(struct port_info *pi) +void +cxgbe_nm_attach(struct vi_info *vi) { - struct adapter *sc = pi->adapter; + struct port_info *pi; + struct adapter *sc; struct netmap_adapter na; - struct ifnet *ifp; - device_t dev = pi->dev; - uint8_t mac[ETHER_ADDR_LEN]; - int rc; - - if (pi->nnmtxq <= 0 || pi->nnmrxq <= 0) - return (0); - MPASS(pi->nm_ifp == NULL); - - /* - * Allocate a virtual interface exclusively for netmap use. Give it the - * MAC address normally reserved for use by a TOE interface. (The TOE - * driver on FreeBSD doesn't use it). 
- */ - rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, &mac[0], - &pi->nm_rss_size, FW_VI_FUNC_OFLD, 0); - if (rc < 0) { - device_printf(dev, "unable to allocate netmap virtual " - "interface for port %d: %d\n", pi->port_id, -rc); - return (-rc); - } - pi->nm_viid = rc; - pi->nm_xact_addr_filt = -1; - - ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) { - device_printf(dev, "Cannot allocate netmap ifnet\n"); - return (ENOMEM); - } - pi->nm_ifp = ifp; - ifp->if_softc = pi; - - if_initname(ifp, is_t4(pi->adapter) ? "ncxgbe" : "ncxl", - device_get_unit(dev)); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_init = cxgbe_nm_init; - ifp->if_ioctl = cxgbe_nm_ioctl; - ifp->if_transmit = cxgbe_nm_transmit; - ifp->if_qflush = cxgbe_nm_qflush; + MPASS(vi->nnmrxq > 0); + MPASS(vi->ifp != NULL); - /* - * netmap(4) says "netmap does not use features such as checksum - * offloading, TCP segmentation offloading, encryption, VLAN - * encapsulation/decapsulation, etc." - * - * By default we comply with the statement above. But we do declare the - * ifnet capable of L3/L4 checksumming so that a user can override - * netmap and have the hardware do the L3/L4 checksums. - */ - ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_JUMBO_MTU | - IFCAP_HWCSUM_IPV6; - ifp->if_capenable = 0; - ifp->if_hwassist = 0; - - /* nm_media has already been setup by the caller */ + pi = vi->pi; + sc = pi->adapter; - ether_ifattach(ifp, mac); - - /* - * Register with netmap in the kernel. - */ bzero(&na, sizeof(na)); - na.ifp = pi->nm_ifp; + na.ifp = vi->ifp; na.na_flags = NAF_BDG_MAYSLEEP; /* Netmap doesn't know about the space reserved for the status page. */ - na.num_tx_desc = pi->qsize_txq - spg_len / EQ_ESIZE; + na.num_tx_desc = vi->qsize_txq - spg_len / EQ_ESIZE; /* * The freelist's cidx/pidx drives netmap's rx cidx/pidx. So @@ -1082,32 +868,23 @@ create_netmap_ifnet(struct port_info *pi) * freelist, and not the number of entries in the iq. 
(These two are * not exactly the same due to the space taken up by the status page). */ - na.num_rx_desc = (pi->qsize_rxq / 8) * 8; + na.num_rx_desc = (vi->qsize_rxq / 8) * 8; na.nm_txsync = cxgbe_netmap_txsync; na.nm_rxsync = cxgbe_netmap_rxsync; na.nm_register = cxgbe_netmap_reg; - na.num_tx_rings = pi->nnmtxq; - na.num_rx_rings = pi->nnmrxq; + na.num_tx_rings = vi->nnmtxq; + na.num_rx_rings = vi->nnmrxq; netmap_attach(&na); /* This adds IFCAP_NETMAP to if_capabilities */ - - return (0); } -int -destroy_netmap_ifnet(struct port_info *pi) +void +cxgbe_nm_detach(struct vi_info *vi) { - struct adapter *sc = pi->adapter; - if (pi->nm_ifp == NULL) - return (0); - - netmap_detach(pi->nm_ifp); - ifmedia_removeall(&pi->nm_media); - ether_ifdetach(pi->nm_ifp); - if_free(pi->nm_ifp); - t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->nm_viid); + MPASS(vi->nnmrxq > 0); + MPASS(vi->ifp != NULL); - return (0); + netmap_detach(vi->ifp); } static void @@ -1134,9 +911,9 @@ void t4_nm_intr(void *arg) { struct sge_nm_rxq *nm_rxq = arg; - struct port_info *pi = nm_rxq->pi; - struct adapter *sc = pi->adapter; - struct ifnet *ifp = pi->nm_ifp; + struct vi_info *vi = nm_rxq->vi; + struct adapter *sc = vi->pi->adapter; + struct ifnet *ifp = vi->ifp; struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = &na->rx_rings[nm_rxq->nid]; struct netmap_ring *ring = kring->ring; diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index 5c7eb48..5216dd3 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -171,44 +171,44 @@ static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, bus_addr_t *, void **); static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, void *); -static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *, +static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, int, int); -static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *); +static int 
free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *); static void add_fl_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, struct sge_fl *); static int alloc_fwq(struct adapter *); static int free_fwq(struct adapter *); static int alloc_mgmtq(struct adapter *); static int free_mgmtq(struct adapter *); -static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int, +static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, struct sysctl_oid *); -static int free_rxq(struct port_info *, struct sge_rxq *); +static int free_rxq(struct vi_info *, struct sge_rxq *); #ifdef TCP_OFFLOAD -static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int, +static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, struct sysctl_oid *); -static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *); +static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); #endif #ifdef DEV_NETMAP -static int alloc_nm_rxq(struct port_info *, struct sge_nm_rxq *, int, int, +static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int, struct sysctl_oid *); -static int free_nm_rxq(struct port_info *, struct sge_nm_rxq *); -static int alloc_nm_txq(struct port_info *, struct sge_nm_txq *, int, int, +static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *); +static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int, struct sysctl_oid *); -static int free_nm_txq(struct port_info *, struct sge_nm_txq *); +static int free_nm_txq(struct vi_info *, struct sge_nm_txq *); #endif static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); -static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *); +static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); #ifdef TCP_OFFLOAD -static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *); +static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); #endif -static int 
alloc_eq(struct adapter *, struct port_info *, struct sge_eq *); +static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *); static int free_eq(struct adapter *, struct sge_eq *); -static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *, +static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, struct sysctl_oid *); static int free_wrq(struct adapter *, struct sge_wrq *); -static int alloc_txq(struct port_info *, struct sge_txq *, int, +static int alloc_txq(struct vi_info *, struct sge_txq *, int, struct sysctl_oid *); -static int free_txq(struct port_info *, struct sge_txq *); +static int free_txq(struct vi_info *, struct sge_txq *); static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); static inline void ring_fl_db(struct adapter *, struct sge_fl *); static int refill_fl(struct adapter *, struct sge_fl *, int); @@ -833,51 +833,25 @@ t4_teardown_adapter_queues(struct adapter *sc) } static inline int -port_intr_count(struct port_info *pi) +first_vector(struct vi_info *vi) { - int rc = 0; - - if (pi->flags & INTR_RXQ) - rc += pi->nrxq; -#ifdef TCP_OFFLOAD - if (pi->flags & INTR_OFLD_RXQ) - rc += pi->nofldrxq; -#endif -#ifdef DEV_NETMAP - if (pi->flags & INTR_NM_RXQ) - rc += pi->nnmrxq; -#endif - return (rc); -} - -static inline int -first_vector(struct port_info *pi) -{ - struct adapter *sc = pi->adapter; - int rc = T4_EXTRA_INTR, i; + struct adapter *sc = vi->pi->adapter; if (sc->intr_count == 1) return (0); - for_each_port(sc, i) { - if (i == pi->port_id) - break; - - rc += port_intr_count(sc->port[i]); - } - - return (rc); + return (vi->first_intr); } /* * Given an arbitrary "index," come up with an iq that can be used by other - * queues (of this port) for interrupt forwarding, SGE egress updates, etc. + * queues (of this VI) for interrupt forwarding, SGE egress updates, etc. * The iq returned is guaranteed to be something that takes direct interrupts. 
*/ static struct sge_iq * -port_intr_iq(struct port_info *pi, int idx) +vi_intr_iq(struct vi_info *vi, int idx) { - struct adapter *sc = pi->adapter; + struct adapter *sc = vi->pi->adapter; struct sge *s = &sc->sge; struct sge_iq *iq = NULL; int nintr, i; @@ -885,43 +859,35 @@ port_intr_iq(struct port_info *pi, int idx) if (sc->intr_count == 1) return (&sc->sge.fwq); - nintr = port_intr_count(pi); + nintr = vi->nintr; KASSERT(nintr != 0, - ("%s: pi %p has no exclusive interrupts, total interrupts = %d", - __func__, pi, sc->intr_count)); -#ifdef DEV_NETMAP - /* Exclude netmap queues as they can't take anyone else's interrupts */ - if (pi->flags & INTR_NM_RXQ) - nintr -= pi->nnmrxq; - KASSERT(nintr > 0, - ("%s: pi %p has nintr %d after netmap adjustment of %d", __func__, - pi, nintr, pi->nnmrxq)); -#endif + ("%s: vi %p has no exclusive interrupts, total interrupts = %d", + __func__, vi, sc->intr_count)); i = idx % nintr; - if (pi->flags & INTR_RXQ) { - if (i < pi->nrxq) { - iq = &s->rxq[pi->first_rxq + i].iq; + if (vi->flags & INTR_RXQ) { + if (i < vi->nrxq) { + iq = &s->rxq[vi->first_rxq + i].iq; goto done; } - i -= pi->nrxq; + i -= vi->nrxq; } #ifdef TCP_OFFLOAD - if (pi->flags & INTR_OFLD_RXQ) { - if (i < pi->nofldrxq) { - iq = &s->ofld_rxq[pi->first_ofld_rxq + i].iq; + if (vi->flags & INTR_OFLD_RXQ) { + if (i < vi->nofldrxq) { + iq = &s->ofld_rxq[vi->first_ofld_rxq + i].iq; goto done; } - i -= pi->nofldrxq; + i -= vi->nofldrxq; } #endif - panic("%s: pi %p, intr_flags 0x%lx, idx %d, total intr %d\n", __func__, - pi, pi->flags & INTR_ALL, idx, nintr); + panic("%s: vi %p, intr_flags 0x%lx, idx %d, total intr %d\n", __func__, + vi, vi->flags & INTR_ALL, idx, nintr); done: MPASS(iq != NULL); KASSERT(iq->flags & IQ_INTR, - ("%s: iq %p (port %p, intr_flags 0x%lx, idx %d)", __func__, iq, pi, - pi->flags & INTR_ALL, idx)); + ("%s: iq %p (vi %p, intr_flags 0x%lx, idx %d)", __func__, iq, vi, + vi->flags & INTR_ALL, idx)); return (iq); } @@ -948,7 +914,7 @@ 
mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) } int -t4_setup_port_queues(struct port_info *pi) +t4_setup_vi_queues(struct vi_info *vi) { int rc = 0, i, j, intr_idx, iqid; struct sge_rxq *rxq; @@ -959,18 +925,55 @@ t4_setup_port_queues(struct port_info *pi) struct sge_wrq *ofld_txq; #endif #ifdef DEV_NETMAP + int saved_idx; struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; #endif char name[16]; + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; - struct ifnet *ifp = pi->ifp; - struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev); + struct ifnet *ifp = vi->ifp; + struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); int maxp, mtu = ifp->if_mtu; /* Interrupt vector to start from (when using multiple vectors) */ - intr_idx = first_vector(pi); + intr_idx = first_vector(vi); + +#ifdef DEV_NETMAP + saved_idx = intr_idx; + if (ifp->if_capabilities & IFCAP_NETMAP) { + + /* netmap is supported with direct interrupts only. */ + MPASS(vi->flags & INTR_RXQ); + + /* + * We don't have buffers to back the netmap rx queues + * right now so we create the queues in a way that + * doesn't set off any congestion signal in the chip. + */ + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq", + CTLFLAG_RD, NULL, "rx queues"); + for_each_nm_rxq(vi, i, nm_rxq) { + rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid); + if (rc != 0) + goto done; + intr_idx++; + } + + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq", + CTLFLAG_RD, NULL, "tx queues"); + for_each_nm_txq(vi, i, nm_txq) { + iqid = vi->first_nm_rxq + (i % vi->nnmrxq); + rc = alloc_nm_txq(vi, nm_txq, iqid, i, oid); + if (rc != 0) + goto done; + } + } + + /* Normal rx queues and netmap rx queues share the same interrupts. 
*/ + intr_idx = saved_idx; +#endif /* * First pass over all NIC and TOE rx queues: @@ -978,62 +981,49 @@ t4_setup_port_queues(struct port_info *pi) * b) allocate queue iff it will take direct interrupts. */ maxp = mtu_to_max_payload(sc, mtu, 0); - if (pi->flags & INTR_RXQ) { - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", + if (vi->flags & INTR_RXQ) { + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, NULL, "rx queues"); } - for_each_rxq(pi, i, rxq) { + for_each_rxq(vi, i, rxq) { - init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq); + init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq); snprintf(name, sizeof(name), "%s rxq%d-fl", - device_get_nameunit(pi->dev), i); - init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, name); + device_get_nameunit(vi->dev), i); + init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); - if (pi->flags & INTR_RXQ) { + if (vi->flags & INTR_RXQ) { rxq->iq.flags |= IQ_INTR; - rc = alloc_rxq(pi, rxq, intr_idx, i, oid); + rc = alloc_rxq(vi, rxq, intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } } +#ifdef DEV_NETMAP + if (ifp->if_capabilities & IFCAP_NETMAP) + intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); +#endif #ifdef TCP_OFFLOAD maxp = mtu_to_max_payload(sc, mtu, 1); - if (is_offload(sc) && pi->flags & INTR_OFLD_RXQ) { - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq", + if (vi->flags & INTR_OFLD_RXQ) { + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections"); } - for_each_ofld_rxq(pi, i, ofld_rxq) { + for_each_ofld_rxq(vi, i, ofld_rxq) { - init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, - pi->qsize_rxq); + init_iq(&ofld_rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, + vi->qsize_rxq); snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", - device_get_nameunit(pi->dev), i); - init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, name); + device_get_nameunit(vi->dev), i); + init_fl(sc, &ofld_rxq->fl, 
vi->qsize_rxq / 8, maxp, name); - if (pi->flags & INTR_OFLD_RXQ) { + if (vi->flags & INTR_OFLD_RXQ) { ofld_rxq->iq.flags |= IQ_INTR; - rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid); - if (rc != 0) - goto done; - intr_idx++; - } - } -#endif -#ifdef DEV_NETMAP - /* - * We don't have buffers to back the netmap rx queues right now so we - * create the queues in a way that doesn't set off any congestion signal - * in the chip. - */ - if (pi->flags & INTR_NM_RXQ) { - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "nm_rxq", - CTLFLAG_RD, NULL, "rx queues for netmap"); - for_each_nm_rxq(pi, i, nm_rxq) { - rc = alloc_nm_rxq(pi, nm_rxq, intr_idx, i, oid); + rc = alloc_ofld_rxq(vi, ofld_rxq, intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; @@ -1046,88 +1036,73 @@ t4_setup_port_queues(struct port_info *pi) * their interrupts are allocated now. */ j = 0; - if (!(pi->flags & INTR_RXQ)) { - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", + if (!(vi->flags & INTR_RXQ)) { + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, NULL, "rx queues"); - for_each_rxq(pi, i, rxq) { + for_each_rxq(vi, i, rxq) { MPASS(!(rxq->iq.flags & IQ_INTR)); - intr_idx = port_intr_iq(pi, j)->abs_id; + intr_idx = vi_intr_iq(vi, j)->abs_id; - rc = alloc_rxq(pi, rxq, intr_idx, i, oid); + rc = alloc_rxq(vi, rxq, intr_idx, i, oid); if (rc != 0) goto done; j++; } } #ifdef TCP_OFFLOAD - if (is_offload(sc) && !(pi->flags & INTR_OFLD_RXQ)) { - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq", + if (vi->nofldrxq != 0 && !(vi->flags & INTR_OFLD_RXQ)) { + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections"); - for_each_ofld_rxq(pi, i, ofld_rxq) { + for_each_ofld_rxq(vi, i, ofld_rxq) { MPASS(!(ofld_rxq->iq.flags & IQ_INTR)); - intr_idx = port_intr_iq(pi, j)->abs_id; + intr_idx = vi_intr_iq(vi, j)->abs_id; - rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid); + rc = alloc_ofld_rxq(vi, 
ofld_rxq, intr_idx, i, oid); if (rc != 0) goto done; j++; } } #endif -#ifdef DEV_NETMAP - if (!(pi->flags & INTR_NM_RXQ)) - CXGBE_UNIMPLEMENTED(__func__); -#endif /* * Now the tx queues. Only one pass needed. */ - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, NULL, "tx queues"); j = 0; - for_each_txq(pi, i, txq) { - iqid = port_intr_iq(pi, j)->cntxt_id; + for_each_txq(vi, i, txq) { + iqid = vi_intr_iq(vi, j)->cntxt_id; snprintf(name, sizeof(name), "%s txq%d", - device_get_nameunit(pi->dev), i); - init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid, + device_get_nameunit(vi->dev), i); + init_eq(&txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, iqid, name); - rc = alloc_txq(pi, txq, i, oid); + rc = alloc_txq(vi, txq, i, oid); if (rc != 0) goto done; j++; } #ifdef TCP_OFFLOAD - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq", + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq", CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections"); - for_each_ofld_txq(pi, i, ofld_txq) { + for_each_ofld_txq(vi, i, ofld_txq) { struct sysctl_oid *oid2; - iqid = port_intr_iq(pi, j)->cntxt_id; + iqid = vi_intr_iq(vi, j)->cntxt_id; snprintf(name, sizeof(name), "%s ofld_txq%d", - device_get_nameunit(pi->dev), i); - init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan, + device_get_nameunit(vi->dev), i); + init_eq(&ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, iqid, name); snprintf(name, sizeof(name), "%d", i); - oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, + oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "offload tx queue"); - rc = alloc_wrq(sc, pi, ofld_txq, oid2); - if (rc != 0) - goto done; - j++; - } -#endif -#ifdef DEV_NETMAP - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "nm_txq", - CTLFLAG_RD, NULL, "tx queues for netmap use"); - for_each_nm_txq(pi, i, nm_txq) { - iqid = 
pi->first_nm_rxq + (j % pi->nnmrxq); - rc = alloc_nm_txq(pi, nm_txq, iqid, i, oid); + rc = alloc_wrq(sc, vi, ofld_txq, oid2); if (rc != 0) goto done; j++; @@ -1137,17 +1112,19 @@ t4_setup_port_queues(struct port_info *pi) /* * Finally, the control queue. */ - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, + if (!IS_MAIN_VI(vi)) + goto done; + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, NULL, "ctrl queue"); ctrlq = &sc->sge.ctrlq[pi->port_id]; - iqid = port_intr_iq(pi, 0)->cntxt_id; - snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev)); + iqid = vi_intr_iq(vi, 0)->cntxt_id; + snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(vi->dev)); init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name); - rc = alloc_wrq(sc, pi, ctrlq, oid); + rc = alloc_wrq(sc, vi, ctrlq, oid); done: if (rc) - t4_teardown_port_queues(pi); + t4_teardown_vi_queues(vi); return (rc); } @@ -1156,9 +1133,10 @@ done: * Idempotent */ int -t4_teardown_port_queues(struct port_info *pi) +t4_teardown_vi_queues(struct vi_info *vi) { int i; + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_rxq *rxq; struct sge_txq *txq; @@ -1172,63 +1150,68 @@ t4_teardown_port_queues(struct port_info *pi) #endif /* Do this before freeing the queues */ - if (pi->flags & PORT_SYSCTL_CTX) { - sysctl_ctx_free(&pi->ctx); - pi->flags &= ~PORT_SYSCTL_CTX; + if (vi->flags & VI_SYSCTL_CTX) { + sysctl_ctx_free(&vi->ctx); + vi->flags &= ~VI_SYSCTL_CTX; } +#ifdef DEV_NETMAP + if (vi->ifp->if_capabilities & IFCAP_NETMAP) { + for_each_nm_txq(vi, i, nm_txq) { + free_nm_txq(vi, nm_txq); + } + + for_each_nm_rxq(vi, i, nm_rxq) { + free_nm_rxq(vi, nm_rxq); + } + } +#endif + /* * Take down all the tx queues first, as they reference the rx queues * (for egress updates, etc.). 
*/ - free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); + if (IS_MAIN_VI(vi)) + free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); - for_each_txq(pi, i, txq) { - free_txq(pi, txq); + for_each_txq(vi, i, txq) { + free_txq(vi, txq); } #ifdef TCP_OFFLOAD - for_each_ofld_txq(pi, i, ofld_txq) { + for_each_ofld_txq(vi, i, ofld_txq) { free_wrq(sc, ofld_txq); } #endif -#ifdef DEV_NETMAP - for_each_nm_txq(pi, i, nm_txq) - free_nm_txq(pi, nm_txq); -#endif /* * Then take down the rx queues that forward their interrupts, as they * reference other rx queues. */ - for_each_rxq(pi, i, rxq) { + for_each_rxq(vi, i, rxq) { if ((rxq->iq.flags & IQ_INTR) == 0) - free_rxq(pi, rxq); + free_rxq(vi, rxq); } #ifdef TCP_OFFLOAD - for_each_ofld_rxq(pi, i, ofld_rxq) { + for_each_ofld_rxq(vi, i, ofld_rxq) { if ((ofld_rxq->iq.flags & IQ_INTR) == 0) - free_ofld_rxq(pi, ofld_rxq); + free_ofld_rxq(vi, ofld_rxq); } #endif -#ifdef DEV_NETMAP - for_each_nm_rxq(pi, i, nm_rxq) - free_nm_rxq(pi, nm_rxq); -#endif /* * Then take down the rx queues that take direct interrupts. */ - for_each_rxq(pi, i, rxq) { + for_each_rxq(vi, i, rxq) { if (rxq->iq.flags & IQ_INTR) - free_rxq(pi, rxq); + free_rxq(vi, rxq); } #ifdef TCP_OFFLOAD - for_each_ofld_rxq(pi, i, ofld_rxq) { + for_each_ofld_rxq(vi, i, ofld_rxq) { if (ofld_rxq->iq.flags & IQ_INTR) - free_ofld_rxq(pi, ofld_rxq); + free_ofld_rxq(vi, ofld_rxq); } #endif @@ -1284,6 +1267,21 @@ t4_intr(void *arg) } } +void +t4_vi_intr(void *arg) +{ + struct irq *irq = arg; + +#ifdef DEV_NETMAP + if (atomic_cmpset_int(&irq->nm_state, NM_ON, NM_BUSY)) { + t4_nm_intr(irq->nm_rxq); + atomic_cmpset_int(&irq->nm_state, NM_BUSY, NM_ON); + } +#endif + if (irq->rxq != NULL) + t4_intr(irq->rxq); +} + /* * Deals with anything and everything on the given ingress queue. 
*/ @@ -1887,8 +1885,8 @@ t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) void t4_update_fl_bufsize(struct ifnet *ifp) { - struct port_info *pi = ifp->if_softc; - struct adapter *sc = pi->adapter; + struct vi_info *vi = ifp->if_softc; + struct adapter *sc = vi->pi->adapter; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; @@ -1897,7 +1895,7 @@ t4_update_fl_bufsize(struct ifnet *ifp) int i, maxp, mtu = ifp->if_mtu; maxp = mtu_to_max_payload(sc, mtu, 0); - for_each_rxq(pi, i, rxq) { + for_each_rxq(vi, i, rxq) { fl = &rxq->fl; FL_LOCK(fl); @@ -1906,7 +1904,7 @@ t4_update_fl_bufsize(struct ifnet *ifp) } #ifdef TCP_OFFLOAD maxp = mtu_to_max_payload(sc, mtu, 1); - for_each_ofld_rxq(pi, i, ofld_rxq) { + for_each_ofld_rxq(vi, i, ofld_rxq) { fl = &ofld_rxq->fl; FL_LOCK(fl); @@ -2328,7 +2326,8 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) struct sge_txq *txq = r->cookie; struct sge_eq *eq = &txq->eq; struct ifnet *ifp = txq->ifp; - struct port_info *pi = (void *)ifp->if_softc; + struct vi_info *vi = ifp->if_softc; + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; u_int total, remaining; /* # of packets */ u_int available, dbdiff; /* # of hardware descriptors */ @@ -2556,12 +2555,13 @@ free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, * the abs_id of the ingress queue to which its interrupts should be forwarded. 
*/ static int -alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, +alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, int intr_idx, int cong) { int rc, i, cntxt_id; size_t len; struct fw_iq_cmd c; + struct port_info *pi = vi->pi; struct adapter *sc = iq->adapter; __be32 v = 0; @@ -2592,7 +2592,7 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | - V_FW_IQ_CMD_VIID(pi->viid) | + V_FW_IQ_CMD_VIID(vi->viid) | V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | F_FW_IQ_CMD_IQGTSMODE | @@ -2744,7 +2744,7 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, } static int -free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) +free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) { int rc; struct adapter *sc = iq->adapter; @@ -2753,7 +2753,7 @@ free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) if (sc == NULL) return (0); /* nothing to do */ - dev = pi ? pi->dev : sc->dev; + dev = vi ? vi->dev : sc->dev; if (iq->flags & IQ_ALLOCATED) { rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, @@ -2835,7 +2835,7 @@ alloc_fwq(struct adapter *sc) init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); fwq->flags |= IQ_INTR; /* always */ intr_idx = sc->intr_count > 1 ? 
1 : 0; - rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1); + rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); if (rc != 0) { device_printf(sc->dev, "failed to create firmware event queue: %d\n", rc); @@ -2910,15 +2910,15 @@ tnl_cong(struct port_info *pi, int drop) } static int -alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx, +alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, struct sysctl_oid *oid) { int rc; struct sysctl_oid_list *children; char name[16]; - rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, - tnl_cong(pi, cong_drop)); + rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, + tnl_cong(vi->pi, cong_drop)); if (rc != 0) return (rc); @@ -2927,55 +2927,55 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx, * fill it up a bit more. */ FL_LOCK(&rxq->fl); - refill_fl(pi->adapter, &rxq->fl, 128); + refill_fl(vi->pi->adapter, &rxq->fl, 128); FL_UNLOCK(&rxq->fl); #if defined(INET) || defined(INET6) rc = tcp_lro_init(&rxq->lro); if (rc != 0) return (rc); - rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */ + rxq->lro.ifp = vi->ifp; /* also indicates LRO init'ed */ - if (pi->ifp->if_capenable & IFCAP_LRO) + if (vi->ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; #endif - rxq->ifp = pi->ifp; + rxq->ifp = vi->ifp; children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "abs_id", CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, 
&rxq->iq.cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the queue"); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I", "consumer index"); #if defined(INET) || defined(INET6) - SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, + SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, &rxq->lro.lro_queued, 0, NULL); - SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, + SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, &rxq->lro.lro_flushed, 0, NULL); #endif - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, &rxq->rxcsum, "# of times hardware assisted with checksum"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction", + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", CTLFLAG_RD, &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); - add_fl_sysctls(&pi->ctx, oid, &rxq->fl); + add_fl_sysctls(&vi->ctx, oid, &rxq->fl); return (rc); } static int -free_rxq(struct port_info *pi, struct sge_rxq *rxq) +free_rxq(struct vi_info *vi, struct sge_rxq *rxq) { int rc; @@ -2986,7 +2986,7 @@ free_rxq(struct port_info *pi, struct sge_rxq *rxq) } #endif - rc = free_iq_fl(pi, &rxq->iq, &rxq->fl); + rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); if (rc == 0) bzero(rxq, sizeof(*rxq)); @@ -2995,46 +2995,46 @@ free_rxq(struct port_info *pi, struct sge_rxq *rxq) #ifdef TCP_OFFLOAD static int -alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq, +alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, int intr_idx, int idx, struct sysctl_oid *oid) { int rc; struct sysctl_oid_list *children; char name[16]; - rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, - pi->rx_chan_map); + rc = alloc_iq_fl(vi, &ofld_rxq->iq, 
&ofld_rxq->fl, intr_idx, + vi->pi->rx_chan_map); if (rc != 0) return (rc); children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "abs_id", CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the queue"); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I", "consumer index"); - add_fl_sysctls(&pi->ctx, oid, &ofld_rxq->fl); + add_fl_sysctls(&vi->ctx, oid, &ofld_rxq->fl); return (rc); } static int -free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq) +free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) { int rc; - rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl); + rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl); if (rc == 0) bzero(ofld_rxq, sizeof(*ofld_rxq)); @@ -3044,7 +3044,7 @@ free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq) #ifdef DEV_NETMAP static int -alloc_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int intr_idx, +alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, int idx, struct sysctl_oid *oid) { int rc; @@ -3052,12 +3052,12 @@ alloc_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int intr_idx, struct sysctl_ctx_list *ctx; char name[16]; size_t len; - struct adapter *sc = pi->adapter; - struct netmap_adapter *na = NA(pi->nm_ifp); + struct adapter *sc = vi->pi->adapter; + 
struct netmap_adapter *na = NA(vi->ifp); MPASS(na != NULL); - len = pi->qsize_rxq * IQ_ESIZE; + len = vi->qsize_rxq * IQ_ESIZE; rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); if (rc != 0) @@ -3069,16 +3069,16 @@ alloc_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int intr_idx, if (rc != 0) return (rc); - nm_rxq->pi = pi; + nm_rxq->vi = vi; nm_rxq->nid = idx; nm_rxq->iq_cidx = 0; - nm_rxq->iq_sidx = pi->qsize_rxq - spg_len / IQ_ESIZE; + nm_rxq->iq_sidx = vi->qsize_rxq - spg_len / IQ_ESIZE; nm_rxq->iq_gen = F_RSPD_GEN; nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; nm_rxq->fl_sidx = na->num_rx_desc; nm_rxq->intr_idx = intr_idx; - ctx = &pi->ctx; + ctx = &vi->ctx; children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); @@ -3114,9 +3114,9 @@ alloc_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int intr_idx, static int -free_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq) +free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) { - struct adapter *sc = pi->adapter; + struct adapter *sc = vi->pi->adapter; free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, nm_rxq->iq_desc); @@ -3127,13 +3127,14 @@ free_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq) } static int -alloc_nm_txq(struct port_info *pi, struct sge_nm_txq *nm_txq, int iqidx, int idx, +alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx, struct sysctl_oid *oid) { int rc; size_t len; + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; - struct netmap_adapter *na = NA(pi->nm_ifp); + struct netmap_adapter *na = NA(vi->ifp); char name[16]; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); @@ -3148,19 +3149,20 @@ alloc_nm_txq(struct port_info *pi, struct sge_nm_txq *nm_txq, int iqidx, int idx nm_txq->nid = idx; nm_txq->iqidx = iqidx; nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | - V_TXPKT_INTF(pi->tx_chan) | 
V_TXPKT_PF(sc->pf)); + V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_VF_VLD(1) | + V_TXPKT_VF(vi->viid)); snprintf(name, sizeof(name), "%d", idx); - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "netmap tx queue"); children = SYSCTL_CHILDREN(oid); - SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, + SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &nm_txq->cntxt_id, 0, "SGE context id of the queue"); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I", "consumer index"); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I", "producer index"); @@ -3168,9 +3170,9 @@ alloc_nm_txq(struct port_info *pi, struct sge_nm_txq *nm_txq, int iqidx, int idx } static int -free_nm_txq(struct port_info *pi, struct sge_nm_txq *nm_txq) +free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq) { - struct adapter *sc = pi->adapter; + struct adapter *sc = vi->pi->adapter; free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba, nm_txq->desc); @@ -3224,7 +3226,7 @@ ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) } static int -eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) +eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, cntxt_id; struct fw_eq_eth_cmd c; @@ -3238,7 +3240,7 @@ eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | - F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(pi->viid)); + F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); c.fetchszm_to_iqid = 
htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | @@ -3250,7 +3252,7 @@ eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { - device_printf(pi->dev, + device_printf(vi->dev, "failed to create Ethernet egress queue: %d\n", rc); return (rc); } @@ -3268,7 +3270,7 @@ eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) #ifdef TCP_OFFLOAD static int -ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) +ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, cntxt_id; struct fw_eq_ofld_cmd c; @@ -3293,7 +3295,7 @@ ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { - device_printf(pi->dev, + device_printf(vi->dev, "failed to create egress queue for TCP offload: %d\n", rc); return (rc); } @@ -3311,7 +3313,7 @@ ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) #endif static int -alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) +alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, qsize; size_t len; @@ -3335,12 +3337,12 @@ alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) break; case EQ_ETH: - rc = eth_eq_alloc(sc, pi, eq); + rc = eth_eq_alloc(sc, vi, eq); break; #ifdef TCP_OFFLOAD case EQ_OFLD: - rc = ofld_eq_alloc(sc, pi, eq); + rc = ofld_eq_alloc(sc, vi, eq); break; #endif @@ -3423,14 +3425,14 @@ free_eq(struct adapter *sc, struct sge_eq *eq) } static int -alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq, +alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, struct sysctl_oid *oid) { int rc; - struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx; + struct sysctl_ctx_list *ctx = vi ? 
&vi->ctx : &sc->ctx; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); - rc = alloc_eq(sc, pi, &wrq->eq); + rc = alloc_eq(sc, vi, &wrq->eq); if (rc) return (rc); @@ -3471,10 +3473,11 @@ free_wrq(struct adapter *sc, struct sge_wrq *wrq) } static int -alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx, +alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, struct sysctl_oid *oid) { int rc; + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_eq *eq = &txq->eq; char name[16]; @@ -3487,7 +3490,7 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx, return (rc); } - rc = alloc_eq(sc, pi, eq); + rc = alloc_eq(sc, vi, eq); if (rc != 0) { mp_ring_free(txq->r); txq->r = NULL; @@ -3497,69 +3500,70 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx, /* Can't fail after this point. */ TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); - txq->ifp = pi->ifp; + txq->ifp = vi->ifp; txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | - V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf)); + V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_VF_VLD(1) | + V_TXPKT_VF(vi->viid)); txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, M_ZERO | M_WAITOK); snprintf(name, sizeof(name), "%d", idx); - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "tx queue"); children = SYSCTL_CHILDREN(oid); - SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, + SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &eq->cntxt_id, 0, "SGE context id of the queue"); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", "consumer index"); - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx", + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 
CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", "producer index"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, &txq->txcsum, "# of times hardware assisted with checksum"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion", + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", CTLFLAG_RD, &txq->vlan_insertion, "# of times hardware inserted 802.1Q tag"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, &txq->tso_wrs, "# of TSO work requests"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, &txq->imm_wrs, "# of work requests with immediate data"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, &txq->sgl_wrs, "# of work requests with direct SGL"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts0_wrs", + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", CTLFLAG_RD, &txq->txpkts0_wrs, "# of txpkts (type 0) work requests"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts1_wrs", + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", CTLFLAG_RD, &txq->txpkts1_wrs, "# of txpkts (type 1) work requests"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts0_pkts", + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", CTLFLAG_RD, &txq->txpkts0_pkts, "# of frames tx'd using type0 txpkts work requests"); - SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts1_pkts", + SYSCTL_ADD_UQUAD(&vi->ctx, 
children, OID_AUTO, "txpkts1_pkts", CTLFLAG_RD, &txq->txpkts1_pkts, "# of frames tx'd using type1 txpkts work requests"); - SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_enqueues", + SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->r->enqueues, "# of enqueues to the mp_ring for this queue"); - SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_drops", + SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->r->drops, "# of drops in the mp_ring for this queue"); - SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_starts", + SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->r->starts, "# of normal consumer starts in the mp_ring for this queue"); - SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_stalls", + SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->r->stalls, "# of consumer stalls in the mp_ring for this queue"); - SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_restarts", + SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->r->restarts, "# of consumer restarts in the mp_ring for this queue"); - SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_abdications", + SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->r->abdications, "# of consumer abdications in the mp_ring for this queue"); @@ -3567,10 +3571,10 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx, } static int -free_txq(struct port_info *pi, struct sge_txq *txq) +free_txq(struct vi_info *vi, struct sge_txq *txq) { int rc; - struct adapter *sc = pi->adapter; + struct adapter *sc = vi->pi->adapter; struct sge_eq *eq = &txq->eq; rc = free_eq(sc, eq); @@ -3750,7 +3754,7 @@ refill_sfl(void *arg) struct adapter *sc = arg; struct sge_fl *fl, *fl_temp; - mtx_lock(&sc->sfl_lock); + mtx_assert(&sc->sfl_lock, MA_OWNED); TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, 
fl_temp) { FL_LOCK(fl); refill_fl(sc, fl, 64); @@ -3763,7 +3767,6 @@ refill_sfl(void *arg) if (!TAILQ_EMPTY(&sc->sfl)) callout_schedule(&sc->sfl_callout, hz / 5); - mtx_unlock(&sc->sfl_lock); } static int diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c index 941f4d4..60f1e6c 100644 --- a/sys/dev/cxgbe/tom/t4_connect.c +++ b/sys/dev/cxgbe/tom/t4_connect.c @@ -233,7 +233,7 @@ static uint32_t calc_opt2a(struct socket *so, struct toepcb *toep) { struct tcpcb *tp = so_sototcpcb(so); - struct port_info *pi = toep->port; + struct port_info *pi = toep->vi->pi; struct adapter *sc = pi->adapter; uint32_t opt2; @@ -321,7 +321,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, struct toepcb *toep = NULL; struct wrqe *wr = NULL; struct ifnet *rt_ifp = rt->rt_ifp; - struct port_info *pi; + struct vi_info *vi; int mtu_idx, rscale, qid_atid, rc, isipv6; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); @@ -332,17 +332,17 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family)); if (rt_ifp->if_type == IFT_ETHER) - pi = rt_ifp->if_softc; + vi = rt_ifp->if_softc; else if (rt_ifp->if_type == IFT_L2VLAN) { struct ifnet *ifp = VLAN_COOKIE(rt_ifp); - pi = ifp->if_softc; + vi = ifp->if_softc; } else if (rt_ifp->if_type == IFT_IEEE8023ADLAG) DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */ else DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); - toep = alloc_toepcb(pi, -1, -1, M_NOWAIT); + toep = alloc_toepcb(vi, -1, -1, M_NOWAIT); if (toep == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); @@ -350,7 +350,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, if (toep->tid < 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); - toep->l2te = t4_l2t_get(pi, rt_ifp, + toep->l2te = t4_l2t_get(vi->pi, rt_ifp, rt->rt_flags & RTF_GATEWAY ? 
rt->rt_gateway : nam); if (toep->l2te == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); @@ -398,13 +398,13 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, if (is_t4(sc)) { INIT_TP_WR(cpl, 0); - cpl->params = select_ntuple(pi, toep->l2te); + cpl->params = select_ntuple(vi, toep->l2te); } else { struct cpl_t5_act_open_req6 *c5 = (void *)cpl; INIT_TP_WR(c5, 0); c5->iss = htobe32(tp->iss); - c5->params = select_ntuple(pi, toep->l2te); + c5->params = select_ntuple(vi, toep->l2te); } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); @@ -414,7 +414,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, cpl->peer_port = inp->inp_fport; cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; - cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, + cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale, toep->rx_credits, toep->ulp_mode); cpl->opt2 = calc_opt2a(so, toep); } else { @@ -422,19 +422,19 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, if (is_t4(sc)) { INIT_TP_WR(cpl, 0); - cpl->params = select_ntuple(pi, toep->l2te); + cpl->params = select_ntuple(vi, toep->l2te); } else { struct cpl_t5_act_open_req *c5 = (void *)cpl; INIT_TP_WR(c5, 0); c5->iss = htobe32(tp->iss); - c5->params = select_ntuple(pi, toep->l2te); + c5->params = select_ntuple(vi, toep->l2te); } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, &cpl->peer_port); - cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, + cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale, toep->rx_credits, toep->ulp_mode); cpl->opt2 = calc_opt2a(so, toep); } diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index 6935b44..f1ac76e 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -104,9 +104,10 @@ 
send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) struct wrqe *wr; struct fw_flowc_wr *flowc; unsigned int nparams = ftxp ? 8 : 6, flowclen; - struct port_info *pi = toep->port; + struct vi_info *vi = toep->vi; + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; - unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; + unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), @@ -513,7 +514,7 @@ write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, if (txalign > 0) { struct tcpcb *tp = intotcpcb(toep->inp); - if (plen < 2 * tp->t_maxseg || is_10G_port(toep->port)) + if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi)) txwr->lsodisable_to_flags |= htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); else diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c index 9a71221..fa2385d 100644 --- a/sys/dev/cxgbe/tom/t4_listen.c +++ b/sys/dev/cxgbe/tom/t4_listen.c @@ -72,7 +72,7 @@ static void free_stid(struct adapter *, struct listen_ctx *); /* lctx services */ static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *, - struct port_info *); + struct vi_info *); static int free_lctx(struct adapter *, struct listen_ctx *); static void hold_lctx(struct listen_ctx *); static void listen_hash_add(struct adapter *, struct listen_ctx *); @@ -80,7 +80,7 @@ static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *); static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *); static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *); -static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *); +static inline void save_qids_in_mbuf(struct mbuf *, struct vi_info *); static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *); static void send_reset_synqe(struct toedev *, struct synq_entry *); @@ -187,7 +187,7 @@ free_stid(struct 
adapter *sc, struct listen_ctx *lctx) } static struct listen_ctx * -alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi) +alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi) { struct listen_ctx *lctx; @@ -214,8 +214,8 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi) } } - lctx->ctrlq = &sc->sge.ctrlq[pi->port_id]; - lctx->ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq]; + lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id]; + lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq]; refcount_init(&lctx->refcount, 1); TAILQ_INIT(&lctx->synq); @@ -346,7 +346,8 @@ send_reset_synqe(struct toedev *tod, struct synq_entry *synqe) struct adapter *sc = tod->tod_softc; struct mbuf *m = synqe->syn; struct ifnet *ifp = m->m_pkthdr.rcvif; - struct port_info *pi = ifp->if_softc; + struct vi_info *vi = ifp->if_softc; + struct port_info *pi = vi->pi; struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; struct wrqe *wr; struct fw_flowc_wr *flowc; @@ -355,7 +356,7 @@ send_reset_synqe(struct toedev *tod, struct synq_entry *synqe) struct sge_wrq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; const int nparams = 6; - unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; + unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; INP_WLOCK_ASSERT(synqe->lctx->inp); @@ -495,17 +496,18 @@ destroy_server(struct adapter *sc, struct listen_ctx *lctx) /* * Start a listening server by sending a passive open request to HW. * - * Can't take adapter lock here and access to sc->flags, sc->open_device_map, + * Can't take adapter lock here and access to sc->flags, * sc->offload_map, if_capenable are all race prone. 
*/ int t4_listen_start(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; + struct vi_info *vi; struct port_info *pi; struct inpcb *inp = tp->t_inpcb; struct listen_ctx *lctx; - int i, rc; + int i, rc, v; INP_WLOCK_ASSERT(inp); @@ -527,12 +529,9 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) ("%s: TOM not initialized", __func__)); #endif - if ((sc->open_device_map & sc->offload_map) == 0) - goto done; /* no port that's UP with IFCAP_TOE enabled */ - /* - * Find a running port with IFCAP_TOE (4 or 6). We'll use the first - * such port's queues to send the passive open and receive the reply to + * Find an initialized VI with IFCAP_TOE (4 or 6). We'll use the first + * such VI's queues to send the passive open and receive the reply to * it. * * XXX: need a way to mark a port in use by offload. if_cxgbe should @@ -540,18 +539,20 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) * attempts to disable IFCAP_TOE on that port too?). */ for_each_port(sc, i) { - if (isset(&sc->open_device_map, i) && - sc->port[i]->ifp->if_capenable & IFCAP_TOE) - break; + pi = sc->port[i]; + for_each_vi(pi, v, vi) { + if (vi->flags & VI_INIT_DONE && + vi->ifp->if_capenable & IFCAP_TOE) + goto found; + } } - KASSERT(i < sc->params.nports, - ("%s: no running port with TOE capability enabled.", __func__)); - pi = sc->port[i]; + goto done; /* no port that's UP with IFCAP_TOE enabled */ +found: if (listen_hash_find(sc, inp) != NULL) goto done; /* already setup */ - lctx = alloc_lctx(sc, inp, pi); + lctx = alloc_lctx(sc, inp, vi); if (lctx == NULL) { log(LOG_ERR, "%s: listen request ignored, %s couldn't allocate lctx\n", @@ -822,7 +823,7 @@ done_with_synqe(struct adapter *sc, struct synq_entry *synqe) { struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp; - struct port_info *pi = synqe->syn->m_pkthdr.rcvif->if_softc; + struct vi_info *vi = synqe->syn->m_pkthdr.rcvif->if_softc; struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; 
INP_WLOCK_ASSERT(inp); @@ -832,7 +833,7 @@ done_with_synqe(struct adapter *sc, struct synq_entry *synqe) if (inp) INP_WUNLOCK(inp); remove_tid(sc, synqe->tid); - release_tid(sc, synqe->tid, &sc->sge.ctrlq[pi->port_id]); + release_tid(sc, synqe->tid, &sc->sge.ctrlq[vi->pi->port_id]); t4_l2t_release(e); release_synqe(synqe); /* removed from synq list */ } @@ -943,12 +944,12 @@ t4_offload_socket(struct toedev *tod, void *arg, struct socket *so) } static inline void -save_qids_in_mbuf(struct mbuf *m, struct port_info *pi) +save_qids_in_mbuf(struct mbuf *m, struct vi_info *vi) { uint32_t txqid, rxqid; - txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq; - rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq; + txqid = (arc4random() % vi->nofldtxq) + vi->first_ofld_txq; + rxqid = (arc4random() % vi->nofldrxq) + vi->first_ofld_rxq; m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff); } @@ -1224,11 +1225,12 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, struct tcphdr th; struct tcpopt to; struct port_info *pi; + struct vi_info *vi; struct ifnet *hw_ifp, *ifp; struct l2t_entry *e = NULL; int rscale, mtu_idx, rx_credits, rxqid, ulp_mode; struct synq_entry *synqe = NULL; - int reject_reason; + int reject_reason, v; uint16_t vid; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); @@ -1245,7 +1247,26 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, t4opt_to_tcpopt(&cpl->tcpopt, &to); pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))]; - hw_ifp = pi->ifp; /* the cxgbeX ifnet */ + + /* + * Use the MAC index to lookup the associated VI. If this SYN + * didn't match a perfect MAC filter, punt. 
+ */ + if (!(be16toh(cpl->l2info) & F_SYN_XACT_MATCH)) { + m_freem(m); + m = NULL; + REJECT_PASS_ACCEPT(); + } + for_each_vi(pi, v, vi) { + if (vi->xact_addr_filt == G_SYN_MAC_IDX(be16toh(cpl->l2info))) + goto found; + } + m_freem(m); + m = NULL; + REJECT_PASS_ACCEPT(); + +found: + hw_ifp = vi->ifp; /* the (v)cxgbeX ifnet */ m->m_pkthdr.rcvif = hw_ifp; tod = TOEDEV(hw_ifp); @@ -1311,7 +1332,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, REJECT_PASS_ACCEPT(); rpl = wrtod(wr); - INP_INFO_WLOCK(&V_tcbinfo); /* for 4-tuple check, syncache_add */ + INP_INFO_WLOCK(&V_tcbinfo); /* for 4-tuple check */ /* Don't offload if the 4-tuple is already in use */ if (toe_4tuple_check(&inc, &th, ifp) != 0) { @@ -1319,6 +1340,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, free(wr, M_CXGBE); REJECT_PASS_ACCEPT(); } + INP_INFO_WUNLOCK(&V_tcbinfo); inp = lctx->inp; /* listening socket, not owned by TOE */ INP_WLOCK(inp); @@ -1331,7 +1353,6 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, * resources tied to this listen context. 
*/ INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); free(wr, M_CXGBE); REJECT_PASS_ACCEPT(); } @@ -1344,7 +1365,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); SOCKBUF_UNLOCK(&so->so_rcv); - save_qids_in_mbuf(m, pi); + save_qids_in_mbuf(m, vi); get_qids_from_mbuf(m, NULL, &rxqid); if (is_t4(sc)) @@ -1359,7 +1380,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, synqe->flags |= TPF_SYNQE_TCPDDP; } else ulp_mode = ULP_MODE_NONE; - rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode); + rpl->opt0 = calc_opt0(so, vi, e, mtu_idx, rscale, rx_credits, ulp_mode); rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode); synqe->tid = tid; @@ -1378,12 +1399,10 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, /* * If all goes well t4_syncache_respond will get called during - * syncache_add. Also note that syncache_add releases both pcbinfo and - * pcb locks. + * syncache_add. Note that syncache_add releases the pcb lock. 
*/ toe_syncache_add(&inc, &to, &th, inp, tod, synqe); INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */ - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); /* * If we replied during syncache_add (synqe->wr has been consumed), @@ -1486,7 +1505,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; - struct port_info *pi; + struct vi_info *vi; struct ifnet *ifp; const struct cpl_pass_establish *cpl = (const void *)(rss + 1); #if defined(KTR) || defined(INVARIANTS) @@ -1534,16 +1553,16 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss, } ifp = synqe->syn->m_pkthdr.rcvif; - pi = ifp->if_softc; - KASSERT(pi->adapter == sc, - ("%s: pi %p, sc %p mismatch", __func__, pi, sc)); + vi = ifp->if_softc; + KASSERT(vi->pi->adapter == sc, + ("%s: vi %p, sc %p mismatch", __func__, vi, sc)); get_qids_from_mbuf(synqe->syn, &txqid, &rxqid); KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0], ("%s: CPL arrived on unexpected rxq. 
%d %d", __func__, rxqid, (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0]))); - toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT); + toep = alloc_toepcb(vi, txqid, rxqid, M_NOWAIT); if (toep == NULL) { reset: /* diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index 2d88d3a..8d3cb2f 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -104,8 +104,9 @@ static eventhandler_tag ifaddr_evhandler; static struct timeout_task clip_task; struct toepcb * -alloc_toepcb(struct port_info *pi, int txqid, int rxqid, int flags) +alloc_toepcb(struct vi_info *vi, int txqid, int rxqid, int flags) { + struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct toepcb *toep; int tx_credits, txsd_total, len; @@ -127,18 +128,18 @@ alloc_toepcb(struct port_info *pi, int txqid, int rxqid, int flags) howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16); if (txqid < 0) - txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq; - KASSERT(txqid >= pi->first_ofld_txq && - txqid < pi->first_ofld_txq + pi->nofldtxq, - ("%s: txqid %d for port %p (first %d, n %d)", __func__, txqid, pi, - pi->first_ofld_txq, pi->nofldtxq)); + txqid = (arc4random() % vi->nofldtxq) + vi->first_ofld_txq; + KASSERT(txqid >= vi->first_ofld_txq && + txqid < vi->first_ofld_txq + vi->nofldtxq, + ("%s: txqid %d for vi %p (first %d, n %d)", __func__, txqid, vi, + vi->first_ofld_txq, vi->nofldtxq)); if (rxqid < 0) - rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq; - KASSERT(rxqid >= pi->first_ofld_rxq && - rxqid < pi->first_ofld_rxq + pi->nofldrxq, - ("%s: rxqid %d for port %p (first %d, n %d)", __func__, rxqid, pi, - pi->first_ofld_rxq, pi->nofldrxq)); + rxqid = (arc4random() % vi->nofldrxq) + vi->first_ofld_rxq; + KASSERT(rxqid >= vi->first_ofld_rxq && + rxqid < vi->first_ofld_rxq + vi->nofldrxq, + ("%s: rxqid %d for vi %p (first %d, n %d)", __func__, rxqid, vi, + vi->first_ofld_rxq, vi->nofldrxq)); len = offsetof(struct toepcb, txsd) + txsd_total * 
sizeof(struct ofld_tx_sdesc); @@ -148,7 +149,7 @@ alloc_toepcb(struct port_info *pi, int txqid, int rxqid, int flags) return (NULL); toep->td = sc->tom_softc; - toep->port = pi; + toep->vi = vi; toep->tx_total = tx_credits; toep->tx_credits = tx_credits; toep->ofld_txq = &sc->sge.ofld_txq[txqid]; @@ -509,7 +510,7 @@ extern int always_keepalive; * socket so could be a listening socket too. */ uint64_t -calc_opt0(struct socket *so, struct port_info *pi, struct l2t_entry *e, +calc_opt0(struct socket *so, struct vi_info *vi, struct l2t_entry *e, int mtu_idx, int rscale, int rx_credits, int ulp_mode) { uint64_t opt0; @@ -533,20 +534,20 @@ calc_opt0(struct socket *so, struct port_info *pi, struct l2t_entry *e, if (e != NULL) opt0 |= V_L2T_IDX(e->idx); - if (pi != NULL) { - opt0 |= V_SMAC_SEL(VIID_SMACIDX(pi->viid)); - opt0 |= V_TX_CHAN(pi->tx_chan); + if (vi != NULL) { + opt0 |= V_SMAC_SEL(VIID_SMACIDX(vi->viid)); + opt0 |= V_TX_CHAN(vi->pi->tx_chan); } return htobe64(opt0); } uint64_t -select_ntuple(struct port_info *pi, struct l2t_entry *e) +select_ntuple(struct vi_info *vi, struct l2t_entry *e) { - struct adapter *sc = pi->adapter; + struct adapter *sc = vi->pi->adapter; struct tp_params *tp = &sc->params.tp; - uint16_t viid = pi->viid; + uint16_t viid = vi->viid; uint64_t ntuple = 0; /* @@ -961,7 +962,8 @@ t4_tom_activate(struct adapter *sc) { struct tom_data *td; struct toedev *tod; - int i, rc; + struct vi_info *vi; + int i, rc, v; ASSERT_SYNCHRONIZED_OP(sc); @@ -1020,8 +1022,11 @@ t4_tom_activate(struct adapter *sc) tod->tod_offload_socket = t4_offload_socket; tod->tod_ctloutput = t4_ctloutput; - for_each_port(sc, i) - TOEDEV(sc->port[i]->ifp) = &td->tod; + for_each_port(sc, i) { + for_each_vi(sc->port[i], v, vi) { + TOEDEV(vi->ifp) = &td->tod; + } + } sc->tom_softc = td; register_toedev(sc->tom_softc); diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h index c54f6be..cc5d3b4 100644 --- a/sys/dev/cxgbe/tom/t4_tom.h +++ 
b/sys/dev/cxgbe/tom/t4_tom.h @@ -96,7 +96,7 @@ struct toepcb { u_int flags; /* miscellaneous flags */ struct tom_data *td; struct inpcb *inp; /* backpointer to host stack's PCB */ - struct port_info *port; /* physical port */ + struct vi_info *vi; /* virtual interface */ struct sge_wrq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ctrlq; @@ -221,7 +221,7 @@ td_adapter(struct tom_data *td) } /* t4_tom.c */ -struct toepcb *alloc_toepcb(struct port_info *, int, int, int); +struct toepcb *alloc_toepcb(struct vi_info *, int, int, int); void free_toepcb(struct toepcb *); void offload_socket(struct socket *, struct toepcb *); void undo_offload_socket(struct socket *); @@ -234,9 +234,9 @@ void release_tid(struct adapter *, int, struct sge_wrq *); int find_best_mtu_idx(struct adapter *, struct in_conninfo *, int); u_long select_rcv_wnd(struct socket *); int select_rcv_wscale(void); -uint64_t calc_opt0(struct socket *, struct port_info *, struct l2t_entry *, +uint64_t calc_opt0(struct socket *, struct vi_info *, struct l2t_entry *, int, int, int, int); -uint64_t select_ntuple(struct port_info *, struct l2t_entry *); +uint64_t select_ntuple(struct vi_info *, struct l2t_entry *); void set_tcpddp_ulp_mode(struct toepcb *); int negative_advice(int); struct clip_entry *hold_lip(struct tom_data *, struct in6_addr *); diff --git a/sys/dev/drm2/i915/i915_gem.c b/sys/dev/drm2/i915/i915_gem.c index 5114008..4624997 100644 --- a/sys/dev/drm2/i915/i915_gem.c +++ b/sys/dev/drm2/i915/i915_gem.c @@ -1976,7 +1976,7 @@ retry: DRM_UNLOCK(dev); vm_page_lock(m); VM_OBJECT_WUNLOCK(vm_obj); - vm_page_busy_sleep(m, "915pee"); + vm_page_busy_sleep(m, "915pee", false); goto retry; } goto have_page; @@ -2037,7 +2037,7 @@ retry: DRM_UNLOCK(dev); vm_page_lock(m); VM_OBJECT_WUNLOCK(vm_obj); - vm_page_busy_sleep(m, "915pbs"); + vm_page_busy_sleep(m, "915pbs", false); goto retry; } if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) { diff --git a/sys/dev/drm2/ttm/ttm_bo_vm.c 
b/sys/dev/drm2/ttm/ttm_bo_vm.c index 9c98448..f172a03 100644 --- a/sys/dev/drm2/ttm/ttm_bo_vm.c +++ b/sys/dev/drm2/ttm/ttm_bo_vm.c @@ -239,7 +239,7 @@ reserve: if (vm_page_busied(m)) { vm_page_lock(m); VM_OBJECT_WUNLOCK(vm_obj); - vm_page_busy_sleep(m, "ttmpbs"); + vm_page_busy_sleep(m, "ttmpbs", false); VM_OBJECT_WLOCK(vm_obj); ttm_mem_io_unlock(man); ttm_bo_unreserve(bo); diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h index 3a8ccab..3cd8d29 100644 --- a/sys/dev/netmap/if_em_netmap.h +++ b/sys/dev/netmap/if_em_netmap.h @@ -279,9 +279,9 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags) if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ goto ring_reset; + curr->read.buffer_addr = htole64(paddr); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ - curr->read.buffer_addr = htole64(paddr); netmap_reload_map(na, rxr->rxtag, rxbuf->map, addr); slot->flags &= ~NS_BUF_CHANGED; } diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 9bf37bc..792469f 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -4185,6 +4185,9 @@ static const struct {PCIC_CRYPTO, PCIS_CRYPTO_ENTERTAIN, 1, "entertainment crypto"}, {PCIC_DASP, -1, 0, "dasp"}, {PCIC_DASP, PCIS_DASP_DPIO, 1, "DPIO module"}, + {PCIC_DASP, PCIS_DASP_PERFCNTRS, 1, "performance counters"}, + {PCIC_DASP, PCIS_DASP_COMM_SYNC, 1, "communication synchronizer"}, + {PCIC_DASP, PCIS_DASP_MGMT_CARD, 1, "signal processing management"}, {0, 0, 0, NULL} }; diff --git a/sys/dev/smbus/smbconf.h b/sys/dev/smbus/smbconf.h index a3d403d..0d57360 100644 --- a/sys/dev/smbus/smbconf.h +++ b/sys/dev/smbus/smbconf.h @@ -34,6 +34,10 @@ #define n(flags) (~(flags) & (flags)) +/* Order constants for smbus children. */ +#define SMBUS_ORDER_HINTED 20 +#define SMBUS_ORDER_PNP 40 + /* * How tsleep() is called in smb_request_bus(). 
*/ diff --git a/sys/dev/smbus/smbus.c b/sys/dev/smbus/smbus.c index a111332..89d0a73 100644 --- a/sys/dev/smbus/smbus.c +++ b/sys/dev/smbus/smbus.c @@ -31,49 +31,24 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> +#include <sys/malloc.h> #include <sys/module.h> #include <sys/mutex.h> -#include <sys/bus.h> +#include <sys/bus.h> #include <dev/smbus/smbconf.h> #include <dev/smbus/smbus.h> -/* - * Autoconfiguration and support routines for System Management bus - */ -/* - * Device methods - */ -static int smbus_probe(device_t); -static int smbus_attach(device_t); -static int smbus_detach(device_t); - -static device_method_t smbus_methods[] = { - /* device interface */ - DEVMETHOD(device_probe, smbus_probe), - DEVMETHOD(device_attach, smbus_attach), - DEVMETHOD(device_detach, smbus_detach), - - /* bus interface */ - DEVMETHOD(bus_add_child, bus_generic_add_child), - - DEVMETHOD_END -}; - -driver_t smbus_driver = { - "smbus", - smbus_methods, - sizeof(struct smbus_softc), +struct smbus_ivar +{ + uint8_t addr; }; -devclass_t smbus_devclass; - /* - * At 'probe' time, we add all the devices which we know about to the - * bus. The generic attach routine will probe and attach them if they - * are alive. 
+ * Autoconfiguration and support routines for System Management bus */ + static int smbus_probe(device_t dev) { @@ -90,6 +65,7 @@ smbus_attach(device_t dev) mtx_init(&sc->lock, device_get_nameunit(dev), "smbus", MTX_DEF); bus_generic_probe(dev); + bus_enumerate_hinted_children(dev); bus_generic_attach(dev); return (0); @@ -104,6 +80,7 @@ smbus_detach(device_t dev) error = bus_generic_detach(dev); if (error) return (error); + device_delete_children(dev); mtx_destroy(&sc->lock); return (0); @@ -114,4 +91,154 @@ smbus_generic_intr(device_t dev, u_char devaddr, char low, char high, int err) { } +static device_t +smbus_add_child(device_t dev, u_int order, const char *name, int unit) +{ + struct smbus_ivar *devi; + device_t child; + + child = device_add_child_ordered(dev, order, name, unit); + if (child == NULL) + return (child); + devi = malloc(sizeof(struct smbus_ivar), M_DEVBUF, M_NOWAIT | M_ZERO); + if (devi == NULL) { + device_delete_child(dev, child); + return (NULL); + } + device_set_ivars(child, devi); + return (child); +} + +static void +smbus_hinted_child(device_t bus, const char *dname, int dunit) +{ + struct smbus_ivar *devi; + device_t child; + int addr; + + addr = 0; + resource_int_value(dname, dunit, "addr", &addr); + if (addr > UINT8_MAX) { + device_printf(bus, "ignored incorrect slave address hint 0x%x" + " for %s%d\n", addr, dname, dunit); + return; + } + child = BUS_ADD_CHILD(bus, SMBUS_ORDER_HINTED, dname, dunit); + if (child == NULL) + return; + devi = device_get_ivars(child); + devi->addr = addr; +} + + +static int +smbus_child_location_str(device_t parent, device_t child, char *buf, + size_t buflen) +{ + struct smbus_ivar *devi; + + devi = device_get_ivars(child); + if (devi->addr != 0) + snprintf(buf, buflen, "addr=0x%x", devi->addr); + else if (buflen) + buf[0] = 0; + return (0); +} + +static int +smbus_print_child(device_t parent, device_t child) +{ + struct smbus_ivar *devi; + int retval; + + devi = device_get_ivars(child); + retval = 
bus_print_child_header(parent, child); + if (devi->addr != 0) + retval += printf(" at addr 0x%x", devi->addr); + retval += bus_print_child_footer(parent, child); + + return (retval); +} + +static int +smbus_read_ivar(device_t parent, device_t child, int which, uintptr_t *result) +{ + struct smbus_ivar *devi; + + devi = device_get_ivars(child); + switch (which) { + case SMBUS_IVAR_ADDR: + if (devi->addr != 0) + *result = devi->addr; + else + *result = -1; + break; + default: + return (ENOENT); + } + return (0); +} + +static int +smbus_write_ivar(device_t parent, device_t child, int which, uintptr_t value) +{ + struct smbus_ivar *devi; + + devi = device_get_ivars(child); + switch (which) { + case SMBUS_IVAR_ADDR: + /* Allow to set but no change the slave address. */ + if (devi->addr != 0) + return (EINVAL); + devi->addr = value; + break; + default: + return (ENOENT); + } + return (0); +} + +static void +smbus_probe_nomatch(device_t bus, device_t child) +{ + struct smbus_ivar *devi = device_get_ivars(child); + + /* + * Ignore (self-identified) devices without a slave address set. + * For example, smb(4). 
+ */ + if (devi->addr != 0) + device_printf(bus, "<unknown device> at addr %#x\n", + devi->addr); +} + +/* + * Device methods + */ +static device_method_t smbus_methods[] = { + /* device interface */ + DEVMETHOD(device_probe, smbus_probe), + DEVMETHOD(device_attach, smbus_attach), + DEVMETHOD(device_detach, smbus_detach), + + /* bus interface */ + DEVMETHOD(bus_add_child, smbus_add_child), + DEVMETHOD(bus_hinted_child, smbus_hinted_child), + DEVMETHOD(bus_probe_nomatch, smbus_probe_nomatch), + DEVMETHOD(bus_child_location_str, smbus_child_location_str), + DEVMETHOD(bus_print_child, smbus_print_child), + DEVMETHOD(bus_read_ivar, smbus_read_ivar), + DEVMETHOD(bus_write_ivar, smbus_write_ivar), + + DEVMETHOD_END +}; + +driver_t smbus_driver = { + "smbus", + smbus_methods, + sizeof(struct smbus_softc), +}; + +devclass_t smbus_devclass; + MODULE_VERSION(smbus, SMBUS_MODVER); diff --git a/sys/dev/sound/usb/uaudio.c b/sys/dev/sound/usb/uaudio.c index e3b052a..99369ca 100644 --- a/sys/dev/sound/usb/uaudio.c +++ b/sys/dev/sound/usb/uaudio.c @@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$"); static int uaudio_default_rate = 0; /* use rate list */ static int uaudio_default_bits = 32; static int uaudio_default_channels = 0; /* use default */ +static int uaudio_buffer_ms = 8; #ifdef USB_DEBUG static int uaudio_debug = 0; @@ -115,9 +116,32 @@ SYSCTL_INT(_hw_usb_uaudio, OID_AUTO, default_bits, CTLFLAG_RW, TUNABLE_INT("hw.usb.uaudio.default_channels", &uaudio_default_channels); SYSCTL_INT(_hw_usb_uaudio, OID_AUTO, default_channels, CTLFLAG_RW, &uaudio_default_channels, 0, "uaudio default sample channels"); + +static int +uaudio_buffer_ms_sysctl(SYSCTL_HANDLER_ARGS) +{ + int err, val; + + val = uaudio_buffer_ms; + err = sysctl_handle_int(oidp, &val, 0, req); + + if (err != 0 || req->newptr == NULL || val == uaudio_buffer_ms) + return (err); + + if (val > 8) + val = 8; + else if (val < 2) + val = 2; + + uaudio_buffer_ms = val; + + return (0); +} +SYSCTL_PROC(_hw_usb_uaudio, OID_AUTO, 
buffer_ms, CTLTYPE_INT | CTLFLAG_RWTUN, + 0, sizeof(int), uaudio_buffer_ms_sysctl, "I", + "uaudio buffering delay from 2ms to 8ms"); #endif -#define UAUDIO_IRQS (8000 / UAUDIO_NFRAMES) /* interrupts per second */ #define UAUDIO_NFRAMES 64 /* must be factor of 8 due HS-USB */ #define UAUDIO_NCHANBUFS 2 /* number of outstanding request */ #define UAUDIO_RECURSE_LIMIT 255 /* rounds */ @@ -1284,10 +1308,10 @@ uaudio_configure_msg_sub(struct uaudio_softc *sc, if (fps < 8000) { /* FULL speed USB */ - frames = 8; + frames = uaudio_buffer_ms; } else { /* HIGH speed USB */ - frames = UAUDIO_NFRAMES; + frames = uaudio_buffer_ms * 8; } fps_shift = usbd_xfer_get_fps_shift(chan->xfer[0]); @@ -2164,8 +2188,9 @@ tr_setup: } /* start the SYNC transfer one time per second, if any */ - if (++(ch->intr_counter) >= UAUDIO_IRQS) { - ch->intr_counter = 0; + ch->intr_counter += ch->intr_frames; + if (ch->intr_counter >= ch->frames_per_second) { + ch->intr_counter -= ch->frames_per_second; usbd_transfer_start(ch->xfer[UAUDIO_NCHANBUFS]); } diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c index 7c6d3ed..b3875e7 100644 --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -175,24 +175,8 @@ update_mp(struct mount *mp, struct thread *td) if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; - else if (!(pmp->pm_flags & - (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))) { - struct vnode *rootvp; - - /* - * Try to divine whether to support Win'95 long filenames - */ - if (FAT32(pmp)) - pmp->pm_flags |= MSDOSFSMNT_LONGNAME; - else { - if ((error = - msdosfs_root(mp, LK_EXCLUSIVE, &rootvp)) != 0) - return error; - pmp->pm_flags |= findwin95(VTODE(rootvp)) ? 
- MSDOSFSMNT_LONGNAME : MSDOSFSMNT_SHORTNAME; - vput(rootvp); - } - } + else + pmp->pm_flags |= MSDOSFSMNT_LONGNAME; return 0; } diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index cd40abe..11a0dd2 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -222,55 +222,68 @@ g_dev_print(void) } static void -g_dev_attrchanged(struct g_consumer *cp, const char *attr) +g_dev_set_physpath(struct g_consumer *cp) +{ + struct g_dev_softc *sc; + char *physpath; + int error, physpath_len; + + if (g_access(cp, 1, 0, 0) != 0) + return; + + sc = cp->private; + physpath_len = MAXPATHLEN; + physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); + error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); + g_access(cp, -1, 0, 0); + if (error == 0 && strlen(physpath) != 0) { + struct cdev *dev, *old_alias_dev; + struct cdev **alias_devp; + + dev = sc->sc_dev; + old_alias_dev = sc->sc_alias; + alias_devp = (struct cdev **)&sc->sc_alias; + make_dev_physpath_alias(MAKEDEV_WAITOK, alias_devp, dev, + old_alias_dev, physpath); + } else if (sc->sc_alias) { + destroy_dev((struct cdev *)sc->sc_alias); + sc->sc_alias = NULL; + } + g_free(physpath); +} + +static void +g_dev_set_media(struct g_consumer *cp) { struct g_dev_softc *sc; struct cdev *dev; char buf[SPECNAMELEN + 6]; sc = cp->private; - if (strcmp(attr, "GEOM::media") == 0) { - dev = sc->sc_dev; + dev = sc->sc_dev; + snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name); + devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK); + devctl_notify_f("GEOM", "DEV", "MEDIACHANGE", buf, M_WAITOK); + dev = sc->sc_alias; + if (dev != NULL) { snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name); devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK); devctl_notify_f("GEOM", "DEV", "MEDIACHANGE", buf, M_WAITOK); - dev = sc->sc_alias; - if (dev != NULL) { - snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name); - devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, - M_WAITOK); - devctl_notify_f("GEOM", "DEV", 
"MEDIACHANGE", buf, - M_WAITOK); - } - return; } +} - if (strcmp(attr, "GEOM::physpath") != 0) +static void +g_dev_attrchanged(struct g_consumer *cp, const char *attr) +{ + + if (strcmp(attr, "GEOM::media") == 0) { + g_dev_set_media(cp); return; + } - if (g_access(cp, 1, 0, 0) == 0) { - char *physpath; - int error, physpath_len; - - physpath_len = MAXPATHLEN; - physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); - error = - g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); - g_access(cp, -1, 0, 0); - if (error == 0 && strlen(physpath) != 0) { - struct cdev *old_alias_dev; - struct cdev **alias_devp; - - dev = sc->sc_dev; - old_alias_dev = sc->sc_alias; - alias_devp = (struct cdev **)&sc->sc_alias; - make_dev_physpath_alias(MAKEDEV_WAITOK, alias_devp, - dev, old_alias_dev, physpath); - } else if (sc->sc_alias) { - destroy_dev((struct cdev *)sc->sc_alias); - sc->sc_alias = NULL; - } - g_free(physpath); + if (strcmp(attr, "GEOM::physpath") == 0) { + g_dev_set_physpath(cp); + return; } } diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c index 356d1ea..848c070 100644 --- a/sys/geom/mirror/g_mirror.c +++ b/sys/geom/mirror/g_mirror.c @@ -2143,10 +2143,9 @@ g_mirror_destroy_provider(struct g_mirror_softc *sc) } } mtx_unlock(&sc->sc_queue_mtx); - G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, - sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; + G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name); g_topology_unlock(); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) @@ -2872,7 +2871,7 @@ static int g_mirror_access(struct g_provider *pp, int acr, int acw, int ace) { struct g_mirror_softc *sc; - int dcr, dcw, dce, error = 0; + int error = 0; g_topology_assert(); G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, @@ -2883,30 +2882,21 @@ g_mirror_access(struct g_provider *pp, int acr, int acw, int ace) 
return (0); KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); - dcr = pp->acr + acr; - dcw = pp->acw + acw; - dce = pp->ace + ace; - g_topology_unlock(); sx_xlock(&sc->sc_lock); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 || + (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 || LIST_EMPTY(&sc->sc_disks)) { if (acr > 0 || acw > 0 || ace > 0) error = ENXIO; goto end; } - if (dcw == 0) - g_mirror_idle(sc, dcw); - if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0) { - if (acr > 0 || acw > 0 || ace > 0) { - error = ENXIO; - goto end; - } - if (dcr == 0 && dcw == 0 && dce == 0) { - g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, - sc, NULL); - } - } + sc->sc_provider_open += acr + acw + ace; + if (pp->acw + acw == 0) + g_mirror_idle(sc, 0); + if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 && + sc->sc_provider_open == 0) + g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL); end: sx_xunlock(&sc->sc_lock); g_topology_lock(); @@ -2963,6 +2953,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md) gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; + sc->sc_provider_open = 0; /* * Synchronization geom. 
*/ @@ -3003,25 +2994,23 @@ int g_mirror_destroy(struct g_mirror_softc *sc, int how) { struct g_mirror_disk *disk; - struct g_provider *pp; g_topology_assert_not(); if (sc == NULL) return (ENXIO); sx_assert(&sc->sc_lock, SX_XLOCKED); - pp = sc->sc_provider; - if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { + if (sc->sc_provider_open != 0) { switch (how) { case G_MIRROR_DESTROY_SOFT: G_MIRROR_DEBUG(1, - "Device %s is still open (r%dw%de%d).", pp->name, - pp->acr, pp->acw, pp->ace); + "Device %s is still open (%d).", sc->sc_name, + sc->sc_provider_open); return (EBUSY); case G_MIRROR_DESTROY_DELAYED: G_MIRROR_DEBUG(1, "Device %s will be destroyed on last close.", - pp->name); + sc->sc_name); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { @@ -3032,7 +3021,7 @@ g_mirror_destroy(struct g_mirror_softc *sc, int how) return (EBUSY); case G_MIRROR_DESTROY_HARD: G_MIRROR_DEBUG(1, "Device %s is still open, so it " - "can't be definitely removed.", pp->name); + "can't be definitely removed.", sc->sc_name); } } diff --git a/sys/geom/mirror/g_mirror.h b/sys/geom/mirror/g_mirror.h index d203b97..e730e42 100644 --- a/sys/geom/mirror/g_mirror.h +++ b/sys/geom/mirror/g_mirror.h @@ -179,6 +179,7 @@ struct g_mirror_softc { struct g_geom *sc_geom; struct g_provider *sc_provider; + int sc_provider_open; uint32_t sc_id; /* Mirror unique ID. 
*/ diff --git a/sys/geom/mirror/g_mirror_ctl.c b/sys/geom/mirror/g_mirror_ctl.c index 2b56765..df24e52 100644 --- a/sys/geom/mirror/g_mirror_ctl.c +++ b/sys/geom/mirror/g_mirror_ctl.c @@ -658,8 +658,7 @@ g_mirror_ctl_resize(struct gctl_req *req, struct g_class *mp) return; } /* Deny shrinking of an opened provider */ - if ((g_debugflags & 16) == 0 && (sc->sc_provider->acr > 0 || - sc->sc_provider->acw > 0 || sc->sc_provider->ace > 0)) { + if ((g_debugflags & 16) == 0 && sc->sc_provider_open > 0) { if (sc->sc_mediasize > mediasize) { gctl_error(req, "Device %s is busy.", sc->sc_provider->name); diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 7dc785b..293a65d 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1835,6 +1835,7 @@ __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep) *cp = ' '; } } + psinfo->pr_pid = p->p_pid; sbuf_bcat(sb, psinfo, sizeof(*psinfo)); free(psinfo, M_TEMP); } diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh index 1d3af65..e6c1515 100644 --- a/sys/kern/makesyscalls.sh +++ b/sys/kern/makesyscalls.sh @@ -62,7 +62,6 @@ if [ -n "$2" ]; then fi sed -e ' -s/\$//g :join /\\$/{a\ @@ -140,7 +139,7 @@ s/\$//g printf " * $%s$\n", "FreeBSD" > systrace } NR == 1 { - gsub("[$]FreeBSD: ", "", $0) + gsub("[$]FreeBSD: ", "FreeBSD: ", $0) gsub(" [$]", "", $0) printf " * created from%s\n */\n\n", $0 > syssw diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c index 8141958..a335f6d 100644 --- a/sys/kern/subr_uio.c +++ b/sys/kern/subr_uio.c @@ -534,7 +534,7 @@ fueword32(volatile const void *base, int32_t *val) int fueword64(volatile const void *base, int64_t *val) { - int32_t res; + int64_t res; res = fuword64(base); if (res == -1) diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index f728859..f28db3e 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1724,7 +1724,7 @@ brelse(struct buf *bp) while (vm_page_xbusied(m)) { vm_page_lock(m); VM_OBJECT_WUNLOCK(obj); - vm_page_busy_sleep(m, 
"mbncsh"); + vm_page_busy_sleep(m, "mbncsh", true); VM_OBJECT_WLOCK(obj); } if (pmap_page_wired_mappings(m) == 0) @@ -4069,7 +4069,7 @@ vfs_drain_busy_pages(struct buf *bp) while (vm_page_xbusied(m)) { vm_page_lock(m); VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); - vm_page_busy_sleep(m, "vbpage"); + vm_page_busy_sleep(m, "vbpage", true); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); } } diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 3e7240b..89b6af7 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -879,6 +879,16 @@ findpcb: goto dropwithreset; } INP_WLOCK_ASSERT(inp); + /* + * While waiting for inp lock during the lookup, another thread + * can have dropped the inpcb, in which case we need to loop back + * and try to find a new inpcb to deliver to. + */ + if (inp->inp_flags & INP_DROPPED) { + INP_WUNLOCK(inp); + inp = NULL; + goto findpcb; + } if ((inp->inp_flowtype == M_HASHTYPE_NONE) && (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) && ((inp->inp_socket == NULL) || @@ -938,6 +948,10 @@ relocked: if (in_pcbrele_wlocked(inp)) { inp = NULL; goto findpcb; + } else if (inp->inp_flags & INP_DROPPED) { + INP_WUNLOCK(inp); + inp = NULL; + goto findpcb; } } else ti_locked = TI_WLOCKED; @@ -997,6 +1011,10 @@ relocked: if (in_pcbrele_wlocked(inp)) { inp = NULL; goto findpcb; + } else if (inp->inp_flags & INP_DROPPED) { + INP_WUNLOCK(inp); + inp = NULL; + goto findpcb; } goto relocked; } else diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 97eeeed..8139c6d 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -230,6 +230,10 @@ tcp_twstart(struct tcpcb *tp) INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); + /* A dropped inp should never transition to TIME_WAIT state. 
*/ + KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("tcp_twstart: " + "(inp->inp_flags & INP_DROPPED) != 0")); + if (V_nolocaltimewait) { int error = 0; #ifdef INET6 diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 9955468..131eb28 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include <sys/protosw.h> #include <sys/proc.h> #include <sys/jail.h> +#include <sys/syslog.h> #ifdef DDB #include <ddb/ddb.h> @@ -202,10 +203,26 @@ tcp_detach(struct socket *so, struct inpcb *inp) * In all three cases the tcptw should not be freed here. */ if (inp->inp_flags & INP_DROPPED) { - KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && " - "INP_DROPPED && tp != NULL")); in_pcbdetach(inp); - in_pcbfree(inp); + if (__predict_true(tp == NULL)) { + in_pcbfree(inp); + } else { + /* + * This case should not happen as in TIMEWAIT + * state the inp should not be destroyed before + * its tcptw. If INVARIANTS is defined, panic. + */ +#ifdef INVARIANTS + panic("%s: Panic before an inp double-free: " + "INP_TIMEWAIT && INP_DROPPED && tp != NULL" + , __func__); +#else + log(LOG_ERR, "%s: Avoid an inp double-free: " + "INP_TIMEWAIT && INP_DROPPED && tp != NULL" + , __func__); +#endif + INP_WUNLOCK(inp); + } } else { in_pcbdetach(inp); INP_WUNLOCK(inp); diff --git a/sys/sys/param.h b/sys/sys/param.h index 26ab838..97e660d 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -44,7 +44,7 @@ #define BSD4_3 1 #define BSD4_4 1 -/* +/* * __FreeBSD_version numbers are documented in the Porter's Handbook. * If you bump the version for any reason, you should update the documentation * there. @@ -58,7 +58,7 @@ * in the range 5 to 9. 
*/ #undef __FreeBSD_version -#define __FreeBSD_version 1003509 /* Master, propagated to newvers */ +#define __FreeBSD_version 1003510 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, @@ -239,7 +239,7 @@ * * BKVASIZE - Nominal buffer space per buffer, in bytes. BKVASIZE is the * minimum KVM memory reservation the kernel is willing to make. - * Filesystems can of course request smaller chunks. Actual + * Filesystems can of course request smaller chunks. Actual * backing memory uses a chunk size of a page (PAGE_SIZE). * The default value here can be overridden on a per-architecture * basis by defining it in <machine/param.h>. This should @@ -248,8 +248,8 @@ * * If you make BKVASIZE too small you risk seriously fragmenting * the buffer KVM map which may slow things down a bit. If you - * make it too big the kernel will not be able to optimally use - * the KVM memory reserved for the buffer cache and will wind + * make it too big the kernel will not be able to optimally use + * the KVM memory reserved for the buffer cache and will wind * up with too-few buffers. * * The default is 16384, roughly 2x the block size used by a @@ -342,7 +342,7 @@ __END_DECLS #define dbtoc(db) /* calculates devblks to pages */ \ ((db + (ctodb(1) - 1)) >> (PAGE_SHIFT - DEV_BSHIFT)) - + #define ctodb(db) /* calculates pages to devblks */ \ ((db) << (PAGE_SHIFT - DEV_BSHIFT)) diff --git a/sys/sys/procfs.h b/sys/sys/procfs.h index 6b32dfd..5f857ef 100644 --- a/sys/sys/procfs.h +++ b/sys/sys/procfs.h @@ -49,7 +49,10 @@ typedef struct fpreg fpregset_t; /* * The parenthsized numbers like (1) indicate the minimum version number - * for which each element exists in the structure. + * for which each element exists in the structure. The version number is + * not bumped when adding new fields to the end, only if the meaning of + * an existing field changes. Additional fields are annotated as (1a), + * (1b), etc. 
to indicate the groupings of additions. */ #define PRSTATUS_VERSION 1 /* Current version of prstatus_t */ @@ -61,7 +64,7 @@ typedef struct prstatus { size_t pr_fpregsetsz; /* sizeof(fpregset_t) (1) */ int pr_osreldate; /* Kernel version (1) */ int pr_cursig; /* Current signal (1) */ - pid_t pr_pid; /* Process ID (1) */ + pid_t pr_pid; /* LWP (Thread) ID (1) */ gregset_t pr_reg; /* General purpose registers (1) */ } prstatus_t; @@ -78,6 +81,7 @@ typedef struct prpsinfo { size_t pr_psinfosz; /* sizeof(prpsinfo_t) (1) */ char pr_fname[PRFNAMESZ+1]; /* Command name, null terminated (1) */ char pr_psargs[PRARGSZ+1]; /* Arguments, null terminated (1) */ + pid_t pr_pid; /* Process ID (1a) */ } prpsinfo_t; typedef struct thrmisc { @@ -104,6 +108,7 @@ typedef struct prpsinfo32 { uint32_t pr_psinfosz; char pr_fname[PRFNAMESZ+1]; char pr_psargs[PRARGSZ+1]; + int32_t pr_pid; } prpsinfo32_t; struct thrmisc32 { diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 909f57f..f07b127 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -320,6 +320,8 @@ RetryFault:; growstack = FALSE; goto RetryFault; } + if (fs.vp != NULL) + vput(fs.vp); return (result); } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index b2a7013..d4c7116 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1182,7 +1182,7 @@ shadowlookup: if (object != tobject) VM_OBJECT_WUNLOCK(object); VM_OBJECT_WUNLOCK(tobject); - vm_page_busy_sleep(m, "madvpo"); + vm_page_busy_sleep(m, "madvpo", false); VM_OBJECT_WLOCK(object); goto relookup; } @@ -1361,7 +1361,7 @@ retry: VM_OBJECT_WUNLOCK(new_object); vm_page_lock(m); VM_OBJECT_WUNLOCK(orig_object); - vm_page_busy_sleep(m, "spltwt"); + vm_page_busy_sleep(m, "spltwt", false); VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; @@ -1450,7 +1450,7 @@ vm_object_backing_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next, if (p == NULL) VM_WAIT; else - vm_page_busy_sleep(p, "vmocol"); + vm_page_busy_sleep(p, "vmocol", false); 
VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); return (TAILQ_FIRST(&backing_object->memq)); @@ -1917,7 +1917,7 @@ again: vm_page_lock(p); if (vm_page_xbusied(p)) { VM_OBJECT_WUNLOCK(object); - vm_page_busy_sleep(p, "vmopax"); + vm_page_busy_sleep(p, "vmopax", true); VM_OBJECT_WLOCK(object); goto again; } @@ -1932,7 +1932,7 @@ again: } if (vm_page_busied(p)) { VM_OBJECT_WUNLOCK(object); - vm_page_busy_sleep(p, "vmopar"); + vm_page_busy_sleep(p, "vmopar", false); VM_OBJECT_WLOCK(object); goto again; } diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index f166649..054148b 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -601,21 +601,20 @@ vm_page_sunbusy(vm_page_t m) * This is used to implement the hard-path of busying mechanism. * * The given page must be locked. + * + * If nonshared is true, sleep only if the page is xbusy. */ void -vm_page_busy_sleep(vm_page_t m, const char *wmesg) +vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared) { u_int x; - vm_page_lock_assert(m, MA_OWNED); + vm_page_assert_locked(m); x = m->busy_lock; - if (x == VPB_UNBUSIED) { - vm_page_unlock(m); - return; - } - if ((x & VPB_BIT_WAITERS) == 0 && - !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS)) { + if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) || + ((x & VPB_BIT_WAITERS) == 0 && + !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) { vm_page_unlock(m); return; } @@ -928,7 +927,7 @@ vm_page_sleep_if_busy(vm_page_t m, const char *msg) obj = m->object; vm_page_lock(m); VM_OBJECT_WUNLOCK(obj); - vm_page_busy_sleep(m, msg); + vm_page_busy_sleep(m, msg, false); VM_OBJECT_WLOCK(obj); return (TRUE); } @@ -2717,7 +2716,8 @@ retrylookup: vm_page_aflag_set(m, PGA_REFERENCED); vm_page_lock(m); VM_OBJECT_WUNLOCK(object); - vm_page_busy_sleep(m, "pgrbwt"); + vm_page_busy_sleep(m, "pgrbwt", (allocflags & + VM_ALLOC_IGN_SBUSY) != 0); VM_OBJECT_WLOCK(object); goto retrylookup; } else { diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h 
index 3ab4c24..7cf949e 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -420,7 +420,7 @@ malloc2vm_flags(int malloc_flags) #endif void vm_page_busy_downgrade(vm_page_t m); -void vm_page_busy_sleep(vm_page_t m, const char *msg); +void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared); void vm_page_flash(vm_page_t m); void vm_page_hold(vm_page_t mem); void vm_page_unhold(vm_page_t mem); diff --git a/tests/sys/mac/bsdextended/matches_test.sh b/tests/sys/mac/bsdextended/matches_test.sh index 11cac81..929bbca 100644 --- a/tests/sys/mac/bsdextended/matches_test.sh +++ b/tests/sys/mac/bsdextended/matches_test.sh @@ -36,6 +36,12 @@ if ! sysctl -N security.mac.bsdextended >/dev/null 2>&1; then echo "1..0 # SKIP mac_bsdextended(4) support isn't available" exit 0 fi +if [ "$TMPDIR" != "/tmp" ]; then + if ! chmod -Rf 0755 $TMPDIR; then + echo "1..0 # SKIP failed to chmod $TMPDIR" + exit 0 + fi +fi if ! playground=$(mktemp -d $TMPDIR/tmp.XXXXXXX); then echo "1..0 # SKIP failed to create temporary directory" exit 0 diff --git a/usr.bin/bsdcat/Makefile b/usr.bin/bsdcat/Makefile index 93c1b71..da1c43f 100644 --- a/usr.bin/bsdcat/Makefile +++ b/usr.bin/bsdcat/Makefile @@ -6,7 +6,7 @@ _LIBARCHIVEDIR= ${.CURDIR}/../../contrib/libarchive _LIBARCHIVECONFDIR= ${.CURDIR}/../../lib/libarchive PROG= bsdcat -BSDCAT_VERSION_STRING= 3.2.1 +BSDCAT_VERSION_STRING= 3.2.2 .PATH: ${_LIBARCHIVEDIR}/cat SRCS= bsdcat.c cmdline.c diff --git a/usr.bin/calendar/calendars/calendar.holiday b/usr.bin/calendar/calendars/calendar.holiday index 2d9ebe5..05bf302 100644 --- a/usr.bin/calendar/calendars/calendar.holiday +++ b/usr.bin/calendar/calendars/calendar.holiday @@ -447,7 +447,7 @@ 10/20 Kenyatta Day in Kenya 10/21 Armed Forces Day in Honduras 10/21 Revolution Days (2 days) in Somalia -10/23 Chulalongkron's Day in Thailand +10/23 King Chulalongkorn Day in Thailand 10/24 Independence Day in Zambia 10/24 United Nations Day 10/25 Taiwan Restoration Day in Taiwan diff --git 
a/usr.bin/cpio/Makefile b/usr.bin/cpio/Makefile index 0283c5f..f1ca143 100644 --- a/usr.bin/cpio/Makefile +++ b/usr.bin/cpio/Makefile @@ -6,7 +6,7 @@ LIBARCHIVEDIR= ${.CURDIR}/../../contrib/libarchive LIBARCHIVECONFDIR= ${.CURDIR}/../../lib/libarchive PROG= bsdcpio -BSDCPIO_VERSION_STRING= 3.2.1 +BSDCPIO_VERSION_STRING= 3.2.2 .PATH: ${LIBARCHIVEDIR}/cpio SRCS= cpio.c cmdline.c diff --git a/usr.bin/elfdump/elfdump.c b/usr.bin/elfdump/elfdump.c index 929125b..b55389c 100644 --- a/usr.bin/elfdump/elfdump.c +++ b/usr.bin/elfdump/elfdump.c @@ -240,9 +240,9 @@ d_tags(u_int64_t tag) case 0x6ffffff0: return "DT_GNU_VERSYM"; /* 0x70000000 - 0x7fffffff processor-specific semantics */ case 0x70000000: return "DT_IA_64_PLT_RESERVE"; - case 0x7ffffffd: return "DT_SUNW_AUXILIARY"; - case 0x7ffffffe: return "DT_SUNW_USED"; - case 0x7fffffff: return "DT_SUNW_FILTER"; + case DT_AUXILIARY: return "DT_AUXILIARY"; + case DT_USED: return "DT_USED"; + case DT_FILTER: return "DT_FILTER"; } snprintf(unknown_tag, sizeof(unknown_tag), "ERROR: TAG NOT DEFINED -- tag 0x%jx", (uintmax_t)tag); diff --git a/usr.bin/gcore/elfcore.c b/usr.bin/gcore/elfcore.c index 3d87d29..7dcf6a1 100644 --- a/usr.bin/gcore/elfcore.c +++ b/usr.bin/gcore/elfcore.c @@ -580,6 +580,7 @@ elf_note_prpsinfo(void *arg, size_t *sizep) } else strlcpy(psinfo->pr_psargs, kip.ki_comm, sizeof(psinfo->pr_psargs)); + psinfo->pr_pid = pid; *sizep = sizeof(*psinfo); return (psinfo); diff --git a/usr.bin/tar/Makefile b/usr.bin/tar/Makefile index 42a6f0c..bf7924a 100644 --- a/usr.bin/tar/Makefile +++ b/usr.bin/tar/Makefile @@ -4,7 +4,7 @@ LIBARCHIVEDIR= ${.CURDIR}/../../contrib/libarchive PROG= bsdtar -BSDTAR_VERSION_STRING= 3.2.1 +BSDTAR_VERSION_STRING= 3.2.2 .PATH: ${LIBARCHIVEDIR}/tar SRCS= bsdtar.c \ diff --git a/usr.sbin/ctladm/ctladm.8 b/usr.sbin/ctladm/ctladm.8 index 7ff0c8d..ebdd973 100644 --- a/usr.sbin/ctladm/ctladm.8 +++ b/usr.sbin/ctladm/ctladm.8 @@ -35,7 +35,7 @@ .\" $Id: 
//depot/users/kenm/FreeBSD-test2/usr.sbin/ctladm/ctladm.8#3 $ .\" $FreeBSD$ .\" -.Dd September 26, 2015 +.Dd October 15, 2016 .Dt CTLADM 8 .Os .Sh NAME @@ -903,8 +903,21 @@ Specifies medium rotation rate of the device: 0 -- not reported, .It Va formfactor Specifies nominal form factor of the device: 0 -- not reported, 1 -- 5.25", 2 -- 3.5", 3 -- 2.5", 4 -- 1.8", 5 -- less then 1.8". +.It Va provisioning_type +When UNMAP support is enabled, this option specifies provisioning type: +"resource", "thin" or "unknown". +Default value is "thin". +Logical units without UNMAP support are reported as fully provisioned. .It Va unmap -Set to "on", enables UNMAP support for the LUN, if supported by the backend. +Setting to "on" or "off" controls UNMAP support for the logical unit. +Default value is "on" if supported by the backend. +.It Va unmap_max_lba +.It Va unmap_max_descr +Specify maximum allowed number of LBAs and block descriptors per UNMAP +command to report in Block Limits VPD page. +.It Va write_same_max_lba +Specify maximum allowed number of LBAs per WRITE SAME command to report +in Block Limits VPD page. .It Va avail-threshold .It Va used-threshold .It Va pool-avail-threshold diff --git a/usr.sbin/pciconf/pciconf.c b/usr.sbin/pciconf/pciconf.c index bb013b0..4804ac9 100644 --- a/usr.sbin/pciconf/pciconf.c +++ b/usr.sbin/pciconf/pciconf.c @@ -690,6 +690,9 @@ static struct {PCIC_CRYPTO, PCIS_CRYPTO_NETCOMP, "entertainment crypto"}, {PCIC_DASP, -1, "dasp"}, {PCIC_DASP, PCIS_DASP_DPIO, "DPIO module"}, + {PCIC_DASP, PCIS_DASP_PERFCNTRS, "performance counters"}, + {PCIC_DASP, PCIS_DASP_COMM_SYNC, "communication synchronizer"}, + {PCIC_DASP, PCIS_DASP_MGMT_CARD, "signal processing management"}, {0, 0, NULL} }; diff --git a/usr.sbin/rtsold/rtsol.c b/usr.sbin/rtsold/rtsol.c index ced0a73..d40f243 100644 --- a/usr.sbin/rtsold/rtsol.c +++ b/usr.sbin/rtsold/rtsol.c @@ -347,7 +347,7 @@ rtsol_input(int s) /* xxx: more validation? 
*/ if ((ifi = find_ifinfo(pi->ipi6_ifindex)) == NULL) { - warnmsg(LOG_INFO, __func__, + warnmsg(LOG_DEBUG, __func__, "received RA from %s on an unexpected IF(%s)", inet_ntop(AF_INET6, &from.sin6_addr, ntopbuf, sizeof(ntopbuf)), |