diff options
author | Renato Botelho <renato@netgate.com> | 2015-09-22 07:24:31 -0300 |
---|---|---|
committer | Renato Botelho <renato@netgate.com> | 2015-09-22 07:24:31 -0300 |
commit | b5a91a37da44408c68a25426a16a8ca84686f054 (patch) | |
tree | 5761589f6059a9c8a3df1d4aed7e7972e6b1c431 | |
parent | 8400c0790e456038fbca4995d032d4e3d44c3d31 (diff) | |
parent | 0f328f755ef12f6cb68a753ed5e48e934002a2b8 (diff) | |
download | FreeBSD-src-b5a91a37da44408c68a25426a16a8ca84686f054.zip FreeBSD-src-b5a91a37da44408c68a25426a16a8ca84686f054.tar.gz |
Merge branch 'stable/10' into devel
110 files changed, 1824 insertions, 1367 deletions
diff --git a/bin/df/df.c b/bin/df/df.c index 13ef129..ba87055 100644 --- a/bin/df/df.c +++ b/bin/df/df.c @@ -296,7 +296,7 @@ main(int argc, char *argv[]) prtstat(&mntbuf[i], &maxwidths); if (cflag) prtstat(&totalbuf, &maxwidths); - return (rv); + exit(rv); } static char * diff --git a/bin/rm/rm.c b/bin/rm/rm.c index 46807b9..d91af54 100644 --- a/bin/rm/rm.c +++ b/bin/rm/rm.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <fcntl.h> #include <fts.h> #include <grp.h> +#include <locale.h> #include <pwd.h> #include <stdint.h> #include <stdio.h> @@ -86,6 +87,8 @@ main(int argc, char *argv[]) int ch; char *p; + (void)setlocale(LC_ALL, ""); + /* * Test for the special case where the utility is called as * "unlink", for which the functionality provided is greatly diff --git a/bin/sh/eval.c b/bin/sh/eval.c index 055d1cc..c8ae089 100644 --- a/bin/sh/eval.c +++ b/bin/sh/eval.c @@ -316,9 +316,10 @@ evalloop(union node *n, int flags) loopnest++; status = 0; for (;;) { - evaltree(n->nbinary.ch1, EV_TESTED); + if (!evalskip) + evaltree(n->nbinary.ch1, EV_TESTED); if (evalskip) { -skipping: if (evalskip == SKIPCONT && --skipcount <= 0) { + if (evalskip == SKIPCONT && --skipcount <= 0) { evalskip = 0; continue; } @@ -337,8 +338,6 @@ skipping: if (evalskip == SKIPCONT && --skipcount <= 0) { } evaltree(n->nbinary.ch2, flags); status = exitstatus; - if (evalskip) - goto skipping; } loopnest--; exitstatus = status; @@ -648,15 +647,15 @@ evalbackcmd(union node *n, struct backcmd *result) struct jmploc *savehandler; struct localvar *savelocalvars; - setstackmark(&smark); result->fd = -1; result->buf = NULL; result->nleft = 0; result->jp = NULL; if (n == NULL) { exitstatus = 0; - goto out; + return; } + setstackmark(&smark); exitstatus = oexitstatus; if (is_valid_fast_cmdsubst(n)) { savelocalvars = localvars; @@ -698,7 +697,6 @@ evalbackcmd(union node *n, struct backcmd *result) result->fd = pip[0]; result->jp = jp; } -out: popstackmark(&smark); TRACE(("evalbackcmd done: fd=%d buf=%p nleft=%d jp=%p\n", result->fd, result->buf, result->nleft, result->jp)); diff --git a/bin/sh/expand.c b/bin/sh/expand.c index 67b5c82..84e342d 100644 --- a/bin/sh/expand.c +++ b/bin/sh/expand.c @@ -105,11 +105,12 @@ static void expbackq(union node *, int, int); static int subevalvar(char *, char *, int, int, int, int, int); static char *evalvar(char *, int); static int varisset(const char *, int); +static void strtodest(const char *, int, int, int); static void varvalue(const char *, int, int, int); static void recordregion(int, int, int); static void removerecordregions(int); static void ifsbreakup(char *, struct arglist *); -static void expandmeta(struct strlist *, int); +static void expandmeta(struct strlist *); static void expmeta(char *, char *); static void addfname(char *); static struct strlist *expsort(struct strlist *); @@ -175,7 +176,7 @@ expandarg(union node *arg, struct arglist *arglist, int flag) ifsbreakup(p, &exparg); *exparg.lastp = NULL; exparg.lastp = &exparg.list; - expandmeta(exparg.list, flag); + expandmeta(exparg.list); } else { sp = (struct strlist *)stalloc(sizeof (struct strlist)); sp->text = p; @@ -298,9 +299,9 @@ exptilde(char *p, int flag) char c, *startp = p; struct passwd *pw; char *home; - int quotes = flag & (EXP_FULL | EXP_CASE); - while ((c = *p) != '\0') { + for (;;) { + c = *p; switch(c) { case CTLESC: /* This means CTL* are always considered quoted. */ case CTLVAR: @@ -311,36 +312,27 @@ exptilde(char *p, int flag) case CTLQUOTEMARK: return (startp); case ':': - if (flag & EXP_VARTILDE) - goto done; - break; + if ((flag & EXP_VARTILDE) == 0) + break; + /* FALLTHROUGH */ + case '\0': case '/': case CTLENDVAR: - goto done; + *p = '\0'; + if (*(startp+1) == '\0') { + home = lookupvar("HOME"); + } else { + pw = getpwnam(startp+1); + home = pw != NULL ? pw->pw_dir : NULL; + } + *p = c; + if (home == NULL || *home == '\0') + return (startp); + strtodest(home, flag, VSNORMAL, 1); + return (p); } p++; } -done: - *p = '\0'; - if (*(startp+1) == '\0') { - if ((home = lookupvar("HOME")) == NULL) - goto lose; - } else { - if ((pw = getpwnam(startp+1)) == NULL) - goto lose; - home = pw->pw_dir; - } - if (*home == '\0') - goto lose; - *p = c; - if (quotes) - STPUTS_QUOTES(home, DQSYNTAX, expdest); - else - STPUTS(home, expdest); - return (p); -lose: - *p = c; - return (startp); } @@ -501,6 +493,17 @@ expbackq(union node *cmd, int quoted, int flag) +static void +recordleft(const char *str, const char *loc, char *startp) +{ + int amount; + + amount = ((str - 1) - (loc - startp)) - expdest; + STADJUST(amount, expdest); + while (loc != str - 1) + *startp++ = *loc++; +} + static int subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varflags, int quotes) @@ -545,7 +548,8 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, *loc = '\0'; if (patmatch(str, startp, quotes)) { *loc = c; - goto recordleft; + recordleft(str, loc, startp); + return 1; } *loc = c; if (quotes && *loc == CTLESC) @@ -559,7 +563,8 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, *loc = '\0'; if (patmatch(str, startp, quotes)) { *loc = c; - goto recordleft; + recordleft(str, loc, startp); + return 1; } *loc = c; loc--; @@ -607,13 +612,6 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, default: abort(); } - -recordleft: - amount = ((str - 1) - (loc - startp)) - expdest; - STADJUST(amount, expdest); - while (loc != str - 1) - *startp++ = *loc++; - return 1; } @@ -638,6 +636,7 @@ evalvar(char *p, int flag) int varlenb; int easy; int quotes = flag & (EXP_FULL | EXP_CASE); + int record = 0; varflags = (unsigned char)*p++; subtype = varflags & VSTYPE; @@ -695,22 +694,15 @@ again: /* jump here after setting a variable with ${var=text} */ STADJUST(-varlenb, expdest); } } else { - char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX - : BASESYNTAX; - if (subtype == VSLENGTH) { for (;*val; val++) if (!localeisutf8 || (*val & 0xC0) != 0x80) varlen++; } - else { - if (quotes) - STPUTS_QUOTES(val, syntax, expdest); - else - STPUTS(val, expdest); - - } + else + strtodest(val, flag, subtype, + varflags & VSQUOTE); } } @@ -724,15 +716,11 @@ again: /* jump here after setting a variable with ${var=text} */ switch (subtype) { case VSLENGTH: expdest = cvtnum(varlen, expdest); - goto record; + record = 1; + break; case VSNORMAL: - if (!easy) - break; -record: - recordregion(startloc, expdest - stackblock(), - varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' && - (*var == '@' || *var == '*'))); + record = easy; break; case VSPLUS: @@ -742,8 +730,7 @@ record: (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0)); break; } - if (easy) - goto record; + record = easy; break; case VSTRIMLEFT: @@ -765,7 +752,8 @@ record: } /* Remove any recorded regions beyond start of variable */ removerecordregions(startloc); - goto record; + record = 1; + break; case VSASSIGN: case VSQUESTION: @@ -782,8 +770,7 @@ record: } break; } - if (easy) - goto record; + record = easy; break; case VSERROR: @@ -795,6 +782,11 @@ record: abort(); } + if (record) + recordregion(startloc, expdest - stackblock(), + varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' && + (*var == '@' || *var == '*'))); + if (subtype != VSNORMAL) { /* skip to end of alternative */ int nesting = 1; for (;;) { @@ -1097,7 +1089,7 @@ static char expdir[PATH_MAX]; * The results are stored in the list exparg. */ static void -expandmeta(struct strlist *str, int flag __unused) +expandmeta(struct strlist *str) { char *p; struct strlist **savelastp; @@ -1476,23 +1468,11 @@ patmatch(const char *pattern, const char *string, int squoted) bt_q = q; break; case '[': { - const char *endp; + const char *savep, *saveq; int invert, found; wchar_t chr; - endp = p; - if (*endp == '!' || *endp == '^') - endp++; - for (;;) { - while (*endp == CTLQUOTEMARK) - endp++; - if (*endp == 0) - goto dft; /* no matching ] */ - if (*endp == CTLESC) - endp++; - if (*++endp == ']') - break; - } + savep = p, saveq = q; invert = 0; if (*p == '!' || *p == '^') { invert++; @@ -1511,6 +1491,11 @@ patmatch(const char *pattern, const char *string, int squoted) chr = (unsigned char)*q++; c = *p++; do { + if (c == '\0') { + p = savep, q = saveq; + c = '['; + goto dft; + } if (c == CTLQUOTEMARK) continue; if (c == '[' && *p == ':') { diff --git a/bin/sh/jobs.c b/bin/sh/jobs.c index 2e0188a..16004a3 100644 --- a/bin/sh/jobs.c +++ b/bin/sh/jobs.c @@ -347,13 +347,13 @@ showjob(struct job *jp, int mode) strcat(statestr, " (core dumped)"); } - for (ps = jp->ps ; ; ps++) { /* for each process */ + for (ps = jp->ps ; procno > 0 ; ps++, procno--) { /* for each process */ if (mode == SHOWJOBS_PIDS || mode == SHOWJOBS_PGIDS) { out1fmt("%d\n", (int)ps->pid); - goto skip; + continue; } if (mode != SHOWJOBS_VERBOSE && ps != jp->ps) - goto skip; + continue; if (jobno == curr && ps == jp->ps) c = '+'; else if (jobno == prev && ps == jp->ps) @@ -384,8 +384,6 @@ showjob(struct job *jp, int mode) out1c('\n'); } else printjobcmd(jp); -skip: if (--procno <= 0) - break; } } diff --git a/bin/sh/parser.c b/bin/sh/parser.c index e6e9f82..b577a8a 100644 --- a/bin/sh/parser.c +++ b/bin/sh/parser.c @@ -1671,7 +1671,7 @@ varname: pungetc(); else if (c == '\n' || c == PEOF) synerror("Unexpected end of line in substitution"); - else + else if (BASESYNTAX[c] != CCTL) USTPUTC(c, out); } if (subtype == 0) { @@ -1687,7 +1687,8 @@ varname: synerror("Unexpected end of line in substitution"); if (flags == VSNUL) STPUTC(':', out); - STPUTC(c, out); + if (BASESYNTAX[c] != CCTL) + STPUTC(c, out); subtype = VSERROR; } else subtype = p - types + VSNORMAL; diff --git a/bin/sh/redir.c b/bin/sh/redir.c index 6127e86..95d3238 100644 --- a/bin/sh/redir.c +++ b/bin/sh/redir.c @@ -173,21 +173,12 @@ openredirect(union node *redir, char memory[10]) fname = redir->nfile.expfname; if ((f = open(fname, O_RDONLY)) < 0) error("cannot open %s: %s", fname, strerror(errno)); -movefd: - if (f != fd) { - if (dup2(f, fd) == -1) { - e = errno; - close(f); - error("%d: %s", fd, strerror(e)); - } - close(f); - } break; case NFROMTO: fname = redir->nfile.expfname; if ((f = open(fname, O_RDWR|O_CREAT, 0666)) < 0) error("cannot create %s: %s", fname, strerror(errno)); - goto movefd; + break; case NTO: if (Cflag) { fname = redir->nfile.expfname; @@ -205,19 +196,19 @@ movefd: } else error("cannot create %s: %s", fname, strerror(EEXIST)); - goto movefd; + break; } /* FALLTHROUGH */ case NCLOBBER: fname = redir->nfile.expfname; if ((f = open(fname, O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) error("cannot create %s: %s", fname, strerror(errno)); - goto movefd; + break; case NAPPEND: fname = redir->nfile.expfname; if ((f = open(fname, O_WRONLY|O_CREAT|O_APPEND, 0666)) < 0) error("cannot create %s: %s", fname, strerror(errno)); - goto movefd; + break; case NTOFD: case NFROMFD: if (redir->ndup.dupfd >= 0) { /* if not ">&-" */ @@ -231,14 +222,22 @@ movefd: } else { close(fd); } - break; + return; case NHERE: case NXHERE: f = openhere(redir); - goto movefd; + break; default: abort(); } + if (f != fd) { + if (dup2(f, fd) == -1) { + e = errno; + close(f); + error("%d: %s", fd, strerror(e)); + } + close(f); + } } diff --git a/bin/sh/tests/builtins/Makefile b/bin/sh/tests/builtins/Makefile index 2c90cbd..ec4cab6 100644 --- a/bin/sh/tests/builtins/Makefile +++ b/bin/sh/tests/builtins/Makefile @@ -34,6 +34,7 @@ FILES+= case16.0 FILES+= case17.0 FILES+= case18.0 FILES+= case19.0 +FILES+= case20.0 FILES+= cd1.0 FILES+= cd2.0 FILES+= cd3.0 diff --git a/bin/sh/tests/builtins/case20.0 b/bin/sh/tests/builtins/case20.0 new file mode 100644 index 0000000..03a4eb2 --- /dev/null +++ b/bin/sh/tests/builtins/case20.0 @@ -0,0 +1,9 @@ +# $FreeBSD$ + +# Shells do not agree about what this pattern should match, but it is +# certain that it must not crash and the missing close bracket must not +# be simply ignored. + +case B in +[[:alpha:]) echo bad ;; +esac diff --git a/bin/sh/tests/errors/Makefile b/bin/sh/tests/errors/Makefile index 9f8b0f2..ab04f14 100644 --- a/bin/sh/tests/errors/Makefile +++ b/bin/sh/tests/errors/Makefile @@ -17,6 +17,8 @@ FILES+= bad-parm-exp3.2 bad-parm-exp3.2.stderr FILES+= bad-parm-exp4.2 bad-parm-exp4.2.stderr FILES+= bad-parm-exp5.2 bad-parm-exp5.2.stderr FILES+= bad-parm-exp6.2 bad-parm-exp6.2.stderr +FILES+= bad-parm-exp7.0 +FILES+= bad-parm-exp8.0 FILES+= option-error.0 FILES+= redirection-error.0 FILES+= redirection-error2.2 diff --git a/bin/sh/tests/errors/bad-parm-exp7.0 b/bin/sh/tests/errors/bad-parm-exp7.0 new file mode 100644 index 0000000..b8562fb --- /dev/null +++ b/bin/sh/tests/errors/bad-parm-exp7.0 @@ -0,0 +1,4 @@ +# $FreeBSD$ + +v=1 +eval ": $(printf '${v-${\372}}')" diff --git a/bin/sh/tests/errors/bad-parm-exp8.0 b/bin/sh/tests/errors/bad-parm-exp8.0 new file mode 100644 index 0000000..28f00cd --- /dev/null +++ b/bin/sh/tests/errors/bad-parm-exp8.0 @@ -0,0 +1,4 @@ +# $FreeBSD$ + +v=1 +eval ": $(printf '${v-${w\372}}')" diff --git a/bin/sh/tests/expansion/Makefile b/bin/sh/tests/expansion/Makefile index 5b52efc..e62fea4 100644 --- a/bin/sh/tests/expansion/Makefile +++ b/bin/sh/tests/expansion/Makefile @@ -45,6 +45,9 @@ FILES+= ifs1.0 FILES+= ifs2.0 FILES+= ifs3.0 FILES+= ifs4.0 +FILES+= ifs5.0 +FILES+= ifs6.0 +FILES+= ifs7.0 FILES+= length1.0 FILES+= length2.0 FILES+= length3.0 @@ -59,6 +62,7 @@ FILES+= pathname1.0 FILES+= pathname2.0 FILES+= pathname3.0 FILES+= pathname4.0 +FILES+= pathname5.0 FILES+= plus-minus1.0 FILES+= plus-minus2.0 FILES+= plus-minus3.0 diff --git a/bin/sh/tests/expansion/ifs5.0 b/bin/sh/tests/expansion/ifs5.0 new file mode 100644 index 0000000..ab0e646 --- /dev/null +++ b/bin/sh/tests/expansion/ifs5.0 @@ -0,0 +1,4 @@ +# $FreeBSD$ + +set -- $(echo a b c d) +[ "$#" = 4 ] diff --git a/bin/sh/tests/expansion/ifs6.0 b/bin/sh/tests/expansion/ifs6.0 new file mode 100644 index 0000000..be77945 --- /dev/null +++ b/bin/sh/tests/expansion/ifs6.0 @@ -0,0 +1,6 @@ +# $FreeBSD$ + +IFS=': ' +x=': :' +set -- $x +[ "$#|$1|$2|$3" = "2|||" ] diff --git a/bin/sh/tests/expansion/ifs7.0 b/bin/sh/tests/expansion/ifs7.0 new file mode 100644 index 0000000..0cc0834 --- /dev/null +++ b/bin/sh/tests/expansion/ifs7.0 @@ -0,0 +1,5 @@ +# $FreeBSD$ + +IFS=2 +set -- $((123)) +[ "$#|$1|$2|$3" = "2|1|3|" ] diff --git a/bin/sh/tests/expansion/pathname5.0 b/bin/sh/tests/expansion/pathname5.0 new file mode 100644 index 0000000..bc27812 --- /dev/null +++ b/bin/sh/tests/expansion/pathname5.0 @@ -0,0 +1,3 @@ +# $FreeBSD$ + +[ `echo '/[e]tc'` = /etc ] diff --git a/bin/sh/tests/parameters/Makefile b/bin/sh/tests/parameters/Makefile index 982b133..e31b78f 100644 --- a/bin/sh/tests/parameters/Makefile +++ b/bin/sh/tests/parameters/Makefile @@ -13,6 +13,7 @@ FILES+= optind1.0 FILES+= optind2.0 FILES+= positional1.0 FILES+= positional2.0 +FILES+= positional3.0 FILES+= positional5.0 FILES+= pwd1.0 FILES+= pwd2.0 diff --git a/bin/sh/tests/parameters/positional3.0 b/bin/sh/tests/parameters/positional3.0 new file mode 100644 index 0000000..1200469 --- /dev/null +++ b/bin/sh/tests/parameters/positional3.0 @@ -0,0 +1,4 @@ +# $FreeBSD$ + +r=$(${SH} -c 'echo ${01:+yes}${010:+yes}' '' a '' '' '' '' '' '' '' '' b) +[ "$r" = yesyes ] diff --git a/bin/sh/tests/parser/Makefile b/bin/sh/tests/parser/Makefile index 03650b1..b769a30 100644 --- a/bin/sh/tests/parser/Makefile +++ b/bin/sh/tests/parser/Makefile @@ -34,6 +34,7 @@ FILES+= dollar-quote8.0 FILES+= dollar-quote9.0 FILES+= dollar-quote10.0 FILES+= dollar-quote11.0 +FILES+= dollar-quote12.0 FILES+= empty-braces1.0 FILES+= empty-cmd1.0 FILES+= for1.0 @@ -52,6 +53,9 @@ FILES+= heredoc8.0 FILES+= heredoc9.0 FILES+= heredoc10.0 FILES+= heredoc11.0 +FILES+= line-cont1.0 +FILES+= line-cont2.0 +FILES+= line-cont3.0 FILES+= no-space1.0 FILES+= no-space2.0 FILES+= only-redir1.0 diff --git a/bin/sh/tests/parser/dollar-quote12.0 b/bin/sh/tests/parser/dollar-quote12.0 new file mode 100644 index 0000000..838e27c --- /dev/null +++ b/bin/sh/tests/parser/dollar-quote12.0 @@ -0,0 +1,7 @@ +# $FreeBSD$ + +# \u without any digits at all remains invalid. +# Our choice is a parse error. + +v=$( (eval ": \$'\u'") 2>&1 >/dev/null) +[ $? -ne 0 ] && [ -n "$v" ] diff --git a/bin/sh/tests/parser/line-cont1.0 b/bin/sh/tests/parser/line-cont1.0 new file mode 100644 index 0000000..7ef5eba --- /dev/null +++ b/bin/sh/tests/parser/line-cont1.0 @@ -0,0 +1,16 @@ +# $FreeBSD$ + +i\ +f +t\ +r\ +u\ +e +t\ +h\ +e\ +n +: +\ +f\ +i diff --git a/bin/sh/tests/parser/line-cont2.0 b/bin/sh/tests/parser/line-cont2.0 new file mode 100644 index 0000000..9a293fa --- /dev/null +++ b/bin/sh/tests/parser/line-cont2.0 @@ -0,0 +1,4 @@ +# $FreeBSD$ + +[ "a\ +b" = ab ] diff --git a/bin/sh/tests/parser/line-cont3.0 b/bin/sh/tests/parser/line-cont3.0 new file mode 100644 index 0000000..09d3aa8 --- /dev/null +++ b/bin/sh/tests/parser/line-cont3.0 @@ -0,0 +1,7 @@ +# $FreeBSD$ + +v=`printf %s 'a\ +b'` +w="`printf %s 'c\ +d'`" +[ "$v$w" = abcd ] diff --git a/bin/sh/trap.c b/bin/sh/trap.c index e5a2a91..8ea3b12 100644 --- a/bin/sh/trap.c +++ b/bin/sh/trap.c @@ -510,28 +510,25 @@ exitshell_savedstatus(void) exiting_exitstatus = oexitstatus; } exitstatus = oexitstatus = exiting_exitstatus; - if (setjmp(loc1.loc)) { - goto l1; - } - if (setjmp(loc2.loc)) { - goto l2; - } - handler = &loc1; - if ((p = trap[0]) != NULL && *p != '\0') { - /* - * Reset evalskip, or the trap on EXIT could be - * interrupted if the last command was a "return". - */ - evalskip = 0; - trap[0] = NULL; - evalstring(p, 0); + if (!setjmp(loc1.loc)) { + handler = &loc1; + if ((p = trap[0]) != NULL && *p != '\0') { + /* + * Reset evalskip, or the trap on EXIT could be + * interrupted if the last command was a "return". + */ + evalskip = 0; + trap[0] = NULL; + evalstring(p, 0); + } } -l1: handler = &loc2; /* probably unnecessary */ - flushall(); + if (!setjmp(loc2.loc)) { + handler = &loc2; /* probably unnecessary */ + flushall(); #if JOBS - setjobctl(0); + setjobctl(0); #endif -l2: + } if (sig != 0 && sig != SIGSTOP && sig != SIGTSTP && sig != SIGTTIN && sig != SIGTTOU) { signal(sig, SIG_DFL); diff --git a/cddl/lib/libdtrace/ip.d b/cddl/lib/libdtrace/ip.d index b886a0a..f8fa9b1 100644 --- a/cddl/lib/libdtrace/ip.d +++ b/cddl/lib/libdtrace/ip.d @@ -109,7 +109,6 @@ typedef struct ipv4info { * These values are NULL if the packet is not IPv6. */ typedef struct in6_addr in6_addr_t; -typedef struct ip6_hdr ip6_t; typedef struct ipv6info { uint8_t ipv6_ver; /* IP version (6) */ uint8_t ipv6_tclass; /* traffic class */ @@ -122,7 +121,7 @@ typedef struct ipv6info { in6_addr_t *ipv6_dst; /* destination address */ string ipv6_saddr; /* source address, string */ string ipv6_daddr; /* destination address, string */ - ip6_t *ipv6_hdr; /* pointer to raw header */ + struct ip6_hdr *ipv6_hdr; /* pointer to raw header */ } ipv6info_t; #pragma D binding "1.0" IPPROTO_IP @@ -281,5 +280,5 @@ translator ipv6info_t < struct ip6_hdr *p > { ipv6_dst = p == NULL ? 0 : (in6_addr_t *)&p->ip6_dst; ipv6_saddr = p == NULL ? 0 : inet_ntoa6(&p->ip6_src); ipv6_daddr = p == NULL ? 0 : inet_ntoa6(&p->ip6_dst); - ipv6_hdr = (ip6_t *)p; + ipv6_hdr = p; }; diff --git a/etc/rc.d/bgfsck b/etc/rc.d/bgfsck index 101577e..008ec08 100755 --- a/etc/rc.d/bgfsck +++ b/etc/rc.d/bgfsck @@ -12,17 +12,24 @@ name="background-fsck" rcvar="background_fsck" start_cmd="bgfsck_start" +start_precmd="bgfsck_start_precmd" stop_cmd=":" +bgfsck_start_precmd() +{ + if [ $($ID -u) != 0 ]; then + err 1 "Must be root." + fi +} + bgfsck_start() { - if [ -z "${rc_force}" ]; then - background_fsck_delay=${background_fsck_delay:-0} - else + : ${background_fsck_delay=0} + if [ -n "${rc_force}" ]; then background_fsck_delay=0 fi if [ ${background_fsck_delay} -lt 0 ]; then - echo "Background file system checks delayed indefinitely" + warn "Background file system checks delayed indefinitely" return 0 fi diff --git a/etc/rc.d/jail b/etc/rc.d/jail index ea62f48..d1307db 100755 --- a/etc/rc.d/jail +++ b/etc/rc.d/jail @@ -419,7 +419,7 @@ jail_status() jail_start() { - local _j _jid _jl + local _j _jid _jl _id _name if [ $# = 0 ]; then return @@ -432,10 +432,9 @@ jail_start() command_args="-f $jail_conf -c" _tmp=`mktemp -t jail` || exit 3 if $command $rc_flags $command_args >> $_tmp 2>&1; then - $jail_jls jid name | while read IN; do - set -- $IN - echo -n " $2" - echo $1 > /var/run/jail_$2.id + $jail_jls jid name | while read _id _name; do + echo -n " $_name" + echo $_id > /var/run/jail_${_name}.id done else tail -1 $_tmp diff --git a/etc/rc.d/netif b/etc/rc.d/netif index daece80..d8998e5 100755 --- a/etc/rc.d/netif +++ b/etc/rc.d/netif @@ -85,7 +85,7 @@ network_start() fi if [ -f /etc/rc.d/routing -a -n "$cmdifn" ] ; then for _if in $cmdifn; do - /etc/rc.d/routing start any $_if + /etc/rc.d/routing static any $_if done fi } diff --git a/lib/libc/net/getnameinfo.c b/lib/libc/net/getnameinfo.c index ffd34a1..2b16b75 100644 --- a/lib/libc/net/getnameinfo.c +++ b/lib/libc/net/getnameinfo.c @@ -78,6 +78,8 @@ getnameinfo(const struct sockaddr *sa, socklen_t salen, char *host, size_t hostlen, char *serv, size_t servlen, int flags) { + if (sa == NULL) + return (EAI_FAIL); switch (sa->sa_family) { case AF_INET: @@ -124,25 +126,19 @@ getnameinfo_inet(const struct sockaddr *sa, socklen_t salen, struct servent *sp; struct hostent *hp; u_short port; - int family, i; const char *addr; u_int32_t v4a; int h_error; char numserv[512]; char numaddr[512]; - if (sa == NULL) - return EAI_FAIL; - - family = sa->sa_family; - for (i = 0; afdl[i].a_af; i++) - if (afdl[i].a_af == family) { - afd = &afdl[i]; - goto found; - } - return EAI_FAMILY; + for (afd = &afdl[0]; afd->a_af > 0; afd++) { + if (afd->a_af == sa->sa_family) + break; + } + if (afd->a_af == 0) + return (EAI_FAMILY); - found: if (salen != afd->a_socklen) return EAI_FAIL; @@ -394,11 +390,22 @@ getnameinfo_link(const struct sockaddr *sa, socklen_t salen, if (sdl->sdl_nlen == 0 && sdl->sdl_alen == 0 && sdl->sdl_slen == 0) { n = snprintf(host, hostlen, "link#%d", sdl->sdl_index); - if (n > hostlen) { + if (n >= hostlen) { *host = '\0'; - return EAI_MEMORY; + return (EAI_MEMORY); + } + return (0); + } + + if (sdl->sdl_nlen > 0 && sdl->sdl_alen == 0) { + n = sdl->sdl_nlen; + if (n >= hostlen) { + *host = '\0'; + return (EAI_MEMORY); } - return 0; + memcpy(host, sdl->sdl_data, sdl->sdl_nlen); + host[n] = '\0'; + return (0); } switch (sdl->sdl_type) { @@ -441,10 +448,7 @@ getnameinfo_link(const struct sockaddr *sa, socklen_t salen, } static int -hexname(cp, len, host, hostlen) - const u_int8_t *cp; - char *host; - size_t len, hostlen; +hexname(const u_int8_t *cp, size_t len, char *host, size_t hostlen) { int i, n; char *outp = host; diff --git a/lib/libgeom/geom_xml2tree.c b/lib/libgeom/geom_xml2tree.c index 54a31ca..2a373d7 100644 --- a/lib/libgeom/geom_xml2tree.c +++ b/lib/libgeom/geom_xml2tree.c @@ -275,15 +275,17 @@ EndElement(void *userData, const char *name) XML_StopParser(mt->parser, 0); warn("Cannot allocate memory during processing of '%s' " "element", name); + free(p); return; } gc->lg_name = strdup(name); if (gc->lg_name == NULL) { - free(gc); mt->error = errno; XML_StopParser(mt->parser, 0); warn("Cannot allocate memory during processing of '%s' " "element", name); + free(gc); + free(p); return; } gc->lg_val = p; diff --git a/release/doc/share/xml/errata.xml b/release/doc/share/xml/errata.xml index fb617dc..54c2713 100644 --- a/release/doc/share/xml/errata.xml +++ b/release/doc/share/xml/errata.xml @@ -49,6 +49,30 @@ <entry><para>Insufficient check of supported &man.pkg.7; signature methods.</para></entry> </row> + + <row> + <entry><link + xlink:href="&security.url;/FreeBSD-EN-15:16.pw.asc">FreeBSD-EN-15:16.pw</link></entry> + <entry>16 September 2015</entry> + <entry><para>Fix &man.pw.8; regression when creating numeric + users or groups.</para></entry> + </row> + + <row> + <entry><link + xlink:href="&security.url;/FreeBSD-EN-15:17.libc.asc">FreeBSD-EN-15:17.libc</link></entry> + <entry>16 September 2015</entry> + <entry><para>Fix libc handling of signals for multi-threaded + processes.</para></entry> + </row> + + <row> + <entry><link + xlink:href="&security.url;/FreeBSD-EN-15:18.pkg.asc">FreeBSD-EN-15:18.pkg</link></entry> + <entry>16 September 2015</entry> + <entry><para>Implement <literal>pubkey</literal> support for + &man.pkg.7; bootstrap.</para></entry> + </row> </tbody> </tgroup> </informaltable> diff --git a/release/tools/ec2.conf b/release/tools/ec2.conf index 9472ec5..557e602 100644 --- a/release/tools/ec2.conf +++ b/release/tools/ec2.conf @@ -70,6 +70,11 @@ vm_extra_pre_umount() { # nodes, but apply the workaround just in case. echo 'hw.broken_txfifo="1"' >> ${DESTDIR}/boot/loader.conf + # Some EC2 instances suffer a significant (~40%) reduction in + # throughput when using blkif indirect segment I/Os. Disable this + # by default for now. + echo 'hw.xbd.xbd_enable_indirect="0"' >> ${DESTDIR}/boot/loader.conf + # The first time the AMI boots, the installed "first boot" scripts # should be allowed to run: # * ec2_configinit (download and process EC2 user-data) diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index 6152e39..911cb24 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd July 24, 2015 +.Dd September 12, 2015 .Dt IFCONFIG 8 .Os .Sh NAME @@ -2393,9 +2393,16 @@ Remove the interface named by from the aggregation interface. .It Cm laggproto Ar proto Set the aggregation protocol. -The default is failover. -The available options are failover, fec, lacp, loadbalance, roundrobin and -none. +The default is +.Li failover . +The available options are +.Li failover , +.Li lacp , +.Li loadbalance , +.Li roundrobin , +.Li broadcast +and +.Li none . .It Cm lagghash Ar option Ns Oo , Ns Ar option Oc Set the packet layers to hash for aggregation protocols which load balance. The default is @@ -2410,7 +2417,38 @@ src/dst address for IPv4 or IPv6. .It Cm l4 src/dst port for TCP/UDP/SCTP. .El -.Pp +.It Cm use_flowid +Enable local hash computation for RSS hash on the interface. +The +.Li loadbalance +and +.Li lacp +modes will use the RSS hash from the network card if available +to avoid computing one, this may give poor traffic distribution +if the hash is invalid or uses less of the protocol header information. +.Cm use_flowid +disables use of RSS hash from the network card. +The default value can be set via the +.Va net.link.lagg.default_use_flowid +.Xr sysctl 8 +variable. +.Li 0 +means +.Dq disabled +and +.Li 1 +means +.Dq enabled . +.It Cm -use_flowid +Disable local hash computation for RSS hash on the interface. +.It Cm flowid_shift Ar number +Set a shift parameter for RSS local hash computation. +Hash is calculated by using flowid bits in a packet header mbuf +which are shifted by the number of this parameter. +.It Cm lacp_fast_timeout +Enable lacp fast-timeout on the interface. +.It Cm -lacp_fast_timeout +Disable lacp fast-timeout on the interface. .El .Pp The following parameters are specific to IP tunnel interfaces, diff --git a/sbin/ifconfig/ifgif.c b/sbin/ifconfig/ifgif.c index 91c433c..de9696a 100644 --- a/sbin/ifconfig/ifgif.c +++ b/sbin/ifconfig/ifgif.c @@ -51,7 +51,7 @@ static const char rcsid[] = #include "ifconfig.h" -#define GIFBITS "\020\1ACCEPT_REV_ETHIP_VER\2IGNORE_SOURCE\5SEND_REV_ETHIP_VER" +#define GIFBITS "\020\2IGNORE_SOURCE" static void gif_status(int); @@ -70,11 +70,12 @@ gif_status(int s) } static void -setgifopts(const char *val, - int d, int s, const struct afswtch *afp) +setgifopts(const char *val, int d, int s, const struct afswtch *afp) { int opts; + if (d == 0) + return; ifr.ifr_data = (caddr_t)&opts; if (ioctl(s, GIFGOPTS, &ifr) == -1) { warn("ioctl(GIFGOPTS)"); @@ -93,12 +94,12 @@ setgifopts(const char *val, } static struct cmd gif_cmds[] = { - DEF_CMD("accept_rev_ethip_ver", GIF_ACCEPT_REVETHIP, setgifopts), - DEF_CMD("-accept_rev_ethip_ver",-GIF_ACCEPT_REVETHIP, setgifopts), + DEF_CMD("accept_rev_ethip_ver", 0, setgifopts), + DEF_CMD("-accept_rev_ethip_ver",0, setgifopts), DEF_CMD("ignore_source", GIF_IGNORE_SOURCE, setgifopts), DEF_CMD("-ignore_source", -GIF_IGNORE_SOURCE, setgifopts), - DEF_CMD("send_rev_ethip_ver", GIF_SEND_REVETHIP, setgifopts), - DEF_CMD("-send_rev_ethip_ver", -GIF_SEND_REVETHIP, setgifopts), + DEF_CMD("send_rev_ethip_ver", 0, setgifopts), + DEF_CMD("-send_rev_ethip_ver", 0, setgifopts), }; static struct afswtch af_gif = { @@ -110,11 +111,9 @@ static struct afswtch af_gif = { static __constructor void gif_ctor(void) { -#define N(a) (sizeof(a) / sizeof(a[0])) size_t i; - for (i = 0; i < N(gif_cmds); i++) + for (i = 0; i < nitems(gif_cmds); i++) cmd_register(&gif_cmds[i]); af_register(&af_gif); -#undef N } diff --git a/sbin/ifconfig/iflagg.c b/sbin/ifconfig/iflagg.c index 29b8574..c595dc9 100644 --- a/sbin/ifconfig/iflagg.c +++ b/sbin/ifconfig/iflagg.c @@ -17,6 +17,7 @@ static const char rcsid[] = #include <net/ethernet.h> #include <net/if.h> #include <net/if_lagg.h> +#include <net/ieee8023ad_lacp.h> #include <net/route.h> #include <ctype.h> @@ -68,7 +69,7 @@ setlaggproto(const char *val, int d, int s, const struct afswtch *afp) bzero(&ra, sizeof(ra)); ra.ra_proto = LAGG_PROTO_MAX; - for (i = 0; i < (sizeof(lpr) / sizeof(lpr[0])); i++) { + for (i = 0; i < nitems(lpr); i++) { if (strcmp(val, lpr[i].lpr_name) == 0) { ra.ra_proto = lpr[i].lpr_proto; break; @@ -83,6 +84,50 @@ setlaggproto(const char *val, int d, int s, const struct afswtch *afp) } static void +setlaggflowidshift(const char *val, int d, int s, const struct afswtch *afp) +{ + struct lagg_reqopts ro; + + bzero(&ro, sizeof(ro)); + ro.ro_opts = LAGG_OPT_FLOWIDSHIFT; + strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); + ro.ro_flowid_shift = (int)strtol(val, NULL, 10); + if (ro.ro_flowid_shift & ~LAGG_OPT_FLOWIDSHIFT_MASK) + errx(1, "Invalid flowid_shift option: %s", val); + + if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0) + err(1, "SIOCSLAGGOPTS"); +} + +static void +setlaggsetopt(const char *val, int d, int s, const struct afswtch *afp) +{ + struct lagg_reqopts ro; + + bzero(&ro, sizeof(ro)); + ro.ro_opts = d; + switch (ro.ro_opts) { + case LAGG_OPT_USE_FLOWID: + case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_LACP_STRICT: + case -LAGG_OPT_LACP_STRICT: + case LAGG_OPT_LACP_TXTEST: + case -LAGG_OPT_LACP_TXTEST: + case LAGG_OPT_LACP_RXTEST: + case -LAGG_OPT_LACP_RXTEST: + case LAGG_OPT_LACP_TIMEOUT: + case -LAGG_OPT_LACP_TIMEOUT: + break; + default: + err(1, "Invalid lagg option"); + } + strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); + + if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0) + err(1, "SIOCSLAGGOPTS"); +} + +static void setlagghash(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqflags rf; @@ -144,6 +189,7 @@ lagg_status(int s) struct lagg_protos lpr[] = LAGG_PROTOS; struct lagg_reqport rp, rpbuf[LAGG_MAX_PORTS]; struct lagg_reqall ra; + struct lagg_reqopts ro; struct lagg_reqflags rf; struct lacp_opreq *lp; const char *proto = "<unknown>"; @@ -151,6 +197,7 @@ lagg_status(int s) bzero(&rp, sizeof(rp)); bzero(&ra, sizeof(ra)); + bzero(&ro, sizeof(ro)); strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname)); strlcpy(rp.rp_portname, name, sizeof(rp.rp_portname)); @@ -162,6 +209,9 @@ lagg_status(int s) ra.ra_size = sizeof(rpbuf); ra.ra_port = rpbuf; + strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); + ioctl(s, SIOCGLAGGOPTS, &ro); + strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname)); if (ioctl(s, SIOCGLAGGFLAGS, &rf) != 0) rf.rf_flags = 0; @@ -169,7 +219,7 @@ lagg_status(int s) if (ioctl(s, SIOCGLAGG, &ra) == 0) { lp = (struct lacp_opreq *)&ra.ra_lacpreq; - for (i = 0; i < (sizeof(lpr) / sizeof(lpr[0])); i++) { + for (i = 0; i < nitems(lpr); i++) { if (ra.ra_proto == lpr[i].lpr_proto) { proto = lpr[i].lpr_name; break; @@ -197,16 +247,27 @@ lagg_status(int s) if (isport) printf(" laggdev %s", rp.rp_ifname); putchar('\n'); - if (verbose && ra.ra_proto == LAGG_PROTO_LACP) - printf("\tlag id: %s\n", - lacp_format_peer(lp, "\n\t\t ")); + if (verbose) { + printf("\tlagg options:\n"); + printb("\t\tflags", ro.ro_opts, LAGG_OPT_BITS); + putchar('\n'); + printf("\t\tflowid_shift: %d\n", ro.ro_flowid_shift); + printf("\tlagg statistics:\n"); + printf("\t\tactive ports: %d\n", ro.ro_active); + printf("\t\tflapping: %u\n", ro.ro_flapping); + if (ra.ra_proto == LAGG_PROTO_LACP) { + printf("\tlag id: %s\n", + lacp_format_peer(lp, "\n\t\t ")); + } + } for (i = 0; i < ra.ra_ports; i++) { lp = (struct lacp_opreq *)&rpbuf[i].rp_lacpreq; printf("\tlaggport: %s ", rpbuf[i].rp_portname); printb("flags", rpbuf[i].rp_flags, LAGG_PORT_BITS); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) - printf(" state=%X", lp->actor_state); + printb(" state", lp->actor_state, + LACP_STATE_BITS); putchar('\n'); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) printf("\t\t%s\n", @@ -226,6 +287,17 @@ static struct cmd lagg_cmds[] = { DEF_CMD_ARG("-laggport", unsetlaggport), DEF_CMD_ARG("laggproto", setlaggproto), DEF_CMD_ARG("lagghash", setlagghash), + DEF_CMD("use_flowid", LAGG_OPT_USE_FLOWID, setlaggsetopt), + DEF_CMD("-use_flowid", -LAGG_OPT_USE_FLOWID, setlaggsetopt), + DEF_CMD("lacp_strict", LAGG_OPT_LACP_STRICT, setlaggsetopt), + DEF_CMD("-lacp_strict", -LAGG_OPT_LACP_STRICT, setlaggsetopt), + DEF_CMD("lacp_txtest", LAGG_OPT_LACP_TXTEST, setlaggsetopt), + DEF_CMD("-lacp_txtest", -LAGG_OPT_LACP_TXTEST, setlaggsetopt), + DEF_CMD("lacp_rxtest", LAGG_OPT_LACP_RXTEST, setlaggsetopt), + DEF_CMD("-lacp_rxtest", -LAGG_OPT_LACP_RXTEST, setlaggsetopt), + DEF_CMD("lacp_fast_timeout", LAGG_OPT_LACP_TIMEOUT, setlaggsetopt), + DEF_CMD("-lacp_fast_timeout", -LAGG_OPT_LACP_TIMEOUT, setlaggsetopt), + DEF_CMD_ARG("flowid_shift", setlaggflowidshift), }; static struct afswtch af_lagg = { .af_name = "af_lagg", diff --git a/share/man/man4/gif.4 b/share/man/man4/gif.4 index 27ee61b..76b7976 100644 --- a/share/man/man4/gif.4 +++ b/share/man/man4/gif.4 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 14, 2014 +.Dd September 10, 2015 .Dt GIF 4 .Os .Sh NAME @@ -246,32 +246,3 @@ had a multi-destination behavior, configurable via .Dv IFF_LINK0 flag. The behavior is obsolete and is no longer supported. -.Pp -On -.Fx -6.1, 6.2, 6.3, 7.0, 7.1, and 7.2 -the -.Nm -sends and receives incorrect EtherIP packets with reversed version -field when -.Xr if_bridge 4 -is used together. As a workaround on this interoperability issue, the -following two -.Xr ifconfig 8 -flags can be used: -.Bl -tag -width "accept_rev_ethip_ver" -offset indent -.It accept_rev_ethip_ver -accepts both correct EtherIP packets and ones with reversed version -field, if enabled. If disabled, the -.Nm -accepts the correct packets only. This flag is enabled by default. -.It send_rev_ethip_ver -sends EtherIP packets with reversed version field intentionally, if -enabled. If disabled, the -.Nm -sends the correct packets only. This flag is disabled by default. -.El -.Pp -If interoperability with the older -.Fx -machines is needed, both of these two flags must be enabled. diff --git a/share/man/man4/lagg.4 b/share/man/man4/lagg.4 index aec7b7e..b511eea 100644 --- a/share/man/man4/lagg.4 +++ b/share/man/man4/lagg.4 @@ -16,7 +16,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 23, 2012 +.Dd October 1, 2014 .Dt LAGG 4 .Os .Sh NAME @@ -143,9 +143,9 @@ modes will use the RSS hash from the network card if available to avoid computing one, this may give poor traffic distribution if the hash is invalid or uses less of the protocol header information. Local hash computation can be forced per interface by setting the -.Va net.link.lagg.X.use_flowid -.Xr sysctl 8 -variable to zero where X is the interface number. +.Cm use_flowid +.Xr ifconfig 8 +flag. The default for new interfaces is set via the .Va net.link.lagg.default_use_flowid .Xr sysctl 8 . diff --git a/share/mk/bsd.port.mk b/share/mk/bsd.port.mk index 4f73d12..8c0e72e 100644 --- a/share/mk/bsd.port.mk +++ b/share/mk/bsd.port.mk @@ -10,8 +10,12 @@ _PORTSDIR= ${.CURDIR}/${RELPATH} .endif .endfor _PORTSDIR?= /usr/ports +.if defined(.PARSEDIR) +PORTSDIR= ${_PORTSDIR:tA} +.else # fmake doesn't have :tA PORTSDIR!= realpath ${_PORTSDIR} .endif +.endif BSDPORTMK?= ${PORTSDIR}/Mk/bsd.port.mk diff --git a/share/mk/bsd.port.subdir.mk b/share/mk/bsd.port.subdir.mk index 8e608a1..380983d 100644 --- a/share/mk/bsd.port.subdir.mk +++ b/share/mk/bsd.port.subdir.mk @@ -10,8 +10,12 @@ _PORTSDIR= ${.CURDIR}/${RELPATH} .endif .endfor _PORTSDIR?= /usr/ports +.if defined(.PARSEDIR) +PORTSDIR= ${_PORTSDIR:tA} +.else # fmake doesn't have :tA PORTSDIR!= realpath ${_PORTSDIR} .endif +.endif BSDPORTSUBDIRMK?= ${PORTSDIR}/Mk/bsd.port.subdir.mk diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c index c4ca677..72adb27 100644 --- a/sys/amd64/amd64/uma_machdep.c +++ b/sys/amd64/amd64/uma_machdep.c @@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$"); #include <machine/vmparam.h> void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) { vm_page_t m; vm_paddr_t pa; @@ -70,7 +70,7 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) } void -uma_small_free(void *mem, int size, u_int8_t flags) +uma_small_free(void *mem, vm_size_t size, u_int8_t flags) { vm_page_t m; vm_paddr_t pa; diff --git a/sys/contrib/ipfilter/netinet/ip_state.c b/sys/contrib/ipfilter/netinet/ip_state.c index 9c6a244..ad2bf38 100644 --- a/sys/contrib/ipfilter/netinet/ip_state.c +++ b/sys/contrib/ipfilter/netinet/ip_state.c @@ -1,4 +1,4 @@ -/* $FreeBSD$ */ +/* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. @@ -1054,7 +1054,7 @@ ipf_state_putent(softc, softs, data) /* to pointers and adjusts running stats for the hash table as appropriate. */ /* */ /* This function can fail if the filter rule has had a population policy of */ -/* IP addresses used with stateful filteirng assigned to it. */ +/* IP addresses used with stateful filtering assigned to it. */ /* */ /* Locking: it is assumed that some kind of lock on ipf_state is held. */ /* Exits with is_lock initialised and held - *EVEN IF ERROR*. */ @@ -1081,7 +1081,7 @@ ipf_state_insert(softc, is, rev) } /* - * If we could trust is_hv, then the modulous would not be needed, + * If we could trust is_hv, then the modulus would not be needed, * but when running with IPFILTER_SYNC, this stops bad values. */ hv = is->is_hv % softs->ipf_state_size; @@ -1672,6 +1672,10 @@ ipf_state_add(softc, fin, stsave, flags) SBUMPD(ipf_state_stats, iss_bucket_full); return 4; } + + /* + * No existing state; create new + */ KMALLOC(is, ipstate_t *); if (is == NULL) { SBUMPD(ipf_state_stats, iss_nomem); @@ -1683,7 +1687,7 @@ ipf_state_add(softc, fin, stsave, flags) is->is_rule = fr; /* - * Do not do the modulous here, it is done in ipf_state_insert(). + * Do not do the modulus here, it is done in ipf_state_insert(). */ if (fr != NULL) { ipftq_t *tq; @@ -1711,7 +1715,7 @@ ipf_state_add(softc, fin, stsave, flags) /* * It may seem strange to set is_ref to 2, but if stsave is not NULL * then a copy of the pointer is being stored somewhere else and in - * the end, it will expect to be able to do osmething with it. + * the end, it will expect to be able to do something with it. */ is->is_me = stsave; if (stsave != NULL) { @@ -3652,7 +3656,6 @@ ipf_state_del(softc, is, why) softs->ipf_state_stats.iss_orphan++; return refs; } - MUTEX_EXIT(&is->is_lock); fr = is->is_rule; is->is_rule = NULL; @@ -3664,6 +3667,7 @@ ipf_state_del(softc, is, why) } is->is_ref = 0; + MUTEX_EXIT(&is->is_lock); if (is->is_tqehead[0] != NULL) { if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0) diff --git a/sys/dev/msk/if_mskreg.h b/sys/dev/msk/if_mskreg.h index 9c55192..da69f2e 100644 --- a/sys/dev/msk/if_mskreg.h +++ b/sys/dev/msk/if_mskreg.h @@ -2175,13 +2175,8 @@ #define MSK_ADDR_LO(x) ((uint64_t) (x) & 0xffffffffUL) #define MSK_ADDR_HI(x) ((uint64_t) (x) >> 32) -/* - * At first I guessed 8 bytes, the size of a single descriptor, would be - * required alignment constraints. But, it seems that Yukon II have 4096 - * bytes boundary alignment constraints. - */ -#define MSK_RING_ALIGN 4096 -#define MSK_STAT_ALIGN 4096 +#define MSK_RING_ALIGN 32768 +#define MSK_STAT_ALIGN 32768 /* Rx descriptor data structure */ struct msk_rx_desc { diff --git a/sys/dev/puc/puc.c b/sys/dev/puc/puc.c index d7bfb63..2b320ca 100644 --- a/sys/dev/puc/puc.c +++ b/sys/dev/puc/puc.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include <sys/conf.h> #include <sys/malloc.h> #include <sys/mutex.h> +#include <sys/sysctl.h> #include <machine/bus.h> #include <machine/resource.h> @@ -70,6 +71,8 @@ const char puc_driver_name[] = "puc"; static MALLOC_DEFINE(M_PUC, "PUC", "PUC driver"); +SYSCTL_NODE(_hw, OID_AUTO, puc, CTLFLAG_RD, 0, "puc(9) driver configuration"); + struct puc_bar * puc_get_bar(struct puc_softc *sc, int rid) { @@ -324,7 +327,6 @@ puc_bfe_attach(device_t dev) if (bootverbose && sc->sc_ilr != 0) device_printf(dev, "using interrupt latch register\n"); - sc->sc_irid = 0; sc->sc_ires = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->sc_irid, RF_ACTIVE|RF_SHAREABLE); if (sc->sc_ires != NULL) { diff --git a/sys/dev/puc/puc_bfe.h b/sys/dev/puc/puc_bfe.h index 3cf0c53..9f2f3fb 100644 --- a/sys/dev/puc/puc_bfe.h +++ b/sys/dev/puc/puc_bfe.h @@ -66,6 +66,7 @@ struct puc_softc { int sc_fastintr:1; int sc_leaving:1; int sc_polled:1; + int sc_msi:1; int sc_ilr; @@ -94,4 +95,6 @@ int puc_bus_setup_intr(device_t, device_t, struct resource *, int, driver_filter_t *, driver_intr_t *, void *, void **); int puc_bus_teardown_intr(device_t, device_t, struct resource *, void *); +SYSCTL_DECL(_hw_puc); + #endif /* _DEV_PUC_BFE_H_ */ diff --git a/sys/dev/puc/puc_cfg.c b/sys/dev/puc/puc_cfg.c index 7de6ea3..99d9816 100644 --- a/sys/dev/puc/puc_cfg.c +++ b/sys/dev/puc/puc_cfg.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/bus.h> #include <sys/rman.h> +#include <sys/sysctl.h> #include <dev/puc/puc_bus.h> #include <dev/puc/puc_cfg.h> diff --git a/sys/dev/puc/puc_pccard.c b/sys/dev/puc/puc_pccard.c index 370c6af..232741e 100644 --- a/sys/dev/puc/puc_pccard.c +++ b/sys/dev/puc/puc_pccard.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/conf.h> #include <sys/malloc.h> +#include <sys/sysctl.h> #include <machine/bus.h> #include <machine/resource.h> diff --git a/sys/dev/puc/puc_pci.c b/sys/dev/puc/puc_pci.c index 4ad1e3a..d6d5509 100644 --- a/sys/dev/puc/puc_pci.c +++ b/sys/dev/puc/puc_pci.c @@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/conf.h> #include <sys/malloc.h> +#include <sys/sysctl.h> #include <machine/bus.h> #include <machine/resource.h> @@ -78,6 +79,11 @@ __FBSDID("$FreeBSD$"); #include <dev/puc/puc_cfg.h> #include <dev/puc/puc_bfe.h> +static int puc_msi_disable; +TUNABLE_INT("hw.puc.msi_disable", &puc_msi_disable); +SYSCTL_INT(_hw_puc, OID_AUTO, msi_disable, CTLFLAG_RD | CTLFLAG_TUN, + &puc_msi_disable, 0, "Disable use of MSI interrupts by puc(9)"); + static const struct puc_cfg * puc_pci_match(device_t dev, const struct puc_cfg *desc) { @@ -120,11 +126,56 @@ puc_pci_probe(device_t dev) return (puc_bfe_probe(dev, desc)); } +static int +puc_pci_attach(device_t dev) +{ + struct puc_softc *sc; + int error, count; + + sc = device_get_softc(dev); + + if (!puc_msi_disable) { + count = 1; + + if (pci_alloc_msi(dev, &count) == 0) { + sc->sc_msi = 1; + sc->sc_irid = 1; + } + } + + error = puc_bfe_attach(dev); + + if (error != 0 && sc->sc_msi) + pci_release_msi(dev); + + return (error); +} + +static int +puc_pci_detach(device_t dev) +{ + struct puc_softc *sc; + int error; + + sc = device_get_softc(dev); + + error = puc_bfe_detach(dev); + + if (error != 0) + return (error); + + if (sc->sc_msi) + error = pci_release_msi(dev); + + return (error); +} + + static device_method_t puc_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, puc_pci_probe), - DEVMETHOD(device_attach, puc_bfe_attach), - DEVMETHOD(device_detach, puc_bfe_detach), + DEVMETHOD(device_attach, puc_pci_attach), + DEVMETHOD(device_detach, puc_pci_detach), DEVMETHOD(bus_alloc_resource, puc_bus_alloc_resource), DEVMETHOD(bus_release_resource, puc_bus_release_resource), diff --git a/sys/dev/puc/pucdata.c b/sys/dev/puc/pucdata.c index 77953e0..4646fa4 100644 --- a/sys/dev/puc/pucdata.c +++ b/sys/dev/puc/pucdata.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/kernel.h> #include <sys/bus.h> +#include <sys/sysctl.h> #include <machine/resource.h> #include <machine/bus.h> diff --git a/sys/dev/usb/serial/u3g.c b/sys/dev/usb/serial/u3g.c index 1c8f3b5..1fe9b21 100644 --- a/sys/dev/usb/serial/u3g.c +++ b/sys/dev/usb/serial/u3g.c @@ -522,6 +522,7 @@ static const STRUCT_USB_HOST_ID u3g_devs[] = { U3G_DEV(SIERRA, MC5727_2, 0), U3G_DEV(SIERRA, MC5728, 0), U3G_DEV(SIERRA, MC7354, 0), + U3G_DEV(SIERRA, MC7355, 0), U3G_DEV(SIERRA, MC8700, 0), U3G_DEV(SIERRA, MC8755, 0), U3G_DEV(SIERRA, MC8755_2, 0), diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs index 04cfd54..d649402 100644 --- a/sys/dev/usb/usbdevs +++ b/sys/dev/usb/usbdevs @@ -4010,6 +4010,7 @@ product SIERRA E6892 0x6892 E6892 product SIERRA E6893 0x6893 E6893 product SIERRA MC8700 0x68A3 MC8700 product SIERRA MC7354 0x68C0 MC7354 +product SIERRA MC7355 0x9041 MC7355 product SIERRA AC313U 0x68aa Sierra Wireless AirCard 313U product SIERRA TRUINSTALL 0x0fff Aircard Tru Installer diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index b661a0f..cc85e1c 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -121,6 +121,7 @@ const struct terminal_class vt_termclass = { static SYSCTL_NODE(_kern, OID_AUTO, vt, CTLFLAG_RD, 0, "vt(9) parameters"); VT_SYSCTL_INT(enable_altgr, 1, "Enable AltGr key (Do not assume R.Alt as Alt)"); +VT_SYSCTL_INT(enable_bell, 1, "Enable bell"); VT_SYSCTL_INT(debug, 0, "vt(9) debug level"); VT_SYSCTL_INT(deadtimer, 15, "Time to wait busy process in VT_PROCESS mode"); VT_SYSCTL_INT(suspendswitch, 1, "Switch to VT0 before suspend"); @@ -935,6 +936,9 @@ vtterm_bell(struct terminal *tm) struct vt_window *vw = tm->tm_softc; struct vt_device *vd = vw->vw_device; + if (!vt_enable_bell) + return; + if (vd->vd_flags & VDF_QUIET_BELL) return; @@ -946,6 +950,9 @@ vtterm_beep(struct terminal *tm, u_int param) { u_int freq, period; + if (!vt_enable_bell) + return; + if ((param == 0) || ((param & 0xffff) == 0)) { vtterm_bell(tm); return; diff --git a/sys/dev/watchdog/watchdog.c b/sys/dev/watchdog/watchdog.c index 087d6e8..5c9c6a1 100644 --- a/sys/dev/watchdog/watchdog.c +++ b/sys/dev/watchdog/watchdog.c @@ -28,6 +28,8 @@ * */ +#include "opt_ddb.h" + #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -37,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include <sys/conf.h> #include <sys/uio.h> #include <sys/kernel.h> +#include <sys/kdb.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/sysctl.h> diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c index a71251d..312a077 100644 --- a/sys/dev/xen/blkfront/blkfront.c +++ b/sys/dev/xen/blkfront/blkfront.c @@ -84,6 +84,11 @@ static void xbd_startio(struct xbd_softc *sc); /*---------------------------- Global Static Data ----------------------------*/ static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data"); +static int xbd_enable_indirect = 1; +SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD, 0, "xbd driver parameters"); +SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN, + &xbd_enable_indirect, 0, "Enable xbd indirect segments"); + /*---------------------------- Command Processing ----------------------------*/ static void xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag) @@ -156,44 +161,14 @@ xbd_free_command(struct xbd_command *cm) } static void -xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) +xbd_mksegarray(bus_dma_segment_t *segs, int nsegs, + grant_ref_t * gref_head, int otherend_id, int readonly, + grant_ref_t * sg_ref, blkif_request_segment_t * sg) { - struct xbd_softc *sc; - struct xbd_command *cm; - blkif_request_t *ring_req; - struct blkif_request_segment *sg; - struct blkif_request_segment *last_block_sg; - grant_ref_t *sg_ref; + struct blkif_request_segment *last_block_sg = sg + nsegs; vm_paddr_t buffer_ma; uint64_t fsect, lsect; int ref; - int op; - int block_segs; - - cm = arg; - sc = cm->cm_sc; - - if (error) { - cm->cm_bp->bio_error = EIO; - biodone(cm->cm_bp); - xbd_free_command(cm); - return; - } - - /* Fill out a communications ring structure. */ - ring_req = RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); - sc->xbd_ring.req_prod_pvt++; - ring_req->id = cm->cm_id; - ring_req->operation = cm->cm_operation; - ring_req->sector_number = cm->cm_sector_number; - ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; - ring_req->nr_segments = nsegs; - cm->cm_nseg = nsegs; - - block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_REQUEST); - sg = ring_req->seg; - last_block_sg = sg + block_segs; - sg_ref = cm->cm_sg_refs; while (sg < last_block_sg) { buffer_ma = segs->ds_addr; @@ -204,7 +179,7 @@ xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) "cross a page boundary")); /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&cm->cm_gref_head); + ref = gnttab_claim_grant_reference(gref_head); /* * GNTTAB_LIST_END == 0xffffffff, but it is private @@ -214,9 +189,9 @@ xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) gnttab_grant_foreign_access_ref( ref, - xenbus_get_otherend_id(sc->xbd_dev), + otherend_id, buffer_ma >> PAGE_SHIFT, - ring_req->operation == BLKIF_OP_WRITE); + readonly); *sg_ref = ref; *sg = (struct blkif_request_segment) { @@ -227,7 +202,66 @@ xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) sg++; sg_ref++; segs++; - nsegs--; + } +} + +static void +xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) +{ + struct xbd_softc *sc; + struct xbd_command *cm; + int op; + + cm = arg; + sc = cm->cm_sc; + + if (error) { + cm->cm_bp->bio_error = EIO; + biodone(cm->cm_bp); + xbd_free_command(cm); + return; + } + + KASSERT(nsegs <= sc->xbd_max_request_segments, + ("Too many segments in a blkfront I/O")); + + if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) { + blkif_request_t *ring_req; + + /* Fill out a blkif_request_t structure. */ + ring_req = (blkif_request_t *) + RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); + sc->xbd_ring.req_prod_pvt++; + ring_req->id = cm->cm_id; + ring_req->operation = cm->cm_operation; + ring_req->sector_number = cm->cm_sector_number; + ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; + ring_req->nr_segments = nsegs; + cm->cm_nseg = nsegs; + xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, + xenbus_get_otherend_id(sc->xbd_dev), + cm->cm_operation == BLKIF_OP_WRITE, + cm->cm_sg_refs, ring_req->seg); + } else { + blkif_request_indirect_t *ring_req; + + /* Fill out a blkif_request_indirect_t structure. */ + ring_req = (blkif_request_indirect_t *) + RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); + sc->xbd_ring.req_prod_pvt++; + ring_req->id = cm->cm_id; + ring_req->operation = BLKIF_OP_INDIRECT; + ring_req->indirect_op = cm->cm_operation; + ring_req->sector_number = cm->cm_sector_number; + ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; + ring_req->nr_segments = nsegs; + cm->cm_nseg = nsegs; + xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, + xenbus_get_otherend_id(sc->xbd_dev), + cm->cm_operation == BLKIF_OP_WRITE, + cm->cm_sg_refs, cm->cm_indirectionpages); + memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs, + sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages); } if (cm->cm_operation == BLKIF_OP_READ) @@ -1010,6 +1044,16 @@ xbd_free(struct xbd_softc *sc) cm->cm_sg_refs = NULL; } + if (cm->cm_indirectionpages != NULL) { + gnttab_end_foreign_access_references( + sc->xbd_max_request_indirectpages, + &cm->cm_indirectionrefs[0]); + contigfree(cm->cm_indirectionpages, PAGE_SIZE * + sc->xbd_max_request_indirectpages, + M_XENBLOCKFRONT); + cm->cm_indirectionpages = NULL; + } + bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map); } free(sc->xbd_shadow, M_XENBLOCKFRONT); @@ -1034,7 +1078,6 @@ xbd_initialize(struct xbd_softc *sc) const char *node_path; uint32_t max_ring_page_order; int error; - int i; if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) { /* Initialization has already been performed. */ @@ -1047,9 +1090,6 @@ xbd_initialize(struct xbd_softc *sc) */ max_ring_page_order = 0; sc->xbd_ring_pages = 1; - sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; - sc->xbd_max_request_size = - XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); /* * Protocol negotiation. @@ -1105,53 +1145,6 @@ xbd_initialize(struct xbd_softc *sc) sc->xbd_max_requests = XBD_MAX_REQUESTS; } - /* Allocate datastructures based on negotiated values. */ - error = bus_dma_tag_create( - bus_get_dma_tag(sc->xbd_dev), /* parent */ - 512, PAGE_SIZE, /* algnmnt, boundary */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - sc->xbd_max_request_size, - sc->xbd_max_request_segments, - PAGE_SIZE, /* maxsegsize */ - BUS_DMA_ALLOCNOW, /* flags */ - busdma_lock_mutex, /* lockfunc */ - &sc->xbd_io_lock, /* lockarg */ - &sc->xbd_io_dmat); - if (error != 0) { - xenbus_dev_fatal(sc->xbd_dev, error, - "Cannot allocate parent DMA tag\n"); - return; - } - - /* Per-transaction data allocation. */ - sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, - M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); - if (sc->xbd_shadow == NULL) { - bus_dma_tag_destroy(sc->xbd_io_dmat); - xenbus_dev_fatal(sc->xbd_dev, error, - "Cannot allocate request structures\n"); - return; - } - - for (i = 0; i < sc->xbd_max_requests; i++) { - struct xbd_command *cm; - - cm = &sc->xbd_shadow[i]; - cm->cm_sg_refs = malloc( - sizeof(grant_ref_t) * sc->xbd_max_request_segments, - M_XENBLOCKFRONT, M_NOWAIT); - if (cm->cm_sg_refs == NULL) - break; - cm->cm_id = i; - cm->cm_flags = XBDCF_INITIALIZER; - cm->cm_sc = sc; - if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) - break; - xbd_free_command(cm); - } - if (xbd_alloc_ring(sc) != 0) return; @@ -1210,6 +1203,7 @@ xbd_connect(struct xbd_softc *sc) unsigned long sectors, sector_size; unsigned int binfo; int err, feature_barrier, feature_flush; + int i, j; if (sc->xbd_state == XBD_STATE_CONNECTED || sc->xbd_state == XBD_STATE_SUSPENDED) @@ -1240,6 +1234,88 @@ xbd_connect(struct xbd_softc *sc) if (err == 0 && feature_flush != 0) sc->xbd_flags |= XBDF_FLUSH; + err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), + "feature-max-indirect-segments", "%" PRIu32, + &sc->xbd_max_request_segments, NULL); + if ((err != 0) || (xbd_enable_indirect == 0)) + sc->xbd_max_request_segments = 0; + if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS) + sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS; + if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(MAXPHYS)) + sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(MAXPHYS); + sc->xbd_max_request_indirectpages = + XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments); + if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) + sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; + sc->xbd_max_request_size = + XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); + + /* Allocate datastructures based on negotiated values. */ + err = bus_dma_tag_create( + bus_get_dma_tag(sc->xbd_dev), /* parent */ + 512, PAGE_SIZE, /* algnmnt, boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + sc->xbd_max_request_size, + sc->xbd_max_request_segments, + PAGE_SIZE, /* maxsegsize */ + BUS_DMA_ALLOCNOW, /* flags */ + busdma_lock_mutex, /* lockfunc */ + &sc->xbd_io_lock, /* lockarg */ + &sc->xbd_io_dmat); + if (err != 0) { + xenbus_dev_fatal(sc->xbd_dev, err, + "Cannot allocate parent DMA tag\n"); + return; + } + + /* Per-transaction data allocation. */ + sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, + M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); + if (sc->xbd_shadow == NULL) { + bus_dma_tag_destroy(sc->xbd_io_dmat); + xenbus_dev_fatal(sc->xbd_dev, ENOMEM, + "Cannot allocate request structures\n"); + return; + } + + for (i = 0; i < sc->xbd_max_requests; i++) { + struct xbd_command *cm; + void * indirectpages; + + cm = &sc->xbd_shadow[i]; + cm->cm_sg_refs = malloc( + sizeof(grant_ref_t) * sc->xbd_max_request_segments, + M_XENBLOCKFRONT, M_NOWAIT); + if (cm->cm_sg_refs == NULL) + break; + cm->cm_id = i; + cm->cm_flags = XBDCF_INITIALIZER; + cm->cm_sc = sc; + if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) + break; + if (sc->xbd_max_request_indirectpages > 0) { + indirectpages = contigmalloc( + PAGE_SIZE * sc->xbd_max_request_indirectpages, + M_XENBLOCKFRONT, M_ZERO, 0, ~0, PAGE_SIZE, 0); + } else { + indirectpages = NULL; + } + for (j = 0; j < sc->xbd_max_request_indirectpages; j++) { + if (gnttab_grant_foreign_access( + xenbus_get_otherend_id(sc->xbd_dev), + (vtomach(indirectpages) >> PAGE_SHIFT) + j, + 1 /* grant read-only access */, + &cm->cm_indirectionrefs[j])) + break; + } + if (j < sc->xbd_max_request_indirectpages) + break; + cm->cm_indirectionpages = indirectpages; + xbd_free_command(cm); + } + if (sc->xbd_disk == NULL) { device_printf(dev, "%juMB <%s> at %s", (uintmax_t) sectors / (1048576 / sector_size), diff --git a/sys/dev/xen/blkfront/block.h b/sys/dev/xen/blkfront/block.h index 3007118..28c6ff2 100644 --- a/sys/dev/xen/blkfront/block.h +++ b/sys/dev/xen/blkfront/block.h @@ -68,28 +68,32 @@ #define XBD_MAX_RING_PAGES 32 /** - * The maximum number of outstanding requests blocks (request headers plus - * additional segment blocks) we will allow in a negotiated block-front/back - * communication channel. + * The maximum number of outstanding requests we will allow in a negotiated + * block-front/back communication channel. */ #define XBD_MAX_REQUESTS \ __CONST_RING_SIZE(blkif, PAGE_SIZE * XBD_MAX_RING_PAGES) /** - * The maximum mapped region size per request we will allow in a negotiated - * block-front/back communication channel. + * The maximum number of blkif segments which can be provided per indirect + * page in an indirect request. + */ +#define XBD_MAX_SEGMENTS_PER_PAGE \ + (PAGE_SIZE / sizeof(struct blkif_request_segment)) + +/** + * The maximum number of blkif segments which can be provided in an indirect + * request. */ -#define XBD_MAX_REQUEST_SIZE \ - MIN(MAXPHYS, XBD_SEGS_TO_SIZE(BLKIF_MAX_SEGMENTS_PER_REQUEST)) +#define XBD_MAX_INDIRECT_SEGMENTS \ + (BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST * XBD_MAX_SEGMENTS_PER_PAGE) /** - * The maximum number of segments (within a request header and accompanying - * segment blocks) per request we will allow in a negotiated block-front/back - * communication channel. + * Compute the number of indirect segment pages required for an I/O with the + * specified number of indirect segments. */ -#define XBD_MAX_SEGMENTS_PER_REQUEST \ - (MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ - XBD_SIZE_TO_SEGS(XBD_MAX_REQUEST_SIZE))) +#define XBD_INDIRECT_SEGS_TO_PAGES(segs) \ + ((segs + XBD_MAX_SEGMENTS_PER_PAGE - 1) / XBD_MAX_SEGMENTS_PER_PAGE) typedef enum { XBDCF_Q_MASK = 0xFF, @@ -121,6 +125,8 @@ struct xbd_command { blkif_sector_t cm_sector_number; int cm_status; xbd_cbcf_t *cm_complete; + void *cm_indirectionpages; + grant_ref_t cm_indirectionrefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; }; typedef enum { @@ -175,6 +181,7 @@ struct xbd_softc { uint32_t xbd_max_requests; uint32_t xbd_max_request_segments; uint32_t xbd_max_request_size; + uint32_t xbd_max_request_indirectpages; grant_ref_t xbd_ring_ref[XBD_MAX_RING_PAGES]; blkif_front_ring_t xbd_ring; xen_intr_handle_t xen_intr_handle; diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index b53065d..621a037 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -570,7 +570,7 @@ g_dev_done(struct bio *bp2) } mtx_unlock(&sc->sc_mtx); if (destroy) - g_post_event(g_dev_destroy, cp, M_WAITOK, NULL); + g_post_event(g_dev_destroy, cp, M_NOWAIT, NULL); biodone(bp); } diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c index 7593cca..0953d18 100644 --- a/sys/geom/multipath/g_multipath.c +++ b/sys/geom/multipath/g_multipath.c @@ -369,9 +369,9 @@ g_multipath_done(struct bio *bp) mtx_lock(&sc->sc_mtx); (*cnt)--; if (*cnt == 0 && (cp->index & MP_LOST)) { - cp->index |= MP_POSTED; + if (g_post_event(g_mpd, cp, M_NOWAIT, NULL) == 0) + cp->index |= MP_POSTED; mtx_unlock(&sc->sc_mtx); - g_post_event(g_mpd, cp, M_WAITOK, NULL); } else mtx_unlock(&sc->sc_mtx); g_std_done(bp); diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 540eb65..299ee77 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -352,7 +352,8 @@ static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *); #if defined(PAE) || defined(PAE_TABLES) -static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, + int wait); #endif static void pmap_set_pg(void); @@ -670,7 +671,7 @@ pmap_page_init(vm_page_t m) #if defined(PAE) || defined(PAE_TABLES) static void * -pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ diff --git a/sys/ia64/ia64/uma_machdep.c b/sys/ia64/ia64/uma_machdep.c index 77791ae..b536820 100644 --- a/sys/ia64/ia64/uma_machdep.c +++ b/sys/ia64/ia64/uma_machdep.c @@ -40,7 +40,7 @@ __FBSDID("$FreeBSD$"); #include <machine/vmparam.h> void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) { void *va; vm_page_t m; @@ -66,7 +66,7 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) } void -uma_small_free(void *mem, int size, u_int8_t flags) +uma_small_free(void *mem, vm_size_t size, u_int8_t flags) { vm_page_t m; diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 9de8fa7..dae1d54 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -1849,7 +1849,7 @@ void knote(struct knlist *list, long hint, int lockflags) { struct kqueue *kq; - struct knote *kn; + struct knote *kn, *tkn; int error; if (list == NULL) @@ -1861,14 +1861,13 @@ knote(struct knlist *list, long hint, int lockflags) list->kl_lock(list->kl_lockarg); /* - * If we unlock the list lock (and set KN_INFLUX), we can eliminate - * the kqueue scheduling, but this will introduce four - * lock/unlock's for each knote to test. If we do, continue to use - * SLIST_FOREACH, SLIST_FOREACH_SAFE is not safe in our case, it is - * only safe if you want to remove the current item, which we are - * not doing. + * If we unlock the list lock (and set KN_INFLUX), we can + * eliminate the kqueue scheduling, but this will introduce + * four lock/unlock's for each knote to test. Also, marker + * would be needed to keep iteration position, since filters + * or other threads could remove events. */ - SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { + SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) { kq = kn->kn_kq; KQ_LOCK(kq); if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index e7b8016..c232a37 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -284,7 +284,7 @@ static int mb_zinit_pack(void *, int, int); static void mb_zfini_pack(void *, int); static void mb_reclaim(void *); -static void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int); +static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int); /* Ensure that MSIZE is a power of 2. */ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); @@ -389,7 +389,7 @@ SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); * pages. */ static void * -mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) +mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ diff --git a/sys/kern/subr_busdma_bufalloc.c b/sys/kern/subr_busdma_bufalloc.c index a80a233..b0b1ba8 100644 --- a/sys/kern/subr_busdma_bufalloc.c +++ b/sys/kern/subr_busdma_bufalloc.c @@ -147,8 +147,8 @@ busdma_bufalloc_findzone(busdma_bufalloc_t ba, bus_size_t size) } void * -busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, u_int8_t *pflag, - int wait) +busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size, + uint8_t *pflag, int wait) { #ifdef VM_MEMATTR_UNCACHEABLE @@ -166,7 +166,7 @@ busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, u_int8_t *pflag, } void -busdma_bufalloc_free_uncacheable(void *item, int size, u_int8_t pflag) +busdma_bufalloc_free_uncacheable(void *item, vm_size_t size, uint8_t pflag) { kmem_free(kernel_arena, (vm_offset_t)item, size); diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c index 8cc020a..389b7ee 100644 --- a/sys/kern/subr_vmem.c +++ b/sys/kern/subr_vmem.c @@ -608,7 +608,7 @@ static struct mtx_padalign vmem_bt_lock; * we are really out of KVA. */ static void * -vmem_bt_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) +vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait) { vmem_addr_t addr; diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 708457d..fa36849 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -2114,6 +2114,7 @@ sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res, goto out; } } else if (fp->f_type == DTYPE_SHM) { + error = 0; shmfd = fp->f_data; obj = shmfd->shm_object; *obj_size = shmfd->shm_size; diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 17f72a7..48f7550 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -330,23 +330,27 @@ sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) int n_nchash; int count; +retry: n_nchash = nchash + 1; /* nchash is max index, not count */ if (!req->oldptr) return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); - - /* Scan hash tables for applicable entries */ - for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { - CACHE_RLOCK(); - count = 0; - LIST_FOREACH(ncp, ncpp, nc_hash) { - count++; - } + cntbuf = malloc(n_nchash * sizeof(int), M_TEMP, M_ZERO | M_WAITOK); + CACHE_RLOCK(); + if (n_nchash != nchash + 1) { CACHE_RUNLOCK(); - error = SYSCTL_OUT(req, &count, sizeof(count)); - if (error) - return (error); + free(cntbuf, M_TEMP); + goto retry; } - return (0); + /* Scan hash tables counting entries */ + for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++) + LIST_FOREACH(ncp, ncpp, nc_hash) + cntbuf[i]++; + CACHE_RUNLOCK(); + for (error = 0, i = 0; i < n_nchash; i++) + if ((error = SYSCTL_OUT(req, &cntbuf[i], sizeof(int))) != 0) + break; + free(cntbuf, M_TEMP); + return (error); } SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD| CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", @@ -935,6 +939,44 @@ nchinit(void *dummy __unused) } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL); +void +cache_changesize(int newmaxvnodes) +{ + struct nchashhead *new_nchashtbl, *old_nchashtbl; + u_long new_nchash, old_nchash; + struct namecache *ncp; + uint32_t hash; + int i; + + new_nchashtbl = hashinit(newmaxvnodes * 2, M_VFSCACHE, &new_nchash); + /* If same hash table size, nothing to do */ + if (nchash == new_nchash) { + free(new_nchashtbl, M_VFSCACHE); + return; + } + /* + * Move everything from the old hash table to the new table. + * None of the namecache entries in the table can be removed + * because to do so, they have to be removed from the hash table. + */ + CACHE_WLOCK(); + old_nchashtbl = nchashtbl; + old_nchash = nchash; + nchashtbl = new_nchashtbl; + nchash = new_nchash; + for (i = 0; i <= old_nchash; i++) { + while ((ncp = LIST_FIRST(&old_nchashtbl[i])) != NULL) { + hash = fnv_32_buf(nc_get_name(ncp), ncp->nc_nlen, + FNV1_32_INIT); + hash = fnv_32_buf(&ncp->nc_dvp, sizeof(ncp->nc_dvp), + hash); + LIST_REMOVE(ncp, nc_hash); + LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash); + } + } + CACHE_WUNLOCK(); + free(old_nchashtbl, M_VFSCACHE); +} /* * Invalidate all entries to a particular vnode. diff --git a/sys/kern/vfs_hash.c b/sys/kern/vfs_hash.c index 0271e49..1398a47 100644 --- a/sys/kern/vfs_hash.c +++ b/sys/kern/vfs_hash.c @@ -160,3 +160,40 @@ vfs_hash_rehash(struct vnode *vp, u_int hash) vp->v_hash = hash; mtx_unlock(&vfs_hash_mtx); } + +void +vfs_hash_changesize(int newmaxvnodes) +{ + struct vfs_hash_head *vfs_hash_newtbl, *vfs_hash_oldtbl; + u_long vfs_hash_newmask, vfs_hash_oldmask; + struct vnode *vp; + int i; + + vfs_hash_newtbl = hashinit(newmaxvnodes, M_VFS_HASH, + &vfs_hash_newmask); + /* If same hash table size, nothing to do */ + if (vfs_hash_mask == vfs_hash_newmask) { + free(vfs_hash_newtbl, M_VFS_HASH); + return; + } + /* + * Move everything from the old hash table to the new table. + * None of the vnodes in the table can be recycled because to + * do so, they have to be removed from the hash table. + */ + rw_wlock(&vfs_hash_lock); + vfs_hash_oldtbl = vfs_hash_tbl; + vfs_hash_oldmask = vfs_hash_mask; + vfs_hash_tbl = vfs_hash_newtbl; + vfs_hash_mask = vfs_hash_newmask; + for (i = 0; i <= vfs_hash_oldmask; i++) { + while ((vp = LIST_FIRST(&vfs_hash_oldtbl[i])) != NULL) { + LIST_REMOVE(vp, v_hashlist); + LIST_INSERT_HEAD( + vfs_hash_bucket(vp->v_mount, vp->v_hash), + vp, v_hashlist); + } + } + rw_wunlock(&vfs_hash_lock); + free(vfs_hash_oldtbl, M_VFS_HASH); +} diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 2c471af..a9e17f1 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -280,8 +280,25 @@ static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } * XXX desiredvnodes is historical cruft and should not exist. */ int desiredvnodes; -SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, - &desiredvnodes, 0, "Maximum number of vnodes"); + +static int +sysctl_update_desiredvnodes(SYSCTL_HANDLER_ARGS) +{ + int error, old_desiredvnodes; + + old_desiredvnodes = desiredvnodes; + if ((error = sysctl_handle_int(oidp, arg1, arg2, req)) != 0) + return (error); + if (old_desiredvnodes != desiredvnodes) { + vfs_hash_changesize(desiredvnodes); + cache_changesize(desiredvnodes); + } + return (0); +} + +SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes, + CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &desiredvnodes, 0, + sysctl_update_desiredvnodes, "I", "Maximum number of vnodes"); SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Minimum number of vnodes (legacy)"); static int vnlru_nowhere; diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 2c92445..d4c8693 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -188,7 +188,10 @@ vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags, restart: fmode = *flagp; - if (fmode & O_CREAT) { + if ((fmode & (O_CREAT | O_EXCL | O_DIRECTORY)) == (O_CREAT | + O_EXCL | O_DIRECTORY)) + return (EINVAL); + else if ((fmode & (O_CREAT | O_DIRECTORY)) == O_CREAT) { ndp->ni_cnd.cn_nameiop = CREATE; /* * Set NOCACHE to avoid flushing the cache when diff --git a/sys/mips/mips/uma_machdep.c b/sys/mips/mips/uma_machdep.c index 1c8e6c8..a1f5e5f 100644 --- a/sys/mips/mips/uma_machdep.c +++ b/sys/mips/mips/uma_machdep.c @@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$"); #include <machine/vmparam.h> void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) { vm_paddr_t pa; vm_page_t m; @@ -70,7 +70,7 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) } void -uma_small_free(void *mem, int size, u_int8_t flags) +uma_small_free(void *mem, vm_size_t size, u_int8_t flags) { vm_page_t m; vm_paddr_t pa; diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c index e0fd776..361f592 100644 --- a/sys/net/ieee8023ad_lacp.c +++ b/sys/net/ieee8023ad_lacp.c @@ -187,15 +187,15 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *, static void lacp_dprintf(const struct lacp_port *, const char *, ...) __attribute__((__format__(__printf__, 2, 3))); -static int lacp_debug = 0; +static VNET_DEFINE(int, lacp_debug); +#define V_lacp_debug VNET(lacp_debug) SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad"); -SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN, - &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)"); -TUNABLE_INT("net.link.lagg.lacp.debug", &lacp_debug); +SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET, + &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)"); -#define LACP_DPRINTF(a) if (lacp_debug & 0x01) { lacp_dprintf a ; } -#define LACP_TRACE(a) if (lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } -#define LACP_TPRINTF(a) if (lacp_debug & 0x04) { lacp_dprintf a ; } +#define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; } +#define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } +#define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; } /* * partner administration variables. @@ -298,7 +298,7 @@ lacp_pdu_input(struct lacp_port *lp, struct mbuf *m) goto bad; } - if (lacp_debug > 0) { + if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu receive\n"); lacp_dump_lacpdu(du); } @@ -383,7 +383,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp) sizeof(du->ldu_collector)); du->ldu_collector.lci_maxdelay = 0; - if (lacp_debug > 0) { + if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu transmit\n"); lacp_dump_lacpdu(du); } @@ -495,12 +495,14 @@ lacp_tick(void *arg) if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) continue; + CURVNET_SET(lp->lp_ifp->if_vnet); lacp_run_timers(lp); lacp_select(lp); lacp_sm_mux(lp); lacp_sm_tx(lp); lacp_sm_ptx_tx_schedule(lp); + CURVNET_RESTORE(); } callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); } @@ -517,7 +519,7 @@ lacp_port_create(struct lagg_port *lgp) int error; boolean_t active = TRUE; /* XXX should be configurable */ - boolean_t fast = FALSE; /* XXX should be configurable */ + boolean_t fast = FALSE; /* Configurable via ioctl */ bzero((char *)&sdl, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); @@ -577,12 +579,13 @@ lacp_port_destroy(struct lagg_port *lgp) lacp_disable_distributing(lp); lacp_unselect(lp); + LIST_REMOVE(lp, lp_next); + LACP_UNLOCK(lsc); + /* The address may have already been removed by if_purgemaddrs() */ if (!lgp->lp_detaching) if_delmulti_ifma(lp->lp_ifma); - LIST_REMOVE(lp, lp_next); - LACP_UNLOCK(lsc); free(lp, M_DEVBUF); } @@ -743,58 +746,19 @@ lacp_transit_expire(void *vp) LACP_LOCK_ASSERT(lsc); + CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet); LACP_TRACE(NULL); + CURVNET_RESTORE(); lsc->lsc_suppress_distributing = FALSE; } -static void -lacp_attach_sysctl(struct lacp_softc *lsc, struct sysctl_oid *p_oid) -{ - struct lagg_softc *sc = lsc->lsc_softc; - - SYSCTL_ADD_UINT(&sc->ctx, SYSCTL_CHILDREN(p_oid), OID_AUTO, - "lacp_strict_mode", - CTLFLAG_RW, - &lsc->lsc_strict_mode, - lsc->lsc_strict_mode, - "Enable LACP strict mode"); -} - -static void -lacp_attach_sysctl_debug(struct lacp_softc *lsc, struct sysctl_oid *p_oid) -{ - struct lagg_softc *sc = lsc->lsc_softc; - struct sysctl_oid *oid; - - /* Create a child of the parent lagg interface */ - oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(p_oid), - OID_AUTO, "debug", CTLFLAG_RD, NULL, "DEBUG"); - - SYSCTL_ADD_UINT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "rx_test", - CTLFLAG_RW, - &lsc->lsc_debug.lsc_rx_test, - lsc->lsc_debug.lsc_rx_test, - "Bitmap of if_dunit entries to drop RX frames for"); - SYSCTL_ADD_UINT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "tx_test", - CTLFLAG_RW, - &lsc->lsc_debug.lsc_tx_test, - lsc->lsc_debug.lsc_tx_test, - "Bitmap of if_dunit entries to drop TX frames for"); -} - -int +void lacp_attach(struct lagg_softc *sc) { struct lacp_softc *lsc; - struct sysctl_oid *oid; - lsc = malloc(sizeof(struct lacp_softc), - M_DEVBUF, M_NOWAIT|M_ZERO); - if (lsc == NULL) - return (ENOMEM); + lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_psc = (caddr_t)lsc; lsc->lsc_softc = sc; @@ -806,35 +770,24 @@ lacp_attach(struct lagg_softc *sc) TAILQ_INIT(&lsc->lsc_aggregators); LIST_INIT(&lsc->lsc_ports); - /* Create a child of the parent lagg interface */ - oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(sc->sc_oid), - OID_AUTO, "lacp", CTLFLAG_RD, NULL, "LACP"); - - /* Attach sysctl nodes */ - lacp_attach_sysctl(lsc, oid); - lacp_attach_sysctl_debug(lsc, oid); - callout_init_mtx(&lsc->lsc_transit_callout, &lsc->lsc_mtx, 0); callout_init_mtx(&lsc->lsc_callout, &lsc->lsc_mtx, 0); /* if the lagg is already up then do the same */ if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) lacp_init(sc); - - return (0); } int -lacp_detach(struct lagg_softc *sc) +lacp_detach(void *psc) { - struct lacp_softc *lsc = LACP_SOFTC(sc); + struct lacp_softc *lsc = (struct lacp_softc *)psc; KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators), ("aggregators still active")); KASSERT(lsc->lsc_active_aggregator == NULL, ("aggregator still attached")); - sc->sc_psc = NULL; callout_drain(&lsc->lsc_transit_callout); callout_drain(&lsc->lsc_callout); @@ -883,7 +836,7 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m) return (NULL); } - if (sc->use_flowid && + if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) hash = m->m_pkthdr.flowid >> sc->flowid_shift; else @@ -1425,7 +1378,7 @@ lacp_sm_mux(struct lacp_port *lp) enum lacp_selected selected = lp->lp_selected; struct lacp_aggregator *la; - if (lacp_debug > 1) + if (V_lacp_debug > 1) lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, " "p_sync= 0x%x, p_collecting= 0x%x\n", __func__, lp->lp_mux_state, selected, p_sync, p_collecting); diff --git a/sys/net/ieee8023ad_lacp.h b/sys/net/ieee8023ad_lacp.h index ca5f76e..1573c0b 100644 --- a/sys/net/ieee8023ad_lacp.h +++ b/sys/net/ieee8023ad_lacp.h @@ -75,6 +75,7 @@ "\007DEFAULTED" \ "\010EXPIRED" +#ifdef _KERNEL /* * IEEE802.3 slow protocols * @@ -250,6 +251,7 @@ struct lacp_softc { u_int32_t lsc_tx_test; } lsc_debug; u_int32_t lsc_strict_mode; + boolean_t lsc_fast_timeout; /* if set, fast timeout */ }; #define LACP_TYPE_ACTORINFO 1 @@ -282,8 +284,8 @@ struct lacp_softc { struct mbuf *lacp_input(struct lagg_port *, struct mbuf *); struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *); -int lacp_attach(struct lagg_softc *); -int lacp_detach(struct lagg_softc *); +void lacp_attach(struct lagg_softc *); +int lacp_detach(void *); void lacp_init(struct lagg_softc *); void lacp_stop(struct lagg_softc *); int lacp_port_create(struct lagg_port *); @@ -336,3 +338,4 @@ lacp_isdistributing(struct lagg_port *lgp) #define LACP_LAGIDSTR_MAX \ (1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1) #define LACP_STATESTR_MAX (255) /* XXX */ +#endif /* _KERNEL */ diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index 66669ca..f2a38c1 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -419,13 +419,8 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) } eth = mtod(m, struct etherip_header *); eth->eip_resvh = 0; - if ((sc->gif_options & GIF_SEND_REVETHIP) != 0) { - eth->eip_ver = 0; - eth->eip_resvl = ETHERIP_VERSION; - } else { - eth->eip_ver = ETHERIP_VERSION; - eth->eip_resvl = 0; - } + eth->eip_ver = ETHERIP_VERSION; + eth->eip_resvl = 0; break; default: error = EAFNOSUPPORT; @@ -633,19 +628,10 @@ gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn) if (m == NULL) goto drop; eip = mtod(m, struct etherip_header *); - /* - * GIF_ACCEPT_REVETHIP (enabled by default) intentionally - * accepts an EtherIP packet with revered version field in - * the header. This is a knob for backward compatibility - * with FreeBSD 7.2R or prior. - */ if (eip->eip_ver != ETHERIP_VERSION) { - if ((gif_options & GIF_ACCEPT_REVETHIP) == 0 || - eip->eip_resvl != ETHERIP_VERSION) { - /* discard unknown versions */ - m_freem(m); - goto drop; - } + /* discard unknown versions */ + m_freem(m); + goto drop; } m_adj(m, sizeof(struct etherip_header)); @@ -766,50 +752,32 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) goto bad; /* validate sa_len */ + /* check sa_family looks sane for the cmd */ switch (src->sa_family) { #ifdef INET case AF_INET: if (src->sa_len != sizeof(struct sockaddr_in)) goto bad; - break; -#endif -#ifdef INET6 - case AF_INET6: - if (src->sa_len != sizeof(struct sockaddr_in6)) + if (cmd != SIOCSIFPHYADDR) { + error = EAFNOSUPPORT; goto bad; - break; -#endif - default: - error = EAFNOSUPPORT; - goto bad; - } - /* check sa_family looks sane for the cmd */ - error = EAFNOSUPPORT; - switch (cmd) { -#ifdef INET - case SIOCSIFPHYADDR: - if (src->sa_family == AF_INET) - break; - goto bad; -#endif -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: - if (src->sa_family == AF_INET6) - break; - goto bad; -#endif - } - error = EADDRNOTAVAIL; - switch (src->sa_family) { -#ifdef INET - case AF_INET: + } if (satosin(src)->sin_addr.s_addr == INADDR_ANY || - satosin(dst)->sin_addr.s_addr == INADDR_ANY) + satosin(dst)->sin_addr.s_addr == INADDR_ANY) { + error = EADDRNOTAVAIL; goto bad; + } break; #endif #ifdef INET6 case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + goto bad; + if (cmd != SIOCSIFPHYADDR_IN6) { + error = EAFNOSUPPORT; + goto bad; + } + error = EADDRNOTAVAIL; if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) @@ -825,8 +793,12 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = sa6_embedscope(satosin6(dst), 0); if (error != 0) goto bad; + break; #endif - }; + default: + error = EAFNOSUPPORT; + goto bad; + } error = gif_set_tunnel(ifp, src, dst); break; case SIOCDIFPHYADDR: diff --git a/sys/net/if_gif.h b/sys/net/if_gif.h index ed143e8..b71c8e8 100644 --- a/sys/net/if_gif.h +++ b/sys/net/if_gif.h @@ -126,10 +126,7 @@ int in6_gif_attach(struct gif_softc *); #define GIFGOPTS _IOWR('i', 150, struct ifreq) #define GIFSOPTS _IOW('i', 151, struct ifreq) -#define GIF_ACCEPT_REVETHIP 0x0001 #define GIF_IGNORE_SOURCE 0x0002 -#define GIF_SEND_REVETHIP 0x0010 -#define GIF_OPTMASK (GIF_ACCEPT_REVETHIP|GIF_SEND_REVETHIP| \ - GIF_IGNORE_SOURCE) +#define GIF_OPTMASK (GIF_IGNORE_SOURCE) #endif /* _NET_IF_GIF_H_ */ diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index ce1365b..2522fb6 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include <net/if_types.h> #include <net/if_var.h> #include <net/bpf.h> +#include <net/vnet.h> #if defined(INET) || defined(INET6) #include <netinet/in.h> @@ -81,13 +82,21 @@ static struct { {0, NULL} }; -SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */ -static struct mtx lagg_list_mtx; +VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ +#define V_lagg_list VNET(lagg_list) +static VNET_DEFINE(struct mtx, lagg_list_mtx); +#define V_lagg_list_mtx VNET(lagg_list_mtx) +#define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \ + "if_lagg list", NULL, MTX_DEF) +#define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx) +#define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx) +#define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx) eventhandler_tag lagg_detach_cookie = NULL; static int lagg_clone_create(struct if_clone *, int, caddr_t); static void lagg_clone_destroy(struct ifnet *); -static struct if_clone *lagg_cloner; +static VNET_DEFINE(struct if_clone *, lagg_cloner); +#define V_lagg_cloner VNET(lagg_cloner) static const char laggname[] = "lagg"; static void lagg_lladdr(struct lagg_softc *, uint8_t *); @@ -123,24 +132,23 @@ static void lagg_media_status(struct ifnet *, struct ifmediareq *); static struct lagg_port *lagg_link_active(struct lagg_softc *, struct lagg_port *); static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *); -static int lagg_sysctl_active(SYSCTL_HANDLER_ARGS); /* Simple round robin */ -static int lagg_rr_attach(struct lagg_softc *); +static void lagg_rr_attach(struct lagg_softc *); static int lagg_rr_detach(struct lagg_softc *); static int lagg_rr_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Active failover */ -static int lagg_fail_attach(struct lagg_softc *); +static void lagg_fail_attach(struct lagg_softc *); static int lagg_fail_detach(struct lagg_softc *); static int lagg_fail_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Loadbalancing */ -static int lagg_lb_attach(struct lagg_softc *); +static void lagg_lb_attach(struct lagg_softc *); static int lagg_lb_detach(struct lagg_softc *); static int lagg_lb_port_create(struct lagg_port *); static void lagg_lb_port_destroy(struct lagg_port *); @@ -150,7 +158,7 @@ static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *, static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *); /* 802.3ad LACP */ -static int lagg_lacp_attach(struct lagg_softc *); +static void lagg_lacp_attach(struct lagg_softc *); static int lagg_lacp_detach(struct lagg_softc *); static int lagg_lacp_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *, @@ -160,9 +168,9 @@ static void lagg_lacp_lladdr(struct lagg_softc *); static void lagg_callout(void *); /* lagg protocol table */ -static const struct { - int ti_proto; - int (*ti_attach)(struct lagg_softc *); +static const struct lagg_proto { + lagg_proto ti_proto; + void (*ti_attach)(struct lagg_softc *); } lagg_protos[] = { { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach }, { LAGG_PROTO_FAILOVER, lagg_fail_attach }, @@ -176,31 +184,55 @@ SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); -static int lagg_failover_rx_all = 0; /* Allow input on any failover links */ -SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW, - &lagg_failover_rx_all, 0, +/* Allow input on any failover links */ +static VNET_DEFINE(int, lagg_failover_rx_all); +#define V_lagg_failover_rx_all VNET(lagg_failover_rx_all) +SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(lagg_failover_rx_all), 0, "Accept input from any interface in a failover lagg"); -static int def_use_flowid = 1; /* Default value for using flowid */ -TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid); -SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW, - &def_use_flowid, 0, + +/* Default value for using M_FLOWID */ +static VNET_DEFINE(int, def_use_flowid) = 1; +#define V_def_use_flowid VNET(def_use_flowid) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, + &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); -static int def_flowid_shift = 16; /* Default value for using flow shift */ -TUNABLE_INT("net.link.lagg.default_flowid_shift", &def_flowid_shift); -SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RW, - &def_flowid_shift, 0, + +/* Default value for using M_FLOWID */ +static VNET_DEFINE(int, def_flowid_shift) = 16; +#define V_def_flowid_shift VNET(def_flowid_shift) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN, + &VNET_NAME(def_flowid_shift), 0, "Default setting for flowid shift for load sharing"); +static void +vnet_lagg_init(const void *unused __unused) +{ + + LAGG_LIST_LOCK_INIT(); + SLIST_INIT(&V_lagg_list); + V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create, + lagg_clone_destroy, 0); +} +VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_lagg_init, NULL); + +static void +vnet_lagg_uninit(const void *unused __unused) +{ + + if_clone_detach(V_lagg_cloner); + LAGG_LIST_LOCK_DESTROY(); +} +VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_lagg_uninit, NULL); + static int lagg_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF); - SLIST_INIT(&lagg_list); - lagg_cloner = if_clone_simple(laggname, lagg_clone_create, - lagg_clone_destroy, 0); lagg_input_p = lagg_input; lagg_linkstate_p = lagg_port_state; lagg_detach_cookie = EVENTHANDLER_REGISTER( @@ -210,10 +242,8 @@ lagg_modevent(module_t mod, int type, void *data) case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, lagg_detach_cookie); - if_clone_detach(lagg_cloner); lagg_input_p = NULL; lagg_linkstate_p = NULL; - mtx_destroy(&lagg_list_mtx); break; default: return (EOPNOTSUPP); @@ -279,10 +309,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct lagg_softc *sc; struct ifnet *ifp; - int i, error = 0; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ - struct sysctl_oid *oid; - char num[14]; /* sufficient for 32 bits */ + int i; sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); @@ -296,29 +324,10 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) sc->sc_ibytes = counter_u64_alloc(M_WAITOK); sc->sc_obytes = counter_u64_alloc(M_WAITOK); - sysctl_ctx_init(&sc->ctx); - snprintf(num, sizeof(num), "%u", unit); - sc->use_flowid = def_use_flowid; - sc->flowid_shift = def_flowid_shift; - sc->sc_oid = oid = SYSCTL_ADD_NODE(&sc->ctx, - &SYSCTL_NODE_CHILDREN(_net_link, lagg), - OID_AUTO, num, CTLFLAG_RD, NULL, ""); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "use_flowid", CTLFLAG_RW, &sc->use_flowid, - sc->use_flowid, "Use flow id for load sharing"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "flowid_shift", CTLFLAG_RW, &sc->flowid_shift, - sc->flowid_shift, - "Shift flowid bits to prevent multiqueue collisions"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "count", CTLFLAG_RD, &sc->sc_count, sc->sc_count, - "Total number of ports"); - SYSCTL_ADD_PROC(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "active", CTLTYPE_INT|CTLFLAG_RD, sc, 0, lagg_sysctl_active, - "I", "Total number of active ports"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "flapping", CTLFLAG_RD, &sc->sc_flapping, - sc->sc_flapping, "Total number of port change events"); + if (V_def_use_flowid) + sc->sc_opts |= LAGG_OPT_USE_FLOWID; + sc->flowid_shift = V_def_flowid_shift; + /* Hash all layers by default */ sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4; @@ -326,11 +335,7 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) { if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) { sc->sc_proto = lagg_protos[i].ti_proto; - if ((error = lagg_protos[i].ti_attach(sc)) != 0) { - if_free(ifp); - free(sc, M_DEVBUF); - return (error); - } + lagg_protos[i].ti_attach(sc); break; } } @@ -375,9 +380,9 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); /* Insert into the global list of laggs */ - mtx_lock(&lagg_list_mtx); - SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries); - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_LOCK(); + SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries); + LAGG_LIST_UNLOCK(); callout_reset(&sc->sc_callout, hz, lagg_callout, sc); @@ -404,10 +409,9 @@ lagg_clone_destroy(struct ifnet *ifp) /* Unhook the aggregation protocol */ if (sc->sc_detach != NULL) (*sc->sc_detach)(sc); + else + LAGG_WUNLOCK(sc); - LAGG_WUNLOCK(sc); - - sysctl_ctx_free(&sc->ctx); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free(ifp); @@ -421,9 +425,9 @@ lagg_clone_destroy(struct ifnet *ifp) counter_u64_free(sc->sc_ibytes); counter_u64_free(sc->sc_obytes); - mtx_lock(&lagg_list_mtx); - SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries); - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_LOCK(); + SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); + LAGG_LIST_UNLOCK(); taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task); LAGG_LOCK_DESTROY(sc); @@ -435,15 +439,28 @@ static void lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr) { struct ifnet *ifp = sc->sc_ifp; + struct lagg_port lp; if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) return; + LAGG_WLOCK_ASSERT(sc); + /* + * Set the link layer address on the lagg interface. + * sc_lladdr() notifies the MAC change to + * the aggregation protocol. iflladdr_event handler which + * may trigger gratuitous ARPs for INET will be handled in + * a taskqueue. + */ bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN); - /* Let the protocol know the MAC has changed */ if (sc->sc_lladdr != NULL) (*sc->sc_lladdr)(sc); - EVENTHANDLER_INVOKE(iflladdr_event, ifp); + + bzero(&lp, sizeof(lp)); + lp.lp_ifp = sc->sc_ifp; + lp.lp_softc = sc; + + lagg_port_lladdr(&lp, lladdr); } static void @@ -491,11 +508,13 @@ lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr) struct ifnet *ifp = lp->lp_ifp; struct lagg_llq *llq; int pending = 0; + int primary; LAGG_WLOCK_ASSERT(sc); - if (lp->lp_detaching || - memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) + primary = (sc->sc_primary->lp_ifp == ifp) ? 1 : 0; + if (primary == 0 && (lp->lp_detaching || + memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)) return; /* Check to make sure its not already queued to be changed */ @@ -514,6 +533,7 @@ lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr) /* Update the lladdr even if pending, it may have changed */ llq->llq_ifp = ifp; + llq->llq_primary = primary; bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN); if (!pending) @@ -546,14 +566,20 @@ lagg_port_setlladdr(void *arg, int pending) for (llq = head; llq != NULL; llq = head) { ifp = llq->llq_ifp; - /* Set the link layer address */ CURVNET_SET(ifp->if_vnet); - error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN); + if (llq->llq_primary == 0) { + /* + * Set the link layer address on the laggport interface. + * if_setlladdr() triggers gratuitous ARPs for INET. + */ + error = if_setlladdr(ifp, llq->llq_lladdr, + ETHER_ADDR_LEN); + if (error) + printf("%s: setlladdr failed on %s\n", __func__, + ifp->if_xname); + } else + EVENTHANDLER_INVOKE(iflladdr_event, ifp); CURVNET_RESTORE(); - if (error) - printf("%s: setlladdr failed on %s\n", __func__, - ifp->if_xname); - head = SLIST_NEXT(llq, llq_entries); free(llq, M_DEVBUF); } @@ -585,34 +611,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) if (ifp->if_type != IFT_ETHER) return (EPROTONOSUPPORT); -#ifdef INET6 - /* - * The member interface should not have inet6 address because - * two interfaces with a valid link-local scope zone must not be - * merged in any form. This restriction is needed to - * prevent violation of link-local scope zone. Attempts to - * add a member interface which has inet6 addresses triggers - * removal of all inet6 addresses on the member interface. - */ - SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { - if (in6ifa_llaonifp(lp->lp_ifp)) { - in6_ifdetach(lp->lp_ifp); - if_printf(sc->sc_ifp, - "IPv6 addresses on %s have been removed " - "before adding it as a member to prevent " - "IPv6 address scope violation.\n", - lp->lp_ifp->if_xname); - } - } - if (in6ifa_llaonifp(ifp)) { - in6_ifdetach(ifp); - if_printf(sc->sc_ifp, - "IPv6 addresses on %s have been removed " - "before adding it as a member to prevent " - "IPv6 address scope violation.\n", - ifp->if_xname); - } -#endif /* Allow the first Ethernet member to define the MTU */ if (SLIST_EMPTY(&sc->sc_ports)) sc->sc_ifp->if_mtu = ifp->if_mtu; @@ -627,10 +625,10 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) return (ENOMEM); /* Check if port is a stacked lagg */ - mtx_lock(&lagg_list_mtx); - SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) { + LAGG_LIST_LOCK(); + SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) { if (ifp == sc_ptr->sc_ifp) { - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (EINVAL); /* XXX disable stacking for the moment, its untested */ @@ -638,14 +636,14 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) lp->lp_flags |= LAGG_PORT_STACK; if (lagg_port_checkstacking(sc_ptr) >= LAGG_MAX_STACKING) { - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (E2BIG); } #endif } } - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); /* Change the interface type */ lp->lp_iftype = ifp->if_type; @@ -995,10 +993,12 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_reqall *ra = (struct lagg_reqall *)data; + struct lagg_reqopts *ro = (struct lagg_reqopts *)data; struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf; struct lagg_reqflags *rf = (struct lagg_reqflags *)data; struct ifreq *ifr = (struct ifreq *)data; struct lagg_port *lp; + const struct lagg_proto *proto = NULL; struct ifnet *tpif; struct thread *td = curthread; char *buf, *outbuf; @@ -1046,50 +1046,153 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = priv_check(td, PRIV_NET_LAGG); if (error) break; - if (ra->ra_proto >= LAGG_PROTO_MAX) { + for (proto = lagg_protos; proto->ti_proto != LAGG_PROTO_NONE; + proto++) { + if (proto->ti_proto == ra->ra_proto) { + if (sc->sc_ifflags & IFF_DEBUG) + printf("%s: using proto %u\n", + sc->sc_ifname, proto->ti_proto); + break; + } + } + if (proto->ti_proto == LAGG_PROTO_NONE) { error = EPROTONOSUPPORT; break; } + /* Set to LAGG_PROTO_NONE during the attach. */ LAGG_WLOCK(sc); if (sc->sc_proto != LAGG_PROTO_NONE) { - /* Reset protocol first in case detach unlocks */ sc->sc_proto = LAGG_PROTO_NONE; - error = sc->sc_detach(sc); - sc->sc_detach = NULL; - sc->sc_start = NULL; - sc->sc_input = NULL; - sc->sc_port_create = NULL; - sc->sc_port_destroy = NULL; - sc->sc_linkstate = NULL; - sc->sc_init = NULL; - sc->sc_stop = NULL; - sc->sc_lladdr = NULL; - sc->sc_req = NULL; - sc->sc_portreq = NULL; - } else if (sc->sc_input != NULL) { - /* Still detaching */ - error = EBUSY; - } - if (error != 0) { + if (sc->sc_detach != NULL) + sc->sc_detach(sc); + else + LAGG_WUNLOCK(sc); + } else LAGG_WUNLOCK(sc); + proto->ti_attach(sc); + LAGG_WLOCK(sc); + sc->sc_proto = proto->ti_proto; + LAGG_WUNLOCK(sc); + break; + case SIOCGLAGGOPTS: + ro->ro_opts = sc->sc_opts; + if (sc->sc_proto == LAGG_PROTO_LACP) { + struct lacp_softc *lsc; + + lsc = (struct lacp_softc *)sc->sc_psc; + if (lsc->lsc_debug.lsc_tx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_TXTEST; + if (lsc->lsc_debug.lsc_rx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_RXTEST; + if (lsc->lsc_strict_mode != 0) + ro->ro_opts |= LAGG_OPT_LACP_STRICT; + if (lsc->lsc_fast_timeout != 0) + ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT; + + ro->ro_active = sc->sc_active; + } else { + ro->ro_active = 0; + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) + ro->ro_active += LAGG_PORTACTIVE(lp); + } + ro->ro_flapping = sc->sc_flapping; + ro->ro_flowid_shift = sc->flowid_shift; + break; + case SIOCSLAGGOPTS: + error = priv_check(td, PRIV_NET_LAGG); + if (error) + break; + if (ro->ro_opts == 0) + break; + /* + * Set options. LACP options are stored in sc->sc_psc, + * not in sc_opts. + */ + int valid, lacp; + + switch (ro->ro_opts) { + case LAGG_OPT_USE_FLOWID: + case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_FLOWIDSHIFT: + valid = 1; + lacp = 0; + break; + case LAGG_OPT_LACP_TXTEST: + case -LAGG_OPT_LACP_TXTEST: + case LAGG_OPT_LACP_RXTEST: + case -LAGG_OPT_LACP_RXTEST: + case LAGG_OPT_LACP_STRICT: + case -LAGG_OPT_LACP_STRICT: + case LAGG_OPT_LACP_TIMEOUT: + case -LAGG_OPT_LACP_TIMEOUT: + valid = lacp = 1; + break; + default: + valid = lacp = 0; break; } - for (int i = 0; i < (sizeof(lagg_protos) / - sizeof(lagg_protos[0])); i++) { - if (lagg_protos[i].ti_proto == ra->ra_proto) { - if (sc->sc_ifflags & IFF_DEBUG) - printf("%s: using proto %u\n", - sc->sc_ifname, - lagg_protos[i].ti_proto); - sc->sc_proto = lagg_protos[i].ti_proto; - if (sc->sc_proto != LAGG_PROTO_NONE) - error = lagg_protos[i].ti_attach(sc); - LAGG_WUNLOCK(sc); - return (error); + + LAGG_WLOCK(sc); + if (valid == 0 || + (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) { + /* Invalid combination of options specified. */ + error = EINVAL; + LAGG_WUNLOCK(sc); + break; /* Return from SIOCSLAGGOPTS. */ + } + /* + * Store new options into sc->sc_opts except for + * FLOWIDSHIFT and LACP options. + */ + if (lacp == 0) { + if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT) + sc->flowid_shift = ro->ro_flowid_shift; + else if (ro->ro_opts > 0) + sc->sc_opts |= ro->ro_opts; + else + sc->sc_opts &= ~ro->ro_opts; + } else { + struct lacp_softc *lsc; + struct lacp_port *lp; + + lsc = (struct lacp_softc *)sc->sc_psc; + + switch (ro->ro_opts) { + case LAGG_OPT_LACP_TXTEST: + lsc->lsc_debug.lsc_tx_test = 1; + break; + case -LAGG_OPT_LACP_TXTEST: + lsc->lsc_debug.lsc_tx_test = 0; + break; + case LAGG_OPT_LACP_RXTEST: + lsc->lsc_debug.lsc_rx_test = 1; + break; + case -LAGG_OPT_LACP_RXTEST: + lsc->lsc_debug.lsc_rx_test = 0; + break; + case LAGG_OPT_LACP_STRICT: + lsc->lsc_strict_mode = 1; + break; + case -LAGG_OPT_LACP_STRICT: + lsc->lsc_strict_mode = 0; + break; + case LAGG_OPT_LACP_TIMEOUT: + LACP_LOCK(lsc); + LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) + lp->lp_state |= LACP_STATE_TIMEOUT; + LACP_UNLOCK(lsc); + lsc->lsc_fast_timeout = 1; + break; + case -LAGG_OPT_LACP_TIMEOUT: + LACP_LOCK(lsc); + LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) + lp->lp_state &= ~LACP_STATE_TIMEOUT; + LACP_UNLOCK(lsc); + lsc->lsc_fast_timeout = 0; + break; } } LAGG_WUNLOCK(sc); - error = EPROTONOSUPPORT; break; case SIOCGLAGGFLAGS: rf->rf_flags = sc->sc_flags; @@ -1134,6 +1237,26 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EINVAL; break; } +#ifdef INET6 + /* + * A laggport interface should not have inet6 address + * because two interfaces with a valid link-local + * scope zone must not be merged in any form. This + * restriction is needed to prevent violation of + * link-local scope zone. Attempts to add a laggport + * interface which has inet6 addresses triggers + * removal of all inet6 addresses on the member + * interface. + */ + if (in6ifa_llaonifp(tpif)) { + in6_ifdetach(tpif); + if_printf(sc->sc_ifp, + "IPv6 addresses on %s have been removed " + "before adding it as a member to prevent " + "IPv6 address scope violation.\n", + tpif->if_xname); + } +#endif LAGG_WLOCK(sc); error = lagg_port_create(sc, tpif); LAGG_WUNLOCK(sc); @@ -1419,7 +1542,7 @@ lagg_input(struct ifnet *ifp, struct mbuf *m) ETHER_BPF_MTAP(scifp, m); - m = (*sc->sc_input)(sc, lp, m); + m = (lp->lp_detaching == 0) ? (*sc->sc_input)(sc, lp, m) : NULL; if (m != NULL) { counter_u64_add(sc->sc_ipackets, 1); @@ -1582,27 +1705,6 @@ lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf) return (mtod(m, char *) + off); } -static int -lagg_sysctl_active(SYSCTL_HANDLER_ARGS) -{ - struct lagg_softc *sc = (struct lagg_softc *)arg1; - struct lagg_port *lp; - int error; - - /* LACP tracks active links automatically, the others do not */ - if (sc->sc_proto != LAGG_PROTO_LACP) { - sc->sc_active = 0; - SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) - sc->sc_active += LAGG_PORTACTIVE(lp); - } - - error = sysctl_handle_int(oidp, &sc->sc_active, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - - return (0); -} - uint32_t lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key) { @@ -1715,18 +1817,16 @@ lagg_enqueue(struct ifnet *ifp, struct mbuf *m) /* * Simple round robin aggregation */ - -static int +static void lagg_rr_attach(struct lagg_softc *sc) { sc->sc_detach = lagg_rr_detach; sc->sc_start = lagg_rr_start; sc->sc_input = lagg_rr_input; + sc->sc_detach = NULL; sc->sc_port_create = NULL; sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX; sc->sc_seq = 0; - - return (0); } static int @@ -1774,8 +1874,7 @@ lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) /* * Active failover */ - -static int +static void lagg_fail_attach(struct lagg_softc *sc) { sc->sc_detach = lagg_fail_detach; @@ -1783,8 +1882,7 @@ lagg_fail_attach(struct lagg_softc *sc) sc->sc_input = lagg_fail_input; sc->sc_port_create = NULL; sc->sc_port_destroy = NULL; - - return (0); + sc->sc_detach = NULL; } static int @@ -1814,7 +1912,7 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) struct ifnet *ifp = sc->sc_ifp; struct lagg_port *tmp_tp; - if (lp == sc->sc_primary || lagg_failover_rx_all) { + if (lp == sc->sc_primary || V_lagg_failover_rx_all) { m->m_pkthdr.rcvif = ifp; return (m); } @@ -1838,16 +1936,13 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) /* * Loadbalancing */ - -static int +static void lagg_lb_attach(struct lagg_softc *sc) { struct lagg_port *lp; struct lagg_lb *lb; - if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb), - M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) - return (ENOMEM); + lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_detach = lagg_lb_detach; sc->sc_start = lagg_lb_start; @@ -1861,14 +1956,13 @@ lagg_lb_attach(struct lagg_softc *sc) SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lagg_lb_port_create(lp); - - return (0); } static int lagg_lb_detach(struct lagg_softc *sc) { struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; + LAGG_WUNLOCK(sc); if (lb != NULL) free(lb, M_DEVBUF); return (0); @@ -1917,7 +2011,7 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m) struct lagg_port *lp = NULL; uint32_t p = 0; - if (sc->use_flowid && + if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) p = m->m_pkthdr.flowid >> sc->flowid_shift; else @@ -1952,12 +2046,10 @@ lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) /* * 802.3ad LACP */ - -static int +static void lagg_lacp_attach(struct lagg_softc *sc) { struct lagg_port *lp; - int error; sc->sc_detach = lagg_lacp_detach; sc->sc_port_create = lacp_port_create; @@ -1971,31 +2063,28 @@ lagg_lacp_attach(struct lagg_softc *sc) sc->sc_req = lacp_req; sc->sc_portreq = lacp_portreq; - error = lacp_attach(sc); - if (error) - return (error); + lacp_attach(sc); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_create(lp); - - return (error); } static int lagg_lacp_detach(struct lagg_softc *sc) { struct lagg_port *lp; - int error; + void *psc; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_destroy(lp); - /* unlocking is safe here */ + psc = sc->sc_psc; + sc->sc_psc = NULL; LAGG_WUNLOCK(sc); - error = lacp_detach(sc); - LAGG_WLOCK(sc); - return (error); + lacp_detach(psc); + + return (0); } static void diff --git a/sys/net/if_lagg.h b/sys/net/if_lagg.h index ff1ae2f..ac01032 100644 --- a/sys/net/if_lagg.h +++ b/sys/net/if_lagg.h @@ -47,17 +47,19 @@ "\05DISTRIBUTING\06DISABLED" /* Supported lagg PROTOs */ -#define LAGG_PROTO_NONE 0 /* no lagg protocol defined */ -#define LAGG_PROTO_ROUNDROBIN 1 /* simple round robin */ -#define LAGG_PROTO_FAILOVER 2 /* active failover */ -#define LAGG_PROTO_LOADBALANCE 3 /* loadbalance */ -#define LAGG_PROTO_LACP 4 /* 802.3ad lacp */ -#define LAGG_PROTO_ETHERCHANNEL 5 /* Cisco FEC */ -#define LAGG_PROTO_MAX 6 +typedef enum { + LAGG_PROTO_NONE = 0, /* no lagg protocol defined */ + LAGG_PROTO_ROUNDROBIN, /* simple round robin */ + LAGG_PROTO_FAILOVER, /* active failover */ + LAGG_PROTO_LOADBALANCE, /* loadbalance */ + LAGG_PROTO_LACP, /* 802.3ad lacp */ + LAGG_PROTO_ETHERCHANNEL,/* Cisco FEC */ + LAGG_PROTO_MAX, +} lagg_proto; struct lagg_protos { const char *lpr_name; - int lpr_proto; + lagg_proto lpr_proto; }; #define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER @@ -134,6 +136,31 @@ struct lagg_reqflags { #define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags) #define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags) +struct lagg_reqopts { + char ro_ifname[IFNAMSIZ]; /* name of the lagg */ + + int ro_opts; /* Option bitmap */ +#define LAGG_OPT_NONE 0x00 +#define LAGG_OPT_USE_FLOWID 0x01 /* use M_FLOWID */ +/* Pseudo flags which are used in ro_opts but not stored into sc_opts. */ +#define LAGG_OPT_FLOWIDSHIFT 0x02 /* Set flowid */ +#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */ +#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */ +#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */ +#define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */ +#define LAGG_OPT_LACP_TIMEOUT 0x80 /* LACP timeout */ + u_int ro_count; /* number of ports */ + u_int ro_active; /* active port count */ + u_int ro_flapping; /* number of flapping */ + int ro_flowid_shift; /* shift the flowid */ +}; + +#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts) +#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts) + +#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \ + "\006LACP_TXTEST\007LACP_RXTEST" + #ifdef _KERNEL #include <sys/counter.h> @@ -183,6 +210,7 @@ struct lagg_mc { struct lagg_llq { struct ifnet *llq_ifp; uint8_t llq_lladdr[ETHER_ADDR_LEN]; + uint8_t llq_primary; SLIST_ENTRY(lagg_llq) llq_entries; }; @@ -229,9 +257,7 @@ struct lagg_softc { eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; struct callout sc_callout; - struct sysctl_ctx_list ctx; /* sysctl variables */ - struct sysctl_oid *sc_oid; /* sysctl tree oid */ - int use_flowid; /* enable use of flowid */ + u_int sc_opts; int flowid_shift; /* set flowid shift*/ }; diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index 3c60274..188057d 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -640,7 +640,7 @@ typedef struct _ipfw_table_xentry { char iface[IF_NAMESIZE]; /* interface name */ } k; } ipfw_table_xentry; -#define IPFW_TCF_INET 0x01 /* CIDR flags: IPv4 record */ +#define IPFW_TCF_INET 0x01 /* CIDR flags: IPv4 record */ typedef struct _ipfw_table { u_int32_t size; /* size of entries in bytes */ diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 4c94ba8..231b269 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -323,8 +323,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, /* FALLTHROUGH */ case OSIOCGIFINFO_IN6: case SIOCGIFINFO_IN6: - case SIOCGDRLST_IN6: - case SIOCGPRLST_IN6: case SIOCGNBRINFO_IN6: case SIOCGDEFIFACE_IN6: return (nd6_ioctl(cmd, data, ifp)); @@ -1254,11 +1252,13 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, * source address. */ ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */ - if (hostIsNew && in6if_do_dad(ifp)) - ia->ia6_flags |= IN6_IFF_TENTATIVE; - /* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */ - if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) + /* + * DAD should be performed for an new address or addresses on + * an interface with ND6_IFF_IFDISABLED. + */ + if (in6if_do_dad(ifp) && + (hostIsNew || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED))) ia->ia6_flags |= IN6_IFF_TENTATIVE; /* @@ -1280,13 +1280,8 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, goto cleanup; } - /* - * Perform DAD, if needed. - * XXX It may be of use, if we can administratively disable DAD. - */ - if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && - (ia->ia6_flags & IN6_IFF_TENTATIVE)) - { + /* Perform DAD, if the address is TENTATIVE. */ + if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) { int mindelay, maxdelay; delay = 0; @@ -1619,6 +1614,10 @@ in6_purgeif(struct ifnet *ifp) * in the future. * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374 * address encoding scheme. (see figure on page 8) + * Notifies other subsystems about address change/arrival: + * 1) Notifies device handler on the first IPv6 address assignment + * 2) Handle routing table changes for P2P links and route + * 3) Handle routing table changes for address host route */ static int in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, @@ -2389,13 +2388,13 @@ in6if_do_dad(struct ifnet *ifp) * However, some interfaces can be up before the RUNNING * status. Additionaly, users may try to assign addresses * before the interface becomes up (or running). - * We simply skip DAD in such a case as a work around. - * XXX: we should rather mark "tentative" on such addresses, - * and do DAD after the interface becomes ready. + * This function returns EAGAIN in that case. + * The caller should mark "tentative" on the address instead of + * performing DAD immediately. */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) - return (0); + return (EAGAIN); return (1); } diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index 5196101..2381dd3 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -595,12 +595,6 @@ in6_ifattach_loopback(struct ifnet *ifp) ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; - /* we don't need to perform DAD on loopback interfaces. */ - ifra.ifra_flags |= IN6_IFF_NODAD; - - /* skip registration to the prefix list. XXX should be temporary. */ - ifra.ifra_flags |= IN6_IFF_NOPFX; - /* * We are sure that this is a newly assigned address, so we can set * NULL to the 3rd arg. diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index e0f337f..30dd1e8 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -447,11 +447,6 @@ struct in6_rrenumreq { #define SIOCGIFAFLAG_IN6 _IOWR('i', 73, struct in6_ifreq) -#define SIOCGDRLST_IN6 _IOWR('i', 74, struct in6_drlist) -#ifdef _KERNEL -/* XXX: SIOCGPRLST_IN6 is exposed in KAME but in6_oprlist is not. */ -#define SIOCGPRLST_IN6 _IOWR('i', 75, struct in6_oprlist) -#endif #ifdef _KERNEL #define OSIOCGIFINFO_IN6 _IOWR('i', 76, struct in6_ondireq) #endif @@ -499,14 +494,11 @@ struct in6_rrenumreq { #define IN6_IFF_DETACHED 0x08 /* may be detached from the link */ #define IN6_IFF_DEPRECATED 0x10 /* deprecated address */ #define IN6_IFF_NODAD 0x20 /* don't perform DAD on this address - * (used only at first SIOC* call) + * (obsolete) */ #define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */ #define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */ #define IN6_IFF_PREFER_SOURCE 0x0100 /* preferred address for SAS */ -#define IN6_IFF_NOPFX 0x8000 /* skip kernel prefix management. - * XXX: this should be temporary. - */ /* do not input/output */ #define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED) diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 94add9a..d2aa188 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -1246,99 +1246,14 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) { - struct in6_drlist *drl = (struct in6_drlist *)data; - struct in6_oprlist *oprl = (struct in6_oprlist *)data; struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; - struct nd_defrouter *dr; - struct nd_prefix *pr; - int i = 0, error = 0; + int error = 0; if (ifp->if_afdata[AF_INET6] == NULL) return (EPFNOSUPPORT); switch (cmd) { - case SIOCGDRLST_IN6: - /* - * obsolete API, use sysctl under net.inet6.icmp6 - */ - bzero(drl, sizeof(*drl)); - TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { - if (i >= DRLSTSIZ) - break; - drl->defrouter[i].rtaddr = dr->rtaddr; - in6_clearscope(&drl->defrouter[i].rtaddr); - - drl->defrouter[i].flags = dr->flags; - drl->defrouter[i].rtlifetime = dr->rtlifetime; - drl->defrouter[i].expire = dr->expire + - (time_second - time_uptime); - drl->defrouter[i].if_index = dr->ifp->if_index; - i++; - } - break; - case SIOCGPRLST_IN6: - /* - * obsolete API, use sysctl under net.inet6.icmp6 - * - * XXX the structure in6_prlist was changed in backward- - * incompatible manner. in6_oprlist is used for SIOCGPRLST_IN6, - * in6_prlist is used for nd6_sysctl() - fill_prlist(). - */ - /* - * XXX meaning of fields, especialy "raflags", is very - * differnet between RA prefix list and RR/static prefix list. - * how about separating ioctls into two? - */ - bzero(oprl, sizeof(*oprl)); - LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { - struct nd_pfxrouter *pfr; - int j; - - if (i >= PRLSTSIZ) - break; - oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr; - oprl->prefix[i].raflags = pr->ndpr_raf; - oprl->prefix[i].prefixlen = pr->ndpr_plen; - oprl->prefix[i].vltime = pr->ndpr_vltime; - oprl->prefix[i].pltime = pr->ndpr_pltime; - oprl->prefix[i].if_index = pr->ndpr_ifp->if_index; - if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) - oprl->prefix[i].expire = 0; - else { - time_t maxexpire; - - /* XXX: we assume time_t is signed. */ - maxexpire = (-1) & - ~((time_t)1 << - ((sizeof(maxexpire) * 8) - 1)); - if (pr->ndpr_vltime < - maxexpire - pr->ndpr_lastupdate) { - oprl->prefix[i].expire = - pr->ndpr_lastupdate + - pr->ndpr_vltime + - (time_second - time_uptime); - } else - oprl->prefix[i].expire = maxexpire; - } - - j = 0; - LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { - if (j < DRLSTSIZ) { -#define RTRADDR oprl->prefix[i].advrtr[j] - RTRADDR = pfr->router->rtaddr; - in6_clearscope(&RTRADDR); -#undef RTRADDR - } - j++; - } - oprl->prefix[i].advrtrs = j; - oprl->prefix[i].origin = PR_ORIG_RA; - - i++; - } - - break; case OSIOCGIFINFO_IN6: #define ND ndi->ndi /* XXX: old ndp(8) assumes a positive value for linkmtu. */ @@ -1398,22 +1313,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * do not clear ND6_IFF_IFDISABLED. * See RFC 4862, Section 5.4.5. */ - int duplicated_linklocal = 0; - IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && - IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { - duplicated_linklocal = 1; + IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; - } } IF_ADDR_RUNLOCK(ifp); - if (duplicated_linklocal) { + if (ifa != NULL) { + /* LLA is duplicated. */ ND.flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "Cannot enable an interface" " with a link-local address marked" @@ -1429,14 +1341,18 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) /* Mark all IPv6 address as tentative. */ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != AF_INET6) - continue; - ia = (struct in6_ifaddr *)ifa; - ia->ia6_flags |= IN6_IFF_TENTATIVE; + if ((ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) { + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, + ifa_link) { + if (ifa->ifa_addr->sa_family != + AF_INET6) + continue; + ia = (struct in6_ifaddr *)ifa; + ia->ia6_flags |= IN6_IFF_TENTATIVE; + } + IF_ADDR_RUNLOCK(ifp); } - IF_ADDR_RUNLOCK(ifp); } if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) { @@ -1454,20 +1370,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * address is assigned, and IFF_UP, try to * assign one. */ - int haslinklocal = 0; - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != AF_INET6) + TAILQ_FOREACH(ifa, &ifp->if_addrhead, + ifa_link) { + if (ifa->ifa_addr->sa_family != + AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; - if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { - haslinklocal = 1; + if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; - } } IF_ADDR_RUNLOCK(ifp); - if (!haslinklocal) + if (ifa != NULL) + /* No LLA is configured. */ in6_ifattach(ifp, NULL); } } diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 2272cd0..8f287dc 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -576,7 +576,7 @@ nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6, /* * Add a Nonce option (RFC 3971) to detect looped back NS messages. * This behavior is documented as Enhanced Duplicate Address - * Detection in draft-ietf-6man-enhanced-dad-13. + * Detection in RFC 7527. * net.inet6.ip6.dad_enhanced=0 disables this. */ if (V_dad_enhanced != 0 && nonce != NULL) { @@ -1308,11 +1308,16 @@ nd6_dad_start(struct ifaddr *ifa, int delay) } if (ifa->ifa_ifp == NULL) panic("nd6_dad_start: ifa->ifa_ifp == NULL"); - if (!(ifa->ifa_ifp->if_flags & IFF_UP)) { + if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) { + ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } - if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED) + if (!(ifa->ifa_ifp->if_flags & IFF_UP) || + !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) || + (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) { + ia->ia6_flags |= IN6_IFF_TENTATIVE; return; + } if ((dp = nd6_dad_find(ifa, NULL)) != NULL) { /* DAD already in progress */ nd6_dad_rele(dp); diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index ef66064..6a6a10f 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1432,7 +1432,8 @@ retry: static mmu_t installed_mmu; static void * -moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, + int wait) { /* * This entire routine is a horrible hack to avoid bothering kmem diff --git a/sys/powerpc/aim/slb.c b/sys/powerpc/aim/slb.c index 9d60b2b..89cfabf 100644 --- a/sys/powerpc/aim/slb.c +++ b/sys/powerpc/aim/slb.c @@ -473,7 +473,7 @@ slb_insert_user(pmap_t pm, struct slb *slb) } static void * -slb_uma_real_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) { static vm_offset_t realmax = 0; void *va; diff --git a/sys/powerpc/aim/uma_machdep.c b/sys/powerpc/aim/uma_machdep.c index 255826e..33674e5 100644 --- a/sys/powerpc/aim/uma_machdep.c +++ b/sys/powerpc/aim/uma_machdep.c @@ -50,7 +50,7 @@ SYSCTL_INT(_hw, OID_AUTO, uma_mdpages, CTLFLAG_RD, &hw_uma_mdpages, 0, "UMA MD pages in use"); void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) { void *va; vm_page_t m; @@ -82,7 +82,7 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) } void -uma_small_free(void *mem, int size, u_int8_t flags) +uma_small_free(void *mem, vm_size_t size, u_int8_t flags) { vm_page_t m; diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c index 779b953..60e4a2f 100644 --- a/sys/sparc64/sparc64/vm_machdep.c +++ b/sys/sparc64/sparc64/vm_machdep.c @@ -502,7 +502,7 @@ swi_vm(void *v) } void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) { vm_paddr_t pa; vm_page_t m; @@ -540,7 +540,7 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) } void -uma_small_free(void *mem, int size, u_int8_t flags) +uma_small_free(void *mem, vm_size_t size, u_int8_t flags) { vm_page_t m; diff --git a/sys/sys/busdma_bufalloc.h b/sys/sys/busdma_bufalloc.h index f5ec32f..dfcb4b8 100644 --- a/sys/sys/busdma_bufalloc.h +++ b/sys/sys/busdma_bufalloc.h @@ -110,9 +110,10 @@ struct busdma_bufzone * busdma_bufalloc_findzone(busdma_bufalloc_t ba, * routines support pmap_page_set_memattr() and the VM_MEMATTR_UNCACHEABLE flag * you can probably use these when you need uncacheable buffers. */ -void * busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, - u_int8_t *pflag, int wait); -void busdma_bufalloc_free_uncacheable(void *item, int size, u_int8_t pflag); +void * busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size, + uint8_t *pflag, int wait); +void busdma_bufalloc_free_uncacheable(void *item, vm_size_t size, + uint8_t pflag); #endif /* _MACHINE_BUSDMA_BUFALLOC_H_ */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index fe4c9ea..eb691b8 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -600,6 +600,7 @@ struct vnode; typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **); /* cache_* may belong in namei.h. */ +void cache_changesize(int newhashsize); #define cache_enter(dvp, vp, cnp) \ cache_enter_time(dvp, vp, cnp, NULL, NULL) void cache_enter_time(struct vnode *dvp, struct vnode *vp, @@ -836,6 +837,7 @@ int fifo_printinfo(struct vnode *); /* vfs_hash.c */ typedef int vfs_hash_cmp_t(struct vnode *vp, void *arg); +void vfs_hash_changesize(int newhashsize); int vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); u_int vfs_hash_index(struct vnode *vp); int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); diff --git a/sys/teken/teken.c b/sys/teken/teken.c index 3002a88..8834390 100644 --- a/sys/teken/teken.c +++ b/sys/teken/teken.c @@ -29,12 +29,14 @@ #include <sys/cdefs.h> #if defined(__FreeBSD__) && defined(_KERNEL) #include <sys/param.h> +#include <sys/limits.h> #include <sys/lock.h> #include <sys/systm.h> #define teken_assert(x) MPASS(x) #else /* !(__FreeBSD__ && _KERNEL) */ #include <sys/types.h> #include <assert.h> +#include <limits.h> #include <stdint.h> #include <stdio.h> #include <string.h> @@ -405,18 +407,24 @@ teken_state_numbers(teken_t *t, teken_char_t c) teken_assert(t->t_curnum < T_NUMSIZE); if (c >= '0' && c <= '9') { - /* - * Don't do math with the default value of 1 when a - * custom number is inserted. - */ if (t->t_stateflags & TS_FIRSTDIGIT) { + /* First digit. */ t->t_stateflags &= ~TS_FIRSTDIGIT; - t->t_nums[t->t_curnum] = 0; - } else { - t->t_nums[t->t_curnum] *= 10; + t->t_nums[t->t_curnum] = c - '0'; + } else if (t->t_nums[t->t_curnum] < UINT_MAX / 100) { + /* + * There is no need to continue parsing input + * once the value exceeds the size of the + * terminal. It would only allow for integer + * overflows when performing arithmetic on the + * cursor position. + * + * Ignore any further digits if the value is + * already UINT_MAX / 100. + */ + t->t_nums[t->t_curnum] = + t->t_nums[t->t_curnum] * 10 + c - '0'; } - - t->t_nums[t->t_curnum] += c - '0'; return (1); } else if (c == ';') { if (t->t_stateflags & TS_FIRSTDIGIT) diff --git a/sys/teken/teken_subr.h b/sys/teken/teken_subr.h index 2eee627..d458f2a 100644 --- a/sys/teken/teken_subr.h +++ b/sys/teken/teken_subr.h @@ -324,13 +324,13 @@ static void teken_subr_cursor_position(teken_t *t, unsigned int row, unsigned int col) { - t->t_cursor.tp_row = t->t_originreg.ts_begin + row - 1; - if (t->t_cursor.tp_row >= t->t_originreg.ts_end) - t->t_cursor.tp_row = t->t_originreg.ts_end - 1; + row = row - 1 + t->t_originreg.ts_begin; + t->t_cursor.tp_row = row < t->t_originreg.ts_end ? + row : t->t_originreg.ts_end - 1; - t->t_cursor.tp_col = col - 1; - if (t->t_cursor.tp_col >= t->t_winsize.tp_col) - t->t_cursor.tp_col = t->t_winsize.tp_col - 1; + col--; + t->t_cursor.tp_col = col < t->t_winsize.tp_col ? + col : t->t_winsize.tp_col - 1; t->t_stateflags &= ~TS_WRAPPED; teken_funcs_cursor(t); @@ -583,9 +583,9 @@ static void teken_subr_horizontal_position_absolute(teken_t *t, unsigned int col) { - t->t_cursor.tp_col = col - 1; - if (t->t_cursor.tp_col >= t->t_winsize.tp_col) - t->t_cursor.tp_col = t->t_winsize.tp_col - 1; + col--; + t->t_cursor.tp_col = col < t->t_winsize.tp_col ? + col : t->t_winsize.tp_col - 1; t->t_stateflags &= ~TS_WRAPPED; teken_funcs_cursor(t); @@ -1297,9 +1297,9 @@ static void teken_subr_vertical_position_absolute(teken_t *t, unsigned int row) { - t->t_cursor.tp_row = t->t_originreg.ts_begin + row - 1; - if (t->t_cursor.tp_row >= t->t_originreg.ts_end) - t->t_cursor.tp_row = t->t_originreg.ts_end - 1; + row = row - 1 + t->t_originreg.ts_begin; + t->t_cursor.tp_row = row < t->t_originreg.ts_end ? + row : t->t_originreg.ts_end - 1; t->t_stateflags &= ~TS_WRAPPED; teken_funcs_cursor(t); diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 5ac391e..4e2c9ea 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -923,8 +923,7 @@ static int journal_unsuspend(struct ufsmount *ump); static void softdep_prelink(struct vnode *, struct vnode *); static void add_to_journal(struct worklist *); static void remove_from_journal(struct worklist *); -static bool softdep_excess_inodes(struct ufsmount *); -static bool softdep_excess_dirrem(struct ufsmount *); +static bool softdep_excess_items(struct ufsmount *, int); static void softdep_process_journal(struct mount *, struct worklist *, int); static struct jremref *newjremref(struct dirrem *, struct inode *, struct inode *ip, off_t, nlink_t); @@ -2212,7 +2211,7 @@ inodedep_lookup(mp, inum, flags, inodedeppp) * responsible for more than our share of that usage and * we are not in a rush, request some inodedep cleanup. */ - if (softdep_excess_inodes(ump)) + if (softdep_excess_items(ump, D_INODEDEP)) schedule_cleanup(mp); else FREE_LOCK(ump); @@ -2307,7 +2306,12 @@ newblk_lookup(mp, newblkno, flags, newblkpp) return (1); if ((flags & DEPALLOC) == 0) return (0); - FREE_LOCK(ump); + if (softdep_excess_items(ump, D_NEWBLK) || + softdep_excess_items(ump, D_ALLOCDIRECT) || + softdep_excess_items(ump, D_ALLOCINDIR)) + schedule_cleanup(mp); + else + FREE_LOCK(ump); newblk = malloc(sizeof(union allblk), M_NEWBLK, M_SOFTDEP_FLAGS | M_ZERO); workitem_alloc(&newblk->nb_list, D_NEWBLK, mp); @@ -2406,7 +2410,11 @@ softdep_initialize() { TAILQ_INIT(&softdepmounts); +#ifdef __LP64__ max_softdeps = desiredvnodes * 4; +#else + max_softdeps = desiredvnodes * 2; +#endif /* initialise bioops hack */ bioops.io_start = softdep_disk_io_initiation; @@ -9106,7 +9114,7 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) * the number of freefile and freeblks structures. */ ACQUIRE_LOCK(ip->i_ump); - if (!IS_SNAPSHOT(ip) && softdep_excess_dirrem(ip->i_ump)) + if (!IS_SNAPSHOT(ip) && softdep_excess_items(ip->i_ump, D_DIRREM)) schedule_cleanup(ITOV(dp)->v_mount); else FREE_LOCK(ip->i_ump); @@ -13244,20 +13252,12 @@ retry: } static bool -softdep_excess_inodes(struct ufsmount *ump) -{ - - return (dep_current[D_INODEDEP] > max_softdeps && - ump->softdep_curdeps[D_INODEDEP] > max_softdeps / - stat_flush_threads); -} - -static bool -softdep_excess_dirrem(struct ufsmount *ump) +softdep_excess_items(struct ufsmount *ump, int item) { - return (dep_current[D_DIRREM] > max_softdeps / 2 && - ump->softdep_curdeps[D_DIRREM] > (max_softdeps / 2) / + KASSERT(item >= 0 && item < D_LAST, ("item %d", item)); + return (dep_current[item] > max_softdeps && + ump->softdep_curdeps[item] > max_softdeps / stat_flush_threads); } @@ -13313,15 +13313,25 @@ softdep_ast_cleanup_proc(void) for (;;) { req = false; ACQUIRE_LOCK(ump); - if (softdep_excess_inodes(ump)) { + if (softdep_excess_items(ump, D_INODEDEP)) { req = true; request_cleanup(mp, FLUSH_INODES); } - if (softdep_excess_dirrem(ump)) { + if (softdep_excess_items(ump, D_DIRREM)) { req = true; request_cleanup(mp, FLUSH_BLOCKS); } FREE_LOCK(ump); + if (softdep_excess_items(ump, D_NEWBLK) || + softdep_excess_items(ump, D_ALLOCDIRECT) || + softdep_excess_items(ump, D_ALLOCINDIR)) { + error = vn_start_write(NULL, &mp, V_WAIT); + if (error == 0) { + req = true; + VFS_SYNC(mp, MNT_WAIT); + vn_finished_write(mp); + } + } if ((td->td_pflags & TDP_KTHREAD) != 0 || !req) break; } diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index c09dbc2..4deebd9 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -2354,8 +2354,8 @@ swapoff_one(struct swdevt *sp, struct ucred *cred) swap_pager_swapoff(sp); sp->sw_close(curthread, sp); - sp->sw_id = NULL; mtx_lock(&sw_dev_mtx); + sp->sw_id = NULL; TAILQ_REMOVE(&swtailq, sp, sw_list); nswapdev--; if (nswapdev == 0) { @@ -2541,13 +2541,39 @@ swapgeom_close_ev(void *arg, int flags) g_destroy_consumer(cp); } +/* + * Add a reference to the g_consumer for an inflight transaction. + */ +static void +swapgeom_acquire(struct g_consumer *cp) +{ + + mtx_assert(&sw_dev_mtx, MA_OWNED); + cp->index++; +} + +/* + * Remove a reference from the g_consumer. Post a close event if + * all referneces go away. + */ +static void +swapgeom_release(struct g_consumer *cp, struct swdevt *sp) +{ + + mtx_assert(&sw_dev_mtx, MA_OWNED); + cp->index--; + if (cp->index == 0) { + if (g_post_event(swapgeom_close_ev, cp, M_NOWAIT, NULL) == 0) + sp->sw_id = NULL; + } +} + static void swapgeom_done(struct bio *bp2) { struct swdevt *sp; struct buf *bp; struct g_consumer *cp; - int destroy; bp = bp2->bio_caller2; cp = bp2->bio_from; @@ -2557,16 +2583,11 @@ swapgeom_done(struct bio *bp2) bp->b_resid = bp->b_bcount - bp2->bio_completed; bp->b_error = bp2->bio_error; bufdone(bp); + sp = bp2->bio_caller1; mtx_lock(&sw_dev_mtx); - destroy = ((--cp->index) == 0 && cp->private); - if (destroy) { - sp = bp2->bio_caller1; - sp->sw_id = NULL; - } + swapgeom_release(cp, sp); mtx_unlock(&sw_dev_mtx); g_destroy_bio(bp2); - if (destroy) - g_waitfor_event(swapgeom_close_ev, cp, M_WAITOK, NULL); } static void @@ -2584,13 +2605,16 @@ swapgeom_strategy(struct buf *bp, struct swdevt *sp) bufdone(bp); return; } - cp->index++; + swapgeom_acquire(cp); mtx_unlock(&sw_dev_mtx); if (bp->b_iocmd == BIO_WRITE) bio = g_new_bio(); else bio = g_alloc_bio(); if (bio == NULL) { + mtx_lock(&sw_dev_mtx); + swapgeom_release(cp, sp); + mtx_unlock(&sw_dev_mtx); bp->b_error = ENOMEM; bp->b_ioflags |= BIO_ERROR; bufdone(bp); @@ -2630,7 +2654,12 @@ swapgeom_orphan(struct g_consumer *cp) break; } } - cp->private = (void *)(uintptr_t)1; + /* + * Drop reference we were created with. Do directly since we're in a + * special context where we don't have to queue the call to + * swapgeom_close_ev(). + */ + cp->index--; destroy = ((sp != NULL) && (cp->index == 0)); if (destroy) sp->sw_id = NULL; @@ -2691,8 +2720,8 @@ swapongeom_ev(void *arg, int flags) if (gp == NULL) gp = g_new_geomf(&g_swap_class, "swap"); cp = g_new_consumer(gp); - cp->index = 0; /* Number of active I/Os. */ - cp->private = NULL; /* Orphanization flag */ + cp->index = 1; /* Number of active I/Os, plus one for being active. */ + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; g_attach(cp, pp); /* * XXX: Everytime you think you can improve the margin for diff --git a/sys/vm/uma.h b/sys/vm/uma.h index ed69e19..d3e0658 100644 --- a/sys/vm/uma.h +++ b/sys/vm/uma.h @@ -382,7 +382,8 @@ uma_zfree(uma_zone_t zone, void *item) * A pointer to the allocated memory or NULL on failure. */ -typedef void *(*uma_alloc)(uma_zone_t zone, int size, uint8_t *pflag, int wait); +typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, uint8_t *pflag, + int wait); /* * Backend page free routines @@ -395,7 +396,7 @@ typedef void *(*uma_alloc)(uma_zone_t zone, int size, uint8_t *pflag, int wait); * Returns: * None */ -typedef void (*uma_free)(void *item, int size, uint8_t pflag); +typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag); diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index d0df901..ee0b207 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -229,10 +229,10 @@ enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI }; /* Prototypes.. */ -static void *noobj_alloc(uma_zone_t, int, uint8_t *, int); -static void *page_alloc(uma_zone_t, int, uint8_t *, int); -static void *startup_alloc(uma_zone_t, int, uint8_t *, int); -static void page_free(void *, int, uint8_t); +static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int); +static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int); +static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int); +static void page_free(void *, vm_size_t, uint8_t); static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int); static void cache_drain(uma_zone_t); static void bucket_drain(uma_zone_t, uma_bucket_t); @@ -1038,7 +1038,7 @@ out: * the VM is ready. */ static void * -startup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) +startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait) { uma_keg_t keg; uma_slab_t tmps; @@ -1098,7 +1098,7 @@ startup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) * NULL if M_NOWAIT is set. */ static void * -page_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) +page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait) { void *p; /* Returned page */ @@ -1120,7 +1120,7 @@ page_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) * NULL if M_NOWAIT is set. */ static void * -noobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) +noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { TAILQ_HEAD(, vm_page) alloctail; u_long npages; @@ -1183,7 +1183,7 @@ noobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) * Nothing */ static void -page_free(void *mem, int size, uint8_t flags) +page_free(void *mem, vm_size_t size, uint8_t flags) { struct vmem *vmem; @@ -3269,7 +3269,7 @@ uma_zone_exhausted_nolock(uma_zone_t zone) } void * -uma_large_malloc(int size, int wait) +uma_large_malloc(vm_size_t size, int wait) { void *mem; uma_slab_t slab; diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h index 1ffc7d5..ad2a405 100644 --- a/sys/vm/uma_int.h +++ b/sys/vm/uma_int.h @@ -341,7 +341,7 @@ zone_first_keg(uma_zone_t zone) #ifdef _KERNEL /* Internal prototypes */ static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data); -void *uma_large_malloc(int size, int wait); +void *uma_large_malloc(vm_size_t size, int wait); void uma_large_free(uma_slab_t slab); /* Lock Macros */ @@ -424,8 +424,9 @@ vsetslab(vm_offset_t va, uma_slab_t slab) * if they can provide more effecient allocation functions. This is useful * for using direct mapped addresses. */ -void *uma_small_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait); -void uma_small_free(void *mem, int size, uint8_t flags); +void *uma_small_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, + int wait); +void uma_small_free(void *mem, vm_size_t size, uint8_t flags); #endif /* _KERNEL */ #endif /* VM_UMA_INT_H */ diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 93db8d1..46e3089 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -3989,12 +3989,10 @@ RetryLookup:; vm_map_unlock_read(map); return (KERN_PROTECTION_FAILURE); } - if ((entry->eflags & MAP_ENTRY_USER_WIRED) && - (entry->eflags & MAP_ENTRY_COW) && - (fault_type & VM_PROT_WRITE)) { - vm_map_unlock_read(map); - return (KERN_PROTECTION_FAILURE); - } + KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags & + (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) != + (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY), + ("entry %p flags %x", entry, entry->eflags)); if ((fault_typea & VM_PROT_COPY) != 0 && (entry->max_protection & VM_PROT_WRITE) == 0 && (entry->eflags & MAP_ENTRY_COW) == 0) { @@ -4148,10 +4146,6 @@ vm_map_lookup_locked(vm_map_t *var_map, /* IN/OUT */ fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; if ((fault_type & prot) != fault_type) return (KERN_PROTECTION_FAILURE); - if ((entry->eflags & MAP_ENTRY_USER_WIRED) && - (entry->eflags & MAP_ENTRY_COW) && - (fault_type & VM_PROT_WRITE)) - return (KERN_PROTECTION_FAILURE); /* * If this page is not pageable, we have to get it for all possible diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 998cd37..6a56fd7 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -93,6 +93,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sdt.h> #include <sys/signalvar.h> #include <sys/smp.h> +#include <sys/time.h> #include <sys/vnode.h> #include <sys/vmmeter.h> #include <sys/rwlock.h> @@ -170,7 +171,7 @@ static int vm_pageout_update_period; static int defer_swap_pageouts; static int disable_swap_pageouts; static int lowmem_period = 10; -static int lowmem_ticks; +static time_t lowmem_uptime; #if defined(NO_SWAPPING) static int vm_swap_enabled = 0; @@ -932,7 +933,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) * some. We rate limit to avoid thrashing. */ if (vmd == &vm_dom[0] && pass > 0 && - (ticks - lowmem_ticks) / hz >= lowmem_period) { + (time_uptime - lowmem_uptime) >= lowmem_period) { /* * Decrease registered cache sizes. */ @@ -943,7 +944,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) * drained above. */ uma_reclaim(); - lowmem_ticks = ticks; + lowmem_uptime = time_uptime; } /* diff --git a/sys/xen/interface/io/blkif.h b/sys/xen/interface/io/blkif.h index b725776..9c73ae7 100644 --- a/sys/xen/interface/io/blkif.h +++ b/sys/xen/interface/io/blkif.h @@ -97,6 +97,28 @@ * * The type of the backing device/object. * + * + * direct-io-safe + * Values: 0/1 (boolean) + * Default Value: 0 + * + * The underlying storage is not affected by the direct IO memory + * lifetime bug. See: + * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html + * + * Therefore this option gives the backend permission to use + * O_DIRECT, notwithstanding that bug. + * + * That is, if this option is enabled, use of O_DIRECT is safe, + * in circumstances where we would normally have avoided it as a + * workaround for that bug. This option is not relevant for all + * backends, and even not necessarily supported for those for + * which it is relevant. A backend which knows that it is not + * affected by the bug can ignore this option. + * + * This option doesn't require a backend to use O_DIRECT, so it + * should not be used to try to control the caching behaviour. + * *--------------------------------- Features --------------------------------- * * feature-barrier @@ -126,6 +148,34 @@ * of this type may still be returned at any time with the * BLKIF_RSP_EOPNOTSUPP result code. * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7 + * + * A value of "1" indicates that the backend can keep the grants used + * by the frontend driver mapped, so the same set of grants should be + * used in all transactions. The maximum number of grants the backend + * can map persistently depends on the implementation, but ideally it + * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this + * feature the backend doesn't need to unmap each grant, preventing + * costly TLB flushes. The backend driver should only map grants + * persistently if the frontend supports it. If a backend driver chooses + * to use the persistent protocol when the frontend doesn't support it, + * it will probably hit the maximum number of persistently mapped grants + * (due to the fact that the frontend won't be reusing the same grants), + * and fall back to non-persistent mode. Backend implementations may + * shrink or expand the number of persistently mapped grants without + * notifying the frontend depending on memory constraints (this might + * cause a performance degradation). + * + * If a backend driver wants to limit the maximum number of persistently + * mapped grants to a value less than RING_SIZE * + * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to + * discard the grants that are less commonly used. Using a LRU in the + * backend driver paired with a LIFO queue in the frontend will + * allow us to have better performance in this scenario. + * *----------------------- Request Transport Parameters ------------------------ * * max-ring-page-order @@ -145,33 +195,17 @@ * The maximum supported size of the request ring buffer in units of * machine pages. The value must be a power of 2. * - * max-requests <uint32_t> - * Default Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE) - * Maximum Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring-pages) - * - * The maximum number of concurrent, logical requests supported by - * the backend. - * - * Note: A logical request may span multiple ring entries. - * - * max-request-segments - * Values: <uint8_t> - * Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK - * Maximum Value: BLKIF_MAX_SEGMENTS_PER_REQUEST - * - * The maximum value of blkif_request.nr_segments supported by - * the backend. - * - * max-request-size - * Values: <uint32_t> - * Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK * PAGE_SIZE - * Maximum Value: BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE + *------------------------- Backend Device Properties ------------------------- * - * The maximum amount of data, in bytes, that can be referenced by a - * request type that accesses frontend memory (currently BLKIF_OP_READ, - * BLKIF_OP_WRITE, or BLKIF_OP_WRITE_BARRIER). + * discard-enable + * Values: 0/1 (boolean) + * Default Value: 1 * - *------------------------- Backend Device Properties ------------------------- + * This optional property, set by the toolstack, instructs the backend + * to offer discard to the frontend. If the property is missing the + * backend should offer discard if the backing storage actually supports + * it. This optional property, set by the toolstack, requests that the + * backend offer, or not offer, discard to the frontend. * * discard-alignment * Values: <uint32_t> @@ -192,6 +226,7 @@ * discard-secure * Values: 0/1 (boolean) * Default Value: 0 + * Notes: 10 * * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD * requests with the BLKIF_DISCARD_SECURE flag set. @@ -206,13 +241,17 @@ * sector-size * Values: <uint32_t> * - * The size, in bytes, of the individually addressible data blocks - * on the backend device. + * The logical sector size, in bytes, of the backend device. + * + * physical-sector-size + * Values: <uint32_t> + * + * The physical sector size, in bytes, of the backend device. * * sectors * Values: <uint64_t> * - * The size of the backend device, expressed in units of its native + * The size of the backend device, expressed in units of its logical * sector size ("sector-size"). * ***************************************************************************** @@ -269,32 +308,26 @@ * The size of the frontend allocated request ring buffer in units of * machine pages. The value must be a power of 2. * - * max-requests - * Values: <uint32_t> - * Default Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE) - * Maximum Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring-pages) - * - * The maximum number of concurrent, logical requests that will be - * issued by the frontend. - * - * Note: A logical request may span multiple ring entries. - * - * max-request-segments - * Values: <uint8_t> - * Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK - * Maximum Value: MIN(255, backend/max-request-segments) - * - * The maximum value the frontend will set in the - * blkif_request.nr_segments field. - * - * max-request-size - * Values: <uint32_t> - * Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK * PAGE_SIZE - * Maximum Value: max-request-segments * PAGE_SIZE - * - * The maximum amount of data, in bytes, that can be referenced by - * a request type that accesses frontend memory (currently BLKIF_OP_READ, - * BLKIF_OP_WRITE, or BLKIF_OP_WRITE_BARRIER). + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7, 8, 9 + * + * A value of "1" indicates that the frontend will reuse the same grants + * for all transactions, allowing the backend to map them with write + * access (even when it should be read-only). If the frontend hits the + * maximum number of allowed persistently mapped grants, it can fallback + * to non persistent mode. This will cause a performance degradation, + * since the the backend driver will still try to map those grants + * persistently. Since the persistent grants protocol is compatible with + * the previous protocol, a frontend driver can choose to work in + * persistent mode even when the backend doesn't support it. + * + * It is recommended that the frontend driver stores the persistently + * mapped grants in a LIFO queue, so a subset of all persistently mapped + * grants gets used commonly. This is done in case the backend driver + * decides to limit the maximum number of persistently mapped grants + * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. * *------------------------- Virtual Device Properties ------------------------- * @@ -315,17 +348,23 @@ * ----- * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer * PV drivers. - * (2) Multi-page ring buffer scheme first used in some Red Hat distributions + * (2) Multi-page ring buffer scheme first used in some RedHat distributions * including a distribution deployed on certain nodes of the Amazon * EC2 cluster. * (3) Support for multi-page ring buffers was implemented independently, - * in slightly different forms, by both Citrix and Red Hat/Amazon. + * in slightly different forms, by both Citrix and RedHat/Amazon. * For full interoperability, block front and backends should publish * identical ring parameters, adjusted for unit differences, to the * XenStore nodes used in both schemes. - * (4) Devices that support discard functionality may internally allocate - * space (discardable extents) in units that are larger than the - * exported logical block size. + * (4) Devices that support discard functionality may internally allocate space + * (discardable extents) in units that are larger than the exported logical + * block size. If the backing device has such discardable extents the + * backend should provide both discard-granularity and discard-alignment. + * Providing just one of the two may be considered an error by the frontend. + * Backends supporting discard should include discard-granularity and + * discard-alignment even if it supports discarding individual sectors. + * Frontends should assume discard-alignment == 0 and discard-granularity + * == sector size if these keys are missing. * (5) The discard-alignment parameter allows a physical device to be * partitioned into virtual devices that do not necessarily begin or * end on a discardable extent boundary. @@ -333,6 +372,19 @@ * 'ring-ref' is used to communicate the grant reference for this * page to the backend. When using a multi-page ring, the 'ring-ref' * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. + * (7) When using persistent grants data has to be copied from/to the page + * where the grant is currently mapped. The overhead of doing this copy + * however doesn't suppress the speed improvement of not having to unmap + * the grants. + * (8) The frontend driver has to allow the backend driver to map all grants + * with write access, even when they should be mapped read-only, since + * further requests may reuse these grants and require write permissions. + * (9) Linux implementation doesn't have a limit on the maximum number of + * grants that can be persistently mapped in the frontend driver, but + * due to the frontent driver implementation it should never be bigger + * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + *(10) The discard-secure property may be present and will be set to 1 if the + * backing device supports secure discard. */ /* @@ -457,16 +509,48 @@ #define BLKIF_OP_DISCARD 5 /* - * Maximum scatter/gather segments per request (header + segment blocks). + * Recognized if "feature-max-indirect-segments" in present in the backend + * xenbus info. The "feature-max-indirect-segments" node contains the maximum + * number of segments allowed by the backend per request. If the node is + * present, the frontend might use blkif_request_indirect structs in order to + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The + * maximum number of indirect segments is fixed by the backend, but the + * frontend can issue requests with any number of indirect segments as long as + * it's less than the number provided by the backend. The indirect_grefs field + * in blkif_request_indirect should be filled by the frontend with the + * grant references of the pages that are holding the indirect segments. + * These pages are filled with an array of blkif_request_segment that hold the + * information about the segments. The number of indirect pages to use is + * determined by the number of segments an indirect request contains. Every + * indirect page can contain a maximum of + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to + * calculate the number of indirect pages to use we have to do + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). + * + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* + * create the "feature-max-indirect-segments" node! + */ +#define BLKIF_OP_INDIRECT 6 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 /* + * Maximum number of indirect pages to use per request. + */ +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 + +/* * NB. first_sect and last_sect in blkif_request_segment, as well as * sector_number in blkif_request, are always expressed in 512-byte units. * However they must be properly aligned to the real sector size of the - * physical disk, which is reported in the "sector-size" node in the backend - * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units. + * physical disk, which is reported in the "physical-sector-size" node in + * the backend xenbus info. Also the xenbus "sectors" node is expressed in + * 512-byte units. */ struct blkif_request_segment { grant_ref_t gref; /* reference to I/O buffer frame */ @@ -478,21 +562,6 @@ typedef struct blkif_request_segment blkif_request_segment_t; /* * Starting ring element for any I/O request. - * - * One or more segment blocks can be inserted into the request ring - * just after a blkif_request_t, allowing requests to operate on - * up to BLKIF_MAX_SEGMENTS_PER_REQUEST. - * - * BLKIF_SEGS_TO_BLOCKS() can be used on blkif_requst.nr_segments - * to determine the number of contiguous ring entries associated - * with this request. - * - * Note: Due to the way Xen request rings operate, the producer and - * consumer indices of the ring must be incremented by the - * BLKIF_SEGS_TO_BLOCKS() value of the associated request. - * (e.g. a response to a 3 ring entry request must also consume - * 3 entries in the ring, even though only the first ring entry - * in the response has any data.) */ struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ @@ -519,6 +588,20 @@ struct blkif_request_discard { }; typedef struct blkif_request_discard blkif_request_discard_t; +struct blkif_request_indirect { + uint8_t operation; /* BLKIF_OP_INDIRECT */ + uint8_t indirect_op; /* BLKIF_OP_{READ/WRITE} */ + uint16_t nr_segments; /* number of segments */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + blkif_vdev_t handle; /* same as for read/write requests */ + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifdef __i386__ + uint64_t pad; /* Make it 64 byte aligned on i386 */ +#endif +}; +typedef struct blkif_request_indirect blkif_request_indirect_t; + struct blkif_response { uint64_t id; /* copied from request */ uint8_t operation; /* copied from request */ @@ -550,7 +633,7 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil diff --git a/usr.bin/calendar/calendars/calendar.christian b/usr.bin/calendar/calendars/calendar.christian index 3365b07..f213594 100644 --- a/usr.bin/calendar/calendars/calendar.christian +++ b/usr.bin/calendar/calendars/calendar.christian @@ -5,8 +5,9 @@ */ #ifndef _calendar_christian_ -#define _calendar_christian_ +#define _calendar_christian_ +01/01 Solemnity of Mary, Mother of God 01/05 Last (twelfth) day of Christmastide 01/06 Epiphany Easter-47 Shrove Tuesday / Mardi Gras (day before Ash Wednesday) @@ -21,10 +22,13 @@ Easter+50 Whitmonday Easter+56 Trinity Sunday (7 days after Pentecost) Easter+60 Corpus Christi (11 days after Pentecost) 05/28* Rogation Sunday +08/15 Assumption of the Blessed Virgin Mary 10/18 Feast Day of St. Luke +11/01 All Saints' Day 11/SunLast First Sunday of Advent (4th Sunday before Christmas) 12/SunFirst First Sunday of Advent (4th Sunday before Christmas) 12/06 St. Nicholas' Day +12/08 Feast of the Immaculate Conception 12/24 Christmas Eve 12/25 Christmastide begins: First day of Christmas 12/26 Second day of Christmas (Boxing Day) diff --git a/usr.bin/iconv/iconv.c b/usr.bin/iconv/iconv.c index 494aaf7..161b22e 100644 --- a/usr.bin/iconv/iconv.c +++ b/usr.bin/iconv/iconv.c @@ -156,11 +156,11 @@ int main(int argc, char **argv) { FILE *fp; - char *opt_f, *opt_t; + const char *opt_f, *opt_t; int ch, i, res; bool opt_c = false, opt_s = false; - opt_f = opt_t = strdup(""); + opt_f = opt_t = ""; setlocale(LC_ALL, ""); setprogname(argv[0]); @@ -186,12 +186,12 @@ main(int argc, char **argv) case 'f': /* from */ if (optarg != NULL) - opt_f = strdup(optarg); + opt_f = optarg; break; case 't': /* to */ if (optarg != NULL) - opt_t = strdup(optarg); + opt_t = optarg; break; default: usage(); diff --git a/usr.bin/tftp/main.c b/usr.bin/tftp/main.c index c0f5088..49bd62a 100644 --- a/usr.bin/tftp/main.c +++ b/usr.bin/tftp/main.c @@ -223,7 +223,7 @@ urihandling(char *URI) char line[MAXLINE]; int i; - strncpy(uri, URI, ARG_MAX); + strlcpy(uri, URI, ARG_MAX); host = uri + 7; if ((s = strchr(host, '/')) == NULL) { @@ -320,11 +320,10 @@ setpeer0(char *host, const char *lport) /* res->ai_addr <= sizeof(peeraddr) is guaranteed */ memcpy(&peer_sock, res->ai_addr, res->ai_addrlen); if (res->ai_canonname) { - (void) strncpy(hostname, res->ai_canonname, + (void) strlcpy(hostname, res->ai_canonname, sizeof(hostname)); } else - (void) strncpy(hostname, host, sizeof(hostname)); - hostname[sizeof(hostname)-1] = 0; + (void) strlcpy(hostname, host, sizeof(hostname)); connected = 1; } diff --git a/usr.sbin/inetd/inetd.c b/usr.sbin/inetd/inetd.c index eebcfea..7614d73 100644 --- a/usr.sbin/inetd/inetd.c +++ b/usr.sbin/inetd/inetd.c @@ -327,16 +327,7 @@ main(int argc, char **argv) struct request_info req; int denied; char *service = NULL; - union { - struct sockaddr peer_un; - struct sockaddr_in peer_un4; - struct sockaddr_in6 peer_un6; - struct sockaddr_storage peer_max; - } p_un; -#define peer p_un.peer_un -#define peer4 p_un.peer_un4 -#define peer6 p_un.peer_un6 -#define peermax p_un.peer_max + struct sockaddr_storage peer; int i; struct addrinfo hints, *res; const char *servname; @@ -656,24 +647,24 @@ main(int argc, char **argv) } else ctrl = sep->se_fd; if (dolog && !ISWRAP(sep)) { - char pname[INET6_ADDRSTRLEN] = "unknown"; + char pname[NI_MAXHOST] = "unknown"; socklen_t sl; - sl = sizeof peermax; + sl = sizeof(peer); if (getpeername(ctrl, (struct sockaddr *) - &peermax, &sl)) { - sl = sizeof peermax; + &peer, &sl)) { + sl = sizeof(peer); if (recvfrom(ctrl, buf, sizeof(buf), MSG_PEEK, - (struct sockaddr *)&peermax, + (struct sockaddr *)&peer, &sl) >= 0) { - getnameinfo((struct sockaddr *)&peermax, - peer.sa_len, + getnameinfo((struct sockaddr *)&peer, + peer.ss_len, pname, sizeof(pname), NULL, 0, NI_NUMERICHOST); } } else { - getnameinfo((struct sockaddr *)&peermax, - peer.sa_len, + getnameinfo((struct sockaddr *)&peer, + peer.ss_len, pname, sizeof(pname), NULL, 0, NI_NUMERICHOST); } @@ -1761,9 +1752,7 @@ more: strlen(sep->se_proto) + 1 - 4); sep->se_rpc = 1; sep->se_rpc_prog = sep->se_rpc_lowvers = - sep->se_rpc_lowvers = 0; - memcpy(&sep->se_ctrladdr4, bind_sa4, - sizeof(sep->se_ctrladdr4)); + sep->se_rpc_highvers = 0; if ((versp = strrchr(sep->se_service, '/'))) { *versp++ = '\0'; switch (sscanf(versp, "%u-%u", @@ -2108,7 +2097,7 @@ inetd_setproctitle(const char *a, int s) { socklen_t size; struct sockaddr_storage ss; - char buf[80], pbuf[INET6_ADDRSTRLEN]; + char buf[80], pbuf[NI_MAXHOST]; size = sizeof(ss); if (getpeername(s, (struct sockaddr *)&ss, &size) == 0) { @@ -2124,7 +2113,7 @@ int check_loop(const struct sockaddr *sa, const struct servtab *sep) { struct servtab *se2; - char pname[INET6_ADDRSTRLEN]; + char pname[NI_MAXHOST]; for (se2 = servtab; se2; se2 = se2->se_next) { if (!se2->se_bi || se2->se_socktype != SOCK_DGRAM) @@ -2138,8 +2127,8 @@ check_loop(const struct sockaddr *sa, const struct servtab *sep) continue; #ifdef INET6 case AF_INET6: - if (((const struct sockaddr_in *)sa)->sin_port == - se2->se_ctrladdr4.sin_port) + if (((const struct sockaddr_in6 *)sa)->sin6_port == + se2->se_ctrladdr6.sin6_port) goto isloop; continue; #endif @@ -2338,7 +2327,7 @@ cpmip(const struct servtab *sep, int ctrl) } } if ((cnt * 60) / (CHTSIZE * CHTGRAN) > sep->se_maxcpm) { - char pname[INET6_ADDRSTRLEN]; + char pname[NI_MAXHOST]; getnameinfo((struct sockaddr *)&rss, ((struct sockaddr *)&rss)->sa_len, diff --git a/usr.sbin/ndp/ndp.c b/usr.sbin/ndp/ndp.c index 830f344..ce3c4e0 100644 --- a/usr.sbin/ndp/ndp.c +++ b/usr.sbin/ndp/ndp.c @@ -127,28 +127,26 @@ static int32_t thiszone; /* time difference with gmt */ static int s = -1; static int repeat = 0; -char ntop_buf[INET6_ADDRSTRLEN]; /* inet_ntop() */ -char host_buf[NI_MAXHOST]; /* getnameinfo() */ -char ifix_buf[IFNAMSIZ]; /* if_indextoname() */ +static char host_buf[NI_MAXHOST]; /* getnameinfo() */ +static char ifix_buf[IFNAMSIZ]; /* if_indextoname() */ -int main(int, char **); static int file(char *); -void getsocket(void); -int set(int, char **); -void get(char *); -int delete(char *); -void dump(struct in6_addr *, int); +static void getsocket(void); +static int set(int, char **); +static void get(char *); +static int delete(char *); +static void dump(struct sockaddr_in6 *, int); static struct in6_nbrinfo *getnbrinfo(struct in6_addr *, int, int); static char *ether_str(struct sockaddr_dl *); -int ndp_ether_aton(char *, u_char *); -void usage(void); -int rtmsg(int); -void ifinfo(char *, int, char **); -void rtrlist(void); -void plist(void); -void pfx_flush(void); -void rtr_flush(void); -void harmonize_rtr(void); +static int ndp_ether_aton(char *, u_char *); +static void usage(void); +static int rtmsg(int); +static void ifinfo(char *, int, char **); +static void rtrlist(void); +static void plist(void); +static void pfx_flush(void); +static void rtr_flush(void); +static void harmonize_rtr(void); #ifdef SIOCSDEFIFACE_IN6 /* XXX: check SIOCGDEFIFACE_IN6 as well? */ static void getdefif(void); static void setdefif(char *); @@ -156,24 +154,18 @@ static void setdefif(char *); static char *sec2str(time_t); static void ts_print(const struct timeval *); -#ifdef ICMPV6CTL_ND6_DRLIST static char *rtpref_str[] = { "medium", /* 00 */ "high", /* 01 */ "rsv", /* 10 */ "low" /* 11 */ }; -#endif - -int mode = 0; -char *arg = NULL; int -main(argc, argv) - int argc; - char **argv; +main(int argc, char **argv) { - int ch; + int ch, mode = 0; + char *arg = NULL; pid = getpid(); thiszone = gmt2local(0); @@ -322,8 +314,7 @@ main(argc, argv) * Process a file to set standard ndp entries */ static int -file(name) - char *name; +file(char *name) { FILE *fp; int i, retval; @@ -357,7 +348,7 @@ file(name) return (retval); } -void +static void getsocket() { if (s < 0) { @@ -369,23 +360,32 @@ getsocket() } } -struct sockaddr_in6 so_mask = {sizeof(so_mask), AF_INET6 }; -struct sockaddr_in6 blank_sin = {sizeof(blank_sin), AF_INET6 }, sin_m; -struct sockaddr_dl blank_sdl = {sizeof(blank_sdl), AF_LINK }, sdl_m; -time_t expire_time; -int flags, found_entry; -struct { +static struct sockaddr_in6 so_mask = { + .sin6_len = sizeof(so_mask), + .sin6_family = AF_INET6 +}; +static struct sockaddr_in6 blank_sin = { + .sin6_len = sizeof(blank_sin), + .sin6_family = AF_INET6 +}; +static struct sockaddr_in6 sin_m; +static struct sockaddr_dl blank_sdl = { + .sdl_len = sizeof(blank_sdl), + .sdl_family = AF_LINK +}; +static struct sockaddr_dl sdl_m; +static time_t expire_time; +static int flags, found_entry; +static struct { struct rt_msghdr m_rtm; char m_space[512]; -} m_rtmsg; +} m_rtmsg; /* * Set an individual neighbor cache entry */ -int -set(argc, argv) - int argc; - char **argv; +static int +set(int argc, char **argv) { register struct sockaddr_in6 *sin = &sin_m; register struct sockaddr_dl *sdl; @@ -459,9 +459,8 @@ overwrite: /* * Display an individual neighbor cache entry */ -void -get(host) - char *host; +static void +get(char *host) { struct sockaddr_in6 *sin = &sin_m; struct addrinfo hints, *res; @@ -477,7 +476,9 @@ get(host) return; } sin->sin6_addr = ((struct sockaddr_in6 *)res->ai_addr)->sin6_addr; - dump(&sin->sin6_addr, 0); + sin->sin6_scope_id = + ((struct sockaddr_in6 *)res->ai_addr)->sin6_scope_id; + dump(sin, 0); if (found_entry == 0) { getnameinfo((struct sockaddr *)sin, sin->sin6_len, host_buf, sizeof(host_buf), NULL ,0, @@ -490,9 +491,8 @@ get(host) /* * Delete a neighbor cache entry */ -int -delete(host) - char *host; +static int +delete(char *host) { struct sockaddr_in6 *sin = &sin_m; register struct rt_msghdr *rtm = &m_rtmsg.m_rtm; @@ -559,10 +559,8 @@ delete: /* * Dump the entire neighbor cache */ -void -dump(addr, cflag) - struct in6_addr *addr; - int cflag; +static void +dump(struct sockaddr_in6 *addr, int cflag) { int mib[6]; size_t needed; @@ -634,7 +632,9 @@ again:; continue; if (addr) { - if (!IN6_ARE_ADDR_EQUAL(addr, &sin->sin6_addr)) + if (IN6_ARE_ADDR_EQUAL(&addr->sin6_addr, + &sin->sin6_addr) == 0 || + addr->sin6_scope_id != sin->sin6_scope_id) continue; found_entry = 1; } else if (IN6_IS_ADDR_MULTICAST(&sin->sin6_addr)) @@ -767,10 +767,7 @@ again:; } static struct in6_nbrinfo * -getnbrinfo(addr, ifindex, warning) - struct in6_addr *addr; - int ifindex; - int warning; +getnbrinfo(struct in6_addr *addr, int ifindex, int warning) { static struct in6_nbrinfo nbi; int s; @@ -796,7 +793,6 @@ static char * ether_str(struct sockaddr_dl *sdl) { static char hbuf[NI_MAXHOST]; - char *cp; if (sdl->sdl_alen == ETHER_ADDR_LEN) { strlcpy(hbuf, ether_ntoa((struct ether_addr *)LLADDR(sdl)), @@ -810,10 +806,8 @@ ether_str(struct sockaddr_dl *sdl) return(hbuf); } -int -ndp_ether_aton(a, n) - char *a; - u_char *n; +static int +ndp_ether_aton(char *a, u_char *n) { int i, o[6]; @@ -828,7 +822,7 @@ ndp_ether_aton(a, n) return (0); } -void +static void usage() { printf("usage: ndp [-nt] hostname\n"); @@ -844,9 +838,8 @@ usage() exit(1); } -int -rtmsg(cmd) - int cmd; +static int +rtmsg(int cmd) { static int seq; int rlen; @@ -910,11 +903,8 @@ doit: return (0); } -void -ifinfo(ifname, argc, argv) - char *ifname; - int argc; - char **argv; +static void +ifinfo(char *ifname, int argc, char **argv) { struct in6_ndireq nd; int i, s; @@ -1073,10 +1063,9 @@ ifinfo(ifname, argc, argv) #define ND_RA_FLAG_RTPREF_MASK 0x18 /* 00011000 */ #endif -void +static void rtrlist() { -#ifdef ICMPV6CTL_ND6_DRLIST int mib[] = { CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_DRLIST }; char *buf; struct in6_defrouter *p, *ep; @@ -1124,54 +1113,11 @@ rtrlist() sec2str(p->expire - now.tv_sec)); } free(buf); -#else - struct in6_drlist dr; - int s, i; - struct timeval now; - - if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) { - err(1, "socket"); - /* NOTREACHED */ - } - bzero(&dr, sizeof(dr)); - strlcpy(dr.ifname, "lo0", sizeof(dr.ifname)); /* dummy */ - if (ioctl(s, SIOCGDRLST_IN6, (caddr_t)&dr) < 0) { - err(1, "ioctl(SIOCGDRLST_IN6)"); - /* NOTREACHED */ - } -#define DR dr.defrouter[i] - for (i = 0 ; DR.if_index && i < DRLSTSIZ ; i++) { - struct sockaddr_in6 sin6; - - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - sin6.sin6_addr = DR.rtaddr; - getnameinfo((struct sockaddr *)&sin6, sin6.sin6_len, host_buf, - sizeof(host_buf), NULL, 0, - (nflag ? NI_NUMERICHOST : 0)); - - printf("%s if=%s", host_buf, - if_indextoname(DR.if_index, ifix_buf)); - printf(", flags=%s%s", - DR.flags & ND_RA_FLAG_MANAGED ? "M" : "", - DR.flags & ND_RA_FLAG_OTHER ? "O" : ""); - gettimeofday(&now, 0); - if (DR.expire == 0) - printf(", expire=Never\n"); - else - printf(", expire=%s\n", - sec2str(DR.expire - now.tv_sec)); - } -#undef DR - close(s); -#endif } -void +static void plist() { -#ifdef ICMPV6CTL_ND6_PRLIST int mib[] = { CTL_NET, PF_INET6, IPPROTO_ICMPV6, ICMPV6CTL_ND6_PRLIST }; char *buf; struct in6_prefix *p, *ep, *n; @@ -1281,164 +1227,9 @@ plist() printf(" No advertising router\n"); } free(buf); -#else - struct in6_prlist pr; - int s, i; - struct timeval now; - - gettimeofday(&now, 0); - - if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) { - err(1, "socket"); - /* NOTREACHED */ - } - bzero(&pr, sizeof(pr)); - strlcpy(pr.ifname, "lo0", sizeof(pr.ifname)); /* dummy */ - if (ioctl(s, SIOCGPRLST_IN6, (caddr_t)&pr) < 0) { - err(1, "ioctl(SIOCGPRLST_IN6)"); - /* NOTREACHED */ - } -#define PR pr.prefix[i] - for (i = 0; PR.if_index && i < PRLSTSIZ ; i++) { - struct sockaddr_in6 p6; - char namebuf[NI_MAXHOST]; - int niflags; - -#ifdef NDPRF_ONLINK - p6 = PR.prefix; -#else - memset(&p6, 0, sizeof(p6)); - p6.sin6_family = AF_INET6; - p6.sin6_len = sizeof(p6); - p6.sin6_addr = PR.prefix; -#endif - niflags = NI_NUMERICHOST; - if (getnameinfo((struct sockaddr *)&p6, - sizeof(p6), namebuf, sizeof(namebuf), - NULL, 0, niflags)) { - warnx("getnameinfo failed"); - continue; - } - printf("%s/%d if=%s\n", namebuf, PR.prefixlen, - if_indextoname(PR.if_index, ifix_buf)); - - gettimeofday(&now, 0); - /* - * meaning of fields, especially flags, is very different - * by origin. notify the difference to the users. - */ -#if 0 - printf(" %s", - PR.origin == PR_ORIG_RA ? "" : "advertise: "); -#endif -#ifdef NDPRF_ONLINK - printf("flags=%s%s%s%s%s", - PR.raflags.onlink ? "L" : "", - PR.raflags.autonomous ? "A" : "", - (PR.flags & NDPRF_ONLINK) != 0 ? "O" : "", - (PR.flags & NDPRF_DETACHED) != 0 ? "D" : "", -#ifdef NDPRF_HOME - (PR.flags & NDPRF_HOME) != 0 ? "H" : "" -#else - "" -#endif - ); -#else - printf("flags=%s%s", - PR.raflags.onlink ? "L" : "", - PR.raflags.autonomous ? "A" : ""); -#endif - if (PR.vltime == ND6_INFINITE_LIFETIME) - printf(" vltime=infinity"); - else - printf(" vltime=%lu", PR.vltime); - if (PR.pltime == ND6_INFINITE_LIFETIME) - printf(", pltime=infinity"); - else - printf(", pltime=%lu", PR.pltime); - if (PR.expire == 0) - printf(", expire=Never"); - else if (PR.expire >= now.tv_sec) - printf(", expire=%s", - sec2str(PR.expire - now.tv_sec)); - else - printf(", expired"); -#ifdef NDPRF_ONLINK - printf(", ref=%d", PR.refcnt); -#endif -#if 0 - switch (PR.origin) { - case PR_ORIG_RA: - printf(", origin=RA"); - break; - case PR_ORIG_RR: - printf(", origin=RR"); - break; - case PR_ORIG_STATIC: - printf(", origin=static"); - break; - case PR_ORIG_KERNEL: - printf(", origin=kernel"); - break; - default: - printf(", origin=?"); - break; - } -#endif - printf("\n"); - /* - * "advertising router" list is meaningful only if the prefix - * information is from RA. - */ - if (0 && /* prefix origin is almost obsolted */ - PR.origin != PR_ORIG_RA) - ; - else if (PR.advrtrs) { - int j; - printf(" advertised by\n"); - for (j = 0; j < PR.advrtrs; j++) { - struct sockaddr_in6 sin6; - struct in6_nbrinfo *nbi; - - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - sin6.sin6_addr = PR.advrtr[j]; - sin6.sin6_scope_id = PR.if_index; /* XXX */ - getnameinfo((struct sockaddr *)&sin6, - sin6.sin6_len, host_buf, - sizeof(host_buf), NULL, 0, - (nflag ? NI_NUMERICHOST : 0)); - printf(" %s", host_buf); - - nbi = getnbrinfo(&sin6.sin6_addr, - PR.if_index, 0); - if (nbi) { - switch (nbi->state) { - case ND6_LLINFO_REACHABLE: - case ND6_LLINFO_STALE: - case ND6_LLINFO_DELAY: - case ND6_LLINFO_PROBE: - printf(" (reachable)\n"); - break; - default: - printf(" (unreachable)\n"); - } - } else - printf(" (no neighbor state)\n"); - } - if (PR.advrtrs > DRLSTSIZ) - printf(" and %d routers\n", - PR.advrtrs - DRLSTSIZ); - } else - printf(" No advertising router\n"); - } -#undef PR - close(s); -#endif } -void +static void pfx_flush() { char dummyif[IFNAMSIZ+8]; @@ -1451,7 +1242,7 @@ pfx_flush() err(1, "ioctl(SIOCSPFXFLUSH_IN6)"); } -void +static void rtr_flush() { char dummyif[IFNAMSIZ+8]; @@ -1466,7 +1257,7 @@ rtr_flush() close(s); } -void +static void harmonize_rtr() { char dummyif[IFNAMSIZ+8]; @@ -1483,8 +1274,7 @@ harmonize_rtr() #ifdef SIOCSDEFIFACE_IN6 /* XXX: check SIOCGDEFIFACE_IN6 as well? */ static void -setdefif(ifname) - char *ifname; +setdefif(char *ifname) { struct in6_ndifreq ndifreq; unsigned int ifindex; @@ -1537,8 +1327,7 @@ getdefif() #endif static char * -sec2str(total) - time_t total; +sec2str(time_t total) { static char result[256]; int days, hours, mins, secs; @@ -1583,8 +1372,7 @@ sec2str(total) * from tcpdump/util.c */ static void -ts_print(tvp) - const struct timeval *tvp; +ts_print(const struct timeval *tvp) { int s; diff --git a/usr.sbin/ntp/config.h b/usr.sbin/ntp/config.h index 1aae315..7a4a2d1 100644 --- a/usr.sbin/ntp/config.h +++ b/usr.sbin/ntp/config.h @@ -120,7 +120,7 @@ #define CLOCK_PST 1 /* DCF77 raw time code */ -/* #undef CLOCK_RAWDCF */ +#define CLOCK_RAWDCF 1 /* RCC 8000 clock */ /* #undef CLOCK_RCC8000 */ diff --git a/usr.sbin/pciconf/pathnames.h b/usr.sbin/pciconf/pathnames.h index 719615a..61c0993 100644 --- a/usr.sbin/pciconf/pathnames.h +++ b/usr.sbin/pciconf/pathnames.h @@ -1,3 +1,4 @@ /* $FreeBSD$ */ #define _PATH_DEVPCI "/dev/pci" #define _PATH_PCIVDB "/usr/share/misc/pci_vendors" +#define _PATH_LPCIVDB "/usr/local/share/pciids/pci.ids" diff --git a/usr.sbin/pciconf/pciconf.8 b/usr.sbin/pciconf/pciconf.8 index e0caf1f..8dbb2a6 100644 --- a/usr.sbin/pciconf/pciconf.8 +++ b/usr.sbin/pciconf/pciconf.8 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 20, 2014 +.Dd September 06, 2015 .Dt PCICONF 8 .Os .Sh NAME @@ -281,7 +281,9 @@ indicates a halfword (two-byte) operation. The default is to read or write a longword (four bytes). .Sh ENVIRONMENT -The PCI vendor/device information database is normally read from +PCI vendor and device information is read from +.Pa /usr/local/share/pciids/pci.ids . +If that file is not present, it is read from .Pa /usr/share/misc/pci_vendors . This path can be overridden by setting the environment variable .Ev PCICONF_VENDOR_DATABASE . diff --git a/usr.sbin/pciconf/pciconf.c b/usr.sbin/pciconf/pciconf.c index 22bb61d..ad709db 100644 --- a/usr.sbin/pciconf/pciconf.c +++ b/usr.sbin/pciconf/pciconf.c @@ -542,9 +542,12 @@ load_vendors(void) */ TAILQ_INIT(&pci_vendors); if ((dbf = getenv("PCICONF_VENDOR_DATABASE")) == NULL) + dbf = _PATH_LPCIVDB; + if ((db = fopen(dbf, "r")) == NULL) { dbf = _PATH_PCIVDB; - if ((db = fopen(dbf, "r")) == NULL) - return(1); + if ((db = fopen(dbf, "r")) == NULL) + return(1); + } cv = NULL; cd = NULL; error = 0; diff --git a/usr.sbin/pkg/config.c b/usr.sbin/pkg/config.c index 3a1f18d..b0271f5 100644 --- a/usr.sbin/pkg/config.c +++ b/usr.sbin/pkg/config.c @@ -131,6 +131,15 @@ static struct config_entry c[] = { false, true, }, + [PUBKEY] = { + PKG_CONFIG_STRING, + "PUBKEY", + NULL, + NULL, + NULL, + false, + false + } }; static int @@ -231,6 +240,8 @@ config_parse(const ucl_object_t *obj, pkg_conf_file_t conftype) sbuf_cpy(buf, "SIGNATURE_TYPE"); else if (strcasecmp(key, "fingerprints") == 0) sbuf_cpy(buf, "FINGERPRINTS"); + else if (strcasecmp(key, "pubkey") == 0) + sbuf_cpy(buf, "PUBKEY"); else if (strcasecmp(key, "enabled") == 0) { if ((cur->type != UCL_BOOLEAN) || !ucl_object_toboolean(cur)) diff --git a/usr.sbin/pkg/config.h b/usr.sbin/pkg/config.h index ea0d000..4ff0a16 100644 --- a/usr.sbin/pkg/config.h +++ b/usr.sbin/pkg/config.h @@ -40,6 +40,7 @@ typedef enum { SIGNATURE_TYPE, FINGERPRINTS, REPOS_DIR, + PUBKEY, CONFIG_SIZE } pkg_config_key; diff --git a/usr.sbin/pkg/pkg.c b/usr.sbin/pkg/pkg.c index e242638..fc2192d 100644 --- a/usr.sbin/pkg/pkg.c +++ b/usr.sbin/pkg/pkg.c @@ -65,6 +65,11 @@ struct sig_cert { bool trusted; }; +struct pubkey { + unsigned char *sig; + int siglen; +}; + typedef enum { HASH_UNKNOWN, HASH_SHA256, @@ -470,6 +475,25 @@ cleanup: } static EVP_PKEY * +load_public_key_file(const char *file) +{ + EVP_PKEY *pkey; + BIO *bp; + char errbuf[1024]; + + bp = BIO_new_file(file, "r"); + if (!bp) + errx(EXIT_FAILURE, "Unable to read %s", file); + + if ((pkey = PEM_read_bio_PUBKEY(bp, NULL, NULL, NULL)) == NULL) + warnx("ici: %s", ERR_error_string(ERR_get_error(), errbuf)); + + BIO_free(bp); + + return (pkey); +} + +static EVP_PKEY * load_public_key_buf(const unsigned char *cert, int certlen) { EVP_PKEY *pkey; @@ -487,8 +511,8 @@ load_public_key_buf(const unsigned char *cert, int certlen) } static bool -rsa_verify_cert(int fd, const unsigned char *key, int keylen, - unsigned char *sig, int siglen) +rsa_verify_cert(int fd, const char *sigfile, const unsigned char *key, + int keylen, unsigned char *sig, int siglen) { EVP_MD_CTX *mdctx; EVP_PKEY *pkey; @@ -500,6 +524,8 @@ rsa_verify_cert(int fd, const unsigned char *key, int keylen, mdctx = NULL; ret = false; + SSL_load_error_strings(); + /* Compute SHA256 of the package. */ if (lseek(fd, 0, 0) == -1) { warn("lseek"); @@ -510,9 +536,16 @@ rsa_verify_cert(int fd, const unsigned char *key, int keylen, goto cleanup; } - if ((pkey = load_public_key_buf(key, keylen)) == NULL) { - warnx("Error reading public key"); - goto cleanup; + if (sigfile != NULL) { + if ((pkey = load_public_key_file(sigfile)) == NULL) { + warnx("Error reading public key"); + goto cleanup; + } + } else { + if ((pkey = load_public_key_buf(key, keylen)) == NULL) { + warnx("Error reading public key"); + goto cleanup; + } } /* Verify signature of the SHA256(pkg) is valid. */ @@ -552,6 +585,35 @@ cleanup: return (ret); } +static struct pubkey * +read_pubkey(int fd) +{ + struct pubkey *pk; + struct sbuf *sig; + char buf[4096]; + int r; + + if (lseek(fd, 0, 0) == -1) { + warn("lseek"); + return (NULL); + } + + sig = sbuf_new_auto(); + + while ((r = read(fd, buf, sizeof(buf))) >0) { + sbuf_bcat(sig, buf, r); + } + + sbuf_finish(sig); + pk = calloc(1, sizeof(struct pubkey)); + pk->siglen = sbuf_len(sig); + pk->sig = calloc(1, pk->siglen); + memcpy(pk->sig, sbuf_data(sig), pk->siglen); + sbuf_delete(sig); + + return (pk); +} + static struct sig_cert * parse_cert(int fd) { int my_fd; @@ -625,6 +687,45 @@ parse_cert(int fd) { } static bool +verify_pubsignature(int fd_pkg, int fd_sig) +{ + struct pubkey *pk; + const char *pubkey; + bool ret; + + pk = NULL; + pubkey = NULL; + ret = false; + if (config_string(PUBKEY, &pubkey) != 0) { + warnx("No CONFIG_PUBKEY defined"); + goto cleanup; + } + + if ((pk = read_pubkey(fd_sig)) == NULL) { + warnx("Error reading signature"); + goto cleanup; + } + + /* Verify the signature. */ + printf("Verifying signature with public key %s... ", pubkey); + if (rsa_verify_cert(fd_pkg, pubkey, NULL, 0, pk->sig, + pk->siglen) == false) { + fprintf(stderr, "Signature is not valid\n"); + goto cleanup; + } + + ret = true; + +cleanup: + if (pk) { + free(pk->sig); + free(pk); + } + + return (ret); +} + +static bool verify_signature(int fd_pkg, int fd_sig) { struct fingerprint_list *trusted, *revoked; @@ -702,7 +803,7 @@ verify_signature(int fd_pkg, int fd_sig) /* Verify the signature. */ printf("Verifying signature with trusted certificate %s... ", sc->name); - if (rsa_verify_cert(fd_pkg, sc->cert, sc->certlen, sc->sig, + if (rsa_verify_cert(fd_pkg, NULL, sc->cert, sc->certlen, sc->sig, sc->siglen) == false) { fprintf(stderr, "Signature is not valid\n"); goto cleanup; @@ -768,24 +869,42 @@ bootstrap_pkg(bool force) if (signature_type != NULL && strcasecmp(signature_type, "NONE") != 0) { - if (strcasecmp(signature_type, "FINGERPRINTS") != 0) { - warnx("Signature type %s is not supported for " - "bootstrapping.", signature_type); - goto cleanup; - } + if (strcasecmp(signature_type, "FINGERPRINTS") == 0) { - snprintf(tmpsig, MAXPATHLEN, "%s/pkg.txz.sig.XXXXXX", - getenv("TMPDIR") ? getenv("TMPDIR") : _PATH_TMP); - snprintf(url, MAXPATHLEN, "%s/Latest/pkg.txz.sig", - packagesite); + snprintf(tmpsig, MAXPATHLEN, "%s/pkg.txz.sig.XXXXXX", + getenv("TMPDIR") ? getenv("TMPDIR") : _PATH_TMP); + snprintf(url, MAXPATHLEN, "%s/Latest/pkg.txz.sig", + packagesite); - if ((fd_sig = fetch_to_fd(url, tmpsig)) == -1) { - fprintf(stderr, "Signature for pkg not available.\n"); - goto fetchfail; - } + if ((fd_sig = fetch_to_fd(url, tmpsig)) == -1) { + fprintf(stderr, "Signature for pkg not " + "available.\n"); + goto fetchfail; + } + + if (verify_signature(fd_pkg, fd_sig) == false) + goto cleanup; + } else if (strcasecmp(signature_type, "PUBKEY") == 0) { - if (verify_signature(fd_pkg, fd_sig) == false) + snprintf(tmpsig, MAXPATHLEN, + "%s/pkg.txz.pubkeysig.XXXXXX", + getenv("TMPDIR") ? getenv("TMPDIR") : _PATH_TMP); + snprintf(url, MAXPATHLEN, "%s/Latest/pkg.txz.pubkeysig", + packagesite); + + if ((fd_sig = fetch_to_fd(url, tmpsig)) == -1) { + fprintf(stderr, "Signature for pkg not " + "available.\n"); + goto fetchfail; + } + + if (verify_pubsignature(fd_pkg, fd_sig) == false) + goto cleanup; + } else { + warnx("Signature type %s is not supported for " + "bootstrapping.", signature_type); goto cleanup; + } } if ((ret = extract_pkg_static(fd_pkg, pkgstatic, MAXPATHLEN)) == 0) @@ -862,21 +981,37 @@ bootstrap_pkg_local(const char *pkgpath, bool force) } if (signature_type != NULL && strcasecmp(signature_type, "NONE") != 0) { - if (strcasecmp(signature_type, "FINGERPRINTS") != 0) { - warnx("Signature type %s is not supported for " - "bootstrapping.", signature_type); - goto cleanup; - } + if (strcasecmp(signature_type, "FINGERPRINTS") == 0) { - snprintf(path, sizeof(path), "%s.sig", pkgpath); + snprintf(path, sizeof(path), "%s.sig", pkgpath); - if ((fd_sig = open(path, O_RDONLY)) == -1) { - fprintf(stderr, "Signature for pkg not available.\n"); - goto cleanup; - } + if ((fd_sig = open(path, O_RDONLY)) == -1) { + fprintf(stderr, "Signature for pkg not " + "available.\n"); + goto cleanup; + } - if (verify_signature(fd_pkg, fd_sig) == false) + if (verify_signature(fd_pkg, fd_sig) == false) + goto cleanup; + + } else if (strcasecmp(signature_type, "PUBKEY") == 0) { + + snprintf(path, sizeof(path), "%s.pubkeysig", pkgpath); + + if ((fd_sig = open(path, O_RDONLY)) == -1) { + fprintf(stderr, "Signature for pkg not " + "available.\n"); + goto cleanup; + } + + if (verify_pubsignature(fd_pkg, fd_sig) == false) + goto cleanup; + + } else { + warnx("Signature type %s is not supported for " + "bootstrapping.", signature_type); goto cleanup; + } } if ((ret = extract_pkg_static(fd_pkg, pkgstatic, MAXPATHLEN)) == 0) diff --git a/usr.sbin/pw/pw_user.c b/usr.sbin/pw/pw_user.c index 61d468a..f133147 100644 --- a/usr.sbin/pw/pw_user.c +++ b/usr.sbin/pw/pw_user.c @@ -1694,6 +1694,7 @@ pw_user_mod(int argc, char **argv, char *arg1) if (homedir && strcmp(pwd->pw_dir, homedir) != 0) { pwd->pw_dir = homedir; + edited = true; if (fstatat(conf.rootfd, pwd->pw_dir, &st, 0) == -1) { if (!createhome) warnx("WARNING: home `%s' does not exist", |