diff options
270 files changed, 18326 insertions, 8713 deletions
diff --git a/Makefile.inc1 b/Makefile.inc1 index 9fefae8..4dcbc0f 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -929,7 +929,7 @@ packageworld: @${DESTDIR}/${DISTDIR}/${dist}.debug.meta . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ - tar cvJfL ${DESTDIR}/${DISTDIR}/${dist}-dbg.txz \ + tar cvJLf ${DESTDIR}/${DISTDIR}/${dist}-dbg.txz \ usr/lib/debug . endif .endfor diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index c29e87d..0c969ff 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -93,6 +93,8 @@ OLD_FILES+=usr/lib/clang/3.5.1/lib/freebsd/libclang_rt.ubsan_cxx-x86_64.a OLD_DIRS+=usr/lib/clang/3.5.1/lib/freebsd OLD_DIRS+=usr/lib/clang/3.5.1/lib OLD_DIRS+=usr/lib/clang/3.5.1 +# 20150217: Removed remnants of ar(4) driver +OLD_FILES+=usr/include/sys/dev/ic/hd64570.h # 20150212: /usr/games moving into /usr/bin OLD_FILES+=usr/games/bcd OLD_FILES+=usr/games/caesar diff --git a/bin/ln/symlink.7 b/bin/ln/symlink.7 index 821a312..089c010 100644 --- a/bin/ln/symlink.7 +++ b/bin/ln/symlink.7 @@ -29,7 +29,7 @@ .\" @(#)symlink.7 8.3 (Berkeley) 3/31/94 .\" $FreeBSD$ .\" -.Dd January 23, 2015 +.Dd February 16, 2015 .Dt SYMLINK 7 .Os .Sh NAME @@ -146,6 +146,7 @@ The following system calls follow symbolic links unless given the .Dv AT_SYMLINK_NOFOLLOW flag: +.Xr chflagsat 2 , .Xr fchmodat 2 , .Xr fchownat 2 , .Xr fstatat 2 diff --git a/bin/pkill/tests/pkill-j_test.sh b/bin/pkill/tests/pkill-j_test.sh index a844149..5635df3 100644 --- a/bin/pkill/tests/pkill-j_test.sh +++ b/bin/pkill/tests/pkill-j_test.sh @@ -4,99 +4,90 @@ jail_name_to_jid() { local check_name="$1" - ( - line="$(jls -n 2> /dev/null | grep name=$check_name )" - for nv in $line; do - local name="${nv%=*}" - if [ "${name}" = "jid" ]; then - eval $nv - echo $jid - break - fi - done - ) + jls -j "$check_name" -s 2>/dev/null | tr ' ' '\n' | grep jid= | sed -e 's/.*=//g' } base=pkill_j_test +if [ `id -u` -ne 0 ]; then + echo "1..0 # skip Test needs uid 0." 
+ exit 0 +fi + echo "1..3" +sleep=$(pwd)/sleep.txt +ln -sf /bin/sleep $sleep + name="pkill -j <jid>" -if [ `id -u` -eq 0 ]; then - sleep=$(pwd)/sleep.txt - ln -sf /bin/sleep $sleep - jail -c path=/ name=${base}_1_1 ip4.addr=127.0.0.1 \ - command=daemon -p ${PWD}/${base}_1_1.pid $sleep 5 & +sleep_amount=5 +jail -c path=/ name=${base}_1_1 ip4.addr=127.0.0.1 \ + command=daemon -p ${PWD}/${base}_1_1.pid $sleep $sleep_amount & - jail -c path=/ name=${base}_1_2 ip4.addr=127.0.0.1 \ - command=daemon -p ${PWD}/${base}_1_2.pid $sleep 5 & +jail -c path=/ name=${base}_1_2 ip4.addr=127.0.0.1 \ + command=daemon -p ${PWD}/${base}_1_2.pid $sleep $sleep_amount & - $sleep 5 & - sleep 0.5 +$sleep $sleep_amount & + +for i in `seq 1 10`; do jid1=$(jail_name_to_jid ${base}_1_1) jid2=$(jail_name_to_jid ${base}_1_2) jid="${jid1},${jid2}" - if pkill -f -j "$jid" $sleep && sleep 0.5 && - ! -f ${PWD}/${base}_1_1.pid && - ! -f ${PWD}/${base}_1_2.pid ; then - echo "ok 1 - $name" - else - echo "not ok 1 - $name" - fi 2>/dev/null - rm -f $sleep - [ -f ${PWD}/${base}_1_1.pid ] && kill $(cat ${PWD}/${base}_1_1.pid) - [ -f ${PWD}/${base}_1_2.pid ] && kill $(cat ${PWD}/${base}_1_2.pid) - wait + case "$jid" in + [0-9]+,[0-9]+) + break + ;; + esac + sleep 0.1 +done + +if pkill -f -j "$jid" $sleep && sleep 0.5 && + ! -f ${PWD}/${base}_1_1.pid && + ! -f ${PWD}/${base}_1_2.pid ; then + echo "ok 1 - $name" else - echo "ok 1 - $name # skip Test needs uid 0." 
-fi + echo "not ok 1 - $name" +fi 2>/dev/null +[ -f ${PWD}/${base}_1_1.pid ] && kill $(cat ${PWD}/${base}_1_1.pid) +[ -f ${PWD}/${base}_1_2.pid ] && kill $(cat ${PWD}/${base}_1_2.pid) +wait name="pkill -j any" -if [ `id -u` -eq 0 ]; then - sleep=$(pwd)/sleep.txt - ln -sf /bin/sleep $sleep - jail -c path=/ name=${base}_2_1 ip4.addr=127.0.0.1 \ - command=daemon -p ${PWD}/${base}_2_1.pid $sleep 5 & +sleep_amount=6 +jail -c path=/ name=${base}_2_1 ip4.addr=127.0.0.1 \ + command=daemon -p ${PWD}/${base}_2_1.pid $sleep $sleep_amount & - jail -c path=/ name=${base}_2_2 ip4.addr=127.0.0.1 \ - command=daemon -p ${PWD}/${base}_2_2.pid $sleep 5 & +jail -c path=/ name=${base}_2_2 ip4.addr=127.0.0.1 \ + command=daemon -p ${PWD}/${base}_2_2.pid $sleep $sleep_amount & - $sleep 5 & - sleep 0.5 - chpid3=$! - if pkill -f -j any $sleep && sleep 0.5 && - [ ! -f ${PWD}/${base}_2_1.pid -a - ! -f ${PWD}/${base}_2_2.pid ] && kill $chpid3; then - echo "ok 2 - $name" - else - echo "not ok 2 - $name" - fi 2>/dev/null - rm -f $sleep - [ -f ${PWD}/${base}_2_1.pid ] && kill $(cat ${PWD}/${base}_2_1.pid) - [ -f ${PWD}/${base}_2_2.pid ] && kill $(cat ${PWD}/${base}_2_2.pid) - wait +$sleep $sleep_amount & +chpid3=$! +sleep 0.5 +if pkill -f -j any $sleep && sleep 0.5 && + [ ! -f ${PWD}/${base}_2_1.pid -a + ! -f ${PWD}/${base}_2_2.pid ] && kill $chpid3; then + echo "ok 2 - $name" else - echo "ok 2 - $name # skip Test needs uid 0." -fi + echo "not ok 2 - $name" +fi 2>/dev/null +[ -f ${PWD}/${base}_2_1.pid ] && kill $(cat ${PWD}/${base}_2_1.pid) +[ -f ${PWD}/${base}_2_2.pid ] && kill $(cat ${PWD}/${base}_2_2.pid) +wait name="pkill -j none" -if [ `id -u` -eq 0 ]; then - sleep=$(pwd)/sleep.txt - ln -sf /bin/sleep $sleep - daemon -p ${PWD}/${base}_3_1.pid $sleep 5 - jail -c path=/ name=${base}_3_2 ip4.addr=127.0.0.1 \ - command=daemon -p ${PWD}/${base}_3_2.pid $sleep 5 & - sleep 1 - if pkill -f -j none "$sleep 5" && sleep 1 && - [ ! 
-f ${PWD}/${base}_3_1.pid -a -f ${PWD}/${base}_3_2.pid ] ; then - echo "ok 3 - $name" - else - ls ${PWD}/*.pid - echo "not ok 3 - $name" - fi 2>/dev/null - rm -f $sleep - [ -f ${PWD}/${base}_3_1.pid ] && kill $(cat ${base}_3_1.pid) - [ -f ${PWD}/${base}_3_2.pid ] && kill $(cat ${base}_3_2.pid) +sleep_amount=7 +daemon -p ${PWD}/${base}_3_1.pid $sleep $sleep_amount +jail -c path=/ name=${base}_3_2 ip4.addr=127.0.0.1 \ + command=daemon -p ${PWD}/${base}_3_2.pid $sleep $sleep_amount & +sleep 1 +if pkill -f -j none "$sleep $sleep_amount" && sleep 1 && + [ ! -f ${PWD}/${base}_3_1.pid -a -f ${PWD}/${base}_3_2.pid ] ; then + echo "ok 3 - $name" else - echo "ok 3 - $name # skip Test needs uid 0." -fi + ls ${PWD}/*.pid + echo "not ok 3 - $name" +fi 2>/dev/null +[ -f ${PWD}/${base}_3_1.pid ] && kill $(cat ${base}_3_1.pid) +[ -f ${PWD}/${base}_3_2.pid ] && kill $(cat ${base}_3_2.pid) + +rm -f $sleep diff --git a/bin/sh/cd.c b/bin/sh/cd.c index 7720fad..88f03f5 100644 --- a/bin/sh/cd.c +++ b/bin/sh/cd.c @@ -122,7 +122,7 @@ cdcmd(int argc __unused, char **argv __unused) (dest[0] == '.' && (dest[1] == '/' || dest[1] == '\0')) || (dest[0] == '.' && dest[1] == '.' && (dest[2] == '/' || dest[2] == '\0')) || (path = bltinlookup("CDPATH", 1)) == NULL) - path = nullstr; + path = ""; while ((p = padvance(&path, dest)) != NULL) { if (stat(p, &statb) < 0) { if (errno != ENOENT) @@ -182,7 +182,6 @@ cdlogical(char *dest) struct stat statb; int first; int badstat; - size_t len; /* * Check each component of the path. If we find a symlink or @@ -190,9 +189,7 @@ cdlogical(char *dest) * next time we get the value of the current directory. 
*/ badstat = 0; - len = strlen(dest); - cdcomppath = stalloc(len + 1); - memcpy(cdcomppath, dest, len + 1); + cdcomppath = stsavestr(dest); STARTSTACKSTR(p); if (*dest == '/') { STPUTC('/', p); @@ -277,7 +274,6 @@ findcwd(char *dir) { char *new; char *p; - size_t len; /* * If our argument is NULL, we don't know the current directory @@ -286,9 +282,7 @@ findcwd(char *dir) */ if (dir == NULL || curdir == NULL) return getpwd2(); - len = strlen(dir); - cdcomppath = stalloc(len + 1); - memcpy(cdcomppath, dir, len + 1); + cdcomppath = stsavestr(dir); STARTSTACKSTR(new); if (*dir != '/') { STPUTS(curdir, new); diff --git a/bin/sh/eval.c b/bin/sh/eval.c index 486de9c..347824a 100644 --- a/bin/sh/eval.c +++ b/bin/sh/eval.c @@ -498,7 +498,7 @@ exphere(union node *redir, struct arglist *fn) struct localvar *savelocalvars; int need_longjmp = 0; - redir->nhere.expdoc = nullstr; + redir->nhere.expdoc = ""; savelocalvars = localvars; localvars = NULL; forcelocal++; diff --git a/bin/sh/expand.c b/bin/sh/expand.c index b542303..e1c1a2e 100644 --- a/bin/sh/expand.c +++ b/bin/sh/expand.c @@ -105,11 +105,12 @@ static void expbackq(union node *, int, int); static int subevalvar(char *, char *, int, int, int, int, int); static char *evalvar(char *, int); static int varisset(const char *, int); +static void strtodest(const char *, int, int, int); static void varvalue(const char *, int, int, int); static void recordregion(int, int, int); static void removerecordregions(int); static void ifsbreakup(char *, struct arglist *); -static void expandmeta(struct strlist *, int); +static void expandmeta(struct strlist *); static void expmeta(char *, char *); static void addfname(char *); static struct strlist *expsort(struct strlist *); @@ -175,7 +176,7 @@ expandarg(union node *arg, struct arglist *arglist, int flag) ifsbreakup(p, &exparg); *exparg.lastp = NULL; exparg.lastp = &exparg.list; - expandmeta(exparg.list, flag); + expandmeta(exparg.list); } else { sp = (struct strlist *)stalloc(sizeof 
(struct strlist)); sp->text = p; @@ -298,9 +299,9 @@ exptilde(char *p, int flag) char c, *startp = p; struct passwd *pw; char *home; - int quotes = flag & (EXP_FULL | EXP_CASE); - while ((c = *p) != '\0') { + for (;;) { + c = *p; switch(c) { case CTLESC: /* This means CTL* are always considered quoted. */ case CTLVAR: @@ -311,31 +312,27 @@ exptilde(char *p, int flag) case CTLQUOTEMARK: return (startp); case ':': - if (flag & EXP_VARTILDE) - goto done; - break; + if ((flag & EXP_VARTILDE) == 0) + break; + /* FALLTHROUGH */ + case '\0': case '/': case CTLENDVAR: - goto done; + *p = '\0'; + if (*(startp+1) == '\0') { + home = lookupvar("HOME"); + } else { + pw = getpwnam(startp+1); + home = pw != NULL ? pw->pw_dir : NULL; + } + *p = c; + if (home == NULL || *home == '\0') + return (startp); + strtodest(home, flag, VSNORMAL, 1); + return (p); } p++; } -done: - *p = '\0'; - if (*(startp+1) == '\0') { - home = lookupvar("HOME"); - } else { - pw = getpwnam(startp+1); - home = pw != NULL ? pw->pw_dir : NULL; - } - *p = c; - if (home == NULL || *home == '\0') - return (startp); - if (quotes) - STPUTS_QUOTES(home, DQSYNTAX, expdest); - else - STPUTS(home, expdest); - return (p); } @@ -496,6 +493,17 @@ expbackq(union node *cmd, int quoted, int flag) +static void +recordleft(const char *str, const char *loc, char *startp) +{ + int amount; + + amount = ((str - 1) - (loc - startp)) - expdest; + STADJUST(amount, expdest); + while (loc != str - 1) + *startp++ = *loc++; +} + static int subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varflags, int quotes) @@ -530,8 +538,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, error((char *)NULL); } error("%.*s: parameter %snot set", (int)(p - str - 1), - str, (varflags & VSNUL) ? "null or " - : nullstr); + str, (varflags & VSNUL) ? 
"null or " : ""); return 0; case VSTRIMLEFT: @@ -540,7 +547,8 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, *loc = '\0'; if (patmatch(str, startp, quotes)) { *loc = c; - goto recordleft; + recordleft(str, loc, startp); + return 1; } *loc = c; if (quotes && *loc == CTLESC) @@ -554,7 +562,8 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, *loc = '\0'; if (patmatch(str, startp, quotes)) { *loc = c; - goto recordleft; + recordleft(str, loc, startp); + return 1; } *loc = c; loc--; @@ -602,13 +611,6 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, default: abort(); } - -recordleft: - amount = ((str - 1) - (loc - startp)) - expdest; - STADJUST(amount, expdest); - while (loc != str - 1) - *startp++ = *loc++; - return 1; } @@ -633,6 +635,7 @@ evalvar(char *p, int flag) int varlenb; int easy; int quotes = flag & (EXP_FULL | EXP_CASE); + int record; varflags = (unsigned char)*p++; subtype = varflags & VSTYPE; @@ -690,22 +693,15 @@ again: /* jump here after setting a variable with ${var=text} */ STADJUST(-varlenb, expdest); } } else { - char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX - : BASESYNTAX; - if (subtype == VSLENGTH) { for (;*val; val++) if (!localeisutf8 || (*val & 0xC0) != 0x80) varlen++; } - else { - if (quotes) - STPUTS_QUOTES(val, syntax, expdest); - else - STPUTS(val, expdest); - - } + else + strtodest(val, flag, subtype, + varflags & VSQUOTE); } } @@ -719,15 +715,11 @@ again: /* jump here after setting a variable with ${var=text} */ switch (subtype) { case VSLENGTH: expdest = cvtnum(varlen, expdest); - goto record; + record = 1; + break; case VSNORMAL: - if (!easy) - break; -record: - recordregion(startloc, expdest - stackblock(), - varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' && - (*var == '@' || *var == '*'))); + record = easy; break; case VSPLUS: @@ -737,8 +729,7 @@ record: (varflags & VSQUOTE ? 
EXP_LIT_QUOTED : 0)); break; } - if (easy) - goto record; + record = easy; break; case VSTRIMLEFT: @@ -760,7 +751,8 @@ record: } /* Remove any recorded regions beyond start of variable */ removerecordregions(startloc); - goto record; + record = 1; + break; case VSASSIGN: case VSQUESTION: @@ -777,8 +769,7 @@ record: } break; } - if (easy) - goto record; + record = easy; break; case VSERROR: @@ -790,6 +781,11 @@ record: abort(); } + if (record) + recordregion(startloc, expdest - stackblock(), + varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' && + (*var == '@' || *var == '*'))); + if (subtype != VSNORMAL) { /* skip to end of alternative */ int nesting = 1; for (;;) { @@ -1093,7 +1089,7 @@ static char expdir[PATH_MAX]; * The results are stored in the list exparg. */ static void -expandmeta(struct strlist *str, int flag __unused) +expandmeta(struct strlist *str) { char *p; struct strlist **savelastp; @@ -1284,11 +1280,8 @@ addfname(char *name) { char *p; struct strlist *sp; - size_t len; - len = strlen(name); - p = stalloc(len + 1); - memcpy(p, name, len + 1); + p = stsavestr(name); sp = (struct strlist *)stalloc(sizeof *sp); sp->text = p; *exparg.lastp = sp; @@ -1478,16 +1471,14 @@ patmatch(const char *pattern, const char *string, int squoted) endp = p; if (*endp == '!' || *endp == '^') endp++; - for (;;) { + do { while (*endp == CTLQUOTEMARK) endp++; if (*endp == 0) goto dft; /* no matching ] */ if (*endp == CTLESC) endp++; - if (*++endp == ']') - break; - } + } while (*++endp != ']'); invert = 0; if (*p == '!' || *p == '^') { invert++; diff --git a/bin/sh/mail.c b/bin/sh/mail.c index 597e733..720cab0 100644 --- a/bin/sh/mail.c +++ b/bin/sh/mail.c @@ -85,7 +85,7 @@ chkmail(int silent) setstackmark(&smark); mpath = mpathset()? 
mpathval() : mailval(); for (i = 0 ; i < nmboxes ; i++) { - p = padvance(&mpath, nullstr); + p = padvance(&mpath, ""); if (p == NULL) break; if (*p == '\0') diff --git a/bin/sh/memalloc.c b/bin/sh/memalloc.c index 119f12e..a04020f 100644 --- a/bin/sh/memalloc.c +++ b/bin/sh/memalloc.c @@ -180,6 +180,18 @@ stunalloc(pointer p) } +char * +stsavestr(const char *s) +{ + char *p; + size_t len; + + len = strlen(s); + p = stalloc(len + 1); + memcpy(p, s, len + 1); + return p; +} + void setstackmark(struct stackmark *mark) diff --git a/bin/sh/memalloc.h b/bin/sh/memalloc.h index a22fa39..e8df7cb 100644 --- a/bin/sh/memalloc.h +++ b/bin/sh/memalloc.h @@ -52,6 +52,7 @@ void ckfree(pointer); char *savestr(const char *); pointer stalloc(int); void stunalloc(pointer); +char *stsavestr(const char *); void setstackmark(struct stackmark *); void popstackmark(struct stackmark *); char *growstackstr(void); diff --git a/bin/sh/miscbltin.c b/bin/sh/miscbltin.c index 027d8ae..715e324 100644 --- a/bin/sh/miscbltin.c +++ b/bin/sh/miscbltin.c @@ -265,7 +265,7 @@ readcmd(int argc __unused, char **argv __unused) /* Set any remaining args to "" */ while (*++ap != NULL) - setvar(*ap, nullstr, 0); + setvar(*ap, "", 0); return status; } diff --git a/bin/sh/nodetypes b/bin/sh/nodetypes index 603c777..d480093 100644 --- a/bin/sh/nodetypes +++ b/bin/sh/nodetypes @@ -138,7 +138,7 @@ NXHERE nhere # fd<<! fd int # file descriptor being redirected next nodeptr # next redirection in list doc nodeptr # input to command (NARG node) - expdoc temp char *expdoc # actual document (for NXHERE) + expdoc temp const char *expdoc # actual document (for NXHERE) NNOT nnot # ! 
command (actually pipeline) type int diff --git a/bin/sh/parser.c b/bin/sh/parser.c index 0048314..2bba84e 100644 --- a/bin/sh/parser.c +++ b/bin/sh/parser.c @@ -1940,7 +1940,7 @@ getprompt(void *unused __unused) */ switch (whichprompt) { case 0: - fmt = nullstr; + fmt = ""; break; case 1: fmt = ps1val(); diff --git a/bin/sh/tests/expansion/Makefile b/bin/sh/tests/expansion/Makefile index 027bc95..0c3e89e 100644 --- a/bin/sh/tests/expansion/Makefile +++ b/bin/sh/tests/expansion/Makefile @@ -48,6 +48,7 @@ FILES+= ifs1.0 FILES+= ifs2.0 FILES+= ifs3.0 FILES+= ifs4.0 +FILES+= ifs5.0 FILES+= length1.0 FILES+= length2.0 FILES+= length3.0 @@ -62,6 +63,7 @@ FILES+= pathname1.0 FILES+= pathname2.0 FILES+= pathname3.0 FILES+= pathname4.0 +FILES+= pathname5.0 FILES+= plus-minus1.0 FILES+= plus-minus2.0 FILES+= plus-minus3.0 diff --git a/bin/sh/tests/expansion/ifs5.0 b/bin/sh/tests/expansion/ifs5.0 new file mode 100644 index 0000000..ab0e646 --- /dev/null +++ b/bin/sh/tests/expansion/ifs5.0 @@ -0,0 +1,4 @@ +# $FreeBSD$ + +set -- $(echo a b c d) +[ "$#" = 4 ] diff --git a/bin/sh/tests/expansion/pathname5.0 b/bin/sh/tests/expansion/pathname5.0 new file mode 100644 index 0000000..bc27812 --- /dev/null +++ b/bin/sh/tests/expansion/pathname5.0 @@ -0,0 +1,3 @@ +# $FreeBSD$ + +[ `echo '/[e]tc'` = /etc ] diff --git a/bin/sh/var.c b/bin/sh/var.c index ebeff16..def1e0c 100644 --- a/bin/sh/var.c +++ b/bin/sh/var.c @@ -872,7 +872,7 @@ unsetvar(const char *s) if (vp->flags & VREADONLY) return (1); if (vp->text[vp->name_len + 1] != '\0') - setvar(s, nullstr, 0); + setvar(s, "", 0); if ((vp->flags & VEXPORT) && localevar(vp->text)) { change_env(s, 0); setlocale(LC_ALL, ""); diff --git a/contrib/elftoolchain/addr2line/addr2line.c b/contrib/elftoolchain/addr2line/addr2line.c index e1db599..6dcf19c 100644 --- a/contrib/elftoolchain/addr2line/addr2line.c +++ b/contrib/elftoolchain/addr2line/addr2line.c @@ -40,7 +40,7 @@ #include "_elftc.h" -ELFTC_VCSID("$Id: addr2line.c 2185 2011-11-19 16:07:16Z 
jkoshy $"); +ELFTC_VCSID("$Id: addr2line.c 3148 2015-02-15 18:47:39Z emaste $"); static struct option longopts[] = { {"target" , required_argument, NULL, 'b'}, diff --git a/contrib/elftoolchain/common/_elftc.h b/contrib/elftoolchain/common/_elftc.h index 0b8c77c..d6c8784 100644 --- a/contrib/elftoolchain/common/_elftc.h +++ b/contrib/elftoolchain/common/_elftc.h @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _elftc.h 2922 2013-03-17 22:53:15Z kaiwang27 $ + * $Id: _elftc.h 3139 2015-01-05 03:17:06Z kaiwang27 $ */ /** @@ -76,10 +76,17 @@ * SUCH DAMAGE. */ +#ifndef LIST_FOREACH_SAFE +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST((head)); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) +#endif + #ifndef SLIST_FOREACH_SAFE -#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = SLIST_FIRST((head)); \ - (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SLIST_FIRST((head)); \ + (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ (var) = (tvar)) #endif diff --git a/contrib/elftoolchain/common/elfdefinitions.h b/contrib/elftoolchain/common/elfdefinitions.h index 8b28aeb..f0a2fc2 100644 --- a/contrib/elftoolchain/common/elfdefinitions.h +++ b/contrib/elftoolchain/common/elfdefinitions.h @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: elfdefinitions.h 3110 2014-12-20 08:32:46Z kaiwang27 $ + * $Id: elfdefinitions.h 3149 2015-02-15 19:00:06Z emaste $ */ /* @@ -1396,6 +1396,12 @@ _ELF_DEFINE_RELOC(R_386_8, 22) \ _ELF_DEFINE_RELOC(R_386_PC8, 23) /* + */ +#define _ELF_DEFINE_AARCH64_RELOCATIONS() \ +_ELF_DEFINE_RELOC(R_AARCH64_ABS64, 257) \ +_ELF_DEFINE_RELOC(R_AARCH64_ABS32, 258) \ + +/* * These are the symbols used in the Sun ``Linkers and Loaders * Guide'', Document No: 817-1984-17. 
See the X86_64 relocations list * below for the spellings used in the ELF specification. @@ -1962,6 +1968,7 @@ _ELF_DEFINE_RELOC(R_X86_64_IRELATIVE, 37) #define _ELF_DEFINE_RELOCATIONS() \ _ELF_DEFINE_386_RELOCATIONS() \ +_ELF_DEFINE_AARCH64_RELOCATIONS() \ _ELF_DEFINE_AMD64_RELOCATIONS() \ _ELF_DEFINE_ARM_RELOCATIONS() \ _ELF_DEFINE_IA64_RELOCATIONS() \ diff --git a/contrib/elftoolchain/elfcopy/main.c b/contrib/elftoolchain/elfcopy/main.c index 4a693ca..e5bea86 100644 --- a/contrib/elftoolchain/elfcopy/main.c +++ b/contrib/elftoolchain/elfcopy/main.c @@ -40,7 +40,7 @@ #include "elfcopy.h" -ELFTC_VCSID("$Id: main.c 3111 2014-12-20 08:33:01Z kaiwang27 $"); +ELFTC_VCSID("$Id: main.c 3156 2015-02-15 21:40:01Z emaste $"); enum options { diff --git a/contrib/elftoolchain/elfcopy/sections.c b/contrib/elftoolchain/elfcopy/sections.c index 4d23bc7..ee6d172 100644 --- a/contrib/elftoolchain/elfcopy/sections.c +++ b/contrib/elftoolchain/elfcopy/sections.c @@ -35,7 +35,7 @@ #include "elfcopy.h" -ELFTC_VCSID("$Id: sections.c 3134 2014-12-23 10:43:59Z kaiwang27 $"); +ELFTC_VCSID("$Id: sections.c 3150 2015-02-15 19:07:46Z emaste $"); static void add_gnu_debuglink(struct elfcopy *ecp); static uint32_t calc_crc32(const char *p, size_t len, uint32_t crc); diff --git a/contrib/elftoolchain/libdwarf/_libdwarf.h b/contrib/elftoolchain/libdwarf/_libdwarf.h index a7669e2..06413be 100644 --- a/contrib/elftoolchain/libdwarf/_libdwarf.h +++ b/contrib/elftoolchain/libdwarf/_libdwarf.h @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: _libdwarf.h 3106 2014-12-19 16:00:58Z kaiwang27 $ + * $Id: _libdwarf.h 3161 2015-02-15 21:43:36Z emaste $ */ #ifndef __LIBDWARF_H_ diff --git a/contrib/elftoolchain/libdwarf/dwarf_attrval.c b/contrib/elftoolchain/libdwarf/dwarf_attrval.c index 179deed..0dd38a4 100644 --- a/contrib/elftoolchain/libdwarf/dwarf_attrval.c +++ b/contrib/elftoolchain/libdwarf/dwarf_attrval.c @@ -26,7 +26,7 @@ #include "_libdwarf.h" -ELFTC_VCSID("$Id: dwarf_attrval.c 2977 2014-01-21 20:13:31Z kaiwang27 $"); +ELFTC_VCSID("$Id: dwarf_attrval.c 3159 2015-02-15 21:43:27Z emaste $"); int dwarf_attrval_flag(Dwarf_Die die, Dwarf_Half attr, Dwarf_Bool *valp, Dwarf_Error *err) diff --git a/contrib/elftoolchain/libdwarf/dwarf_get_AT_name.3 b/contrib/elftoolchain/libdwarf/dwarf_get_AT_name.3 index e88e3cf..473adc3 100644 --- a/contrib/elftoolchain/libdwarf/dwarf_get_AT_name.3 +++ b/contrib/elftoolchain/libdwarf/dwarf_get_AT_name.3 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" $Id: dwarf_get_AT_name.3 2071 2011-10-27 03:20:00Z jkoshy $ +.\" $Id: dwarf_get_AT_name.3 3142 2015-01-29 23:11:14Z jkoshy $ .\" .Dd April 22, 2011 .Os diff --git a/contrib/elftoolchain/libdwarf/dwarf_get_arange_info.3 b/contrib/elftoolchain/libdwarf/dwarf_get_arange_info.3 index 2e67871..3878edd 100644 --- a/contrib/elftoolchain/libdwarf/dwarf_get_arange_info.3 +++ b/contrib/elftoolchain/libdwarf/dwarf_get_arange_info.3 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.\" $Id: dwarf_get_arange_info.3 2134 2011-11-10 08:40:14Z jkoshy $ +.\" $Id: dwarf_get_arange_info.3 3142 2015-01-29 23:11:14Z jkoshy $ .\" .Dd April 16, 2011 .Os diff --git a/contrib/elftoolchain/libdwarf/dwarf_get_section_max_offsets.3 b/contrib/elftoolchain/libdwarf/dwarf_get_section_max_offsets.3 index 6f79341..963d4ac 100644 --- a/contrib/elftoolchain/libdwarf/dwarf_get_section_max_offsets.3 +++ b/contrib/elftoolchain/libdwarf/dwarf_get_section_max_offsets.3 @@ -22,9 +22,9 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" $Id: dwarf_get_section_max_offsets.3 3098 2014-09-02 22:18:29Z kaiwang27 $ +.\" $Id: dwarf_get_section_max_offsets.3 3141 2015-01-29 23:11:10Z jkoshy $ .\" -.Dd July 27, 2014 +.Dd December 21, 2014 .Os .Dt DWARF_GET_SECTION_MAX_OFFSETS .Sh NAME @@ -101,7 +101,7 @@ is identical to function .Fn dwarf_get_section_max_offsets_b except that it does not provide argument .Ar debug_types , -thus it can not retrieve the size of the +and thus cannot return the size of the .Dq \&.debug_types section. .Sh RETURN VALUES diff --git a/contrib/elftoolchain/libdwarf/dwarf_hasattr.3 b/contrib/elftoolchain/libdwarf/dwarf_hasattr.3 index d3bcb27..5875848 100644 --- a/contrib/elftoolchain/libdwarf/dwarf_hasattr.3 +++ b/contrib/elftoolchain/libdwarf/dwarf_hasattr.3 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.\" $Id: dwarf_hasattr.3 2073 2011-10-27 03:30:47Z jkoshy $ +.\" $Id: dwarf_hasattr.3 3142 2015-01-29 23:11:14Z jkoshy $ .\" .Dd April 17, 2010 .Os diff --git a/contrib/elftoolchain/libdwarf/dwarf_reloc.c b/contrib/elftoolchain/libdwarf/dwarf_reloc.c index c912f27..0430e4d 100644 --- a/contrib/elftoolchain/libdwarf/dwarf_reloc.c +++ b/contrib/elftoolchain/libdwarf/dwarf_reloc.c @@ -26,7 +26,7 @@ #include "_libdwarf.h" -ELFTC_VCSID("$Id: dwarf_reloc.c 2075 2011-10-27 03:47:28Z jkoshy $"); +ELFTC_VCSID("$Id: dwarf_reloc.c 3161 2015-02-15 21:43:36Z emaste $"); int dwarf_set_reloc_application(int apply) diff --git a/contrib/elftoolchain/libdwarf/dwarf_set_reloc_application.3 b/contrib/elftoolchain/libdwarf/dwarf_set_reloc_application.3 index e62b262..db40cbb 100644 --- a/contrib/elftoolchain/libdwarf/dwarf_set_reloc_application.3 +++ b/contrib/elftoolchain/libdwarf/dwarf_set_reloc_application.3 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" $Id: dwarf_set_reloc_application.3 2075 2011-10-27 03:47:28Z jkoshy $ +.\" $Id: dwarf_set_reloc_application.3 3161 2015-02-15 21:43:36Z emaste $ .\" .Dd February 11, 2015 .Os diff --git a/contrib/elftoolchain/libdwarf/dwarf_whatattr.3 b/contrib/elftoolchain/libdwarf/dwarf_whatattr.3 index 96d9ad2..a975d3e 100644 --- a/contrib/elftoolchain/libdwarf/dwarf_whatattr.3 +++ b/contrib/elftoolchain/libdwarf/dwarf_whatattr.3 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.\" $Id: dwarf_whatattr.3 2075 2011-10-27 03:47:28Z jkoshy $ +.\" $Id: dwarf_whatattr.3 3142 2015-01-29 23:11:14Z jkoshy $ .\" .Dd May 22, 2010 .Os diff --git a/contrib/elftoolchain/libdwarf/libdwarf.c b/contrib/elftoolchain/libdwarf/libdwarf.c index 961fe2c..b2406cb 100644 --- a/contrib/elftoolchain/libdwarf/libdwarf.c +++ b/contrib/elftoolchain/libdwarf/libdwarf.c @@ -26,7 +26,7 @@ #include "_libdwarf.h" -ELFTC_VCSID("$Id: libdwarf.c 2070 2011-10-27 03:05:32Z jkoshy $"); +ELFTC_VCSID("$Id: libdwarf.c 3161 2015-02-15 21:43:36Z emaste $"); struct _libdwarf_globals _libdwarf = { .errhand = NULL, diff --git a/contrib/elftoolchain/libdwarf/libdwarf.h b/contrib/elftoolchain/libdwarf/libdwarf.h index 20360a7..fdbcb4e 100644 --- a/contrib/elftoolchain/libdwarf/libdwarf.h +++ b/contrib/elftoolchain/libdwarf/libdwarf.h @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: libdwarf.h 3064 2014-06-06 19:35:55Z kaiwang27 $ + * $Id: libdwarf.h 3149 2015-02-15 19:00:06Z emaste $ */ #ifndef _LIBDWARF_H_ diff --git a/contrib/elftoolchain/libdwarf/libdwarf_elf_init.c b/contrib/elftoolchain/libdwarf/libdwarf_elf_init.c index 731a20d..af2d370 100644 --- a/contrib/elftoolchain/libdwarf/libdwarf_elf_init.c +++ b/contrib/elftoolchain/libdwarf/libdwarf_elf_init.c @@ -26,7 +26,7 @@ #include "_libdwarf.h" -ELFTC_VCSID("$Id: libdwarf_elf_init.c 2972 2013-12-23 06:46:04Z kaiwang27 $"); +ELFTC_VCSID("$Id: libdwarf_elf_init.c 3161 2015-02-15 21:43:36Z emaste $"); static const char *debug_name[] = { ".debug_abbrev", diff --git a/contrib/elftoolchain/libdwarf/libdwarf_reloc.c b/contrib/elftoolchain/libdwarf/libdwarf_reloc.c index e3bba67..96bb785 100644 --- a/contrib/elftoolchain/libdwarf/libdwarf_reloc.c +++ b/contrib/elftoolchain/libdwarf/libdwarf_reloc.c @@ -26,7 +26,7 @@ #include "_libdwarf.h" -ELFTC_VCSID("$Id: libdwarf_reloc.c 2948 2013-05-30 21:25:52Z kaiwang27 $"); +ELFTC_VCSID("$Id: libdwarf_reloc.c 3149 2015-02-15 
19:00:06Z emaste $"); Dwarf_Unsigned _dwarf_get_reloc_type(Dwarf_P_Debug dbg, int is64) diff --git a/contrib/elftoolchain/libelf/_libelf_config.h b/contrib/elftoolchain/libelf/_libelf_config.h index 2ad0630..45d8714 100644 --- a/contrib/elftoolchain/libelf/_libelf_config.h +++ b/contrib/elftoolchain/libelf/_libelf_config.h @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _libelf_config.h 2287 2011-12-04 06:45:47Z jkoshy $ + * $Id: _libelf_config.h 3143 2015-02-15 17:57:38Z emaste $ */ #ifdef __DragonFly__ diff --git a/contrib/elftoolchain/libelf/elf.3 b/contrib/elftoolchain/libelf/elf.3 index e057a92..618b4f7 100644 --- a/contrib/elftoolchain/libelf/elf.3 +++ b/contrib/elftoolchain/libelf/elf.3 @@ -21,7 +21,7 @@ .\" out of the use of this software, even if advised of the possibility of .\" such damage. .\" -.\" $Id: elf.3 3082 2014-07-28 09:13:33Z jkoshy $ +.\" $Id: elf.3 3142 2015-01-29 23:11:14Z jkoshy $ .\" .Dd July 28, 2014 .Os diff --git a/contrib/elftoolchain/libelf/elf_scn.c b/contrib/elftoolchain/libelf/elf_scn.c index 9a9c816..0d1ac5c 100644 --- a/contrib/elftoolchain/libelf/elf_scn.c +++ b/contrib/elftoolchain/libelf/elf_scn.c @@ -37,7 +37,7 @@ #include "_libelf.h" -ELFTC_VCSID("$Id: elf_scn.c 3013 2014-03-23 06:16:59Z jkoshy $"); +ELFTC_VCSID("$Id: elf_scn.c 3147 2015-02-15 18:45:23Z emaste $"); /* * Load an ELF section table and create a list of Elf_Scn structures. 
diff --git a/contrib/elftoolchain/libelf/libelf_ar_util.c b/contrib/elftoolchain/libelf/libelf_ar_util.c index 62630ac..7b824fb 100644 --- a/contrib/elftoolchain/libelf/libelf_ar_util.c +++ b/contrib/elftoolchain/libelf/libelf_ar_util.c @@ -34,7 +34,7 @@ #include "_libelf.h" #include "_libelf_ar.h" -ELFTC_VCSID("$Id: libelf_ar_util.c 3013 2014-03-23 06:16:59Z jkoshy $"); +ELFTC_VCSID("$Id: libelf_ar_util.c 3157 2015-02-15 21:42:02Z emaste $"); /* * Convert a string bounded by `start' and `start+sz' (exclusive) to a @@ -278,7 +278,6 @@ _libelf_ar_open(Elf *e, int reporterror) * Handle special archive members for the SVR4 format. */ if (arh.ar_name[0] == '/') { - if (sz == 0) goto error; diff --git a/contrib/elftoolchain/libelf/libelf_convert.m4 b/contrib/elftoolchain/libelf/libelf_convert.m4 index a11ace4..f400367 100644 --- a/contrib/elftoolchain/libelf/libelf_convert.m4 +++ b/contrib/elftoolchain/libelf/libelf_convert.m4 @@ -32,7 +32,7 @@ #include "_libelf.h" -ELFTC_VCSID("$Id: libelf_convert.m4 3009 2014-03-23 01:49:59Z jkoshy $"); +ELFTC_VCSID("$Id: libelf_convert.m4 3158 2015-02-15 21:42:07Z emaste $"); /* WARNING: GENERATED FROM __file__. */ diff --git a/contrib/elftoolchain/nm/nm.1 b/contrib/elftoolchain/nm/nm.1 index d9a0325..35439e4 100644 --- a/contrib/elftoolchain/nm/nm.1 +++ b/contrib/elftoolchain/nm/nm.1 @@ -22,9 +22,9 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.\" -.\" $Id: nm.1 2377 2012-01-03 07:10:59Z jkoshy $ +.\" $Id: nm.1 3145 2015-02-15 18:04:37Z emaste $ .\" -.Dd January 8, 2015 +.Dd February 15, 2015 .Os .Dt NM 1 .Sh NAME diff --git a/contrib/elftoolchain/nm/nm.c b/contrib/elftoolchain/nm/nm.c index 8d9cbbd..f984966 100644 --- a/contrib/elftoolchain/nm/nm.c +++ b/contrib/elftoolchain/nm/nm.c @@ -48,7 +48,7 @@ #include "_elftc.h" -ELFTC_VCSID("$Id: nm.c 3124 2014-12-21 05:46:28Z kaiwang27 $"); +ELFTC_VCSID("$Id: nm.c 3145 2015-02-15 18:04:37Z emaste $"); /* symbol information list */ STAILQ_HEAD(sym_head, sym_entry); diff --git a/contrib/elftoolchain/readelf/readelf.c b/contrib/elftoolchain/readelf/readelf.c index a8c15c4..93e53a1 100644 --- a/contrib/elftoolchain/readelf/readelf.c +++ b/contrib/elftoolchain/readelf/readelf.c @@ -47,7 +47,7 @@ #include "_elftc.h" -ELFTC_VCSID("$Id: readelf.c 3110 2014-12-20 08:32:46Z kaiwang27 $"); +ELFTC_VCSID("$Id: readelf.c 3155 2015-02-15 19:15:57Z emaste $"); /* * readelf(1) options. @@ -1503,7 +1503,8 @@ r_type(unsigned int mach, unsigned int type) static const char * note_type(const char *name, unsigned int et, unsigned int nt) { - if (strcmp(name, "CORE") == 0 && et == ET_CORE) + if ((strcmp(name, "CORE") == 0 || strcmp(name, "LINUX") == 0) && + et == ET_CORE) return note_type_linux_core(nt); else if (strcmp(name, "FreeBSD") == 0) if (et == ET_CORE) @@ -1559,13 +1560,27 @@ note_type_linux_core(unsigned int nt) case 1: return "NT_PRSTATUS (Process status)"; case 2: return "NT_FPREGSET (Floating point information)"; case 3: return "NT_PRPSINFO (Process information)"; + case 4: return "NT_TASKSTRUCT (Task structure)"; case 6: return "NT_AUXV (Auxiliary vector)"; - case 0x46E62B7FUL: return "NT_PRXFPREG (Linux user_xfpregs structure)"; case 10: return "NT_PSTATUS (Linux process status)"; case 12: return "NT_FPREGS (Linux floating point regset)"; case 13: return "NT_PSINFO (Linux process information)"; case 16: return "NT_LWPSTATUS (Linux lwpstatus_t type)"; case 17: return 
"NT_LWPSINFO (Linux lwpinfo_t type)"; + case 18: return "NT_WIN32PSTATUS (win32_pstatus structure)"; + case 0x100: return "NT_PPC_VMX (ppc Altivec registers)"; + case 0x102: return "NT_PPC_VSX (ppc VSX registers)"; + case 0x202: return "NT_X86_XSTATE (x86 XSAVE extended state)"; + case 0x300: return "NT_S390_HIGH_GPRS (s390 upper register halves)"; + case 0x301: return "NT_S390_TIMER (s390 timer register)"; + case 0x302: return "NT_S390_TODCMP (s390 TOD comparator register)"; + case 0x303: return "NT_S390_TODPREG (s390 TOD programmable register)"; + case 0x304: return "NT_S390_CTRS (s390 control registers)"; + case 0x305: return "NT_S390_PREFIX (s390 prefix register)"; + case 0x400: return "NT_ARM_VFP (arm VFP registers)"; + case 0x46494c45UL: return "NT_FILE (mapped files)"; + case 0x46E62B7FUL: return "NT_PRXFPREG (Linux user_xfpregs structure)"; + case 0x53494749UL: return "NT_SIGINFO (siginfo_t data)"; default: return (note_type_unknown(nt)); } } @@ -1605,7 +1620,8 @@ note_type_unknown(unsigned int nt) { static char s_nt[32]; - snprintf(s_nt, sizeof(s_nt), "<unknown: %u>", nt); + snprintf(s_nt, sizeof(s_nt), + nt >= 0x100 ? 
"<unknown: 0x%x>" : "<unknown: %u>", nt); return (s_nt); } @@ -3154,6 +3170,10 @@ dump_rel(struct readelf *re, struct section *s, Elf_Data *d) warnx("gelf_getrel failed: %s", elf_errmsg(-1)); continue; } + if (s->link >= re->shnum) { + warnx("invalid section link index %u", s->link); + continue; + } symname = get_symbol_name(re, s->link, GELF_R_SYM(r.r_info)); symval = get_symbol_value(re, s->link, GELF_R_SYM(r.r_info)); if (re->ec == ELFCLASS32) { @@ -3206,6 +3226,10 @@ dump_rela(struct readelf *re, struct section *s, Elf_Data *d) warnx("gelf_getrel failed: %s", elf_errmsg(-1)); continue; } + if (s->link >= re->shnum) { + warnx("invalid section link index %u", s->link); + continue; + } symname = get_symbol_name(re, s->link, GELF_R_SYM(r.r_info)); symval = get_symbol_value(re, s->link, GELF_R_SYM(r.r_info)); if (re->ec == ELFCLASS32) { @@ -4219,14 +4243,22 @@ dump_attributes(struct readelf *re) len = d->d_size - 1; p++; while (len > 0) { + if (len < 4) { + warnx("truncated attribute section length"); + break; + } seclen = re->dw_decode(&p, 4); if (seclen > len) { warnx("invalid attribute section length"); break; } len -= seclen; - printf("Attribute Section: %s\n", (char *) p); nlen = strlen((char *) p) + 1; + if (nlen + 4 > seclen) { + warnx("invalid attribute section name"); + break; + } + printf("Attribute Section: %s\n", (char *) p); p += nlen; seclen -= nlen + 4; while (seclen > 0) { @@ -6696,10 +6728,8 @@ load_sections(struct readelf *re) return; } - if ((scn = elf_getscn(re->elf, 0)) == NULL) { - warnx("elf_getscn failed: %s", elf_errmsg(-1)); + if ((scn = elf_getscn(re->elf, 0)) == NULL) return; - } (void) elf_errno(); do { diff --git a/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index e7bf20e..772adb6 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ 
b/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1395,8 +1395,11 @@ ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl §ion_headers, } // If there are no section headers we are done. - if (header.e_shnum == 0) + if (header.e_shnum == 0) { + if (arch_spec.GetTriple().getOS() == llvm::Triple::OSType::UnknownOS) + arch_spec.GetTriple().setOSName(HostInfo::GetOSString().data()); return 0; + } Log *log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_MODULES)); diff --git a/lib/libc/gen/dlopen.3 b/lib/libc/gen/dlopen.3 index 089e631..1963528 100644 --- a/lib/libc/gen/dlopen.3 +++ b/lib/libc/gen/dlopen.3 @@ -32,7 +32,7 @@ .\" @(#) dlopen.3 1.6 90/01/31 SMI .\" $FreeBSD$ .\" -.Dd December 21, 2011 +.Dd February 14, 2015 .Dt DLOPEN 3 .Os .Sh NAME @@ -236,7 +236,7 @@ as follows, in the given order: The referencing object itself (or the object from which the call to .Fn dlsym is made), if that object was linked using the -.Fl Wsymbolic +.Fl Bsymbolic option to .Xr ld 1 . 
.It diff --git a/lib/libc/gen/fstab.c b/lib/libc/gen/fstab.c index 6a77abd..c21ceb3 100644 --- a/lib/libc/gen/fstab.c +++ b/lib/libc/gen/fstab.c @@ -181,7 +181,7 @@ fstabscan(void) if (cp != NULL) _fs_fstab.fs_passno = atoi(cp); } - strcpy(subline, _fs_fstab.fs_mntops); + (void)strlcpy(subline, _fs_fstab.fs_mntops, sizeof(subline)); p = subline; for (typexx = 0, cp = strsep(&p, ","); cp; cp = strsep(&p, ",")) { diff --git a/lib/libc/gen/getgrent.c b/lib/libc/gen/getgrent.c index caa5ad5..4ba24ae 100644 --- a/lib/libc/gen/getgrent.c +++ b/lib/libc/gen/getgrent.c @@ -1450,7 +1450,7 @@ docompat: pos = ftello(st->fp); } fin: - if (!stayopen && st->fp != NULL) { + if (st->fp != NULL && !stayopen) { fclose(st->fp); st->fp = NULL; } diff --git a/lib/libc/gen/getpwent.c b/lib/libc/gen/getpwent.c index 7cf7f47..6cd7eaf 100644 --- a/lib/libc/gen/getpwent.c +++ b/lib/libc/gen/getpwent.c @@ -815,7 +815,7 @@ files_passwd(void *retval, void *mdata, va_list ap) size_t bufsize, namesize; uid_t uid; uint32_t store; - int rv, stayopen, *errnop; + int rv, stayopen = 0, *errnop; name = NULL; uid = (uid_t)-1; diff --git a/lib/libc/gen/ulimit.c b/lib/libc/gen/ulimit.c index e1bc020..2c090c0 100644 --- a/lib/libc/gen/ulimit.c +++ b/lib/libc/gen/ulimit.c @@ -40,7 +40,7 @@ ulimit(int cmd, ...) { struct rlimit limit; va_list ap; - long arg; + rlim_t arg; if (cmd == UL_GETFSIZE) { if (getrlimit(RLIMIT_FSIZE, &limit) == -1) @@ -53,14 +53,16 @@ ulimit(int cmd, ...) va_start(ap, cmd); arg = va_arg(ap, long); va_end(ap); - limit.rlim_max = limit.rlim_cur = (rlim_t)arg * 512; + if (arg < 0) + arg = LONG_MAX; + if (arg > RLIM_INFINITY / 512) + arg = RLIM_INFINITY / 512; + limit.rlim_max = limit.rlim_cur = arg * 512; /* The setrlimit() function sets errno to EPERM if needed. 
*/ if (setrlimit(RLIMIT_FSIZE, &limit) == -1) return (-1); - if (arg * 512 > LONG_MAX) - return (LONG_MAX); - return (arg); + return ((long)arg); } else { errno = EINVAL; return (-1); diff --git a/lib/libc/include/libc_private.h b/lib/libc/include/libc_private.h index 71fc8df..e4bf4a6 100644 --- a/lib/libc/include/libc_private.h +++ b/lib/libc/include/libc_private.h @@ -271,7 +271,7 @@ void _malloc_first_thread(void); /* * Function to clean up streams, called from abort() and exit(). */ -void (*__cleanup)(void) __hidden; +extern void (*__cleanup)(void) __hidden; /* * Get kern.osreldate to detect ABI revisions. Explicitly diff --git a/lib/libc/sparc64/sys/Makefile.inc b/lib/libc/sparc64/sys/Makefile.inc index 031af19..726c0c9 100644 --- a/lib/libc/sparc64/sys/Makefile.inc +++ b/lib/libc/sparc64/sys/Makefile.inc @@ -12,7 +12,7 @@ SRCS+= __sparc_sigtramp_setup.c \ CFLAGS+= -I${LIBC_SRCTOP}/sparc64/fpu -MDASM+= brk.S cerror.S exect.S pipe.S ptrace.S sbrk.S setlogin.S sigaction.S +MDASM+= brk.S cerror.S exect.S pipe.S ptrace.S sbrk.S setlogin.S sigaction1.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o openbsd_poll.o sstk.o yield.o diff --git a/lib/libc/sparc64/sys/sigaction.S b/lib/libc/sparc64/sys/sigaction1.S index 7d32f97..219a238 100644 --- a/lib/libc/sparc64/sys/sigaction.S +++ b/lib/libc/sparc64/sys/sigaction1.S @@ -29,7 +29,8 @@ __FBSDID("$FreeBSD$"); #include "SYS.h" -_SYSENTRY(sigaction) + WEAK_REFERENCE(__sys_sigaction, _sigaction) +ENTRY(__sys_sigaction) PIC_PROLOGUE(%o3, %o4) SET(sigcode_installed, %o4, %o3) lduw [%o3], %o4 @@ -44,6 +45,6 @@ _SYSENTRY(sigaction) 1: _SYSCALL(sigaction) retl nop -_SYSEND(sigaction) +END(__sys_sigaction) .comm sigcode_installed, 4, 4 diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index e8ec58e..0edf644 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -65,7 +65,6 @@ INTERPOSED = \ sendmsg \ sendto \ setcontext \ - sigaction \ sigprocmask \ 
sigsuspend \ sigtimedwait \ @@ -76,6 +75,13 @@ INTERPOSED = \ write \ writev +.if ${MACHINE_CPUARCH} == "sparc64" +SRCS+= sigaction.c +NOASM+= sigaction.o +.else +INTERPOSED+= sigaction +.endif + SRCS+= ${INTERPOSED:S/$/.c/} NOASM+= ${INTERPOSED:S/$/.o/} PSEUDO+= ${INTERPOSED:C/^.*$/_&.o/} diff --git a/lib/libcompat/4.3/rexec.c b/lib/libcompat/4.3/rexec.c index 4e01eb6..92357aa 100644 --- a/lib/libcompat/4.3/rexec.c +++ b/lib/libcompat/4.3/rexec.c @@ -332,10 +332,10 @@ retry: perror(hp->h_name); return (-1); } - if (fd2p == 0) { - (void) write(s, "", 1); - port = 0; - } else { + port = 0; + if (fd2p == 0) + (void) write(s, "", 1); + else { char num[8]; int s2, sin2len; diff --git a/lib/libelftc/elftc_version.c b/lib/libelftc/elftc_version.c index a6bf571..5df0587 100644 --- a/lib/libelftc/elftc_version.c +++ b/lib/libelftc/elftc_version.c @@ -6,5 +6,5 @@ const char * elftc_version(void) { - return "elftoolchain r3136M"; + return "elftoolchain r3163M"; } diff --git a/lib/liblzma/config.h b/lib/liblzma/config.h index 2a0087b..29b7fdb 100644 --- a/lib/liblzma/config.h +++ b/lib/liblzma/config.h @@ -26,6 +26,7 @@ #define HAVE_ENCODER_SPARC 1 #define HAVE_ENCODER_X86 1 #define HAVE_FCNTL_H 1 +#define HAVE_FUTIMENS 1 #define HAVE_FUTIMES 1 #define HAVE_GETOPT_H 1 #define HAVE_GETOPT_LONG 1 diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index e6f21ba..8d83f62 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -843,6 +843,7 @@ MAN+= \ udbp.4 \ udp.4 \ udplite.4 \ + udl.4 \ uep.4 \ ufm.4 \ ufoma.4 \ diff --git a/share/man/man4/udl.4 b/share/man/man4/udl.4 new file mode 100644 index 0000000..2699cc7 --- /dev/null +++ b/share/man/man4/udl.4 @@ -0,0 +1,67 @@ +.\" $OpenBSD: udl.4,v 1.20 2012/09/18 17:11:41 jasper Exp $ +.\" $FreeBSD$ +.\" +.\" Copyright (c) 2009 Marcus Glocker <mglocker@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that 
the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd February 15, 2015 +.Dt UDL 4 +.Os +.Sh NAME +.Nm udl +.Nd DisplayLink DL-120 / DL-160 USB display devices +.Sh SYNOPSIS +To compile this driver into the kernel, +place the following line in your +kernel configuration file: +.Bd -ragged -offset indent +.Cd "device udl" +.Ed +.Pp +Alternatively, to load the driver as a +module at boot time, place the following line in +.Xr loader.conf 5 : +.Bd -literal -offset indent +udl_load="YES" +.Ed +.Sh DESCRIPTION +The +.Nm +driver supports USB display devices based on the DisplayLink DL-120 / DL-160 +graphic chip. +.Sh HARDWARE +The following devices should work: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It Century Corp. Japan Plus One LCD-8000U +.It Century Corp. 
Japan Plus One LCD-4300U +.It DisplayLink USB to DVI +.It ForwardVideo EasyCAP008 USB to DVI +.It HP USB 2.0 Docking Station (FQ834) +.It HP USB Graphics Adapter (NL571) +.It IOGEAR USB 2.0 External DVI (GUC2020) +.It Koenig CMP-USBVGA10 and CMP-USBVGA11 +.It Lenovo 45K5296 USB to DVI +.It Lenovo ThinkVision LT1421 +.It Lilliput UM-70 +.It Nanovision MiMo UM-710 and UM-740 +.It Rextron VCUD60 USB to DVI +.It Samsung LD220 +.It StarTech CONV-USB2DVI +.It Sunweit USB to DVI +.It Unitek Y-2240 USB to DVI +.It VideoHome NBdock1920 +.El +.Sh SEE ALSO +.Xr usb 4 diff --git a/share/man/man4/wlan.4 b/share/man/man4/wlan.4 index 16d1f2e..1581778 100644 --- a/share/man/man4/wlan.4 +++ b/share/man/man4/wlan.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 22, 2012 +.Dd February 16, 2015 .Dt WLAN 4 .Os .Sh NAME @@ -182,12 +182,14 @@ may not interoperate. .Xr mwl 4 , .Xr netintro 4 , .Xr ral 4 , +.Xr rsu 4 , .Xr rum 4 , .Xr run 4 , .Xr uath 4 , .Xr upgt 4 , .Xr ural 4 , .Xr urtw 4 , +.Xr urtwn 4 , .Xr wi 4 , .Xr wlan_acl 4 , .Xr wlan_ccmp 4 , diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 13c3d43..b767691 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1507,6 +1507,7 @@ cpususpend_handler(void) vmm_resume_p(); /* Resume MCA and local APIC */ + lapic_xapic_mode(); mca_resume(); lapic_setup(0); diff --git a/sys/arm/arm/cpuinfo.c b/sys/arm/arm/cpuinfo.c index d20c561..b0b8a88 100644 --- a/sys/arm/arm/cpuinfo.c +++ b/sys/arm/arm/cpuinfo.c @@ -58,9 +58,13 @@ cpuinfo_init(void) /* ARMv4T CPU */ cpuinfo.architecture = 1; cpuinfo.revision = (cpuinfo.midr >> 16) & 0x7F; - } + } else { + /* ARM new id scheme */ + cpuinfo.architecture = (cpuinfo.midr >> 16) & 0x0F; + cpuinfo.revision = (cpuinfo.midr >> 20) & 0x0F; + } } else { - /* must be new id scheme */ + /* non ARM -> must be new id scheme */ cpuinfo.architecture = (cpuinfo.midr >> 16) & 0x0F; cpuinfo.revision = (cpuinfo.midr >> 20) & 0x0F; } diff --git 
a/sys/arm/arm/db_trace.c b/sys/arm/arm/db_trace.c index cbeee1f..1719ec5 100644 --- a/sys/arm/arm/db_trace.c +++ b/sys/arm/arm/db_trace.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/kdb.h> #include <sys/stack.h> + #include <machine/armreg.h> #include <machine/asm.h> #include <machine/cpufunc.h> @@ -45,322 +46,15 @@ __FBSDID("$FreeBSD$"); #include <machine/pcb.h> #include <machine/stack.h> #include <machine/vmparam.h> + #include <ddb/ddb.h> #include <ddb/db_access.h> #include <ddb/db_sym.h> #include <ddb/db_output.h> -/* - * Definitions for the instruction interpreter. - * - * The ARM EABI specifies how to perform the frame unwinding in the - * Exception Handling ABI for the ARM Architecture document. To perform - * the unwind we need to know the initial frame pointer, stack pointer, - * link register and program counter. We then find the entry within the - * index table that points to the function the program counter is within. - * This gives us either a list of three instructions to process, a 31-bit - * relative offset to a table of instructions, or a value telling us - * we can't unwind any further. - * - * When we have the instructions to process we need to decode them - * following table 4 in section 9.3. This describes a collection of bit - * patterns to encode that steps to take to update the stack pointer and - * link register to the correct values at the start of the function. - */ - -/* A special case when we are unable to unwind past this function */ -#define EXIDX_CANTUNWIND 1 - -/* The register names */ -#define FP 11 -#define SP 13 -#define LR 14 -#define PC 15 - -/* - * These are set in the linker script. Their addresses will be - * either the start or end of the exception table or index. - */ -extern int extab_start, extab_end, exidx_start, exidx_end; - -/* - * Entry types. - * These are the only entry types that have been seen in the kernel. 
- */ -#define ENTRY_MASK 0xff000000 -#define ENTRY_ARM_SU16 0x80000000 -#define ENTRY_ARM_LU16 0x81000000 - -/* Instruction masks. */ -#define INSN_VSP_MASK 0xc0 -#define INSN_VSP_SIZE_MASK 0x3f -#define INSN_STD_MASK 0xf0 -#define INSN_STD_DATA_MASK 0x0f -#define INSN_POP_TYPE_MASK 0x08 -#define INSN_POP_COUNT_MASK 0x07 -#define INSN_VSP_LARGE_INC_MASK 0xff - -/* Instruction definitions */ -#define INSN_VSP_INC 0x00 -#define INSN_VSP_DEC 0x40 -#define INSN_POP_MASKED 0x80 -#define INSN_VSP_REG 0x90 -#define INSN_POP_COUNT 0xa0 -#define INSN_FINISH 0xb0 -#define INSN_POP_REGS 0xb1 -#define INSN_VSP_LARGE_INC 0xb2 - -/* An item in the exception index table */ -struct unwind_idx { - uint32_t offset; - uint32_t insn; -}; - -/* The state of the unwind process */ -struct unwind_state { - uint32_t registers[16]; - uint32_t start_pc; - uint32_t *insn; - u_int entries; - u_int byte; - uint16_t update_mask; -}; - -/* Expand a 31-bit signed value to a 32-bit signed value */ -static __inline int32_t -db_expand_prel31(uint32_t prel31) -{ - - return ((int32_t)(prel31 & 0x7fffffffu) << 1) / 2; -} - -/* - * Perform a binary search of the index table to find the function - * with the largest address that doesn't exceed addr. 
- */ -static struct unwind_idx * -db_find_index(uint32_t addr) -{ - unsigned int min, mid, max; - struct unwind_idx *start; - struct unwind_idx *item; - int32_t prel31_addr; - uint32_t func_addr; - - start = (struct unwind_idx *)&exidx_start; - - min = 0; - max = (&exidx_end - &exidx_start) / 2; - - while (min != max) { - mid = min + (max - min + 1) / 2; - - item = &start[mid]; - - prel31_addr = db_expand_prel31(item->offset); - func_addr = (uint32_t)&item->offset + prel31_addr; - - if (func_addr <= addr) { - min = mid; - } else { - max = mid - 1; - } - } - - return &start[min]; -} - -/* Reads the next byte from the instruction list */ -static uint8_t -db_unwind_exec_read_byte(struct unwind_state *state) -{ - uint8_t insn; - - /* Read the unwind instruction */ - insn = (*state->insn) >> (state->byte * 8); - - /* Update the location of the next instruction */ - if (state->byte == 0) { - state->byte = 3; - state->insn++; - state->entries--; - } else - state->byte--; - - return insn; -} - -/* Executes the next instruction on the list */ -static int -db_unwind_exec_insn(struct unwind_state *state) -{ - unsigned int insn; - uint32_t *vsp = (uint32_t *)state->registers[SP]; - int update_vsp = 0; - - /* This should never happen */ - if (state->entries == 0) - return 1; - - /* Read the next instruction */ - insn = db_unwind_exec_read_byte(state); - - if ((insn & INSN_VSP_MASK) == INSN_VSP_INC) { - state->registers[SP] += ((insn & INSN_VSP_SIZE_MASK) << 2) + 4; - - } else if ((insn & INSN_VSP_MASK) == INSN_VSP_DEC) { - state->registers[SP] -= ((insn & INSN_VSP_SIZE_MASK) << 2) + 4; - - } else if ((insn & INSN_STD_MASK) == INSN_POP_MASKED) { - unsigned int mask, reg; - - /* Load the mask */ - mask = db_unwind_exec_read_byte(state); - mask |= (insn & INSN_STD_DATA_MASK) << 8; - - /* We have a refuse to unwind instruction */ - if (mask == 0) - return 1; - - /* Update SP */ - update_vsp = 1; - - /* Load the registers */ - for (reg = 4; mask && reg < 16; mask >>= 1, reg++) { - 
if (mask & 1) { - state->registers[reg] = *vsp++; - state->update_mask |= 1 << reg; - - /* If we have updated SP kep its value */ - if (reg == SP) - update_vsp = 0; - } - } - - } else if ((insn & INSN_STD_MASK) == INSN_VSP_REG && - ((insn & INSN_STD_DATA_MASK) != 13) && - ((insn & INSN_STD_DATA_MASK) != 15)) { - /* sp = register */ - state->registers[SP] = - state->registers[insn & INSN_STD_DATA_MASK]; - - } else if ((insn & INSN_STD_MASK) == INSN_POP_COUNT) { - unsigned int count, reg; - - /* Read how many registers to load */ - count = insn & INSN_POP_COUNT_MASK; - - /* Update sp */ - update_vsp = 1; - - /* Pop the registers */ - for (reg = 4; reg <= 4 + count; reg++) { - state->registers[reg] = *vsp++; - state->update_mask |= 1 << reg; - } - - /* Check if we are in the pop r14 version */ - if ((insn & INSN_POP_TYPE_MASK) != 0) { - state->registers[14] = *vsp++; - } - - } else if (insn == INSN_FINISH) { - /* Stop processing */ - state->entries = 0; - - } else if (insn == INSN_POP_REGS) { - unsigned int mask, reg; - - mask = db_unwind_exec_read_byte(state); - if (mask == 0 || (mask & 0xf0) != 0) - return 1; - - /* Update SP */ - update_vsp = 1; - - /* Load the registers */ - for (reg = 0; mask && reg < 4; mask >>= 1, reg++) { - if (mask & 1) { - state->registers[reg] = *vsp++; - state->update_mask |= 1 << reg; - } - } - - } else if ((insn & INSN_VSP_LARGE_INC_MASK) == INSN_VSP_LARGE_INC) { - unsigned int uleb128; - - /* Read the increment value */ - uleb128 = db_unwind_exec_read_byte(state); - - state->registers[SP] += 0x204 + (uleb128 << 2); - - } else { - /* We hit a new instruction that needs to be implemented */ - db_printf("Unhandled instruction %.2x\n", insn); - return 1; - } - - if (update_vsp) { - state->registers[SP] = (uint32_t)vsp; - } - -#if 0 - db_printf("fp = %08x, sp = %08x, lr = %08x, pc = %08x\n", - state->registers[FP], state->registers[SP], state->registers[LR], - state->registers[PC]); -#endif - - return 0; -} - -/* Performs the unwind of a 
function */ -static int -db_unwind_tab(struct unwind_state *state) -{ - uint32_t entry; - - /* Set PC to a known value */ - state->registers[PC] = 0; - - /* Read the personality */ - entry = *state->insn & ENTRY_MASK; - - if (entry == ENTRY_ARM_SU16) { - state->byte = 2; - state->entries = 1; - } else if (entry == ENTRY_ARM_LU16) { - state->byte = 1; - state->entries = ((*state->insn >> 16) & 0xFF) + 1; - } else { - db_printf("Unknown entry: %x\n", entry); - return 1; - } - - while (state->entries > 0) { - if (db_unwind_exec_insn(state) != 0) - return 1; - } - - /* - * The program counter was not updated, load it from the link register. - */ - if (state->registers[PC] == 0) { - state->registers[PC] = state->registers[LR]; - - /* - * If the program counter changed, flag it in the update mask. - */ - if (state->start_pc != state->registers[PC]) - state->update_mask |= 1 << PC; - } - - return 0; -} - static void db_stack_trace_cmd(struct unwind_state *state) { - struct unwind_idx *index; const char *name; db_expr_t value; db_expr_t offset; @@ -372,28 +66,7 @@ db_stack_trace_cmd(struct unwind_state *state) finished = false; while (!finished) { - /* Reset the mask of updated registers */ - state->update_mask = 0; - - /* The pc value is correct and will be overwritten, save it */ - state->start_pc = state->registers[PC]; - - /* Find the item to run */ - index = db_find_index(state->start_pc); - - if (index->insn != EXIDX_CANTUNWIND) { - if (index->insn & (1U << 31)) { - /* The data is within the instruction */ - state->insn = &index->insn; - } else { - /* A prel31 offset to the unwind table */ - state->insn = (uint32_t *) - ((uintptr_t)&index->insn + - db_expand_prel31(index->insn)); - } - /* Run the unwind function */ - finished = db_unwind_tab(state); - } + finished = unwind_stack_one(state); /* Print the frame details */ sym = db_search_symbol(state->start_pc, DB_STGY_ANY, &offset); @@ -432,6 +105,9 @@ db_stack_trace_cmd(struct unwind_state *state) } db_printf("\n"); 
+ if (finished) + break; + /* * Stop if directed to do so, or if we've unwound back to the * kernel entry point, or if the unwind function didn't change @@ -441,9 +117,7 @@ db_stack_trace_cmd(struct unwind_state *state) * the last frame printed before you see the unwind failure * message (maybe it needs a STOP_UNWINDING). */ - if (index->insn == EXIDX_CANTUNWIND) { - finished = true; - } else if (state->registers[PC] < VM_MIN_KERNEL_ADDRESS) { + if (state->registers[PC] < VM_MIN_KERNEL_ADDRESS) { db_printf("Unable to unwind into user mode\n"); finished = true; } else if (state->update_mask == 0) { diff --git a/sys/arm/arm/unwind.c b/sys/arm/arm/unwind.c new file mode 100644 index 0000000..29f8200 --- /dev/null +++ b/sys/arm/arm/unwind.c @@ -0,0 +1,369 @@ +/* + * Copyright 2013-2014 Andrew Turner. + * Copyright 2013-2014 Ian Lepore. + * Copyright 2013-2014 Rui Paulo. + * Copyright 2013 Eitan Adler. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> + +#include <machine/stack.h> + +/* + * Definitions for the instruction interpreter. + * + * The ARM EABI specifies how to perform the frame unwinding in the + * Exception Handling ABI for the ARM Architecture document. To perform + * the unwind we need to know the initial frame pointer, stack pointer, + * link register and program counter. We then find the entry within the + * index table that points to the function the program counter is within. + * This gives us either a list of three instructions to process, a 31-bit + * relative offset to a table of instructions, or a value telling us + * we can't unwind any further. + * + * When we have the instructions to process we need to decode them + * following table 4 in section 9.3. This describes a collection of bit + * patterns to encode that steps to take to update the stack pointer and + * link register to the correct values at the start of the function. + */ + +/* A special case when we are unable to unwind past this function */ +#define EXIDX_CANTUNWIND 1 + +/* + * These are set in the linker script. Their addresses will be + * either the start or end of the exception table or index. + */ +extern int extab_start, extab_end, exidx_start, exidx_end; + +/* + * Entry types. + * These are the only entry types that have been seen in the kernel. 
+ */ +#define ENTRY_MASK 0xff000000 +#define ENTRY_ARM_SU16 0x80000000 +#define ENTRY_ARM_LU16 0x81000000 + +/* Instruction masks. */ +#define INSN_VSP_MASK 0xc0 +#define INSN_VSP_SIZE_MASK 0x3f +#define INSN_STD_MASK 0xf0 +#define INSN_STD_DATA_MASK 0x0f +#define INSN_POP_TYPE_MASK 0x08 +#define INSN_POP_COUNT_MASK 0x07 +#define INSN_VSP_LARGE_INC_MASK 0xff + +/* Instruction definitions */ +#define INSN_VSP_INC 0x00 +#define INSN_VSP_DEC 0x40 +#define INSN_POP_MASKED 0x80 +#define INSN_VSP_REG 0x90 +#define INSN_POP_COUNT 0xa0 +#define INSN_FINISH 0xb0 +#define INSN_POP_REGS 0xb1 +#define INSN_VSP_LARGE_INC 0xb2 + +/* An item in the exception index table */ +struct unwind_idx { + uint32_t offset; + uint32_t insn; +}; + +/* Expand a 31-bit signed value to a 32-bit signed value */ +static __inline int32_t +expand_prel31(uint32_t prel31) +{ + + return ((int32_t)(prel31 & 0x7fffffffu) << 1) / 2; +} + +/* + * Perform a binary search of the index table to find the function + * with the largest address that doesn't exceed addr. 
+ */ +static struct unwind_idx * +find_index(uint32_t addr) +{ + unsigned int min, mid, max; + struct unwind_idx *start; + struct unwind_idx *item; + int32_t prel31_addr; + uint32_t func_addr; + + start = (struct unwind_idx *)&exidx_start; + + min = 0; + max = (&exidx_end - &exidx_start) / 2; + + while (min != max) { + mid = min + (max - min + 1) / 2; + + item = &start[mid]; + + prel31_addr = expand_prel31(item->offset); + func_addr = (uint32_t)&item->offset + prel31_addr; + + if (func_addr <= addr) { + min = mid; + } else { + max = mid - 1; + } + } + + return &start[min]; +} + +/* Reads the next byte from the instruction list */ +static uint8_t +unwind_exec_read_byte(struct unwind_state *state) +{ + uint8_t insn; + + /* Read the unwind instruction */ + insn = (*state->insn) >> (state->byte * 8); + + /* Update the location of the next instruction */ + if (state->byte == 0) { + state->byte = 3; + state->insn++; + state->entries--; + } else + state->byte--; + + return insn; +} + +/* Executes the next instruction on the list */ +static int +unwind_exec_insn(struct unwind_state *state) +{ + unsigned int insn; + uint32_t *vsp = (uint32_t *)state->registers[SP]; + int update_vsp = 0; + + /* This should never happen */ + if (state->entries == 0) + return 1; + + /* Read the next instruction */ + insn = unwind_exec_read_byte(state); + + if ((insn & INSN_VSP_MASK) == INSN_VSP_INC) { + state->registers[SP] += ((insn & INSN_VSP_SIZE_MASK) << 2) + 4; + + } else if ((insn & INSN_VSP_MASK) == INSN_VSP_DEC) { + state->registers[SP] -= ((insn & INSN_VSP_SIZE_MASK) << 2) + 4; + + } else if ((insn & INSN_STD_MASK) == INSN_POP_MASKED) { + unsigned int mask, reg; + + /* Load the mask */ + mask = unwind_exec_read_byte(state); + mask |= (insn & INSN_STD_DATA_MASK) << 8; + + /* We have a refuse to unwind instruction */ + if (mask == 0) + return 1; + + /* Update SP */ + update_vsp = 1; + + /* Load the registers */ + for (reg = 4; mask && reg < 16; mask >>= 1, reg++) { + if (mask & 1) { + 
state->registers[reg] = *vsp++; + state->update_mask |= 1 << reg; + + /* If we have updated SP kep its value */ + if (reg == SP) + update_vsp = 0; + } + } + + } else if ((insn & INSN_STD_MASK) == INSN_VSP_REG && + ((insn & INSN_STD_DATA_MASK) != 13) && + ((insn & INSN_STD_DATA_MASK) != 15)) { + /* sp = register */ + state->registers[SP] = + state->registers[insn & INSN_STD_DATA_MASK]; + + } else if ((insn & INSN_STD_MASK) == INSN_POP_COUNT) { + unsigned int count, reg; + + /* Read how many registers to load */ + count = insn & INSN_POP_COUNT_MASK; + + /* Update sp */ + update_vsp = 1; + + /* Pop the registers */ + for (reg = 4; reg <= 4 + count; reg++) { + state->registers[reg] = *vsp++; + state->update_mask |= 1 << reg; + } + + /* Check if we are in the pop r14 version */ + if ((insn & INSN_POP_TYPE_MASK) != 0) { + state->registers[14] = *vsp++; + } + + } else if (insn == INSN_FINISH) { + /* Stop processing */ + state->entries = 0; + + } else if (insn == INSN_POP_REGS) { + unsigned int mask, reg; + + mask = unwind_exec_read_byte(state); + if (mask == 0 || (mask & 0xf0) != 0) + return 1; + + /* Update SP */ + update_vsp = 1; + + /* Load the registers */ + for (reg = 0; mask && reg < 4; mask >>= 1, reg++) { + if (mask & 1) { + state->registers[reg] = *vsp++; + state->update_mask |= 1 << reg; + } + } + + } else if ((insn & INSN_VSP_LARGE_INC_MASK) == INSN_VSP_LARGE_INC) { + unsigned int uleb128; + + /* Read the increment value */ + uleb128 = unwind_exec_read_byte(state); + + state->registers[SP] += 0x204 + (uleb128 << 2); + + } else { + /* We hit a new instruction that needs to be implemented */ +#if 0 + db_printf("Unhandled instruction %.2x\n", insn); +#endif + return 1; + } + + if (update_vsp) { + state->registers[SP] = (uint32_t)vsp; + } + +#if 0 + db_printf("fp = %08x, sp = %08x, lr = %08x, pc = %08x\n", + state->registers[FP], state->registers[SP], state->registers[LR], + state->registers[PC]); +#endif + + return 0; +} + +/* Performs the unwind of a function */ 
+static int +unwind_tab(struct unwind_state *state) +{ + uint32_t entry; + + /* Set PC to a known value */ + state->registers[PC] = 0; + + /* Read the personality */ + entry = *state->insn & ENTRY_MASK; + + if (entry == ENTRY_ARM_SU16) { + state->byte = 2; + state->entries = 1; + } else if (entry == ENTRY_ARM_LU16) { + state->byte = 1; + state->entries = ((*state->insn >> 16) & 0xFF) + 1; + } else { +#if 0 + db_printf("Unknown entry: %x\n", entry); +#endif + return 1; + } + + while (state->entries > 0) { + if (unwind_exec_insn(state) != 0) + return 1; + } + + /* + * The program counter was not updated, load it from the link register. + */ + if (state->registers[PC] == 0) { + state->registers[PC] = state->registers[LR]; + + /* + * If the program counter changed, flag it in the update mask. + */ + if (state->start_pc != state->registers[PC]) + state->update_mask |= 1 << PC; + } + + return 0; +} + +int +unwind_stack_one(struct unwind_state *state) +{ + struct unwind_idx *index; + int finished; + + /* Reset the mask of updated registers */ + state->update_mask = 0; + + /* The pc value is correct and will be overwritten, save it */ + state->start_pc = state->registers[PC]; + + /* Find the item to run */ + index = find_index(state->start_pc); + + finished = 0; + if (index->insn != EXIDX_CANTUNWIND) { + if (index->insn & (1U << 31)) { + /* The data is within the instruction */ + state->insn = &index->insn; + } else { + /* A prel31 offset to the unwind table */ + state->insn = (uint32_t *) + ((uintptr_t)&index->insn + + expand_prel31(index->insn)); + } + /* Run the unwind function */ + finished = unwind_tab(state); + } + + /* This is the top of the stack, finish */ + if (index->insn == EXIDX_CANTUNWIND) + finished = 1; + + return (finished); +} diff --git a/sys/arm/broadcom/bcm2835/bcm2835_bsc.c b/sys/arm/broadcom/bcm2835/bcm2835_bsc.c index 3e1afcd..debbf82 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_bsc.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_bsc.c @@ -247,7 +247,7 @@ 
bcm_bsc_attach(device_t dev) /* Check the unit we are attaching by its base address. */ start = rman_get_start(sc->sc_mem_res); for (i = 0; i < nitems(bcm_bsc_pins); i++) { - if (bcm_bsc_pins[i].start == start) + if (bcm_bsc_pins[i].start == (start & BCM_BSC_BASE_MASK)) break; } if (i == nitems(bcm_bsc_pins)) { diff --git a/sys/arm/broadcom/bcm2835/bcm2835_bscvar.h b/sys/arm/broadcom/bcm2835/bcm2835_bscvar.h index 5068356..6b31dc3 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_bscvar.h +++ b/sys/arm/broadcom/bcm2835/bcm2835_bscvar.h @@ -35,9 +35,10 @@ struct { uint32_t scl; unsigned long start; } bcm_bsc_pins[] = { - { 0, 1, 0x20205000 }, /* BSC0 GPIO pins and base address. */ - { 2, 3, 0x20804000 } /* BSC1 GPIO pins and base address. */ + { 0, 1, 0x205000 }, /* BSC0 GPIO pins and base address. */ + { 2, 3, 0x804000 } /* BSC1 GPIO pins and base address. */ }; +#define BCM_BSC_BASE_MASK 0x00ffffff struct bcm_bsc_softc { device_t sc_dev; diff --git a/sys/arm/broadcom/bcm2835/bcm2835_mbox_prop.h b/sys/arm/broadcom/bcm2835/bcm2835_mbox_prop.h index a2e212e..954ded3 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_mbox_prop.h +++ b/sys/arm/broadcom/bcm2835/bcm2835_mbox_prop.h @@ -52,6 +52,53 @@ struct bcm2835_mbox_tag_hdr { uint32_t val_len; }; +#define BCM2835_MBOX_POWER_ID_EMMC 0x00000000 +#define BCM2835_MBOX_POWER_ID_UART0 0x00000001 +#define BCM2835_MBOX_POWER_ID_UART1 0x00000002 +#define BCM2835_MBOX_POWER_ID_USB_HCD 0x00000003 +#define BCM2835_MBOX_POWER_ID_I2C0 0x00000004 +#define BCM2835_MBOX_POWER_ID_I2C1 0x00000005 +#define BCM2835_MBOX_POWER_ID_I2C2 0x00000006 +#define BCM2835_MBOX_POWER_ID_SPI 0x00000007 +#define BCM2835_MBOX_POWER_ID_CCP2TX 0x00000008 + +#define BCM2835_MBOX_POWER_ON (1 << 0) +#define BCM2835_MBOX_POWER_WAIT (1 << 1) + +#define BCM2835_MBOX_TAG_GET_POWER_STATE 0x00020001 +#define BCM2835_MBOX_TAG_SET_POWER_STATE 0x00028001 + +struct msg_get_power_state { + struct bcm2835_mbox_hdr hdr; + struct bcm2835_mbox_tag_hdr tag_hdr; + union { + struct { 
+ uint32_t device_id; + } req; + struct { + uint32_t device_id; + uint32_t state; + } resp; + } body; + uint32_t end_tag; +}; + +struct msg_set_power_state { + struct bcm2835_mbox_hdr hdr; + struct bcm2835_mbox_tag_hdr tag_hdr; + union { + struct { + uint32_t device_id; + uint32_t state; + } req; + struct { + uint32_t device_id; + uint32_t state; + } resp; + } body; + uint32_t end_tag; +}; + #define BCM2835_MBOX_CLOCK_ID_EMMC 0x00000001 #define BCM2835_MBOX_CLOCK_ID_UART 0x00000002 #define BCM2835_MBOX_CLOCK_ID_ARM 0x00000003 diff --git a/sys/arm/include/stack.h b/sys/arm/include/stack.h index 0a5ebfe..c76ad66 100644 --- a/sys/arm/include/stack.h +++ b/sys/arm/include/stack.h @@ -39,4 +39,22 @@ #define FR_RSP (-2) #define FR_RFP (-3) +/* The state of the unwind process */ +struct unwind_state { + uint32_t registers[16]; + uint32_t start_pc; + uint32_t *insn; + u_int entries; + u_int byte; + uint16_t update_mask; +}; + +/* The register names */ +#define FP 11 +#define SP 13 +#define LR 14 +#define PC 15 + +int unwind_stack_one(struct unwind_state *); + #endif /* !_MACHINE_STACK_H_ */ diff --git a/sys/boot/fdt/dts/arm/bcm2835.dtsi b/sys/boot/fdt/dts/arm/bcm2835.dtsi index bb30248..6ff1944 100644 --- a/sys/boot/fdt/dts/arm/bcm2835.dtsi +++ b/sys/boot/fdt/dts/arm/bcm2835.dtsi @@ -397,6 +397,8 @@ }; bsc0 { + #address-cells = <1>; + #size-cells = <0>; compatible = "broadcom,bcm2835-bsc", "broadcom,bcm2708-bsc"; reg = <0x205000 0x20>; @@ -405,6 +407,8 @@ }; bsc1 { + #address-cells = <1>; + #size-cells = <0>; compatible = "broadcom,bcm2835-bsc", "broadcom,bcm2708-bsc"; reg = <0x804000 0x20>; diff --git a/sys/conf/files b/sys/conf/files index e10fd5e..c08e40c 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1141,7 +1141,7 @@ dev/cxgb/sys/uipc_mvec.c optional cxgb pci \ dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgbe/t4_mp_ring.c optional cxgbe pci \ - compile-with "${NORMAL_C} -I$S/dev/cxgbe ${GCC_MS_EXTENSIONS}" + 
compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_main.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_netmap.c optional cxgbe pci \ @@ -2563,8 +2563,16 @@ dev/usb/template/usb_template_mtp.c optional usb_template dev/usb/template/usb_template_phone.c optional usb_template dev/usb/template/usb_template_serialnet.c optional usb_template # +# USB video drivers +# +dev/usb/video/udl.c optional udl +# # USB END # +dev/videomode/videomode.c optional videomode +dev/videomode/edid.c optional videomode +dev/videomode/pickmode.c optional videomode +dev/videomode/vesagtf.c optional videomode dev/utopia/idtphy.c optional utopia dev/utopia/suni.c optional utopia dev/utopia/utopia.c optional utopia @@ -3616,19 +3624,16 @@ ofed/drivers/infiniband/core/fmr_pool.c optional ofed \ ofed/drivers/infiniband/core/iwcm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" -ofed/drivers/infiniband/core/local_sa.c optional ofed \ - no-depend \ - compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/mad_rmpp.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/multicast.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" -ofed/drivers/infiniband/core/notice.c optional ofed \ +ofed/drivers/infiniband/core/packer.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" -ofed/drivers/infiniband/core/packer.c optional ofed \ +ofed/drivers/infiniband/core/peer_mem.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/sa_query.c optional ofed \ @@ -3733,6 +3738,9 @@ ofed/drivers/infiniband/hw/mlx4/mad.c optional mlx4ib \ ofed/drivers/infiniband/hw/mlx4/main.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" 
+ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c optional mlx4ib \ + no-depend obj-prefix "mlx4ib_" \ + compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/mr.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" diff --git a/sys/conf/files.arm b/sys/conf/files.arm index 8ed4a7e..7fbccd1 100644 --- a/sys/conf/files.arm +++ b/sys/conf/files.arm @@ -57,6 +57,7 @@ arm/arm/trap.c optional !armv6 arm/arm/trap-v6.c optional armv6 arm/arm/uio_machdep.c standard arm/arm/undefined.c standard +arm/arm/unwind.c optional ddb arm/arm/vm_machdep.c standard arm/arm/vfp.c standard board_id.h standard \ diff --git a/sys/conf/kern.mk b/sys/conf/kern.mk index d97f4d2..65babc5 100644 --- a/sys/conf/kern.mk +++ b/sys/conf/kern.mk @@ -39,7 +39,6 @@ CLANG_NO_IAS34= -no-integrated-as .endif .if ${COMPILER_TYPE} == "gcc" -GCC_MS_EXTENSIONS= -fms-extensions .if ${COMPILER_VERSION} >= 40300 # Catch-all for all the things that are in our tree, but for which we're # not yet ready for this compiler. Note: we likely only really "support" diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk index 232d88b..ede4ae8 100644 --- a/sys/conf/kern.pre.mk +++ b/sys/conf/kern.pre.mk @@ -97,7 +97,7 @@ CFLAGS_PARAM_LARGE_FUNCTION_GROWTH?=1000 .if ${MACHINE_CPUARCH} == "mips" CFLAGS_ARCH_PARAMS?=--param max-inline-insns-single=1000 .endif -CFLAGS.gcc+= -fno-common -finline-limit=${INLINE_LIMIT} +CFLAGS.gcc+= -fno-common -fms-extensions -finline-limit=${INLINE_LIMIT} CFLAGS.gcc+= --param inline-unit-growth=${CFLAGS_PARAM_INLINE_UNIT_GROWTH} CFLAGS.gcc+= --param large-function-growth=${CFLAGS_PARAM_LARGE_FUNCTION_GROWTH} .if defined(CFLAGS_ARCH_PARAMS) @@ -162,7 +162,7 @@ NORMAL_LINT= ${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.IMPSRC} # Infiniband C flags. Correct include paths and omit errors that linux # does not honor. 
OFEDINCLUDES= -I$S/ofed/include/ -OFEDNOERR= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} +OFEDNOERR= -Wno-cast-qual -Wno-pointer-arith OFEDCFLAGS= ${CFLAGS:N-I*} ${OFEDINCLUDES} ${CFLAGS:M-I*} ${OFEDNOERR} OFED_C_NOIMP= ${CC} -c -o ${.TARGET} ${OFEDCFLAGS} ${WERROR} ${PROF} OFED_C= ${OFED_C_NOIMP} ${.IMPSRC} diff --git a/sys/conf/kmod.mk b/sys/conf/kmod.mk index 8e00aa4..91d4f49 100644 --- a/sys/conf/kmod.mk +++ b/sys/conf/kmod.mk @@ -105,6 +105,7 @@ CFLAGS+= -I. -I${SYSDIR} CFLAGS+= -I${SYSDIR}/contrib/altq CFLAGS.gcc+= -finline-limit=${INLINE_LIMIT} +CFLAGS.gcc+= -fms-extensions CFLAGS.gcc+= --param inline-unit-growth=100 CFLAGS.gcc+= --param large-function-growth=1000 diff --git a/sys/conf/options b/sys/conf/options index bf15767..08a5523 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -889,6 +889,9 @@ TDMA_TXRATE_QUARTER_DEFAULT opt_tdma.h TDMA_TXRATE_11NA_DEFAULT opt_tdma.h TDMA_TXRATE_11NG_DEFAULT opt_tdma.h +# VideoMode +PICKMODE_DEBUG opt_videomode.h + # Network stack virtualization options VIMAGE opt_global.h VNET_DEBUG opt_global.h diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h index eaabbe2..f5d1274 100644 --- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h +++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h @@ -1239,7 +1239,9 @@ extern HAL_BOOL ar9300_set_mac_address(struct ath_hal *ah, const u_int8_t *); extern void ar9300_get_bss_id_mask(struct ath_hal *ah, u_int8_t *mac); extern HAL_BOOL ar9300_set_bss_id_mask(struct ath_hal *, const u_int8_t *); extern HAL_STATUS ar9300_select_ant_config(struct ath_hal *ah, u_int32_t cfg); +#if 0 extern u_int32_t ar9300_ant_ctrl_common_get(struct ath_hal *ah, HAL_BOOL is_2ghz); +#endif extern HAL_BOOL ar9300_ant_swcom_sel(struct ath_hal *ah, u_int8_t ops, u_int32_t *common_tbl1, u_int32_t *common_tbl2); extern HAL_BOOL ar9300_set_regulatory_domain(struct ath_hal *ah, diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c 
b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c index a3cca95..f4da88c 100644 --- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c +++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c @@ -57,6 +57,9 @@ ar9300_beacon_init(struct ath_hal *ah, /* Add the fraction adjustment lost due to unit conversions. */ beacon_period_usec += beacon_period_fraction; + HALDEBUG(ah, HAL_DEBUG_BEACON, + "%s: next_beacon=0x%08x, beacon_period=%d, opmode=%d, beacon_period_usec=%d\n", + __func__, next_beacon, beacon_period, opmode, beacon_period_usec); OS_REG_WRITE(ah, AR_BEACON_PERIOD, beacon_period_usec); OS_REG_WRITE(ah, AR_DMA_BEACON_PERIOD, beacon_period_usec); diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c index 7a5919e..7ba7823 100644 --- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c +++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c @@ -36,6 +36,9 @@ static HAL_BOOL ar9300ClrMulticastFilterIndex(struct ath_hal *ah, uint32_t ix); static HAL_BOOL ar9300SetMulticastFilterIndex(struct ath_hal *ah, uint32_t ix); +static void ar9300_beacon_set_beacon_timers(struct ath_hal *ah, + const HAL_BEACON_TIMERS *bt); + static void ar9300SetChainMasks(struct ath_hal *ah, uint32_t tx_chainmask, uint32_t rx_chainmask) @@ -193,10 +196,9 @@ ar9300_attach_freebsd_ops(struct ath_hal *ah) /* Beacon functions */ /* ah_setBeaconTimers */ ah->ah_beaconInit = ar9300_freebsd_beacon_init; - /* ah_setBeaconTimers */ + ah->ah_setBeaconTimers = ar9300_beacon_set_beacon_timers; ah->ah_setStationBeaconTimers = ar9300_set_sta_beacon_timers; /* ah_resetStationBeaconTimers */ - /* ah_getNextTBTT */ ah->ah_getNextTBTT = ar9300_get_next_tbtt; /* Interrupt functions */ @@ -669,6 +671,55 @@ ar9300SetMulticastFilterIndex(struct ath_hal *ah, uint32_t ix) return (AH_TRUE); } +#define TU_TO_USEC(_tu) ((_tu) << 10) +#define ONE_EIGHTH_TU_TO_USEC(_tu8) ((_tu8) << 7) + +/* + * Initializes all of the hardware registers used to + * 
send beacons. Note that for station operation the + * driver calls ar9300_set_sta_beacon_timers instead. + */ +static void +ar9300_beacon_set_beacon_timers(struct ath_hal *ah, + const HAL_BEACON_TIMERS *bt) +{ + uint32_t bperiod; + +#if 0 + HALASSERT(opmode == HAL_M_IBSS || opmode == HAL_M_HOSTAP); + if (opmode == HAL_M_IBSS) { + OS_REG_SET_BIT(ah, AR_TXCFG, AR_TXCFG_ADHOC_BEACON_ATIM_TX_POLICY); + } +#endif + + /* XXX TODO: should migrate the HAL code to always use ONE_EIGHTH_TU */ + OS_REG_WRITE(ah, AR_NEXT_TBTT_TIMER, TU_TO_USEC(bt->bt_nexttbtt)); + OS_REG_WRITE(ah, AR_NEXT_DMA_BEACON_ALERT, ONE_EIGHTH_TU_TO_USEC(bt->bt_nextdba)); + OS_REG_WRITE(ah, AR_NEXT_SWBA, ONE_EIGHTH_TU_TO_USEC(bt->bt_nextswba)); + OS_REG_WRITE(ah, AR_NEXT_NDP_TIMER, TU_TO_USEC(bt->bt_nextatim)); + + bperiod = TU_TO_USEC(bt->bt_intval & HAL_BEACON_PERIOD); + /* XXX TODO! */ +// ahp->ah_beaconInterval = bt->bt_intval & HAL_BEACON_PERIOD; + OS_REG_WRITE(ah, AR_BEACON_PERIOD, bperiod); + OS_REG_WRITE(ah, AR_DMA_BEACON_PERIOD, bperiod); + OS_REG_WRITE(ah, AR_SWBA_PERIOD, bperiod); + OS_REG_WRITE(ah, AR_NDP_PERIOD, bperiod); + + /* + * Reset TSF if required. + */ + if (bt->bt_intval & HAL_BEACON_RESET_TSF) + ar9300_reset_tsf(ah); + + /* enable timers */ + /* NB: flags == 0 handled specially for backwards compatibility */ + OS_REG_SET_BIT(ah, AR_TIMER_MODE, + bt->bt_flags != 0 ? 
bt->bt_flags : + AR_TBTT_TIMER_EN | AR_DBA_TIMER_EN | AR_SWBA_TIMER_EN); +} + + /* * RF attach stubs */ diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c index 5660c1f..1dcdafe 100644 --- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c +++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c @@ -162,7 +162,6 @@ ar9300_gpio_cfg_output( HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins); if ((gpio == AR9382_GPIO_PIN_8_RESERVED) || - (gpio == AR9382_GPIO_PIN_11_RESERVED) || (gpio == AR9382_GPIO_9_INPUT_ONLY)) { return AH_FALSE; @@ -348,7 +347,6 @@ ar9300_gpio_cfg_input(struct ath_hal *ah, u_int32_t gpio) HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins); if ((gpio == AR9382_GPIO_PIN_8_RESERVED) || - (gpio == AR9382_GPIO_PIN_11_RESERVED) || (gpio > AR9382_MAX_GPIO_INPUT_PIN_NUM)) { return AH_FALSE; @@ -378,7 +376,6 @@ ar9300_gpio_set(struct ath_hal *ah, u_int32_t gpio, u_int32_t val) { HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins); if ((gpio == AR9382_GPIO_PIN_8_RESERVED) || - (gpio == AR9382_GPIO_PIN_11_RESERVED) || (gpio == AR9382_GPIO_9_INPUT_ONLY)) { return AH_FALSE; @@ -397,8 +394,7 @@ ar9300_gpio_get(struct ath_hal *ah, u_int32_t gpio) { u_int32_t gpio_in; HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins); - if ((gpio == AR9382_GPIO_PIN_8_RESERVED) || - (gpio == AR9382_GPIO_PIN_11_RESERVED)) + if (gpio == AR9382_GPIO_PIN_8_RESERVED) { return 0xffffffff; } @@ -453,7 +449,6 @@ ar9300_gpio_set_intr(struct ath_hal *ah, u_int gpio, u_int32_t ilevel) HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins); if ((gpio == AR9382_GPIO_PIN_8_RESERVED) || - (gpio == AR9382_GPIO_PIN_11_RESERVED) || (gpio > AR9382_MAX_GPIO_INPUT_PIN_NUM)) { return; @@ -549,8 +544,7 @@ ar9300_gpio_get_mask(struct ath_hal *ah) if (AH_PRIVATE(ah)->ah_devid == AR9300_DEVID_AR9380_PCIE) { mask = (1 << AR9382_MAX_GPIO_PIN_NUM) - 1; - mask &= ~(1 << AR9382_GPIO_PIN_8_RESERVED | - 1 << 
AR9382_GPIO_PIN_11_RESERVED); + mask &= ~(1 << AR9382_GPIO_PIN_8_RESERVED); } return mask; } @@ -562,8 +556,7 @@ ar9300_gpio_set_mask(struct ath_hal *ah, u_int32_t mask, u_int32_t pol_map) if (AH_PRIVATE(ah)->ah_devid == AR9300_DEVID_AR9380_PCIE) { invalid = ~((1 << AR9382_MAX_GPIO_PIN_NUM) - 1); - invalid |= 1 << AR9382_GPIO_PIN_8_RESERVED | - 1 << AR9382_GPIO_PIN_11_RESERVED; + invalid |= 1 << AR9382_GPIO_PIN_8_RESERVED; } if (mask & invalid) { ath_hal_printf(ah, "%s: invalid GPIO mask 0x%x\n", __func__, mask); diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c index dbf58f6..21c98a1 100644 --- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c +++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c @@ -683,6 +683,7 @@ ar9300_get_capability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, { struct ath_hal_9300 *ahp = AH9300(ah); const HAL_CAPABILITIES *p_cap = &AH_PRIVATE(ah)->ah_caps; + struct ar9300_ani_state *ani; switch (type) { case HAL_CAP_CIPHER: /* cipher handled in hardware */ @@ -911,6 +912,34 @@ ar9300_get_capability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, return HAL_ENOTSUPP; } #endif + + /* FreeBSD ANI */ + case HAL_CAP_INTMIT: /* interference mitigation */ + switch (capability) { + case HAL_CAP_INTMIT_PRESENT: /* hardware capability */ + return HAL_OK; + case HAL_CAP_INTMIT_ENABLE: + return (ahp->ah_proc_phy_err & HAL_PROCESS_ANI) ? + HAL_OK : HAL_ENXIO; + case HAL_CAP_INTMIT_NOISE_IMMUNITY_LEVEL: + case HAL_CAP_INTMIT_OFDM_WEAK_SIGNAL_LEVEL: +// case HAL_CAP_INTMIT_CCK_WEAK_SIGNAL_THR: + case HAL_CAP_INTMIT_FIRSTEP_LEVEL: + case HAL_CAP_INTMIT_SPUR_IMMUNITY_LEVEL: + ani = ar9300_ani_get_current_state(ah); + if (ani == AH_NULL) + return HAL_ENXIO; + switch (capability) { + /* XXX AR9300 HAL has OFDM/CCK noise immunity level params? 
*/ + case 2: *result = ani->ofdm_noise_immunity_level; break; + case 3: *result = !ani->ofdm_weak_sig_detect_off; break; + // case 4: *result = ani->cck_weak_sig_threshold; break; + case 5: *result = ani->firstep_level; break; + case 6: *result = ani->spur_immunity_level; break; + } + return HAL_OK; + } + return HAL_EINVAL; default: return ath_hal_getcapability(ah, type, capability, result); } @@ -986,6 +1015,27 @@ ar9300_set_capability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, return AH_TRUE; } return AH_FALSE; + + /* FreeBSD interrupt mitigation / ANI */ + case HAL_CAP_INTMIT: { /* interference mitigation */ + /* This maps the public ANI commands to the internal ANI commands */ + /* Private: HAL_ANI_CMD; Public: HAL_CAP_INTMIT_CMD */ + static const HAL_ANI_CMD cmds[] = { + HAL_ANI_PRESENT, + HAL_ANI_MODE, + HAL_ANI_NOISE_IMMUNITY_LEVEL, + HAL_ANI_OFDM_WEAK_SIGNAL_DETECTION, + HAL_ANI_CCK_WEAK_SIGNAL_THR, + HAL_ANI_FIRSTEP_LEVEL, + HAL_ANI_SPUR_IMMUNITY_LEVEL, + }; +#define N(a) (sizeof(a) / sizeof(a[0])) + return capability < N(cmds) ? 
+ ar9300_ani_control(ah, cmds[capability], setting) : + AH_FALSE; +#undef N + } + case HAL_CAP_RXBUFSIZE: /* set MAC receive buffer size */ ahp->rx_buf_size = setting & AR_DATABUF_MASK; OS_REG_WRITE(ah, AR_DATABUF, ahp->rx_buf_size); diff --git a/sys/contrib/rdma/krping/krping.c b/sys/contrib/rdma/krping/krping.c index c89339e..94b2eb9 100644 --- a/sys/contrib/rdma/krping/krping.c +++ b/sys/contrib/rdma/krping/krping.c @@ -525,7 +525,7 @@ static void krping_setup_wr(struct krping_cb *cb) case MW: cb->bind_attr.wr_id = 0xabbaabba; cb->bind_attr.send_flags = 0; /* unsignaled */ - cb->bind_attr.length = cb->size; + cb->bind_attr.bind_info.length = cb->size; break; default: break; @@ -627,7 +627,7 @@ static int krping_setup_buffers(struct krping_cb *cb) cb->page_list, cb->page_list_len); break; case MW: - cb->mw = ib_alloc_mw(cb->pd); + cb->mw = ib_alloc_mw(cb->pd,IB_MW_TYPE_1); if (IS_ERR(cb->mw)) { DEBUG_LOG(cb, "recv_buf alloc_mw failed\n"); ret = PTR_ERR(cb->mw); @@ -898,15 +898,15 @@ static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv) * Update the MW with new buf info. 
*/ if (buf == (u64)cb->start_dma_addr) { - cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ; - cb->bind_attr.mr = cb->start_mr; + cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_READ; + cb->bind_attr.bind_info.mr = cb->start_mr; } else { - cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE; - cb->bind_attr.mr = cb->rdma_mr; + cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_WRITE; + cb->bind_attr.bind_info.mr = cb->rdma_mr; } - cb->bind_attr.addr = buf; + cb->bind_attr.bind_info.addr = buf; DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n", - cb->mw->rkey, buf, cb->bind_attr.mr->rkey); + cb->mw->rkey, buf, cb->bind_attr.bind_info.mr->rkey); ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr); if (ret) { PRINTF(cb, "bind mw error %d\n", ret); @@ -2304,7 +2304,7 @@ int krping_doit(char *cmd, void *cookie) goto out; } - cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP); + cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cb->cm_id)) { ret = PTR_ERR(cb->cm_id); PRINTF(cb, "rdma_create_id error %d\n", ret); diff --git a/sys/dev/acpica/acpi_pcib.c b/sys/dev/acpica/acpi_pcib.c index c4bded8..93dada5 100644 --- a/sys/dev/acpica/acpi_pcib.c +++ b/sys/dev/acpica/acpi_pcib.c @@ -95,7 +95,7 @@ prt_attach_devices(ACPI_PCI_ROUTING_TABLE *entry, void *arg) int error; /* We only care about entries that reference a link device. 
*/ - if (entry->Source == NULL || entry->Source[0] == '\0') + if (entry->Source[0] == '\0') return; /* @@ -222,7 +222,7 @@ acpi_pcib_route_interrupt(device_t pcib, device_t dev, int pin, if (bootverbose) { device_printf(pcib, "matched entry for %d.%d.INT%c", pci_get_bus(dev), pci_get_slot(dev), 'A' + pin); - if (prt->Source != NULL && prt->Source[0] != '\0') + if (prt->Source[0] != '\0') printf(" (src %s:%u)", prt->Source, prt->SourceIndex); printf("\n"); } @@ -234,8 +234,7 @@ acpi_pcib_route_interrupt(device_t pcib, device_t dev, int pin, * XXX: If the source index is non-zero, ignore the source device and * assume that this is a hard-wired entry. */ - if (prt->Source == NULL || prt->Source[0] == '\0' || - prt->SourceIndex != 0) { + if (prt->Source[0] == '\0' || prt->SourceIndex != 0) { if (bootverbose) device_printf(pcib, "slot %d INT%c hardwired to IRQ %d\n", pci_get_slot(dev), 'A' + pin, prt->SourceIndex); diff --git a/sys/dev/ath/if_ath_pci.c b/sys/dev/ath/if_ath_pci.c index 057ec2c..5610882 100644 --- a/sys/dev/ath/if_ath_pci.c +++ b/sys/dev/ath/if_ath_pci.c @@ -279,6 +279,13 @@ ath_pci_attach(device_t dev) */ sc->sc_invalid = 1; + ATH_LOCK_INIT(sc); + ATH_PCU_LOCK_INIT(sc); + ATH_RX_LOCK_INIT(sc); + ATH_TX_LOCK_INIT(sc); + ATH_TX_IC_LOCK_INIT(sc); + ATH_TXSTATUS_LOCK_INIT(sc); + /* * Arrange interrupt line. */ @@ -329,7 +336,7 @@ ath_pci_attach(device_t dev) if (fw == NULL) { device_printf(dev, "%s: couldn't find firmware\n", __func__); - goto bad3; + goto bad4; } device_printf(dev, "%s: EEPROM firmware @ %p\n", @@ -339,30 +346,20 @@ ath_pci_attach(device_t dev) if (! 
sc->sc_eepromdata) { device_printf(dev, "%s: can't malloc eepromdata\n", __func__); - goto bad3; + goto bad4; } memcpy(sc->sc_eepromdata, fw->data, fw->datasize); firmware_put(fw, 0); } #endif /* ATH_EEPROM_FIRMWARE */ - ATH_LOCK_INIT(sc); - ATH_PCU_LOCK_INIT(sc); - ATH_RX_LOCK_INIT(sc); - ATH_TX_LOCK_INIT(sc); - ATH_TX_IC_LOCK_INIT(sc); - ATH_TXSTATUS_LOCK_INIT(sc); - error = ath_attach(pci_get_device(dev), sc); if (error == 0) /* success */ return 0; - ATH_TXSTATUS_LOCK_DESTROY(sc); - ATH_PCU_LOCK_DESTROY(sc); - ATH_RX_LOCK_DESTROY(sc); - ATH_TX_IC_LOCK_DESTROY(sc); - ATH_TX_LOCK_DESTROY(sc); - ATH_LOCK_DESTROY(sc); +#ifdef ATH_EEPROM_FIRMWARE +bad4: +#endif bus_dma_tag_destroy(sc->sc_dmat); bad3: bus_teardown_intr(dev, psc->sc_irq, psc->sc_ih); @@ -370,6 +367,14 @@ bad2: bus_release_resource(dev, SYS_RES_IRQ, 0, psc->sc_irq); bad1: bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, psc->sc_sr); + + ATH_TXSTATUS_LOCK_DESTROY(sc); + ATH_PCU_LOCK_DESTROY(sc); + ATH_RX_LOCK_DESTROY(sc); + ATH_TX_IC_LOCK_DESTROY(sc); + ATH_TX_LOCK_DESTROY(sc); + ATH_LOCK_DESTROY(sc); + bad: return (error); } diff --git a/sys/dev/atkbdc/atkbd.c b/sys/dev/atkbdc/atkbd.c index 0d2b44b..d93c1c6 100644 --- a/sys/dev/atkbdc/atkbd.c +++ b/sys/dev/atkbdc/atkbd.c @@ -77,6 +77,10 @@ typedef struct atkbd_state { static void atkbd_timeout(void *arg); static void atkbd_shutdown_final(void *v); +static int atkbd_reset(KBDC kbdc, int flags, int c); + +#define HAS_QUIRK(p, q) (((atkbdc_softc_t *)(p))->quirks & q) +#define ALLOW_DISABLE_KBD(kbdc) !HAS_QUIRK(kbdc, KBDC_QUIRK_KEEP_ACTIVATED) int atkbd_probe_unit(device_t dev, int irq, int flags) @@ -1095,6 +1099,39 @@ atkbd_shutdown_final(void *v) #endif } +static int +atkbd_reset(KBDC kbdc, int flags, int c) +{ + /* reset keyboard hardware */ + if (!(flags & KB_CONF_NO_RESET) && !reset_kbd(kbdc)) { + /* + * KEYBOARD ERROR + * Keyboard reset may fail either because the keyboard + * doen't exist, or because the keyboard doesn't pass + * the self-test, or 
the keyboard controller on the + * motherboard and the keyboard somehow fail to shake hands. + * It is just possible, particularly in the last case, + * that the keyboard controller may be left in a hung state. + * test_controller() and test_kbd_port() appear to bring + * the keyboard controller back (I don't know why and how, + * though.) + */ + empty_both_buffers(kbdc, 10); + test_controller(kbdc); + test_kbd_port(kbdc); + /* + * We could disable the keyboard port and interrupt... but, + * the keyboard may still exist (see above). + */ + set_controller_command_byte(kbdc, + ALLOW_DISABLE_KBD(kbdc) ? 0xff : KBD_KBD_CONTROL_BITS, c); + if (bootverbose) + printf("atkbd: failed to reset the keyboard.\n"); + return (EIO); + } + return (0); +} + /* local functions */ static int @@ -1250,13 +1287,14 @@ probe_keyboard(KBDC kbdc, int flags) kbdc_set_device_mask(kbdc, m | KBD_KBD_CONTROL_BITS); } else { /* try to restore the command byte as before */ - set_controller_command_byte(kbdc, 0xff, c); + set_controller_command_byte(kbdc, + ALLOW_DISABLE_KBD(kbdc) ? 0xff : KBD_KBD_CONTROL_BITS, c); kbdc_set_device_mask(kbdc, m); } #endif kbdc_lock(kbdc, FALSE); - return err; + return (HAS_QUIRK(kbdc, KBDC_QUIRK_IGNORE_PROBE_RESULT) ? 0 : err); } static int @@ -1299,6 +1337,12 @@ init_keyboard(KBDC kbdc, int *type, int flags) return EIO; } + if (HAS_QUIRK(kbdc, KBDC_QUIRK_RESET_AFTER_PROBE) && + atkbd_reset(kbdc, flags, c)) { + kbdc_lock(kbdc, FALSE); + return EIO; + } + /* * Check if we have an XT keyboard before we attempt to reset it. 
* The procedure assumes that the keyboard and the controller have @@ -1343,31 +1387,9 @@ init_keyboard(KBDC kbdc, int *type, int flags) if (bootverbose) printf("atkbd: keyboard ID 0x%x (%d)\n", id, *type); - /* reset keyboard hardware */ - if (!(flags & KB_CONF_NO_RESET) && !reset_kbd(kbdc)) { - /* - * KEYBOARD ERROR - * Keyboard reset may fail either because the keyboard - * doen't exist, or because the keyboard doesn't pass - * the self-test, or the keyboard controller on the - * motherboard and the keyboard somehow fail to shake hands. - * It is just possible, particularly in the last case, - * that the keyboard controller may be left in a hung state. - * test_controller() and test_kbd_port() appear to bring - * the keyboard controller back (I don't know why and how, - * though.) - */ - empty_both_buffers(kbdc, 10); - test_controller(kbdc); - test_kbd_port(kbdc); - /* - * We could disable the keyboard port and interrupt... but, - * the keyboard may still exist (see above). - */ - set_controller_command_byte(kbdc, 0xff, c); + if (!HAS_QUIRK(kbdc, KBDC_QUIRK_RESET_AFTER_PROBE) && + atkbd_reset(kbdc, flags, c)) { kbdc_lock(kbdc, FALSE); - if (bootverbose) - printf("atkbd: failed to reset the keyboard.\n"); return EIO; } @@ -1387,7 +1409,8 @@ init_keyboard(KBDC kbdc, int *type, int flags) * The XT kbd isn't usable unless the proper scan * code set is selected. */ - set_controller_command_byte(kbdc, 0xff, c); + set_controller_command_byte(kbdc, ALLOW_DISABLE_KBD(kbdc) + ? 
0xff : KBD_KBD_CONTROL_BITS, c); kbdc_lock(kbdc, FALSE); printf("atkbd: unable to set the XT keyboard mode.\n"); return EIO; @@ -1402,6 +1425,17 @@ init_keyboard(KBDC kbdc, int *type, int flags) c |= KBD_TRANSLATION; #endif + /* + * Some keyboards require a SETLEDS command to be sent after + * the reset command before they will send keystrokes to us + */ + if (HAS_QUIRK(kbdc, KBDC_QUIRK_SETLEDS_ON_INIT) && + send_kbd_command_and_data(kbdc, KBDC_SET_LEDS, 0) != KBD_ACK) { + printf("atkbd: setleds failed\n"); + } + if (!ALLOW_DISABLE_KBD(kbdc)) + send_kbd_command(kbdc, KBDC_ENABLE_KBD); + /* enable the keyboard port and intr. */ if (!set_controller_command_byte(kbdc, KBD_KBD_CONTROL_BITS | KBD_TRANSLATION | KBD_OVERRIDE_KBD_LOCK, @@ -1412,7 +1446,9 @@ init_keyboard(KBDC kbdc, int *type, int flags) * This is serious; we are left with the disabled * keyboard intr. */ - set_controller_command_byte(kbdc, 0xff, c); + set_controller_command_byte(kbdc, ALLOW_DISABLE_KBD(kbdc) + ? 0xff : (KBD_KBD_CONTROL_BITS | KBD_TRANSLATION | + KBD_OVERRIDE_KBD_LOCK), c); kbdc_lock(kbdc, FALSE); printf("atkbd: unable to enable the keyboard port and intr.\n"); return EIO; diff --git a/sys/dev/atkbdc/atkbdc.c b/sys/dev/atkbdc/atkbdc.c index 9368dbe..69ffa63 100644 --- a/sys/dev/atkbdc/atkbdc.c +++ b/sys/dev/atkbdc/atkbdc.c @@ -114,6 +114,41 @@ static int wait_for_kbd_ack(atkbdc_softc_t *kbdc); static int wait_for_aux_data(atkbdc_softc_t *kbdc); static int wait_for_aux_ack(atkbdc_softc_t *kbdc); +struct atkbdc_quirks { + const char* bios_vendor; + const char* maker; + const char* product; + int quirk; +}; + +static struct atkbdc_quirks quirks[] = { + {"coreboot", "Acer", "Peppy", + KBDC_QUIRK_KEEP_ACTIVATED | KBDC_QUIRK_IGNORE_PROBE_RESULT | + KBDC_QUIRK_RESET_AFTER_PROBE | KBDC_QUIRK_SETLEDS_ON_INIT}, + + {NULL, NULL, NULL, 0} +}; + +#define QUIRK_STR_MATCH(s1, s2) (s1 == NULL || \ + (s2 != NULL && !strcmp(s1, s2))) + +static int +atkbdc_getquirks(void) +{ + int i; + char* bios_vendor = 
kern_getenv("smbios.bios.vendor"); + char* maker = kern_getenv("smbios.system.maker"); + char* product = kern_getenv("smbios.system.product"); + + for (i=0; quirks[i].quirk != 0; ++i) + if (QUIRK_STR_MATCH(quirks[i].bios_vendor, bios_vendor) && + QUIRK_STR_MATCH(quirks[i].maker, maker) && + QUIRK_STR_MATCH(quirks[i].product, product)) + return (quirks[i].quirk); + + return (0); +} + atkbdc_softc_t *atkbdc_get_softc(int unit) { @@ -295,6 +330,7 @@ atkbdc_setup(atkbdc_softc_t *sc, bus_space_tag_t tag, bus_space_handle_t h0, #else sc->retry = 5000; #endif + sc->quirks = atkbdc_getquirks(); return 0; } @@ -1124,7 +1160,8 @@ void kbdc_set_device_mask(KBDC p, int mask) { kbdcp(p)->command_mask = - mask & (KBD_KBD_CONTROL_BITS | KBD_AUX_CONTROL_BITS); + mask & (((kbdcp(p)->quirks & KBDC_QUIRK_KEEP_ACTIVATED) + ? 0 : KBD_KBD_CONTROL_BITS) | KBD_AUX_CONTROL_BITS); } int diff --git a/sys/dev/atkbdc/atkbdcreg.h b/sys/dev/atkbdc/atkbdcreg.h index 44a9801..db590b9 100644 --- a/sys/dev/atkbdc/atkbdcreg.h +++ b/sys/dev/atkbdc/atkbdcreg.h @@ -202,6 +202,11 @@ typedef struct atkbdc_softc { kqueue kbd; /* keyboard data queue */ kqueue aux; /* auxiliary data queue */ int retry; + int quirks; /* controller doesn't like deactivate */ +#define KBDC_QUIRK_KEEP_ACTIVATED (1 << 0) +#define KBDC_QUIRK_IGNORE_PROBE_RESULT (1 << 1) +#define KBDC_QUIRK_RESET_AFTER_PROBE (1 << 2) +#define KBDC_QUIRK_SETLEDS_ON_INIT (1 << 3) } atkbdc_softc_t; enum kbdc_device_ivar { diff --git a/sys/dev/atkbdc/psm.c b/sys/dev/atkbdc/psm.c index 9a6ae72..94cf880 100644 --- a/sys/dev/atkbdc/psm.c +++ b/sys/dev/atkbdc/psm.c @@ -371,6 +371,10 @@ static devclass_t psm_devclass; /* other flags (flags) */ #define PSM_FLAGS_FINGERDOWN 0x0001 /* VersaPad finger down */ +#define kbdcp(p) ((atkbdc_softc_t *)(p)) +#define ALWAYS_RESTORE_CONTROLLER(kbdc) !(kbdcp(kbdc)->quirks \ + & KBDC_QUIRK_KEEP_ACTIVATED) + /* Tunables */ static int tap_enabled = -1; TUNABLE_INT("hw.psm.tap_enabled", &tap_enabled); @@ -1231,7 +1235,8 @@ 
psmprobe(device_t dev) * this is CONTROLLER ERROR; I don't know how to recover * from this error... */ - restore_controller(sc->kbdc, command_byte); + if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) + restore_controller(sc->kbdc, command_byte); printf("psm%d: unable to set the command byte.\n", unit); endprobe(ENXIO); } @@ -1270,7 +1275,8 @@ psmprobe(device_t dev) recover_from_error(sc->kbdc); if (sc->config & PSM_CONFIG_IGNPORTERROR) break; - restore_controller(sc->kbdc, command_byte); + if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) + restore_controller(sc->kbdc, command_byte); if (verbose) printf("psm%d: the aux port is not functioning (%d).\n", unit, i); @@ -1293,7 +1299,8 @@ psmprobe(device_t dev) */ if (!reset_aux_dev(sc->kbdc)) { recover_from_error(sc->kbdc); - restore_controller(sc->kbdc, command_byte); + if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) + restore_controller(sc->kbdc, command_byte); if (verbose) printf("psm%d: failed to reset the aux " "device.\n", unit); @@ -1315,7 +1322,8 @@ psmprobe(device_t dev) if (!enable_aux_dev(sc->kbdc) || !disable_aux_dev(sc->kbdc)) { /* MOUSE ERROR */ recover_from_error(sc->kbdc); - restore_controller(sc->kbdc, command_byte); + if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) + restore_controller(sc->kbdc, command_byte); if (verbose) printf("psm%d: failed to enable the aux device.\n", unit); @@ -1337,7 +1345,8 @@ psmprobe(device_t dev) /* verify the device is a mouse */ sc->hw.hwid = get_aux_id(sc->kbdc); if (!is_a_mouse(sc->hw.hwid)) { - restore_controller(sc->kbdc, command_byte); + if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) + restore_controller(sc->kbdc, command_byte); if (verbose) printf("psm%d: unknown device type (%d).\n", unit, sc->hw.hwid); @@ -1443,7 +1452,8 @@ psmprobe(device_t dev) * this is CONTROLLER ERROR; I don't know the proper way to * recover from this error... 
*/ - restore_controller(sc->kbdc, command_byte); + if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) + restore_controller(sc->kbdc, command_byte); printf("psm%d: unable to set the command byte.\n", unit); endprobe(ENXIO); } diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c index 64ac36c..9a3d75f 100644 --- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c @@ -176,7 +176,7 @@ iwch_destroy_cq(struct ib_cq *ib_cq) } static struct ib_cq * -iwch_create_cq(struct ib_device *ibdev, int entries, int vector, +iwch_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, struct ib_udata *udata) { @@ -187,6 +187,7 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector, struct iwch_ucontext *ucontext = NULL; static int warned; size_t resplen; + int entries = attr->cqe; CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries); rhp = to_iwch_dev(ibdev); @@ -545,16 +546,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int mr_id) { __be64 *pages; - int shift, i, n; + int shift, n, len; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mr *mhp; struct iwch_reg_user_mr_resp uresp; -#ifdef notyet - int j, k, len; -#endif + struct scatterlist *sg; CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd); @@ -575,9 +574,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; + n = mhp->umem->nmap; err = iwch_alloc_pbl(mhp, n); if (err) @@ -591,7 +588,21 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; -#ifdef notyet + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < 
len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address(sg) + + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = iwch_write_pbl(mhp, pages, i, n); + if (err) + goto pbl_done; + n += i; + i = 0; + } + } + } +#if 0 TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry) for (j = 0; j < chunk->nmap; ++j) { len = sg_dma_len(&chunk->page_list[j]) >> shift; @@ -612,9 +623,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (i) err = iwch_write_pbl(mhp, pages, i, n); -#ifdef notyet pbl_done: -#endif cxfree(pages); if (err) goto err_pbl; @@ -672,7 +681,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) return ibmr; } -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) +static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct iwch_dev *rhp; struct iwch_pd *php; diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c index 648d96b..3e8e6b3 100644 --- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c @@ -551,18 +551,18 @@ int iwch_bind_mw(struct ib_qp *qp, if (mw_bind->send_flags & IB_SEND_SIGNALED) t3_wr_flags = T3_COMPLETION_FLAG; - sgl.addr = mw_bind->addr; - sgl.lkey = mw_bind->mr->lkey; - sgl.length = mw_bind->length; + sgl.addr = mw_bind->bind_info.addr; + sgl.lkey = mw_bind->bind_info.mr->lkey; + sgl.length = mw_bind->bind_info.length; wqe->bind.reserved = 0; wqe->bind.type = T3_VA_BASED_TO; /* TBD: check perms */ - wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags); - wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey); + wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->bind_info.mw_access_flags); + wqe->bind.mr_stag = htobe32(mw_bind->bind_info.mr->lkey); wqe->bind.mw_stag = htobe32(mw->rkey); - wqe->bind.mw_len = htobe32(mw_bind->length); - wqe->bind.mw_va = htobe64(mw_bind->addr); + wqe->bind.mw_len = htobe32(mw_bind->bind_info.length); + wqe->bind.mw_va = 
htobe64(mw_bind->bind_info.addr); err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); if (err) { mtx_unlock(&qhp->lock); diff --git a/sys/dev/cxgbe/iw_cxgbe/cq.c b/sys/dev/cxgbe/iw_cxgbe/cq.c index ec72a6c..8710e03 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cq.c +++ b/sys/dev/cxgbe/iw_cxgbe/cq.c @@ -775,7 +775,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) } struct ib_cq * -c4iw_create_cq(struct ib_device *ibdev, int entries, int vector, +c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, struct ib_udata *udata) { struct c4iw_dev *rhp; @@ -785,6 +785,7 @@ c4iw_create_cq(struct ib_device *ibdev, int entries, int vector, int ret; size_t memsize, hwentries; struct c4iw_mm_entry *mm, *mm2; + int entries = attr->cqe; CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries); diff --git a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h index 245e045..5f2542c 100644 --- a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h +++ b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h @@ -864,7 +864,7 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl( int page_list_len); struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth); int c4iw_dealloc_mw(struct ib_mw *mw); -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd); +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata, int mr_id); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); @@ -881,8 +881,7 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int acc, u64 *iova_start); int c4iw_dereg_mr(struct ib_mr *ib_mr); int c4iw_destroy_cq(struct ib_cq *ib_cq); -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, - int vector, +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, struct ib_udata *udata); int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct 
ib_udata *udata); diff --git a/sys/dev/cxgbe/iw_cxgbe/mem.c b/sys/dev/cxgbe/iw_cxgbe/mem.c index 50c5ed0..f7c460a 100644 --- a/sys/dev/cxgbe/iw_cxgbe/mem.c +++ b/sys/dev/cxgbe/iw_cxgbe/mem.c @@ -563,9 +563,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -594,11 +594,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = ffs(mhp->umem->page_size) - 1; - - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; - + + n = mhp->umem->nmap; err = alloc_pbl(mhp, n); if (err) goto err; @@ -610,25 +607,23 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } i = n = 0; - - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address(sg) + mhp->umem->page_size * k); - if (i == PAGE_SIZE / sizeof *pages) { - err = write_pbl(&mhp->rhp->rdev, - pages, - mhp->attr.pbl_addr + (n << 3), i); - if (err) - goto pbl_done; - n += i; - i = 0; - } + if (i == PAGE_SIZE / sizeof *pages) { + err = write_pbl(&mhp->rhp->rdev, + pages, + mhp->attr.pbl_addr + (n << 3), i); + if (err) + goto pbl_done; + n += i; + i = 0; + } } + } if (i) err = write_pbl(&mhp->rhp->rdev, pages, @@ -662,7 +657,7 @@ err: return ERR_PTR(err); } -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd) +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct c4iw_dev *rhp; struct c4iw_pd *php; diff --git a/sys/dev/fb/fbd.c b/sys/dev/fb/fbd.c 
index 6bd5766..f9b4d8e 100644 --- a/sys/dev/fb/fbd.c +++ b/sys/dev/fb/fbd.c @@ -134,7 +134,8 @@ fb_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, break; case FBIO_BLANK: /* blank display */ - error = 0; /* TODO */ + if (info->setblankmode != NULL) + error = info->setblankmode(info->fb_priv, *(int *)data); break; default: diff --git a/sys/dev/ic/hd64570.h b/sys/dev/ic/hd64570.h deleted file mode 100644 index 3399e0a..0000000 --- a/sys/dev/ic/hd64570.h +++ /dev/null @@ -1,372 +0,0 @@ -/*- - * Copyright (c) 1995 John Hay. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by [your name] - * and [any other names deserving credit ] - * 4. Neither the name of the author nor the names of any co-contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY [your name] AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ -#ifndef _HD64570_H_ -#define _HD64570_H_ - -typedef struct msci_channel - { - union - { - unsigned short us_trb; /* rw */ - struct - { - unsigned char uc_trbl; - unsigned char uc_trbh; - }uc_trb; - }u_trb; - unsigned char st0; /* ro */ - unsigned char st1; /* rw */ - unsigned char st2; /* rw */ - unsigned char st3; /* ro */ - unsigned char fst; /* rw */ - unsigned char unused0; - unsigned char ie0; /* rw */ - unsigned char ie1; /* rw */ - unsigned char ie2; /* rw */ - unsigned char fie; /* rw */ - unsigned char cmd; /* wo */ - unsigned char unused1; - unsigned char md0; /* rw */ - unsigned char md1; /* rw */ - unsigned char md2; /* rw */ - unsigned char ctl; /* rw */ - unsigned char sa0; /* rw */ - unsigned char sa1; /* rw */ - unsigned char idl; /* rw */ - unsigned char tmc; /* rw */ - unsigned char rxs; /* rw */ - unsigned char txs; /* rw */ - unsigned char trc0; /* rw */ - unsigned char trc1; /* rw */ - unsigned char rrc; /* rw */ - unsigned char unused2; - unsigned char cst0; /* rw */ - unsigned char cst1; /* rw */ - unsigned char unused3[2]; - }msci_channel; - -#define trb u_trb.us_trb -#define trbl u_trb.uc_trb.uc_trbl -#define trbh u_trb.uc_trb.uc_trbh - -typedef struct timer_channel - { - unsigned short tcnt; /* rw */ - unsigned short tconr; /* wo */ - unsigned char tcsr; /* rw */ - unsigned char tepr; /* rw */ - unsigned char unused[2]; - }timer_channel; - -typedef struct dmac_channel - { - unsigned short 
dar; /* rw */ - unsigned char darb; /* rw */ - unsigned char unused0; - unsigned short sar; /* rw On odd numbered dmacs (tx) only */ - unsigned char sarb; /* rw */ -#define cpb sarb - unsigned char unused1; - unsigned short cda; /* rw */ - unsigned short eda; /* rw */ - unsigned short bfl; /* rw On even numbered dmacs (rx) only */ - unsigned short bcr; /* rw */ - unsigned char dsr; /* rw */ - unsigned char dmr; /* rw */ - unsigned char unused2; - unsigned char fct; /* rw */ - unsigned char dir; /* rw */ - unsigned char dcr; /* rw */ - unsigned char unused3[10]; - }dmac_channel; - -/* x is the channel number. rx channels are even numbered and tx, odd. */ -#define DMAC_RXCH(x) ((x*2) + 0) -#define DMAC_TXCH(x) ((x*2) + 1) - -typedef struct sca_regs - { - unsigned char lpr; /* rw */ - unsigned char unused0; /* -- */ - /* Wait system */ - unsigned char pabr0; /* rw */ - unsigned char pabr1; /* rw */ - unsigned char wcrl; /* rw */ - unsigned char wcrm; /* rw */ - unsigned char wcrh; /* rw */ - unsigned char unused1; - /* DMAC */ - unsigned char pcr; /* rw */ - unsigned char dmer; /* rw */ - unsigned char unused2[6]; - /* Interrupt */ - unsigned char isr0; /* ro */ - unsigned char isr1; /* ro */ - unsigned char isr2; /* ro */ - unsigned char unused3; - unsigned char ier0; /* rw */ - unsigned char ier1; /* rw */ - unsigned char ier2; /* rw */ - unsigned char unused4; - unsigned char itcr; /* rw */ - unsigned char unused5; - unsigned char ivr; /* rw */ - unsigned char unused6; - unsigned char imvr; /* rw */ - unsigned char unused7[3]; - /* MSCI Channel 0 */ - msci_channel msci[2]; - timer_channel timer[4]; - dmac_channel dmac[4]; - }sca_regs; - -#define SCA_CMD_TXRESET 0x01 -#define SCA_CMD_TXENABLE 0x02 -#define SCA_CMD_TXDISABLE 0x03 -#define SCA_CMD_TXCRCINIT 0x04 -#define SCA_CMD_TXCRCEXCL 0x05 -#define SCA_CMS_TXEOM 0x06 -#define SCA_CMD_TXABORT 0x07 -#define SCA_CMD_MPON 0x08 -#define SCA_CMD_TXBCLEAR 0x09 - -#define SCA_CMD_RXRESET 0x11 -#define SCA_CMD_RXENABLE 
0x12 -#define SCA_CMD_RXDISABLE 0x13 -#define SCA_CMD_RXCRCINIT 0x14 -#define SCA_CMD_RXMSGREJ 0x15 -#define SCA_CMD_MPSEARCH 0x16 -#define SCA_CMD_RXCRCEXCL 0x17 -#define SCA_CMD_RXCRCCALC 0x18 - -#define SCA_CMD_NOP 0x00 -#define SCA_CMD_RESET 0x21 -#define SCA_CMD_SEARCH 0x31 - -#define SCA_MD0_CRC_1 0x01 -#define SCA_MD0_CRC_CCITT 0x02 -#define SCA_MD0_CRC_ENABLE 0x04 -#define SCA_MD0_AUTO_ENABLE 0x10 -#define SCA_MD0_MODE_ASYNC 0x00 -#define SCA_MD0_MODE_BYTESYNC1 0x20 -#define SCA_MD0_MODE_BISYNC 0x40 -#define SCA_MD0_MODE_BYTESYNC2 0x60 -#define SCA_MD0_MODE_HDLC 0x80 - -#define SCA_MD1_NOADDRCHK 0x00 -#define SCA_MD1_SNGLADDR1 0x40 -#define SCA_MD1_SNGLADDR2 0x80 -#define SCA_MD1_DUALADDR 0xC0 - -#define SCA_MD2_DUPLEX 0x00 -#define SCA_MD2_ECHO 0x01 -#define SCA_MD2_LOOPBACK 0x03 -#define SCA_MD2_ADPLLx8 0x00 -#define SCA_MD2_ADPLLx16 0x08 -#define SCA_MD2_ADPLLx32 0x10 -#define SCA_MD2_NRZ 0x00 -#define SCA_MD2_NRZI 0x20 -#define SCA_MD2_MANCHESTER 0x80 -#define SCA_MD2_FM0 0xC0 -#define SCA_MD2_FM1 0xA0 - -#define SCA_CTL_RTS 0x01 -#define SCA_CTL_IDLPAT 0x10 -#define SCA_CTL_UDRNC 0x20 - -#define SCA_RXS_DIV_MASK 0x0F -#define SCA_RXS_DIV1 0x00 -#define SCA_RXS_DIV2 0x01 -#define SCA_RXS_DIV4 0x02 -#define SCA_RXS_DIV8 0x03 -#define SCA_RXS_DIV16 0x04 -#define SCA_RXS_DIV32 0x05 -#define SCA_RXS_DIV64 0x06 -#define SCA_RXS_DIV128 0x07 -#define SCA_RXS_DIV256 0x08 -#define SCA_RXS_DIV512 0x09 -#define SCA_RXS_CLK_RXC0 0x00 -#define SCA_RXS_CLK_RXC1 0x20 -#define SCA_RXS_CLK_INT 0x40 -#define SCA_RXS_CLK_ADPLL_OUT 0x60 -#define SCA_RXS_CLK_ADPLL_IN 0x70 - -#define SCA_TXS_DIV_MASK 0x0F -#define SCA_TXS_DIV1 0x00 -#define SCA_TXS_DIV2 0x01 -#define SCA_TXS_DIV4 0x02 -#define SCA_TXS_DIV8 0x03 -#define SCA_TXS_DIV16 0x04 -#define SCA_TXS_DIV32 0x05 -#define SCA_TXS_DIV64 0x06 -#define SCA_TXS_DIV128 0x07 -#define SCA_TXS_DIV256 0x08 -#define SCA_TXS_DIV512 0x09 -#define SCA_TXS_CLK_TXC 0x00 -#define SCA_TXS_CLK_INT 0x40 -#define SCA_TXS_CLK_RX 0x60 - 
-#define SCA_ST0_RXRDY 0x01 -#define SCA_ST0_TXRDY 0x02 -#define SCA_ST0_RXINT 0x40 -#define SCA_ST0_TXINT 0x80 - -#define SCA_ST1_IDLST 0x01 -#define SCA_ST1_ABTST 0x02 -#define SCA_ST1_DCDCHG 0x04 -#define SCA_ST1_CTSCHG 0x08 -#define SCA_ST1_FLAG 0x10 -#define SCA_ST1_TXIDL 0x40 -#define SCA_ST1_UDRN 0x80 - -/* ST2 and FST look the same */ -#define SCA_FST_CRCERR 0x04 -#define SCA_FST_OVRN 0x08 -#define SCA_FST_RESFRM 0x10 -#define SCA_FST_ABRT 0x20 -#define SCA_FST_SHRT 0x40 -#define SCA_FST_EOM 0x80 - -#define SCA_ST3_RXENA 0x01 -#define SCA_ST3_TXENA 0x02 -#define SCA_ST3_DCD 0x04 -#define SCA_ST3_CTS 0x08 -#define SCA_ST3_ADPLLSRCH 0x10 -#define SCA_ST3_TXDATA 0x20 - -#define SCA_FIE_EOMFE 0x80 - -#define SCA_IE0_RXRDY 0x01 -#define SCA_IE0_TXRDY 0x02 -#define SCA_IE0_RXINT 0x40 -#define SCA_IE0_TXINT 0x80 - -#define SCA_IE1_IDLDE 0x01 -#define SCA_IE1_ABTDE 0x02 -#define SCA_IE1_DCD 0x04 -#define SCA_IE1_CTS 0x08 -#define SCA_IE1_FLAG 0x10 -#define SCA_IE1_IDL 0x40 -#define SCA_IE1_UDRN 0x80 - -#define SCA_IE2_CRCERR 0x04 -#define SCA_IE2_OVRN 0x08 -#define SCA_IE2_RESFRM 0x10 -#define SCA_IE2_ABRT 0x20 -#define SCA_IE2_SHRT 0x40 -#define SCA_IE2_EOM 0x80 - -/* This is for RRC, TRC0 and TRC1. 
*/ -#define SCA_RCR_MASK 0x1F - -#define SCA_IE1_ - -#define SCA_IV_CHAN0 0x00 -#define SCA_IV_CHAN1 0x20 - -#define SCA_IV_RXRDY 0x04 -#define SCA_IV_TXRDY 0x06 -#define SCA_IV_RXINT 0x08 -#define SCA_IV_TXINT 0x0A - -#define SCA_IV_DMACH0 0x00 -#define SCA_IV_DMACH1 0x08 -#define SCA_IV_DMACH2 0x20 -#define SCA_IV_DMACH3 0x28 - -#define SCA_IV_DMIA 0x14 -#define SCA_IV_DMIB 0x16 - -#define SCA_IV_TIMER0 0x1C -#define SCA_IV_TIMER1 0x1E -#define SCA_IV_TIMER2 0x3C -#define SCA_IV_TIMER3 0x3E - -/* - * DMA registers - */ -#define SCA_DSR_EOT 0x80 -#define SCA_DSR_EOM 0x40 -#define SCA_DSR_BOF 0x20 -#define SCA_DSR_COF 0x10 -#define SCA_DSR_DE 0x02 -#define SCA_DSR_DWE 0x01 - -#define SCA_DMR_TMOD 0x10 -#define SCA_DMR_NF 0x04 -#define SCA_DMR_CNTE 0x02 - -#define SCA_DMER_EN 0x80 - -#define SCA_DCR_ABRT 0x01 -#define SCA_DCR_FCCLR 0x02 /* Clear frame end intr counter */ - -#define SCA_DIR_EOT 0x80 -#define SCA_DIR_EOM 0x40 -#define SCA_DIR_BOF 0x20 -#define SCA_DIR_COF 0x10 - -#define SCA_PCR_BRC 0x10 -#define SCA_PCR_CCC 0x08 -#define SCA_PCR_PR2 0x04 -#define SCA_PCR_PR1 0x02 -#define SCA_PCR_PR0 0x01 - -typedef struct sca_descriptor - { - unsigned short cp; - unsigned short bp; - unsigned char bpb; - unsigned char unused0; - unsigned short len; - unsigned char stat; - unsigned char unused1; - }sca_descriptor; - -#define SCA_DESC_EOT 0x01 -#define SCA_DESC_CRC 0x04 -#define SCA_DESC_OVRN 0x08 -#define SCA_DESC_RESD 0x10 -#define SCA_DESC_ABORT 0x20 -#define SCA_DESC_SHRTFRM 0x40 -#define SCA_DESC_EOM 0x80 -#define SCA_DESC_ERRORS 0x7C - -/* -*************************************************************************** -** END -*************************************************************************** -**/ -#endif /* _HD64570_H_ */ - diff --git a/sys/dev/lmc/if_lmc.h b/sys/dev/lmc/if_lmc.h index fe4e7d0..8291dcd 100644 --- a/sys/dev/lmc/if_lmc.h +++ b/sys/dev/lmc/if_lmc.h @@ -1052,7 +1052,7 @@ struct card */ #define IOREF_CSR 1 /* access Tulip CSRs with IO cycles 
if 1 */ -#if (defined(__FreeBSD__) && defined(DEVICE_POLLING)) +#if defined(DEVICE_POLLING) # define DEV_POLL 1 #else # define DEV_POLL 0 @@ -1076,15 +1076,10 @@ struct softc struct ifnet *ifp; struct ifmedia ifm; /* hooks for ifconfig(8) */ # if NSPPP -# if (__FreeBSD_version < 600000) - struct sppp spppcom; /* must be first in sc for fbsd < 6 */ -# endif struct sppp *sppp; # elif P2P struct p2pcom p2pcom; struct p2pcom *p2p; -# elif (__FreeBSD_version < 600000) - struct ifnet ifnet; /* must be first in sc for fbsd < 6 */ # endif #endif @@ -1092,13 +1087,8 @@ struct softc #if NETGRAPH node_p ng_node; /* pointer to our node struct */ hook_p ng_hook; /* non-zero means NETGRAPH owns device */ -# if (__FreeBSD_version >= 503000) struct ifaltq ng_sndq; struct ifaltq ng_fastq; -# else - struct ifqueue ng_sndq; - struct ifqueue ng_fastq; -# endif #endif struct callout callout; /* watchdog needs this */ @@ -1115,13 +1105,8 @@ struct softc # ifdef DEVICE_POLLING int quota; /* used for incoming packet flow control */ # endif -# if (__FreeBSD_version >= 500000) struct mtx top_mtx; /* lock card->watchdog vs core_ioctl */ struct mtx bottom_mtx; /* lock for buf queues & descriptor rings */ -# else /* FreeBSD-4 */ - int top_spl; /* lock card->watchdog vs core_ioctl */ - int bottom_spl; /* lock for buf queues & descriptor rings */ -# endif /* Top-half state used by all card types; lock with top_lock, */ @@ -1154,23 +1139,11 @@ struct softc # define WRITE_CSR(csr, val) bus_space_write_4(sc->csr_tag, sc->csr_handle, csr, val) # define NAME_UNIT device_get_nameunit(sc->dev) # define DRIVER_DEBUG ((sc->config.debug) || (sc->ifp->if_flags & IFF_DEBUG)) -# if (__FreeBSD_version >= 500000) -# define TOP_TRYLOCK mtx_trylock(&sc->top_mtx) -# define TOP_UNLOCK mtx_unlock (&sc->top_mtx) -# define BOTTOM_TRYLOCK mtx_trylock(&sc->bottom_mtx) -# define BOTTOM_UNLOCK mtx_unlock (&sc->bottom_mtx) -# if (__FreeBSD_version >= 700000) -# define CHECK_CAP priv_check(curthread, PRIV_DRIVER) -# else 
-# define CHECK_CAP suser(curthread) -# endif -# else /* FreeBSD-4 */ -# define TOP_TRYLOCK (sc->top_spl = splimp()) -# define TOP_UNLOCK splx(sc->top_spl) -# define BOTTOM_TRYLOCK 1 /* giant_lock protects */ -# define BOTTOM_UNLOCK /* nothing */ -# define CHECK_CAP suser(curproc) -# endif +# define TOP_TRYLOCK mtx_trylock(&sc->top_mtx) +# define TOP_UNLOCK mtx_unlock (&sc->top_mtx) +# define BOTTOM_TRYLOCK mtx_trylock(&sc->bottom_mtx) +# define BOTTOM_UNLOCK mtx_unlock (&sc->bottom_mtx) +# define CHECK_CAP priv_check(curthread, PRIV_DRIVER) # define DISABLE_INTR /* nothing */ # define ENABLE_INTR /* nothing */ # define IRQ_NONE /* nothing */ @@ -1181,28 +1154,13 @@ struct softc # define DMA_SYNC(map, size, flags) bus_dmamap_sync(ring->tag, map, flags) # define DMA_LOAD(map, addr, size) bus_dmamap_load(ring->tag, map, addr, size, fbsd_dmamap_load, ring, 0) # if (NBPFILTER != 0) -# if (__FreeBSD_version >= 500000) -# define LMC_BPF_MTAP(mbuf) BPF_MTAP(sc->ifp, mbuf) -# else /* FreeBSD-4 */ -# define LMC_BPF_MTAP(mbuf) if (sc->ifp->if_bpf) bpf_mtap(sc->ifp, mbuf) -# endif +# define LMC_BPF_MTAP(mbuf) BPF_MTAP(sc->ifp, mbuf) # define LMC_BPF_ATTACH(dlt, len) bpfattach(sc->ifp, dlt, len) # define LMC_BPF_DETACH bpfdetach(sc->ifp) # endif -# if (__FreeBSD_version >= 500000) -# define IF_DROP(ifq) _IF_DROP(ifq) -# define IF_QFULL(ifq) _IF_QFULL(ifq) -# endif -# if (__FreeBSD_version < 500000) -# define INTR_MPSAFE 0 -# define BUS_DMA_COHERENT 0 -# endif -# if (__FreeBSD_version >= 600000) -# define IFF_RUNNING IFF_DRV_RUNNING -# endif - - - +# define IF_DROP(ifq) _IF_DROP(ifq) +# define IF_QFULL(ifq) _IF_QFULL(ifq) +# define IFF_RUNNING IFF_DRV_RUNNING #if (NBPFILTER == 0) @@ -1211,40 +1169,6 @@ struct softc # define LMC_BPF_DETACH /* nothing */ #endif -#if (defined(__bsdi__) || /* unconditionally */ \ - (defined(__FreeBSD__) && (__FreeBSD_version < 503000)) || \ - (defined(__NetBSD__) && (__NetBSD_Version__ < 106000000)) || \ - (defined(__OpenBSD__) && ( OpenBSD < 
200111))) -# define IFQ_ENQUEUE(ifq, m, pa, err) \ -do { \ - if (pa==0); /* suppress warning */ \ - if (IF_QFULL(ifq)) \ - { \ - IF_DROP(ifq); \ - m_freem(m); \ - err = ENOBUFS; \ - } \ - else \ - { \ - IF_ENQUEUE(ifq, m); \ - err = 0; \ - } \ - } while (0) -# define IFQ_DEQUEUE(ifq, m) do { IF_DEQUEUE((ifq), m) } while (0) -# define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_head == NULL) -# define IFQ_SET_MAXLEN(ifq, len) (ifq)->ifq_maxlen = len -# define IFQ_SET_READY(ifq) /* nothing */ -# define IFQ_PURGE(ifq) \ -do { \ - while ((ifq)->ifq_head != NULL) \ - { \ - struct mbuf *m; \ - IF_DEQUEUE(ifq, m); \ - m_freem(m); \ - } \ - } while (0) -#endif - #define HSSI_DESC "SBE/LMC HSSI Card" #define T3_DESC "SBE/LMC T3 Card" #define SSI_DESC "SBE/LMC SSI Card" @@ -1315,10 +1239,6 @@ static void t1_send_bop(softc_t *, int); static int t1_ioctl(softc_t *, struct ioctl *); #if IFNET -# if ((defined(__FreeBSD__) && (__FreeBSD_version < 500000)) ||\ - defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__)) -static void netisr_dispatch(int, struct mbuf *); -# endif static void lmc_raw_input(struct ifnet *, struct mbuf *); #endif /* IFNET */ @@ -1372,25 +1292,12 @@ static void lmc_ifnet_detach(softc_t *); #endif /* IFNET */ #if NETGRAPH -# if (__FreeBSD_version >= 500000) static int ng_constructor(node_p); -# else /* FreeBSD-4 */ -static int ng_constructor(node_p *); -# endif -# if (__FreeBSD_version >= 500000) static int ng_rcvmsg(node_p, item_p, hook_p); -# else /* FreeBSD-4 */ -static int ng_rcvmsg(node_p, struct ng_mesg *, - const char *, struct ng_mesg **); -# endif static int ng_shutdown(node_p); static int ng_newhook(node_p, hook_p, const char *); static int ng_connect(hook_p); -# if (__FreeBSD_version >= 500000) static int ng_rcvdata(hook_p, item_p); -# else /* FreeBSD-4 */ -static int ng_rcvdata(hook_p, struct mbuf *, meta_p); -# endif static int ng_disconnect(hook_p); # if (IFNET == 0) static void ng_watchdog(void *); diff --git a/sys/dev/netmap/netmap.c 
b/sys/dev/netmap/netmap.c index 959b270..5401df3 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -656,9 +656,8 @@ netmap_update_config(struct netmap_adapter *na) u_int txr, txd, rxr, rxd; txr = txd = rxr = rxd = 0; - if (na->nm_config) { - na->nm_config(na, &txr, &txd, &rxr, &rxd); - } else { + if (na->nm_config == NULL || + na->nm_config(na, &txr, &txd, &rxr, &rxd)) { /* take whatever we had at init time */ txr = na->num_tx_rings; txd = na->num_tx_desc; @@ -2168,7 +2167,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, error = ENXIO; break; } - rmb(); /* make sure following reads are not from cache */ + mb(); /* make sure following reads are not from cache */ na = priv->np_na; /* we have a reference */ @@ -3071,16 +3070,14 @@ netmap_init(void) error = netmap_mem_init(); if (error != 0) goto fail; - /* XXX could use make_dev_credv() to get error number */ -#ifdef __FreeBSD__ - /* support for the 'eternal' flag */ + /* + * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls + * when the module is compiled in. + * XXX could use make_dev_credv() to get error number + */ netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600, "netmap"); -#else - netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, - "netmap"); -#endif if (!netmap_dev) goto fail; diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 263904b..ba5a333 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -605,8 +605,6 @@ pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size) if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) { devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); - if (devlist_entry == NULL) - return (NULL); cfg = &devlist_entry->cfg; diff --git a/sys/dev/random/dummy_rng.c b/sys/dev/random/dummy_rng.c index a7ca4b3..e78f5a8 100644 --- a/sys/dev/random/dummy_rng.c +++ b/sys/dev/random/dummy_rng.c @@ -82,19 +82,13 @@ dummy_random_init(void) * * Caveat Emptor. 
*/ -u_int +void dummy_random_read_phony(uint8_t *buf, u_int count) { /* If no entropy device is loaded, don't spam the console with warnings */ - static int warned = 0; u_long randval; size_t size, i; - if (!warned) { - log(LOG_WARNING, "random device not loaded/active; using insecure pseudo-random number generator\n"); - warned = 1; - } - /* srandom() is called in kern/init_main.c:proc0_post() */ /* Fill buf[] with random(9) output */ @@ -103,8 +97,6 @@ dummy_random_read_phony(uint8_t *buf, u_int count) size = MIN(count - i, sizeof(randval)); memcpy(buf + i, &randval, (size_t)size); } - - return (count); } struct random_adaptor randomdev_dummy = { diff --git a/sys/dev/random/random_adaptors.c b/sys/dev/random/random_adaptors.c index 30f3e3d..5a67f50 100644 --- a/sys/dev/random/random_adaptors.c +++ b/sys/dev/random/random_adaptors.c @@ -149,10 +149,14 @@ random_adaptor_choose(void) (random_adaptor_previous == NULL ? "NULL" : random_adaptor_previous->ra_ident), random_adaptor->ra_ident); #endif - if (random_adaptor_previous != NULL) + if (random_adaptor_previous != NULL) { + randomdev_deinit_reader(); (random_adaptor_previous->ra_deinit)(); + } (random_adaptor->ra_init)(); } + + randomdev_init_reader(random_adaptor->ra_read); } diff --git a/sys/dev/random/randomdev.c b/sys/dev/random/randomdev.c index c61bed7..9d41aed 100644 --- a/sys/dev/random/randomdev.c +++ b/sys/dev/random/randomdev.c @@ -214,11 +214,11 @@ random_harvest(const void *entropy, u_int count, u_int bits, enum random_entropy */ /* Hold the address of the routine which is actually called */ -static u_int (*read_func)(uint8_t *, u_int) = dummy_random_read_phony; +static void (*read_func)(uint8_t *, u_int) = dummy_random_read_phony; /* Initialise the reader when/if it is loaded */ void -randomdev_init_reader(u_int (*reader)(uint8_t *, u_int)) +randomdev_init_reader(void (*reader)(uint8_t *, u_int)) { read_func = reader; @@ -240,5 +240,10 @@ int read_random(void *buf, int count) { - return 
((int)(*read_func)(buf, (u_int)count)); + if (count < 0) + return 0; + + read_func(buf, count); + + return count; } diff --git a/sys/dev/random/randomdev.h b/sys/dev/random/randomdev.h index 4daf735..4ca88ff 100644 --- a/sys/dev/random/randomdev.h +++ b/sys/dev/random/randomdev.h @@ -37,12 +37,12 @@ typedef void random_init_func_t(void); typedef void random_deinit_func_t(void); void randomdev_init_harvester(void (*)(const void *, u_int, u_int, enum random_entropy_source)); -void randomdev_init_reader(u_int (*)(uint8_t *, u_int)); +void randomdev_init_reader(void (*)(uint8_t *, u_int)); void randomdev_deinit_harvester(void); void randomdev_deinit_reader(void); /* Stub/fake routines for when no entropy processor is loaded */ -extern u_int dummy_random_read_phony(uint8_t *, u_int); +extern void dummy_random_read_phony(uint8_t *, u_int); /* kern.random sysctls */ #ifdef SYSCTL_DECL /* from sysctl.h */ diff --git a/sys/dev/sfxge/common/efx_ev.c b/sys/dev/sfxge/common/efx_ev.c index 515435b..783a047 100644 --- a/sys/dev/sfxge/common/efx_ev.c +++ b/sys/dev/sfxge/common/efx_ev.c @@ -89,7 +89,8 @@ efx_ev_rx_not_ok( if (EFX_QWORD_FIELD(*eqp, FSF_AZ_RX_EV_TOBE_DISC) != 0) { EFX_EV_QSTAT_INCR(eep, EV_RX_TOBE_DISC); EFSYS_PROBE(tobe_disc); - /* Assume this is a unicast address mismatch, unless below + /* + * Assume this is a unicast address mismatch, unless below * we find either FSF_AZ_RX_EV_ETH_CRC_ERR or * EV_RX_PAUSE_FRM_ERR is set. */ @@ -102,7 +103,8 @@ efx_ev_rx_not_ok( (*flagsp) |= EFX_DISCARD; #if (EFSYS_OPT_RX_HDR_SPLIT || EFSYS_OPT_RX_SCATTER) - /* Lookout for payload queue ran dry errors and ignore them. + /* + * Lookout for payload queue ran dry errors and ignore them. 
* * Sadly for the header/data split cases, the descriptor * pointer in this event refers to the header queue and diff --git a/sys/dev/sfxge/common/efx_mac.c b/sys/dev/sfxge/common/efx_mac.c index 5b868ed..3e9449a 100644 --- a/sys/dev/sfxge/common/efx_mac.c +++ b/sys/dev/sfxge/common/efx_mac.c @@ -669,11 +669,11 @@ chosen: EFSYS_ASSERT(emop != NULL); epp->ep_mac_type = type; - + if (emop->emo_reset != NULL) { if ((rc = emop->emo_reset(enp)) != 0) goto fail1; - + EFSYS_ASSERT(enp->en_reset_flags & EFX_RESET_MAC); enp->en_reset_flags &= ~EFX_RESET_MAC; } diff --git a/sys/dev/sfxge/common/efx_mcdi.c b/sys/dev/sfxge/common/efx_mcdi.c index 55297c0..5853b06 100644 --- a/sys/dev/sfxge/common/efx_mcdi.c +++ b/sys/dev/sfxge/common/efx_mcdi.c @@ -44,7 +44,8 @@ __FBSDID("$FreeBSD$"); #define MCDI_P1_REBOOT_OFST 0x1fe #define MCDI_P2_REBOOT_OFST 0x1ff -/* A reboot/assertion causes the MCDI status word to be set after the +/* + * A reboot/assertion causes the MCDI status word to be set after the * command word is set or a REBOOT event is sent. If we notice a reboot * via these mechanisms then wait 10ms for the status word to be set. */ @@ -459,7 +460,8 @@ efx_mcdi_ev_death( ++emip->emi_aborted; } - /* Since we're running in parallel with a request, consume the + /* + * Since we're running in parallel with a request, consume the * status word before dropping the lock. 
*/ if (rc == EIO || rc == EINTR) { diff --git a/sys/dev/sfxge/common/efx_nic.c b/sys/dev/sfxge/common/efx_nic.c index fdbe5a2..2bb55d1 100644 --- a/sys/dev/sfxge/common/efx_nic.c +++ b/sys/dev/sfxge/common/efx_nic.c @@ -253,7 +253,8 @@ efx_nic_create( EFX_FEATURE_LFSR_HASH_INSERT | EFX_FEATURE_LINK_EVENTS | EFX_FEATURE_PERIODIC_MAC_STATS | EFX_FEATURE_WOL | EFX_FEATURE_MCDI | - EFX_FEATURE_LOOKAHEAD_SPLIT | EFX_FEATURE_MAC_HEADER_FILTERS; + EFX_FEATURE_LOOKAHEAD_SPLIT | + EFX_FEATURE_MAC_HEADER_FILTERS; break; #endif /* EFSYS_OPT_SIENA */ diff --git a/sys/dev/sfxge/common/efx_rx.c b/sys/dev/sfxge/common/efx_rx.c index 5b52d86..c6e0d70 100644 --- a/sys/dev/sfxge/common/efx_rx.c +++ b/sys/dev/sfxge/common/efx_rx.c @@ -527,7 +527,7 @@ efx_rx_filter_insert( EFSYS_ASSERT3P(spec, !=, NULL); spec->efs_dmaq_id = (uint16_t)erp->er_index; - return efx_filter_insert_filter(erp->er_enp, spec, B_FALSE); + return (efx_filter_insert_filter(erp->er_enp, spec, B_FALSE)); } #endif @@ -541,7 +541,7 @@ efx_rx_filter_remove( EFSYS_ASSERT3P(spec, !=, NULL); spec->efs_dmaq_id = (uint16_t)erp->er_index; - return efx_filter_remove_filter(erp->er_enp, spec); + return (efx_filter_remove_filter(erp->er_enp, spec)); } #endif @@ -673,7 +673,8 @@ efx_rx_qcreate( EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX); - EFX_STATIC_ASSERT(EFX_EV_RX_NLABELS == (1 << FRF_AZ_RX_DESCQ_LABEL_WIDTH)); + EFX_STATIC_ASSERT(EFX_EV_RX_NLABELS == + (1 << FRF_AZ_RX_DESCQ_LABEL_WIDTH)); EFSYS_ASSERT3U(label, <, EFX_EV_RX_NLABELS); EFSYS_ASSERT3U(enp->en_rx_qcount + 1, <, encp->enc_rxq_limit); diff --git a/sys/dev/sfxge/common/efx_tx.c b/sys/dev/sfxge/common/efx_tx.c index a7ec361..dcc225d 100644 --- a/sys/dev/sfxge/common/efx_tx.c +++ b/sys/dev/sfxge/common/efx_tx.c @@ -114,7 +114,7 @@ efx_tx_filter_insert( EFSYS_ASSERT3P(spec, !=, NULL); spec->efs_dmaq_id = (uint16_t)etp->et_index; - return efx_filter_insert_filter(etp->et_enp, spec, B_FALSE); + return 
(efx_filter_insert_filter(etp->et_enp, spec, B_FALSE)); } #endif @@ -128,7 +128,7 @@ efx_tx_filter_remove( EFSYS_ASSERT3P(spec, !=, NULL); spec->efs_dmaq_id = (uint16_t)etp->et_index; - return efx_filter_remove_filter(etp->et_enp, spec); + return (efx_filter_remove_filter(etp->et_enp, spec)); } #endif diff --git a/sys/dev/sfxge/common/siena_mon.c b/sys/dev/sfxge/common/siena_mon.c index de7b793..dc3e59e 100644 --- a/sys/dev/sfxge/common/siena_mon.c +++ b/sys/dev/sfxge/common/siena_mon.c @@ -138,13 +138,13 @@ siena_mon_decode_stats( efx_dword_t dword; EFSYS_MEM_READD(esmp, 4 * mc_sensor, &dword); emsvp->emsv_value = - (uint16_t)EFX_DWORD_FIELD( - dword, - MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_VALUE); + (uint16_t)EFX_DWORD_FIELD( + dword, + MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_VALUE); emsvp->emsv_state = - (uint16_t)EFX_DWORD_FIELD( - dword, - MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE); + (uint16_t)EFX_DWORD_FIELD( + dword, + MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE); } } diff --git a/sys/dev/sfxge/common/siena_nic.c b/sys/dev/sfxge/common/siena_nic.c index 857b78a..b7d42de 100644 --- a/sys/dev/sfxge/common/siena_nic.c +++ b/sys/dev/sfxge/common/siena_nic.c @@ -324,7 +324,8 @@ siena_board_cfg( efx_mcdi_execute(enp, &req); if (req.emr_rc == 0) { - if (req.emr_out_length_used < MC_CMD_GET_RESOURCE_LIMITS_OUT_LEN) { + if (req.emr_out_length_used < + MC_CMD_GET_RESOURCE_LIMITS_OUT_LEN) { rc = EMSGSIZE; goto fail3; } diff --git a/sys/dev/sfxge/common/siena_vpd.c b/sys/dev/sfxge/common/siena_vpd.c index cd643c8..3f1008b 100644 --- a/sys/dev/sfxge/common/siena_vpd.c +++ b/sys/dev/sfxge/common/siena_vpd.c @@ -541,11 +541,9 @@ siena_vpd_write( /* Copy in new vpd and update header */ vpd_offset = dcfg_size - vpd_length; - EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_offset, - EFX_DWORD_0, vpd_offset); + EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_offset, EFX_DWORD_0, vpd_offset); memcpy((caddr_t)dcfg + vpd_offset, data, vpd_length); - EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_length, - EFX_DWORD_0, 
vpd_length); + EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_length, EFX_DWORD_0, vpd_length); /* Update the checksum */ cksum = 0; diff --git a/sys/dev/sfxge/sfxge_port.c b/sys/dev/sfxge/sfxge_port.c index 6e21130..4953c92 100644 --- a/sys/dev/sfxge/sfxge_port.c +++ b/sys/dev/sfxge/sfxge_port.c @@ -91,16 +91,15 @@ sfxge_mac_stat_handler(SYSCTL_HANDLER_ARGS) struct sfxge_softc *sc = arg1; unsigned int id = arg2; int rc; + uint64_t val; SFXGE_PORT_LOCK(&sc->port); - if ((rc = sfxge_mac_stat_update(sc)) != 0) - goto out; - - rc = SYSCTL_OUT(req, - (uint64_t *)sc->port.mac_stats.decode_buf + id, - sizeof(uint64_t)); -out: + if ((rc = sfxge_mac_stat_update(sc)) == 0) + val = ((uint64_t *)sc->port.mac_stats.decode_buf)[id]; SFXGE_PORT_UNLOCK(&sc->port); + + if (rc == 0) + rc = SYSCTL_OUT(req, &val, sizeof(val)); return (rc); } @@ -173,28 +172,29 @@ sfxge_port_wanted_fc_handler(SYSCTL_HANDLER_ARGS) sc = arg1; port = &sc->port; - SFXGE_PORT_LOCK(port); - if (req->newptr != NULL) { if ((error = SYSCTL_IN(req, &fcntl, sizeof(fcntl))) != 0) - goto out; - - if (port->wanted_fc == fcntl) - goto out; + return (error); - port->wanted_fc = fcntl; + SFXGE_PORT_LOCK(port); - if (port->init_state != SFXGE_PORT_STARTED) - goto out; + if (port->wanted_fc != fcntl) { + if (port->init_state == SFXGE_PORT_STARTED) + error = efx_mac_fcntl_set(sc->enp, + port->wanted_fc, + B_TRUE); + if (error == 0) + port->wanted_fc = fcntl; + } - error = efx_mac_fcntl_set(sc->enp, port->wanted_fc, B_TRUE); + SFXGE_PORT_UNLOCK(port); } else { - error = SYSCTL_OUT(req, &port->wanted_fc, - sizeof(port->wanted_fc)); - } + SFXGE_PORT_LOCK(port); + fcntl = port->wanted_fc; + SFXGE_PORT_UNLOCK(port); -out: - SFXGE_PORT_UNLOCK(port); + error = SYSCTL_OUT(req, &fcntl, sizeof(fcntl)); + } return (error); } @@ -205,7 +205,6 @@ sfxge_port_link_fc_handler(SYSCTL_HANDLER_ARGS) struct sfxge_softc *sc; struct sfxge_port *port; unsigned int wanted_fc, link_fc; - int error; sc = arg1; port = &sc->port; @@ -215,10 +214,9 @@ 
sfxge_port_link_fc_handler(SYSCTL_HANDLER_ARGS) efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc); else link_fc = 0; - error = SYSCTL_OUT(req, &link_fc, sizeof(link_fc)); SFXGE_PORT_UNLOCK(port); - return (error); + return (SYSCTL_OUT(req, &link_fc, sizeof(link_fc))); } #endif /* SFXGE_HAVE_PAUSE_MEDIAOPTS */ @@ -499,16 +497,15 @@ sfxge_phy_stat_handler(SYSCTL_HANDLER_ARGS) struct sfxge_softc *sc = arg1; unsigned int id = arg2; int rc; + uint32_t val; SFXGE_PORT_LOCK(&sc->port); - if ((rc = sfxge_phy_stat_update(sc)) != 0) - goto out; - - rc = SYSCTL_OUT(req, - (uint32_t *)sc->port.phy_stats.decode_buf + id, - sizeof(uint32_t)); -out: + if ((rc = sfxge_phy_stat_update(sc)) == 0) + val = ((uint32_t *)sc->port.phy_stats.decode_buf)[id]; SFXGE_PORT_UNLOCK(&sc->port); + + if (rc == 0) + rc = SYSCTL_OUT(req, &val, sizeof(val)); return (rc); } diff --git a/sys/dev/sfxge/sfxge_rx.c b/sys/dev/sfxge/sfxge_rx.c index 0a4b803..23101a5 100644 --- a/sys/dev/sfxge/sfxge_rx.c +++ b/sys/dev/sfxge/sfxge_rx.c @@ -92,8 +92,8 @@ static int lro_loss_packets = 20; #define SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6)) /* Compare IPv6 addresses, avoiding conditional branches */ -static __inline unsigned long ipv6_addr_cmp(const struct in6_addr *left, - const struct in6_addr *right) +static unsigned long ipv6_addr_cmp(const struct in6_addr *left, + const struct in6_addr *right) { #if LONG_BIT == 64 const uint64_t *left64 = (const uint64_t *)left; @@ -167,7 +167,7 @@ sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying) sfxge_rx_post_refill, rxq); } -static inline struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc) +static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc) { struct mb_args args; struct mbuf *m; diff --git a/sys/dev/sfxge/sfxge_tx.c b/sys/dev/sfxge/sfxge_tx.c index 394a751..d726dac 100644 --- a/sys/dev/sfxge/sfxge_tx.c +++ b/sys/dev/sfxge/sfxge_tx.c @@ -107,7 +107,7 @@ SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, 
CTLFLAG_RDTUN, /* Forward declarations. */ -static inline void sfxge_tx_qdpl_service(struct sfxge_txq *txq); +static void sfxge_tx_qdpl_service(struct sfxge_txq *txq); static void sfxge_tx_qlist_post(struct sfxge_txq *txq); static void sfxge_tx_qunblock(struct sfxge_txq *txq); static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf, @@ -156,7 +156,7 @@ sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq) #ifdef SFXGE_HAVE_MQ -static inline unsigned int +static unsigned int sfxge_is_mbuf_non_tcp(struct mbuf *mbuf) { /* Absense of TCP checksum flags does not mean that it is non-TCP @@ -481,7 +481,7 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq) * * NOTE: drops the txq mutex! */ -static inline void +static void sfxge_tx_qdpl_service(struct sfxge_txq *txq) { SFXGE_TXQ_LOCK_ASSERT_OWNED(txq); @@ -509,7 +509,7 @@ sfxge_tx_qdpl_service(struct sfxge_txq *txq) * overload the csum_data field in the mbuf to keep track of this length * because there is no cheap alternative to avoid races. 
*/ -static inline int +static int sfxge_tx_qdpl_put(struct sfxge_txq *txq, struct mbuf *mbuf, int locked) { struct sfxge_tx_dpl *stdp; @@ -649,7 +649,7 @@ sfxge_if_qflush(struct ifnet *ifp) sc = ifp->if_softc; - for (i = 0; i < SFXGE_TX_SCALE(sc); i++) + for (i = 0; i < SFXGE_TXQ_IP_TCP_UDP_CKSUM + SFXGE_TX_SCALE(sc); i++) sfxge_tx_qdpl_flush(sc->txq[i]); } @@ -758,7 +758,7 @@ void sfxge_if_start(struct ifnet *ifp) SFXGE_TXQ_UNLOCK(sc->txq[0]); } -static inline void +static void sfxge_tx_qdpl_service(struct sfxge_txq *txq) { struct ifnet *ifp = txq->sc->ifnet; @@ -783,7 +783,6 @@ struct sfxge_tso_state { unsigned packet_space; /* Remaining space in current packet */ /* Input position */ - unsigned dma_seg_i; /* Current DMA segment number */ uint64_t dma_addr; /* DMA address of current position */ unsigned in_len; /* Remaining length in current mbuf */ @@ -792,23 +791,21 @@ struct sfxge_tso_state { ssize_t nh_off; /* Offset of network header */ ssize_t tcph_off; /* Offset of TCP header */ unsigned header_len; /* Number of bytes of header */ - int full_packet_size; /* Number of bytes to put in each outgoing - * segment */ }; -static inline const struct ip *tso_iph(const struct sfxge_tso_state *tso) +static const struct ip *tso_iph(const struct sfxge_tso_state *tso) { KASSERT(tso->protocol == htons(ETHERTYPE_IP), ("tso_iph() in non-IPv4 state")); return (const struct ip *)(tso->mbuf->m_data + tso->nh_off); } -static inline const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso) +static __unused const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso) { KASSERT(tso->protocol == htons(ETHERTYPE_IPV6), ("tso_ip6h() in non-IPv6 state")); return (const struct ip6_hdr *)(tso->mbuf->m_data + tso->nh_off); } -static inline const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso) +static const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso) { return (const struct tcphdr *)(tso->mbuf->m_data + tso->tcph_off); } @@ -895,7 +892,6 @@ static 
void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf) } tso->header_len = tso->tcph_off + 4 * tso_tcph(tso)->th_off; - tso->full_packet_size = tso->header_len + mbuf->m_pkthdr.tso_segsz; tso->seqnum = ntohl(tso_tcph(tso)->th_seq); @@ -1015,7 +1011,8 @@ static int tso_start_new_packet(struct sfxge_txq *txq, tso->seqnum += tso->mbuf->m_pkthdr.tso_segsz; if (tso->out_len > tso->mbuf->m_pkthdr.tso_segsz) { /* This packet will not finish the TSO burst. */ - ip_length = tso->full_packet_size - tso->nh_off; + ip_length = tso->header_len - tso->nh_off + + tso->mbuf->m_pkthdr.tso_segsz; tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH); } else { /* This packet will be the last in the TSO burst. */ @@ -1280,7 +1277,6 @@ fail: void sfxge_tx_stop(struct sfxge_softc *sc) { - const efx_nic_cfg_t *encp; int index; index = SFXGE_TX_SCALE(sc); @@ -1289,7 +1285,6 @@ sfxge_tx_stop(struct sfxge_softc *sc) sfxge_tx_qstop(sc, SFXGE_TXQ_IP_CKSUM); - encp = efx_nic_cfg_get(sc->enp); sfxge_tx_qstop(sc, SFXGE_TXQ_NON_CKSUM); /* Tear down the transmit module */ diff --git a/sys/dev/usb/controller/musb_otg.c b/sys/dev/usb/controller/musb_otg.c index a751412..eba8c65 100644 --- a/sys/dev/usb/controller/musb_otg.c +++ b/sys/dev/usb/controller/musb_otg.c @@ -2258,7 +2258,8 @@ repeat: if (usb_status & (MUSB2_MASK_IRESET | MUSB2_MASK_IRESUME | MUSB2_MASK_ISUSP | - MUSB2_MASK_ICONN | MUSB2_MASK_IDISC)) { + MUSB2_MASK_ICONN | MUSB2_MASK_IDISC | + MUSB2_MASK_IVBUSERR)) { DPRINTFN(4, "real bus interrupt 0x%08x\n", usb_status); @@ -2330,6 +2331,12 @@ repeat: * always in reset state once device is connected. 
*/ if (sc->sc_mode == MUSB2_HOST_MODE) { + /* check for VBUS error in USB host mode */ + if (usb_status & MUSB2_MASK_IVBUSERR) { + temp = MUSB2_READ_1(sc, MUSB2_REG_DEVCTL); + temp |= MUSB2_MASK_SESS; + MUSB2_WRITE_1(sc, MUSB2_REG_DEVCTL, temp); + } if (usb_status & MUSB2_MASK_ICONN) sc->sc_flags.status_bus_reset = 1; if (usb_status & MUSB2_MASK_IDISC) diff --git a/sys/dev/usb/controller/uhci.c b/sys/dev/usb/controller/uhci.c index 0077615..5204d63 100644 --- a/sys/dev/usb/controller/uhci.c +++ b/sys/dev/usb/controller/uhci.c @@ -1476,7 +1476,8 @@ uhci_interrupt(uhci_softc_t *sc) UHCI_STS_USBEI | UHCI_STS_RD | UHCI_STS_HSE | - UHCI_STS_HCPE); + UHCI_STS_HCPE | + UHCI_STS_HCH); if (status == 0) { /* nothing to acknowledge */ diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs index a40c913..d1c7374 100644 --- a/sys/dev/usb/usbdevs +++ b/sys/dev/usb/usbdevs @@ -686,6 +686,7 @@ vendor ASUS2 0x1761 ASUS vendor SWEEX2 0x177f Sweex vendor METAGEEK 0x1781 MetaGeek vendor KAMSTRUP 0x17a8 Kamstrup A/S +vendor DISPLAYLINK 0x17e9 DisplayLink vendor LENOVO 0x17ef Lenovo vendor WAVESENSE 0x17f4 WaveSense vendor VAISALA 0x1843 Vaisala @@ -1655,6 +1656,28 @@ product DLINK2 RT3070_4 0x3c15 RT3070 product DLINK2 RT3070_5 0x3c16 RT3070 product DLINK3 DWM652 0x3e04 DWM-652 +/* DisplayLink products */ +product DISPLAYLINK LCD4300U 0x01ba LCD-4300U +product DISPLAYLINK LCD8000U 0x01bb LCD-8000U +product DISPLAYLINK LD220 0x0100 Samsung LD220 +product DISPLAYLINK GUC2020 0x0059 IOGEAR DVI GUC2020 +product DISPLAYLINK VCUD60 0x0136 Rextron DVI +product DISPLAYLINK CONV 0x0138 StarTech CONV-USB2DVI +product DISPLAYLINK DLDVI 0x0141 DisplayLink DVI +product DISPLAYLINK VGA10 0x015a CMP-USBVGA10 +product DISPLAYLINK WSDVI 0x0198 WS Tech DVI +product DISPLAYLINK EC008 0x019b EasyCAP008 DVI +product DISPLAYLINK HPDOCK 0x01d4 HP USB Docking +product DISPLAYLINK NL571 0x01d7 HP USB DVI +product DISPLAYLINK M01061 0x01e2 Lenovo DVI +product DISPLAYLINK SWDVI 0x024c SUNWEIT DVI +product 
DISPLAYLINK NBDOCK 0x0215 VideoHome NBdock1920 +product DISPLAYLINK LUM70 0x02a9 Lilliput UM-70 +product DISPLAYLINK UM7X0 0x401a nanovision MiMo +product DISPLAYLINK LT1421 0x03e0 Lenovo ThinkVision LT1421 +product DISPLAYLINK POLARIS2 0x0117 Polaris2 USB dock +product DISPLAYLINK PLUGABLE 0x0377 Plugable docking station + /* DMI products */ product DMI CFSM_RW 0xa109 CF/SM Reader/Writer product DMI DISK 0x2bcf Generic Disk diff --git a/sys/dev/usb/video/udl.c b/sys/dev/usb/video/udl.c new file mode 100644 index 0000000..71d6fff --- /dev/null +++ b/sys/dev/usb/video/udl.c @@ -0,0 +1,1075 @@ +/* $OpenBSD: udl.c,v 1.81 2014/12/09 07:05:06 doug Exp $ */ +/* $FreeBSD$ */ + +/*- + * Copyright (c) 2015 Hans Petter Selasky <hselasky@freebsd.org> + * Copyright (c) 2009 Marcus Glocker <mglocker@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* + * Driver for the "DisplayLink DL-120 / DL-160" graphic chips based on + * the reversed engineered specifications of Florian Echtler + * <floe@butterbrot.org>: + * + * http://floe.butterbrot.org/displaylink/doku.php + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/callout.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/condvar.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/consio.h> +#include <sys/fbio.h> + +#include <dev/fb/fbreg.h> +#include <dev/syscons/syscons.h> + +#include <dev/videomode/videomode.h> +#include <dev/videomode/edidvar.h> + +#include <dev/usb/usb.h> +#include <dev/usb/usbdi.h> +#include <dev/usb/usbdi_util.h> +#include "usbdevs.h" + +#include <dev/usb/video/udl.h> + +#include "fb_if.h" + +#undef DPRINTF +#undef DPRINTFN +#define USB_DEBUG_VAR udl_debug +#include <dev/usb/usb_debug.h> + +#ifdef USB_DEBUG +static int udl_debug = 0; + +static SYSCTL_NODE(_hw_usb, OID_AUTO, udl, CTLFLAG_RW, 0, "USB UDL"); + +SYSCTL_INT(_hw_usb_udl, OID_AUTO, debug, CTLFLAG_RWTUN, + &udl_debug, 0, "Debug level"); +#endif + +/* + * Prototypes. 
+ */ +static usb_callback_t udl_bulk_write_callback; + +static device_probe_t udl_probe; +static device_attach_t udl_attach; +static device_detach_t udl_detach; +static fb_getinfo_t udl_fb_getinfo; +static fb_setblankmode_t udl_fb_setblankmode; + +static void udl_select_chip(struct udl_softc *, struct usb_attach_arg *); +static int udl_init_chip(struct udl_softc *); +static void udl_select_mode(struct udl_softc *); +static int udl_init_resolution(struct udl_softc *); +static void udl_fbmem_alloc(struct udl_softc *); +static int udl_cmd_write_buf_le16(struct udl_softc *, const uint8_t *, uint32_t, uint8_t, int); +static int udl_cmd_buf_copy_le16(struct udl_softc *, uint32_t, uint32_t, uint8_t, int); +static void udl_cmd_insert_int_1(struct udl_cmd_buf *, uint8_t); +static void udl_cmd_insert_int_3(struct udl_cmd_buf *, uint32_t); +static void udl_cmd_insert_buf_le16(struct udl_cmd_buf *, const uint8_t *, uint32_t); +static void udl_cmd_write_reg_1(struct udl_cmd_buf *, uint8_t, uint8_t); +static void udl_cmd_write_reg_3(struct udl_cmd_buf *, uint8_t, uint32_t); +static int udl_power_save(struct udl_softc *, int, int); + +static const struct usb_config udl_config[UDL_N_TRANSFER] = { + [UDL_BULK_WRITE_0] = { + .type = UE_BULK, + .endpoint = UE_ADDR_ANY, + .direction = UE_DIR_TX, + .flags = {.pipe_bof = 1,.force_short_xfer = 1,.ext_buffer = 1,}, + .bufsize = UDL_CMD_MAX_DATA_SIZE * UDL_CMD_MAX_FRAMES, + .callback = &udl_bulk_write_callback, + .frames = UDL_CMD_MAX_FRAMES, + .timeout = 5000, /* 5 seconds */ + }, + [UDL_BULK_WRITE_1] = { + .type = UE_BULK, + .endpoint = UE_ADDR_ANY, + .direction = UE_DIR_TX, + .flags = {.pipe_bof = 1,.force_short_xfer = 1,.ext_buffer = 1,}, + .bufsize = UDL_CMD_MAX_DATA_SIZE * UDL_CMD_MAX_FRAMES, + .callback = &udl_bulk_write_callback, + .frames = UDL_CMD_MAX_FRAMES, + .timeout = 5000, /* 5 seconds */ + }, +}; + +/* + * Driver glue. 
+ */ +static devclass_t udl_devclass; + +static device_method_t udl_methods[] = { + DEVMETHOD(device_probe, udl_probe), + DEVMETHOD(device_attach, udl_attach), + DEVMETHOD(device_detach, udl_detach), + DEVMETHOD(fb_getinfo, udl_fb_getinfo), + DEVMETHOD_END +}; + +static driver_t udl_driver = { + .name = "udl", + .methods = udl_methods, + .size = sizeof(struct udl_softc), +}; + +DRIVER_MODULE(udl, uhub, udl_driver, udl_devclass, NULL, NULL); +MODULE_DEPEND(udl, usb, 1, 1, 1); +MODULE_DEPEND(udl, fbd, 1, 1, 1); +MODULE_DEPEND(udl, videomode, 1, 1, 1); +MODULE_VERSION(udl, 1); + +/* + * Matching devices. + */ +static const STRUCT_USB_HOST_ID udl_devs[] = { + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LCD4300U, DL120)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LCD8000U, DL120)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_GUC2020, DL160)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LD220, DL165)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_VCUD60, DL160)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_DLDVI, DL160)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_VGA10, DL120)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_WSDVI, DLUNK)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_EC008, DL160)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_HPDOCK, DL160)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_NL571, DL160)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_M01061, DL195)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_NBDOCK, DL165)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_SWDVI, DLUNK)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_UM7X0, DL120)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_CONV, DL160)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_PLUGABLE, DL160)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, 
USB_PRODUCT_DISPLAYLINK_LUM70, DL125)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_POLARIS2, DLUNK)}, + {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LT1421, DLUNK)} +}; + +static uint32_t +udl_get_fb_size(struct udl_softc *sc) +{ + unsigned i = sc->sc_cur_mode; + + return ((uint32_t)udl_modes[i].hdisplay * + (uint32_t)udl_modes[i].vdisplay * 2); +} + +static uint32_t +udl_get_fb_width(struct udl_softc *sc) +{ + unsigned i = sc->sc_cur_mode; + + return (udl_modes[i].hdisplay); +} + +static uint32_t +udl_get_fb_height(struct udl_softc *sc) +{ + unsigned i = sc->sc_cur_mode; + + return (udl_modes[i].vdisplay); +} + +static uint32_t +udl_get_fb_hz(struct udl_softc *sc) +{ + unsigned i = sc->sc_cur_mode; + + return (udl_modes[i].hz); +} + +static void +udl_callout(void *arg) +{ + struct udl_softc *sc = arg; + const uint32_t max = udl_get_fb_size(sc); + + if (sc->sc_power_save == 0) { + if (sc->sc_sync_off >= max) + sc->sc_sync_off = 0; + usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_0]); + usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_1]); + } + callout_reset(&sc->sc_callout, hz / 5, &udl_callout, sc); +} + +static int +udl_probe(device_t dev) +{ + struct usb_attach_arg *uaa = device_get_ivars(dev); + + if (uaa->usb_mode != USB_MODE_HOST) + return (ENXIO); + if (uaa->info.bConfigIndex != 0) + return (ENXIO); + if (uaa->info.bIfaceIndex != 0) + return (ENXIO); + + return (usbd_lookup_id_by_uaa(udl_devs, sizeof(udl_devs), uaa)); +} + +static int +udl_attach(device_t dev) +{ + struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); + struct sysctl_oid *tree = device_get_sysctl_tree(dev); + struct udl_softc *sc = device_get_softc(dev); + struct usb_attach_arg *uaa = device_get_ivars(dev); + int error; + int i; + + device_set_usb_desc(dev); + + mtx_init(&sc->sc_mtx, "UDL lock", NULL, MTX_DEF); + cv_init(&sc->sc_cv, "UDLCV"); + callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0); + sc->sc_udev = uaa->device; + + error = 
usbd_transfer_setup(uaa->device, &uaa->info.bIfaceIndex, + sc->sc_xfer, udl_config, UDL_N_TRANSFER, sc, &sc->sc_mtx); + + if (error) { + DPRINTF("usbd_transfer_setup error=%s\n", usbd_errstr(error)); + goto detach; + } + usbd_xfer_set_priv(sc->sc_xfer[UDL_BULK_WRITE_0], &sc->sc_xfer_head[0]); + usbd_xfer_set_priv(sc->sc_xfer[UDL_BULK_WRITE_1], &sc->sc_xfer_head[1]); + + TAILQ_INIT(&sc->sc_xfer_head[0]); + TAILQ_INIT(&sc->sc_xfer_head[1]); + TAILQ_INIT(&sc->sc_cmd_buf_free); + TAILQ_INIT(&sc->sc_cmd_buf_pending); + + sc->sc_def_chip = -1; + sc->sc_chip = USB_GET_DRIVER_INFO(uaa); + sc->sc_def_mode = -1; + sc->sc_cur_mode = UDL_MAX_MODES; + + /* Allow chip ID to be overwritten */ + SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "chipid_force", + CTLFLAG_RWTUN, &sc->sc_def_chip, 0, "chip ID"); + + /* Export current chip ID */ + SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "chipid", + CTLFLAG_RD, &sc->sc_chip, 0, "chip ID"); + + if (sc->sc_def_chip > -1 && sc->sc_def_chip <= DLMAX) { + device_printf(dev, "Forcing chip ID to 0x%04x\n", sc->sc_def_chip); + sc->sc_chip = sc->sc_def_chip; + } + /* + * The product might have more than one chip + */ + if (sc->sc_chip == DLUNK) + udl_select_chip(sc, uaa); + + for (i = 0; i != UDL_CMD_MAX_BUFFERS; i++) { + struct udl_cmd_buf *cb = &sc->sc_cmd_buf_temp[i]; + + TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_free, cb, entry); + } + + /* + * Initialize chip. + */ + error = udl_init_chip(sc); + if (error != USB_ERR_NORMAL_COMPLETION) + goto detach; + + /* + * Select edid mode. 
+ */ + udl_select_mode(sc); + + /* Allow default mode to be overwritten */ + SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "mode_force", + CTLFLAG_RWTUN, &sc->sc_def_mode, 0, "mode"); + + /* Export current mode */ + SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "mode", + CTLFLAG_RD, &sc->sc_cur_mode, 0, "mode"); + + i = sc->sc_def_mode; + if (i > -1 && i < UDL_MAX_MODES) { + if (udl_modes[i].chip <= sc->sc_chip) { + device_printf(dev, "Forcing mode to %d\n", i); + sc->sc_cur_mode = i; + } + } + /* Printout current mode */ + device_printf(dev, "Mode selected %dx%d @ %dHz\n", + (int)udl_get_fb_width(sc), + (int)udl_get_fb_height(sc), + (int)udl_get_fb_hz(sc)); + + udl_init_resolution(sc); + + /* Allocate frame buffer */ + udl_fbmem_alloc(sc); + + UDL_LOCK(sc); + udl_callout(sc); + UDL_UNLOCK(sc); + + sc->sc_fb_info.fb_name = device_get_nameunit(dev); + sc->sc_fb_info.fb_size = sc->sc_fb_size; + sc->sc_fb_info.fb_bpp = 16; + sc->sc_fb_info.fb_depth = 16; + sc->sc_fb_info.fb_width = udl_get_fb_width(sc); + sc->sc_fb_info.fb_height = udl_get_fb_height(sc); + sc->sc_fb_info.fb_stride = sc->sc_fb_info.fb_width * 2; + sc->sc_fb_info.fb_pbase = 0; + sc->sc_fb_info.fb_vbase = (uintptr_t)sc->sc_fb_addr; + sc->sc_fb_info.fb_priv = sc; + sc->sc_fb_info.setblankmode = &udl_fb_setblankmode; + + sc->sc_fbdev = device_add_child(dev, "fbd", -1); + if (sc->sc_fbdev == NULL) + goto detach; + if (device_probe_and_attach(sc->sc_fbdev) != 0) + goto detach; + + return (0); + +detach: + udl_detach(dev); + + return (ENXIO); +} + +static int +udl_detach(device_t dev) +{ + struct udl_softc *sc = device_get_softc(dev); + + if (sc->sc_fbdev != NULL) { + device_t bdev; + + bdev = sc->sc_fbdev; + sc->sc_fbdev = NULL; + device_detach(bdev); + device_delete_child(dev, bdev); + } + UDL_LOCK(sc); + sc->sc_gone = 1; + callout_stop(&sc->sc_callout); + UDL_UNLOCK(sc); + + usbd_transfer_unsetup(sc->sc_xfer, UDL_N_TRANSFER); + + callout_drain(&sc->sc_callout); + + mtx_destroy(&sc->sc_mtx); + 
cv_destroy(&sc->sc_cv); + + /* + * Free framebuffer memory, if any. + */ + free(sc->sc_fb_addr, M_DEVBUF); + free(sc->sc_fb_copy, M_DEVBUF); + + return (0); +} + +static struct fb_info * +udl_fb_getinfo(device_t dev) +{ + struct udl_softc *sc = device_get_softc(dev); + + return (&sc->sc_fb_info); +} + +static int +udl_fb_setblankmode(void *arg, int mode) +{ + struct udl_softc *sc = arg; + + switch (mode) { + case V_DISPLAY_ON: + udl_power_save(sc, 1, M_WAITOK); + break; + case V_DISPLAY_BLANK: + udl_power_save(sc, 1, M_WAITOK); + if (sc->sc_fb_addr != 0) { + const uint32_t max = udl_get_fb_size(sc); + + memset((void *)sc->sc_fb_addr, 0, max); + } + break; + case V_DISPLAY_STAND_BY: + case V_DISPLAY_SUSPEND: + udl_power_save(sc, 0, M_WAITOK); + break; + } + return (0); +} + +static struct udl_cmd_buf * +udl_cmd_buf_alloc_locked(struct udl_softc *sc, int flags) +{ + struct udl_cmd_buf *cb; + + while ((cb = TAILQ_FIRST(&sc->sc_cmd_buf_free)) == NULL) { + if (flags != M_WAITOK) + break; + cv_wait(&sc->sc_cv, &sc->sc_mtx); + } + if (cb != NULL) { + TAILQ_REMOVE(&sc->sc_cmd_buf_free, cb, entry); + cb->off = 0; + } + return (cb); +} + +static struct udl_cmd_buf * +udl_cmd_buf_alloc(struct udl_softc *sc, int flags) +{ + struct udl_cmd_buf *cb; + + UDL_LOCK(sc); + cb = udl_cmd_buf_alloc_locked(sc, flags); + UDL_UNLOCK(sc); + return (cb); +} + +static void +udl_cmd_buf_send(struct udl_softc *sc, struct udl_cmd_buf *cb) +{ + UDL_LOCK(sc); + if (sc->sc_gone) { + TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_free, cb, entry); + } else { + /* mark end of command stack */ + udl_cmd_insert_int_1(cb, UDL_BULK_SOC); + udl_cmd_insert_int_1(cb, UDL_BULK_CMD_EOC); + + TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_pending, cb, entry); + usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_0]); + usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_1]); + } + UDL_UNLOCK(sc); +} + +static struct udl_cmd_buf * +udl_fb_synchronize_locked(struct udl_softc *sc) +{ + const uint32_t max = udl_get_fb_size(sc); + + /* check if 
framebuffer is not ready */ + if (sc->sc_fb_addr == NULL || + sc->sc_fb_copy == NULL) + return (NULL); + + while (sc->sc_sync_off < max) { + uint32_t delta = max - sc->sc_sync_off; + + if (delta > UDL_CMD_MAX_PIXEL_COUNT * 2) + delta = UDL_CMD_MAX_PIXEL_COUNT * 2; + if (bcmp(sc->sc_fb_addr + sc->sc_sync_off, sc->sc_fb_copy + sc->sc_sync_off, delta) != 0) { + struct udl_cmd_buf *cb; + + cb = udl_cmd_buf_alloc_locked(sc, M_NOWAIT); + if (cb == NULL) + goto done; + memcpy(sc->sc_fb_copy + sc->sc_sync_off, + sc->sc_fb_addr + sc->sc_sync_off, delta); + udl_cmd_insert_int_1(cb, UDL_BULK_SOC); + udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_WRITE | UDL_BULK_CMD_FB_WORD); + udl_cmd_insert_int_3(cb, sc->sc_sync_off); + udl_cmd_insert_int_1(cb, delta / 2); + udl_cmd_insert_buf_le16(cb, sc->sc_fb_copy + sc->sc_sync_off, delta); + sc->sc_sync_off += delta; + return (cb); + } else { + sc->sc_sync_off += delta; + } + } +done: + return (NULL); +} + +static void +udl_bulk_write_callback(struct usb_xfer *xfer, usb_error_t error) +{ + struct udl_softc *sc = usbd_xfer_softc(xfer); + struct udl_cmd_head *phead = usbd_xfer_get_priv(xfer); + struct udl_cmd_buf *cb; + unsigned i; + + switch (USB_GET_STATE(xfer)) { + case USB_ST_TRANSFERRED: + TAILQ_CONCAT(&sc->sc_cmd_buf_free, phead, entry); + case USB_ST_SETUP: +tr_setup: + for (i = 0; i != UDL_CMD_MAX_FRAMES; i++) { + cb = TAILQ_FIRST(&sc->sc_cmd_buf_pending); + if (cb == NULL) { + cb = udl_fb_synchronize_locked(sc); + if (cb == NULL) + break; + } else { + TAILQ_REMOVE(&sc->sc_cmd_buf_pending, cb, entry); + } + TAILQ_INSERT_TAIL(phead, cb, entry); + usbd_xfer_set_frame_data(xfer, i, cb->buf, cb->off); + } + if (i != 0) { + usbd_xfer_set_frames(xfer, i); + usbd_transfer_submit(xfer); + } + break; + default: + TAILQ_CONCAT(&sc->sc_cmd_buf_free, phead, entry); + if (error != USB_ERR_CANCELLED) { + /* try clear stall first */ + usbd_xfer_set_stall(xfer); + goto tr_setup; + } + break; + } + /* wakeup any waiters */ + cv_signal(&sc->sc_cv); +} + 
+static int +udl_power_save(struct udl_softc *sc, int on, int flags) +{ + struct udl_cmd_buf *cb; + + /* get new buffer */ + cb = udl_cmd_buf_alloc(sc, flags); + if (cb == NULL) + return (EAGAIN); + + DPRINTF("screen %s\n", on ? "ON" : "OFF"); + + sc->sc_power_save = on ? 0 : 1; + + if (on) + udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_ON); + else + udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_OFF); + + udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff); + udl_cmd_buf_send(sc, cb); + return (0); +} + +static int +udl_ctrl_msg(struct udl_softc *sc, uint8_t rt, uint8_t r, + uint16_t index, uint16_t value, uint8_t *buf, size_t len) +{ + usb_device_request_t req; + int error; + + req.bmRequestType = rt; + req.bRequest = r; + USETW(req.wIndex, index); + USETW(req.wValue, value); + USETW(req.wLength, len); + + error = usbd_do_request_flags(sc->sc_udev, NULL, + &req, buf, 0, NULL, USB_DEFAULT_TIMEOUT); + + DPRINTF("%s\n", usbd_errstr(error)); + + return (error); +} + +static int +udl_poll(struct udl_softc *sc, uint32_t *buf) +{ + uint32_t lbuf; + int error; + + error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, + UDL_CTRL_CMD_POLL, 0x0000, 0x0000, (uint8_t *)&lbuf, sizeof(lbuf)); + if (error == USB_ERR_NORMAL_COMPLETION) + *buf = le32toh(lbuf); + return (error); +} + +static int +udl_read_1(struct udl_softc *sc, uint16_t addr, uint8_t *buf) +{ + uint8_t lbuf[1]; + int error; + + error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, + UDL_CTRL_CMD_READ_1, addr, 0x0000, lbuf, 1); + if (error == USB_ERR_NORMAL_COMPLETION) + *buf = *(uint8_t *)lbuf; + return (error); +} + +static int +udl_write_1(struct udl_softc *sc, uint16_t addr, uint8_t buf) +{ + int error; + + error = udl_ctrl_msg(sc, UT_WRITE_VENDOR_DEVICE, + UDL_CTRL_CMD_WRITE_1, addr, 0x0000, &buf, 1); + return (error); +} + +static int +udl_read_edid(struct udl_softc *sc, uint8_t *buf) +{ + uint8_t lbuf[64]; + uint16_t offset; + int error; + + offset = 0; + + error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, + 
UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 64); + if (error != USB_ERR_NORMAL_COMPLETION) + goto fail; + bcopy(lbuf + 1, buf + offset, 63); + offset += 63; + + error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, + UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 64); + if (error != USB_ERR_NORMAL_COMPLETION) + goto fail; + bcopy(lbuf + 1, buf + offset, 63); + offset += 63; + + error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, + UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 3); + if (error != USB_ERR_NORMAL_COMPLETION) + goto fail; + bcopy(lbuf + 1, buf + offset, 2); +fail: + return (error); +} + +static uint8_t +udl_lookup_mode(uint16_t hdisplay, uint16_t vdisplay, uint8_t hz, + uint16_t chip, uint32_t clock) +{ + uint8_t idx; + + /* + * Check first if we have a matching mode with pixelclock + */ + for (idx = 0; idx != UDL_MAX_MODES; idx++) { + if ((udl_modes[idx].hdisplay == hdisplay) && + (udl_modes[idx].vdisplay == vdisplay) && + (udl_modes[idx].clock == clock) && + (udl_modes[idx].chip <= chip)) { + return (idx); + } + } + + /* + * If not, check for matching mode with update frequency + */ + for (idx = 0; idx != UDL_MAX_MODES; idx++) { + if ((udl_modes[idx].hdisplay == hdisplay) && + (udl_modes[idx].vdisplay == vdisplay) && + (udl_modes[idx].hz == hz) && + (udl_modes[idx].chip <= chip)) { + return (idx); + } + } + return (idx); +} + +static void +udl_select_chip(struct udl_softc *sc, struct usb_attach_arg *uaa) +{ + const char *pserial; + + pserial = usb_get_serial(uaa->device); + + sc->sc_chip = DL120; + + if ((uaa->info.idVendor == USB_VENDOR_DISPLAYLINK) && + (uaa->info.idProduct == USB_PRODUCT_DISPLAYLINK_WSDVI)) { + + /* + * WS Tech DVI is DL120 or DL160. All deviced uses the + * same revision (0.04) so iSerialNumber must be used + * to determin which chip it is. 
+ */ + + if (strlen(pserial) > 7) { + if (strncmp(pserial, "0198-13", 7) == 0) + sc->sc_chip = DL160; + } + DPRINTF("iSerialNumber (%s) used to select chip (%d)\n", + pserial, sc->sc_chip); + } + if ((uaa->info.idVendor == USB_VENDOR_DISPLAYLINK) && + (uaa->info.idProduct == USB_PRODUCT_DISPLAYLINK_SWDVI)) { + + /* + * SUNWEIT DVI is DL160, DL125, DL165 or DL195. Major revision + * can be used to differ between DL1x0 and DL1x5. Minor to + * differ between DL1x5. iSerialNumber seems not to be uniqe. + */ + + sc->sc_chip = DL160; + + if (uaa->info.bcdDevice >= 0x100) { + sc->sc_chip = DL165; + if (uaa->info.bcdDevice == 0x104) + sc->sc_chip = DL195; + if (uaa->info.bcdDevice == 0x108) + sc->sc_chip = DL125; + } + DPRINTF("bcdDevice (%02x) used to select chip (%d)\n", + uaa->info.bcdDevice, sc->sc_chip); + } +} + +static int +udl_set_enc_key(struct udl_softc *sc, uint8_t *buf, uint8_t len) +{ + int error; + + error = udl_ctrl_msg(sc, UT_WRITE_VENDOR_DEVICE, + UDL_CTRL_CMD_SET_KEY, 0x0000, 0x0000, buf, len); + return (error); +} + +static void +udl_fbmem_alloc(struct udl_softc *sc) +{ + uint32_t size; + + size = udl_get_fb_size(sc); + size = round_page(size); + + sc->sc_fb_addr = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); + sc->sc_fb_copy = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); + sc->sc_fb_size = size; +} + +static void +udl_cmd_insert_int_1(struct udl_cmd_buf *cb, uint8_t value) +{ + + cb->buf[cb->off] = value; + cb->off += 1; +} + +#if 0 +static void +udl_cmd_insert_int_2(struct udl_cmd_buf *cb, uint16_t value) +{ + uint16_t lvalue; + + lvalue = htobe16(value); + bcopy(&lvalue, cb->buf + cb->off, 2); + + cb->off += 2; +} + +#endif + +static void +udl_cmd_insert_int_3(struct udl_cmd_buf *cb, uint32_t value) +{ + uint32_t lvalue; + +#if BYTE_ORDER == BIG_ENDIAN + lvalue = htobe32(value) << 8; +#else + lvalue = htobe32(value) >> 8; +#endif + bcopy(&lvalue, cb->buf + cb->off, 3); + + cb->off += 3; +} + +#if 0 +static void +udl_cmd_insert_int_4(struct udl_cmd_buf 
*cb, uint32_t value) +{ + uint32_t lvalue; + + lvalue = htobe32(value); + bcopy(&lvalue, cb->buf + cb->off, 4); + + cb->off += 4; +} + +#endif + +static void +udl_cmd_insert_buf_le16(struct udl_cmd_buf *cb, const uint8_t *buf, uint32_t len) +{ + uint32_t x; + + for (x = 0; x != len; x += 2) { + /* byte swap from little endian to big endian */ + cb->buf[cb->off + x + 0] = buf[x + 1]; + cb->buf[cb->off + x + 1] = buf[x + 0]; + } + cb->off += len; +} + +static void +udl_cmd_write_reg_1(struct udl_cmd_buf *cb, uint8_t reg, uint8_t val) +{ + + udl_cmd_insert_int_1(cb, UDL_BULK_SOC); + udl_cmd_insert_int_1(cb, UDL_BULK_CMD_REG_WRITE_1); + udl_cmd_insert_int_1(cb, reg); + udl_cmd_insert_int_1(cb, val); +} + +static void +udl_cmd_write_reg_3(struct udl_cmd_buf *cb, uint8_t reg, uint32_t val) +{ + + udl_cmd_write_reg_1(cb, reg + 0, (val >> 16) & 0xff); + udl_cmd_write_reg_1(cb, reg + 1, (val >> 8) & 0xff); + udl_cmd_write_reg_1(cb, reg + 2, (val >> 0) & 0xff); +} + +static int +udl_init_chip(struct udl_softc *sc) +{ + uint32_t ui32; + uint8_t ui8; + int error; + + error = udl_poll(sc, &ui32); + if (error != USB_ERR_NORMAL_COMPLETION) + return (error); + DPRINTF("poll=0x%08x\n", ui32); + + /* Some products may use later chip too */ + switch (ui32 & 0xff) { + case 0xf1: /* DL1x5 */ + switch (sc->sc_chip) { + case DL120: + sc->sc_chip = DL125; + break; + case DL160: + sc->sc_chip = DL165; + break; + } + break; + } + DPRINTF("chip 0x%04x\n", sc->sc_chip); + + error = udl_read_1(sc, 0xc484, &ui8); + if (error != USB_ERR_NORMAL_COMPLETION) + return (error); + DPRINTF("read 0x%02x from 0xc484\n", ui8); + + error = udl_write_1(sc, 0xc41f, 0x01); + if (error != USB_ERR_NORMAL_COMPLETION) + return (error); + DPRINTF("write 0x01 to 0xc41f\n"); + + error = udl_read_edid(sc, sc->sc_edid); + if (error != USB_ERR_NORMAL_COMPLETION) + return (error); + DPRINTF("read EDID\n"); + + error = udl_set_enc_key(sc, __DECONST(void *, udl_null_key_1), + sizeof(udl_null_key_1)); + if (error != 
USB_ERR_NORMAL_COMPLETION) + return (error); + DPRINTF("set encryption key\n"); + + error = udl_write_1(sc, 0xc40b, 0x00); + if (error != USB_ERR_NORMAL_COMPLETION) + return (error); + DPRINTF("write 0x00 to 0xc40b\n"); + + return (USB_ERR_NORMAL_COMPLETION); +} + +static void +udl_init_fb_offsets(struct udl_cmd_buf *cb, uint32_t start16, uint32_t stride16, + uint32_t start8, uint32_t stride8) +{ + udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0x00); + udl_cmd_write_reg_3(cb, UDL_REG_ADDR_START16, start16); + udl_cmd_write_reg_3(cb, UDL_REG_ADDR_STRIDE16, stride16); + udl_cmd_write_reg_3(cb, UDL_REG_ADDR_START8, start8); + udl_cmd_write_reg_3(cb, UDL_REG_ADDR_STRIDE8, stride8); + udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff); +} + +static int +udl_init_resolution(struct udl_softc *sc) +{ + const uint32_t max = udl_get_fb_size(sc); + const uint8_t *buf = udl_modes[sc->sc_cur_mode].mode; + struct udl_cmd_buf *cb; + uint32_t delta; + uint32_t i; + int error; + + /* get new buffer */ + cb = udl_cmd_buf_alloc(sc, M_WAITOK); + if (cb == NULL) + return (EAGAIN); + + /* write resolution values and set video memory offsets */ + udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0x00); + for (i = 0; i < UDL_MODE_SIZE; i++) + udl_cmd_write_reg_1(cb, i, buf[i]); + udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff); + + udl_init_fb_offsets(cb, 0x000000, 0x000a00, 0x555555, 0x000500); + udl_cmd_buf_send(sc, cb); + + /* fill screen with black color */ + for (i = 0; i < max; i += delta) { + static const uint8_t udl_black[UDL_CMD_MAX_PIXEL_COUNT * 2] __aligned(4); + + delta = max - i; + if (delta > UDL_CMD_MAX_PIXEL_COUNT * 2) + delta = UDL_CMD_MAX_PIXEL_COUNT * 2; + if (i == 0) + error = udl_cmd_write_buf_le16(sc, udl_black, i, delta / 2, M_WAITOK); + else + error = udl_cmd_buf_copy_le16(sc, 0, i, delta / 2, M_WAITOK); + if (error) + return (error); + } + + /* get new buffer */ + cb = udl_cmd_buf_alloc(sc, M_WAITOK); + if (cb == NULL) + return (EAGAIN); + + /* show framebuffer content */ + 
udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_ON); + udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff); + udl_cmd_buf_send(sc, cb); + return (0); +} + +static void +udl_select_mode(struct udl_softc *sc) +{ + struct udl_mode mode; + int index = UDL_MAX_MODES; + int i; + + /* try to get the preferred mode from EDID */ + edid_parse(sc->sc_edid, &sc->sc_edid_info); +#ifdef USB_DEBUG + edid_print(&sc->sc_edid_info); +#endif + if (sc->sc_edid_info.edid_preferred_mode != NULL) { + mode.hz = + (sc->sc_edid_info.edid_preferred_mode->dot_clock * 1000) / + (sc->sc_edid_info.edid_preferred_mode->htotal * + sc->sc_edid_info.edid_preferred_mode->vtotal); + mode.clock = + sc->sc_edid_info.edid_preferred_mode->dot_clock / 10; + mode.hdisplay = + sc->sc_edid_info.edid_preferred_mode->hdisplay; + mode.vdisplay = + sc->sc_edid_info.edid_preferred_mode->vdisplay; + index = udl_lookup_mode(mode.hdisplay, mode.vdisplay, mode.hz, + sc->sc_chip, mode.clock); + sc->sc_cur_mode = index; + } else { + DPRINTF("no preferred mode found!\n"); + } + + if (index == UDL_MAX_MODES) { + DPRINTF("no mode line found for %dx%d @ %dHz!\n", + mode.hdisplay, mode.vdisplay, mode.hz); + + i = 0; + while (i < sc->sc_edid_info.edid_nmodes) { + mode.hz = + (sc->sc_edid_info.edid_modes[i].dot_clock * 1000) / + (sc->sc_edid_info.edid_modes[i].htotal * + sc->sc_edid_info.edid_modes[i].vtotal); + mode.clock = + sc->sc_edid_info.edid_modes[i].dot_clock / 10; + mode.hdisplay = + sc->sc_edid_info.edid_modes[i].hdisplay; + mode.vdisplay = + sc->sc_edid_info.edid_modes[i].vdisplay; + index = udl_lookup_mode(mode.hdisplay, mode.vdisplay, + mode.hz, sc->sc_chip, mode.clock); + if (index < UDL_MAX_MODES) + if ((sc->sc_cur_mode == UDL_MAX_MODES) || + (index > sc->sc_cur_mode)) + sc->sc_cur_mode = index; + i++; + } + } + /* + * If no mode found use default. 
+ */ + if (sc->sc_cur_mode == UDL_MAX_MODES) + sc->sc_cur_mode = udl_lookup_mode(800, 600, 60, sc->sc_chip, 0); +} + +static int +udl_cmd_write_buf_le16(struct udl_softc *sc, const uint8_t *buf, uint32_t off, + uint8_t pixels, int flags) +{ + struct udl_cmd_buf *cb; + + cb = udl_cmd_buf_alloc(sc, flags); + if (cb == NULL) + return (EAGAIN); + + udl_cmd_insert_int_1(cb, UDL_BULK_SOC); + udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_WRITE | UDL_BULK_CMD_FB_WORD); + udl_cmd_insert_int_3(cb, off); + udl_cmd_insert_int_1(cb, pixels); + udl_cmd_insert_buf_le16(cb, buf, 2 * pixels); + udl_cmd_buf_send(sc, cb); + + return (0); +} + +static int +udl_cmd_buf_copy_le16(struct udl_softc *sc, uint32_t src, uint32_t dst, + uint8_t pixels, int flags) +{ + struct udl_cmd_buf *cb; + + cb = udl_cmd_buf_alloc(sc, flags); + if (cb == NULL) + return (EAGAIN); + + udl_cmd_insert_int_1(cb, UDL_BULK_SOC); + udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_COPY | UDL_BULK_CMD_FB_WORD); + udl_cmd_insert_int_3(cb, dst); + udl_cmd_insert_int_1(cb, pixels); + udl_cmd_insert_int_3(cb, src); + udl_cmd_buf_send(sc, cb); + + return (0); +} diff --git a/sys/dev/usb/video/udl.h b/sys/dev/usb/video/udl.h new file mode 100644 index 0000000..1fdae7a --- /dev/null +++ b/sys/dev/usb/video/udl.h @@ -0,0 +1,311 @@ +/* $OpenBSD: udl.h,v 1.21 2013/04/15 09:23:02 mglocker Exp $ */ +/* $FreeBSD$ */ + +/* + * Copyright (c) 2009 Marcus Glocker <mglocker@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _UDL_H_ +#define _UDL_H_ + +#include <sys/types.h> +#include <sys/queue.h> + +/* + * BULK command transfer structure. + */ +#define UDL_CMD_MAX_FRAMES 64 /* units */ +#define UDL_CMD_MAX_DATA_SIZE 512 /* bytes */ +#define UDL_CMD_MAX_HEAD_SIZE 16 /* bytes */ +#define UDL_CMD_MAX_PIXEL_COUNT ((UDL_CMD_MAX_DATA_SIZE - UDL_CMD_MAX_HEAD_SIZE) / 2) +#define UDL_CMD_MAX_BUFFERS (3 * UDL_CMD_MAX_FRAMES) +#define UDL_FONT_HEIGHT 16 /* pixels */ +#define UDL_MAX_MODES 25 /* units */ + +struct udl_cmd_buf { + TAILQ_ENTRY(udl_cmd_buf) entry; + uint32_t off; + uint8_t buf[UDL_CMD_MAX_DATA_SIZE] __aligned(4); +}; + +TAILQ_HEAD(udl_cmd_head, udl_cmd_buf); + +enum { + UDL_BULK_WRITE_0, + UDL_BULK_WRITE_1, + UDL_N_TRANSFER, +}; + +/* + * Our per device structure. 
+ */ +struct udl_softc { + struct mtx sc_mtx; + struct cv sc_cv; + struct callout sc_callout; + struct usb_xfer *sc_xfer[UDL_N_TRANSFER]; + struct usb_device *sc_udev; + device_t sc_fbdev; + struct fb_info sc_fb_info; + uint8_t sc_edid[128]; + struct edid_info sc_edid_info; + struct udl_cmd_head sc_xfer_head[2]; + struct udl_cmd_head sc_cmd_buf_free; + struct udl_cmd_head sc_cmd_buf_pending; + struct udl_cmd_buf sc_cmd_buf_temp[UDL_CMD_MAX_BUFFERS]; + uint32_t sc_sync_off; + uint32_t sc_fb_size; + uint8_t *sc_fb_addr; + uint8_t *sc_fb_copy; + int sc_def_chip; /* default chip version */ + int sc_chip; +#define DLALL 0x0000 +#define DL125 0x0000 /* max 1280x1024, 1440x900 */ +#define DL120 0x0001 /* max 1280x1024, 1440x1050 */ +#define DL160 0x0002 /* max 1600x1200, 1680x1050 */ +#define DL165 0x0003 /* max 1600x1200, 1920x1080 */ +#define DL195 0x0004 /* max 1920x1200, 2048x1152 */ +#define DLMAX 0x0004 +#define DLUNK 0x00ff /* unknown */ + int sc_def_mode; /* default mode */ + int sc_cur_mode; + uint8_t sc_power_save; /* set if power save is enabled */ + uint8_t sc_gone; +}; + +#define UDL_LOCK(sc) mtx_lock(&(sc)->sc_mtx) +#define UDL_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) + +/* + * Chip commands. + */ +#define UDL_CTRL_CMD_READ_EDID 0x02 +#define UDL_CTRL_CMD_WRITE_1 0x03 +#define UDL_CTRL_CMD_READ_1 0x04 +#define UDL_CTRL_CMD_POLL 0x06 +#define UDL_CTRL_CMD_SET_KEY 0x12 + +#define UDL_BULK_SOC 0xaf /* start of command token */ + +#define UDL_BULK_CMD_REG_WRITE_1 0x20 /* write 1 byte to register */ +#define UDL_BULK_CMD_EOC 0xa0 /* end of command stack */ +#define UDL_BULK_CMD_DECOMP 0xe0 /* send decompression table */ + +#define UDL_BULK_CMD_FB_BASE 0x60 +#define UDL_BULK_CMD_FB_WORD 0x08 +#define UDL_BULK_CMD_FB_COMP 0x10 +#define UDL_BULK_CMD_FB_WRITE (UDL_BULK_CMD_FB_BASE | 0x00) +#define UDL_BULK_CMD_FB_COPY (UDL_BULK_CMD_FB_BASE | 0x02) + +/* + * Chip registers. 
+ */ +#define UDL_REG_ADDR_START16 0x20 +#define UDL_REG_ADDR_STRIDE16 0x23 +#define UDL_REG_ADDR_START8 0x26 +#define UDL_REG_ADDR_STRIDE8 0x29 + +#define UDL_REG_SCREEN 0x1f +#define UDL_REG_SCREEN_ON 0x00 +#define UDL_REG_SCREEN_OFF 0x01 +#define UDL_REG_SYNC 0xff + +#define UDL_MODE_SIZE 29 + +/* + * Register values for screen resolution initialization. + */ +static const uint8_t udl_reg_vals_640x480_60[UDL_MODE_SIZE] = { /* 25.17 Mhz 59.9 Hz + * VESA std */ + 0x00, 0x99, 0x30, 0x26, 0x94, 0x60, 0xa9, 0xce, 0x60, 0x07, 0xb3, 0x0f, + 0x79, 0xff, 0xff, 0x02, 0x80, 0x83, 0xbc, 0xff, 0xfc, 0xff, 0xff, 0x01, + 0xe0, 0x01, 0x02, 0xab, 0x13 +}; +static const uint8_t udl_reg_vals_640x480_67[UDL_MODE_SIZE] = { /* 30.25 MHz 66.6 Hz MAC + * std */ + 0x00, 0x1d, 0x33, 0x07, 0xb3, 0x60, 0xa9, 0xce, 0x60, 0xb6, 0xa8, 0xff, + 0xff, 0xbf, 0x70, 0x02, 0x80, 0x83, 0xbc, 0xff, 0xff, 0xff, 0xf9, 0x01, + 0xe0, 0x01, 0x02, 0xa2, 0x17 +}; +static const uint8_t udl_reg_vals_640x480_72[UDL_MODE_SIZE] = { /* 31.50 Mhz 72.8 Hz + * VESA std */ + 0x00, 0x2b, 0xeb, 0x35, 0xd3, 0x0a, 0x95, 0xe6, 0x0e, 0x0f, 0xb5, 0x15, + 0x2a, 0xff, 0xff, 0x02, 0x80, 0xcc, 0x1d, 0xff, 0xf9, 0xff, 0xff, 0x01, + 0xe0, 0x01, 0x02, 0x9c, 0x18 +}; +static const uint8_t udl_reg_vals_640x480_75[UDL_MODE_SIZE] = { /* 31.50 Mhz 75.7 Hz + * VESA std */ + 0x00, 0xeb, 0xf7, 0xd3, 0x0f, 0x4f, 0x93, 0xfa, 0x47, 0xb5, 0x58, 0xff, + 0xff, 0xbf, 0x70, 0x02, 0x80, 0xf4, 0x8f, 0xff, 0xff, 0xff, 0xf9, 0x01, + 0xe0, 0x01, 0x02, 0x9c, 0x18 +}; +static const uint8_t udl_reg_vals_800x480_61[UDL_MODE_SIZE] = { /* 33.00 MHz 61.9 Hz */ + 0x00, 0x20, 0x3c, 0x7a, 0xc9, 0xf2, 0x6c, 0x48, 0xf9, 0x70, 0x53, 0xff, + 0xff, 0x21, 0x27, 0x03, 0x20, 0x91, 0xf3, 0xff, 0xff, 0xff, 0xf9, 0x01, + 0xe0, 0x01, 0x02, 0xc8, 0x19 +}; +static const uint8_t udl_reg_vals_800x600_56[UDL_MODE_SIZE] = { /* 36.00 MHz 56.2 Hz + * VESA std */ + 0x00, 0x65, 0x35, 0x48, 0xf4, 0xf2, 0x6c, 0x19, 0x18, 0xc9, 0x4b, 0xff, + 0xff, 0x70, 0x35, 0x03, 0x20, 0x32, 0x31, 
0xff, 0xff, 0xff, 0xfc, 0x02, + 0x58, 0x01, 0x02, 0x20, 0x1c +}; +static const uint8_t udl_reg_vals_800x600_60[UDL_MODE_SIZE] = { /* 40.00 MHz 60.3 Hz + * VESA std */ + 0x00, 0x20, 0x3c, 0x7a, 0xc9, 0x93, 0x60, 0xc8, 0xc7, 0x70, 0x53, 0xff, + 0xff, 0x21, 0x27, 0x03, 0x20, 0x91, 0x8f, 0xff, 0xff, 0xff, 0xf2, 0x02, + 0x58, 0x01, 0x02, 0x40, 0x1f +}; +static const uint8_t udl_reg_vals_800x600_72[UDL_MODE_SIZE] = { /* 50.00 MHz 72.1 Hz + * VESA std */ + 0x00, 0xeb, 0xf7, 0xd1, 0x90, 0x4d, 0x82, 0x23, 0x1f, 0x39, 0xcf, 0xff, + 0xff, 0x43, 0x21, 0x03, 0x20, 0x62, 0xc5, 0xff, 0xff, 0xff, 0xca, 0x02, + 0x58, 0x01, 0x02, 0x10, 0x27 +}; +static const uint8_t udl_reg_vals_800x600_74[UDL_MODE_SIZE] = { /* 50.00 MHz 74.4 Hz */ + 0x00, 0xb3, 0x76, 0x39, 0xcf, 0x60, 0xa9, 0xc7, 0xf4, 0x70, 0x53, 0xff, + 0xff, 0x35, 0x33, 0x03, 0x20, 0x8f, 0xe9, 0xff, 0xff, 0xff, 0xf9, 0x02, + 0x58, 0x01, 0x02, 0x10, 0x27 +}; +static const uint8_t udl_reg_vals_800x600_75[UDL_MODE_SIZE] = { /* 49.50 MHz 75.0 Hz + * VESA std */ + 0x00, 0xb3, 0x76, 0x39, 0xcf, 0xf2, 0x6c, 0x19, 0x18, 0x70, 0x53, 0xff, + 0xff, 0x35, 0x33, 0x03, 0x20, 0x32, 0x31, 0xff, 0xff, 0xff, 0xf9, 0x02, + 0x58, 0x01, 0x02, 0xac, 0x26 +}; +static const uint8_t udl_reg_vals_1024x768_60[UDL_MODE_SIZE] = { /* 65.00 MHz 60.0 Hz + * VESA std */ + 0x00, 0x36, 0x18, 0xd5, 0x10, 0x60, 0xa9, 0x7b, 0x33, 0xa1, 0x2b, 0x27, + 0x32, 0xff, 0xff, 0x04, 0x00, 0xd9, 0x9a, 0xff, 0xca, 0xff, 0xff, 0x03, + 0x00, 0x04, 0x03, 0xc8, 0x32 +}; +static const uint8_t udl_reg_vals_1024x768_70[UDL_MODE_SIZE] = { /* 75.00 MHz 70.0 Hz + * VESA std */ + 0x00, 0xb4, 0xed, 0x4c, 0x5e, 0x60, 0xa9, 0x7b, 0x33, 0x10, 0x4d, 0xff, + 0xff, 0x27, 0x32, 0x04, 0x00, 0xd9, 0x9a, 0xff, 0xff, 0xff, 0xca, 0x03, + 0x00, 0x04, 0x02, 0x98, 0x3a +}; +static const uint8_t udl_reg_vals_1024x768_75[UDL_MODE_SIZE] = { /* 78.75 MHz 75.0 Hz + * VESA std */ + 0x00, 0xec, 0xb4, 0xa0, 0x4c, 0x36, 0x0a, 0x07, 0xb3, 0x5e, 0xd5, 0xff, + 0xff, 0x0f, 0x79, 0x04, 0x00, 0x0f, 0x66, 0xff, 0xff, 
0xff, 0xf9, 0x03, + 0x00, 0x04, 0x02, 0x86, 0x3d +}; +static const uint8_t udl_reg_vals_1280x800_60[UDL_MODE_SIZE] = { /* 83.46 MHz 59.9 MHz */ + 0x00, 0xb2, 0x19, 0x34, 0xdf, 0x93, 0x60, 0x30, 0xfb, 0x9f, 0xca, 0xff, + 0xff, 0x27, 0x32, 0x05, 0x00, 0x61, 0xf6, 0xff, 0xff, 0xff, 0xf9, 0x03, + 0x20, 0x04, 0x02, 0x34, 0x41 +}; +static const uint8_t udl_reg_vals_1280x960_60[UDL_MODE_SIZE] = { /* 108.00 MHz 60.0 Hz + * VESA std */ + 0x00, 0xa6, 0x03, 0x5c, 0x7e, 0x0a, 0x95, 0x48, 0xf4, 0x61, 0xbd, 0xff, + 0xff, 0x94, 0x43, 0x05, 0x00, 0x91, 0xe8, 0xff, 0xff, 0xff, 0xf9, 0x03, + 0xc0, 0x04, 0x02, 0x60, 0x54 +}; +static const uint8_t udl_reg_vals_1280x1024_60[UDL_MODE_SIZE] = { /* 108.00 MHz 60.0 Hz + * VESA std */ + 0x00, 0x98, 0xf8, 0x0d, 0x57, 0x2a, 0x55, 0x4d, 0x54, 0xca, 0x0d, 0xff, + 0xff, 0x94, 0x43, 0x05, 0x00, 0x9a, 0xa8, 0xff, 0xff, 0xff, 0xf9, 0x04, + 0x00, 0x04, 0x02, 0x60, 0x54 +}; +static const uint8_t udl_reg_vals_1280x1024_75[UDL_MODE_SIZE] = { /* 135.00 MHz 75.0 Hz + * VESA std */ + 0x00, 0xce, 0x12, 0x3f, 0x9f, 0x2a, 0x55, 0x4d, 0x54, 0xca, 0x0d, 0xff, + 0xff, 0x32, 0x60, 0x05, 0x00, 0x9a, 0xa8, 0xff, 0xff, 0xff, 0xf9, 0x04, + 0x00, 0x04, 0x02, 0x78, 0x69 +}; +static const uint8_t udl_reg_vals_1366x768_60[UDL_MODE_SIZE] = { /* 90 MHz 60.0 Hz */ + 0x01, 0x19, 0x1e, 0x1f, 0xb0, 0x93, 0x60, 0x40, 0x7b, 0x36, 0xe8, 0x27, + 0x32, 0xff, 0xff, 0x05, 0x56, 0x03, 0xd9, 0xff, 0xff, 0xfc, 0xa7, 0x03, + 0x00, 0x04, 0x02, 0x9a, 0x42 +}; +static const uint8_t udl_reg_vals_1440x900_60[UDL_MODE_SIZE] = { /* 106.47 MHz 59.9 Hz */ + 0x00, 0x24, 0xce, 0xe7, 0x72, 0x36, 0x0a, 0x86, 0xca, 0x1c, 0x10, 0xff, + 0xff, 0x60, 0x3a, 0x05, 0xa0, 0x0d, 0x94, 0xff, 0xff, 0xff, 0xf9, 0x03, + 0x84, 0x04, 0x02, 0x2e, 0x53 +}; +static const uint8_t udl_reg_vals_1440x900_59[UDL_MODE_SIZE] = { /* 106.50 MHz 59.8 Hz */ + 0x00, 0x24, 0xce, 0xe7, 0x72, 0xd8, 0x2a, 0x1b, 0x28, 0x1c, 0x10, 0xff, + 0xff, 0x60, 0x3a, 0x05, 0xa0, 0x36, 0x50, 0xff, 0xff, 0xff, 0xf9, 0x03, + 0x84, 0x04, 0x02, 0x34, 
0x53 +}; +static const uint8_t udl_reg_vals_1440x900_75[UDL_MODE_SIZE] = { /* 136.49 MHz 75.0 Hz */ + 0x00, 0x73, 0xa6, 0x14, 0xea, 0x0a, 0x95, 0xca, 0x10, 0x7f, 0x46, 0xff, + 0xff, 0x60, 0x3a, 0x05, 0xa0, 0x94, 0x20, 0xff, 0xff, 0xff, 0xf9, 0x03, + 0x84, 0x04, 0x02, 0xa2, 0x6a +}; +static const uint8_t udl_reg_vals_1680x1050_60[UDL_MODE_SIZE] = { /* 147.14 MHz 60.0 Hz */ + 0x00, 0x53, 0x43, 0xa6, 0x71, 0xc1, 0x52, 0xd9, 0x29, 0x69, 0x9f, 0xff, + 0xff, 0xd7, 0xee, 0x06, 0x90, 0xb2, 0x53, 0xff, 0xff, 0xff, 0xf9, 0x04, + 0x1a, 0x04, 0x02, 0xf4, 0x72 +}; +static const uint8_t udl_reg_vals_1600x1200_60[UDL_MODE_SIZE] = { /* 162.00 MHz 60.0 Hz + * VESA std */ + 0x00, 0xcf, 0xa4, 0x3c, 0x4e, 0x55, 0x73, 0x71, 0x2b, 0x71, 0x52, 0xff, + 0xff, 0xee, 0xca, 0x06, 0x40, 0xe2, 0x57, 0xff, 0xff, 0xff, 0xf9, 0x04, + 0xb0, 0x04, 0x02, 0x90, 0x7e +}; +static const uint8_t udl_reg_vals_1920x1080_60[UDL_MODE_SIZE] = { /* 138.50 MHz 59.9 Hz */ + 0x00, 0x73, 0xa6, 0x28, 0xb3, 0x54, 0xaa, 0x41, 0x5d, 0x0d, 0x9f, 0x32, + 0x60, 0xff, 0xff, 0x07, 0x80, 0x0a, 0xea, 0xff, 0xf9, 0xff, 0xff, 0x04, + 0x38, 0x04, 0x02, 0xe0, 0x7c +}; + +struct udl_mode { + uint16_t hdisplay; + uint16_t vdisplay; + uint8_t hz; + uint16_t chip; + uint32_t clock; + const uint8_t *mode; +}; + +static const struct udl_mode udl_modes[UDL_MAX_MODES] = { + {640, 480, 60, DLALL, 2520, udl_reg_vals_640x480_60}, + {640, 480, 67, DLALL, 3025, udl_reg_vals_640x480_67}, + {640, 480, 72, DLALL, 3150, udl_reg_vals_640x480_72}, + {640, 480, 75, DLALL, 3150, udl_reg_vals_640x480_75}, + {800, 480, 59, DLALL, 5000, udl_reg_vals_800x480_61}, + {800, 480, 61, DLALL, 3300, udl_reg_vals_800x480_61}, + {800, 600, 56, DLALL, 3600, udl_reg_vals_800x600_56}, + {800, 600, 60, DLALL, 4000, udl_reg_vals_800x600_60}, + {800, 600, 72, DLALL, 5000, udl_reg_vals_800x600_72}, + {800, 600, 74, DLALL, 5000, udl_reg_vals_800x600_74}, + {800, 600, 75, DLALL, 4950, udl_reg_vals_800x600_75}, + {1024, 768, 60, DLALL, 6500, udl_reg_vals_1024x768_60}, + 
{1024, 768, 70, DLALL, 7500, udl_reg_vals_1024x768_70}, + {1024, 768, 75, DLALL, 7850, udl_reg_vals_1024x768_75}, + {1280, 800, 60, DLALL, 8346, udl_reg_vals_1280x800_60}, + {1280, 960, 60, DLALL, 10800, udl_reg_vals_1280x960_60}, + {1280, 1024, 60, DLALL, 10800, udl_reg_vals_1280x1024_60}, + {1280, 1024, 75, DLALL, 13500, udl_reg_vals_1280x1024_75}, + {1366, 768, 60, DLALL, 9000, udl_reg_vals_1366x768_60}, + {1440, 900, 59, DL125, 10650, udl_reg_vals_1440x900_59}, + {1440, 900, 60, DL125, 10647, udl_reg_vals_1440x900_60}, + {1440, 900, 75, DL125, 13649, udl_reg_vals_1440x900_75}, + {1680, 1050, 60, DL160, 14714, udl_reg_vals_1680x1050_60}, + {1600, 1200, 60, DL160, 16200, udl_reg_vals_1600x1200_60}, + {1920, 1080, 60, DL165, 13850, udl_reg_vals_1920x1080_60} +}; + +/* + * Encryption. + */ +static const uint8_t udl_null_key_1[] = { + 0x57, 0xcd, 0xdc, 0xa7, 0x1c, 0x88, 0x5e, 0x15, 0x60, 0xfe, 0xc6, 0x97, + 0x16, 0x3d, 0x47, 0xf2 +}; + +#endif /* _UDL_H_ */ diff --git a/sys/dev/videomode/Makefile.ediddevs b/sys/dev/videomode/Makefile.ediddevs new file mode 100644 index 0000000..69dc8bc --- /dev/null +++ b/sys/dev/videomode/Makefile.ediddevs @@ -0,0 +1,16 @@ +# $NetBSD: Makefile.ediddevs,v 1.1 2006/05/11 01:49:53 gdamore Exp $ +# $FreeBSD$ +# +# As per tron@NetBSD.org, the proper procedure is +# +# 1.) Change "src/sys/dev/videomode/ediddevs". +# 2.) Commit "src/sys/dev/videomode/ediddevs". +# 3.) Execute "make -f Makefile.ediddevs" in "src/sys/dev/videomode". +# 4.) Commit "src/sys/dev/videomode/ediddevs.h" and +# "src/sys/dev/videomode/ediddevs_data.h". 
+ +.include <bsd.own.mk> + +ediddevs.h ediddevs_data.h: ediddevs devlist2h.awk + /bin/rm -f ediddevs.h ediddevs_data.h + ${TOOL_AWK} -f devlist2h.awk ediddevs diff --git a/sys/dev/videomode/Makefile.videomode b/sys/dev/videomode/Makefile.videomode new file mode 100644 index 0000000..27ac06e --- /dev/null +++ b/sys/dev/videomode/Makefile.videomode @@ -0,0 +1,18 @@ +# $NetBSD: Makefile.videomode,v 1.1 2006/03/04 02:34:27 gdamore Exp $ +# $FreeBSD$ +# +# As per tron@NetBSD.org, the proper procedure is +# +# 1.) Change "src/sys/dev/videomode/modelines". +# 2.) Commit "src/sys/dev/videomode/modelines". +# 3.) Execute "make -f Makefile.videomode" in "src/sys/dev/videomode". +# 4.) Commit "src/sys/dev/videomode/videomode.c" + +.include <bsd.own.mk> + +videomode.c: modelines modelines2c.awk + /bin/rm -f videomode.c + ${TOOL_AWK} -f modelines2c.awk modelines > videomode.c + +test: videomode.c videomode.h test.c + ${CC} -I ../../ -o test test.c videomode.c diff --git a/sys/dev/videomode/devlist2h.awk b/sys/dev/videomode/devlist2h.awk new file mode 100644 index 0000000..f317da6 --- /dev/null +++ b/sys/dev/videomode/devlist2h.awk @@ -0,0 +1,193 @@ +#! /usr/bin/awk -f +# $NetBSD: devlist2h.awk,v 1.9 2005/12/11 12:21:20 christos Exp $ +# $FreeBSD$ +# +# Copyright (c) 1995, 1996 Christopher G. Demetriou +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. 
All advertising materials mentioning features or use of this software +# must display the following acknowledgement: +# This product includes software developed by Christopher G. Demetriou. +# 4. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +BEGIN { + nproducts = nvendors = blanklines = 0 + dfile="ediddevs_data.h" + hfile="ediddevs.h" +} +NR == 1 { + VERSION = $0 + gsub("\\$", "", VERSION) + gsub(/ $/, "", VERSION) + + printf("/*\t$FreeBSD" "$\t*/\n\n") > dfile + printf("/*\n") > dfile + printf(" * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT.\n") \ + > dfile + printf(" *\n") > dfile + printf(" * generated from:\n") > dfile + printf(" *\t%s\n", VERSION) > dfile + printf(" */\n") > dfile + + printf("/*\t$NetBSD" "$\t*/\n\n") > hfile + printf("/*\n") > hfile + printf(" * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT.\n") \ + > hfile + printf(" *\n") > hfile + printf(" * generated from:\n") > hfile + printf(" *\t%s\n", VERSION) > hfile + printf(" */\n") > hfile + + next +} + +NF > 0 && $1 == "vendor" { + nvendors++ + + vendorindex[$2] = nvendors; # record index for this name, for later. 
+ vendors[nvendors, 1] = $2; # name/ID + i = 2; f = 3; + + printf("#define\tEDID_VENDOR_%s\t\"", vendors[nvendors, 1]) > hfile + + # comments + oparen = 0 + while (f <= NF) { + if ($f == "#") { + printf("(") > hfile + oparen = 1 + f++ + continue + } + if (oparen) { + printf("%s", $f) > hfile + f++ + continue + } + vendors[nvendors, i] = $f + printf("%s", vendors[nvendors, i]) > hfile + if (f < NF) + printf(" ") > hfile + i++; f++; + } + if (oparen) + printf(")") > hfile + printf("\"") > hfile + printf("\n") > hfile + + next +} + +NF > 0 && $1 == "product" { + nproducts++ + + products[nproducts, 1] = $2; # vendor name + products[nproducts, 2] = $3; # product id + products[nproducts, 3] = $4; # id + printf("#define\tEDID_PRODUCT_%s_%s\t%s", products[nproducts, 1], + products[nproducts, 2], products[nproducts, 3]) > hfile + + i = 4; f = 5; + + ocomment = oparen = 0 + if (f <= NF) { + printf("\t\t/* ") > hfile + ocomment = 1; + } + while (f <= NF) { + if ($f == "#") { + printf("(") > hfile + oparen = 1 + f++ + continue + } + if (oparen) { + printf("%s", $f) > hfile + if (f < NF) + printf(" ") > hfile + f++ + continue + } + products[nproducts, i] = $f + printf("%s", products[nproducts, i]) > hfile + if (f < NF) + printf(" ") > hfile + i++; f++; + } + if (oparen) + printf(")") > hfile + if (ocomment) + printf(" */") > hfile + printf("\n") > hfile + + next +} +{ + if ($0 == "") + blanklines++ + if (blanklines != 2 && blanklines != 3) + print $0 > hfile + if (blanklines < 2) + print $0 > dfile +} +END { + # print out the match tables + + printf("\n") > dfile + printf("const struct edid_vendor edid_vendors[] = {\n") > dfile + + for (i = 1; i <= nvendors; i++) { + printf("\t{") > dfile + printf(" \"%s\", EDID_VENDOR_%s", vendors[i, 1], \ + vendors[i, 1]) > dfile + printf(" },\n") > dfile + } + printf("};\n") > dfile + printf("const int edid_nvendors = %d;\n", nvendors) > dfile + + printf("\n") > dfile + + printf("const struct edid_product edid_products[] = {\n") > dfile + 
for (i = 1; i <= nproducts; i++) { + printf("\t{\n") > dfile + printf("\t \"%s\", EDID_PRODUCT_%s_%s,\n", \ + products[i, 1], products[i, 1], products[i, 2]) > dfile + printf("\t \"") > dfile + j = 4 + needspace = 0 + while ((i, j) in products) { + if (needspace) + printf(" ") > dfile + printf("%s", products[i, j]) > dfile + needspace = 1 + j++ + } + printf("\",\n") > dfile + printf("\t},\n") > dfile + } + printf("};\n") > dfile + printf("const int edid_nproducts = %d;\n", nproducts) >dfile + + close(dfile) + close(hfile) +} diff --git a/sys/dev/videomode/edid.c b/sys/dev/videomode/edid.c new file mode 100644 index 0000000..5bd7ee3 --- /dev/null +++ b/sys/dev/videomode/edid.c @@ -0,0 +1,647 @@ +/* $NetBSD: edid.c,v 1.12 2013/02/08 16:35:10 skrll Exp $ */ +/* $FreeBSD$ */ + +/*- + * Copyright (c) 2006 Itronix Inc. + * All rights reserved. + * + * Written by Garrett D'Amore for Itronix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of Itronix Inc. may not be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. 
BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/libkern.h> +#include <dev/videomode/videomode.h> +#include <dev/videomode/ediddevs.h> +#include <dev/videomode/edidreg.h> +#include <dev/videomode/edidvar.h> +#include <dev/videomode/vesagtf.h> + +#define EDIDVERBOSE 1 +#define DIVIDE(x,y) (((x) + ((y) / 2)) / (y)) + +/* These are reversed established timing order */ +static const char *_edid_modes[] = { + "1280x1024x75", + "1024x768x75", + "1024x768x70", + "1024x768x60", + "1024x768x87i", + "832x624x74", /* rounding error, 74.55 Hz aka "832x624x75" */ + "800x600x75", + "800x600x72", + "800x600x60", + "800x600x56", + "640x480x75", + "640x480x72", + "640x480x67", + "640x480x60", + "720x400x87", /* rounding error, 87.85 Hz aka "720x400x88" */ + "720x400x70", +}; + +#ifdef EDIDVERBOSE +struct edid_vendor { + const char *vendor; + const char *name; +}; + +struct edid_product { + const char *vendor; + uint16_t product; + const char *name; +}; + +#include <dev/videomode/ediddevs_data.h> +#endif /* EDIDVERBOSE */ + +static const char * +edid_findvendor(const char *vendor) +{ +#ifdef EDIDVERBOSE + int n; + + for (n = 0; n < edid_nvendors; n++) + if (memcmp(edid_vendors[n].vendor, vendor, 3) == 0) + return edid_vendors[n].name; +#endif + return NULL; +} + +static const char * +edid_findproduct(const char *vendor, uint16_t product) +{ +#ifdef EDIDVERBOSE + int n; + + for (n = 0; 
n < edid_nproducts; n++) + if (edid_products[n].product == product && + memcmp(edid_products[n].vendor, vendor, 3) == 0) + return edid_products[n].name; +#endif /* EDIDVERBOSE */ + return NULL; + +} + +static void +edid_strchomp(char *ptr) +{ + for (;;) { + switch (*ptr) { + case '\0': + return; + case '\r': + case '\n': + *ptr = '\0'; + return; + } + ptr++; + } +} + +int +edid_is_valid(uint8_t *d) +{ + int sum = 0, i; + uint8_t sig[8] = EDID_SIGNATURE; + + if (memcmp(d, sig, 8) != 0) + return EINVAL; + + for (i = 0; i < 128; i++) + sum += d[i]; + if ((sum & 0xff) != 0) + return EINVAL; + + return 0; +} + +void +edid_print(struct edid_info *edid) +{ + int i; + + if (edid == NULL) + return; + printf("Vendor: [%s] %s\n", edid->edid_vendor, edid->edid_vendorname); + printf("Product: [%04X] %s\n", edid->edid_product, + edid->edid_productname); + printf("Serial number: %s\n", edid->edid_serial); + printf("Manufactured %d Week %d\n", + edid->edid_year, edid->edid_week); + printf("EDID Version %d.%d\n", edid->edid_version, + edid->edid_revision); + printf("EDID Comment: %s\n", edid->edid_comment); + + printf("Video Input: %x\n", edid->edid_video_input); + if (edid->edid_video_input & EDID_VIDEO_INPUT_DIGITAL) { + printf("\tDigital"); + if (edid->edid_video_input & EDID_VIDEO_INPUT_DFP1_COMPAT) + printf(" (DFP 1.x compatible)"); + printf("\n"); + } else { + printf("\tAnalog\n"); + switch (EDID_VIDEO_INPUT_LEVEL(edid->edid_video_input)) { + case 0: + printf("\t-0.7, 0.3V\n"); + break; + case 1: + printf("\t-0.714, 0.286V\n"); + break; + case 2: + printf("\t-1.0, 0.4V\n"); + break; + case 3: + printf("\t-0.7, 0.0V\n"); + break; + } + if (edid->edid_video_input & EDID_VIDEO_INPUT_BLANK_TO_BLACK) + printf("\tBlank-to-black setup\n"); + if (edid->edid_video_input & EDID_VIDEO_INPUT_SEPARATE_SYNCS) + printf("\tSeperate syncs\n"); + if (edid->edid_video_input & EDID_VIDEO_INPUT_COMPOSITE_SYNC) + printf("\tComposite sync\n"); + if (edid->edid_video_input & 
EDID_VIDEO_INPUT_SYNC_ON_GRN) + printf("\tSync on green\n"); + if (edid->edid_video_input & EDID_VIDEO_INPUT_SERRATION) + printf("\tSerration vsync\n"); + } + + printf("Gamma: %d.%02d\n", + edid->edid_gamma / 100, edid->edid_gamma % 100); + + printf("Max Size: %d cm x %d cm\n", + edid->edid_max_hsize, edid->edid_max_vsize); + + printf("Features: %x\n", edid->edid_features); + if (edid->edid_features & EDID_FEATURES_STANDBY) + printf("\tDPMS standby\n"); + if (edid->edid_features & EDID_FEATURES_SUSPEND) + printf("\tDPMS suspend\n"); + if (edid->edid_features & EDID_FEATURES_ACTIVE_OFF) + printf("\tDPMS active-off\n"); + switch (EDID_FEATURES_DISP_TYPE(edid->edid_features)) { + case EDID_FEATURES_DISP_TYPE_MONO: + printf("\tMonochrome\n"); + break; + case EDID_FEATURES_DISP_TYPE_RGB: + printf("\tRGB\n"); + break; + case EDID_FEATURES_DISP_TYPE_NON_RGB: + printf("\tMulticolor\n"); + break; + case EDID_FEATURES_DISP_TYPE_UNDEFINED: + printf("\tUndefined monitor type\n"); + break; + } + if (edid->edid_features & EDID_FEATURES_STD_COLOR) + printf("\tStandard color space\n"); + if (edid->edid_features & EDID_FEATURES_PREFERRED_TIMING) + printf("\tPreferred timing\n"); + if (edid->edid_features & EDID_FEATURES_DEFAULT_GTF) + printf("\tDefault GTF supported\n"); + + printf("Chroma Info:\n"); + printf("\tRed X: 0.%03d\n", edid->edid_chroma.ec_redx); + printf("\tRed Y: 0.%03d\n", edid->edid_chroma.ec_redy); + printf("\tGrn X: 0.%03d\n", edid->edid_chroma.ec_greenx); + printf("\tGrn Y: 0.%03d\n", edid->edid_chroma.ec_greeny); + printf("\tBlu X: 0.%03d\n", edid->edid_chroma.ec_bluex); + printf("\tBlu Y: 0.%03d\n", edid->edid_chroma.ec_bluey); + printf("\tWht X: 0.%03d\n", edid->edid_chroma.ec_whitex); + printf("\tWht Y: 0.%03d\n", edid->edid_chroma.ec_whitey); + + if (edid->edid_have_range) { + printf("Range:\n"); + printf("\tHorizontal: %d - %d kHz\n", + edid->edid_range.er_min_hfreq, + edid->edid_range.er_max_hfreq); + printf("\tVertical: %d - %d Hz\n", + 
edid->edid_range.er_min_vfreq, + edid->edid_range.er_max_vfreq); + printf("\tMax Dot Clock: %d MHz\n", + edid->edid_range.er_max_clock); + if (edid->edid_range.er_have_gtf2) { + printf("\tGTF2 hfreq: %d\n", + edid->edid_range.er_gtf2_hfreq); + printf("\tGTF2 C: %d\n", edid->edid_range.er_gtf2_c); + printf("\tGTF2 M: %d\n", edid->edid_range.er_gtf2_m); + printf("\tGTF2 J: %d\n", edid->edid_range.er_gtf2_j); + printf("\tGTF2 K: %d\n", edid->edid_range.er_gtf2_k); + } + } + printf("Video modes:\n"); + for (i = 0; i < edid->edid_nmodes; i++) { + printf("\t%dx%d @ %dHz", + edid->edid_modes[i].hdisplay, + edid->edid_modes[i].vdisplay, + DIVIDE(DIVIDE(edid->edid_modes[i].dot_clock * 1000, + edid->edid_modes[i].htotal), edid->edid_modes[i].vtotal)); + printf(" (%d %d %d %d %d %d %d", + edid->edid_modes[i].dot_clock, + edid->edid_modes[i].hsync_start, + edid->edid_modes[i].hsync_end, + edid->edid_modes[i].htotal, + edid->edid_modes[i].vsync_start, + edid->edid_modes[i].vsync_end, + edid->edid_modes[i].vtotal); + printf(" %s%sH %s%sV)\n", + edid->edid_modes[i].flags & VID_PHSYNC ? "+" : "", + edid->edid_modes[i].flags & VID_NHSYNC ? "-" : "", + edid->edid_modes[i].flags & VID_PVSYNC ? "+" : "", + edid->edid_modes[i].flags & VID_NVSYNC ? 
"-" : ""); + } + if (edid->edid_preferred_mode) + printf("Preferred mode: %dx%d @ %dHz\n", + edid->edid_preferred_mode->hdisplay, + edid->edid_preferred_mode->vdisplay, + DIVIDE(DIVIDE(edid->edid_preferred_mode->dot_clock * 1000, + edid->edid_preferred_mode->htotal), + edid->edid_preferred_mode->vtotal)); + + printf("Number of extension blocks: %d\n", edid->edid_ext_block_count); +} + +static const struct videomode * +edid_mode_lookup_list(const char *name) +{ + int i; + + for (i = 0; i < videomode_count; i++) + if (strcmp(name, videomode_list[i].name) == 0) + return &videomode_list[i]; + return NULL; +} + +static struct videomode * +edid_search_mode(struct edid_info *edid, const struct videomode *mode) +{ + int refresh, i; + + refresh = DIVIDE(DIVIDE(mode->dot_clock * 1000, + mode->htotal), mode->vtotal); + for (i = 0; i < edid->edid_nmodes; i++) { + if (mode->hdisplay == edid->edid_modes[i].hdisplay && + mode->vdisplay == edid->edid_modes[i].vdisplay && + refresh == DIVIDE(DIVIDE( + edid->edid_modes[i].dot_clock * 1000, + edid->edid_modes[i].htotal), edid->edid_modes[i].vtotal)) { + return &edid->edid_modes[i]; + } + } + return NULL; +} + +static int +edid_std_timing(uint8_t *data, struct videomode *vmp) +{ + unsigned x, y, f; + const struct videomode *lookup; + char name[80]; + + if ((data[0] == 1 && data[1] == 1) || + (data[0] == 0 && data[1] == 0) || + (data[0] == 0x20 && data[1] == 0x20)) + return 0; + + x = EDID_STD_TIMING_HRES(data); + switch (EDID_STD_TIMING_RATIO(data)) { + case EDID_STD_TIMING_RATIO_16_10: + y = x * 10 / 16; + break; + case EDID_STD_TIMING_RATIO_4_3: + y = x * 3 / 4; + break; + case EDID_STD_TIMING_RATIO_5_4: + y = x * 4 / 5; + break; + case EDID_STD_TIMING_RATIO_16_9: + default: + y = x * 9 / 16; + break; + } + f = EDID_STD_TIMING_VFREQ(data); + + /* first try to lookup the mode as a DMT timing */ + snprintf(name, sizeof(name), "%dx%dx%d", x, y, f); + if ((lookup = edid_mode_lookup_list(name)) != NULL) { + *vmp = *lookup; + } else { + 
/* failing that, calculate it using gtf */ + /* + * Hmm. I'm not using alternate GTF timings, which + * could, in theory, be present. + */ + vesagtf_mode(x, y, f, vmp); + } + return 1; +} + +static int +edid_det_timing(uint8_t *data, struct videomode *vmp) +{ + unsigned hactive, hblank, hsyncwid, hsyncoff; + unsigned vactive, vblank, vsyncwid, vsyncoff; + uint8_t flags; + + flags = EDID_DET_TIMING_FLAGS(data); + + /* we don't support stereo modes (for now) */ + if (flags & (EDID_DET_TIMING_FLAG_STEREO | + EDID_DET_TIMING_FLAG_STEREO_MODE)) + return 0; + + vmp->dot_clock = EDID_DET_TIMING_DOT_CLOCK(data) / 1000; + + hactive = EDID_DET_TIMING_HACTIVE(data); + hblank = EDID_DET_TIMING_HBLANK(data); + hsyncwid = EDID_DET_TIMING_HSYNC_WIDTH(data); + hsyncoff = EDID_DET_TIMING_HSYNC_OFFSET(data); + + vactive = EDID_DET_TIMING_VACTIVE(data); + vblank = EDID_DET_TIMING_VBLANK(data); + vsyncwid = EDID_DET_TIMING_VSYNC_WIDTH(data); + vsyncoff = EDID_DET_TIMING_VSYNC_OFFSET(data); + + /* Borders are contained within the blank areas. */ + + vmp->hdisplay = hactive; + vmp->htotal = hactive + hblank; + vmp->hsync_start = hactive + hsyncoff; + vmp->hsync_end = vmp->hsync_start + hsyncwid; + + vmp->vdisplay = vactive; + vmp->vtotal = vactive + vblank; + vmp->vsync_start = vactive + vsyncoff; + vmp->vsync_end = vmp->vsync_start + vsyncwid; + + vmp->flags = 0; + + if (flags & EDID_DET_TIMING_FLAG_INTERLACE) + vmp->flags |= VID_INTERLACE; + if (flags & EDID_DET_TIMING_FLAG_HSYNC_POSITIVE) + vmp->flags |= VID_PHSYNC; + else + vmp->flags |= VID_NHSYNC; + + if (flags & EDID_DET_TIMING_FLAG_VSYNC_POSITIVE) + vmp->flags |= VID_PVSYNC; + else + vmp->flags |= VID_NVSYNC; + + return 1; +} + +static void +edid_block(struct edid_info *edid, uint8_t *data) +{ + int i; + struct videomode mode, *exist_mode; + + if (EDID_BLOCK_IS_DET_TIMING(data)) { + if (!edid_det_timing(data, &mode)) + return; + /* Does this mode already exist? 
*/ + exist_mode = edid_search_mode(edid, &mode); + if (exist_mode != NULL) { + *exist_mode = mode; + if (edid->edid_preferred_mode == NULL) + edid->edid_preferred_mode = exist_mode; + } else { + edid->edid_modes[edid->edid_nmodes] = mode; + if (edid->edid_preferred_mode == NULL) + edid->edid_preferred_mode = + &edid->edid_modes[edid->edid_nmodes]; + edid->edid_nmodes++; + } + return; + } + + switch (EDID_BLOCK_TYPE(data)) { + case EDID_DESC_BLOCK_TYPE_SERIAL: + memcpy(edid->edid_serial, data + EDID_DESC_ASCII_DATA_OFFSET, + EDID_DESC_ASCII_DATA_LEN); + edid->edid_serial[sizeof(edid->edid_serial) - 1] = 0; + break; + + case EDID_DESC_BLOCK_TYPE_ASCII: + memcpy(edid->edid_comment, data + EDID_DESC_ASCII_DATA_OFFSET, + EDID_DESC_ASCII_DATA_LEN); + edid->edid_comment[sizeof(edid->edid_comment) - 1] = 0; + break; + + case EDID_DESC_BLOCK_TYPE_RANGE: + edid->edid_have_range = 1; + edid->edid_range.er_min_vfreq = EDID_DESC_RANGE_MIN_VFREQ(data); + edid->edid_range.er_max_vfreq = EDID_DESC_RANGE_MAX_VFREQ(data); + edid->edid_range.er_min_hfreq = EDID_DESC_RANGE_MIN_HFREQ(data); + edid->edid_range.er_max_hfreq = EDID_DESC_RANGE_MAX_HFREQ(data); + edid->edid_range.er_max_clock = EDID_DESC_RANGE_MAX_CLOCK(data); + if (!EDID_DESC_RANGE_HAVE_GTF2(data)) + break; + edid->edid_range.er_have_gtf2 = 1; + edid->edid_range.er_gtf2_hfreq = + EDID_DESC_RANGE_GTF2_HFREQ(data); + edid->edid_range.er_gtf2_c = EDID_DESC_RANGE_GTF2_C(data); + edid->edid_range.er_gtf2_m = EDID_DESC_RANGE_GTF2_M(data); + edid->edid_range.er_gtf2_j = EDID_DESC_RANGE_GTF2_J(data); + edid->edid_range.er_gtf2_k = EDID_DESC_RANGE_GTF2_K(data); + break; + + case EDID_DESC_BLOCK_TYPE_NAME: + /* copy the product name into place */ + memcpy(edid->edid_productname, + data + EDID_DESC_ASCII_DATA_OFFSET, + EDID_DESC_ASCII_DATA_LEN); + break; + + case EDID_DESC_BLOCK_TYPE_STD_TIMING: + data += EDID_DESC_STD_TIMING_START; + for (i = 0; i < EDID_DESC_STD_TIMING_COUNT; i++) { + if (edid_std_timing(data, &mode)) { + /* Does 
this mode already exist? */ + exist_mode = edid_search_mode(edid, &mode); + if (exist_mode == NULL) { + edid->edid_modes[edid->edid_nmodes] = + mode; + edid->edid_nmodes++; + } + } + data += 2; + } + break; + + case EDID_DESC_BLOCK_TYPE_COLOR_POINT: + /* XXX: not implemented yet */ + break; + } +} + +/* + * Gets EDID version in BCD, e.g. EDID v1.3 returned as 0x0103 + */ +int +edid_parse(uint8_t *data, struct edid_info *edid) +{ + uint16_t manfid, estmodes; + const struct videomode *vmp; + int i; + const char *name; + int max_dotclock = 0; + int mhz; + + if (edid_is_valid(data) != 0) + return -1; + + /* get product identification */ + manfid = EDID_VENDOR_ID(data); + edid->edid_vendor[0] = EDID_MANFID_0(manfid); + edid->edid_vendor[1] = EDID_MANFID_1(manfid); + edid->edid_vendor[2] = EDID_MANFID_2(manfid); + edid->edid_vendor[3] = 0; /* null terminate for convenience */ + + edid->edid_product = data[EDID_OFFSET_PRODUCT_ID] + + (data[EDID_OFFSET_PRODUCT_ID + 1] << 8); + + name = edid_findvendor(edid->edid_vendor); + if (name != NULL) + strlcpy(edid->edid_vendorname, name, + sizeof(edid->edid_vendorname)); + else + edid->edid_vendorname[0] = '\0'; + + name = edid_findproduct(edid->edid_vendor, edid->edid_product); + if (name != NULL) + strlcpy(edid->edid_productname, name, + sizeof(edid->edid_productname)); + else + edid->edid_productname[0] = '\0'; + + snprintf(edid->edid_serial, sizeof(edid->edid_serial), "%08x", + EDID_SERIAL_NUMBER(data)); + + edid->edid_week = EDID_WEEK(data); + edid->edid_year = EDID_YEAR(data); + + /* get edid revision */ + edid->edid_version = EDID_VERSION(data); + edid->edid_revision = EDID_REVISION(data); + + edid->edid_video_input = EDID_VIDEO_INPUT(data); + edid->edid_max_hsize = EDID_MAX_HSIZE(data); + edid->edid_max_vsize = EDID_MAX_VSIZE(data); + + edid->edid_gamma = EDID_GAMMA(data); + edid->edid_features = EDID_FEATURES(data); + + edid->edid_chroma.ec_redx = EDID_CHROMA_REDX(data); + edid->edid_chroma.ec_redy = 
EDID_CHROMA_REDY(data); + edid->edid_chroma.ec_greenx = EDID_CHROMA_GREENX(data); + edid->edid_chroma.ec_greeny = EDID_CHROMA_GREENY(data); + edid->edid_chroma.ec_bluex = EDID_CHROMA_BLUEX(data); + edid->edid_chroma.ec_bluey = EDID_CHROMA_BLUEY(data); + edid->edid_chroma.ec_whitex = EDID_CHROMA_WHITEX(data); + edid->edid_chroma.ec_whitey = EDID_CHROMA_WHITEY(data); + + edid->edid_ext_block_count = EDID_EXT_BLOCK_COUNT(data); + + /* lookup established modes */ + edid->edid_nmodes = 0; + edid->edid_preferred_mode = NULL; + estmodes = EDID_EST_TIMING(data); + /* Iterate in established timing order */ + for (i = 15; i >= 0; i--) { + if (estmodes & (1 << i)) { + vmp = edid_mode_lookup_list(_edid_modes[i]); + if (vmp != NULL) { + edid->edid_modes[edid->edid_nmodes] = *vmp; + edid->edid_nmodes++; + } +#ifdef DIAGNOSTIC + else + printf("no data for est. mode %s\n", + _edid_modes[i]); +#endif + } + } + + /* do standard timing section */ + for (i = 0; i < EDID_STD_TIMING_COUNT; i++) { + struct videomode mode, *exist_mode; + if (edid_std_timing(data + EDID_OFFSET_STD_TIMING + i * 2, + &mode)) { + /* Does this mode already exist? */ + exist_mode = edid_search_mode(edid, &mode); + if (exist_mode == NULL) { + edid->edid_modes[edid->edid_nmodes] = mode; + edid->edid_nmodes++; + } + } + } + + /* do detailed timings and descriptors */ + for (i = 0; i < EDID_BLOCK_COUNT; i++) { + edid_block(edid, data + EDID_OFFSET_DESC_BLOCK + + i * EDID_BLOCK_SIZE); + } + + edid_strchomp(edid->edid_vendorname); + edid_strchomp(edid->edid_productname); + edid_strchomp(edid->edid_serial); + edid_strchomp(edid->edid_comment); + + /* + * XXX + * some monitors lie about their maximum supported dot clock + * by claiming to support modes which need a higher dot clock + * than the stated maximum. 
+ * For sanity's sake we bump it to the highest dot clock we find + * in the list of supported modes + */ + for (i = 0; i < edid->edid_nmodes; i++) + if (edid->edid_modes[i].dot_clock > max_dotclock) + max_dotclock = edid->edid_modes[i].dot_clock; + if (bootverbose) { + printf("edid: max_dotclock according to supported modes: %d\n", + max_dotclock); + } + mhz = (max_dotclock + 999) / 1000; + + if (edid->edid_have_range) { + if (mhz > edid->edid_range.er_max_clock) + edid->edid_range.er_max_clock = mhz; + } else + edid->edid_range.er_max_clock = mhz; + + return 0; +} + diff --git a/sys/dev/videomode/ediddevs b/sys/dev/videomode/ediddevs new file mode 100644 index 0000000..0029704 --- /dev/null +++ b/sys/dev/videomode/ediddevs @@ -0,0 +1,106 @@ +$NetBSD: ediddevs,v 1.2 2007/03/07 18:47:13 macallan Exp $ +$FreeBSD$ + +/*- + * Copyright (c) 2006 Itronix Inc. + * All rights reserved. + * + * Written by Garrett D'Amore for Itronix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of Itronix Inc. may not be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. 
BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Use "make -f Makefile.ediddevs" to regenerate ediddevs.h and ediddevs_data.h + */ + +/* + * List of known EDID monitor vendors + * + * These are standard PNP ids, managed (apparently) by Microsoft. + * It is likely that this list is grossly incomplete. + */ +vendor AAC AcerView +vendor AOC AOC +vendor APP Apple Computer +vendor AST AST Research +vendor CPL Compal +vendor CPQ Compaq +vendor CTX CTX +vendor DEC DEC +vendor DEL Dell +vendor DPC Delta +vendor DWE Daewoo +vendor EIZ EIZO +vendor ELS ELSA +vendor EPI Envision +vendor FCM Funai +vendor FUJ Fujitsu +vendor GSM LG Electronics +vendor GWY Gateway 2000 +vendor HEI Hyundai +vendor HIT Hitachi +vendor HSL Hansol +vendor HTC Hitachi/Nissei +vendor HWP HP +vendor IBM IBM +vendor ICL Fujitsu ICL +vendor IVM Iiyama +vendor KDS Korea Data Systems +vendor MEI Panasonic +vendor MEL Mitsubishi Electronics +vendor NAN Nanao +vendor NEC NEC +vendor NOK Nokia Data +vendor PHL Philips +vendor REL Relisys +vendor SAM Samsung +vendor SGI SGI +vendor SNY Sony +vendor SRC Shamrock +vendor SUN Sun Microsystems +vendor TAT Tatung +vendor TOS Toshiba +vendor TSB Toshiba +vendor VSC ViewSonic +vendor ZCM Zenith + +/* + * List of known products, grouped and sorted by vendor. 
+ * + * EDID version 1.3 requires that monitors expose the monitor name with + * the ASCII descriptor type 0xFC, so for monitors using that block, this + * information is redundant, and there is not point in listing them here, + * unless it is desired to have a symbolic macro to detect the monitor in + * special handling code or somesuch. + */ + +/* Dell - this exists for now as a sample. I don't have one of these. */ +product DEL ULTRASCAN14XE_REVA 0x139A Ultrascan 14XE +product DEL ULTRASCAN14XE_REVB 0x139B Ultrascan 14XE + +/* ViewSonic */ +product VSC 17GS 0x0c00 17GS +product VSC 17PS 0x0c0f 17PS diff --git a/sys/dev/videomode/ediddevs.h b/sys/dev/videomode/ediddevs.h new file mode 100644 index 0000000..b383c8a --- /dev/null +++ b/sys/dev/videomode/ediddevs.h @@ -0,0 +1,91 @@ +/* $FreeBSD$ */ + +/* + * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT. + * + * generated from: + * NetBSD: ediddevs,v 1.1 2006/05/11 01:49:53 gdamore Exp + */ + +/*- + * Copyright (c) 2006 Itronix Inc. + * All rights reserved. + * + * Written by Garrett D'Amore for Itronix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of Itronix Inc. may not be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#define EDID_VENDOR_AAC "AcerView" +#define EDID_VENDOR_AOC "AOC" +#define EDID_VENDOR_APP "Apple Computer" +#define EDID_VENDOR_AST "AST Research" +#define EDID_VENDOR_CPL "Compal" +#define EDID_VENDOR_CPQ "Compaq" +#define EDID_VENDOR_CTX "CTX" +#define EDID_VENDOR_DEC "DEC" +#define EDID_VENDOR_DEL "Dell" +#define EDID_VENDOR_DPC "Delta" +#define EDID_VENDOR_DWE "Daewoo" +#define EDID_VENDOR_EIZ "EIZO" +#define EDID_VENDOR_ELS "ELSA" +#define EDID_VENDOR_EPI "Envision" +#define EDID_VENDOR_FCM "Funai" +#define EDID_VENDOR_FUJ "Fujitsu" +#define EDID_VENDOR_GSM "LG Electronics" +#define EDID_VENDOR_GWY "Gateway 2000" +#define EDID_VENDOR_HEI "Hyundai" +#define EDID_VENDOR_HIT "Hitachi" +#define EDID_VENDOR_HSL "Hansol" +#define EDID_VENDOR_HTC "Hitachi/Nissei" +#define EDID_VENDOR_HWP "HP" +#define EDID_VENDOR_IBM "IBM" +#define EDID_VENDOR_ICL "Fujitsu ICL" +#define EDID_VENDOR_IVM "Iiyama" +#define EDID_VENDOR_KDS "Korea Data Systems" +#define EDID_VENDOR_MEI "Panasonic" +#define EDID_VENDOR_MEL "Mitsubishi Electronics" +#define EDID_VENDOR_NAN "Nanao" +#define EDID_VENDOR_NEC "NEC" +#define EDID_VENDOR_NOK "Nokia Data" +#define EDID_VENDOR_PHL "Philips" +#define EDID_VENDOR_REL "Relisys" +#define EDID_VENDOR_SAM "Samsung" +#define EDID_VENDOR_SGI "SGI" +#define EDID_VENDOR_SNY "Sony" +#define EDID_VENDOR_SRC "Shamrock" +#define EDID_VENDOR_SUN "Sun Microsystems" +#define EDID_VENDOR_TAT "Tatung" +#define EDID_VENDOR_TOS "Toshiba" +#define 
EDID_VENDOR_TSB "Toshiba" +#define EDID_VENDOR_VSC "ViewSonic" +#define EDID_VENDOR_ZCM "Zenith" + +/* Dell - this exists for now as a sample. I don't have one of these. */ +#define EDID_PRODUCT_DEL_ULTRASCAN14XE_REVA 0x139A /* Ultrascan 14XE */ +#define EDID_PRODUCT_DEL_ULTRASCAN14XE_REVB 0x139B /* Ultrascan 14XE */ + +/* ViewSonic */ +#define EDID_PRODUCT_VSC_17GS 0x0c00 /* 17GS */ +#define EDID_PRODUCT_VSC_17PS 0x0c0f /* 17PS */ diff --git a/sys/dev/videomode/ediddevs_data.h b/sys/dev/videomode/ediddevs_data.h new file mode 100644 index 0000000..e0ad80a --- /dev/null +++ b/sys/dev/videomode/ediddevs_data.h @@ -0,0 +1,107 @@ +/* $FreeBSD$ */ + +/* + * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT. + * + * generated from: + * NetBSD: ediddevs,v 1.1 2006/05/11 01:49:53 gdamore Exp + */ + +/*- + * Copyright (c) 2006 Itronix Inc. + * All rights reserved. + * + * Written by Garrett D'Amore for Itronix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of Itronix Inc. may not be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. 
BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +const struct edid_vendor edid_vendors[] = { + { "AAC", EDID_VENDOR_AAC }, + { "AOC", EDID_VENDOR_AOC }, + { "APP", EDID_VENDOR_APP }, + { "AST", EDID_VENDOR_AST }, + { "CPL", EDID_VENDOR_CPL }, + { "CPQ", EDID_VENDOR_CPQ }, + { "CTX", EDID_VENDOR_CTX }, + { "DEC", EDID_VENDOR_DEC }, + { "DEL", EDID_VENDOR_DEL }, + { "DPC", EDID_VENDOR_DPC }, + { "DWE", EDID_VENDOR_DWE }, + { "EIZ", EDID_VENDOR_EIZ }, + { "ELS", EDID_VENDOR_ELS }, + { "EPI", EDID_VENDOR_EPI }, + { "FCM", EDID_VENDOR_FCM }, + { "FUJ", EDID_VENDOR_FUJ }, + { "GSM", EDID_VENDOR_GSM }, + { "GWY", EDID_VENDOR_GWY }, + { "HEI", EDID_VENDOR_HEI }, + { "HIT", EDID_VENDOR_HIT }, + { "HSL", EDID_VENDOR_HSL }, + { "HTC", EDID_VENDOR_HTC }, + { "HWP", EDID_VENDOR_HWP }, + { "IBM", EDID_VENDOR_IBM }, + { "ICL", EDID_VENDOR_ICL }, + { "IVM", EDID_VENDOR_IVM }, + { "KDS", EDID_VENDOR_KDS }, + { "MEI", EDID_VENDOR_MEI }, + { "MEL", EDID_VENDOR_MEL }, + { "NAN", EDID_VENDOR_NAN }, + { "NEC", EDID_VENDOR_NEC }, + { "NOK", EDID_VENDOR_NOK }, + { "PHL", EDID_VENDOR_PHL }, + { "REL", EDID_VENDOR_REL }, + { "SAM", EDID_VENDOR_SAM }, + { "SGI", EDID_VENDOR_SGI }, + { "SNY", EDID_VENDOR_SNY }, + { "SRC", EDID_VENDOR_SRC }, + { "SUN", EDID_VENDOR_SUN }, + { "TAT", EDID_VENDOR_TAT }, + { "TOS", EDID_VENDOR_TOS }, + { "TSB", EDID_VENDOR_TSB }, + { "VSC", EDID_VENDOR_VSC }, + { "ZCM", EDID_VENDOR_ZCM }, +}; +const int edid_nvendors = 44; + +const struct edid_product edid_products[] = { + { + "DEL", EDID_PRODUCT_DEL_ULTRASCAN14XE_REVA, + 
"Ultrascan 14XE", + }, + { + "DEL", EDID_PRODUCT_DEL_ULTRASCAN14XE_REVB, + "Ultrascan 14XE", + }, + { + "VSC", EDID_PRODUCT_VSC_17GS, + "17GS", + }, + { + "VSC", EDID_PRODUCT_VSC_17PS, + "17PS", + }, +}; +const int edid_nproducts = 4; diff --git a/sys/dev/videomode/edidreg.h b/sys/dev/videomode/edidreg.h new file mode 100644 index 0000000..29b0466 --- /dev/null +++ b/sys/dev/videomode/edidreg.h @@ -0,0 +1,256 @@ +/* $NetBSD: edidreg.h,v 1.3 2011/03/30 18:49:56 jdc Exp $ */ +/* $FreeBSD$ */ + +/*- + * Copyright (c) 2006 Itronix Inc. + * All rights reserved. + * + * Written by Garrett D'Amore for Itronix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of Itronix Inc. may not be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. 
BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _DEV_VIDEOMODE_EDIDREG_H +#define _DEV_VIDEOMODE_EDIDREG_H + +#define EDID_OFFSET_SIGNATURE 0x00 +#define EDID_OFFSET_MANUFACTURER_ID 0x08 +#define EDID_OFFSET_PRODUCT_ID 0x0a +#define EDID_OFFSET_SERIAL_NUMBER 0x0c +#define EDID_OFFSET_MANUFACTURE_WEEK 0x10 +#define EDID_OFFSET_MANUFACTURE_YEAR 0x11 +#define EDID_OFFSET_VERSION 0x12 +#define EDID_OFFSET_REVISION 0x13 +#define EDID_OFFSET_VIDEO_INPUT 0x14 +#define EDID_OFFSET_MAX_HSIZE 0x15 /* in cm */ +#define EDID_OFFSET_MAX_VSIZE 0x16 +#define EDID_OFFSET_GAMMA 0x17 +#define EDID_OFFSET_FEATURE 0x18 +#define EDID_OFFSET_CHROMA 0x19 +#define EDID_OFFSET_EST_TIMING_1 0x23 +#define EDID_OFFSET_EST_TIMING_2 0x24 +#define EDID_OFFSET_MFG_TIMING 0x25 +#define EDID_OFFSET_STD_TIMING 0x26 +#define EDID_OFFSET_DESC_BLOCK 0x36 + +#define EDID_SIGNATURE { 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0 } + +/* assume x is 16-bit value */ +#define EDID_VENDOR_ID(ptr) ((((ptr)[8]) << 8) + ptr[9]) +#define EDID_MANFID_0(x) ((((x) >> 10) & 0x1f) + '@') +#define EDID_MANFID_1(x) ((((x) >> 5) & 0x1f) + '@') +#define EDID_MANFID_2(x) ((((x) >> 0) & 0x1f) + '@') + +/* relative to edid block */ +#define EDID_PRODUCT_ID(ptr) (((ptr)[10]) | (((ptr)[11]) << 8)) +#define EDID_SERIAL_NUMBER(ptr) (((ptr)[12] << 24) + \ + ((ptr)[13] << 16) + \ + ((ptr)[14] << 8) + \ + (ptr)[15]) + +/* relative to edid block */ +#define EDID_WEEK(ptr) ((ptr)[16]) +#define EDID_YEAR(ptr) (((ptr)[17]) + 1990) + +#define EDID_VERSION(ptr) ((ptr)[18]) +#define 
EDID_REVISION(ptr) ((ptr)[19]) + +#define EDID_VIDEO_INPUT(ptr) ((ptr)[20]) +#define EDID_VIDEO_INPUT_DIGITAL 0x80 +/* if INPUT_BIT_DIGITAL set */ +#define EDID_VIDEO_INPUT_DFP1_COMPAT 0x01 +/* if INPUT_BIT_DIGITAL not set */ +#define EDID_VIDEO_INPUT_BLANK_TO_BLACK 0x10 +#define EDID_VIDEO_INPUT_SEPARATE_SYNCS 0x08 +#define EDID_VIDEO_INPUT_COMPOSITE_SYNC 0x04 +#define EDID_VIDEO_INPUT_SYNC_ON_GRN 0x02 +#define EDID_VIDEO_INPUT_SERRATION 0x01 +#define EDID_VIDEO_INPUT_LEVEL(x) (((x) & 0x60) >> 5) +/* meanings of level bits are as follows, I don't know names */ +/* 0 = 0.7,0.3, 1 = 0.714,0.286, 2 = 1.0,0.4, 3 = 0.7,0.0 */ + +/* relative to edid block */ +#define EDID_MAX_HSIZE(ptr) ((ptr)[21]) /* cm */ +#define EDID_MAX_VSIZE(ptr) ((ptr)[22]) /* cm */ +/* gamma is scaled by 100 (avoid fp), e.g. 213 == 2.13 */ +#define _GAMMA(x) ((x) == 0xff ? 100 : ((x) + 100)) +#define EDID_GAMMA(ptr) _GAMMA(ptr[23]) + +#define EDID_FEATURES(ptr) ((ptr)[24]) +#define EDID_FEATURES_STANDBY 0x80 +#define EDID_FEATURES_SUSPEND 0x40 +#define EDID_FEATURES_ACTIVE_OFF 0x20 +#define EDID_FEATURES_DISP_TYPE(x) (((x) & 0x18) >> 3) +#define EDID_FEATURES_DISP_TYPE_MONO 0 +#define EDID_FEATURES_DISP_TYPE_RGB 1 +#define EDID_FEATURES_DISP_TYPE_NON_RGB 2 +#define EDID_FEATURES_DISP_TYPE_UNDEFINED 3 +#define EDID_FEATURES_STD_COLOR 0x04 +#define EDID_FEATURES_PREFERRED_TIMING 0x02 +#define EDID_FEATURES_DEFAULT_GTF 0x01 + +/* chroma values 0.0 - 0.999 scaled as 0-999 */ +#define _CHLO(byt, shft) (((byt) >> (shft)) & 0x3) +#define _CHHI(byt) ((byt) << 2) +#define _CHHILO(ptr, l, s, h) (_CHLO((ptr)[l], s) | _CHHI((ptr)[h])) +#define _CHROMA(ptr, l, s, h) ((_CHHILO(ptr, l, s, h) * 1000) / 1024) + +#define EDID_CHROMA_REDX(ptr) (_CHROMA(ptr, 25, 6, 27)) +#define EDID_CHROMA_REDY(ptr) (_CHROMA(ptr, 25, 4, 28)) +#define EDID_CHROMA_GREENX(ptr) (_CHROMA(ptr, 25, 2, 29)) +#define EDID_CHROMA_GREENY(ptr) (_CHROMA(ptr, 25, 0, 30)) +#define EDID_CHROMA_BLUEX(ptr) (_CHROMA(ptr, 26, 6, 31)) +#define 
EDID_CHROMA_BLUEY(ptr) (_CHROMA(ptr, 26, 4, 32)) +#define EDID_CHROMA_WHITEX(ptr) (_CHROMA(ptr, 26, 2, 33)) +#define EDID_CHROMA_WHITEY(ptr) (_CHROMA(ptr, 26, 0, 34)) + +/* relative to edid block */ +#define EDID_EST_TIMING(ptr) (((ptr)[35] << 8) | (ptr)[36]) +#define EDID_EST_TIMING_720_400_70 0x8000 /* 720x400 @ 70Hz */ +#define EDID_EST_TIMING_720_400_88 0x4000 /* 720x400 @ 88Hz */ +#define EDID_EST_TIMING_640_480_60 0x2000 /* 640x480 @ 60Hz */ +#define EDID_EST_TIMING_640_480_67 0x1000 /* 640x480 @ 67Hz */ +#define EDID_EST_TIMING_640_480_72 0x0800 /* 640x480 @ 72Hz */ +#define EDID_EST_TIMING_640_480_75 0x0400 /* 640x480 @ 75Hz */ +#define EDID_EST_TIMING_800_600_56 0x0200 /* 800x600 @ 56Hz */ +#define EDID_EST_TIMING_800_600_60 0x0100 /* 800x600 @ 60Hz */ +#define EDID_EST_TIMING_800_600_72 0x0080 /* 800x600 @ 72Hz */ +#define EDID_EST_TIMING_800_600_75 0x0040 /* 800x600 @ 75Hz */ +#define EDID_EST_TIMING_832_624_75 0x0020 /* 832x624 @ 75Hz */ +#define EDID_EST_TIMING_1024_768_87I 0x0010 /* 1024x768i @ 87Hz */ +#define EDID_EST_TIMING_1024_768_60 0x0008 /* 1024x768 @ 60Hz */ +#define EDID_EST_TIMING_1024_768_70 0x0004 /* 1024x768 @ 70Hz */ +#define EDID_EST_TIMING_1024_768_75 0x0002 /* 1024x768 @ 75Hz */ +#define EDID_EST_TIMING_1280_1024_75 0x0001 /* 1280x1024 @ 75Hz */ + +/* + * N.B.: ptr is relative to standard timing block - used for standard timing + * descriptors as well as standard timings section of edid! 
+ */ +#define EDID_STD_TIMING_HRES(ptr) ((((ptr)[0]) * 8) + 248) +#define EDID_STD_TIMING_VFREQ(ptr) ((((ptr)[1]) & 0x3f) + 60) +#define EDID_STD_TIMING_RATIO(ptr) ((ptr)[1] & 0xc0) +#define EDID_STD_TIMING_RATIO_16_10 0x00 +#define EDID_STD_TIMING_RATIO_4_3 0x40 +#define EDID_STD_TIMING_RATIO_5_4 0x80 +#define EDID_STD_TIMING_RATIO_16_9 0xc0 + +#define EDID_STD_TIMING_SIZE 16 +#define EDID_STD_TIMING_COUNT 8 + +/* + * N.B.: ptr is relative to descriptor block start + */ +#define EDID_BLOCK_SIZE 18 +#define EDID_BLOCK_COUNT 4 + +/* detailed timing block.... what a mess */ +#define EDID_BLOCK_IS_DET_TIMING(ptr) ((ptr)[0] | (ptr)[1]) + +#define EDID_DET_TIMING_DOT_CLOCK(ptr) (((ptr)[0] | ((ptr)[1] << 8)) * 10000) +#define _HACT_LO(ptr) ((ptr)[2]) +#define _HBLK_LO(ptr) ((ptr)[3]) +#define _HACT_HI(ptr) (((ptr)[4] & 0xf0) << 4) +#define _HBLK_HI(ptr) (((ptr)[4] & 0x0f) << 8) +#define EDID_DET_TIMING_HACTIVE(ptr) (_HACT_LO(ptr) | _HACT_HI(ptr)) +#define EDID_DET_TIMING_HBLANK(ptr) (_HBLK_LO(ptr) | _HBLK_HI(ptr)) +#define _VACT_LO(ptr) ((ptr)[5]) +#define _VBLK_LO(ptr) ((ptr)[6]) +#define _VACT_HI(ptr) (((ptr)[7] & 0xf0) << 4) +#define _VBLK_HI(ptr) (((ptr)[7] & 0x0f) << 8) +#define EDID_DET_TIMING_VACTIVE(ptr) (_VACT_LO(ptr) | _VACT_HI(ptr)) +#define EDID_DET_TIMING_VBLANK(ptr) (_VBLK_LO(ptr) | _VBLK_HI(ptr)) +#define _HOFF_LO(ptr) ((ptr)[8]) +#define _HWID_LO(ptr) ((ptr)[9]) +#define _VOFF_LO(ptr) ((ptr)[10] >> 4) +#define _VWID_LO(ptr) ((ptr)[10] & 0xf) +#define _HOFF_HI(ptr) (((ptr)[11] & 0xc0) << 2) +#define _HWID_HI(ptr) (((ptr)[11] & 0x30) << 4) +#define _VOFF_HI(ptr) (((ptr)[11] & 0x0c) << 2) +#define _VWID_HI(ptr) (((ptr)[11] & 0x03) << 4) +#define EDID_DET_TIMING_HSYNC_OFFSET(ptr) (_HOFF_LO(ptr) | _HOFF_HI(ptr)) +#define EDID_DET_TIMING_HSYNC_WIDTH(ptr) (_HWID_LO(ptr) | _HWID_HI(ptr)) +#define EDID_DET_TIMING_VSYNC_OFFSET(ptr) (_VOFF_LO(ptr) | _VOFF_HI(ptr)) +#define EDID_DET_TIMING_VSYNC_WIDTH(ptr) (_VWID_LO(ptr) | _VWID_HI(ptr)) +#define _HSZ_LO(ptr) 
((ptr)[12]) +#define _VSZ_LO(ptr) ((ptr)[13]) +#define _HSZ_HI(ptr) (((ptr)[14] & 0xf0) << 4) +#define _VSZ_HI(ptr) (((ptr)[14] & 0x0f) << 8) +#define EDID_DET_TIMING_HSIZE(ptr) (_HSZ_LO(ptr) | _HSZ_HI(ptr)) +#define EDID_DET_TIMING_VSIZE(ptr) (_VSZ_LO(ptr) | _VSZ_HI(ptr)) +#define EDID_DET_TIMING_HBORDER(ptr) ((ptr)[15]) +#define EDID_DET_TIMING_VBORDER(ptr) ((ptr)[16]) +#define EDID_DET_TIMING_FLAGS(ptr) ((ptr)[17]) +#define EDID_DET_TIMING_FLAG_INTERLACE 0x80 +#define EDID_DET_TIMING_FLAG_STEREO 0x60 /* stereo or not */ +#define EDID_DET_TIMING_FLAG_SYNC_SEPARATE 0x18 +#define EDID_DET_TIMING_FLAG_VSYNC_POSITIVE 0x04 +#define EDID_DET_TIMING_FLAG_HSYNC_POSITIVE 0x02 +#define EDID_DET_TIMING_FLAG_STEREO_MODE 0x01 /* stereo mode */ + + +/* N.B.: these tests assume that we already checked for detailed timing! */ +#define EDID_BLOCK_TYPE(ptr) ((ptr)[3]) + +#define EDID_DESC_BLOCK_SIZE 18 +#define EDID_DESC_BLOCK_TYPE_SERIAL 0xFF +#define EDID_DESC_BLOCK_TYPE_ASCII 0xFE +#define EDID_DESC_BLOCK_TYPE_RANGE 0xFD +#define EDID_DESC_BLOCK_TYPE_NAME 0xFC +#define EDID_DESC_BLOCK_TYPE_COLOR_POINT 0xFB +#define EDID_DESC_BLOCK_TYPE_STD_TIMING 0xFA + +/* used for descriptors 0xFF, 0xFE, and 0xFC */ +#define EDID_DESC_ASCII_DATA_OFFSET 5 +#define EDID_DESC_ASCII_DATA_LEN 13 + +#define EDID_DESC_RANGE_MIN_VFREQ(ptr) ((ptr)[5]) /* Hz */ +#define EDID_DESC_RANGE_MAX_VFREQ(ptr) ((ptr)[6]) /* Hz */ +#define EDID_DESC_RANGE_MIN_HFREQ(ptr) ((ptr)[7]) /* kHz */ +#define EDID_DESC_RANGE_MAX_HFREQ(ptr) ((ptr)[8]) /* kHz */ +#define EDID_DESC_RANGE_MAX_CLOCK(ptr) (((ptr)[9]) * 10) /* MHz */ +#define EDID_DESC_RANGE_HAVE_GTF2(ptr) (((ptr)[10]) == 0x02) +#define EDID_DESC_RANGE_GTF2_HFREQ(ptr) (((ptr)[12]) * 2) +#define EDID_DESC_RANGE_GTF2_C(ptr) (((ptr)[13]) / 2) +#define EDID_DESC_RANGE_GTF2_M(ptr) ((ptr)[14] + ((ptr)[15] << 8)) +#define EDID_DESC_RANGE_GTF2_K(ptr) ((ptr)[16]) +#define EDID_DESC_RANGE_GTF2_J(ptr) ((ptr)[17] / 2) + +#define EDID_DESC_COLOR_WHITEX(ptr) +#define 
EDID_DESC_COLOR_WHITE_INDEX_1(ptr) ((ptr)[5]) +#define EDID_DESC_COLOR_WHITEX_1(ptr) _CHROMA(ptr, 6, 2, 7) +#define EDID_DESC_COLOR_WHITEY_1(ptr) _CHROMA(ptr, 6, 0, 8) +#define EDID_DESC_COLOR_GAMMA_1(ptr) _GAMMA(ptr[9]) +#define EDID_DESC_COLOR_WHITE_INDEX_2(ptr) ((ptr)[10]) +#define EDID_DESC_COLOR_WHITEX_2(ptr) _CHROMA(ptr, 11, 2, 12) +#define EDID_DESC_COLOR_WHITEY_2(ptr) _CHROMA(ptr, 11, 0, 13) +#define EDID_DESC_COLOR_GAMMA_2(ptr) _GAMMA(ptr[14]) + +#define EDID_DESC_STD_TIMING_START 5 +#define EDID_DESC_STD_TIMING_COUNT 6 + +#define EDID_EXT_BLOCK_COUNT(ptr) ((ptr)[126]) + +#endif /* _DEV_VIDEOMODE_EDIDREG_H */ diff --git a/sys/dev/videomode/edidvar.h b/sys/dev/videomode/edidvar.h new file mode 100644 index 0000000..da1211b --- /dev/null +++ b/sys/dev/videomode/edidvar.h @@ -0,0 +1,96 @@ +/* $NetBSD: edidvar.h,v 1.2 2006/05/11 19:05:41 gdamore Exp $ */ +/* $FreeBSD$ */ + +/*- + * Copyright (c) 2006 Itronix Inc. + * All rights reserved. + * + * Written by Garrett D'Amore for Itronix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of Itronix Inc. may not be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. 
BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _DEV_VIDEOMODE_EDIDVAR_H +#define _DEV_VIDEOMODE_EDIDVAR_H + +struct edid_chroma { + uint16_t ec_redx; + uint16_t ec_redy; + uint16_t ec_greenx; + uint16_t ec_greeny; + uint16_t ec_bluex; + uint16_t ec_bluey; + uint16_t ec_whitex; + uint16_t ec_whitey; +}; + +struct edid_range { + uint16_t er_min_vfreq; /* Hz */ + uint16_t er_max_vfreq; /* Hz */ + uint16_t er_min_hfreq; /* kHz */ + uint16_t er_max_hfreq; /* kHz */ + uint16_t er_max_clock; /* MHz */ + int er_have_gtf2; + uint16_t er_gtf2_hfreq; + uint16_t er_gtf2_c; + uint16_t er_gtf2_m; + uint16_t er_gtf2_k; + uint16_t er_gtf2_j; +}; + +struct edid_info { + uint8_t edid_vendor[4]; + char edid_vendorname[16]; + char edid_productname[16]; + char edid_comment[16]; + char edid_serial[16]; + uint16_t edid_product; + uint8_t edid_version; + uint8_t edid_revision; + int edid_year; + int edid_week; + uint8_t edid_video_input; /* see edidregs.h */ + uint8_t edid_max_hsize; /* in cm */ + uint8_t edid_max_vsize; /* in cm */ + uint8_t edid_gamma; + uint8_t edid_features; + uint8_t edid_ext_block_count; + + int edid_have_range; + struct edid_range edid_range; + + struct edid_chroma edid_chroma; + + /* parsed modes */ + struct videomode *edid_preferred_mode; + int edid_nmodes; + struct videomode edid_modes[64]; +}; + +int edid_is_valid(uint8_t *); +int edid_parse(uint8_t *, struct edid_info *); +void edid_print(struct edid_info *); + +#endif /* _DEV_VIDEOMODE_EDIDVAR_H */ diff --git a/sys/dev/videomode/modelines 
b/sys/dev/videomode/modelines new file mode 100644 index 0000000..147f6a5 --- /dev/null +++ b/sys/dev/videomode/modelines @@ -0,0 +1,181 @@ +// $NetBSD: modelines,v 1.8 2011/03/21 19:28:37 jdc Exp $ +// $FreeBSD$ +// +// This file was imported from XFree86, and is made of the contents of both +// the vesamodes and extramodes files. As a result these should correspond +// to the same default modes compiled into XFree86. +// +// Default modes distilled from +// "VESA and Industry Standards and Guide for Computer Display Monitor +// Timing", version 1.0, revision 0.8, adopted September 17, 1998. +// +// $XFree86: xc/programs/Xserver/hw/xfree86/etc/vesamodes,v 1.3 1999/11/16 03:28:03 tsi Exp $ +// $XFree86: xc/programs/Xserver/hw/xfree86/etc/extramodes,v 1.5 2002/06/05 19:43:05 dawes Exp $ +// +// +// Use "make -f Makefile.videomode" to regenerate videomode.c +// + +# 640x350 @ 85Hz (VESA) hsync: 37.9kHz +ModeLine "640x350" 31.5 640 672 736 832 350 382 385 445 +hsync -vsync + +# 640x400 @ 85Hz (VESA) hsync: 37.9kHz +ModeLine "640x400" 31.5 640 672 736 832 400 401 404 445 -hsync +vsync + +# 720x400 @ 70Hz (EDID established timing) hsync: 31.47kHz +ModeLine "720x400" 28.32 720 738 846 900 400 412 414 449 -hsync +vsync + +# 720x400 @ 85Hz (VESA) hsync: 37.9kHz +ModeLine "720x400" 35.5 720 756 828 936 400 401 404 446 -hsync +vsync + +# 720x400 @ 88Hz (EDID established timing) hsync: 39.44kHz +ModeLine "720x400" 35.5 720 738 846 900 400 421 423 449 -hsync -vsync + +# 640x480 @ 60Hz (Industry standard) hsync: 31.5kHz +ModeLine "640x480" 25.175 640 656 752 800 480 490 492 525 -hsync -vsync + +# 640x480 @ 72Hz (VESA) hsync: 37.9kHz +ModeLine "640x480" 31.5 640 664 704 832 480 489 492 520 -hsync -vsync + +# 640x480 @ 75Hz (VESA) hsync: 37.5kHz +ModeLine "640x480" 31.5 640 656 720 840 480 481 484 500 -hsync -vsync + +# 640x480 @ 85Hz (VESA) hsync: 43.3kHz +ModeLine "640x480" 36.0 640 696 752 832 480 481 484 509 -hsync -vsync + +# 800x600 @ 56Hz (VESA) hsync: 35.2kHz +ModeLine 
"800x600" 36.0 800 824 896 1024 600 601 603 625 +hsync +vsync + +# 800x600 @ 60Hz (VESA) hsync: 37.9kHz +ModeLine "800x600" 40.0 800 840 968 1056 600 601 605 628 +hsync +vsync + +# 800x600 @ 72Hz (VESA) hsync: 48.1kHz +ModeLine "800x600" 50.0 800 856 976 1040 600 637 643 666 +hsync +vsync + +# 800x600 @ 75Hz (VESA) hsync: 46.9kHz +ModeLine "800x600" 49.5 800 816 896 1056 600 601 604 625 +hsync +vsync + +# 800x600 @ 85Hz (VESA) hsync: 53.7kHz +ModeLine "800x600" 56.25 800 832 896 1048 600 601 604 631 +hsync +vsync + +# 1024x768i @ 43Hz (industry standard) hsync: 35.5kHz +ModeLine "1024x768" 44.9 1024 1032 1208 1264 768 768 776 817 +hsync +vsync Interlace + +# 1024x768 @ 60Hz (VESA) hsync: 48.4kHz +ModeLine "1024x768" 65.0 1024 1048 1184 1344 768 771 777 806 -hsync -vsync + +# 1024x768 @ 70Hz (VESA) hsync: 56.5kHz +ModeLine "1024x768" 75.0 1024 1048 1184 1328 768 771 777 806 -hsync -vsync + +# 1024x768 @ 75Hz (VESA) hsync: 60.0kHz +ModeLine "1024x768" 78.75 1024 1040 1136 1312 768 769 772 800 +hsync +vsync + +# 1024x768 @ 85Hz (VESA) hsync: 68.7kHz +ModeLine "1024x768" 94.5 1024 1072 1168 1376 768 769 772 808 +hsync +vsync + +# 1024x768 @ 89Hz (non-standard) hsync: 72.0kHz +ModeLine "1024x768" 100 1024 1108 1280 1408 768 768 780 796 +hsync +vsync + +# 1152x864 @ 75Hz (VESA) hsync: 67.5kHz +ModeLine "1152x864" 108.0 1152 1216 1344 1600 864 865 868 900 +hsync +vsync + +# 1280x768 @ 75Hz (non-standard) hsync: 60.6kHz +ModeLine "1280x768" 105.64 1280 1312 1712 1744 768 782 792 807 -hsync +vsync + +# 1280x960 @ 60Hz (VESA) hsync: 60.0kHz +ModeLine "1280x960" 108.0 1280 1376 1488 1800 960 961 964 1000 +hsync +vsync + +# 1280x960 @ 85Hz (VESA) hsync: 85.9kHz +ModeLine "1280x960" 148.5 1280 1344 1504 1728 960 961 964 1011 +hsync +vsync + +# 1280x1024 @ 60Hz (VESA) hsync: 64.0kHz +ModeLine "1280x1024" 108.0 1280 1328 1440 1688 1024 1025 1028 1066 +hsync +vsync + +# 1280x1024 @ 70Hz (non-standard) hsync: 74.0kHz +ModeLine "1280x1024" 126.0 1280 1328 1440 1688 1024 1025 1028 
1066 +hsync +vsync + +# 1280x1024 @ 75Hz (VESA) hsync: 80.0kHz +ModeLine "1280x1024" 135.0 1280 1296 1440 1688 1024 1025 1028 1066 +hsync +vsync + +# 1280x1024 @ 85Hz (VESA) hsync: 91.1kHz +ModeLine "1280x1024" 157.5 1280 1344 1504 1728 1024 1025 1028 1072 +hsync +vsync + +# 1600x1200 @ 60Hz (VESA) hsync: 75.0kHz +ModeLine "1600x1200" 162.0 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync + +# 1600x1200 @ 65Hz (VESA) hsync: 81.3kHz +ModeLine "1600x1200" 175.5 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync + +# 1600x1200 @ 70Hz (VESA) hsync: 87.5kHz +ModeLine "1600x1200" 189.0 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync + +# 1600x1200 @ 75Hz (VESA) hsync: 93.8kHz +ModeLine "1600x1200" 202.5 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync + +# 1600x1200 @ 85Hz (VESA) hsync: 106.3kHz +ModeLine "1600x1200" 229.5 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync + +# 1680x1050 @ 60.00Hz (GTF) hsync: 65.22 kHz; pclk: 147.14 MHz +ModeLine "1680x1050" 147.14 1680 1784 1968 2256 1050 1051 1054 1087 +hsync +vsync + +# 1792x1344 @ 60Hz (VESA) hsync: 83.6kHz +ModeLine "1792x1344" 204.8 1792 1920 2120 2448 1344 1345 1348 1394 -hsync +vsync + +# 1792x1344 @ 75Hz (VESA) hsync: 106.3kHz +ModeLine "1792x1344" 261.0 1792 1888 2104 2456 1344 1345 1348 1417 -hsync +vsync + +# 1856x1392 @ 60Hz (VESA) hsync: 86.3kHz +ModeLine "1856x1392" 218.3 1856 1952 2176 2528 1392 1393 1396 1439 -hsync +vsync + +# 1856x1392 @ 75Hz (VESA) hsync: 112.5kHz +ModeLine "1856x1392" 288.0 1856 1984 2208 2560 1392 1393 1396 1500 -hsync +vsync + +# 1920x1440 @ 60Hz (VESA) hsync: 90.0kHz +ModeLine "1920x1440" 234.0 1920 2048 2256 2600 1440 1441 1444 1500 -hsync +vsync + +# 1920x1440 @ 75Hz (VESA) hsync: 112.5kHz +ModeLine "1920x1440" 297.0 1920 2064 2288 2640 1440 1441 1444 1500 -hsync +vsync + + +// +// Extra modes to include as default modes in the X server. 
+// +// $XFree86: xc/programs/Xserver/hw/xfree86/etc/extramodes,v 1.5 2002/06/05 19:43:05 dawes Exp $ +// + +# 832x624 @ 75Hz (74.55Hz) (fix if the official/Apple spec is different) hsync: 49.725kHz +ModeLine "832x624" 57.284 832 864 928 1152 624 625 628 667 -Hsync -Vsync + +# 1152x768 @ 54.8Hz (Titanium PowerBook) hsync: 44.2kHz +ModeLine "1152x768" 64.995 1152 1178 1314 1472 768 771 777 806 +hsync +vsync + +# 1400x1050 @ 60Hz (VESA GTF) hsync: 65.5kHz +ModeLine "1400x1050" 122.0 1400 1488 1640 1880 1050 1052 1064 1082 +hsync +vsync + +# 1400x1050 @ 75Hz (VESA GTF) hsync: 82.2kHz +ModeLine "1400x1050" 155.8 1400 1464 1784 1912 1050 1052 1064 1090 +hsync +vsync + +# 1600x1024 @ 60Hz (SGI 1600SW) hsync: 64.0kHz +Modeline "1600x1024" 106.910 1600 1620 1640 1670 1024 1027 1030 1067 -hsync -vsync + +# 1920x1440 @ 85Hz (VESA GTF) hsync: 128.5kHz +Modeline "1920x1440" 341.35 1920 2072 2288 2656 1440 1441 1444 1512 -hsync +vsync + +# 2048x1536 @ 60Hz (VESA GTF) hsync: 95.3kHz +Modeline "2048x1536" 266.95 2048 2200 2424 2800 1536 1537 1540 1589 -hsync +vsync + +# 2048x1536 @ 75Hz (VESA GTF) hsync: 120.2kHz +Modeline "2048x1536" 340.48 2048 2216 2440 2832 1536 1537 1540 1603 -hsync +vsync + +# 2048x1536 @ 85Hz (VESA GTF) hsync: 137.0kHz +Modeline "2048x1536" 388.04 2048 2216 2440 2832 1536 1537 1540 1612 -hsync +vsync + +// +// some Sun-specific modes +// + +# 1152x900 @ 66Hz - default on most SBus graphics devices +ModeLine "1152x900" 94.50 1152 1192 1320 1528 900 902 906 937 -hsync -vsync + +# 1152x900 @ 76Hz +ModeLine "1152x900" 105.56 1152 1168 1280 1472 900 902 906 943 -hsync -vsync diff --git a/sys/dev/videomode/modelines2c.awk b/sys/dev/videomode/modelines2c.awk new file mode 100644 index 0000000..9b95c3f --- /dev/null +++ b/sys/dev/videomode/modelines2c.awk @@ -0,0 +1,150 @@ +#! /usr/bin/awk -f +# $NetBSD: modelines2c.awk,v 1.4 2006/10/26 23:19:50 bjh21 Exp $ +# $FreeBSD$ +# +# Copyright (c) 2006 Itronix Inc. +# All rights reserved. 
+# +# Written by Garrett D'Amore for Itronix Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of Itronix Inc. may not be used to endorse +# or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +BEGIN { + nmodes = 0; +} + +NR == 1 { + split($0,v,"$"); + + VERSION=v[2]; + + printf("/*\t$NetBSD" "$\t*/\n\n"); + printf("/*\n") ; + printf(" * THIS FILE AUTOMATICALLY GENERATED. 
DO NOT EDIT.\n"); + printf(" *\n"); + printf(" * generated from:\n"); + printf(" *\t%s\n", VERSION); + printf(" */\n\n"); + + printf("#include <sys/cdefs.h>\n"); + printf("__KERNEL_RCSID(0, \"$NetBSD" "$\");\n\n"); + + printf("#include <dev/videomode/videomode.h>\n\n"); + + printf("/*\n"); + printf(" * These macros help the modelines below fit on one line.\n"); + printf(" */\n"); + printf("#define HP VID_PHSYNC\n"); + printf("#define HN VID_NHSYNC\n"); + printf("#define VP VID_PVSYNC\n"); + printf("#define VN VID_NVSYNC\n"); + printf("#define I VID_INTERLACE\n"); + printf("#define DS VID_DBLSCAN\n"); + printf("\n"); + + printf("#define M(nm,hr,vr,clk,hs,he,ht,vs,ve,vt,f) \\\n"); + printf("\t{ clk, hr, hs, he, ht, vr, vs, ve, vt, f, nm } \n\n"); + + printf("const struct videomode videomode_list[] = {\n"); + + next +} + +(/^ModeLine/) { + dotclock = $3; + + hdisplay = $4; + hsyncstart = $5; + hsyncend = $6; + htotal = $7; + + vdisplay = $8; + vsyncstart = $9; + vsyncend = $10; + vtotal = $11; + + macro = "MODE"; + iflag = ""; + iflags = ""; + hflags = "HP"; + vflags = "VP"; + + if ($12 ~ "^-") + hflags = "HN"; + + if ($13 ~ "^-") + vflags = "VN"; + + ifactor=1.0; + if ($14 ~ "[Ii][Nn][Tt][Ee][Rr][Ll][Aa][Cc][Ee]") { + iflag = "i"; + iflags = "|I"; + ifactor = 2.0; + } + + # We truncate the vrefresh figure, but some mode descriptions rely + # on rounding, so we can't win here. Adding an additional .1 + # compensates to some extent. 
+ + hrefresh= (dotclock * 1000000) / htotal; + vrefresh= int(((hrefresh * ifactor) / vtotal) + .1); + + modestr = sprintf("%dx%dx%d%s", hdisplay, vdisplay, vrefresh, iflag); + +# printf("/* %dx%d%s refresh %d Hz, hsync %d kHz */\n", +# hdisplay, vdisplay, iflag, vrefresh, hrefresh/1000); + printf("M(\"%s\",%d,%d,%d,%d,%d,%d,%d,%d,%d,%s),\n", + modestr, + hdisplay, vdisplay, dotclock * 1000, + hsyncstart, hsyncend, htotal, + vsyncstart, vsyncend, vtotal, hflags "|" vflags iflags); + + modestr = sprintf("%dx%dx%d%s", + hdisplay/2 , vdisplay/2, vrefresh, iflag); + + dmodes[nmodes]=sprintf("M(\"%s\",%d,%d,%d,%d,%d,%d,%d,%d,%d,%s),", + modestr, + hdisplay/2, vdisplay/2, dotclock * 1000 / 2, + hsyncstart/2, hsyncend/2, htotal/2, + vsyncstart/2, vsyncend/2, vtotal/2, + hflags "|" vflags "|DS" iflags); + + nmodes = nmodes + 1 + +} + +END { + + printf("\n/* Derived Double Scan Modes */\n\n"); + + for ( i = 0; i < nmodes; i++ ) + { + print dmodes[i]; + } + + printf("};\n\n"); + printf("const int videomode_count = %d;\n", nmodes); +} diff --git a/sys/dev/videomode/pickmode.c b/sys/dev/videomode/pickmode.c new file mode 100644 index 0000000..66dea82 --- /dev/null +++ b/sys/dev/videomode/pickmode.c @@ -0,0 +1,205 @@ +/* $NetBSD: pickmode.c,v 1.3 2011/04/09 18:22:31 jdc Exp $ */ +/* $FreeBSD$ */ + +/*- + * Copyright (c) 2006 The NetBSD Foundation + * All rights reserved. + * + * this code was contributed to The NetBSD Foundation by Michael Lorenz + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE NETBSD FOUNDATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/libkern.h> +#include <dev/videomode/videomode.h> +#include "opt_videomode.h" + +#ifdef PICKMODE_DEBUG +#define DPRINTF printf +#else +#define DPRINTF while (0) printf +#endif + +const struct videomode * +pick_mode_by_dotclock(int width, int height, int dotclock) +{ + const struct videomode *this, *best = NULL; + int i; + + DPRINTF("%s: looking for %d x %d at up to %d kHz\n", __func__, width, + height, dotclock); + for (i = 0; i < videomode_count; i++) { + this = &videomode_list[i]; + if ((this->hdisplay != width) || (this->vdisplay != height) || + (this->dot_clock > dotclock)) + continue; + if (best != NULL) { + if (this->dot_clock > best->dot_clock) + best = this; + } else + best = this; + } + if (best != NULL) + DPRINTF("found %s\n", best->name); + + return best; +} + +const struct videomode * +pick_mode_by_ref(int width, int height, int refresh) +{ + const struct videomode *this, *best = NULL; + int mref, closest = 1000, i, diff; + + DPRINTF("%s: looking for %d x %d at up to %d Hz\n", __func__, width, + height, refresh); + for (i = 0; i < videomode_count; i++) { + + this = &videomode_list[i]; + mref = this->dot_clock * 1000 / 
(this->htotal * this->vtotal); + diff = abs(mref - refresh); + if ((this->hdisplay != width) || (this->vdisplay != height)) + continue; + DPRINTF("%s in %d hz, diff %d\n", this->name, mref, diff); + if (best != NULL) { + if (diff < closest) { + best = this; + closest = diff; + } + } else { + best = this; + closest = diff; + } + } + if (best != NULL) + DPRINTF("found %s %d\n", best->name, best->dot_clock); + + return best; +} + +static inline void +swap_modes(struct videomode *left, struct videomode *right) +{ + struct videomode temp; + + temp = *left; + *left = *right; + *right = temp; +} + +/* + * Sort modes by refresh rate, aspect ratio (*), then resolution. + * Preferred mode or largest mode is first in the list and other modes + * are sorted on closest match to that mode. + * (*) Note that the aspect ratio calculation treats "close" aspect ratios + * (within 12.5%) as the same for this purpose. + */ +#define DIVIDE(x, y) (((x) + ((y) / 2)) / (y)) +void +sort_modes(struct videomode *modes, struct videomode **preferred, int nmodes) +{ + int aspect, refresh, hbest, vbest, abest, atemp, rbest, rtemp; + int i, j; + struct videomode *mtemp = NULL; + + if (nmodes < 2) + return; + + if (*preferred != NULL) { + /* Put the preferred mode first in the list */ + aspect = (*preferred)->hdisplay * 100 / (*preferred)->vdisplay; + refresh = DIVIDE(DIVIDE((*preferred)->dot_clock * 1000, + (*preferred)->htotal), (*preferred)->vtotal); + if (*preferred != modes) { + swap_modes(*preferred, modes); + *preferred = modes; + } + } else { + /* + * Find the largest horizontal and vertical mode and put that + * first in the list. Preferred refresh rate is taken from + * the first mode of this size. 
+ */ + hbest = 0; + vbest = 0; + for (i = 0; i < nmodes; i++) { + if (modes[i].hdisplay > hbest) { + hbest = modes[i].hdisplay; + vbest = modes[i].vdisplay; + mtemp = &modes[i]; + } else if (modes[i].hdisplay == hbest && + modes[i].vdisplay > vbest) { + vbest = modes[i].vdisplay; + mtemp = &modes[i]; + } + } + aspect = mtemp->hdisplay * 100 / mtemp->vdisplay; + refresh = DIVIDE(DIVIDE(mtemp->dot_clock * 1000, + mtemp->htotal), mtemp->vtotal); + if (mtemp != modes) + swap_modes(mtemp, modes); + } + + /* Sort other modes by refresh rate, aspect ratio, then resolution */ + for (j = 1; j < nmodes - 1; j++) { + rbest = 1000; + abest = 1000; + hbest = 0; + vbest = 0; + for (i = j; i < nmodes; i++) { + rtemp = abs(refresh - + DIVIDE(DIVIDE(modes[i].dot_clock * 1000, + modes[i].htotal), modes[i].vtotal)); + atemp = (modes[i].hdisplay * 100 / modes[i].vdisplay); + if (rtemp < rbest) { + rbest = rtemp; + mtemp = &modes[i]; + } + if (rtemp == rbest) { + /* Treat "close" aspect ratios as identical */ + if (abs(abest - atemp) > (abest / 8) && + abs(aspect - atemp) < abs(aspect - abest)) { + abest = atemp; + mtemp = &modes[i]; + } + if (atemp == abest || + abs(abest - atemp) <= (abest / 8)) { + if (modes[i].hdisplay > hbest) { + hbest = modes[i].hdisplay; + mtemp = &modes[i]; + } + if (modes[i].hdisplay == hbest && + modes[i].vdisplay > vbest) { + vbest = modes[i].vdisplay; + mtemp = &modes[i]; + } + } + } + } + if (mtemp != &modes[j]) + swap_modes(mtemp, &modes[j]); + } +} diff --git a/sys/dev/videomode/test.c b/sys/dev/videomode/test.c new file mode 100644 index 0000000..d51ffdd --- /dev/null +++ b/sys/dev/videomode/test.c @@ -0,0 +1,26 @@ +/* $FreeBSD$ */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "videomode.h" + +int +main(int argc, char **argv) +{ + int i, j; + + for (i = 1; i < argc ; i++) { + for (j = 0; j < videomode_count; j++) { + if (strcmp(videomode_list[j].name, argv[i]) == 0) { + printf("dotclock for mode %s = %d, flags %x\n", + 
argv[i], + videomode_list[j].dot_clock, + videomode_list[j].flags); + break; + } + } + if (j == videomode_count) { + printf("dotclock for mode %s not found\n", argv[i]); + } + } +} diff --git a/sys/dev/videomode/vesagtf.c b/sys/dev/videomode/vesagtf.c new file mode 100644 index 0000000..7164ae4 --- /dev/null +++ b/sys/dev/videomode/vesagtf.c @@ -0,0 +1,703 @@ +/* $NetBSD: vesagtf.c,v 1.2 2013/09/15 15:56:07 martin Exp $ */ +/* $FreeBSD$ */ + +/*- + * Copyright (c) 2006 Itronix Inc. + * All rights reserved. + * + * Written by Garrett D'Amore for Itronix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of Itronix Inc. may not be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * This was derived from a userland GTF program supplied by NVIDIA. + * NVIDIA's original boilerplate follows. + * + * Note that I have heavily modified the program for use in the EDID + * kernel code for NetBSD, including removing the use of floating + * point operations and making significant adjustments to minimize + * error propagation while operating with integer only math. + * + * This has required the use of 64-bit integers in a few places, but + * the upshot is that for a calculation of 1920x1200x85 (as an + * example), the error deviates by only ~.004% relative to the + * floating point version. This error is *well* within VESA + * tolerances. + */ + +/* + * Copyright (c) 2001, Andy Ritger aritger@nvidia.com + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * o Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * o Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * o Neither the name of NVIDIA nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT + * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * + * This program is based on the Generalized Timing Formula(GTF TM) + * Standard Version: 1.0, Revision: 1.0 + * + * The GTF Document contains the following Copyright information: + * + * Copyright (c) 1994, 1995, 1996 - Video Electronics Standards + * Association. Duplication of this document within VESA member + * companies for review purposes is permitted. All other rights + * reserved. + * + * While every precaution has been taken in the preparation + * of this standard, the Video Electronics Standards Association and + * its contributors assume no responsibility for errors or omissions, + * and make no warranties, expressed or implied, of functionality + * of suitability for any purpose. The sample code contained within + * this standard may be used without restriction. + * + * + * + * The GTF EXCEL(TM) SPREADSHEET, a sample (and the definitive) + * implementation of the GTF Timing Standard, is available at: + * + * ftp://ftp.vesa.org/pub/GTF/GTF_V1R1.xls + * + * + * + * This program takes a desired resolution and vertical refresh rate, + * and computes mode timings according to the GTF Timing Standard. + * These mode timings can then be formatted as an XFree86 modeline + * or a mode description for use by fbset(8). + * + * + * + * NOTES: + * + * The GTF allows for computation of "margins" (the visible border + * surrounding the addressable video); on most non-overscan type + * systems, the margin period is zero. 
I've implemented the margin + * computations but not enabled it because 1) I don't really have + * any experience with this, and 2) neither XFree86 modelines nor + * fbset fb.modes provide an obvious way for margin timings to be + * included in their mode descriptions (needs more investigation). + * + * The GTF provides for computation of interlaced mode timings; + * I've implemented the computations but not enabled them, yet. + * I should probably enable and test this at some point. + * + * + * + * TODO: + * + * o Add support for interlaced modes. + * + * o Implement the other portions of the GTF: compute mode timings + * given either the desired pixel clock or the desired horizontal + * frequency. + * + * o It would be nice if this were more general purpose to do things + * outside the scope of the GTF: like generate double scan mode + * timings, for example. + * + * o Printing digits to the right of the decimal point when the + * digits are 0 annoys me. + * + * o Error checking. + * + */ + + +#ifdef _KERNEL +#include <sys/cdefs.h> + +__FBSDID("$FreeBSD$"); +#include <sys/types.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <dev/videomode/videomode.h> +#include <dev/videomode/vesagtf.h> +#else +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include "videomode.h" +#include "vesagtf.h" + +void print_xf86_mode(struct videomode *m); +#endif + +#define CELL_GRAN 8 /* assumed character cell granularity */ + +/* C' and M' are part of the Blanking Duty Cycle computation */ +/* + * #define C_PRIME (((C - J) * K/256.0) + J) + * #define M_PRIME (K/256.0 * M) + */ + +/* + * C' and M' multiplied by 256 to give integer math. Make sure to + * scale results using these back down, appropriately. 
+ */ +#define C_PRIME256(p) (((p->C - p->J) * p->K) + (p->J * 256)) +#define M_PRIME256(p) (p->K * p->M) + +#define DIVIDE(x,y) (((x) + ((y) / 2)) / (y)) + +/* + * print_value() - print the result of the named computation; this is + * useful when comparing against the GTF EXCEL spreadsheet. + */ + +#ifdef GTFDEBUG + +static void +print_value(int n, const char *name, unsigned val) +{ + printf("%2d: %-27s: %u\n", n, name, val); +} +#else +#define print_value(n, name, val) +#endif + + +/* + * vert_refresh() - as defined by the GTF Timing Standard, compute the + * Stage 1 Parameters using the vertical refresh frequency. In other + * words: input a desired resolution and desired refresh rate, and + * output the GTF mode timings. + * + * XXX All the code is in place to compute interlaced modes, but I don't + * feel like testing it right now. + * + * XXX margin computations are implemented but not tested (nor used by + * XFree86 of fbset mode descriptions, from what I can tell). + */ + +void +vesagtf_mode_params(unsigned h_pixels, unsigned v_lines, unsigned freq, + struct vesagtf_params *params, int flags, struct videomode *vmp) +{ + unsigned v_field_rqd; + unsigned top_margin; + unsigned bottom_margin; + unsigned interlace; + uint64_t h_period_est; + unsigned vsync_plus_bp; + unsigned v_back_porch __unused; + unsigned total_v_lines; + uint64_t v_field_est; + uint64_t h_period; + unsigned v_field_rate; + unsigned v_frame_rate __unused; + unsigned left_margin; + unsigned right_margin; + unsigned total_active_pixels; + uint64_t ideal_duty_cycle; + unsigned h_blank; + unsigned total_pixels; + unsigned pixel_freq; + + unsigned h_sync; + unsigned h_front_porch; + unsigned v_odd_front_porch_lines; + +#ifdef GTFDEBUG + unsigned h_freq; +#endif + + /* 1. 
In order to give correct results, the number of horizontal + * pixels requested is first processed to ensure that it is divisible + * by the character size, by rounding it to the nearest character + * cell boundary: + * + * [H PIXELS RND] = ((ROUND([H PIXELS]/[CELL GRAN RND],0))*[CELLGRAN RND]) + */ + + h_pixels = DIVIDE(h_pixels, CELL_GRAN) * CELL_GRAN; + + print_value(1, "[H PIXELS RND]", h_pixels); + + + /* 2. If interlace is requested, the number of vertical lines assumed + * by the calculation must be halved, as the computation calculates + * the number of vertical lines per field. In either case, the + * number of lines is rounded to the nearest integer. + * + * [V LINES RND] = IF([INT RQD?]="y", ROUND([V LINES]/2,0), + * ROUND([V LINES],0)) + */ + + v_lines = (flags & VESAGTF_FLAG_ILACE) ? DIVIDE(v_lines, 2) : v_lines; + + print_value(2, "[V LINES RND]", v_lines); + + + /* 3. Find the frame rate required: + * + * [V FIELD RATE RQD] = IF([INT RQD?]="y", [I/P FREQ RQD]*2, + * [I/P FREQ RQD]) + */ + + v_field_rqd = (flags & VESAGTF_FLAG_ILACE) ? (freq * 2) : (freq); + + print_value(3, "[V FIELD RATE RQD]", v_field_rqd); + + + /* 4. Find number of lines in Top margin: + * 5. Find number of lines in Bottom margin: + * + * [TOP MARGIN (LINES)] = IF([MARGINS RQD?]="Y", + * ROUND(([MARGIN%]/100*[V LINES RND]),0), + * 0) + * + * Ditto for bottom margin. Note that instead of %, we use PPT, which + * is parts per thousand. This helps us with integer math. + */ + + top_margin = bottom_margin = (flags & VESAGTF_FLAG_MARGINS) ? + DIVIDE(v_lines * params->margin_ppt, 1000) : 0; + + print_value(4, "[TOP MARGIN (LINES)]", top_margin); + print_value(5, "[BOT MARGIN (LINES)]", bottom_margin); + + + /* 6. If interlace is required, then set variable [INTERLACE]=0.5: + * + * [INTERLACE]=(IF([INT RQD?]="y",0.5,0)) + * + * To make this integer friendly, we use some special hacks in step + * 7 below. 
Please read those comments to understand why I am using + * a whole number of 1.0 instead of 0.5 here. + */ + interlace = (flags & VESAGTF_FLAG_ILACE) ? 1 : 0; + + print_value(6, "[2*INTERLACE]", interlace); + + + /* 7. Estimate the Horizontal period + * + * [H PERIOD EST] = ((1/[V FIELD RATE RQD]) - [MIN VSYNC+BP]/1000000) / + * ([V LINES RND] + (2*[TOP MARGIN (LINES)]) + + * [MIN PORCH RND]+[INTERLACE]) * 1000000 + * + * To make it integer friendly, we pre-multiply the 1000000 to get to + * usec. This gives us: + * + * [H PERIOD EST] = ((1000000/[V FIELD RATE RQD]) - [MIN VSYNC+BP]) / + * ([V LINES RND] + (2 * [TOP MARGIN (LINES)]) + + * [MIN PORCH RND]+[INTERLACE]) + * + * The other problem is that the interlace value is wrong. To get + * the interlace to a whole number, we multiply both the numerator and + * divisor by 2, so we can use a value of either 1 or 0 for the interlace + * factor. + * + * This gives us: + * + * [H PERIOD EST] = ((2*((1000000/[V FIELD RATE RQD]) - [MIN VSYNC+BP])) / + * (2*([V LINES RND] + (2*[TOP MARGIN (LINES)]) + + * [MIN PORCH RND]) + [2*INTERLACE])) + * + * Finally we multiply by another 1000, to get value in picosec. + * Why picosec? To minimize rounding errors. Gotta love integer + * math and error propagation. + */ + + h_period_est = DIVIDE(((DIVIDE(2000000000000ULL, v_field_rqd)) - + (2000000 * params->min_vsbp)), + ((2 * (v_lines + (2 * top_margin) + params->min_porch)) + interlace)); + + print_value(7, "[H PERIOD EST (ps)]", h_period_est); + + + /* 8. Find the number of lines in V sync + back porch: + * + * [V SYNC+BP] = ROUND(([MIN VSYNC+BP]/[H PERIOD EST]),0) + * + * But recall that h_period_est is in psec. So multiply by 1000000. + */ + + vsync_plus_bp = DIVIDE(params->min_vsbp * 1000000, h_period_est); + + print_value(8, "[V SYNC+BP]", vsync_plus_bp); + + + /* 9. Find the number of lines in V back porch alone: + * + * [V BACK PORCH] = [V SYNC+BP] - [V SYNC RND] + * + * XXX is "[V SYNC RND]" a typo? should be [V SYNC RQD]? 
+ */ + + v_back_porch = vsync_plus_bp - params->vsync_rqd; + + print_value(9, "[V BACK PORCH]", v_back_porch); + + + /* 10. Find the total number of lines in Vertical field period: + * + * [TOTAL V LINES] = [V LINES RND] + [TOP MARGIN (LINES)] + + * [BOT MARGIN (LINES)] + [V SYNC+BP] + [INTERLACE] + + * [MIN PORCH RND] + */ + + total_v_lines = v_lines + top_margin + bottom_margin + vsync_plus_bp + + interlace + params->min_porch; + + print_value(10, "[TOTAL V LINES]", total_v_lines); + + + /* 11. Estimate the Vertical field frequency: + * + * [V FIELD RATE EST] = 1 / [H PERIOD EST] / [TOTAL V LINES] * 1000000 + * + * Again, we want to pre multiply by 10^9 to convert for nsec, thereby + * making it usable in integer math. + * + * So we get: + * + * [V FIELD RATE EST] = 1000000000 / [H PERIOD EST] / [TOTAL V LINES] + * + * This is all scaled to get the result in uHz. Again, we're trying to + * minimize error propagation. + */ + v_field_est = DIVIDE(DIVIDE(1000000000000000ULL, h_period_est), + total_v_lines); + + print_value(11, "[V FIELD RATE EST(uHz)]", v_field_est); + + + /* 12. Find the actual horizontal period: + * + * [H PERIOD] = [H PERIOD EST] / ([V FIELD RATE RQD] / [V FIELD RATE EST]) + */ + + h_period = DIVIDE(h_period_est * v_field_est, v_field_rqd * 1000); + + print_value(12, "[H PERIOD(ps)]", h_period); + + + /* 13. Find the actual Vertical field frequency: + * + * [V FIELD RATE] = 1 / [H PERIOD] / [TOTAL V LINES] * 1000000 + * + * And again, we convert to nsec ahead of time, giving us: + * + * [V FIELD RATE] = 1000000 / [H PERIOD] / [TOTAL V LINES] + * + * And another rescaling back to mHz. Gotta love it. + */ + + v_field_rate = DIVIDE(1000000000000ULL, h_period * total_v_lines); + + print_value(13, "[V FIELD RATE]", v_field_rate); + + + /* 14. Find the Vertical frame frequency: + * + * [V FRAME RATE] = (IF([INT RQD?]="y", [V FIELD RATE]/2, [V FIELD RATE])) + * + * N.B. that the result here is in mHz. 
+ */ + + v_frame_rate = (flags & VESAGTF_FLAG_ILACE) ? + v_field_rate / 2 : v_field_rate; + + print_value(14, "[V FRAME RATE]", v_frame_rate); + + + /* 15. Find number of pixels in left margin: + * 16. Find number of pixels in right margin: + * + * [LEFT MARGIN (PIXELS)] = (IF( [MARGINS RQD?]="Y", + * (ROUND( ([H PIXELS RND] * [MARGIN%] / 100 / + * [CELL GRAN RND]),0)) * [CELL GRAN RND], + * 0)) + * + * Again, we deal with margin percentages as PPT (parts per thousand). + * And the calculations for left and right are the same. + */ + + left_margin = right_margin = (flags & VESAGTF_FLAG_MARGINS) ? + DIVIDE(DIVIDE(h_pixels * params->margin_ppt, 1000), + CELL_GRAN) * CELL_GRAN : 0; + + print_value(15, "[LEFT MARGIN (PIXELS)]", left_margin); + print_value(16, "[RIGHT MARGIN (PIXELS)]", right_margin); + + + /* 17. Find total number of active pixels in image and left and right + * margins: + * + * [TOTAL ACTIVE PIXELS] = [H PIXELS RND] + [LEFT MARGIN (PIXELS)] + + * [RIGHT MARGIN (PIXELS)] + */ + + total_active_pixels = h_pixels + left_margin + right_margin; + + print_value(17, "[TOTAL ACTIVE PIXELS]", total_active_pixels); + + + /* 18. Find the ideal blanking duty cycle from the blanking duty cycle + * equation: + * + * [IDEAL DUTY CYCLE] = [C'] - ([M']*[H PERIOD]/1000) + * + * However, we have modified values for [C'] as [256*C'] and + * [M'] as [256*M']. Again the idea here is to get good scaling. + * We use 256 as the factor to make the math fast. + * + * Note that this means that we have to scale it appropriately in + * later calculations. + * + * The ending result is that our ideal_duty_cycle is 256000x larger + * than the duty cycle used by VESA. But again, this reduces error + * propagation. + */ + + ideal_duty_cycle = + ((C_PRIME256(params) * 1000) - + (M_PRIME256(params) * h_period / 1000000)); + + print_value(18, "[IDEAL DUTY CYCLE]", ideal_duty_cycle); + + + /* 19. 
Find the number of pixels in the blanking time to the nearest + * double character cell: + * + * [H BLANK (PIXELS)] = (ROUND(([TOTAL ACTIVE PIXELS] * + * [IDEAL DUTY CYCLE] / + * (100-[IDEAL DUTY CYCLE]) / + * (2*[CELL GRAN RND])), 0)) + * * (2*[CELL GRAN RND]) + * + * Of course, we adjust to make this rounding work in integer math. + */ + + h_blank = DIVIDE(DIVIDE(total_active_pixels * ideal_duty_cycle, + (256000 * 100ULL) - ideal_duty_cycle), + 2 * CELL_GRAN) * (2 * CELL_GRAN); + + print_value(19, "[H BLANK (PIXELS)]", h_blank); + + + /* 20. Find total number of pixels: + * + * [TOTAL PIXELS] = [TOTAL ACTIVE PIXELS] + [H BLANK (PIXELS)] + */ + + total_pixels = total_active_pixels + h_blank; + + print_value(20, "[TOTAL PIXELS]", total_pixels); + + + /* 21. Find pixel clock frequency: + * + * [PIXEL FREQ] = [TOTAL PIXELS] / [H PERIOD] + * + * We calculate this in Hz rather than MHz, to get a value that + * is usable with integer math. Recall that the [H PERIOD] is in + * nsec. + */ + + pixel_freq = DIVIDE(total_pixels * 1000000, DIVIDE(h_period, 1000)); + + print_value(21, "[PIXEL FREQ]", pixel_freq); + + + /* 22. Find horizontal frequency: + * + * [H FREQ] = 1000 / [H PERIOD] + * + * I've ifdef'd this out, because we don't need it for any of + * our calculations. + * We calculate this in Hz rather than kHz, to avoid rounding + * errors. Recall that the [H PERIOD] is in usec. + */ + +#ifdef GTFDEBUG + h_freq = 1000000000 / h_period; + + print_value(22, "[H FREQ]", h_freq); +#endif + + + + /* Stage 1 computations are now complete; I should really pass + the results to another function and do the Stage 2 + computations, but I only need a few more values so I'll just + append the computations here for now */ + + + + /* 17. 
Find the number of pixels in the horizontal sync period: + * + * [H SYNC (PIXELS)] =(ROUND(([H SYNC%] / 100 * [TOTAL PIXELS] / + * [CELL GRAN RND]),0))*[CELL GRAN RND] + * + * Rewriting for integer math: + * + * [H SYNC (PIXELS)]=(ROUND((H SYNC%] * [TOTAL PIXELS] / 100 / + * [CELL GRAN RND),0))*[CELL GRAN RND] + */ + + h_sync = DIVIDE(((params->hsync_pct * total_pixels) / 100), CELL_GRAN) * + CELL_GRAN; + + print_value(17, "[H SYNC (PIXELS)]", h_sync); + + + /* 18. Find the number of pixels in the horizontal front porch period: + * + * [H FRONT PORCH (PIXELS)] = ([H BLANK (PIXELS)]/2)-[H SYNC (PIXELS)] + * + * Note that h_blank is always an even number of characters (i.e. + * h_blank % (CELL_GRAN * 2) == 0) + */ + + h_front_porch = (h_blank / 2) - h_sync; + + print_value(18, "[H FRONT PORCH (PIXELS)]", h_front_porch); + + + /* 36. Find the number of lines in the odd front porch period: + * + * [V ODD FRONT PORCH(LINES)]=([MIN PORCH RND]+[INTERLACE]) + * + * Adjusting for the fact that the interlace is scaled: + * + * [V ODD FRONT PORCH(LINES)]=(([MIN PORCH RND] * 2) + [2*INTERLACE]) / 2 + */ + + v_odd_front_porch_lines = ((2 * params->min_porch) + interlace) / 2; + + print_value(36, "[V ODD FRONT PORCH(LINES)]", v_odd_front_porch_lines); + + + /* finally, pack the results in the mode struct */ + + vmp->hsync_start = h_pixels + h_front_porch; + vmp->hsync_end = vmp->hsync_start + h_sync; + vmp->htotal = total_pixels; + vmp->hdisplay = h_pixels; + + vmp->vsync_start = v_lines + v_odd_front_porch_lines; + vmp->vsync_end = vmp->vsync_start + params->vsync_rqd; + vmp->vtotal = total_v_lines; + vmp->vdisplay = v_lines; + + vmp->dot_clock = pixel_freq; + +} + +void +vesagtf_mode(unsigned x, unsigned y, unsigned refresh, struct videomode *vmp) +{ + struct vesagtf_params params; + + params.margin_ppt = VESAGTF_MARGIN_PPT; + params.min_porch = VESAGTF_MIN_PORCH; + params.vsync_rqd = VESAGTF_VSYNC_RQD; + params.hsync_pct = VESAGTF_HSYNC_PCT; + params.min_vsbp = 
VESAGTF_MIN_VSBP; + params.M = VESAGTF_M; + params.C = VESAGTF_C; + params.K = VESAGTF_K; + params.J = VESAGTF_J; + + vesagtf_mode_params(x, y, refresh, ¶ms, 0, vmp); +} + +/* + * The tidbit here is so that you can compile this file as a + * standalone user program to generate X11 modelines using VESA GTF. + * This also allows for testing of the code itself, without + * necessitating a full kernel recompile. + */ + +/* print_xf86_mode() - print the XFree86 modeline, given mode timings. */ + +#ifndef _KERNEL +void +print_xf86_mode (struct videomode *vmp) +{ + float vf, hf; + + hf = 1000.0 * vmp->dot_clock / vmp->htotal; + vf = 1.0 * hf / vmp->vtotal; + + printf("\n"); + printf(" # %dx%d @ %.2f Hz (GTF) hsync: %.2f kHz; pclk: %.2f MHz\n", + vmp->hdisplay, vmp->vdisplay, vf, hf, vmp->dot_clock / 1000.0); + + printf(" Modeline \"%dx%d_%.2f\" %.2f" + " %d %d %d %d" + " %d %d %d %d" + " -HSync +Vsync\n\n", + vmp->hdisplay, vmp->vdisplay, vf, (vmp->dot_clock / 1000.0), + vmp->hdisplay, vmp->hsync_start, vmp->hsync_end, vmp->htotal, + vmp->vdisplay, vmp->vsync_start, vmp->vsync_end, vmp->vtotal); +} + +int +main (int argc, char *argv[]) +{ + struct videomode m; + + if (argc != 4) { + printf("usage: %s x y refresh\n", argv[0]); + exit(1); + } + + vesagtf_mode(atoi(argv[1]), atoi(argv[2]), atoi(argv[3]), &m); + + print_xf86_mode(&m); + + return 0; + +} +#endif diff --git a/sys/dev/videomode/vesagtf.h b/sys/dev/videomode/vesagtf.h new file mode 100644 index 0000000..ea9dae4 --- /dev/null +++ b/sys/dev/videomode/vesagtf.h @@ -0,0 +1,86 @@ +/* $NetBSD$ */ +/* $FreeBSD$ */ + +/*- + * Copyright (c) 2006 Itronix Inc. + * All rights reserved. + * + * Written by Garrett D'Amore for Itronix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of Itronix Inc. may not be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _DEV_VIDEOMODE_VESAGTF_H +#define _DEV_VIDEOMODE_VESAGTF_H + +/* + * Use VESA GTF formula to generate a monitor mode, given resolution and + * refresh rates. 
+ */ + +struct vesagtf_params { + unsigned margin_ppt; /* vertical margin size, percent * 10 + * think parts-per-thousand */ + unsigned min_porch; /* minimum front porch */ + unsigned vsync_rqd; /* width of vsync in lines */ + unsigned hsync_pct; /* hsync as % of total width */ + unsigned min_vsbp; /* minimum vsync + back porch (usec) */ + unsigned M; /* blanking formula gradient */ + unsigned C; /* blanking formula offset */ + unsigned K; /* blanking formula scaling factor */ + unsigned J; /* blanking formula scaling factor */ +}; + +/* + * Default values to use for params. + */ +#define VESAGTF_MARGIN_PPT 18 /* 1.8% */ +#define VESAGTF_MIN_PORCH 1 /* minimum front porch */ +#define VESAGTF_VSYNC_RQD 3 /* vsync width in lines */ +#define VESAGTF_HSYNC_PCT 8 /* width of hsync % of total line */ +#define VESAGTF_MIN_VSBP 550 /* min vsync + back porch (usec) */ +#define VESAGTF_M 600 /* blanking formula gradient */ +#define VESAGTF_C 40 /* blanking formula offset */ +#define VESAGTF_K 128 /* blanking formula scaling factor */ +#define VESAGTF_J 20 /* blanking formula scaling factor */ + +/* + * Use VESA GTF formula to generate monitor timings. Assumes default + * GTF parameters, non-interlaced, and no margins. + */ +void vesagtf_mode(unsigned x, unsigned y, unsigned refresh, + struct videomode *); + +/* + * A more complete version, in case we ever want to use alternate GTF + * parameters. EDID 1.3 allows for "secondary GTF parameters". + */ +void vesagtf_mode_params(unsigned x, unsigned y, unsigned refresh, + struct vesagtf_params *, int flags, struct videomode *); + +#define VESAGTF_FLAG_ILACE 0x0001 /* use interlace */ +#define VESAGTF_FLAG_MARGINS 0x0002 /* use margins */ + +#endif /* _DEV_VIDEOMODE_VESAGTF_H */ diff --git a/sys/dev/videomode/videomode.c b/sys/dev/videomode/videomode.c new file mode 100644 index 0000000..a1c7f0a --- /dev/null +++ b/sys/dev/videomode/videomode.c @@ -0,0 +1,130 @@ +/* $FreeBSD$ */ + +/* + * THIS FILE AUTOMATICALLY GENERATED. 
DO NOT EDIT. + * + * generated from: + * NetBSD: modelines,v 1.9 2011/03/30 18:45:04 jdc Exp + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/module.h> +#include <dev/videomode/videomode.h> + +MODULE_VERSION(videomode, 1); + +/* + * These macros help the modelines below fit on one line. + */ +#define HP VID_PHSYNC +#define HN VID_NHSYNC +#define VP VID_PVSYNC +#define VN VID_NVSYNC +#define I VID_INTERLACE +#define DS VID_DBLSCAN + +#define M(nm,hr,vr,clk,hs,he,ht,vs,ve,vt,f) \ + { clk, hr, hs, he, ht, vr, vs, ve, vt, f, nm } + +const struct videomode videomode_list[] = { +M("640x350x85",640,350,31500,672,736,832,382,385,445,HP|VN), +M("640x400x85",640,400,31500,672,736,832,401,404,445,HN|VP), +M("720x400x70",720,400,28320,738,846,900,412,414,449,HN|VP), +M("720x400x85",720,400,35500,756,828,936,401,404,446,HN|VP), +M("720x400x87",720,400,35500,738,846,900,421,423,449,HN|VN), +M("640x480x60",640,480,25175,656,752,800,490,492,525,HN|VN), +M("640x480x72",640,480,31500,664,704,832,489,492,520,HN|VN), +M("640x480x75",640,480,31500,656,720,840,481,484,500,HN|VN), +M("640x480x85",640,480,36000,696,752,832,481,484,509,HN|VN), +M("800x600x56",800,600,36000,824,896,1024,601,603,625,HP|VP), +M("800x600x60",800,600,40000,840,968,1056,601,605,628,HP|VP), +M("800x600x72",800,600,50000,856,976,1040,637,643,666,HP|VP), +M("800x600x75",800,600,49500,816,896,1056,601,604,625,HP|VP), +M("800x600x85",800,600,56250,832,896,1048,601,604,631,HP|VP), +M("1024x768x87i",1024,768,44900,1032,1208,1264,768,776,817,HP|VP|I), +M("1024x768x60",1024,768,65000,1048,1184,1344,771,777,806,HN|VN), +M("1024x768x70",1024,768,75000,1048,1184,1328,771,777,806,HN|VN), +M("1024x768x75",1024,768,78750,1040,1136,1312,769,772,800,HP|VP), +M("1024x768x85",1024,768,94500,1072,1168,1376,769,772,808,HP|VP), +M("1024x768x89",1024,768,100000,1108,1280,1408,768,780,796,HP|VP), +M("1152x864x75",1152,864,108000,1216,1344,1600,865,868,900,HP|VP), 
+M("1280x768x75",1280,768,105640,1312,1712,1744,782,792,807,HN|VP), +M("1280x960x60",1280,960,108000,1376,1488,1800,961,964,1000,HP|VP), +M("1280x960x85",1280,960,148500,1344,1504,1728,961,964,1011,HP|VP), +M("1280x1024x60",1280,1024,108000,1328,1440,1688,1025,1028,1066,HP|VP), +M("1280x1024x70",1280,1024,126000,1328,1440,1688,1025,1028,1066,HP|VP), +M("1280x1024x75",1280,1024,135000,1296,1440,1688,1025,1028,1066,HP|VP), +M("1280x1024x85",1280,1024,157500,1344,1504,1728,1025,1028,1072,HP|VP), +M("1600x1200x60",1600,1200,162000,1664,1856,2160,1201,1204,1250,HP|VP), +M("1600x1200x65",1600,1200,175500,1664,1856,2160,1201,1204,1250,HP|VP), +M("1600x1200x70",1600,1200,189000,1664,1856,2160,1201,1204,1250,HP|VP), +M("1600x1200x75",1600,1200,202500,1664,1856,2160,1201,1204,1250,HP|VP), +M("1600x1200x85",1600,1200,229500,1664,1856,2160,1201,1204,1250,HP|VP), +M("1680x1050x60",1680,1050,147140,1784,1968,2256,1051,1054,1087,HP|VP), +M("1792x1344x60",1792,1344,204800,1920,2120,2448,1345,1348,1394,HN|VP), +M("1792x1344x75",1792,1344,261000,1888,2104,2456,1345,1348,1417,HN|VP), +M("1856x1392x60",1856,1392,218300,1952,2176,2528,1393,1396,1439,HN|VP), +M("1856x1392x75",1856,1392,288000,1984,2208,2560,1393,1396,1500,HN|VP), +M("1920x1440x60",1920,1440,234000,2048,2256,2600,1441,1444,1500,HN|VP), +M("1920x1440x75",1920,1440,297000,2064,2288,2640,1441,1444,1500,HN|VP), +M("832x624x74",832,624,57284,864,928,1152,625,628,667,HN|VN), +M("1152x768x54",1152,768,64995,1178,1314,1472,771,777,806,HP|VP), +M("1400x1050x60",1400,1050,122000,1488,1640,1880,1052,1064,1082,HP|VP), +M("1400x1050x74",1400,1050,155800,1464,1784,1912,1052,1064,1090,HP|VP), +M("1152x900x66",1152,900,94500,1192,1320,1528,902,906,937,HN|VN), +M("1152x900x76",1152,900,105560,1168,1280,1472,902,906,943,HN|VN), + +/* Derived Double Scan Modes */ + +M("320x175x85",320,175,15750,336,368,416,191,192,222,HP|VN|DS), +M("320x200x85",320,200,15750,336,368,416,200,202,222,HN|VP|DS), 
+M("360x200x70",360,200,14160,369,423,450,206,207,224,HN|VP|DS), +M("360x200x85",360,200,17750,378,414,468,200,202,223,HN|VP|DS), +M("360x200x87",360,200,17750,369,423,450,210,211,224,HN|VN|DS), +M("320x240x60",320,240,12587,328,376,400,245,246,262,HN|VN|DS), +M("320x240x72",320,240,15750,332,352,416,244,246,260,HN|VN|DS), +M("320x240x75",320,240,15750,328,360,420,240,242,250,HN|VN|DS), +M("320x240x85",320,240,18000,348,376,416,240,242,254,HN|VN|DS), +M("400x300x56",400,300,18000,412,448,512,300,301,312,HP|VP|DS), +M("400x300x60",400,300,20000,420,484,528,300,302,314,HP|VP|DS), +M("400x300x72",400,300,25000,428,488,520,318,321,333,HP|VP|DS), +M("400x300x75",400,300,24750,408,448,528,300,302,312,HP|VP|DS), +M("400x300x85",400,300,28125,416,448,524,300,302,315,HP|VP|DS), +M("512x384x87i",512,384,22450,516,604,632,384,388,408,HP|VP|DS|I), +M("512x384x60",512,384,32500,524,592,672,385,388,403,HN|VN|DS), +M("512x384x70",512,384,37500,524,592,664,385,388,403,HN|VN|DS), +M("512x384x75",512,384,39375,520,568,656,384,386,400,HP|VP|DS), +M("512x384x85",512,384,47250,536,584,688,384,386,404,HP|VP|DS), +M("512x384x89",512,384,50000,554,640,704,384,390,398,HP|VP|DS), +M("576x432x75",576,432,54000,608,672,800,432,434,450,HP|VP|DS), +M("640x384x75",640,384,52820,656,856,872,391,396,403,HN|VP|DS), +M("640x480x60",640,480,54000,688,744,900,480,482,500,HP|VP|DS), +M("640x480x85",640,480,74250,672,752,864,480,482,505,HP|VP|DS), +M("640x512x60",640,512,54000,664,720,844,512,514,533,HP|VP|DS), +M("640x512x70",640,512,63000,664,720,844,512,514,533,HP|VP|DS), +M("640x512x75",640,512,67500,648,720,844,512,514,533,HP|VP|DS), +M("640x512x85",640,512,78750,672,752,864,512,514,536,HP|VP|DS), +M("800x600x60",800,600,81000,832,928,1080,600,602,625,HP|VP|DS), +M("800x600x65",800,600,87750,832,928,1080,600,602,625,HP|VP|DS), +M("800x600x70",800,600,94500,832,928,1080,600,602,625,HP|VP|DS), +M("800x600x75",800,600,101250,832,928,1080,600,602,625,HP|VP|DS), 
+M("800x600x85",800,600,114750,832,928,1080,600,602,625,HP|VP|DS), +M("840x525x60",840,525,73570,892,984,1128,525,527,543,HP|VP|DS), +M("896x672x60",896,672,102400,960,1060,1224,672,674,697,HN|VP|DS), +M("896x672x75",896,672,130500,944,1052,1228,672,674,708,HN|VP|DS), +M("928x696x60",928,696,109150,976,1088,1264,696,698,719,HN|VP|DS), +M("928x696x75",928,696,144000,992,1104,1280,696,698,750,HN|VP|DS), +M("960x720x60",960,720,117000,1024,1128,1300,720,722,750,HN|VP|DS), +M("960x720x75",960,720,148500,1032,1144,1320,720,722,750,HN|VP|DS), +M("416x312x74",416,312,28642,432,464,576,312,314,333,HN|VN|DS), +M("576x384x54",576,384,32497,589,657,736,385,388,403,HP|VP|DS), +M("700x525x60",700,525,61000,744,820,940,526,532,541,HP|VP|DS), +M("700x525x74",700,525,77900,732,892,956,526,532,545,HP|VP|DS), +M("576x450x66",576,450,47250,596,660,764,451,453,468,HN|VN|DS), +M("576x450x76",576,450,52780,584,640,736,451,453,471,HN|VN|DS), +}; + +const int videomode_count = 46; diff --git a/sys/dev/videomode/videomode.h b/sys/dev/videomode/videomode.h new file mode 100644 index 0000000..b223da8 --- /dev/null +++ b/sys/dev/videomode/videomode.h @@ -0,0 +1,74 @@ +/* $NetBSD: videomode.h,v 1.2 2010/05/04 21:17:10 macallan Exp $ */ +/* $FreeBSD$ */ + +/* + * Copyright (c) 2001, 2002 Bang Jun-Young + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _DEV_VIDEOMODE_H +#define _DEV_VIDEOMODE_H + +struct videomode { + int dot_clock; /* Dot clock frequency in kHz. */ + int hdisplay; + int hsync_start; + int hsync_end; + int htotal; + int vdisplay; + int vsync_start; + int vsync_end; + int vtotal; + int flags; /* Video mode flags; see below. */ + const char *name; +}; + +/* + * Video mode flags. 
+ */ + +#define VID_PHSYNC 0x0001 +#define VID_NHSYNC 0x0002 +#define VID_PVSYNC 0x0004 +#define VID_NVSYNC 0x0008 +#define VID_INTERLACE 0x0010 +#define VID_DBLSCAN 0x0020 +#define VID_CSYNC 0x0040 +#define VID_PCSYNC 0x0080 +#define VID_NCSYNC 0x0100 +#define VID_HSKEW 0x0200 +#define VID_BCAST 0x0400 +#define VID_PIXMUX 0x1000 +#define VID_DBLCLK 0x2000 +#define VID_CLKDIV2 0x4000 + +extern const struct videomode videomode_list[]; +extern const int videomode_count; + +const struct videomode *pick_mode_by_dotclock(int, int, int); +const struct videomode *pick_mode_by_ref(int, int, int); +void sort_modes(struct videomode *, struct videomode **, int); + +#endif /* _DEV_VIDEOMODE_H */ diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index f0acf22..9627849 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -2518,6 +2518,7 @@ vt_upgrade(struct vt_device *vd) { struct vt_window *vw; unsigned int i; + int register_handlers; if (!vty_enabled(VTY_VT)) return; @@ -2546,6 +2547,7 @@ vt_upgrade(struct vt_device *vd) if (vd->vd_curwindow == NULL) vd->vd_curwindow = vd->vd_windows[VT_CONSWINDOW]; + register_handlers = 0; if (!(vd->vd_flags & VDF_ASYNC)) { /* Attach keyboard. */ vt_allocate_keyboard(vd); @@ -2557,18 +2559,21 @@ vt_upgrade(struct vt_device *vd) vd->vd_flags |= VDF_ASYNC; callout_reset(&vd->vd_timer, hz / VT_TIMERFREQ, vt_timer, vd); vd->vd_timer_armed = 1; + register_handlers = 1; + } + + VT_UNLOCK(vd); + + /* Refill settings with new sizes. */ + vt_resize(vd); + if (register_handlers) { /* Register suspend/resume handlers. */ EVENTHANDLER_REGISTER(power_suspend_early, vt_suspend_handler, vd, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(power_resume, vt_resume_handler, vd, EVENTHANDLER_PRI_ANY); } - - VT_UNLOCK(vd); - - /* Refill settings with new sizes. 
*/ - vt_resize(vd); } static void diff --git a/sys/dev/wpi/if_wpi.c b/sys/dev/wpi/if_wpi.c index 52846dd..87e8b8b 100644 --- a/sys/dev/wpi/if_wpi.c +++ b/sys/dev/wpi/if_wpi.c @@ -42,7 +42,7 @@ __FBSDID("$FreeBSD$"); * * A similar thing happens with the tx rings. The difference is the firmware * stop processing buffers once the queue is full and until confirmation - * of a successful transmition (tx_intr) has occurred. + * of a successful transmition (tx_done) has occurred. * * The command ring operates in the same manner as the tx queues. * @@ -447,6 +447,8 @@ wpi_attach(device_t dev) ic->ic_cryptocaps = IEEE80211_CRYPTO_AES_CCM; + ic->ic_flags |= IEEE80211_F_DATAPAD; + /* * Read in the eeprom and also setup the channels for * net80211. We don't set the rates as net80211 does this for us @@ -1378,8 +1380,7 @@ wpi_read_eeprom_band(struct wpi_softc *sc, int n) "adding chan %d (%dMHz) flags=0x%x maxpwr=%d passive=%d," " offset %d\n", chan, c->ic_freq, channels[i].flags, sc->maxpwr[chan], - (c->ic_flags & IEEE80211_CHAN_PASSIVE) != 0, - ic->ic_nchans); + IEEE80211_IS_CHAN_PASSIVE(c), ic->ic_nchans); } } @@ -1695,8 +1696,7 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc, if (stat->len > WPI_STAT_MAXLEN) { device_printf(sc->sc_dev, "invalid RX statistic header\n"); - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - return; + goto fail1; } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); @@ -1714,23 +1714,20 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc, if ((flags & WPI_RX_NOERROR) != WPI_RX_NOERROR) { DPRINTF(sc, WPI_DEBUG_RECV, "%s: RX flags error %x\n", __func__, flags); - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - return; + goto fail1; } /* Discard frames that are too short. 
*/ if (len < sizeof (*wh)) { DPRINTF(sc, WPI_DEBUG_RECV, "%s: frame too short: %d\n", __func__, len); - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - return; + goto fail1; } m1 = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (m1 == NULL) { DPRINTF(sc, WPI_DEBUG_ANY, "%s: no mbuf to restock ring\n", __func__); - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - return; + goto fail1; } bus_dmamap_unload(ring->data_dmat, data->map); @@ -1752,8 +1749,7 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc, ring->desc[ring->cur] = htole32(paddr); bus_dmamap_sync(ring->data_dmat, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - return; + goto fail1; } m = data->m; @@ -1777,18 +1773,14 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc, if ((wh->i_fc[1] & IEEE80211_FC1_PROTECTED) && !IEEE80211_IS_MULTICAST(wh->i_addr1) && cip != NULL && cip->ic_cipher == IEEE80211_CIPHER_AES_CCM) { - if ((flags & WPI_RX_CIPHER_MASK) != WPI_RX_CIPHER_CCMP) { - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - m_freem(m); - return; - } + if ((flags & WPI_RX_CIPHER_MASK) != WPI_RX_CIPHER_CCMP) + goto fail2; + /* Check whether decryption was successful or not. 
*/ if ((flags & WPI_RX_DECRYPT_MASK) != WPI_RX_DECRYPT_OK) { DPRINTF(sc, WPI_DEBUG_RECV, "CCMP decryption failed 0x%x\n", flags); - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - m_freem(m); - return; + goto fail2; } m->m_flags |= M_WEP; } @@ -1817,6 +1809,13 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc, (void)ieee80211_input_all(ic, m, stat->rssi, -WPI_RSSI_OFFSET); WPI_LOCK(sc); + + return; + +fail2: ieee80211_free_node(ni); + m_freem(m); + +fail1: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } static void @@ -1836,6 +1835,7 @@ wpi_tx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc) struct mbuf *m; struct ieee80211_node *ni; struct ieee80211vap *vap; + int ackfailcnt = stat->ackfailcnt; int status = le32toh(stat->status); KASSERT(data->ni != NULL, ("no node")); @@ -1844,7 +1844,7 @@ wpi_tx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc) DPRINTF(sc, WPI_DEBUG_XMIT, "%s: " "qid %d idx %d retries %d btkillcnt %d rate %x duration %d " - "status %x\n", __func__, desc->qid, desc->idx, stat->ackfailcnt, + "status %x\n", __func__, desc->qid, desc->idx, ackfailcnt, stat->btkillcnt, stat->rate, le32toh(stat->duration), status); /* Unmap and free mbuf. */ @@ -1861,11 +1861,11 @@ wpi_tx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc) if ((status & 0xff) != 1) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ieee80211_ratectl_tx_complete(vap, ni, - IEEE80211_RATECTL_TX_FAILURE, &stat->ackfailcnt, NULL); + IEEE80211_RATECTL_TX_FAILURE, &ackfailcnt, NULL); } else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); ieee80211_ratectl_tx_complete(vap, ni, - IEEE80211_RATECTL_TX_SUCCESS, &stat->ackfailcnt, NULL); + IEEE80211_RATECTL_TX_SUCCESS, &ackfailcnt, NULL); } ieee80211_tx_complete(ni, m, (status & 0xff) != 1); @@ -1931,10 +1931,7 @@ wpi_notif_intr(struct wpi_softc *sc) hw = le32toh(sc->shared->next); hw = (hw == 0) ? 
WPI_RX_RING_COUNT - 1 : hw - 1; - if (sc->rxq.cur == hw) - return; - - do { + while (sc->rxq.cur != hw) { sc->rxq.cur = (sc->rxq.cur + 1) % WPI_RX_RING_COUNT; struct wpi_rx_data *data = &sc->rxq.data[sc->rxq.cur]; @@ -2020,10 +2017,10 @@ wpi_notif_intr(struct wpi_softc *sc) BUS_DMASYNC_POSTREAD); uint32_t *status = (uint32_t *)(desc + 1); -#ifdef WPI_DEBUG + DPRINTF(sc, WPI_DEBUG_STATE, "state changed to %x\n", le32toh(*status)); -#endif + if (le32toh(*status) & 1) { ieee80211_runtask(ic, &sc->sc_radiooff_task); return; @@ -2061,7 +2058,7 @@ wpi_notif_intr(struct wpi_softc *sc) break; } } - } while (sc->rxq.cur != hw); + } /* Tell the firmware what we have processed. */ wpi_update_rx_ring(sc); @@ -2081,15 +2078,15 @@ wpi_wakeup_intr(struct wpi_softc *sc) /* Wakeup RX and TX rings. */ if (sc->rxq.update) { - wpi_update_rx_ring(sc); sc->rxq.update = 0; + wpi_update_rx_ring(sc); } for (qid = 0; qid < WPI_NTXQUEUES; qid++) { struct wpi_tx_ring *ring = &sc->txq[qid]; if (ring->update) { - wpi_update_tx_ring(sc, ring); ring->update = 0; + wpi_update_tx_ring(sc, ring); } } @@ -2227,6 +2224,8 @@ done: static int wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf) { + struct ifnet *ifp = sc->sc_ifp; + struct ieee80211com *ic = ifp->if_l2com; struct ieee80211_frame *wh; struct wpi_tx_cmd *cmd; struct wpi_tx_data *data; @@ -2234,23 +2233,16 @@ wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf) struct wpi_tx_ring *ring; struct mbuf *m1; bus_dma_segment_t *seg, segs[WPI_MAX_SCATTER]; - u_int hdrlen; - int error, i, nsegs, pad, totlen; + int error, i, hdrspace, nsegs, totlen; WPI_LOCK_ASSERT(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); wh = mtod(buf->m, struct ieee80211_frame *); - hdrlen = ieee80211_anyhdrsize(wh); + hdrspace = ieee80211_anyhdrspace(ic, wh); totlen = buf->m->m_pkthdr.len; - if (hdrlen & 3) { - /* First segment length must be a multiple of 4. 
*/ - pad = 4 - (hdrlen & 3); - } else - pad = 0; - ring = &sc->txq[buf->ac]; desc = &ring->desc[ring->cur]; data = &ring->data[ring->cur]; @@ -2265,8 +2257,8 @@ wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf) memcpy(cmd->data, buf->data, buf->size); /* Save and trim IEEE802.11 header. */ - memcpy((uint8_t *)(cmd->data + buf->size), wh, hdrlen); - m_adj(buf->m, hdrlen); + memcpy((uint8_t *)(cmd->data + buf->size), wh, hdrspace); + m_adj(buf->m, hdrspace); error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, buf->m, segs, &nsegs, BUS_DMA_NOWAIT); @@ -2304,10 +2296,10 @@ wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf) __func__, ring->qid, ring->cur, totlen, nsegs); /* Fill TX descriptor. */ - desc->nsegs = WPI_PAD32(totlen + pad) << 4 | (1 + nsegs); + desc->nsegs = WPI_PAD32(totlen) << 4 | (1 + nsegs); /* First DMA segment is used by the TX command. */ desc->segs[0].addr = htole32(data->cmd_paddr); - desc->segs[0].len = htole32(4 + buf->size + hdrlen + pad); + desc->segs[0].len = htole32(4 + buf->size + hdrspace); /* Other DMA segments are for data payload. */ seg = &segs[0]; for (i = 1; i <= nsegs; i++) { @@ -2353,9 +2345,10 @@ wpi_tx_data(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni) uint32_t flags; uint16_t qos; uint8_t tid, type; - int ac, error, rate, ismcast, totlen; + int ac, error, rate, ismcast, hdrlen, totlen; wh = mtod(m, struct ieee80211_frame *); + hdrlen = ieee80211_anyhdrsize(wh); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); @@ -2399,12 +2392,12 @@ wpi_tx_data(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni) /* 802.11 header may have moved. 
*/ wh = mtod(m, struct ieee80211_frame *); } - totlen = m->m_pkthdr.len; + totlen = m->m_pkthdr.len - (hdrlen & 3); if (ieee80211_radiotap_active_vap(vap)) { struct wpi_tx_radiotap_header *tap = &sc->sc_txtap; - tap->wt_flags = 0; + tap->wt_flags = IEEE80211_RADIOTAP_F_DATAPAD; tap->wt_rate = rate; if (k != NULL) tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; @@ -2420,6 +2413,9 @@ wpi_tx_data(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni) flags |= WPI_TX_NEED_ACK; } + if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) + flags |= WPI_TX_MORE_FRAG; /* Cannot happen yet. */ + /* Check if frame must be protected using RTS/CTS or CTS-to-self. */ if (!ismcast) { /* NB: Group frames are sent using CCK in 802.11b/g. */ @@ -2518,11 +2514,12 @@ wpi_tx_data_raw(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni, struct wpi_buf tx_data; uint32_t flags; uint8_t type; - int ac, rate, totlen; + int ac, rate, hdrlen, totlen; wh = mtod(m, struct ieee80211_frame *); + hdrlen = ieee80211_anyhdrsize(wh); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; - totlen = m->m_pkthdr.len; + totlen = m->m_pkthdr.len - (hdrlen & 3); ac = params->ibp_pri & 3; @@ -2544,6 +2541,8 @@ wpi_tx_data_raw(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni, tap->wt_flags = 0; tap->wt_rate = rate; + if (params->ibp_flags & IEEE80211_BPF_DATAPAD) + tap->wt_flags |= IEEE80211_RADIOTAP_F_DATAPAD; ieee80211_radiotap_tx(vap, m); } @@ -3434,31 +3433,28 @@ wpi_config(struct wpi_softc *sc) if (IEEE80211_IS_CHAN_2GHZ(ic->ic_curchan)) sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); + sc->rxon.filter = WPI_FILTER_MULTICAST; switch (ic->ic_opmode) { case IEEE80211_M_STA: sc->rxon.mode = WPI_MODE_STA; - sc->rxon.filter = htole32(WPI_FILTER_MULTICAST); break; case IEEE80211_M_IBSS: sc->rxon.mode = WPI_MODE_IBSS; - sc->rxon.filter = htole32(WPI_FILTER_BEACON | - WPI_FILTER_MULTICAST); + sc->rxon.filter |= WPI_FILTER_BEACON; break; /* XXX workaround for passive channels selection */ 
case IEEE80211_M_AHDEMO: - sc->rxon.filter = htole32(WPI_FILTER_MULTICAST); - /* FALLTHROUGH */ case IEEE80211_M_HOSTAP: sc->rxon.mode = WPI_MODE_HOSTAP; break; case IEEE80211_M_MONITOR: sc->rxon.mode = WPI_MODE_MONITOR; - sc->rxon.filter = htole32(WPI_FILTER_MULTICAST); break; default: device_printf(sc->sc_dev, "unknown opmode %d\n", ic->ic_opmode); return EINVAL; } + sc->rxon.filter = htole32(sc->rxon.filter); wpi_set_promisc(sc); sc->rxon.cck_mask = 0x0f; /* not yet negotiated */ sc->rxon.ofdm_mask = 0xff; /* not yet negotiated */ @@ -3677,7 +3673,7 @@ wpi_scan(struct wpi_softc *sc, struct ieee80211_channel *c) } else hdr->crc_threshold = WPI_SCAN_CRC_TH_NEVER; - if (!(c->ic_flags & IEEE80211_CHAN_PASSIVE)) + if (!IEEE80211_IS_CHAN_PASSIVE(c)) chan->flags |= WPI_CHAN_ACTIVE; /* @@ -3702,7 +3698,7 @@ wpi_scan(struct wpi_softc *sc, struct ieee80211_channel *c) chan->rf_gain = 0x28; DPRINTF(sc, WPI_DEBUG_SCAN, "Scanning %u Passive: %d\n", - chan->chan, (c->ic_flags & IEEE80211_CHAN_PASSIVE) ? 1 : 0); + chan->chan, IEEE80211_IS_CHAN_PASSIVE(c)); hdr->nchan++; chan++; @@ -3838,11 +3834,13 @@ wpi_update_beacon(struct ieee80211vap *vap, int item) struct wpi_softc *sc = ifp->if_softc; int error; + WPI_LOCK(sc); if ((error = wpi_setup_beacon(sc, ni)) != 0) { device_printf(sc->sc_dev, "%s: could not update beacon frame, error %d", __func__, error); } + WPI_UNLOCK(sc); } static int @@ -3939,6 +3937,8 @@ wpi_run(struct wpi_softc *sc, struct ieee80211vap *vap) /* Enable power-saving mode if requested by user. */ if (vap->iv_flags & IEEE80211_F_PMGTON) (void)wpi_set_pslevel(sc, 0, 3, 1); + else + (void)wpi_set_pslevel(sc, 0, 0, 1); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); @@ -4384,6 +4384,8 @@ wpi_apm_init(struct wpi_softc *sc) DELAY(20); /* Disable L1-Active. */ wpi_prph_setbits(sc, WPI_APMG_PCI_STT, WPI_APMG_PCI_STT_L1A_DIS); + /* ??? 
*/ + wpi_prph_clrbits(sc, WPI_APMG_PS, 0x00000E00); wpi_nic_unlock(sc); return 0; diff --git a/sys/dev/wpi/if_wpireg.h b/sys/dev/wpi/if_wpireg.h index cfd8a09..24dcb2d 100644 --- a/sys/dev/wpi/if_wpireg.h +++ b/sys/dev/wpi/if_wpireg.h @@ -470,6 +470,7 @@ struct wpi_cmd_data { #define WPI_TX_FULL_TXOP (1 << 7) #define WPI_TX_BT_DISABLE (1 << 12) /* bluetooth coexistence */ #define WPI_TX_AUTO_SEQ (1 << 13) +#define WPI_TX_MORE_FRAG (1 << 14) #define WPI_TX_INSERT_TSTAMP (1 << 16) uint8_t plcp; @@ -964,11 +965,6 @@ static const char * const wpi_fw_errmsg[] = { "FATAL_ERROR" }; -/* XXX description for some error codes (error data). */ -/* 0x00000074 - wrong totlen field */ -/* 0x000003B3 - powersave error */ -/* 0x00000447 - wrong channel selected */ - #define WPI_READ(sc, reg) \ bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (reg)) diff --git a/sys/dev/xen/xenstore/xenstore_dev.c b/sys/dev/xen/xenstore/xenstore_dev.c index 54b5e82..80396fd 100644 --- a/sys/dev/xen/xenstore/xenstore_dev.c +++ b/sys/dev/xen/xenstore/xenstore_dev.c @@ -77,7 +77,11 @@ static int xs_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { int error; - struct xs_dev_data *u = dev->si_drv1; + struct xs_dev_data *u; + + error = devfs_get_cdevpriv((void **)&u); + if (error != 0) + return (error); while (u->read_prod == u->read_cons) { error = tsleep(u, PCATCH, "xsdread", hz/10); @@ -115,11 +119,15 @@ static int xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag) { int error; - struct xs_dev_data *u = dev->si_drv1; + struct xs_dev_data *u; struct xs_dev_transaction *trans; void *reply; int len = uio->uio_resid; + error = devfs_get_cdevpriv((void **)&u); + if (error != 0) + return (error); + if ((len + u->len) > sizeof(u->u.buffer)) return (EINVAL); @@ -177,25 +185,10 @@ xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag) return (error); } -static int -xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) -{ - struct xs_dev_data *u; - -#if 0 /* XXX figure out if 
equiv needed */ - nonseekable_open(inode, filp); -#endif - u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO); - LIST_INIT(&u->transactions); - dev->si_drv1 = u; - - return (0); -} - -static int -xs_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +static void +xs_dev_dtor(void *arg) { - struct xs_dev_data *u = dev->si_drv1; + struct xs_dev_data *u = arg; struct xs_dev_transaction *trans, *tmp; LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) { @@ -205,7 +198,21 @@ xs_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) } free(u, M_XENSTORE); - return (0); +} + +static int +xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + struct xs_dev_data *u; + int error; + + u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO); + LIST_INIT(&u->transactions); + error = devfs_set_cdevpriv(u, xs_dev_dtor); + if (error != 0) + free(u, M_XENSTORE); + + return (error); } static struct cdevsw xs_dev_cdevsw = { @@ -213,7 +220,6 @@ static struct cdevsw xs_dev_cdevsw = { .d_read = xs_dev_read, .d_write = xs_dev_write, .d_open = xs_dev_open, - .d_close = xs_dev_close, .d_name = "xs_dev", }; @@ -262,8 +268,8 @@ xs_dev_attach(device_t dev) { struct cdev *xs_cdev; - xs_cdev = make_dev(&xs_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, - "xen/xenstore"); + xs_cdev = make_dev_credf(MAKEDEV_ETERNAL, &xs_dev_cdevsw, 0, NULL, + UID_ROOT, GID_WHEEL, 0400, "xen/xenstore"); if (xs_cdev == NULL) return (EINVAL); diff --git a/sys/fs/ext2fs/ext2_htree.c b/sys/fs/ext2fs/ext2_htree.c index 70a2f47..c847aa4 100644 --- a/sys/fs/ext2fs/ext2_htree.c +++ b/sys/fs/ext2fs/ext2_htree.c @@ -861,7 +861,7 @@ ext2_htree_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry, ext2_htree_split_dirblock((char *)bp->b_data, newdirblock, blksize, fs->e3fs_hash_seed, hash_version, &split_hash, entry); cursize = roundup(ip->i_size, blksize); - dirsize = roundup(ip->i_size, blksize) + blksize; + dirsize = cursize + blksize; blknum = dirsize / blksize - 
1; /* Add index entry for the new directory block */ diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c index 6879e42..b82a41e 100644 --- a/sys/fs/ext2fs/ext2_vfsops.c +++ b/sys/fs/ext2fs/ext2_vfsops.c @@ -355,7 +355,7 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es, } fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); - fs->e2fs_itpg = fs->e2fs_ipg /fs->e2fs_ipb; + fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; /* s_resuid / s_resgid ? */ fs->e2fs_gcount = (es->e2fs_bcount - es->e2fs_first_dblock + EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs); @@ -365,7 +365,7 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es, fs->e2fs_gd = malloc(db_count * fs->e2fs_bsize, M_EXT2MNT, M_WAITOK); fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * - sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK); + sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); /* * Adjust logic_sb_block. @@ -379,6 +379,7 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es, fsbtodb(fs, logic_sb_block + i + 1 ), fs->e2fs_bsize, NOCRED, &bp); if (error) { + free(fs->e2fs_contigdirs, M_EXT2MNT); free(fs->e2fs_gd, M_EXT2MNT); brelse(bp); return (error); @@ -390,11 +391,11 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es, brelse(bp); bp = NULL; } + /* Initialization for the ext2 Orlov allocator variant. 
*/ fs->e2fs_total_dir = 0; - for (i=0; i < fs->e2fs_gcount; i++){ + for (i = 0; i < fs->e2fs_gcount; i++) fs->e2fs_total_dir += fs->e2fs_gd[i].ext2bgd_ndirs; - fs->e2fs_contigdirs[i] = 0; - } + if (es->e2fs_rev == E2FS_REV0 || !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) fs->e2fs_maxfilesize = 0x7fffffff; diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 32b9540..a80de54 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -1555,6 +1555,7 @@ cpususpend_handler(void) cpu_ops.cpu_resume(); /* Resume MCA and local APIC */ + lapic_xapic_mode(); mca_resume(); lapic_setup(0); diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 53abf92..4df182a 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1401,7 +1401,8 @@ each_writable_segment(td, func, closure) object = backing_object; } ignore_entry = object->type != OBJT_DEFAULT && - object->type != OBJT_SWAP && object->type != OBJT_VNODE; + object->type != OBJT_SWAP && object->type != OBJT_VNODE && + object->type != OBJT_PHYS; VM_OBJECT_RUNLOCK(object); if (ignore_entry) continue; diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c index d58302a..d65ba5a 100644 --- a/sys/kern/kern_procctl.c +++ b/sys/kern/kern_procctl.c @@ -160,7 +160,7 @@ static int reap_status(struct thread *td, struct proc *p, struct procctl_reaper_status *rs) { - struct proc *reap, *p2; + struct proc *reap, *p2, *first_p; sx_assert(&proctree_lock, SX_LOCKED); bzero(rs, sizeof(*rs)); @@ -176,8 +176,10 @@ reap_status(struct thread *td, struct proc *p, rs->rs_descendants = 0; rs->rs_children = 0; if (!LIST_EMPTY(&reap->p_reaplist)) { - KASSERT(!LIST_EMPTY(&reap->p_children), ("no children")); - rs->rs_pid = LIST_FIRST(&reap->p_children)->p_pid; + first_p = LIST_FIRST(&reap->p_children); + if (first_p == NULL) + first_p = LIST_FIRST(&reap->p_reaplist); + rs->rs_pid = first_p->p_pid; LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) { if (proc_realparent(p2) == 
reap) rs->rs_children++; @@ -239,13 +241,13 @@ reap_kill(struct thread *td, struct proc *p, struct procctl_reaper_kill *rk) int error, error1; sx_assert(&proctree_lock, SX_LOCKED); - PROC_UNLOCK(p); if (IN_CAPABILITY_MODE(td)) return (ECAPMODE); if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG) return (EINVAL); if ((rk->rk_flags & ~REAPER_KILL_CHILDREN) != 0) return (EINVAL); + PROC_UNLOCK(p); reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p; ksiginfo_init(&ksi); ksi.ksi_signo = rk->rk_sig; diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c index 321e4ae..086c4a9 100644 --- a/sys/kern/subr_taskqueue.c +++ b/sys/kern/subr_taskqueue.c @@ -571,8 +571,9 @@ taskqueue_swi_giant_run(void *dummy) static int _taskqueue_start_threads(struct taskqueue **tqp, int count, int pri, - cpuset_t *mask, const char *ktname) + cpuset_t *mask, const char *name, va_list ap) { + char ktname[MAXCOMLEN + 1]; struct thread *td; struct taskqueue *tq; int i, error; @@ -580,6 +581,7 @@ _taskqueue_start_threads(struct taskqueue **tqp, int count, int pri, if (count <= 0) return (EINVAL); + vsnprintf(ktname, sizeof(ktname), name, ap); tq = *tqp; tq->tq_threads = malloc(sizeof(struct thread *) * count, M_TASKQUEUE, @@ -635,27 +637,35 @@ int taskqueue_start_threads(struct taskqueue **tqp, int count, int pri, const char *name, ...) { - char ktname[MAXCOMLEN + 1]; va_list ap; + int error; va_start(ap, name); - vsnprintf(ktname, sizeof(ktname), name, ap); + error = _taskqueue_start_threads(tqp, count, pri, NULL, name, ap); va_end(ap); - - return (_taskqueue_start_threads(tqp, count, pri, NULL, ktname)); + return (error); } int -taskqueue_start_threads_pinned(struct taskqueue **tqp, int count, int pri, - int cpu_id, const char *name, ...) +taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count, int pri, + cpuset_t *mask, const char *name, ...) 
{ - char ktname[MAXCOMLEN + 1]; va_list ap; - cpuset_t mask; + int error; va_start(ap, name); - vsnprintf(ktname, sizeof(ktname), name, ap); + error = _taskqueue_start_threads(tqp, count, pri, mask, name, ap); va_end(ap); + return (error); +} + +int +taskqueue_start_threads_pinned(struct taskqueue **tqp, int count, int pri, + int cpu_id, const char *name, ...) +{ + cpuset_t mask; + va_list ap; + int error; /* * In case someone passes in NOCPU, just fall back to the @@ -666,8 +676,11 @@ taskqueue_start_threads_pinned(struct taskqueue **tqp, int count, int pri, CPU_SET(cpu_id, &mask); } - return (_taskqueue_start_threads(tqp, count, pri, - cpu_id == NOCPU ? NULL : &mask, ktname)); + va_start(ap, name); + error = _taskqueue_start_threads(tqp, count, pri, + cpu_id == NOCPU ? NULL : &mask, name, ap); + va_end(ap); + return (error); } static inline void diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index 7a5f624..bab8f50 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -120,6 +120,18 @@ CTASSERT(sizeof(struct struct_m_ext) == 28); #endif /* + * Assert that the queue(3) macros produce code of the same size as an old + * plain pointer does. + */ +#ifdef INVARIANTS +static struct mbuf m_assertbuf; +CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next)); +CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next)); +CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt)); +CTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt)); +#endif + +/* * m_get2() allocates minimum mbuf that would fit "size" argument. 
*/ struct mbuf * diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 189a30f..b897e05 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -3439,11 +3439,9 @@ soisdisconnecting(struct socket *so) SOCKBUF_LOCK(&so->so_rcv); so->so_state &= ~SS_ISCONNECTING; so->so_state |= SS_ISDISCONNECTING; - so->so_rcv.sb_state |= SBS_CANTRCVMORE; - sorwakeup_locked(so); + socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); - so->so_snd.sb_state |= SBS_CANTSENDMORE; - sowwakeup_locked(so); + socantsendmore_locked(so); wakeup(&so->so_timeo); } @@ -3458,12 +3456,10 @@ soisdisconnected(struct socket *so) SOCKBUF_LOCK(&so->so_rcv); so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISDISCONNECTED; - so->so_rcv.sb_state |= SBS_CANTRCVMORE; - sorwakeup_locked(so); + socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); - so->so_snd.sb_state |= SBS_CANTSENDMORE; sbdrop_locked(&so->so_snd, sbused(&so->so_snd)); - sowwakeup_locked(so); + socantsendmore_locked(so); wakeup(&so->so_timeo); } diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 912863e..cfa8f45 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -122,6 +122,10 @@ static unsigned long numvnodes; SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "Number of vnodes in existence"); +static u_long vnodes_created; +SYSCTL_ULONG(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created, + 0, "Number of vnodes created by getnewvnode"); + /* * Conversion tables for conversion from vnode types to inode formats * and back. 
@@ -156,6 +160,10 @@ static int vlru_allow_cache_src; SYSCTL_INT(_vfs, OID_AUTO, vlru_allow_cache_src, CTLFLAG_RW, &vlru_allow_cache_src, 0, "Allow vlru to reclaim source vnode"); +static u_long recycles_count; +SYSCTL_ULONG(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count, 0, + "Number of vnodes recycled to avoid exceding kern.maxvnodes"); + /* * Various variables used for debugging the new implementation of * reassignbuf(). @@ -788,6 +796,7 @@ vlrureclaim(struct mount *mp) } KASSERT((vp->v_iflag & VI_DOOMED) == 0, ("VI_DOOMED unexpectedly detected in vlrureclaim()")); + atomic_add_long(&recycles_count, 1); vgonel(vp); VOP_UNLOCK(vp, 0); vdropl(vp); @@ -988,8 +997,10 @@ vtryrecycle(struct vnode *vp) __func__, vp); return (EBUSY); } - if ((vp->v_iflag & VI_DOOMED) == 0) + if ((vp->v_iflag & VI_DOOMED) == 0) { + atomic_add_long(&recycles_count, 1); vgonel(vp); + } VOP_UNLOCK(vp, LK_INTERLOCK); vn_finished_write(vnmp); return (0); @@ -1093,6 +1104,7 @@ getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, atomic_add_long(&numvnodes, 1); mtx_unlock(&vnode_free_list_mtx); alloc: + atomic_add_long(&vnodes_created, 1); vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO); /* * Setup locks. @@ -3191,6 +3203,7 @@ DB_SHOW_COMMAND(mount, db_show_mount) db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); + db_printf(" mnt_lockref = %d\n", mp->mnt_lockref); db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); db_printf(" mnt_secondary_accwrites = %d\n", mp->mnt_secondary_accwrites); diff --git a/sys/mips/conf/AR71XX_BASE b/sys/mips/conf/AR71XX_BASE index c94dd32..357f965 100644 --- a/sys/mips/conf/AR71XX_BASE +++ b/sys/mips/conf/AR71XX_BASE @@ -60,7 +60,6 @@ device ar71xx_pci options IEEE80211_DEBUG options IEEE80211_ALQ options IEEE80211_SUPPORT_MESH -# This option is currently broken for if_ath_tx. 
options IEEE80211_SUPPORT_TDMA options IEEE80211_AMPDU_AGE device wlan # 802.11 support diff --git a/sys/modules/cxgbe/if_cxgbe/Makefile b/sys/modules/cxgbe/if_cxgbe/Makefile index 32347f4..a66e45a 100644 --- a/sys/modules/cxgbe/if_cxgbe/Makefile +++ b/sys/modules/cxgbe/if_cxgbe/Makefile @@ -26,4 +26,3 @@ SRCS+= t4_tracer.c CFLAGS+= -I${CXGBE} .include <bsd.kmod.mk> -CFLAGS+= ${GCC_MS_EXTENSIONS} diff --git a/sys/modules/drm2/radeonkms/Makefile b/sys/modules/drm2/radeonkms/Makefile index 076b825..1e674a7 100644 --- a/sys/modules/drm2/radeonkms/Makefile +++ b/sys/modules/drm2/radeonkms/Makefile @@ -106,6 +106,6 @@ SRCS += \ iicbus_if.h \ pci_if.h -CFLAGS += -I${.CURDIR}/../../../dev/drm2/radeon ${GCC_MS_EXTENSIONS} +CFLAGS += -I${.CURDIR}/../../../dev/drm2/radeon .include <bsd.kmod.mk> diff --git a/sys/modules/ibcore/Makefile b/sys/modules/ibcore/Makefile index fea77b9..60ec8dd 100644 --- a/sys/modules/ibcore/Makefile +++ b/sys/modules/ibcore/Makefile @@ -4,8 +4,8 @@ KMOD= ibcore SRCS= addr.c iwcm.c sa_query.c ucma.c uverbs_cmd.c \ - agent.c local_sa.c multicast.c smi.c ud_header.c uverbs_main.c \ - mad.c notice.c umem.c uverbs_marshall.c \ + agent.c multicast.c smi.c ud_header.c uverbs_main.c \ + mad.c peer_mem.c umem.c uverbs_marshall.c \ cache.c device.c packer.c sysfs.c user_mad.c verbs.c \ cm.c fmr_pool.c mad_rmpp.c ucm.c cma.c \ vnode_if.h device_if.h bus_if.h pci_if.h \ @@ -18,6 +18,6 @@ CFLAGS+= -DINET6 -DINET .include <bsd.kmod.mk> -CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} +CFLAGS+= -Wno-cast-qual -Wno-pointer-arith CWARNFLAGS.cm.c= -Wno-unused-function diff --git a/sys/modules/ipoib/Makefile b/sys/modules/ipoib/Makefile index d634e37..ba8c6d3 100644 --- a/sys/modules/ipoib/Makefile +++ b/sys/modules/ipoib/Makefile @@ -15,4 +15,4 @@ CFLAGS+= -DINET6 -DINET .include <bsd.kmod.mk> -CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} +CFLAGS+= -Wno-cast-qual -Wno-pointer-arith diff --git a/sys/modules/mlx4/Makefile 
b/sys/modules/mlx4/Makefile index d989ff0..cd2b177 100644 --- a/sys/modules/mlx4/Makefile +++ b/sys/modules/mlx4/Makefile @@ -15,7 +15,6 @@ CFLAGS+= -I${.CURDIR}/../../ofed/include/ .include <bsd.kmod.mk> -CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} -CFLAGS+= -fms-extensions +CFLAGS+= -Wno-cast-qual -Wno-pointer-arith CWARNFLAGS.mcg.c= -Wno-unused diff --git a/sys/modules/mlx4ib/Makefile b/sys/modules/mlx4ib/Makefile index 57592bc..7bfcb4a 100644 --- a/sys/modules/mlx4ib/Makefile +++ b/sys/modules/mlx4ib/Makefile @@ -6,14 +6,14 @@ KMOD= mlx4ib SRCS= device_if.h bus_if.h vnode_if.h pci_if.h \ opt_inet.h opt_inet6.h \ alias_GUID.c mcg.c sysfs.c ah.c cq.c \ + mlx4_exp.c \ doorbell.c mad.c main.c mr.c qp.c srq.c wc.c cm.c CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4 CFLAGS+= -I${.CURDIR}/../../ofed/include/ CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM CFLAGS+= -DINET6 -DINET -CFLAGS+= -fms-extensions .include <bsd.kmod.mk> -CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} +CFLAGS+= -Wno-cast-qual -Wno-pointer-arith diff --git a/sys/modules/mlxen/Makefile b/sys/modules/mlxen/Makefile index 258cf7e..02f7776 100644 --- a/sys/modules/mlxen/Makefile +++ b/sys/modules/mlxen/Makefile @@ -8,8 +8,7 @@ SRCS += en_rx.c en_tx.c utils.c SRCS += opt_inet.h opt_inet6.h CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4 CFLAGS+= -I${.CURDIR}/../../ofed/include/ -CFLAGS+= -fms-extensions .include <bsd.kmod.mk> -CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} +CFLAGS+= -Wno-cast-qual -Wno-pointer-arith diff --git a/sys/modules/mthca/Makefile b/sys/modules/mthca/Makefile index 25daedb..31fcebd 100644 --- a/sys/modules/mthca/Makefile +++ b/sys/modules/mthca/Makefile @@ -14,4 +14,4 @@ CFLAGS+= -I${.CURDIR}/../../ofed/include .include <bsd.kmod.mk> -CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} +CFLAGS+= -Wno-cast-qual -Wno-pointer-arith diff --git a/sys/modules/usb/udl/Makefile b/sys/modules/usb/udl/Makefile new 
file mode 100644 index 0000000..5323883 --- /dev/null +++ b/sys/modules/usb/udl/Makefile @@ -0,0 +1,12 @@ +# $FreeBSD$ + +S= ${.CURDIR}/../../.. + +.PATH: $S/dev/usb/video + +KMOD= udl +SRCS= opt_bus.h opt_usb.h fb_if.h device_if.h \ + bus_if.h usb_if.h vnode_if.h usbdevs.h \ + udl.c + +.include <bsd.kmod.mk> diff --git a/sys/modules/videomode/Makefile b/sys/modules/videomode/Makefile new file mode 100644 index 0000000..9b09aae --- /dev/null +++ b/sys/modules/videomode/Makefile @@ -0,0 +1,40 @@ +# +# $FreeBSD$ +# +# Copyright (c) 2015 Hans Petter Selasky. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# + +S= ${.CURDIR}/../.. 
+ +.PATH: $S/dev/videomode + +KMOD= videomode +SRCS= bus_if.h device_if.h vnode_if.h \ + opt_bus.h opt_ddb.h opt_videomode.h \ + videomode.c \ + edid.c \ + pickmode.c \ + vesagtf.c + +.include <bsd.kmod.mk> diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 019bbf5..753854d 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -228,7 +228,7 @@ struct bridge_softc { static VNET_DEFINE(struct mtx, bridge_list_mtx); #define V_bridge_list_mtx VNET(bridge_list_mtx) -eventhandler_tag bridge_detach_cookie = NULL; +static eventhandler_tag bridge_detach_cookie; int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; @@ -538,6 +538,7 @@ vnet_bridge_uninit(const void *unused __unused) { if_clone_detach(V_bridge_cloner); + V_bridge_cloner = NULL; BRIDGE_LIST_LOCK_DESTROY(); } VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, @@ -1797,7 +1798,13 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) if (ifp->if_flags & IFF_RENAMING) return; - + if (V_bridge_cloner == NULL) { + /* + * This detach handler can be called after + * vnet_bridge_uninit(). Just return in that case. 
+ */ + return; + } /* Check if the interface is a bridge member */ if (sc != NULL) { BRIDGE_LOCK(sc); diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 78a4e8a..d14f6bf 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1567,13 +1567,18 @@ void pf_free_rule(struct pf_rule *); #ifdef INET int pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *); +int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *, + struct pf_pdesc *); #endif /* INET */ #ifdef INET6 int pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *); +int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *, + struct pf_pdesc *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, u_int8_t); void pf_addr_inc(struct pf_addr *, sa_family_t); +int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *); #endif /* INET6 */ u_int32_t pf_new_isn(struct pf_state *); @@ -1589,10 +1594,6 @@ int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); void pf_normalize_init(void); void pf_normalize_cleanup(void); -int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *, - struct pf_pdesc *); -int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *, - struct pf_pdesc *); int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *, struct pf_pdesc *); void pf_normalize_tcp_cleanup(struct pf_state *); @@ -1669,6 +1670,8 @@ int pfi_clear_flags(const char *, int); int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int); int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int); +int pf_addr_cmp(struct pf_addr *, struct pf_addr *, + sa_family_t); void pf_qid2qname(u_int32_t, char *); VNET_DECLARE(struct pf_kstatus, pf_status); diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c index 92d0715..1e9e692 100644 --- a/sys/netinet6/frag6.c +++ b/sys/netinet6/frag6.c @@ -541,27 +541,16 @@ insert: *q6->ip6q_nxtp = (u_char)(nxt & 0xff); #endif - /* Delete frag6 header */ - if (m->m_len 
>= offset + sizeof(struct ip6_frag)) { - /* This is the only possible case with !PULLDOWN_TEST */ - ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag), - offset); - m->m_data += sizeof(struct ip6_frag); - m->m_len -= sizeof(struct ip6_frag); - } else { - /* this comes with no copy if the boundary is on cluster */ - if ((t = m_split(m, offset, M_NOWAIT)) == NULL) { - frag6_remque(q6); - V_frag6_nfrags -= q6->ip6q_nfrag; + if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) { + frag6_remque(q6); + V_frag6_nfrags -= q6->ip6q_nfrag; #ifdef MAC - mac_ip6q_destroy(q6); + mac_ip6q_destroy(q6); #endif - free(q6, M_FTABLE); - V_frag6_nfragpackets--; - goto dropfrag; - } - m_adj(t, sizeof(struct ip6_frag)); - m_cat(m, t); + free(q6, M_FTABLE); + V_frag6_nfragpackets--; + + goto dropfrag; } /* @@ -775,3 +764,27 @@ frag6_drain(void) IP6Q_UNLOCK(); VNET_LIST_RUNLOCK_NOSLEEP(); } + +int +ip6_deletefraghdr(struct mbuf *m, int offset, int wait) +{ + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct mbuf *t; + + /* Delete frag6 header. */ + if (m->m_len >= offset + sizeof(struct ip6_frag)) { + /* This is the only possible case with !PULLDOWN_TEST. */ + bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag), + offset); + m->m_data += sizeof(struct ip6_frag); + m->m_len -= sizeof(struct ip6_frag); + } else { + /* This comes with no copy if the boundary is on cluster. 
*/ + if ((t = m_split(m, offset, wait)) == NULL) + return (ENOMEM); + m_adj(t, sizeof(struct ip6_frag)); + m_cat(m, t); + } + + return (0); +} diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index a3474d3..a94d797 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -212,6 +212,65 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) *(u_short *)(m->m_data + offset) = csum; } +int +ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, + int mtu) +{ + struct mbuf *m, **mnext, *m_frgpart; + struct ip6_hdr *ip6, *mhip6; + struct ip6_frag *ip6f; + int off; + int error; + int tlen = m0->m_pkthdr.len; + uint32_t id = htonl(ip6_randomid()); + + m = m0; + ip6 = mtod(m, struct ip6_hdr *); + mnext = &m->m_nextpkt; + + for (off = hlen; off < tlen; off += mtu) { + m = m_gethdr(M_NOWAIT, MT_DATA); + if (!m) { + IP6STAT_INC(ip6s_odropped); + return (ENOBUFS); + } + m->m_flags = m0->m_flags & M_COPYFLAGS; + *mnext = m; + mnext = &m->m_nextpkt; + m->m_data += max_linkhdr; + mhip6 = mtod(m, struct ip6_hdr *); + *mhip6 = *ip6; + m->m_len = sizeof(*mhip6); + error = ip6_insertfraghdr(m0, m, hlen, &ip6f); + if (error) { + IP6STAT_INC(ip6s_odropped); + return (error); + } + ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); + if (off + mtu >= tlen) + mtu = tlen - off; + else + ip6f->ip6f_offlg |= IP6F_MORE_FRAG; + mhip6->ip6_plen = htons((u_short)(mtu + hlen + + sizeof(*ip6f) - sizeof(struct ip6_hdr))); + if ((m_frgpart = m_copy(m0, off, mtu)) == 0) { + IP6STAT_INC(ip6s_odropped); + return (ENOBUFS); + } + m_cat(m, m_frgpart); + m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); + m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; + m->m_pkthdr.rcvif = NULL; + ip6f->ip6f_reserved = 0; + ip6f->ip6f_ident = id; + ip6f->ip6f_nxt = nextproto; + IP6STAT_INC(ip6s_ofragments); + in6_ifstat_inc(ifp, ifs6_out_fragcreat); + } + + return (0); +} + /* * IP6 output. 
The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). @@ -236,11 +295,11 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { - struct ip6_hdr *ip6, *mhip6; + struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; - int hlen, tlen, len, off; + int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; @@ -901,9 +960,6 @@ passout: in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { - struct mbuf **mnext, *m_frgpart; - struct ip6_frag *ip6f; - u_int32_t id = htonl(ip6_randomid()); u_char nextproto; /* @@ -937,8 +993,6 @@ passout: m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif - mnext = &m->m_nextpkt; - /* * Change the next header field of the last header in the * unfragmentable part. @@ -963,47 +1017,8 @@ passout: * chain. */ m0 = m; - for (off = hlen; off < tlen; off += len) { - m = m_gethdr(M_NOWAIT, MT_DATA); - if (!m) { - error = ENOBUFS; - IP6STAT_INC(ip6s_odropped); - goto sendorfree; - } - m->m_flags = m0->m_flags & M_COPYFLAGS; - *mnext = m; - mnext = &m->m_nextpkt; - m->m_data += max_linkhdr; - mhip6 = mtod(m, struct ip6_hdr *); - *mhip6 = *ip6; - m->m_len = sizeof(*mhip6); - error = ip6_insertfraghdr(m0, m, hlen, &ip6f); - if (error) { - IP6STAT_INC(ip6s_odropped); - goto sendorfree; - } - ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); - if (off + len >= tlen) - len = tlen - off; - else - ip6f->ip6f_offlg |= IP6F_MORE_FRAG; - mhip6->ip6_plen = htons((u_short)(len + hlen + - sizeof(*ip6f) - sizeof(struct ip6_hdr))); - if ((m_frgpart = m_copy(m0, off, len)) == 0) { - error = ENOBUFS; - IP6STAT_INC(ip6s_odropped); - goto sendorfree; - } - m_cat(m, m_frgpart); - m->m_pkthdr.len = len + hlen + sizeof(*ip6f); - m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; - m->m_pkthdr.rcvif = NULL; - 
ip6f->ip6f_reserved = 0; - ip6f->ip6f_ident = id; - ip6f->ip6f_nxt = nextproto; - IP6STAT_INC(ip6s_ofragments); - in6_ifstat_inc(ifp, ifs6_out_fragcreat); - } + if ((error = ip6_fragment(ifp, m, hlen, nextproto, len))) + goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index 155a607..54c5c66 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -388,6 +388,8 @@ int ip6_setpktopts(struct mbuf *, struct ip6_pktopts *, void ip6_clearpktopts(struct ip6_pktopts *, int); struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int); int ip6_optlen(struct inpcb *); +int ip6_deletefraghdr(struct mbuf *, int, int); +int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int); int route6_input(struct mbuf **, int *, int); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 15667a6..b32288b 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -362,6 +362,45 @@ VNET_DEFINE(void *, pf_swi_cookie); VNET_DEFINE(uint32_t, pf_hashseed); #define V_pf_hashseed VNET(pf_hashseed) +int +pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) +{ + + switch (af) { +#ifdef INET + case AF_INET: + if (a->addr32[0] > b->addr32[0]) + return (1); + if (a->addr32[0] < b->addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->addr32[3] > b->addr32[3]) + return (1); + if (a->addr32[3] < b->addr32[3]) + return (-1); + if (a->addr32[2] > b->addr32[2]) + return (1); + if (a->addr32[2] < b->addr32[2]) + return (-1); + if (a->addr32[1] > b->addr32[1]) + return (1); + if (a->addr32[1] < b->addr32[1]) + return (-1); + if (a->addr32[0] > b->addr32[0]) + return (1); + if (a->addr32[0] < b->addr32[0]) + return (-1); + break; +#endif /* INET6 */ + default: + panic("%s: unknown address family %u", __func__, af); + } + return (0); +} + static __inline uint32_t pf_hashkey(struct pf_state_key *sk) { @@ -5460,7 +5499,7 @@ pf_route6(struct mbuf **m, struct 
pf_rule *r, int dir, struct ifnet *oifp, goto bad; if (oifp != ifp) { - if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) + if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; @@ -6018,15 +6057,20 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0, *n = NULL; + struct m_tag *mtag; struct ip6_hdr *h = NULL; struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, terminal = 0, dirndx, rh_cnt = 0; + int fwdir = dir; M_ASSERTPKTHDR(m); + if (ifp != m->m_pkthdr.rcvif) + fwdir = PF_FWD; + if (!V_pf_status.running) return (PF_PASS); @@ -6388,6 +6432,11 @@ done: if (s) PF_STATE_UNLOCK(s); + /* If reassembled packet passed, create new fragments. */ + if (action == PF_PASS && *m0 && fwdir == PF_FWD && + (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL) + action = pf_refragment6(ifp, m0, mtag); + return (action); } #endif /* INET6 */ diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index 96f638e..ac0e0fb 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -43,7 +43,7 @@ #endif #endif -enum { PF_INOUT, PF_IN, PF_OUT }; +enum { PF_INOUT, PF_IN, PF_OUT, PF_FWD }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h index baff00a..3aacb2e 100644 --- a/sys/netpfil/pf/pf_mtag.h +++ b/sys/netpfil/pf/pf_mtag.h @@ -39,6 +39,7 @@ #define PF_TAG_TRANSLATE_LOCALHOST 0x04 #define PF_PACKET_LOOPED 0x08 #define PF_FASTFWD_OURS_PRESENT 0x10 +#define PF_REASSEMBLED 0x20 struct pf_mtag { void *hdr; /* saved hdr pos in mbuf, for ECN */ diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index fb30331..aed2a55 100644 --- 
a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -1,5 +1,6 @@ /*- * Copyright 2001 Niels Provos <provos@citi.umich.edu> + * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_var.h> +#include <netinet6/ip6_var.h> #include <netinet/tcp.h> #include <netinet/tcp_fsm.h> #include <netinet/tcp_seq.h> @@ -57,38 +59,51 @@ __FBSDID("$FreeBSD$"); #endif /* INET6 */ struct pf_frent { - LIST_ENTRY(pf_frent) fr_next; - union { - struct { - struct ip *_fr_ip; - struct mbuf *_fr_m; - } _frag; - struct { - uint16_t _fr_off; - uint16_t _fr_end; - } _cache; - } _u; + TAILQ_ENTRY(pf_frent) fr_next; + struct mbuf *fe_m; + uint16_t fe_hdrlen; /* ipv4 header lenght with ip options + ipv6, extension, fragment header */ + uint16_t fe_extoff; /* last extension header offset or 0 */ + uint16_t fe_len; /* fragment length */ + uint16_t fe_off; /* fragment offset */ + uint16_t fe_mff; /* more fragment flag */ +}; + +struct pf_fragment_cmp { + struct pf_addr frc_src; + struct pf_addr frc_dst; + uint32_t frc_id; + sa_family_t frc_af; + uint8_t frc_proto; + uint8_t frc_direction; }; -#define fr_ip _u._frag._fr_ip -#define fr_m _u._frag._fr_m -#define fr_off _u._cache._fr_off -#define fr_end _u._cache._fr_end struct pf_fragment { + struct pf_fragment_cmp fr_key; +#define fr_src fr_key.frc_src +#define fr_dst fr_key.frc_dst +#define fr_id fr_key.frc_id +#define fr_af fr_key.frc_af +#define fr_proto fr_key.frc_proto +#define fr_direction fr_key.frc_direction + RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; - struct in_addr fr_src; - struct in_addr fr_dst; - u_int8_t fr_p; /* protocol of this fragment */ - u_int8_t fr_flags; /* status flags */ -#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ -#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering 
fragment cache */ -#define PFFRAG_DROP 0x0004 /* Drop all fragments */ + uint8_t fr_flags; /* status flags */ +#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ +#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ +#define PFFRAG_DROP 0x0004 /* Drop all fragments */ #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) - u_int16_t fr_id; /* fragment id for reassemble */ - u_int16_t fr_max; /* fragment data max */ - u_int32_t fr_timeout; - LIST_HEAD(, pf_frent) fr_queue; + uint16_t fr_max; /* fragment data max */ + uint32_t fr_timeout; + uint16_t fr_maxlen; /* maximum length of single fragment */ + TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; +}; + +struct pf_fragment_tag { + uint16_t ft_hdrlen; /* header length of reassembled pkt */ + uint16_t ft_extoff; /* last extension header offset or 0 */ + uint16_t ft_maxlen; /* maximum fragment payload length */ }; static struct mtx pf_frag_mtx; @@ -119,25 +134,32 @@ static int pf_frag_compare(struct pf_fragment *, static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); -/* Private prototypes */ -static void pf_free_fragment(struct pf_fragment *); -static void pf_remove_fragment(struct pf_fragment *); -static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, - struct tcphdr *, int, sa_family_t); +static void pf_free_fragment(struct pf_fragment *); +static void pf_remove_fragment(struct pf_fragment *); +static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, + struct tcphdr *, int, sa_family_t); +struct pf_frent *pf_create_fragment(u_short *); +static struct pf_fragment * + pf_find_fragment(struct pf_fragment_cmp *key, + struct pf_frag_tree *tree); +static struct pf_fragment * + pf_fillup_fragment(struct pf_fragment_cmp *, struct pf_frent *, u_short *); +static int pf_isfull_fragment(struct pf_fragment *); +struct mbuf *pf_join_fragment(struct pf_fragment *); #ifdef INET -static 
void pf_ip2key(struct pf_fragment *, struct ip *); -static void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t, - u_int8_t); -static void pf_flush_fragments(void); -static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); -static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, - struct pf_frent *, int); -static struct mbuf *pf_fragcache(struct mbuf **, struct ip*, - struct pf_fragment **, int, int, int *); -#endif /* INET */ +static void pf_scrub_ip(struct mbuf **, uint32_t, uint8_t, uint8_t); +static void pf_flush_fragments(void); +static int pf_reassemble(struct mbuf **, struct ip *, int, u_short *); +static struct mbuf * + pf_fragcache(struct mbuf **, struct ip*, struct pf_fragment **, + int, int, int *); +#endif /* INET */ #ifdef INET6 -static void pf_scrub_ip6(struct mbuf **, u_int8_t); -#endif +static int pf_reassemble6(struct mbuf **, struct ip6_hdr *, + struct ip6_frag *, uint16_t, uint16_t, int, u_short *); +static void pf_scrub_ip6(struct mbuf **, uint8_t); +#endif /* INET6 */ + #define DPFPRINTF(x) do { \ if (V_pf_status.debug >= PF_DEBUG_MISC) { \ printf("%s: ", __func__); \ @@ -145,6 +167,20 @@ static void pf_scrub_ip6(struct mbuf **, u_int8_t); } \ } while(0) +#ifdef INET +static void +pf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key) +{ + + key->frc_src.v4 = ip->ip_src; + key->frc_dst.v4 = ip->ip_dst; + key->frc_af = AF_INET; + key->frc_proto = ip->ip_p; + key->frc_id = ip->ip_id; + key->frc_direction = dir; +} +#endif /* INET */ + void pf_normalize_init(void) { @@ -184,18 +220,16 @@ pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) { int diff; - if ((diff = a->fr_id - b->fr_id)) + if ((diff = a->fr_id - b->fr_id) != 0) return (diff); - else if ((diff = a->fr_p - b->fr_p)) + if ((diff = a->fr_proto - b->fr_proto) != 0) + return (diff); + if ((diff = a->fr_af - b->fr_af) != 0) + return (diff); + if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0) + return (diff); + if 
((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) return (diff); - else if (a->fr_src.s_addr < b->fr_src.s_addr) - return (-1); - else if (a->fr_src.s_addr > b->fr_src.s_addr) - return (1); - else if (a->fr_dst.s_addr < b->fr_dst.s_addr) - return (-1); - else if (a->fr_dst.s_addr > b->fr_dst.s_addr) - return (1); return (0); } @@ -270,23 +304,23 @@ pf_free_fragment(struct pf_fragment *frag) /* Free all fragments */ if (BUFFER_FRAGMENTS(frag)) { - for (frent = LIST_FIRST(&frag->fr_queue); frent; - frent = LIST_FIRST(&frag->fr_queue)) { - LIST_REMOVE(frent, fr_next); + for (frent = TAILQ_FIRST(&frag->fr_queue); frent; + frent = TAILQ_FIRST(&frag->fr_queue)) { + TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); - m_freem(frent->fr_m); + m_freem(frent->fe_m); uma_zfree(V_pf_frent_z, frent); } } else { - for (frent = LIST_FIRST(&frag->fr_queue); frent; - frent = LIST_FIRST(&frag->fr_queue)) { - LIST_REMOVE(frent, fr_next); + for (frent = TAILQ_FIRST(&frag->fr_queue); frent; + frent = TAILQ_FIRST(&frag->fr_queue)) { + TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); - KASSERT((LIST_EMPTY(&frag->fr_queue) || - LIST_FIRST(&frag->fr_queue)->fr_off > - frent->fr_end), - ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >" - " frent->fr_end): %s", __func__)); + KASSERT((TAILQ_EMPTY(&frag->fr_queue) || + TAILQ_FIRST(&frag->fr_queue)->fe_off > + frent->fe_len), + ("! 
(TAILQ_EMPTY() || TAILQ_FIRST()->fe_off >" + " frent->fe_len): %s", __func__)); uma_zfree(V_pf_frent_z, frent); } @@ -296,26 +330,14 @@ pf_free_fragment(struct pf_fragment *frag) } #ifdef INET -static void -pf_ip2key(struct pf_fragment *key, struct ip *ip) -{ - key->fr_p = ip->ip_p; - key->fr_id = ip->ip_id; - key->fr_src.s_addr = ip->ip_src.s_addr; - key->fr_dst.s_addr = ip->ip_dst.s_addr; -} - static struct pf_fragment * -pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) +pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) { - struct pf_fragment key; struct pf_fragment *frag; PF_FRAG_ASSERT(); - pf_ip2key(&key, ip); - - frag = RB_FIND(pf_frag_tree, tree, &key); + frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); if (frag != NULL) { /* XXX Are we sure we want to update the timeout? */ frag->fr_timeout = time_uptime; @@ -351,210 +373,421 @@ pf_remove_fragment(struct pf_fragment *frag) } } -#ifdef INET -#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) -static struct mbuf * -pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, - struct pf_frent *frent, int mff) +struct pf_frent * +pf_create_fragment(u_short *reason) { - struct mbuf *m = *m0, *m2; - struct pf_frent *frea, *next; - struct pf_frent *frep = NULL; - struct ip *ip = frent->fr_ip; - int hlen = ip->ip_hl << 2; - u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; - u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; - u_int16_t max = ip_len + off; + struct pf_frent *frent; PF_FRAG_ASSERT(); - KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)), - ("! 
(*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); - /* Strip off ip header */ - m->m_data += hlen; - m->m_len -= hlen; + frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (frent == NULL) { + pf_flush_fragments(); + frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (frent == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return (NULL); + } + } - /* Create a new reassembly queue for this packet */ - if (*frag == NULL) { - *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); - if (*frag == NULL) { + return (frent); +} + +struct pf_fragment * +pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, + u_short *reason) +{ + struct pf_frent *after, *next, *prev; + struct pf_fragment *frag; + uint16_t total; + + PF_FRAG_ASSERT(); + + /* No empty fragments. */ + if (frent->fe_len == 0) { + DPFPRINTF(("bad fragment: len 0")); + goto bad_fragment; + } + + /* All fragments are 8 byte aligned. */ + if (frent->fe_mff && (frent->fe_len & 0x7)) { + DPFPRINTF(("bad fragment: mff and len %d", frent->fe_len)); + goto bad_fragment; + } + + /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */ + if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { + DPFPRINTF(("bad fragment: max packet %d", + frent->fe_off + frent->fe_len)); + goto bad_fragment; + } + + DPFPRINTF((key->frc_af == AF_INET ? + "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d", + key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len)); + + /* Fully buffer all of the fragments in this fragment queue. */ + frag = pf_find_fragment(key, &V_pf_frag_tree); + + /* Create a new reassembly queue for this packet. 
*/ + if (frag == NULL) { + frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (frag == NULL) { pf_flush_fragments(); - *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); - if (*frag == NULL) + frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (frag == NULL) { + REASON_SET(reason, PFRES_MEMORY); goto drop_fragment; + } } - (*frag)->fr_flags = 0; - (*frag)->fr_max = 0; - (*frag)->fr_src = frent->fr_ip->ip_src; - (*frag)->fr_dst = frent->fr_ip->ip_dst; - (*frag)->fr_p = frent->fr_ip->ip_p; - (*frag)->fr_id = frent->fr_ip->ip_id; - (*frag)->fr_timeout = time_uptime; - LIST_INIT(&(*frag)->fr_queue); + *(struct pf_fragment_cmp *)frag = *key; + frag->fr_timeout = time_second; + frag->fr_maxlen = frent->fe_len; + TAILQ_INIT(&frag->fr_queue); + + RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); - RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag); - TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next); + /* We do not have a previous fragment. */ + TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); - /* We do not have a previous fragment */ - frep = NULL; - goto insert; + return (frag); } - /* - * Find a fragment after the current one: - * - off contains the real shifted offset. - */ - LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { - if (FR_IP_OFF(frea) > off) + KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue")); + + /* Remember maximum fragment len for refragmentation. */ + if (frent->fe_len > frag->fr_maxlen) + frag->fr_maxlen = frent->fe_len; + + /* Maximum data we have seen already. */ + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + + /* Non terminal fragments must have more fragments flag. */ + if (frent->fe_off + frent->fe_len < total && !frent->fe_mff) + goto bad_fragment; + + /* Check if we saw the last fragment already. 
*/ + if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) { + if (frent->fe_off + frent->fe_len > total || + (frent->fe_off + frent->fe_len == total && frent->fe_mff)) + goto bad_fragment; + } else { + if (frent->fe_off + frent->fe_len == total && !frent->fe_mff) + goto bad_fragment; + } + + /* Find a fragment after the current one. */ + prev = NULL; + TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { + if (after->fe_off > frent->fe_off) break; - frep = frea; + prev = after; } - KASSERT((frep != NULL || frea != NULL), - ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));; + KASSERT(prev != NULL || after != NULL, + ("prev != NULL || after != NULL")); - if (frep != NULL && - FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * - 4 > off) - { - u_int16_t precut; + if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { + uint16_t precut; - precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - - frep->fr_ip->ip_hl * 4 - off; - if (precut >= ip_len) - goto drop_fragment; - m_adj(frent->fr_m, precut); - DPFPRINTF(("overlap -%d\n", precut)); - /* Enforce 8 byte boundaries */ - ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3)); - off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; - ip_len -= precut; - ip->ip_len = htons(ip_len); - } - - for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); - frea = next) - { - u_int16_t aftercut; - - aftercut = ip_len + off - FR_IP_OFF(frea); - DPFPRINTF(("adjust overlap %d\n", aftercut)); - if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl - * 4) - { - frea->fr_ip->ip_len = - htons(ntohs(frea->fr_ip->ip_len) - aftercut); - frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) + - (aftercut >> 3)); - m_adj(frea->fr_m, aftercut); + precut = prev->fe_off + prev->fe_len - frent->fe_off; + if (precut >= frent->fe_len) + goto bad_fragment; + DPFPRINTF(("overlap -%d", precut)); + m_adj(frent->fe_m, precut); + frent->fe_off += precut; + frent->fe_len -= precut; + } + + for (; after != NULL && 
frent->fe_off + frent->fe_len > after->fe_off; + after = next) { + uint16_t aftercut; + + aftercut = frent->fe_off + frent->fe_len - after->fe_off; + DPFPRINTF(("adjust overlap %d", aftercut)); + if (aftercut < after->fe_len) { + m_adj(after->fe_m, aftercut); + after->fe_off += aftercut; + after->fe_len -= aftercut; break; } - /* This fragment is completely overlapped, lose it */ - next = LIST_NEXT(frea, fr_next); - m_freem(frea->fr_m); - LIST_REMOVE(frea, fr_next); - uma_zfree(V_pf_frent_z, frea); + /* This fragment is completely overlapped, lose it. */ + next = TAILQ_NEXT(after, fr_next); + m_freem(after->fe_m); + TAILQ_REMOVE(&frag->fr_queue, after, fr_next); + uma_zfree(V_pf_frent_z, after); } - insert: - /* Update maximum data size */ - if ((*frag)->fr_max < max) - (*frag)->fr_max = max; - /* This is the last segment */ - if (!mff) - (*frag)->fr_flags |= PFFRAG_SEENLAST; - - if (frep == NULL) - LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); + if (prev == NULL) + TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); else - LIST_INSERT_AFTER(frep, frent, fr_next); + TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); + + return (frag); + +bad_fragment: + REASON_SET(reason, PFRES_FRAG); +drop_fragment: + uma_zfree(V_pf_frent_z, frent); + return (NULL); +} + +static int +pf_isfull_fragment(struct pf_fragment *frag) +{ + struct pf_frent *frent, *next; + uint16_t off, total; /* Check if we are completely reassembled */ - if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) - return (NULL); + if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) + return (0); + + /* Maximum data we have seen already */ + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; /* Check if we have all the data */ off = 0; - for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { - next = LIST_NEXT(frep, fr_next); - - off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4; - if (off < (*frag)->fr_max && - (next == NULL || 
FR_IP_OFF(next) != off)) - { - DPFPRINTF(("missing fragment at %d, next %d, max %d\n", - off, next == NULL ? -1 : FR_IP_OFF(next), - (*frag)->fr_max)); - return (NULL); + for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) { + next = TAILQ_NEXT(frent, fr_next); + + off += frent->fe_len; + if (off < total && (next == NULL || next->fe_off != off)) { + DPFPRINTF(("missing fragment at %d, next %d, total %d", + off, next == NULL ? -1 : next->fe_off, total)); + return (0); } } - DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); - if (off < (*frag)->fr_max) - return (NULL); + DPFPRINTF(("%d < %d?", off, total)); + if (off < total) + return (0); + KASSERT(off == total, ("off == total")); - /* We have all the data */ - frent = LIST_FIRST(&(*frag)->fr_queue); - KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__)); - if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { - DPFPRINTF(("drop: too big: %d\n", off)); - pf_free_fragment(*frag); - *frag = NULL; - return (NULL); - } - next = LIST_NEXT(frent, fr_next); + return (1); +} + +struct mbuf * +pf_join_fragment(struct pf_fragment *frag) +{ + struct mbuf *m, *m2; + struct pf_frent *frent, *next; + + frent = TAILQ_FIRST(&frag->fr_queue); + next = TAILQ_NEXT(frent, fr_next); - /* Magic from ip_input */ - ip = frent->fr_ip; - m = frent->fr_m; + /* Magic from ip_input. */ + m = frent->fe_m; m2 = m->m_next; m->m_next = NULL; m_cat(m, m2); uma_zfree(V_pf_frent_z, frent); for (frent = next; frent != NULL; frent = next) { - next = LIST_NEXT(frent, fr_next); + next = TAILQ_NEXT(frent, fr_next); - m2 = frent->fr_m; + m2 = frent->fe_m; + /* Strip off ip header. 
*/ + m_adj(m2, frent->fe_hdrlen); uma_zfree(V_pf_frent_z, frent); - m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags; - m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data; m_cat(m, m2); } - while (m->m_pkthdr.csum_data & 0xffff0000) - m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + - (m->m_pkthdr.csum_data >> 16); - ip->ip_src = (*frag)->fr_src; - ip->ip_dst = (*frag)->fr_dst; + /* Remove from fragment queue. */ + pf_remove_fragment(frag); - /* Remove from fragment queue */ - pf_remove_fragment(*frag); - *frag = NULL; + return (m); +} + +#ifdef INET +static int +pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_frent *frent; + struct pf_fragment *frag; + struct pf_fragment_cmp key; + uint16_t total, hdrlen; + + /* Get an entry for the fragment queue */ + if ((frent = pf_create_fragment(reason)) == NULL) + return (PF_DROP); + + frent->fe_m = m; + frent->fe_hdrlen = ip->ip_hl << 2; + frent->fe_extoff = 0; + frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); + frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + frent->fe_mff = ntohs(ip->ip_off) & IP_MF; + + pf_ip2key(ip, dir, &key); + + if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) + return (PF_DROP); - hlen = ip->ip_hl << 2; - ip->ip_len = htons(off + hlen); - m->m_len += hlen; - m->m_data -= hlen; + /* The mbuf is part of the fragment entry, no direct free or access */ + m = *m0 = NULL; + + if (!pf_isfull_fragment(frag)) + return (PF_PASS); /* drop because *m0 is NULL, no error */ + + /* We have all the data */ + frent = TAILQ_FIRST(&frag->fr_queue); + KASSERT(frent != NULL, ("frent != NULL")); + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + hdrlen = frent->fe_hdrlen; + + m = *m0 = pf_join_fragment(frag); + frag = NULL; - /* some debugging cruft by sklower, below, will go away soon */ - /* XXX this should be done elsewhere */ if (m->m_flags & M_PKTHDR) { int plen 
= 0; - for (m2 = m; m2; m2 = m2->m_next) - plen += m2->m_len; + for (m = *m0; m; m = m->m_next) + plen += m->m_len; + m = *m0; m->m_pkthdr.len = plen; } + ip = mtod(m, struct ip *); + ip->ip_len = htons(hdrlen + total); + ip->ip_off &= ~(IP_MF|IP_OFFMASK); + + if (hdrlen + total > IP_MAXPACKET) { + DPFPRINTF(("drop: too big: %d", total)); + ip->ip_len = 0; + REASON_SET(reason, PFRES_SHORT); + /* PF_DROP requires a valid mbuf *m0 in pf_test() */ + return (PF_DROP); + } + DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); - return (m); + return (PF_PASS); +} - drop_fragment: - /* Oops - fail safe - drop packet */ - uma_zfree(V_pf_frent_z, frent); - m_freem(m); - return (NULL); +#ifdef INET6 +static int +pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr, + uint16_t hdrlen, uint16_t extoff, int dir, u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_frent *frent; + struct pf_fragment *frag; + struct pf_fragment_cmp key; + struct m_tag *mtag; + struct pf_fragment_tag *ftag; + int off; + uint16_t total, maxlen; + uint8_t proto; + + PF_FRAG_LOCK(); + + /* Get an entry for the fragment queue. */ + if ((frent = pf_create_fragment(reason)) == NULL) { + PF_FRAG_UNLOCK(); + return (PF_DROP); + } + + frent->fe_m = m; + frent->fe_hdrlen = hdrlen; + frent->fe_extoff = extoff; + frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen; + frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); + frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; + + key.frc_src.v6 = ip6->ip6_src; + key.frc_dst.v6 = ip6->ip6_dst; + key.frc_af = AF_INET6; + /* Only the first fragment's protocol is relevant. */ + key.frc_proto = 0; + key.frc_id = fraghdr->ip6f_ident; + key.frc_direction = dir; + + if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) { + PF_FRAG_UNLOCK(); + return (PF_DROP); + } + + /* The mbuf is part of the fragment entry, no direct free or access. 
*/ + m = *m0 = NULL; + + if (!pf_isfull_fragment(frag)) { + PF_FRAG_UNLOCK(); + return (PF_PASS); /* Drop because *m0 is NULL, no error. */ + } + + /* We have all the data. */ + extoff = frent->fe_extoff; + maxlen = frag->fr_maxlen; + frent = TAILQ_FIRST(&frag->fr_queue); + KASSERT(frent != NULL, ("frent != NULL")); + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); + + m = *m0 = pf_join_fragment(frag); + frag = NULL; + + PF_FRAG_UNLOCK(); + + /* Take protocol from first fragment header. */ + m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off); + KASSERT(m, ("%s: short mbuf chain", __func__)); + proto = *(mtod(m, caddr_t) + off); + m = *m0; + + /* Delete frag6 header */ + if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0) + goto fail; + + if (m->m_flags & M_PKTHDR) { + int plen = 0; + for (m = *m0; m; m = m->m_next) + plen += m->m_len; + m = *m0; + m->m_pkthdr.len = plen; + } + + if ((mtag = m_tag_get(PF_REASSEMBLED, sizeof(struct pf_fragment_tag), + M_NOWAIT)) == NULL) + goto fail; + ftag = (struct pf_fragment_tag *)(mtag + 1); + ftag->ft_hdrlen = hdrlen; + ftag->ft_extoff = extoff; + ftag->ft_maxlen = maxlen; + m_tag_prepend(m, mtag); + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total); + if (extoff) { + /* Write protocol into next field of last extension header. */ + m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt), + &off); + KASSERT(m, ("%s: short mbuf chain", __func__)); + *(mtod(m, char *) + off) = proto; + m = *m0; + } else + ip6->ip6_nxt = proto; + + if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { + DPFPRINTF(("drop: too big: %d", total)); + ip6->ip6_plen = 0; + REASON_SET(reason, PFRES_SHORT); + /* PF_DROP requires a valid mbuf *m0 in pf_test6(). 
*/ + return (PF_DROP); + } + + DPFPRINTF(("complete: %p(%d)", m, ntohs(ip6->ip6_plen))); + return (PF_PASS); + +fail: + REASON_SET(reason, PFRES_MEMORY); + /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */ + return (PF_DROP); } +#endif /* INET6 */ static struct mbuf * pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, @@ -591,16 +824,15 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, (*frag)->fr_flags = PFFRAG_NOBUFFER; (*frag)->fr_max = 0; - (*frag)->fr_src = h->ip_src; - (*frag)->fr_dst = h->ip_dst; - (*frag)->fr_p = h->ip_p; + (*frag)->fr_src.v4 = h->ip_src; + (*frag)->fr_dst.v4 = h->ip_dst; (*frag)->fr_id = h->ip_id; (*frag)->fr_timeout = time_uptime; - cur->fr_off = off; - cur->fr_end = max; - LIST_INIT(&(*frag)->fr_queue); - LIST_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next); + cur->fe_off = off; + cur->fe_len = max; /* TODO: fe_len = max - off ? */ + TAILQ_INIT(&(*frag)->fr_queue); + TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next); RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag); TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next); @@ -615,8 +847,8 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, * - off contains the real shifted offset. */ frp = NULL; - LIST_FOREACH(fra, &(*frag)->fr_queue, fr_next) { - if (fra->fr_off > off) + TAILQ_FOREACH(fra, &(*frag)->fr_queue, fr_next) { + if (fra->fe_off > off) break; frp = fra; } @@ -627,18 +859,18 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, if (frp != NULL) { int precut; - precut = frp->fr_end - off; + precut = frp->fe_len - off; if (precut >= ip_len) { /* Fragment is entirely a duplicate */ DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n", - h->ip_id, frp->fr_off, frp->fr_end, off, max)); + h->ip_id, frp->fe_off, frp->fe_len, off, max)); goto drop_fragment; } if (precut == 0) { /* They are adjacent. 
Fixup cache entry */ DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n", - h->ip_id, frp->fr_off, frp->fr_end, off, max)); - frp->fr_end = max; + h->ip_id, frp->fe_off, frp->fe_len, off, max)); + frp->fe_len = max; } else if (precut > 0) { /* The first part of this payload overlaps with a * fragment that has already been passed. @@ -648,13 +880,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, */ DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n", - h->ip_id, precut, frp->fr_off, frp->fr_end, off, + h->ip_id, precut, frp->fe_off, frp->fe_len, off, max)); off += precut; max -= precut; /* Update the previous frag to encompass this one */ - frp->fr_end = max; + frp->fe_len = max; if (!drop) { /* XXX Optimization opportunity @@ -702,16 +934,16 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, /* There is a gap between fragments */ DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n", - h->ip_id, -precut, frp->fr_off, frp->fr_end, off, + h->ip_id, -precut, frp->fe_off, frp->fe_len, off, max)); cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) goto no_mem; - cur->fr_off = off; - cur->fr_end = max; - LIST_INSERT_AFTER(frp, cur, fr_next); + cur->fe_off = off; + cur->fe_len = max; + TAILQ_INSERT_AFTER(&(*frag)->fr_queue, frp, cur, fr_next); } } @@ -719,19 +951,19 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, int aftercut; int merge = 0; - aftercut = max - fra->fr_off; + aftercut = max - fra->fe_off; if (aftercut == 0) { /* Adjacent fragments */ DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", - h->ip_id, off, max, fra->fr_off, fra->fr_end)); - fra->fr_off = off; + h->ip_id, off, max, fra->fe_off, fra->fe_len)); + fra->fe_off = off; merge = 1; } else if (aftercut > 0) { /* Need to chop off the tail of this fragment */ DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", - h->ip_id, aftercut, off, max, fra->fr_off, - fra->fr_end)); - fra->fr_off = off; + 
h->ip_id, aftercut, off, max, fra->fe_off, + fra->fe_len)); + fra->fe_off = off; max -= aftercut; merge = 1; @@ -756,42 +988,42 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, } else if (frp == NULL) { /* There is a gap between fragments */ DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", - h->ip_id, -aftercut, off, max, fra->fr_off, - fra->fr_end)); + h->ip_id, -aftercut, off, max, fra->fe_off, + fra->fe_len)); cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) goto no_mem; - cur->fr_off = off; - cur->fr_end = max; - LIST_INSERT_BEFORE(fra, cur, fr_next); + cur->fe_off = off; + cur->fe_len = max; + TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next); } /* Need to glue together two separate fragment descriptors */ if (merge) { - if (cur && fra->fr_off <= cur->fr_end) { + if (cur && fra->fe_off <= cur->fe_len) { /* Need to merge in a previous 'cur' */ DPFPRINTF(("fragcache[%d]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", - h->ip_id, cur->fr_off, cur->fr_end, off, - max, fra->fr_off, fra->fr_end)); - fra->fr_off = cur->fr_off; - LIST_REMOVE(cur, fr_next); + h->ip_id, cur->fe_off, cur->fe_len, off, + max, fra->fe_off, fra->fe_len)); + fra->fe_off = cur->fe_off; + TAILQ_REMOVE(&(*frag)->fr_queue, cur, fr_next); uma_zfree(V_pf_frent_z, cur); cur = NULL; - } else if (frp && fra->fr_off <= frp->fr_end) { + } else if (frp && fra->fe_off <= frp->fe_len) { /* Need to merge in a modified 'frp' */ KASSERT((cur == NULL), ("cur != NULL: %s", __FUNCTION__)); DPFPRINTF(("fragcache[%d]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", - h->ip_id, frp->fr_off, frp->fr_end, off, - max, fra->fr_off, fra->fr_end)); - fra->fr_off = frp->fr_off; - LIST_REMOVE(frp, fr_next); + h->ip_id, frp->fe_off, frp->fe_len, off, + max, fra->fe_off, fra->fe_len)); + fra->fe_off = frp->fe_off; + TAILQ_REMOVE(&(*frag)->fr_queue, frp, fr_next); uma_zfree(V_pf_frent_z, frp); frp = NULL; @@ -820,8 +1052,8 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct 
pf_fragment **frag, int mff, /* Check if we are completely reassembled */ if (((*frag)->fr_flags & PFFRAG_SEENLAST) && - LIST_FIRST(&(*frag)->fr_queue)->fr_off == 0 && - LIST_FIRST(&(*frag)->fr_queue)->fr_end == (*frag)->fr_max) { + TAILQ_FIRST(&(*frag)->fr_queue)->fe_off == 0 && + TAILQ_FIRST(&(*frag)->fr_queue)->fe_len == (*frag)->fr_max) { /* Remove from fragment queue */ DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, (*frag)->fr_max)); @@ -859,14 +1091,85 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, return (NULL); } +#ifdef INET6 +int +pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag) +{ + struct mbuf *m = *m0, *t; + struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); + struct pf_pdesc pd; + uint16_t hdrlen, extoff, maxlen; + uint8_t proto; + int error, action; + + hdrlen = ftag->ft_hdrlen; + extoff = ftag->ft_extoff; + maxlen = ftag->ft_maxlen; + m_tag_delete(m, mtag); + mtag = NULL; + ftag = NULL; + + if (extoff) { + int off; + + /* Use protocol from next field of last extension header */ + m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt), + &off); + KASSERT((m != NULL), ("pf_refragment6: short mbuf chain")); + proto = *(mtod(m, caddr_t) + off); + *(mtod(m, char *) + off) = IPPROTO_FRAGMENT; + m = *m0; + } else { + struct ip6_hdr *hdr; + + hdr = mtod(m, struct ip6_hdr *); + proto = hdr->ip6_nxt; + hdr->ip6_nxt = IPPROTO_FRAGMENT; + } + + /* + * Maxlen may be less than 8 if there was only a single + * fragment. As it was fragmented before, add a fragment + * header also for a single fragment. If total or maxlen + * is less than 8, ip6_fragment() will return EMSGSIZE and + * we drop the packet. + */ + error = ip6_fragment(ifp, m, hdrlen, proto, maxlen); + m = (*m0)->m_nextpkt; + (*m0)->m_nextpkt = NULL; + if (error == 0) { + /* The first mbuf contains the unfragmented packet. 
*/ + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } else { + /* Drop expects an mbuf to free. */ + DPFPRINTF(("refragment error %d", error)); + action = PF_DROP; + } + for (t = m; m; m = t) { + t = m->m_nextpkt; + m->m_nextpkt = NULL; + memset(&pd, 0, sizeof(pd)); + pd.pf_mtag = pf_find_mtag(m); + if (error == 0) + ip6_forward(m, 0); + else + m_freem(m); + } + + return (action); +} +#endif /* INET6 */ + int pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, struct pf_pdesc *pd) { struct mbuf *m = *m0; struct pf_rule *r; - struct pf_frent *frent; struct pf_fragment *frag = NULL; + struct pf_fragment_cmp key; struct ip *h = mtod(m, struct ip *); int mff = (ntohs(h->ip_off) & IP_MF); int hlen = h->ip_hl << 2; @@ -875,6 +1178,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, int ip_len; int ip_off; int tag = -1; + int verdict; PF_RULES_RASSERT(); @@ -959,28 +1263,24 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, /* Fully buffer all of the fragments */ PF_FRAG_LOCK(); - frag = pf_find_fragment(h, &V_pf_frag_tree); + + pf_ip2key(h, dir, &key); + frag = pf_find_fragment(&key, &V_pf_frag_tree); /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && max > frag->fr_max) goto bad; - /* Get an entry for the fragment queue */ - frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); - if (frent == NULL) { - PF_FRAG_UNLOCK(); - REASON_SET(reason, PFRES_MEMORY); - return (PF_DROP); - } - frent->fr_ip = h; - frent->fr_m = m; - /* Might return a completely reassembled mbuf, or NULL */ DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); - *m0 = m = pf_reassemble(m0, &frag, frent, mff); + verdict = pf_reassemble(m0, h, dir, reason); PF_FRAG_UNLOCK(); + if (verdict != PF_PASS) + return (PF_DROP); + + m = *m0; if (m == NULL) return (PF_DROP); @@ -1003,7 +1303,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, 
u_short *reason, } PF_FRAG_LOCK(); - frag = pf_find_fragment(h, &V_pf_cache_tree); + pf_ip2key(h, dir, &key); + frag = pf_find_fragment(&key, &V_pf_cache_tree); /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1096,13 +1397,13 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, struct mbuf *m = *m0; struct pf_rule *r; struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + int extoff; int off; struct ip6_ext ext; struct ip6_opt opt; struct ip6_opt_jumbo jumbo; struct ip6_frag frag; u_int32_t jumbolen = 0, plen; - u_int16_t fragoff = 0; int optend; int ooff; u_int8_t proto; @@ -1146,6 +1447,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) goto drop; + extoff = 0; off = sizeof(struct ip6_hdr); proto = h->ip6_nxt; terminal = 0; @@ -1160,6 +1462,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, NULL, AF_INET6)) goto shortpkt; + extoff = off; if (proto == IPPROTO_AH) off += (ext.ip6e_len + 2) * 4; else @@ -1170,6 +1473,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, NULL, AF_INET6)) goto shortpkt; + extoff = off; optend = off + (ext.ip6e_len + 1) * 8; ooff = off + sizeof(ext); do { @@ -1233,18 +1537,27 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, return (PF_PASS); fragment: - if (ntohs(h->ip6_plen) == 0 || jumbolen) - goto drop; + /* Jumbo payload packets cannot be fragmented. 
*/ plen = ntohs(h->ip6_plen); + if (plen == 0 || jumbolen) + goto drop; + if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) + goto shortpkt; if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) goto shortpkt; - fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); - if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET) - goto badfrag; - /* do something about it */ - /* remember to set pd->flags |= PFDESC_IP_REAS */ + /* Offset now points to data portion. */ + off += sizeof(frag); + + /* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf. */ + if (pf_reassemble6(m0, h, &frag, off, extoff, dir, reason) != PF_PASS) + return (PF_DROP); + m = *m0; + if (m == NULL) + return (PF_DROP); + + pd->flags |= PFDESC_IP_REAS; return (PF_PASS); shortpkt: @@ -1260,13 +1573,6 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, 1); return (PF_DROP); - - badfrag: - REASON_SET(reason, PFRES_FRAG); - if (r != NULL && r->log) - PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, - 1); - return (PF_DROP); } #endif /* INET6 */ diff --git a/sys/ofed/drivers/infiniband/core/Makefile b/sys/ofed/drivers/infiniband/core/Makefile deleted file mode 100644 index f646040..0000000 --- a/sys/ofed/drivers/infiniband/core/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o -user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o - -obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o iw_cm.o $(infiniband-y) -obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o -obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ - $(user_access-y) - -ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ - device.o fmr_pool.o cache.o -ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o - -ib_mad-y := mad.o smi.o agent.o mad_rmpp.o - -ib_sa-y := sa_query.o multicast.o notice.o local_sa.o - -ib_cm-y := cm.o - -iw_cm-y := iwcm.o - 
-rdma_cm-y := cma.o - -rdma_ucm-y := ucma.o - -ib_addr-y := addr.o - -ib_umad-y := user_mad.o - -ib_ucm-y := ucm.o - -ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o diff --git a/sys/ofed/drivers/infiniband/core/addr.c b/sys/ofed/drivers/infiniband/core/addr.c index c3d5b4f..e85b554 100644 --- a/sys/ofed/drivers/infiniband/core/addr.c +++ b/sys/ofed/drivers/infiniband/core/addr.c @@ -69,6 +69,7 @@ static LIST_HEAD(req_list); static struct delayed_work work; static struct workqueue_struct *addr_wq; +static struct rdma_addr_client self; void rdma_addr_register_client(struct rdma_addr_client *client) { atomic_set(&client->refcount, 1); @@ -89,19 +90,6 @@ void rdma_addr_unregister_client(struct rdma_addr_client *client) } EXPORT_SYMBOL(rdma_addr_unregister_client); -#ifdef __linux__ -int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - const unsigned char *dst_dev_addr) -{ - dev_addr->dev_type = dev->type; - memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); - if (dst_dev_addr) - memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); - dev_addr->bound_dev_if = dev->ifindex; - return 0; -} -#else int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, const unsigned char *dst_dev_addr) { @@ -119,10 +107,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, dev_addr->bound_dev_if = dev->if_index; return 0; } -#endif EXPORT_SYMBOL(rdma_copy_addr); -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, + u16 *vlan_id) { struct net_device *dev; int ret = -EADDRNOTAVAIL; @@ -137,33 +125,21 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) } switch (addr->sa_family) { -#ifdef INET case AF_INET: - dev = ip_dev_find(NULL, + dev = ip_dev_find(&init_net, ((struct sockaddr_in *) addr)->sin_addr.s_addr); if 
(!dev) return ret; ret = rdma_copy_addr(dev_addr, dev, NULL); + if (vlan_id) + *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); break; -#endif #if defined(INET6) case AF_INET6: -#ifdef __linux__ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { - if (ipv6_chk_addr(&init_net, - &((struct sockaddr_in6 *) addr)->sin6_addr, - dev, 1)) { - ret = rdma_copy_addr(dev_addr, dev, NULL); - break; - } - } - read_unlock(&dev_base_lock); -#else { struct sockaddr_in6 *sin6; struct ifaddr *ifa; @@ -179,12 +155,12 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) break; } ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); + if (vlan_id) + *vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp); ifa_free(ifa); break; } #endif - break; -#endif } return ret; } @@ -218,127 +194,6 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } -#ifdef __linux__ -static int addr4_resolve(struct sockaddr_in *src_in, - struct sockaddr_in *dst_in, - struct rdma_dev_addr *addr) -{ - __be32 src_ip = src_in->sin_addr.s_addr; - __be32 dst_ip = dst_in->sin_addr.s_addr; - struct flowi fl; - struct rtable *rt; - struct neighbour *neigh; - int ret; - - memset(&fl, 0, sizeof fl); - fl.nl_u.ip4_u.daddr = dst_ip; - fl.nl_u.ip4_u.saddr = src_ip; - fl.oif = addr->bound_dev_if; - - ret = ip_route_output_key(&init_net, &rt, &fl); - if (ret) - goto out; - - src_in->sin_family = AF_INET; - src_in->sin_addr.s_addr = rt->rt_src; - - if (rt->idev->dev->flags & IFF_LOOPBACK) { - ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); - goto put; - } - - /* If the device does ARP internally, return 'done' */ - if (rt->idev->dev->flags & IFF_NOARP) { - rdma_copy_addr(addr, rt->idev->dev, NULL); - goto put; - } - - neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); - if (!neigh || !(neigh->nud_state & NUD_VALID)) { - neigh_event_send(rt->u.dst.neighbour, NULL); - ret = 
-ENODATA; - if (neigh) - goto release; - goto put; - } - - ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); -release: - neigh_release(neigh); -put: - ip_rt_put(rt); -out: - return ret; -} - -#if defined(INET6) -static int addr6_resolve(struct sockaddr_in6 *src_in, - struct sockaddr_in6 *dst_in, - struct rdma_dev_addr *addr) -{ - struct flowi fl; - struct neighbour *neigh; - struct dst_entry *dst; - int ret; - - memset(&fl, 0, sizeof fl); - ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); - ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); - fl.oif = addr->bound_dev_if; - - dst = ip6_route_output(&init_net, NULL, &fl); - if ((ret = dst->error)) - goto put; - - if (ipv6_addr_any(&fl.fl6_src)) { - ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, - &fl.fl6_dst, 0, &fl.fl6_src); - if (ret) - goto put; - - src_in->sin6_family = AF_INET6; - ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); - } - - if (dst->dev->flags & IFF_LOOPBACK) { - ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); - goto put; - } - - /* If the device does ARP internally, return 'done' */ - if (dst->dev->flags & IFF_NOARP) { - ret = rdma_copy_addr(addr, dst->dev, NULL); - goto put; - } - - neigh = dst->neighbour; - if (!neigh || !(neigh->nud_state & NUD_VALID)) { - neigh_event_send(dst->neighbour, NULL); - ret = -ENODATA; - goto put; - } - - ret = rdma_copy_addr(addr, dst->dev, neigh->ha); -put: - dst_release(dst); - return ret; -} -#else -static int addr6_resolve(struct sockaddr_in6 *src_in, - struct sockaddr_in6 *dst_in, - struct rdma_dev_addr *addr) -{ - return -EADDRNOTAVAIL; -} -#endif - -#else -#include <netinet/if_ether.h> - static int addr_resolve(struct sockaddr *src_in, struct sockaddr *dst_in, struct rdma_dev_addr *addr) @@ -354,7 +209,6 @@ static int addr_resolve(struct sockaddr *src_in, int bcast; int is_gw = 0; int error = 0; - /* * Determine whether the address is unicast, multicast, or 
broadcast * and whether the source interface is valid. @@ -382,8 +236,7 @@ static int addr_resolve(struct sockaddr *src_in, port = sin->sin_port; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - } else - src_in = NULL; + } break; #endif #ifdef INET6 @@ -406,7 +259,7 @@ static int addr_resolve(struct sockaddr *src_in, * If we have a source address to use look it up first and verify * that it is a local interface. */ - if (src_in) { + if (sin->sin_addr.s_addr != INADDR_ANY) { ifa = ifa_ifwithaddr(src_in); if (sin) sin->sin_port = port; @@ -436,15 +289,20 @@ static int addr_resolve(struct sockaddr *src_in, * correct interface pointer and unlock the route. */ if (multi || bcast) { - if (ifp == NULL) + if (ifp == NULL) { ifp = rte->rt_ifp; + /* rt_ifa holds the route answer source address */ + ifa = rte->rt_ifa; + } RTFREE_LOCKED(rte); } else if (ifp && ifp != rte->rt_ifp) { RTFREE_LOCKED(rte); return -ENETUNREACH; } else { - if (ifp == NULL) + if (ifp == NULL) { ifp = rte->rt_ifp; + ifa = rte->rt_ifa; + } RT_UNLOCK(rte); } mcast: @@ -459,6 +317,8 @@ mcast: error = rdma_copy_addr(addr, ifp, LLADDR((struct sockaddr_dl *)llsa)); free(llsa, M_IFMADDR); + if (error == 0) + memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr)); return error; } /* @@ -472,7 +332,7 @@ mcast: #endif #ifdef INET6 case AF_INET6: - error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst,NULL); + error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, NULL); break; #endif default: @@ -480,15 +340,15 @@ mcast: error = -EINVAL; } RTFREE(rte); - if (error == 0) + if (error == 0) { + memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr)); return rdma_copy_addr(addr, ifp, edst); + } if (error == EWOULDBLOCK) return -ENODATA; return -error; } -#endif - static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; @@ -602,20 +462,94 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) } EXPORT_SYMBOL(rdma_addr_cancel); +struct resolve_cb_context { 
+ struct rdma_dev_addr *addr; + struct completion comp; +}; + +static void resolve_cb(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context) +{ + memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct + rdma_dev_addr)); + complete(&((struct resolve_cb_context *)context)->comp); +} + +int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, + u16 *vlan_id) +{ + int ret = 0; + struct rdma_dev_addr dev_addr; + struct resolve_cb_context ctx; + struct net_device *dev; + + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; + + + ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); + if (ret) + return ret; + + ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); + if (ret) + return ret; + + memset(&dev_addr, 0, sizeof(dev_addr)); + + ctx.addr = &dev_addr; + init_completion(&ctx.comp); + ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, + &dev_addr, 1000, resolve_cb, &ctx); + if (ret) + return ret; + + wait_for_completion(&ctx.comp); + + memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); + dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); + if (!dev) + return -ENODEV; + if (vlan_id) + *vlan_id = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + return ret; +} +EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh); + +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) +{ + int ret = 0; + struct rdma_dev_addr dev_addr; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } gid_addr; + + ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); + + if (ret) + return ret; + memset(&dev_addr, 0, sizeof(dev_addr)); + ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); + if (ret) + return ret; + + memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); + return ret; +} +EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); + static int netevent_callback(struct 
notifier_block *self, unsigned long event, void *ctx) { if (event == NETEVENT_NEIGH_UPDATE) { -#ifdef __linux__ - struct neighbour *neigh = ctx; - - if (neigh->nud_state & NUD_VALID) { set_timeout(jiffies); } -#else - set_timeout(jiffies); -#endif - } return 0; } @@ -631,11 +565,13 @@ static int __init addr_init(void) return -ENOMEM; register_netevent_notifier(&nb); + rdma_addr_register_client(&self); return 0; } static void __exit addr_cleanup(void) { + rdma_addr_unregister_client(&self); unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); } diff --git a/sys/ofed/drivers/infiniband/core/cache.c b/sys/ofed/drivers/infiniband/core/cache.c index 660bff5..d11e7c2 100644 --- a/sys/ofed/drivers/infiniband/core/cache.c +++ b/sys/ofed/drivers/infiniband/core/cache.c @@ -76,19 +76,21 @@ int ib_get_cached_gid(struct ib_device *device, { struct ib_gid_cache *cache; unsigned long flags; - int ret = 0; + int ret = -EINVAL; if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.gid_cache[port_num - start_port(device)]; + if (device->cache.gid_cache) { + cache = device->cache.gid_cache[port_num - start_port(device)]; - if (index < 0 || index >= cache->table_len) - ret = -EINVAL; - else - *gid = cache->table[index]; + if (cache && index >= 0 && index < cache->table_len) { + *gid = cache->table[index]; + ret = 0; + } + } read_unlock_irqrestore(&device->cache.lock, flags); @@ -111,22 +113,24 @@ int ib_find_cached_gid(struct ib_device *device, *index = -1; read_lock_irqsave(&device->cache.lock, flags); - + if (!device->cache.gid_cache) + goto out; for (p = 0; p <= end_port(device) - start_port(device); ++p) { cache = device->cache.gid_cache[p]; + if (!cache) + continue; for (i = 0; i < cache->table_len; ++i) { if (!memcmp(gid, &cache->table[i], sizeof *gid)) { *port_num = p + start_port(device); if (index) *index = i; ret = 0; - goto found; + goto out; } } } -found: +out: 
read_unlock_irqrestore(&device->cache.lock, flags); - return ret; } EXPORT_SYMBOL(ib_find_cached_gid); @@ -138,19 +142,21 @@ int ib_get_cached_pkey(struct ib_device *device, { struct ib_pkey_cache *cache; unsigned long flags; - int ret = 0; + int ret = -EINVAL; if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + if (device->cache.pkey_cache) { + cache = device->cache.pkey_cache[port_num - start_port(device)]; - if (index < 0 || index >= cache->table_len) - ret = -EINVAL; - else - *pkey = cache->table[index]; + if (cache && index >= 0 && index < cache->table_len) { + *pkey = cache->table[index]; + ret = 0; + } + } read_unlock_irqrestore(&device->cache.lock, flags); @@ -167,41 +173,93 @@ int ib_find_cached_pkey(struct ib_device *device, unsigned long flags; int i; int ret = -ENOENT; + int partial_ix = -1; if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; + *index = -1; + read_lock_irqsave(&device->cache.lock, flags); + if (!device->cache.pkey_cache) + goto out; + cache = device->cache.pkey_cache[port_num - start_port(device)]; + if (!cache) + goto out; + + for (i = 0; i < cache->table_len; ++i) + if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { + if (cache->table[i] & 0x8000) { + *index = i; + ret = 0; + break; + } else + partial_ix = i; + } + + if (ret && partial_ix >= 0) { + *index = partial_ix; + ret = 0; + } +out: + read_unlock_irqrestore(&device->cache.lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_find_cached_pkey); + +int ib_find_exact_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index) +{ + struct ib_pkey_cache *cache; + unsigned long flags; + int i; + int ret = -ENOENT; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; *index = -1; + read_lock_irqsave(&device->cache.lock, flags); + + if (!device->cache.pkey_cache) 
+ goto out; + + cache = device->cache.pkey_cache[port_num - start_port(device)]; + if (!cache) + goto out; + for (i = 0; i < cache->table_len; ++i) - if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { + if (cache->table[i] == pkey) { *index = i; ret = 0; break; } - +out: read_unlock_irqrestore(&device->cache.lock, flags); - return ret; } -EXPORT_SYMBOL(ib_find_cached_pkey); +EXPORT_SYMBOL(ib_find_exact_cached_pkey); int ib_get_cached_lmc(struct ib_device *device, u8 port_num, u8 *lmc) { unsigned long flags; - int ret = 0; + int ret = -EINVAL; if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - *lmc = device->cache.lmc_cache[port_num - start_port(device)]; + if (device->cache.lmc_cache) { + *lmc = device->cache.lmc_cache[port_num - start_port(device)]; + ret = 0; + } read_unlock_irqrestore(&device->cache.lock, flags); return ret; @@ -217,6 +275,10 @@ static void ib_cache_update(struct ib_device *device, int i; int ret; + if (!(device->cache.pkey_cache && device->cache.gid_cache && + device->cache.lmc_cache)) + return; + tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) return; @@ -309,7 +371,7 @@ static void ib_cache_event(struct ib_event_handler *handler, INIT_WORK(&work->work, ib_cache_task); work->device = event->device; work->port_num = event->element.port_num; - schedule_work(&work->work); + queue_work(ib_wq, &work->work); } } } @@ -362,14 +424,21 @@ err: kfree(device->cache.pkey_cache); kfree(device->cache.gid_cache); kfree(device->cache.lmc_cache); + device->cache.pkey_cache = NULL; + device->cache.gid_cache = NULL; + device->cache.lmc_cache = NULL; } static void ib_cache_cleanup_one(struct ib_device *device) { int p; + if (!(device->cache.pkey_cache && device->cache.gid_cache && + device->cache.lmc_cache)) + return; + ib_unregister_event_handler(&device->cache.event_handler); - flush_scheduled_work(); + flush_workqueue(ib_wq); for (p = 0; p <= end_port(device) - 
start_port(device); ++p) { kfree(device->cache.pkey_cache[p]); diff --git a/sys/ofed/drivers/infiniband/core/cm.c b/sys/ofed/drivers/infiniband/core/cm.c index 3d2794d..07f6e08 100644 --- a/sys/ofed/drivers/infiniband/core/cm.c +++ b/sys/ofed/drivers/infiniband/core/cm.c @@ -36,16 +36,19 @@ #include <linux/completion.h> #include <linux/dma-mapping.h> #include <linux/device.h> +#include <linux/module.h> #include <linux/err.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/random.h> #include <linux/rbtree.h> #include <linux/spinlock.h> +#include <linux/slab.h> #include <linux/sysfs.h> #include <linux/workqueue.h> #include <linux/kdev_t.h> #include <linux/string.h> +#include <linux/etherdevice.h> #include <asm/atomic-long.h> @@ -57,16 +60,10 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); -#define PFX "ib_cm: " - -/* - * Limit CM message timeouts to something reasonable: - * 8 seconds per message, with up to 15 retries - */ -static int max_timeout = 21; -module_param(max_timeout, int, 0644); -MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout " - "(default=21, or ~8 seconds)"); +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ static void cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device); @@ -189,6 +186,8 @@ struct cm_av { struct ib_ah_attr ah_attr; u16 pkey_index; u8 timeout; + u8 valid; + u8 smac[ETH_ALEN]; }; struct cm_work { @@ -358,6 +357,23 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } +int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac) +{ + struct cm_id_private *cm_id_priv; + + cm_id_priv = container_of(id, struct cm_id_private, id); + + if (smac != NULL) + memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac)); + + if (alt_smac != NULL) + memcpy(cm_id_priv->alt_av.smac, alt_smac, + 
sizeof(cm_id_priv->alt_av.smac)); + + return 0; +} +EXPORT_SYMBOL(ib_update_cm_av); + static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) { struct cm_device *cm_dev; @@ -388,6 +404,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, &av->ah_attr); av->timeout = path->packet_life_time + 1; + memcpy(av->smac, path->smac, sizeof(av->smac)); + + av->valid = 1; return 0; } @@ -402,7 +421,7 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id, &id); if (!ret) - next_id = ((unsigned) id + 1) & MAX_ID_MASK; + next_id = ((unsigned) id + 1) & MAX_IDR_MASK; spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); @@ -794,11 +813,11 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) } } -static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) +static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id, gfp_t flags) { struct cm_timewait_info *timewait_info; - timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL); + timewait_info = kzalloc(sizeof *timewait_info, flags); if (!timewait_info) return ERR_PTR(-ENOMEM); @@ -902,6 +921,8 @@ retest: break; case IB_CM_ESTABLISHED: spin_unlock_irq(&cm_id_priv->lock); + if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) + break; ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: @@ -1021,33 +1042,24 @@ static void cm_format_req(struct cm_req_msg *req_msg, req_msg->service_id = param->service_id; req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); - cm_req_set_resp_res(req_msg, param->responder_resources); cm_req_set_init_depth(req_msg, param->initiator_depth); cm_req_set_remote_resp_timeout(req_msg, param->remote_cm_response_timeout); - if 
(param->remote_cm_response_timeout > (u8) max_timeout) { - printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > " - "%d, decreasing\n", param->remote_cm_response_timeout, - max_timeout); - cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout); - } cm_req_set_qp_type(req_msg, param->qp_type); cm_req_set_flow_ctrl(req_msg, param->flow_control); cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); cm_req_set_local_resp_timeout(req_msg, param->local_cm_response_timeout); - if (param->local_cm_response_timeout > (u8) max_timeout) { - printk(KERN_WARNING PFX "req local_cm_response_timeout %d > " - "%d, decreasing\n", param->local_cm_response_timeout, - max_timeout); - cm_req_set_local_resp_timeout(req_msg, (u8) max_timeout); - } - cm_req_set_retry_count(req_msg, param->retry_count); req_msg->pkey = param->primary_path->pkey; cm_req_set_path_mtu(req_msg, param->primary_path->mtu); - cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); + + if (param->qp_type != IB_QPT_XRC_INI) { + cm_req_set_resp_res(req_msg, param->responder_resources); + cm_req_set_retry_count(req_msg, param->retry_count); + cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); cm_req_set_srq(req_msg, param->srq); + } if (pri_path->hop_limit <= 1) { req_msg->primary_local_lid = pri_path->slid; @@ -1105,7 +1117,8 @@ static int cm_validate_req_param(struct ib_cm_req_param *param) if (!param->primary_path) return -EINVAL; - if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC) + if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && + param->qp_type != IB_QPT_XRC_INI) return -EINVAL; if (param->private_data && @@ -1137,38 +1150,34 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_IDLE) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = -EINVAL; - goto out; + return -EINVAL; } - 
spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> - id.local_id); + id.local_id, + GFP_ATOMIC); if (IS_ERR(cm_id_priv->timewait_info)) { - ret = PTR_ERR(cm_id_priv->timewait_info); - goto out; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return (PTR_ERR(cm_id_priv->timewait_info)); } ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); - if (ret) - goto error1; - if (param->alternate_path) { + if (!ret && param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); - if (ret) + } + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); goto error1; } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( param->remote_cm_response_timeout); - if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) { - printk(KERN_WARNING PFX "req timeout_ms %d > %d, decreasing\n", - cm_id_priv->timeout_ms, cm_convert_to_ms(max_timeout)); - cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout); - } cm_id_priv->max_cm_retries = param->max_cm_retries; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; @@ -1201,9 +1210,11 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; -error2: cm_free_msg(cm_id_priv->msg); -error1: kfree(cm_id_priv->timewait_info); -out: return ret; +error2: + cm_free_msg(cm_id_priv->msg); +error1: + kfree(cm_id_priv->timewait_info); + return ret; } EXPORT_SYMBOL(ib_send_cm_req); @@ -1556,7 +1567,8 @@ static int cm_req_handler(struct cm_work *work) work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> - id.local_id); + id.local_id, + GFP_KERNEL); if 
(IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); goto destroy; @@ -1579,6 +1591,10 @@ static int cm_req_handler(struct cm_work *work) cm_process_routed_req(req_msg, work->mad_recv_wc->wc); cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); + + /* Workarround: path in req_msg doesn't contain MAC, take it from wc */ + memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, 6); + work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id; ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); if (ret) { ib_get_cached_gid(work->port->cm_dev->ib_device, @@ -1600,13 +1616,6 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( cm_req_get_local_resp_timeout(req_msg)); - if (cm_req_get_local_resp_timeout(req_msg) > (u8) max_timeout) { - printk(KERN_WARNING PFX "rcvd cm_local_resp_timeout %d > %d, " - "decreasing used timeout_ms\n", - cm_req_get_local_resp_timeout(req_msg), max_timeout); - cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout); - } - cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); @@ -1638,18 +1647,24 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); rep_msg->local_comm_id = cm_id_priv->id.local_id; rep_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); rep_msg->resp_resources = param->responder_resources; - rep_msg->initiator_depth = param->initiator_depth; cm_rep_set_target_ack_delay(rep_msg, cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); - cm_rep_set_flow_ctrl(rep_msg, param->flow_control); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); - 
cm_rep_set_srq(rep_msg, param->srq); rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { + rep_msg->initiator_depth = param->initiator_depth; + cm_rep_set_flow_ctrl(rep_msg, param->flow_control); + cm_rep_set_srq(rep_msg, param->srq); + cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); + } else { + cm_rep_set_srq(rep_msg, 1); + cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); + } + if (param->private_data && param->private_data_len) memcpy(rep_msg->private_data, param->private_data, param->private_data_len); @@ -1672,6 +1687,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REQ_RCVD && cm_id->state != IB_CM_MRA_REQ_SENT) { + pr_debug("cm_id->state: %d\n", cm_id->state); ret = -EINVAL; goto out; } @@ -1697,7 +1713,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); - cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; @@ -1738,6 +1754,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REP_RCVD && cm_id->state != IB_CM_MRA_REP_SENT) { + pr_debug("cm_id->state: %d\n", cm_id->state); ret = -EINVAL; goto error; } @@ -1768,7 +1785,7 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_rtu); -static void cm_format_rep_event(struct cm_work *work) +static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) { struct cm_rep_msg *rep_msg; struct ib_cm_rep_event_param *param; @@ -1777,7 +1794,7 @@ static void cm_format_rep_event(struct cm_work *work) param = &work->cm_event.param.rep_rcvd; param->remote_ca_guid = 
rep_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); - param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg)); + param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); param->responder_resources = rep_msg->initiator_depth; param->initiator_depth = rep_msg->resp_resources; @@ -1842,10 +1859,11 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); if (!cm_id_priv) { cm_dup_rep_handler(work); + pr_debug("no cm_id_priv\n"); return -EINVAL; } - cm_format_rep_event(work); + cm_format_rep_event(work, cm_id_priv->qp_type); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { @@ -1855,12 +1873,13 @@ static int cm_rep_handler(struct cm_work *work) default: spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; + pr_debug("cm_id_priv->id.state: %d\n", cm_id_priv->id.state); goto error; } cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; - cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); spin_lock(&cm.lock); /* Check for duplicate REP. */ @@ -1868,6 +1887,7 @@ static int cm_rep_handler(struct cm_work *work) spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; + pr_debug("Failed to insert remote id\n"); goto error; } /* Check for a stale connection. 
*/ @@ -1881,13 +1901,14 @@ static int cm_rep_handler(struct cm_work *work) IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; + pr_debug("Stale connection.\n"); goto error; } spin_unlock(&cm.lock); cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; - cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); cm_id_priv->initiator_depth = rep_msg->resp_resources; cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); @@ -2021,10 +2042,15 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED) { + pr_debug("cm_id->state: %d\n", cm_id->state); ret = -EINVAL; goto out; } + if (cm_id->lap_state == IB_CM_LAP_SENT || + cm_id->lap_state == IB_CM_MRA_LAP_RCVD) + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) { cm_enter_timewait(cm_id_priv); @@ -2086,6 +2112,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, if (cm_id->state != IB_CM_DREQ_RCVD) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); + pr_debug("cm_id->state(%d) != IB_CM_DREQ_RCVD\n", cm_id->state); return -EINVAL; } @@ -2151,6 +2178,7 @@ static int cm_dreq_handler(struct cm_work *work) atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); + pr_debug("no cm_id_priv\n"); return -EINVAL; } @@ -2166,6 +2194,10 @@ static int cm_dreq_handler(struct cm_work *work) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); break; case IB_CM_ESTABLISHED: + if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || + cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + break; case IB_CM_MRA_REP_RCVD: break; case IB_CM_TIMEWAIT: @@ -2187,6 +2219,7 @@ static int cm_dreq_handler(struct cm_work *work) counter[CM_DREQ_COUNTER]); goto unlock; default: + pr_debug("cm_id_priv->id.state: %d\n", cm_id_priv->id.state); goto unlock; } cm_id_priv->id.state = IB_CM_DREQ_RCVD; @@ -2290,6 +2323,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, cm_enter_timewait(cm_id_priv); break; default: + pr_debug("cm_id->state: 0x%x\n", cm_id->state); ret = -EINVAL; goto out; } @@ -2386,11 +2420,21 @@ static int cm_rej_handler(struct cm_work *work) /* fall through */ case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: + cm_enter_timewait(cm_id_priv); + break; case IB_CM_ESTABLISHED: + if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT || + cm_id_priv->id.lap_state == IB_CM_LAP_SENT) { + if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT) + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + cm_id_priv->msg); cm_enter_timewait(cm_id_priv); break; + } + /* fall through */ default: spin_unlock_irq(&cm_id_priv->lock); + pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state); ret = -EINVAL; goto out; } @@ -2453,6 +2497,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, break; } default: + pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state); ret = -EINVAL; goto error1; } @@ -2518,12 +2563,6 @@ static int cm_mra_handler(struct cm_work *work) cm_mra_get_service_timeout(mra_msg); timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + cm_convert_to_ms(cm_id_priv->av.timeout); - if (timeout > 
cm_convert_to_ms(max_timeout)) { - printk(KERN_WARNING PFX "calculated mra timeout %d > %d, " - "decreasing used timeout_ms\n", timeout, - cm_convert_to_ms(max_timeout)); - timeout = cm_convert_to_ms(max_timeout); - } spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { @@ -2560,6 +2599,7 @@ static int cm_mra_handler(struct cm_work *work) counter[CM_MRA_COUNTER]); /* fall through */ default: + pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state); goto out; } @@ -2746,7 +2786,8 @@ static int cm_lap_handler(struct cm_work *work) cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); - cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); + if (cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av)) + goto unlock; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -2938,6 +2979,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + ret = cm_init_av_by_path(param->path, &cm_id_priv->av); if (ret) goto out; @@ -2945,12 +2989,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; - if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) { - printk(KERN_WARNING PFX "sidr req timeout_ms %d > %d, " - "decreasing used timeout_ms\n", param->timeout_ms, - cm_convert_to_ms(max_timeout)); - cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout); - } cm_id_priv->max_cm_retries = param->max_cm_retries; ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) @@ -2961,21 +2999,19 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; - spin_lock_irqsave(&cm_id_priv->lock, flags); if 
(cm_id->state == IB_CM_IDLE) ret = ib_post_send_mad(msg, NULL); else ret = -EINVAL; if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); goto out; } cm_id->state = IB_CM_SIDR_REQ_SENT; cm_id_priv->msg = msg; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_req); @@ -3038,6 +3074,7 @@ static int cm_sidr_req_handler(struct cm_work *work) goto out; /* No match. */ } atomic_inc(&cur_cm_id_priv->refcount); + atomic_inc(&cm_id_priv->refcount); spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; @@ -3302,6 +3339,7 @@ static void cm_work_handler(struct work_struct *_work) ret = cm_timewait_handler(work); break; default: + pr_debug("work->cm_event.event: 0x%x\n", work->cm_event.event); ret = -EINVAL; break; } @@ -3332,6 +3370,7 @@ static int cm_establish(struct ib_cm_id *cm_id) ret = -EISCONN; break; default: + pr_debug("cm_id->state: 0x%x\n", cm_id->state); ret = -EINVAL; break; } @@ -3494,6 +3533,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, ret = 0; break; default: + pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state); ret = -EINVAL; break; } @@ -3520,10 +3560,36 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; + if (!cm_id_priv->av.valid) + return -EINVAL; + if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) { + qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id; + *qp_attr_mask |= IB_QP_VID; + } + if (!is_zero_ether_addr(cm_id_priv->av.smac)) { + memcpy(qp_attr->smac, cm_id_priv->av.smac, + sizeof(qp_attr->smac)); + *qp_attr_mask |= IB_QP_SMAC; + } + if (cm_id_priv->alt_av.valid) { + if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) { + qp_attr->alt_vlan_id = + cm_id_priv->alt_av.ah_attr.vlan_id; + *qp_attr_mask |= 
IB_QP_ALT_VID; + } + if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) { + memcpy(qp_attr->alt_smac, + cm_id_priv->alt_av.smac, + sizeof(qp_attr->alt_smac)); + *qp_attr_mask |= IB_QP_ALT_SMAC; + } + } + qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { + if (cm_id_priv->qp_type == IB_QPT_RC || + cm_id_priv->qp_type == IB_QPT_XRC_TGT) { *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; qp_attr->max_dest_rd_atomic = @@ -3540,6 +3606,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, ret = 0; break; default: + pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state); ret = -EINVAL; break; } @@ -3568,15 +3635,21 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { - *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | + switch (cm_id_priv->qp_type) { + case IB_QPT_RC: + case IB_QPT_XRC_INI: + *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->av.timeout; qp_attr->retry_cnt = cm_id_priv->retry_count; qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; - qp_attr->max_rd_atomic = - cm_id_priv->initiator_depth; + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + /* fall through */ + case IB_QPT_XRC_TGT: + *qp_attr_mask |= IB_QP_TIMEOUT; + qp_attr->timeout = cm_id_priv->av.timeout; + break; + default: + break; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; @@ -3593,6 +3666,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, ret = 0; break; default: + pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state); ret = -EINVAL; break; } @@ -3619,6 +3693,7 @@ int 
ib_cm_init_qp_attr(struct ib_cm_id *cm_id, ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); break; default: + pr_debug("qp_attr->qp_state: 0x%x\n", qp_attr->qp_state); ret = -EINVAL; break; } @@ -3649,7 +3724,7 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, atomic_long_read(&group->counter[cm_attr->index])); } -static struct sysfs_ops cm_counter_ops = { +static const struct sysfs_ops cm_counter_ops = { .show = cm_show_counter }; @@ -3670,8 +3745,17 @@ static struct kobj_type cm_port_obj_type = { .release = cm_release_port_obj }; +static char *cm_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + struct class cm_class = { + .owner = THIS_MODULE, .name = "infiniband_cm", + .devnode = cm_devnode, }; EXPORT_SYMBOL(cm_class); @@ -3745,7 +3829,7 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev->device = device_create(&cm_class, &ib_device->dev, MKDEV(0, 0), NULL, "%s", ib_device->name); - if (!cm_dev->device) { + if (IS_ERR(cm_dev->device)) { kfree(cm_dev); return; } @@ -3846,28 +3930,33 @@ static int __init ib_cm_init(void) cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); - idr_pre_get(&cm.local_id_table, GFP_KERNEL); + if (!idr_pre_get(&cm.local_id_table, GFP_KERNEL)) + return -ENOMEM; INIT_LIST_HEAD(&cm.timewait_list); ret = class_register(&cm_class); - if (ret) - return -ENOMEM; + if (ret) { + ret = -ENOMEM; + goto error1; + } cm.wq = create_workqueue("ib_cm"); if (!cm.wq) { ret = -ENOMEM; - goto error1; + goto error2; } ret = ib_register_client(&cm_client); if (ret) - goto error2; + goto error3; return 0; -error2: +error3: destroy_workqueue(cm.wq); -error1: +error2: class_unregister(&cm_class); +error1: + idr_destroy(&cm.local_id_table); return ret; } diff --git a/sys/ofed/drivers/infiniband/core/cm_msgs.h 
b/sys/ofed/drivers/infiniband/core/cm_msgs.h index 7e63c08..be068f4 100644 --- a/sys/ofed/drivers/infiniband/core/cm_msgs.h +++ b/sys/ofed/drivers/infiniband/core/cm_msgs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * @@ -44,18 +44,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -#define CM_REQ_ATTR_ID cpu_to_be16(0x0010) -#define CM_MRA_ATTR_ID cpu_to_be16(0x0011) -#define CM_REJ_ATTR_ID cpu_to_be16(0x0012) -#define CM_REP_ATTR_ID cpu_to_be16(0x0013) -#define CM_RTU_ATTR_ID cpu_to_be16(0x0014) -#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015) -#define CM_DREP_ATTR_ID cpu_to_be16(0x0016) -#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017) -#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018) -#define CM_LAP_ATTR_ID cpu_to_be16(0x0019) -#define CM_APR_ATTR_ID cpu_to_be16(0x001A) - enum cm_msg_sequence { CM_MSG_SEQUENCE_REQ, CM_MSG_SEQUENCE_LAP, @@ -86,7 +74,7 @@ struct cm_req_msg { __be16 pkey; /* path MTU:4, RDC exists:1, RNR retry count:3. 
*/ u8 offset50; - /* max CM Retries:4, SRQ:1, rsvd:3 */ + /* max CM Retries:4, SRQ:1, extended transport type:3 */ u8 offset51; __be16 primary_local_lid; @@ -175,6 +163,11 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) switch(transport_type) { case 0: return IB_QPT_RC; case 1: return IB_QPT_UC; + case 3: + switch (req_msg->offset51 & 0x7) { + case 1: return IB_QPT_XRC_TGT; + default: return 0; + } default: return 0; } } @@ -188,6 +181,12 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, req_msg->offset40) & 0xFFFFFFF9) | 0x2); break; + case IB_QPT_XRC_INI: + req_msg->offset40 = cpu_to_be32((be32_to_cpu( + req_msg->offset40) & + 0xFFFFFFF9) | 0x6); + req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1; + break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & @@ -527,6 +526,23 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn) (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); } +static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg) +{ + return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8); +} + +static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn) +{ + rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) | + (be32_to_cpu(rep_msg->offset16) & 0x000000FF)); +} + +static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type) +{ + return (qp_type == IB_QPT_XRC_INI) ? 
+ cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg); +} + static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) { return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); @@ -771,6 +787,7 @@ struct cm_apr_msg { u8 info_length; u8 ap_status; + __be16 rsvd; u8 info[IB_CM_APR_INFO_LENGTH]; u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; diff --git a/sys/ofed/drivers/infiniband/core/cma.c b/sys/ofed/drivers/infiniband/core/cma.c index 318beb1..d2064b6 100644 --- a/sys/ofed/drivers/infiniband/core/cma.c +++ b/sys/ofed/drivers/infiniband/core/cma.c @@ -40,6 +40,10 @@ #include <linux/random.h> #include <linux/idr.h> #include <linux/inetdevice.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/string.h> +#include <net/route.h> #include <net/tcp.h> #include <net/ipv6.h> @@ -55,28 +59,47 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); -static int tavor_quirk = 0; -module_param_named(tavor_quirk, tavor_quirk, int, 0644); -MODULE_PARM_DESC(tavor_quirk, "Tavor performance quirk: limit MTU to 1K if > 0"); - -int unify_tcp_port_space = 1; -module_param(unify_tcp_port_space, int, 0644); -MODULE_PARM_DESC(unify_tcp_port_space, "Unify the host TCP and RDMA port " - "space allocation (default=1)"); - #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) -#define IBOE_PACKET_LIFETIME 18 +#define CMA_IBOE_PACKET_LIFETIME 18 static int cma_response_timeout = CMA_CM_RESPONSE_TIMEOUT; module_param_named(cma_response_timeout, cma_response_timeout, int, 0644); -MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT default=20"); +MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT (default=20)"); static int def_prec2sl = 3; module_param_named(def_prec2sl, def_prec2sl, int, 0644); MODULE_PARM_DESC(def_prec2sl, "Default value for SL priority with RoCE. 
Valid values 0 - 7"); +static int debug_level = 0; +#define cma_pr(level, priv, format, arg...) \ + printk(level "CMA: %p: %s: " format, ((struct rdma_id_priv *) priv) , __func__, ## arg) + +#define cma_dbg(priv, format, arg...) \ + do { if (debug_level) cma_pr(KERN_DEBUG, priv, format, ## arg); } while (0) + +#define cma_warn(priv, format, arg...) \ + cma_pr(KERN_WARNING, priv, format, ## arg) + +#define CMA_GID_FMT "%2.2x%2.2x:%2.2x%2.2x" +#define CMA_GID_RAW_ARG(gid) ((u8 *)(gid))[12],\ + ((u8 *)(gid))[13],\ + ((u8 *)(gid))[14],\ + ((u8 *)(gid))[15] + +#define CMA_GID_ARG(gid) CMA_GID_RAW_ARG((gid).raw) +#define cma_debug_path(priv, pfx, p) \ + cma_dbg(priv, pfx "sgid=" CMA_GID_FMT ",dgid=" \ + CMA_GID_FMT "\n", CMA_GID_ARG(p.sgid), \ + CMA_GID_ARG(p.dgid)) + +#define cma_debug_gid(priv, g) \ + cma_dbg(priv, "gid=" CMA_GID_FMT "\n", CMA_GID_ARG(g) + +module_param_named(debug_level, debug_level, int, 0644); +MODULE_PARM_DESC(debug_level, "debug level default=0"); + static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -92,13 +115,12 @@ static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; +static struct workqueue_struct *cma_free_wq; static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); static DEFINE_IDR(ipoib_ps); -#if defined(INET) -static int next_port; -#endif +static DEFINE_IDR(ib_ps); struct cma_device { struct list_head list; @@ -108,26 +130,16 @@ struct cma_device { struct list_head id_list; }; -enum cma_state { - CMA_IDLE, - CMA_ADDR_QUERY, - CMA_ADDR_RESOLVED, - CMA_ROUTE_QUERY, - CMA_ROUTE_RESOLVED, - CMA_CONNECT, - CMA_DISCONNECT, - CMA_ADDR_BOUND, - CMA_LISTEN, - CMA_DEVICE_REMOVAL, - CMA_DESTROYING -}; - struct rdma_bind_list { struct idr *ps; struct hlist_head owners; unsigned short port; }; +enum { + CMA_OPTION_AFONLY, +}; + /* * Device removal can occur at anytime, so we need extra handling 
to * serialize notifying the user of device removal with other callbacks. @@ -138,7 +150,7 @@ struct rdma_id_private { struct rdma_cm_id id; struct rdma_bind_list *bind_list; - struct socket *sock; + struct socket *sock; struct hlist_node node; struct list_head list; /* listen_any_list or cma_device.list */ struct list_head listen_list; /* per device listens */ @@ -146,13 +158,15 @@ struct rdma_id_private { struct list_head mc_list; int internal_id; - enum cma_state state; + enum rdma_cm_state state; spinlock_t lock; + spinlock_t cm_lock; struct mutex qp_mutex; struct completion comp; atomic_t refcount; struct mutex handler_mutex; + struct work_struct work; /* garbage coll */ int backlog; int timeout_ms; @@ -166,8 +180,16 @@ struct rdma_id_private { u32 seq_num; u32 qkey; u32 qp_num; + pid_t owner; + u32 options; u8 srq; u8 tos; + u8 reuseaddr; + u8 afonly; + int qp_timeout; + /* cache for mc record params */ + struct ib_sa_mcmember_rec rec; + int is_valid_rec; }; struct cma_multicast { @@ -184,8 +206,8 @@ struct cma_multicast { struct cma_work { struct work_struct work; struct rdma_id_private *id; - enum cma_state old_state; - enum cma_state new_state; + enum rdma_cm_state old_state; + enum rdma_cm_state new_state; struct rdma_cm_event event; }; @@ -236,7 +258,7 @@ struct sdp_hah { #define CMA_VERSION 0x00 #define SDP_MAJ_VERSION 0x2 -static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp) +static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) { unsigned long flags; int ret; @@ -248,7 +270,7 @@ static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp) } static int cma_comp_exch(struct rdma_id_private *id_priv, - enum cma_state comp, enum cma_state exch) + enum rdma_cm_state comp, enum rdma_cm_state exch) { unsigned long flags; int ret; @@ -260,11 +282,11 @@ static int cma_comp_exch(struct rdma_id_private *id_priv, return ret; } -static enum cma_state cma_exch(struct rdma_id_private *id_priv, - enum 
cma_state exch) +static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, + enum rdma_cm_state exch) { unsigned long flags; - enum cma_state old; + enum rdma_cm_state old; spin_lock_irqsave(&id_priv->lock, flags); old = id_priv->state; @@ -298,11 +320,6 @@ static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); } -static inline int cma_is_ud_ps(enum rdma_port_space ps) -{ - return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB); -} - static void cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { @@ -328,11 +345,13 @@ static inline void release_mc(struct kref *kref) kfree(mc); } -static void cma_detach_from_dev(struct rdma_id_private *id_priv) +static void cma_release_dev(struct rdma_id_private *id_priv) { + mutex_lock(&lock); list_del(&id_priv->list); cma_deref_dev(id_priv->cma_dev); id_priv->cma_dev = NULL; + mutex_unlock(&lock); } static int cma_set_qkey(struct rdma_id_private *id_priv) @@ -361,36 +380,71 @@ static int cma_set_qkey(struct rdma_id_private *id_priv) return ret; } +static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num) +{ + int i; + int err; + struct ib_port_attr props; + union ib_gid tmp; + + err = ib_query_port(device, port_num, &props); + if (err) + return 1; + + for (i = 0; i < props.gid_tbl_len; ++i) { + err = ib_query_gid(device, port_num, i, &tmp); + if (err) + return 1; + if (!memcmp(&tmp, gid, sizeof tmp)) + return 0; + } + + return -EAGAIN; +} + static int cma_acquire_dev(struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; - union ib_gid gid; + union ib_gid gid, iboe_gid; int ret = -ENODEV; + u8 port; + enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? 
+ IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; - if (dev_addr->dev_type != ARPHRD_INFINIBAND) { - iboe_addr_get_sgid(dev_addr, &gid); - list_for_each_entry(cma_dev, &dev_list, list) { - ret = ib_find_cached_gid(cma_dev->device, &gid, - &id_priv->id.port_num, NULL); - if (!ret) - goto out; - } - } + if (dev_ll != IB_LINK_LAYER_INFINIBAND && + id_priv->id.ps == RDMA_PS_IPOIB) + return -EINVAL; + + mutex_lock(&lock); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, + &iboe_gid); memcpy(&gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof gid); list_for_each_entry(cma_dev, &dev_list, list) { - ret = ib_find_cached_gid(cma_dev->device, &gid, - &id_priv->id.port_num, NULL); - if (!ret) + for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { + if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { + if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && + rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) + ret = find_gid_port(cma_dev->device, &iboe_gid, port); + else + ret = find_gid_port(cma_dev->device, &gid, port); + + if (!ret) { + id_priv->id.port_num = port; + goto out; + } else if (ret == 1) break; } + } + } out: if (!ret) cma_attach_to_dev(id_priv, cma_dev); + mutex_unlock(&lock); return ret; } @@ -401,7 +455,7 @@ static void cma_deref_id(struct rdma_id_private *id_priv) } static int cma_disable_callback(struct rdma_id_private *id_priv, - enum cma_state state) + enum rdma_cm_state state) { mutex_lock(&id_priv->handler_mutex); if (id_priv->state != state) { @@ -411,13 +465,9 @@ static int cma_disable_callback(struct rdma_id_private *id_priv, return 0; } -static int cma_has_cm_dev(struct rdma_id_private *id_priv) -{ - return (id_priv->id.device && id_priv->cm_id.ib); -} - struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, - void *context, enum rdma_port_space ps) + void *context, enum rdma_port_space ps, + enum ib_qp_type qp_type) { 
struct rdma_id_private *id_priv; @@ -425,11 +475,14 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, if (!id_priv) return ERR_PTR(-ENOMEM); - id_priv->state = CMA_IDLE; + id_priv->owner = curthread->td_proc->p_pid; + id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; + id_priv->id.qp_type = qp_type; spin_lock_init(&id_priv->lock); + spin_lock_init(&id_priv->cm_lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); atomic_set(&id_priv->refcount, 1); @@ -496,7 +549,7 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (IS_ERR(qp)) return PTR_ERR(qp); - if (cma_is_ud_ps(id_priv->id.ps)) + if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); else ret = cma_init_conn_qp(id_priv, qp); @@ -530,6 +583,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; + union ib_gid sgid; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { @@ -551,6 +605,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; + ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, + qp_attr.ah_attr.grh.sgid_index, &sgid); + if (ret) + goto out; + + if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) + == RDMA_TRANSPORT_IB && + rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) + == IB_LINK_LAYER_ETHERNET) { + ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); + + if (ret) + goto out; + } if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; @@ -579,6 +647,12 @@ static int cma_modify_qp_rts(struct rdma_id_private *id_priv, if (conn_param) qp_attr.max_rd_atomic = conn_param->initiator_depth; + + if (id_priv->qp_timeout && id_priv->id.qp->qp_type == IB_QPT_RC) { + qp_attr.timeout = id_priv->qp_timeout; + qp_attr_mask |= IB_QP_TIMEOUT; + 
} + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); @@ -624,7 +698,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, qp_attr->port_num = id_priv->id.port_num; *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; - if (cma_is_ud_ps(id_priv->id.ps)) { + if (id_priv->id.qp_type == IB_QPT_UD) { ret = cma_set_qkey(id_priv); if (ret) return ret; @@ -647,7 +721,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, id_priv = container_of(id, struct rdma_id_private, id); switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: - if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps)) + if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); else ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, @@ -656,6 +730,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, qp_attr->rq_psn = id_priv->seq_num; break; case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_SCIF: if (!id_priv->cm_id.iw) { qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; @@ -701,6 +776,21 @@ static inline int cma_any_addr(struct sockaddr *addr) return cma_zero_addr(addr) || cma_loopback_addr(addr); } +static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) +{ + if (src->sa_family != dst->sa_family) + return -1; + + switch (src->sa_family) { + case AF_INET: + return ((struct sockaddr_in *) src)->sin_addr.s_addr != + ((struct sockaddr_in *) dst)->sin_addr.s_addr; + default: + return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, + &((struct sockaddr_in6 *) dst)->sin6_addr); + } +} + static inline __be16 cma_port(struct sockaddr *addr) { if (addr->sa_family == AF_INET) @@ -831,16 +921,16 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv) } static void cma_cancel_operation(struct rdma_id_private *id_priv, - enum cma_state 
state) + enum rdma_cm_state state) { switch (state) { - case CMA_ADDR_QUERY: + case RDMA_CM_ADDR_QUERY: rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); break; - case CMA_ROUTE_QUERY: + case RDMA_CM_ROUTE_QUERY: cma_cancel_route(id_priv); break; - case CMA_LISTEN: + case RDMA_CM_LISTEN: if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr) && !id_priv->cma_dev) cma_cancel_listens(id_priv); @@ -852,20 +942,21 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, static void cma_release_port(struct rdma_id_private *id_priv) { - struct rdma_bind_list *bind_list = id_priv->bind_list; - - if (!bind_list) - return; + struct rdma_bind_list *bind_list; mutex_lock(&lock); + bind_list = id_priv->bind_list; + if (!bind_list) { + mutex_unlock(&lock); + return; + } hlist_del(&id_priv->node); + id_priv->bind_list = NULL; if (hlist_empty(&bind_list->owners)) { idr_remove(bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); - if (id_priv->sock) - sock_release(id_priv->sock); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) @@ -889,46 +980,66 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) } } } +static void __rdma_free(struct work_struct *work) +{ + struct rdma_id_private *id_priv; + id_priv = container_of(work, struct rdma_id_private, work); + + wait_for_completion(&id_priv->comp); + + if (id_priv->internal_id) + cma_deref_id(id_priv->id.context); + + kfree(id_priv->id.route.path_rec); + kfree(id_priv); +} void rdma_destroy_id(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; - enum cma_state state; + enum rdma_cm_state state; + unsigned long flags; + struct ib_cm_id *ib; id_priv = container_of(id, struct rdma_id_private, id); - state = cma_exch(id_priv, CMA_DESTROYING); + state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); - mutex_lock(&lock); + /* + * Wait for any active callback to finish. 
New callbacks will find + * the id_priv state set to destroying and abort. + */ + mutex_lock(&id_priv->handler_mutex); + mutex_unlock(&id_priv->handler_mutex); + if (id_priv->cma_dev) { - mutex_unlock(&lock); switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: - if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) - ib_destroy_cm_id(id_priv->cm_id.ib); + spin_lock_irqsave(&id_priv->cm_lock, flags); + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) { + ib = id_priv->cm_id.ib; + id_priv->cm_id.ib = NULL; + spin_unlock_irqrestore(&id_priv->cm_lock, flags); + ib_destroy_cm_id(ib); + } else + spin_unlock_irqrestore(&id_priv->cm_lock, flags); break; case RDMA_TRANSPORT_IWARP: - if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + case RDMA_TRANSPORT_SCIF: + if (id_priv->cm_id.iw) iw_destroy_cm_id(id_priv->cm_id.iw); break; default: break; } cma_leave_mc_groups(id_priv); - mutex_lock(&lock); - cma_detach_from_dev(id_priv); + cma_release_dev(id_priv); } - mutex_unlock(&lock); cma_release_port(id_priv); cma_deref_id(id_priv); - wait_for_completion(&id_priv->comp); - - if (id_priv->internal_id) - cma_deref_id(id_priv->id.context); - - kfree(id_priv->id.route.path_rec); - kfree(id_priv); + INIT_WORK(&id_priv->work, __rdma_free); + queue_work(cma_free_wq, &id_priv->work); } EXPORT_SYMBOL(rdma_destroy_id); @@ -944,6 +1055,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) if (ret) goto reject; + cma_dbg(id_priv, "sending RTU\n"); ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); if (ret) goto reject; @@ -951,6 +1063,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) return 0; reject: cma_modify_qp_err(id_priv); + cma_dbg(id_priv, "sending REJ\n"); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; @@ -987,11 +1100,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) int ret = 0; if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && - 
cma_disable_callback(id_priv, CMA_CONNECT)) || + cma_disable_callback(id_priv, RDMA_CM_CONNECT)) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && - cma_disable_callback(id_priv, CMA_DISCONNECT))) + cma_disable_callback(id_priv, RDMA_CM_DISCONNECT))) return 0; - memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_REQ_ERROR: @@ -1020,7 +1132,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.status = -ETIMEDOUT; /* fall through */ case IB_CM_DREQ_RECEIVED: case IB_CM_DREP_RECEIVED: - if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT)) + if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, + RDMA_CM_DISCONNECT)) goto out; event.event = RDMA_CM_EVENT_DISCONNECTED; break; @@ -1047,7 +1160,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; @@ -1070,12 +1183,12 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (cma_get_net_info(ib_event->private_data, listen_id->ps, &ip_ver, &port, &src, &dst)) - goto err; + return NULL; id = rdma_create_id(listen_id->event_handler, listen_id->context, - listen_id->ps); + listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) - goto err; + return NULL; cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); @@ -1085,7 +1198,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); if (!rt->path_rec) - goto destroy_id; + goto err; rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; if (rt->num_paths == 2) @@ -1094,22 +1207,21 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (cma_any_addr((struct 
sockaddr *) &rt->addr.src_addr)) { rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); - ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey); + ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); } else { ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, - &rt->addr.dev_addr); + &rt->addr.dev_addr, NULL); if (ret) - goto destroy_id; + goto err; } rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv = container_of(id, struct rdma_id_private, id); - id_priv->state = CMA_CONNECT; + id_priv->state = RDMA_CM_CONNECT; return id_priv; -destroy_id: - rdma_destroy_id(id); err: + rdma_destroy_id(id); return NULL; } @@ -1124,7 +1236,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, int ret; id = rdma_create_id(listen_id->event_handler, listen_id->context, - listen_id->ps); + listen_id->ps, IB_QPT_UD); if (IS_ERR(id)) return NULL; @@ -1138,13 +1250,13 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); + &id->route.addr.dev_addr, NULL); if (ret) goto err; } id_priv = container_of(id, struct rdma_id_private, id); - id_priv->state = CMA_CONNECT; + id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); @@ -1166,20 +1278,43 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = req_data->remote_qpn; } +static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) +{ + return (((ib_event->event == IB_CM_REQ_RECEIVED) && + (ib_event->param.req_rcvd.qp_type == id->qp_type)) || + ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && + (id->qp_type == IB_QPT_UD)) || + (!id->qp_type)); +} + static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { 
struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; int offset, ret; + u8 smac[ETH_ALEN]; + u8 alt_smac[ETH_ALEN]; + u8 *psmac = smac; + u8 *palt_smac = alt_smac; + int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) == + RDMA_TRANSPORT_IB) && + (rdma_port_get_link_layer(cm_id->device, + ib_event->param.req_rcvd.port) == + IB_LINK_LAYER_ETHERNET)); + int is_sidr = 0; listen_id = cm_id->context; - if (cma_disable_callback(listen_id, CMA_LISTEN)) + if (!cma_check_req_qp_type(&listen_id->id, ib_event)) + return -EINVAL; + + if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - if (cma_is_ud_ps(listen_id->id.ps)) { + if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { + is_sidr = 1; conn_id = cma_new_udp_id(&listen_id->id, ib_event); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = @@ -1191,45 +1326,69 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) } if (!conn_id) { ret = -ENOMEM; - goto out; + goto err1; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); - mutex_lock(&lock); ret = cma_acquire_dev(conn_id); - mutex_unlock(&lock); if (ret) - goto release_conn_id; + goto err2; conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; + /* + * Protect against the user destroying conn_id from another thread + * until we're done accessing it. 
+ */ + atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); - if (!ret) { + if (ret) + goto err3; + + if (is_iboe && !is_sidr) { + if (ib_event->param.req_rcvd.primary_path != NULL) + rdma_addr_find_smac_by_sgid( + &ib_event->param.req_rcvd.primary_path->sgid, + psmac, NULL); + else + psmac = NULL; + if (ib_event->param.req_rcvd.alternate_path != NULL) + rdma_addr_find_smac_by_sgid( + &ib_event->param.req_rcvd.alternate_path->sgid, + palt_smac, NULL); + else + palt_smac = NULL; + } /* * Acquire mutex to prevent user executing rdma_destroy_id() * while we're accessing the cm_id. */ mutex_lock(&lock); - if (cma_comp(conn_id, CMA_CONNECT) && - !cma_is_ud_ps(conn_id->id.ps)) + if (is_iboe && !is_sidr) + ib_update_cm_av(cm_id, psmac, palt_smac); + if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) { + cma_dbg(container_of(&conn_id->id, struct rdma_id_private, id), "sending MRA\n"); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + } mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); - goto out; - } + mutex_unlock(&listen_id->handler_mutex); + cma_deref_id(conn_id); + return 0; +err3: + cma_deref_id(conn_id); /* Destroy the CM ID by returning a non-zero value. 
*/ conn_id->cm_id.ib = NULL; - -release_conn_id: - cma_exch(conn_id, CMA_DESTROYING); +err2: + cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(&conn_id->id); - -out: +err1: mutex_unlock(&listen_id->handler_mutex); + if (conn_id) + rdma_destroy_id(&conn_id->id); return ret; } @@ -1244,9 +1403,7 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, struct cma_hdr *cma_data, *cma_mask; struct sdp_hh *sdp_data, *sdp_mask; __be32 ip4_addr; -#ifdef INET6 struct in6_addr ip6_addr; -#endif memset(compare, 0, sizeof *compare); cma_data = (void *) compare->data; @@ -1260,33 +1417,39 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, if (ps == RDMA_PS_SDP) { sdp_set_ip_ver(sdp_data, 4); sdp_set_ip_ver(sdp_mask, 0xF); - sdp_data->dst_addr.ip4.addr = ip4_addr; - sdp_mask->dst_addr.ip4.addr = htonl(~0); + if (!cma_any_addr(addr)) { + sdp_data->dst_addr.ip4.addr = ip4_addr; + sdp_mask->dst_addr.ip4.addr = htonl(~0); + } } else { cma_set_ip_ver(cma_data, 4); cma_set_ip_ver(cma_mask, 0xF); - cma_data->dst_addr.ip4.addr = ip4_addr; - cma_mask->dst_addr.ip4.addr = htonl(~0); + if (!cma_any_addr(addr)) { + cma_data->dst_addr.ip4.addr = ip4_addr; + cma_mask->dst_addr.ip4.addr = htonl(~0); + } } break; -#ifdef INET6 case AF_INET6: ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; if (ps == RDMA_PS_SDP) { sdp_set_ip_ver(sdp_data, 6); sdp_set_ip_ver(sdp_mask, 0xF); - sdp_data->dst_addr.ip6 = ip6_addr; - memset(&sdp_mask->dst_addr.ip6, 0xFF, - sizeof sdp_mask->dst_addr.ip6); + if (!cma_any_addr(addr)) { + sdp_data->dst_addr.ip6 = ip6_addr; + memset(&sdp_mask->dst_addr.ip6, 0xFF, + sizeof(sdp_mask->dst_addr.ip6)); + } } else { cma_set_ip_ver(cma_data, 6); cma_set_ip_ver(cma_mask, 0xF); - cma_data->dst_addr.ip6 = ip6_addr; - memset(&cma_mask->dst_addr.ip6, 0xFF, - sizeof cma_mask->dst_addr.ip6); + if (!cma_any_addr(addr)) { + cma_data->dst_addr.ip6 = ip6_addr; + 
memset(&cma_mask->dst_addr.ip6, 0xFF, + sizeof(cma_mask->dst_addr.ip6)); + } } break; -#endif default: break; } @@ -1299,7 +1462,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr_in *sin; int ret = 0; - if (cma_disable_callback(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -1315,6 +1478,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) switch ((int)iw_event->status) { case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; break; case -ECONNRESET: case -ECONNREFUSED: @@ -1330,6 +1495,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) break; case IW_CM_EVENT_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; break; default: BUG_ON(1); @@ -1342,7 +1509,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.iw = NULL; - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; @@ -1364,22 +1531,22 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct ib_device_attr attr; listen_id = cm_id->context; - if (cma_disable_callback(listen_id, CMA_LISTEN)) + if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.event_handler, listen_id->id.context, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { ret = -ENOMEM; goto out; } conn_id = container_of(new_cm_id, struct rdma_id_private, id); mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); - conn_id->state = CMA_CONNECT; + conn_id->state = RDMA_CM_CONNECT; - dev = ip_dev_find(NULL, iw_event->local_addr.sin_addr.s_addr); + dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; mutex_unlock(&conn_id->handler_mutex); @@ -1393,9 +1560,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, goto out; } - mutex_lock(&lock); ret = cma_acquire_dev(conn_id); - mutex_unlock(&lock); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); @@ -1422,19 +1587,27 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, event.event = RDMA_CM_EVENT_CONNECT_REQUEST; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; - event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; - event.param.conn.responder_resources = attr.max_qp_rd_atom; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; + + /* + * Protect against the user destroying conn_id from another thread + * until we're done accessing it. 
+ */ + atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; - cma_exch(conn_id, CMA_DESTROYING); + cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); goto out; } mutex_unlock(&conn_id->handler_mutex); + cma_deref_id(conn_id); out: if (dev) @@ -1447,17 +1620,19 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) { struct ib_cm_compare_data compare_data; struct sockaddr *addr; + struct ib_cm_id *id; __be64 svc_id; int ret; - id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler, - id_priv); - if (IS_ERR(id_priv->cm_id.ib)) - return PTR_ERR(id_priv->cm_id.ib); + id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv); + if (IS_ERR(id)) + return PTR_ERR(id); + + id_priv->cm_id.ib = id; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; svc_id = cma_get_service_id(id_priv->id.ps, addr); - if (cma_any_addr(addr)) + if (cma_any_addr(addr) && !id_priv->afonly) ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); else { cma_set_compare_data(id_priv->id.ps, addr, &compare_data); @@ -1476,13 +1651,16 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) { int ret; struct sockaddr_in *sin; + struct iw_cm_id *id; - id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, + id = iw_create_cm_id(id_priv->id.device, id_priv->sock, iw_conn_req_handler, id_priv); - if (IS_ERR(id_priv->cm_id.iw)) - return PTR_ERR(id_priv->cm_id.iw); + if (IS_ERR(id)) + return PTR_ERR(id); + + id_priv->cm_id.iw = id; sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; id_priv->cm_id.iw->local_addr = *sin; @@ -1514,13 +1692,14 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct rdma_cm_id *id; int ret; - id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps); + id = rdma_create_id(cma_listen_handler, 
id_priv, id_priv->id.ps, + id_priv->id.qp_type); if (IS_ERR(id)) return; dev_id_priv = container_of(id, struct rdma_id_private, id); - dev_id_priv->state = CMA_ADDR_BOUND; + dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); @@ -1528,11 +1707,11 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); atomic_inc(&id_priv->refcount); dev_id_priv->internal_id = 1; + dev_id_priv->afonly = id_priv->afonly; ret = rdma_listen(id, id_priv->backlog); if (ret) - printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, " - "listening on device %s\n", ret, cma_dev->device->name); + cma_warn(id_priv, "cma_listen_on_dev, error %d, listening on device %s\n", ret, cma_dev->device->name); } static void cma_listen_on_all(struct rdma_id_private *id_priv) @@ -1546,58 +1725,23 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -int rdma_listen(struct rdma_cm_id *id, int backlog) +void rdma_set_service_type(struct rdma_cm_id *id, int tos) { struct rdma_id_private *id_priv; - int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (id_priv->state == CMA_IDLE) { - ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; - ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); - if (ret) - return ret; - } - - if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN)) - return -EINVAL; - - id_priv->backlog = backlog; - if (id->device) { - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: - ret = cma_ib_listen(id_priv); - if (ret) - goto err; - break; - case RDMA_TRANSPORT_IWARP: - ret = cma_iw_listen(id_priv, backlog); - if (ret) - goto err; - break; - default: - ret = -ENOSYS; - goto err; - } - } else - cma_listen_on_all(id_priv); - - return 0; -err: - id_priv->backlog = 0; - 
cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND); - return ret; + id_priv->tos = (u8) tos; } -EXPORT_SYMBOL(rdma_listen); +EXPORT_SYMBOL(rdma_set_service_type); -void rdma_set_service_type(struct rdma_cm_id *id, int tos) +void rdma_set_timeout(struct rdma_cm_id *id, int timeout) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); - id_priv->tos = (u8) tos; + id_priv->qp_timeout = (u8) timeout; } -EXPORT_SYMBOL(rdma_set_service_type); +EXPORT_SYMBOL(rdma_set_timeout); static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, void *context) @@ -1611,8 +1755,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, route->num_paths = 1; *route->path_rec = *path_rec; } else { - work->old_state = CMA_ROUTE_QUERY; - work->new_state = CMA_ADDR_RESOLVED; + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; work->event.status = status; } @@ -1650,11 +1794,6 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; } - if (tavor_quirk) { - path_rec.mtu_selector = IB_SA_LT; - path_rec.mtu = IB_MTU_2048; - } - id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, id_priv->id.port_num, &path_rec, comp_mask, timeout_ms, @@ -1675,7 +1814,7 @@ static void cma_work_handler(struct work_struct *_work) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } out: @@ -1693,12 +1832,12 @@ static void cma_ndev_work_handler(struct work_struct *_work) int destroy = 0; mutex_lock(&id_priv->handler_mutex); - if (id_priv->state == CMA_DESTROYING || - id_priv->state == CMA_DEVICE_REMOVAL) + if (id_priv->state == RDMA_CM_DESTROYING || + id_priv->state == RDMA_CM_DEVICE_REMOVAL) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { - 
cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } @@ -1722,8 +1861,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); - work->old_state = CMA_ROUTE_QUERY; - work->new_state = CMA_ROUTE_RESOLVED; + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); @@ -1752,19 +1891,21 @@ int rdma_set_ib_paths(struct rdma_cm_id *id, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ROUTE_RESOLVED)) return -EINVAL; - id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL); + id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, + GFP_KERNEL); if (!id->route.path_rec) { ret = -ENOMEM; goto err; } - memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths); + id->route.num_paths = num_paths; return 0; err: - cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED); + cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_set_ib_paths); @@ -1779,8 +1920,8 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); - work->old_state = CMA_ROUTE_QUERY; - work->new_state = CMA_ROUTE_RESOLVED; + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; queue_work(cma_wq, &work->work); return 0; @@ -1800,7 +1941,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; struct sockaddr_in *dst_addr = (struct sockaddr_in 
*)&route->addr.dst_addr; struct net_device *ndev = NULL; - u16 vid; + if (src_addr->sin_family != dst_addr->sin_family) return -EINVAL; @@ -1827,10 +1968,15 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err2; } - vid = rdma_vlan_dev_vlan_id(ndev); + route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); + memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); + memcpy(route->path_rec->smac, IF_LLADDR(ndev), ndev->if_addrlen); + - iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid); - iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, + &route->path_rec->sgid); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, + &route->path_rec->dgid); route->path_rec->hop_limit = 1; route->path_rec->reversible = 1; @@ -1838,23 +1984,19 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->mtu_selector = IB_SA_EQ; route->path_rec->sl = tos_to_sl(id_priv->tos); -#ifdef __linux__ - route->path_rec->mtu = iboe_get_mtu(ndev->mtu); -#else route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); -#endif route->path_rec->rate_selector = IB_SA_EQ; route->path_rec->rate = iboe_get_rate(ndev); dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; - route->path_rec->packet_life_time = IBOE_PACKET_LIFETIME; + route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; if (!route->path_rec->mtu) { ret = -EINVAL; goto err2; } - work->old_state = CMA_ROUTE_QUERY; - work->new_state = CMA_ROUTE_RESOLVED; + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; work->event.status = 0; @@ -1876,7 +2018,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY)) + 
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); @@ -1894,6 +2036,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) } break; case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_SCIF: ret = cma_resolve_iw_route(id_priv, timeout_ms); break; default: @@ -1905,12 +2048,19 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) return 0; err: - cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED); + cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_route); +int rdma_enable_apm(struct rdma_cm_id *id, enum alt_path_type alt_type) +{ + /* APM is not supported yet */ + return -EINVAL; +} +EXPORT_SYMBOL(rdma_enable_apm); + static int cma_bind_loopback(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; @@ -1964,34 +2114,26 @@ static void addr_handler(int status, struct sockaddr *src_addr, memset(&event, 0, sizeof event); mutex_lock(&id_priv->handler_mutex); - - /* - * Grab mutex to block rdma_destroy_id() from removing the device while - * we're trying to acquire it. 
- */ - mutex_lock(&lock); - if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) { - mutex_unlock(&lock); + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, + RDMA_CM_ADDR_RESOLVED)) goto out; - } + memcpy(&id_priv->id.route.addr.src_addr, src_addr, + ip_addr_size(src_addr)); if (!status && !id_priv->cma_dev) status = cma_acquire_dev(id_priv); - mutex_unlock(&lock); if (status) { - if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ADDR_BOUND)) goto out; event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = status; - } else { - memcpy(&id_priv->id.route.addr.src_addr, src_addr, - ip_addr_size(src_addr)); + } else event.event = RDMA_CM_EVENT_ADDR_RESOLVED; - } if (id_priv->id.event_handler(&id_priv->id, &event)) { - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); @@ -2026,18 +2168,18 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) if (cma_zero_addr(src)) { dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; if ((src->sa_family = dst->sa_family) == AF_INET) { - ((struct sockaddr_in *) src)->sin_addr.s_addr = - ((struct sockaddr_in *) dst)->sin_addr.s_addr; + ((struct sockaddr_in *)src)->sin_addr = + ((struct sockaddr_in *)dst)->sin_addr; } else { - ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr, - &((struct sockaddr_in6 *) dst)->sin6_addr); + ((struct sockaddr_in6 *)src)->sin6_addr = + ((struct sockaddr_in6 *)dst)->sin6_addr; } } work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); - work->old_state = CMA_ADDR_QUERY; - work->new_state = CMA_ADDR_RESOLVED; + work->old_state = RDMA_CM_ADDR_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; @@ -2046,6 +2188,25 @@ err: return ret; } +static int cma_resolve_scif(struct 
rdma_id_private *id_priv) +{ + struct cma_work *work; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + /* we probably can leave it empty here */ + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + work->old_state = RDMA_CM_ADDR_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +} + static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr) { @@ -2061,11 +2222,12 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, else { struct sockaddr_in addr_in; - memset(&addr_in, 0, sizeof addr_in); - addr_in.sin_family = dst_addr->sa_family; - addr_in.sin_len = sizeof addr_in; - return rdma_bind_addr(id, (struct sockaddr *) &addr_in); + memset(&addr_in, 0, sizeof addr_in); + addr_in.sin_family = dst_addr->sa_family; + addr_in.sin_len = sizeof addr_in; + return rdma_bind_addr(id, (struct sockaddr *) &addr_in); } + } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, @@ -2075,19 +2237,22 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (id_priv->state == CMA_IDLE) { + if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); if (ret) return ret; } - if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) ret = cma_resolve_loopback(id_priv); + else if (id_priv->id.device && + rdma_node_get_transport(id_priv->id.device->node_type) == RDMA_TRANSPORT_SCIF) + ret = cma_resolve_scif(id_priv); else ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr, dst_addr, &id->route.addr.dev_addr, @@ 
-2097,12 +2262,51 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, return 0; err: - cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND); + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_addr); +int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) +{ + struct rdma_id_private *id_priv; + unsigned long flags; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + spin_lock_irqsave(&id_priv->lock, flags); + if (id_priv->state == RDMA_CM_IDLE) { + id_priv->reuseaddr = reuse; + ret = 0; + } else { + ret = -EINVAL; + } + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(rdma_set_reuseaddr); + +int rdma_set_afonly(struct rdma_cm_id *id, int afonly) +{ + struct rdma_id_private *id_priv; + unsigned long flags; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + spin_lock_irqsave(&id_priv->lock, flags); + if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { + id_priv->options |= (1 << CMA_OPTION_AFONLY); + id_priv->afonly = afonly; + ret = 0; + } else { + ret = -EINVAL; + } + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(rdma_set_afonly); + static void cma_bind_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv) { @@ -2149,126 +2353,100 @@ err1: static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) { -#if defined(INET) - struct rdma_bind_list *bind_list; - int port, ret, low, high; - - bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); - if (!bind_list) - return -ENOMEM; - -retry: - /* FIXME: add proper port randomization per like inet_csk_get_port */ - do { - ret = idr_get_new_above(ps, bind_list, next_port, &port); - } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); - - if (ret) - goto err1; + static unsigned int last_used_port; + int low, high, remaining; + unsigned int rover; 
inet_get_local_port_range(&low, &high); - if (port > high) { - if (next_port != low) { - idr_remove(ps, port); - next_port = low; - goto retry; + remaining = (high - low) + 1; + rover = random() % remaining + low; +retry: + if (last_used_port != rover && + !idr_find(ps, (unsigned short) rover)) { + int ret = cma_alloc_port(ps, id_priv, rover); + /* + * Remember previously used port number in order to avoid + * re-using same port immediately after it is closed. + */ + if (!ret) + last_used_port = rover; + if (ret != -EADDRNOTAVAIL) + return ret; } - ret = -EADDRNOTAVAIL; - goto err2; + if (--remaining) { + rover++; + if ((rover < low) || (rover > high)) + rover = low; + goto retry; } - - if (port == high) - next_port = low; - else - next_port = port + 1; - - bind_list->ps = ps; - bind_list->port = (unsigned short) port; - cma_bind_port(bind_list, id_priv); - return 0; -err2: - idr_remove(ps, port); -err1: - kfree(bind_list); - return ret; -#else - return -ENOSPC; -#endif + return -EADDRNOTAVAIL; } -static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) +/* + * Check that the requested port is available. This is called when trying to + * bind to a specific port, or when trying to listen on a bound port. In + * the latter case, the provided id_priv may already be on the bind_list, but + * we still need to check that it's okay to start listening. 
+ */ +static int cma_check_port(struct rdma_bind_list *bind_list, + struct rdma_id_private *id_priv, uint8_t reuseaddr) { struct rdma_id_private *cur_id; - struct sockaddr_in *sin, *cur_sin; - struct rdma_bind_list *bind_list; + struct sockaddr *addr, *cur_addr; struct hlist_node *node; - unsigned short snum; - sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; - snum = ntohs(sin->sin_port); -#ifdef __linux__ - if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) - return -EACCES; -#endif + addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; + hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { + if (id_priv == cur_id) + continue; - bind_list = idr_find(ps, snum); - if (!bind_list) - return cma_alloc_port(ps, id_priv, snum); + if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && + cur_id->reuseaddr) + continue; - /* - * We don't support binding to any address if anyone is bound to - * a specific address on the same port. - */ - if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) - return -EADDRNOTAVAIL; + cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; + if (id_priv->afonly && cur_id->afonly && + (addr->sa_family != cur_addr->sa_family)) + continue; - hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { - if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr)) + if (cma_any_addr(addr) || cma_any_addr(cur_addr)) return -EADDRNOTAVAIL; - cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr; - if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr) + if (!cma_addr_cmp(addr, cur_addr)) return -EADDRINUSE; } - - cma_bind_port(bind_list, id_priv); return 0; } -static int cma_get_tcp_port(struct rdma_id_private *id_priv) +static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) { + struct rdma_bind_list *bind_list; + unsigned short snum; int ret; - int size; - struct socket *sock; - ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); - 
if (ret) - return ret; -#ifdef __linux__ - ret = sock->ops->bind(sock, - (struct sockaddr *) &id_priv->id.route.addr.src_addr, - ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); -#else - ret = -sobind(sock, - (struct sockaddr *)&id_priv->id.route.addr.src_addr, - curthread); -#endif - if (ret) { - sock_release(sock); - return ret; - } + snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)); - size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr); - ret = sock_getname(sock, - (struct sockaddr *) &id_priv->id.route.addr.src_addr, - &size, 0); - if (ret) { - sock_release(sock); - return ret; + bind_list = idr_find(ps, snum); + if (!bind_list) { + ret = cma_alloc_port(ps, id_priv, snum); + } else { + ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); + if (!ret) + cma_bind_port(bind_list, id_priv); } + return ret; +} - id_priv->sock = sock; - return 0; +static int cma_bind_listen(struct rdma_id_private *id_priv) +{ + struct rdma_bind_list *bind_list = id_priv->bind_list; + int ret = 0; + + mutex_lock(&lock); + if (bind_list->owners.first->next) + ret = cma_check_port(bind_list, id_priv, 0); + mutex_unlock(&lock); + return ret; } static int cma_get_port(struct rdma_id_private *id_priv) @@ -2282,11 +2460,6 @@ static int cma_get_port(struct rdma_id_private *id_priv) break; case RDMA_PS_TCP: ps = &tcp_ps; - if (unify_tcp_port_space) { - ret = cma_get_tcp_port(id_priv); - if (ret) - goto out; - } break; case RDMA_PS_UDP: ps = &udp_ps; @@ -2294,6 +2467,9 @@ static int cma_get_port(struct rdma_id_private *id_priv) case RDMA_PS_IPOIB: ps = &ipoib_ps; break; + case RDMA_PS_IB: + ps = &ib_ps; + break; default: return -EPROTONOSUPPORT; } @@ -2304,7 +2480,7 @@ static int cma_get_port(struct rdma_id_private *id_priv) else ret = cma_use_port(ps, id_priv); mutex_unlock(&lock); -out: + return ret; } @@ -2318,11 +2494,7 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, return 0; sin6 = (struct 
sockaddr_in6 *) addr; -#ifdef __linux__ - if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && -#else if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) && -#endif !sin6->sin6_scope_id) return -EINVAL; @@ -2331,48 +2503,105 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, return 0; } +int rdma_listen(struct rdma_cm_id *id, int backlog) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == RDMA_CM_IDLE) { + ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; + ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); + if (ret) + return ret; + } + + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) + return -EINVAL; + + if (id_priv->reuseaddr) { + ret = cma_bind_listen(id_priv); + if (ret) + goto err; + } + + id_priv->backlog = backlog; + if (id->device) { + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ret = cma_ib_listen(id_priv); + if (ret) + goto err; + break; + case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_SCIF: + ret = cma_iw_listen(id_priv, backlog); + if (ret) + goto err; + break; + default: + ret = -ENOSYS; + goto err; + } + } else + cma_listen_on_all(id_priv); + + return 0; +err: + id_priv->backlog = 0; + cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_listen); + int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; int ret; + int ipv6only; + size_t var_size = sizeof(int); if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) + if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); if (ret) goto err1; + memcpy(&id->route.addr.src_addr, addr, 
ip_addr_size(addr)); if (!cma_any_addr(addr)) { - ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); + ret = rdma_translate_ip(addr, &id->route.addr.dev_addr, NULL); if (ret) goto err1; - mutex_lock(&lock); ret = cma_acquire_dev(id_priv); - mutex_unlock(&lock); if (ret) goto err1; } - memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); + if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { + if (addr->sa_family == AF_INET) + id_priv->afonly = 1; +#if defined(INET6) + else if (addr->sa_family == AF_INET6) + id_priv->afonly = kernel_sysctlbyname(&thread0, "net.inet6.ip6.v6only", + &ipv6only, &var_size, NULL, 0, NULL, 0); +#endif + } ret = cma_get_port(id_priv); if (ret) goto err2; return 0; err2: - if (id_priv->cma_dev) { - mutex_lock(&lock); - cma_detach_from_dev(id_priv); - mutex_unlock(&lock); - } + if (id_priv->cma_dev) + cma_release_dev(id_priv); err1: - cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE); + cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } EXPORT_SYMBOL(rdma_bind_addr); @@ -2445,7 +2674,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; - if (cma_disable_callback(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -2491,7 +2720,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.ib = NULL; - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; @@ -2506,10 +2735,14 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, { struct ib_cm_sidr_req_param req; struct rdma_route *route; + struct ib_cm_id *id; int ret; req.private_data_len = sizeof(struct cma_hdr) + conn_param->private_data_len; + if (req.private_data_len < conn_param->private_data_len) + return -EINVAL; + req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!req.private_data) return -ENOMEM; @@ -2523,12 +2756,13 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, if (ret) goto out; - id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, - cma_sidr_rep_handler, id_priv); - if (IS_ERR(id_priv->cm_id.ib)) { - ret = PTR_ERR(id_priv->cm_id.ib); + id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, + id_priv); + if (IS_ERR(id)) { + ret = PTR_ERR(id); goto out; } + id_priv->cm_id.ib = id; req.path = route->path_rec; req.service_id = cma_get_service_id(id_priv->id.ps, @@ -2536,6 +2770,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, req.timeout_ms = 1 << (cma_response_timeout - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; + cma_dbg(id_priv, "sending SIDR\n"); ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); @@ -2552,11 +2787,15 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, struct ib_cm_req_param req; struct rdma_route *route; void *private_data; + struct ib_cm_id *id; int offset, ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv->id.ps); req.private_data_len = offset + conn_param->private_data_len; + if (req.private_data_len < conn_param->private_data_len) + return -EINVAL; + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!private_data) return -ENOMEM; @@ -2565,12 +2804,12 @@ static int 
cma_connect_ib(struct rdma_id_private *id_priv, memcpy(private_data + offset, conn_param->private_data, conn_param->private_data_len); - id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler, - id_priv); - if (IS_ERR(id_priv->cm_id.ib)) { - ret = PTR_ERR(id_priv->cm_id.ib); + id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); + if (IS_ERR(id)) { + ret = PTR_ERR(id); goto out; } + id_priv->cm_id.ib = id; route = &id_priv->id.route; ret = cma_format_hdr(private_data, id_priv->id.ps, route); @@ -2585,22 +2824,23 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &route->addr.dst_addr); req.qp_num = id_priv->qp_num; - req.qp_type = IB_QPT_RC; + req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; req.flow_control = conn_param->flow_control; - req.retry_count = conn_param->retry_count; - req.rnr_retry_count = conn_param->rnr_retry_count; + req.retry_count = min_t(u8, 7, conn_param->retry_count); + req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); req.remote_cm_response_timeout = cma_response_timeout; req.local_cm_response_timeout = cma_response_timeout; req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 
1 : 0; + cma_dbg(id_priv, "sending REQ\n"); ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: - if (ret && !IS_ERR(id_priv->cm_id.ib)) { - ib_destroy_cm_id(id_priv->cm_id.ib); + if (ret && !IS_ERR(id)) { + ib_destroy_cm_id(id); id_priv->cm_id.ib = NULL; } @@ -2617,11 +2857,9 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, struct iw_cm_conn_param iw_param; cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock, - cma_iw_handler, id_priv); - if (IS_ERR(cm_id)) { - ret = PTR_ERR(cm_id); - goto out; - } + cma_iw_handler, id_priv); + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); id_priv->cm_id.iw = cm_id; @@ -2635,17 +2873,19 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, if (ret) goto out; + if (conn_param) { iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; iw_param.private_data = conn_param->private_data; iw_param.private_data_len = conn_param->private_data_len; - if (id_priv->id.qp) + iw_param.qpn = id_priv->id.qp ? 
id_priv->qp_num : conn_param->qp_num; + } else { + memset(&iw_param, 0, sizeof iw_param); iw_param.qpn = id_priv->qp_num; - else - iw_param.qpn = conn_param->qp_num; + } ret = iw_cm_connect(cm_id, &iw_param); out: - if (ret && !IS_ERR(cm_id)) { + if (ret) { iw_destroy_cm_id(cm_id); id_priv->cm_id.iw = NULL; } @@ -2658,7 +2898,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT)) + if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp) { @@ -2668,12 +2908,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (cma_is_ud_ps(id->ps)) + if (id->qp_type == IB_QPT_UD) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); break; case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_SCIF: ret = cma_connect_iw(id_priv, conn_param); break; default: @@ -2685,7 +2926,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) return 0; err: - cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED); + cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_connect); @@ -2713,9 +2954,9 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.initiator_depth = conn_param->initiator_depth; rep.failover_accepted = 0; rep.flow_control = conn_param->flow_control; - rep.rnr_retry_count = conn_param->rnr_retry_count; + rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 
1 : 0; - + cma_dbg(id_priv, "sending REP\n"); ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); out: return ret; @@ -2727,6 +2968,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, struct iw_cm_conn_param iw_param; int ret; + if (!conn_param) + return -EINVAL; + ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; @@ -2762,6 +3006,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, rep.private_data = private_data; rep.private_data_len = private_data_len; + cma_dbg(id_priv, "sending SIDR\n"); return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } @@ -2771,7 +3016,9 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_CONNECT)) + + id_priv->owner = curthread->td_proc->p_pid; + if (!cma_comp(id_priv, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp && conn_param) { @@ -2781,16 +3028,23 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (cma_is_ud_ps(id->ps)) + if (id->qp_type == IB_QPT_UD) { + if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, conn_param->private_data, conn_param->private_data_len); - else if (conn_param) + else + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + NULL, 0); + } else { + if (conn_param) ret = cma_accept_ib(id_priv, conn_param); else ret = cma_rep_recv(id_priv); + } break; case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_SCIF: ret = cma_accept_iw(id_priv, conn_param); break; default: @@ -2815,7 +3069,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_has_cm_dev(id_priv)) + if (!id_priv->cm_id.ib) return -EINVAL; switch (id->device->node_type) { @@ -2837,20 +3091,23 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, int ret; id_priv = 
container_of(id, struct rdma_id_private, id); - if (!cma_has_cm_dev(id_priv)) + if (!id_priv->cm_id.ib) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (cma_is_ud_ps(id->ps)) + if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, private_data, private_data_len); - else + else { + cma_dbg(id_priv, "sending REJ\n"); ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); + } break; case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_SCIF: ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); break; @@ -2868,7 +3125,7 @@ int rdma_disconnect(struct rdma_cm_id *id) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_has_cm_dev(id_priv)) + if (!id_priv->cm_id.ib) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { @@ -2877,10 +3134,14 @@ int rdma_disconnect(struct rdma_cm_id *id) if (ret) goto out; /* Initiate or respond to a disconnect. 
*/ - if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) + cma_dbg(id_priv, "sending DREQ\n"); + if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) { + cma_dbg(id_priv, "sending DREP\n"); ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); + } break; case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_SCIF: ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); break; default: @@ -2897,35 +3158,55 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; struct rdma_cm_event event; + struct rdma_dev_addr *dev_addr; int ret; + struct net_device *ndev = NULL; + u16 vlan; id_priv = mc->id_priv; - if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) && - cma_disable_callback(id_priv, CMA_ADDR_RESOLVED)) + dev_addr = &id_priv->id.route.addr.dev_addr; + if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) && + cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) return 0; mutex_lock(&id_priv->qp_mutex); if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, - multicast->rec.mlid); + be16_to_cpu(multicast->rec.mlid)); mutex_unlock(&id_priv->qp_mutex); memset(&event, 0, sizeof event); event.status = status; event.param.ud.private_data = mc->context; + ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (!ndev) { + status = -ENODEV; + } else { + vlan = rdma_vlan_dev_vlan_id(ndev); + dev_put(ndev); + } if (!status) { event.event = RDMA_CM_EVENT_MULTICAST_JOIN; ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num, &multicast->rec, &event.param.ud.ah_attr); + event.param.ud.ah_attr.vlan_id = vlan; event.param.ud.qp_num = 0xFFFFFF; event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); - } else + } else { event.event = RDMA_CM_EVENT_MULTICAST_ERROR; + /* mark that the cached record is no longer valid */ + if (status != -ENETRESET && status != -EAGAIN) { + spin_lock(&id_priv->lock); + id_priv->is_valid_rec = 0; + 
spin_unlock(&id_priv->lock); + } + } + ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return 0; @@ -2938,20 +3219,13 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, union ib_gid *mgid) { -#if defined(INET) || defined(INET6) unsigned char mc_map[MAX_ADDR_LEN]; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; -#endif -#ifdef INET struct sockaddr_in *sin = (struct sockaddr_in *) addr; -#endif -#ifdef INET6 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; -#endif if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); -#ifdef INET6 } else if ((addr->sa_family == AF_INET6) && ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 0xFF10A01B)) { @@ -2962,14 +3236,11 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); -#endif -#ifdef INET } else { ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); -#endif } } @@ -2979,13 +3250,26 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct ib_sa_mcmember_rec rec; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; ib_sa_comp_mask comp_mask; - int ret; + int ret = 0; - ib_addr_get_mgid(dev_addr, &rec.mgid); - ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, - &rec.mgid, &rec); - if (ret) + ib_addr_get_mgid(dev_addr, &id_priv->rec.mgid); + + /* cache ipoib bc record */ + spin_lock(&id_priv->lock); + if (!id_priv->is_valid_rec) + ret = ib_sa_get_mcmember_rec(id_priv->id.device, + id_priv->id.port_num, + 
&id_priv->rec.mgid, + &id_priv->rec); + if (ret) { + id_priv->is_valid_rec = 0; + spin_unlock(&id_priv->lock); return ret; + } else { + rec = id_priv->rec; + id_priv->is_valid_rec = 1; + } + spin_unlock(&id_priv->lock); cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); if (id_priv->id.ps == RDMA_PS_UDP) @@ -3002,19 +3286,18 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, if (id_priv->id.ps == RDMA_PS_IPOIB) comp_mask |= IB_SA_MCMEMBER_REC_RATE | - IB_SA_MCMEMBER_REC_RATE_SELECTOR; + IB_SA_MCMEMBER_REC_RATE_SELECTOR | + IB_SA_MCMEMBER_REC_MTU_SELECTOR | + IB_SA_MCMEMBER_REC_MTU | + IB_SA_MCMEMBER_REC_HOP_LIMIT; mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, id_priv->id.port_num, &rec, comp_mask, GFP_KERNEL, cma_ib_mc_handler, mc); - if (IS_ERR(mc->multicast.ib)) - return PTR_ERR(mc->multicast.ib); - - return 0; + return PTR_RET(mc->multicast.ib); } - static void iboe_mcast_work_handler(struct work_struct *work) { struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); @@ -3034,9 +3317,9 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); - } else if (addr->sa_family == AF_INET6) + } else if (addr->sa_family == AF_INET6) { memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); - else { + } else { mgid->raw[0] = 0xff; mgid->raw[1] = 0x0e; mgid->raw[2] = 0; @@ -3087,20 +3370,16 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, err = -ENODEV; goto out2; } - mc->multicast.ib->rec.rate = iboe_get_rate(ndev); mc->multicast.ib->rec.hop_limit = 1; -#ifdef __linux__ - mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu); -#else mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); -#endif dev_put(ndev); if (!mc->multicast.ib->rec.mtu) { err = -EINVAL; goto out2; } - iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, + 
&mc->multicast.ib->rec.port_gid); work->id = id_priv; work->mc = mc; INIT_WORK(&work->work, iboe_mcast_work_handler); @@ -3124,8 +3403,8 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_ADDR_BOUND) && - !cma_comp(id_priv, CMA_ADDR_RESOLVED)) + if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && + !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) return -EINVAL; mc = kmalloc(sizeof *mc, GFP_KERNEL); @@ -3165,7 +3444,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, spin_unlock_irq(&id_priv->lock); kfree(mc); } - return ret; } EXPORT_SYMBOL(rdma_join_multicast); @@ -3185,7 +3463,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) if (id->qp) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, - mc->multicast.ib->rec.mlid); + be16_to_cpu(mc->multicast.ib->rec.mlid)); if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { switch (rdma_port_get_link_layer(id->device, id->port_num)) { case IB_LINK_LAYER_INFINIBAND: @@ -3213,17 +3491,10 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id dev_addr = &id_priv->id.route.addr.dev_addr; -#ifdef __linux__ - if ((dev_addr->bound_dev_if == ndev->ifindex) && - memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { - printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", - ndev->name, &id_priv->id); -#else if ((dev_addr->bound_dev_if == ndev->if_index) && memcmp(dev_addr->src_dev_addr, IF_LLADDR(ndev), ndev->if_addrlen)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->if_xname, &id_priv->id); -#endif work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; @@ -3246,7 +3517,8 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, struct rdma_id_private *id_priv; int ret = NOTIFY_DONE; -#ifdef __linux__ +/* BONDING related, 
commented out until the bonding is resolved */ +#if 0 if (dev_net(ndev) != &init_net) return NOTIFY_DONE; @@ -3255,10 +3527,9 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING)) return NOTIFY_DONE; -#else +#endif if (event != NETDEV_DOWN && event != NETDEV_UNREGISTER) return NOTIFY_DONE; -#endif mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) @@ -3303,19 +3574,19 @@ static void cma_add_one(struct ib_device *device) static int cma_remove_id_dev(struct rdma_id_private *id_priv) { struct rdma_cm_event event; - enum cma_state state; + enum rdma_cm_state state; int ret = 0; /* Record that we want to remove the device */ - state = cma_exch(id_priv, CMA_DEVICE_REMOVAL); - if (state == CMA_DESTROYING) + state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); + if (state == RDMA_CM_DESTROYING) return 0; cma_cancel_operation(id_priv, state); mutex_lock(&id_priv->handler_mutex); /* Check for destruction from another callback. 
*/ - if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) + if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) goto out; memset(&event, 0, sizeof event); @@ -3370,22 +3641,18 @@ static void cma_remove_one(struct ib_device *device) kfree(cma_dev); } -static int cma_init(void) +static int __init cma_init(void) { - int ret; -#if defined(INET) - int low, high, remaining; - - get_random_bytes(&next_port, sizeof next_port); - inet_get_local_port_range(&low, &high); - remaining = (high - low) + 1; - next_port = ((unsigned int) next_port % remaining) + low; -#endif + int ret = -ENOMEM; cma_wq = create_singlethread_workqueue("rdma_cm"); if (!cma_wq) return -ENOMEM; + cma_free_wq = create_singlethread_workqueue("rdma_cm_fr"); + if (!cma_free_wq) + goto err1; + ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); register_netdevice_notifier(&cma_nb); @@ -3393,27 +3660,34 @@ static int cma_init(void) ret = ib_register_client(&cma_client); if (ret) goto err; + return 0; err: unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); + + destroy_workqueue(cma_free_wq); +err1: destroy_workqueue(cma_wq); return ret; } -static void cma_cleanup(void) +static void __exit cma_cleanup(void) { ib_unregister_client(&cma_client); unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); + flush_workqueue(cma_free_wq); + destroy_workqueue(cma_free_wq); destroy_workqueue(cma_wq); idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); idr_destroy(&udp_ps); idr_destroy(&ipoib_ps); + idr_destroy(&ib_ps); } module_init(cma_init); diff --git a/sys/ofed/drivers/infiniband/core/core_priv.h b/sys/ofed/drivers/infiniband/core/core_priv.h index 08c4bbb..001bbbe 100644 --- a/sys/ofed/drivers/infiniband/core/core_priv.h +++ b/sys/ofed/drivers/infiniband/core/core_priv.h @@ -38,7 +38,8 @@ #include <rdma/ib_verbs.h> -int ib_device_register_sysfs(struct ib_device *device, int 
(*port_callback)(struct ib_device *, +int ib_device_register_sysfs(struct ib_device *device, + int (*port_callback)(struct ib_device *, u8, struct kobject *)); void ib_device_unregister_sysfs(struct ib_device *device); diff --git a/sys/ofed/drivers/infiniband/core/device.c b/sys/ofed/drivers/infiniband/core/device.c index 98adf48..a7a06d78 100644 --- a/sys/ofed/drivers/infiniband/core/device.c +++ b/sys/ofed/drivers/infiniband/core/device.c @@ -37,7 +37,6 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mutex.h> -#include <linux/workqueue.h> #include "core_priv.h" @@ -45,18 +44,15 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("core kernel InfiniBand API"); MODULE_LICENSE("Dual BSD/GPL"); -#ifdef __ia64__ -/* workaround for a bug in hp chipset that would cause kernel - panic when dma resources are exhaused */ -int dma_map_sg_hp_wa = 0; -#endif - struct ib_client_data { struct list_head list; struct ib_client *client; void * data; }; +struct workqueue_struct *ib_wq; +EXPORT_SYMBOL_GPL(ib_wq); + static LIST_HEAD(device_list); static LIST_HEAD(client_list); @@ -99,7 +95,7 @@ static int ib_device_check_mandatory(struct ib_device *device) int i; for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { - if (!*(void **) ((u_char *) device + mandatory_table[i].offset)) { + if (!*(void **) ((void *) device + mandatory_table[i].offset)) { printk(KERN_WARNING "Device %s is missing mandatory function %s\n", device->name, mandatory_table[i].name); return -EINVAL; @@ -177,9 +173,14 @@ static int end_port(struct ib_device *device) */ struct ib_device *ib_alloc_device(size_t size) { + struct ib_device *dev; + BUG_ON(size < sizeof (struct ib_device)); - return kzalloc(size, GFP_KERNEL); + dev = kzalloc(size, GFP_KERNEL); + spin_lock_init(&dev->cmd_perf_lock); + + return dev; } EXPORT_SYMBOL(ib_alloc_device); @@ -295,8 +296,6 @@ int ib_register_device(struct ib_device *device, INIT_LIST_HEAD(&device->client_data_list); 
spin_lock_init(&device->event_handler_lock); spin_lock_init(&device->client_data_lock); - device->ib_uverbs_xrcd_table = RB_ROOT; - mutex_init(&device->xrcd_table_mutex); ret = read_port_table_lengths(device); if (ret) { @@ -631,6 +630,9 @@ int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify) { + if (!device->modify_device) + return -ENOSYS; + return device->modify_device(device, device_modify_mask, device_modify); } @@ -651,6 +653,9 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { + if (!device->modify_port) + return -ENOSYS; + if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; @@ -705,18 +710,28 @@ int ib_find_pkey(struct ib_device *device, { int ret, i; u16 tmp_pkey; + int partial_ix = -1; for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { ret = ib_query_pkey(device, port_num, i, &tmp_pkey); if (ret) return ret; - if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { - *index = i; - return 0; + /* if there is full-member pkey take it.*/ + if (tmp_pkey & 0x8000) { + *index = i; + return 0; + } + if (partial_ix < 0) + partial_ix = i; } } + /*no full-member, if exists take the limited*/ + if (partial_ix >= 0) { + *index = partial_ix; + return 0; + } return -ENOENT; } EXPORT_SYMBOL(ib_find_pkey); @@ -725,21 +740,29 @@ static int __init ib_core_init(void) { int ret; -#ifdef __ia64__ - if (ia64_platform_is("hpzx1")) - dma_map_sg_hp_wa = 1; -#endif + ib_wq = create_workqueue("infiniband"); + if (!ib_wq) + return -ENOMEM; ret = ib_sysfs_setup(); - if (ret) + if (ret) { printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); + goto err; + } ret = ib_cache_setup(); if (ret) { printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); - ib_sysfs_cleanup(); + goto err_sysfs; } + return 0; + +err_sysfs: + ib_sysfs_cleanup(); + +err: + destroy_workqueue(ib_wq); return ret; } @@ 
-748,7 +771,7 @@ static void __exit ib_core_cleanup(void) ib_cache_cleanup(); ib_sysfs_cleanup(); /* Make sure that any pending umem accounting work is done. */ - flush_scheduled_work(); + destroy_workqueue(ib_wq); } module_init(ib_core_init); diff --git a/sys/ofed/drivers/infiniband/core/fmr_pool.c b/sys/ofed/drivers/infiniband/core/fmr_pool.c index c225833..4ba4c77 100644 --- a/sys/ofed/drivers/infiniband/core/fmr_pool.c +++ b/sys/ofed/drivers/infiniband/core/fmr_pool.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/spinlock.h> +#include <linux/module.h> #include <linux/slab.h> #include <linux/jhash.h> #include <linux/kthread.h> diff --git a/sys/ofed/drivers/infiniband/core/iwcm.c b/sys/ofed/drivers/infiniband/core/iwcm.c index 27878a8..14d23cc 100644 --- a/sys/ofed/drivers/infiniband/core/iwcm.c +++ b/sys/ofed/drivers/infiniband/core/iwcm.c @@ -40,9 +40,12 @@ #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/rbtree.h> +#include <linux/sched.h> #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/completion.h> +#include <linux/slab.h> +#include <linux/module.h> #include <linux/string.h> #include <rdma/iw_cm.h> @@ -507,6 +510,8 @@ int iw_cm_accept(struct iw_cm_id *cm_id, qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id->device->iwcm->add_ref(qp); @@ -566,6 +571,8 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id->device->iwcm->add_ref(qp); @@ -620,17 +627,6 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, */ 
BUG_ON(iw_event->status); - /* - * We could be destroying the listening id. If so, ignore this - * upcall. - */ - spin_lock_irqsave(&listen_id_priv->lock, flags); - if (listen_id_priv->state != IW_CM_STATE_LISTEN) { - spin_unlock_irqrestore(&listen_id_priv->lock, flags); - goto out; - } - spin_unlock_irqrestore(&listen_id_priv->lock, flags); - cm_id = iw_create_cm_id(listen_id_priv->id.device, iw_event->so, listen_id_priv->id.cm_handler, @@ -646,6 +642,19 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; + /* + * We could be destroying the listening id. If so, ignore this + * upcall. + */ + spin_lock_irqsave(&listen_id_priv->lock, flags); + if (listen_id_priv->state != IW_CM_STATE_LISTEN) { + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + goto out; + } + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + ret = alloc_work_entries(cm_id_priv, 3); if (ret) { iw_cm_reject(cm_id, NULL, 0); @@ -723,7 +732,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, */ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); - if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) { + if (iw_event->status == 0) { cm_id_priv->id.local_addr = iw_event->local_addr; cm_id_priv->id.remote_addr = iw_event->remote_addr; cm_id_priv->state = IW_CM_STATE_ESTABLISHED; diff --git a/sys/ofed/drivers/infiniband/core/local_sa.c b/sys/ofed/drivers/infiniband/core/local_sa.c deleted file mode 100644 index 9b9c60a..0000000 --- a/sys/ofed/drivers/infiniband/core/local_sa.c +++ /dev/null @@ -1,1273 +0,0 @@ -/* - * Copyright (c) 2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/dma-mapping.h> -#include <linux/err.h> -#include <linux/interrupt.h> -#include <linux/rbtree.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> -#include <linux/pci.h> -#include <linux/miscdevice.h> -#include <linux/random.h> - -#include <rdma/ib_cache.h> -#include <rdma/ib_sa.h> -#include "sa.h" - -MODULE_AUTHOR("Sean Hefty"); -MODULE_DESCRIPTION("InfiniBand subnet administration caching"); -MODULE_LICENSE("Dual BSD/GPL"); - -enum { - SA_DB_MAX_PATHS_PER_DEST = 0x7F, - SA_DB_MIN_RETRY_TIMER = 4000, /* 4 sec */ - SA_DB_MAX_RETRY_TIMER = 256000 /* 256 sec */ -}; - -static int set_paths_per_dest(const char *val, struct kernel_param *kp); -static unsigned long paths_per_dest = 0; -module_param_call(paths_per_dest, set_paths_per_dest, param_get_ulong, - &paths_per_dest, 0644); -MODULE_PARM_DESC(paths_per_dest, "Maximum number of paths to retrieve " - "to each destination (DGID). Set to 0 " - "to disable cache."); - -static int set_subscribe_inform_info(const char *val, struct kernel_param *kp); -static char subscribe_inform_info = 1; -module_param_call(subscribe_inform_info, set_subscribe_inform_info, - param_get_bool, &subscribe_inform_info, 0644); -MODULE_PARM_DESC(subscribe_inform_info, - "Subscribe for SA InformInfo/Notice events."); - -static int do_refresh(const char *val, struct kernel_param *kp); -module_param_call(refresh, do_refresh, NULL, NULL, 0200); - -static unsigned long retry_timer = SA_DB_MIN_RETRY_TIMER; - -enum sa_db_lookup_method { - SA_DB_LOOKUP_LEAST_USED, - SA_DB_LOOKUP_RANDOM -}; - -static int set_lookup_method(const char *val, struct kernel_param *kp); -static int get_lookup_method(char *buf, struct kernel_param *kp); -static unsigned long lookup_method; -module_param_call(lookup_method, set_lookup_method, get_lookup_method, - &lookup_method, 0644); -MODULE_PARM_DESC(lookup_method, "Method used to return path records when " - "multiple paths exist to a given destination."); - -static void sa_db_add_dev(struct 
ib_device *device); -static void sa_db_remove_dev(struct ib_device *device); - -static struct ib_client sa_db_client = { - .name = "local_sa", - .add = sa_db_add_dev, - .remove = sa_db_remove_dev -}; - -static LIST_HEAD(dev_list); -static DEFINE_MUTEX(lock); -static rwlock_t rwlock; -static struct workqueue_struct *sa_wq; -static struct ib_sa_client sa_client; - -enum sa_db_state { - SA_DB_IDLE, - SA_DB_REFRESH, - SA_DB_DESTROY -}; - -struct sa_db_port { - struct sa_db_device *dev; - struct ib_mad_agent *agent; - /* Limit number of outstanding MADs to SA to reduce SA flooding */ - struct ib_mad_send_buf *msg; - u16 sm_lid; - u8 sm_sl; - struct ib_inform_info *in_info; - struct ib_inform_info *out_info; - struct rb_root paths; - struct list_head update_list; - unsigned long update_id; - enum sa_db_state state; - struct work_struct work; - union ib_gid gid; - int port_num; -}; - -struct sa_db_device { - struct list_head list; - struct ib_device *device; - struct ib_event_handler event_handler; - int start_port; - int port_count; - struct sa_db_port port[0]; -}; - -struct ib_sa_iterator { - struct ib_sa_iterator *next; -}; - -struct ib_sa_attr_iter { - struct ib_sa_iterator *iter; - unsigned long flags; -}; - -struct ib_sa_attr_list { - struct ib_sa_iterator iter; - struct ib_sa_iterator *tail; - int update_id; - union ib_gid gid; - struct rb_node node; -}; - -struct ib_path_rec_info { - struct ib_sa_iterator iter; /* keep first */ - struct ib_sa_path_rec rec; - unsigned long lookups; -}; - -struct ib_sa_mad_iter { - struct ib_mad_recv_wc *recv_wc; - struct ib_mad_recv_buf *recv_buf; - int attr_size; - int attr_offset; - int data_offset; - int data_left; - void *attr; - u8 attr_data[0]; -}; - -enum sa_update_type { - SA_UPDATE_FULL, - SA_UPDATE_ADD, - SA_UPDATE_REMOVE -}; - -struct update_info { - struct list_head list; - union ib_gid gid; - enum sa_update_type type; -}; - -struct sa_path_request { - struct work_struct work; - struct ib_sa_client *client; - void 
(*callback)(int, struct ib_sa_path_rec *, void *); - void *context; - struct ib_sa_path_rec path_rec; -}; - -static void process_updates(struct sa_db_port *port); - -static void free_attr_list(struct ib_sa_attr_list *attr_list) -{ - struct ib_sa_iterator *cur; - - for (cur = attr_list->iter.next; cur; cur = attr_list->iter.next) { - attr_list->iter.next = cur->next; - kfree(cur); - } - attr_list->tail = &attr_list->iter; -} - -static void remove_attr(struct rb_root *root, struct ib_sa_attr_list *attr_list) -{ - rb_erase(&attr_list->node, root); - free_attr_list(attr_list); - kfree(attr_list); -} - -static void remove_all_attrs(struct rb_root *root) -{ - struct rb_node *node, *next_node; - struct ib_sa_attr_list *attr_list; - - write_lock_irq(&rwlock); - for (node = rb_first(root); node; node = next_node) { - next_node = rb_next(node); - attr_list = rb_entry(node, struct ib_sa_attr_list, node); - remove_attr(root, attr_list); - } - write_unlock_irq(&rwlock); -} - -static void remove_old_attrs(struct rb_root *root, unsigned long update_id) -{ - struct rb_node *node, *next_node; - struct ib_sa_attr_list *attr_list; - - write_lock_irq(&rwlock); - for (node = rb_first(root); node; node = next_node) { - next_node = rb_next(node); - attr_list = rb_entry(node, struct ib_sa_attr_list, node); - if (attr_list->update_id != update_id) - remove_attr(root, attr_list); - } - write_unlock_irq(&rwlock); -} - -static struct ib_sa_attr_list *insert_attr_list(struct rb_root *root, - struct ib_sa_attr_list *attr_list) -{ - struct rb_node **link = &root->rb_node; - struct rb_node *parent = NULL; - struct ib_sa_attr_list *cur_attr_list; - int cmp; - - while (*link) { - parent = *link; - cur_attr_list = rb_entry(parent, struct ib_sa_attr_list, node); - cmp = memcmp(&cur_attr_list->gid, &attr_list->gid, - sizeof attr_list->gid); - if (cmp < 0) - link = &(*link)->rb_left; - else if (cmp > 0) - link = &(*link)->rb_right; - else - return cur_attr_list; - } - rb_link_node(&attr_list->node, 
parent, link); - rb_insert_color(&attr_list->node, root); - return NULL; -} - -static struct ib_sa_attr_list *find_attr_list(struct rb_root *root, u8 *gid) -{ - struct rb_node *node = root->rb_node; - struct ib_sa_attr_list *attr_list; - int cmp; - - while (node) { - attr_list = rb_entry(node, struct ib_sa_attr_list, node); - cmp = memcmp(&attr_list->gid, gid, sizeof attr_list->gid); - if (cmp < 0) - node = node->rb_left; - else if (cmp > 0) - node = node->rb_right; - else - return attr_list; - } - return NULL; -} - -static int insert_attr(struct rb_root *root, unsigned long update_id, void *key, - struct ib_sa_iterator *iter) -{ - struct ib_sa_attr_list *attr_list; - void *err; - - write_lock_irq(&rwlock); - attr_list = find_attr_list(root, key); - if (!attr_list) { - write_unlock_irq(&rwlock); - attr_list = kmalloc(sizeof *attr_list, GFP_KERNEL); - if (!attr_list) - return -ENOMEM; - - attr_list->iter.next = NULL; - attr_list->tail = &attr_list->iter; - attr_list->update_id = update_id; - memcpy(attr_list->gid.raw, key, sizeof attr_list->gid); - - write_lock_irq(&rwlock); - err = insert_attr_list(root, attr_list); - if (err) { - write_unlock_irq(&rwlock); - kfree(attr_list); - return PTR_ERR(err); - } - } else if (attr_list->update_id != update_id) { - free_attr_list(attr_list); - attr_list->update_id = update_id; - } - - attr_list->tail->next = iter; - iter->next = NULL; - attr_list->tail = iter; - write_unlock_irq(&rwlock); - return 0; -} - -static struct ib_sa_mad_iter *ib_sa_iter_create(struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_sa_mad_iter *iter; - struct ib_sa_mad *mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad; - int attr_size, attr_offset; - - attr_offset = be16_to_cpu(mad->sa_hdr.attr_offset) * 8; - attr_size = 64; /* path record length */ - if (attr_offset < attr_size) - return ERR_PTR(-EINVAL); - - iter = kzalloc(sizeof *iter + attr_size, GFP_KERNEL); - if (!iter) - return ERR_PTR(-ENOMEM); - - iter->data_left = mad_recv_wc->mad_len - 
IB_MGMT_SA_HDR; - iter->recv_wc = mad_recv_wc; - iter->recv_buf = &mad_recv_wc->recv_buf; - iter->attr_offset = attr_offset; - iter->attr_size = attr_size; - return iter; -} - -static void ib_sa_iter_free(struct ib_sa_mad_iter *iter) -{ - kfree(iter); -} - -static void *ib_sa_iter_next(struct ib_sa_mad_iter *iter) -{ - struct ib_sa_mad *mad; - int left, offset = 0; - - while (iter->data_left >= iter->attr_offset) { - while (iter->data_offset < IB_MGMT_SA_DATA) { - mad = (struct ib_sa_mad *) iter->recv_buf->mad; - - left = IB_MGMT_SA_DATA - iter->data_offset; - if (left < iter->attr_size) { - /* copy first piece of the attribute */ - iter->attr = &iter->attr_data; - memcpy(iter->attr, - &mad->data[iter->data_offset], left); - offset = left; - break; - } else if (offset) { - /* copy the second piece of the attribute */ - memcpy(iter->attr + offset, &mad->data[0], - iter->attr_size - offset); - iter->data_offset = iter->attr_size - offset; - offset = 0; - } else { - iter->attr = &mad->data[iter->data_offset]; - iter->data_offset += iter->attr_size; - } - - iter->data_left -= iter->attr_offset; - goto out; - } - iter->data_offset = 0; - iter->recv_buf = list_entry(iter->recv_buf->list.next, - struct ib_mad_recv_buf, list); - } - iter->attr = NULL; -out: - return iter->attr; -} - -/* - * Copy path records from a received response and insert them into our cache. - * A path record in the MADs are in network order, packed, and may - * span multiple MAD buffers, just to make our life hard. 
- */ -static void update_path_db(struct sa_db_port *port, - struct ib_mad_recv_wc *mad_recv_wc, - enum sa_update_type type) -{ - struct ib_sa_mad_iter *iter; - struct ib_path_rec_info *path_info; - void *attr; - int ret; - - iter = ib_sa_iter_create(mad_recv_wc); - if (IS_ERR(iter)) - return; - - port->update_id += (type == SA_UPDATE_FULL); - - while ((attr = ib_sa_iter_next(iter)) && - (path_info = kmalloc(sizeof *path_info, GFP_KERNEL))) { - - ib_sa_unpack_attr(&path_info->rec, attr, IB_SA_ATTR_PATH_REC); - - ret = insert_attr(&port->paths, port->update_id, - path_info->rec.dgid.raw, &path_info->iter); - if (ret) { - kfree(path_info); - break; - } - } - ib_sa_iter_free(iter); - - if (type == SA_UPDATE_FULL) - remove_old_attrs(&port->paths, port->update_id); -} - -static struct ib_mad_send_buf *get_sa_msg(struct sa_db_port *port, - struct update_info *update) -{ - struct ib_ah_attr ah_attr; - struct ib_mad_send_buf *msg; - - msg = ib_create_send_mad(port->agent, 1, 0, 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, GFP_KERNEL); - if (IS_ERR(msg)) - return NULL; - - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = port->sm_lid; - ah_attr.sl = port->sm_sl; - ah_attr.port_num = port->port_num; - - msg->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); - if (IS_ERR(msg->ah)) { - ib_free_send_mad(msg); - return NULL; - } - - msg->timeout_ms = retry_timer; - msg->retries = 0; - msg->context[0] = port; - msg->context[1] = update; - return msg; -} - -static __be64 form_tid(u32 hi_tid) -{ - static atomic_t tid; - return cpu_to_be64((((u64) hi_tid) << 32) | - ((u32) atomic_inc_return(&tid))); -} - -static void format_path_req(struct sa_db_port *port, - struct update_info *update, - struct ib_mad_send_buf *msg) -{ - struct ib_sa_mad *mad = msg->mad; - struct ib_sa_path_rec path_rec; - - mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; - mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; - mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; - mad->mad_hdr.method = 
IB_SA_METHOD_GET_TABLE; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); - mad->mad_hdr.tid = form_tid(msg->mad_agent->hi_tid); - - mad->sa_hdr.comp_mask = IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH; - - path_rec.sgid = port->gid; - path_rec.numb_path = (u8) paths_per_dest; - - if (update->type == SA_UPDATE_ADD) { - mad->sa_hdr.comp_mask |= IB_SA_PATH_REC_DGID; - memcpy(&path_rec.dgid, &update->gid, sizeof path_rec.dgid); - } - - ib_sa_pack_attr(mad->data, &path_rec, IB_SA_ATTR_PATH_REC); -} - -static int send_query(struct sa_db_port *port, - struct update_info *update) -{ - int ret; - - port->msg = get_sa_msg(port, update); - if (!port->msg) - return -ENOMEM; - - format_path_req(port, update, port->msg); - - ret = ib_post_send_mad(port->msg, NULL); - if (ret) - goto err; - - return 0; - -err: - ib_destroy_ah(port->msg->ah); - ib_free_send_mad(port->msg); - return ret; -} - -static void add_update(struct sa_db_port *port, u8 *gid, - enum sa_update_type type) -{ - struct update_info *update; - - update = kmalloc(sizeof *update, GFP_KERNEL); - if (update) { - if (gid) - memcpy(&update->gid, gid, sizeof update->gid); - update->type = type; - list_add(&update->list, &port->update_list); - } - - if (port->state == SA_DB_IDLE) { - port->state = SA_DB_REFRESH; - process_updates(port); - } -} - -static void clean_update_list(struct sa_db_port *port) -{ - struct update_info *update; - - while (!list_empty(&port->update_list)) { - update = list_entry(port->update_list.next, - struct update_info, list); - list_del(&update->list); - kfree(update); - } -} - -static int notice_handler(int status, struct ib_inform_info *info, - struct ib_sa_notice *notice) -{ - struct sa_db_port *port = info->context; - struct ib_sa_notice_data_gid *gid_data; - struct ib_inform_info **pinfo; - enum sa_update_type type; - - if (info->trap_number == IB_SA_SM_TRAP_GID_IN_SERVICE) { - pinfo = &port->in_info; - type = SA_UPDATE_ADD; - } else { - pinfo = &port->out_info; - type = 
SA_UPDATE_REMOVE; - } - - mutex_lock(&lock); - if (port->state == SA_DB_DESTROY || !*pinfo) { - mutex_unlock(&lock); - return 0; - } - - if (notice) { - gid_data = (struct ib_sa_notice_data_gid *) - ¬ice->data_details; - add_update(port, gid_data->gid, type); - mutex_unlock(&lock); - } else if (status == -ENETRESET) { - *pinfo = NULL; - mutex_unlock(&lock); - } else { - if (status) - *pinfo = ERR_PTR(-EINVAL); - port->state = SA_DB_IDLE; - clean_update_list(port); - mutex_unlock(&lock); - queue_work(sa_wq, &port->work); - } - - return status; -} - -static int reg_in_info(struct sa_db_port *port) -{ - int ret = 0; - - port->in_info = ib_sa_register_inform_info(&sa_client, - port->dev->device, - port->port_num, - IB_SA_SM_TRAP_GID_IN_SERVICE, - GFP_KERNEL, notice_handler, - port); - if (IS_ERR(port->in_info)) - ret = PTR_ERR(port->in_info); - - return ret; -} - -static int reg_out_info(struct sa_db_port *port) -{ - int ret = 0; - - port->out_info = ib_sa_register_inform_info(&sa_client, - port->dev->device, - port->port_num, - IB_SA_SM_TRAP_GID_OUT_OF_SERVICE, - GFP_KERNEL, notice_handler, - port); - if (IS_ERR(port->out_info)) - ret = PTR_ERR(port->out_info); - - return ret; -} - -static void unsubscribe_port(struct sa_db_port *port) -{ - if (port->in_info && !IS_ERR(port->in_info)) - ib_sa_unregister_inform_info(port->in_info); - - if (port->out_info && !IS_ERR(port->out_info)) - ib_sa_unregister_inform_info(port->out_info); - - port->out_info = NULL; - port->in_info = NULL; - -} - -static void cleanup_port(struct sa_db_port *port) -{ - unsubscribe_port(port); - - clean_update_list(port); - remove_all_attrs(&port->paths); -} - -static int update_port_info(struct sa_db_port *port) -{ - struct ib_port_attr port_attr; - int ret; - - ret = ib_query_port(port->dev->device, port->port_num, &port_attr); - if (ret) - return ret; - - if (port_attr.state != IB_PORT_ACTIVE) - return -ENODATA; - - port->sm_lid = port_attr.sm_lid; - port->sm_sl = port_attr.sm_sl; - return 0; -} 
- -static void process_updates(struct sa_db_port *port) -{ - struct update_info *update; - struct ib_sa_attr_list *attr_list; - int ret; - - if (!paths_per_dest || update_port_info(port)) { - cleanup_port(port); - goto out; - } - - /* Event registration is an optimization, so ignore failures. */ - if (subscribe_inform_info) { - if (!port->out_info) { - ret = reg_out_info(port); - if (!ret) - return; - } - - if (!port->in_info) { - ret = reg_in_info(port); - if (!ret) - return; - } - } else - unsubscribe_port(port); - - while (!list_empty(&port->update_list)) { - update = list_entry(port->update_list.next, - struct update_info, list); - - if (update->type == SA_UPDATE_REMOVE) { - write_lock_irq(&rwlock); - attr_list = find_attr_list(&port->paths, - update->gid.raw); - if (attr_list) - remove_attr(&port->paths, attr_list); - write_unlock_irq(&rwlock); - } else { - ret = send_query(port, update); - if (!ret) - return; - - } - list_del(&update->list); - kfree(update); - } -out: - port->state = SA_DB_IDLE; -} - -static void refresh_port_db(struct sa_db_port *port) -{ - if (port->state == SA_DB_DESTROY) - return; - - if (port->state == SA_DB_REFRESH) { - clean_update_list(port); - ib_cancel_mad(port->agent, port->msg); - } - - add_update(port, NULL, SA_UPDATE_FULL); -} - -static void refresh_dev_db(struct sa_db_device *dev) -{ - int i; - - for (i = 0; i < dev->port_count; i++) - refresh_port_db(&dev->port[i]); -} - -static void refresh_db(void) -{ - struct sa_db_device *dev; - - list_for_each_entry(dev, &dev_list, list) - refresh_dev_db(dev); -} - -static int do_refresh(const char *val, struct kernel_param *kp) -{ - mutex_lock(&lock); - refresh_db(); - mutex_unlock(&lock); - return 0; -} - -static int get_lookup_method(char *buf, struct kernel_param *kp) -{ - return sprintf(buf, - "%c %d round robin\n" - "%c %d random", - (lookup_method == SA_DB_LOOKUP_LEAST_USED) ? '*' : ' ', - SA_DB_LOOKUP_LEAST_USED, - (lookup_method == SA_DB_LOOKUP_RANDOM) ? 
'*' : ' ', - SA_DB_LOOKUP_RANDOM); -} - -static int set_lookup_method(const char *val, struct kernel_param *kp) -{ - unsigned long method; - int ret = 0; - - method = simple_strtoul(val, NULL, 0); - - switch (method) { - case SA_DB_LOOKUP_LEAST_USED: - case SA_DB_LOOKUP_RANDOM: - lookup_method = method; - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -static int set_paths_per_dest(const char *val, struct kernel_param *kp) -{ - int ret; - - mutex_lock(&lock); - ret = param_set_ulong(val, kp); - if (ret) - goto out; - - if (paths_per_dest > SA_DB_MAX_PATHS_PER_DEST) - paths_per_dest = SA_DB_MAX_PATHS_PER_DEST; - refresh_db(); -out: - mutex_unlock(&lock); - return ret; -} - -static int set_subscribe_inform_info(const char *val, struct kernel_param *kp) -{ - int ret; - - ret = param_set_bool(val, kp); - if (ret) - return ret; - - return do_refresh(val, kp); -} - -static void port_work_handler(struct work_struct *work) -{ - struct sa_db_port *port; - - port = container_of(work, typeof(*port), work); - mutex_lock(&lock); - refresh_port_db(port); - mutex_unlock(&lock); -} - -static void handle_event(struct ib_event_handler *event_handler, - struct ib_event *event) -{ - struct sa_db_device *dev; - struct sa_db_port *port; - - dev = container_of(event_handler, typeof(*dev), event_handler); - port = &dev->port[event->element.port_num - dev->start_port]; - - switch (event->event) { - case IB_EVENT_PORT_ERR: - case IB_EVENT_LID_CHANGE: - case IB_EVENT_SM_CHANGE: - case IB_EVENT_CLIENT_REREGISTER: - case IB_EVENT_PKEY_CHANGE: - case IB_EVENT_PORT_ACTIVE: - queue_work(sa_wq, &port->work); - break; - default: - break; - } -} - -static void ib_free_path_iter(struct ib_sa_attr_iter *iter) -{ - read_unlock_irqrestore(&rwlock, iter->flags); -} - -static int ib_create_path_iter(struct ib_device *device, u8 port_num, - union ib_gid *dgid, struct ib_sa_attr_iter *iter) -{ - struct sa_db_device *dev; - struct sa_db_port *port; - struct ib_sa_attr_list *list; - - 
dev = ib_get_client_data(device, &sa_db_client); - if (!dev) - return -ENODEV; - - port = &dev->port[port_num - dev->start_port]; - - read_lock_irqsave(&rwlock, iter->flags); - list = find_attr_list(&port->paths, dgid->raw); - if (!list) { - ib_free_path_iter(iter); - return -ENODATA; - } - - iter->iter = &list->iter; - return 0; -} - -static struct ib_sa_path_rec *ib_get_next_path(struct ib_sa_attr_iter *iter) -{ - struct ib_path_rec_info *next_path; - - iter->iter = iter->iter->next; - if (iter->iter) { - next_path = container_of(iter->iter, struct ib_path_rec_info, iter); - return &next_path->rec; - } else - return NULL; -} - -static int cmp_rec(struct ib_sa_path_rec *src, - struct ib_sa_path_rec *dst, ib_sa_comp_mask comp_mask) -{ - /* DGID check already done */ - if (comp_mask & IB_SA_PATH_REC_SGID && - memcmp(&src->sgid, &dst->sgid, sizeof src->sgid)) - return -EINVAL; - if (comp_mask & IB_SA_PATH_REC_DLID && src->dlid != dst->dlid) - return -EINVAL; - if (comp_mask & IB_SA_PATH_REC_SLID && src->slid != dst->slid) - return -EINVAL; - if (comp_mask & IB_SA_PATH_REC_RAW_TRAFFIC && - src->raw_traffic != dst->raw_traffic) - return -EINVAL; - - if (comp_mask & IB_SA_PATH_REC_FLOW_LABEL && - src->flow_label != dst->flow_label) - return -EINVAL; - if (comp_mask & IB_SA_PATH_REC_HOP_LIMIT && - src->hop_limit != dst->hop_limit) - return -EINVAL; - if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS && - src->traffic_class != dst->traffic_class) - return -EINVAL; - if (comp_mask & IB_SA_PATH_REC_REVERSIBLE && - dst->reversible && !src->reversible) - return -EINVAL; - /* Numb path check already done */ - if (comp_mask & IB_SA_PATH_REC_PKEY && src->pkey != dst->pkey) - return -EINVAL; - - if (comp_mask & IB_SA_PATH_REC_SL && src->sl != dst->sl) - return -EINVAL; - - if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_MTU_SELECTOR, - IB_SA_PATH_REC_MTU, dst->mtu_selector, - src->mtu, dst->mtu)) - return -EINVAL; - if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_RATE_SELECTOR, 
- IB_SA_PATH_REC_RATE, dst->rate_selector, - src->rate, dst->rate)) - return -EINVAL; - if (ib_sa_check_selector(comp_mask, - IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR, - IB_SA_PATH_REC_PACKET_LIFE_TIME, - dst->packet_life_time_selector, - src->packet_life_time, dst->packet_life_time)) - return -EINVAL; - - return 0; -} - -static struct ib_sa_path_rec *get_random_path(struct ib_sa_attr_iter *iter, - struct ib_sa_path_rec *req_path, - ib_sa_comp_mask comp_mask) -{ - struct ib_sa_path_rec *path, *rand_path = NULL; - int num, count = 0; - - for (path = ib_get_next_path(iter); path; - path = ib_get_next_path(iter)) { - if (!cmp_rec(path, req_path, comp_mask)) { - get_random_bytes(&num, sizeof num); - if ((num % ++count) == 0) - rand_path = path; - } - } - - return rand_path; -} - -static struct ib_sa_path_rec *get_next_path(struct ib_sa_attr_iter *iter, - struct ib_sa_path_rec *req_path, - ib_sa_comp_mask comp_mask) -{ - struct ib_path_rec_info *cur_path, *next_path = NULL; - struct ib_sa_path_rec *path; - unsigned long lookups = ~0; - - for (path = ib_get_next_path(iter); path; - path = ib_get_next_path(iter)) { - if (!cmp_rec(path, req_path, comp_mask)) { - - cur_path = container_of(iter->iter, struct ib_path_rec_info, - iter); - if (cur_path->lookups < lookups) { - lookups = cur_path->lookups; - next_path = cur_path; - } - } - } - - if (next_path) { - next_path->lookups++; - return &next_path->rec; - } else - return NULL; -} - -static void report_path(struct work_struct *work) -{ - struct sa_path_request *req; - - req = container_of(work, struct sa_path_request, work); - req->callback(0, &req->path_rec, req->context); - ib_sa_client_put(req->client); - kfree(req); -} - -/** - * ib_sa_path_rec_get - Start a Path get query - * @client:SA client - * @device:device to send query on - * @port_num: port number to send query on - * @rec:Path Record to send in query - * @comp_mask:component mask to send in query - * @timeout_ms:time to wait for response - * @gfp_mask:GFP 
mask to use for internal allocations - * @callback:function called when query completes, times out or is - * canceled - * @context:opaque user context passed to callback - * @sa_query:query context, used to cancel query - * - * Send a Path Record Get query to the SA to look up a path. The - * callback function will be called when the query completes (or - * fails); status is 0 for a successful response, -EINTR if the query - * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error - * occurred sending the query. The resp parameter of the callback is - * only valid if status is 0. - * - * If the return value of ib_sa_path_rec_get() is negative, it is an - * error code. Otherwise it is a query ID that can be used to cancel - * the query. - */ -int ib_sa_path_rec_get(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_path_rec *resp, - void *context), - void *context, - struct ib_sa_query **sa_query) -{ - struct sa_path_request *req; - struct ib_sa_attr_iter iter; - struct ib_sa_path_rec *path_rec; - int ret; - - if (!paths_per_dest) - goto query_sa; - - if (!(comp_mask & IB_SA_PATH_REC_DGID) || - !(comp_mask & IB_SA_PATH_REC_NUMB_PATH) || rec->numb_path != 1) - goto query_sa; - - req = kmalloc(sizeof *req, gfp_mask); - if (!req) - goto query_sa; - - ret = ib_create_path_iter(device, port_num, &rec->dgid, &iter); - if (ret) - goto free_req; - - if (lookup_method == SA_DB_LOOKUP_RANDOM) - path_rec = get_random_path(&iter, rec, comp_mask); - else - path_rec = get_next_path(&iter, rec, comp_mask); - - if (!path_rec) - goto free_iter; - - memcpy(&req->path_rec, path_rec, sizeof *path_rec); - ib_free_path_iter(&iter); - - INIT_WORK(&req->work, report_path); - req->client = client; - req->callback = callback; - req->context = context; - - ib_sa_client_get(client); - queue_work(sa_wq, &req->work); - 
*sa_query = ERR_PTR(-EEXIST); - return 0; - -free_iter: - ib_free_path_iter(&iter); -free_req: - kfree(req); -query_sa: - return ib_sa_path_rec_query(client, device, port_num, rec, comp_mask, - timeout_ms, gfp_mask, callback, context, - sa_query); -} -EXPORT_SYMBOL(ib_sa_path_rec_get); - -static void recv_handler(struct ib_mad_agent *mad_agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct sa_db_port *port; - struct update_info *update; - struct ib_mad_send_buf *msg; - enum sa_update_type type; - - msg = (struct ib_mad_send_buf *) (unsigned long) mad_recv_wc->wc->wr_id; - port = msg->context[0]; - update = msg->context[1]; - - mutex_lock(&lock); - if (port->state == SA_DB_DESTROY || - update != list_entry(port->update_list.next, - struct update_info, list)) { - mutex_unlock(&lock); - } else { - type = update->type; - mutex_unlock(&lock); - update_path_db(mad_agent->context, mad_recv_wc, type); - } - - ib_free_recv_mad(mad_recv_wc); -} - -static void send_handler(struct ib_mad_agent *agent, - struct ib_mad_send_wc *mad_send_wc) -{ - struct ib_mad_send_buf *msg; - struct sa_db_port *port; - struct update_info *update; - int ret; - - msg = mad_send_wc->send_buf; - port = msg->context[0]; - update = msg->context[1]; - - mutex_lock(&lock); - if (port->state == SA_DB_DESTROY) - goto unlock; - - if (update == list_entry(port->update_list.next, - struct update_info, list)) { - - if (mad_send_wc->status == IB_WC_RESP_TIMEOUT_ERR && - msg->timeout_ms < SA_DB_MAX_RETRY_TIMER) { - - msg->timeout_ms <<= 1; - ret = ib_post_send_mad(msg, NULL); - if (!ret) { - mutex_unlock(&lock); - return; - } - } - list_del(&update->list); - kfree(update); - } - process_updates(port); -unlock: - mutex_unlock(&lock); - - ib_destroy_ah(msg->ah); - ib_free_send_mad(msg); -} - -static int init_port(struct sa_db_device *dev, int port_num) -{ - struct sa_db_port *port; - int ret; - - port = &dev->port[port_num - dev->start_port]; - port->dev = dev; - port->port_num = port_num; - 
INIT_WORK(&port->work, port_work_handler); - port->paths = RB_ROOT; - INIT_LIST_HEAD(&port->update_list); - - ret = ib_get_cached_gid(dev->device, port_num, 0, &port->gid); - if (ret) - return ret; - - port->agent = ib_register_mad_agent(dev->device, port_num, IB_QPT_GSI, - NULL, IB_MGMT_RMPP_VERSION, - send_handler, recv_handler, port); - if (IS_ERR(port->agent)) - ret = PTR_ERR(port->agent); - - return ret; -} - -static void destroy_port(struct sa_db_port *port) -{ - mutex_lock(&lock); - port->state = SA_DB_DESTROY; - mutex_unlock(&lock); - - ib_unregister_mad_agent(port->agent); - cleanup_port(port); - flush_workqueue(sa_wq); -} - -static void sa_db_add_dev(struct ib_device *device) -{ - struct sa_db_device *dev; - struct sa_db_port *port; - int s, e, i, ret; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - - if (device->node_type == RDMA_NODE_IB_SWITCH) { - s = e = 0; - } else { - s = 1; - e = device->phys_port_cnt; - } - - dev = kzalloc(sizeof *dev + (e - s + 1) * sizeof *port, GFP_KERNEL); - if (!dev) - return; - - dev->start_port = s; - dev->port_count = e - s + 1; - dev->device = device; - for (i = 0; i < dev->port_count; i++) { - ret = init_port(dev, s + i); - if (ret) - goto err; - } - - ib_set_client_data(device, &sa_db_client, dev); - - INIT_IB_EVENT_HANDLER(&dev->event_handler, device, handle_event); - - mutex_lock(&lock); - list_add_tail(&dev->list, &dev_list); - refresh_dev_db(dev); - mutex_unlock(&lock); - - ib_register_event_handler(&dev->event_handler); - return; -err: - while (i--) - destroy_port(&dev->port[i]); - kfree(dev); -} - -static void sa_db_remove_dev(struct ib_device *device) -{ - struct sa_db_device *dev; - int i; - - dev = ib_get_client_data(device, &sa_db_client); - if (!dev) - return; - - ib_unregister_event_handler(&dev->event_handler); - flush_workqueue(sa_wq); - - for (i = 0; i < dev->port_count; i++) - destroy_port(&dev->port[i]); - - mutex_lock(&lock); - list_del(&dev->list); - 
mutex_unlock(&lock); - - kfree(dev); -} - -int sa_db_init(void) -{ - int ret; - - rwlock_init(&rwlock); - sa_wq = create_singlethread_workqueue("local_sa"); - if (!sa_wq) - return -ENOMEM; - - ib_sa_register_client(&sa_client); - ret = ib_register_client(&sa_db_client); - if (ret) - goto err; - - return 0; - -err: - ib_sa_unregister_client(&sa_client); - destroy_workqueue(sa_wq); - return ret; -} - -void sa_db_cleanup(void) -{ - ib_unregister_client(&sa_db_client); - ib_sa_unregister_client(&sa_client); - destroy_workqueue(sa_wq); -} diff --git a/sys/ofed/drivers/infiniband/core/mad.c b/sys/ofed/drivers/infiniband/core/mad.c index 64e660c..11b3ba3 100644 --- a/sys/ofed/drivers/infiniband/core/mad.c +++ b/sys/ofed/drivers/infiniband/core/mad.c @@ -34,6 +34,9 @@ * */ #include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/string.h> #include <rdma/ib_cache.h> #include "mad_priv.h" @@ -46,8 +49,8 @@ MODULE_DESCRIPTION("kernel IB MAD API"); MODULE_AUTHOR("Hal Rosenstock"); MODULE_AUTHOR("Sean Hefty"); -int mad_sendq_size = IB_MAD_QP_SEND_SIZE; -int mad_recvq_size = IB_MAD_QP_RECV_SIZE; +static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; +static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; module_param_named(send_queue_size, mad_sendq_size, int, 0444); MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests"); @@ -59,9 +62,26 @@ static struct kmem_cache *ib_mad_cache; static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; -/* Port list lock */ -static spinlock_t ib_mad_port_list_lock; +/* + * Timeout FIFO (tf) param + */ +enum { + /* min time between 2 consecutive activations of tf workqueue */ + MIN_BETWEEN_ACTIVATIONS_MS = 5 +}; + +/* + * SA congestion control params + */ +enum { + MAX_OUTSTANDING_SA_MADS = 10, + MIN_TIME_FOR_SA_MAD_SEND_MS = 20, + MAX_SA_MADS = 10000 +}; + +/* Port list lock */ +static DEFINE_SPINLOCK(ib_mad_port_list_lock); /* Forward declarations */ static int 
method_in_use(struct ib_mad_mgmt_method_table **method, @@ -80,6 +100,509 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, u8 mgmt_class); static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv); +static int send_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr, + u32 timeout_ms, u32 retries_left); + + +/* + * Timeout FIFO functions - implements FIFO with timeout mechanism + */ + +static void activate_timeout_handler_task(unsigned long data) +{ + struct to_fifo *tf; + + tf = (struct to_fifo *)data; + del_timer(&tf->timer); + queue_work(tf->workq, &tf->work); +} + +static unsigned long adjusted_time(unsigned long last, unsigned long next) +{ + unsigned long min_next; + + min_next = last + msecs_to_jiffies(MIN_BETWEEN_ACTIVATIONS_MS); + if (time_after(min_next, next)) + return min_next; + + return next; +} + +static void notify_failure(struct ib_mad_send_wr_private *mad_send_wr, + enum ib_wc_status status) +{ + struct ib_mad_send_wc mad_send_wc; + struct ib_mad_agent_private *mad_agent_priv; + + mad_send_wc.status = status; + mad_send_wc.vendor_err = 0; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_agent_priv = mad_send_wr->mad_agent_priv; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); +} + +static inline struct sa_cc_data * +get_cc_obj(struct ib_mad_send_wr_private *mad_send_wr) +{ + return &mad_send_wr->mad_agent_priv->qp_info->port_priv->sa_cc; +} + +static inline struct ib_mad_send_wr_private *tfe_to_mad(struct tf_entry *tfe) +{ + return container_of(tfe, struct ib_mad_send_wr_private, tf_list); +} + +static void timeout_handler_task(struct work_struct *work) +{ + struct tf_entry *tmp1, *tmp2; + struct list_head *list_item, exp_lst; + unsigned long flags, curr_time; + int lst_empty; + struct to_fifo *tf; + + tf = container_of(work, struct to_fifo, work); + do { + INIT_LIST_HEAD(&exp_lst); + + spin_lock_irqsave(&tf->lists_lock, flags); + curr_time = 
jiffies; + list_for_each(list_item, &tf->to_head) { + tmp1 = list_entry(list_item, struct tf_entry, to_list); + if (time_before(curr_time, tmp1->exp_time)) + break; + list_del(&tmp1->fifo_list); + tf->num_items--; + } + + /* cut list up to and including list_item->prev */ + list_cut_position(&exp_lst, &tf->to_head, list_item->prev); + spin_unlock_irqrestore(&tf->lists_lock, flags); + + lst_empty = list_empty(&exp_lst); + list_for_each_entry_safe(tmp1, tmp2, &exp_lst, to_list) { + list_del(&tmp1->to_list); + if (tmp1->canceled) { + tmp1->canceled = 0; + notify_failure(tfe_to_mad(tmp1), IB_WC_WR_FLUSH_ERR); + } else { + notify_failure(tfe_to_mad(tmp1), IB_WC_RESP_TIMEOUT_ERR); + } + } + } while (!lst_empty); + + spin_lock_irqsave(&tf->lists_lock, flags); + if (!list_empty(&tf->to_head)) { + tmp1 = list_entry(tf->to_head.next, struct tf_entry, to_list); + mod_timer(&tf->timer, adjusted_time(curr_time, tmp1->exp_time)); + } + spin_unlock_irqrestore(&tf->lists_lock, flags); +} + +/** + * tf_create - creates new timeout-fifo object + * @fifo_size: Maximum fifo size + * + * Allocate and initialize new timeout-fifo object + */ +static struct to_fifo *tf_create(u32 fifo_size) +{ + struct to_fifo *tf; + + tf = kzalloc(sizeof(*tf), GFP_KERNEL); + if (tf) { + tf->workq = create_singlethread_workqueue("to_fifo"); + if (!tf->workq) { + kfree(tf); + return NULL; + } + spin_lock_init(&tf->lists_lock); + INIT_LIST_HEAD(&tf->to_head); + INIT_LIST_HEAD(&tf->fifo_head); + init_timer(&tf->timer); + INIT_WORK(&tf->work, timeout_handler_task); + tf->timer.data = (unsigned long) tf; + tf->timer.function = activate_timeout_handler_task; + tf->timer.expires = jiffies; + tf->fifo_size = fifo_size; + tf->stop_enqueue = 0; + tf->num_items = 0; + } + + return tf; +} + +/** + * tf_enqueue - enqueue item to timeout-fifo object + * @tf:timeout-fifo object + * @item: item to enqueue. + * @timeout_ms: item expiration time in ms. + * + * Enqueue item to fifo and modify expiration timer when required. 
+ * + * Returns 0 on success and negative on failure. + */ +static int tf_enqueue(struct to_fifo *tf, struct tf_entry *item, u32 timeout_ms) +{ + struct tf_entry *tmp; + struct list_head *list_item; + unsigned long flags; + + item->exp_time = jiffies + msecs_to_jiffies(timeout_ms); + + spin_lock_irqsave(&tf->lists_lock, flags); + if (tf->num_items >= tf->fifo_size || tf->stop_enqueue) { + spin_unlock_irqrestore(&tf->lists_lock, flags); + return -EBUSY; + } + + /* Insert item to timeout list */ + list_for_each_prev(list_item, &tf->to_head) { + tmp = list_entry(list_item, struct tf_entry, to_list); + if (time_after(item->exp_time, tmp->exp_time)) + break; + } + + list_add(&item->to_list, list_item); + + /* Insert item to fifo list */ + list_add_tail(&item->fifo_list, &tf->fifo_head); + + tf->num_items++; + + /* modify expiration timer if required */ + if (list_item == &tf->to_head) + mod_timer(&tf->timer, item->exp_time); + + spin_unlock_irqrestore(&tf->lists_lock, flags); + + return 0; +} + +/** + * tf_dequeue - dequeue item from timeout-fifo object + * @tf:timeout-fifo object + * @time_left_ms: returns the time left for expiration in ms. + * + * Dequeue item from fifo and modify expiration timer when required. + * + * Returns pointer to tf_entry on success and NULL on failure. 
+ */ +static struct tf_entry *tf_dequeue(struct to_fifo *tf, u32 *time_left_ms) +{ + unsigned long flags; + unsigned long time_left; + struct tf_entry *tmp, *tmp1; + + spin_lock_irqsave(&tf->lists_lock, flags); + if (list_empty(&tf->fifo_head)) { + spin_unlock_irqrestore(&tf->lists_lock, flags); + return NULL; + } + + list_for_each_entry(tmp, &tf->fifo_head, fifo_list) { + if (!tmp->canceled) + break; + } + + if (tmp->canceled) { + spin_unlock_irqrestore(&tf->lists_lock, flags); + return NULL; + } + + /* modify timer in case enqueued item is the next to expire */ + if (tf->to_head.next == &tmp->to_list) { + if (list_is_last(&tmp->to_list, &tf->to_head)) { + del_timer(&tf->timer); + } else { + tmp1 = list_entry(tmp->to_list.next, struct tf_entry, to_list); + mod_timer(&tf->timer, tmp1->exp_time); + } + } + list_del(&tmp->fifo_list); + list_del(&tmp->to_list); + tf->num_items--; + spin_unlock_irqrestore(&tf->lists_lock, flags); + + time_left = tmp->exp_time - jiffies; + if ((long) time_left <= 0) + time_left = 0; + *time_left_ms = jiffies_to_msecs(time_left); + + return tmp; +} + +static void tf_stop_enqueue(struct to_fifo *tf) +{ + unsigned long flags; + + spin_lock_irqsave(&tf->lists_lock, flags); + tf->stop_enqueue = 1; + spin_unlock_irqrestore(&tf->lists_lock, flags); +} + +/** + * tf_free - free empty timeout-fifo object + * @tf:timeout-fifo object + * + */ +static void tf_free(struct to_fifo *tf) +{ + del_timer_sync(&tf->timer); + flush_workqueue(tf->workq); + destroy_workqueue(tf->workq); + + kfree(tf); +} + +/** + * tf_free_agent - free MADs related to specific MAD agent from timeout-fifo + * @tf:timeout-fifo object + * @mad_agent_priv: MAD agent. 
+ * + */ +static void tf_free_agent(struct to_fifo *tf, struct ib_mad_agent_private *mad_agent_priv) +{ + unsigned long flags; + struct tf_entry *tmp, *tmp1; + struct list_head tmp_head; + + INIT_LIST_HEAD(&tmp_head); + spin_lock_irqsave(&tf->lists_lock, flags); + list_for_each_entry_safe(tmp, tmp1, &tf->fifo_head, fifo_list) { + if (tfe_to_mad(tmp)->mad_agent_priv == mad_agent_priv) { + list_del(&tmp->to_list); + list_move(&tmp->fifo_list, &tmp_head); + tf->num_items--; + } + } + spin_unlock_irqrestore(&tf->lists_lock, flags); + + list_for_each_entry_safe(tmp, tmp1, &tmp_head, fifo_list) { + list_del(&tmp->fifo_list); + notify_failure(tfe_to_mad(tmp), IB_WC_WR_FLUSH_ERR); + } +} + +/** + * tf_modify_item - to modify expiration time for specific item + * @tf:timeout-fifo object + * @mad_agent_priv: MAD agent. + * @send_buf: the MAD to modify in queue + * @timeout_ms: new timeout to set. + * + * Returns 0 if item found on list and -ENXIO if not. + * + * Note: The send_buf may point on MAD that is already released. 
+ * Therefore we can't use this struct before finding it in the list + */ +static int tf_modify_item(struct to_fifo *tf, + struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_buf *send_buf, u32 timeout_ms) +{ + struct tf_entry *tmp, *item; + struct list_head *list_item; + unsigned long flags; + int found = 0; + + spin_lock_irqsave(&tf->lists_lock, flags); + list_for_each_entry(item, &tf->fifo_head, fifo_list) { + if (tfe_to_mad(item)->mad_agent_priv == mad_agent_priv && + &tfe_to_mad(item)->send_buf == send_buf) { + found = 1; + break; + } + } + + if (!found) { + spin_unlock_irqrestore(&tf->lists_lock, flags); + return -ENXIO; + } + + item->exp_time = jiffies + msecs_to_jiffies(timeout_ms); + + if (timeout_ms) { + list_del(&item->to_list); + list_for_each_prev(list_item, &tf->to_head) { + tmp = list_entry(list_item, struct tf_entry, to_list); + if (time_after(item->exp_time, tmp->exp_time)) + break; + } + list_add(&item->to_list, list_item); + + /* modify expiration timer if required */ + if (list_item == &tf->to_head) + mod_timer(&tf->timer, item->exp_time); + } else { + /* + * when item canceled (timeout_ms == 0) move item to + * head of timeout list and to the tail of fifo list + */ + item->canceled = 1; + list_move(&item->to_list, &tf->to_head); + list_move_tail(&item->fifo_list, &tf->fifo_head); + mod_timer(&tf->timer, item->exp_time); + } + spin_unlock_irqrestore(&tf->lists_lock, flags); + + return 0; +} + +/* + * SA congestion control functions + */ + +/* + * Defines which MAD is under congestion control. + */ +static int is_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_hdr *mad; + + mad = (struct ib_mad_hdr *)mad_send_wr->send_buf.mad; + + return ((mad_send_wr->send_buf.timeout_ms) && + (mad->mgmt_class == IB_MGMT_CLASS_SUBN_ADM) && + ((mad->method == IB_MGMT_METHOD_GET) || + (mad->method == IB_MGMT_METHOD_SET))); +} + +/* + * Notify that SA congestion controlled MAD is done. 
+ * to allow dequeuing SA MAD from congestion control queue. + */ +static void sa_cc_mad_done(struct sa_cc_data *cc_obj) +{ + unsigned long flags; + struct tf_entry *tfe; + struct ib_mad_send_wr_private *mad_send_wr; + u32 time_left_ms, timeout_ms, retries; + int ret; + + do { + spin_lock_irqsave(&cc_obj->lock, flags); + tfe = tf_dequeue(cc_obj->tf, &time_left_ms); + if (!tfe) { + if (cc_obj->outstanding > 0) + cc_obj->outstanding--; + spin_unlock_irqrestore(&cc_obj->lock, flags); + break; + } + spin_unlock_irqrestore(&cc_obj->lock, flags); + mad_send_wr = tfe_to_mad(tfe); + time_left_ms += MIN_TIME_FOR_SA_MAD_SEND_MS; + if (time_left_ms > mad_send_wr->send_buf.timeout_ms) { + retries = time_left_ms / mad_send_wr->send_buf.timeout_ms - 1; + timeout_ms = mad_send_wr->send_buf.timeout_ms; + } else { + retries = 0; + timeout_ms = time_left_ms; + } + ret = send_sa_cc_mad(mad_send_wr, timeout_ms, retries); + if (ret) { + if (ret == -ENOMEM) + notify_failure(mad_send_wr, IB_WC_GENERAL_ERR); + else + notify_failure(mad_send_wr, IB_WC_LOC_QP_OP_ERR); + } + } while (ret); +} + +/* + * Send SA MAD under congestion control. + */ +static int sa_cc_mad_send(struct ib_mad_send_wr_private *mad_send_wr) +{ + unsigned long flags; + int ret; + struct sa_cc_data *cc_obj; + + cc_obj = get_cc_obj(mad_send_wr); + spin_lock_irqsave(&cc_obj->lock, flags); + if (cc_obj->outstanding < MAX_OUTSTANDING_SA_MADS) { + cc_obj->outstanding++; + spin_unlock_irqrestore(&cc_obj->lock, flags); + ret = send_sa_cc_mad(mad_send_wr, mad_send_wr->send_buf.timeout_ms, + mad_send_wr->retries_left); + if (ret) + sa_cc_mad_done(cc_obj); + + } else { + int qtime = (mad_send_wr->send_buf.timeout_ms * + (mad_send_wr->retries_left + 1)) + - MIN_TIME_FOR_SA_MAD_SEND_MS; + + if (qtime < 0) + qtime = 0; + ret = tf_enqueue(cc_obj->tf, &mad_send_wr->tf_list, (u32)qtime); + + spin_unlock_irqrestore(&cc_obj->lock, flags); + } + + return ret; +} + +/* + * Initialize SA congestion control. 
+ */ +static int sa_cc_init(struct sa_cc_data *cc_obj) +{ + spin_lock_init(&cc_obj->lock); + cc_obj->outstanding = 0; + cc_obj->tf = tf_create(MAX_SA_MADS); + if (!cc_obj->tf) + return -ENOMEM; + return 0; +} + +/* + * Cancel SA MADs from congestion control queue. + */ +static void cancel_sa_cc_mads(struct ib_mad_agent_private *mad_agent_priv) +{ + tf_free_agent(mad_agent_priv->qp_info->port_priv->sa_cc.tf, + mad_agent_priv); +} + +/* + * Modify timeout of SA MAD on congestion control queue. + */ +static int modify_sa_cc_mad(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_buf *send_buf, u32 timeout_ms) +{ + int ret; + int qtime = 0; + + if (timeout_ms > MIN_TIME_FOR_SA_MAD_SEND_MS) + qtime = timeout_ms - MIN_TIME_FOR_SA_MAD_SEND_MS; + + ret = tf_modify_item(mad_agent_priv->qp_info->port_priv->sa_cc.tf, + mad_agent_priv, send_buf, (u32)qtime); + return ret; +} + +static void sa_cc_destroy(struct sa_cc_data *cc_obj) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct tf_entry *tfe; + struct ib_mad_send_wc mad_send_wc; + struct ib_mad_agent_private *mad_agent_priv; + u32 time_left_ms; + + mad_send_wc.status = IB_WC_WR_FLUSH_ERR; + mad_send_wc.vendor_err = 0; + + tf_stop_enqueue(cc_obj->tf); + tfe = tf_dequeue(cc_obj->tf, &time_left_ms); + while (tfe) { + mad_send_wr = tfe_to_mad(tfe); + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_agent_priv = mad_send_wr->mad_agent_priv; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + tfe = tf_dequeue(cc_obj->tf, &time_left_ms); + } + tf_free(cc_obj->tf); +} /* * Returns a ib_mad_port_private structure or NULL for a device/port @@ -184,15 +707,6 @@ int ib_response_mad(struct ib_mad *mad) } EXPORT_SYMBOL(ib_response_mad); -static void timeout_callback(unsigned long data) -{ - struct ib_mad_agent_private *mad_agent_priv = - (struct ib_mad_agent_private *) data; - - queue_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->timeout_work); -} - /* * 
ib_register_mad_agent - Register to send/receive MADs */ @@ -285,6 +799,13 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error1; } + /* Verify the QP requested is supported. For example, Ethernet devices + * will not have QP0 */ + if (!port_priv->qp_info[qpn].qp) { + ret = ERR_PTR(-EPROTONOSUPPORT); + goto error1; + } + /* Allocate structures */ mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL); if (!mad_agent_priv) { @@ -300,13 +821,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, } if (mad_reg_req) { - reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL); + reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL); if (!reg_req) { ret = ERR_PTR(-ENOMEM); goto error3; } - /* Make a copy of the MAD registration request */ - memcpy(reg_req, mad_reg_req, sizeof *reg_req); } /* Now, fill in the various structures */ @@ -324,9 +843,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, INIT_LIST_HEAD(&mad_agent_priv->wait_list); INIT_LIST_HEAD(&mad_agent_priv->done_list); INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); - INIT_WORK(&mad_agent_priv->timeout_work, timeout_sends); - setup_timer(&mad_agent_priv->timeout_timer, timeout_callback, - (unsigned long) mad_agent_priv); + INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions); atomic_set(&mad_agent_priv->refcount, 1); @@ -533,8 +1050,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) */ cancel_mads(mad_agent_priv); port_priv = mad_agent_priv->qp_info->port_priv; - del_timer_sync(&mad_agent_priv->timeout_timer); - cancel_work_sync(&mad_agent_priv->timeout_work); + cancel_delayed_work(&mad_agent_priv->timed_work); spin_lock_irqsave(&port_priv->reg_lock, flags); remove_mad_reg_req(mad_agent_priv); @@ -577,6 +1093,7 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) struct 
ib_mad_agent_private *mad_agent_priv; struct ib_mad_snoop_private *mad_snoop_priv; + if (!IS_ERR(mad_agent)) { /* If the TID is zero, the agent can only snoop. */ if (mad_agent->hi_tid) { mad_agent_priv = container_of(mad_agent, @@ -589,6 +1106,8 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) agent); unregister_mad_snoop(mad_snoop_priv); } + } + return 0; } EXPORT_SYMBOL(ib_unregister_mad_agent); @@ -695,7 +1214,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_wc mad_wc; struct ib_send_wr *send_wr = &mad_send_wr->send_wr; - if (device->node_type == RDMA_NODE_IB_SWITCH) + if (device->node_type == RDMA_NODE_IB_SWITCH && + smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) port_num = send_wr->wr.ud.port_num; else port_num = mad_agent_priv->agent.port_num; @@ -1028,12 +1548,20 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_buf.mad, sge[0].length, DMA_TO_DEVICE); - mad_send_wr->header_mapping = sge[0].addr; + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) + return -ENOMEM; sge[1].addr = ib_dma_map_single(mad_agent->device, ib_get_payload(mad_send_wr), sge[1].length, DMA_TO_DEVICE); + + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { + ret = -ENOMEM; + goto dma1_err; + } + + mad_send_wr->header_mapping = sge[0].addr; mad_send_wr->payload_mapping = sge[1].addr; spin_lock_irqsave(&qp_info->send_queue.lock, flags); @@ -1051,14 +1579,51 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) list_add_tail(&mad_send_wr->mad_list.list, list); } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); - if (ret) { + + if (!ret) + return 0; + ib_dma_unmap_single(mad_agent->device, mad_send_wr->header_mapping, - sge[0].length, DMA_TO_DEVICE); + sge[1].length, DMA_TO_DEVICE); +dma1_err: ib_dma_unmap_single(mad_agent->device, mad_send_wr->payload_mapping, - sge[1].length, DMA_TO_DEVICE); + sge[0].length, DMA_TO_DEVICE); + return ret; 
+} + +/* + * Send SA MAD that passed congestion control + */ +static int send_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr, + u32 timeout_ms, u32 retries_left) +{ + int ret; + unsigned long flags; + struct ib_mad_agent_private *mad_agent_priv; + + mad_agent_priv = mad_send_wr->mad_agent_priv; + mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); + mad_send_wr->retries_left = retries_left; + mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); + + /* Reference MAD agent until send completes */ + atomic_inc(&mad_agent_priv->refcount); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_add_tail(&mad_send_wr->agent_list, + &mad_agent_priv->send_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + ret = ib_send_mad(mad_send_wr); + if (ret < 0) { + /* Fail send request */ + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_del(&mad_send_wr->agent_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + atomic_dec(&mad_agent_priv->refcount); } + return ret; } @@ -1125,6 +1690,12 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; + if (is_sa_cc_mad(mad_send_wr)) { + mad_send_wr->is_sa_cc_mad = 1; + ret = sa_cc_mad_send(mad_send_wr); + if (ret < 0) + goto error; + } else { /* Reference MAD agent until send completes */ atomic_inc(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); @@ -1147,6 +1718,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, goto error; } } + } return 0; error: if (bad_send_buf) @@ -1206,10 +1778,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, { int i; - for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS); - i < IB_MGMT_MAX_METHODS; - i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, - 1+i)) { + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { if ((*method)->agent[i]) { printk(KERN_ERR PFX "Method 
%d already in use\n", i); return -EINVAL; @@ -1343,13 +1912,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, goto error3; /* Finally, add in methods being registered */ - for (i = find_first_bit(mad_reg_req->method_mask, - IB_MGMT_MAX_METHODS); - i < IB_MGMT_MAX_METHODS; - i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, - 1+i)) { + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; - } + return 0; error3: @@ -1442,13 +2007,9 @@ check_in_use: goto error4; /* Finally, add in methods being registered */ - for (i = find_first_bit(mad_reg_req->method_mask, - IB_MGMT_MAX_METHODS); - i < IB_MGMT_MAX_METHODS; - i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, - 1+i)) { + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; - } + return 0; error4: @@ -1614,6 +2175,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv, mad->mad_hdr.class_version].class; if (!class) goto out; + if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= + IB_MGMT_MAX_METHODS) + goto out; method = class->method_table[convert_mgmt_class( mad->mad_hdr.mgmt_class)]; if (method) @@ -1856,6 +2420,26 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } } +static bool generate_unmatched_resp(struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET || + recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, sizeof *response); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + response->mad.mad.mad_hdr.status = + cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + 
response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION; + + return true; + } else { + return false; + } +} static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_wc *wc) { @@ -1865,6 +2449,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_list_head *mad_list; struct ib_mad_agent_private *mad_agent; int port_num; + int ret = IB_MAD_RESULT_SUCCESS; mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; qp_info = mad_list->mad_queue->qp_info; @@ -1948,8 +2533,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, local: /* Give driver "right of first refusal" on incoming MAD */ if (port_priv->device->process_mad) { - int ret; - ret = port_priv->device->process_mad(port_priv->device, 0, port_priv->port_num, wc, &recv->grh, @@ -1977,6 +2560,10 @@ local: * or via recv_handler in ib_mad_complete_recv() */ recv = NULL; + } else if ((ret & IB_MAD_RESULT_SUCCESS) && + generate_unmatched_resp(recv, response)) { + agent_send_response(&response->mad.mad, &recv->grh, wc, + port_priv->device, port_num, qp_info->qp->qp_num); } out: @@ -1992,9 +2579,10 @@ out: static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_send_wr_private *mad_send_wr; + unsigned long delay; if (list_empty(&mad_agent_priv->wait_list)) { - del_timer(&mad_agent_priv->timeout_timer); + cancel_delayed_work(&mad_agent_priv->timed_work); } else { mad_send_wr = list_entry(mad_agent_priv->wait_list.next, struct ib_mad_send_wr_private, @@ -2003,8 +2591,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) if (time_after(mad_agent_priv->timeout, mad_send_wr->timeout)) { mad_agent_priv->timeout = mad_send_wr->timeout; - mod_timer(&mad_agent_priv->timeout_timer, - mad_send_wr->timeout); + delay = mad_send_wr->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, 
delay); } } } @@ -2031,14 +2622,15 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) temp_mad_send_wr->timeout)) break; } - } else + } + else list_item = &mad_agent_priv->wait_list; list_add(&mad_send_wr->agent_list, list_item); /* Reschedule a work item if we have a shorter timeout */ if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) - mod_timer(&mad_agent_priv->timeout_timer, - mad_send_wr->timeout); + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); } void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, @@ -2090,9 +2682,12 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, mad_send_wc->status = mad_send_wr->status; if (ret == IB_RMPP_RESULT_INTERNAL) ib_rmpp_send_handler(mad_send_wc); - else + else { + if (mad_send_wr->is_sa_cc_mad) + sa_cc_mad_done(get_cc_obj(mad_send_wr)); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, mad_send_wc); + } /* Release reference on agent taken when sending */ deref_mad_agent(mad_agent_priv); @@ -2272,6 +2867,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) INIT_LIST_HEAD(&cancel_list); + cancel_sa_cc_mads(mad_agent_priv); spin_lock_irqsave(&mad_agent_priv->lock, flags); list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &mad_agent_priv->send_list, agent_list) { @@ -2293,6 +2889,8 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) &cancel_list, agent_list) { mad_send_wc.send_buf = &mad_send_wr->send_buf; list_del(&mad_send_wr->agent_list); + if (mad_send_wr->is_sa_cc_mad) + sa_cc_mad_done(get_cc_obj(mad_send_wr)); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); atomic_dec(&mad_agent_priv->refcount); @@ -2332,7 +2930,13 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = find_send_wr(mad_agent_priv, send_buf); - if (!mad_send_wr || 
mad_send_wr->status != IB_WC_SUCCESS) { + if (!mad_send_wr) { + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + if (modify_sa_cc_mad(mad_agent_priv, send_buf, timeout_ms)) + return -EINVAL; + return 0; + } + if (mad_send_wr->status != IB_WC_SUCCESS) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return -EINVAL; } @@ -2482,10 +3086,10 @@ static void timeout_sends(struct work_struct *work) struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; - unsigned long flags; + unsigned long flags, delay; mad_agent_priv = container_of(work, struct ib_mad_agent_private, - timeout_work); + timed_work.work); mad_send_wc.vendor_err = 0; spin_lock_irqsave(&mad_agent_priv->lock, flags); @@ -2495,8 +3099,12 @@ static void timeout_sends(struct work_struct *work) agent_list); if (time_after(mad_send_wr->timeout, jiffies)) { - mod_timer(&mad_agent_priv->timeout_timer, - mad_send_wr->timeout); + delay = mad_send_wr->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + queue_delayed_work(mad_agent_priv->qp_info-> + port_priv->wq, + &mad_agent_priv->timed_work, delay); break; } @@ -2512,6 +3120,8 @@ static void timeout_sends(struct work_struct *work) else mad_send_wc.status = mad_send_wr->status; mad_send_wc.send_buf = &mad_send_wr->send_buf; + if (mad_send_wr->is_sa_cc_mad) + sa_cc_mad_done(get_cc_obj(mad_send_wr)); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); @@ -2572,6 +3182,14 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, sizeof *mad_priv - sizeof mad_priv->header, DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, + sg_list.addr))) { + ret = -ENOMEM; + kmem_cache_free(ib_mad_cache, mad_priv); + printk(KERN_ERR PFX "ib_dma_map_single failed\n"); + break; + } + mad_priv->header.mapping = sg_list.addr; recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; mad_priv->header.mad_list.mad_queue = recv_queue; @@ 
-2645,6 +3263,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) int ret, i; struct ib_qp_attr *attr; struct ib_qp *qp; + u16 pkey_index = 0; attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) { @@ -2652,6 +3271,11 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) return -ENOMEM; } + ret = ib_find_pkey(port_priv->device, port_priv->port_num, + 0xFFFF, &pkey_index); + if (ret) + pkey_index = 0; + for (i = 0; i < IB_MAD_QPS_CORE; i++) { qp = port_priv->qp_info[i].qp; if (!qp) @@ -2662,7 +3286,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) * one is needed for the Reset to Init transition */ attr->qp_state = IB_QPS_INIT; - attr->pkey_index = 0; + attr->pkey_index = pkey_index; attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY); @@ -2858,6 +3482,10 @@ static int ib_mad_port_open(struct ib_device *device, } INIT_WORK(&port_priv->work, ib_mad_completion_handler); + if (sa_cc_init(&port_priv->sa_cc)) + goto error9; + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_mad_port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); @@ -2865,17 +3493,19 @@ static int ib_mad_port_open(struct ib_device *device, ret = ib_mad_port_start(port_priv); if (ret) { printk(KERN_ERR PFX "Couldn't start port\n"); - goto error9; + goto error10; } return 0; -error9: +error10: spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_del_init(&port_priv->port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); destroy_workqueue(port_priv->wq); +error9: + sa_cc_destroy(&port_priv->sa_cc); error8: destroy_mad_qp(&port_priv->qp_info[1]); error7: @@ -2915,6 +3545,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); destroy_workqueue(port_priv->wq); + sa_cc_destroy(&port_priv->sa_cc); destroy_mad_qp(&port_priv->qp_info[1]); 
destroy_mad_qp(&port_priv->qp_info[0]); ib_dereg_mr(port_priv->mr); @@ -2983,6 +3614,9 @@ static void ib_mad_remove_device(struct ib_device *device) { int i, num_ports, cur_port; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + if (device->node_type == RDMA_NODE_IB_SWITCH) { num_ports = 1; cur_port = 0; @@ -3017,8 +3651,6 @@ static int __init ib_mad_init_module(void) mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); - spin_lock_init(&ib_mad_port_list_lock); - ib_mad_cache = kmem_cache_create("ib_mad", sizeof(struct ib_mad_private), 0, @@ -3054,4 +3686,3 @@ static void __exit ib_mad_cleanup_module(void) module_init(ib_mad_init_module); module_exit(ib_mad_cleanup_module); - diff --git a/sys/ofed/drivers/infiniband/core/mad_priv.h b/sys/ofed/drivers/infiniband/core/mad_priv.h index 8b4df0a..e2cd0ac 100644 --- a/sys/ofed/drivers/infiniband/core/mad_priv.h +++ b/sys/ofed/drivers/infiniband/core/mad_priv.h @@ -102,8 +102,7 @@ struct ib_mad_agent_private { struct list_head send_list; struct list_head wait_list; struct list_head done_list; - struct work_struct timeout_work; - struct timer_list timeout_timer; + struct delayed_work timed_work; unsigned long timeout; struct list_head local_list; struct work_struct local_work; @@ -122,6 +121,14 @@ struct ib_mad_snoop_private { struct completion comp; }; +/* Structure for timeout-fifo entry */ +struct tf_entry { + unsigned long exp_time; /* entry expiration time */ + struct list_head fifo_list; /* to keep entries in fifo order */ + struct list_head to_list; /* to keep entries in timeout order */ + int canceled; /* indicates whether entry is canceled */ +}; + struct ib_mad_send_wr_private { struct ib_mad_list_head mad_list; struct list_head agent_list; @@ -147,6 +154,10 @@ struct ib_mad_send_wr_private { int seg_num; int newwin; int pad; + + /* SA congestion controlled MAD */ + int is_sa_cc_mad; + struct tf_entry tf_list; }; 
struct ib_mad_local_private { @@ -198,6 +209,25 @@ struct ib_mad_qp_info { atomic_t snoop_count; }; +struct to_fifo { + struct list_head to_head; + struct list_head fifo_head; + spinlock_t lists_lock; + struct timer_list timer; + struct work_struct work; + u32 fifo_size; + u32 num_items; + int stop_enqueue; + struct workqueue_struct *workq; +}; + +/* SA congestion control data */ +struct sa_cc_data { + spinlock_t lock; + unsigned long outstanding; + struct to_fifo *tf; +}; + struct ib_mad_port_private { struct list_head port_list; struct ib_device *device; @@ -212,6 +242,7 @@ struct ib_mad_port_private { struct workqueue_struct *wq; struct work_struct work; struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; + struct sa_cc_data sa_cc; }; int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); diff --git a/sys/ofed/drivers/infiniband/core/mad_rmpp.c b/sys/ofed/drivers/infiniband/core/mad_rmpp.c index 4e0f282..f37878c 100644 --- a/sys/ofed/drivers/infiniband/core/mad_rmpp.c +++ b/sys/ofed/drivers/infiniband/core/mad_rmpp.c @@ -31,6 +31,8 @@ * SOFTWARE. 
*/ +#include <linux/slab.h> + #include "mad_priv.h" #include "mad_rmpp.h" diff --git a/sys/ofed/drivers/infiniband/core/multicast.c b/sys/ofed/drivers/infiniband/core/multicast.c index f8d7ef8..ef595b2 100644 --- a/sys/ofed/drivers/infiniband/core/multicast.c +++ b/sys/ofed/drivers/infiniband/core/multicast.c @@ -34,12 +34,27 @@ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/slab.h> #include <linux/bitops.h> #include <linux/random.h> +#include <linux/moduleparam.h> +#include <linux/rbtree.h> #include <rdma/ib_cache.h> #include "sa.h" +static int mcast_leave_retries = 3; + +/*static const struct kernel_param_ops retry_ops = { + .set = param_set_int, + .get = param_get_int, +}; + +module_param_cb(mcast_leave_retries, &retry_ops, &mcast_leave_retries, 0644); +MODULE_PARM_DESC(mcast_leave_retries, "Number of retries for multicast leave " + "requests before giving up (default: 3)"); +*/ static void mcast_add_one(struct ib_device *device); static void mcast_remove_one(struct ib_device *device); @@ -250,6 +265,34 @@ static u8 get_leave_state(struct mcast_group *group) return leave_state & group->rec.join_state; } +static int check_selector(ib_sa_comp_mask comp_mask, + ib_sa_comp_mask selector_mask, + ib_sa_comp_mask value_mask, + u8 selector, u8 src_value, u8 dst_value) +{ + int err; + + if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) + return 0; + + switch (selector) { + case IB_SA_GT: + err = (src_value <= dst_value); + break; + case IB_SA_LT: + err = (src_value >= dst_value); + break; + case IB_SA_EQ: + err = (src_value != dst_value); + break; + default: + err = 0; + break; + } + + return err; +} + static int cmp_rec(struct ib_sa_mcmember_rec *src, struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask) { @@ -262,7 +305,7 @@ static int cmp_rec(struct ib_sa_mcmember_rec *src, return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid) return -EINVAL; 
- if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, + if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector, src->mtu, dst->mtu)) return -EINVAL; @@ -271,11 +314,11 @@ static int cmp_rec(struct ib_sa_mcmember_rec *src, return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey) return -EINVAL; - if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, + if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, IB_SA_MCMEMBER_REC_RATE, dst->rate_selector, src->rate, dst->rate)) return -EINVAL; - if (ib_sa_check_selector(comp_mask, + if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR, IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME, dst->packet_life_time_selector, @@ -517,11 +560,15 @@ static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, { struct mcast_group *group = context; - if (status && (group->retries > 0) && + if (status && group->retries > 0 && !send_leave(group, group->leave_state)) group->retries--; - else + else { + if (status && group->retries <= 0) + printk(KERN_WARNING "reached max retry count. " + "status=%d. 
Giving up\n", status); mcast_work_handler(&group->work); + } } static struct mcast_group *acquire_group(struct mcast_port *port, @@ -544,7 +591,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port, if (!group) return NULL; - group->retries = 3; + group->retries = mcast_leave_retries; group->port = port; group->rec.mgid = *mgid; group->pkey_index = MCAST_INVALID_PKEY_INDEX; @@ -754,7 +801,6 @@ static void mcast_event_handler(struct ib_event_handler *handler, switch (event->event) { case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: - case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR); break; diff --git a/sys/ofed/drivers/infiniband/core/notice.c b/sys/ofed/drivers/infiniband/core/notice.c deleted file mode 100644 index ca91d96d..0000000 --- a/sys/ofed/drivers/infiniband/core/notice.c +++ /dev/null @@ -1,749 +0,0 @@ -/* - * Copyright (c) 2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/completion.h> -#include <linux/dma-mapping.h> -#include <linux/err.h> -#include <linux/interrupt.h> -#include <linux/pci.h> -#include <linux/bitops.h> -#include <linux/random.h> - -#include "sa.h" - -MODULE_AUTHOR("Sean Hefty"); -MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling"); -MODULE_LICENSE("Dual BSD/GPL"); - -static void inform_add_one(struct ib_device *device); -static void inform_remove_one(struct ib_device *device); - -static struct ib_client inform_client = { - .name = "ib_notice", - .add = inform_add_one, - .remove = inform_remove_one -}; - -static struct ib_sa_client sa_client; -static struct workqueue_struct *inform_wq; - -struct inform_device; - -struct inform_port { - struct inform_device *dev; - spinlock_t lock; - struct rb_root table; - atomic_t refcount; - struct completion comp; - u8 port_num; -}; - -struct inform_device { - struct ib_device *device; - struct ib_event_handler event_handler; - int start_port; - int end_port; - struct inform_port port[0]; -}; - -enum inform_state { - INFORM_IDLE, - INFORM_REGISTERING, - INFORM_MEMBER, - INFORM_BUSY, - INFORM_ERROR -}; - -struct inform_member; - -struct inform_group { - u16 trap_number; - struct rb_node node; - struct inform_port *port; - spinlock_t lock; - struct work_struct work; - struct list_head pending_list; - struct list_head active_list; - struct list_head notice_list; - struct inform_member *last_join; - int members; - enum inform_state join_state; /* State 
relative to SA */ - atomic_t refcount; - enum inform_state state; - struct ib_sa_query *query; - int query_id; -}; - -struct inform_member { - struct ib_inform_info info; - struct ib_sa_client *client; - struct inform_group *group; - struct list_head list; - enum inform_state state; - atomic_t refcount; - struct completion comp; -}; - -struct inform_notice { - struct list_head list; - struct ib_sa_notice notice; -}; - -static void reg_handler(int status, struct ib_sa_inform *inform, - void *context); -static void unreg_handler(int status, struct ib_sa_inform *inform, - void *context); - -static struct inform_group *inform_find(struct inform_port *port, - u16 trap_number) -{ - struct rb_node *node = port->table.rb_node; - struct inform_group *group; - - while (node) { - group = rb_entry(node, struct inform_group, node); - if (trap_number < group->trap_number) - node = node->rb_left; - else if (trap_number > group->trap_number) - node = node->rb_right; - else - return group; - } - return NULL; -} - -static struct inform_group *inform_insert(struct inform_port *port, - struct inform_group *group) -{ - struct rb_node **link = &port->table.rb_node; - struct rb_node *parent = NULL; - struct inform_group *cur_group; - - while (*link) { - parent = *link; - cur_group = rb_entry(parent, struct inform_group, node); - if (group->trap_number < cur_group->trap_number) - link = &(*link)->rb_left; - else if (group->trap_number > cur_group->trap_number) - link = &(*link)->rb_right; - else - return cur_group; - } - rb_link_node(&group->node, parent, link); - rb_insert_color(&group->node, &port->table); - return NULL; -} - -static void deref_port(struct inform_port *port) -{ - if (atomic_dec_and_test(&port->refcount)) - complete(&port->comp); -} - -static void release_group(struct inform_group *group) -{ - struct inform_port *port = group->port; - unsigned long flags; - - spin_lock_irqsave(&port->lock, flags); - if (atomic_dec_and_test(&group->refcount)) { - rb_erase(&group->node, 
&port->table); - spin_unlock_irqrestore(&port->lock, flags); - kfree(group); - deref_port(port); - } else - spin_unlock_irqrestore(&port->lock, flags); -} - -static void deref_member(struct inform_member *member) -{ - if (atomic_dec_and_test(&member->refcount)) - complete(&member->comp); -} - -static void queue_reg(struct inform_member *member) -{ - struct inform_group *group = member->group; - unsigned long flags; - - spin_lock_irqsave(&group->lock, flags); - list_add(&member->list, &group->pending_list); - if (group->state == INFORM_IDLE) { - group->state = INFORM_BUSY; - atomic_inc(&group->refcount); - queue_work(inform_wq, &group->work); - } - spin_unlock_irqrestore(&group->lock, flags); -} - -static int send_reg(struct inform_group *group, struct inform_member *member) -{ - struct inform_port *port = group->port; - struct ib_sa_inform inform; - int ret; - - memset(&inform, 0, sizeof inform); - inform.lid_range_begin = cpu_to_be16(0xFFFF); - inform.is_generic = 1; - inform.subscribe = 1; - inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL); - inform.trap.generic.trap_num = cpu_to_be16(member->info.trap_number); - inform.trap.generic.resp_time = 19; - inform.trap.generic.producer_type = - cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL); - - group->last_join = member; - ret = ib_sa_informinfo_query(&sa_client, port->dev->device, - port->port_num, &inform, 3000, GFP_KERNEL, - reg_handler, group,&group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; -} - -static int send_unreg(struct inform_group *group) -{ - struct inform_port *port = group->port; - struct ib_sa_inform inform; - int ret; - - memset(&inform, 0, sizeof inform); - inform.lid_range_begin = cpu_to_be16(0xFFFF); - inform.is_generic = 1; - inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL); - inform.trap.generic.trap_num = cpu_to_be16(group->trap_number); - inform.trap.generic.qpn = IB_QP1; - inform.trap.generic.resp_time = 19; - inform.trap.generic.producer_type = - 
cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL); - - ret = ib_sa_informinfo_query(&sa_client, port->dev->device, - port->port_num, &inform, 3000, GFP_KERNEL, - unreg_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; -} - -static void join_group(struct inform_group *group, struct inform_member *member) -{ - member->state = INFORM_MEMBER; - group->members++; - list_move(&member->list, &group->active_list); -} - -static int fail_join(struct inform_group *group, struct inform_member *member, - int status) -{ - spin_lock_irq(&group->lock); - list_del_init(&member->list); - spin_unlock_irq(&group->lock); - return member->info.callback(status, &member->info, NULL); -} - -static void process_group_error(struct inform_group *group) -{ - struct inform_member *member; - int ret; - - spin_lock_irq(&group->lock); - while (!list_empty(&group->active_list)) { - member = list_entry(group->active_list.next, - struct inform_member, list); - atomic_inc(&member->refcount); - list_del_init(&member->list); - group->members--; - member->state = INFORM_ERROR; - spin_unlock_irq(&group->lock); - - ret = member->info.callback(-ENETRESET, &member->info, NULL); - deref_member(member); - if (ret) - ib_sa_unregister_inform_info(&member->info); - spin_lock_irq(&group->lock); - } - - group->join_state = INFORM_IDLE; - group->state = INFORM_BUSY; - spin_unlock_irq(&group->lock); -} - -/* - * Report a notice to all active subscribers. We use a temporary list to - * handle unsubscription requests while the notice is being reported, which - * avoids holding the group lock while in the user's callback. 
- */ -static void process_notice(struct inform_group *group, - struct inform_notice *info_notice) -{ - struct inform_member *member; - struct list_head list; - int ret; - - INIT_LIST_HEAD(&list); - - spin_lock_irq(&group->lock); - list_splice_init(&group->active_list, &list); - while (!list_empty(&list)) { - - member = list_entry(list.next, struct inform_member, list); - atomic_inc(&member->refcount); - list_move(&member->list, &group->active_list); - spin_unlock_irq(&group->lock); - - ret = member->info.callback(0, &member->info, - &info_notice->notice); - deref_member(member); - if (ret) - ib_sa_unregister_inform_info(&member->info); - spin_lock_irq(&group->lock); - } - spin_unlock_irq(&group->lock); -} - -static void inform_work_handler(struct work_struct *work) -{ - struct inform_group *group; - struct inform_member *member; - struct ib_inform_info *info; - struct inform_notice *info_notice; - int status, ret; - - group = container_of(work, typeof(*group), work); -retest: - spin_lock_irq(&group->lock); - while (!list_empty(&group->pending_list) || - !list_empty(&group->notice_list) || - (group->state == INFORM_ERROR)) { - - if (group->state == INFORM_ERROR) { - spin_unlock_irq(&group->lock); - process_group_error(group); - goto retest; - } - - if (!list_empty(&group->notice_list)) { - info_notice = list_entry(group->notice_list.next, - struct inform_notice, list); - list_del(&info_notice->list); - spin_unlock_irq(&group->lock); - process_notice(group, info_notice); - kfree(info_notice); - goto retest; - } - - member = list_entry(group->pending_list.next, - struct inform_member, list); - info = &member->info; - atomic_inc(&member->refcount); - - if (group->join_state == INFORM_MEMBER) { - join_group(group, member); - spin_unlock_irq(&group->lock); - ret = info->callback(0, info, NULL); - } else { - spin_unlock_irq(&group->lock); - status = send_reg(group, member); - if (!status) { - deref_member(member); - return; - } - ret = fail_join(group, member, status); - 
} - - deref_member(member); - if (ret) - ib_sa_unregister_inform_info(&member->info); - spin_lock_irq(&group->lock); - } - - if (!group->members && (group->join_state == INFORM_MEMBER)) { - group->join_state = INFORM_IDLE; - spin_unlock_irq(&group->lock); - if (send_unreg(group)) - goto retest; - } else { - group->state = INFORM_IDLE; - spin_unlock_irq(&group->lock); - release_group(group); - } -} - -/* - * Fail a join request if it is still active - at the head of the pending queue. - */ -static void process_join_error(struct inform_group *group, int status) -{ - struct inform_member *member; - int ret; - - spin_lock_irq(&group->lock); - member = list_entry(group->pending_list.next, - struct inform_member, list); - if (group->last_join == member) { - atomic_inc(&member->refcount); - list_del_init(&member->list); - spin_unlock_irq(&group->lock); - ret = member->info.callback(status, &member->info, NULL); - deref_member(member); - if (ret) - ib_sa_unregister_inform_info(&member->info); - } else - spin_unlock_irq(&group->lock); -} - -static void reg_handler(int status, struct ib_sa_inform *inform, void *context) -{ - struct inform_group *group = context; - - if (status) - process_join_error(group, status); - else - group->join_state = INFORM_MEMBER; - - inform_work_handler(&group->work); -} - -static void unreg_handler(int status, struct ib_sa_inform *rec, void *context) -{ - struct inform_group *group = context; - - inform_work_handler(&group->work); -} - -int notice_dispatch(struct ib_device *device, u8 port_num, - struct ib_sa_notice *notice) -{ - struct inform_device *dev; - struct inform_port *port; - struct inform_group *group; - struct inform_notice *info_notice; - - dev = ib_get_client_data(device, &inform_client); - if (!dev) - return 0; /* No one to give notice to. */ - - port = &dev->port[port_num - dev->start_port]; - spin_lock_irq(&port->lock); - group = inform_find(port, __be16_to_cpu(notice->trap. 
- generic.trap_num)); - if (!group) { - spin_unlock_irq(&port->lock); - return 0; - } - - atomic_inc(&group->refcount); - spin_unlock_irq(&port->lock); - - info_notice = kmalloc(sizeof *info_notice, GFP_KERNEL); - if (!info_notice) { - release_group(group); - return -ENOMEM; - } - - info_notice->notice = *notice; - - spin_lock_irq(&group->lock); - list_add(&info_notice->list, &group->notice_list); - if (group->state == INFORM_IDLE) { - group->state = INFORM_BUSY; - spin_unlock_irq(&group->lock); - inform_work_handler(&group->work); - } else { - spin_unlock_irq(&group->lock); - release_group(group); - } - - return 0; -} - -static struct inform_group *acquire_group(struct inform_port *port, - u16 trap_number, gfp_t gfp_mask) -{ - struct inform_group *group, *cur_group; - unsigned long flags; - - spin_lock_irqsave(&port->lock, flags); - group = inform_find(port, trap_number); - if (group) - goto found; - spin_unlock_irqrestore(&port->lock, flags); - - group = kzalloc(sizeof *group, gfp_mask); - if (!group) - return NULL; - - group->port = port; - group->trap_number = trap_number; - INIT_LIST_HEAD(&group->pending_list); - INIT_LIST_HEAD(&group->active_list); - INIT_LIST_HEAD(&group->notice_list); - INIT_WORK(&group->work, inform_work_handler); - spin_lock_init(&group->lock); - - spin_lock_irqsave(&port->lock, flags); - cur_group = inform_insert(port, group); - if (cur_group) { - kfree(group); - group = cur_group; - } else - atomic_inc(&port->refcount); -found: - atomic_inc(&group->refcount); - spin_unlock_irqrestore(&port->lock, flags); - return group; -} - -/* - * We serialize all join requests to a single group to make our lives much - * easier. Otherwise, two users could try to join the same group - * simultaneously, with different configurations, one could leave while the - * join is in progress, etc., which makes locking around error recovery - * difficult. 
- */ -struct ib_inform_info * -ib_sa_register_inform_info(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - u16 trap_number, gfp_t gfp_mask, - int (*callback)(int status, - struct ib_inform_info *info, - struct ib_sa_notice *notice), - void *context) -{ - struct inform_device *dev; - struct inform_member *member; - struct ib_inform_info *info; - int ret; - - dev = ib_get_client_data(device, &inform_client); - if (!dev) - return ERR_PTR(-ENODEV); - - member = kzalloc(sizeof *member, gfp_mask); - if (!member) - return ERR_PTR(-ENOMEM); - - ib_sa_client_get(client); - member->client = client; - member->info.trap_number = trap_number; - member->info.callback = callback; - member->info.context = context; - init_completion(&member->comp); - atomic_set(&member->refcount, 1); - member->state = INFORM_REGISTERING; - - member->group = acquire_group(&dev->port[port_num - dev->start_port], - trap_number, gfp_mask); - if (!member->group) { - ret = -ENOMEM; - goto err; - } - - /* - * The user will get the info structure in their callback. They - * could then free the info structure before we can return from - * this routine. So we save the pointer to return before queuing - * any callback. 
- */ - info = &member->info; - queue_reg(member); - return info; - -err: - ib_sa_client_put(member->client); - kfree(member); - return ERR_PTR(ret); -} -EXPORT_SYMBOL(ib_sa_register_inform_info); - -void ib_sa_unregister_inform_info(struct ib_inform_info *info) -{ - struct inform_member *member; - struct inform_group *group; - - member = container_of(info, struct inform_member, info); - group = member->group; - - spin_lock_irq(&group->lock); - if (member->state == INFORM_MEMBER) - group->members--; - - list_del_init(&member->list); - - if (group->state == INFORM_IDLE) { - group->state = INFORM_BUSY; - spin_unlock_irq(&group->lock); - /* Continue to hold reference on group until callback */ - queue_work(inform_wq, &group->work); - } else { - spin_unlock_irq(&group->lock); - release_group(group); - } - - deref_member(member); - wait_for_completion(&member->comp); - ib_sa_client_put(member->client); - kfree(member); -} -EXPORT_SYMBOL(ib_sa_unregister_inform_info); - -static void inform_groups_lost(struct inform_port *port) -{ - struct inform_group *group; - struct rb_node *node; - unsigned long flags; - - spin_lock_irqsave(&port->lock, flags); - for (node = rb_first(&port->table); node; node = rb_next(node)) { - group = rb_entry(node, struct inform_group, node); - spin_lock(&group->lock); - if (group->state == INFORM_IDLE) { - atomic_inc(&group->refcount); - queue_work(inform_wq, &group->work); - } - group->state = INFORM_ERROR; - spin_unlock(&group->lock); - } - spin_unlock_irqrestore(&port->lock, flags); -} - -static void inform_event_handler(struct ib_event_handler *handler, - struct ib_event *event) -{ - struct inform_device *dev; - - dev = container_of(handler, struct inform_device, event_handler); - - switch (event->event) { - case IB_EVENT_PORT_ERR: - case IB_EVENT_LID_CHANGE: - case IB_EVENT_SM_CHANGE: - case IB_EVENT_CLIENT_REREGISTER: - inform_groups_lost(&dev->port[event->element.port_num - - dev->start_port]); - break; - default: - break; - } -} - -static 
void inform_add_one(struct ib_device *device) -{ - struct inform_device *dev; - struct inform_port *port; - int i; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - - dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, - GFP_KERNEL); - if (!dev) - return; - - if (device->node_type == RDMA_NODE_IB_SWITCH) - dev->start_port = dev->end_port = 0; - else { - dev->start_port = 1; - dev->end_port = device->phys_port_cnt; - } - - for (i = 0; i <= dev->end_port - dev->start_port; i++) { - port = &dev->port[i]; - port->dev = dev; - port->port_num = dev->start_port + i; - spin_lock_init(&port->lock); - port->table = RB_ROOT; - init_completion(&port->comp); - atomic_set(&port->refcount, 1); - } - - dev->device = device; - ib_set_client_data(device, &inform_client, dev); - - INIT_IB_EVENT_HANDLER(&dev->event_handler, device, inform_event_handler); - ib_register_event_handler(&dev->event_handler); -} - -static void inform_remove_one(struct ib_device *device) -{ - struct inform_device *dev; - struct inform_port *port; - int i; - - dev = ib_get_client_data(device, &inform_client); - if (!dev) - return; - - ib_unregister_event_handler(&dev->event_handler); - flush_workqueue(inform_wq); - - for (i = 0; i <= dev->end_port - dev->start_port; i++) { - port = &dev->port[i]; - deref_port(port); - wait_for_completion(&port->comp); - } - - kfree(dev); -} - -int notice_init(void) -{ - int ret; - - inform_wq = create_singlethread_workqueue("ib_inform"); - if (!inform_wq) - return -ENOMEM; - - ib_sa_register_client(&sa_client); - - ret = ib_register_client(&inform_client); - if (ret) - goto err; - return 0; - -err: - ib_sa_unregister_client(&sa_client); - destroy_workqueue(inform_wq); - return ret; -} - -void notice_cleanup(void) -{ - ib_unregister_client(&inform_client); - ib_sa_unregister_client(&sa_client); - destroy_workqueue(inform_wq); -} diff --git a/sys/ofed/drivers/infiniband/core/packer.c b/sys/ofed/drivers/infiniband/core/packer.c 
index 019bd4b..9f42595 100644 --- a/sys/ofed/drivers/infiniband/core/packer.c +++ b/sys/ofed/drivers/infiniband/core/packer.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include <linux/module.h> #include <linux/string.h> #include <rdma/ib_pack.h> diff --git a/sys/ofed/drivers/infiniband/core/peer_mem.c b/sys/ofed/drivers/infiniband/core/peer_mem.c new file mode 100644 index 0000000..cd716a4 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/peer_mem.c @@ -0,0 +1,461 @@ +/* + * Copyright (c) 2013, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <rdma/ib_peer_mem.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_umem.h> + +static DEFINE_MUTEX(peer_memory_mutex); +static LIST_HEAD(peer_memory_list); + +static int num_registered_peers; + +/* This code uses the sysfs which is not supporeted by the FreeBSD. + * * Will be added in future to the sysctl */ + +#if 0 +static struct kobject *peers_kobj; +static struct ib_peer_memory_client *get_peer_by_kobj(void *kobj); +static ssize_t version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj); + + if (ib_peer_client) { + sprintf(buf, "%s\n", ib_peer_client->peer_mem->version); + return strlen(buf); + } + /* not found - nothing is return */ + return 0; +} + +static ssize_t num_alloc_mrs_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj); + + if (ib_peer_client) { + sprintf(buf, "%lu\n", ib_peer_client->stats.num_alloc_mrs); + return strlen(buf); + } + /* not found - nothing is return */ + return 0; +} + +static ssize_t num_reg_pages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj); + + if (ib_peer_client) { + sprintf(buf, "%lu\n", ib_peer_client->stats.num_reg_pages); + return strlen(buf); + } + /* not found - nothing is return */ + return 0; +} + +static ssize_t num_dereg_pages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj); + + if (ib_peer_client) { + sprintf(buf, "%lu\n", ib_peer_client->stats.num_dereg_pages); + return strlen(buf); + } + /* not found - nothing is return */ + return 0; +} + +static ssize_t num_free_callbacks_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj); + + if 
(ib_peer_client) { + sprintf(buf, "%lu\n", ib_peer_client->stats.num_free_callbacks); + return strlen(buf); + } + /* not found - nothing is return */ + return 0; +} + +static struct kobj_attribute version_attr = __ATTR_RO(version); +static struct kobj_attribute num_alloc_mrs = __ATTR_RO(num_alloc_mrs); +static struct kobj_attribute num_reg_pages = __ATTR_RO(num_reg_pages); +static struct kobj_attribute num_dereg_pages = __ATTR_RO(num_dereg_pages); +static struct kobj_attribute num_free_callbacks = __ATTR_RO(num_free_callbacks); + +static struct attribute *peer_mem_attrs[] = { + &version_attr.attr, + &num_alloc_mrs.attr, + &num_reg_pages.attr, + &num_dereg_pages.attr, + &num_free_callbacks.attr, + NULL, +}; +#endif + +#if 0 +static void destroy_peer_sysfs(struct ib_peer_memory_client *ib_peer_client) +{ + kobject_put(ib_peer_client->kobj); + if (!num_registered_peers) + kobject_put(peers_kobj); + + return; +} + +/* This code uses the sysfs which is not supporeted by the FreeBSD. + * Will be added in future to the sysctl */ + +static int create_peer_sysfs(struct ib_peer_memory_client *ib_peer_client) +{ + int ret; + + if (!num_registered_peers) { + /* creating under /sys/kernel/mm */ + peers_kobj = kobject_create_and_add("memory_peers", mm_kobj); + if (!peers_kobj) + return -ENOMEM; + } + + ib_peer_client->peer_mem_attr_group.attrs = peer_mem_attrs; + /* Dir alreday was created explicitly to get its kernel object for further usage */ + ib_peer_client->peer_mem_attr_group.name = NULL; + ib_peer_client->kobj = kobject_create_and_add(ib_peer_client->peer_mem->name, + peers_kobj); + + if (!ib_peer_client->kobj) { + ret = -EINVAL; + goto free; + } + + /* Create the files associated with this kobject */ + ret = sysfs_create_group(ib_peer_client->kobj, + &ib_peer_client->peer_mem_attr_group); + if (ret) + goto peer_free; + + return 0; + +peer_free: + kobject_put(ib_peer_client->kobj); + +free: + if (!num_registered_peers) + kobject_put(peers_kobj); + + return ret; +} 
+#endif + +static int ib_invalidate_peer_memory(void *reg_handle, + void *core_context) +{ + struct ib_peer_memory_client *ib_peer_client = + (struct ib_peer_memory_client *)reg_handle; + struct invalidation_ctx *invalidation_ctx; + struct core_ticket *core_ticket; + int need_unlock = 1; + + mutex_lock(&ib_peer_client->lock); + ib_peer_client->stats.num_free_callbacks += 1; + core_ticket = ib_peer_search_context(ib_peer_client, + (unsigned long)core_context); + if (!core_ticket) + goto out; + + invalidation_ctx = (struct invalidation_ctx *)core_ticket->context; + /* If context not ready yet mark to be invalidated */ + if (!invalidation_ctx->func) { + invalidation_ctx->peer_invalidated = 1; + goto out; + } + + invalidation_ctx->func(invalidation_ctx->cookie, + invalidation_ctx->umem, 0, 0); + if (invalidation_ctx->inflight_invalidation) { + + /* init the completion to wait on before letting other thread to run */ + init_completion(&invalidation_ctx->comp); + mutex_unlock(&ib_peer_client->lock); + need_unlock = 0; + wait_for_completion(&invalidation_ctx->comp); + } + + kfree(invalidation_ctx); + +out: + if (need_unlock) + mutex_unlock(&ib_peer_client->lock); + + return 0; +} + +/* access to that peer client is under its lock - no extra lock is needed */ +unsigned long ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client, + void *context) +{ + struct core_ticket *core_ticket = kzalloc(sizeof(*core_ticket), GFP_KERNEL); + + ib_peer_client->last_ticket++; + core_ticket->context = context; + core_ticket->key = ib_peer_client->last_ticket; + + list_add_tail(&core_ticket->ticket_list, + &ib_peer_client->core_ticket_list); + + return core_ticket->key; +} + +int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client, + unsigned long key) +{ + struct core_ticket *core_ticket, *tmp; + + list_for_each_entry_safe(core_ticket, tmp, &ib_peer_client->core_ticket_list, + ticket_list) { + if (core_ticket->key == key) { + 
list_del(&core_ticket->ticket_list); + kfree(core_ticket); + return 0; + } + } + + return 1; +} + +struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client, + unsigned long key) +{ + struct core_ticket *core_ticket, *tmp; + list_for_each_entry_safe(core_ticket, tmp, &ib_peer_client->core_ticket_list, + ticket_list) { + if (core_ticket->key == key) + return core_ticket; + } + + return NULL; +} + + +static int ib_memory_peer_check_mandatory(struct peer_memory_client + *peer_client) +{ +#define PEER_MEM_MANDATORY_FUNC(x) {\ + offsetof(struct peer_memory_client, x), #x } + + static const struct { + size_t offset; + char *name; + } mandatory_table[] = { + PEER_MEM_MANDATORY_FUNC(acquire), + PEER_MEM_MANDATORY_FUNC(get_pages), + PEER_MEM_MANDATORY_FUNC(put_pages), + PEER_MEM_MANDATORY_FUNC(get_page_size), + PEER_MEM_MANDATORY_FUNC(dma_map), + PEER_MEM_MANDATORY_FUNC(dma_unmap) + }; + int i; + + for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { + if (!*(void **) ((void *) peer_client + mandatory_table[i].offset)) { + printk(KERN_WARNING "Peer memory %s is missing mandatory function %s\n", + peer_client->name, mandatory_table[i].name); + return -EINVAL; + } + } + + return 0; +} + + + +void *ib_register_peer_memory_client(struct peer_memory_client *peer_client, + invalidate_peer_memory *invalidate_callback) +{ + int ret = 0; + struct ib_peer_memory_client *ib_peer_client = NULL; + + mutex_lock(&peer_memory_mutex); + if (ib_memory_peer_check_mandatory(peer_client)) { + ret = -EINVAL; + goto out; + } + + ib_peer_client = kzalloc(sizeof(*ib_peer_client), GFP_KERNEL); + if (!ib_peer_client) + goto out; + ib_peer_client->peer_mem = peer_client; + + INIT_LIST_HEAD(&ib_peer_client->core_ticket_list); + mutex_init(&ib_peer_client->lock); +#ifdef __FreeBSD__ + ib_peer_client->holdcount = 0; + ib_peer_client->needwakeup = 0; + cv_init(&ib_peer_client->peer_cv, "ibprcl"); +#else + ret = init_srcu_struct(&ib_peer_client->peer_srcu); + if (ret) + goto 
free; +#endif +#if 0 + if (create_peer_sysfs(ib_peer_client)) + goto free; +#endif + *invalidate_callback = ib_invalidate_peer_memory; + list_add_tail(&ib_peer_client->core_peer_list, &peer_memory_list); + num_registered_peers++; + goto out; +#if 0 +free: + kfree(ib_peer_client); + ib_peer_client = NULL; +#endif +out: + mutex_unlock(&peer_memory_mutex); + return ib_peer_client; +} +EXPORT_SYMBOL(ib_register_peer_memory_client); + +void ib_unregister_peer_memory_client(void *reg_handle) +{ + struct ib_peer_memory_client *ib_peer_client = + (struct ib_peer_memory_client *)reg_handle; + + mutex_lock(&peer_memory_mutex); + /* remove from list to prevent future core clients usage as it goes down */ + list_del(&ib_peer_client->core_peer_list); +#ifdef __FreeBSD__ + while (ib_peer_client->holdcount != 0) { + ib_peer_client->needwakeup = 1; + cv_wait(&ib_peer_client->peer_cv, &peer_memory_mutex.sx); + } + cv_destroy(&ib_peer_client->peer_cv); +#else + mutex_unlock(&peer_memory_mutex); + /* peer memory can't go down while there are active clients */ + synchronize_srcu(&ib_peer_client->peer_srcu); + cleanup_srcu_struct(&ib_peer_client->peer_srcu); + mutex_lock(&peer_memory_mutex); +#endif + num_registered_peers--; +/* This code uses the sysfs which is not supporeted by the FreeBSD. + * Will be added in future to the sysctl */ +#if 0 + destroy_peer_sysfs(ib_peer_client); +#endif + mutex_unlock(&peer_memory_mutex); + + kfree(ib_peer_client); +} +EXPORT_SYMBOL(ib_unregister_peer_memory_client); + +/* This code uses the sysfs which is not supporeted by the FreeBSD. 
+ * Will be added in future to the sysctl */ + +#if 0 +static struct ib_peer_memory_client *get_peer_by_kobj(void *kobj) +{ + struct ib_peer_memory_client *ib_peer_client; + + mutex_lock(&peer_memory_mutex); + list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) { + if (ib_peer_client->kobj == kobj) + goto found; + } + + ib_peer_client = NULL; + +found: + + mutex_unlock(&peer_memory_mutex); + return ib_peer_client; +} +#endif + +struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr, + size_t size, void **peer_client_context, + int *srcu_key) +{ + struct ib_peer_memory_client *ib_peer_client; + int ret; + + mutex_lock(&peer_memory_mutex); + list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) { + ret = ib_peer_client->peer_mem->acquire(addr, size, + context->peer_mem_private_data, + context->peer_mem_name, + peer_client_context); + if (ret == 1) + goto found; + } + + ib_peer_client = NULL; + +found: + if (ib_peer_client) { +#ifdef __FreeBSD__ + ib_peer_client->holdcount++; +#else + *srcu_key = srcu_read_lock(&ib_peer_client->peer_srcu); +#endif + } + + mutex_unlock(&peer_memory_mutex); + return ib_peer_client; + +} +EXPORT_SYMBOL(ib_get_peer_client); + +void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client, + void *peer_client_context, + int srcu_key) +{ + + if (ib_peer_client->peer_mem->release) + ib_peer_client->peer_mem->release(peer_client_context); + +#ifdef __FreeBSD__ + ib_peer_client->holdcount--; + if (ib_peer_client->holdcount == 0 && ib_peer_client->needwakeup) { + cv_signal(&ib_peer_client->peer_cv); + } +#else + srcu_read_unlock(&ib_peer_client->peer_srcu, srcu_key); +#endif + return; +} +EXPORT_SYMBOL(ib_put_peer_client); + diff --git a/sys/ofed/drivers/infiniband/core/sa.h b/sys/ofed/drivers/infiniband/core/sa.h index b8abdd7..b1d4bbf 100644 --- a/sys/ofed/drivers/infiniband/core/sa.h +++ b/sys/ofed/drivers/infiniband/core/sa.h @@ -48,29 +48,6 @@ static 
inline void ib_sa_client_put(struct ib_sa_client *client) complete(&client->comp); } -int ib_sa_check_selector(ib_sa_comp_mask comp_mask, - ib_sa_comp_mask selector_mask, - ib_sa_comp_mask value_mask, - u8 selector, u8 src_value, u8 dst_value); - -int ib_sa_pack_attr(void *dst, void *src, int attr_id); - -int ib_sa_unpack_attr(void *dst, void *src, int attr_id); - -int ib_sa_path_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_path_rec *resp, - void *context), - void *context, - struct ib_sa_query **sa_query); - -int sa_db_init(void); -void sa_db_cleanup(void); - int ib_sa_mcmember_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, u8 method, @@ -86,20 +63,4 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client, int mcast_init(void); void mcast_cleanup(void); -int ib_sa_informinfo_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_inform *rec, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_inform *resp, - void *context), - void *context, - struct ib_sa_query **sa_query); - -int notice_dispatch(struct ib_device *device, u8 port_num, - struct ib_sa_notice *notice); - -int notice_init(void); -void notice_cleanup(void); - #endif /* SA_H */ diff --git a/sys/ofed/drivers/infiniband/core/sa_query.c b/sys/ofed/drivers/infiniband/core/sa_query.c index 9c6b4f7..a0c04f5 100644 --- a/sys/ofed/drivers/infiniband/core/sa_query.c +++ b/sys/ofed/drivers/infiniband/core/sa_query.c @@ -59,12 +59,10 @@ struct ib_sa_sm_ah { struct ib_sa_port { struct ib_mad_agent *agent; - struct ib_mad_agent *notice_agent; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; spinlock_t ah_lock; u8 port_num; - struct ib_device *device; }; struct ib_sa_device { @@ -95,14 +93,14 @@ struct ib_sa_path_query { struct ib_sa_query 
sa_query; }; -struct ib_sa_mcmember_query { - void (*callback)(int, struct ib_sa_mcmember_rec *, void *); +struct ib_sa_guidinfo_query { + void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); void *context; struct ib_sa_query sa_query; }; -struct ib_sa_inform_query { - void (*callback)(int, struct ib_sa_inform *, void *); +struct ib_sa_mcmember_query { + void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; struct ib_sa_query sa_query; }; @@ -116,10 +114,10 @@ static struct ib_client sa_client = { .remove = ib_sa_remove_one }; -static spinlock_t idr_lock; +static DEFINE_SPINLOCK(idr_lock); static DEFINE_IDR(query_idr); -static spinlock_t tid_lock; +static DEFINE_SPINLOCK(tid_lock); static u32 tid; #define PATH_REC_FIELD(field) \ @@ -354,162 +352,34 @@ static const struct ib_field service_rec_table[] = { .size_bits = 2*64 }, }; -#define INFORM_FIELD(field) \ - .struct_offset_bytes = offsetof(struct ib_sa_inform, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_inform *) 0)->field, \ - .field_name = "sa_inform:" #field +#define GUIDINFO_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ + .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ + .field_name = "sa_guidinfo_rec:" #field -static const struct ib_field inform_table[] = { - { INFORM_FIELD(gid), +static const struct ib_field guidinfo_rec_table[] = { + { GUIDINFO_REC_FIELD(lid), .offset_words = 0, .offset_bits = 0, - .size_bits = 128 }, - { INFORM_FIELD(lid_range_begin), - .offset_words = 4, - .offset_bits = 0, - .size_bits = 16 }, - { INFORM_FIELD(lid_range_end), - .offset_words = 4, - .offset_bits = 16, - .size_bits = 16 }, - { RESERVED, - .offset_words = 5, - .offset_bits = 0, .size_bits = 16 }, - { INFORM_FIELD(is_generic), - .offset_words = 5, + { GUIDINFO_REC_FIELD(block_num), + .offset_words = 0, .offset_bits = 16, .size_bits = 8 }, - { INFORM_FIELD(subscribe), - .offset_words = 5, - .offset_bits = 24, - .size_bits 
= 8 }, - { INFORM_FIELD(type), - .offset_words = 6, - .offset_bits = 0, - .size_bits = 16 }, - { INFORM_FIELD(trap.generic.trap_num), - .offset_words = 6, - .offset_bits = 16, - .size_bits = 16 }, - { INFORM_FIELD(trap.generic.qpn), - .offset_words = 7, - .offset_bits = 0, - .size_bits = 24 }, - { RESERVED, - .offset_words = 7, + { GUIDINFO_REC_FIELD(res1), + .offset_words = 0, .offset_bits = 24, - .size_bits = 3 }, - { INFORM_FIELD(trap.generic.resp_time), - .offset_words = 7, - .offset_bits = 27, - .size_bits = 5 }, - { RESERVED, - .offset_words = 8, - .offset_bits = 0, .size_bits = 8 }, - { INFORM_FIELD(trap.generic.producer_type), - .offset_words = 8, - .offset_bits = 8, - .size_bits = 24 }, -}; - -#define NOTICE_FIELD(field) \ - .struct_offset_bytes = offsetof(struct ib_sa_notice, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_notice *) 0)->field, \ - .field_name = "sa_notice:" #field - -static const struct ib_field notice_table[] = { - { NOTICE_FIELD(is_generic), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 1 }, - { NOTICE_FIELD(type), - .offset_words = 0, - .offset_bits = 1, - .size_bits = 7 }, - { NOTICE_FIELD(trap.generic.producer_type), - .offset_words = 0, - .offset_bits = 8, - .size_bits = 24 }, - { NOTICE_FIELD(trap.generic.trap_num), + { GUIDINFO_REC_FIELD(res2), .offset_words = 1, .offset_bits = 0, - .size_bits = 16 }, - { NOTICE_FIELD(issuer_lid), - .offset_words = 1, - .offset_bits = 16, - .size_bits = 16 }, - { NOTICE_FIELD(notice_toggle), - .offset_words = 2, - .offset_bits = 0, - .size_bits = 1 }, - { NOTICE_FIELD(notice_count), - .offset_words = 2, - .offset_bits = 1, - .size_bits = 15 }, - { NOTICE_FIELD(data_details), + .size_bits = 32 }, + { GUIDINFO_REC_FIELD(guid_info_list), .offset_words = 2, - .offset_bits = 16, - .size_bits = 432 }, - { NOTICE_FIELD(issuer_gid), - .offset_words = 16, .offset_bits = 0, - .size_bits = 128 }, + .size_bits = 512 }, }; -int ib_sa_check_selector(ib_sa_comp_mask comp_mask, - ib_sa_comp_mask 
selector_mask, - ib_sa_comp_mask value_mask, - u8 selector, u8 src_value, u8 dst_value) -{ - int err; - - if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) - return 0; - - switch (selector) { - case IB_SA_GT: - err = (src_value <= dst_value); - break; - case IB_SA_LT: - err = (src_value >= dst_value); - break; - case IB_SA_EQ: - err = (src_value != dst_value); - break; - default: - err = 0; - break; - } - - return err; -} - -int ib_sa_pack_attr(void *dst, void *src, int attr_id) -{ - switch (attr_id) { - case IB_SA_ATTR_PATH_REC: - ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst); - break; - default: - return -EINVAL; - } - return 0; -} - -int ib_sa_unpack_attr(void *dst, void *src, int attr_id) -{ - switch (attr_id) { - case IB_SA_ATTR_PATH_REC: - ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst); - break; - default: - return -EINVAL; - } - return 0; -} - static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -588,7 +458,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); - schedule_work(&sa_dev->port[event->element.port_num - + queue_work(ib_wq, &sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); } } @@ -685,6 +555,14 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->grh.hop_limit = rec->hop_limit; ah_attr->grh.traffic_class = rec->traffic_class; } + if (force_grh) { + memcpy(ah_attr->dmac, rec->dmac, 6); + ah_attr->vlan_id = rec->vlan_id; + } else { + memset(ah_attr->dmac, 0, 6); + ah_attr->vlan_id = 0xffff; + } + return 0; } EXPORT_SYMBOL(ib_init_ah_from_path); @@ -791,6 +669,10 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); + rec.vlan_id = 0xffff; + memset(rec.dmac, 0, ETH_ALEN); + memset(rec.smac, 0, ETH_ALEN); + 
query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); @@ -801,7 +683,33 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); } -int ib_sa_path_rec_query(struct ib_sa_client *client, + +/** + * ib_sa_path_rec_get - Start a Path get query + * @client:SA client + * @device:device to send query on + * @port_num: port number to send query on + * @rec:Path Record to send in query + * @comp_mask:component mask to send in query + * @timeout_ms:time to wait for response + * @gfp_mask:GFP mask to use for internal allocations + * @callback:function called when query completes, times out or is + * canceled + * @context:opaque user context passed to callback + * @sa_query:query context, used to cancel query + * + * Send a Path Record Get query to the SA to look up a path. The + * callback function will be called when the query completes (or + * fails); status is 0 for a successful response, -EINTR if the query + * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error + * occurred sending the query. The resp parameter of the callback is + * only valid if status is 0. + * + * If the return value of ib_sa_path_rec_get() is negative, it is an + * error code. Otherwise it is a query ID that can be used to cancel + * the query. 
+ */ +int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, @@ -867,6 +775,7 @@ err1: kfree(query); return ret; } +EXPORT_SYMBOL(ib_sa_path_rec_get); static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, int status, @@ -1082,26 +991,27 @@ err1: return ret; } -static void ib_sa_inform_callback(struct ib_sa_query *sa_query, +/* Support GuidInfoRecord */ +static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { - struct ib_sa_inform_query *query = - container_of(sa_query, struct ib_sa_inform_query, sa_query); + struct ib_sa_guidinfo_query *query = + container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); if (mad) { - struct ib_sa_inform rec; + struct ib_sa_guidinfo_rec rec; - ib_unpack(inform_table, ARRAY_SIZE(inform_table), + ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } -static void ib_sa_inform_release(struct ib_sa_query *sa_query) +static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) { - kfree(container_of(sa_query, struct ib_sa_inform_query, sa_query)); + kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); } int ib_sa_guid_info_rec_query(struct ib_sa_client *client, @@ -1115,52 +1025,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void *context, struct ib_sa_query **sa_query) { - // stub function - - // called originally from mad.c under mlx4_ib_init_sriov() - // which calls mlx4_ib_init_alias_guid_service() in alias_GUID.c - // which goes down to this function - - printk("ERROR: function should be called only in SRIOV flow!!!"); - - return 0; -} - -/** - * ib_sa_informinfo_query - Start an InformInfo registration. 
- * @client:SA client - * @device:device to send query on - * @port_num: port number to send query on - * @rec:Inform record to send in query - * @timeout_ms:time to wait for response - * @gfp_mask:GFP mask to use for internal allocations - * @callback:function called when notice handler registration completes, - * times out or is canceled - * @context:opaque user context passed to callback - * @sa_query:query context, used to cancel query - * - * This function sends inform info to register with SA to receive - * in-service notice. - * The callback function will be called when the query completes (or - * fails); status is 0 for a successful response, -EINTR if the query - * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error - * occurred sending the query. The resp parameter of the callback is - * only valid if status is 0. - * - * If the return value of ib_sa_inform_query() is negative, it is an - * error code. Otherwise it is a query ID that can be used to cancel - * the query. - */ -int ib_sa_informinfo_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_inform *rec, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_inform *resp, - void *context), - void *context, - struct ib_sa_query **sa_query) -{ - struct ib_sa_inform_query *query; + struct ib_sa_guidinfo_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; @@ -1170,6 +1035,12 @@ int ib_sa_informinfo_query(struct ib_sa_client *client, if (!sa_dev) return -ENODEV; + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_SA_METHOD_DELETE) { + return -EINVAL; + } + port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; @@ -1190,15 +1061,18 @@ int ib_sa_informinfo_query(struct ib_sa_client *client, mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); - query->sa_query.callback = callback ? 
ib_sa_inform_callback : NULL; - query->sa_query.release = ib_sa_inform_release; - query->sa_query.port = port; - mad->mad_hdr.method = IB_MGMT_METHOD_SET; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_INFORM_INFO); + query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL; + query->sa_query.release = ib_sa_guidinfo_rec_release; - ib_pack(inform_table, ARRAY_SIZE(inform_table), rec, mad->data); + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); + mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, + mad->data); *sa_query = &query->sa_query; + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; @@ -1209,49 +1083,12 @@ err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); + err1: kfree(query); return ret; } - -static void ib_sa_notice_resp(struct ib_sa_port *port, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_mad_send_buf *mad_buf; - struct ib_sa_mad *mad; - int ret; - unsigned long flags; - - mad_buf = ib_create_send_mad(port->notice_agent, 1, 0, 0, - IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, - GFP_KERNEL); - if (IS_ERR(mad_buf)) - return; - - mad = mad_buf->mad; - memcpy(mad, mad_recv_wc->recv_buf.mad, sizeof *mad); - mad->mad_hdr.method = IB_MGMT_METHOD_REPORT_RESP; - - spin_lock_irqsave(&port->ah_lock, flags); - if (!port->sm_ah) { - spin_unlock_irqrestore(&port->ah_lock, flags); - ib_free_send_mad(mad_buf); - return; - } - kref_get(&port->sm_ah->ref); - mad_buf->context[0] = &port->sm_ah->ref; - mad_buf->ah = port->sm_ah->ah; - spin_unlock_irqrestore(&port->ah_lock, flags); - - ret = ib_post_send_mad(mad_buf, NULL); - if (ret) - goto err; - - return; -err: - kref_put(mad_buf->context[0], free_sm_ah); - ib_free_send_mad(mad_buf); -} +EXPORT_SYMBOL(ib_sa_guid_info_rec_query); static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) @@ -1306,36 +1143,9 @@ 
static void recv_handler(struct ib_mad_agent *mad_agent, ib_free_recv_mad(mad_recv_wc); } -static void notice_resp_handler(struct ib_mad_agent *agent, - struct ib_mad_send_wc *mad_send_wc) -{ - kref_put(mad_send_wc->send_buf->context[0], free_sm_ah); - ib_free_send_mad(mad_send_wc->send_buf); -} - -static void notice_handler(struct ib_mad_agent *mad_agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_sa_port *port; - struct ib_sa_mad *mad; - struct ib_sa_notice notice; - - port = mad_agent->context; - mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad; - ib_unpack(notice_table, ARRAY_SIZE(notice_table), mad->data, ¬ice); - - if (!notice_dispatch(port->device, port->port_num, ¬ice)) - ib_sa_notice_resp(port, mad_recv_wc); - ib_free_recv_mad(mad_recv_wc); -} - static void ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; - struct ib_mad_reg_req reg_req = { - .mgmt_class = IB_MGMT_CLASS_SUBN_ADM, - .mgmt_class_version = 2 - }; int s, e, i; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) @@ -1372,16 +1182,6 @@ static void ib_sa_add_one(struct ib_device *device) if (IS_ERR(sa_dev->port[i].agent)) goto err; - sa_dev->port[i].device = device; - set_bit(IB_MGMT_METHOD_REPORT, reg_req.method_mask); - sa_dev->port[i].notice_agent = - ib_register_mad_agent(device, i + s, IB_QPT_GSI, - ®_req, 0, notice_resp_handler, - notice_handler, &sa_dev->port[i]); - - if (IS_ERR(sa_dev->port[i].notice_agent)) - goto err; - INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); } @@ -1396,7 +1196,7 @@ static void ib_sa_add_one(struct ib_device *device) INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event); if (ib_register_event_handler(&sa_dev->event_handler)) - goto err; + goto reg_err; for (i = 0; i <= e - s; ++i) if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) @@ -1404,14 +1204,14 @@ static void ib_sa_add_one(struct ib_device *device) return; +reg_err: + ib_set_client_data(device, &sa_client, 
NULL); + i = e - s; err: - while (--i >= 0) - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { - if (!IS_ERR(sa_dev->port[i].notice_agent)) - ib_unregister_mad_agent(sa_dev->port[i].notice_agent); - if (!IS_ERR(sa_dev->port[i].agent)) + for (; i >= 0; --i) + if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND && + !IS_ERR(sa_dev->port[i].agent)) ib_unregister_mad_agent(sa_dev->port[i].agent); - } kfree(sa_dev); @@ -1428,11 +1228,10 @@ static void ib_sa_remove_one(struct ib_device *device) ib_unregister_event_handler(&sa_dev->event_handler); - flush_scheduled_work(); + flush_workqueue(ib_wq); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { - ib_unregister_mad_agent(sa_dev->port[i].notice_agent); ib_unregister_mad_agent(sa_dev->port[i].agent); if (sa_dev->port[i].sm_ah) kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); @@ -1447,9 +1246,6 @@ static int __init ib_sa_init(void) { int ret; - spin_lock_init(&idr_lock); - spin_lock_init(&tid_lock); - get_random_bytes(&tid, sizeof tid); ret = ib_register_client(&sa_client); @@ -1464,23 +1260,7 @@ static int __init ib_sa_init(void) goto err2; } - ret = notice_init(); - if (ret) { - printk(KERN_ERR "Couldn't initialize notice handling\n"); - goto err3; - } - - ret = sa_db_init(); - if (ret) { - printk(KERN_ERR "Couldn't initialize local SA\n"); - goto err4; - } - return 0; -err4: - notice_cleanup(); -err3: - mcast_cleanup(); err2: ib_unregister_client(&sa_client); err1: @@ -1489,9 +1269,7 @@ err1: static void __exit ib_sa_cleanup(void) { - sa_db_cleanup(); mcast_cleanup(); - notice_cleanup(); ib_unregister_client(&sa_client); idr_destroy(&query_idr); } diff --git a/sys/ofed/drivers/infiniband/core/smi.c b/sys/ofed/drivers/infiniband/core/smi.c index 8723675..5855e44 100644 --- a/sys/ofed/drivers/infiniband/core/smi.c +++ b/sys/ofed/drivers/infiniband/core/smi.c @@ -52,6 +52,10 @@ enum 
smi_action smi_handle_dr_smp_send(struct ib_smp *smp, hop_cnt = smp->hop_cnt; /* See section 14.2.2.2, Vol 1 IB spec */ + /* C14-6 -- valid hop_cnt values are from 0 to 63 */ + if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) + return IB_SMI_DISCARD; + if (!ib_get_smp_direction(smp)) { /* C14-9:1 */ if (hop_cnt && hop_ptr == 0) { @@ -133,6 +137,10 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, hop_cnt = smp->hop_cnt; /* See section 14.2.2.2, Vol 1 IB spec */ + /* C14-6 -- valid hop_cnt values are from 0 to 63 */ + if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) + return IB_SMI_DISCARD; + if (!ib_get_smp_direction(smp)) { /* C14-9:1 -- sender should have incremented hop_ptr */ if (hop_cnt && hop_ptr == 0) diff --git a/sys/ofed/drivers/infiniband/core/sysfs.c b/sys/ofed/drivers/infiniband/core/sysfs.c index 4cd5560..6bcbfb9 100644 --- a/sys/ofed/drivers/infiniband/core/sysfs.c +++ b/sys/ofed/drivers/infiniband/core/sysfs.c @@ -37,6 +37,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/fs.h> +#include <linux/printk.h> #include <rdma/ib_mad.h> #include <rdma/ib_pma.h> @@ -105,7 +106,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused, return ret; return sprintf(buf, "%d: %s\n", attr.state, - attr.state < ARRAY_SIZE(state_name) ? + attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ? 
state_name[attr.state] : "UNKNOWN"); } @@ -180,19 +181,18 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, { struct ib_port_attr attr; char *speed = ""; - int rate; + int rate; /* in deci-Gb/sec */ ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; - switch (attr.active_speed) { - case 2: speed = " DDR"; break; - case 4: speed = " QDR"; break; - } + ib_active_speed_enum_to_rate(attr.active_speed, + &rate, + &speed); - rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed; + rate *= ib_width_enum_to_int(attr.active_width); if (rate < 0) return -EINVAL; @@ -229,9 +229,11 @@ static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, { switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { case IB_LINK_LAYER_INFINIBAND: - return sprintf(buf, "%s\n", "IB"); + return sprintf(buf, "%s\n", "InfiniBand"); case IB_LINK_LAYER_ETHERNET: return sprintf(buf, "%s\n", "Ethernet"); + case IB_LINK_LAYER_SCIF: + return sprintf(buf, "%s\n", "SCIF"); default: return sprintf(buf, "%s\n", "Unknown"); } @@ -267,16 +269,12 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, container_of(attr, struct port_table_attribute, attr); union ib_gid gid; ssize_t ret; - u16 *raw; ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid); if (ret) return ret; - raw = (u16 *)gid.raw; - return sprintf(buf, "%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x\n", - htons(raw[0]), htons(raw[1]), htons(raw[2]), htons(raw[3]), - htons(raw[4]), htons(raw[5]), htons(raw[6]), htons(raw[7])); + return sprintf(buf, GID_PRINT_FMT"\n",GID_PRINT_ARGS(gid.raw)); } static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, @@ -351,8 +349,8 @@ static ssize_t get_pma_counters(struct ib_port *p, struct port_attribute *attr, be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8))); break; case 64: - ret = sprintf(buf, "%llu\n", (unsigned long long) - be64_to_cpup((__be64 
*)(out_mad->data + 40 + offset / 8))); + ret = sprintf(buf, "%llu\n", + (unsigned long long)be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8))); break; default: ret = 0; @@ -536,6 +534,7 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *, element->attr.attr.mode = S_IRUGO; element->attr.show = show; element->index = i; + sysfs_attr_init(&element->attr.attr); tab_attr[i] = &element->attr.attr; } @@ -570,7 +569,7 @@ static int add_port(struct ib_device *device, int port_num, p->port_num = port_num; ret = kobject_init_and_add(&p->kobj, &port_type, - kobject_get(device->ports_parent), + device->ports_parent, "%d", port_num); if (ret) goto err_put; @@ -609,7 +608,6 @@ static int add_port(struct ib_device *device, int port_num, } list_add_tail(&p->kobj.entry, &device->port_list); - #ifdef __linux__ kobject_uevent(&p->kobj, KOBJ_ADD); #endif @@ -655,6 +653,7 @@ static ssize_t show_node_type(struct device *device, case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); + case RDMA_NODE_MIC: return sprintf(buf, "%d: MIC\n", dev->node_type); default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); } } @@ -716,16 +715,75 @@ static ssize_t set_node_desc(struct device *device, return count; } +static ssize_t show_cmd_perf(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + + return sprintf(buf, "%d\n", dev->cmd_perf); +} + +static ssize_t set_cmd_perf(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + u32 val; + + if (sscanf(buf, "0x%x", &val) != 1) + return -EINVAL; + + dev->cmd_perf = val; + + return count; +} + +static ssize_t show_cmd_avg(struct device *device, + struct 
device_attribute *attr, char *buf) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + + return sprintf(buf, "%llu\n", (unsigned long long)dev->cmd_avg); +} + +static ssize_t set_cmd_avg(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + + spin_lock(&dev->cmd_perf_lock); + dev->cmd_avg = 0; + dev->cmd_n = 0; + spin_unlock(&dev->cmd_perf_lock); + + return count; +} + +static ssize_t show_cmd_n(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + + return sprintf(buf, "%d\n", dev->cmd_n); +} + static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); +static DEVICE_ATTR(cmd_perf, S_IRUGO | S_IWUSR, show_cmd_perf, set_cmd_perf); +static DEVICE_ATTR(cmd_avg, S_IRUGO | S_IWUSR, show_cmd_avg, set_cmd_avg); +static DEVICE_ATTR(cmd_n, S_IRUGO, show_cmd_n, NULL); static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_type, &dev_attr_sys_image_guid, &dev_attr_node_guid, - &dev_attr_node_desc + &dev_attr_node_desc, + &dev_attr_cmd_perf, + &dev_attr_cmd_avg, + &dev_attr_cmd_n, }; static struct class ib_class = { @@ -851,7 +909,8 @@ static struct attribute_group iw_stats_group = { }; int ib_device_register_sysfs(struct ib_device *device, - int (*port_callback)(struct ib_device *, u8, struct kobject *)) + int (*port_callback)(struct ib_device *, + u8, struct kobject *)) { struct device *class_dev = &device->dev; int ret; @@ -874,8 +933,7 @@ int ib_device_register_sysfs(struct ib_device *device, goto err_unregister; } - device->ports_parent = kobject_create_and_add("ports", - kobject_get(&class_dev->kobj)); + 
device->ports_parent = kobject_create_and_add("ports",&class_dev->kobj); if (!device->ports_parent) { ret = -ENOMEM; goto err_put; @@ -919,6 +977,11 @@ err_put: kobject_put(&class_dev->kobj); err_unregister: + + for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { + device_remove_file(class_dev, ib_class_attributes[i]); + } + device_unregister(class_dev); err: @@ -927,15 +990,16 @@ err: void ib_device_unregister_sysfs(struct ib_device *device) { + int i; struct kobject *p, *t; struct ib_port *port; - int i; + struct device *class_dev = &device->dev; /* Hold kobject until ib_dealloc_device() */ kobject_get(&device->dev.kobj); for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { - device_remove_file(&device->dev, ib_class_attributes[i]); + device_remove_file(class_dev, ib_class_attributes[i]); } list_for_each_entry_safe(p, t, &device->port_list, entry) { @@ -960,22 +1024,3 @@ void ib_sysfs_cleanup(void) { class_unregister(&ib_class); } - -/*int ib_sysfs_create_port_files(struct ib_device *device, - int (*create)(struct ib_device *dev, u8 port_num, - struct kobject *kobj)) -{ - struct kobject *p; - struct ib_port *port; - int ret = 0; - - list_for_each_entry(p, &device->port_list, entry) { - port = container_of(p, struct ib_port, kobj); - ret = create(device, port->port_num, &port->kobj); - if (ret) - break; - } - - return ret; -} -EXPORT_SYMBOL(ib_sysfs_create_port_files);*/ diff --git a/sys/ofed/drivers/infiniband/core/ucm.c b/sys/ofed/drivers/infiniband/core/ucm.c index b912ebe..8f20e89 100644 --- a/sys/ofed/drivers/infiniband/core/ucm.c +++ b/sys/ofed/drivers/infiniband/core/ucm.c @@ -37,10 +37,12 @@ #include <linux/device.h> #include <linux/err.h> #include <linux/poll.h> +#include <linux/sched.h> #include <linux/file.h> #include <linux/cdev.h> #include <linux/idr.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/string.h> #include <asm/uaccess.h> @@ -396,7 +398,6 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, struct 
ib_ucm_event_get cmd; struct ib_ucm_event *uevent; int result = 0; - DEFINE_WAIT(wait); if (out_len < sizeof(struct ib_ucm_event_resp)) return -ENOSPC; @@ -1123,7 +1124,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) + if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) @@ -1163,7 +1164,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) { struct ib_ucm_file *file; - file = kzalloc(sizeof(*file), GFP_KERNEL); + file = kmalloc(sizeof(*file), GFP_KERNEL); if (!file) return -ENOMEM; @@ -1177,7 +1178,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) file->filp = filp; file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev); - return 0; + return nonseekable_open(inode, filp); } static int ib_ucm_close(struct inode *inode, struct file *filp) @@ -1212,7 +1213,10 @@ static void ib_ucm_release_dev(struct device *dev) ucm_dev = container_of(dev, struct ib_ucm_device, dev); cdev_del(&ucm_dev->cdev); + if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(ucm_dev->devnum, dev_map); + else + clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map); kfree(ucm_dev); } @@ -1222,6 +1226,7 @@ static const struct file_operations ucm_fops = { .release = ib_ucm_close, .write = ib_ucm_write, .poll = ib_ucm_poll, + .llseek = no_llseek, }; static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, @@ -1234,8 +1239,32 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static dev_t overflow_maj; +static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); +static int find_overflow_devnum(void) +{ + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, + "infiniband_cm"); + if (ret) { + printk(KERN_ERR "ucm: 
couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES); + if (ret >= IB_UCM_MAX_DEVICES) + return -1; + + return ret; +} + static void ib_ucm_add_one(struct ib_device *device) { + int devnum; + dev_t base; struct ib_ucm_device *ucm_dev; if (!device->alloc_ucontext || @@ -1248,16 +1277,25 @@ static void ib_ucm_add_one(struct ib_device *device) ucm_dev->ib_dev = device; - ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); - if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) + devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); + if (devnum >= IB_UCM_MAX_DEVICES) { + devnum = find_overflow_devnum(); + if (devnum < 0) goto err; - set_bit(ucm_dev->devnum, dev_map); + ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + ucm_dev->devnum = devnum; + base = devnum + IB_UCM_BASE_DEV; + set_bit(devnum, dev_map); + } cdev_init(&ucm_dev->cdev, &ucm_fops); ucm_dev->cdev.owner = THIS_MODULE; kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); - if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) + if (cdev_add(&ucm_dev->cdev, base, 1)) goto err; ucm_dev->dev.class = &cm_class; @@ -1278,7 +1316,10 @@ err_dev: device_unregister(&ucm_dev->dev); err_cdev: cdev_del(&ucm_dev->cdev); - clear_bit(ucm_dev->devnum, dev_map); + if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); err: kfree(ucm_dev); return; @@ -1298,6 +1339,7 @@ static ssize_t show_abi_version(struct class *class, struct class_attribute *att { return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); } + static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int __init ib_ucm_init(void) @@ -1337,6 +1379,8 @@ static void __exit ib_ucm_cleanup(void) ib_unregister_client(&ucm_client); class_remove_file(&cm_class, &class_attr_abi_version); 
unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } diff --git a/sys/ofed/drivers/infiniband/core/ucma.c b/sys/ofed/drivers/infiniband/core/ucma.c index 23cbf7b..5f73b40 100644 --- a/sys/ofed/drivers/infiniband/core/ucma.c +++ b/sys/ofed/drivers/infiniband/core/ucma.c @@ -34,10 +34,13 @@ #include <linux/file.h> #include <linux/mutex.h> #include <linux/poll.h> +#include <linux/sched.h> #include <linux/idr.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/miscdevice.h> +#include <linux/slab.h> +#include <linux/module.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> @@ -48,9 +51,7 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); MODULE_LICENSE("Dual BSD/GPL"); -enum { - UCMA_MAX_BACKLOG = 1024 -}; +static unsigned int max_backlog = 1024; struct ucma_file { struct mutex mut; @@ -253,17 +254,17 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, if (!uevent) return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; + mutex_lock(&ctx->file->mut); uevent->cm_id = cm_id; ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; uevent->resp.status = event->status; - if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) + if (cm_id->qp_type == IB_QPT_UD) ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); - mutex_lock(&ctx->file->mut); if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { if (!ctx->backlog) { ret = -ENOMEM; @@ -298,7 +299,6 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, struct rdma_ucm_get_event cmd; struct ucma_event *uevent; int ret = 0; - DEFINE_WAIT(wait); if (out_len < sizeof uevent->resp) return -ENOSPC; @@ -332,6 +332,7 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, 
ctx->cm_id = uevent->cm_id; ctx->cm_id->context = ctx; uevent->resp.id = ctx->id; + ctx->cm_id->ucontext = ctx; } if (copy_to_user((void __user *)(unsigned long)cmd.response, @@ -350,13 +351,31 @@ done: return ret; } -static ssize_t ucma_create_id(struct ucma_file *file, - const char __user *inbuf, +static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) +{ + switch (cmd->ps) { + case RDMA_PS_TCP: + *qp_type = IB_QPT_RC; + return 0; + case RDMA_PS_UDP: + case RDMA_PS_IPOIB: + *qp_type = IB_QPT_UD; + return 0; + case RDMA_PS_IB: + *qp_type = cmd->qp_type; + return 0; + default: + return -EINVAL; + } +} + +static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_create_id cmd; struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; + enum ib_qp_type qp_type; int ret; if (out_len < sizeof(resp)) @@ -365,6 +384,10 @@ static ssize_t ucma_create_id(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + ret = ucma_get_qp_type(&cmd, &qp_type); + if (ret) + return ret; + mutex_lock(&file->mut); ctx = ucma_alloc_ctx(file); mutex_unlock(&file->mut); @@ -372,11 +395,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps); + ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; } + ctx->cm_id->ucontext = ctx; resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, @@ -409,24 +433,6 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx) mutex_unlock(&mut); } -static void ucma_cleanup_events(struct ucma_context *ctx) -{ - struct ucma_event *uevent, *tmp; - - list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { - if (uevent->ctx != ctx) - continue; - - list_del(&uevent->list); - - /* clear incoming connections. 
*/ - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) - rdma_destroy_id(uevent->cm_id); - - kfree(uevent); - } -} - static void ucma_cleanup_mc_events(struct ucma_multicast *mc) { struct ucma_event *uevent, *tmp; @@ -440,9 +446,16 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) } } +/* + * We cannot hold file->mut when calling rdma_destroy_id() or we can + * deadlock. We also acquire file->mut in ucma_event_handler(), and + * rdma_destroy_id() will wait until all callbacks have completed. + */ static int ucma_free_ctx(struct ucma_context *ctx) { int events_reported; + struct ucma_event *uevent, *tmp; + LIST_HEAD(list); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); @@ -451,10 +464,20 @@ static int ucma_free_ctx(struct ucma_context *ctx) /* Cleanup events not yet reported to the user. */ mutex_lock(&ctx->file->mut); - ucma_cleanup_events(ctx); + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &list); + } list_del(&ctx->list); mutex_unlock(&ctx->file->mut); + list_for_each_entry_safe(uevent, tmp, &list, list) { + list_del(&uevent->list); + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) + rdma_destroy_id(uevent->cm_id); + kfree(uevent); + } + events_reported = ctx->events_reported; kfree(ctx); return events_reported; @@ -586,24 +609,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { - struct rdma_dev_addr *dev_addr; - struct net_device *dev; - u16 vid = 0; resp->num_paths = route->num_paths; switch (route->num_paths) { case 0: - dev_addr = &route->addr.dev_addr; - dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); - if (dev) { - vid = rdma_vlan_dev_vlan_id(dev); - dev_put(dev); - } - - iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid, - dev_addr->dst_dev_addr, vid); 
- iboe_addr_get_sgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].sgid); + rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, + (union ib_gid *)&resp->ib_route[0].dgid); + rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, + (union ib_gid *)&resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(0xffff); break; case 2: @@ -619,6 +632,16 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, } } +static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, + struct rdma_route *route) +{ + struct rdma_dev_addr *dev_addr; + + dev_addr = &route->addr.dev_addr; + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); +} + static ssize_t ucma_query_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) @@ -653,8 +676,10 @@ static ssize_t ucma_query_route(struct ucma_file *file, resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; - if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) { - switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) { + switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { + case RDMA_TRANSPORT_IB: + switch (rdma_port_get_link_layer(ctx->cm_id->device, + ctx->cm_id->port_num)) { case IB_LINK_LAYER_INFINIBAND: ucma_copy_ib_route(&resp, &ctx->cm_id->route); break; @@ -664,6 +689,12 @@ static ssize_t ucma_query_route(struct ucma_file *file, default: break; } + break; + case RDMA_TRANSPORT_IWARP: + ucma_copy_iw_route(&resp, &ctx->cm_id->route); + break; + default: + break; } out: @@ -727,8 +758,8 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ? - cmd.backlog : UCMA_MAX_BACKLOG; + ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? 
+ cmd.backlog : max_backlog; ret = rdma_listen(ctx->cm_id, ctx->backlog); ucma_put_ctx(ctx); return ret; @@ -750,9 +781,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); if (cmd.conn_param.valid) { - ctx->uid = cmd.uid; ucma_copy_conn_param(&conn_param, &cmd.conn_param); + mutex_lock(&file->mut); ret = rdma_accept(ctx->cm_id, &conn_param); + if (!ret) + ctx->uid = cmd.uid; + mutex_unlock(&file->mut); } else ret = rdma_accept(ctx->cm_id, NULL); @@ -848,6 +882,20 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, } rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); break; + case RDMA_OPTION_ID_REUSEADDR: + if (optlen != sizeof(int)) { + ret = -EINVAL; + break; + } + ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); + break; + case RDMA_OPTION_ID_AFONLY: + if (optlen != sizeof(int)) { + ret = -EINVAL; + break; + } + ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0); + break; default: ret = -ENOSYS; } @@ -887,12 +935,22 @@ static int ucma_set_ib_path(struct ucma_context *ctx, static int ucma_set_option_ib(struct ucma_context *ctx, int optname, void *optval, size_t optlen) { - int ret; + int ret = 0; switch (optname) { case RDMA_OPTION_IB_PATH: ret = ucma_set_ib_path(ctx, optval, optlen); break; + + case RDMA_OPTION_IB_APM: + if (optlen != sizeof(u8)) { + ret = -EINVAL; + break; + } + if (*(u8 *)optval) + ret = rdma_enable_apm(ctx->cm_id, RDMA_ALT_PATH_BEST); + break; + default: ret = -ENOSYS; } @@ -937,20 +995,21 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, optval = kmalloc(cmd.optlen, GFP_KERNEL); if (!optval) { ret = -ENOMEM; - goto out1; + goto err_ucma_put_ctx; } - if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval, + if (copy_from_user(optval, (void __user *)(unsigned long)cmd.optval, cmd.optlen)) { ret = -EFAULT; - goto out2; + goto err_kfree; } ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, 
optval, cmd.optlen); -out2: + +err_kfree: kfree(optval); -out1: +err_ucma_put_ctx: ucma_put_ctx(ctx); return ret; } @@ -1121,7 +1180,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, struct rdma_ucm_migrate_id cmd; struct rdma_ucm_migrate_resp resp; struct ucma_context *ctx; - struct file *filp; + struct fd f; struct ucma_file *cur_file; int ret = 0; @@ -1129,12 +1188,12 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, return -EFAULT; /* Get current fd to protect against it being closed */ - filp = fget(cmd.fd); - if (!filp) + f = fdget(cmd.fd); + if (!f.file) return -ENOENT; /* Validate current fd and prevent destruction of id. */ - ctx = ucma_get_ctx(filp->private_data, cmd.id); + ctx = ucma_get_ctx(f.file->private_data, cmd.id); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto file_put; @@ -1168,7 +1227,7 @@ response: ucma_put_ctx(ctx); file_put: - fput(filp); + fdput(f); return ret; } @@ -1209,7 +1268,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) + if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) @@ -1261,7 +1320,8 @@ static int ucma_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; - return 0; + + return nonseekable_open(inode, filp); } static int ucma_close(struct inode *inode, struct file *filp) @@ -1291,11 +1351,14 @@ static const struct file_operations ucma_fops = { .release = ucma_close, .write = ucma_write, .poll = ucma_poll, + .llseek = no_llseek, }; static struct miscdevice ucma_misc = { .minor = MISC_DYNAMIC_MINOR, .name = "rdma_cm", + .nodename = "infiniband/rdma_cm", + .mode = 0666, .fops = &ucma_fops, }; @@ -1318,10 +1381,11 @@ static int __init ucma_init(void) ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); if (ret) { printk(KERN_ERR "rdma_ucm: couldn't create abi_version 
attr\n"); - goto err; + goto err1; } + return 0; -err: +err1: misc_deregister(&ucma_misc); return ret; } diff --git a/sys/ofed/drivers/infiniband/core/ud_header.c b/sys/ofed/drivers/infiniband/core/ud_header.c index 09fc1ff..051d3bd 100644 --- a/sys/ofed/drivers/infiniband/core/ud_header.c +++ b/sys/ofed/drivers/infiniband/core/ud_header.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/string.h> +#include <linux/module.h> #include <linux/if_ether.h> #include <rdma/ib_pack.h> @@ -230,32 +231,28 @@ void ib_ud_header_init(int payload_bytes, int immediate_present, struct ib_ud_header *header) { - u16 packet_length = 0; - memset(header, 0, sizeof *header); if (lrh_present) { + u16 packet_length = 0; + header->lrh.link_version = 0; header->lrh.link_next_header = grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; - packet_length = IB_LRH_BYTES; + packet_length = (IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES + + (grh_present ? IB_GRH_BYTES : 0) + + payload_bytes + + 4 + /* ICRC */ + 3) / 4; /* round up */ + header->lrh.packet_length = cpu_to_be16(packet_length); } - if (eth_present) { - if (vlan_present) { + if (vlan_present) header->eth.type = cpu_to_be16(ETH_P_8021Q); - packet_length += IB_VLAN_BYTES; - } - packet_length += IB_ETH_BYTES; - } - - packet_length += IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes + - 4 + /* ICRC */ - 3; /* round up */ - packet_length /= 4; if (grh_present) { - packet_length += IB_GRH_BYTES / 4; header->grh.ip_version = 6; header->grh.payload_length = cpu_to_be16((IB_BTH_BYTES + @@ -266,9 +263,6 @@ void ib_ud_header_init(int payload_bytes, header->grh.next_header = 0x1b; } - if (lrh_present) - header->lrh.packet_length = cpu_to_be16(packet_length); - if (immediate_present) header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; else @@ -285,36 +279,6 @@ void ib_ud_header_init(int payload_bytes, EXPORT_SYMBOL(ib_ud_header_init); /** - * ib_lrh_header_pack - Pack LRH header struct into wire format - * @lrh:unpacked LRH header 
struct - * @buf:Buffer to pack into - * - * ib_lrh_header_pack() packs the LRH header structure @lrh into - * wire format in the buffer @buf. - */ -int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf) -{ - ib_pack(lrh_table, ARRAY_SIZE(lrh_table), lrh, buf); - return 0; -} -EXPORT_SYMBOL(ib_lrh_header_pack); - -/** - * ib_lrh_header_unpack - Unpack LRH structure from wire format - * @lrh:unpacked LRH header struct - * @buf:Buffer to pack into - * - * ib_lrh_header_unpack() unpacks the LRH header structure from - * wire format (in buf) into @lrh. - */ -int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh) -{ - ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, lrh); - return 0; -} -EXPORT_SYMBOL(ib_lrh_header_unpack); - -/** * ib_ud_header_pack - Pack UD header struct into wire format * @header:UD header struct * @buf:Buffer to pack into @@ -337,14 +301,11 @@ int ib_ud_header_pack(struct ib_ud_header *header, &header->eth, buf + len); len += IB_ETH_BYTES; } - - if (header->vlan_present) { ib_pack(vlan_table, ARRAY_SIZE(vlan_table), &header->vlan, buf + len); len += IB_VLAN_BYTES; } - if (header->grh_present) { ib_pack(grh_table, ARRAY_SIZE(grh_table), &header->grh, buf + len); diff --git a/sys/ofed/drivers/infiniband/core/umem.c b/sys/ofed/drivers/infiniband/core/umem.c index 7695a21..cdd2e67 100644 --- a/sys/ofed/drivers/infiniband/core/umem.c +++ b/sys/ofed/drivers/infiniband/core/umem.c @@ -35,109 +35,168 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/sched.h> -#ifdef __linux__ -#include <linux/hugetlb.h> -#endif #include <linux/dma-attrs.h> - +#include <linux/slab.h> +#include <linux/module.h> #include <sys/priv.h> -#include <sys/resource.h> #include <sys/resourcevar.h> - -#include <vm/vm.h> -#include <vm/vm_map.h> -#include <vm/vm_object.h> #include <vm/vm_pageout.h> - +#include <vm/vm_map.h> #include "uverbs.h" -static int allow_weak_ordering; -module_param(allow_weak_ordering, bool, 0444); 
-MODULE_PARM_DESC(allow_weak_ordering, "Allow weak ordering for data registered memory"); +#define IB_UMEM_MAX_PAGE_CHUNK (PAGE_SIZE / sizeof (struct page *)) -#define IB_UMEM_MAX_PAGE_CHUNK \ - ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ - ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ - (void *) &((struct ib_umem_chunk *) 0)->page_list[0])) - -#ifdef __ia64__ -extern int dma_map_sg_hp_wa; +static int allow_weak_ordering; +module_param_named(weak_ordering, allow_weak_ordering, int, 0444); +MODULE_PARM_DESC(weak_ordering, "Allow weak ordering for data registered memory"); -static int dma_map_sg_ia64(struct ib_device *ibdev, - struct scatterlist *sg, - int nents, - enum dma_data_direction dir) +static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem, + struct ib_umem *umem, unsigned long addr, + int dmasync, int invalidation_supported) { - int i, rc, j, lents = 0; - struct device *dev; - - if (!dma_map_sg_hp_wa) - return ib_dma_map_sg(ibdev, sg, nents, dir); + int ret; + const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem; + struct invalidation_ctx *invalidation_ctx = NULL; - dev = ibdev->dma_device; - for (i = 0; i < nents; ++i) { - rc = dma_map_sg(dev, sg + i, 1, dir); - if (rc <= 0) { - for (j = 0; j < i; ++j) - dma_unmap_sg(dev, sg + j, 1, dir); + umem->ib_peer_mem = ib_peer_mem; + if (invalidation_supported) { + invalidation_ctx = kzalloc(sizeof(*invalidation_ctx), GFP_KERNEL); + if (!invalidation_ctx) { + ret = -ENOMEM; + goto out; + } + umem->invalidation_ctx = invalidation_ctx; + invalidation_ctx->umem = umem; + mutex_lock(&ib_peer_mem->lock); + invalidation_ctx->context_ticket = + ib_peer_insert_context(ib_peer_mem, invalidation_ctx); + /* unlock before calling get pages to prevent a dead-lock from the callback */ + mutex_unlock(&ib_peer_mem->lock); + } - return 0; + ret = peer_mem->get_pages(addr, umem->length, umem->writable, 1, + &umem->sg_head, + umem->peer_mem_client_context, + 
invalidation_ctx ? + (void *)invalidation_ctx->context_ticket : NULL); + + if (invalidation_ctx) { + /* taking the lock back, checking that wasn't invalidated at that time */ + mutex_lock(&ib_peer_mem->lock); + if (invalidation_ctx->peer_invalidated) { + printk(KERN_ERR "peer_umem_get: pages were invalidated by peer\n"); + ret = -EINVAL; } - lents += rc; } - return lents; + if (ret) + goto out; + + umem->page_size = peer_mem->get_page_size + (umem->peer_mem_client_context); + if (umem->page_size <= 0) + goto put_pages; + + umem->offset = addr & ((unsigned long)umem->page_size - 1); + ret = peer_mem->dma_map(&umem->sg_head, + umem->peer_mem_client_context, + umem->context->device->dma_device, + dmasync, + &umem->nmap); + if (ret) + goto put_pages; + + ib_peer_mem->stats.num_reg_pages += + umem->nmap * (umem->page_size >> PAGE_SHIFT); + ib_peer_mem->stats.num_alloc_mrs += 1; + return umem; + +put_pages: + + peer_mem->put_pages(umem->peer_mem_client_context, + &umem->sg_head); +out: + if (invalidation_ctx) { + ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket); + mutex_unlock(&umem->ib_peer_mem->lock); + kfree(invalidation_ctx); + } + + ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context, + umem->peer_mem_srcu_key); + kfree(umem); + return ERR_PTR(ret); } -static void dma_unmap_sg_ia64(struct ib_device *ibdev, - struct scatterlist *sg, - int nents, - enum dma_data_direction dir) +static void peer_umem_release(struct ib_umem *umem) { - int i; - struct device *dev; - - if (!dma_map_sg_hp_wa) - return ib_dma_unmap_sg(ibdev, sg, nents, dir); + struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem; + const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem; + struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx; + + if (invalidation_ctx) { + + int peer_callback; + int inflight_invalidation; + /* If we are not under peer callback we must take the lock before removing + * core ticket from the tree and releasing its 
umem. + * It will let any inflight callbacks to be ended safely. + * If we are under peer callback or under error flow of reg_mr so that context + * wasn't activated yet lock was already taken. + */ + if (invalidation_ctx->func && !invalidation_ctx->peer_callback) + mutex_lock(&ib_peer_mem->lock); + ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket); + /* make sure to check inflight flag after took the lock and remove from tree. + * in addition, from that point using local variables for peer_callback and + * inflight_invalidation as after the complete invalidation_ctx can't be accessed + * any more as it may be freed by the callback. + */ + peer_callback = invalidation_ctx->peer_callback; + inflight_invalidation = invalidation_ctx->inflight_invalidation; + if (inflight_invalidation) + complete(&invalidation_ctx->comp); + /* On peer callback lock is handled externally */ + if (!peer_callback) + /* unlocking before put_pages */ + mutex_unlock(&ib_peer_mem->lock); + /* in case under callback context or callback is pending let it free the invalidation context */ + if (!peer_callback && !inflight_invalidation) + kfree(invalidation_ctx); + } - dev = ibdev->dma_device; - for (i = 0; i < nents; ++i) - dma_unmap_sg(dev, sg + i, 1, dir); -} + peer_mem->dma_unmap(&umem->sg_head, + umem->peer_mem_client_context, + umem->context->device->dma_device); + peer_mem->put_pages(&umem->sg_head, + umem->peer_mem_client_context); + + ib_peer_mem->stats.num_dereg_pages += + umem->nmap * (umem->page_size >> PAGE_SHIFT); + ib_peer_mem->stats.num_dealloc_mrs += 1; + ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context, + umem->peer_mem_srcu_key); + kfree(umem); -#define ib_dma_map_sg(dev, sg, nents, dir) dma_map_sg_ia64(dev, sg, nents, dir) -#define ib_dma_unmap_sg(dev, sg, nents, dir) dma_unmap_sg_ia64(dev, sg, nents, dir) + return; -#endif +} static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { -#ifdef __linux__ - struct 
ib_umem_chunk *chunk, *tmp; - int i; - list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { - ib_dma_unmap_sg_attrs(dev, chunk->page_list, - chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs); - for (i = 0; i < chunk->nents; ++i) { - struct page *page = sg_page(&chunk->page_list[i]); - if (umem->writable && dirty) - set_page_dirty_lock(page); - put_page(page); - } - kfree(chunk); - } -#else - struct ib_umem_chunk *chunk, *tmp; vm_object_t object; + struct scatterlist *sg; + struct page *page; int i; object = NULL; - list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { - ib_dma_unmap_sg_attrs(dev, chunk->page_list, - chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs); - for (i = 0; i < chunk->nents; ++i) { - struct page *page = sg_page(&chunk->page_list[i]); + if (umem->nmap > 0) + ib_dma_unmap_sg(dev, umem->sg_head.sgl, + umem->nmap, + DMA_BIDIRECTIONAL); + for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { + page = sg_page(sg); if (umem->writable && dirty) { if (object && object != page->object) VM_OBJECT_WUNLOCK(object); @@ -148,14 +207,26 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d vm_page_dirty(page); } } - kfree(chunk); - } + sg_free_table(&umem->sg_head); if (object) VM_OBJECT_WUNLOCK(object); -#endif } +void ib_umem_activate_invalidation_notifier(struct ib_umem *umem, + umem_invalidate_func_t func, + void *cookie) +{ + struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx; + + invalidation_ctx->func = func; + invalidation_ctx->cookie = cookie; + + /* from that point any pending invalidations can be called */ + mutex_unlock(&umem->ib_peer_mem->lock); + return; +} +EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier); /** * ib_umem_get - Pin and DMA map userspace memory. 
* @context: userspace context to pin memory for @@ -164,163 +235,23 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d * @access: IB_ACCESS_xxx flags for memory being pinned * @dmasync: flush in-flight DMA when the memory region is written */ -struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, - size_t size, int access, int dmasync) +struct ib_umem *ib_umem_get_ex(struct ib_ucontext *context, unsigned long addr, + size_t size, int access, int dmasync, + int invalidation_supported) { -#ifdef __linux__ - struct ib_umem *umem; - struct page **page_list; - struct vm_area_struct **vma_list; - struct ib_umem_chunk *chunk; - unsigned long locked; - unsigned long lock_limit; - unsigned long cur_base; - unsigned long npages; - int ret; - int off; - int i; - DEFINE_DMA_ATTRS(attrs); - - if (dmasync) - dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); - else if (allow_weak_ordering) - dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs); - - if (!can_do_mlock()) - return ERR_PTR(-EPERM); - umem = kmalloc(sizeof *umem, GFP_KERNEL); - if (!umem) - return ERR_PTR(-ENOMEM); - - umem->context = context; - umem->length = size; - umem->offset = addr & ~PAGE_MASK; - umem->page_size = PAGE_SIZE; - /* - * We ask for writable memory if any access flags other than - * "remote read" are set. "Local write" and "remote write" - * obviously require write access. "Remote atomic" can do - * things like fetch and add, which will modify memory, and - * "MW bind" can change permissions by binding a window. 
- */ - umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ); - - /* We assume the memory is from hugetlb until proved otherwise */ - umem->hugetlb = 1; - - INIT_LIST_HEAD(&umem->chunk_list); - - page_list = (struct page **) __get_free_page(GFP_KERNEL); - if (!page_list) { - kfree(umem); - return ERR_PTR(-ENOMEM); - } - - /* - * if we can't alloc the vma_list, it's not so bad; - * just assume the memory is not hugetlb memory - */ - vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL); - if (!vma_list) - umem->hugetlb = 0; - - npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT; - - down_write(¤t->mm->mmap_sem); - - locked = npages + current->mm->locked_vm; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - - if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { - ret = -ENOMEM; - goto out; - } - - cur_base = addr & PAGE_MASK; - - ret = 0; - - while (npages) { - ret = get_user_pages(current, current->mm, cur_base, - min_t(unsigned long, npages, - PAGE_SIZE / sizeof (struct page *)), - 1, !umem->writable, page_list, vma_list); - - if (ret < 0) - goto out; - - cur_base += ret * PAGE_SIZE; - npages -= ret; - - off = 0; - - while (ret) { - chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) * - min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK), - GFP_KERNEL); - if (!chunk) { - ret = -ENOMEM; - goto out; - } - - chunk->attrs = attrs; - chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); - sg_init_table(chunk->page_list, chunk->nents); - for (i = 0; i < chunk->nents; ++i) { - if (vma_list && - !is_vm_hugetlb_page(vma_list[i + off])) - umem->hugetlb = 0; - sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0); - } - - chunk->nmap = ib_dma_map_sg_attrs(context->device, - &chunk->page_list[0], - chunk->nents, - DMA_BIDIRECTIONAL, - &attrs); - if (chunk->nmap <= 0) { - for (i = 0; i < chunk->nents; ++i) - put_page(sg_page(&chunk->page_list[i])); - kfree(chunk); - - ret = -ENOMEM; - goto out; - } - - ret -= 
chunk->nents; - off += chunk->nents; - list_add_tail(&chunk->list, &umem->chunk_list); - } - - ret = 0; - } - -out: - if (ret < 0) { - __ib_umem_release(context->device, umem, 0); - kfree(umem); - } else - current->mm->locked_vm = locked; - - up_write(¤t->mm->mmap_sem); - if (vma_list) - free_page((unsigned long) vma_list); - free_page((unsigned long) page_list); - - return ret < 0 ? ERR_PTR(ret) : umem; -#else struct ib_umem *umem; - struct ib_umem_chunk *chunk; struct proc *proc; pmap_t pmap; vm_offset_t end, last, start; vm_size_t npages; int error; - int ents; int ret; + int ents; int i; DEFINE_DMA_ATTRS(attrs); + struct scatterlist *sg, *sg_list_start; + int need_release = 0; error = priv_check(curthread, PRIV_VM_MLOCK); if (error) @@ -372,76 +303,86 @@ out: * "MW bind" can change permissions by binding a window. */ umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ); + + if (invalidation_supported || context->peer_mem_private_data) { + + struct ib_peer_memory_client *peer_mem_client; + + peer_mem_client = ib_get_peer_client(context, addr, size, + &umem->peer_mem_client_context, + &umem->peer_mem_srcu_key); + if (peer_mem_client) + return peer_umem_get(peer_mem_client, umem, addr, + dmasync, invalidation_supported); + } + umem->hugetlb = 0; - INIT_LIST_HEAD(&umem->chunk_list); pmap = vm_map_pmap(&proc->p_vmspace->vm_map); - ret = 0; - while (npages) { - ents = min_t(int, npages, IB_UMEM_MAX_PAGE_CHUNK); - chunk = kmalloc(sizeof(*chunk) + - (sizeof(struct scatterlist) * ents), - GFP_KERNEL); - if (!chunk) { - ret = -ENOMEM; + + if (npages == 0) { + ret = -EINVAL; goto out; } - chunk->attrs = attrs; - chunk->nents = ents; - sg_init_table(&chunk->page_list[0], ents); - for (i = 0; i < chunk->nents; ++i) { + ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); + if (ret) + goto out; + + need_release = 1; + sg_list_start = umem->sg_head.sgl; + + while (npages) { + + ents = min_t(int, npages, IB_UMEM_MAX_PAGE_CHUNK); + umem->npages += ents; + + 
for_each_sg(sg_list_start, sg, ents, i) { vm_paddr_t pa; pa = pmap_extract(pmap, start); if (pa == 0) { ret = -ENOMEM; - kfree(chunk); goto out; } - sg_set_page(&chunk->page_list[i], PHYS_TO_VM_PAGE(pa), + sg_set_page(sg, PHYS_TO_VM_PAGE(pa), PAGE_SIZE, 0); npages--; start += PAGE_SIZE; } - chunk->nmap = ib_dma_map_sg_attrs(context->device, - &chunk->page_list[0], - chunk->nents, + /* preparing for next loop */ + sg_list_start = sg; + } + + umem->nmap = ib_dma_map_sg_attrs(context->device, + umem->sg_head.sgl, + umem->npages, DMA_BIDIRECTIONAL, &attrs); - if (chunk->nmap != chunk->nents) { - kfree(chunk); + if (umem->nmap != umem->npages) { ret = -ENOMEM; goto out; } - list_add_tail(&chunk->list, &umem->chunk_list); - } - out: if (ret < 0) { + if (need_release) __ib_umem_release(context->device, umem, 0); kfree(umem); } return ret < 0 ? ERR_PTR(ret) : umem; -#endif } -EXPORT_SYMBOL(ib_umem_get); +EXPORT_SYMBOL(ib_umem_get_ex); -#ifdef __linux__ -static void ib_umem_account(struct work_struct *work) +struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, + size_t size, int access, int dmasync) { - struct ib_umem *umem = container_of(work, struct ib_umem, work); - - down_write(&umem->mm->mmap_sem); - umem->mm->locked_vm -= umem->diff; - up_write(&umem->mm->mmap_sem); - mmput(umem->mm); - kfree(umem); + return ib_umem_get_ex(context, addr, + size, access, dmasync, 0); } -#endif +EXPORT_SYMBOL(ib_umem_get); /** * ib_umem_release - release memory pinned with ib_umem_get @@ -449,57 +390,28 @@ static void ib_umem_account(struct work_struct *work) */ void ib_umem_release(struct ib_umem *umem) { -#ifdef __linux__ - struct ib_ucontext *context = umem->context; - struct mm_struct *mm; - unsigned long diff; - - __ib_umem_release(umem->context->device, umem, 1); - mm = get_task_mm(current); - if (!mm) { - kfree(umem); - return; - } - - diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; - - /* - * We may be called with the mm's mmap_sem already 
held. This - * can happen when a userspace munmap() is the call that drops - * the last reference to our file and calls our release - * method. If there are memory regions to destroy, we'll end - * up here and not be able to take the mmap_sem. In that case - * we defer the vm_locked accounting to the system workqueue. - */ - if (context->closing) { - if (!down_write_trylock(&mm->mmap_sem)) { - INIT_WORK(&umem->work, ib_umem_account); - umem->mm = mm; - umem->diff = diff; - - schedule_work(&umem->work); - return; - } - } else - down_write(&mm->mmap_sem); - - current->mm->locked_vm -= diff; - up_write(&mm->mmap_sem); - mmput(mm); -#else vm_offset_t addr, end, last, start; vm_size_t size; int error; + if (umem->ib_peer_mem) { + peer_umem_release(umem); + return; + } + __ib_umem_release(umem->context->device, umem, 1); + if (umem->context->closing) { kfree(umem); return; } + error = priv_check(curthread, PRIV_VM_MUNLOCK); + if (error) return; + addr = umem->start; size = umem->length; last = addr + size; @@ -507,69 +419,24 @@ void ib_umem_release(struct ib_umem *umem) end = roundup2(last, PAGE_SIZE); /* Use PAGE_MASK safe operation. 
*/ vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map, start, end, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); - -#endif kfree(umem); + } EXPORT_SYMBOL(ib_umem_release); int ib_umem_page_count(struct ib_umem *umem) { - struct ib_umem_chunk *chunk; int shift; int i; int n; + struct scatterlist *sg; shift = ilog2(umem->page_size); n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (i = 0; i < chunk->nmap; ++i) - n += sg_dma_len(&chunk->page_list[i]) >> shift; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) + n += sg_dma_len(sg) >> shift; return n; } EXPORT_SYMBOL(ib_umem_page_count); - -/**********************************************/ -/* - * Stub functions for contiguous pages - - * We currently do not support this feature - */ -/**********************************************/ - -/** - * ib_cmem_release_contiguous_pages - release memory allocated by - * ib_cmem_alloc_contiguous_pages. - * @cmem: cmem struct to release - */ -void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem) -{ -} -EXPORT_SYMBOL(ib_cmem_release_contiguous_pages); - -/** - * * ib_cmem_alloc_contiguous_pages - allocate contiguous pages - * * @context: userspace context to allocate memory for - * * @total_size: total required size for that allocation. - * * @page_size_order: order of one contiguous page. - * */ -struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context, - unsigned long total_size, - unsigned long page_size_order) -{ - return NULL; -} -EXPORT_SYMBOL(ib_cmem_alloc_contiguous_pages); - -/** - * * ib_cmem_map_contiguous_pages_to_vma - map contiguous pages into VMA - * * @ib_cmem: cmem structure returned by ib_cmem_alloc_contiguous_pages - * * @vma: VMA to inject pages into. 
- * */ -int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem, - struct vm_area_struct *vma) -{ - return 0; -} -EXPORT_SYMBOL(ib_cmem_map_contiguous_pages_to_vma); diff --git a/sys/ofed/drivers/infiniband/core/user_mad.c b/sys/ofed/drivers/infiniband/core/user_mad.c index 161c65f..cc4a659 100644 --- a/sys/ofed/drivers/infiniband/core/user_mad.c +++ b/sys/ofed/drivers/infiniband/core/user_mad.c @@ -43,7 +43,9 @@ #include <linux/mutex.h> #include <linux/kref.h> #include <linux/compat.h> +#include <linux/sched.h> #include <linux/semaphore.h> +#include <linux/slab.h> #include <asm/uaccess.h> @@ -63,12 +65,9 @@ enum { }; /* - * Our lifetime rules for these structs are the following: each time a - * device special file is opened, we look up the corresponding struct - * ib_umad_port by minor in the umad_port[] table while holding the - * port_lock. If this lookup succeeds, we take a reference on the - * ib_umad_port's struct ib_umad_device while still holding the - * port_lock; if the lookup fails, we fail the open(). We drop these + * Our lifetime rules for these structs are the following: + * device special file is opened, we take a reference on the + * ib_umad_port's struct ib_umad_device. We drop these * references in the corresponding close(). * * In addition to references coming from open character devices, there @@ -76,12 +75,7 @@ enum { * module's reference taken when allocating the ib_umad_device in * ib_umad_add_one(). * - * When destroying an ib_umad_device, we clear all of its - * ib_umad_ports from umad_port[] while holding port_lock before - * dropping the module's reference to the ib_umad_device. This is - * always safe because any open() calls will either succeed and obtain - * a reference before we clear the umad_port[] entries, or fail after - * we clear the umad_port[] entries. + * When destroying an ib_umad_device, we drop the module's reference. 
*/ struct ib_umad_port { @@ -99,6 +93,7 @@ struct ib_umad_port { struct ib_umad_device *umad_dev; int dev_num; u8 port_num; + struct list_head port_lst; }; struct ib_umad_device { @@ -135,18 +130,85 @@ static struct class *umad_class; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static DEFINE_SPINLOCK(port_lock); -static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); +static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); -static void ib_umad_release_dev(struct kref *ref) +static DEFINE_SPINLOCK(ports_list_lock); +static struct list_head ports_list; + + +static void remove_ports(struct kref *ref) +{ + int i; + struct ib_umad_port *p, *p1; + struct ib_umad_device *dev = + container_of(ref, struct ib_umad_device, ref); + + for (i = 0; i <= dev->end_port - dev->start_port; ++i) { + struct ib_umad_port *port = &dev->port[i]; + + list_for_each_entry_safe(p, p1, &ports_list, port_lst) + if (p == port) { + list_del(&p->port_lst); + break; + } + } +} + +static void put_umad_dev(struct kref *ref) { + int ret, i; struct ib_umad_device *dev = container_of(ref, struct ib_umad_device, ref); + spin_lock(&ports_list_lock); + ret = (kref_put(ref, remove_ports)); + spin_unlock(&ports_list_lock); + if (ret) { + for (i = 0; i <= dev->end_port - dev->start_port; ++i) { + if (dev->port[i].dev_num < IB_UMAD_MAX_PORTS) + clear_bit(dev->port[i].dev_num, dev_map); + else + clear_bit(dev->port[i].dev_num - IB_UMAD_MAX_PORTS, overflow_map); + cdev_del(dev->port[i].cdev); + cdev_del(dev->port[i].sm_cdev); + } kfree(dev); + } +} + +static void release_port(struct ib_umad_port *port) +{ + put_umad_dev(&port->umad_dev->ref); +} + + +static struct ib_umad_port *get_port(struct cdev *cdev) +{ + struct ib_umad_port *port; + + spin_lock(&ports_list_lock); + list_for_each_entry(port, &ports_list, port_lst) { 
+ if (port->cdev == cdev || port->sm_cdev == cdev) { + kref_get(&port->umad_dev->ref); + spin_unlock(&ports_list_lock); + + return port; + } + } + spin_unlock(&ports_list_lock); + + return NULL; +} + +static void insert_port(struct ib_umad_port *port) +{ + spin_lock(&ports_list_lock); + list_add(&port->port_lst, &ports_list); + spin_unlock(&ports_list_lock); } static int hdr_size(struct ib_umad_file *file) @@ -466,8 +528,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } - if (packet->mad.hdr.id < 0 || - packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { + if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } @@ -679,7 +740,7 @@ found: file->already_used = 1; if (!file->use_pkey_index) { printk(KERN_WARNING "user_mad: process %s did not enable " - "P_Key index support.\n", curproc->p_comm); + "P_Key index support.\n", curthread->td_proc->p_comm); printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt " "has info on the new ABI.\n"); } @@ -711,7 +772,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); - if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { + if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } @@ -779,41 +840,33 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, /* * ib_umad_open() does not need the BKL: * - * - umad_port[] accesses are protected by port_lock, the - * ib_umad_port structures are properly reference counted, and + * - the ib_umad_port structures are properly reference counted, and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - the ioctl method does not affect any global state outside of the * file structure being operated on; - * - the port is added to umad_port[] as the last part of module - * initialization so the open method will either 
immediately run - * -ENXIO, or all required initialization will be done. */ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; - int ret = 0; - - spin_lock(&port_lock); - port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; - if (port) - kref_get(&port->umad_dev->ref); - spin_unlock(&port_lock); + int ret; + port = get_port(inode->i_cdev->si_drv1); if (!port) return -ENXIO; mutex_lock(&port->file_mutex); if (!port->ib_dev) { + release_port(port); ret = -ENXIO; goto out; } file = kzalloc(sizeof *file, GFP_KERNEL); if (!file) { - kref_put(&port->umad_dev->ref, ib_umad_release_dev); + release_port(port); ret = -ENOMEM; goto out; } @@ -830,6 +883,8 @@ static int ib_umad_open(struct inode *inode, struct file *filp) list_add_tail(&file->port_list, &port->file_list); + ret = nonseekable_open(inode, filp); + out: mutex_unlock(&port->file_mutex); return ret; @@ -838,7 +893,7 @@ out: static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; - struct ib_umad_device *dev = file->port->umad_dev; + struct ib_umad_port *port = file->port; struct ib_umad_packet *packet, *tmp; int already_dead; int i; @@ -867,7 +922,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp) mutex_unlock(&file->port->file_mutex); kfree(file); - kref_put(&dev->ref, ib_umad_release_dev); + release_port(port); return 0; } @@ -882,7 +937,8 @@ static const struct file_operations umad_fops = { .compat_ioctl = ib_umad_compat_ioctl, #endif .open = ib_umad_open, - .release = ib_umad_close + .release = ib_umad_close, + .llseek = no_llseek, }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) @@ -893,12 +949,7 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) }; int ret; - spin_lock(&port_lock); - port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; - if (port) - kref_get(&port->umad_dev->ref); - 
spin_unlock(&port_lock); - + port = get_port(inode->i_cdev->si_drv1); if (!port) return -ENXIO; @@ -922,10 +973,10 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) filp->private_data = port; - return 0; + return nonseekable_open(inode, filp); fail: - kref_put(&port->umad_dev->ref, ib_umad_release_dev); + release_port(port); return ret; } @@ -944,7 +995,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) up(&port->sm_sem); - kref_put(&port->umad_dev->ref, ib_umad_release_dev); + release_port(port); return ret; } @@ -952,7 +1003,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) static const struct file_operations umad_sm_fops = { .owner = THIS_MODULE, .open = ib_umad_sm_open, - .release = ib_umad_sm_close + .release = ib_umad_sm_close, + .llseek = no_llseek, }; static struct ib_client umad_client = { @@ -991,31 +1043,66 @@ static ssize_t show_abi_version(struct class *class, struct class_attribute *att } static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); +static dev_t overflow_maj; +static int find_overflow_devnum(void) +{ + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, + "infiniband_mad"); + if (ret) { + printk(KERN_ERR "user_mad: couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS); + if (ret >= IB_UMAD_MAX_PORTS) + return -1; + + return ret; +} + static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_port *port) { + int devnum; + dev_t base; + spin_lock(&port_lock); - port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (port->dev_num >= IB_UMAD_MAX_PORTS) { + devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (devnum >= IB_UMAD_MAX_PORTS) { spin_unlock(&port_lock); + devnum = find_overflow_devnum(); + if (devnum < 0) return -1; + + spin_lock(&port_lock); + port->dev_num = devnum + 
IB_UMAD_MAX_PORTS; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + port->dev_num = devnum; + base = devnum + base_dev; + set_bit(devnum, dev_map); } - set_bit(port->dev_num, dev_map); spin_unlock(&port_lock); port->ib_dev = device; port->port_num = port_num; - init_MUTEX(&port->sm_sem); + sema_init(&port->sm_sem, 1); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); port->cdev = cdev_alloc(); if (!port->cdev) - return -1; - port->cdev->owner = THIS_MODULE; + goto err_cdev_c; + port->cdev->ops = &umad_fops; + port->cdev->owner = THIS_MODULE; kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num); - if (cdev_add(port->cdev, base_dev + port->dev_num, 1)) + if (cdev_add(port->cdev, base, 1)) goto err_cdev; port->dev = device_create(umad_class, device->dma_device, @@ -1029,13 +1116,15 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (device_create_file(port->dev, &dev_attr_port)) goto err_dev; + base += IB_UMAD_MAX_PORTS; port->sm_cdev = cdev_alloc(); if (!port->sm_cdev) goto err_dev; - port->sm_cdev->owner = THIS_MODULE; + port->sm_cdev->ops = &umad_sm_fops; + port->sm_cdev->owner = THIS_MODULE; kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num); - if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) + if (cdev_add(port->sm_cdev, base, 1)) goto err_sm_cdev; port->sm_dev = device_create(umad_class, device->dma_device, @@ -1049,10 +1138,6 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (device_create_file(port->sm_dev, &dev_attr_port)) goto err_sm_dev; - spin_lock(&port_lock); - umad_port[port->dev_num] = port; - spin_unlock(&port_lock); - return 0; err_sm_dev: @@ -1066,7 +1151,11 @@ err_dev: err_cdev: cdev_del(port->cdev); - clear_bit(port->dev_num, dev_map); +err_cdev_c: + if (port->dev_num < IB_UMAD_MAX_PORTS) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); return -1; } @@ -1074,7 +1163,6 @@ err_cdev: 
static void ib_umad_kill_port(struct ib_umad_port *port) { struct ib_umad_file *file; - int already_dead; int id; dev_set_drvdata(port->dev, NULL); @@ -1083,20 +1171,12 @@ static void ib_umad_kill_port(struct ib_umad_port *port) device_destroy(umad_class, port->cdev->dev); device_destroy(umad_class, port->sm_cdev->dev); - cdev_del(port->cdev); - cdev_del(port->sm_cdev); - - spin_lock(&port_lock); - umad_port[port->dev_num] = NULL; - spin_unlock(&port_lock); - mutex_lock(&port->file_mutex); port->ib_dev = NULL; list_for_each_entry(file, &port->file_list, port_list) { mutex_lock(&file->mutex); - already_dead = file->agents_dead; file->agents_dead = 1; mutex_unlock(&file->mutex); @@ -1106,8 +1186,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port) } mutex_unlock(&port->file_mutex); - - clear_bit(port->dev_num, dev_map); } static void ib_umad_add_one(struct ib_device *device) @@ -1136,10 +1214,12 @@ static void ib_umad_add_one(struct ib_device *device) umad_dev->start_port = s; umad_dev->end_port = e; + for (i = 0; i <= e - s; ++i) + insert_port(&umad_dev->port[i]); + for (i = s; i <= e; ++i) { umad_dev->port[i - s].umad_dev = umad_dev; - if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND) if (ib_umad_init_port(device, i, &umad_dev->port[i - s])) goto err; } @@ -1150,10 +1230,9 @@ static void ib_umad_add_one(struct ib_device *device) err: while (--i >= s) - if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND) ib_umad_kill_port(&umad_dev->port[i - s]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + put_umad_dev(&umad_dev->ref); } static void ib_umad_remove_one(struct ib_device *device) @@ -1165,16 +1244,22 @@ static void ib_umad_remove_one(struct ib_device *device) return; for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) ib_umad_kill_port(&umad_dev->port[i]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + 
put_umad_dev(&umad_dev->ref); +} + +static char *umad_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } static int __init ib_umad_init(void) { int ret; + INIT_LIST_HEAD(&ports_list); + ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { @@ -1189,6 +1274,8 @@ static int __init ib_umad_init(void) goto out_chrdev; } + umad_class->devnode = umad_devnode; + ret = class_create_file(umad_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); @@ -1218,6 +1305,8 @@ static void __exit ib_umad_cleanup(void) ib_unregister_client(&umad_client); class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2); } module_init(ib_umad_init); diff --git a/sys/ofed/drivers/infiniband/core/uverbs.h b/sys/ofed/drivers/infiniband/core/uverbs.h index fa64da5..8ca6498 100644 --- a/sys/ofed/drivers/infiniband/core/uverbs.h +++ b/sys/ofed/drivers/infiniband/core/uverbs.h @@ -41,10 +41,14 @@ #include <linux/idr.h> #include <linux/mutex.h> #include <linux/completion.h> +#include <linux/cdev.h> +#include <linux/rbtree.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_verbs_exp.h> #include <rdma/ib_umem.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_user_verbs_exp.h> /* * Our lifetime rules for these structs are the following: @@ -69,24 +73,26 @@ struct ib_uverbs_device { struct kref ref; + int num_comp_vectors; struct completion comp; - int devnum; - struct cdev *cdev; struct device *dev; struct ib_device *ib_dev; - int num_comp_vectors; + int devnum; + struct cdev cdev; + struct rb_root xrcd_tree; + struct mutex xrcd_tree_mutex; }; struct ib_uverbs_event_file { struct kref ref; struct file *filp; + int is_async; struct ib_uverbs_file *uverbs_file; spinlock_t lock; + int is_closed; wait_queue_head_t poll_wait; struct 
fasync_struct *async_queue; struct list_head event_list; - int is_async; - int is_closed; }; struct ib_uverbs_file { @@ -120,9 +126,20 @@ struct ib_uevent_object { u32 events_reported; }; +struct ib_uxrcd_object { + struct ib_uobject uobject; + atomic_t refcnt; +}; + +struct ib_usrq_object { + struct ib_uevent_object uevent; + struct ib_uxrcd_object *uxrcd; +}; + struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; + struct ib_uxrcd_object *uxrcd; }; struct ib_ucq_object { @@ -134,9 +151,8 @@ struct ib_ucq_object { u32 async_events_reported; }; -struct ib_uxrcd_object { +struct ib_udct_object { struct ib_uobject uobject; - struct list_head xrc_reg_qp_list; }; extern spinlock_t ib_uverbs_idr_lock; @@ -147,12 +163,14 @@ extern struct idr ib_uverbs_ah_idr; extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; -extern struct idr ib_uverbs_xrc_domain_idr; +extern struct idr ib_uverbs_xrcd_idr; +extern struct idr ib_uverbs_rule_idr; +extern struct idr ib_uverbs_dct_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - int is_async, int *fd); + int is_async); struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, @@ -167,12 +185,24 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); -void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, - void *context_ptr); -void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, - struct ib_xrcd *xrcd); -int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, - struct ib_xrcd *xrcd, u32 qp_num); +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); + +struct 
ib_uverbs_flow_spec { + union { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_spec_eth eth; + struct ib_uverbs_flow_spec_ib ib; + struct ib_uverbs_flow_spec_ipv4 ipv4; + struct ib_uverbs_flow_spec_tcp_udp tcp_udp; + }; +}; #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ @@ -186,6 +216,8 @@ IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(alloc_mw); +IB_UVERBS_DECLARE_CMD(dealloc_mw); IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); IB_UVERBS_DECLARE_CMD(resize_cq); @@ -193,6 +225,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq); IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(open_qp); IB_UVERBS_DECLARE_CMD(query_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); @@ -207,14 +240,30 @@ IB_UVERBS_DECLARE_CMD(create_srq); IB_UVERBS_DECLARE_CMD(modify_srq); IB_UVERBS_DECLARE_CMD(query_srq); IB_UVERBS_DECLARE_CMD(destroy_srq); -IB_UVERBS_DECLARE_CMD(create_xrc_srq); -IB_UVERBS_DECLARE_CMD(open_xrc_domain); -IB_UVERBS_DECLARE_CMD(close_xrc_domain); -IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp); -IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp); -IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp); -IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp); -IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp); +IB_UVERBS_DECLARE_CMD(create_xsrq); +IB_UVERBS_DECLARE_CMD(open_xrcd); +IB_UVERBS_DECLARE_CMD(close_xrcd); + +#define IB_UVERBS_DECLARE_EX_CMD(name) \ + int ib_uverbs_ex_##name(struct ib_uverbs_file *file,\ + struct ib_udata *ucore, \ + struct ib_udata *uhw) + +#define IB_UVERBS_DECLARE_EXP_CMD(name) \ + ssize_t ib_uverbs_exp_##name(struct ib_uverbs_file *file, \ + struct ib_udata *ucore, \ + struct ib_udata *uhw) + +IB_UVERBS_DECLARE_EX_CMD(create_flow); 
+IB_UVERBS_DECLARE_EX_CMD(destroy_flow); +IB_UVERBS_DECLARE_EXP_CMD(create_qp); +IB_UVERBS_DECLARE_EXP_CMD(modify_cq); +IB_UVERBS_DECLARE_EXP_CMD(modify_qp); +IB_UVERBS_DECLARE_EXP_CMD(create_cq); +IB_UVERBS_DECLARE_EXP_CMD(query_device); +IB_UVERBS_DECLARE_EXP_CMD(create_dct); +IB_UVERBS_DECLARE_EXP_CMD(destroy_dct); +IB_UVERBS_DECLARE_EXP_CMD(query_dct); #endif /* UVERBS_H */ diff --git a/sys/ofed/drivers/infiniband/core/uverbs_cmd.c b/sys/ofed/drivers/infiniband/core/uverbs_cmd.c index a34b344..5eef3f7 100644 --- a/sys/ofed/drivers/infiniband/core/uverbs_cmd.c +++ b/sys/ofed/drivers/infiniband/core/uverbs_cmd.c @@ -35,28 +35,68 @@ #include <linux/file.h> #include <linux/fs.h> +#include <linux/slab.h> +#include <linux/moduleparam.h> +#include <linux/rbtree.h> #include <linux/lockdep.h> +#include <rdma/ib_addr.h> #include <asm/uaccess.h> #include <asm/fcntl.h> +#include <sys/priv.h> #include "uverbs.h" -static struct lock_class_key pd_lock_key; -static struct lock_class_key mr_lock_key; -static struct lock_class_key cq_lock_key; -static struct lock_class_key qp_lock_key; -static struct lock_class_key ah_lock_key; -static struct lock_class_key srq_lock_key; +static int disable_raw_qp_enforcement; +module_param_named(disable_raw_qp_enforcement, disable_raw_qp_enforcement, int, + 0444); +MODULE_PARM_DESC(disable_raw_qp_enforcement, "Disable RAW QP enforcement for " + "being opened by root (default: 0)"); + +struct uverbs_lock_class { + struct lock_class_key key; + char name[16]; +}; + +static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; +static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; +static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" }; +static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; +static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; +static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; +static struct uverbs_lock_class srq_lock_class = { .name = 
"SRQ-uobj" }; +static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; +static struct uverbs_lock_class dct_lock_class = { .name = "DCT-uobj" }; + +static int uverbs_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) +{ + return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; +} + +static int uverbs_copy_to_udata(struct ib_udata *udata, void *src, size_t len) +{ + return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; +} + +static struct ib_udata_ops uverbs_copy = { + .copy_from = uverbs_copy_from_udata, + .copy_to = uverbs_copy_to_udata +}; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ + (udata)->ops = &uverbs_copy; \ (udata)->inbuf = (void __user *) (ibuf); \ (udata)->outbuf = (void __user *) (obuf); \ (udata)->inlen = (ilen); \ (udata)->outlen = (olen); \ } while (0) +enum uverbs_cmd_type { + IB_USER_VERBS_CMD_BASIC, + IB_USER_VERBS_CMD_EXTENDED +}; + /* * The ib_uobject locking scheme is as follows: * @@ -83,13 +123,13 @@ static struct lock_class_key srq_lock_key; */ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context, struct lock_class_key *key) + struct ib_ucontext *context, struct uverbs_lock_class *c) { uobj->user_handle = user_handle; uobj->context = context; kref_init(&uobj->ref); init_rwsem(&uobj->mutex); - lockdep_set_class(&uobj->mutex, key); + lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name); uobj->live = 0; } @@ -241,11 +281,34 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } +static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); + return uobj ? 
uobj->object : NULL; +} + static void put_qp_read(struct ib_qp *qp) { put_uobj_read(qp->uobject); } +static void put_qp_write(struct ib_qp *qp) +{ + put_uobj_write(qp->uobject); +} + +static struct ib_dct *idr_read_dct(int dct_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_dct_idr, dct_handle, context, 0); +} + +static void put_dct_read(struct ib_dct *dct) +{ + put_uobj_read(dct->uobject); +} + static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); @@ -256,12 +319,10 @@ static void put_srq_read(struct ib_srq *srq) put_uobj_read(srq->uobject); } -static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, - struct ib_ucontext *context, +static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, struct ib_uobject **uobj) { - *uobj = idr_read_uobj(&ib_uverbs_xrc_domain_idr, xrcd_handle, - context, 0); + *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); return *uobj ? 
(*uobj)->object : NULL; } @@ -301,7 +362,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext = ibdev->alloc_ucontext(ibdev, &udata); if (IS_ERR(ucontext)) { - ret = PTR_ERR(file->ucontext); + ret = PTR_ERR(ucontext); goto err; } @@ -314,20 +375,23 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->xrcd_list); + INIT_LIST_HEAD(&ucontext->rule_list); + INIT_LIST_HEAD(&ucontext->dct_list); ucontext->closing = 0; + ucontext->peer_mem_private_data = NULL; + ucontext->peer_mem_name = NULL; resp.num_comp_vectors = file->device->num_comp_vectors; - filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); - if (IS_ERR(filp)) { - ret = PTR_ERR(filp); + ret = get_unused_fd(); + if (ret < 0) goto err_free; - } + resp.async_fd = ret; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_file; + filp = ib_uverbs_alloc_event_file(file, 1); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_fd; } file->async_file = filp->private_data; @@ -338,6 +402,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, if (ret) goto err_file; + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_file; + } kref_get(&file->async_file->ref); kref_get(&file->ref); file->ucontext = ucontext; @@ -349,9 +418,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, return in_len; err_file: - put_unused_fd(resp.async_fd); fput(filp); +err_fd: + put_unused_fd(resp.async_fd); + err_free: ibdev->dealloc_ucontext(ucontext); @@ -360,6 +431,55 @@ err: return ret; } +static void ib_uverbs_query_device_assign( + struct ib_uverbs_query_device_resp *resp, + struct ib_device_attr *attr, + struct ib_uverbs_file *file) +{ + memset(resp, 0, sizeof(*resp)); + + resp->fw_ver = attr->fw_ver; + resp->node_guid = 
file->device->ib_dev->node_guid; + resp->sys_image_guid = attr->sys_image_guid; + resp->max_mr_size = attr->max_mr_size; + resp->page_size_cap = attr->page_size_cap; + resp->vendor_id = attr->vendor_id; + resp->vendor_part_id = attr->vendor_part_id; + resp->hw_ver = attr->hw_ver; + resp->max_qp = attr->max_qp; + resp->max_qp_wr = attr->max_qp_wr; + resp->device_cap_flags = attr->device_cap_flags; + resp->max_sge = attr->max_sge; + resp->max_sge_rd = attr->max_sge_rd; + resp->max_cq = attr->max_cq; + resp->max_cqe = attr->max_cqe; + resp->max_mr = attr->max_mr; + resp->max_pd = attr->max_pd; + resp->max_qp_rd_atom = attr->max_qp_rd_atom; + resp->max_ee_rd_atom = attr->max_ee_rd_atom; + resp->max_res_rd_atom = attr->max_res_rd_atom; + resp->max_qp_init_rd_atom = attr->max_qp_init_rd_atom; + resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom; + resp->atomic_cap = attr->atomic_cap; + resp->max_ee = attr->max_ee; + resp->max_rdd = attr->max_rdd; + resp->max_mw = attr->max_mw; + resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp; + resp->max_raw_ethy_qp = attr->max_raw_ethy_qp; + resp->max_mcast_grp = attr->max_mcast_grp; + resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; + resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; + resp->max_ah = attr->max_ah; + resp->max_fmr = attr->max_fmr; + resp->max_map_per_fmr = attr->max_map_per_fmr; + resp->max_srq = attr->max_srq; + resp->max_srq_wr = attr->max_srq_wr; + resp->max_srq_sge = attr->max_srq_sge; + resp->max_pkeys = attr->max_pkeys; + resp->local_ca_ack_delay = attr->local_ca_ack_delay; + resp->phys_port_cnt = file->device->ib_dev->phys_port_cnt; +} + ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -379,51 +499,10 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, if (ret) return ret; - memset(&resp, 0, sizeof resp); - - resp.fw_ver = attr.fw_ver; - resp.node_guid = file->device->ib_dev->node_guid; - resp.sys_image_guid = 
attr.sys_image_guid; - resp.max_mr_size = attr.max_mr_size; - resp.page_size_cap = attr.page_size_cap; - resp.vendor_id = attr.vendor_id; - resp.vendor_part_id = attr.vendor_part_id; - resp.hw_ver = attr.hw_ver; - resp.max_qp = attr.max_qp; - resp.max_qp_wr = attr.max_qp_wr; - resp.device_cap_flags = attr.device_cap_flags; - resp.max_sge = attr.max_sge; - resp.max_sge_rd = attr.max_sge_rd; - resp.max_cq = attr.max_cq; - resp.max_cqe = attr.max_cqe; - resp.max_mr = attr.max_mr; - resp.max_pd = attr.max_pd; - resp.max_qp_rd_atom = attr.max_qp_rd_atom; - resp.max_ee_rd_atom = attr.max_ee_rd_atom; - resp.max_res_rd_atom = attr.max_res_rd_atom; - resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom; - resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom; - resp.atomic_cap = attr.atomic_cap; - resp.max_ee = attr.max_ee; - resp.max_rdd = attr.max_rdd; - resp.max_mw = attr.max_mw; - resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp; - resp.max_raw_ethy_qp = attr.max_raw_ethy_qp; - resp.max_mcast_grp = attr.max_mcast_grp; - resp.max_mcast_qp_attach = attr.max_mcast_qp_attach; - resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach; - resp.max_ah = attr.max_ah; - resp.max_fmr = attr.max_fmr; - resp.max_map_per_fmr = attr.max_map_per_fmr; - resp.max_srq = attr.max_srq; - resp.max_srq_wr = attr.max_srq_wr; - resp.max_srq_sge = attr.max_srq_sge; - resp.max_pkeys = attr.max_pkeys; - resp.local_ca_ack_delay = attr.local_ca_ack_delay; - resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt; + ib_uverbs_query_device_assign(&resp, &attr, file); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user((void __user *)(unsigned long) cmd.response, + &resp, sizeof(resp))) return -EFAULT; return in_len; @@ -469,7 +548,8 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.active_width = attr.active_width; resp.active_speed = attr.active_speed; resp.phys_state = attr.phys_state; - resp.link_layer = attr.link_layer; + 
resp.link_layer = rdma_port_get_link_layer(file->device->ib_dev, + cmd.port_num); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -503,7 +583,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext, &pd_lock_key); + init_uobj(uobj, 0, file->ucontext, &pd_lock_class); down_write(&uobj->mutex); pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, @@ -587,17 +667,316 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, +struct xrcd_table_entry { + struct rb_node node; + struct ib_xrcd *xrcd; + struct inode *inode; +}; + +static int xrcd_table_insert(struct ib_uverbs_device *dev, + struct inode *inode, + struct ib_xrcd *xrcd) +{ + struct xrcd_table_entry *entry, *scan; + struct rb_node **p = &dev->xrcd_tree.rb_node; + struct rb_node *parent = NULL; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->xrcd = xrcd; + entry->inode = inode; + + while (*p) { + parent = *p; + scan = rb_entry(parent, struct xrcd_table_entry, node); + + if (inode < scan->inode) { + p = &(*p)->rb_left; + } else if (inode > scan->inode) { + p = &(*p)->rb_right; + } else { + kfree(entry); + return -EEXIST; + } + } + + rb_link_node(&entry->node, parent, p); + rb_insert_color(&entry->node, &dev->xrcd_tree); + igrab(inode); + return 0; +} + +static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + struct rb_node *p = dev->xrcd_tree.rb_node; + + while (p) { + entry = rb_entry(p, struct xrcd_table_entry, node); + + if (inode < entry->inode) + p = p->rb_left; + else if (inode > entry->inode) + p = p->rb_right; + else + return entry; + } + + return NULL; +} + +static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode) +{ + struct xrcd_table_entry *entry; + + entry = 
xrcd_table_search(dev, inode); + if (!entry) + return NULL; + + return entry->xrcd; +} + +static void xrcd_table_delete(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + + entry = xrcd_table_search(dev, inode); + if (entry) { + iput(inode); + rb_erase(&entry->node, &dev->xrcd_tree); + kfree(entry); + } +} + +ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { + struct ib_uverbs_open_xrcd cmd; + struct ib_uverbs_open_xrcd_resp resp; + struct ib_udata udata; + struct ib_uxrcd_object *obj; + struct ib_xrcd *xrcd = NULL; + struct fd f = {NULL}; + struct inode *inode = NULL; + int ret = 0; + int new_xrcd = 0; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + mutex_lock(&file->device->xrcd_tree_mutex); + + if (cmd.fd != -1) { + /* search for file descriptor */ + f = fdget(cmd.fd); + if (!f.file) { + ret = -EBADF; + goto err_tree_mutex_unlock; + } + + inode = f.file->f_dentry->d_inode; + xrcd = find_xrcd(file->device, inode); + if (!xrcd && !(cmd.oflags & O_CREAT)) { + /* no file descriptor. 
Need CREATE flag */ + ret = -EAGAIN; + goto err_tree_mutex_unlock; + } + + if (xrcd && cmd.oflags & O_EXCL) { + ret = -EINVAL; + goto err_tree_mutex_unlock; + } + } + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) { + ret = -ENOMEM; + goto err_tree_mutex_unlock; + } + + init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); + + down_write(&obj->uobject.mutex); + + if (!xrcd) { + xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev, + file->ucontext, &udata); + if (IS_ERR(xrcd)) { + ret = PTR_ERR(xrcd); + goto err; + } + + xrcd->inode = inode; + xrcd->device = file->device->ib_dev; + atomic_set(&xrcd->usecnt, 0); + mutex_init(&xrcd->tgt_qp_mutex); + INIT_LIST_HEAD(&xrcd->tgt_qp_list); + new_xrcd = 1; + } + + atomic_set(&obj->refcnt, 0); + obj->uobject.object = xrcd; + ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + if (ret) + goto err_idr; + + memset(&resp, 0, sizeof resp); + resp.xrcd_handle = obj->uobject.id; + + if (inode) { + if (new_xrcd) { + /* create new inode/xrcd table entry */ + ret = xrcd_table_insert(file->device, inode, xrcd); + if (ret) + goto err_insert_xrcd; + } + atomic_inc(&xrcd->usecnt); + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + if (f.file) + fdput(f); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); + mutex_unlock(&file->mutex); + + obj->uobject.live = 1; + up_write(&obj->uobject.mutex); + + mutex_unlock(&file->device->xrcd_tree_mutex); + return in_len; + +err_copy: + if (inode) { + if (new_xrcd) + xrcd_table_delete(file->device, inode); + atomic_dec(&xrcd->usecnt); + } + +err_insert_xrcd: + idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + +err_idr: + ib_dealloc_xrcd(xrcd); + +err: + put_uobj_write(&obj->uobject); + +err_tree_mutex_unlock: + if (f.file) + fdput(f); + + mutex_unlock(&file->device->xrcd_tree_mutex); + + return ret; +} + +ssize_t 
ib_uverbs_close_xrcd(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_close_xrcd cmd; + struct ib_uobject *uobj; + struct ib_xrcd *xrcd = NULL; + struct inode *inode = NULL; + struct ib_uxrcd_object *obj; + int live; + int ret = 0; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + mutex_lock(&file->device->xrcd_tree_mutex); + uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); + if (!uobj) { + ret = -EINVAL; + goto out; + } + + xrcd = uobj->object; + inode = xrcd->inode; + obj = container_of(uobj, struct ib_uxrcd_object, uobject); + if (atomic_read(&obj->refcnt)) { + put_uobj_write(uobj); + ret = -EBUSY; + goto out; + } + + if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { + ret = ib_dealloc_xrcd(uobj->object); + if (!ret) + uobj->live = 0; + } + + live = uobj->live; + if (inode && ret) + atomic_inc(&xrcd->usecnt); + + put_uobj_write(uobj); + + if (ret) + goto out; + + if (inode && !live) + xrcd_table_delete(file->device, inode); + + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + ret = in_len; + +out: + mutex_unlock(&file->device->xrcd_tree_mutex); + return ret; +} + +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, + struct ib_xrcd *xrcd) +{ + struct inode *inode; + + inode = xrcd->inode; + if (inode && !atomic_dec_and_test(&xrcd->usecnt)) + return; + + ib_dealloc_xrcd(xrcd); + + if (inode) + xrcd_table_delete(dev, inode); +} + +ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr_resp resp; - struct ib_udata udata; + struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mr *mr; - int ret; + int ret; if (out_len < sizeof resp) return -ENOSPC; @@ -612,32 +991,34 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if 
((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; - /* - * Local write permission is required if remote write or - * remote atomic permission is also requested. - */ - if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && - !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) - return -EINVAL; + ret = ib_check_mr_access(cmd.access_flags); + if (ret) + return ret; uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext, &mr_lock_key); + init_uobj(uobj, 0, file->ucontext, &mr_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { - ret = -EINVAL; + ret = -EINVAL; goto err_free; } + /* We first get a new "obj id" to be passed later to reg mr for + further use as mr_id. + */ + ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); + if (ret) + goto err_put; mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, - cmd.access_flags, &udata, 0); + cmd.access_flags, &udata, uobj->id); if (IS_ERR(mr)) { ret = PTR_ERR(mr); - goto err_put; + goto err_remove_uobj; } mr->device = pd->device; @@ -647,9 +1028,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, atomic_set(&mr->usecnt, 0); uobj->object = mr; - ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); - if (ret) - goto err_unreg; memset(&resp, 0, sizeof resp); resp.lkey = mr->lkey; @@ -675,11 +1053,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - -err_unreg: ib_dereg_mr(mr); +err_remove_uobj: + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); + err_put: put_pd_read(pd); @@ -689,13 +1067,13 @@ err_free: } ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_dereg_mr cmd; struct ib_mr *mr; struct ib_uobject *uobj; - int ret = -EINVAL; + int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof 
cmd)) return -EFAULT; @@ -726,13 +1104,134 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, return in_len; } +ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_alloc_mw cmd; + struct ib_uverbs_alloc_mw_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_mw *mw; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + init_uobj(uobj, 0, file->ucontext, &mw_lock_class); + down_write(&uobj->mutex); + + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_free; + } + + mw = pd->device->alloc_mw(pd, cmd.mw_type); + if (IS_ERR(mw)) { + ret = PTR_ERR(mw); + goto err_put; + } + + mw->device = pd->device; + mw->pd = pd; + mw->uobject = uobj; + atomic_inc(&pd->usecnt); + + uobj->object = mw; + ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); + if (ret) + goto err_unalloc; + + memset(&resp, 0, sizeof(resp)); + resp.rkey = mw->rkey; + resp.mw_handle = uobj->id; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + ret = -EFAULT; + goto err_copy; + } + + put_pd_read(pd); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->mw_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + + return in_len; + +err_copy: + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + +err_unalloc: + ib_dealloc_mw(mw); + +err_put: + put_pd_read(pd); + +err_free: + put_uobj_write(uobj); + return ret; +} + +ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dealloc_mw cmd; + struct ib_mw *mw; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_mw_idr, 
cmd.mw_handle, file->ucontext); + if (!uobj) + return -EINVAL; + + mw = uobj->object; + + ret = ib_dealloc_mw(mw); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return in_len; +} + ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; struct file *filp; + int ret; if (out_len < sizeof resp) return -ENOSPC; @@ -740,9 +1239,16 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); - if (IS_ERR(filp)) + ret = get_unused_fd(); + if (ret < 0) + return ret; + resp.fd = ret; + + filp = ib_uverbs_alloc_event_file(file, 0); + if (IS_ERR(filp)) { + put_unused_fd(resp.fd); return PTR_ERR(filp); + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -755,40 +1261,44 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static ssize_t create_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len, void *vcmd, int ex, + void __user *response) { - struct ib_uverbs_create_cq cmd; + struct ib_uverbs_create_cq *cmd; + struct ib_uverbs_create_cq_ex *cmd_e; struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; + struct ib_cq_init_attr attr; + int cmd_sz; int ret; if (out_len < sizeof resp) return -ENOSPC; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return 
-EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + cmd = vcmd; + cmd_e = vcmd; + cmd_sz = ex ? sizeof(*cmd_e) : sizeof(*cmd); + INIT_UDATA(&udata, buf + cmd_sz, response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); - if (cmd.comp_vector >= file->device->num_comp_vectors) + if (cmd->comp_vector >= file->device->num_comp_vectors) return -EINVAL; obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key); + init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, + &cq_lock_class); down_write(&obj->uobject.mutex); - if (cmd.comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + if (cmd->comp_channel >= 0) { + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); if (!ev_file) { ret = -EINVAL; goto err; @@ -801,8 +1311,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, - cmd.comp_vector, + memset(&attr, 0, sizeof(attr)); + attr.cqe = cmd->cqe; + attr.comp_vector = cmd->comp_vector; + if (ex && (cmd_e->comp_mask & IB_UVERBS_CREATE_CQ_EX_CAP_FLAGS)) + attr.flags = cmd_e->create_flags; + cq = file->device->ib_dev->create_cq(file->device->ib_dev, &attr, file->ucontext, &udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); @@ -825,8 +1339,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, resp.cq_handle = obj->uobject.id; resp.cqe = cq->cqe; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { + if (copy_to_user(response, &resp, sizeof(resp))) { ret = -EFAULT; goto err_copy; } @@ -856,6 +1369,19 @@ err: return ret; } +ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_cq cmd; + + 
if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + return create_cq(file, buf, in_len, out_len, &cmd, + IB_USER_VERBS_CMD_BASIC, (void __user *)cmd.response); +} + ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -893,68 +1419,81 @@ out: return ret ? ret : in_len; } +static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) +{ + struct ib_uverbs_wc tmp; + + tmp.wr_id = wc->wr_id; + tmp.status = wc->status; + tmp.opcode = wc->opcode; + tmp.vendor_err = wc->vendor_err; + tmp.byte_len = wc->byte_len; + tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data; + tmp.qp_num = wc->qp->qp_num; + tmp.src_qp = wc->src_qp; + tmp.wc_flags = wc->wc_flags; + tmp.pkey_index = wc->pkey_index; + tmp.slid = wc->slid; + tmp.sl = wc->sl; + tmp.dlid_path_bits = wc->dlid_path_bits; + tmp.port_num = wc->port_num; + tmp.reserved = 0; + + if (copy_to_user(dest, &tmp, sizeof tmp)) + return -EFAULT; + + return 0; +} + ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_poll_cq cmd; - struct ib_uverbs_poll_cq_resp *resp; + struct ib_uverbs_poll_cq_resp resp; + u8 __user *header_ptr; + u8 __user *data_ptr; struct ib_cq *cq; - struct ib_wc *wc; - int ret = 0; - int i; - int rsize; + struct ib_wc wc; + int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL); - if (!wc) - return -ENOMEM; + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) + return -EINVAL; - rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc); - resp = kmalloc(rsize, GFP_KERNEL); - if (!resp) { - ret = -ENOMEM; - goto out_wc; - } + /* we copy a struct ib_uverbs_poll_cq_resp to user space */ + header_ptr = (void __user *)(unsigned long) cmd.response; + data_ptr = header_ptr + sizeof resp; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); - if (!cq) 
{ - ret = -EINVAL; - goto out; - } + memset(&resp, 0, sizeof resp); + while (resp.count < cmd.ne) { + ret = ib_poll_cq(cq, 1, &wc); + if (ret < 0) + goto out_put; + if (!ret) + break; - resp->count = ib_poll_cq(cq, cmd.ne, wc); + ret = copy_wc_to_user(data_ptr, &wc); + if (ret) + goto out_put; - put_cq_read(cq); + data_ptr += sizeof(struct ib_uverbs_wc); + ++resp.count; + } - for (i = 0; i < resp->count; i++) { - resp->wc[i].wr_id = wc[i].wr_id; - resp->wc[i].status = wc[i].status; - resp->wc[i].opcode = wc[i].opcode; - resp->wc[i].vendor_err = wc[i].vendor_err; - resp->wc[i].byte_len = wc[i].byte_len; - resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data; - resp->wc[i].qp_num = wc[i].qp->qp_num; - resp->wc[i].src_qp = wc[i].src_qp; - resp->wc[i].wc_flags = wc[i].wc_flags; - resp->wc[i].pkey_index = wc[i].pkey_index; - resp->wc[i].slid = wc[i].slid; - resp->wc[i].sl = wc[i].sl; - resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; - resp->wc[i].port_num = wc[i].port_num; - } - - if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize)) + if (copy_to_user(header_ptr, &resp, sizeof resp)) { ret = -EFAULT; + goto out_put; + } -out: - kfree(resp); + ret = in_len; -out_wc: - kfree(wc); - return ret ? 
ret : in_len; +out_put: + put_cq_read(cq); + return ret; } ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, @@ -1035,124 +1574,181 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { - struct ib_uverbs_create_qp cmd; - struct ib_uverbs_create_qp_resp resp; + void __user *response; struct ib_udata udata; struct ib_uqp_object *obj; - struct ib_pd *pd; - struct ib_cq *scq, *rcq; - struct ib_srq *srq; + struct ib_device *device; + struct ib_pd *pd = NULL; + struct ib_xrcd *xrcd = NULL; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_cq *scq = NULL, *rcq = NULL; + struct ib_srq *srq = NULL; struct ib_qp *qp; struct ib_qp_init_attr attr; - struct ib_xrcd *xrcd; - struct ib_uobject *xrcd_uobj; int ret; - - if (out_len < sizeof resp) + union { + struct ib_uverbs_create_qp basic; + } cmd_obj; + struct ib_uverbs_create_qp *cmd; + size_t cmd_size = 0; + union { + struct ib_uverbs_create_qp_resp basic; + } resp_obj; + struct ib_uverbs_create_qp_resp *resp; + size_t resp_size = 0; + + cmd_size = sizeof(cmd_obj.basic); + cmd = &cmd_obj.basic; + + resp_size = sizeof(resp_obj.basic); + resp = &resp_obj.basic; + + if (out_len < resp_size) return -ENOSPC; - if (copy_from_user(&cmd, buf, sizeof cmd)) + if (copy_from_user(&cmd_obj, buf, cmd_size)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + response = (void __user *)cmd->response; - obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!disable_raw_qp_enforcement && + cmd->qp_type == IB_QPT_RAW_PACKET && !priv_check(curthread, PRIV_NET_RAW)) + return -EPERM; + + INIT_UDATA(&udata, buf + cmd_size, response + resp_size, + in_len - cmd_size, out_len - resp_size); + + obj = kzalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + init_uobj(&obj->uevent.uobject, 
cmd->user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); - srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ? - idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; - xrcd = cmd.qp_type == IB_QPT_XRC ? - idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : NULL; - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); - rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); + if (cmd->qp_type == IB_QPT_XRC_TGT) { + xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + device = xrcd->device; + } else { + if (cmd->qp_type == IB_QPT_XRC_INI) { + cmd->max_recv_wr = 0; + cmd->max_recv_sge = 0; + } else { + if (cmd->is_srq) { + srq = idr_read_srq(cmd->srq_handle, file->ucontext); + if (!srq || srq->srq_type != IB_SRQT_BASIC) { + ret = -EINVAL; + goto err_put; + } + } - if (!pd || !scq || !rcq || (cmd.is_srq && !srq) || - (cmd.qp_type == IB_QPT_XRC && !xrcd)) { - ret = -EINVAL; - goto err_put; + if (cmd->recv_cq_handle != cmd->send_cq_handle) { + rcq = idr_read_cq(cmd->recv_cq_handle, file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } + } + } + + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + rcq = rcq ?: scq; + pd = idr_read_pd(cmd->pd_handle, file->ucontext); + if (!pd || !scq) { + ret = -EINVAL; + goto err_put; } - attr.create_flags = 0; + device = pd->device; + } + + memset(&attr, 0, sizeof attr); attr.event_handler = ib_uverbs_qp_event_handler; attr.qp_context = file; attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; - attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; - attr.qp_type = cmd.qp_type; - attr.xrcd = xrcd; + attr.xrcd = xrcd; + attr.sq_sig_type = cmd->sq_sig_all ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + attr.qp_type = cmd->qp_type; attr.create_flags = 0; - attr.cap.max_send_wr = cmd.max_send_wr; - attr.cap.max_recv_wr = cmd.max_recv_wr; - attr.cap.max_send_sge = cmd.max_send_sge; - attr.cap.max_recv_sge = cmd.max_recv_sge; - attr.cap.max_inline_data = cmd.max_inline_data; + attr.cap.max_send_wr = cmd->max_send_wr; + attr.cap.max_recv_wr = cmd->max_recv_wr; + attr.cap.max_send_sge = cmd->max_send_sge; + attr.cap.max_recv_sge = cmd->max_recv_sge; + attr.cap.max_inline_data = cmd->max_inline_data; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - qp = pd->device->create_qp(pd, &attr, &udata); + if (cmd->qp_type == IB_QPT_XRC_TGT) + qp = ib_create_qp(pd, &attr); + else + qp = device->create_qp(pd, &attr, &udata); + if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } - qp->device = pd->device; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->uobject = &obj->uevent.uobject; - qp->event_handler = attr.event_handler; - qp->qp_context = attr.qp_context; - qp->qp_type = attr.qp_type; - qp->xrcd = attr.xrcd; - atomic_inc(&pd->usecnt); - atomic_inc(&attr.send_cq->usecnt); - atomic_inc(&attr.recv_cq->usecnt); - if (attr.srq) - atomic_inc(&attr.srq->usecnt); - else if (attr.xrcd) - atomic_inc(&attr.xrcd->usecnt); + if (cmd->qp_type != IB_QPT_XRC_TGT) { + qp->real_qp = qp; + qp->device = device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_set(&qp->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + if (attr.recv_cq) + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + } + qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); 
if (ret) goto err_destroy; - memset(&resp, 0, sizeof resp); - resp.qpn = qp->qp_num; - resp.qp_handle = obj->uevent.uobject.id; - resp.max_recv_sge = attr.cap.max_recv_sge; - resp.max_send_sge = attr.cap.max_send_sge; - resp.max_recv_wr = attr.cap.max_recv_wr; - resp.max_send_wr = attr.cap.max_send_wr; - resp.max_inline_data = attr.cap.max_inline_data; + memset(&resp_obj, 0, sizeof(resp_obj)); + resp->qpn = qp->qp_num; + resp->qp_handle = obj->uevent.uobject.id; + resp->max_recv_sge = attr.cap.max_recv_sge; + resp->max_send_sge = attr.cap.max_send_sge; + resp->max_recv_wr = attr.cap.max_recv_wr; + resp->max_send_wr = attr.cap.max_send_wr; + resp->max_inline_data = attr.cap.max_inline_data; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; + if (copy_to_user(response, &resp_obj, resp_size)) { + ret = -EFAULT; goto err_copy; - } + } - put_pd_read(pd); - put_cq_read(scq); - if (rcq != scq) + if (xrcd) { + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); + put_xrcd_read(xrcd_uobj); + } + + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq && rcq != scq) put_cq_read(rcq); if (srq) put_srq_read(srq); - if (xrcd) - put_xrcd_read(xrcd_uobj); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); @@ -1171,6 +1767,8 @@ err_destroy: ib_destroy_qp(qp); err_put: + if (xrcd) + put_xrcd_read(xrcd_uobj); if (pd) put_pd_read(pd); if (scq) @@ -1179,16 +1777,107 @@ err_put: put_cq_read(rcq); if (srq) put_srq_read(srq); - if (xrcd) - put_xrcd_read(xrcd_uobj); put_uobj_write(&obj->uevent.uobject); return ret; } +ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_open_qp cmd; + struct ib_uverbs_create_qp_resp resp; + struct ib_udata udata; + struct ib_uqp_object *obj; + struct ib_xrcd *xrcd; + struct ib_uobject *uninitialized_var(xrcd_uobj); + 
struct ib_qp *qp; + struct ib_qp_open_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); + down_write(&obj->uevent.uobject.mutex); + + xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.qp_num = cmd.qpn; + attr.qp_type = cmd.qp_type; + + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); + + qp = ib_open_qp(xrcd, &attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_put; + } + + qp->uobject = &obj->uevent.uobject; + + obj->uevent.uobject.object = qp; + ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + if (ret) + goto err_destroy; + + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + resp.qp_handle = obj->uevent.uobject.id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_remove; + } + + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); + put_xrcd_read(xrcd_uobj); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + up_write(&obj->uevent.uobject.mutex); + + return in_len; + +err_remove: + idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + +err_destroy: + ib_destroy_qp(qp); + +err_put: + put_xrcd_read(xrcd_uobj); + put_uobj_write(&obj->uevent.uobject); + return ret; +} + ssize_t ib_uverbs_query_qp(struct 
ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_query_qp cmd; struct ib_uverbs_query_qp_resp resp; @@ -1286,30 +1975,59 @@ out: return ret ? ret : in_len; } -ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +/* Remove ignored fields set in the attribute mask */ +static int modify_qp_mask(enum ib_qp_type qp_type, int mask) { - struct ib_uverbs_modify_qp cmd; - struct ib_udata udata; - struct ib_qp *qp; - struct ib_qp_attr *attr; - int ret; + switch (qp_type) { + case IB_QPT_XRC_INI: + return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); + case IB_QPT_XRC_TGT: + return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY); + default: + return mask; + } +} - if (copy_from_user(&cmd, buf, sizeof cmd)) +static ssize_t __uverbs_modify_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len, + enum uverbs_cmd_type cmd_type) +{ + struct ib_uverbs_modify_qp_ex cmd; + struct ib_udata udata; + struct ib_qp *qp; + struct ib_qp_attr *attr; + struct ib_qp_attr_ex *attrx; + int ret; + void *p; + union ib_gid sgid; + union ib_gid *dgid; + u8 port_num; + + if (cmd_type == IB_USER_VERBS_CMD_BASIC) { + p = &cmd; + p += sizeof(cmd.comp_mask); + if (copy_from_user(p, buf, + sizeof(struct ib_uverbs_modify_qp))) return -EFAULT; + } else { + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + } INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); - attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) + attrx = kzalloc(sizeof(*attrx), GFP_KERNEL); + if (!attrx) return -ENOMEM; + attr = (struct ib_qp_attr *)attrx; qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) { - ret = -EINVAL; - goto out; + kfree(attrx); + return -EINVAL; } attr->qp_state = cmd.qp_state; @@ -1357,10 +2075,49 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file 
*file, attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + port_num = (cmd.attr_mask & IB_QP_PORT) ? cmd.port_num : qp->port_num; + if ((cmd.attr_mask & IB_QP_AV) && port_num && + (rdma_port_get_link_layer(qp->device, port_num) == + IB_LINK_LAYER_ETHERNET)) { + ret = ib_query_gid(qp->device, port_num, + attr->ah_attr.grh.sgid_index, &sgid); + if (ret) + goto out; + dgid = &attr->ah_attr.grh.dgid; + if (rdma_link_local_addr((struct in6_addr *)dgid->raw)) { + rdma_get_ll_mac((struct in6_addr *)dgid->raw, + attr->ah_attr.dmac); + rdma_get_ll_mac((struct in6_addr *)sgid.raw, + attr->smac); + attr->vlan_id = rdma_get_vlan_id(&sgid); + } else { + ret = rdma_addr_find_dmac_by_grh(&sgid, dgid, + attr->ah_attr.dmac, + &attr->vlan_id); + if (ret) + goto out; + ret = rdma_addr_find_smac_by_sgid(&sgid, attr->smac, + NULL); + if (ret) + goto out; + } + cmd.attr_mask |= IB_QP_SMAC; + if (attr->vlan_id < 0xFFFF) + cmd.attr_mask |= IB_QP_VID; + } + if (cmd_type == IB_USER_VERBS_CMD_EXTENDED) { + if (cmd.comp_mask & IB_UVERBS_QP_ATTR_DCT_KEY) + attrx->dct_key = cmd.dct_key; + } - ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); - - put_qp_read(qp); + if (qp->real_qp == qp) { + ret = qp->device->modify_qp(qp, attr, + modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); + if (!ret && (cmd.attr_mask & IB_QP_PORT)) + qp->port_num = attr->port_num; + } else { + ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); + } if (ret) goto out; @@ -1368,18 +2125,27 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, ret = in_len; out: - kfree(attr); + put_qp_read(qp); + kfree(attrx); return ret; } +ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + return __uverbs_modify_qp(file, buf, in_len, out_len, + IB_USER_VERBS_CMD_BASIC); +} + ssize_t 
ib_uverbs_destroy_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_qp cmd; struct ib_uverbs_destroy_qp_resp resp; - struct ib_uobject *uobj; + struct ib_uobject *uobj; struct ib_qp *qp; struct ib_uqp_object *obj; int ret = -EINVAL; @@ -1409,6 +2175,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, if (ret) return ret; + if (obj->uxrcd) + atomic_dec(&obj->uxrcd->refcnt); + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); mutex_lock(&file->mutex); @@ -1429,14 +2198,14 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, } ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; struct ib_uverbs_send_wr *user_wr; struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; - struct ib_qp *qp; + struct ib_qp *qp; int i, sg_ind; int is_ud; ssize_t ret = -EINVAL; @@ -1479,13 +2248,13 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, user_wr->num_sge * sizeof (struct ib_sge), GFP_KERNEL); if (!next) { - ret = -ENOMEM; - goto out_put; - } + ret = -ENOMEM; + goto out_put; + } if (!last) wr = next; - else + else last->next = next; last = next; @@ -1500,7 +2269,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, file->ucontext); if (!next->wr.ud.ah) { ret = -EINVAL; - goto out_put; + goto out_put; } next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; @@ -1555,12 +2324,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, } resp.bad_wr = 0; - ret = qp->device->post_send(qp, wr, &bad_wr); + ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) - break; + break; } if (copy_to_user((void __user *) (unsigned long) cmd.response, @@ -1594,7 +2363,7 @@ static struct ib_recv_wr 
*ib_uverbs_unmarshall_recv(const char __user *buf, struct ib_recv_wr *wr = NULL, *last, *next; int sg_ind; int i; - int ret; + int ret; if (in_len < wqe_size * wr_count + sge_count * sizeof (struct ib_uverbs_sge)) @@ -1617,9 +2386,9 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, } if (user_wr->num_sge + sg_ind > sge_count) { - ret = -EINVAL; - goto err; - } + ret = -EINVAL; + goto err; + } next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + user_wr->num_sge * sizeof (struct ib_sge), @@ -1627,7 +2396,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, if (!next) { ret = -ENOMEM; goto err; - } + } if (!last) wr = next; @@ -1693,7 +2462,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, goto out; resp.bad_wr = 0; - ret = qp->device->post_recv(qp, wr, &bad_wr); + ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); put_qp_read(qp); @@ -1768,8 +2537,8 @@ out: } ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_create_ah cmd; struct ib_uverbs_create_ah_resp resp; @@ -1789,10 +2558,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key); + init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); down_write(&uobj->mutex); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; @@ -1863,7 +2632,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, struct ib_uverbs_destroy_ah cmd; struct ib_ah *ah; struct ib_uobject *uobj; - int ret; + int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; @@ -1906,7 +2675,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = 
idr_read_qp(cmd.qp_handle, file->ucontext); + qp = idr_write_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; @@ -1935,25 +2704,25 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, kfree(mcast); out_put: - put_qp_read(qp); + put_qp_write(qp); return ret ? ret : in_len; } ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_detach_mcast cmd; struct ib_uqp_object *obj; struct ib_qp *qp; struct ib_uverbs_mcast_entry *mcast; - int ret = -EINVAL; + int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = idr_write_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; @@ -1972,102 +2741,122 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, } out_put: - put_qp_read(qp); + put_qp_write(qp); return ret ? ret : in_len; } -ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int __uverbs_create_xsrq(struct ib_uverbs_file *file, + struct ib_uverbs_create_xsrq *cmd, + struct ib_udata *udata) { - struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; - struct ib_uevent_object *obj; + struct ib_usrq_object *obj; struct ib_pd *pd; struct ib_srq *srq; + struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; - if (out_len < sizeof resp) - return -ENOSPC; + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); + down_write(&obj->uevent.uobject.mutex); - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + if (cmd->srq_type == IB_SRQT_XRC) { + 
attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); + if (!attr.ext.xrc.xrcd) { + ret = -EINVAL; + goto err; + } - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key); - down_write(&obj->uobject.mutex); + attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + if (!attr.ext.xrc.cq) { + ret = -EINVAL; + goto err_put_xrcd; + } + } - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; - goto err; - } + goto err_put_cq; + } attr.event_handler = ib_uverbs_srq_event_handler; attr.srq_context = file; - attr.attr.max_wr = cmd.max_wr; - attr.attr.max_sge = cmd.max_sge; - attr.attr.srq_limit = cmd.srq_limit; + attr.srq_type = cmd->srq_type; + attr.attr.max_wr = cmd->max_wr; + attr.attr.max_sge = cmd->max_sge; + attr.attr.srq_limit = cmd->srq_limit; - obj->events_reported = 0; - INIT_LIST_HEAD(&obj->event_list); + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); - srq = pd->device->create_srq(pd, &attr, &udata); + srq = pd->device->create_srq(pd, &attr, udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); goto err_put; } - srq->device = pd->device; - srq->pd = pd; - srq->uobject = &obj->uobject; + srq->device = pd->device; + srq->pd = pd; + srq->srq_type = cmd->srq_type; + srq->uobject = &obj->uevent.uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; - srq->ext.xrc.cq = NULL; - srq->ext.xrc.xrcd = NULL; + + if (cmd->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.cq = attr.ext.xrc.cq; + srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; + atomic_inc(&attr.ext.xrc.cq->usecnt); + atomic_inc(&attr.ext.xrc.xrcd->usecnt); + } + atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); - obj->uobject.object = srq; - ret = 
idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); + obj->uevent.uobject.object = srq; + ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); - resp.srq_handle = obj->uobject.id; + resp.srq_handle = obj->uevent.uobject.id; resp.max_wr = attr.attr.max_wr; resp.max_sge = attr.attr.max_sge; + if (cmd->srq_type == IB_SRQT_XRC) + resp.srqn = srq->ext.xrc.srq_num; - if (copy_to_user((void __user *) (unsigned long) cmd.response, + if (copy_to_user((void __user *) (unsigned long) cmd->response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } + if (cmd->srq_type == IB_SRQT_XRC) { + put_uobj_read(xrcd_uobj); + put_cq_read(attr.ext.xrc.cq); + } put_pd_read(pd); mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); mutex_unlock(&file->mutex); - obj->uobject.live = 1; + obj->uevent.uobject.live = 1; - up_write(&obj->uobject.mutex); + up_write(&obj->uevent.uobject.mutex); - return in_len; + return 0; err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject); + idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); err_destroy: ib_destroy_srq(srq); @@ -2075,25 +2864,29 @@ err_destroy: err_put: put_pd_read(pd); +err_put_cq: + if (cmd->srq_type == IB_SRQT_XRC) + put_cq_read(attr.ext.xrc.cq); + +err_put_xrcd: + if (cmd->srq_type == IB_SRQT_XRC) { + atomic_dec(&obj->uxrcd->refcnt); + put_uobj_read(xrcd_uobj); + } + err: - put_uobj_write(&obj->uobject); + put_uobj_write(&obj->uevent.uobject); return ret; } -ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) { - struct ib_uverbs_create_xsrq cmd; + struct ib_uverbs_create_srq cmd; + struct ib_uverbs_create_xsrq xcmd; struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; 
- struct ib_uevent_object *obj; - struct ib_pd *pd; - struct ib_srq *srq; - struct ib_cq *xrc_cq; - struct ib_xrcd *xrcd; - struct ib_srq_init_attr attr; - struct ib_uobject *xrcd_uobj; + struct ib_udata udata; int ret; if (out_len < sizeof resp) @@ -2102,113 +2895,48 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + xcmd.response = cmd.response; + xcmd.user_handle = cmd.user_handle; + xcmd.srq_type = IB_SRQT_BASIC; + xcmd.pd_handle = cmd.pd_handle; + xcmd.max_wr = cmd.max_wr; + xcmd.max_sge = cmd.max_sge; + xcmd.srq_limit = cmd.srq_limit; + INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; - - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, - &srq_lock_key); - down_write(&obj->uobject.mutex); - - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - if (!pd) { - ret = -EINVAL; - goto err; - } - - xrc_cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); - if (!xrc_cq) { - ret = -EINVAL; - goto err_put_pd; - } - - xrcd = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj); - if (!xrcd) { - ret = -EINVAL; - goto err_put_cq; - } - - - attr.event_handler = ib_uverbs_srq_event_handler; - attr.srq_context = file; - attr.attr.max_wr = cmd.max_wr; - attr.attr.max_sge = cmd.max_sge; - attr.attr.srq_limit = cmd.srq_limit; - - obj->events_reported = 0; - INIT_LIST_HEAD(&obj->event_list); - - srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, &attr, &udata); - if (IS_ERR(srq)) { - ret = PTR_ERR(srq); - goto err_put; - } - - srq->device = pd->device; - srq->pd = pd; - srq->uobject = &obj->uobject; - srq->event_handler = attr.event_handler; - srq->srq_context = attr.srq_context; - srq->ext.xrc.cq = xrc_cq; - srq->ext.xrc.xrcd = xrcd; - atomic_inc(&pd->usecnt); - atomic_inc(&xrc_cq->usecnt); - atomic_inc(&xrcd->usecnt); - - 
atomic_set(&srq->usecnt, 0); - - obj->uobject.object = srq; - ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); + ret = __uverbs_create_xsrq(file, &xcmd, &udata); if (ret) - goto err_destroy; - - memset(&resp, 0, sizeof resp); - resp.srq_handle = obj->uobject.id; - resp.max_wr = attr.attr.max_wr; - resp.max_sge = attr.attr.max_sge; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } - - put_xrcd_read(xrcd_uobj); - put_cq_read(xrc_cq); - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - - up_write(&obj->uobject.mutex); + return ret; return in_len; +} -err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject); +ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_create_xsrq cmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + int ret; -err_destroy: - ib_destroy_srq(srq); + if (out_len < sizeof resp) + return -ENOSPC; -err_put: - put_xrcd_read(xrcd_uobj); + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; -err_put_cq: - put_cq_read(xrc_cq); + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); -err_put_pd: - put_pd_read(pd); + ret = __uverbs_create_xsrq(file, &cmd, &udata); + if (ret) + return ret; -err: - put_uobj_write(&obj->uobject); - return ret; + return in_len; } ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, @@ -2266,7 +2994,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, put_srq_read(srq); if (ret) - return ret; + return ret; memset(&resp, 0, sizeof resp); @@ -2282,8 +3010,8 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) 
+ const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_destroy_srq cmd; struct ib_uverbs_destroy_srq_resp resp; @@ -2291,6 +3019,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_srq *srq; struct ib_uevent_object *obj; int ret = -EINVAL; + struct ib_usrq_object *us; + enum ib_srq_type srq_type; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; @@ -2300,6 +3030,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, return -EINVAL; srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); + srq_type = srq->srq_type; ret = ib_destroy_srq(srq); if (!ret) @@ -2310,6 +3041,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (ret) return ret; + if (srq_type == IB_SRQT_XRC) { + us = container_of(obj, struct ib_usrq_object, uevent); + atomic_dec(&us->uxrcd->refcnt); + } + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); mutex_lock(&file->mutex); @@ -2330,313 +3066,467 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, return ret ? 
ret : in_len; } -static struct inode *xrc_file2inode(struct file *f) +ssize_t ib_uverbs_exp_create_dct(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) { - return f->f_dentry->d_inode; -} + int in_len = ucore->inlen + uhw->inlen; + int out_len = ucore->outlen + uhw->outlen; + struct ib_uverbs_create_dct cmd; + struct ib_uverbs_create_dct_resp resp; + struct ib_udata udata; + struct ib_udct_object *obj; + struct ib_dct *dct; + int ret; + struct ib_dct_init_attr attr; + struct ib_pd *pd = NULL; + struct ib_cq *cq = NULL; + struct ib_srq *srq = NULL; -struct xrcd_table_entry { - struct rb_node node; - struct inode *inode; - struct ib_xrcd *xrcd; -}; + if (out_len < sizeof(resp)) + return -ENOSPC; -static int xrcd_table_insert(struct ib_device *dev, - struct inode *i_n, - struct ib_xrcd *xrcd) -{ - struct xrcd_table_entry *entry, *scan; - struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node; - struct rb_node *parent = NULL; + ret = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd)); + if (ret) + return ret; - entry = kmalloc(sizeof(struct xrcd_table_entry), GFP_KERNEL); - if (!entry) + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) return -ENOMEM; - entry->inode = i_n; - entry->xrcd = xrcd; + init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, + &dct_lock_class); + down_write(&obj->uobject.mutex); - while (*p) { - parent = *p; - scan = rb_entry(parent, struct xrcd_table_entry, node); + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_pd; + } - if (i_n < scan->inode) - p = &(*p)->rb_left; - else if (i_n > scan->inode) - p = &(*p)->rb_right; - else { - kfree(entry); - return -EEXIST; - } + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) { + ret = -EINVAL; + goto err_put; } - rb_link_node(&entry->node, parent, p); - rb_insert_color(&entry->node, &dev->ib_uverbs_xrcd_table); - igrab(i_n); - return 0; -} + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) { + ret 
= -EINVAL; + goto err_put; + } -static struct xrcd_table_entry *xrcd_table_search(struct ib_device *dev, - struct inode *i_n) -{ - struct xrcd_table_entry *scan; - struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node; - struct rb_node *parent = NULL; + attr.cq = cq; + attr.access_flags = cmd.access_flags; + attr.min_rnr_timer = cmd.min_rnr_timer; + attr.srq = srq; + attr.tclass = cmd.tclass; + attr.flow_label = cmd.flow_label; + attr.dc_key = cmd.dc_key; + attr.mtu = cmd.mtu; + attr.port = cmd.port; + attr.pkey_index = cmd.pkey_index; + attr.gid_index = cmd.gid_index; + attr.hop_limit = cmd.hop_limit; + attr.create_flags = cmd.create_flags; + + dct = ib_create_dct(pd, &attr, &udata); + if (IS_ERR(dct)) { + ret = PTR_ERR(dct); + goto err_put; + } - while (*p) { - parent = *p; - scan = rb_entry(parent, struct xrcd_table_entry, node); + dct->device = file->device->ib_dev; + dct->uobject = &obj->uobject; - if (i_n < scan->inode) - p = &(*p)->rb_left; - else if (i_n > scan->inode) - p = &(*p)->rb_right; - else - return scan; - } - return NULL; -} + obj->uobject.object = dct; + ret = idr_add_uobj(&ib_uverbs_dct_idr, &obj->uobject); + if (ret) + goto err_dct; -static int find_xrcd(struct ib_device *dev, struct inode *i_n, - struct ib_xrcd **xrcd) -{ - struct xrcd_table_entry *entry; + memset(&resp, 0, sizeof(resp)); + resp.dct_handle = obj->uobject.id; + resp.dctn = dct->dct_num; - entry = xrcd_table_search(dev, i_n); - if (!entry) - return -EINVAL; + ret = ucore->ops->copy_to(ucore, &resp, sizeof(resp)); + if (ret) + goto err_copy; - *xrcd = entry->xrcd; - return 0; -} + mutex_lock(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->dct_list); + mutex_unlock(&file->mutex); + obj->uobject.live = 1; -static void xrcd_table_delete(struct ib_device *dev, - struct inode *i_n) -{ - struct xrcd_table_entry *entry = xrcd_table_search(dev, i_n); + put_srq_read(srq); + put_cq_read(cq); + put_pd_read(pd); - if (entry) { - iput(i_n); - rb_erase(&entry->node, 
&dev->ib_uverbs_xrcd_table); - kfree(entry); - } + up_write(&obj->uobject.mutex); + + return in_len; + +err_copy: + idr_remove_uobj(&ib_uverbs_dct_idr, &obj->uobject); + +err_dct: + ib_destroy_dct(dct); + +err_put: + if (srq) + put_srq_read(srq); + + if (cq) + put_cq_read(cq); + + put_pd_read(pd); + +err_pd: + put_uobj_write(&obj->uobject); + return ret; } -ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +ssize_t ib_uverbs_exp_destroy_dct(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) { - struct ib_uverbs_open_xrc_domain cmd; - struct ib_uverbs_open_xrc_domain_resp resp; - struct ib_udata udata; - struct ib_uobject *uobj; - struct ib_uxrcd_object *xrcd_uobj; - struct ib_xrcd *xrcd = NULL; - struct file *f = NULL; - struct inode *inode = NULL; - int ret = 0; - int new_xrcd = 0; + int in_len = ucore->inlen + uhw->inlen; + int out_len = ucore->outlen + uhw->outlen; + struct ib_uverbs_destroy_dct cmd; + struct ib_uverbs_destroy_dct_resp resp; + struct ib_uobject *uobj; + struct ib_dct *dct; + struct ib_udct_object *obj; + int ret; - if (out_len < sizeof resp) + if (out_len < sizeof(resp)) return -ENOSPC; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd)); + if (ret) + return ret; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + uobj = idr_write_uobj(&ib_uverbs_dct_idr, cmd.user_handle, file->ucontext); + if (!uobj) + return -EINVAL; - mutex_lock(&file->device->ib_dev->xrcd_table_mutex); - if (cmd.fd != (u32) (-1)) { - /* search for file descriptor */ - f = fget(cmd.fd); - if (!f) { - ret = -EBADF; - goto err_table_mutex_unlock; - } + dct = uobj->object; + obj = container_of(dct->uobject, struct ib_udct_object, uobject); - inode = xrc_file2inode(f); - if (!inode) { - ret = -EBADF; - goto err_table_mutex_unlock; - } + ret 
= ib_destroy_dct(dct); + if (!ret) + uobj->live = 0; - ret = find_xrcd(file->device->ib_dev, inode, &xrcd); - if (ret && !(cmd.oflags & O_CREAT)) { - /* no file descriptor. Need CREATE flag */ - ret = -EAGAIN; - goto err_table_mutex_unlock; - } + put_uobj_write(uobj); - if (xrcd && cmd.oflags & O_EXCL) { - ret = -EINVAL; - goto err_table_mutex_unlock; - } - } + if (ret) + return ret; - xrcd_uobj = kmalloc(sizeof *xrcd_uobj, GFP_KERNEL); - if (!xrcd_uobj) { - ret = -ENOMEM; - goto err_table_mutex_unlock; - } + idr_remove_uobj(&ib_uverbs_dct_idr, uobj); - uobj = &xrcd_uobj->uobject; - init_uobj(uobj, 0, file->ucontext, &pd_lock_key); - down_write(&uobj->mutex); + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); - if (!xrcd) { - xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev, - file->ucontext, &udata); - if (IS_ERR(xrcd)) { - ret = PTR_ERR(xrcd); - goto err; - } - xrcd->uobject = (cmd.fd == -1) ? uobj : NULL; - xrcd->inode = inode; - xrcd->device = file->device->ib_dev; - atomic_set(&xrcd->usecnt, 0); - new_xrcd = 1; - } + memset(&resp, 0, sizeof(resp)); - uobj->object = xrcd; - ret = idr_add_uobj(&ib_uverbs_xrc_domain_idr, uobj); + put_uobj(uobj); + + ret = ucore->ops->copy_to(ucore, &resp, sizeof(resp)); if (ret) - goto err_idr; + return ret; - memset(&resp, 0, sizeof resp); - resp.xrcd_handle = uobj->id; + return in_len; +} - if (inode) { - if (new_xrcd) { - /* create new inode/xrcd table entry */ - ret = xrcd_table_insert(file->device->ib_dev, inode, xrcd); - if (ret) - goto err_insert_xrcd; - } - atomic_inc(&xrcd->usecnt); +ssize_t ib_uverbs_exp_query_dct(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + int in_len = ucore->inlen + uhw->inlen; + int out_len = ucore->outlen + uhw->outlen; + struct ib_uverbs_query_dct cmd; + struct ib_uverbs_query_dct_resp resp; + struct ib_dct *dct; + struct ib_dct_attr *attr; + int err; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + err = 
ucore->ops->copy_from(&cmd, ucore, sizeof(cmd)); + if (err) + return err; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) { + err = -ENOMEM; + goto out; } - if (f) - fput(f); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; + dct = idr_read_dct(cmd.dct_handle, file->ucontext); + if (!dct) { + err = -EINVAL; + goto out; } - INIT_LIST_HEAD(&xrcd_uobj->xrc_reg_qp_list); + err = ib_query_dct(dct, attr); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->xrcd_list); - mutex_unlock(&file->mutex); + put_dct_read(dct); - uobj->live = 1; + if (err) + goto out; - up_write(&uobj->mutex); + memset(&resp, 0, sizeof(resp)); - mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); - return in_len; + resp.dc_key = attr->dc_key; + resp.access_flags = attr->access_flags; + resp.flow_label = attr->flow_label; + resp.key_violations = attr->key_violations; + resp.port = attr->port; + resp.min_rnr_timer = attr->min_rnr_timer; + resp.tclass = attr->tclass; + resp.mtu = attr->mtu; + resp.pkey_index = attr->pkey_index; + resp.gid_index = attr->gid_index; + resp.hop_limit = attr->hop_limit; + resp.state = attr->state; -err_copy: + err = ucore->ops->copy_to(ucore, &resp, sizeof(resp)); - if (inode) { - if (new_xrcd) - xrcd_table_delete(file->device->ib_dev, inode); - atomic_dec(&xrcd->usecnt); +out: + kfree(attr); + + return err ? 
err : in_len; +} + +/* + * Experimental functions + */ + +static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; + +static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + ib_spec->type = kern_spec->type; + + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + ib_spec->eth.size = sizeof(struct ib_flow_spec_eth); + memcpy(&ib_spec->eth.val, &kern_spec->eth.val, + sizeof(struct ib_flow_eth_filter)); + memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask, + sizeof(struct ib_flow_eth_filter)); + break; + case IB_FLOW_SPEC_IB: + ib_spec->ib.size = sizeof(struct ib_flow_spec_ib); + memcpy(&ib_spec->ib.val, &kern_spec->ib.val, + sizeof(struct ib_flow_ib_filter)); + memcpy(&ib_spec->ib.mask, &kern_spec->ib.mask, + sizeof(struct ib_flow_ib_filter)); + break; + case IB_FLOW_SPEC_IPV4: + ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4); + memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val, + sizeof(struct ib_flow_ipv4_filter)); + memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, + sizeof(struct ib_flow_ipv4_filter)); + break; + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); + memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val, + sizeof(struct ib_flow_tcp_udp_filter)); + memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask, + sizeof(struct ib_flow_tcp_udp_filter)); + break; + default: + return -EINVAL; } + return 0; +} -err_insert_xrcd: - idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); +int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_create_flow cmd; + struct ib_uverbs_create_flow_resp resp; + struct ib_uobject *uobj; + struct ib_flow *flow_id; + struct ib_uverbs_flow_attr *kern_flow_attr; + struct ib_flow_attr *flow_attr; + struct ib_qp *qp; + int err = 0; + void *kern_spec; + void *ib_spec; + int i; + + if (ucore->outlen < sizeof(resp)) + return -ENOSPC; 
-err_idr: - ib_dealloc_xrcd(xrcd); + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (err) + return err; -err: - put_uobj_write(uobj); + ucore->inbuf += sizeof(cmd); + ucore->inlen -= sizeof(cmd); -err_table_mutex_unlock: + if (cmd.comp_mask) + return -EINVAL; - if (f) - fput(f); - mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); - return ret; -} + if (!priv_check(curthread, PRIV_NET_RAW) && !disable_raw_qp_enforcement) + return -EPERM; -ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_close_xrc_domain cmd; - struct ib_uobject *uobj, *t_uobj; - struct ib_uxrcd_object *xrcd_uobj; - struct ib_xrcd *xrcd = NULL; - struct inode *inode = NULL; - int ret = 0; + if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) + return -EINVAL; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + if (cmd.flow_attr.size > ucore->inlen || + cmd.flow_attr.size > + (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) + return -EINVAL; - mutex_lock(&file->device->ib_dev->xrcd_table_mutex); - uobj = idr_write_uobj(&ib_uverbs_xrc_domain_idr, cmd.xrcd_handle, - file->ucontext); + if (cmd.flow_attr.num_of_specs) { + kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + + cmd.flow_attr.size, GFP_KERNEL); + if (!kern_flow_attr) + return -ENOMEM; + + memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); + err = ib_copy_from_udata(kern_flow_attr + 1, ucore, + cmd.flow_attr.size); + if (err) + goto err_free_attr; + } else { + kern_flow_attr = &cmd.flow_attr; + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); if (!uobj) { - ret = -EINVAL; - goto err_unlock_mutex; + err = -ENOMEM; + goto err_free_attr; } + init_uobj(uobj, 0, file->ucontext, &rule_lock_class); + down_write(&uobj->mutex); - mutex_lock(&file->mutex); - if (!ret) { - list_for_each_entry(t_uobj, &file->ucontext->qp_list, list) { - struct ib_qp *qp = t_uobj->object; - if (qp->xrcd && qp->xrcd 
== uobj->object) { - ret = -EBUSY; - break; - } - } + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { + err = -EINVAL; + goto err_uobj; } - if (!ret) { - list_for_each_entry(t_uobj, &file->ucontext->srq_list, list) { - struct ib_srq *srq = t_uobj->object; - if (srq->ext.xrc.xrcd && srq->ext.xrc.xrcd == uobj->object) { - ret = -EBUSY; - break; - } + + flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, + GFP_KERNEL); + if (!flow_attr) { + err = -ENOMEM; + goto err_put; + } + + flow_attr->type = kern_flow_attr->type; + flow_attr->priority = kern_flow_attr->priority; + flow_attr->num_of_specs = kern_flow_attr->num_of_specs; + flow_attr->port = kern_flow_attr->port; + flow_attr->flags = kern_flow_attr->flags; + flow_attr->size = sizeof(*flow_attr); + + kern_spec = kern_flow_attr + 1; + ib_spec = flow_attr + 1; + for (i = 0; i < flow_attr->num_of_specs && + cmd.flow_attr.size > + offsetof(struct ib_uverbs_flow_spec, reserved) && + cmd.flow_attr.size >= + ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { + err = kern_spec_to_ib_spec(kern_spec, ib_spec); + if (err) + goto err_free; + flow_attr->size += + ((union ib_flow_spec *)ib_spec)->size; + cmd.flow_attr.size -= + ((struct ib_uverbs_flow_spec *)kern_spec)->size; + kern_spec += ((struct ib_uverbs_flow_spec *)kern_spec)->size; + ib_spec += ((union ib_flow_spec *)ib_spec)->size; + } + if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { + pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n", + i, cmd.flow_attr.size); + goto err_free; } + flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); + if (IS_ERR(flow_id)) { + err = PTR_ERR(flow_id); + goto err_free; } + flow_id->qp = qp; + flow_id->uobject = uobj; + uobj->object = flow_id; + + err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); + if (err) + goto destroy_flow; + + memset(&resp, 0, sizeof(resp)); + resp.flow_handle = uobj->id; + + err = ib_copy_to_udata(ucore, + &resp, sizeof(resp)); + if (err) + goto 
err_copy; + + put_qp_read(qp); + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rule_list); mutex_unlock(&file->mutex); - if (ret) { - put_uobj_write(uobj); - goto err_unlock_mutex; - } - xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); - if (!list_empty(&xrcd_uobj->xrc_reg_qp_list)) { - ret = -EBUSY; - put_uobj_write(uobj); - goto err_unlock_mutex; - } + uobj->live = 1; - xrcd = (struct ib_xrcd *) (uobj->object); - inode = xrcd->inode; + up_write(&uobj->mutex); + kfree(flow_attr); + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return 0; +err_copy: + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); +destroy_flow: + ib_destroy_flow(flow_id); +err_free: + kfree(flow_attr); +err_put: + put_qp_read(qp); +err_uobj: + put_uobj_write(uobj); +err_free_attr: + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return err; +} - if (inode) - atomic_dec(&xrcd->usecnt); +int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_destroy_flow cmd; + struct ib_flow *flow_id; + struct ib_uobject *uobj; + int ret; - ret = ib_dealloc_xrcd(uobj->object); + ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (ret) + return ret; + + uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + flow_id = uobj->object; + + ret = ib_destroy_flow(flow_id); if (!ret) uobj->live = 0; put_uobj_write(uobj); - if (ret && !inode) - goto err_unlock_mutex; - - if (!ret && inode) - xrcd_table_delete(file->device->ib_dev, inode); - - idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -2644,380 +3534,378 @@ ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file, put_uobj(uobj); - mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); - return in_len; - -err_unlock_mutex: - 
mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); return ret; } -void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, - struct ib_xrcd *xrcd) +ssize_t ib_uverbs_exp_modify_qp(struct ib_uverbs_file *file, + struct ib_udata *ucore, struct ib_udata *uhw) { - struct inode *inode = NULL; - int ret = 0; + const char __user *buf = ucore->inbuf; + int in_len = ucore->inlen + uhw->inlen; + int out_len = ucore->outlen + uhw->outlen; - inode = xrcd->inode; - if (inode) - atomic_dec(&xrcd->usecnt); + return __uverbs_modify_qp(file, buf, in_len, out_len, + IB_USER_VERBS_CMD_EXTENDED); +} - ret = ib_dealloc_xrcd(xrcd); - if (!ret && inode) - xrcd_table_delete(ib_dev, inode); + +ssize_t ib_uverbs_exp_create_cq(struct ib_uverbs_file *file, + struct ib_udata *ucore, struct ib_udata *uhw) +{ + const char __user *buf = ucore->inbuf; + int in_len = ucore->inlen + uhw->inlen; + int out_len = ucore->outlen + uhw->outlen; + struct ib_uverbs_create_cq_ex cmd; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + return create_cq(file, buf, in_len, out_len, &cmd, + IB_USER_VERBS_CMD_EXTENDED, ucore->outbuf); } -ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +ssize_t ib_uverbs_exp_modify_cq(struct ib_uverbs_file *file, + struct ib_udata *ucore, struct ib_udata *uhw) { - struct ib_uverbs_create_xrc_rcv_qp cmd; - struct ib_uverbs_create_xrc_rcv_qp_resp resp; - struct ib_uxrc_rcv_object *obj; - struct ib_qp_init_attr init_attr; - struct ib_xrcd *xrcd; - struct ib_uobject *uobj; - struct ib_uxrcd_object *xrcd_uobj; - u32 qp_num; - int err; + const char __user *buf = ucore->inbuf; + int in_len = ucore->inlen + uhw->inlen; + struct ib_uverbs_modify_cq_ex cmd; + struct ib_cq *cq; + struct ib_cq_attr attr; + int ret; - if (out_len < sizeof resp) + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) + return -EINVAL; + + 
attr.moderation.cq_count = cmd.cq_count; + attr.moderation.cq_period = cmd.cq_period; + attr.cq_cap_flags = cmd.cq_cap_flags; + + ret = ib_modify_cq(cq, &attr, cmd.attr_mask); + + put_cq_read(cq); + + return ret ? ret : in_len; +} + + +ssize_t ib_uverbs_exp_query_device(struct ib_uverbs_file *file, + struct ib_udata *ucore, struct ib_udata *uhw) +{ + struct ib_uverbs_exp_query_device_resp resp; + struct ib_exp_device_attr exp_attr; + int ret; + + if (ucore->outlen + uhw->outlen < sizeof(resp)) return -ENOSPC; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + memset(&resp, 0, sizeof(resp)); + memset(&exp_attr, 0, sizeof(exp_attr)); + ret = ib_exp_query_device(file->device->ib_dev, &exp_attr); + if (ret) + return ret; - obj = kzalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; + ib_uverbs_query_device_assign(&resp.base, &exp_attr.base, file); - xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); - if (!xrcd) { - err = -EINVAL; - goto err_out; + resp.comp_mask = 0; + resp.device_cap_flags2 = 0; + + /* + * Handle regular attr fields + */ + if (exp_attr.base.comp_mask & IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK) { + resp.timestamp_mask = exp_attr.base.timestamp_mask; + resp.comp_mask |= IB_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK; } - init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler; - init_attr.qp_context = file; - init_attr.srq = NULL; - init_attr.sq_sig_type = - cmd.sq_sig_all ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; - init_attr.qp_type = IB_QPT_XRC; - init_attr.xrcd = xrcd; + if (exp_attr.base.comp_mask & IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK) { + resp.hca_core_clock = exp_attr.base.hca_core_clock; + resp.comp_mask |= IB_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK; + } - init_attr.cap.max_send_wr = 1; - init_attr.cap.max_recv_wr = 0; - init_attr.cap.max_send_sge = 1; - init_attr.cap.max_recv_sge = 0; - init_attr.cap.max_inline_data = 0; + /* + * Handle experimental attr fields + */ + if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_CAP_FLAGS2) { + resp.device_cap_flags2 = exp_attr.device_cap_flags2; + resp.comp_mask |= IB_EXP_DEVICE_ATTR_CAP_FLAGS2; + } - err = xrcd->device->create_xrc_rcv_qp(&init_attr, &qp_num); - if (err) - goto err_put; + if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_DC_REQ_RD) { + resp.dc_rd_req = exp_attr.dc_rd_req; + resp.comp_mask |= IB_EXP_DEVICE_ATTR_DC_REQ_RD; + } - memset(&resp, 0, sizeof resp); - resp.qpn = qp_num; + if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_DC_RES_RD) { + resp.dc_rd_res = exp_attr.dc_rd_res; + resp.comp_mask |= IB_EXP_DEVICE_ATTR_DC_RES_RD; + } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - err = -EFAULT; - goto err_destroy; + if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ) { + resp.inline_recv_sz = exp_attr.inline_recv_sz; + resp.comp_mask |= IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ; } - atomic_inc(&xrcd->usecnt); - put_xrcd_read(uobj); - obj->qp_num = qp_num; - obj->domain_handle = cmd.xrc_domain_handle; - xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); - mutex_lock(&file->device->ib_dev->xrcd_table_mutex); - list_add_tail(&obj->list, &xrcd_uobj->xrc_reg_qp_list); - mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_RSS_TBL_SZ) { + resp.max_rss_tbl_sz = exp_attr.max_rss_tbl_sz; + resp.comp_mask |= IB_EXP_DEVICE_ATTR_RSS_TBL_SZ; + } - return in_len; + if 
(copy_to_user(ucore->outbuf, &resp, sizeof(resp))) + return -EFAULT; -err_destroy: - xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); -err_put: - put_xrcd_read(uobj); -err_out: - kfree(obj); - return err; + return ucore->inlen + uhw->inlen; } -ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +ssize_t ib_uverbs_exp_create_qp(struct ib_uverbs_file *file, + struct ib_udata *ucore, struct ib_udata *uhw) { - struct ib_uverbs_modify_xrc_rcv_qp cmd; - struct ib_qp_attr *attr; - struct ib_xrcd *xrcd; - struct ib_uobject *uobj; - int err; + struct ib_uqp_object *obj; + struct ib_device *device; + struct ib_pd *pd = NULL; + struct ib_xrcd *xrcd = NULL; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_cq *scq = NULL, *rcq = NULL; + struct ib_srq *srq = NULL; + struct ib_qp *qp; + struct ib_exp_qp_init_attr attr; + int ret; + struct ib_uverbs_exp_create_qp cmd_exp; + struct ib_uverbs_exp_create_qp_resp resp_exp; + struct ib_qp *parentqp = NULL; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + memset(&cmd_exp, 0, sizeof(cmd_exp)); - attr = kzalloc(sizeof *attr, GFP_KERNEL); - if (!attr) - return -ENOMEM; + ret = ucore->ops->copy_from(&cmd_exp, ucore, sizeof(cmd_exp)); + if (ret) + return ret; - xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); - if (!xrcd) { - kfree(attr); - return -EINVAL; - } + if (!disable_raw_qp_enforcement && + cmd_exp.qp_type == IB_QPT_RAW_PACKET && !priv_check(curthread, + PRIV_NET_RAW)) + return -EPERM; - attr->qp_state = cmd.qp_state; - attr->cur_qp_state = cmd.cur_qp_state; - attr->qp_access_flags = cmd.qp_access_flags; - attr->pkey_index = cmd.pkey_index; - attr->port_num = cmd.port_num; - attr->path_mtu = cmd.path_mtu; - attr->path_mig_state = cmd.path_mig_state; - attr->qkey = cmd.qkey; - attr->rq_psn = cmd.rq_psn; - attr->sq_psn = cmd.sq_psn; - attr->dest_qp_num = cmd.dest_qp_num; - attr->alt_pkey_index = cmd.alt_pkey_index; - 
attr->en_sqd_async_notify = cmd.en_sqd_async_notify; - attr->max_rd_atomic = cmd.max_rd_atomic; - attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; - attr->min_rnr_timer = cmd.min_rnr_timer; - attr->port_num = cmd.port_num; - attr->timeout = cmd.timeout; - attr->retry_cnt = cmd.retry_cnt; - attr->rnr_retry = cmd.rnr_retry; - attr->alt_port_num = cmd.alt_port_num; - attr->alt_timeout = cmd.alt_timeout; + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; - memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); - attr->ah_attr.grh.flow_label = cmd.dest.flow_label; - attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; - attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; - attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; - attr->ah_attr.dlid = cmd.dest.dlid; - attr->ah_attr.sl = cmd.dest.sl; - attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; - attr->ah_attr.static_rate = cmd.dest.static_rate; - attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; - attr->ah_attr.port_num = cmd.dest.port_num; + init_uobj(&obj->uevent.uobject, cmd_exp.user_handle, file->ucontext, + &qp_lock_class); + down_write(&obj->uevent.uobject.mutex); - memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); - attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; - attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; - attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; - attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; - attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; - attr->alt_ah_attr.sl = cmd.alt_dest.sl; - attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; - attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; - attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; - attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; - - err = xrcd->device->modify_xrc_rcv_qp(xrcd, cmd.qp_num, attr, cmd.attr_mask); - put_xrcd_read(uobj); - kfree(attr); - return err ? 
err : in_len; -} + if (cmd_exp.qp_type == IB_QPT_XRC_TGT) { + xrcd = idr_read_xrcd(cmd_exp.pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + device = xrcd->device; + } else { + if (cmd_exp.qp_type == IB_QPT_XRC_INI) { + cmd_exp.max_recv_wr = 0; + cmd_exp.max_recv_sge = 0; + } else { + if (cmd_exp.is_srq) { + srq = idr_read_srq(cmd_exp.srq_handle, file->ucontext); + if (!srq || srq->srq_type != IB_SRQT_BASIC) { + ret = -EINVAL; + goto err_put; + } + } -ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_query_xrc_rcv_qp cmd; - struct ib_uverbs_query_qp_resp resp; - struct ib_qp_attr *attr; - struct ib_qp_init_attr *init_attr; - struct ib_xrcd *xrcd; - struct ib_uobject *uobj; - int ret; + if (cmd_exp.recv_cq_handle != cmd_exp.send_cq_handle) { + rcq = idr_read_cq(cmd_exp.recv_cq_handle, file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } + } + } - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + scq = idr_read_cq(cmd_exp.send_cq_handle, file->ucontext, !!rcq); + rcq = rcq ?: scq; + pd = idr_read_pd(cmd_exp.pd_handle, file->ucontext); + if (!pd || !scq) { + ret = -EINVAL; + goto err_put; + } - attr = kmalloc(sizeof *attr, GFP_KERNEL); - init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); - if (!attr || !init_attr) { - ret = -ENOMEM; - goto out; + device = pd->device; } - xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); - if (!xrcd) { - ret = -EINVAL; - goto out; + memset(&attr, 0, sizeof(attr)); + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.send_cq = scq; + attr.recv_cq = rcq; + attr.srq = srq; + attr.xrcd = xrcd; + attr.sq_sig_type = cmd_exp.sq_sig_all ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + attr.qp_type = cmd_exp.qp_type; + attr.create_flags = 0; + + attr.cap.max_send_wr = cmd_exp.max_send_wr; + attr.cap.max_recv_wr = cmd_exp.max_recv_wr; + attr.cap.max_send_sge = cmd_exp.max_send_sge; + attr.cap.max_recv_sge = cmd_exp.max_recv_sge; + attr.cap.max_inline_data = cmd_exp.max_inline_data; + + if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_CAP_FLAGS) + attr.create_flags |= cmd_exp.qp_cap_flags & + (IB_QP_CREATE_CROSS_CHANNEL | + IB_QP_CREATE_MANAGED_SEND | + IB_QP_CREATE_MANAGED_RECV); + + if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_QPG) { + struct ib_uverbs_qpg *qpg; + if (cmd_exp.qp_type != IB_QPT_RAW_PACKET && + cmd_exp.qp_type != IB_QPT_UD) { + ret = -EINVAL; + goto err_put; + } + qpg = &cmd_exp.qpg; + switch (qpg->qpg_type) { + case IB_QPG_PARENT: + attr.parent_attrib.rss_child_count = + qpg->parent_attrib.rss_child_count; + attr.parent_attrib.tss_child_count = + qpg->parent_attrib.tss_child_count; + break; + case IB_QPG_CHILD_RX: + case IB_QPG_CHILD_TX: + parentqp = idr_read_qp(qpg->parent_handle, + file->ucontext); + if (!parentqp) { + ret = -EINVAL; + goto err_put; + } + attr.qpg_parent = parentqp; + break; + default: + ret = -EINVAL; + goto err_put; + } + attr.qpg_type = qpg->qpg_type; } - ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr, - cmd.attr_mask, init_attr); + if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_INL_RECV) + attr.max_inl_recv = cmd_exp.max_inl_recv; - put_xrcd_read(uobj); + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); - if (ret) - goto out; + if (cmd_exp.qp_type == IB_QPT_XRC_TGT) + qp = ib_create_qp(pd, (struct ib_qp_init_attr *)&attr); + else + qp = device->exp_create_qp(pd, &attr, uhw); - memset(&resp, 0, sizeof resp); - resp.qp_state = attr->qp_state; - resp.cur_qp_state = attr->cur_qp_state; - resp.path_mtu = attr->path_mtu; - resp.path_mig_state = attr->path_mig_state; - resp.qkey = attr->qkey; - 
resp.rq_psn = attr->rq_psn; - resp.sq_psn = attr->sq_psn; - resp.dest_qp_num = attr->dest_qp_num; - resp.qp_access_flags = attr->qp_access_flags; - resp.pkey_index = attr->pkey_index; - resp.alt_pkey_index = attr->alt_pkey_index; - resp.sq_draining = attr->sq_draining; - resp.max_rd_atomic = attr->max_rd_atomic; - resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; - resp.min_rnr_timer = attr->min_rnr_timer; - resp.port_num = attr->port_num; - resp.timeout = attr->timeout; - resp.retry_cnt = attr->retry_cnt; - resp.rnr_retry = attr->rnr_retry; - resp.alt_port_num = attr->alt_port_num; - resp.alt_timeout = attr->alt_timeout; + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_put; + } - memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); - resp.dest.flow_label = attr->ah_attr.grh.flow_label; - resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; - resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; - resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; - resp.dest.dlid = attr->ah_attr.dlid; - resp.dest.sl = attr->ah_attr.sl; - resp.dest.src_path_bits = attr->ah_attr.src_path_bits; - resp.dest.static_rate = attr->ah_attr.static_rate; - resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); - resp.dest.port_num = attr->ah_attr.port_num; + if (cmd_exp.qp_type != IB_QPT_XRC_TGT) { + qp->real_qp = qp; + qp->device = device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_set(&qp->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + if (attr.recv_cq) + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + } + qp->uobject = &obj->uevent.uobject; - memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); - resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; - resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; - 
resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; - resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; - resp.alt_dest.dlid = attr->alt_ah_attr.dlid; - resp.alt_dest.sl = attr->alt_ah_attr.sl; - resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; - resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; - resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); - resp.alt_dest.port_num = attr->alt_ah_attr.port_num; + obj->uevent.uobject.object = qp; + ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + if (ret) + goto err_destroy; - resp.max_send_wr = init_attr->cap.max_send_wr; - resp.max_recv_wr = init_attr->cap.max_recv_wr; - resp.max_send_sge = init_attr->cap.max_send_sge; - resp.max_recv_sge = init_attr->cap.max_recv_sge; - resp.max_inline_data = init_attr->cap.max_inline_data; - resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; + memset(&resp_exp, 0, sizeof(resp_exp)); + resp_exp.qpn = qp->qp_num; + resp_exp.qp_handle = obj->uevent.uobject.id; + resp_exp.max_recv_sge = attr.cap.max_recv_sge; + resp_exp.max_send_sge = attr.cap.max_send_sge; + resp_exp.max_recv_wr = attr.cap.max_recv_wr; + resp_exp.max_send_wr = attr.cap.max_send_wr; + resp_exp.max_inline_data = attr.cap.max_inline_data; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; + if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_INL_RECV) { + resp_exp.comp_mask |= IB_UVERBS_EXP_CREATE_QP_RESP_INL_RECV; + resp_exp.max_inl_recv = attr.max_inl_recv; + } -out: - kfree(attr); - kfree(init_attr); + ret = ucore->ops->copy_to(ucore, &resp_exp, sizeof(resp_exp)); + if (ret) + goto err_copy; - return ret ? 
ret : in_len; -} + if (xrcd) { + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); + put_xrcd_read(xrcd_uobj); + } -ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_reg_xrc_rcv_qp cmd; - struct ib_uxrc_rcv_object *qp_obj, *tmp; - struct ib_xrcd *xrcd; - struct ib_uobject *uobj; - struct ib_uxrcd_object *xrcd_uobj; - int ret; + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq && rcq != scq) + put_cq_read(rcq); + if (srq) + put_srq_read(srq); + if (parentqp) + put_qp_read(parentqp); - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); + mutex_unlock(&file->mutex); - qp_obj = kmalloc(sizeof *qp_obj, GFP_KERNEL); - if (!qp_obj) - return -ENOMEM; + obj->uevent.uobject.live = 1; - xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); - if (!xrcd) { - ret = -EINVAL; - goto err_out; - } + up_write(&obj->uevent.uobject.mutex); - ret = xrcd->device->reg_xrc_rcv_qp(xrcd, file, cmd.qp_num); - if (ret) - goto err_put; + return ucore->inlen + uhw->inlen; - xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); - mutex_lock(&file->device->ib_dev->xrcd_table_mutex); - list_for_each_entry(tmp, &xrcd_uobj->xrc_reg_qp_list, list) - if (cmd.qp_num == tmp->qp_num) { - kfree(qp_obj); - mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); - put_xrcd_read(uobj); - return in_len; - } - qp_obj->qp_num = cmd.qp_num; - qp_obj->domain_handle = cmd.xrc_domain_handle; - list_add_tail(&qp_obj->list, &xrcd_uobj->xrc_reg_qp_list); - mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); - atomic_inc(&xrcd->usecnt); - put_xrcd_read(uobj); - return in_len; +err_copy: + idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + +err_destroy: + ib_destroy_qp(qp); err_put: - put_xrcd_read(uobj); -err_out: + if 
(xrcd) + put_xrcd_read(xrcd_uobj); + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq && rcq != scq) + put_cq_read(rcq); + if (srq) + put_srq_read(srq); + if (parentqp) + put_qp_read(parentqp); - kfree(qp_obj); + put_uobj_write(&obj->uevent.uobject); return ret; } -int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, - struct ib_xrcd *xrcd, u32 qp_num) -{ - int err; - err = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); - if (!err) - atomic_dec(&xrcd->usecnt); - return err; -} - -ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +int ib_exp_query_device(struct ib_device *device, + struct ib_exp_device_attr *device_attr) { - struct ib_uverbs_unreg_xrc_rcv_qp cmd; - struct ib_uxrc_rcv_object *qp_obj, *tmp; - struct ib_xrcd *xrcd; - struct ib_uobject *uobj; - struct ib_uxrcd_object *xrcd_uobj; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); - if (!xrcd) - return -EINVAL; - - ret = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, cmd.qp_num); - if (ret) { - put_xrcd_read(uobj); - return -EINVAL; - } - atomic_dec(&xrcd->usecnt); - - xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); - mutex_lock(&file->device->ib_dev->xrcd_table_mutex); - list_for_each_entry_safe(qp_obj, tmp, &xrcd_uobj->xrc_reg_qp_list, list) - if (cmd.qp_num == qp_obj->qp_num) { - list_del(&qp_obj->list); - kfree(qp_obj); - break; - } - mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); - put_xrcd_read(uobj); - return in_len; + return device->exp_query_device(device, device_attr); } +EXPORT_SYMBOL(ib_exp_query_device); diff --git a/sys/ofed/drivers/infiniband/core/uverbs_main.c b/sys/ofed/drivers/infiniband/core/uverbs_main.c index 30b9259..12bc0d3 100644 --- a/sys/ofed/drivers/infiniband/core/uverbs_main.c +++ b/sys/ofed/drivers/infiniband/core/uverbs_main.c @@ -39,8 +39,13 @@ #include 
<linux/err.h> #include <linux/fs.h> #include <linux/poll.h> +#include <linux/sched.h> #include <linux/file.h> #include <linux/cdev.h> +#include <linux/slab.h> +#include <linux/ktime.h> +#include <linux/rbtree.h> +#include <linux/math64.h> #include <asm/uaccess.h> @@ -50,8 +55,6 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); MODULE_LICENSE("Dual BSD/GPL"); -#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */ - enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, @@ -60,6 +63,31 @@ enum { #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) +static int uverbs_copy_from_udata_ex(void *dest, struct ib_udata *udata, size_t len) +{ + return copy_from_user(dest, udata->inbuf, min(udata->inlen, len)) ? -EFAULT : 0; +} + +static int uverbs_copy_to_udata_ex(struct ib_udata *udata, void *src, size_t len) +{ + return copy_to_user(udata->outbuf, src, min(udata->outlen, len)) ? -EFAULT : 0; +} + +static struct ib_udata_ops uverbs_copy_ex = { + .copy_from = uverbs_copy_from_udata_ex, + .copy_to = uverbs_copy_to_udata_ex +}; + +#define INIT_UDATA_EX(udata, ibuf, obuf, ilen, olen) \ + do { \ + (udata)->ops = &uverbs_copy_ex; \ + (udata)->inbuf = (void __user *)(ibuf); \ + (udata)->outbuf = (void __user *)(obuf); \ + (udata)->inlen = (ilen); \ + (udata)->outlen = (olen); \ + } while (0) + + static struct class *uverbs_class; DEFINE_SPINLOCK(ib_uverbs_idr_lock); @@ -70,10 +98,11 @@ DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); -DEFINE_IDR(ib_uverbs_xrc_domain_idr); +DEFINE_IDR(ib_uverbs_xrcd_idr); +DEFINE_IDR(ib_uverbs_rule_idr); +DEFINE_IDR(ib_uverbs_dct_idr); -static spinlock_t map_lock; -static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; +static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, @@ -86,6 +115,8 @@ static 
ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, + [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, @@ -107,20 +138,31 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, - [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq, - [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrc_domain, - [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrc_domain, - [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp, - [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp, - [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp, - [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = ib_uverbs_reg_xrc_rcv_qp, - [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = ib_uverbs_unreg_xrc_rcv_qp, + [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, + [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, + [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, + [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, }; -#ifdef __linux__ -/* BSD Does not require a fake mountpoint for all files. 
*/ -static struct vfsmount *uverbs_event_mnt; -#endif +static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) = { + [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, + [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, +}; + +static ssize_t (*uverbs_exp_cmd_table[])(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) = { + [IB_USER_VERBS_EXP_CMD_CREATE_QP] = ib_uverbs_exp_create_qp, + [IB_USER_VERBS_EXP_CMD_MODIFY_CQ] = ib_uverbs_exp_modify_cq, + [IB_USER_VERBS_EXP_CMD_MODIFY_QP] = ib_uverbs_exp_modify_qp, + [IB_USER_VERBS_EXP_CMD_CREATE_CQ] = ib_uverbs_exp_create_cq, + [IB_USER_VERBS_EXP_CMD_QUERY_DEVICE] = ib_uverbs_exp_query_device, + [IB_USER_VERBS_EXP_CMD_CREATE_DCT] = ib_uverbs_exp_create_dct, + [IB_USER_VERBS_EXP_CMD_DESTROY_DCT] = ib_uverbs_exp_destroy_dct, + [IB_USER_VERBS_EXP_CMD_QUERY_DCT] = ib_uverbs_exp_query_dct, +}; static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); @@ -195,6 +237,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; + int err; if (!context) return 0; @@ -209,18 +252,55 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + /* Remove MWs before QPs, in order to support type 2A MWs. 
*/ + list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { + struct ib_mw *mw = uobj->object; + + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + err = ib_dealloc_mw(mw); + if (err) { + pr_info("user_verbs: couldn't deallocate MW during cleanup.\n"); + pr_info("user_verbs: the system may have become unstable.\n"); + } + kfree(uobj); + } + list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { + struct ib_flow *flow_id = uobj->object; + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + ib_destroy_flow(flow_id); + kfree(uobj); + } + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); + ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); + err = ib_destroy_qp(qp); + if (err) + pr_info("destroying uverbs qp failed: err %d\n", err); + ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } + list_for_each_entry_safe(uobj, tmp, &context->dct_list, list) { + struct ib_dct *dct = uobj->object; + struct ib_udct_object *udct = + container_of(uobj, struct ib_udct_object, uobject); + + idr_remove_uobj(&ib_uverbs_dct_idr, uobj); + + err = ib_destroy_dct(dct); + if (err) + pr_info("destroying uverbs dct failed: err %d\n", err); + + kfree(udct); + } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; @@ -228,7 +308,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, container_of(uobj, struct ib_uevent_object, uobject); idr_remove_uobj(&ib_uverbs_srq_idr, uobj); - ib_destroy_srq(srq); + err = ib_destroy_srq(srq); + if (err) + pr_info("destroying uverbs srq failed: err %d\n", err); ib_uverbs_release_uevent(file, uevent); kfree(uevent); } @@ -240,41 +322,37 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, container_of(uobj, struct ib_ucq_object, uobject); idr_remove_uobj(&ib_uverbs_cq_idr, uobj); - 
ib_destroy_cq(cq); + err = ib_destroy_cq(cq); + if (err) + pr_info("destroying uverbs cq failed: err %d\n", err); + ib_uverbs_release_ucq(file, ev_file, ucq); kfree(ucq); } - /* XXX Free MWs */ - list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - ib_dereg_mr(mr); + err = ib_dereg_mr(mr); + if (err) { + pr_info("user_verbs: couldn't deregister an MR during cleanup.\n"); + pr_info("user_verbs: the system may have become unstable.\n"); + } kfree(uobj); } - mutex_lock(&file->device->ib_dev->xrcd_table_mutex); + mutex_lock(&file->device->xrcd_tree_mutex); list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { struct ib_xrcd *xrcd = uobj->object; - struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1; - struct ib_uxrcd_object *xrcd_uobj = + struct ib_uxrcd_object *uxrcd = container_of(uobj, struct ib_uxrcd_object, uobject); - list_for_each_entry_safe(xrc_qp_obj, tmp1, - &xrcd_uobj->xrc_reg_qp_list, list) { - list_del(&xrc_qp_obj->list); - ib_uverbs_cleanup_xrc_rcv_qp(file, xrcd, - xrc_qp_obj->qp_num); - kfree(xrc_qp_obj); - } - - idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); - ib_uverbs_dealloc_xrcd(file->device->ib_dev, xrcd); - kfree(uobj); + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + ib_uverbs_dealloc_xrcd(file->device, xrcd); + kfree(uxrcd); } - mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + mutex_unlock(&file->device->xrcd_tree_mutex); list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; @@ -405,7 +483,8 @@ static const struct file_operations uverbs_event_fops = { .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, - .fasync = ib_uverbs_event_fasync + .fasync = ib_uverbs_event_fasync, + .llseek = no_llseek, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) @@ -524,21 +603,13 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, NULL, NULL); } -void 
ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, - void *context_ptr) -{ - ib_uverbs_async_handler(context_ptr, event->element.xrc_qp_num, - event->event, NULL, NULL); -} - struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - int is_async, int *fd) + int is_async) { struct ib_uverbs_event_file *ev_file; struct file *filp; - int ret; - ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); + ev_file = kzalloc(sizeof *ev_file, GFP_KERNEL); if (!ev_file) return ERR_PTR(-ENOMEM); @@ -547,43 +618,22 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, INIT_LIST_HEAD(&ev_file->event_list); init_waitqueue_head(&ev_file->poll_wait); ev_file->uverbs_file = uverbs_file; - ev_file->async_queue = NULL; ev_file->is_async = is_async; - ev_file->is_closed = 0; - ev_file->filp = NULL; - - *fd = get_unused_fd(); - if (*fd < 0) { - ret = *fd; - goto err; - } /* * fops_get() can't fail here, because we're coming from a * system call on a uverbs file, which will already have a * module reference. 
*/ -#ifdef __linux__ - filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root), - FMODE_READ, fops_get(&uverbs_event_fops)); -#else filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops)); -#endif - if (!filp) { - ret = -ENFILE; - goto err_fd; - } + if (IS_ERR(filp)) { + kfree(ev_file); + } else { filp->private_data = ev_file; + } return filp; - -err_fd: - put_unused_fd(*fd); - -err: - kfree(ev_file); - return ERR_PTR(ret); } /* @@ -594,16 +644,15 @@ err: struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) { struct ib_uverbs_event_file *ev_file = NULL; - struct file *filp; + struct fd f = fdget(fd); - filp = fget(fd); - if (!filp) + if (!f.file) return NULL; - if (filp->f_op != &uverbs_event_fops) + if (f.file->f_op != &uverbs_event_fops) goto out; - ev_file = filp->private_data; + ev_file = f.file->private_data; if (ev_file->is_async) { ev_file = NULL; goto out; @@ -612,15 +661,225 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) kref_get(&ev_file->ref); out: - fput(filp); + fdput(f); return ev_file; } +static const char *verbs_cmd_str(__u32 cmd) +{ + switch (cmd) { + case IB_USER_VERBS_CMD_GET_CONTEXT: + return "GET_CONTEXT"; + case IB_USER_VERBS_CMD_QUERY_DEVICE: + return "QUERY_DEVICE"; + case IB_USER_VERBS_CMD_QUERY_PORT: + return "QUERY_PORT"; + case IB_USER_VERBS_CMD_ALLOC_PD: + return "ALLOC_PD"; + case IB_USER_VERBS_CMD_DEALLOC_PD: + return "DEALLOC_PD"; + case IB_USER_VERBS_CMD_REG_MR: + return "REG_MR"; + case IB_USER_VERBS_CMD_DEREG_MR: + return "DEREG_MR"; + case IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL: + return "CREATE_COMP_CHANNEL"; + case IB_USER_VERBS_CMD_CREATE_CQ: + return "CREATE_CQ"; + case IB_USER_VERBS_CMD_RESIZE_CQ: + return "RESIZE_CQ"; + case IB_USER_VERBS_CMD_POLL_CQ: + return "POLL_CQ"; + case IB_USER_VERBS_CMD_REQ_NOTIFY_CQ: + return "REQ_NOTIFY_CQ"; + case IB_USER_VERBS_CMD_DESTROY_CQ: + return "DESTROY_CQ"; + case IB_USER_VERBS_CMD_CREATE_QP: + return "CREATE_QP"; + case 
IB_USER_VERBS_CMD_QUERY_QP: + return "QUERY_QP"; + case IB_USER_VERBS_CMD_MODIFY_QP: + return "MODIFY_QP"; + case IB_USER_VERBS_CMD_DESTROY_QP: + return "DESTROY_QP"; + case IB_USER_VERBS_CMD_POST_SEND: + return "POST_SEND"; + case IB_USER_VERBS_CMD_POST_RECV: + return "POST_RECV"; + case IB_USER_VERBS_CMD_POST_SRQ_RECV: + return "POST_SRQ_RECV"; + case IB_USER_VERBS_CMD_CREATE_AH: + return "CREATE_AH"; + case IB_USER_VERBS_CMD_DESTROY_AH: + return "DESTROY_AH"; + case IB_USER_VERBS_CMD_ATTACH_MCAST: + return "ATTACH_MCAST"; + case IB_USER_VERBS_CMD_DETACH_MCAST: + return "DETACH_MCAST"; + case IB_USER_VERBS_CMD_CREATE_SRQ: + return "CREATE_SRQ"; + case IB_USER_VERBS_CMD_MODIFY_SRQ: + return "MODIFY_SRQ"; + case IB_USER_VERBS_CMD_QUERY_SRQ: + return "QUERY_SRQ"; + case IB_USER_VERBS_CMD_DESTROY_SRQ: + return "DESTROY_SRQ"; + case IB_USER_VERBS_CMD_OPEN_XRCD: + return "OPEN_XRCD"; + case IB_USER_VERBS_CMD_CLOSE_XRCD: + return "CLOSE_XRCD"; + case IB_USER_VERBS_CMD_CREATE_XSRQ: + return "CREATE_XSRQ"; + case IB_USER_VERBS_CMD_OPEN_QP: + return "OPEN_QP"; + } + + return "Unknown command"; +} + +enum { + COMMAND_INFO_MASK = 0x1000, +}; + +static ssize_t ib_uverbs_exp_handle_cmd(struct ib_uverbs_file *file, + const char __user *buf, + struct ib_device *dev, + struct ib_uverbs_cmd_hdr *hdr, + size_t count, + int legacy_ex_cmd) +{ + struct ib_udata ucore; + struct ib_udata uhw; + struct ib_uverbs_ex_cmd_hdr ex_hdr; + __u32 command = hdr->command - IB_USER_VERBS_EXP_CMD_FIRST; + + if (hdr->command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | + IB_USER_VERBS_CMD_COMMAND_MASK)) + return -EINVAL; + + if (command >= ARRAY_SIZE(uverbs_exp_cmd_table) || + !uverbs_exp_cmd_table[command]) + return -EINVAL; + + if (!file->ucontext) + return -EINVAL; + + if (!(dev->uverbs_exp_cmd_mask & (1ull << command))) + return -ENOSYS; + + if (legacy_ex_cmd) { + struct ib_uverbs_ex_cmd_hdr_legacy hxl; + struct ib_uverbs_ex_cmd_resp1_legacy resp1; + __u64 response; + ssize_t ret; + + if (count < 
sizeof(hxl)) + return -EINVAL; + + if (copy_from_user(&hxl, buf, sizeof(hxl))) + return -EFAULT; + + if (((hxl.in_words + hxl.provider_in_words) * 4) != count) + return -EINVAL; + + count -= sizeof(hxl); + buf += sizeof(hxl); + if (hxl.out_words || hxl.provider_out_words) { + if (count < sizeof(resp1)) + return -EINVAL; + if (copy_from_user(&resp1, buf, sizeof(resp1))) + return -EFAULT; + response = resp1.response; + if (!response) + return -EINVAL; + + /* + * Change user buffer to comply with new extension format. + */ + if (sizeof(resp1.comp_mask) != sizeof(resp1.response)) + return -EFAULT; + buf += sizeof(resp1.comp_mask); + if (copy_to_user(__DECONST(void __user *, buf), &resp1.comp_mask, + sizeof(resp1.response))) + return -EFAULT; + + } else { + response = 0; + } + + INIT_UDATA_EX(&ucore, + (hxl.in_words) ? buf : 0, + response, + hxl.in_words * 4, + hxl.out_words * 4); + + INIT_UDATA_EX(&uhw, + (hxl.provider_in_words) ? buf + ucore.inlen : 0, + (hxl.provider_out_words) ? response + ucore.outlen : 0, + hxl.provider_in_words * 4, + hxl.provider_out_words * 4); + + ret = uverbs_exp_cmd_table[command](file, &ucore, &uhw); + /* + * UnChange user buffer + */ + if (response && copy_to_user(__DECONST(void __user *, buf), &resp1.response, sizeof(resp1.response))) + return -EFAULT; + + return ret; + } else { + if (count < (sizeof(hdr) + sizeof(ex_hdr))) + return -EINVAL; + + if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) + return -EFAULT; + + buf += sizeof(hdr) + sizeof(ex_hdr); + + if ((hdr->in_words + ex_hdr.provider_in_words) * 8 != count) + return -EINVAL; + + if (ex_hdr.response) { + if (!hdr->out_words && !ex_hdr.provider_out_words) + return -EINVAL; + } else { + if (hdr->out_words || ex_hdr.provider_out_words) + return -EINVAL; + } + + INIT_UDATA_EX(&ucore, + (hdr->in_words) ? buf : 0, + (unsigned long)ex_hdr.response, + hdr->in_words * 8, + hdr->out_words * 8); + + INIT_UDATA_EX(&uhw, + (ex_hdr.provider_in_words) ? 
buf + ucore.inlen : 0, + (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0, + ex_hdr.provider_in_words * 8, + ex_hdr.provider_out_words * 8); + + return uverbs_exp_cmd_table[command](file, &ucore, &uhw); + } +} + static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; + struct ib_device *dev = file->device->ib_dev; struct ib_uverbs_cmd_hdr hdr; + struct timespec ts1; + struct timespec ts2; + ktime_t t1, t2, delta; + s64 ds; + ssize_t ret; + u64 dividend; + u32 divisor; + __u32 flags; + __u32 command; + int legacy_ex_cmd = 0; + size_t written_count = count; if (count < sizeof hdr) return -EINVAL; @@ -628,20 +887,126 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; + /* + * For BWD compatibility change old style extension verbs commands + * to their equivalent experimental command. + */ + if ((hdr.command >= IB_USER_VERBS_LEGACY_CMD_FIRST) && + (hdr.command <= IB_USER_VERBS_LEGACY_EX_CMD_LAST)) { + hdr.command += IB_USER_VERBS_EXP_CMD_FIRST - + IB_USER_VERBS_LEGACY_CMD_FIRST; + legacy_ex_cmd = 1; + } + + flags = (hdr.command & + IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; + command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + + ktime_get_ts(&ts1); + if (!flags && (command >= IB_USER_VERBS_EXP_CMD_FIRST)) { + ret = ib_uverbs_exp_handle_cmd(file, buf, dev, &hdr, count, legacy_ex_cmd); + } else if (!flags) { + if (command >= ARRAY_SIZE(uverbs_cmd_table) || + !uverbs_cmd_table[command]) + return -EINVAL; + + if (!file->ucontext && + command != IB_USER_VERBS_CMD_GET_CONTEXT) + return -EINVAL; + + if (!(dev->uverbs_cmd_mask & (1ull << command))) + return -ENOSYS; + if (hdr.in_words * 4 != count) return -EINVAL; - if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[hdr.command] || - !(file->device->ib_dev->uverbs_cmd_mask & (1ull << 
hdr.command))) + ret = uverbs_cmd_table[command](file, + buf + sizeof(hdr), + hdr.in_words * 4, + hdr.out_words * 4); + } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { + struct ib_udata ucore; + struct ib_udata uhw; + struct ib_uverbs_ex_cmd_hdr ex_hdr; + + if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | + IB_USER_VERBS_CMD_COMMAND_MASK)) return -EINVAL; - if (!file->ucontext && - hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) + if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || + !uverbs_ex_cmd_table[command]) + return -EINVAL; + + if (!file->ucontext) + return -EINVAL; + + if (!(dev->uverbs_ex_cmd_mask & (1ull << command))) + return -ENOSYS; + + if (count < (sizeof(hdr) + sizeof(ex_hdr))) + return -EINVAL; + + if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) + return -EFAULT; + + count -= sizeof(hdr) + sizeof(ex_hdr); + buf += sizeof(hdr) + sizeof(ex_hdr); + + if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) + return -EINVAL; + + if (ex_hdr.response) { + if (!hdr.out_words && !ex_hdr.provider_out_words) + return -EINVAL; + } else { + if (hdr.out_words || ex_hdr.provider_out_words) return -EINVAL; + } + + INIT_UDATA_EX(&ucore, + (hdr.in_words) ? buf : 0, + (unsigned long)ex_hdr.response, + hdr.in_words * 8, + hdr.out_words * 8); + + INIT_UDATA_EX(&uhw, + (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0, + (ex_hdr.provider_out_words) ? 
ex_hdr.response + ucore.outlen : 0, + ex_hdr.provider_in_words * 8, + ex_hdr.provider_out_words * 8); + + ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw); + + if (ret) + return ret; + + return written_count; + + } else { + return -EFAULT; + } - return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, - hdr.in_words * 4, hdr.out_words * 4); + if ((dev->cmd_perf & (COMMAND_INFO_MASK - 1)) == hdr.command) { + ktime_get_ts(&ts2); + t1 = timespec_to_ktime(ts1); + t2 = timespec_to_ktime(ts2); + delta = ktime_sub(t2, t1); + ds = ktime_to_ns(delta); + spin_lock(&dev->cmd_perf_lock); + dividend = dev->cmd_avg * dev->cmd_n + ds; + ++dev->cmd_n; + divisor = dev->cmd_n; + do_div(dividend, divisor); + dev->cmd_avg = dividend; + spin_unlock(&dev->cmd_perf_lock); + if (dev->cmd_perf & COMMAND_INFO_MASK) { + pr_info("%s: %s execution time = %lld nsec\n", + file->device->ib_dev->name, + verbs_cmd_str(hdr.command), + (long long)ds); + } + } + return ret; } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) @@ -653,18 +1018,51 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) else return file->device->ib_dev->mmap(file->ucontext, vma); } +/* XXX Not supported in FreeBSD */ +#if 0 +static unsigned long ib_uverbs_get_unmapped_area(struct file *filp, + unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct ib_uverbs_file *file = filp->private_data; + + if (!file->ucontext) + return -ENODEV; + else { + if (!file->device->ib_dev->get_unmapped_area) + return current->mm->get_unmapped_area(filp, addr, len, + pgoff, flags); + + return file->device->ib_dev->get_unmapped_area(filp, addr, len, + pgoff, flags); + } +} +#endif + +static long ib_uverbs_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct ib_uverbs_file *file = filp->private_data; + + if (!file->device->ib_dev->ioctl) + return -ENOTSUPP; + + if (!file->ucontext) + return -ENODEV; + else + /* provider should 
provide it's own locking mechanism */ + return file->device->ib_dev->ioctl(file->ucontext, cmd, arg); +} /* * ib_uverbs_open() does not need the BKL: * - * - dev_table[] accesses are protected by map_lock, the - * ib_uverbs_device structures are properly reference counted, and + * - the ib_uverbs_device structures are properly reference counted and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; - * - the device is added to dev_table[] as the last part of module - * initialization, the open method will either immediately run - * -ENXIO, or all required initialization will be done. + * - the open method will either immediately run -ENXIO, or all + * required initialization will be done. */ static int ib_uverbs_open(struct inode *inode, struct file *filp) { @@ -672,13 +1070,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) struct ib_uverbs_file *file; int ret; - spin_lock(&map_lock); - dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR]; + dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev); if (dev) kref_get(&dev->ref); - spin_unlock(&map_lock); - - if (!dev) + else return -ENXIO; if (!try_module_get(dev->ib_dev->owner)) { @@ -700,7 +1095,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) filp->private_data = file; - return 0; + return nonseekable_open(inode, filp); err_module: module_put(dev->ib_dev->owner); @@ -728,7 +1123,9 @@ static const struct file_operations uverbs_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .open = ib_uverbs_open, - .release = ib_uverbs_close + .release = ib_uverbs_close, + .llseek = no_llseek, + .unlocked_ioctl = ib_uverbs_ioctl, }; static const struct file_operations uverbs_mmap_fops = { @@ -736,7 +1133,13 @@ static const struct file_operations uverbs_mmap_fops = { .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, - .release = 
ib_uverbs_close + .release = ib_uverbs_close, + .llseek = no_llseek, +/* XXX Not supported in FreeBSD */ +#if 0 + .get_unmapped_area = ib_uverbs_get_unmapped_area, +#endif + .unlocked_ioctl = ib_uverbs_ioctl, }; static struct ib_client uverbs_client = { @@ -757,6 +1160,18 @@ static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static ssize_t show_dev_ref_cnt(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct ib_uverbs_device *dev = dev_get_drvdata(device); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%d\n", dev->ref.count); +} +static DEVICE_ATTR(ref_cnt, S_IRUGO, show_dev_ref_cnt, NULL); + static ssize_t show_dev_abi_version(struct device *device, struct device_attribute *attr, char *buf) { @@ -773,8 +1188,36 @@ static ssize_t show_abi_version(struct class *class, struct class_attribute *att { return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); } + static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); +static dev_t overflow_maj; +static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES); + +/* + * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by + * requesting a new major number and doubling the number of max devices we + * support. It's stupid, but simple. 
+ */ +static int find_overflow_devnum(void) +{ + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, + "infiniband_verbs"); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES); + if (ret >= IB_UVERBS_MAX_DEVICES) + return -1; + + return ret; +} #include <linux/pci.h> static ssize_t @@ -801,6 +1244,7 @@ show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf) return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->vendor); } + static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL); struct attribute *device_attrs[] = @@ -817,6 +1261,8 @@ static struct attribute_group device_group = { static void ib_uverbs_add_one(struct ib_device *device) { + int devnum; + dev_t base; struct ib_uverbs_device *uverbs_dev; if (!device->alloc_ucontext) @@ -828,55 +1274,66 @@ static void ib_uverbs_add_one(struct ib_device *device) kref_init(&uverbs_dev->ref); init_completion(&uverbs_dev->comp); + uverbs_dev->xrcd_tree = RB_ROOT; + mutex_init(&uverbs_dev->xrcd_tree_mutex); spin_lock(&map_lock); - uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); - if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { + devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); + if (devnum >= IB_UVERBS_MAX_DEVICES) { spin_unlock(&map_lock); + devnum = find_overflow_devnum(); + if (devnum < 0) goto err; + + spin_lock(&map_lock); + uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + uverbs_dev->devnum = devnum; + base = devnum + IB_UVERBS_BASE_DEV; + set_bit(devnum, dev_map); } - set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); uverbs_dev->ib_dev = device; uverbs_dev->num_comp_vectors = device->num_comp_vectors; - uverbs_dev->cdev = cdev_alloc(); - if (!uverbs_dev->cdev) 
- goto err; - uverbs_dev->cdev->owner = THIS_MODULE; - uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; - kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + cdev_init(&uverbs_dev->cdev, NULL); + uverbs_dev->cdev.owner = THIS_MODULE; + uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; + kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; uverbs_dev->dev = device_create(uverbs_class, device->dma_device, - uverbs_dev->cdev->dev, uverbs_dev, + uverbs_dev->cdev.dev, uverbs_dev, "uverbs%d", uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) goto err_class; + if (device_create_file(uverbs_dev->dev, &dev_attr_ref_cnt)) + goto err_class; if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group)) goto err_class; - spin_lock(&map_lock); - dev_table[uverbs_dev->devnum] = uverbs_dev; - spin_unlock(&map_lock); - ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: - device_destroy(uverbs_class, uverbs_dev->cdev->dev); + device_destroy(uverbs_class, uverbs_dev->cdev.dev); err_cdev: - cdev_del(uverbs_dev->cdev); - clear_bit(uverbs_dev->devnum, dev_map); + cdev_del(&uverbs_dev->cdev); + if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); err: kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); @@ -894,42 +1351,30 @@ static void ib_uverbs_remove_one(struct ib_device *device) sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group); dev_set_drvdata(uverbs_dev->dev, NULL); - device_destroy(uverbs_class, uverbs_dev->cdev->dev); - cdev_del(uverbs_dev->cdev); - - spin_lock(&map_lock); - dev_table[uverbs_dev->devnum] = 
NULL; - spin_unlock(&map_lock); + device_destroy(uverbs_class, uverbs_dev->cdev.dev); + cdev_del(&uverbs_dev->cdev); + if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) clear_bit(uverbs_dev->devnum, dev_map); + else + clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); wait_for_completion(&uverbs_dev->comp); kfree(uverbs_dev); } -#ifdef __linux__ -static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) + +static char *uverbs_devnode(struct device *dev, umode_t *mode) { - return get_sb_pseudo(fs_type, "infinibandevent:", NULL, - INFINIBANDEVENTFS_MAGIC, mnt); + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } -static struct file_system_type uverbs_event_fs = { - /* No owner field so module can be unloaded */ - .name = "infinibandeventfs", - .get_sb = uverbs_event_get_sb, - .kill_sb = kill_litter_super -}; -#endif - static int __init ib_uverbs_init(void) { int ret; - spin_lock_init(&map_lock); - ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { @@ -944,43 +1389,22 @@ static int __init ib_uverbs_init(void) goto out_chrdev; } + uverbs_class->devnode = uverbs_devnode; + ret = class_create_file(uverbs_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); goto out_class; } -#ifdef __linux__ - ret = register_filesystem(&uverbs_event_fs); - if (ret) { - printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n"); - goto out_class; - } - - uverbs_event_mnt = kern_mount(&uverbs_event_fs); - if (IS_ERR(uverbs_event_mnt)) { - ret = PTR_ERR(uverbs_event_mnt); - printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n"); - goto out_fs; - } -#endif - ret = ib_register_client(&uverbs_client); if (ret) { printk(KERN_ERR "user_verbs: couldn't register client\n"); - 
goto out_mnt; + goto out_class; } return 0; -out_mnt: -#ifdef __linux__ - mntput(uverbs_event_mnt); - -out_fs: - unregister_filesystem(&uverbs_event_fs); -#endif - out_class: class_destroy(uverbs_class); @@ -994,12 +1418,10 @@ out: static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); -#ifdef __linux__ - mntput(uverbs_event_mnt); - unregister_filesystem(&uverbs_event_fs); -#endif class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); idr_destroy(&ib_uverbs_mw_idr); diff --git a/sys/ofed/drivers/infiniband/core/uverbs_marshall.c b/sys/ofed/drivers/infiniband/core/uverbs_marshall.c index 5440da0..a541882 100644 --- a/sys/ofed/drivers/infiniband/core/uverbs_marshall.c +++ b/sys/ofed/drivers/infiniband/core/uverbs_marshall.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include <linux/module.h> #include <rdma/ib_marshall.h> void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, @@ -40,18 +41,21 @@ void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, dst->grh.sgid_index = src->grh.sgid_index; dst->grh.hop_limit = src->grh.hop_limit; dst->grh.traffic_class = src->grh.traffic_class; + memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); dst->dlid = src->dlid; dst->sl = src->sl; dst->src_path_bits = src->src_path_bits; dst->static_rate = src->static_rate; dst->is_global = src->ah_flags & IB_AH_GRH ? 
1 : 0; dst->port_num = src->port_num; + dst->reserved = 0; } EXPORT_SYMBOL(ib_copy_ah_attr_to_user); void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src) { + dst->qp_state = src->qp_state; dst->cur_qp_state = src->cur_qp_state; dst->path_mtu = src->path_mtu; dst->path_mig_state = src->path_mig_state; @@ -83,6 +87,7 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, dst->rnr_retry = src->rnr_retry; dst->alt_port_num = src->alt_port_num; dst->alt_timeout = src->alt_timeout; + memset(dst->reserved, 0, sizeof(dst->reserved)); } EXPORT_SYMBOL(ib_copy_qp_attr_to_user); diff --git a/sys/ofed/drivers/infiniband/core/verbs.c b/sys/ofed/drivers/infiniband/core/verbs.c index 023564f..51a0ed5 100644 --- a/sys/ofed/drivers/infiniband/core/verbs.c +++ b/sys/ofed/drivers/infiniband/core/verbs.c @@ -38,10 +38,13 @@ #include <linux/errno.h> #include <linux/err.h> +#include <linux/module.h> #include <linux/string.h> +#include <linux/slab.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> +#include <rdma/ib_addr.h> int ib_rate_to_mult(enum ib_rate rate) { @@ -77,6 +80,31 @@ enum ib_rate mult_to_ib_rate(int mult) } EXPORT_SYMBOL(mult_to_ib_rate); +int ib_rate_to_mbps(enum ib_rate rate) +{ + switch (rate) { + case IB_RATE_2_5_GBPS: return 2500; + case IB_RATE_5_GBPS: return 5000; + case IB_RATE_10_GBPS: return 10000; + case IB_RATE_20_GBPS: return 20000; + case IB_RATE_30_GBPS: return 30000; + case IB_RATE_40_GBPS: return 40000; + case IB_RATE_60_GBPS: return 60000; + case IB_RATE_80_GBPS: return 80000; + case IB_RATE_120_GBPS: return 120000; + case IB_RATE_14_GBPS: return 14062; + case IB_RATE_56_GBPS: return 56250; + case IB_RATE_112_GBPS: return 112500; + case IB_RATE_168_GBPS: return 168750; + case IB_RATE_25_GBPS: return 25781; + case IB_RATE_100_GBPS: return 103125; + case IB_RATE_200_GBPS: return 206250; + case IB_RATE_300_GBPS: return 309375; + default: return -1; + } +} +EXPORT_SYMBOL(ib_rate_to_mbps); + enum rdma_transport_type 
rdma_node_get_transport(enum rdma_node_type node_type) { @@ -87,6 +115,8 @@ rdma_node_get_transport(enum rdma_node_type node_type) return RDMA_TRANSPORT_IB; case RDMA_NODE_RNIC: return RDMA_TRANSPORT_IWARP; + case RDMA_NODE_MIC: + return RDMA_TRANSPORT_SCIF; default: BUG(); return 0; @@ -104,6 +134,8 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_ return IB_LINK_LAYER_INFINIBAND; case RDMA_TRANSPORT_IWARP: return IB_LINK_LAYER_ETHERNET; + case RDMA_TRANSPORT_SCIF: + return IB_LINK_LAYER_SCIF; default: return IB_LINK_LAYER_UNSPECIFIED; } @@ -162,8 +194,29 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, u32 flow_class; u16 gid_index; int ret; + int is_eth = (rdma_port_get_link_layer(device, port_num) == + IB_LINK_LAYER_ETHERNET); memset(ah_attr, 0, sizeof *ah_attr); + if (is_eth) { + if (!(wc->wc_flags & IB_WC_GRH)) + return -EPROTOTYPE; + + if (wc->wc_flags & IB_WC_WITH_SMAC && + wc->wc_flags & IB_WC_WITH_VLAN) { + memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); + ah_attr->vlan_id = wc->vlan_id; + } else { + ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, + ah_attr->dmac, &ah_attr->vlan_id); + if (ret) + return ret; + } + } else { + ah_attr->vlan_id = 0xffff; + } + + ah_attr->dlid = wc->slid; ah_attr->sl = wc->sl; ah_attr->src_path_bits = wc->dlid_path_bits; @@ -250,45 +303,20 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, srq->uobject = NULL; srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; - srq->ext.xrc.cq = NULL; - srq->ext.xrc.xrcd = NULL; - atomic_inc(&pd->usecnt); - atomic_set(&srq->usecnt, 0); + srq->srq_type = srq_init_attr->srq_type; + if (srq->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; + srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq; + atomic_inc(&srq->ext.xrc.xrcd->usecnt); + atomic_inc(&srq->ext.xrc.cq->usecnt); } - - return srq; -} -EXPORT_SYMBOL(ib_create_srq); - -struct ib_srq 
*ib_create_xrc_srq(struct ib_pd *pd, - struct ib_cq *xrc_cq, - struct ib_xrcd *xrcd, - struct ib_srq_init_attr *srq_init_attr) -{ - struct ib_srq *srq; - - if (!pd->device->create_xrc_srq) - return ERR_PTR(-ENOSYS); - - srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, srq_init_attr, NULL); - - if (!IS_ERR(srq)) { - srq->device = pd->device; - srq->pd = pd; - srq->uobject = NULL; - srq->event_handler = srq_init_attr->event_handler; - srq->srq_context = srq_init_attr->srq_context; - srq->ext.xrc.cq = xrc_cq; - srq->ext.xrc.xrcd = xrcd; atomic_inc(&pd->usecnt); - atomic_inc(&xrcd->usecnt); - atomic_inc(&xrc_cq->usecnt); atomic_set(&srq->usecnt, 0); } return srq; } -EXPORT_SYMBOL(ib_create_xrc_srq); +EXPORT_SYMBOL(ib_create_srq); int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, @@ -308,27 +336,39 @@ int ib_query_srq(struct ib_srq *srq, } EXPORT_SYMBOL(ib_query_srq); +int ib_query_values(struct ib_device *device, + int q_values, struct ib_device_values *values) +{ + return device->query_values ? 
+ device->query_values(device, q_values, values) : -ENOSYS; +} +EXPORT_SYMBOL(ib_query_values); + int ib_destroy_srq(struct ib_srq *srq) { struct ib_pd *pd; - struct ib_cq *xrc_cq; - struct ib_xrcd *xrcd; + enum ib_srq_type srq_type; + struct ib_xrcd *uninitialized_var(xrcd); + struct ib_cq *uninitialized_var(cq); int ret; if (atomic_read(&srq->usecnt)) return -EBUSY; pd = srq->pd; - xrc_cq = srq->ext.xrc.cq; + srq_type = srq->srq_type; + if (srq_type == IB_SRQT_XRC) { xrcd = srq->ext.xrc.xrcd; + cq = srq->ext.xrc.cq; + } ret = srq->device->destroy_srq(srq); if (!ret) { atomic_dec(&pd->usecnt); - if (xrc_cq) - atomic_dec(&xrc_cq->usecnt); - if (xrcd) + if (srq_type == IB_SRQT_XRC) { atomic_dec(&xrcd->usecnt); + atomic_dec(&cq->usecnt); + } } return ret; @@ -337,32 +377,130 @@ EXPORT_SYMBOL(ib_destroy_srq); /* Queue pairs */ +static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) +{ + struct ib_qp *qp = context; + unsigned long flags; + + /* The code below must be synced with deletions of existing qps (ib_close_qp) -- + * because a qp from the list may be closed during the scan, resulting in a kernel Oops. 
+ */ + spin_lock_irqsave(&qp->device->event_handler_lock, flags); + list_for_each_entry(event->element.qp, &qp->open_list, open_list) + if (event->element.qp->event_handler) + event->element.qp->event_handler(event, event->element.qp->qp_context); + spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); +} + +static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) +{ + mutex_lock(&xrcd->tgt_qp_mutex); + list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); + mutex_unlock(&xrcd->tgt_qp_mutex); +} + +static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, + void (*event_handler)(struct ib_event *, void *), + void *qp_context) +{ + struct ib_qp *qp; + unsigned long flags; + + qp = kzalloc(sizeof *qp, GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->real_qp = real_qp; + atomic_inc(&real_qp->usecnt); + qp->device = real_qp->device; + qp->event_handler = event_handler; + qp->qp_context = qp_context; + qp->qp_num = real_qp->qp_num; + qp->qp_type = real_qp->qp_type; + + spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + list_add(&qp->open_list, &real_qp->open_list); + spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + + return qp; +} + +struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, + struct ib_qp_open_attr *qp_open_attr) +{ + struct ib_qp *qp, *real_qp; + + if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) + return ERR_PTR(-EINVAL); + + qp = ERR_PTR(-EINVAL); + mutex_lock(&xrcd->tgt_qp_mutex); + list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { + if (real_qp->qp_num == qp_open_attr->qp_num) { + qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, + qp_open_attr->qp_context); + break; + } + } + mutex_unlock(&xrcd->tgt_qp_mutex); + return qp; +} +EXPORT_SYMBOL(ib_open_qp); + struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { - struct ib_qp *qp; + struct ib_qp *qp, *real_qp; + struct ib_device *device; - qp = pd->device->create_qp(pd, qp_init_attr, NULL); + device = pd 
? pd->device : qp_init_attr->xrcd->device; + qp = device->create_qp(pd, qp_init_attr, NULL); if (!IS_ERR(qp)) { - qp->device = pd->device; - qp->pd = pd; - qp->send_cq = qp_init_attr->send_cq; - qp->recv_cq = qp_init_attr->recv_cq; - qp->srq = qp_init_attr->srq; + qp->device = device; + qp->real_qp = qp; qp->uobject = NULL; + qp->qp_type = qp_init_attr->qp_type; + + atomic_set(&qp->usecnt, 0); + if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { + qp->event_handler = __ib_shared_qp_event_handler; + qp->qp_context = qp; + qp->pd = NULL; + qp->send_cq = qp->recv_cq = NULL; + qp->srq = NULL; + qp->xrcd = qp_init_attr->xrcd; + atomic_inc(&qp_init_attr->xrcd->usecnt); + INIT_LIST_HEAD(&qp->open_list); + + real_qp = qp; + qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, + qp_init_attr->qp_context); + if (!IS_ERR(qp)) + __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); + else + real_qp->device->destroy_qp(real_qp); + } else { qp->event_handler = qp_init_attr->event_handler; qp->qp_context = qp_init_attr->qp_context; - qp->qp_type = qp_init_attr->qp_type; - qp->xrcd = qp->qp_type == IB_QPT_XRC ? 
- qp_init_attr->xrcd : NULL; - atomic_inc(&pd->usecnt); - atomic_inc(&qp_init_attr->send_cq->usecnt); + if (qp_init_attr->qp_type == IB_QPT_XRC_INI) { + qp->recv_cq = NULL; + qp->srq = NULL; + } else { + qp->recv_cq = qp_init_attr->recv_cq; atomic_inc(&qp_init_attr->recv_cq->usecnt); - if (qp_init_attr->srq) + qp->srq = qp_init_attr->srq; + if (qp->srq) atomic_inc(&qp_init_attr->srq->usecnt); - if (qp->qp_type == IB_QPT_XRC) - atomic_inc(&qp->xrcd->usecnt); + } + + qp->pd = pd; + qp->send_cq = qp_init_attr->send_cq; + qp->xrcd = NULL; + + atomic_inc(&pd->usecnt); + atomic_inc(&qp_init_attr->send_cq->usecnt); + } } return qp; @@ -371,8 +509,10 @@ EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; - enum ib_qp_attr_mask req_param[IB_QPT_RAW_PACKET + 1]; - enum ib_qp_attr_mask opt_param[IB_QPT_RAW_PACKET + 1]; + enum ib_qp_attr_mask req_param[IB_QPT_MAX]; + enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX]; + enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; + enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -389,13 +529,24 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), - [IB_QPT_XRC] = (IB_QP_PKEY_INDEX | + [IB_QPT_DC_INI] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS | + IB_QP_DC_KEY), + [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), + }, + .opt_param = { + [IB_QPT_UD] = IB_QP_GROUP_RSS, + [IB_QPT_RAW_PACKET] = IB_QP_GROUP_RSS } }, }, @@ -414,7 +565,13 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), - [IB_QPT_XRC] = (IB_QP_PKEY_INDEX | + [IB_QPT_DC_INI] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | 
+ IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | @@ -436,13 +593,26 @@ static const struct { IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), - [IB_QPT_XRC] = (IB_QP_AV | + [IB_QPT_DC_INI] = (IB_QP_PATH_MTU | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC_INI] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN), + [IB_QPT_XRC_TGT] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), }, + .req_param_add_eth = { + [IB_QPT_RC] = (IB_QP_SMAC), + [IB_QPT_UC] = (IB_QP_SMAC), + [IB_QPT_XRC_INI] = (IB_QP_SMAC), + [IB_QPT_XRC_TGT] = (IB_QP_SMAC) + }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), @@ -452,13 +622,34 @@ static const struct { [IB_QPT_RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), - [IB_QPT_XRC] = (IB_QP_ALT_PATH | + [IB_QPT_DC_INI] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_AV, + }, + .opt_param_add_eth = { + [IB_QPT_RC] = (IB_QP_ALT_SMAC | + IB_QP_VID | + IB_QP_ALT_VID), + [IB_QPT_UC] = (IB_QP_ALT_SMAC | + IB_QP_VID | + IB_QP_ALT_VID), + [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC | + IB_QP_VID | + IB_QP_ALT_VID), + [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC | + IB_QP_VID | + IB_QP_ALT_VID) } } }, @@ -475,11 +666,17 @@ static const struct { IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), - [IB_QPT_XRC] = (IB_QP_TIMEOUT | + [IB_QPT_DC_INI] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_SQ_PSN | 
IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT | + IB_QP_SQ_PSN), [IB_QPT_SMI] = IB_QP_SQ_PSN, [IB_QPT_GSI] = IB_QP_SQ_PSN, }, @@ -495,7 +692,16 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), - [IB_QPT_XRC] = (IB_QP_CUR_STATE | + [IB_QPT_DC_INI] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | @@ -524,7 +730,16 @@ static const struct { IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), - [IB_QPT_XRC] = (IB_QP_CUR_STATE | + [IB_QPT_DC_INI] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | + IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | @@ -541,7 +756,8 @@ static const struct { [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, - [IB_QPT_XRC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? 
*/ [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY } @@ -564,7 +780,11 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), - [IB_QPT_XRC] = (IB_QP_CUR_STATE | + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | @@ -597,12 +817,19 @@ static const struct { IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), - [IB_QPT_XRC] = (IB_QP_PORT | + [IB_QPT_XRC_INI] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -640,7 +867,8 @@ static const struct { }; int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, - enum ib_qp_type type, enum ib_qp_attr_mask mask) + enum ib_qp_type type, enum ib_qp_attr_mask mask, + enum rdma_link_layer ll) { enum ib_qp_attr_mask req_param, opt_param; @@ -659,6 +887,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, req_param = qp_state_table[cur_state][next_state].req_param[type]; opt_param = qp_state_table[cur_state][next_state].opt_param[type]; + if (ll == IB_LINK_LAYER_ETHERNET) { + req_param |= qp_state_table[cur_state][next_state]. + req_param_add_eth[type]; + opt_param |= qp_state_table[cur_state][next_state]. 
+ opt_param_add_eth[type]; + } + if ((mask & req_param) != req_param) return 0; @@ -673,7 +908,13 @@ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL); + int ret; + + ret = qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); + if (!ret && (qp_attr_mask & IB_QP_PORT)) + qp->port_num = qp_attr->port_num; + + return ret; } EXPORT_SYMBOL(ib_modify_qp); @@ -683,35 +924,87 @@ int ib_query_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr) { return qp->device->query_qp ? - qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) : + qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -ENOSYS; } EXPORT_SYMBOL(ib_query_qp); +int ib_close_qp(struct ib_qp *qp) +{ + struct ib_qp *real_qp; + unsigned long flags; + + real_qp = qp->real_qp; + if (real_qp == qp) + return -EINVAL; + + spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + list_del(&qp->open_list); + spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + + atomic_dec(&real_qp->usecnt); + kfree(qp); + + return 0; +} +EXPORT_SYMBOL(ib_close_qp); + +static int __ib_destroy_shared_qp(struct ib_qp *qp) +{ + struct ib_xrcd *xrcd; + struct ib_qp *real_qp; + int ret; + + real_qp = qp->real_qp; + xrcd = real_qp->xrcd; + + mutex_lock(&xrcd->tgt_qp_mutex); + ib_close_qp(qp); + if (atomic_read(&real_qp->usecnt) == 0) + list_del(&real_qp->xrcd_list); + else + real_qp = NULL; + mutex_unlock(&xrcd->tgt_qp_mutex); + + if (real_qp) { + ret = ib_destroy_qp(real_qp); + if (!ret) + atomic_dec(&xrcd->usecnt); + else + __ib_insert_xrcd_qp(xrcd, real_qp); + } + + return 0; +} + int ib_destroy_qp(struct ib_qp *qp) { struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; - struct ib_xrcd *xrcd; - enum ib_qp_type qp_type = qp->qp_type; int ret; + if (atomic_read(&qp->usecnt)) + return -EBUSY; + + if (qp->real_qp != qp) + return __ib_destroy_shared_qp(qp); + pd = 
qp->pd; scq = qp->send_cq; rcq = qp->recv_cq; srq = qp->srq; - xrcd = qp->xrcd; ret = qp->device->destroy_qp(qp); if (!ret) { + if (pd) atomic_dec(&pd->usecnt); + if (scq) atomic_dec(&scq->usecnt); + if (rcq) atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); - if (qp_type == IB_QPT_XRC) - atomic_dec(&xrcd->usecnt); } return ret; @@ -726,8 +1019,13 @@ struct ib_cq *ib_create_cq(struct ib_device *device, void *cq_context, int cqe, int comp_vector) { struct ib_cq *cq; + struct ib_cq_init_attr attr = { + .cqe = cqe, + .comp_vector = comp_vector, + .flags = 0, + }; - cq = device->create_cq(device, cqe, comp_vector, NULL, NULL); + cq = device->create_cq(device, &attr, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; @@ -742,10 +1040,12 @@ struct ib_cq *ib_create_cq(struct ib_device *device, } EXPORT_SYMBOL(ib_create_cq); -int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +int ib_modify_cq(struct ib_cq *cq, + struct ib_cq_attr *cq_attr, + int cq_attr_mask) { return cq->device->modify_cq ? 
- cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS; + cq->device->modify_cq(cq, cq_attr, cq_attr_mask) : -ENOSYS; } EXPORT_SYMBOL(ib_modify_cq); @@ -770,6 +1070,11 @@ EXPORT_SYMBOL(ib_resize_cq); struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) { struct ib_mr *mr; + int err; + + err = ib_check_mr_access(mr_access_flags); + if (err) + return ERR_PTR(err); mr = pd->device->get_dma_mr(pd, mr_access_flags); @@ -792,6 +1097,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, u64 *iova_start) { struct ib_mr *mr; + int err; + + err = ib_check_mr_access(mr_access_flags); + if (err) + return ERR_PTR(err); if (!pd->device->reg_phys_mr) return ERR_PTR(-ENOSYS); @@ -822,6 +1132,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr, struct ib_pd *old_pd; int ret; + ret = ib_check_mr_access(mr_access_flags); + if (ret) + return ret; + if (!mr->device->rereg_phys_mr) return -ENOSYS; @@ -867,6 +1181,45 @@ int ib_dereg_mr(struct ib_mr *mr) } EXPORT_SYMBOL(ib_dereg_mr); +struct ib_mr *ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct ib_mr *mr; + + if (!pd->device->create_mr) + return ERR_PTR(-ENOSYS); + + mr = pd->device->create_mr(pd, mr_init_attr); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + } + + return mr; +} +EXPORT_SYMBOL(ib_create_mr); + +int ib_destroy_mr(struct ib_mr *mr) +{ + struct ib_pd *pd; + int ret; + + if (atomic_read(&mr->usecnt)) + return -EBUSY; + + pd = mr->pd; + ret = mr->device->destroy_mr(mr); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_destroy_mr); + struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct ib_mr *mr; @@ -915,18 +1268,19 @@ EXPORT_SYMBOL(ib_free_fast_reg_page_list); /* Memory windows */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd) +struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct ib_mw *mw; if 
(!pd->device->alloc_mw) return ERR_PTR(-ENOSYS); - mw = pd->device->alloc_mw(pd); + mw = pd->device->alloc_mw(pd, type); if (!IS_ERR(mw)) { mw->device = pd->device; mw->pd = pd; mw->uobject = NULL; + mw->type = type; atomic_inc(&pd->usecnt); } @@ -1000,58 +1354,58 @@ EXPORT_SYMBOL(ib_dealloc_fmr); int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { + int ret; + if (!qp->device->attach_mcast) return -ENOSYS; switch (rdma_node_get_transport(qp->device->node_type)) { case RDMA_TRANSPORT_IB: - if (qp->qp_type == IB_QPT_RAW_PACKET) { - /* In raw Etherent mgids the 63 msb's should be 0 */ - if (gid->global.subnet_prefix & cpu_to_be64(~1ULL)) - return -EINVAL; - } else if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) && + qp->qp_type != IB_QPT_RAW_PACKET) return -EINVAL; break; case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_SCIF: if (qp->qp_type != IB_QPT_RAW_PACKET) return -EINVAL; break; } - return qp->device->attach_mcast(qp, gid, lid); + + ret = qp->device->attach_mcast(qp, gid, lid); + if (!ret) + atomic_inc(&qp->usecnt); + return ret; } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { + int ret; + if (!qp->device->detach_mcast) return -ENOSYS; switch (rdma_node_get_transport(qp->device->node_type)) { case RDMA_TRANSPORT_IB: - if (qp->qp_type == IB_QPT_RAW_PACKET) { - /* In raw Etherent mgids the 63 msb's should be 0 */ - if (gid->global.subnet_prefix & cpu_to_be64(~1ULL)) - return -EINVAL; - } else if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) && + qp->qp_type != IB_QPT_RAW_PACKET) return -EINVAL; break; case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_SCIF: + if (qp->qp_type != IB_QPT_RAW_PACKET) return -EINVAL; break; } - return qp->device->detach_mcast(qp, gid, lid); -} -EXPORT_SYMBOL(ib_detach_mcast); -int ib_dealloc_xrcd(struct ib_xrcd *xrcd) -{ - if 
(atomic_read(&xrcd->usecnt)) - return -EBUSY; - - return xrcd->device->dealloc_xrcd(xrcd); + ret = qp->device->detach_mcast(qp, gid, lid); + if (!ret) + atomic_dec(&qp->usecnt); + return ret; } -EXPORT_SYMBOL(ib_dealloc_xrcd); +EXPORT_SYMBOL(ib_detach_mcast); struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device) { @@ -1064,10 +1418,119 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device) if (!IS_ERR(xrcd)) { xrcd->device = device; xrcd->inode = NULL; - xrcd->uobject = NULL; atomic_set(&xrcd->usecnt, 0); + mutex_init(&xrcd->tgt_qp_mutex); + INIT_LIST_HEAD(&xrcd->tgt_qp_list); } + return xrcd; } EXPORT_SYMBOL(ib_alloc_xrcd); +int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + struct ib_qp *qp; + int ret; + + if (atomic_read(&xrcd->usecnt)) + return -EBUSY; + + while (!list_empty(&xrcd->tgt_qp_list)) { + qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); + ret = ib_destroy_qp(qp); + if (ret) + return ret; + } + + return xrcd->device->dealloc_xrcd(xrcd); +} +EXPORT_SYMBOL(ib_dealloc_xrcd); + +struct ib_flow *ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + struct ib_flow *flow_id; + if (!qp->device->create_flow) + return ERR_PTR(-ENOSYS); + + flow_id = qp->device->create_flow(qp, flow_attr, domain); + if (!IS_ERR(flow_id)) + atomic_inc(&qp->usecnt); + return flow_id; +} +EXPORT_SYMBOL(ib_create_flow); + +int ib_destroy_flow(struct ib_flow *flow_id) +{ + int err; + struct ib_qp *qp; + + if (!flow_id) + return -EINVAL; + qp = flow_id->qp; + if (!qp->device->destroy_flow) + return -ENOSYS; + err = qp->device->destroy_flow(flow_id); + if (!err) + atomic_dec(&qp->usecnt); + return err; +} +EXPORT_SYMBOL(ib_destroy_flow); + +struct ib_dct *ib_create_dct(struct ib_pd *pd, struct ib_dct_init_attr *attr, + struct ib_udata *udata) +{ + struct ib_dct *dct; + + if (!pd->device->exp_create_dct) + return ERR_PTR(-ENOSYS); + + dct = pd->device->exp_create_dct(pd, attr, udata); + if (!IS_ERR(dct)) { + dct->pd = pd; + dct->srq = 
attr->srq; + dct->cq = attr->cq; + atomic_inc(&dct->srq->usecnt); + atomic_inc(&dct->cq->usecnt); + atomic_inc(&dct->pd->usecnt); + } + + return dct; +} +EXPORT_SYMBOL(ib_create_dct); + +int ib_destroy_dct(struct ib_dct *dct) +{ + int err; + + if (!dct->device->exp_destroy_dct) + return -ENOSYS; + + err = dct->device->exp_destroy_dct(dct); + if (!err) { + atomic_dec(&dct->srq->usecnt); + atomic_dec(&dct->cq->usecnt); + atomic_dec(&dct->pd->usecnt); + } + + return err; +} +EXPORT_SYMBOL(ib_destroy_dct); + +int ib_query_dct(struct ib_dct *dct, struct ib_dct_attr *attr) +{ + if (!dct->device->exp_query_dct) + return -ENOSYS; + + return dct->device->exp_query_dct(dct, attr); +} +EXPORT_SYMBOL(ib_query_dct); + +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + return mr->device->check_mr_status ? + mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; +} +EXPORT_SYMBOL(ib_check_mr_status); diff --git a/sys/ofed/drivers/infiniband/debug/memtrack.c b/sys/ofed/drivers/infiniband/debug/memtrack.c index 199b33b..7082856 100644 --- a/sys/ofed/drivers/infiniband/debug/memtrack.c +++ b/sys/ofed/drivers/infiniband/debug/memtrack.c @@ -24,12 +24,21 @@ #ifdef kmalloc #undef kmalloc #endif +#ifdef kmemdup + #undef kmemdup +#endif #ifdef kfree #undef kfree #endif #ifdef vmalloc #undef vmalloc #endif +#ifdef vzalloc + #undef vzalloc +#endif +#ifdef vzalloc_node + #undef vzalloc_node +#endif #ifdef vfree #undef vfree #endif @@ -39,16 +48,59 @@ #ifdef kmem_cache_free #undef kmem_cache_free #endif +#ifdef ioremap + #undef ioremap +#endif +#ifdef io_mapping_create_wc + #undef io_mapping_create_wc +#endif +#ifdef io_mapping_free + #undef io_mapping_free +#endif +#ifdef ioremap_nocache + #undef ioremap_nocache +#endif +#ifdef iounmap + #undef iounmap +#endif +#ifdef alloc_pages + #undef alloc_pages +#endif +#ifdef free_pages + #undef free_pages +#endif +#ifdef get_page + #undef get_page +#endif +#ifdef put_page + #undef put_page 
+#endif +#ifdef create_workqueue + #undef create_workqueue +#endif +#ifdef create_rt_workqueue + #undef create_rt_workqueue +#endif +#ifdef create_freezeable_workqueue + #undef create_freezeable_workqueue +#endif +#ifdef create_singlethread_workqueue + #undef create_singlethread_workqueue +#endif +#ifdef destroy_workqueue + #undef destroy_workqueue +#endif #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/vmalloc.h> -#include <linux/version.h> +#include <linux/mm.h> #include <asm/uaccess.h> #include <linux/proc_fs.h> -#include <memtrack.h> +#include <linux/random.h> +#include "memtrack.h" #include <linux/moduleparam.h> @@ -67,7 +119,7 @@ MODULE_LICENSE("GPL"); bit0 corresponds to MEMTRACK_KMALLOC, bit1 corresponds to MEMTRACK_VMALLOC etc. */ static unsigned long track_mask = -1; /* effectively everything */ module_param(track_mask, ulong, 0444); -MODULE_PARM_DESC(track_mask, "bitmask definenig what is tracked"); +MODULE_PARM_DESC(track_mask, "bitmask defining what is tracked"); /* if a bit is set then the corresponding allocation is strictly tracked. 
That is, before inserting the whole range is checked to not overlap any @@ -76,59 +128,95 @@ static unsigned long strict_track_mask = 0; /* no strict tracking */ module_param(strict_track_mask, ulong, 0444); MODULE_PARM_DESC(strict_track_mask, "bitmask which allocation requires strict tracking"); -typedef struct memtrack_meminfo_st { +/* Sets the frequency of allocations failures injections + if set to 0 all allocation should succeed */ +static unsigned int inject_freq = 0; +module_param(inject_freq, uint, 0644); +MODULE_PARM_DESC(inject_freq, "Error injection frequency, default is 0 (disabled)"); + +static int random_mem = 1; +module_param(random_mem, uint, 0644); +MODULE_PARM_DESC(random_mem, "When set, randomize allocated memory, default is 1 (enabled)"); + +struct memtrack_meminfo_t { unsigned long addr; unsigned long size; unsigned long line_num; - struct memtrack_meminfo_st *next; + unsigned long dev; + unsigned long addr2; + int direction; + struct memtrack_meminfo_t *next; struct list_head list; /* used to link all items from a certain type together */ char filename[MAX_FILENAME_LEN + 1]; /* putting the char array last is better for struct. 
packing */ -} memtrack_meminfo_t; + char ext_info[32]; +}; static struct kmem_cache *meminfo_cache; -typedef struct { - memtrack_meminfo_t *mem_hash[MEMTRACK_HASH_SZ]; +struct tracked_obj_desc_t { + struct memtrack_meminfo_t *mem_hash[MEMTRACK_HASH_SZ]; spinlock_t hash_lock; unsigned long count; /* size of memory tracked (*malloc) or number of objects tracked */ struct list_head tracked_objs_head; /* head of list of all objects */ int strict_track; /* if 1 then for each object inserted check if it overlaps any of the objects already in the list */ -} tracked_obj_desc_t; +}; -static tracked_obj_desc_t *tracked_objs_arr[MEMTRACK_NUM_OF_MEMTYPES]; +static struct tracked_obj_desc_t *tracked_objs_arr[MEMTRACK_NUM_OF_MEMTYPES]; static const char *rsc_names[MEMTRACK_NUM_OF_MEMTYPES] = { "kmalloc", "vmalloc", - "kmem_cache_alloc" + "kmem_cache_alloc", + "io_remap", + "create_workqueue", + "alloc_pages", + "ib_dma_map_single", + "ib_dma_map_page", + "ib_dma_map_sg" }; - static const char *rsc_free_names[MEMTRACK_NUM_OF_MEMTYPES] = { "kfree", "vfree", - "kmem_cache_free" + "kmem_cache_free", + "io_unmap", + "destory_workqueue", + "free_pages", + "ib_dma_unmap_single", + "ib_dma_unmap_page", + "ib_dma_unmap_sg" }; - -static inline const char *memtype_alloc_str(memtrack_memtype_t memtype) +static inline const char *memtype_alloc_str(enum memtrack_memtype_t memtype) { switch (memtype) { - case MEMTRACK_KMALLOC: - case MEMTRACK_VMALLOC: - case MEMTRACK_KMEM_OBJ: + case MEMTRACK_KMALLOC: + case MEMTRACK_VMALLOC: + case MEMTRACK_KMEM_OBJ: + case MEMTRACK_IOREMAP: + case MEMTRACK_WORK_QUEUE: + case MEMTRACK_PAGE_ALLOC: + case MEMTRACK_DMA_MAP_SINGLE: + case MEMTRACK_DMA_MAP_PAGE: + case MEMTRACK_DMA_MAP_SG: return rsc_names[memtype]; default: return "(Unknown allocation type)"; } } -static inline const char *memtype_free_str(memtrack_memtype_t memtype) +static inline const char *memtype_free_str(enum memtrack_memtype_t memtype) { switch (memtype) { - case MEMTRACK_KMALLOC: - case 
MEMTRACK_VMALLOC: - case MEMTRACK_KMEM_OBJ: + case MEMTRACK_KMALLOC: + case MEMTRACK_VMALLOC: + case MEMTRACK_KMEM_OBJ: + case MEMTRACK_IOREMAP: + case MEMTRACK_WORK_QUEUE: + case MEMTRACK_PAGE_ALLOC: + case MEMTRACK_DMA_MAP_SINGLE: + case MEMTRACK_DMA_MAP_PAGE: + case MEMTRACK_DMA_MAP_SG: return rsc_free_names[memtype]; default: return "(Unknown allocation type)"; @@ -138,56 +226,56 @@ static inline const char *memtype_free_str(memtrack_memtype_t memtype) /* * overlap_a_b */ -static int overlap_a_b(unsigned long a_start, unsigned long a_end, +static inline int overlap_a_b(unsigned long a_start, unsigned long a_end, unsigned long b_start, unsigned long b_end) { - if ((b_start > a_end) || (a_start > b_end)) { + if ((b_start > a_end) || (a_start > b_end)) return 0; - } + return 1; } /* * check_overlap */ -static void check_overlap(memtrack_memtype_t memtype, - memtrack_meminfo_t * mem_info_p, - tracked_obj_desc_t * obj_desc_p) +static void check_overlap(enum memtrack_memtype_t memtype, + struct memtrack_meminfo_t *mem_info_p, + struct tracked_obj_desc_t *obj_desc_p) { struct list_head *pos, *next; - memtrack_meminfo_t *cur; + struct memtrack_meminfo_t *cur; unsigned long start_a, end_a, start_b, end_b; - list_for_each_safe(pos, next, &obj_desc_p->tracked_objs_head) { - cur = list_entry(pos, memtrack_meminfo_t, list); - start_a = mem_info_p->addr; end_a = mem_info_p->addr + mem_info_p->size - 1; + + list_for_each_safe(pos, next, &obj_desc_p->tracked_objs_head) { + cur = list_entry(pos, struct memtrack_meminfo_t, list); + start_b = cur->addr; end_b = cur->addr + cur->size - 1; - if (overlap_a_b(start_a, end_a, start_b, end_b)) { - printk - ("%s overlaps! new_start=0x%lx, new_end=0x%lx, item_start=0x%lx, item_end=0x%lx\n", + if (overlap_a_b(start_a, end_a, start_b, end_b)) + printk(KERN_ERR "%s overlaps! 
new_start=0x%lx, new_end=0x%lx, item_start=0x%lx, item_end=0x%lx\n", memtype_alloc_str(memtype), mem_info_p->addr, mem_info_p->addr + mem_info_p->size - 1, cur->addr, cur->addr + cur->size - 1); } - } } /* Invoke on memory allocation */ -void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr, - unsigned long size, const char *filename, +void memtrack_alloc(enum memtrack_memtype_t memtype, unsigned long dev, + unsigned long addr, unsigned long size, unsigned long addr2, + int direction, const char *filename, const unsigned long line_num, int alloc_flags) { unsigned long hash_val; - memtrack_meminfo_t *cur_mem_info_p, *new_mem_info_p; - tracked_obj_desc_t *obj_desc_p; + struct memtrack_meminfo_t *cur_mem_info_p, *new_mem_info_p; + struct tracked_obj_desc_t *obj_desc_p; unsigned long flags; if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) { - printk("%s: Invalid memory type (%d)\n", __func__, memtype); + printk(KERN_ERR "%s: Invalid memory type (%d)\n", __func__, memtype); return; } @@ -199,11 +287,9 @@ void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr, hash_val = addr % MEMTRACK_HASH_SZ; - new_mem_info_p = (memtrack_meminfo_t *) - kmem_cache_alloc(meminfo_cache, alloc_flags); + new_mem_info_p = (struct memtrack_meminfo_t *)kmem_cache_alloc(meminfo_cache, alloc_flags); if (new_mem_info_p == NULL) { - printk - ("%s: Failed allocating kmem_cache item for new mem_info. " + printk(KERN_ERR "%s: Failed allocating kmem_cache item for new mem_info. 
" "Lost tracking on allocation at %s:%lu...\n", __func__, filename, line_num); return; @@ -211,26 +297,34 @@ void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr, /* save allocation properties */ new_mem_info_p->addr = addr; new_mem_info_p->size = size; + new_mem_info_p->dev = dev; + new_mem_info_p->addr2 = addr2; + new_mem_info_p->direction = direction; + new_mem_info_p->line_num = line_num; + *new_mem_info_p->ext_info = '\0'; /* Make sure that we will print out the path tail if the given filename is longer * than MAX_FILENAME_LEN. (otherwise, we will not see the name of the actual file * in the printout -- only the path head! */ - if (strlen(filename) > MAX_FILENAME_LEN) { + if (strlen(filename) > MAX_FILENAME_LEN) strncpy(new_mem_info_p->filename, filename + strlen(filename) - MAX_FILENAME_LEN, MAX_FILENAME_LEN); - } else { + else strncpy(new_mem_info_p->filename, filename, MAX_FILENAME_LEN); - } + new_mem_info_p->filename[MAX_FILENAME_LEN] = 0; /* NULL terminate anyway */ memtrack_spin_lock(&obj_desc_p->hash_lock, flags); /* make sure given memory location is not already allocated */ + if ((memtype != MEMTRACK_DMA_MAP_SINGLE) && (memtype != MEMTRACK_DMA_MAP_PAGE) && + (memtype != MEMTRACK_DMA_MAP_SG)) { + + /* make sure given memory location is not already allocated */ cur_mem_info_p = obj_desc_p->mem_hash[hash_val]; while (cur_mem_info_p != NULL) { - if (cur_mem_info_p->addr == addr) { + if ((cur_mem_info_p->addr == addr) && (cur_mem_info_p->dev == dev)) { /* Found given address in the database */ - printk - ("mtl rsc inconsistency: %s: %s::%lu: %s @ addr=0x%lX which is already known from %s:%lu\n", + printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s @ addr=0x%lX which is already known from %s:%lu\n", __func__, filename, line_num, memtype_alloc_str(memtype), addr, cur_mem_info_p->filename, @@ -241,31 +335,33 @@ void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr, } cur_mem_info_p = cur_mem_info_p->next; } + } /* not found - 
we can put in the hash bucket */ /* link as first */ new_mem_info_p->next = obj_desc_p->mem_hash[hash_val]; obj_desc_p->mem_hash[hash_val] = new_mem_info_p; - if (obj_desc_p->strict_track) { + if (obj_desc_p->strict_track) check_overlap(memtype, new_mem_info_p, obj_desc_p); - } obj_desc_p->count += size; list_add(&new_mem_info_p->list, &obj_desc_p->tracked_objs_head); memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); return; } +EXPORT_SYMBOL(memtrack_alloc); /* Invoke on memory free */ -void memtrack_free(memtrack_memtype_t memtype, unsigned long addr, +void memtrack_free(enum memtrack_memtype_t memtype, unsigned long dev, + unsigned long addr, unsigned long size, int direction, const char *filename, const unsigned long line_num) { unsigned long hash_val; - memtrack_meminfo_t *cur_mem_info_p, *prev_mem_info_p; - tracked_obj_desc_t *obj_desc_p; + struct memtrack_meminfo_t *cur_mem_info_p, *prev_mem_info_p; + struct tracked_obj_desc_t *obj_desc_p; unsigned long flags; if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) { - printk("%s: Invalid memory type (%d)\n", __func__, memtype); + printk(KERN_ERR "%s: Invalid memory type (%d)\n", __func__, memtype); return; } @@ -282,13 +378,27 @@ void memtrack_free(memtrack_memtype_t memtype, unsigned long addr, prev_mem_info_p = NULL; cur_mem_info_p = obj_desc_p->mem_hash[hash_val]; while (cur_mem_info_p != NULL) { - if (cur_mem_info_p->addr == addr) { - /* Found given address in the database - remove from the bucket/list */ - if (prev_mem_info_p == NULL) { + if ((cur_mem_info_p->addr == addr) && (cur_mem_info_p->dev == dev)) { + /* Found given address in the database */ + if ((memtype == MEMTRACK_DMA_MAP_SINGLE) || (memtype == MEMTRACK_DMA_MAP_PAGE) || + (memtype == MEMTRACK_DMA_MAP_SG)) { + if (direction != cur_mem_info_p->direction) + printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s bad direction for addr 0x%lX: alloc:0x%x, free:0x%x (allocated in %s::%lu)\n", + __func__, filename, line_num, memtype_free_str(memtype), addr, 
cur_mem_info_p->direction, direction, + cur_mem_info_p->filename, cur_mem_info_p->line_num); + + if (size != cur_mem_info_p->size) + printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s bad size for addr 0x%lX: size:%lu, free:%lu (allocated in %s::%lu)\n", + __func__, filename, line_num, memtype_free_str(memtype), addr, cur_mem_info_p->size, size, + cur_mem_info_p->filename, cur_mem_info_p->line_num); + } + + /* Remove from the bucket/list */ + if (prev_mem_info_p == NULL) obj_desc_p->mem_hash[hash_val] = cur_mem_info_p->next; /* removing first */ - } else { + else prev_mem_info_p->next = cur_mem_info_p->next; /* "crossover" */ - } + list_del(&cur_mem_info_p->list); obj_desc_p->count -= cur_mem_info_p->size; @@ -301,64 +411,317 @@ void memtrack_free(memtrack_memtype_t memtype, unsigned long addr, } /* not found */ - printk - ("mtl rsc inconsistency: %s: %s::%lu: %s for unknown address=0x%lX\n", - __func__, filename, line_num, memtype_free_str(memtype), addr); + printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s for unknown address=0x%lX, device=0x%lX\n", + __func__, filename, line_num, memtype_free_str(memtype), addr, dev); memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); return; } +EXPORT_SYMBOL(memtrack_free); + +/* + * This function recognizes allocations which + * may be released by kernel (e.g. skb) and + * therefore not trackable by memtrack. + * The allocations are recognized by the name + * of their calling function. 
+ */ +int is_non_trackable_alloc_func(const char *func_name) +{ + static const char * const str_str_arr[] = { + /* functions containing these strings consider non trackable */ + "skb", + }; + static const char * const str_str_excep_arr[] = { + /* functions which are exception to the str_str_arr table */ + "ipoib_cm_skb_too_long" + }; + static const char * const str_cmp_arr[] = { + /* functions that allocate SKBs */ + "mlx4_en_alloc_frags", + "mlx4_en_alloc_frag", + "mlx4_en_init_allocator", + "mlx4_en_free_frag", + "mlx4_en_free_rx_desc", + "mlx4_en_destroy_allocator", + "mlx4_en_complete_rx_desc", + /* vnic skb functions */ + "free_single_frag", + "vnic_alloc_rx_skb", + "vnic_rx_skb", + "vnic_alloc_frag", + "vnic_empty_rx_entry", + "vnic_init_allocator", + "vnic_destroy_allocator", + "sdp_post_recv", + "sdp_rx_ring_purge", + "sdp_post_srcavail", + "sk_stream_alloc_page", + "update_send_head", + "sdp_bcopy_get", + "sdp_destroy_resources", + + /* function that allocate memory for RDMA device context */ + "ib_alloc_device" + }; + size_t str_str_arr_size = sizeof(str_str_arr)/sizeof(char *); + size_t str_str_excep_size = sizeof(str_str_excep_arr)/sizeof(char *); + size_t str_cmp_arr_size = sizeof(str_cmp_arr)/sizeof(char *); + + int i, j; + + for (i = 0; i < str_str_arr_size; ++i) + if (strstr(func_name, str_str_arr[i])) { + for (j = 0; j < str_str_excep_size; ++j) + if (!strcmp(func_name, str_str_excep_arr[j])) + return 0; + return 1; + } + for (i = 0; i < str_cmp_arr_size; ++i) + if (!strcmp(func_name, str_cmp_arr[i])) + return 1; + return 0; +} +EXPORT_SYMBOL(is_non_trackable_alloc_func); + +/* + * In some cases we need to free a memory + * we defined as "non trackable" (see + * is_non_trackable_alloc_func). + * This function recognizes such releases + * by the name of their calling function. 
+ */ +int is_non_trackable_free_func(const char *func_name) +{ + + static const char * const str_cmp_arr[] = { + /* function that deallocate memory for RDMA device context */ + "ib_dealloc_device" + }; + size_t str_cmp_arr_size = sizeof(str_cmp_arr)/sizeof(char *); + + int i; + + for (i = 0; i < str_cmp_arr_size; ++i) + if (!strcmp(func_name, str_cmp_arr[i])) + return 1; + return 0; +} +EXPORT_SYMBOL(is_non_trackable_free_func); + + +/* WA - In this function handles confirm + the the function name is + '__ib_umem_release' or 'ib_umem_get' + In this case we won't track the + memory there because the kernel + was the one who allocated it. + Return value: + 1 - if the function name is match, else 0 */ +int is_umem_put_page(const char *func_name) +{ + const char func_str[18] = "__ib_umem_release"; + /* In case of error flow put_page is called as part of ib_umem_get */ + const char func_str1[12] = "ib_umem_get"; + + return ((strstr(func_name, func_str) != NULL) || + (strstr(func_name, func_str1) != NULL)) ? 
1 : 0; +} +EXPORT_SYMBOL(is_umem_put_page); + +/* Check page order size + When Freeing a page allocation it checks whether + we are trying to free the same size + we asked to allocate */ +int memtrack_check_size(enum memtrack_memtype_t memtype, unsigned long addr, + unsigned long size, const char *filename, + const unsigned long line_num) +{ + unsigned long hash_val; + struct memtrack_meminfo_t *cur_mem_info_p; + struct tracked_obj_desc_t *obj_desc_p; + unsigned long flags; + int ret = 0; + + if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) { + printk(KERN_ERR "%s: Invalid memory type (%d)\n", __func__, memtype); + return 1; + } + + if (!tracked_objs_arr[memtype]) { + /* object is not tracked */ + return 1; + } + obj_desc_p = tracked_objs_arr[memtype]; + + hash_val = addr % MEMTRACK_HASH_SZ; + + memtrack_spin_lock(&obj_desc_p->hash_lock, flags); + /* find mem_info of given memory location */ + cur_mem_info_p = obj_desc_p->mem_hash[hash_val]; + while (cur_mem_info_p != NULL) { + if (cur_mem_info_p->addr == addr) { + /* Found given address in the database - check size */ + if (cur_mem_info_p->size != size) { + printk(KERN_ERR "mtl size inconsistency: %s: %s::%lu: try to %s at address=0x%lX with size %lu while was created with size %lu\n", + __func__, filename, line_num, memtype_free_str(memtype), + addr, size, cur_mem_info_p->size); + snprintf(cur_mem_info_p->ext_info, sizeof(cur_mem_info_p->ext_info), + "invalid free size %lu\n", size); + ret = 1; + } + memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); + return ret; + } + cur_mem_info_p = cur_mem_info_p->next; + } + + /* not found - This function will not give any indication + but will only check the correct size\order + For inconsistency the 'free' function will check that */ + memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); + return 1; +} +EXPORT_SYMBOL(memtrack_check_size); + +/* Search for a specific addr whether it exist in the + current data-base. 
+ It will print an error msg if we get an unexpected result, + Return value: 0 - if addr exist, else 1 */ +int memtrack_is_new_addr(enum memtrack_memtype_t memtype, unsigned long addr, int expect_exist, + const char *filename, const unsigned long line_num) +{ + unsigned long hash_val; + struct memtrack_meminfo_t *cur_mem_info_p; + struct tracked_obj_desc_t *obj_desc_p; + unsigned long flags; + + if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) { + printk(KERN_ERR "%s: Invalid memory type (%d)\n", __func__, memtype); + return 1; + } + + if (!tracked_objs_arr[memtype]) { + /* object is not tracked */ + return 0; + } + obj_desc_p = tracked_objs_arr[memtype]; + + hash_val = addr % MEMTRACK_HASH_SZ; + + memtrack_spin_lock(&obj_desc_p->hash_lock, flags); + /* find mem_info of given memory location */ + cur_mem_info_p = obj_desc_p->mem_hash[hash_val]; + while (cur_mem_info_p != NULL) { + if (cur_mem_info_p->addr == addr) { + /* Found given address in the database - exiting */ + memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); + return 0; + } + cur_mem_info_p = cur_mem_info_p->next; + } + + /* not found */ + if (expect_exist) + printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s for unknown address=0x%lX\n", + __func__, filename, line_num, memtype_free_str(memtype), addr); + + memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); + return 1; +} +EXPORT_SYMBOL(memtrack_is_new_addr); + +/* Return current page reference counter */ +int memtrack_get_page_ref_count(unsigned long addr) +{ + unsigned long hash_val; + struct memtrack_meminfo_t *cur_mem_info_p; + struct tracked_obj_desc_t *obj_desc_p; + unsigned long flags; + /* This function is called only for page allocation */ + enum memtrack_memtype_t memtype = MEMTRACK_PAGE_ALLOC; + int ref_conut = 0; + + if (!tracked_objs_arr[memtype]) { + /* object is not tracked */ + return ref_conut; + } + obj_desc_p = tracked_objs_arr[memtype]; + + hash_val = addr % MEMTRACK_HASH_SZ; + + memtrack_spin_lock(&obj_desc_p->hash_lock, flags); + 
/* find mem_info of given memory location */ + cur_mem_info_p = obj_desc_p->mem_hash[hash_val]; + while (cur_mem_info_p != NULL) { + if (cur_mem_info_p->addr == addr) { + /* Found given address in the database - check ref-count */ + struct page *page = (struct page *)(cur_mem_info_p->addr); + ref_conut = atomic_read(&page->_count); + memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); + return ref_conut; + } + cur_mem_info_p = cur_mem_info_p->next; + } + + /* not found */ + memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); + return ref_conut; +} +EXPORT_SYMBOL(memtrack_get_page_ref_count); /* Report current allocations status (for all memory types) */ static void memtrack_report(void) { - memtrack_memtype_t memtype; + enum memtrack_memtype_t memtype; unsigned long cur_bucket; - memtrack_meminfo_t *cur_mem_info_p; + struct memtrack_meminfo_t *cur_mem_info_p; int serial = 1; - tracked_obj_desc_t *obj_desc_p; + struct tracked_obj_desc_t *obj_desc_p; unsigned long flags; + unsigned long detected_leaks = 0; - printk("%s: Currently known allocations:\n", __func__); + printk(KERN_INFO "%s: Currently known allocations:\n", __func__); for (memtype = 0; memtype < MEMTRACK_NUM_OF_MEMTYPES; memtype++) { if (tracked_objs_arr[memtype]) { - printk("%d) %s:\n", serial, memtype_alloc_str(memtype)); + printk(KERN_INFO "%d) %s:\n", serial, memtype_alloc_str(memtype)); obj_desc_p = tracked_objs_arr[memtype]; /* Scan all buckets to find existing allocations */ /* TBD: this may be optimized by holding a linked list of all hash items */ - for (cur_bucket = 0; cur_bucket < MEMTRACK_HASH_SZ; - cur_bucket++) { + for (cur_bucket = 0; cur_bucket < MEMTRACK_HASH_SZ; cur_bucket++) { memtrack_spin_lock(&obj_desc_p->hash_lock, flags); /* protect per bucket/list */ - cur_mem_info_p = - obj_desc_p->mem_hash[cur_bucket]; + cur_mem_info_p = obj_desc_p->mem_hash[cur_bucket]; while (cur_mem_info_p != NULL) { /* scan bucket */ - printk("%s::%lu: %s(%lu)==%lX\n", + printk(KERN_INFO "%s::%lu: 
%s(%lu)==%lX dev=%lX %s\n", cur_mem_info_p->filename, cur_mem_info_p->line_num, memtype_alloc_str(memtype), cur_mem_info_p->size, - cur_mem_info_p->addr); + cur_mem_info_p->addr, + cur_mem_info_p->dev, + cur_mem_info_p->ext_info); cur_mem_info_p = cur_mem_info_p->next; + ++ detected_leaks; } /* while cur_mem_info_p */ memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); } /* for cur_bucket */ serial++; } } /* for memtype */ + printk(KERN_INFO "%s: Summary: %lu leak(s) detected\n", __func__, detected_leaks); } static struct proc_dir_entry *memtrack_tree; -static memtrack_memtype_t get_rsc_by_name(const char *name) +static enum memtrack_memtype_t get_rsc_by_name(const char *name) { - memtrack_memtype_t i; + enum memtrack_memtype_t i; - for (i=0; i<MEMTRACK_NUM_OF_MEMTYPES; ++i) { - if (strcmp(name, rsc_names[i]) == 0) { + for (i = 0; i < MEMTRACK_NUM_OF_MEMTYPES; ++i) { + if (strcmp(name, rsc_names[i]) == 0) return i; } - } return i; } @@ -375,44 +738,41 @@ static ssize_t memtrack_read(struct file *filp, static int file_len; int _read, to_ret, left; const char *fname; - memtrack_memtype_t memtype; + enum memtrack_memtype_t memtype; if (pos < 0) return -EINVAL; - fname= filp->f_dentry->d_name.name; + fname = filp->f_dentry->d_name.name; - memtype= get_rsc_by_name(fname); + memtype = get_rsc_by_name(fname); if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) { - printk("invalid file name\n"); + printk(KERN_ERR "invalid file name\n"); return -EINVAL; } - if ( pos == 0 ) { + if (pos == 0) { memtrack_spin_lock(&tracked_objs_arr[memtype]->hash_lock, flags); - cur= tracked_objs_arr[memtype]->count; + cur = tracked_objs_arr[memtype]->count; memtrack_spin_unlock(&tracked_objs_arr[memtype]->hash_lock, flags); _read = sprintf(kbuf, "%lu\n", cur); - if ( _read < 0 ) { + if (_read < 0) return _read; - } - else { + else file_len = _read; } - } left = file_len - pos; to_ret = (left < size) ? 
left : size; - if ( copy_to_user(buf, kbuf+pos, to_ret) ) { + if (copy_to_user(buf, kbuf+pos, to_ret)) return -EFAULT; - } else { *offset = pos + to_ret; return to_ret; } } -static struct file_operations memtrack_proc_fops = { +static const struct file_operations memtrack_proc_fops = { .read = memtrack_read, }; @@ -426,30 +786,28 @@ static int create_procfs_tree(void) unsigned long bit_mask; dir_ent = proc_mkdir(memtrack_proc_entry_name, NULL); - if ( !dir_ent ) { + if (!dir_ent) return -1; - } memtrack_tree = dir_ent; - for (i=0, bit_mask=1; i<MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask<<=1) { + for (i = 0, bit_mask = 1; i < MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask <<= 1) { if (bit_mask & track_mask) { proc_ent = create_proc_entry(rsc_names[i], S_IRUGO, memtrack_tree); - if ( !proc_ent ) + if (!proc_ent) goto undo_create_root; - proc_ent->proc_fops = &memtrack_proc_fops; + proc_ent->proc_fops = &memtrack_proc_fops; } } goto exit_ok; undo_create_root: - for (j=0, bit_mask=1; j<i; ++j, bit_mask<<=1) { - if (bit_mask & track_mask) { + for (j = 0, bit_mask = 1; j < i; ++j, bit_mask <<= 1) { + if (bit_mask & track_mask) remove_proc_entry(rsc_names[j], memtrack_tree); } - } remove_proc_entry(memtrack_proc_entry_name, NULL); return -1; @@ -463,30 +821,48 @@ static void destroy_procfs_tree(void) int i; unsigned long bit_mask; - for (i=0, bit_mask=1; i<MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask<<=1) { - if (bit_mask & track_mask) { + for (i = 0, bit_mask = 1; i < MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask <<= 1) { + if (bit_mask & track_mask) remove_proc_entry(rsc_names[i], memtrack_tree); - } + } remove_proc_entry(memtrack_proc_entry_name, NULL); } +int memtrack_inject_error(void) +{ + int val = 0; + + if (inject_freq) { + if (!(random32() % inject_freq)) + val = 1; + } + + return val; +} +EXPORT_SYMBOL(memtrack_inject_error); + +int memtrack_randomize_mem(void) +{ + return random_mem; +} +EXPORT_SYMBOL(memtrack_randomize_mem); /* module entry points */ int init_module(void) { - 
memtrack_memtype_t i; + enum memtrack_memtype_t i; int j; unsigned long bit_mask; /* create a cache for the memtrack_meminfo_t strcutures */ meminfo_cache = kmem_cache_create("memtrack_meminfo_t", - sizeof(memtrack_meminfo_t), 0, + sizeof(struct memtrack_meminfo_t), 0, SLAB_HWCACHE_ALIGN, NULL); if (!meminfo_cache) { - printk("memtrack::%s: failed to allocate meminfo cache\n", __func__); + printk(KERN_ERR "memtrack::%s: failed to allocate meminfo cache\n", __func__); return -1; } @@ -494,49 +870,43 @@ int init_module(void) memset(tracked_objs_arr, 0, sizeof(tracked_objs_arr)); /* create a tracking object descriptor for all required objects */ - for (i = 0, bit_mask = 1; i < MEMTRACK_NUM_OF_MEMTYPES; - ++i, bit_mask <<= 1) { + for (i = 0, bit_mask = 1; i < MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask <<= 1) { if (bit_mask & track_mask) { - tracked_objs_arr[i] = - vmalloc(sizeof(tracked_obj_desc_t)); + tracked_objs_arr[i] = vmalloc(sizeof(struct tracked_obj_desc_t)); if (!tracked_objs_arr[i]) { - printk("memtrack: failed to allocate tracking object\n"); + printk(KERN_ERR "memtrack: failed to allocate tracking object\n"); goto undo_cache_create; } - memset(tracked_objs_arr[i], 0, sizeof(tracked_obj_desc_t)); + memset(tracked_objs_arr[i], 0, sizeof(struct tracked_obj_desc_t)); spin_lock_init(&tracked_objs_arr[i]->hash_lock); INIT_LIST_HEAD(&tracked_objs_arr[i]->tracked_objs_head); - if (bit_mask & strict_track_mask) { + if (bit_mask & strict_track_mask) tracked_objs_arr[i]->strict_track = 1; - } else { + else tracked_objs_arr[i]->strict_track = 0; } } - } - if ( create_procfs_tree() ) { - printk("%s: create_procfs_tree() failed\n", __FILE__); + if (create_procfs_tree()) { + printk(KERN_ERR "%s: create_procfs_tree() failed\n", __FILE__); goto undo_cache_create; } - - printk("memtrack::%s done.\n", __func__); + printk(KERN_INFO "memtrack::%s done.\n", __func__); return 0; undo_cache_create: - for (j=0; j<i; ++j) { - if (tracked_objs_arr[j]) { + for (j = 0; j < i; ++j) { + if 
(tracked_objs_arr[j]) vfree(tracked_objs_arr[j]); } - } -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) - if (kmem_cache_destroy(meminfo_cache) != 0) { - printk("Failed on kmem_cache_destroy !\n"); - } +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19) + if (kmem_cache_destroy(meminfo_cache) != 0) + printk(KERN_ERR "Failed on kmem_cache_destroy!\n"); #else kmem_cache_destroy(meminfo_cache); #endif @@ -546,10 +916,10 @@ undo_cache_create: void cleanup_module(void) { - memtrack_memtype_t memtype; + enum memtrack_memtype_t memtype; unsigned long cur_bucket; - memtrack_meminfo_t *cur_mem_info_p, *next_mem_info_p; - tracked_obj_desc_t *obj_desc_p; + struct memtrack_meminfo_t *cur_mem_info_p, *next_mem_info_p; + struct tracked_obj_desc_t *obj_desc_p; unsigned long flags; @@ -564,15 +934,12 @@ void cleanup_module(void) /* TBD: this may be optimized by holding a linked list of all hash items */ if (tracked_objs_arr[memtype]) { obj_desc_p = tracked_objs_arr[memtype]; - for (cur_bucket = 0; cur_bucket < MEMTRACK_HASH_SZ; - cur_bucket++) { + for (cur_bucket = 0; cur_bucket < MEMTRACK_HASH_SZ; cur_bucket++) { memtrack_spin_lock(&obj_desc_p->hash_lock, flags); /* protect per bucket/list */ - cur_mem_info_p = - obj_desc_p->mem_hash[cur_bucket]; + cur_mem_info_p = obj_desc_p->mem_hash[cur_bucket]; while (cur_mem_info_p != NULL) { /* scan bucket */ next_mem_info_p = cur_mem_info_p->next; /* save "next" pointer before the "free" */ - kmem_cache_free(meminfo_cache, - cur_mem_info_p); + kmem_cache_free(meminfo_cache, cur_mem_info_p); cur_mem_info_p = next_mem_info_p; } /* while cur_mem_info_p */ memtrack_spin_unlock(&obj_desc_p->hash_lock, flags); @@ -581,20 +948,11 @@ void cleanup_module(void) } } /* for memtype */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) - if (kmem_cache_destroy(meminfo_cache) != 0) { - printk - ("memtrack::cleanup_module: Failed on kmem_cache_destroy !\n"); - } +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19) + if (kmem_cache_destroy(meminfo_cache) != 
0) + printk(KERN_ERR "memtrack::cleanup_module: Failed on kmem_cache_destroy!\n"); #else kmem_cache_destroy(meminfo_cache); #endif - printk("memtrack::cleanup_module done.\n"); + printk(KERN_INFO "memtrack::cleanup_module done.\n"); } - -EXPORT_SYMBOL(memtrack_alloc); -EXPORT_SYMBOL(memtrack_free); - -//module_init(memtrack_init) -//module_exit(memtrack_exit) - diff --git a/sys/ofed/drivers/infiniband/debug/memtrack.h b/sys/ofed/drivers/infiniband/debug/memtrack.h index e443a31..76265ae 100644 --- a/sys/ofed/drivers/infiniband/debug/memtrack.h +++ b/sys/ofed/drivers/infiniband/debug/memtrack.h @@ -22,24 +22,85 @@ #ifndef H_MEMTRACK_H #define H_MEMTRACK_H -typedef enum { +enum memtrack_memtype_t { MEMTRACK_KMALLOC, MEMTRACK_VMALLOC, MEMTRACK_KMEM_OBJ, + MEMTRACK_IOREMAP, /* IO-RE/UN-MAP */ + MEMTRACK_WORK_QUEUE, /* Handle work-queue create & destroy */ + MEMTRACK_PAGE_ALLOC, /* Handle page allocation and free */ + MEMTRACK_DMA_MAP_SINGLE,/* Handle ib_dma_single map and unmap */ + MEMTRACK_DMA_MAP_PAGE, /* Handle ib_dma_page map and unmap */ + MEMTRACK_DMA_MAP_SG, /* Handle ib_dma_sg map and unmap with and without attributes */ MEMTRACK_NUM_OF_MEMTYPES -} memtrack_memtype_t; +}; /* Invoke on memory allocation */ -void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr, - unsigned long size, const char *filename, +void memtrack_alloc(enum memtrack_memtype_t memtype, unsigned long dev, + unsigned long addr, unsigned long size, unsigned long addr2, + int direction, const char *filename, const unsigned long line_num, int alloc_flags); /* Invoke on memory free */ -void memtrack_free(memtrack_memtype_t memtype, unsigned long addr, +void memtrack_free(enum memtrack_memtype_t memtype, unsigned long dev, + unsigned long addr, unsigned long size, int direction, const char *filename, const unsigned long line_num); +/* + * This function recognizes allocations which + * may be released by kernel (e.g. skb & vnic) and + * therefore not trackable by memtrack. 
+ * The allocations are recognized by the name + * of their calling function. + */ +int is_non_trackable_alloc_func(const char *func_name); +/* + * In some cases we need to free a memory + * we defined as "non trackable" (see + * is_non_trackable_alloc_func). + * This function recognizes such releases + * by the name of their calling function. + */ +int is_non_trackable_free_func(const char *func_name); + +/* WA - In this function handles confirm + the the function name is + '__ib_umem_release' or 'ib_umem_get' + In this case we won't track the + memory there because the kernel + was the one who allocated it. + Return value: + 1 - if the function name is match, else 0 */ +int is_umem_put_page(const char *func_name); + +/* Check page order size + When Freeing a page allocation it checks whether + we are trying to free the same amount of pages + we ask to allocate (In log2(order)). + In case an error if found it will print + an error msg */ +int memtrack_check_size(enum memtrack_memtype_t memtype, unsigned long addr, + unsigned long size, const char *filename, + const unsigned long line_num); + +/* Search for a specific addr whether it exist in the + current data-base. 
+ If not it will print an error msg, + Return value: 0 - if addr exist, else 1 */ +int memtrack_is_new_addr(enum memtrack_memtype_t memtype, unsigned long addr, int expect_exist, + const char *filename, const unsigned long line_num); + +/* Return current page reference counter */ +int memtrack_get_page_ref_count(unsigned long addr); + /* Report current allocations status (for all memory types) */ /* we do not export this function since it is used by cleanup_module only */ /* void memtrack_report(void); */ +/* Allow support of error injections */ +int memtrack_inject_error(void); + +/* randomize allocated memory */ +int memtrack_randomize_mem(void); + #endif diff --git a/sys/ofed/drivers/infiniband/debug/mtrack.h b/sys/ofed/drivers/infiniband/debug/mtrack.h index 337d9c3..5c0cd20 100644 --- a/sys/ofed/drivers/infiniband/debug/mtrack.h +++ b/sys/ofed/drivers/infiniband/debug/mtrack.h @@ -1,46 +1,84 @@ #ifndef __mtrack_h_ #define __mtrack_h_ -#include <memtrack.h> +#include "memtrack.h" #include <linux/slab.h> #include <linux/vmalloc.h> -#include <linux/version.h> +#include <linux/kernel.h> +#include <linux/io.h> /* For ioremap_nocache, ioremap, iounmap */ +#include <linux/random.h> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 27) +# include <linux/io-mapping.h> /* For ioremap_nocache, ioremap, iounmap */ +#endif +#include <linux/mm.h> /* For all page handling */ +#include <linux/workqueue.h> /* For all work-queue handling */ +#include <linux/scatterlist.h> /* For using scatterlists */ +#include <linux/skbuff.h> /* For skbufs handling */ +#include <asm/uaccess.h> /* For copy from/to user */ + +#define MEMTRACK_ERROR_INJECTION_MESSAGE(file, line, func) ({ \ + printk(KERN_ERR "%s failure injected at %s:%d\n", func, file, line); \ + dump_stack(); \ +}) -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14) #define RDMA_KZALLOC_H #define kzalloc(size, flags) ({ \ - void *__memtrack_kz_addr; \ + void *__memtrack_kz_addr = 
NULL; \ \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kzalloc");\ + else \ __memtrack_kz_addr = kmalloc(size, flags); \ - if ( __memtrack_kz_addr ) { \ - memset( __memtrack_kz_addr, 0, size) ; \ + if (__memtrack_kz_addr && !is_non_trackable_alloc_func(__func__)) { \ + memset(__memtrack_kz_addr, 0, size); \ } \ __memtrack_kz_addr; \ }) #else #define kzalloc(size, flags) ({ \ - void *__memtrack_addr; \ + void *__memtrack_addr = NULL; \ \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kzalloc");\ + else \ __memtrack_addr = kzalloc(size, flags); \ - if ( __memtrack_addr && (size)) { \ - memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), size, __FILE__, __LINE__, flags); \ + if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \ + memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, flags); \ } \ __memtrack_addr; \ }) #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +#define kzalloc_node(size, flags, node) ({ \ + void *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kzalloc_node"); \ + else \ + __memtrack_addr = kzalloc_node(size, flags, node); \ + if (__memtrack_addr && (size) && \ + !is_non_trackable_alloc_func(__func__)) { \ + memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, flags); \ + } \ + __memtrack_addr; \ +}) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19) #define kcalloc(n, size, flags) kzalloc((n)*(size), flags) #else #define kcalloc(n, size, flags) ({ \ - void *__memtrack_addr; \ + void *__memtrack_addr = NULL; \ \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kcalloc");\ + else \ __memtrack_addr = kcalloc(n, size, flags); \ - if ( __memtrack_addr && (size)) { \ - memtrack_alloc(MEMTRACK_KMALLOC, (unsigned 
long)(__memtrack_addr), (n)*(size), __FILE__, __LINE__, flags); \ + if (__memtrack_addr && (size)) { \ + memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), (n)*(size), 0UL, 0, __FILE__, __LINE__, flags); \ } \ __memtrack_addr; \ }) @@ -50,76 +88,208 @@ #ifdef ZERO_OR_NULL_PTR #define kmalloc(sz, flgs) ({ \ - void *__memtrack_addr; \ + void *__memtrack_addr = NULL; \ \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmalloc");\ + else \ __memtrack_addr = kmalloc(sz, flgs); \ - if ( !ZERO_OR_NULL_PTR(__memtrack_addr)) { \ - memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), sz, __FILE__, __LINE__, flgs); \ + if (!ZERO_OR_NULL_PTR(__memtrack_addr)) { \ + memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \ + if (memtrack_randomize_mem()) \ + get_random_bytes(__memtrack_addr, sz); \ } \ __memtrack_addr; \ }) #else #define kmalloc(sz, flgs) ({ \ - void *__memtrack_addr; \ + void *__memtrack_addr = NULL; \ \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmalloc");\ + else \ __memtrack_addr = kmalloc(sz, flgs); \ - if ( __memtrack_addr ) { \ - memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), sz, __FILE__, __LINE__, flgs); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \ + if (memtrack_randomize_mem()) \ + get_random_bytes(__memtrack_addr, sz); \ } \ __memtrack_addr; \ }) #endif +#define kmalloc_node(sz, flgs, node) ({ \ + void *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmalloc_node"); \ + else \ + __memtrack_addr = kmalloc_node(sz, flgs, node); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \ + if 
(memtrack_randomize_mem() && ((flgs) == GFP_KERNEL)) \ + get_random_bytes(__memtrack_addr, sz); \ + } \ + __memtrack_addr; \ +}) + +#ifdef ZERO_OR_NULL_PTR +#define kmemdup(src, sz, flgs) ({ \ + void *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmemdup");\ + else \ + __memtrack_addr = kmemdup(src, sz, flgs); \ + if (!ZERO_OR_NULL_PTR(__memtrack_addr)) { \ + memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \ + } \ + __memtrack_addr; \ +}) +#else +#define kmemdup(src, sz, flgs) ({ \ + void *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmemdup");\ + else \ + __memtrack_addr = kmemdup(src, sz, flgs); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \ + } \ + __memtrack_addr; \ +}) +#endif + #ifdef ZERO_OR_NULL_PTR #define kfree(addr) ({ \ void *__memtrack_addr = (void *)addr; \ - if ( !ZERO_OR_NULL_PTR(__memtrack_addr) ) { \ - memtrack_free(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \ + \ + if (!ZERO_OR_NULL_PTR(__memtrack_addr) && \ + !is_non_trackable_free_func(__func__)) { \ + memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ } \ kfree(__memtrack_addr); \ }) #else #define kfree(addr) ({ \ void *__memtrack_addr = (void *)addr; \ - if ( __memtrack_addr ) { \ - memtrack_free(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \ + \ + if (__memtrack_addr && !is_non_trackable_free_func(__func__)) { \ + memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ } \ kfree(__memtrack_addr); \ }) #endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0) || defined (CONFIG_COMPAT_RCU) +#ifdef kfree_rcu + #undef kfree_rcu +#endif - - - 
+#ifdef ZERO_OR_NULL_PTR +#define kfree_rcu(addr, rcu_head) ({ \ + void *__memtrack_addr = (void *)addr; \ + \ + if (!ZERO_OR_NULL_PTR(__memtrack_addr) && \ + !is_non_trackable_free_func(__func__)) { \ + memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ + } \ + __kfree_rcu(&((addr)->rcu_head), offsetof(typeof(*(addr)), rcu_head)); \ +}) +#else +#define kfree_rcu(addr, rcu_head) ({ \ + void *__memtrack_addr = (void *)addr; \ + \ + if (__memtrack_addr && !is_non_trackable_free_func(__func__)) { \ + memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ + } \ + __kfree_rcu(&((addr)->rcu_head), offsetof(typeof(*(addr)), rcu_head)); \ +}) +#endif +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) */ #define vmalloc(size) ({ \ - void *__memtrack_addr; \ + void *__memtrack_addr = NULL; \ \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vmalloc");\ + else \ __memtrack_addr = vmalloc(size); \ - if ( __memtrack_addr ) { \ - memtrack_alloc(MEMTRACK_VMALLOC, (unsigned long)(__memtrack_addr), size, __FILE__, __LINE__, GFP_ATOMIC); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + if (memtrack_randomize_mem()) \ + get_random_bytes(__memtrack_addr, size); \ + } \ + __memtrack_addr; \ +}) + +#ifndef vzalloc +#define vzalloc(size) ({ \ + void *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vzalloc");\ + else \ + __memtrack_addr = vzalloc(size); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ } \ __memtrack_addr; \ }) +#endif + +#ifndef vzalloc_node +#define vzalloc_node(size, node) ({ \ + void *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + 
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vzalloc_node"); \ + else \ + __memtrack_addr = vzalloc_node(size, node); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + if (memtrack_randomize_mem()) \ + get_random_bytes(__memtrack_addr, size); \ + } \ + __memtrack_addr; \ +}) +#endif +#define vmalloc_node(size, node) ({ \ + void *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vmalloc_node"); \ + else \ + __memtrack_addr = vmalloc_node(size, node); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + if (memtrack_randomize_mem()) \ + get_random_bytes(__memtrack_addr, size); \ + } \ + __memtrack_addr; \ +}) #define vfree(addr) ({ \ void *__memtrack_addr = (void *)addr; \ - if ( __memtrack_addr ) { \ - memtrack_free(MEMTRACK_VMALLOC, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \ + if (__memtrack_addr) { \ + memtrack_free(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ } \ vfree(__memtrack_addr); \ }) #define kmem_cache_alloc(cache, flags) ({ \ - void *__memtrack_addr; \ + void *__memtrack_addr = NULL; \ \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmem_cache_alloc"); \ + else \ __memtrack_addr = kmem_cache_alloc(cache, flags); \ - if ( __memtrack_addr ) { \ - memtrack_alloc(MEMTRACK_KMEM_OBJ, (unsigned long)(__memtrack_addr), 1, __FILE__, __LINE__, flags); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_KMEM_OBJ, 0UL, (unsigned long)(__memtrack_addr), 1, 0UL, 0, __FILE__, __LINE__, flags); \ } \ __memtrack_addr; \ }) @@ -127,12 +297,548 @@ #define kmem_cache_free(cache, addr) ({ \ void *__memtrack_addr = (void *)addr; \ - if ( __memtrack_addr ) { \ - memtrack_free(MEMTRACK_KMEM_OBJ, 
(unsigned long)(__memtrack_addr), __FILE__, __LINE__); \ + \ + if (__memtrack_addr) { \ + memtrack_free(MEMTRACK_KMEM_OBJ, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ } \ kmem_cache_free(cache, __memtrack_addr); \ }) +/* All IO-MAP handling */ +#define ioremap(phys_addr, size) ({ \ + void __iomem *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap");\ + else \ + __memtrack_addr = ioremap(phys_addr, size); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + __memtrack_addr; \ +}) + +#define io_mapping_create_wc(base, size) ({ \ + void __iomem *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "io_mapping_create_wc"); \ + else \ + __memtrack_addr = io_mapping_create_wc(base, size); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + __memtrack_addr; \ +}) + +#define io_mapping_free(addr) ({ \ + void *__memtrack_addr = (void *)addr; \ + \ + if (__memtrack_addr) { \ + memtrack_free(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ + } \ + io_mapping_free(__memtrack_addr); \ +}) + +#ifdef CONFIG_PPC +#ifdef ioremap_nocache + #undef ioremap_nocache +#endif +#define ioremap_nocache(phys_addr, size) ({ \ + void __iomem *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap_nocache"); \ + else \ + __memtrack_addr = ioremap(phys_addr, size); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + __memtrack_addr; \ +}) +#else +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18) /* 2.6.16 
- 2.6.17 */ +#ifdef ioremap_nocache + #undef ioremap_nocache +#endif +#define ioremap_nocache(phys_addr, size) ({ \ + void __iomem *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap_nocache"); \ + else \ + __memtrack_addr = ioremap(phys_addr, size); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + __memtrack_addr; \ +}) +#else +#define ioremap_nocache(phys_addr, size) ({ \ + void __iomem *__memtrack_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap_nocache"); \ + else \ + __memtrack_addr = ioremap_nocache(phys_addr, size); \ + if (__memtrack_addr) { \ + memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + __memtrack_addr; \ +}) +#endif /* Kernel version is under 2.6.18 */ +#endif /* PPC */ + +#define iounmap(addr) ({ \ + void *__memtrack_addr = (void *)addr; \ + \ + if (__memtrack_addr) { \ + memtrack_free(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ + } \ + iounmap(__memtrack_addr); \ +}) + + +/* All Page handlers */ +/* TODO: Catch netif_rx for page dereference */ +#define alloc_pages_node(nid, gfp_mask, order) ({ \ + struct page *page_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_pages_node"); \ + else \ + page_addr = (struct page *)alloc_pages_node(nid, gfp_mask, order); \ + if (page_addr && !is_non_trackable_alloc_func(__func__)) { \ + memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), (unsigned long)(order), 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + page_addr; \ +}) + +#ifdef CONFIG_NUMA +#define alloc_pages(gfp_mask, order) ({ \ + struct page *page_addr = NULL; \ + \ + if (memtrack_inject_error()) \ 
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_pages"); \ + else \ + page_addr = (struct page *)alloc_pages(gfp_mask, order); \ + if (page_addr && !is_non_trackable_alloc_func(__func__)) { \ + memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), (unsigned long)(order), 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + page_addr; \ +}) +#else +#ifdef alloc_pages + #undef alloc_pages +#endif +#define alloc_pages(gfp_mask, order) ({ \ + struct page *page_addr; \ + \ + page_addr = (struct page *)alloc_pages_node(numa_node_id(), gfp_mask, order); \ + page_addr; \ +}) +#endif + +#define __get_free_pages(gfp_mask, order) ({ \ + struct page *page_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "__get_free_pages"); \ + else \ + page_addr = (struct page *)__get_free_pages(gfp_mask, order); \ + if (page_addr && !is_non_trackable_alloc_func(__func__)) { \ + memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), (unsigned long)(order), 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + page_addr; \ +}) + +#define get_zeroed_page(gfp_mask) ({ \ + struct page *page_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "get_zeroed_page"); \ + else \ + page_addr = (struct page *)get_zeroed_page(gfp_mask); \ + if (page_addr && !is_non_trackable_alloc_func(__func__)) { \ + memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + (unsigned long)page_addr; \ +}) + +#define __free_pages(addr, order) ({ \ + void *__memtrack_addr = (void *)addr; \ + \ + if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \ + if (!memtrack_check_size(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), (unsigned long)(order), __FILE__, __LINE__)) \ + memtrack_free(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ + } \ + 
__free_pages(addr, order); \ +}) + + +#define free_pages(addr, order) ({ \ + void *__memtrack_addr = (void *)addr; \ + \ + if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \ + if (!memtrack_check_size(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), (unsigned long)(order), __FILE__, __LINE__)) \ + memtrack_free(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ + } \ + free_pages(addr, order); \ +}) + + +#define get_page(addr) ({ \ + void *__memtrack_addr = (void *)addr; \ + \ + if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \ + if (memtrack_is_new_addr(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), 0, __FILE__, __LINE__)) { \ + memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + } \ + get_page(addr); \ +}) + +#define get_user_pages_fast(start, nr_pages, write, pages) ({ \ + int __memtrack_rc = -1; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "get_user_pages_fast"); \ + else \ + __memtrack_rc = get_user_pages_fast(start, nr_pages, write, pages); \ + if (__memtrack_rc > 0 && !is_non_trackable_alloc_func(__func__)) { \ + int __memtrack_i; \ + \ + for (__memtrack_i = 0; __memtrack_i < __memtrack_rc; __memtrack_i++) \ + memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(pages[__memtrack_i]), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + __memtrack_rc; \ +}) + +#define put_page(addr) ({ \ + void *__memtrack_addr = (void *)addr; \ + \ + if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \ + /* Check whether this is not part of umem put page & not */\ + /* a new addr and the ref-count is 1 then we'll free this addr */\ + /* Don't change the order these conditions */ \ + if (!is_umem_put_page(__func__) && \ + !memtrack_is_new_addr(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), 1, __FILE__, __LINE__) && \ + 
(memtrack_get_page_ref_count((unsigned long)(__memtrack_addr)) == 1)) { \ + memtrack_free(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ + } \ + } \ + put_page(addr); \ +}) + + +/* Work-Queue handlers */ +#ifdef create_workqueue + #undef create_workqueue +#endif +#ifdef create_rt_workqueue + #undef create_rt_workqueue +#endif +#ifdef create_freezeable_workqueue + #undef create_freezeable_workqueue +#endif +#ifdef create_singlethread_workqueue + #undef create_singlethread_workqueue +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) /* 2.6.18 - 2.6.19 */ +#define create_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 0); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) + +#define create_singlethread_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_singlethread_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 1); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) + +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) /* 2.6.20 - 2.6.27 */ +#define create_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 0, 0); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) 
/* 2.6.20 - 2.6.21 */ +#define create_freezeable_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_freezeable_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 0, 1); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) +#else /* 2.6.22 - 2.6.27 */ +#define create_freezeable_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_freezeable_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 1, 1); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) +#endif /* 2.6.20 - 2.6.27 */ + +#define create_singlethread_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_singlethread_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 1, 0); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) + +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) /* 2.6.28 - 2.6.35 */ + +#ifdef alloc_workqueue + #undef alloc_workqueue +#endif + +#define alloc_workqueue(name, flags, max_active) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), (flags), (max_active), 0); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) + +#define 
create_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 0, 0, 0); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) + +#define create_rt_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_rt_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 0, 0, 1); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) + +#define create_freezeable_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_freezeable_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 1, 1, 0); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) + +#define create_singlethread_workqueue(name) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_singlethread_workqueue"); \ + else \ + wq_addr = __create_workqueue((name), 1, 0, 0); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) +#else /* 2.6.36 */ +#ifdef alloc_workqueue + #undef alloc_workqueue +#endif +#ifdef CONFIG_LOCKDEP +#define alloc_workqueue(name, flags, max_active) \ +({ \ + static struct lock_class_key __key; \ + const char *__lock_name; \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if 
(__builtin_constant_p(name)) \ + __lock_name = (name); \ + else \ + __lock_name = #name; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_workqueue"); \ + else \ + wq_addr = __alloc_workqueue_key((name), (flags), (max_active), \ + &__key, __lock_name); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) +#else +#define alloc_workqueue(name, flags, max_active) ({ \ + struct workqueue_struct *wq_addr = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_workqueue"); \ + else \ + wq_addr = __alloc_workqueue_key((name), (flags), (max_active), NULL, NULL); \ + if (wq_addr) { \ + memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \ + } \ + wq_addr; \ +}) +#endif + +#define create_workqueue(name) \ + alloc_workqueue((name), WQ_RESCUER, 1); + +#define create_freezeable_workqueue(name) \ + alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_RESCUER, 1); + +#define create_singlethread_workqueue(name) \ + alloc_workqueue((name), WQ_UNBOUND | WQ_RESCUER, 1); + +#endif /* Work-Queue Kernel Versions */ + +#define destroy_workqueue(wq_addr) ({ \ + void *__memtrack_addr = (void *)wq_addr; \ + \ + if (__memtrack_addr) { \ + memtrack_free(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \ + } \ + destroy_workqueue(wq_addr); \ +}) + +/* ONLY error injection to functions that we don't monitor */ +#define alloc_skb(size, prio) ({ \ + struct sk_buff *__memtrack_skb = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_skb"); \ + else \ + __memtrack_skb = alloc_skb(size, prio); \ + __memtrack_skb; \ +}) + +#define dev_alloc_skb(size) ({ \ + struct sk_buff *__memtrack_skb = NULL; \ + \ + if (memtrack_inject_error()) \ + 
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "dev_alloc_skb"); \ + else \ + __memtrack_skb = dev_alloc_skb(size); \ + __memtrack_skb; \ +}) + +#define alloc_skb_fclone(size, prio) ({ \ + struct sk_buff *__memtrack_skb = NULL; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_skb_fclone"); \ + else \ + __memtrack_skb = alloc_skb_fclone(size, prio); \ + __memtrack_skb; \ +}) + +#define copy_from_user(to, from, n) ({ \ + int ret = n; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "copy_from_user"); \ + else \ + ret = copy_from_user(to, from, n); \ + ret; \ +}) + +#define copy_to_user(to, from, n) ({ \ + int ret = n; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "copy_to_user"); \ + else \ + ret = copy_to_user(to, from, n); \ + ret; \ +}) + +#define sysfs_create_file(kobj, attr) ({ \ + int ret = -ENOSYS; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "sysfs_create_file"); \ + else \ + ret = sysfs_create_file(kobj, attr); \ + ret; \ +}) + +#define sysfs_create_link(kobj, target, name) ({ \ + int ret = -ENOSYS; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "sysfs_create_link"); \ + else \ + ret = sysfs_create_link(kobj, target, name); \ + ret; \ +}) + +#define sysfs_create_group(kobj, grp) ({ \ + int ret = -ENOSYS; \ + \ + if (memtrack_inject_error()) \ + MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "sysfs_create_group"); \ + else \ + ret = sysfs_create_group(kobj, grp); \ + ret; \ +}) + #endif /* __mtrack_h_ */ diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/Makefile b/sys/ofed/drivers/infiniband/hw/mlx4/Makefile deleted file mode 100644 index 7b81da0..0000000 --- a/sys/ofed/drivers/infiniband/hw/mlx4/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -# $FreeBSD$ -#.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4 
-#.PATH: ${.CURDIR}/../../../../include/linux - -.include <src.opts.mk> - -KMOD = mlx4ib -SRCS = device_if.h bus_if.h pci_if.h vnode_if.h -#SRCS+= linux_compat.c linux_radix.c -SRCS+= ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c -SRCS+= opt_inet.h opt_inet6.h - -#CFLAGS+= -I${.CURDIR}/../../ofed/include/ -CFLAGS+= -I${.CURDIR}/../../../../include -CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM - -.if !defined(KERNBUILDDIR) -.if ${MK_INET_SUPPORT} != "no" -opt_inet.h: - @echo "#define INET 1" > ${.TARGET} -.endif - -.if ${MK_INET6_SUPPORT} != "no" -opt_inet6.h: - @echo "#define INET6 1" > ${.TARGET} -.endif -.endif - -.include <bsd.kmod.mk> - -CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/ah.c b/sys/ofed/drivers/infiniband/hw/mlx4/ah.c index fe35e62..1c30fa9 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/ah.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/ah.c @@ -30,7 +30,6 @@ * SOFTWARE. */ - #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> @@ -95,21 +94,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); struct mlx4_dev *dev = ibdev->dev; - union ib_gid sgid; - u8 mac[6]; - int err; - int is_mcast; + int is_mcast = 0; + struct in6_addr in6; u16 vlan_tag; - err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); - if (err) - return ERR_PTR(err); - - memcpy(ah->av.eth.mac, mac, 6); - err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid); - if (err) - return ERR_PTR(err); - vlan_tag = rdma_get_vlan_id(&sgid); + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); + if (rdma_is_multicast_addr(&in6)) { + is_mcast = 1; + resolve_mcast_mac(&in6, ah->av.eth.mac); + } else { + memcpy(ah->av.eth.mac, ah_attr->dmac, 6); + } + vlan_tag = ah_attr->vlan_id; if (vlan_tag < 0x1000) vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = 
cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c b/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c index 0738adc..17e646a 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -57,6 +57,7 @@ struct mlx4_alias_guid_work_context { int query_id; struct list_head list; int block_num; + u8 method; }; struct mlx4_next_alias_guid_work { @@ -80,7 +81,8 @@ void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num, guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. ports_guid[port_num - 1]. all_rec_per_port[block_num].guid_indexes); - pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, + (unsigned long long)guid_indexes); for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { /* The location of the specific index starts from bit number 4 @@ -144,7 +146,8 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. ports_guid[port_num - 1]. 
all_rec_per_port[block_num].guid_indexes); - pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, + (unsigned long long)guid_indexes); /*calculate the slaves and notify them*/ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { @@ -201,7 +204,7 @@ static void aliasguid_query_handler(int status, { struct mlx4_ib_dev *dev; struct mlx4_alias_guid_work_context *cb_ctx = context; - u8 port_index ; + u8 port_index; int i; struct mlx4_sriov_alias_guid_info_rec_det *rec; unsigned long flags, flags1; @@ -240,6 +243,18 @@ static void aliasguid_query_handler(int status, for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) { __be64 tmp_cur_ag; tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE]; + if ((cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) + && (MLX4_NOT_SET_GUID == tmp_cur_ag)) { + pr_debug("%s:Record num %d in block_num:%d " + "was deleted by SM,ownership by %d " + "(0 = driver, 1=sysAdmin, 2=None)\n", + __func__, i, guid_rec->block_num, + rec->ownership); + rec->guid_indexes = rec->guid_indexes & + ~mlx4_ib_get_aguid_comp_mask_from_ix(i); + continue; + } + /* check if the SM didn't assign one of the records. * if it didn't, if it was not sysadmin request: * ask the SM to give a new GUID, (instead of the driver request). 
@@ -379,7 +394,7 @@ static int set_guid_rec(struct ib_device *ibdev, callback_context->port = port; callback_context->dev = dev; callback_context->block_num = index; - + callback_context->method = rec_det->method; memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); guid_info_rec.lid = cpu_to_be16(attr.lid); diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/cm.c b/sys/ofed/drivers/infiniband/hw/mlx4/cm.c index 1bfbeee..3ff7600 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/cm.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/cm.c @@ -33,6 +33,7 @@ #include <rdma/ib_mad.h> #include <linux/mlx4/cmd.h> +#include <linux/rbtree.h> #include <linux/idr.h> #include <rdma/ib_cm.h> @@ -60,6 +61,11 @@ struct cm_generic_msg { __be32 remote_comm_id; }; +struct cm_sidr_generic_msg { + struct ib_mad_hdr hdr; + __be32 request_id; +}; + struct cm_req_msg { unsigned char unused[0x60]; union ib_gid primary_path_sgid; @@ -68,28 +74,62 @@ struct cm_req_msg { static void set_local_comm_id(struct ib_mad *mad, u32 cm_id) { + if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + msg->request_id = cpu_to_be32(cm_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + pr_err("trying to set local_comm_id in SIDR_REP\n"); + return; + } else { struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; msg->local_comm_id = cpu_to_be32(cm_id); + } } static u32 get_local_comm_id(struct ib_mad *mad) { + if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + return be32_to_cpu(msg->request_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + pr_err("trying to set local_comm_id in SIDR_REP\n"); + return -1; + } else { struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - return be32_to_cpu(msg->local_comm_id); + } } static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id) { + if (mad->mad_hdr.attr_id == 
CM_SIDR_REP_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + msg->request_id = cpu_to_be32(cm_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + pr_err("trying to set remote_comm_id in SIDR_REQ\n"); + return; + } else { struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; msg->remote_comm_id = cpu_to_be32(cm_id); + } } static u32 get_remote_comm_id(struct ib_mad *mad) { + if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + return be32_to_cpu(msg->request_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + pr_err("trying to set remote_comm_id in SIDR_REQ\n"); + return -1; + } else { struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - return be32_to_cpu(msg->remote_comm_id); + } } static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad) @@ -285,19 +325,22 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id u32 sl_cm_id; int pv_cm_id = -1; - sl_cm_id = get_local_comm_id(mad); - if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || - mad->mad_hdr.attr_id == CM_REP_ATTR_ID) { + mad->mad_hdr.attr_id == CM_REP_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + sl_cm_id = get_local_comm_id(mad); id = id_map_alloc(ibdev, slave_id, sl_cm_id); if (IS_ERR(id)) { mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n", __func__, slave_id, sl_cm_id); return PTR_ERR(id); } - } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) { + } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { return 0; } else { + sl_cm_id = get_local_comm_id(mad); id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); } @@ -323,7 +366,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, u32 pv_cm_id; struct id_map_entry *id; - if 
(mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) { + if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { union ib_gid gid; if (is_eth) @@ -333,7 +377,7 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id); if (*slave < 0) { mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n", - (long long)gid.global.interface_id); + (unsigned long long)gid.global.interface_id); return -ENOENT; } return 0; diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/cq.c b/sys/ofed/drivers/infiniband/hw/mlx4/cq.c index 293917a..52788c2 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/cq.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/cq.c @@ -33,6 +33,7 @@ #include <linux/mlx4/cq.h> #include <linux/mlx4/qp.h> +#include <linux/mlx4/srq.h> #include <linux/slab.h> #include "mlx4_ib.h" @@ -92,12 +93,33 @@ static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq) return get_sw_cqe(cq, cq->mcq.cons_index); } -int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +int mlx4_ib_modify_cq(struct ib_cq *cq, + struct ib_cq_attr *cq_attr, + int cq_attr_mask) { + int err = 0; struct mlx4_ib_cq *mcq = to_mcq(cq); struct mlx4_ib_dev *dev = to_mdev(cq->device); - return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period); + if (cq_attr_mask & IB_CQ_CAP_FLAGS) { + if (cq_attr->cq_cap_flags & IB_CQ_TIMESTAMP) + return -ENOTSUPP; + + if (cq_attr->cq_cap_flags & IB_CQ_IGNORE_OVERRUN) { + if (dev->dev->caps.cq_flags & MLX4_DEV_CAP_CQ_FLAG_IO) + err = mlx4_cq_ignore_overrun(dev->dev, &mcq->mcq); + else + err = -ENOSYS; + } + } + + if (!err) + if (cq_attr_mask & IB_CQ_MODERATION) + err = mlx4_cq_modify(dev->dev, &mcq->mcq, + cq_attr->moderation.cq_count, + cq_attr->moderation.cq_period); + + return err; } static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent) @@ -173,7 +195,11 @@ err_buf: return err; } -struct ib_cq 
*mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, +/* we don't support system timestamping */ +#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_TIMESTAMP + +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, + struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { @@ -181,11 +207,16 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector struct mlx4_ib_cq *cq; struct mlx4_uar *uar; int err; + int entries = attr->cqe; + int vector = attr->comp_vector; if (entries < 1 || entries > dev->dev->caps.max_cqes) return ERR_PTR(-EINVAL); - cq = kmalloc(sizeof *cq, GFP_KERNEL); + if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) + return ERR_PTR(-EINVAL); + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) return ERR_PTR(-ENOMEM); @@ -195,6 +226,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; + cq->create_flags = attr->flags; if (context) { struct mlx4_ib_create_cq ucmd; @@ -236,7 +268,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector vector = dev->eq_table[vector % ibdev->num_comp_vectors]; err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, vector, 0, 0); + cq->db.dma, &cq->mcq, vector, 0, + !!(cq->create_flags & IB_CQ_TIMESTAMP)); if (err) goto err_dbmap; @@ -331,21 +364,23 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq) u32 i; i = cq->mcq.cons_index; - while (get_sw_cqe(cq, i & cq->ibcq.cqe)) + while (get_sw_cqe(cq, i)) ++i; return i - cq->mcq.cons_index; } -static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) +static int mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) { struct mlx4_cqe *cqe, *new_cqe; int i; int cqe_size = cq->buf.entry_size; int cqe_inc = cqe_size == 64 ? 
1 : 0; + struct mlx4_cqe *start_cqe; i = cq->mcq.cons_index; cqe = get_cqe(cq, i & cq->ibcq.cqe); + start_cqe = cqe; cqe += cqe_inc; while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { @@ -357,9 +392,15 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); cqe = get_cqe(cq, ++i & cq->ibcq.cqe); + if (cqe == start_cqe) { + pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", cq->mcq.cqn); + return -ENOMEM; + } cqe += cqe_inc; + } ++cq->mcq.cons_index; + return 0; } int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) @@ -374,7 +415,6 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -ENOSYS; mutex_lock(&cq->resize_mutex); - if (entries < 1 || entries > dev->dev->caps.max_cqes) { err = -EINVAL; goto out; @@ -386,6 +426,11 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) goto out; } + if (entries > dev->dev->caps.max_cqes + 1) { + err = -EINVAL; + goto out; + } + if (ibcq->uobject) { err = mlx4_alloc_resize_umem(dev, cq, entries, udata); if (err) @@ -425,7 +470,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) spin_lock_irq(&cq->lock); if (cq->resize_buf) { - mlx4_ib_cq_resize_copy_cqes(cq); + err = mlx4_ib_cq_resize_copy_cqes(cq); tmp_buf = cq->buf; tmp_cqe = cq->ibcq.cqe; cq->buf = cq->resize_buf->buf; @@ -580,7 +625,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) } static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe) + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@ -590,12 +635,19 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct DMA_FROM_DEVICE); hdr = (struct 
mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr); wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index); - wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); - wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF; wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0; wc->dlid_path_bits = 0; + if (is_eth) { + wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid); + memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4); + memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2); + } else { + wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); + wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); + } + return 0; } @@ -607,11 +659,14 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_qp *mqp; struct mlx4_ib_wq *wq; struct mlx4_ib_srq *srq; + struct mlx4_srq *msrq = NULL; int is_send; int is_error; u32 g_mlpath_rqpn; u16 wqe_ctr; unsigned tail = 0; + int timestamp_en = !!(cq->create_flags & IB_CQ_TIMESTAMP); + repoll: cqe = next_cqe_sw(cq); @@ -675,6 +730,20 @@ repoll: wc->qp = &(*cur_qp)->ibqp; + if (wc->qp->qp_type == IB_QPT_XRC_TGT) { + u32 srq_num; + g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); + srq_num = g_mlpath_rqpn & 0xffffff; + /* SRQ is also in the radix tree */ + msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, + srq_num); + if (unlikely(!msrq)) { + pr_warn("CQ %06x with entry for unknown SRQN %06x\n", + cq->mcq.cqn, srq_num); + return -EINVAL; + } + } + if (is_send) { wq = &(*cur_qp)->sq; if (!(*cur_qp)->sq_signal_bits) { @@ -688,6 +757,11 @@ repoll: wqe_ctr = be16_to_cpu(cqe->wqe_index); wc->wr_id = srq->wrid[wqe_ctr]; mlx4_ib_free_srq_wqe(srq, wqe_ctr); + } else if (msrq) { + srq = to_mibsrq(msrq); + wqe_ctr = be16_to_cpu(cqe->wqe_index); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx4_ib_free_srq_wqe(srq, wqe_ctr); } else { wq = &(*cur_qp)->rq; tail = wq->tail & (wq->wqe_cnt - 1); @@ -707,6 +781,7 @@ repoll: switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { case 
MLX4_OPCODE_RDMA_WRITE_IMM: wc->wc_flags |= IB_WC_WITH_IMM; + /* fall through */ case MLX4_OPCODE_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; @@ -778,10 +853,31 @@ repoll: if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, cqe); + return use_tunnel_data + (*cur_qp, cq, wc, tail, cqe, + rdma_port_get_link_layer + (wc->qp->device, + (*cur_qp)->port) == + IB_LINK_LAYER_ETHERNET); } + if (timestamp_en) { + /* currently, only CQ_CREATE_WITH_TIMESTAMPING_RAW is + * supported. CQ_CREATE_WITH_TIMESTAMPING_SYS isn't + * supported */ + if (cq->create_flags & IB_CQ_TIMESTAMP_TO_SYS_TIME) { + wc->ts.timestamp = 0; + } else { + wc->ts.timestamp = + ((u64)(be32_to_cpu(cqe->timestamp_16_47) + + !cqe->timestamp_0_15) << 16) + | be16_to_cpu(cqe->timestamp_0_15); + wc->wc_flags |= IB_WC_WITH_TIMESTAMP; + } + } else { + wc->wc_flags |= IB_WC_WITH_SLID; wc->slid = be16_to_cpu(cqe->rlid); + } g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; @@ -789,11 +885,27 @@ repoll: wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum) ? 
IB_WC_IP_CSUM_OK : 0; + if (!timestamp_en) { if (rdma_port_get_link_layer(wc->qp->device, - (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET) + (*cur_qp)->port) == + IB_LINK_LAYER_ETHERNET) wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; else wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; + wc->wc_flags |= IB_WC_WITH_SL; + } + if ((be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_VLAN_PRESENT_MASK) && !timestamp_en) { + wc->vlan_id = be16_to_cpu(cqe->sl_vid) & + MLX4_CQE_VID_MASK; + wc->wc_flags |= IB_WC_WITH_VLAN; + } else { + wc->vlan_id = 0xffff; + } + if (!timestamp_en) { + memcpy(wc->smac, cqe->smac, 6); + wc->wc_flags |= IB_WC_WITH_SMAC; + } } return 0; diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c b/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c index 8aee423..c517409 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c @@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mad.c b/sys/ofed/drivers/infiniband/hw/mlx4/mad.c index 74bbf5c..bd36931 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/mad.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/mad.c @@ -545,11 +545,32 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, /* adjust tunnel data */ tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix); - tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - 
tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF); tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0; + if (is_eth) { + u16 vlan = 0; + if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan, + NULL)) { + if (vlan != wc->vlan_id) + /* VST and default vlan is not the packet vlan drop the + * packet*/ + goto out; + else + /* VST , remove hide the vlan from the VF */ + vlan = 0; + } else { + vlan = wc->vlan_id; + } + + tun_mad->hdr.sl_vid = cpu_to_be16(vlan); + memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4); + memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); + } else { + tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); + tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); + } + ib_dma_sync_single_for_device(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, sizeof (struct mlx4_rcv_tunnel_mad), @@ -696,12 +717,11 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, be16_to_cpu(in_mad->mad_hdr.attr_id)); if (in_wc->wc_flags & IB_WC_GRH) { pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", - (long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix), - (long long) - be64_to_cpu(in_grh->sgid.global.interface_id)); + (unsigned long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix), + (unsigned long long)be64_to_cpu(in_grh->sgid.global.interface_id)); pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n", - (long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix), - (long long)be64_to_cpu(in_grh->dgid.global.interface_id)); + (unsigned long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix), + (unsigned long long)be64_to_cpu(in_grh->dgid.global.interface_id)); } } @@ -946,7 +966,7 @@ int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index, err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0, MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (!err) memcpy(counter, 
mailbox->buf, MLX4_IF_STAT_SZ(1)); @@ -961,7 +981,7 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, { struct mlx4_ib_dev *dev = to_mdev(ibdev); int err; - u32 counter_index = dev->counters[port_num - 1] & 0xffff; + u32 counter_index = dev->counters[port_num - 1].counter_index & 0xffff; u8 mode; char counter_buf[MLX4_IF_STAT_SZ(1)]; union mlx4_counter *counter = (union mlx4_counter *) @@ -970,10 +990,16 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) return -EINVAL; - if (mlx4_ib_query_if_stat(dev, counter_index, counter, 0)) { - err = IB_MAD_RESULT_FAILURE; - } else { + /* in case of default counter IB shares the counter with ETH */ + /* the state could be -EEXIST or -ENOSPC */ + if (dev->counters[port_num - 1].status) { memset(out_mad->data, 0, sizeof out_mad->data); + err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + } else { + if (mlx4_ib_query_if_stat(dev, counter_index, counter, 0)) + return IB_MAD_RESULT_FAILURE; + + memset(out_mad->data, 0, sizeof(out_mad->data)); mode = counter->control.cnt_mode & 0xFF; err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; switch (mode & 0xf) { @@ -992,7 +1018,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } } - return err; } @@ -1179,6 +1204,11 @@ void handle_port_mgmt_change_event(struct work_struct *work) u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid); u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf; update_sm_ah(dev, port, lid, sl); + mlx4_ib_dispatch_event(dev, port, IB_EVENT_SM_CHANGE); + if (mlx4_is_master(dev->dev)) + mlx4_gen_slaves_port_mgt_ev(dev->dev, port, + changed_attr & MSTR_SM_CHANGE_MASK, + lid, sl); } /* Check if it is a lid change event */ @@ -1295,8 +1325,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int 
slave, u8 port, - enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad) + enum ib_qp_type dest_qpt, u16 pkey_index, + u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, + u8 *s_mac, struct ib_mad *mad) { struct ib_sge list; struct ib_send_wr wr, *bad_wr; @@ -1385,6 +1416,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.num_sge = 1; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; + if (s_mac) + memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); + ret = ib_post_send(send_qp, &wr, &bad_wr); out: @@ -1512,6 +1546,11 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc if (ah_attr.ah_flags & IB_AH_GRH) if (get_real_sgid_index(dev, slave, ctx->port, &ah_attr)) return; + memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); + ah_attr.vlan_id = tunnel->hdr.vlan; + /* if slave have default vlan use it */ + mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, + &ah_attr.vlan_id, &ah_attr.sl); mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? 
@@ -1519,7 +1558,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc be16_to_cpu(tunnel->hdr.pkey_index), be32_to_cpu(tunnel->hdr.remote_qpn), be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, &tunnel->mad); + &ah_attr, wc->smac, &tunnel->mad); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, @@ -1564,6 +1603,12 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, tun_qp->ring[i].addr, rx_buf_size, DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(ctx->ib_dev, + tun_qp->ring[i].map))) { + mlx4_ib_warn(ctx->ib_dev, "ib_dma_map_single failed\n"); + kfree(tun_qp->ring[i].addr); + goto err; + } } for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { @@ -1576,6 +1621,12 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, tun_qp->tx_ring[i].buf.addr, tx_buf_size, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(ctx->ib_dev, + tun_qp->tx_ring[i].buf.map))) { + mlx4_ib_warn(ctx->ib_dev, "ib_dma_map_single failed\n"); + kfree(tun_qp->tx_ring[i].buf.addr); + goto tx_err; + } tun_qp->tx_ring[i].ah = NULL; } spin_lock_init(&tun_qp->tx_lock); @@ -1664,12 +1715,12 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) (MLX4_NUM_TUNNEL_BUFS - 1)); if (ret) pr_err("Failed reposting tunnel " - "buf:%lld\n", (long long)wc.wr_id); + "buf:%lld\n", (unsigned long long)wc.wr_id); break; case IB_WC_SEND: pr_debug("received tunnel send completion:" "wrid=0x%llx, status=0x%x\n", - (long long)wc.wr_id, wc.status); + (unsigned long long)wc.wr_id, wc.status); ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah @@ -1685,7 +1736,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) } else { pr_debug("mlx4_ib: completion error in tunnel: %d." 
" status = %d, wrid = 0x%llx\n", - ctx->slave, wc.status, (long long)wc.wr_id); + ctx->slave, wc.status, (unsigned long long)wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah); @@ -1757,6 +1808,11 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, memset(&attr, 0, sizeof attr); attr.qp_state = IB_QPS_INIT; + ret = 0; + if (create_tun) + ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave, + ctx->port, 0xFFFF, &attr.pkey_index); + if (ret || !create_tun) attr.pkey_index = to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0]; attr.qkey = IB_QP1_QKEY; @@ -1837,7 +1893,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1))) pr_err("Failed reposting SQP " - "buf:%lld\n", (long long)wc.wr_id); + "buf:%lld\n", (unsigned long long)wc.wr_id); break; default: BUG_ON(1); @@ -1846,7 +1902,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) } else { pr_debug("mlx4_ib: completion error in tunnel: %d." 
" status = %d, wrid = 0x%llx\n", - ctx->slave, wc.status, (long long)wc.wr_id); + ctx->slave, wc.status, (unsigned long long)wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah); diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/main.c b/sys/ofed/drivers/infiniband/hw/mlx4/main.c index fd0b723..bdcffbe 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/main.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/main.c @@ -32,37 +32,37 @@ */ #include <linux/module.h> - -#ifdef __linux__ -#include <linux/proc_fs.h> -#endif - #include <linux/slab.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/if_vlan.h> -#include <linux/bitops.h> -#include <linux/if_ether.h> #include <linux/fs.h> +#include <net/ipv6.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_user_verbs_exp.h> #include <rdma/ib_addr.h> #include <linux/mlx4/driver.h> #include <linux/mlx4/cmd.h> #include <linux/sched.h> +#include <linux/page.h> +#include <linux/printk.h> #include "mlx4_ib.h" +#include "mlx4_exp.h" #include "user.h" #include "wc.h" #define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "1.0" -#define DRV_RELDATE "April 4, 2008" +#define DRV_RELDATE __DATE__ #define MLX4_IB_DRIVER_PROC_DIR_NAME "driver/mlx4_ib" #define MLX4_IB_MRS_PROC_DIR_NAME "mrs" +#define MLX4_IB_FLOW_MAX_PRIO 0xFFF +#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); @@ -73,20 +73,30 @@ MODULE_VERSION(DRV_VERSION); int mlx4_ib_sm_guid_assign = 1; -#ifdef __linux__ -struct proc_dir_entry *mlx4_mrs_dir_entry; -static struct proc_dir_entry *mlx4_ib_driver_dir_entry; -#endif - module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)"); -static char dev_assign_str[512]; -//module_param_string(dev_assign_str, 
dev_assign_str, sizeof(dev_assign_str), 0644); -MODULE_PARM_DESC(dev_assign_str, "Map all device function numbers to " - "IB device numbers following the pattern: " - "bb:dd.f-0,bb:dd.f-1,... (all numbers are hexadecimals)." - " Max supported devices - 32"); +enum { + MAX_NUM_STR_BITMAP = 1 << 15, + DEFAULT_TBL_VAL = -1 +}; + +static struct mlx4_dbdf2val_lst dev_assign_str = { + .name = "dev_assign_str param", + .num_vals = 1, + .def_val = {DEFAULT_TBL_VAL}, + .range = {0, MAX_NUM_STR_BITMAP - 1} +}; +module_param_string(dev_assign_str, dev_assign_str.str, + sizeof(dev_assign_str.str), 0444); +MODULE_PARM_DESC(dev_assign_str, + "Map device function numbers to IB device numbers (e.g. '0000:04:00.0-0,002b:1c:0b.a-1,...').\n" + "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for IB device numbers (e.g. 1).\n" + "\t\tMax supported devices - 32"); + + +static unsigned long *dev_num_str_bitmap; +static spinlock_t dev_num_str_lock; static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" @@ -106,11 +116,16 @@ struct dev_rec { int nr; }; -#define MAX_DR 32 -static struct dev_rec dr[MAX_DR]; +static int dr_active; static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); +static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, struct net_device*, + unsigned long); + +static u8 mlx4_ib_get_dev_port(struct net_device *dev, + struct mlx4_ib_dev *ibdev); + static struct workqueue_struct *wq; static void init_query_mad(struct ib_smp *mad) @@ -123,7 +138,30 @@ static void init_query_mad(struct ib_smp *mad) static union ib_gid zgid; -static int mlx4_ib_query_device(struct ib_device *ibdev, +static int check_flow_steering_support(struct mlx4_dev *dev) +{ + int eth_num_ports = 0; + int ib_num_ports = 0; + int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED; + + if (dmfs) { + int i; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + eth_num_ports++; + mlx4_foreach_port(i, dev, 
MLX4_PORT_TYPE_IB) + ib_num_ports++; + dmfs &= (!ib_num_ports || + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) && + (!eth_num_ports || + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)); + if (ib_num_ports && mlx4_is_mfunc(dev)) { + dmfs = 0; + } + } + return dmfs; +} + +int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { struct mlx4_ib_dev *dev = to_mdev(ibdev); @@ -174,12 +212,26 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_CROSS_CHANNEL) + props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; + + if (check_flow_steering_support(dev->dev)) + props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; + props->device_cap_flags |= IB_DEVICE_QPG; if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) { props->device_cap_flags |= IB_DEVICE_UD_RSS; props->max_rss_tbl_sz = dev->dev->caps.max_rss_tbl_sz; } + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW; + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; + else + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; + } props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = dev->dev->pdev->device; @@ -213,6 +265,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; + props->hca_core_clock = dev->dev->caps.hca_core_clock; + if (dev->dev->caps.hca_core_clock > 0) + props->comp_mask |= IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK; + if (dev->dev->caps.cq_timestamp) { + props->timestamp_mask = 0xFFFFFFFFFFFF; + 
props->comp_mask |= IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK; + } out: kfree(in_mad); @@ -334,6 +393,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, struct net_device *ndev; enum ib_mtu tmp; struct mlx4_cmd_mailbox *mailbox; + unsigned long flags; int err = 0; mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); @@ -362,7 +422,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; - spin_lock(&iboe->lock); + spin_lock_irqsave(&iboe->lock, flags); ndev = iboe->netdevs[port - 1]; if (!ndev) goto out_unlock; @@ -374,7 +434,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, IB_PORT_ACTIVE : IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); out_unlock: - spin_unlock(&iboe->lock); + spin_unlock_irqrestore(&iboe->lock, flags); out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; @@ -674,7 +734,9 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) return 0; } -#ifdef __linux__ + +/* XXX FBSD has no support for get_unmapped_area function */ +#if 0 static unsigned long mlx4_ib_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -732,7 +794,6 @@ full_search: static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct mlx4_ib_dev *dev = to_mdev(context->device); - int err; /* Last 8 bits hold the command others are data per that command */ unsigned long command = vma->vm_pgoff & MLX4_IB_MMAP_CMD_MASK; @@ -758,31 +819,81 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) dev->dev->caps.num_uars, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - } else if (command == MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) { - /* Getting contiguous physical pages */ - unsigned long total_size = vma->vm_end - vma->vm_start; - unsigned long page_size_order = (vma->vm_pgoff) >> - 
MLX4_IB_MMAP_CMD_BITS; - struct ib_cmem *ib_cmem; - ib_cmem = ib_cmem_alloc_contiguous_pages(context, total_size, - page_size_order); - if (IS_ERR(ib_cmem)) { - err = PTR_ERR(ib_cmem); - return err; - } + } else if (command == MLX4_IB_MMAP_GET_HW_CLOCK) { + struct mlx4_clock_params params; + int ret; - err = ib_cmem_map_contiguous_pages_to_vma(ib_cmem, vma); - if (err) { - ib_cmem_release_contiguous_pages(ib_cmem); - return err; - } - return 0; + ret = mlx4_get_internal_clock_params(dev->dev, ¶ms); + if (ret) + return ret; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + (pci_resource_start(dev->dev->pdev, + params.bar) + params.offset) + >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; } else return -EINVAL; return 0; } +static int mlx4_ib_ioctl(struct ib_ucontext *context, unsigned int cmd, + unsigned long arg) +{ + struct mlx4_ib_dev *dev = to_mdev(context->device); + int ret; + int offset; + + switch (cmd) { + case MLX4_IOCHWCLOCKOFFSET: { + struct mlx4_clock_params params; + int ret; + ret = mlx4_get_internal_clock_params(dev->dev, ¶ms); + if (!ret) { + offset = params.offset % PAGE_SIZE; + ret = put_user(offset, + (int *)arg); + return sizeof(int); + } else { + return ret; + } + } + default: { + pr_err("mlx4_ib: invalid ioctl %u command with arg %lX\n", + cmd, arg); + return -ENOTTY; + } + } + + return ret; +} + +static int mlx4_ib_query_values(struct ib_device *device, int q_values, + struct ib_device_values *values) +{ + struct mlx4_ib_dev *dev = to_mdev(device); + cycle_t cycles; + + values->values_mask = 0; + if (q_values & IBV_VALUES_HW_CLOCK) { + cycles = mlx4_read_clock(dev->dev); + if (cycles < 0) { + values->hwclock = cycles & CORE_CLOCK_MASK; + values->values_mask |= IBV_VALUES_HW_CLOCK; + } + q_values &= ~IBV_VALUES_HW_CLOCK; + } + + if (q_values) + return -ENOTTY; + + return 0; +} + static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext 
*context, struct ib_udata *udata) @@ -926,258 +1037,220 @@ struct mlx4_ib_steering { union ib_gid gid; }; -static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +static int parse_flow_attr(struct mlx4_dev *dev, + union ib_flow_spec *ib_spec, + struct _rule_hw *mlx4_spec) { - int err; - struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); - struct mlx4_ib_qp *mqp = to_mqp(ibqp); - u64 reg_id; - struct mlx4_ib_steering *ib_steering = NULL; + enum mlx4_net_trans_rule_id type; + + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + type = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac, + ETH_ALEN); + memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac, + ETH_ALEN); + mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag; + mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; + break; - if (mdev->dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) { - ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); - if (!ib_steering) - return -ENOMEM; - } + case IB_FLOW_SPEC_IB: + type = MLX4_NET_TRANS_RULE_ID_IB; + mlx4_spec->ib.l3_qpn = ib_spec->ib.val.l3_type_qpn; + mlx4_spec->ib.qpn_mask = ib_spec->ib.mask.l3_type_qpn; + memcpy(&mlx4_spec->ib.dst_gid, ib_spec->ib.val.dst_gid, 16); + memcpy(&mlx4_spec->ib.dst_gid_msk, + ib_spec->ib.mask.dst_gid, 16); + break; - err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, - !!(mqp->flags & - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), - MLX4_PROT_IB_IPV6, ®_id); - if (err) - goto err_malloc; + case IB_FLOW_SPEC_IPV4: + type = MLX4_NET_TRANS_RULE_ID_IPV4; + mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip; + mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip; + mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip; + mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip; + break; - err = add_gid_entry(ibqp, gid); - if (err) - goto err_add; + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + type = ib_spec->type == IB_FLOW_SPEC_TCP ? 
+ MLX4_NET_TRANS_RULE_ID_TCP : + MLX4_NET_TRANS_RULE_ID_UDP; + mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port; + mlx4_spec->tcp_udp.dst_port_msk = + ib_spec->tcp_udp.mask.dst_port; + mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port; + mlx4_spec->tcp_udp.src_port_msk = + ib_spec->tcp_udp.mask.src_port; + break; - if (ib_steering) { - memcpy(ib_steering->gid.raw, gid->raw, 16); - ib_steering->reg_id = reg_id; - mutex_lock(&mqp->mutex); - list_add(&ib_steering->list, &mqp->steering_rules); - mutex_unlock(&mqp->mutex); + default: + return -EINVAL; } - return 0; - -err_add: - mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, - MLX4_PROT_IB_IPV6, reg_id); -err_malloc: - kfree(ib_steering); - - return err; + if (map_sw_to_hw_steering_id(dev, type) < 0 || + hw_rule_sz(dev, type) < 0) + return -EINVAL; + mlx4_spec->id = cpu_to_be16(map_sw_to_hw_steering_id(dev, type)); + mlx4_spec->size = hw_rule_sz(dev, type) >> 2; + return hw_rule_sz(dev, type); } -enum { - IBV_FLOW_L4_NONE = 0, - IBV_FLOW_L4_OTHER = 3, - IBV_FLOW_L4_UDP = 5, - IBV_FLOW_L4_TCP = 6 -}; - -struct mlx4_cm_steering { - struct list_head list; - u64 reg_id; - struct ib_flow_spec spec; -}; - -static int flow_spec_to_net_rule(struct ib_device *dev, struct ib_flow_spec *flow_spec, - struct list_head *rule_list_h) +static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, + int domain, + enum mlx4_net_trans_promisc_mode flow_type, + u64 *reg_id) { - struct mlx4_spec_list *spec_l2, *spec_l3, *spec_l4; - u64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); - - spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL); - if (!spec_l2) - return -ENOMEM; + int ret, i; + int size = 0; + void *ib_flow; + struct mlx4_ib_dev *mdev = to_mdev(qp->device); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_net_trans_rule_hw_ctrl *ctrl; + size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) + + (sizeof(struct _rule_hw) * flow_attr->num_of_specs); + + static const u16 __mlx4_domain[] 
= { + [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS, + [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL, + [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS, + [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC, + }; - switch (flow_spec->type) { - case IB_FLOW_ETH: - spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH; - memcpy(spec_l2->eth.dst_mac, flow_spec->l2_id.eth.mac, ETH_ALEN); - memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN); - spec_l2->eth.ether_type = flow_spec->l2_id.eth.ethertype; - if (flow_spec->l2_id.eth.vlan_present) { - spec_l2->eth.vlan_id = flow_spec->l2_id.eth.vlan; - spec_l2->eth.vlan_id_msk = cpu_to_be16(0x0fff); - } - break; - case IB_FLOW_IB_UC: - spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB; - if(flow_spec->l2_id.ib_uc.qpn) { - spec_l2->ib.l3_qpn = cpu_to_be32(flow_spec->l2_id.ib_uc.qpn); - spec_l2->ib.qpn_msk = cpu_to_be32(0xffffff); + if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) { + pr_err("Invalid priority value.\n"); + return -EINVAL; } - break; - case IB_FLOW_IB_MC_IPV4: - case IB_FLOW_IB_MC_IPV6: - spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB; - memcpy(spec_l2->ib.dst_gid, flow_spec->l2_id.ib_mc.mgid, 16); - memset(spec_l2->ib.dst_gid_msk, 0xff, 16); - break; + if (domain >= IB_FLOW_DOMAIN_NUM) { + pr_err("Invalid domain value.\n"); + return -EINVAL; } + if (map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0) + return -EINVAL; + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, rule_size); + ctrl = mailbox->buf; + + ctrl->prio = cpu_to_be16(__mlx4_domain[domain] | + flow_attr->priority); + ctrl->type = map_sw_to_hw_steering_mode(mdev->dev, flow_type); + ctrl->port = flow_attr->port; + ctrl->qpn = cpu_to_be32(qp->qp_num); + + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK) + ctrl->flags = (1 << 3); + + ib_flow = flow_attr + 1; + size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); + for (i = 0; i < flow_attr->num_of_specs; i++) { + ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + 
size); + if (ret < 0) { + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return -EINVAL; + } + ib_flow += ((union ib_flow_spec *)ib_flow)->size; + size += ret; + } - list_add_tail(&spec_l2->list, rule_list_h); + ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (ret == -ENOMEM) + pr_err("mcg table is full. Fail to register network rule.\n"); + else if (ret == -ENXIO) + pr_err("Device managed flow steering is disabled. Fail to register network rule.\n"); + else if (ret) + pr_err("Invalid argumant. Fail to register network rule.\n"); + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return ret; +} - if (flow_spec->l2_id.eth.ethertype == cpu_to_be16(ETH_P_IP) || - flow_spec->type != IB_FLOW_ETH) { - spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL); - if (!spec_l3) - return -ENOMEM; +static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) +{ + int err; + err = mlx4_cmd(dev, reg_id, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + pr_err("Fail to detach network rule. 
registration id = 0x%llx\n", + (unsigned long long)reg_id); + return err; +} - spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; - spec_l3->ipv4.src_ip = flow_spec->src_ip; - if (flow_spec->type != IB_FLOW_IB_MC_IPV4 && - flow_spec->type != IB_FLOW_IB_MC_IPV6) - spec_l3->ipv4.dst_ip = flow_spec->dst_ip; +static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + int err = 0, i = 0; + struct mlx4_ib_flow *mflow; + enum mlx4_net_trans_promisc_mode type[2]; - if (spec_l3->ipv4.src_ip) - spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK; - if (spec_l3->ipv4.dst_ip) - spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK; + memset(type, 0, sizeof(type)); - list_add_tail(&spec_l3->list, rule_list_h); + mflow = kzalloc(sizeof(struct mlx4_ib_flow), GFP_KERNEL); + if (!mflow) { + err = -ENOMEM; + goto err_free; } - if (flow_spec->l4_protocol) { - spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL); - if (!spec_l4) - return -ENOMEM; - - spec_l4->tcp_udp.src_port = flow_spec->src_port; - spec_l4->tcp_udp.dst_port = flow_spec->dst_port; - if (spec_l4->tcp_udp.src_port) - spec_l4->tcp_udp.src_port_msk = - MLX4_BE_SHORT_MASK; - if (spec_l4->tcp_udp.dst_port) - spec_l4->tcp_udp.dst_port_msk = - MLX4_BE_SHORT_MASK; - - switch (flow_spec->l4_protocol) { - case IBV_FLOW_L4_UDP: - spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP; + switch (flow_attr->type) { + case IB_FLOW_ATTR_NORMAL: + type[0] = MLX4_FS_REGULAR; break; - case IBV_FLOW_L4_TCP: - spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP; - break; - default: - dev_err(dev->dma_device, - "Unsupported l4 protocol.\n"); - kfree(spec_l4); - return -EPROTONOSUPPORT; - } - list_add_tail(&spec_l4->list, rule_list_h); - } - return 0; -} - -static int __mlx4_ib_flow_attach(struct mlx4_ib_dev *mdev, - struct mlx4_ib_qp *mqp, - struct ib_flow_spec *flow_spec, - int priority, int lock_qp) -{ - u64 reg_id = 0; - int err = 0; - struct mlx4_cm_steering *cm_flow; - struct mlx4_spec_list *spec, *tmp_spec; - struct 
mlx4_net_trans_rule rule = - { .queue_mode = MLX4_NET_TRANS_Q_FIFO, - .exclusive = 0, - }; + case IB_FLOW_ATTR_ALL_DEFAULT: + type[0] = MLX4_FS_ALL_DEFAULT; + break; - rule.promisc_mode = flow_spec->rule_type; - rule.port = mqp->port; - rule.qpn = mqp->mqp.qpn; - INIT_LIST_HEAD(&rule.list); + case IB_FLOW_ATTR_MC_DEFAULT: + type[0] = MLX4_FS_MC_DEFAULT; + break; - cm_flow = kmalloc(sizeof(*cm_flow), GFP_KERNEL); - if (!cm_flow) - return -ENOMEM; + case IB_FLOW_ATTR_SNIFFER: + type[0] = MLX4_FS_UC_SNIFFER; + type[1] = MLX4_FS_MC_SNIFFER; + break; - if (rule.promisc_mode == MLX4_FS_REGULAR) { - rule.allow_loopback = !flow_spec->block_mc_loopback; - rule.priority = MLX4_DOMAIN_UVERBS | priority; - err = flow_spec_to_net_rule(&mdev->ib_dev, flow_spec, - &rule.list); - if (err) - goto free_list; + default: + err = -EINVAL; + goto err_free; } - err = mlx4_flow_attach(mdev->dev, &rule, ®_id); + while (i < ARRAY_SIZE(type) && type[i]) { + err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], + &mflow->reg_id[i]); if (err) - goto free_list; - - memcpy(&cm_flow->spec, flow_spec, sizeof(*flow_spec)); - cm_flow->reg_id = reg_id; + goto err_free; + i++; + } - if (lock_qp) - mutex_lock(&mqp->mutex); - list_add(&cm_flow->list, &mqp->rules_list); - if (lock_qp) - mutex_unlock(&mqp->mutex); + return &mflow->ibflow; -free_list: - list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) { - list_del(&spec->list); - kfree(spec); - } - if (err) { - kfree(cm_flow); - dev_err(mdev->ib_dev.dma_device, - "Fail to attach flow steering rule\n"); - } - return err; +err_free: + kfree(mflow); + return ERR_PTR(err); } -static int __mlx4_ib_flow_detach(struct mlx4_ib_dev *mdev, - struct mlx4_ib_qp *mqp, - struct ib_flow_spec *spec, int priority, - int lock_qp) +static int mlx4_ib_destroy_flow(struct ib_flow *flow_id) { - struct mlx4_cm_steering *cm_flow; - int ret; + int err, ret = 0; + int i = 0; + struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device); + struct mlx4_ib_flow *mflow = 
to_mflow(flow_id); - if (lock_qp) - mutex_lock(&mqp->mutex); - list_for_each_entry(cm_flow, &mqp->rules_list, list) { - if (!memcmp(&cm_flow->spec, spec, sizeof(*spec))) { - list_del(&cm_flow->list); - break; - } - } - if (lock_qp) - mutex_unlock(&mqp->mutex); - - if (&cm_flow->list == &mqp->rules_list) { - dev_err(mdev->ib_dev.dma_device, "Couldn't find reg_id for flow spec. " - "Steering rule is left attached\n"); - return -EINVAL; + while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) { + err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]); + if (err) + ret = err; + i++; } - ret = mlx4_flow_detach(mdev->dev, cm_flow->reg_id); - - kfree(cm_flow); + kfree(mflow); return ret; } -static int mlx4_ib_flow_attach(struct ib_qp *qp, struct ib_flow_spec *flow_spec, - int priority) -{ - return __mlx4_ib_flow_attach(to_mdev(qp->device), to_mqp(qp), - flow_spec, priority, 1); -} - -static int mlx4_ib_flow_detach(struct ib_qp *qp, struct ib_flow_spec *spec, - int priority) -{ - return __mlx4_ib_flow_detach(to_mdev(qp->device), to_mqp(qp), - spec, priority, 1); -} - static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) { struct mlx4_ib_gid_entry *ge; @@ -1194,40 +1267,14 @@ static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) return ret; } -static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) + +static int del_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) { - int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); - u8 mac[6]; - struct net_device *ndev; struct mlx4_ib_gid_entry *ge; - u64 reg_id = 0; - - if (mdev->dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) { - struct mlx4_ib_steering *ib_steering; - - mutex_lock(&mqp->mutex); - list_for_each_entry(ib_steering, &mqp->steering_rules, list) { - if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) { - list_del(&ib_steering->list); - break; - } - } - mutex_unlock(&mqp->mutex); - if 
(&ib_steering->list == &mqp->steering_rules) { - pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n"); - return -EINVAL; - } - reg_id = ib_steering->reg_id; - kfree(ib_steering); - } - - err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, - MLX4_PROT_IB_IPV6, reg_id); - if (err) - return err; + struct net_device *ndev; + u8 mac[6]; mutex_lock(&mqp->mutex); ge = find_gid_entry(mqp, gid->raw); @@ -1250,8 +1297,174 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) pr_warn("could not find mgid entry\n"); mutex_unlock(&mqp->mutex); + return ge != 0 ? 0 : -EINVAL; +} + +static int _mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid, + int count) +{ + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u64 reg_id = 0; + int record_err = 0; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + struct mlx4_ib_steering *ib_steering; + struct mlx4_ib_steering *tmp; + LIST_HEAD(temp); + + mutex_lock(&mqp->mutex); + list_for_each_entry_safe(ib_steering, tmp, &mqp->steering_rules, + list) { + if (memcmp(ib_steering->gid.raw, gid->raw, 16)) + continue; + + if (--count < 0) + break; + + list_del(&ib_steering->list); + list_add(&ib_steering->list, &temp); + } + mutex_unlock(&mqp->mutex); + list_for_each_entry_safe(ib_steering, tmp, &temp, + list) { + reg_id = ib_steering->reg_id; + + err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, + gid->raw, + (ibqp->qp_type == IB_QPT_RAW_PACKET) ? 
+ MLX4_PROT_ETH : MLX4_PROT_IB_IPV6, + reg_id); + if (err) { + record_err = record_err ?: err; + continue; + } + + err = del_gid_entry(ibqp, gid); + if (err) { + record_err = record_err ?: err; + continue; + } + + list_del(&ib_steering->list); + kfree(ib_steering); + } + mutex_lock(&mqp->mutex); + list_for_each_entry(ib_steering, &temp, list) { + list_add(&ib_steering->list, &mqp->steering_rules); + } + mutex_unlock(&mqp->mutex); + if (count) { + pr_warn("Couldn't release all reg_ids for mgid. Steering rule is left attached\n"); + return -EINVAL; + } + + } else { + if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 && + ibqp->qp_type == IB_QPT_RAW_PACKET) + gid->raw[5] = mqp->port; + + err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + (ibqp->qp_type == IB_QPT_RAW_PACKET) ? + MLX4_PROT_ETH : MLX4_PROT_IB_IPV6, + reg_id); + if (err) + return err; + + err = del_gid_entry(ibqp, gid); + + if (err) + return err; + } + + return record_err; +} + +static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + int count = (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) ? 
+ mdev->dev->caps.num_ports : 1; + + return _mlx4_ib_mcg_detach(ibqp, gid, lid, count); +} + +static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + int err = -ENODEV; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + DECLARE_BITMAP(ports, MLX4_MAX_PORTS); + int i = 0; + + if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 && + ibqp->qp_type == IB_QPT_RAW_PACKET) + gid->raw[5] = mqp->port; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + bitmap_fill(ports, mdev->dev->caps.num_ports); + } else { + if (mqp->port <= mdev->dev->caps.num_ports) { + bitmap_zero(ports, mdev->dev->caps.num_ports); + set_bit(0, ports); + } else { + return -EINVAL; + } + } + + for (; i < mdev->dev->caps.num_ports; i++) { + u64 reg_id; + struct mlx4_ib_steering *ib_steering = NULL; + if (!test_bit(i, ports)) + continue; + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); + if (!ib_steering) + goto err_add; + } + + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, + gid->raw, i + 1, + !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + (ibqp->qp_type == IB_QPT_RAW_PACKET) ? 
+ MLX4_PROT_ETH : MLX4_PROT_IB_IPV6, + ®_id); + if (err) { + kfree(ib_steering); + goto err_add; + } + + err = add_gid_entry(ibqp, gid); + if (err) { + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + MLX4_PROT_IB_IPV6, reg_id); + kfree(ib_steering); + goto err_add; + } + + if (ib_steering) { + memcpy(ib_steering->gid.raw, gid->raw, 16); + mutex_lock(&mqp->mutex); + list_add(&ib_steering->list, &mqp->steering_rules); + mutex_unlock(&mqp->mutex); + ib_steering->reg_id = reg_id; + } + } + return 0; + +err_add: + if (i > 0) + _mlx4_ib_mcg_detach(ibqp, gid, lid, i); + + return err; } static int init_node_data(struct mlx4_ib_dev *dev) @@ -1327,27 +1540,39 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, dev->dev->board_id); } +static ssize_t show_vsd(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx4_ib_dev *dev = + container_of(device, struct mlx4_ib_dev, ib_dev.dev); + ssize_t len = MLX4_VSD_LEN; + + if (dev->dev->vsd_vendor_id == PCI_VENDOR_ID_MELLANOX) + len = sprintf(buf, "%.*s\n", MLX4_VSD_LEN, dev->dev->vsd); + else + memcpy(buf, dev->dev->vsd, MLX4_VSD_LEN); + + return len; +} + static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); +static DEVICE_ATTR(vsd, S_IRUGO, show_vsd, NULL); static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_hw_rev, &dev_attr_fw_ver, &dev_attr_hca_type, - &dev_attr_board_id + &dev_attr_board_id, + &dev_attr_vsd }; -static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) +static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev, u8 port) { -#ifdef __linux__ - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); -#else memcpy(eui, IF_LLADDR(dev), 3); memcpy(eui + 5, IF_LLADDR(dev) + 3, 3); -#endif if (vlan_id < 
0x1000) { eui[3] = vlan_id >> 8; eui[4] = vlan_id & 0xff; @@ -1366,191 +1591,352 @@ static void update_gids_task(struct work_struct *work) int err; struct mlx4_dev *dev = gw->dev->dev; + mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox)); - return; + goto free; } gids = mailbox->buf; memcpy(gids, gw->gids, sizeof gw->gids); - err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, + if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) == + IB_LINK_LAYER_ETHERNET) { + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_GID_TABLE << 8 | gw->port, 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + if (err) pr_warn("set port command failed\n"); - else { - memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); - mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); + else + mlx4_ib_dispatch_event(gw->dev, gw->port, + IB_EVENT_GID_CHANGE); + } + + mlx4_free_cmd_mailbox(dev, mailbox); +free: + kfree(gw); +} + +static void reset_gids_task(struct work_struct *work) +{ + struct update_gid_work *gw = + container_of(work, struct update_gid_work, work); + struct mlx4_cmd_mailbox *mailbox; + union ib_gid *gids; + int err; + struct mlx4_dev *dev = gw->dev->dev; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + pr_warn("reset gid table failed\n"); + goto free; + } + + gids = mailbox->buf; + memcpy(gids, gw->gids, sizeof(gw->gids)); + + if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 1) == + IB_LINK_LAYER_ETHERNET && + dev->caps.num_ports > 0) { + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_GID_TABLE << 8 | 1, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + pr_warn("set port 1 command failed\n"); + } + + if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 2) == + IB_LINK_LAYER_ETHERNET && + dev->caps.num_ports > 1) { + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_GID_TABLE << 8 | 2, + 1, 
MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + pr_warn("set port 2 command failed\n"); } mlx4_free_cmd_mailbox(dev, mailbox); +free: kfree(gw); } -static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) +static int update_gid_table(struct mlx4_ib_dev *dev, int port, + union ib_gid *gid, int clear, int default_gid) { - struct net_device *ndev = dev->iboe.netdevs[port - 1]; struct update_gid_work *work; - struct net_device *tmp; int i; - u8 *hits; - union ib_gid gid; - int index_free; - int found; int need_update = 0; + int free = -1; + int found = -1; int max_gids; - u16 vid; - - work = kzalloc(sizeof *work, GFP_ATOMIC); - if (!work) - return -ENOMEM; - - hits = kzalloc(128, GFP_ATOMIC); - if (!hits) { - kfree(work); - return -ENOMEM; - } + int start_index = !default_gid; max_gids = dev->dev->caps.gid_table_len[port]; - -#ifdef __linux__ - rcu_read_lock(); - for_each_netdev_rcu(&init_net, tmp) { -#else - IFNET_RLOCK(); - TAILQ_FOREACH(tmp, &V_ifnet, if_link) { -#endif - if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { - gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); - vid = rdma_vlan_dev_vlan_id(tmp); - mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); - found = 0; - index_free = -1; - for (i = 0; i < max_gids; ++i) { - if (index_free < 0 && - !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) - index_free = i; - if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { - hits[i] = 1; - found = 1; + for (i = start_index; i < max_gids; ++i) { + if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, + sizeof(*gid))) + found = i; + + if (clear) { + if (found >= 0) { + need_update = 1; + dev->iboe.gid_table[port - 1][found] = zgid; break; } - } + } else { + if (found >= 0) + break; - if (!found) { - if (tmp == ndev && - (memcmp(&dev->iboe.gid_table[port - 1][0], - &gid, sizeof gid) || - !memcmp(&dev->iboe.gid_table[port - 1][0], - &zgid, sizeof gid))) { - 
dev->iboe.gid_table[port - 1][0] = gid; - ++need_update; - hits[0] = 1; - } else if (index_free >= 0) { - dev->iboe.gid_table[port - 1][index_free] = gid; - hits[index_free] = 1; - ++need_update; + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], + &zgid, sizeof(*gid))) + free = i; } } + + if (found == -1 && !clear && free < 0) { + pr_err("GID table of port %d is full. Can't add "GID_PRINT_FMT"\n", + port, GID_PRINT_ARGS(gid)); + return -ENOMEM; } -#ifdef __linux__ + if (found == -1 && clear) { + pr_err(GID_PRINT_FMT" is not in GID table of port %d\n", GID_PRINT_ARGS(gid), port); + return -EINVAL; } - rcu_read_unlock(); -#else + if (found == -1 && !clear && free >= 0) { + dev->iboe.gid_table[port - 1][free] = *gid; + need_update = 1; } - IFNET_RUNLOCK(); -#endif - for (i = 0; i < max_gids; ++i) - if (!hits[i]) { - if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) - ++need_update; - dev->iboe.gid_table[port - 1][i] = zgid; - } + if (!need_update) + return 0; + + work = kzalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; - if (need_update) { - memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); + memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids)); INIT_WORK(&work->work, update_gids_task); work->port = port; work->dev = dev; queue_work(wq, &work->work); - } else - kfree(work); - kfree(hits); return 0; } -static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) +static int reset_gid_table(struct mlx4_ib_dev *dev) { - switch (event) { - case NETDEV_UP: -#ifdef __linux__ - case NETDEV_CHANGEADDR: + struct update_gid_work *work; + + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + + memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table)); + memset(work->gids, 0, sizeof(work->gids)); + INIT_WORK(&work->work, reset_gids_task); + work->dev = dev; + queue_work(wq, &work->work); + return 0; +} + +/* XXX BOND Related - stub (no support for 
these flags in FBSD)*/ +static inline int netif_is_bond_master(struct net_device *dev) +{ +#if 0 + return (dev->flags & IFF_MASTER) && (dev->priv_flags & IFF_BONDING); #endif - update_ipv6_gids(dev, port, 0); + return 0; +} + +static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid, u8 port) +{ + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev, port); +} + +static u8 mlx4_ib_get_dev_port(struct net_device *dev, struct mlx4_ib_dev *ibdev) +{ + u8 port = 0; + struct mlx4_ib_iboe *iboe; + struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ? + rdma_vlan_dev_real_dev(dev) : dev; + + iboe = &ibdev->iboe; + + for (port = 1; port <= MLX4_MAX_PORTS; ++port) + if ((netif_is_bond_master(real_dev) && (real_dev == iboe->masters[port - 1])) || + (!netif_is_bond_master(real_dev) && (real_dev == iboe->netdevs[port - 1]))) break; - case NETDEV_DOWN: - update_ipv6_gids(dev, port, 1); - dev->iboe.netdevs[port - 1] = NULL; + return port > MLX4_MAX_PORTS ? 
0 : port; +} + +static void mlx4_ib_get_dev_addr(struct net_device *dev, struct mlx4_ib_dev *ibdev, u8 port) +{ + struct ifaddr *ifa; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct inet6_dev *in6_dev; + union ib_gid *pgid; + struct inet6_ifaddr *ifp; +#endif + union ib_gid gid; + + + if ((port == 0) || (port > MLX4_MAX_PORTS)) + return; + + /* IPv4 gids */ + TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { + if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET){ + ipv6_addr_set_v4mapped( + ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr, + (struct in6_addr *)&gid); + update_gid_table(ibdev, port, &gid, 0, 0); + } + + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + /* IPv6 gids */ + in6_dev = in6_dev_get(dev); + if (in6_dev) { + read_lock_bh(&in6_dev->lock); + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { + pgid = (union ib_gid *)&ifp->addr; + update_gid_table(ibdev, port, pgid, 0, 0); } + read_unlock_bh(&in6_dev->lock); + in6_dev_put(in6_dev); + } +#endif } -static void netdev_added(struct mlx4_ib_dev *dev, int port) +static void mlx4_set_default_gid(struct mlx4_ib_dev *ibdev, + struct net_device *dev, u8 port) { - update_ipv6_gids(dev, port, 0); + union ib_gid gid; + mlx4_make_default_gid(dev, &gid, port); + update_gid_table(ibdev, port, &gid, 0, 1); } -static void netdev_removed(struct mlx4_ib_dev *dev, int port) +static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) { - update_ipv6_gids(dev, port, 1); + struct net_device *dev; + + if (reset_gid_table(ibdev)) + return -1; + + IFNET_RLOCK_NOSLEEP(); + TAILQ_FOREACH(dev, &V_ifnet, if_link) { + u8 port = mlx4_ib_get_dev_port(dev, ibdev); + if (port) { + if (!rdma_vlan_dev_real_dev(dev) && + !netif_is_bond_master(dev)) + mlx4_set_default_gid(ibdev, dev, port); + mlx4_ib_get_dev_addr(dev, ibdev, port); + } + } + + IFNET_RUNLOCK_NOSLEEP(); + + return 0; } -static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, - void *ptr) +static 
void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, + struct net_device *dev, unsigned long event) { - struct net_device *dev = ptr; - struct mlx4_ib_dev *ibdev; - struct net_device *oldnd; struct mlx4_ib_iboe *iboe; int port; + int init = 0; + unsigned long flags; -#ifdef __linux__ - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; -#endif - - ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); iboe = &ibdev->iboe; - spin_lock(&iboe->lock); + spin_lock_irqsave(&iboe->lock, flags); mlx4_foreach_ib_transport_port(port, ibdev->dev) { - oldnd = iboe->netdevs[port - 1]; + struct net_device *old_netdev = iboe->netdevs[port - 1]; +/* XXX BOND related */ +#if 0 + struct net_device *old_master = iboe->masters[port - 1]; +#endif + iboe->masters[port - 1] = NULL; iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); - if (oldnd != iboe->netdevs[port - 1]) { - if (iboe->netdevs[port - 1]) - netdev_added(ibdev, port); - else - netdev_removed(ibdev, port); - } + + + if (old_netdev != iboe->netdevs[port - 1]) + init = 1; + if (dev == iboe->netdevs[port - 1] && + event == NETDEV_CHANGEADDR) + init = 1; +/* XXX BOND related */ +#if 0 + if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1])) + iboe->masters[port - 1] = iboe->netdevs[port - 1]->master; + + /* if bonding is used it is possible that we add it to masters only after + IP address is assigned to the net bonding interface */ + if (old_master != iboe->masters[port - 1]) + init = 1; +#endif } - if (dev == iboe->netdevs[0] || - (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) - handle_en_event(ibdev, 1, event); - else if (dev == iboe->netdevs[1] - || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) - handle_en_event(ibdev, 2, event); + spin_unlock_irqrestore(&iboe->lock, flags); + + if (init) + if (mlx4_ib_init_gid_table(ibdev)) + pr_warn("Fail to reset gid table\n"); +} + +static int mlx4_ib_netdev_event(struct 
notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct mlx4_ib_dev *ibdev; - spin_unlock(&iboe->lock); + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); + + mlx4_ib_scan_netdevs(ibdev, dev, event); return NOTIFY_DONE; } +/* This function initializes the gid table only if the event_netdev real device is an iboe + * device, will be invoked by the inet/inet6 events */ +static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *event_netdev = ptr; + struct mlx4_ib_dev *ibdev; + struct mlx4_ib_iboe *ibdev_iboe; + int port = 0; + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet); + + struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? + rdma_vlan_dev_real_dev(event_netdev) : + event_netdev; + + ibdev_iboe = &ibdev->iboe; + + port = mlx4_ib_get_dev_port(real_dev, ibdev); + + /* Perform init_gid_table if the event real_dev is the net_device which represents this port, + * otherwise this event is not related and would be ignored.*/ + if(port && (real_dev == ibdev_iboe->netdevs[port - 1])) + if (mlx4_ib_init_gid_table(ibdev)) + pr_warn("Fail to reset gid table\n"); + + return NOTIFY_DONE; +} + + static void init_pkeys(struct mlx4_ib_dev *ibdev) { int port; @@ -1615,7 +2001,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) eq = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { - snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%d:%d:%d:%d", i, j, + sprintf(name, "mlx4-ib-%d-%d@%d:%d:%d:%d", i, j, pci_get_domain(dev->pdev->dev.bsddev), pci_get_bus(dev->pdev->dev.bsddev), PCI_SLOT(dev->pdev->devfn), @@ -1779,89 +2165,61 @@ static struct attribute_group diag_counters_group = { .attrs = diag_rprt_attrs }; -#ifdef __linux__ -static int mlx4_ib_proc_init(void) +static void init_dev_assign(void) { - /* Creating procfs directories /proc/drivers/mlx4_ib/ && - 
/proc/drivers/mlx4_ib/mrs for further use by the driver. - */ - int err; + int i = 1; - mlx4_ib_driver_dir_entry = proc_mkdir(MLX4_IB_DRIVER_PROC_DIR_NAME, - NULL); - if (!mlx4_ib_driver_dir_entry) { - pr_err("mlx4_ib_proc_init has failed for %s\n", - MLX4_IB_DRIVER_PROC_DIR_NAME); - err = -ENODEV; - goto error; + spin_lock_init(&dev_num_str_lock); + if (mlx4_fill_dbdf2val_tbl(&dev_assign_str)) + return; + dev_num_str_bitmap = + kmalloc(BITS_TO_LONGS(MAX_NUM_STR_BITMAP) * sizeof(long), + GFP_KERNEL); + if (!dev_num_str_bitmap) { + pr_warn("bitmap alloc failed -- cannot apply dev_assign_str parameter\n"); + return; } - - mlx4_mrs_dir_entry = proc_mkdir(MLX4_IB_MRS_PROC_DIR_NAME, - mlx4_ib_driver_dir_entry); - if (!mlx4_mrs_dir_entry) { - pr_err("mlx4_ib_proc_init has failed for %s\n", - MLX4_IB_MRS_PROC_DIR_NAME); - err = -ENODEV; - goto remove_entry; + bitmap_zero(dev_num_str_bitmap, MAX_NUM_STR_BITMAP); + while ((i < MLX4_DEVS_TBL_SIZE) && (dev_assign_str.tbl[i].dbdf != + MLX4_ENDOF_TBL)) { + if (bitmap_allocate_region(dev_num_str_bitmap, + dev_assign_str.tbl[i].val[0], 0)) + goto err; + i++; } + dr_active = 1; + return; - return 0; - -remove_entry: - remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, - NULL); -error: - return err; +err: + kfree(dev_num_str_bitmap); + dev_num_str_bitmap = NULL; + pr_warn("mlx4_ib: The value of 'dev_assign_str' parameter " + "is incorrect. 
The parameter value is discarded!"); } -#endif -static void init_dev_assign(void) +static int mlx4_ib_dev_idx(struct mlx4_dev *dev) { - int bus, slot, fn, ib_idx; - char *p = dev_assign_str, *t; - char curr_val[32] = {0}; - int ret; - int j, i = 0; - - memset(dr, 0, sizeof dr); - - if (dev_assign_str[0] == 0) - return; - - while (strlen(p)) { - ret = sscanf(p, "%02x:%02x.%x-%x", &bus, &slot, &fn, &ib_idx); - if (ret != 4 || ib_idx < 0) - goto err; - - for (j = 0; j < i; j++) - if (dr[j].nr == ib_idx) - goto err; - - dr[i].bus = bus; - dr[i].dev = slot; - dr[i].func = fn; - dr[i].nr = ib_idx; - - t = strchr(p, ','); - sprintf(curr_val, "%02x:%02x.%x-%x", bus, slot, fn, ib_idx); - if ((!t) && strlen(p) == strlen(curr_val)) - return; - - if (!t || (t + 1) >= dev_assign_str + sizeof dev_assign_str) - goto err; - - ++i; - if (i >= MAX_DR) - goto err; - - p = t + 1; + int i, val; + + if (!dr_active) + return -1; + if (!dev) + return -1; + if (mlx4_get_val(dev_assign_str.tbl, dev->pdev, 0, &val)) + return -1; + + if (val != DEFAULT_TBL_VAL) { + dev->flags |= MLX4_FLAG_DEV_NUM_STR; + return val; } - return; -err: - memset(dr, 0, sizeof dr); - printk(KERN_WARNING "mlx4_ib: The value of 'dev_assign_str' parameter " - "is incorrect. 
The parameter value is discarded!"); + spin_lock(&dev_num_str_lock); + i = bitmap_find_free_region(dev_num_str_bitmap, MAX_NUM_STR_BITMAP, 0); + spin_unlock(&dev_num_str_lock); + if (i >= 0) + return i; + + return -1; } static void *mlx4_ib_add(struct mlx4_dev *dev) @@ -1871,8 +2229,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) int i, j; int err; struct mlx4_ib_iboe *iboe; + int dev_idx; - printk(KERN_INFO "%s", mlx4_ib_version); + pr_info_once("%s", mlx4_ib_version); mlx4_foreach_ib_transport_port(i, dev) num_ports++; @@ -1905,7 +2264,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->dev = dev; + dev_idx = mlx4_ib_dev_idx(dev); + if (dev_idx >= 0) + sprintf(ibdev->ib_dev.name, "mlx4_%d", dev_idx); + else strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); + ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; @@ -1942,10 +2306,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_FLOW) | - (1ull << IB_USER_VERBS_CMD_DETACH_FLOW) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + (1ull << IB_USER_VERBS_CMD_OPEN_QP); ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; @@ -1957,7 +2318,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; ibdev->ib_dev.mmap = mlx4_ib_mmap; -#ifdef __linux__ +/* XXX FBSD has no support for get_unmapped_area function */ +#if 0 ibdev->ib_dev.get_unmapped_area = mlx4_ib_get_unmapped_area; #endif ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; @@ -1990,9 +2352,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; 
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; - ibdev->ib_dev.attach_flow = mlx4_ib_flow_attach; - ibdev->ib_dev.detach_flow = mlx4_ib_flow_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; + ibdev->ib_dev.ioctl = mlx4_ib_ioctl; + ibdev->ib_dev.query_values = mlx4_ib_query_values; if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; @@ -2001,6 +2363,16 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; } + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) { + ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; + ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw; + ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; + + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + } + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; @@ -2009,6 +2381,29 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + /* + * Set experimental data + */ + ibdev->ib_dev.uverbs_exp_cmd_mask = + (1ull << IB_USER_VERBS_EXP_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_EXP_CMD_MODIFY_CQ) | + (1ull << IB_USER_VERBS_EXP_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_EXP_CMD_CREATE_CQ); + ibdev->ib_dev.exp_create_qp = mlx4_ib_exp_create_qp; + ibdev->ib_dev.exp_query_device = mlx4_ib_exp_query_device; + if (check_flow_steering_support(dev)) { + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + ibdev->ib_dev.create_flow = mlx4_ib_create_flow; + ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; + } else { + pr_debug("Device managed flow steering is unavailable for this configuration.\n"); + } + /* + * End of experimental data + */ + mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); @@ -2019,18 +2414,29 @@ 
static void *mlx4_ib_add(struct mlx4_dev *dev) for (i = 0; i < ibdev->num_ports; ++i) { if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { - err = mlx4_counter_alloc(ibdev->dev, i + 1, &ibdev->counters[i]); - if (err) - ibdev->counters[i] = -1; - } else - ibdev->counters[i] = -1; + if (mlx4_is_slave(dev)) { + ibdev->counters[i].status = mlx4_counter_alloc(ibdev->dev, + i + 1, + &ibdev->counters[i].counter_index); + } else {/* allocating the PF IB default counter indices reserved in mlx4_init_counters_table */ + ibdev->counters[i].counter_index = ((i + 1) << 1) - 1; + ibdev->counters[i].status = 0; + } + + dev_info(&dev->pdev->dev, + "%s: allocated counter index %d for port %d\n", + __func__, ibdev->counters[i].counter_index, i+1); + } else { + ibdev->counters[i].counter_index = MLX4_SINK_COUNTER_INDEX; + ibdev->counters[i].status = -ENOSPC; + } } spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && - !mlx4_is_slave(dev)) { + !mlx4_is_mfunc(dev)) { ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, &ibdev->steer_qpn_base, 0); @@ -2063,20 +2469,32 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_init_sriov(ibdev)) goto err_mad; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) { + if (!iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(&iboe->nb); - if (err) - goto err_sriov; + if (err) { + iboe->nb.notifier_call = NULL; + goto err_notify; + } + } + if (!iboe->nb_inet.notifier_call) { + iboe->nb_inet.notifier_call = mlx4_ib_inet_event; + err = register_inetaddr_notifier(&iboe->nb_inet); + if (err) { + iboe->nb_inet.notifier_call = NULL; + goto err_notify; + } + } + mlx4_ib_scan_netdevs(ibdev, NULL, 0); } - for (j = 0; j < 
ARRAY_SIZE(mlx4_class_attributes); ++j) { if (device_create_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j])) - goto err_notif; + goto err_notify; } if (sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group)) - goto err_notif; + goto err_notify; ibdev->ib_active = true; @@ -2094,12 +2512,24 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) } return ibdev; -err_notif: +err_notify: + for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { + device_remove_file(&ibdev->ib_dev.dev, + mlx4_class_attributes[j]); + } + + if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb.notifier_call = NULL; + } + if (ibdev->iboe.nb_inet.notifier_call) { + if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet.notifier_call = NULL; + } flush_workqueue(wq); -err_sriov: mlx4_ib_close_sriov(ibdev); err_mad: @@ -2116,9 +2546,14 @@ err_steer_qp_release: mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); err_counter: - for (; i; --i) - if (ibdev->counters[i - 1] != -1) - mlx4_counter_free(ibdev->dev, i, ibdev->counters[i - 1]); + for (; i; --i) { + if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i) == + IB_LINK_LAYER_ETHERNET) { + mlx4_counter_free(ibdev->dev, + i, + ibdev->counters[i - 1].counter_index); + } + } err_map: iounmap(ibdev->priv_uar.map); @@ -2167,30 +2602,71 @@ void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count) int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach) { - struct ib_flow_spec spec = { - .type = IB_FLOW_IB_UC, - .l2_id.ib_uc.qpn = mqp->ibqp.qp_num, - }; - - return is_attach ? 
- __mlx4_ib_flow_attach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0) - : __mlx4_ib_flow_detach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0); + int err; + size_t flow_size; + struct ib_flow_attr *flow = NULL; + struct ib_flow_spec_ib *ib_spec; + + if (is_attach) { + flow_size = sizeof(struct ib_flow_attr) + + sizeof(struct ib_flow_spec_ib); + flow = kzalloc(flow_size, GFP_KERNEL); + if (!flow) + return -ENOMEM; + flow->port = mqp->port; + flow->num_of_specs = 1; + flow->size = flow_size; + ib_spec = (struct ib_flow_spec_ib *)(flow + 1); + ib_spec->type = IB_FLOW_SPEC_IB; + ib_spec->size = sizeof(struct ib_flow_spec_ib); + ib_spec->val.l3_type_qpn = mqp->ibqp.qp_num; + ib_spec->mask.l3_type_qpn = MLX4_IB_FLOW_QPN_MASK; + + err = __mlx4_ib_create_flow(&mqp->ibqp, flow, + IB_FLOW_DOMAIN_NIC, + MLX4_FS_REGULAR, + &mqp->reg_id); + } else { + err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); + } + kfree(flow); + return err; } static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) { struct mlx4_ib_dev *ibdev = ibdev_ptr; - int p,j; + int p, j; + int dev_idx, ret; + + if (ibdev->iboe.nb_inet.notifier_call) { + if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet.notifier_call = NULL; + } mlx4_ib_close_sriov(ibdev); sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group); mlx4_ib_mad_cleanup(ibdev); for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { - device_remove_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j]); + device_remove_file(&ibdev->ib_dev.dev, + mlx4_class_attributes[j]); } + + dev_idx = -1; + if (dr_active && !(ibdev->dev->flags & MLX4_FLAG_DEV_NUM_STR)) { + ret = sscanf(ibdev->ib_dev.name, "mlx4_%d", &dev_idx); + if (ret != 1) + dev_idx = -1; + } ib_unregister_device(&ibdev->ib_dev); + if (dev_idx >= 0) { + spin_lock(&dev_num_str_lock); + bitmap_release_region(dev_num_str_bitmap, dev_idx, 0); + spin_unlock(&dev_num_str_lock); + } if (dev->caps.steering_mode == 
MLX4_STEERING_MODE_DEVICE_MANAGED) { mlx4_qp_release_range(dev, ibdev->steer_qpn_base, @@ -2204,9 +2680,16 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ibdev->iboe.nb.notifier_call = NULL; } iounmap(ibdev->priv_uar.map); - for (p = 0; p < ibdev->num_ports; ++p) - if (ibdev->counters[p] != -1) - mlx4_counter_free(ibdev->dev, p + 1, ibdev->counters[p]); + + for (p = 0; p < ibdev->num_ports; ++p) { + if (mlx4_ib_port_link_layer(&ibdev->ib_dev, p + 1) == + IB_LINK_LAYER_ETHERNET) { + mlx4_counter_free(ibdev->dev, + p + 1, + ibdev->counters[p].counter_index); + } + } + mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); @@ -2355,12 +2838,6 @@ static int __init mlx4_ib_init(void) if (!wq) return -ENOMEM; -#ifdef __linux__ - err = mlx4_ib_proc_init(); - if (err) - goto clean_wq; -#endif - err = mlx4_ib_mcg_init(); if (err) goto clean_proc; @@ -2377,13 +2854,6 @@ clean_mcg: mlx4_ib_mcg_destroy(); clean_proc: -#ifdef __linux__ - remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME, - mlx4_ib_driver_dir_entry); - remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); - -clean_wq: -#endif destroy_workqueue(wq); return err; } @@ -2394,13 +2864,7 @@ static void __exit mlx4_ib_cleanup(void) mlx4_ib_mcg_destroy(); destroy_workqueue(wq); - /* Remove proc entries */ -#ifdef __linux__ - remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME, - mlx4_ib_driver_dir_entry); - remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); -#endif - + kfree(dev_num_str_bitmap); } module_init_order(mlx4_ib_init, SI_ORDER_MIDDLE); @@ -2417,7 +2881,7 @@ static moduledata_t mlx4ib_mod = { .evhand = mlx4ib_evhand, }; -DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); +DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_SMP, SI_ORDER_ANY); MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1); MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1); MODULE_DEPEND(mlx4ib, linuxapi, 1, 1, 1); diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c b/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c index 
e70dfe9..07d5c87 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c @@ -36,6 +36,7 @@ #include <rdma/ib_sa.h> #include <linux/mlx4/cmd.h> +#include <linux/rbtree.h> #include <linux/delay.h> #include "mlx4_ib.h" @@ -53,6 +54,7 @@ #define mcg_error_group(group, format, arg...) \ pr_err(" %16s: " format, (group)->name, ## arg) + static union ib_gid mgid0; static struct workqueue_struct *clean_wq; @@ -214,7 +216,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); spin_unlock(&dev->sm_lock); return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, - IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad); + IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, 0, mad); } static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, @@ -567,7 +569,7 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state)); group->state = MCAST_IDLE; atomic_inc(&group->refcount); - queue_work(group->demux->mcg_wq, &group->work); + if (!queue_work(group->demux->mcg_wq, &group->work)) safe_atomic_dec(&group->refcount); mutex_unlock(&group->lock); @@ -656,8 +658,9 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work) method = group->response_sa_mad.mad_hdr.method; if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) { mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. 
Resp TID=%llx, group TID=%llx\n", - (long long unsigned int)be64_to_cpu(group->response_sa_mad.mad_hdr.tid), - (long long unsigned int)be64_to_cpu(group->last_req_tid)); + (long long)be64_to_cpu( + group->response_sa_mad.mad_hdr.tid), + (long long)be64_to_cpu(group->last_req_tid)); group->state = group->prev_state; goto process_requests; } @@ -752,8 +755,8 @@ static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx if (memcmp(new_mgid, &mgid0, sizeof mgid0)) { group->rec.mgid = *new_mgid; sprintf(group->name, "%016llx%016llx", - (long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix), - (long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id)); + (long long)be64_to_cpu(group->rec.mgid.global.subnet_prefix), + (long long)be64_to_cpu(group->rec.mgid.global.interface_id)); list_del_init(&group->mgid0_list); cur_group = mcast_insert(ctx, group); if (cur_group) { @@ -834,8 +837,10 @@ static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx, INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler); mutex_init(&group->lock); sprintf(group->name, "%016llx%016llx", - (long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix), - (long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id)); + (long long)be64_to_cpu( + group->rec.mgid.global.subnet_prefix), + (long long)be64_to_cpu( + group->rec.mgid.global.interface_id)); sysfs_attr_init(&group->dentry.attr); group->dentry.show = sysfs_show_group; group->dentry.store = NULL; @@ -871,7 +876,7 @@ static void queue_req(struct mcast_req *req) list_add_tail(&req->group_list, &group->pending_list); list_add_tail(&req->func_list, &group->func[req->func].pending); /* calls mlx4_ib_mcg_work_handler */ - queue_work(group->demux->mcg_wq, &group->work); + if (!queue_work(group->demux->mcg_wq, &group->work)) safe_atomic_dec(&group->refcount); } @@ -907,7 +912,7 @@ int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, 
int slave, group->state = MCAST_RESP_READY; /* calls mlx4_ib_mcg_work_handler */ atomic_inc(&group->refcount); - queue_work(ctx->mcg_wq, &group->work); + if (!queue_work(ctx->mcg_wq, &group->work)) safe_atomic_dec(&group->refcount); mutex_unlock(&group->lock); release_group(group, 0); @@ -998,13 +1003,14 @@ static ssize_t sysfs_show_group(struct device *dev, else sprintf(state_str, "%s(TID=0x%llx)", get_state_string(group->state), - (long long unsigned int)be64_to_cpu(group->last_req_tid)); + (long long)be64_to_cpu(group->last_req_tid)); if (list_empty(&group->pending_list)) { sprintf(pending_str, "No"); } else { req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sprintf(pending_str, "Yes(TID=0x%llx)", - (long long unsigned int)be64_to_cpu(req->sa_mad.mad_hdr.tid)); + (long long)be64_to_cpu( + req->sa_mad.mad_hdr.tid)); } len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", group->rec.scope_join_state & 0xf, diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c new file mode 100644 index 0000000..b6a6962 --- /dev/null +++ b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx4_ib.h" +#include "mlx4_exp.h" +#include <linux/mlx4/qp.h> + +int mlx4_ib_exp_query_device(struct ib_device *ibdev, + struct ib_exp_device_attr *props) +{ + struct ib_device_attr *base = &props->base; + struct mlx4_ib_dev *dev = to_mdev(ibdev); + int ret = mlx4_ib_query_device(ibdev, &props->base); + + props->exp_comp_mask = IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ; + props->inline_recv_sz = dev->dev->caps.max_rq_sg * sizeof(struct mlx4_wqe_data_seg); + props->device_cap_flags2 = 0; + + /* move RSS device cap from device_cap to device_cap_flags2 */ + if (base->device_cap_flags & IB_DEVICE_QPG) { + props->device_cap_flags2 |= IB_EXP_DEVICE_QPG; + if (base->device_cap_flags & IB_DEVICE_UD_RSS) + props->device_cap_flags2 |= IB_EXP_DEVICE_UD_RSS; + } + base->device_cap_flags &= ~(IB_DEVICE_QPG | + IB_DEVICE_UD_RSS | + IB_DEVICE_UD_TSS); + + if (base->max_rss_tbl_sz > 0) { + props->max_rss_tbl_sz = base->max_rss_tbl_sz; + props->exp_comp_mask |= IB_EXP_DEVICE_ATTR_RSS_TBL_SZ; + } else { + props->max_rss_tbl_sz = 0; + props->exp_comp_mask &= ~IB_EXP_DEVICE_ATTR_RSS_TBL_SZ; + } + + if (props->device_cap_flags2) + props->exp_comp_mask |= IB_EXP_DEVICE_ATTR_CAP_FLAGS2; + + return ret; +} + +/* + * Experimental functions + */ +struct ib_qp 
*mlx4_ib_exp_create_qp(struct ib_pd *pd, + struct ib_exp_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + int rwqe_size; + struct ib_qp *qp; + struct mlx4_ib_qp *mqp; + int use_inlr; + struct mlx4_ib_dev *dev; + + if (init_attr->max_inl_recv && !udata) + return ERR_PTR(-EINVAL); + + use_inlr = mlx4_ib_qp_has_rq((struct ib_qp_init_attr *)init_attr) && + init_attr->max_inl_recv && pd; + if (use_inlr) { + rwqe_size = roundup_pow_of_two(max(1U, init_attr->cap.max_recv_sge)) * + sizeof(struct mlx4_wqe_data_seg); + if (rwqe_size < init_attr->max_inl_recv) { + dev = to_mdev(pd->device); + init_attr->max_inl_recv = min(init_attr->max_inl_recv, + (u32)(dev->dev->caps.max_rq_sg * + sizeof(struct mlx4_wqe_data_seg))); + init_attr->cap.max_recv_sge = roundup_pow_of_two(init_attr->max_inl_recv) / + sizeof(struct mlx4_wqe_data_seg); + } + } else { + init_attr->max_inl_recv = 0; + } + qp = mlx4_ib_create_qp(pd, (struct ib_qp_init_attr *)init_attr, udata); + if (IS_ERR(qp)) + return qp; + + if (use_inlr) { + mqp = to_mqp(qp); + mqp->max_inlr_data = 1 << mqp->rq.wqe_shift; + init_attr->max_inl_recv = mqp->max_inlr_data; + } + + return qp; +} diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.h b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.h new file mode 100644 index 0000000..58675a4 --- /dev/null +++ b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef MLX4_EXP_H +#define MLX4_EXP_H + +#include <rdma/ib_verbs_exp.h> +#include "mlx4_ib.h" + +struct ib_qp *mlx4_ib_exp_create_qp(struct ib_pd *pd, + struct ib_exp_qp_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_exp_query_device(struct ib_device *ibdev, + struct ib_exp_device_attr *props); + +#endif /* MLX4_EXP_H */ diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h index 2435df5..ddf5236 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -38,6 +38,7 @@ #include <linux/list.h> #include <linux/mutex.h> #include <linux/idr.h> +#include <linux/rbtree.h> #include <linux/notifier.h> #include <rdma/ib_verbs.h> @@ -47,7 +48,6 @@ #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> -#include <linux/rbtree.h> #define MLX4_IB_DRV_NAME "mlx4_ib" @@ -72,9 +72,7 @@ enum { /*module param to indicate if SM assigns the alias_GUID*/ extern int mlx4_ib_sm_guid_assign; -#ifdef __linux__ extern struct proc_dir_entry *mlx4_mrs_dir_entry; -#endif #define MLX4_IB_UC_STEER_QPN_ALIGN 1 #define MLX4_IB_UC_MAX_NUM_QPS (256 * 1024) @@ -128,6 +126,7 @@ struct mlx4_ib_cq { struct mutex resize_mutex; struct ib_umem *umem; struct ib_umem *resize_umem; + int create_flags; }; struct mlx4_ib_mr { @@ -135,6 +134,13 @@ struct mlx4_ib_mr { struct mlx4_mr mmr; struct ib_umem *umem; struct mlx4_shared_mr_info *smr_info; + atomic_t invalidated; + struct completion invalidation_comp; +}; + +struct mlx4_ib_mw { + struct ib_mw ibmw; + struct mlx4_mw mmw; }; struct mlx4_ib_fast_reg_page_list { @@ -148,6 +154,12 @@ struct mlx4_ib_fmr { struct mlx4_fmr mfmr; }; +struct mlx4_ib_flow { + struct ib_flow ibflow; + /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */ + u64 reg_id[2]; +}; + struct mlx4_ib_wq { u64 *wrid; spinlock_t lock; @@ -163,6 +175,9 @@ struct mlx4_ib_wq { enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = 
IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, + MLX4_IB_QP_CAP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL, + MLX4_IB_QP_CAP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, + MLX4_IB_QP_CAP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, MLX4_IB_SRIOV_SQP = 1 << 31, @@ -179,6 +194,7 @@ enum mlx4_ib_mmap_cmd { MLX4_IB_MMAP_UAR_PAGE = 0, MLX4_IB_MMAP_BLUE_FLAME_PAGE = 1, MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES = 2, + MLX4_IB_MMAP_GET_HW_CLOCK = 3, }; enum mlx4_ib_qp_type { @@ -319,8 +335,14 @@ struct mlx4_ib_qp { struct mlx4_roce_smac_vlan_info pri; struct mlx4_roce_smac_vlan_info alt; struct list_head rules_list; + u64 reg_id; int max_inline_data; struct mlx4_bf bf; + + /* + * Experimental data + */ + int max_inlr_data; }; struct mlx4_ib_srq { @@ -354,6 +376,12 @@ struct mlx4_ib_ah { #define MLX4_NOT_SET_GUID (0x00LL) #define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL)) +/****************************************/ +/* ioctl codes */ +/****************************************/ +#define MLX4_IOC_MAGIC 'm' +#define MLX4_IOCHWCLOCKOFFSET _IOR(MLX4_IOC_MAGIC, 1, int) + enum mlx4_guid_alias_rec_status { MLX4_GUID_INFO_STATUS_IDLE, MLX4_GUID_INFO_STATUS_SET, @@ -478,7 +506,9 @@ struct mlx4_ib_sriov { struct mlx4_ib_iboe { spinlock_t lock; struct net_device *netdevs[MLX4_MAX_PORTS]; + struct net_device *masters[MLX4_MAX_PORTS]; struct notifier_block nb; + struct notifier_block nb_inet; union ib_gid gid_table[MLX4_MAX_PORTS][128]; }; @@ -518,6 +548,11 @@ struct mlx4_ib_iov_port { struct mlx4_ib_iov_sysfs_attr mcg_dentry; }; +struct mlx4_ib_counter { + int counter_index; + int status; +}; + struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; @@ -534,7 +569,7 @@ struct mlx4_ib_dev { struct mutex cap_mask_mutex; bool ib_active; struct mlx4_ib_iboe iboe; - int counters[MLX4_MAX_PORTS]; + struct mlx4_ib_counter counters[MLX4_MAX_PORTS]; int *eq_table; int 
eq_added; struct kobject *iov_parent; @@ -595,6 +630,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx4_ib_mr, ibmr); } +static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct mlx4_ib_mw, ibmw); +} + static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) { return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); @@ -604,6 +644,12 @@ static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); } + +static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow) +{ + return container_of(ibflow, struct mlx4_ib_flow, ibflow); +} + static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mlx4_ib_qp, ibqp); @@ -646,16 +692,23 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata, int mr_id); int mlx4_ib_dereg_mr(struct ib_mr *mr); +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); +int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len); void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); -int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int mlx4_ib_modify_cq(struct ib_cq *cq, + struct ib_cq_attr *cq_attr, + int cq_attr_mask); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq); -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, + struct 
ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); int mlx4_ib_destroy_cq(struct ib_cq *cq); @@ -730,6 +783,13 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) return !!(ah->av.ib.g_slid & 0x80); } +static inline int mlx4_ib_qp_has_rq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) + return 0; + + return !attr->srq; +} int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx); void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq); @@ -757,7 +817,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, struct ib_grh *grh, struct ib_mad *mad); int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad); + u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, struct ib_mad *mad); __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, @@ -799,5 +859,7 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); +int mlx4_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props); #endif /* MLX4_IB_H */ diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mr.c b/sys/ofed/drivers/infiniband/hw/mlx4/mr.c index 9ea4901..61c2088 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/mr.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/mr.c @@ -35,11 +35,6 @@ #include <linux/module.h> #include <linux/sched.h> -#ifdef __linux__ -#include <linux/proc_fs.h> -#include <linux/cred.h> -#endif - #include "mlx4_ib.h" static u32 convert_access(int acc) @@ -48,9 +43,11 @@ static u32 convert_access(int acc) (acc & IB_ACCESS_REMOTE_WRITE ? 
MLX4_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | + (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) | MLX4_PERM_LOCAL_READ; } -#ifdef __linux__ +/* No suuport for Shared MR feature */ +#if 0 static ssize_t shared_mr_proc_read(struct file *file, char __user *buffer, size_t len, @@ -129,7 +126,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) return &mr->ibmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_free: kfree(mr); @@ -159,7 +156,7 @@ static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev, if (len & (mtt_size-1ULL)) { WARN(1 , "write_block: len %llx is not aligned to mtt_size %llx\n", - (long long)len, (long long)mtt_size); + (unsigned long long)len, (unsigned long long)mtt_size); return -EINVAL; } @@ -203,8 +200,6 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { u64 *pages; - struct ib_umem_chunk *chunk; - int j; u64 len = 0; int err = 0; u64 mtt_size; @@ -212,6 +207,8 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, u64 mtt_shift; int start_index = 0; int npages = 0; + struct scatterlist *sg; + int i; pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) @@ -220,12 +217,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, mtt_shift = mtt->page_shift; mtt_size = 1ULL << mtt_shift; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { if (cur_start_addr + len == - sg_dma_address(&chunk->page_list[j])) { + sg_dma_address(sg)) { /* still the same block */ - len += sg_dma_len(&chunk->page_list[j]); + len += sg_dma_len(sg); continue; } /* A new block is started ...*/ @@ -242,8 +238,8 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, goto 
out; cur_start_addr = - sg_dma_address(&chunk->page_list[j]); - len = sg_dma_len(&chunk->page_list[j]); + sg_dma_address(sg); + len = sg_dma_len(sg); } /* Handle the last block */ @@ -319,8 +315,6 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts) { - struct ib_umem_chunk *chunk; - int j; u64 block_shift = MLX4_MAX_MTT_SHIFT; u64 current_block_len = 0; u64 current_block_start = 0; @@ -330,14 +324,18 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 total_len = 0; u64 last_block_aligned_end = 0; u64 min_shift = ilog2(umem->page_size); + struct scatterlist *sg; + int i; + u64 next_block_start; + u64 current_block_end; - list_for_each_entry(chunk, &umem->chunk_list, list) { + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { /* Initialization - save the first chunk start as the current_block_start - block means contiguous pages. */ if (current_block_len == 0 && current_block_start == 0) { first_block_start = current_block_start = - sg_dma_address(&chunk->page_list[0]); + sg_dma_address(sg); /* Find the bits that are different between the physical address and the virtual address for the start of the MR. @@ -361,13 +359,12 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, , block_shift); } - /* Go over the scatter entries in the current chunk, check + /* Go over the scatter entries and check if they continue the previous scatter entry. */ - for (j = 0; j < chunk->nmap; ++j) { - u64 next_block_start = - sg_dma_address(&chunk->page_list[j]); - u64 current_block_end = current_block_start + next_block_start = + sg_dma_address(sg); + current_block_end = current_block_start + current_block_len; /* If we have a split (non-contig.) 
between two block*/ if (current_block_end != next_block_start) { @@ -392,7 +389,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, /* Start a new block */ current_block_start = next_block_start; current_block_len = - sg_dma_len(&chunk->page_list[j]); + sg_dma_len(sg); continue; } /* The scatter entry is another part of @@ -402,8 +399,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, which merge some blocks together. */ current_block_len += - sg_dma_len(&chunk->page_list[j]); - } + sg_dma_len(sg); } /* Account for the last block in the total len */ @@ -416,7 +412,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, WARN((total_len & ((1ULL<<block_shift)-1ULL)), " misaligned total length detected (%llu, %llu)!", - (long long)total_len, (long long)block_shift); + (unsigned long long)total_len, (unsigned long long)block_shift); *num_of_mtts = total_len >> block_shift; end: @@ -426,16 +422,19 @@ end: */ WARN(1, "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n", - (long long)block_shift); + (unsigned long long)block_shift); block_shift = min_shift; } return block_shift; + } -#ifdef __linux__ +/* No suuport for Shared MR */ +#if 0 static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id) { + struct proc_dir_entry *mr_proc_entry; mode_t mode = S_IFREG; char name_buff[16]; @@ -475,8 +474,51 @@ static int is_shared_mr(int access_flags) IB_ACCESS_SHARED_MR_OTHER_WRITE)); } + +static void free_smr_info(struct mlx4_ib_mr *mr) +{ + /* When master/parent shared mr is dereged there is + no ability to share this mr any more - its mr_id will be + returned to the kernel as part of ib_uverbs_dereg_mr + and may be allocated again as part of other reg_mr. + */ + char name_buff[16]; + + sprintf(name_buff, "%X", mr->smr_info->mr_id); + /* Remove proc entry is checking internally that no operation + was strated on that proc fs file and if in the middle + current process will wait till end of operation. 
+ That's why no sync mechanism is needed when we release + below the shared umem. + */ + remove_proc_entry(name_buff, mlx4_mrs_dir_entry); + kfree(mr->smr_info); + mr->smr_info = NULL; +} #endif +static void mlx4_invalidate_umem(void *invalidation_cookie, + struct ib_umem *umem, + unsigned long addr, size_t size) +{ + struct mlx4_ib_mr *mr = (struct mlx4_ib_mr *)invalidation_cookie; + + /* This function is called under client peer lock so its resources are race protected */ + if (atomic_inc_return(&mr->invalidated) > 1) { + umem->invalidation_ctx->inflight_invalidation = 1; + goto end; + } + + umem->invalidation_ctx->peer_callback = 1; + mlx4_mr_free(to_mdev(mr->ibmr.device)->dev, &mr->mmr); + ib_umem_release(umem); + complete(&mr->invalidation_comp); + +end: + return; + +} + struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata, @@ -487,18 +529,20 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int shift; int err; int n; + struct ib_peer_memory_client *ib_peer_mem; mr = kzalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); + mr->umem = ib_umem_get_ex(pd->uobject->context, start, length, + access_flags, 0, 1); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; } + ib_peer_mem = mr->umem->ib_peer_mem; n = ib_umem_page_count(mr->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); @@ -516,7 +560,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; -#ifdef __linux__ +/* No suuport for Shared MR */ +#if 0 /* Check whether MR should be shared */ if (is_shared_mr(access_flags)) { /* start address and length must be aligned to page size in order @@ -531,10 +576,32 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_mr; } 
#endif + if (ib_peer_mem) { + if (access_flags & IB_ACCESS_MW_BIND) { + /* Prevent binding MW on peer clients. + * mlx4_invalidate_umem must be void, + * therefore, mlx4_mr_free should not fail + * when using peer clients. */ + err = -ENOSYS; + pr_err("MW is not supported with peer memory client"); + goto err_smr; + } + init_completion(&mr->invalidation_comp); + ib_umem_activate_invalidation_notifier(mr->umem, + mlx4_invalidate_umem, mr); + } + + atomic_set(&mr->invalidated, 0); return &mr->ibmr; +err_smr: +/* No suuport for Shared MR */ +#if 0 + if (mr->smr_info) + free_smr_info(mr); +#endif err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_umem: ib_umem_release(mr->umem); @@ -545,41 +612,106 @@ err_free: return ERR_PTR(err); } - int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); + struct ib_umem *umem = mr->umem; + int ret; - mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); - if (mr->smr_info) { - /* When master/parent shared mr is dereged there is - no ability to share this mr any more - its mr_id will be - returned to the kernel as part of ib_uverbs_dereg_mr - and may be allocated again as part of other reg_mr. - */ - char name_buff[16]; - - sprintf(name_buff, "%X", mr->smr_info->mr_id); - /* Remove proc entry is checking internally that no operation - was strated on that proc fs file and if in the middle - current process will wait till end of operation. - That's why no sync mechanism is needed when we release - below the shared umem. 
- */ -#ifdef __linux__ - remove_proc_entry(name_buff, mlx4_mrs_dir_entry); - kfree(mr->smr_info); +/* No suuport for Shared MR */ +#if 0 + if (mr->smr_info) + free_smr_info(mr); #endif + + if (atomic_inc_return(&mr->invalidated) > 1) { + wait_for_completion(&mr->invalidation_comp); + goto end; + } + + ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + if (ret) { + /* Error is not expected here, except when memory windows + * are bound to MR which is not supported with + * peer memory clients */ + atomic_set(&mr->invalidated, 0); + return ret; } - if (mr->umem) + if (!umem) + goto end; + ib_umem_release(mr->umem); +end: kfree(mr); return 0; } +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mw *mw; + int err; + + mw = kmalloc(sizeof(*mw), GFP_KERNEL); + if (!mw) + return ERR_PTR(-ENOMEM); + + err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, (enum mlx4_mw_type)type, &mw->mmw); + if (err) + goto err_free; + + err = mlx4_mw_enable(dev->dev, &mw->mmw); + if (err) + goto err_mw; + + mw->ibmw.rkey = mw->mmw.key; + + return &mw->ibmw; + +err_mw: + mlx4_mw_free(dev->dev, &mw->mmw); + +err_free: + kfree(mw); + + return ERR_PTR(err); +} + +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + struct ib_send_wr wr; + struct ib_send_wr *bad_wr; + int ret; + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_BIND_MW; + wr.wr_id = mw_bind->wr_id; + wr.send_flags = mw_bind->send_flags; + wr.wr.bind_mw.mw = mw; + wr.wr.bind_mw.bind_info = mw_bind->bind_info; + wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); + + ret = mlx4_ib_post_send(qp, &wr, &bad_wr); + if (!ret) + mw->rkey = wr.wr.bind_mw.rkey; + + return ret; +} + +int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) +{ + struct mlx4_ib_mw *mw = to_mmw(ibmw); + + mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); + kfree(mw); + + return 0; +} + struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int 
max_page_list_len) { @@ -606,7 +738,7 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, return &mr->ibmr; err_mr: - mlx4_mr_free(dev->dev, &mr->mmr); + (void) mlx4_mr_free(dev->dev, &mr->mmr); err_free: kfree(mr); @@ -685,7 +817,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, return &fmr->ibfmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); err_free: kfree(fmr); diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/qp.c b/sys/ofed/drivers/infiniband/hw/mlx4/qp.c index c5ebe6b..b3d9695 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/qp.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/qp.c @@ -45,13 +45,11 @@ #include <linux/mlx4/driver.h> #include <linux/io.h> -#ifndef __linux__ -#define asm __asm -#endif - #include "mlx4_ib.h" #include "user.h" +#define asm __asm + enum { MLX4_IB_ACK_REQ_FREQ = 8, }; @@ -111,6 +109,8 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), + [IB_WR_BIND_MW] = cpu_to_be32( + MLX4_OPCODE_BIND_MW), }; #ifndef wc_wmb @@ -263,7 +263,7 @@ static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) /* Pad the remainder of the WQE with an inline data segment. */ if (size > s) { inl = wqe + s; - inl->byte_count = cpu_to_be32(1U << 31 | (size - s - sizeof *inl)); + inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); } ctrl->srcrb_flags = 0; ctrl->fence_size = size / 16; @@ -274,7 +274,7 @@ static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) wmb(); ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | - (n & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0); + (n & qp->sq.wqe_cnt ? 
cpu_to_be32(1 << 31) : 0); stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); } @@ -573,6 +573,12 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr, sizeof (struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(dev, + qp->sqp_proxy_rcv[i].map))) { + pr_warn("ib_dma_map_single failed\n"); + kfree(qp->sqp_proxy_rcv[i].addr); + goto err; + } } return 0; @@ -602,15 +608,6 @@ static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) kfree(qp->sqp_proxy_rcv); } -static int qp_has_rq(struct ib_qp_init_attr *attr) -{ - if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) - return 0; - - return !attr->srq; -} - -#ifdef __linux__ static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp, struct ib_qp_init_attr *attr, int *qpn) { @@ -644,7 +641,7 @@ static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp, err = mlx4_ib_steer_qp_alloc(dev, tss_align_num, &tss_base); else err = mlx4_qp_reserve_range(dev->dev, tss_align_num, - tss_align_num, &tss_base, 1); + tss_align_num, &tss_base, MLX4_RESERVE_BF_QP); if (err) goto err1; @@ -791,7 +788,6 @@ static void free_qpg_qpn(struct mlx4_ib_qp *mqp, int qpn) break; } } -#endif static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, struct ib_qp_init_attr *attr, int *qpn) @@ -800,10 +796,12 @@ static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, switch (attr->qpg_type) { case IB_QPG_NONE: - /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE - * BlueFlame setup flow wrongly causes VLAN insertion. */ + /* Raw packet QPNs may not have bits 6,7 set in their qp_num; + * otherwise, the WQE BlueFlame setup flow wrongly causes + * VLAN insertion. 
*/ if (attr->qp_type == IB_QPT_RAW_PACKET) { - err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 1); + err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, + MLX4_RESERVE_BF_QP); } else { if(qp->flags & MLX4_IB_QP_NETIF) err = mlx4_ib_steer_qp_alloc(dev, 1, qpn); @@ -812,15 +810,11 @@ static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, } break; case IB_QPG_PARENT: -#ifdef __linux__ err = init_qpg_parent(dev, qp, attr, qpn); -#endif break; case IB_QPG_CHILD_TX: case IB_QPG_CHILD_RX: -#ifdef __linux__ err = alloc_qpg_qpn(attr, qp, qpn); -#endif break; default: qp->qpg_type = IB_QPG_NONE; @@ -844,15 +838,11 @@ static void free_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_qp_release_range(dev->dev, qpn, 1); break; case IB_QPG_PARENT: -#ifdef __linux__ free_qpg_parent(dev, qp); -#endif break; case IB_QPG_CHILD_TX: case IB_QPG_CHILD_RX: -#ifdef __linux__ free_qpg_qpn(qp, qpn); -#endif break; default: break; @@ -881,10 +871,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; -#ifndef __linux__ - init_attr->qpg_type = IB_QPG_NONE; -#endif - /* When tunneling special qps, we use a plain UD qp */ if (sqpn) { if (mlx4_is_mfunc(dev->dev) && @@ -941,6 +927,23 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->mlx4_ib_qp_type = qp_type; + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; + + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) + qp->flags |= MLX4_IB_QP_LSO; + + if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { + if (dev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED && + !mlx4_is_mfunc(dev->dev)) + qp->flags |= MLX4_IB_QP_NETIF; + else { + err = -EINVAL; + goto err; + } + } + mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); @@ -952,7 +955,7 @@ static 
int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, mlx4_ib_qp_has_rq(init_attr), qp); if (err) goto err; @@ -961,11 +964,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, int shift; int n; - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + if (!udata || ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { err = -EFAULT; goto err; } + if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL) + qp->flags |= MLX4_IB_QP_CAP_CROSS_CHANNEL; + + if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND) + qp->flags |= MLX4_IB_QP_CAP_MANAGED_SEND; + + if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) + qp->flags |= MLX4_IB_QP_CAP_MANAGED_RECV; + qp->sq_no_prefetch = ucmd.sq_no_prefetch; err = set_user_sq_size(dev, qp, &ucmd); @@ -990,7 +1002,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; - if (qp_has_rq(init_attr)) { + if (mlx4_ib_qp_has_rq(init_attr)) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &qp->db); if (err) @@ -999,23 +1011,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } else { qp->sq_no_prefetch = 0; - if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) - qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; - - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) - qp->flags |= MLX4_IB_QP_LSO; - - if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP && - dev->dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED && - !mlx4_is_mfunc(dev->dev)) - qp->flags |= MLX4_IB_QP_NETIF; - err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); if (err) goto err; - if (qp_has_rq(init_attr)) { + if (mlx4_ib_qp_has_rq(init_attr)) { err = 
mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; @@ -1097,7 +1097,7 @@ err_proxy: free_proxy_bufs(pd->device, qp); err_wrid: if (pd->uobject) { - if (qp_has_rq(init_attr)) + if (mlx4_ib_qp_has_rq(init_attr)) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { kfree(qp->sq.wrid); @@ -1114,7 +1114,7 @@ err_buf: mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: - if (!pd->uobject && qp_has_rq(init_attr)) + if (!pd->uobject && mlx4_ib_qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); if (qp->max_inline_data) @@ -1145,7 +1145,7 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv { if (send_cq == recv_cq) { spin_lock_irq(&send_cq->lock); - (void) __acquire(&recv_cq->lock); + __acquire(&recv_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_lock_irq(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); @@ -1159,7 +1159,7 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re __releases(&send_cq->lock) __releases(&recv_cq->lock) { if (send_cq == recv_cq) { - (void) __release(&recv_cq->lock); + __release(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); @@ -1300,14 +1300,14 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) return dev->dev->caps.qp1_proxy[attr->port_num - 1]; } -#ifdef __linux__ static int check_qpg_attr(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) { if (attr->qpg_type == IB_QPG_NONE) return 0; - if (attr->qp_type != IB_QPT_UD) + if (attr->qp_type != IB_QPT_UD && + attr->qp_type != IB_QPT_RAW_PACKET) return -EINVAL; if (attr->qpg_type == IB_QPG_PARENT) { @@ -1346,7 +1346,6 @@ static int check_qpg_attr(struct mlx4_ib_dev *dev, } return 0; } -#endif #define RESERVED_FLAGS_MASK ((((unsigned int)IB_QP_CREATE_RESERVED_END - 1) | IB_QP_CREATE_RESERVED_END) \ & ~(IB_QP_CREATE_RESERVED_START - 1)) @@ -1364,6 
+1363,15 @@ static enum mlx4_ib_qp_flags to_mlx4_ib_qp_flags(enum ib_qp_create_flags ib_qp_f if (ib_qp_flags & IB_QP_CREATE_NETIF_QP) mlx4_ib_qp_flags |= MLX4_IB_QP_NETIF; + if (ib_qp_flags & IB_QP_CREATE_CROSS_CHANNEL) + mlx4_ib_qp_flags |= MLX4_IB_QP_CAP_CROSS_CHANNEL; + + if (ib_qp_flags & IB_QP_CREATE_MANAGED_SEND) + mlx4_ib_qp_flags |= MLX4_IB_QP_CAP_MANAGED_SEND; + + if (ib_qp_flags & IB_QP_CREATE_MANAGED_RECV) + mlx4_ib_qp_flags |= MLX4_IB_QP_CAP_MANAGED_RECV; + /* reserved flags */ mlx4_ib_qp_flags |= (ib_qp_flags & RESERVED_FLAGS_MASK); @@ -1387,6 +1395,9 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, * and only for kernel UD QPs. */ if (mlx4_qp_flags & ~(MLX4_IB_QP_LSO | + MLX4_IB_QP_CAP_CROSS_CHANNEL | + MLX4_IB_QP_CAP_MANAGED_SEND | + MLX4_IB_QP_CAP_MANAGED_RECV | MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF)) @@ -1397,19 +1408,30 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - if (init_attr->create_flags && - (udata || - ((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP) && + if ((mlx4_qp_flags & + (MLX4_IB_QP_CAP_CROSS_CHANNEL | + MLX4_IB_QP_CAP_MANAGED_SEND | + MLX4_IB_QP_CAP_MANAGED_RECV)) && + !(to_mdev(device)->dev->caps.flags & + MLX4_DEV_CAP_FLAG_CROSS_CHANNEL)) { + pr_debug("%s Does not support cross-channel operations\n", + to_mdev(device)->ib_dev.name); + return ERR_PTR(-EINVAL); + } + + if ((init_attr->create_flags & + ~(IB_QP_CREATE_CROSS_CHANNEL | + IB_QP_CREATE_MANAGED_SEND | + IB_QP_CREATE_MANAGED_RECV)) && + (((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP) && init_attr->qp_type != IB_QPT_UD) || ((mlx4_qp_flags & MLX4_IB_SRIOV_SQP) && init_attr->qp_type > IB_QPT_GSI))) return ERR_PTR(-EINVAL); -#ifdef __linux__ err = check_qpg_attr(to_mdev(device), init_attr); if (err) return ERR_PTR(err); -#endif switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: @@ -1559,32 +1581,42 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) path->sched_queue = 
(path->sched_queue & 0xbf) | ((port - 1) << 6); } +static int ib_rate_to_mlx4(struct mlx4_ib_dev *dev, u8 rate) +{ + if (rate == IB_RATE_PORT_CURRENT) { + return 0; + } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { + return -EINVAL; + } else { + while (rate != IB_RATE_2_5_GBPS && + !(1 << (rate + MLX4_STAT_RATE_OFFSET) & + dev->dev->caps.stat_rate_support)) + --rate; + } + + return rate + MLX4_STAT_RATE_OFFSET; +} + static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, - struct mlx4_ib_qp *qp, struct mlx4_qp_path *path, - u8 port, int is_primary) + u8 *smac, u16 vlan_id, struct mlx4_ib_qp *qp, + struct mlx4_qp_path *path, u8 port, int is_primary) { - struct net_device *ndev; - int err; int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_ETHERNET; - u8 mac[6]; - int is_mcast; u16 vlan_tag; int vidx; int smac_index; + int err; u64 u64_mac; - u8 *smac; struct mlx4_roce_smac_vlan_info *smac_info; path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); - if (ah->static_rate) { - path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET; - while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && - !(1 << path->static_rate & dev->dev->caps.stat_rate_support)) - --path->static_rate; - } else - path->static_rate = 0; + + err = ib_rate_to_mlx4(dev, ah->static_rate); + if (err < 0) + return err; + path->static_rate = err; if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) { @@ -1614,7 +1646,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, else smac_info = &qp->alt; - vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); + vlan_tag = vlan_id; if (vlan_tag < 0x1000) { if (smac_info->vid < 0x1000) { /* both valid vlan ids */ @@ -1653,28 +1685,13 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, } } - err = mlx4_ib_resolve_grh(dev, ah, mac, 
&is_mcast, port); - if (err) - return err; /* get smac_index for RoCE use. * If no smac was yet assigned, register one. * If one was already assigned, but the new mac differs, * unregister the old one and register the new one. */ - spin_lock(&dev->iboe.lock); - ndev = dev->iboe.netdevs[port - 1]; - if (ndev) { -#ifdef __linux__ - smac = ndev->dev_addr; /* fixme: cache this value */ -#else - smac = IF_LLADDR(ndev); /* fixme: cache this value */ -#endif - u64_mac = mlx4_mac_to_u64(smac); - } else - u64_mac = dev->dev->caps.def_mac[port]; - spin_unlock(&dev->iboe.lock); if (!smac_info->smac || smac_info->smac != u64_mac) { /* register candidate now, unreg if needed, after success */ @@ -1688,7 +1705,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, } else smac_index = smac_info->smac_index; - memcpy(path->dmac, mac, 6); + memcpy(path->dmac, ah->dmac, 6); path->ackto = MLX4_IB_LINK_TYPE_ETH; /* put MAC table smac index for IBoE */ path->grh_mylmc = (u8) (smac_index) | 0x80 ; @@ -1712,24 +1729,21 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } } -static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, +static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, const u8 *smac, struct mlx4_qp_context *context) { struct net_device *ndev; u64 u64_mac; - u8 *smac; int smac_index; + ndev = dev->iboe.netdevs[qp->port - 1]; if (ndev) { -#ifdef __linux__ - smac = ndev->dev_addr; /* fixme: cache this value */ -#else - smac = IF_LLADDR(ndev); /* fixme: cache this value */ -#endif + smac = IF_LLADDR(ndev); u64_mac = mlx4_mac_to_u64(smac); - } else + } else { u64_mac = dev->dev->caps.def_mac[qp->port]; + } context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6); if (!qp->pri.smac) { @@ -1783,6 +1797,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } + if (qp->max_inlr_data) + context->param3 |= cpu_to_be32(1 << 25); + if 
(ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; else if (ibqp->qp_type == IB_QPT_RAW_PACKET) @@ -1834,12 +1851,13 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { - if (dev->counters[qp->port - 1] != -1) { + if (dev->counters[qp->port - 1].counter_index != -1) { context->pri_path.counter_index = - dev->counters[qp->port - 1]; + dev->counters[qp->port - 1].counter_index; optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; - } else + } else { context->pri_path.counter_index = 0xff; + } if (qp->flags & MLX4_IB_QP_NETIF && (qp->qpg_type == IB_QPG_NONE || qp->qpg_type == IB_QPG_PARENT)) { @@ -1855,8 +1873,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; } - if (attr_mask & IB_QP_AV) { - if (mlx4_set_path(dev, &attr->ah_attr, qp, &context->pri_path, + if ((attr_mask & IB_QP_AV) && (ibqp->qp_type != IB_QPT_RAW_PACKET)) { + if (mlx4_set_path(dev, &attr->ah_attr, (u8 *)attr->smac, + attr_mask & IB_QP_VID ? + attr->vlan_id : 0xffff , + qp, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port, 1)) goto out; @@ -1879,12 +1900,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, dev->dev->caps.pkey_table_len[attr->alt_port_num]) goto out; - if (mlx4_set_path(dev, &attr->alt_ah_attr, qp, &context->alt_path, + if (mlx4_set_path(dev, &attr->alt_ah_attr, (u8 *)attr->smac, + attr_mask & IB_QP_ALT_VID ? 
+ attr->alt_vlan_id : 0xffff, + qp, &context->alt_path, attr->alt_port_num, 0)) goto out; context->alt_path.pkey_index = attr->alt_pkey_index; context->alt_path.ackto = attr->alt_timeout << 3; + context->alt_path.counter_index = dev->counters[attr->alt_port_num - 1].counter_index; optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } @@ -1943,6 +1968,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_M_EXT_CLASS_3) context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_RQ); + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + context->params2 |= (qp->flags & MLX4_IB_QP_CAP_CROSS_CHANNEL ? + cpu_to_be32(MLX4_QP_BIT_COLL_MASTER) : 0); + context->params2 |= (qp->flags & MLX4_IB_QP_CAP_MANAGED_SEND ? + cpu_to_be32(MLX4_QP_BIT_COLL_MASTER | MLX4_QP_BIT_COLL_SYNC_SQ) : 0); + context->params2 |= (qp->flags & MLX4_IB_QP_CAP_MANAGED_RECV ? + cpu_to_be32(MLX4_QP_BIT_COLL_MASTER | MLX4_QP_BIT_COLL_SYNC_RQ) : 0); + } + if (ibqp->srq) context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); @@ -1997,6 +2031,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pri_path.fl = 0x80; context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; } + if (ibqp->qp_type == IB_QPT_RAW_PACKET && + (attr_mask & IB_QP_AV)) { + context->pri_path.sched_queue |= + ((attr->ah_attr.sl & 0xf) << 3); + context->pri_path.feup = 1 << 6; + } is_eth = rdma_port_get_link_layer(&dev->ib_dev, qp->port) == IB_LINK_LAYER_ETHERNET; if (is_eth) { @@ -2007,13 +2047,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { - err = handle_eth_ud_smac_index(dev, qp, context); + err = handle_eth_ud_smac_index(dev, qp, (const u8 *)attr->smac, context); if (err) return -EINVAL; } } } + if (ibqp->qp_type == IB_QPT_UD) + if (is_eth && (new_state == IB_QPS_RTR)) { + context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH; + optpar |= 
MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH; + } + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; @@ -2072,7 +2118,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); - ctrl->owner_opcode = cpu_to_be32(1U << 31); + ctrl->owner_opcode = cpu_to_be32(1 << 31); if (qp->sq_max_wqes_per_wr == 1) ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); @@ -2080,6 +2126,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } + if ((qp->port && rdma_port_get_link_layer(&dev->ib_dev, qp->port) == + IB_LINK_LAYER_ETHERNET) && (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)) + context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | + MLX4_IB_LINK_TYPE_ETH; + err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), to_mlx4_state(new_state), context, optpar, sqd_event, &qp->mqp); @@ -2268,14 +2319,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx4_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; int err = -EINVAL; + int ll; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + ll = IB_LINK_LAYER_UNSPECIFIED; + } else { + int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + ll = rdma_port_get_link_layer(&dev->ib_dev, port); + } + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask & ~IB_M_QP_MOD_VEND_MASK)) { + attr_mask & ~IB_M_QP_MOD_VEND_MASK, ll)) { pr_debug("qpn 0x%x: invalid attribute mask specified " "for transition %d to %d. 
qp_type %d," " attr_mask 0x%x\n", @@ -2299,11 +2358,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) && - (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) != - IB_LINK_LAYER_ETHERNET)) - goto out; - if (attr_mask & IB_QP_PKEY_INDEX) { int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) { @@ -2421,11 +2475,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (header_size <= spc) { - inl->byte_count = cpu_to_be32(1U << 31 | header_size); + inl->byte_count = cpu_to_be32(1 << 31 | header_size); memcpy(inl + 1, sqp->header_buf, header_size); i = 1; } else { - inl->byte_count = cpu_to_be32(1U << 31 | spc); + inl->byte_count = cpu_to_be32(1 << 31 | spc); memcpy(inl + 1, sqp->header_buf, spc); inl = (void *) (inl + 1) + spc; @@ -2444,7 +2498,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, * of 16 mod 64. 
*/ wmb(); - inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc)); + inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); i = 2; } @@ -2470,7 +2524,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, int is_eth; int is_vlan = 0; int is_grh; - u16 vlan = 0; + u16 uninitialized_var(vlan); int err = 0; send_size = 0; @@ -2497,8 +2551,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return err; } - vlan = rdma_get_vlan_id(&sgid); - is_vlan = vlan < 0x1000; + if (is_eth && ah->av.eth.vlan != 0xffff) { + vlan = cpu_to_be16(ah->av.eth.vlan) & 0x0fff; + is_vlan = 1; + } } ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); @@ -2565,7 +2621,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, } if (is_eth) { - u8 smac[6]; + u8 *smac; struct in6_addr in6; u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; @@ -2577,8 +2633,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); memcpy(&in6, sgid.raw, sizeof(in6)); - rdma_get_ll_mac(&in6, smac); + + if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) + smac = IF_LLADDR(to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]); + else + smac = ah->av.eth.s_mac; /* use the src mac of the tunnel */ memcpy(sqp->ud_header.eth.smac_h, smac, 6); + if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); if (!is_vlan) { @@ -2628,11 +2689,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (header_size <= spc) { - inl->byte_count = cpu_to_be32(1U << 31 | header_size); + inl->byte_count = cpu_to_be32(1 << 31 | header_size); memcpy(inl + 1, sqp->header_buf, header_size); i = 1; } else { - 
inl->byte_count = cpu_to_be32(1U << 31 | spc); + inl->byte_count = cpu_to_be32(1 << 31 | spc); memcpy(inl + 1, sqp->header_buf, spc); inl = (void *) (inl + 1) + spc; @@ -2651,7 +2712,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, * of 16 mod 64. */ wmb(); - inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc)); + inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); i = 2; } @@ -2679,9 +2740,12 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq static __be32 convert_access(int acc) { - return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) | - (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) | + return (acc & IB_ACCESS_REMOTE_ATOMIC ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? 
cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } @@ -2707,6 +2771,24 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) fseg->reserved[1] = 0; } +static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr) +{ + bseg->flags1 = + convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) & + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ | + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE | + MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC); + bseg->flags2 = 0; + if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2); + if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED); + bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey); + bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey); + bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr); + bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length); +} + static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { iseg->mem_key = cpu_to_be32(rkey); @@ -2792,23 +2874,25 @@ static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_ hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(hdr.mac, ah->av.eth.mac, 6); + hdr.vlan = cpu_to_be16(ah->av.eth.vlan); spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (sizeof (hdr) <= spc) { memcpy(inl + 1, &hdr, sizeof (hdr)); wmb(); - inl->byte_count = cpu_to_be32(1U << 31 | sizeof (hdr)); + inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr)); i = 1; } else { memcpy(inl + 1, &hdr, spc); wmb(); - inl->byte_count = cpu_to_be32(1U << 31 | spc); + inl->byte_count = cpu_to_be32(1 << 31 | spc); inl = (void *) (inl + 1) + spc; memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc); wmb(); - inl->byte_count = 
cpu_to_be32(1U << 31 | (sizeof (hdr) - spc)); + inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc)); i = 2; } @@ -2833,7 +2917,7 @@ static void set_mlx_icrc_seg(void *dseg) */ wmb(); - iseg->byte_count = cpu_to_be32((1U << 31) | 4); + iseg->byte_count = cpu_to_be32((1 << 31) | 4); } static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) @@ -2901,7 +2985,7 @@ static void add_zero_len_inline(void *wqe) { struct mlx4_wqe_inline_seg *inl = wqe; memset(wqe, 0, 16); - inl->byte_count = cpu_to_be32(1U << 31); + inl->byte_count = cpu_to_be32(1 << 31); } static int lay_inline_data(struct mlx4_ib_qp *qp, struct ib_send_wr *wr, @@ -3102,6 +3186,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_fmr_seg) / 16; break; + case IB_WR_BIND_MW: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_bind_seg(wqe, wr); + wqe += sizeof(struct mlx4_wqe_bind_seg); + size += sizeof(struct mlx4_wqe_bind_seg) / 16; default: /* No extra segments required for sends */ break; @@ -3246,14 +3336,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, */ wmb(); - if (wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { + if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { *bad_wr = wr; err = -EINVAL; goto out; } ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | - (ind & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0) | blh; + (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); @@ -3576,6 +3666,15 @@ done: qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + if (qp->flags & MLX4_IB_QP_CAP_CROSS_CHANNEL) + qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL; + + if (qp->flags & MLX4_IB_QP_CAP_MANAGED_SEND) + qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; + + if (qp->flags & MLX4_IB_QP_CAP_MANAGED_RECV) + qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; + qp_init_attr->qpg_type = ibqp->qpg_type; if (ibqp->qpg_type == IB_QPG_PARENT) qp_init_attr->cap.qpg_tss_mask_sz = qp->qpg_data->qpg_tss_mask_sz; @@ -3586,4 +3685,3 @@ out: mutex_unlock(&qp->mutex); return err; } - diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c b/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c index 6837b86..df4549f 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c @@ -56,8 +56,8 @@ static ssize_t show_admin_alias_guid(struct device *dev, record_num = mlx4_ib_iov_dentry->entry_num / 8 ; guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ; - return sprintf(buf, "%llx\n", (long long) - be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid. + return sprintf(buf, "%llx\n", + (long long)be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid. ports_guid[port->num - 1]. all_rec_per_port[record_num]. 
all_recs[8 * guid_index_in_rec])); diff --git a/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c b/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c index 088e440..3fed07c 100644 --- a/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c @@ -672,8 +672,8 @@ static int mthca_destroy_qp(struct ib_qp *qp) return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, + struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { @@ -681,6 +681,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, struct mthca_cq *cq; int nent; int err; + int entries = attr->cqe; if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return ERR_PTR(-EINVAL); @@ -1010,12 +1011,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata, int mr_id) { struct mthca_dev *dev = to_mdev(pd->device); - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct mthca_mr *mr; struct mthca_reg_mr ucmd; u64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; int write_mtt_size; @@ -1044,10 +1045,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mr->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - n += chunk->nents; - + n = mr->umem->nmap;; mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { err = PTR_ERR(mr->mtt); @@ -1064,25 +1062,27 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - 
pages[i++] = sg_dma_address(&chunk->page_list[j]) + - mr->umem->page_size * k; - /* - * Be friendly to write_mtt and pass it chunks - * of appropriate size. - */ - if (i == write_mtt_size) { - err = mthca_write_mtt(dev, mr->mtt, n, pages, i); - if (err) - goto mtt_done; - n += i; - i = 0; - } + for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + mr->umem->page_size * k; + /* + * Be friendly to write_mtt and pass it chunks + * of appropriate size. + */ + if (i == write_mtt_size) { + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; } } + } + + + if (i) err = mthca_write_mtt(dev, mr->mtt, n, pages, i); diff --git a/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c b/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c index 2264bcd..b4c70b4 100644 --- a/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c @@ -870,7 +870,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, new_state = attr_mask & IB_QP_STATE ? 
attr->qp_state : cur_state; - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { mthca_dbg(dev, "Bad QP transition (transport %d) " "%d->%d with attr 0x%08x\n", qp->transport, cur_state, new_state, diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile b/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile deleted file mode 100644 index 3090100..0000000 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o - -ib_ipoib-y := ipoib_main.o \ - ipoib_ib.o \ - ipoib_multicast.o \ - ipoib_verbs.o \ - ipoib_vlan.o \ - ipoib_ethtool.o -ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o -ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o - diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h index 7d5e175..eb269a4 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h @@ -80,6 +80,7 @@ #include <linux/workqueue.h> #include <linux/kref.h> #include <linux/mutex.h> +#include <linux/rbtree.h> #include <asm/atomic.h> @@ -313,6 +314,7 @@ struct ipoib_ethtool_st { */ struct ipoib_dev_priv { spinlock_t lock; + spinlock_t drain_lock; struct ifnet *dev; diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 4fb39b4..814938c 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -383,6 +383,7 @@ ipoib_poll(struct ipoib_dev_priv *priv) int n, i; poll_more: + spin_lock(&priv->drain_lock); for (;;) { n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc); @@ -401,6 +402,7 @@ poll_more: if (n != IPOIB_NUM_WC) break; } + spin_unlock(&priv->drain_lock); if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) @@ -707,6 
+709,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv) { int i, n; + spin_lock(&priv->drain_lock); do { n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc); for (i = 0; i < n; ++i) { @@ -727,6 +730,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv) ipoib_ib_handle_rx_wc(priv, priv->ibwc + i); } } while (n == IPOIB_NUM_WC); + spin_unlock(&priv->drain_lock); spin_lock(&priv->lock); while (ipoib_poll_tx(priv)) diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c index 695621f..35e16417 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -832,6 +832,7 @@ ipoib_priv_alloc(void) priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK); spin_lock_init(&priv->lock); + spin_lock_init(&priv->drain_lock); mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 9c7bcec..4c04da1 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -466,12 +466,20 @@ void ipoib_mcast_join_task(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, mcast_task.work); struct ifnet *dev = priv->dev; + struct ib_port_attr attr; ipoib_dbg_mcast(priv, "Running join task. 
flags 0x%lX\n", priv->flags); if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) return; + if (ib_query_port(priv->ca, priv->port, &attr) || + attr.state != IB_PORT_ACTIVE) { + ipoib_dbg(priv, "%s: port state is not ACTIVE (state = %d) suspend task.\n", + __func__, attr.state); + return; + } + if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) ipoib_warn(priv, "ib_query_gid() failed\n"); else diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c index 2e91d85..ace705c 100644 --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c @@ -129,7 +129,7 @@ sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred) /* rdma_bind_addr handles bind races. */ SDP_WUNLOCK(ssk); if (ssk->id == NULL) - ssk->id = rdma_create_id(sdp_cma_handler, ssk, RDMA_PS_SDP); + ssk->id = rdma_create_id(sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC); if (ssk->id == NULL) { SDP_WLOCK(ssk); return (ENOMEM); @@ -1702,11 +1702,15 @@ int sdp_mod_usec = 0; void sdp_set_default_moderation(struct sdp_sock *ssk) { + struct ib_cq_attr attr; if (sdp_mod_count <= 0 || sdp_mod_usec <= 0) return; - ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec); -} + memset(&attr, 0, sizeof(attr)); + attr.moderation.cq_count = sdp_mod_count; + attr.moderation.cq_period = sdp_mod_usec; + ib_modify_cq(ssk->rx_ring.cq, &attr, IB_CQ_MODERATION); +} static void sdp_dev_add(struct ib_device *device) diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c index 4e581ab..0b78212 100644 --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c @@ -736,7 +736,7 @@ sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device) } rx_cq = ib_create_cq(device, sdp_rx_irq, sdp_rx_cq_event_handler, - ssk->socket, SDP_RX_SIZE, IB_CQ_VECTOR_LEAST_ATTACHED); + ssk->socket, SDP_RX_SIZE, 0); if (IS_ERR(rx_cq)) { rc 
= PTR_ERR(rx_cq); diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c index f7d84be..6f54331 100644 --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c @@ -438,7 +438,7 @@ sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device) } tx_cq = ib_create_cq(device, sdp_tx_irq, sdp_tx_cq_event_handler, - ssk, SDP_TX_SIZE, IB_CQ_VECTOR_LEAST_ATTACHED); + ssk, SDP_TX_SIZE, 0); if (IS_ERR(tx_cq)) { rc = PTR_ERR(tx_cq); diff --git a/sys/ofed/drivers/net/mlx4/Makefile b/sys/ofed/drivers/net/mlx4/Makefile index dc0e2a3..05338e8 100644 --- a/sys/ofed/drivers/net/mlx4/Makefile +++ b/sys/ofed/drivers/net/mlx4/Makefile @@ -30,4 +30,4 @@ opt_inet6.h: .include <bsd.kmod.mk> -CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} +CFLAGS+= -Wno-cast-qual -Wno-pointer-arith diff --git a/sys/ofed/drivers/net/mlx4/en_rx.c b/sys/ofed/drivers/net/mlx4/en_rx.c index fa26326..39688a5 100644 --- a/sys/ofed/drivers/net/mlx4/en_rx.c +++ b/sys/ofed/drivers/net/mlx4/en_rx.c @@ -492,7 +492,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, if (nr) mb->m_next = mb_list[nr]; mb = mb_list[nr]; - mb->m_len = frag_info[nr].frag_size; + mb->m_len = frag_info->frag_size; dma = be64_to_cpu(rx_desc->data[nr].addr); /* Allocate a replacement page */ diff --git a/sys/ofed/include/linux/device.h b/sys/ofed/include/linux/device.h index f7bb0fb..87cf0e8 100644 --- a/sys/ofed/include/linux/device.h +++ b/sys/ofed/include/linux/device.h @@ -431,17 +431,6 @@ static inline char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) return p; } -static inline char *kasprintf(gfp_t gfp, const char *fmt, ...) 
-{ - va_list ap; - char *p; - - va_start(ap, fmt); - p = kvasprintf(gfp, fmt, ap); - va_end(ap); - - return p; -} - +char *kasprintf(gfp_t, const char *, ...); #endif /* _LINUX_DEVICE_H_ */ diff --git a/sys/ofed/include/linux/linux_compat.c b/sys/ofed/include/linux/linux_compat.c index 081177d..5dc881d 100644 --- a/sys/ofed/include/linux/linux_compat.c +++ b/sys/ofed/include/linux/linux_compat.c @@ -712,6 +712,20 @@ vunmap(void *addr) kfree(vmmap); } + +char * +kasprintf(gfp_t gfp, const char *fmt, ...) +{ + va_list ap; + char *p; + + va_start(ap, fmt); + p = kvasprintf(gfp, fmt, ap); + va_end(ap); + + return p; +} + static void linux_compat_init(void) { diff --git a/sys/ofed/include/linux/printk.h b/sys/ofed/include/linux/printk.h new file mode 100644 index 0000000..3c97ae7 --- /dev/null +++ b/sys/ofed/include/linux/printk.h @@ -0,0 +1,40 @@ +/*- + * Copyright (c) 2010 Isilon Systems, Inc. + * Copyright (c) 2010 iX Systems, Inc. + * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _FBSD_PRINTK_H_ +#define _FBSD_PRINTK_H_ + +/* GID printing macros */ +#define GID_PRINT_FMT "%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x" +#define GID_PRINT_ARGS(gid_raw) htons(((u16 *)gid_raw)[0]), htons(((u16 *)gid_raw)[1]),\ + htons(((u16 *)gid_raw)[2]), htons(((u16 *)gid_raw)[3]),\ + htons(((u16 *)gid_raw)[4]), htons(((u16 *)gid_raw)[5]),\ + htons(((u16 *)gid_raw)[6]), htons(((u16 *)gid_raw)[7]) + +#endif /* _FBSD_PRINTK_H */ diff --git a/sys/ofed/include/rdma/ib_addr.h b/sys/ofed/include/rdma/ib_addr.h index b711510..b564415 100644 --- a/sys/ofed/include/rdma/ib_addr.h +++ b/sys/ofed/include/rdma/ib_addr.h @@ -31,17 +31,20 @@ * SOFTWARE. */ -#if !defined(IB_ADDR_H) +#ifndef IB_ADDR_H #define IB_ADDR_H #include <linux/in.h> #include <linux/in6.h> #include <linux/if_arp.h> #include <linux/netdevice.h> +#include <linux/inetdevice.h> #include <linux/socket.h> +#include <linux/if_vlan.h> #include <rdma/ib_verbs.h> #include <rdma/ib_pack.h> -#include <linux/if_vlan.h> +#include <net/if_inet6.h> +#include <net/ipv6.h> struct rdma_addr_client { atomic_t refcount; @@ -72,7 +75,8 @@ struct rdma_dev_addr { * rdma_translate_ip - Translate a local IP address to an RDMA hardware * address. 
*/ -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr); +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, + u16 *vlan_id); /** * rdma_resolve_ip - Resolve source and destination IP addresses to @@ -101,6 +105,9 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr); +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); +int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac, + u16 *vlan_id); static inline int ip_addr_size(struct sockaddr *addr) { @@ -130,50 +137,56 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } -static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid) -{ - memset(gid->raw, 0, 16); - *((u32 *)gid->raw) = cpu_to_be32(0xfe800000); - if (vid < 0x1000) { - gid->raw[12] = vid & 0xff; - gid->raw[11] = vid >> 8; - } else { - gid->raw[12] = 0xfe; - gid->raw[11] = 0xff; - } - - memcpy(gid->raw + 13, mac + 3, 3); - memcpy(gid->raw + 8, mac, 3); - gid->raw[8] ^= 2; -} - static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) { -#ifdef __linux__ - return dev->priv_flags & IFF_802_1Q_VLAN ? 
- vlan_dev_vlan_id(dev) : 0xffff; -#else uint16_t tag; if (VLAN_TAG(__DECONST(struct ifnet *, dev), &tag) != 0) return 0xffff; return tag; -#endif } -static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) { - struct net_device *dev; - u16 vid = 0xffff; + switch (addr->sa_family) { + case AF_INET: + ipv6_addr_set_v4mapped(((struct sockaddr_in *)addr)->sin_addr.s_addr, + (struct in6_addr *)gid); + break; + case AF_INET6: + memcpy(gid->raw, &((struct sockaddr_in6 *)addr)->sin6_addr, + 16); + break; + default: + return -EINVAL; + } + return 0; +} - dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); - if (dev) { - vid = rdma_vlan_dev_vlan_id(dev); - dev_put(dev); +/* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */ +static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid) +{ + if (ipv6_addr_v4mapped((struct in6_addr *)gid)) { + struct sockaddr_in *out_in = (struct sockaddr_in *)out; + memset(out_in, 0, sizeof(*out_in)); + out_in->sin_len = sizeof(*out_in); + out_in->sin_family = AF_INET; + memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4); + } else { + struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out; + memset(out_in, 0, sizeof(*out_in)); + out_in->sin6_family = AF_INET6; + memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16); } + return 0; +} - iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid); +/* This func is called only in loopback ip address (127.0.0.1) + * case in which sgid is not relevant + */ +static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ } static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) @@ -223,27 +236,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu) return 0; } -#ifdef __linux__ -static inline int iboe_get_rate(struct net_device *dev) -{ - struct ethtool_cmd cmd; - - if (!dev->ethtool_ops || 
!dev->ethtool_ops->get_settings || - dev->ethtool_ops->get_settings(dev, &cmd)) - return IB_RATE_PORT_CURRENT; - - if (cmd.speed >= 40000) - return IB_RATE_40_GBPS; - else if (cmd.speed >= 30000) - return IB_RATE_30_GBPS; - else if (cmd.speed >= 20000) - return IB_RATE_20_GBPS; - else if (cmd.speed >= 10000) - return IB_RATE_10_GBPS; - else - return IB_RATE_PORT_CURRENT; -} -#else static inline int iboe_get_rate(struct net_device *dev) { if (dev->if_baudrate >= IF_Gbps(40)) @@ -257,11 +249,10 @@ static inline int iboe_get_rate(struct net_device *dev) else return IB_RATE_PORT_CURRENT; } -#endif static inline int rdma_link_local_addr(struct in6_addr *addr) { - if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) && + if (addr->s6_addr32[0] == htonl(0xfe800000) && addr->s6_addr32[1] == 0) return 1; @@ -280,6 +271,20 @@ static inline int rdma_is_multicast_addr(struct in6_addr *addr) return addr->s6_addr[0] == 0xff; } +static inline void resolve_mcast_mac(struct in6_addr *addr, u8 *mac) +{ + if (addr->s6_addr[0] != 0xff) + return; + +#ifdef DUAL_MODE_MCAST_MAC + if (addr->s6_addr[1] == 0x0e) /* IPv4 */ + ip_eth_mc_map(addr->s6_addr32[3], mac); + else +#endif + ipv6_eth_mc_map(addr, mac); +} + + static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) { int i; @@ -300,12 +305,7 @@ static inline u16 rdma_get_vlan_id(union ib_gid *dgid) static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) { -#ifdef __linux__ - return dev->priv_flags & IFF_802_1Q_VLAN ? 
- vlan_dev_real_dev(dev) : 0; -#else return VLAN_TRUNKDEV(__DECONST(struct ifnet *, dev)); -#endif } #endif /* IB_ADDR_H */ diff --git a/sys/ofed/include/rdma/ib_cache.h b/sys/ofed/include/rdma/ib_cache.h index 00a2b8e..ad9a3c2 100644 --- a/sys/ofed/include/rdma/ib_cache.h +++ b/sys/ofed/include/rdma/ib_cache.h @@ -101,6 +101,22 @@ int ib_find_cached_pkey(struct ib_device *device, u16 *index); /** + * ib_find_exact_cached_pkey - Returns the PKey table index where a specified + * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit) + * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the cached PKey table where the PKey was found. + * + * ib_find_exact_cached_pkey() searches the specified PKey table in + * the local software cache. + */ +int ib_find_exact_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index); + +/** * ib_get_cached_lmc - Returns a cached lmc table entry * @device: The device to query. * @port_num: The port number of the device to query. diff --git a/sys/ofed/include/rdma/ib_cm.h b/sys/ofed/include/rdma/ib_cm.h index 40c24b6..a7ffaf9 100644 --- a/sys/ofed/include/rdma/ib_cm.h +++ b/sys/ofed/include/rdma/ib_cm.h @@ -497,7 +497,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, * message. * @cm_id: Connection identifier associated with the connection message. * @service_timeout: The lower 5-bits specify the maximum time required for - * the sender to reply to to the connection message. The upper 3-bits + * the sender to reply to the connection message. The upper 3-bits * specify additional control flags. * @private_data: Optional user-defined private data sent with the * message receipt acknowledgement. 
@@ -601,4 +601,6 @@ struct ib_cm_sidr_rep_param { int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param); +int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac); + #endif /* IB_CM_H */ diff --git a/sys/ofed/include/rdma/ib_mad.h b/sys/ofed/include/rdma/ib_mad.h index 32f8114..3d81b90 100644 --- a/sys/ofed/include/rdma/ib_mad.h +++ b/sys/ofed/include/rdma/ib_mad.h @@ -77,6 +77,15 @@ #define IB_MGMT_MAX_METHODS 128 +/* MAD Status field bit masks */ +#define IB_MGMT_MAD_STATUS_SUCCESS 0x0000 +#define IB_MGMT_MAD_STATUS_BUSY 0x0001 +#define IB_MGMT_MAD_STATUS_REDIRECT_REQD 0x0002 +#define IB_MGMT_MAD_STATUS_BAD_VERSION 0x0004 +#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD 0x0008 +#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB 0x000c +#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE 0x001c + /* RMPP information */ #define IB_MGMT_RMPP_VERSION 1 diff --git a/sys/ofed/include/rdma/ib_pack.h b/sys/ofed/include/rdma/ib_pack.h index af615a4..1678be7 100644 --- a/sys/ofed/include/rdma/ib_pack.h +++ b/sys/ofed/include/rdma/ib_pack.h @@ -263,7 +263,5 @@ int ib_ud_header_pack(struct ib_ud_header *header, int ib_ud_header_unpack(void *buf, struct ib_ud_header *header); -int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf); -int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh); #endif /* IB_PACK_H */ diff --git a/sys/ofed/include/rdma/ib_peer_mem.h b/sys/ofed/include/rdma/ib_peer_mem.h new file mode 100644 index 0000000..b2a8a4a --- /dev/null +++ b/sys/ofed/include/rdma/ib_peer_mem.h @@ -0,0 +1,59 @@ +#if !defined(IB_PEER_MEM_H) +#define IB_PEER_MEM_H + +#include <rdma/peer_mem.h> + + +struct invalidation_ctx; +struct ib_ucontext; + +struct ib_peer_memory_statistics { + unsigned long num_alloc_mrs; + unsigned long num_dealloc_mrs; + unsigned long num_reg_pages; + unsigned long num_dereg_pages; + unsigned long num_free_callbacks; +}; + +struct ib_peer_memory_client { + const struct peer_memory_client 
*peer_mem; + + struct list_head core_peer_list; + struct list_head core_ticket_list; + unsigned long last_ticket; +#ifdef __FreeBSD__ + int holdcount; + int needwakeup; + struct cv peer_cv; +#else + struct srcu_struct peer_srcu; +#endif + struct mutex lock; + struct kobject *kobj; + struct attribute_group peer_mem_attr_group; + struct ib_peer_memory_statistics stats; +}; + +struct core_ticket { + unsigned long key; + void *context; + struct list_head ticket_list; +}; + +struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr, + size_t size, void **peer_client_context, + int *srcu_key); + +void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client, + void *peer_client_context, + int srcu_key); + +unsigned long ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client, + void *context); +int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client, + unsigned long key); +struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client, + unsigned long key); +#endif + + diff --git a/sys/ofed/include/rdma/ib_sa.h b/sys/ofed/include/rdma/ib_sa.h index 61588d9..65f1a00 100644 --- a/sys/ofed/include/rdma/ib_sa.h +++ b/sys/ofed/include/rdma/ib_sa.h @@ -154,6 +154,9 @@ struct ib_sa_path_rec { u8 packet_life_time_selector; u8 packet_life_time; u8 preference; + u8 smac[ETH_ALEN]; + u8 dmac[6]; + __be16 vlan_id; }; #define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) @@ -251,127 +254,6 @@ struct ib_sa_service_rec { u64 data64[2]; }; -enum { - IB_SA_EVENT_TYPE_FATAL = 0x0, - IB_SA_EVENT_TYPE_URGENT = 0x1, - IB_SA_EVENT_TYPE_SECURITY = 0x2, - IB_SA_EVENT_TYPE_SM = 0x3, - IB_SA_EVENT_TYPE_INFO = 0x4, - IB_SA_EVENT_TYPE_EMPTY = 0x7F, - IB_SA_EVENT_TYPE_ALL = 0xFFFF -}; - -enum { - IB_SA_EVENT_PRODUCER_TYPE_CA = 0x1, - IB_SA_EVENT_PRODUCER_TYPE_SWITCH = 0x2, - IB_SA_EVENT_PRODUCER_TYPE_ROUTER = 0x3, - IB_SA_EVENT_PRODUCER_TYPE_CLASS_MANAGER = 0x4, - IB_SA_EVENT_PRODUCER_TYPE_ALL = 0xFFFFFF 
-}; - -enum { - IB_SA_SM_TRAP_GID_IN_SERVICE = 64, - IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65, - IB_SA_SM_TRAP_CREATE_MC_GROUP = 66, - IB_SA_SM_TRAP_DELETE_MC_GROUP = 67, - IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128, - IB_SA_SM_TRAP_LINK_INTEGRITY = 129, - IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130, - IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131, - IB_SA_SM_TRAP_BAD_M_KEY = 256, - IB_SA_SM_TRAP_BAD_P_KEY = 257, - IB_SA_SM_TRAP_BAD_Q_KEY = 258, - IB_SA_SM_TRAP_SWITCH_BAD_P_KEY = 259, - IB_SA_SM_TRAP_ALL = 0xFFFF -}; - -struct ib_sa_inform { - union ib_gid gid; - __be16 lid_range_begin; - __be16 lid_range_end; - u8 is_generic; - u8 subscribe; - __be16 type; - union { - struct { - __be16 trap_num; - __be32 qpn; - u8 resp_time; - __be32 producer_type; - } generic; - struct { - __be16 device_id; - __be32 qpn; - u8 resp_time; - __be32 vendor_id; - } vendor; - } trap; -}; - -struct ib_sa_notice { - u8 is_generic; - u8 type; - union { - struct { - __be32 producer_type; - __be16 trap_num; - } generic; - struct { - __be32 vendor_id; - __be16 device_id; - } vendor; - } trap; - __be16 issuer_lid; - __be16 notice_count; - u8 notice_toggle; - /* - * Align data 16 bits off 64 bit field to match InformInfo definition. - * Data contained within this field will then align properly. - * See IB spec 1.2, sections 13.4.8.2 and 14.2.5.1. 
- */ - u8 reserved[5]; - u8 data_details[54]; - union ib_gid issuer_gid; -}; - -/* - * SM notice data details for: - * - * IB_SA_SM_TRAP_GID_IN_SERVICE = 64 - * IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65 - * IB_SA_SM_TRAP_CREATE_MC_GROUP = 66 - * IB_SA_SM_TRAP_DELETE_MC_GROUP = 67 - */ -struct ib_sa_notice_data_gid { - u8 reserved[6]; - u8 gid[16]; - u8 padding[32]; -}; - -/* - * SM notice data details for: - * - * IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128 - */ -struct ib_sa_notice_data_port_change { - __be16 lid; - u8 padding[52]; -}; - -/* - * SM notice data details for: - * - * IB_SA_SM_TRAP_LINK_INTEGRITY = 129 - * IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130 - * IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131 - */ -struct ib_sa_notice_data_port_error { - u8 reserved[2]; - __be16 lid; - u8 port_num; - u8 padding[49]; -}; - #define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0) #define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1) #define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2) @@ -528,56 +410,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, */ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec); -struct ib_inform_info { - void *context; - int (*callback)(int status, - struct ib_inform_info *info, - struct ib_sa_notice *notice); - u16 trap_number; -}; - -/** - * ib_sa_register_inform_info - Registers to receive notice events. - * @device: Device associated with the registration. - * @port_num: Port on the specified device to associate with the registration. - * @trap_number: InformInfo trap number to register for. - * @gfp_mask: GFP mask for memory allocations. - * @callback: User callback invoked once the registration completes and to - * report noticed events. - * @context: User specified context stored with the ib_inform_reg structure. - * - * This call initiates a registration request with the SA for the specified - * trap number. 
If the operation is started successfully, it returns - * an ib_inform_info structure that is used to track the registration operation. - * Users must free this structure by calling ib_unregister_inform_info, - * even if the operation later fails. (The callback status is non-zero.) - * - * If the registration fails; status will be non-zero. If the registration - * succeeds, the callback status will be zero, but the notice parameter will - * be NULL. If the notice parameter is not NULL, a trap or notice is being - * reported to the user. - * - * A status of -ENETRESET indicates that an error occurred which requires - * reregisteration. - */ -struct ib_inform_info * -ib_sa_register_inform_info(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - u16 trap_number, gfp_t gfp_mask, - int (*callback)(int status, - struct ib_inform_info *info, - struct ib_sa_notice *notice), - void *context); - -/** - * ib_sa_unregister_inform_info - Releases an InformInfo registration. - * @info: InformInfo registration tracking structure. - * - * This call blocks until the registration request is destroyed. It may - * not be called from within the registration callback. 
- */ -void ib_sa_unregister_inform_info(struct ib_inform_info *info); - +/* Support GuidInfoRecord */ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_guidinfo_rec *rec, @@ -588,6 +421,4 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void *context), void *context, struct ib_sa_query **sa_query); - - #endif /* IB_SA_H */ diff --git a/sys/ofed/include/rdma/ib_umem.h b/sys/ofed/include/rdma/ib_umem.h index a825111..82f6cfa 100644 --- a/sys/ofed/include/rdma/ib_umem.h +++ b/sys/ofed/include/rdma/ib_umem.h @@ -37,9 +37,26 @@ #include <linux/scatterlist.h> #include <linux/workqueue.h> #include <linux/dma-attrs.h> +#include <linux/completion.h> +#include <rdma/ib_peer_mem.h> struct ib_ucontext; -struct vm_area_struct; +struct ib_umem; + +typedef void (*umem_invalidate_func_t)(void *invalidation_cookie, + struct ib_umem *umem, + unsigned long addr, size_t size); + +struct invalidation_ctx { + struct ib_umem *umem; + umem_invalidate_func_t func; + void *cookie; + unsigned long context_ticket; + int peer_callback; + int inflight_invalidation; + int peer_invalidated; + struct completion comp; +}; struct ib_umem { struct ib_ucontext *context; @@ -48,55 +65,29 @@ struct ib_umem { int page_size; int writable; int hugetlb; - struct list_head chunk_list; -#ifdef __linux__ struct work_struct work; - struct mm_struct *mm; -#else - unsigned long start; -#endif unsigned long diff; -}; - -struct ib_cmem { - - struct ib_ucontext *context; - size_t length; - /* Link list of contiguous blocks being part of that cmem */ - struct list_head ib_cmem_block; - - /* Order of cmem block, 2^ block_order will equal number - of physical pages per block - */ - unsigned long block_order; - /* Refernce counter for that memory area - - When value became 0 pages will be returned to the kernel. 
- */ - struct kref refcount; -}; - - -struct ib_umem_chunk { - struct list_head list; - int nents; + unsigned long start; + struct sg_table sg_head; int nmap; - struct dma_attrs attrs; - struct scatterlist page_list[0]; + int npages; + /* peer memory that manages this umem*/ + struct ib_peer_memory_client *ib_peer_mem; + struct invalidation_ctx *invalidation_ctx; + int peer_mem_srcu_key; + /* peer memory private context */ + void *peer_mem_client_context; }; struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size, int access, int dmasync); +struct ib_umem *ib_umem_get_ex(struct ib_ucontext *context, unsigned long addr, + size_t size, int access, int dmasync, + int invalidation_supported); +void ib_umem_activate_invalidation_notifier(struct ib_umem *umem, + umem_invalidate_func_t func, + void *cookie); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); -int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem, - struct vm_area_struct *vma); -struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context, - unsigned long total_size, - unsigned long page_size_order); -void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem); -int ib_umem_map_to_vma(struct ib_umem *umem, - struct vm_area_struct *vma); - - #endif /* IB_UMEM_H */ diff --git a/sys/ofed/include/rdma/ib_user_verbs.h b/sys/ofed/include/rdma/ib_user_verbs.h index 670d6e8..a07de88 100644 --- a/sys/ofed/include/rdma/ib_user_verbs.h +++ b/sys/ofed/include/rdma/ib_user_verbs.h @@ -43,6 +43,13 @@ * compatibility are made. 
*/ #define IB_USER_VERBS_ABI_VERSION 6 +#define IB_USER_VERBS_CMD_THRESHOLD 50 + +/* + * To support 6 legacy commands using the old extension style + */ +#define IB_USER_VERBS_LEGACY_CMD_FIRST 52 +#define IB_USER_VERBS_LEGACY_EX_CMD_LAST 56 enum { IB_USER_VERBS_CMD_GET_CONTEXT, @@ -85,17 +92,15 @@ enum { IB_USER_VERBS_CMD_OPEN_XRCD, IB_USER_VERBS_CMD_CLOSE_XRCD, IB_USER_VERBS_CMD_CREATE_XSRQ, - IB_USER_VERBS_CMD_OPEN_QP, - IB_USER_VERBS_CMD_ATTACH_FLOW, - IB_USER_VERBS_CMD_DETACH_FLOW, - IB_USER_VERBS_CMD_CREATE_XRC_SRQ, - IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP, - IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP, - IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP, - IB_USER_VERBS_CMD_REG_XRC_RCV_QP, - IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP, + IB_USER_VERBS_CMD_OPEN_QP +}; + +enum { + IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, + IB_USER_VERBS_EX_CMD_DESTROY_FLOW }; + /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to @@ -125,12 +130,33 @@ struct ib_uverbs_comp_event_desc { * the rest of the command struct based on these value. 
*/ +#define IBV_RESP_TO_VERBS_RESP_EX_RAW(ex_ptr, ex_type, ibv_type, field) \ + ((ibv_type *)((void *)(ex_ptr) + offsetof(ex_type, \ + field) + sizeof((ex_ptr)->field))) + +#define IBV_RESP_TO_VERBS_RESP_EX(ex_ptr, ex_type, ibv_type) \ + IBV_RESP_TO_VERBS_RESP_EX_RAW(ex_ptr, ex_type, ibv_type, comp_mask) + + +#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff +#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u +#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24 + +#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80 + struct ib_uverbs_cmd_hdr { __u32 command; __u16 in_words; __u16 out_words; }; +struct ib_uverbs_ex_cmd_hdr { + __u64 response; + __u16 provider_in_words; + __u16 provider_out_words; + __u32 cmd_hdr_reserved; +}; + struct ib_uverbs_get_context { __u64 response; __u64 driver_data[0]; @@ -146,6 +172,11 @@ struct ib_uverbs_query_device { __u64 driver_data[0]; }; +struct ib_uverbs_query_device_ex { + __u64 comp_mask; + __u64 driver_data[0]; +}; + struct ib_uverbs_query_device_resp { __u64 fw_ver; __be64 node_guid; @@ -269,6 +300,22 @@ struct ib_uverbs_dereg_mr { __u32 mr_handle; }; +struct ib_uverbs_alloc_mw { + __u64 response; + __u32 pd_handle; + __u8 mw_type; + __u8 reserved[3]; +}; + +struct ib_uverbs_alloc_mw_resp { + __u32 mw_handle; + __u32 rkey; +}; + +struct ib_uverbs_dealloc_mw { + __u32 mw_handle; +}; + struct ib_uverbs_create_comp_channel { __u64 response; }; @@ -292,6 +339,30 @@ struct ib_uverbs_create_cq_resp { __u32 cqe; }; +enum ib_uverbs_create_cq_ex_comp_mask { + IB_UVERBS_CREATE_CQ_EX_CAP_FLAGS = (u64)1 << 0, +}; + +struct ib_uverbs_create_cq_ex { + __u64 comp_mask; + __u64 user_handle; + __u32 cqe; + __u32 comp_vector; + __s32 comp_channel; + __u32 reserved; + __u64 create_flags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_cq_ex { + __u64 comp_mask; + __u32 cq_handle; + __u32 attr_mask; + __u16 cq_count; + __u16 cq_period; + __u32 cq_cap_flags; +}; + struct ib_uverbs_resize_cq { __u64 response; __u32 cq_handle; @@ -543,6 +614,42 @@ struct 
ib_uverbs_modify_qp { __u64 driver_data[0]; }; +enum ib_uverbs_modify_qp_ex_comp_mask { + IB_UVERBS_QP_ATTR_DCT_KEY = 1ULL << 0, +}; + +struct ib_uverbs_modify_qp_ex { + __u32 comp_mask; + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 qp_handle; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[2]; + __u64 dct_key; + __u64 driver_data[0]; +}; + struct ib_uverbs_modify_qp_resp { }; @@ -599,16 +706,6 @@ struct ib_uverbs_send_wr { } wr; }; -struct ibv_uverbs_flow_spec { - __u32 type; - __be32 src_ip; - __be32 dst_ip; - __be16 src_port; - __be16 dst_port; - __u8 l4_protocol; - __u8 block_mc_loopback; -}; - struct ib_uverbs_post_send { __u64 response; __u32 qp_handle; @@ -686,43 +783,117 @@ struct ib_uverbs_detach_mcast { __u64 driver_data[0]; }; -struct ibv_kern_flow_spec { +struct ib_uverbs_flow_spec_hdr { __u32 type; - __u32 reserved1; + __u16 size; + __u16 reserved; + /* followed by flow_spec */ + __u64 flow_spec_data[0]; +}; + +struct ib_kern_eth_filter { + __u8 dst_mac[6]; + __u8 src_mac[6]; + __be16 ether_type; + __be16 vlan_tag; +}; + +struct ib_uverbs_flow_spec_eth { union { + struct ib_uverbs_flow_spec_hdr hdr; struct { - __be16 ethertype; - __be16 vlan; - __u8 vlan_present; - __u8 mac[6]; - __u8 port; - } eth; - struct { - __be32 qpn; - } ib_uc; + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_kern_eth_filter val; + struct ib_kern_eth_filter mask; +}; + +struct ib_kern_ib_filter { + __be32 l3_type_qpn; + __u8 dst_gid[16]; +}; + +struct ib_uverbs_flow_spec_ib { + union { + struct ib_uverbs_flow_spec_hdr hdr; 
struct { - __u8 mgid[16]; - } ib_mc; - } l2_id; + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_kern_ib_filter val; + struct ib_kern_ib_filter mask; +}; + +struct ib_kern_ipv4_filter { __be32 src_ip; __be32 dst_ip; - __be16 src_port; +}; + +struct ib_uverbs_flow_spec_ipv4 { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_kern_ipv4_filter val; + struct ib_kern_ipv4_filter mask; +}; + +struct ib_kern_tcp_udp_filter { __be16 dst_port; - __u8 l4_protocol; - __u8 block_mc_loopback; + __be16 src_port; +}; + +struct ib_uverbs_flow_spec_tcp_udp { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_kern_tcp_udp_filter val; + struct ib_kern_tcp_udp_filter mask; +}; + +struct ib_uverbs_flow_attr { + __u32 type; + __u16 size; + __u16 priority; + __u8 num_of_specs; __u8 reserved[2]; + __u8 port; + __u32 flags; + /* Following are the optional layers according to user request + * struct ib_flow_spec_xxx + * struct ib_flow_spec_yyy + */ + struct ib_uverbs_flow_spec_hdr flow_specs[0]; }; -struct ib_uverbs_attach_flow { +struct ib_uverbs_create_flow { + __u32 comp_mask; __u32 qp_handle; - __u32 priority; - struct ibv_kern_flow_spec spec; + struct ib_uverbs_flow_attr flow_attr; }; -struct ib_uverbs_detach_flow { - __u32 qp_handle; - __u32 priority; - struct ibv_kern_flow_spec spec; +struct ib_uverbs_create_flow_resp { + __u32 comp_mask; + __u32 flow_handle; +}; + +struct ib_uverbs_destroy_flow { + __u32 comp_mask; + __u32 flow_handle; }; struct ib_uverbs_create_srq { @@ -788,95 +959,22 @@ struct ib_uverbs_destroy_srq_resp { __u32 events_reported; }; -struct ib_uverbs_open_xrc_domain { - __u64 response; - __u32 fd; - __u32 oflags; - __u64 driver_data[0]; -}; - -struct ib_uverbs_open_xrc_domain_resp { - __u32 xrcd_handle; -}; - -struct ib_uverbs_close_xrc_domain { - __u64 response; - __u32 xrcd_handle; - __u32 
reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_create_xrc_rcv_qp { - __u64 response; - __u64 user_handle; - __u32 xrc_domain_handle; - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - __u8 sq_sig_all; - __u8 qp_type; - __u8 reserved[6]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_create_xrc_rcv_qp_resp { - __u32 qpn; - __u32 reserved; -}; -struct ib_uverbs_modify_xrc_rcv_qp { - __u32 xrc_domain_handle; - __u32 qp_num; - struct ib_uverbs_qp_dest dest; - struct ib_uverbs_qp_dest alt_dest; - __u32 attr_mask; - __u32 qkey; - __u32 rq_psn; - __u32 sq_psn; - __u32 dest_qp_num; - __u32 qp_access_flags; - __u16 pkey_index; - __u16 alt_pkey_index; - __u8 qp_state; - __u8 cur_qp_state; - __u8 path_mtu; - __u8 path_mig_state; - __u8 en_sqd_async_notify; - __u8 max_rd_atomic; - __u8 max_dest_rd_atomic; - __u8 min_rnr_timer; - __u8 port_num; - __u8 timeout; - __u8 retry_cnt; - __u8 rnr_retry; - __u8 alt_port_num; - __u8 alt_timeout; - __u8 reserved[6]; - __u64 driver_data[0]; +/* + * Legacy extended verbs related structures + */ +struct ib_uverbs_ex_cmd_hdr_legacy { + __u32 command; + __u16 in_words; + __u16 out_words; + __u16 provider_in_words; + __u16 provider_out_words; + __u32 cmd_hdr_reserved; }; -struct ib_uverbs_query_xrc_rcv_qp { +struct ib_uverbs_ex_cmd_resp1_legacy { + __u64 comp_mask; __u64 response; - __u32 xrc_domain_handle; - __u32 qp_num; - __u32 attr_mask; - __u32 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_reg_xrc_rcv_qp { - __u32 xrc_domain_handle; - __u32 qp_num; - __u64 driver_data[0]; }; -struct ib_uverbs_unreg_xrc_rcv_qp { - __u32 xrc_domain_handle; - __u32 qp_num; - __u64 driver_data[0]; -}; - - #endif /* IB_USER_VERBS_H */ diff --git a/sys/ofed/include/rdma/ib_user_verbs_exp.h b/sys/ofed/include/rdma/ib_user_verbs_exp.h new file mode 100644 index 0000000..557d4ba --- /dev/null +++ b/sys/ofed/include/rdma/ib_user_verbs_exp.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 
2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef IB_USER_VERBS_EXP_H +#define IB_USER_VERBS_EXP_H + +#include <rdma/ib_user_verbs.h> + +enum { + IB_USER_VERBS_EXP_CMD_FIRST = 64 +}; + +enum { + IB_USER_VERBS_EXP_CMD_CREATE_QP, + IB_USER_VERBS_EXP_CMD_MODIFY_CQ, + IB_USER_VERBS_EXP_CMD_MODIFY_QP, + IB_USER_VERBS_EXP_CMD_CREATE_CQ, + IB_USER_VERBS_EXP_CMD_QUERY_DEVICE, + IB_USER_VERBS_EXP_CMD_CREATE_DCT, + IB_USER_VERBS_EXP_CMD_DESTROY_DCT, + IB_USER_VERBS_EXP_CMD_QUERY_DCT, +}; + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * Specifically: + * - Do not use pointer types -- pass pointers in __u64 instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. + */ + +enum ib_uverbs_exp_create_qp_comp_mask { + IB_UVERBS_EXP_CREATE_QP_CAP_FLAGS = (1ULL << 0), + IB_UVERBS_EXP_CREATE_QP_INL_RECV = (1ULL << 1), + IB_UVERBS_EXP_CREATE_QP_QPG = (1ULL << 2) +}; + +struct ib_uverbs_qpg_init_attrib { + __u32 tss_child_count; + __u32 rss_child_count; +}; + +struct ib_uverbs_qpg { + __u32 qpg_type; + union { + struct { + __u32 parent_handle; + __u32 reserved; + }; + struct ib_uverbs_qpg_init_attrib parent_attrib; + }; + __u32 reserved2; +}; + +struct ib_uverbs_exp_create_qp { + __u64 comp_mask; + __u64 user_handle; + __u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u64 qp_cap_flags; + __u32 max_inl_recv; + __u32 reserved1; + struct ib_uverbs_qpg qpg; + __u64 driver_data[0]; +}; + +enum ib_uverbs_exp_create_qp_resp_comp_mask { + IB_UVERBS_EXP_CREATE_QP_RESP_INL_RECV = (1ULL << 0), +}; + +struct 
ib_uverbs_exp_create_qp_resp { + __u64 comp_mask; + __u32 qp_handle; + __u32 qpn; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 max_inl_recv; +}; + +struct ib_uverbs_create_dct { + __u64 comp_mask; + __u64 user_handle; + __u32 pd_handle; + __u32 cq_handle; + __u32 srq_handle; + __u32 access_flags; + __u32 flow_label; + __u64 dc_key; + __u8 min_rnr_timer; + __u8 tclass; + __u8 port; + __u8 pkey_index; + __u8 gid_index; + __u8 hop_limit; + __u8 mtu; + __u8 rsvd; + __u32 create_flags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_dct_resp { + __u32 dct_handle; + __u32 dctn; +}; + +struct ib_uverbs_destroy_dct { + __u64 comp_mask; + __u64 user_handle; +}; + +struct ib_uverbs_destroy_dct_resp { + __u64 reserved; +}; + +struct ib_uverbs_query_dct { + __u64 comp_mask; + __u64 dct_handle; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_dct_resp { + __u64 dc_key; + __u32 access_flags; + __u32 flow_label; + __u32 key_violations; + __u8 port; + __u8 min_rnr_timer; + __u8 tclass; + __u8 mtu; + __u8 pkey_index; + __u8 gid_index; + __u8 hop_limit; + __u8 state; + __u32 rsvd; + __u64 driver_data[0]; +}; + +struct ib_uverbs_exp_query_device { + __u64 comp_mask; + __u64 driver_data[0]; +}; + +struct ib_uverbs_exp_query_device_resp { + __u64 comp_mask; + struct ib_uverbs_query_device_resp base; + __u64 timestamp_mask; + __u64 hca_core_clock; + __u64 device_cap_flags2; + __u32 dc_rd_req; + __u32 dc_rd_res; + __u32 inline_recv_sz; + __u32 max_rss_tbl_sz; +}; + +#endif /* IB_USER_VERBS_EXP_H */ diff --git a/sys/ofed/include/rdma/ib_verbs.h b/sys/ofed/include/rdma/ib_verbs.h index d167e42..d2607c8 100644 --- a/sys/ofed/include/rdma/ib_verbs.h +++ b/sys/ofed/include/rdma/ib_verbs.h @@ -48,10 +48,10 @@ #include <linux/rwsem.h> #include <linux/scatterlist.h> #include <linux/workqueue.h> +#include <linux/if_ether.h> +#include <linux/mutex.h> #include <asm/uaccess.h> -#include <linux/rbtree.h> -#include 
<linux/mutex.h> extern struct workqueue_struct *ib_wq; @@ -68,12 +68,14 @@ enum rdma_node_type { RDMA_NODE_IB_CA = 1, RDMA_NODE_IB_SWITCH, RDMA_NODE_IB_ROUTER, - RDMA_NODE_RNIC + RDMA_NODE_RNIC, + RDMA_NODE_MIC }; enum rdma_transport_type { RDMA_TRANSPORT_IB, - RDMA_TRANSPORT_IWARP + RDMA_TRANSPORT_IWARP, + RDMA_TRANSPORT_SCIF }; enum rdma_transport_type @@ -83,6 +85,7 @@ enum rdma_link_layer { IB_LINK_LAYER_UNSPECIFIED, IB_LINK_LAYER_INFINIBAND, IB_LINK_LAYER_ETHERNET, + IB_LINK_LAYER_SCIF }; enum ib_device_cap_flags { @@ -120,7 +123,29 @@ enum ib_device_cap_flags { IB_DEVICE_SHARED_MR = (1<<24), IB_DEVICE_QPG = (1<<25), IB_DEVICE_UD_RSS = (1<<26), - IB_DEVICE_UD_TSS = (1<<27) + IB_DEVICE_UD_TSS = (1<<27), + IB_DEVICE_CROSS_CHANNEL = (1<<28), + IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), + /* + * Devices can set either IB_DEVICE_MEM_WINDOW_TYPE_2A or + * IB_DEVICE_MEM_WINDOW_TYPE_2B if it supports type 2A or type 2B + * memory windows. It can set neither to indicate it doesn't support + * type 2 windows at all. 
+ */ + IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<30), + IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<31), + IB_DEVICE_SIGNATURE_HANDOVER = (1LL<<32) +}; + +enum ib_signature_prot_cap { + IB_PROT_T10DIF_TYPE_1 = 1, + IB_PROT_T10DIF_TYPE_2 = 1 << 1, + IB_PROT_T10DIF_TYPE_3 = 1 << 2, +}; + +enum ib_signature_guard_cap { + IB_GUARD_T10DIF_CRC = 1, + IB_GUARD_T10DIF_CSUM = 1 << 1, }; enum ib_atomic_cap { @@ -129,6 +154,12 @@ enum ib_atomic_cap { IB_ATOMIC_GLOB }; +enum ib_cq_create_flags { + IB_CQ_CREATE_CROSS_CHANNEL = 1 << 0, + IB_CQ_TIMESTAMP = 1 << 1, + IB_CQ_TIMESTAMP_TO_SYS_TIME = 1 << 2 +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; @@ -139,7 +170,7 @@ struct ib_device_attr { u32 hw_ver; int max_qp; int max_qp_wr; - int device_cap_flags; + u64 device_cap_flags; int max_sge; int max_sge_rd; int max_cq; @@ -171,6 +202,16 @@ struct ib_device_attr { int max_rss_tbl_sz; u16 max_pkeys; u8 local_ca_ack_delay; + int comp_mask; + uint64_t timestamp_mask; + uint64_t hca_core_clock; + unsigned int sig_prot_cap; + unsigned int sig_guard_cap; +}; + +enum ib_device_attr_comp_mask { + IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK = 1ULL << 1, + IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK = 1ULL << 2 }; enum ib_mtu { @@ -199,7 +240,8 @@ enum ib_port_state { IB_PORT_INIT = 2, IB_PORT_ARMED = 3, IB_PORT_ACTIVE = 4, - IB_PORT_ACTIVE_DEFER = 5 + IB_PORT_ACTIVE_DEFER = 5, + IB_PORT_DUMMY = -1 /* force enum signed */ }; enum ib_port_cap_flags { @@ -326,7 +368,6 @@ struct ib_port_attr { u8 active_width; u8 active_speed; u8 phys_state; - enum rdma_link_layer link_layer; }; enum ib_device_modify_flags { @@ -373,10 +414,6 @@ enum ib_event_type { IB_EVENT_GID_CHANGE, }; -enum ib_event_flags { - IB_XRC_QP_EVENT_FLAG = 0x80000000, -}; - struct ib_event { struct ib_device *device; union { @@ -384,7 +421,6 @@ struct ib_event { struct ib_qp *qp; struct ib_srq *srq; u8 port_num; - u32 xrc_qp_num; } element; enum ib_event_type event; }; @@ -450,6 +486,22 @@ enum ib_rate { IB_RATE_300_GBPS = 18 }; +enum 
ib_mr_create_flags { + IB_MR_SIGNATURE_EN = 1, +}; + +/** + * ib_mr_init_attr - Memory region init attributes passed to routine + * ib_create_mr. + * @max_reg_descriptors: max number of registration descriptors that + * may be used with registration work requests. + * @flags: MR creation flags bit mask. + */ +struct ib_mr_init_attr { + int max_reg_descriptors; + u32 flags; +}; + /** * ib_rate_to_mult - Convert the IB rate enum to a multiple of the * base rate of 2.5 Gbit/sec. For example, IB_RATE_5_GBPS will be @@ -465,6 +517,120 @@ int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; */ int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__; +struct ib_cq_init_attr { + int cqe; + int comp_vector; + u32 flags; +}; + +enum ib_signature_type { + IB_SIG_TYPE_T10_DIF, +}; + +/** + * T10-DIF Signature types + * T10-DIF types are defined by SCSI + * specifications. + */ +enum ib_t10_dif_type { + IB_T10DIF_NONE, + IB_T10DIF_TYPE1, + IB_T10DIF_TYPE2, + IB_T10DIF_TYPE3 +}; + +/** + * Signature T10-DIF block-guard types + * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. + * IB_T10DIF_CSUM: Corresponds to IP checksum rules. + */ +enum ib_t10_dif_bg_type { + IB_T10DIF_CRC, + IB_T10DIF_CSUM +}; + +/** + * struct ib_t10_dif_domain - Parameters specific for T10-DIF + * domain. + * @type: T10-DIF type (0|1|2|3) + * @bg_type: T10-DIF block guard type (CRC|CSUM) + * @pi_interval: protection information interval. + * @bg: seed of guard computation. + * @app_tag: application tag of guard block + * @ref_tag: initial guard block reference tag. + * @type3_inc_reftag: T10-DIF type 3 does not state + * about the reference tag, it is the user + * choice to increment it or not. 
+ */ +struct ib_t10_dif_domain { + enum ib_t10_dif_type type; + enum ib_t10_dif_bg_type bg_type; + u32 pi_interval; + u16 bg; + u16 app_tag; + u32 ref_tag; + bool type3_inc_reftag; +}; + +/** + * struct ib_sig_domain - Parameters for signature domain + * @sig_type: specific signauture type + * @sig: union of all signature domain attributes that may + * be used to set domain layout. + */ +struct ib_sig_domain { + enum ib_signature_type sig_type; + union { + struct ib_t10_dif_domain dif; + } sig; +}; + +/** + * struct ib_sig_attrs - Parameters for signature handover operation + * @check_mask: bitmask for signature byte check (8 bytes) + * @mem: memory domain layout desciptor. + * @wire: wire domain layout desciptor. + */ +struct ib_sig_attrs { + u8 check_mask; + struct ib_sig_domain mem; + struct ib_sig_domain wire; +}; + +enum ib_sig_err_type { + IB_SIG_BAD_GUARD, + IB_SIG_BAD_REFTAG, + IB_SIG_BAD_APPTAG, +}; + +/** + * struct ib_sig_err - signature error descriptor + */ +struct ib_sig_err { + enum ib_sig_err_type err_type; + u32 expected; + u32 actual; + u64 sig_err_offset; + u32 key; +}; + +enum ib_mr_status_check { + IB_MR_CHECK_SIG_STATUS = 1, +}; + +/** + * struct ib_mr_status - Memory region status container + * + * @fail_status: Bitmask of MR checks status. For each + * failed check a corresponding status bit is set. + * @sig_err: Additional info for IB_MR_CEHCK_SIG_STATUS + * failure. + */ +struct ib_mr_status { + u32 fail_status; + struct ib_sig_err sig_err; +}; + /** * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate * enum. 
@@ -480,6 +646,8 @@ struct ib_ah_attr { u8 static_rate; u8 ah_flags; u8 port_num; + u8 dmac[6]; + u16 vlan_id; }; enum ib_wc_status { @@ -532,6 +700,11 @@ enum ib_wc_flags { IB_WC_WITH_IMM = (1<<1), IB_WC_WITH_INVALIDATE = (1<<2), IB_WC_IP_CSUM_OK = (1<<3), + IB_WC_WITH_SL = (1<<4), + IB_WC_WITH_SLID = (1<<5), + IB_WC_WITH_TIMESTAMP = (1<<6), + IB_WC_WITH_SMAC = (1<<7), + IB_WC_WITH_VLAN = (1<<8), }; struct ib_wc { @@ -553,6 +726,11 @@ struct ib_wc { u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ int csum_ok; + struct { + uint64_t timestamp; /* timestamp = 0 indicates error*/ + } ts; + u8 smac[6]; + u16 vlan_id; }; enum ib_cq_notify_flags { @@ -618,19 +796,37 @@ enum ib_qp_type { IB_QPT_RC, IB_QPT_UC, IB_QPT_UD, - IB_QPT_XRC, IB_QPT_RAW_IPV6, IB_QPT_RAW_ETHERTYPE, IB_QPT_RAW_PACKET = 8, IB_QPT_XRC_INI = 9, IB_QPT_XRC_TGT, + IB_QPT_DC_INI, IB_QPT_MAX, + /* Reserve a range for qp types internal to the low level driver. + * These qp types will not be visible at the IB core layer, so the + * IB_QPT_MAX usages should not be affected in the core layer + */ + IB_QPT_RESERVED1 = 0x1000, + IB_QPT_RESERVED2, + IB_QPT_RESERVED3, + IB_QPT_RESERVED4, + IB_QPT_RESERVED5, + IB_QPT_RESERVED6, + IB_QPT_RESERVED7, + IB_QPT_RESERVED8, + IB_QPT_RESERVED9, + IB_QPT_RESERVED10, }; enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, - IB_QP_CREATE_NETIF_QP = 1 << 2, + IB_QP_CREATE_CROSS_CHANNEL = 1 << 2, + IB_QP_CREATE_MANAGED_SEND = 1 << 3, + IB_QP_CREATE_MANAGED_RECV = 1 << 4, + IB_QP_CREATE_NETIF_QP = 1 << 5, + IB_QP_CREATE_SIGNATURE_EN = 1 << 6, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -659,7 +855,7 @@ struct ib_qp_init_attr { union { struct ib_qp *qpg_parent; /* see qpg_type */ struct ib_qpg_init_attrib parent_attrib; - } pp; + }; enum ib_sig_type sq_sig_type; enum ib_qp_type qp_type; enum 
ib_qp_create_flags create_flags; @@ -667,6 +863,43 @@ struct ib_qp_init_attr { u8 port_num; /* special QP types only */ }; +enum { + IB_DCT_CREATE_FLAG_RCV_INLINE = 1 << 0, + IB_DCT_CREATE_FLAGS_MASK = IB_DCT_CREATE_FLAG_RCV_INLINE, +}; + +struct ib_dct_init_attr { + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_srq *srq; + u64 dc_key; + u8 port; + u32 access_flags; + u8 min_rnr_timer; + u8 tclass; + u32 flow_label; + enum ib_mtu mtu; + u8 pkey_index; + u8 gid_index; + u8 hop_limit; + u32 create_flags; +}; + +struct ib_dct_attr { + u64 dc_key; + u8 port; + u32 access_flags; + u8 min_rnr_timer; + u8 tclass; + u32 flow_label; + enum ib_mtu mtu; + u8 pkey_index; + u8 gid_index; + u8 hop_limit; + u32 key_violations; + u8 state; +}; + struct ib_qp_open_attr { void (*event_handler)(struct ib_event *, void *); void *qp_context; @@ -731,7 +964,12 @@ enum ib_qp_attr_mask { IB_QP_PATH_MIG_STATE = (1<<18), IB_QP_CAP = (1<<19), IB_QP_DEST_QPN = (1<<20), - IB_QP_GROUP_RSS = (1<<21) + IB_QP_GROUP_RSS = (1<<21), + IB_QP_DC_KEY = (1<<22), + IB_QP_SMAC = (1<<23), + IB_QP_ALT_SMAC = (1<<24), + IB_QP_VID = (1<<25), + IB_QP_ALT_VID = (1<<26) }; enum ib_qp_state { @@ -741,7 +979,8 @@ enum ib_qp_state { IB_QPS_RTS, IB_QPS_SQD, IB_QPS_SQE, - IB_QPS_ERR + IB_QPS_ERR, + IB_QPS_DUMMY = -1 /* force enum signed */ }; enum ib_mig_state { @@ -750,6 +989,11 @@ enum ib_mig_state { IB_MIG_ARMED }; +enum ib_mw_type { + IB_MW_TYPE_1 = 1, + IB_MW_TYPE_2 = 2 +}; + struct ib_qp_attr { enum ib_qp_state qp_state; enum ib_qp_state cur_qp_state; @@ -776,6 +1020,40 @@ struct ib_qp_attr { u8 rnr_retry; u8 alt_port_num; u8 alt_timeout; + u8 smac[ETH_ALEN]; + u8 alt_smac[ETH_ALEN]; + u16 vlan_id; + u16 alt_vlan_id; + +}; + +struct ib_qp_attr_ex { + enum ib_qp_state qp_state; + enum ib_qp_state cur_qp_state; + enum ib_mtu path_mtu; + enum ib_mig_state path_mig_state; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + u32 dest_qp_num; + int qp_access_flags; + struct ib_qp_cap cap; + struct ib_ah_attr ah_attr; + 
struct ib_ah_attr alt_ah_attr; + u16 pkey_index; + u16 alt_pkey_index; + u8 en_sqd_async_notify; + u8 sq_draining; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + u8 min_rnr_timer; + u8 port_num; + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; + u8 alt_port_num; + u8 alt_timeout; + u64 dct_key; }; enum ib_wr_opcode { @@ -787,13 +1065,27 @@ enum ib_wr_opcode { IB_WR_ATOMIC_CMP_AND_SWP, IB_WR_ATOMIC_FETCH_AND_ADD, IB_WR_LSO, - IB_WR_BIG_LSO, IB_WR_SEND_WITH_INV, IB_WR_RDMA_READ_WITH_INV, IB_WR_LOCAL_INV, IB_WR_FAST_REG_MR, IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, + IB_WR_BIND_MW, + IB_WR_REG_SIG_MR, + /* reserve values for low level drivers' internal use. + * These values will not be used at all in the ib core layer. + */ + IB_WR_RESERVED1 = 0xf0, + IB_WR_RESERVED2, + IB_WR_RESERVED3, + IB_WR_RESERVED4, + IB_WR_RESERVED5, + IB_WR_RESERVED6, + IB_WR_RESERVED7, + IB_WR_RESERVED8, + IB_WR_RESERVED9, + IB_WR_RESERVED10, }; enum ib_send_flags { @@ -801,21 +1093,12 @@ enum ib_send_flags { IB_SEND_SIGNALED = (1<<1), IB_SEND_SOLICITED = (1<<2), IB_SEND_INLINE = (1<<3), - IB_SEND_IP_CSUM = (1<<4) -}; + IB_SEND_IP_CSUM = (1<<4), -enum ib_flow_types { - IB_FLOW_ETH = 0, - IB_FLOW_IB_UC = 1, - IB_FLOW_IB_MC_IPV4 = 2, - IB_FLOW_IB_MC_IPV6 = 3 -}; - -enum { - IB_FLOW_L4_NONE = 0, - IB_FLOW_L4_OTHER = 3, - IB_FLOW_L4_UDP = 5, - IB_FLOW_L4_TCP = 6 + /* reserve bits 26-31 for low level drivers' internal use */ + IB_SEND_RESERVED_START = (1 << 26), + IB_SEND_RESERVED_END = (1 << 31), + IB_SEND_UMR_UNREG = (1<<5) }; struct ib_sge { @@ -830,6 +1113,23 @@ struct ib_fast_reg_page_list { unsigned int max_page_list_len; }; +/** + * struct ib_mw_bind_info - Parameters for a memory window bind operation. + * @mr: A memory region to bind the memory window to. + * @addr: The address where the memory window should begin. + * @length: The length of the memory window, in bytes. + * @mw_access_flags: Access flags from enum ib_access_flags for the window. 
+ * + * This struct contains the shared parameters for type 1 and type 2 + * memory window bind operations. + */ +struct ib_mw_bind_info { + struct ib_mr *mr; + u64 addr; + u64 length; + int mw_access_flags; +}; + struct ib_send_wr { struct ib_send_wr *next; u64 wr_id; @@ -874,10 +1174,26 @@ struct ib_send_wr { u32 rkey; } fast_reg; struct { - struct ib_unpacked_lrh *lrh; - u32 eth_type; - u8 static_rate; - } raw_ety; + int npages; + int access_flags; + u32 mkey; + struct ib_pd *pd; + u64 virt_addr; + u64 length; + int page_shift; + } umr; + struct { + struct ib_mw *mw; + /* The new rkey for the memory window. */ + u32 rkey; + struct ib_mw_bind_info bind_info; + } bind_mw; + struct { + struct ib_sig_attrs *sig_attrs; + struct ib_mr *sig_mr; + int access_flags; + struct ib_sge *prot; + } sig_handover; } wr; u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; @@ -896,13 +1212,7 @@ enum ib_access_flags { IB_ACCESS_REMOTE_ATOMIC = (1<<3), IB_ACCESS_MW_BIND = (1<<4), IB_ACCESS_ALLOCATE_MR = (1<<5), - IB_ACCESS_SHARED_MR_USER_READ = (1<<6), - IB_ACCESS_SHARED_MR_USER_WRITE = (1<<7), - IB_ACCESS_SHARED_MR_GROUP_READ = (1<<8), - IB_ACCESS_SHARED_MR_GROUP_WRITE = (1<<9), - IB_ACCESS_SHARED_MR_OTHER_READ = (1<<10), - IB_ACCESS_SHARED_MR_OTHER_WRITE = (1<<11) - + IB_ZERO_BASED = (1<<13) }; struct ib_phys_buf { @@ -925,13 +1235,16 @@ enum ib_mr_rereg_flags { IB_MR_REREG_ACCESS = (1<<2) }; +/** + * struct ib_mw_bind - Parameters for a type 1 memory window bind operation. + * @wr_id: Work request id. + * @send_flags: Flags from ib_send_flags enum. + * @bind_info: More parameters of the bind operation. 
+ */ struct ib_mw_bind { - struct ib_mr *mr; u64 wr_id; - u64 addr; - u32 length; int send_flags; - int mw_access_flags; + struct ib_mw_bind_info bind_info; }; struct ib_fmr_attr { @@ -950,7 +1263,11 @@ struct ib_ucontext { struct list_head srq_list; struct list_head ah_list; struct list_head xrcd_list; + struct list_head rule_list; + struct list_head dct_list; int closing; + void *peer_mem_private_data; + char *peer_mem_name; }; struct ib_uobject { @@ -964,19 +1281,22 @@ struct ib_uobject { int live; }; +struct ib_udata; +struct ib_udata_ops { + int (*copy_from)(void *dest, struct ib_udata *udata, + size_t len); + int (*copy_to)(struct ib_udata *udata, void *src, + size_t len); +}; + struct ib_udata { + struct ib_udata_ops *ops; void __user *inbuf; void __user *outbuf; size_t inlen; size_t outlen; }; -struct ib_uxrc_rcv_object { - struct list_head list; /* link to context's list */ - u32 qp_num; - u32 domain_handle; -}; - struct ib_pd { struct ib_device *device; struct ib_uobject *uobject; @@ -985,10 +1305,8 @@ struct ib_pd { struct ib_xrcd { struct ib_device *device; - struct ib_uobject *uobject; atomic_t usecnt; /* count all exposed resources */ struct inode *inode; - struct rb_node node; struct mutex tgt_qp_mutex; struct list_head tgt_qp_list; @@ -1000,6 +1318,23 @@ struct ib_ah { struct ib_uobject *uobject; }; +enum ib_cq_attr_mask { + IB_CQ_MODERATION = (1 << 0), + IB_CQ_CAP_FLAGS = (1 << 1) +}; + +enum ib_cq_cap_flags { + IB_CQ_IGNORE_OVERRUN = (1 << 0) +}; + +struct ib_cq_attr { + struct { + u16 cq_count; + u16 cq_period; + } moderation; + u32 cq_cap_flags; +}; + typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); struct ib_cq { @@ -1038,7 +1373,8 @@ struct ib_qp { struct ib_srq *srq; struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct list_head xrcd_list; - atomic_t usecnt; /* count times opened, mcast attaches */ + /* count times opened, mcast attaches, flow attaches */ + atomic_t usecnt; struct list_head open_list; struct ib_qp *real_qp; 
struct ib_uobject *uobject; @@ -1047,6 +1383,16 @@ struct ib_qp { u32 qp_num; enum ib_qp_type qp_type; enum ib_qpg_type qpg_type; + u8 port_num; +}; + +struct ib_dct { + struct ib_device *device; + struct ib_uobject *uobject; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_srq *srq; + u32 dct_num; }; struct ib_mr { @@ -1063,6 +1409,7 @@ struct ib_mw { struct ib_pd *pd; struct ib_uobject *uobject; u32 rkey; + enum ib_mw_type type; }; struct ib_fmr { @@ -1073,30 +1420,128 @@ struct ib_fmr { u32 rkey; }; -struct ib_flow_spec { - enum ib_flow_types type; - union { - struct { - __be16 ethertype; - __be16 vlan; - u8 vlan_present; - u8 mac[6]; - u8 port; - } eth; - struct { - __be32 qpn; - } ib_uc; - struct { - u8 mgid[16]; - } ib_mc; - } l2_id; +/* Supported steering options */ +enum ib_flow_attr_type { + /* steering according to rule specifications */ + IB_FLOW_ATTR_NORMAL = 0x0, + /* default unicast and multicast rule - + * receive all Eth traffic which isn't steered to any QP + */ + IB_FLOW_ATTR_ALL_DEFAULT = 0x1, + /* default multicast rule - + * receive all Eth multicast traffic which isn't steered to any QP + */ + IB_FLOW_ATTR_MC_DEFAULT = 0x2, + /* sniffer rule - receive all port traffic */ + IB_FLOW_ATTR_SNIFFER = 0x3 +}; + +/* Supported steering header types */ +enum ib_flow_spec_type { + /* L2 headers*/ + IB_FLOW_SPEC_ETH = 0x20, + IB_FLOW_SPEC_IB = 0x21, + /* L3 header*/ + IB_FLOW_SPEC_IPV4 = 0x30, + /* L4 headers*/ + IB_FLOW_SPEC_TCP = 0x40, + IB_FLOW_SPEC_UDP = 0x41 +}; + +#define IB_FLOW_SPEC_SUPPORT_LAYERS 4 + +/* Flow steering rule priority is set according to it's domain. + * Lower domain value means higher priority. 
+ */ +enum ib_flow_domain { + IB_FLOW_DOMAIN_USER, + IB_FLOW_DOMAIN_ETHTOOL, + IB_FLOW_DOMAIN_RFS, + IB_FLOW_DOMAIN_NIC, + IB_FLOW_DOMAIN_NUM /* Must be last */ +}; + +enum ib_flow_flags { + IB_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1 +}; + +struct ib_flow_eth_filter { + u8 dst_mac[6]; + u8 src_mac[6]; + __be16 ether_type; + __be16 vlan_tag; +}; + +struct ib_flow_spec_eth { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_eth_filter val; + struct ib_flow_eth_filter mask; +}; + +struct ib_flow_ib_filter { + __be32 l3_type_qpn; + u8 dst_gid[16]; +}; + +struct ib_flow_spec_ib { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_ib_filter val; + struct ib_flow_ib_filter mask; +}; + +struct ib_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; - __be16 src_port; +}; + +struct ib_flow_spec_ipv4 { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_ipv4_filter val; + struct ib_flow_ipv4_filter mask; +}; + +struct ib_flow_tcp_udp_filter { __be16 dst_port; - u8 l4_protocol; - u8 block_mc_loopback; - u8 rule_type; + __be16 src_port; +}; + +struct ib_flow_spec_tcp_udp { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_tcp_udp_filter val; + struct ib_flow_tcp_udp_filter mask; +}; + +union ib_flow_spec { + struct { + enum ib_flow_spec_type type; + u16 size; + }; + struct ib_flow_spec_ib ib; + struct ib_flow_spec_eth eth; + struct ib_flow_spec_ipv4 ipv4; + struct ib_flow_spec_tcp_udp tcp_udp; +}; + +struct ib_flow_attr { + enum ib_flow_attr_type type; + u16 size; + u16 priority; + u8 num_of_specs; + u8 port; + u32 flags; + /* Following are the optional layers according to user request + * struct ib_flow_spec_xxx + * struct ib_flow_spec_yyy + */ +}; + +struct ib_flow { + struct ib_qp *qp; + struct ib_uobject *uobject; }; struct ib_mad; @@ -1125,6 +1570,15 @@ struct ib_cache { u8 *lmc_cache; }; +enum verbs_values_mask { + IBV_VALUES_HW_CLOCK = 1 << 0 +}; + +struct ib_device_values { + int values_mask; + uint64_t hwclock; +}; + struct 
ib_dma_mapping_ops { int (*mapping_error)(struct ib_device *dev, u64 dma_addr); @@ -1169,6 +1623,8 @@ struct ib_dma_mapping_ops { }; struct iw_cm_verbs; +struct ib_exp_device_attr; +struct ib_exp_qp_init_attr; struct ib_device { struct device *dma_device; @@ -1257,12 +1713,13 @@ struct ib_device { int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); - struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, - int comp_vector, + struct ib_cq * (*create_cq)(struct ib_device *device, + struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); - int (*modify_cq)(struct ib_cq *cq, u16 cq_count, - u16 cq_period); + int (*modify_cq)(struct ib_cq *cq, + struct ib_cq_attr *cq_attr, + int cq_attr_mask); int (*destroy_cq)(struct ib_cq *cq); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); @@ -1289,6 +1746,9 @@ struct ib_device { int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); + int (*destroy_mr)(struct ib_mr *mr); + struct ib_mr * (*create_mr)(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, @@ -1301,7 +1761,8 @@ struct ib_device { int num_phys_buf, int mr_access_flags, u64 *iova_start); - struct ib_mw * (*alloc_mw)(struct ib_pd *pd); + struct ib_mw * (*alloc_mw)(struct ib_pd *pd, + enum ib_mw_type type); int (*bind_mw)(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); @@ -1327,43 +1788,28 @@ struct ib_device { struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); - struct ib_srq * (*create_xrc_srq)(struct ib_pd *pd, - struct ib_cq *xrc_cq, - struct ib_xrcd *xrcd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device, struct ib_ucontext *ucontext, struct 
ib_udata *udata); int (*dealloc_xrcd)(struct ib_xrcd *xrcd); - int (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr, - u32 *qp_num); - int (*modify_xrc_rcv_qp)(struct ib_xrcd *xrcd, - u32 qp_num, - struct ib_qp_attr *attr, - int attr_mask); - int (*query_xrc_rcv_qp)(struct ib_xrcd *xrcd, - u32 qp_num, - struct ib_qp_attr *attr, - int attr_mask, - struct ib_qp_init_attr *init_attr); - int (*reg_xrc_rcv_qp)(struct ib_xrcd *xrcd, - void *context, - u32 qp_num); - int (*unreg_xrc_rcv_qp)(struct ib_xrcd *xrcd, - void *context, - u32 qp_num); - int (*attach_flow)(struct ib_qp *qp, - struct ib_flow_spec *spec, - int priority); - int (*detach_flow)(struct ib_qp *qp, - struct ib_flow_spec *spec, - int priority); + struct ib_flow * (*create_flow)(struct ib_qp *qp, + struct ib_flow_attr + *flow_attr, + int domain); + int (*destroy_flow)(struct ib_flow *flow_id); + int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); unsigned long (*get_unmapped_area)(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); + int (*ioctl)(struct ib_ucontext *context, + unsigned int cmd, + unsigned long arg); + int (*query_values)(struct ib_device *device, + int q_values, + struct ib_device_values *values); struct ib_dma_mapping_ops *dma_ops; struct module *owner; @@ -1379,14 +1825,33 @@ struct ib_device { int uverbs_abi_ver; u64 uverbs_cmd_mask; + u64 uverbs_ex_cmd_mask; char node_desc[64]; __be64 node_guid; u32 local_dma_lkey; u8 node_type; u8 phys_port_cnt; - struct rb_root ib_uverbs_xrcd_table; - struct mutex xrcd_table_mutex; + int cmd_perf; + u64 cmd_avg; + u32 cmd_n; + spinlock_t cmd_perf_lock; + + /* + * Experimental data and functions + */ + int (*exp_query_device)(struct ib_device *device, + struct ib_exp_device_attr *device_attr); + struct ib_qp * (*exp_create_qp)(struct ib_pd *pd, + struct ib_exp_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + struct ib_dct * (*exp_create_dct)(struct 
ib_pd *pd, + struct ib_dct_init_attr *attr, + struct ib_udata *udata); + int (*exp_destroy_dct)(struct ib_dct *dct); + int (*exp_query_dct)(struct ib_dct *dct, struct ib_dct_attr *attr); + + u64 uverbs_exp_cmd_mask; }; struct ib_client { @@ -1414,12 +1879,12 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { - return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; + return udata->ops->copy_from(dest, udata, len); } static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) { - return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; + return udata->ops->copy_to(udata, src, len); } /** @@ -1430,6 +1895,7 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len * @next_state: Next QP state * @type: QP type * @mask: Mask of supplied QP attributes + * @ll : link layer of port * * This function is a helper function that a low-level driver's * modify_qp method can use to validate the consumer's input. It @@ -1438,7 +1904,8 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len * and that the attribute mask supplied is allowed for the transition. */ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, - enum ib_qp_type type, enum ib_qp_attr_mask mask); + enum ib_qp_type type, enum ib_qp_attr_mask mask, + enum rdma_link_layer ll); int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); @@ -1552,26 +2019,6 @@ int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int ib_destroy_ah(struct ib_ah *ah); /** - * ib_create_xrc_srq - Creates an XRC SRQ associated with the specified - * protection domain, cq, and xrc domain. - * @pd: The protection domain associated with the SRQ. - * @xrc_cq: The cq to be associated with the XRC SRQ. 
- * @xrcd: The XRC domain to be associated with the XRC SRQ. - * @srq_init_attr: A list of initial attributes required to create the - * XRC SRQ. If XRC SRQ creation succeeds, then the attributes are updated - * to the actual capabilities of the created XRC SRQ. - * - * srq_attr->max_wr and srq_attr->max_sge are read the determine the - * requested size of the XRC SRQ, and set to the actual values allocated - * on return. If ib_create_xrc_srq() succeeds, then max_wr and max_sge - * will always be at least as large as the requested values. - */ -struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd, - struct ib_cq *xrc_cq, - struct ib_xrcd *xrcd, - struct ib_srq_init_attr *srq_init_attr); - -/** * ib_create_srq - Creates a SRQ associated with the specified protection * domain. * @pd: The protection domain associated with the SRQ. @@ -1732,13 +2179,6 @@ static inline int ib_post_recv(struct ib_qp *qp, return qp->device->post_recv(qp, recv_wr, bad_recv_wr); } -/* - * IB_CQ_VECTOR_LEAST_ATTACHED: The constant specifies that - * the CQ will be attached to the completion vector that has - * the least number of CQs already attached to it. - */ -#define IB_CQ_VECTOR_LEAST_ATTACHED 0xffffffff - /** * ib_create_cq - Creates a CQ on the specified device. * @device: The device on which to create the CQ. @@ -1769,13 +2209,16 @@ struct ib_cq *ib_create_cq(struct ib_device *device, int ib_resize_cq(struct ib_cq *cq, int cqe); /** - * ib_modify_cq - Modifies moderation params of the CQ + * ib_modify_cq - Modifies the attributes for the specified CQ and then + * transitions the CQ to the given state. * @cq: The CQ to modify. - * @cq_count: number of CQEs that will trigger an event - * @cq_period: max period of time in usec before triggering an event - * + * @cq_attr: specifies the CQ attributes to modify. + * @cq_attr_mask: A bit-mask used to specify which attributes of the CQ + * are being modified. 
*/ -int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int ib_modify_cq(struct ib_cq *cq, + struct ib_cq_attr *cq_attr, + int cq_attr_mask); /** * ib_destroy_cq - Destroys the specified CQ. @@ -2179,9 +2622,30 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); * ib_dereg_mr - Deregisters a memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. + * + * This function can fail, if the memory region has memory windows bound to it. */ int ib_dereg_mr(struct ib_mr *mr); + +/** + * ib_create_mr - Allocates a memory region that may be used for + * signature handover operations. + * @pd: The protection domain associated with the region. + * @mr_init_attr: memory region init attributes. + */ +struct ib_mr *ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); + +/** + * ib_destroy_mr - Destroys a memory region that was created using + * ib_create_mr and removes it from HW translation tables. + * @mr: The memory region to destroy. + * + * This function can fail, if the memory region has memory windows bound to it. + */ +int ib_destroy_mr(struct ib_mr *mr); + /** * ib_alloc_fast_reg_mr - Allocates memory region usable with the * IB_WR_FAST_REG_MR send work request. @@ -2231,10 +2695,22 @@ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey) } /** + * ib_inc_rkey - increments the key portion of the given rkey. Can be used + * for calculating a new rkey for type 2 memory windows. + * @rkey - the rkey to increment. + */ +static inline u32 ib_inc_rkey(u32 rkey) +{ + const u32 mask = 0x000000ff; + return ((rkey + 1) & mask) | (rkey & ~mask); +} + +/** * ib_alloc_mw - Allocates a memory window. * @pd: The protection domain associated with the memory window. + * @type: The type of the memory window (1 or 2). 
*/ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd); +struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); /** * ib_bind_mw - Posts a work request to the send queue of the specified @@ -2244,6 +2720,10 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd); * @mw: The memory window to bind. * @mw_bind: Specifies information about the memory window, including * its address range, remote access rights, and associated memory region. + * + * If there is no immediate error, the function will update the rkey member + * of the mw parameter to its new value. The bind operation can still fail + * asynchronously. */ static inline int ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, @@ -2334,7 +2814,77 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); */ int ib_dealloc_xrcd(struct ib_xrcd *xrcd); -int ib_attach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority); -int ib_detach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority); +struct ib_flow *ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, int domain); +int ib_destroy_flow(struct ib_flow *flow_id); + +struct ib_dct *ib_create_dct(struct ib_pd *pd, struct ib_dct_init_attr *attr, + struct ib_udata *udata); +int ib_destroy_dct(struct ib_dct *dct); +int ib_query_dct(struct ib_dct *dct, struct ib_dct_attr *attr); + +int ib_query_values(struct ib_device *device, + int q_values, struct ib_device_values *values); + +static inline void ib_active_speed_enum_to_rate(u8 active_speed, + int *rate, + char **speed) +{ + switch (active_speed) { + case IB_SPEED_DDR: + *speed = " DDR"; + *rate = 50; + break; + case IB_SPEED_QDR: + *speed = " QDR"; + *rate = 100; + break; + case IB_SPEED_FDR10: + *speed = " FDR10"; + *rate = 100; + break; + case IB_SPEED_FDR: + *speed = " FDR"; + *rate = 140; + break; + case IB_SPEED_EDR: + *speed = " EDR"; + *rate = 250; + break; + case IB_SPEED_SDR: + default: /* default to SDR for invalid rates */ + *rate = 25; + break; + } + +} + +static inline int 
ib_check_mr_access(int flags) +{ + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. + */ + if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && + !(flags & IB_ACCESS_LOCAL_WRITE)) + return -EINVAL; + + return 0; +} + +/** + * ib_check_mr_status: lightweight check of MR status. + * This routine may provide status checks on a selected + * ib_mr. first use is for signature status check. + * + * @mr: A memory region. + * @check_mask: Bitmask of which checks to perform from + * ib_mr_status_check enumeration. + * @mr_status: The container of relevant status checks. + * failed checks will be indicated in the status bitmask + * and the relevant info shall be in the error item. + */ +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); #endif /* IB_VERBS_H */ diff --git a/sys/ofed/include/rdma/ib_verbs_exp.h b/sys/ofed/include/rdma/ib_verbs_exp.h new file mode 100644 index 0000000..ca5b84b --- /dev/null +++ b/sys/ofed/include/rdma/ib_verbs_exp.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_VERBS_EXP_H +#define IB_VERBS_EXP_H + +#include <rdma/ib_verbs.h> + + +enum ib_exp_device_cap_flags2 { + IB_EXP_DEVICE_DC_TRANSPORT = 1 << 0, + IB_EXP_DEVICE_QPG = 1 << 1, + IB_EXP_DEVICE_UD_RSS = 1 << 2, + IB_EXP_DEVICE_UD_TSS = 1 << 3 +}; + +enum ib_exp_device_attr_comp_mask { + IB_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK = 1ULL << 1, + IB_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK = 1ULL << 2, + IB_EXP_DEVICE_ATTR_CAP_FLAGS2 = 1ULL << 3, + IB_EXP_DEVICE_ATTR_DC_REQ_RD = 1ULL << 4, + IB_EXP_DEVICE_ATTR_DC_RES_RD = 1ULL << 5, + IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ = 1ULL << 6, + IB_EXP_DEVICE_ATTR_RSS_TBL_SZ = 1ULL << 7, +}; + +struct ib_exp_device_attr { + struct ib_device_attr base; + /* Use IB_EXP_DEVICE_ATTR_... 
for exp_comp_mask */ + uint32_t exp_comp_mask; + uint64_t device_cap_flags2; + uint32_t dc_rd_req; + uint32_t dc_rd_res; + uint32_t inline_recv_sz; + uint32_t max_rss_tbl_sz; +}; + +struct ib_exp_qp_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_xrcd *xrcd; /* XRC TGT QPs only */ + struct ib_qp_cap cap; + union { + struct ib_qp *qpg_parent; /* see qpg_type */ + struct ib_qpg_init_attrib parent_attrib; + }; + enum ib_sig_type sq_sig_type; + enum ib_qp_type qp_type; + enum ib_qp_create_flags create_flags; + enum ib_qpg_type qpg_type; + u8 port_num; /* special QP types only */ + u32 max_inl_recv; +}; + + +int ib_exp_query_device(struct ib_device *device, + struct ib_exp_device_attr *device_attr); + + + + +#endif /* IB_VERBS_EXP_H */ diff --git a/sys/ofed/include/rdma/iw_cm.h b/sys/ofed/include/rdma/iw_cm.h index 412320e..271c2f8 100644 --- a/sys/ofed/include/rdma/iw_cm.h +++ b/sys/ofed/include/rdma/iw_cm.h @@ -46,24 +46,17 @@ enum iw_cm_event_type { IW_CM_EVENT_CLOSE /* close complete */ }; -enum iw_cm_event_status { - IW_CM_EVENT_STATUS_OK = 0, /* request successful */ - IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */ - IW_CM_EVENT_STATUS_REJECTED, /* connect request rejected */ - IW_CM_EVENT_STATUS_TIMEOUT, /* the operation timed out */ - IW_CM_EVENT_STATUS_RESET, /* reset from remote peer */ - IW_CM_EVENT_STATUS_EINVAL, /* asynchronous failure for bad parm */ -}; - struct iw_cm_event { enum iw_cm_event_type event; - enum iw_cm_event_status status; + int status; struct sockaddr_in local_addr; struct sockaddr_in remote_addr; void *private_data; - u8 private_data_len; void *provider_data; + u8 private_data_len; struct socket *so; + u8 ord; + u8 ird; }; /** diff --git a/sys/ofed/include/rdma/peer_mem.h b/sys/ofed/include/rdma/peer_mem.h new file mode 100644 index 0000000..8565883 --- /dev/null +++ b/sys/ofed/include/rdma/peer_mem.h @@ 
-0,0 +1,73 @@ +/* + * Copyright (c) 2013, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if !defined(PEER_MEM_H) +#define PEER_MEM_H + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/scatterlist.h> +#include <linux/mutex.h> + + +#define IB_PEER_MEMORY_NAME_MAX 64 +#define IB_PEER_MEMORY_VER_MAX 16 + +struct peer_memory_client { + char name[IB_PEER_MEMORY_NAME_MAX]; + char version[IB_PEER_MEMORY_VER_MAX]; + /* acquire return code: 1 mine, 0 - not mine */ + int (*acquire) (unsigned long addr, size_t size, void *peer_mem_private_data, + char *peer_mem_name, void **client_context); + int (*get_pages) (unsigned long addr, + size_t size, int write, int force, + struct sg_table *sg_head, + void *client_context, void *core_context); + int (*dma_map) (struct sg_table *sg_head, void *client_context, + struct device *dma_device, int dmasync, int *nmap); + int (*dma_unmap) (struct sg_table *sg_head, void *client_context, + struct device *dma_device); + void (*put_pages) (struct sg_table *sg_head, void *client_context); + unsigned long (*get_page_size) (void *client_context); + void (*release) (void *client_context); + +}; + +typedef int (*invalidate_peer_memory)(void *reg_handle, + void *core_context); + +void *ib_register_peer_memory_client(struct peer_memory_client *peer_client, + invalidate_peer_memory *invalidate_callback); +void ib_unregister_peer_memory_client(void *reg_handle); + +#endif diff --git a/sys/ofed/include/rdma/rdma_cm.h b/sys/ofed/include/rdma/rdma_cm.h index c6b2962..d699261 100644 --- a/sys/ofed/include/rdma/rdma_cm.h +++ b/sys/ofed/include/rdma/rdma_cm.h @@ -59,15 +59,26 @@ enum rdma_cm_event_type { RDMA_CM_EVENT_MULTICAST_JOIN, RDMA_CM_EVENT_MULTICAST_ERROR, RDMA_CM_EVENT_ADDR_CHANGE, - RDMA_CM_EVENT_TIMEWAIT_EXIT + RDMA_CM_EVENT_TIMEWAIT_EXIT, + RDMA_CM_EVENT_ALT_ROUTE_RESOLVED, + RDMA_CM_EVENT_ALT_ROUTE_ERROR, + RDMA_CM_EVENT_LOAD_ALT_PATH, + RDMA_CM_EVENT_ALT_PATH_LOADED, }; enum rdma_port_space { RDMA_PS_SDP = 0x0001, RDMA_PS_IPOIB = 0x0002, + RDMA_PS_IB = 0x013F, RDMA_PS_TCP = 
0x0106, RDMA_PS_UDP = 0x0111, - RDMA_PS_SCTP = 0x0183 +}; + +enum alt_path_type { + RDMA_ALT_PATH_NONE, + RDMA_ALT_PATH_PORT, + RDMA_ALT_PATH_LID, + RDMA_ALT_PATH_BEST }; struct rdma_addr { @@ -101,6 +112,7 @@ struct rdma_ud_param { struct ib_ah_attr ah_attr; u32 qp_num; u32 qkey; + u8 alt_path_index; }; struct rdma_cm_event { @@ -112,6 +124,20 @@ struct rdma_cm_event { } param; }; +enum rdma_cm_state { + RDMA_CM_IDLE, + RDMA_CM_ADDR_QUERY, + RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ROUTE_QUERY, + RDMA_CM_ROUTE_RESOLVED, + RDMA_CM_CONNECT, + RDMA_CM_DISCONNECT, + RDMA_CM_ADDR_BOUND, + RDMA_CM_LISTEN, + RDMA_CM_DEVICE_REMOVAL, + RDMA_CM_DESTROYING +}; + struct rdma_cm_id; /** @@ -131,7 +157,9 @@ struct rdma_cm_id { rdma_cm_event_handler event_handler; struct rdma_route route; enum rdma_port_space ps; + enum ib_qp_type qp_type; u8 port_num; + void *ucontext; }; /** @@ -141,9 +169,11 @@ struct rdma_cm_id { * returned rdma_id. * @context: User specified context associated with the id. * @ps: RDMA port space. + * @qp_type: type of queue pair associated with the id. */ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, - void *context, enum rdma_port_space ps); + void *context, enum rdma_port_space ps, + enum ib_qp_type qp_type); /** * rdma_destroy_id - Destroys an RDMA identifier. @@ -192,6 +222,19 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms); /** + * rdma_enable_apm - Get ready to use APM for the given ID. + * Actual Alternate path discovery and load will take place only + * after a connection has been established. + * + * Calling this function only has an effect on the connection's client side. + * It should be called after rdma_resolve_route and before rdma_connect. + * + * @id: RDMA identifier. + * @alt_type: Alternate path type to resolve. 
+ */ +int rdma_enable_apm(struct rdma_cm_id *id, enum alt_path_type alt_type); + +/** * rdma_create_qp - Allocate a QP and associate it with the specified RDMA * identifier. * @@ -330,4 +373,32 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); */ void rdma_set_service_type(struct rdma_cm_id *id, int tos); +/** + * rdma_set_reuseaddr - Allow the reuse of local addresses when binding + * the rdma_cm_id. + * @id: Communication identifier to configure. + * @reuse: Value indicating if the bound address is reusable. + * + * Reuse must be set before an address is bound to the id. + */ +int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse); + +/** + * rdma_set_afonly - Specify that listens are restricted to the + * bound address family only. + * @id: Communication identifer to configure. + * @afonly: Value indicating if listens are restricted. + * + * Must be set before identifier is in the listening state. + */ +int rdma_set_afonly(struct rdma_cm_id *id, int afonly); + +/** + * rdma_set_timeout - Set the QP timeout associated with a connection + * identifier. + * @id: Communication identifier to associated with service type. 
+ * @timeout: QP timeout + */ +void rdma_set_timeout(struct rdma_cm_id *id, int timeout); + #endif /* RDMA_CM_H */ diff --git a/sys/ofed/include/rdma/rdma_user_cm.h b/sys/ofed/include/rdma/rdma_user_cm.h index 1d16502..4d99099 100644 --- a/sys/ofed/include/rdma/rdma_user_cm.h +++ b/sys/ofed/include/rdma/rdma_user_cm.h @@ -77,7 +77,8 @@ struct rdma_ucm_create_id { __u64 uid; __u64 response; __u16 ps; - __u8 reserved[6]; + __u8 qp_type; + __u8 reserved[5]; }; struct rdma_ucm_create_id_resp { @@ -222,7 +223,11 @@ enum { /* Option details */ enum { RDMA_OPTION_ID_TOS = 0, - RDMA_OPTION_IB_PATH = 1 + RDMA_OPTION_ID_REUSEADDR = 1, + RDMA_OPTION_ID_AFONLY = 2, + + RDMA_OPTION_IB_PATH = 1, + RDMA_OPTION_IB_APM = 2, }; struct rdma_ucm_set_option { diff --git a/sys/powerpc/powerpc/db_trace.c b/sys/powerpc/powerpc/db_trace.c index f85918c..7dfbb84 100644 --- a/sys/powerpc/powerpc/db_trace.c +++ b/sys/powerpc/powerpc/db_trace.c @@ -267,6 +267,9 @@ db_backtrace(struct thread *td, db_addr_t fp, int count) db_printf("%-10s r1=%#zx cr=%#x xer=%#x ctr=%#zx", "", tf->fixreg[1], (uint32_t)tf->cr, (uint32_t)tf->xer, tf->ctr); +#ifdef __powerpc64__ + db_printf(" r2=%#zx", tf->fixreg[2]); +#endif if (tf->exc == EXC_DSI) db_printf(" sr=%#x", (uint32_t)tf->cpu.aim.dsisr); diff --git a/sys/sparc64/include/asm.h b/sys/sparc64/include/asm.h index dea06c5..69a53be 100644 --- a/sys/sparc64/include/asm.h +++ b/sys/sparc64/include/asm.h @@ -103,6 +103,16 @@ CNAME(x): #define END(x) .size x, . - x /* + * WEAK_REFERENCE(): create a weak reference alias from sym. + * The macro is not a general asm macro that takes arbitrary names, + * but one that takes only C names. It does the non-null name + * translation inside the macro. 
+ */ +#define WEAK_REFERENCE(sym, alias) \ + .weak CNAME(alias); \ + .equ CNAME(alias),CNAME(sym) + +/* * Kernel RCS ID tag and copyright macros */ diff --git a/sys/sys/fbio.h b/sys/sys/fbio.h index c47304a..034d052 100644 --- a/sys/sys/fbio.h +++ b/sys/sys/fbio.h @@ -115,6 +115,7 @@ struct fb_info; typedef int fb_enter_t(void *priv); typedef int fb_leave_t(void *priv); +typedef int fb_setblankmode_t(void *priv, int mode); struct fb_info { /* Raw copy of fbtype. Do not change. */ @@ -129,6 +130,7 @@ struct fb_info { fb_enter_t *enter; fb_leave_t *leave; + fb_setblankmode_t *setblankmode; intptr_t fb_pbase; /* For FB mmap. */ intptr_t fb_vbase; /* if NULL, use fb_write/fb_read. */ diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 2e94d2a..4729320 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -184,8 +184,16 @@ struct mbuf { * Compile-time assertions in uipc_mbuf.c test these values to ensure * that they are correct. */ - struct mbuf *m_next; /* next buffer in chain */ - struct mbuf *m_nextpkt; /* next chain in queue/record */ + union { /* next buffer in chain */ + struct mbuf *m_next; + SLIST_ENTRY(mbuf) m_slist; + STAILQ_ENTRY(mbuf) m_stailq; + }; + union { /* next chain in queue/record */ + struct mbuf *m_nextpkt; + SLIST_ENTRY(mbuf) m_slistpkt; + STAILQ_ENTRY(mbuf) m_stailqpkt; + }; caddr_t m_data; /* location of data */ int32_t m_len; /* amount of data in this mbuf */ uint32_t m_type:8, /* type of data in this mbuf */ diff --git a/sys/sys/param.h b/sys/sys/param.h index aba65ac..c92ee7b 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. 
*/ #undef __FreeBSD_version -#define __FreeBSD_version 1100059 /* Master, propagated to newvers */ +#define __FreeBSD_version 1100060 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/taskqueue.h b/sys/sys/taskqueue.h index bf23ac1..9a00ada 100644 --- a/sys/sys/taskqueue.h +++ b/sys/sys/taskqueue.h @@ -36,6 +36,7 @@ #include <sys/queue.h> #include <sys/_task.h> #include <sys/_callout.h> +#include <sys/_cpuset.h> struct taskqueue; struct thread; @@ -71,6 +72,8 @@ struct taskqueue *taskqueue_create(const char *name, int mflags, void *context); int taskqueue_start_threads(struct taskqueue **tqp, int count, int pri, const char *name, ...) __printflike(4, 5); +int taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count, + int pri, cpuset_t *mask, const char *name, ...) __printflike(5, 6); int taskqueue_start_threads_pinned(struct taskqueue **tqp, int count, int pri, int cpu_id, const char *name, ...) __printflike(5, 6); diff --git a/sys/vm/memguard.c b/sys/vm/memguard.c index ec372c1..5d483b1 100644 --- a/sys/vm/memguard.c +++ b/sys/vm/memguard.c @@ -89,9 +89,7 @@ memguard_sysctl_desc(SYSCTL_HANDLER_ARGS) return (error); mtx_lock(&malloc_mtx); - /* - * If mtp is NULL, it will be initialized in memguard_cmp(). 
- */ + /* If mtp is NULL, it will be initialized in memguard_cmp() */ vm_memguard_mtype = malloc_desc2type(desc); strlcpy(vm_memguard_desc, desc, sizeof(vm_memguard_desc)); mtx_unlock(&malloc_mtx); @@ -502,7 +500,7 @@ int memguard_cmp_zone(uma_zone_t zone) { - if ((memguard_options & MG_GUARD_NOFREE) == 0 && + if ((memguard_options & MG_GUARD_NOFREE) == 0 && zone->uz_flags & UMA_ZONE_NOFREE) return (0); diff --git a/sys/x86/acpica/acpi_wakeup.c b/sys/x86/acpica/acpi_wakeup.c index 74522be..e652419 100644 --- a/sys/x86/acpica/acpi_wakeup.c +++ b/sys/x86/acpica/acpi_wakeup.c @@ -270,6 +270,7 @@ acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result, initializecpu(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); + lapic_xapic_mode(); #ifdef SMP if (!CPU_EMPTY(&suspcpus)) acpi_wakeup_cpus(sc); diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 1809fa6..1b66674 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -527,7 +527,6 @@ native_lapic_xapic_mode(void) saveintr = intr_disable(); if (x2apic_mode) native_lapic_enable_x2apic(); - native_lapic_disable(); intr_restore(saveintr); } diff --git a/sys/x86/xen/pvcpu_enum.c b/sys/x86/xen/pvcpu_enum.c index 8d54bdd..5b8b053 100644 --- a/sys/x86/xen/pvcpu_enum.c +++ b/sys/x86/xen/pvcpu_enum.c @@ -81,6 +81,7 @@ madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr) { enum intr_trigger trig; enum intr_polarity pol; + int ret; if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->SourceIrq == 0 && intr->GlobalIrq == 2) { @@ -101,7 +102,9 @@ madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr) acpi_OverrideInterruptLevel(intr->GlobalIrq); /* Register the IRQ with the polarity and trigger mode found. 
*/ - xen_register_pirq(intr->GlobalIrq, trig, pol); + ret = xen_register_pirq(intr->GlobalIrq, trig, pol); + if (ret != 0) + panic("Unable to register interrupt override"); } /* @@ -175,7 +178,7 @@ xenpv_setup_io(void) { if (xen_initial_domain()) { - int i; + int i, ret; /* Map MADT */ madt_physaddr = acpi_find_table(ACPI_SIG_MADT); @@ -201,8 +204,21 @@ xenpv_setup_io(void) if (!madt_found_sci_override) { printf( "MADT: Forcing active-low polarity and level trigger for SCI\n"); - xen_register_pirq(AcpiGbl_FADT.SciInterrupt, + ret = xen_register_pirq(AcpiGbl_FADT.SciInterrupt, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); + if (ret != 0) + panic("Unable to register SCI IRQ"); + } + + /* Register legacy ISA IRQs */ + for (i = 1; i < 16; i++) { + if (intr_lookup_source(i) != NULL) + continue; + ret = xen_register_pirq(i, INTR_TRIGGER_EDGE, + INTR_POLARITY_LOW); + if (ret != 0 && bootverbose) + printf("Unable to register legacy IRQ#%d: %d\n", + i, ret); } acpi_SetDefaultIntrModel(ACPI_INTR_APIC); diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c index 03ccd7c..a964fac 100644 --- a/sys/x86/xen/xen_intr.c +++ b/sys/x86/xen/xen_intr.c @@ -126,7 +126,6 @@ struct xenisrc { int xi_virq; void *xi_cookie; u_int xi_close:1; /* close on unbind? */ - u_int xi_shared:1; /* Shared with other domains. 
*/ u_int xi_activehi:1; u_int xi_edgetrigger:1; }; @@ -579,11 +578,12 @@ xen_intr_handle_upcall(struct trapframe *trap_frame) /* process port */ port = (l1i * LONG_BIT) + l2i; - synch_clear_bit(port, &s->evtchn_pending[0]); isrc = xen_intr_port_to_isrc[port]; - if (__predict_false(isrc == NULL)) + if (__predict_false(isrc == NULL)) { + synch_clear_bit(port, &s->evtchn_pending[0]); continue; + } /* Make sure we are firing on the right vCPU */ KASSERT((isrc->xi_cpu == PCPU_GET(cpuid)), @@ -932,6 +932,9 @@ out: static void xen_intr_disable_source(struct intsrc *isrc, int eoi) { + + if (eoi == PIC_EOI) + xen_intr_eoi_source(isrc); } /* @@ -950,8 +953,13 @@ xen_intr_enable_source(struct intsrc *isrc) * \param isrc The interrupt source to EOI. */ static void -xen_intr_eoi_source(struct intsrc *isrc) +xen_intr_eoi_source(struct intsrc *base_isrc) { + struct xenisrc *isrc; + + isrc = (struct xenisrc *)base_isrc; + synch_clear_bit(isrc->xi_port, + &HYPERVISOR_shared_info->evtchn_pending[0]); } /* @@ -981,8 +989,9 @@ xen_intr_pirq_disable_source(struct intsrc *base_isrc, int eoi) struct xenisrc *isrc; isrc = (struct xenisrc *)base_isrc; - evtchn_mask_port(isrc->xi_port); + if (isrc->xi_edgetrigger == 0) + evtchn_mask_port(isrc->xi_port); if (eoi == PIC_EOI) xen_intr_pirq_eoi_source(base_isrc); } @@ -998,7 +1007,9 @@ xen_intr_pirq_enable_source(struct intsrc *base_isrc) struct xenisrc *isrc; isrc = (struct xenisrc *)base_isrc; - evtchn_unmask_port(isrc->xi_port); + + if (isrc->xi_edgetrigger == 0) + evtchn_unmask_port(isrc->xi_port); } /* @@ -1010,13 +1021,19 @@ static void xen_intr_pirq_eoi_source(struct intsrc *base_isrc) { struct xenisrc *isrc; + int error; - /* XXX Use shared page of flags for this. 
*/ isrc = (struct xenisrc *)base_isrc; + + synch_clear_bit(isrc->xi_port, + &HYPERVISOR_shared_info->evtchn_pending[0]); if (test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map)) { struct physdev_eoi eoi = { .irq = isrc->xi_pirq }; - (void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + error = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + if (error != 0) + panic("Unable to EOI PIRQ#%d: %d\n", + isrc->xi_pirq, error); } } @@ -1361,7 +1378,6 @@ int xen_register_pirq(int vector, enum intr_trigger trig, enum intr_polarity pol) { struct physdev_map_pirq map_pirq; - struct physdev_irq alloc_pirq; struct xenisrc *isrc; int error; @@ -1382,14 +1398,6 @@ xen_register_pirq(int vector, enum intr_trigger trig, enum intr_polarity pol) return (error); } - alloc_pirq.irq = vector; - alloc_pirq.vector = 0; - error = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &alloc_pirq); - if (error) { - printf("xen: unable to alloc PIRQ for IRQ#%d\n", vector); - return (error); - } - mtx_lock(&xen_intr_isrc_lock); isrc = xen_intr_alloc_isrc(EVTCHN_TYPE_PIRQ, vector); mtx_unlock(&xen_intr_isrc_lock); @@ -1432,6 +1440,8 @@ xen_register_msi(device_t dev, int vector, int count) KASSERT(isrc != NULL, ("xen: unable to allocate isrc for interrupt")); isrc->xi_pirq = msi_irq.pirq + i; + /* MSI interrupts are always edge triggered */ + isrc->xi_edgetrigger = 1; } mtx_unlock(&xen_intr_isrc_lock); @@ -1573,10 +1583,9 @@ xen_intr_dump_port(struct xenisrc *isrc) isrc->xi_port, xen_intr_print_type(isrc->xi_type)); if (isrc->xi_type == EVTCHN_TYPE_PIRQ) { db_printf("\tPirq: %d ActiveHi: %d EdgeTrigger: %d " - "NeedsEOI: %d Shared: %d\n", + "NeedsEOI: %d\n", isrc->xi_pirq, isrc->xi_activehi, isrc->xi_edgetrigger, - !!test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map), - isrc->xi_shared); + !!test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map)); } if (isrc->xi_type == EVTCHN_TYPE_VIRQ) db_printf("\tVirq: %d\n", isrc->xi_virq); diff --git a/usr.bin/compress/compress.c b/usr.bin/compress/compress.c index 1f458e5..2d2efb3 
100644 --- a/usr.bin/compress/compress.c +++ b/usr.bin/compress/compress.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include <err.h> #include <errno.h> +#include <fcntl.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -360,14 +361,14 @@ err: if (ofp) { static void setfile(const char *name, struct stat *fs) { - static struct timeval tv[2]; + static struct timespec tspec[2]; fs->st_mode &= S_ISUID|S_ISGID|S_IRWXU|S_IRWXG|S_IRWXO; - TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atim); - TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtim); - if (utimes(name, tv)) - cwarn("utimes: %s", name); + tspec[0] = fs->st_atim; + tspec[1] = fs->st_mtim; + if (utimensat(AT_FDCWD, name, tspec, 0)) + cwarn("utimensat: %s", name); /* * Changing the ownership probably won't succeed, unless we're root diff --git a/usr.bin/gcore/elfcore.c b/usr.bin/gcore/elfcore.c index d487b21..2d1acb8 100644 --- a/usr.bin/gcore/elfcore.c +++ b/usr.bin/gcore/elfcore.c @@ -511,7 +511,8 @@ readmap(pid_t pid) ((pflags & PFLAGS_FULL) == 0 && kve->kve_type != KVME_TYPE_DEFAULT && kve->kve_type != KVME_TYPE_VNODE && - kve->kve_type != KVME_TYPE_SWAP)) + kve->kve_type != KVME_TYPE_SWAP && + kve->kve_type != KVME_TYPE_PHYS)) continue; ent = calloc(1, sizeof(*ent)); diff --git a/usr.bin/gzip/gzip.c b/usr.bin/gzip/gzip.c index 9f92b6a..f197f18 100644 --- a/usr.bin/gzip/gzip.c +++ b/usr.bin/gzip/gzip.c @@ -1070,7 +1070,7 @@ out2: static void copymodes(int fd, const struct stat *sbp, const char *file) { - struct timeval times[2]; + struct timespec times[2]; struct stat sb; /* @@ -1098,10 +1098,10 @@ copymodes(int fd, const struct stat *sbp, const char *file) if (fchmod(fd, sb.st_mode) < 0) maybe_warn("couldn't fchmod: %s", file); - TIMESPEC_TO_TIMEVAL(×[0], &sb.st_atim); - TIMESPEC_TO_TIMEVAL(×[1], &sb.st_mtim); - if (futimes(fd, times) < 0) - maybe_warn("couldn't utimes: %s", file); + times[0] = sb.st_atim; + times[1] = sb.st_mtim; + if (futimens(fd, times) < 0) + maybe_warn("couldn't futimens: %s", file); /* only 
try flags if they exist already */ if (sb.st_flags != 0 && fchflags(fd, sb.st_flags) < 0) diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index b8e5903..fe7bcd2 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -1155,7 +1155,7 @@ ktrsyscall(struct ktr_syscall *ktr, u_int flags) print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); - sendfileflagsname(*ip); + sendfileflagsname(*(int *)ip); ip++; narg--; break; diff --git a/usr.bin/timeout/timeout.c b/usr.bin/timeout/timeout.c index 06dc086..a682dde 100644 --- a/usr.bin/timeout/timeout.c +++ b/usr.bin/timeout/timeout.c @@ -172,6 +172,7 @@ main(int argc, char **argv) double second_kill; bool timedout = false; bool do_second_kill = false; + bool child_done = false; struct sigaction signals; struct procctl_reaper_status info; struct procctl_reaper_kill killemall; @@ -187,7 +188,6 @@ main(int argc, char **argv) foreground = preserve = 0; second_kill = 0; - cpid = -1; const struct option longopts[] = { { "preserve-status", no_argument, &preserve, 1 }, @@ -281,20 +281,26 @@ main(int argc, char **argv) if (sig_chld) { sig_chld = 0; - while (((cpid = wait(&status)) < 0) && errno == EINTR) - continue; - if (cpid == pid) { - pstat = status; - if (!foreground) - break; + while ((cpid = waitpid(-1, &status, WNOHANG)) != 0) { + if (cpid < 0) { + if (errno == EINTR) + continue; + else + break; + } else if (cpid == pid) { + pstat = status; + child_done = true; + } } - if (!foreground) { - procctl(P_PID, getpid(), PROC_REAP_STATUS, - &info); - if (info.rs_children == 0) { - cpid = pid; + if (child_done) { + if (foreground) { break; + } else { + procctl(P_PID, getpid(), + PROC_REAP_STATUS, &info); + if (info.rs_children == 0) + break; } } } else if (sig_alrm) { @@ -336,7 +342,7 @@ main(int argc, char **argv) } } - while (cpid != pid && wait(&pstat) == -1) { + while (!child_done && wait(&pstat) == -1) { if (errno != EINTR) err(EX_OSERR, "waitpid()"); } diff --git a/usr.bin/touch/touch.c 
b/usr.bin/touch/touch.c index c8bfd28..804af7e 100644 --- a/usr.bin/touch/touch.c +++ b/usr.bin/touch/touch.c @@ -76,8 +76,8 @@ main(int argc, char *argv[]) myname = basename(argv[0]); Aflag = aflag = cflag = mflag = timeset = 0; atflag = 0; - if (clock_gettime(CLOCK_REALTIME, &ts[0]) == -1) - err(1, "clock_gettime(CLOCK_REALTIME)"); + ts[0].tv_sec = ts[1].tv_sec = 0; + ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW; while ((ch = getopt(argc, argv, "A:acd:fhmr:t:")) != -1) switch(ch) { @@ -152,6 +152,11 @@ main(int argc, char *argv[]) ts[1] = ts[0]; } + if (!aflag) + ts[0].tv_nsec = UTIME_OMIT; + if (!mflag) + ts[1].tv_nsec = UTIME_OMIT; + if (*argv == NULL) usage(myname); @@ -183,11 +188,6 @@ main(int argc, char *argv[]) continue; } - if (!aflag) - ts[0] = sb.st_atim; - if (!mflag) - ts[1] = sb.st_mtim; - /* * We're adjusting the times based on the file times, not a * specified time (that gets handled above). @@ -203,26 +203,9 @@ main(int argc, char *argv[]) } } - /* Try utimensat(2). */ if (!utimensat(AT_FDCWD, *argv, ts, atflag)) continue; - /* If the user specified a time, nothing else we can do. */ - if (timeset || Aflag) { - rval = 1; - warn("%s", *argv); - continue; - } - - /* - * System V and POSIX 1003.1 require that a NULL argument - * set the access/modification times to the current time. - * The permission checks are different, too, in that the - * ability to write the file is sufficient. Take a shot. - */ - if (!utimensat(AT_FDCWD, *argv, NULL, atflag)) - continue; - rval = 1; warn("%s", *argv); } @@ -238,8 +221,8 @@ stime_arg1(const char *arg, struct timespec *tvp) struct tm *t; int yearset; char *p; - /* Start with the current time. */ - now = tvp[0].tv_sec; + + now = time(NULL); if ((t = localtime(&now)) == NULL) err(1, "localtime"); /* [[CC]YY]MMDDhhmm[.SS] */ @@ -300,8 +283,8 @@ stime_arg2(const char *arg, int year, struct timespec *tvp) { time_t now; struct tm *t; - /* Start with the current time. 
*/ - now = tvp[0].tv_sec; + + now = time(NULL); if ((t = localtime(&now)) == NULL) err(1, "localtime"); diff --git a/usr.sbin/binmiscctl/binmiscctl.8 b/usr.sbin/binmiscctl/binmiscctl.8 index 8eb5552..4bc9986 100644 --- a/usr.sbin/binmiscctl/binmiscctl.8 +++ b/usr.sbin/binmiscctl/binmiscctl.8 @@ -180,6 +180,17 @@ Look up and list the record for the image activator: .Dl # binmiscctl lookup llvmbc .Pp +Add QEMU bsd-user program as an image activator for ARM AARCH64 binaries: +.Bd -literal -offset indent +# binmiscctl add arm64 \e + --interpreter "/usr/local/bin/qemu-aarch64-static" \e + --magic "\ex7f\ex45\ex4c\ex46\ex02\ex01\ex01\ex00\ex00\ex00\e + \ex00\ex00\ex00\ex00\ex00\ex00\ex02\ex00\exb7\ex00" \e + --mask "\exff\exff\exff\exff\exff\exff\exff\ex00\exff\exff\e + \exff\exff\exff\exff\exff\exff\exfe\exff\exff\exff" \e + --size 20 --set-enabled +.Ed +.Pp Add QEMU bsd-user program as an image activator for ARM little-endian binaries: .Bd -literal -offset indent # binmiscctl add armelf \e diff --git a/usr.sbin/binmiscctl/binmiscctl.c b/usr.sbin/binmiscctl/binmiscctl.c index 44122eb..5ab82e4 100644 --- a/usr.sbin/binmiscctl/binmiscctl.c +++ b/usr.sbin/binmiscctl/binmiscctl.c @@ -371,8 +371,10 @@ add_cmd(__unused int argc, char *argv[], ximgact_binmisc_entry_t *xbe) } int -name_cmd(__unused int argc, char *argv[], ximgact_binmisc_entry_t *xbe) +name_cmd(int argc, char *argv[], ximgact_binmisc_entry_t *xbe) { + if (argc == 0) + usage("Required argument missing\n"); if (strlen(argv[0]) > IBE_NAME_MAX) usage("'%s' string length longer than IBE_NAME_MAX (%d)", IBE_NAME_MAX); diff --git a/usr.sbin/devctl/Makefile b/usr.sbin/devctl/Makefile index 5a6e19d..a7deb37 100644 --- a/usr.sbin/devctl/Makefile +++ b/usr.sbin/devctl/Makefile @@ -2,7 +2,6 @@ PROG= devctl MAN= devctl.8 -MAN= LIBADD= devctl diff --git a/usr.sbin/pw/tests/pw_useradd.sh b/usr.sbin/pw/tests/pw_useradd.sh index 2930c41..1934bbc 100755 --- a/usr.sbin/pw/tests/pw_useradd.sh +++ b/usr.sbin/pw/tests/pw_useradd.sh 
@@ -63,6 +63,108 @@ user_add_comments_invalid_noupdate_body() { atf_check -s exit:1 -o empty grep "^test:.*" $HOME/master.passwd } +# Test add user with alternate homedir +atf_test_case user_add_homedir +user_add_homedir_body() { + populate_etc_skel + + atf_check -s exit:0 ${PW} useradd test -d /foo/bar + atf_check -s exit:0 -o match:"^test:\*:.*::0:0:User &:/foo/bar:.*" \ + ${PW} usershow test +} + +# Test add user with account expiration as an epoch date +atf_test_case user_add_account_expiration_epoch +user_add_account_expiration_epoch_body() { + populate_etc_skel + + DATE=`date -j -v+1d "+%s"` + atf_check -s exit:0 ${PW} useradd test -e ${DATE} + atf_check -s exit:0 -o match:"^test:\*:.*::0:${DATE}:.*" \ + ${PW} usershow test +} + +# Test add user with account expiration as a DD-MM-YYYY date +atf_test_case user_add_account_expiration_date_numeric +user_add_account_expiration_date_numeric_body() { + populate_etc_skel + + DATE=`date -j -v+1d "+%d-%m-%Y"` + EPOCH=`date -j -f "%d-%m-%Y %H:%M:%S" "${DATE} 00:00:00" "+%s"` + atf_check -s exit:0 ${PW} useradd test -e ${DATE} + atf_check -s exit:0 -o match:"^test:\*:.*::0:${EPOCH}:User &:.*" \ + ${PW} usershow test +} + +# Test add user with account expiration as a DD-MM-YYYY date +atf_test_case user_add_account_expiration_date_month +user_add_account_expiration_date_month_body() { + populate_etc_skel + + DATE=`date -j -v+1d "+%d-%b-%Y"` + EPOCH=`date -j -f "%d-%b-%Y %H:%M:%S" "${DATE} 00:00:00" "+%s"` + atf_check -s exit:0 ${PW} useradd test -e ${DATE} + atf_check -s exit:0 -o match:"^test:\*:.*::0:${EPOCH}:User &:.*" \ + ${PW} usershow test +} + +# Test add user with account expiration as a relative date +atf_test_case user_add_account_expiration_date_relative +user_add_account_expiration_date_relative_body() { + populate_etc_skel + + EPOCH=`date -j -v+13m "+%s"` + atf_check -s exit:0 ${PW} useradd test -e +13o + atf_check -s exit:0 -o match:"^test:\*:.*::0:${EPOCH}:User &:.*" \ + ${PW} usershow test +} + +# Test add 
user with password expiration as an epoch date +atf_test_case user_add_password_expiration_epoch +user_add_password_expiration_epoch_body() { + populate_etc_skel + + DATE=`date -j -v+1d "+%s"` + atf_check -s exit:0 ${PW} useradd test -p ${DATE} + atf_check -s exit:0 -o match:"^test:\*:.*::${DATE}:0:.*" \ + ${PW} usershow test +} + +# Test add user with password expiration as a DD-MM-YYYY date +atf_test_case user_add_password_expiration_date_numeric +user_add_password_expiration_date_numeric_body() { + populate_etc_skel + + DATE=`date -j -v+1d "+%d-%m-%Y"` + EPOCH=`date -j -f "%d-%m-%Y %H:%M:%S" "${DATE} 00:00:00" "+%s"` + atf_check -s exit:0 ${PW} useradd test -p ${DATE} + atf_check -s exit:0 -o match:"^test:\*:.*::${EPOCH}:0:User &:.*" \ + ${PW} usershow test +} + +# Test add user with password expiration as a DD-MMM-YYYY date +atf_test_case user_add_password_expiration_date_month +user_add_password_expiration_date_month_body() { + populate_etc_skel + + DATE=`date -j -v+1d "+%d-%b-%Y"` + EPOCH=`date -j -f "%d-%b-%Y %H:%M:%S" "${DATE} 00:00:00" "+%s"` + atf_check -s exit:0 ${PW} useradd test -p ${DATE} + atf_check -s exit:0 -o match:"^test:\*:.*::${EPOCH}:0:User &:.*" \ + ${PW} usershow test +} + +# Test add user with password expiration as a relative date +atf_test_case user_add_password_expiration_date_relative +user_add_password_expiration_date_relative_body() { + populate_etc_skel + + EPOCH=`date -j -v+13m "+%s"` + atf_check -s exit:0 ${PW} useradd test -p +13o + atf_check -s exit:0 -o match:"^test:\*:.*::${EPOCH}:0:User &:.*" \ + ${PW} usershow test +} + atf_init_test_cases() { atf_add_test_case user_add atf_add_test_case user_add_noupdate @@ -70,4 +172,13 @@ atf_init_test_cases() { atf_add_test_case user_add_comments_noupdate atf_add_test_case user_add_comments_invalid atf_add_test_case user_add_comments_invalid_noupdate + atf_add_test_case user_add_homedir + atf_add_test_case user_add_account_expiration_epoch + atf_add_test_case 
user_add_account_expiration_date_numeric + atf_add_test_case user_add_account_expiration_date_month + atf_add_test_case user_add_account_expiration_date_relative + atf_add_test_case user_add_password_expiration_epoch + atf_add_test_case user_add_password_expiration_date_numeric + atf_add_test_case user_add_password_expiration_date_month + atf_add_test_case user_add_password_expiration_date_relative } diff --git a/usr.sbin/pw/tests/pw_usernext.sh b/usr.sbin/pw/tests/pw_usernext.sh index 1cdadee..89f938e 100755 --- a/usr.sbin/pw/tests/pw_usernext.sh +++ b/usr.sbin/pw/tests/pw_usernext.sh @@ -8,14 +8,15 @@ atf_test_case usernext usernext_body() { populate_etc_skel - var0=1 - LIMIT=`jot -r 1 2 10` - while [ "$var0" -lt "$LIMIT" ] + CURRENT=`${PW} usernext | sed -e 's/:.*//'` + RANDOM=`jot -r 1 1 150` + MAX=`expr ${CURRENT} + ${RANDOM}` + while [ "${CURRENT}" -lt "${MAX}" ] do - atf_check -s exit:0 ${PW} useradd test$var0 - var0=`expr $var0 + 1` + atf_check -s exit:0 ${PW} useradd test${CURRENT} + CURRENT=`expr ${CURRENT} + 1` done - atf_check -s exit:0 -o match:"100${LIMIT}:100${LIMIT}" \ + atf_check -s exit:0 -o match:"${CURRENT}:${CURRENT}" \ ${PW} usernext } @@ -25,14 +26,16 @@ atf_test_case usernext_assigned_group usernext_assigned_group_body() { populate_etc_skel - var0=1 - LIMIT=`jot -r 1 2 10` - while [ "$var0" -lt "$LIMIT" ] + CURRENT=`${PW} usernext | sed -e 's/:.*//'` + CURRENTGID=`${PW} groupnext` + RANDOM=`jot -r 1 1 150` + MAX=`expr ${CURRENT} + ${RANDOM}` + while [ "${CURRENT}" -lt "${MAX}" ] do - atf_check -s exit:0 ${PW} useradd -n test$var0 -g 0 - var0=`expr $var0 + 1` + atf_check -s exit:0 ${PW} useradd -n test${CURRENT} -g 0 + CURRENT=`expr ${CURRENT} + 1` done - atf_check -s exit:0 -o match:"100${LIMIT}:1001}" \ + atf_check -s exit:0 -o match:"${CURRENT}:${CURRENTGID}" \ ${PW} usernext } |