diff options
author | jilles <jilles@FreeBSD.org> | 2011-05-08 11:32:20 +0000 |
---|---|---|
committer | jilles <jilles@FreeBSD.org> | 2011-05-08 11:32:20 +0000 |
commit | 8ac39aa5be9e327aabd426d6fa925ffc26a8e459 (patch) | |
tree | 25ab00119aa1c5d46402e917d5ea6e05e8cac2a1 /tools | |
parent | fb474a94c3ad352448621bbcc77856aa0219fa92 (diff) | |
download | FreeBSD-src-8ac39aa5be9e327aabd426d6fa925ffc26a8e459.zip FreeBSD-src-8ac39aa5be9e327aabd426d6fa925ffc26a8e459.tar.gz |
sh: Add UTF-8 support to pattern matching.
?, [...] patterns match codepoints instead of bytes. They do not match
invalid sequences. [...] patterns must not contain invalid sequences;
otherwise they will not match anything. This is so that ${var#?} removes the
first codepoint, not the first byte, without putting UTF-8 knowledge into
the ${var#pattern} code. However, * continues to match any string and an
invalid sequence matches an identical invalid sequence. (This differs from
fnmatch(3).)
Diffstat (limited to 'tools')
-rw-r--r-- | tools/regression/bin/sh/builtins/case5.0 | 57 | ||||
-rw-r--r-- | tools/regression/bin/sh/expansion/trim8.0 | 75 |
2 files changed, 132 insertions, 0 deletions
diff --git a/tools/regression/bin/sh/builtins/case5.0 b/tools/regression/bin/sh/builtins/case5.0
new file mode 100644
index 0000000..8c6db5a
--- /dev/null
+++ b/tools/regression/bin/sh/builtins/case5.0
@@ -0,0 +1,57 @@
+# $FreeBSD$
+
+unset LC_ALL
+LC_CTYPE=en_US.UTF-8
+export LC_CTYPE
+
+c1=e
+# a umlaut
+c2=$(printf '\303\244')
+# euro sign
+c3=$(printf '\342\202\254')
+# some sort of 't' outside BMP
+c4=$(printf '\360\235\225\245')
+
+ok=0
+case $c1$c2$c3$c4 in
+*) ok=1 ;;
+esac
+if [ $ok = 0 ]; then
+	echo wrong at $LINENO
+	exit 3
+fi
+
+case $c1$c2$c3$c4 in
+$c1$c2$c3$c4) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+"$c1$c2$c3$c4") ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+????) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1.$c2.$c3.$c4 in
+?.?.?.?) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+[!a][!b][!c][!d]) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+[$c1][$c2][$c3][$c4]) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+["$c1"]["$c2"]["$c3"]["$c4"]) ;;
+*) echo wrong at $LINENO ;;
+esac
diff --git a/tools/regression/bin/sh/expansion/trim8.0 b/tools/regression/bin/sh/expansion/trim8.0
new file mode 100644
index 0000000..f7272f3
--- /dev/null
+++ b/tools/regression/bin/sh/expansion/trim8.0
@@ -0,0 +1,75 @@
+# $FreeBSD$
+
+unset LC_ALL
+LC_CTYPE=en_US.UTF-8
+export LC_CTYPE
+
+c1=e
+# a umlaut
+c2=$(printf '\303\244')
+# euro sign
+c3=$(printf '\342\202\254')
+# some sort of 't' outside BMP
+c4=$(printf '\360\235\225\245')
+
+s=$c1$c2$c3$c4
+
+testcase() {
+	code="$1"
+	expected="$2"
+	oIFS="$IFS"
+	eval "$code"
+	IFS='|'
+	result="$#|$*"
+	IFS="$oIFS"
+	if [ "x$result" = "x$expected" ]; then
+		ok=x$ok
+	else
+		failures=x$failures
+		echo "For $code, expected $expected actual $result"
+	fi
+}
+
+testcase 'set -- "$s"' "1|$s"
+testcase 'set -- "${s#$c2}"' "1|$s"
+testcase 'set -- "${s#*}"' "1|$s"
+testcase 'set -- "${s#$c1}"' "1|$c2$c3$c4"
+testcase 'set -- "${s#$c1$c2}"' "1|$c3$c4"
+testcase 'set -- "${s#$c1$c2$c3}"' "1|$c4"
+testcase 'set -- "${s#$c1$c2$c3$c4}"' "1|"
+testcase 'set -- "${s#?}"' "1|$c2$c3$c4"
+testcase 'set -- "${s#??}"' "1|$c3$c4"
+testcase 'set -- "${s#???}"' "1|$c4"
+testcase 'set -- "${s#????}"' "1|"
+testcase 'set -- "${s#*$c3}"' "1|$c4"
+testcase 'set -- "${s%$c4}"' "1|$c1$c2$c3"
+testcase 'set -- "${s%$c3$c4}"' "1|$c1$c2"
+testcase 'set -- "${s%$c2$c3$c4}"' "1|$c1"
+testcase 'set -- "${s%$c1$c2$c3$c4}"' "1|"
+testcase 'set -- "${s%?}"' "1|$c1$c2$c3"
+testcase 'set -- "${s%??}"' "1|$c1$c2"
+testcase 'set -- "${s%???}"' "1|$c1"
+testcase 'set -- "${s%????}"' "1|"
+testcase 'set -- "${s%$c2*}"' "1|$c1"
+testcase 'set -- "${s##$c2}"' "1|$s"
+testcase 'set -- "${s##*}"' "1|"
+testcase 'set -- "${s##$c1}"' "1|$c2$c3$c4"
+testcase 'set -- "${s##$c1$c2}"' "1|$c3$c4"
+testcase 'set -- "${s##$c1$c2$c3}"' "1|$c4"
+testcase 'set -- "${s##$c1$c2$c3$c4}"' "1|"
+testcase 'set -- "${s##?}"' "1|$c2$c3$c4"
+testcase 'set -- "${s##??}"' "1|$c3$c4"
+testcase 'set -- "${s##???}"' "1|$c4"
+testcase 'set -- "${s##????}"' "1|"
+testcase 'set -- "${s##*$c3}"' "1|$c4"
+testcase 'set -- "${s%%$c4}"' "1|$c1$c2$c3"
+testcase 'set -- "${s%%$c3$c4}"' "1|$c1$c2"
+testcase 'set -- "${s%%$c2$c3$c4}"' "1|$c1"
+testcase 'set -- "${s%%$c1$c2$c3$c4}"' "1|"
+testcase 'set -- "${s%%?}"' "1|$c1$c2$c3"
+testcase 'set -- "${s%%??}"' "1|$c1$c2"
+testcase 'set -- "${s%%???}"' "1|$c1"
+testcase 'set -- "${s%%????}"' "1|"
+testcase 'set -- "${s%%$c2*}"' "1|$c1"
+
+test "x$failures" = x