summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorjilles <jilles@FreeBSD.org>2011-05-08 11:32:20 +0000
committerjilles <jilles@FreeBSD.org>2011-05-08 11:32:20 +0000
commit8ac39aa5be9e327aabd426d6fa925ffc26a8e459 (patch)
tree25ab00119aa1c5d46402e917d5ea6e05e8cac2a1 /tools
parentfb474a94c3ad352448621bbcc77856aa0219fa92 (diff)
downloadFreeBSD-src-8ac39aa5be9e327aabd426d6fa925ffc26a8e459.zip
FreeBSD-src-8ac39aa5be9e327aabd426d6fa925ffc26a8e459.tar.gz
sh: Add UTF-8 support to pattern matching.
?, [...] patterns match codepoints instead of bytes. They do not match invalid sequences. [...] patterns must not contain invalid sequences otherwise they will not match anything. This is so that ${var#?} removes the first codepoint, not the first byte, without putting UTF-8 knowledge into the ${var#pattern} code. However, * continues to match any string and an invalid sequence matches an identical invalid sequence. (This differs from fnmatch(3).)
Diffstat (limited to 'tools')
-rw-r--r--tools/regression/bin/sh/builtins/case5.057
-rw-r--r--tools/regression/bin/sh/expansion/trim8.075
2 files changed, 132 insertions, 0 deletions
diff --git a/tools/regression/bin/sh/builtins/case5.0 b/tools/regression/bin/sh/builtins/case5.0
new file mode 100644
index 0000000..8c6db5a
--- /dev/null
+++ b/tools/regression/bin/sh/builtins/case5.0
@@ -0,0 +1,57 @@
+# $FreeBSD$
+
+unset LC_ALL
+LC_CTYPE=en_US.UTF-8
+export LC_CTYPE
+
+c1=e
+# a umlaut
+c2=$(printf '\303\244')
+# euro sign
+c3=$(printf '\342\202\254')
+# some sort of 't' outside BMP
+c4=$(printf '\360\235\225\245')
+
+ok=0
+case $c1$c2$c3$c4 in
+*) ok=1 ;;
+esac
+if [ $ok = 0 ]; then
+ echo wrong at $LINENO
+ exit 3
+fi
+
+case $c1$c2$c3$c4 in
+$c1$c2$c3$c4) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+"$c1$c2$c3$c4") ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+????) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1.$c2.$c3.$c4 in
+?.?.?.?) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+[!a][!b][!c][!d]) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+[$c1][$c2][$c3][$c4]) ;;
+*) echo wrong at $LINENO ;;
+esac
+
+case $c1$c2$c3$c4 in
+["$c1"]["$c2"]["$c3"]["$c4"]) ;;
+*) echo wrong at $LINENO ;;
+esac
diff --git a/tools/regression/bin/sh/expansion/trim8.0 b/tools/regression/bin/sh/expansion/trim8.0
new file mode 100644
index 0000000..f7272f3
--- /dev/null
+++ b/tools/regression/bin/sh/expansion/trim8.0
@@ -0,0 +1,75 @@
+# $FreeBSD$
+
+unset LC_ALL
+LC_CTYPE=en_US.UTF-8
+export LC_CTYPE
+
+c1=e
+# a umlaut
+c2=$(printf '\303\244')
+# euro sign
+c3=$(printf '\342\202\254')
+# some sort of 't' outside BMP
+c4=$(printf '\360\235\225\245')
+
+s=$c1$c2$c3$c4
+
+testcase() {
+ code="$1"
+ expected="$2"
+ oIFS="$IFS"
+ eval "$code"
+ IFS='|'
+ result="$#|$*"
+ IFS="$oIFS"
+ if [ "x$result" = "x$expected" ]; then
+ ok=x$ok
+ else
+ failures=x$failures
+ echo "For $code, expected $expected actual $result"
+ fi
+}
+
+testcase 'set -- "$s"' "1|$s"
+testcase 'set -- "${s#$c2}"' "1|$s"
+testcase 'set -- "${s#*}"' "1|$s"
+testcase 'set -- "${s#$c1}"' "1|$c2$c3$c4"
+testcase 'set -- "${s#$c1$c2}"' "1|$c3$c4"
+testcase 'set -- "${s#$c1$c2$c3}"' "1|$c4"
+testcase 'set -- "${s#$c1$c2$c3$c4}"' "1|"
+testcase 'set -- "${s#?}"' "1|$c2$c3$c4"
+testcase 'set -- "${s#??}"' "1|$c3$c4"
+testcase 'set -- "${s#???}"' "1|$c4"
+testcase 'set -- "${s#????}"' "1|"
+testcase 'set -- "${s#*$c3}"' "1|$c4"
+testcase 'set -- "${s%$c4}"' "1|$c1$c2$c3"
+testcase 'set -- "${s%$c3$c4}"' "1|$c1$c2"
+testcase 'set -- "${s%$c2$c3$c4}"' "1|$c1"
+testcase 'set -- "${s%$c1$c2$c3$c4}"' "1|"
+testcase 'set -- "${s%?}"' "1|$c1$c2$c3"
+testcase 'set -- "${s%??}"' "1|$c1$c2"
+testcase 'set -- "${s%???}"' "1|$c1"
+testcase 'set -- "${s%????}"' "1|"
+testcase 'set -- "${s%$c2*}"' "1|$c1"
+testcase 'set -- "${s##$c2}"' "1|$s"
+testcase 'set -- "${s##*}"' "1|"
+testcase 'set -- "${s##$c1}"' "1|$c2$c3$c4"
+testcase 'set -- "${s##$c1$c2}"' "1|$c3$c4"
+testcase 'set -- "${s##$c1$c2$c3}"' "1|$c4"
+testcase 'set -- "${s##$c1$c2$c3$c4}"' "1|"
+testcase 'set -- "${s##?}"' "1|$c2$c3$c4"
+testcase 'set -- "${s##??}"' "1|$c3$c4"
+testcase 'set -- "${s##???}"' "1|$c4"
+testcase 'set -- "${s##????}"' "1|"
+testcase 'set -- "${s##*$c3}"' "1|$c4"
+testcase 'set -- "${s%%$c4}"' "1|$c1$c2$c3"
+testcase 'set -- "${s%%$c3$c4}"' "1|$c1$c2"
+testcase 'set -- "${s%%$c2$c3$c4}"' "1|$c1"
+testcase 'set -- "${s%%$c1$c2$c3$c4}"' "1|"
+testcase 'set -- "${s%%?}"' "1|$c1$c2$c3"
+testcase 'set -- "${s%%??}"' "1|$c1$c2"
+testcase 'set -- "${s%%???}"' "1|$c1"
+testcase 'set -- "${s%%????}"' "1|"
+testcase 'set -- "${s%%$c2*}"' "1|$c1"
+
+test "x$failures" = x
OpenPOWER on IntegriCloud