diff options
author | ache <ache@FreeBSD.org> | 1998-02-22 12:05:42 +0000 |
---|---|---|
committer | ache <ache@FreeBSD.org> | 1998-02-22 12:05:42 +0000 |
commit | d651de4e235915b1d935a41f99a513c4625443ff (patch) | |
tree | a5b0182738bacc7dbbdbb3fea50dceea2acdb8c7 /sys/fs/msdosfs/msdosfs_conv.c | |
parent | 59689f1f05493a8789b2a31a81091f52611d1879 (diff) | |
download | FreeBSD-src-d651de4e235915b1d935a41f99a513c4625443ff.zip FreeBSD-src-d651de4e235915b1d935a41f99a513c4625443ff.tar.gz |
Add initial support for mapping file names in the 0x4XX Unicode Cyrillic range:
only the win->unix direction is implemented at this time, using a 256-byte
table that defaults to KOI8-R (it will be loadable in the future).
Since the reverse mapping is not supported yet, you'll get "No such file or directory"
for each Cyrillic name with 'ls -l'; only 'echo *' works at the moment.
Teach the current code to understand Unicode a bit.
Diffstat (limited to 'sys/fs/msdosfs/msdosfs_conv.c')
-rw-r--r-- | sys/fs/msdosfs/msdosfs_conv.c | 321 |
1 files changed, 308 insertions, 13 deletions
/*
 * Unicode Cyrillic (U+0400..U+04FF) to local character set conversion.
 *
 * The table is indexed by the low byte of the Unicode code point and
 * yields the corresponding local byte.  It defaults to KOI8-R and is
 * intended to become loadable in the future, which is why it is not
 * declared const.  Every code point without a KOI8-R letter in this
 * table is entered as 0x00.
 *
 * In the patch to sys/fs/msdosfs/msdosfs_conv.c this table is added
 * directly after the u2l[256] table.
 */
static u_char
cyr2u[256] = {	/* defaulted to KOI8-R */
	0x00,	/* U+0400 */
	0xb3,	/* U+0401 CYRILLIC CAPITAL LETTER IO */
	0x00,	/* U+0402 CYRILLIC CAPITAL LETTER DJE */
	0x00,	/* U+0403 CYRILLIC CAPITAL LETTER GJE */
	0x00,	/* U+0404 CYRILLIC CAPITAL LETTER UKRAINIAN IE */
	0x00,	/* U+0405 CYRILLIC CAPITAL LETTER DZE */
	0x00,	/* U+0406 CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I */
	0x00,	/* U+0407 CYRILLIC CAPITAL LETTER YI */
	0x00,	/* U+0408 CYRILLIC CAPITAL LETTER JE */
	0x00,	/* U+0409 CYRILLIC CAPITAL LETTER LJE */
	0x00,	/* U+040A CYRILLIC CAPITAL LETTER NJE */
	0x00,	/* U+040B CYRILLIC CAPITAL LETTER TSHE */
	0x00,	/* U+040C CYRILLIC CAPITAL LETTER KJE */
	0x00,	/* U+040D */
	0x00,	/* U+040E CYRILLIC CAPITAL LETTER SHORT U */
	0x00,	/* U+040F CYRILLIC CAPITAL LETTER DZHE */
	0xe1,	/* U+0410 CYRILLIC CAPITAL LETTER A */
	0xe2,	/* U+0411 CYRILLIC CAPITAL LETTER BE */
	0xf7,	/* U+0412 CYRILLIC CAPITAL LETTER VE */
	0xe7,	/* U+0413 CYRILLIC CAPITAL LETTER GHE */
	0xe4,	/* U+0414 CYRILLIC CAPITAL LETTER DE */
	0xe5,	/* U+0415 CYRILLIC CAPITAL LETTER IE */
	0xf6,	/* U+0416 CYRILLIC CAPITAL LETTER ZHE */
	0xfa,	/* U+0417 CYRILLIC CAPITAL LETTER ZE */
	0xe9,	/* U+0418 CYRILLIC CAPITAL LETTER I */
	0xea,	/* U+0419 CYRILLIC CAPITAL LETTER SHORT I */
	0xeb,	/* U+041A CYRILLIC CAPITAL LETTER KA */
	0xec,	/* U+041B CYRILLIC CAPITAL LETTER EL */
	0xed,	/* U+041C CYRILLIC CAPITAL LETTER EM */
	0xee,	/* U+041D CYRILLIC CAPITAL LETTER EN */
	0xef,	/* U+041E CYRILLIC CAPITAL LETTER O */
	0xf0,	/* U+041F CYRILLIC CAPITAL LETTER PE */
	0xf2,	/* U+0420 CYRILLIC CAPITAL LETTER ER */
	0xf3,	/* U+0421 CYRILLIC CAPITAL LETTER ES */
	0xf4,	/* U+0422 CYRILLIC CAPITAL LETTER TE */
	0xf5,	/* U+0423 CYRILLIC CAPITAL LETTER U */
	0xe6,	/* U+0424 CYRILLIC CAPITAL LETTER EF */
	0xe8,	/* U+0425 CYRILLIC CAPITAL LETTER HA */
	0xe3,	/* U+0426 CYRILLIC CAPITAL LETTER TSE */
	0xfe,	/* U+0427 CYRILLIC CAPITAL LETTER CHE */
	0xfb,	/* U+0428 CYRILLIC CAPITAL LETTER SHA */
	0xfd,	/* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
	0xff,	/* U+042A CYRILLIC CAPITAL LETTER HARD SIGN */
	0xf9,	/* U+042B CYRILLIC CAPITAL LETTER YERU */
	0xf8,	/* U+042C CYRILLIC CAPITAL LETTER SOFT SIGN */
	0xfc,	/* U+042D CYRILLIC CAPITAL LETTER E */
	0xe0,	/* U+042E CYRILLIC CAPITAL LETTER YU */
	0xf1,	/* U+042F CYRILLIC CAPITAL LETTER YA */
	0xc1,	/* U+0430 CYRILLIC SMALL LETTER A */
	0xc2,	/* U+0431 CYRILLIC SMALL LETTER BE */
	0xd7,	/* U+0432 CYRILLIC SMALL LETTER VE */
	0xc7,	/* U+0433 CYRILLIC SMALL LETTER GHE */
	0xc4,	/* U+0434 CYRILLIC SMALL LETTER DE */
	0xc5,	/* U+0435 CYRILLIC SMALL LETTER IE */
	0xd6,	/* U+0436 CYRILLIC SMALL LETTER ZHE */
	0xda,	/* U+0437 CYRILLIC SMALL LETTER ZE */
	0xc9,	/* U+0438 CYRILLIC SMALL LETTER I */
	0xca,	/* U+0439 CYRILLIC SMALL LETTER SHORT I */
	0xcb,	/* U+043A CYRILLIC SMALL LETTER KA */
	0xcc,	/* U+043B CYRILLIC SMALL LETTER EL */
	0xcd,	/* U+043C CYRILLIC SMALL LETTER EM */
	0xce,	/* U+043D CYRILLIC SMALL LETTER EN */
	0xcf,	/* U+043E CYRILLIC SMALL LETTER O */
	0xd0,	/* U+043F CYRILLIC SMALL LETTER PE */
	0xd2,	/* U+0440 CYRILLIC SMALL LETTER ER */
	0xd3,	/* U+0441 CYRILLIC SMALL LETTER ES */
	0xd4,	/* U+0442 CYRILLIC SMALL LETTER TE */
	0xd5,	/* U+0443 CYRILLIC SMALL LETTER U */
	0xc6,	/* U+0444 CYRILLIC SMALL LETTER EF */
	0xc8,	/* U+0445 CYRILLIC SMALL LETTER HA */
	0xc3,	/* U+0446 CYRILLIC SMALL LETTER TSE */
	0xde,	/* U+0447 CYRILLIC SMALL LETTER CHE */
	0xdb,	/* U+0448 CYRILLIC SMALL LETTER SHA */
	0xdd,	/* U+0449 CYRILLIC SMALL LETTER SHCHA */
	0xdf,	/* U+044A CYRILLIC SMALL LETTER HARD SIGN */
	0xd9,	/* U+044B CYRILLIC SMALL LETTER YERU */
	0xd8,	/* U+044C CYRILLIC SMALL LETTER SOFT SIGN */
	0xdc,	/* U+044D CYRILLIC SMALL LETTER E */
	0xc0,	/* U+044E CYRILLIC SMALL LETTER YU */
	0xd1,	/* U+044F CYRILLIC SMALL LETTER YA */
	0x00,	/* U+0450 */
	0xa3,	/* U+0451 CYRILLIC SMALL LETTER IO */
	0x00,	/* U+0452 CYRILLIC SMALL LETTER DJE */
	0x00,	/* U+0453 CYRILLIC SMALL LETTER GJE */
	0x00,	/* U+0454 CYRILLIC SMALL LETTER UKRAINIAN IE */
	0x00,	/* U+0455 CYRILLIC SMALL LETTER DZE */
	0x00,	/* U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */
	0x00,	/* U+0457 CYRILLIC SMALL LETTER YI */
	0x00,	/* U+0458 CYRILLIC SMALL LETTER JE */
	0x00,	/* U+0459 CYRILLIC SMALL LETTER LJE */
	0x00,	/* U+045A CYRILLIC SMALL LETTER NJE */
	0x00,	/* U+045B CYRILLIC SMALL LETTER TSHE */
	0x00,	/* U+045C CYRILLIC SMALL LETTER KJE */
	0x00,	/* U+045D */
	0x00,	/* U+045E CYRILLIC SMALL LETTER SHORT U */
	0x00,	/* U+045F CYRILLIC SMALL LETTER DZHE */
	0x00,	/* U+0460 CYRILLIC CAPITAL LETTER OMEGA */
	0x00,	/* U+0461 CYRILLIC SMALL LETTER OMEGA */
	0x00,	/* U+0462 CYRILLIC CAPITAL LETTER YAT */
	0x00,	/* U+0463 CYRILLIC SMALL LETTER YAT */
	0x00,	/* U+0464 CYRILLIC CAPITAL LETTER IOTIFIED E */
	0x00,	/* U+0465 CYRILLIC SMALL LETTER IOTIFIED E */
	0x00,	/* U+0466 CYRILLIC CAPITAL LETTER LITTLE YUS */
	0x00,	/* U+0467 CYRILLIC SMALL LETTER LITTLE YUS */
	0x00,	/* U+0468 CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS */
	0x00,	/* U+0469 CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS */
	0x00,	/* U+046A CYRILLIC CAPITAL LETTER BIG YUS */
	0x00,	/* U+046B CYRILLIC SMALL LETTER BIG YUS */
	0x00,	/* U+046C CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS */
	0x00,	/* U+046D CYRILLIC SMALL LETTER IOTIFIED BIG YUS */
	0x00,	/* U+046E CYRILLIC CAPITAL LETTER KSI */
	0x00,	/* U+046F CYRILLIC SMALL LETTER KSI */
	0x00,	/* U+0470 CYRILLIC CAPITAL LETTER PSI */
	0x00,	/* U+0471 CYRILLIC SMALL LETTER PSI */
	0x00,	/* U+0472 CYRILLIC CAPITAL LETTER FITA */
	0x00,	/* U+0473 CYRILLIC SMALL LETTER FITA */
	0x00,	/* U+0474 CYRILLIC CAPITAL LETTER IZHITSA */
	0x00,	/* U+0475 CYRILLIC SMALL LETTER IZHITSA */
	0x00,	/* U+0476 CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT */
	0x00,	/* U+0477 CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT */
	0x00,	/* U+0478 CYRILLIC CAPITAL LETTER UK */
	0x00,	/* U+0479 CYRILLIC SMALL LETTER UK */
	0x00,	/* U+047A CYRILLIC CAPITAL LETTER ROUND OMEGA */
	0x00,	/* U+047B CYRILLIC SMALL LETTER ROUND OMEGA */
	0x00,	/* U+047C CYRILLIC CAPITAL LETTER OMEGA WITH TITLO */
	0x00,	/* U+047D CYRILLIC SMALL LETTER OMEGA WITH TITLO */
	0x00,	/* U+047E CYRILLIC CAPITAL LETTER OT */
	0x00,	/* U+047F CYRILLIC SMALL LETTER OT */
	0x00,	/* U+0480 CYRILLIC CAPITAL LETTER KOPPA */
	0x00,	/* U+0481 CYRILLIC SMALL LETTER KOPPA */
	0x00,	/* U+0482 CYRILLIC THOUSANDS SIGN */
	0x00,	/* U+0483 */
	0x00,	/* U+0484 */
	0x00,	/* U+0485 */
	0x00,	/* U+0486 */
	0x00,	/* U+0487 */
	0x00,	/* U+0488 */
	0x00,	/* U+0489 */
	0x00,	/* U+048A */
	0x00,	/* U+048B */
	0x00,	/* U+048C */
	0x00,	/* U+048D */
	0x00,	/* U+048E */
	0x00,	/* U+048F */
	0x00,	/* U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN */
	0x00,	/* U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN */
	0x00,	/* U+0492 CYRILLIC CAPITAL LETTER GHE WITH STROKE */
	0x00,	/* U+0493 CYRILLIC SMALL LETTER GHE WITH STROKE */
	0x00,	/* U+0494 CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK */
	0x00,	/* U+0495 CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK */
	0x00,	/* U+0496 CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER */
	0x00,	/* U+0497 CYRILLIC SMALL LETTER ZHE WITH DESCENDER */
	0x00,	/* U+0498 CYRILLIC CAPITAL LETTER ZE WITH DESCENDER */
	0x00,	/* U+0499 CYRILLIC SMALL LETTER ZE WITH DESCENDER */
	0x00,	/* U+049A CYRILLIC CAPITAL LETTER KA WITH DESCENDER */
	0x00,	/* U+049B CYRILLIC SMALL LETTER KA WITH DESCENDER */
	0x00,	/* U+049C CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE */
	0x00,	/* U+049D CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE */
	0x00,	/* U+049E CYRILLIC CAPITAL LETTER KA WITH STROKE */
	0x00,	/* U+049F CYRILLIC SMALL LETTER KA WITH STROKE */
	0x00,	/* U+04A0 CYRILLIC CAPITAL LETTER BASHKIR KA */
	0x00,	/* U+04A1 CYRILLIC SMALL LETTER BASHKIR KA */
	0x00,	/* U+04A2 CYRILLIC CAPITAL LETTER EN WITH DESCENDER */
	0x00,	/* U+04A3 CYRILLIC SMALL LETTER EN WITH DESCENDER */
	0x00,	/* U+04A4 CYRILLIC CAPITAL LIGATURE EN GHE */
	0x00,	/* U+04A5 CYRILLIC SMALL LIGATURE EN GHE */
	0x00,	/* U+04A6 CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK */
	0x00,	/* U+04A7 CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK */
	0x00,	/* U+04A8 CYRILLIC CAPITAL LETTER ABKHASIAN HA */
	0x00,	/* U+04A9 CYRILLIC SMALL LETTER ABKHASIAN HA */
	0x00,	/* U+04AA CYRILLIC CAPITAL LETTER ES WITH DESCENDER */
	0x00,	/* U+04AB CYRILLIC SMALL LETTER ES WITH DESCENDER */
	0x00,	/* U+04AC CYRILLIC CAPITAL LETTER TE WITH DESCENDER */
	0x00,	/* U+04AD CYRILLIC SMALL LETTER TE WITH DESCENDER */
	0x00,	/* U+04AE CYRILLIC CAPITAL LETTER STRAIGHT U */
	0x00,	/* U+04AF CYRILLIC SMALL LETTER STRAIGHT U */
	0x00,	/* U+04B0 CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE */
	0x00,	/* U+04B1 CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE */
	0x00,	/* U+04B2 CYRILLIC CAPITAL LETTER HA WITH DESCENDER */
	0x00,	/* U+04B3 CYRILLIC SMALL LETTER HA WITH DESCENDER */
	0x00,	/* U+04B4 CYRILLIC CAPITAL LIGATURE TE TSE */
	0x00,	/* U+04B5 CYRILLIC SMALL LIGATURE TE TSE */
	0x00,	/* U+04B6 CYRILLIC CAPITAL LETTER CHE WITH DESCENDER */
	0x00,	/* U+04B7 CYRILLIC SMALL LETTER CHE WITH DESCENDER */
	0x00,	/* U+04B8 CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE */
	0x00,	/* U+04B9 CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE */
	0x00,	/* U+04BA CYRILLIC CAPITAL LETTER SHHA */
	0x00,	/* U+04BB CYRILLIC SMALL LETTER SHHA */
	0x00,	/* U+04BC CYRILLIC CAPITAL LETTER ABKHASIAN CHE */
	0x00,	/* U+04BD CYRILLIC SMALL LETTER ABKHASIAN CHE */
	0x00,	/* U+04BE CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER */
	0x00,	/* U+04BF CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER */
	0x00,	/* U+04C0 CYRILLIC LETTER PALOCHKA */
	0x00,	/* U+04C1 CYRILLIC CAPITAL LETTER ZHE WITH BREVE */
	0x00,	/* U+04C2 CYRILLIC SMALL LETTER ZHE WITH BREVE */
	0x00,	/* U+04C3 CYRILLIC CAPITAL LETTER KA WITH HOOK */
	0x00,	/* U+04C4 CYRILLIC SMALL LETTER KA WITH HOOK */
	0x00,	/* U+04C5 */
	0x00,	/* U+04C6 */
	0x00,	/* U+04C7 CYRILLIC CAPITAL LETTER EN WITH HOOK */
	0x00,	/* U+04C8 CYRILLIC SMALL LETTER EN WITH HOOK */
	0x00,	/* U+04C9 */
	0x00,	/* U+04CA */
	0x00,	/* U+04CB CYRILLIC CAPITAL LETTER KHAKASSIAN CHE */
	0x00,	/* U+04CC CYRILLIC SMALL LETTER KHAKASSIAN CHE */
	0x00,	/* U+04CD */
	0x00,	/* U+04CE */
	0x00,	/* U+04CF */
	0x00,	/* U+04D0 CYRILLIC CAPITAL LETTER A WITH BREVE */
	0x00,	/* U+04D1 CYRILLIC SMALL LETTER A WITH BREVE */
	0x00,	/* U+04D2 CYRILLIC CAPITAL LETTER A WITH DIAERESIS */
	0x00,	/* U+04D3 CYRILLIC SMALL LETTER A WITH DIAERESIS */
	0x00,	/* U+04D4 CYRILLIC CAPITAL LIGATURE A IE */
	0x00,	/* U+04D5 CYRILLIC SMALL LIGATURE A IE */
	0x00,	/* U+04D6 CYRILLIC CAPITAL LETTER IE WITH BREVE */
	0x00,	/* U+04D7 CYRILLIC SMALL LETTER IE WITH BREVE */
	0x00,	/* U+04D8 CYRILLIC CAPITAL LETTER SCHWA */
	0x00,	/* U+04D9 CYRILLIC SMALL LETTER SCHWA */
	0x00,	/* U+04DA CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS */
	0x00,	/* U+04DB CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS */
	0x00,	/* U+04DC CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS */
	0x00,	/* U+04DD CYRILLIC SMALL LETTER ZHE WITH DIAERESIS */
	0x00,	/* U+04DE CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS */
	0x00,	/* U+04DF CYRILLIC SMALL LETTER ZE WITH DIAERESIS */
	0x00,	/* U+04E0 CYRILLIC CAPITAL LETTER ABKHASIAN DZE */
	0x00,	/* U+04E1 CYRILLIC SMALL LETTER ABKHASIAN DZE */
	0x00,	/* U+04E2 CYRILLIC CAPITAL LETTER I WITH MACRON */
	0x00,	/* U+04E3 CYRILLIC SMALL LETTER I WITH MACRON */
	0x00,	/* U+04E4 CYRILLIC CAPITAL LETTER I WITH DIAERESIS */
	0x00,	/* U+04E5 CYRILLIC SMALL LETTER I WITH DIAERESIS */
	0x00,	/* U+04E6 CYRILLIC CAPITAL LETTER O WITH DIAERESIS */
	0x00,	/* U+04E7 CYRILLIC SMALL LETTER O WITH DIAERESIS */
	0x00,	/* U+04E8 CYRILLIC CAPITAL LETTER BARRED O */
	0x00,	/* U+04E9 CYRILLIC SMALL LETTER BARRED O */
	0x00,	/* U+04EA CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS */
	0x00,	/* U+04EB CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS */
	0x00,	/* U+04EC */
	0x00,	/* U+04ED */
	0x00,	/* U+04EE CYRILLIC CAPITAL LETTER U WITH MACRON */
	0x00,	/* U+04EF CYRILLIC SMALL LETTER U WITH MACRON */
	0x00,	/* U+04F0 CYRILLIC CAPITAL LETTER U WITH DIAERESIS */
	0x00,	/* U+04F1 CYRILLIC SMALL LETTER U WITH DIAERESIS */
	0x00,	/* U+04F2 CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE */
	0x00,	/* U+04F3 CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE */
	0x00,	/* U+04F4 CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS */
	0x00,	/* U+04F5 CYRILLIC SMALL LETTER CHE WITH DIAERESIS */
	0x00,	/* U+04F6 */
	0x00,	/* U+04F7 */
	0x00,	/* U+04F8 CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS */
	0x00,	/* U+04F9 CYRILLIC SMALL LETTER YERU WITH DIAERESIS */
	0x00,	/* U+04FA */
	0x00,	/* U+04FB */
	0x00,	/* U+04FC */
	0x00,	/* U+04FD */
	0x00,	/* U+04FE */
	0x00	/* U+04FF */
};
* The name part is 8 characters long and the extension part is 3 @@ -701,6 +965,7 @@ win2unixfn(wep, dp, chksum) { u_int8_t *cp; u_int8_t *np, *ep = dp->d_name + WIN_MAXLEN; + u_int16_t code; int i; if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS) @@ -731,14 +996,25 @@ win2unixfn(wep, dp, chksum) * Convert the name parts */ for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) { - switch (*np++ = *cp++) { + code = (cp[1] << 8) | cp[0]; + switch (code) { case 0: + *np = '\0'; dp->d_namlen -= sizeof(wep->wePart2)/2 + sizeof(wep->wePart3)/2 + i + 1; return chksum; case '/': - np[-1] = 0; + *np = '\0'; return -1; + default: + if (code & 0xff00) { + if ((code &~ 0xff) == 0x400) + code = cyr2u[code & 0xff]; + else + return -1; + } + *np++ = code; + break; } /* * The size comparison should result in the compiler @@ -749,17 +1025,27 @@ win2unixfn(wep, dp, chksum) np[-1] = 0; return -1; } - if (*cp++) - return -1; + cp += 2; } for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) { - switch (*np++ = *cp++) { + code = (cp[1] << 8) | cp[0]; + switch (code) { case 0: + *np = '\0'; dp->d_namlen -= sizeof(wep->wePart3)/2 + i + 1; return chksum; case '/': - np[-1] = 0; + *np = '\0'; return -1; + default: + if (code & 0xff00) { + if ((code &~ 0xff) == 0x400) + code = cyr2u[code & 0xff]; + else + return -1; + } + *np++ = code; + break; } /* * The size comparisons should be optimized away @@ -770,17 +1056,27 @@ win2unixfn(wep, dp, chksum) np[-1] = 0; return -1; } - if (*cp++) - return -1; + cp += 2; } for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) { - switch (*np++ = *cp++) { + code = (cp[1] << 8) | cp[0]; + switch (code) { case 0: + *np = '\0'; dp->d_namlen -= i + 1; return chksum; case '/': - np[-1] = 0; + *np = '\0'; return -1; + default: + if (code & 0xff00) { + if ((code &~ 0xff) == 0x400) + code = cyr2u[code & 0xff]; + else + return -1; + } + *np++ = code; + break; } /* * See above @@ -790,8 +1086,7 @@ win2unixfn(wep, dp, chksum) 
np[-1] = 0; return -1; } - if (*cp++) - return -1; + cp += 2; } return chksum; } |