diff options
Diffstat (limited to 'share/i18n/csmapper/APPLE')
44 files changed, 12651 insertions, 0 deletions
diff --git a/share/i18n/csmapper/APPLE/ARABIC%UCS.src b/share/i18n/csmapper/APPLE/ARABIC%UCS.src new file mode 100644 index 0000000..6a7d119 --- /dev/null +++ b/share/i18n/csmapper/APPLE/ARABIC%UCS.src @@ -0,0 +1,451 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME ARABIC/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +#======================================================================= +# File name: ARABIC.TXT +# +# Contents: Map (external version) from Mac OS Arabic +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-04 Update header comments. Matches internal xml +# <c1.2> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Add comments about character display and +# direction overrides. Update URLs, notes. +# Matches internal utom<b4>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n10 1998-Feb-05 Show required Unicode character +# directionality in a different way. Matches +# internal utom<n4>, ufrm<n21>, and Text +# Encoding Converter version 1.3. Update +# header comments; include information on +# loose mapping of digits. +# n07 1997-Jul-17 Update to match internal utom<n2>, ufrm<n17>: +# Change standard mapping for 0xC0 from U+066D +# to U+274A. Add direction overrides to +# mappings for 0x25, 0x2C, 0x3B, 0x3F. Add +# information on variants. +# n03 1995-Apr-18 First version (after fixing some typos). +# Matches internal ufrm<n11>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. 
For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Arabic code (in hex as 0xNN). +# Column #2 is the corresponding Unicode (in hex as 0xNNNN), +# possibly preceded by a tag indicating required directionality +# (i.e. <LR>+0xNNNN or <RL>+0xNNNN). +# Column #3 is a comment containing the Unicode name. +# +# The entries are in Mac OS Arabic code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Arabic character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Arabic: +# ----------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# 1. General +# +# The Mac OS Arabic character set is intended to cover Arabic as +# used in North Africa, the Arabian peninsula, and the Levant. It +# also contains several characters needed for Urdu and/or Farsi. 
+# +# The Mac OS Arabic character set is essentially a superset of ISO +# 8859-6. The 8859-6 code points that are interpreted differently +# in the Mac OS Arabic set are as follows: +# 0xA0 is NO-BREAK SPACE in 8859-6 and right-left SPACE in Mac OS +# Arabic; NO-BREAK SPACE is 0x81 in Mac OS Arabic. +# 0xA4 is CURRENCY SIGN in 8859-6 and right-left DOLLAR SIGN in +# Mac OS Arabic. +# 0xAD is SOFT HYPHEN in 8859-6 and right-left HYPHEN-MINUS in +# Mac OS Arabic. +# ISO 8859-6 specifies that codes 0x30-0x39 can be rendered either +# with European digit shapes or Arabic digit shapes. This is also +# true in Mac OS Arabic, which determines from context which digit +# shapes to use (see below). +# +# The Mac OS Arabic character set uses the C1 controls area and other +# code points which are undefined in ISO 8859-6 for additional +# graphic characters: additional Arabic letters for Farsi and Urdu, +# some accented Roman letters for European languages (such as French), +# and duplicates of some of the punctuation, symbols, and digits in +# the ASCII block. The duplicate punctuation, symbol, and digit +# characters have right-left directionality, while the ASCII versions +# have left-right directionality. See the next section for more +# information on this. +# +# Mac OS Arabic characters 0xEB-0xF2 are non-spacing/combining marks. +# +# 2. Directional characters and roundtrip fidelity +# +# The Mac OS Arabic character set was developed in 1986-1987. At that +# time the bidirectional line layout algorithm used in the Mac OS +# Arabic system was fairly simple; it used only a few direction +# classes (instead of the 19 now used in the Unicode bidirectional +# algorithm). In order to permit users to handle some tricky layout +# problems, certain punctuation and symbol characters were encoded +# twice, one with a left-right direction attribute and the other with +# a right-left direction attribute.
+# +# For example, plus sign is encoded at 0x2B with a left-right +# attribute, and at 0xAB with a right-left attribute. However, there +# is only one PLUS SIGN character in Unicode. This leads to some +# interesting problems when mapping between Mac OS Arabic and Unicode; +# see below. +# +# A related problem is that even when a particular character is +# encoded only once in Mac OS Arabic, it may have a different +# direction attribute than the corresponding Unicode character. +# +# For example, the Mac OS Arabic character at 0x93 is HORIZONTAL +# ELLIPSIS with strong right-left direction. However, the Unicode +# character HORIZONTAL ELLIPSIS has direction class neutral. +# +# 3. Behavior of ASCII-range numbers in WorldScript +# +# Mac OS Arabic also has two sets of digit codes. +# +# The digits at 0x30-0x39 may be displayed using either European +# digit forms or Arabic digit forms, depending on context. If there +# is a "strong European" character such as a Latin letter on either +# side of a sequence consisting of digits 0x30-0x39 and possibly comma +# 0x2C or period 0x2E, then the characters will be displayed using +# European forms (This will happen even if there are neutral characters +# between the digits and the strong European character). Otherwise, the +# digits will be displayed using Arabic forms, the comma will be +# displayed as Arabic thousands separator, and the period as Arabic +# decimal separator. In any case, 0x2C, 0x2E, and 0x30-0x39 are always +# left-right. +# +# The digits at 0xB0-0xB9 are always displayed using Arabic digit +# shapes, and moreover, these digits always have strong right-left +# directionality. These are mainly intended for special layout +# purposes such as part numbers, etc. +# +# 4. Font variants +# +# The table in this file gives the Unicode mappings for the standard +# Mac OS Arabic encoding. 
This encoding is supported by the Cairo font +# (the system font for Arabic), and is the encoding supported by the +# text processing utilities. However, the other Arabic fonts actually +# implement slightly different encodings; this mainly affects the code +# points 0xAA and 0xC0. For these code points the standard Mac OS +# Arabic encoding has the following mappings: +# 0xAA -> <RL>+0x002A ASTERISK, right-left +# 0xC0 -> <RL>+0x274A EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, +# right-left +# This mapping of 0xAA is consistent with the normal convention for +# Mac OS Arabic and Hebrew that the right-left duplicates have codes +# that are equal to the ASCII code of the left-right character plus +# 0x80. However, in all of the other fonts, 0xAA is MULTIPLY SIGN, and +# right-left ASTERISK may be at a different code point. The other +# variants are described below. +# +# The TrueType variant is used for most of the Arabic TrueType fonts: +# Baghdad, Geeza, Kufi, Nadeem. It differs from the standard variant +# in the following way: +# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left +# 0xC0 -> <RL>+0x002A ASTERISK, right-left +# +# The Thuluth variant is used for the Arabic Postscript-only fonts: +# Thuluth and Thuluth bold. It differs from the standard variant in +# the following way: +# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left +# 0xC0 -> 0x066D ARABIC FIVE POINTED STAR +# +# The AlBayan variant is used for the Arabic TrueType font Al Bayan. +# It differs from the standard variant in the following way: +# 0x81 -> no mapping (glyph just has authorship information, etc.) +# 0xA3 -> 0xFDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM +# 0xA4 -> 0xFDF2 ARABIC LIGATURE ALLAH ISOLATED FORM +# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left +# 0xDC -> <RL>+0x25CF BLACK CIRCLE, right-left +# 0xFC -> <RL>+0x25A0 BLACK SQUARE, right-left +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. 
Matching the direction of Mac OS Arabic characters +# +# When Mac OS Arabic encodes a character twice but with different +# direction attributes for the two code points - as in the case of +# plus sign mentioned above - we need a way to map both Mac OS Arabic +# code points to Unicode and back again without loss of information. +# With the plus sign, for example, mapping one of the Mac OS Arabic +# characters to a code in the Unicode corporate use zone is +# undesirable, since both of the plus sign characters are likely to +# be used in text that is interchanged. +# +# The problem is solved with the use of direction override characters +# and direction-dependent mappings. When mapping from Mac OS Arabic +# to Unicode, we use direction overrides as necessary to force the +# direction of the resulting Unicode characters. +# +# The required direction is indicated by a direction tag in the +# mappings. A tag of <LR> means the corresponding Unicode character +# must have a strong left-right context, and a tag of <RL> indicates +# a right-left context. +# +# For example, the mapping of 0x2B is given as <LR>+0x002B; the +# mapping of 0xAB is given as <RL>+0x002B. If we map an isolated +# instance of 0x2B to Unicode, it should be mapped as follows (LRO +# indicates LEFT-RIGHT OVERRIDE, PDF indicates POP DIRECTION +# FORMATTING): +# +# 0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF) +# +# When mapping several characters in a row that require direction +# forcing, the overrides need only be used at the beginning and end. +# For example: +# +# 0x24 0x20 0x28 0x29 -> 0x202D 0x0024 0x0020 0x0028 0x0029 0x202C +# +# If neutral characters that require direction forcing are already +# between strong-direction characters with matching directionality, +# then direction overrides need not be used. Direction overrides are +# always needed to map the right-left digits at 0xB0-0xB9. 
+# +# When mapping from Unicode to Mac OS Arabic, the Unicode +# bidirectional algorithm should be used to determine resolved +# direction of the Unicode characters. The mapping from Unicode to +# Mac OS Arabic can then be disambiguated by the use of the resolved +# direction: +# +# Unicode 0x002B -> Mac OS Arabic 0x2B (if L) or 0xAB (if R) +# +# However, this also means the direction override characters should +# be discarded when mapping from Unicode to Mac OS Arabic (after +# they have been used to determine resolved direction), since the +# direction override information is carried by the code point itself. +# +# Even when direction overrides are not needed for roundtrip +# fidelity, they are sometimes used when mapping Mac OS Arabic +# characters to Unicode in order to achieve similar text layout with +# the resulting Unicode text. For example, the single Mac OS Arabic +# ellipsis character has direction class right-left, and there is no +# left-right version. However, the Unicode HORIZONTAL ELLIPSIS +# character has direction class neutral (which means it may end up +# with a resolved direction of left-right if surrounded by left-right +# characters). When mapping the Mac OS Arabic ellipsis to Unicode, it +# is surrounded with a direction override to help preserve proper +# text layout. The resolved direction is not needed or used when +# mapping the Unicode HORIZONTAL ELLIPSIS back to Mac OS Arabic. +# +# 2. Mapping the Mac OS Arabic digits +# +# The main table below contains mappings that should be used when +# strict round-trip fidelity is required. However, for numeric +# values, the mappings in that table will produce Unicode characters +# that may appear different than the Mac OS Arabic text displayed on +# a Mac OS system using WorldScript. This is because WorldScript +# uses context-dependent display for the 0x30-0x39 digits.
+# +# If roundtrip fidelity is not required, then the following +# alternate mappings should be used when a sequence of 0x30-0x39 +# digits - possibly including 0x2C and 0x2E - occurs in an Arabic +# context (that is, when the first "strong" character on either side +# of the digit sequence is Arabic, or there is no strong character): +# +# 0x2C 0x066C # ARABIC THOUSANDS SEPARATOR +# 0x2E 0x066B # ARABIC DECIMAL SEPARATOR +# 0x30 0x0660 # ARABIC-INDIC DIGIT ZERO +# 0x31 0x0661 # ARABIC-INDIC DIGIT ONE +# 0x32 0x0662 # ARABIC-INDIC DIGIT TWO +# 0x33 0x0663 # ARABIC-INDIC DIGIT THREE +# 0x34 0x0664 # ARABIC-INDIC DIGIT FOUR +# 0x35 0x0665 # ARABIC-INDIC DIGIT FIVE +# 0x36 0x0666 # ARABIC-INDIC DIGIT SIX +# 0x37 0x0667 # ARABIC-INDIC DIGIT SEVEN +# 0x38 0x0668 # ARABIC-INDIC DIGIT EIGHT +# 0x39 0x0669 # ARABIC-INDIC DIGIT NINE +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n03 to version n07: +# +# - Change mapping for 0xC0 from U+066D to U+274A. +# +# - Add direction overrides (required directionality) to mappings +# for 0x25, 0x2C, 0x3B, 0x3F. 
+# +################## +0x00 - 0x7F = 0x0000 - +0x80 = 0x00C4 +0x81 = 0x00A0 +0x82 = 0x00C7 +0x83 = 0x00C9 +0x84 = 0x00D1 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x06BA +0x8C = 0x00AB +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00ED +0x93 = 0x2026 +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x00F1 +0x97 = 0x00F3 +0x98 = 0x00BB +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F7 +0x9C = 0x00FA +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x0020 +0xA1 = 0x0021 +0xA2 = 0x0022 +0xA3 = 0x0023 +0xA4 = 0x0024 +0xA5 = 0x066A +0xA6 = 0x0026 +0xA7 = 0x0027 +0xA8 = 0x0028 +0xA9 = 0x0029 +0xAA = 0x002A +0xAB = 0x002B +0xAC = 0x060C +0xAD = 0x002D +0xAE = 0x002E +0xAF = 0x002F +0xB0 = 0x0660 +0xB1 = 0x0661 +0xB2 = 0x0662 +0xB3 = 0x0663 +0xB4 = 0x0664 +0xB5 = 0x0665 +0xB6 = 0x0666 +0xB7 = 0x0667 +0xB8 = 0x0668 +0xB9 = 0x0669 +0xBA = 0x003A +0xBB = 0x061B +0xBC = 0x003C +0xBD = 0x003D +0xBE = 0x003E +0xBF = 0x061F +0xC0 = 0x274A +0xC1 = 0x0621 +0xC2 = 0x0622 +0xC3 = 0x0623 +0xC4 = 0x0624 +0xC5 = 0x0625 +0xC6 = 0x0626 +0xC7 = 0x0627 +0xC8 = 0x0628 +0xC9 = 0x0629 +0xCA = 0x062A +0xCB = 0x062B +0xCC = 0x062C +0xCD = 0x062D +0xCE = 0x062E +0xCF = 0x062F +0xD0 = 0x0630 +0xD1 = 0x0631 +0xD2 = 0x0632 +0xD3 = 0x0633 +0xD4 = 0x0634 +0xD5 = 0x0635 +0xD6 = 0x0636 +0xD7 = 0x0637 +0xD8 = 0x0638 +0xD9 = 0x0639 +0xDA = 0x063A +0xDB = 0x005B +0xDC = 0x005C +0xDD = 0x005D +0xDE = 0x005E +0xDF = 0x005F +0xE0 = 0x0640 +0xE1 = 0x0641 +0xE2 = 0x0642 +0xE3 = 0x0643 +0xE4 = 0x0644 +0xE5 = 0x0645 +0xE6 = 0x0646 +0xE7 = 0x0647 +0xE8 = 0x0648 +0xE9 = 0x0649 +0xEA = 0x064A +0xEB = 0x064B +0xEC = 0x064C +0xED = 0x064D +0xEE = 0x064E +0xEF = 0x064F +0xF0 = 0x0650 +0xF1 = 0x0651 +0xF2 = 0x0652 +0xF3 = 0x067E +0xF4 = 0x0679 +0xF5 = 0x0686 +0xF6 = 0x06D5 +0xF7 = 0x06A4 +0xF8 = 0x06AF +0xF9 = 0x0688 +0xFA = 0x0691 +0xFB = 0x007B +0xFC = 0x007C +0xFD = 0x007D +0xFE = 0x0698 +0xFF = 0x06D2 +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/CELTIC%UCS.src b/share/i18n/csmapper/APPLE/CELTIC%UCS.src new file mode 100644 index 0000000..3022aa1 --- /dev/null +++ b/share/i18n/csmapper/APPLE/CELTIC%UCS.src @@ -0,0 +1,248 @@ +# $FreeBSD$ +# $NetBSD: CELTIC%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME CELTIC/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: CELTIC.TXT +# +# Contents: Map (external version) from Mac OS Celtic +# character set to Unicode 2.1 and later +# +# Contacts: charsets@apple.com, everson@evertype.com +# +# Changes: +# +# c01 2005-Apr-01 First posted version. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". 
+# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Celtic code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Celtic code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Celtic character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Celtic (partly from Michael Everson): +# ----------------------------------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# This character set was developed by Michael Everson of Everson +# Typography (everson@evertype.com) and was used for the Irish +# localizations of Mac OS 6.0.8 and 7.1, for the Welsh localization of +# Mac OS 7.1, and for several fonts that can be used on any version of +# Mac OS 7.1 or later. Note that while Apple authorized +# the Irish and Welsh localizations mentioned above, they were not +# systems which shipped with Apple hardware, and were not otherwise +# supported by Apple. Fonts conforming to the Mac OS Celtic character +# set are available from Everson Typography (http://www.evertype.com) +# and MEU Cymru (http://www.meucymru.co.uk). Information about the use +# of this character set is available at +# http://www.evertype.com/celtscript/celtcode.html. +# +# The Mac OS Celtic encoding shares the script code smRoman (0) with +# the standard Mac OS Roman encoding. To determine if the Celtic +# encoding is being used in Mac OS 7-9, you should also check if the +# system region code is 50, verIreland, or 79, verWales. Otherwise, +# you can check for particular fonts that conform to this encoding. 
+# +# This character set is a variant of standard Mac OS Roman, adding +# capital and small y with acute, grave, and circumflex, and capital +# and small w with acute, grave, circumflex and diaeresis. It has 14 +# code point differences from standard Mac OS Roman (0xDE, 0xDF, 0xE2, +# 0xE3, 0xF6-0xFF). +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts were updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Celtic encoding that still +# maps 0xDB to U+00A4; this can be used for older fonts. +# Note: U+20AC is new with Unicode 2.1; for earlier Unicode +# versions, Mac OS Celtic 0xDB may be mapped to private-use +# character U+F8A0. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +0x00 - 0x7F = 0x0000 - +0x80 = 0x00C4 +0x81 = 0x00C5 +0x82 = 0x00C7 +0x83 = 0x00C9 +0x84 = 0x00D1 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x00E3 +0x8C = 0x00E5 +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00ED +0x93 = 0x00EC +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x00F1 +0x97 = 0x00F3 +0x98 = 0x00F2 +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F5 +0x9C = 0x00FA +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x2020 +0xA1 = 0x00B0 +0xA2 = 0x00A2 +0xA3 = 0x00A3 +0xA4 = 0x00A7 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x00DF +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x2122 +0xAB = 0x00B4 +0xAC = 0x00A8 +0xAD = 0x2260 +0xAE = 0x00C6 +0xAF = 0x00D8 +0xB0 = 0x221E +0xB1 = 0x00B1 +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x00A5 +0xB5 = 0x00B5 +0xB6 = 0x2202 +0xB7 = 0x2211 +0xB8 = 0x220F +0xB9 = 0x03C0 +0xBA = 0x222B +0xBB = 0x00AA +0xBC = 0x00BA +0xBD = 0x03A9 +0xBE
= 0x00E6 +0xBF = 0x00F8 +0xC0 = 0x00BF +0xC1 = 0x00A1 +0xC2 = 0x00AC +0xC3 = 0x221A +0xC4 = 0x0192 +0xC5 = 0x2248 +0xC6 = 0x2206 +0xC7 = 0x00AB +0xC8 = 0x00BB +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x00C0 +0xCC = 0x00C3 +0xCD = 0x00D5 +0xCE = 0x0152 +0xCF = 0x0153 +0xD0 = 0x2013 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x00F7 +0xD7 = 0x25CA +0xD8 = 0x00FF +0xD9 = 0x0178 +0xDA = 0x2044 +0xDB = 0x20AC +0xDC = 0x2039 +0xDD = 0x203A +0xDE = 0x0176 +0xDF = 0x0177 +0xE0 = 0x2021 +0xE1 = 0x00B7 +0xE2 = 0x1EF2 +0xE3 = 0x1EF3 +0xE4 = 0x2030 +0xE5 = 0x00C2 +0xE6 = 0x00CA +0xE7 = 0x00C1 +0xE8 = 0x00CB +0xE9 = 0x00C8 +0xEA = 0x00CD +0xEB = 0x00CE +0xEC = 0x00CF +0xED = 0x00CC +0xEE = 0x00D3 +0xEF = 0x00D4 +0xF0 = 0x2663 +0xF1 = 0x00D2 +0xF2 = 0x00DA +0xF3 = 0x00DB +0xF4 = 0x00D9 +0xF5 = 0x0131 +0xF6 = 0x00DD +0xF7 = 0x00FD +0xF8 = 0x0174 +0xF9 = 0x0175 +0xFA = 0x1E84 +0xFB = 0x1E85 +0xFC = 0x1E80 +0xFD = 0x1E81 +0xFE = 0x1E82 +0xFF = 0x1E83 +END_MAP diff --git a/share/i18n/csmapper/APPLE/CENTEURO%UCS.src b/share/i18n/csmapper/APPLE/CENTEURO%UCS.src new file mode 100644 index 0000000..c23b7b5 --- /dev/null +++ b/share/i18n/csmapper/APPLE/CENTEURO%UCS.src @@ -0,0 +1,247 @@ +# $FreeBSD$ +# $NetBSD: CENTEURO%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME CENTEURO/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: CENTEURO.TXT +# +# Contents: Map (external version) from Mac OS Central European +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-04 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. 
+# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Update header comments to new format; no +# mapping changes. Matches internal utom<n3>, +# ufrm<n13>, and Text Encoding Converter +# version 1.3. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n5>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Central European code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Central European code order. 
+# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Central European character set uses the standard control +# characters at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Central European: +# --------------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# This character set is intended to cover the following languages: +# +# Polish, Czech, Slovak, Hungarian, Estonian, Latvian, Lithuanian +# +# These are written in Latin script, but using a different set of +# accented characters than Mac OS Roman. The Mac OS Central +# European character set also includes a number of characters +# needed for the Mac OS user interface and localization (e.g. +# ellipsis, bullet, copyright sign), several typographic +# punctuation symbols, math symbols, etc. However, it has a +# smaller set of punctuation and symbols than Mac OS Roman. All of +# the characters in Mac OS Central European that are also in the +# Mac OS Roman character set are at the same code point in both +# character sets; this improves application compatibility. +# +# Note: This does not have the same letter repertoire as ISO +# 8859-2 (Latin-2); each has some accented letters that the other +# does not have.
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +0x00 - 0x7F = 0x0000 - +0x80 = 0x00C4 +0x81 = 0x0100 +0x82 = 0x0101 +0x83 = 0x00C9 +0x84 = 0x0104 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x0105 +0x89 = 0x010C +0x8A = 0x00E4 +0x8B = 0x010D +0x8C = 0x0106 +0x8D = 0x0107 +0x8E = 0x00E9 +0x8F = 0x0179 +0x90 = 0x017A +0x91 = 0x010E +0x92 = 0x00ED +0x93 = 0x010F +0x94 = 0x0112 +0x95 = 0x0113 +0x96 = 0x0116 +0x97 = 0x00F3 +0x98 = 0x0117 +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F5 +0x9C = 0x00FA +0x9D = 0x011A +0x9E = 0x011B +0x9F = 0x00FC +0xA0 = 0x2020 +0xA1 = 0x00B0 +0xA2 = 0x0118 +0xA3 = 0x00A3 +0xA4 = 0x00A7 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x00DF +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x2122 +0xAB = 0x0119 +0xAC = 0x00A8 +0xAD = 0x2260 +0xAE = 0x0123 +0xAF = 0x012E +0xB0 = 0x012F +0xB1 = 0x012A +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x012B +0xB5 = 0x0136 +0xB6 = 0x2202 +0xB7 = 0x2211 +0xB8 = 0x0142 +0xB9 = 0x013B +0xBA = 0x013C +0xBB = 0x013D +0xBC = 0x013E +0xBD = 0x0139 +0xBE = 0x013A +0xBF = 0x0145 +0xC0 = 0x0146 +0xC1 = 0x0143 +0xC2 = 0x00AC +0xC3 = 0x221A +0xC4 = 0x0144 +0xC5 = 0x0147 +0xC6 = 0x2206 +0xC7 = 0x00AB +0xC8 = 0x00BB +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x0148 +0xCC = 0x0150 +0xCD = 0x00D5 +0xCE = 0x0151 +0xCF = 0x014C +0xD0 = 0x2013 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x00F7 +0xD7 = 0x25CA +0xD8 = 0x014D +0xD9 = 0x0154 +0xDA = 0x0155 +0xDB = 0x0158 +0xDC = 0x2039 +0xDD = 0x203A +0xDE = 0x0159 +0xDF = 0x0156 +0xE0 = 0x0157 +0xE1 = 0x0160 +0xE2 = 0x201A +0xE3 = 0x201E +0xE4 = 0x0161 +0xE5 = 0x015A +0xE6 = 0x015B +0xE7 = 0x00C1 +0xE8 = 0x0164 +0xE9 = 0x0165 +0xEA = 0x00CD +0xEB = 0x017D +0xEC = 0x017E +0xED = 0x016A +0xEE = 0x00D3 +0xEF = 0x00D4 +0xF0 = 0x016B +0xF1 = 0x016E +0xF2 = 0x00DA +0xF3 = 0x016F +0xF4 = 0x0170 +0xF5 = 0x0171 +0xF6 = 
0x0172 +0xF7 = 0x0173 +0xF8 = 0x00DD +0xF9 = 0x00FD +0xFA = 0x0137 +0xFB = 0x017B +0xFC = 0x0141 +0xFD = 0x017C +0xFE = 0x0122 +0xFF = 0x02C7 +END_MAP diff --git a/share/i18n/csmapper/APPLE/CROATIAN%UCS.src b/share/i18n/csmapper/APPLE/CROATIAN%UCS.src new file mode 100644 index 0000000..e44fb12 --- /dev/null +++ b/share/i18n/csmapper/APPLE/CROATIAN%UCS.src @@ -0,0 +1,271 @@ +# $FreeBSD$ +# $NetBSD: CROATIAN%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME CROATIAN/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: CROATIAN.TXT +# +# Contents: Map (external version) from Mac OS Croatian +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-04 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b3>. +# b02 1999-Sep-22 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to EURO +# SIGN. Update contact e-mail address. Matches +# internal utom<b2>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n07 1998-Feb-05 Minor update to header comments +# n05 1997-Dec-14 Update to match internal utom<5>, ufrm<16>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<6>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. 
For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Croatian code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Croatian code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Croatian character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Croatian: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Croatian is used for Croatian and Slovene. +# +# The Mac OS Croatian encoding shares the script code smRoman +# (0) with the standard Mac OS Roman encoding. 
To determine if +# the Croatian encoding is being used, you must check if the +# system region code is 68, verCroatia (or 25, verYugoCroatian, +# only used in older systems). +# +# This character set is a variant of standard Mac OS Roman +# encoding, adding five accented letter case pairs to handle +# Croatian. It has 20 code point differences from standard +# Mac OS Roman, but only 10 differences in repertoire. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Croatian encoding that +# still maps 0xDB to U+00A4; this can be used for older fonts. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n07 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n03 to version n05: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. 
+# +################## +0x00 - 0x7E = 0x0000 - +0x80 = 0x00C4 +0x81 = 0x00C5 +0x82 = 0x00C7 +0x83 = 0x00C9 +0x84 = 0x00D1 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x00E3 +0x8C = 0x00E5 +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00ED +0x93 = 0x00EC +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x00F1 +0x97 = 0x00F3 +0x98 = 0x00F2 +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F5 +0x9C = 0x00FA +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x2020 +0xA1 = 0x00B0 +0xA2 = 0x00A2 +0xA3 = 0x00A3 +0xA4 = 0x00A7 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x00DF +0xA8 = 0x00AE +0xA9 = 0x0160 +0xAA = 0x2122 +0xAB = 0x00B4 +0xAC = 0x00A8 +0xAD = 0x2260 +0xAE = 0x017D +0xAF = 0x00D8 +0xB0 = 0x221E +0xB1 = 0x00B1 +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x2206 +0xB5 = 0x00B5 +0xB6 = 0x2202 +0xB7 = 0x2211 +0xB8 = 0x220F +0xB9 = 0x0161 +0xBA = 0x222B +0xBB = 0x00AA +0xBC = 0x00BA +0xBD = 0x03A9 +0xBE = 0x017E +0xBF = 0x00F8 +0xC0 = 0x00BF +0xC1 = 0x00A1 +0xC2 = 0x00AC +0xC3 = 0x221A +0xC4 = 0x0192 +0xC5 = 0x2248 +0xC6 = 0x0106 +0xC7 = 0x00AB +0xC8 = 0x010C +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x00C0 +0xCC = 0x00C3 +0xCD = 0x00D5 +0xCE = 0x0152 +0xCF = 0x0153 +0xD0 = 0x0110 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x00F7 +0xD7 = 0x25CA +0xD8 = 0xF8FF +0xD9 = 0x00A9 +0xDA = 0x2044 +0xDB = 0x20AC +0xDC = 0x2039 +0xDD = 0x203A +0xDE = 0x00C6 +0xDF = 0x00BB +0xE0 = 0x2013 +0xE1 = 0x00B7 +0xE2 = 0x201A +0xE3 = 0x201E +0xE4 = 0x2030 +0xE5 = 0x00C2 +0xE6 = 0x0107 +0xE7 = 0x00C1 +0xE8 = 0x010D +0xE9 = 0x00C8 +0xEA = 0x00CD +0xEB = 0x00CE +0xEC = 0x00CF +0xED = 0x00CC +0xEE = 0x00D3 +0xEF = 0x00D4 +0xF0 = 0x0111 +0xF1 = 0x00D2 +0xF2 = 0x00DA +0xF3 = 0x00DB +0xF4 = 0x00D9 +0xF5 = 0x0131 +0xF6 = 0x02C6 +0xF7 = 0x02DC +0xF8 = 0x00AF +0xF9 = 0x03C0 +0xFA = 0x00CB +0xFB = 0x02DA +0xFC = 0x00B8 +0xFD = 0x00CA +0xFE = 0x00E6 +0xFF = 0x02C7 +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/CYRILLIC%UCS.src b/share/i18n/csmapper/APPLE/CYRILLIC%UCS.src new file mode 100644 index 0000000..ac2912c --- /dev/null +++ b/share/i18n/csmapper/APPLE/CYRILLIC%UCS.src @@ -0,0 +1,272 @@ +# $FreeBSD$ +# $NetBSD: CYRILLIC%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME CYRILLIC/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: CYRILLIC.TXT +# +# Contents: Map (external version) from Mac OS Cyrillic +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c03 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b2>. +# b02 1999-Sep-22 Encoding changed for Mac OS 9.0 to merge +# with Mac OS Ukrainian and support EURO SIGN; +# Change mappings for 0xA2, 0xB6, and 0xFF. +# Update contact e-mail address. Matches +# internal utom<b2>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Update header comments to new format; no +# mapping changes. Matches internal utom<n3>, +# ufrm<n13>, and Text Encoding Converter +# version 1.3. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n5>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. 
("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Cyrillic code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Cyrillic code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Cyrillic character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Cyrillic: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# This is the "Euro sign" version of Mac Cyrillic for Mac OS 9.0 and +# later. Before Mac OS 9.0, there were two separate Slavic Cyrillic +# encodings: +# +# 1. 
The Cyrillic currency sign variant (used for localized Russian +# and Bulgarian systems), which had the following: +# 0xA2 U+00A2 CENT SIGN +# 0xB6 U+2202 PARTIAL DIFFERENTIAL +# 0xFF U+00A4 CURRENCY SIGN +# +# 2. The Ukrainian currency sign variant (used for localized Ukrainian +# systems and the pre-9.0 Cyrillic Language Kit), which had the +# following: +# 0xA2 U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN +# 0xB6 U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN +# 0xFF U+00A4 CURRENCY SIGN +# +# This new Cyrillic Euro sign version is based on the old Ukrainian +# currency sign variant, with 0xFF changed to be EURO SIGN. +# +# The Mac OS Cyrillic encoding includes the Cyrillic letter repertoire +# of ISO 8859-5 (although not at the same code points). This covers +# most of the Slavic languages written in Cyrillic script. +# +# The Mac OS Cyrillic encoding also includes a number of characters +# needed for the Mac OS user interface and localization (e.g. +# ellipsis, bullet, copyright sign). All of the characters in Mac OS +# Cyrillic that are also in the Mac OS Roman encoding are at the +# same code point in both; this improves application compatibility. +# +# Note: There is a common Ukrainian glyph variation in which the glyph +# for CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I may or may not +# have a dot above. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n05 to version b02: +# +# - Encoding changed for Mac OS 9.0 to merge with Mac OS Ukrainian and +# support EURO SIGN. 0xA2 changed from U+00A2 to U+0490; 0xB6 changed +# from U+2202 to U+0491; 0xFF changed from U+00A4 to U+20AC. 
+# +################## +0x00 - 0x7E = 0x00 - +0x80 = 0x0410 +0x81 = 0x0411 +0x82 = 0x0412 +0x83 = 0x0413 +0x84 = 0x0414 +0x85 = 0x0415 +0x86 = 0x0416 +0x87 = 0x0417 +0x88 = 0x0418 +0x89 = 0x0419 +0x8A = 0x041A +0x8B = 0x041B +0x8C = 0x041C +0x8D = 0x041D +0x8E = 0x041E +0x8F = 0x041F +0x90 = 0x0420 +0x91 = 0x0421 +0x92 = 0x0422 +0x93 = 0x0423 +0x94 = 0x0424 +0x95 = 0x0425 +0x96 = 0x0426 +0x97 = 0x0427 +0x98 = 0x0428 +0x99 = 0x0429 +0x9A = 0x042A +0x9B = 0x042B +0x9C = 0x042C +0x9D = 0x042D +0x9E = 0x042E +0x9F = 0x042F +0xA0 = 0x2020 +0xA1 = 0x00B0 +0xA2 = 0x0490 +0xA3 = 0x00A3 +0xA4 = 0x00A7 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x0406 +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x2122 +0xAB = 0x0402 +0xAC = 0x0452 +0xAD = 0x2260 +0xAE = 0x0403 +0xAF = 0x0453 +0xB0 = 0x221E +0xB1 = 0x00B1 +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x0456 +0xB5 = 0x00B5 +0xB6 = 0x0491 +0xB7 = 0x0408 +0xB8 = 0x0404 +0xB9 = 0x0454 +0xBA = 0x0407 +0xBB = 0x0457 +0xBC = 0x0409 +0xBD = 0x0459 +0xBE = 0x040A +0xBF = 0x045A +0xC0 = 0x0458 +0xC1 = 0x0405 +0xC2 = 0x00AC +0xC3 = 0x221A +0xC4 = 0x0192 +0xC5 = 0x2248 +0xC6 = 0x2206 +0xC7 = 0x00AB +0xC8 = 0x00BB +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x040B +0xCC = 0x045B +0xCD = 0x040C +0xCE = 0x045C +0xCF = 0x0455 +0xD0 = 0x2013 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x00F7 +0xD7 = 0x201E +0xD8 = 0x040E +0xD9 = 0x045E +0xDA = 0x040F +0xDB = 0x045F +0xDC = 0x2116 +0xDD = 0x0401 +0xDE = 0x0451 +0xDF = 0x044F +0xE0 = 0x0430 +0xE1 = 0x0431 +0xE2 = 0x0432 +0xE3 = 0x0433 +0xE4 = 0x0434 +0xE5 = 0x0435 +0xE6 = 0x0436 +0xE7 = 0x0437 +0xE8 = 0x0438 +0xE9 = 0x0439 +0xEA = 0x043A +0xEB = 0x043B +0xEC = 0x043C +0xED = 0x043D +0xEE = 0x043E +0xEF = 0x043F +0xF0 = 0x0440 +0xF1 = 0x0441 +0xF2 = 0x0442 +0xF3 = 0x0443 +0xF4 = 0x0444 +0xF5 = 0x0445 +0xF6 = 0x0446 +0xF7 = 0x0447 +0xF8 = 0x0448 +0xF9 = 0x0449 +0xFA = 0x044A +0xFB = 0x044B +0xFC = 0x044C +0xFD = 0x044D +0xFE = 0x044E +0xFF = 0x20AC +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/DEVANAGA%UCS.src b/share/i18n/csmapper/APPLE/DEVANAGA%UCS.src new file mode 100644 index 0000000..9d4c0fd --- /dev/null +++ b/share/i18n/csmapper/APPLE/DEVANAGA%UCS.src @@ -0,0 +1,359 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME DEVANAGA/UCS +SRC_ZONE 0x00-0xFA +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 +#======================================================================= +# File name: DEVANAGA.TXT +# +# Contents: Map (external version) from Mac OS Devanagari +# encoding to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments; add section on +# roundtrip considerations. Matches internal +# xml <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n04 1998-Feb-05 First version; matches internal utom<n9>, +# ufrm<n15>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. 
+# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Devanagari code or code sequence +# (in hex as 0xNN or 0xNN+0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name or sequence +# of names. In some cases an additional comment follows the +# Unicode name(s). +# +# The entries are in two sections. The first section is for pairs of +# Mac OS Devanagari code points that must be mapped in a special way. +# The second section maps individual code points. +# +# Within each section, the entries are in Mac OS Devanagari code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Devanagari character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Devanagari: +# --------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Devanagari is based on IS 13194:1991 (ISCII-91), with the +# addition of several punctuation and symbol characters. However, +# Mac OS Devanagari does not support the ATR (attribute) mechanism of +# ISCII-91. +# +# 1. ISCII-91 features in Mac OS Devanagari include: +# +# a) Overloading of nukta +# +# In addition to using the nukta (0xE9) like a combining dot below, +# nukta is overloaded to function as a general character modifier. 
+# In this role, certain code points followed by 0xE9 are treated as +# a two-byte code point representing a character which may be +# rather different than the characters represented by either of +# the code points alone. For example, the character DEVANAGARI OM +# (U+0950) is represented in ISCII-91 as candrabindu + nukta. +# +# b) Explicit halant and soft halant +# +# A double halant (0xE8 + 0xE8) constitutes an "explicit halant", +# which will always appear as a halant instead of causing formation +# of a ligature or half-form consonant. +# +# Halant followed by nukta (0xE8 + 0xE9) constitutes a "soft +# halant", which prevents formation of a ligature and instead +# retains the half-form of the first consonant. +# +# c) Invisible consonant +# +# The byte 0xD9 (called INV in ISCII-91) is an invisible consonant: +# It behaves like a consonant but has no visible appearance. It is +# intended to be used (often in combination with halant) to display +# dependent forms in isolation, such as the RA forms or consonant +# half-forms. +# +# d) Extensions for Vedic, etc. +# +# The byte 0xF0 (called EXT in ISCII-91) followed by any byte in +# the range 0xA1-0xEE constitutes a two-byte code point which can +# be used to represent additional characters for Vedic (or other +# extensions); 0xF0 followed by any other byte value constitutes +# malformed text. Mac OS Devanagari supports this mechanism, but +# does not currently map any of these two-byte code points to +# anything. +# +# 2. Mac OS Devanagari additions +# +# Mac OS Devanagari adds characters using the code points +# 0x80-0x8A and 0x90-0x91 (the latter are some Devanagari additions +# from Unicode). +# +# 3. Unused code points +# +# The following code points are currently unused, and are not shown +# here: 0x8B-0x8F, 0x92-0xA0, 0xEB-0xEF, 0xFB-0xFF. In addition, +# 0xF0 is not shown here, but it has a special function as described +# above. 
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. Mapping the byte pairs +# +# If one of the following byte values is encountered when mapping +# Mac OS Devanagari text - 0xA1, 0xA6, 0xA7, 0xAA, 0xDB, 0xDC, 0xDF, +# 0xE8, or 0xEA - then the next byte (if there is one) should be +# examined. If the next byte is 0xE9 - or also 0xE8, if the first +# byte was 0xE8 - then the byte pair should be mapped using the +# first section of the mapping table below. Otherwise, each byte +# should be mapped using the second section of the mapping table +# below. +# +# - The Unicode Standard, Version 2.0, specifies how explicit +# halant and soft halant should be represented in Unicode; +# these mappings are used below. +# +# If the byte value 0xF0 is encountered when mapping Mac OS +# Devanagari text, then the next byte should be examined. If there +# is no next byte (e.g. 0xF0 at end of buffer), the mapping +# process should indicate incomplete character. If there is a next +# byte but it is not in the range 0xA1-0xEE, the mapping process +# should indicate malformed text. Otherwise, the mapping process +# should treat the byte pair as a valid two-byte code point with no +# mapping (e.g. map it to QUESTION MARK, REPLACEMENT CHARACTER, +# etc.). +# +# 2. Mapping the invisible consonant +# +# It has been suggested that INV in ISCII-91 should map to ZERO +# WIDTH NON-JOINER in Unicode. However, this causes problems with +# roundtrip fidelity: The ISCII-91 sequences 0xE8+0xE8 and 0xE8+0xD9 +# would map to the same sequence of Unicode characters. We have +# instead mapped INV to LEFT-TO-RIGHT MARK, which avoids these +# problems. +# +# 3. Additional loose mappings from Unicode +# +# These are not preserved in roundtrip mappings. 
+# +# U+0958 0xB3+0xE9 # DEVANAGARI LETTER QA +# U+0959 0xB4+0xE9 # DEVANAGARI LETTER KHHA +# U+095A 0xB5+0xE9 # DEVANAGARI LETTER GHHA +# U+095B 0xBA+0xE9 # DEVANAGARI LETTER ZA +# U+095C 0xBF+0xE9 # DEVANAGARI LETTER DDDHA +# U+095D 0xC0+0xE9 # DEVANAGARI LETTER RHA +# U+095E 0xC9+0xE9 # DEVANAGARI LETTER FA +# +# 4. Roundtrip considerations when mapping to decomposed Unicode +# +# Both ISCII-91 (hence Mac OS Devanagari) and Unicode provide multiple +# ways of representing certain Devanagari consonants. For example, +# DEVANAGARI LETTER NNNA can be represented in Unicode as the single +# character 0x0929 or as the sequence 0x0928 0x093C; similarly, this +# consonant can be represented in Mac OS Devanagari as 0xC7 or as the +# sequence 0xC6 0xE9. This leads to some roundtrip problems. First +# note that we have the following mappings without such problems: +# +# ISCII/ standard decomposition of reverse mapping +# Mac OS Unicode mapping standard mapping of decomposition +# ------ ----------------------- ---------------- ---------------- +# 0xC6 0x0928 ... LETTER NA 0x0928 (same) 0xC6 +# 0xCD 0x092F ... LETTER YA 0x092F (same) 0xCD +# 0xCF 0x0930 ... LETTER RA 0x0930 (same) 0xCF +# 0xD2 0x0933 ... LETTER LLA 0x0933 (same) 0xD2 +# 0xE9 0x093C ... SIGN NUKTA 0x093C (same) 0xE9 +# +# However, those mappings above cause roundtrip problems for the +# following mappings if they are decomposed: +# +# ISCII/ standard decomposition of reverse mapping +# Mac OS Unicode mapping standard mapping of decomposition +# ------ ----------------------- ---------------- ---------------- +# 0xC7 0x0929 ... LETTER NNNA 0x0928 0x093C 0xC6 0xE9 +# 0xCE 0x095F ... LETTER YYA 0x092F 0x093C 0xCD 0xE9 +# 0xD0 0x0931 ... LETTER RRA 0x0930 0x093C 0xCF 0xE9 +# 0xD3 0x0934 ... LETTER LLLA 0x0933 0x093C 0xD2 0xE9 +# +# One solution is to use a grouping transcoding hint with the four +# decompositions above to mark the decomposed sequence for special +# treatment in transcoding. 
This yields the following mappings to +# decomposed Unicode: +# +# ISCII/ decomposed +# Mac OS Unicode mapping +# ------ ---------------- +# 0xC7 0xF860 0x0928 0x093C +# 0xCE 0xF860 0x092F 0x093C +# 0xD0 0xF860 0x0930 0x093C +# 0xD3 0xF860 0x0933 0x093C +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +# Section 1: Map the following byte pairs as indicated: +# (ZWNJ means ZERO WIDTH NON-JOINER, ZWJ means ZERO WIDTH JOINER) +# (Also see note about 0xF0 in comments above) +# Section 2: Map the remaining bytes as follows: +# +# +# +# +BEGIN_MAP +0x00 - 0x7F = 0x0000 - +0x80 = 0x00D7 +0x81 = 0x2212 +0x82 = 0x2013 +0x83 = 0x2014 +0x84 = 0x2018 +0x85 = 0x2019 +0x86 = 0x2026 +0x87 = 0x2022 +0x88 = 0x00A9 +0x89 = 0x00AE +0x8A = 0x2122 +0x90 = 0x0965 +0x91 = 0x0970 +0xA1 = 0x0901 +#0xA1+0xE9 = 0x0950 +0xA2 = 0x0902 +0xA3 = 0x0903 +0xA4 = 0x0905 +0xA5 = 0x0906 +0xA6 = 0x0907 +#0xA6+0xE9 = 0x090C +0xA7 = 0x0908 +#0xA7+0xE9 = 0x0961 +0xA8 = 0x0909 +0xA9 = 0x090A +0xAA = 0x090B +#0xAA+0xE9 = 0x0960 +0xAB = 0x090E +0xAC = 0x090F +0xAD = 0x0910 +0xAE = 0x090D +0xAF = 0x0912 +0xB0 = 0x0913 +0xB1 = 0x0914 +0xB2 = 0x0911 +0xB3 = 0x0915 +0xB4 = 0x0916 +0xB5 = 0x0917 +0xB6 = 0x0918 +0xB7 = 0x0919 +0xB8 = 0x091A +0xB9 = 0x091B +0xBA = 0x091C +0xBB = 0x091D +0xBC = 0x091E +0xBD = 0x091F +0xBE = 0x0920 +0xBF = 0x0921 +0xC0 = 0x0922 +0xC1 = 0x0923 +0xC2 = 0x0924 +0xC3 = 0x0925 +0xC4 = 0x0926 +0xC5 = 0x0927 +0xC6 = 0x0928 +0xC7 = 0x0929 +0xC8 = 0x092A +0xC9 = 0x092B +0xCA = 0x092C +0xCB = 0x092D +0xCC = 0x092E +0xCD = 0x092F +0xCE = 0x095F +0xCF = 0x0930 +0xD0 = 0x0931 +0xD1 = 0x0932 +0xD2 = 0x0933 +0xD3 = 0x0934 +0xD4 = 0x0935 +0xD5 = 0x0936 +0xD6 = 0x0937 +0xD7 = 0x0938 +0xD8 = 0x0939 +0xD9 = 0x200E +0xDA = 0x093E +0xDB = 0x093F +#0xDB+0xE9 = 0x0962 +0xDC = 0x0940 +#0xDC+0xE9 = 0x0963 +0xDD = 0x0941 +0xDE = 0x0942 +0xDF = 0x0943 +#0xDF+0xE9 = 0x0944 +0xE0 = 0x0946 +0xE1 = 0x0947 +0xE2 = 0x0948 +0xE3 = 0x0945 +0xE4 = 
0x094A +0xE5 = 0x094B +0xE6 = 0x094C +0xE7 = 0x0949 +0xE8 = 0x094D +#0xE8+0xE8 = 0x094D+0x200C +#0xE8+0xE9 = 0x094D+0x200D +0xE9 = 0x093C +0xEA = 0x0964 +#0xEA+0xE9 = 0x093D +0xF1 = 0x0966 +0xF2 = 0x0967 +0xF3 = 0x0968 +0xF4 = 0x0969 +0xF5 = 0x096A +0xF6 = 0x096B +0xF7 = 0x096C +0xF8 = 0x096D +0xF9 = 0x096E +0xFA = 0x096F +END_MAP diff --git a/share/i18n/csmapper/APPLE/DINGBATS%UCS.src b/share/i18n/csmapper/APPLE/DINGBATS%UCS.src new file mode 100644 index 0000000..bd61c44 --- /dev/null +++ b/share/i18n/csmapper/APPLE/DINGBATS%UCS.src @@ -0,0 +1,341 @@ +# $FreeBSD$ +# $NetBSD: DINGBATS%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME DINGBATS/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: DINGBATS.TXT +# +# Contents: Map (external version) from Mac OS Dingbats +# character set to Unicode 3.2 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update mappings for 0x80-0x8D to use new +# Unicode 3.2 characters. Update URLs, notes. +# Matches internal utom<b2>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Update to match internal utom<n4>, ufrm<n14>, +# and Text Encoding Converter version 1.3: +# Change all mappings to single corporate-zone +# Unicodes to either use standard Unicodes +# or standard Unicodes plus transcoding hints; +# see details below. Also update header +# comments to new format. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n4>. 
+# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Dingbats code (in hex as 0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN). +# Column #3 is a comment containing the Unicode name. +# In some cases an additional comment follows the Unicode name. +# +# The entries are in Mac OS Dingbats code order. +# +# Some of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Dingbats character set uses the standard control characters +# at 0x00-0x1F and 0x7F. 
+# +# Notes on Mac OS Dingbats: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# The Mac OS Dingbats encoding shares the script code smRoman +# (0) with the standard Mac OS Roman encoding. To determine if +# the Dingbats encoding is being used, you must check if the +# font name is "Zapf Dingbats". +# +# The layout of the Dingbats character set is identical to or +# a superset of the layout of the Adobe Zapf Dingbats encoding +# vector. +# +# The following code points are unused, and are not shown here: +# 0x8E-0xA0, 0xF0, 0xFF. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - The mappings for the following Mac OS Dingbats characters +# were changed to use standard Unicode characters added for +# Unicode 3.2: 0x80-0x8D. +# +# Changes from version n03 to version n05: +# +# - The mappings for the following Mac OS Dingbats characters +# were changed from single corporate-zone Unicode characters +# to standard Unicode characters: +# 0x80-0x81, 0x84-0x87, 0x8A-0x8D. +# +# - The mappings for the following Mac OS Dingbats characters +# were changed from single corporate-zone Unicode characters +# to combinations of a standard Unicode and a transcoding hint: +# 0x82-0x83, 0x88-0x89. 
+# +################## +0x00 - 0x20 = 0x00 - +0x21 = 0x2701 +0x22 = 0x2702 +0x23 = 0x2703 +0x24 = 0x2704 +0x25 = 0x260E +0x26 = 0x2706 +0x27 = 0x2707 +0x28 = 0x2708 +0x29 = 0x2709 +0x2A = 0x261B +0x2B = 0x261E +0x2C = 0x270C +0x2D = 0x270D +0x2E = 0x270E +0x2F = 0x270F +0x30 = 0x2710 +0x31 = 0x2711 +0x32 = 0x2712 +0x33 = 0x2713 +0x34 = 0x2714 +0x35 = 0x2715 +0x36 = 0x2716 +0x37 = 0x2717 +0x38 = 0x2718 +0x39 = 0x2719 +0x3A = 0x271A +0x3B = 0x271B +0x3C = 0x271C +0x3D = 0x271D +0x3E = 0x271E +0x3F = 0x271F +0x40 = 0x2720 +0x41 = 0x2721 +0x42 = 0x2722 +0x43 = 0x2723 +0x44 = 0x2724 +0x45 = 0x2725 +0x46 = 0x2726 +0x47 = 0x2727 +0x48 = 0x2605 +0x49 = 0x2729 +0x4A = 0x272A +0x4B = 0x272B +0x4C = 0x272C +0x4D = 0x272D +0x4E = 0x272E +0x4F = 0x272F +0x50 = 0x2730 +0x51 = 0x2731 +0x52 = 0x2732 +0x53 = 0x2733 +0x54 = 0x2734 +0x55 = 0x2735 +0x56 = 0x2736 +0x57 = 0x2737 +0x58 = 0x2738 +0x59 = 0x2739 +0x5A = 0x273A +0x5B = 0x273B +0x5C = 0x273C +0x5D = 0x273D +0x5E = 0x273E +0x5F = 0x273F +0x60 = 0x2740 +0x61 = 0x2741 +0x62 = 0x2742 +0x63 = 0x2743 +0x64 = 0x2744 +0x65 = 0x2745 +0x66 = 0x2746 +0x67 = 0x2747 +0x68 = 0x2748 +0x69 = 0x2749 +0x6A = 0x274A +0x6B = 0x274B +0x6C = 0x25CF +0x6D = 0x274D +0x6E = 0x25A0 +0x6F = 0x274F +0x70 = 0x2750 +0x71 = 0x2751 +0x72 = 0x2752 +0x73 = 0x25B2 +0x74 = 0x25BC +0x75 = 0x25C6 +0x76 = 0x2756 +0x77 = 0x25D7 +0x78 = 0x2758 +0x79 = 0x2759 +0x7A = 0x275A +0x7B = 0x275B +0x7C = 0x275C +0x7D = 0x275D +0x7E = 0x275E +0x80 = 0x2768 +0x81 = 0x2769 +0x82 = 0x276A +0x83 = 0x276B +0x84 = 0x276C +0x85 = 0x276D +0x86 = 0x276E +0x87 = 0x276F +0x88 = 0x2770 +0x89 = 0x2771 +0x8A = 0x2772 +0x8B = 0x2773 +0x8C = 0x2774 +0x8D = 0x2775 +0xA1 = 0x2761 +0xA2 = 0x2762 +0xA3 = 0x2763 +0xA4 = 0x2764 +0xA5 = 0x2765 +0xA6 = 0x2766 +0xA7 = 0x2767 +0xA8 = 0x2663 +0xA9 = 0x2666 +0xAA = 0x2665 +0xAB = 0x2660 +0xAC = 0x2460 +0xAD = 0x2461 +0xAE = 0x2462 +0xAF = 0x2463 +0xB0 = 0x2464 +0xB1 = 0x2465 +0xB2 = 0x2466 +0xB3 = 0x2467 +0xB4 = 0x2468 +0xB5 = 0x2469 +0xB6 = 0x2776 
+0xB7 = 0x2777 +0xB8 = 0x2778 +0xB9 = 0x2779 +0xBA = 0x277A +0xBB = 0x277B +0xBC = 0x277C +0xBD = 0x277D +0xBE = 0x277E +0xBF = 0x277F +0xC0 = 0x2780 +0xC1 = 0x2781 +0xC2 = 0x2782 +0xC3 = 0x2783 +0xC4 = 0x2784 +0xC5 = 0x2785 +0xC6 = 0x2786 +0xC7 = 0x2787 +0xC8 = 0x2788 +0xC9 = 0x2789 +0xCA = 0x278A +0xCB = 0x278B +0xCC = 0x278C +0xCD = 0x278D +0xCE = 0x278E +0xCF = 0x278F +0xD0 = 0x2790 +0xD1 = 0x2791 +0xD2 = 0x2792 +0xD3 = 0x2793 +0xD4 = 0x2794 +0xD5 = 0x2192 +0xD6 = 0x2194 +0xD7 = 0x2195 +0xD8 = 0x2798 +0xD9 = 0x2799 +0xDA = 0x279A +0xDB = 0x279B +0xDC = 0x279C +0xDD = 0x279D +0xDE = 0x279E +0xDF = 0x279F +0xE0 = 0x27A0 +0xE1 = 0x27A1 +0xE2 = 0x27A2 +0xE3 = 0x27A3 +0xE4 = 0x27A4 +0xE5 = 0x27A5 +0xE6 = 0x27A6 +0xE7 = 0x27A7 +0xE8 = 0x27A8 +0xE9 = 0x27A9 +0xEA = 0x27AA +0xEB = 0x27AB +0xEC = 0x27AC +0xED = 0x27AD +0xEE = 0x27AE +0xEF = 0x27AF +0xF1 = 0x27B1 +0xF2 = 0x27B2 +0xF3 = 0x27B3 +0xF4 = 0x27B4 +0xF5 = 0x27B5 +0xF6 = 0x27B6 +0xF7 = 0x27B7 +0xF8 = 0x27B8 +0xF9 = 0x27B9 +0xFA = 0x27BA +0xFB = 0x27BB +0xFC = 0x27BC +0xFD = 0x27BD +0xFE = 0x27BE +END_MAP diff --git a/share/i18n/csmapper/APPLE/FARSI%UCS.src b/share/i18n/csmapper/APPLE/FARSI%UCS.src new file mode 100644 index 0000000..4fdccf9 --- /dev/null +++ b/share/i18n/csmapper/APPLE/FARSI%UCS.src @@ -0,0 +1,437 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME FARSI/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +#======================================================================= +# File name: FARSI.TXT +# +# Contents: Map (external version) from Mac OS Farsi +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1997-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Add comments about character display and +# direction overrides. Update URLs, notes. +# Matches internal utom<b3>. 
+# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n04 1998-Feb-05 Show required Unicode character +# directionality in a different way. Matches +# internal utom<n3>, ufrm<n9>, and Text +# Encoding Converter version 1.3. Update +# header comments; include information on +# loose mapping of digits, and changes to +# mapping for the TrueType variant. +# n01 1997-Jul-17 First version. Matches internal utom<n1>, +# ufrm<n2>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Farsi code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN), +# possibly preceded by a tag indicating required directionality +# (i.e. <LR>+0xNNNN or <RL>+0xNNNN). 
+# Column #3 is a comment containing the Unicode name. +# +# The entries are in Mac OS Farsi code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Farsi character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Farsi: +# ---------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# 1. General +# +# The Mac OS Farsi character set is based on the Mac OS Arabic +# character set. The main difference is in the right-to-left digits +# 0xB0-0xB9: For Mac OS Arabic these correspond to right-left +# versions of the Unicode ARABIC-INDIC DIGITs 0660-0669; for +# Mac OS Farsi these correspond to right-left versions of the +# Unicode EXTENDED ARABIC-INDIC DIGITs 06F0-06F9. The other +# difference is in the nature of the font variants. +# +# For more information, see the comments in the mapping table for +# Mac OS Arabic. +# +# Mac OS Farsi characters 0xEB-0xF2 are non-spacing/combining marks. +# +# 2. Directional characters and roundtrip fidelity +# +# The Mac OS Arabic character set (on which Mac OS Farsi is based) +# was developed in 1986-1987. At that time the bidirectional line +# layout algorithm used in the Mac OS Arabic system was fairly simple; +# it used only a few direction classes (instead of the 19 now used in +# the Unicode bidirectional algorithm). In order to permit users to +# handle some tricky layout problems, certain punctuation and symbol +# characters were encoded twice, one with a left-right direction +# attribute and the other with a right-left direction attribute. This +# is the case in Mac OS Farsi too. +# +# For example, plus sign is encoded at 0x2B with a left-right +# attribute, and at 0xAB with a right-left attribute. However, there +# is only one PLUS SIGN character in Unicode. 
This leads to some +# interesting problems when mapping between Mac OS Farsi and Unicode; +# see below. +# +# A related problem is that even when a particular character is +# encoded only once in Mac OS Farsi, it may have a different +# direction attribute than the corresponding Unicode character. +# +# For example, the Mac OS Farsi character at 0x93 is HORIZONTAL +# ELLIPSIS with strong right-left direction. However, the Unicode +# character HORIZONTAL ELLIPSIS has direction class neutral. +# +# 3. Behavior of ASCII-range numbers in WorldScript +# +# Mac OS Farsi also has two sets of digit codes. + +# The digits at 0x30-0x39 may be displayed using either European +# digit forms or Persian digit forms, depending on context. If there +# is a "strong European" character such as a Latin letter on either +# side of a sequence consisting of digits 0x30-0x39 and possibly comma +# 0x2C or period 0x2E, then the characters will be displayed using +# European forms (This will happen even if there are neutral characters +# between the digits and the strong European character). Otherwise, the +# digits will be displayed using Persian forms, the comma will be +# displayed as Arabic thousands separator, and the period as Arabic +# decimal separator. In any case, 0x2C, 0x2E, and 0x30-0x39 are always +# left-right. +# +# The digits at 0xB0-0xB9 are always displayed using Persian digit +# shapes, and moreover, these digits always have strong right-left +# directionality. These are mainly intended for special layout +# purposes such as part numbers, etc. +# +# 4. Font variants +# +# The table in this file gives the Unicode mappings for the standard +# Mac OS Farsi encoding. This encoding is supported by the Tehran font +# (the system font for Farsi), and is the encoding supported by the +# text processing utilities. 
However, the other Farsi fonts actually +# implement a somewhat different encoding; this affects nine code +# points including 0xAA and 0xC0 (which are also affected by font +# variants in Mac OS Arabic). For these nine code points the standard +# Mac OS Farsi encoding has the following mappings: +# 0x8B -> 0x06BA ARABIC LETTER NOON GHUNNA (Urdu) +# 0xA4 -> <RL>+0x0024 DOLLAR SIGN, right-left +# 0xAA -> <RL>+0x002A ASTERISK, right-left +# 0xC0 -> <RL>+0x274A EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, +# right-left +# 0xF4 -> 0x0679 ARABIC LETTER TTEH (Urdu) +# 0xF7 -> 0x06A4 ARABIC LETTER VEH (for transliteration) +# 0xF9 -> 0x0688 ARABIC LETTER DDAL (Urdu) +# 0xFA -> 0x0691 ARABIC LETTER RREH (Urdu) +# 0xFF -> 0x06D2 ARABIC LETTER YEH BARREE (Urdu) +# +# The TrueType variant is used for the Farsi TrueType fonts: Ashfahan, +# Amir, Kamran, Mashad, NadeemFarsi. It differs from the standard +# variant in the following ways: +# 0x8B -> 0xF882 Arabic ligature "peace on him" (corporate char.) +# 0xA4 -> 0xFDFC RIAL SIGN (added in Unicode 3.2) +# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left +# 0xC0 -> <RL>+0x002A ASTERISK, right-left +# 0xF4 -> <RL>+0x00B0 DEGREE SIGN, right-left +# 0xF7 -> 0xFDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM +# 0xF9 -> <RL>+0x25CF BLACK CIRCLE, right-left +# 0xFA -> <RL>+0x25A0 BLACK SQUARE, right-left +# 0xFF -> <RL>+0x25B2 BLACK UP-POINTING TRIANGLE, right-left +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. Matching the direction of Mac OS Farsi characters +# +# When Mac OS Farsi encodes a character twice but with different +# direction attributes for the two code points - as in the case of +# plus sign mentioned above - we need a way to map both Mac OS Farsi +# code points to Unicode and back again without loss of information. 
+# With the plus sign, for example, mapping one of the Mac OS Farsi +# characters to a code in the Unicode corporate use zone is +# undesirable, since both of the plus sign characters are likely to +# be used in text that is interchanged. +# +# The problem is solved with the use of direction override characters +# and direction-dependent mappings. When mapping from Mac OS Farsi +# to Unicode, we use direction overrides as necessary to force the +# direction of the resulting Unicode characters. +# +# The required direction is indicated by a direction tag in the +# mappings. A tag of <LR> means the corresponding Unicode character +# must have a strong left-right context, and a tag of <RL> indicates +# a right-left context. +# +# For example, the mapping of 0x2B is given as <LR>+0x002B; the +# mapping of 0xAB is given as <RL>+0x002B. If we map an isolated +# instance of 0x2B to Unicode, it should be mapped as follows (LRO +# indicates LEFT-RIGHT OVERRIDE, PDF indicates POP DIRECTION +# FORMATTING): +# +# 0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF) +# +# When mapping several characters in a row that require direction +# forcing, the overrides need only be used at the beginning and end. +# For example: +# +# 0x24 0x20 0x28 0x29 -> 0x202D 0x0024 0x0020 0x0028 0x0029 0x202C +# +# If neutral characters that require direction forcing are already +# between strong-direction characters with matching directionality, +# then direction overrides need not be used. Direction overrides are +# always needed to map the right-left digits at 0xB0-0xB9. +# +# When mapping from Unicode to Mac OS Farsi, the Unicode +# bidirectional algorithm should be used to determine resolved +# direction of the Unicode characters. 
The mapping from Unicode to +# Mac OS Farsi can then be disambiguated by the use of the resolved +# direction: +# +# Unicode 0x002B -> Mac OS Farsi 0x2B (if L) or 0xAB (if R) +# +# However, this also means the direction override characters should +# be discarded when mapping from Unicode to Mac OS Farsi (after +# they have been used to determine resolved direction), since the +# direction override information is carried by the code point itself. +# +# Even when direction overrides are not needed for roundtrip +# fidelity, they are sometimes used when mapping Mac OS Farsi +# characters to Unicode in order to achieve similar text layout with +# the resulting Unicode text. For example, the single Mac OS Farsi +# ellipsis character has direction class right-left, and there is no +# left-right version. However, the Unicode HORIZONTAL ELLIPSIS +# character has direction class neutral (which means it may end up +# with a resolved direction of left-right if surrounded by left-right +# characters). When mapping the Mac OS Farsi ellipsis to Unicode, it +# is surrounded with a direction override to help preserve proper +# text layout. The resolved direction is not needed or used when +# mapping the Unicode HORIZONTAL ELLIPSIS back to Mac OS Farsi. +# +# 2. Mapping the Mac OS Farsi digits +# +# The main table below contains mappings that should be used when +# strict round-trip fidelity is required. However, for numeric +# values, the mappings in that table will produce Unicode characters +# that may appear different than the Mac OS Farsi text displayed on +# a Mac OS system using WorldScript. This is because WorldScript +# uses context-dependent display for the 0x30-0x39 digits.
+# +# If roundtrip fidelity is not required, then the following +# alternate mappings should be used when a sequence of 0x30-0x39 +# digits - possibly including 0x2C and 0x2E - occurs in an Arabic +# context (that is, when the first "strong" character on either side +# of the digit sequence is Arabic, or there is no strong character): +# +# 0x2C 0x066C # ARABIC THOUSANDS SEPARATOR +# 0x2E 0x066B # ARABIC DECIMAL SEPARATOR +# 0x30 0x06F0 # EXTENDED ARABIC-INDIC DIGIT ZERO +# 0x31 0x06F1 # EXTENDED ARABIC-INDIC DIGIT ONE +# 0x32 0x06F2 # EXTENDED ARABIC-INDIC DIGIT TWO +# 0x33 0x06F3 # EXTENDED ARABIC-INDIC DIGIT THREE +# 0x34 0x06F4 # EXTENDED ARABIC-INDIC DIGIT FOUR +# 0x35 0x06F5 # EXTENDED ARABIC-INDIC DIGIT FIVE +# 0x36 0x06F6 # EXTENDED ARABIC-INDIC DIGIT SIX +# 0x37 0x06F7 # EXTENDED ARABIC-INDIC DIGIT SEVEN +# 0x38 0x06F8 # EXTENDED ARABIC-INDIC DIGIT EIGHT +# 0x39 0x06F9 # EXTENDED ARABIC-INDIC DIGIT NINE +# +# 3. Use of corporate-zone Unicodes (mapping the TrueType variant) +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF882 Arabic ligature "peace on him" +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - Update mapping of 0xA4 in TrueType variant to use new Unicode +# character U+FDFC RIAL SIGN added for Unicode 3.2 +# +# Changes from version n01 to version n04: +# +# - Change mapping of 0xA4 in TrueType variant (just described in +# header comment) from single corporate character to use +# grouping hint +# +################## + +0x00 - 0x7F = 0x0000 - +0x80 = 0x00C4 +0x81 = 0x00A0 +0x82 = 0x00C7 +0x83 = 0x00C9 +0x84 = 0x00D1 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x06BA +0x8C = 0x00AB +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00ED +0x93 = 0x2026 +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x00F1 +0x97 = 0x00F3 +0x98
= 0x00BB +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F7 +0x9C = 0x00FA +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x0020 +0xA1 = 0x0021 +0xA2 = 0x0022 +0xA3 = 0x0023 +0xA4 = 0x0024 +0xA5 = 0x066A +0xA6 = 0x0026 +0xA7 = 0x0027 +0xA8 = 0x0028 +0xA9 = 0x0029 +0xAA = 0x002A +0xAB = 0x002B +0xAC = 0x060C +0xAD = 0x002D +0xAE = 0x002E +0xAF = 0x002F +0xB0 = 0x06F0 +0xB1 = 0x06F1 +0xB2 = 0x06F2 +0xB3 = 0x06F3 +0xB4 = 0x06F4 +0xB5 = 0x06F5 +0xB6 = 0x06F6 +0xB7 = 0x06F7 +0xB8 = 0x06F8 +0xB9 = 0x06F9 +0xBA = 0x003A +0xBB = 0x061B +0xBC = 0x003C +0xBD = 0x003D +0xBE = 0x003E +0xBF = 0x061F +0xC0 = 0x274A +0xC1 = 0x0621 +0xC2 = 0x0622 +0xC3 = 0x0623 +0xC4 = 0x0624 +0xC5 = 0x0625 +0xC6 = 0x0626 +0xC7 = 0x0627 +0xC8 = 0x0628 +0xC9 = 0x0629 +0xCA = 0x062A +0xCB = 0x062B +0xCC = 0x062C +0xCD = 0x062D +0xCE = 0x062E +0xCF = 0x062F +0xD0 = 0x0630 +0xD1 = 0x0631 +0xD2 = 0x0632 +0xD3 = 0x0633 +0xD4 = 0x0634 +0xD5 = 0x0635 +0xD6 = 0x0636 +0xD7 = 0x0637 +0xD8 = 0x0638 +0xD9 = 0x0639 +0xDA = 0x063A +0xDB = 0x005B +0xDC = 0x005C +0xDD = 0x005D +0xDE = 0x005E +0xDF = 0x005F +0xE0 = 0x0640 +0xE1 = 0x0641 +0xE2 = 0x0642 +0xE3 = 0x0643 +0xE4 = 0x0644 +0xE5 = 0x0645 +0xE6 = 0x0646 +0xE7 = 0x0647 +0xE8 = 0x0648 +0xE9 = 0x0649 +0xEA = 0x064A +0xEB = 0x064B +0xEC = 0x064C +0xED = 0x064D +0xEE = 0x064E +0xEF = 0x064F +0xF0 = 0x0650 +0xF1 = 0x0651 +0xF2 = 0x0652 +0xF3 = 0x067E +0xF4 = 0x0679 +0xF5 = 0x0686 +0xF6 = 0x06D5 +0xF7 = 0x06A4 +0xF8 = 0x06AF +0xF9 = 0x0688 +0xFA = 0x0691 +0xFB = 0x007B +0xFC = 0x007C +0xFD = 0x007D +0xFE = 0x0698 +0xFF = 0x06D2 +END_MAP diff --git a/share/i18n/csmapper/APPLE/GAELIC%UCS.src b/share/i18n/csmapper/APPLE/GAELIC%UCS.src new file mode 100644 index 0000000..f008aa6 --- /dev/null +++ b/share/i18n/csmapper/APPLE/GAELIC%UCS.src @@ -0,0 +1,257 @@ +# $FreeBSD$ +# $NetBSD: GAELIC%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME GAELIC/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping 
data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: GAELIC.TXT +# +# Contents: Map (external version) from Mac OS Celtic +# character set to Unicode 3.0 and later +# +# Contacts: charsets@apple.com, everson@evertype.com +# +# Changes: +# +# c01 2005-Apr-01 First posted version. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Gaelic code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Gaelic code order. 
+# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Gaelic character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Gaelic (partly from Michael Everson): +# ----------------------------------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# This character set was developed by Michael Everson of Everson +# Typography (everson@evertype.com) and was used for fonts in his +# Celtic Utilities and CeltScript font packages for the Mac, as well +# as some fonts included with the Irish localizations of Mac OS 6.0.8 +# and 7.1. Note that while Apple authorized this Irish localization, +# it was not a system which shipped with Apple hardware, and was not +# otherwise supported by Apple. Fonts conforming to the Mac OS Gaelic +# character set are available from Everson Typography +# (http://www.evertype.com/celtscript/). Information about the use of +# this character set is available at +# http://www.evertype.com/celtscript/celtcode.html. +# +# The Mac OS Gaelic encoding shares the script code smRoman (0) with +# the standard Mac OS Roman encoding. To determine if the Gaelic +# encoding is being used in Mac OS 7-9, you should also check if the +# system region code is 81. Otherwise, you can check for particular +# fonts that conform to this encoding (since in practice Gaelic fonts +# are used with the ordinary US or UK system versions). +# +# This character set is a variant of standard Mac OS Roman, adding +# capital and small y with acute, grave, and circumflex; capital and +# small w with acute, grave, circumflex and diaeresis; capital and +# small b, c, d, f, g, m, p, s, t with dot above; tironian et; small +# long r, small long s, and small long s with dot above. 
It has 36 +# code point differences from standard Mac OS Roman. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Latin 8 Extended encoding that still +# maps 0xDB to U+00A4; this can be used for older fonts. +# Note: U+20AC is new with Unicode 2.1; for earlier Unicode +# versions, Latin 8 Extended 0xDB may be mapped to private-use +# character U+F8A0. +# +# Before Unicode 3.0, code point 0xE4 was PER MILLE SIGN, and was +# mapped to U+2030. Since August 1998, code point 0xE4 is changed +# to TIRONIAN SIGN ET and maps to U+204A. There is a "per mille +# sign" variant of the Mac OS Gaelic encoding that still +# maps 0xE4 to U+2030; this can be used for older fonts. +# Note: U+204A is new with Unicode 3.0; for earlier Unicode +# versions, Mac OS Gaelic was unified with AMPERSAND. 
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +0x00 - 0x7E = 0x00 - +0x80 = 0x00C4 +0x81 = 0x00C5 +0x82 = 0x00C7 +0x83 = 0x00C9 +0x84 = 0x00D1 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x00E3 +0x8C = 0x00E5 +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00ED +0x93 = 0x00EC +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x00F1 +0x97 = 0x00F3 +0x98 = 0x00F2 +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F5 +0x9C = 0x00FA +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x2020 +0xA1 = 0x00B0 +0xA2 = 0x00A2 +0xA3 = 0x00A3 +0xA4 = 0x00A7 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x00DF +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x2122 +0xAB = 0x00B4 +0xAC = 0x00A8 +0xAD = 0x2260 +0xAE = 0x00C6 +0xAF = 0x00D8 +0xB0 = 0x1E02 +0xB1 = 0x00B1 +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x1E03 +0xB5 = 0x010A +0xB6 = 0x010B +0xB7 = 0x1E0A +0xB8 = 0x1E0B +0xB9 = 0x1E1E +0xBA = 0x1E1F +0xBB = 0x0120 +0xBC = 0x0121 +0xBD = 0x1E40 +0xBE = 0x00E6 +0xBF = 0x00F8 +0xC0 = 0x1E41 +0xC1 = 0x1E56 +0xC2 = 0x1E57 +0xC3 = 0x027C +0xC4 = 0x0192 +0xC5 = 0x017F +0xC6 = 0x1E60 +0xC7 = 0x00AB +0xC8 = 0x00BB +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x00C0 +0xCC = 0x00C3 +0xCD = 0x00D5 +0xCE = 0x0152 +0xCF = 0x0153 +0xD0 = 0x2013 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x1E61 +0xD7 = 0x1E9B +0xD8 = 0x00FF +0xD9 = 0x0178 +0xDA = 0x1E6A +0xDB = 0x20AC +0xDC = 0x2039 +0xDD = 0x203A +0xDE = 0x0176 +0xDF = 0x0177 +0xE0 = 0x1E6B +0xE1 = 0x00B7 +0xE2 = 0x1EF2 +0xE3 = 0x1EF3 +0xE4 = 0x204A +0xE5 = 0x00C2 +0xE6 = 0x00CA +0xE7 = 0x00C1 +0xE8 = 0x00CB +0xE9 = 0x00C8 +0xEA = 0x00CD +0xEB = 0x00CE +0xEC = 0x00CF +0xED = 0x00CC +0xEE = 0x00D3 +0xEF = 0x00D4 +0xF0 = 0x2663 +0xF1 = 0x00D2 +0xF2 = 0x00DA +0xF3 = 0x00DB +0xF4 = 0x00D9 +0xF5 = 0x0131 +0xF6 = 
0x00DD +0xF7 = 0x00FD +0xF8 = 0x0174 +0xF9 = 0x0175 +0xFA = 0x1E84 +0xFB = 0x1E85 +0xFC = 0x1E80 +0xFD = 0x1E81 +0xFE = 0x1E82 +0xFF = 0x1E83 +END_MAP diff --git a/share/i18n/csmapper/APPLE/GREEK%UCS.src b/share/i18n/csmapper/APPLE/GREEK%UCS.src new file mode 100644 index 0000000..4e53947 --- /dev/null +++ b/share/i18n/csmapper/APPLE/GREEK%UCS.src @@ -0,0 +1,275 @@ +# $FreeBSD$ +# $NetBSD: GREEK%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME GREEK/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: GREEK.TXT +# +# Contents: Map (external version) from Mac OS Greek +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update to match changes in Mac OS Greek +# encoding for Mac OS 9.2.2 and later. +# Update URLs, notes. Matches internal +# utom<b3>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n06 1998-Feb-05 Update to match internal utom<n4>, ufrm<n17>, +# and Text Encoding Converter versions 1.3: +# Change mapping for 0xAF from U+0387 to its +# canonical decomposition, U+00B7. Also +# update header comments to new format. +# n04 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n7>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. 
For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Greek code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Greek code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Greek character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Greek: +# ---------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Although a Mac OS script code is defined for Greek (smGreek = 6), +# the Greek localized system does not currently use it (the font +# family IDs are in the Mac OS Roman range). 
To determine if the +# Greek encoding is being used when the script code is smRoman (0), +# you must check if the system region code is 20, verGreece. +# +# The Mac OS Greek encoding is a superset of the repertoire of +# ISO 8859-7 (although characters are not at the same code points), +# except that LEFT & RIGHT SINGLE QUOTATION MARK replace the +# MODIFIER LETTER REVERSED COMMA & APOSTROPHE (spacing versions of +# Greek rough & smooth breathing marks) that are in ISO 8859-7. +# The added characters in Mac OS Greek include more punctuation and +# symbols and several accented Latin letters. +# +# Before Mac OS 9.2.2, code point 0x9C was SOFT HYPHEN (U+00AD), and +# code point 0xFF was undefined. In Mac OS 9.2.2 and later versions, +# SOFT HYPHEN was moved to 0xFF, and code point 0x9C was changed to be +# EURO SIGN (U+20AC); the standard Apple fonts are updated for Mac OS +# 9.2.2 to reflect this. There is a "no Euro sign" variant of the Mac +# OS Greek encoding that uses the older mapping; this can be used for +# older fonts. +# +# This "no Euro sign" variant of Mac OS Greek was the character set +# used by Mac OS Greek systems before 9.2.2 except for system 6.0.7, +# which used a variant character set but was quickly replaced with +# Greek system 6.0.7.1 using the "no Euro sign" character set +# documented here. Greek system 4.1 used a variant Greek set that had +# ISO 8859-7 in 0xA0-0xFF (with some holes filled in with DTP +# characters), and Mac OS Roman accented Roman letters in 0x80-0x9F.
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - The Mac OS Greek encoding changed for Mac OS 9.2.2 and later +# as follows: +# 0x9C, changed from 0x00AD SOFT HYPHEN to 0x20AC EURO SIGN +# 0xFF, changed from undefined to 0x00AD SOFT HYPHEN +# +# Changes from version n04 to version n06: +# +# - Change mapping of 0xAF from U+0387 to its canonical +# decomposition, U+00B7. +# +################## +0x00 - 0x7E = 0x00 - +0x80 = 0x00C4 +0x81 = 0x00B9 +0x82 = 0x00B2 +0x83 = 0x00C9 +0x84 = 0x00B3 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x0385 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x0384 +0x8C = 0x00A8 +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00A3 +0x93 = 0x2122 +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x2022 +0x97 = 0x00BD +0x98 = 0x2030 +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00A6 +0x9C = 0x20AC +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x2020 +0xA1 = 0x0393 +0xA2 = 0x0394 +0xA3 = 0x0398 +0xA4 = 0x039B +0xA5 = 0x039E +0xA6 = 0x03A0 +0xA7 = 0x00DF +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x03A3 +0xAB = 0x03AA +0xAC = 0x00A7 +0xAD = 0x2260 +0xAE = 0x00B0 +0xAF = 0x00B7 +0xB0 = 0x0391 +0xB1 = 0x00B1 +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x00A5 +0xB5 = 0x0392 +0xB6 = 0x0395 +0xB7 = 0x0396 +0xB8 = 0x0397 +0xB9 = 0x0399 +0xBA = 0x039A +0xBB = 0x039C +0xBC = 0x03A6 +0xBD = 0x03AB +0xBE = 0x03A8 +0xBF = 0x03A9 +0xC0 = 0x03AC +0xC1 = 0x039D +0xC2 = 0x00AC +0xC3 = 0x039F +0xC4 = 0x03A1 +0xC5 = 0x2248 +0xC6 = 0x03A4 +0xC7 = 0x00AB +0xC8 = 0x00BB +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x03A5 +0xCC = 0x03A7 +0xCD = 0x0386 +0xCE = 0x0388 +0xCF = 0x0153 +0xD0 = 0x2013 +0xD1 = 0x2015 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x00F7 +0xD7 = 0x0389 +0xD8 = 0x038A +0xD9 = 0x038C +0xDA = 0x038E +0xDB = 0x03AD +0xDC = 0x03AE 
+0xDD = 0x03AF +0xDE = 0x03CC +0xDF = 0x038F +0xE0 = 0x03CD +0xE1 = 0x03B1 +0xE2 = 0x03B2 +0xE3 = 0x03C8 +0xE4 = 0x03B4 +0xE5 = 0x03B5 +0xE6 = 0x03C6 +0xE7 = 0x03B3 +0xE8 = 0x03B7 +0xE9 = 0x03B9 +0xEA = 0x03BE +0xEB = 0x03BA +0xEC = 0x03BB +0xED = 0x03BC +0xEE = 0x03BD +0xEF = 0x03BF +0xF0 = 0x03C0 +0xF1 = 0x03CE +0xF2 = 0x03C1 +0xF3 = 0x03C3 +0xF4 = 0x03C4 +0xF5 = 0x03B8 +0xF6 = 0x03C9 +0xF7 = 0x03C2 +0xF8 = 0x03C7 +0xF9 = 0x03C5 +0xFA = 0x03B6 +0xFB = 0x03CA +0xFC = 0x03CB +0xFD = 0x0390 +0xFE = 0x03B0 +0xFF = 0x00AD +END_MAP diff --git a/share/i18n/csmapper/APPLE/GUJARATI%UCS.src b/share/i18n/csmapper/APPLE/GUJARATI%UCS.src new file mode 100644 index 0000000..90f36dc --- /dev/null +++ b/share/i18n/csmapper/APPLE/GUJARATI%UCS.src @@ -0,0 +1,279 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME GUJARATI/UCS +SRC_ZONE 0x00-0xFA +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 +#======================================================================= +# File name: GUJARATI.TXT +# +# Contents: Map (external version) from Mac OS Gujarati +# encoding to Unicode 2.1 and later. +# +# Copyright: (c) 1997-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n02 1998-Feb-05 First version; matches internal utom<n4>, +# ufrm<n5>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. 
+# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Gujarati code or code sequence +# (in hex as 0xNN or 0xNN+0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name or sequence +# of names. In some cases an additional comment follows the +# Unicode name(s). +# +# The entries are in two sections. The first section is for pairs of +# Mac OS Gujarati code points that must be mapped in a special way. +# The second section maps individual code points. +# +# Within each section, the entries are in Mac OS Gujarati code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Gujarati character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Gujarati: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. 
+# +# Mac OS Gujarati is based on IS 13194:1991 (ISCII-91), with the +# addition of several punctuation and symbol characters. However, +# Mac OS Gujarati does not support the ATR (attribute) mechanism of +# ISCII-91. +# +# 1. ISCII-91 features in Mac OS Gujarati include: +# +# a) Overloading of nukta +# +# In addition to using the nukta (0xE9) like a combining dot below, +# nukta is overloaded to function as a general character modifier. +# In this role, certain code points followed by 0xE9 are treated as +# a two-byte code point representing a character which may be +# rather different than the characters represented by either of +# the code points alone. For example, the character GUJARATI OM +# (U+0AD0) is represented in ISCII-91 as candrabindu + nukta. +# +# b) Explicit halant and soft halant +# +# A double halant (0xE8 + 0xE8) constitutes an "explicit halant", +# which will always appear as a halant instead of causing formation +# of a ligature or half-form consonant. +# +# Halant followed by nukta (0xE8 + 0xE9) constitutes a "soft +# halant", which prevents formation of a ligature and instead +# retains the half-form of the first consonant. +# +# c) Invisible consonant +# +# The byte 0xD9 (called INV in ISCII-91) is an invisible consonant: +# It behaves like a consonant but has no visible appearance. It is +# intended to be used (often in combination with halant) to display +# dependent forms in isolation, such as the RA forms or consonant +# half-forms. +# +# d) Extensions for Vedic, etc. +# +# The byte 0xF0 (called EXT in ISCII-91) followed by any byte in +# the range 0xA1-0xEE constitutes a two-byte code point which can +# be used to represent additional characters for Vedic (or other +# extensions); 0xF0 followed by any other byte value constitutes +# malformed text. Mac OS Gujarati supports this mechanism, but +# does not currently map any of these two-byte code points to +# anything. +# +# 2. 
Mac OS Gujarati additions +# +# Mac OS Gujarati adds characters using the code points +# 0x80-0x8A and 0x90. +# +# 3. Unused code points +# +# The following code points are currently unused, and are not shown +# here: 0x8B-0x8F, 0x91-0xA0, 0xAB, 0xAF, 0xC7, 0xCE, 0xD0, 0xD3, +# 0xE0, 0xE4, 0xEB-0xEF, 0xFB-0xFF. In addition, 0xF0 is not shown +# here, but it has a special function as described above. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. Mapping the byte pairs +# +# If one of the following byte values is encountered when mapping +# Mac OS Gujarati text - 0xA1, 0xAA, 0xDF, or 0xE8 - then the next +# byte (if there is one) should be examined. If the next byte is +# 0xE9 - or also 0xE8, if the first byte was 0xE8 - then the byte +# pair should be mapped using the first section of the mapping +# table below. Otherwise, each byte should be mapped using the +# second section of the mapping table below. +# +# - The Unicode Standard, Version 2.0, specifies how explicit +# halant and soft halant should be represented in Unicode; +# these mappings are used below. +# +# If the byte value 0xF0 is encountered when mapping Mac OS +# Gujarati text, then the next byte should be examined. If there +# is no next byte (e.g. 0xF0 at end of buffer), the mapping +# process should indicate incomplete character. If there is a next +# byte but it is not in the range 0xA1-0xEE, the mapping process +# should indicate malformed text. Otherwise, the mapping process +# should treat the byte pair as a valid two-byte code point with no +# mapping (e.g. map it to QUESTION MARK, REPLACEMENT CHARACTER, +# etc.). +# +# 2. Mapping the invisible consonant +# +# It has been suggested that INV in ISCII-91 should map to ZERO +# WIDTH NON-JOINER in Unicode. However, this causes problems with +# roundtrip fidelity: The ISCII-91 sequences 0xE8+0xE8 and 0xE8+0xD9 +# would map to the same sequence of Unicode characters.
We have +# instead mapped INV to LEFT-TO-RIGHT MARK, which avoids these +# problems. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +BEGIN_MAP +0x00 - 0x7F = 0x0000 - +0x80 = 0x00D7 +0x81 = 0x2212 +0x82 = 0x2013 +0x83 = 0x2014 +0x84 = 0x2018 +0x85 = 0x2019 +0x86 = 0x2026 +0x87 = 0x2022 +0x88 = 0x00A9 +0x89 = 0x00AE +0x8A = 0x2122 +0x90 = 0x0965 +0xA1 = 0x0A81 +#0xA1+0xE9 = 0x0AD0 +0xA2 = 0x0A82 +0xA3 = 0x0A83 +0xA4 = 0x0A85 +0xA5 = 0x0A86 +0xA6 = 0x0A87 +0xA7 = 0x0A88 +0xA8 = 0x0A89 +0xA9 = 0x0A8A +0xAA = 0x0A8B +#0xAA+0xE9 = 0x0AE0 +0xAC = 0x0A8F +0xAD = 0x0A90 +0xAE = 0x0A8D +0xB0 = 0x0A93 +0xB1 = 0x0A94 +0xB2 = 0x0A91 +0xB3 = 0x0A95 +0xB4 = 0x0A96 +0xB5 = 0x0A97 +0xB6 = 0x0A98 +0xB7 = 0x0A99 +0xB8 = 0x0A9A +0xB9 = 0x0A9B +0xBA = 0x0A9C +0xBB = 0x0A9D +0xBC = 0x0A9E +0xBD = 0x0A9F +0xBE = 0x0AA0 +0xBF = 0x0AA1 +0xC0 = 0x0AA2 +0xC1 = 0x0AA3 +0xC2 = 0x0AA4 +0xC3 = 0x0AA5 +0xC4 = 0x0AA6 +0xC5 = 0x0AA7 +0xC6 = 0x0AA8 +0xC8 = 0x0AAA +0xC9 = 0x0AAB +0xCA = 0x0AAC +0xCB = 0x0AAD +0xCC = 0x0AAE +0xCD = 0x0AAF +0xCF = 0x0AB0 +0xD1 = 0x0AB2 +0xD2 = 0x0AB3 +0xD4 = 0x0AB5 +0xD5 = 0x0AB6 +0xD6 = 0x0AB7 +0xD7 = 0x0AB8 +0xD8 = 0x0AB9 +0xD9 = 0x200E +0xDA = 0x0ABE +0xDB = 0x0ABF +0xDC = 0x0AC0 +0xDD = 0x0AC1 +0xDE = 0x0AC2 +0xDF = 0x0AC3 +#0xDF+0xE9 = 0x0AC4 +0xE1 = 0x0AC7 +0xE2 = 0x0AC8 +0xE3 = 0x0AC5 +0xE5 = 0x0ACB +0xE6 = 0x0ACC +0xE7 = 0x0AC9 +0xE8 = 0x0ACD +#0xE8+0xE8 = 0x0ACD+0x200C +#0xE8+0xE9 = 0x0ACD+0x200D +0xE9 = 0x0ABC +0xEA = 0x0964 +0xF1 = 0x0AE6 +0xF2 = 0x0AE7 +0xF3 = 0x0AE8 +0xF4 = 0x0AE9 +0xF5 = 0x0AEA +0xF6 = 0x0AEB +0xF7 = 0x0AEC +0xF8 = 0x0AED +0xF9 = 0x0AEE +0xFA = 0x0AEF +END_MAP diff --git a/share/i18n/csmapper/APPLE/GURMUKHI%UCS.src b/share/i18n/csmapper/APPLE/GURMUKHI%UCS.src new file mode 100644 index 0000000..41ed461 --- /dev/null +++ b/share/i18n/csmapper/APPLE/GURMUKHI%UCS.src @@ -0,0 +1,333 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME GURMUKHI/UCS +SRC_ZONE 0x00-0xFA +OOB_MODE ILSEQ 
+DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 +#======================================================================= +# File name: GURMUKHI.TXT +# +# Contents: Map (external version) from Mac OS Gurmukhi +# encoding to Unicode 2.1 and later. +# +# Copyright: (c) 1997-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Change mappings for 0x91, 0xD5 based on +# new decomposition rules. Update URLs, +# notes. Matches internal utom<b2>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n02 1998-Feb-05 First version; matches internal utom<n5>, +# ufrm<n6>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". 
+# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Gurmukhi code or code sequence +# (in hex as 0xNN or 0xNN+0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name or sequence +# of names. In some cases an additional comment follows the +# Unicode name(s). +# +# The entries are in two sections. The first section is for pairs of +# Mac OS Gurmukhi code points that must be mapped in a special way. +# The second section maps individual code points. +# +# Within each section, the entries are in Mac OS Gurmukhi code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Gurmukhi character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Gurmukhi: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Gurmukhi is based on IS 13194:1991 (ISCII-91), with the +# addition of several punctuation and symbol characters. However, +# Mac OS Gurmukhi does not support the ATR (attribute) mechanism of +# ISCII-91. +# +# 1. ISCII-91 features in Mac OS Gurmukhi include: +# +# a) Explicit halant and soft halant +# +# A double halant (0xE8 + 0xE8) constitutes an "explicit halant", +# which will always appear as a halant instead of causing formation +# of a ligature or half-form consonant. +# +# Halant followed by nukta (0xE8 + 0xE9) constitutes a "soft +# halant", which prevents formation of a ligature and instead +# retains the half-form of the first consonant. 
+# +# b) Invisible consonant +# +# The byte 0xD9 (called INV in ISCII-91) is an invisible consonant: +# It behaves like a consonant but has no visible appearance. It is +# intended to be used (often in combination with halant) to display +# dependent forms in isolation, such as the RA forms or consonant +# half-forms. +# +# c) Extensions for Vedic, etc. +# +# The byte 0xF0 (called EXT in ISCII-91) followed by any byte in +# the range 0xA1-0xEE constitutes a two-byte code point which can +# be used to represent additional characters for Vedic (or other +# extensions); 0xF0 followed by any other byte value constitutes +# malformed text. Mac OS Gurmukhi supports this mechanism, but +# does not currently map any of these two-byte code points to +# anything. +# +# 2. Mac OS Gurmukhi additions +# +# Mac OS Gurmukhi adds characters using the code points +# 0x80-0x8A and 0x90-0x94 (the latter are some Gurmukhi additions). +# +# 3. Unused code points +# +# The following code points are currently unused, and are not shown +# here: 0x8B-0x8F, 0x95-0xA1, 0xA3, 0xAA-0xAB, 0xAE-0xAF, 0xB2, +# 0xC7, 0xCE, 0xD0, 0xD2-0xD3, 0xD6, 0xDF-0xE0, 0xE3-0xE4, 0xE7, +# 0xEB-0xEF, 0xFB-0xFF. In addition, 0xF0 is not shown here, but it +# has a special function as described above. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. Mapping the byte pairs +# +# If the byte value 0xE8 is encountered when mapping Mac OS +# Gurmukhi text, then the next byte (if there is one) should be +# examined. If the next byte is 0xE8 or 0xE9, then the byte pair +# should be mapped using the first section of the mapping table +# below. Otherwise, each byte should be mapped using the second +# section of the mapping table below. +# +# - The Unicode Standard, Version 2.0, specifies how explicit +# halant and soft halant should be represented in Unicode; +# these mappings are used below. 
+# +# If the byte value 0xF0 is encountered when mapping Mac OS +# Gurmukhi text, then the next byte should be examined. If there +# is no next byte (e.g. 0xF0 at end of buffer), the mapping +# process should indicate incomplete character. If there is a next +# byte but it is not in the range 0xA1-0xEE, the mapping process +# should indicate malformed text. Otherwise, the mapping process +# should treat the byte pair as a valid two-byte code point with no +# mapping (e.g. map it to QUESTION MARK, REPLACEMENT CHARACTER, +# etc.). +# +# 2. Mapping the invisible consonant +# +# It has been suggested that INV in ISCII-91 should map to ZERO +# WIDTH NON-JOINER in Unicode. However, this causes problems with +# roundtrip fidelity: The ISCII-91 sequences 0xE8+0xE8 and 0xE8+0xD9 +# would map to the same sequence of Unicode characters. We have +# instead mapped INV to LEFT-TO-RIGHT MARK, which avoids these +# problems. +# +# 3. Mappings using corporate characters +# +# Mapping the GURMUKHI LETTER SHA 0xD5 presents an interesting +# problem. At first glance, we could map it to the single Unicode +# character 0x0A36. +# +# However, our goal is that the mappings provided here should also +# be able to generate the mappings to maximally decomposed Unicode +# by simple recursive substitution of the canonical decompositions +# in the Unicode database. We want mapping tables derived this way +# to retain full roundtrip fidelity. +# +# Since the canonical decomposition of 0x0A36 is 0x0A38+0x0A3C, +# the decomposition mapping for 0xD5 would be identical with the +# decomposition mapping for 0xD7+0xE9, and roundtrip fidelity would +# be lost. +# +# We solve this problem by using a grouping hint (one of the set of +# transcoding hints defined by Apple). +# +# Apple has defined a block of 32 corporate characters as "transcoding +# hints." 
These are used in combination with standard Unicode characters +# to force them to be treated in a special way for mapping to other +# encodings; they have no other effect. Sixteen of these transcoding +# hints are "grouping hints" - they indicate that the next 2-4 Unicode +# characters should be treated as a single entity for transcoding. The +# other sixteen transcoding hints are "variant tags" - they are like +# combining characters, and can follow a standard Unicode (or a sequence +# consisting of a base character and other combining characters) to +# cause it to be treated in a special way for transcoding. These always +# terminate a combining-character sequence. +# +# The transcoding hint used in this mapping table is: +# 0xF860 group next 2 characters +# +# Then we can map 0xD5 as follows: +# 0xD5 -> 0xF860+0x0A38+0x0A3C +# +# We could also have used a variant tag such as 0xF87F and mapped it +# this way: +# 0xD5 -> 0x0A36+0xF87F +# +# 4. Additional loose mappings from Unicode +# +# These are not preserved in roundtrip mappings. +# +# 0A59 -> 0xB4+0xE9 # GURMUKHI LETTER KHHA +# 0A5A -> 0xB5+0xE9 # GURMUKHI LETTER GHHA +# 0A5B -> 0xBA+0xE9 # GURMUKHI LETTER ZA +# 0A5E -> 0xC9+0xE9 # GURMUKHI LETTER FA +# +# 0A70 -> 0xA2 # GURMUKHI TIPPI +# +# Loose mappings from Unicode should also map U+0A71 (GURMUKHI ADDAK) +# followed by any Gurmukhi consonant to the equivalent ISCII-91 +# consonant plus halant plus the consonant again. For example: +# +# 0A71+0A15 -> 0xB3+0xE8+0xB3 +# 0A71+0A16 -> 0xB4+0xE8+0xB4 +# ...
+# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - Change mapping of 0x91 from 0xF860+0x0A21+0x0A3C to 0x0A5C GURMUKHI +# LETTER RRA, now that the canonical decomposition of 0x0A5C to +# 0x0A21+0x0A3C has been deleted +# +# - Change mapping of 0xD5 from 0x0A36 GURMUKHI LETTER SHA to +# 0xF860+0x0A38+0x0A3C, now that a canonical decomposition of 0x0A36 +# to 0x0A38+0x0A3C has been added. +# +################## +BEGIN_MAP +0x00 - 0x7F = 0x0000 - +0x80 = 0x00D7 +0x81 = 0x2212 +0x82 = 0x2013 +0x83 = 0x2014 +0x84 = 0x2018 +0x85 = 0x2019 +0x86 = 0x2026 +0x87 = 0x2022 +0x88 = 0x00A9 +0x89 = 0x00AE +0x8A = 0x2122 +0x90 = 0x0A71 +0x91 = 0x0A5C +0x92 = 0x0A73 +0x93 = 0x0A72 +0x94 = 0x0A74 +0xA2 = 0x0A02 +0xA4 = 0x0A05 +0xA5 = 0x0A06 +0xA6 = 0x0A07 +0xA7 = 0x0A08 +0xA8 = 0x0A09 +0xA9 = 0x0A0A +0xAC = 0x0A0F +0xAD = 0x0A10 +0xB0 = 0x0A13 +0xB1 = 0x0A14 +0xB3 = 0x0A15 +0xB4 = 0x0A16 +0xB5 = 0x0A17 +0xB6 = 0x0A18 +0xB7 = 0x0A19 +0xB8 = 0x0A1A +0xB9 = 0x0A1B +0xBA = 0x0A1C +0xBB = 0x0A1D +0xBC = 0x0A1E +0xBD = 0x0A1F +0xBE = 0x0A20 +0xBF = 0x0A21 +0xC0 = 0x0A22 +0xC1 = 0x0A23 +0xC2 = 0x0A24 +0xC3 = 0x0A25 +0xC4 = 0x0A26 +0xC5 = 0x0A27 +0xC6 = 0x0A28 +0xC8 = 0x0A2A +0xC9 = 0x0A2B +0xCA = 0x0A2C +0xCB = 0x0A2D +0xCC = 0x0A2E +0xCD = 0x0A2F +0xCF = 0x0A30 +0xD1 = 0x0A32 +0xD4 = 0x0A35 +#0xD5 = 0xF860+0x0A38+0x0A3C +0xD7 = 0x0A38 +0xD8 = 0x0A39 +0xD9 = 0x200E +0xDA = 0x0A3E +0xDB = 0x0A3F +0xDC = 0x0A40 +0xDD = 0x0A41 +0xDE = 0x0A42 +0xE1 = 0x0A47 +0xE2 = 0x0A48 +0xE5 = 0x0A4B +0xE6 = 0x0A4C +0xE8 = 0x0A4D +#0xE8+0xE8 = 0x0A4D+0x200C +#0xE8+0xE9 = 0x0A4D+0x200D +0xE9 = 0x0A3C +0xEA = 0x0964 +0xF1 = 0x0A66 +0xF2 = 0x0A67 +0xF3 = 0x0A68 +0xF4 = 0x0A69 +0xF5 = 0x0A6A +0xF6 = 0x0A6B +0xF7 = 0x0A6C +0xF8 = 0x0A6D +0xF9 = 0x0A6E +0xFA = 0x0A6F +END_MAP diff --git a/share/i18n/csmapper/APPLE/HEBREW%UCS.src b/share/i18n/csmapper/APPLE/HEBREW%UCS.src new file mode 100644 index 
0000000..95bfbf5 --- /dev/null +++ b/share/i18n/csmapper/APPLE/HEBREW%UCS.src @@ -0,0 +1,517 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME HEBREW/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +#======================================================================= +# File name: HEBREW.TXT +# +# Contents: Map (external version) from Mac OS Hebrew +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments; add section on +# roundtrip considerations. Matches internal +# xml <c1.4> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Don't require left-right context for digits +# 0x30-0x39. Change mapping of 0x81 to use +# decomposition. Reverse the mappings of 0xA8, +# 0xA9. Update URLs, notes. Matches internal +# utom<b7>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n03 1998-Feb-05 Show required Unicode character +# directionality in a different way. Update +# mappings for 0xC0 and 0xDE to use +# transcoding hints; matches internal utom<n6>, +# ufrm<n20>, and Text Encoding Converter +# version 1.3. Rewrite header comments. +# n01 1995-Nov-15 First version. Matches internal ufrm<n8>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. 
("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Hebrew code (in hex as 0xNN). +# Column #2 is the corresponding Unicode or Unicode sequence (in +# hex as 0xNNNN, 0xNNNN+0xNNNN, etc.). Sequences of up to 3 +# Unicode characters are used here. A single Unicode character +# may be preceded by a tag indicating required directionality +# (i.e. <LR>+0xNNNN or <RL>+0xNNNN). +# Column #3 is a comment containing the Unicode name. +# +# The entries are in Mac OS Hebrew code order. +# +# Some of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Hebrew character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Hebrew: +# ----------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# 1. General +# +# The Mac OS Hebrew character set supports the Hebrew and Yiddish +# languages.
It incorporates the Hebrew letter repertoire of +# ISO 8859-8, and uses the same code points for them, 0xE0-0xFA. +# It also incorporates the ASCII character set. In addition, the +# Mac OS Hebrew character set includes the following: +# +# - Hebrew points (nikud marks) at 0xC6, 0xCB-0xCF and 0xD8-0xDF. +# These are non-spacing combining marks. Note that the RAFE point +# at 0xD8 is not displayed correctly in some fonts, and cannot be +# typed using the keyboard layouts in the current Hebrew localized +# systems. Also note: The character given in Unicode as QAMATS +# (U+05B8) actually refers to two different sounds, depending on +# context. For example, when ALEF is followed by QAMATS, the QAMATS +# can actually refer to two different sounds depending on the +# following letters. The Mac OS Hebrew character set separately +# encodes these two sounds for the same graphic shape, as "qamats" +# (0xCB) and "qamats qatan" (0xDE). The "qamats" character is more +# common, so it is mapped to the Unicode QAMATS; "qamats qatan" can +# only be used with a limited number of characters, and it is +# mapped using a corporate-zone variant tag (see below). +# +# - Various Hebrew ligatures at 0x81, 0xC0, 0xC7, 0xC8, 0xD6, and +# 0xD7. Also note that the Yiddish YOD YOD PATAH ligature at 0x81 +# is missing in some fonts. +# +# - The NEW SHEQEL SIGN at 0xA6. +# +# - Latin characters with diacritics at 0x80 and 0x82-0x9F. However, +# most of these cannot be typed using the keyboard layouts in the +# Hebrew localized systems. +# +# - Right-left versions of certain ASCII punctuation, symbols and +# digits: 0xA0-0xA5, 0xA7-0xBF, 0xFB-0xFF. See below. +# +# - Miscellaneous additional punctuation at 0xC1, 0xC9, 0xCA, and +# 0xD0-0xD5. There is a variant of the Hebrew encoding in which +# the LEFT SINGLE QUOTATION MARK at 0xD4 is replaced by FIGURE +# SPACE. The glyphs for some of the other punctuation characters +# are missing in some fonts. 
+# +# - Four obsolete characters at 0xC2-0xC5 known as canorals (not to +# be confused with cantillation marks!). These were used for +# manual positioning of nikud marks before System 7.1 (at which +# point nikud positioning became automatic with WorldScript.). +# +# 2. Directional characters and roundtrip fidelity +# +# The Mac OS Hebrew character set was developed around 1987. At that +# time the bidirectional line layout algorithm used in the Mac OS +# Hebrew system was fairly simple; it used only a few direction +# classes (instead of the 19 now used in the Unicode bidirectional +# algorithm). In order to permit users to handle some tricky layout +# problems, certain punctuation, symbol, and digit characters have +# duplicate code points, one with a left-right direction attribute and +# the other with a right-left direction attribute. +# +# For example, plus sign is encoded at 0x2B with a left-right +# attribute, and at 0xAB with a right-left attribute. However, there +# is only one PLUS SIGN character in Unicode. This leads to some +# interesting problems when mapping between Mac OS Hebrew and Unicode; +# see below. +# +# A related problem is that even when a particular character is +# encoded only once in Mac OS Hebrew, it may have a different +# direction attribute than the corresponding Unicode character. +# +# For example, the Mac OS Hebrew character at 0xC9 is HORIZONTAL +# ELLIPSIS with strong right-left direction. However, the Unicode +# character HORIZONTAL ELLIPSIS has direction class neutral. +# +# 3. Font variants +# +# The table in this file gives the Unicode mappings for the standard +# Mac OS Hebrew encoding. This encoding is supported by many of the +# Apple fonts (including all of the fonts in the Hebrew Language Kit), +# and is the encoding supported by the text processing utilities.
+# However, some TrueType fonts provided with the localized Hebrew +# system implement a slightly different encoding; the difference is +# only in one code point, 0xD4. For the standard variant, this is: +# 0xD4 -> 0x2018 LEFT SINGLE QUOTATION MARK, right-left +# +# The TrueType variant is used by the following TrueType fonts from +# the localized system: Caesarea, Carmel Book, Gilboa, Ramat Sharon, +# and Sinai Book. For these, 0xD4 is as follows: +# 0xD4 -> 0x2007 FIGURE SPACE, right-left +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. Matching the direction of Mac OS Hebrew characters +# +# When Mac OS Hebrew encodes a character twice but with different +# direction attributes for the two code points - as in the case of +# plus sign mentioned above - we need a way to map both Mac OS Hebrew +# code points to Unicode and back again without loss of information. +# With the plus sign, for example, mapping one of the Mac OS Hebrew +# characters to a code in the Unicode corporate use zone is +# undesirable, since both of the plus sign characters are likely to +# be used in text that is interchanged. +# +# The problem is solved with the use of direction override characters +# and direction-dependent mappings. When mapping from Mac OS Hebrew +# to Unicode, we use direction overrides as necessary to force the +# direction of the resulting Unicode characters. +# +# The required direction is indicated by a direction tag in the +# mappings. A tag of <LR> means the corresponding Unicode character +# must have a strong left-right context, and a tag of <RL> indicates +# a right-left context. +# +# For example, the mapping of 0x2B is given as <LR>+0x002B; the +# mapping of 0xAB is given as <RL>+0x002B.
If we map an isolated +# instance of 0x2B to Unicode, it should be mapped as follows (LRO +# indicates LEFT-RIGHT OVERRIDE, PDF indicates POP DIRECTION +# FORMATTING): +# +# 0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF) +# +# When mapping several characters in a row that require direction +# forcing, the overrides need only be used at the beginning and end. +# For example: +# +# 0x24 0x20 0x28 0x29 -> 0x202D 0x0024 0x0020 0x0028 0x0029 0x202C +# +# If neutral characters that require direction forcing are already +# between strong-direction characters with matching directionality, +# then direction overrides need not be used. Direction overrides are +# always needed to map the right-left digits at 0xB0-0xB9. +# +# When mapping from Unicode to Mac OS Hebrew, the Unicode +# bidirectional algorithm should be used to determine resolved +# direction of the Unicode characters. The mapping from Unicode to +# Mac OS Hebrew can then be disambiguated by the use of the resolved +# direction: +# +# Unicode 0x002B -> Mac OS Hebrew 0x2B (if L) or 0xAB (if R) +# +# However, this also means the direction override characters should +# be discarded when mapping from Unicode to Mac OS Hebrew (after +# they have been used to determine resolved direction), since the +# direction override information is carried by the code point itself. +# +# Even when direction overrides are not needed for roundtrip +# fidelity, they are sometimes used when mapping Mac OS Hebrew +# characters to Unicode in order to achieve similar text layout with +# the resulting Unicode text. For example, the single Mac OS Hebrew +# ellipsis character has direction class right-left, and there is no +# left-right version. However, the Unicode HORIZONTAL ELLIPSIS +# character has direction class neutral (which means it may end up +# with a resolved direction of left-right if surrounded by left-right +# characters).
When mapping the Mac OS Hebrew ellipsis to Unicode, it +# is surrounded with a direction override to help preserve proper +# text layout. The resolved direction is not needed or used when +# mapping the Unicode HORIZONTAL ELLIPSIS back to Mac OS Hebrew. +# +# 2. Use of corporate-zone Unicodes +# +# The goals in the mappings provided here are: +# - Ensure roundtrip mapping from every character in the Mac OS +# Hebrew character set to Unicode and back +# - Use standard Unicode characters as much as possible, to +# maximize interchangeability of the resulting Unicode text. +# Whenever possible, avoid having content carried by private-use +# characters. +# +# Some of the characters in the Mac OS Hebrew character set do not +# correspond to distinct, single Unicode characters. To map these +# and satisfy both goals above, we employ various strategies. +# +# a) If possible, use private use characters in combination with +# standard Unicode characters to mark variants of the standard +# Unicode character. +# +# Apple has defined a block of 32 corporate characters as "transcoding +# hints." These are used in combination with standard Unicode characters +# to force them to be treated in a special way for mapping to other +# encodings; they have no other effect. Sixteen of these transcoding +# hints are "grouping hints" - they indicate that the next 2-4 Unicode +# characters should be treated as a single entity for transcoding. The +# other sixteen transcoding hints are "variant tags" - they are like +# combining characters, and can follow a standard Unicode (or a sequence +# consisting of a base character and other combining characters) to +# cause it to be treated in a special way for transcoding. These always +# terminate a combining-character sequence. 
+# +# Two transcoding hints are used in this mapping table: a grouping hint +# and a variant tag: +# hint: +# 0xF86A group next 2 characters, right-left directionality +# 0xF87F variant tag +# +# In Mac OS Hebrew, 0xC0 is a ligature for lamed holam. This can also +# be represented in Mac OS Hebrew as 0xEC+0xDD, using separate +# characters for lamed and holam. The latter sequence is mapped to +# Unicode as 0x05DC+0x05B9, i.e. as the sequence HEBREW LETTER LAMED + +# HEBREW POINT HOLAM. We want to map the ligature 0xC0 using the same +# standard Unicode characters, but for round-trip fidelity we need to +# distinguish it from the mapping of the sequence 0xEC+0xDD. Thus for +# 0xC0 we use a grouping hint, and map as follows: +# +# 0xC0 -> 0xF86A+0x05DC+0x05B9 +# +# The variant tag is used for "qamats qatan" to mark it as an alternate +# for HEBREW POINT QAMATS, as follows: +# +# 0xDE -> 0x05B8+0xF87F +# +# b) Otherwise, use private use characters by themselves to map Mac OS +# Hebrew characters which have no relationship to any standard Unicode +# character. +# +# The following additional corporate zone Unicode characters are used +# for this purpose here (to map the obsolete "canorals", see above): +# +# 0xF89B Hebrew canoral 1 +# 0xF89C Hebrew canoral 2 +# 0xF89D Hebrew canoral 3 +# 0xF89E Hebrew canoral 4 +# +# 3. Roundtrip considerations when mapping to decomposed Unicode +# +# Both Mac OS Hebrew and Unicode provide multiple ways of representing +# certain letter-and-point combinations. For example, HEBREW LETTER +# VAV WITH HOLAM can be represented in Unicode as the single character +# 0xFB4B or as the sequence 0x05D5 0x05B9; similarly, it can be +# represented in Mac OS Hebrew as 0xC7 or as the sequence 0xE5 0xDD. +# This leads to some roundtrip problems. First note that we have the +# following mappings without such problems: +# +# Mac standard decomp. of reverse map +# OS Unicode mapping std. mapping of decomp.
+# ---- ---------------------------------- ------------- ----------- +# 0xC6 0x05BC ... POINT DAGESH OR MAPIQ 0x05BC (same) 0xC6 +# 0xE5 0x05D5 ... LETTER VAV 0x05D5 (same) 0xE5 +# 0xDD 0x05B9 ... POINT HOLAM 0x05B9 (same) 0xDD +# +# However, those mappings above cause roundtrip problems for the +# following mappings if they are decomposed: +# +# Mac standard decomp. of reverse map +# OS Unicode mapping std. mapping of decomp. +# ---- ---------------------------------- ------------- ----------- +# 0xC7 0xFB4B ... LETTER VAV WITH HOLAM 0x05D5 0x05B9 0xE5 0xDD +# 0xC8 0xFB35 ... LETTER VAV WITH DAGESH 0x05D5 0x05BC 0xE5 0xC6 +# +# One solution is to use a grouping transcoding hint with the two +# decompositions above to mark the decomposed sequence for special +# treatment in transcoding. This yields the following mappings to +# decomposed Unicode: +# +# Mac decomposed +# OS Unicode mapping +# ---- -------------------- +# 0xC7 0xF86A 0x05D5 0x05B9 +# 0xC8 0xF86A 0x05D5 0x05BC +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - Stop specifying left-right context for digits 0x30-0x39, since the +# corresponding Unicodes 0x0030-0x0039 already have left-right +# directionality. +# +# - Change mapping of 0x81 from 0xFB1F HEBREW LIGATURE YIDDISH YOD YOD +# PATAH to its canonical decomposition 0x05F2+0x05B7 to improve +# cross-platform compatibility (Windows doesn't handle 0xFB1F) +# +# - Interchange the mappings of 0xA8 and 0xA9 to obtain the correct +# open/close behavior; they work differently than in Mac Arabic.
+# The old mapping was +# 0xA8 0x0028 # LEFT PARENTHESIS, right-left +# 0xA9 0x0029 # RIGHT PARENTHESIS, right-left +# and the new mapping is +# 0xA8 0x0029 # RIGHT PARENTHESIS, right-left +# 0xA9 0x0028 # LEFT PARENTHESIS, right-left +# +# Changes from version n01 to version n03: +# +# - Change mapping for 0xC0 from single corporate character to +# grouping hint plus standard Unicodes +# +# - Change mapping for 0xDE from single corporate character to +# standard Unicode plus variant tag +# +################## + +0x00 - 0x7F = 0x0000 - +0x80 = 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x81 = 0xFB1F # 0x05F2+0x05B7 # HEBREW LIGATURE YIDDISH YOD YOD PATAH +0x82 = 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0x83 = 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0x84 = 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0x85 = 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x86 = 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0x87 = 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0x88 = 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0x89 = 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0x8A = 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0x8B = 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0x8C = 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0x8D = 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0x8E = 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0x8F = 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0x90 = 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0x91 = 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0x92 = 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0x93 = 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0x94 = 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0x95 = 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0x96 = 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0x97 = 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0x98 = 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0x99 = 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0x9A = 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0x9B = 0x00F5 # LATIN SMALL LETTER O WITH 
TILDE +0x9C = 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0x9D = 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0x9E = 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0x9F = 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xA0 = 0x0020 # SPACE, right-left +0xA1 = 0x0021 # EXCLAMATION MARK, right-left +0xA2 = 0x0022 # QUOTATION MARK, right-left +0xA3 = 0x0023 # NUMBER SIGN, right-left +0xA4 = 0x0024 # DOLLAR SIGN, right-left +0xA5 = 0x0025 # PERCENT SIGN, right-left +0xA6 = 0x20AA # NEW SHEQEL SIGN +0xA7 = 0x0027 # APOSTROPHE, right-left +0xA8 = 0x0029 # RIGHT PARENTHESIS, right-left # close parenthesis +0xA9 = 0x0028 # LEFT PARENTHESIS, right-left # open parenthesis +0xAA = 0x002A # ASTERISK, right-left +0xAB = 0x002B # PLUS SIGN, right-left +0xAC = 0x002C # COMMA, right-left +0xAD = 0x002D # HYPHEN-MINUS, right-left +0xAE = 0x002E # FULL STOP, right-left +0xAF = 0x002F # SOLIDUS, right-left +0xB0 = 0x0030 # DIGIT ZERO, right-left (need override) +0xB1 = 0x0031 # DIGIT ONE, right-left (need override) +0xB2 = 0x0032 # DIGIT TWO, right-left (need override) +0xB3 = 0x0033 # DIGIT THREE, right-left (need override) +0xB4 = 0x0034 # DIGIT FOUR, right-left (need override) +0xB5 = 0x0035 # DIGIT FIVE, right-left (need override) +0xB6 = 0x0036 # DIGIT SIX, right-left (need override) +0xB7 = 0x0037 # DIGIT SEVEN, right-left (need override) +0xB8 = 0x0038 # DIGIT EIGHT, right-left (need override) +0xB9 = 0x0039 # DIGIT NINE, right-left (need override) +0xBA = 0x003A # COLON, right-left +0xBB = 0x003B # SEMICOLON, right-left +0xBC = 0x003C # LESS-THAN SIGN, right-left +0xBD = 0x003D # EQUALS SIGN, right-left +0xBE = 0x003E # GREATER-THAN SIGN, right-left +0xBF = 0x003F # QUESTION MARK, right-left +0xC0 = 0x05B9 # 0xF86A+0x05DC+0x05B9 # Hebrew ligature lamed holam +0xC1 = 0x201E # DOUBLE LOW-9 QUOTATION MARK, right-left +0xC2 = 0xF89B # Hebrew canoral 1 +0xC3 = 0xF89C # Hebrew canoral 2 +0xC4 = 0xF89D # Hebrew canoral 3 +0xC5 = 0xF89E # Hebrew canoral 4 +0xC6 = 0x05BC # HEBREW POINT 
DAGESH OR MAPIQ +0xC7 = 0xFB4B # HEBREW LETTER VAV WITH HOLAM +0xC8 = 0xFB35 # HEBREW LETTER VAV WITH DAGESH +0xC9 = 0x2026 # HORIZONTAL ELLIPSIS, right-left +0xCA = 0x00A0 # NO-BREAK SPACE, right-left +0xCB = 0x05B8 # HEBREW POINT QAMATS +0xCC = 0x05B7 # HEBREW POINT PATAH +0xCD = 0x05B5 # HEBREW POINT TSERE +0xCE = 0x05B6 # HEBREW POINT SEGOL +0xCF = 0x05B4 # HEBREW POINT HIRIQ +0xD0 = 0x2013 # EN DASH, right-left +0xD1 = 0x2014 # EM DASH, right-left +0xD2 = 0x201C # LEFT DOUBLE QUOTATION MARK, right-left +0xD3 = 0x201D # RIGHT DOUBLE QUOTATION MARK, right-left +0xD4 = 0x2018 # LEFT SINGLE QUOTATION MARK, right-left +0xD5 = 0x2019 # RIGHT SINGLE QUOTATION MARK, right-left +0xD6 = 0xFB2A # HEBREW LETTER SHIN WITH SHIN DOT +0xD7 = 0xFB2B # HEBREW LETTER SHIN WITH SIN DOT +0xD8 = 0x05BF # HEBREW POINT RAFE +0xD9 = 0x05B0 # HEBREW POINT SHEVA +0xDA = 0x05B2 # HEBREW POINT HATAF PATAH +0xDB = 0x05B1 # HEBREW POINT HATAF SEGOL +0xDC = 0x05BB # HEBREW POINT QUBUTS +0xDD = 0x05B9 # HEBREW POINT HOLAM +0xDE = 0xF87F # 0x05B8+0xF87F # HEBREW POINT QAMATS, alternate form "qamats qatan" +0xDF = 0x05B3 # HEBREW POINT HATAF QAMATS +0xE0 = 0x05D0 # HEBREW LETTER ALEF +0xE1 = 0x05D1 # HEBREW LETTER BET +0xE2 = 0x05D2 # HEBREW LETTER GIMEL +0xE3 = 0x05D3 # HEBREW LETTER DALET +0xE4 = 0x05D4 # HEBREW LETTER HE +0xE5 = 0x05D5 # HEBREW LETTER VAV +0xE6 = 0x05D6 # HEBREW LETTER ZAYIN +0xE7 = 0x05D7 # HEBREW LETTER HET +0xE8 = 0x05D8 # HEBREW LETTER TET +0xE9 = 0x05D9 # HEBREW LETTER YOD +0xEA = 0x05DA # HEBREW LETTER FINAL KAF +0xEB = 0x05DB # HEBREW LETTER KAF +0xEC = 0x05DC # HEBREW LETTER LAMED +0xED = 0x05DD # HEBREW LETTER FINAL MEM +0xEE = 0x05DE # HEBREW LETTER MEM +0xEF = 0x05DF # HEBREW LETTER FINAL NUN +0xF0 = 0x05E0 # HEBREW LETTER NUN +0xF1 = 0x05E1 # HEBREW LETTER SAMEKH +0xF2 = 0x05E2 # HEBREW LETTER AYIN +0xF3 = 0x05E3 # HEBREW LETTER FINAL PE +0xF4 = 0x05E4 # HEBREW LETTER PE +0xF5 = 0x05E5 # HEBREW LETTER FINAL TSADI +0xF6 = 0x05E6 # HEBREW LETTER TSADI +0xF7 = 
0x05E7 # HEBREW LETTER QOF +0xF8 = 0x05E8 # HEBREW LETTER RESH +0xF9 = 0x05E9 # HEBREW LETTER SHIN +0xFA = 0x05EA # HEBREW LETTER TAV +0xFB = 0x007D # RIGHT CURLY BRACKET, right-left +0xFC = 0x005D # RIGHT SQUARE BRACKET, right-left +0xFD = 0x007B # LEFT CURLY BRACKET, right-left +0xFE = 0x005B # LEFT SQUARE BRACKET, right-left +0xFF = 0x007C # VERTICAL LINE, right-left +END_MAP diff --git a/share/i18n/csmapper/APPLE/ICELAND%UCS.src b/share/i18n/csmapper/APPLE/ICELAND%UCS.src new file mode 100644 index 0000000..3e0be22 --- /dev/null +++ b/share/i18n/csmapper/APPLE/ICELAND%UCS.src @@ -0,0 +1,285 @@ +# $FreeBSD$ +# $NetBSD: ICELAND%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME ICELAND/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +#======================================================================= +# File name: ICELAND.TXT +# +# Contents: Map (external version) from Mac OS Icelandic +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b3>. +# b02 1999-Sep-22 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to EURO +# SIGN. Update contact e-mail address. Matches +# internal utom<b2>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n06 1998-Feb-05 Minor update to header comments, add +# information on font variants +# n03 1997-Dec-14 Update to match internal utom<n4>, ufrm<n16>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n02 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n5>. 
+# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Icelandic code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Icelandic code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Icelandic character set uses the standard control characters +# at 0x00-0x1F and 0x7F. 
+# +# Notes on Mac OS Icelandic: +# -------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# 1. General +# +# Mac OS Icelandic is used for Icelandic and Faroese. +# +# The Mac OS Icelandic encoding shares the script code smRoman +# (0) with the standard Mac OS Roman encoding. To determine if +# the Icelandic encoding is being used, you must also check if +# the system region code is 21, verIceland. +# +# This character set is a variant of standard Mac OS Roman, +# adding upper and lower eth, thorn, and Y acute. It has 6 code +# point differences from standard Mac OS Roman. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There are +# "currency sign" variants of the Mac OS Icelandic encoding that +# still map 0xDB to U+00A4; these can be used for older fonts. +# +# 2. Font variants +# +# The table in this file gives the Unicode mappings for the standard +# Mac OS Icelandic encoding. This encoding is supported by the +# Icelandic versions of the fonts Chicago, Geneva, Monaco, and New +# York, and is the encoding supported by the text processing +# utilities. However, other TrueType fonts implement a slightly +# different encoding; the difference is only in two code points. 
+# For the standard variant, these are: +# 0xBB -> 0x00AA FEMININE ORDINAL INDICATOR +# 0xBC -> 0x00BA MASCULINE ORDINAL INDICATOR +# +# For the TrueType variant (used by the Icelandic versions of the +# fonts Courier, Helvetica, Palatino, and Times), these are: +# 0xBB -> 0xFB01 LATIN SMALL LIGATURE FI +# 0xBC -> 0xFB02 LATIN SMALL LIGATURE FL +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n06 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n02 to version n03: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. 
+# +################## +0x00 - 0x7E = 0x0000 - +0x80 = 0x00C4 +0x81 = 0x00C5 +0x82 = 0x00C7 +0x83 = 0x00C9 +0x84 = 0x00D1 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x00E3 +0x8C = 0x00E5 +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00ED +0x93 = 0x00EC +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x00F1 +0x97 = 0x00F3 +0x98 = 0x00F2 +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F5 +0x9C = 0x00FA +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x00DD +0xA1 = 0x00B0 +0xA2 = 0x00A2 +0xA3 = 0x00A3 +0xA4 = 0x00A7 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x00DF +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x2122 +0xAB = 0x00B4 +0xAC = 0x00A8 +0xAD = 0x2260 +0xAE = 0x00C6 +0xAF = 0x00D8 +0xB0 = 0x221E +0xB1 = 0x00B1 +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x00A5 +0xB5 = 0x00B5 +0xB6 = 0x2202 +0xB7 = 0x2211 +0xB8 = 0x220F +0xB9 = 0x03C0 +0xBA = 0x222B +0xBB = 0x00AA +0xBC = 0x00BA +0xBD = 0x03A9 +0xBE = 0x00E6 +0xBF = 0x00F8 +0xC0 = 0x00BF +0xC1 = 0x00A1 +0xC2 = 0x00AC +0xC3 = 0x221A +0xC4 = 0x0192 +0xC5 = 0x2248 +0xC6 = 0x2206 +0xC7 = 0x00AB +0xC8 = 0x00BB +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x00C0 +0xCC = 0x00C3 +0xCD = 0x00D5 +0xCE = 0x0152 +0xCF = 0x0153 +0xD0 = 0x2013 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x00F7 +0xD7 = 0x25CA +0xD8 = 0x00FF +0xD9 = 0x0178 +0xDA = 0x2044 +0xDB = 0x20AC +0xDC = 0x00D0 +0xDD = 0x00F0 +0xDE = 0x00DE +0xDF = 0x00FE +0xE0 = 0x00FD +0xE1 = 0x00B7 +0xE2 = 0x201A +0xE3 = 0x201E +0xE4 = 0x2030 +0xE5 = 0x00C2 +0xE6 = 0x00CA +0xE7 = 0x00C1 +0xE8 = 0x00CB +0xE9 = 0x00C8 +0xEA = 0x00CD +0xEB = 0x00CE +0xEC = 0x00CF +0xED = 0x00CC +0xEE = 0x00D3 +0xEF = 0x00D4 +0xF0 = 0xF8FF +0xF1 = 0x00D2 +0xF2 = 0x00DA +0xF3 = 0x00DB +0xF4 = 0x00D9 +0xF5 = 0x0131 +0xF6 = 0x02C6 +0xF7 = 0x02DC +0xF8 = 0x00AF +0xF9 = 0x02D8 +0xFA = 0x02D9 +0xFB = 0x02DA +0xFC = 0x00B8 +0xFD = 0x02DD +0xFE = 0x02DB +0xFF = 0x02C7 +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/INUIT%UCS.src b/share/i18n/csmapper/APPLE/INUIT%UCS.src new file mode 100644 index 0000000..007ab4c --- /dev/null +++ b/share/i18n/csmapper/APPLE/INUIT%UCS.src @@ -0,0 +1,242 @@ +# $FreeBSD$ +# $NetBSD: INUIT%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME INUIT/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: INUIT.TXT +# +# Contents: Map (external version) from Mac OS Inuit +# character set to Unicode 3.0 and later +# +# Contacts: charsets@apple.com, everson@evertype.com +# +# Changes: +# +# c01 2005-Apr-01 First posted version. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". 
+# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Inuit code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Inuit code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Inuit character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Inuit (partly from Michael Everson): +# ---------------------------------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# This character set was developed by Michael Everson of Everson +# Typography (everson@evertype.com) and was used for the Inuktitut +# localizations of Mac OS, as well as for the Inuktitut utilities +# package from Everson Typography. Note that while Apple authorized +# the Inuktitut localization mentioned above, it was not shipped with +# Apple hardware, and was not otherwise supported by Apple. Fonts +# conforming to the Mac OS Inuit character set are available from +# Everson Typography (http://www.evertype.com/software/apple/). +# Information about the use of this character set is available at +# http://www.evertype.com/standards/iu/. +# +# The Mac OS Inuit character set shares the script code smEthiopic +# (28) with the Ethiopic encoding. To determine if the Inuktitut +# encoding is being used, you must also check if the system region +# code is 78, verNunavut. +# +# The Mac OS Inuit character set includes the full syllabic letter +# repertoire required for Inuktitut; it is a subset of the Unified +# Canadian Aboriginal Syllabics set encoded in Unicode. 
The encoding +# is InuitSCII, designed by Doug Hitch for the Government of the +# Northwest Territories. +# +# The Mac OS Inuit character set also includes a number of characters +# that were needed for the classic Mac OS user interface and +# localization (e.g. ellipsis, bullet, copyright sign). All of the +# characters in Mac OS Inuit that are also in the Mac OS Roman +# encoding are at the same code point in both; this improves +# application compatibility. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +0x00 - 0x7E = 0x0000 - +0x80 = 0x1403 +0x81 = 0x1404 +0x82 = 0x1405 +0x83 = 0x1406 +0x84 = 0x140A +0x85 = 0x140B +0x86 = 0x1431 +0x87 = 0x1432 +0x88 = 0x1433 +0x89 = 0x1434 +0x8A = 0x1438 +0x8B = 0x1439 +0x8C = 0x1449 +0x8D = 0x144E +0x8E = 0x144F +0x8F = 0x1450 +0x90 = 0x1451 +0x91 = 0x1455 +0x92 = 0x1456 +0x93 = 0x1466 +0x94 = 0x146D +0x95 = 0x146E +0x96 = 0x146F +0x97 = 0x1470 +0x98 = 0x1472 +0x99 = 0x1473 +0x9A = 0x1483 +0x9B = 0x148B +0x9C = 0x148C +0x9D = 0x148D +0x9E = 0x148E +0x9F = 0x1490 +0xA0 = 0x1491 +0xA1 = 0x00B0 +0xA2 = 0x14A1 +0xA3 = 0x14A5 +0xA4 = 0x14A6 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x14A7 +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x2122 +0xAB = 0x14A8 +0xAC = 0x14AA +0xAD = 0x14AB +0xAE = 0x14BB +0xAF = 0x14C2 +0xB0 = 0x14C3 +0xB1 = 0x14C4 +0xB2 = 0x14C5 +0xB3 = 0x14C7 +0xB4 = 0x14C8 +0xB5 = 0x14D0 +0xB6 = 0x14EF +0xB7 = 0x14F0 +0xB8 = 0x14F1 +0xB9 = 0x14F2 +0xBA = 0x14F4 +0xBB = 0x14F5 +0xBC = 0x1505 +0xBD = 0x14D5 +0xBE = 0x14D6 +0xBF = 0x14D7 +0xC0 = 0x14D8 +0xC1 = 0x14DA +0xC2 = 0x14DB +0xC3 = 0x14EA +0xC4 = 0x1528 +0xC5 = 0x1529 +0xC6 = 0x152A +0xC7 = 0x152B +0xC8 = 0x152D +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x152E +0xCC = 0x153E +0xCD = 0x1555 +0xCE = 0x1556 +0xCF = 0x1557 +0xD0 = 0x2013 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x1558 +0xD7 = 
0x1559 +0xD8 = 0x155A +0xD9 = 0x155D +0xDA = 0x1546 +0xDB = 0x1547 +0xDC = 0x1548 +0xDD = 0x1549 +0xDE = 0x154B +0xDF = 0x154C +0xE0 = 0x1550 +0xE1 = 0x157F +0xE2 = 0x1580 +0xE3 = 0x1581 +0xE4 = 0x1582 +0xE5 = 0x1583 +0xE6 = 0x1584 +0xE7 = 0x1585 +0xE8 = 0x158F +0xE9 = 0x1590 +0xEA = 0x1591 +0xEB = 0x1592 +0xEC = 0x1593 +0xED = 0x1594 +0xEE = 0x1595 +0xEF = 0x1671 +0xF0 = 0x1672 +0xF1 = 0x1673 +0xF2 = 0x1674 +0xF3 = 0x1675 +0xF4 = 0x1676 +0xF5 = 0x1596 +0xF6 = 0x15A0 +0xF7 = 0x15A1 +0xF8 = 0x15A2 +0xF9 = 0x15A3 +0xFA = 0x15A4 +0xFB = 0x15A5 +0xFC = 0x15A6 +0xFD = 0x157C +0xFE = 0x0141 +0xFF = 0x0142 +END_MAP diff --git a/share/i18n/csmapper/APPLE/KEYBOARD%UCS.src b/share/i18n/csmapper/APPLE/KEYBOARD%UCS.src new file mode 100644 index 0000000..52d8175 --- /dev/null +++ b/share/i18n/csmapper/APPLE/KEYBOARD%UCS.src @@ -0,0 +1,234 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME KEYBOARD/UCS +SRC_ZONE 0x00-0x8F +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 +#======================================================================= +# File name: KEYBOARD.TXT +# +# Contents: Map (external version) from Mac OS Keyboard +# character set to Unicode 4.0 and later. +# +# Copyright: (c) 2001-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Change mappings for 0x09, 0x0F, 0x8C; add +# Mac OS X-only mappings for 0x8D-0x8F. +# Update header comments, including +# clarification of Mac OS X usage. Matches +# internal xml <c1.2> and Text Encoding +# Converter 2.0. +# b1,c1 2002-Dec-19 First version. Matches internal utom<b6>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc.
For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Keyboard code (in hex as 0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN, etc.). +# Column #3 is a comment containing the Unicode name. +# In some cases an additional comment follows the Unicode name. +# +# The entries are in Mac OS Keyboard code order. +# +# Some of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# The Mac OS Keyboard character set uses the ranges normally set aside +# for controls, so those ranges are present in this table. +# +# Notes on Mac OS Keyboard: +# ------------------------- +# +# This is the encoding for the legacy font named ".Keyboard". Before +# Mac OS X, this font was used by the user-interface system to display +# glyphs for special keys on the keyboard. 
In Mac OS X, that font is +# not present and this mapping is not associated with a font; it is +# only used as a way to map from a set of Menu Manager constants to +# associated Unicode sequences. As such, new mappings added for Mac OS +# X only may be one-way mappings: From the Keyboard glyph "encoding" +# to Unicode, but not back. +# +# The Mac OS Keyboard encoding shares the script code smRoman +# (0) with the Mac OS Roman encoding. To determine if the Keyboard +# encoding is being used in Mac OS 8 or Mac OS 9, you must check if +# the font name is ".Keyboard". +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The goals in the mappings provided here are: +# - For mappings used in Mac OS 8 and Mac OS 9, ensure roundtrip +# mapping from every character in the Mac OS Keyboard character set +# to Unicode and back. This consideration does not apply to mappings +# added for Mac OS X only (noted below). +# - Use standard Unicode characters as much as possible, to +# maximize interchangeability of the resulting Unicode text. +# Whenever possible, avoid having content carried by private-use +# characters. +# +# Some of the characters in the Mac OS Keyboard character set do not +# correspond to distinct, single Unicode characters. To map these +# and satisfy both goals above, we employ various strategies. +# +# a) If possible, use private use characters in combination with +# standard Unicode characters to mark variants of the standard +# Unicode character. +# +# Apple has defined a block of 32 corporate characters as "transcoding +# hints." These are used in combination with standard Unicode +# characters to force them to be treated in a special way for mapping +# to other encodings; they have no other effect. Sixteen of these +# transcoding hints are "grouping hints" - they indicate that the next +# 2-4 Unicode characters should be treated as a single entity for +# transcoding. 
The other sixteen transcoding hints are "variant tags" +# - they are like combining characters, and can follow a standard +# Unicode (or a sequence consisting of a base character and other +# combining characters) to cause it to be treated in a special way for +# transcoding. These always terminate a combining-character sequence. +# +# The transcoding hints used in this mapping table are two +# grouping tags, 0xF860-61, and one variant tag, 0xF87F. Since these +# are combined with standard Unicode characters, some characters in +# the Mac OS Keyboard character set map to a sequence of two to four +# Unicodes instead of a single Unicode character. +# +# For example, the Mac OS Keyboard character at 0x6F, representing the +# F1 key, is mapped to Unicode using the grouping tag F860 (group next +# two) followed by U+0046 (LATIN CAPITAL LETTER F) and U+0031 (DIGIT +# ONE). +# +# b) Otherwise, use private use characters by themselves to map Mac OS +# Keyboard characters which have no relationship to any standard +# Unicode character. +# +# The following additional corporate zone Unicode characters are +# used for this purpose here: +# +# 0xF802 Lower left pencil +# 0xF803 Contextual menu key symbol +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement.
+# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version c01 to version c02: +# +# - Mapping for 0x09 changed from 0x0009 (wrong) to 0x2423 +# - Mapping for 0x0F changed from 0x270E (wrong) to 0xF802 +# - Mapping for 0x8C changed from 0xF804 to 0x23CF (Unicode 4.0) +# - Add Mac OS X-only mappings for 0x8D-0x8F +# +################## +BEGIN_MAP +0x00 = 0x0000 +0x02 = 0x21E5 +0x03 = 0x21E4 +0x04 = 0x2324 +0x05 = 0x21E7 +0x06 = 0x2303 +0x07 = 0x2325 +0x08 = 0x0008 +0x09 = 0x2423 +0x0A = 0x2326 +0x0B = 0x21A9 +0x0C = 0x21AA +0x0D = 0x000D +0x0F = 0xF802 +0x10 = 0x21E3 +0x11 = 0x2318 +0x12 = 0x2713 +0x13 = 0x25C6 +0x14 = 0xF8FF +0x17 = 0x232B +0x18 = 0x21E0 +0x19 = 0x21E1 +0x1A = 0x21E2 +0x1B = 0x238B +0x1C = 0x2327 +0x20 = 0x0020 +0x30 = 0x0030 +0x31 = 0x0031 +0x32 = 0x0032 +0x33 = 0x0033 +0x34 = 0x0034 +0x35 = 0x0035 +0x36 = 0x0036 +0x37 = 0x0037 +0x38 = 0x0038 +0x39 = 0x0039 +0x46 = 0x0046 +0x61 = 0x2423 +0x62 = 0x21DE +0x63 = 0x21EA +0x64 = 0x2190 +0x65 = 0x2192 +0x66 = 0x2196 +#0x67 = 0x003F+0x20DD +0x68 = 0x2191 +0x69 = 0x2198 +0x6A = 0x2193 +0x6B = 0x21DF +#0x6C = 0xF8FF+0xF87F +0x6D = 0xF803 +#0x6E = 0x2758+0x20DD +#0x6F = 0xF860+0x0046+0x0031 +#0x70 = 0xF860+0x0046+0x0032 +#0x71 = 0xF860+0x0046+0x0033 +#0x72 = 0xF860+0x0046+0x0034 +#0x73 = 0xF860+0x0046+0x0035 +#0x74 = 0xF860+0x0046+0x0036 +#0x75 = 0xF860+0x0046+0x0037 +#0x76 = 0xF860+0x0046+0x0038 +#0x77 = 0xF860+0x0046+0x0039 +#0x78 = 0xF861+0x0046+0x0031+0x0030 +#0x79 = 0xF861+0x0046+0x0031+0x0031 +#0x7A = 0xF861+0x0046+0x0031+0x0032 +#0x87 = 0xF861+0x0046+0x0031+0x0033 +#0x88 = 0xF861+0x0046+0x0031+0x0034 +#0x89 = 0xF861+0x0046+0x0031+0x0035 +0x8A = 0x2388 +0x8B = 0x2387 +0x8C = 0x23CF +#0x8D = 0x82F1+0x6570 +#0x8E = 0x304B+0x306A +#0x8F = 0xF861+0x0046+0x0031+0x0036 +END_MAP diff --git a/share/i18n/csmapper/APPLE/MAC.part b/share/i18n/csmapper/APPLE/MAC.part new file mode 100644 index 0000000..fd0d642 --- /dev/null +++ 
b/share/i18n/csmapper/APPLE/MAC.part @@ -0,0 +1,24 @@ +# $FreeBSD$ +# $NetBSD: MAC.part,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +ARABIC +CELTIC +CENTEURO +CROATIAN +CYRILLIC +DEVANAGA +DINGBATS +FARSI +GAELIC +GREEK +GUJARATI +GURMUKHI +HEBREW +ICELAND +INUIT +KEYBOARD +ROMAN +ROMANIAN +SYMBOL +THAI +TURKISH diff --git a/share/i18n/csmapper/APPLE/Makefile b/share/i18n/csmapper/APPLE/Makefile new file mode 100644 index 0000000..50ae28d --- /dev/null +++ b/share/i18n/csmapper/APPLE/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ +# $NetBSD: Makefile.inc,v 1.2 2008/10/25 22:35:36 apb Exp $ + +CODE= MAC +ESUBDIR= APPLE +TABLENAME= %%PART%%%UCS +RTABLENAME= UCS%%%PART%% +ENCID= %%PART%% + +.include "../Makefile.part" diff --git a/share/i18n/csmapper/APPLE/ROMAN%UCS.src b/share/i18n/csmapper/APPLE/ROMAN%UCS.src new file mode 100644 index 0000000..80d539c --- /dev/null +++ b/share/i18n/csmapper/APPLE/ROMAN%UCS.src @@ -0,0 +1,290 @@ +# $FreeBSD$ +# $NetBSD: ROMAN%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME ROMAN/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: ROMAN.TXT +# +# Contents: Map (external version) from Mac OS Roman +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b4,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b5>. +# b03 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b4>, ufrm<b3>, and Text +# Encoding Converter version 1.5. +# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to +# EURO SIGN. 
Matches internal utom<b3>, +# ufrm<b3>. +# n08 1998-Feb-05 Minor update to header comments +# n06 1997-Dec-14 Add warning about future changes to 0xDB +# from CURRENCY SIGN to EURO SIGN. Clarify +# some header information +# n04 1997-Dec-01 Update to match internal utom<n3>, ufrm<n22>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n9>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Roman code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Roman code order. +# +# One of these mappings requires the use of a corporate character. 
+# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Roman character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Roman: +# ---------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# This character set is used for at least the following Mac OS +# localizations: U.S., British, Canadian French, French, Swiss +# French, German, Swiss German, Italian, Swiss Italian, Dutch, +# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan, +# Portuguese, Brazilian, and the default International system. +# +# Variants of Mac OS Roman are used for Croatian, Icelandic, +# Turkish, Romanian, and other encodings. Separate mapping tables +# are available for these encodings. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Roman encoding that still +# maps 0xDB to U+00A4; this can be used for older fonts. +# +# Before Mac OS 8.5, the ROM bitmap versions of the fonts Chicago, +# New York, Geneva, and Monaco did not implement the full Mac OS +# Roman character set; they only supported character codes up to +# 0xD8. The TrueType versions of these fonts have always implemented +# the full character set, as with the bitmap and TrueType versions +# of the other standard Roman fonts. 
+# +# In all Mac OS encodings, fonts such as Chicago which are used +# as "system" fonts (for menus, dialogs, etc.) have four glyphs +# at code points 0x11-0x14 for transient use by the Menu Manager. +# These glyphs are not intended as characters for use in normal +# text, and the associated code points are not generally +# interpreted as associated with these glyphs; they are usually +# interpreted (if at all) as the control codes DC1-DC4. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n08 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n03 to version n04: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. 
+# +################## +0x00-0x7E = 0x00 - +0x80 = 0x00C4 +0x81 = 0x00C5 +0x82 = 0x00C7 +0x83 = 0x00C9 +0x84 = 0x00D1 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x00E3 +0x8C = 0x00E5 +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00ED +0x93 = 0x00EC +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x00F1 +0x97 = 0x00F3 +0x98 = 0x00F2 +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F5 +0x9C = 0x00FA +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x2020 +0xA1 = 0x00B0 +0xA2 = 0x00A2 +0xA3 = 0x00A3 +0xA4 = 0x00A7 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x00DF +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x2122 +0xAB = 0x00B4 +0xAC = 0x00A8 +0xAD = 0x2260 +0xAE = 0x00C6 +0xAF = 0x00D8 +0xB0 = 0x221E +0xB1 = 0x00B1 +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x00A5 +0xB5 = 0x00B5 +0xB6 = 0x2202 +0xB7 = 0x2211 +0xB8 = 0x220F +0xB9 = 0x03C0 +0xBA = 0x222B +0xBB = 0x00AA +0xBC = 0x00BA +0xBD = 0x03A9 +0xBE = 0x00E6 +0xBF = 0x00F8 +0xC0 = 0x00BF +0xC1 = 0x00A1 +0xC2 = 0x00AC +0xC3 = 0x221A +0xC4 = 0x0192 +0xC5 = 0x2248 +0xC6 = 0x2206 +0xC7 = 0x00AB +0xC8 = 0x00BB +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x00C0 +0xCC = 0x00C3 +0xCD = 0x00D5 +0xCE = 0x0152 +0xCF = 0x0153 +0xD0 = 0x2013 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x00F7 +0xD7 = 0x25CA +0xD8 = 0x00FF +0xD9 = 0x0178 +0xDA = 0x2044 +0xDB = 0x20AC +0xDC = 0x2039 +0xDD = 0x203A +0xDE = 0xFB01 +0xDF = 0xFB02 +0xE0 = 0x2021 +0xE1 = 0x00B7 +0xE2 = 0x201A +0xE3 = 0x201E +0xE4 = 0x2030 +0xE5 = 0x00C2 +0xE6 = 0x00CA +0xE7 = 0x00C1 +0xE8 = 0x00CB +0xE9 = 0x00C8 +0xEA = 0x00CD +0xEB = 0x00CE +0xEC = 0x00CF +0xED = 0x00CC +0xEE = 0x00D3 +0xEF = 0x00D4 +0xF0 = 0xF8FF +0xF1 = 0x00D2 +0xF2 = 0x00DA +0xF3 = 0x00DB +0xF4 = 0x00D9 +0xF5 = 0x0131 +0xF6 = 0x02C6 +0xF7 = 0x02DC +0xF8 = 0x00AF +0xF9 = 0x02D8 +0xFA = 0x02D9 +0xFB = 0x02DA +0xFC = 0x00B8 +0xFD = 0x02DD +0xFE = 0x02DB +0xFF = 0x02C7 +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/ROMANIAN%UCS.src b/share/i18n/csmapper/APPLE/ROMANIAN%UCS.src new file mode 100644 index 0000000..b901c1d --- /dev/null +++ b/share/i18n/csmapper/APPLE/ROMANIAN%UCS.src @@ -0,0 +1,285 @@ +# $FreeBSD$ +# $NetBSD: ROMANIAN%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME ROMANIAN/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: ROMANIAN.TXT +# +# Contents: Map (external version) from Mac OS Romanian +# character set to Unicode 3.0 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.2> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update mappings for 0xAF, 0xBF, 0xDE, 0xDF +# to use new composed characters added in +# Unicode 3.0. Update URLs, notes. Matches +# internal utom<b3>. +# b02 1999-Sep-22 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to EURO +# SIGN. Update contact e-mail address. Matches +# internal utom<b2>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Minor update to header comments +# n03 1997-Dec-14 Update to match internal utom<n5>, ufrm<n16>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# Change mapping of 0xAF,0xBF,0xDE,0xDF from +# composed S/T WITH CEDILLA to S/T with +# COMBINING COMMA BELOW (to match our +# decomposition mappings). +# n02 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n4>. 
+# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Romanian code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Romanian code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Romanian character set uses the standard control characters at +# 0x00-0x1F and 0x7F. 
+# +# Notes on Mac OS Romanian: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Romanian is used only for Romanian. +# +# The Mac OS Romanian encoding shares the script code smRoman +# (0) with the standard Mac OS Roman encoding. To determine if +# the Romanian encoding is being used, you must also check if the +# system region code is 39, verRomania. +# +# This character set is a variant of standard Mac OS Roman, adding +# upper and lower A breve, S comma below, and T comma below. It +# has 6 code point differences from standard Mac OS Roman. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Romanian encoding that +# still maps 0xDB to U+00A4; this can be used for older fonts. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - Update the mappings for 0xAF, 0xBF, 0xDE, 0xDF to use new +# composed Unicode characters 0x0218-0x021B added in Unicode 3.0; +# the previous mappings were to the equivalent decomposition +# sequences. +# +# Changes from version n05 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). 
+# +# Changes from version n02 to version n03: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. +# - Change mapping of 0xAF,0xBF,0xDE,0xDF from composed S or T +# WITH CEDILLA to S or T with COMBINING COMMA BELOW (to match +# our decomposition mappings). +# +################## +0x00 - 0x7F = 0x0000 - +0x80 = 0x00C4 +0x81 = 0x00C5 +0x82 = 0x00C7 +0x83 = 0x00C9 +0x84 = 0x00D1 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x00E3 +0x8C = 0x00E5 +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00ED +0x93 = 0x00EC +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x00F1 +0x97 = 0x00F3 +0x98 = 0x00F2 +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F5 +0x9C = 0x00FA +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x2020 +0xA1 = 0x00B0 +0xA2 = 0x00A2 +0xA3 = 0x00A3 +0xA4 = 0x00A7 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x00DF +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x2122 +0xAB = 0x00B4 +0xAC = 0x00A8 +0xAD = 0x2260 +0xAE = 0x0102 +0xAF = 0x0218 +0xB0 = 0x221E +0xB1 = 0x00B1 +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x00A5 +0xB5 = 0x00B5 +0xB6 = 0x2202 +0xB7 = 0x2211 +0xB8 = 0x220F +0xB9 = 0x03C0 +0xBA = 0x222B +0xBB = 0x00AA +0xBC = 0x00BA +0xBD = 0x03A9 +0xBE = 0x0103 +0xBF = 0x0219 +0xC0 = 0x00BF +0xC1 = 0x00A1 +0xC2 = 0x00AC +0xC3 = 0x221A +0xC4 = 0x0192 +0xC5 = 0x2248 +0xC6 = 0x2206 +0xC7 = 0x00AB +0xC8 = 0x00BB +0xC9 = 0x2026 +0xCA = 0x00A0 +0xCB = 0x00C0 +0xCC = 0x00C3 +0xCD = 0x00D5 +0xCE = 0x0152 +0xCF = 0x0153 +0xD0 = 0x2013 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x00F7 +0xD7 = 0x25CA +0xD8 = 0x00FF +0xD9 = 0x0178 +0xDA = 0x2044 +0xDB = 0x20AC +0xDC = 0x2039 +0xDD = 0x203A +0xDE = 0x021A +0xDF = 0x021B +0xE0 = 0x2021 +0xE1 = 0x00B7 +0xE2 = 0x201A +0xE3 = 0x201E +0xE4 = 0x2030 +0xE5 = 0x00C2 +0xE6 = 0x00CA +0xE7 = 0x00C1 +0xE8 = 0x00CB +0xE9 = 0x00C8 +0xEA = 0x00CD +0xEB = 0x00CE +0xEC = 0x00CF +0xED = 0x00CC +0xEE = 
0x00D3 +0xEF = 0x00D4 +0xF0 = 0xF8FF +0xF1 = 0x00D2 +0xF2 = 0x00DA +0xF3 = 0x00DB +0xF4 = 0x00D9 +0xF5 = 0x0131 +0xF6 = 0x02C6 +0xF7 = 0x02DC +0xF8 = 0x00AF +0xF9 = 0x02D8 +0xFA = 0x02D9 +0xFB = 0x02DA +0xFC = 0x00B8 +0xFD = 0x02DD +0xFE = 0x02DB +0xFF = 0x02C7 +END_MAP diff --git a/share/i18n/csmapper/APPLE/SYMBOL%UCS.src b/share/i18n/csmapper/APPLE/SYMBOL%UCS.src new file mode 100644 index 0000000..07926c5 --- /dev/null +++ b/share/i18n/csmapper/APPLE/SYMBOL%UCS.src @@ -0,0 +1,321 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME SYMBOL/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +#======================================================================= +# File name: SYMBOL.TXT +# +# Contents: Map (external version) from Mac OS Symbol +# character set to Unicode 4.0 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Change mappings for 0xBD, 0xE0. Update +# header comments. Matches internal xml <c1.2> +# and Text Encoding Converter 2.0. +# b4,c1 2002-Dec-19 Update mappings for encoded glyph fragments +# 0xBE, 0xE6-EF, 0xF4, 0xF6-FE to use new +# Unicode 3.2 characters instead of sequences +# involving corporate-use characters. Update +# URLs, notes. Matches internal utom<b4>. +# b03 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b3>, ufrm<b3>, and Text +# Encoding Converter version 1.5. +# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; add new +# mapping from 0xA0 to EURO SIGN. Matches +# internal utom<b3>, ufrm<b3>. +# n05 1998-Feb-05 Update to match internal utom<n5>, ufrm<n15> +# and Text Encoding Converter version 1.3: +# Use standard Unicodes plus transcoding hints +# instead of single corporate characters, also +# change mappings for 0xE1 & 0xF1 from U+2329 +# & U+232A to their canonical decompositions; +# see details below. Also update header +# comments to new format. 
+# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n4>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Symbol code (in hex as 0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name. +# In some cases an additional comment follows the Unicode name. +# +# The entries are in Mac OS Symbol code order. +# +# Some of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. 
However, the +# Mac OS Symbol character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Symbol: +# ----------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# The Mac OS Symbol encoding shares the script code smRoman +# (0) with the Mac OS Roman encoding. To determine if the Symbol +# encoding is being used, you must check if the font name is +# "Symbol". +# +# Before Mac OS 8.5, code point 0xA0 was unused. In Mac OS 8.5 +# and later versions, code point 0xA0 is EURO SIGN and maps to +# U+20AC (the Symbol font is updated for Mac OS 8.5 to reflect +# this). +# +# The layout of the Mac OS Symbol character set is identical to +# the layout of the Adobe Symbol encoding vector, with the +# addition of the Apple logo character at 0xF0. +# +# This character set encodes a number of glyph fragments. Some are +# used as extenders: 0x60 is used to extend radical signs, 0xBD and +# 0xBE are used to extend vertical and horizontal arrows, etc. In +# addition, there are top, bottom, and center sections for +# parentheses, brackets, integral signs, and other signs that may +# extend vertically for 2 or more lines of normal text. As of +# Unicode 3.2, most of these are now encoded in Unicode; a few are +# not, so these are mapped using corporate-zone Unicode characters +# (see below). +# +# In addition, Symbol separately encodes both serif and sans-serif +# forms for copyright, trademark, and registered signs. Unicode +# encodes only the abstract characters, so one set of these (the +# sans-serif forms) are also mapped using corporate-zone Unicode +# characters (see below). +# +# The following code points are unused, and are not shown here: +# 0x80-0x9F, 0xFF. 
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The goals in the mappings provided here are: +# - Ensure roundtrip mapping from every character in the Mac OS +# Symbol character set to Unicode and back +# - Use standard Unicode characters as much as possible, to +# maximize interchangeability of the resulting Unicode text. +# Whenever possible, avoid having content carried by private-use +# characters. +# +# Some of the characters in the Mac OS Symbol character set do not +# correspond to distinct, single Unicode characters. To map these +# and satisfy both goals above, we employ various strategies. +# +# a) If possible, use private use characters in combination with +# standard Unicode characters to mark variants of the standard +# Unicode character. +# +# Apple has defined a block of 32 corporate characters as "transcoding +# hints." These are used in combination with standard Unicode +# characters to force them to be treated in a special way for mapping +# to other encodings; they have no other effect. Sixteen of these +# transcoding hints are "grouping hints" - they indicate that the next +# 2-4 Unicode characters should be treated as a single entity for +# transcoding. The other sixteen transcoding hints are "variant tags" +# - they are like combining characters, and can follow a standard +# Unicode (or a sequence consisting of a base character and other +# combining characters) to cause it to be treated in a special way for +# transcoding. These always terminate a combining-character sequence. +# +# The transcoding coding hint used in this mapping table is the +# variant tag 0xF87F. Since this is combined with standard Unicode +# characters, some characters in the Mac OS Symbol character set map +# to a sequence of two Unicodes instead of a single Unicode character. 
+# +# For example, the Mac OS Symbol character at 0xE2 is an alternate, +# sans-serif form of the REGISTERED SIGN (the standard mapping is for +# the abstract character at 0xD2, which here has a serif form). So 0xE2 +# is mapped to 0x00AE (REGISTERED SIGN) + 0xF87F (a variant tag). +# +# b) Otherwise, use private use characters by themselves to map +# Mac OS Symbol characters which have no relationship to any standard +# Unicode character. +# +# The following additional corporate zone Unicode characters are +# used for this purpose here: +# +# 0xF8E5 radical extender +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version c01 to version c02: +# +# - Update mappings for 0xBD from 0xF8E6 to 0x23D0 (use new Unicode +# 4.0 char) +# - Correct mapping for 0xE0 from 0x22C4 to 0x25CA +# +# Changes from version b02 to version b03/c01: +# +# - Update mappings for encoded glyph fragments 0xBE, 0xE6-EF, 0xF4, +# 0xF6-FE to use new Unicode 3.2 characters instead of using either +# single corporate-use characters (e.g. 0xBE was mapped to 0xF8E7) or +# sequences combining a standard Unicode character with a transcoding +# hint (e.g. 0xE6 was mapped to 0x0028+0xF870). +# +# Changes from version n05 to version b02: +# +# - Encoding changed for Mac OS 8.5; 0xA0 now maps to 0x20AC, EURO +# SIGN. 0xA0 was unmapped in earlier versions. +# +# Changes from version n03 to version n05: +# +# - Change strict mapping for 0xE1 & 0xF1 from U+2329 & U+232A +# to their canonical decompositions, U+3008 & U+3009. +# +# - Change mapping for the following to use standard Unicode + +# transcoding hint, instead of single corporate-zone +# character: 0xE2-0xE4, 0xE6-0xEE, 0xF4, 0xF6-0xFE. 
+# +################## + +0x00 - 0x7F = 0x0000 - +0xA0 = 0x20AC +0xA1 = 0x03D2 +0xA2 = 0x2032 +0xA3 = 0x2264 +0xA4 = 0x2044 +0xA5 = 0x221E +0xA6 = 0x0192 +0xA7 = 0x2663 +0xA8 = 0x2666 +0xA9 = 0x2665 +0xAA = 0x2660 +0xAB = 0x2194 +0xAC = 0x2190 +0xAD = 0x2191 +0xAE = 0x2192 +0xAF = 0x2193 +0xB0 = 0x00B0 +0xB1 = 0x00B1 +0xB2 = 0x2033 +0xB3 = 0x2265 +0xB4 = 0x00D7 +0xB5 = 0x221D +0xB6 = 0x2202 +0xB7 = 0x2022 +0xB8 = 0x00F7 +0xB9 = 0x2260 +0xBA = 0x2261 +0xBB = 0x2248 +0xBC = 0x2026 +0xBD = 0x23D0 +0xBE = 0x23AF +0xBF = 0x21B5 +0xC0 = 0x2135 +0xC1 = 0x2111 +0xC2 = 0x211C +0xC3 = 0x2118 +0xC4 = 0x2297 +0xC5 = 0x2295 +0xC6 = 0x2205 +0xC7 = 0x2229 +0xC8 = 0x222A +0xC9 = 0x2283 +0xCA = 0x2287 +0xCB = 0x2284 +0xCC = 0x2282 +0xCD = 0x2286 +0xCE = 0x2208 +0xCF = 0x2209 +0xD0 = 0x2220 +0xD1 = 0x2207 +0xD2 = 0x00AE +0xD3 = 0x00A9 +0xD4 = 0x2122 +0xD5 = 0x220F +0xD6 = 0x221A +0xD7 = 0x22C5 +0xD8 = 0x00AC +0xD9 = 0x2227 +0xDA = 0x2228 +0xDB = 0x21D4 +0xDC = 0x21D0 +0xDD = 0x21D1 +0xDE = 0x21D2 +0xDF = 0x21D3 +0xE0 = 0x25CA +0xE1 = 0x3008 +#0xE2 = 0x00AE+0xF87F +#0xE3 = 0x00A9+0xF87F +#0xE4 = 0x2122+0xF87F +0xE5 = 0x2211 +0xE6 = 0x239B +0xE7 = 0x239C +0xE8 = 0x239D +0xE9 = 0x23A1 +0xEA = 0x23A2 +0xEB = 0x23A3 +0xEC = 0x23A7 +0xED = 0x23A8 +0xEE = 0x23A9 +0xEF = 0x23AA +0xF0 = 0xF8FF +0xF1 = 0x3009 +0xF2 = 0x222B +0xF3 = 0x2320 +0xF4 = 0x23AE +0xF5 = 0x2321 +0xF6 = 0x239E +0xF7 = 0x239F +0xF8 = 0x23A0 +0xF9 = 0x23A4 +0xFA = 0x23A5 +0xFB = 0x23A6 +0xFC = 0x23AB +0xFD = 0x23AC +0xFE = 0x23AD +END_MAP diff --git a/share/i18n/csmapper/APPLE/THAI%UCS.src b/share/i18n/csmapper/APPLE/THAI%UCS.src new file mode 100644 index 0000000..25a10c0 --- /dev/null +++ b/share/i18n/csmapper/APPLE/THAI%UCS.src @@ -0,0 +1,302 @@ +# $FreeBSD$ +# $NetBSD: THAI%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME THAI/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. 
+# Original notice: +# +#======================================================================= +# File name: THAI.TXT +# +# Contents: Map (external version) from Mac OS Thai +# character set to Unicode 3.2 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update mapping for 0xDB to use new Unicode +# 3.2 WORD JOINER instead of ZWNBSP (BOM). +# Update URLs. Matches internal utom<b3>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n07 1998-Feb-05 Update to match internal utom<n5>, ufrm<n13> +# and Text Encoding Converter version 1.3: +# Use standard Unicodes plus transcoding hints +# instead of single corporate characters; see +# details below. Also update header comments +# to new format. +# n04 1995-Nov-17 First version (after fixing some typos). +# Matches internal ufrm<n6>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. 
+# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Thai code (in hex as 0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Thai code order. +# +# Some of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Thai character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Thai: +# --------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Codes 0xA1-0xDA and 0xDF-0xFB are the character set from Thai +# standard TIS 620-2533, except that the following changes are +# made: +# 0xEE is TRADE MARK SIGN (instead of THAI CHARACTER YAMAKKAN) +# 0xFA is REGISTERED SIGN (instead of THAI CHARACTER ANGKHANKHU) +# 0xFB is COPYRIGHT SIGN (instead of THAI CHARACTER KHOMUT) +# +# Codes 0x80-0x82, 0x8D-0x8E, 0x91, 0x9D-0x9E, and 0xDB-0xDE are +# various additional punctuation marks (e.g. curly quotes, +# ellipsis), no-break space, and two special characters "word join" +# and "word break". +# +# Codes 0x83-0x8C, 0x8F, and 0x92-0x9C are for positional variants +# of the upper vowels, tone marks, and other signs at 0xD1, +# 0xD4-0xD7, and 0xE7-0xED. The positional variants would normally +# be considered presentation forms only and not characters. 
In most +# cases they are not typed directly; they are selected automatically +# at display time by the WorldScript software. However, using the +# Thai-DTP keyboard, the presentation forms can in fact be typed +# directly using dead keys. Thus they must be treated as real +# characters in the Mac OS Thai encoding. They are mapped using +# variant tags; see below. +# +# Several code points are undefined and unused (they cannot be +# typed using any of the Mac OS Thai keyboard layouts): 0x90, 0x9F, +# 0xFC-0xFE. These are not shown in the table below. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The goals in the Apple mappings provided here are: +# - Ensure roundtrip mapping from every character in the Mac OS Thai +# character set to Unicode and back +# - Use standard Unicode characters as much as possible, to maximize +# interchangeability of the resulting Unicode text. Whenever possible, +# avoid having content carried by private-use characters. +# +# To satisfy both goals, we use private use characters to mark variants +# that are similar to a sequence of one or more standard Unicode +# characters. +# +# Apple has defined a block of 32 corporate characters as "transcoding +# hints." These are used in combination with standard Unicode characters +# to force them to be treated in a special way for mapping to other +# encodings; they have no other effect. Sixteen of these transcoding +# hints are "grouping hints" - they indicate that the next 2-4 Unicode +# characters should be treated as a single entity for transcoding. The +# other sixteen transcoding hints are "variant tags" - they are like +# combining characters, and can follow a standard Unicode (or a sequence +# consisting of a base character and other combining characters) to +# cause it to be treated in a special way for transcoding. These always +# terminate a combining-character sequence. 
+# +# The transcoding hints used in this mapping table are three +# variant tags in the range 0xF873-0xF875. Since these are combined with +# standard Unicode characters, some characters in the Mac OS Thai +# character set map to a sequence of two Unicodes instead of a single +# Unicode character. For example, the Mac OS Thai character at 0x83 is a +# low-left positional variant of THAI CHARACTER MAI EK (the standard +# mapping is for the abstract character at 0xE8). So 0x83 is mapped to +# 0x0E48 (THAI CHARACTER MAI EK) + 0xF875 (a variant tag). +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - Update mapping for 0xDB to use new Unicode 3.2 character U+2060 +# WORD JOINER instead of U+FEFF ZERO WIDTH NO-BREAK SPACE (BOM) +# +# Changes from version n04 to version n07: +# +# - Changed mappings of the positional variants to use standard +# Unicodes + transcoding hint, instead of using single corporate +# zone characters. This affected the mappings for the following: +# 0x83-0x8C, 0x8F, 0x92-0x9C +# +# - Just comment out unused code points in the table, instead +# of mapping them to U+FFFD. 
+# +################## +0x00 - 0x7E = 0x0000 - +0x80 = 0x00AB +0x81 = 0x00BB +0x82 = 0x2026 +0x83 = 0x0E48 +0x84 = 0x0E49 +0x85 = 0x0E4A +0x86 = 0x0E4B +0x87 = 0x0E4C +0x88 = 0x0E48 +0x89 = 0x0E49 +0x8A = 0x0E4A +0x8B = 0x0E4B +0x8C = 0x0E4C +0x8D = 0x201C +0x8E = 0x201D +0x8F = 0x0E4D +0x91 = 0x2022 +0x92 = 0x0E31 +0x93 = 0x0E47 +0x94 = 0x0E34 +0x95 = 0x0E35 +0x96 = 0x0E36 +0x97 = 0x0E37 +0x98 = 0x0E48 +0x99 = 0x0E49 +0x9A = 0x0E4A +0x9B = 0x0E4B +0x9C = 0x0E4C +0x9D = 0x2018 +0x9E = 0x2019 +0xA0 = 0x00A0 +0xA1 = 0x0E01 +0xA2 = 0x0E02 +0xA3 = 0x0E03 +0xA4 = 0x0E04 +0xA5 = 0x0E05 +0xA6 = 0x0E06 +0xA7 = 0x0E07 +0xA8 = 0x0E08 +0xA9 = 0x0E09 +0xAA = 0x0E0A +0xAB = 0x0E0B +0xAC = 0x0E0C +0xAD = 0x0E0D +0xAE = 0x0E0E +0xAF = 0x0E0F +0xB0 = 0x0E10 +0xB1 = 0x0E11 +0xB2 = 0x0E12 +0xB3 = 0x0E13 +0xB4 = 0x0E14 +0xB5 = 0x0E15 +0xB6 = 0x0E16 +0xB7 = 0x0E17 +0xB8 = 0x0E18 +0xB9 = 0x0E19 +0xBA = 0x0E1A +0xBB = 0x0E1B +0xBC = 0x0E1C +0xBD = 0x0E1D +0xBE = 0x0E1E +0xBF = 0x0E1F +0xC0 = 0x0E20 +0xC1 = 0x0E21 +0xC2 = 0x0E22 +0xC3 = 0x0E23 +0xC4 = 0x0E24 +0xC5 = 0x0E25 +0xC6 = 0x0E26 +0xC7 = 0x0E27 +0xC8 = 0x0E28 +0xC9 = 0x0E29 +0xCA = 0x0E2A +0xCB = 0x0E2B +0xCC = 0x0E2C +0xCD = 0x0E2D +0xCE = 0x0E2E +0xCF = 0x0E2F +0xD0 = 0x0E30 +0xD1 = 0x0E31 +0xD2 = 0x0E32 +0xD3 = 0x0E33 +0xD4 = 0x0E34 +0xD5 = 0x0E35 +0xD6 = 0x0E36 +0xD7 = 0x0E37 +0xD8 = 0x0E38 +0xD9 = 0x0E39 +0xDA = 0x0E3A +0xDB = 0x2060 +0xDC = 0x200B +0xDD = 0x2013 +0xDE = 0x2014 +0xDF = 0x0E3F +0xE0 = 0x0E40 +0xE1 = 0x0E41 +0xE2 = 0x0E42 +0xE3 = 0x0E43 +0xE4 = 0x0E44 +0xE5 = 0x0E45 +0xE6 = 0x0E46 +0xE7 = 0x0E47 +0xE8 = 0x0E48 +0xE9 = 0x0E49 +0xEA = 0x0E4A +0xEB = 0x0E4B +0xEC = 0x0E4C +0xED = 0x0E4D +0xEE = 0x2122 +0xEF = 0x0E4F +0xF0 = 0x0E50 +0xF1 = 0x0E51 +0xF2 = 0x0E52 +0xF3 = 0x0E53 +0xF4 = 0x0E54 +0xF5 = 0x0E55 +0xF6 = 0x0E56 +0xF7 = 0x0E57 +0xF8 = 0x0E58 +0xF9 = 0x0E59 +0xFA = 0x00AE +0xFB = 0x00A9 +END_MAP diff --git a/share/i18n/csmapper/APPLE/TURKISH%UCS.src b/share/i18n/csmapper/APPLE/TURKISH%UCS.src new file mode 
100644 index 0000000..c4f2701 --- /dev/null +++ b/share/i18n/csmapper/APPLE/TURKISH%UCS.src @@ -0,0 +1,261 @@ +# $FreeBSD$ +# $NetBSD: TURKISH%UCS.src,v 1.1 2006/03/13 19:45:36 tnozaki Exp $ + +TYPE ROWCOL +NAME TURKISH/UCS +SRC_ZONE 0x00-0xFF +OOB_MODE ILSEQ +DST_ILSEQ 0xFFFE +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: TURKISH.TXT +# +# Contents: Map (external version) from Mac OS Turkish +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b1>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Minor update to header comments +# n03 1997-Dec-14 Update to match internal utom<n5>, ufrm<n15>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n02 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n4>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. 
In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Turkish code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Turkish code order. +# +# Two of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Turkish character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Turkish: +# ------------------------ +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Turkish is used for Turkish. +# +# The Mac OS Turkish encoding shares the script code smRoman +# (0) with the Mac OS Roman encoding. To determine if the Turkish +# encoding is being used, you must also check if the system region +# code is 24, verTurkey. +# +# This character set is a variant of standard Mac OS Roman. It adds +# upper & lower G with breve, upper & lower S with cedilla, upper I +# with dot, and moves the dotless lower i from its position at 0xF5 +# in standard Mac OS Roman to a position at 0xDD here (leaving the +# 0xF5 code point undefined in Mac OS Turkish). 
This gives a total +# of 7 code point differences from standard Mac OS Roman. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode characters are used in this +# mapping: +# +# 0xF8A0 undefined1, used to map the single undefined code point +# in Mac OS Turkish (to obtain roundtrip fidelity for all +# code points). +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n02 to version n03: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. +# +################## +0x00 - 0x7E = 0x0000 - +0x80 = 0x00C4 +0x81 = 0x00C5 +0x82 = 0x00C7 +0x83 = 0x00C9 +0x84 = 0x00D1 +0x85 = 0x00D6 +0x86 = 0x00DC +0x87 = 0x00E1 +0x88 = 0x00E0 +0x89 = 0x00E2 +0x8A = 0x00E4 +0x8B = 0x00E3 +0x8C = 0x00E5 +0x8D = 0x00E7 +0x8E = 0x00E9 +0x8F = 0x00E8 +0x90 = 0x00EA +0x91 = 0x00EB +0x92 = 0x00ED +0x93 = 0x00EC +0x94 = 0x00EE +0x95 = 0x00EF +0x96 = 0x00F1 +0x97 = 0x00F3 +0x98 = 0x00F2 +0x99 = 0x00F4 +0x9A = 0x00F6 +0x9B = 0x00F5 +0x9C = 0x00FA +0x9D = 0x00F9 +0x9E = 0x00FB +0x9F = 0x00FC +0xA0 = 0x2020 +0xA1 = 0x00B0 +0xA2 = 0x00A2 +0xA3 = 0x00A3 +0xA4 = 0x00A7 +0xA5 = 0x2022 +0xA6 = 0x00B6 +0xA7 = 0x00DF +0xA8 = 0x00AE +0xA9 = 0x00A9 +0xAA = 0x2122 +0xAB = 0x00B4 +0xAC = 0x00A8 +0xAD = 0x2260 +0xAE = 0x00C6 +0xAF = 0x00D8 +0xB0 = 0x221E +0xB1 = 0x00B1 +0xB2 = 0x2264 +0xB3 = 0x2265 +0xB4 = 0x00A5 +0xB5 = 0x00B5 +0xB6 = 0x2202 +0xB7 = 0x2211 +0xB8 = 0x220F +0xB9 = 0x03C0 +0xBA = 0x222B +0xBB = 0x00AA +0xBC = 0x00BA +0xBD = 0x03A9 +0xBE = 0x00E6 +0xBF = 0x00F8 +0xC0 = 0x00BF +0xC1 = 0x00A1 +0xC2 = 0x00AC +0xC3 = 0x221A +0xC4 = 0x0192 +0xC5 = 0x2248 +0xC6 = 0x2206 +0xC7 = 0x00AB +0xC8 = 0x00BB +0xC9 = 0x2026 +0xCA = 0x00A0 
+0xCB = 0x00C0 +0xCC = 0x00C3 +0xCD = 0x00D5 +0xCE = 0x0152 +0xCF = 0x0153 +0xD0 = 0x2013 +0xD1 = 0x2014 +0xD2 = 0x201C +0xD3 = 0x201D +0xD4 = 0x2018 +0xD5 = 0x2019 +0xD6 = 0x00F7 +0xD7 = 0x25CA +0xD8 = 0x00FF +0xD9 = 0x0178 +0xDA = 0x011E +0xDB = 0x011F +0xDC = 0x0130 +0xDD = 0x0131 +0xDE = 0x015E +0xDF = 0x015F +0xE0 = 0x2021 +0xE1 = 0x00B7 +0xE2 = 0x201A +0xE3 = 0x201E +0xE4 = 0x2030 +0xE5 = 0x00C2 +0xE6 = 0x00CA +0xE7 = 0x00C1 +0xE8 = 0x00CB +0xE9 = 0x00C8 +0xEA = 0x00CD +0xEB = 0x00CE +0xEC = 0x00CF +0xED = 0x00CC +0xEE = 0x00D3 +0xEF = 0x00D4 +0xF0 = 0xF8FF +0xF1 = 0x00D2 +0xF2 = 0x00DA +0xF3 = 0x00DB +0xF4 = 0x00D9 +0xF5 = 0xF8A0 +0xF6 = 0x02C6 +0xF7 = 0x02DC +0xF8 = 0x00AF +0xF9 = 0x02D8 +0xFA = 0x02D9 +0xFB = 0x02DA +0xFC = 0x00B8 +0xFD = 0x02DD +0xFE = 0x02DB +0xFF = 0x02C7 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%ARABIC.src b/share/i18n/csmapper/APPLE/UCS%ARABIC.src new file mode 100644 index 0000000..bfcad10 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%ARABIC.src @@ -0,0 +1,426 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME UCS/ARABIC +SRC_ZONE 0x0000-0xFB02 +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +#======================================================================= +# File name: ARABIC.TXT +# +# Contents: Map (external version) from Mac OS Arabic +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-04 Update header comments. Matches internal xml +# <c1.2> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Add comments about character display and +# direction overrides. Update URLs, notes. +# Matches internal utom<b4>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n10 1998-Feb-05 Show required Unicode character +# directionality in a different way. 
Matches +# internal utom<n4>, ufrm<n21>, and Text +# Encoding Converter version 1.3. Update +# header comments; include information on +# loose mapping of digits. +# n07 1997-Jul-17 Update to match internal utom<n2>, ufrm<n17>: +# Change standard mapping for 0xC0 from U+066D +# to U+274A. Add direction overrides to +# mappings for 0x25, 0x2C, 0x3B, 0x3F. Add +# information on variants. +# n03 1995-Apr-18 First version (after fixing some typos). +# Matches internal ufrm<n11>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Arabic code (in hex as 0xNN). +# Column #2 is the corresponding Unicode (in hex as 0xNNNN), +# possibly preceded by a tag indicating required directionality +# (i.e. <LR>+0xNNNN or <RL>+0xNNNN). +# Column #3 is a comment containing the Unicode name. 
+# +# The entries are in Mac OS Arabic code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Arabic character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Arabic: +# ----------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# 1. General +# +# The Mac OS Arabic character set is intended to cover Arabic as +# used in North Africa, the Arabian peninsula, and the Levant. It +# also contains several characters needed for Urdu and/or Farsi. +# +# The Mac OS Arabic character set is essentially a superset of ISO +# 8859-6. The 8859-6 code points that are interpreted differently +# in the Mac OS Arabic set are as follows: +# 0xA0 is NO-BREAK SPACE in 8859-6 and right-left SPACE in Mac OS +# Arabic; NO-BREAK is 0x81 in Mac OS Arabic. +# 0xA4 is CURRENCY SIGN in 8859-6 and right-left DOLLAR SIGN in +# Mac OS Arabic. +# 0xAD is SOFT HYPHEN in 8859-6 and right-left HYPHEN-MINUS in +# Mac OS Arabic. +# ISO 8859-6 specifies that codes 0x30-0x39 can be rendered either +# with European digit shapes or Arabic digit shapes. This is also +# true in Mac OS Arabic, which determines from context which digit +# shapes to use (see below). +# +# The Mac OS Arabic character set uses the C1 controls area and other +# code points which are undefined in ISO 8859-6 for additional +# graphic characters: additional Arabic letters for Farsi and Urdu, +# some accented Roman letters for European languages (such as French), +# and duplicates of some of the punctuation, symbols, and digits in +# the ASCII block. The duplicate punctuation, symbol, and digit +# characters have right-left directionality, while the ASCII versions +# have left-right directionality. See the next section for more +# information on this. 
+# +# Mac OS Arabic characters 0xEB-0xF2 are non-spacing/combining marks. +# +# 2. Directional characters and roundtrip fidelity +# +# The Mac OS Arabic character set was developed in 1986-1987. At that +# time the bidirectional line layout algorithm used in the Mac OS +# Arabic system was fairly simple; it used only a few direction +# classes (instead of the 19 now used in the Unicode bidirectional +# algorithm). In order to permit users to handle some tricky layout +# problems, certain punctuation and symbol characters were encoded +# twice, one with a left-right direction attribute and the other with +# a right-left direction attribute. +# +# For example, plus sign is encoded at 0x2B with a left-right +# attribute, and at 0xAB with a right-left attribute. However, there +# is only one PLUS SIGN character in Unicode. This leads to some +# interesting problems when mapping between Mac OS Arabic and Unicode; +# see below. +# +# A related problem is that even when a particular character is +# encoded only once in Mac OS Arabic, it may have a different +# direction attribute than the corresponding Unicode character. +# +# For example, the Mac OS Arabic character at 0x93 is HORIZONTAL +# ELLIPSIS with strong right-left direction. However, the Unicode +# character HORIZONTAL ELLIPSIS has direction class neutral. +# +# 3. Behavior of ASCII-range numbers in WorldScript +# +# Mac OS Arabic also has two sets of digit codes. +# +# The digits at 0x30-0x39 may be displayed using either European +# digit forms or Arabic digit forms, depending on context. If there +# is a "strong European" character such as a Latin letter on either +# side of a sequence consisting of digits 0x30-0x39 and possibly comma +# 0x2C or period 0x2E, then the characters will be displayed using +# European forms (This will happen even if there are neutral characters +# between the digits and the strong European character). 
Otherwise, the +# digits will be displayed using Arabic forms, the comma will be +# displayed as Arabic thousands separator, and the period as Arabic +# decimal separator. In any case, 0x2C, 0x2E, and 0x30-0x39 are always +# left-right. +# +# The digits at 0xB0-0xB9 are always displayed using Arabic digit +# shapes, and moreover, these digits always have strong right-left +# directionality. These are mainly intended for special layout +# purposes such as part numbers, etc. +# +# 4. Font variants +# +# The table in this file gives the Unicode mappings for the standard +# Mac OS Arabic encoding. This encoding is supported by the Cairo font +# (the system font for Arabic), and is the encoding supported by the +# text processing utilities. However, the other Arabic fonts actually +# implement slightly different encodings; this mainly affects the code +# points 0xAA and 0xC0. For these code points the standard Mac OS +# Arabic encoding has the following mappings: +# 0xAA -> <RL>+0x002A ASTERISK, right-left +# 0xC0 -> <RL>+0x274A EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, +# right-left +# This mapping of 0xAA is consistent with the normal convention for +# Mac OS Arabic and Hebrew that the right-left duplicates have codes +# that are equal to the ASCII code of the left-right character plus +# 0x80. However, in all of the other fonts, 0xAA is MULTIPLY SIGN, and +# right-left ASTERISK may be at a different code point. The other +# variants are described below. +# +# The TrueType variant is used for most of the Arabic TrueType fonts: +# Baghdad, Geeza, Kufi, Nadeem. It differs from the standard variant +# in the following way: +# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left +# 0xC0 -> <RL>+0x002A ASTERISK, right-left +# +# The Thuluth variant is used for the Arabic Postscript-only fonts: +# Thuluth and Thuluth bold. 
It differs from the standard variant in +# the following way: +# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left +# 0xC0 -> 0x066D ARABIC FIVE POINTED STAR +# +# The AlBayan variant is used for the Arabic TrueType font Al Bayan. +# It differs from the standard variant in the following way: +# 0x81 -> no mapping (glyph just has authorship information, etc.) +# 0xA3 -> 0xFDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM +# 0xA4 -> 0xFDF2 ARABIC LIGATURE ALLAH ISOLATED FORM +# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left +# 0xDC -> <RL>+0x25CF BLACK CIRCLE, right-left +# 0xFC -> <RL>+0x25A0 BLACK SQUARE, right-left +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. Matching the direction of Mac OS Arabic characters +# +# When Mac OS Arabic encodes a character twice but with different +# direction attributes for the two code points - as in the case of +# plus sign mentioned above - we need a way to map both Mac OS Arabic +# code points to Unicode and back again without loss of information. +# With the plus sign, for example, mapping one of the Mac OS Arabic +# characters to a code in the Unicode corporate use zone is +# undesirable, since both of the plus sign characters are likely to +# be used in text that is interchanged. +# +# The problem is solved with the use of direction override characters +# and direction-dependent mappings. When mapping from Mac OS Arabic +# to Unicode, we use direction overrides as necessary to force the +# direction of the resulting Unicode characters. +# +# The required direction is indicated by a direction tag in the +# mappings. A tag of <LR> means the corresponding Unicode character +# must have a strong left-right context, and a tag of <RL> indicates +# a right-left context. +# +# For example, the mapping of 0x2B is given as <LR>+0x002B; the +# mapping of 0xAB is given as <RL>+0x002B. 
If we map an isolated +# instance of 0x2B to Unicode, it should be mapped as follows (LRO +# indicates LEFT-RIGHT OVERRIDE, PDF indicates POP DIRECTIONAL +# FORMATTING): +# +# 0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF) +# +# When mapping several characters in a row that require direction +# forcing, the overrides need only be used at the beginning and end. +# For example: +# +# 0x24 0x20 0x28 0x29 -> 0x202D 0x0024 0x0020 0x0028 0x0029 0x202C +# +# If neutral characters that require direction forcing are already +# between strong-direction characters with matching directionality, +# then direction overrides need not be used. Direction overrides are +# always needed to map the right-left digits at 0xB0-0xB9. +# +# When mapping from Unicode to Mac OS Arabic, the Unicode +# bidirectional algorithm should be used to determine resolved +# direction of the Unicode characters. The mapping from Unicode to +# Mac OS Arabic can then be disambiguated by the use of the resolved +# direction: +# +# Unicode 0x002B -> Mac OS Arabic 0x2B (if L) or 0xAB (if R) +# +# However, this also means the direction override characters should +# be discarded when mapping from Unicode to Mac OS Arabic (after +# they have been used to determine resolved direction), since the +# direction override information is carried by the code point itself. +# +# Even when direction overrides are not needed for roundtrip +# fidelity, they are sometimes used when mapping Mac OS Arabic +# characters to Unicode in order to achieve similar text layout with +# the resulting Unicode text. For example, the single Mac OS Arabic +# ellipsis character has direction class right-left, and there is no +# left-right version. However, the Unicode HORIZONTAL ELLIPSIS +# character has direction class neutral (which means it may end up +# with a resolved direction of left-right if surrounded by left-right +# characters). 
When mapping the Mac OS Arabic ellipsis to Unicode, it +# is surrounded with a direction override to help preserve proper +# text layout. The resolved direction is not needed or used when +# mapping the Unicode HORIZONTAL ELLIPSIS back to Mac OS Arabic. +# +# 2. Mapping the Mac OS Arabic digits +# +# The main table below contains mappings that should be used when +# strict round-trip fidelity is required. However, for numeric +# values, the mappings in that table will produce Unicode characters +# that may appear different than the Mac OS Arabic text displayed on +# a Mac OS system using WorldScript. This is because WorldScript +# uses context-dependent display for the 0x30-0x39 digits. +# +# If roundtrip fidelity is not required, then the following +# alternate mappings should be used when a sequence of 0x30-0x39 +# digits - possibly including 0x2C and 0x2E - occurs in an Arabic +# context (that is, when the first "strong" character on either side +# of the digit sequence is Arabic, or there is no strong character): +# +# 0x2C 0x066C # ARABIC THOUSANDS SEPARATOR +# 0x2E 0x066B # ARABIC DECIMAL SEPARATOR +# 0x30 0x0660 # ARABIC-INDIC DIGIT ZERO +# 0x31 0x0661 # ARABIC-INDIC DIGIT ONE +# 0x32 0x0662 # ARABIC-INDIC DIGIT TWO +# 0x33 0x0663 # ARABIC-INDIC DIGIT THREE +# 0x34 0x0664 # ARABIC-INDIC DIGIT FOUR +# 0x35 0x0665 # ARABIC-INDIC DIGIT FIVE +# 0x36 0x0666 # ARABIC-INDIC DIGIT SIX +# 0x37 0x0667 # ARABIC-INDIC DIGIT SEVEN +# 0x38 0x0668 # ARABIC-INDIC DIGIT EIGHT +# 0x39 0x0669 # ARABIC-INDIC DIGIT NINE +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n03 to version n07: +# +# - Change mapping for 0xC0 from U+066D to U+274A. +# +# - Add direction overrides (required directionality) to mappings +# for 0x25, 0x2C, 0x3B, 0x3F. 
+# +################## +0x0000 - 0x007F = 0x00 - +0x00A0 = 0x81 +0x00AB = 0x8C +0x00BB = 0x98 +0x00C4 = 0x80 +0x00C7 = 0x82 +0x00C9 = 0x83 +0x00D1 = 0x84 +0x00D6 = 0x85 +0x00DC = 0x86 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E4 = 0x8A +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F1 = 0x96 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F6 = 0x9A +0x00F7 = 0x9B +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 0x9F +0x060C = 0xAC +0x061B = 0xBB +0x061F = 0xBF +0x0621 = 0xC1 +0x0622 = 0xC2 +0x0623 = 0xC3 +0x0624 = 0xC4 +0x0625 = 0xC5 +0x0626 = 0xC6 +0x0627 = 0xC7 +0x0628 = 0xC8 +0x0629 = 0xC9 +0x062A = 0xCA +0x062B = 0xCB +0x062C = 0xCC +0x062D = 0xCD +0x062E = 0xCE +0x062F = 0xCF +0x0630 = 0xD0 +0x0631 = 0xD1 +0x0632 = 0xD2 +0x0633 = 0xD3 +0x0634 = 0xD4 +0x0635 = 0xD5 +0x0636 = 0xD6 +0x0637 = 0xD7 +0x0638 = 0xD8 +0x0639 = 0xD9 +0x063A = 0xDA +0x0640 = 0xE0 +0x0641 = 0xE1 +0x0642 = 0xE2 +0x0643 = 0xE3 +0x0644 = 0xE4 +0x0645 = 0xE5 +0x0646 = 0xE6 +0x0647 = 0xE7 +0x0648 = 0xE8 +0x0649 = 0xE9 +0x064A = 0xEA +0x064B = 0xEB +0x064C = 0xEC +0x064D = 0xED +0x064E = 0xEE +0x064F = 0xEF +0x0650 = 0xF0 +0x0651 = 0xF1 +0x0652 = 0xF2 +0x0660 = 0xB0 +0x0661 = 0xB1 +0x0662 = 0xB2 +0x0663 = 0xB3 +0x0664 = 0xB4 +0x0665 = 0xB5 +0x0666 = 0xB6 +0x0667 = 0xB7 +0x0668 = 0xB8 +0x0669 = 0xB9 +0x066A = 0xA5 +0x066D = 0xC0 +0x0679 = 0xF4 +0x067E = 0xF3 +0x0686 = 0xF5 +0x0688 = 0xF9 +0x0691 = 0xFA +0x0698 = 0xFE +0x06A4 = 0xF7 +0x06AF = 0xF8 +0x06BA = 0x8B +0x06D2 = 0xFF +0x06D5 = 0xF6 +0x2026 = 0x93 +0x274A = 0xC0 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%CELTIC.src b/share/i18n/csmapper/APPLE/UCS%CELTIC.src new file mode 100644 index 0000000..005fef1 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%CELTIC.src @@ -0,0 +1,248 @@ +# $FreeBSD$ +# $NetBSD: UCS%CELTIC.src,v 1.2 2006/04/08 15:47:39 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/CELTIC +SRC_ZONE 0x0000-0x2663 +OOB_MODE INVALID +DST_INVALID 0x100 
+DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: CELTIC.TXT +# +# Contents: Map (external version) from Mac OS Celtic +# character set to Unicode 2.1 and later +# +# Contacts: charsets@apple.com, everson@evertype.com +# +# Changes: +# +# c01 2005-Apr-01 First posted version. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Celtic code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Celtic code order. 
+# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Celtic character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Celtic (partly from Michael Everson): +# ----------------------------------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# This character set was developed by Michael Everson of Everson +# Typography (everson@evertype.com) and was used for the Irish +# localizations of Mac OS 6.0.8 and 7.1, for the Welsh localization of +# Mac OS 7.1, and for several fonts that can be used on any version of +# Mac OS 7.1 or later. Note that while Apple authorized +# the Irish and Welsh localizations mentioned above, they were not +# systems which shipped with Apple hardware, and were not otherwise +# supported by Apple. Fonts conforming to the Mac OS Celtic character +# set are available from Everson Typography (http://www.evertype.com) +# and MEU Cymru (http://www.meucymru.co.uk). Information about the use +# of this character set is available at +# http://www.evertype.com/celtscript/celtcode.html. +# +# The Mac OS Celtic encoding shares the script code smRoman (0) with +# the standard Mac OS Roman encoding. To determine if the Celtic +# encoding is being used in Mac OS 7-9, you should also check if the +# system region code is 50, verIreland, or 79, verWales. Otherwise, +# you can check for particular fonts that conform to this encoding. +# +# This character set is a variant of standard Mac OS Roman, adding +# capital and small y with acute, grave, and circumflex, and capital +# and small w with acute, grave, circumflex and diaeresis. It has 14 +# code point differences from standard Mac OS Roman (0xDE, 0xDF, 0xE2, +# 0xE3, 0xF6-0xFF). 
+# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts were updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Celtic encoding that still +# maps 0xDB to U+00A4; this can be used for older fonts. +# Note: U+20AC is new with Unicode 2.1; for earlier Unicode +# versions, Mac OS Celtic 0xDB may be mapped to private-use +# character U+F8A0. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +0x0000 - 0x007E = 0x00 - +0x00A0 = 0xCA +0x00A1 = 0xC1 +0x00A2 = 0xA2 +0x00A3 = 0xA3 +0x00A5 = 0xB4 +0x00A7 = 0xA4 +0x00A8 = 0xAC +0x00A9 = 0xA9 +0x00AA = 0xBB +0x00AB = 0xC7 +0x00AC = 0xC2 +0x00AE = 0xA8 +0x00B0 = 0xA1 +0x00B1 = 0xB1 +0x00B4 = 0xAB +0x00B5 = 0xB5 +0x00B6 = 0xA6 +0x00B7 = 0xE1 +0x00BA = 0xBC +0x00BB = 0xC8 +0x00BF = 0xC0 +0x00C0 = 0xCB +0x00C1 = 0xE7 +0x00C2 = 0xE5 +0x00C3 = 0xCC +0x00C4 = 0x80 +0x00C5 = 0x81 +0x00C6 = 0xAE +0x00C7 = 0x82 +0x00C8 = 0xE9 +0x00C9 = 0x83 +0x00CA = 0xE6 +0x00CB = 0xE8 +0x00CC = 0xED +0x00CD = 0xEA +0x00CE = 0xEB +0x00CF = 0xEC +0x00D1 = 0x84 +0x00D2 = 0xF1 +0x00D3 = 0xEE +0x00D4 = 0xEF +0x00D5 = 0xCD +0x00D6 = 0x85 +0x00D8 = 0xAF +0x00D9 = 0xF4 +0x00DA = 0xF2 +0x00DB = 0xF3 +0x00DC = 0x86 +0x00DD = 0xF6 +0x00DF = 0xA7 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E3 = 0x8B +0x00E4 = 0x8A +0x00E5 = 0x8C +0x00E6 = 0xBE +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00EC = 0x93 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F1 = 0x96 +0x00F2 = 0x98 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F5 = 0x9B +0x00F6 = 0x9A +0x00F7 = 0xD6 +0x00F8 = 0xBF +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 0x9F +0x00FD = 0xF7 +0x00FF = 0xD8 +0x0131 = 0xF5 +0x0152 = 0xCE +0x0153 
= 0xCF +0x0174 = 0xF8 +0x0175 = 0xF9 +0x0176 = 0xDE +0x0177 = 0xDF +0x0178 = 0xD9 +0x0192 = 0xC4 +0x03A9 = 0xBD +0x03C0 = 0xB9 +0x1E80 = 0xFC +0x1E81 = 0xFD +0x1E82 = 0xFE +0x1E83 = 0xFF +0x1E84 = 0xFA +0x1E85 = 0xFB +0x1EF2 = 0xE2 +0x1EF3 = 0xE3 +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201C = 0xD2 +0x201D = 0xD3 +0x2020 = 0xA0 +0x2021 = 0xE0 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x2030 = 0xE4 +0x2039 = 0xDC +0x203A = 0xDD +0x2044 = 0xDA +0x20AC = 0xDB +0x2122 = 0xAA +0x2202 = 0xB6 +0x2206 = 0xC6 +0x220F = 0xB8 +0x2211 = 0xB7 +0x221A = 0xC3 +0x221E = 0xB0 +0x222B = 0xBA +0x2248 = 0xC5 +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +0x25CA = 0xD7 +0x2663 = 0xF0 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%CENTEURO.src b/share/i18n/csmapper/APPLE/UCS%CENTEURO.src new file mode 100644 index 0000000..ba57234 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%CENTEURO.src @@ -0,0 +1,247 @@ +# $FreeBSD$ +# $NetBSD: UCS%CENTEURO.src,v 1.2 2006/04/08 15:47:39 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/CENTEURO +SRC_ZONE 0x0000-0x25CA +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: CENTEURO.TXT +# +# Contents: Map (external version) from Mac OS Central European +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-04 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Update header comments to new format; no +# mapping changes. 
Matches internal utom<n3>, +# ufrm<n13>, and Text Encoding Converter +# version 1.3. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n5>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Central European code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Central European code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Central European character set uses the standard control +# characters at 0x00-0x1F and 0x7F. 
+# +# Notes on Mac OS Central European: +# --------------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# This character set is intended to cover the following languages: +# +# Polish, Czech, Slovak, Hungarian, Estonian, Latvian, Lithuanian +# +# These are written in Latin script, but using a different set of +# accented characters than Mac OS Roman. The Mac OS Central +# European character set also includes a number of characters +# needed for the Mac OS user interface and localization (e.g. +# ellipsis, bullet, copyright sign), several typographic +# punctuation symbols, math symbols, etc. However, it has a +# smaller set of punctuation and symbols than Mac OS Roman. All of +# the characters in Mac OS Central European that are also in the +# Mac OS Roman character set are at the same code point in both +# character sets; this improves application compatibility. +# +# Note: This does not have the same letter repertoire as ISO +# 8859-2 (Latin-2); each has some accented letters that the other +# does not have. 
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +0x0000 - 0x007F = 0x00 - +0x00A0 = 0xCA +0x00A3 = 0xA3 +0x00A7 = 0xA4 +0x00A8 = 0xAC +0x00A9 = 0xA9 +0x00AB = 0xC7 +0x00AC = 0xC2 +0x00AE = 0xA8 +0x00B0 = 0xA1 +0x00B6 = 0xA6 +0x00BB = 0xC8 +0x00C1 = 0xE7 +0x00C4 = 0x80 +0x00C9 = 0x83 +0x00CD = 0xEA +0x00D3 = 0xEE +0x00D4 = 0xEF +0x00D5 = 0xCD +0x00D6 = 0x85 +0x00DA = 0xF2 +0x00DC = 0x86 +0x00DD = 0xF8 +0x00DF = 0xA7 +0x00E1 = 0x87 +0x00E4 = 0x8A +0x00E9 = 0x8E +0x00ED = 0x92 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F5 = 0x9B +0x00F6 = 0x9A +0x00F7 = 0xD6 +0x00FA = 0x9C +0x00FC = 0x9F +0x00FD = 0xF9 +0x0100 = 0x81 +0x0101 = 0x82 +0x0104 = 0x84 +0x0105 = 0x88 +0x0106 = 0x8C +0x0107 = 0x8D +0x010C = 0x89 +0x010D = 0x8B +0x010E = 0x91 +0x010F = 0x93 +0x0112 = 0x94 +0x0113 = 0x95 +0x0116 = 0x96 +0x0117 = 0x98 +0x0118 = 0xA2 +0x0119 = 0xAB +0x011A = 0x9D +0x011B = 0x9E +0x0122 = 0xFE +0x0123 = 0xAE +0x012A = 0xB1 +0x012B = 0xB4 +0x012E = 0xAF +0x012F = 0xB0 +0x0136 = 0xB5 +0x0137 = 0xFA +0x0139 = 0xBD +0x013A = 0xBE +0x013B = 0xB9 +0x013C = 0xBA +0x013D = 0xBB +0x013E = 0xBC +0x0141 = 0xFC +0x0142 = 0xB8 +0x0143 = 0xC1 +0x0144 = 0xC4 +0x0145 = 0xBF +0x0146 = 0xC0 +0x0147 = 0xC5 +0x0148 = 0xCB +0x014C = 0xCF +0x014D = 0xD8 +0x0150 = 0xCC +0x0151 = 0xCE +0x0154 = 0xD9 +0x0155 = 0xDA +0x0156 = 0xDF +0x0157 = 0xE0 +0x0158 = 0xDB +0x0159 = 0xDE +0x015A = 0xE5 +0x015B = 0xE6 +0x0160 = 0xE1 +0x0161 = 0xE4 +0x0164 = 0xE8 +0x0165 = 0xE9 +0x016A = 0xED +0x016B = 0xF0 +0x016E = 0xF1 +0x016F = 0xF3 +0x0170 = 0xF4 +0x0171 = 0xF5 +0x0172 = 0xF6 +0x0173 = 0xF7 +0x0179 = 0x8F +0x017A = 0x90 +0x017B = 0xFB +0x017C = 0xFD +0x017D = 0xEB +0x017E = 0xEC +0x02C7 = 0xFF +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201A = 0xE2 +0x201C = 0xD2 +0x201D = 0xD3 +0x201E = 0xE3 +0x2020 = 0xA0 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x2039 = 0xDC 
+0x203A = 0xDD +0x2122 = 0xAA +0x2202 = 0xB6 +0x2206 = 0xC6 +0x2211 = 0xB7 +0x221A = 0xC3 +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +0x25CA = 0xD7 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%CROATIAN.src b/share/i18n/csmapper/APPLE/UCS%CROATIAN.src new file mode 100644 index 0000000..a46e082 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%CROATIAN.src @@ -0,0 +1,271 @@ +# $FreeBSD$ +# $NetBSD: UCS%CROATIAN.src,v 1.2 2006/04/08 15:47:39 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/CROATIAN +SRC_ZONE 0x0000-0xF8FF +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: CROATIAN.TXT +# +# Contents: Map (external version) from Mac OS Croatian +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-04 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b3>. +# b02 1999-Sep-22 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to EURO +# SIGN. Update contact e-mail address. Matches +# internal utom<b2>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n07 1998-Feb-05 Minor update to header comments +# n05 1997-Dec-14 Update to match internal utom<5>, ufrm<16>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<6>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. 
For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Croatian code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Croatian code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Croatian character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Croatian: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Croatian is used for Croatian and Slovene. +# +# The Mac OS Croatian encoding shares the script code smRoman +# (0) with the standard Mac OS Roman encoding. 
To determine if +# the Croatian encoding is being used, you must check if the +# system region code is 68, verCroatia (or 25, verYugoCroatian, +# only used in older systems). +# +# This character set is a variant of standard Mac OS Roman +# encoding, adding five accented letter case pairs to handle +# Croatian. It has 20 code point differences from standard +# Mac OS Roman, but only 10 differences in repertoire. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Croatian encoding that +# still maps 0xDB to U+00A4; this can be used for older fonts. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n07 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n03 to version n05: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. 
+# +################## +0x0000 - 0x007E = 0x00 - +0x00A0 = 0xCA +0x00A1 = 0xC1 +0x00A2 = 0xA2 +0x00A3 = 0xA3 +0x00A7 = 0xA4 +0x00A8 = 0xAC +0x00A9 = 0xD9 +0x00AA = 0xBB +0x00AB = 0xC7 +0x00AC = 0xC2 +0x00AE = 0xA8 +0x00AF = 0xF8 +0x00B0 = 0xA1 +0x00B1 = 0xB1 +0x00B4 = 0xAB +0x00B5 = 0xB5 +0x00B6 = 0xA6 +0x00B7 = 0xE1 +0x00B8 = 0xFC +0x00BA = 0xBC +0x00BB = 0xDF +0x00BF = 0xC0 +0x00C0 = 0xCB +0x00C1 = 0xE7 +0x00C2 = 0xE5 +0x00C3 = 0xCC +0x00C4 = 0x80 +0x00C5 = 0x81 +0x00C6 = 0xDE +0x00C7 = 0x82 +0x00C8 = 0xE9 +0x00C9 = 0x83 +0x00CA = 0xFD +0x00CB = 0xFA +0x00CC = 0xED +0x00CD = 0xEA +0x00CE = 0xEB +0x00CF = 0xEC +0x00D1 = 0x84 +0x00D2 = 0xF1 +0x00D3 = 0xEE +0x00D4 = 0xEF +0x00D5 = 0xCD +0x00D6 = 0x85 +0x00D8 = 0xAF +0x00D9 = 0xF4 +0x00DA = 0xF2 +0x00DB = 0xF3 +0x00DC = 0x86 +0x00DF = 0xA7 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E3 = 0x8B +0x00E4 = 0x8A +0x00E5 = 0x8C +0x00E6 = 0xFE +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00EC = 0x93 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F1 = 0x96 +0x00F2 = 0x98 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F5 = 0x9B +0x00F6 = 0x9A +0x00F7 = 0xD6 +0x00F8 = 0xBF +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 0x9F +0x0106 = 0xC6 +0x0107 = 0xE6 +0x010C = 0xC8 +0x010D = 0xE8 +0x0110 = 0xD0 +0x0111 = 0xF0 +0x0131 = 0xF5 +0x0152 = 0xCE +0x0153 = 0xCF +0x0160 = 0xA9 +0x0161 = 0xB9 +0x017D = 0xAE +0x017E = 0xBE +0x0192 = 0xC4 +0x02C6 = 0xF6 +0x02C7 = 0xFF +0x02DA = 0xFB +0x02DC = 0xF7 +0x03A9 = 0xBD +0x03C0 = 0xF9 +0x2013 = 0xE0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201A = 0xE2 +0x201C = 0xD2 +0x201D = 0xD3 +0x201E = 0xE3 +0x2020 = 0xA0 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x2030 = 0xE4 +0x2039 = 0xDC +0x203A = 0xDD +0x2044 = 0xDA +0x20AC = 0xDB +0x2122 = 0xAA +0x2202 = 0xB6 +0x2206 = 0xB4 +0x220F = 0xB8 +0x2211 = 0xB7 +0x221A = 0xC3 +0x221E = 0xB0 +0x222B = 0xBA +0x2248 = 0xC5 +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +0x25CA = 0xD7 +0xF8FF = 0xD8 +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/UCS%CYRILLIC.src b/share/i18n/csmapper/APPLE/UCS%CYRILLIC.src new file mode 100644 index 0000000..8451772 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%CYRILLIC.src @@ -0,0 +1,272 @@ +# $FreeBSD$ +# $NetBSD: UCS%CYRILLIC.src,v 1.2 2006/04/08 15:47:39 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/CYRILLIC +SRC_ZONE 0x0000-0x2265 +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: CYRILLIC.TXT +# +# Contents: Map (external version) from Mac OS Cyrillic +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c03 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b2>. +# b02 1999-Sep-22 Encoding changed for Mac OS 9.0 to merge +# with Mac OS Ukrainian and support EURO SIGN; +# Change mappings for 0xA2, 0xB6, and 0xFF. +# Update contact e-mail address. Matches +# internal utom<b2>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Update header comments to new format; no +# mapping changes. Matches internal utom<n3>, +# ufrm<n13>, and Text Encoding Converter +# version 1.3. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n5>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. 
("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Cyrillic code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Cyrillic code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Cyrillic character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Cyrillic: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# This is the "Euro sign" version of Mac Cyrillic for Mac OS 9.0 and +# later. Before Mac OS 9.0, there were two separate Slavic Cyrillic +# encodings: +# +# 1. 
The Cyrillic currency sign variant (used for localized Russian +# and Bulgarian systems), which had the following: +# 0xA2 U+00A2 CENT SIGN +# 0xB6 U+2202 PARTIAL DIFFERENTIAL +# 0xFF U+00A4 CURRENCY SIGN +# +# 2. The Ukrainian currency sign variant (used for localized Ukrainian +# systems and the pre-9.0 Cyrillic Language Kit), which had the +# following: +# 0xA2 U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN +# 0xB6 U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN +# 0xFF U+00A4 CURRENCY SIGN +# +# This new Cyrillic Euro sign version is based on the old Ukrainian +# currency sign variant, with 0xFF changed to be EURO SIGN. +# +# The Mac OS Cyrillic encoding includes the Cyrillic letter repertoire +# of ISO 8859-5 (although not at the same code points). This covers +# most of the Slavic languages written in Cyrillic script. +# +# The Mac OS Cyrillic encoding also includes a number of characters +# needed for the Mac OS user interface and localization (e.g. +# ellipsis, bullet, copyright sign). All of the characters in Mac OS +# Cyrillic that are also in the Mac OS Roman encoding are at the +# same code point in both; this improves application compatibility. +# +# Note: There is a common Ukrainian glyph variation in which the glyph +# for CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I may or may not +# have a dot above. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n05 to version b02: +# +# - Encoding changed for Mac OS 9.0 to merge with Mac OS Ukrainian and +# support EURO SIGN. 0xA2 changed from U+00A2 to U+0490; 0xB6 changed +# from U+2202 to U+0491; 0xFF changed from U+00A4 to U+20AC. 
+# +################## +0x0000 - 0x007E = 0x0000 - +0x00A0 = 0xCA +0x00A3 = 0xA3 +0x00A7 = 0xA4 +0x00A9 = 0xA9 +0x00AB = 0xC7 +0x00AC = 0xC2 +0x00AE = 0xA8 +0x00B0 = 0xA1 +0x00B1 = 0xB1 +0x00B5 = 0xB5 +0x00B6 = 0xA6 +0x00BB = 0xC8 +0x00F7 = 0xD6 +0x0192 = 0xC4 +0x0401 = 0xDD +0x0402 = 0xAB +0x0403 = 0xAE +0x0404 = 0xB8 +0x0405 = 0xC1 +0x0406 = 0xA7 +0x0407 = 0xBA +0x0408 = 0xB7 +0x0409 = 0xBC +0x040A = 0xBE +0x040B = 0xCB +0x040C = 0xCD +0x040E = 0xD8 +0x040F = 0xDA +0x0410 = 0x80 +0x0411 = 0x81 +0x0412 = 0x82 +0x0413 = 0x83 +0x0414 = 0x84 +0x0415 = 0x85 +0x0416 = 0x86 +0x0417 = 0x87 +0x0418 = 0x88 +0x0419 = 0x89 +0x041A = 0x8A +0x041B = 0x8B +0x041C = 0x8C +0x041D = 0x8D +0x041E = 0x8E +0x041F = 0x8F +0x0420 = 0x90 +0x0421 = 0x91 +0x0422 = 0x92 +0x0423 = 0x93 +0x0424 = 0x94 +0x0425 = 0x95 +0x0426 = 0x96 +0x0427 = 0x97 +0x0428 = 0x98 +0x0429 = 0x99 +0x042A = 0x9A +0x042B = 0x9B +0x042C = 0x9C +0x042D = 0x9D +0x042E = 0x9E +0x042F = 0x9F +0x0430 = 0xE0 +0x0431 = 0xE1 +0x0432 = 0xE2 +0x0433 = 0xE3 +0x0434 = 0xE4 +0x0435 = 0xE5 +0x0436 = 0xE6 +0x0437 = 0xE7 +0x0438 = 0xE8 +0x0439 = 0xE9 +0x043A = 0xEA +0x043B = 0xEB +0x043C = 0xEC +0x043D = 0xED +0x043E = 0xEE +0x043F = 0xEF +0x0440 = 0xF0 +0x0441 = 0xF1 +0x0442 = 0xF2 +0x0443 = 0xF3 +0x0444 = 0xF4 +0x0445 = 0xF5 +0x0446 = 0xF6 +0x0447 = 0xF7 +0x0448 = 0xF8 +0x0449 = 0xF9 +0x044A = 0xFA +0x044B = 0xFB +0x044C = 0xFC +0x044D = 0xFD +0x044E = 0xFE +0x044F = 0xDF +0x0451 = 0xDE +0x0452 = 0xAC +0x0453 = 0xAF +0x0454 = 0xB9 +0x0455 = 0xCF +0x0456 = 0xB4 +0x0457 = 0xBB +0x0458 = 0xC0 +0x0459 = 0xBD +0x045A = 0xBF +0x045B = 0xCC +0x045C = 0xCE +0x045E = 0xD9 +0x045F = 0xDB +0x0490 = 0xA2 +0x0491 = 0xB6 +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201C = 0xD2 +0x201D = 0xD3 +0x201E = 0xD7 +0x2020 = 0xA0 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x20AC = 0xFF +0x2116 = 0xDC +0x2122 = 0xAA +0x2206 = 0xC6 +0x221A = 0xC3 +0x221E = 0xB0 +0x2248 = 0xC5 +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/UCS%DEVANAGA.src b/share/i18n/csmapper/APPLE/UCS%DEVANAGA.src new file mode 100644 index 0000000..d1af80b --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%DEVANAGA.src @@ -0,0 +1,359 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME UCS/DEVANAGA +SRC_ZONE 0x0000-0x2212 +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 +#======================================================================= +# File name: DEVANAGA.TXT +# +# Contents: Map (external version) from Mac OS Devanagari +# encoding to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments; add section on +# roundtrip considerations. Matches internal +# xml <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n04 1998-Feb-05 First version; matches internal utom<n9>, +# ufrm<n15>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. 
+# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Devanagari code or code sequence +# (in hex as 0xNN or 0xNN+0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name or sequence +# of names. In some cases an additional comment follows the +# Unicode name(s). +# +# The entries are in two sections. The first section is for pairs of +# Mac OS Devanagari code points that must be mapped in a special way. +# The second section maps individual code points. +# +# Within each section, the entries are in Mac OS Devanagari code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Devanagari character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Devanagari: +# --------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Devanagari is based on IS 13194:1991 (ISCII-91), with the +# addition of several punctuation and symbol characters. However, +# Mac OS Devanagari does not support the ATR (attribute) mechanism of +# ISCII-91. +# +# 1. ISCII-91 features in Mac OS Devanagari include: +# +# a) Overloading of nukta +# +# In addition to using the nukta (0xE9) like a combining dot below, +# nukta is overloaded to function as a general character modifier. 
+# In this role, certain code points followed by 0xE9 are treated as +# a two-byte code point representing a character which may be +# rather different than the characters represented by either of +# the code points alone. For example, the character DEVANAGARI OM +# (U+0950) is represented in ISCII-91 as candrabindu + nukta. +# +# b) Explicit halant and soft halant +# +# A double halant (0xE8 + 0xE8) constitutes an "explicit halant", +# which will always appear as a halant instead of causing formation +# of a ligature or half-form consonant. +# +# Halant followed by nukta (0xE8 + 0xE9) constitutes a "soft +# halant", which prevents formation of a ligature and instead +# retains the half-form of the first consonant. +# +# c) Invisible consonant +# +# The byte 0xD9 (called INV in ISCII-91) is an invisible consonant: +# It behaves like a consonant but has no visible appearance. It is +# intended to be used (often in combination with halant) to display +# dependent forms in isolation, such as the RA forms or consonant +# half-forms. +# +# d) Extensions for Vedic, etc. +# +# The byte 0xF0 (called EXT in ISCII-91) followed by any byte in +# the range 0xA1-0xEE constitutes a two-byte code point which can +# be used to represent additional characters for Vedic (or other +# extensions); 0xF0 followed by any other byte value constitutes +# malformed text. Mac OS Devanagari supports this mechanism, but +# does not currently map any of these two-byte code points to +# anything. +# +# 2. Mac OS Devanagari additions +# +# Mac OS Devanagari adds characters using the code points +# 0x80-0x8A and 0x90-0x91 (the latter are some Devanagari additions +# from Unicode). +# +# 3. Unused code points +# +# The following code points are currently unused, and are not shown +# here: 0x8B-0x8F, 0x92-0xA0, 0xEB-0xEF, 0xFB-0xFF. In addition, +# 0xF0 is not shown here, but it has a special function as described +# above. 
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. Mapping the byte pairs +# +# If one of the following byte values is encountered when mapping +# Mac OS Devanagari text - 0xA1, 0xA6, 0xA7, 0xAA, 0xDB, 0xDC, 0xDF, +# 0xE8, or 0xEA - then the next byte (if there is one) should be +# examined. If the next byte is 0xE9 - or also 0xE8, if the first +# byte was 0xE8 - then the byte pair should be mapped using the +# first section of the mapping table below. Otherwise, each byte +# should be mapped using the second section of the mapping table +# below. +# +# - The Unicode Standard, Version 2.0, specifies how explicit +# halant and soft halant should be represented in Unicode; +# these mappings are used below. +# +# If the byte value 0xF0 is encountered when mapping Mac OS +# Devanagari text, then the next byte should be examined. If there +# is no next byte (e.g. 0xF0 at end of buffer), the mapping +# process should indicate incomplete character. If there is a next +# byte but it is not in the range 0xA1-0xEE, the mapping process +# should indicate malformed text. Otherwise, the mapping process +# should treat the byte pair as a valid two-byte code point with no +# mapping (e.g. map it to QUESTION MARK, REPLACEMENT CHARACTER, +# etc.). +# +# 2. Mapping the invisible consonant +# +# It has been suggested that INV in ISCII-91 should map to ZERO +# WIDTH NON-JOINER in Unicode. However, this causes problems with +# roundtrip fidelity: The ISCII-91 sequences 0xE8+0xE8 and 0xE8+0xD9 +# would map to the same sequence of Unicode characters. We have +# instead mapped INV to LEFT-TO-RIGHT MARK, which avoids these +# problems. +# +# 3. Additional loose mappings from Unicode +# +# These are not preserved in roundtrip mappings. 
+# +# U+0958 0xB3+0xE9 # DEVANAGARI LETTER QA +# U+0959 0xB4+0xE9 # DEVANAGARI LETTER KHHA +# U+095A 0xB5+0xE9 # DEVANAGARI LETTER GHHA +# U+095B 0xBA+0xE9 # DEVANAGARI LETTER ZA +# U+095C 0xBF+0xE9 # DEVANAGARI LETTER DDDHA +# U+095D 0xC0+0xE9 # DEVANAGARI LETTER RHA +# U+095E 0xC9+0xE9 # DEVANAGARI LETTER FA +# +# 4. Roundtrip considerations when mapping to decomposed Unicode +# +# Both ISCII-91 (hence Mac OS Devanagari) and Unicode provide multiple +# ways of representing certain Devanagari consonants. For example, +# DEVANAGARI LETTER NNNA can be represented in Unicode as the single +# character 0x0929 or as the sequence 0x0928 0x093C; similarly, this +# consonant can be represented in Mac OS Devanagari as 0xC7 or as the +# sequence 0xC6 0xE9. This leads to some roundtrip problems. First +# note that we have the following mappings without such problems: +# +# ISCII/ standard decomposition of reverse mapping +# Mac OS Unicode mapping standard mapping of decomposition +# ------ ----------------------- ---------------- ---------------- +# 0xC6 0x0928 ... LETTER NA 0x0928 (same) 0xC6 +# 0xCD 0x092F ... LETTER YA 0x092F (same) 0xCD +# 0xCF 0x0930 ... LETTER RA 0x0930 (same) 0xCF +# 0xD2 0x0933 ... LETTER LLA 0x0933 (same) 0xD2 +# 0xE9 0x093C ... SIGN NUKTA 0x093C (same) 0xE9 +# +# However, those mappings above cause roundtrip problems for the +# following mappings if they are decomposed: +# +# ISCII/ standard decomposition of reverse mapping +# Mac OS Unicode mapping standard mapping of decomposition +# ------ ----------------------- ---------------- ---------------- +# 0xC7 0x0929 ... LETTER NNNA 0x0928 0x093C 0xC6 0xE9 +# 0xCE 0x095F ... LETTER YYA 0x092F 0x093C 0xCD 0xE9 +# 0xD0 0x0931 ... LETTER RRA 0x0930 0x093C 0xCF 0xE9 +# 0xD3 0x0934 ... LETTER LLLA 0x0933 0x093C 0xD2 0xE9 +# +# One solution is to use a grouping transcoding hint with the four +# decompositions above to mark the decomposed sequence for special +# treatment in transcoding. 
This yields the following mappings to +# decomposed Unicode: +# +# ISCII/ decomposed +# Mac OS Unicode mapping +# ------ ---------------- +# 0xC7 0xF860 0x0928 0x093C +# 0xCE 0xF860 0x092F 0x093C +# 0xD0 0xF860 0x0930 0x093C +# 0xD3 0xF860 0x0933 0x093C +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +# Section 1: Map the following byte pairs as indicated: +# (ZWNJ means ZERO WIDTH NON-JOINER, ZWJ means ZERO WIDTH JOINER) +# (Also see note about 0xF0 in comments above) +# Section 2: Map the remaining bytes as follows: +# +# +# +# +BEGIN_MAP +0x0000 - 0x007F = 0x00 - +0x00A9 = 0x88 +0x00AE = 0x89 +0x00D7 = 0x80 +0x0901 = 0xA1 +0x0902 = 0xA2 +0x0903 = 0xA3 +0x0905 = 0xA4 +0x0906 = 0xA5 +0x0907 = 0xA6 +0x0908 = 0xA7 +0x0909 = 0xA8 +0x090A = 0xA9 +0x090B = 0xAA +#0x090C = 0xA6+0xE9 +0x090D = 0xAE +0x090E = 0xAB +0x090F = 0xAC +0x0910 = 0xAD +0x0911 = 0xB2 +0x0912 = 0xAF +0x0913 = 0xB0 +0x0914 = 0xB1 +0x0915 = 0xB3 +0x0916 = 0xB4 +0x0917 = 0xB5 +0x0918 = 0xB6 +0x0919 = 0xB7 +0x091A = 0xB8 +0x091B = 0xB9 +0x091C = 0xBA +0x091D = 0xBB +0x091E = 0xBC +0x091F = 0xBD +0x0920 = 0xBE +0x0921 = 0xBF +0x0922 = 0xC0 +0x0923 = 0xC1 +0x0924 = 0xC2 +0x0925 = 0xC3 +0x0926 = 0xC4 +0x0927 = 0xC5 +0x0928 = 0xC6 +0x0929 = 0xC7 +0x092A = 0xC8 +0x092B = 0xC9 +0x092C = 0xCA +0x092D = 0xCB +0x092E = 0xCC +0x092F = 0xCD +0x0930 = 0xCF +0x0931 = 0xD0 +0x0932 = 0xD1 +0x0933 = 0xD2 +0x0934 = 0xD3 +0x0935 = 0xD4 +0x0936 = 0xD5 +0x0937 = 0xD6 +0x0938 = 0xD7 +0x0939 = 0xD8 +0x093C = 0xE9 +#0x093D = 0xEA+0xE9 +0x093E = 0xDA +0x093F = 0xDB +0x0940 = 0xDC +0x0941 = 0xDD +0x0942 = 0xDE +0x0943 = 0xDF +#0x0944 = 0xDF+0xE9 +0x0945 = 0xE3 +0x0946 = 0xE0 +0x0947 = 0xE1 +0x0948 = 0xE2 +0x0949 = 0xE7 +0x094A = 0xE4 +0x094B = 0xE5 +0x094C = 0xE6 +0x094D = 0xE8 +#0x094D+0x200C = 0xE8+0xE8 +#0x094D+0x200D = 0xE8+0xE9 +#0x0950 = 0xA1+0xE9 +0x095F = 0xCE +#0x0960 = 0xAA+0xE9 +#0x0961 = 0xA7+0xE9 +#0x0962 = 0xDB+0xE9 +#0x0963 = 0xDC+0xE9 
+0x0964 = 0xEA +0x0965 = 0x90 +0x0966 = 0xF1 +0x0967 = 0xF2 +0x0968 = 0xF3 +0x0969 = 0xF4 +0x096A = 0xF5 +0x096B = 0xF6 +0x096C = 0xF7 +0x096D = 0xF8 +0x096E = 0xF9 +0x096F = 0xFA +0x0970 = 0x91 +0x200E = 0xD9 +0x2013 = 0x82 +0x2014 = 0x83 +0x2018 = 0x84 +0x2019 = 0x85 +0x2022 = 0x87 +0x2026 = 0x86 +0x2122 = 0x8A +0x2212 = 0x81 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%DINGBATS.src b/share/i18n/csmapper/APPLE/UCS%DINGBATS.src new file mode 100644 index 0000000..75ceb25 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%DINGBATS.src @@ -0,0 +1,341 @@ +# $FreeBSD$ +# $NetBSD: UCS%DINGBATS.src,v 1.2 2006/04/08 15:47:39 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/DINGBATS +SRC_ZONE 0x0000-0x27BE +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: DINGBATS.TXT +# +# Contents: Map (external version) from Mac OS Dingbats +# character set to Unicode 3.2 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update mappings for 0x80-0x8D to use new +# Unicode 3.2 characters. Update URLs, notes. +# Matches internal utom<b2>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Update to match internal utom<n4>, ufrm<n14>, +# and Text Encoding Converter version 1.3: +# Change all mappings to single corporate-zone +# Unicodes to either use standard Unicodes +# or standard Unicodes plus transcoding hints; +# see details below. Also update header +# comments to new format. +# n03 1995-Apr-15 First version (after fixing some typos). 
+# Matches internal ufrm<n4>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Dingbats code (in hex as 0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN). +# Column #3 is a comment containing the Unicode name. +# In some cases an additional comment follows the Unicode name. +# +# The entries are in Mac OS Dingbats code order. +# +# Some of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Dingbats character set uses the standard control characters +# at 0x00-0x1F and 0x7F. 
+# +# Notes on Mac OS Dingbats: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# The Mac OS Dingbats encoding shares the script code smRoman +# (0) with the standard Mac OS Roman encoding. To determine if +# the Dingbats encoding is being used, you must check if the +# font name is "Zapf Dingbats". +# +# The layout of the Dingbats character set is identical to or +# a superset of the layout of the Adobe Zapf Dingbats encoding +# vector. +# +# The following code points are unused, and are not shown here: +# 0x8E-0xA0, 0xF0, 0xFF. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - The mappings for the following Mac OS Dingbats characters +# were changed to use standard Unicode characters added for +# Unicode 3.2: 0x80-0x8D. +# +# Changes from version n03 to version n05: +# +# - The mappings for the following Mac OS Dingbats characters +# were changed from single corporate-zone Unicode characters +# to standard Unicode characters: +# 0x80-0x81, 0x84-0x87, 0x8A-0x8D. +# +# - The mappings for the following Mac OS Dingbats characters +# were changed from single corporate-zone Unicode characters +# to combinations of a standard Unicode and a transcoding hint: +# 0x82-0x83, 0x88-0x89. 
+# +################## +0x0000 - 0x0020 = 0x00 - +0x2192 = 0xD5 +0x2194 = 0xD6 +0x2195 = 0xD7 +0x2460 = 0xAC +0x2461 = 0xAD +0x2462 = 0xAE +0x2463 = 0xAF +0x2464 = 0xB0 +0x2465 = 0xB1 +0x2466 = 0xB2 +0x2467 = 0xB3 +0x2468 = 0xB4 +0x2469 = 0xB5 +0x25A0 = 0x6E +0x25B2 = 0x73 +0x25BC = 0x74 +0x25C6 = 0x75 +0x25CF = 0x6C +0x25D7 = 0x77 +0x2605 = 0x48 +0x260E = 0x25 +0x261B = 0x2A +0x261E = 0x2B +0x2660 = 0xAB +0x2663 = 0xA8 +0x2665 = 0xAA +0x2666 = 0xA9 +0x2701 = 0x21 +0x2702 = 0x22 +0x2703 = 0x23 +0x2704 = 0x24 +0x2706 = 0x26 +0x2707 = 0x27 +0x2708 = 0x28 +0x2709 = 0x29 +0x270C = 0x2C +0x270D = 0x2D +0x270E = 0x2E +0x270F = 0x2F +0x2710 = 0x30 +0x2711 = 0x31 +0x2712 = 0x32 +0x2713 = 0x33 +0x2714 = 0x34 +0x2715 = 0x35 +0x2716 = 0x36 +0x2717 = 0x37 +0x2718 = 0x38 +0x2719 = 0x39 +0x271A = 0x3A +0x271B = 0x3B +0x271C = 0x3C +0x271D = 0x3D +0x271E = 0x3E +0x271F = 0x3F +0x2720 = 0x40 +0x2721 = 0x41 +0x2722 = 0x42 +0x2723 = 0x43 +0x2724 = 0x44 +0x2725 = 0x45 +0x2726 = 0x46 +0x2727 = 0x47 +0x2729 = 0x49 +0x272A = 0x4A +0x272B = 0x4B +0x272C = 0x4C +0x272D = 0x4D +0x272E = 0x4E +0x272F = 0x4F +0x2730 = 0x50 +0x2731 = 0x51 +0x2732 = 0x52 +0x2733 = 0x53 +0x2734 = 0x54 +0x2735 = 0x55 +0x2736 = 0x56 +0x2737 = 0x57 +0x2738 = 0x58 +0x2739 = 0x59 +0x273A = 0x5A +0x273B = 0x5B +0x273C = 0x5C +0x273D = 0x5D +0x273E = 0x5E +0x273F = 0x5F +0x2740 = 0x60 +0x2741 = 0x61 +0x2742 = 0x62 +0x2743 = 0x63 +0x2744 = 0x64 +0x2745 = 0x65 +0x2746 = 0x66 +0x2747 = 0x67 +0x2748 = 0x68 +0x2749 = 0x69 +0x274A = 0x6A +0x274B = 0x6B +0x274D = 0x6D +0x274F = 0x6F +0x2750 = 0x70 +0x2751 = 0x71 +0x2752 = 0x72 +0x2756 = 0x76 +0x2758 = 0x78 +0x2759 = 0x79 +0x275A = 0x7A +0x275B = 0x7B +0x275C = 0x7C +0x275D = 0x7D +0x275E = 0x7E +0x2761 = 0xA1 +0x2762 = 0xA2 +0x2763 = 0xA3 +0x2764 = 0xA4 +0x2765 = 0xA5 +0x2766 = 0xA6 +0x2767 = 0xA7 +0x2768 = 0x80 +0x2769 = 0x81 +0x276A = 0x82 +0x276B = 0x83 +0x276C = 0x84 +0x276D = 0x85 +0x276E = 0x86 +0x276F = 0x87 +0x2770 = 0x88 +0x2771 = 0x89 +0x2772 = 0x8A +0x2773 = 0x8B 
+0x2774 = 0x8C +0x2775 = 0x8D +0x2776 = 0xB6 +0x2777 = 0xB7 +0x2778 = 0xB8 +0x2779 = 0xB9 +0x277A = 0xBA +0x277B = 0xBB +0x277C = 0xBC +0x277D = 0xBD +0x277E = 0xBE +0x277F = 0xBF +0x2780 = 0xC0 +0x2781 = 0xC1 +0x2782 = 0xC2 +0x2783 = 0xC3 +0x2784 = 0xC4 +0x2785 = 0xC5 +0x2786 = 0xC6 +0x2787 = 0xC7 +0x2788 = 0xC8 +0x2789 = 0xC9 +0x278A = 0xCA +0x278B = 0xCB +0x278C = 0xCC +0x278D = 0xCD +0x278E = 0xCE +0x278F = 0xCF +0x2790 = 0xD0 +0x2791 = 0xD1 +0x2792 = 0xD2 +0x2793 = 0xD3 +0x2794 = 0xD4 +0x2798 = 0xD8 +0x2799 = 0xD9 +0x279A = 0xDA +0x279B = 0xDB +0x279C = 0xDC +0x279D = 0xDD +0x279E = 0xDE +0x279F = 0xDF +0x27A0 = 0xE0 +0x27A1 = 0xE1 +0x27A2 = 0xE2 +0x27A3 = 0xE3 +0x27A4 = 0xE4 +0x27A5 = 0xE5 +0x27A6 = 0xE6 +0x27A7 = 0xE7 +0x27A8 = 0xE8 +0x27A9 = 0xE9 +0x27AA = 0xEA +0x27AB = 0xEB +0x27AC = 0xEC +0x27AD = 0xED +0x27AE = 0xEE +0x27AF = 0xEF +0x27B1 = 0xF1 +0x27B2 = 0xF2 +0x27B3 = 0xF3 +0x27B4 = 0xF4 +0x27B5 = 0xF5 +0x27B6 = 0xF6 +0x27B7 = 0xF7 +0x27B8 = 0xF8 +0x27B9 = 0xF9 +0x27BA = 0xFA +0x27BB = 0xFB +0x27BC = 0xFC +0x27BD = 0xFD +0x27BE = 0xFE +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%FARSI.src b/share/i18n/csmapper/APPLE/UCS%FARSI.src new file mode 100644 index 0000000..95f5d06 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%FARSI.src @@ -0,0 +1,411 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME UCS/FARSI +SRC_ZONE 0x0000-0xF8FF +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +#======================================================================= +# File name: FARSI.TXT +# +# Contents: Map (external version) from Mac OS Farsi +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1997-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Add comments about character display and +# direction overrides. Update URLs, notes. 
+# Matches internal utom<b3>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n04 1998-Feb-05 Show required Unicode character +# directionality in a different way. Matches +# internal utom<n3>, ufrm<n9>, and Text +# Encoding Converter version 1.3. Update +# header comments; include information on +# loose mapping of digits, and changes to +# mapping for the TrueType variant. +# n01 1997-Jul-17 First version. Matches internal utom<n1>, +# ufrm<n2>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Farsi code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN), +# possibly preceded by a tag indicating required directionality +# (i.e. <LR>+0xNNNN or <RL>+0xNNNN). 
+# Column #3 is a comment containing the Unicode name. +# +# The entries are in Mac OS Farsi code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Farsi character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Farsi: +# ---------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# 1. General +# +# The Mac OS Farsi character set is based on the Mac OS Arabic +# character set. The main difference is in the right-to-left digits +# 0xB0-0xB9: For Mac OS Arabic these correspond to right-left +# versions of the Unicode ARABIC-INDIC DIGITs 0660-0669; for +# Mac OS Farsi these correspond to right-left versions of the +# Unicode EXTENDED ARABIC-INDIC DIGITs 06F0-06F9. The other +# difference is in the nature of the font variants. +# +# For more information, see the comments in the mapping table for +# Mac OS Arabic. +# +# Mac OS Farsi characters 0xEB-0xF2 are non-spacing/combining marks. +# +# 2. Directional characters and roundtrip fidelity +# +# The Mac OS Arabic character set (on which Mac OS Farsi is based) +# was developed in 1986-1987. At that time the bidirectional line +# layout algorithm used in the Mac OS Arabic system was fairly simple; +# it used only a few direction classes (instead of the 19 now used in +# the Unicode bidirectional algorithm). In order to permit users to +# handle some tricky layout problems, certain punctuation and symbol +# characters were encoded twice, one with a left-right direction +# attribute and the other with a right-left direction attribute. This +# is the case in Mac OS Farsi too. +# +# For example, plus sign is encoded at 0x2B with a left-right +# attribute, and at 0xAB with a right-left attribute. However, there +# is only one PLUS SIGN character in Unicode. 
This leads to some +# interesting problems when mapping between Mac OS Farsi and Unicode; +# see below. +# +# A related problem is that even when a particular character is +# encoded only once in Mac OS Farsi, it may have a different +# direction attribute than the corresponding Unicode character. +# +# For example, the Mac OS Farsi character at 0x93 is HORIZONTAL +# ELLIPSIS with strong right-left direction. However, the Unicode +# character HORIZONTAL ELLIPSIS has direction class neutral. +# +# 3. Behavior of ASCII-range numbers in WorldScript +# +# Mac OS Farsi also has two sets of digit codes. + +# The digits at 0x30-0x39 may be displayed using either European +# digit forms or Persian digit forms, depending on context. If there +# is a "strong European" character such as a Latin letter on either +# side of a sequence consisting of digits 0x30-0x39 and possibly comma +# 0x2C or period 0x2E, then the characters will be displayed using +# European forms (This will happen even if there are neutral characters +# between the digits and the strong European character). Otherwise, the +# digits will be displayed using Persian forms, the comma will be +# displayed as Arabic thousands separator, and the period as Arabic +# decimal separator. In any case, 0x2C, 0x2E, and 0x30-0x39 are always +# left-right. +# +# The digits at 0xB0-0xB9 are always displayed using Persian digit +# shapes, and moreover, these digits always have strong right-left +# directionality. These are mainly intended for special layout +# purposes such as part numbers, etc. +# +# 4. Font variants +# +# The table in this file gives the Unicode mappings for the standard +# Mac OS Farsi encoding. This encoding is supported by the Tehran font +# (the system font for Farsi), and is the encoding supported by the +# text processing utilities. 
However, the other Farsi fonts actually +# implement a somewhat different encoding; this affects nine code +# points including 0xAA and 0xC0 (which are also affected by font +# variants in Mac OS Arabic). For these nine code points the standard +# Mac OS Farsi encoding has the following mappings: +# 0x8B -> 0x06BA ARABIC LETTER NOON GHUNNA (Urdu) +# 0xA4 -> <RL>+0x0024 DOLLAR SIGN, right-left +# 0xAA -> <RL>+0x002A ASTERISK, right-left +# 0xC0 -> <RL>+0x274A EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, +# right-left +# 0xF4 -> 0x0679 ARABIC LETTER TTEH (Urdu) +# 0xF7 -> 0x06A4 ARABIC LETTER VEH (for transliteration) +# 0xF9 -> 0x0688 ARABIC LETTER DDAL (Urdu) +# 0xFA -> 0x0691 ARABIC LETTER RREH (Urdu) +# 0xFF -> 0x06D2 ARABIC LETTER YEH BARREE (Urdu) +# +# The TrueType variant is used for the Farsi TrueType fonts: Ashfahan, +# Amir, Kamran, Mashad, NadeemFarsi. It differs from the standard +# variant in the following ways: +# 0x8B -> 0xF882 Arabic ligature "peace on him" (corporate char.) +# 0xA4 -> 0xFDFC RIAL SIGN (added in Unicode 3.2) +# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left +# 0xC0 -> <RL>+0x002A ASTERISK, right-left +# 0xF4 -> <RL>+0x00B0 DEGREE SIGN, right-left +# 0xF7 -> 0xFDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM +# 0xF9 -> <RL>+0x25CF BLACK CIRCLE, right-left +# 0xFA -> <RL>+0x25A0 BLACK SQUARE, right-left +# 0xFF -> <RL>+0x25B2 BLACK UP-POINTING TRIANGLE, right-left +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. Matching the direction of Mac OS Farsi characters +# +# When Mac OS Farsi encodes a character twice but with different +# direction attributes for the two code points - as in the case of +# plus sign mentioned above - we need a way to map both Mac OS Farsi +# code points to Unicode and back again without loss of information. 
+# With the plus sign, for example, mapping one of the Mac OS Farsi +# characters to a code in the Unicode corporate use zone is +# undesirable, since both of the plus sign characters are likely to +# be used in text that is interchanged. +# +# The problem is solved with the use of direction override characters +# and direction-dependent mappings. When mapping from Mac OS Farsi +# to Unicode, we use direction overrides as necessary to force the +# direction of the resulting Unicode characters. +# +# The required direction is indicated by a direction tag in the +# mappings. A tag of <LR> means the corresponding Unicode character +# must have a strong left-right context, and a tag of <RL> indicates +# a right-left context. +# +# For example, the mapping of 0x2B is given as <LR>+0x002B; the +# mapping of 0xAB is given as <RL>+0x002B. If we map an isolated +# instance of 0x2B to Unicode, it should be mapped as follows (LRO +# indicates LEFT-RIGHT OVERRIDE, PDF indicates POP DIRECTION +# FORMATTING): +# +# 0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF) +# +# When mapping several characters in a row that require direction +# forcing, the overrides need only be used at the beginning and end. +# For example: +# +# 0x24 0x20 0x28 0x29 -> 0x202D 0x0024 0x0020 0x0028 0x0029 0x202C +# +# If neutral characters that require direction forcing are already +# between strong-direction characters with matching directionality, +# then direction overrides need not be used. Direction overrides are +# always needed to map the right-left digits at 0xB0-0xB9. +# +# When mapping from Unicode to Mac OS Farsi, the Unicode +# bidirectional algorithm should be used to determine resolved +# direction of the Unicode characters. 
The mapping from Unicode to +# Mac OS Farsi can then be disambiguated by the use of the resolved +# direction: +# +# Unicode 0x002B -> Mac OS Farsi 0x2B (if L) or 0xAB (if R) +# +# However, this also means the direction override characters should +# be discarded when mapping from Unicode to Mac OS Farsi (after +# they have been used to determine resolved direction), since the +# direction override information is carried by the code point itself. +# +# Even when direction overrides are not needed for roundtrip +# fidelity, they are sometimes used when mapping Mac OS Farsi +# characters to Unicode in order to achieve similar text layout with +# the resulting Unicode text. For example, the single Mac OS Farsi +# ellipsis character has direction class right-left, and there is no +# left-right version. However, the Unicode HORIZONTAL ELLIPSIS +# character has direction class neutral (which means it may end up +# with a resolved direction of left-right if surrounded by left-right +# characters). When mapping the Mac OS Farsi ellipsis to Unicode, it +# is surrounded with a direction override to help preserve proper +# text layout. The resolved direction is not needed or used when +# mapping the Unicode HORIZONTAL ELLIPSIS back to Mac OS Farsi. +# +# 2. Mapping the Mac OS Farsi digits +# +# The main table below contains mappings that should be used when +# strict round-trip fidelity is required. However, for numeric +# values, the mappings in that table will produce Unicode characters +# that may appear different than the Mac OS Farsi text displayed on +# a Mac OS system using WorldScript. This is because WorldScript +# uses context-dependent display for the 0x30-0x39 digits.
+# +# If roundtrip fidelity is not required, then the following +# alternate mappings should be used when a sequence of 0x30-0x39 +# digits - possibly including 0x2C and 0x2E - occurs in an Arabic +# context (that is, when the first "strong" character on either side +# of the digit sequence is Arabic, or there is no strong character): +# +# 0x2C 0x066C # ARABIC THOUSANDS SEPARATOR +# 0x2E 0x066B # ARABIC DECIMAL SEPARATOR +# 0x30 0x06F0 # EXTENDED ARABIC-INDIC DIGIT ZERO +# 0x31 0x06F1 # EXTENDED ARABIC-INDIC DIGIT ONE +# 0x32 0x06F2 # EXTENDED ARABIC-INDIC DIGIT TWO +# 0x33 0x06F3 # EXTENDED ARABIC-INDIC DIGIT THREE +# 0x34 0x06F4 # EXTENDED ARABIC-INDIC DIGIT FOUR +# 0x35 0x06F5 # EXTENDED ARABIC-INDIC DIGIT FIVE +# 0x36 0x06F6 # EXTENDED ARABIC-INDIC DIGIT SIX +# 0x37 0x06F7 # EXTENDED ARABIC-INDIC DIGIT SEVEN +# 0x38 0x06F8 # EXTENDED ARABIC-INDIC DIGIT EIGHT +# 0x39 0x06F9 # EXTENDED ARABIC-INDIC DIGIT NINE +# +# 3. Use of corporate-zone Unicodes (mapping the TrueType variant) +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF882 Arabic ligature "peace on him" +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - Update mapping of 0xA4 in TrueType variant to use new Unicode +# character U+FDFC RIAL SIGN added for Unicode 3.2 +# +# Changes from version n01 to version n04: +# +# - Change mapping of 0xA4 in TrueType variant (just described in +# header comment) from single corporate character to use +# grouping hint +# +################## + +0x0000 - 0x007F = 0x00 - +0x00A0 = 0x81 +0x00AB = 0x8C +0x00BB = 0x98 +0x00C4 = 0x80 +0x00C7 = 0x82 +0x00C9 = 0x83 +0x00D1 = 0x84 +0x00D6 = 0x85 +0x00DC = 0x86 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E4 = 0x8A +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F1 = 0x96 +0x00F3 = 0x97 +0x00F4 = 0x99
+0x00F6 = 0x9A +0x00F7 = 0x9B +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 0x9F +0x060C = 0xAC +0x061B = 0xBB +0x061F = 0xBF +0x0621 = 0xC1 +0x0622 = 0xC2 +0x0623 = 0xC3 +0x0624 = 0xC4 +0x0625 = 0xC5 +0x0626 = 0xC6 +0x0627 = 0xC7 +0x0628 = 0xC8 +0x0629 = 0xC9 +0x062A = 0xCA +0x062B = 0xCB +0x062C = 0xCC +0x062D = 0xCD +0x062E = 0xCE +0x062F = 0xCF +0x0630 = 0xD0 +0x0631 = 0xD1 +0x0632 = 0xD2 +0x0633 = 0xD3 +0x0634 = 0xD4 +0x0635 = 0xD5 +0x0636 = 0xD6 +0x0637 = 0xD7 +0x0638 = 0xD8 +0x0639 = 0xD9 +0x063A = 0xDA +0x0640 = 0xE0 +0x0641 = 0xE1 +0x0642 = 0xE2 +0x0643 = 0xE3 +0x0644 = 0xE4 +0x0645 = 0xE5 +0x0646 = 0xE6 +0x0647 = 0xE7 +0x0648 = 0xE8 +0x0649 = 0xE9 +0x064A = 0xEA +0x064B = 0xEB +0x064C = 0xEC +0x064D = 0xED +0x064E = 0xEE +0x064F = 0xEF +0x0650 = 0xF0 +0x0651 = 0xF1 +0x0652 = 0xF2 +0x066A = 0xA5 +0x0679 = 0xF4 +0x067E = 0xF3 +0x0686 = 0xF5 +0x0688 = 0xF9 +0x0691 = 0xFA +0x0698 = 0xFE +0x06A4 = 0xF7 +0x06AF = 0xF8 +0x06BA = 0x8B +0x06D2 = 0xFF +0x06D5 = 0xF6 +0x06F0 = 0xB0 +0x06F1 = 0xB1 +0x06F2 = 0xB2 +0x06F3 = 0xB3 +0x06F4 = 0xB4 +0x06F5 = 0xB5 +0x06F6 = 0xB6 +0x06F7 = 0xB7 +0x06F8 = 0xB8 +0x06F9 = 0xB9 +0x2026 = 0x93 +0x274A = 0xC0 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%GAELIC.src b/share/i18n/csmapper/APPLE/UCS%GAELIC.src new file mode 100644 index 0000000..d048860 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%GAELIC.src @@ -0,0 +1,257 @@ +# $FreeBSD$ +# $NetBSD: UCS%GAELIC.src,v 1.2 2006/04/08 15:47:39 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/GAELIC +SRC_ZONE 0x0000-0x2663 +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. 
+# Original notice: +# +#======================================================================= +# File name: GAELIC.TXT +# +# Contents: Map (external version) from Mac OS Celtic +# character set to Unicode 3.0 and later +# +# Contacts: charsets@apple.com, everson@evertype.com +# +# Changes: +# +# c01 2005-Apr-01 First posted version. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Gaelic code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Gaelic code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. 
However, the +# Mac OS Gaelic character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Gaelic (partly from Michael Everson): +# ----------------------------------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# This character set was developed by Michael Everson of Everson +# Typography (everson@evertype.com) and was used for fonts in his +# Celtic Utilities and CeltScript font packages for the Mac, as well +# as some fonts included with the Irish localizations of Mac OS 6.0.8 +# and 7.1. Note that while Apple authorized this Irish localization, +# it was not a system which shipped with Apple hardware, and was not +# otherwise supported by Apple. Fonts conforming to the Mac OS Gaelic +# character set are available from Everson Typography +# (http://www.evertype.com/celtscript/). Information about the use of +# this character set is available at +# http://www.evertype.com/celtscript/celtcode.html. +# +# The Mac OS Gaelic encoding shares the script code smRoman (0) with +# the standard Mac OS Roman encoding. To determine if the Gaelic +# encoding is being used in Mac OS 7-9, you should also check if the +# system region code is 81. Otherwise, you can check for particular +# fonts that conform to this encoding (since in practice Gaelic fonts +# are used with the ordinary US or UK system versions). +# +# This character set is a variant of standard Mac OS Roman, adding +# capital and small y with acute, grave, and circumflex; capital and +# small w with acute, grave, circumflex and diaeresis; capital and +# small b, c, d, f, g, m, p, s, t with dot above; tironian et; small +# long r, small long s, and small long s with dot above. It has 36 +# code point differences from standard Mac OS Roman. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. 
In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Latin 8 Extended encoding that still +# maps 0xDB to U+00A4; this can be used for older fonts. +# Note: U+20AC is new with Unicode 2.1; for earlier Unicode +# versions, Latin 8 Extended 0xDB may be mapped to private-use +# character U+F8A0. +# +# Before Unicode 3.0, code point 0xE4 was PER MILLE SIGN, and was +# mapped to U+2030. Since August 1998, code point 0xE4 is changed +# to TIRONIAN SIGN ET and maps to U+204A. There is a "per mille +# sign" variant of the Mac OS Gaelic encoding that still +# maps 0xE4 to U+2030; this can be used for older fonts. +# Note: U+204A is new with Unicode 3.0; for earlier Unicode +# versions, Mac OS Gaelic was unified with AMPERSAND. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +0x0000 - 0x007E = 0x00 - +0x00A0 = 0xCA +0x00A2 = 0xA2 +0x00A3 = 0xA3 +0x00A7 = 0xA4 +0x00A8 = 0xAC +0x00A9 = 0xA9 +0x00AB = 0xC7 +0x00AE = 0xA8 +0x00B0 = 0xA1 +0x00B1 = 0xB1 +0x00B4 = 0xAB +0x00B6 = 0xA6 +0x00B7 = 0xE1 +0x00BB = 0xC8 +0x00C0 = 0xCB +0x00C1 = 0xE7 +0x00C2 = 0xE5 +0x00C3 = 0xCC +0x00C4 = 0x80 +0x00C5 = 0x81 +0x00C6 = 0xAE +0x00C7 = 0x82 +0x00C8 = 0xE9 +0x00C9 = 0x83 +0x00CA = 0xE6 +0x00CB = 0xE8 +0x00CC = 0xED +0x00CD = 0xEA +0x00CE = 0xEB +0x00CF = 0xEC +0x00D1 = 0x84 +0x00D2 = 0xF1 +0x00D3 = 0xEE +0x00D4 = 0xEF +0x00D5 = 0xCD +0x00D6 = 0x85 +0x00D8 = 0xAF +0x00D9 = 0xF4 +0x00DA = 0xF2 +0x00DB = 0xF3 +0x00DC = 0x86 +0x00DD = 0xF6 +0x00DF = 0xA7 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E3 = 0x8B +0x00E4 = 0x8A +0x00E5 = 0x8C +0x00E6 = 0xBE +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00EC = 0x93 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 
+0x00F1 = 0x96 +0x00F2 = 0x98 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F5 = 0x9B +0x00F6 = 0x9A +0x00F8 = 0xBF +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 0x9F +0x00FD = 0xF7 +0x00FF = 0xD8 +0x010A = 0xB5 +0x010B = 0xB6 +0x0120 = 0xBB +0x0121 = 0xBC +0x0131 = 0xF5 +0x0152 = 0xCE +0x0153 = 0xCF +0x0174 = 0xF8 +0x0175 = 0xF9 +0x0176 = 0xDE +0x0177 = 0xDF +0x0178 = 0xD9 +0x017F = 0xC5 +0x0192 = 0xC4 +0x027C = 0xC3 +0x1E02 = 0xB0 +0x1E03 = 0xB4 +0x1E0A = 0xB7 +0x1E0B = 0xB8 +0x1E1E = 0xB9 +0x1E1F = 0xBA +0x1E40 = 0xBD +0x1E41 = 0xC0 +0x1E56 = 0xC1 +0x1E57 = 0xC2 +0x1E60 = 0xC6 +0x1E61 = 0xD6 +0x1E6A = 0xDA +0x1E6B = 0xE0 +0x1E80 = 0xFC +0x1E81 = 0xFD +0x1E82 = 0xFE +0x1E83 = 0xFF +0x1E84 = 0xFA +0x1E85 = 0xFB +0x1E9B = 0xD7 +0x1EF2 = 0xE2 +0x1EF3 = 0xE3 +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201C = 0xD2 +0x201D = 0xD3 +0x2020 = 0xA0 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x2039 = 0xDC +0x203A = 0xDD +0x204A = 0xE4 +0x20AC = 0xDB +0x2122 = 0xAA +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +0x2663 = 0xF0 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%GREEK.src b/share/i18n/csmapper/APPLE/UCS%GREEK.src new file mode 100644 index 0000000..e808b63 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%GREEK.src @@ -0,0 +1,275 @@ +# $FreeBSD$ +# $NetBSD: UCS%GREEK.src,v 1.2 2006/04/08 15:47:40 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/GREEK +SRC_ZONE 0x0000-0x2265 +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: GREEK.TXT +# +# Contents: Map (external version) from Mac OS Greek +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. 
Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update to match changes in Mac OS Greek +# encoding for Mac OS 9.2.2 and later. +# Update URLs, notes. Matches internal +# utom<b3>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n06 1998-Feb-05 Update to match internal utom<n4>, ufrm<n17>, +# and Text Encoding Converter versions 1.3: +# Change mapping for 0xAF from U+0387 to its +# canonical decomposition, U+00B7. Also +# update header comments to new format. +# n04 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n7>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. 
+# Column #1 is the Mac OS Greek code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Greek code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Greek character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Greek: +# ---------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Although a Mac OS script code is defined for Greek (smGreek = 6), +# the Greek localized system does not currently use it (the font +# family IDs are in the Mac OS Roman range). To determine if the +# Greek encoding is being used when the script code is smRoman (0), +# you must check if the system region code is 20, verGreece. +# +# The Mac OS Greek encoding is a superset of the repertoire of +# ISO 8859-7 (although characters are not at the same code points), +# except that LEFT & RIGHT SINGLE QUOTATION MARK replace the +# MODIFIER LETTER REVERSED COMMA & APOSTROPHE (spacing versions of +# Greek rough & smooth breathing marks) that are in ISO 8859-7. +# The added characters in Mac OS Greek include more punctuation and +# symbols and several accented Latin letters. +# +# Before Mac OS 9.2.2, code point 0x9C was SOFT HYPHEN (U+00AD), and +# code point 0xFF was undefined. In Mac OS 9.2.2 and later versions, +# SOFT HYPHEN was moved to 0xFF, and code point 0x9C was changed to be +# EURO SIGN (U+20AC); the standard Apple fonts are updated for Mac OS +# 9.2.2 to reflect this. 
There is a "no Euro sign" variant of the Mac +# OS Greek encoding that uses the older mapping; this can be used for +# older fonts. +# +# This "no Euro sign" variant of Mac OS Greek was the character set +# used by Mac OS Greek systems before 9.2.2 except for system 6.0.7, +# which used a variant character set but was quickly replaced with +# Greek system 6.0.7.1 using the "no Euro sign" character set +# documented here. Greek system 4.1 used a variant Greek set that had +# ISO 8859-7 in 0xA0-0xFF (with some holes filled in with DTP +# characters), and Mac OS Roman accented Roman letters in 0x80-0x9F. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - The Mac OS Greek encoding changed for Mac OS 9.2.2 and later +# as follows: +# 0x9C, changed from 0x00AD SOFT HYPHEN to 0x20AC EURO SIGN +# 0xFF, changed from undefined to 0x00AD SOFT HYPHEN +# +# Changes from version n04 to version n06: +# +# - Change mapping of 0xAF from U+0387 to its canonical +# decomposition, U+00B7.
+# +################## +0x0000 - 0x007E = 0x00 - +0x00A0 = 0xCA +0x00A3 = 0x92 +0x00A5 = 0xB4 +0x00A6 = 0x9B +0x00A7 = 0xAC +0x00A8 = 0x8C +0x00A9 = 0xA9 +0x00AB = 0xC7 +0x00AC = 0xC2 +0x00AD = 0xFF +0x00AE = 0xA8 +0x00B0 = 0xAE +0x00B1 = 0xB1 +0x00B2 = 0x82 +0x00B3 = 0x84 +0x00B7 = 0xAF +0x00B9 = 0x81 +0x00BB = 0xC8 +0x00BD = 0x97 +0x00C4 = 0x80 +0x00C9 = 0x83 +0x00D6 = 0x85 +0x00DC = 0x86 +0x00DF = 0xA7 +0x00E0 = 0x88 +0x00E2 = 0x89 +0x00E4 = 0x8A +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F4 = 0x99 +0x00F6 = 0x9A +0x00F7 = 0xD6 +0x00F9 = 0x9D +0x00FB = 0x9E +0x00FC = 0x9F +0x0153 = 0xCF +0x0384 = 0x8B +0x0385 = 0x87 +0x0386 = 0xCD +0x0388 = 0xCE +0x0389 = 0xD7 +0x038A = 0xD8 +0x038C = 0xD9 +0x038E = 0xDA +0x038F = 0xDF +0x0390 = 0xFD +0x0391 = 0xB0 +0x0392 = 0xB5 +0x0393 = 0xA1 +0x0394 = 0xA2 +0x0395 = 0xB6 +0x0396 = 0xB7 +0x0397 = 0xB8 +0x0398 = 0xA3 +0x0399 = 0xB9 +0x039A = 0xBA +0x039B = 0xA4 +0x039C = 0xBB +0x039D = 0xC1 +0x039E = 0xA5 +0x039F = 0xC3 +0x03A0 = 0xA6 +0x03A1 = 0xC4 +0x03A3 = 0xAA +0x03A4 = 0xC6 +0x03A5 = 0xCB +0x03A6 = 0xBC +0x03A7 = 0xCC +0x03A8 = 0xBE +0x03A9 = 0xBF +0x03AA = 0xAB +0x03AB = 0xBD +0x03AC = 0xC0 +0x03AD = 0xDB +0x03AE = 0xDC +0x03AF = 0xDD +0x03B0 = 0xFE +0x03B1 = 0xE1 +0x03B2 = 0xE2 +0x03B3 = 0xE7 +0x03B4 = 0xE4 +0x03B5 = 0xE5 +0x03B6 = 0xFA +0x03B7 = 0xE8 +0x03B8 = 0xF5 +0x03B9 = 0xE9 +0x03BA = 0xEB +0x03BB = 0xEC +0x03BC = 0xED +0x03BD = 0xEE +0x03BE = 0xEA +0x03BF = 0xEF +0x03C0 = 0xF0 +0x03C1 = 0xF2 +0x03C2 = 0xF7 +0x03C3 = 0xF3 +0x03C4 = 0xF4 +0x03C5 = 0xF9 +0x03C6 = 0xE6 +0x03C7 = 0xF8 +0x03C8 = 0xE3 +0x03C9 = 0xF6 +0x03CA = 0xFB +0x03CB = 0xFC +0x03CC = 0xDE +0x03CD = 0xE0 +0x03CE = 0xF1 +0x2013 = 0xD0 +0x2015 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201C = 0xD2 +0x201D = 0xD3 +0x2020 = 0xA0 +0x2022 = 0x96 +0x2026 = 0xC9 +0x2030 = 0x98 +0x20AC = 0x9C +0x2122 = 0x93 +0x2248 = 0xC5 +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/UCS%GUJARATI.src b/share/i18n/csmapper/APPLE/UCS%GUJARATI.src new file mode 100644 index 0000000..e74899b --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%GUJARATI.src @@ -0,0 +1,279 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME UCS/GUJARATI +SRC_ZONE 0x0000-0xFFFF +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 +#======================================================================= +# File name: GUJARATI.TXT +# +# Contents: Map (external version) from Mac OS Gujarati +# encoding to Unicode 2.1 and later. +# +# Copyright: (c) 1997-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n02 1998-Feb-05 First version; matches internal utom<n4>, +# ufrm<n5>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. 
+# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Gujarati code or code sequence +# (in hex as 0xNN or 0xNN+0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name or sequence +# of names. In some cases an additional comment follows the +# Unicode name(s). +# +# The entries are in two sections. The first section is for pairs of +# Mac OS Gujarati code points that must be mapped in a special way. +# The second section maps individual code points. +# +# Within each section, the entries are in Mac OS Gujarati code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Gujarati character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Gujarati: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Gujarati is based on IS 13194:1991 (ISCII-91), with the +# addition of several punctuation and symbol characters. However, +# Mac OS Gujarati does not support the ATR (attribute) mechanism of +# ISCII-91. +# +# 1. ISCII-91 features in Mac OS Gujarati include: +# +# a) Overloading of nukta +# +# In addition to using the nukta (0xE9) like a combining dot below, +# nukta is overloaded to function as a general character modifier. 
+# In this role, certain code points followed by 0xE9 are treated as +# a two-byte code point representing a character which may be +# rather different than the characters represented by either of +# the code points alone. For example, the character GUJARATI OM +# (U+0AD0) is represented in ISCII-91 as candrabindu + nukta. +# +# b) Explicit halant and soft halant +# +# A double halant (0xE8 + 0xE8) constitutes an "explicit halant", +# which will always appear as a halant instead of causing formation +# of a ligature or half-form consonant. +# +# Halant followed by nukta (0xE8 + 0xE9) constitutes a "soft +# halant", which prevents formation of a ligature and instead +# retains the half-form of the first consonant. +# +# c) Invisible consonant +# +# The byte 0xD9 (called INV in ISCII-91) is an invisible consonant: +# It behaves like a consonant but has no visible appearance. It is +# intended to be used (often in combination with halant) to display +# dependent forms in isolation, such as the RA forms or consonant +# half-forms. +# +# d) Extensions for Vedic, etc. +# +# The byte 0xF0 (called EXT in ISCII-91) followed by any byte in +# the range 0xA1-0xEE constitutes a two-byte code point which can +# be used to represent additional characters for Vedic (or other +# extensions); 0xF0 followed by any other byte value constitutes +# malformed text. Mac OS Gujarati supports this mechanism, but +# does not currently map any of these two-byte code points to +# anything. +# +# 2. Mac OS Gujarati additions +# +# Mac OS Gujarati adds characters using the code points +# 0x80-0x8A and 0x90. +# +# 3. Unused code points +# +# The following code points are currently unused, and are not shown +# here: 0x8B-0x8F, 0x91-0xA0, 0xAB, 0xAF, 0xC7, 0xCE, 0xD0, 0xD3, +# 0xE0, 0xE4, 0xEB-0xEF, 0xFB-0xFF. In addition, 0xF0 is not shown +# here, but it has a special function as described above. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. 
Mapping the byte pairs +# +# If one of the following byte values is encountered when mapping +# Mac OS Gujarati text - 0xA1, 0xAA, 0xDF, or 0xE8 - then the next +# byte (if there is one) should be examined. If the next byte is +# 0xE9 - or also 0xE8, if the first byte was 0xE8 - then the byte +# pair should be mapped using the first section of the mapping +# table below. Otherwise, each byte should be mapped using the +# second section of the mapping table below. +# +# - The Unicode Standard, Version 2.0, specifies how explicit +# halant and soft halant should be represented in Unicode; +# these mappings are used below. +# +# If the byte value 0xF0 is encountered when mapping Mac OS +# Gujarati text, then the next byte should be examined. If there +# is no next byte (e.g. 0xF0 at end of buffer), the mapping +# process should indicate incomplete character. If there is a next +# byte but it is not in the range 0xA1-0xEE, the mapping process +# should indicate malformed text. Otherwise, the mapping process +# should treat the byte pair as a valid two-byte code point with no +# mapping (e.g. map it to QUESTION MARK, REPLACEMENT CHARACTER, +# etc.). +# +# 2. Mapping the invisible consonant +# +# It has been suggested that INV in ISCII-91 should map to ZERO +# WIDTH NON-JOINER in Unicode. However, this causes problems with +# roundtrip fidelity: The ISCII-91 sequences 0xE8+0xE8 and 0xE8+0xD9 +# would map to the same sequence of Unicode characters. We have +# instead mapped INV to LEFT-TO-RIGHT MARK, which avoids these +# problems.
+# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +BEGIN_MAP +0x0000 - 0x007F = 0x00 - +0x00A9 = 0x88 +0x00AE = 0x89 +0x00D7 = 0x80 +0x0964 = 0xEA +0x0965 = 0x90 +0x0A81 = 0xA1 +0x0A82 = 0xA2 +0x0A83 = 0xA3 +0x0A85 = 0xA4 +0x0A86 = 0xA5 +0x0A87 = 0xA6 +0x0A88 = 0xA7 +0x0A89 = 0xA8 +0x0A8A = 0xA9 +0x0A8B = 0xAA +0x0A8D = 0xAE +0x0A8F = 0xAC +0x0A90 = 0xAD +0x0A91 = 0xB2 +0x0A93 = 0xB0 +0x0A94 = 0xB1 +0x0A95 = 0xB3 +0x0A96 = 0xB4 +0x0A97 = 0xB5 +0x0A98 = 0xB6 +0x0A99 = 0xB7 +0x0A9A = 0xB8 +0x0A9B = 0xB9 +0x0A9C = 0xBA +0x0A9D = 0xBB +0x0A9E = 0xBC +0x0A9F = 0xBD +0x0AA0 = 0xBE +0x0AA1 = 0xBF +0x0AA2 = 0xC0 +0x0AA3 = 0xC1 +0x0AA4 = 0xC2 +0x0AA5 = 0xC3 +0x0AA6 = 0xC4 +0x0AA7 = 0xC5 +0x0AA8 = 0xC6 +0x0AAA = 0xC8 +0x0AAB = 0xC9 +0x0AAC = 0xCA +0x0AAD = 0xCB +0x0AAE = 0xCC +0x0AAF = 0xCD +0x0AB0 = 0xCF +0x0AB2 = 0xD1 +0x0AB3 = 0xD2 +0x0AB5 = 0xD4 +0x0AB6 = 0xD5 +0x0AB7 = 0xD6 +0x0AB8 = 0xD7 +0x0AB9 = 0xD8 +0x0ABC = 0xE9 +0x0ABE = 0xDA +0x0ABF = 0xDB +0x0AC0 = 0xDC +0x0AC1 = 0xDD +0x0AC2 = 0xDE +0x0AC3 = 0xDF +#0x0AC4 = 0xDF+0xE9 +0x0AC5 = 0xE3 +0x0AC7 = 0xE1 +0x0AC8 = 0xE2 +0x0AC9 = 0xE7 +0x0ACB = 0xE5 +0x0ACC = 0xE6 +0x0ACD = 0xE8 +#0x0ACD+0x200C = 0xE8+0xE8 +#0x0ACD+0x200D = 0xE8+0xE9 +#0x0AD0 = 0xA1+0xE9 +#0x0AE0 = 0xAA+0xE9 +0x0AE6 = 0xF1 +0x0AE7 = 0xF2 +0x0AE8 = 0xF3 +0x0AE9 = 0xF4 +0x0AEA = 0xF5 +0x0AEB = 0xF6 +0x0AEC = 0xF7 +0x0AED = 0xF8 +0x0AEE = 0xF9 +0x0AEF = 0xFA +0x200E = 0xD9 +0x2013 = 0x82 +0x2014 = 0x83 +0x2018 = 0x84 +0x2019 = 0x85 +0x2022 = 0x87 +0x2026 = 0x86 +0x2122 = 0x8A +0x2212 = 0x81 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%GURMUKHI.src b/share/i18n/csmapper/APPLE/UCS%GURMUKHI.src new file mode 100644 index 0000000..6d2a1a2 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%GURMUKHI.src @@ -0,0 +1,333 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME UCS/GURMUKHI +SRC_ZONE 0x0000-0x2212 +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 
+#======================================================================= +# File name: GURMUKHI.TXT +# +# Contents: Map (external version) from Mac OS Gurmukhi +# encoding to Unicode 2.1 and later. +# +# Copyright: (c) 1997-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Change mappings for 0x91, 0xD5 based on +# new decomposition rules. Update URLs, +# notes. Matches internal utom<b2>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n02 1998-Feb-05 First version; matches internal utom<n5>, +# ufrm<n6>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". 
+# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Gurmukhi code or code sequence +# (in hex as 0xNN or 0xNN+0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name or sequence +# of names. In some cases an additional comment follows the +# Unicode name(s). +# +# The entries are in two sections. The first section is for pairs of +# Mac OS Gurmukhi code points that must be mapped in a special way. +# The second section maps individual code points. +# +# Within each section, the entries are in Mac OS Gurmukhi code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Gurmukhi character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Gurmukhi: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Gurmukhi is based on IS 13194:1991 (ISCII-91), with the +# addition of several punctuation and symbol characters. However, +# Mac OS Gurmukhi does not support the ATR (attribute) mechanism of +# ISCII-91. +# +# 1. ISCII-91 features in Mac OS Gurmukhi include: +# +# a) Explicit halant and soft halant +# +# A double halant (0xE8 + 0xE8) constitutes an "explicit halant", +# which will always appear as a halant instead of causing formation +# of a ligature or half-form consonant. +# +# Halant followed by nukta (0xE8 + 0xE9) constitutes a "soft +# halant", which prevents formation of a ligature and instead +# retains the half-form of the first consonant. 
+# +# b) Invisible consonant +# +# The byte 0xD9 (called INV in ISCII-91) is an invisible consonant: +# It behaves like a consonant but has no visible appearance. It is +# intended to be used (often in combination with halant) to display +# dependent forms in isolation, such as the RA forms or consonant +# half-forms. +# +# c) Extensions for Vedic, etc. +# +# The byte 0xF0 (called EXT in ISCII-91) followed by any byte in +# the range 0xA1-0xEE constitutes a two-byte code point which can +# be used to represent additional characters for Vedic (or other +# extensions); 0xF0 followed by any other byte value constitutes +# malformed text. Mac OS Gurmukhi supports this mechanism, but +# does not currently map any of these two-byte code points to +# anything. +# +# 2. Mac OS Gurmukhi additions +# +# Mac OS Gurmukhi adds characters using the code points +# 0x80-0x8A and 0x90-0x94 (the latter are some Gurmukhi additions). +# +# 3. Unused code points +# +# The following code points are currently unused, and are not shown +# here: 0x8B-0x8F, 0x95-0xA1, 0xA3, 0xAA-0xAB, 0xAE-0xAF, 0xB2, +# 0xC7, 0xCE, 0xD0, 0xD2-0xD3, 0xD6, 0xDF-0xE0, 0xE3-0xE4, 0xE7, +# 0xEB-0xEF, 0xFB-0xFF. In addition, 0xF0 is not shown here, but it +# has a special function as described above. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# 1. Mapping the byte pairs +# +# If the byte value 0xE8 is encountered when mapping Mac OS +# Gurmukhi text, then the next byte (if there is one) should be +# examined. If the next byte is 0xE8 or 0xE9, then the byte pair +# should be mapped using the first section of the mapping table +# below. Otherwise, each byte should be mapped using the second +# section of the mapping table below. +# +# - The Unicode Standard, Version 2.0, specifies how explicit +# halant and soft halant should be represented in Unicode; +# these mappings are used below. 
+# +# If the byte value 0xF0 is encountered when mapping Mac OS +# Gurmukhi text, then the next byte should be examined. If there +# is no next byte (e.g. 0xF0 at end of buffer), the mapping +# process should indicate incomplete character. If there is a next +# byte but it is not in the range 0xA1-0xEE, the mapping process +# should indicate malformed text. Otherwise, the mapping process +# should treat the byte pair as a valid two-byte code point with no +# mapping (e.g. map it to QUESTION MARK, REPLACEMENT CHARACTER, +# etc.). +# +# 2. Mapping the invisible consonant +# +# It has been suggested that INV in ISCII-91 should map to ZERO +# WIDTH NON-JOINER in Unicode. However, this causes problems with +# roundtrip fidelity: The ISCII-91 sequences 0xE8+0xE8 and 0xE8+0xD9 +# would map to the same sequence of Unicode characters. We have +# instead mapped INV to LEFT-TO-RIGHT MARK, which avoids these +# problems. +# +# 3. Mappings using corporate characters +# +# Mapping the GURMUKHI LETTER SHA 0xD5 presents an interesting +# problem. At first glance, we could map it to the single Unicode +# character 0x0A36. +# +# However, our goal is that the mappings provided here should also +# be able to generate the mappings to maximally decomposed Unicode +# by simple recursive substitution of the canonical decompositions +# in the Unicode database. We want mapping tables derived this way +# to retain full roundtrip fidelity. +# +# Since the canonical decomposition of 0x0A36 is 0x0A38+0x0A3C, +# the decomposition mapping for 0xD5 would be identical with the +# decomposition mapping for 0xD7+0xE9, and roundtrip fidelity would +# be lost. +# +# We solve this problem by using a grouping hint (one of the set of +# transcoding hints defined by Apple). +# +# Apple has defined a block of 32 corporate characters as "transcoding +# hints." 
These are used in combination with standard Unicode characters +# to force them to be treated in a special way for mapping to other +# encodings; they have no other effect. Sixteen of these transcoding +# hints are "grouping hints" - they indicate that the next 2-4 Unicode +# characters should be treated as a single entity for transcoding. The +# other sixteen transcoding hints are "variant tags" - they are like +# combining characters, and can follow a standard Unicode character (or a sequence +# consisting of a base character and other combining characters) to +# cause it to be treated in a special way for transcoding. These always +# terminate a combining-character sequence. +# +# The transcoding hint used in this mapping table is: +# 0xF860 group next 2 characters +# +# Then we can map 0xD5 as follows: +# 0xD5 -> 0xF860+0x0A38+0x0A3C +# +# We could also have used a variant tag such as 0xF87F and mapped it +# this way: +# 0xD5 -> 0x0A36+0xF87F +# +# 4. Additional loose mappings from Unicode +# +# These are not preserved in roundtrip mappings. +# +# 0A59 -> 0xB4+0xE9 # GURMUKHI LETTER KHHA +# 0A5A -> 0xB5+0xE9 # GURMUKHI LETTER GHHA +# 0A5B -> 0xBA+0xE9 # GURMUKHI LETTER ZA +# 0A5E -> 0xC9+0xE9 # GURMUKHI LETTER FA +# +# 0A70 -> 0xA2 # GURMUKHI TIPPI +# +# Loose mappings from Unicode should also map U+0A71 (GURMUKHI ADDAK) +# followed by any Gurmukhi consonant to the equivalent ISCII-91 +# consonant plus halant plus the consonant again. For example: +# +# 0A71+0A15 -> 0xB3+0xE8+0xB3 +# 0A71+0A16 -> 0xB4+0xE8+0xB4 +# ...
+# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - Change mapping of 0x91 from 0xF860+0x0A21+0x0A3C to 0x0A5C GURMUKHI +# LETTER RRA, now that the canonical decomposition of 0x0A5C to +# 0x0A21+0x0A3C has been deleted +# +# - Change mapping of 0xD5 from 0x0A36 GURMUKHI LETTER SHA to +# 0xF860+0x0A38+0x0A3C, now that a canonical decomposition of 0x0A36 +# to 0x0A38+0x0A3C has been added. +# +################## +BEGIN_MAP +0x0000 - 0x007F = 0x00 - +0x00A9 = 0x88 +0x00AE = 0x89 +0x00D7 = 0x80 +0x0964 = 0xEA +0x0A02 = 0xA2 +0x0A05 = 0xA4 +0x0A06 = 0xA5 +0x0A07 = 0xA6 +0x0A08 = 0xA7 +0x0A09 = 0xA8 +0x0A0A = 0xA9 +0x0A0F = 0xAC +0x0A10 = 0xAD +0x0A13 = 0xB0 +0x0A14 = 0xB1 +0x0A15 = 0xB3 +0x0A16 = 0xB4 +0x0A17 = 0xB5 +0x0A18 = 0xB6 +0x0A19 = 0xB7 +0x0A1A = 0xB8 +0x0A1B = 0xB9 +0x0A1C = 0xBA +0x0A1D = 0xBB +0x0A1E = 0xBC +0x0A1F = 0xBD +0x0A20 = 0xBE +0x0A21 = 0xBF +0x0A22 = 0xC0 +0x0A23 = 0xC1 +0x0A24 = 0xC2 +0x0A25 = 0xC3 +0x0A26 = 0xC4 +0x0A27 = 0xC5 +0x0A28 = 0xC6 +0x0A2A = 0xC8 +0x0A2B = 0xC9 +0x0A2C = 0xCA +0x0A2D = 0xCB +0x0A2E = 0xCC +0x0A2F = 0xCD +0x0A30 = 0xCF +0x0A32 = 0xD1 +0x0A35 = 0xD4 +0x0A38 = 0xD7 +0x0A39 = 0xD8 +0x0A3C = 0xE9 +0x0A3E = 0xDA +0x0A3F = 0xDB +0x0A40 = 0xDC +0x0A41 = 0xDD +0x0A42 = 0xDE +0x0A47 = 0xE1 +0x0A48 = 0xE2 +0x0A4B = 0xE5 +0x0A4C = 0xE6 +0x0A4D = 0xE8 +#0x0A4D+0x200C = 0xE8+0xE8 +#0x0A4D+0x200D = 0xE8+0xE9 +0x0A5C = 0x91 +0x0A66 = 0xF1 +0x0A67 = 0xF2 +0x0A68 = 0xF3 +0x0A69 = 0xF4 +0x0A6A = 0xF5 +0x0A6B = 0xF6 +0x0A6C = 0xF7 +0x0A6D = 0xF8 +0x0A6E = 0xF9 +0x0A6F = 0xFA +0x0A71 = 0x90 +0x0A72 = 0x93 +0x0A73 = 0x92 +0x0A74 = 0x94 +0x200E = 0xD9 +0x2013 = 0x82 +0x2014 = 0x83 +0x2018 = 0x84 +0x2019 = 0x85 +0x2022 = 0x87 +0x2026 = 0x86 +0x2122 = 0x8A +0x2212 = 0x81 +#0xF860+0x0A38+0x0A3C = 0xD5 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%HEBREW.src b/share/i18n/csmapper/APPLE/UCS%HEBREW.src new file mode 100644 index 
0000000..8d529d7 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%HEBREW.src @@ -0,0 +1,105 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME UCS/HEBREW +SRC_ZONE 0x0000-0xFB4B +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +0x0000 - 0x007F = 0x00 - +0x00A0 = 0xCA +0x00C4 = 0x80 +0x00C7 = 0x82 +0x00C9 = 0x83 +0x00D1 = 0x84 +0x00D6 = 0x85 +0x00DC = 0x86 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E3 = 0x8B +0x00E4 = 0x8A +0x00E5 = 0x8C +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00EC = 0x93 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F1 = 0x96 +0x00F2 = 0x98 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F5 = 0x9B +0x00F6 = 0x9A +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 0x9F +0x05B0 = 0xD9 +0x05B1 = 0xDB +0x05B2 = 0xDA +0x05B3 = 0xDF +0x05B4 = 0xCF +0x05B5 = 0xCD +0x05B6 = 0xCE +0x05B7 = 0xCC +0x05B8 = 0xCB +#0x05B8+0xF87F = 0xDE +0x05B9 = 0xDD +0x05BB = 0xDC +0x05BC = 0xC6 +0x05BF = 0xD8 +0x05D0 = 0xE0 +0x05D1 = 0xE1 +0x05D2 = 0xE2 +0x05D3 = 0xE3 +0x05D4 = 0xE4 +0x05D5 = 0xE5 +0x05D6 = 0xE6 +0x05D7 = 0xE7 +0x05D8 = 0xE8 +0x05D9 = 0xE9 +0x05DA = 0xEA +0x05DB = 0xEB +0x05DC = 0xEC +0x05DD = 0xED +0x05DE = 0xEE +0x05DF = 0xEF +0x05E0 = 0xF0 +0x05E1 = 0xF1 +0x05E2 = 0xF2 +0x05E3 = 0xF3 +0x05E4 = 0xF4 +0x05E5 = 0xF5 +0x05E6 = 0xF6 +0x05E7 = 0xF7 +0x05E8 = 0xF8 +0x05E9 = 0xF9 +0x05EA = 0xFA +#0x05F2+0x05B7 = 0x81 +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201C = 0xD2 +0x201D = 0xD3 +0x201E = 0xC1 +0x2026 = 0xC9 +0x20AA = 0xA6 +#0xF86A+0x05DC+0x05B9 = 0xC0 +0xF89B = 0xC2 +0xF89C = 0xC3 +0xF89D = 0xC4 +0xF89E = 0xC5 +0xFB1F = 0x81 +0xFB2A = 0xD6 +0xFB2B = 0xD7 +0xFB35 = 0xC8 +0xFB4B = 0xC7 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%ICELAND.src b/share/i18n/csmapper/APPLE/UCS%ICELAND.src new file mode 100644 index 0000000..5cfdd70 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%ICELAND.src @@ -0,0 +1,289 @@ +# $FreeBSD$ +# $NetBSD: UCS%ICELAND.src,v 1.2 2006/04/08 15:47:40 tnozaki Exp $ + 
+TYPE ROWCOL +NAME UCS/ICELAND +SRC_ZONE 0x0000-0xF8FF +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: ICELAND.TXT +# +# Contents: Map (external version) from Mac OS Icelandic +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b3>. +# b02 1999-Sep-22 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to EURO +# SIGN. Update contact e-mail address. Matches +# internal utom<b2>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n06 1998-Feb-05 Minor update to header comments, add +# information on font variants +# n03 1997-Dec-14 Update to match internal utom<n4>, ufrm<n16>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n02 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n5>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. 
In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Icelandic code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Icelandic code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Icelandic character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Icelandic: +# -------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# 1. General +# +# Mac OS Icelandic is used for Icelandic and Faroese. +# +# The Mac OS Icelandic encoding shares the script code smRoman +# (0) with the standard Mac OS Roman encoding. To determine if +# the Icelandic encoding is being used, you must also check if +# the system region code is 21, verIceland. +# +# This character set is a variant of standard Mac OS Roman, +# adding upper and lower eth, thorn, and Y acute. It has 6 code +# point differences from standard Mac OS Roman. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. 
In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There are +# "currency sign" variants of the Mac OS Icelandic encoding that +# still map 0xDB to U+00A4; these can be used for older fonts. +# +# 2. Font variants +# +# The table in this file gives the Unicode mappings for the standard +# Mac OS Icelandic encoding. This encoding is supported by the +# Icelandic versions of the fonts Chicago, Geneva, Monaco, and New +# York, and is the encoding supported by the text processing +# utilities. However, other TrueType fonts implement a slightly +# different encoding; the difference is only in two code points. +# For the standard variant, these are: +# 0xBB -> 0x00AA FEMININE ORDINAL INDICATOR +# 0xBC -> 0x00BA MASCULINE ORDINAL INDICATOR +# +# For the TrueType variant (used by the Icelandic versions of the +# fonts Courier, Helvetica, Palatino, and Times), these are: +# 0xBB -> 0xFB01 LATIN SMALL LIGATURE FI +# 0xBC -> 0xFB02 LATIN SMALL LIGATURE FL +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n06 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n02 to version n03: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. 
+# +################## +0x0000 - 0x007E = 0x00 - +0x00A0 = 0xCA +0x00A1 = 0xC1 +0x00A2 = 0xA2 +0x00A3 = 0xA3 +0x00A5 = 0xB4 +0x00A7 = 0xA4 +0x00A8 = 0xAC +0x00A9 = 0xA9 +0x00AA = 0xBB +0x00AB = 0xC7 +0x00AC = 0xC2 +0x00AE = 0xA8 +0x00AF = 0xF8 +0x00B0 = 0xA1 +0x00B1 = 0xB1 +0x00B4 = 0xAB +0x00B5 = 0xB5 +0x00B6 = 0xA6 +0x00B7 = 0xE1 +0x00B8 = 0xFC +0x00BA = 0xBC +0x00BB = 0xC8 +0x00BF = 0xC0 +0x00C0 = 0xCB +0x00C1 = 0xE7 +0x00C2 = 0xE5 +0x00C3 = 0xCC +0x00C4 = 0x80 +0x00C5 = 0x81 +0x00C6 = 0xAE +0x00C7 = 0x82 +0x00C8 = 0xE9 +0x00C9 = 0x83 +0x00CA = 0xE6 +0x00CB = 0xE8 +0x00CC = 0xED +0x00CD = 0xEA +0x00CE = 0xEB +0x00CF = 0xEC +0x00D0 = 0xDC +0x00D1 = 0x84 +0x00D2 = 0xF1 +0x00D3 = 0xEE +0x00D4 = 0xEF +0x00D5 = 0xCD +0x00D6 = 0x85 +0x00D8 = 0xAF +0x00D9 = 0xF4 +0x00DA = 0xF2 +0x00DB = 0xF3 +0x00DC = 0x86 +0x00DD = 0xA0 +0x00DE = 0xDE +0x00DF = 0xA7 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E3 = 0x8B +0x00E4 = 0x8A +0x00E5 = 0x8C +0x00E6 = 0xBE +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00EC = 0x93 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F0 = 0xDD +0x00F1 = 0x96 +0x00F2 = 0x98 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F5 = 0x9B +0x00F6 = 0x9A +0x00F7 = 0xD6 +0x00F8 = 0xBF +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 0x9F +0x00FD = 0xE0 +0x00FE = 0xDF +0x00FF = 0xD8 +0x0131 = 0xF5 +0x0152 = 0xCE +0x0153 = 0xCF +0x0178 = 0xD9 +0x0192 = 0xC4 +0x02C6 = 0xF6 +0x02C7 = 0xFF +0x02D8 = 0xF9 +0x02D9 = 0xFA +0x02DA = 0xFB +0x02DB = 0xFE +0x02DC = 0xF7 +0x02DD = 0xFD +0x03A9 = 0xBD +0x03C0 = 0xB9 +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201A = 0xE2 +0x201C = 0xD2 +0x201D = 0xD3 +0x201E = 0xE3 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x2030 = 0xE4 +0x2044 = 0xDA +0x20AC = 0xDB +0x2122 = 0xAA +0x2202 = 0xB6 +0x2206 = 0xC6 +0x220F = 0xB8 +0x2211 = 0xB7 +0x221A = 0xC3 +0x221E = 0xB0 +0x222B = 0xBA +0x2248 = 0xC5 +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +0x25CA = 0xD7 +0xF8FF = 0xF0 +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/UCS%INUIT.src b/share/i18n/csmapper/APPLE/UCS%INUIT.src new file mode 100644 index 0000000..e0fb8e3 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%INUIT.src @@ -0,0 +1,242 @@ +# $FreeBSD$ +# $NetBSD: UCS%INUIT.src,v 1.2 2006/04/08 15:47:40 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/INUIT +SRC_ZONE 0x0000-0x2122 +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: INUIT.TXT +# +# Contents: Map (external version) from Mac OS Inuit +# character set to Unicode 3.0 and later +# +# Contacts: charsets@apple.com, everson@evertype.com +# +# Changes: +# +# c01 2005-Apr-01 First posted version. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". 
+# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Inuit code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Inuit code order. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Inuit character set uses the standard control characters +# at 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Inuit (partly from Michael Everson): +# ---------------------------------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# This character set was developed by Michael Everson of Everson +# Typography (everson@evertype.com) and was used for the Inuktitut +# localizations of Mac OS, as well as for the Inuktitut utilities +# package from Everson Typography. Note that while Apple authorized +# the Inuktitut localization mentioned above, it was not shipped with +# Apple hardware, and was not otherwise supported by Apple. Fonts +# conforming to the Mac OS Inuit character set are available from +# Everson Typography (http://www.evertype.com/software/apple/). +# Information about the use of this character set is available at +# http://www.evertype.com/standards/iu/. +# +# The Mac OS Inuit character set shares the script code smEthiopic +# (28) with the Ethiopic encoding. To determine if the Inuktitut +# encoding is being used, you must also check if the system region +# code is 78, verNunavut. +# +# The Mac OS Inuit character set includes the full syllabic letter +# repertoire required for Inuktitut; it is a subset of the Unified +# Canadian Aboriginal Syllabics set encoded in Unicode. 
The encoding +# is InuitSCII, designed by Doug Hitch for the Government of the +# Northwest Territories. +# +# The Mac OS Inuit character set also includes a number of characters +# that were needed for the classic Mac OS user interface and +# localization (e.g. ellipsis, bullet, copyright sign). All of the +# characters in Mac OS Inuit that are also in the Mac OS Roman +# encoding are at the same code point in both; this improves +# application compatibility. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +################## +0x0000 - 0x007E = 0x00 - +0x00A0 = 0xCA +0x00A9 = 0xA9 +0x00AE = 0xA8 +0x00B0 = 0xA1 +0x00B6 = 0xA6 +0x0141 = 0xFE +0x0142 = 0xFF +0x1403 = 0x80 +0x1404 = 0x81 +0x1405 = 0x82 +0x1406 = 0x83 +0x140A = 0x84 +0x140B = 0x85 +0x1431 = 0x86 +0x1432 = 0x87 +0x1433 = 0x88 +0x1434 = 0x89 +0x1438 = 0x8A +0x1439 = 0x8B +0x1449 = 0x8C +0x144E = 0x8D +0x144F = 0x8E +0x1450 = 0x8F +0x1451 = 0x90 +0x1455 = 0x91 +0x1456 = 0x92 +0x1466 = 0x93 +0x146D = 0x94 +0x146E = 0x95 +0x146F = 0x96 +0x1470 = 0x97 +0x1472 = 0x98 +0x1473 = 0x99 +0x1483 = 0x9A +0x148B = 0x9B +0x148C = 0x9C +0x148D = 0x9D +0x148E = 0x9E +0x1490 = 0x9F +0x1491 = 0xA0 +0x14A1 = 0xA2 +0x14A5 = 0xA3 +0x14A6 = 0xA4 +0x14A7 = 0xA7 +0x14A8 = 0xAB +0x14AA = 0xAC +0x14AB = 0xAD +0x14BB = 0xAE +0x14C2 = 0xAF +0x14C3 = 0xB0 +0x14C4 = 0xB1 +0x14C5 = 0xB2 +0x14C7 = 0xB3 +0x14C8 = 0xB4 +0x14D0 = 0xB5 +0x14D5 = 0xBD +0x14D6 = 0xBE +0x14D7 = 0xBF +0x14D8 = 0xC0 +0x14DA = 0xC1 +0x14DB = 0xC2 +0x14EA = 0xC3 +0x14EF = 0xB6 +0x14F0 = 0xB7 +0x14F1 = 0xB8 +0x14F2 = 0xB9 +0x14F4 = 0xBA +0x14F5 = 0xBB +0x1505 = 0xBC +0x1528 = 0xC4 +0x1529 = 0xC5 +0x152A = 0xC6 +0x152B = 0xC7 +0x152D = 0xC8 +0x152E = 0xCB +0x153E = 0xCC +0x1546 = 0xDA +0x1547 = 0xDB +0x1548 = 0xDC +0x1549 = 0xDD +0x154B = 0xDE +0x154C = 0xDF +0x1550 = 0xE0 +0x1555 = 0xCD +0x1556 = 0xCE +0x1557 = 0xCF +0x1558 = 0xD6 
+0x1559 = 0xD7 +0x155A = 0xD8 +0x155D = 0xD9 +0x157C = 0xFD +0x157F = 0xE1 +0x1580 = 0xE2 +0x1581 = 0xE3 +0x1582 = 0xE4 +0x1583 = 0xE5 +0x1584 = 0xE6 +0x1585 = 0xE7 +0x158F = 0xE8 +0x1590 = 0xE9 +0x1591 = 0xEA +0x1592 = 0xEB +0x1593 = 0xEC +0x1594 = 0xED +0x1595 = 0xEE +0x1596 = 0xF5 +0x15A0 = 0xF6 +0x15A1 = 0xF7 +0x15A2 = 0xF8 +0x15A3 = 0xF9 +0x15A4 = 0xFA +0x15A5 = 0xFB +0x15A6 = 0xFC +0x1671 = 0xEF +0x1672 = 0xF0 +0x1673 = 0xF1 +0x1674 = 0xF2 +0x1675 = 0xF3 +0x1676 = 0xF4 +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201C = 0xD2 +0x201D = 0xD3 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x2122 = 0xAA +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%KEYBOARD.src b/share/i18n/csmapper/APPLE/UCS%KEYBOARD.src new file mode 100644 index 0000000..ef6727f --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%KEYBOARD.src @@ -0,0 +1,234 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME UCS/KEYBOARD +SRC_ZONE 0x0000-0xF8FF +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 +#======================================================================= +# File name: KEYBOARD.TXT +# +# Contents: Map (external version) from Mac OS Keyboard +# character set to Unicode 4.0 and later. +# +# Copyright: (c) 2001-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Change mappings for 0x09, 0x0F, 0x8C; add +# Mac OS X-only mappings for 0x8D-0x8F. +# Update header comments, including +# clarification of Mac OS X usage. Matches +# internal xml <c1.2> and Text Encoding +# Converter 2.0. +# b1,c1 2002-Dec-19 First version. Matches internal utom<b6>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. 
For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Keyboard code (in hex as 0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN, etc.). +# Column #3 is a comment containing the Unicode name. +# In some cases an additional comment follows the Unicode name. +# +# The entries are in Mac OS Keyboard code order. +# +# Some of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# The Mac OS Keyboard character set uses the ranges normally set aside +# for controls, so those ranges are present in this table. +# +# Notes on Mac OS Keyboard: +# ------------------------- +# +# This is the encoding for the legacy font named ".Keyboard". Before +# Mac OS X, this font was used by the user-interface system to display +# glyphs for special keys on the keyboard. 
In Mac OS X, that font is +# not present and this mapping is not associated with a font; it is +# only used as a way to map from a set of Menu Manager constants to +# associated Unicode sequences. As such, new mappings added for Mac OS +# X only may be one-way mappings: From the Keyboard glyph "encoding" +# to Unicode, but not back. +# +# The Mac OS Keyboard encoding shares the script code smRoman +# (0) with the Mac OS Roman encoding. To determine if the Keyboard +# encoding is being used in Mac OS 8 or Mac OS 9, you must check if +# the font name is ".Keyboard". +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The goals in the mappings provided here are: +# - For mappings used in Mac OS 8 and Mac OS 9, ensure roundtrip +# mapping from every character in the Mac OS Keyboard character set +# to Unicode and back. This consideration does not apply to mappings +# added for Mac OS X only (noted below). +# - Use standard Unicode characters as much as possible, to +# maximize interchangeability of the resulting Unicode text. +# Whenever possible, avoid having content carried by private-use +# characters. +# +# Some of the characters in the Mac OS Keyboard character set do not +# correspond to distinct, single Unicode characters. To map these +# and satisfy both goals above, we employ various strategies. +# +# a) If possible, use private use characters in combination with +# standard Unicode characters to mark variants of the standard +# Unicode character. +# +# Apple has defined a block of 32 corporate characters as "transcoding +# hints." These are used in combination with standard Unicode +# characters to force them to be treated in a special way for mapping +# to other encodings; they have no other effect. Sixteen of these +# transcoding hints are "grouping hints" - they indicate that the next +# 2-4 Unicode characters should be treated as a single entity for +# transcoding. 
The other sixteen transcoding hints are "variant tags" +# - they are like combining characters, and can follow a standard +# Unicode (or a sequence consisting of a base character and other +# combining characters) to cause it to be treated in a special way for +# transcoding. These always terminate a combining-character sequence. +# +# The transcoding hints used in this mapping table are two +# grouping tags, 0xF860-61, and one variant tag, 0xF87F. Since these +# are combined with standard Unicode characters, some characters in +# the Mac OS Keyboard character set map to a sequence of two to four +# Unicodes instead of a single Unicode character. +# +# For example, the Mac OS Keyboard character at 0x6F, representing the +# F1 key, is mapped to Unicode using the grouping tag F860 (group next +# two) followed by U+0046 (LATIN CAPITAL LETTER F) and U+0031 (DIGIT +# ONE). +# +# b) Otherwise, use private use characters by themselves to map Mac OS +# Keyboard characters which have no relationship to any standard +# Unicode character. +# +# The following additional corporate zone Unicode characters are +# used for this purpose here: +# +# 0xF802 Lower left pencil +# 0xF803 Contextual menu key symbol +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. 
+# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version c01 to version c02: +# +# - Mapping for 0x09 changed from 0x0009 (wrong) to 0x2423 +# - Mapping for 0x0F changed from 0x270E (wrong) to 0xF802 +# - Mapping for 0x8C changed from 0xF804 to 0x23CF (Unicode 4.0) +# - Add Mac OS X-only mappings for 0x8D-0x8F +# +################## +BEGIN_MAP +0x0000 = 0x00 +0x0008 = 0x08 +0x000D = 0x0D +0x0020 = 0x20 +0x0030 = 0x30 +0x0031 = 0x31 +0x0032 = 0x32 +0x0033 = 0x33 +0x0034 = 0x34 +0x0035 = 0x35 +0x0036 = 0x36 +0x0037 = 0x37 +0x0038 = 0x38 +0x0039 = 0x39 +#0x003F+0x20DD = 0x67 +0x0046 = 0x46 +0x2190 = 0x64 +0x2191 = 0x68 +0x2192 = 0x65 +0x2193 = 0x6A +0x2196 = 0x66 +0x2198 = 0x69 +0x21A9 = 0x0B +0x21AA = 0x0C +0x21DE = 0x62 +0x21DF = 0x6B +0x21E0 = 0x18 +0x21E1 = 0x19 +0x21E2 = 0x1A +0x21E3 = 0x10 +0x21E4 = 0x03 +0x21E5 = 0x02 +0x21E7 = 0x05 +0x21EA = 0x63 +0x2303 = 0x06 +0x2318 = 0x11 +0x2324 = 0x04 +0x2325 = 0x07 +0x2326 = 0x0A +0x2327 = 0x1C +0x232B = 0x17 +0x2387 = 0x8B +0x2388 = 0x8A +0x238B = 0x1B +0x23CF = 0x8C +0x2423 = 0x09 +0x2423 = 0x61 +0x25C6 = 0x13 +0x2713 = 0x12 +#0x2758+0x20DD = 0x6E +#0x304B+0x306A = 0x8E +#0x82F1+0x6570 = 0x8D +0xF802 = 0x0F +0xF803 = 0x6D +#0xF860+0x0046+0x0031 = 0x6F +#0xF860+0x0046+0x0032 = 0x70 +#0xF860+0x0046+0x0033 = 0x71 +#0xF860+0x0046+0x0034 = 0x72 +#0xF860+0x0046+0x0035 = 0x73 +#0xF860+0x0046+0x0036 = 0x74 +#0xF860+0x0046+0x0037 = 0x75 +#0xF860+0x0046+0x0038 = 0x76 +#0xF860+0x0046+0x0039 = 0x77 +#0xF861+0x0046+0x0031+0x0030 = 0x78 +#0xF861+0x0046+0x0031+0x0031 = 0x79 +#0xF861+0x0046+0x0031+0x0032 = 0x7A +#0xF861+0x0046+0x0031+0x0033 = 0x87 +#0xF861+0x0046+0x0031+0x0034 = 0x88 +#0xF861+0x0046+0x0031+0x0035 = 0x89 +#0xF861+0x0046+0x0031+0x0036 = 0x8F +0xF8FF = 0x14 +#0xF8FF+0xF87F = 0x6C +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%ROMAN.src b/share/i18n/csmapper/APPLE/UCS%ROMAN.src new file mode 100644 index 0000000..81f20ee --- /dev/null +++ 
b/share/i18n/csmapper/APPLE/UCS%ROMAN.src @@ -0,0 +1,290 @@ +# $FreeBSD$ +# $NetBSD: UCS%ROMAN.src,v 1.2 2006/04/08 15:47:40 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/ROMAN +SRC_ZONE 0x0000-0xFB02 +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: ROMAN.TXT +# +# Contents: Map (external version) from Mac OS Roman +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b4,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b5>. +# b03 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b4>, ufrm<b3>, and Text +# Encoding Converter version 1.5. +# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to +# EURO SIGN. Matches internal utom<b3>, +# ufrm<b3>. +# n08 1998-Feb-05 Minor update to header comments +# n06 1997-Dec-14 Add warning about future changes to 0xDB +# from CURRENCY SIGN to EURO SIGN. Clarify +# some header information +# n04 1997-Dec-01 Update to match internal utom<n3>, ufrm<n22>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n9>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. 
+# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Roman code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Roman code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Roman character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Roman: +# ---------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. 
+# +# This character set is used for at least the following Mac OS +# localizations: U.S., British, Canadian French, French, Swiss +# French, German, Swiss German, Italian, Swiss Italian, Dutch, +# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan, +# Portuguese, Brazilian, and the default International system. +# +# Variants of Mac OS Roman are used for Croatian, Icelandic, +# Turkish, Romanian, and other encodings. Separate mapping tables +# are available for these encodings. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Roman encoding that still +# maps 0xDB to U+00A4; this can be used for older fonts. +# +# Before Mac OS 8.5, the ROM bitmap versions of the fonts Chicago, +# New York, Geneva, and Monaco did not implement the full Mac OS +# Roman character set; they only supported character codes up to +# 0xD8. The TrueType versions of these fonts have always implemented +# the full character set, as with the bitmap and TrueType versions +# of the other standard Roman fonts. +# +# In all Mac OS encodings, fonts such as Chicago which are used +# as "system" fonts (for menus, dialogs, etc.) have four glyphs +# at code points 0x11-0x14 for transient use by the Menu Manager. +# These glyphs are not intended as characters for use in normal +# text, and the associated code points are not generally +# interpreted as associated with these glyphs; they are usually +# interpreted (if at all) as the control codes DC1-DC4. 
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n08 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n03 to version n04: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. +# +################## +0x0000 - 0x007E = 0x00 - +0x00A0 = 0xCA +0x00A1 = 0xC1 +0x00A2 = 0xA2 +0x00A3 = 0xA3 +0x00A5 = 0xB4 +0x00A7 = 0xA4 +0x00A8 = 0xAC +0x00A9 = 0xA9 +0x00AA = 0xBB +0x00AB = 0xC7 +0x00AC = 0xC2 +0x00AE = 0xA8 +0x00AF = 0xF8 +0x00B0 = 0xA1 +0x00B1 = 0xB1 +0x00B4 = 0xAB +0x00B5 = 0xB5 +0x00B6 = 0xA6 +0x00B7 = 0xE1 +0x00B8 = 0xFC +0x00BA = 0xBC +0x00BB = 0xC8 +0x00BF = 0xC0 +0x00C0 = 0xCB +0x00C1 = 0xE7 +0x00C2 = 0xE5 +0x00C3 = 0xCC +0x00C4 = 0x80 +0x00C5 = 0x81 +0x00C6 = 0xAE +0x00C7 = 0x82 +0x00C8 = 0xE9 +0x00C9 = 0x83 +0x00CA = 0xE6 +0x00CB = 0xE8 +0x00CC = 0xED +0x00CD = 0xEA +0x00CE = 0xEB +0x00CF = 0xEC +0x00D1 = 0x84 +0x00D2 = 0xF1 +0x00D3 = 0xEE +0x00D4 = 0xEF +0x00D5 = 0xCD +0x00D6 = 0x85 +0x00D8 = 0xAF +0x00D9 = 0xF4 +0x00DA = 0xF2 +0x00DB = 0xF3 +0x00DC = 0x86 +0x00DF = 0xA7 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E3 = 0x8B +0x00E4 = 0x8A +0x00E5 = 0x8C +0x00E6 = 0xBE +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00EC = 0x93 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F1 = 0x96 +0x00F2 = 0x98 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F5 = 0x9B +0x00F6 = 0x9A +0x00F7 = 0xD6 +0x00F8 = 0xBF +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 
0x9F +0x00FF = 0xD8 +0x0131 = 0xF5 +0x0152 = 0xCE +0x0153 = 0xCF +0x0178 = 0xD9 +0x0192 = 0xC4 +0x02C6 = 0xF6 +0x02C7 = 0xFF +0x02D8 = 0xF9 +0x02D9 = 0xFA +0x02DA = 0xFB +0x02DB = 0xFE +0x02DC = 0xF7 +0x02DD = 0xFD +0x03A9 = 0xBD +0x03C0 = 0xB9 +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201A = 0xE2 +0x201C = 0xD2 +0x201D = 0xD3 +0x201E = 0xE3 +0x2020 = 0xA0 +0x2021 = 0xE0 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x2030 = 0xE4 +0x2039 = 0xDC +0x203A = 0xDD +0x2044 = 0xDA +0x20AC = 0xDB +0x2122 = 0xAA +0x2202 = 0xB6 +0x2206 = 0xC6 +0x220F = 0xB8 +0x2211 = 0xB7 +0x221A = 0xC3 +0x221E = 0xB0 +0x222B = 0xBA +0x2248 = 0xC5 +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +0x25CA = 0xD7 +0xF8FF = 0xF0 +0xFB01 = 0xDE +0xFB02 = 0xDF +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%ROMANIAN.src b/share/i18n/csmapper/APPLE/UCS%ROMANIAN.src new file mode 100644 index 0000000..8239684 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%ROMANIAN.src @@ -0,0 +1,285 @@ +# $FreeBSD$ +# $NetBSD: UCS%ROMANIAN.src,v 1.2 2006/04/08 15:47:40 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/ROMANIAN +SRC_ZONE 0x0000-0xF8FF +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: ROMANIAN.TXT +# +# Contents: Map (external version) from Mac OS Romanian +# character set to Unicode 3.0 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.2> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update mappings for 0xAF, 0xBF, 0xDE, 0xDF +# to use new composed characters added in +# Unicode 3.0. Update URLs, notes. Matches +# internal utom<b3>. 
+# b02 1999-Sep-22 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to EURO +# SIGN. Update contact e-mail address. Matches +# internal utom<b2>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Minor update to header comments +# n03 1997-Dec-14 Update to match internal utom<n5>, ufrm<n16>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# Change mapping of 0xAF,0xBF,0xDE,0xDF from +# composed S/T WITH CEDILLA to S/T with +# COMBINING COMMA BELOW (to match our +# decomposition mappings). +# n02 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n4>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. 
+# Column #1 is the Mac OS Romanian code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Romanian code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Romanian character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Romanian: +# ------------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Romanian is used only for Romanian. +# +# The Mac OS Romanian encoding shares the script code smRoman +# (0) with the standard Mac OS Roman encoding. To determine if +# the Romanian encoding is being used, you must also check if the +# system region code is 39, verRomania. +# +# This character set is a variant of standard Mac OS Roman, adding +# upper and lower A breve, S comma below, and T comma below. It +# has 6 code point differences from standard Mac OS Roman. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Romanian encoding that +# still maps 0xDB to U+00A4; this can be used for older fonts. 
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - Update the mappings for 0xAF, 0xBF, 0xDE, 0xDF to use new +# composed Unicode characters 0x0218-0x021B added in Unicode 3.0; +# the previous mappings were to the equivalent decomposition +# sequences. +# +# Changes from version n05 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n02 to version n03: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. +# - Change mapping of 0xAF,0xBF,0xDE,0xDF from composed S or T +# WITH CEDILLA to S or T with COMBINING COMMA BELOW (to match +# our decomposition mappings). 
+# +################## +0x0000 - 0x007F = 0x00 - +0x00A0 = 0xCA +0x00A1 = 0xC1 +0x00A2 = 0xA2 +0x00A3 = 0xA3 +0x00A5 = 0xB4 +0x00A7 = 0xA4 +0x00A8 = 0xAC +0x00A9 = 0xA9 +0x00AA = 0xBB +0x00AB = 0xC7 +0x00AC = 0xC2 +0x00AE = 0xA8 +0x00AF = 0xF8 +0x00B0 = 0xA1 +0x00B1 = 0xB1 +0x00B4 = 0xAB +0x00B5 = 0xB5 +0x00B6 = 0xA6 +0x00B7 = 0xE1 +0x00B8 = 0xFC +0x00BA = 0xBC +0x00BB = 0xC8 +0x00BF = 0xC0 +0x00C0 = 0xCB +0x00C1 = 0xE7 +0x00C2 = 0xE5 +0x00C3 = 0xCC +0x00C4 = 0x80 +0x00C5 = 0x81 +0x00C7 = 0x82 +0x00C8 = 0xE9 +0x00C9 = 0x83 +0x00CA = 0xE6 +0x00CB = 0xE8 +0x00CC = 0xED +0x00CD = 0xEA +0x00CE = 0xEB +0x00CF = 0xEC +0x00D1 = 0x84 +0x00D2 = 0xF1 +0x00D3 = 0xEE +0x00D4 = 0xEF +0x00D5 = 0xCD +0x00D6 = 0x85 +0x00D9 = 0xF4 +0x00DA = 0xF2 +0x00DB = 0xF3 +0x00DC = 0x86 +0x00DF = 0xA7 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E3 = 0x8B +0x00E4 = 0x8A +0x00E5 = 0x8C +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00EC = 0x93 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F1 = 0x96 +0x00F2 = 0x98 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F5 = 0x9B +0x00F6 = 0x9A +0x00F7 = 0xD6 +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 0x9F +0x00FF = 0xD8 +0x0102 = 0xAE +0x0103 = 0xBE +0x0131 = 0xF5 +0x0152 = 0xCE +0x0153 = 0xCF +0x0178 = 0xD9 +0x0192 = 0xC4 +0x0218 = 0xAF +0x0219 = 0xBF +0x021A = 0xDE +0x021B = 0xDF +0x02C6 = 0xF6 +0x02C7 = 0xFF +0x02D8 = 0xF9 +0x02D9 = 0xFA +0x02DA = 0xFB +0x02DB = 0xFE +0x02DC = 0xF7 +0x02DD = 0xFD +0x03A9 = 0xBD +0x03C0 = 0xB9 +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201A = 0xE2 +0x201C = 0xD2 +0x201D = 0xD3 +0x201E = 0xE3 +0x2020 = 0xA0 +0x2021 = 0xE0 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x2030 = 0xE4 +0x2039 = 0xDC +0x203A = 0xDD +0x2044 = 0xDA +0x20AC = 0xDB +0x2122 = 0xAA +0x2202 = 0xB6 +0x2206 = 0xC6 +0x220F = 0xB8 +0x2211 = 0xB7 +0x221A = 0xC3 +0x221E = 0xB0 +0x222B = 0xBA +0x2248 = 0xC5 +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +0x25CA = 0xD7 +0xF8FF = 0xF0 +END_MAP diff --git 
a/share/i18n/csmapper/APPLE/UCS%SYMBOL.src b/share/i18n/csmapper/APPLE/UCS%SYMBOL.src new file mode 100644 index 0000000..0297f76 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%SYMBOL.src @@ -0,0 +1,383 @@ +# $FreeBSD$ + +TYPE ROWCOL +NAME UCS/SYMBOL +SRC_ZONE 0x0000-0xF8FF +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +#======================================================================= +# File name: SYMBOL.TXT +# +# Contents: Map (external version) from Mac OS Symbol +# character set to Unicode 4.0 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Change mappings for 0xBD, 0xE0. Update +# header comments. Matches internal xml <c1.2> +# and Text Encoding Converter 2.0. +# b4,c1 2002-Dec-19 Update mappings for encoded glyph fragments +# 0xBE, 0xE6-EF, 0xF4, 0xF6-FE to use new +# Unicode 3.2 characters instead of sequences +# involving corporate-use characters. Update +# URLs, notes. Matches internal utom<b4>. +# b03 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b3>, ufrm<b3>, and Text +# Encoding Converter version 1.5. +# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; add new +# mapping from 0xA0 to EURO SIGN. Matches +# internal utom<b3>, ufrm<b3>. +# n05 1998-Feb-05 Update to match internal utom<n5>, ufrm<n15> +# and Text Encoding Converter version 1.3: +# Use standard Unicodes plus transcoding hints +# instead of single corporate characters, also +# change mappings for 0xE1 & 0xF1 from U+2329 +# & U+232A to their canonical decompositions; +# see details below. Also update header +# comments to new format. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n4>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. 
+# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Symbol code (in hex as 0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name. +# In some cases an additional comment follows the Unicode name. +# +# The entries are in Mac OS Symbol code order. +# +# Some of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Symbol character set uses the standard control characters +# at 0x00-0x1F and 0x7F. 
+# +# Notes on Mac OS Symbol: +# ----------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# The Mac OS Symbol encoding shares the script code smRoman +# (0) with the Mac OS Roman encoding. To determine if the Symbol +# encoding is being used, you must check if the font name is +# "Symbol". +# +# Before Mac OS 8.5, code point 0xA0 was unused. In Mac OS 8.5 +# and later versions, code point 0xA0 is EURO SIGN and maps to +# U+20AC (the Symbol font is updated for Mac OS 8.5 to reflect +# this). +# +# The layout of the Mac OS Symbol character set is identical to +# the layout of the Adobe Symbol encoding vector, with the +# addition of the Apple logo character at 0xF0. +# +# This character set encodes a number of glyph fragments. Some are +# used as extenders: 0x60 is used to extend radical signs, 0xBD and +# 0xBE are used to extend vertical and horizontal arrows, etc. In +# addition, there are top, bottom, and center sections for +# parentheses, brackets, integral signs, and other signs that may +# extend vertically for 2 or more lines of normal text. As of +# Unicode 3.2, most of these are now encoded in Unicode; a few are +# not, so these are mapped using corporate-zone Unicode characters +# (see below). +# +# In addition, Symbol separately encodes both serif and sans-serif +# forms for copyright, trademark, and registered signs. Unicode +# encodes only the abstract characters, so one set of these (the +# sans-serif forms) are also mapped using corporate-zone Unicode +# characters (see below). +# +# The following code points are unused, and are not shown here: +# 0x80-0x9F, 0xFF. 
+# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The goals in the mappings provided here are: +# - Ensure roundtrip mapping from every character in the Mac OS +# Symbol character set to Unicode and back +# - Use standard Unicode characters as much as possible, to +# maximize interchangeability of the resulting Unicode text. +# Whenever possible, avoid having content carried by private-use +# characters. +# +# Some of the characters in the Mac OS Symbol character set do not +# correspond to distinct, single Unicode characters. To map these +# and satisfy both goals above, we employ various strategies. +# +# a) If possible, use private use characters in combination with +# standard Unicode characters to mark variants of the standard +# Unicode character. +# +# Apple has defined a block of 32 corporate characters as "transcoding +# hints." These are used in combination with standard Unicode +# characters to force them to be treated in a special way for mapping +# to other encodings; they have no other effect. Sixteen of these +# transcoding hints are "grouping hints" - they indicate that the next +# 2-4 Unicode characters should be treated as a single entity for +# transcoding. The other sixteen transcoding hints are "variant tags" +# - they are like combining characters, and can follow a standard +# Unicode (or a sequence consisting of a base character and other +# combining characters) to cause it to be treated in a special way for +# transcoding. These always terminate a combining-character sequence. +# +# The transcoding coding hint used in this mapping table is the +# variant tag 0xF87F. Since this is combined with standard Unicode +# characters, some characters in the Mac OS Symbol character set map +# to a sequence of two Unicodes instead of a single Unicode character. 
+# +# For example, the Mac OS Symbol character at 0xE2 is an alternate, +# sans-serif form of the REGISTERED SIGN (the standard mapping is for +# the abstract character at 0xD2, which here has a serif form). So 0xE2 +# is mapped to 0x00AE (REGISTERED SIGN) + 0xF87F (a variant tag). +# +# b) Otherwise, use private use characters by themselves to map +# Mac OS Symbol characters which have no relationship to any standard +# Unicode character. +# +# The following additional corporate zone Unicode characters are +# used for this purpose here: +# +# 0xF8E5 radical extender +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version c01 to version c02: +# +# - Update mappings for 0xBD from 0xF8E6 to 0x23D0 (use new Unicode +# 4.0 char) +# - Correct mapping for 0xE0 from 0x22C4 to 0x25CA +# +# Changes from version b02 to version b03/c01: +# +# - Update mappings for encoded glyph fragments 0xBE, 0xE6-EF, 0xF4, +# 0xF6-FE to use new Unicode 3.2 characters instead of using either +# single corporate-use characters (e.g. 0xBE was mapped to 0xF8E7) or +# sequences combining a standard Unicode character with a transcoding +# hint (e.g. 0xE6 was mapped to 0x0028+0xF870). +# +# Changes from version n05 to version b02: +# +# - Encoding changed for Mac OS 8.5; 0xA0 now maps to 0x20AC, EURO +# SIGN. 0xA0 was unmapped in earlier versions. +# +# Changes from version n03 to version n05: +# +# - Change strict mapping for 0xE1 & 0xF1 from U+2329 & U+232A +# to their canonical decompositions, U+3008 & U+3009. +# +# - Change mapping for the following to use standard Unicode + +# transcoding hint, instead of single corporate-zone +# character: 0xE2-0xE4, 0xE6-0xEE, 0xF4, 0xF6-0xFE. 
+# +################## + +0x0000 - 0x007F = 0x00 - +0x00A9 = 0xD3 +#0x00A9+0xF87F = 0xE3 +0x00AC = 0xD8 +0x00AE = 0xD2 +#0x00AE+0xF87F = 0xE2 +0x00B0 = 0xB0 +0x00B1 = 0xB1 +0x00D7 = 0xB4 +0x00F7 = 0xB8 +0x0192 = 0xA6 +0x0391 = 0x41 +0x0392 = 0x42 +0x0393 = 0x47 +0x0394 = 0x44 +0x0395 = 0x45 +0x0396 = 0x5A +0x0397 = 0x48 +0x0398 = 0x51 +0x0399 = 0x49 +0x039A = 0x4B +0x039B = 0x4C +0x039C = 0x4D +0x039D = 0x4E +0x039E = 0x58 +0x039F = 0x4F +0x03A0 = 0x50 +0x03A1 = 0x52 +0x03A3 = 0x53 +0x03A4 = 0x54 +0x03A5 = 0x55 +0x03A6 = 0x46 +0x03A7 = 0x43 +0x03A8 = 0x59 +0x03A9 = 0x57 +0x03B1 = 0x61 +0x03B2 = 0x62 +0x03B3 = 0x67 +0x03B4 = 0x64 +0x03B5 = 0x65 +0x03B6 = 0x7A +0x03B7 = 0x68 +0x03B8 = 0x71 +0x03B9 = 0x69 +0x03BA = 0x6B +0x03BB = 0x6C +0x03BC = 0x6D +0x03BD = 0x6E +0x03BE = 0x78 +0x03BF = 0x6F +0x03C0 = 0x70 +0x03C1 = 0x72 +0x03C2 = 0x56 +0x03C3 = 0x73 +0x03C4 = 0x74 +0x03C5 = 0x75 +0x03C6 = 0x66 +0x03C7 = 0x63 +0x03C8 = 0x79 +0x03C9 = 0x77 +0x03D1 = 0x4A +0x03D2 = 0xA1 +0x03D5 = 0x6A +0x03D6 = 0x76 +0x2022 = 0xB7 +0x2026 = 0xBC +0x2032 = 0xA2 +0x2033 = 0xB2 +0x2044 = 0xA4 +0x20AC = 0xA0 +0x2111 = 0xC1 +0x2118 = 0xC3 +0x211C = 0xC2 +0x2122 = 0xD4 +#0x2122+0xF87F = 0xE4 +0x2135 = 0xC0 +0x2190 = 0xAC +0x2191 = 0xAD +0x2192 = 0xAE +0x2193 = 0xAF +0x2194 = 0xAB +0x21B5 = 0xBF +0x21D0 = 0xDC +0x21D1 = 0xDD +0x21D2 = 0xDE +0x21D3 = 0xDF +0x21D4 = 0xDB +0x2200 = 0x22 +0x2202 = 0xB6 +0x2203 = 0x24 +0x2205 = 0xC6 +0x2207 = 0xD1 +0x2208 = 0xCE +0x2209 = 0xCF +0x220D = 0x27 +0x220F = 0xD5 +0x2211 = 0xE5 +0x2212 = 0x2D +0x2217 = 0x2A +0x221A = 0xD6 +0x221D = 0xB5 +0x221E = 0xA5 +0x2220 = 0xD0 +0x2227 = 0xD9 +0x2228 = 0xDA +0x2229 = 0xC7 +0x222A = 0xC8 +0x222B = 0xF2 +0x2234 = 0x5C +0x223C = 0x7E +0x2245 = 0x40 +0x2248 = 0xBB +0x2260 = 0xB9 +0x2261 = 0xBA +0x2264 = 0xA3 +0x2265 = 0xB3 +0x2282 = 0xCC +0x2283 = 0xC9 +0x2284 = 0xCB +0x2286 = 0xCD +0x2287 = 0xCA +0x2295 = 0xC5 +0x2297 = 0xC4 +0x22A5 = 0x5E +0x22C5 = 0xD7 +0x2320 = 0xF3 +0x2321 = 0xF5 +0x239B = 0xE6 +0x239C = 0xE7 
+0x239D = 0xE8 +0x239E = 0xF6 +0x239F = 0xF7 +0x23A0 = 0xF8 +0x23A1 = 0xE9 +0x23A2 = 0xEA +0x23A3 = 0xEB +0x23A4 = 0xF9 +0x23A5 = 0xFA +0x23A6 = 0xFB +0x23A7 = 0xEC +0x23A8 = 0xED +0x23A9 = 0xEE +0x23AA = 0xEF +0x23AB = 0xFC +0x23AC = 0xFD +0x23AD = 0xFE +0x23AE = 0xF4 +0x23AF = 0xBE +0x23D0 = 0xBD +0x25CA = 0xE0 +0x2660 = 0xAA +0x2663 = 0xA7 +0x2665 = 0xA9 +0x2666 = 0xA8 +0x3008 = 0xE1 +0x3009 = 0xF1 +0xF8E5 = 0x60 +0xF8FF = 0xF0 +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%THAI.src b/share/i18n/csmapper/APPLE/UCS%THAI.src new file mode 100644 index 0000000..086b344 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%THAI.src @@ -0,0 +1,302 @@ +# $FreeBSD$ +# $NetBSD: UCS%THAI.src,v 1.2 2006/04/08 15:47:40 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/THAI +SRC_ZONE 0x0000-0x2122 +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: THAI.TXT +# +# Contents: Map (external version) from Mac OS Thai +# character set to Unicode 3.2 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update mapping for 0xDB to use new Unicode +# 3.2 WORD JOINER instead of ZWNBSP (BOM). +# Update URLs. Matches internal utom<b3>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b2>, and Text +# Encoding Converter version 1.5. +# n07 1998-Feb-05 Update to match internal utom<n5>, ufrm<n13> +# and Text Encoding Converter version 1.3: +# Use standard Unicodes plus transcoding hints +# instead of single corporate characters; see +# details below. Also update header comments +# to new format. 
+# n04 1995-Nov-17 First version (after fixing some typos). +# Matches internal ufrm<n6>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Thai code (in hex as 0xNN) +# Column #2 is the corresponding Unicode or Unicode sequence +# (in hex as 0xNNNN or 0xNNNN+0xNNNN). +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Thai code order. +# +# Some of these mappings require the use of corporate characters. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Thai character set uses the standard control characters at +# 0x00-0x1F and 0x7F. 
+# +# Notes on Mac OS Thai: +# --------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Codes 0xA1-0xDA and 0xDF-0xFB are the character set from Thai +# standard TIS 620-2533, except that the following changes are +# made: +# 0xEE is TRADE MARK SIGN (instead of THAI CHARACTER YAMAKKAN) +# 0xFA is REGISTERED SIGN (instead of THAI CHARACTER ANGKHANKHU) +# 0xFB is COPYRIGHT SIGN (instead of THAI CHARACTER KHOMUT) +# +# Codes 0x80-0x82, 0x8D-0x8E, 0x91, 0x9D-0x9E, and 0xDB-0xDE are +# various additional punctuation marks (e.g. curly quotes, +# ellipsis), no-break space, and two special characters "word join" +# and "word break". +# +# Codes 0x83-0x8C, 0x8F, and 0x92-0x9C are for positional variants +# of the upper vowels, tone marks, and other signs at 0xD1, +# 0xD4-0xD7, and 0xE7-0xED. The positional variants would normally +# be considered presentation forms only and not characters. In most +# cases they are not typed directly; they are selected automatically +# at display time by the WorldScript software. However, using the +# Thai-DTP keyboard, the presentation forms can in fact be typed +# directly using dead keys. Thus they must be treated as real +# characters in the Mac OS Thai encoding. They are mapped using +# variant tags; see below. +# +# Several code points are undefined and unused (they cannot be +# typed using any of the Mac OS Thai keyboard layouts): 0x90, 0x9F, +# 0xFC-0xFE. These are not shown in the table below. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The goals in the Apple mappings provided here are: +# - Ensure roundtrip mapping from every character in the Mac OS Thai +# character set to Unicode and back +# - Use standard Unicode characters as much as possible, to maximize +# interchangeability of the resulting Unicode text. 
Whenever possible, +# avoid having content carried by private-use characters. +# +# To satisfy both goals, we use private use characters to mark variants +# that are similar to a sequence of one or more standard Unicode +# characters. +# +# Apple has defined a block of 32 corporate characters as "transcoding +# hints." These are used in combination with standard Unicode characters +# to force them to be treated in a special way for mapping to other +# encodings; they have no other effect. Sixteen of these transcoding +# hints are "grouping hints" - they indicate that the next 2-4 Unicode +# characters should be treated as a single entity for transcoding. The +# other sixteen transcoding hints are "variant tags" - they are like +# combining characters, and can follow a standard Unicode (or a sequence +# consisting of a base character and other combining characters) to +# cause it to be treated in a special way for transcoding. These always +# terminate a combining-character sequence. +# +# The transcoding coding hints used in this mapping table are four +# variant tags in the range 0xF873-75. Since these are combined with +# standard Unicode characters, some characters in the Mac OS Thai +# character set map to a sequence of two Unicodes instead of a single +# Unicode character. For example, the Mac OS Thai character at 0x83 is a +# low-left positional variant of THAI CHARACTER MAI EK (the standard +# mapping is for the abstract character at 0xE8). So 0x83 is mapped to +# 0x0E48 (THAI CHARACTER MAI EK) + 0xF875 (a variant tag). 
+# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version b02 to version b03/c01: +# +# - Update mapping for 0xDB to use new Unicode 3.2 character U+2060 +# WORD JOINER instead of U+FEFF ZERO WIDTH NO-BREAK SPACE (BOM) +# +# Changes from version n04 to version n07: +# +# - Changed mappings of the positional variants to use standard +# Unicodes + transcoding hint, instead of using single corporate +# zone characters. This affected the mappings for the following: +# 0x83-08C, 0x8F, 0x92-0x9C +# +# - Just comment out unused code points in the table, instead +# of mapping them to U+FFFD. +# +################## +0x0000 - 0x007E = 0x00 - +0x00A0 = 0xA0 +0x00A9 = 0xFB +0x00AB = 0x80 +0x00AE = 0xFA +0x00BB = 0x81 +0x0E01 = 0xA1 +0x0E02 = 0xA2 +0x0E03 = 0xA3 +0x0E04 = 0xA4 +0x0E05 = 0xA5 +0x0E06 = 0xA6 +0x0E07 = 0xA7 +0x0E08 = 0xA8 +0x0E09 = 0xA9 +0x0E0A = 0xAA +0x0E0B = 0xAB +0x0E0C = 0xAC +0x0E0D = 0xAD +0x0E0E = 0xAE +0x0E0F = 0xAF +0x0E10 = 0xB0 +0x0E11 = 0xB1 +0x0E12 = 0xB2 +0x0E13 = 0xB3 +0x0E14 = 0xB4 +0x0E15 = 0xB5 +0x0E16 = 0xB6 +0x0E17 = 0xB7 +0x0E18 = 0xB8 +0x0E19 = 0xB9 +0x0E1A = 0xBA +0x0E1B = 0xBB +0x0E1C = 0xBC +0x0E1D = 0xBD +0x0E1E = 0xBE +0x0E1F = 0xBF +0x0E20 = 0xC0 +0x0E21 = 0xC1 +0x0E22 = 0xC2 +0x0E23 = 0xC3 +0x0E24 = 0xC4 +0x0E25 = 0xC5 +0x0E26 = 0xC6 +0x0E27 = 0xC7 +0x0E28 = 0xC8 +0x0E29 = 0xC9 +0x0E2A = 0xCA +0x0E2B = 0xCB +0x0E2C = 0xCC +0x0E2D = 0xCD +0x0E2E = 0xCE +0x0E2F = 0xCF +0x0E30 = 0xD0 +0x0E31 = 0x92 +0x0E31 = 0xD1 +0x0E32 = 0xD2 +0x0E33 = 0xD3 +0x0E34 = 0x94 +0x0E34 = 0xD4 +0x0E35 = 0x95 +0x0E35 = 0xD5 +0x0E36 = 0x96 +0x0E36 = 0xD6 +0x0E37 = 0x97 +0x0E37 = 0xD7 +0x0E38 = 0xD8 +0x0E39 = 0xD9 +0x0E3A = 0xDA +0x0E3F = 0xDF +0x0E40 = 0xE0 +0x0E41 = 0xE1 +0x0E42 = 0xE2 +0x0E43 = 0xE3 +0x0E44 = 0xE4 +0x0E45 = 0xE5 +0x0E46 = 0xE6 +0x0E47 = 0x93 +0x0E47 = 0xE7 +0x0E48 = 0x83 +0x0E48 = 0x88 +0x0E48 = 0x98 +0x0E48 = 0xE8 +0x0E49 = 0x84 +0x0E49 = 0x89 +0x0E49 = 0x99 +0x0E49 = 0xE9 
+0x0E4A = 0x85 +0x0E4A = 0x8A +0x0E4A = 0x9A +0x0E4A = 0xEA +0x0E4B = 0x86 +0x0E4B = 0x8B +0x0E4B = 0x9B +0x0E4B = 0xEB +0x0E4C = 0x87 +0x0E4C = 0x8C +0x0E4C = 0x9C +0x0E4C = 0xEC +0x0E4D = 0x8F +0x0E4D = 0xED +0x0E4F = 0xEF +0x0E50 = 0xF0 +0x0E51 = 0xF1 +0x0E52 = 0xF2 +0x0E53 = 0xF3 +0x0E54 = 0xF4 +0x0E55 = 0xF5 +0x0E56 = 0xF6 +0x0E57 = 0xF7 +0x0E58 = 0xF8 +0x0E59 = 0xF9 +0x200B = 0xDC +0x2013 = 0xDD +0x2014 = 0xDE +0x2018 = 0x9D +0x2019 = 0x9E +0x201C = 0x8D +0x201D = 0x8E +0x2022 = 0x91 +0x2026 = 0x82 +0x2060 = 0xDB +0x2122 = 0xEE +END_MAP diff --git a/share/i18n/csmapper/APPLE/UCS%TURKISH.src b/share/i18n/csmapper/APPLE/UCS%TURKISH.src new file mode 100644 index 0000000..630abf6 --- /dev/null +++ b/share/i18n/csmapper/APPLE/UCS%TURKISH.src @@ -0,0 +1,261 @@ +# $FreeBSD$ +# $NetBSD: UCS%TURKISH.src,v 1.2 2006/04/08 15:47:40 tnozaki Exp $ + +TYPE ROWCOL +NAME UCS/TURKISH +SRC_ZONE 0x0000-0xF8FF +OOB_MODE INVALID +DST_INVALID 0x100 +DST_UNIT_BITS 16 + +BEGIN_MAP +# +# This mapping data is made from the mapping data provided by Unicode, Inc. +# Original notice: +# +#======================================================================= +# File name: TURKISH.TXT +# +# Contents: Map (external version) from Mac OS Turkish +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b1>. +# b02 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b1>, ufrm<b1>, and Text +# Encoding Converter version 1.5. +# n05 1998-Feb-05 Minor update to header comments +# n03 1997-Dec-14 Update to match internal utom<n5>, ufrm<n15>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n02 1995-Apr-15 First version (after fixing some typos). 
+# Matches internal ufrm<n4>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Turkish code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Turkish code order. +# +# Two of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Turkish character set uses the standard control characters at +# 0x00-0x1F and 0x7F. 
+# +# Notes on Mac OS Turkish: +# ------------------------ +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported via transcoding to and from +# Unicode. +# +# Mac OS Turkish is used for Turkish. +# +# The Mac OS Turkish encoding shares the script code smRoman +# (0) with the Mac OS Roman encoding. To determine if the Turkish +# encoding is being used, you must also check if the system region +# code is 24, verTurkey. +# +# This character set is a variant of standard Mac OS Roman. It adds +# upper & lower G with breve, upper & lower S with cedilla, upper I +# with dot, and moves the dotless lower i from its position at 0xF5 +# in standard Mac OS Roman to a position at 0xDD here (leaving the +# 0xF5 code point undefined in Mac OS Turkish). This gives a total +# of 7 code point differences from standard Mac OS Roman. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode characters are used in this +# mapping: +# +# 0xF8A0 undefined1, used to map the single undefined code point +# in Mac OS Turkish (to obtain roundtrip fidelity for all +# code points). +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n02 to version n03: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. 
+# +################## +0x0000 - 0x007E = 0x00 - +0x00A0 = 0xCA +0x00A1 = 0xC1 +0x00A2 = 0xA2 +0x00A3 = 0xA3 +0x00A5 = 0xB4 +0x00A7 = 0xA4 +0x00A8 = 0xAC +0x00A9 = 0xA9 +0x00AA = 0xBB +0x00AB = 0xC7 +0x00AC = 0xC2 +0x00AE = 0xA8 +0x00AF = 0xF8 +0x00B0 = 0xA1 +0x00B1 = 0xB1 +0x00B4 = 0xAB +0x00B5 = 0xB5 +0x00B6 = 0xA6 +0x00B7 = 0xE1 +0x00B8 = 0xFC +0x00BA = 0xBC +0x00BB = 0xC8 +0x00BF = 0xC0 +0x00C0 = 0xCB +0x00C1 = 0xE7 +0x00C2 = 0xE5 +0x00C3 = 0xCC +0x00C4 = 0x80 +0x00C5 = 0x81 +0x00C6 = 0xAE +0x00C7 = 0x82 +0x00C8 = 0xE9 +0x00C9 = 0x83 +0x00CA = 0xE6 +0x00CB = 0xE8 +0x00CC = 0xED +0x00CD = 0xEA +0x00CE = 0xEB +0x00CF = 0xEC +0x00D1 = 0x84 +0x00D2 = 0xF1 +0x00D3 = 0xEE +0x00D4 = 0xEF +0x00D5 = 0xCD +0x00D6 = 0x85 +0x00D8 = 0xAF +0x00D9 = 0xF4 +0x00DA = 0xF2 +0x00DB = 0xF3 +0x00DC = 0x86 +0x00DF = 0xA7 +0x00E0 = 0x88 +0x00E1 = 0x87 +0x00E2 = 0x89 +0x00E3 = 0x8B +0x00E4 = 0x8A +0x00E5 = 0x8C +0x00E6 = 0xBE +0x00E7 = 0x8D +0x00E8 = 0x8F +0x00E9 = 0x8E +0x00EA = 0x90 +0x00EB = 0x91 +0x00EC = 0x93 +0x00ED = 0x92 +0x00EE = 0x94 +0x00EF = 0x95 +0x00F1 = 0x96 +0x00F2 = 0x98 +0x00F3 = 0x97 +0x00F4 = 0x99 +0x00F5 = 0x9B +0x00F6 = 0x9A +0x00F7 = 0xD6 +0x00F8 = 0xBF +0x00F9 = 0x9D +0x00FA = 0x9C +0x00FB = 0x9E +0x00FC = 0x9F +0x00FF = 0xD8 +0x011E = 0xDA +0x011F = 0xDB +0x0130 = 0xDC +0x0131 = 0xDD +0x0152 = 0xCE +0x0153 = 0xCF +0x015E = 0xDE +0x015F = 0xDF +0x0178 = 0xD9 +0x0192 = 0xC4 +0x02C6 = 0xF6 +0x02C7 = 0xFF +0x02D8 = 0xF9 +0x02D9 = 0xFA +0x02DA = 0xFB +0x02DB = 0xFE +0x02DC = 0xF7 +0x02DD = 0xFD +0x03A9 = 0xBD +0x03C0 = 0xB9 +0x2013 = 0xD0 +0x2014 = 0xD1 +0x2018 = 0xD4 +0x2019 = 0xD5 +0x201A = 0xE2 +0x201C = 0xD2 +0x201D = 0xD3 +0x201E = 0xE3 +0x2020 = 0xA0 +0x2021 = 0xE0 +0x2022 = 0xA5 +0x2026 = 0xC9 +0x2030 = 0xE4 +0x2122 = 0xAA +0x2202 = 0xB6 +0x2206 = 0xC6 +0x220F = 0xB8 +0x2211 = 0xB7 +0x221A = 0xC3 +0x221E = 0xB0 +0x222B = 0xBA +0x2248 = 0xC5 +0x2260 = 0xAD +0x2264 = 0xB2 +0x2265 = 0xB3 +0x25CA = 0xD7 +0xF8A0 = 0xF5 +0xF8FF = 0xF0 +END_MAP |