This fairly intrusive patch fixes the handling of charsets that are not explicitly known to TkRat but whose Tcl encoding name is the same as the charset name (such as koi8-u or koi8-r). Such charsets can be added explicitly to the user's ~/.ratatosk/ratatoskrc file. The patch also takes the charset of the original message into account when replying, instead of just ignoring it. The patch was submitted to the developer and may be included in the next release. -mi --- tkrat/compose.tcl Tue Feb 13 00:59:37 2001 +++ tkrat/compose.tcl Tue Jun 26 14:29:18 2001 @@ -49,4 +49,10 @@ } set handler [$msg reply $to] + global $handler + set charset [[$msg body] parameter charset] + if {"" == $charset} { + set charset auto ;# or US-ASCII? + } + array set $handler [list charset $charset] return [DoCompose $handler \ [expr {($option(reply_bottom)) ? "1" : "-1"}] \ @@ -377,6 +378,4 @@ proc ComposeExtracted {mgh} { - global charsetMapping - upvar #0 $mgh mh if [info exists mh(body)] { @@ -411,15 +410,10 @@ if [info exists p(charset)] { set charset $p(charset) - } else { - if [info exists mh(charset)] { - set charset $mh(charset) - } else { - set charset auto - } + } elseif [info exists mh(charset)] { + set charset $mh(charset) + } else { + set charset [encoding system] } - if {"auto" == $charset} { - set charset utf-8 - } - fconfigure $fh -encoding $charsetMapping($charset) + ConfigureEncoding $fh [SaneCharset $charset] set mh(data) [read $fh] close $fh @@ -1024,4 +1017,31 @@ } +# Cache the known charsets for subsequent SaneCharset invocations: +set KnownCharsets [encoding names] + +proc SaneCharset args { + global charsetMapping KnownCharsets + foreach charset $args { + if {[info exists charsetMapping($charset)]} { + set charset $charsetMapping($charset) + } + if {[lsearch -exact $KnownCharsets $charset] != -1} { + return $charset + } + } + # XXX what else? 
+ return _binary +} + +# If we were unable to figure out the encoding for some reason, +# output the file without translations and hope for the best: +proc ConfigureEncoding {fh encoding} { + if {"_binary" == $encoding} { + fconfigure $fh -translation binary + } else { + fconfigure $fh -encoding $encoding + } +} + # ComposeBuildStruct -- # @@ -1066,5 +1092,5 @@ if {0 != [RatDialog $mh(toplevel) $t(warning) $t(bad_charset) {} 0 \ $t(continue) $t(abort)]} { - return + return -1 } set charset $fallback @@ -1074,10 +1096,9 @@ lappend bh(parameter) [list charset $charset] } - } else { - if {"auto" == $charset} { - set charset utf-8 - } } - fconfigure $fh -encoding $charsetMapping($charset) + + set mh(charset) [set charset [SaneCharset $charset]] + ConfigureEncoding $fh $charset + puts -nonewline $fh $bodydata close $fh @@ -1172,8 +1193,12 @@ # Write data, change text visible and edit set ecmd [lindex $editor($e) 0] - set charset $charsetMapping([lindex $editor($e) 1]) + # If the charset preferred by the editor is not known, try the one + # used by the message itself -- so we get what was written already + # unmodified. It should be possible to specify _a list_ of charsets + # the editor can handle. XXX + set charset [SaneCharset [lindex $editor($e) 1] $mh(charset)] set fname $option(tmp)/rat.[RatGenId] set fh [open $fname w 0600] - fconfigure $fh -encoding $charset + ConfigureEncoding $fh $charset puts -nonewline $fh [$mh(composeBody) get 0.0 end] close $fh @@ -1195,5 +1220,6 @@ $mh(composeBody) delete 0.0 end set fh [open $fname r] - fconfigure $fh -encoding $charset + # The charset is already sanitized by ComposeEEdit + ConfigureEncoding $fh $charset while { -1 != [gets $fh line]} { $mh(composeBody) insert end "$line\n" @@ -2605,5 +2631,5 @@ } set editor($t(external_editor)) \ - [list $option(editor) $charset] + [list $option(editor) [SaneCharset $charset]] } if ![info exists option(eeditor)] {