meta/recipes-devtools/python/python/python-2.7.3-CVE-2012-2135.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

Upstream-Status: Backport

Reference:http://bugs.python.org/issue14579

The utf-16 decoder in Python 3.1 through 3.3 does not update the
aligned_end variable after calling the unicode_decode_call_errorhandler
function, which allows remote attackers to obtain sensitive information
(process memory) or cause a denial of service (memory corruption and crash)
via unspecified vectors.

http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2012-2135

diff -urpN a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -495,8 +495,21 @@ class UTF16LETest(ReadTest):
         )
 
     def test_errors(self):
-        self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode, "\xff", "strict", True)
-
+        tests = [
+            (b'\xff', u'\ufffd'),
+            (b'A\x00Z', u'A\ufffd'),
+            (b'A\x00B\x00C\x00D\x00Z', u'ABCD\ufffd'),
+            (b'\x00\xd8', u'\ufffd'),
+            (b'\x00\xd8A', u'\ufffd'),
+            (b'\x00\xd8A\x00', u'\ufffdA'),
+            (b'\x00\xdcA\x00', u'\ufffdA'),
+        ]
+        for raw, expected in tests:
+            print('*****', raw, expected)
+            self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
+                              raw, 'strict', True)
+            self.assertEqual(raw.decode('utf-16le', 'replace'), expected)
+        
 class UTF16BETest(ReadTest):
     encoding = "utf-16-be"
 
@@ -516,7 +529,20 @@ class UTF16BETest(ReadTest):
         )
 
     def test_errors(self):
-        self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode, "\xff", "strict", True)
+        tests = [
+            (b'\xff', u'\ufffd'),
+            (b'\x00A\xff', u'A\ufffd'),
+            (b'\x00A\x00B\x00C\x00DZ', u'ABCD\ufffd'),
+            (b'\xd8\x00', u'\ufffd'),
+            (b'\xd8\x00\xdc', u'\ufffd'),
+            (b'\xd8\x00\x00A', u'\ufffdA'),
+            (b'\xdc\x00\x00A', u'\ufffdA'),
+        ]
+        for raw, expected in tests:
+            print('*****', raw, expected)
+            self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
+                              raw, 'strict', True)
+            self.assertEqual(raw.decode('utf-16be', 'replace'), expected)
 
 class UTF8Test(ReadTest):
     encoding = "utf-8"
diff -urpN a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	2013-03-04 11:34:34.000000000 +0800
+++ b/Objects/unicodeobject.c	2013-03-04 11:36:01.000000000 +0800
@@ -2564,7 +2564,7 @@ PyUnicode_DecodeUTF16Stateful(const char
         }
 
         /* UTF-16 code pair: */
-        if (q >= e) {
+        if (e - q < 2) {
             errmsg = "unexpected end of data";
             startinpos = (((const char *)q)-2)-starts;
             endinpos = ((const char *)e)-starts;