-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathunicode_fromformat.patch
208 lines (205 loc) · 7.99 KB
/
unicode_fromformat.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
diff -r 847a0e74c4cc Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py Sun Jul 20 21:26:04 2014 -0700
+++ b/Lib/test/test_unicode.py Tue Jul 22 00:13:24 2014 +0200
@@ -1659,6 +1659,122 @@ class UnicodeTest(
self.assertEqual("%s" % u, u'__unicode__ overridden')
self.assertEqual("{}".format(u), '__unicode__ overridden')
+ # Test PyUnicode_FromFormat()
+ def test_from_format(self):
+ test_support.import_module('ctypes')
+ from ctypes import (
+ pythonapi, py_object, sizeof,
+ c_int, c_long, c_longlong, c_ssize_t,
+ c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
+ if sys.maxunicode == 0xffff:
+ name = "PyUnicodeUCS2_FromFormat"
+ else:
+ name = "PyUnicodeUCS4_FromFormat"
+ _PyUnicode_FromFormat = getattr(pythonapi, name)
+ _PyUnicode_FromFormat.restype = py_object
+
+ def PyUnicode_FromFormat(format, *args):
+ cargs = tuple(
+ py_object(arg) if isinstance(arg, unicode) else arg
+ for arg in args)
+ return _PyUnicode_FromFormat(format, *cargs)
+
+ def check_format(expected, format, *args):
+ text = PyUnicode_FromFormat(format, *args)
+ self.assertEqual(expected, text)
+
+ # ascii format, non-ascii argument
+ check_format(u'ascii\x7f=unicode\xe9',
+ b'ascii\x7f=%U', u'unicode\xe9')
+
+ # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
+ # raises an error (check disabled: the assertion below is commented out)
+ #self.assertRaisesRegex(ValueError,
+ # '^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
+ # 'string, got a non-ASCII byte: 0xe9$',
+ # PyUnicode_FromFormat, b'unicode\xe9=%s', u'ascii')
+
+ # test "%c"
+ check_format(u'\uabcd',
+ b'%c', c_int(0xabcd))
+ if sys.maxunicode > 0xffff:
+ check_format(u'\U0010ffff',
+ b'%c', c_int(0x10ffff))
+ with self.assertRaises(OverflowError):
+ PyUnicode_FromFormat(b'%c', c_int(0x110000))
+ # Issue #18183
+ if sys.maxunicode > 0xffff:
+ check_format(u'\U00010000\U00100000',
+ b'%c%c', c_int(0x10000), c_int(0x100000))
+
+ # test "%"
+ check_format(u'%',
+ b'%')
+ check_format(u'%',
+ b'%%')
+ check_format(u'%s',
+ b'%%s')
+ check_format(u'[%]',
+ b'[%%]')
+ check_format(u'%abc',
+ b'%%%s', b'abc')
+
+ # test %S
+ check_format(u"repr=abc",
+ b'repr=%S', u'abc')
+
+ # test %R
+ check_format(u"repr=u'abc'",
+ b'repr=%R', u'abc')
+
+ # test integer formats (%i, %d, %u)
+ check_format(u'010',
+ b'%03i', c_int(10))
+ check_format(u'0010',
+ b'%0.4i', c_int(10))
+ check_format(u'-123',
+ b'%i', c_int(-123))
+ check_format(u'-123',
+ b'%li', c_long(-123))
+ check_format(u'-123',
+ b'%zi', c_ssize_t(-123))
+
+ check_format(u'-123',
+ b'%d', c_int(-123))
+ check_format(u'-123',
+ b'%ld', c_long(-123))
+ check_format(u'-123',
+ b'%zd', c_ssize_t(-123))
+
+ check_format(u'123',
+ b'%u', c_uint(123))
+ check_format(u'123',
+ b'%lu', c_ulong(123))
+ check_format(u'123',
+ b'%zu', c_size_t(123))
+
+ # test long output
+ PyUnicode_FromFormat(b'%p', c_void_p(-1))
+
+ # test %V
+ check_format(u'repr=abc',
+ b'repr=%V', u'abc', b'xyz')
+ check_format(u'repr=\xe4\xba\xba\xe6\xb0\x91',
+ b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
+ check_format(u'repr=abc\xff',
+ b'repr=%V', None, b'abc\xff')
+
+ # not supported: copy the raw format string. these tests are just here
+ # to check for crashes and should not be considered as specifications
+ check_format(u'%s',
+ b'%1%s', b'abc')
+ check_format(u'%1abc',
+ b'%1abc')
+ check_format(u'%+i',
+ b'%+i', c_int(10))
+ check_format(u'%s',
+ b'%.%s', b'abc')
+
@test_support.cpython_only
def test_encode_decimal(self):
from _testcapi import unicode_encodedecimal
diff -r 847a0e74c4cc Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Sun Jul 20 21:26:04 2014 -0700
+++ b/Objects/unicodeobject.c Tue Jul 22 00:13:24 2014 +0200
@@ -690,7 +690,12 @@ makefmt(char *fmt, int longflag, int siz
*fmt = '\0';
}
-#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
+#define appendstring(string) \
+ do { \
+ for (copy = string;*copy; copy++) { \
+ *s++ = (unsigned char)*copy; \
+ } \
+ } while (0)
PyObject *
PyUnicode_FromFormatV(const char *format, va_list vargs)
@@ -845,7 +850,7 @@ PyUnicode_FromFormatV(const char *format
str = PyObject_Str(obj);
if (!str)
goto fail;
- n += PyUnicode_GET_SIZE(str);
+ n += PyString_GET_SIZE(str);
/* Remember the str and switch to the next slot */
*callresult++ = str;
break;
@@ -925,12 +930,12 @@ PyUnicode_FromFormatV(const char *format
}
/* handle the long flag, but only for %ld and %lu.
others can be added when necessary. */
- if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
+ if (*f == 'l' && (f[1] == 'd' || f[1] == 'i' || f[1] == 'u')) {
longflag = 1;
++f;
}
/* handle the size_t flag. */
- if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
+ if (*f == 'z' && (f[1] == 'd' || f[1] == 'i' || f[1] == 'u')) {
size_tflag = 1;
++f;
}
@@ -939,8 +944,9 @@ PyUnicode_FromFormatV(const char *format
case 'c':
*s++ = va_arg(vargs, int);
break;
+ case 'i':
case 'd':
- makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'd');
+ makefmt(fmt, longflag, size_tflag, zeropad, width, precision, *f);
if (longflag)
sprintf(realbuffer, fmt, va_arg(vargs, long));
else if (size_tflag)
@@ -959,11 +965,6 @@ PyUnicode_FromFormatV(const char *format
sprintf(realbuffer, fmt, va_arg(vargs, unsigned int));
appendstring(realbuffer);
break;
- case 'i':
- makefmt(fmt, 0, 0, zeropad, width, precision, 'i');
- sprintf(realbuffer, fmt, va_arg(vargs, int));
- appendstring(realbuffer);
- break;
case 'x':
makefmt(fmt, 0, 0, zeropad, width, precision, 'x');
sprintf(realbuffer, fmt, va_arg(vargs, int));
@@ -1006,15 +1007,10 @@ PyUnicode_FromFormatV(const char *format
case 'S':
case 'R':
{
- Py_UNICODE *ucopy;
- Py_ssize_t usize;
- Py_ssize_t upos;
+ const char *str = PyString_AS_STRING(*callresult);
/* unused, since we already have the result */
(void) va_arg(vargs, PyObject *);
- ucopy = PyUnicode_AS_UNICODE(*callresult);
- usize = PyUnicode_GET_SIZE(*callresult);
- for (upos = 0; upos<usize;)
- *s++ = ucopy[upos++];
+ appendstring(str);
/* We're done with the unicode()/repr() => forget it */
Py_DECREF(*callresult);
/* switch to next unicode()/repr() result */