On UCS-2 builds of Python 2.7, handle low surrogate chars (GH342)

On UCS-2 builds of Python 2.7-3.2, handle low surrogate characters.
This commit is contained in:
Gregory P. Smith 2018-04-11 14:44:22 -07:00 committed by GitHub
commit 9012f893a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 0 deletions

11
cpplint/cpplint.py vendored
View File

@ -51,6 +51,7 @@ import sre_compile
import string import string
import sys import sys
import unicodedata import unicodedata
import sysconfig
try: try:
xrange # Python 2 xrange # Python 2
@ -4291,6 +4292,16 @@ def GetLineWidth(line):
if unicodedata.east_asian_width(uc) in ('W', 'F'): if unicodedata.east_asian_width(uc) in ('W', 'F'):
width += 2 width += 2
elif not unicodedata.combining(uc): elif not unicodedata.combining(uc):
# Issue 337
# https://mail.python.org/pipermail/python-list/2012-August/628809.html
if (sys.version_info.major, sys.version_info.minor) <= (3, 2):
# https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81
is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4
# https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564
is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF
if not is_wide_build and is_low_surrogate:
width -= 1
width += 1 width += 1
return width return width
else: else:

View File

@ -321,6 +321,8 @@ class CpplintTest(CpplintTestBase):
self.assertEquals(0, cpplint.GetLineWidth('')) self.assertEquals(0, cpplint.GetLineWidth(''))
self.assertEquals(10, cpplint.GetLineWidth(u'x' * 10)) self.assertEquals(10, cpplint.GetLineWidth(u'x' * 10))
self.assertEquals(16, cpplint.GetLineWidth(u'都|道|府|県|支庁')) self.assertEquals(16, cpplint.GetLineWidth(u'都|道|府|県|支庁'))
self.assertEquals(5 + 13 + 9, cpplint.GetLineWidth(
u'd𝐱/dt' + u'f : t 𝐱' + u't 𝐱'))
def testGetTextInside(self): def testGetTextInside(self):
self.assertEquals('', cpplint._GetTextInside('fun()', r'fun\(')) self.assertEquals('', cpplint._GetTextInside('fun()', r'fun\('))