92 lines
4.8 KiB
Python
92 lines
4.8 KiB
Python
# -*- coding: utf-8 -*-
|
||
#
|
||
import unittest
|
||
from nose import tools
|
||
|
||
import warnings
|
||
|
||
from kitchen.text import utf8
|
||
|
||
import base_classes
|
||
|
||
class TestUTF8(base_classes.UnicodeTestData, unittest.TestCase):
|
||
def setUp(self):
|
||
# All of the utf8* functions are deprecated
|
||
warnings.simplefilter('ignore', DeprecationWarning)
|
||
|
||
def tearDown(self):
|
||
warnings.simplefilter('default', DeprecationWarning)
|
||
|
||
def test_utf8_width(self):
|
||
'''Test that we find the proper number of spaces that a utf8 string will consume'''
|
||
tools.ok_(utf8.utf8_width(self.utf8_japanese) == 31)
|
||
tools.ok_(utf8.utf8_width(self.utf8_spanish) == 50)
|
||
tools.ok_(utf8.utf8_width(self.utf8_mixed) == 23)
|
||
|
||
def test_utf8_width_non_utf8(self):
|
||
'''Test that we handle non-utf8 bytes in utf8_width without backtracing'''
|
||
# utf8_width() treats non-utf8 byte sequences as undecodable so you
|
||
# end up with less characters than normal. In this string:
|
||
# Python-2.7+ replaces problematic characters in a different manner
|
||
# than older pythons.
|
||
# Python >= 2.7:
|
||
# El veloz murci<63>lago salt<6C> sobre el perro perezoso.
|
||
# Python < 2.7:
|
||
# El veloz murci<63>go salt<6C>bre el perro perezoso.
|
||
if len(str('\xe9la'.encode('latin1'), 'utf8', 'replace')) == 1:
|
||
# Python < 2.7
|
||
tools.ok_(utf8.utf8_width(self.latin1_spanish) == 45)
|
||
else:
|
||
# Python >= 2.7
|
||
tools.ok_(utf8.utf8_width(self.latin1_spanish) == 50)
|
||
|
||
def test_utf8_width_chop(self):
|
||
'''utf8_width_chop with byte strings'''
|
||
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed) == (23, self.utf8_mixed))
|
||
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 23) == (23, self.utf8_mixed))
|
||
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 22) == (22, self.utf8_mixed[:-1]))
|
||
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 19) == (18, self.u_mixed[:-4].encode('utf8')))
|
||
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 2) == (2, self.u_mixed[0].encode('utf8')))
|
||
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 1) == (0, b''))
|
||
|
||
def test_utf8_width_chop_unicode(self):
|
||
'''utf8_width_chop with unicode input'''
|
||
tools.ok_(utf8.utf8_width_chop(self.u_mixed) == (23, self.u_mixed))
|
||
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 23) == (23, self.u_mixed))
|
||
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 22) == (22, self.u_mixed[:-1]))
|
||
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 19) == (18, self.u_mixed[:-4]))
|
||
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 2) == (2, self.u_mixed[0]))
|
||
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 1), (0, ''))
|
||
|
||
def test_utf8_width_fill(self):
|
||
'''Pad a utf8 string'''
|
||
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 1) == self.utf8_mixed)
|
||
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25) == self.utf8_mixed + b' ')
|
||
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, left=False) == b' ' + self.utf8_mixed)
|
||
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, chop=18) == self.u_mixed[:-4].encode('utf8') + b' ')
|
||
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, chop=18, prefix=self.utf8_spanish, suffix=self.utf8_spanish) == self.utf8_spanish + self.u_mixed[:-4].encode('utf8') + self.utf8_spanish + b' ')
|
||
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, chop=18) == self.u_mixed[:-4].encode('utf8') + b' ')
|
||
tools.ok_(utf8.utf8_width_fill(self.u_mixed, 25, chop=18, prefix=self.u_spanish, suffix=self.utf8_spanish) == self.u_spanish.encode('utf8') + self.u_mixed[:-4].encode('utf8') + self.u_spanish.encode('utf8') + b' ')
|
||
pass
|
||
|
||
def test_utf8_valid(self):
|
||
'''Test that a utf8 byte sequence is validated'''
|
||
warnings.simplefilter('ignore', DeprecationWarning)
|
||
tools.ok_(utf8.utf8_valid(self.utf8_japanese) == True)
|
||
tools.ok_(utf8.utf8_valid(self.utf8_spanish) == True)
|
||
warnings.simplefilter('default', DeprecationWarning)
|
||
|
||
def test_utf8_invalid(self):
|
||
'''Test that we return False with non-utf8 chars'''
|
||
warnings.simplefilter('ignore', DeprecationWarning)
|
||
tools.ok_(utf8.utf8_valid(b'\xff') == False)
|
||
tools.ok_(utf8.utf8_valid(self.latin1_spanish) == False)
|
||
warnings.simplefilter('default', DeprecationWarning)
|
||
|
||
def test_utf8_text_wrap(self):
|
||
tools.ok_(utf8.utf8_text_wrap(self.utf8_mixed) == [self.utf8_mixed])
|
||
tools.ok_(utf8.utf8_text_wrap(self.utf8_paragraph) == self.utf8_paragraph_out)
|
||
tools.ok_(utf8.utf8_text_wrap(self.utf8_mixed_para) == self.utf8_mixed_para_out)
|
||
tools.ok_(utf8.utf8_text_wrap(self.utf8_mixed_para, width=57,
|
||
initial_indent=b' ', subsequent_indent=b'----') ==
|
||
self.utf8_mixed_para_57_initial_subsequent_out)
|