-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
66 lines (51 loc) · 2.18 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import re
def parse_boolean(var=None):
    """Interpret a loosely-typed value as a boolean.

    Rules, applied in order:

    * ``None`` (or no argument) is ``False``.
    * A ``bool`` is returned unchanged.
    * Anything ``int()`` can convert (ints, floats, numeric strings such as
      ``"1"`` or ``" 0 "``) is ``True`` only when the integer value is 1.
    * Any other string is ``True`` unless, after trimming surrounding
      whitespace and ignoring case, it equals ``"false"`` or ``"no"``.
      Note that this makes the empty string ``True``.
    * Any other object falls back to its ordinary truthiness.

    Args:
        var: The value to interpret.

    Returns:
        The boolean interpretation of ``var``.
    """
    if var is None:
        return False
    if isinstance(var, bool):
        return var

    try:
        return int(var) == 1
    except (TypeError, ValueError, OverflowError):
        # Not convertible to an integer; fall through to the text rules.
        pass

    try:
        # int() ignores surrounding whitespace, so the text comparison must
        # do the same or " false " would be read as True.
        text = var.strip().lower()
    except AttributeError:
        # Not string-like (e.g. a list or NaN): use plain truthiness rather
        # than crashing.
        return bool(var)

    return text not in ("false", "no")
def chunk_string(str_in, max_chunk_length):
    """Split a string into pieces of at most ``max_chunk_length`` characters.

    Each piece is broken at the last run of whitespace that starts within
    the length limit; that whitespace is dropped from the output. When no
    whitespace is available (a single word longer than the limit) the text
    is cut exactly at the limit. A string that already fits is returned
    unchanged as a one-element list.

    Args:
        str_in: The text to split.
        max_chunk_length: Maximum length of each returned piece; must be a
            positive integer.

    Returns:
        A list of strings, in order, none longer than ``max_chunk_length``.

    Raises:
        ValueError: If ``max_chunk_length`` is less than 1 (the split could
            never make progress).
    """
    if max_chunk_length < 1:
        raise ValueError("max_chunk_length must be a positive integer")

    total = len(str_in)
    if total <= max_chunk_length:
        return [str_in]

    # Materialise the whitespace spans once. A bare finditer() iterator
    # would lose the match that ends each scan, hiding that break point
    # from the following chunk.
    spaces = [(m.start(), m.end()) for m in re.finditer(r"\s+", str_in)]
    space_count = len(spaces)
    next_space = 0

    chunks = []
    start = 0
    # Keep splitting while the remainder does not fit in a single chunk.
    while total - start > max_chunk_length:
        limit = start + max_chunk_length

        # Advance to the last whitespace run that starts within the limit.
        break_at = None
        while next_space < space_count and spaces[next_space][0] <= limit:
            break_at = spaces[next_space]
            next_space += 1

        if break_at is None:
            # No whitespace available: hard split in the middle of a word.
            chunks.append(str_in[start:limit])
            start = limit
        else:
            piece = str_in[start:break_at[0]]
            # Leading whitespace would otherwise yield an empty chunk.
            if piece:
                chunks.append(piece)
            start = break_at[1]

    if start < total:
        chunks.append(str_in[start:])
    return chunks
# Simple manual check: split a sample paragraph into 140-character chunks and
# print each chunk followed by its length.
if __name__ == '__main__':
    t = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat."
    chunks = chunk_string(t, 140)
    for chunk in chunks:
        # Single-argument call form prints identically on Python 2 and 3.
        print(chunk)
        print(len(chunk))