-
Notifications
You must be signed in to change notification settings - Fork 4
/
splitFile.py
58 lines (50 loc) · 1.82 KB
/
splitFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# -*- coding: utf-8 -*-
# -*- author: JeremySun -*-
# -*- dating: 19/10/24 -*-
# Module imports
import os
import time
from functools import wraps
# Timing decorator
def func_timer(function):
    """Wrap *function* so every call prints start/finish banners with timing.

    Args:
        function: The callable to decorate.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        *function* and returns its result. ``functools.wraps`` copies the
        wrapped function's metadata (``__name__``, ``__doc__``, ...) onto it.

    If *function* raises, the exception propagates and no "finished" banner
    is printed.
    """
    @wraps(function)
    def function_timer(*args, **kwargs):
        print('[Function: {name} start...]'.format(name=function.__name__))
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed (or go negative) when the system clock is adjusted mid-call.
        t0 = time.perf_counter()
        result = function(*args, **kwargs)
        elapsed = time.perf_counter() - t0
        print('[Function: {name} finished, spent time: {time:.2f}s]'.format(
            name=function.__name__, time=elapsed))
        return result

    return function_timer
# File-splitting function
@func_timer
def split_file(file_path, partial_size):
    """Split a UTF-8 text file into numbered parts of at most *partial_size* characters.

    The parts are written into a directory named after the input file
    (without its extension), located next to the input file. Each part is
    named ``<name>_<n><ext>`` with ``n`` counting up from 0. An empty input
    still produces a single, empty part 0.

    Args:
        file_path: Path of the text file to split; it is read as UTF-8.
        partial_size: Maximum number of characters (not bytes) per part.
            Must be at least 1; a non-integer value is truncated to an int.

    Returns:
        None.

    Raises:
        ValueError: If *partial_size* is smaller than 1.
        OSError: If the input cannot be read or an output cannot be written.
    """
    if partial_size < 1:
        raise ValueError('partial_size must be at least 1, got %r' % (partial_size,))
    part_limit = int(partial_size)

    parent_dir, base_name = os.path.split(file_path)
    stem, ext = os.path.splitext(base_name)
    out_dir = os.path.join(parent_dir, stem)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(out_dir, exist_ok=True)

    read_size = 1024 * 512  # upper bound on characters requested per read
    part_no = 0
    with open(file_path, 'r', encoding='utf-8') as stream:
        # Read ahead before opening a part, so a part file is only created
        # when there is content for it (part 0 is the one exception, which
        # keeps an empty input producing one empty part).
        chunk = stream.read(min(read_size, part_limit))
        while chunk or part_no == 0:
            part_filename = os.path.join(out_dir, stem + '_' + str(part_no) + ext)
            print('write start %s' % part_filename)
            written = 0
            with open(part_filename, 'w', encoding='utf-8') as part_stream:
                while chunk:
                    part_stream.write(chunk)
                    written += len(chunk)
                    if written >= part_limit:
                        break
                    # Never request more than the space left in this part,
                    # so the part size honours part_limit exactly.
                    chunk = stream.read(min(read_size, part_limit - written))
            if not chunk:
                # An empty read means end of input was reached inside this part.
                break
            part_no += 1
            chunk = stream.read(min(read_size, part_limit))
    print('Splitting is done')
if __name__ == '__main__':
    import sys

    # Usage: splitFile.py [input_path [part_size]]
    # With no arguments the script behaves as before, using the defaults below.
    default_path = r'C:\Users\JeremySun\Desktop\Internship\Project02_corpusProcessor\english_text_pre.txt'
    default_part_size = 100 * 100 * 1000  # 10,000,000, passed as partial_size

    input_path = sys.argv[1] if len(sys.argv) > 1 else default_path
    part_size = int(sys.argv[2]) if len(sys.argv) > 2 else default_part_size
    split_file(input_path, part_size)