# 218 lines, 6.9 KiB, Python
# -*- coding: UTF-8 -*-
|
|
import table2
|
|
import rule
|
|
import util
|
|
import os
|
|
import sys
|
|
import multiprocessing
|
|
|
|
# Tables produced by the most recent search_convert() run; read by
# get_folder_path(), get_excel_obj() and check_all_rule().
all_table = []

# Custom structures read from struct.xlsx by read_struct_config():
# structure name -> list of attribute cell values.
struct_config = {}
|
|
|
|
def xlsx_get_function(sheet):
    """Build zero-based accessor closures around a worksheet.

    Args:
        sheet: Worksheet-like object exposing ``max_row``, ``max_column``,
            ``rows`` and ``cell(row, column)`` (an openpyxl sheet when
            called from init_config_struct()).

    Returns:
        The tuple ``(gmr, gmc, access, access_color, get_rows)``:
        ``gmr()`` / ``gmc()`` return ``sheet.max_row`` / ``sheet.max_column``,
        ``access(i, j)`` returns the value of the cell at zero-based
        ``(i, j)``, ``access_color(i, j)`` returns that cell's fill colour
        as an ``(r, g, b)`` tuple, and ``get_rows()`` returns ``sheet.rows``.
    """
    def _cell(i, j):
        # Callers use zero-based indices; cell() is called with index + 1.
        return sheet.cell(i + 1, j + 1)

    def gmr():
        return sheet.max_row

    def gmc():
        return sheet.max_column

    def access(i, j):
        return _cell(i, j).value

    def access_color(i, j):
        fill = _cell(i, j).fill
        if not fill:
            # No fill information: report white.
            return 255, 255, 255
        index = fill.start_color.index
        # NOTE(review): `long` exists only on Python 2, and `index_to_rgb`
        # is neither defined nor imported in the visible part of this file,
        # so the integer branch raises NameError unless it is provided
        # elsewhere -- confirm.
        if isinstance(index, (int, long)):
            return index_to_rgb(index)
        # Otherwise the index is parsed as a hex string; only the low
        # 24 bits (RGB) are kept.
        rgb = int(index, 16) & 0x00FFFFFF
        return rgb >> 16, (rgb >> 8) & 0xff, rgb & 0xff

    def get_rows():
        return sheet.rows

    return gmr, gmc, access, access_color, get_rows
|
|
|
|
def read_struct_config(get_max_row, get_max_col, access):
    """Read custom structure definitions from one sheet into ``struct_config``.

    Rows are scanned from zero-based row 4 until the first row whose column 0
    is empty. Column 0 holds the structure name; the following columns hold
    its attributes. Columns whose row-1 cell equals ``"none"`` are skipped,
    and the first empty attribute cell ends that row.

    Args:
        get_max_row: Callable returning the number of rows to scan.
        get_max_col: Callable returning the number of columns to scan.
        access: Callable ``access(i, j)`` returning the value of the cell at
            zero-based row ``i`` and column ``j``.

    Returns:
        ``True`` when every structure name was new, ``False`` when at least
        one name was already present in ``struct_config``. A duplicate is
        reported on stdout and still overwrites the earlier definition.
    """
    global struct_config
    success = True
    max_row = get_max_row()
    max_col = get_max_col()
    for i in range(4, max_row):
        name = access(i, 0)
        if not name:
            break
        attribs = []
        for j in range(1, max_col):
            if access(1, j) == "none":
                continue
            attrib = access(i, j)
            if not attrib:
                break
            attribs.append(attrib)
        # `in` instead of dict.has_key(), which no longer exists on Python 3.
        if name in struct_config:
            success = False
            print(u"[ERROR]struct.xlsx 有同名的自定义结构")
        struct_config[name] = attribs
    return success
|
|
|
|
def init_config_struct(folder_path):
    """Load custom structures from ``<folder_path>/struct.xlsx`` if present.

    Every sheet of the workbook is passed to read_struct_config().

    Args:
        folder_path: Directory that may contain ``struct.xlsx``.

    Returns:
        ``True`` when the file does not exist or every sheet was read
        without a duplicate structure name, ``False`` otherwise.
    """
    struct_path = os.path.join(folder_path, 'struct.xlsx')
    if not os.path.exists(struct_path):
        return True

    # Imported here so openpyxl is only needed when struct.xlsx exists.
    from openpyxl import load_workbook

    success = True
    wb = load_workbook(struct_path, data_only=True)
    try:
        for sheet in wb:
            gmr, gmc, access, _, _ = xlsx_get_function(sheet)
            # Call first, then combine, so every sheet is still read after
            # an earlier sheet has failed.
            success = read_struct_config(gmr, gmc, access) and success
    finally:
        # Release the workbook even if reading a sheet raises.
        wb.close()
    return success
|
|
|
|
def get_folder_path():
    """Return the ``folder_path`` attribute of the first entry of ``all_table``.

    Raises:
        IndexError: If ``all_table`` is empty.
    """
    # NOTE(review): get_excel_obj() reads entries by key
    # (entry["file_path"]) while this reads an attribute -- confirm the
    # entry type supports both access styles.
    first_table = all_table[0]
    return first_table.folder_path
|
|
|
|
|
|
def get_excel_obj(file_path):
    """Return the first entry of ``all_table`` whose "file_path" matches.

    Args:
        file_path: Value compared with each entry's ``"file_path"`` item.

    Returns:
        The matching entry, or ``None`` when no entry matches.
    """
    matches = (table for table in all_table if table["file_path"] == file_path)
    return next(matches, None)
|
|
|
|
|
|
def convert(split_path_list, kind, struct_config, config=None):
    """Convert a batch of workbooks, stopping at the first failure.

    Args:
        split_path_list: Sequence of path-info dicts. Each one is passed to
            ``table2.convert`` and must provide the ``"folder_path"`` and
            ``"file_path"`` keys used for error reporting.
        kind: Not used by this function; kept for interface compatibility.
        struct_config: Custom structure mapping forwarded to
            ``table2.convert``.
        config: Optional configuration forwarded to ``table2.convert``.

    Returns:
        On success, a list with every table returned by ``table2.convert``
        (``None`` results are skipped). If a conversion raises, the
        traceback is printed and an error message string is returned
        instead; the remaining files of the batch are not processed.
    """
    convert_table_list = []
    for path_info in split_path_list:
        try:
            table_infos = table2.convert(path_info, struct_config, config)
        except BaseException as info:
            # NOTE(review): BaseException also covers KeyboardInterrupt and
            # SystemExit; kept so every failure comes back as a message --
            # confirm this is intended.
            import traceback
            folder_path = path_info["folder_path"]
            file_path = path_info["file_path"]
            print("%s %s" % (folder_path, file_path))
            traceback.print_exc()
            # The message starts with the file path relative to the source
            # folder.
            return file_path[len(folder_path):] + u"表" + str(info)
        if table_infos is not None:
            convert_table_list.extend(table_infos)
    return convert_table_list
|
|
|
|
|
|
def _load_convert_config(folder_path):
    """Read config.json from the working directory, else from folder_path."""
    candidates = ('./config.json', os.path.join(folder_path, 'config.json'))
    for candidate in candidates:
        if os.path.exists(candidate):
            return util.read_json_file(candidate)
    return None


def _split_by_size(path_info_list, group_count):
    """Distribute path infos over group_count groups of similar total size."""
    groups = [{"total_size": 0, "path_list": []} for _ in range(group_count)]
    ordered = sorted(path_info_list, key=lambda info: info["file_size"])
    # Largest file first, always into the currently lightest group; the
    # stable re-sort keeps the lightest group at index 0.
    for info in reversed(ordered):
        lightest = groups[0]
        lightest["total_size"] += info["file_size"]
        lightest["path_list"].append(info)
        groups.sort(key=lambda group: group["total_size"])
    return groups


def search_convert(folder_path, dump_path, kind="export"):
    """Find workbooks under ``folder_path`` and convert them in parallel.

    The module-level ``all_table`` is reset and then filled with the tables
    returned by the worker processes. Output directories mirroring the
    source tree are created under ``dump_path`` before conversion starts.

    Args:
        folder_path: Root directory that is walked for input files.
        dump_path: Root directory for the output; a sub-directory is created
            for every source directory containing an accepted file.
        kind: Forwarded to convert().

    Returns:
        ``True`` when at least one file was found and every worker returned
        a table list. ``False`` when a forbidden file name is encountered,
        when no file is found, or when any worker returned an error message
        (the messages are printed).
    """
    global all_table
    all_table = []

    config = _load_convert_config(folder_path)

    path_info_list = []
    prepared_roots = set()
    for root, dirs, files in os.walk(folder_path, True):
        for name in files:
            if not util.is_ok(name) or util.ignore(folder_path, root, name, config):
                continue
            forbid = util.forbid(name, config)
            if forbid:
                print(forbid)
                # Return False rather than None: convert_all_excel()
                # combines this result with `&`, which raises TypeError
                # on None.
                return False
            ab_path = os.path.join(root, name)
            path_info_list.append({
                "folder_path": folder_path,
                "dump_path": dump_path,
                "file_path": ab_path,
                "file_size": os.path.getsize(ab_path),
            })
            # Create the output directory up front, once per source
            # directory. The destination is built by plain string slicing,
            # the same way convert() derives relative paths.
            if root not in prepared_roots:
                dst_folder_path = dump_path + root[len(folder_path):]
                if not os.path.exists(dst_folder_path):
                    os.makedirs(dst_folder_path)
                prepared_roots.add(root)

    if not path_info_list:
        return False

    # One group per CPU, each with a similar total file size.
    cpu_count = multiprocessing.cpu_count()
    split_path_list = _split_by_size(path_info_list, cpu_count)

    pool = multiprocessing.Pool(processes=cpu_count)
    pool_result = [
        pool.apply_async(
            convert, (group["path_list"], kind, struct_config, config))
        for group in split_path_list
    ]
    pool.close()
    pool.join()

    error_msg_list = []
    for async_result in pool_result:
        convert_table_result = async_result.get()
        # convert() returns a list of tables on success and an error
        # message otherwise.
        if isinstance(convert_table_result, list):
            all_table.extend(convert_table_result)
        else:
            error_msg_list.append(convert_table_result)

    if error_msg_list:
        print("********************************")
        for msg in error_msg_list:
            print(msg)
        print("********************************")
        return False
    return True
|
|
|
|
|
|
def check_all_rule():
    """Run ``rule.check`` on every table in ``all_table`` and print failures.

    All tables are checked before anything is printed.

    Returns:
        ``True`` when ``all_table`` is non-empty and every check returned
        ``None``. ``False`` when ``all_table`` is empty or any check
        returned a non-``None`` result (each of its messages is printed).
    """
    if not all_table:
        return False

    results = [rule.check(table) for table in all_table]
    failures = [messages for messages in results if messages is not None]
    for messages in failures:
        for message in messages:
            print(message)
    return not failures
|
|
|
|
|
|
def convert_all_excel(folder_path, dump_path, kind="export"):
    """Load struct definitions, convert every workbook and check the rules.

    Args:
        folder_path: Root directory of the source workbooks.
        dump_path: Root directory for the converted output.
        kind: Conversion kind forwarded to search_convert(). With
            ``"transform"`` the result is always 0 and no rules are checked.

    Returns:
        ``0`` on success (and always for ``kind == "transform"``), ``1``
        when loading struct definitions or converting failed, ``2`` when
        the rule check failed.
    """
    struct_ok = init_config_struct(folder_path)
    # The conversion runs even when struct loading failed. bool() tolerates
    # a non-boolean result such as None, which `&` would reject with a
    # TypeError.
    converted_ok = bool(search_convert(folder_path, dump_path, kind))

    # NOTE(review): transform mode reports success regardless of the
    # outcome above -- confirm this is intended.
    if kind == "transform":
        return 0
    if not (struct_ok and converted_ok):
        return 1
    return 0 if check_all_rule() else 2
|