# -*- coding: UTF-8 -*-
import table2
import rule
import util
import os
import sys
import multiprocessing

all_table = []
struct_config = dict()


def xlsx_get_function(sheet):
    # Return accessor closures for an openpyxl worksheet, using 0-based indices.
    def gmr():
        return sheet.max_row

    def gmc():
        return sheet.max_column

    def access(i, j):
        return sheet.cell(i + 1, j + 1).value

    def access_color(i, j):
        fill = sheet.cell(i + 1, j + 1).fill
        if not fill:
            return 255, 255, 255
        x = fill.start_color.index
        if isinstance(x, (int, long)):
            # Legacy indexed colour; index_to_rgb is expected to be provided
            # elsewhere in the project (it is not defined or imported in this module).
            return index_to_rgb(x)
        else:
            # ARGB hex string; drop the alpha channel and split into R, G, B.
            color_int = int(x, 16) & 0x00FFFFFF
            r, g, b = color_int >> 16, (color_int >> 8) & 0xff, color_int & 0xff
            return r, g, b

    def get_rows():
        return sheet.rows

    return gmr, gmc, access, access_color, get_rows


def read_struct_config(get_max_row, get_max_col, access):
    # Custom struct definitions start on the fifth row (0-based row 4); column 0
    # holds the struct name and row 1 holds the per-column platform flag
    # ("none" means the column is skipped).
    success = True
    global struct_config
    max_row = get_max_row()
    max_col = get_max_col()
    for i in range(4, max_row):
        name = access(i, 0)
        if not name:
            break
        attribs = []
        for j in range(1, max_col):
            platform = access(1, j)
            if platform == "none":
                continue
            attrib = access(i, j)
            if not attrib:
                break
            attribs.append(attrib)
        if name in struct_config:
            success = False
            print(u"[ERROR] struct.xlsx contains custom structs with the same name")
        struct_config[name] = attribs
    return success


def init_config_struct(folder_path):
    # Load struct.xlsx (if present) and populate the global struct_config.
    success = True
    struct_path = os.path.join(folder_path, 'struct.xlsx')
    if os.path.exists(struct_path):
        from openpyxl import load_workbook
        wb = load_workbook(struct_path, data_only=True)
        for sheet in wb:
            gmr, gmc, access, access_color, get_rows = xlsx_get_function(sheet)
            success = success & read_struct_config(gmr, gmc, access)
        wb.close()
    return success


def get_folder_path():
    return all_table[0].folder_path


def get_excel_obj(file_path):
    for i in range(len(all_table)):
        if all_table[i]["file_path"] == file_path:
            return all_table[i]
    return None


def convert(split_path_list, kind, struct_config, config=None):
    # Worker entry point: convert a batch of files. Returns a list of table
    # infos on success, or a single error string on the first failure (the
    # parent process tells the two apart by type).
    convert_table_list = []
    for i in range(len(split_path_list)):
        try:
            table_infos = table2.convert(split_path_list[i], struct_config, config)
        except BaseException as info:
            print split_path_list[i]["folder_path"], split_path_list[i]["file_path"]
            import traceback
            exc_info = sys.exc_info()
            traceback.print_exception(*exc_info)
            st = split_path_list[i]["file_path"][len(split_path_list[i]["folder_path"]):] + u" table: " + str(info)
            return st
        if table_infos is not None:
            for j in range(len(table_infos)):
                convert_table_list.append(table_infos[j])
    return convert_table_list


def search_convert(folder_path, dump_path, kind="export"):
    path_info_list = []
    global all_table
    all_table = []
    dst_folder_map = {}
    # A config.json next to the tool takes priority over one inside the data folder.
    if os.path.exists('./config.json'):
        config = util.read_json_file('./config.json')
    elif os.path.exists(os.path.join(folder_path, 'config.json')):
        config = util.read_json_file(os.path.join(folder_path, 'config.json'))
    else:
        config = None
    for root, dirs, files in os.walk(folder_path, True):
        for name in files:
            if not util.is_ok(name) or util.ignore(folder_path, root, name, config):
                continue
            forbid = util.forbid(name, config)
            if forbid:
                print forbid
                return False
            ab_path = os.path.join(root, name)
            path_info = {}
            path_info["folder_path"] = folder_path
            path_info["dump_path"] = dump_path
            path_info["file_path"] = ab_path
            path_info["file_size"] = os.path.getsize(ab_path)
            path_info_list.append(path_info)
            # Create the destination directories in advance.
            if root not in dst_folder_map:
                dst_folder_path = dump_path + root[len(folder_path):]
                if not os.path.exists(dst_folder_path):
                    os.makedirs(dst_folder_path)
                dst_folder_map[root] = 1
    if len(path_info_list) > 0:
        # Split the files into groups whose total sizes are roughly equal.
        path_info_list.sort(key=lambda k: k["file_size"], reverse=False)
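        # The loop below does a greedy balance: files are taken largest-first
        # (the list is sorted ascending and pop() removes from the end), and each
        # file goes to the currently lightest group, because the group list is
        # re-sorted by total_size after every assignment. For example, with two
        # workers and file sizes [1, 2, 3, 4, 5] the groups end up as
        # {5, 2, 1} (total 8) and {4, 3} (total 7).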
        cpu_count = multiprocessing.cpu_count()
        split_path_list = []
        for i in xrange(cpu_count):
            files = {}
            files["total_size"] = 0
            files["path_list"] = []
            split_path_list.append(files)
        while len(path_info_list) > 0:
            element = path_info_list.pop()
            split_path_list[0]["total_size"] += element["file_size"]
            split_path_list[0]["path_list"].append(element)
            split_path_list.sort(key=lambda k: k["total_size"], reverse=False)
        # Convert each group in its own worker process.
        pool = multiprocessing.Pool(processes=cpu_count)
        pool_result = []
        global struct_config
        for i in xrange(cpu_count):
            pool_result.append(pool.apply_async(convert, (split_path_list[i]["path_list"], kind, struct_config, config)))
        pool.close()
        pool.join()
        error_msg_list = []
        for i in pool_result:
            convert_table_result = i.get()
            if isinstance(convert_table_result, list):
                for j in range(len(convert_table_result)):
                    all_table.append(convert_table_result[j])
            else:
                # The worker returned an error string instead of a table list.
                error_msg_list.append(convert_table_result)
        if len(error_msg_list) > 0:
            print("********************************")
            for msg in error_msg_list:
                print(msg)
            print("********************************")
            return False
        else:
            return True
    else:
        return False


def check_all_rule():
    # Run rule checks on every converted table and print any errors found.
    if len(all_table) > 0:
        error_msg_list = []
        for i in range(len(all_table)):
            error_msgs = rule.check(all_table[i])
            if error_msgs is not None:
                error_msg_list.append(error_msgs)
        if len(error_msg_list) > 0:
            for i in range(len(error_msg_list)):
                for j in range(len(error_msg_list[i])):
                    print(error_msg_list[i][j])
            return False
        else:
            return True
    else:
        return False


def convert_all_excel(folder_path, dump_path, kind="export"):
    # Returns 0 on success, 1 if conversion failed, 2 if the rule check failed.
    success = init_config_struct(folder_path)
    # Bitwise & (not "and") so search_convert still runs even if loading
    # struct.xlsx already failed.
    success = success & search_convert(folder_path, dump_path, kind)
    if kind == "transform":
        return 0
    if success:
        success = check_all_rule()
        if success:
            return 0
        else:
            return 2
    else:
        return 1
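

# A minimal command-line entry point, offered only as a sketch: this module
# does not define one, and the real tool may be driven from another script.
# The argument order (source folder, dump folder, optional kind) and the use
# of the return code as the process exit code are assumptions based on
# convert_all_excel() above. Keeping the pool launch behind the __main__
# guard also matters on Windows, where multiprocessing re-imports the module
# in child processes.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("usage: <this script> <excel_folder> <dump_folder> [export|transform]")
        sys.exit(1)
    src = sys.argv[1]
    dst = sys.argv[2]
    run_kind = sys.argv[3] if len(sys.argv) > 3 else "export"
    # 0 = success, 1 = conversion failed, 2 = rule check failed
    sys.exit(convert_all_excel(src, dst, run_kind))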