按分类分割EXCEL文件
在管理工作中,会碰到把报表分割成不同的下属单位,或者把下属单位上报的报表分割成不同的业务种类。
示例:
待分割工作表 SALES.XLSX
产品号 | 销量 | 客户 | 说明 |
A2312 | 100 | V01 | |
A2311 | 120 | V02 | |
A2311 | 200 | V03 | TEST |
A2313 | 50 | V04 | |
A2315 | 80 | V05 | |
A2318 | 130 | V06 | |
A2311 | 120 | V07 | |
A2314 | 200 | V08 | OK |
A2313 | 50 | V09 | |
A2312 | 80 | V10 |
产品号 | 销量 | 客户 | 说明 |
A2312 | 100 | C01 | |
A2311 | 120 | C02 | |
A2311 | 200 | C03 | TEST |
A2313 | 50 | C04 | |
A2315 | 80 | C05 |
分割依据: CATE.XLSX
CATE | PRD_ID |
洗涤用品 | A2311 |
食品 | A2312 |
食品 | A2313 |
食品 | A2314 |
食品 | A2315 |
洗涤用品 | A2318 |
# coding=UTF-8
"""
标题:Excel文件分割
功能:检索excel 文件,按标识列,保存到N个文件中。
开发人员:seakingx
建立时间:2018.11.02
最后修改:2018.11.05
python 2.7
"""
def split_test():
xls_file = r"CATE.xlsx"
data = pd.read_excel(xls_file, 0)
rows = data.shape[0] # 获取行数 shape[1]获取列数
department_dict = {}
department_list = []
for i in range(rows):
temp = data['CATE'][i]
key = data['PRD_ID'][i]
if temp not in department_list:
department_list.append(temp) # 将分类存在一个列表中
department_dict[temp] = [key] # 建立分类字典
else:
# 分类字典添加数据
key_list = department_dict[temp]
key_list.append(key)
department_dict[temp] = key_list
print len(department_list), rows, len(department_dict)
# for di in department_dict:
# print di, department_dict[di]
ret_data = {'list': department_list, 'dict': department_dict}
split_work = {"xls_file": r"DATA\sales.xlsx", "xls_fld": u"产品号", "file_head": u"产品_", "dir": "DATA"}
split_work['dict_list'] = ret_data['dict']
split_work['file_list'] = ret_data['list']
xls_file = split_work['xls_file']
fld_key = split_work['xls_fld']
# 工作表数量
xl = pd.ExcelFile(xls_file)
xls_sheet_len = len(xl.sheet_names)
# 获取市县列表、机构代码名称对应表、市县机构字典
department_list = split_work['file_list']
department_dict = split_work['dict_list']
# 按分类遍历数据、写入数据
work_info = []
for department in department_list:
print department
file_name = u"{0}\{1}{2}.xlsx".format(split_work['dir'], split_work['file_head'], department)
work_item = [department, file_name]
xls_save_file = pd.ExcelWriter(file_name)
# 遍历工作表
for sheet_i in range(xls_sheet_len):
data = pd.read_excel(xls_file, sheet_i)
rows = data.shape[0] # 获取行数 shape[1]获取列数
new_df = pd.DataFrame()
# 遍历行,筛选数据
for i in range(0, rows):
if data[fld_key][i] in department_dict[department]:
new_df = pd.concat([new_df, data.iloc[[i], :]], axis=0, ignore_index=True)
# 写入工作表
work_item.append(len(new_df))
new_df.to_excel(xls_save_file, sheet_name=xl.sheet_names[sheet_i], index=False)
# 保存文件
xls_save_file.save()
work_info.append(work_item)
for w in work_item:
print w,
print "完成"
work_df = pd.DataFrame(work_info, columns=[u'分类', u'文件名'] + xl.sheet_names, dtype=float)
rpt_file_name = split_work['file_head'] + u"_统计.xlsx"
work_df.to_excel(rpt_file_name, sheet_name=u'统计', index=False)
完成结果示范:
统计表
分类 | 文件名 | 分店1 | 分店2 |
洗涤用品 | DATA\产品_洗涤用品.xlsx | 4 | 2 |
食品 | DATA\产品_食品.xlsx | 6 | 3 |
分割后的文件: