python处理Excel:报表结构转换处理各经联社收支明细表(支出项目汇总表)
一、
#coding=utf-8
#Transformation structure报表结构转换
import xlrd, xlwt, pyautogui, time, pyperclip, easygui, os, tkinter, io,pandas,shutil,re
#import autopy as at
#file= tkinter.filedialog.askopenfile()#选择打开什么文件,返回IO流对象
#file= tkinter.filedialog.askopenfilename()#选择打开什么文件,返回文件名
#print(file)
from io import StringIO
import sys
from contextlib import contextmanager
import sys
def get_file_list(raw_folder_path):
# 打开文件
dirs = os.listdir(raw_folder_path)
raw_file_list=[]
raw_file_path_list=[]
for home, dirs, files in os.walk(raw_folder_path):
for file_name in files:
raw_file_list.append((file_name))
raw_file_path_list.append(os.path.join(home, file_name))
print(raw_file_list)
return raw_file_list,raw_file_path_list
# print('对列表进行排序,查找时可能减少遍历次数。也便于根据账套号顺序查找')
# #file_accnt_num_list2=file_accnt_num_list.sort()
# print('sort影响列表本身,sorted不影响列表本身,用sort排序')
# file_exist_list = sorted(file_accnt_num_list)
# print(file_exist_list)
# return file_exist_list
def gath_data(raw_file_list,raw_file_path_list):
print('gather data to excel')
list1=[]
print("file name")
print('创建一个空的二维数组')
targ_data=pandas.DataFrame(columns=('项目', '本月数', '累计数'))
for i in range(0,len(raw_file_path_list)):
fil_nam = raw_file_list[i]
raw_file_path=raw_file_path_list[i]
print('----------------------------------------------------------------')
print('读取第'+str(i+1)+'个文件。'+"文件名是:",fil_nam)
# get cared macro info from testplan and save as 'MacroInfo.xlsx'
print('最后一行“贝佳制作”不读取。skip_footer:省略从尾部数的行数据,没有第0行,从1开始')
incom = pandas.read_excel(raw_file_path, sheet_name="Sheet1", header=1,dtype=str, usecols='b,c,d', skipfooter=1,keep_default_na=False)
incom.rename(columns={'收入项目': '项目'}, inplace=True)
#print(pandas.DataFrame(incom))
row_n = incom.shape[0]
col_n = incom.shape[1]
list2=[]
for i in range(0,row_n):
list2.append(fil_nam)
print('list2')
print(list2)
#incom['文件名']=list2
print('将文件名列插入到二维数组的第一列')
incom.insert(0,'文件名',list2)
print("incom['文件名']=list2")
print(pandas.DataFrame(incom))
# # 选择 first_name 为Antonio,并且从 'city' 到 'email'的所有列
# #zhaiyao=getInfo.loc[getInfo['分类'] == '摘要', '序号':'数据2']
# # print(zhaiyao)
# # # 选择 first_name 为Antonio,并且从 'city' 到 'email'的所有列
# # data.loc[data['first_name'] == 'Antonio', 'city':'email']
print('pandas查找二维数据不用遍历,但格式比较怪,很多列表、字符串符号')
print('陷阱:系统导出的收支明细表每行单元格中有很多空格')
row1 = incom[incom['项目'].isin(['三、投资收益 '])]
print(row1)
print('row1.index.values[0]返回所在行的索引值')
row1_index = row1.index.values[0]
print(row1_index)
print('插入行只能3个步骤:切割、添加、合并')
part1 = incom.iloc[:row1_index]
part2 = incom.iloc[row1_index:]
# 百度搜索python黑洞网,使用python pandas在Excel中添加空行
# pd.DataFrame([[''] * len(df.columns)], columns=df.columns).append(df)
print('插入多列空行')
for i in range(0, 4):
part2 = pandas.DataFrame([[''] * len(part1.columns)], columns=part1.columns).append(part2)
#print(part2)
incom = part1.append(part2)
print(incom)
expnd = pandas.read_excel(raw_file_path, sheet_name="Sheet1", header=1, dtype=str, usecols='e,f,g',
skipfooter=1, keep_default_na=False)
# print('pandas.DataFrame(expnd)')
# print(pandas.DataFrame(expnd))
row_n = expnd.shape[0]
col_n = expnd.shape[1]
list3=[]
for i in range(0,row_n):
list3.append(fil_nam)
print('list3')
print(list3)
print('将文件名列插入到二维数组的第一列')
expnd.insert(0,'文件名',list3)
print("expnd['文件名']=list3")
print(pandas.DataFrame(expnd))
print('直接暴力修改列名,形成相同列名的两个二维数组,才能合并')
#expnd.columns = ['分类1', '数据1', '数据2']
expnd.rename(columns={'支出项目': '项目','本月数.1': '本月数','累计数.1': '累计数'}, inplace=True)
print('打印重命名列后的二维数组结构')
print(pandas.DataFrame(expnd))
# print('update')
# incom.update(expnd)
# print(incom)
#getInfo = pandas.merge(incom, expnd,left_index=True,right_index=True,how='outer')
getInfo= incom.append(expnd)
#print(getInfo)
#print(pandas.DataFrame(getInfo))
print('存放多个Excel文件的二维数组')
targ_data = targ_data.append(getInfo)
# print('拼接到后面')
# targ_data = pandas.concat([targ_data, getInfo], ignore_index=True)
targ_data = targ_data[['文件名', '项目', '本月数', '累计数']]
print(targ_data)
print('行列转置transpose函数')
#targ_data=targ_data.transpose()
#print(pandas.DataFrame(getInfo))
# print(' 需要转置的那部分的行数、列数')
# row_n = getInfo.shape[0] - 1
# col_n = getInfo.shape[1]
# print('定位用is location,iloc函数')
# print('切片成收入部分,支出部分。Income and expenditure')
# #testdf3.loc[:, ['A', 'C']]
# incom=getInfo.iloc[:,[1, 2, 3]]
# expnd = getInfo.iloc[:, [4, 5, 6]]
# # expnd=getInfo.loc[:,['分类2', '数据3','数据4']] # DF, 指定某几列,直接用列名
# getInfo=pandas.merge(incom,expnd)
# #getInfo=incom+expnd
# # print(pandas.DataFrame(incom))
# #
# # print(pandas.DataFrame(expnd))
# print(pandas.DataFrame(getInfo))
# for i in range(0,row_n):
# print(getInfo.iloc[i,2])
# if getInfo.iloc[i,2]=='摘要':
# print(getInfo.loc[i].values)
# for i in getInfo.index:
# for j in range(len(getInfo.loc[i].values)):
# # row_data=getInfo.loc[i].values
# # #print(row_data)
# # row_data_list=list(row_data)
# # list1.append(row_data_list)
# print('')
#
#
# print(list1)
# print('target data目标数据')
# targ_data=pandas.DataFrame(list1)
# print(targ_data)
# 判断是否存在output文件夹
OutputPath=r"E:\贝佳会计系统导出数据\两项经费统计\报表结构转换"
# if (os.path.exists(OutputPath)):
# shutil.rmtree(OutputPath)
# print('output dir has been rm -rf and new makedirs')
# os.makedirs(OutputPath)
if not os.path.exists(OutputPath):
os.mkdir(OutputPath)
# 写入文件保存在output 文件夹下
filepath = os.path.join(OutputPath, '结构调整后表格.xls')
targ_data.to_excel(filepath, header=1, index=False, encoding='utf-8', sheet_name='1')
def func():
year = time.strftime("%Y")
os.getcwd() # 获取当前工作目录
raw_folder_path = r'E:\贝佳会计系统导出数据\两项经费统计\报表结构转换\需要结构转换的表格'
raw_file_list,raw_file_path_list=get_file_list(raw_folder_path)
#input_file_path_plan = r"E:\贝佳会计系统导出数据\两项经费统计\2020年序时簿\2020 045冲洋经联社序时簿.xls"
gath_data(raw_file_list,raw_file_path_list)
if __name__ == "__main__":
func()
#
#
#
#
#
#
#
#
#
#
# def saveAsNewExcelFile(raw_file_path_list):
# list1=[]
# print("file name")
# fil_nam='村委会1'
#
# for raw_file_path in raw_file_path_list:
#
# for i in getInfo.index:
# for j in range(len(getInfo.loc[i].values)):
# if (getInfo.loc[i].values[j] == '专项应付款-村(社区)办公经费'):
# start_col = i + 1
# row_data = getInfo.loc[i].values
# # print(row_data)
# row_data = list(row_data)
# row_data = list[fil_nam] + row_data
# list1.append(row_data)
# print(list1)
# print('target data目标数据')
# targ_data = pandas.DataFrame(list1)
#
#
#
#
# # get cared macro info from testplan and save as 'MacroInfo.xlsx'
# getInfo = pandas.read_excel(input_file_path_plan, sheet_name="Sheet1", dtype=str, keep_default_na=False)
# for i in getInfo.index:
# for j in range(len(getInfo.loc[i].values)):
# if (getInfo.loc[i].values[j] == '专项应付款-村(社区)办公经费'):
# start_col = i + 1
# row_data=getInfo.loc[i].values
# #print(row_data)
# row_data=list(row_data)
# row_data=list[fil_nam]+row_data
# list1.append(row_data)
# print(list1)
# print('target data目标数据')
# targ_data=pandas.DataFrame(list1)
# #break #add 2.3
# # print(getInfo.loc[i].values[j])
# #x = pandas.DataFrame(getInfo.iloc[start_col:, ])
# #print(x)
# # 判断是否存在output文件夹
# OutputPath=r"e:\贝佳会计系统导出数据\两项经费统计"
# if (os.path.exists(OutputPath)):
# shutil.rmtree(OutputPath)
# print('output dir has been rm -rf and new makedirs')
# os.makedirs(OutputPath)
#
# # 写入文件保存在output 文件夹下
# filepath = os.path.join(OutputPath, '村(社区)办公经费.xls')
# targ_data.to_excel(filepath, header=2, index=False, encoding='utf-8', sheet_name='村(社区)办公经费')
二、支出项目汇总表ChangeExcelStructure03expnd
#coding=utf-8
#Transformation structure报表结构转换
import xlrd, xlwt, pyautogui, time, pyperclip, easygui, os, tkinter, io,pandas,shutil,re
#import autopy as at
import xlrd, xlwt, xlwings, pyautogui, time, pyperclip, easygui, os, tkinter, io,pandas,shutil,re
from tkinter import filedialog,ttk
from openpyxl import load_workbook
#file= tkinter.filedialog.askopenfile()#选择打开什么文件,返回IO流对象
#file= tkinter.filedialog.askopenfilename()#选择打开什么文件,返回文件名
#print(file)
from io import StringIO
import sys
from contextlib import contextmanager
import sys
def get_file_list(raw_folder_path):
# 打开文件
dirs = os.listdir(raw_folder_path)
raw_file_list=[]
raw_file_path_list=[]
for home, dirs, files in os.walk(raw_folder_path):
for file_name in files:
raw_file_list.append((file_name))
raw_file_path_list.append(os.path.join(home, file_name))
print(raw_file_list)
return raw_file_list,raw_file_path_list
# print('对列表进行排序,查找时可能减少遍历次数。也便于根据账套号顺序查找')
# #file_accnt_num_list2=file_accnt_num_list.sort()
# print('sort影响列表本身,sorted不影响列表本身,用sort排序')
# file_exist_list = sorted(file_accnt_num_list)
# print(file_exist_list)
# return file_exist_list
def mod_fil_lis(raw_file_list,raw_file_path_list):
print('modify filename list')
admin_vill_order = ['前锋', '西海', '八家', '冲洋', '新围', '竹洛', '竹湖', '新屋', '达材', '朝中', '官窦', '伞塘', '白岗', '稔坪', '西坑',
'三和', '居委']
admin_vill_dict={}
vill_filpth_dict={}
admin_vill_Nu=len(admin_vill_order)
raw_file_Nu=len(raw_file_list)
for i in range(admin_vill_Nu):
admin_vill=admin_vill_order[i]
for j in range(raw_file_Nu):
raw_file_Nam=raw_file_list[j]
raw_file_path=raw_file_path_list[j]
find_data = re.compile('.*{}.*'.format(admin_vill))
result = re.match(find_data, raw_file_Nam)
if result:
dict = {raw_file_Nam: admin_vill} # dict = {文件名:村委会}
admin_vill_dict.update(dict) # 添加dict到空字典dict_all中
dict={admin_vill:raw_file_path}
vill_filpth_dict.update(dict)
print(admin_vill_dict)
return admin_vill_dict,vill_filpth_dict
# for i in range(raw_file_Nu):
# raw_file_Nam = raw_file_list[i]
#
# for j in range(admin_vill_Nu):
# admin_vill = admin_vill_order[i]
# find_data = re.compile('{}.*{}'.format(admin_vill))
# result = re.match(find_data, raw_file_Nam)
# if result:
def gath_data(raw_file_list,raw_file_path_list,admin_vill_dict,vill_filpth_dict):
print('gather data to excel')
list1=[]
print("file name")
print('创建一个空的二维数组')
merged_df=None
for i in range(0,len(admin_vill_dict)):
# fil_nam = raw_file_list[i]
# raw_file_path=raw_file_path_list[i]
#
#
# print('----------------------------------------------------------------')
# print('读取第'+str(i+1)+'个文件。'+"文件名是:",fil_nam)
# print("administrative village行政村")
# print('依次删除文件名的前8个字符,删除当年科目余额表、经联社、会,得到行政村名')
# admin_vill = fil_nam[8:].strip("当前年月收支明细表.xls").strip("经联社").replace("居委会", "居委")
# if admin_vill=='锋':
# admin_vill='前锋'
# print(admin_vill)
#
print('获取字典第几个键值对:dict.keys()返回一个列表,因此,您只需dict.keys()[n]')
file_list = list(admin_vill_dict.keys())
print(file_list)
file=file_list[i]
print(file)
admin_vill=admin_vill_dict[file]
print(admin_vill)
raw_file_path=vill_filpth_dict[admin_vill]
print('----------------------------------------------------------------')
print('读取第'+str(i+1)+'个文件。'+"读取的村是:",admin_vill)
print('最后一行“贝佳制作”不读取。skip_footer:省略从尾部数的行数据,没有第0行,从1开始')
expnd = pandas.read_excel(raw_file_path, sheet_name="Sheet1", header=1,dtype=str, usecols='E,F,G', skipfooter=1,keep_default_na=False)
col_list=expnd.columns.values
print(col_list)
col_list[1]=admin_vill+col_list[1][:3]
print('累计数列的列名改成村委会+累计数,第一个方括号是读取列表元素,第二个方括号是截取字符串。')
col_list[2] = admin_vill + col_list[2][:3]
print('直接重命名全部列')
expnd.columns = col_list
# print('区分:expnd.iloc[:i]表示i列;expnd=expnd[:t]表示0到t行。')
# expnd.iloc[:i]=admin_vill+expnd.loc[0].values[2]
expnd_index_list=expnd.index
# for i in range(0,len(expnd_index_list)):
# if expnd_index_list[0]=='':
# print('expnd.index[i]')
# print(expnd.index[i])
# expnd=expnd.drop(expnd.index[i],inplace=True)
# # #print(expnd)
row_n = expnd.shape[0]
col_n = expnd.shape[1]
print('删除支出项目为空的行')
for t in range(0,row_n):
row_i_0=expnd.iloc[t, 0]
print(row_i_0)
if row_i_0=="":
print('删除空行:第{}行'.format(t))
break
#print(t)
print('切片保存0到t行')
expnd=expnd[:t]
#print(expnd)
row_n = expnd.shape[0]
col_n = expnd.shape[1]
# # print(pandas.DataFrame(expnd))
#expnd= pandas.DataFrame(expnd)
print('增加空列')
expnd['大类编号'] = None
for i in expnd.index:
print('第{}行'.format(i+1))
for j in range(len(expnd.loc[i].values)):
#print('根据项目判断大类编号')
if expnd.loc[i].values[0] == "一、经营支出 ":
expnd.loc[i].values[3] = '1'
if expnd.loc[i].values[0]=="二、管理费支出 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='2'
if expnd.loc[i].values[0]=="三、其他支出 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='3'
if expnd.loc[i].values[0]=="四、福利费支出 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='4'
if expnd.loc[i].values[0]=="五、分配农户及投资者 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='5'
if expnd.loc[i].values[0]=="六、固定资产购建 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='6'
expnd = expnd.set_index('支出项目')
expnd=pandas.DataFrame(expnd)
#print(getInfo)
#print(pandas.DataFrame(getInfo))
print('存放多个Excel文件的二维数组')
if merged_df is None:
merged_df=expnd
else:
#targ_data= pandas.merge(merged_df, expnd,how='inner',left_index=True,right_index=True)
#targ_data = merged_df.join(expnd)
print('正在合并')
targ_data = pandas.merge(merged_df, expnd,how='outer', on=['支出项目','大类编号'],sort=False)
print('把值传到merged_df就不会覆盖targ_data,两个表格内容迭代互传实现循环增加。')
merged_df=targ_data
#sub_df=targ_data
targ_data=merged_df
print(targ_data.columns.values)
targ_data.set_index('大类编号')
print(targ_data.index)
print(targ_data)
targ_data_columns=targ_data.columns.values
print(targ_data_columns)
#targ_data.groupby(targ_data_columns[2])
targ_data = targ_data.sort_values(['大类编号'], inplace=False)
print('把大类编号放在表格末尾')
top_code = targ_data.pop('大类编号')
targ_data.insert(loc=targ_data.shape[1], column='大类编号', value=top_code, allow_duplicates=False)
targ_data['科目级别'] = None
indx_list=list(targ_data.index)
print('indx_list')
print(indx_list)
row_n = targ_data.shape[0]
col_n = targ_data.shape[1]
for i in range(len(indx_list)):
accnt_item=indx_list[i]
print('accounting item会计科目名称是:{}'.format(accnt_item))
char_null_Numb = len(accnt_item) - len(accnt_item.lstrip())
print('空格数量Number of null characters是:{}'.format(char_null_Numb))
if 0<=char_null_Numb<=2:
#targ_data.loc[i].value[col_n-1]=1
targ_data.iat[i,col_n-1]=1
if 3<=char_null_Numb<=7:
targ_data.iat[i,col_n-1] = 2
if 8<=char_null_Numb<=10:
targ_data.iat[i,col_n-1] = 3
if char_null_Numb>10:
targ_data.iat[i,col_n-1] = 4
targ_data = targ_data.sort_values(['大类编号','科目级别'], inplace=False)
# targ_data2_colm=set(targ_data.columns)
# targ_data2=targ_data[targ_data2_colm]
# top_code = targ_data2.pop('大类编号')
# targ_data2.insert(loc=targ_data2.shape[1], column='大类编号', value=top_code, allow_duplicates=False)
# print(targ_data2)
# # #file_accnt_num_list2=file_accnt_num_list.sort()
# print('sort影响列表本身,sorted不影响列表本身,用sort排序')
# file_exist_list=targ_data.sort_index()
# #file_exist_list = sorted(targ_data)
# print(file_exist_list)
#print(targ_data)
# targ_data.set_index(targ_data.columns.values[0])
# print(targ_data.index)
# 判断是否存在output文件夹
#OutputPath=r"E:\贝佳会计系统导出数据\两项经费统计\报表结构转换"
OutputPath = os.getcwd()
# if not os.path.exists(OutputPath):
# os.mkdir(OutputPath)
# 写入文件保存在output 文件夹下
filepath = os.path.join(OutputPath, '冲蒌镇各经联社收支明细表(支出项目汇总表).xlsx')
print('不覆盖Excel原来的数据写入Excel')
book = load_workbook(filepath)
writer = pandas.ExcelWriter(filepath, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)#获取文件中已存在的表名,这行直接用,不用修改
targ_data.to_excel(writer,header=2, sheet_name='1')
#targ_data2.to_excel(writer,header=2, sheet_name='2')
writer.save()
def func():
year = time.strftime("%Y")
#os.getcwd() # 获取当前工作目录
#raw_folder_path = easygui.fileopenbox()
msg = '选择一个文件,将会返回该文件的完整的目录哦'
title = ' 文件选择对话框'
default = r'D:\贝佳会计系统导出数据'
raw_folder_path = easygui.diropenbox(msg, title, default)
print('选择的文件的完整的路径为:' + str(raw_folder_path))
#raw_folder_path = r'E:\贝佳会计系统导出数据\两项经费统计\报表结构转换\需要结构转换的表格'
raw_file_list,raw_file_path_list=get_file_list(raw_folder_path)
admin_vill_dict,vill_filpth_dict = mod_fil_lis(raw_file_list, raw_file_path_list)
#input_file_path_plan = r"E:\贝佳会计系统导出数据\两项经费统计\2020年序时簿\2020 045冲洋经联社序时簿.xls"
gath_data(raw_file_list,raw_file_path_list,admin_vill_dict,vill_filpth_dict)
if __name__ == "__main__":
func()
#
#
#
# def gath_data(raw_file_list,raw_file_path_list,admin_vill_dict,vill_filpth_dict):
# print('gather data to excel')
# list1=[]
# print("file name")
# print('创建一个空的二维数组')
# merged_df=None
# for i in range(0,len(raw_file_path_list)):
# fil_nam = raw_file_list[i]
# raw_file_path=raw_file_path_list[i]
# print('----------------------------------------------------------------')
# print('读取第'+str(i+1)+'个文件。'+"文件名是:",fil_nam)
# print("administrative village行政村")
# print('依次删除文件名的前8个字符,删除当年科目余额表、经联社、会,得到行政村名')
# admin_vill = fil_nam[8:].strip("当前年月收支明细表.xls").strip("经联社").replace("居委会", "居委")
# if admin_vill=='锋':
# admin_vill='前锋'
# print(admin_vill)
# # get cared macro info from testplan and save as 'MacroInfo.xlsx'
# print('最后一行“贝佳制作”不读取。skip_footer:省略从尾部数的行数据,没有第0行,从1开始')
# expnd = pandas.read_excel(raw_file_path, sheet_name="Sheet1", header=1,dtype=str, usecols='b,c,d', skipfooter=1,keep_default_na=False)
# col_list=expnd.columns.values
# print(col_list)
# col_list[1]=admin_vill+col_list[1]
# print('累计数列的列名改成村委会+累计数')
# col_list[2] = admin_vill + col_list[2]
# print('直接重命名全部列')
# expnd.columns = col_list
#
# # print('区分:expnd.iloc[:i]表示i列;expnd=expnd[:t]表示0到t行。')
# # expnd.iloc[:i]=admin_vill+expnd.loc[0].values[2]
#
#
#
# expnd_index_list=expnd.index
# # for i in range(0,len(expnd_index_list)):
# # if expnd_index_list[0]=='':
# # print('expnd.index[i]')
# # print(expnd.index[i])
# # expnd=expnd.drop(expnd.index[i],inplace=True)
# # # #print(expnd)
#
# row_n = expnd.shape[0]
# col_n = expnd.shape[1]
# print('删除支出项目为空的行')
# for t in range(0,row_n):
# row_i_0=expnd.iloc[t, 0]
# print(row_i_0)
# if row_i_0=="":
# print('删除空行:第{}行'.format(t))
# break
# #print(t)
# print('切片保存0到t行')
# expnd=expnd[:t]
# #print(expnd)
# row_n = expnd.shape[0]
# col_n = expnd.shape[1]
#
# # # print(pandas.DataFrame(expnd))
# #expnd= pandas.DataFrame(expnd)
# print('增加空列')
# expnd['大类编号'] = None
# for i in expnd.index:
# print('第{}行'.format(i+1))
# for j in range(len(expnd.loc[i].values)):
# print('根据项目判断大类编号')
# if expnd.loc[i].values[0] == "一、经营支出 ":
# expnd.loc[i].values[3] = '1'
# if expnd.loc[i].values[0]=="二、发包及上交支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='2'
# if expnd.loc[i].values[0]=="三、投资收益 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='3'
# if expnd.loc[i].values[0]=="四、农业税附加返还支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='4'
# if expnd.loc[i].values[0]=="五、补助支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='5'
# if expnd.loc[i].values[0]=="六、其他支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='6'
# if expnd.loc[i].values[0]=="七、福利费支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='7'
#
# expnd = expnd.set_index('支出项目')
#
# expnd=pandas.DataFrame(expnd)
#
#
#
#
#
# #print(getInfo)
# #print(pandas.DataFrame(getInfo))
# print('存放多个Excel文件的二维数组')
# if merged_df is None:
# merged_df=expnd
# else:
# #targ_data= pandas.merge(merged_df, expnd,how='inner',left_index=True,right_index=True)
# #targ_data = merged_df.join(expnd)
# print('正在合并')
# targ_data = pandas.merge(merged_df, expnd,how='outer', on=['支出项目'],sort=False)
# print('把值传到merged_df就不会覆盖targ_data,两个表格内容迭代互传实现循环增加。')
# merged_df=targ_data
# #sub_df=targ_data
# targ_data=merged_df
# print(targ_data.columns.values)
# targ_data.set_index('大类编号')
# print(targ_data.index)
# print(targ_data)
# targ_data_columns=targ_data.columns.values
# print(targ_data_columns)
# #targ_data.groupby(targ_data_columns[2])
#
# targ_data=targ_data.sort_values(['大类编号'], inplace=False)
#
# # # #file_accnt_num_list2=file_accnt_num_list.sort()
# # print('sort影响列表本身,sorted不影响列表本身,用sort排序')
# # file_exist_list=targ_data.sort_index()
# # #file_exist_list = sorted(targ_data)
# # print(file_exist_list)
#
#
#
# print(targ_data)
# # targ_data.set_index(targ_data.columns.values[0])
# # print(targ_data.index)
#
#
# # 判断是否存在output文件夹
# OutputPath=r"E:\贝佳会计系统导出数据\两项经费统计\报表结构转换"
#
# if not os.path.exists(OutputPath):
# os.mkdir(OutputPath)
#
#
# # 写入文件保存在output 文件夹下
# filepath = os.path.join(OutputPath, '结构调整后表格.xlsx')
#
#
# print('不覆盖Excel原来的数据写入Excel')
# book = load_workbook(filepath)
# writer = pandas.ExcelWriter(filepath, engine='openpyxl')
# writer.book = book
# writer.sheets = dict((ws.title, ws) for ws in book.worksheets)#获取文件中已存在的表名,这行直接用,不用修改
# targ_data.to_excel(writer,header=5, sheet_name='1')
# writer.save()
#
#
#
# def gath_data(raw_file_list,raw_file_path_list):
# print('gather data to excel')
# list1=[]
# print("file name")
# print('创建一个空的二维数组')
#
# merged_df=None
# for i in range(0,len(raw_file_path_list)):
# fil_nam = raw_file_list[i]
# raw_file_path=raw_file_path_list[i]
# print('----------------------------------------------------------------')
# print('读取第'+str(i+1)+'个文件。'+"文件名是:",fil_nam)
#
# # get cared macro info from testplan and save as 'MacroInfo.xlsx'
# print('最后一行“贝佳制作”不读取。skip_footer:省略从尾部数的行数据,没有第0行,从1开始')
# expnd = pandas.read_excel(raw_file_path, sheet_name="Sheet1", header=1,dtype=str, usecols='b,c,d', skipfooter=1,keep_default_na=False)
#
# row_n = expnd.shape[0]
# col_n = expnd.shape[1]
# # # print(pandas.DataFrame(expnd))
# expnd= pandas.DataFrame(expnd)
# # print(expnd.columns.values)
# # expnd_item=expnd.iat[0,0]
# # print(expnd_item)
# # print(len(expnd_item))
# expnd = expnd.set_index('支出项目')
# #print(expnd)
#
# list2=[]
# for i in range(0,row_n):
# list2.append(fil_nam)
# print('list2')
# print(list2)
#
# #print(pandas.DataFrame(expnd))
# # # 选择 first_name 为Antonio,并且从 'city' 到 'email'的所有列
# # #zhaiyao=getInfo.loc[getInfo['分类'] == '摘要', '序号':'数据2']
# # # print(zhaiyao)
# # # # 选择 first_name 为Antonio,并且从 'city' 到 'email'的所有列
# # # data.loc[data['first_name'] == 'Antonio', 'city':'email']
#
#
#
#
# #print(getInfo)
# #print(pandas.DataFrame(getInfo))
# print('存放多个Excel文件的二维数组')
# if merged_df is None:
# merged_df=expnd
# else:
# #targ_data= pandas.merge(merged_df, expnd,how='inner',left_index=True,right_index=True)
# #targ_data = merged_df.join(expnd)
# print('正在合并')
# targ_data = pandas.merge(merged_df, expnd,how='outer', on='支出项目')
#
#
#
#
#
#
#
# # print('拼接到后面')
# # targ_data = pandas.concat([targ_data, getInfo], ignore_index=True)
# #targ_data = targ_data[['文件名', '项目', '本月数', '累计数']]
# print(targ_data)
#
# print('行列转置transpose函数')
# #targ_data=targ_data.transpose()
#
#
#
# #print(pandas.DataFrame(getInfo))
# # print(' 需要转置的那部分的行数、列数')
# # row_n = getInfo.shape[0] - 1
# # col_n = getInfo.shape[1]
# # print('定位用is location,iloc函数')
# # print('切片成支出部分,支出部分。expnde and expenditure')
# # #testdf3.loc[:, ['A', 'C']]
# # expnd=getInfo.iloc[:,[1, 2, 3]]
# # expnd = getInfo.iloc[:, [4, 5, 6]]
# # # expnd=getInfo.loc[:,['分类2', '数据3','数据4']] # DF, 指定某几列,直接用列名
# # getInfo=pandas.merge(expnd,expnd)
# # #getInfo=expnd+expnd
# # # print(pandas.DataFrame(expnd))
# # #
# # # print(pandas.DataFrame(expnd))
# # print(pandas.DataFrame(getInfo))
#
#
#
#
#
#
# # for i in range(0,row_n):
# # print(getInfo.iloc[i,2])
# # if getInfo.iloc[i,2]=='摘要':
# # print(getInfo.loc[i].values)
# # for i in getInfo.index:
# # for j in range(len(getInfo.loc[i].values)):
# # # row_data=getInfo.loc[i].values
# # # #print(row_data)
# # # row_data_list=list(row_data)
# # # list1.append(row_data_list)
# # print('')
# #
# #
# # print(list1)
# # print('target data目标数据')
# # targ_data=pandas.DataFrame(list1)
# # print(targ_data)
#
# # 判断是否存在output文件夹
# OutputPath=r"E:\贝佳会计系统导出数据\两项经费统计\报表结构转换"
# # if (os.path.exists(OutputPath)):
# # shutil.rmtree(OutputPath)
# # print('output dir has been rm -rf and new makedirs')
# # os.makedirs(OutputPath)
# if not os.path.exists(OutputPath):
# os.mkdir(OutputPath)
#
#
# # 写入文件保存在output 文件夹下
# filepath = os.path.join(OutputPath, '结构调整后表格.xls')
# targ_data.to_excel(filepath, header=1, index=True, encoding='utf-8', sheet_name='1')
#
#
#
#
#
#
#
#
#
#
#
# def saveAsNewExcelFile(raw_file_path_list):
# list1=[]
# print("file name")
# fil_nam='村委会1'
#
# for raw_file_path in raw_file_path_list:
#
# for i in getInfo.index:
# for j in range(len(getInfo.loc[i].values)):
# if (getInfo.loc[i].values[j] == '专项应付款-村(社区)办公经费'):
# start_col = i + 1
# row_data = getInfo.loc[i].values
# # print(row_data)
# row_data = list(row_data)
# row_data = list[fil_nam] + row_data
# list1.append(row_data)
# print(list1)
# print('target data目标数据')
# targ_data = pandas.DataFrame(list1)
#
#
#
#
# # get cared macro info from testplan and save as 'MacroInfo.xlsx'
# getInfo = pandas.read_excel(input_file_path_plan, sheet_name="Sheet1", dtype=str, keep_default_na=False)
# for i in getInfo.index:
# for j in range(len(getInfo.loc[i].values)):
# if (getInfo.loc[i].values[j] == '专项应付款-村(社区)办公经费'):
# start_col = i + 1
# row_data=getInfo.loc[i].values
# #print(row_data)
# row_data=list(row_data)
# row_data=list[fil_nam]+row_data
# list1.append(row_data)
# print(list1)
# print('target data目标数据')
# targ_data=pandas.DataFrame(list1)
# #break #add 2.3
# # print(getInfo.loc[i].values[j])
# #x = pandas.DataFrame(getInfo.iloc[start_col:, ])
# #print(x)
# # 判断是否存在output文件夹
# OutputPath=r"e:\贝佳会计系统导出数据\两项经费统计"
# if (os.path.exists(OutputPath)):
# shutil.rmtree(OutputPath)
# print('output dir has been rm -rf and new makedirs')
# os.makedirs(OutputPath)
#
# # 写入文件保存在output 文件夹下
# filepath = os.path.join(OutputPath, '村(社区)办公经费.xls')
# targ_data.to_excel(filepath, header=2, index=False, encoding='utf-8', sheet_name='村(社区)办公经费')
三、收入项目汇总表
ChangeExcelStructure03incom
#coding=utf-8
#Transformation structure报表结构转换
import xlrd, xlwt, pyautogui, time, pyperclip, easygui, os, tkinter, io,pandas,shutil,re
#import autopy as at
import xlrd, xlwt, xlwings, pyautogui, time, pyperclip, easygui, os, tkinter, io,pandas,shutil,re
from tkinter import filedialog,ttk
from openpyxl import load_workbook
#file= tkinter.filedialog.askopenfile()#选择打开什么文件,返回IO流对象
#file= tkinter.filedialog.askopenfilename()#选择打开什么文件,返回文件名
#print(file)
from io import StringIO
import sys
from contextlib import contextmanager
import sys
def get_file_list(raw_folder_path):
# 打开文件
dirs = os.listdir(raw_folder_path)
raw_file_list=[]
raw_file_path_list=[]
for home, dirs, files in os.walk(raw_folder_path):
for file_name in files:
raw_file_list.append((file_name))
raw_file_path_list.append(os.path.join(home, file_name))
print(raw_file_list)
return raw_file_list,raw_file_path_list
# print('对列表进行排序,查找时可能减少遍历次数。也便于根据账套号顺序查找')
# #file_accnt_num_list2=file_accnt_num_list.sort()
# print('sort影响列表本身,sorted不影响列表本身,用sort排序')
# file_exist_list = sorted(file_accnt_num_list)
# print(file_exist_list)
# return file_exist_list
def mod_fil_lis(raw_file_list,raw_file_path_list):
print('modify filename list')
admin_vill_order = ['前锋', '西海', '八家', '冲洋', '新围', '竹洛', '竹湖', '新屋', '达材', '朝中', '官窦', '伞塘', '白岗', '稔坪', '西坑',
'三和', '居委']
admin_vill_dict={}
vill_filpth_dict={}
admin_vill_Nu=len(admin_vill_order)
raw_file_Nu=len(raw_file_list)
for i in range(admin_vill_Nu):
admin_vill=admin_vill_order[i]
for j in range(raw_file_Nu):
raw_file_Nam=raw_file_list[j]
raw_file_path=raw_file_path_list[j]
find_data = re.compile('.*{}.*'.format(admin_vill))
result = re.match(find_data, raw_file_Nam)
if result:
dict = {raw_file_Nam: admin_vill} # dict = {文件名:村委会}
admin_vill_dict.update(dict) # 添加dict到空字典dict_all中
dict={admin_vill:raw_file_path}
vill_filpth_dict.update(dict)
print(admin_vill_dict)
return admin_vill_dict,vill_filpth_dict
# for i in range(raw_file_Nu):
# raw_file_Nam = raw_file_list[i]
#
# for j in range(admin_vill_Nu):
# admin_vill = admin_vill_order[i]
# find_data = re.compile('{}.*{}'.format(admin_vill))
# result = re.match(find_data, raw_file_Nam)
# if result:
def gath_data(raw_file_list,raw_file_path_list,admin_vill_dict,vill_filpth_dict):
print('gather data to excel')
list1=[]
print("file name")
print('创建一个空的二维数组')
merged_df=None
for i in range(0,len(admin_vill_dict)):
# fil_nam = raw_file_list[i]
# raw_file_path=raw_file_path_list[i]
#
#
# print('----------------------------------------------------------------')
# print('读取第'+str(i+1)+'个文件。'+"文件名是:",fil_nam)
# print("administrative village行政村")
# print('依次删除文件名的前8个字符,删除当年科目余额表、经联社、会,得到行政村名')
# admin_vill = fil_nam[8:].strip("当前年月收支明细表.xls").strip("经联社").replace("居委会", "居委")
# if admin_vill=='锋':
# admin_vill='前锋'
# print(admin_vill)
#
print('获取字典第几个键值对:dict.keys()返回一个列表,因此,您只需dict.keys()[n]')
file_list = list(admin_vill_dict.keys())
print(file_list)
file=file_list[i]
print(file)
admin_vill=admin_vill_dict[file]
print(admin_vill)
raw_file_path=vill_filpth_dict[admin_vill]
print('----------------------------------------------------------------')
print('读取第'+str(i+1)+'个文件。'+"读取的村是:",admin_vill)
print('最后一行“贝佳制作”不读取。skip_footer:省略从尾部数的行数据,没有第0行,从1开始')
expnd = pandas.read_excel(raw_file_path, sheet_name="Sheet1", header=1,dtype=str, usecols='E,F,G', skipfooter=1,keep_default_na=False)
col_list=expnd.columns.values
print(col_list)
col_list[1]=admin_vill+col_list[1][:3]
print('累计数列的列名改成村委会+累计数,第一个方括号是读取列表元素,第二个方括号是截取字符串。')
col_list[2] = admin_vill + col_list[2][:3]
print('直接重命名全部列')
expnd.columns = col_list
# print('区分:expnd.iloc[:i]表示i列;expnd=expnd[:t]表示0到t行。')
# expnd.iloc[:i]=admin_vill+expnd.loc[0].values[2]
expnd_index_list=expnd.index
# for i in range(0,len(expnd_index_list)):
# if expnd_index_list[0]=='':
# print('expnd.index[i]')
# print(expnd.index[i])
# expnd=expnd.drop(expnd.index[i],inplace=True)
# # #print(expnd)
row_n = expnd.shape[0]
col_n = expnd.shape[1]
print('删除支出项目为空的行')
for t in range(0,row_n):
row_i_0=expnd.iloc[t, 0]
print(row_i_0)
if row_i_0=="":
print('删除空行:第{}行'.format(t))
break
#print(t)
print('切片保存0到t行')
expnd=expnd[:t]
#print(expnd)
row_n = expnd.shape[0]
col_n = expnd.shape[1]
# # print(pandas.DataFrame(expnd))
#expnd= pandas.DataFrame(expnd)
print('增加空列')
expnd['大类编号'] = None
for i in expnd.index:
print('第{}行'.format(i+1))
for j in range(len(expnd.loc[i].values)):
#print('根据项目判断大类编号')
if expnd.loc[i].values[0] == "一、经营支出 ":
expnd.loc[i].values[3] = '1'
if expnd.loc[i].values[0]=="二、管理费支出 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='2'
if expnd.loc[i].values[0]=="三、其他支出 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='3'
if expnd.loc[i].values[0]=="四、福利费支出 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='4'
if expnd.loc[i].values[0]=="五、分配农户及投资者 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='5'
if expnd.loc[i].values[0]=="六、固定资产购建 ":
for t in range(i,row_n):
expnd.loc[t].values[3] ='6'
expnd = expnd.set_index('支出项目')
expnd=pandas.DataFrame(expnd)
#print(getInfo)
#print(pandas.DataFrame(getInfo))
print('存放多个Excel文件的二维数组')
if merged_df is None:
merged_df=expnd
else:
#targ_data= pandas.merge(merged_df, expnd,how='inner',left_index=True,right_index=True)
#targ_data = merged_df.join(expnd)
print('正在合并')
targ_data = pandas.merge(merged_df, expnd,how='outer', on=['支出项目','大类编号'],sort=False)
print('把值传到merged_df就不会覆盖targ_data,两个表格内容迭代互传实现循环增加。')
merged_df=targ_data
#sub_df=targ_data
targ_data=merged_df
print(targ_data.columns.values)
targ_data.set_index('大类编号')
print(targ_data.index)
print(targ_data)
targ_data_columns=targ_data.columns.values
print(targ_data_columns)
#targ_data.groupby(targ_data_columns[2])
targ_data = targ_data.sort_values(['大类编号'], inplace=False)
print('把大类编号放在表格末尾')
top_code = targ_data.pop('大类编号')
targ_data.insert(loc=targ_data.shape[1], column='大类编号', value=top_code, allow_duplicates=False)
targ_data['科目级别'] = None
indx_list=list(targ_data.index)
print('indx_list')
print(indx_list)
row_n = targ_data.shape[0]
col_n = targ_data.shape[1]
for i in range(len(indx_list)):
accnt_item=indx_list[i]
print('accounting item会计科目名称是:{}'.format(accnt_item))
char_null_Numb = len(accnt_item) - len(accnt_item.lstrip())
print('空格数量Number of null characters是:{}'.format(char_null_Numb))
if 0<=char_null_Numb<=2:
#targ_data.loc[i].value[col_n-1]=1
targ_data.iat[i,col_n-1]=1
if 3<=char_null_Numb<=7:
targ_data.iat[i,col_n-1] = 2
if 8<=char_null_Numb<=10:
targ_data.iat[i,col_n-1] = 3
if char_null_Numb>10:
targ_data.iat[i,col_n-1] = 4
targ_data = targ_data.sort_values(['大类编号','科目级别'], inplace=False)
# targ_data2_colm=set(targ_data.columns)
# targ_data2=targ_data[targ_data2_colm]
# top_code = targ_data2.pop('大类编号')
# targ_data2.insert(loc=targ_data2.shape[1], column='大类编号', value=top_code, allow_duplicates=False)
# print(targ_data2)
# # #file_accnt_num_list2=file_accnt_num_list.sort()
# print('sort影响列表本身,sorted不影响列表本身,用sort排序')
# file_exist_list=targ_data.sort_index()
# #file_exist_list = sorted(targ_data)
# print(file_exist_list)
#print(targ_data)
# targ_data.set_index(targ_data.columns.values[0])
# print(targ_data.index)
# 判断是否存在output文件夹
#OutputPath=r"E:\贝佳会计系统导出数据\两项经费统计\报表结构转换"
OutputPath = os.getcwd()
# if not os.path.exists(OutputPath):
# os.mkdir(OutputPath)
# 写入文件保存在output 文件夹下
filepath = os.path.join(OutputPath, '冲蒌镇各经联社收支明细表(支出项目汇总表).xlsx')
print('不覆盖Excel原来的数据写入Excel')
book = load_workbook(filepath)
writer = pandas.ExcelWriter(filepath, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)#获取文件中已存在的表名,这行直接用,不用修改
targ_data.to_excel(writer,header=2, sheet_name='1')
#targ_data2.to_excel(writer,header=2, sheet_name='2')
writer.save()
def func():
year = time.strftime("%Y")
#os.getcwd() # 获取当前工作目录
#raw_folder_path = easygui.fileopenbox()
msg = '选择一个文件,将会返回该文件的完整的目录哦'
title = ' 文件选择对话框'
default = r'D:\贝佳会计系统导出数据'
raw_folder_path = easygui.diropenbox(msg, title, default)
print('选择的文件的完整的路径为:' + str(raw_folder_path))
#raw_folder_path = r'E:\贝佳会计系统导出数据\两项经费统计\报表结构转换\需要结构转换的表格'
raw_file_list,raw_file_path_list=get_file_list(raw_folder_path)
admin_vill_dict,vill_filpth_dict = mod_fil_lis(raw_file_list, raw_file_path_list)
#input_file_path_plan = r"E:\贝佳会计系统导出数据\两项经费统计\2020年序时簿\2020 045冲洋经联社序时簿.xls"
gath_data(raw_file_list,raw_file_path_list,admin_vill_dict,vill_filpth_dict)
if __name__ == "__main__":
func()
#
#
#
# def gath_data(raw_file_list,raw_file_path_list,admin_vill_dict,vill_filpth_dict):
# print('gather data to excel')
# list1=[]
# print("file name")
# print('创建一个空的二维数组')
# merged_df=None
# for i in range(0,len(raw_file_path_list)):
# fil_nam = raw_file_list[i]
# raw_file_path=raw_file_path_list[i]
# print('----------------------------------------------------------------')
# print('读取第'+str(i+1)+'个文件。'+"文件名是:",fil_nam)
# print("administrative village行政村")
# print('依次删除文件名的前8个字符,删除当年科目余额表、经联社、会,得到行政村名')
# admin_vill = fil_nam[8:].strip("当前年月收支明细表.xls").strip("经联社").replace("居委会", "居委")
# if admin_vill=='锋':
# admin_vill='前锋'
# print(admin_vill)
# # get cared macro info from testplan and save as 'MacroInfo.xlsx'
# print('最后一行“贝佳制作”不读取。skip_footer:省略从尾部数的行数据,没有第0行,从1开始')
# expnd = pandas.read_excel(raw_file_path, sheet_name="Sheet1", header=1,dtype=str, usecols='b,c,d', skipfooter=1,keep_default_na=False)
# col_list=expnd.columns.values
# print(col_list)
# col_list[1]=admin_vill+col_list[1]
# print('累计数列的列名改成村委会+累计数')
# col_list[2] = admin_vill + col_list[2]
# print('直接重命名全部列')
# expnd.columns = col_list
#
# # print('区分:expnd.iloc[:i]表示i列;expnd=expnd[:t]表示0到t行。')
# # expnd.iloc[:i]=admin_vill+expnd.loc[0].values[2]
#
#
#
# expnd_index_list=expnd.index
# # for i in range(0,len(expnd_index_list)):
# # if expnd_index_list[0]=='':
# # print('expnd.index[i]')
# # print(expnd.index[i])
# # expnd=expnd.drop(expnd.index[i],inplace=True)
# # # #print(expnd)
#
# row_n = expnd.shape[0]
# col_n = expnd.shape[1]
# print('删除支出项目为空的行')
# for t in range(0,row_n):
# row_i_0=expnd.iloc[t, 0]
# print(row_i_0)
# if row_i_0=="":
# print('删除空行:第{}行'.format(t))
# break
# #print(t)
# print('切片保存0到t行')
# expnd=expnd[:t]
# #print(expnd)
# row_n = expnd.shape[0]
# col_n = expnd.shape[1]
#
# # # print(pandas.DataFrame(expnd))
# #expnd= pandas.DataFrame(expnd)
# print('增加空列')
# expnd['大类编号'] = None
# for i in expnd.index:
# print('第{}行'.format(i+1))
# for j in range(len(expnd.loc[i].values)):
# print('根据项目判断大类编号')
# if expnd.loc[i].values[0] == "一、经营支出 ":
# expnd.loc[i].values[3] = '1'
# if expnd.loc[i].values[0]=="二、发包及上交支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='2'
# if expnd.loc[i].values[0]=="三、投资收益 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='3'
# if expnd.loc[i].values[0]=="四、农业税附加返还支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='4'
# if expnd.loc[i].values[0]=="五、补助支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='5'
# if expnd.loc[i].values[0]=="六、其他支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='6'
# if expnd.loc[i].values[0]=="七、福利费支出 ":
# for t in range(i,row_n):
# expnd.loc[t].values[3] ='7'
#
# expnd = expnd.set_index('支出项目')
#
# expnd=pandas.DataFrame(expnd)
#
#
#
#
#
# #print(getInfo)
# #print(pandas.DataFrame(getInfo))
# print('存放多个Excel文件的二维数组')
# if merged_df is None:
# merged_df=expnd
# else:
# #targ_data= pandas.merge(merged_df, expnd,how='inner',left_index=True,right_index=True)
# #targ_data = merged_df.join(expnd)
# print('正在合并')
# targ_data = pandas.merge(merged_df, expnd,how='outer', on=['支出项目'],sort=False)
# print('把值传到merged_df就不会覆盖targ_data,两个表格内容迭代互传实现循环增加。')
# merged_df=targ_data
# #sub_df=targ_data
# targ_data=merged_df
# print(targ_data.columns.values)
# targ_data.set_index('大类编号')
# print(targ_data.index)
# print(targ_data)
# targ_data_columns=targ_data.columns.values
# print(targ_data_columns)
# #targ_data.groupby(targ_data_columns[2])
#
# targ_data=targ_data.sort_values(['大类编号'], inplace=False)
#
# # # #file_accnt_num_list2=file_accnt_num_list.sort()
# # print('sort影响列表本身,sorted不影响列表本身,用sort排序')
# # file_exist_list=targ_data.sort_index()
# # #file_exist_list = sorted(targ_data)
# # print(file_exist_list)
#
#
#
# print(targ_data)
# # targ_data.set_index(targ_data.columns.values[0])
# # print(targ_data.index)
#
#
# # 判断是否存在output文件夹
# OutputPath=r"E:\贝佳会计系统导出数据\两项经费统计\报表结构转换"
#
# if not os.path.exists(OutputPath):
# os.mkdir(OutputPath)
#
#
# # 写入文件保存在output 文件夹下
# filepath = os.path.join(OutputPath, '结构调整后表格.xlsx')
#
#
# print('不覆盖Excel原来的数据写入Excel')
# book = load_workbook(filepath)
# writer = pandas.ExcelWriter(filepath, engine='openpyxl')
# writer.book = book
# writer.sheets = dict((ws.title, ws) for ws in book.worksheets)#获取文件中已存在的表名,这行直接用,不用修改
# targ_data.to_excel(writer,header=5, sheet_name='1')
# writer.save()
#
#
#
# def gath_data(raw_file_list,raw_file_path_list):
# print('gather data to excel')
# list1=[]
# print("file name")
# print('创建一个空的二维数组')
#
# merged_df=None
# for i in range(0,len(raw_file_path_list)):
# fil_nam = raw_file_list[i]
# raw_file_path=raw_file_path_list[i]
# print('----------------------------------------------------------------')
# print('读取第'+str(i+1)+'个文件。'+"文件名是:",fil_nam)
#
# # get cared macro info from testplan and save as 'MacroInfo.xlsx'
# print('最后一行“贝佳制作”不读取。skip_footer:省略从尾部数的行数据,没有第0行,从1开始')
# expnd = pandas.read_excel(raw_file_path, sheet_name="Sheet1", header=1,dtype=str, usecols='b,c,d', skipfooter=1,keep_default_na=False)
#
# row_n = expnd.shape[0]
# col_n = expnd.shape[1]
# # # print(pandas.DataFrame(expnd))
# expnd= pandas.DataFrame(expnd)
# # print(expnd.columns.values)
# # expnd_item=expnd.iat[0,0]
# # print(expnd_item)
# # print(len(expnd_item))
# expnd = expnd.set_index('支出项目')
# #print(expnd)
#
# list2=[]
# for i in range(0,row_n):
# list2.append(fil_nam)
# print('list2')
# print(list2)
#
# #print(pandas.DataFrame(expnd))
# # # 选择 first_name 为Antonio,并且从 'city' 到 'email'的所有列
# # #zhaiyao=getInfo.loc[getInfo['分类'] == '摘要', '序号':'数据2']
# # # print(zhaiyao)
# # # # 选择 first_name 为Antonio,并且从 'city' 到 'email'的所有列
# # # data.loc[data['first_name'] == 'Antonio', 'city':'email']
#
#
#
#
# #print(getInfo)
# #print(pandas.DataFrame(getInfo))
# print('存放多个Excel文件的二维数组')
# if merged_df is None:
# merged_df=expnd
# else:
# #targ_data= pandas.merge(merged_df, expnd,how='inner',left_index=True,right_index=True)
# #targ_data = merged_df.join(expnd)
# print('正在合并')
# targ_data = pandas.merge(merged_df, expnd,how='outer', on='支出项目')
#
#
#
#
#
#
#
# # print('拼接到后面')
# # targ_data = pandas.concat([targ_data, getInfo], ignore_index=True)
# #targ_data = targ_data[['文件名', '项目', '本月数', '累计数']]
# print(targ_data)
#
# print('行列转置transpose函数')
# #targ_data=targ_data.transpose()
#
#
#
# #print(pandas.DataFrame(getInfo))
# # print(' 需要转置的那部分的行数、列数')
# # row_n = getInfo.shape[0] - 1
# # col_n = getInfo.shape[1]
# # print('定位用is location,iloc函数')
# # print('切片成支出部分,支出部分。expnde and expenditure')
# # #testdf3.loc[:, ['A', 'C']]
# # expnd=getInfo.iloc[:,[1, 2, 3]]
# # expnd = getInfo.iloc[:, [4, 5, 6]]
# # # expnd=getInfo.loc[:,['分类2', '数据3','数据4']] # DF, 指定某几列,直接用列名
# # getInfo=pandas.merge(expnd,expnd)
# # #getInfo=expnd+expnd
# # # print(pandas.DataFrame(expnd))
# # #
# # # print(pandas.DataFrame(expnd))
# # print(pandas.DataFrame(getInfo))
#
#
#
#
#
#
# # for i in range(0,row_n):
# # print(getInfo.iloc[i,2])
# # if getInfo.iloc[i,2]=='摘要':
# # print(getInfo.loc[i].values)
# # for i in getInfo.index:
# # for j in range(len(getInfo.loc[i].values)):
# # # row_data=getInfo.loc[i].values
# # # #print(row_data)
# # # row_data_list=list(row_data)
# # # list1.append(row_data_list)
# # print('')
# #
# #
# # print(list1)
# # print('target data目标数据')
# # targ_data=pandas.DataFrame(list1)
# # print(targ_data)
#
# # 判断是否存在output文件夹
# OutputPath=r"E:\贝佳会计系统导出数据\两项经费统计\报表结构转换"
# # if (os.path.exists(OutputPath)):
# # shutil.rmtree(OutputPath)
# # print('output dir has been rm -rf and new makedirs')
# # os.makedirs(OutputPath)
# if not os.path.exists(OutputPath):
# os.mkdir(OutputPath)
#
#
# # 写入文件保存在output 文件夹下
# filepath = os.path.join(OutputPath, '结构调整后表格.xls')
# targ_data.to_excel(filepath, header=1, index=True, encoding='utf-8', sheet_name='1')
#
#
#
#
#
#
#
#
#
#
#
# def saveAsNewExcelFile(raw_file_path_list):
# list1=[]
# print("file name")
# fil_nam='村委会1'
#
# for raw_file_path in raw_file_path_list:
#
# for i in getInfo.index:
# for j in range(len(getInfo.loc[i].values)):
# if (getInfo.loc[i].values[j] == '专项应付款-村(社区)办公经费'):
# start_col = i + 1
# row_data = getInfo.loc[i].values
# # print(row_data)
# row_data = list(row_data)
# row_data = list[fil_nam] + row_data
# list1.append(row_data)
# print(list1)
# print('target data目标数据')
# targ_data = pandas.DataFrame(list1)
#
#
#
#
# # get cared macro info from testplan and save as 'MacroInfo.xlsx'
# getInfo = pandas.read_excel(input_file_path_plan, sheet_name="Sheet1", dtype=str, keep_default_na=False)
# for i in getInfo.index:
# for j in range(len(getInfo.loc[i].values)):
# if (getInfo.loc[i].values[j] == '专项应付款-村(社区)办公经费'):
# start_col = i + 1
# row_data=getInfo.loc[i].values
# #print(row_data)
# row_data=list(row_data)
# row_data=list[fil_nam]+row_data
# list1.append(row_data)
# print(list1)
# print('target data目标数据')
# targ_data=pandas.DataFrame(list1)
# #break #add 2.3
# # print(getInfo.loc[i].values[j])
# #x = pandas.DataFrame(getInfo.iloc[start_col:, ])
# #print(x)
# # 判断是否存在output文件夹
# OutputPath=r"e:\贝佳会计系统导出数据\两项经费统计"
# if (os.path.exists(OutputPath)):
# shutil.rmtree(OutputPath)
# print('output dir has been rm -rf and new makedirs')
# os.makedirs(OutputPath)
#
# # 写入文件保存在output 文件夹下
# filepath = os.path.join(OutputPath, '村(社区)办公经费.xls')
# targ_data.to_excel(filepath, header=2, index=False, encoding='utf-8', sheet_name='村(社区)办公经费')