self.progress_bar.setVisible(False)
self.button_select = QPushButton("打开", self)
self.button_select.setFont(QFont('等线', 11))
self.button_select.setGeometry(10, 60, 130, 30)
self.button_select.clicked.connect(self.select_file)
self.button_save = QPushButton("另存为", self)
self.button_save.setFont(QFont('等线', 11))
self.button_save.setGeometry(10, 95, 130, 30)
self.button_save.clicked.connect(self.save_file)
self.button_run = QPushButton("开始清洗", self)
self.button_run.setFont(QFont('等线', 11))
self.button_run.setGeometry(10, 130, 130, 30)
self.button_run.clicked.connect(self.run)
self.button = QPushButton("清洗附加选项", self)
self.button.setFont(QFont('等线', 11))
self.button.setGeometry(155, 60, 130, 30)
self.button.clicked.connect(self.button_clicked)
self.label_1_1 = QLabel(self)
self.label_1_1.setGeometry(290, 50, 700, 50)
self.label_1_1.setFont(QFont("等线", 14)) # 设置字体和字号
self.label_1_1.setVisible(False)
self.input_text = QLineEdit(self) # 创建一个输入框
self.input_text.setGeometry(155, 125, 130, 30)
self.input_text.setVisible(False)
self.input_text.setValidator(QIntValidator())
self.label_info = QLabel("清洗指定列", self) # 创建一个标签
self.label_info.setFont(QFont('等线', 11))
self.label_info.setGeometry(155, 95, 130, 30) # 设置标签位置和大小
self.label_info.setVisible(False) # 初始时隐藏标签
self.label_info_1 = QLabel("根据指定列去重", self)
self.label_info_1.setFont(QFont('等线', 11))
self.label_info_1.setGeometry(155, 155, 130, 30)
self.label_info_1.setVisible(False)
self.input_text_1 = QLineEdit(self)
self.input_text_1.setGeometry(155, 185, 130, 30)
self.input_text_1.setVisible(False)
self.input_text_1.setValidator(QIntValidator())
# 指示灯
self.red_light = QLabel(self)
self.red_light.setGeometry(10, 170, 20, 20)
self.red_light.setStyleSheet("background-color: red")
self.red_light.setVisible(False)
self.green_light = QLabel(self)
self.green_light.setGeometry(35, 170, 20, 20)
self.green_light.setStyleSheet("background-color: green")
self.green_light.setVisible(False)
self.line_horizontal_1 = SeparatorLine1(self)
self.line_horizontal_1.setGeometry(150, 225, 10000, 2)
self.line_horizontal_1.setVisible(False)
def button_clicked(self):
    """Toggle the optional cleaning controls as a single group.

    Shows or hides the two column-number input boxes, their captions and
    the hint label. The new state is the opposite of the first input
    box's current visibility.
    """
    self.label_1_1.setText('(温馨提示,下方的输入框只能输入数字,以下参数非必须参数,可不填)')
    # Decide the new state once and apply it to every widget, so the five
    # controls can never drift out of sync with each other.
    show = not self.input_text.isVisible()
    for widget in (self.input_text, self.label_info,
                   self.input_text_1, self.label_info_1,
                   self.label_1_1):
        widget.setVisible(show)
def select_file(self):  # Choose the input file
    """Ask the user for an input file and remember it in ``self.file_path``.

    ``self.file_path`` is left untouched when the dialog is cancelled
    (an empty path is returned in that case).
    """
    # getOpenFileName is a static convenience function, so there is no need
    # to construct a QFileDialog instance just to call it.
    chosen_path, _ = QFileDialog.getOpenFileName(self, "选择文件", "", "All Files (*)")
    if chosen_path:
        self.file_path = chosen_path
def save_file(self):  # Choose the output file
    """Ask for an output path and create an empty workbook there.

    On acceptance the chosen path is stored in ``self.save_path`` and an
    empty workbook is written to it via ``bag.Bag.save_excel``. If the file
    already exists the user is asked to confirm overwriting; declining
    resets ``self.save_path`` to ``''``. Cancelling the dialog leaves
    ``self.save_path`` unchanged.
    """
    name_filters = [
        "Text Files (*.txt)",
        "CSV Files (*.csv)",
        "Excel Files (*.xlsx *.xls)",
        "JSON Files (*.json *.jsonl)",
    ]
    file_dialog = QFileDialog()
    file_dialog.setWindowTitle("另存为")  # dialog title: "Save as"
    file_dialog.setNameFilters(name_filters)
    # Pre-select the Excel filter. The string has to match an installed
    # filter exactly, so it is taken from the list instead of being retyped
    # (the previous literal was truncated and matched nothing).
    file_dialog.selectNameFilter(name_filters[2])
    if not file_dialog.exec():
        return

    self.save_path = file_dialog.selectedFiles()[0]
    if os.path.exists(self.save_path):
        # noinspection PyUnresolvedReferences
        answer = QMessageBox.warning(self, "警告", "文件已存在,是否覆盖?",
                                     QMessageBox.Yes | QMessageBox.No)
        # noinspection PyUnresolvedReferences
        if answer != QMessageBox.Yes:
            # The user declined to overwrite: forget the path.
            self.save_path = ''
            return
    bag.Bag.save_excel([], self.save_path)
def run(self):  # Start cleaning
    """Clean the loaded rows, optionally de-duplicate them, and save them.

    Two optional 1-based column numbers are read from the input boxes:

    * ``input_text``   - clean only this column (blank: clean every cell);
    * ``input_text_1`` - keep only the first row for each distinct value
      found in this column (blank: keep every row).

    Cleaning a cell drops whitespace-only lines, strips leading punctuation
    and surrounding whitespace from the remaining lines, and replaces emoji
    with the literal ``<emoji>``. The result is written with
    ``bag.Bag.save_excel`` to ``self.save_path``; ``save_file`` is called
    first when no path has been chosen yet. Any failure, including a
    failure to load the input, is shown in an error dialog instead of
    escaping from the slot.
    """
    leading_punctuation = ',.?!;:,。?;:'

    def clean_cell(cell):
        """Return *cell* with blank lines removed and each line tidied."""
        kept = []
        for line in cell.split('\n'):
            # Skip lines that contain nothing but whitespace.
            if re.sub(r'\s', '', line):
                tidy = line.lstrip(leading_punctuation).strip()
                kept.append(emoji.replace_emoji(tidy, replace='<emoji>'))
        return '\n'.join(kept)

    def clean_rows(rows, column):
        """Clean one column in place, or every cell when *column* is None."""
        total = len(rows)
        cleaned = []
        for done, row in enumerate(tqdm(rows), start=1):
            if column is None:
                row = [clean_cell(cell) for cell in row]
            else:
                row[column] = clean_cell(row[column])
            cleaned.append(row)
            # Update the progress bar and keep the UI responsive.
            self.progress_bar.setValue(int(done / total * 100))
            QApplication.processEvents()
        return cleaned

    def drop_duplicates(rows, key_column):
        """Keep the first row for each distinct value in *key_column*."""
        # Assumes cell values are hashable (str / number / None) - confirm
        # against what judge() can return.
        seen = set()
        unique = []
        for row in rows:
            key = row[key_column]
            if key not in seen:
                seen.add(key)
                unique.append(row)
        return unique

    column_text = self.input_text.text()
    dedupe_text = self.input_text_1.text()

    # Progress bar and separator line stay visible once a run has started.
    self.progress_bar.setVisible(True)
    self.line_horizontal_1.setVisible(True)
    try:
        # Loading happens inside the try block so that a missing or
        # unreadable input file is reported like any other error.
        rows = judge(self.file_path)
        # User input is 1-based; a blank box means "option not used".
        column = int(column_text) - 1 if column_text else None
        key_column = int(dedupe_text) - 1 if dedupe_text else None

        self.red_light.setVisible(True)
        try:
            result = clean_rows(rows, column)
            if key_column is not None:
                result = drop_duplicates(result, key_column)
        finally:
            # Never leave the "busy" light on, even when cleaning fails.
            self.red_light.setVisible(False)

        self.green_light.setVisible(True)
        try:
            if not self.save_path:
                self.save_file()
            if not self.save_path:
                # The save dialog was cancelled or overwriting was declined.
                raise ValueError("未选择保存路径")
            bag.Bag.save_excel(result, self.save_path)
            # Let Qt repaint before blocking, otherwise the green light
            # would presumably never be drawn during the pause.
            QApplication.processEvents()
            time.sleep(1)
        finally:
            self.green_light.setVisible(False)
        QMessageBox.information(self, "完成", "数据清洗完成")
    except Exception as e:
        QMessageBox.critical(self, "错误", str(e))
# Compression / decompression
class CompressionDecompression(Layout):
    """Panel with two buttons: zip a file or folder, or unpack an archive.

    ``self.file_path`` holds the most recently chosen source path and is
    reused by later clicks; it is ``None`` until something is selected.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): if Layout derives from QWidget this attribute shadows
        # QWidget.layout() - confirm that is intended.
        self.layout = QVBoxLayout()
        self.file_path = None
        self.button_compress = QPushButton("压缩文件", self)
        self.button_compress.setFont(QFont('等线', 11))
        self.button_compress.setGeometry(10, 60, 130, 30)
        # on_button_clicked1 is the handler that creates a zip archive.
        self.button_compress.clicked.connect(self.on_button_clicked1)
        self.button_extract = QPushButton("解压文件", self)
        self.button_extract.setFont(QFont('等线', 11))
        self.button_extract.setGeometry(10, 95, 130, 30)
        # on_button_clicked is the handler that unpacks an archive.
        self.button_extract.clicked.connect(self.on_button_clicked)

    def on_button_clicked(self):
        """Unpack the selected archive into a folder chosen by the user.

        Prompts for an archive first when none has been selected yet. A
        cancelled dialog or a failing ``unzip_file`` call is reported in an
        error dialog.
        """
        if not self.file_path:
            self.select_file()
        if not self.file_path:
            QMessageBox.critical(self, 'error', '解压失败')
            return
        extract_path = QFileDialog.getExistingDirectory(self, "另存为")
        if not extract_path:
            QMessageBox.critical(self, 'error', '解压失败')
            return
        try:
            # Pass the real path (not its truthiness) to the helper.
            unzip_file(self.file_path, extract_path)
        except Exception as exc:
            QMessageBox.critical(self, 'error', str(exc))
            return
        QMessageBox.information(self, 'success', '解压成功!')

    def on_button_clicked1(self):
        """Zip the selected file or folder into a folder chosen by the user.

        Prompts for a source folder first when nothing has been selected
        yet. A cancelled dialog or a failing archive write is reported in
        an error dialog.
        """
        if not self.file_path:
            self.select_folder()
        if not self.file_path:
            QMessageBox.critical(self, 'error', '压缩失败')
            return
        # Parent the dialog on this widget rather than on a module global.
        target_dir = QFileDialog.getExistingDirectory(
            self, "选择文件夹", options=QFileDialog.ShowDirsOnly)
        if not target_dir:
            QMessageBox.critical(self, 'error', '压缩失败')
            return
        try:
            self.compress_to_zip(self.file_path, target_dir)
        except Exception as exc:
            QMessageBox.critical(self, 'error', str(exc))
            return
        QMessageBox.information(self, 'success', '压缩成功!')

    def select_file(self):  # Choose a file
        """Ask for a file and store its path in ``self.file_path``."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择文件", "", "All Files (*)")
        if file_path:
            self.file_path = file_path

    def select_folder(self):
        """Ask for a folder and store its path in ``self.file_path``."""
        # getExistingDirectory returns a single string, not a tuple.
        folder = QFileDialog.getExistingDirectory(
            self, "选择文件夹", options=QFileDialog.ShowDirsOnly)
        if folder:
            self.file_path = folder

    def compress_to_zip(self, file_path, zip_path):
        """Write *file_path* (a file or a directory tree) to a zip archive.

        *zip_path* may be the archive file itself or an existing directory;
        in the latter case the archive is created inside that directory and
        named after the source (``<name>.zip``). Entries are stored under
        the source's own name instead of a fixed placeholder name.
        """
        source = os.path.normpath(file_path)
        if os.path.isdir(zip_path):
            base_name = os.path.basename(source) or 'archive'
            zip_path = os.path.join(zip_path, base_name + '.zip')
        archive_abs = os.path.abspath(zip_path)

        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            if not os.path.isdir(source):
                zipf.write(source, arcname=os.path.basename(source))
                return
            # Keep the top-level folder name inside the archive.
            parent = os.path.dirname(source)
            for folder, _subdirs, file_names in os.walk(source):
                for file_name in file_names:
                    full_path = os.path.join(folder, file_name)
                    # Do not add the archive to itself when it is being
                    # written inside the folder that is compressed.
                    if os.path.abspath(full_path) == archive_abs:
                        continue
                    zipf.write(full_path,
                               arcname=os.path.relpath(full_path, parent))
# Merge spreadsheets
# noinspection PyMethodMayBeStatic
class SelectDirectory(Layout):
def __init__(self):
    """Build the merge panel: a folder-picker button, a hidden progress
    bar and two hidden indicator lights (red and green)."""
    super().__init__()
    self.layout = QVBoxLayout()
    # Button that starts the merge by asking for the source folder.
    self.button_extract = QPushButton("选择合并文件路径", self)
    self.button_extract.setFont(QFont('等线', 11))
    self.button_extract.setGeometry(10, 60, 130, 30)
    self.button_extract.clicked.connect(self.choose_folder)
    # Progress bar, hidden at start.
    self.progress_bar = QProgressBar(self)
    self.progress_bar.setGeometry(155, 100, 668, 28)
    self.progress_bar.setVisible(False)
    # Indicator lights, hidden at start.
    self.red_light = QLabel(self)
    self.red_light.setGeometry(10, 100, 20, 20)
    self.red_light.setStyleSheet("background-color: red")
    self.red_light.setVisible(False)
    self.green_light = QLabel(self)
    self.green_light.setGeometry(35, 100, 20, 20)
    self.green_light.setStyleSheet("background-color: green")
    self.green_light.setVisible(False)
def choose_folder(self):
# noinspection PyUnresolvedReferences
self.red_light.setVisible(True)
self.progress_bar.setVisible(True)
folder = QFileDialog.getExistingDirectory(window, "选择文件夹", options=QFileDialog.ShowDirsOnly)
save_path = folder+'\\'+'合并结果.xlsx'
# 清空前一次合并结果
if os.path.isfile(save_path):
os.remove(save_path)
else:
pass
file_name = os.listdir(folder)
consolidated_number = file_name # 合并索引,后期需要可优化
total = len(consolidated_number)
workbook = xlsxwriter.Workbook(save_path, options={'strings_to_urls': False})
worksheet = workbook.add_worksheet('Sheet1')
count_1 = 0
n = 0
for i, _ in enumerate(tqdm(consolidated_number)):
path = folder + '\\' + _.replace('\n', '')
book = openpyxl.load_workbook(path)
sheet = book['Sheet1']
rows = sheet.max_row
columns = sheet.max_column
a = 1
做了那么多年开发,自学了很多门编程语言,我很明白学习资源对于学一门新语言的重要性,这些年也收藏了不少的Python干货,对我来说这些东西确实已经用不到了,但对于准备自学Python的人来说,或许它就是一个宝藏,可以给你省去很多的时间和精力。
别在网上瞎学了,我最近也做了一些资源的更新,只要你是我的粉丝,这期福利你都可拿走。