python批量提取sheet Python批量提取txt中指定内容

转载

jowvid 2023-12-20 10:00:41

文章标签 python批量提取sheet python txt文件文件名数据集 文章分类 Python 后端开发

本段代码根据指定内容（数字1）进行筛选：遍历目录下的所有txt文件，只要文件中某一行的第一个元素为数字1，就将该txt文件复制保存至其他文件夹。

# 提取全部txt文件中 含有pf缺陷的txt文件 保存至其他文件夹中

import os
import shutil

def read_txt_files(path):
    """Collect the paths of every ``.txt`` file below *path*.

    The directory tree is walked recursively with ``os.walk``; each returned
    entry is the walked folder joined with the file name, so the results
    carry the same prefix (absolute or relative) as *path* itself.

    Args:
        path: Root directory to search.

    Returns:
        A list of file paths whose names end with ``'.txt'`` (the suffix
        check is case-sensitive). The list is empty when nothing matches.
    """
    txt_paths = []
    for folder, _subdirs, entries in os.walk(path):
        # Keep only the entries of this folder that carry the .txt suffix.
        txt_paths.extend(
            os.path.join(folder, entry)
            for entry in entries
            if entry.endswith('.txt')
        )
    return txt_paths

def read_txt_file(file_name):
    """Return the first whitespace-separated token of each non-blank line.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list of strings, one per line that contains at least one token,
        in file order. An empty file yields an empty list.
    """
    first_elements = []
    with open(file_name, 'r') as file:
        # Stream the file line by line instead of loading it all at once.
        for line in file:
            tokens = line.split()
            # Blank or whitespace-only lines have no first token; indexing
            # them would raise IndexError, so they are skipped.
            if tokens:
                first_elements.append(tokens[0])
    return first_elements

def main(path, new_save_path):
    """Copy every ``.txt`` file under *path* that has a line starting with ``1``.

    A file matches when the first whitespace-separated element of at least
    one of its lines is the string ``'1'``. Matching files are copied into
    *new_save_path* under their own base name, and the number of matches is
    printed at the end.

    Args:
        path: Root directory searched for ``.txt`` files.
        new_save_path: Directory that receives the copies; it is created on
            demand when the first matching file is copied.
    """
    # Paths of all .txt files; they already include `path` as their prefix.
    file_names = read_txt_files(path)
    matched_count = 0
    for file_name in file_names:
        first_elements = read_txt_file(file_name)
        if '1' not in first_elements:
            continue

        matched_count += 1
        print('File with number 1 found:', file_name)

        # `file_name` is already a complete path, so it must not be joined
        # with `path` again (that duplicates the prefix for relative paths).
        src_file = file_name
        # Files are copied flat: same-named files from different
        # sub-folders end up at the same destination path.
        dst_file = os.path.join(new_save_path, os.path.basename(file_name))

        # Compare against the real destination file so that a source that
        # already lives in the target folder is not copied onto itself.
        if os.path.abspath(src_file) != os.path.abspath(dst_file):
            # Without the directory, shutil.copy would create one plain file
            # named `new_save_path` and overwrite it for every match.
            os.makedirs(new_save_path, exist_ok=True)
            shutil.copy(src_file, dst_file)
        else:
            print(f'{src_file} and {dst_file} are the same file, skipping copy.')
    print("共输出", matched_count)


# Entry point: runs only when this file is executed as a script.
if __name__ == '__main__':
    # Directory that holds the .txt files to scan.
    path = r"D:\YWJ\数据集处理0211\all_abels"
    # Directory that receives the copies of the matching files.
    new_save_path = r"D:\YWJ\数据集处理0211\SJD陷\SJD_label"
    main(path=path, new_save_path=new_save_path)

下段代码根据txt文件名提取相对应的img图片：

# srcfile 需要复制、移动的文件
# dstpath 目的地址
# 代码实现功能（切换功能时需修改下方标注的对应代码行）：
# 功能一：根据txt文件名提取出对应文件名的图片
# 功能二：根据images图片名提取出对应的txt文件
import os
import shutil
from glob import glob
def mycopyfile(srcfile, dstpath):
    """Copy *srcfile* into the directory *dstpath*, keeping its file name.

    Args:
        srcfile: Path of the file to copy. If it is not an existing regular
            file, a message is printed and nothing is copied.
        dstpath: Destination directory, with or without a trailing path
            separator. Missing directories are created.
    """
    if not os.path.isfile(srcfile):
        print("%s not exist!" % (srcfile))
        return

    fname = os.path.basename(srcfile)
    os.makedirs(dstpath, exist_ok=True)
    # os.path.join inserts a separator only when one is missing; plain
    # concatenation would glue the file name onto the directory name when
    # `dstpath` has no trailing slash.
    dst_file = os.path.join(dstpath, fname)
    shutil.copy(srcfile, dst_file)
    print("copy %s -> %s" % (srcfile, dst_file))



# Folder the files are copied FROM (the images when extracting images by
# label name; the labels when extracting labels by image name).
src_dir = r'D:\YWJ\数据集处理0211\数据清洗/'
# Folder the files are copied TO. Keep the trailing separator on this path.
dst_dir = r'D:\YWJ\数据集处理0211\ytx缺陷\ytx_IMG/'
# Folder whose file names decide which files get copied.
txt_path = r'D:\YWJ\数据集处理0211\ytx缺陷\ytx_label'

txt_names = os.listdir(txt_path)

# Base names (extension removed) of the .txt files in txt_path.
# os.path.splitext removes only the final extension, so a name that merely
# contains ".txt" somewhere in the middle is no longer cut short. Entries
# that are not .txt files are skipped instead of being looked up as
# "<entry>.jpg".
# For the reverse direction (image names -> label files) filter on ".jpg"
# here and append ".txt" below.
name = [
    os.path.splitext(entry)[0]
    for entry in txt_names
    if entry.endswith(".txt")
]

# Path of the .jpg in src_dir that shares each base name.
src_file_list = [os.path.join(src_dir, stem + ".jpg") for stem in name]

for srcfile in src_file_list:
    mycopyfile(srcfile, dst_dir)  # reports and skips sources that do not exist

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。