pandas 读取xlsx文件复制文件

精选原创

进阶的西红柿 2024-08-03 00:27:53 博主文章分类：python需求 ©著作权

©著作权归作者所有：来自51CTO博客作者进阶的西红柿的原创作品，请联系作者获取转载授权，否则将追究法律责任

pandas 读取xlsx文件复制文件

import os
import shutil
import pandas as pd


def read_excel_and_copy_files(excel_path, files_directory, destination_directory):
    # 使用pandas加载Excel文件
    df = pd.read_excel(excel_path, header=None)  # 不使用默认的header行

    # 获取I列的所有内容，并与B列内容进行匹配
    for idx, row in df.iterrows():
        if idx >= 2 and pd.notnull(row[8]):  # 从第3行开始读取，索引从0开始
            file_name = row[8]  # I列对应索引8
            category = row[1]   # B列对应索引1

            # 如果category为空，将其设置为'other'
            if pd.isnull(category):
                category = 'other'

            # 遍历文件目录，查找匹配的文件
            file_found = False
            for root, dirs, files in os.walk(files_directory):
                if file_name in files:
                    file_found = True
                    folder_name = os.path.basename(root)

                    # 构建目标路径
                    target_directory = os.path.join(destination_directory, category, folder_name)
                    os.makedirs(target_directory, exist_ok=True)

                    # 复制当前文件夹中的所有文件（排除 .rar 文件）
                    for file in files:
                        if not file.endswith('.rar'):
                            source_file = os.path.join(root, file)
                            target_file = os.path.join(target_directory, file)
                            shutil.copy2(source_file, target_file)
                            print(f"复制文件 {file} 到 {target_file}")

                    break  # 找到文件后退出循环

            # 如果没有找到文件，输出错误信息
            if not file_found:
                print(f"文件 {file_name} 未找到")


if __name__ == "__main__":
    excel_path = rf"E:\programs\read_xlsx\chenghui.xlsx"  # Excel文件路径
    files_directory = rf"E:\programs\read_xlsx\bd"  # 要匹配文件的目录
    destination_directory = rf"E:\programs\read_xlsx\new"  # 自定义路径

    read_excel_and_copy_files(excel_path, files_directory, destination_directory)

只复制文件

import os
import shutil
from openpyxl import load_workbook


def read_excel_and_copy_files(excel_path, files_directory, destination_directory):
    # 加载Excel文件
    wb = load_workbook(excel_path)
    ws = wb.active

    # 获取I列的所有内容，并与B列内容进行匹配
    for idx, row in enumerate(ws.iter_rows(min_row=3, max_col=9, min_col=9, values_only=True), start=3):
        if row[0] is not None:
            file_name = row[0]
            category = ws[f"B{idx}"].value

            # 遍历文件目录，查找匹配的文件
            for root, dirs, files in os.walk(files_directory):
                if file_name in files:
                    source_file = os.path.join(root, file_name)

                    # 构建目标路径
                    target_directory = os.path.join(destination_directory, category)
                    os.makedirs(target_directory, exist_ok=True)
                    target_file = os.path.join(target_directory, file_name)

                    # 复制文件
                    shutil.copy2(source_file, target_file)
                    print(f"复制文件 {file_name} 到 {target_file}")
                    break  # 找到文件后退出循环


if __name__ == "__main__":
    excel_path = rf"E:\programs\read_xlsx\chenghui.xlsx"  # Excel文件路径
    files_directory = rf"E:\programs\read_xlsx\bd"  # 要匹配文件的目录
    destination_directory = rf"E:\programs\read_xlsx\new"  # 自定义路径

    read_excel_and_copy_files(excel_path, files_directory, destination_directory)

升级完善版本

"""
filename：
program：requirement
author：
dater：2024/07/10

-----------------------

"""
import os
import shutil
from openpyxl import load_workbook


def read_excel_and_copy_files(excel_path, files_directory, destination_directory):
    # 加载Excel文件
    wb = load_workbook(excel_path)
    ws = wb.active

    # 获取I列的所有内容，并与B列内容进行匹配
    for idx, row in enumerate(ws.iter_rows(min_col=9, max_col=9, min_row=3, values_only=True), start=3):
        if row[0] is not None:
            file_name = row[0]
            category = ws[f"B{idx}"].value

            # 如果category为空，将其设置为'other'
            if not category:
                category = 'other'

            # 遍历文件目录，查找匹配的文件
            file_found = False
            for root, dirs, files in os.walk(files_directory):
                if file_name in files:
                    file_found = True
                    folder_name = os.path.basename(root)

                    # 构建目标路径
                    target_directory = os.path.join(destination_directory, category, folder_name)
                    os.makedirs(target_directory, exist_ok=True)

                    # 复制当前文件夹中的所有文件（排除 .rar 文件）
                    for file in files:
                        if not file.endswith('.rar'):
                            source_file = os.path.join(root, file)
                            target_file = os.path.join(target_directory, file)
                            shutil.copy2(source_file, target_file)
                            print(f"复制文件 {file} 到 {target_file}")

                    break  # 找到文件后退出循环

            # 如果没有找到文件，输出错误信息
            if not file_found:
                print(f"文件 {file_name} 未找到")


if __name__ == "__main__":
    excel_path = rf"E:\programs\read_xlsx\chenghui.xlsx"  # Excel文件路径
    files_directory = rf"E:\programs\read_xlsx\bd"  # 要匹配文件的目录
    destination_directory = rf"E:\programs\read_xlsx\new"  # 自定义路径

    read_excel_and_copy_files(excel_path, files_directory, destination_directory)