import pandas as pd

# Load the annotation table. header=None means the first CSV line is
# treated as data and columns are addressed by position.
df = pd.read_csv('/home/weidu/桌面/first1.csv', header=None)
data = df.values

# Column 0 is used later as the sample identifier and column 2 as the
# subtype label it is compared against.
first_col = [row[0] for row in data]
third_col = [row[2] for row in data]

# Map identifier -> label. If an identifier occurs more than once, the
# last row wins (plain dict semantics).
dict_data = dict(zip(first_col, third_col))
print(dict_data)



# One list of sample identifiers per subtype label of interest.
lminalB_list = []
ERBB2_list = []
TNBC_list = []

# Label text -> list that collects identifiers carrying that label.
# NOTE(review): the label is spelled 'lminalB' here; confirm it matches the
# spelling actually used in the CSV.
subtype_buckets = {
    'lminalB': lminalB_list,
    'ERBB2': ERBB2_list,
    'TNBC': TNBC_list,
}

for sample_id, subtype in dict_data.items():
    # Identifiers use underscores in the CSV (e.g. 'TCGA_A1_A0SM'); convert
    # them to hyphens (e.g. 'TCGA-A1-A0SM') before storing.
    sample_id = sample_id.replace('_', '-')
    bucket = subtype_buckets.get(subtype)
    if bucket is not None:
        bucket.append(sample_id)

# Report the size and contents of every bucket.
for bucket in (lminalB_list, ERBB2_list, TNBC_list):
    print(len(bucket))
    print(bucket)


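#### File-matching step
# 1. Walk the brca299 folder.
# 2. Copy the samples found there into the matching subtype folder
#    (e.g. lminalB or ERBB2):
#     2.1. Create a destination folder named after the sample folder, then copy every image inside the sample folder into it.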
import os
import shutil
from shutil import move

# 目标list,但是需要操作的文件名可能是a
# list = ['TCGA_A1_A0SE','TCGA_A1_A0SF','TCGA-BH-A0HS']

# path1 = "/home/weidu/桌面/test" #需要复制的文件所在地址
# Running count of images copied so far.
qwb = 0

# Folder whose entries are scanned for samples to copy.
path1 = "/home/weidu/桌面/brca299"
# Destination root for the subtype being exported in this run.
# For an ERBB2 run use "/home/weidu/桌面/subtype/ERBB2" instead.
path2 = "/home/weidu/桌面/subtype/TNBC"

# Entries directly under the source folder; print how many there are,
# then the names themselves, each on its own line.
filename_list = os.listdir(path1)
print(len(filename_list), filename_list, sep='\n')

import shutil
from shutil import copy
from shutil import move

# Copy the images of every TNBC sample found under path1 into a folder of
# the same name under path2.
#
# Expected layout (as walked below): path1/<sample>/<level>/<image>.
# All images of a sample end up directly inside path2/<sample>.
# NOTE(review): images from different levels that share a file name will
# overwrite each other in the destination; confirm this is intended.

# str.startswith accepts a tuple of prefixes; build it once instead of
# scanning TNBC_list by index for every entry.
tnbc_prefixes = tuple(TNBC_list)

for filename in filename_list:
    # '.dzi' descriptor files are not sample folders; skip them.
    if os.path.splitext(filename)[1] == '.dzi':
        continue

    YuanPath = os.path.join(path1, filename)  # source sample path
    print("样本路径:", YuanPath)
    print("样本名:", filename)

    # Process the sample once if its name starts with any TNBC identifier.
    # Non-directory entries are ignored so os.listdir cannot fail on them.
    if filename.startswith(tnbc_prefixes) and os.path.isdir(YuanPath):
        # Destination folder for this sample; created on first use.
        MuBiaoPath = os.path.join(path2, filename)
        os.makedirs(MuBiaoPath, exist_ok=True)

        for testname in os.listdir(YuanPath):
            print(testname)  # name of the level folder (e.g. 0.625)
            YuanPath_2 = os.path.join(YuanPath, testname)
            print("分割等级:", YuanPath_2)

            # Stray files next to the level folders cannot be listed.
            if not os.path.isdir(YuanPath_2):
                continue

            test2_list = os.listdir(YuanPath_2)
            print("图片列表:", test2_list)

            for test2name in test2_list:
                print(test2name)  # image file in the innermost folder
                # Use a dedicated name for the image path so the outer loop
                # variable `filename` is never overwritten.
                image_path = os.path.join(YuanPath_2, test2name)
                copy(image_path, MuBiaoPath)
                qwb = qwb + 1
                print("匹配成功", qwb, "个")

    # Separator after every non-.dzi entry. Absolute paths are used
    # throughout so the walk does not depend on the working directory.
    print("----------------------------")




            # for i in range(len(rulist)):
            #     # print(list[i])
            #     if testname.startswith(rulist[i]):
            #     # if testname.endswith('.partial'):
            #         filename = path1 + '/' + portion[0]
            #         newname =  path2 + '/' + portion[0]
            #         move(filename, newname)
            #         qwb = qwb + 1
            #         print("匹配成功",qwb,"个")