import csv  # 导入csv包
import pandas as pd  # 导入pandas包,用于建立csv文件
"""
问题一:浏览三个CSV文件,解释每个文件所含信息和之间的关联(写在代码
开头的注释里)。

①在class-descriptions-boxable.csv文件中,从左往右分别是
LabelName(标签名称)、Title(标题);

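② validation-annotations-human-imagelabels-boxable.csv has, from left to right, the columns
ImageID (image ID), Source (how the label was produced), LabelName (label name), and
Confidence (1 when the class was verified as present in the image, 0 when it was verified as absent);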

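③ validation-images-with-rotation.csv has, from left to right, the columns
ImageID (image ID), Subset (subset), OriginalURL (URL the original image is downloaded from),
OriginalLandingURL (URL of the web page the image was originally published on), License (license),
AuthorProfileURL (author profile URL), Author (author), Title (title), OriginalSize (original size),
OriginalMD5 (original MD5 hash), Thumbnail300KURL (optional URL of a thumbnail of roughly 300K pixels),
Rotation (rotation).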

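④ Relationships:
class-descriptions-boxable.csv and validation-annotations-human-imagelabels-boxable.csv
share LabelName (the label name);
class-descriptions-boxable.csv and validation-images-with-rotation.csv
both have a column called Title, but it holds the class display name in the former and the
image's own title in the latter, so it cannot be used to join them; these two files are only
related indirectly, through the annotations file;
validation-annotations-human-imagelabels-boxable.csv and validation-images-with-rotation.csv
share ImageID (the image ID);
A class name can therefore be resolved to its LabelName, then to the ImageIDs annotated with it,
and finally to each image's details.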


问题二:对于Cat、Dog、Rabbit三个类,分别找到包含该类且Confidence为1的
所有图片ID(ImageID),各自存入一个列表中。
"""
# Read class-descriptions-boxable.csv once. Each row is (LabelName, Title).
# The encoding is pinned so the result does not depend on the platform's
# default locale, and newline="" is what the csv module expects for files.
# Rows with fewer than two fields (e.g. a trailing blank line) are skipped
# instead of raising IndexError.
with open("./oidv6/class-descriptions-boxable.csv",
          encoding="utf-8", newline="") as cdb:
    class_rows = [row for row in csv.reader(cdb) if len(row) >= 2]

# Parallel lists: LabelName_1[k] is the label whose display name is Title[k].
LabelName_1 = [row[0] for row in class_rows]
Title = [row[1] for row in class_rows]

# Positions in Title at which each class of interest appears.
Title_cat = [i for i, x in enumerate(Title) if x == "Cat"]
Title_dog = [i for i, x in enumerate(Title) if x == "Dog"]
Title_rabbit = [i for i, x in enumerate(Title) if x == "Rabbit"]

# Label names found at those positions. They are built directly from the
# index lists, so each one is computed exactly once and is always defined
# (possibly empty), even when the file contains no rows.
LabelName_cat = [LabelName_1[i] for i in Title_cat]
LabelName_dog = [LabelName_1[i] for i in Title_dog]
LabelName_rabbit = [LabelName_1[i] for i in Title_rabbit]


# Read validation-annotations-human-imagelabels-boxable.csv once.
# Columns used: ImageID (index 0), LabelName (index 2), Confidence (index 3).
# The encoding is pinned so the result does not depend on the platform's
# default locale, and newline="" is what the csv module expects for files.
# Rows with fewer than four fields (e.g. a trailing blank line) are skipped.
with open("./oidv6/validation-annotations-human-imagelabels-boxable.csv",
          encoding="utf-8", newline="") as vahib:
    annotation_rows = [row for row in csv.reader(vahib) if len(row) >= 4]

# Parallel lists: entry k of each list comes from the same annotation row.
ImageID = [row[0] for row in annotation_rows]
LabelName_2 = [row[2] for row in annotation_rows]
Confidence = [row[3] for row in annotation_rows]

# Image IDs collected for the classes Cat, Dog and Rabbit.
ImageID_cat = []
ImageID_dog = []
ImageID_rabbit = []

# Keep the image ID of every annotation row whose Confidence is '1' and whose
# label belongs to one of the three classes. IDs are appended in file order.
for image_id, label, confidence in zip(ImageID, LabelName_2, Confidence):
    if confidence != '1':
        continue
    if label in LabelName_cat:
        ImageID_cat.append(image_id)
    if label in LabelName_dog:
        ImageID_dog.append(image_id)
    if label in LabelName_rabbit:
        ImageID_rabbit.append(image_id)



# Report the collected image IDs: one heading line per class, followed by
# that class's list on the next line.
print("Cat类图片ID列表:", ImageID_cat, sep="\n")
print("Dog类图片ID列表:", ImageID_dog, sep="\n")
print("Rabbit类图片ID列表:", ImageID_rabbit, sep="\n")


"""
问题三:找到2中图片的下载地址(OriginalURL),将每个类的结果分别写入
一个CSV文件(包含ImageID和OriginalURL两列,第一行为标题行),
分别命名为cat.csv, dog.csv和rabbit.csv。
"""
# Read validation-images-with-rotation.csv once.
# Columns used: ImageID (index 0) and OriginalURL (index 2). Index 3 is
# OriginalLandingURL, which is not the column this step is meant to export.
# The encoding is pinned so the result does not depend on the platform's
# default locale, and newline="" is what the csv module expects for files.
# Rows with fewer than three fields (e.g. a trailing blank line) are skipped.
with open("./oidv6/validation-images-with-rotation.csv",
          encoding="utf-8", newline="") as viwr:
    image_rows = [row for row in csv.reader(viwr) if len(row) >= 3]

# Parallel lists: OriginalURL[k] is the URL of the image ImageID[k].
ImageID = [row[0] for row in image_rows]
OriginalURL = [row[2] for row in image_rows]

# One dictionary lookup per wanted image replaces a scan of every class list
# for every row of the images file.
url_by_image_id = dict(zip(ImageID, OriginalURL))

# OriginalURL lists for Cat, Dog and Rabbit. Each list follows the order of
# the matching ImageID_* list, so entry k of both lists describes the same
# image. An ID that is absent from the images file gets an empty URL, which
# keeps the two lists the same length.
OriginalURL_cat = [url_by_image_id.get(image_id, "")
                   for image_id in ImageID_cat]
OriginalURL_dog = [url_by_image_id.get(image_id, "")
                   for image_id in ImageID_dog]
OriginalURL_rabbit = [url_by_image_id.get(image_id, "")
                      for image_id in ImageID_rabbit]



# Write one CSV file per class with the columns ImageID and OriginalURL.
# index=False keeps the DataFrame's row index out of the output file.
cat = pd.DataFrame(dict(ImageID=ImageID_cat, OriginalURL=OriginalURL_cat))
cat.to_csv("cat.csv", index=False, encoding='utf-8')

dog = pd.DataFrame(dict(ImageID=ImageID_dog, OriginalURL=OriginalURL_dog))
dog.to_csv("dog.csv", index=False, encoding='utf-8')

rabbit = pd.DataFrame(
    dict(ImageID=ImageID_rabbit, OriginalURL=OriginalURL_rabbit))
rabbit.to_csv("rabbit.csv", index=False, encoding='utf-8')