import csv # 导入csv包
import pandas as pd # 导入pandas包,用于建立csv文件
"""
问题一:浏览三个CSV文件,解释每个文件所含信息和之间的关联(写在代码
开头的注释里)。
①在class-descriptions-boxable.csv文件中,从左往右分别是
LabelName(标签名称)、Title(标题);
②在validation-annotations-human-imagelabels-boxable.csv文件中,从左往右分别是
ImageID(图像ID),Source(图像来源),LabelName(标签名称),Confidence(图像某个小区域是否被用作识别目标);
③在validation-images-with-rotation.csv文件中,从左往右分别是
ImageID(图像ID),Subset(子集),OriginalURL(原始url地址),OriginalLandingURL(原始下载url地址),License(许可),
AuthorProfileURL(作者配置文件地址),Author(作者),Title(标题),OriginalSize(原始大小),OriginalMD5(原始MD5值),
Thumbnail300KURL(约300K像素的缩略图的可选网址),Rotation(旋转)。
④关联:
class-descriptions-boxable.csv文件与validation-annotations-human-imagelabels-boxable.csv文件
含有共同的内容是LabelName(标签名称);
class-descriptions-boxable.csv文件与validation-images-with-rotation.csv文件
含有共同的内容是Title(标题);
validation-annotations-human-imagelabels-boxable.csv文件与validation-images-with-rotation.csv文件
含有共同的内容是ImageID(图像ID);
三者可以通过共同的内容互相定位和检索其他内容。
问题二:对于Cat、Dog、Rabbit三个类,分别找到包含该类且Confidence为1的
所有图片ID(ImageID),各自存入一个列表中。
"""
# Question 2: for each of the classes Cat, Dog and Rabbit, collect the ImageID
# of every image annotated with that class at Confidence 1.
# Question 3: look up the download address (OriginalURL) of those images and
# write one CSV file per class (columns ImageID and OriginalURL, header row
# first), named cat.csv, dog.csv and rabbit.csv.

# Input files, relative to the current working directory.
CLASS_DESCRIPTIONS_CSV = "./oidv6/class-descriptions-boxable.csv"
ANNOTATIONS_CSV = "./oidv6/validation-annotations-human-imagelabels-boxable.csv"
IMAGES_CSV = "./oidv6/validation-images-with-rotation.csv"

# (class title, output file) pairs, processed in this order.
CLASS_OUTPUTS = (
    ("Cat", "cat.csv"),
    ("Dog", "dog.csv"),
    ("Rabbit", "rabbit.csv"),
)

# Column positions used below:
#   class descriptions: 0 = LabelName, 1 = Title
#   annotations:        0 = ImageID, 2 = LabelName, 3 = Confidence
#   images:             0 = ImageID, 2 = OriginalURL (3 is OriginalLandingURL)
ORIGINAL_URL_COLUMN = 2


def iter_csv_rows(path):
    """Yield every row of the CSV file at *path* as a list of strings.

    The file is opened with ``newline=""`` as the csv module requires, and
    decoded as UTF-8 so the result does not depend on the platform locale.
    """
    with open(path, newline="", encoding="utf-8") as handle:
        for row in csv.reader(handle):
            yield row


def label_names_for(class_rows, title):
    """Return the LabelName of every class row whose Title equals *title*.

    Rows with fewer than two columns (for example blank lines) are skipped.
    """
    return [row[0] for row in class_rows if len(row) > 1 and row[1] == title]


def confident_image_ids(annotation_rows, label_names):
    """Return the ImageIDs annotated with one of *label_names* at Confidence 1.

    The result keeps the order of *annotation_rows*. Confidence is compared as
    the string ``'1'`` because the csv module yields every field as text; a
    header row therefore never matches. Rows with fewer than four columns are
    skipped.
    """
    wanted = set(label_names)
    return [
        row[0]
        for row in annotation_rows
        if len(row) > 3 and row[2] in wanted and row[3] == '1'
    ]


def original_urls(image_rows, wanted_ids):
    """Map each ImageID in *wanted_ids* found in *image_rows* to its OriginalURL.

    Only rows whose ImageID is wanted are kept, so the mapping stays small
    even when *image_rows* streams a large file.
    """
    wanted = set(wanted_ids)
    return {
        row[0]: row[ORIGINAL_URL_COLUMN]
        for row in image_rows
        if len(row) > ORIGINAL_URL_COLUMN and row[0] in wanted
    }


def url_table(image_ids, url_by_id):
    """Build the ImageID/OriginalURL table for one class.

    Each URL is looked up by its own ImageID, so every row pairs an ID with
    the URL that belongs to it regardless of the order of the input files.

    Raises:
        ValueError: if an ID in *image_ids* has no entry in *url_by_id*.
    """
    image_ids = list(image_ids)
    missing = [image_id for image_id in image_ids if image_id not in url_by_id]
    if missing:
        raise ValueError(
            "no OriginalURL found for {} ImageID(s), e.g. {}".format(
                len(missing), missing[:5]))
    return pd.DataFrame({
        "ImageID": image_ids,
        "OriginalURL": [url_by_id[image_id] for image_id in image_ids],
    })


def main():
    """Answer questions 2 and 3: print the ID lists, then write the CSV files."""
    # Each input file is read exactly once.
    class_rows = list(iter_csv_rows(CLASS_DESCRIPTIONS_CSV))
    annotation_rows = list(iter_csv_rows(ANNOTATIONS_CSV))

    image_ids_by_class = {}
    for title, _ in CLASS_OUTPUTS:
        labels = label_names_for(class_rows, title)
        image_ids_by_class[title] = confident_image_ids(annotation_rows, labels)

    for title, _ in CLASS_OUTPUTS:
        print("{}类图片ID列表:".format(title))
        print(image_ids_by_class[title])

    # One pass over the images file serves all three classes.
    wanted_ids = set()
    for image_ids in image_ids_by_class.values():
        wanted_ids.update(image_ids)
    url_by_id = original_urls(iter_csv_rows(IMAGES_CSV), wanted_ids)

    # index=False keeps the pandas row index out of the output files.
    for title, output_path in CLASS_OUTPUTS:
        table = url_table(image_ids_by_class[title], url_by_id)
        table.to_csv(output_path, encoding='utf-8', index=False)


if __name__ == "__main__":
    main()