gpt4 book ai didi

Python 统计数据集标签的类别及数目操作

转载 作者:qq735679552 更新时间:2022-09-27 22:32:09 25 4
gpt4 key购买 nike

CFSDN坚持开源创造价值,我们致力于搭建一个资源共享平台,让每一个IT人在这里找到属于你的精彩世界.

这篇CFSDN的博客文章《Python 统计数据集标签的类别及数目操作》由作者收集整理,如果你对这篇文章有兴趣,记得点赞哟。

看了大神统计 VOC 数据集标签框的方法后,针对自己标注的数据集灵活应用,感谢!

看代码吧~

"""Count, for each label of interest, the images and bounding boxes using it.

The script walks ``annotation_folder`` for PASCAL VOC style ``.xml`` files and
prints one line per label (``label images boxes``) followed by a ``total``
line (``total images boxes``).
"""
import os
import re
import xml.etree.ElementTree as ET
from collections import Counter

# Labels to report, in print order.  To analyse the standard VOC data set,
# extend this tuple with "bus", "car", "cat", "chair", "cow", "diningtable",
# "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
# "train" and "tvmonitor".
CLASS_NAMES = ("answer", "hand", "write", "music", "phone")

annotation_folder = "/home/.../train/"  # change to your own annotation folder
# annotation_folder = "/home/.../VOC2007/Annotations/"


def file_name(file_dir):
    """Return the paths of all ``.xml`` files below *file_dir* (recursive)."""
    return [
        os.path.join(root, file)
        for root, dirs, files in os.walk(file_dir)
        for file in files
        if os.path.splitext(file)[1] == ".xml"
    ]


def count_labels(xml_paths, class_names=CLASS_NAMES):
    """Tally label usage over the annotation files in *xml_paths*.

    Each path is printed as it is processed.  Labels that are not listed in
    *class_names* are ignored.

    Returns a tuple ``(box_counts, pic_counts, total_pic, total)``:
    ``box_counts[label]`` is the number of objects carrying the label,
    ``pic_counts[label]`` the number of files containing it at least once,
    ``total_pic`` the number of files read and ``total`` the number of
    counted objects over all labels.
    """
    wanted = set(class_names)
    box_counts = Counter()
    pic_counts = Counter()
    total_pic = 0
    for xml_path in xml_paths:
        print(xml_path)
        # ET.parse opens and closes the file itself and honours the encoding
        # declared in the XML, unlike a bare open(...).read().
        root = ET.parse(xml_path).getroot()
        total_pic += 1
        # findtext() yields None for an object without <name>, which is
        # simply not a wanted label.
        labels = [obj.findtext("name") for obj in root.findall("object")]
        labels = [label for label in labels if label in wanted]
        box_counts.update(labels)
        pic_counts.update(set(labels))  # at most one hit per file and label
    return box_counts, pic_counts, total_pic, sum(box_counts.values())


def main():
    """Run the statistics over ``annotation_folder`` and print the report."""
    box_counts, pic_counts, total_pic, total = count_labels(
        file_name(annotation_folder)
    )
    for name in CLASS_NAMES:
        print(name, pic_counts[name], box_counts[name])
    print("total", total_pic, total)


if __name__ == "__main__":
    main()

补充:【数据集处理】Python对目标检测数据集xml文件操作(统计目标种类、数量、面积、比例等&修改目标名字)。

1. 根据xml文件统计目标种类以及数量

# -*- coding:utf-8 -*-
"""Count the object classes found in a folder of VOC style xml annotations."""
import os
import xml.etree.ElementTree as ET
from collections import Counter

import numpy as np

# threshold must be a number: recent NumPy versions reject NaN here.
np.set_printoptions(suppress=True, threshold=10000000)


def parse_obj(xml_path, filename):
    """Return ``[{"name": label}, ...]`` for every object in one annotation.

    *xml_path* is the annotation folder (with or without a trailing
    separator) and *filename* the file name inside it.
    """
    tree = ET.parse(os.path.join(xml_path, filename))
    return [{"name": obj.find("name").text} for obj in tree.findall("object")]


def read_image(image_path, filename):
    """Return ``[width, height, area]`` of the image *filename* in *image_path*."""
    # Imported here so that the counting below works without Pillow installed.
    from PIL import Image

    with Image.open(os.path.join(image_path, filename)) as im:
        W, H = im.size
    return [W, H, W * H]


def count_objects(xml_path):
    """Return a ``Counter`` mapping class name to number of objects.

    Only ``.xml`` entries of *xml_path* are read, in sorted order, so the
    counter's keys appear in a reproducible first-seen order.
    """
    num_objs = Counter()
    for entry in sorted(os.listdir(xml_path)):
        if not entry.endswith(".xml"):
            continue  # not an annotation file
        num_objs.update(rec["name"] for rec in parse_obj(xml_path, entry))
    return num_objs


if __name__ == "__main__":
    xml_path = "/home/dlut/网络/make_database/数据集――合集/VOCdevkit/VOC2018/Annotations/"
    for name, count in count_objects(xml_path).items():
        print("{}:{}个".format(name, count))
    print("信息统计算完毕。")

Python 统计数据集标签的类别及数目操作

2. 根据xml文件统计目标的平均长度、宽度、面积以及每一个目标在原图中的占比

# -*- coding:utf-8 -*-
"""Per-class bounding-box statistics for a VOC style data set.

For every class the script reports the average box width, height and area,
and the average ratio of each of those to the width, height and area of the
image the box belongs to.
"""
import os
import xml.etree.ElementTree as ET

import numpy as np

np.set_printoptions(suppress=True, threshold=10000000)  # 10,000,000


def parse_obj(xml_path, filename):
    """Return the objects of one annotation file.

    Each object is ``{"name": label, "bbox": [xmin, ymin, xmax, ymax]}`` with
    integer coordinates.  *xml_path* is the annotation folder (with or
    without a trailing separator), *filename* the file name inside it.
    """
    tree = ET.parse(os.path.join(xml_path, filename))
    objects = []
    for obj in tree.findall("object"):
        bbox = obj.find("bndbox")
        objects.append({
            "name": obj.find("name").text,
            # int(float(...)) also accepts coordinates written as "12.0".
            "bbox": [int(float(bbox.find(tag).text))
                     for tag in ("xmin", "ymin", "xmax", "ymax")],
        })
    return objects


def read_image(image_path, filename):
    """Return ``[width, height, area]`` of the image *filename* in *image_path*."""
    # Imported here so the pure statistics helpers work without Pillow.
    from PIL import Image

    with Image.open(os.path.join(image_path, filename)) as im:
        W, H = im.size
    return [W, H, W * H]


def average_shapes(recs, ims_info):
    """Average the box measurements of every class.

    *recs* maps an image name to its ``parse_obj`` result and *ims_info* maps
    the same names to ``[width, height, area]`` of the image.

    Returns a dict, in first-seen class order, mapping class name to a NumPy
    array ``[avg_w, avg_h, avg_area, avg_w_ratio, avg_h_ratio,
    avg_area_ratio]``.  Every object contributes to its class, whatever its
    position inside the annotation; images without objects are skipped.
    """
    obs_shape = {}
    for name, objects in recs.items():
        im_w, im_h, im_area = ims_info[name]
        for obj in objects:
            xmin, ymin, xmax, ymax = obj["bbox"]
            ob_w = xmax - xmin
            ob_h = ymax - ymin
            ob_area = ob_w * ob_h
            obs_shape.setdefault(obj["name"], []).append(
                [ob_w, ob_h, ob_area,
                 ob_w / im_w, ob_h / im_h, ob_area / im_area]
            )
    return {
        name: np.array(rows).sum(axis=0) / len(rows)
        for name, rows in obs_shape.items()
    }


if __name__ == "__main__":
    image_path = "/home/dlut/网络/make_database/数据集――合集/VOCdevkit/VOC2018/JPEGImages/"
    xml_path = "/home/dlut/网络/make_database/数据集――合集/VOCdevkit/VOC2018/Annotations/"
    # Only .xml entries are annotations; sorting makes the run reproducible.
    filenames = [
        os.path.splitext(entry)[0]
        for entry in sorted(os.listdir(xml_path))
        if entry.endswith(".xml")
    ]
    print(filenames)
    recs = {}
    ims_info = {}
    for name in filenames:
        print("正在处理 {}.xml ".format(name))
        recs[name] = parse_obj(xml_path, name + ".xml")
        print("正在处理 {}.jpg ".format(name))
        ims_info[name] = read_image(image_path, name + ".jpg")
    print("所有信息收集完毕。")
    print("正在处理信息......")
    obj_avg = average_shapes(recs, ims_info)
    # Report the averages
    for name, avg in obj_avg.items():
        print("{}的情况如下:*******".format(name))
        print("  目标平均W={}".format(avg[0]))
        print("  目标平均H={}".format(avg[1]))
        print("  目标平均area={}".format(avg[2]))
        print("  目标平均与原图的W比例={}".format(avg[3]))
        print("  目标平均与原图的H比例={}".format(avg[4]))
        print("  目标平均原图面积占比={}".format(avg[5]))
    print("信息统计计算完毕。")

Python 统计数据集标签的类别及数目操作

3.修改xml文件中某个目标的名字为另一个名字

"""Rename object classes inside VOC style xml annotation files, in place."""
import glob
import os
import sys
from xml.etree import ElementTree as ET

# Class name to replace -> class name to write instead.
RENAME_MAP = {
    "PinNormal": "normal bolt",
    "PinDefect": "defect bolt-1",
}

# Folder whose *.xml annotation files are processed.
xml_dir = r"/home/dlut/网络/make_database/数据集――合集/VOCdevkit/VOC2018/Annotations"


def rename_objects(xml_file, rename_map=None):
    """Rewrite *xml_file*, renaming objects according to *rename_map*.

    *rename_map* maps old class names to new ones and defaults to
    ``RENAME_MAP``.  The file is always written back, as UTF-8 so that
    non-ASCII text is kept as-is rather than turned into character references.

    Returns the ``<name>`` element of the last object in the file, or
    ``None`` when the file contains no named object.
    """
    if rename_map is None:
        rename_map = RENAME_MAP
    per = ET.parse(xml_file)
    last_name = None
    # "./object" is the warning-free spelling of the old "/object" search.
    for oneper in per.findall("./object"):
        # Element.getchildren() no longer exists (removed in Python 3.9);
        # look the <name> child up explicitly instead.
        child = oneper.find("name")
        if child is None:
            continue
        child.text = rename_map.get(child.text, child.text)
        last_name = child
    per.write(xml_file, encoding="utf-8")
    return last_name


if __name__ == "__main__":
    # Process every xml file directly inside xml_dir.
    for xml_file in glob.glob(xml_dir + "/*.xml"):
        print(xml_file)
        last = rename_objects(xml_file)
        if last is not None:
            print(last.tag, ":", last.text)

Python 统计数据集标签的类别及数目操作

修改为:

Python 统计数据集标签的类别及数目操作

以上为个人经验,希望能给大家一个参考,也希望大家多多支持我.

原文链接:https://blog.csdn.net/weixin_41991401/article/details/89517903

最后,这篇关于《Python 统计数据集标签的类别及数目操作》的文章就讲到这里了。如果你想了解更多关于 Python 统计数据集标签的类别及数目操作的内容,请搜索CFSDN的文章或继续浏览相关文章,希望大家以后支持我的博客!

25 4 0