반응형
안녕하세요
데이터셋을 모으다보면 제가 사용하고있는 데이터로더의 형태와 맞지않은 annotation 파일들을 볼 수가 있습니다.
그런 경우 dataloader를 annotation파일에 맞게 수정을 진행을 하거나
annotation 파일 형식을 현재 제가 사용하고있는 dataloader형식에 맞추어 사용하고 있습니다.
이번에는 후자의 경우에 어떤식으로 바꾸는지 알아보는 시간을 가지도록 하겠습니다.
우선 제가 다운받은 데이터셋의 annotation 파일의 형태를 살펴봐야합니다. 그리고 제가 사용하고있는 dataloader는 어떤 데이터를 필요로 하는지 살펴봐야할텐데요.
여기서 제가 원하는 데이터는 category, box2d, name정도 입니다.
import os
import json
from xml.etree.ElementTree import Element, SubElement, ElementTree
import cv2
"""
bdd100k dataset json structure convert to xml file
bbox structure x_min, y_min. x_max, y_max
"""
def check_double_slash(name):
if not name.find('//') == -1:
name = name.replace('//','/')
return name
def createFolder(directory):
try:
if not os.path.isdir(directory):
os.makedirs(directory)
except OSError:
print("Error: creating directory," + directory)
def indent(elem, level=1):
i = "\n" + level*" "
if len(elem):
if not elem.text or not elem.text.strip():
elem.text = i + " "
if not elem.tail or not elem.tail.strip():
elem.tail = i
for elem in elem:
indent(elem, level+1)
if not elem.tail or not elem.tail.strip():
elem.tail = i
else:
if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
def write_to_xml(result, IMAGE_PATH, XML_SAVE_PATH, IMAGE_SAVE_PATH):
#get image_name
image_name = result['name']
#get bboxes
"""
category, x_min, y_min, x_max, y_max
"""
bboxes_list = result['bbox']
#get category
category_list = result['category']
root = Element('Annotations')
SubElement(root,'filename').text = image_name
img = cv2.imread(os.path.join(IMAGE_PATH,image_name))
h,w,d = img.shape
size = SubElement(root,'size')
SubElement(size,'width').text = str(w)
SubElement(size,'height').text = str(h)
SubElement(size,'depth').text = str(d)
for i in range(len(bboxes_list)):
category = category_list[i]
x_min = int(bboxes_list[i]['x1'])
y_min = int(bboxes_list[i]['y1'])
x_max = int(bboxes_list[i]['x2'])
y_max = int(bboxes_list[i]['y2'])
obj = SubElement(root,'object')
SubElement(obj,'name').text = str(category)
SubElement(obj,'difficult').text = str(0)
bndbox = SubElement(obj,'bndbox')
SubElement(bndbox,'xmin').text = str(x_min)
SubElement(bndbox,'ymin').text = str(y_min)
SubElement(bndbox,'xmax').text = str(x_max)
SubElement(bndbox,'ymax').text = str(y_max)
tree = ElementTree(root)
indent(root)
anno_path = XML_SAVE_PATH+"/"+image_name.split('.')[0] +'.xml'
anno_path = check_double_slash(anno_path)
img_path = IMAGE_SAVE_PATH+"/"+image_name
img_path = check_double_slash(img_path)
print(img_path)
cv2.imwrite(img_path,img)
tree.write(anno_path)
if __name__ == "__main__":
"""
User can modify PATH information
"""
XML_SAVE_PATH = "xml/"
IMAGE_SAVE_PATH = "image/"
TARGET_PATH = "det_20/det_train.json"
IMAGE_PATH = "../images/100k/train/"
CATEGORY = "car" #if want various category, change str to list
#check TARGET_PATH
if not os.path.isfile(TARGET_PATH):
raise Exception("There is no that dir:",TARGET_PATH)
#make save_path dir
createFolder(XML_SAVE_PATH)
createFolder(IMAGE_SAVE_PATH)
#read json file
result_list = []
name_category_bbox = {}
with open(TARGET_PATH,"r",encoding="utf8") as file:
doc = json.load(file)
for i in range(len(doc)):
keyList = doc[i].keys
name = doc[i]['name']
count = 0
category_list = []
bbox_list = []
if not doc[i].get('labels'):
continue
for k in range(len(doc[i]['labels'])):
category = doc[i]['labels'][k]['category']
if isinstance(CATEGORY,str) and category != CATEGORY:
continue
elif isinstance(CATEGORY,list) and category not in CATEGORY:
continue
count = 1
bbox = doc[i]['labels'][k]['box2d']
category_list.append(category)
bbox_list.append(bbox)
if not count == 0:
#append
name_category_bbox = {}
name_category_bbox["name"] = name
name_category_bbox["category"] = category_list
name_category_bbox["bbox"] = bbox_list
result_list.append(name_category_bbox)
#generate xml file
for result in result_list:
write_to_xml(result,IMAGE_PATH,XML_SAVE_PATH,IMAGE_SAVE_PATH)
print("Done covert to xml!")
반응형
'python' 카테고리의 다른 글
[python] class 오버라이딩(overriding), 상속, super() (0) | 2021.11.09 |
---|---|
DataLoader worker is killed by signal: Killed 에러 (0) | 2021.11.04 |
[Python] 여러 GPU중 특정 GPU만 사용하고 싶을 때 (0) | 2021.11.03 |
[python]확장자 일괄 변경 (0) | 2021.11.03 |
[python] list함수 (append, insert, pop, remove, sort) (0) | 2021.06.29 |