# Pascal VOC to COCO converter (Python script)
import xml.etree.ElementTree as ET
import json
import os
def _voc_file_to_coco(xml_path, image_id):
    """Parse one Pascal VOC XML file into a COCO-style dictionary.

    Args:
        xml_path: Path of the Pascal VOC annotation file to read.
        image_id: Identifier stored for the image and referenced by each of
            its annotations.

    Returns:
        A dict with the keys "annotations", "categories" and "images".
        Category ids start at 0 and follow the order in which class names
        first appear in this file.
    """
    root = ET.parse(xml_path).getroot()

    image_info = {
        "id": image_id,
        "file_name": root.find("filename").text,
    }
    # Image dimensions are copied over only when the XML provides them.
    for dimension in ("width", "height"):
        text = root.findtext("size/" + dimension)
        if text is not None:
            image_info[dimension] = int(float(text))

    classes = {}
    categories = []
    annotations = []
    for obj in root.findall("object"):
        name = obj.find("name").text
        # int(float(...)) also accepts coordinates written as "12.0".
        xmin, ymin, xmax, ymax = (
            int(float(obj.find("bndbox/" + tag).text))
            for tag in ("xmin", "ymin", "xmax", "ymax")
        )

        if name not in classes:
            # The next free id is the number of classes seen so far; a
            # separate counter is not needed and cannot be clobbered.
            classes[name] = len(classes)
            categories.append({"id": classes[name], "name": name})

        box_width = xmax - xmin
        box_height = ymax - ymin
        annotations.append({
            "image_id": image_id,
            # COCO boxes are [x, y, width, height], not corner pairs.
            "bbox": [xmin, ymin, box_width, box_height],
            "area": box_width * box_height,
            "category_id": classes[name],
            "id": len(annotations) + 1,
            "iscrowd": 0,
        })

    return {
        "annotations": annotations,
        "categories": categories,
        "images": [image_info],
    }


def pascal_voc_to_coco(xml_folder_path):
    """Convert every Pascal VOC XML file in a folder to a COCO-style JSON file.

    For each ``*.xml`` file in ``xml_folder_path`` a JSON file with the same
    base name is written into the same folder. Each JSON file describes one
    image: its bounding-box annotations, the categories that occur in it,
    and the image entry itself. Files without the ``.xml`` suffix are
    ignored.

    Image ids are assigned 1, 2, 3, ... in sorted file-name order, so they
    are unique within the folder and stable between runs. Category ids are
    local to each output file.

    Args:
        xml_folder_path: Directory containing the Pascal VOC XML files.

    Raises:
        FileNotFoundError: If ``xml_folder_path`` does not exist.
        xml.etree.ElementTree.ParseError: If an XML file is malformed.
        AttributeError: If a required element (``filename``, ``name`` or a
            ``bndbox`` coordinate) is missing from an XML file.
    """
    # Sorted because os.listdir returns entries in arbitrary order, which
    # would make the assigned image ids differ from run to run.
    xml_files = sorted(
        entry for entry in os.listdir(xml_folder_path)
        if entry.endswith(".xml")
    )

    for image_id, xml_file in enumerate(xml_files, start=1):
        xml_path = os.path.join(xml_folder_path, xml_file)
        data = _voc_file_to_coco(xml_path, image_id)

        json_filename = os.path.splitext(xml_file)[0] + ".json"
        json_path = os.path.join(xml_folder_path, json_filename)
        with open(json_path, "w") as file:
            json.dump(data, file, indent=4)

    print("Conversion complete.")
# Script entry point: folder holding the Pascal VOC XML files to convert.
# pascal_voc_to_coco writes its JSON output into this same folder.
xml_folder_path = "C:\\Users\\USER-PC\\Desktop\\kaggle-sainfoin-dataset\\test\\"
# Alternative input folder ("train"); swap with the assignment above to use it.
#xml_folder_path = "C:\\Users\\USER-PC\\Desktop\\kaggle-sainfoin-dataset\\train\\"
# NOTE(review): this call is not behind an `if __name__ == "__main__":` guard,
# so the conversion also runs whenever this module is imported.
pascal_voc_to_coco(xml_folder_path)