detect.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. import copy
  2. import json
  3. import dataclasses
  4. from dataclasses import dataclass, field
  5. from typing import Optional, List
  6. import xml.etree.ElementTree as ET
  7. import khandy
  8. import lxml
  9. import lxml.builder
  10. import numpy as np
  11. __all__ = ['PascalVocSource', 'PascalVocSize', 'PascalVocBndbox',
  12. 'PascalVocObject', 'PascalVocInfo', 'PascalVocHandler',
  13. 'LabelmeShape', 'LabelmeInfo', 'LabelmeHandler',
  14. 'YoloObject', 'YoloInfo', 'YoloHandler',
  15. 'convert_pascal_voc_to_labelme', 'convert_labelme_to_pascal_voc',
  16. 'convert_labelme_to_yolo', 'convert_yolo_to_labelme',
  17. 'convert_pascal_voc_to_yolo', 'convert_yolo_to_pascal_voc']
  18. @dataclass
  19. class PascalVocSource:
  20. database: str = ''
  21. annotation: str = ''
  22. image: str = ''
  23. @dataclass
  24. class PascalVocSize:
  25. height: int
  26. width: int
  27. depth: int
  28. @dataclass
  29. class PascalVocBndbox:
  30. xmin: float
  31. ymin: float
  32. xmax: float
  33. ymax: float
  34. @dataclass
  35. class PascalVocObject:
  36. name: str
  37. pose: str = 'Unspecified'
  38. truncated: int = 0
  39. difficult: int = 0
  40. bndbox: Optional[PascalVocBndbox] = None
  41. @dataclass
  42. class PascalVocInfo:
  43. folder: str = ''
  44. filename: str = ''
  45. path: str = ''
  46. source: PascalVocSource = PascalVocSource()
  47. size: Optional[PascalVocSize] = None
  48. segmented: int = 0
  49. object: List[PascalVocObject] = field(default_factory=list)
  50. class PascalVocHandler:
  51. @staticmethod
  52. def load(filename) -> PascalVocInfo:
  53. pascal_voc_info = PascalVocInfo()
  54. xml_tree = ET.parse(filename)
  55. pascal_voc_info.folder = xml_tree.find('folder').text
  56. pascal_voc_info.filename = xml_tree.find('filename').text
  57. pascal_voc_info.path = xml_tree.find('path').text
  58. pascal_voc_info.segmented = xml_tree.find('segmented').text
  59. source_tag = xml_tree.find('source')
  60. pascal_voc_info.source = PascalVocSource(
  61. database=source_tag.find('database').text,
  62. # annotation=source_tag.find('annotation').text,
  63. # image=source_tag.find('image').text
  64. )
  65. size_tag = xml_tree.find('size')
  66. pascal_voc_info.size = PascalVocSize(
  67. width=int(size_tag.find('width').text),
  68. height=int(size_tag.find('height').text),
  69. depth=int(size_tag.find('depth').text)
  70. )
  71. object_tags = xml_tree.findall('object')
  72. for index, obj in enumerate(object_tags):
  73. bndbox_tag = obj.find('bndbox')
  74. bndbox = PascalVocBndbox(
  75. xmin=float(bndbox_tag.find('xmin').text) - 1,
  76. ymin=float(bndbox_tag.find('ymin').text) - 1,
  77. xmax=float(bndbox_tag.find('xmax').text) - 1,
  78. ymax=float(bndbox_tag.find('ymax').text) - 1
  79. )
  80. one_object = PascalVocObject(
  81. name=obj.find('name').text,
  82. pose=obj.find('pose').text,
  83. truncated=obj.find('truncated').text,
  84. difficult=obj.find('difficult').text,
  85. bndbox=bndbox
  86. )
  87. pascal_voc_info.object.append(one_object)
  88. return pascal_voc_info
  89. @staticmethod
  90. def save(filename, pascal_voc_info: PascalVocInfo):
  91. maker = lxml.builder.ElementMaker()
  92. xml = maker.annotation(
  93. maker.folder(pascal_voc_info.folder),
  94. maker.filename(pascal_voc_info.filename),
  95. maker.path(pascal_voc_info.path),
  96. maker.source(
  97. maker.database(pascal_voc_info.source.database),
  98. ),
  99. maker.size(
  100. maker.width(str(pascal_voc_info.size.width)),
  101. maker.height(str(pascal_voc_info.size.height)),
  102. maker.depth(str(pascal_voc_info.size.depth)),
  103. ),
  104. maker.segmented(str(pascal_voc_info.segmented)),
  105. )
  106. for one_object in pascal_voc_info.object:
  107. object_tag = maker.object(
  108. maker.name(one_object.name),
  109. maker.pose(one_object.pose),
  110. maker.truncated(str(one_object.truncated)),
  111. maker.difficult(str(one_object.difficult)),
  112. maker.bndbox(
  113. maker.xmin(str(float(one_object.bndbox.xmin))),
  114. maker.ymin(str(float(one_object.bndbox.ymin))),
  115. maker.xmax(str(float(one_object.bndbox.xmax))),
  116. maker.ymax(str(float(one_object.bndbox.ymax))),
  117. ),
  118. )
  119. xml.append(object_tag)
  120. with open(filename, 'wb') as f:
  121. f.write(lxml.etree.tostring(xml, pretty_print=True, encoding='utf-8'))
  122. class NumpyEncoder(json.JSONEncoder):
  123. """ Special json encoder for numpy types """
  124. def default(self, obj):
  125. if isinstance(obj, (np.bool_,)):
  126. return bool(obj)
  127. elif isinstance(obj, (np.int_, np.intc, np.intp, np.int8,
  128. np.int16, np.int32, np.int64, np.uint8,
  129. np.uint16, np.uint32, np.uint64)):
  130. return int(obj)
  131. elif isinstance(obj, (np.float_, np.float16, np.float32,
  132. np.float64)):
  133. return float(obj)
  134. elif isinstance(obj, (np.ndarray,)):
  135. return obj.tolist()
  136. return json.JSONEncoder.default(self, obj)
  137. @dataclass
  138. class LabelmeShape:
  139. label: str
  140. points: np.ndarray
  141. shape_type: str
  142. flags: dict = field(default_factory=dict)
  143. group_id: Optional[int] = None
  144. def __post_init__(self):
  145. self.points = np.asarray(self.points)
  146. @dataclass
  147. class LabelmeInfo:
  148. version: str = '4.5.6'
  149. flags: dict = field(default_factory=dict)
  150. shapes: List[LabelmeShape] = field(default_factory=list)
  151. imagePath: Optional[str] = None
  152. imageData: Optional[str] = None
  153. imageHeight: Optional[int] = None
  154. imageWidth: Optional[int] = None
  155. def __post_init__(self):
  156. for k, shape in enumerate(self.shapes):
  157. self.shapes[k] = LabelmeShape(**shape)
  158. class LabelmeHandler:
  159. @staticmethod
  160. def load(filename) -> LabelmeInfo:
  161. json_content = khandy.load_json(filename)
  162. return LabelmeInfo(**json_content)
  163. @staticmethod
  164. def save(filename, labelme_info: LabelmeInfo):
  165. json_content = dataclasses.asdict(labelme_info)
  166. khandy.save_json(filename, json_content, cls=NumpyEncoder)
  167. @dataclass
  168. class YoloObject:
  169. label: str
  170. x_center: float
  171. y_center: float
  172. width: float
  173. height: float
  174. @dataclass
  175. class YoloInfo:
  176. image_filename: Optional[str] = None
  177. width: Optional[int] = None
  178. height: Optional[int] = None
  179. objects: List[YoloObject] = field(default_factory=list)
  180. class YoloHandler:
  181. @staticmethod
  182. def load(filename, **kwargs) -> YoloInfo:
  183. records = khandy.load_list(filename)
  184. yolo_info = YoloInfo(
  185. image_filename=kwargs.get('image_filename'),
  186. width=kwargs.get('width'),
  187. height=kwargs.get('height'))
  188. for record in records:
  189. record_parts = record.split()
  190. yolo_info.objects.append(YoloObject(
  191. label=record_parts[0],
  192. x_center=float(record_parts[1]),
  193. y_center=float(record_parts[2]),
  194. width=float(record_parts[3]),
  195. height=float(record_parts[4]),
  196. ))
  197. return yolo_info
  198. @staticmethod
  199. def save(filemame, yolo_info: YoloInfo):
  200. records = []
  201. for object in yolo_info.objects:
  202. records.append(f'{object.label} {object.x_center} {object.y_center} {object.width} {object.height}')
  203. khandy.save_list(filemame, records)
  204. @staticmethod
  205. def replace_label(yolo_info: YoloInfo, label_map):
  206. dst_yolo_info = copy.deepcopy(yolo_info)
  207. for object in dst_yolo_info.objects:
  208. object.label= label_map[object.label]
  209. return dst_yolo_info
  210. def convert_pascal_voc_to_labelme(pascal_voc_info: PascalVocInfo) -> LabelmeInfo:
  211. labelme_info = LabelmeInfo(
  212. imagePath=pascal_voc_info.filename,
  213. imageWidth=pascal_voc_info.size.width,
  214. imageHeight=pascal_voc_info.size.height
  215. )
  216. for object in pascal_voc_info.object:
  217. labelme_shape = LabelmeShape(
  218. label=object.name,
  219. shape_type='rectangle',
  220. points=[[object.bndbox.xmin, object.bndbox.ymin],
  221. [object.bndbox.xmax, object.bndbox.ymax]]
  222. )
  223. labelme_info.shapes.append(labelme_shape)
  224. return labelme_info
  225. def convert_labelme_to_pascal_voc(labelme_info: LabelmeInfo) -> PascalVocInfo:
  226. pascal_voc_info = PascalVocInfo(
  227. filename=labelme_info.imagePath,
  228. size=PascalVocSize(
  229. width=labelme_info.imageWidth,
  230. height=labelme_info.imageHeight,
  231. depth=3
  232. )
  233. )
  234. for shape in labelme_info.shapes:
  235. if shape.shape_type != 'rectangle':
  236. continue
  237. pascal_voc_object = PascalVocObject(
  238. name=shape.label,
  239. bndbox=PascalVocBndbox(
  240. xmin=shape.points[0][0],
  241. ymin=shape.points[0][1],
  242. xmax=shape.points[1][0],
  243. ymax=shape.points[1][1],
  244. )
  245. )
  246. pascal_voc_info.object.append(pascal_voc_object)
  247. return pascal_voc_info
  248. def convert_labelme_to_yolo(labelme_info: LabelmeInfo) -> YoloInfo:
  249. yolo_info = YoloInfo(
  250. image_filename=labelme_info.imagePath,
  251. width=labelme_info.imageWidth,
  252. height=labelme_info.imageHeight
  253. )
  254. for shape in labelme_info.shapes:
  255. if shape.shape_type != 'rectangle':
  256. continue
  257. x_center = (shape.points[0][0] + shape.points[1][0]) / (2 * labelme_info.imageWidth)
  258. y_center = (shape.points[0][1] + shape.points[1][1]) / (2 * labelme_info.imageHeight)
  259. width = abs(shape.points[0][0] - shape.points[1][0]) / labelme_info.imageWidth
  260. height = abs(shape.points[0][1] - shape.points[1][1]) / labelme_info.imageHeight
  261. yolo_object = YoloObject(
  262. label=shape.label,
  263. x_center=x_center,
  264. y_center=y_center,
  265. width=width,
  266. height=height,
  267. )
  268. yolo_info.objects.append(yolo_object)
  269. return yolo_info
  270. def convert_yolo_to_labelme(yolo_info: YoloInfo) -> LabelmeInfo:
  271. assert (yolo_info.width is not None) and (yolo_info.height is not None)
  272. labelme_info = LabelmeInfo(
  273. imagePath=yolo_info.image_filename,
  274. imageHeight=yolo_info.height,
  275. imageWidth=yolo_info.width,
  276. )
  277. for object in yolo_info.objects:
  278. x_min = (object.x_center - 0.5 * object.width) * yolo_info.width
  279. y_min = (object.y_center - 0.5 * object.height) * yolo_info.height
  280. x_max = (object.x_center + 0.5 * object.width) * yolo_info.width
  281. y_max = (object.y_center + 0.5 * object.height) * yolo_info.height
  282. labelme_shape = LabelmeShape(
  283. label=object.label,
  284. shape_type='rectangle',
  285. points=[[x_min, y_min], [x_max, y_max]]
  286. )
  287. labelme_info.shapes.append(labelme_shape)
  288. return labelme_info
  289. def convert_pascal_voc_to_yolo(pascal_voc_info: PascalVocInfo) -> YoloInfo:
  290. yolo_info = YoloInfo(
  291. image_filename=pascal_voc_info.filename,
  292. width=pascal_voc_info.size.width,
  293. height=pascal_voc_info.size.height
  294. )
  295. for object in pascal_voc_info.object:
  296. x_center = (object.bndbox.xmax + object.bndbox.xmin) / (2 * pascal_voc_info.size.width)
  297. y_center = (object.bndbox.ymax + object.bndbox.ymin) / (2 * pascal_voc_info.size.height)
  298. width = abs(object.bndbox.xmax - object.bndbox.xmin) / pascal_voc_info.size.width
  299. height = abs(object.bndbox.ymax - object.bndbox.ymin) / pascal_voc_info.size.height
  300. yolo_object = YoloObject(
  301. label=object.name,
  302. x_center=x_center,
  303. y_center=y_center,
  304. width=width,
  305. height=height,
  306. )
  307. yolo_info.objects.append(yolo_object)
  308. return yolo_info
  309. def convert_yolo_to_pascal_voc(yolo_info: YoloInfo) -> PascalVocInfo:
  310. pascal_voc_info = PascalVocInfo(
  311. filename=yolo_info.image_filename,
  312. size=PascalVocSize(
  313. width=yolo_info.width,
  314. height=yolo_info.height,
  315. depth=3
  316. )
  317. )
  318. for object in yolo_info.objects:
  319. x_min = (object.x_center - 0.5 * object.width) * yolo_info.width
  320. y_min = (object.y_center - 0.5 * object.height) * yolo_info.height
  321. x_max = (object.x_center + 0.5 * object.width) * yolo_info.width
  322. y_max = (object.y_center + 0.5 * object.height) * yolo_info.height
  323. voc_object = PascalVocObject(
  324. name=object.label,
  325. bndbox=PascalVocBndbox(xmin=x_min,ymin=y_min,xmax=x_max,ymax=y_max)
  326. )
  327. pascal_voc_info.object.append(voc_object)
  328. return pascal_voc_info