iconicon
Blog Portfolio
    Uncategorized

    mini project:YOLOv7 animal detect

    2026년 04월 12일
    On this page


      mini project:YOLOv7 animal detect

      YOLOv7을 이용한 동물 감지를 해보도록 하겠습니다.
      환경은 맥북 m1 pro에서 virtual machine을 사용하였습니다.

      사용한 데이터는 animals.v2-release.voc입니다

      data는 split이 되어있는 상태로 image와 label이 함께 있었습니다.

      먼저 VOC의 xml 파일을 txt로 바꾸어 주었습니다. (labels.cache는 학습 시에 생성되는 캐시 파일입니다.)

      <annotation>
      <folder></folder>
      <filename>1_jpg.rf.2e6cce3e7cfc0e62b404ba5af96a9c38.jpg</filename>
      <path>1_jpg.rf.2e6cce3e7cfc0e62b404ba5af96a9c38.jpg</path>
      <source>
      <database>roboflow.ai</database>
      </source>
      <size>
      <width>155</width>
      <height>178</height>
      <depth>3</depth>
      </size>
      <segmented>0</segmented>
      <object>
      <name>fox</name>
      <pose>Unspecified</pose>
      <truncated>0</truncated>
      <difficult>0</difficult>
      <occluded>0</occluded>
      <bndbox>
      <xmin>56</xmin>
      <xmax>156</xmax>
      <ymin>18</ymin>
      <ymax>175</ymax>
      </bndbox>
      </object>
      </annotation>
      
      import os
      import glob
      import cv2
      import shutil
      from xml.etree.ElementTree import parse
      
      # data_dir = "./animals.v2-release.voc/valid/"
      # image_data = glob.glob(os.path.join(data_dir, "*.jpg"))
      
      # Class names in index order; a name's position in this tuple is its
      # numeric class id (index 0 is "background").
      _CLASS_NAMES = (
          "background",
          "cat",
          "chicken",
          "cow",
          "dog",
          "fox",
          "goat",
          "horse",
          "person",
          "racoon",
          "skunk",
      )

      # Maps the VOC <name> text of an object to its numeric class id.
      label_dict = {name: index for index, name in enumerate(_CLASS_NAMES)}
      
      
      class Voc_to_yolo_convter():
          """Convert Pascal VOC XML annotations into YOLO txt label files.

          Every ``*.xml`` file found directly inside ``xml_paths`` produces one
          ``<output_root>/<mode>/labels/<image stem>.txt`` file holding one
          ``class x_center y_center width height`` line per ``<object>``.
          Box values are divided by the image width/height read from the
          ``<size>`` element and rounded to 6 decimals.
          """

          def __init__(self, xml_paths, label_map=None,
                       output_root="./animals.v2-release.voc"):
              """Collect the XML files to convert.

              Args:
                  xml_paths: Directory that contains the ``*.xml`` annotations.
                  label_map: Optional mapping from object name to class id.
                      When omitted, the module-level ``label_dict`` is used.
                  output_root: Directory under which ``<mode>/labels`` is
                      created. Defaults to the path the script always used.
              """
              self.xml_path_list = glob.glob(os.path.join(xml_paths, "*.xml"))
              self.label_map = label_map
              self.output_root = output_root

          def get_voc_to_yolo(self, mode):
              """Write the YOLO label files for every collected XML file.

              Args:
                  mode: Split name (e.g. ``"train"``, ``"valid"``); used as the
                      sub-directory of ``output_root`` that receives ``labels``.

              Raises:
                  KeyError: If an object's ``<name>`` is not in the label map.
              """
              # Resolve the global lazily so an explicit label_map never needs it.
              label_map = label_dict if self.label_map is None else self.label_map

              # The target folder does not depend on the file, so create it once.
              labels_dir = os.path.join(self.output_root, mode, "labels")
              os.makedirs(labels_dir, exist_ok=True)

              for xml_path in self.xml_path_list:
                  root = parse(xml_path).getroot()

                  file_name = root.find('filename').text

                  size_meta = root.find('size')
                  img_width = int(size_meta.find('width').text)
                  img_height = int(size_meta.find('height').text)

                  lines = []
                  for object_meta in root.findall('object'):
                      label = label_map[object_meta.find('name').text]

                      bndbox = object_meta.find('bndbox')
                      xmin = int(bndbox.findtext('xmin'))
                      xmax = int(bndbox.findtext('xmax'))
                      ymin = int(bndbox.findtext('ymin'))
                      ymax = int(bndbox.findtext('ymax'))

                      # VOC corner box -> YOLO normalized center/size box.
                      yolo_x = round(((xmin + xmax) / 2) / img_width, 6)
                      yolo_y = round(((ymin + ymax) / 2) / img_height, 6)
                      yolo_w = round((xmax - xmin) / img_width, 6)
                      yolo_h = round((ymax - ymin) / img_height, 6)

                      # A real newline terminates each box so every object
                      # gets its own line in the label file.
                      lines.append(
                          f"{label} {yolo_x} {yolo_y} {yolo_w} {yolo_h}\n"
                      )

                  # As before, an image without objects gets no label file.
                  if not lines:
                      continue

                  # splitext swaps only the real extension, whatever it is.
                  label_name = os.path.splitext(file_name)[0] + ".txt"

                  # "w" instead of "a": re-running the conversion must not
                  # append duplicate boxes to an existing label file.
                  with open(os.path.join(labels_dir, label_name), "w") as f:
                      f.writelines(lines)
      
      
      # move image
      def move_image(data, mode, root_dir="./animals.v2-release.voc"):
          """Move image files into ``<root_dir>/<mode>/images``.

          Args:
              data: Iterable of image file paths to move.
              mode: Split name (e.g. ``"train"``, ``"valid"``) used as the
                  sub-directory of ``root_dir``.
              root_dir: Dataset root that receives the ``images`` folder.
                  Defaults to the path the script always used.
          """
          image_folder_path = os.path.join(root_dir, mode, "images")
          os.makedirs(image_folder_path, exist_ok=True)

          for path in data:
              # basename works with both "/" and the platform separator;
              # splitting on a backslash literal raised IndexError for
              # POSIX-style paths returned by glob.
              file_name = os.path.basename(path)
              shutil.move(path, os.path.join(image_folder_path, file_name))
      
      
      if __name__ == "__main__":
          # Script entry: gather the validation split's .jpg files and move
          # them into the split's images folder.
          data_dir = "./animals.v2-release.voc/valid/"

          # Label conversion step, left disabled as in the original script:
          # test = Voc_to_yolo_convter(data_dir)
          # test.get_voc_to_yolo(mode="valid")

          jpg_pattern = os.path.join(data_dir, "*.jpg")
          move_image(glob.glob(jpg_pattern), mode="valid")
      

      그리고 data.yaml을 작성해 주었습니다.

      위치는 yolov7-main/data/data.yaml입니다.

      train: ./animals_dataset/train/images
      val: ./animals_dataset/valid/images
      test: ./animals_dataset/test/images
      
      # number of class
      nc : 11
      
        # classes
      names : ["background",
               "cat",
               "chicken",
               "cow",
               "dog",
               "fox",
               "goat",
               "horse",
               "person",
               "racoon",
               "skunk"
               ]
      

      하이퍼파라미터 입니다.

      lr0: 0.001  # initial learning rate (SGD=1E-2, Adam=1E-3)
      lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
      momentum: 0.937  # SGD momentum/Adam beta1
      weight_decay: 0.0005  # optimizer weight decay 5e-4
      warmup_epochs: 3.0  # warmup epochs (fractions ok)
      warmup_momentum: 0.8  # warmup initial momentum
      warmup_bias_lr: 0.1  # warmup initial bias lr
      box: 0.05  # box loss gain
      cls: 0.3  # cls loss gain
      cls_pw: 1.0  # cls BCELoss positive_weight
      obj: 0.7  # obj loss gain (scale with pixels)
      obj_pw: 1.0  # obj BCELoss positive_weight
      iou_t: 0.20  # IoU training threshold
      anchor_t: 4.0  # anchor-multiple threshold
      # anchors: 3  # anchors per output layer (0 to ignore)
      fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
      hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
      hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
      hsv_v: 0.4  # image HSV-Value augmentation (fraction)
      degrees: 0.0  # image rotation (+/- deg)
      translate: 0.2  # image translation (+/- fraction)
      scale: 0.9  # image scale (+/- gain)
      shear: 0.0  # image shear (+/- deg)
      perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
      flipud: 0.0  # image flip up-down (probability)
      fliplr: 0.5  # image flip left-right (probability)
      mosaic: 1.0  # image mosaic (probability)
      mixup: 0.15  # image mixup (probability)
      copy_paste: 0.0  # image copy paste (probability)
      paste_in: 0.15  # image copy paste (probability), use 0 for faster training
      loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
      

      각 라벨별로 동물들을 잘 잡아내는 것을 확인할 수 있었습니다.