前言


本来是毕业论文的一部分,但是一直懒得写复现过程。不过最近又要做相关的内容了,所以重新整理了一遍,简单地写一下过程。


yolo3论文:https://arxiv.org/abs/1804.02767
yolo3源码:https://github.com/qqwweee/keras-yolo3


sort论文:https://arxiv.org/abs/1602.00763
sort源码:https://github.com/abewley/sort




依赖:


目标检测:
tensorflow-gpu==1.12.0
keras-gpu==2.2.4
opencv==4.2.0
pillow==6.2.2
numpy
matplotlib

    多目标跟踪:
    filterpy==1.4.5
    numba==0.49.0
    scikit-image==0.14.0
    lap==0.4.0

      一、Yolo3目标检测


      第一步是检测,只有对已检测的目标才能形成跟踪,检测部分采用yolov3。


      【目标检测】基于YOLOv3的海上船舶目标检测分类(Tensorflow/keras)


      二、Sort多目标跟踪


      原理:匈牙利算法+卡尔曼滤波器,具体原理我也解释不清楚。


      流程图
      (原文此处为SORT算法流程图,图片未能保留)
      SORT是基于检测的跟踪算法,其跟踪效果主要取决于检测结果,即跟踪准确率取决于检测准确率。


      2.1 创建文件夹Sort


      在检测算法(keras-yolo3)目录下创建sort目录。
      (原文此处为目录结构截图,图片未能保留:sort目录下包含input和output两个子目录)
      input中存放待识别的视频和图片。
      output中存放识别后的结果。


      2.2 sort/sort.py


      下载Sort源码,将sort源码内的sort.py文件复制到刚刚创建的sort目录下。并进行修改:


      注释掉第26行:from skimage import io


      2.3 sort/utils.py


      同时,在该目录下创建utils.py文件(文件名需与yolo_sort.py中的 import sort.utils 保持一致):


      import numpy as np
      import sort.sort


      def delete_repeat_bbox(out_boxes, out_scores, out_classes, iou_threshold):
          """Drop near-duplicate detections, keeping the higher-scoring box of each pair.

          Every pair of surviving boxes is compared with ``sort.sort.iou``; when the
          overlap reaches ``iou_threshold`` the box with the lower score is
          discarded (on a tie the later box is discarded).

          Args:
              out_boxes: sequence of boxes, each unpacked as (y1, x1, y2, x2).
              out_scores: sequence of confidence scores, parallel to ``out_boxes``.
              out_classes: sequence of class indices, parallel to ``out_boxes``.
              iou_threshold: minimum IoU at which two boxes count as duplicates.

          Returns:
              Tuple ``(boxes, scores, classes)`` of numpy arrays holding the
              surviving detections in their original order.

          The inputs are not modified, so lists and numpy arrays are both accepted.
          """
          count = len(out_classes)
          dropped = set()

          for i in range(count - 1):
              if i in dropped:
                  continue
              y1_i, x1_i, y2_i, x2_i = out_boxes[i]
              for j in range(i + 1, count):
                  if j in dropped:
                      continue
                  y1_j, x1_j, y2_j, x2_j = out_boxes[j]
                  # sort.sort.iou takes boxes as [x1, y1, x2, y2], hence the reordering.
                  overlap = sort.sort.iou([x1_i, y1_i, x2_i, y2_i],
                                          [x1_j, y1_j, x2_j, y2_j])
                  if overlap < iou_threshold:
                      continue
                  if out_scores[i] >= out_scores[j]:
                      dropped.add(j)
                  else:
                      dropped.add(i)
                      # Box i is gone, so no later pair involving it is compared.
                      break

          keep = [k for k in range(count) if k not in dropped]
          return (np.array([out_boxes[k] for k in keep]),
                  np.array([out_scores[k] for k in keep]),
                  np.array([out_classes[k] for k in keep]))


      def sort_image(sort_class, out_boxes, out_scores, out_classes):
          """Run one tracker update on the detections of a single frame.

          Args:
              sort_class: tracker object exposing ``update(dets)``; each returned
                  row is read as [x1, y1, x2, y2, object_id, score, type].
              out_boxes: detected boxes, each indexed as [y1, x1, y2, x2].
              out_scores: confidence scores, parallel to ``out_boxes``.
              out_classes: class indices, parallel to ``out_boxes``.

          Returns:
              Tuple ``(boxes, scores, classes, object_ids)`` of numpy arrays built
              from the tracker output, with boxes converted back to
              [y1, x1, y2, x2].
          """
          # The tracker works on [x1, y1, x2, y2, score, type] rows. The reshape
          # keeps the array two-dimensional (0 x 6) even when nothing was detected,
          # so the tracker can still be stepped for this frame.
          dets = np.array(
              [[box[1], box[0], box[3], box[2], score, cls]
               for box, score, cls in zip(out_boxes, out_scores, out_classes)],
              dtype=float,
          ).reshape(-1, 6)

          tracks = sort_class.update(dets)

          boxes = []
          scores = []
          classes = []
          object_ids = []
          for row in tracks:
              # row: [x1, y1, x2, y2, object_id, score, type]
              boxes.append([row[1], row[0], row[3], row[2]])
              object_ids.append(int(row[4]))
              scores.append(float(row[5]))
              classes.append(int(row[6]))

          return np.array(boxes), np.array(scores), np.array(classes), np.array(object_ids)

        2.4 主函数:yolo_sort.py


        • 将sort与yolo中类相结合,形成新的类
        • 注意修改yolo_child类中_defaults字典里的配置信息(模型路径、类别文件、输出目录等,原文件约第86-99行)
        • main函数内配置跟踪视频或图片的路径

        import cv2
        import numpy as np
        import sort.utils

        from sort.sort import Sort, associate_detections_to_trackers, KalmanBoxTracker
        from yolo import YOLO
        from PIL import Image, ImageFont, ImageDraw
        from keras import backend as K
        from timeit import default_timer as timer
        from yolo3.utils import letterbox_image


        # Extends the Sort class from sort.py so that every track also carries the
        # score and class of the detection it was last matched with.
        class Sort_child(Sort):
            def __init__(self, max_age=2, min_hits=3):
                """Set the key parameters for SORT.

                Args:
                    max_age: a track is removed once its ``time_since_update``
                        exceeds this value.
                    min_hits: ``hit_streak`` a track needs before it is reported
                        (waived while ``frame_count <= min_hits``).

                NOTE: Sort.__init__ is not called; every attribute that update()
                reads is initialised here.
                """
                self.max_age = max_age
                self.min_hits = min_hits
                self.trackers = []
                # scores[k] / types[k] belong to trackers[k]; the three lists are
                # always appended to and popped from together.
                self.scores = []
                self.types = []
                self.frame_count = 0

            def update(self, dets):
                """Advance the tracker by one frame.

                Params:
                    dets - a numpy array of detections in the format
                        [[x1,y1,x2,y2,score,type],[x1,y1,x2,y2,score,type],...]
                Requires: this method must be called once for each frame even with
                    empty detections.
                Returns a numpy array with rows in the format
                    [x1,y1,x2,y2,object_id,score,type]; the array has shape (0, 7)
                    when no track is reported.
                NOTE: The number of objects returned may differ from the number of
                    detections provided.
                """
                self.frame_count += 1

                # get predicted locations from existing trackers.
                trks = np.zeros((len(self.trackers), 6))
                to_del = []
                ret = []
                for t, trk in enumerate(trks):
                    pos = self.trackers[t].predict()[0]
                    trk[:] = [pos[0], pos[1], pos[2], pos[3], self.scores[t], self.types[t]]
                    if np.any(np.isnan(pos)):
                        to_del.append(t)
                # Drop the rows containing invalid values, then the matching trackers.
                trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
                for t in reversed(to_del):
                    self.trackers.pop(t)
                    self.scores.pop(t)
                    self.types.pop(t)
                matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks)

                # update matched trackers with assigned detections
                for t, trk in enumerate(self.trackers):
                    if t not in unmatched_trks:
                        d = matched[np.where(matched[:, 1] == t)[0], 0]
                        det = dets[d, :][0]
                        trk.update(det)
                        self.scores[t] = det[4]
                        self.types[t] = det[5]

                # create and initialise new trackers for unmatched detections
                for i in unmatched_dets:
                    trk = KalmanBoxTracker(dets[i, 0:5])
                    self.trackers.append(trk)
                    self.scores.append(dets[i, :][4])
                    self.types.append(dets[i, :][5])

                # Walk the tracks back to front so popping index i never shifts an
                # entry that has not been visited yet.
                i = len(self.trackers)
                for trk in reversed(self.trackers):
                    pos = trk.get_state()[0]
                    i -= 1
                    if (trk.time_since_update < 1) and (
                            trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
                        # +1 as MOT benchmark requires positive
                        ret.append(np.concatenate(
                            (pos, [trk.id + 1], [self.scores[i]], [self.types[i]])).reshape(1, -1))
                    # remove dead tracklet
                    if trk.time_since_update > self.max_age:
                        self.trackers.pop(i)
                        self.scores.pop(i)
                        self.types.pop(i)

                if len(ret) > 0:
                    return np.concatenate(ret)
                # Same column count as the populated result above.
                return np.empty((0, 7))


        # Extends the YOLO class from yolo.py with duplicate-box removal, SORT
        # tracking and per-frame result logging.
        class yolo_child(YOLO):
            _defaults = {
                "model_path": 'model/001/trained_weights_final.h5',  # model weights
                "anchors_path": 'model_data/yolo_anchors.txt',
                "classes_path": 'model_data/my_classes.txt',
                "score": 0.3,
                "iou": 0.45,
                "model_image_size": (416, 416),
                "gpu_num": 1,
                # settings added for tracking
                "tracker": True,         # run the SORT tracker on the detections
                "write_to_file": True,   # append one result line per box to `fo`
                "output_path": 'sort/output/',
                "repeat_iou": 0.95,      # IoU at which two boxes count as duplicates
            }

            def __init__(self, **kwargs):
                """Build the detector and attach a fresh Sort_child tracker.

                Keyword arguments override the entries of ``_defaults``.
                """
                # NOTE(review): assumes YOLO.__init__ accepts **kwargs (not visible
                # in this file) - confirm against yolo.py.
                super(yolo_child, self).__init__(**kwargs)
                self.__dict__.update(self._defaults)
                # Re-apply the overrides so the defaults above do not clobber them.
                self.__dict__.update(kwargs)
                self.frame = 1
                self.mot_tracker = Sort_child()

            def detect_image(self, image, fo=None):
                """Detect, de-duplicate, track and draw the objects of one frame.

                Args:
                    image: PIL image; boxes and labels are drawn onto it in place.
                    fo: optional writable text file. When ``self.write_to_file`` is
                        set and ``fo`` is given, one comma-separated line per box
                        is written to it.

                Returns:
                    The same ``image`` object with the annotations drawn.

                Side effects: prints progress information and increments
                ``self.frame``.
                """
                start = timer()

                if self.model_image_size != (None, None):
                    assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
                    assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
                    boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
                else:
                    new_image_size = (image.width - (image.width % 32),
                                      image.height - (image.height % 32))
                    boxed_image = letterbox_image(image, new_image_size)
                image_data = np.array(boxed_image, dtype='float32')

                print(image_data.shape)
                image_data /= 255.
                image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

                out_boxes, out_scores, out_classes = self.sess.run(
                    [self.boxes, self.scores, self.classes],
                    feed_dict={
                        self.yolo_model.input: image_data,
                        self.input_image_shape: [image.size[1], image.size[0]],
                        K.learning_phase(): 0
                    })

                # delete repeat bbox
                out_boxes, out_scores, out_classes = sort.utils.delete_repeat_bbox(
                    list(out_boxes), list(out_scores), list(out_classes), self.repeat_iou)

                # open or close tracker
                if self.tracker and (self.mot_tracker is not None):
                    out_boxes, out_scores, out_classes, object_id = sort.utils.sort_image(
                        self.mot_tracker, out_boxes, out_scores, out_classes)
                else:
                    # Without tracking every box gets id 0.
                    object_id = np.zeros(len(out_boxes))

                # write to file (skipped when no file object was supplied)
                if self.write_to_file and fo is not None:
                    for k in reversed(range(len(out_boxes))):
                        box = out_boxes[k]
                        # NOTE(review): boxes are [top, left, bottom, right] (see the
                        # unpacking below), so columns 3-6 are top, left, height,
                        # width. The trailing -1 fields suggest a MOT-style file,
                        # which would list left, top, width, height - confirm the
                        # intended order before consuming this file.
                        result = [self.frame, object_id[k], box[0], box[1],
                                  abs(box[2] - box[0]), abs(box[3] - box[1]), out_scores[k],
                                  -1, -1, -1]
                        fo.write(', '.join(map(str, result)))
                        fo.write('\n')

                print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
                # 'font/times.ttf'
                font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                          size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
                thickness = (image.size[0] + image.size[1]) // 300

                for k, c in reversed(list(enumerate(out_classes))):
                    predicted_class = self.class_names[c]
                    box = out_boxes[k]
                    score = out_scores[k]
                    track_id = int(object_id[k])

                    # bounding box, rounded and clipped to the image
                    top, left, bottom, right = box
                    top = max(0, np.floor(top + 0.5).astype('int32'))
                    left = max(0, np.floor(left + 0.5).astype('int32'))
                    bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
                    right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

                    label = '{} {:.2f} id:{}'.format(predicted_class, score, track_id)
                    draw = ImageDraw.Draw(image)
                    label_size = draw.textsize(label, font)

                    print(label, (left, top), (right, bottom))

                    # Put the label above the box when there is room, else inside it.
                    if top - label_size[1] >= 0:
                        text_origin = np.array([left, top - label_size[1]])
                    else:
                        text_origin = np.array([left, top + 1])

                    # My kingdom for a good redistributable image drawing library.
                    # A thick outline is drawn as `thickness` nested rectangles.
                    for offset in range(thickness):
                        draw.rectangle(
                            [left + offset, top + offset, right - offset, bottom - offset],
                            outline=self.colors[c])
                    draw.rectangle(
                        [tuple(text_origin), tuple(text_origin + label_size)],
                        fill=self.colors[c])
                    draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                    del draw

                end = timer()
                print('process time:', end - start, 's')
                self.frame = self.frame + 1
                return image


        # detect_video applies detect_image to every frame of a video.
        def detect_video(yolo, video_path, output_path=""):
            """Run detection and tracking on each frame of a video and display it.

            Args:
                yolo: detector exposing ``detect_image(image, fo)``,
                    ``write_to_file``, ``output_path`` and ``close_session()``.
                video_path: source passed to ``cv2.VideoCapture``.
                output_path: when non-empty, annotated frames are also written to
                    this video file using the source's FourCC, FPS and frame size.

            Raises:
                IOError: if the video source cannot be opened.

            When ``yolo.write_to_file`` is set, results are written to
            ``yolo.output_path + 'result.dat'``. Press ``q`` in the preview window
            to stop early. The capture, the writer, the result file and the
            detector session are released even if processing fails.
            """
            vid = cv2.VideoCapture(video_path)
            if not vid.isOpened():
                raise IOError("Couldn't open webcam or video")
            video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
            video_fps = vid.get(cv2.CAP_PROP_FPS)
            video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            isOutput = output_path != ""

            out = None
            result_file = None
            try:
                if isOutput:
                    print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
                    out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
                if yolo.write_to_file:
                    result_file = open(yolo.output_path + 'result.dat', 'w')

                prev_time = timer()
                while True:
                    return_value, frame = vid.read()
                    if not return_value:
                        # End of stream or read failure.
                        break
                    image = Image.fromarray(frame)
                    image = yolo.detect_image(image, result_file)
                    # np.array copies, so the frame is writable for cv2.putText.
                    result = np.array(image)

                    curr_time = timer()
                    exec_time = curr_time - prev_time
                    prev_time = curr_time
                    curr_fps = 1. / exec_time if exec_time > 0 else float('inf')
                    fps = "FPS: " + str(curr_fps)

                    cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                fontScale=0.50, color=(255, 0, 0), thickness=2)
                    cv2.namedWindow("result", cv2.WINDOW_NORMAL)
                    cv2.imshow("result", result)
                    if isOutput:
                        out.write(result)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                if result_file is not None:
                    result_file.close()
                if out is not None:
                    # Releasing the writer finalises the output video file.
                    out.release()
                vid.release()
                yolo.close_session()




        if __name__ == '__main__':
            # Switches for the two demo modes below.
            run_on_image = False
            run_on_video = True

            # Use a separate name for the instance so the yolo_child class itself
            # is not shadowed.
            detector = yolo_child()

            # detect and track based on a single image
            if run_on_image:
                image_name = '000887.jpg'  # image directory: sort/input/
                image_path = 'sort/input/'
                image = Image.open(image_path + image_name)

                # The context manager closes the result file once it is written.
                with open('sort/output/result.dat', 'w') as output:
                    r_image = detector.detect_image(image, output)
                r_image.save(detector.output_path + image_name)

            # detect and track based on a video
            if run_on_video:
                video_path = 'sort/input/video10.mp4'
                output = 'sort/output/video10.mp4'
                detect_video(detector, video_path, output)

          三、展示


          视频:https://www.bilibili.com/video/BV1Yi4y14716