雜亂大全26-YOLOv3程式碼解析

發表於 2020-10-02 | 分類於 python | 閱讀數次

前言:接續連兩篇的YOLO簡介

tags: `六角學院`

主題:雜亂大全26-YOLOv3程式碼解析

本篇重點:

學習YOLOv3的程式碼解析

了解YOLOv3程式碼(1)-detect_image

參考資料

yolov3 keras版本yolo.py函式解析
https://blog.csdn.net/qq_43211132/article/details/102988139

<參考運作流程>

def detect_image(self, image):（重要）
開始計時->
①調用letterbox_image函數，即：先生成一個用“絕對灰”R128-G128-B128填充的416×416新圖片，然後用按比例縮放（採樣方式：BICUBIC）後的輸入圖片粘貼，粘貼不到的部分保留為灰色。
②model_image_size定義的寬和高必須是32的倍數；若沒有定義model_image_size，將輸入的尺寸調整為32的倍數，並調用letterbox_image函數進行縮放。
③將縮放後的圖片數值除以255，做歸一化。
④將（416,416,3）數組調整為（1,416,416,3），滿足網絡輸入的張量格式：image_data。

->
①運行self.sess.run（）輸入參數：輸入圖片416×416，學習模式0測試/1訓練。 self.yolo_model.input: image_data，self.input_image_shape: [image.size[1], image.size[0]]，K.learning_phase(): 0。
②self.generate（），讀取：model路徑、anchor box、coco類別、加載模型yolo.h5，對於80種coco目標，確定每一種目標框的繪製顏色，即：將（x/80,1.0 ,1.0）的顏色轉換為RGB格式，並隨機調整顏色以便肉眼識別，其中：一個1.0表示飽和度，一個1.0表示亮度。

->
①yolo_eval(self.yolo_model.output),max_boxes=20,
每張圖每類最多檢測20個框。
②將anchor_box分為3組，分別分配給三個尺度，
yolo_model輸出的feature map
③特徵圖越小，感受野越大，對大目標越敏感，
選大的anchor box->分別對三個feature map運行
out_boxes, out_scores, out_classes，
返回boxes、scores、classes。

yolo.py 內的 detect_image()

請參考前兩篇的github位置

#請參考註解

#def detect_image(self, image):
#detect_image作用：
    #要求圖片尺寸是32的倍數
    #原因:執行的是5次step為2的捲積操作
    #即圖片的尺寸是416*416，因在最底層中的特徵圖大小是13*13 (13*32=416)
def detect_image(self, image, output_path=""):
    """Detect objects in a PIL image, draw labelled boxes on it and return it.

    The image is letterboxed to the network input size, scaled to [0, 1],
    fed through ``self.sess`` and every returned box is drawn onto ``image``
    in place together with its class name and score.

    Args:
        image: PIL image to run detection on; it is modified in place.
        output_path: when non-empty, the annotated image is also saved here.

    Returns:
        The same ``image`` object with boxes and labels drawn on it.

    Raises:
        AssertionError: if ``self.model_image_size`` is set and one of its
            sides is not a multiple of 32.
    """
    start = timer()

    # Letterbox the input to the network size (both sides must be multiples
    # of 32). Without a fixed model size, round the image size down to the
    # nearest multiple of 32 instead.
    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
        # model_image_size is reversed before being handed to letterbox_image.
        boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
    else:
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    print(image_data.shape)  # e.g. (416, 416, 3) for a 416x416 model input

    # Normalise pixel values to [0, 1].
    image_data /= 255.

    # Add a leading batch axis, e.g. (416, 416, 3) -> (1, 416, 416, 3).
    image_data = np.expand_dims(image_data, 0)

    # Run the graph to obtain boxes, scores and classes.
    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            # Letterboxed, normalised image batch.
            self.yolo_model.input: image_data,
            # Size of the ORIGINAL image as [size[1], size[0]], i.e.
            # (height, width) -- not the 416x416 network input size.
            self.input_image_shape: [image.size[1], image.size[0]],
            # Learning phase: 0 = test/inference, 1 = training.
            K.learning_phase(): 0
        })

    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    # Font size scales with the image height.
    font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    # Outline thickness scales with the image size; keep at least 1 px so
    # boxes are still drawn on images whose width + height is below 300.
    thickness = max(1, (image.size[0] + image.size[1]) // 300)

    # One drawing context is enough for all boxes.
    draw = ImageDraw.Draw(image)

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = self.class_names[c]
        box = out_boxes[i]
        score = out_scores[i]
        label = '{} {:.2f}'.format(predicted_class, score)

        # Pixel size (width, height) of the rendered label.
        # NOTE(review): ImageDraw.textsize is not available in recent Pillow
        # releases -- confirm the Pillow version this project pins.
        label_size = draw.textsize(label, font)

        top, left, bottom, right = box

        # Round to the nearest integer pixel and clamp to the image bounds.
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        # Put the label above the box when there is room, otherwise just
        # inside its top edge.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Draw `thickness` nested 1-px rectangles to get a thick outline.
        # A dedicated loop variable avoids shadowing the box index `i`.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=self.colors[c])

        # Filled background behind the label, then the label text itself.
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)

    del draw

    # Report the elapsed time.
    end = timer()
    print(end - start)

    if output_path != "":
        image.save(output_path)
    return image
    
def close_session(self):
    """Close the session object stored on ``self.sess``."""
    session = self.sess
    session.close()

了解YOLOv3程式碼(2)-detect_video

yolo.py 內的 detect_video( )

#請參考註解
#利用 cv2 將影片變成一幀一幀
#並使用 detect_image 辨識後顯示結果
def detect_video(yolo, video_path, output_path=""):
    """Run detection on every frame of a video and show the annotated frames.

    Frames are read with OpenCV, passed one by one to ``yolo.detect_image``,
    overlaid with an FPS counter and shown in a window named "result".
    Press ``q`` to stop early; the loop also stops when no more frames can
    be read.

    Args:
        yolo: object providing ``detect_image(image)`` and ``close_session()``.
        video_path: source handed to ``cv2.VideoCapture``.
        output_path: when non-empty, annotated frames are also written to
            this file using the source's FourCC, frame rate and frame size.

    Raises:
        IOError: if the video source cannot be opened.
    """
    import cv2

    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")

    # Read the source properties so the writer can mirror them.
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    out = None
    try:
        if output_path != "":
            print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
            out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        # The display window only needs to be created once.
        cv2.namedWindow("result", cv2.WINDOW_NORMAL)

        while True:
            return_value, frame = vid.read()
            # read() reports failure at end of stream (frame is then not
            # usable); stop instead of crashing in Image.fromarray.
            if not return_value:
                break

            # NOTE(review): OpenCV frames are BGR while PIL assumes RGB; the
            # frame is passed through unconverted here -- confirm whether the
            # model expects a channel swap.
            image = Image.fromarray(frame)

            # Detect and draw boxes on this frame.
            image = yolo.detect_image(image)

            # np.array makes a writable copy; np.asarray of a PIL image can
            # yield a read-only array that cv2.putText cannot draw into.
            result = np.array(image)

            # Accumulate per-frame processing time and refresh the FPS label
            # roughly once per second.
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0

            # Overlay the FPS text and show (and optionally save) the frame.
            cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.50, color=(255, 0, 0), thickness=2)
            cv2.imshow("result", result)
            if out is not None:
                out.write(result)

            # Quit when the user presses 'q'.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the capture/writer and close the session, even if
        # a frame fails to process.
        vid.release()
        if out is not None:
            out.release()
        yolo.close_session()

程式各函式運作流程圖:
https://1drv.ms/u/s!AqstO-BYCWeDxBD-DKTXrqyxvCdu

若有任何問題↓

請聯繫我:liao86221@gmail.com

或填表: https://forms.gle/4FB5Tmtd9A7XcXjF8