yolo process video directly

2026-01-31 16:24:26 +08:00
parent 8c626fa1c1
commit 7511bb30a0
1 changed file with 26 additions and 58 deletions
--- a/video_cleaner.py
+++ b/video_cleaner.py
@@ -58,79 +58,47 @@ class VideoCleaner:
    def should_keep_video(self, video_path, save_preview_path=None):
        """
-        Scans the video to see if it should be kept.
+        Scans the video using YOLO's native stream processing.
        Kept if: (It's not a night-only video) AND (Human is detected).
        """
-        cap = cv2.VideoCapture(str(video_path))
+        model = self._get_model()
-        fps = cap.get(cv2.CAP_PROP_FPS)
+        # Use YOLO's generator to handle video decoding efficiently
-        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        # vid_stride=int(fps*2) could be used but we need consistent logic
-        
+        # We'll use a larger stride to skip frames like before
-        if fps <= 0 or frame_count <= 0:
+        results_gen = model.predict(
-            cap.release()
+            source=str(video_path),
-            return False, "Invalid video"
+            classes=[0],
-
+            conf=0.7,
-        step = max(1, int(fps * 2)) 
+            half=True,
-        print(f"Analyzing {video_path.name}...")
+            imgsz=640,
            stream=True,  # This makes it a generator
            verbose=False,
            vid_stride=30  # Roughly 1-2 seconds depending on fps
        )
        has_shown_color = False
        prev_gray = None
-        for i in range(0, frame_count, step):
+        for i, result in enumerate(results_gen):
-            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
+            frame = result.orig_img # Get the original frame for our checks
            ret, frame = cap.read()
            if not ret:
                break
-            # Check if this frame is 'Normal Mode' (Lights on)
+            # 1. Check if this frame is 'Normal Mode' (Lights on)
            is_color = self.is_frame_color_and_bright(frame)
            if is_color:
                has_shown_color = True
-            # --- MOTION DETECTION ---
+            # 2. If YOLO detected someone
-            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            if len(result.boxes) > 0:
            gray = cv2.GaussianBlur(gray, (21, 21), 0)
            if prev_gray is not None:
                frame_delta = cv2.absdiff(prev_gray, gray)
                thresh = cv2.threshold(frame_delta, 25, 255, cv2.THRESH_BINARY)[1]
                if (np.sum(thresh) / thresh.size) < 0.005: 
                    continue 
            prev_gray = gray
            # --- AI HUMAN DETECTION ---
            model = self._get_model()
            # Pass the original frame directly to skip the CPU resize; enable half precision and set the inference size
            results = model(frame, classes=[0], verbose=False, conf=0.7, half=True, imgsz=640)
            debugmode=True
            if len(results[0].boxes) > 0:
                if is_color:
-                    # --- DEBUG: Save the frame that triggered detection ---
+                    if save_preview_path:
-                    if debugmode or save_preview_path:
+                        save_preview_path.parent.mkdir(parents=True, exist_ok=True)
-                            # Draw boxes on the frame for visualization
+                        # Save annotated frame
-                            annotated_frame = results[0].plot() 
+                        annotated_frame = result.plot()
                        cv2.imwrite(str(save_preview_path), annotated_frame)
                            if debugmode:
                                debug_dir = Path("debug_detections")
                                debug_dir.mkdir(exist_ok=True)
                                debug_filename = debug_dir / f"detected_{video_path.stem}_frame_{i}.jpg"
                                cv2.imwrite(str(debug_filename), annotated_frame)
                                print(f"DEBUG: Saved detection image to {debug_filename}")
                            if save_preview_path:
                                save_preview_path.parent.mkdir(parents=True, exist_ok=True)
                                cv2.imwrite(str(save_preview_path), annotated_frame)
                                print(f"INFO: Saved preview image to {save_preview_path}")
                    cap.release()
                    return True, "Human detected in color mode"
-        cap.release()
+        # Final decision
        # Final decision after scanning the whole video:
        if has_shown_color:
            # If lights were turned on but we found no people during the whole video
            # Based on your rule #2: "If no humans in the whole video, delete."
            return False, "Lights were on, but no humans detected"
        return False, "Entirely night/IR mode or no humans"
    def process_video_file(self, video_path, processed_base_dir, input_base_dir):