From 7511bb30a07f153c2af8c12f3a556b0177dcd718 Mon Sep 17 00:00:00 2001
From: Shuming Liu <smshine@qq.com>
Date: Sat, 31 Jan 2026 16:24:26 +0800
Subject: [PATCH] Let YOLO process the video directly

---
 video_cleaner.py | 84 +++++++++++++++---------------------------------
 1 file changed, 26 insertions(+), 58 deletions(-)

diff --git a/video_cleaner.py b/video_cleaner.py
index 00e9ebd..394cc1c 100644
--- a/video_cleaner.py
+++ b/video_cleaner.py
@@ -58,79 +58,47 @@ class VideoCleaner:
 
     def should_keep_video(self, video_path, save_preview_path=None):
         """
-        Scans the video to see if it should be kept.
-        Kept if: (It's not a night-only video) AND (Human is detected).
+        Streams the video through YOLO; keeps it only if a human is detected in a color (lights-on) frame.
         """
-        cap = cv2.VideoCapture(str(video_path))
-        fps = cap.get(cv2.CAP_PROP_FPS)
-        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        
-        if fps <= 0 or frame_count <= 0:
-            cap.release()
-            return False, "Invalid video"
+        model = self._get_model()
+        # Let YOLO decode the video itself and yield results lazily (stream=True).
+        # The old loop sampled every int(fps * 2) frames (~2 s); fps is no longer
+        # read here, so a fixed stride of 30 frames is used instead.
+        results_gen = model.predict(
+            source=str(video_path),
+            classes=[0],
+            conf=0.7,
+            half=True,
+            imgsz=640,
+            stream=True,  # This makes it a generator
+            verbose=False,
+            vid_stride=30  # Run inference on every 30th frame (1 s at 30 fps, 2 s at 15 fps)
+        )
 
-        step = max(1, int(fps * 2)) 
-        print(f"Analyzing {video_path.name}...")
-        
         has_shown_color = False
-        prev_gray = None
         
-        for i in range(0, frame_count, step):
-            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
-            ret, frame = cap.read()
-            if not ret:
-                break
+        for i, result in enumerate(results_gen):
+            frame = result.orig_img # Get the original frame for our checks
             
-            # Check if this frame is 'Normal Mode' (Lights on)
+            # 1. Check if this frame is 'Normal Mode' (Lights on)
             is_color = self.is_frame_color_and_bright(frame)
             if is_color:
                 has_shown_color = True
 
-            # --- MOTION DETECTION ---
-            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-            gray = cv2.GaussianBlur(gray, (21, 21), 0)
-            if prev_gray is not None:
-                frame_delta = cv2.absdiff(prev_gray, gray)
-                thresh = cv2.threshold(frame_delta, 25, 255, cv2.THRESH_BINARY)[1]
-                if (np.sum(thresh) / thresh.size) < 0.005: 
-                    continue 
-            prev_gray = gray
-            
-            # --- AI HUMAN DETECTION ---
-            model = self._get_model()
-            # 直接传原图，省去 CPU resize，开启半精度和指定推理尺寸
-            results = model(frame, classes=[0], verbose=False, conf=0.7, half=True, imgsz=640)
-            debugmode=True
-            if len(results[0].boxes) > 0:
+            # 2. If YOLO detected someone
+            if len(result.boxes) > 0:
                 if is_color:
-                    # --- DEBUG: Save the frame that triggered detection ---
-                    if debugmode or save_preview_path:
-                            # Draw boxes on the frame for visualization
-                            annotated_frame = results[0].plot() 
-                            
-                            if debugmode:
-                                debug_dir = Path("debug_detections")
-                                debug_dir.mkdir(exist_ok=True)
-                                debug_filename = debug_dir / f"detected_{video_path.stem}_frame_{i}.jpg"
-                                cv2.imwrite(str(debug_filename), annotated_frame)
-                                print(f"DEBUG: Saved detection image to {debug_filename}")
-
-                            if save_preview_path:
-                                save_preview_path.parent.mkdir(parents=True, exist_ok=True)
-                                cv2.imwrite(str(save_preview_path), annotated_frame)
-                                print(f"INFO: Saved preview image to {save_preview_path}")
+                    if save_preview_path:
+                        save_preview_path.parent.mkdir(parents=True, exist_ok=True)
+                        # Save annotated frame
+                        annotated_frame = result.plot()
+                        cv2.imwrite(str(save_preview_path), annotated_frame)
                     
-                    cap.release()
                     return True, "Human detected in color mode"
         
-        cap.release()
-        
-        # Final decision after scanning the whole video:
+        # Final decision
         if has_shown_color:
-            # If lights were turned on but we found no people during the whole video
-            # Based on your rule #2: "If no humans in the whole video, delete."
             return False, "Lights were on, but no humans detected"
-            
         return False, "Entirely night/IR mode or no humans"
 
     def process_video_file(self, video_path, processed_base_dir, input_base_dir):