From 7511bb30a07f153c2af8c12f3a556b0177dcd718 Mon Sep 17 00:00:00 2001 From: Shuming Liu Date: Sat, 31 Jan 2026 16:24:26 +0800 Subject: [PATCH] yolo process video directly --- video_cleaner.py | 84 +++++++++++++++--------------------------------- 1 file changed, 26 insertions(+), 58 deletions(-) diff --git a/video_cleaner.py b/video_cleaner.py index 00e9ebd..394cc1c 100644 --- a/video_cleaner.py +++ b/video_cleaner.py @@ -58,79 +58,47 @@ class VideoCleaner: def should_keep_video(self, video_path, save_preview_path=None): """ - Scans the video to see if it should be kept. - Kept if: (It's not a night-only video) AND (Human is detected). + Scans the video using YOLO's native stream processing. """ - cap = cv2.VideoCapture(str(video_path)) - fps = cap.get(cv2.CAP_PROP_FPS) - frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - - if fps <= 0 or frame_count <= 0: - cap.release() - return False, "Invalid video" + model = self._get_model() + # Use YOLO's generator to handle video decoding efficiently + # vid_stride=int(fps*2) could be used but we need consistent logic + # We'll use a larger stride to skip frames like before + results_gen = model.predict( + source=str(video_path), + classes=[0], + conf=0.7, + half=True, + imgsz=640, + stream=True, # This makes it a generator + verbose=False, + vid_stride=30 # Roughly 1-2 seconds depending on fps + ) - step = max(1, int(fps * 2)) - print(f"Analyzing {video_path.name}...") - has_shown_color = False - prev_gray = None - for i in range(0, frame_count, step): - cap.set(cv2.CAP_PROP_POS_FRAMES, i) - ret, frame = cap.read() - if not ret: - break + for i, result in enumerate(results_gen): + frame = result.orig_img # Get the original frame for our checks - # Check if this frame is 'Normal Mode' (Lights on) + # 1. Check if this frame is 'Normal Mode' (Lights on) is_color = self.is_frame_color_and_bright(frame) if is_color: has_shown_color = True - # --- MOTION DETECTION --- - gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - gray = cv2.GaussianBlur(gray, (21, 21), 0) - if prev_gray is not None: - frame_delta = cv2.absdiff(prev_gray, gray) - thresh = cv2.threshold(frame_delta, 25, 255, cv2.THRESH_BINARY)[1] - if (np.sum(thresh) / thresh.size) < 0.005: - continue - prev_gray = gray - - # --- AI HUMAN DETECTION --- - model = self._get_model() - # 直接传原图,省去 CPU resize,开启半精度和指定推理尺寸 - results = model(frame, classes=[0], verbose=False, conf=0.7, half=True, imgsz=640) - debugmode=True - if len(results[0].boxes) > 0: + # 2. If YOLO detected someone + if len(result.boxes) > 0: if is_color: - # --- DEBUG: Save the frame that triggered detection --- - if debugmode or save_preview_path: - # Draw boxes on the frame for visualization - annotated_frame = results[0].plot() - - if debugmode: - debug_dir = Path("debug_detections") - debug_dir.mkdir(exist_ok=True) - debug_filename = debug_dir / f"detected_{video_path.stem}_frame_{i}.jpg" - cv2.imwrite(str(debug_filename), annotated_frame) - print(f"DEBUG: Saved detection image to {debug_filename}") - - if save_preview_path: - save_preview_path.parent.mkdir(parents=True, exist_ok=True) - cv2.imwrite(str(save_preview_path), annotated_frame) - print(f"INFO: Saved preview image to {save_preview_path}") + if save_preview_path: + save_preview_path.parent.mkdir(parents=True, exist_ok=True) + # Save annotated frame + annotated_frame = result.plot() + cv2.imwrite(str(save_preview_path), annotated_frame) - cap.release() return True, "Human detected in color mode" - cap.release() - - # Final decision after scanning the whole video: + # Final decision if has_shown_color: - # If lights were turned on but we found no people during the whole video - # Based on your rule #2: "If no humans in the whole video, delete." return False, "Lights were on, but no humans detected" - return False, "Entirely night/IR mode or no humans" def process_video_file(self, video_path, processed_base_dir, input_base_dir):