简单检测
import time
from ultralytics import YOLO
# Single-image detection: load the pretrained weights, run one prediction on
# '1.png', and print how long the call took (in seconds).
model = YOLO('yolo11n.pt')

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted while the prediction is running.
start_time = time.perf_counter()

# save=True asks ultralytics to write the annotated result to disk.
# NOTE(review): this is the first call on the model, so the measured time
# presumably includes one-time setup as well as inference - confirm if a
# pure inference figure is wanted.
model('1.png', save=True)

print(time.perf_counter() - start_time)
实时屏幕检测
import time
import cv2
import numpy as np
import win32gui
import win32con
from mss import mss
from ultralytics import YOLO
from PIL import Image
import torch
# Real-time screen detection (Windows only, relies on win32gui):
# grab a fixed-size square from the centre of monitor 1, run YOLO on it, and
# draw the detections on a full-screen, always-on-top, click-through overlay
# window whose black pixels are made transparent. Press 'q' to quit.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")
model = YOLO('yolo11n.pt').to(device)

# FP16 inference is only requested when running on CUDA; on CPU the model
# is left in full precision.
use_half = device == 'cuda'

# Inference settings shared by the warm-up call and the capture loop.
# NOTE(review): ultralytics appears to configure its predictor on the first
# call, so the warm-up should use the same settings as the loop - confirm
# against the installed ultralytics version.
predict_kwargs = {"device": device, "half": use_half, "verbose": False}

# Overlay colours are 4-channel tuples because the overlay buffer has four
# channels; pure black (0, 0, 0) is reserved as the transparent colour key.
FRAME_COLOR = (150, 150, 150, 255)
FRAME_TEXT_COLOR = (200, 200, 200, 255)
BOX_COLOR = (0, 255, 0, 255)
STATS_COLOR = (0, 255, 255, 255)

# Read the geometry of monitor 1 (in mss, index 0 is the combined virtual
# screen and index 1 is the first individual monitor).
with mss() as sct:
    monitor = sct.monitors[1]
    screen_width, screen_height = monitor["width"], monitor["height"]
    # Position of this monitor inside the virtual desktop. It is (0, 0) in
    # the common single-monitor case but may be non-zero otherwise.
    screen_left, screen_top = monitor["left"], monitor["top"]

# Detection area: a crop_width x crop_height square centred on the monitor.
# crop_x / crop_y are relative to the monitor (and therefore to the overlay).
crop_width, crop_height = 320, 320
crop_x = (screen_width - crop_width) // 2
crop_y = (screen_height - crop_height) // 2

# Create the full-screen, top-most overlay window on that monitor.
window_name = "YOLO Detection Overlay"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
cv2.setWindowProperty(window_name, cv2.WND_PROP_TOPMOST, 1)
cv2.resizeWindow(window_name, screen_width, screen_height)
# Place the window at the monitor's own origin rather than a hard-coded
# (0, 0), so the overlay lines up when the monitor is offset.
cv2.moveWindow(window_name, screen_left, screen_top)

try:
    # Add the layered + transparent extended styles to the OpenCV window:
    # WS_EX_LAYERED enables colour-key transparency and WS_EX_TRANSPARENT
    # lets mouse input pass through to the windows underneath.
    hwnd = win32gui.FindWindow(None, window_name)
    ex_style = win32gui.GetWindowLong(hwnd, win32con.GWL_EXSTYLE)
    win32gui.SetWindowLong(
        hwnd,
        win32con.GWL_EXSTYLE,
        ex_style | win32con.WS_EX_LAYERED | win32con.WS_EX_TRANSPARENT,
    )
    # Colour key 0 (black): every pure-black overlay pixel becomes transparent.
    win32gui.SetLayeredWindowAttributes(hwnd, 0, 0, win32con.LWA_COLORKEY)

    # Full-screen drawing buffer, reused (cleared) on every frame.
    overlay = np.zeros((screen_height, screen_width, 4), dtype=np.uint8)

    with mss() as sct:
        # Capture region in virtual-desktop coordinates: the monitor origin
        # plus the crop offset inside the monitor.
        monitor = {
            "top": screen_top + crop_y,
            "left": screen_left + crop_x,
            "width": crop_width,
            "height": crop_height,
        }

        # Run one throw-away prediction on a black image so that one-time
        # setup cost is not paid by the first real frame.
        warmup_img = Image.new('RGB', (crop_width, crop_height), (0, 0, 0))
        _ = model(warmup_img, **predict_kwargs)

        # NOTE(review): the overlay is drawn on top of the region being
        # captured, so drawn boxes/labels may show up in later screenshots -
        # confirm whether that feedback matters for this use case.
        while True:
            start_time = time.perf_counter()

            # Grab the crop and hand it to the model as an RGB PIL image.
            screenshot = sct.grab(monitor)
            img = Image.frombytes("RGB", screenshot.size, screenshot.rgb)
            results = model(img, **predict_kwargs)

            # Start from a fully transparent (all-black) overlay each frame.
            overlay.fill(0)

            # Outline and caption for the detection area.
            cv2.rectangle(overlay, (crop_x, crop_y),
                          (crop_x + crop_width, crop_y + crop_height),
                          FRAME_COLOR, 2)
            cv2.putText(overlay, "Detection Area",
                        (crop_x + 10, crop_y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, FRAME_TEXT_COLOR, 2)

            for result in results:
                for box in result.boxes:
                    # Box corners are relative to the crop; shift them by the
                    # crop offset to get overlay coordinates.
                    x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
                    x1 += crop_x
                    y1 += crop_y
                    x2 += crop_x
                    y2 += crop_y
                    cv2.rectangle(overlay, (x1, y1), (x2, y2), BOX_COLOR, 2)

                    cls_id = int(box.cls)
                    conf = float(box.conf)
                    label = f"{result.names[cls_id]} {conf:.2f}"
                    cv2.putText(overlay, label, (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, BOX_COLOR, 2)

            # Per-frame timing covers capture, inference and drawing. The
            # elapsed time is measured once and guarded so that a zero
            # reading cannot raise ZeroDivisionError.
            elapsed = time.perf_counter() - start_time
            fps = 1 / elapsed if elapsed > 0 else 0.0
            cv2.putText(overlay,
                        f"FPS: {fps:.1f} | Delay: {elapsed * 1000:.1f}ms | Device: {device.upper()}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, STATS_COLOR, 2)

            cv2.imshow(window_name, overlay)
            # waitKey also pumps the GUI event loop; 'q' ends the session.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always tear the overlay down, including on Ctrl+C or an error mid-loop.
    cv2.destroyAllWindows()
数据标注工具
https://github.com/HumanSignal/labelImg