Core Implementation Approach
Sliding window strategy: slide a fixed-size window across the image and classify each window.
Multi-dimensional feature extraction: combine statistical, texture, edge, and shape features.
Random forest classification: train a binary classifier to decide whether a window contains the target.
Post-processing: apply non-maximum suppression (NMS) to remove duplicate detections (a short pipeline sketch follows this list).
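Tying these four steps together, the whole pipeline fits in a few lines. The sketch below is only an outline: it assumes a detector object exposing the sliding_window, extract_features, rf_classifier, window_size, and non_max_suppression members defined in the full class further down, and the wrapper function run_pipeline itself is hypothetical.

def run_pipeline(detector, image, threshold=0.7):
    """Outline of the detection flow: window -> features -> forest score -> NMS."""
    detections = []
    for (x, y), patch in detector.sliding_window(image):          # 1. slide a fixed-size window
        feats = detector.extract_features(patch).reshape(1, -1)   # 2. multi-dimensional features
        prob = detector.rf_classifier.predict_proba(feats)[0][1]  # 3. random-forest score
        if prob >= threshold:
            w, h = detector.window_size
            detections.append((x, y, w, h, prob))
    return detector.non_max_suppression(detections)               # 4. NMS post-processing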
The Importance of Feature Engineering
LBP texture features: capture local texture patterns.
Gray-level co-occurrence matrix (GLCM): describes the statistical properties of texture.
Edge density: reflects boundary information of the target.
Shape descriptors: geometric features such as circularity and area ratio (a standalone example follows this list).
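For readers who want to experiment with these features in isolation, the snippet below computes an LBP histogram, two GLCM statistics, and the Canny edge density for a single grayscale patch. It is a condensed, standalone sketch of what extract_features does later; the function name texture_features and the specific parameter values are illustrative choices, not part of the original detector.

import cv2
import numpy as np
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops

def texture_features(gray: np.ndarray) -> np.ndarray:
    """Texture/edge features for one uint8 grayscale patch (illustrative sketch)."""
    # LBP histogram: local texture patterns encoded with the 'uniform' method
    radius, n_points = 3, 24
    lbp = local_binary_pattern(gray, n_points, radius, method="uniform")
    lbp_hist, _ = np.histogram(lbp, bins=n_points + 2,
                               range=(0, n_points + 2), density=True)
    # GLCM statistics: contrast and homogeneity averaged over 4 directions (angles in radians)
    glcm = graycomatrix(gray, distances=[1],
                        angles=[0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
                        levels=256, symmetric=True, normed=True)
    contrast = graycoprops(glcm, "contrast").mean()
    homogeneity = graycoprops(glcm, "homogeneity").mean()
    # Edge density: fraction of Canny edge pixels, a rough proxy for boundary information
    edges = cv2.Canny(gray, 50, 150)
    edge_density = float(np.count_nonzero(edges)) / edges.size
    return np.concatenate([lbp_hist, [contrast, homogeneity, edge_density]])

The resulting vector can be fed directly into any scikit-learn classifier, which is exactly how the detector class below uses its own (longer) feature vector.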
Practical Recommendations
Data collection: gather a large number of positive and negative samples for training.
Feature optimization: adapt the feature extraction strategy to the specific target.
Parameter tuning: adjust the window size, step size, confidence threshold, and so on.
Multi-scale detection: use windows of different sizes to detect targets of different sizes (see the pyramid sketch after this list).
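The detector class below works at a single window size, so multi-scale detection has to be added on top. One common option is an image pyramid: rescale the input, run the fixed-size detector, and map the boxes back to the original coordinates. This is a minimal sketch assuming a trained detector with the detect and non_max_suppression methods shown later; detect_multiscale is a hypothetical helper, not part of the original code.

import cv2

def detect_multiscale(detector, image, scales=(1.0, 0.75, 0.5), confidence_threshold=0.7):
    """Rescale the image and reuse the single-scale detector (image-pyramid sketch)."""
    all_detections = []
    for scale in scales:
        resized = cv2.resize(image, None, fx=scale, fy=scale)
        for (x, y, w, h, conf) in detector.detect(resized, confidence_threshold):
            # Map the box back to the original image coordinates
            all_detections.append((int(x / scale), int(y / scale),
                                   int(w / scale), int(h / scale), conf))
    return detector.non_max_suppression(all_detections)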
Suitable Scenarios
Embedded devices with limited compute resources.
Scenes where the target has distinctive texture or shape features.
Prototype systems that need to be deployed and debugged quickly.
A complement to a traditional image-processing pipeline.
import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from skimage.measure import regionprops
import os
from typing import List, Tuple
import matplotlib.pyplot as plt


class RandomForestObjectDetector:
    """Random-forest-based object detector."""

    def __init__(self, window_size=(64, 64), step_size=16, n_estimators=100):
        """Initialize the detector.

        Args:
            window_size: sliding-window size (width, height)
            step_size: sliding stride in pixels
            n_estimators: number of trees in the random forest
        """
        self.window_size = window_size
        self.step_size = step_size
        self.rf_classifier = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=42,
            max_depth=10,
            min_samples_split=5
        )
        self.is_trained = False

    def extract_features(self, image_patch: np.ndarray) -> np.ndarray:
        """Extract a feature vector from an image patch.

        Args:
            image_patch: input image patch

        Returns:
            1-D feature vector
        """
        features = []
        if len(image_patch.shape) == 3:
            gray = cv2.cvtColor(image_patch, cv2.COLOR_BGR2GRAY)
        else:
            gray = image_patch.copy()

        # Statistical features of the gray levels
        features.extend([
            np.mean(gray), np.std(gray), np.median(gray),
            np.min(gray), np.max(gray), np.var(gray)
        ])

        # LBP texture histogram (uniform patterns)
        radius = 3
        n_points = 8 * radius
        lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
        lbp_hist, _ = np.histogram(lbp.ravel(), bins=n_points + 2,
                                   range=(0, n_points + 2), density=True)
        features.extend(lbp_hist)

        # GLCM texture statistics (angles are given in radians)
        try:
            glcm = graycomatrix(gray, distances=[1],
                                angles=[0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
                                levels=256, symmetric=True, normed=True)
            contrast = graycoprops(glcm, 'contrast').mean()
            dissimilarity = graycoprops(glcm, 'dissimilarity').mean()
            homogeneity = graycoprops(glcm, 'homogeneity').mean()
            energy = graycoprops(glcm, 'energy').mean()
            correlation = graycoprops(glcm, 'correlation').mean()
            features.extend([contrast, dissimilarity, homogeneity, energy, correlation])
        except Exception:
            features.extend([0, 0, 0, 0, 0])

        # Edge density from the Canny edge map
        edges = cv2.Canny(gray, 50, 150)
        edge_density = np.sum(edges > 0) / edges.size
        features.append(edge_density)

        # Gradient-magnitude statistics
        grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        grad_magnitude = np.sqrt(grad_x ** 2 + grad_y ** 2)
        features.extend([np.mean(grad_magnitude), np.std(grad_magnitude)])

        # Shape features from the largest contour of an Otsu-thresholded mask
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            largest_contour = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(largest_contour)
            perimeter = cv2.arcLength(largest_contour, True)
            if perimeter > 0:
                circularity = 4 * np.pi * area / (perimeter ** 2)
            else:
                circularity = 0
            features.extend([area / (gray.shape[0] * gray.shape[1]), circularity])
        else:
            features.extend([0, 0])

        return np.array(features)

    def sliding_window(self, image: np.ndarray) -> List[Tuple]:
        """Apply a sliding window over the image.

        Args:
            image: input image

        Returns:
            list of ((x, y), patch) tuples
        """
        windows = []
        h, w = image.shape[:2]
        for y in range(0, h - self.window_size[1] + 1, self.step_size):
            for x in range(0, w - self.window_size[0] + 1, self.step_size):
                window = image[y:y + self.window_size[1], x:x + self.window_size[0]]
                # Keep only full-sized windows (array shape is (height, width))
                if window.shape[:2] == (self.window_size[1], self.window_size[0]):
                    windows.append(((x, y), window))
        return windows

    def prepare_training_data(self, positive_samples: List[np.ndarray],
                              negative_samples: List[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """Build the training set.

        Args:
            positive_samples: list of positive image patches
            negative_samples: list of negative image patches

        Returns:
            feature matrix and label vector
        """
        features = []
        labels = []

        print("Extracting positive-sample features...")
        for sample in positive_samples:
            features.append(self.extract_features(sample))
            labels.append(1)

        print("Extracting negative-sample features...")
        for sample in negative_samples:
            features.append(self.extract_features(sample))
            labels.append(0)

        return np.array(features), np.array(labels)

    def train(self, positive_samples: List[np.ndarray],
              negative_samples: List[np.ndarray]):
        """Train the random-forest classifier.

        Args:
            positive_samples: list of positive image patches
            negative_samples: list of negative image patches
        """
        print("Preparing training data...")
        X, y = self.prepare_training_data(positive_samples, negative_samples)
        print(f"Training data shape: {X.shape}, label counts: {np.bincount(y)}")

        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y)

        print("Training the random-forest classifier...")
        self.rf_classifier.fit(X_train, y_train)

        val_pred = self.rf_classifier.predict(X_val)
        print("\nValidation performance:")
        print(classification_report(y_val, val_pred))

        self.is_trained = True
        print("Training finished!")

    def detect(self, image: np.ndarray, confidence_threshold: float = 0.7) -> List[Tuple]:
        """Detect targets in an image.

        Args:
            image: input image
            confidence_threshold: minimum positive-class probability

        Returns:
            list of detections [(x, y, w, h, confidence), ...]
        """
        if not self.is_trained:
            raise ValueError("Model is not trained yet; call train() first")

        detections = []
        windows = self.sliding_window(image)
        print(f"Processing {len(windows)} windows...")

        for (x, y), window in windows:
            features = self.extract_features(window).reshape(1, -1)
            prediction = self.rf_classifier.predict(features)[0]
            confidence = self.rf_classifier.predict_proba(features)[0][1]
            if prediction == 1 and confidence >= confidence_threshold:
                detections.append((x, y, self.window_size[0], self.window_size[1], confidence))

        return detections

    def non_max_suppression(self, detections: List[Tuple],
                            overlap_threshold: float = 0.3) -> List[Tuple]:
        """Non-maximum suppression.

        Args:
            detections: list of detections
            overlap_threshold: IoU above which an overlapping box is suppressed

        Returns:
            filtered detections
        """
        if not detections:
            return []

        # Sort by confidence, keep the best box, drop boxes that overlap it too much
        detections = sorted(detections, key=lambda d: d[4], reverse=True)
        keep = []
        while detections:
            current = detections.pop(0)
            keep.append(current)
            detections = [d for d in detections
                          if self.calculate_iou(current, d) < overlap_threshold]
        return keep

    @staticmethod
    def calculate_iou(box1: Tuple, box2: Tuple) -> float:
        """Compute the IoU of two (x, y, w, h, confidence) boxes."""
        x1, y1, w1, h1, _ = box1
        x2, y2, w2, h2, _ = box2
        xi1 = max(x1, x2)
        yi1 = max(y1, y2)
        xi2 = min(x1 + w1, x2 + w2)
        yi2 = min(y1 + h1, y2 + h2)
        if xi2 <= xi1 or yi2 <= yi1:
            return 0.0
        intersection = (xi2 - xi1) * (yi2 - yi1)
        union = w1 * h1 + w2 * h2 - intersection
        return intersection / union if union > 0 else 0.0

    def visualize_detections(self, image: np.ndarray, detections: List[Tuple],
                             title: str = "Detections"):
        """Visualize detection results.

        Args:
            image: original image (BGR)
            detections: list of detections
            title: figure title
        """
        img_vis = image.copy()
        for x, y, w, h, confidence in detections:
            cv2.rectangle(img_vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
            label = f"{confidence:.2f}"
            cv2.putText(img_vis, label, (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        plt.figure(figsize=(12, 8))
        plt.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
        plt.title(f"{title} - {len(detections)} object(s) detected")
        plt.axis('off')
        plt.show()
def create_sample_data():
    """Create synthetic training data for the demo."""
    # Positive samples: a brighter rectangle on a mid-gray background plus noise
    positive_samples = []
    for _ in range(100):
        sample = np.random.randint(50, 100, (64, 64), dtype=np.uint8)
        cv2.rectangle(sample, (10, 20), (50, 40), 150, -1)
        noise = np.random.normal(0, 10, sample.shape)
        sample = np.clip(sample + noise, 0, 255).astype(np.uint8)
        positive_samples.append(sample)

    # Negative samples: dark noisy background without a rectangle
    negative_samples = []
    for _ in range(200):
        sample = np.random.randint(0, 50, (64, 64), dtype=np.uint8)
        noise = np.random.normal(0, 15, sample.shape)
        sample = np.clip(sample + noise, 0, 255).astype(np.uint8)
        negative_samples.append(sample)

    return positive_samples, negative_samples
if __name__ == "__main__":
    detector = RandomForestObjectDetector(window_size=(64, 64), step_size=32)

    print("Creating sample data...")
    positive_samples, negative_samples = create_sample_data()
    detector.train(positive_samples, negative_samples)

    # Build a synthetic test image with two bright 64x64 squares to find
    test_image = np.random.randint(0, 50, (300, 400), dtype=np.uint8)
    cv2.rectangle(test_image, (50, 50), (114, 114), 150, -1)
    cv2.rectangle(test_image, (200, 150), (264, 214), 150, -1)

    print("Running detection...")
    detections = detector.detect(test_image, confidence_threshold=0.6)
    filtered_detections = detector.non_max_suppression(detections, overlap_threshold=0.3)

    print(f"Raw detections: {len(detections)}")
    print(f"Detections after NMS: {len(filtered_detections)}")

    if len(filtered_detections) > 0:
        detector.visualize_detections(
            cv2.cvtColor(test_image, cv2.COLOR_GRAY2BGR), filtered_detections)
    else:
        print("No targets detected")