
Image Processing Algorithms

Fundamentals of Image Processing

What Is Digital Image Processing

Digital image processing is the use of computers to process, analyze, and enhance digital images. In the LeBot robot project, image processing supports visual perception, environment understanding, and target tracking.

Mathematical Representation of an Image

A digital image can be modeled as a two-dimensional function f(x, y)

where:
- x, y are the pixel coordinates
- f(x, y) is the pixel intensity (gray level)

Color images (a short code sketch follows this list):
- RGB: 3 channels, each in the range 0-255
- HSV: hue, saturation, value
- LAB: a perceptually uniform color space
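
The sketch below illustrates this representation on a small synthetic image (a minimal example, not code from the LeBot project): a grayscale image is a 2-D NumPy array of intensities, while a color image adds a channel axis.

python
import numpy as np
import cv2

# A 4x4 grayscale image: one intensity value f(x, y) per pixel
gray = np.zeros((4, 4), dtype=np.uint8)
gray[1, 2] = 200  # f(x=2, y=1) = 200 (NumPy indexing is [y, x])

# A 4x4 color image: H x W x 3, OpenCV stores channels in BGR order
color = np.zeros((4, 4, 3), dtype=np.uint8)
color[1, 2] = (255, 0, 0)  # a pure blue pixel

# The same pixel expressed in other color spaces
hsv = cv2.cvtColor(color, cv2.COLOR_BGR2HSV)
lab = cv2.cvtColor(color, cv2.COLOR_BGR2LAB)
print(gray[1, 2], color[1, 2], hsv[1, 2], lab[1, 2])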

Pixel Operations

python
import numpy as np
import cv2

# Read the image
image = cv2.imread('image.jpg')
height, width, channels = image.shape

print(f"Image size: {width}x{height}, channels: {channels}")

# Pixel access (note: NumPy indexing is (y, x))
pixel = image[100, 100]
print(f"Pixel value: {pixel}")

# Image slicing
roi = image[100:200, 100:200]  # region of interest (ROI)

# Channel split
b, g, r = cv2.split(image)
print(f"Blue channel: {b.shape}")

# Channel merge
bgr_image = cv2.merge([b, g, r])

Basic Image Transformations

Geometric Transformations

1. Image Scaling

python
import cv2

def scale_image(image, scale_factor):
    """Scale an image by a given factor."""
    height, width = image.shape[:2]
    new_width = int(width * scale_factor)
    new_height = int(height * scale_factor)
    
    # Method 1: pass the target size to cv2.resize
    # (interpolation defaults to cv2.INTER_LINEAR; cv2.INTER_AREA is often
    #  preferred for shrinking and cv2.INTER_CUBIC for enlarging)
    resized = cv2.resize(image, (new_width, new_height))
    
    # Method 2: pass scale factors instead of an explicit size (equivalent result)
    resized_fx_fy = cv2.resize(image, None, fx=scale_factor, fy=scale_factor)
    
    return resized

# Example usage
image = cv2.imread('image.jpg')
small = scale_image(image, 0.5)
large = scale_image(image, 2.0)

2. Image Rotation

python
def rotate_image(image, angle, center=None, scale=1.0):
    """Rotate an image by `angle` degrees (counter-clockwise) around `center`."""
    height, width = image.shape[:2]
    
    if center is None:
        center = (width // 2, height // 2)
    
    # Build the 2x3 rotation matrix
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
    
    # Apply the rotation (output keeps the original size, so corners may be clipped)
    rotated = cv2.warpAffine(image, rotation_matrix, (width, height))
    
    return rotated

# Example usage
image = cv2.imread('image.jpg')
rotated = rotate_image(image, 45)  # rotate by 45 degrees

3. Affine Transformation

python
def affine_transform(image, src_points, dst_points):
    """Apply an affine transformation defined by three point correspondences."""
    # src_points and dst_points must each contain exactly 3 points (float32)
    
    # Compute the 2x3 affine transformation matrix
    affine_matrix = cv2.getAffineTransform(src_points, dst_points)
    
    height, width = image.shape[:2]
    
    # Apply the affine transformation
    transformed = cv2.warpAffine(image, affine_matrix, (width, height))
    
    return transformed

# Example usage
image = cv2.imread('image.jpg')
height, width = image.shape[:2]

# Define source and destination points
src_pts = np.float32([[50, 50], [200, 50], [50, 200]])
dst_pts = np.float32([[10, 100], [200, 50], [100, 250]])

transformed = affine_transform(image, src_pts, dst_pts)

4. Perspective Transformation

python
def perspective_transform(image, src_points, dst_points):
    """Apply a perspective (projective) transformation defined by four point correspondences."""
    # src_points and dst_points must each contain exactly 4 points (float32)
    
    # Compute the 3x3 perspective transformation matrix
    perspective_matrix = cv2.getPerspectiveTransform(src_points, dst_points)
    
    height, width = image.shape[:2]
    
    # Apply the perspective transformation
    transformed = cv2.warpPerspective(image, perspective_matrix, (width, height))
    
    return transformed

# Example usage
image = cv2.imread('image.jpg')
height, width = image.shape[:2]

# Define the four corner points
src_pts = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
dst_pts = np.float32([[0, 0], [width, 0], [50, height], [width - 50, height]])

transformed = perspective_transform(image, src_pts, dst_pts)

Color Space Conversion

python
def convert_color_spaces(image):
    """Demonstrate common color space conversions."""
    
    # BGR to RGB
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # BGR to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # BGR to HSV
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    
    # BGR to LAB
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    
    # BGR to YCrCb
    ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)
    
    return rgb, gray, hsv, lab, ycrcb

# Applications of different color spaces
image = cv2.imread('image.jpg')
rgb, gray, hsv, lab, ycrcb = convert_color_spaces(image)

# HSV is convenient for color-based segmentation (low end of the red hue range)
lower_red = np.array([0, 100, 100])
upper_red = np.array([10, 255, 255])
mask = cv2.inRange(hsv, lower_red, upper_red)

# Grayscale simplifies downstream processing
edge_detection = cv2.Canny(gray, 100, 200)

Image Filtering

Linear Filtering

python
def demonstrate_linear_filters(image):
    """Demonstrate common smoothing filters."""
    
    # 1. Mean filter (low-pass)
    blur = cv2.blur(image, (5, 5))
    
    # 2. Gaussian filter
    gaussian = cv2.GaussianBlur(image, (5, 5), 0)
    
    # 3. Box filter
    box = cv2.boxFilter(image, -1, (5, 5))
    
    # 4. Bilateral filter (edge-preserving; strictly speaking a non-linear filter)
    bilateral = cv2.bilateralFilter(image, 9, 75, 75)
    
    return blur, gaussian, box, bilateral

# Custom convolution kernels
def custom_filter(image, kernel):
    """Apply a custom convolution kernel."""
    filtered = cv2.filter2D(image, -1, kernel)
    return filtered

# Define kernels
identity_kernel = np.array([[0, 0, 0],
                            [0, 1, 0],
                            [0, 0, 0]])

blur_kernel = np.ones((5, 5)) / 25

sharpening_kernel = np.array([[0, -1, 0],
                              [-1, 5, -1],
                              [0, -1, 0]])

# Apply a kernel
image = cv2.imread('image.jpg')
sharpened = custom_filter(image, sharpening_kernel)

Non-linear Filtering

python
def demonstrate_nonlinear_filters(image):
    """Demonstrate non-linear and morphological filters."""
    
    # 1. Median filter (removes salt-and-pepper noise)
    median = cv2.medianBlur(image, 5)
    
    # 2. Morphological opening (erosion followed by dilation)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    opening = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
    
    # 3. Morphological closing (dilation followed by erosion)
    closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)
    
    # 4. Morphological gradient (dilation minus erosion)
    gradient = cv2.morphologyEx(image, cv2.MORPH_GRADIENT, kernel)
    
    # 5. Top-hat (original minus opening)
    tophat = cv2.morphologyEx(image, cv2.MORPH_TOPHAT, kernel)
    
    return median, opening, closing, gradient, tophat

image = cv2.imread('image.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
median, opening, closing, gradient, tophat = demonstrate_nonlinear_filters(gray)

Edge Detection

Canny Edge Detection

python
def canny_edge_detection(image, threshold1=100, threshold2=200):
    """Canny edge detection."""
    # Convert to grayscale if necessary
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image
    
    # Gaussian blur to suppress noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 1.5)
    
    # Canny edge detection (threshold1/threshold2 are the hysteresis thresholds)
    edges = cv2.Canny(blurred, threshold1, threshold2)
    
    return edges

# Example usage
image = cv2.imread('image.jpg')
edges = canny_edge_detection(image)
cv2.imshow('Canny Edges', edges)
cv2.waitKey(0)

Sobel Edge Detection

python
def sobel_edge_detection(image):
    """Sobel edge detection."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Gradient in the X direction
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5)
    
    # Gradient in the Y direction
    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=5)
    
    # Gradient magnitude, normalized to 0-255
    magnitude = np.sqrt(sobelx**2 + sobely**2)
    magnitude = np.uint8(255 * magnitude / np.max(magnitude))
    
    # Gradient direction in degrees
    angle = np.arctan2(sobely, sobelx) * 180 / np.pi
    
    return magnitude, angle

image = cv2.imread('image.jpg')
magnitude, angle = sobel_edge_detection(image)

Laplacian Edge Detection

python
def laplacian_edge_detection(image):
    """Laplacian edge detection."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Gaussian blur
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Laplacian (second derivative)
    laplacian = cv2.Laplacian(blurred, cv2.CV_64F)
    
    # Convert back to 8-bit
    laplacian = np.uint8(np.absolute(laplacian))
    
    return laplacian

Image Segmentation

Threshold-Based Segmentation

python
def threshold_segmentation(image):
    """Threshold-based segmentation."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # 1. Fixed threshold
    ret, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # 2. Inverted binary threshold
    ret, binary_inv = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
    
    # 3. Truncation
    ret, trunc = cv2.threshold(gray, 127, 255, cv2.THRESH_TRUNC)
    
    # 4. Adaptive threshold
    adaptive = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                      cv2.THRESH_BINARY, 11, 2)
    
    # 5. Otsu's automatic threshold
    ret, otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    
    return binary, binary_inv, trunc, adaptive, otsu

Connected Component Analysis

python
def connected_components(image):
    """Connected component analysis."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Binarize
    ret, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Label connected components
    num_labels, labels = cv2.connectedComponents(binary)
    
    # Also collect per-component statistics and centroids
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary)
    
    # Colorize the labels: map each label to a hue, with full saturation and value
    label_hue = np.uint8(179 * labels / np.max(labels))
    full_channel = 255 * np.ones_like(label_hue)
    labeled_image = cv2.merge([label_hue, full_channel, full_channel])
    labeled_image = cv2.cvtColor(labeled_image, cv2.COLOR_HSV2BGR)
    labeled_image[labels == 0] = 0  # paint the background black
    
    return num_labels, labels, stats, centroids, labeled_image
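
The stats and centroids returned above are typically used to filter out small noise components. Below is a minimal usage sketch built on the connected_components function; the min_area threshold is an assumed value, not one from the project.

python
import cv2

image = cv2.imread('image.jpg')
num_labels, labels, stats, centroids, labeled_image = connected_components(image)

min_area = 500  # assumed threshold; tune for the actual scene
for i in range(1, num_labels):  # label 0 is the background
    x, y, w, h, area = (int(v) for v in stats[i])
    if area < min_area:
        continue
    cx, cy = centroids[i]
    cv2.rectangle(labeled_image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    print(f"Component {i}: area={area}, centroid=({cx:.1f}, {cy:.1f})")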

Contour Detection

python
def contour_detection(image):
    """Contour detection and analysis."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Edge detection
    edges = cv2.Canny(gray, 100, 200)
    
    # Find contours
    contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    # Draw contours
    output = image.copy()
    cv2.drawContours(output, contours, -1, (0, 255, 0), 2)
    
    # Analyze each contour
    results = []
    for i, contour in enumerate(contours):
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        
        # Polygonal approximation
        epsilon = 0.02 * perimeter
        approx = cv2.approxPolyDP(contour, epsilon, True)
        
        # Convex hull
        hull = cv2.convexHull(contour)
        
        # Minimum-area bounding rectangle
        rect = cv2.minAreaRect(contour)
        
        # Minimum enclosing circle
        (cx, cy), radius = cv2.minEnclosingCircle(contour)
        
        # Fitted ellipse (requires at least 5 points)
        if len(contour) >= 5:
            ellipse = cv2.fitEllipse(contour)
        else:
            ellipse = None
        
        results.append({
            'area': area,
            'perimeter': perimeter,
            'approx': approx,
            'hull': hull,
            'rect': rect,
            'circle': (cx, cy, radius),
            'ellipse': ellipse
        })
    
    return output, results
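
The fitted shapes collected in results can be drawn back onto the image for visual inspection; a short sketch assuming the contour_detection function above:

python
import cv2
import numpy as np

image = cv2.imread('image.jpg')
output, results = contour_detection(image)

for r in results:
    # Minimum-area rectangle: convert the rotated rect to 4 corner points
    box = np.int32(cv2.boxPoints(r['rect']))
    cv2.drawContours(output, [box], 0, (255, 0, 0), 2)

    # Minimum enclosing circle
    cx, cy, radius = r['circle']
    cv2.circle(output, (int(cx), int(cy)), int(radius), (0, 0, 255), 2)

    # Fitted ellipse, if one was available
    if r['ellipse'] is not None:
        cv2.ellipse(output, r['ellipse'], (0, 255, 255), 2)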

K-means Clustering Segmentation

python
def kmeans_segmentation(image, k=3):
    """Color quantization / segmentation with K-means clustering."""
    # Reshape to a list of pixels and convert to float32 (required by cv2.kmeans)
    data = image.reshape((-1, 3))
    data = np.float32(data)
    
    # Run K-means
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    ret, labels, centers = cv2.kmeans(data, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
    
    # Map every pixel to its cluster center and restore the image shape
    centers = np.uint8(centers)
    result = centers[labels.flatten()]
    segmented = result.reshape(image.shape)
    
    return segmented, labels.reshape(image.shape[:2]), centers

Feature Detection

Harris Corner Detection

python
def harris_corner_detection(image, threshold=0.01):
    """Harris corner detection."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Harris corner response (blockSize=2, ksize=3, k=0.04)
    corners = cv2.cornerHarris(gray, 2, 3, 0.04)
    
    # Dilate the response to make the corners easier to see
    corners = cv2.dilate(corners, None)
    ret, threshold_corners = cv2.threshold(corners, threshold * corners.max(), 255, 0)
    
    # Mark corners in red on a copy of the image
    output = image.copy()
    output[corners > threshold * corners.max()] = [0, 0, 255]
    
    return output, corners

SIFT Feature Detection

python
def sift_feature_detection(image):
    """SIFT feature detection."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Create a SIFT detector
    sift = cv2.SIFT_create()
    
    # Detect keypoints and compute descriptors
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    
    # Draw the keypoints
    output = cv2.drawKeypoints(image, keypoints, None)
    
    return output, keypoints, descriptors

ORB Feature Detection

python
def orb_feature_detection(image):
    """ORB feature detection."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Create an ORB detector
    orb = cv2.ORB_create(nfeatures=500)
    
    # Detect keypoints and compute descriptors
    keypoints, descriptors = orb.detectAndCompute(gray, None)
    
    # Draw the keypoints
    output = cv2.drawKeypoints(image, keypoints, None)
    
    return output, keypoints, descriptors
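
The descriptors are usually consumed by a matcher, for example to associate features between consecutive frames. A hedged sketch using brute-force Hamming matching (frame1.jpg and frame2.jpg are placeholder file names; ORB descriptors are binary, so Hamming distance is the standard choice):

python
import cv2

img1 = cv2.imread('frame1.jpg')
img2 = cv2.imread('frame2.jpg')

_, kp1, des1 = orb_feature_detection(img1)
_, kp2, des2 = orb_feature_detection(img2)

# Brute-force matcher with Hamming distance and cross-checking
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance)

# Visualize the 30 best matches
matched = cv2.drawMatches(img1, kp1, img2, kp2, matches[:30], None, flags=2)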

Line and Circle Detection

Hough Line Detection

python
def hough_line_detection(image, threshold=50):
    """Hough line detection."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    
    # Detect lines in (rho, theta) form; threshold is the accumulator vote count
    lines = cv2.HoughLines(edges, 1, np.pi / 180, threshold)
    
    output = image.copy()
    if lines is not None:
        for rho, theta in lines[:, 0]:
            # Convert (rho, theta) into two far-apart points on the line for drawing
            a = np.cos(theta)
            b = np.sin(theta)
            x0 = a * rho
            y0 = b * rho
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * a)
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * a)
            cv2.line(output, (x1, y1), (x2, y2), (0, 255, 0), 2)
    
    return output

Hough Circle Detection

python
def hough_circle_detection(image, min_dist=50):
    """Hough circle detection."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (9, 9), 2)
    
    # Detect circles; min_dist is the minimum distance between circle centers
    circles = cv2.HoughCircles(blurred, cv2.HOUGH_GRADIENT, 1, min_dist,
                               param1=50, param2=30, minRadius=0, maxRadius=0)
    
    output = image.copy()
    if circles is not None:
        circles = np.uint16(np.around(circles))
        for i in circles[0, :]:
            center = (i[0], i[1])
            radius = i[2]
            cv2.circle(output, center, radius, (0, 255, 0), 2)   # circle outline
            cv2.circle(output, center, 2, (0, 0, 255), 3)        # circle center
    
    return output, circles

Template Matching

python
def template_matching(image, template):
    """Template matching with several similarity measures."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    
    # Matching methods to compare
    methods = [
        cv2.TM_CCOEFF,
        cv2.TM_CCOEFF_NORMED,
        cv2.TM_CCORR,
        cv2.TM_CCORR_NORMED,
        cv2.TM_SQDIFF,
        cv2.TM_SQDIFF_NORMED
    ]
    
    results = {}
    
    for method in methods:
        result = cv2.matchTemplate(gray, gray_template, method)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
        
        # For squared-difference methods the best match is the minimum
        if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
            location = min_loc
            value = min_val
        else:
            location = max_loc
            value = max_val
        
        results[method] = {
            'location': location,
            'value': value,
            'result_map': result
        }
    
    return results
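
A short usage sketch for one of the methods above, marking the best match on the image (template.jpg is a placeholder file name):

python
import cv2

image = cv2.imread('image.jpg')
template = cv2.imread('template.jpg')
h, w = template.shape[:2]

results = template_matching(image, template)
best = results[cv2.TM_CCOEFF_NORMED]

# The match location is the top-left corner of the matched region
top_left = best['location']
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)
print(f"Best match score: {best['value']:.3f}")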

Image Histograms

python
def histogram_analysis(image):
    """Histogram computation and equalization."""
    # Per-channel histograms (B, G, R)
    hist_b = cv2.calcHist([image], [0], None, [256], [0, 256])
    hist_g = cv2.calcHist([image], [1], None, [256], [0, 256])
    hist_r = cv2.calcHist([image], [2], None, [256], [0, 256])
    
    # Global histogram equalization on the grayscale image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    equalized = cv2.equalizeHist(gray)
    
    # CLAHE (contrast-limited adaptive histogram equalization)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_result = clahe.apply(gray)
    
    return hist_b, hist_g, hist_r, equalized, clahe_result

def histogram_backprojection(image, roi):
    """Histogram back-projection."""
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    
    # Hue/saturation histogram of the ROI
    hist = cv2.calcHist([hsv_roi], [0, 1], None, [180, 256], [0, 180, 0, 256])
    cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX)
    
    # Back-project the ROI histogram onto the full image
    backproj = cv2.calcBackProject([hsv], [0, 1], hist, [0, 180, 0, 256], 1)
    
    return backproj

Summary

This chapter covered the core image processing algorithms, including:

  1. Image transformations - scaling, rotation, affine, perspective
  2. Image filtering - linear and non-linear filters
  3. Edge detection - Canny, Sobel, Laplacian
  4. Image segmentation - thresholding, connected components, contours, clustering
  5. Feature detection - Harris, SIFT, ORB
  6. Shape detection - lines, circles, template matching
  7. Histograms - analysis, equalization, back-projection

In the LeBot project, combining these algorithms enables powerful vision capabilities; a small end-to-end sketch follows below.
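
As one illustration of such a combination, here is a minimal, hedged sketch of a color-based target localization pipeline (HSV thresholding, morphological cleanup, contour extraction, centroid). The HSV range and area threshold are assumptions for illustration, not LeBot project values.

python
import cv2
import numpy as np

def locate_colored_object(frame, lower_hsv, upper_hsv, min_area=300):
    """Sketch: find the largest blob in an HSV range and return its centroid."""
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower_hsv, upper_hsv)

    # Opening removes small speckles, closing fills small holes
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    largest = max(contours, key=cv2.contourArea)
    if cv2.contourArea(largest) < min_area:
        return None

    # Centroid from the contour moments
    m = cv2.moments(largest)
    return int(m['m10'] / m['m00']), int(m['m01'] / m['m00'])

# Example: a red-ish target (assumed HSV range; tune for the real target)
frame = cv2.imread('image.jpg')
center = locate_colored_object(frame, np.array([0, 100, 100]), np.array([10, 255, 255]))
print(f"Target centroid: {center}")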

Recommended Resources

Written by the LeBot development team