图像处理算法
图像处理基础概念
什么是数字图像处理
数字图像处理是指使用计算机处理、分析和改进数字图像的技术。在 LeBot 机器人项目中,图像处理用于视觉感知、环境理解和目标追踪。
图像的数学表示
数字图像 = 二维函数 f(x, y)
其中:
- x, y 是像素坐标
- f(x, y) 是像素强度(灰度值)
彩色图像:
- RGB:3 个通道,每个通道 0-255
- HSV:色调、饱和度、亮度
- LAB:感知均匀的色彩空间
像素操作
python
import numpy as np
import cv2
# Read the image from disk; the channels are treated as B, G, R below
image = cv2.imread('image.jpg')
# cv2.imread returns None instead of raising when the file cannot be read
if image is None:
    raise FileNotFoundError("cv2.imread could not read 'image.jpg'")
height, width, channels = image.shape
print(f"图像大小: {width}x{height}, 通道数: {channels}")
# Pixel access: the array is indexed as [row (y), column (x)]
pixel = image[100, 100]
print(f"像素值: {pixel}")
# Slicing yields a region of interest (rows 100-199, columns 100-199)
roi = image[100:200, 100:200]
# Split the image into its individual channels
b, g, r = cv2.split(image)
print(f"蓝通道: {b.shape}")
# Merge the channels back into a single image
bgr_image = cv2.merge([b, g, r])
# 基本图像变换 (Basic image transformations)
几何变换
1. 图像缩放
python
import cv2
def scale_image(image, scale_factor):
    """Resize an image by a uniform scale factor.

    Args:
        image: Image array; only ``image.shape[:2]`` (height, width) is read
            to compute the target size.
        scale_factor: Positive multiplier applied to both width and height.

    Returns:
        The result of ``cv2.resize`` at the scaled (width, height).

    Raises:
        ValueError: If ``scale_factor`` is not positive.
    """
    if scale_factor <= 0:
        raise ValueError(f"scale_factor must be positive, got {scale_factor}")
    height, width = image.shape[:2]
    # Clamp to one pixel so a very small factor never requests an empty image.
    new_width = max(1, int(width * scale_factor))
    new_height = max(1, int(height * scale_factor))
    # cv2.resize(image, None, fx=scale_factor, fy=scale_factor) is an
    # alternative way to express the same scaling; a single resize is enough.
    return cv2.resize(image, (new_width, new_height))
# Usage example: halve and double the image size
image = cv2.imread('image.jpg')
small = scale_image(image, 0.5)
large = scale_image(image, 2.0)
# 2. 图像旋转 (Image rotation)
python
def rotate_image(image, angle, center=None, scale=1.0):
    """Rotate an image about a pivot point, keeping the original canvas size.

    Args:
        image: Image array; ``image.shape[:2]`` supplies (height, width).
        angle: Rotation angle handed to ``cv2.getRotationMatrix2D``.
        center: Pivot as (x, y). Defaults to the integer image centre.
        scale: Scale factor handed to ``cv2.getRotationMatrix2D``.

    Returns:
        The warped image, with the same width and height as the input.
    """
    rows, cols = image.shape[:2]
    pivot = (cols // 2, rows // 2) if center is None else center
    # Build the 2D rotation matrix, then warp onto a same-sized canvas.
    matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, matrix, (cols, rows))
# Usage example
image = cv2.imread('image.jpg')
rotated = rotate_image(image, 45)  # rotate by 45 degrees
# 3. 图像仿射变换 (Affine transformation)
python
def affine_transform(image, src_points, dst_points):
    """Warp an image with the affine map defined by point correspondences.

    Args:
        image: Image array; ``image.shape[:2]`` supplies (height, width).
        src_points: Coordinates of 3 points in the source image.
        dst_points: Coordinates the 3 source points should map to.

    Returns:
        The warped image, with the same width and height as the input.
    """
    # Three point pairs determine the affine matrix.
    matrix = cv2.getAffineTransform(src_points, dst_points)
    rows, cols = image.shape[:2]
    return cv2.warpAffine(image, matrix, (cols, rows))
# Usage example
image = cv2.imread('image.jpg')
height, width = image.shape[:2]
# Three source points and the positions they should be mapped to
src_pts = np.float32([[50, 50], [200, 50], [50, 200]])
dst_pts = np.float32([[10, 100], [200, 50], [100, 250]])
transformed = affine_transform(image, src_pts, dst_pts)
# 4. 透视变换 (Perspective transformation)
python
def perspective_transform(image, src_points, dst_points):
    """Warp an image with the perspective map defined by point correspondences.

    Args:
        image: Image array; ``image.shape[:2]`` supplies (height, width).
        src_points: Coordinates of 4 points in the source image.
        dst_points: Coordinates the 4 source points should map to.

    Returns:
        The warped image, with the same width and height as the input.
    """
    # Four point pairs determine the perspective matrix.
    matrix = cv2.getPerspectiveTransform(src_points, dst_points)
    rows, cols = image.shape[:2]
    return cv2.warpPerspective(image, matrix, (cols, rows))
# Usage example
image = cv2.imread('image.jpg')
height, width = image.shape[:2]
# The four image corners, and a target quad whose bottom edge is pulled inwards
src_pts = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
dst_pts = np.float32([[0, 0], [width, 0], [50, height], [width - 50, height]])
transformed = perspective_transform(image, src_pts, dst_pts)
# 颜色空间转换 (Colour space conversion)
python
def convert_color_spaces(image):
    """Convert a BGR image into several other colour spaces.

    Args:
        image: Input image, interpreted as BGR by every conversion code used.

    Returns:
        A 5-tuple ``(rgb, gray, hsv, lab, ycrcb)`` of converted images.
    """
    # Conversion codes, listed in the order the results are returned.
    conversion_codes = (
        cv2.COLOR_BGR2RGB,
        cv2.COLOR_BGR2GRAY,
        cv2.COLOR_BGR2HSV,
        cv2.COLOR_BGR2LAB,
        cv2.COLOR_BGR2YCrCb,
    )
    return tuple(cv2.cvtColor(image, code) for code in conversion_codes)
# Applying the colour spaces
image = cv2.imread('image.jpg')
rgb, gray, hsv, lab, ycrcb = convert_color_spaces(image)
# HSV for colour-based segmentation. OpenCV stores 8-bit hue as 0-179 and red
# wraps around both ends of that range, so two hue bands are combined.
lower_red = np.array([0, 100, 100])
upper_red = np.array([10, 255, 255])
lower_red_wrap = np.array([170, 100, 100])
upper_red_wrap = np.array([179, 255, 255])
mask = cv2.bitwise_or(
    cv2.inRange(hsv, lower_red, upper_red),
    cv2.inRange(hsv, lower_red_wrap, upper_red_wrap),
)
# Grayscale simplifies processing, e.g. as input to the Canny edge detector
edge_detection = cv2.Canny(gray, 100, 200)
# 图像滤波 (Image filtering)
线性滤波
python
def demonstrate_linear_filters(image):
    """Apply four smoothing filters to an image and return every result.

    NOTE: the bilateral filter is edge-preserving and, unlike the other
    three, is not strictly a linear filter.

    Args:
        image: Input image passed unchanged to each filter.

    Returns:
        A 4-tuple ``(blur, gaussian, box, bilateral)``.
    """
    window = (5, 5)
    return (
        cv2.blur(image, window),                # mean (low-pass) filter
        cv2.GaussianBlur(image, window, 0),     # Gaussian filter
        cv2.boxFilter(image, -1, window),       # box filter, same depth as input
        cv2.bilateralFilter(image, 9, 75, 75),  # bilateral (edge-preserving)
    )
# Custom convolution kernel
def custom_filter(image, kernel):
    """Filter an image with a caller-supplied kernel.

    Args:
        image: Input image.
        kernel: Kernel array handed to ``cv2.filter2D``.

    Returns:
        The filtered image; ``ddepth=-1`` keeps the input depth.
    """
    return cv2.filter2D(image, -1, kernel)
# Kernel definitions
identity_kernel = np.array([[0, 0, 0],
                            [0, 1, 0],
                            [0, 0, 0]])
blur_kernel = np.ones((5, 5)) / 25
sharpening_kernel = np.array([[0, -1, 0],
                              [-1, 5, -1],
                              [0, -1, 0]])
# Apply the sharpening kernel
image = cv2.imread('image.jpg')
sharpened = custom_filter(image, sharpening_kernel)
# 非线性滤波 (Non-linear filtering)
python
def demonstrate_nonlinear_filters(image):
    """Apply a median filter and four morphological operations to an image.

    Args:
        image: Input image passed unchanged to each operation.

    Returns:
        A 5-tuple ``(median, opening, closing, gradient, tophat)``.
    """
    # Median filter (removes salt-and-pepper noise)
    median = cv2.medianBlur(image, 5)
    element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    operations = (
        cv2.MORPH_OPEN,      # opening: erosion followed by dilation
        cv2.MORPH_CLOSE,     # closing: dilation followed by erosion
        cv2.MORPH_GRADIENT,  # gradient: dilation minus erosion
        cv2.MORPH_TOPHAT,    # top hat: input minus its opening
    )
    opening, closing, gradient, tophat = (
        cv2.morphologyEx(image, operation, element) for operation in operations
    )
    return median, opening, closing, gradient, tophat
# Usage example: run the non-linear filters on the grayscale image
image = cv2.imread('image.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
median, opening, closing, gradient, tophat = demonstrate_nonlinear_filters(gray)
# 边缘检测 (Edge detection)
Canny 边缘检测
python
def canny_edge_detection(image, threshold1=100, threshold2=200):
    """Detect edges with the Canny detector after Gaussian smoothing.

    Args:
        image: Input image. A 3-dimensional array is converted from BGR to
            grayscale; any other array is used as-is.
        threshold1: First threshold passed to ``cv2.Canny``.
        threshold2: Second threshold passed to ``cv2.Canny``.

    Returns:
        The edge map produced by ``cv2.Canny``.
    """
    is_colour = len(image.shape) == 3
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_colour else image
    # Smooth first so that noise produces fewer spurious edges.
    smoothed = cv2.GaussianBlur(gray, (5, 5), 1.5)
    return cv2.Canny(smoothed, threshold1, threshold2)
# Usage example
image = cv2.imread('image.jpg')
edges = canny_edge_detection(image)
cv2.imshow('Canny Edges', edges)
# HighGUI only draws the window while its event loop runs inside waitKey.
cv2.waitKey(0)
cv2.destroyAllWindows()
# Sobel 边缘检测 (Sobel edge detection)
python
def sobel_edge_detection(image):
    """Compute Sobel gradient magnitude and direction.

    Args:
        image: Input image. A 3-dimensional array is converted from BGR to
            grayscale; any other array is used as-is.

    Returns:
        A tuple ``(magnitude, angle)``: ``magnitude`` is the gradient
        magnitude rescaled to uint8 so its maximum is 255 (all zeros for an
        image without any gradient); ``angle`` is the gradient direction in
        degrees as returned by ``np.arctan2``.
    """
    # Accept already-grayscale input, matching canny_edge_detection.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image
    # Gradients along x and y, kept in float64 so negative slopes survive
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5)
    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=5)
    magnitude = np.sqrt(sobelx**2 + sobely**2)
    peak = np.max(magnitude)
    if peak > 0:
        magnitude = np.uint8(255 * magnitude / peak)
    else:
        # Flat image: normalising would divide 0 by 0 and yield NaN.
        magnitude = np.zeros(magnitude.shape, dtype=np.uint8)
    angle = np.arctan2(sobely, sobelx) * 180 / np.pi
    return magnitude, angle
# Usage example
image = cv2.imread('image.jpg')
magnitude, angle = sobel_edge_detection(image)
# Laplacian 边缘检测 (Laplacian edge detection)
python
def laplacian_edge_detection(image):
    """Detect edges with the Laplacian operator after Gaussian smoothing.

    Args:
        image: Input image. A 3-dimensional array is converted from BGR to
            grayscale; any other array is used as-is.

    Returns:
        An 8-bit image holding the absolute Laplacian response, saturated
        at 255.
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # Float64 output keeps the sign of the second derivative.
    laplacian = cv2.Laplacian(blurred, cv2.CV_64F)
    # convertScaleAbs takes |x| and saturates at 255; a plain np.uint8 cast
    # would wrap responses above 255 around to small values.
    return cv2.convertScaleAbs(laplacian)


# 图像分割 (Image segmentation)
基于阈值的分割
python
def threshold_segmentation(image):
    """Segment an image with several thresholding strategies.

    Args:
        image: BGR input image; it is converted to grayscale first.

    Returns:
        A 5-tuple ``(binary, binary_inv, trunc, adaptive, otsu)`` of
        thresholded grayscale images.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold also returns the threshold it used; it is not needed here.
    # 1. Fixed threshold
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    # 2. Inverted binary threshold
    _, binary_inv = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
    # 3. Truncation
    _, trunc = cv2.threshold(gray, 127, 255, cv2.THRESH_TRUNC)
    # 4. Adaptive threshold (Gaussian-weighted, block size 11, constant 2)
    adaptive = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                     cv2.THRESH_BINARY, 11, 2)
    # 5. Otsu's method chooses the threshold itself, so 0 is passed as input
    _, otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary, binary_inv, trunc, adaptive, otsu


# 连通分量分析 (Connected-component analysis)
python
def connected_components(image):
    """Label connected components of a thresholded image and colour them.

    Args:
        image: BGR input image; it is converted to grayscale and binarised
            at a fixed threshold of 127.

    Returns:
        A 5-tuple ``(num_labels, labels, stats, centroids, labeled_image)``.
        The first four come from ``cv2.connectedComponentsWithStats``;
        ``labeled_image`` is a BGR visualisation giving each label its own
        hue, with label 0 (the background) painted black.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    # A single call yields the label map plus per-component stats/centroids.
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary)
    # Spread the labels over the 8-bit hue range (0-179).
    max_label = np.max(labels)
    if max_label > 0:
        label_hue = np.uint8(179 * labels / max_label)
    else:
        # Background only: avoid dividing by zero.
        label_hue = np.zeros(labels.shape, dtype=np.uint8)
    # cv2.merge needs equally sized arrays, so build a full 255 plane for
    # saturation and value instead of passing bare integers.
    full_channel = np.full_like(label_hue, 255)
    labeled_image = cv2.merge([label_hue, full_channel, full_channel])
    labeled_image = cv2.cvtColor(labeled_image, cv2.COLOR_HSV2BGR)
    labeled_image[labels == 0] = 0
    return num_labels, labels, stats, centroids, labeled_image


# 轮廓检测 (Contour detection)
python
def contour_detection(image):
    """Find contours on the Canny edge map and measure each one.

    Args:
        image: BGR input image.

    Returns:
        A tuple ``(output, results)``. ``output`` is a copy of ``image`` with
        all contours drawn in green. ``results`` holds one dict per contour
        with the keys ``area``, ``perimeter``, ``approx``, ``hull``,
        ``rect``, ``circle`` (``(cx, cy, radius)``) and ``ellipse``
        (``None`` when the contour has fewer than 5 points).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    contours, _ = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # Draw on a copy so the caller's image is left untouched.
    output = image.copy()
    cv2.drawContours(output, contours, -1, (0, 255, 0), 2)
    results = []
    for contour in contours:
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        # Polygonal approximation with a tolerance of 2% of the perimeter
        epsilon = 0.02 * perimeter
        approx = cv2.approxPolyDP(contour, epsilon, True)
        hull = cv2.convexHull(contour)
        # Minimum-area bounding rectangle
        rect = cv2.minAreaRect(contour)
        # Minimum enclosing circle
        (cx, cy), radius = cv2.minEnclosingCircle(contour)
        # Ellipse fitting needs at least 5 points.
        ellipse = cv2.fitEllipse(contour) if len(contour) >= 5 else None
        results.append({
            'area': area,
            'perimeter': perimeter,
            'approx': approx,
            'hull': hull,
            'rect': rect,
            'circle': (cx, cy, radius),
            'ellipse': ellipse
        })
    return output, results


# K-means 聚类分割 (K-means clustering segmentation)
python
def kmeans_segmentation(image, k=3):
    """Segment an image by clustering its pixel values with K-means.

    Args:
        image: Image array of shape (H, W) or (H, W, C). Each pixel is one
            sample with C features (1 for a 2-D array).
        k: Number of clusters.

    Returns:
        A 3-tuple ``(segmented, labels, centers)``: ``segmented`` has the
        shape of ``image`` with each pixel replaced by its cluster centre,
        ``labels`` is the (H, W) map of cluster indices, and ``centers`` are
        the cluster centres cast to uint8.
    """
    # Read the channel count from the image rather than assuming 3.
    channels = 1 if image.ndim == 2 else image.shape[2]
    # cv2.kmeans expects float32 samples, one row per pixel.
    data = np.float32(image.reshape((-1, channels)))
    # Stop after 10 iterations or once the centres move by less than 1.0.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, labels, centers = cv2.kmeans(data, k, None, criteria, 10,
                                    cv2.KMEANS_RANDOM_CENTERS)
    # Map every pixel to its centre and restore the image shape.
    centers = np.uint8(centers)
    segmented = centers[labels.flatten()].reshape(image.shape)
    return segmented, labels.reshape(image.shape[:2]), centers


# 特征检测 (Feature detection)
Harris 角点检测
python
def harris_corner_detection(image, threshold=0.01):
    """Mark Harris corners on a copy of the image.

    Args:
        image: BGR input image.
        threshold: Fraction of the maximum corner response above which a
            pixel is marked as a corner.

    Returns:
        A tuple ``(output, corners)``: ``output`` is a copy of ``image`` with
        corner pixels set to ``[0, 0, 255]``; ``corners`` is the dilated
        Harris response map.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    corners = cv2.cornerHarris(gray, 2, 3, 0.04)
    # Dilate the response so the marked corners are easier to see.
    corners = cv2.dilate(corners, None)
    output = image.copy()
    output[corners > threshold * corners.max()] = [0, 0, 255]
    return output, corners


# SIFT 特征检测 (SIFT feature detection)
python
def sift_feature_detection(image):
    """Detect SIFT keypoints and compute their descriptors.

    Args:
        image: BGR input image.

    Returns:
        A 3-tuple ``(output, keypoints, descriptors)``, where ``output`` is
        the image returned by ``cv2.drawKeypoints``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    # The mask argument is None, so the whole image is searched.
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    output = cv2.drawKeypoints(image, keypoints, None)
    return output, keypoints, descriptors


# ORB 特征检测 (ORB feature detection)
python
def orb_feature_detection(image):
    """Detect ORB keypoints and compute their descriptors.

    Args:
        image: BGR input image.

    Returns:
        A 3-tuple ``(output, keypoints, descriptors)``, where ``output`` is
        the image returned by ``cv2.drawKeypoints``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    orb = cv2.ORB_create(nfeatures=500)
    # The mask argument is None, so the whole image is searched.
    keypoints, descriptors = orb.detectAndCompute(gray, None)
    output = cv2.drawKeypoints(image, keypoints, None)
    return output, keypoints, descriptors


# 直线和圆检测 (Line and circle detection)
Hough 直线检测
python
def hough_line_detection(image, threshold=50):
    """Detect straight lines with the standard Hough transform and draw them.

    Args:
        image: BGR input image.
        threshold: Accumulator threshold passed to ``cv2.HoughLines``.

    Returns:
        A copy of ``image`` with every detected line drawn in green; an
        unmodified copy when no line is found.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, threshold)
    output = image.copy()
    if lines is None:
        return output
    # Half-length of each drawn segment. height + width is at least the image
    # diagonal, so segments also span images larger than the 1000 px floor.
    half_length = max(1000, sum(image.shape[:2]))
    for rho, theta in lines[:, 0]:
        a = np.cos(theta)
        b = np.sin(theta)
        # (x0, y0) is the point of the line closest to the origin; extend
        # from it in both directions along the line direction (-b, a).
        x0 = a * rho
        y0 = b * rho
        x1 = int(x0 + half_length * (-b))
        y1 = int(y0 + half_length * a)
        x2 = int(x0 - half_length * (-b))
        y2 = int(y0 - half_length * a)
        cv2.line(output, (x1, y1), (x2, y2), (0, 255, 0), 2)
    return output


# Hough 圆检测 (Hough circle detection)
python
def hough_circle_detection(image, min_dist=50):
    """Detect circles with the Hough gradient method and draw them.

    Args:
        image: BGR input image.
        min_dist: Minimum-distance argument passed to ``cv2.HoughCircles``.

    Returns:
        A tuple ``(output, circles)``: ``output`` is a copy of ``image`` with
        each circle outlined in green and its centre marked in red;
        ``circles`` is the detection array rounded to uint16, or ``None``
        when nothing was found.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (9, 9), 2)
    circles = cv2.HoughCircles(blurred, cv2.HOUGH_GRADIENT, 1, min_dist,
                               param1=50, param2=30, minRadius=0, maxRadius=0)
    output = image.copy()
    if circles is not None:
        circles = np.uint16(np.around(circles))
        for x, y, radius in circles[0, :]:
            # Pass plain Python ints to the drawing calls rather than numpy
            # uint16 scalars.
            center = (int(x), int(y))
            cv2.circle(output, center, int(radius), (0, 255, 0), 2)
            cv2.circle(output, center, 2, (0, 0, 255), 3)
    return output, circles


# 模板匹配 (Template matching)
python
def template_matching(image, template):
    """Match a template against an image with six OpenCV comparison methods.

    Args:
        image: BGR image to search in.
        template: BGR template to look for.

    Returns:
        A dict keyed by the ``cv2.TM_*`` method constant. Each value is a
        dict with ``location`` (best-match position), ``value`` (score at
        that position) and ``result_map`` (the full ``cv2.matchTemplate``
        output).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    methods = [
        cv2.TM_CCOEFF,
        cv2.TM_CCOEFF_NORMED,
        cv2.TM_CCORR,
        cv2.TM_CCORR_NORMED,
        cv2.TM_SQDIFF,
        cv2.TM_SQDIFF_NORMED
    ]
    # For the squared-difference methods the best match is the minimum;
    # for all other methods it is the maximum.
    lower_is_better = (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED)
    results = {}
    for method in methods:
        result = cv2.matchTemplate(gray, gray_template, method)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
        if method in lower_is_better:
            location, value = min_loc, min_val
        else:
            location, value = max_loc, max_val
        results[method] = {
            'location': location,
            'value': value,
            'result_map': result
        }
    return results


# 图像直方图 (Image histograms)
python
def histogram_analysis(image):
    """Compute per-channel histograms and two contrast-equalised images.

    Args:
        image: BGR input image.

    Returns:
        A 5-tuple ``(hist_b, hist_g, hist_r, equalized, clahe_result)``:
        256-bin histograms of channels 0, 1 and 2, the globally equalised
        grayscale image, and the CLAHE-equalised grayscale image.
    """
    # One 256-bin histogram per channel index (0 = B, 1 = G, 2 = R).
    hist_b, hist_g, hist_r = (
        cv2.calcHist([image], [channel], None, [256], [0, 256])
        for channel in range(3)
    )
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Global histogram equalisation
    equalized = cv2.equalizeHist(gray)
    # CLAHE: contrast-limited adaptive histogram equalisation
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_result = equalizer.apply(gray)
    return hist_b, hist_g, hist_r, equalized, clahe_result
def histogram_backprojection(image, roi):
    """Back-project the hue/saturation histogram of a region onto an image.

    Args:
        image: BGR image to project onto.
        roi: BGR sample region whose colour distribution is searched for.

    Returns:
        The back-projection map returned by ``cv2.calcBackProject``.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    # 2-D histogram over hue (180 bins) and saturation (256 bins) of the ROI,
    # normalised in place to the 0-255 range.
    hist = cv2.calcHist([hsv_roi], [0, 1], None, [180, 256], [0, 180, 0, 256])
    cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX)
    backproj = cv2.calcBackProject([hsv], [0, 1], hist, [0, 180, 0, 256], 1)
    return backproj


# 总结 (Summary)
本章介绍了图像处理的核心算法,包括:
- 图像变换 - 缩放、旋转、仿射、透视
- 图像滤波 - 线性和非线性滤波
- 边缘检测 - Canny、Sobel、Laplacian
- 图像分割 - 阈值、连通分量、轮廓、聚类
- 特征检测 - Harris、SIFT、ORB
- 直线、圆检测与模板匹配 - Hough 直线、Hough 圆、模板匹配
- 直方图 - 分析、均衡化、反投影
在 LeBot 项目中,这些算法的组合应用能够实现强大的视觉功能。
推荐资源
- OpenCV 官方文档:https://docs.opencv.org/
- 图像处理教程:https://en.wikipedia.org/wiki/Digital_image_processing
- Python-OpenCV 教程:https://docs.opencv.org/4.x/d6/d00/tutorial_py_root.html