深度图像处理
深度图像基础
什么是深度图像
深度图像(Depth Image)是一种特殊的图像,其中每个像素不是表示颜色信息,而是表示该像素对应的三维场景中该点到摄像头的距离。深度图像是 3D 视觉和立体视觉的基础。
深度图像的获取方式
深度获取方法对比:
┌────────────────┬──────────────┬──────────────┬─────────────┐
│ 方法 │ 精度 │ 范围 │ 成本 │
├────────────────┼──────────────┼──────────────┼─────────────┤
│ 立体匹配 │ 中等 │ 中到远 │ 低 │
│ 结构光 │ 高 │ 近到中 │ 中 │
│ ToF(飞行时间)│ 高 │ 近到中 │ 中到高 │
│ LIDAR │ 高 │ 远 │ 高 │
│ 双目摄像头 │ 中等 │ 中到远 │ 低 │
└────────────────┴──────────────┴──────────────┴─────────────┘深度图像的表示
python
import numpy as np
import cv2
# 深度图像的典型表示
depth_image = np.zeros((480, 640), dtype=np.uint16) # 16位无符号整数
# 访问深度值
depth_at_pixel = depth_image[100, 100] # 单位:毫米或者其他
# 深度值范围
min_depth = np.min(depth_image)
max_depth = np.max(depth_image)
mean_depth = np.mean(depth_image)
print(f"深度范围: {min_depth} - {max_depth} mm")
print(f"平均深度: {mean_depth:.2f} mm")深度图像处理基本操作
深度图像的可视化
python
import cv2
import numpy as np
import matplotlib.pyplot as plt
def visualize_depth_image(depth_image):
"""可视化深度图像"""
# 方法 1:将深度值归一化到 0-255
normalized = cv2.normalize(depth_image, None, 0, 255, cv2.NORM_MINMAX)
normalized = np.uint8(normalized)
# 应用色图(colourmap)
colormap = cv2.applyColorMap(normalized, cv2.COLORMAP_JET)
# 方法 2:使用伪彩色
# 红色表示近,蓝色表示远
heatmap = cv2.cvtColor(normalized, cv2.COLOR_GRAY2BGR)
return colormap, normalized
def filter_invalid_depth(depth_image, min_depth=0, max_depth=5000):
"""过滤无效深度值"""
# 创建掩码
valid_mask = (depth_image > min_depth) & (depth_image < max_depth)
# 将无效区域设置为 0
filtered = depth_image.copy()
filtered[~valid_mask] = 0
return filtered, valid_mask
def bilateral_filter_depth(depth_image, d=9, sigma_color=75, sigma_space=75):
"""双边滤波平滑深度图像(保留边界)"""
# 转换为 float32
depth_float = depth_image.astype(np.float32) / 1000.0 # 转换为米
# 应用双边滤波
filtered = cv2.bilateralFilter(depth_float, d, sigma_color, sigma_space)
# 转换回原始单位
filtered = (filtered * 1000.0).astype(depth_image.dtype)
return filtered从深度图生成 3D 点云
python
def depth_to_point_cloud(depth_image, camera_matrix, depth_scale=1000.0):
"""将深度图转换为 3D 点云
Args:
depth_image: 深度图像 (H, W)
camera_matrix: 摄像头内参矩阵 (3, 3)
depth_scale: 深度值缩放因子 (深度单位为毫米时通常为 1000)
Returns:
point_cloud: 3D 点云 (N, 3),N = H * W
"""
height, width = depth_image.shape
# 获取摄像头参数
fx = camera_matrix[0, 0]
fy = camera_matrix[1, 1]
cx = camera_matrix[0, 2]
cy = camera_matrix[1, 2]
# 创建像素坐标网格
x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
# 获取有效的深度值
depth_values = depth_image.astype(np.float32) / depth_scale # 转换为米
# 转换为 3D 坐标
# X = (x - cx) * Z / fx
# Y = (y - cy) * Z / fy
# Z = Z
x_3d = (x_coords - cx) * depth_values / fx
y_3d = (y_coords - cy) * depth_values / fy
z_3d = depth_values
# 堆叠成点云
point_cloud = np.dstack([x_3d, y_3d, z_3d])
point_cloud = point_cloud.reshape(-1, 3)
# 移除无效点(深度为 0)
valid_mask = depth_image.reshape(-1) > 0
point_cloud = point_cloud[valid_mask]
return point_cloud
def point_cloud_to_depth(point_cloud, camera_matrix, height, width, depth_scale=1000.0):
"""将 3D 点云转换回深度图
Args:
point_cloud: 3D 点云 (N, 3)
camera_matrix: 摄像头内参矩阵 (3, 3)
height, width: 深度图的高度和宽度
depth_scale: 深度值缩放因子
Returns:
depth_image: 深度图像 (H, W)
"""
# 获取摄像头参数
fx = camera_matrix[0, 0]
fy = camera_matrix[1, 1]
cx = camera_matrix[0, 2]
cy = camera_matrix[1, 2]
# 初始化深度图
depth_image = np.zeros((height, width), dtype=np.uint16)
# 投影到像素平面
for i, point in enumerate(point_cloud):
x, y, z = point
if z <= 0: # 无效点
continue
# 投影公式
u = int(fx * x / z + cx)
v = int(fy * y / z + cy)
# 检查是否在图像范围内
if 0 <= u < width and 0 <= v < height:
depth_value = int(z * depth_scale)
depth_image[v, u] = depth_value
return depth_image深度图像分析
深度分割
python
def depth_segmentation(depth_image, num_segments=5):
"""基于深度的分割"""
# 移除无效深度值
valid_mask = depth_image > 0
# 深度量化
valid_depths = depth_image[valid_mask]
min_depth = np.min(valid_depths)
max_depth = np.max(valid_depths)
# 创建分段
segment_image = np.zeros_like(depth_image)
for i in range(num_segments):
threshold_low = min_depth + (max_depth - min_depth) * i / num_segments
threshold_high = min_depth + (max_depth - min_depth) * (i + 1) / num_segments
mask = (depth_image >= threshold_low) & (depth_image < threshold_high)
segment_image[mask] = i + 1
return segment_image
def depth_based_roi_extraction(depth_image, target_depth, tolerance=100):
"""基于特定深度的 ROI 提取"""
# 创建掩码
mask = np.abs(depth_image.astype(np.float32) - target_depth) < tolerance
# 应用形态学运算平滑
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, kernel)
# 提取 ROI
roi = depth_image.copy()
roi[~mask.astype(bool)] = 0
return roi, mask表面法向量估计
python
def estimate_surface_normals(point_cloud, k=5):
"""使用主成分分析估计表面法向量
Args:
point_cloud: 3D 点云 (N, 3)
k: 用于计算法向量的邻近点数
Returns:
normals: 法向量 (N, 3)
"""
from sklearn.neighbors import NearestNeighbors
# 寻找 k 个最近邻
nbrs = NearestNeighbors(n_neighbors=k).fit(point_cloud)
distances, indices = nbrs.kneighbors(point_cloud)
normals = np.zeros_like(point_cloud)
for i in range(len(point_cloud)):
# 获取邻近点
neighbors = point_cloud[indices[i]]
# 中心化
centered = neighbors - neighbors.mean(axis=0)
# SVD 分解
_, _, vt = np.linalg.svd(centered)
# 最小特征向量是法向量
normal = vt[-1]
normals[i] = normal
return normals
def curvature_estimation(point_cloud, k=10):
"""估计曲率
Args:
point_cloud: 3D 点云 (N, 3)
k: 邻域大小
Returns:
curvatures: 曲率值 (N,)
"""
from sklearn.neighbors import NearestNeighbors
nbrs = NearestNeighbors(n_neighbors=k).fit(point_cloud)
distances, indices = nbrs.kneighbors(point_cloud)
curvatures = np.zeros(len(point_cloud))
for i in range(len(point_cloud)):
neighbors = point_cloud[indices[i]]
centered = neighbors - neighbors.mean(axis=0)
# 计算主曲率
cov = np.cov(centered.T)
eigenvalues = np.linalg.eigvalsh(cov)
# 高斯曲率近似
curvatures[i] = np.min(eigenvalues)
return curvatures平面检测
python
def ransac_plane_detection(point_cloud, threshold=0.01, max_iterations=1000):
"""使用 RANSAC 检测平面
Args:
point_cloud: 3D 点云 (N, 3)
threshold: 距离阈值
max_iterations: 最大迭代次数
Returns:
plane_model: (a, b, c, d) 平面方程系数
inliers: 内点掩码
"""
best_inliers = 0
best_model = None
best_mask = None
for _ in range(max_iterations):
# 随机选择 3 个点
indices = np.random.choice(len(point_cloud), 3, replace=False)
points = point_cloud[indices]
# 计算平面方程
v1 = points[1] - points[0]
v2 = points[2] - points[0]
normal = np.cross(v1, v2)
if np.linalg.norm(normal) < 1e-6:
continue
normal = normal / np.linalg.norm(normal)
d = -np.dot(normal, points[0])
# 计算所有点到平面的距离
distances = np.abs(np.dot(point_cloud, normal) + d)
# 计算内点
inliers = distances < threshold
num_inliers = np.sum(inliers)
if num_inliers > best_inliers:
best_inliers = num_inliers
best_model = np.append(normal, d)
best_mask = inliers
return best_model, best_mask立体匹配
块匹配(Block Matching)
python
def stereo_block_matching(left_image, right_image, num_disparities=16*5, block_size=15):
"""立体块匹配
Args:
left_image: 左视图图像
right_image: 右视图图像
num_disparities: 视差搜索范围
block_size: 匹配块的大小
Returns:
disparity_map: 视差图
"""
# 创建 StereoBM 对象
stereo = cv2.StereoBM_create(numDisparities=num_disparities, blockSize=block_size)
# 灰度转换
left_gray = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY)
right_gray = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY)
# 计算视差
disparity = stereo.compute(left_gray, right_gray)
# 视差值处理(视差值是整数的 16 倍)
disparity = disparity.astype(np.float32) / 16.0
return disparity
def compute_depth_from_disparity(disparity, baseline, focal_length):
"""从视差计算深度
Args:
disparity: 视差图
baseline: 双摄像头之间的距离(厘米)
focal_length: 焦距(像素)
Returns:
depth_map: 深度图(厘米)
"""
depth = np.zeros_like(disparity)
# 避免除以零
valid_mask = disparity > 0
# 深度 = (baseline * focal_length) / disparity
depth[valid_mask] = (baseline * focal_length) / disparity[valid_mask]
return depth半全局匹配(Semi-Global Matching)
python
def stereo_sgm(left_image, right_image):
"""半全局匹配立体算法
需要 OpenCV 支持 SGM(某些版本不可用)
"""
try:
# 创建 StereoSGBM 对象
stereo = cv2.StereoSGBM_create(
minDisparity=0,
numDisparities=16*5,
blockSize=3,
P1=8 * 3 * 3**2,
P2=32 * 3 * 3**2,
disp12MaxDiff=1,
uniquenessRatio=10,
speckleWindowSize=100,
speckleRange=32,
preFilterCap=63,
mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY
)
# 灰度转换
left_gray = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY)
right_gray = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY)
# 计算视差
disparity = stereo.compute(left_gray, right_gray)
disparity = disparity.astype(np.float32) / 16.0
return disparity
except Exception as e:
print(f"SGM 不可用: {e}")
return None3D 重建
点云处理
python
import numpy as np
class PointCloud:
"""点云处理类"""
def __init__(self, points, colors=None):
"""
Args:
points: (N, 3) 点坐标
colors: (N, 3) RGB 颜色值(可选)
"""
self.points = points
self.colors = colors
def filter_by_distance(self, min_dist=0, max_dist=5000):
"""距离滤波"""
distances = np.linalg.norm(self.points, axis=1)
mask = (distances > min_dist) & (distances < max_dist)
self.points = self.points[mask]
if self.colors is not None:
self.colors = self.colors[mask]
return self
def remove_outliers_statistical(self, num_neighbors=20, std_ratio=2.0):
"""统计离群值移除"""
from sklearn.neighbors import NearestNeighbors
nbrs = NearestNeighbors(n_neighbors=num_neighbors).fit(self.points)
distances, _ = nbrs.kneighbors(self.points)
mean_distances = np.mean(distances[:, 1:], axis=1)
mean = np.mean(mean_distances)
std = np.std(mean_distances)
mask = mean_distances < (mean + std_ratio * std)
self.points = self.points[mask]
if self.colors is not None:
self.colors = self.colors[mask]
return self
def downsample_voxel(self, voxel_size=0.01):
"""体素下采样"""
# 计算体素索引
indices = (self.points / voxel_size).astype(np.int32)
# 使用字典来存储唯一的体素
voxel_dict = {}
for i, idx in enumerate(indices):
key = tuple(idx)
if key not in voxel_dict:
voxel_dict[key] = []
voxel_dict[key].append(i)
# 对每个体素取平均
new_points = []
new_colors = []
for point_indices in voxel_dict.values():
new_points.append(np.mean(self.points[point_indices], axis=0))
if self.colors is not None:
new_colors.append(np.mean(self.colors[point_indices], axis=0))
self.points = np.array(new_points)
if self.colors is not None:
self.colors = np.array(new_colors)
return self
def save_to_ply(self, filename):
"""保存为 PLY 格式"""
with open(filename, 'w') as f:
# 写入 PLY 头
f.write(f"ply\n")
f.write(f"format ascii 1.0\n")
f.write(f"element vertex {len(self.points)}\n")
f.write(f"property float x\n")
f.write(f"property float y\n")
f.write(f"property float z\n")
if self.colors is not None:
f.write(f"property uchar red\n")
f.write(f"property uchar green\n")
f.write(f"property uchar blue\n")
f.write(f"end_header\n")
# 写入点数据
for i, point in enumerate(self.points):
f.write(f"{point[0]} {point[1]} {point[2]}")
if self.colors is not None:
color = self.colors[i].astype(np.uint8)
f.write(f" {color[0]} {color[1]} {color[2]}")
f.write(f"\n")表面重建
python
def poisson_surface_reconstruction(point_cloud, normal_vectors, depth=8):
"""泊松表面重建(需要 open3d 库)
Args:
point_cloud: 3D 点云 (N, 3)
normal_vectors: 表面法向量 (N, 3)
depth: 重建网格深度
Returns:
mesh: 重建的三角网格
"""
try:
import open3d as o3d
# 创建 Open3D 点云对象
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(point_cloud)
pcd.normals = o3d.utility.Vector3dVector(normal_vectors)
# 泊松重建
mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
pcd, depth=depth
)
return mesh
except ImportError:
print("请安装 open3d: pip install open3d")
return NoneLeBot 中的深度处理应用
深度基础的避障
python
class DepthBasedObstacleAvoidance:
"""基于深度的避障系统"""
def __init__(self, danger_threshold=500, safe_margin=100):
"""
Args:
danger_threshold: 危险距离阈值(毫米)
safe_margin: 安全边界
"""
self.danger_threshold = danger_threshold
self.safe_margin = safe_margin
def detect_obstacles(self, depth_image):
"""检测障碍物"""
# 获取有效的深度值
valid_mask = (depth_image > 0) & (depth_image < 10000)
# 检测近距离物体
obstacle_mask = (depth_image < self.danger_threshold) & valid_mask
return obstacle_mask
def compute_avoidance_direction(self, depth_image):
"""计算避障方向"""
height, width = depth_image.shape
# 将图像分成左中右三个区域
left_region = depth_image[:, :width//3]
center_region = depth_image[:, width//3:2*width//3]
right_region = depth_image[:, 2*width//3:]
# 计算每个区域的平均深度
left_avg = np.mean(left_region[left_region > 0])
center_avg = np.mean(center_region[center_region > 0])
right_avg = np.mean(right_region[right_region > 0])
# 找最安全的方向
regions = {
'left': left_avg,
'center': center_avg,
'right': right_avg
}
safest_direction = max(regions.items(), key=lambda x: x[1])[0]
return safest_direction
def detect_ground_plane(depth_image, camera_matrix):
"""检测地面平面"""
# 转换为点云
point_cloud = depth_to_point_cloud(depth_image, camera_matrix)
# 使用 RANSAC 检测平面
plane_model, inliers = ransac_plane_detection(point_cloud, threshold=0.05, max_iterations=1000)
return plane_model, inliers深度图像常见问题
深度填充
python
def inpaint_depth(depth_image, method='bilateral'):
"""填充深度图像中的孔洞"""
if method == 'bilateral':
filled = cv2.bilateralFilter(depth_image, 9, 75, 75)
elif method == 'morphological':
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
filled = cv2.morphologyEx(depth_image, cv2.MORPH_CLOSE, kernel)
elif method == 'inpainting':
mask = (depth_image == 0).astype(np.uint8)
filled = cv2.inpaint(depth_image, mask, 3, cv2.INPAINT_TELEA)
else:
filled = depth_image
return filled总结
深度图像处理是 LeBot 机器人 3D 视觉的基础,主要涉及:
- 深度获取 - 多种传感器的选择
- 图像处理 - 滤波、分割、分析
- 3D 重建 - 点云生成和处理
- 应用 - 避障、导航、物体识别
掌握深度处理技术是开发智能机器人的关键。
推荐资源
- Open3D 官方文档:http://www.open3d.org/
- PCL 点云库:http://pointclouds.org/
- OpenCV 深度处理:https://docs.opencv.org/
- 深度学习推荐资源:https://github.com/MingYuanLiu/awesome-3D-object-detection