Skip to content

深度图像处理

深度图像基础

什么是深度图像

深度图像(Depth Image)是一种特殊的图像,其中每个像素不是表示颜色信息,而是表示该像素对应的三维场景中该点到摄像头的距离。深度图像是 3D 视觉和立体视觉的基础。

深度图像的获取方式

深度获取方法对比:
┌────────────────┬──────────────┬──────────────┬─────────────┐
│ 方法           │ 精度         │ 范围         │ 成本        │
├────────────────┼──────────────┼──────────────┼─────────────┤
│ 立体匹配       │ 中等         │ 中到远       │ 低          │
│ 结构光         │ 高           │ 近到中       │ 中          │
│ ToF(飞行时间)│ 高           │ 近到中       │ 中到高      │
│ LIDAR          │ 高           │ 远           │ 高          │
│ 双目摄像头     │ 中等         │ 中到远       │ 低          │
└────────────────┴──────────────┴──────────────┴─────────────┘

深度图像的表示

python
import numpy as np
import cv2

# 深度图像的典型表示
depth_image = np.zeros((480, 640), dtype=np.uint16)  # 16位无符号整数

# 访问深度值
depth_at_pixel = depth_image[100, 100]  # 单位:毫米或者其他

# 深度值范围
min_depth = np.min(depth_image)
max_depth = np.max(depth_image)
mean_depth = np.mean(depth_image)

print(f"深度范围: {min_depth} - {max_depth} mm")
print(f"平均深度: {mean_depth:.2f} mm")

深度图像处理基本操作

深度图像的可视化

python
import cv2
import numpy as np
import matplotlib.pyplot as plt

def visualize_depth_image(depth_image):
    """可视化深度图像"""
    
    # 方法 1:将深度值归一化到 0-255
    normalized = cv2.normalize(depth_image, None, 0, 255, cv2.NORM_MINMAX)
    normalized = np.uint8(normalized)
    
    # 应用色图(colourmap)
    colormap = cv2.applyColorMap(normalized, cv2.COLORMAP_JET)
    
    # 方法 2:使用伪彩色
    # 红色表示近,蓝色表示远
    heatmap = cv2.cvtColor(normalized, cv2.COLOR_GRAY2BGR)
    
    return colormap, normalized

def filter_invalid_depth(depth_image, min_depth=0, max_depth=5000):
    """过滤无效深度值"""
    # 创建掩码
    valid_mask = (depth_image > min_depth) & (depth_image < max_depth)
    
    # 将无效区域设置为 0
    filtered = depth_image.copy()
    filtered[~valid_mask] = 0
    
    return filtered, valid_mask

def bilateral_filter_depth(depth_image, d=9, sigma_color=75, sigma_space=75):
    """双边滤波平滑深度图像(保留边界)"""
    # 转换为 float32
    depth_float = depth_image.astype(np.float32) / 1000.0  # 转换为米
    
    # 应用双边滤波
    filtered = cv2.bilateralFilter(depth_float, d, sigma_color, sigma_space)
    
    # 转换回原始单位
    filtered = (filtered * 1000.0).astype(depth_image.dtype)
    
    return filtered

从深度图生成 3D 点云

python
def depth_to_point_cloud(depth_image, camera_matrix, depth_scale=1000.0):
    """将深度图转换为 3D 点云
    
    Args:
        depth_image: 深度图像 (H, W)
        camera_matrix: 摄像头内参矩阵 (3, 3)
        depth_scale: 深度值缩放因子 (深度单位为毫米时通常为 1000)
    
    Returns:
        point_cloud: 3D 点云 (N, 3),N = H * W
    """
    height, width = depth_image.shape
    
    # 获取摄像头参数
    fx = camera_matrix[0, 0]
    fy = camera_matrix[1, 1]
    cx = camera_matrix[0, 2]
    cy = camera_matrix[1, 2]
    
    # 创建像素坐标网格
    x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
    
    # 获取有效的深度值
    depth_values = depth_image.astype(np.float32) / depth_scale  # 转换为米
    
    # 转换为 3D 坐标
    # X = (x - cx) * Z / fx
    # Y = (y - cy) * Z / fy
    # Z = Z
    
    x_3d = (x_coords - cx) * depth_values / fx
    y_3d = (y_coords - cy) * depth_values / fy
    z_3d = depth_values
    
    # 堆叠成点云
    point_cloud = np.dstack([x_3d, y_3d, z_3d])
    point_cloud = point_cloud.reshape(-1, 3)
    
    # 移除无效点(深度为 0)
    valid_mask = depth_image.reshape(-1) > 0
    point_cloud = point_cloud[valid_mask]
    
    return point_cloud

def point_cloud_to_depth(point_cloud, camera_matrix, height, width, depth_scale=1000.0):
    """将 3D 点云转换回深度图
    
    Args:
        point_cloud: 3D 点云 (N, 3)
        camera_matrix: 摄像头内参矩阵 (3, 3)
        height, width: 深度图的高度和宽度
        depth_scale: 深度值缩放因子
    
    Returns:
        depth_image: 深度图像 (H, W)
    """
    # 获取摄像头参数
    fx = camera_matrix[0, 0]
    fy = camera_matrix[1, 1]
    cx = camera_matrix[0, 2]
    cy = camera_matrix[1, 2]
    
    # 初始化深度图
    depth_image = np.zeros((height, width), dtype=np.uint16)
    
    # 投影到像素平面
    for i, point in enumerate(point_cloud):
        x, y, z = point
        
        if z <= 0:  # 无效点
            continue
        
        # 投影公式
        u = int(fx * x / z + cx)
        v = int(fy * y / z + cy)
        
        # 检查是否在图像范围内
        if 0 <= u < width and 0 <= v < height:
            depth_value = int(z * depth_scale)
            depth_image[v, u] = depth_value
    
    return depth_image

深度图像分析

深度分割

python
def depth_segmentation(depth_image, num_segments=5):
    """基于深度的分割"""
    # 移除无效深度值
    valid_mask = depth_image > 0
    
    # 深度量化
    valid_depths = depth_image[valid_mask]
    min_depth = np.min(valid_depths)
    max_depth = np.max(valid_depths)
    
    # 创建分段
    segment_image = np.zeros_like(depth_image)
    
    for i in range(num_segments):
        threshold_low = min_depth + (max_depth - min_depth) * i / num_segments
        threshold_high = min_depth + (max_depth - min_depth) * (i + 1) / num_segments
        
        mask = (depth_image >= threshold_low) & (depth_image < threshold_high)
        segment_image[mask] = i + 1
    
    return segment_image

def depth_based_roi_extraction(depth_image, target_depth, tolerance=100):
    """基于特定深度的 ROI 提取"""
    # 创建掩码
    mask = np.abs(depth_image.astype(np.float32) - target_depth) < tolerance
    
    # 应用形态学运算平滑
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, kernel)
    
    # 提取 ROI
    roi = depth_image.copy()
    roi[~mask.astype(bool)] = 0
    
    return roi, mask

表面法向量估计

python
def estimate_surface_normals(point_cloud, k=5):
    """使用主成分分析估计表面法向量
    
    Args:
        point_cloud: 3D 点云 (N, 3)
        k: 用于计算法向量的邻近点数
    
    Returns:
        normals: 法向量 (N, 3)
    """
    from sklearn.neighbors import NearestNeighbors
    
    # 寻找 k 个最近邻
    nbrs = NearestNeighbors(n_neighbors=k).fit(point_cloud)
    distances, indices = nbrs.kneighbors(point_cloud)
    
    normals = np.zeros_like(point_cloud)
    
    for i in range(len(point_cloud)):
        # 获取邻近点
        neighbors = point_cloud[indices[i]]
        
        # 中心化
        centered = neighbors - neighbors.mean(axis=0)
        
        # SVD 分解
        _, _, vt = np.linalg.svd(centered)
        
        # 最小特征向量是法向量
        normal = vt[-1]
        normals[i] = normal
    
    return normals

def curvature_estimation(point_cloud, k=10):
    """估计曲率
    
    Args:
        point_cloud: 3D 点云 (N, 3)
        k: 邻域大小
    
    Returns:
        curvatures: 曲率值 (N,)
    """
    from sklearn.neighbors import NearestNeighbors
    
    nbrs = NearestNeighbors(n_neighbors=k).fit(point_cloud)
    distances, indices = nbrs.kneighbors(point_cloud)
    
    curvatures = np.zeros(len(point_cloud))
    
    for i in range(len(point_cloud)):
        neighbors = point_cloud[indices[i]]
        centered = neighbors - neighbors.mean(axis=0)
        
        # 计算主曲率
        cov = np.cov(centered.T)
        eigenvalues = np.linalg.eigvalsh(cov)
        
        # 高斯曲率近似
        curvatures[i] = np.min(eigenvalues)
    
    return curvatures

平面检测

python
def ransac_plane_detection(point_cloud, threshold=0.01, max_iterations=1000):
    """使用 RANSAC 检测平面
    
    Args:
        point_cloud: 3D 点云 (N, 3)
        threshold: 距离阈值
        max_iterations: 最大迭代次数
    
    Returns:
        plane_model: (a, b, c, d) 平面方程系数
        inliers: 内点掩码
    """
    best_inliers = 0
    best_model = None
    best_mask = None
    
    for _ in range(max_iterations):
        # 随机选择 3 个点
        indices = np.random.choice(len(point_cloud), 3, replace=False)
        points = point_cloud[indices]
        
        # 计算平面方程
        v1 = points[1] - points[0]
        v2 = points[2] - points[0]
        
        normal = np.cross(v1, v2)
        if np.linalg.norm(normal) < 1e-6:
            continue
        
        normal = normal / np.linalg.norm(normal)
        d = -np.dot(normal, points[0])
        
        # 计算所有点到平面的距离
        distances = np.abs(np.dot(point_cloud, normal) + d)
        
        # 计算内点
        inliers = distances < threshold
        num_inliers = np.sum(inliers)
        
        if num_inliers > best_inliers:
            best_inliers = num_inliers
            best_model = np.append(normal, d)
            best_mask = inliers
    
    return best_model, best_mask

立体匹配

块匹配(Block Matching)

python
def stereo_block_matching(left_image, right_image, num_disparities=16*5, block_size=15):
    """立体块匹配
    
    Args:
        left_image: 左视图图像
        right_image: 右视图图像
        num_disparities: 视差搜索范围
        block_size: 匹配块的大小
    
    Returns:
        disparity_map: 视差图
    """
    # 创建 StereoBM 对象
    stereo = cv2.StereoBM_create(numDisparities=num_disparities, blockSize=block_size)
    
    # 灰度转换
    left_gray = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY)
    right_gray = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY)
    
    # 计算视差
    disparity = stereo.compute(left_gray, right_gray)
    
    # 视差值处理(视差值是整数的 16 倍)
    disparity = disparity.astype(np.float32) / 16.0
    
    return disparity

def compute_depth_from_disparity(disparity, baseline, focal_length):
    """从视差计算深度
    
    Args:
        disparity: 视差图
        baseline: 双摄像头之间的距离(厘米)
        focal_length: 焦距(像素)
    
    Returns:
        depth_map: 深度图(厘米)
    """
    depth = np.zeros_like(disparity)
    
    # 避免除以零
    valid_mask = disparity > 0
    
    # 深度 = (baseline * focal_length) / disparity
    depth[valid_mask] = (baseline * focal_length) / disparity[valid_mask]
    
    return depth

半全局匹配(Semi-Global Matching)

python
def stereo_sgm(left_image, right_image):
    """半全局匹配立体算法
    
    需要 OpenCV 支持 SGM(某些版本不可用)
    """
    try:
        # 创建 StereoSGBM 对象
        stereo = cv2.StereoSGBM_create(
            minDisparity=0,
            numDisparities=16*5,
            blockSize=3,
            P1=8 * 3 * 3**2,
            P2=32 * 3 * 3**2,
            disp12MaxDiff=1,
            uniquenessRatio=10,
            speckleWindowSize=100,
            speckleRange=32,
            preFilterCap=63,
            mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY
        )
        
        # 灰度转换
        left_gray = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY)
        right_gray = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY)
        
        # 计算视差
        disparity = stereo.compute(left_gray, right_gray)
        disparity = disparity.astype(np.float32) / 16.0
        
        return disparity
    
    except Exception as e:
        print(f"SGM 不可用: {e}")
        return None

3D 重建

点云处理

python
import numpy as np

class PointCloud:
    """点云处理类"""
    
    def __init__(self, points, colors=None):
        """
        Args:
            points: (N, 3) 点坐标
            colors: (N, 3) RGB 颜色值(可选)
        """
        self.points = points
        self.colors = colors
    
    def filter_by_distance(self, min_dist=0, max_dist=5000):
        """距离滤波"""
        distances = np.linalg.norm(self.points, axis=1)
        mask = (distances > min_dist) & (distances < max_dist)
        
        self.points = self.points[mask]
        if self.colors is not None:
            self.colors = self.colors[mask]
        
        return self
    
    def remove_outliers_statistical(self, num_neighbors=20, std_ratio=2.0):
        """统计离群值移除"""
        from sklearn.neighbors import NearestNeighbors
        
        nbrs = NearestNeighbors(n_neighbors=num_neighbors).fit(self.points)
        distances, _ = nbrs.kneighbors(self.points)
        mean_distances = np.mean(distances[:, 1:], axis=1)
        
        mean = np.mean(mean_distances)
        std = np.std(mean_distances)
        
        mask = mean_distances < (mean + std_ratio * std)
        
        self.points = self.points[mask]
        if self.colors is not None:
            self.colors = self.colors[mask]
        
        return self
    
    def downsample_voxel(self, voxel_size=0.01):
        """体素下采样"""
        # 计算体素索引
        indices = (self.points / voxel_size).astype(np.int32)
        
        # 使用字典来存储唯一的体素
        voxel_dict = {}
        for i, idx in enumerate(indices):
            key = tuple(idx)
            if key not in voxel_dict:
                voxel_dict[key] = []
            voxel_dict[key].append(i)
        
        # 对每个体素取平均
        new_points = []
        new_colors = []
        
        for point_indices in voxel_dict.values():
            new_points.append(np.mean(self.points[point_indices], axis=0))
            if self.colors is not None:
                new_colors.append(np.mean(self.colors[point_indices], axis=0))
        
        self.points = np.array(new_points)
        if self.colors is not None:
            self.colors = np.array(new_colors)
        
        return self
    
    def save_to_ply(self, filename):
        """保存为 PLY 格式"""
        with open(filename, 'w') as f:
            # 写入 PLY 头
            f.write(f"ply\n")
            f.write(f"format ascii 1.0\n")
            f.write(f"element vertex {len(self.points)}\n")
            f.write(f"property float x\n")
            f.write(f"property float y\n")
            f.write(f"property float z\n")
            
            if self.colors is not None:
                f.write(f"property uchar red\n")
                f.write(f"property uchar green\n")
                f.write(f"property uchar blue\n")
            
            f.write(f"end_header\n")
            
            # 写入点数据
            for i, point in enumerate(self.points):
                f.write(f"{point[0]} {point[1]} {point[2]}")
                if self.colors is not None:
                    color = self.colors[i].astype(np.uint8)
                    f.write(f" {color[0]} {color[1]} {color[2]}")
                f.write(f"\n")

表面重建

python
def poisson_surface_reconstruction(point_cloud, normal_vectors, depth=8):
    """泊松表面重建(需要 open3d 库)
    
    Args:
        point_cloud: 3D 点云 (N, 3)
        normal_vectors: 表面法向量 (N, 3)
        depth: 重建网格深度
    
    Returns:
        mesh: 重建的三角网格
    """
    try:
        import open3d as o3d
        
        # 创建 Open3D 点云对象
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(point_cloud)
        pcd.normals = o3d.utility.Vector3dVector(normal_vectors)
        
        # 泊松重建
        mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=depth
        )
        
        return mesh
    
    except ImportError:
        print("请安装 open3d: pip install open3d")
        return None

LeBot 中的深度处理应用

深度基础的避障

python
class DepthBasedObstacleAvoidance:
    """基于深度的避障系统"""
    
    def __init__(self, danger_threshold=500, safe_margin=100):
        """
        Args:
            danger_threshold: 危险距离阈值(毫米)
            safe_margin: 安全边界
        """
        self.danger_threshold = danger_threshold
        self.safe_margin = safe_margin
    
    def detect_obstacles(self, depth_image):
        """检测障碍物"""
        # 获取有效的深度值
        valid_mask = (depth_image > 0) & (depth_image < 10000)
        
        # 检测近距离物体
        obstacle_mask = (depth_image < self.danger_threshold) & valid_mask
        
        return obstacle_mask
    
    def compute_avoidance_direction(self, depth_image):
        """计算避障方向"""
        height, width = depth_image.shape
        
        # 将图像分成左中右三个区域
        left_region = depth_image[:, :width//3]
        center_region = depth_image[:, width//3:2*width//3]
        right_region = depth_image[:, 2*width//3:]
        
        # 计算每个区域的平均深度
        left_avg = np.mean(left_region[left_region > 0])
        center_avg = np.mean(center_region[center_region > 0])
        right_avg = np.mean(right_region[right_region > 0])
        
        # 找最安全的方向
        regions = {
            'left': left_avg,
            'center': center_avg,
            'right': right_avg
        }
        
        safest_direction = max(regions.items(), key=lambda x: x[1])[0]
        
        return safest_direction

def detect_ground_plane(depth_image, camera_matrix):
    """检测地面平面"""
    # 转换为点云
    point_cloud = depth_to_point_cloud(depth_image, camera_matrix)
    
    # 使用 RANSAC 检测平面
    plane_model, inliers = ransac_plane_detection(point_cloud, threshold=0.05, max_iterations=1000)
    
    return plane_model, inliers

深度图像常见问题

深度填充

python
def inpaint_depth(depth_image, method='bilateral'):
    """填充深度图像中的孔洞"""
    if method == 'bilateral':
        filled = cv2.bilateralFilter(depth_image, 9, 75, 75)
    elif method == 'morphological':
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
        filled = cv2.morphologyEx(depth_image, cv2.MORPH_CLOSE, kernel)
    elif method == 'inpainting':
        mask = (depth_image == 0).astype(np.uint8)
        filled = cv2.inpaint(depth_image, mask, 3, cv2.INPAINT_TELEA)
    else:
        filled = depth_image
    
    return filled

总结

深度图像处理是 LeBot 机器人 3D 视觉的基础,主要涉及:

  1. 深度获取 - 多种传感器的选择
  2. 图像处理 - 滤波、分割、分析
  3. 3D 重建 - 点云生成和处理
  4. 应用 - 避障、导航、物体识别

掌握深度处理技术是开发智能机器人的关键。

推荐资源

由 LeBot 开发团队编写