【昉·星光2 RISC-V 单板计算机】数字识别

本文介绍了昉·星光2单板计算机实现 数字识别 的项目设计。

OpenCV

调用 OpenCV 库进行数字识别

流程图

代码

import cv2
import numpy as np

def init_camera(camera_index=4):
    """Open the video capture device at ``camera_index`` and return it.

    Raises:
        RuntimeError: if OpenCV reports that the device is not opened.
    """
    capture = cv2.VideoCapture(camera_index)
    if capture.isOpened():
        return capture
    raise RuntimeError("Unable to open the camera, please check the connection")

def preprocess_image(img):
    """Turn a BGR frame into a cleaned-up, inverted binary mask.

    Steps, in order: grayscale conversion, 5x5 Gaussian blur, inverted
    Gaussian adaptive threshold (block size 11, constant 2), and a
    morphological close with a 3x3 rectangular structuring element.
    """
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (5, 5), 0
    )
    mask = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )
    # The close operation is the last step; its result is what callers get.
    return cv2.morphologyEx(
        mask,
        cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)),
    )

def find_digit_regions(image):
    """Return candidate digit boxes as ``(x, y, w, h)``, ordered left to right.

    A bounding box of an external contour is kept only when its width is
    strictly between 20 and 200, its height strictly between 30 and 200,
    and its width/height ratio strictly between 0.2 and 1.2.
    """
    outlines, _ = cv2.findContours(
        image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    def plausible(box):
        # Size gate first, then the aspect-ratio gate.
        _, _, w, h = box
        if not (20 < w < 200 and 30 < h < 200):
            return False
        return 0.2 < w / float(h) < 1.2

    boxes = [cv2.boundingRect(outline) for outline in outlines]
    return sorted(
        (box for box in boxes if plausible(box)),
        key=lambda box: box[0],
    )

def recognize_digit(roi):
    """Return a digit label (as a string) for ``roi`` using a placeholder rule.

    This is NOT real recognition: the ROI is resized to 28x28, scaled by
    1/255, and the label is chosen purely from the mean intensity. Replace
    the body with actual model inference for any real use.
    """
    level = np.mean(cv2.resize(roi, (28, 28)) / 255.0)

    # Ascending upper bounds; the first bound the mean falls below wins.
    for upper_bound, label in ((0.3, "0"), (0.4, "1"), (0.5, "2")):
        if level < upper_bound:
            return label

    # Fallback: first decimal digit of the mean intensity (example logic).
    return str(int(level * 10 % 10))

def main():
    """Run the live digit-detection loop until 'q' is pressed or capture fails.

    Each captured frame is binarized with ``preprocess_image``; candidate
    boxes come from ``find_digit_regions``; every box is labelled with the
    result of ``recognize_digit`` and the annotated frame is displayed.
    The camera and all windows are released on exit, including on error.
    """
    # Initialize the camera
    cap = init_camera()

    try:
        # Created inside the try block so the camera is still released if
        # the GUI backend fails to create the window.
        cv2.namedWindow("Digit Recognition", cv2.WINDOW_NORMAL)

        while True:
            # Capture a frame
            ret, frame = cap.read()
            if not ret:
                print("Unable to capture frame, exiting...")
                break

            # Binarize, then locate candidate digit boxes on the mask
            processed = preprocess_image(frame)
            digit_rects = find_digit_regions(processed)

            # Recognize and label each digit
            for (x, y, w, h) in digit_rects:
                # The ROI is taken from the binary mask, not the color frame
                roi = processed[y:y+h, x:x+w]
                digit = recognize_digit(roi)

                # Draw a rectangle around the digit
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

                # Put the label just above the box, but clamp the text
                # baseline so it stays inside the frame for boxes that
                # touch the top edge.
                label_y = max(y - 10, 30)
                cv2.putText(
                    frame, digit, (x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2
                )

            # Display the result
            cv2.imshow("Digit Recognition", frame)

            # Exit on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        # Release resources
        cap.release()
        cv2.destroyAllWindows()

# Start the camera demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

识别速度较快，无卡顿

识别灵敏度过高，导致误识别概率过大。

ONNX 模型

通过调用训练完成的 ONNX 模型，提升转化效率和识别效果。

流程图

代码

使用 OpenCV 的 DNN 库对模型进行解析

import cv2
import numpy as np

class DigitRecognizer:
    """Digit classifier backed by an ONNX model loaded through OpenCV DNN."""

    def __init__(self, onnx_model_path="mnist-8.onnx"):
        """Load the ONNX model at ``onnx_model_path`` with OpenCV DNN."""
        self.net = cv2.dnn.readNetFromONNX(onnx_model_path)
        print("Model loaded successfully!")

    def preprocess(self, img):
        """Convert ``img`` into a float32 blob of shape (1, 1, 28, 28).

        Args:
            img: a BGR color image, or an image that is already grayscale
                (2-D array, or 3-D array with a single channel).

        Returns:
            The blob produced by ``cv2.dnn.blobFromImage`` from the
            28x28 grayscale image scaled to [0, 1].
        """
        if img.ndim == 2:
            # Already grayscale; cvtColor(BGR2GRAY) would reject it.
            gray = img
        elif img.ndim == 3 and img.shape[2] == 1:
            # Single-channel image stored with an explicit channel axis.
            gray = img[:, :, 0]
        else:
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        resized = cv2.resize(gray, (28, 28))  # Resize
        normalized = resized.astype(np.float32) / 255.0  # Normalize to [0,1]
        blob = cv2.dnn.blobFromImage(normalized)  # Convert to (1,1,28,28)
        return blob

    def predict(self, img):
        """Return the index of the highest-scoring model output as an ``int``.

        For a 10-class digit model this is the predicted digit (0-9).
        """
        blob = self.preprocess(img)
        self.net.setInput(blob)
        output = self.net.forward()  # Inference
        # Cast so callers receive a plain Python int, not numpy.int64.
        return int(np.argmax(output))

def main():
    """Run live digit recognition with the ONNX model until 'q' or Ctrl+C.

    Frames are binarized with an inverted Otsu threshold, external
    contours are size-filtered, and each surviving region is classified
    with ``DigitRecognizer.predict`` and annotated on the displayed frame.
    The camera and windows are always released on exit.
    """
    # Initialize the recognizer
    recognizer = DigitRecognizer("mnist-8.onnx")

    # Initialize the camera
    cap = cv2.VideoCapture(4)

    try:
        # Report a missing camera explicitly instead of letting it surface
        # later as a misleading "Unable to capture video frame".
        if not cap.isOpened():
            print("Unable to open the camera, please check the connection")
            return

        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        print("Digit recognition program has started, press 'q' to exit...")

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Unable to capture video frame")
                break

            # Convert to grayscale and binarize (inverted: dark ink -> 255)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

            # Find contours
            contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            for cnt in contours:
                x, y, w, h = cv2.boundingRect(cnt)
                if not (20 < w < 200 and 30 < h < 200):  # Filter by region size
                    continue

                # Classify from the inverted binary mask rather than the raw
                # frame: MNIST-style models expect light strokes on a dark
                # background, which is the polarity of `binary`.
                roi = binary[y:y+h, x:x+w]

                # Pad to a square with a margin so the 28x28 resize does
                # not stretch narrow digits and the digit sits roughly
                # centred, as in MNIST samples.
                side = max(w, h)
                margin = side // 5
                top = (side - h) // 2 + margin
                bottom = side - h - (side - h) // 2 + margin
                left = (side - w) // 2 + margin
                right = side - w - (side - w) // 2 + margin
                padded = cv2.copyMakeBorder(
                    roi, top, bottom, left, right,
                    cv2.BORDER_CONSTANT, value=0
                )

                # predict() takes a BGR image, so expand the mask to 3 channels.
                digit = recognizer.predict(cv2.cvtColor(padded, cv2.COLOR_GRAY2BGR))

                # Draw the results; clamp the label baseline so it stays
                # inside the frame for boxes that touch the top edge.
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
                cv2.putText(frame, str(digit), (x, max(y - 10, 30)),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            cv2.imshow("Digit Recognition (OpenCV DNN)", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except KeyboardInterrupt:
        print("Program interrupted by user")
    finally:
        cap.release()
        cv2.destroyAllWindows()
        print("Resources released")

# Start the ONNX demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

识别速度较快，识别效果较 OpenCV 的方案有所提升

替换为其他 ONNX 模型后，识别准确度依然有限。

分析

识别效果与模型、图像分辨率、识别范围设定、摄像头分辨率设定等因素有关，具体应用时需要对参数进行校准。

总结

本文介绍了昉·星光2单板计算机实现 数字识别 的项目设计，为该开发板在人工智能领域的应用和快速开发提供了参考。

全部评论

暂无评论

0/144