OpenCV的基础光学字符识别(Basic OCR in OpenCV)

本站另外一篇文章就是这个程序的源码:用 OpenCV 写的 OCR。
basic painter 与 the basic pattern recognition and classification with openCV 两个教程。




特征提取(Feature extraction):在这个模块我们转换我们处理后的图像为一个特征向量以便于分类,它可能是像素矩阵转换成向量或者获取轮廓编码链的数据表示。



/**
 * Finds the horizontal extent of the non-white content of an image.
 *
 * A column counts as containing content when the sum of its pixels is lower
 * than the sum of an all-255 column.
 *
 * @param imgSrc image to scan (the 255-per-pixel threshold assumes an 8-bit,
 *               single-channel image with a white background)
 * @param min    receives the index of the first column with content
 * @param max    receives the index of the last column with content (inclusive)
 *
 * When no column contains content, *min and *max are left unchanged.
 */
void findX(IplImage* imgSrc, int* min, int* max) {
    int minFound = 0;
    CvMat data;
    // A column holds `height` pixels, so an all-white column sums to
    // height * 255 (not width * 255, which is only right for square images).
    CvScalar maxVal = cvRealScalar(imgSrc->height * 255);
    CvScalar val = cvRealScalar(0);
    for (int i = 0; i < imgSrc->width; i++) {
        cvGetCol(imgSrc, &data, i);
        val = cvSum(&data);
        if (val.val[0] < maxVal.val[0]) {
            // Every column with content moves the right edge; only the first
            // one fixes the left edge.
            *max = i;
            if (!minFound) {
                *min = i;
                minFound = 1;
            }
        }
    }
}
/**
 * Finds the vertical extent of the non-white content of an image.
 *
 * A row counts as containing content when the sum of its pixels is lower
 * than the sum of an all-255 row.
 *
 * @param imgSrc image to scan (the 255-per-pixel threshold assumes an 8-bit,
 *               single-channel image with a white background)
 * @param min    receives the index of the first row with content
 * @param max    receives the index of the last row with content (inclusive)
 *
 * When no row contains content, *min and *max are left unchanged.
 */
void findY(IplImage* imgSrc, int* min, int* max) {
    int minFound = 0;
    CvMat data;
    // A row holds `width` pixels, so an all-white row sums to width * 255.
    CvScalar maxVal = cvRealScalar(imgSrc->width * 255);
    CvScalar val = cvRealScalar(0);
    for (int i = 0; i < imgSrc->height; i++) {
        cvGetRow(imgSrc, &data, i);
        val = cvSum(&data);
        if (val.val[0] < maxVal.val[0]) {
            // Every row with content moves the bottom edge; only the first
            // one fixes the top edge.
            *max = i;
            if (!minFound) {
                *min = i;
                minFound = 1;
            }
        }
    }
}
/**
 * Computes the bounding box of the non-white content of an image.
 *
 * @param imgSrc image to scan
 * @return rectangle spanning the first to the last column and row that
 *         findX / findY report as containing content
 */
CvRect findBB(IplImage* imgSrc) {
    int xmin = 0, xmax = 0, ymin = 0, ymax = 0;
    findX(imgSrc, &xmin, &xmax);
    findY(imgSrc, &ymin, &ymax);
    // xmax / ymax are inclusive indices, so the extent is (max - min + 1);
    // without the + 1 the last column and row of the glyph are cut off.
    CvRect aux = cvRect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
    //printf("BB: %d,%d - %d,%d\n", aux.x, aux.y, aux.width, aux.height);
    return aux;
}
/**
 * Normalises a glyph image: crops it to its bounding box, centres the crop on
 * a white square canvas (so the aspect ratio is kept) and rescales the canvas
 * to new_width x new_height with nearest-neighbour interpolation.
 *
 * @param imgSrc     source image (8-bit, single channel, to match the canvas
 *                   created here)
 * @param new_width  width of the returned image
 * @param new_height height of the returned image
 * @return a by-value copy of the scaled image header. The pixel buffer it
 *         points to is heap-allocated here and is not freed by this function.
 */
IplImage preprocessing(IplImage* imgSrc, int new_width, int new_height) {
    CvMat data;
    CvMat dataA;

    // Find the bounding box of the glyph.
    CvRect bb = findBB(imgSrc);
    if (bb.width <= 0 || bb.height <= 0) {
        // Empty bounding box (e.g. nothing detected): fall back to the whole
        // image instead of building a zero-sized canvas.
        bb = cvRect(0, 0, imgSrc->width, imgSrc->height);
    }
    cvGetSubRect(imgSrc, &data, cvRect(bb.x, bb.y, bb.width, bb.height));

    // Square white canvas whose side is the larger bounding-box dimension.
    int size = (bb.width > bb.height) ? bb.width : bb.height;
    IplImage* result = cvCreateImage(cvSize(size, size), 8, 1);
    cvSet(result, cvRealScalar(255), NULL);

    // Copy the glyph into the centre of the canvas.
    int x = (size - bb.width) / 2;
    int y = (size - bb.height) / 2;
    cvGetSubRect(result, &dataA, cvRect(x, y, bb.width, bb.height));
    cvCopy(&data, &dataA, NULL);

    // Scale the canvas to the requested size.
    IplImage* scaledResult = cvCreateImage(cvSize(new_width, new_height), 8, 1);
    cvResize(result, scaledResult, CV_INTER_NN);

    // The intermediate canvas is no longer needed once it has been resized.
    cvReleaseImage(&result);

    // Return the processed data (header copy; see the ownership note above).
    return *scaledResult;
}

basicOCR.cpp 获取数据代码:
void basicOCR::getData() {
    IplImage * src_image;
    IplImage prs_image;
    CvMat row, data;
    char file[255];
    int i, j;
    for (i = 0; i < classes; i++) {
        for (j = 0; j < train_samples; j++) {
            //Load file
            if (j < 10)
                sprintf(file, "%s%d/%d0%d.pbm", file_path, i, i, j);
                sprintf(file, "%s%d/%d%d.pbm", file_path, i, i, j);
            src_image = cvLoadImage(file, 0);
            if (!src_image) {
                printf("Error: Cant load image %s\n", file);
            //process file
            prs_image = preprocessing(src_image, size, size);
            //Set class label
            cvGetRow(trainClasses, & row, i * train_samples + j);
            cvSet( & row, cvRealScalar(i));
            //Set data
            cvGetRow(trainData, & row, i * train_samples + j);
            IplImage * img = cvCreateImage(cvSize(size, size), IPL_DEPTH_32F, 1);
            //convert 8 bits image to 32 float image
            cvConvertScale( & prs_image, img, 0.0039215, 0);
            cvGetSubRect(img, & data, cvRect(0, 0, size, size));
            CvMat row_header, * row1;
            //convert data matrix sizexsize to vecor
            row1 = cvReshape( & data, & row_header, 0, 1);
            cvCopy(row1, & row, NULL);

// Build the nearest-neighbour classifier from the training samples (trainData)
// and their labels (trainClasses); K is passed as the last argument.
// NOTE(review): the `0` and `false` arguments are presumably "no sample subset"
// and "classification, not regression" - confirm against the CvKNearest constructor.
knn=new CvKNearest( trainData, trainClasses, 0, false, K );

void basicOCR::test() {
    IplImage * src_image;
    IplImage prs_image;
    CvMat row, data;
    char file[255];
    int i, j;
    int error = 0;
    int testCount = 0;
    for (i = 0; i < classes; i++) {
        for (j = 50; j < 50 + train_samples; j++) {
            sprintf(file, "%s%d/%d%d.pbm", file_path, i, i, j);
            src_image = cvLoadImage(file, 0);
            if (!src_image) {
                printf("Error: Cant load image %s\n", file);
            //process file
            prs_image = preprocessing(src_image, size, size);
            float r = classify( & prs_image, 0);
            if ((int) r != i)
    float totalerror = 100 * (float) error / (float) testCount;
    printf("System Error: %.2f%%\n", totalerror);

/**
 * Classifies one glyph image with the k-nearest-neighbour model.
 *
 * The image is preprocessed to size x size, scaled by ~1/255 into floats,
 * flattened to one row and passed to knn->find_nearest.
 *
 * @param img        image to classify
 * @param showResult when 1, prints the predicted class, the share of the K
 *                   neighbours that agree with it, and the raw count
 * @return the class label returned by find_nearest
 */
float basicOCR::classify(IplImage* img, int showResult) {
    CvMat data;
    CvMat* nearest = cvCreateMat(1, K, CV_32FC1);

    // Process file
    IplImage prs_image = preprocessing(img, size, size);

    // Set data: 8-bit image -> 32-bit float image (0.0039215 ~= 1/255).
    IplImage* img32 = cvCreateImage(cvSize(size, size), IPL_DEPTH_32F, 1);
    cvConvertScale(&prs_image, img32, 0.0039215, 0);
    cvGetSubRect(img32, &data, cvRect(0, 0, size, size));

    // Flatten the size x size matrix into a single-row vector.
    CvMat row_header;
    CvMat* row1 = cvReshape(&data, &row_header, 0, 1);

    float result = knn->find_nearest(row1, K, 0, 0, nearest, 0);

    // Count how many of the K neighbours carry the winning label.
    int accuracy = 0;
    for (int i = 0; i < K; i++) {
        if (nearest->data.fl[i] == result)
            accuracy++;
    }
    float pre = 100 * ((float) accuracy / (float) K);

    if (showResult == 1) {
        printf("|\t%.0f\t| \t%.2f%%  \t| \t%d of %d \t| \n", result, pre, accuracy, K);
        printf(" ---------------------------------------------------------------\n");
    }

    // Everything allocated for this call is no longer referenced.
    cvReleaseImage(&img32);
    cvReleaseData(&prs_image);
    cvReleaseMat(&nearest);

    return result;
}


1. kNN,即 K 最近邻算法(k-Nearest Neighbor algorithm),最简单的机器学习算法之一,简单说就是在特征空间里找到周围最近的 k 个样本,如果这 k 个样本中的大多数属于某个类,则该样本也属于这个类。


