在使用 Tesseract 提取文字之前,可以先用 OpenCV 对图片进行预处理(灰度化、二值化并反转)。最简单的例子如下所示:

//识别
 void _characterIdentify()
 {string path = "C:/Users/Desktop/11/9.jpg";
     Mat img = imread(path);    cv::Mat gray, binary;
     // 转换为灰度图像
     cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
     cv::threshold(gray, binary, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);    // 反转二进制图像,得到文字区域
     cv::bitwise_not(binary, binary);    //imshow("binary", binary);
     //waitKey(0);    tesseract::TessBaseAPI ocr;
     int ret = ocr.Init("E:\\qt\\Project\\TesseractSample\\Debug\\tessdata", "chi_sim");
     ocr.SetImage(binary.data, binary.cols, binary.rows, 1, binary.step1());    char* outText = ocr.GetUTF8Text();
     std::string strrrr = Coding::UTF8ToGB2312(outText);    std::cout << strrrr;
     delete[] outText;
     ocr.End();}