在使用 Tesseract 提取文字之前,可以先用 OpenCV 对图片做预处理。最简单的例子如下所示:
//识别
void _characterIdentify()
{string path = "C:/Users/Desktop/11/9.jpg";
Mat img = imread(path); cv::Mat gray, binary;
// 转换为灰度图像
cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
cv::threshold(gray, binary, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU); // 反转二进制图像,得到文字区域
cv::bitwise_not(binary, binary); //imshow("binary", binary);
//waitKey(0); tesseract::TessBaseAPI ocr;
int ret = ocr.Init("E:\\qt\\Project\\TesseractSample\\Debug\\tessdata", "chi_sim");
ocr.SetImage(binary.data, binary.cols, binary.rows, 1, binary.step1()); char* outText = ocr.GetUTF8Text();
std::string strrrr = Coding::UTF8ToGB2312(outText); std::cout << strrrr;
delete[] outText;
ocr.End();}