说明
基于开源项目 https://github.com/sdcb/PaddleSharp
VS2022 + .NET Framework 4.8 + OpenCvSharp4 + Sdcb.PaddleInference
效果
项目
代码
using Sdcb.PaddleInference.Native;
using Sdcb.PaddleInference;
using System;
using System.Collections.Generic;
using OpenCvSharp.Extensions;
using OpenCvSharp;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.Globalization;
using System.IO;
namespace PaddleInference_OCR识别
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form. All set-up is delegated to <c>InitializeComponent</c>,
/// which is defined in another part of this partial class.
/// </summary>
public Form1()
{
InitializeComponent();
}
// Bitmap created from the most recently opened image file.
Bitmap bmp;
// Filter used by the open-file dialog. "*.tif" replaces a duplicated "*.tiff"
// entry so that both common TIFF extensions are offered.
string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
// Full path of the currently selected image; empty until a file is chosen.
string img = "";
// Intended to hold the application start-up directory.
string startupPath = "";
// Upper bound for the longest image edge handed to the detector (see MatResize).
int MaxSize = 1536;
// Binarisation threshold applied to the detector output (scaled by 255); null skips thresholding.
float? BoxThreshold = 0.3f;
// Minimum mean detector score inside a contour for it to be kept; null keeps every contour.
float? BoxScoreThreahold = 0.7f;
// Side length of the square dilation kernel; null skips dilation.
int? DilatedSize = 2;
// Rectangles whose width or height is not larger than this are discarded.
int MinSize = 3;
// Factor (times the shorter edge) by which detected rectangles are enlarged.
float UnclipRatio = 2.0f;
// Image loaded for the current detection run.
Mat src;
// Text-detection predictor.
PaddlePredictor det_predictor;
// Rotated rectangles produced by the last detection run.
RotatedRect[] rects;
// When true, each crop is passed through the 180-degree classifier.
bool Enable180Classification { get; set; } = true;
// When true, crops are rectified with GetRotateCropImage; otherwise the axis-aligned bounding box is used.
bool AllowRotateDetection { get; set; } = true;
// Minimum classifier score required before a crop is rotated by 180 degrees.
double RotateThreshold { get; } = 0.75;
// Crops produced by the direction-classification step, consumed by recognition.
Mat[] mats;
// Direction-classification predictor.
PaddlePredictor cls_predictor;
// Input shape of the recognition model; only its Height is read in this file.
OcrShape recShape = new OcrShape(3, 320, 48);
// Text-recognition predictor.
PaddlePredictor rec_predictor;
// Recognition labels, one entry per line of ppocr_keys.txt.
public IReadOnlyList<string> Labels;
// Start / end timestamps used to report the elapsed time of each step.
DateTime dt1 = DateTime.Now;
DateTime dt2 = DateTime.Now;
/// <summary>
/// Loads the detection, direction-classification and recognition models plus the
/// label dictionary from sub-folders of the application start-up directory.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private unsafe void Form1_Load(object sender, EventArgs e)
{
    // Assign the field; the previous code declared a local of the same name,
    // which left the field permanently empty.
    startupPath = Application.StartupPath;
    Encoding PaddleEncoding = Environment.OSVersion.Platform == PlatformID.Win32NT ? Encoding.GetEncoding(CultureInfo.CurrentCulture.TextInfo.ANSICodePage) : Encoding.UTF8;

    // Detection model: loaded from in-memory buffers. This is the place where an
    // encrypted model file could be decrypted before it is handed to Paddle.
    // File.ReadAllBytes reads the complete file and closes it again; the previous
    // code relied on a single Stream.Read call and never closed its streams.
    string detDir = Path.Combine(startupPath, "ch_PP-OCRv3_det");
    byte[] programBuffer = File.ReadAllBytes(Path.Combine(detDir, "inference.pdmodel"));
    byte[] paramsBuffer = File.ReadAllBytes(Path.Combine(detDir, "inference.pdiparams"));
    IntPtr det_ptr = PaddleNative.PD_ConfigCreate();
    fixed (byte* pprogram = programBuffer)
    fixed (byte* pparams = paramsBuffer)
    {
        PaddleNative.PD_ConfigSetModelBuffer(det_ptr,
            (IntPtr)pprogram, programBuffer.Length,
            (IntPtr)pparams, paramsBuffer.Length);
    }
    det_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(det_ptr));

    // Direction-classification model: loaded by file path.
    string clsDir = Path.Combine(startupPath, "ch_ppocr_mobile_v2.0_cls");
    cls_predictor = CreatePredictorFromFiles(
        Path.Combine(clsDir, "inference.pdmodel"),
        Path.Combine(clsDir, "inference.pdiparams"),
        PaddleEncoding);

    // Recognition model: loaded by file path.
    string recDir = Path.Combine(startupPath, "ch_PP-OCRv3_rec");
    rec_predictor = CreatePredictorFromFiles(
        Path.Combine(recDir, "inference.pdmodel"),
        Path.Combine(recDir, "inference.pdiparams"),
        PaddleEncoding);

    // Label dictionary: one label per line.
    Labels = File.ReadAllLines(Path.Combine(startupPath, "ppocr_keys.txt"));
}

/// <summary>
/// Creates a predictor from a model/parameter file pair addressed by path.
/// </summary>
/// <param name="programPath">Path of the .pdmodel file.</param>
/// <param name="paramsPath">Path of the .pdiparams file.</param>
/// <param name="pathEncoding">Encoding used to convert the paths to native bytes.</param>
/// <returns>The newly created predictor.</returns>
private static unsafe PaddlePredictor CreatePredictorFromFiles(string programPath, string paramsPath, Encoding pathEncoding)
{
    IntPtr config = PaddleNative.PD_ConfigCreate();
    // The paths are passed as raw byte pointers; append '\0' so they are
    // terminated explicitly instead of depending on whatever follows the array.
    byte[] programBytes = pathEncoding.GetBytes(programPath + "\0");
    byte[] paramsBytes = pathEncoding.GetBytes(paramsPath + "\0");
    fixed (byte* programPtr = programBytes)
    fixed (byte* paramsPtr = paramsBytes)
    {
        PaddleNative.PD_ConfigSetModel(config, (IntPtr)programPtr, (IntPtr)paramsPtr);
    }
    return new PaddlePredictor(PaddleNative.PD_PredictorCreate(config));
}
/// <summary>
/// Lets the user pick an image file and shows it in the picture box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // The dialog owns native resources, so dispose it as soon as the path is known.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Filter = fileFilter;
        if (ofd.ShowDialog() != DialogResult.OK)
        {
            return;
        }
        img = ofd.FileName;
    }

    // Detach and release the previously shown bitmaps before loading new ones.
    var previousPreview = pictureBox1.Image;
    pictureBox1.Image = null;
    if (previousPreview != null)
    {
        previousPreview.Dispose();
    }
    if (bmp != null)
    {
        bmp.Dispose();
    }

    bmp = new Bitmap(img);
    pictureBox1.Image = new Bitmap(img);
    textBox1.Text = "";
}
/// <summary>
/// Runs text detection on the selected image, stores the detected rotated
/// rectangles in <c>rects</c>, reports the elapsed time and draws the rectangles
/// onto a copy of the image shown in the picture box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    textBox1.Text = "";
    Application.DoEvents();
    if (img == "")
    {
        return;
    }
    dt1 = DateTime.Now;

    // Bail out early when the file cannot be decoded instead of failing later
    // inside the resize step.
    Mat loaded = Cv2.ImRead(img);
    if (loaded.Empty())
    {
        loaded.Dispose();
        textBox1.Text = "图片读取失败: " + img;
        return;
    }
    // Release the image of the previous run before replacing it.
    if (src != null)
    {
        src.Dispose();
    }
    src = loaded;

    // Pre-processing: limit the longest edge, pad to a multiple of 32, normalise.
    // All intermediates are released once the input tensor has been filled.
    OpenCvSharp.Size resizedSize;
    using (Mat resized = MatResize(src, MaxSize))
    using (Mat padded = MatPadding32(resized))
    using (Mat normalized = Normalize(padded))
    {
        Cv2.ImShow("normalized", normalized);
        resizedSize = resized.Size();
        using (PaddleTensor input = det_predictor.GetInputTensor(det_predictor.InputNames[0]))
        {
            input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
            float[] setData = ExtractMat(normalized);
            input.SetData(setData);
        }
    }

    if (!det_predictor.Run())
    {
        throw new Exception("PaddlePredictor(Detector) run failed.");
    }

    using (PaddleTensor output = det_predictor.GetOutputTensor(det_predictor.OutputNames[0]))
    {
        float[] data = output.GetData<float>();
        int[] shape = output.Shape;
        using (Mat pred = new Mat(shape[2], shape[3], MatType.CV_32FC1, data))
        // Only the un-padded region of the prediction map is analysed.
        using (Mat roi = pred[0, resizedSize.Height, 0, resizedSize.Width])
        using (Mat cbuf = new Mat())
        using (Mat dilated = new Mat())
        {
            Cv2.ImShow("pred", pred);
            roi.ConvertTo(cbuf, MatType.CV_8UC1, 255);

            Mat binary = BoxThreshold != null ?
                cbuf.Threshold((int)(BoxThreshold * 255), 255, ThresholdTypes.Binary) :
                cbuf;
            try
            {
                if (DilatedSize != null)
                {
                    using (Mat ones = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(DilatedSize.Value, DilatedSize.Value)))
                    {
                        Cv2.Dilate(binary, dilated, ones);
                    }
                }
                else
                {
                    Cv2.CopyTo(binary, dilated);
                }
            }
            finally
            {
                // binary aliases cbuf when thresholding is disabled; cbuf is released by its own using.
                if (!ReferenceEquals(binary, cbuf))
                {
                    binary.Dispose();
                }
            }

            OpenCvSharp.Point[][] contours = dilated.FindContoursAsArray(RetrievalModes.List, ContourApproximationModes.ApproxSimple);
            // Factor that maps coordinates of the resized image back to the source image.
            double scaleRate = 1.0 * src.Width / resizedSize.Width;
            rects = contours
                .Where(x => BoxScoreThreahold == null || GetScore(x, pred) > BoxScoreThreahold)
                .Select(x => Cv2.MinAreaRect(x))
                .Where(x => x.Size.Width > MinSize && x.Size.Height > MinSize)
                .Select(rect =>
                {
                    // Enlarge every box by UnclipRatio times its shorter edge and scale it to source coordinates.
                    float minEdge = Math.Min(rect.Size.Width, rect.Size.Height);
                    Size2f newSize = new Size2f(
                        (rect.Size.Width + UnclipRatio * minEdge) * scaleRate,
                        (rect.Size.Height + UnclipRatio * minEdge) * scaleRate);
                    return new RotatedRect(rect.Center * scaleRate, newSize, rect.Angle);
                })
                .OrderBy(v => v.Center.Y)
                .ThenBy(v => v.Center.X)
                .ToArray();
        }
    }

    dt2 = DateTime.Now;
    StringBuilder sb = new StringBuilder();
    sb.AppendLine("-----------------------------------\n");
    sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
    textBox1.Text = sb.ToString();

    // Draw every detected rectangle on a copy of the source image
    // (a clone avoids decoding the file from disk a second time).
    using (Mat annotated = src.Clone())
    {
        for (int i = 0; i < rects.Length; i++)
        {
            Scalar scalar = Scalar.RandomColor();
            List<OpenCvSharp.Point> corners = new List<OpenCvSharp.Point>();
            foreach (var corner in rects[i].Points())
            {
                corners.Add(new OpenCvSharp.Point(corner.X, corner.Y));
            }
            List<List<OpenCvSharp.Point>> polygons = new List<List<OpenCvSharp.Point>>();
            polygons.Add(corners);
            Cv2.Polylines(annotated, polygons, true, scalar);
        }

        var previousPreview = pictureBox1.Image;
        pictureBox1.Image = BitmapConverter.ToBitmap(annotated);
        if (previousPreview != null)
        {
            previousPreview.Dispose();
        }
    }
}
/// <summary>
/// Computes the mean value of <paramref name="pred"/> inside the polygon described
/// by <paramref name="contour"/>.
/// </summary>
/// <param name="contour">Polygon vertices in the coordinate system of <paramref name="pred"/>.</param>
/// <param name="pred">Prediction map that is averaged.</param>
/// <returns>Mean of the first channel of <paramref name="pred"/> over the filled polygon.</returns>
private float GetScore(OpenCvSharp.Point[] contour, Mat pred)
{
    int width = pred.Width;
    int height = pred.Height;
    int[] boxX = contour.Select(v => v.X).ToArray();
    int[] boxY = contour.Select(v => v.Y).ToArray();

    // Axis-aligned bounding box of the contour, clamped to the map.
    int xmin = Clamp(boxX.Min(), 0, width - 1);
    int xmax = Clamp(boxX.Max(), 0, width - 1);
    int ymin = Clamp(boxY.Min(), 0, height - 1);
    int ymax = Clamp(boxY.Max(), 0, height - 1);

    // Contour translated so that the bounding box starts at (0, 0).
    OpenCvSharp.Point[] rootPoints = contour
        .Select(v => new OpenCvSharp.Point(v.X - xmin, v.Y - ymin))
        .ToArray();

    // Both temporaries are released here; previously they were leaked on every call.
    using (Mat mask = new Mat(ymax - ymin + 1, xmax - xmin + 1, MatType.CV_8UC1, Scalar.Black))
    using (Mat croppedMat = pred[ymin, ymax + 1, xmin, xmax + 1])
    {
        mask.FillPoly(new[] { rootPoints }, new Scalar(1));
        return (float)croppedMat.Mean(mask).Val0;
    }
}
/// <summary>
/// Limits <paramref name="val"/> to the inclusive range [<paramref name="min"/>, <paramref name="max"/>].
/// </summary>
/// <param name="val">Value to limit.</param>
/// <param name="min">Lower bound; checked first.</param>
/// <param name="max">Upper bound.</param>
/// <returns><paramref name="min"/> when the value is below it, otherwise <paramref name="max"/> when the value is above it, otherwise the value itself.</returns>
public int Clamp(int val, int min, int max)
{
    return val < min
        ? min
        : (val > max ? max : val);
}
/// <summary>
/// Copies the channels of <paramref name="src"/> into one planar float array
/// (all of channel 0, then channel 1, ...).
/// </summary>
/// <param name="src">
/// Source image with at most three channels.
/// NOTE(review): the destination headers are CV_32FC1, so src is presumably
/// expected to hold 32-bit floats — confirm against callers.
/// </param>
/// <returns>Array of length rows * cols * 3; planes beyond the source channel count stay zero.</returns>
/// <exception cref="ArgumentException">The source has more than three channels.</exception>
float[] ExtractMat(Mat src)
{
    int rows = src.Rows;
    int cols = src.Cols;
    int channels = src.Channels();
    // The buffer holds exactly three planes; writing a fourth would run past its end.
    if (channels > 3)
    {
        throw new ArgumentException("src must have at most 3 channels, provided " + channels + ".", nameof(src));
    }

    float[] array = new float[rows * cols * 3];
    // Pin before entering the try block so the finally never frees an unallocated handle.
    GCHandle gCHandle = GCHandle.Alloc(array, GCHandleType.Pinned);
    try
    {
        IntPtr intPtr = gCHandle.AddrOfPinnedObject();
        int planeBytes = rows * cols * sizeof(float);
        for (int i = 0; i < channels; i++)
        {
            // Header over the i-th plane of the pinned array.
            using (Mat dest = new Mat(rows, cols, MatType.CV_32FC1, intPtr + i * planeBytes, 0L))
            {
                Cv2.ExtractChannel(src, dest, i);
            }
        }
        return array;
    }
    finally
    {
        gCHandle.Free();
    }
}
/// <summary>
/// Returns a copy of <paramref name="src"/> whose longest edge does not exceed
/// <paramref name="maxSize"/>; images that already fit are cloned unchanged.
/// </summary>
/// <param name="src">Image to copy or shrink.</param>
/// <param name="maxSize">Limit for the longest edge, or null for no limit.</param>
/// <returns>A new Mat; the caller owns it.</returns>
private Mat MatResize(Mat src, int? maxSize)
{
    if (!maxSize.HasValue)
    {
        return src.Clone();
    }

    OpenCvSharp.Size dims = src.Size();
    double factor = 1.0 * maxSize.Value / Math.Max(dims.Width, dims.Height);
    if (factor < 1.0)
    {
        // Shrink both axes by the same factor.
        return src.Resize(OpenCvSharp.Size.Zero, factor, factor);
    }
    return src.Clone();
}
/// <summary>
/// Pads <paramref name="src"/> with black pixels on the bottom and right so that
/// both dimensions become multiples of 32.
/// </summary>
/// <param name="src">Image to pad.</param>
/// <returns>A new padded Mat; the caller owns it.</returns>
private Mat MatPadding32(Mat src)
{
    OpenCvSharp.Size original = src.Size();
    double paddedWidth = 32 * Math.Ceiling(1.0 * original.Width / 32);
    double paddedHeight = 32 * Math.Ceiling(1.0 * original.Height / 32);
    OpenCvSharp.Size target = new OpenCvSharp.Size(paddedWidth, paddedHeight);

    int extraBottom = target.Height - original.Height;
    int extraRight = target.Width - original.Width;
    return src.CopyMakeBorder(0, extraBottom, 0, extraRight, BorderTypes.Constant, Scalar.Black);
}
/// <summary>
/// Converts <paramref name="src"/> to float, scales it by 1/255 and then applies
/// (value - mean) / std per channel, using the constants below in the order in
/// which <c>Split</c> returns the channels.
/// </summary>
/// <param name="src">Image to normalise.</param>
/// <returns>A new Mat with the normalised channels merged back together; the caller owns it.</returns>
private Mat Normalize(Mat src)
{
    float[] means = new[] { 0.485f, 0.456f, 0.406f };
    float[] scales = new[] { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };

    Mat unitRange = new Mat();
    src.ConvertTo(unitRange, MatType.CV_32FC3, 1.0 / 255);
    Mat[] planes = unitRange.Split();

    int planeIndex = 0;
    while (planeIndex < planes.Length)
    {
        Mat plane = planes[planeIndex];
        // alpha = 1/std, beta = -mean/std, evaluated in double precision.
        double alpha = 1.0 * scales[planeIndex];
        double beta = (0.0 - means[planeIndex]) * scales[planeIndex];
        plane.ConvertTo(plane, MatType.CV_32FC1, alpha, beta);
        ++planeIndex;
    }
    unitRange.Dispose();

    Mat merged = new Mat();
    Cv2.Merge(planes, merged);
    for (int k = 0; k < planes.Length; ++k)
    {
        planes[k].Dispose();
    }
    return merged;
}
/// <summary>
/// Cuts the area covered by the rotated rectangle <paramref name="rect"/> out of
/// <paramref name="src"/> and warps it into an upright image.
/// </summary>
/// <param name="src">Source image.</param>
/// <param name="rect">Rotated rectangle in source-image coordinates; it may extend beyond the image.</param>
/// <returns>A new Mat with the rectified crop; the caller owns it.</returns>
private Mat GetRotateCropImage(Mat src, RotatedRect rect)
{
    bool wider = rect.Size.Width > rect.Size.Height;
    float angle = rect.Angle;
    OpenCvSharp.Size srcSize = src.Size();
    Rect boundingRect = rect.BoundingRect();

    // How far the bounding box sticks out of the image on each side.
    int expTop = Math.Max(0, 0 - boundingRect.Top);
    int expBottom = Math.Max(0, boundingRect.Bottom - srcSize.Height);
    int expLeft = Math.Max(0, 0 - boundingRect.Left);
    int expRight = Math.Max(0, boundingRect.Right - srcSize.Width);

    // Part of the bounding box that actually lies inside the image.
    Rect roiRect = Rect.FromLTRB(
        boundingRect.Left + expLeft,
        boundingRect.Top + expTop,
        boundingRect.Right - expRight,
        boundingRect.Bottom - expBottom);

    using (Mat boundingMat = src[roiRect])
    // Re-grow the clipped crop to the full bounding-box size by replicating edge pixels.
    using (Mat expanded = boundingMat.CopyMakeBorder(expTop, expBottom, expLeft, expRight, BorderTypes.Replicate))
    {
        // After the border is added, pixel (0, 0) of "expanded" corresponds to the
        // top-left corner of boundingRect, so corners are translated by exactly that
        // corner. The previous code additionally added (expTop, expLeft) — swapped and
        // not needed — which shifted the crop whenever the box was clipped at the
        // top or left image edge.
        Point2f[] rp = rect.Points()
            .Select(v => new Point2f(v.X - boundingRect.X, v.Y - boundingRect.Y))
            .ToArray();

        Point2f[] srcPoints = new[] { rp[0], rp[3], rp[2], rp[1] };
        if (wider == true && angle >= 0 && angle < 45)
        {
            srcPoints = new[] { rp[1], rp[2], rp[3], rp[0] };
        }

        Point2f[] dstPoints = new[]
        {
            new Point2f(0, 0),
            new Point2f(rect.Size.Width, 0),
            new Point2f(rect.Size.Width, rect.Size.Height),
            new Point2f(0, rect.Size.Height),
        };

        using (Mat matrix = Cv2.GetPerspectiveTransform(srcPoints, dstPoints))
        {
            Mat dest = expanded.WarpPerspective(matrix, new OpenCvSharp.Size(rect.Size.Width, rect.Size.Height), InterpolationFlags.Nearest, BorderTypes.Replicate);
            if (!wider)
            {
                Cv2.Transpose(dest, dest);
            }
            else if (angle > 45)
            {
                Cv2.Flip(dest, dest, FlipMode.X);
            }
            return dest;
        }
    }
}
/// <summary>
/// Clamps every edge of <paramref name="rect"/> to the range spanned by <paramref name="size"/>.
/// </summary>
/// <param name="rect">Rectangle to clamp.</param>
/// <param name="size">Size whose width and height are used as the upper bounds.</param>
/// <returns>Rectangle built from the clamped left, top, right and bottom values.</returns>
private Rect GetCropedRect(Rect rect, OpenCvSharp.Size size)
{
    int left = Clamp(rect.Left, 0, size.Width);
    int top = Clamp(rect.Top, 0, size.Height);
    int right = Clamp(rect.Right, 0, size.Width);
    int bottom = Clamp(rect.Bottom, 0, size.Height);
    return Rect.FromLTRB(left, top, right, bottom);
}
/// <summary>
/// Releases the predictors when the form closes.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_FormClosing(object sender, FormClosingEventArgs e)
{
    // A predictor is null when model loading failed, hence the null-conditional calls.
    // cls_predictor was previously never disposed.
    det_predictor?.Dispose();
    cls_predictor?.Dispose();
    rec_predictor?.Dispose();
}
/// <summary>
/// Direction step: crops every detected rectangle out of the source image and,
/// when enabled, rotates crops that the classifier considers upside down.
/// The crops are stored in <c>mats</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    textBox1.Text = "";
    Application.DoEvents();
    // Both the detection result and the source image are required
    // (the previous code tested rects twice and never tested src).
    if (rects == null || src == null)
    {
        return;
    }

    // Release crops left over from a previous run before replacing them.
    if (mats != null)
    {
        foreach (Mat stale in mats)
        {
            stale.Dispose();
        }
        mats = null;
    }

    dt1 = DateTime.Now;
    mats =
        rects.Select(rect =>
        {
            Mat roi = AllowRotateDetection ? GetRotateCropImage(src, rect) : src[GetCropedRect(rect.BoundingRect(), src.Size())];
            return Enable180Classification ? CLSPredictorRun(roi) : roi;
        })
        .ToArray();
    dt2 = DateTime.Now;

    StringBuilder sb = new StringBuilder();
    sb.AppendLine("-----------------------------------\n");
    sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
    textBox1.Text = sb.ToString();
}
/// <summary>
/// Recognition step: runs the recogniser on the crops in <c>mats</c> and prints
/// one "(score)text" line per crop followed by the elapsed time.
/// The crops are released afterwards.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    textBox1.Text = "";
    Application.DoEvents();
    if (rects == null || mats == null)
    {
        return;
    }
    dt1 = DateTime.Now;
    try
    {
        // 0 lets RecognizerRun choose its default batch size.
        int recognizeBatchSize = 0;
        PaddleOcrRecognizerResult[] porr = RecognizerRun(mats, recognizeBatchSize);
        dt2 = DateTime.Now;

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < porr.Length; i++)
        {
            sb.AppendLine("("+ porr[i].Score+")"+ porr[i].Text );
        }
        sb.AppendLine("-----------------------------------\n");
        sb.AppendLine("耗时:" + (dt2 - dt1).TotalMilliseconds + "ms\n");
        textBox1.Text = sb.ToString();
    }
    finally
    {
        foreach (Mat mat in mats)
        {
            mat.Dispose();
        }
        // Drop the references as well; otherwise a second click would hand
        // already-disposed Mats to the recogniser.
        mats = null;
    }
}
/// <summary>
/// Recognises the text in every image of <paramref name="srcs"/>. Images are sorted
/// by width, processed in batches and the results are returned in input order.
/// </summary>
/// <param name="srcs">Images to recognise.</param>
/// <param name="batchSize">Images per batch; 0 selects min(8, processor count).</param>
/// <returns>One result per input image, in the same order as <paramref name="srcs"/>.</returns>
public PaddleOcrRecognizerResult[] RecognizerRun(Mat[] srcs, int batchSize = 0)
{
    if (srcs.Length == 0)
    {
        return new PaddleOcrRecognizerResult[0];
    }

    int chooseBatchSize = batchSize != 0 ? batchSize : Math.Min(8, Environment.ProcessorCount);

    // (An unused result array that was allocated here has been removed.)
    return srcs
        .Select((x, i) => (mat: x, i))
        // Similar widths in one batch keep the padding small.
        .OrderBy(x => x.mat.Width)
        .Chunk(chooseBatchSize)
        .Select(x => (result: RunMulti(x.Select(x2 => x2.mat).ToArray()), ids: x.Select(x2 => x2.i).ToArray()))
        .SelectMany(x => x.result.Zip(x.ids, (result, i) => (result, i)))
        // Restore the caller's original order.
        .OrderBy(x => x.i)
        .Select(x => x.result)
        .ToArray();
}
/// <summary>
/// Resizes <paramref name="src"/> to the given height while keeping its aspect
/// ratio, then pads it on the right with gray up to <paramref name="targetWidth"/>.
/// </summary>
/// <param name="src">Image to resize.</param>
/// <param name="height">Height of the result.</param>
/// <param name="targetWidth">Width of the result.</param>
/// <returns>A new Mat; the caller owns it.</returns>
private Mat ResizePadding(Mat src, int height, int targetWidth)
{
    OpenCvSharp.Size size = src.Size();
    float whRatio = 1.0f * size.Width / size.Height;
    // Single-precision rounding can push the computed width one pixel past the
    // target, which would request a negative border below; cap it.
    int width = Math.Min((int)Math.Ceiling(height * whRatio), targetWidth);
    if (width == targetWidth)
    {
        return src.Resize(new OpenCvSharp.Size(width, height));
    }

    // The intermediate image is only needed to build the padded result.
    using (Mat resized = src.Resize(new OpenCvSharp.Size(width, height)))
    {
        return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
    }
}
/// <summary>
/// Recognises a single image by delegating to <c>RunMulti</c> with a one-element batch.
/// </summary>
/// <param name="src">Image to recognise.</param>
/// <returns>The only result of the batch.</returns>
private PaddleOcrRecognizerResult Run(Mat src)
{
    Mat[] batch = new[] { src };
    PaddleOcrRecognizerResult[] results = RunMulti(batch);
    return results.Single();
}
/// <summary>
/// Runs the recognition model on one batch of images and decodes its output into text.
/// </summary>
/// <param name="srcs">Images of the batch; each must be non-empty with 1, 3 or 4 channels.</param>
/// <returns>One result per image, in input order.</returns>
/// <exception cref="ArgumentException">An image is empty.</exception>
/// <exception cref="Exception">An image has an unsupported channel count or the predictor run fails.</exception>
private PaddleOcrRecognizerResult[] RunMulti(Mat[] srcs)
{
    if (srcs.Length == 0)
    {
        return new PaddleOcrRecognizerResult[0];
    }
    for (int i = 0; i < srcs.Length; ++i)
    {
        if (srcs[i].Empty())
        {
            throw new ArgumentException($"src[{i}] size should not be 0, wrong input picture provided?");
        }
    }

    int modelHeight = recShape.Height;
    // Width of the widest image after scaling every image to the model height.
    int maxWidth = (int)Math.Ceiling(srcs.Max(m =>
    {
        OpenCvSharp.Size size = m.Size();
        return 1.0 * size.Width / size.Height * modelHeight;
    }));

    // Every normalised image is tracked here so that all of them are released
    // even when pre-processing or tensor upload throws.
    List<Mat> normalizeds = new List<Mat>(srcs.Length);
    try
    {
        for (int index = 0; index < srcs.Length; ++index)
        {
            Mat current = srcs[index];
            Mat channel3;
            switch (current.Channels())
            {
                case 4:
                    channel3 = current.CvtColor(ColorConversionCodes.RGBA2BGR);
                    break;
                case 3:
                    channel3 = current.Clone();
                    break;
                case 1:
                    channel3 = current.CvtColor(ColorConversionCodes.GRAY2RGB);
                    break;
                default:
                    throw new Exception("Unexpect src channel: {" + current.Channels() + "}, allow: (1/3/4)");
            }

            using (channel3)
            using (Mat resized = ResizePadding(channel3, modelHeight, maxWidth))
            {
                Cv2.ImShow("resized" + index.ToString(), resized);
                normalizeds.Add(Normalize(resized));
            }
        }

        using (PaddleTensor input = rec_predictor.GetInputTensor(rec_predictor.InputNames[0]))
        {
            int channel = normalizeds[0].Channels();
            input.Shape = new[] { normalizeds.Count, channel, modelHeight, maxWidth };
            float[] data = ExtractMat(normalizeds.ToArray(), channel, modelHeight, maxWidth);
            input.SetData(data);
        }
    }
    finally
    {
        foreach (Mat normalized in normalizeds)
        {
            normalized.Dispose();
        }
    }

    if (!rec_predictor.Run())
    {
        throw new Exception($"PaddlePredictor(Recognizer) run failed.");
    }

    using (PaddleTensor output = rec_predictor.GetOutputTensor(rec_predictor.OutputNames[0]))
    {
        float[] data = output.GetData<float>();
        int[] shape = output.Shape;
        // The output is indexed as [image, step, label] below.
        int charCount = shape[1];
        int labelCount = shape[2];

        // Pin before the try block so the finally never frees an unallocated handle.
        GCHandle dataHandle = GCHandle.Alloc(data, GCHandleType.Pinned);
        try
        {
            IntPtr dataPtr = dataHandle.AddrOfPinnedObject();
            PaddleOcrRecognizerResult[] results = new PaddleOcrRecognizerResult[shape[0]];
            for (int i = 0; i < results.Length; ++i)
            {
                StringBuilder sb = new StringBuilder();
                int lastIndex = 0;
                float score = 0;
                int accepted = 0;
                for (int n = 0; n < charCount; ++n)
                {
                    int[] maxIdx = new int[2];
                    double maxVal;
                    // One-row header over the label scores of step n of image i.
                    using (Mat mat = new Mat(1, labelCount, MatType.CV_32FC1, dataPtr + (n + i * charCount) * labelCount * sizeof(float)))
                    {
                        mat.MinMaxIdx(out double _, out maxVal, new int[0], maxIdx);
                    }

                    // Index 0 is skipped, as is an index that repeats the previous step's index.
                    if (maxIdx[1] > 0 && (!(n > 0 && maxIdx[1] == lastIndex)))
                    {
                        score += (float)maxVal;
                        ++accepted;
                        sb.Append(GetLabelByIndex(maxIdx[1]));
                    }
                    lastIndex = maxIdx[1];
                }

                // Average over the accepted steps rather than the string length: this
                // avoids NaN for empty text and stays correct when a label contributes
                // zero or several characters.
                float average = accepted > 0 ? score / accepted : 0f;
                results[i] = new PaddleOcrRecognizerResult(sb.ToString(), average);
            }
            return results;
        }
        finally
        {
            dataHandle.Free();
        }
    }
}
/// <summary>
/// Packs a batch of images into one planar float array laid out as
/// [image][channel][row][column].
/// </summary>
/// <param name="srcs">Images of the batch.</param>
/// <param name="channel">Channel count every image must have.</param>
/// <param name="height">Row count every image must have.</param>
/// <param name="width">Column count every image must have.</param>
/// <returns>Array of length srcs.Length * channel * height * width.</returns>
/// <exception cref="Exception">An image has a different channel count.</exception>
/// <exception cref="ArgumentException">An image has a different size.</exception>
private float[] ExtractMat(Mat[] srcs, int channel, int height, int width)
{
    float[] result = new float[srcs.Length * channel * width * height];
    GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
    try
    {
        long basePtr = resultHandle.AddrOfPinnedObject().ToInt64();
        long planeBytes = (long)height * width * sizeof(float);
        for (int i = 0; i < srcs.Length; ++i)
        {
            Mat src = srcs[i];
            if (src.Channels() != channel)
            {
                throw new Exception($"src[{i}] channel={src.Channels()}, expected {channel}");
            }
            // The destination header has a fixed size; with a differently sized source
            // OpenCV would allocate a fresh destination and leave this slice at zero.
            if (src.Rows != height || src.Cols != width)
            {
                throw new ArgumentException($"src[{i}] size={src.Cols}x{src.Rows}, expected {width}x{height}", nameof(srcs));
            }
            for (int c = 0; c < channel; ++c)
            {
                // Offsets are computed in 64 bits so large batches cannot overflow.
                IntPtr planePtr = new IntPtr(basePtr + (c + (long)i * channel) * planeBytes);
                using (Mat dest = new Mat(height, width, MatType.CV_32FC1, planePtr))
                {
                    Cv2.ExtractChannel(src, dest, c);
                }
            }
        }
        return result;
    }
    finally
    {
        resultHandle.Free();
    }
}
/// <summary>
/// Maps a 1-based model output index to its label text.
/// </summary>
/// <param name="x">Output index; 1..Labels.Count select a label, Labels.Count + 1 yields an empty string.</param>
/// <returns>The label, or an empty string for index Labels.Count + 1.</returns>
/// <exception cref="Exception">The index is outside 1..Labels.Count + 1.</exception>
string GetLabelByIndex(int x)
{
    int labelCount = Labels.Count;

    // NOTE(review): the index right after the last label presumably stands for a
    // blank/space class; it currently contributes no text — confirm this is intended.
    if (x == labelCount + 1)
    {
        return "";
    }

    if (x <= 0 || x > labelCount)
    {
        throw new Exception("Unable to GetLabelByIndex: index {" + x + "} out of range {" + labelCount + "}, OCR model or labels not matched?");
    }

    return Labels[x - 1];
}
/// <summary>
/// Fits <paramref name="src"/> into <paramref name="shape"/>: images that are
/// relatively wider than the shape are cropped on the right, the result is scaled
/// to the shape height, and narrower results are padded on the right with black.
/// </summary>
/// <param name="src">Image to fit.</param>
/// <param name="shape">Target shape; its Width and Height are used.</param>
/// <returns>A new Mat of height shape.Height and width at most shape.Width; the caller owns it.</returns>
private Mat ResizePadding(Mat src, OcrShape shape)
{
    OpenCvSharp.Size srcSize = src.Size();

    // Compare the aspect ratios by cross-multiplication. The previous integer
    // divisions truncated both ratios, so e.g. 500x110 against 192x48 (4.5 vs 4.0)
    // compared as 4 > 4 and was not cropped.
    bool widerThanShape = (long)srcSize.Width * shape.Height > (long)shape.Width * srcSize.Height;
    Mat roi = widerThanShape ?
        src[0, srcSize.Height, 0, (int)Math.Floor(1.0 * srcSize.Height * shape.Width / shape.Height)] :
        src.Clone();
    try
    {
        double scaleRate = 1.0 * shape.Height / srcSize.Height;
        // Never request a zero-width image for very narrow inputs.
        int scaledWidth = Math.Max(1, (int)Math.Floor(roi.Width * scaleRate));
        Mat resized = roi.Resize(new OpenCvSharp.Size(scaledWidth, shape.Height));
        if (resized.Width < shape.Width)
        {
            Cv2.CopyMakeBorder(resized, resized, 0, 0, 0, shape.Width - resized.Width, BorderTypes.Constant, Scalar.Black);
        }
        return resized;
    }
    finally
    {
        // Released on the exception path too.
        roi.Dispose();
    }
}
/// <summary>
/// Rotates <paramref name="src"/> by 180 degrees in place when the direction
/// classifier says so, and logs the decision to the console.
/// </summary>
/// <param name="src">Image to check; must be non-empty with 1 or 3 channels.</param>
/// <returns>The same Mat instance that was passed in.</returns>
/// <exception cref="ArgumentException">The image is empty.</exception>
/// <exception cref="NotSupportedException">The image has neither 1 nor 3 channels.</exception>
public Mat CLSPredictorRun(Mat src)
{
    if (src.Empty())
    {
        throw new ArgumentException("src size should not be 0, wrong input picture provided?");
    }
    if (!(src.Channels() == 3 || src.Channels() == 1))
    {
        throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
    }

    bool rotate = ShouldRotate180(src);
    if (rotate)
    {
        Cv2.Rotate(src, src, RotateFlags.Rotate180);
    }
    // The negative case used to be logged with the misspelling "Flase".
    Console.WriteLine("ShouldRotate180:" + (rotate ? "True" : "False"));
    return src;
}
/// <summary>
/// Runs the direction classifier on <paramref name="src"/> and reports whether
/// the image should be rotated by 180 degrees.
/// </summary>
/// <param name="src">Image to classify; must be non-empty with 1 or 3 channels.</param>
/// <returns>
/// True when the highest-scoring output has an odd index and its score exceeds <c>RotateThreshold</c>.
/// </returns>
/// <exception cref="ArgumentException">The image is empty.</exception>
/// <exception cref="NotSupportedException">The image has neither 1 nor 3 channels.</exception>
/// <exception cref="Exception">The predictor run fails.</exception>
public bool ShouldRotate180(Mat src)
{
    if (src.Empty())
    {
        throw new ArgumentException("src size should not be 0, wrong input picture provided?");
    }
    if (!(src.Channels() == 3 || src.Channels() == 1))
    {
        throw new NotSupportedException($"{nameof(src)} channel must be 3 or 1, provided {src.Channels()}.");
    }

    // The default OcrShape used in the classification model
    OcrShape shape = new OcrShape(3, 192, 48);

    // Both intermediates are only needed until the input tensor is filled; the
    // using statements release them on the exception path as well.
    using (Mat resized = ResizePadding(src, shape))
    using (Mat normalized = Normalize(resized))
    using (PaddleTensor input = cls_predictor.GetInputTensor(cls_predictor.InputNames[0]))
    {
        input.Shape = new[] { 1, 3, normalized.Rows, normalized.Cols };
        float[] data = ExtractMat(normalized);
        input.SetData(data);
    }

    if (!cls_predictor.Run())
    {
        throw new Exception("PaddlePredictor(Classifier) run failed.");
    }

    using (PaddleTensor output = cls_predictor.GetOutputTensor(cls_predictor.OutputNames[0]))
    {
        float[] softmax = output.GetData<float>();
        // Arg-max over the output values.
        float score = 0;
        int label = 0;
        for (int i = 0; i < softmax.Length; ++i)
        {
            if (softmax[i] > score)
            {
                score = softmax[i];
                label = i;
            }
        }
        return label % 2 == 1 && score > RotateThreshold;
    }
}
}
}
Demo下载