C#使用Tesseract进行OCR识别,可识别中、英、日、韩等多种语言
源码下载:https://download.****.net/download/horseroll/10739546 源码下包含部分语言包,所以文件比较大
先上效果图。这里测试了中文、英文和日语的识别;其他语言同样支持,只需下载相应的语言包即可,具体用法见下文。
1.首先在Nuget中搜索Tesseract,下载到项目中
2.下载相应的语言包放至Debug/tessdata文件夹下,Tesseract语言包下载地址:https://github.com/tesseract-ocr/tesseract/wiki/Data-Files#data-files-for-version-302
3.代码操作
首先初始化识别引擎类,并设置识别语言
// Create the OCR engine: the first argument is the folder holding the language data, the second is the language code to load.
TesseractEngine ocr;
ocr = new TesseractEngine("./tessdata", "chi_sim");// language: Chinese
//ocr = new TesseractEngine("./tessdata", "eng", EngineMode.TesseractAndCube);// language: English
//ocr = new TesseractEngine("./tessdata", "jpn");// language: Japanese
导入图片进行识别
string str;// recognized text
Bitmap bit;
// Image.FromFile keeps the file locked until the image is disposed, so copy it
// into an independent Bitmap and release the source handle right away.
using (Image source = Image.FromFile(filename.FileName.ToString()))
{
    bit = new Bitmap(source);
}
try
{
    // Optional preprocessing, worth trying when the recognition rate is low:
    //Bitmap processed = PreprocesImage(bit); bit.Dispose(); bit = processed;
    // Dispose the page even if GetText throws.
    using (Page page = ocr.Process(bit))
    {
        str = page.GetText();
    }
}
finally
{
    bit.Dispose();
}
图片预处理算法:如果识别率较低(例如识别数字时),可以先用下面的方法处理图片,再进行识别
/// <summary>
/// Enlarges the image five-fold in each dimension and reduces it to pure black and white.
/// A pixel becomes black only when its red, green and blue components are all at or
/// below the threshold; every other pixel becomes white.
/// </summary>
/// <param name="image">Source bitmap; it is only read, never modified or disposed here.</param>
/// <returns>A new black-and-white bitmap five times the width and height of <paramref name="image"/>. The caller owns it.</returns>
private Bitmap PreprocesImage(Bitmap image)
{
    if (image == null)
    {
        throw new System.ArgumentNullException("image");
    }

    // Enlargement applied to both dimensions before thresholding.
    const int scaleFactor = 5;
    // Channel value above which a pixel counts as "light"; tune this for the input images.
    const int darkThreshold = 23;

    // The enlarged copy is only an intermediate, so release it once the result is built.
    using (Bitmap enlarged = ResizeImage(image, image.Width * scaleFactor, image.Height * scaleFactor))
    {
        // Empty output bitmap with the same size as the enlarged copy.
        Bitmap newBitmap = new Bitmap(enlarged.Width, enlarged.Height);
        for (int x = 0; x < enlarged.Width; x++)
        {
            for (int y = 0; y < enlarged.Height; y++)
            {
                Color actualColor = enlarged.GetPixel(x, y);
                bool isLight = actualColor.R > darkThreshold
                    || actualColor.G > darkThreshold
                    || actualColor.B > darkThreshold;
                newBitmap.SetPixel(x, y, isLight ? Color.White : Color.Black);
            }
        }
        return newBitmap;
    }
}
/// <summary>
/// Draws the source image, scaled with high-quality bicubic interpolation, onto a new
/// bitmap of the requested pixel size.
/// </summary>
/// <param name="image">Source image; it is only read.</param>
/// <param name="width">Width of the returned bitmap, in pixels.</param>
/// <param name="height">Height of the returned bitmap, in pixels.</param>
/// <returns>A new bitmap of <paramref name="width"/> x <paramref name="height"/> pixels holding the scaled image.</returns>
private Bitmap ResizeImage(Image image, int width, int height)
{
    Bitmap scaled = new Bitmap(width, height);
    // Horizontal DPI is copied from the source; vertical DPI is doubled
    // (the original author noted 2 or 3 as candidate multipliers).
    scaled.SetResolution(image.HorizontalResolution, image.VerticalResolution * 2);

    using (Graphics canvas = Graphics.FromImage(scaled))
    using (ImageAttributes attributes = new ImageAttributes())
    {
        // Favour quality over speed for every rendering setting.
        canvas.InterpolationMode = InterpolationMode.HighQualityBicubic;
        canvas.SmoothingMode = SmoothingMode.HighQuality;
        canvas.PixelOffsetMode = PixelOffsetMode.HighQuality;
        canvas.CompositingQuality = CompositingQuality.HighQuality;
        canvas.CompositingMode = CompositingMode.SourceOver;

        attributes.SetWrapMode(WrapMode.Clamp);

        // Stretch the whole source image over the whole destination surface.
        Rectangle target = new Rectangle(0, 0, width, height);
        canvas.DrawImage(image, target, 0, 0, image.Width, image.Height, GraphicsUnit.Pixel, attributes);
    }

    return scaled;
}