C#使用‘百度AI接口’剔除多余字符
本文的代码出自:https://blog.****.net/a497785609/article/details/82690732
我只是整合并实验了一下方便大家掌握和理解,若有违规请联系删除。
所有代码(这是窗体应用程序):
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Drawing.Imaging;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using System.Web;
namespace TextIdentify
{
public partial class Form1 : Form
{
// Sent as the "client_id" parameter of the OAuth token request (see GetAccessToken).
// The literal is a placeholder; presumably it must be replaced with the API Key (AK) of your Baidu AI application.
private static readonly string clientId = "你的AK";
// Sent as the "client_secret" parameter of the OAuth token request (see GetAccessToken).
// The literal is a placeholder; presumably it must be replaced with the Secret Key (SK) of your Baidu AI application.
private static readonly string clientSecret = "你的SK";
// Pulls the access_token value out of the JSON text returned by GetAccessToken.
// Built once: RegexOptions.Compiled is expensive to construct on every call.
private static readonly Regex accessTokenRegex = new Regex(
    "\"access_token\"\\s*:\\s*\"(?<token>[^\"]*)\"",
    RegexOptions.CultureInvariant | RegexOptions.Compiled);

// Matches every "words" string in the OCR response. The value pattern skips over
// backslash escapes, so an escaped quote (\") inside the text no longer ends the match early.
private static readonly Regex wordsRegex = new Regex(
    "\"words\"\\s*:\\s*\"(?<word>(?:\\\\.|[^\"\\\\])*)\"",
    RegexOptions.CultureInvariant | RegexOptions.Compiled);

/// <summary>
/// Sends the image at <paramref name="filePath"/> to the Baidu "accurate_basic" OCR
/// endpoint and returns the recognized text.
/// </summary>
/// <param name="filePath">Path of the image file to recognize.</param>
/// <returns>
/// Each matched "words" value of the response, trimmed, one per line. JSON escape
/// sequences inside a value are returned as-is (they are not decoded). If no access
/// token can be extracted, a fixed error message is returned. If any exception is
/// thrown, its <see cref="Exception.Message"/> is returned instead of the text.
/// </returns>
public static string Ocr(string filePath)
{
    try
    {
        // HttpUtility requires a project reference to the System.Web assembly.
        string img = HttpUtility.UrlEncode(GetBase64FromImage(filePath));

        string tokenJson = GetAccessToken() ?? string.Empty;
        string token = accessTokenRegex.Match(tokenJson).Groups["token"].Value.Trim();
        if (token.Length == 0)
        {
            // Without a token the OCR call cannot succeed; report it instead of
            // uploading the image and returning an unexplained empty result.
            return "Failed to obtain an access token from the Baidu OAuth endpoint.";
        }

        // Alternative endpoint: https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic
        string url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic";
        string form = "access_token=" + token
            + "&image=" + img
            + "&language_type=CHN_ENG";

        string json = HttpPost(url, form);

        var text = new StringBuilder();
        foreach (Match match in wordsRegex.Matches(json))
        {
            text.AppendLine(match.Groups["word"].Value.Trim());
        }
        return text.ToString();
    }
    catch (Exception ex)
    {
        // Best-effort API: failures are reported through the returned string.
        return ex.Message;
    }
}
/// <summary>
/// Loads an image file, re-encodes it as JPEG in memory and returns the JPEG bytes
/// as a Base64 string.
/// </summary>
/// <param name="imagefile">Path of the image file to load.</param>
/// <returns>Base64 text of the JPEG-encoded image.</returns>
/// <exception cref="Exception">
/// Thrown when loading or encoding fails. The original failure is available
/// through <see cref="Exception.InnerException"/>.
/// </exception>
public static string GetBase64FromImage(string imagefile)
{
    try
    {
        using (var bmp = new Bitmap(imagefile))
        using (var ms = new MemoryStream())
        {
            bmp.Save(ms, ImageFormat.Jpeg);
            // ToArray copies exactly the written bytes, regardless of stream position.
            return Convert.ToBase64String(ms.ToArray());
        }
    }
    catch (Exception ex)
    {
        // Same exception type and message as before, but keep the root cause
        // attached so callers and logs can see what actually went wrong.
        throw new Exception("Something wrong during convert!", ex);
    }
}
/// <summary>
/// Requests an OAuth token from the Baidu token endpoint using the
/// client-credentials grant with <c>clientId</c> and <c>clientSecret</c>.
/// </summary>
/// <returns>
/// The raw response body as returned by <c>HttpGet</c>; the caller is
/// responsible for extracting the access_token value from it.
/// </returns>
public static string GetAccessToken()
{
    string url = "https://aip.baidubce.com/oauth/2.0/token";

    // Percent-encode the credential values so that characters such as '&', '=',
    // '+' or '#' cannot corrupt the query string.
    string query = "grant_type=client_credentials"
        + "&client_id=" + Uri.EscapeDataString(clientId)
        + "&client_secret=" + Uri.EscapeDataString(clientSecret);

    return HttpGet(url, query);
}
/// <summary>
/// Performs a blocking HTTP GET and returns the response body decoded as UTF-8.
/// </summary>
/// <param name="url">Target URL without a query string.</param>
/// <param name="data">
/// Query string (without the leading '?'). When it is the empty string,
/// nothing is appended to <paramref name="url"/>.
/// </param>
/// <returns>The response text, or <c>null</c> when the response has no stream.</returns>
public static string HttpGet(string url, string data)
{
    string separator = data == "" ? "" : "?";
    var request = (HttpWebRequest)WebRequest.Create(url + separator + data);
    request.Method = "GET";
    request.ContentType = "text/html;charset=UTF-8";

    using (var response = (HttpWebResponse)request.GetResponse())
    {
        Stream body = response.GetResponseStream();
        if (body == null)
        {
            return null;
        }

        string text;
        using (var reader = new StreamReader(body, Encoding.GetEncoding("utf-8")))
        {
            text = reader.ReadToEnd();
        }
        body.Close();
        return text;
    }
}
/// <summary>
/// Performs a blocking HTTP POST of an already URL-encoded form body and returns
/// the response body decoded as UTF-8.
/// </summary>
/// <param name="url">Target URL.</param>
/// <param name="data">Form body in application/x-www-form-urlencoded format.</param>
/// <returns>The response text, or the empty string when the response has no stream.</returns>
public static string HttpPost(string url, string data)
{
    var request = (HttpWebRequest)WebRequest.Create(url);
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";

    // Encode once and use the same bytes for both the declared length and the
    // body. Previously the length was counted in UTF-8 while the body was written
    // as gb2312, which disagree for non-ASCII input. For ASCII-only form data the
    // bytes on the wire are unchanged.
    byte[] payload = Encoding.UTF8.GetBytes(data);
    request.ContentLength = payload.Length;

    using (Stream body = request.GetRequestStream())
    {
        body.Write(payload, 0, payload.Length);
    }

    using (var response = (HttpWebResponse)request.GetResponse())
    using (Stream res = response.GetResponseStream())
    {
        if (res == null)
        {
            return "";
        }

        using (var reader = new StreamReader(res, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
}
/// <summary>
/// Creates the form and calls <c>InitializeComponent</c>, which is not defined in
/// this file (presumably the designer-generated half of this partial class).
/// </summary>
public Form1()
{
InitializeComponent();
}
/// <summary>
/// Click handler for button1: runs <c>Ocr</c> on a fixed test image path and
/// shows the returned string in a message box.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    string recognized = Ocr("C:\\Users\\Simon\\Desktop\\Test.bmp");
    MessageBox.Show(recognized);
}
}
}
需要注意:HttpUtility 这个类位于 System.Web.dll 中,必须在项目的“引用”里添加 System.Web 这个 dll;如果只是在代码里写 using System.Web; 而没有添加引用,是无法编译通过的。
最后对比一下效果:(请忽略对话框样式的差异… 最后懒得移植了 哈哈)