如果想在粘贴内容时就抓取图片,需要改造百度编辑器的源码。我个人觉得这样不太好:一旦修改了源码,编辑器以后就不好升级了。因为有这个瑕疵,我放弃了这个想法,换了一个解决方案:在用户提交内容的时候,由后台主动去处理文本中的图片。
1、用户写好内容并提交到后台之后,后台拿到的是一段字符串形式的html标签。
2、写好我们的抓取类,如下:
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApp1.comm
{
    /// <summary>
    /// Finds the remote images referenced by <c>img</c> tags in an HTML fragment, uploads a copy of
    /// each one through the Qiniu upload endpoint and rewrites the HTML to reference the copies.
    /// </summary>
    public class CrawlerHelper
    {
        /// <summary>Substring that marks a URL as already re-hosted; such images are skipped.</summary>
        private const string HostedMarker = ".qiniu.com";

        /// <summary>Access domain configured on Qiniu; prepended to the uploaded file's URL.</summary>
        private const string FileDomain = "http://www.qiniu.com/";

        /// <summary>Crop width passed to the upload endpoint ("0" sends no size parameter).</summary>
        private const string CropWidth = "0";

        /// <summary>Crop height passed to the upload endpoint.</summary>
        private const string CropHeight = "0";

        /// <summary>
        /// Re-hosts every not-yet-processed image referenced by an <c>img src</c> attribute and
        /// returns the HTML with those URLs replaced.
        /// </summary>
        /// <param name="html">The HTML fragment submitted by the user.</param>
        /// <returns>
        /// The HTML with re-hosted image URLs substituted. Images that could not be downloaded or
        /// uploaded keep their original URL. Null/blank input is returned unchanged.
        /// </returns>
        public static async Task<string> CrawlerAsync(string html)
        {
            if (string.IsNullOrWhiteSpace(html))
            {
                return html;
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            // SelectNodes yields null (not an empty collection) when nothing matches.
            var nodes = doc.DocumentNode.SelectNodes("//img");
            if (nodes == null)
            {
                return html;
            }

            // De-duplicate at scheduling time; checking a result list that is filled later by the
            // workers would never see the duplicates.
            var scheduled = new HashSet<string>(StringComparer.Ordinal);
            var downloads = new List<Task<CrawImgView>>();

            foreach (var node in nodes)
            {
                var srcAttribute = node.Attributes["src"];
                if (srcAttribute == null)
                {
                    continue; // <img> without a src attribute
                }

                string source = srcAttribute.Value;
                if (string.IsNullOrWhiteSpace(source))
                {
                    continue;
                }

                // Skip images that were already processed.
                if (source.IndexOf(HostedMarker, StringComparison.OrdinalIgnoreCase) != -1)
                {
                    continue;
                }

                if (!scheduled.Add(source))
                {
                    continue;
                }

                // Each worker returns its own result, so no shared collection is mutated
                // from several threads.
                downloads.Add(Task.Run(() => new CrawImgView
                {
                    Original = source,
                    ImgPath = GetImg(source)
                }));
            }

            // ConfigureAwait(false): callers that block on the returned task must not deadlock.
            CrawImgView[] results = await Task.WhenAll(downloads).ConfigureAwait(false);

            // Replace longer URLs first so that a URL which is a prefix of another one
            // cannot corrupt the longer one.
            Array.Sort(results, (a, b) => b.Original.Length.CompareTo(a.Original.Length));

            foreach (var item in results)
            {
                if (!string.Equals(item.Original, item.ImgPath, StringComparison.Ordinal))
                {
                    html = html.Replace(item.Original, item.ImgPath);
                }
            }

            return html;
        }

        /// <summary>
        /// Downloads one image and uploads it through <see cref="QiniuUpload"/>.
        /// </summary>
        /// <param name="original">The image URL exactly as it appears in the HTML.</param>
        /// <returns>
        /// The re-hosted URL on success; otherwise <paramref name="original"/> unchanged, so that a
        /// failed image never alters the HTML.
        /// </returns>
        private static string GetImg(string original)
        {
            if (string.IsNullOrWhiteSpace(original))
            {
                return original;
            }

            // The download uses the URL without its query string (as before).
            string downloadUrl = original;
            int queryStart = downloadUrl.IndexOf('?');
            if (queryStart > -1)
            {
                downloadUrl = downloadUrl.Substring(0, queryStart);
            }

            // Only absolute http/https URLs can be fetched; relative, data: and file: sources are left alone.
            Uri uri;
            if (!Uri.TryCreate(downloadUrl, UriKind.Absolute, out uri)
                || (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps))
            {
                return original;
            }

            try
            {
                var request = (HttpWebRequest)WebRequest.Create(uri);
                using (var response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        return original;
                    }

                    string contentType = response.ContentType;
                    if (contentType == null
                        || contentType.IndexOf("image", StringComparison.OrdinalIgnoreCase) == -1)
                    {
                        return original;
                    }

                    byte[] bytes;
                    using (var stream = response.GetResponseStream())
                    using (var buffer = new MemoryStream())
                    {
                        stream.CopyTo(buffer);
                        bytes = buffer.ToArray();
                    }

                    // Take the extension from the URL path only, so the host name
                    // (e.g. ".com") is never mistaken for a file extension.
                    string extension = Path.GetExtension(uri.AbsolutePath);
                    if (string.IsNullOrWhiteSpace(extension))
                    {
                        extension = ".jpg";
                    }

                    FileUploadResult result = QiniuUpload(extension, bytes, CropWidth, CropHeight);
                    if (result != null && result.IsSucceed)
                    {
                        return FileDomain + result.FileUrl;
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: one broken image must not fail the whole submission,
                // but the failure is recorded instead of being silently swallowed.
                System.Diagnostics.Trace.TraceWarning(
                    "CrawlerHelper: could not re-host '{0}': {1}", original, ex.Message);
            }

            return original;
        }

        /// <summary>
        /// Posts the image bytes to the Qiniu upload site and deserializes its reply.
        /// </summary>
        /// <param name="fileDoc">The file extension, sent as the <c>type</c> query parameter.</param>
        /// <param name="byteFile">The raw image bytes.</param>
        /// <param name="uEditorImgSize">
        /// Crop width. A <c>size</c> parameter is only sent when this string is longer than one
        /// character; null or a single character such as "0" sends none.
        /// </param>
        /// <param name="uEditorheightImgSize">Crop height, used together with the width.</param>
        /// <returns>The upload result deserialized from the UTF-8 response body.</returns>
        private static FileUploadResult QiniuUpload(string fileDoc, byte[] byteFile, string uEditorImgSize, string uEditorheightImgSize)
        {
            const string uploadUrl = "http://www.qiniu.com/upload"; // self-hosted Qiniu upload site

            string sizeQuery = (uEditorImgSize != null && uEditorImgSize.Length > 1)
                ? "&size=" + uEditorImgSize + "x" + uEditorheightImgSize
                : string.Empty;

            string type = Uri.EscapeDataString(fileDoc ?? string.Empty);

            using (var client = new WebClient())
            {
                byte[] reply = client.UploadData($"{uploadUrl}?type={type}{sizeQuery}", "POST", byteFile);
                return Encoding.UTF8.GetString(reply).DeserializeObject<FileUploadResult>();
            }
        }
    }

    /// <summary>
    /// Pairs an image URL found in the HTML with the URL that replaces it.
    /// </summary>
    public class CrawImgView
    {
        /// <summary>The image URL as it appears in the source HTML.</summary>
        /// <value>The original.</value>
        public string Original { get; set; }

        /// <summary>The URL that replaces it (equal to <see cref="Original"/> when re-hosting failed).</summary>
        /// <value>The imgpath.</value>
        public string ImgPath { get; set; }
    }
}
3、封装好之后,可以开始调用了
// Assign back to the existing `content` variable: declaring it again here while also passing it
// as the argument does not compile. Await the call instead of blocking on .Result, which can
// deadlock under a synchronization context. In a caller that cannot be made async, use
// CrawlerHelper.CrawlerAsync(content).GetAwaiter().GetResult() instead.
content = await CrawlerHelper.CrawlerAsync(content); // processing finished
4、把处理完成的content内容写入数据库即可。
5、注意项:
a、如果图片很多或者很大,这个过程可能会有点慢,尽管图片是异步下载的。
b、内容中的背景图不会被处理,因为目前只获取了img标签的src属性;我暂时没时间写这部分,有需要的话可以在我的方法基础上自行改造。
留下您的脚步
最近评论