利用 C# 的 CEF3(CefSharp)模拟浏览器操作,抓取京东商城关键词商品列表(商品标题、商品 ID、商品链接、商品价格)
利用 C# 的 CEF3(CefSharp)模拟浏览器操作,抓取京东商城指定关键词的商品列表。
可获取每件商品的标题、商品 ID、商品链接和商品价格。
/// <summary>
/// Form hosting an embedded CefSharp browser that searches jd.com for <see cref="key"/>
/// and writes the URL, title, product code and price of each listed product to the
/// debug output.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>The embedded Chromium browser; created in <see cref="Form1_Load"/>.</summary>
    public ChromiumWebBrowser browser { get; set; }

    /// <summary>Keyword typed into the jd.com search box.</summary>
    public string key = "手机";

    // Sort tab to select on the result page: "综合", "销量", "价格" or "评论数".
    string jdTab = "综合";

    // 0 = sort tab not clicked yet for the current search, 1 = already clicked.
    int goTab = 0;

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>Creates the browser on the jd.com home page and subscribes to frame-load events.</summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        browser = new ChromiumWebBrowser("https://www.jd.com/");
        this.Controls.Add(browser);
        browser.FrameLoadEnd += Web_FrameLoadEnd;
    }

    /// <summary>
    /// Dispatches on the URL that finished loading: submits the search from the home page,
    /// returns to the home page from the login redirect, and scrapes search result pages.
    /// </summary>
    private async void Web_FrameLoadEnd(object sender, FrameLoadEndEventArgs e)
    {
        Debug.WriteLine("进入页面:" + e.Url);

        // The event is raised for sub-frames too; only the main document should be driven.
        if (e.Frame == null || !e.Frame.IsMain)
        {
            return;
        }

        if (e.Url.Contains("https://www.jd.com/"))
        {
            // Home page: a new search starts, so the sort tab has to be applied again.
            goTab = 0;
            await RunScriptAsync("$('#key').focus()");
            await RunScriptAsync("$('#key').val('" + EscapeForJsString(key) + "')");
            await RunScriptAsync("$('.button').click()");
        }
        else if (e.Url.Contains("https://passport.jd.com/uc/login"))
        {
            // Redirected to the login page (anti-crawling): go back home and search again.
            browser.Load("https://www.jd.com/");
        }
        else if (e.Url.Contains("Search?"))
        {
            await ScrapeSearchResultsAsync();
        }
    }

    /// <summary>
    /// Checks that the keyword produced results, applies the configured sort once,
    /// scrolls to the bottom of the page and logs every product found.
    /// </summary>
    private async System.Threading.Tasks.Task ScrapeSearchResultsAsync()
    {
        // Text of the "no result" / "corrected keyword" banners, if the page shows any.
        string searchResult = (await RunScriptAsync("(function(){ return $('.ns-content').text();})()")) ?? "";
        searchResult += (await RunScriptAsync("(function(){ return $('.check-error').text();})()")) ?? "";
        Debug.WriteLine("searchResult:" + searchResult);

        if (searchResult.Contains("没有") || searchResult.Contains("仍然搜索") || searchResult.Contains("点击查看"))
        {
            Debug.WriteLine("没有找到词:" + key);
            return;
        }

        // Click the sort tab only once per search. NOTE(review): if the click reloads the
        // page, this handler runs again and clicking unconditionally would never settle.
        if (goTab == 0)
        {
            int sortIndex = -1;
            switch (jdTab)
            {
                case "综合": sortIndex = 0; break;
                case "销量": sortIndex = 1; break;
                case "价格": sortIndex = 4; break;
                case "评论数": sortIndex = 2; break;
            }

            if (sortIndex >= 0)
            {
                await RunScriptAsync("$('.f-sort a')[" + sortIndex + "].click()");
                goTab = 1;
            }
        }

        // Wait without blocking the calling thread, then scroll to the bottom and wait
        // again before reading the list (presumably so further items can load).
        await System.Threading.Tasks.Task.Delay(2000);
        browser.ExecuteScriptAsync("scrollTo(0, document.body.scrollHeight)");
        await System.Threading.Tasks.Task.Delay(3500);

        int max = 0; // number of product entries on the page
        string countText = await RunScriptAsync("(function(){ return $('.gl-item').length})()");
        if (countText == null || !int.TryParse(countText, out max))
        {
            max = 0;
        }

        // Valid indices are 0 .. max - 1.
        for (int index = 0; index < max; index++)
        {
            string url = await RunScriptAsync("(function(){ return $('.gl-item .p-name a')[" + index + "].href})()");
            string title = await RunScriptAsync("(function(){ return $('.gl-item .p-name em')[" + index + "].innerText})()");
            string price = await RunScriptAsync("(function(){ return $('.gl-item .p-price strong')[" + index + "].innerText.replace('¥','')})()");

            // An incomplete item is skipped on its own; it must not suppress later items.
            if (url == null || title == null || price == null)
            {
                Debug.WriteLine("skipped item " + index + ": incomplete data");
                continue;
            }

            // Product code = "JD" followed by every digit found in the product URL.
            string code = "JD" + Regex.Replace(url, @"[^\d]*", "");
            title = title.Replace("\n", "");
            Debug.WriteLine("url:" + url + " title=" + title + " code=" + code + " price=" + price);
        }
    }

    /// <summary>
    /// Evaluates <paramref name="script"/> in the main frame.
    /// </summary>
    /// <returns>
    /// The script result converted to a string, or <c>null</c> when the script failed,
    /// threw, or produced no value.
    /// </returns>
    private async System.Threading.Tasks.Task<string> RunScriptAsync(string script)
    {
        try
        {
            var response = await browser.GetMainFrame().EvaluateScriptAsync(script);
            if (response == null || !response.Success || response.Result == null)
            {
                return null;
            }

            return response.Result.ToString();
        }
        catch (Exception ex)
        {
            // Best-effort scraping: log the failure and let the caller treat it as missing data.
            Debug.WriteLine("script evaluation failed: " + ex.Message);
            return null;
        }
    }

    /// <summary>Escapes a value so it can be embedded in a single-quoted JavaScript string literal.</summary>
    private static string EscapeForJsString(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return "";
        }

        return value
            .Replace("\\", "\\\\")
            .Replace("'", "\\'")
            .Replace("\r", "\\r")
            .Replace("\n", "\\n");
    }
}
抓取结果: