当前位置 : 主页 > 编程语言 > c语言 >

C# 利用 CEF3(CefSharp)抓取京东关键词商品列表,以“手机”为例

来源:互联网 收集:自由互联 发布时间:2021-06-25
利用 C# 的 CEF3(CefSharp)模拟浏览器操作,抓取京东商城关键词商品列表,获取商品标题、商品 ID、商品链接和商品价格。 public partial class Form1 : Form { public ChromiumWebBrowser browser { get; set; } public string ……

利用 C# 的 CEF3(CefSharp)模拟浏览器操作,抓取京东商城的关键词商品列表。

获取的字段包括:商品标题、商品 ID、商品链接、商品价格。

 /// <summary>
 /// WinForms window that hosts a CefSharp browser, searches JD.com for
 /// <see cref="key"/> and writes each product's URL, title, id code and
 /// price to the debug output.
 /// </summary>
 public partial class Form1 : Form
    {
        // Delays that give the page time to react after a sort click / scroll.
        private const int SortSettleDelayMs = 2000;
        private const int LazyLoadDelayMs = 3500;

        /// <summary>The embedded Chromium browser; created in <see cref="Form1_Load"/>.</summary>
        public ChromiumWebBrowser browser { get; set; }

        /// <summary>Keyword typed into the JD search box.</summary>
        public string key = "手机";

        // Sort tab to click on the result page: "综合", "销量", "评论数" or "价格".
        string jdTab = "综合";

        // Set to 1 once a sort tab has been clicked.
        // NOTE(review): nothing in this class reads the flag - confirm whether it
        // was meant to stop the sort click from being repeated on later loads.
        int goTab = 0;

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Creates the browser on the JD home page and starts listening for page loads.</summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            browser = new ChromiumWebBrowser("https://www.jd.com/");
            this.Controls.Add(browser);
            browser.FrameLoadEnd += Web_FrameLoadEnd;
        }

        /// <summary>
        /// Drives the scrape as pages finish loading: home page -> submit the search,
        /// login page -> go back to the home page, search page -> read the products.
        /// </summary>
        private async void Web_FrameLoadEnd(object sender, FrameLoadEndEventArgs e)
        {
            Debug.WriteLine("进入页面:" + e.Url);

            // FrameLoadEnd fires for every frame; only the main frame drives the workflow.
            if (!e.Frame.IsMain)
            {
                return;
            }

            // An exception escaping an async void handler cannot be observed by any
            // caller, so log failures here instead of letting them tear down the app.
            try
            {
                if (e.Url.Contains("https://www.jd.com/"))
                {
                    await SubmitSearchAsync();
                }
                else if (e.Url.Contains("https://passport.jd.com/uc/login"))
                {
                    // Redirected to the login page: return to the home page and search again.
                    browser.Load("https://www.jd.com/");
                }
                else if (e.Url.Contains("Search?"))
                {
                    await ScrapeSearchResultsAsync();
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine("Web_FrameLoadEnd failed: " + ex);
            }
        }

        /// <summary>Fills the home-page search box with <see cref="key"/> and clicks the search button.</summary>
        private async System.Threading.Tasks.Task SubmitSearchAsync()
        {
            var frame = browser.GetMainFrame();
            await frame.EvaluateScriptAsync(" $('#key').focus()");
            await frame.EvaluateScriptAsync(" $('#key').val('" + EscapeForJsString(key) + "')");
            await frame.EvaluateScriptAsync(" $('.button').click()");
        }

        /// <summary>
        /// On a search result page: checks that the keyword produced results, clicks the
        /// configured sort tab, scrolls to the bottom, then logs every product found.
        /// </summary>
        private async System.Threading.Tasks.Task ScrapeSearchResultsAsync()
        {
            // Text of the "no result" / "error" banners; empty when neither is present.
            string noResultText = await EvaluateToStringAsync("(function(){ return $('.ns-content').text();})()");
            string errorText = await EvaluateToStringAsync("(function(){ return $('.check-error').text();})()");
            string searchResult = (noResultText ?? "") + (errorText ?? "");
            Debug.WriteLine("searchResult:" + searchResult);

            bool keywordValid = !searchResult.Contains("没有")
                && !searchResult.Contains("仍然搜索")
                && !searchResult.Contains("点击查看");
            if (!keywordValid)
            {
                Debug.WriteLine("没有找到词:" + key);
                return;
            }

            int sortIndex = GetSortTabIndex(jdTab);
            if (sortIndex >= 0)
            {
                await browser.GetMainFrame().EvaluateScriptAsync("$('.f-sort a')[" + sortIndex.ToString() + "].click()");
                goTab = 1;
            }

            // Wait without blocking a thread, then scroll down so the rest of the list loads.
            await System.Threading.Tasks.Task.Delay(SortSettleDelayMs);
            browser.ExecuteScriptAsync(" scrollTo(0, document.body.scrollHeight)");
            await System.Threading.Tasks.Task.Delay(LazyLoadDelayMs);

            int itemCount;
            string countText = await EvaluateToStringAsync("(function(){ return $('.gl-item').length})()");
            if (!int.TryParse(countText, out itemCount))
            {
                itemCount = 0;
            }

            // Valid indices are 0 .. itemCount - 1.
            for (int index = 0; index < itemCount; index++)
            {
                // Scope every lookup to one .gl-item so url, title and price
                // always belong to the same product.
                string item = "$('.gl-item').eq(" + index.ToString() + ")";

                string url = await EvaluateToStringAsync("(function(){ return " + item + ".find('.p-name a')[0].href})()");
                string title = await EvaluateToStringAsync("(function(){ return " + item + ".find('.p-name em')[0].innerText})()");
                string price = await EvaluateToStringAsync("(function(){ return " + item + ".find('.p-price strong')[0].innerText.replace('¥','')})()");

                // Skip only this item when a field is missing; later items are unaffected.
                if (url == null || title == null || price == null)
                {
                    continue;
                }

                // The product code is "JD" followed by every digit found in the URL.
                string code = "JD" + Regex.Replace(url, @"[^\d]*", "");
                title = title.Replace("\n", "");

                Debug.WriteLine("url:" + url + " title=" + title + " code=" + code + " price=" + price);
            }
        }

        /// <summary>
        /// Evaluates <paramref name="script"/> in the main frame.
        /// </summary>
        /// <returns>The result converted to a string, or null when the script failed or returned nothing.</returns>
        private async System.Threading.Tasks.Task<string> EvaluateToStringAsync(string script)
        {
            var response = await browser.GetMainFrame().EvaluateScriptAsync(script);
            if (!response.Success || response.Result == null)
            {
                return null;
            }

            return response.Result.ToString();
        }

        /// <summary>Maps a sort tab name to its position in '.f-sort a'; -1 when the name is unknown.</summary>
        private static int GetSortTabIndex(string tab)
        {
            switch (tab)
            {
                case "综合": return 0;
                case "销量": return 1;
                case "评论数": return 2;
                case "价格": return 4;
                default: return -1;
            }
        }

        /// <summary>Escapes text so it can be embedded in a single-quoted JavaScript string literal.</summary>
        private static string EscapeForJsString(string value)
        {
            if (value == null)
            {
                return "";
            }

            return value
                .Replace("\\", "\\\\")
                .Replace("'", "\\'")
                .Replace("\r", "\\r")
                .Replace("\n", "\\n");
        }
    }

抓取结果:

网友评论