当前位置 : 主页 > 网络编程 > JavaScript >

node实现简单爬虫

来源:互联网 收集:自由互联 发布时间:2021-06-28
server.js const express = require('express'), request = require('superagent'), // cheerio抓取网页数据 cheerio = require('cheerio'); app = express() app.use(express.static('www')) app.get('/jokes', (req, res) => { request .get('https://www
server.js
// Third-party dependencies: express (HTTP server), superagent (outgoing HTTP
// requests, bound to `request`), cheerio (HTML parsing).
const express = require('express'),
    request = require('superagent'),
    // cheerio parses the fetched page so it can be queried with CSS selectors
    cheerio = require('cheerio');
// Create the Express application. Declared with `const` so that it is not an
// implicit global (an undeclared assignment throws in strict mode / ES modules).
const app = express()
// Serve static files from the `www` directory.
app.use(express.static('www'))
/**
 * GET /jokes
 *
 * Fetches the qiushibaike.com front page (sending an iPhone User-Agent),
 * builds one record per <article> found under <main>, and replies with JSON.
 *
 * Success reply:          { code: 'success', message: '查询成功', data: [...] }
 * Failure reply (HTTP 502): { code: 'error', message: '查询失败', data: [] }
 */
app.get('/jokes', (req, res) => {
    request
        .get('https://www.qiushibaike.com/')
        .set('User-Agent', 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1')
        .end((err, response) => {
            // When the upstream request fails there may be no response object;
            // answer with an error instead of throwing on `response.text`
            // inside the callback and leaving the client without a reply.
            if (err || !response) {
                console.error('failed to fetch jokes page:', err)
                res.status(502).json({
                    code: 'error',
                    message: '查询失败',
                    data: []
                })
                return
            }

            const $ = cheerio.load(response.text)
            const jokes = []
            $('main').find('article').each((i, el) => {
                // Wrap the element once and reuse it for every field lookup.
                const $article = $(el)
                jokes.push({
                    author: $article.find('.username').text(),
                    image: $article.find('.avatar').css("background-image"),
                    age: $article.find('.age').text(),
                    text: $article.find('.text').text(),
                    godname: $article.find('.comment .item .username').text(),
                    godWord: $article.find('.comment .item .text').text(),
                    href: $article.find('.text').attr('href')
                })
            })
            res.json({
                code: 'success',
                message: '查询成功',
                data: jokes
            })
        })
})

// Start the HTTP server on port 3000 and log a message once it is listening.
function onListening() {
    console.log('node is ok')
}

app.listen(3000, onListening)
效果展示.png 爬虫代码整合.rar 爬虫代码整合.rar
上一篇:Hexo toc patch
下一篇:提取QQ视频地址信息
网友评论