当前位置 : 主页 > 网络编程 > JavaScript >

Node.js 抓取页面

来源:互联网 收集:自由互联 发布时间:2021-06-28
gistfile1.txt const util = require('util');const http = require("http");const fs = require("fs");const cheerio = require("cheerio");function download(url, callback) { http.get(url, res => { let data = ''; res.setEncoding('utf8'); res.on('dat…
gistfile1.txt
const util = require('util');
const http = require("http");
const fs = require("fs");
const cheerio = require("cheerio");

/**
 * Download the body of `url` with an HTTP GET and pass it to `callback`.
 *
 * The body is decoded as UTF-8 and delivered as a single string once the
 * response ends. The HTTP status code is not inspected, so error pages are
 * delivered like any other body.
 *
 * @param {string|URL} url - Target handed straight to `http.get`.
 * @param {function(?Error, ?string): void} callback - Node-style callback,
 *     invoked at most once with `(null, body)` or `(error, null)`.
 */
function download(url, callback) {
    // A failing stream can emit more than one terminal event (for example
    // `error` followed by `end`), so only the first outcome is reported.
    let settled = false;
    const finish = (error, body) => {
        if (settled) {
            return;
        }
        settled = true;
        callback(error, body);
    };

    http.get(url, res => {
        let data = '';
        res.setEncoding('utf8');
        res.on('data', chunk => {
            data += chunk;
        });
        res.on('end', () => {
            finish(null, data);
        });
        // Without this listener a failure while reading the body never
        // reaches the caller (an unhandled `error` event is thrown instead).
        res.on('error', error => {
            finish(error, null);
        });
    }).on('error', error => {
        // Request-level failures: DNS lookup, refused connection, and so on.
        finish(error, null);
    });
}
// Promise-returning wrapper around `download`; resolves with the body string.
const downloadAsync = util.promisify(download);

/**
 * Run the definition check over every entry returned by the Ciyu model.
 *
 * Loads `./app/models/ciyu-model.js`, asks a fresh model instance for its
 * list (called with an empty string), then checks each entry's `name` one
 * after another. Any error raised along the way is caught and only its
 * message is printed.
 */
async function test() {
    try {
        const CiyuModel = require('./app/models/ciyu-model.js');
        const words = await new CiyuModel().getListAsync('');
        // Words are checked strictly one at a time; each lookup finishes
        // before the next one starts.
        for (const entry of words) {
            await testOneWord(entry.name);
        }
    } catch (failure) {
        console.log(failure.message);
    }
}

/**
 * Check whether the huadict page for `word` contains a definition.
 *
 * Fetches `http://my.huadict.com/<URI-encoded word>`, parses the HTML with
 * cheerio and looks for elements matching `.def`. The word is always printed
 * to the console; when no `.def` element exists a line is also appended to
 * the log through `writeLog`. Errors are caught and printed, never rethrown.
 *
 * @param {string} word - The word to look up.
 * @returns {Promise<void>}
 */
async function testOneWord(word) {
    try {
        const pageUrl = `http://my.huadict.com/${encodeURIComponent(word)}`;
        const markup = await downloadAsync(pageUrl);
        const page = cheerio.load(markup);
        const hasNoDefinition = !page('.def').length;

        // The word is echoed whether or not a definition was found.
        console.log(word);
        if (hasNoDefinition) {
            writeLog(`“${word}”没有定义!`);
        }
    } catch (failure) {
        console.log(`处理“${word}”时出错,${failure.message}`);
    }
}

/**
 * Append `message`, followed by a newline, to `./huadict.log`.
 *
 * The write is synchronous, so the line has been handed to the file system
 * by the time this function returns.
 *
 * @param {string} message - Text of the log line, without trailing newline.
 */
function writeLog(message) {
    const line = message + '\n';
    fs.appendFileSync('./huadict.log', line);
}

// Script entry point. The commented-out calls are manual debugging aids:
// the first checks a single word, the others write a sample line to the log.
//testOneWord('中国');
// Run the check over every word returned by the model.
test();
//writeLog('我是的方式地方l');
//writeLog('我是的方式地方l');
网友评论