当前位置 : 主页 > 编程语言 > java >

Java 爬虫部分实现

来源:互联网 收集:自由互联 发布时间:2021-06-28
gistfile1.txt package com.trs.exercise;import java.io.IOException;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;public class WebCrawler {public Document getHtmlTextByUrl(String url) {Document doc = null;try {// doc = Jsoup.connect(
gistfile1.txt
package com.trs.exercise;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Minimal jsoup-based page fetcher.
 *
 * <p>A fetch first submits a form POST to the URL; if that fails with an I/O error it retries
 * once with a plain GET.
 */
public class WebCrawler {
	/** Exclusive upper bound, in milliseconds, of the random pause taken before each fetch. */
	private static final int MAX_DELAY_MILLIS = 1000;

	/** Timeout handed to jsoup for the initial POST attempt, in milliseconds. */
	private static final int POST_TIMEOUT_MILLIS = 300000;

	/** Timeout handed to jsoup for the fallback GET attempt, in milliseconds. */
	private static final int GET_TIMEOUT_MILLIS = 5000000;

	/**
	 * Fetches and parses the page at {@code url}.
	 *
	 * <p>Waits a short random time, then POSTs {@code query=Java} with a {@code Mozilla} user
	 * agent and an {@code auth=token} cookie. If the POST throws an {@link IOException}, the
	 * failure is reported on standard error and a plain GET is attempted instead.
	 *
	 * @param url address of the page to fetch
	 * @return the parsed document, or {@code null} when both the POST and the GET fail
	 */
	public Document getHtmlTextByUrl(String url) {
		pauseRandomly();

		try {
			return Jsoup.connect(url)
					.data("query", "Java")
					.userAgent("Mozilla")
					.cookie("auth", "token")
					.timeout(POST_TIMEOUT_MILLIS)
					.post();
		} catch (IOException postFailure) {
			System.err.println("POST to " + url + " failed; retrying with GET");
			postFailure.printStackTrace();
		}

		try {
			return Jsoup.connect(url).timeout(GET_TIMEOUT_MILLIS).get();
		} catch (IOException getFailure) {
			// Report the second failure too, so a null result is never silent.
			System.err.println("GET fallback for " + url + " failed");
			getFailure.printStackTrace();
			return null;
		}
	}

	/**
	 * Sleeps for a random time below {@link #MAX_DELAY_MILLIS} so that consecutive requests are
	 * spaced out and the site is less likely to block the crawler.
	 */
	private static void pauseRandomly() {
		long delayMillis = (long) (Math.random() * MAX_DELAY_MILLIS);
		try {
			Thread.sleep(delayMillis);
		} catch (InterruptedException interrupted) {
			// Skip the rest of the pause but keep the interrupt visible to the caller.
			Thread.currentThread().interrupt();
		}
	}

	/**
	 * Fetches a sample page and prints the parsed document (or {@code null}) to standard output.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		WebCrawler wc = new WebCrawler();
		System.out.println(wc.getHtmlTextByUrl("https://user.qzone.qq.com/842161530"));
	}
}
网友评论