// Java simple web-crawler example (original article title: "java模仿网络爬虫简单案例,直接看代码" — "simple Java crawler demo, see the code below")
package com.example.demo1;import java.io.*;
import java.net.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author: YinLei
* Package: com.example.demo1
* @date: 2021/9/7 20:23
* @Description: java爬虫测试
* @version: 1.0
*/
public class Crawler {
public static void main(String[] args) {
URL url = null; //用于定义url类型
URLConnection urlConnection = null; //用于定义url连接类型
BufferedReader br = null; //缓存读取
PrintWriter pw = null; //输出流
try {
url = new URL("http://search.dangdang.com/?key=%BB%FA%D0%B5%B1%ED&act=input");//爬取的网址、这里爬取的是一个生物网站
urlConnection = url.openConnection(); //url连接
pw = new PrintWriter(new FileWriter("D:/SiteURL.txt"), true);//将爬取到的内容放到D盘的SiteURL文件中
System.out.println("Stay Here1!!");
br = new BufferedReader(new InputStreamReader(
urlConnection.getInputStream(),"UTF-8"));
String buf = null;
System.out.println("Stay Here2!!");
Pattern p = Pattern.compile("1\\d{10}");
while ((buf = br.readLine()) != null) {
// System.out.println(buf);
Matcher m = p.matcher(buf);
while (m.find()){
pw.println(m.group());
}
}
System.out.println("success!");
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}