企业客户做大数据抓取时都会用到爬虫IP,质量好的爬虫IP可以让爬虫工作事半功倍,因此如何高效地爬取目标数据就显得尤为重要。影响抓取效果的不仅仅是爬虫IP的质量,还可能是技术人员编写代码时的优化问题。下文是使用Java语言通过代理IP发送请求的代码示例,可以一起看看。
Java HttpURLConnection
package com.qgproxy;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.PasswordAuthentication;
import java.net.Proxy;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * {@link Authenticator} that answers authentication challenges with a fixed
 * user name and password.
 *
 * <p>NOTE(review): the credentials are returned for every challenge this
 * authenticator receives, not only for proxy challenges. If that matters,
 * check {@code getRequestorType()} before answering.
 */
class QGProxyAuthenticator extends Authenticator {
    private final String user;
    private final String password;

    /**
     * Creates an authenticator holding the given credentials.
     *
     * @param user     account name sent in response to a challenge
     * @param password password sent in response to a challenge; must not be null
     */
    public QGProxyAuthenticator(String user, String password) {
        this.user = user;
        this.password = password;
    }

    /**
     * Returns the stored credentials.
     *
     * @return a new {@link PasswordAuthentication} built from the stored user and password
     */
    @Override
    protected PasswordAuthentication getPasswordAuthentication() {
        return new PasswordAuthentication(user, password.toCharArray());
    }
}
/**
 * Demo: fetches a page through an authenticated HTTP proxy using
 * {@link HttpURLConnection} and prints the response body.
 *
 * <p>NOTE: the target here is plain {@code http://}. For {@code https://}
 * targets, recent JDKs disable Basic authentication for proxy tunnelling by
 * default (system property {@code jdk.http.auth.tunneling.disabledSchemes}).
 */
class QGProxy {
    /** Connect and read timeout, so an unreachable proxy cannot block forever. */
    private static final int TIMEOUT_MILLIS = 10_000;

    /** Size of the copy buffer used by {@link #readStream(InputStream)}. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Sends one GET request to the target URL through the proxy and prints the body.
     * Failures are reported on standard error instead of being thrown.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String targetUrl = "http://jshk.com.cn";
        String proxyIp = "219.151.125.106";
        int proxyPort = 31615;
        String authKey = "895314XY";
        String password = "24D6YB309ZCB";

        HttpURLConnection connection = null;
        try {
            URL url = new URL(targetUrl);
            // The JDK looks up proxy credentials through the process-wide default Authenticator.
            Authenticator.setDefault(new QGProxyAuthenticator(authKey, password));
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyIp, proxyPort));

            connection = (HttpURLConnection) url.openConnection(proxy);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);

            byte[] response = readStream(connection.getInputStream());
            // Decode with the charset the server declared rather than the platform default.
            System.out.println(new String(response, charsetOf(connection.getContentType())));
        } catch (IOException e) {
            // Print the exception itself: getLocalizedMessage() may be null and hides the type.
            System.err.println("Request via proxy failed: " + e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Reads the stream to its end and closes it, even when reading fails.
     *
     * @param inStream stream to drain; closed by this method
     * @return all bytes read from the stream (empty array for an empty stream)
     * @throws IOException if reading or closing the stream fails
     */
    public static byte[] readStream(InputStream inStream) throws IOException {
        try (InputStream in = inStream;
             ByteArrayOutputStream collected = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = in.read(buffer)) != -1) {
                collected.write(buffer, 0, len);
            }
            return collected.toByteArray();
        }
    }

    /**
     * Extracts the charset from a Content-Type header value.
     *
     * @param contentType header value such as {@code text/html; charset=GBK}; may be null
     * @return the declared charset, or UTF-8 when none is declared or it is not supported
     */
    private static Charset charsetOf(String contentType) {
        final String key = "charset=";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (param.regionMatches(true, 0, key, 0, key.length())) {
                    String label = param.substring(key.length()).replace("\"", "").trim();
                    try {
                        return Charset.forName(label);
                    } catch (IllegalArgumentException ignored) {
                        // Malformed or unsupported charset label: fall back to UTF-8 below.
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }
}
Java okhttp
package com.qgproxy;import okhttp3.*;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.util.concurrent.TimeUnit;
public class QGProxy {
final static String proxyIp = "219.151.125.106";
final static Integer proxyPort = 31615;
final static String authKey = "895314XY";
final static String password = "24D6YB309ZCB";
static OkHttpClient client;
static {
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyIp, proxyPort));
Authenticator proxyAuthenticator = (route, response) -> {
String credential = Credentials.basic(authKey, password);
return response.request().newBuilder().header("Proxy-Authorization", credential).build();
};
client = new OkHttpClient().newBuilder()
.connectTimeout(10, TimeUnit.SECONDS)
.readTimeout(10, TimeUnit.SECONDS)
.proxy(proxy)
.proxyAuthenticator(proxyAuthenticator)
.connectionPool(new ConnectionPool(4, 2, TimeUnit.SECONDS))
.build();
}
public static void main(String[] args) throws IOException {
Request request = new Request.Builder().url("http://jshk.com.cn").build();
Response response = client.newCall(request).execute();
System.out.println(response.body().string());
}
}