当前位置 : 主页 > 编程语言 > c++ >

获取百度网页上的内容(不包含 JS 动态生成的数据)

来源:互联网 收集:自由互联 发布时间:2021-07-03
gistfile1.txt public class Crawler { public static void main(String[] args) throws IOException { HttpClient client=new DefaultHttpClient(); HttpGet httpGet=new HttpGet("http://www.baidu.com/"); HttpResponse response=client.execute(httpGet);
gistfile1.txt
/**
 * Minimal crawler example: issues a plain HTTP GET against the Baidu home page
 * and prints the response body to standard output.
 *
 * <p>Only the bytes returned by the server are printed; no script is executed,
 * so content that the page would generate dynamically via JavaScript is absent.
 */
public class Crawler {
    /**
     * Fetches {@code http://www.baidu.com/}, prints the body decoded as UTF-8,
     * then a separator line, then a JSON dump of the response entity object.
     *
     * @param args unused
     * @throws IOException if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws IOException {
        HttpClient client = new DefaultHttpClient();
        HttpGet httpGet = new HttpGet("http://www.baidu.com/");
        HttpResponse response = client.execute(httpGet);
        HttpEntity entity = response.getEntity();

        String body;
        // try-with-resources closes the stream even when read() or decoding throws.
        try (InputStream in = entity.getContent()) {
            // Collect the raw bytes first and decode once at the end: decoding each
            // 1024-byte chunk separately corrupts any multi-byte UTF-8 character
            // that happens to straddle two reads.
            java.io.ByteArrayOutputStream raw = new java.io.ByteArrayOutputStream();
            byte[] chunk = new byte[1024];
            int count;
            while ((count = in.read(chunk)) != -1) {
                raw.write(chunk, 0, count);
            }
            body = raw.toString("UTF-8");
        }
        System.out.println(body);

        System.out.println("-----------------------");
        // NOTE(review): the entity's content stream has already been consumed and
        // closed at this point; confirm that serializing the entity object itself
        // is still the intended output here.
        System.out.println(JSON.toJSONString(entity));
    }
}
网友评论