当前位置 : 主页 > 编程语言 > java >

Java解析网页中的内容

来源:互联网 收集:自由互联 发布时间:2021-06-28
摘要:本文给出一个基于 Swing 的 Java 示例程序(InternetContentFrame):在文本框中输入网址并点击"解析网页"按钮后,程序通过 URLConnection 按 UTF-8 逐行读取该网页的内容,并显示在文本域中。完整代码见下文。
解析网页中的内容
package com.wsq.utils.download.demo04;

import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.EventQueue;
import java.awt.Font;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;

/**
 * Swing window that downloads a web page and displays its raw text.
 *
 * <p>The user types an address into the text field and presses the button; every line read from
 * that address is appended to the text area below.
 *
 * <p>Created by wsq on 2017/12/31.
 *
 * @author wsq
 */
@SuppressWarnings("serial")
public class InternetContentFrame extends JFrame {

    /** Maximum time, in milliseconds, to wait for the connection to be established. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10000;

    /** Maximum time, in milliseconds, to wait for data once connected. */
    private static final int READ_TIMEOUT_MILLIS = 30000;

    /** Text area that receives the downloaded lines. */
    private JTextArea ta_content;

    /** Text field in which the user enters the address to download. */
    private JTextField tf_address;

    /**
     * Launches the application by creating the frame on the event dispatch thread.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        EventQueue.invokeLater(new Runnable() {
            @Override
            public void run() {
                try {
                    InternetContentFrame frame = new InternetContentFrame();
                    frame.setVisible(true);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        });
    }

    /**
     * Creates the frame: an input row (label, address field, button) at the top and a scrollable
     * text area filling the rest of the window.
     */
    public InternetContentFrame() {
        super();
        setTitle("解析网页中的内容");
        setBounds(100, 100, 484, 375);
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        final JPanel panel = new JPanel();
        getContentPane().add(panel, BorderLayout.NORTH);

        final JLabel label = new JLabel();
        label.setText("输入网址:");
        panel.add(label);

        tf_address = new JTextField();
        tf_address.setPreferredSize(new Dimension(260, 25));
        panel.add(tf_address);

        final JButton button = new JButton();
        button.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                // NOTE(review): the download runs on the event dispatch thread, so the window
                // does not repaint until it finishes, fails, or hits one of the timeouts.
                String address = tf_address.getText().trim();
                // Results are appended; earlier output in the text area is kept.
                for (String line : getURLCollection(address)) {
                    ta_content.append(line + "\n");
                }
            }
        });
        button.setText("解析网页");
        panel.add(button);

        final JScrollPane scrollPane = new JScrollPane();
        getContentPane().add(scrollPane, BorderLayout.CENTER);

        ta_content = new JTextArea();
        ta_content.setFont(new Font("", Font.BOLD, 14));
        scrollPane.setViewportView(ta_content);
    }

    /**
     * Reads the resource at the given address line by line.
     *
     * <p>The response is always decoded as UTF-8. Any failure (malformed address, connection
     * error, timeout, read error) is reported with {@code printStackTrace()} and the lines
     * collected up to that point are returned, so the result may be empty or partial but is
     * never {@code null}.
     *
     * @param urlString the address to download
     * @return the lines read from the address, in order
     */
    public Collection<String> getURLCollection(String urlString) {
        Collection<String> urlCollection = new ArrayList<String>();
        try {
            URL url = new URL(urlString);
            URLConnection conn = url.openConnection();
            // Bound both phases so an unresponsive server cannot block the caller forever.
            conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            conn.setReadTimeout(READ_TIMEOUT_MILLIS);
            conn.connect();
            // Both the stream and the reader are closed on every exit path.
            try (InputStream is = conn.getInputStream();
                    BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"))) {
                String nextLine;
                while ((nextLine = br.readLine()) != null) {
                    urlCollection.add(nextLine);
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return urlCollection;
    }
}
   
  
 
解析网页中的内容.png
网友评论