Java 爬取百度首页HTML源码

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/uotail/article/details/83143641

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;

/**
 * Downloads the HTML source of the Baidu home page and prints it to standard output.
 *
 * <p>Created 2018/10/18 11:11.
 *
 * @author ljy
 * @version V1.0
 */
public class pageTest  {

    /** Address of the page whose source is fetched. */
    private static final String PAGE_URL = "http://www.baidu.com";

    /**
     * Fetches {@link #PAGE_URL}, decodes the response body as UTF-8 and prints it.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the URL cannot be opened or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        // First attempt, kept for reference. The author observed garbled text on the
        // console with this version while d:/data.txt was fine. The likely cause is that
        // each 1024-byte chunk is decoded on its own, so a multi-byte UTF-8 character
        // split across two chunks is decoded incorrectly; the raw bytes written to the
        // file are unaffected.
//        URL url = new URL("http://www.baidu.com");
//        URLConnection connection = url.openConnection();
//        InputStream is = connection.getInputStream();
//        OutputStream os = new FileOutputStream("d:/data.txt");
//        byte[] buffer = new byte[1024];
//        int flag = 0;
//        while (-1 != (flag = is.read(buffer, 0, buffer.length))) {
//            os.write(buffer, 0, flag);
//            System.out.println(new String(buffer, 0, flag, "utf-8"));
//        }
//        os.close();
//        is.close();

        // Working version: a Reader decodes the stream as a whole, so characters that
        // straddle a buffer boundary are handled correctly.
        URL pageUrl = new URL(PAGE_URL);
        // try-with-resources closes the reader (and the network stream beneath it)
        // even when reading fails part-way through.
        try (Reader reader = new BufferedReader(
                new InputStreamReader(pageUrl.openStream(), StandardCharsets.UTF_8))) {
            System.out.println(readAll(reader));
        }
    }

    /**
     * Reads every remaining character from {@code reader} into a string.
     *
     * <p>Characters are copied in chunks rather than line by line so that line
     * terminators are preserved exactly as they were received.
     *
     * @param reader source of characters; not closed by this method
     * @return all characters read until end of stream
     * @throws IOException if reading fails
     */
    private static String readAll(Reader reader) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] chunk = new char[4096];
        int count;
        while ((count = reader.read(chunk)) != -1) {
            text.append(chunk, 0, count);
        }
        return text.toString();
    }

}

猜你喜欢

转载自blog.csdn.net/uotail/article/details/83143641