咳咳,今天给大家分享一个多线程的知识点,以及线程池。最近的任务是写爬虫,有五百万个网址,单线程循环很慢,于是考虑用多线程。今天看了一下多线程,分为继承 Thread 和实现 Runnable 接口两种方式,区别在于实现 Runnable 接口可以共享资源,当然,也就要考虑线程安全问题。都知道,for 循环中的 int i 是非线程安全的,先不聊原子性,只说多线程:实现 Runnable 接口,刚开始会导致数据重复,查询资料,说是因为线程不安全导致的,因为涉及到资源争用等乱七八糟的东西。然后,我又了解了一下线程池,下面上代码,果断解决。正常单线程跑完这个程序需要七天,用这个多线程,我估计也就两天左右。
/**
 * Fetches pages with ids 1 (inclusive) to 2,500,000 (exclusive) on a fixed pool of
 * 77 worker threads and blocks until every submitted task has finished.
 *
 * <p>Each task prints a "before" marker, calls {@code GetHtml.grabDataGet} with the
 * page URL and the id, then prints an "after" marker. Replace the
 * {@code GetHtml.grabDataGet} call with your own business logic.
 *
 * @param args unused
 * @throws InterruptedException if the main thread is interrupted while throttling
 *         submissions or while waiting for the tasks to complete
 */
public static void main(String[] args) throws InterruptedException {
    final int firstId = 1;
    final int endIdExclusive = 2500000;

    ExecutorService executor = Executors.newFixedThreadPool(77);
    // One count per submitted task; derived from the loop bounds so the two cannot drift apart.
    CountDownLatch latch = new CountDownLatch(endIdExclusive - firstId);
    try {
        for (int i = firstId; i < endIdExclusive; i++) {
            // Author's own sleep helper (17 ms per the original note); without this
            // throttling the fetched data was usually null.
            ThreadUtil.sleep();
            final String str = String.valueOf(i);
            executor.submit(() -> {
                System.out.println("------执行前 i=" + str);
                try {
                    // start: the author's crawler call -- swap in your own business code here
                    GetHtml.grabDataGet("https://xxx" + str + ".html", str);
                    // end
                } finally {
                    // Count down even when the fetch throws; otherwise a single failed
                    // task would leave latch.await() below blocked forever.
                    latch.countDown();
                }
                System.out.println("------执行后 i=" + str);
            });
        }
        System.out.println("------等待所有线程执行完成");
        latch.await();
        System.out.println("------所有线程执行完成");
    } finally {
        // The pool's worker threads are non-daemon: without shutdown() the JVM never
        // exits. shutdown() still lets already-queued tasks run to completion.
        executor.shutdown();
    }
}
---------------------------------------
当然,看不懂 lambda 也没关系,下面是不使用 lambda 的普通写法:
/**
 * Lambda-free variant of the crawler driver: submits one {@code runnublerun} task per
 * page id to a fixed thread pool and waits for all of them to finish.
 */
public class Loadurl10 {

    /** First page id to fetch (inclusive). */
    private static final int FIRST_ID = 1;

    /** Last page id to fetch (inclusive). */
    private static final int LAST_ID = 5101000;

    /** Number of worker threads in the pool. */
    private static final int POOL_SIZE = 111;

    /**
     * Submits every page id in {@code [FIRST_ID, LAST_ID]} to the pool and blocks until
     * all tasks have completed.
     *
     * @param args unused
     * @throws InterruptedException if the main thread is interrupted while waiting
     */
    public static void main(String[] args) throws InterruptedException {
        ExecutorService executor = Executors.newFixedThreadPool(POOL_SIZE);
        // One count per submitted task.
        CountDownLatch latch = new CountDownLatch(LAST_ID - FIRST_ID + 1);
        try {
            for (int i = FIRST_ID; i <= LAST_ID; i++) {
                final String str = String.valueOf(i);
                // runnublerun itself never touches the latch, so wrap it; submitting it
                // bare would leave latch.await() below blocked forever.
                executor.submit(new LatchedTask(new runnublerun(str), latch));
            }
            System.out.println("------等待所有线程执行完成");
            latch.await();
            System.out.println("------所有线程执行完成");
        } finally {
            // Pool threads are non-daemon; without shutdown() the JVM would never exit.
            executor.shutdown();
        }
    }

    /** Runs a delegate task and always counts the latch down afterwards. */
    private static final class LatchedTask implements Runnable {
        private final Runnable delegate;
        private final CountDownLatch latch;

        LatchedTask(Runnable delegate, CountDownLatch latch) {
            this.delegate = delegate;
            this.latch = latch;
        }

        @Override
        public void run() {
            try {
                delegate.run();
            } finally {
                // Count down even if the delegate throws, so the waiter is never stranded.
                latch.countDown();
            }
        }
    }
}
/**
 * Task that fetches one page: builds the page URL from the id it was constructed with
 * and passes the URL and the id to {@code GetHtml.grabDataGet}, printing a marker line
 * before and after the call.
 */
class runnublerun implements Runnable {

    /** Page id in string form; used both in the URL and as the second fetch argument. */
    private String pageId;

    /**
     * @param str the page id, already converted to a string
     */
    public runnublerun(String str) {
        pageId = str;
    }

    @Override
    public void run() {
        final String id = pageId;
        System.out.println("------执行前 i=" + id);

        final String url = "https://jobs.51job.com/all/co" + id + ".html";
        GetHtml.grabDataGet(url, id);

        System.out.println("------执行后 i=" + id);
    }
}