多线程批量检测未注册域名

最近想注册一个域名,使用万网尝试了很多域名,基本都已被注册。之前听说双拼域名很火,所以想写个脚本,看看哪些双拼域名还未被注册。

一、查询接口

网上搜索了一下,万网的域名查询接口比较简单易用,查询URL格式为: http://panda.www.net.cn/cgi-bin/check.cgi?area_domain=aaa.com

返回值及含义:

210 : Domain name is available
211 : Domain name is not available
212 : Domain name is invalid
214 : Unknown error

二、编程思路

1. DomainGenerator读取文件pinyin.txt,获取所有可用的拼音音节。遍历拼音音节,两两组合,组装成双拼域名。这个拼音列表是从网上搜索来的,可能会有纰漏。

2. 创建域名检测线程DomainRunner,每个线程采用httpclient调用万网的域名查询接口。

3. 每个线程调用DomainValidator检查返回结果。

4. 线程ResultRunner将可用域名写入domain.txt文件。

三、核心代码

DomainGenerator.java, 启动类,读取拼音列表,组装需要检测的域名,创建检测线程和结果处理线程。


    
    
  1. package com.learnworld;
  2. import java.util.List;
  3. import java.io.BufferedReader;
  4. import java.io.BufferedWriter;
  5. import java.io.FileReader;
  6. import java.io.FileWriter;
  7. import java.util.ArrayList;
  8. import java.util.concurrent.ArrayBlockingQueue;
  9. import java.util.concurrent.CountDownLatch;
  10. import java.util.concurrent.ExecutorService;
  11. import java.util.concurrent.Executors;
  12. import java.util.concurrent.LinkedBlockingQueue;
  13. import java.util.concurrent.atomic.AtomicInteger;
  14. import org.apache.http.impl.client.CloseableHttpClient;
  15. import org.apache.http.impl.client.HttpClients;
  16. import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
  17. public class DomainGenerator {
  18. public static void main(String[] args){
  19. // pinyin list, read from pinyin.txt
  20. List<String> items = new ArrayList<String>();
  21. // domain list, which need to check
  22. ArrayBlockingQueue<String> taskQueue = new ArrayBlockingQueue<String>( 163620);
  23. // available domain list, which need to save into file
  24. LinkedBlockingQueue<String> resultQueue = new LinkedBlockingQueue<String>();
  25. // counter, need to count unavailable domain statistical information
  26. AtomicInteger count = new AtomicInteger( 0);
  27. // Httpclient initialization
  28. PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
  29. cm.setMaxTotal( 20);
  30. cm.setDefaultMaxPerRoute( 20);
  31. CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
  32. try {
  33. // pinyin.txt, used to save all available pinyin
  34. BufferedReader reader = new BufferedReader( new FileReader( "pinyin.txt"));
  35. // domain.txt, used to save all available domain result
  36. BufferedWriter writer = new BufferedWriter( new FileWriter( "domain.txt"));
  37. String item = null;
  38. while((item = reader.readLine()) != null){
  39. items.add(item);
  40. }
  41. // generate domain list
  42. for (String item1 : items){
  43. for (String item2 : items) {
  44. taskQueue.offer(item1 + item2 + ".com");
  45. }
  46. }
  47. int domainThreadNum = 3;
  48. CountDownLatch downLatch = new CountDownLatch(domainThreadNum);
  49. ExecutorService executor = Executors.newFixedThreadPool(domainThreadNum + 1);
  50. // start domain check thread
  51. for( int i = 0; i < domainThreadNum; i++){
  52. executor.execute( new DomainRunner(taskQueue, resultQueue, downLatch, count, httpClient));
  53. }
  54. // start result handle thread
  55. executor.execute( new ResultRunner(resultQueue, writer));
  56. downLatch.await();
  57. System.out.println( "All tasks are done!");
  58. // TODO, suggest use volatile flag to control ResultRunner
  59. executor.shutdownNow();
  60. reader.close();
  61. writer.close();
  62. httpClient.close();
  63. } catch (Exception e) {
  64. e.printStackTrace();
  65. }
  66. }
  67. }

 DomainRunner:域名检测线程,从域名队列domainQueue中读取域名,调用接口进行检测。如果域名可用,将结果放入resultQueue中等待写入文件。


    
    
  1. package com.learnworld;
  2. import java.io.IOException;
  3. import java.util.Calendar;
  4. import java.util.concurrent.ArrayBlockingQueue;
  5. import java.util.concurrent.CountDownLatch;
  6. import java.util.concurrent.LinkedBlockingQueue;
  7. import java.util.concurrent.atomic.AtomicInteger;
  8. import org.apache.http.HttpEntity;
  9. import org.apache.http.client.config.RequestConfig;
  10. import org.apache.http.client.methods.CloseableHttpResponse;
  11. import org.apache.http.client.methods.HttpGet;
  12. import org.apache.http.impl.client.CloseableHttpClient;
  13. import org.apache.http.protocol.BasicHttpContext;
  14. import org.apache.http.protocol.HttpContext;
  15. import org.apache.http.util.EntityUtils;
  16. public class DomainRunner implements Runnable {
  17. private ArrayBlockingQueue<String> domainQueue;
  18. private LinkedBlockingQueue<String> resultQueue;
  19. private CountDownLatch downLatch;
  20. private AtomicInteger count;
  21. private CloseableHttpClient httpClient;
  22. public DomainRunner(ArrayBlockingQueue<String> domainQueue,
  23. LinkedBlockingQueue<String> resultQueue, CountDownLatch downLatch,
  24. AtomicInteger count, CloseableHttpClient httpClient) {
  25. super();
  26. this.domainQueue = domainQueue;
  27. this.resultQueue = resultQueue;
  28. this.downLatch = downLatch;
  29. this.count = count;
  30. this.httpClient = httpClient;
  31. }
  32. @Override
  33. public void run() {
  34. String domain = null;
  35. while ((domain = domainQueue.poll()) != null) {
  36. boolean isDomainAvailable = false;
  37. RequestConfig requestConfig = RequestConfig.custom()
  38. .setSocketTimeout( 5000)
  39. .setConnectTimeout( 5000)
  40. .setConnectionRequestTimeout( 5000)
  41. .build();
  42. HttpGet httpGet = new HttpGet( "http://panda.www.net.cn/cgi-bin/check.cgi?area_domain=" + domain);
  43. httpGet.setConfig(requestConfig);
  44. httpGet.setHeader( "Connection", "close");
  45. HttpContext context = new BasicHttpContext();
  46. CloseableHttpResponse response = null;
  47. try {
  48. response = httpClient.execute(httpGet, context);
  49. HttpEntity entity = response.getEntity();
  50. int status = response.getStatusLine().getStatusCode();
  51. if (status >= 200 && status < 300) {
  52. String resultXml = EntityUtils.toString(entity);
  53. isDomainAvailable = DomainValidator.isAvailableDomainForResponse(resultXml);
  54. EntityUtils.consumeQuietly(entity);
  55. } else {
  56. System.out.println(domain + " check error.");
  57. }
  58. } catch (Exception e) {
  59. e.printStackTrace();
  60. } finally {
  61. try {
  62. httpGet.releaseConnection();
  63. if (response != null) {
  64. response.close();
  65. }
  66. } catch (IOException e) {
  67. e.printStackTrace();
  68. }
  69. }
  70. // result handle
  71. if(isDomainAvailable) {
  72. resultQueue.offer(domain);
  73. } else {
  74. int totalInvalid = count.addAndGet( 1);
  75. if (totalInvalid % 100 == 0) {
  76. System.out.println(totalInvalid + " " + Calendar.getInstance().getTime());
  77. }
  78. }
  79. }
  80. downLatch.countDown();
  81. }
  82. }

DomainValidator: 对万网返回结果进行检查,判断域名是否可用。


    
    
  1. package com.learnworld;
  /**
   * Interprets the body returned by the domain check interface.
   *
   * <p>Recognised result codes, matched as the text following {@code <original>}:
   * 210 = domain is available, 211 = not available, 212 = invalid name,
   * 214 = unknown error.
   */
  public class DomainValidator {

      /** Pause applied after an unrecognised response, in milliseconds. */
      private static final long BACKOFF_MILLIS = 60000L;

      /**
       * Decides whether a check response marks the domain as available.
       *
       * <p>A response that contains none of the known codes prints
       * {@code "api callback error!"} and blocks the calling thread for one minute
       * before returning {@code false} (presumably a back-off for when the service
       * starts rejecting requests). If the thread is interrupted during that
       * pause, the method returns early with the interrupt status set again.
       *
       * @param responseXml response body; may be {@code null} or empty
       * @return {@code true} only when the response carries code 210
       */
      public static boolean isAvailableDomainForResponse(String responseXml) {
          if (responseXml == null || responseXml.isEmpty()) {
              return false;
          }
          if (responseXml.contains("<original>210")) {
              return true;
          }
          if (responseXml.contains("<original>211")
                  || responseXml.contains("<original>212")
                  || responseXml.contains("<original>214")) {
              return false;
          }

          System.out.println("api callback error!");
          try {
              Thread.sleep(BACKOFF_MILLIS);
          } catch (InterruptedException e) {
              // Restore the flag so the owning worker/executor can still see the interrupt.
              Thread.currentThread().interrupt();
          }
          return false;
      }
  }

ResultRunner: 结果处理线程,将可用域名写入文件domain.txt中。


    
    
  1. package com.learnworld;
  2. import java.io.BufferedWriter;
  3. import java.util.concurrent.LinkedBlockingQueue;
  /**
   * Consumer thread that writes every domain taken from {@code resultQueue} to
   * the supplied writer, one per line, flushing after each line.
   *
   * <p>The runner has no end-of-input marker; it runs until its thread is
   * interrupted. On interruption it writes whatever is still queued and then
   * returns with the interrupt status set.
   */
  public class ResultRunner implements Runnable {
      private LinkedBlockingQueue<String> resultQueue;
      BufferedWriter writer;

      /**
       * @param resultQueue queue of available domains to persist
       * @param writer      destination; this class does not close it
       */
      public ResultRunner(LinkedBlockingQueue<String> resultQueue,
              BufferedWriter writer) {
          super();
          this.resultQueue = resultQueue;
          this.writer = writer;
      }

      /**
       * Writes results until interrupted, then drains the queue and stops.
       */
      @Override
      public void run() {
          try {
              try {
                  // take() never returns null, so interruption is the only way out of this loop.
                  while (true) {
                      writeResult(resultQueue.take());
                  }
              } catch (InterruptedException e) {
                  // Expected shutdown signal: persist anything still queued instead of dropping it.
                  String pending;
                  while ((pending = resultQueue.poll()) != null) {
                      writeResult(pending);
                  }
                  // Restore the flag for the executor that owns this thread.
                  Thread.currentThread().interrupt();
              }
          } catch (java.io.IOException e) {
              e.printStackTrace();
          }
      }

      /** Writes one result line and flushes so that it reaches the file immediately. */
      private void writeResult(String result) throws java.io.IOException {
          writer.write(result);
          writer.newLine();
          writer.flush();
      }
  }

 

四、总结

1. 第一版程序采用单线程处理,性能很差,每100个域名大概需要90s左右,主要原因是网络IO延迟比较大。将代码修改为多线程处理,创建两个检测线程,每100个域名大概需要30s左右。

2. 提高检测线程数会加快处理性能,但建议不超过三个,原因有两个:

1) 万网采用了阿里云的过滤技术,如果一段时间内某个IP的请求数很高,就会将该IP加入屏蔽列表。 我开始采用了100个线程,不到1分钟就被屏蔽。

2)当请求数很高时,网络连接不能得到及时释放,很多TCP连接处于TIME_WAIT状态,进而出现BindException错误。

3. 我遍历了所有的双拼域名,目前约有1万个域名尚未被注册,结果见附件。我又遍历了四位及以下的纯英文字母域名,已经全部被注册。

需要注册双拼域名的童鞋要抓紧了~~

猜你喜欢

转载自www.cnblogs.com/jpfss/p/10071938.html