Crawler base class

Self-encapsulated crawler base class.


/**
 * Identity information every crawler task exposes: a unique code and a
 * human-readable name.
 */
public interface TaskBaseInfo {

	/**
	 * Unique code identifying the task.
	 *
	 * @return a code value that is not repeated anywhere in the crawler project
	 */
	String taskCode();

	/**
	 * Display name of the task, generally used in log output.
	 *
	 * @return the task name
	 */
	String taskName();
}



import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import redis.clients.jedis.JedisCluster;

/**
 * Two-level string cache for crawler tasks.
 * <p>
 * The default methods store a value in Redis (with a TTL) and mirror it to a
 * file under {@link #BASE_FILE_PATH}; reads try Redis first and fall back to
 * the file when Redis returns nothing.
 */
public interface TaskStringCache {

	/** Logger shared by the default methods of this interface. */
	Logger logger = LoggerFactory.getLogger(TaskStringCache.class);

	/** Root directory that holds the file copies of cached values. */
	String BASE_FILE_PATH = "/mfs/ShareFile/static/cms/crawler/cache/";

	/**
	 * Supplies the Redis client used by the default cache methods.
	 *
	 * @return the Redis cluster client
	 */
	JedisCluster obtainJedisCluster();

	/**
	 * Reads a cached string.
	 *
	 * @param taskCode unique task code
	 * @param cacheKey cache key
	 * @return the cached value as defined by the implementation
	 */
	String getCacheStr(String taskCode, String cacheKey);

	/**
	 * Stores a cached string.
	 *
	 * @param taskCode     unique task code
	 * @param cacheKey     cache key
	 * @param cacheSeconds time to live in seconds
	 * @param cacheStr     value to cache
	 */
	void setCacheStr(String taskCode, String cacheKey, int cacheSeconds, String cacheStr);

	/**
	 * Builds the path of the file that mirrors a cache entry:
	 * {@code BASE_FILE_PATH/<taskCode>/<cacheKey>.properties}.
	 *
	 * @param taskCode unique task code, used as the directory name
	 * @param cacheKey cache key, used as the file name
	 * @return the target file path
	 */
	default String obtainTargetFilePath(String taskCode, String cacheKey) {

		// File.separator is the directory separator; File.pathSeparator (":" / ";")
		// separates entries of a path LIST and would glue both parts into one file name.
		return BASE_FILE_PATH + taskCode + File.separator + cacheKey + ".properties";
	}

	/**
	 * Default way of writing a cache entry: stores the value in Redis with a TTL
	 * and schedules an asynchronous write of the same value to the mirror file.
	 *
	 * @param taskName     task display name, used for logging only
	 * @param taskCode     unique task code
	 * @param cacheKey     cache key (used as the Redis key)
	 * @param cacheSeconds Redis time to live in seconds
	 * @param cacheStr     value to cache
	 */
	default void defaultSetCacheStr(String taskName, String taskCode, String cacheKey, int cacheSeconds, String cacheStr) {

		JedisCluster jedisCluster = obtainJedisCluster();
		jedisCluster.setex(cacheKey, cacheSeconds, cacheStr);

		String targetFilePath = obtainTargetFilePath(taskCode, cacheKey);
		save2FileAtomic(taskName, targetFilePath, cacheStr);
	}

	/**
	 * Reads an entry written by
	 * {@link #defaultSetCacheStr(String, String, String, int, String)}.
	 * <p>
	 * Redis is consulted first; when it holds no non-blank value the mirror file
	 * is read instead. The call blocks until the file read has finished.
	 *
	 * @param taskName task display name, used for logging only
	 * @param taskCode unique task code
	 * @param cacheKey cache key
	 * @return the non-blank Redis value if present; otherwise the file content
	 *         (empty string when the file cannot be read); if waiting for the
	 *         file read fails, whatever Redis returned (possibly {@code null})
	 */
	default String defaultGetCacheStr(String taskName, String taskCode, String cacheKey) {

		JedisCluster jedisCluster = obtainJedisCluster();
		String cacheStr = jedisCluster.get(cacheKey);
		if (StringUtils.isNotBlank(cacheStr)) {
			return cacheStr;
		}

		String targetFilePath = obtainTargetFilePath(taskCode, cacheKey);
		String failureMessage = "[" + taskName + "] Failed to get the file cache content asynchronously. taskCode=>"
				+ "[" + taskCode + "]" + " cacheKey=>" + "[" + cacheKey + "] ";
		try {
			// The result is needed immediately, so this still blocks on the async read.
			cacheStr = readFile(targetFilePath).get();
		} catch (InterruptedException e) {
			// Restore the interrupt flag so callers further up can observe the interruption.
			Thread.currentThread().interrupt();
			logger.error(failureMessage, e);
		} catch (ExecutionException e) {
			logger.error(failureMessage, e);
		}

		return cacheStr;
	}

	/**
	 * Asynchronously persists {@code content} to {@code filePath}.
	 * <p>
	 * The content is first written (UTF-8) to {@code filePath + ".tmp"} and then
	 * moved over the target, so readers never see a half-written file. The move
	 * is attempted atomically; if that fails it is retried exactly once as a
	 * plain replacing move. Failures are logged and the temporary file is
	 * removed; nothing is thrown to the caller.
	 *
	 * @param taskName task display name, used for logging only
	 * @param filePath destination file path
	 * @param content  text to write
	 */
	default void save2FileAtomic(String taskName, String filePath, String content) {

		CompletableFuture.runAsync(() -> {

			Path tmpPath = null;
			try {
				Path targetPath = Paths.get(filePath);
				tmpPath = Paths.get(filePath + ".tmp");

				Path parentDir = tmpPath.getParent();
				if (parentDir != null && !Files.isDirectory(parentDir)) {
					Files.createDirectories(parentDir);
				}

				Files.write(tmpPath, content.getBytes(StandardCharsets.UTF_8));

				try {
					Files.move(tmpPath, targetPath, StandardCopyOption.ATOMIC_MOVE);
				} catch (IOException atomicMoveFailure) {
					// Bounded fallback: one non-atomic attempt instead of re-submitting
					// the whole task forever when the move keeps failing.
					logger.warn("move fails filePath:" + filePath + ", retrying once without atomic move", atomicMoveFailure);
					Files.move(tmpPath, targetPath, StandardCopyOption.REPLACE_EXISTING);
				}
			} catch (IOException | RuntimeException e) {

				logger.error("[" + taskName + "] => write cache string to file => [" + filePath + "] exception", e);
				logger.error("[" + taskName + "] file write operation exited");
				if (tmpPath != null) {
					try {
						Files.deleteIfExists(tmpPath);
					} catch (IOException cleanupFailure) {
						logger.warn("[" + taskName + "] could not remove temporary file [" + tmpPath + "]", cleanupFailure);
					}
				}
			}
		});
	}

	/**
	 * Reads a text file asynchronously (decoded as UTF-8).
	 * <p>
	 * Lines are concatenated without their line terminators.
	 *
	 * @param filePath path of the file to read
	 * @return a future holding the file content, or an empty string when the
	 *         file cannot be read (the failure is logged)
	 */
	default CompletableFuture<String> readFile(String filePath) {

		return CompletableFuture.supplyAsync(() -> {

			StringBuilder strb = new StringBuilder();
			try (FileInputStream fis = new FileInputStream(filePath);
					BufferedReader inReader = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))) {

				String line;
				while ((line = inReader.readLine()) != null) {
					strb.append(line);
				}
			} catch (IOException e) {

				logger.error("Failed to read cache file [" + filePath + "]", e);
				return StringUtils.EMPTY;
			}

			return strb.toString();
		});
	}
}



/**
 * Minimal contract of a task: a single no-argument entry point.
 */
public interface BasicTask {

	/**
	 * Executes the task.
	 */
	void run();
}



import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;

import com.xxx.zx.crawler.basic.BasicTask;

/**
 * Base class for crawler tasks.
 * <p>
 * Wires the {@link TaskStringCache} accessors to the interface's default
 * Redis + file implementation and exposes the Spring
 * {@link ApplicationContext} through static {@code getBean} helpers.
 */
public abstract class BaseCrawlerTask implements TaskBaseInfo, TaskStringCache, BasicTask, ApplicationContextAware {

	/** Logger named after the concrete task class. */
	protected final Logger logger = LoggerFactory.getLogger(getClass());

	/**
	 * Application context shared by all tasks. Declared {@code volatile} because
	 * it is written by {@link #setApplicationContext(ApplicationContext)} and
	 * read by the static {@code getBean} helpers, possibly on other threads.
	 */
	protected static volatile ApplicationContext ac;

	/**
	 * Stores the injected application context in the shared static field.
	 *
	 * @param applicationContext the context handed in by Spring
	 */
	@Override
	public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {

		ac = applicationContext;
	}

	/**
	 * Looks up a bean by type.
	 *
	 * @param beanClass type of the requested bean
	 * @param <T>       bean type
	 * @return the bean returned by the application context
	 * @throws IllegalStateException if no application context has been set yet
	 */
	public static <T> T getBean(Class<T> beanClass) {

		return requireContext().getBean(beanClass);
	}

	/**
	 * Looks up a bean by name.
	 *
	 * @param beanName name of the requested bean
	 * @return the bean returned by the application context
	 * @throws IllegalStateException if no application context has been set yet
	 */
	public static Object getBean(String beanName) {

		return requireContext().getBean(beanName);
	}

	/** Returns the shared context, failing with a clear message when it is not set. */
	private static ApplicationContext requireContext() {

		ApplicationContext context = ac;
		if (context == null) {
			throw new IllegalStateException("ApplicationContext has not been set on BaseCrawlerTask yet");
		}
		return context;
	}

	/**
	 * Reads a cached string via
	 * {@link TaskStringCache#defaultGetCacheStr(String, String, String)},
	 * passing this task's name for logging.
	 */
	@Override
	public String getCacheStr(String taskCode, String cacheKey) {

		return defaultGetCacheStr(taskName(), taskCode, cacheKey);
	}

	/**
	 * Stores a cached string via
	 * {@link TaskStringCache#defaultSetCacheStr(String, String, String, int, String)},
	 * passing this task's name for logging.
	 */
	@Override
	public void setCacheStr(String taskCode, String cacheKey, int cacheSeconds, String cacheStr) {

		defaultSetCacheStr(taskName(), taskCode, cacheKey, cacheSeconds, cacheStr);
	}
}

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326262404&siteId=291194637