1.多线程读取大文件,并通过 Jedis 插入数据
package readfile;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import org.apache.log4j.Logger;

import com.wandoulabs.jodis.JedisResourcePool;
import com.wandoulabs.jodis.RoundRobinJedisPool;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPoolConfig;

/**
 * Reads one text file with several worker threads and stores every line in Redis.
 *
 * <p>The file is cut into {@code threadNum} contiguous byte ranges. Each range is handed to a
 * {@link MultiThreadReader}, which re-aligns itself to line boundaries, converts each line into a
 * key/value pair through the registered {@link DataProcessHandler} and writes it with
 * {@code Jedis.set}.
 */
public class FileReader {

    /** Byte value that terminates a line. */
    private static final byte LINE_FEED = (byte) '\n';

    protected static Logger logger = Logger.getLogger(FileReader.class);

    /** Connection pool shared by all readers; created lazily by {@link #getJedis()}. */
    private static JedisResourcePool jedisPool;

    private int threadNum = 3; // number of worker threads, 3 by default
    private String filePath; // path of the file to import
    private int bufSize = 1024; // read buffer size in bytes, 1024 by default
    private DataProcessHandler dataProcessHandler; // converts a raw line into {key, value}
    private ExecutorService threadPool;

    /** Builds the shared pool with the settings used by this importer. */
    private static void initialPool() {
        JedisPoolConfig config = new JedisPoolConfig();
        config.setMaxTotal(-1);
        config.setMinIdle(2);
        config.setMaxIdle(-1);
        config.setMaxWaitMillis(10000);
        config.setTestOnBorrow(true);
        config.setTestOnReturn(true);
        jedisPool = new RoundRobinJedisPool("zk ip:2181", 10000, "/zk/codis/db_test/proxy", config, 10000);
    }

    /** Creates the pool if it does not exist yet. */
    private static synchronized void poolInit() {
        if (jedisPool == null) {
            initialPool();
        }
    }

    /**
     * Borrows a connection from the shared pool, creating the pool on first use.
     *
     * @return a connection the caller must release with {@link #returnResource(Jedis)}, or
     *         {@code null} if the pool is unavailable
     */
    public synchronized static Jedis getJedis() {
        if (jedisPool == null) {
            poolInit();
        }
        return jedisPool != null ? jedisPool.getResource() : null;
    }

    /**
     * Releases a connection obtained from {@link #getJedis()}.
     *
     * @param jedis the connection to release; {@code null} is ignored
     */
    public static void returnResource(final Jedis jedis) {
        if (jedis != null && jedisPool != null) {
            jedis.close();
        }
    }

    /**
     * @param filePath  path of the file to read
     * @param bufSize   size in bytes of each worker's read buffer; must be positive
     * @param threadNum number of worker threads / file segments; must be positive
     * @throws IllegalArgumentException if {@code bufSize} or {@code threadNum} is not positive
     */
    public FileReader(String filePath, int bufSize, int threadNum) {
        if (bufSize <= 0) {
            // A zero-sized buffer would make every read return 0 and the worker spin forever.
            throw new IllegalArgumentException("bufSize must be positive: " + bufSize);
        }
        this.threadNum = threadNum;
        this.bufSize = bufSize;
        this.filePath = filePath;
        this.threadPool = Executors.newFixedThreadPool(threadNum);
    }

    /**
     * Splits the file into segments and submits one reader task per non-empty segment.
     *
     * <p>The method returns once the tasks are submitted; the thread pool is shut down afterwards
     * in every case, so already submitted tasks finish but no new ones are accepted.
     */
    public void startRead() {
        try {
            long size = fileSize();
            long subSize = size / threadNum;
            for (int i = 0; i < threadNum; i++) {
                long startIndex = i * subSize;
                // The last segment also takes the remainder of the division.
                long length = (i == threadNum - 1) ? size - startIndex : subSize;
                if (length == 0) {
                    // Happens when the file is smaller than the thread count; an empty segment
                    // owns no line and would only re-import the first line of the file.
                    continue;
                }
                submitSegment(startIndex, length);
            }
        } catch (FileNotFoundException e) {
            logger.error("File not found: " + filePath, e);
        } catch (IOException e) {
            logger.error("Failed to read " + filePath, e);
        } finally {
            threadPool.shutdown();
        }
    }

    /** Returns the length of the file in bytes. */
    private long fileSize() throws IOException {
        RandomAccessFile raf = new RandomAccessFile(filePath, "r");
        try {
            return raf.length();
        } finally {
            raf.close();
        }
    }

    /** Opens a dedicated channel for one segment and hands it to the pool. */
    private void submitSegment(long startIndex, long length) throws IOException {
        // Not closed here on success: the worker closes the channel when it is done.
        @SuppressWarnings("resource")
        RandomAccessFile accessFile = new RandomAccessFile(filePath, "r");
        boolean submitted = false;
        try {
            threadPool.execute(new MultiThreadReader(accessFile.getChannel(), startIndex, length));
            submitted = true;
        } finally {
            if (!submitted) {
                accessFile.close();
            }
        }
    }

    /** Returns {@code head} followed by {@code src[from, to)} in a new array. */
    private static byte[] concat(byte[] head, byte[] src, int from, int to) {
        byte[] joined = new byte[head.length + (to - from)];
        System.arraycopy(head, 0, joined, 0, head.length);
        System.arraycopy(src, from, joined, head.length, to - from);
        return joined;
    }

    /**
     * Registers the handler that converts a raw line into a key/value pair.
     *
     * @param dataHandler the handler used by all reader tasks
     */
    public void registerHanlder(DataProcessHandler dataHandler) {
        this.dataProcessHandler = dataHandler;
    }

    /**
     * Reads one byte range of the file line by line and stores each line it owns in Redis.
     *
     * <p>Ownership rule: a segment that does not start at offset 0 begins reading one byte before
     * its nominal start and discards everything up to and including the first line feed (that
     * line is imported by the preceding segment). Every segment keeps reading past its nominal end
     * until the line in progress is complete.
     *
     * @author zyh
     */
    public class MultiThreadReader implements Runnable {
        private final FileChannel channel;
        private final long startIndex;
        private final long rSize;

        /**
         * @param channel    channel positioned anywhere on the file; closed by this task
         * @param startIndex nominal first byte of the segment
         * @param rSize      nominal length of the segment in bytes
         */
        public MultiThreadReader(FileChannel channel, long startIndex, long rSize) {
            this.channel = channel;
            if (startIndex > 0) {
                // Step back one byte so a line feed right before the segment is seen, and grow the
                // length by the same byte so the end boundary stays where the caller put it.
                // Without the +1 the segment stopped one byte early and the following segment
                // threw away a line that nobody had imported.
                this.startIndex = startIndex - 1;
                this.rSize = rSize + 1;
            } else {
                this.startIndex = startIndex;
                this.rSize = rSize;
            }
        }

        public void run() {
            readByLine();
        }

        /**
         * Imports the segment, then closes the channel and releases the Redis connection even if
         * the import fails. Prints the elapsed time when the import ran.
         */
        public void readByLine() {
            long start = System.currentTimeMillis();
            Jedis jedis = null;
            try {
                if (dataProcessHandler == null) {
                    logger.error("No DataProcessHandler registered; nothing imported from " + filePath);
                    return;
                }
                jedis = getJedis();
                if (jedis == null) {
                    logger.error("No Jedis connection available; nothing imported from " + filePath);
                    return;
                }
                importLines(jedis);
            } catch (IOException e) {
                logger.error("Failed to read " + filePath, e);
            } finally {
                try {
                    channel.close();
                } catch (IOException e) {
                    logger.error("Failed to close channel for " + filePath, e);
                }
                returnResource(jedis);
            }
            long end = System.currentTimeMillis();
            System.out.println("查询单个文件" + "共耗时:::" + (end - start) * 1.0 / 1000 + "s");
        }

        /** Scans the segment and stores every line owned by it. */
        private void importLines(Jedis jedis) throws IOException {
            ByteBuffer rbuf = ByteBuffer.allocate(bufSize);
            channel.position(startIndex);
            long endIndex = startIndex + rSize; // first byte after the segment
            boolean skipFirstLine = startIndex != 0; // first line belongs to the previous segment
            byte[] pending = new byte[0]; // bytes of a line that continues in the next chunk
            long lineCount = 0; // line feeds seen so far, including a skipped first line
            long consumed = startIndex; // absolute offset of the next byte to examine
            boolean done = false;

            while (!done && channel.read(rbuf) != -1) {
                rbuf.flip();
                byte[] chunk = new byte[rbuf.remaining()];
                rbuf.get(chunk);
                rbuf.clear();

                int lineStart = 0; // offset in chunk where the current line begins
                for (int i = 0; i < chunk.length; i++) {
                    consumed++;
                    if (chunk[i] != LINE_FEED) {
                        continue;
                    }
                    byte[] line = concat(pending, chunk, lineStart, i + 1);
                    pending = new byte[0];
                    lineStart = i + 1;
                    lineCount++;
                    if (!(skipFirstLine && lineCount == 1)) {
                        store(jedis, line);
                    }
                    // Stop only on a line boundary at or beyond the end of the segment.
                    if (consumed >= endIndex) {
                        done = true;
                        break;
                    }
                }
                if (!done && lineStart < chunk.length) {
                    pending = concat(pending, chunk, lineStart, chunk.length);
                }
            }

            // End of file reached with an unterminated last line. It is stored only when this
            // segment owns it, i.e. it is not the leading fragment that would have been skipped.
            if (pending.length > 0 && (!skipFirstLine || lineCount > 0)) {
                store(jedis, pending);
            }
        }

        /** Converts one raw line and writes it to Redis; unparseable lines are skipped. */
        private void store(Jedis jedis, byte[] line) {
            String[] kv = dataProcessHandler.process(line);
            if (kv == null || kv.length < 2 || kv[0] == null || kv[1] == null) {
                logger.warn("Skipping line that could not be parsed into a key/value pair");
                return;
            }
            jedis.set(kv[0], kv[1]);
        }
    }

    public int getThreadNum() {
        return threadNum;
    }

    public String getFilePath() {
        return filePath;
    }

    public ExecutorService getThreadPool() {
        return threadPool;
    }

    public int getBufSize() {
        return bufSize;
    }
}
2.辅助类:文件内容有bytes[]转为String
package readfile;

import java.io.UnsupportedEncodingException;

/**
 * Turns one raw line into a key/value pair by splitting it at the first {@code 'H'}.
 *
 * <p>The line is decoded as UTF-8. Everything before the first {@code 'H'} becomes the key and
 * everything after it (including any trailing line terminator present in the input) becomes the
 * value.
 */
public class FileLineDataHandler implements DataProcessHandler {

    /** Character that separates the key from the value. */
    private static final char DELIMITER = 'H';

    private String encode = "utf-8";

    /**
     * @param data raw bytes of one line
     * @return a two-element array {@code {key, value}}; both elements are {@code null} when
     *         {@code data} is {@code null}, cannot be decoded, or contains no delimiter
     */
    @Override
    public String[] process(byte[] data) {
        String[] kv = new String[2];
        if (data == null) {
            return kv;
        }
        try {
            String text = new String(data, encode);
            // Split at the first delimiter only: a value may itself contain 'H', and an empty
            // value ("keyH") must not raise ArrayIndexOutOfBoundsException as split("H")[1] did.
            int split = text.indexOf(DELIMITER);
            if (split >= 0) {
                kv[0] = text.substring(0, split);
                kv[1] = text.substring(split + 1);
            }
        } catch (UnsupportedEncodingException e) {
            // Kept from the original: report and fall through to the {null, null} result.
            e.printStackTrace();
        }
        return kv;
    }
}
package readfile;

/**
 * Converts the raw bytes of one line into the strings the reader stores.
 *
 * <p>{@code FileReader} treats element 0 of the result as the key and element 1 as the value
 * passed to {@code Jedis.set}.
 */
public interface DataProcessHandler {

    /**
     * Processes one line.
     *
     * @param data raw bytes of the line as read from the file
     * @return an array whose first element is the key and second element is the value
     */
    String[] process(byte[] data);
}
3.主类:遍历读取目录下的所有txt文件
package readfile;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * Command-line entry point: imports every {@code .txt} file of a directory with {@link FileReader}.
 */
public class MultiThreadReadByLine {

    /**
     * @param args {@code args[0]} directory to scan, {@code args[1]} read buffer size in bytes,
     *             {@code args[2]} number of reader threads per file
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println("Usage: MultiThreadReadByLine <directory> <bufferSize> <threadCount>");
            System.exit(1);
        }

        File directory = new File(args[0]);
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] entries = directory.listFiles();
        if (entries == null) {
            System.err.println("Not a readable directory: " + args[0]);
            System.exit(1);
        }

        int bufSize = Integer.parseInt(args[1]);
        int threadNum = Integer.parseInt(args[2]);

        List<File> txtFiles = new ArrayList<File>();
        for (File entry : entries) {
            // endsWith instead of lastIndexOf: "a.txt.bak" is not a txt file.
            if (entry.isFile() && entry.getName().endsWith(".txt")) {
                txtFiles.add(entry);
                System.out.println(entry.getName());
            }
        }

        for (File txtFile : txtFiles) {
            // getPath() already joins directory and name with the right separator; the previous
            // args[0] + name produced a wrong path whenever args[0] had no trailing separator.
            FileReader fileReader = new FileReader(txtFile.getPath(), bufSize, threadNum);
            fileReader.registerHanlder(new FileLineDataHandler());
            fileReader.startRead();
        }
    }
}
4.随机查询类:根据 key 值的生成规律进行随机查询
package chaxun;

import java.util.Random;

import com.wandoulabs.jodis.JedisResourcePool;
import com.wandoulabs.jodis.RoundRobinJedisPool;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPoolConfig;

/**
 * Command-line tool that issues random {@code GET} requests from several threads and prints how
 * long each thread took.
 */
public class RandomGet {

    /** Connection pool shared by all query threads; created lazily by {@link #getJedis()}. */
    private static JedisResourcePool jedisPool;

    /** Builds the shared pool with the settings used by this tool. */
    private static void initialPool() {
        JedisPoolConfig config = new JedisPoolConfig();
        config.setMaxTotal(-1);
        config.setMinIdle(2);
        config.setMaxIdle(-1);
        config.setMaxWaitMillis(10000);
        config.setTestOnBorrow(true);
        config.setTestOnReturn(true);
        jedisPool = new RoundRobinJedisPool("10.191.20.224:2181", 10000, "/zk/codis/db_test/proxy", config, 10000);
    }

    /** Creates the pool if it does not exist yet. */
    private static synchronized void poolInit() {
        if (jedisPool == null) {
            initialPool();
        }
    }

    /**
     * Borrows a connection from the shared pool, creating the pool on first use.
     *
     * @return a connection the caller must close, or {@code null} if the pool is unavailable
     */
    public synchronized static Jedis getJedis() {
        if (jedisPool == null) {
            poolInit();
        }
        return jedisPool != null ? jedisPool.getResource() : null;
    }

    /**
     * Releases a connection obtained from {@link #getJedis()}.
     *
     * @param jedis the connection to release; {@code null} is ignored
     */
    public static void returnResource(final Jedis jedis) {
        if (jedis != null && jedisPool != null) {
            jedis.close();
        }
    }

    /**
     * @param args {@code args[0]} number of query threads, {@code args[1]} number of queries
     *             issued by each thread
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: RandomGet <threadCount> <queriesPerThread>");
            System.exit(1);
        }
        int numThread = Integer.parseInt(args[0]);
        int count = Integer.parseInt(args[1]);
        new RandomGet().thread(numThread, count);
    }

    /** Starts {@code threadNumber} query threads, each with its own pooled connection. */
    private void thread(int threadNumber, int queriesPerThread) {
        for (int i = 0; i < threadNumber; i++) {
            Jedis jedis = getJedis();
            if (jedis == null) {
                System.err.println("No Jedis connection available; thread " + i + " not started");
                continue;
            }
            new MyThread(i, queriesPerThread, jedis).start();
        }
    }
}

/**
 * Worker that issues a fixed number of random {@code GET} requests on the connection it was given
 * and closes that connection when finished.
 */
class MyThread extends Thread {

    // Exclusive upper bounds of the two random numbers a key is built from.
    // NOTE(review): presumably these mirror the key layout of the imported data - confirm.
    private static final int KEY_PREFIX_BOUND = 251;
    private static final int KEY_SUFFIX_BOUND = 13229912;

    int OneThreadCount;
    Jedis _jedis;
    int num;

    /**
     * @param num             index of this thread, used only in the report line
     * @param queriesPerThread number of GET requests to issue
     * @param jedis           connection used for the queries; closed by {@link #run()}
     */
    public MyThread(int num, int queriesPerThread, Jedis jedis) {
        super();
        this.num = num;
        this.OneThreadCount = queriesPerThread;
        this._jedis = jedis;
    }

    public void run() {
        long start = System.currentTimeMillis();
        Random random = new Random();
        try {
            for (int i = 1; i <= OneThreadCount; i++) {
                _jedis.get(random.nextInt(KEY_PREFIX_BOUND) + "K" + random.nextInt(KEY_SUFFIX_BOUND) + "\t");
            }
        } finally {
            // Release the connection even when a query throws.
            _jedis.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("线程---:::Thread-" + num + ":::查询" + OneThreadCount + "条数据," + "共耗时:::"
                + (end - start) * 1.0 / 1000 + "s");
    }
}