从文件读东西,写回去,字符串处理

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import org.apache.commons.lang.StringUtils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * One-off script that merges "used names" of hotels from two tab-separated files and writes
 * PostgreSQL {@code UPDATE} statements that store the merged value in the {@code usedNames}
 * key of the {@code attrs} hstore column of {@code hotel_info}.
 *
 * <p>A used-name entry may carry aliases in parentheses. Two shapes are understood:
 * <ul>
 *   <li>flat: {@code A(b)(c)} yields {@code A, b, c};</li>
 *   <li>nested: {@code A(b(c))} yields {@code A, b, c}.</li>
 * </ul>
 * Anything else (unbalanced parentheses, empty groups, text after a closing parenthesis,
 * a mix of both shapes) is treated as malformed and yields no names.
 *
 * @author shiqil.liu
 * @date 2019-08-02 19:17
 */
public class BB {
    /** Column separator of both input files. */
    private static final String COLUMN_SEPARATOR = "\t";

    /** Separator between individual names inside the usedNames value. */
    private static final String NAME_SEPARATOR = "||";

    /**
     * Whole-string check for the flat shape {@code A(b)(c)}.
     * NOTE(review): the second '^' inside the character classes is a literal caret, so names
     * containing '^' are rejected as well - confirm whether that is intended.
     */
    private static final Pattern FLAT_SHAPE = Pattern.compile("^([^(^)]+)(\\([^(^)]+\\))*?$");

    /**
     * Token extractor for the flat shape. The trailing lazy group always matches zero times
     * when used with {@code find()}, so each match is one run of non-parenthesis characters.
     */
    private static final Pattern FLAT_TOKEN = Pattern.compile("([^(^)]+)(\\([^(^)]+\\))*?");

    /** Runs the merge for the production export files (hard-coded local paths). */
    public static void main(String[] args) {
        readByFile("D:\\usedNamesAll.txt", "D:\\usedNamesLog.txt");
        // Other runs used: readByFile("D:\\usedNamesBetaAll.txt", "D:\\usedNamesBetaLog.txt")
        // and testOutPut("D:\\outputAll.txt") to eyeball the generated script.
    }

    /**
     * Prints up to the first ten lines of a file, each followed by a dashed separator line.
     *
     * @param p1 path of the file to preview
     */
    public static void testOutPut(String p1) {
        try (FileReader r1 = new FileReader(p1);
             BufferedReader br1 = new BufferedReader(r1)) {
            String line;
            // Stop at end of file instead of printing "null" for files shorter than ten lines.
            for (int i = 0; i < 10 && (line = br1.readLine()) != null; i++) {
                System.out.println(line);
                System.out.println("-------------------");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Merges the used names found in two tab-separated files and writes one {@code UPDATE}
     * statement per hotel whose merged value differs from the value read from {@code p1}.
     *
     * <p>Each line of {@code p1} is {@code seq <TAB> name||name||...}; each line of {@code p2}
     * is {@code seq <TAB> name}. Statements go to {@code D:\outputAll.txt}. Lines that do not
     * have two columns are skipped and copied to {@code D:\errorFile.txt}; blank lines are
     * ignored. Counters are printed to standard output when the run finishes.
     *
     * <p>I/O failures are reported with a stack trace and end the run early.
     *
     * @param p1 path of the file holding the current usedNames value per hotel
     * @param p2 path of the file holding one additional used name per line
     * @return the names collected per hotel seq (after parenthesis splitting, before
     *         de-duplication); contains whatever was collected if the run ended early
     */
    public static Map<String, List<String>> readByFile(String p1, String p2) {
        // Insertion-ordered so the generated script follows the order of the input file.
        Map<String, List<String>> map = new LinkedHashMap<>();
        File writeName = new File("D:\\outputAll.txt");
        File errorFile = new File("D:\\errorFile.txt");

        // FileWriter creates (or truncates) both output files, no explicit creation needed.
        try (FileReader r1 = new FileReader(p1);
             BufferedReader br1 = new BufferedReader(r1); // current usedNames stored in attrs
             FileReader r2 = new FileReader(p2);
             BufferedReader br2 = new BufferedReader(r2); // used names recorded in the log
             FileWriter writer = new FileWriter(writeName);
             BufferedWriter out = new BufferedWriter(writer);
             FileWriter writer2 = new FileWriter(errorFile);
             BufferedWriter out2 = new BufferedWriter(writer2)) {

            Map<String, String> oldMaps = new HashMap<>();
            int errorNum1 = 0;
            int errorNum2 = 0;
            String readLine;

            while ((readLine = br1.readLine()) != null) {
                // columns: [seq, "a||b||..."]
                List<String> columns = splitTrimmed(readLine, COLUMN_SEPARATOR);
                if (columns.size() < 2) {
                    if (!columns.isEmpty()) {
                        errorNum1++;
                        out2.write(p1 + "\t" + readLine + "\r\n");
                    }
                    continue;
                }
                String seq = columns.get(0);
                String rawNames = columns.get(1);
                oldMaps.put(seq, rawNames);

                // Every "a(b)(c)" style entry is expanded; duplicates are removed later.
                List<String> listAll = new ArrayList<>();
                for (String entry : splitTrimmed(rawNames, NAME_SEPARATOR)) {
                    listAll.addAll(dealUsedNames(entry));
                }
                map.put(seq, listAll);
            }

            while ((readLine = br2.readLine()) != null) {
                // columns: [seq, single used name]
                List<String> columns = splitTrimmed(readLine, COLUMN_SEPARATOR);
                if (columns.size() < 2) {
                    if (!columns.isEmpty()) {
                        errorNum2++;
                        out2.write(p2 + "\t" + readLine + "\r\n");
                    }
                    continue;
                }
                String usedStr = columns.get(1);
                if (!isBlank(usedStr)) {
                    map.computeIfAbsent(columns.get(0), k -> new ArrayList<>()).add(usedStr);
                }
            }

            long count = 0;  // total number of names after de-duplication
            long count1 = 0; // merged values still containing two consecutive quotes
            long count2 = 0; // merged values still containing a backslash
            long count3 = 0; // hotels whose value did not change (no statement written)

            for (Map.Entry<String, List<String>> entry : map.entrySet()) {
                List<String> names = deleteRepeatCharMark(entry.getKey(), entry.getValue());
                count += names.size();
                String join = String.join(NAME_SEPARATOR, names);
                if (join.contains("\\")) {
                    count2++;
                }
                if (join.contains("''")) {
                    count1++;
                }
                // Double single quotes so the value is a valid SQL string literal.
                join = join.replace("'", "''");

                // NOTE(review): the escaped value is compared with the raw value from p1, so a
                // value containing a quote is never seen as unchanged - confirm this is wanted.
                if (join.equals(oldMaps.get(entry.getKey()))) {
                    count3++;
                    continue;
                }

                String seqLiteral = entry.getKey().replace("'", "''");
                out.write("update hotel_info set attrs = attrs || hstore('usedNames','" + join
                        + "'),online_status=2,last_mod=now() where hotel_seq='" + seqLiteral
                        + "';" + "\r\n");
            }

            System.out.println("error1:" + errorNum1);
            System.out.println("error2:" + errorNum2);
            System.out.println(count1);
            System.out.println(count2);
            System.out.println(count3);
            System.out.println(count);

        } catch (Exception e) {
            e.printStackTrace();
        }

        return map;
    }

    /**
     * Splits a flat entry such as {@code A(b)(c)} into {@code [A, b, c]}.
     *
     * @param str entry using ASCII parentheses
     * @return the names in order of appearance, or an empty list when {@code str} does not
     *         have the flat shape
     */
    public static List<String> dealOthers(String str) {
        if (!FLAT_SHAPE.matcher(str).find()) {
            return Collections.emptyList();
        }
        List<String> list = new ArrayList<>();
        Matcher matcher = FLAT_TOKEN.matcher(str);
        while (matcher.find()) {
            list.add(matcher.group());
        }
        return list;
    }

    /**
     * Expands one used-name entry into the individual names it contains.
     *
     * <p>Examples: {@code "abasdf"} yields {@code [abasdf]}; {@code "a(b)(c)"} and
     * {@code "a(b(c))"} both yield {@code [a, b, c]}; {@code "a(b)()"},
     * {@code "a(b(c)d)"} and {@code "a(b"} yield an empty list.
     *
     * @param str raw entry; full-width parentheses are accepted
     * @return the names, or an empty list for blank or malformed input
     */
    public static List<String> dealUsedNames(String str) {
        if (isBlank(str)) {
            return Collections.emptyList();
        }
        // Normalise full-width parentheses to ASCII. Plain character replacement is used on
        // purpose: a parenthesis is not a valid regular expression on its own.
        str = str.replace('\uFF08', '(').replace('\uFF09', ')');

        // The number of opening and closing parentheses must match.
        int opens = 0;
        int closes = 0;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c == '(') {
                opens++;
            } else if (c == ')') {
                closes++;
            }
        }
        if (opens != closes) {
            return Collections.emptyList();
        }

        // 1. A plain string without aliases.
        if (opens == 0) {
            List<String> list = new ArrayList<>();
            list.add(str);
            return list;
        }

        // At least one '(' and one ')' are present here, so the length is >= 2.
        if (str.charAt(str.length() - 2) == ')') {
            // 2. Ends with "))": only the nested shape is acceptable.
            return dealNest(str);
        }
        // 3. Otherwise it has to be A(b) or A(b)(c).
        return dealOthers(str);
    }

    /**
     * Splits a nested entry such as {@code A(b(c))} into {@code [A, b, c]}.
     *
     * <p>The entry must consist of names separated by opening parentheses and end with exactly
     * one closing parenthesis per opening one. Empty names, closing parentheses in the middle
     * and trailing text make the entry malformed.
     *
     * @param str entry using ASCII parentheses
     * @return the names from the outermost to the innermost level, or an empty list when
     *         {@code str} does not have the nested shape
     */
    public static List<String> dealNest(String str) {
        int open = str.indexOf('(');
        if (open < 0) {
            return Collections.emptyList();
        }
        List<String> list = new ArrayList<>();
        int begin = 0;
        int depth = 0;
        while (open != -1) {
            list.add(str.substring(begin, open));
            begin = open + 1;
            depth++;
            open = str.indexOf('(', begin);
        }
        int close = str.indexOf(')', begin);
        if (close < 0) {
            return Collections.emptyList();
        }
        list.add(str.substring(begin, close));

        // The tail has to be exactly one ')' per nesting level and nothing else.
        if (str.length() - close != depth) {
            return Collections.emptyList();
        }
        for (int i = close; i < str.length(); i++) {
            if (str.charAt(i) != ')') {
                return Collections.emptyList();
            }
        }
        for (String name : list) {
            if (name.isEmpty() || name.indexOf(')') >= 0) {
                return Collections.emptyList();
            }
        }
        return list;
    }

    /**
     * Removes duplicate and degenerate names and strips escape artefacts.
     *
     * <p>Names are trimmed; names of at most one character are dropped; of names that are equal
     * ignoring case only the first is kept. From the survivors every backslash is removed, names
     * containing two consecutive single quotes are dropped, one pair of enclosing single quotes
     * is stripped, and names left blank by this clean-up are dropped.
     *
     * @param sss  hotel seq the names belong to; not used by the clean-up
     * @param list names to clean, must not contain {@code null}
     * @return the cleaned names in order of first appearance
     */
    public static List<String> deleteRepeatCharMark(String sss, List<String> list) {
        Map<String, String> firstSeen = new LinkedHashMap<>();
        for (String s : list) {
            String trimmed = s.trim();
            if (trimmed.length() <= 1) {
                continue;
            }
            firstSeen.putIfAbsent(trimmed.toLowerCase(), trimmed);
        }

        List<String> newList = new ArrayList<>(firstSeen.size());
        for (String name : firstSeen.values()) {
            String str = name.replace("\\", "");
            if (str.contains("''")) {
                continue;
            }
            // Length guard: a lone quote is both first and last character of the string.
            if (str.length() >= 2 && str.charAt(0) == '\'' && str.charAt(str.length() - 1) == '\'') {
                str = str.substring(1, str.length() - 1);
            }
            // A name made only of backslashes (or quotes around nothing) is empty by now.
            if (isBlank(str)) {
                continue;
            }
            newList.add(str);
        }
        return newList;
    }

    /** Splits on a literal separator, trims every piece and drops pieces that end up empty. */
    private static List<String> splitTrimmed(String text, String separator) {
        List<String> parts = new ArrayList<>();
        int from = 0;
        while (true) {
            int at = text.indexOf(separator, from);
            String piece = trimUnicode(at < 0 ? text.substring(from) : text.substring(from, at));
            if (!piece.isEmpty()) {
                parts.add(piece);
            }
            if (at < 0) {
                return parts;
            }
            from = at + separator.length();
        }
    }

    /** Trims leading and trailing Unicode white space, including the ideographic space. */
    private static String trimUnicode(String s) {
        int start = 0;
        int end = s.length();
        while (start < end && isTrimmable(s.charAt(start))) {
            start++;
        }
        while (end > start && isTrimmable(s.charAt(end - 1))) {
            end--;
        }
        return s.substring(start, end);
    }

    /** Tells whether a character counts as white space for trimming purposes. */
    private static boolean isTrimmable(char c) {
        // 0x85 is NEL (next line), which neither Character method classifies as space.
        return Character.isWhitespace(c) || Character.isSpaceChar(c) || c == 0x85;
    }

    /** Tells whether a string is {@code null}, empty or made of white space only. */
    private static boolean isBlank(String s) {
        if (s == null) {
            return true;
        }
        for (int i = 0; i < s.length(); i++) {
            if (!Character.isWhitespace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;

import org.apache.commons.lang.StringUtils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.List;

/**
 * One-off script that removes from each hotel's usedNames value every name equal (ignoring
 * case) to one of the hotel's other name columns, caps the value at the last 20 names, and
 * writes the resulting {@code UPDATE} statements for {@code hotel_info} to a file.
 *
 * @Version 1.0
 * @Author shiqil.liu
 * @Date 2019/5/11 22:44
 */

public class CC {
    /**
     * Reads {@code D:\NamesAll3.txt} (tab-separated) and writes the SQL script to
     * {@code D:\outputAll3.txt}. Four counters are printed to standard output at the end.
     *
     * <p>Expected columns per line: seq, usedNames ({@code a||b||...}), a name column and an
     * optional second name column whose value {@code \N} stands for "no value".
     * NOTE(review): the original column comments read "cym" and "zwm" - presumably the used
     * names and the Chinese name; verify against the export query.
     */
    public static void main(String[] args) {
        String p1 = "D:\\NamesAll3.txt";
        File writeName = new File("D:\\outputAll3.txt"); // created by FileWriter if missing
        try (FileReader r1 = new FileReader(p1);
             FileWriter writer = new FileWriter(writeName);
             BufferedWriter out = new BufferedWriter(writer);
             BufferedReader br1 = new BufferedReader(r1)) {
            String readLine;

            int count = 0;  // never incremented; printed to keep the four-line console output
            int count2 = 0; // statements that delete the usedNames key
            int count3 = 0; // statements that overwrite the usedNames key
            int count4 = 0; // skipped lines with fewer than three columns
            while ((readLine = br1.readLine()) != null) {
                List<String> list1 = Splitter.on('\t').trimResults().omitEmptyStrings().splitToList(readLine);

                // Blank or truncated rows used to abort the whole run with an
                // IndexOutOfBoundsException; they are skipped and reported instead.
                if (list1.size() < 3) {
                    if (!list1.isEmpty()) {
                        count4++;
                        System.err.println("skipped malformed line: " + readLine);
                    }
                    continue;
                }

                String s1 = list1.get(0); // seq
                String s2 = list1.get(1); // used names, "||" separated
                String s3 = list1.get(2); // first name column
                String s3low = s3.toLowerCase();
                String s4 = "";
                if (list1.size() > 3) {
                    s4 = list1.get(3);    // optional second name column
                }

                // "\N" in the optional column means there is no value.
                if (s4.equals("\\N")) {
                    s4 = "";
                }
                String s4low = s4.toLowerCase();

                // Mutable copy: splitToList returns an unmodifiable list.
                List<String> list = Lists.newArrayList(Splitter.on("||").omitEmptyStrings().trimResults().splitToList(s2));
                list.removeIf(s ->
                    StringUtils.equals(s.toLowerCase(), s3low) || StringUtils.equals(s.toLowerCase(), s4low)
                );

                // Keep only the last 20 names.
                if (list.size() > 20) {
                    list = list.subList(list.size() - 20, list.size());
                }

                String usedNames = Joiner.on("||").join(list);
                if (StringUtils.equals(s2, usedNames)) {
                    continue; // nothing changed for this hotel
                }

                // Double single quotes so the values are valid SQL string literals.
                String seqLiteral = s1.replace("'", "''");

                // NOTE: other runs of this script used statements that additionally set
                // ",online_status=2,last_mod=now()" before the where clause; mind which
                // variant is active before running.
                if (StringUtils.isBlank(usedNames)) {
                    count2++;
                    out.write("update hotel_info set attrs = delete(attrs,'usedNames') where hotel_seq='" + seqLiteral + "';" + "\r\n");
                } else {
                    count3++;
                    usedNames = usedNames.replace("'", "''");
                    out.write("update hotel_info set attrs = attrs || hstore('usedNames','" + usedNames + "') where hotel_seq='" + seqLiteral + "';" + "\r\n");
                }
            }
            System.out.println(count);
            System.out.println(count2);
            System.out.println(count3);
            System.out.println(count4);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

猜你喜欢

转载自www.cnblogs.com/TheQi/p/11390717.html