读写文件编码问题

FileWriter和FileReader默认使用系统当前的默认编码方式(自JDK 18起,默认编码统一为UTF-8)。
在Java 11之前,FileWriter和FileReader都不支持通过参数指定编码方式(Java 11起新增了接受Charset参数的构造方法),而OutputStreamWriter和InputStreamReader一直可以。后两个类从名字上就可以看出是字节流和字符流的组合,实际上也是连接两者的桥梁。

/**
 * Writes {@code str} to the named file encoded as UTF-8, replacing any
 * existing content. The file is created if it does not exist.
 *
 * @param str      the text to write
 * @param fileName path of the file to (over)write
 * @throws IOException if the file cannot be opened, created, or written
 */
public static void writeToFileUTF8(String str, String fileName) throws IOException {
        File f = new File(fileName);
        // FileOutputStream creates the file when it is missing, so no separate
        // exists()/createNewFile() step is needed. try-with-resources closes
        // (and flushes) the writer even when write() throws.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(f), "UTF-8"))) {
            writer.write(str);
        }
    }
    
    // Appending to a File , encoding is utf-8
/**
 * Appends {@code str} to the end of the named file encoded as UTF-8.
 * The file is created if it does not exist.
 *
 * @param str      the text to append
 * @param fileName path of the file to append to
 * @throws IOException if the file cannot be opened, created, or written
 */
public static void appendToFileUTF8(String str, String fileName) throws IOException {
        File f = new File(fileName);
        // The second FileOutputStream argument (true) selects append mode; the
        // stream creates the file when it is missing. try-with-resources closes
        // (and flushes) the writer even when write() throws.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(f, true), "UTF-8"))) {
            writer.write(str);
        }
    }

/**
 * Reads the named file line by line, decoding it with the charset reported
 * by {@code getFileCharacterEncode}, and returns the collected text in a map.
 *
 * <p>In the visible code the accumulated buffer is stored under the key
 * {@code "其他"} ("other"). The per-line handling is elided in this snippet,
 * so any further keys it may add are not shown here.
 *
 * @param fileName path of the file to read
 * @return a map holding the collected content
 * @throws Exception if the file cannot be opened or read; the underlying
 *                   {@link IOException} is attached as the cause
 */
private static Map<String,String> readFile(String fileName) throws Exception{
        File file = new File(fileName);
        Map<String,String> contentMap = new HashMap<String, String>();
        String enCode = getFileCharacterEncode(fileName);
        // try-with-resources closes the reader exactly once on every path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), enCode))) {
            String tempString = null;
            StringBuffer sb = new StringBuffer();
            while ((tempString = reader.readLine()) != null) {
                //...
            }
            contentMap.put("其他", sb.toString());
        } catch (IOException e) {
            log.error("read file error , file name is : " + fileName,e);
            // Keep the message and the original cause so callers can diagnose the failure.
            throw new Exception("read file error , file name is : " + fileName, e);
        }
        return contentMap;
    }

/**
 * Guesses a file's character encoding from its leading byte-order mark (BOM).
 *
 * <ul>
 *   <li>{@code EF BB BF} (UTF-8 BOM) gives {@code "UTF-8"}</li>
 *   <li>{@code FF FE} gives {@code "Unicode"}</li>
 *   <li>{@code FE FF} gives {@code "UTF-16BE"}</li>
 *   <li>anything else, including an empty, too-short, or unreadable file,
 *       gives {@code "GBK"}</li>
 * </ul>
 *
 * @param fileName path of the file to inspect
 * @return the charset name chosen by the rules above
 */
private static String getFileCharacterEncode(String fileName) {
        // -1 means "byte not available"; it never equals a BOM byte, so short
        // files fall through to the default instead of being folded into the
        // comparison (a lone 0xFF byte used to be reported as UTF-16BE).
        int first = -1;
        int second = -1;
        int third = -1;
        try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fileName))) {
            first = bis.read();
            second = bis.read();
            third = bis.read();
        } catch (IOException e) {
            log.error("get file encode error, file name is : " + fileName , e);
        }
        String code;
        if (first == 0xEF && second == 0xBB && third == 0xBF) {
            // Check all three BOM bytes: EF BB alone can also start GBK text.
            code = "UTF-8";
        } else if (first == 0xFF && second == 0xFE) {
            code = "Unicode";
        } else if (first == 0xFE && second == 0xFF) {
            code = "UTF-16BE";
        } else {
            code = "GBK";
        }
        log.info("file encode is : " + code);
        return code;
    }  

猜你喜欢

转载自even-ing.iteye.com/blog/2287702