1 package test;
2
3 import lombok.extern.slf4j.Slf4j;
4
5 import java.io.BufferedReader;
6 import java.io.FileInputStream;
7 import java.io.IOException;
8 import java.io.InputStreamReader;
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.List;
12 import java.util.Map;
13
14 @Slf4j
15 public class CSVFileUtil {
16 /**
17 * CSV文件编码
18 */
19 private static final String ENCODE = "UTF-8";
20
21 /**
22 * 读取CSV文件得到List,默认使用UTF-8编码
23 * @param fileName 文件路径
24 * @return
25 */
26 public static List<String> getLines(String fileName) {
27 return getLines(fileName, ENCODE);
28 }
29
30 /**
31 * 读取CSV文件得到List
32 * @param fileName 文件路径
33 * @param encode 编码
34 * @return
35 */
36 public static List<String> getLines(String fileName, String encode) {
37 List<String> lines = new ArrayList<String>();
38 BufferedReader br = null;
39 InputStreamReader isr = null;
40 FileInputStream fis = null;
41 try {
42 fis = new FileInputStream(fileName);
43 isr = new InputStreamReader(fis, encode);
44 br = new BufferedReader(isr);
45 String line;
46 while ((line = br.readLine()) != null) {
47 StringBuilder sb = new StringBuilder();
48 sb.append(line);
49 boolean readNext = countChar(sb.toString(), '"', 0) % 2 == 1;
50 // 如果双引号是奇数的时候继续读取。考虑有换行的是情况
51 while (readNext) {
52 line = br.readLine();
53 if (line == null) {
54 return null;
55 }
56 sb.append(line);
57 readNext = countChar(sb.toString(), '"', 0) % 2 == 1;
58 }
59 lines.add(sb.toString());
60 }
61 } catch (Exception e) {
62 log.error("Read CSV file failure :{}", e);
63 } finally {
64 try {
65 if (br != null) {
66 br.close();
67 }
68 if (isr != null) {
69 isr.close();
70 }
71 if (fis != null) {
72 fis.close();
73 }
74 } catch (IOException e) {
75 log.error("Close stream failure :{}", e);
76 }
77 }
78 return lines;
79 }
80
81 public static String[] fromCSVLine(String source) {
82 return fromCSVLine(source, 0);
83 }
84
85 /**
86 * 把CSV文件的一行转换成字符串数组。指定数组长度,不够长度的部分设置为null
87 * @param source
88 * @param size
89 * @return
90 */
91 public static String[] fromCSVLine(String source, int size) {
92 List list = fromCSVLineToArray(source);
93 if (size < list.size()) {
94 size = list.size();
95 }
96 String[] arr = new String[size];
97 list.toArray(arr);
98 return arr;
99 }
100
101 public static List fromCSVLineToArray(String source) {
102 if (source == null || source.length() == 0) {
103 return new ArrayList();
104 }
105 int currentPosition = 0;
106 int maxPosition = source.length();
107 int nextComa = 0;
108 List list = new ArrayList();
109 while (currentPosition < maxPosition) {
110 nextComa = nextComma(source, currentPosition);
111 list.add(nextToken(source, currentPosition, nextComa));
112 currentPosition = nextComa + 1;
113 if (currentPosition == maxPosition) {
114 list.add("");
115 }
116 }
117 return list;
118 }
119
120 /**
121 * 把字符串类型的数组转换成一个CSV行。(输出CSV文件的时候用)
122 *
123 * @param arr
124 * @return
125 */
126 public static String toCSVLine(String[] arr) {
127 if (arr == null) {
128 return "";
129 }
130 StringBuilder sb = new StringBuilder();
131 for (int i = 0; i < arr.length; i++) {
132 String item = addQuote(arr[i]);
133 sb.append(item);
134 if (arr.length - 1 != i) {
135 sb.append(",");
136 }
137 }
138 return sb.toString();
139 }
140
141 /**
142 * 将list的第一行作为Map的key,下面的列作为Map的value
143 * @param list
144 * @return
145 */
146 public static List<Map<String, String>> parseList(List<String> list) {
147 List<Map<String, String>> resultList = new ArrayList<Map<String, String>>();
148 String firstLine = list.get(0);
149 String[] fields = firstLine.split(",");
150 for (int i = 1; i < list.size(); i++) {
151 String valueLine = list.get(i);
152 String[] valueItems = CSVFileUtil.fromCSVLine(valueLine);
153 Map<String, String> map = new HashMap<String, String>();
154 for (int j = 0; j < fields.length; j++) {
155 map.put(fields[j], valueItems[j]);
156 }
157 resultList.add(map);
158 }
159 return resultList;
160 }
161
162 /**
163 * 字符串类型的List转换成一个CSV行。(输出CSV文件的时候用)
164 *
165 * @param strArrList
166 * @return
167 */
168 public static String toCSVLine(ArrayList strArrList) {
169 if (strArrList == null) {
170 return "";
171 }
172 String[] strArray = new String[strArrList.size()];
173 for (int idx = 0; idx < strArrList.size(); idx++) {
174 strArray[idx] = (String) strArrList.get(idx);
175 }
176 return toCSVLine(strArray);
177 }
178
179 /**
180 * 计算指定字符的个数
181 *
182 * @param str 文字列
183 * @param c 字符
184 * @param start 开始位置
185 * @return 个数
186 */
187 private static int countChar(String str, char c, int start) {
188 int index = str.indexOf(c, start);
189 return index == -1 ? 0 : countChar(str, c, index + 1) + 1;
190 }
191
192 /**
193 * 查询下一个逗号的位置。
194 *
195 * @param source 文字列
196 * @param st 检索开始位置
197 * @return 下一个逗号的位置。
198 */
199 private static int nextComma(String source, int st) {
200 int maxPosition = source.length();
201 boolean inquote = false;
202 while (st < maxPosition) {
203 char ch = source.charAt(st);
204 if (!inquote && ch == ',') {
205 break;
206 } else if ('"' == ch) {
207 inquote = !inquote;
208 }
209 st++;
210 }
211 return st;
212 }
213
214 /**
215 * 取得下一个字符串
216 *
217 * @param source
218 * @param st
219 * @param nextComma
220 * @return
221 */
222 private static String nextToken(String source, int st, int nextComma) {
223 StringBuilder strb = new StringBuilder();
224 int next = st;
225 while (next < nextComma) {
226 char ch = source.charAt(next++);
227 if (ch == '"') {
228 if ((st + 1 < next && next < nextComma) && (source.charAt(next) == '"')) {
229 strb.append(ch);
230 next++;
231 }
232 } else {
233 strb.append(ch);
234 }
235 }
236 return strb.toString();
237 }
238
239 /**
240 * 在字符串的外侧加双引号。如果该字符串的内部有双引号的话,把"转换成""。
241 *
242 * @param item 字符串
243 * @return 处理过的字符串
244 */
245 private static String addQuote(String item) {
246 if (item == null || item.length() == 0) {
247 return "\"\"";
248 }
249 StringBuilder sb = new StringBuilder();
250 sb.append('"');
251 for (int idx = 0; idx < item.length(); idx++) {
252 char ch = item.charAt(idx);
253 if ('"' == ch) {
254 sb.append("\"\"");
255 } else {
256 sb.append(ch);
257 }
258 }
259 sb.append('"');
260 return sb.toString();
261 }
262 }
1 package test;
2
3 import com.alibaba.fastjson.JSONObject;
4 import lombok.extern.slf4j.Slf4j;
5
6 import java.util.List;
7 import java.util.Map;
8
9 @Slf4j
10 public class CsvTest {
11 public static void main(String[] args) {
12 List<String> lines = CSVFileUtil.getLines("d:/智能问答.csv", "UTF-8");
13 List<Map<String, String>> mapList = CSVFileUtil.parseList(lines);
14 System.out.println(Arrays.toString(mapList.toArray()));
15 }
16
17 }