csv文件:
1、每行数据集各元素以逗号间隔
2、若单元素中有逗号,则整个元素以双引号括起来为一个单元素
3、若双引号元素中又有双引号,则需要把该双引号转义为两个连续的双引号 ""(下面的实现按 "" 识别并还原为一个双引号)
以clisp为例实现 csv 文件读取,实现如下:
read-csv-file: 可以直接读取一个csv文件,并以列表形式返回数据。
split-csv-line: 可以处理单行数据集,并返回列表形式数据集;所有元素都默认为string
;;;; WHJ.20180827 read csv file
;;;; WHJ.20180831 完善
;;; Package for the CSV reader.  It uses COMMON-LISP, the CHARSET package
;;; (referenced below as charset:gbk) and CL-PPCRE (regex-replace-all),
;;; and exports the two entry points.
(defpackage #:WHJ.CSV
  (:use #:cl
        #:charset
        #:cl-ppcre)
  (:export #:split-csv-line
           #:read-csv-file))

;;; Everything that follows is defined inside WHJ.CSV.
(in-package #:WHJ.CSV)
(defun next-f1 (str str-len idx)
  "Return the index of the first comma in STR at or after position IDX.
When there is no such comma, return STR-LEN instead."
  (or (position #\, str :start idx)
      str-len))
(defun next-end (str slen idx)
  "Return the index at which the CSV field of STR starting at IDX ends.

STR is one CSV record, SLEN is its length and IDX is the index of the first
character of the field.  The result is the index of the comma terminating
the field, or SLEN when the field runs to the end of the record.  SLEN is
also returned when IDX is not below SLEN.

A field whose first character is a double quote is scanned as a quoted
field: commas inside it are skipped and two consecutive double quotes count
as one escaped quote.  A quoted field that is never closed extends to the
end of the record; the scan never indexes past the end of STR."
  (cond
    ((>= idx slen) slen)
    ;; Unquoted field (possibly empty, e.g. in ",,,,"): it ends at the next
    ;; comma at or after IDX.
    ((char/= (char str idx) #\")
     (next-f1 str slen idx))
    (t
     ;; Quoted field: walk the characters after the opening quote.
     (loop with i = (1+ idx)
           while (< i slen)
           do (cond
                ((char/= (char str i) #\")
                 (incf i))
                ;; A doubled quote is an escaped quote: skip both characters.
                ((and (< (1+ i) slen)
                      (char= (char str (1+ i)) #\"))
                 (incf i 2))
                ;; Closing quote: the field ends at the next comma after it.
                (t
                 (return (next-f1 str slen i))))
           ;; Ran off the end without a closing quote: take the rest.
           finally (return slen)))))
(defun split-csv-line (str)
  "Split the CSV record STR into a list of field strings.

Fields are separated by commas.  A field that starts with a double quote is
treated as quoted: its opening quote is removed, and its closing quote is
removed when present.  In every field two consecutive double quotes are then
collapsed into one.  A record ending in a comma yields a final empty string.
An empty STR yields NIL.  All fields are returned as strings."
  (let ((len (length str)))
    (flet ((field-value (start end)
             ;; Text of the field occupying [START, END) with the quoting
             ;; removed.
             (let* ((raw (subseq str start end))
                    (raw-len (length raw)))
               (when (and (plusp raw-len) (char= (char raw 0) #\"))
                 ;; Drop the opening quote, and the closing quote only when
                 ;; there is one, so that a lone or unterminated quote neither
                 ;; signals an error nor loses a data character.
                 (let ((closed (and (> raw-len 1)
                                    (char= (char raw (1- raw-len)) #\"))))
                   (setf raw (subseq raw 1 (if closed (1- raw-len) raw-len)))))
               (regex-replace-all "\"\"" raw "\""))))
      (append
       (loop for start = 0 then (1+ end)
             for end = (next-end str len start)
             while (< start len)
             collect (field-value start end))
       ;; The loop stops before a field that would start at LEN, so a trailing
       ;; comma needs its empty last field added here.  The length guard keeps
       ;; an empty record from indexing position -1.
       (when (and (plusp len)
                  (char= (char str (1- len)) #\,))
         (list ""))))))
(defun read-csv-file (file &optional (charset charset:gbk))
  "Read the CSV file FILE and return its rows as a list of lists of strings.

FILE is opened with CHARSET as its :external-format (default charset:gbk).
All lines are read first; after the file has been closed each line is split
with SPLIT-CSV-LINE, and the rows are returned in file order."
  (let ((lines '()))
    (with-open-file (stream file :external-format charset)
      (do ((line (read-line stream nil nil) (read-line stream nil nil)))
          ((null line))
        (push line lines)))
    (mapcar #'split-csv-line (nreverse lines))))