=======先介绍下 BOM(Byte Order Mark,字节顺序标记)==============
Bytes          Encoding Form
EF BB BF       UTF-8
FF FE          UTF-16 aka UCS-2, little endian
FE FF          UTF-16 aka UCS-2, big endian
FF FE 00 00    UTF-32 aka UCS-4, little endian
00 00 FE FF    UTF-32 aka UCS-4, big endian
=======================
读取 Unicode CSV 文件
/**
 * Opens a file for reading and attaches a stream filter that converts its
 * contents to UTF-8 as they are read.
 *
 * The source encoding is chosen as follows:
 *  - if the file starts with a UTF-16 byte order mark (FF FE or FE FF), the
 *    encoding is "UTF-16";
 *  - otherwise it is guessed with mb_detect_encoding() from the first
 *    1000 bytes of the file.
 * When no encoding can be determined, the handle is returned without a filter.
 *
 * @param string $filename Path of the file to open.
 *
 * @return resource|false Read handle rewound to the start of the file, or
 *                        false when the file cannot be opened.
 */
function fopen_utf8($filename)
{
    $handle = fopen($filename, 'r');
    if ($handle === false) {
        // fopen() has already raised a warning; mirror its failure value
        // instead of passing false on to fread().
        return false;
    }

    // Peek at the first two bytes, then go back so the caller (and the
    // conversion filter) still see the file from its very beginning.
    $bom = fread($handle, 2);
    rewind($handle);

    if ($bom === chr(0xff).chr(0xfe) || $bom === chr(0xfe).chr(0xff)) {
        // UTF-16 byte order mark present.
        $encoding = 'UTF-16';
    } else {
        // Sample the first 1000 bytes. The trailing 'e' is a workaround for an
        // mbstring detection bug; it must be appended with the string
        // concatenation operator "." ("+" would perform numeric addition).
        $file_sample = fread($handle, 1000) . 'e';
        rewind($handle);
        $encoding = mb_detect_encoding(
            $file_sample,
            'UTF-8, UTF-7, ASCII, EUC-JP,SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP'
        );
    }

    if ($encoding) {
        stream_filter_append($handle, 'convert.iconv.'.$encoding.'/UTF-8');
    }

    return $handle;
}
生成 unicode csv (此php文件一定要是无BOM的UTF-8编码文件)
// Fragment: re-encode a CSV payload as UTF-16LE and announce it as a download.
// NOTE(review): $content is not defined in this fragment; it is presumably the
// UTF-8 CSV text built earlier by the caller - confirm.
$content=iconv("UTF-8","UTF-16LE",$content);
$content = "\xFF\xFE".$content; // Prepend the UTF-16LE byte order mark (FF FE)
// Declare the response as UTF-16LE CSV and suggest "test.csv" as the attachment file name.
header("Content-type: text/csv;charset=UTF-16LE") ;
header("Content-Disposition: attachment; filename=test.csv");
再介绍一个 操作 ANSI 编码 以 "," 隔开的 操作类 参考:<a href="http://www.jb51.net/article/21416.htm">http://www.jb51.net/article/21416.htm</a>
<?php
// The Unicode BOM is the code point U+FEFF; once encoded it appears as one of
// the byte sequences below.
define ('UTF32_BIG_ENDIAN_BOM'   , chr(0x00) . chr(0x00) . chr(0xFE) . chr(0xFF));
define ('UTF32_LITTLE_ENDIAN_BOM', chr(0xFF) . chr(0xFE) . chr(0x00) . chr(0x00));
define ('UTF16_BIG_ENDIAN_BOM'   , chr(0xFE) . chr(0xFF));
define ('UTF16_LITTLE_ENDIAN_BOM', chr(0xFF) . chr(0xFE));
define ('UTF8_BOM'               , chr(0xEF) . chr(0xBB) . chr(0xBF));

/**
 * Detects the Unicode encoding of a file from its byte order mark.
 *
 * @param string $filename Path of the file to inspect.
 *
 * @return string|null 'UTF-8', 'UTF-32BE', 'UTF-32LE', 'UTF-16BE' or
 *                     'UTF-16LE'; null when the file cannot be read or does
 *                     not start with one of these byte order marks.
 */
function detect_utf_encoding($filename) {
    // The longest BOM is four bytes, so there is no need to load the whole file.
    $text = file_get_contents($filename, false, null, 0, 4);
    if ($text === false) {
        return null;
    }

    $first2 = substr($text, 0, 2);
    $first3 = substr($text, 0, 3);
    $first4 = substr($text, 0, 4);

    // The UTF-32 checks must run before the UTF-16 ones: the UTF-32LE BOM
    // (FF FE 00 00) begins with the UTF-16LE BOM (FF FE).
    if ($first3 === UTF8_BOM) return 'UTF-8';
    elseif ($first4 === UTF32_BIG_ENDIAN_BOM) return 'UTF-32BE';
    elseif ($first4 === UTF32_LITTLE_ENDIAN_BOM) return 'UTF-32LE';
    elseif ($first2 === UTF16_BIG_ENDIAN_BOM) return 'UTF-16BE';
    elseif ($first2 === UTF16_LITTLE_ENDIAN_BOM) return 'UTF-16LE';

    return null;
}
?>