[记录一] PHP自动识别文本编码并转码生成新文件

重点:
1. 识别文本编码的函数
2. 文本内容转码
3. 生成新文件
4. 获取文件后缀名、文件名

// Destination root for the generated files and source root that is scanned.
$newDir = "D:/temp/";
$oldDir = "D:/www/";
// Collect every file found beneath the source directory.
$filenames = get_filenamesbydir($oldDir);
// For each file: detect its encoding, convert it, and write the new file.
filenamesOption($filenames);
// Make sure the directory $PATH exists, creating missing parent directories too.
// Nothing happens when $PATH already is a directory.
function createDir($PATH) {
	// is_dir() is already false for a path that does not exist, so it covers the existence check.
	if (is_dir($PATH)) {
		return;
	}
	mkdir($PATH, 0777, true);
}

// Return the last component of $WholeFileName, i.e. the text after its final '/'.
// A name that contains no '/' is returned unchanged.
function getFileName($WholeFileName) {
	$slashPos = strrpos($WholeFileName, '/');
	if ($slashPos === false) {
		// Without this guard, false + 1 evaluates to 1 and the first character would be cut off.
		return $WholeFileName;
	}
	return substr($WholeFileName, $slashPos + 1);
}

// Map a file path under the source root to the matching path under the destination root.
//
// $newDir            Destination root, with or without a trailing '/'.
// $oldDir            Source root, with or without a trailing '/'.
// $oldWholeFileName  Full path of a file located under $oldDir.
//
// Returns $newDir joined to the file's path relative to $oldDir with exactly one '/'.
// The source root is stripped by length only; the function does not verify that
// $oldWholeFileName really starts with $oldDir.
function getNewWholeFile($newDir, $oldDir, $oldWholeFileName) {
	// Measure the root without trailing slashes so "D:/www" and "D:/www/" give the same result.
	$rootLength = strlen(rtrim($oldDir, '/'));
	// Drop the leading slash(es) of the remainder; the separator is added back once below.
	$relative = ltrim((string) substr($oldWholeFileName, $rootLength), '/');
	return rtrim($newDir, '/') . '/' . $relative;
}

// Write $content to the destination-side counterpart of $oldWholeFileName.
//
// $newDir            Destination root directory.
// $oldDir            Source root directory.
// $oldWholeFileName  Full path of the original file.
// $content           Data to write into the new file.
//
// The target directory is created first if needed. After writing, the encoding that
// detect_encoding() reports for the new file is echoed, followed by "</br>".
function createNewFile($newDir, $oldDir, $oldWholeFileName, $content) {
	$newWholeFileName = getNewWholeFile($newDir, $oldDir, $oldWholeFileName);
	// Everything before the last '/' is the directory that has to exist before writing.
	$newFileDir = substr($newWholeFileName, 0, strrpos($newWholeFileName, '/'));
	createDir($newFileDir);
	file_put_contents($newWholeFileName, $content);
	echo detect_encoding($newWholeFileName) . "</br>";
}

// Copy $oldWholeFileName unchanged to its destination-side counterpart.
//
// $newDir            Destination root directory.
// $oldDir            Source root directory.
// $oldWholeFileName  Full path of the file to copy.
//
// The target directory is created first if needed.
function copyOldToNew($newDir, $oldDir, $oldWholeFileName) {
	$newWholeFileName = getNewWholeFile($newDir, $oldDir, $oldWholeFileName);
	// Everything before the last '/' is the directory that has to exist before copying.
	$newFileDir = substr($newWholeFileName, 0, strrpos($newWholeFileName, '/'));
	createDir($newFileDir);
	copy($oldWholeFileName, $newWholeFileName);
}

// Recursively append the path of every regular file found at or below $path to $files.
//
// $path   File or directory to inspect.
// $files  Array passed by reference; matching file paths are appended to it.
//
// A directory that cannot be opened is skipped.
function get_allfiles($path, &$files) {
	if (is_dir($path)) {
		$dp = dir($path);
		if ($dp === false) {
			// dir() failed (e.g. unreadable directory); there is nothing to iterate.
			return;
		}
		// Compare against false explicitly: an entry named "0" is falsy and would
		// otherwise end the loop before the remaining entries are visited.
		while (($file = $dp -> read()) !== false) {
			if ($file !== "." && $file !== "..") {
				get_allfiles($path . "/" . $file, $files);
			}
		}
		$dp -> close();
		return;
	}
	if (is_file($path)) {
		$files[] = $path;
	}
}

// Return, as a new array, the file paths that get_allfiles() gathers for $dir.
function get_filenamesbydir($dir) {
	// get_allfiles() fills this array through its by-reference parameter.
	$collected = array();
	get_allfiles($dir, $collected);
	return $collected;
}

// Process every path in $filenames.
// Files ending in .php, .html or .js (compared case-insensitively) are converted with
// getContents() and written through createNewFile(); every other file is copied
// unchanged through copyOldToNew().
//
// $filenames  Array of full file paths.
// $newDir     Destination root; defaults to "D:/temp/".
// $oldDir     Source root the paths live under; defaults to "D:/www/", the directory
//             the top-level script scans.
function filenamesOption($filenames, $newDir = "D:/temp/", $oldDir = "D:/www/") {
	$convertible = array('.php', '.html', '.js');
	foreach ($filenames as $value) {
		// strrchr() yields the text from the last '.' onward, or false when there is no dot.
		$fileType = strtolower((string) strrchr($value, '.'));
		if (in_array($fileType, $convertible, true)) {
			$contents = getContents($value);
			createNewFile($newDir, $oldDir, $value, $contents);
		} else {
			copyOldToNew($newDir, $oldDir, $value);
		}
	}
}

// Count the distinct substrings of $str that are matched by any entry of $match.
//
// $str    Text to search.
// $match  Array of alternatives; they are joined with '|' and inserted into the
//         pattern verbatim, so regex metacharacters (and the '#' delimiter) inside
//         them are treated as pattern syntax.
//         NOTE(review): confirm whether literal matching is intended; if so the
//         entries need escaping before they are joined.
//
// Returns the number of distinct matched substrings, or 0 when $match is empty.
function selectStrNum($str, $match) {
	if (empty($match)) {
		// An empty alternation "()" matches the empty string and would be counted as one hit.
		return 0;
	}
	preg_match_all('#(' . implode('|', $match) . ')#', $str, $wordsFound);
	return count(array_unique($wordsFound[0]));
}

// Read $file and return its content converted to GBK.
//
// $file  Path of the file to read.
//
// The source encoding is taken from detect_encoding(). The content is returned
// unconverted when it is already GBK or when no encoding could be detected.
// An empty string is returned when the file cannot be read.
function getContents($file) {
	// Load the whole file into one string.
	$str = file_get_contents($file);
	if ($str === false) {
		return '';
	}
	$encoding = detect_encoding($file);
	if ($encoding === null || $encoding === 'GBK') {
		// null: there is no source encoding to convert from.
		// 'GBK': the content already is in the target encoding.
		return $str;
	}
	return mb_convert_encoding($str, 'GBK', $encoding);
}

// Guess the character encoding of the file at $file.
//
// Each candidate is tried in list order: the content is converted from the candidate
// encoding to itself, and the first candidate for which that round trip leaves the
// bytes unchanged is returned.
//
// Returns the encoding name, or null when no candidate round-trips cleanly.
function detect_encoding($file) {
	$list = array('GBK', 'UTF-8', 'UTF-16LE', 'UTF-16BE', 'ISO-8859-1');
	// A failed read yields false; cast it so the comparison below stays string-to-string.
	$str = (string) file_get_contents($file);
	foreach ($list as $item) {
		// Compare the bytes directly and strictly. Loosely comparing md5 hex digests can
		// report two different digests of the form "0e<digits>" as equal.
		if (mb_convert_encoding($str, $item, $item) === $str) {
			return $item;
		}
	}
	return null;
}

猜你喜欢

转载自blog.csdn.net/weixin_41050093/article/details/108279571