On Windows, instead of reading the file through `Source`, use a `BufferedReader`
to read it and delete the lines that contain Chinese.
/** Command-line tool that copies a UTF-8 text file (e.g. a bilingual `.srt` subtitle file)
  * to `<input>.eng`, dropping every line that contains a Chinese character or symbol.
  *
  * Usage: `ChineseDrop <input-file>`
  */
object ChineseDrop {
  import java.io.{BufferedReader, File, FileInputStream, InputStreamReader, PrintWriter}

  // Quick manual check of the classifier:
  //   Array("Win more and lose less", "abadsf", "13123123")
  //     .foreach(word => println(s" $word is ${isChinese(word)} "))
  // Sample input path: G:\\fromHD\\braveheart\\braveheart.srt

  /** Unicode blocks treated as "Chinese" (ideographs plus CJK / full-width punctuation).
    *
    * NOTE(review): GENERAL_PUNCTUATION also covers Western curly quotes, en/em dashes and
    * the ellipsis character, so English lines containing those are dropped as well. Kept
    * for backward compatibility -- confirm whether that is intended.
    */
  private val ChineseBlocks: Set[Character.UnicodeBlock] = Set(
    Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
    Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
    Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
    Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
    Character.UnicodeBlock.GENERAL_PUNCTUATION
  )

  /** Entry point.
    *
    * @param args `args(0)` is the path of the input file; the filtered copy is written
    *             next to it with the suffix `.eng`. With no argument a usage hint is
    *             printed and the JVM exits with status -1.
    */
  def main(args: Array[String]): Unit = {
    if (args.length < 1) {
      println(" input file name ")
      System.exit(-1)
    } else {
      dropChineseLines(args(0))
    }
  }

  /** Copies `path` to `path + ".eng"`, skipping lines for which [[isChinese]] is true.
    * Every input line and its classification is also echoed to stdout.
    */
  private def dropChineseLines(path: String): Unit = {
    val in = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf-8"))
    try {
      // Write with the same encoding we read with; the platform default (e.g. GBK on
      // Windows) would otherwise mangle non-ASCII characters in the lines that are kept.
      val writer = new PrintWriter(new File(path + ".eng"), "UTF-8")
      try {
        // readLine() signals end of input with null.
        Iterator.continually(in.readLine()).takeWhile(_ != null).foreach { line =>
          val chinese = isChinese(line)
          println(s" $line is $chinese ")
          if (!chinese) writer.println(line)
        }
      } finally writer.close()
    } finally in.close()
  }

  /** True when the code point belongs to one of [[ChineseBlocks]]. */
  private def isChineseCodePoint(codePoint: Int): Boolean =
    // UnicodeBlock.of returns null for code points not assigned to any block.
    Option(Character.UnicodeBlock.of(codePoint)).exists(ChineseBlocks.contains)

  /** Returns true if `strName` contains at least one Chinese character or symbol.
    *
    * The string is walked by Unicode code point rather than by UTF-16 `Char`, so
    * supplementary-plane ideographs (CJK Extension B, stored as surrogate pairs) are
    * recognised too.
    *
    * @param strName text to inspect; the empty string yields false
    */
  def isChinese(strName: String): Boolean = {
    @scala.annotation.tailrec
    def loop(index: Int): Boolean =
      if (index >= strName.length) false
      else {
        val codePoint = strName.codePointAt(index)
        if (isChineseCodePoint(codePoint)) true
        else loop(index + Character.charCount(codePoint))
      }
    loop(0)
  }
}
Source usage: to read a file in a different character encoding, pass the encoding name as a second (curried) argument after `fromFile`, e.g. `Source.fromFile(path)("utf-8")`.
// Read a text file and print it line by line.
// No encoding is given, so Source falls back to the implicit default Codec.
// NOTE(review): the path literal ends with a space -- confirm that is intended.
val file=Source.fromFile("E:\\scalaIO.txt ")
try {
  for (line <- file.getLines()) {
    println(line)
  }
} finally {
  // Release the file handle even when reading or printing fails.
  file.close()
}
file io
Guess you like
Origin http://43.154.161.224:23101/article/api/json?id=326490378&siteId=291194637
Recommended
Ranking