file io

On Windows, instead of reading the file through `Source`, use `BufferedReader`.

The example below deletes the lines containing Chinese text from a file.


/** Command-line tool that copies a UTF-8 text file to `<input>.eng`, dropping every
  * line that contains a Chinese character or CJK / full-width punctuation.
  *
  * Usage: `ChineseDrop <input-file>`, e.g. `ChineseDrop G:\fromHD\braveheart\braveheart.srt`.
  * Every input line is also echoed to stdout together with its classification.
  */
object ChineseDrop {

  import java.io.{BufferedReader, File, FileInputStream, InputStreamReader, PrintWriter}

  /** Unicode blocks whose members are treated as "Chinese" text or symbols.
    *
    * Note: GENERAL_PUNCTUATION (U+2000..U+206F) also holds Western typographic marks
    * such as curly quotes and dashes, so lines containing those are dropped as well.
    */
  private val ChineseBlocks: Set[Character.UnicodeBlock] = Set(
    Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
    Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
    Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
    Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
    Character.UnicodeBlock.GENERAL_PUNCTUATION
  )

  /** Entry point.
    *
    * @param args `args(0)` is the path of the input file; the filtered copy is written
    *             next to it with the suffix `.eng`.
    */
  def main(args: Array[String]): Unit = {
    if (args.length < 1) {
      println(" input file name "); System.exit(-1)
    }

    val in = new BufferedReader(new InputStreamReader(new FileInputStream(args(0)), "utf-8"))
    try {
      // Write with the same charset the input is decoded with, rather than the
      // platform default, so non-ASCII characters in kept lines survive the round trip.
      val writer = new PrintWriter(new File(args(0) + ".eng"), "utf-8")
      try {
        // readLine() returns null at end of stream.
        Iterator.continually(in.readLine()).takeWhile(_ != null).foreach { line =>
          val chinese = isChinese(line)
          println(s" $line is $chinese ")
          if (!chinese) writer.println(line)
        }
      } finally writer.close()
    } finally in.close()
  }

  /** Tells whether a single Unicode code point belongs to one of [[ChineseBlocks]].
    *
    * `Character.UnicodeBlock.of` returns null for code points outside every block,
    * hence the `Option` wrapper.
    */
  private def isChineseCodePoint(codePoint: Int): Boolean =
    Option(Character.UnicodeBlock.of(codePoint)).exists(ChineseBlocks.contains)

  /** Tells whether a string contains at least one Chinese character or symbol.
    *
    * The string is scanned by code point rather than by UTF-16 `Char`, so ideographs
    * outside the Basic Multilingual Plane (e.g. CJK Extension B), which are encoded
    * as surrogate pairs, are recognised too.
    *
    * @param strName the text to inspect
    * @return `true` if any code point falls in one of the Chinese blocks, `false`
    *         otherwise (including for the empty string)
    */
  def isChinese(strName: String): Boolean = {
    import scala.annotation.tailrec

    @tailrec
    def scan(index: Int): Boolean =
      if (index >= strName.length) false
      else {
        val codePoint = strName.codePointAt(index)
        if (isChineseCodePoint(codePoint)) true
        else scan(index + Character.charCount(codePoint))
      }

    scan(0)
  }

}




Using `Source`: to read a file in a different character encoding, pass the encoding as a second (curried) argument after `fromFile`, e.g. `Source.fromFile(path)("utf-8")`.

// File read: print every line of a text file to stdout.
// The source is closed in `finally` so the file handle is released even if
// reading or printing fails part-way through.
val file = Source.fromFile("E:\\scalaIO.txt ")
try {
  for (line <- file.getLines()) println(line)
} finally {
  file.close()
}


Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326490378&siteId=291194637