Use Scala to read a text (character-based) file from HDFS. The file contains cleaned data, with one record per line. The current requirement is to read the file a portion at a time, sequentially.
Previously, reading through a raw byte stream occasionally produced garbled characters, so the code was eventually changed to read one line at a time instead.
import java.io.{BufferedReader, InputStreamReader}
import java.nio.charset.StandardCharsets

import org.apache.hadoop.fs.FSDataInputStream

import scala.util.control.NonFatal

/**
 * Reads an HDFS text file line by line, printing each line with a running
 * line counter.
 *
 * Created by wx on 2017/7/25.
 */
object ReadHDFSFileByLine {

  def main(args: Array[String]): Unit = {
    var inputStream: FSDataInputStream = null
    var bufferedReader: BufferedReader = null
    try {
      // Get the HDFS input stream (see the earlier article for HDFSUtil).
      inputStream = HDFSUtil.getFSDataInputStream("hdfs://master:9000/TestData/aviation7/part-00018")
      // Wrap in a buffered character reader. The charset is given explicitly:
      // relying on the JVM's platform default charset is the most likely cause
      // of the garbled characters seen with the earlier byte-stream approach.
      bufferedReader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))

      // NOTE: the Java idiom `while ((line = reader.readLine()) != null)` does
      // NOT work in Scala — assignment is an expression of type Unit, so the
      // comparison is always true and the loop never terminates. Use an
      // Iterator over readLine() results instead, stopping at the first null.
      var count = 1
      Iterator
        .continually(bufferedReader.readLine())
        .takeWhile(_ != null)
        .foreach { lineTxt =>
          println(count + "\t" + lineTxt)
          count += 1
        }
    } catch {
      // NonFatal lets truly fatal errors (OutOfMemoryError, etc.) propagate
      // instead of being swallowed by a blanket Exception handler.
      case NonFatal(e) => e.printStackTrace()
    } finally {
      // Close the reader first (this also closes the wrapped stream), then
      // release the HDFS resources held by HDFSUtil.
      if (bufferedReader != null) {
        bufferedReader.close()
      }
      if (inputStream != null) {
        HDFSUtil.close
      }
    }
  }
}