气象数据
《Hadoop权威指南》一书中的气象数据地址(其中 YEAR 替换为四位年份): ftp://ftp.ncdc.noaa.gov/pub/data/gsod/YEAR/gsod_YEAR.tar
其中1901年-1928年的数据是空的,不必下载了。
使用 Java 程序下载
package chapter02.downdata;
/**
 * Command-line entry point that fetches the yearly archives for 1987 through
 * 2000 (inclusive) by driving a single {@link FileThread} once per year.
 */
public class Main {
    /**
     * Downloads one archive per year, one after another.
     *
     * @param args ignored
     * @throws Exception declared for compatibility with existing callers
     */
    public static void main(String[] args) throws Exception {
        FileThread file = new FileThread();
        for (int i = 1987; i <= 2000; i++) {
            // String.valueOf avoids the deprecated Integer(int) boxing constructor.
            file.setYear(String.valueOf(i));
            // run() is invoked directly (not start()), so each download executes
            // on the calling thread and finishes before the next year begins.
            // A Thread object cannot be start()ed more than once anyway.
            file.run();
        }
    }
}
package chapter02.downdata;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
/**
 * Downloads the archive for one year from the NCDC FTP server and stores it
 * under {@code E:/bigdata/Hadoop/data/gsod/}.
 *
 * <p>The year text is inserted verbatim into both the remote URL and the local
 * file name, so it is expected to be the directory/file year used by the
 * server (e.g. {@code "1987"}).
 */
public class FileThread extends Thread {
    /** Year text substituted into the remote URL and the local file name. */
    String year;

    /** Creates an instance with no year; call {@link #setYear(String)} before running. */
    public FileThread() {
    }

    /**
     * Creates an instance bound to the given year.
     *
     * @param year year text used to build the URL and the output file name
     */
    public FileThread(String year) {
        this.year = year;
    }

    /**
     * Sets the year to download on the next {@link #run()}.
     *
     * @param year year text used to build the URL and the output file name
     */
    public void setYear(String year) {
        this.year = year;
    }

    /**
     * Copies the remote archive for the current year to the local file.
     *
     * <p>I/O failures are reported on standard error and do not propagate.
     * If no year has been set, nothing is downloaded.
     */
    @Override
    public void run() {
        // Without a year the URL would point at ".../null/gsod_null.tar".
        if (year == null || year.isEmpty()) {
            System.err.println("FileThread: no year set, nothing to download");
            return;
        }
        try {
            URL url = new URL("ftp://ftp.ncdc.noaa.gov/pub/data/gsod/" + year + "/gsod_" + year + ".tar");
            File target = new File("E:/bigdata/Hadoop/data/gsod/gsod_" + year + ".tar");
            // The remote stream is opened first so that a failed connection does
            // not truncate an existing local file. Both streams are closed
            // automatically, in reverse order, even when the copy fails.
            try (InputStream input = url.openStream();
                 FileOutputStream output = new FileOutputStream(target)) {
                byte[] buffer = new byte[204800];
                int count;
                while ((count = input.read(buffer, 0, buffer.length)) > -1) {
                    output.write(buffer, 0, count);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
使用 shell 脚本解压并合并
#!/bin/bash
# unzip_merge_file
#
# Reads three lines from standard input:
#   1. first year of the range
#   2. last year of the range (inclusive)
#   3. name of the output file
# For every year in the range it extracts the matching *YEAR.tar archive(s)
# found in the current directory, decompresses the extracted *.op.gz files,
# appends the resulting *.op files to the output file and then deletes them.
read -r x
read -r y
read -r filename

# Refuse to run with bounds that are not plain numbers or with no output name;
# an empty bound would otherwise be evaluated as 0 by the arithmetic loop.
if ! [[ ${x} =~ ^[0-9]+$ && ${y} =~ ^[0-9]+$ ]]; then
    echo "unzip_merge_file: start and end year must be numeric" >&2
    exit 1
fi
if [[ -z ${filename} ]]; then
    echo "unzip_merge_file: output file name must not be empty" >&2
    exit 1
fi

# Make unmatched globs expand to nothing instead of the literal pattern.
shopt -s nullglob

for ((i = x; i <= y; i++)); do
    archives=( *"${i}".tar )
    if (( ${#archives[@]} == 0 )); then
        echo "unzip_merge_file: no archive found for ${i}, skipping" >&2
        continue
    fi

    # Extract each archive separately: with several names after -f, tar would
    # treat the extra ones as members to extract rather than as archives.
    for archive in "${archives[@]}"; do
        tar -xvf "${archive}"
    done

    packed=( *.op.gz )
    if (( ${#packed[@]} > 0 )); then
        gzip -d -- "${packed[@]}"
    fi

    records=( *.op )
    if (( ${#records[@]} > 0 )); then
        cat -- "${records[@]}" >> "${filename}"
        rm -f -- "${records[@]}"
    fi
done
https://blog.csdn.net/leibniz_zhang/article/details/80590117
https://blog.csdn.net/weixin_40645816/article/details/82110402