package com.alibaba.odps.tunnel.samples;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;

import com.alibaba.odps.tunnel.Column;
import com.alibaba.odps.tunnel.Configuration;
import com.alibaba.odps.tunnel.DataTunnel;
import com.alibaba.odps.tunnel.RecordSchema;
import com.alibaba.odps.tunnel.Upload;
import com.alibaba.odps.tunnel.io.Record;
import com.alibaba.odps.tunnel.io.RecordWriter;

/**
 * Sample that uploads a comma-separated, GBK-encoded text file into one
 * partition of an ODPS table through the data tunnel.
 *
 * <p>Each non-empty line of the file is split on {@code ','} and converted,
 * column by column, according to the column types of the upload schema. Lines
 * that cannot be converted (too few columns, unparsable number or datetime)
 * are skipped.
 */
public class FileUploadSample {

    private static final String endpoint = "http://dt.odps.aliyun.com"; // public network
    // Alternative endpoint: "http://dt-ext.odps.aliyun-inc.com"
    private static final String accessId = "######";
    private static final String accessKey = "#######";
    private static final String project = "######";

    /** Pattern used to parse datetime columns. */
    private static final String DATETIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Byte threshold (50 * 1024^3) after which the current block is closed and a new one opened. */
    private static final long MAX_BYTE_WRITER = 50 * 1024 * 1024 * 1024L;

    /** Number of retries allowed for a single record write before the IOException is rethrown. */
    private static final int MAX_WRITE_RETRIES = 10;

    /** Pause between two write attempts, in milliseconds. */
    private static final long RETRY_SLEEP_MILLIS = 2000L;

    /**
     * Uploads the content of a text file into a table partition.
     *
     * @param fileName  path of the GBK-encoded, comma-separated input file
     * @param tableName name of the target table
     * @param pid       partition specification passed to {@code createUpload}
     * @throws Exception if reading the file fails, if a record still cannot be
     *                   written after {@value #MAX_WRITE_RETRIES} retries, or if
     *                   any tunnel call fails
     */
    public static void UploadFile(String fileName, String tableName, String pid)
            throws Exception {
        Configuration cfg = new Configuration(accessId, accessKey, endpoint);
        DataTunnel tunnel = new DataTunnel(cfg);
        // SimpleDateFormat is not thread-safe, so every call gets its own instance.
        SimpleDateFormat datetimeFormat = new SimpleDateFormat(DATETIME_PATTERN);

        BufferedReader br = null;
        RecordWriter writer = null;
        try {
            long blockId = 0L;
            ArrayList<Long> blocks = new ArrayList<Long>();
            blocks.add(blockId);

            br = new BufferedReader(new InputStreamReader(
                    new FileInputStream(fileName), "gbk"));

            Upload up = tunnel.createUpload(project, tableName, pid);
            RecordSchema schema = up.getSchema();
            Record record = new Record(schema.getColumnCount());
            writer = up.openRecordWriter(blockId);

            long maxBytesCurrent = MAX_BYTE_WRITER;
            String line;
            while ((line = br.readLine()) != null) {
                if (line.length() == 0) {
                    continue;
                }
                // Limit -1 keeps trailing empty fields so the column count stays stable.
                String[] fields = line.split(",", -1);
                if (!fillRecord(record, schema, fields, datetimeFormat)) {
                    // Malformed line: skip it. The record object is reused across
                    // lines, so writing it here would upload a mix of this line
                    // and the previous one.
                    continue;
                }

                // The network may be unstable and make the write throw; retry a
                // bounded number of times before giving up.
                // NOTE(review): block rotation shares this retry with the write,
                // so an IOException raised during rotation causes the current
                // record to be written again - confirm this is acceptable.
                for (int attempt = 0;; attempt++) {
                    try {
                        writer.write(record);
                        if (attempt > 0) {
                            System.out.println("write retry suc.retry=" + attempt);
                        }
                        if (writer.getTotalBytes() > maxBytesCurrent) {
                            maxBytesCurrent = writer.getTotalBytes() + MAX_BYTE_WRITER;
                            writer.close();
                            writer = up.openRecordWriter(++blockId);
                            blocks.add(blockId);
                        }
                        break;
                    } catch (IOException e) {
                        if (attempt < MAX_WRITE_RETRIES) {
                            Thread.sleep(RETRY_SLEEP_MILLIS);
                        } else {
                            throw e;
                        }
                    }
                }
            }

            writer.close();
            writer = null; // closed normally; nothing left for the finally block to do
            // Data is only really written to ODPS once complete() succeeds.
            up.complete(blocks.toArray(new Long[blocks.size()]));
        } finally {
            if (writer != null) {
                // Failure path: release the writer on a best-effort basis. The
                // exception already in flight is the one worth reporting.
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // intentionally ignored, see above
                }
            }
            if (br != null) {
                br.close();
            }
        }
    }

    /**
     * Converts the fields of one input line and stores them in {@code record}.
     *
     * <p>An empty field in a non-string column is replaced by a default:
     * {@code 0} for bigint, {@code 0.0} for double, the current time for
     * datetime and {@code false} for boolean. Columns of any other type are
     * left untouched.
     *
     * @param record         record to populate; it may be partially modified
     *                       when {@code false} is returned
     * @param schema         schema providing the column count and column types
     * @param fields         raw field values of the line
     * @param datetimeFormat format used to parse datetime columns
     * @return {@code true} if every column was populated, {@code false} if the
     *         line has fewer fields than the schema has columns or a field
     *         cannot be parsed as a number or datetime
     * @throws Exception any other failure raised while populating the record
     *                   propagates unchanged
     */
    private static boolean fillRecord(Record record, RecordSchema schema,
            String[] fields, SimpleDateFormat datetimeFormat) throws Exception {
        int columnCount = schema.getColumnCount();
        if (fields.length < columnCount) {
            // Missing columns: reject before touching the record.
            return false;
        }
        try {
            for (int i = 0; i < columnCount; i++) {
                Column.Type type = schema.getColumnType(i);
                String value = fields[i];
                if (value.length() == 0 && type != Column.Type.ODPS_STRING) {
                    switch (type) {
                    case ODPS_BIGINT:
                        record.setBigint(i, 0L);
                        break;
                    case ODPS_DOUBLE:
                        record.setDouble(i, 0.0);
                        break;
                    case ODPS_DATETIME:
                        record.setDatetime(i, new Date());
                        break;
                    case ODPS_BOOLEAN:
                        record.setBoolean(i, false);
                        break;
                    default:
                        break;
                    }
                } else {
                    switch (type) {
                    case ODPS_STRING:
                        record.setString(i, value);
                        break;
                    case ODPS_BIGINT:
                        record.setBigint(i, Long.parseLong(value));
                        break;
                    case ODPS_DOUBLE:
                        record.setDouble(i, Double.parseDouble(value));
                        break;
                    case ODPS_DATETIME:
                        record.setDatetime(i, datetimeFormat.parse(value));
                        break;
                    case ODPS_BOOLEAN:
                        record.setBoolean(i, Boolean.parseBoolean(value));
                        break;
                    default:
                        break;
                    }
                }
            }
            return true;
        } catch (ParseException e) {
            // Unparsable datetime: the line is rejected.
            return false;
        } catch (NumberFormatException e) {
            // Value does not match the numeric column type: the line is rejected.
            return false;
        }
    }

    /**
     * Runs the sample against a hard-coded file, table and partition, printing
     * a confirmation on success and the stack trace on failure.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        try {
            UploadFile("C:/hoh.txt", "sale_detail", "pid=201412");
            System.out.println("fileupload sucessful");
        } catch (Exception e) {
            // The upload failed; all data written so far is discarded.
            e.printStackTrace();
        }
    }
}
// Good luck!!