从数据库中读取数据到HDFS

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_28844767/article/details/80491788


package mapreduce.format.dbInputFormat;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Map-only MapReduce driver that reads rows of the {@code student} table from a
 * MySQL database through {@link DBInputFormat} and writes them to the job's
 * output directory.
 *
 * <p>Every row is turned into a {@code Student} by the input format and is
 * forwarded unchanged by the mapper; no reduce phase is configured.
 */
public class MyDBInputFormatMR {

	// JDBC connection settings passed to DBConfiguration.configureDB.
	private static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
	private static final String JDBC_URL = "jdbc:mysql://hadoop01:3306/jdbc_test?characterEncoding=utf-8";
	private static final String DB_USER = "root";
	private static final String DB_PASSWORD = "root";

	// Custom row query and the matching row-count query given to DBInputFormat.
	private static final String SELECT_SQL = "select id, name, sex, age, department from student where age > 18";
	private static final String COUNT_SQL = "select count(*) from student where age > 18 ";

	// Directory the job writes to; it is removed first when it already exists.
	private static final String OUTPUT_DIR = "D:\\bigdata";

	/**
	 * Configures and runs the job, then exits the JVM with status 0 on success
	 * and 1 on failure.
	 *
	 * @param args command-line arguments (not used)
	 * @throws Exception if job setup, output-directory cleanup, or job execution fails
	 */
	public static void main(String[] args) throws Exception {

		// Intended to be run in the local environment.
		Configuration configuration = new Configuration();

		// The database connection settings must be put into the configuration
		// BEFORE the Job object is obtained from it.
		DBConfiguration.configureDB(configuration, JDBC_DRIVER, JDBC_URL, DB_USER, DB_PASSWORD);
		Job dbJob = Job.getInstance(configuration);

		// Alternative 1 - no condition, all columns:
		//   String[] fieldNames = new String[]{"id","name","sex","age","department"};
		//   DBInputFormat.setInput(job, Student.class, "student", null,"id",fieldNames);
		//
		// Alternative 2 - no condition, only some columns. Note: the SQL-related
		// methods of Student must be changed so that the columns they read/write
		// match the queried field list below:
		//   String[] fieldNamesPart = new String[] {"id", "name", "sex"};
		//   DBInputFormat.setInput(job, Student.class, "student", null, "id", fieldNamesPart);
		//
		// Alternative 3 - with a condition, all columns:
		//   String[] fieldNames = new String[] {"id", "name", "sex", "age", "department"};
		//   DBInputFormat.setInput(job, Student.class, "student", "department = \"cc\"", "id", fieldNames);

		// Variant in use: custom SQL statements.
		DBInputFormat.setInput(dbJob, Student.class, SELECT_SQL, COUNT_SQL);

		dbJob.setJarByClass(MyDBInputFormatMR.class);
		dbJob.setMapperClass(MyDBInputFormatMRMapper.class);
		dbJob.setOutputKeyClass(NullWritable.class);
		dbJob.setOutputValueClass(Student.class);

		// Map-only job: no reduce tasks.
		dbJob.setNumReduceTasks(0);

		dbJob.setInputFormatClass(DBInputFormat.class);

		Path outputDir = removeIfPresent(configuration, OUTPUT_DIR);
		FileOutputFormat.setOutputPath(dbJob, outputDir);

		boolean succeeded = dbJob.waitForCompletion(true);
		System.exit(succeeded ? 0 : 1);

	}

	/**
	 * Recursively deletes {@code location} on the file system resolved from
	 * {@code configuration} when it exists, printing a notice after the deletion.
	 *
	 * @param configuration configuration used to obtain the file system
	 * @param location      path string of the directory to clear
	 * @return the {@link Path} built from {@code location}
	 * @throws IOException if the file system cannot be obtained or accessed
	 */
	private static Path removeIfPresent(Configuration configuration, String location) throws IOException {
		FileSystem fileSystem = FileSystem.get(configuration);
		Path target = new Path(location);
		if (!fileSystem.exists(target)) {
			return target;
		}
		fileSystem.delete(target, true);
		System.out.println("输出路径存在,已删除!");
		return target;
	}

	/**
	 * Pass-through mapper: emits every incoming {@code Student} record as the
	 * output value with a {@link NullWritable} key.
	 */
	static class MyDBInputFormatMRMapper extends Mapper<LongWritable, Student, NullWritable, Student> {

		@Override
		protected void map(LongWritable key, Student value, Context context)
				throws IOException, InterruptedException {

			final NullWritable emptyKey = NullWritable.get();
			context.write(emptyKey, value);
		}

	}

}

自定义一个类,将从数据库读取的一条记录封装为对应的 Java 对象

该类需要实现 WritableComparable<Student> 和 DBWritable 两个接口:

package mapreduce.format.dbInputFormat;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

/**
 * One row of the {@code student} table, wrapped as a Java object.
 *
 * <p>The class provides:
 * <ul>
 *   <li>binary serialization and deserialization ({@link #write(DataOutput)} /
 *       {@link #readFields(DataInput)});</li>
 *   <li>JDBC parameter binding and result-set extraction
 *       ({@link #write(PreparedStatement)} / {@link #readFields(ResultSet)});</li>
 *   <li>ordering, equality and hashing by {@code id}.</li>
 * </ul>
 */
public class Student implements WritableComparable<Student> , DBWritable{
	private int id;
	private String name;
	private String sex;
	private int age;
	private String department;

	/**
	 * Creates a fully populated record.
	 *
	 * @param id         student id
	 * @param name       student name
	 * @param sex        student sex
	 * @param age        student age
	 * @param department student department
	 */
	public Student(int id, String name, String sex, int age, String department) {
		this.id = id;
		this.name = name;
		this.sex = sex;
		this.age = age;
		this.department = department;
	}

	/** Creates an empty record whose fields are filled in later by one of the {@code readFields} methods or the setters. */
	public Student() {
	}

	public int getId() {
		return id;
	}
	public void setId(int id) {
		this.id = id;
	}
	public String getName() {
		return name;
	}
	public void setName(String name) {
		this.name = name;
	}
	public String getSex() {
		return sex;
	}
	public void setSex(String sex) {
		this.sex = sex;
	}
	public int getAge() {
		return age;
	}
	public void setAge(int age) {
		this.age = age;
	}
	public String getDepartment() {
		return department;
	}
	public void setDepartment(String department) {
		this.department = department;
	}

	/**
	 * Returns the fields joined by tab characters in the order
	 * id, name, sex, age, department.
	 */
	@Override
	public String toString() {
		return id + "\t" + name + "\t" + sex + "\t" + age + "\t" + department;
	}

	/**
	 * Restores this record from its binary form; the field order must mirror
	 * {@link #write(DataOutput)}.
	 *
	 * @param in source to read from
	 * @throws IOException if reading fails
	 */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.id = in.readInt();
		this.name = in.readUTF();
		this.sex = in.readUTF();
		this.age = in.readInt();
		this.department = in.readUTF();
	}

	/**
	 * Writes this record in binary form: id, name, sex, age, department.
	 *
	 * <p>NOTE(review): {@code writeUTF} throws {@link NullPointerException} for a
	 * {@code null} string, so a record whose name, sex or department is
	 * {@code null} cannot be serialized by this method.
	 *
	 * @param out sink to write to
	 * @throws IOException if writing fails
	 */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(id);
		out.writeUTF(name);
		out.writeUTF(sex);
		out.writeInt(age);
		out.writeUTF(department);
	}

	/**
	 * Sort rule, which is also the grouping rule: records are ordered by
	 * ascending {@code id}.
	 *
	 * @param o the record to compare with
	 * @return a negative, zero or positive value as this id is less than,
	 *         equal to or greater than the other id
	 */
	@Override
	public int compareTo(Student o) {
		// Integer.compare avoids the overflow that "this.id - o.id" suffers from
		// when the two ids have opposite signs and large magnitudes.
		return Integer.compare(this.id, o.id);
	}

	/**
	 * Two records are equal when their ids are equal, which keeps equality
	 * consistent with {@link #compareTo(Student)}.
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (!(obj instanceof Student)) {
			return false;
		}
		return this.id == ((Student) obj).id;
	}

	/**
	 * Hash derived from {@code id} only, so that it agrees with
	 * {@link #equals(Object)} and yields the same value in every JVM.
	 */
	@Override
	public int hashCode() {
		return id;
	}

	/**
	 * Binds the fields to parameters 1-5 of {@code statement} in the order
	 * id, name, sex, age, department. When only a subset of the columns is
	 * used, the bound parameters must be adjusted to match.
	 *
	 * @param statement statement whose parameters are set
	 * @throws SQLException if a parameter cannot be set
	 */
	@Override
	public void write(PreparedStatement statement) throws SQLException {
		statement.setInt(1, id);
		statement.setString(2, name);
		statement.setString(3, sex);
		statement.setInt(4, age);
		statement.setString(5, department);
	}

	/**
	 * Fills the fields from columns 1-5 of the current row of
	 * {@code resultSet} in the order id, name, sex, age, department. When the
	 * query selects only a subset of the columns, the columns read here must
	 * be adjusted to match.
	 *
	 * @param resultSet result set positioned on the row to read
	 * @throws SQLException if a column cannot be read
	 */
	@Override
	public void readFields(ResultSet resultSet) throws SQLException {
		this.id = resultSet.getInt(1);
		this.name = resultSet.getString(2);
		this.sex = resultSet.getString(3);
		this.age = resultSet.getInt(4);
		this.department = resultSet.getString(5);
	}

}


猜你喜欢

转载自blog.csdn.net/qq_28844767/article/details/80491788