Hadoop支持的数据类型有以下几种:
BinaryComparable
IntWritable(对应于int)
BooleanWritable(对应于boolean)
DoubleWritable(对应于double)
FloatWritable(对应于float)
LongWritable(对应于long)
NullWritable
VLongWritable
VIntWritable
BytesWritable(字节数组)
ByteWritable(对应于byte,注意与上面的BytesWritable区分:这个是单个字节数据的封装)
Text(string)等
这些类所在的包都在org.apache.hadoop.io
1.WritableComparable.
继承了Writable和Comparable,是接下来讲的基本数据类型的父类
package org.apache.hadoop.io;
/**
 * Combines {@link Writable} and {@link Comparable} into one interface.
 * Declares no members of its own.
 */
public interface WritableComparable<T> extends Writable, Comparable<T> {
}// extends both Writable and Comparable
2.IntWritable
int型数据的封装,实现了WritableComparable接口
package org.apache.hadoop.io;
import java.io.*;
/**
 * A WritableComparable for ints: wraps a single {@code int} value.
 */
public class IntWritable implements WritableComparable {

  /** The int wrapped by this object. */
  private int value;

  /** Creates an instance whose value is left at the field default. */
  public IntWritable() {
  }

  /** Creates an instance holding {@code value}. */
  public IntWritable(int value) {
    set(value);
  }

  /** Set the value of this IntWritable. */
  public void set(int value) {
    this.value = value;
  }

  /** Return the value of this IntWritable. */
  public int get() {
    return value;
  }

  /** Replaces the wrapped value with one int read from {@code in}. */
  public void readFields(DataInput in) throws IOException {
    value = in.readInt();
  }

  /** Writes the wrapped value to {@code out} as one int. */
  public void write(DataOutput out) throws IOException {
    out.writeInt(value);
  }

  /** Returns true iff <code>o</code> is a IntWritable with the same value. */
  public boolean equals(Object o) {
    if (o instanceof IntWritable) {
      return ((IntWritable) o).value == this.value;
    }
    return false;
  }

  /** The hash code is the wrapped value itself. */
  public int hashCode() {
    return value;
  }

  /**
   * Compares two IntWritables by their wrapped values.
   *
   * @throws ClassCastException if {@code o} is not an IntWritable
   */
  public int compareTo(Object o) {
    final int mine = this.value;
    final int theirs = ((IntWritable) o).value;
    if (mine < theirs) {
      return -1;
    }
    return mine == theirs ? 0 : 1;
  }

  /** Returns the decimal string form of the wrapped value. */
  public String toString() {
    return Integer.toString(value);
  }

  /** A Comparator optimized for IntWritable: compares serialized bytes directly. */
  public static class Comparator extends WritableComparator {

    public Comparator() {
      super(IntWritable.class);
    }

    /**
     * Compares the ints read (via the inherited {@code readInt}) at offsets
     * {@code s1} and {@code s2}; the lengths {@code l1}/{@code l2} are not used.
     */
    public int compare(byte[] b1, int s1, int l1,
                       byte[] b2, int s2, int l2) {
      final int left = readInt(b1, s1);
      final int right = readInt(b2, s2);
      if (left < right) {
        return -1;
      }
      return left == right ? 0 : 1;
    }
  }

  static { // register this comparator
    WritableComparator.define(IntWritable.class, new Comparator());
  }
}
其他数据类型的类与IntWritable类似,这里不再介绍。
3.Text.
Text相当于String,它继承自BinaryComparable并且实现了WritableComparable接口。下面来看实现Text的源码,注释在源码里面
package org.apache.hadoop.io;
import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class Text extends BinaryComparable
implements WritableComparable<BinaryComparable> {
// Logger created for Text.class.
private static final Log LOG= LogFactory.getLog(Text.class);
// One UTF-8 encoder per thread (ThreadLocal). Freshly created encoders are
// configured to REPORT malformed input and unmappable characters.
private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
new ThreadLocal<CharsetEncoder>() {
protected CharsetEncoder initialValue() {
return Charset.forName("UTF-8").newEncoder().
onMalformedInput(CodingErrorAction.REPORT).
onUnmappableCharacter(CodingErrorAction.REPORT);
}
};
// One UTF-8 decoder per thread (ThreadLocal). Freshly created decoders are
// configured to REPORT malformed input and unmappable characters.
private static ThreadLocal<CharsetDecoder> DECODER_FACTORY =
new ThreadLocal<CharsetDecoder>() {
protected CharsetDecoder initialValue() {
return Charset.forName("UTF-8").newDecoder().
onMalformedInput(CodingErrorAction.REPORT).
onUnmappableCharacter(CodingErrorAction.REPORT);
}
};
// Shared zero-length array used as the initial backing store.
private static final byte [] EMPTY_BYTES = new byte[0];
// Backing byte array; only the first `length` bytes are valid data.
private byte[] bytes;
// Number of valid bytes in `bytes`.
private int length;
/** Constructs an empty Text backed by the shared empty array. */
public Text() {
bytes = EMPTY_BYTES;
}
/** Construct from a string.
 * Delegates to {@link #set(String)}.
*/
public Text(String string) {
set(string);
}
/** Construct from another text. Delegates to {@link #set(Text)}. */
public Text(Text utf8) {
set(utf8);
}
/** Construct from a byte array.
 * Delegates to {@link #set(byte[])}.
*/
public Text(byte[] utf8) {
set(utf8);
}
// The constructors above: the default one starts with an empty byte array;
// the others initialize from a String, another Text, or a byte array.
/**
* Returns the raw bytes; however, only data up to {@link #getLength()} is
* valid.
* The backing array itself is returned, not a copy.
*/
public byte[] getBytes() {
return bytes;
}
/** Returns the number of valid bytes in the byte array (the {@code length} field). */
public int getLength() {
return length;
}
/**
 * Returns the Unicode code point (as a 32-bit int) of the character whose
 * UTF-8 encoding starts at the given byte offset.
 *
 * @param position byte offset into the valid data (not a character index)
 * @return the value produced by {@link #bytesToCodePoint(ByteBuffer)} at that
 *         offset, or -1 if {@code position} is negative or not less than
 *         {@link #getLength()}
 */
public int charAt(int position) {
  // Valid offsets are 0..length-1. The previous check (position > length) let
  // position == length through, which read past the valid data.
  if (position < 0 || position >= this.length) {
    return -1;
  }
  ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
  return bytesToCodePoint(bb.slice());
}
/**
 * Finds {@code what} starting from byte offset 0.
 * Equivalent to {@code find(what, 0)}.
 */
public int find(String what) {
return find(what, 0);
}
/**
 * Searches the valid bytes of this Text for the encoded form of {@code what},
 * beginning at byte offset {@code start}.
 *
 * @param what the string to look for; it is encoded with {@link #encode(String)}
 * @param start byte offset at which the search begins
 * @return the byte offset of the first match, or -1 if there is none
 */
public int find(String what, int start) {
try {
ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length);
ByteBuffer tgt = encode(what);
// NOTE(review): an empty `what` presumably encodes to an empty buffer, in
// which case this get() would throw BufferUnderflowException -- confirm.
byte b = tgt.get();
src.position(start);
while (src.hasRemaining()) {
if (b == src.get()) { // matching first byte
src.mark(); // save position in loop
tgt.mark(); // save position in target
boolean found = true;
// src has already advanced past the first matching byte, hence the -1
int pos = src.position()-1;
while (tgt.hasRemaining()) {
if (!src.hasRemaining()) { // src expired first
tgt.reset();
src.reset();
found = false;
break;
}
if (!(tgt.get() == src.get())) {
tgt.reset();
src.reset();
found = false;
break; // no match
}
}
if (found) return pos;
}
}
return -1; // not found
} catch (CharacterCodingException e) {
// can't get here
e.printStackTrace();
return -1;
}
}
/**
 * Set to contain the contents of a string.
 * The string is encoded with {@code encode(string, true)}; the resulting
 * buffer's array becomes the backing store and its limit becomes the length.
 */
public void set(String string) {
  final ByteBuffer encoded;
  try {
    encoded = encode(string, true);
  } catch (CharacterCodingException e) {
    throw new RuntimeException("Should not have happened " + e.toString());
  }
  bytes = encoded.array();
  length = encoded.limit();
}
/** Set to a utf8 byte array
 * (copies the whole array via {@link #set(byte[], int, int)}).
*/
public void set(byte[] utf8) {
set(utf8, 0, utf8.length);
}
/** copy a text: copies the valid bytes of {@code other} into this Text. */
public void set(Text other) {
set(other.getBytes(), 0, other.getLength());
}
// These setters mirror the constructors above.
/**
* Set the Text to range of bytes
* The previous contents are replaced, not appended to.
* @param utf8 the data to copy from
* @param start the first position of the new string
* @param len the number of bytes of the new string
*/
public void set(byte[] utf8, int start, int len) {
// old data need not be kept because it is overwritten from offset 0
setCapacity(len, false);
System.arraycopy(utf8, start, bytes, 0, len);
this.length = len;
}// replaces the contents with len bytes copied from utf8 starting at start
/**
* Append a range of bytes to the end of the given text
* @param utf8 the data to copy from
* @param start the first position to append from utf8
* @param len the number of bytes to append
*/
public void append(byte[] utf8, int start, int len) {
// keepData=true so the existing bytes survive a reallocation
setCapacity(length + len, true);
System.arraycopy(utf8, start, bytes, length, len);
length += len;
}// appends len bytes of utf8 (taken from offset start) after the current data
/**
* Clear the string to empty.
* Only the length is reset to 0; the backing array is left untouched.
*/
public void clear() {
length = 0;
}
/**
 * Ensures the backing array can hold at least {@code len} bytes.
 * When the current array is missing or too short, a new array of exactly
 * {@code len} bytes replaces it; otherwise nothing changes.
 *
 * @param len required capacity in bytes
 * @param keepData whether the current valid bytes are copied into a newly
 *        allocated array
 */
private void setCapacity(int len, boolean keepData) {
  if (bytes != null && bytes.length >= len) {
    return; // already large enough
  }
  final byte[] grown = new byte[len];
  if (keepData && bytes != null) {
    System.arraycopy(bytes, 0, grown, 0, length);
  }
  bytes = grown;
}
/**
* Convert text back to string
* by decoding the valid bytes with {@link #decode(byte[], int, int)}.
* @see java.lang.Object#toString()
*/
public String toString() {
try {
return decode(bytes, 0, length);
} catch (CharacterCodingException e) {
throw new RuntimeException("Should not have happened " + e.toString());
}
}
/** deserialize
 * Reads a vint length followed by that many bytes, replacing the current contents.
*/
public void readFields(DataInput in) throws IOException {
int newLength = WritableUtils.readVInt(in);
// old contents are discarded, so there is no need to keep them on resize
setCapacity(newLength, false);
in.readFully(bytes, 0, newLength);
length = newLength;
}
/** Skips over one Text in the input: reads the vint length, then skips that many bytes. */
public static void skip(DataInput in) throws IOException {
int length = WritableUtils.readVInt(in);
WritableUtils.skipFully(in, length);
}
/** serialize
* write this object to out
* length uses zero-compressed encoding
* The vint length is followed by the valid bytes.
* @see Writable#write(DataOutput)
*/
public void write(DataOutput out) throws IOException {
WritableUtils.writeVInt(out, length);
out.write(bytes, 0, length);
}
/** Returns true iff <code>o</code> is a Text with the same contents. */
public boolean equals(Object o) {
  // Non-Text arguments are rejected; the comparison itself is inherited.
  return (o instanceof Text) && super.equals(o);
}

/** Returns the hash code computed by the superclass. */
public int hashCode() {
  return super.hashCode();
}
/** A WritableComparator optimized for Text keys. */
public static class Comparator extends WritableComparator {
public Comparator() {
super(Text.class);
}
/**
 * Compares two serialized Text values without deserializing them.
 * The size of each leading vint is derived from its first byte and that
 * prefix is excluded from the byte comparison.
 */
public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) {
int n1 = WritableUtils.decodeVIntSize(b1[s1]);
int n2 = WritableUtils.decodeVIntSize(b2[s2]);
return compareBytes(b1, s1+n1, l1-n1, b2, s2+n2, l2-n2);
}
}
static {
// register this comparator
WritableComparator.define(Text.class, new Comparator());
}
/// STATIC UTILITIES FROM HERE DOWN
/**
* Converts the provided byte array to a String using the
* UTF-8 encoding. If the input is malformed,
* replace by a default value.
* (Decoding is requested with {@code replace == true}.)
*/
public static String decode(byte[] utf8) throws CharacterCodingException {
return decode(ByteBuffer.wrap(utf8), true);
}
/**
 * Same as {@link #decode(byte[])}, but decodes only {@code length} bytes
 * starting at offset {@code start}.
 */
public static String decode(byte[] utf8, int start, int length)
throws CharacterCodingException {
return decode(ByteBuffer.wrap(utf8, start, length), true);
}
/**
* Converts the provided byte array to a String using the
* UTF-8 encoding. If <code>replace</code> is true, then
* malformed input is replaced with the
* substitution character, which is U+FFFD. Otherwise the
* method throws a MalformedInputException.
* Only {@code length} bytes starting at offset {@code start} are decoded.
*/
public static String decode(byte[] utf8, int start, int length, boolean replace)
throws CharacterCodingException {
return decode(ByteBuffer.wrap(utf8, start, length), replace);
}
/**
 * Decodes the buffer to a String with this thread's decoder.
 * When {@code replace} is true the decoder is switched to REPLACE for the
 * duration of the call and switched back to REPORT afterwards.
 */
private static String decode(ByteBuffer utf8, boolean replace)
throws CharacterCodingException {
CharsetDecoder decoder = DECODER_FACTORY.get();
if (replace) {
decoder.onMalformedInput(
java.nio.charset.CodingErrorAction.REPLACE);
decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
}
String str = decoder.decode(utf8).toString();
// set decoder back to its default value: REPORT
if (replace) {
decoder.onMalformedInput(CodingErrorAction.REPORT);
decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
}
return str;
}
/**
* Converts the provided String to bytes using the
* UTF-8 encoding. If the input is malformed,
* invalid chars are replaced by a default value.
* (Encoding is requested with {@code replace == true}.)
* @return ByteBuffer: bytes stores at ByteBuffer.array()
* and length is ByteBuffer.limit()
*/
public static ByteBuffer encode(String string)
throws CharacterCodingException {
return encode(string, true);
}
/**
* Converts the provided String to bytes using the
* UTF-8 encoding. If <code>replace</code> is true, then
* malformed input is replaced with the
* substitution character, which is U+FFFD. Otherwise the
* method throws a MalformedInputException.
* @return ByteBuffer: bytes stores at ByteBuffer.array()
* and length is ByteBuffer.limit()
*/
public static ByteBuffer encode(String string, boolean replace)
throws CharacterCodingException {
CharsetEncoder encoder = ENCODER_FACTORY.get();
// temporarily switch this thread's encoder to REPLACE when requested
if (replace) {
encoder.onMalformedInput(CodingErrorAction.REPLACE);
encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
}
ByteBuffer bytes =
encoder.encode(CharBuffer.wrap(string.toCharArray()));
// restore the REPORT setting the encoder was created with
if (replace) {
encoder.onMalformedInput(CodingErrorAction.REPORT);
encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
}
return bytes;
}
/** Read a UTF8 encoded string from in
 * Reads a vint byte count, then that many bytes, and decodes them with
 * {@link #decode(byte[])}.
*/
public static String readString(DataInput in) throws IOException {
int length = WritableUtils.readVInt(in);
byte [] bytes = new byte[length];
in.readFully(bytes, 0, length);
return decode(bytes);
}
/** Write a UTF8 encoded string to out
 * The encoded byte count is written as a vint, followed by the bytes.
 * @return the number of encoded bytes (the vint prefix is not counted)
*/
public static int writeString(DataOutput out, String s) throws IOException {
ByteBuffer bytes = encode(s);
int length = bytes.limit();
WritableUtils.writeVInt(out, length);
out.write(bytes.array(), 0, length);
return length;
}
////// states for validateUTF8
// expecting the first byte of a character
private static final int LEAD_BYTE = 0;
// expecting the first trail byte, which has extra range checks per lead byte
private static final int TRAIL_BYTE_1 = 1;
// expecting any later trail byte
private static final int TRAIL_BYTE = 2;
/**
* Check if a byte array contains valid utf-8
* The whole array is checked.
* @param utf8 byte array
* @throws MalformedInputException if the byte array contains invalid utf-8
*/
public static void validateUTF8(byte[] utf8) throws MalformedInputException {
validateUTF8(utf8, 0, utf8.length);
}
/**
 * Check to see if a byte array is valid utf-8
 *
 * <p>The bytes are run through a small state machine: a lead byte determines
 * how many trail bytes must follow, the first trail byte is range-checked
 * against its lead byte, and every trail byte must lie in 0x80..0xBF. Input
 * that ends in the middle of a multi-byte sequence is rejected as well.
 *
 * @param utf8 the array of bytes
 * @param start the offset of the first byte in the array
 * @param len the length of the byte sequence
 * @throws MalformedInputException if the byte array contains invalid bytes
 */
public static void validateUTF8(byte[] utf8, int start, int len)
    throws MalformedInputException {
  int count = start;
  int leadByte = 0;
  int length = 0;
  int state = LEAD_BYTE;
  while (count < start+len) {
    int aByte = ((int) utf8[count] & 0xFF);

    switch (state) {
    case LEAD_BYTE:
      leadByte = aByte;
      length = bytesFromUTF8[aByte];

      switch (length) {
      case 0: // check for ASCII
        if (leadByte > 0x7F)
          throw new MalformedInputException(count);
        break;
      case 1:
        if (leadByte < 0xC2 || leadByte > 0xDF)
          throw new MalformedInputException(count);
        state = TRAIL_BYTE_1;
        break;
      case 2:
        if (leadByte < 0xE0 || leadByte > 0xEF)
          throw new MalformedInputException(count);
        state = TRAIL_BYTE_1;
        break;
      case 3:
        if (leadByte < 0xF0 || leadByte > 0xF4)
          throw new MalformedInputException(count);
        state = TRAIL_BYTE_1;
        break;
      default:
        // too long! Longest valid UTF-8 is 4 bytes (lead + three)
        // or if < 0 we got a trail byte in the lead byte position
        throw new MalformedInputException(count);
      } // switch (length)
      break;

    case TRAIL_BYTE_1:
      if (leadByte == 0xF0 && aByte < 0x90)
        throw new MalformedInputException(count);
      if (leadByte == 0xF4 && aByte > 0x8F)
        throw new MalformedInputException(count);
      if (leadByte == 0xE0 && aByte < 0xA0)
        throw new MalformedInputException(count);
      if (leadByte == 0xED && aByte > 0x9F)
        throw new MalformedInputException(count);
      // falls through to regular trail-byte test!!
    case TRAIL_BYTE:
      if (aByte < 0x80 || aByte > 0xBF)
        throw new MalformedInputException(count);
      if (--length == 0) {
        state = LEAD_BYTE;
      } else {
        state = TRAIL_BYTE;
      }
      break;
    } // switch (state)
    count++;
  }
  // Still waiting for trail bytes: the input stopped inside a multi-byte
  // sequence, which is not valid UTF-8.
  if (state != LEAD_BYTE) {
    throw new MalformedInputException(count);
  }
}
/**
* Magic numbers for UTF-8. These are the number of bytes
* that <em>follow</em> a given lead byte. Trailing bytes
* have the value -1. The values 4 and 5 are presented in
* this table, even though valid UTF-8 cannot include the
* five and six byte sequences.
* Callers in this class index the table with the byte masked by 0xFF.
*/
static final int[] bytesFromUTF8 =
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
// trail bytes
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 };
/**
* Returns the next code point at the current position in
* the buffer. The buffer's position will be incremented.
* Any mark set on this buffer will be changed by this method!
*
* @return the decoded code point, or -1 when the byte at the current position
*         is a trail byte (in that case the position is left unchanged)
*/
public static int bytesToCodePoint(ByteBuffer bytes) {
// peek at the lead byte without consuming it
bytes.mark();
byte b = bytes.get();
bytes.reset();
int extraBytesToRead = bytesFromUTF8[(b & 0xFF)];
if (extraBytesToRead < 0) return -1; // trailing byte!
int ch = 0;
// Deliberate fall-through: entering at case N reads N+1 bytes in total,
// shifting the accumulator left by 6 bits between bytes.
switch (extraBytesToRead) {
case 5: ch += (bytes.get() & 0xFF); ch <<= 6; /* remember, illegal UTF-8 */
case 4: ch += (bytes.get() & 0xFF); ch <<= 6; /* remember, illegal UTF-8 */
case 3: ch += (bytes.get() & 0xFF); ch <<= 6;
case 2: ch += (bytes.get() & 0xFF); ch <<= 6;
case 1: ch += (bytes.get() & 0xFF); ch <<= 6;
case 0: ch += (bytes.get() & 0xFF);
}
// subtract the per-length constant from offsetsFromUTF8
ch -= offsetsFromUTF8[extraBytesToRead];
return ch;
}
// Constants subtracted by bytesToCodePoint after the raw bytes are accumulated;
// indexed by the number of bytes that follow the lead byte (0..5).
static final int offsetsFromUTF8[] =
{ 0x00000000, 0x00003080,
0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };
/**
 * For the given string, returns the number of UTF-8 bytes
 * required to encode the string.
 *
 * <p>A valid surrogate pair counts as 4 bytes; an unpaired high surrogate
 * counts as 3. The string is walked by index rather than with a
 * {@code CharacterIterator}, whose {@code DONE} sentinel is {@code '\uFFFF'}
 * and would end the count early for strings containing that character.
 *
 * @param string text to encode
 * @return number of UTF-8 bytes required to encode
 */
public static int utf8Length(String string) {
  final int n = string.length();
  int size = 0;
  for (int i = 0; i < n; i++) {
    final char ch = string.charAt(i);
    if ((ch >= 0xD800) && (ch < 0xDC00)) {
      // high surrogate: look at the next char, if there is one
      final boolean paired = (i + 1 < n)
          && (string.charAt(i + 1) > 0xDBFF)
          && (string.charAt(i + 1) < 0xE000);
      if (paired) {
        // valid pair: consume the low surrogate too
        size += 4;
        i++;
      } else {
        // invalid pair: the following char is counted on its own
        size += 3;
      }
    } else if (ch < 0x80) {
      size++;
    } else if (ch < 0x800) {
      size += 2;
    } else {
      // ch < 0x10000, that is, the largest char value
      size += 3;
    }
  }
  return size;
}
}//返回给定的String以UTF-8编码后的数量
4.NullWritable
NullWritable是一个单例对象,不能被修改。其序列化的长度为0,没有从流中读入字节也没有写出字节,相当于NULL。
package org.apache.hadoop.io;
import java.io.*;
/** Singleton Writable with no data. */
public class NullWritable implements WritableComparable {
private static final NullWritable THIS = new NullWritable();
private NullWritable() {} // no public ctor
/** Returns the single instance of this class. */
public static NullWritable get() { return THIS; }
public String toString() {
return "(null)";
}
public int hashCode() { return 0; }
public int compareTo(Object other) {
if (!(other instanceof NullWritable)) {
throw new ClassCastException("can't compare " + other.getClass().getName()
+ " to NullWritable");
}
return 0;
}
public boolean equals(Object other) { return other instanceof NullWritable; }
public void readFields(DataInput in) throws IOException {} //实现反序列化,可以看出没有读入字节流
public void write(DataOutput out) throws IOException {}
//实现序列化,没有写出字节流
//比较器
public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) {
assert 0 == l1;
assert 0 == l2;
return 0;
}
}
static { // register this comparator
WritableComparator.define(NullWritable.class, new Comparator());
}
}
5.ObjectWritable
ObjectWritable实现了Writable和Configurable接口,其功能是实现基本数据类型、String、枚举类型等类型以及这些类型的数组的封装。
package org.apache.hadoop.io;
import java.lang.reflect.Array;
import java.io.*;
import java.util.*;
import org.apache.hadoop.conf.*;
/** A polymorphic Writable that writes an instance with it's class name.
* Handles arrays, strings and primitive types without a Writable wrapper.
*/
public class ObjectWritable implements Writable, Configurable {
private Class declaredClass;// declared class of the wrapped instance
private Object instance;// the wrapped value; may be null
private Configuration conf;// configuration passed to readObject/writeObject
/** Creates an instance with no wrapped value. */
public ObjectWritable() {}
/** Wraps {@code instance}, taking its runtime class as the declared class (see {@link #set(Object)}). */
public ObjectWritable(Object instance) {
set(instance);
}
/** Wraps {@code instance} with an explicitly given declared class. */
public ObjectWritable(Class declaredClass, Object instance) {
this.declaredClass = declaredClass;
this.instance = instance;
}
/** Return the instance, or null if none. */
public Object get() { return instance; }
/** Return the class this is meant to be. */
public Class getDeclaredClass() { return declaredClass; }
/** Reset the instance. The declared class becomes the runtime class of {@code instance}. */
public void set(Object instance) {
this.declaredClass = instance.getClass();
this.instance = instance;
}
/** Returns a string showing the declared class and the wrapped value. */
public String toString() {
return "OW[class=" + declaredClass + ",value=" + instance + "]";
}
/** Deserializes into this object via {@link #readObject(DataInput, ObjectWritable, Configuration)}. */
public void readFields(DataInput in) throws IOException {
readObject(in, this, this.conf);
}
/** Serializes the wrapped value via {@link #writeObject(DataOutput, Object, Class, Configuration)}. */
public void write(DataOutput out) throws IOException {
writeObject(out, instance, declaredClass, conf);
}
// Maps primitive type names (including "void") to their Class objects; used
// when reading to resolve a serialized class name before asking the Configuration.
private static final Map<String, Class<?>> PRIMITIVE_NAMES = new HashMap<String, Class<?>>();
static {
PRIMITIVE_NAMES.put("boolean", Boolean.TYPE);
PRIMITIVE_NAMES.put("byte", Byte.TYPE);
PRIMITIVE_NAMES.put("char", Character.TYPE);
PRIMITIVE_NAMES.put("short", Short.TYPE);
PRIMITIVE_NAMES.put("int", Integer.TYPE);
PRIMITIVE_NAMES.put("long", Long.TYPE);
PRIMITIVE_NAMES.put("float", Float.TYPE);
PRIMITIVE_NAMES.put("double", Double.TYPE);
PRIMITIVE_NAMES.put("void", Void.TYPE);
}
/**
 * Placeholder that {@code writeObject} substitutes for a null instance.
 * Its serialized form is just the name of the declared class.
 */
private static class NullInstance extends Configured implements Writable {

  /** The class the null value was declared as. */
  private Class<?> declaredClass;

  public NullInstance() { super(null); }

  public NullInstance(Class declaredClass, Configuration conf) {
    super(conf);
    this.declaredClass = declaredClass;
  }

  /**
   * Reads the declared class name and resolves it, first against the
   * primitive names and then through the configuration.
   *
   * @throws RuntimeException if the class cannot be found; the original
   *         {@link ClassNotFoundException} is kept as the cause
   */
  public void readFields(DataInput in) throws IOException {
    String className = UTF8.readString(in);
    declaredClass = PRIMITIVE_NAMES.get(className);
    if (declaredClass == null) {
      try {
        declaredClass = getConf().getClassByName(className);
      } catch (ClassNotFoundException e) {
        // same message as before, but keep the cause for diagnosis
        throw new RuntimeException(e.toString(), e);
      }
    }
  }

  /** Writes the name of the declared class. */
  public void write(DataOutput out) throws IOException {
    UTF8.writeString(out, declaredClass.getName());
  }
}
/** Write a {@link Writable}, {@link String}, primitive type, or an array of
* the preceding.
* The declared class name is always written first. A null instance is
* replaced by a NullInstance written as a Writable.
* @throws IOException if the declared class is none of the supported kinds
*/
public static void writeObject(DataOutput out, Object instance,
Class declaredClass,
Configuration conf) throws IOException {
if (instance == null) { // null
instance = new NullInstance(declaredClass, conf);
declaredClass = Writable.class;
}
UTF8.writeString(out, declaredClass.getName()); // always write declared
if (declaredClass.isArray()) { // array
// length first, then each element written recursively with the component type
int length = Array.getLength(instance);
out.writeInt(length);
for (int i = 0; i < length; i++) {
writeObject(out, Array.get(instance, i),
declaredClass.getComponentType(), conf);
}
} else if (declaredClass == String.class) { // String
UTF8.writeString(out, (String)instance);
} else if (declaredClass.isPrimitive()) { // primitive type
if (declaredClass == Boolean.TYPE) { // boolean
out.writeBoolean(((Boolean)instance).booleanValue());
} else if (declaredClass == Character.TYPE) { // char
out.writeChar(((Character)instance).charValue());
} else if (declaredClass == Byte.TYPE) { // byte
out.writeByte(((Byte)instance).byteValue());
} else if (declaredClass == Short.TYPE) { // short
out.writeShort(((Short)instance).shortValue());
} else if (declaredClass == Integer.TYPE) { // int
out.writeInt(((Integer)instance).intValue());
} else if (declaredClass == Long.TYPE) { // long
out.writeLong(((Long)instance).longValue());
} else if (declaredClass == Float.TYPE) { // float
out.writeFloat(((Float)instance).floatValue());
} else if (declaredClass == Double.TYPE) { // double
out.writeDouble(((Double)instance).doubleValue());
} else if (declaredClass == Void.TYPE) { // void
// nothing is written for void
} else {
throw new IllegalArgumentException("Not a primitive: "+declaredClass);
}
} else if (declaredClass.isEnum()) { // enum
UTF8.writeString(out, ((Enum)instance).name());
} else if (Writable.class.isAssignableFrom(declaredClass)) { // Writable
// the runtime class name is written as well, followed by the instance's own data
UTF8.writeString(out, instance.getClass().getName());
((Writable)instance).write(out);
} else {
throw new IOException("Can't write: "+instance+" as "+declaredClass);
}
}
/** Read a {@link Writable}, {@link String}, primitive type, or an array of
* the preceding.
* Same as the three-argument overload with no ObjectWritable to fill in.
*/
public static Object readObject(DataInput in, Configuration conf)
throws IOException {
return readObject(in, null, conf);
}
/** Read a {@link Writable}, {@link String}, primitive type, or an array of
* the preceding.
* @param in the input to read from
* @param objectWritable if non-null, receives the declared class and instance that were read
* @param conf used to resolve class names and to instantiate Writables
* @return the instance that was read; null for void and for a serialized null
*/
@SuppressWarnings("unchecked")
public static Object readObject(DataInput in, ObjectWritable objectWritable, Configuration conf)
throws IOException {
String className = UTF8.readString(in);
// primitive names are resolved from the table; everything else via conf
Class<?> declaredClass = PRIMITIVE_NAMES.get(className);
if (declaredClass == null) {
try {
declaredClass = conf.getClassByName(className);
} catch (ClassNotFoundException e) {
throw new RuntimeException("readObject can't find class " + className, e);
}
}
Object instance;
if (declaredClass.isPrimitive()) { // primitive types
if (declaredClass == Boolean.TYPE) { // boolean
instance = Boolean.valueOf(in.readBoolean());
} else if (declaredClass == Character.TYPE) { // char
instance = Character.valueOf(in.readChar());
} else if (declaredClass == Byte.TYPE) { // byte
instance = Byte.valueOf(in.readByte());
} else if (declaredClass == Short.TYPE) { // short
instance = Short.valueOf(in.readShort());
} else if (declaredClass == Integer.TYPE) { // int
instance = Integer.valueOf(in.readInt());
} else if (declaredClass == Long.TYPE) { // long
instance = Long.valueOf(in.readLong());
} else if (declaredClass == Float.TYPE) { // float
instance = Float.valueOf(in.readFloat());
} else if (declaredClass == Double.TYPE) { // double
instance = Double.valueOf(in.readDouble());
} else if (declaredClass == Void.TYPE) { // void
instance = null;
} else {
throw new IllegalArgumentException("Not a primitive: "+declaredClass);
}
} else if (declaredClass.isArray()) { // array
// length first, then each element read recursively
int length = in.readInt();
instance = Array.newInstance(declaredClass.getComponentType(), length);
for (int i = 0; i < length; i++) {
Array.set(instance, i, readObject(in, conf));
}
} else if (declaredClass == String.class) { // String
instance = UTF8.readString(in);
} else if (declaredClass.isEnum()) { // enum
instance = Enum.valueOf((Class<? extends Enum>) declaredClass, UTF8.readString(in));
} else { // Writable
// the runtime class name follows the declared class name in the stream
Class instanceClass = null;
String str = "";
try {
str = UTF8.readString(in);
instanceClass = conf.getClassByName(str);
} catch (ClassNotFoundException e) {
throw new RuntimeException("readObject can't find class " + str, e);
}
Writable writable = WritableFactories.newInstance(instanceClass, conf);
writable.readFields(in);
instance = writable;
if (instanceClass == NullInstance.class) { // null
// a NullInstance stands for a serialized null; recover its declared class
declaredClass = ((NullInstance)instance).declaredClass;
instance = null;
}
}
if (objectWritable != null) { // store values
objectWritable.declaredClass = declaredClass;
objectWritable.instance = instance;
}
return instance;
}
/** Stores the configuration used by {@code readFields} and {@code write}. */
public void setConf(Configuration conf) {
this.conf = conf;
}
/** Returns the configuration previously stored, or null if none was set. */
public Configuration getConf() {
return this.conf;
}
}
6.BinaryComparable
2进制数据的封装,实现了Comparable接口
package org.apache.hadoop.io;
/**
 * Base class for types that are compared, tested for equality and hashed by
 * the bytes they expose through {@link #getBytes()} and {@link #getLength()}.
 */
public abstract class BinaryComparable implements Comparable<BinaryComparable> {

  /**
   * Return n st bytes 0..n-1 from {@link #getBytes()} are valid.
   */
  public abstract int getLength();

  /**
   * Return representative byte array for this instance.
   */
  public abstract byte[] getBytes();

  /**
   * Compare bytes from {@link #getBytes()}.
   * An object compared with itself yields 0 without touching the bytes.
   * @see org.apache.hadoop.io.WritableComparator#compareBytes(byte[],int,int,byte[],int,int)
   */
  public int compareTo(BinaryComparable other) {
    return (this == other)
        ? 0
        : WritableComparator.compareBytes(getBytes(), 0, getLength(),
            other.getBytes(), 0, other.getLength());
  }

  /**
   * Compare bytes from {@link #getBytes()} to those provided.
   */
  public int compareTo(byte[] other, int off, int len) {
    final byte[] mine = getBytes();
    final int mineLength = getLength();
    return WritableComparator.compareBytes(mine, 0, mineLength, other, off, len);
  }

  /**
   * Return true if bytes from {@link #getBytes()} match.
   * Lengths are compared first; the bytes are compared only when they agree.
   */
  public boolean equals(Object other) {
    if (other instanceof BinaryComparable) {
      final BinaryComparable that = (BinaryComparable) other;
      return this.getLength() == that.getLength() && this.compareTo(that) == 0;
    }
    return false;
  }

  /**
   * Return a hash of the bytes returned from {@link #getBytes()}.
   * @see org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int)
   */
  public int hashCode() {
    return WritableComparator.hashBytes(getBytes(), getLength());
  }
}
7.BytesWritable.
继承了BinaryComparable抽象类并实现了WritableComparable接口,是字节类型数组(byte[])的封装
package org.apache.hadoop.io;
import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class BytesWritable extends BinaryComparable
implements WritableComparable<BinaryComparable> {
private static final Log LOG = LogFactory.getLog(BytesWritable.class);
// number of bytes skipped at the start of each serialized value by the Comparator
private static final int LENGTH_BYTES = 4;
private static final byte[] EMPTY_BYTES = {};
// number of valid bytes in `bytes`
private int size;
// backing storage; its length is the capacity
private byte[] bytes;
/**
* Create a zero-size sequence.
*/
public BytesWritable() {this(EMPTY_BYTES);}
/**
* Create a BytesWritable using the byte array as the initial value.
* The array is not copied, and the size is set to its full length.
* @param bytes This array becomes the backing storage for the object.
*/
public BytesWritable(byte[] bytes) {
this.bytes = bytes;
this.size = bytes.length;
}
/**
* Get the data from the BytesWritable.
* The backing array itself is returned, not a copy.
* @return The data is only valid between 0 and getLength() - 1.
*/
public byte[] getBytes() {
return bytes;
}
/**
* Get the data from the BytesWritable.
* @deprecated Use {@link #getBytes()} instead.
*/
@Deprecated
public byte[] get() {
return getBytes();
}
/**
* Get the current size of the buffer.
*/
public int getLength() {
return size;
}
/**
* Get the current size of the buffer.
* @deprecated Use {@link #getLength()} instead.
*/
@Deprecated
public int getSize() {
return getLength();
}
/**
 * Change the size of the buffer. The values in the old range are preserved
 * and any new values are undefined. The capacity is changed if it is
 * necessary.
 *
 * <p>When the buffer must grow, the new capacity is 1.5 times the requested
 * size. That product is computed in {@code long} and capped, because
 * {@code size * 3 / 2} in {@code int} overflows to a negative capacity for
 * sizes above roughly 715 million.
 *
 * @param size The new number of bytes
 */
public void setSize(int size) {
  if (size > getCapacity()) {
    // Stay a little below Integer.MAX_VALUE: some VMs cannot allocate arrays
    // of the very largest sizes.
    final long maxArraySize = Integer.MAX_VALUE - 8;
    final long grown = Math.min(maxArraySize, (3L * size) / 2L);
    // never shrink below the requested size, even when the cap applies
    setCapacity((int) Math.max((long) size, grown));
  }
  this.size = size;
}
/**
* Get the capacity, which is the maximum size that could handled without
* resizing the backing storage.
* This is the length of the backing array.
* @return The number of bytes
*/
public int getCapacity() {
return bytes.length;
}
/**
 * Change the capacity of the backing storage.
 * The data is preserved, except that the size is cut down to the new
 * capacity when the capacity becomes smaller than the current size.
 * Nothing happens when the capacity already equals {@code new_cap}.
 * @param new_cap The new capacity in bytes.
 */
public void setCapacity(int new_cap) {
  if (new_cap == getCapacity()) {
    return;
  }
  final byte[] resized = new byte[new_cap];
  size = Math.min(size, new_cap);
  if (size != 0) {
    System.arraycopy(bytes, 0, resized, 0, size);
  }
  bytes = resized;
}
/**
* Set the BytesWritable to the contents of the given newData.
* Only the valid bytes of {@code newData} are copied.
* @param newData the value to set this BytesWritable to.
*/
public void set(BytesWritable newData) {
set(newData.bytes, 0, newData.size);
}
/**
* Set the value to a copy of the given byte range
* @param newData the new values to copy in
* @param offset the offset in newData to start at
* @param length the number of bytes to copy
*/
public void set(byte[] newData, int offset, int length) {
// size is reset to 0 first so that a resize has no old data to copy
setSize(0);
setSize(length);
System.arraycopy(newData, offset, bytes, 0, size);
}
// inherit javadoc
// Deserialization: reads an int size followed by that many bytes.
public void readFields(DataInput in) throws IOException {
setSize(0); // clear the old data
setSize(in.readInt());
in.readFully(bytes, 0, size);
}
// inherit javadoc
// Serialization: writes the int size followed by the valid bytes.
public void write(DataOutput out) throws IOException {
out.writeInt(size);
out.write(bytes, 0, size);
}
/** Returns the hash code computed by the superclass. */
public int hashCode() {
  return super.hashCode();
}

/**
 * Are the two byte sequences equal?
 * Only another BytesWritable can be equal; the comparison itself is inherited.
 */
public boolean equals(Object right_obj) {
  return (right_obj instanceof BytesWritable) && super.equals(right_obj);
}
/**
 * Generate the stream of bytes as hex pairs separated by ' '.
 * Each valid byte becomes two lowercase hex digits.
 */
public String toString() {
  final StringBuilder hex = new StringBuilder(3*size);
  for (int i = 0; i < size; i++) {
    // separator goes before every byte except the first
    if (i > 0) {
      hex.append(' ');
    }
    final int unsigned = 0xff & bytes[i];
    // values below 0x10 print as a single digit, so pad with a leading 0
    if (unsigned < 0x10) {
      hex.append('0');
    }
    hex.append(Integer.toHexString(unsigned));
  }
  return hex.toString();
}
/** A Comparator optimized for BytesWritable. */
public static class Comparator extends WritableComparator {
public Comparator() {
super(BytesWritable.class);
}
/**
* Compare the buffers in serialized form.
* The first LENGTH_BYTES bytes of each value are skipped; write() emits the
* size as an int before the data.
*/
public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) {
return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES,
b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES);
}
}
static { // register this comparator
WritableComparator.define(BytesWritable.class, new Comparator());
}
}