hadoop支持的数据类型

Hadoop支持的数据类型主要有以下几种：
BinaryComparable（可按字节比较的抽象类，是Text和BytesWritable的父类，见下文第6节）
IntWritable（对应于int）
BooleanWritable（对应于boolean）
DoubleWritable（对应于double）
FloatWritable（对应于float）
LongWritable（对应于long）
NullWritable
VLongWritable
VIntWritable
BytesWritable(字节数组)
ByteWritable（单个字节byte的封装，注意与上面的BytesWritable区分，后者封装的是字节数组）
Text(string)等
这些类都位于org.apache.hadoop.io包中。

1.WritableComparable.

WritableComparable是一个接口，它同时继承了Writable和Comparable两个接口；接下来介绍的基本数据类型封装类都实现了这个接口。

package org.apache.hadoop.io;

/**
 * Combines {@link Writable} and {@link Comparable} into a single interface.
 * It declares no members of its own.
 *
 * @param <T> the type this object can be compared with
 */
public interface WritableComparable<T> extends Writable, Comparable<T> {
}// extends both Writable and Comparable

2.IntWritable

int型数据的封装，实现了WritableComparable接口

package org.apache.hadoop.io;

import java.io.*;

/**
 * A WritableComparable for ints: wraps one primitive {@code int} and
 * implements the (raw) WritableComparable interface.
 */
public class IntWritable implements WritableComparable {
  /** The wrapped int. */
  private int value;

  /** Creates an instance whose wrapped value is the field default, 0. */
  public IntWritable() {}

  /** Creates an instance wrapping {@code value}. */
  public IntWritable(int value) {
    set(value);
  }

  /** Set the value of this IntWritable. */
  public void set(int value) {
    this.value = value;
  }

  /** Return the value of this IntWritable. */
  public int get() {
    return this.value;
  }

  /** Replaces the wrapped value with one int read from {@code in}. */
  public void readFields(DataInput in) throws IOException {
    this.value = in.readInt();
  }

  /** Writes the wrapped value to {@code out} as one int. */
  public void write(DataOutput out) throws IOException {
    out.writeInt(this.value);
  }

  /** Returns true iff <code>o</code> is a IntWritable with the same value. */
  public boolean equals(Object o) {
    return o instanceof IntWritable && this.value == ((IntWritable) o).value;
  }

  /** The hash code is the wrapped value itself. */
  public int hashCode() {
    return this.value;
  }

  /**
   * Compares two IntWritables by their wrapped values.
   *
   * @param o the other IntWritable; it is cast without a type check
   * @return -1, 0 or 1 when this value is less than, equal to or greater
   *         than the other value
   */
  public int compareTo(Object o) {
    final int mine = this.value;
    final int theirs = ((IntWritable) o).value;
    if (mine < theirs) {
      return -1;
    }
    if (mine == theirs) {
      return 0;
    }
    return 1;
  }

  /** Returns the decimal string form of the wrapped value. */
  public String toString() {
    return Integer.toString(this.value);
  }

  /**
   * A Comparator optimized for IntWritable: it orders two records by the int
   * read from each byte buffer at the given offset, without building
   * IntWritable objects.
   */
  public static class Comparator extends WritableComparator {
    public Comparator() {
      super(IntWritable.class);
    }

    public int compare(byte[] b1, int s1, int l1,
                       byte[] b2, int s2, int l2) {
      final int left = readInt(b1, s1);
      final int right = readInt(b2, s2);
      if (left < right) {
        return -1;
      }
      if (left == right) {
        return 0;
      }
      return 1;
    }
  }

  // register this comparator
  static {
    WritableComparator.define(IntWritable.class, new Comparator());
  }
}

其他数据类型的类与IntWritable类似，这里不再介绍。

3.Text.

Text相当于String，它继承自BinaryComparable并且实现了WritableComparable接口。下面来看Text的源码，注释写在源码里面。

package org.apache.hadoop.io;

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class Text extends BinaryComparable
    implements WritableComparable<BinaryComparable> {
    private static final Log LOG= LogFactory.getLog(Text.class);
    // logger created for Text.class
  private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
    new ThreadLocal<CharsetEncoder>() {
      protected CharsetEncoder initialValue() {
        return Charset.forName("UTF-8").newEncoder().
               onMalformedInput(CodingErrorAction.REPORT).
               onUnmappableCharacter(CodingErrorAction.REPORT);
    }
  }; // per-thread UTF-8 encoder used inside Text; defaults to REPORT on bad input

  private static ThreadLocal<CharsetDecoder> DECODER_FACTORY =
    new ThreadLocal<CharsetDecoder>() {
    protected CharsetDecoder initialValue() {
      return Charset.forName("UTF-8").newDecoder().
             onMalformedInput(CodingErrorAction.REPORT).
             onUnmappableCharacter(CodingErrorAction.REPORT);
    }
  };// per-thread UTF-8 decoder used inside Text; defaults to REPORT on bad input

  private static final byte [] EMPTY_BYTES = new byte[0];
  // shared zero-length array used as the initial backing store

  private byte[] bytes;// backing byte array; only the first `length` bytes are valid
  private int length;// number of valid bytes stored in `bytes`

  /** Creates an empty Text backed by the shared zero-length array. */
  public Text() {
    this.bytes = EMPTY_BYTES;
  }

  /** Construct from a string. */
  public Text(String string) {
    this.set(string);
  }

  /** Construct from another text. */
  public Text(Text utf8) {
    this.set(utf8);
  }

  /** Construct from a byte array. */
  public Text(byte[] utf8) {
    this.set(utf8);
  }
  // Constructors above: empty by default, or initialised from a String,
  // another Text, or a byte array via the matching set(...) overload.

  /**
   * Returns the raw backing array (not a copy); only data up to
   * {@link #getLength()} is valid.
   */
  public byte[] getBytes() {
    return this.bytes;
  }

  /** Returns the number of valid bytes in the byte array. */
  public int getLength() {
    return this.length;
  }

  /**
   * Returns the Unicode code point of the UTF-8 sequence that starts at the
   * given byte offset.
   *
   * @param position byte offset (not a character index) into the stored bytes
   * @return the 32-bit code point computed by
   *         {@link #bytesToCodePoint(ByteBuffer)} (which itself yields -1 when
   *         the offset lands on a trail byte), or -1 when {@code position} is
   *         negative or not less than {@link #getLength()}
   */
  public int charAt(int position) {
    // Valid offsets are 0..length-1. The previous check used '>' and let
    // position == length through, which either read a stale byte beyond the
    // valid range or threw BufferUnderflowException on an exactly-sized array.
    if (position < 0 || position >= this.length) return -1;

    ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
    return bytesToCodePoint(bb.slice());
  }

  /**
   * Finds the first occurrence of <code>what</code>, searching from byte
   * offset 0. Delegates to {@link #find(String, int)}.
   */
  public int find(String what) {
    return find(what, 0);
  }


  /**
   * Finds the first occurrence of the UTF-8 encoding of <code>what</code>
   * within the valid bytes of this Text, starting the search at byte offset
   * <code>start</code>.
   *
   * @param what the string to search for
   * @param start byte offset at which the search begins
   * @return the byte offset of the first match, or -1 if there is none
   */
  public int find(String what, int start) {
    try {
      ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length);
      ByteBuffer tgt = encode(what);
      // NOTE(review): an empty `what` encodes to an empty buffer, so this get()
      // would presumably throw BufferUnderflowException - confirm intent.
      byte b = tgt.get();
      src.position(start);

      while (src.hasRemaining()) {
        if (b == src.get()) { // matching first byte
          src.mark(); // save position in loop
          tgt.mark(); // save position in target
          boolean found = true;
          // src has already consumed the first byte, so the match began one back
          int pos = src.position()-1;
          while (tgt.hasRemaining()) {
            if (!src.hasRemaining()) { // src expired first
              tgt.reset();
              src.reset();
              found = false;
              break;
            }
            if (!(tgt.get() == src.get())) {
              tgt.reset();
              src.reset();
              found = false;
              break; // no match
            }
          }
          if (found) return pos;
        }
      }
      return -1; // not found
    } catch (CharacterCodingException e) {
      // can't get here
      e.printStackTrace();
      return -1;
    }
  }  // returns the byte offset of the first occurrence of what at or after start
  /**
   * Set to contain the contents of a string. The encoded buffer's backing
   * array is adopted directly and its limit becomes the length.
   */
  public void set(String string) {
    final ByteBuffer encoded;
    try {
      encoded = encode(string, true);
    } catch (CharacterCodingException e) {
      throw new RuntimeException("Should not have happened " + e.toString());
    }
    this.bytes = encoded.array();
    this.length = encoded.limit();
  }

  /** Set to a utf8 byte array; the whole array is copied. */
  public void set(byte[] utf8) {
    this.set(utf8, 0, utf8.length);
  }

  /** Copy a text: takes the other Text's valid bytes. */
  public void set(Text other) {
    final byte[] source = other.getBytes();
    final int sourceLength = other.getLength();
    this.set(source, 0, sourceLength);
  }

  // The three overloads above mirror the constructors.

  /**
   * Set the Text to range of bytes. The previous contents are replaced, not
   * appended to: the range is copied to offset 0 of the backing array.
   * @param utf8 the data to copy from
   * @param start the first position of the new string
   * @param len the number of bytes of the new string
   */
  public void set(byte[] utf8, int start, int len) {
    // old data need not survive a reallocation because it is overwritten
    this.setCapacity(len, false);
    System.arraycopy(utf8, start, this.bytes, 0, len);
    this.length = len;
  }

  /**
   * Append a range of bytes to the end of the given text
   * @param utf8 the data to copy from
   * @param start the first position to append from utf8
   * @param len the number of bytes to append
   */
  public void append(byte[] utf8, int start, int len) {
    final int grownLength = this.length + len;
    // existing bytes must be carried over if the array is reallocated
    this.setCapacity(grownLength, true);
    System.arraycopy(utf8, start, this.bytes, this.length, len);
    this.length = grownLength;
  }

  /**
   * Clear the string to empty. Only the length is reset to 0; the backing
   * array is kept as is.
   */
  public void clear() {
    this.length = 0;
  }

  /**
   * Makes sure the backing array can hold at least <code>len</code> bytes.
   * A new array of exactly <code>len</code> bytes is allocated only when the
   * current one is missing or too small.
   *
   * @param len the required capacity in bytes
   * @param keepData true to copy the current valid bytes into a newly
   *                 allocated array; false to discard them
   */
  private void setCapacity(int len, boolean keepData) {
    final byte[] current = this.bytes;
    if (current != null && current.length >= len) {
      return; // already large enough
    }
    final byte[] replacement = new byte[len];
    if (keepData && current != null) {
      System.arraycopy(current, 0, replacement, 0, this.length);
    }
    this.bytes = replacement;
  }

  /**
   * Convert text back to string by decoding the valid bytes.
   * @see java.lang.Object#toString()
   */
  public String toString() {
    final String decoded;
    try {
      decoded = decode(this.bytes, 0, this.length);
    } catch (CharacterCodingException e) {
      throw new RuntimeException("Should not have happened " + e.toString());
    }
    return decoded;
  }

  /**
   * Deserialize: reads a length via WritableUtils.readVInt, then that many
   * bytes into the backing array.
   */
  public void readFields(DataInput in) throws IOException {
    final int incomingLength = WritableUtils.readVInt(in);
    // previous contents are about to be overwritten, so they are not preserved
    this.setCapacity(incomingLength, false);
    in.readFully(this.bytes, 0, incomingLength);
    this.length = incomingLength;
  }

  /**
   * Skips over one Text in the input: reads its length prefix, then skips
   * that many bytes.
   */
  public static void skip(DataInput in) throws IOException {
    WritableUtils.skipFully(in, WritableUtils.readVInt(in));
  }

  /**
   * Serialize: writes this object to out as a length followed by the valid
   * bytes. The length uses zero-compressed encoding.
   * @see Writable#write(DataOutput)
   */
  public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, this.length);
    out.write(this.bytes, 0, this.length);
  }

  /** Returns true iff <code>o</code> is a Text with the same contents.  */
  public boolean equals(Object o) {
    // the contents comparison itself is inherited from the superclass
    return o instanceof Text && super.equals(o);
  }

  /** Uses the superclass hash unchanged. */
  public int hashCode() {
    return super.hashCode();
  }

  /**
   * A WritableComparator optimized for Text keys: compares two serialized
   * records byte-wise after stepping over each record's length prefix.
   */
  public static class Comparator extends WritableComparator {
    public Comparator() {
      super(Text.class);
    }

    public int compare(byte[] b1, int s1, int l1,
                       byte[] b2, int s2, int l2) {
      // size of each record's length prefix, derived from its first byte
      final int prefix1 = WritableUtils.decodeVIntSize(b1[s1]);
      final int prefix2 = WritableUtils.decodeVIntSize(b2[s2]);
      return compareBytes(b1, s1 + prefix1, l1 - prefix1,
                          b2, s2 + prefix2, l2 - prefix2);
    }
  }

  static {
    // register this comparator for Text.class with WritableComparator
    WritableComparator.define(Text.class, new Comparator());
  }

  /// STATIC UTILITIES FROM HERE DOWN
  /**
   * Converts the provided byte array to a String using the
   * UTF-8 encoding. If the input is malformed, the offending bytes are
   * replaced by the decoder's replacement value instead of raising an error.
   */
  public static String decode(byte[] utf8) throws CharacterCodingException {
    final ByteBuffer whole = ByteBuffer.wrap(utf8);
    return decode(whole, true);
  }

  /**
   * Same as {@link #decode(byte[])} but restricted to <code>length</code>
   * bytes starting at <code>start</code>.
   */
  public static String decode(byte[] utf8, int start, int length)
    throws CharacterCodingException {
    final ByteBuffer range = ByteBuffer.wrap(utf8, start, length);
    return decode(range, true);
  }

  /**
   * Converts the provided byte array to a String using the
   * UTF-8 encoding. If <code>replace</code> is true, then
   * malformed input is replaced with the
   * substitution character, which is U+FFFD. Otherwise the
   * method throws a MalformedInputException.
   */
  public static String decode(byte[] utf8, int start, int length, boolean replace)
    throws CharacterCodingException {
    final ByteBuffer range = ByteBuffer.wrap(utf8, start, length);
    return decode(range, replace);
  }

  /**
   * Decodes the buffer with the calling thread's decoder. In replace mode the
   * decoder is switched to REPLACE for this one call and switched back to
   * REPORT afterwards.
   */
  private static String decode(ByteBuffer utf8, boolean replace)
    throws CharacterCodingException {
    final CharsetDecoder threadDecoder = DECODER_FACTORY.get();
    if (!replace) {
      // strict mode: use the decoder in its current configuration
      return threadDecoder.decode(utf8).toString();
    }
    threadDecoder.onMalformedInput(CodingErrorAction.REPLACE);
    threadDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    final String decoded = threadDecoder.decode(utf8).toString();
    // set decoder back to its default value: REPORT
    threadDecoder.onMalformedInput(CodingErrorAction.REPORT);
    threadDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    return decoded;
  }

  /**
   * Converts the provided String to bytes using the
   * UTF-8 encoding. If the input is malformed,
   * invalid chars are replaced by a default value.
   * @return ByteBuffer: bytes stores at ByteBuffer.array()
   *                     and length is ByteBuffer.limit()
   */
  public static ByteBuffer encode(String string)
    throws CharacterCodingException {
    final boolean replaceInvalid = true;
    return encode(string, replaceInvalid);
  }

  /**
   * Converts the provided String to bytes using the
   * UTF-8 encoding. If <code>replace</code> is true, then
   * malformed input is replaced with the
   * substitution character, which is U+FFFD. Otherwise the
   * method throws a MalformedInputException.
   * @return ByteBuffer: bytes stores at ByteBuffer.array()
   *                     and length is ByteBuffer.limit()
   */
  public static ByteBuffer encode(String string, boolean replace)
    throws CharacterCodingException {
    final CharsetEncoder threadEncoder = ENCODER_FACTORY.get();
    final CharBuffer chars = CharBuffer.wrap(string.toCharArray());
    if (!replace) {
      // strict mode: use the encoder in its current configuration
      return threadEncoder.encode(chars);
    }
    threadEncoder.onMalformedInput(CodingErrorAction.REPLACE);
    threadEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    final ByteBuffer encoded = threadEncoder.encode(chars);
    // restore the per-thread encoder to REPORT for later strict callers
    threadEncoder.onMalformedInput(CodingErrorAction.REPORT);
    threadEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    return encoded;
  }

  /**
   * Read a UTF8 encoded string from in: a length prefix followed by that
   * many bytes, which are then decoded.
   */
  public static String readString(DataInput in) throws IOException {
    final int byteCount = WritableUtils.readVInt(in);
    final byte[] raw = new byte[byteCount];
    in.readFully(raw, 0, byteCount);
    return decode(raw);
  }

  /**
   * Write a UTF8 encoded string to out: a length prefix followed by the
   * encoded bytes.
   * @return the number of encoded bytes written after the length prefix
   */
  public static int writeString(DataOutput out, String s) throws IOException {
    final ByteBuffer encoded = encode(s);
    final int byteCount = encoded.limit();
    WritableUtils.writeVInt(out, byteCount);
    out.write(encoded.array(), 0, byteCount);
    return byteCount;
  }

  ////// states for validateUTF8

  // the next byte is expected to be the lead byte of a sequence
  private static final int LEAD_BYTE = 0;

  // the next byte is the first trail byte, which gets extra lead-dependent range checks
  private static final int TRAIL_BYTE_1 = 1;

  // the next byte is a later trail byte, checked only against 0x80..0xBF
  private static final int TRAIL_BYTE = 2;

  /**
   * Check if a byte array contains valid utf-8
   * @param utf8 byte array
   * @throws MalformedInputException if the byte array contains invalid utf-8
   */
  public static void validateUTF8(byte[] utf8) throws MalformedInputException {
    validateUTF8(utf8, 0, utf8.length);
  }

  /**
   * Check to see if a byte array is valid utf-8.
   * <p>
   * Runs a small state machine over the range: each lead byte determines how
   * many trail bytes must follow, the first trail byte gets extra range
   * checks that depend on the lead byte, and every trail byte must lie in
   * 0x80..0xBF. A range that ends in the middle of a multi-byte sequence is
   * rejected as well.
   *
   * @param utf8 the array of bytes
   * @param start the offset of the first byte in the array
   * @param len the length of the byte sequence
   * @throws MalformedInputException if the byte array contains invalid bytes
   *         or ends before a started sequence is complete
   */
  public static void validateUTF8(byte[] utf8, int start, int len)
    throws MalformedInputException {
    int count = start;
    int leadByte = 0;
    int length = 0;   // trail bytes still expected for the current sequence
    int state = LEAD_BYTE;
    while (count < start+len) {
      int aByte = ((int) utf8[count] & 0xFF);

      switch (state) {
      case LEAD_BYTE:
        leadByte = aByte;
        length = bytesFromUTF8[aByte];

        switch (length) {
        case 0: // check for ASCII
          if (leadByte > 0x7F)
            throw new MalformedInputException(count);
          break;
        case 1:
          if (leadByte < 0xC2 || leadByte > 0xDF)
            throw new MalformedInputException(count);
          state = TRAIL_BYTE_1;
          break;
        case 2:
          if (leadByte < 0xE0 || leadByte > 0xEF)
            throw new MalformedInputException(count);
          state = TRAIL_BYTE_1;
          break;
        case 3:
          if (leadByte < 0xF0 || leadByte > 0xF4)
            throw new MalformedInputException(count);
          state = TRAIL_BYTE_1;
          break;
        default:
          // too long! Longest valid UTF-8 is 4 bytes (lead + three)
          // or if < 0 we got a trail byte in the lead byte position
          throw new MalformedInputException(count);
        } // switch (length)
        break;

      case TRAIL_BYTE_1:
        if (leadByte == 0xF0 && aByte < 0x90)
          throw new MalformedInputException(count);
        if (leadByte == 0xF4 && aByte > 0x8F)
          throw new MalformedInputException(count);
        if (leadByte == 0xE0 && aByte < 0xA0)
          throw new MalformedInputException(count);
        if (leadByte == 0xED && aByte > 0x9F)
          throw new MalformedInputException(count);
        // falls through to regular trail-byte test!!
      case TRAIL_BYTE:
        if (aByte < 0x80 || aByte > 0xBF)
          throw new MalformedInputException(count);
        if (--length == 0) {
          state = LEAD_BYTE;
        } else {
          state = TRAIL_BYTE;
        }
        break;
      } // switch (state)
      count++;
    }
    // Still waiting for trail bytes means the input stopped in the middle of
    // a multi-byte sequence; previously such truncated input was accepted.
    if (state != LEAD_BYTE) {
      throw new MalformedInputException(count);
    }
  }

  /**
   * Magic numbers for UTF-8. These are the number of bytes
   * that <em>follow</em> a given lead byte. Trailing bytes
   * have the value -1. The values 4 and 5 are presented in
   * this table, even though valid UTF-8 cannot include the
   * five and six byte sequences.
   * <p>
   * The table is indexed by the unsigned value (0..255) of the byte.
   */
  static final int[] bytesFromUTF8 =
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0,
    // trail bytes
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
    3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 };

  /**
   * Returns the next code point at the current position in
   * the buffer. The buffer's position will be incremented.
   * Any mark set on this buffer will be changed by this method!
   *
   * @param bytes buffer positioned at the lead byte of a UTF-8 sequence
   * @return the code point, or -1 if the byte at the current position is a
   *         trail byte
   */
  public static int bytesToCodePoint(ByteBuffer bytes) {
    // peek at the lead byte without consuming it
    bytes.mark();
    byte b = bytes.get();
    bytes.reset();
    int extraBytesToRead = bytesFromUTF8[(b & 0xFF)];
    if (extraBytesToRead < 0) return -1; // trailing byte!
    int ch = 0;

    // Deliberate fall-through: enter at the case for the number of extra
    // bytes, then accumulate each byte, shifting by 6 bits between bytes.
    switch (extraBytesToRead) {
    case 5: ch += (bytes.get() & 0xFF); ch <<= 6; /* remember, illegal UTF-8 */
    case 4: ch += (bytes.get() & 0xFF); ch <<= 6; /* remember, illegal UTF-8 */
    case 3: ch += (bytes.get() & 0xFF); ch <<= 6;
    case 2: ch += (bytes.get() & 0xFF); ch <<= 6;
    case 1: ch += (bytes.get() & 0xFF); ch <<= 6;
    case 0: ch += (bytes.get() & 0xFF);
    }
    // subtract the per-length constant to obtain the final value
    ch -= offsetsFromUTF8[extraBytesToRead];

    return ch;
  }


  // Constants subtracted from the accumulated value in bytesToCodePoint,
  // indexed by the number of extra bytes (0..5) that follow the lead byte.
  static final int offsetsFromUTF8[] =
  { 0x00000000, 0x00003080,
    0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };

  /**
   * For the given string, returns the number of UTF-8 bytes
   * required to encode the string.
   * <p>
   * A valid surrogate pair counts as 4 bytes and an unpaired high surrogate
   * as 3 bytes. Chars below 0x80 count as 1 byte, chars below 0x800 as 2 and
   * every other char as 3.
   *
   * @param string text to encode
   * @return number of UTF-8 bytes required to encode
   */
  public static int utf8Length(String string) {
    // Walk by index rather than with a CharacterIterator: its DONE sentinel is
    // the char '\uFFFF', so an iterator loop stops counting at the first
    // U+FFFF that legitimately occurs in the string.
    final int charCount = string.length();
    int size = 0;
    int index = 0;
    while (index < charCount) {
      final char ch = string.charAt(index++);
      if (Character.isHighSurrogate(ch)) {
        if (index < charCount && Character.isLowSurrogate(string.charAt(index))) {
          // valid pair: consume the low surrogate as well
          size += 4;
          index++;
        } else {
          // invalid pair: count the lone high surrogate, re-examine the next char
          size += 3;
        }
      } else if (ch < 0x80) {
        size++;
      } else if (ch < 0x800) {
        size += 2;
      } else {
        // ch < 0x10000, that is, the largest char value
        size += 3;
      }
    }
    return size;
  }
}//返回给定的String以UTF-8编码后的数量

4.NullWritable
NullWritable是一个单例对象，不能被修改。其序列化的长度为0，没有从流中读入字节也没有写出字节，相当于NULL。


package org.apache.hadoop.io;

import java.io.*;

/** Singleton Writable with no data. */
public class NullWritable implements WritableComparable {

  private static final NullWritable THIS = new NullWritable();

  private NullWritable() {}                       // no public ctor

  /** Returns the single instance of this class. */
  public static NullWritable get() { return THIS; }

  public String toString() {
    return "(null)";
  }

  public int hashCode() { return 0; }
  public int compareTo(Object other) {
    if (!(other instanceof NullWritable)) {
      throw new ClassCastException("can't compare " + other.getClass().getName() 
                                   + " to NullWritable");
    }
    return 0;
  }
  public boolean equals(Object other) { return other instanceof NullWritable; }
  public void readFields(DataInput in) throws IOException {} //实现反序列化，可以看出没有读入字节流
  public void write(DataOutput out) throws IOException {}
  //实现序列化，没有写出字节流
//比较器
  public int compare(byte[] b1, int s1, int l1,
                       byte[] b2, int s2, int l2) {
      assert 0 == l1;
      assert 0 == l2;
      return 0;
    }
  }

  static {                                        // register this comparator
    WritableComparator.define(NullWritable.class, new Comparator());
  }
}

5.ObjectWritable

ObjectWritable实现了Writable和Configurable接口，其功能是封装基本数据类型、String、枚举类型、Writable对象以及这些类型的数组。


package org.apache.hadoop.io;

import java.lang.reflect.Array;

import java.io.*;
import java.util.*;

import org.apache.hadoop.conf.*;

/** A polymorphic Writable that writes an instance with it's class name.
 * Handles arrays, strings and primitive types without a Writable wrapper.
 */
public class ObjectWritable implements Writable, Configurable {

  private Class declaredClass;// declared class of the wrapped value
  private Object instance;// the wrapped value itself
  private Configuration conf;// configuration handed to readObject/writeObject

  /** Creates an instance with no declared class and no value. */
  public ObjectWritable() {
  }

  /** Wraps {@code instance}, taking its runtime class as the declared class. */
  public ObjectWritable(Object instance) {
    this.set(instance);
  }

  /** Wraps {@code instance} under an explicitly given declared class. */
  public ObjectWritable(Class declaredClass, Object instance) {
    this.instance = instance;
    this.declaredClass = declaredClass;
  }
  /** Return the instance, or null if none. */
  public Object get() {
    return this.instance;
  }

  /** Return the class this is meant to be. */
  public Class getDeclaredClass() {
    return this.declaredClass;
  }

  /**
   * Reset the instance. The declared class becomes the runtime class of the
   * new value, as in the single-argument constructor.
   */
  public void set(Object instance) {
    final Class runtimeClass = instance.getClass();
    this.declaredClass = runtimeClass;
    this.instance = instance;
  }

  /** Renders the declared class and the wrapped value as one string. */
  public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append("OW[class=").append(declaredClass);
    text.append(",value=").append(instance);
    text.append("]");
    return text.toString();
  }


  /** Deserializes into this object by delegating to the static readObject. */
  public void readFields(DataInput in) throws IOException {
    readObject(in, this, conf);
  }

  /** Serializes the wrapped value by delegating to the static writeObject. */
  public void write(DataOutput out) throws IOException {
    writeObject(out, this.instance, this.declaredClass, this.conf);
  }

  /** Lookup from a primitive type name (including "void") to its Class. */
  private static final Map<String, Class<?>> PRIMITIVE_NAMES = new HashMap<String, Class<?>>();
  static {
    // parallel tables: NAMES[i] is the key under which TYPES[i] is stored
    final String[] names = {
      "boolean", "byte", "char", "short", "int", "long", "float", "double", "void"
    };
    final Class<?>[] types = {
      Boolean.TYPE, Byte.TYPE, Character.TYPE, Short.TYPE, Integer.TYPE,
      Long.TYPE, Float.TYPE, Double.TYPE, Void.TYPE
    };
    for (int i = 0; i < names.length; i++) {
      PRIMITIVE_NAMES.put(names[i], types[i]);
    }
  }

  /**
   * Writable stand-in that writeObject substitutes for a null value. It
   * serializes only the name of the declared class, so that readObject can
   * recover the declared class of the null.
   */
  private static class NullInstance extends Configured implements Writable {
    private Class<?> declaredClass;
    public NullInstance() { super(null); }
    public NullInstance(Class declaredClass, Configuration conf) {
      super(conf);
      this.declaredClass = declaredClass;
    }

    /**
     * Reads the declared class name and resolves it, first against the
     * primitive-name table and otherwise through the configuration.
     *
     * @throws RuntimeException if the named class cannot be found; the
     *         ClassNotFoundException is attached as the cause
     */
    public void readFields(DataInput in) throws IOException {
      String className = UTF8.readString(in);
      declaredClass = PRIMITIVE_NAMES.get(className);
      if (declaredClass == null) {
        try {
          declaredClass = getConf().getClassByName(className);
        } catch (ClassNotFoundException e) {
          // Same message as before, but chain the cause so the original stack
          // trace is kept (readObject already chains its cause the same way).
          throw new RuntimeException(e.toString(), e);
        }
      }
    }

    /** Writes only the declared class name. */
    public void write(DataOutput out) throws IOException {
      UTF8.writeString(out, declaredClass.getName());
    }
  }

  /** Write a {@link Writable}, {@link String}, primitive type, or an array of
   * the preceding.
   * <p>
   * The declared class name is always written first, followed by a payload
   * whose form depends on the declared class. A null instance is written as
   * a NullInstance under the declared class Writable.
   *
   * @param out the stream to write to
   * @param instance the value to write; may be null
   * @param declaredClass the class the value is declared as
   * @param conf configuration passed to NullInstance and to recursive calls
   * @throws IOException if the declared class is none of the supported kinds,
   *         or if the underlying stream fails
   */
  public static void writeObject(DataOutput out, Object instance,
                                 Class declaredClass, 
                                 Configuration conf) throws IOException {

    if (instance == null) {                       // null
      // substitute a placeholder that remembers the declared class
      instance = new NullInstance(declaredClass, conf);
      declaredClass = Writable.class;
    }

    UTF8.writeString(out, declaredClass.getName()); // always write declared

    if (declaredClass.isArray()) {                // array
      // element count, then each element written recursively
      int length = Array.getLength(instance);
      out.writeInt(length);
      for (int i = 0; i < length; i++) {
        writeObject(out, Array.get(instance, i),
                    declaredClass.getComponentType(), conf);
      }

    } else if (declaredClass == String.class) {   // String
      UTF8.writeString(out, (String)instance);

    } else if (declaredClass.isPrimitive()) {     // primitive type

      if (declaredClass == Boolean.TYPE) {        // boolean
        out.writeBoolean(((Boolean)instance).booleanValue());
      } else if (declaredClass == Character.TYPE) { // char
        out.writeChar(((Character)instance).charValue());
      } else if (declaredClass == Byte.TYPE) {    // byte
        out.writeByte(((Byte)instance).byteValue());
      } else if (declaredClass == Short.TYPE) {   // short
        out.writeShort(((Short)instance).shortValue());
      } else if (declaredClass == Integer.TYPE) { // int
        out.writeInt(((Integer)instance).intValue());
      } else if (declaredClass == Long.TYPE) {    // long
        out.writeLong(((Long)instance).longValue());
      } else if (declaredClass == Float.TYPE) {   // float
        out.writeFloat(((Float)instance).floatValue());
      } else if (declaredClass == Double.TYPE) {  // double
        out.writeDouble(((Double)instance).doubleValue());
      } else if (declaredClass == Void.TYPE) {    // void
        // nothing to write for void
      } else {
        throw new IllegalArgumentException("Not a primitive: "+declaredClass);
      }
    } else if (declaredClass.isEnum()) {         // enum
      // enums are written by constant name
      UTF8.writeString(out, ((Enum)instance).name());
    } else if (Writable.class.isAssignableFrom(declaredClass)) { // Writable
      // the runtime class name precedes the Writable's own serialized form
      UTF8.writeString(out, instance.getClass().getName());
      ((Writable)instance).write(out);

    } else {
      throw new IOException("Can't write: "+instance+" as "+declaredClass);
    }
  }


  /** Read a {@link Writable}, {@link String}, primitive type, or an array of
   * the preceding. Equivalent to the three-argument overload with no
   * ObjectWritable to populate. */
  public static Object readObject(DataInput in, Configuration conf)
    throws IOException {
    return readObject(in, null, conf);
  }

  /** Read a {@link Writable}, {@link String}, primitive type, or an array of
   * the preceding.
   * <p>
   * Reads the declared class name, resolves it, then reads the payload in
   * the form that matches the declared class.
   *
   * @param in the stream to read from
   * @param objectWritable if non-null, receives the declared class and value
   * @param conf configuration used to resolve class names and create Writables
   * @return the value read, or null for void or a serialized null
   * @throws RuntimeException if a class name in the stream cannot be resolved
   */
  @SuppressWarnings("unchecked")
  public static Object readObject(DataInput in, ObjectWritable objectWritable, Configuration conf)
    throws IOException {
    String className = UTF8.readString(in);
    // primitive names are resolved from the table, everything else via conf
    Class<?> declaredClass = PRIMITIVE_NAMES.get(className);
    if (declaredClass == null) {
      try {
        declaredClass = conf.getClassByName(className);
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("readObject can't find class " + className, e);
      }
    }    

    Object instance;

    if (declaredClass.isPrimitive()) {            // primitive types

      if (declaredClass == Boolean.TYPE) {             // boolean
        instance = Boolean.valueOf(in.readBoolean());
      } else if (declaredClass == Character.TYPE) {    // char
        instance = Character.valueOf(in.readChar());
      } else if (declaredClass == Byte.TYPE) {         // byte
        instance = Byte.valueOf(in.readByte());
      } else if (declaredClass == Short.TYPE) {        // short
        instance = Short.valueOf(in.readShort());
      } else if (declaredClass == Integer.TYPE) {      // int
        instance = Integer.valueOf(in.readInt());
      } else if (declaredClass == Long.TYPE) {         // long
        instance = Long.valueOf(in.readLong());
      } else if (declaredClass == Float.TYPE) {        // float
        instance = Float.valueOf(in.readFloat());
      } else if (declaredClass == Double.TYPE) {       // double
        instance = Double.valueOf(in.readDouble());
      } else if (declaredClass == Void.TYPE) {         // void
        instance = null;
      } else {
        throw new IllegalArgumentException("Not a primitive: "+declaredClass);
      }

    } else if (declaredClass.isArray()) {              // array
      // element count, then each element read recursively
      int length = in.readInt();
      instance = Array.newInstance(declaredClass.getComponentType(), length);
      for (int i = 0; i < length; i++) {
        Array.set(instance, i, readObject(in, conf));
      }

    } else if (declaredClass == String.class) {        // String
      instance = UTF8.readString(in);
    } else if (declaredClass.isEnum()) {         // enum
      instance = Enum.valueOf((Class<? extends Enum>) declaredClass, UTF8.readString(in));
    } else {                                      // Writable
      // the runtime class name comes first, then the Writable's own fields
      Class instanceClass = null;
      String str = "";
      try {
        str = UTF8.readString(in);
        instanceClass = conf.getClassByName(str);
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("readObject can't find class " + str, e);
      }

      Writable writable = WritableFactories.newInstance(instanceClass, conf);
      writable.readFields(in);
      instance = writable;

      if (instanceClass == NullInstance.class) {  // null
        // a serialized null: recover its declared class and return null
        declaredClass = ((NullInstance)instance).declaredClass;
        instance = null;
      }
    }

    if (objectWritable != null) {                 // store values
      objectWritable.declaredClass = declaredClass;
      objectWritable.instance = instance;
    }

    return instance;

  }

  /** Stores the configuration that readFields and write pass along. */
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /** Returns the stored configuration, or null if none has been set. */
  public Configuration getConf() {
    return this.conf;
  }

}

6.BinaryComparable

2进制数据的封装，实现了Comparable接口

package org.apache.hadoop.io;

 /**
  * Base class for types whose ordering, equality and hash are all derived
  * from the bytes exposed by {@link #getBytes()} and {@link #getLength()}.
  * Implements {@code Comparable<BinaryComparable>}.
  */
public abstract class BinaryComparable implements Comparable<BinaryComparable> {

  /**
   * Return n such that bytes 0..n-1 from {@link #getBytes()} are valid.
   */
  public abstract int getLength();

  /**
   * Return representative byte array for this instance.
   */
  public abstract byte[] getBytes();

  /**
   * Compare bytes from {@link #getBytes()} with those of another instance.
   * The same object always compares as 0 without looking at the bytes.
   * @see org.apache.hadoop.io.WritableComparator#compareBytes(byte[],int,int,byte[],int,int)
   */
  public int compareTo(BinaryComparable other) {
    return (this == other)
        ? 0
        : WritableComparator.compareBytes(getBytes(), 0, getLength(),
                                          other.getBytes(), 0, other.getLength());
  }

  /**
   * Compare bytes from {@link #getBytes()} to those provided.
   */
  public int compareTo(byte[] other, int off, int len) {
    final byte[] mine = getBytes();
    final int mineLength = getLength();
    return WritableComparator.compareBytes(mine, 0, mineLength, other, off, len);
  }

  /**
   * Return true if <code>other</code> is a BinaryComparable of the same
   * length whose bytes compare as equal.
   */
  public boolean equals(Object other) {
    if (other instanceof BinaryComparable) {
      final BinaryComparable that = (BinaryComparable) other;
      // differing lengths short-circuit before any byte comparison
      return this.getLength() == that.getLength() && this.compareTo(that) == 0;
    }
    return false;
  }

  /**
   * Return a hash of the bytes returned from {@link #getBytes()}.
   * @see org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int)
   */
  public int hashCode() {
    final byte[] data = getBytes();
    final int validLength = getLength();
    return WritableComparator.hashBytes(data, validLength);
  }

}

7.BytesWritable.

BytesWritable继承了BinaryComparable抽象类，并实现了WritableComparable接口，是字节数组（byte[]）的封装。

package org.apache.hadoop.io;

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * A growable byte sequence: wraps a <code>byte[]</code> backing array together
 * with the count of bytes that are currently valid. Only bytes
 * <code>0..getLength()-1</code> of the backing array carry data; the array
 * itself may be longer (its length is the capacity).
 */
public class BytesWritable extends BinaryComparable
    implements WritableComparable<BinaryComparable> {
  private static final Log LOG = LogFactory.getLog(BytesWritable.class);
  /** Size in bytes of the int length prefix emitted by {@link #write}. */
  private static final int LENGTH_BYTES = 4;
  private static final byte[] EMPTY_BYTES = {};
  /**
   * Cap applied to the 1.5x growth in {@link #setSize(int)}. Kept slightly
   * below Integer.MAX_VALUE, the same margin the JDK collections use for
   * array allocations.
   */
  private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;

  /** Number of valid bytes at the start of {@link #bytes}. */
  private int size;
  /** Backing storage; its length is the capacity. */
  private byte[] bytes;

  /**
   * Create a zero-size sequence.
   */
  public BytesWritable() {this(EMPTY_BYTES);}

  /**
   * Create a BytesWritable using the byte array as the initial value.
   * The array is not copied, and the size is set to its full length.
   * @param bytes This array becomes the backing storage for the object.
   */
  public BytesWritable(byte[] bytes) {
    this.bytes = bytes;
    this.size = bytes.length;
  }

  /**
   * Get the data from the BytesWritable.
   * @return The backing array itself (not a copy); the data is only valid
   *         between 0 and getLength() - 1.
   */
  public byte[] getBytes() {
    return bytes;
  }

  /**
   * Get the data from the BytesWritable.
   * @return the same array as {@link #getBytes()}
   * @deprecated Use {@link #getBytes()} instead.
   */
  @Deprecated
  public byte[] get() {
    return getBytes();
  }

  /**
   * Get the current size of the buffer.
   * @return the number of valid bytes
   */
  public int getLength() {
    return size;
  }

  /**
   * Get the current size of the buffer.
   * @return the same value as {@link #getLength()}
   * @deprecated Use {@link #getLength()} instead.
   */
  @Deprecated
  public int getSize() {
    return getLength();
  }

  /**
   * Change the size of the buffer. The values in the old range are preserved
   * and any new values are undefined. The capacity is changed if it is
   * necessary; in that case it grows to 1.5 times the requested size.
   * @param size The new number of bytes
   */
  public void setSize(int size) {
    if (size > getCapacity()) {
      // Do the multiplication in long: "size * 3" in int arithmetic wraps
      // negative once size exceeds Integer.MAX_VALUE / 3, which made
      // setCapacity attempt a negative-length allocation.
      long grown = Math.min(MAX_ARRAY_SIZE, (3L * size) / 2L);
      // Never pick a capacity smaller than what was asked for.
      setCapacity((int) Math.max(size, grown));
    }
    this.size = size;
  }

  /**
   * Get the capacity, which is the maximum size that could handled without
   * resizing the backing storage.
   * @return The number of bytes
   */
  public int getCapacity() {
    return bytes.length;
  }

  /**
   * Change the capacity of the backing storage.
   * The data is preserved; if the new capacity is below the current size,
   * the size is truncated to the new capacity.
   * @param new_cap The new capacity in bytes.
   */
  public void setCapacity(int new_cap) {
    if (new_cap != getCapacity()) {
      byte[] new_data = new byte[new_cap];
      if (new_cap < size) {
        size = new_cap;
      }
      if (size != 0) {
        System.arraycopy(bytes, 0, new_data, 0, size);
      }
      bytes = new_data;
    }
  }

  /**
   * Set the BytesWritable to the contents of the given newData.
   * @param newData the value to set this BytesWritable to.
   */
  public void set(BytesWritable newData) {
    set(newData.bytes, 0, newData.size);
  }

  /**
   * Set the value to a copy of the given byte range
   * @param newData the new values to copy in
   * @param offset the offset in newData to start at
   * @param length the number of bytes to copy
   */
  public void set(byte[] newData, int offset, int length) {
    // Dropping the size to 0 first means a reallocation in setCapacity
    // skips copying the old contents, which are about to be overwritten.
    setSize(0);
    setSize(length);
    System.arraycopy(newData, offset, bytes, 0, size);
  }

  /**
   * Deserialize: read an int length, then that many bytes, replacing the
   * current contents.
   * @param in the input to read from
   * @throws IOException if reading from <code>in</code> fails
   */
  public void readFields(DataInput in) throws IOException {
    setSize(0); // clear the old data
    setSize(in.readInt());
    in.readFully(bytes, 0, size);
  }

  /**
   * Serialize: write the size as an int followed by the valid bytes.
   * @param out the output to write to
   * @throws IOException if writing to <code>out</code> fails
   */
  public void write(DataOutput out) throws IOException {
    out.writeInt(size);
    out.write(bytes, 0, size);
  }

  /**
   * Returns the hash code computed by the superclass.
   */
  public int hashCode() {
    return super.hashCode();
  }

  /**
   * Are the two byte sequences equal?
   * Only another BytesWritable can be equal; the comparison itself is done
   * by the superclass.
   */
  public boolean equals(Object right_obj) {
    if (right_obj instanceof BytesWritable)
      return super.equals(right_obj);
    return false;
  }

  /**
   * Generate the stream of bytes as hex pairs separated by ' '.
   */
  public String toString() {
    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder is sufficient. Each byte needs at most 3 characters.
    StringBuilder sb = new StringBuilder(3*size);
    for (int idx = 0; idx < size; idx++) {
      // if not the first, put a blank separator in
      if (idx != 0) {
        sb.append(' ');
      }
      // Mask to 0..255 so negative bytes do not sign-extend.
      String num = Integer.toHexString(0xff & bytes[idx]);
      // if it is only one digit, add a leading 0.
      if (num.length() < 2) {
        sb.append('0');
      }
      sb.append(num);
    }
    return sb.toString();
  }

  /** A Comparator optimized for BytesWritable. */
  public static class Comparator extends WritableComparator {
    public Comparator() {
      super(BytesWritable.class);
    }

    /**
     * Compare the buffers in serialized form.
     * The leading LENGTH_BYTES (the int length prefix) of each record are
     * skipped so that only the payload bytes are compared.
     */
    public int compare(byte[] b1, int s1, int l1,
                       byte[] b2, int s2, int l2) {
      return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES,
                          b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES);
    }
  }

  static {            // register this comparator
    WritableComparator.define(BytesWritable.class, new Comparator());
  }

}

hadoop支持的数据类型

猜你喜欢