JavaIO-DataOutputStream源码分析

	
	/**
	 * Writes {@code str} to {@code out} as a 2-byte big-endian length prefix followed
	 * by the string encoded one {@code char} at a time in a UTF-8-like layout.
	 *
	 * <p>Each UTF-16 code unit returned by {@code charAt} is encoded on its own:
	 * <ul>
	 *   <li>U+0001..U+007F: one byte, {@code 0xxxxxxx}</li>
	 *   <li>U+0000 and U+0080..U+07FF: two bytes, {@code 110xxxxx 10xxxxxx}</li>
	 *   <li>U+0800..U+FFFF: three bytes, {@code 1110xxxx 10xxxxxx 10xxxxxx}</li>
	 * </ul>
	 *
	 * <p>Example: for the single character U+4E25 the bytes written are
	 * {@code 00 03 E4 B8 A5} (length 3, then the three encoded bytes).
	 *
	 * @param str the string to encode
	 * @param out the destination; when it is a {@code DataOutputStream}, its
	 *            {@code bytearr} buffer is reused and replaced if too small
	 * @return the number of bytes handed to {@code out}: encoded length plus 2
	 * @throws UTFDataFormatException if the encoded form exceeds 65535 bytes
	 * @throws IOException propagated from {@code out.write}
	 */
	static int writeUTF(String str, DataOutput out) throws IOException {
		final int strlen = str.length();

		// Count the encoded size in a long. At up to 3 bytes per char the total can
		// exceed Integer.MAX_VALUE; a wrapped int could pass the limit check below
		// and then fail with an unrelated array exception instead of the documented
		// UTFDataFormatException.
		long encodedLength = 0;
		for (int i = 0; i < strlen; i++) {
			final int c = str.charAt(i);
			if ((c >= 0x0001) && (c <= 0x007F)) {
				encodedLength++;
			} else if (c > 0x07FF) {
				encodedLength += 3;
			} else {
				// U+0000 and U+0080..U+07FF
				encodedLength += 2;
			}
		}

		// The length prefix is only two bytes wide, so 65535 is the hard limit.
		if (encodedLength > 65535) {
			throw new UTFDataFormatException(
				"encoded string too long: " + encodedLength + " bytes");
		}
		final int utflen = (int) encodedLength;

		// Pick the working buffer: reuse the stream's own buffer when possible.
		final byte[] bytearr;
		if (out instanceof DataOutputStream) {
			DataOutputStream dos = (DataOutputStream) out;
			if (dos.bytearr == null || (dos.bytearr.length < (utflen + 2))) {
				// Allocates twice the payload size; presumably so that later,
				// somewhat longer strings can reuse it without reallocating.
				dos.bytearr = new byte[(utflen * 2) + 2];
			}
			bytearr = dos.bytearr;
		} else {
			bytearr = new byte[utflen + 2];
		}

		// Length prefix, high byte first. utflen <= 65535, so two bytes suffice.
		int count = 0;
		bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
		bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF);

		// First pass: copy the leading run of single-byte characters and stop at
		// the first character that needs more than one byte.
		int i;
		for (i = 0; i < strlen; i++) {
			final int c = str.charAt(i);
			if (!((c >= 0x0001) && (c <= 0x007F))) {
				break;
			}
			bytearr[count++] = (byte) c;
		}

		// Second pass: continue from where the first pass stopped, handling all
		// three encodings.
		for (; i < strlen; i++) {
			final int c = str.charAt(i);
			if ((c >= 0x0001) && (c <= 0x007F)) {
				bytearr[count++] = (byte) c;
			} else if (c > 0x07FF) {
				// Three bytes. For U+4E25 (0100 1110 0010 0101):
				//   0xE0 | bits 15..12 (0100)   -> 1110 0100 = 0xE4
				//   0x80 | bits 11..6  (111000) -> 1011 1000 = 0xB8
				//   0x80 | bits 5..0   (100101) -> 1010 0101 = 0xA5
				bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
				bytearr[count++] = (byte) (0x80 | ((c >>  6) & 0x3F));
				bytearr[count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
			} else {
				// Two bytes: U+0000 and U+0080..U+07FF.
				bytearr[count++] = (byte) (0xC0 | ((c >>  6) & 0x1F));
				bytearr[count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
			}
		}

		// Hand prefix and payload to the destination in one call.
		out.write(bytearr, 0, utflen + 2);
		return utflen + 2;
	}


package java.io;

/**
 * Stream filter that writes Java primitive values and strings to the wrapped
 * output stream, most significant byte first, while counting the bytes written.
 */
public class DataOutputStream extends FilterOutputStream implements DataOutput {

    /** Bytes written so far; pinned at {@code Integer.MAX_VALUE} rather than wrapping. */
    protected int written;

    /** Working buffer accessed as {@code dos.bytearr} by the static two-argument writeUTF. */
    private byte[] bytearr;

    /** Eight-byte staging area filled by {@link #writeLong(long)} before one bulk write. */
    private final byte[] writeBuffer = new byte[8];

    /**
     * Wraps the given stream.
     *
     * @param out the stream that receives the bytes
     */
    public DataOutputStream(OutputStream out) {
        super(out);
    }

    /** Adds {@code value} to the byte counter, saturating if the sum goes negative. */
    private void incCount(int value) {
        final int total = written + value;
        written = (total < 0) ? Integer.MAX_VALUE : total;
    }

    /** Forwards a single byte to the wrapped stream and counts it. */
    @Override
    public synchronized void write(int b) throws IOException {
        out.write(b);
        incCount(1);
    }

    /** Forwards {@code len} bytes of {@code b} starting at {@code off} and counts them. */
    @Override
    public synchronized void write(byte[] b, int off, int len)
            throws IOException {
        out.write(b, off, len);
        incCount(len);
    }

    /** Flushes the wrapped stream. */
    @Override
    public void flush() throws IOException {
        out.flush();
    }

    /** Writes a boolean as one byte: 1 for {@code true}, 0 for {@code false}. */
    public final void writeBoolean(boolean v) throws IOException {
        if (v) {
            out.write(1);
        } else {
            out.write(0);
        }
        incCount(1);
    }

    /**
     * Passes {@code v} to the wrapped stream as a single byte and counts one byte.
     * The wrapped stream is expected to keep only the low eight bits of {@code v}.
     */
    public final void writeByte(int v) throws IOException {
        out.write(v);
        incCount(1);
    }

    /** Writes the low 16 bits of {@code v} as two bytes, high byte first. */
    public final void writeShort(int v) throws IOException {
        final int high = (v >>> 8) & 0xFF;
        final int low = v & 0xFF;
        out.write(high);
        out.write(low);
        incCount(2);
    }

    /**
     * Writes the low 16 bits of {@code v} as two bytes, high byte first.
     * For example U+4E25 is written as {@code 4E 25}; any bits above bit 15 are dropped.
     */
    public final void writeChar(int v) throws IOException {
        final int high = (v >>> 8) & 0xFF;
        final int low = v & 0xFF;
        out.write(high);
        out.write(low);
        incCount(2);
    }

    /** Writes an int as four bytes, most significant byte first. */
    public final void writeInt(int v) throws IOException {
        // Shifts of 24, 16, 8 and 0 select bits 31..24, 23..16, 15..8 and 7..0.
        for (int shift = 24; shift >= 0; shift -= 8) {
            out.write((v >>> shift) & 0xFF);
        }
        incCount(4);
    }

    /** Writes a long as eight bytes, most significant byte first, in a single bulk write. */
    public final void writeLong(long v) throws IOException {
        for (int i = 0; i < 8; i++) {
            // Index 0 gets bits 63..56, index 7 gets bits 7..0.
            writeBuffer[i] = (byte) (v >>> (56 - 8 * i));
        }
        out.write(writeBuffer, 0, 8);
        incCount(8);
    }

    /** Writes a float as the four bytes of {@code Float.floatToIntBits(v)}. */
    public final void writeFloat(float v) throws IOException {
        final int bits = Float.floatToIntBits(v);
        writeInt(bits);
    }

    /** Writes a double as the eight bytes of {@code Double.doubleToLongBits(v)}. */
    public final void writeDouble(double v) throws IOException {
        final long bits = Double.doubleToLongBits(v);
        writeLong(bits);
    }

    /** Writes one byte per character of {@code s}, discarding each character's high eight bits. */
    public final void writeBytes(String s) throws IOException {
        final int len = s.length();
        int i = 0;
        while (i < len) {
            out.write((byte) s.charAt(i));
            i++;
        }
        incCount(len);
    }

    /** Writes every character of {@code s} as two bytes, high byte first. */
    public final void writeChars(String s) throws IOException {
        final int len = s.length();
        int i = 0;
        while (i < len) {
            final int v = s.charAt(i);
            out.write((v >>> 8) & 0xFF);
            out.write(v & 0xFF);
            i++;
        }
        incCount(len * 2);
    }

    /** Writes {@code str} via the static two-argument writeUTF, targeting this stream. */
    public final void writeUTF(String str) throws IOException {
        writeUTF(str, this);
    }

    /** Returns the current value of the byte counter. */
    public final int size() {
        return written;
    }
}

JavaIO-DataOutputStream源码分析

猜你喜欢