fastjson源码解析(1)-工具类IOUtils

1.概念

通过大量的性能对比实例可以知道，fastjson是目前解析json最快的框架。所以我打算深入其内部了解一下，它是怎么丧心病狂地将性能提升到如此程度的。在解析fastjson的核心之前，我准备先解析它的边边角角，以便增加一些知识储备，更容易理解其内部的实现原理。本章讲解的是在fastjson中使用比较多的工具类IOUtils。

2.源码解析

  static {
        // Best-effort load of the optional properties file during class initialization.
        try {
            loadPropertiesFromFile();
        } catch (Throwable ignored) {
            // Intentionally swallowed so that a failure here cannot break class initialization.
        }
    }

    /**
     * Loads the properties resource named by {@code FASTJSON_PROPERTIES} into
     * {@code DEFAULT_PROPERTIES}.
     *
     * <p>The resource is looked up through the thread context class loader, falling back to the
     * system class loader when no context loader is set. A missing resource and an
     * {@link java.io.IOException} while reading are both ignored: the load is best-effort.
     * The stream is always closed, including when loading fails.
     */
    public static void loadPropertiesFromFile(){
        // The lookup runs inside doPrivileged so that it is performed with this class's own
        // permissions instead of being limited by less-privileged callers on the stack.
        InputStream inputStream = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
            public InputStream run() {
                ClassLoader cl = Thread.currentThread().getContextClassLoader();
                if (cl != null) {
                    return cl.getResourceAsStream(FASTJSON_PROPERTIES);
                } else {
                    return ClassLoader.getSystemResourceAsStream(FASTJSON_PROPERTIES);
                }
            }
        });

        if (null == inputStream) {
            // Resource not present: nothing to load.
            return;
        }

        try {
            DEFAULT_PROPERTIES.load(inputStream);
        } catch (java.io.IOException e) {
            // skip: the properties file is optional
        } finally {
            // Close on every path; previously a failed load() leaked the stream.
            try {
                inputStream.close();
            } catch (java.io.IOException ignored) {
                // nothing useful can be done if close fails
            }
        }
    }	
	
	
	
    /**
     * Looks up a property value, preferring the JVM system property and falling back to
     * {@code DEFAULT_PROPERTIES}.
     *
     * @param name property key
     * @return the system property value if one is set and readable; otherwise whatever
     *         {@code DEFAULT_PROPERTIES.getProperty(name)} returns
     */
    public static String getStringProperty(String name) {
        String systemValue;
        try {
            systemValue = System.getProperty(name);
        } catch (SecurityException e) {
            // Reading system properties is not permitted: treat as "not set".
            systemValue = null;
        }

        if (systemValue != null) {
            return systemValue;
        }
        return DEFAULT_PROPERTIES.getProperty(name);
    }

	/**
     * Returns the number of decimal digits needed to print a non-negative {@code long}.
     * Modelled on the JDK's {@code Long#stringSize(long)}.
     *
     * <p>Any negative argument is smaller than the first bound (10) and therefore yields 1.
     *
     * @param x value to measure
     * @return digit count, between 1 and 19
     */
    public static int stringSize(long x) {
        int digitCount = 1;
        long upperBound = 10;
        // A long has at most 19 decimal digits, so only 18 bounds need to be tested.
        while (digitCount < 19) {
            if (x < upperBound) {
                return digitCount;
            }
            upperBound *= 10;
            digitCount++;
        }
        return 19;
    }
	
	
	// Tens-digit lookup: DigitTens[v] is the tens digit of v for 0 <= v <= 99 (one decade per row).
    final static char[] DigitTens = {
            '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
            '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
            '2', '2', '2', '2', '2', '2', '2', '2', '2', '2',
            '3', '3', '3', '3', '3', '3', '3', '3', '3', '3',
            '4', '4', '4', '4', '4', '4', '4', '4', '4', '4',
            '5', '5', '5', '5', '5', '5', '5', '5', '5', '5',
            '6', '6', '6', '6', '6', '6', '6', '6', '6', '6',
            '7', '7', '7', '7', '7', '7', '7', '7', '7', '7',
            '8', '8', '8', '8', '8', '8', '8', '8', '8', '8',
            '9', '9', '9', '9', '9', '9', '9', '9', '9', '9',
    };

    // Ones-digit lookup: DigitOnes[v] is the ones digit of v for 0 <= v <= 99 (one decade per row).
    final static char[] DigitOnes = {
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    };
	
    /**
     * Writes the decimal representation of {@code i} into {@code buf}, filling backwards so that
     * the last character lands at {@code buf[index - 1]}. A leading '-' is written for negative
     * input. Modelled on the JDK's {@code Long#getChars(long, int, char[])}.
     *
     * <p>The conversion runs in three phases: a loop using {@code long} arithmetic while the value
     * exceeds {@code Integer.MAX_VALUE}, a loop using {@code int} arithmetic while the value is at
     * least 65536 (both emit two digits per iteration), and a final loop that emits one digit per
     * iteration and replaces the division by a multiply and a shift.
     *
     * <p>NOTE(review): {@code i = -i} overflows for {@code Long.MIN_VALUE}; presumably callers
     * handle that value before calling this method -- confirm.
     *
     * @param i value to render
     * @param index position in {@code buf} one past the last character to write; typically the
     *              total number of characters of {@code i}
     * @param buf destination array; typically of length {@code index}
     */
    public static void getChars(long i, int index, char[] buf) {
        long q;
        int r;
        int charPos = index;
        char sign = 0;

        if (i < 0) {
            sign = '-';
            i = -i;
        }

        // Emit two digits per iteration while the value does not fit in an int
        while (i > Integer.MAX_VALUE) {
            q = i / 100;
            // equivalent to r = i - (q * 100), i.e. the remainder of i / 100 (64q + 32q + 4q = 100q)
            r = (int) (i - ((q << 6) + (q << 5) + (q << 2))); 
            i = q;
            // ones digit of the two-digit remainder
            buf[--charPos] = DigitOnes[r];
            // tens digit of the two-digit remainder
            buf[--charPos] = DigitTens[r];
        }

        // Same two-digit scheme as above, now using int arithmetic
        int q2;
        int i2 = (int) i;
        while (i2 >= 65536) {
            q2 = i2 / 100;
            // really: r = i2 - (q * 100);
            r = i2 - ((q2 << 6) + (q2 << 5) + (q2 << 2));
            i2 = q2;
            buf[--charPos] = DigitOnes[r];
            buf[--charPos] = DigitTens[r];
        }

        // Fast path for small values (i2 < 65536 after the loop above).
        // 52429 / 524288 = 0.10000038146972656 and 524288 = 1 << 19, so
        // q2 = (i2 * 52429) >>> (16 + 3) stands in for q2 = i2 / 10 while avoiding a comparatively
        // slow division (reference: https://my.oschina.net/muyexia/blog/35382).
        // `digits` is declared outside this block; presumably a digit-character lookup table.
        for (;;) {
            q2 = (i2 * 52429) >>> (16 + 3);
            r = i2 - ((q2 << 3) + (q2 << 1)); // r = i2-(q2*10) ...
            buf[--charPos] = digits[r];
            i2 = q2;
            if (i2 == 0) break;
        }
        // Prepend the sign for negative input
        if (sign != 0) {
            buf[--charPos] = sign;
        }
    }
	
 // Lookup table (space traded for time): true for characters allowed as the FIRST character of
    // an identifier, i.e. A-Z, a-z, '_' and '$'. Indexed by char value, covering 0..255.
    public final static boolean[] firstIdentifierFlags       = new boolean[256];
    static {
        for (char upper = 'A'; upper <= 'Z'; upper++) {
            firstIdentifierFlags[upper] = true;
        }
        for (char lower = 'a'; lower <= 'z'; lower++) {
            firstIdentifierFlags[lower] = true;
        }
        firstIdentifierFlags['_'] = true;
        firstIdentifierFlags['$'] = true;
    }

    // Lookup table: true for characters allowed inside an identifier, i.e. A-Z, a-z, 0-9 and '_'.
    // Indexed by char value, covering 0..255.
    public final static boolean[] identifierFlags            = new boolean[256];

    static {
        for (char upper = 'A'; upper <= 'Z'; upper++) {
            identifierFlags[upper] = true;
        }
        for (char lower = 'a'; lower <= 'z'; lower++) {
            identifierFlags[lower] = true;
        }
        for (char digit = '0'; digit <= '9'; digit++) {
            identifierFlags[digit] = true;
        }
        identifierFlags['_'] = true;
    }
	
   /**
     * Tells whether {@code ch} may start an identifier, according to
     * {@code IOUtils.firstIdentifierFlags}.
     *
     * @param ch character to test
     * @return {@code false} for any character beyond the table; otherwise the table entry
     */
    public static boolean firstIdentifier(char ch) {
        if (ch >= IOUtils.firstIdentifierFlags.length) {
            return false;
        }
        return IOUtils.firstIdentifierFlags[ch];
    }

    /**
     * Tells whether {@code ch} may appear inside an identifier, according to
     * {@code identifierFlags}.
     *
     * @param ch character to test
     * @return {@code false} for any character beyond the table; otherwise the table entry
     */
    public static boolean isIdent(char ch) {
        if (ch >= identifierFlags.length) {
            return false;
        }
        return identifierFlags[ch];
    }
	
    /**
     * Decodes Base64 text held in a window of a char array.
     *
     * <p>Characters that the {@code IA} table marks as illegal (negative entry) are trimmed from
     * both ends of the window. The content in between is treated as legal Base64, optionally
     * broken by a "\r\n" separator after every 76 characters.
     *
     * <p>{@code IA} is declared outside this block. NOTE(review): it is indexed with raw char
     * values, which assumes the table covers every character that can appear in the input --
     * confirm.
     *
     * @param chars    the source array; <code>null</code> will throw an exception
     * @param offset   index of the first character of the window
     * @param charsLen number of characters in the window; 0 returns an empty array
     * @return the decoded array of bytes; may be of length 0
     */
    public static byte[] decodeBase64(char[] chars, int offset, int charsLen) {
        // Check special case
        if (charsLen == 0) {
            return new byte[0];
        }

        int sIx = offset, eIx = offset + charsLen - 1; // Start and end index after trimming.

        // Trim illegal characters from the start of the window
        while (sIx < eIx && IA[chars[sIx]] < 0) {
            sIx++;
        }

        // Trim illegal characters from the end of the window. The bound is sIx (not 0) so the
        // scan can never walk in front of the window when offset > 0.
        while (eIx > sIx && IA[chars[eIx]] < 0) {
            eIx--;
        }

        // Number of trailing '=' padding characters (0, 1 or 2)
        int pad = chars[eIx] == '=' ? (chars[eIx - 1] == '=' ? 2 : 1) : 0;
        // Character count of the trimmed content, including any line separators
        int cCnt = eIx - sIx + 1;
        // MIME-style encoders (RFC 2045) break the output with "\r\n" after every 76 characters.
        // If the character right after the first 76 is '\r', there are cCnt / 78 separators
        // (76 + 2 characters per line), each two characters long, hence the << 1.
        // The probe is relative to offset; the previous absolute chars[76] looked at the wrong
        // character whenever offset != 0.
        int sepCnt = charsLen > 76 ? (chars[offset + 76] == '\r' ? cCnt / 78 : 0) << 1 : 0;

        // Every Base64 character carries 6 bits; len is the number of decoded bytes
        int len = ((cCnt - sepCnt) * 6 >> 3) - pad;
        byte[] bytes = new byte[len];

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = IA[chars[sIx++]] << 18 | IA[chars[sIx++]] << 12 | IA[chars[sIx++]] << 6 | IA[chars[sIx++]];

            // Add the bytes
            bytes[d++] = (byte) (i >> 16);
            bytes[d++] = (byte) (i >> 8);
            bytes[d++] = (byte) i;

            // If line separator, jump over it (19 groups of 4 characters = one 76-character line).
            if (sepCnt > 0 && ++cc == 19) {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len) {
            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++) {
                i |= IA[chars[sIx++]] << (18 - j * 6);
            }

            for (int r = 16; d < len; r -= 8) {
                bytes[d++] = (byte) (i >> r);
            }
        }

        return bytes;
    }

3.知识扩展

3.1 base64编码原理和实现

3.1.1 base64字符组成：
0-9 A-Z a-z + / 总共64个字符(每个字符需要使用6位表示)，另外还有一个补位字符=

3.1.2 编码原理
任何数据在计算机中都可以用0和1表示。如果每6个二进制位对应一个字符，那么一段数据就可以表示成若干个base64字符。需要注意的是，数据的总位数可能不是6的整数倍，这时最后一个字符的有效位数不足6位：它可能只占2位、4位或者6位（整除）。当只占2位时，编码末尾使用==补位；当只占4位时，使用=补位。因为一个字节占用8位，而base64的一个字符表示6位，所以原数据每3个字节将转换为base64的4个字符。

3.1.3 java实现

  /**
         * Base64 alphabet: the character at index {@code v} encodes the 6-bit value {@code v}
         * (0-25 = 'A'-'Z', 26-51 = 'a'-'z', 52-61 = '0'-'9', 62 = '+', 63 = '/').
         */

        private static final char[] intToBase64 = {
                'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
                'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
                'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
                'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
                'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
                'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7',
                '8', '9', '+', '/'
        };

        /**
         * Reverse mapping: indexed by ASCII code, gives the character's position in
         * {@code intToBase64}, or -1 for characters outside the Base64 alphabet.
         */
        private static final int[] base64Toint = {
                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                -1, -1, -1, 62, -1, -1, -1, 63, 52, 53,
                54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
                -1, -1, -1, -1, -1, 0, 1, 2, 3, 4,
                5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
                29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
                39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                49, 50, 51
        };

        /**
         * Encodes a byte array as Base64 text.
         * Reference: https://blog.csdn.net/u013412497/article/details/51552335
         *
         * <ol>
         *   <li>The input is consumed in groups of 3 bytes; each group is split into four 6-bit
         *       values.</li>
         *   <li>Each 6-bit value is mapped to a character through {@code intToBase64}.</li>
         *   <li>If the input length is not a multiple of 3, the last group is padded with '='
         *       ("==" for one leftover byte, "=" for two).</li>
         * </ol>
         *
         * @param a bytes to encode; must not be {@code null}
         * @return the Base64 text; empty for an empty array
         */
        public static String encode(byte[] a) {
            int totalLen = a.length;
            int groupNum = totalLen / 3;
            int lastGroup = totalLen - groupNum * 3;
            int index = 0;
            // Output is exactly 4 characters per (possibly partial) 3-byte group.
            StringBuilder result = new StringBuilder((totalLen + 2) / 3 * 4);
            // Complete 3-byte groups
            for (int i = 0; i < groupNum; i++) {
                // & 0xff widens each byte to an int without sign extension
                int first = a[index++] & 0xff;
                int second = a[index++] & 0xff;
                int third = a[index++] & 0xff;
                // Three bytes expand to four Base64 characters
                result.append(intToBase64[first >> 2]);
                result.append(intToBase64[(first << 4) & 0x3f | second >> 4]);
                result.append(intToBase64[(second << 2) & 0x3f | third >> 6]);
                result.append(intToBase64[third & 0x3f]);
            }
            // Leftover 1 or 2 bytes
            if (lastGroup != 0) {
                int first = a[index++] & 0xff;
                result.append(intToBase64[first >> 2]);
                if (lastGroup == 1) {
                    result.append(intToBase64[(first << 4) & 0x3f]);
                    result.append("==");
                } else {
                    int second = a[index++] & 0xff;
                    result.append(intToBase64[(first << 4) & 0x3f | second >> 4]);
                    result.append(intToBase64[(second << 2) & 0x3f]);
                    result.append("=");
                }
            }
            return result.toString();
        }

4.体会

4.1 IOUtils中大量使用了空间换时间的方法，对于提升性能有一定的帮助

4.2 对于简单的求余算法，也要绞尽脑汁"凑数" 【 r = (int) (i - ((q << 6) + (q << 5) + (q << 2))); 】

5.说明

5.1 本文是原创作品，如果需要转载请注明出处

5.2 本人是一个小菜鸟，如果上面的观点或者解析有错误的地方，请大家及时指出，谢谢^_^

fastjson源码解析(1)-工具类IOUtils

猜你喜欢