js中的UTF-8编码与解码

UTF-8 编码规则:先取得字符的码点(一个数字)并把它写成二进制,再根据该数值所在的范围,把这些二进制位从高到低依次填入下列固定格式字节序列中的 x 位置:
1字节 0xxxxxxx (0-127)
2字节 110xxxxx 10xxxxxx (128-2047)
3字节 1110xxxx 10xxxxxx 10xxxxxx (2048-65535)
4字节 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (65536-1114111)
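5字节 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (早期规范中的形式,RFC 3629 已不再允许)
6字节 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (早期规范中的形式,RFC 3629 已不再允许)
注:Unicode 码点最大为 1114111 (0x10FFFF),因此现行 UTF-8 最多只用到 4 字节,下面的代码也只处理 1-4 字节的情况。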
先来看看utf-8编码:

encode: function(str) {
        var rs = '';
        for(var i of str) {
            var code = i.codePointAt(0);
                if(code < 128) {
                    rs += i;
                } else if(code > 127 && code < 2048) {
                    rs += String.fromCharCode((code >> 6) | 192, (code & 63) | 128);
                } else if(code > 2047 && code < 65536) {
                    rs += String.fromCharCode((code >> 12) | 224, ((code >> 6) & 63) | 128, (code & 63) | 128);
                } else if(code > 65536 && code < 1114112) {
                    rs += String.fromCharCode((code >> 18) | 240, ((code >> 12) & 63) | 128, ((code >> 6) & 63) | 128, (code & 63) | 128);
            }
        }
        console.log(rs);
        return rs;
    },

再来看看utf-8解码:

decode: function(str) {
        var rs = '';
            for(var i = 0; i < str.length; i++) {
                var code = str.charCodeAt(i);
                console.log(code);
                if((240 & code) == 240) {
                    var code1 = str.charCodeAt(i + 1),
                    code2 = str.charCodeAt(i + 2),
                    code3 = str.charCodeAt(i + 3);
                    rs += String.fromCodePoint(((code & 7) << 18) | ((code1 & 63) << 12) | ((code2 & 63) << 6) | (code3 & 63));
                    i += 3;
                } else if((224 & code) == 224) {
                    var code1 = str.charCodeAt(i + 1),
                    code2 = str.charCodeAt(i + 2);
                    rs += String.fromCodePoint(((code & 15) << 12) | ((code1 & 63) << 6) | (code2 & 63));
                    i += 2;
                } else if((192 & code) == 192) {
                    var code1 = str.charCodeAt(i + 1);
                    rs += String.fromCodePoint(((code & 31) << 6) | (code1 & 63));
                    i++;
                } else if((128 & code) == 0) {
                    rs += String.fromCharCode(code);
                }
            }
            console.log(rs);
        }

更多关于utf-8编码/解码内容,请看原文:
js中的UTF-8编码与解码
里面详细讲解了代码的具体含义。

猜你喜欢

转载自blog.csdn.net/weixin_40863414/article/details/81351225