Original link:
http://www.cnblogs.com/javastart/p/10237766.html
1. JS: computing the byte length of an input string in UTF-8 encoding
/**
 * Calculate the number of bytes a string occupies in a given encoding.
 * UTF-8 is used by default; UTF-16 can be requested via `charset`.
 *
 * UTF-8 is a variable-length Unicode encoding that uses one to four bytes
 * per code point:
 *
 *   000000 - 00007F  (128 codes)      0zzzzzzz                             one byte
 *   000080 - 0007FF  (1920 codes)     110yyyyy 10zzzzzz                    two bytes
 *   000800 - 00D7FF,
 *   00E000 - 00FFFF  (61440 codes)    1110xxxx 10yyyyyy 10zzzzzz           three bytes
 *   010000 - 10FFFF  (1048576 codes)  11110www 10xxxxxx 10yyyyyy 10zzzzzz  four bytes
 *
 * Note: Unicode assigns no characters in the range D800-DFFF. JavaScript
 * strings use that range as UTF-16 surrogates: a high surrogate (D800-DBFF)
 * followed by a low surrogate (DC00-DFFF) represents one code point in
 * 010000-10FFFF and is counted here as a single four-byte UTF-8 sequence.
 * An unpaired surrogate is counted as three bytes, the size of the U+FFFD
 * replacement character that encoders substitute for it.
 * {@link http://zh.wikipedia.org/wiki/UTF-8}
 *
 * UTF-16 encodes most characters in two bytes and those above 65535 (0xFFFF)
 * in four bytes:
 *
 *   000000 - 00FFFF  two bytes
 *   010000 - 10FFFF  four bytes
 *
 * {@link http://zh.wikipedia.org/wiki/UTF-16}
 *
 * @param {String} str      the string to measure
 * @param {String} charset  'utf-8' (default) or 'utf-16' / 'utf16', case-insensitive
 * @return {Number} the encoded size of `str` in bytes
 */
var sizeof = function (str, charset) {
  var total = 0;
  var length = str.length;
  var i;
  var charCode;
  var next;

  charset = charset ? charset.toLowerCase() : '';

  if (charset === 'utf-16' || charset === 'utf16') {
    // str.length already counts UTF-16 code units (an astral character
    // contributes two of them), and every code unit is exactly two bytes.
    return length * 2;
  }

  for (i = 0; i < length; i++) {
    charCode = str.charCodeAt(i);

    if (charCode <= 0x007f) {
      total += 1;
    } else if (charCode <= 0x07ff) {
      total += 2;
    } else if (charCode >= 0xd800 && charCode <= 0xdbff && i + 1 < length) {
      // High surrogate: look ahead for the matching low surrogate.
      next = str.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // Valid pair => one code point above 0xFFFF => four bytes.
        total += 4;
        i++; // the low surrogate has been consumed as part of the pair
      } else {
        total += 3; // unpaired high surrogate
      }
    } else {
      // Remaining BMP code units, including unpaired surrogates.
      total += 3;
    }
  }

  return total;
};
https://www.cnblogs.com/dwade/articles/alen.html
Reproduced from: https://www.cnblogs.com/javastart/p/10237766.html