java采集腾讯动漫

腾讯动漫章节地址结构为:

http://ac.qq.com/ComicView/index/id/$id/cid/$cid

$id为漫画id,$cid为章节id

在页面源码的后面,变量DATA储存了章节图片等详细信息:

var DATA        = 'LeyJjb21pYyI6eyJpZCI6NTI0MzU2LCJ0a...',
    

 DATA需要重新解码为json数据,解码函数位于下面这个脚本中:

<script src="http://ac.gtimg.com/media/js/ac.page.chapter.view_v2.3.5.js?v=20160826"></script>

 核心解码函数为:

  // Packed bootstrap script: the inner function rebuilds source text from a
  // compressed template and the outer eval() then executes that text.
  //
  // Arguments of the inner function:
  //   p - template source in which identifiers were replaced by short tokens
  //   a - radix used to encode token indices (51 here)
  //   c - number of dictionary entries (51 here)
  //   k - dictionary of original words, indexed by token number
  //   e - scratch slot, reassigned below to the index-to-token encoder
  //   r - lookup table (token -> word) used by the fast path
  //
  // NOTE(review): eval() runs whatever the unpacked text contains; only run
  // this against content you trust. The blog text that follows presents a
  // readable Base/decode listing as the code actually executed after unpacking.
  eval(function (p, a, c, k, e, r) {
    // Encode index c as a token in radix a: digits 0-35 use toString(36),
    // digits above 35 map to character codes starting at 65 (36 + 29).
    e = function (c) {
      return (c < a ? '' : e(parseInt(c / a))) + ((c = c % a) > 35 ? String.fromCharCode(c + 29)  : c.toString(36))
    };
    // Feature test: when replace() accepts a function replacer this expression
    // is '' and the single-pass path below is taken.
    if (!''.replace(/^/, String)) {
      // Build the token -> word table; an empty dictionary entry keeps the token itself.
      while (c--) r[e(c)] = k[c] || e(c);
      // Replace the dictionary with one replacer function that looks tokens up in r.
      k = [
        function (e) {
          return r[e]
        }
      ];
      // Match every word-like token in a single pass.
      e = function () {
        return '\\w+'
      };
      // Make the replacement loop below run exactly once.
      c = 1
    }
    // Substitute tokens in the template with their dictionary words.
    while (c--) if (k[c]) p = p.replace(new RegExp('\\b' + e(c) + '\\b', 'g'), k[c]);
    return p
  }('p s(){i="C+/=";H.q=p(c){o a="",b,d,h,f,g,e=0;z(c=c.J(/[^A-L-M-9\\+\\/\\=]/g,"");e<c.r;)b=i.l(c.k(e++)),d=i.l(c.k(e++)),f=i.l(c.k(e++)),g=i.l(c.k(e++)),b=b<<2|d>>4,d=(d&t)<<4|f>>2,h=(f&3)<<6|g,a+=5.7(b),w!=f&&(a+=5.7(d)),w!=g&&(a+=5.7(h));n a=y(a)};y=p(c){z(o a="",b=0,d=D=8=0;b<c.r;)d=c.j(b),E>d?(a+=5.7(d),b++):F<d&&G>d?(8=c.j(b+1),a+=5.7((d&I)<<6|8&m),b+=2):(8=c.j(b+1),x=c.j(b+2),a+=5.7((d&t)<<K|(8&m)<<6|x&m),b+=3);n a}}o B=v s;u=(v N("n "+B.q(u.O(1))))();', 51, 51, '|||||String||fromCharCode|c2||||||||||_keyStr|charCodeAt|charAt|indexOf|63|return|var|function|decode|length|Base|15|DATA|new|64|c3|_utf8_decode|for|||ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789|c1|128|191|224|this|31|replace|12|Za|z0|Function|substring'.split('|'), 0, {
  }))

 实际解析的代码为:

/**
 * Decoder for Base64 text whose bytes are UTF-8 encoded.
 *
 * Construct with `new Base()` and call `decode(text)`.
 *
 * Differences from the page's original script:
 *  - no implicit globals are created, so the constructor also works in strict
 *    mode and ES modules;
 *  - 4-byte UTF-8 sequences (lead bytes 240-247) decode to a surrogate pair
 *    instead of being misread as a 3-byte sequence;
 *  - input whose trailing '=' padding is missing no longer yields a junk
 *    character at the end.
 *
 * @constructor
 */
function Base() {
  // Base64 alphabet; index 64 ('=') is the padding marker.
  var keyStr = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';
  var PAD = 64;

  // 6-bit value of the character at `index`; positions past the end of the
  // text are treated as padding so truncated input decodes cleanly.
  function sextetAt(text, index) {
    return index < text.length ? keyStr.indexOf(text.charAt(index)) : PAD;
  }

  // Byte value at `index`, or 0 past the end (a missing continuation byte
  // contributes no bits, as in the original script).
  function byteAt(text, index) {
    return index < text.length ? text.charCodeAt(index) : 0;
  }

  // Convert a "binary string" (one character per byte) holding UTF-8 data
  // into a regular JavaScript string.
  function utf8Decode(c) {
    var out = '';
    var i = 0;
    while (i < c.length) {
      var lead = c.charCodeAt(i);
      if (lead < 128) {
        // Single-byte (ASCII) character.
        out += String.fromCharCode(lead);
        i += 1;
      } else if (lead > 191 && lead < 224) {
        // Two-byte sequence.
        out += String.fromCharCode((lead & 31) << 6 | byteAt(c, i + 1) & 63);
        i += 2;
      } else if (lead >= 240 && lead < 248) {
        // Four-byte sequence: code point above U+FFFF, emitted as a surrogate pair.
        var codePoint = (lead & 7) << 18 | (byteAt(c, i + 1) & 63) << 12 |
          (byteAt(c, i + 2) & 63) << 6 | byteAt(c, i + 3) & 63;
        if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
          var offset = codePoint - 0x10000;
          out += String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
        } else {
          // Overlong or out-of-range value: emit the replacement character.
          out += '\uFFFD';
        }
        i += 4;
      } else {
        // Three-byte sequence. Any other lead value also lands here, which
        // keeps the original script's handling of malformed input.
        out += String.fromCharCode((lead & 15) << 12 | (byteAt(c, i + 1) & 63) << 6 |
          byteAt(c, i + 2) & 63);
        i += 3;
      }
    }
    return out;
  }

  /**
   * Decode Base64 text and interpret the resulting bytes as UTF-8.
   *
   * @param {string} c - Base64 text; characters outside the Base64 alphabet
   *     (for example line breaks) are ignored.
   * @returns {string} The decoded text.
   */
  this.decode = function (c) {
    var cleaned = c.replace(/[^A-Za-z0-9+\/=]/g, '');
    var bytes = '';
    for (var e = 0; e < cleaned.length; e += 4) {
      var b = sextetAt(cleaned, e);
      var d = sextetAt(cleaned, e + 1);
      var f = sextetAt(cleaned, e + 2);
      var g = sextetAt(cleaned, e + 3);
      if (b === PAD || d === PAD) {
        // A group needs at least two sextets to carry one full byte.
        break;
      }
      bytes += String.fromCharCode(b << 2 | d >> 4);
      if (f !== PAD) {
        bytes += String.fromCharCode((d & 15) << 4 | f >> 2);
      }
      if (g !== PAD) {
        bytes += String.fromCharCode((f & 3) << 6 | g);
      }
    }
    return utf8Decode(bytes);
  };
}
// Create a decoder instance from the Base constructor.
var B = new Base;
// Drop the first character of DATA, decode the remainder, then evaluate the
// decoded text as a JavaScript expression and store the result back in DATA.
// NOTE(review): `new Function` executes whatever the decoded text contains.
// If the payload is always strict JSON (presumably it is - confirm against
// real pages), JSON.parse would avoid running untrusted code.
DATA = (new Function('return ' + B.decode(DATA.substring(1)))) ();

 Base转换为java代码:

	/**
	 * Java port of the page's JavaScript Base decoder: decodes Base64 text and
	 * interprets the resulting bytes as UTF-8.
	 *
	 * Fixes relative to the first port:
	 * - the character filter now uses a real regular expression
	 *   (String.replace treats its argument as a literal);
	 * - 3-byte sequences are cast to char as a whole instead of being appended
	 *   as a decimal number;
	 * - bytes 128-191 in lead position no longer cause an endless loop;
	 * - truncated input no longer throws StringIndexOutOfBoundsException;
	 * - 4-byte UTF-8 sequences are decoded to a supplementary code point.
	 */
	private static class Base
	{
		/** Index of '=' in the alphabet; also used for sextets missing from truncated input. */
		private static final int PAD = 64;

		/** Base64 alphabet; index 64 ('=') is the padding marker. */
		String _keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

		/**
		 * Decodes Base64 text and interprets the bytes as UTF-8.
		 *
		 * @param c Base64 text; characters outside the Base64 alphabet are ignored
		 * @return the decoded text
		 */
		public String decode(String c)
		{
			String cleaned = c.replaceAll("[^A-Za-z0-9+/=]", "");
			// One char per decoded byte, mirroring the JavaScript "binary string".
			StringBuilder bytes = new StringBuilder(cleaned.length());
			for (int e = 0; e < cleaned.length(); e += 4)
			{
				int b = sextetAt(cleaned, e);
				int d = sextetAt(cleaned, e + 1);
				int f = sextetAt(cleaned, e + 2);
				int g = sextetAt(cleaned, e + 3);
				if (b == PAD || d == PAD)
				{
					// A group needs at least two sextets to carry one full byte.
					break;
				}
				bytes.append((char) (b << 2 | d >> 4));
				if (f != PAD)
				{
					bytes.append((char) ((d & 15) << 4 | f >> 2));
				}
				if (g != PAD)
				{
					bytes.append((char) ((f & 3) << 6 | g));
				}
			}
			return _utf8_decode(bytes.toString());
		}

		/**
		 * Converts a string holding one UTF-8 byte per char into regular text.
		 *
		 * @param c string whose chars are expected to be byte values (0-255)
		 * @return the decoded text
		 */
		public String _utf8_decode(String c)
		{
			StringBuilder a = new StringBuilder(c.length());
			for (int b = 0; b < c.length();)
			{
				int d = c.charAt(b);
				if (d < 128)
				{
					// Single-byte (ASCII) character.
					a.append((char) d);
					b++;
				} else if (d > 191 && d < 224)
				{
					// Two-byte sequence.
					int c2 = byteAt(c, b + 1);
					a.append((char) ((d & 31) << 6 | c2 & 63));
					b += 2;
				} else if (d >= 240 && d < 248)
				{
					// Four-byte sequence: code point above U+FFFF.
					int c2 = byteAt(c, b + 1);
					int c3 = byteAt(c, b + 2);
					int c4 = byteAt(c, b + 3);
					int codePoint = (d & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63;
					if (codePoint >= 0x10000 && codePoint <= 0x10FFFF)
					{
						a.appendCodePoint(codePoint);
					} else
					{
						// Overlong or out-of-range value: emit U+FFFD.
						a.append((char) 0xFFFD);
					}
					b += 4;
				} else
				{
					// Three-byte sequence. Any other lead value also lands here,
					// matching the JavaScript reference and guaranteeing progress.
					int c2 = byteAt(c, b + 1);
					int c3 = byteAt(c, b + 2);
					a.append((char) ((d & 15) << 12 | (c2 & 63) << 6 | c3 & 63));
					b += 3;
				}
			}
			return a.toString();
		}

		/** Returns the 6-bit value of the char at index, or PAD when index is past the end. */
		private int sextetAt(String text, int index)
		{
			return index < text.length() ? _keyStr.indexOf(text.charAt(index)) : PAD;
		}

		/** Returns the byte value at index, or 0 when index is past the end. */
		private int byteAt(String text, int index)
		{
			return index < text.length() ? text.charAt(index) : 0;
		}
	}

猜你喜欢

转载自zhhaogen.iteye.com/blog/2341056