猿问

curl如何抓取使用加速乐服务的站点?

很多网站使用了CDN服务,例如:seebug.(org)

其中使用加速乐的站点浏览器可以正常访问,但如果使用curl返回的是一段加密过的JS,分析后是因为Cookie的原因,如果浏览器访问会有两个cookie,并且是动态的cookie。用curl访问只有一个cookie,返回的JS里有相关的加密代码,如:

// Packed challenge script: `x` is a word dictionary, `y` is the payload in
// which every word token is an encoded 1-based index into `x`. The code below
// recovers the encoding radix, substitutes the words back in and evals the
// result.
// NOTE(review): the decoded text is executed with eval(); it comes from the
// remote server, so only run this in a sandboxed environment.

// Dictionary: "@"-separated words; trailing "@" characters are stripped before
// splitting so no empty trailing entries are produced.
var x = "toString@_phantom@Sat@17@Expires@YCTTvH@1@2@i@function@1500@href@div@innerHTML@window@Ge3@3@f@__phantomas@challenge@replace@dc@createElement@r@location@GK@jziJ@cd@for@a@setTimeout@257@G2@3D@join@charAt@length@04@firstChild@GMT@36@while@document@match@var@__jsl_clearance@aF@toLowerCase@46@54@30@x@substr@l@return@parseInt@https@cookie@Sep@h@1506743214@Path@captcha@0@if@try@addEventListener@catch@e@false@DOMContentLoaded@else@attachEvent@onreadystatechange".replace(/@*$/, "").split("@"),
    // Payload: each \b\w+\b token is a number written in the (yet unknown) radix `z`.
    y = "1k 24=a(){1h(f.2||f.j){};1k 13,m='1l=2b.17|2e|';1k i=[a(22){25 22},a(22){25 22;},(a(){1k 2a=1i.n('d');2a.e='<15 c=\\'/\\'>22</15>';2a=2a.1e.c;1k o=2a.1j(/27?:\\/\\//)[2e];2a=2a.23(o.1c).1n();25 a(22){14(1k 9=2e;9<22.1c;9++){22[9]=2a.1b(22[9])};25 22.1a('')}})(),a(22){14(1k 9=2e;9<22.1c;9++){22[9]=26(22[9]).1(1g)};25 22.1a('')}];13=[[-~(-~[8])],'12',(((+!+{})+[-~(+!+{})])/[-~(+!+{})]+[]+[[]][~~{}]),'g',[(-~(+!+{})+[])+[([-~(+!+{})]+~~{}>>-~(+!+{}))]],[[((-~[]<<-~[]))*[((+!+{})+(+!+{}))*[(+!+{})+(+!+{})]]]],'11',[-~(-~[8])],'1m',((((+!+{})+(+!+{})^-~~~!{}))/~~[]+[[[]][~~[]], []][-~[]]).1b(~~{}),'18',((((+!+{})+(+!+{})^-~~~!{}))/~~[]+[[[]][~~[]], []][-~[]]).1b(~~{}),'6',[(-~(+!+{})+[])+[(+[])]],'%19'];14(1k 9=2e;9<13.1c;9++){13[9]=i[[2e,7,2e,7,h,8,7,2e,7,2e,7,2e,7,h,7][9]](13[9])};13=13.1a('');m+=13;16('10.c=10.c.l(/[\\?|&]2d-k/,\\'\\')',b);1i.28=(m+';5=3, 21-29-4 1d:1o:20 1f;2c=/;');};2f((a(){2g{25 !!f.2h;}2i(2j){25 2k;}})()){1i.2h('2l',24,2k);}2m{1i.2n('2o',24);}",
    // Radix of the token encoding; starts at 0 and is found by the search loop below.
    z = 0,
    // f(x, y): decode the token string `x` as a number in radix `y`
    // (99 when `y` is omitted or falsy). Note the parameters shadow the outer x / y.
    f = function(x, y) {
        var a = 0,
            b = 0,
            c = 0;
        x = x.split("");
        y = y || 99;
        // Consume one character per iteration until the array is empty.
        // b = charCode - 77.5 is never 0, so the second operand only assigns b.
        // |b| < 13 holds exactly for 'A'..'Z', which map to charCode - 29 (36..61);
        // every other character is read with parseInt(a, 36).
        while ((a = x.shift()) && (b = a.charCodeAt(0) - 77.5)) c = (Math.abs(b) < 13 ? (b + 48.5) : parseInt(a, 36)) + y * c;
        return c
    },
    // g: the token of `y` with the largest value under the default radix
    // (sorted ascending by f, last element taken).
    g = y.match(/\b\w+\b/g).sort(function(x, y) {
        return f(x) - f(y)
    }).pop();
// Radix search: increment z until the largest token decodes to x.length,
// i.e. until it points at the last dictionary entry. The loop body is empty.
while (f(g, ++z) - x.length) {};
// Substitute every token with its dictionary word (indices are 1-based) and
// execute the reconstructed script.
eval(y.replace(/\b\w+\b/g, function(y) {
    return x[f(y, z) - 1]
}));

上边提取eval后的内容为:

// Decoded challenge payload. `l` assembles the `__jsl_clearance` cookie value
// from a table of obfuscated fragments, writes it to document.cookie and
// schedules a page reload; the trailing if/else registers `l` as a page-load
// handler.
var l = function() {
        // Spins forever when either PhantomJS marker is present on window.
        while (window._phantom || window.__phantomas) {};
        // dc: cookie string being built; cd: fragment table, later the joined suffix.
        var cd, dc = '__jsl_clearance=1506744182.019|0|';
        // Four fragment transforms, selected per fragment by the index table below:
        //   f[0], f[1] - return the fragment unchanged;
        //   f[2]       - replace each element with the character at that index of
        //                the absolute URL that href '/' resolves to, with the
        //                leading http(s):// removed and lowercased, then join;
        //   f[3]       - replace each element with its base-36 string
        //                (parseInt is called without an explicit radix), then join.
        var f = [function(x) {
            return x
        }, function(x) {
            return x;
        }, (function() {
            var h = document.createElement('div');
            h.innerHTML = '<a href=\'/\'>x</a>';
            h = h.firstChild.href;
            var r = h.match(/https?:\/\//)[0];
            h = h.substr(r.length).toLowerCase();
            return function(x) {
                for (var i = 0; i < x.length; i++) {
                    x[i] = h.charAt(x[i])
                };
                return x.join('')
            }
        })(), function(x) {
            for (var i = 0; i < x.length; i++) {
                x[i] = parseInt(x[i]).toString(36)
            };
            return x.join('')
        }];
        // Fragment table: string literals interleaved with expressions that rely
        // on JavaScript type coercion to produce small numbers / strings.
        cd = ['M', [
            [~~ {}]
        ], '5XX', [((+!~~ []) + [![],
            []
        ][-~ {}]) + ((-~ {} << -~ {}) + [
            []
        ][~~ []])], '%', ((-~ {} << -~ {}) + [
            []
        ][~~ []]), 'BJU', [((+!~~ []) + [![],
            []
        ][-~ {}]) + [-~ [(-~ {} << -~ {})]]], 'E', [((-~ {} << -~ {}) + [
            []
        ][~~ []]) + ((-~ {} << -~ {}) + [
            []
        ][~~ []])], 'A', [((+!~~ []) + [![],
            []
        ][-~ {}]) + [5]], 'A7', [((-~ {} << -~ {}) + [
            []
        ][~~ []]) + [-~ (((+!~~ []) | -~ {} - ~ {}) + (-~ {} << -~ {}) + (-~ {} - ~ {} ^ -~!{}))]], 'h', [((-~ {} << -~ {}) + [
            []
        ][~~ []]) + (-~~~ [] + (-~ {} << -~ {}) - ~~~ [] + (-~ {} << -~ {}) + [
            [],
            {}][~~!{}])], 'hg', ({} + [] + []).charAt(-~ {} - ~ {} + 6), 'U', [((-~ {} << -~ {}) + [
            []
        ][~~ []]) + [5]], 'FyQ%', [-~ [(-~ {} << -~ {})]], 'D'];
        // Apply to fragment i the transform whose index is listed at position i.
        for (var i = 0; i < cd.length; i++) {
            cd[i] = f[[1, 2, 1, 3, 1, 0, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 0, 1, 3, 1, 0, 1][i]](cd[i])
        };
        cd = cd.join('');
        dc += cd;
        // After 1500 ms, navigate to the current URL with the first match of
        // /[\?|&]captcha-challenge/ removed (code passed to setTimeout as a string).
        setTimeout('location.href=location.href.replace(/[\?|&]captcha-challenge/,\'\')', 1500);
        // Store the clearance cookie with a fixed expiry date and Path=/.
        document.cookie = (dc + ';Expires=Sat, 30-Sep-17 05:03:02 GMT;Path=/;');
    };
// Register `l`: use addEventListener('DOMContentLoaded') when
// window.addEventListener is available (a thrown error counts as unavailable),
// otherwise fall back to attachEvent('onreadystatechange').
if ((function() {
    try {
        return !!window.addEventListener;
    } catch (e) {
        return false;
    }
})()) {
    document.addEventListener('DOMContentLoaded', l, false);
} else {
    document.attachEvent('onreadystatechange', l);
}

可以看到,变量 dc 的值即是我们要的第二个 cookie。

下面是重点:我使用的是PHP curl,我要如何在PHP里执行JS? 并提取上面解密后的JS结果?

我的思路是:解密JS并执行JS,然后将得到的Cookie组合后发送请求,应该就能通过。但不知道PHP如何执行上面的JS并得到结果。求解,感谢!!!

子衿沉夜
浏览 467回答 3
3回答
随时随地看视频慕课网APP
我要回答