猿人学爬虫题目第一题: 《抓取所有机票价格》,该案例非常适合js新手入门。

题目链接:http://match.yuanrenxue.com/match/1
Js逆向-猿人学(1)源码混淆_js


F12打开控制台,可见debugger出现,右键选择Never pause here;

Js逆向-猿人学(1)源码混淆_js_02

Js逆向-猿人学(1)源码混淆_js_03

接下来F5,就可以跳过debugger了,查看数据包。

Js逆向-猿人学(1)源码混淆_js_04

看一下请求参数:

Js逆向-猿人学(1)源码混淆_js_05

直接点击查看Initiator进行调试。

Js逆向-猿人学(1)源码混淆_js_06

随便选一个进来断点,选择下一页触发debug

Js逆向-猿人学(1)源码混淆_js_07

没找到就点右侧的call stack,按顺序点一下看

Js逆向-猿人学(1)源码混淆_js_08
点到request时,可以看到有一段无法格式化的代码,最好把这段代码全部复制到html文件中,然后自己处理一下。

Js逆向-猿人学(1)源码混淆_js_09

从这里可以看出:
timestamp = 13位时间戳 + 100000000;

把没用的删除掉,开始自己拼oo0O0()方法。
其中window.a:

Js逆向-猿人学(1)源码混淆_js_10

然后缺什么补什么,在控制台去找更方便一些。

Js逆向-猿人学(1)源码混淆_js_11

Js逆向-猿人学(1)源码混淆_js_12


执行时如果报错:

  • ReferenceError: atob is not defined

Js逆向-猿人学(1)源码混淆_js_13

atob方法:native code,用于解码使用 base-64 编码的字符串。
如果有node环境的话,可以安装一下 npm install atob
(其实也可以换一种方法处理,比如返回数据后使用python来进行base64转换)

const atob = require('atob');
// npm install atob

/**
 * Node-runnable reconstruction of the page's obfuscated `oo0O0` routine.
 *
 * Steps, in order:
 *   1. Create plain-object stand-ins named `window` and `document`.
 *   2. Decode `window.a` into `window.b` by shifting every character code.
 *   3. Pass `window.b` through `atob`, call a string method on the result
 *      (method name and first argument both come from the `J` string table,
 *      second argument is `mw` wrapped in single quotes), then `eval` the
 *      returned text and log the value.
 *
 * NOTE(review): `window` and `document` are assigned without a declaration, so
 * they become implicit globals; this only works in sloppy-mode scripts.
 * NOTE(review): `eval` executes whatever `window.a` decodes to - only use it
 * with the payload copied from the challenge page.
 *
 * @param {*} mw Value concatenated between single quotes and handed to the
 *     string method described in step 3.
 * @returns {string} Always the empty string; the eval result is only logged.
 */
function oo0O0(mw) {
    // Plain objects standing in for the browser's `window` / `document`.
    window = {};
    document = {};
    // Accumulator for the decoded payload.
    window.b = '';
    // Placeholder literal: the article omits the real (very long) encoded payload here.
    window.a = ' 太长了,这里不贴进来了'
    // Fragments concatenated below into the method names 'fromCharCode' and 'charCodeAt'.
    document.e = 'fromC';
    document.g = 'harCode';
    document.f = 'charCo';
    document.h = 'deAt';
    // Constant subtracted from every character code in the loop below.
    window.c = 5;
    // Equivalent to: window.b += String.fromCharCode(window.a[i].charCodeAt() - i - window.c)
    for (var i = 0, len = window.a.length; i < len; i++) {
        window.b += String[document.e + document.g](window.a[i][document.f + document.h]() - i - window.c)
    }
    // Encoded string table; entries are decoded on demand by J.
    var U = ['W5r5W6VdIHZcT8kU', 'WQ8CWRaxWQirAW=='];
    // J(o, E): returns entry `o` of U decoded with key `E`. Results are cached by
    // index only, so a later call with the same index ignores its key.
    var J = function (o, E) {
        // Coerce the index (passed as a hex string such as '0x0') to a number.
        o = o - 0x0;
        var N = U[o];
        // One-time setup: the decoder and the cache are stored as properties of J.
        if (J['bSSGte'] === undefined) {
            // Base64 decoder whose alphabet lists lowercase letters before uppercase.
            var Y = function (w) {
                var m = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=',
                    T = String(w)['replace'](/=+$/, '');
                var A = '';
                // Accumulates 6-bit alphabet indexes in `b` and emits one byte per
                // completed group position (every character except the first of each four).
                for (var C = 0x0, b, W, l = 0x0; W = T['charAt'](l++); ~W && (b = C % 0x4 ? b * 0x40 + W : W, C++ % 0x4) ? A += String['fromCharCode'](0xff & b >> (-0x2 * C & 0x6)) : 0x0) {
                    W = m['indexOf'](W)
                }
                return A
            };
            // t(w, m): decodes `w` with Y, then XORs it against a keystream derived
            // from key `m`. The two 256-entry loops have the shape of RC4's key
            // scheduling and keystream generation.
            var t = function (w, m) {
                var T = [], A = 0x0, C, b = '', W = '';
                w = Y(w);
                // Percent-encode every char code as %XX and run decodeURIComponent,
                // i.e. reinterpret the decoded bytes as UTF-8 text.
                for (var R = 0x0, v = w['length']; R < v; R++) {
                    W += '%' + ('00' + w['charCodeAt'](R)['toString'](0x10))['slice'](-0x2)
                }
                w = decodeURIComponent(W);
                var l;
                // Identity permutation 0..255.
                for (l = 0x0; l < 0x100; l++) {
                    T[l] = l
                }
                // Shuffle the permutation using the key characters.
                for (l = 0x0; l < 0x100; l++) {
                    A = (A + T[l] + m['charCodeAt'](l % m['length'])) % 0x100, C = T[l], T[l] = T[A], T[A] = C
                }
                l = 0x0, A = 0x0;
                // Generate one keystream value per input character and XOR it in.
                for (var L = 0x0; L < w['length']; L++) {
                    l = (l + 0x1) % 0x100, A = (A + T[l]) % 0x100, C = T[l], T[l] = T[A], T[A] = C, b += String['fromCharCode'](w['charCodeAt'](L) ^ T[(T[l] + T[A]) % 0x100])
                }
                return b
            };
            // Publish the decoder, create an empty cache, mark J as initialised.
            J['luAabU'] = t, J['qlVPZg'] = {}, J['bSSGte'] = !![]
        }
        var H = J['qlVPZg'][o];
        // Cache miss: decode U[o] with key E, store and return it. Cache hit: return the stored string.
        return H === undefined ? (J['TUDBIJ'] === undefined && (J['TUDBIJ'] = !![]), N = J['luAabU'](N, E), J['qlVPZg'][o] = N) : N = H, N
    };
    // atob(window.b) yields script text; the method named by J('0x0', ...) is called
    // on it with J('0x1', ...) and the single-quoted `mw` (presumably a `replace` of
    // a placeholder token - confirm by logging both J(...) values). The returned
    // text is evaluated and the value logged.
    console.log(eval(atob(window['b'])[J('0x0', ']dQW')](J('0x1', 'GTu!'), '\x27' + mw + '\x27')));
    return ''
}
// Sample run with a fixed 13-digit value. oo0O0 logs the eval result itself and
// returns '', so this outer console.log only prints an empty line.
console.log(oo0O0(1611212930238))

到这里并没有结束,返回最开始,可以看到 m 其实是加密之后再加上window.f,
然后以字符串形式和 timestamp/1000进行拼接,得到最终的参数m。

Js逆向-猿人学(1)源码混淆_js_14

打印了一下 window.f,发现它不是固定值。

Js逆向-猿人学(1)源码混淆_js_15

全局中也没有搜索出来

Js逆向-猿人学(1)源码混淆_js_16

这说明 window.f 可能被更改了字符、编码,或者用了其他写法代替。

试了好几个地方,才找到window.f

Js逆向-猿人学(1)源码混淆_js_17

这里打印了下 base64转码后的代码,发现是一段js,格式化之后删除无用的内容。

Js逆向-猿人学(1)源码混淆_js_18


所以再看这个图,整体的生成流程就已经掌握了。
另外我修改后生成了两遍md5的值,后来发现 oo0O0()并没有返回值 = =,是我自己加上了。
并且oo0O0()和window.f相同。

Js逆向-猿人学(1)源码混淆_js_19

测试: 把 node 生成的m拿出来,直接使用python requests 请求,成功返回数据。

Js逆向-猿人学(1)源码混淆_js_20


完整js代码:

const atob = require('atob');
// npm install atob

/**
 * Node-runnable reconstruction of the page's obfuscated `oo0O0` routine,
 * modified to return the evaluated value.
 *
 * Steps, in order:
 *   1. Create plain-object stand-ins named `window` and `document`.
 *   2. Decode `window.a` into `window.b` by shifting every character code.
 *   3. Pass `window.b` through `atob`, call a string method on the result
 *      (method name and first argument both come from the `J` string table,
 *      second argument is `mw` wrapped in single quotes), then `eval` the
 *      returned text.
 *
 * NOTE(review): `window`, `document` and `result` are assigned without a
 * declaration, so they become implicit globals; this only works in
 * sloppy-mode scripts.
 * NOTE(review): `eval` executes whatever `window.a` decodes to - only use it
 * with the payload copied from the challenge page.
 *
 * @param {*} mw Value concatenated between single quotes and handed to the
 *     string method described in step 3.
 * @returns {*} The value produced by `eval` for the generated script text.
 */
function oo0O0(mw) {
    // Plain objects standing in for the browser's `window` / `document`.
    window = {};
    document = {};
    // Accumulator for the decoded payload.
    window.b = '';
    // Placeholder literal: the article omits the real (very long) encoded payload here.
    window.a = "!!!!!!!!!A太长了,此处省略了!!!!!!!!!!!!!!!!!"
    // Fragments concatenated below into the method names 'fromCharCode' and 'charCodeAt'.
    document.e = 'fromC';
    document.g = 'harCode';
    document.f = 'charCo';
    document.h = 'deAt';
    // Constant subtracted from every character code in the loop below.
    window.c = 5;
    // Equivalent to: window.b += String.fromCharCode(window.a[i].charCodeAt() - i - window.c)
    for (var i = 0, len = window.a.length; i < len; i++) {
        window.b += String[document.e + document.g](window.a[i][document.f + document.h]() - i - window.c)
    }
    // Encoded string table; entries are decoded on demand by J.
    var U = ['W5r5W6VdIHZcT8kU', 'WQ8CWRaxWQirAW=='];
    // J(o, E): returns entry `o` of U decoded with key `E`. Results are cached by
    // index only, so a later call with the same index ignores its key.
    var J = function (o, E) {
        // Coerce the index (passed as a hex string such as '0x0') to a number.
        o = o - 0x0;
        var N = U[o];
        // One-time setup: the decoder and the cache are stored as properties of J.
        if (J['bSSGte'] === undefined) {
            // Base64 decoder whose alphabet lists lowercase letters before uppercase.
            var Y = function (w) {
                var m = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=',
                    T = String(w)['replace'](/=+$/, '');
                var A = '';
                // Accumulates 6-bit alphabet indexes in `b` and emits one byte per
                // completed group position (every character except the first of each four).
                for (var C = 0x0, b, W, l = 0x0; W = T['charAt'](l++); ~W && (b = C % 0x4 ? b * 0x40 + W : W, C++ % 0x4) ? A += String['fromCharCode'](0xff & b >> (-0x2 * C & 0x6)) : 0x0) {
                    W = m['indexOf'](W)
                }
                return A
            };
            // t(w, m): decodes `w` with Y, then XORs it against a keystream derived
            // from key `m`. The two 256-entry loops have the shape of RC4's key
            // scheduling and keystream generation.
            var t = function (w, m) {
                var T = [], A = 0x0, C, b = '', W = '';
                w = Y(w);
                // Percent-encode every char code as %XX and run decodeURIComponent,
                // i.e. reinterpret the decoded bytes as UTF-8 text.
                for (var R = 0x0, v = w['length']; R < v; R++) {
                    W += '%' + ('00' + w['charCodeAt'](R)['toString'](0x10))['slice'](-0x2)
                }
                w = decodeURIComponent(W);
                var l;
                // Identity permutation 0..255.
                for (l = 0x0; l < 0x100; l++) {
                    T[l] = l
                }
                // Shuffle the permutation using the key characters.
                for (l = 0x0; l < 0x100; l++) {
                    A = (A + T[l] + m['charCodeAt'](l % m['length'])) % 0x100, C = T[l], T[l] = T[A], T[A] = C
                }
                l = 0x0, A = 0x0;
                // Generate one keystream value per input character and XOR it in.
                for (var L = 0x0; L < w['length']; L++) {
                    l = (l + 0x1) % 0x100, A = (A + T[l]) % 0x100, C = T[l], T[l] = T[A], T[A] = C, b += String['fromCharCode'](w['charCodeAt'](L) ^ T[(T[l] + T[A]) % 0x100])
                }
                return b
            };
            // Publish the decoder, create an empty cache, mark J as initialised.
            J['luAabU'] = t, J['qlVPZg'] = {}, J['bSSGte'] = !![]
        }
        var H = J['qlVPZg'][o];
        // Cache miss: decode U[o] with key E, store and return it. Cache hit: return the stored string.
        return H === undefined ? (J['TUDBIJ'] === undefined && (J['TUDBIJ'] = !![]), N = J['luAabU'](N, E), J['qlVPZg'][o] = N) : N = H, N
    };

    // atob(window.b) yields script text; the method named by J('0x0', ...) is called
    // on it with J('0x1', ...) and the single-quoted `mw` (presumably a `replace` of
    // a placeholder token - confirm by logging both J(...) values). The returned
    // text is evaluated; `result` is not declared, so it leaks as a global.
    result = eval(atob(window['b'])[J('0x0', ']dQW')](J('0x1', 'GTu!'), '\x27' + mw + '\x27'));
    return result;

}

/**
 * Builds the request parameter `m` for the current moment.
 *
 * The current time is shifted forward by a fixed offset, handed to `oo0O0` as a
 * decimal string, and the value that comes back is joined to the shifted time
 * expressed in seconds, separated by the character '丨'.
 *
 * Side effect: the finished value is also written to the console.
 *
 * @returns {string} `<oo0O0 result>丨<shifted timestamp in seconds>`.
 */
function getM() {
    const OFFSET_MS = 100000000;
    const MS_PER_SECOND = 1000;

    // Date.parse(new Date()) round-trips through the date's string form, which
    // has one-second resolution, so the value is always a whole number of seconds.
    const shiftedMs = Date.parse(new Date()) + OFFSET_MS;
    const digest = oo0O0(shiftedMs.toString());

    const tail = '丨' + shiftedMs / MS_PER_SECOND;
    const result = digest + tail;

    console.log(result);
    return result;
}

调用 getM() 方法即可得到最终的参数 m。