目的

获取1688工厂名片的相关信息

详细需求

一、进入1688网站

https://www.1688.com/

二、使用“工厂”这个搜索框

三、输入工厂名称进行搜索,如“深圳市杰之美时装有限公司”

1688 获取 cookies 1688网址怎么进_js逆向

四、返回搜索结果,并逐个获取店铺/工厂的链接

1688 获取 cookies 1688网址怎么进_工厂_02

五、获取有关数据

1688 获取 cookies 1688网址怎么进_js逆向_03


1688 获取 cookies 1688网址怎么进_js逆向_04

思路解析

一、搜索关键词,获取返回网页中的工厂ID

1688 获取 cookies 1688网址怎么进_工厂_05


二、链接拼接-进入工厂名片详情页

1688 获取 cookies 1688网址怎么进_1688 获取 cookies_06


三、目标信息定位

1688 获取 cookies 1688网址怎么进_js逆向_07


1688 获取 cookies 1688网址怎么进_python_08

1688 获取 cookies 1688网址怎么进_1688 获取 cookies_09


四、模拟构建请求

1688 获取 cookies 1688网址怎么进_工厂_10


这里的难点就是sign值的获取

1688 获取 cookies 1688网址怎么进_js逆向_11


1688 获取 cookies 1688网址怎么进_python_12


1688 获取 cookies 1688网址怎么进_1688_13

1688 获取 cookies 1688网址怎么进_工厂_14


1688 获取 cookies 1688网址怎么进_1688_15

五、思路汇总

1.请求工厂关键词-解析得到工厂ID
2.需要进行三个json请求
2.1 json1获取工厂基本信息
2.2 json7获取粉丝数
2.3 json2获取生产实力
3.获取cookie(这里直接复制使用的,具有时效性,会过期)
4.对每个json请求的data进行处理
5.拼接完成后,交给本地js进行处理得到sign值
6.构建请求
7.信息提取与打印

源码

1688.js

/**
 * Compute the MD5 digest of a string and return it as 32 lowercase hex digits.
 *
 * The input is first normalised by replacing every "\r\n" with "\n", then each
 * UTF-16 code unit is encoded as 1, 2 or 3 UTF-8 style bytes (surrogate halves
 * are encoded individually, exactly as the code units appear in the string).
 *
 * @param {string} e - Text to hash.
 * @returns {string} Lowercase hexadecimal MD5 digest.
 */
function u(e) {
    // Left-rotation amounts: one row per round, cycled every four steps.
    const ROTATIONS = [
        [7, 12, 17, 22],
        [5, 9, 14, 20],
        [4, 11, 16, 23],
        [6, 10, 15, 21]
    ];

    // Additive constant for each of the 64 steps, in execution order.
    const ADDENDS = [
        3614090360, 3905402710, 606105819, 3250441966,
        4118548399, 1200080426, 2821735955, 4249261313,
        1770035416, 2336552879, 4294925233, 2304563134,
        1804603682, 4254626195, 2792965006, 1236535329,
        4129170786, 3225465664, 643717713, 3921069994,
        3593408605, 38016083, 3634488961, 3889429448,
        568446438, 3275163606, 4107603335, 1163531501,
        2850285829, 4243563512, 1735328473, 2368359562,
        4294588738, 2272392833, 1839030562, 4259657740,
        2763975236, 1272893353, 4139469664, 3200236656,
        681279174, 3936430074, 3572445317, 76029189,
        3654602809, 3873151461, 530742520, 3299628645,
        4096336452, 1126891415, 2878612391, 4237533241,
        1700485571, 2399980690, 4293915773, 2240044497,
        1873313359, 4264355552, 2734768916, 1309151649,
        4149444226, 3174756917, 718787259, 3951481745
    ];

    // Encode every UTF-16 code unit of `text` into its byte sequence.
    function toBytes(text) {
        const bytes = [];
        for (let pos = 0; pos < text.length; pos++) {
            const unit = text.charCodeAt(pos);
            if (unit < 128) {
                bytes.push(unit);
            } else if (unit < 2048) {
                bytes.push((unit >> 6) | 192, (unit & 63) | 128);
            } else {
                bytes.push((unit >> 12) | 224, ((unit >> 6) & 63) | 128, (unit & 63) | 128);
            }
        }
        return bytes;
    }

    // Pack bytes into little-endian 32-bit words, append the 0x80 marker and
    // the bit length, padding to a whole number of 16-word blocks.
    function toPaddedWords(bytes) {
        const byteCount = bytes.length;
        const blockCount = Math.floor((byteCount + 8) / 64) + 1;
        const words = new Int32Array(blockCount * 16);
        for (let pos = 0; pos < byteCount; pos++) {
            words[pos >> 2] |= bytes[pos] << ((pos % 4) * 8);
        }
        words[byteCount >> 2] |= 128 << ((byteCount % 4) * 8);
        words[words.length - 2] = byteCount << 3;
        words[words.length - 1] = byteCount >>> 29;
        return words;
    }

    // Render a 32-bit word as 8 hex digits, least significant byte first.
    function toHex(word) {
        let digits = "";
        for (let byteIndex = 0; byteIndex < 4; byteIndex++) {
            digits += ((word >>> (8 * byteIndex)) & 255).toString(16).padStart(2, "0");
        }
        return digits;
    }

    const words = toPaddedWords(toBytes(e.replace(/\r\n/g, "\n")));

    let stateA = 1732584193;
    let stateB = 4023233417;
    let stateC = 2562383102;
    let stateD = 271733878;

    for (let base = 0; base < words.length; base += 16) {
        let a = stateA;
        let b = stateB;
        let c = stateC;
        let d = stateD;

        for (let step = 0; step < 64; step++) {
            const round = step >> 4;
            let mixed;
            let wordIndex;
            if (round === 0) {
                mixed = (b & c) | (~b & d);
                wordIndex = step;
            } else if (round === 1) {
                mixed = (b & d) | (c & ~d);
                wordIndex = (5 * step + 1) % 16;
            } else if (round === 2) {
                mixed = b ^ c ^ d;
                wordIndex = (3 * step + 5) % 16;
            } else {
                mixed = c ^ (b | ~d);
                wordIndex = (7 * step) % 16;
            }

            // All operands are below 2^33 in magnitude, so the floating-point
            // sum is exact and `| 0` reduces it modulo 2^32.
            const total = (a + mixed + words[base + wordIndex] + ADDENDS[step]) | 0;
            const shift = ROTATIONS[round][step % 4];
            const rotated = (total << shift) | (total >>> (32 - shift));
            const updated = (b + rotated) | 0;

            // Rotate the four working registers for the next step.
            a = d;
            d = c;
            c = b;
            b = updated;
        }

        stateA = (stateA + a) | 0;
        stateB = (stateB + b) | 0;
        stateC = (stateC + c) | 0;
        stateD = (stateD + d) | 0;
    }

    return [stateA, stateB, stateC, stateD].map(toHex).join("").toLowerCase();
}
// console.log(a)
// console.log(p)

// var e='5970c860dcff67864c7b1912bd984ee9&1602670696727&12574478&{"cid":"TpFacCoreInfosService:TpFacCoreInfosService","methodName":"execute","params":"{\"facAliId\":\"2019106328\"}"}'
// var e='87ce3da8b6f5218713fc35e8b9d9d7de&1602738209823&12574478&{"cid":"TpFacCoreInfosService:TpFacCoreInfosService","methodName":"execute","params":"{\\"facAliId\\":\\"2019106328\\"}"}'
// var a = (new Date).getTime()
// // var a='1602810824704'
// var s='12574478'
// var token='8a0539ff99e9241f36538eee5f490e48'
// var data='{"cid":"TpFacCoreInfosService:TpFacCoreInfosService","methodName":"execute","params":"{\\"facAliId\\":\\"2019106328\\"}"}'
//
// p = (token + "&" + a + "&" + s + "&" + data)
// console.log(a)
// console.log(u(p))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author  : jia666
# @Time    : 2020/10/15 17:07


import re
import time
import requests
import execjs
from urllib import parse

class S1688(object):
    """Fetch and print the 1688 "factory card" of a factory found by keyword.

    Workflow (see ``Main``): search the keyword to obtain the factory ID, send
    three signed JSONP requests to the h5 API (core info, production strength,
    favourite count), then pull the fields out with regular expressions.
    """

    # Seconds to wait for any single HTTP request before giving up.
    REQUEST_TIMEOUT = 10

    def __init__(self, word, cookie=None):
        """Store the search keyword and build the request headers.

        :param word: factory keyword to search for.
        :param cookie: optional Cookie header value. It must contain an
            ``_m_h5_tk`` entry, because the request signature is derived from
            it. When omitted, the built-in cookie below is used.
        """
        if cookie is None:
            # NOTE(review): this is a browser session cookie copied into the
            # source; it is time-limited and will be rejected once expired.
            # Pass a fresh value through ``cookie`` instead of editing it here.
            cookie = 'cookie2=15b9e05276e7c4893f44f1509e7846f6; t=2189917eb616063e3da62aa4f41673d9; _tb_token_=7b657778a3376; __cn_logon__=false; cna=dFkQGGsGrnACARsm+oOaXAW/; xlly_s=1; h_keys="%u6df1%u5733%u5e02%u6770%u4e4b%u7f8e%u65f6%u88c5%u6709%u9650%u516c%u53f8"; _csrf_token=1603072773208; alicnweb=touch_tb_at%3D1603088557128; _m_h5_tk=bd327dc391fc8b112121750e88805f27_1603099001009; _m_h5_tk_enc=7095c3c810519260432599239fa2c840; ad_prefer="2020/10/19 14:23:12"; isg=BCcnARTANLN3qbA-Gyksu_xDtlvxrPuOwp-EYfmU17bd6EeqAX-p3mWpCuj2ANMG; l=eBEQxAnqOmPZIkKZBO5CFurza779uIRb4sPzaNbMiInca10FaF6eCNQVOwXJudtjgtCUIetybUMLyRLHR3fRwxDDB5JEV7vS3xvO.; tfstk=cJ6cBRAuSsRjwbAlO-9fv3snaPbcaGoykdJPUT8LyYvDAYBJ0s4xaXPjD0YAoOh1.'

        self.head = {
          'cookie': cookie,
          "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
        }
        self.word = word  # factory keyword
        self.api = 'https://h5api.m.1688.com/h5/mtop.taobao.widgetservice.getjsoncomponent/1.0/?'

    def Main(self):
        """Run the whole workflow.

        :return: ``True`` when the report was printed, ``False`` when one of
            the API responses reported an expired token (the report is then
            skipped, because the fields it needs are absent).
        """
        self.Get_word()  # 1. keyword search -> factory ID
        fetch_steps = (
            self.Get_mtopjsonp1,  # 2. core info (partners, address, ...)
            self.Get_mtopjsonp2,  # 3. production strength
            self.Get_mtopjsonp7,  # 4. favourite (follower) count
        )
        for fetch in fetch_steps:
            # Compare with ``is False`` so an override returning None still
            # counts as success.
            if fetch() is False:
                return False
        self.RE_html()  # 5. regex extraction and printing
        return True

    def Get_html(self, url):
        """Request ``url`` and return the response text with all whitespace removed."""
        # A timeout keeps a stalled connection from blocking the run forever.
        req = requests.get(url, headers=self.head, timeout=self.REQUEST_TIMEOUT)
        html = re.sub(r'\s', '', req.text)
        return html

    def Init_js(self, data):
        """Compute the request signature for ``data``.

        Sets ``self.a`` (millisecond timestamp string) and ``self.sign`` (the
        value returned by function ``u`` in the local ``1688.js`` file).

        :param data: the exact ``data`` string that will be sent.
        :raises IndexError: if the cookie has no ``_m_h5_tk`` token.
        """
        tokens = re.findall('_m_h5_tk=(.*?)_', self.head['cookie'], re.S)
        if not tokens:
            raise IndexError('cookie has no _m_h5_tk token; update the cookie')
        token = tokens[0]
        s = '12574478'  # fixed value, identical to the appKey sent in the URL
        self.a = str(int(time.time() * 1000))

        p = (token + "&" + self.a + "&" + s + "&" + data)  # string to be signed
        with open('1688.js', 'r', encoding='utf-8') as f:
            ctx = execjs.compile(f.read())
        self.sign = ctx.call('u', p)

    def Get_word(self):
        """Search for the keyword and store the first factory ID in ``self.factory_id``.

        :raises IndexError: if the search page contains no ``realUserId``.
        """
        # Percent-encode the keyword so characters such as '&', '#' or spaces
        # cannot break the query string.
        url = 'https://s.1688.com/company/company_search.htm?keywords={}&charset=utf8'.format(
            parse.quote(str(self.word)))
        req = self.Get_html(url)
        ids = re.findall('"realUserId":"(.*?)"', req, re.S)
        if not ids:
            raise IndexError('no factory ID found for keyword {!r}'.format(self.word))
        self.factory_id = ids[0]

    def Get_mtopjsonp1(self):
        """Fetch the core factory info into ``self.html1``.

        :return: result of ``Check_html`` for the response.
        """
        data = '{"cid":"TpFacCoreInfosService:TpFacCoreInfosService","methodName":"execute","params":"{\\"facAliId\\":\\"' + str(
            self.factory_id) + '\\"}"}'

        url = self.Get_url(data, 1)
        self.html1 = self.Get_html(url)
        return self.Check_html(self.html1)

    def Check_html(self, html):
        """Report whether ``html`` is usable.

        Prints a hint and returns ``False`` when the response contains the
        "token expired" marker; returns ``True`` otherwise.
        """
        if '令牌过期' in html:
            print('令牌过期,更新cookie后重试')
            return False
        return True

    def Get_url(self, data, n):
        """Build the signed API URL for one JSONP request.

        :param data: request payload, either an already serialised string or a
            dict. A dict is serialised to compact JSON; the same string is used
            for the signature and for the ``data`` query parameter.
        :param n: number appended to the ``mtopjsonp`` callback name.
        :return: the complete request URL.
        """
        import json

        if not isinstance(data, str):
            # str(dict) would yield a single-quoted Python repr, which is not
            # valid JSON.
            data = json.dumps(data, separators=(',', ':'))

        self.Init_js(data)  # sets self.a and self.sign for this payload
        parms = 'jsv=2.6.0&appKey=12574478&t={}&sign={}&api=mtop.taobao.widgetService.getJsonComponent&v=1.0&type=jsonp&timeout=5000&dataType=jsonp&callback=mtopjsonp{}&'.format(
            self.a, self.sign, n)
        sdata = parse.quote(data)
        url = self.api + parms + 'data=' + sdata
        return url

    def Get_mtopjsonp2(self):
        """Fetch the production-strength data into ``self.html2``.

        :return: result of ``Check_html`` for the response.
        """
        data = {"cid": "FactoryStrengthServiceWidget:FactoryStrengthServiceWidget", "methodName": "execute"}
        data['params'] = '{"extParam":{"factoryUserId":"%s"}}' % (self.factory_id)

        url = self.Get_url(data, 2)
        self.html2 = self.Get_html(url)
        return self.Check_html(self.html2)

    def Get_mtopjsonp7(self):
        """Fetch the favourite-count data into ``self.html7``.

        :return: result of ``Check_html`` for the response.
        """
        data = {'cid': 'ShopFavouriteServiceWidget:ShopFavouriteServiceWidget', 'methodName': 'execute'}
        data['params'] = '{"extParams":{"method":"readFavourite","targetUserId":"%s"}}' % (self.factory_id)

        url = self.Get_url(data, 7)
        self.html7 = self.Get_html(url)
        return self.Check_html(self.html7)

    def RE_html(self):
        """Extract the fields from the three stored responses and print the report."""
        facName = re.findall('"facName":"(.*?)"', self.html1, re.S)[0]  # factory name
        data = re.findall('"data":"(.*?)"', self.html1, re.S)  # metric values
        comment = re.findall('"desc":"(.*?)"', self.html1, re.S)  # metric labels
        factoryPv = re.findall('"factoryPv":"(.*?)"', self.html1, re.S)[0]  # page views
        address = re.findall('"factoryDetailedAddress":"(.*?)"', self.html1, re.S)[0]
        favCount = re.findall('"favCount":"(.*?)"', self.html7, re.S)[0]  # followers
        k = re.findall('"value":"(.*?)"', self.html2, re.S)  # production strength

        s = ''
        for i, com in enumerate(comment):
            # Every metric except the first is printed with a trailing '%'.
            t = '\t' if i == 0 else '%\t'
            s += str(com) + ':' + str(data[i]) + t
        s += '粉丝数:' + favCount + '\t' + '浏览数:' + factoryPv + '\t'

        # (label, unit suffix) for the first eight "value" entries, in order.
        strength_fields = (
            ('厂房面积', '平方'),
            ('生产人数', '人'),
            ('设备总数', '台'),
            ('仓储类型', ''),
            ('加工方式', ''),
            ('代工模式', ''),
            ('质检类型', ''),
            ('售后服务', ''),
        )
        h = ''
        for i, (label, unit) in enumerate(strength_fields):
            h += label + k[i] + unit + '\t'

        print(facName + '\n' + '*' * 50 + '\n' + s + '\n' + address + '\n' + '-' * 50 + '\n' + h)


if __name__ == '__main__':
    # Demo run: look up one factory by its full company name and print its card.
    S1688('深圳市杰之美时装有限公司').Main()

实现效果

注意:cookie需要手动更新

1688 获取 cookies 1688网址怎么进_python_16