记一次淘宝接口sign签名破解

淘宝H5页面接口Sign签名破解

前言:由于公司业务需求,需要显示淘宝商品详情页面的图文详情部分。原先的业务逻辑是请求https://world.taobao.com/item/{num_iid}.htm,在得到html文本后,按图片地址前缀(如https://img.alicdn.com/imgextra)全量抓取其中的图片地址,再返回给客户端。

由于header中的UA传的是一个PC端的UA,所以获取到的页面数据中,可能含有一些广告、商家自定义的布局等内容,从而无法在客户端正常展示。另一方面,该接口为高频接口,而打开htm页面的耗时占到每次请求总耗时的70~80%,严重影响客户端体验,所以决定对该接口进行升级。

后来发现手机上打开该页面是不显示一些无关的广告的,而且布局也正常。于是将UA改为了手机端的UA,然后通过抓包分析,返回的页面是一个不包含图片的html,图文详情是根据js接口调用动态渲染。接口为https://h5api.m.taobao.com/h5/mtop.wdetail.getitemdescx/4.9/?jsv=2.4.11&appKey=12574478&t=1544165283634&sign=c691925755f2ff429c950f7b00903a23&api=mtop.wdetail.getItemDescx&v=4.9&type=jsonp&dataType=jsonp&callback=mtopjsonp6&data=%7B%22item_num_id%22%3A%22554401452734%22%7D。看到参数中包含了sign,所以是根据某种规则算出来的签名。

接着就是找到sign的签名算法了,将页面中的js格式化了一下。找到关于sign签名的部分。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/**
 * Prepares an mtop H5 request: computes the `sign` query parameter and stores
 * the resulting path, query string and POST data on `this.options`.
 *
 * Reads `this.params` (api, v, data, ua and optionally appKey, valueType,
 * ext_querys) and `this.options` (token, prefix, subDomain, mainDomain,
 * hostSetting, getJSONP / getOriginalJSONP / getJSON / postJSON, ...).
 * Only when `options.H5Request === true` does it write `options.path`,
 * `options.querystring` and `options.postdata`.
 *
 * The signature is the lowercase hex MD5 digest of the UTF-8 encoded string
 * `token + "&" + t + "&" + appKey + "&" + data`, where `t` is the current
 * millisecond timestamp taken inside this function.
 *
 * `t` (as in `t.location.hostname`) is a free variable of the enclosing scope
 * that is not part of this excerpt -- presumably the global window object.
 *
 * @param {Function} n - Continuation; called with no arguments at the end,
 *     whether or not the H5 branch ran.
 */
s.prototype.__processRequestUrl = function(n) {
    var e = this.params,
    r = this.options;
    // Per-host overrides: copy prefix / subDomain / mainDomain from the entry
    // registered for the current hostname, if there is one.
    if (r.hostSetting && r.hostSetting[t.location.hostname]) {
        var o = r.hostSetting[t.location.hostname];
        o.prefix && (r.prefix = o.prefix),
        o.subDomain && (r.subDomain = o.subDomain),
        o.mainDomain && (r.mainDomain = o.mainDomain)
    }
    if (!0 === r.H5Request) {
        // i: protocol-relative path "//[prefix.][subDomain.]mainDomain/h5/<api>/<v>/"
        var i = "//" + (r.prefix ? r.prefix + ".": "") + (r.subDomain ? r.subDomain + ".": "") + r.mainDomain + "/h5/" + e.api.toLowerCase() + "/" + e.v.toLowerCase() + "/",
        // a: appKey; falls back to "4272" on the "waptest" sub-domain, else "12574478"
        a = e.appKey || ("waptest" === r.subDomain ? "4272": "12574478"),
        // c: millisecond timestamp, sent as query parameter `t` and signed below
        c = (new Date).getTime(),
        // u: the signature -- an inline MD5 implementation applied immediately to
        // token & timestamp & appKey & data (see the call arguments after the body).
        u = function(t) {
            // Rotate the 32-bit value t left by n bits.
            function n(t, n) {
                return t << n | t >>> 32 - n
            }
            // Add two 32-bit words; bits 30 and 31 are handled separately so the
            // sum wraps around instead of growing past 32 bits.
            function e(t, n) {
                var e, r, o, i, a;
                return o = 2147483648 & t,
                i = 2147483648 & n,
                a = (1073741823 & t) + (1073741823 & n),
                (e = 1073741824 & t) & (r = 1073741824 & n) ? 2147483648 ^ a ^ o ^ i: e | r ? 1073741824 & a ? 3221225472 ^ a ^ o ^ i: 1073741824 ^ a ^ o ^ i: a ^ o ^ i
            }
            // r / o / i / a below are the four MD5 round steps. Each computes
            // rotl(t + f(r, o, i) + a + u, c) + r and differs only in the bitwise
            // mixing function f.
            // Round 1: f = (x & y) | (~x & z)
            function r(t, r, o, i, a, c, u) {
                return e(n(t = e(t, e(e(function(t, n, e) {
                    return t & n | ~t & e
                } (r, o, i), a), u)), c), r)
            }
            // Round 2: f = (x & z) | (y & ~z)
            function o(t, r, o, i, a, c, u) {
                return e(n(t = e(t, e(e(function(t, n, e) {
                    return t & e | n & ~e
                } (r, o, i), a), u)), c), r)
            }
            // Round 3: f = x ^ y ^ z
            function i(t, r, o, i, a, c, u) {
                return e(n(t = e(t, e(e(function(t, n, e) {
                    return t ^ n ^ e
                } (r, o, i), a), u)), c), r)
            }
            // Round 4: f = y ^ (x | ~z)
            function a(t, r, o, i, a, c, u) {
                return e(n(t = e(t, e(e(function(t, n, e) {
                    return n ^ (t | ~e)
                } (r, o, i), a), u)), c), r)
            }
            // Format a 32-bit word as 8 hex digits, lowest byte first.
            function c(t) {
                var n, e = "",
                r = "";
                for (n = 0; 3 >= n; n++) e += (r = "0" + (t >>> 8 * n & 255).toString(16)).substr(r.length - 2, 2);
                return e
            }
            var u, s, f, l, p, d, h, v, m, g;
            // Loop initialiser, in evaluation order:
            //   1. the inner function(t) normalises "\r\n" to "\n" and UTF-8 encodes
            //      the input (1-3 bytes per UTF-16 code unit);
            //   2. the outer function(t) packs those bytes into little-endian 32-bit
            //      words, appends the 0x80 padding byte and stores the message
            //      length in bits in the last two words;
            //   3. d, h, v, m receive the MD5 initial state.
            // The loop body then processes 16 words (one 64-byte block) per pass.
            for (g = function(t) {
                for (var n, e = t.length,
                r = e + 8,
                o = 16 * ((r - r % 64) / 64 + 1), i = new Array(o - 1), a = 0, c = 0; e > c;) a = c % 4 * 8,
                i[n = (c - c % 4) / 4] = i[n] | t.charCodeAt(c) << a,
                c++;
                return a = c % 4 * 8,
                i[n = (c - c % 4) / 4] = i[n] | 128 << a,
                i[o - 2] = e << 3,
                i[o - 1] = e >>> 29,
                i
            } (t = function(t) {
                t = t.replace(/\r\n/g, "\n");
                for (var n = "",
                e = 0; e < t.length; e++) {
                    var r = t.charCodeAt(e);
                    128 > r ? n += String.fromCharCode(r) : r > 127 && 2048 > r ? (n += String.fromCharCode(r >> 6 | 192), n += String.fromCharCode(63 & r | 128)) : (n += String.fromCharCode(r >> 12 | 224), n += String.fromCharCode(r >> 6 & 63 | 128), n += String.fromCharCode(63 & r | 128))
                }
                return n
            } (t)), d = 1732584193, h = 4023233417, v = 2562383102, m = 271733878, u = 0; u < g.length; u += 16) s = d,
            f = h,
            l = v,
            p = m,
            // 64 chained steps (4 rounds x 16); the nesting order is significant.
            h = a(h = a(h = a(h = a(h = i(h = i(h = i(h = i(h = o(h = o(h = o(h = o(h = r(h = r(h = r(h = r(h, v = r(v, m = r(m, d = r(d, h, v, m, g[u + 0], 7, 3614090360), h, v, g[u + 1], 12, 3905402710), d, h, g[u + 2], 17, 606105819), m, d, g[u + 3], 22, 3250441966), v = r(v, m = r(m, d = r(d, h, v, m, g[u + 4], 7, 4118548399), h, v, g[u + 5], 12, 1200080426), d, h, g[u + 6], 17, 2821735955), m, d, g[u + 7], 22, 4249261313), v = r(v, m = r(m, d = r(d, h, v, m, g[u + 8], 7, 1770035416), h, v, g[u + 9], 12, 2336552879), d, h, g[u + 10], 17, 4294925233), m, d, g[u + 11], 22, 2304563134), v = r(v, m = r(m, d = r(d, h, v, m, g[u + 12], 7, 1804603682), h, v, g[u + 13], 12, 4254626195), d, h, g[u + 14], 17, 2792965006), m, d, g[u + 15], 22, 1236535329), v = o(v, m = o(m, d = o(d, h, v, m, g[u + 1], 5, 4129170786), h, v, g[u + 6], 9, 3225465664), d, h, g[u + 11], 14, 643717713), m, d, g[u + 0], 20, 3921069994), v = o(v, m = o(m, d = o(d, h, v, m, g[u + 5], 5, 3593408605), h, v, g[u + 10], 9, 38016083), d, h, g[u + 15], 14, 3634488961), m, d, g[u + 4], 20, 3889429448), v = o(v, m = o(m, d = o(d, h, v, m, g[u + 9], 5, 568446438), h, v, g[u + 14], 9, 3275163606), d, h, g[u + 3], 14, 4107603335), m, d, g[u + 8], 20, 1163531501), v = o(v, m = o(m, d = o(d, h, v, m, g[u + 13], 5, 2850285829), h, v, g[u + 2], 9, 4243563512), d, h, g[u + 7], 14, 1735328473), m, d, g[u + 12], 20, 2368359562), v = i(v, m = i(m, d = i(d, h, v, m, g[u + 5], 4, 4294588738), h, v, g[u + 8], 11, 2272392833), d, h, g[u + 11], 16, 1839030562), m, d, g[u + 14], 23, 4259657740), v = i(v, m = i(m, d = i(d, h, v, m, g[u + 1], 4, 2763975236), h, v, g[u + 4], 11, 1272893353), d, h, g[u + 7], 16, 4139469664), m, d, g[u + 10], 23, 3200236656), v = i(v, m = i(m, d = i(d, h, v, m, g[u + 13], 4, 681279174), h, v, g[u + 0], 11, 3936430074), d, h, g[u + 3], 16, 3572445317), m, d, g[u + 6], 23, 76029189), v = i(v, m = i(m, d = i(d, h, v, m, g[u + 9], 4, 3654602809), h, v, g[u + 12], 11, 3873151461), d, h, g[u + 15], 16, 
            530742520), m, d, g[u + 2], 23, 3299628645), v = a(v, m = a(m, d = a(d, h, v, m, g[u + 0], 6, 4096336452), h, v, g[u + 7], 10, 1126891415), d, h, g[u + 14], 15, 2878612391), m, d, g[u + 5], 21, 4237533241), v = a(v, m = a(m, d = a(d, h, v, m, g[u + 12], 6, 1700485571), h, v, g[u + 3], 10, 2399980690), d, h, g[u + 10], 15, 4293915773), m, d, g[u + 1], 21, 2240044497), v = a(v, m = a(m, d = a(d, h, v, m, g[u + 8], 6, 1873313359), h, v, g[u + 15], 10, 4264355552), d, h, g[u + 6], 15, 2734768916), m, d, g[u + 13], 21, 1309151649), v = a(v, m = a(m, d = a(d, h, v, m, g[u + 4], 6, 4149444226), h, v, g[u + 11], 10, 3174756917), d, h, g[u + 2], 15, 718787259), m, d, g[u + 9], 21, 3951481745),
            // Fold this block's result into the state carried over from the previous block.
            d = e(d, s),
            h = e(h, f),
            v = e(v, l),
            m = e(m, p);
            return (c(d) + c(h) + c(v) + c(m)).toLowerCase()
        } (r.token + "&" + c + "&" + a + "&" + e.data),
        // s: query-string parameters
        s = {
            jsv: "2.4.11",
            appKey: a,
            t: c,
            sign: u
        },
        // f: POST body fields
        f = {
            data: e.data,
            ua: e.ua
        };
        // Copy every remaining request param into the query string, except keys
        // already present in s or f and the header / extra-query containers.
        Object.keys(e).forEach(function(t) {
            "undefined" == typeof s[t] && "undefined" == typeof f[t] && "headers" !== t && "ext_headers" !== t && "ext_querys" !== t && (s[t] = e[t])
        }),
        // ext_querys entries are merged last and may overwrite earlier keys.
        e.ext_querys && Object.keys(e.ext_querys).forEach(function(t) {
            s[t] = e.ext_querys[t]
        }),
        // Choose the `type` query parameter from the transport flags ...
        r.getJSONP ? s.type = "jsonp": r.getOriginalJSONP ? s.type = "originaljsonp": (r.getJSON || r.postJSON) && (s.type = "originaljson"),
        // ... then let an explicit params.valueType ("original" / "string") override it.
        "undefined" != typeof e.valueType && ("original" === e.valueType ? r.getJSONP || r.getOriginalJSONP ? s.type = "originaljsonp": (r.getJSON || r.postJSON) && (s.type = "originaljson") : "string" === e.valueType && (r.getJSONP || r.getOriginalJSONP ? s.type = "jsonp": (r.getJSON || r.postJSON) && (s.type = "json"))),
        !0 === r.useJsonpResultType && "originaljson" === s.type && delete s.type,
        // Optionally turn the protocol-relative path into an absolute URL.
        r.dangerouslySetProtocol && (i = r.dangerouslySetProtocol + ":" + i),
        r.querystring = s,
        r.postdata = f,
        r.path = i
    }
    n()
};

看到这里,这应该是一段经过压缩混淆的js。后来找到一篇文章《淘宝sign加密算法》,于是根据抓包的参数,按照文中的公式来计算sign,验证算法是否正确。可能由于时间戳没有取整,或者编码等其他原因,算出的sign值没有和正确值对上;又看到文中贴的代码比我找到的js更加简单,以为淘宝更新了算法。再后来一位知友提示说算法没有问题(在此特别感谢知友Jaho及文章作者小歪),才又抓包试了一次。这次将毫秒时间戳转为了int,并将字符串进行utf8编码,算出来的sign值和抓包中的一致。后来通过js断点调试也验证了公式的正确性。sign生成公式为:md5Hex(token&t&appKey&data)。其中token取自cookie(获取方式见下文),t表示毫秒时间戳,appKey一般为固定值,data为请求参数。以本文中的例子来说,就是对字符串6b2310e47a950f1c8c3fe6ec792f356a&1544165283634&12574478&{"item_num_id":"554401452734"}进行md5转换。

知道了sign算法,接下来就是token的获取。根据知乎文中作者所说,第一次请求时服务端会设置cookie,于是使用Postman清理cookie后再请求。此时sign传任何值均无影响,接口会返回:

1
2
3
4
5
6
mtopjsonp2({
"api": "mtop.wdetail.getItemDescx",
"v": "4.9",
"ret": ["FAIL_SYS_TOKEN_EXOIRED::令牌过期"],
"data": {}
})

并设置两个cookie:一个是_m_h5_tk,值为6b2310e47a950f1c8c3fe6ec792f356a_1544173923544;另一个是_m_h5_tk_enc,计算sign时用不到它(下面的示例代码会在第二次请求时把它连同_m_h5_tk一起带上)。我们要的token就是_m_h5_tk的值按下划线_分割后的前半部分。这样一来,我们请求两次接口:第一次拿到token,第二次请求拿到数据。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
{
"api": "mtop.wdetail.getItemDescx",
"data": {
"pages": [
"<txt>特别提醒:此款收纳箱尺寸测量方法:(盖上盖子测量长宽高)长度=盖子+把手。手工量的,稍许误差,谢谢谅解! 注意:不要把本店的L数跟超市的比较,本店的L数是网上笼统的叫法,以箱子实际尺寸为主,介意的亲请勿拍!建议:店长推荐拍套装几个套在一起 可以抗摔 。。10L不建议购买只能装一些小东西人工测量会有2cm左右误差属于正常现象温馨提醒:塑料属于易碎品,为了免去亲的麻烦,请亲在收货时先拆开检查后在签收,如有破损请直接拒收</txt><img>https://img.alicdn.com/imgextra/i3/2651141793/TB2kQKqbgsSMeJjSspcXXXjFXXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i1/2651141793/TB2UkPszC8mpuFjSZFMXXaxpVXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i3/2651141793/TB2dJSBzstnpuFjSZFKXXalFFXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i4/2651141793/TB2U3i5zxxmpuFjSZFNXXXrRXXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i1/2651141793/TB22qfEzr4npuFjSZFmXXXl4FXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i2/2651141793/TB2_8HcvbJkpuFjy1zcXXa5FFXa_!!2651141793.jpg</img>",
"<img>https://img.alicdn.com/imgextra/i1/2651141793/TB2f7YszC8mpuFjSZFMXXaxpVXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i3/2651141793/TB2B600zrBnpuFjSZFGXXX51pXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i4/2651141793/TB2C.HmvgFkpuFjSspnXXb4qFXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i3/2651141793/TB25AYszC8mpuFjSZFMXXaxpVXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i3/2651141793/TB2Tt1BzstnpuFjSZFKXXalFFXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i3/2651141793/TB2FNPgvgxlpuFjSszbXXcSVpXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i2/2651141793/TB26j40zrBnpuFjSZFGXXX51pXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i4/2651141793/TB2uDGozCVmpuFjSZFFXXcZApXa_!!2651141793.jpg</img>",
"<img>https://img.alicdn.com/imgextra/i1/2651141793/TB2Bx9ntxRDOuFjSZFzXXcIipXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i2/2651141793/TB2nSblvm0jpuFjy0FlXXc0bpXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i2/2651141793/TB2wtaQvbXlpuFjSszfXXcSGXXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i4/2651141793/TB2k1vfvl4lpuFjy1zjXXcAKpXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i1/2651141793/TB26HbQvbFkpuFjy1XcXXclapXa_!!2651141793.jpg</img><img>https://img.alicdn.com/imgextra/i3/2651141793/TB24miYzypnpuFjSZFIXXXh2VXa_!!2651141793.jpg</img><txt>.................welcome20160618152824</txt>"
],
"images": [
"https://img.alicdn.com/imgextra/i3/2651141793/TB2kQKqbgsSMeJjSspcXXXjFXXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i1/2651141793/TB2UkPszC8mpuFjSZFMXXaxpVXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i3/2651141793/TB2dJSBzstnpuFjSZFKXXalFFXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i4/2651141793/TB2U3i5zxxmpuFjSZFNXXXrRXXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i1/2651141793/TB22qfEzr4npuFjSZFmXXXl4FXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i2/2651141793/TB2_8HcvbJkpuFjy1zcXXa5FFXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i1/2651141793/TB2f7YszC8mpuFjSZFMXXaxpVXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i3/2651141793/TB2B600zrBnpuFjSZFGXXX51pXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i4/2651141793/TB2C.HmvgFkpuFjSspnXXb4qFXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i3/2651141793/TB25AYszC8mpuFjSZFMXXaxpVXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i3/2651141793/TB2Tt1BzstnpuFjSZFKXXalFFXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i3/2651141793/TB2FNPgvgxlpuFjSszbXXcSVpXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i2/2651141793/TB26j40zrBnpuFjSZFGXXX51pXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i4/2651141793/TB2uDGozCVmpuFjSZFFXXcZApXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i1/2651141793/TB2Bx9ntxRDOuFjSZFzXXcIipXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i2/2651141793/TB2nSblvm0jpuFjy0FlXXc0bpXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i2/2651141793/TB2wtaQvbXlpuFjSszfXXcSGXXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i4/2651141793/TB2k1vfvl4lpuFjy1zjXXcAKpXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i1/2651141793/TB26HbQvbFkpuFjy1XcXXclapXa_!!2651141793.jpg",
"https://img.alicdn.com/imgextra/i3/2651141793/TB24miYzypnpuFjSZFIXXXh2VXa_!!2651141793.jpg"
]
},
"ret": [
"SUCCESS::接口调用成功"
],
"v": "4.9"
}

其中的images数组就是我想要的数据,这里返回值不是标准的json,套了一层mtopjsonp2({JSON}),于是获取结果后还要根据正则截取一下json的文本。

附上部分相关代码,python版本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def get_images_from_mtop(num_iid):
    """Fetch the description image URLs of a Taobao item via the mtop H5 API.

    Two requests are made against ``mtop.wdetail.getItemDescx``:

    1. a request with a dummy ``sign``, only to receive the ``_m_h5_tk`` and
       ``_m_h5_tk_enc`` cookies (the token is the part of ``_m_h5_tk`` before
       the first underscore);
    2. the real request, signed with ``md5(token&t&appKey&data)`` and sent
       with both cookies.

    Args:
        num_iid: Taobao item id; it is interpolated into the ``data`` payload.

    Returns:
        The list found under ``data.images`` in the response, or an empty list
        when any step fails. Failures are logged as warnings, not raised.
    """
    APPKEY = '12574478'
    DATA = '{"item_num_id":"%s"}' % num_iid
    URL = 'https://h5api.m.taobao.com/h5/mtop.wdetail.getitemdescx/4.9/'
    # Seconds to wait for each HTTP call; without it a stalled connection
    # would block the caller indefinitely.
    TIMEOUT = 10
    params = {'jsv': '2.4.11', 'appKey': APPKEY, 't': int(time.time() * 1000),
              'sign': 'FAKE_SIGN_WITH_ANYTHING', 'api': 'mtop.wdetail.getItemDescx', 'v': '4.9',
              'type': 'jsonp', 'dataType': 'jsonp', 'callback': 'mtopjsonp2',
              'data': DATA}
    headers = {
        # A mobile UA is used so the H5 endpoint is served.
        'User-Agent': ('Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_4 like Mac OS X) AppleWebKit/601.1.46 '
                       '(KHTML, like Gecko) Version/9.0 Mobile/13G35 Safari/601.1'),
    }
    images = []
    try:
        # get token in first request
        r1 = requests.get(URL, params=params, headers=headers, timeout=TIMEOUT)
        token_with_time = r1.cookies.get('_m_h5_tk')
        if not token_with_time:
            # Without the cookie there is nothing to sign with; report it
            # explicitly instead of failing on None.split().
            logger.warning('mtop response did not set the _m_h5_tk cookie')
            return images
        token = token_with_time.split('_')[0]
        enc_token = r1.cookies.get('_m_h5_tk_enc')
        logger.debug(r1.cookies)
        # get results in second request
        t2 = str(int(time.time() * 1000))
        c = '&'.join([token, t2, APPKEY, DATA])
        m = hashlib.md5()
        m.update(c.encode('utf-8'))
        params.update({'t': t2, 'sign': m.hexdigest()})
        cookies = {'_m_h5_tk': token_with_time, '_m_h5_tk_enc': enc_token}
        r2 = requests.get(URL, params=params, headers=headers, cookies=cookies,
                          timeout=TIMEOUT)
        logger.debug(r2.text)
        # Strip the JSONP wrapper "callback( ... )": take everything between
        # the FIRST "(" and the LAST ")". re.S lets "." span newlines, and
        # anchoring on the first "(" keeps parentheses inside the JSON payload
        # from truncating it.
        wrapped = re.search(r'\((.*)\)', r2.text, re.S)
        if wrapped is None:
            logger.warning('mtop response is not JSONP: %r', r2.text[:200])
            return images
        images = json.loads(wrapped.group(1))['data']['images']
    except Exception as e:
        logger.warning(e)
    # Returning here rather than from a `finally` clause keeps
    # KeyboardInterrupt / SystemExit from being silently discarded.
    return images

可以看到,接口响应速度有了大幅度提升。