Sorting Ali Trial (try.taobao.com) listings
Apologies: the configuration file was inexplicably ignored (left out) before. This has now been fixed — sorry about that.
Abstract prospects
To put it plainly, I am a disgrace to straight men everywhere. Last night I was happily working on a freelance job (a China Mobile mini program — side work to pay the bills) when, a little after 11 p.m., my girlfriend suddenly had an idea: "Can you help me sort this stuff? I want to pick out the best freebies — technology is supposed to make life easier, right?"
I took a look, feeling rather helpless. Ali Trial? What on earth... oh, just this. So what it needs is a web crawler. But I am a front-end developer...
I replied: "No problem, it's just a crawler."
She: "Wow, how long will it take?"
Me: "I'm a bit busy... one or two hours, I guess."
She: "Then take a break from that and hurry up and help me!"
I looked at her face and, somewhat ashamed, minimized the "WeChat Developer Tools"...
Page display
If you think this is an advertisement, you flatter me.
Building the crawler
Search Baidu for "Node.js crawler" and ready-made code is everywhere. I did not analyze the results one by one; I simply took a short snippet from a Jianshu post:
const express = require('express');
const superagent = require('superagent');
const cheerio = require('cheerio');

// express is a function; calling it with no arguments returns an express
// application instance, which is kept in `app`.
const app = express();

/**
 * GET / — downloads https://www.v2ex.com/ and responds with an array of
 * `{ title, href }` objects, one per element matched by `.item_title a`.
 * Download errors are forwarded to express through `next`.
 */
app.get('/', (req, res, next) => {
  // The incoming request object is intentionally not logged: dumping it on
  // every hit floods the console and prints request headers/cookies.
  superagent.get('https://www.v2ex.com/')
    .end((err, sres) => {
      // Standard error handling.
      if (err) {
        return next(err);
      }
      // sres.text holds the page's HTML. Passing it to cheerio.load yields an
      // object implementing the jQuery interface, conventionally named `$`;
      // everything after that is plain jQuery.
      const $ = cheerio.load(sres.text);
      const items = [];
      $('.item_title a').each((idx, element) => {
        const $element = $(element);
        items.push({
          title: $element.text(),
          href: $element.attr('href')
        });
      });
      res.send(items);
    });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
Anyone who has used Node.js will know express. superagent can be thought of as a library for making outgoing HTTP requests from inside Node, and cheerio is essentially jQuery for Node.
First crawl
Change the request address above to: https://try.taobao.com/
Inspect the page's markup and find the selector we need:
.tb-try-wd-item-info > .detail
and use it to replace the selector .item_title a above,
then run it:
...I would rather not show the result, because it returned only six items while the real page lists ten per page. After digging for a long time I found two problems:
First, the six items that were crawled are the "recommended" ones at the top; the list below them was not captured.
Second, the list below is loaded separately through a POST request — evidently the page is not rendered by a proper SSR framework.
So plain page scraping will not work; the strategy has to change.
Simulating the POST
OK, since it is a POST, take the URL together with its parameters directly (just as with a GET) and simulate the request with superagent:
// Replays the list request as a POST; the page number travels in the query
// string. `paylaod`, `next` and `resolve` are taken from the enclosing scope,
// which is not part of this snippet.
superagent
  .post(
    `https://try.taobao.com/api3/call?what=show&page=${paylaod.page}&pageSize&api=x%2Fsearch`
  )
  .set('content-type', 'application/x-www-form-urlencoded; charset=UTF-8')
  .end((err, sres) => {
    // Standard error handling.
    if (err) {
      return next(err)
    }
    // The body may not be JSON (for example when the request is rejected).
    // An exception thrown inside this callback would be uncaught, so parse
    // failures are forwarded to `next` like any other error.
    let result
    try {
      result = JSON.parse(sres.text).result // the returned structure tree
    } catch (parseErr) {
      return next(parseErr)
    }
    resolve(result)
  })
The content-type comes from:
Well, you guessed it — it failed, as shown below:
On reflection that was inevitable: why would it let you request the data that easily? So what now? Study it carefully? No, no, no — I just go all in. It is only request headers like Content-Type, after all!
// Replays the list request as a POST while sending the same headers a browser
// would send. `paylaod`, `next` and `resolve` are taken from the enclosing
// scope, which is not part of this snippet.
superagent
  .post(
    `https://try.taobao.com/api3/call?what=show&page=${paylaod.page}&pageSize&api=x%2Fsearch`
  )
  .set(
    'user-agent',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
  )
  // Fixed: the media type was misspelled as 'pplication/json'.
  .set('accept', 'application/json, text/javascript, */*; q=0.01')
  .set('accept-encoding', 'gzip, deflate, br')
  .set(
    'accept-language',
    'zh-CN,zh;q=0.9,en;q=0.8,la;q=0.7,zh-TW;q=0.6,da;q=0.5'
  )
  // NOTE(review): left disabled on purpose — according to the accompanying
  // write-up, sending this header appears to make the request time out.
  // .set('content-length', '8')
  .set('content-type', 'application/x-www-form-urlencoded; charset=UTF-8')
  .set(
    'cookie',
    'your cookie' // placeholder value; replace with a real cookie string
  )
  .set('origin', 'https://try.taobao.com')
  .set('referer', 'https://try.taobao.com')
  // NOTE(review): hard-coded token; presumably it has to match the session
  // the cookie belongs to — confirm before relying on it.
  .set('x-csrf-token', 'f0b8e7443eb7e')
  .set('x-requested-with', 'XMLHttpRequest')
  .end((err, sres) => {
    // Standard error handling.
    if (err) {
      return next(err)
    }
    // The body may not be JSON. An exception thrown inside this callback
    // would be uncaught, so parse failures are forwarded to `next` as well.
    let result
    try {
      result = JSON.parse(sres.text).result
    } catch (parseErr) {
      return next(parseErr)
    }
    resolve(result)
  })
This is based on the following:
Isn't it just the headers — the origin, the referer, the user agent, and the fact that the request goes over HTTPS?
Note the commented-out .set('content-length', '8') above:
I do not know how the other side handles it, but adding it makes the request time out...
With that, the data comes back:
{
"pages": {
"paging": {
"n": 2182,
"page": 1,
"pages": 219
},
"items": [
{
"shopUserId": "2450112357",
"title": "凯度高端款嵌入式蒸烤箱",
"status": 1,
"totalNum": 1,
"requestNum": 15530,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "casdon凯度旗舰店",
"showId": "2561626",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34530215",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1ycS2eMDqK1RjSZSyXXaxEVXa.jpg",
"shopItemId": "559771706359",
"price": 13850
},
{
"shopUserId": "3189770892",
"title": "皇家美素佳儿老包装2段400g",
"status": 1,
"totalNum": 50,
"requestNum": 2079,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "皇家美素佳儿旗舰店",
"showId": "2551240",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34396042",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1YrSZaVYqK1RjSZLeXXbXppXa.jpg",
"shopItemId": "547114874458",
"price": 189
},
{
"shopUserId": "1077716829",
"title": "关注店铺优先审水密码幻彩隔离",
"status": 1,
"totalNum": 10,
"requestNum": 6907,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "水密码旗舰店",
"showId": "2568391",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34784086",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB16_4ChmzqK1RjSZPxXXc4tVXa.jpg",
"shopItemId": "559005882880",
"price": 599
},
{
"shopUserId": "725786863",
"title": "精品皮草派克大衣",
"status": 1,
"totalNum": 1,
"requestNum": 11793,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "美瑞蓓特",
"showId": "2557886",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34574078",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1zVLMdCrqK1RjSZK9XXXyypXa.jpg",
"shopItemId": "577418950477",
"price": 5980
},
{
"shopUserId": "3000840351",
"title": "保友智能新品Pofit电脑椅",
"status": 1,
"totalNum": 1,
"requestNum": 12895,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "保友办公家具旗舰店",
"showId": "2557100",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34528042",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1bYZEg6TpK1RjSZKPXXa3UpXa.png",
"shopItemId": "577598687971",
"price": 5408
},
{
"shopUserId": "791732485",
"title": "TEK手持吸尘器A8",
"status": 1,
"totalNum": 1,
"requestNum": 17195,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "泰怡凯旗舰店",
"showId": "2552265",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34444014",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1D6bWbhTpK1RjSZFGXXcHqFXa.jpg",
"shopItemId": "547653053965",
"price": 5199
},
{
"shopUserId": "3229583972",
"title": "椰富海南冷炸椰子油食用油1L",
"status": 1,
"totalNum": 20,
"requestNum": 4451,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "椰富食品专营店",
"showId": "2561698",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34532250",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1VjLSePDpK1RjSZFrXXa78VXa.jpg",
"shopItemId": "578653506446",
"price": 256
},
{
"shopUserId": "855223948",
"title": "卡西欧立式家用电钢琴PX770",
"status": 1,
"totalNum": 1,
"requestNum": 16762,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "世纪音缘乐器专营店",
"showId": "2551326",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34420041",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1CC6aa9zqK1RjSZFpXXakSXXa.jpg",
"shopItemId": "562405126383",
"price": 4838
},
{
"shopUserId": "4065939832",
"title": "关注宝贝送轻奢沙发床",
"status": 1,
"totalNum": 1,
"requestNum": 17436,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "贝兮旗舰店",
"showId": "2559904",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34532170",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1AzxYegHqK1RjSZFPXXcwapXa.jpg",
"shopItemId": "577798067313",
"price": 4399
},
{
"shopUserId": "807974445",
"title": "森海塞尔CX6蓝牙耳机",
"status": 1,
"totalNum": 4,
"requestNum": 22557,
"acceptNum": 0,
"reportNum": 0,
"isApplied": false,
"shopName": "sennheiser旗舰店",
"showId": "2559701",
"startTime": 1539619200000,
"endTime": 1540220400000,
"id": "34532161",
"type": 1,
"pic": "//img.alicdn.com/bao/uploaded/TB1HET6d7voK1RjSZFwXXciCFXa.jpg",
"shopItemId": "564408956766",
"price": 999
}
]
}
}
Careful readers will have noticed that I never sent a form body, yet the desired data can still be requested: the page number is carried in the query string...
Display section
With the data in hand the rest is simple — it is really just user-interface work. And yes, now I remember: I am a front-end developer.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>tb try</title>
<style>
.warning {
color: red;
}
button {
width: 100px;
height: 44px;
margin-right: 44px;
}
table {
border: 1px solid #d8d8d8;
border-collapse: collapse;
}
tr {
border-bottom: 1px solid #d8d8d8;
cursor: pointer;
}
tr:last-child {
border: 0;
}
</style>
</head>
<body>
<button onclick="postPage()">下一页</button>
<span id="currentPage"></span>
<table>
<tbody>
<tr>
<th>序号(倒序)</th>
<th>概率</th>
<th>名字</th>
</tr>
</tbody>
<tbody id="results"></tbody>
</table>
<script>
// Shared state and DOM handles used by the script's functions.
const loopInterval = 2 // throttle step: minimum gap between requests, in seconds
const xhr = new XMLHttpRequest()
const currentPageText = document.getElementById('currentPage')
const results = document.getElementById('results')
let allItems = [] // all data received so far
let currentPage = 0 // current page
let currentTime = 0 // throttling: timestamp of the previous request
/**
 * Re-renders the results table body from `arr` and updates the page label.
 *
 * For every item the win rate (`totalNum / requestNum * 100`) is stored on
 * `item.rate`; rows whose rate is above 5 get the `warning` class. Clicking a
 * row opens the item's page on try.taobao.com.
 *
 * Item fields come from a remote API, so they are escaped before being placed
 * into markup, and the target URL is carried in a data attribute instead of
 * being spliced into an inline JavaScript string.
 *
 * @param {Array<Object>} arr - Items with `id`, `title`, `totalNum`, `requestNum`.
 */
const reFullTBody = arr => {
  const htmlEscapes = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  }
  // Neutralizes characters that are significant in HTML text and attributes.
  const escapeHtml = value =>
    String(value).replace(/[&<>"']/g, ch => htmlEscapes[ch])

  let innerHtml = ''
  arr.forEach((item, i) => {
    item.rate = item.totalNum / item.requestNum * 100
    const href = `https://try.taobao.com/item.htm?id=${encodeURIComponent(item.id)}`
    const warningAttr = item.rate > 5 ? ' class="warning"' : ''
    innerHtml += `
<tr${warningAttr} data-href="${escapeHtml(href)}" onclick="window.open(this.dataset.href)">
<td>${i + 1}</td>
<td>${item.rate.toFixed(3) + '%'}</td>
<td>${escapeHtml(item.title)}</td>
</tr>
`
  })
  currentPageText.innerText = `当前页:${currentPage}`
  results.innerHTML = innerHtml
}
/**
 * Requests page `currentPage` from POST /table, merges the returned items
 * into `allItems`, sorts everything by win rate (highest first), re-renders
 * the table and then decrements `currentPage`.
 *
 * Calls made less than `loopInterval` seconds after the previously accepted
 * call are refused with an alert. An empty response produces an alert and
 * leaves the table and page number untouched.
 */
const postPage = () => {
  // Throttle: refuse requests made inside the throttle step.
  const now = Date.now()
  const isTooSoon = now - currentTime < loopInterval * 1000
  if (isTooSoon) {
    alert(`${loopInterval}秒内不要多次点击哦。`)
    return
  }
  currentTime = now
  xhr.onreadystatechange = function () {
    if (this.readyState !== 4 || this.status !== 200) return
    const res = JSON.parse(this.response)
    if (res.length < 1) {
      alert('今天结束的已经筛选完了')
      return
    }
    // `rate` has to exist before sorting. It used to be assigned only while
    // rendering, so freshly received items were compared as NaN and could end
    // up in the wrong place. The formula matches the one used for rendering.
    res.forEach(item => {
      item.rate = item.totalNum / item.requestNum * 100
    })
    allItems = [...allItems, ...res]
    allItems.sort((a, b) => b.rate - a.rate)
    reFullTBody(allItems)
    currentPage--
  }
  xhr.open('post', '/table')
  xhr.setRequestHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
  // Send the request.
  xhr.send(`page=${currentPage}`)
}
// Start-up: GET /total, copy the `pages` field of the response into
// `currentPage`, then immediately load that page through postPage().
xhr.onreadystatechange = function () {
  const isDone = this.readyState === 4 && this.status === 200
  if (!isDone) return
  const { pages } = JSON.parse(this.response)
  currentPage = pages
  postPage()
}
xhr.open('get', '/total')
xhr.send()
</script>
</body>
</html>
It looks like this:
Mine is rather user-friendly: you can click a row to jump to the item, rows with a probability above 5% are shown in red, the current page number is displayed, and if you click too quickly you get a warning...
It is that easy to use — if you like it, go and try it!
Online demo: click here to try it
Github: Spider
If you find it useful, please don't be stingy with a star.