node 爬虫

服务端

const https = require("https"),

http= require("http"),

fs = require('fs');

const url = require('url'); //导入url

let cheerio = require('cheerio');


 

//开启服务,监听8888端口

//端口号最好为6000以上

/**
 * Scraper HTTP server, listening on port 8888.
 *
 * For every request it reads the `url` query parameter, fetches that page
 * over HTTPS, extracts the contact paragraphs with `parseHtml`, appends the
 * result (as a JSON array followed by a comma) to ./111.txt, and replies with
 * the full contents of that file.
 *
 * Every code path ends the response: 400 for a missing/invalid `url`,
 * 502 when the upstream fetch fails, 500 when the log file cannot be
 * written or read, 200 with the file contents otherwise.
 *
 * NOTE(review): the endpoint fetches whatever host the client names and is
 * open to every origin (CORS "*"), so it can be used to reach internal
 * hosts. Restrict the allowed hosts before exposing it beyond localhost.
 */
var server = http.createServer(function (req, res) {
  // Let pages served from any origin call this endpoint.
  res.setHeader("Access-Control-Allow-Origin", "*");

  const target = parseTargetUrl(url.parse(req.url, true).query.url);
  if (!target) {
    sendText(res, 400, 'Missing or invalid "url" query parameter');
    return;
  }

  const options = {
    // `hostname` excludes the port; `host` would be "example.com:8443".
    hostname: target.hostname,
    path: target.path,
    port: target.port || 443,
    method: 'GET',
    headers: {
      // The body is read as plain text, so ask for an uncompressed response.
      'Accept-Encoding': 'identity'
    }
  };

  // Fetch the target page over HTTPS.
  const upstream = https.get(options, function (res1) {
    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not corrupted by string concatenation.
    res1.setEncoding('utf8');

    let html = '';
    res1.on('data', function (chunk) {
      html += chunk;
    });

    res1.on('error', function (err) {
      console.log("upstream response failed: " + err.message);
      sendText(res, 502, 'Failed to read the target page');
    });

    // The whole page has arrived: parse it, log it, then return the log.
    res1.on('end', function () {
      const record = JSON.stringify(parseHtml(html)) + ",";

      fs.appendFile("./111.txt", record, (error) => {
        if (error) {
          console.log("追加文件失败" + error.message);
          sendText(res, 500, 'Failed to store the scraped data');
          return;
        }
        console.log("追加成功");

        fs.readFile('./111.txt', 'utf8', function (err, data) {
          if (err) {
            console.log(33);
            sendText(res, 500, 'Failed to read the scraped data');
            return;
          }
          console.log(22);
          sendText(res, 200, data);
        });
      });
    });
  });

  // Without this listener a DNS/TLS/connection failure is an unhandled
  // 'error' event and takes the whole server process down.
  upstream.on('error', function (err) {
    console.log("upstream request failed: " + err.message);
    sendText(res, 502, 'Failed to fetch the target page');
  });
}).listen(8888);

/**
 * Parses the client-supplied target URL.
 *
 * @param {*} rawUrl - Value of the `url` query parameter (may be undefined,
 *   or an array when the parameter is repeated).
 * @returns {?object} The `url.parse` result, or null when the value is not a
 *   non-empty string or has no hostname (e.g. the scheme is missing).
 */
function parseTargetUrl(rawUrl) {
  if (typeof rawUrl !== 'string' || rawUrl === '') {
    return null;
  }
  const parsed = url.parse(rawUrl);
  return parsed.hostname ? parsed : null;
}

/**
 * Sends a complete response exactly once.
 *
 * @param {http.ServerResponse} res - Response to finish.
 * @param {number} statusCode - HTTP status code to send.
 * @param {string} body - Response body.
 */
function sendText(res, statusCode, body) {
  // Several failure paths can fire for one request; only the first one wins.
  if (res.headersSent) {
    return;
  }
  res.writeHead(statusCode, {
    'content-type': 'text/html;charset="utf-8"'
  });
  res.end(body);
}

//解析DOM结构

/**
 * Collects the text of every <p> element nested under the element whose id
 * is "contact".
 *
 * @param {string} result - HTML markup handed to `cheerio.load`.
 * @returns {string[]} One entry per matched paragraph, in the order cheerio
 *   matched them; an empty array when nothing matches.
 */
function parseHtml(result) {
  const $ = cheerio.load(result);
  const paragraphs = $('#contact p').toArray();
  return paragraphs.map((paragraph) => $(paragraph).text());
}

前端

<!DOCTYPE html>
<!--
  Front-end for the scraper: a URL input, a button that triggers get_data(),
  and a table rendered by Vue from the `list` array.
-->
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <title>Document</title>
</head>
<body>
<div id="app">
  <!-- The script reads this field through the `.form-control` selector. -->
  <div class="input-group">
    <span class="input-group-addon" id="basic-addon1">网址</span>
    <input type="text" class="form-control" placeholder="在这里输入网址" aria-describedby="basic-addon1">
  </div>
  <button type="button" class="btn btn-success" @click="get_data()">抓取</button>
  <div class="bs-example" data-example-id="striped-table">
    <table class="table table-striped">
      <thead>
        <tr>
          <th scope="col">#</th>
          <th scope="col">邮 编</th>
          <th scope="col">联系电话</th>
          <th scope="col">传 真</th>
          <th scope="col">地 址</th>
        </tr>
      </thead>
      <tbody>
        <!-- One row per scraped record; cell order matches the header above. -->
        <tr v-for="(item,index) in list" :key="index">
          <th scope="row">{{index}}</th>
          <td>{{item.yb}}</td>
          <td>{{item.tel}}</td>
          <td>{{item.cz}}</td>
          <td>{{item.dz}}</td>
        </tr>
      </tbody>
    </table>
  </div>
</div>
</body>

<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">
<!-- jQuery has to be loaded before Bootstrap's JS, which requires it at load time. -->
<!-- NOTE(review): this jQuery URL is plain http and will be blocked as mixed content if the page is served over https; confirm the CDN before switching schemes. -->
<script src="http://libs.baidu.com/jquery/2.0.0/jquery.min.js"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/js/bootstrap.min.js" integrity="sha384-ChfqqxuZUCnJSK3+MXmPNIyE6ZbWh2IMqE241rYiqJxyMiZ6OW/JmZQ5stwEULTy" crossorigin="anonymous"></script>
<!-- Pinned to Vue 2: the page uses `new Vue(...)`, and the unpinned package now resolves to Vue 3, which has no dist/vue.js. -->
<script src="https://cdn.jsdelivr.net/npm/vue@2/dist/vue.js"></script>

<script>

/**
 * Boots the Vue app once the page has loaded.
 *
 * The app shows a table of contact records. `get_data` sends the URL typed
 * into the `.form-control` input to the scraper at http://localhost:8888 and
 * replaces the table contents with the records the server returns.
 */
window.onload = function () {
  /**
   * Removes the leading label from a scraped paragraph.
   *
   * @param {*} text - Paragraph text; anything that is not a string yields ''.
   * @param {number} labelLength - Number of leading characters to drop.
   * @returns {string} The text after the label.
   */
  const stripLabel = (text, labelLength) =>
    typeof text === 'string' ? text.substring(labelLength) : '';

  /**
   * Converts the raw server reply into table rows.
   *
   * The server replies with JSON arrays each followed by a comma, e.g.
   * `["a","b","c","d"],["e","f","g","h"],`, so the trailing comma is dropped
   * and the rest is wrapped in brackets to form one valid JSON array.
   *
   * @param {string} raw - Response body from the scraper.
   * @returns {object[]} Rows with `tel`, `cz`, `dz` and `yb` fields.
   * @throws {SyntaxError} When the reply is not valid once wrapped.
   */
  const toRows = (raw) => {
    const body = String(raw).trim().replace(/,$/, '');
    return JSON.parse('[' + body + ']')
      // A page with no matching paragraphs is stored as []; skip it instead
      // of letting one empty entry break the whole table.
      .filter((fields) => Array.isArray(fields) && fields.length > 0)
      .map((fields) => ({
        "tel": stripLabel(fields[0], 5),
        "cz": stripLabel(fields[1], 4),
        "dz": stripLabel(fields[2], 4),
        "yb": stripLabel(fields[3], 4)
      }));
  };

  const app = new Vue({
    el: '#app',
    data: {
      // Seed row shown before the first scrape. The template reads `yb`
      // for the postal code column.
      list: [{"tel": '86-21-5386-6500', "cz": '86-21-5386-6668', "dz": "上海市太仓路233号新茂大厦21层2105室", "yb": 200020}]
    },
    methods: {
      get_data: function () {
        const targetUrl = String($('.form-control').val() || '').trim();
        if (targetUrl === '') {
          // Nothing to scrape; avoid a pointless request.
          return;
        }

        $.ajax({
          type: "GET",
          url: "http://localhost:8888",
          // Keep the reply as a raw string; it is not valid JSON as sent.
          dataType: "text",
          data: {
            url: targetUrl
          },
          success: (res) => {
            try {
              this.list = toRows(res);
            } catch (err) {
              // Leave the current table untouched when the reply is unusable.
              console.error('Could not parse scraper response', err);
            }
          },
          error: (xhr, status, message) => {
            console.error('Scraper request failed: ' + status + ' ' + message);
          }
        });
      }
    }
  });
};

</script>

</html>

猜你喜欢

转载自blog.csdn.net/qq_38389828/article/details/86060328