Node.js study notes -- rewriting an HTTP crawler with Promises

Note: This post contains my study notes from the course "Attacking Node.js Basics (1)"; thanks to Mr. Scott for teaching it.

1. Use Promise to handle asynchrony and nesting

1. Use traditional callbacks to perform ball animations sequentially

<!doctype>
<html>
<head>
    <title>Promise animation</title>
    <style>
        .ball{
            width: 40px;
            height:40px;
            border-radius: 20px;
        }
        .ball1{
            background: red;
        }
        .ball2{
            background: yellow;
        }
        .ball3{
            background: green;
        }
    </style>
    <!-- Load the bluebird library to get a Promise implementation. bluebird was installed
         one level above the Desktop, in the Administrator folder. The attribute must be
         spelled "src" (not "scr") or the browser ignores it, and a local file has to be
         referenced as a file:// URL with forward slashes. -->
    <script src="file:///C:/Users/Administrator/node_modules/bluebird/js/browser/bluebird.js"></script>
</head>
<body>
    <div class='ball ball1' style="margin-left: 0;"></div>
    <div class='ball ball2' style="margin-left: 0;"></div>
    <div class='ball ball3' style="margin-left: 0;"></div>
    <script>
        // Look up the three ball elements by their class names; the animation calls below use them.
        var ball1 = document.querySelector('.ball1')
        var ball2 = document.querySelector('.ball2')
        var ball3 = document.querySelector('.ball3')
        console.log(ball1) // log the first ball element to the console

        /**
         * Move a ball horizontally one pixel per tick until its left margin equals `distance`.
         *
         * @param {HTMLElement} ball - element whose inline `style.marginLeft` is animated.
         * @param {number} distance - target left margin, in pixels.
         * @param {Function} [callback] - called once, with no arguments, when the target is reached.
         */
        function animate(ball, distance, callback){
            // Each step runs 13 milliseconds after the previous one.
            setTimeout(function(){
                var marginLeft = parseInt(ball.style.marginLeft, 10)

                // The ball has reached the target, so the animation is done: run the callback (if any).
                if(marginLeft === distance){
                    callback && callback()
                    return
                }

                if(marginLeft < distance){  // ball is left of the target: move right
                    marginLeft++
                }else{                      // ball is right of the target: move left
                    marginLeft--
                }

                // A CSS length needs a unit. A bare number is only accepted in quirks mode,
                // so always append 'px' (this also matches promiseAnimate).
                ball.style.marginLeft = marginLeft + 'px'

                // Schedule the next step; this repeats until the ball reaches the target.
                animate(ball, distance, callback)
            }, 13)
        }

        // Traditional sequential execution with nested callbacks:
        // ball 1 moves right to 100px, then ball 2 moves right to 200px, then ball 3 to 300px;
        // afterwards balls 3, 2 and 1 move, one after another, to 150px.
        animate(ball1, 100, function afterBall1Out(){
            animate(ball2, 200, function afterBall2Out(){
                animate(ball3, 300, function afterBall3Out(){
                    animate(ball3, 150, function afterBall3Back(){
                        animate(ball2, 150, function afterBall2Back(){
                            animate(ball1, 150, function afterBall1Back(){
                                // end of the sequence: nothing further to do
                            })
                        })
                    })
                })
            })
        })
    </script>
</body>
</html>

Effect:
(Image placeholder: the original post showed a picture of the animation here.)
2. Use the Promise method to rewrite the same ball animation in order

This version uses the Promise function from bluebird. The logic is the same as above; only the way the function is declared differs. With the nested style above, changing the order of the balls or adding other actions is very troublesome. With Promises, by contrast, the sequence of actions is written as a linear chain. A Promise is an object with a .then() method; it is an abstraction for asynchronous programming.

<!DOCTYPE html>
<html>
<head>
    <title>Promise animation</title>
    <style>
        .ball{
            width: 40px;
            height:40px;
            border-radius: 20px;
        }
        .ball1{
            background: red;
        }
        .ball2{
            background: yellow;
        }
        .ball3{
            background: green;
        }
    </style>
    <!-- Load the bluebird library to get a Promise implementation. bluebird was installed
         one level above the Desktop, in the Administrator folder. The attribute must be
         spelled "src" (not "scr") or the browser ignores it, and a local file has to be
         referenced as a file:// URL with forward slashes. -->
    <script src="file:///C:/Users/Administrator/node_modules/bluebird/js/browser/bluebird.js"></script>
</head>
<body>
    <div class='ball ball1' style="margin-left: 0;"></div>
    <div class='ball ball2' style="margin-left: 0;"></div>
    <div class='ball ball3' style="margin-left: 0;"></div>
    <script>
        // Look up the three ball elements by their class names; the animation chain below uses them.
        var ball1 = document.querySelector('.ball1')
        var ball2 = document.querySelector('.ball2')
        var ball3 = document.querySelector('.ball3')
        console.log(ball1) // log the first ball element to the console

        var Promise = window.Promise // Promise now seems to be supported natively, so importing a library should no longer be necessary

        /**
         * Promise-based ball animation: moves `ball` one pixel per tick until its
         * left margin equals `distance`, then fulfils the returned promise.
         *
         * @param {HTMLElement} ball - element whose inline `style.marginLeft` is animated.
         * @param {number} distance - target left margin, in pixels.
         * @returns {Promise} fulfilled (with no value) once the ball reaches the target.
         */
        function promiseAnimate(ball, distance){
            return new Promise(function(resolve){
                // Queue the next tick 13 milliseconds from now.
                function schedule(){
                    setTimeout(tick, 13)
                }

                // One animation tick: finish if the target is reached, otherwise move one pixel toward it.
                function tick(){
                    var current = parseInt(ball.style.marginLeft, 10)

                    if(current === distance){
                        resolve()  // the animation is complete
                        return
                    }

                    // Left of the target: step right; otherwise step left.
                    var next = current < distance ? current + 1 : current - 1
                    ball.style.marginLeft = next + 'px'
                    schedule()
                }

                schedule() // kick off the first tick
            })
        }

        // How it works: .then() always returns a new Promise. It accepts two arguments:
        // the first is the callback run when the previous step succeeds, the second the
        // callback run when it fails. Returning a promise from a callback makes the next
        // .then() wait for it, so the six moves run strictly one after another.
        promiseAnimate(ball1, 100)
            .then(function moveBall2Out(){ return promiseAnimate(ball2, 200) })
            .then(function moveBall3Out(){ return promiseAnimate(ball3, 300) })
            .then(function moveBall3Back(){ return promiseAnimate(ball3, 150) })
            .then(function moveBall2Back(){ return promiseAnimate(ball2, 150) })
            .then(function moveBall1Back(){ return promiseAnimate(ball1, 150) })
    </script>
</body>
</html>

2. Rewrite the small crawler with Promise

This section rewrites the small HTTP crawler from the previous article in these Node.js study notes.

// Refactor the small crawler with Promises, removing the earlier callbacks
var http = require('http')
// Newer versions of Node.js provide Promise natively.
// NOTE(review): this requires a module named 'Promise'; the lookup is case-sensitive on
// some file systems, so confirm the installed package name (or rely on the native Promise).
var Promise = require('Promise') //新版本的nodejs可以直接引用Promise了
var cheerio = require('cheerio')   // a library that offers jQuery-like syntax for quick element lookup
var url = 'http://www.imooc.com/learn/348'
var baseUrl = 'http://www.imooc.com/learn/'

/**
 * Extract the chapter/video structure from the HTML of a course page.
 *
 * Result shape:
 *   [
 *     {
 *       chapterTitle: '',
 *       videos: [
 *         { title: '', id: '' }
 *       ]
 *     }
 *   ]
 *
 * @param {string} html - HTML source of the course page.
 * @returns {Array} one entry per `.mod-chapters` element, in page order.
 */
function filterChapters(html){
    var $ = cheerio.load(html)
    var result = []

    // Visit every chapter block on the page.
    $('.mod-chapters').each(function(){
        var chapterNode = $(this)
        var entry = {
            chapterTitle: chapterNode.find('strong').text(),
            videos: []
        }

        // Each <li> under `.video` holds the link of one video.
        chapterNode.find('.video').children('li').each(function(){
            var link = $(this).find('.J-media-item')
            var title = link.text()  // all text content below the link element
            // The video id is whatever follows "video/" in the link's href.
            var videoId = link.attr('href').split('video/')[1]

            entry.videos.push({ title: title, id: videoId })
        })

        result.push(entry)  // store the assembled chapter
    })

    return result
}

/**
 * Print a course outline to the console: each chapter title, followed by one
 * "(id)title" line for every video in that chapter.
 *
 * @param {Array} courseData - chapters shaped as { chapterTitle, videos: [{ id, title }] }.
 */
function printCourseInfo(courseData){
    // Print a single video as "(id)title".
    function printVideo(video){
        console.log('(' + video.id + ')' + video.title + '\n')
    }

    // Walk the chapters in order: title first, then that chapter's videos.
    courseData.forEach(function(chapter){
        console.log(chapter.chapterTitle + '\n')
        chapter.videos.forEach(printVideo)
    })
}

/**
 * Fetch a page over HTTP and deliver its body through a promise.
 *
 * @param {string} url - address of the page to download.
 * @returns {Promise} fulfilled with the response body as a string; rejected with the
 *   error when the request or the response stream fails.
 */
function getPageAsync(url){
    return new Promise(function(resolve, reject){
        console.log('正在爬取 ' + url)

        http.get(url, function(res){
            var html = ''

            // Decode as UTF-8 before concatenating, so a multi-byte character that is
            // split across two chunks is not garbled.
            res.setEncoding('utf8')

            // 'data' fires repeatedly as chunks arrive; keep appending them to the string.
            res.on('data', function(data){
                html += data
            })

            // 'end' fires once the whole body has been received.
            res.on('end', function(){
                resolve(html)
            })

            res.on('error', reject)
        }).on('error', function(e){
            // The handler must declare the error parameter: referencing an undeclared `e`
            // here throws a ReferenceError and the promise would never be rejected.
            console.log('获取课程数据出错')
            reject(e)
        })
    })
}


// Several courses can be crawled at the same time.
// Ids of the course pages to fetch (each is appended to baseUrl); 348 is the course that `url` points to.
var videoIds = [348]

// One pending page download per course id.
var fetchCourseaArray = videoIds.map(function(id){
    return getPageAsync(baseUrl + id)
})

Promise
    .all(fetchCourseaArray)
    .then(function(pages){
        // Parse the HTML of every downloaded page into chapter/video data.
        var coursesData = pages.map(function(html){
            return filterChapters(html)
        })

        // Print the outline of each course.
        coursesData.forEach(function(courseData){
            printCourseInfo(courseData)
        })

        return coursesData
    })
    .catch(function(e){
        // Without a rejection handler a failed download would go unnoticed.
        console.error('Failed to crawl course pages:', e)
    })

Guess you like

Origin blog.csdn.net/sriting/article/details/79631626