Getting started: writing a web crawler with Node.js

I recently read about writing web crawlers with Node.js, so I tried it myself; this is a record of the whole process.

1. Create a new folder named baidunews.

2. Inside the new folder, run npm init to initialize the project.

3. After the initialization is complete, install the required dependencies:

  npm install express

  npm install cheerio

  npm install superagent

4. In the baidunews folder, create a new file named index.js.

5. Add the following code to the file:

  

const express = require('express');
const app = express();

// ...

// Start the HTTP server on port 3000. The callback runs once the server is
// listening, so `server` is already assigned when it reads the bound address.
const server = app.listen(3000, function () {
  const { address: host, port } = server.address();
  console.log('Your App is running at http://%s:%s', host, port);
});

/**
 * [Description] - root route
 */
// When a GET request arrives for http://localhost:3000, the async handler
// below runs and responds with the current contents of `hotNews`.
app.get('/', async (req, res, next) => {
  res.send(hotNews);
});

  // Import the required third-party packages
const superagent = require('superagent');

// Module-level state; `hotNews` is reassigned once the fetch completes.
let hotNews = [];                                 // Hot news
let localNews = [];                               // Local news

/**
 * index.js
 * [Description] - use superagent.get() to request the Baidu News home page
 */
superagent.get('http://news.baidu.com/').end((err, res) => {
  if (err) {
    // The request failed or an error occurred: log it and leave `hotNews` untouched.
    console.log(`Hot news failed to fetch - ${err}`);
    return;
  }
  // The request succeeded: `res` holds the response for http://news.baidu.com/.
  // Extract the hot-news entries from it.
  hotNews = getHotNews(res);
});


/**
 * index.js
 * [Description] - crawl the hot news from the page
 */
// Import the required third-party package
const cheerio = require('cheerio');

/**
 * Extract the hot-news links from a fetched page.
 *
 * @param {{text: string}} res - response object; the page HTML is read from `res.text`
 * @returns {Array<{title: string, href: (string|undefined)}>} one entry per matched
 *   anchor, in document order; empty when nothing matches
 */
const getHotNews = (res) => {
  const hotNews = [];
  // On success, the HTML of the requested http://news.baidu.com/ page is in res.text.

  /* cheerio.load() takes the HTML document as its argument; the returned `$`
     can then be used like jQuery's $(selector) to look up page elements.
   */
  const $ = cheerio.load(res.text);

  // Locate the elements that hold the target data and collect it.
  $('div#pane-news ul li a').each((idx, ele) => {
    // In cheerio, $('selector').each() iterates over all matched DOM elements.
    // `idx` is the index of the current element, `ele` is the current DOM element.
    const news = {
      title: $(ele).text(),         // news headline
      href: $(ele).attr('href'),    // link to the news page
    };
    hotNews.push(news);             // store in the result array
  });
  return hotNews;
};

6. Open a command line in the current folder and run:

  node index.js

7. Open localhost:3000 in the browser.

8. The crawled data is displayed on the page.

 

Guess you like

Origin www.cnblogs.com/wyongz/p/11242469.html