[Daily] Go Language Bible - Anonymous Function Exercise 2

Exercise 5.13: Modify crawl to make local copies of the pages it finds, creating directories as necessary. Only save pages that come from the original domain: for example, if the starting page is on golang.org, save the pages found there,
but do not save pages from vimeo.com.

package main

import (
        "fmt"
        "io"
        "io/ioutil"
        "log"
        "net/http"
        "net/url"
        "os"
        "path/filepath"

        "links"
)
/*
Exercise 5.13: Modify crawl to make local copies of the pages it finds, creating directories as necessary. Only save pages that come from the original domain: for example, if the starting page is on golang.org, save the pages found there, but do not save pages from vimeo.com.
*/
// sum counts the calls made to crawl; crawl prints it as a running number
// in front of each URL it visits.
var sum int
// main seeds the breadth-first traversal with the command-line arguments,
// using crawl as the function applied to every work item.
func main() {
        startURLs := os.Args[1:]
        breadthFirst(crawl, startURLs)
}

// crawl saves the page at url, prints the running visit count next to the
// URL, and returns the list produced by links.Extract for that page.
// An extraction error is logged and the list is returned as Extract gave it.
//
// NOTE(review): every visited URL is handed to save; the same-domain
// restriction asked for by exercise 5.13 is not applied here.
func crawl(url string) []string {
        sum++

        // Save synchronously. A goroutine started here would have nothing
        // waiting on it, so main could return (ending the program) while
        // downloads were still in flight, and the number of concurrent
        // fetches would be unbounded.
        if !save(url) {
                log.Printf("crawl: %s was not saved", url)
        }
        fmt.Printf("%d|%s\n", sum, url)
        list, err := links.Extract(url)
        if err != nil {
                log.Print(err)
        }
        return list
}

// save downloads the page at u and stores it beneath /tmp/crawl/<host>/,
// mirroring the URL path. A path that is empty or ends in "/" is stored as
// index.html inside that directory. An existing file is overwritten.
//
// It returns true when the page was written completely and false otherwise.
// Errors are logged; a response whose status is not 200 OK is skipped
// without logging.
func save(u string) bool {
        urlObj, err := url.Parse(u)
        if err != nil {
                log.Printf("save: %v", err)
                return false
        }

        name := urlObj.Path
        if name == "" || name[len(name)-1] == '/' {
                name += "index.html"
        }
        // Cleaning the path while it is rooted at "/" resolves any ".."
        // segments before the join, so the URL path cannot climb out of the
        // host directory.
        filename := filepath.Join("/tmp/crawl", urlObj.Host, filepath.Clean("/"+name))
        fmt.Println(filename)

        // Fetch before touching the disk so a failed request leaves no empty file.
        resp, err := http.Get(u)
        if err != nil {
                log.Printf("save: %v", err)
                return false
        }
        defer resp.Body.Close()

        if resp.StatusCode != http.StatusOK {
                // Drain the body so the transport can reuse the connection.
                // The result is irrelevant: the page is being skipped anyway.
                io.Copy(ioutil.Discard, resp.Body)
                return false
        }

        // Create the whole directory chain of the target file, not only the
        // host directory; MkdirAll is a no-op when it already exists.
        if err := os.MkdirAll(filepath.Dir(filename), 0755); err != nil {
                log.Printf("save: %v", err)
                return false
        }

        // Truncate instead of appending so a re-crawl replaces the old copy.
        f, err := os.OpenFile(filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
        if err != nil {
                log.Printf("save: %v", err)
                return false
        }

        _, copyErr := io.Copy(f, resp.Body)
        closeErr := f.Close()
        if copyErr != nil {
                log.Printf("save %s: %v", u, copyErr)
                return false
        }
        if closeErr != nil {
                log.Printf("save: %v", closeErr)
                return false
        }
        return true
}

// breadthFirst walks the worklist one level at a time.
// It invokes f on every item it has not met before and queues whatever f
// returns for the following level, so f runs at most once per distinct item.
// The walk ends when a level yields no further items.
func breadthFirst(f func(item string) []string, worklist []string) {
        visited := map[string]bool{}
        for level := worklist; len(level) > 0; {
                var next []string
                for i := 0; i < len(level); i++ {
                        current := level[i]
                        if visited[current] {
                                continue
                        }
                        visited[current] = true
                        discovered := f(current)
                        next = append(next, discovered...)
                }
                level = next
        }
}

  

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324642597&siteId=291194637