1. Exercise: Web Crawler
Solution #1: use a channel to synchronize goroutines
- <cache *Cache> records visited URLs so that no URL is fetched twice
- <ch chan int> lets the caller wait for a goroutine to finish
package main

import (
	"fmt"
	"sync"
)

// Crawl uses fetcher to recursively crawl pages starting with url,
// to a maximum of depth, and signals ch when it finishes.
func Crawl(url string, depth int, fetcher Fetcher, cache *Cache, ch chan int) {
	// Signal the caller exactly once, on every return path.
	defer func() { ch <- 1 }()
	if depth <= 0 {
		return
	}
	// Check and mark the URL under the lock, so that two goroutines
	// cannot both decide to fetch it.
	cache.mu.Lock()
	if cache.urls[url] {
		cache.mu.Unlock()
		return
	}
	cache.urls[url] = true
	cache.mu.Unlock()
	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("Found: %s %q\n", url, body)
	// Launch all child crawls first, then wait for all of them;
	// receiving inside the launch loop would serialize the crawl.
	subch := make(chan int, len(urls))
	for _, u := range urls {
		go Crawl(u, depth-1, fetcher, cache, subch)
	}
	for range urls {
		<-subch
	}
}
// Cache records which URLs have been visited; mu guards urls.
type Cache struct {
	mu   sync.Mutex
	urls map[string]bool
}
func main() {
	cache := Cache{urls: make(map[string]bool)}
	ch := make(chan int)
	go Crawl("https://golang.org/", 4, fetcher, &cache, ch)
	<-ch // block until the root Crawl signals completion
}
Output looks like:
Found: https://golang.org/ "The Go Programming Language"
not found: https://golang.org/cmd/
Found: https://golang.org/pkg/ "Packages"
Found: https://golang.org/pkg/os/ "Package os"
Found: https://golang.org/pkg/fmt/ "Package fmt"
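Both listings assume the Fetcher interface and the canned fakeFetcher data from the Tour's exercise scaffold; the "not found" line above is fakeFetcher's error for a URL missing from its map. An abridged sketch of that scaffold (the full map on the exercise page also has entries for /pkg/, /cmd/, and so on):

type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}

// fakeFetcher is a Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

type fakeResult struct {
	body string
	urls []string
}

func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	if res, ok := f[url]; ok {
		return res.body, res.urls, nil
	}
	return "", nil, fmt.Errorf("not found: %s", url)
}

// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
	"https://golang.org/": &fakeResult{
		"The Go Programming Language",
		[]string{
			"https://golang.org/pkg/",
			"https://golang.org/cmd/",
		},
	},
	// ... remaining pages omitted for brevity
}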
Solution #2: use sync.WaitGroup to synchronize goroutines (same package, imports, and scaffold as Solution #1)
Refer to (both discuss the wg.Add placement pitfall, sketched after the code below):
https://stackoverflow.com/questions/12224962/exercise-web-crawler-concurrency-not-working
https://www.dyxmq.cn/program/code/golang/waitgroup-in-golang.html
// Crawl uses fetcher to recursively crawl pages starting with url,
// to a maximum of depth. Every call matches exactly one wg.Add(1)
// performed by its caller before the call starts.
func Crawl(url string, depth int, fetcher Fetcher, cache *Cache, wg *sync.WaitGroup) {
	defer wg.Done()
	if depth <= 0 {
		return
	}
	// Check and mark the URL under the lock, as in Solution #1.
	cache.mu.Lock()
	if cache.urls[url] {
		cache.mu.Unlock()
		return
	}
	cache.urls[url] = true
	cache.mu.Unlock()
	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("Found: %s %q\n", url, body)
	for _, u := range urls {
		// Add before starting the goroutine, never inside it, so
		// wg.Wait cannot observe a zero counter while work remains.
		wg.Add(1)
		go Crawl(u, depth-1, fetcher, cache, wg)
	}
}
// Cache is identical to the one in Solution #1.
type Cache struct {
	mu   sync.Mutex
	urls map[string]bool
}
func main() {
	cache := Cache{urls: make(map[string]bool)}
	wg := &sync.WaitGroup{}
	wg.Add(1) // account for the root Crawl call
	go Crawl("https://golang.org/", 4, fetcher, &cache, wg)
	wg.Wait() // block until every Crawl has called Done
}
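The pitfall the two links above describe is moving wg.Add into the spawned goroutine itself, which races with wg.Wait. A minimal sketch of the wrong and right placements (work and urls are hypothetical names used only for illustration):

// Wrong: Wait may run before any goroutine has executed Add,
// see the counter at zero, and return while work is still pending.
for _, u := range urls {
	go func(u string) {
		wg.Add(1) // too late: races with wg.Wait below
		defer wg.Done()
		work(u)
	}(u)
}
wg.Wait()

// Right: Add in the parent, before the goroutine starts, so the
// counter is already positive by the time Wait can inspect it.
for _, u := range urls {
	wg.Add(1)
	go func(u string) {
		defer wg.Done()
		work(u)
	}(u)
}
wg.Wait()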
The journey into 'A Tour of Go' ends here!