使用 Go 语言爬取椎名真白的图片

单任务(串行下载)版:

package main

import (
	"net/http"
	"regexp"
	"io/ioutil"
	"os"
	"strconv"
	"time"
	"fmt"
	"runtime"
)

// get_mashiro fetches the page at link and returns the image URLs found in it.
//
// rule is a regular expression whose first capture group holds the URL; every
// match of rule in the page body is examined. A captured URL is kept only when
// its byte length equals len(target), so target serves as a sample URL whose
// length filters out unrelated matches.
//
// If the request or the body read fails, the error is reported on stderr and
// nil is returned. An invalid rule still panics through regexp.MustCompile.
func get_mashiro(link, rule, target string) []string {
	res, err := http.Get(link)
	if err != nil {
		fmt.Fprintf(os.Stderr, "fetch %s: %v\n", link, err)
		return nil
	}
	// Only safe to defer once we know res is non-nil.
	defer res.Body.Close()

	page, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Fprintf(os.Stderr, "read %s: %v\n", link, err)
		return nil
	}

	re := regexp.MustCompile(rule)
	matches := re.FindAllStringSubmatch(string(page), -1)
	urls := make([]string, 0, len(matches))
	for _, m := range matches {
		// A rule without a capture group yields one-element matches; skip
		// them instead of indexing out of range.
		if len(m) < 2 || len(m[1]) != len(target) {
			continue
		}
		urls = append(urls, m[1])
	}
	return urls
}

// download_mashiro downloads every URL in urls, one after another, and stores
// each response body as <index>.jpg in the hard-coded output directory, where
// index is the URL's position in urls.
//
// A URL that cannot be fetched or saved is reported on stderr and skipped; the
// remaining URLs are still processed.
func download_mashiro(urls []string) {
	for i, u := range urls {
		if err := save_mashiro(i, u); err != nil {
			fmt.Fprintf(os.Stderr, "download %d (%s): %v\n", i, u, err)
		}
	}
}

// save_mashiro fetches src and writes the response body to <index>.jpg in the
// output directory. Doing the work in its own function lets the response body
// be closed as soon as each download finishes rather than when the whole
// batch is done.
func save_mashiro(index int, src string) error {
	res, err := http.Get(src)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	// Do not store an error page under an image file name.
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %s", res.Status)
	}

	data, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return err
	}

	// The file is always named .jpg, whatever the source extension was.
	dst := `F:\project\黑马go\mashiro\` + strconv.Itoa(index) + ".jpg"
	return ioutil.WriteFile(dst, data, 0666)
}

func main(){
	start_time := time.Now().Unix()
	link := "https://tieba.baidu.com/p/5290405550?red_tag=0872096237"
	rule := `src="(http.+?(?:jpg|png))"`
	target := "https://imgsa.baidu.com/forum/w%3D580/sign=5a28bf191fce36d3a20483380af13a24/5f572ae93901213fb9930d1f5ee736d12e2e951c.jpg"
	pic_urls := get_mashiro(link,rule,target)
	download_mashiro(pic_urls)
	end_time := time.Now().Unix()
	fmt.Println("总用时:",end_time-start_time)

}//2

  

多任务(goroutine 并发下载)版:

package main

import (
	"net/http"
	"regexp"
	"io/ioutil"
	"os"
	"strconv"
	"time"
	"fmt"
	"runtime"
)

// get_mashiro fetches the page at link and returns the image URLs found in it.
//
// rule is a regular expression whose first capture group holds the URL; every
// match of rule in the page body is examined. A captured URL is kept only when
// its byte length equals len(target), so target serves as a sample URL whose
// length filters out unrelated matches.
//
// If the request or the body read fails, the error is reported on stderr and
// nil is returned. An invalid rule still panics through regexp.MustCompile.
func get_mashiro(link, rule, target string) []string {
	res, err := http.Get(link)
	if err != nil {
		fmt.Fprintf(os.Stderr, "fetch %s: %v\n", link, err)
		return nil
	}
	// Only safe to defer once we know res is non-nil.
	defer res.Body.Close()

	page, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Fprintf(os.Stderr, "read %s: %v\n", link, err)
		return nil
	}

	re := regexp.MustCompile(rule)
	matches := re.FindAllStringSubmatch(string(page), -1)
	urls := make([]string, 0, len(matches))
	for _, m := range matches {
		// A rule without a capture group yields one-element matches; skip
		// them instead of indexing out of range.
		if len(m) < 2 || len(m[1]) != len(target) {
			continue
		}
		urls = append(urls, m[1])
	}
	return urls
}

// download_mashiro downloads every URL in urls concurrently, one goroutine per
// URL, and stores each response body as <index>.jpg in the hard-coded output
// directory, where index is the URL's position in urls. It returns once every
// goroutine has finished.
//
// Each goroutine prints its index when it starts. A URL that cannot be fetched
// or saved is reported on stderr; the other downloads are unaffected.
func download_mashiro(urls []string) {
	runtime.GOMAXPROCS(4)

	done := make(chan struct{})
	for i, u := range urls {
		go func(i int, u string) {
			// Signal completion on every path, including failures, so the
			// wait loop below receives exactly one value per URL.
			defer func() { done <- struct{}{} }()

			fmt.Println(i)
			if err := save_mashiro(i, u); err != nil {
				fmt.Fprintf(os.Stderr, "download %d (%s): %v\n", i, u, err)
			}
		}(i, u)
	}

	// Wait for one completion signal per started goroutine.
	for range urls {
		<-done
	}
}

// save_mashiro fetches src and writes the response body to <index>.jpg in the
// output directory, returning the first error encountered.
func save_mashiro(index int, src string) error {
	res, err := http.Get(src)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	// Do not store an error page under an image file name.
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %s", res.Status)
	}

	data, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return err
	}

	// The file is always named .jpg, whatever the source extension was.
	dst := `F:\project\黑马go\mashiro\` + strconv.Itoa(index) + ".jpg"
	return ioutil.WriteFile(dst, data, 0666)
}

func main(){
	start_time := time.Now().Unix()
	link := "https://tieba.baidu.com/p/5290405550?red_tag=0872096237"
	rule := `src="(http.+?(?:jpg|png))"`
	target := "https://imgsa.baidu.com/forum/w%3D580/sign=5a28bf191fce36d3a20483380af13a24/5f572ae93901213fb9930d1f5ee736d12e2e951c.jpg"
	pic_urls := get_mashiro(link,rule,target)
	download_mashiro(pic_urls)
	end_time := time.Now().Unix()
	fmt.Println("总用时:",end_time-start_time)
}//1

  

猜你喜欢

转载自www.cnblogs.com/traditional/p/9278710.html