// go爬虫 (a Go web crawler)

package main

import (
	"fmt"
	"github.com/antchfx/htmlquery"
	"github.com/kirinlabs/HttpRequest"
	"labix.org/v2/mgo"
	"labix.org/v2/mgo/bson"
	"strconv"
	"strings"
	"sync"
)


// Package-level MongoDB handles, assigned once by init.
var (
	db *mgo.Database   // the "myjs" database
	c  *mgo.Collection // the "fuli" collection of db
)

// init dials the local MongoDB server and fills in db and c.
// It panics if the server cannot be dialed.
func init() {
	const (
		mongoURL       = "mongodb://localhost:27017"
		databaseName   = "myjs"
		collectionName = "fuli"
	)

	sess, dialErr := mgo.Dial(mongoURL) // connect to the database
	if dialErr != nil {
		panic(dialErr)
	}
	// The session is not closed here (the original close call was commented
	// out), presumably because db and c stay in use for the whole run.
	sess.SetMode(mgo.Monotonic, true)

	db = sess.DB(databaseName) // database name
	c = db.C(collectionName)
}

// wg lets main block until every HttpImg goroutine has called wg.Done.
var wg sync.WaitGroup

// main starts one HttpImg goroutine for each of the listing pages 1 through 21
// of www.mntuxiu.com and waits for all of them to finish.
func main() {
	const (
		firstPage = 1
		lastPage  = 21
	)

	for page := firstPage; page <= lastPage; page++ {
		// Register the goroutine before starting it so Wait cannot return early.
		wg.Add(1)
		go HttpImg("http://www.mntuxiu.com/page/" + strconv.Itoa(page) + "/")
	}
	wg.Wait()
}


// HttpImg fetches the listing page at url, selects every <img> matched by
// //*[@id='index_ajax_list']/li/a/img, downloads each image and inserts it
// into the package-level collection c as a User document.
//
// It is written to run as a goroutine started by main and calls wg.Done once
// when it finishes normally.
//
// Failure handling:
//   - fetching, reading or parsing the listing page panics;
//   - a failed database insert panics;
//   - an <img> node with fewer than four attributes, or an image that cannot
//     be downloaded, is reported on stdout and skipped, so one bad image does
//     not abort the page or store an empty document.
func HttpImg(url string) {
	response, err := HttpRequest.Get(url)
	if err != nil {
		panic(err.Error())
	}
	body, err := response.Body()
	if err != nil {
		panic("reading " + url + ": " + err.Error())
	}
	node, err := htmlquery.Parse(strings.NewReader(string(body)))
	if err != nil {
		panic("parsing " + url + ": " + err.Error())
	}

	for _, n := range htmlquery.Find(node, "//*[@id='index_ajax_list']/li/a/img") {
		// The image URL and the name are taken by attribute position (third
		// and fourth attribute); guard the indexes so differently shaped
		// markup cannot cause an out-of-range panic.
		// NOTE(review): which attributes sit at these positions depends on the
		// site's markup; consider selecting them by name once confirmed.
		if len(n.Attr) < 4 {
			fmt.Println("skipping <img> with fewer than 4 attributes on", url)
			continue
		}
		src, name := n.Attr[2].Val, n.Attr[3].Val
		fmt.Println(src, name)

		img, err := HttpRequest.Get(src)
		if err != nil {
			fmt.Println("skipping", src, "- download failed:", err)
			continue
		}
		data, err := img.Body()
		if err != nil {
			fmt.Println("skipping", src, "- reading body failed:", err)
			continue
		}

		if err := c.Insert(&User{
			Id_:  bson.NewObjectId(),
			Name: name,
			Bs64: data,
		}); err != nil {
			panic(err)
		}
	}

	// Not deferred on purpose: every failure path above that does not
	// `continue` panics, and a panic should take the process down rather than
	// release main's wg.Wait first.
	wg.Done()
}




// User is the MongoDB document written by HttpImg for one downloaded image.
type User struct {
	Id_  bson.ObjectId `bson:"_id"`  // document id, generated with bson.NewObjectId
	Name string        `bson:"name"` // value of the <img> node's fourth attribute
	Bs64 []byte        `bson:"Bs64"` // image response body, stored as the bytes received
}

  

// 猜你喜欢 (you may also like)

// 转载自 (reposted from) www.cnblogs.com/kjtt/p/13208564.html