package main
import (
"fmt"
"io"
"net/http"
"os"
"regexp"
"strconv"
"strings"
)
// formatStr pads *str in place with tab characters so the output columns
// line up when written to the ranking file. The tab count is chosen from
// the string's byte length; lengths without an entry are left unchanged.
func formatStr(str *string) {
	padding := map[int]string{
		3:  "\t\t\t\t",
		6:  "\t\t\t\t",
		7:  "\t\t\t\t",
		9:  "\t\t\t\t",
		8:  "\t\t\t",
		10: "\t\t\t",
		11: "\t\t\t",
		12: "\t\t\t",
		15: "\t\t\t",
		18: "\t\t",
		21: "\t\t",
		24: "\t",
		27: "\t",
	}
	if tabs, ok := padding[len(*str)]; ok {
		*str += tabs
	}
}
// Novel describes one entry of the scraped ranking list.
// NOTE(review): this type is declared but never used in this file —
// makeFile writes the scraped fields straight to the output file.
// Confirm whether it can be removed or should replace the parallel
// [][]string parameters of makeFile.
type Novel struct {
Rank int // position in the ranking
Name string // book title
author string // book author (unexported, unused)
ticket string // monthly ticket count (unexported, unused)
link string // detail-page URL (unexported, unused)
}
// makeFile extracts rank, title, author and monthly-ticket data from the
// raw HTML fragments matched by SpliderPages and appends one
// tab-separated line per novel to the file at path.
//
// novelRank, novelName, author and ticket are parallel regexp match
// slices; one entry per novel, indexed together by novelName's length.
func makeFile(path string, novelRank, novelName, author, ticket [][]string) {
	// Open for appending. O_WRONLY is required (O_APPEND alone implies
	// read-only) and O_CREATE lets the first run create the file.
	fd, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0664)
	if err != nil {
		fmt.Println("os.Create err", err)
		return
	}
	// Deferred only after the error check: fd is nil when OpenFile fails.
	defer fd.Close()

	// Compile once, outside the loop.
	regName := regexp.MustCompile(`bookName = ".*?"`)
	regID := regexp.MustCompile(`bookId=".*?"`)

	novelMap := make(map[string]string)
	for i := 0; i < len(novelName); i++ {
		// Rank: the text between the first '>' and the following '<'.
		slicesNovelRank := strings.Split(novelRank[i][0], ">")
		slicesNovelRank = strings.Split(slicesNovelRank[1], "<")
		strNovelRank := slicesNovelRank[0]
		// Title: first quote-delimited value of the bookName attribute.
		novelname := regName.FindAllStringSubmatch(novelName[i][0], -1)
		strName := strings.Split(novelname[0][0], "\"")
		// Book ID, used to build the detail-page URL.
		novelID := regID.FindAllStringSubmatch(novelName[i][0], -1)
		strID := strings.Split(novelID[0][0], "\"")
		// Remember title -> URL so it can be emitted at the end of the line.
		novelMap[strName[1]] = "http://book.zongheng.com/book/" + strID[1] + ".html"
		name := strName[1]
		formatStr(&name)
		// Author: fourth quote-delimited field of the title attribute.
		slicesAuthor := strings.Split(author[i][0], "\"")
		strAuthor := slicesAuthor[3]
		formatStr(&strAuthor)
		// Monthly ticket count: text between the first '>' and next '<'.
		slicesTicket := strings.Split(ticket[i][0], ">")
		slicesTicket = strings.Split(slicesTicket[1], "<")
		strTicket := slicesTicket[0]
		if _, err := fd.WriteString(strNovelRank + "\t\t\t\t" + name + "\t\t\t\t" + strAuthor + "\t\t" + strTicket + "\t\t\t\t" + novelMap[strName[1]] + "\n"); err != nil {
			fmt.Println("fd.WriteString err", err)
			return
		}
	}
}
func httpGetDB(url string)(result string,err error) {
respond,err:=http.Get(url)
if err != nil {
fmt.Println("http.Get:",err)
}
defer respond.Body.Close()
fmt.Printf("爬取的网页:%s\n",url)
//循环读取网页数据
for ; ; {
buf:=make([]byte,4096)
n,err2:=respond.Body.Read(buf)
if n == 0 {
break
}
if err2 != nil && err2 != io.EOF {
fmt.Println("respond.Body.Read:",err)
err=err2
return
}
//将读取的数据存入result
result+=string(buf[:n])
}
return
}
// SpliderPages fetches page number index of the zongheng monthly-ticket
// ranking, extracts the rank / title+ID / author / ticket HTML fragments
// with regular expressions, and hands them to makeFile for output.
// ch is accepted for the caller's signalling scheme but is never written
// to by this function.
func SpliderPages(index int, ch chan<- int) {
	// Build the page URL for this index.
	url := "http://www.zongheng.com/rank/details.html?rt=1&d=1&p=" + strconv.Itoa(index)

	// Download the page.
	result, err := httpGetDB(url)
	if err != nil {
		fmt.Println("http.Get err:", err)
		return
	}

	// Patterns for the four pieces of data present on the page.
	rankRe := regexp.MustCompile(`<div class="rank_d_icon rank_d_b_num rank_d_b_num.*</div>`)   // novel rank
	nameRe := regexp.MustCompile(`div class="rank_d_list borderB_c_dsh clearfix".*>`)           // novel name and ID
	authorRe := regexp.MustCompile(`"rank_d_b_cate" title=".*"`)                                // novel author
	ticketRe := regexp.MustCompile(`<div class="rank_d_b_ticket">[0-9]+<span>月票</span></div>`) // monthly tickets

	// Persist everything that was scraped.
	path := "C:/Users/yy/Desktop/小说排行榜爬虫.txt"
	makeFile(path,
		rankRe.FindAllStringSubmatch(result, -1),
		nameRe.FindAllStringSubmatch(result, -1),
		authorRe.FindAllStringSubmatch(result, -1),
		ticketRe.FindAllStringSubmatch(result, -1))
}
// main reads the first and last page numbers from stdin and scrapes the
// pages one after another.
func main() {
	var start, end int
	fmt.Print("请输入爬取的起始页:")
	if _, err := fmt.Scan(&start); err != nil {
		fmt.Println("fmt.Scan err:", err)
		return
	}
	fmt.Print("请输入爬取的终止页:")
	if _, err := fmt.Scan(&end); err != nil {
		fmt.Println("fmt.Scan err:", err)
		return
	}
	fmt.Printf("正在爬取第%d页到第%d页的数据\n", start, end)
	// SpliderPages runs synchronously here, so this channel exists only
	// to satisfy its signature; nothing ever sends or receives on it.
	block := make(chan int)
	for i := start; i <= end; i++ {
		SpliderPages(i, block)
	}
	// Removed leftover debug statement: fmt.Println(len("849")).
}
// Go语言小说排行榜爬虫实现 (Go novel-ranking web scraper)
// Source: blog.csdn.net/qq_44630120/article/details/106305646