// 获取豆瓣电影 (fetch Douban movie rankings)

package main

import (
   "encoding/json"
   "fmt"
   "golang.org/x/text/encoding/simplifiedchinese"
   "golang.org/x/text/transform"
   "io/ioutil"
   "net/http"
   "os"
   "strings"
   "time"
)

// A is one element of the JSON array that handleUrl decodes from the Douban
// chart endpoint (/j/chart/top_list). Each field is filled from the JSON key
// named in its struct tag; saveCSV writes the fields out in this same order.
type A struct {
   Rating      []string `json:"rating"`       // decoded as a list of strings
   Rank        int      `json:"rank"`         // position in the chart
   CoverUrl    string   `json:"cover_url"`    // cover image URL
   IsPlayable  bool     `json:"is_playable"`
   Id          string   `json:"id"`           // identifier, kept as a string
   Types       []string `json:"types"`
   Regions     []string `json:"regions"`
   Title       string   `json:"title"`
   Url         string   `json:"url"`          // URL of the entry's page
   ReleaseDate string   `json:"release_date"` // kept as the raw string, not parsed
   ActorCount  int      `json:"actor_count"`
   VoteCount   int      `json:"vote_count"`
   Score       string   `json:"score"`        // kept as the raw string, not parsed
   Actors      []string `json:"actors"`
   IsWatched   bool     `json:"is_watched"`
}

// main scrapes a range of pages from the Douban chart and appends them to a
// CSV file. The steps are: ask for the page range, build one URL per page,
// then fetch, decode and save each page in turn.
//
// Example endpoints (20 entries per page, paged through the start offset):
//
//	https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=0&limit=20
//	https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=20&limit=20
func main() {
	// First and last page to download.
	startPage, endPage := getPage()
	// The trailing newline keeps the URL list below from running onto the
	// same output line.
	fmt.Printf("起始页=%v,终止页=%v\n", startPage, endPage)

	// One URL per requested page.
	urlList := getUrlList(startPage, endPage)
	fmt.Println("urlList:", urlList)

	for i, url := range urlList {
		data := []A{}
		handleUrl(url, &data)
		// Append this page's entries to the CSV file.
		saveCSV(data)
		// Pause between requests; there is nothing left to wait for after
		// the final page.
		if i < len(urlList)-1 {
			time.Sleep(5 * time.Second)
		}
	}
}

// getPage asks on standard input for the first and last page to download and
// returns them in that order. A first page below 1 becomes 1, and a last page
// below the first page becomes the first page. Input that cannot be read as
// an integer leaves the value at 0, so the same defaults apply.
func getPage() (int, int) {
	// ask prints the prompt and reads one integer; a failed read yields 0.
	ask := func(prompt string) int {
		var n int
		fmt.Println(prompt)
		fmt.Scanln(&n)
		return n
	}

	first := ask("请输入起始页(小于1默认为1):")
	if first <= 0 {
		first = 1
	}

	last := ask("请输入终止页(小于起始页默认为起始页):")
	if last < first {
		last = first
	}

	return first, last
}

// getUrlList builds the chart URL for every page from start to end inclusive.
// Pages are 1-based and hold 20 entries each, so page p maps to the query
// offset (p-1)*20. The result is nil when start is greater than end.
func getUrlList(start, end int) []string {
	const (
		prefix   = `https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=`
		suffix   = `&limit=20`
		pageSize = 20
	)

	var urls []string
	for page := start; page <= end; page++ {
		offset := (page - 1) * pageSize
		// prefix is passed as an argument, never as the format string: it
		// contains a literal "%3A".
		urls = append(urls, fmt.Sprintf("%s%d%s", prefix, offset, suffix))
	}
	return urls
}

// chartClient is shared by every request so connections can be reused. The
// timeout stops a stalled server from hanging the program indefinitely.
var chartClient = &http.Client{Timeout: 30 * time.Second}

// handleUrl downloads url, decodes the response body as a JSON array of A
// into *data, and returns the decoded slice.
//
// data may be nil, in which case a fresh slice is allocated and returned.
//
// The function has no error return. Any failure (request error, read error,
// non-200 status, invalid JSON) is reported on standard output and ends the
// process with exit status 1.
func handleUrl(url string, data *[]A) []A {
	if data == nil {
		data = new([]A)
	}

	resp, err := chartClient.Get(url)
	if err != nil {
		fmt.Println("请求失败:", err)
		os.Exit(1)
	}

	// Close the body straight after reading: os.Exit skips deferred calls,
	// so a defer would never run on the failure paths below.
	body, err := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		fmt.Println("读取响应失败:", err)
		os.Exit(1)
	}

	// An error page would otherwise only surface as a confusing JSON error.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("请求返回异常状态:", resp.Status)
		os.Exit(1)
	}

	// data is already a pointer to the slice; pass it directly.
	if err := json.Unmarshal(body, data); err != nil {
		fmt.Println("反序列化失败:", err)
		os.Exit(1)
	}
	return *data
}

// csvColumns names the CSV columns, in the same order csvRow emits values.
var csvColumns = []string{"rating", "rank", "cover_url", "is_playable", "id", "types", "regions", "title", "url", "release_date", "actor_count", "vote_count", "score", "actors", "is_watched"}

// saveCSV appends one GBK-encoded CSV row per element of data to douBan.csv
// in the current working directory, creating the file if needed.
//
// The header row is written only while the file is still empty, so calling
// saveCSV once per page yields a single header followed by all rows. Failures
// are reported on standard output. A row that cannot be encoded or written is
// skipped and the remaining rows are still attempted.
func saveCSV(data []A) {
	const fileName = "douBan.csv"
	fp, err := os.OpenFile(fileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		fmt.Println("打开文件失败:", err)
		return
	}
	defer fp.Close()

	// The file always exists by now (O_CREATE), so emptiness rather than
	// existence decides whether the header is still missing.
	info, err := fp.Stat()
	if err != nil {
		fmt.Println("读取文件信息失败:", err)
		return
	}
	if info.Size() == 0 {
		if err := writeCSVLine(fp, csvColumns); err != nil {
			fmt.Println("写入表头失败:", err)
		}
	}

	for i := range data {
		if err := writeCSVLine(fp, csvRow(&data[i])); err != nil {
			fmt.Println("写入数据失败:", err)
		}
	}
}

// csvRow renders one entry as column values, in csvColumns order.
func csvRow(m *A) []string {
	return []string{
		fmt.Sprint(m.Rating), fmt.Sprint(m.Rank), m.CoverUrl,
		fmt.Sprint(m.IsPlayable), m.Id, fmt.Sprint(m.Types),
		fmt.Sprint(m.Regions), m.Title, m.Url, m.ReleaseDate,
		fmt.Sprint(m.ActorCount), fmt.Sprint(m.VoteCount), m.Score,
		fmt.Sprint(m.Actors), fmt.Sprint(m.IsWatched),
	}
}

// writeCSVLine quotes fields, joins them with commas, converts the line to
// GBK and appends it to fp followed by a newline.
func writeCSVLine(fp *os.File, fields []string) error {
	quoted := make([]string, len(fields))
	for i, f := range fields {
		quoted[i] = csvField(f)
	}
	line, err := utf82GBK(strings.Join(quoted, ","))
	if err != nil {
		return err
	}
	_, err = fp.WriteString(line + "\n")
	return err
}

// csvField quotes s when it contains a comma, double quote or line break,
// doubling embedded quotes, so such a value stays in a single column.
func csvField(s string) string {
	if !strings.ContainsAny(s, ",\"\r\n") {
		return s
	}
	return `"` + strings.Replace(s, `"`, `""`, -1) + `"`
}

func utf82GBK(src string) (string, error) {
   reader := transform.NewReader(strings.NewReader(src), simplifiedchinese.GBK.NewEncoder())
   if buf, err := ioutil.ReadAll(reader); err != nil {
      return "", err
   } else {
      return string(buf), nil
   }
}

// dealTitle reports whether a file-system entry called name exists.
//
// name is resolved like any other path, so a bare file name is looked up in
// the current working directory rather than in one fixed directory. A missing
// entry simply yields false. Any other lookup error is printed and also
// yields false.
func dealTitle(name string) bool {
	if _, err := os.Stat(name); err != nil {
		if !os.IsNotExist(err) {
			fmt.Println(err)
		}
		return false
	}
	return true
}

// 猜你喜欢
//
// 转载自 blog.csdn.net/q320036715/article/details/84899516