Go读取论文并转换为simhash

package main

import (
	"database/sql"
	_ "flag"
	"fmt"
	_ "io/ioutil"
	"log"
	"os"
	_ "path"
	_ "strings"
	"time"

	_ "baliance.com/gooxml/document"
	_ "github.com/go-sql-driver/mysql"
	"github.com/yanyiwu/gosimhash"
)


func main(){



    t1 := time.Now()

    Mylog(doc)
    if err != nil {
        Mylog(err)
    }

    db, err := sql.Open("mysql", "root:123456@tcp(127.0.0.1:3306)/gzpg_crs_jsj?charset=utf8");
    if err != nil {
        fmt.Println(err);
    }
    sql :="select s1.paper_id,s2.title_cn,s2.abstract_cn,s2.keyword_cn,s2.title_en,s2.abstract_en,s2.keyword_en,s1.s_content from sf_content s1,sf_paper s2 where  s1.paper_id=s2.paper_id limit 10"
    rows, err := db.Query(sql)
    if err != nil {
        fmt.Println(err);
    }
    stmt, err := db.Prepare("INSERT  sim_path SET paperid=?,simcode=?")
    if err != nil {
        fmt.Println(err);
    }

    var str string
    var code string
    //查询多个
    for rows.Next() {
        var paper_id int //论文id
        var title_cn string //中文题目
        var abstract_cn string //中文摘要
        var keyword_cn string //中文关键词
        var title_en string //英文题目
        var abstract_en string //英文摘要
        var keyword_en string //英文关键词
        var s_content string//全文内容
        
        err = rows.Scan(&paper_id, &title_cn,&abstract_cn,&keyword_cn,&title_en,&abstract_en,&keyword_en,&s_content)
        str = fmt.Sprintf("%s\n 摘要:%s\n 关键词:%s\n %s\n Abstract:%s\n Keywords:%s\n %s\n",title_cn,abstract_cn,keyword_cn,title_en,abstract_en,keyword_en,s_content)
        code=simhash(str)
        res, err := stmt.Exec(paper_id, code)
        if err != nil {
            fmt.Println(err);
        }
        id, err := res.LastInsertId()
        if err != nil {
            fmt.Println(err);
        }
        fmt.Print("%s成功%s \n",id,paper_id);
    
    }
    db.Close()
    elapsed := time.Since(t1)
    log.Println("时间花费位:\n" , elapsed)

}

// simhash computes the simhash fingerprint of str and returns it as a string
// of exactly 64 characters, each '0' or '1', with the most significant bit
// first.
//
// A hasher is created from the dictionary files under ../dict on every call
// and freed before the function returns.
func simhash(str string) string {
	hasher := gosimhash.New("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8")
	defer hasher.Free()

	// NOTE(review): the second argument is 1; presumably it is the number of
	// top-weighted keywords that feed the fingerprint — confirm that a single
	// keyword is intended.
	fingerprint := hasher.MakeSimhash(str, 1)

	// %064b renders the value in base 2, left-padded with zeros to 64 digits.
	// The conversion to uint64 ensures the raw bit pattern is formatted
	// whatever integer type the fingerprint has.
	return fmt.Sprintf("%064b", uint64(fingerprint))
}


// Mylog appends its arguments as a single line to the file 20181105go.log,
// creating the file if it does not exist. Each line carries the TAG prefix
// followed by the date and the time with microsecond precision.
//
// If the log file cannot be opened, the open error and the message are written
// through the standard logger instead, and nothing is written to the file.
func Mylog(v ...interface{}) {
	f, err := os.OpenFile("20181105go.log", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
		// Reporting through Mylog itself would retry the same failing open
		// and recurse without bound, so fall back to the standard logger.
		log.Println(err)
		log.Println(v...)
		return
	}
	defer f.Close()

	// NOTE(review): TAG is not declared in this block; it is expected to be a
	// string declared elsewhere in package main — confirm.
	logger := log.New(f, TAG, log.Ldate|log.Ltime|log.Lmicroseconds)
	logger.Println(v...)
}


















猜你喜欢

转载自www.cnblogs.com/mengluo/p/9915440.html
今日推荐