Part-of-speech tagging in Python (NLP)

1. Knowledge points

Covers part-of-speech tagging for both English and Chinese.
The main libraries used are nltk and jieba.

2. The code

# coding=utf-8

import nltk
from nltk.corpus import stopwords
from nltk.corpus import brown
import numpy as np

"""
POS-tagging steps:
    1. Clean the text and tokenize it
    2. Tag the resulting tokens

FAQ:
    1. "Resource punkt not found"
        -> install the punkt tokenizer data (nltk.download('punkt'))
    2. Install the averaged_perceptron_tagger data for nltk.pos_tag
    3. "Resource sinica_treebank not found"
        -> install the sinica_treebank corpus data
"""
def english_label():
    """
    English part-of-speech tagging.

    Tokenizes a sample sentence, removes punctuation tokens and English
    stop words, then tags each remaining token with nltk.pos_tag.
    :return: None (prints the list of (word, tag) pairs)
    """
    # Tokenize (lowercased so stop-word matching works)
    text = "Sentiment analysis is a challenging subject in machine learning.\
     People express their emotions in language that is often obscured by sarcasm,\
      ambiguity, and plays on words, all of which could be very misleading for \
      both humans and computers.".lower()
    text_list = nltk.word_tokenize(text)
    # Drop punctuation tokens
    english_punctuations = [',', '.', ':', ';', '?', '(', ')', '[', ']',
                            '&', '!', '*', '@', '#', '$', '%']
    text_list = [word for word in text_list if word not in english_punctuations]
    # Drop stop words; a set gives O(1) membership tests
    stops = set(stopwords.words("english"))
    text_list = [word for word in text_list if word not in stops]

    # Tag each remaining token (avoid shadowing the builtin `list`)
    word_tags = nltk.pos_tag(text_list)
    print(word_tags)


def chineses_label():
    """
    Chinese part-of-speech tagging using jieba.

    Replaces commas in a sample sentence with spaces, then tags each word
    with jieba's POS tagger (HanLP offers an alternative tag set; jieba
    is used here for simplicity).
    :return: None (prints the generator and the word/tag pairs)
    """
    import jieba.posseg as pseg
    import re

    # NOTE(review): the sample text was mangled by machine translation of the
    # original post; kept verbatim so behavior is unchanged.
    sentence = " I love you, pink, comfortable, comfortable, literati "
    # Replace commas with spaces before tagging (avoid shadowing builtin `str`)
    cleaned = re.sub(r'[,]', " ", sentence)
    posseg_list = pseg.cut(cleaned)
    print(posseg_list)  # prints the generator object itself
    print(' '.join('%s/%s' % (word, tag) for (word, tag) in posseg_list))

 

Further reading

Source: www.cnblogs.com/ywjfx/p/11026712.html