Python: reading only the Chinese characters from text

#coding=utf-8
import re

# Print the contents of aaa.txt verbatim, then print them again with every
# character in the range U+4E00-U+9FA5 (Chinese characters) removed.
with open('aaa.txt', mode='r', encoding="utf-8") as f:
    data = f.read()

# The file is fully read at this point, so the rest works on the string only.
print(data)
data = re.compile('[\u4E00-\u9FA5]').sub("", data)
print(data)

# Filter out every character other than Chinese characters

 

import re

# Written for Python 3.5.
# Chinese characters are matched with the fixed range \u4E00-\u9FA5.

# NOTE(review): the Chinese part of this sample string was lost when the page
# was machine-translated; it is reconstructed here from the English rendering
# ("China Korea Japan") -- confirm against the original post.
text = "aqweded***xsa***中国韩国日本***"

# Raw string so the \u escapes are handed to the regex engine untouched.
# The single capture group makes findall return only the Chinese run.
regStr = r".*?([\u4E00-\u9FA5]+).*?"
aa = re.findall(regStr, text)
if aa:
    print(aa)

# Extract the Chinese substrings from a string; returns a list

#coding=utf-8
import re

# Print aaa.txt verbatim, then print it again after deleting ASCII letters,
# digits, spaces and the punctuation ! % [ ] , 。 from it.
with open('aaa.txt', 'r', encoding="utf-8") as f:
    data = f.read()
    print(data)
    # Raw string: the previous non-raw literal relied on invalid escape
    # sequences such as "\!" and "\%", which newer Python versions warn about.
    # The set of matched characters is unchanged.
    data = re.sub(r"[A-Za-z0-9!%\[\],。 ]", "", data)
    print(data)

 

# -*- coding: utf-8 -*-
import re

# Remove ASCII letters, digits and the punctuation ! % [ ] , 。 so that only
# the Chinese characters of the sample remain.
# NOTE(review): the sample string was mangled by machine translation and is
# reconstructed here -- confirm against the original post.
# The variable is named `text` rather than `str` to avoid shadowing the builtin.
text = "hello,world!!%[545]你好234世界。。。"
text = re.sub(r"[A-Za-z0-9!%\[\],。]", "", text)
print(text)
 
# of the strings in the Chinese, returns an array 
pattern = " [\ u4e00- \ u9fa5 ] + "  
REGEX = the re.compile (pattern) 
Results = regex.findall ( " adf adf Chinese hair BOE " )
 Print (Results)

 

Guess you like

Origin www.cnblogs.com/qmfsun/p/11811990.html