When we use regular expressions to identify Chinese text, we match against the Unicode range that covers Chinese characters (the CJK Unified Ideographs block), which is \u4e00-\u9fff.
import re
def extract_chinese_chars(code):
    """Return every run of consecutive Chinese characters found in *code*.

    A "Chinese character" here is any code point in the range
    U+4E00..U+9FFF. Each maximal run of such characters becomes one
    list element, in order of appearance.

    Args:
        code: The text to scan.

    Returns:
        A list of strings, one per run; empty if *code* has no match.
    """
    # Raw string: the regex engine itself interprets the \uXXXX escapes.
    chinese_pattern = r'[\u4e00-\u9fff]+'  # match Chinese characters
    return re.findall(chinese_pattern, code)
# Test code: run the extractor on a mixed Chinese/English sample.
code = '梅西是最好的,messi is the best'
chinese_chars = extract_chinese_chars(code)
print("中文字符:", chinese_chars)
If we want to match English letters instead, we need to change the pattern to the character class [a-zA-Z].
import re
def extract_chinese_chars(code):
    """Return every run of consecutive ASCII letters found in *code*.

    NOTE: despite the function's name, this variant uses the pattern
    ``[a-zA-Z]+`` and therefore extracts English words, not Chinese
    characters. The name is kept so existing callers continue to work.

    Args:
        code: The text to scan.

    Returns:
        A list of strings, one per run of letters; empty if *code* has
        no match.
    """
    english_pattern = r'[a-zA-Z]+'  # match English (ASCII) letters
    return re.findall(english_pattern, code)
# Test code: run the extractor on a mixed Chinese/English sample.
code = '''梅西是最好的,messi is the best'''
english_chars = extract_chinese_chars(code)
# The result holds runs matched by [a-zA-Z]+, so label it as English
# characters (the label previously read "Chinese characters").
print("英文字符:", english_chars)
Regular expressions provide powerful text pattern matching: they can be used to search, match, replace, and extract strings.