# Remove punctuation: every run of whitespace and/or punctuation in `content`
# is replaced by a single space and stored in the `clean` column.
#
# All symbols live in ONE character class, colon included. A literal "::"
# placed after the class would make the CJK punctuation match only when it is
# immediately followed by "::", leaving ordinary "。", "、", "【】" untouched.
# The fullwidth forms are written as \uXXXX escapes so they cannot be confused
# with their halfwidth look-alikes.
_PUNCT_RUN = re.compile(
    r'[\s+.!/_,$%^*()"\'?~@#&:'  # whitespace and ASCII symbols
    r'—【】“”。、¥…'  # CJK / general punctuation
    # fullwidth + ! ( ) , ? ~ @ # % & * : and fullwidth yen
    r'\uff0b\uff01\uff08\uff09\uff0c\uff1f\uff5e\uff20\uff03\uff05\uff06\uff0a\uff1a\uffe5'
    r']+'
)
# Non-string cells (e.g. NaN for missing text) are passed through unchanged;
# re.sub would raise TypeError on them.
test['clean'] = test.content.apply(
    lambda x: _PUNCT_RUN.sub(" ", x) if isinstance(x, str) else x
)
# Keep Chinese characters only: delete every character outside the range
# U+4E00-U+9FA5 from each `content` string and store the result in `ch`.
_NON_CHINESE = re.compile(r'[^\u4e00-\u9fa5]')
# Non-string cells (e.g. NaN for missing text) are passed through unchanged;
# re.sub would raise TypeError on them.
chapter['ch'] = chapter.content.apply(
    lambda x: _NON_CHINESE.sub("", x) if isinstance(x, str) else x
)
# Remove punctuation (second occurrence).
# NOTE(review): this repeats the identical cleaning statement found earlier in
# this file; confirm whether both copies are needed.
def _collapse_punctuation(text):
    """Replace each run of whitespace/punctuation in *text* with one space.

    Non-string values (e.g. NaN for missing text) are returned unchanged,
    because ``re.sub`` raises ``TypeError`` on them.
    """
    if not isinstance(text, str):
        return text
    # One character class holds every symbol, colon included. A literal "::"
    # after the class would restrict CJK punctuation to matches that are
    # immediately followed by "::".
    pattern = (
        r'[\s+.!/_,$%^*()"\'?~@#&:'  # whitespace and ASCII symbols
        r'—【】“”。、¥…'  # CJK / general punctuation
        # fullwidth + ! ( ) , ? ~ @ # % & * : and fullwidth yen
        r'\uff0b\uff01\uff08\uff09\uff0c\uff1f\uff5e\uff20\uff03\uff05\uff06\uff0a\uff1a\uffe5'
        r']+'
    )
    return re.sub(pattern, " ", text)


test['clean'] = test.content.apply(_collapse_punctuation)
# Keep Chinese characters only (second occurrence).
# NOTE(review): this repeats the identical extraction statement found earlier
# in this file; confirm whether both copies are needed.
def _keep_chinese(text):
    """Return *text* with every character outside U+4E00-U+9FA5 removed.

    Non-string values (e.g. NaN for missing text) are returned unchanged,
    because ``re.sub`` raises ``TypeError`` on them.
    """
    if not isinstance(text, str):
        return text
    return re.sub(r'[^\u4e00-\u9fa5]', "", text)


chapter['ch'] = chapter.content.apply(_keep_chinese)