def get_info_by_pattern(text, pattern):
    """Return all non-overlapping matches of ``pattern`` found in ``text``.

    Args:
        text: The string to search.
        pattern: A regular expression, handed to ``re.compile``.

    Returns:
        The list produced by the compiled pattern's ``findall`` method
        (empty when nothing matches).
    """
    return re.compile(pattern).findall(text)
5
# Convert the \uXXXX Unicode escape sequences contained in a string into Chinese characters.
def unicode_to_chinese(text: str) -> str:
    """Replace literal ``\\uXXXX`` escape sequences in ``text`` with the characters they encode.

    Each escape is a backslash, a lowercase ``u`` and exactly four hexadecimal
    digits (either case). The text is scanned once, so a character produced
    by one replacement is never re-interpreted as part of another escape.

    A high surrogate escape immediately followed by a low surrogate escape
    (e.g. ``\\ud83d\\ude00``) is combined into the single code point it
    represents. A lone surrogate escape is left in the text unchanged,
    because a lone surrogate cannot be encoded as UTF-8 later on.

    Args:
        text: The string that may contain ``\\uXXXX`` escape sequences.

    Returns:
        ``text`` with every decodable escape sequence replaced; anything that
        is not a well-formed escape (e.g. ``\\uzzzz``) is returned untouched.
    """
    # Function-scope import keeps this block self-contained.
    import re

    # Match whole runs of adjacent escapes so surrogate pairs can be seen together.
    return re.sub(r'(?:\\u[0-9a-fA-F]{4})+', _decode_unicode_escape_run, text)


def _decode_unicode_escape_run(match):
    """Decode one run of adjacent ``\\uXXXX`` escapes, pairing up UTF-16 surrogates."""
    run = match.group(0)
    # Every escape is exactly six characters long: backslash, 'u', four hex digits.
    escapes = [run[start:start + 6] for start in range(0, len(run), 6)]
    decoded = []
    index = 0
    while index < len(escapes):
        code = int(escapes[index][2:], 16)
        if 0xD800 <= code <= 0xDBFF and index + 1 < len(escapes):
            low = int(escapes[index + 1][2:], 16)
            if 0xDC00 <= low <= 0xDFFF:
                # Valid high/low pair: rebuild the supplementary-plane code point.
                decoded.append(
                    chr(0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00))
                )
                index += 2
                continue
        if 0xD800 <= code <= 0xDFFF:
            # Lone surrogate: keep the original escape text.
            decoded.append(escapes[index])
        else:
            decoded.append(chr(code))
        index += 1
    return ''.join(decoded)