123456789101112131415161718192021 |
- import re
- from html import unescape
- # input:str
def remove_html_tags(html_str):
    """Strip HTML tags and newline characters from *html_str*.

    Args:
        html_str: Markup as ``str``, or as UTF-8 encoded ``bytes`` /
            ``bytearray``.

    Returns:
        A ``str`` with every ``<...>`` span and every newline character
        removed.

    Raises:
        UnicodeDecodeError: If bytes-like input is not valid UTF-8.

    Note:
        ``.`` does not match a newline here, so a tag that is split across
        several lines is not recognised as a tag; only its newlines are
        dropped.
    """
    # Only bytes-like input needs decoding. ``str`` has no ``decode`` on
    # Python 3, so decoding unconditionally raised AttributeError for the
    # text input this function is meant to accept.
    if isinstance(html_str, (bytes, bytearray)):
        html_str = html_str.decode("utf-8")
    without_tags = re.sub(r'<.*?>', '', html_str)
    return without_tags.replace('\n', '')
- # input:str
def html_unicode_2_chinese(html_unicode):
    """Return *html_unicode* with its HTML character references decoded.

    Named and numeric references (for example ``&amp;`` or ``&#20013;``)
    are replaced by the characters they stand for, via ``html.unescape``.
    """
    return unescape(html_unicode)
def process_str(unprocessed_str):
    """Remove spaces and literal backslash-n sequences from a string.

    Spaces are removed first, then every two-character sequence made of a
    backslash followed by ``n``. Actual newline characters are left alone.
    The order matters: dropping spaces can bring a backslash and an ``n``
    together, and that pair is then removed by the second step.
    """
    return unprocessed_str.replace(' ', '').replace('\\n', '')
|