12345678910111213141516171819 |
- import re
- from html import unescape
def remove_html_tags(html_str):
    """Strip HTML tags and newline characters from *html_str*.

    Args:
        html_str: The markup to clean, either as UTF-8 encoded bytes or as
            an already-decoded ``str``.

    Returns:
        A ``str`` with every ``<...>`` span and every line-feed character
        removed.

    Raises:
        UnicodeDecodeError: If a bytes input is not valid UTF-8.
    """
    # Only decode when needed: ``str`` has no ``.decode`` in Python 3, so
    # already-decoded text is used as is instead of raising AttributeError.
    if not isinstance(html_str, str):
        html_str = html_str.decode("utf-8")
    # DOTALL lets ``.`` cross line breaks, so a tag that wraps over several
    # lines is removed as a whole instead of being left behind as debris
    # once the newlines are dropped below.
    without_tags = re.sub(r'<.*?>', '', html_str, flags=re.DOTALL)
    return without_tags.replace('\n', '')
def html_unicode_2_chinese(html_unicode):
    """Convert HTML character references in *html_unicode* to characters.

    Delegates directly to ``html.unescape``, so both numeric references
    (e.g. ``&#20013;``) and named ones (e.g. ``&lt;``) are replaced by the
    characters they stand for.

    Args:
        html_unicode: Text that may contain HTML character references.

    Returns:
        The text with those references replaced.
    """
    return unescape(html_unicode)
def process_str(unprocessed_str):
    """Remove spaces and literal backslash-n sequences from a string.

    The second pattern is the two-character sequence of a backslash
    followed by the letter n, not a real line feed; actual newline
    characters are left untouched.
    NOTE(review): presumably the input carries escaped newlines as text;
    confirm against the callers.

    Args:
        unprocessed_str: The text to clean.

    Returns:
        The text with both patterns removed.
    """
    cleaned = unprocessed_str
    # Spaces go first so that a backslash and an n separated only by
    # spaces are joined and then removed by the second pass.
    for unwanted in (' ', '\\n'):
        cleaned = cleaned.replace(unwanted, '')
    return cleaned
|