import re
from html import unescape
def remove_html_tags(html_str):
    """Strip HTML tags and newline characters from a piece of markup.

    Args:
        html_str: The markup, either as UTF-8 encoded ``bytes`` /
            ``bytearray`` or as an already-decoded ``str``.

    Returns:
        A ``str`` with every ``<...>`` span and every newline character
        removed.

    Raises:
        UnicodeDecodeError: If bytes-like input is not valid UTF-8.
    """
    # Only bytes-like input needs decoding; calling .decode() on a str
    # raises AttributeError on Python 3.
    if isinstance(html_str, (bytes, bytearray)):
        html_str = html_str.decode("utf-8")
    # DOTALL lets '.' cross line breaks so a tag that wraps over several
    # lines is removed as a whole instead of leaving residue behind once
    # the newlines are stripped below.
    without_tags = re.sub(r'<.*?>', '', html_str, flags=re.DOTALL)
    return without_tags.replace('\n', '')
def html_unicode_2_chinese(html_unicode):
    """Decode HTML character references in *html_unicode*.

    Named and numeric references (for example ``&amp;`` or ``&#20013;``)
    are replaced by the characters they stand for, using
    ``html.unescape``.

    Args:
        html_unicode: Text that may contain HTML character references.

    Returns:
        The text with the references replaced by their characters.
    """
    return unescape(html_unicode)
def process_str(unprocessed_str):
    """Remove spaces and literal backslash-n sequences from a string.

    The second pattern is the two-character sequence backslash followed
    by ``n``, not an actual newline character; real newlines are left in
    place.

    Args:
        unprocessed_str: The text to clean.

    Returns:
        The text with all spaces, then all backslash-n pairs, removed.
    """
    cleaned = unprocessed_str
    # Order matters: spaces go first, so a backslash and an 'n' that were
    # separated only by spaces become adjacent and are then removed too.
    for token in (' ', '\\n'):
        cleaned = cleaned.replace(token, '')
    return cleaned