ProcessTool.py 428 B

12345678910111213141516171819
  1. import re
  2. from html import unescape
  3. def remove_html_tags(html_str):
  4. temp = re.sub(r'<.*?>', '', html_str.decode("utf-8"))
  5. processed_str = temp.replace('\n', '')
  6. return processed_str
  7. def html_unicode_2_chinese(html_unicode):
  8. chinese = unescape(html_unicode)
  9. return chinese
  10. def process_str(unprocessed_str):
  11. temp = unprocessed_str.replace(' ', '')
  12. temp = temp.replace('\\n', '')
  13. return temp