CommentNum.py 777 B

12345678910111213141516171819202122232425
  1. from lxml import etree
  2. from .ProcessTool import remove_html_tags, html_unicode_2_chinese, process_str
  3. def get_comment_count(html):
  4. selector = etree.HTML(html)
  5. weibo_comment_count_temp = selector.xpath(
  6. '//div[@class="card-act"]/ul/li[2]/a')
  7. weibo_comment_count = []
  8. for i in weibo_comment_count_temp:
  9. temp = remove_html_tags(etree.tostring(i))
  10. temp = html_unicode_2_chinese(temp)
  11. temp = process_str(temp)
  12. temp = reprocess_str(temp)
  13. if temp:
  14. weibo_comment_count.append(temp)
  15. else:
  16. weibo_comment_count.append('0')
  17. return weibo_comment_count
  18. def reprocess_str(unprocessed_str):
  19. temp = unprocessed_str.replace(' ', '')
  20. temp = temp.replace('评论', '')
  21. return temp