get_comment.py 786 B

123456789101112131415161718192021222324
  1. from lxml import etree
  2. from get_weibo_content.process_data import remove_html_tags, html_unicode_2_chinese, process_str
  3. def get_comment_count(html):
  4. selector = etree.HTML(html)
  5. weibo_comment_count_temp = selector.xpath('//div[@class="card-act"]/ul/li[3]/a')
  6. weibo_comment_count = []
  7. for i in weibo_comment_count_temp:
  8. temp = remove_html_tags(etree.tostring(i))
  9. temp = html_unicode_2_chinese(temp)
  10. temp = process_str(temp)
  11. temp = reprocess_str(temp)
  12. if temp:
  13. weibo_comment_count.append(temp)
  14. else:
  15. weibo_comment_count.append('0')
  16. return weibo_comment_count
  17. def reprocess_str(unprocessed_str):
  18. temp = unprocessed_str.replace(' ', '')
  19. temp = temp.replace('评论', '')
  20. return temp