123456789101112131415161718192021222324 |
- from lxml import etree
- from get_weibo_content.process_data import remove_html_tags, html_unicode_2_chinese, process_str
def get_comment_count(html):
    """Return the cleaned comment-count text for every matched anchor in *html*.

    The markup is parsed with ``etree.HTML`` and the ``<a>`` element inside
    the third ``<li>`` of each ``div.card-act`` list is selected.  Each match
    is serialized with ``etree.tostring`` and passed, in order, through
    ``remove_html_tags``, ``html_unicode_2_chinese``, ``process_str`` and
    ``reprocess_str``.

    NOTE(review): the first three helpers are imported from
    ``get_weibo_content.process_data``; what exactly they strip or convert is
    not visible from this file.

    :param html: HTML markup to parse.
    :return: a list with one entry per matched anchor, in document order;
        an entry whose cleaned text is falsy is stored as the string ``'0'``.
    """
    tree = etree.HTML(html)
    anchors = tree.xpath('//div[@class="card-act"]/ul/li[3]/a')

    counts = []
    for anchor in anchors:
        markup = etree.tostring(anchor)
        text = reprocess_str(
            process_str(html_unicode_2_chinese(remove_html_tags(markup)))
        )
        # Falsy cleaned text is recorded as the string '0'.
        counts.append(text or '0')
    return counts
def reprocess_str(unprocessed_str):
    """Return *unprocessed_str* with spaces and the label '评论' removed.

    Spaces are stripped first, so a label that was split by spaces
    (e.g. ``'评 论'``) is also removed by the second pass.

    :param unprocessed_str: the string to clean.
    :return: the cleaned string (possibly empty).
    """
    cleaned = unprocessed_str
    # Order matters: remove spaces before removing the label.
    for unwanted in (' ', '评论'):
        cleaned = cleaned.replace(unwanted, '')
    return cleaned
|