from lxml import etree

from get_weibo_content.process_data import (
    remove_html_tags,
    html_unicode_2_chinese,
    process_str,
)


def _clean_comment_text(node):
    """Run one matched element through the text-cleaning pipeline.

    The element is serialized with ``etree.tostring`` and then passed, in
    order, through ``remove_html_tags``, ``html_unicode_2_chinese``,
    ``process_str`` and ``reprocess_str``.
    """
    # NOTE(review): etree.tostring returns bytes by default on Python 3 --
    # confirm remove_html_tags accepts that input type.
    serialized = etree.tostring(node)
    # The three helpers below are imported from get_weibo_content.process_data;
    # presumably they strip markup, decode character references and tidy the
    # string -- verify against their definitions.
    stripped = remove_html_tags(serialized)
    decoded = html_unicode_2_chinese(stripped)
    tidied = process_str(decoded)
    return reprocess_str(tidied)


def get_comment_count(html):
    """Collect the cleaned comment-count text for each matching link in *html*.

    The page is parsed with ``etree.HTML`` and every ``<a>`` found at
    ``//div[@class="card-act"]/ul/li[3]/a`` is cleaned in turn.

    Returns:
        A list with one entry per matched element, in match order. An entry
        is the cleaned text, or ``'0'`` when the cleaned text is empty/falsy.
    """
    document = etree.HTML(html)
    anchors = document.xpath('//div[@class="card-act"]/ul/li[3]/a')

    counts = []
    for anchor in anchors:
        # An empty result means no number was left after cleaning; report '0'.
        counts.append(_clean_comment_text(anchor) or '0')
    return counts


def reprocess_str(unprocessed_str):
    """Return *unprocessed_str* with spaces and the label '评论' removed.

    Spaces are removed first, so a label split by spaces is still removed
    by the second replacement.
    """
    return unprocessed_str.replace(' ', '').replace('评论', '')