# Shellmiao/Weibo_Crawler


			
							123456789101112131415161718192021222324252627282930
							from lxml import etree
from get_weibo_content.process_data import remove_html_tags, html_unicode_2_chinese, process_str


def get_time(html, flag):
    """Collect the cleaned text of the time link for every entry on a page.

    The markup is parsed with lxml and one result is produced per match of
    ``//div[@class="content"]/p[2]/a[1]``, in document order.  When an
    entry's zero-based position is contained in ``flag``, its text is taken
    from the next unused match of ``//div[@class="content"]/p[3]/a[1]``
    instead: the k-th flagged position consumes the k-th ``p[3]`` match.

    Each selected element is serialized with ``etree.tostring`` and then
    passed, in order, through ``remove_html_tags``,
    ``html_unicode_2_chinese``, ``process_str`` and ``reprocess_str``.
    Every cleaned value is printed as it is produced, and the total number
    of values is printed before returning.

    Args:
        html: HTML markup of the page to parse.
        flag: Container of zero-based positions (tested with ``in``) whose
            time link must be read from ``p[3]`` rather than ``p[2]``.
            NOTE(review): presumably these are entries with an extra
            paragraph ahead of the time link -- confirm with the caller.

    Returns:
        A list with one cleaned value per ``p[2]/a[1]`` match.

    Raises:
        IndexError: If more flagged positions are encountered than there
            are ``p[3]/a[1]`` matches in the document.
    """
    selector = etree.HTML(html)
    primary_links = selector.xpath('//div[@class="content"]/p[2]/a[1]')
    # The fallback query is loop-invariant; run it once instead of
    # re-scanning the whole document for every flagged entry.
    fallback_links = selector.xpath('//div[@class="content"]/p[3]/a[1]')

    weibo_time_str = []
    fallback_index = 0  # next unused element of fallback_links
    for position, link in enumerate(primary_links):
        if position in flag:
            node = fallback_links[fallback_index]
            fallback_index += 1
        else:
            node = link
        temp = remove_html_tags(etree.tostring(node))
        temp = html_unicode_2_chinese(temp)
        temp = process_str(temp)
        temp = reprocess_str(temp)
        weibo_time_str.append(temp)
        print(temp)
    print(len(weibo_time_str))
    return weibo_time_str


def reprocess_str(unprocessed_str):
    """Strip ASCII spaces and the marker '来自' ("from") out of a string.

    Spaces are removed first, so a marker that was split by spaces in the
    input (e.g. '来 自') is also removed.  Other whitespace such as tabs or
    newlines is left untouched.

    Args:
        unprocessed_str: Text to clean.

    Returns:
        The text with every ' ' and then every '来自' occurrence removed.
    """
    # Splitting on the literal space and re-joining drops every space.
    without_spaces = ''.join(unprocessed_str.split(' '))
    return without_spaces.replace('来自', '')