# Shellmiao / weibo.com-crawler
							from lxml import etree
import time
from .ProcessTool import remove_html_tags, html_unicode_2_chinese, process_str


def get_time(html, date_begin):
    """Collect normalised timestamp strings from an HTML page.

    The first ``<a>`` inside the first ``<p>`` of every
    ``<div class="content">`` is serialised, passed through the project
    helpers ``remove_html_tags``, ``html_unicode_2_chinese`` and
    ``process_str`` (defined in ``ProcessTool``; their exact behaviour is
    not visible here), and finally through ``reprocess_str``.

    Args:
        html: Markup to parse with ``lxml.etree.HTML``.
        date_begin: Sliceable value whose first four characters are used as
            the prefix for entries that do not contain '今天' ("today");
            presumably a date string starting with the year -- confirm
            against the caller.

    Returns:
        A list with one string per matched link. Entries containing '今天'
        have it replaced by a space and are prefixed with the current local
        date (``YYYY-MM-DD``); all others are prefixed with
        ``date_begin[0:4] + '-'``.
    """
    tree = etree.HTML(html)
    anchors = tree.xpath('//div[@class="content"]/p[1]/a[1]')

    timestamps = []
    for anchor in anchors:
        # Clean the serialised element step by step: project helpers first,
        # then the local normalisation.
        text = reprocess_str(
            process_str(
                html_unicode_2_chinese(
                    remove_html_tags(etree.tostring(anchor))
                )
            )
        )

        if '今天' not in text:
            # No "today" marker: prefix with the first four characters of
            # date_begin.
            timestamps.append(date_begin[0:4] + '-' + text)
            continue

        # "Today" marker: swap it for a space separator and prepend the
        # current local date.
        remainder = text.replace('今天', ' ')
        today = time.strftime("%Y-%m-%d", time.localtime())
        timestamps.append(today + remainder)

    return timestamps


def reprocess_str(unprocessed_str):
    """Strip filler text and turn the month/day markers into dashes.

    The substitutions are applied strictly in this order, and the order
    matters: removing spaces first can join '来' and '自' into '来自',
    which the next step then removes.

    1. remove ASCII spaces
    2. remove '来自' ("from")
    3. remove non-breaking spaces (``\\xa0``)
    4. replace '月' (month) with '-'
    5. replace '日' (day) with '-'

    Args:
        unprocessed_str: The string to normalise.

    Returns:
        The string after all substitutions have been applied.
    """
    substitutions = (
        (' ', ''),
        ('来自', ''),
        ('\xa0', ''),
        ('月', '-'),
        ('日', '-'),
    )
    result = unprocessed_str
    for needle, replacement in substitutions:
        result = result.replace(needle, replacement)
    return result