import time

from lxml import etree

from .ProcessTool import remove_html_tags, html_unicode_2_chinese, process_str


def get_time(html, date_begin):
    """Collect the timestamp strings found in an HTML page.

    Every node matched by ``//div[@class="content"]/p[1]/a[1]`` is
    serialized, passed through ``remove_html_tags``,
    ``html_unicode_2_chinese`` and ``process_str`` (all imported from
    ``ProcessTool``) and finally through ``reprocess_str``.

    If the cleaned text contains '今天' ("today"), that marker is replaced
    by a single space and the current local date (``%Y-%m-%d``) is
    prepended.  Otherwise the first four characters of ``date_begin``
    followed by ``'-'`` are prepended.

    Args:
        html: Markup handed to ``etree.HTML``.
        date_begin: Sliceable value whose first four characters are used as
            the prefix (presumably a date string starting with the year --
            TODO confirm against callers).

    Returns:
        A list with one string per matched node, in match order.
    """
    tree = etree.HTML(html)
    anchors = tree.xpath('//div[@class="content"]/p[1]/a[1]')

    # Cleaning steps, applied in this exact order to the serialized node.
    pipeline = (
        remove_html_tags,
        html_unicode_2_chinese,
        process_str,
        reprocess_str,
    )

    stamps = []
    for anchor in anchors:
        text = etree.tostring(anchor)
        for step in pipeline:
            text = step(text)

        if '今天' not in text:
            # No "today" marker: prefix with the leading four characters
            # of date_begin.
            stamps.append(date_begin[0:4] + '-' + text)
            continue

        # "today" marker: swap it for a space and prefix the local date.
        text = text.replace('今天', ' ')
        stamps.append(time.strftime("%Y-%m-%d", time.localtime()) + text)
    return stamps


def reprocess_str(unprocessed_str):
    """Strip filler text and turn month/day markers into hyphens.

    Applied in order: ASCII spaces are removed, '来自' ("from") is removed,
    non-breaking spaces (``\\xa0``) are removed, then '月' (month) and
    '日' (day) are each replaced by ``'-'``.

    Args:
        unprocessed_str: The string to clean.

    Returns:
        The cleaned string.
    """
    # Order matters: removing spaces first can join characters that the
    # later substitutions then match.
    substitutions = (
        (' ', ''),
        ('来自', ''),
        ('\xa0', ''),
        ('月', '-'),
        ('日', '-'),
    )
    cleaned = unprocessed_str
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned