Time.py 980 B

123456789101112131415161718192021222324252627282930
  1. from lxml import etree
  2. import time
  3. from .ProcessTool import remove_html_tags, html_unicode_2_chinese, process_str
  4. def get_time(html, date_begin):
  5. selector = etree.HTML(html)
  6. wei_bo_time = selector.xpath('//div[@class="content"]/p[1]/a[1]')
  7. wei_bo_time_str = []
  8. for i in wei_bo_time:
  9. temp = remove_html_tags(etree.tostring(i))
  10. temp = html_unicode_2_chinese(temp)
  11. temp = process_str(temp)
  12. temp = reprocess_str(temp)
  13. if '今天' in temp:
  14. temp = temp.replace('今天', ' ')
  15. temp = time.strftime("%Y-%m-%d", time.localtime()) + temp
  16. else:
  17. temp = date_begin[0:4] + '-' + temp
  18. wei_bo_time_str.append(temp)
  19. return wei_bo_time_str
  20. def reprocess_str(unprocessed_str):
  21. temp = unprocessed_str.replace(' ', '')
  22. temp = temp.replace('来自', '')
  23. temp = temp.replace('\xa0', '')
  24. temp = temp.replace('月', '-')
  25. temp = temp.replace('日', '-')
  26. return temp