|
@@ -15,25 +15,29 @@ def run_from_time_a_2_time_b(keyword_temp, date_begin_temp, date_end_temp, proxy
|
|
|
page_count = begin_num
|
|
page_count = begin_num
|
|
|
all_data = []
|
|
all_data = []
|
|
|
while True:
|
|
while True:
|
|
|
- print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':开始爬取...')
|
|
|
|
|
- html = get_one_page(keyword_temp, page_count, date_begin_temp, date_end_temp, proxy_temp, cookie_temp,
|
|
|
|
|
- user_agent_temp)
|
|
|
|
|
- weibo_content_str, flag, if_contains_keyword = get_content(html, keyword)
|
|
|
|
|
- if not if_contains_keyword:
|
|
|
|
|
- break
|
|
|
|
|
- weibo_mid_str = get_mid(html)
|
|
|
|
|
- weibo_comment_count = get_comment_count(html)
|
|
|
|
|
- weibo_like_count = get_like_count(html)
|
|
|
|
|
- weibo_time_str = get_time(html, flag, date_begin_temp)
|
|
|
|
|
|
|
+ try:
|
|
|
|
|
+ print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':开始爬取...')
|
|
|
|
|
+ html = get_one_page(keyword_temp, page_count, date_begin_temp, date_end_temp, proxy_temp, cookie_temp,
|
|
|
|
|
+ user_agent_temp)
|
|
|
|
|
+ weibo_content_str, flag, if_contains_keyword = get_content(html, keyword)
|
|
|
|
|
+ if not if_contains_keyword:
|
|
|
|
|
+ break
|
|
|
|
|
+ weibo_mid_str = get_mid(html)
|
|
|
|
|
+ weibo_comment_count = get_comment_count(html)
|
|
|
|
|
+ weibo_like_count = get_like_count(html)
|
|
|
|
|
+ weibo_time_str = get_time(html, flag, date_begin_temp)
|
|
|
|
|
|
|
|
- length = len(weibo_content_str)
|
|
|
|
|
- print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':爬取到' + str(
|
|
|
|
|
- length) + '条信息')
|
|
|
|
|
- one_page_data = get_one_page_excel(weibo_content_str, weibo_mid_str, weibo_time_str, weibo_like_count,
|
|
|
|
|
- weibo_comment_count, length)
|
|
|
|
|
- all_data += one_page_data
|
|
|
|
|
- time.sleep(random.randint(3, 6))
|
|
|
|
|
- page_count += 1
|
|
|
|
|
|
|
+ length = len(weibo_content_str)
|
|
|
|
|
+ print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':爬取到' + str(
|
|
|
|
|
+ length) + '条信息')
|
|
|
|
|
+ one_page_data = get_one_page_excel(weibo_content_str, weibo_mid_str, weibo_time_str, weibo_like_count,
|
|
|
|
|
+ weibo_comment_count, length)
|
|
|
|
|
+ all_data += one_page_data
|
|
|
|
|
+ time.sleep(random.randint(3, 6))
|
|
|
|
|
+ page_count += 1
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(e)
|
|
|
|
|
+ continue
|
|
|
print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':爬取完毕')
|
|
print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':爬取完毕')
|
|
|
save_to_excel(all_data, keyword_temp, date_begin_temp, date_end_temp)
|
|
save_to_excel(all_data, keyword_temp, date_begin_temp, date_end_temp)
|
|
|
|
|
|