Bläddra i källkod

添加了异常处理,保证程序运行持久性

Shellmiao 4 år sedan
förälder
incheckning
1fbcec6667
3 ändrade filer med 22 tillägg och 18 borttagningar
  1. 22 18
      main.py
  2. BIN
      微博爬取内容-吴亦凡-2021-07-08.zip
  3. BIN
      微博爬取内容-吴亦凡-2021-07-09.zip

+ 22 - 18
main.py

@@ -15,25 +15,29 @@ def run_from_time_a_2_time_b(keyword_temp, date_begin_temp, date_end_temp, proxy
     page_count = begin_num
     all_data = []
     while True:
-        print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':开始爬取...')
-        html = get_one_page(keyword_temp, page_count, date_begin_temp, date_end_temp, proxy_temp, cookie_temp,
-                            user_agent_temp)
-        weibo_content_str, flag, if_contains_keyword = get_content(html, keyword)
-        if not if_contains_keyword:
-            break
-        weibo_mid_str = get_mid(html)
-        weibo_comment_count = get_comment_count(html)
-        weibo_like_count = get_like_count(html)
-        weibo_time_str = get_time(html, flag, date_begin_temp)
+        try:
+            print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':开始爬取...')
+            html = get_one_page(keyword_temp, page_count, date_begin_temp, date_end_temp, proxy_temp, cookie_temp,
+                                user_agent_temp)
+            weibo_content_str, flag, if_contains_keyword = get_content(html, keyword)
+            if not if_contains_keyword:
+                break
+            weibo_mid_str = get_mid(html)
+            weibo_comment_count = get_comment_count(html)
+            weibo_like_count = get_like_count(html)
+            weibo_time_str = get_time(html, flag, date_begin_temp)
 
-        length = len(weibo_content_str)
-        print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':爬取到' + str(
-            length) + '条信息')
-        one_page_data = get_one_page_excel(weibo_content_str, weibo_mid_str, weibo_time_str, weibo_like_count,
-                                           weibo_comment_count, length)
-        all_data += one_page_data
-        time.sleep(random.randint(3, 6))
-        page_count += 1
+            length = len(weibo_content_str)
+            print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':爬取到' + str(
+                length) + '条信息')
+            one_page_data = get_one_page_excel(weibo_content_str, weibo_mid_str, weibo_time_str, weibo_like_count,
+                                               weibo_comment_count, length)
+            all_data += one_page_data
+            time.sleep(random.randint(3, 6))
+            page_count += 1
+        except Exception as e:
+            print(e)
+            continue
     print('[-](' + date_begin_temp + '——' + date_end_temp + ')-page_' + str(page_count) + ':爬取完毕')
     save_to_excel(all_data, keyword_temp, date_begin_temp, date_end_temp)
 

BIN
微博爬取内容-吴亦凡-2021-07-08.zip


BIN
微博爬取内容-吴亦凡-2021-07-09.zip