فهرست منبع

修改为了web结构,准备开始做分布式Web微博舆情分析工具

Shellmiao 4 سال پیش
والد
کامیت
66fb828392

+ 7 - 0
.idea/WeiBoCrawler.iml

@@ -5,4 +5,11 @@
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
+  <component name="PyNamespacePackagesService">
+    <option name="namespacePackageFolders">
+      <list>
+        <option value="$MODULE_DIR$/Crawler" />
+      </list>
+    </option>
+  </component>
 </module>

+ 0 - 0
get_weibo_content/__init__.py → Crawler/__init__.py


+ 0 - 0
save_data/__init__.py → Crawler/get_weibo_content/__init__.py


+ 0 - 0
get_weibo_content/get_comment.py → Crawler/get_weibo_content/get_comment.py


+ 0 - 0
get_weibo_content/get_content.py → Crawler/get_weibo_content/get_content.py


+ 0 - 0
get_weibo_content/get_like.py → Crawler/get_weibo_content/get_like.py


+ 0 - 0
get_weibo_content/get_mid.py → Crawler/get_weibo_content/get_mid.py


+ 0 - 0
get_weibo_content/get_one_page.py → Crawler/get_weibo_content/get_one_page.py


+ 0 - 0
get_weibo_content/get_time.py → Crawler/get_weibo_content/get_time.py


+ 0 - 0
get_weibo_content/process_data.py → Crawler/get_weibo_content/process_data.py


+ 0 - 0
Crawler/process_data/__init__.py


+ 51 - 0
Crawler/process_data/emotion_analysis.py

@@ -0,0 +1,51 @@
+import json
+import os
+from tencentcloud.common import credential
+from tencentcloud.common.profile.client_profile import ClientProfile
+from tencentcloud.common.profile.http_profile import HttpProfile
+from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
+from tencentcloud.nlp.v20190408 import nlp_client, models
+
+
+def get_tencent_emotion_analysis(text, secret_id, secret_key):
+    """Send ``text`` to the Tencent Cloud NLP SentimentAnalysis endpoint.
+
+    :param text: Text to analyse; sent as the ``Text`` request field.
+    :param secret_id: Tencent Cloud API SecretId used to build the credential.
+    :param secret_key: Tencent Cloud API SecretKey used to build the credential.
+    :return: The service response decoded from JSON into a dict, or ``None``
+        when the SDK raises ``TencentCloudSDKException`` (the error is printed).
+    """
+    try:
+        cred = credential.Credential(secret_id, secret_key)
+        http_profile = HttpProfile()
+        http_profile.endpoint = "nlp.tencentcloudapi.com"
+
+        client_profile = ClientProfile()
+        client_profile.httpProfile = http_profile
+        client = nlp_client.NlpClient(cred, "ap-guangzhou", client_profile)
+
+        req = models.SentimentAnalysisRequest()
+        params = {
+            "Text": text,
+            "Flag": 2,
+            "Mode": "2class"
+        }
+        req.from_json_string(json.dumps(params))
+
+        resp = client.SentimentAnalysis(req)
+        return json.loads(resp.to_json_string())
+
+    except TencentCloudSDKException as err:
+        print(err)
+        return None
+
+
+if __name__ == '__main__':
+    # Manual smoke test only: the guard keeps imports free of API calls, and
+    # the credentials come from the environment so they are never committed.
+    print(get_tencent_emotion_analysis(
+        '我很喜欢你!',
+        os.environ['TENCENTCLOUD_SECRET_ID'],
+        os.environ['TENCENTCLOUD_SECRET_KEY'],
+    ))

+ 0 - 0
Crawler/process_data/get_data.py


+ 0 - 0
Crawler/save_data/__init__.py


+ 0 - 0
save_data/save_data_to_excel.py → Crawler/save_data/save_data_to_excel.py


+ 0 - 0
WeiBoCrawler/__init__.py


+ 118 - 0
WeiBoCrawler/settings.py

@@ -0,0 +1,118 @@
+"""
+Django settings for WeiBoCrawler project.
+
+Generated by 'django-admin startproject' using Django 2.0.13.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/2.0/topics/settings/
+
+For the full list of settings and their values, see
+https://docs.djangoproject.com/en/2.0/ref/settings/
+"""
+
+import os
+
+# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+# Quick-start development settings - unsuitable for production
+# See https://docs.djangoproject.com/en/2.0/howto/deployment/checklist/
+
+# SECURITY WARNING: keep the secret key secret - it is read from the environment, never committed.
+SECRET_KEY = os.environ['DJANGO_SECRET_KEY']
+
+# SECURITY WARNING: don't run with debug turned on in production!
+DEBUG = True
+
+ALLOWED_HOSTS = []
+
+# Application definition
+
+INSTALLED_APPS = [
+    'django.contrib.admin',
+    'django.contrib.auth',
+    'django.contrib.contenttypes',
+    'django.contrib.sessions',
+    'django.contrib.messages',
+    'django.contrib.staticfiles',
+]
+
+MIDDLEWARE = [
+    'django.middleware.security.SecurityMiddleware',
+    'django.contrib.sessions.middleware.SessionMiddleware',
+    'django.middleware.common.CommonMiddleware',
+    'django.middleware.csrf.CsrfViewMiddleware',
+    'django.contrib.auth.middleware.AuthenticationMiddleware',
+    'django.contrib.messages.middleware.MessageMiddleware',
+    'django.middleware.clickjacking.XFrameOptionsMiddleware',
+]
+
+ROOT_URLCONF = 'WeiBoCrawler.urls'
+
+TEMPLATES = [
+    {
+        'BACKEND': 'django.template.backends.django.DjangoTemplates',
+        'DIRS': [],
+        'APP_DIRS': True,
+        'OPTIONS': {
+            'context_processors': [
+                'django.template.context_processors.debug',
+                'django.template.context_processors.request',
+                'django.contrib.auth.context_processors.auth',
+                'django.contrib.messages.context_processors.messages',
+            ],
+        },
+    },
+]
+
+WSGI_APPLICATION = 'WeiBoCrawler.wsgi.application'
+
+# Database (the password is read from the WEIBO_DB_PASSWORD environment variable, never committed)
+# https://docs.djangoproject.com/en/2.0/ref/settings/#databases
+
+DATABASES = {
+    'default': {
+        'ENGINE': 'django.db.backends.mysql',
+        'NAME': 'WeiBoCrawler',
+        'USER': 'crawler',
+        'PASSWORD': os.environ['WEIBO_DB_PASSWORD'],
+        'HOST': '42.192.54.32',
+        'PORT': '3306',
+    }
+}
+
+# Password validation
+# https://docs.djangoproject.com/en/2.0/ref/settings/#auth-password-validators
+
+AUTH_PASSWORD_VALIDATORS = [
+    {
+        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
+    },
+]
+
+# Internationalization
+# https://docs.djangoproject.com/en/2.0/topics/i18n/
+
+LANGUAGE_CODE = 'en-us'
+
+TIME_ZONE = 'UTC'
+
+USE_I18N = True
+
+USE_L10N = True
+
+USE_TZ = True
+
+# Static files (CSS, JavaScript, Images)
+# https://docs.djangoproject.com/en/2.0/howto/static-files/
+
+STATIC_URL = '/static/'

+ 21 - 0
WeiBoCrawler/urls.py

@@ -0,0 +1,21 @@
+"""WeiBoCrawler URL Configuration
+
+The `urlpatterns` list routes URLs to views. For more information please see:
+    https://docs.djangoproject.com/en/2.0/topics/http/urls/
+Examples:
+Function views
+    1. Add an import:  from my_app import views
+    2. Add a URL to urlpatterns:  path('', views.home, name='home')
+Class-based views
+    1. Add an import:  from other_app.views import Home
+    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
+Including another URLconf
+    1. Import the include() function: from django.urls import include, path
+    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
+"""
+from django.contrib import admin
+from django.urls import path
+
+urlpatterns = [
+    path('admin/', admin.site.urls),
+]

+ 16 - 0
WeiBoCrawler/wsgi.py

@@ -0,0 +1,16 @@
+"""
+WSGI config for WeiBoCrawler project.
+
+It exposes the WSGI callable as a module-level variable named ``application``.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/2.0/howto/deployment/wsgi/
+"""
+
+import os
+
+from django.core.wsgi import get_wsgi_application
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "WeiBoCrawler.settings")
+
+application = get_wsgi_application()

+ 9 - 9
main.py

@@ -1,10 +1,10 @@
-from get_weibo_content.get_one_page import get_one_page
-from get_weibo_content.get_content import get_content
-from get_weibo_content.get_mid import get_mid
-from get_weibo_content.get_time import get_time
-from get_weibo_content.get_comment import get_comment_count
-from get_weibo_content.get_like import get_like_count
-from save_data.save_data_to_excel import get_one_page_excel, save_to_excel
+from Crawler.get_weibo_content.get_content import get_content
+from Crawler.get_weibo_content.get_one_page import get_one_page
+from Crawler.get_weibo_content.get_mid import get_mid
+from Crawler.get_weibo_content.get_time import get_time
+from Crawler.get_weibo_content.get_comment import get_comment_count
+from Crawler.get_weibo_content.get_like import get_like_count
+from Crawler.save_data.save_data_to_excel import get_one_page_excel, save_to_excel
 import datetime
 import time
 import random
@@ -48,9 +48,9 @@ if __name__ == '__main__':
     date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
     # cookie = input('[-]请输入cookie:')
     # 测试用cookie
-    # cookie = 'SUB=_2A25N_5x1DeRhGeBO4lsY9y_Pyz-IHXVvAyQ9rDV8PUJbkNAfLWH8kW1NRYEkPnoC7fl3RMKtu4E9iyGtx5ldeVcn; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWLS0lcQryz4UlBfKyjai.L5NHD95Qceh.41KMpe050Ws4Dqcjz-cyLdspDqgYt; SINAGLOBAL=2352245042816.5166.1627033753029; ULV=1627033753032:1:1:1:2352245042816.5166.1627033753029:; UOR=,,graph.qq.com; login_sid_t=5ed1466aa2327aac6d83a6652aa1a60a; cross_origin_proto=SSL; _s_tentry=passport.weibo.com; Apache=2352245042816.5166.1627033753029; WBtopGlobal_register_version=2021072412; webim_unReadCount=%7B%22time%22%3A1627119847912%2C%22dm_pub_total%22%3A1%2C%22chat_group_client%22%3A0%2C%22chat_group_notice%22%3A0%2C%22allcountNum%22%3A36%2C%22msgbox%22%3A0%7D; appkey=; SSOLoginState=1627122726; wvr=6; WBStorage=2ceabba76d81138d|undefined'
+    cookie = 'SUB=_2A25N_5x1DeRhGeBO4lsY9y_Pyz-IHXVvAyQ9rDV8PUJbkNAfLWH8kW1NRYEkPnoC7fl3RMKtu4E9iyGtx5ldeVcn; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWLS0lcQryz4UlBfKyjai.L5NHD95Qceh.41KMpe050Ws4Dqcjz-cyLdspDqgYt; SINAGLOBAL=2352245042816.5166.1627033753029; ULV=1627033753032:1:1:1:2352245042816.5166.1627033753029:; UOR=,,graph.qq.com; login_sid_t=5ed1466aa2327aac6d83a6652aa1a60a; cross_origin_proto=SSL; _s_tentry=passport.weibo.com; Apache=2352245042816.5166.1627033753029; WBtopGlobal_register_version=2021072412; webim_unReadCount=%7B%22time%22%3A1627119847912%2C%22dm_pub_total%22%3A1%2C%22chat_group_client%22%3A0%2C%22chat_group_notice%22%3A0%2C%22allcountNum%22%3A36%2C%22msgbox%22%3A0%7D; appkey=; SSOLoginState=1627122726; wvr=6; WBStorage=2ceabba76d81138d|undefined'
     # 写上cookie
-    cookie = ''
+    # cookie = ''
     user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
     proxy = '127.0.0.1:80'
     for i in range(0, 24):

+ 15 - 0
manage.py

@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+import os
+import sys
+
+if __name__ == "__main__":
+    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "WeiBoCrawler.settings")
+    try:
+        from django.core.management import execute_from_command_line
+    except ImportError as exc:
+        raise ImportError(
+            "Couldn't import Django. Are you sure it's installed and "
+            "available on your PYTHONPATH environment variable? Did you "
+            "forget to activate a virtual environment?"
+        ) from exc
+    execute_from_command_line(sys.argv)

BIN
微博爬取内容-吴亦凡-2021-07-08.zip


BIN
微博爬取内容-吴亦凡-2021-07-09.zip


BIN
微博爬取内容-吴亦凡-2021-07-15.zip