Ver código fonte

first commit

Shellmiao 1 ano atrás
commit
19b11eca7d

+ 0 - 0
.gitignore


+ 3 - 0
.vscode/settings.json

@@ -0,0 +1,3 @@
+{
+    "vue.features.codeActions.enable": false
+}

BIN
data_for_test/ChatGAi_评论详情_20230323_20230421.xlsx


BIN
data_for_test/和平精英_评论详情_00010101_00010101.xlsx


BIN
data_for_test/和平精英_评论详情_20230323_20230421.xlsx


+ 21 - 0
flagged/log.csv

@@ -0,0 +1,21 @@
+上传 xlsx 文件,预定义消息,send() 函数返回的字符串,flag,username,timestamp
+C:\Users\28719\Documents\GithubProject\GPTcomment\flagged\上传 xlsx 文件\和平精英_评论详情_20230323_20230421r5p9l4c8.xlsx,接下来输入一些app store中对一款app的评论,请分条总结这款app的好评论与坏评论(各十条)(并在每条后面按照百分比给出这个观点的占比),"好评论:
+1. 绿洲世界模式很不错(5%)
+2. 恢复了狂爪(1%)
+3. 希望能做得更好(2%)
+4. 牛B(1%)
+5. 绿洲世界只有充钱才能召唤车辆,希望光子改下(3%)
+6. 老兵的福利太少了,请光爷对老兵多照顾照顾(4%)
+7. 真好玩(1%)
+
+坏评论:
+1. iPad Pro 2022玩和平精英流畅90针都稳定不了,画质还模糊不清晰(20%)
+2. 垃圾游戏毫无体验感,浪费时间,光子纯属坑钱(15%)
+3. 外挂满天飞不管真无语(10%)
+4. 卡死都卡退掉,敌人贴到脸上准备反打直接不动然后调大分(8%)
+5. 充钱和废案,绿洲世界充钱不公平,废案蜘蛛侠模式待改进(6%)
+6. 游戏下载速度有问题,过慢(5%)
+7. 突变模式中的狂爪影响游戏体验(2%)
+8. 游戏进不去,需要更新(1%)
+9. 过多的氪金问题(1%)
+10. 看评论的人都是无聊的人(1%)",,,2023-04-23 19:48:03.586657

BIN
flagged/上传 xlsx 文件/和平精英_评论详情_20230323_20230421r5p9l4c8.xlsx


+ 108 - 0
main.py

@@ -0,0 +1,108 @@
+import gradio as gr
+from utils.openai_api import get_reply, get_tokens_length
+from utils.read_file import read_xlsx_file, process_data
+
+# In-memory state per session key: {"count_now": <int>, "reply": <str>}.
+# Entries are created lazily by get_session and live for the process lifetime.
+sessions = {}
+
+
+def get_session(key):
+    """Return the mutable state dict stored for ``key``.
+
+    The first request for a key registers a fresh state with ``count_now``
+    set to 0 and ``reply`` set to the empty string; later requests return
+    that same dict, so changes made by the caller persist.
+    """
+    return sessions.setdefault(key, {"count_now": 0, "reply": ""})
+
+
+def _build_message(data, max_length, message_first, message_after, count_now,
+                   reply):
+    """Build the next prompt from the comments that have not been sent yet.
+
+    ``message_first`` is the template while nothing has been sent
+    (``count_now == 0``); afterwards ``message_after`` is used and receives
+    the previous ``reply``. Returns the ``(message, count)`` pair produced by
+    ``process_data``.
+    """
+    if count_now == 0:
+        return process_data(max_length, data, message_first, len(data))
+    return process_data(max_length,
+                        data,
+                        message_after,
+                        len(data),
+                        count_now,
+                        reply=reply)
+
+
+def submit(action, file, max_length, message_first, message_after,
+           session_key):
+    """Handle one form submission of the Gradio interface.
+
+    Args:
+        action: One of "预览消息" (preview the next prompt without calling
+            the API), "获取最终回复(耗时较长)" (keep sending rounds until
+            no comments are left) or "获取一轮回复" (send a single round).
+        file: Uploaded file object; its ``name`` attribute is passed to
+            ``read_xlsx_file``.
+        max_length: Length limit forwarded to ``process_data``.
+        message_first: Prompt template for the first round.
+        message_after: Prompt template for every later round.
+        session_key: Key under which progress (``count_now``) and the last
+            ``reply`` are remembered between submissions.
+
+    Returns:
+        A ``(token_length, message, reply)`` tuple matching the three output
+        text boxes.
+
+    Raises:
+        ValueError: If ``action`` is not one of the three known actions.
+    """
+    session = get_session(session_key)
+    count_now = session["count_now"]
+    reply = session["reply"]
+    # The upload cannot change during one call, so it is read only once
+    # instead of once per round.
+    data = read_xlsx_file(file.name)
+
+    if action == "预览消息":
+        message, _ = _build_message(data, max_length, message_first,
+                                    message_after, count_now, reply)
+        return get_tokens_length(message), message, ""
+
+    if action == "获取最终回复(耗时较长)":
+        last_sent = ""
+        while True:
+            message, count = _build_message(data, max_length, message_first,
+                                            message_after, count_now, reply)
+            count_now += count
+            if count == 0:
+                break
+            if not message:
+                # Nothing fit within max_length. Sending an empty prompt
+                # would waste an API call and overwrite the running summary.
+                continue
+            print(message)
+            reply = get_reply(message)
+            print(reply)
+            last_sent = message
+        session["count_now"] = count_now
+        session["reply"] = reply
+        # Report the size of the last prompt actually sent, not of the empty
+        # message that terminated the loop.
+        return get_tokens_length(last_sent), "", reply
+
+    if action == "获取一轮回复":
+        message, count = _build_message(data, max_length, message_first,
+                                        message_after, count_now, reply)
+        if message:
+            # Keep the previous reply when there is nothing left to send.
+            reply = get_reply(message)
+        session["count_now"] = count_now + count
+        session["reply"] = reply
+        return get_tokens_length(message), message, reply
+
+    raise ValueError("unknown action: {!r}".format(action))
+
+
+# Gradio form wired to submit(). The inputs are listed in the same order as
+# submit's positional parameters: action, file, max_length, message_first,
+# message_after, session_key. The outputs match its three return values.
+iface = gr.Interface(
+    fn=submit,
+    inputs=[
+        gr.inputs.Dropdown(choices=["预览消息", "获取最终回复(耗时较长)", "获取一轮回复"],
+                           label="操作",
+                           default="预览消息"),
+        gr.inputs.File(label="上传 xlsx 文件"),
+        gr.inputs.Number(default=2048, label="长度限制(最大4096)"),
+        # The {placeholders} in the two templates below are filled in by
+        # process_data via str.format.
+        gr.inputs.Textbox(
+            lines=2,
+            label="第一轮预定义消息",
+            default=
+            "接下来输入第{comment_num_start}条-第{comment_num_end}条app store中对一款app的评论(共{all_num}条),格式为[(标题,内容),(标题,内容)...],请分条总结这款app的好评论与坏评论(各十条)(并在每条后面按照百分比给出这个观点的在好/坏评论中的当前占比),{data_string}"
+        ),
+        gr.inputs.Textbox(
+            lines=2,
+            label="后续预定义消息",
+            default=
+            "{reply_before},以上是对前{comment_num}条的分析结果,接下来分段输入第{comment_num_start}条-第{comment_num_end}条app store中对一款app的评论(共{all_num}条),格式为[(标题,内容),(标题,内容)...],请分条总结这款app的好评论与坏评论(各十条)(并在每条后面按照百分比给出这个观点的在好/坏评论中的当前占比),{data_string}"
+        ),
+        # Whatever text is entered here is the key under which submit()
+        # stores and looks up progress.
+        gr.inputs.Textbox(lines=1,
+                          label="session_key",
+                          default="new_session_id(用于记录会话))"),
+    ],
+    outputs=[
+        gr.outputs.Textbox(label="Token长度"),
+        gr.outputs.Textbox(label="GPT输入的字符串(Message)"),
+        gr.outputs.Textbox(label="GPT输出的字符串(Reply)"),
+    ],
+    layout="vertical",
+    title="GPT4Comment",
+    description="上传xlsx文件并输入预定义的消息,然后点击发送使用GPT",
+)
+
+# NOTE: there is no ``if __name__ == "__main__"`` guard, so the app is
+# launched whenever this module is executed, including on import.
+iface.launch(debug=True)

BIN
utils/__pycache__/openai_api.cpython-39.pyc


BIN
utils/__pycache__/read_file.cpython-39.pyc


+ 19 - 0
utils/openai_api.py

@@ -0,0 +1,19 @@
+import os
+
+import openai
+from transformers import GPT2Tokenizer
+
+# Read the key from the environment so that no credential lives in source
+# control. Any key that was ever committed must be treated as leaked and
+# revoked with the provider.
+openai.api_key = os.environ.get("OPENAI_API_KEY")
+
+
+def get_reply(message):
+    """Send ``message`` as a single user turn and return the reply text.
+
+    The request goes to the "gpt-4-0314" chat model; the content of the
+    first returned choice is handed back to the caller.
+    """
+    user_turn = {"role": "user", "content": message}
+    response = openai.ChatCompletion.create(model="gpt-4-0314",
+                                            messages=[user_turn])
+    first_choice = response["choices"][0]
+    return first_choice["message"]["content"]
+
+
+def get_tokens_length(message):
+    """Return how many tokens the pretrained "gpt2" tokenizer gives ``message``.
+
+    The tokenizer is loaded anew on every call.
+    NOTE(review): this uses the GPT-2 vocabulary; confirm that it is an
+    acceptable estimate for the chat model the prompts are sent to.
+    """
+    return len(GPT2Tokenizer.from_pretrained("gpt2").encode(message))

+ 49 - 0
utils/read_file.py

@@ -0,0 +1,49 @@
+import openpyxl
+
+from utils.openai_api import get_tokens_length
+
+
+def read_xlsx_file(file_name):
+    """Load comment records from the active sheet of an xlsx workbook.
+
+    Reading starts at the second sheet row and the first two records read
+    are then dropped, so sheet rows 1-3 never appear in the result.
+    NOTE(review): presumably those are header/preamble rows of the export;
+    confirm against the actual file format.
+
+    Returns:
+        A list of dicts with the keys '发表时间', '作者', '评级', '标题' and
+        '内容', taken from row positions 1 to 5 (position 0 is ignored).
+    """
+    sheet = openpyxl.load_workbook(file_name).active
+    column_of = (
+        ('发表时间', 1),
+        ('作者', 2),
+        ('评级', 3),
+        ('标题', 4),
+        ('内容', 5),
+    )
+    records = [{key: row[idx]
+                for key, idx in column_of}
+               for row in sheet.iter_rows(min_row=2, values_only=True)]
+    return records[2:]
+
+
+def process_data(max_length,
+                 data,
+                 message,
+                 length,
+                 count_now=0,
+                 reply=""):
+    """Pack as many pending comments as fit into one prompt.
+
+    Comments are taken from ``data`` starting at index ``count_now`` and
+    added one at a time as ``(标题, 内容)`` pairs. After each addition the
+    ``message`` template is rendered; packing stops before the first comment
+    that would push the rendered prompt past ``max_length`` characters.
+
+    Args:
+        max_length: Maximum allowed ``len()`` of the rendered prompt.
+        data: List of comment dicts with the keys "标题" and "内容". It is
+            not modified.
+        message: ``str.format`` template. The available fields are
+            ``reply_before``, ``comment_num``, ``comment_num_start``,
+            ``comment_num_end``, ``all_num`` and ``data_string``.
+        length: Total number of comments, exposed as ``all_num``.
+        count_now: Number of comments already handled in earlier rounds.
+        reply: Previous reply, exposed as ``reply_before``.
+
+    Returns:
+        ``(prompt, count)`` where ``count`` is the number of comments that
+        are actually contained in ``prompt``. ``("", 0)`` means that nothing
+        is left or that the next comment does not fit on its own.
+    """
+    temp = []
+    result_message = ""
+    included = 0
+
+    for ele in data[count_now:]:
+        temp.append("({}, {})".format(ele["标题"], ele["内容"]))
+        candidate = message.format(
+            reply_before=reply,
+            comment_num=count_now,
+            comment_num_start=count_now + 1,
+            comment_num_end=count_now + len(temp),
+            all_num=length,
+            data_string="[{}]".format(", ".join(temp)))
+
+        if len(candidate) > max_length:
+            # The comment just added overflowed and is NOT in the prompt, so
+            # it must not be counted. Otherwise the next round would skip it.
+            break
+        result_message = candidate
+        included = len(temp)
+    return result_message, included