feat: 垃圾信息分类标签功能

新增垃圾信息细分类标签，在朴素贝叶斯二分类基础上对 spam 进行细分：

- 新增 spam_categorizer.py 分类模块（诈骗/骚扰/广告）
- SpamPredictionLog 和 ContentPost 模型添加 category 字段
- content_routes 和 spam_routes 接口返回分类标签

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-22 21:52:08 +08:00
parent 84f0943578
commit cedfd066c4
5 changed files with 126 additions and 8 deletions
--- a/backend/app/routes/spam_routes.py
+++ b/backend/app/routes/spam_routes.py
@@ -3,6 +3,7 @@ from flask_jwt_extended import jwt_required

 from app.extensions import db
 from app.ml.naive_bayes_classifier import NaiveBayesSpamClassifier
+from app.ml.spam_categorizer import categorize_spam, get_category_label
 from app.models import DetectionConfig, SpamPredictionLog, SpamTrainingSample
 from app.utils.auth import admin_required, current_user
 from app.utils.response import fail, ok
@@ -58,10 +59,17 @@ def predict_one():
    threshold = _adjusted_threshold(user.credit_score or 100)
    blocked = float(result["spam_probability"]) >= threshold

+    # 分类标签：仅在判定为垃圾时进行细分
+    category = ""
+    category_label = ""
+    if blocked:
+        category, category_label = categorize_spam(result["text"])
+
    row = SpamPredictionLog(
        user_id=user.id,
        text=result["text"],
        prediction=result["prediction"],
+        category=category,
        spam_probability=result["spam_probability"],
        ham_probability=result["ham_probability"],
        confidence=result["confidence"],
@@ -71,7 +79,14 @@ def predict_one():
    db.session.add(row)
    db.session.commit()

-    return ok({**result, "log_id": row.id, "threshold": threshold, "blocked_by_threshold": blocked}, "识别成功")
+    return ok({
+        **result,
+        "log_id": row.id,
+        "threshold": threshold,
+        "blocked_by_threshold": blocked,
+        "category": category,
+        "category_label": category_label,
+    }, "识别成功")


@spam_bp.post("/predict/batch")
@@ -98,12 +113,23 @@ def predict_batch():
        if len(content) < 2:
            continue
        result = clf.predict(content)
-        result["blocked_by_threshold"] = float(result["spam_probability"]) >= threshold
+        blocked = float(result["spam_probability"]) >= threshold
+        result["blocked_by_threshold"] = blocked
+
+        # 分类标签
+        category = ""
+        category_label = ""
+        if blocked:
+            category, category_label = categorize_spam(result["text"])
+        result["category"] = category
+        result["category_label"] = category_label
+
        rows.append(
            SpamPredictionLog(
                user_id=user.id,
                text=result["text"],
                prediction=result["prediction"],
+                category=category,
                spam_probability=result["spam_probability"],
                ham_probability=result["ham_probability"],
                confidence=result["confidence"],