feat: 垃圾信息分类标签功能
新增垃圾信息细分类标签,在朴素贝叶斯二分类基础上对 spam 进行细分:

- 新增 spam_categorizer.py 分类模块(诈骗/骚扰/广告)
- SpamPredictionLog 和 ContentPost 模型添加 category 字段
- content_routes 和 spam_routes 接口返回分类标签

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,6 +5,7 @@ from flask_jwt_extended import jwt_required
|
||||
|
||||
from app.extensions import db
|
||||
from app.ml.naive_bayes_classifier import NaiveBayesSpamClassifier
|
||||
from app.ml.spam_categorizer import categorize_spam, get_category_label
|
||||
from app.models import ContentPost, DetectionConfig, SpamPredictionLog, SpamTrainingSample, User
|
||||
from app.utils.auth import current_user
|
||||
from app.utils.response import fail, ok
|
||||
@@ -77,7 +78,7 @@ def _resolve_recipient(payload: dict, visibility: str, current_user_id: int):
|
||||
return recipient, None
|
||||
|
||||
|
||||
def _predict_and_decide(text: str, user_credit: int = 100) -> tuple[dict, float, bool]:
|
||||
def _predict_and_decide(text: str, user_credit: int = 100) -> tuple[dict, float, bool, str, str]:
|
||||
"""根据用户信誉分调整阈值系数。信誉分越高,阈值越高(降低敏感度)"""
|
||||
clf = _ensure_ready()
|
||||
result = clf.predict(text)
|
||||
@@ -92,7 +93,14 @@ def _predict_and_decide(text: str, user_credit: int = 100) -> tuple[dict, float,
|
||||
|
||||
adjusted_threshold = base_threshold * credit_factor
|
||||
blocked = float(result["spam_probability"]) >= adjusted_threshold
|
||||
return result, adjusted_threshold, blocked
|
||||
|
||||
# 分类标签
|
||||
category = ""
|
||||
category_label = ""
|
||||
if blocked:
|
||||
category, category_label = categorize_spam(result["text"])
|
||||
|
||||
return result, adjusted_threshold, blocked, category, category_label
|
||||
|
||||
|
||||
@content_bp.post("/publish")
|
||||
@@ -113,7 +121,7 @@ def publish_text():
|
||||
if err:
|
||||
return fail(err, 400)
|
||||
|
||||
result, threshold, blocked = _predict_and_decide(text, user.credit_score or 100)
|
||||
result, threshold, blocked, category, category_label = _predict_and_decide(text, user.credit_score or 100)
|
||||
|
||||
post = ContentPost(
|
||||
user_id=user.id,
|
||||
@@ -122,6 +130,7 @@ def publish_text():
|
||||
visibility=visibility,
|
||||
status="blocked" if blocked else "published",
|
||||
prediction=result["prediction"],
|
||||
category=category,
|
||||
spam_probability=result["spam_probability"],
|
||||
ham_probability=result["ham_probability"],
|
||||
confidence=result["confidence"],
|
||||
@@ -135,6 +144,7 @@ def publish_text():
|
||||
user_id=user.id,
|
||||
text=result["text"],
|
||||
prediction=result["prediction"],
|
||||
category=category,
|
||||
spam_probability=result["spam_probability"],
|
||||
ham_probability=result["ham_probability"],
|
||||
confidence=result["confidence"],
|
||||
@@ -153,14 +163,18 @@ def publish_text():
|
||||
|
||||
db.session.commit()
|
||||
|
||||
feedback = "发布成功" if not blocked else "疑似垃圾信息,系统已拦截,可提交申诉"
|
||||
feedback = "发布成功" if not blocked else f"{category_label or '疑似垃圾信息'},系统已拦截,可提交申诉"
|
||||
return ok(
|
||||
{
|
||||
"publish_allowed": not blocked,
|
||||
"action": "published" if not blocked else "blocked",
|
||||
"feedback": feedback,
|
||||
"post": _serialize_post(post),
|
||||
"detect": result,
|
||||
"detect": {
|
||||
**result,
|
||||
"category": category,
|
||||
"category_label": category_label,
|
||||
},
|
||||
},
|
||||
feedback,
|
||||
)
|
||||
@@ -188,13 +202,14 @@ def edit_post(post_id: int):
|
||||
if err:
|
||||
return fail(err, 400)
|
||||
|
||||
result, threshold, blocked = _predict_and_decide(text, user.credit_score or 100)
|
||||
result, threshold, blocked, category, category_label = _predict_and_decide(text, user.credit_score or 100)
|
||||
|
||||
post.text = result["text"]
|
||||
post.visibility = visibility
|
||||
post.recipient_user_id = recipient.id if recipient else None
|
||||
post.status = "blocked" if blocked else "published"
|
||||
post.prediction = result["prediction"]
|
||||
post.category = category
|
||||
post.spam_probability = result["spam_probability"]
|
||||
post.ham_probability = result["ham_probability"]
|
||||
post.confidence = result["confidence"]
|
||||
|
||||
Reference in New Issue
Block a user