feat: 垃圾信息分类标签功能
新增垃圾信息细分类标签,在朴素贝叶斯二分类基础上对 spam 进行细分:

- 新增 spam_categorizer.py 分类模块(诈骗/骚扰/广告)
- SpamPredictionLog 和 ContentPost 模型添加 category 字段
- content_routes 和 spam_routes 接口返回分类标签

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@ from flask_jwt_extended import jwt_required
|
||||
|
||||
from app.extensions import db
|
||||
from app.ml.naive_bayes_classifier import NaiveBayesSpamClassifier
|
||||
from app.ml.spam_categorizer import categorize_spam, get_category_label
|
||||
from app.models import DetectionConfig, SpamPredictionLog, SpamTrainingSample
|
||||
from app.utils.auth import admin_required, current_user
|
||||
from app.utils.response import fail, ok
|
||||
@@ -58,10 +59,17 @@ def predict_one():
|
||||
threshold = _adjusted_threshold(user.credit_score or 100)
|
||||
blocked = float(result["spam_probability"]) >= threshold
|
||||
|
||||
# 分类标签:仅在判定为垃圾时进行细分
|
||||
category = ""
|
||||
category_label = ""
|
||||
if blocked:
|
||||
category, category_label = categorize_spam(result["text"])
|
||||
|
||||
row = SpamPredictionLog(
|
||||
user_id=user.id,
|
||||
text=result["text"],
|
||||
prediction=result["prediction"],
|
||||
category=category,
|
||||
spam_probability=result["spam_probability"],
|
||||
ham_probability=result["ham_probability"],
|
||||
confidence=result["confidence"],
|
||||
@@ -71,7 +79,14 @@ def predict_one():
|
||||
db.session.add(row)
|
||||
db.session.commit()
|
||||
|
||||
return ok({**result, "log_id": row.id, "threshold": threshold, "blocked_by_threshold": blocked}, "识别成功")
|
||||
return ok({
|
||||
**result,
|
||||
"log_id": row.id,
|
||||
"threshold": threshold,
|
||||
"blocked_by_threshold": blocked,
|
||||
"category": category,
|
||||
"category_label": category_label,
|
||||
}, "识别成功")
|
||||
|
||||
|
||||
@spam_bp.post("/predict/batch")
|
||||
@@ -98,12 +113,23 @@ def predict_batch():
|
||||
if len(content) < 2:
|
||||
continue
|
||||
result = clf.predict(content)
|
||||
result["blocked_by_threshold"] = float(result["spam_probability"]) >= threshold
|
||||
blocked = float(result["spam_probability"]) >= threshold
|
||||
result["blocked_by_threshold"] = blocked
|
||||
|
||||
# 分类标签
|
||||
category = ""
|
||||
category_label = ""
|
||||
if blocked:
|
||||
category, category_label = categorize_spam(result["text"])
|
||||
result["category"] = category
|
||||
result["category_label"] = category_label
|
||||
|
||||
rows.append(
|
||||
SpamPredictionLog(
|
||||
user_id=user.id,
|
||||
text=result["text"],
|
||||
prediction=result["prediction"],
|
||||
category=category,
|
||||
spam_probability=result["spam_probability"],
|
||||
ham_probability=result["ham_probability"],
|
||||
confidence=result["confidence"],
|
||||
|
||||
Reference in New Issue
Block a user