feat: 风险关键词红色标记 + 点击显示权重贡献

- 后端: _extract_reason_tokens 返回 [{token, weight}] 格式
- 前端: detect/batch 页面风险关键词使用红色标签样式
- 点击关键词后，弹窗显示其权重值及判定倾向

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
刘正航
2026-04-21 22:55:42 +08:00
parent b5237f9038
commit 50440e84fb
6 changed files with 44 additions and 6 deletions

View File

@@ -119,7 +119,7 @@ class NaiveBayesSpamClassifier:
"trained_at": self.metadata.get("trained_at"),
}
def _extract_reason_tokens(self, text: str, classes: list[str], x_row) -> list[str]:
def _extract_reason_tokens(self, text: str, classes: list[str], x_row) -> list[dict]:
try:
vocab = self.vectorizer.vocabulary_
feature_names = self.vectorizer.get_feature_names_out()
@@ -138,12 +138,12 @@ class NaiveBayesSpamClassifier:
if idx is None:
continue
delta = class_log_prob[spam_idx][idx] - class_log_prob[ham_idx][idx]
scored.append((token, delta))
scored.append({"token": token, "weight": round(delta, 4)})
scored.sort(key=lambda row: abs(row[1]), reverse=True)
return [token for token, _ in scored[:5]]
scored.sort(key=lambda row: abs(row["weight"]), reverse=True)
return scored[:5]
except Exception:
return list(text[:5])
return [{"token": ch, "weight": 0.0} for ch in list(text[:5])]
def model_info(self) -> dict:
return {