Popular roles are more than a source of inspiration: they are your productivity assistant. With carefully curated role prompts you can quickly generate high-quality content, spark creative ideas, and find the solution that best fits your needs. Create with less effort and get to value faster!
We keep the role library updated for different user needs, so you can always find the right entry point for inspiration.
This prompt automates an end-to-end machine learning workflow: data preprocessing, feature engineering, model comparison, hyperparameter tuning, and deployment preparation. All analysis relies strictly on the user-supplied dataset, task type, and metric requirements, keeping the process controllable, reproducible, and free of external data. It fits scenarios such as finance, healthcare, and recommendation, helping users obtain a structured, high-quality model-building plan without deep technical expertise.
| Model | Text vectorization | Categorical features | ROC-AUC (validation) | Recall@0.30 | Precision@0.30 |
|---|---|---|---|---|---|
| LogisticRegression(liblinear, class_weight=balanced, C=1.0) | TF-IDF char 1-3 | channel, priority | 1.00 | 1.00 | 1.00 |
| LinearSVC + CalibratedClassifierCV | TF-IDF char 1-3 | channel, priority | 1.00 | 1.00 | 1.00 |
| MultinomialNB | TF-IDF char 1-3 | channel, priority | 1.00 | 1.00 | 1.00 |
Note: the validation set is tiny (2 rows), so these metrics only verify that the pipeline runs end to end; stability must be re-checked on a larger sample.
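A minimal sketch of how the three candidates above could be compared on identical features. It assumes the Xtr/ytr/Xva/yva split produced by the training script below, and build_preprocessor is a hypothetical helper returning the same ColumnTransformer that build_pipeline constructs:

from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

candidates = {
    "logreg": LogisticRegression(C=1.0, solver='liblinear', class_weight='balanced', max_iter=1000),
    "linearsvc_cal": CalibratedClassifierCV(LinearSVC(C=1.0), cv=3),  # calibration adds predict_proba
    "mnb": MultinomialNB(),
}
for name, clf in candidates.items():
    # build_preprocessor() is an assumed helper mirroring the ColumnTransformer in build_pipeline()
    pipe = Pipeline([('pre', build_preprocessor()), ('clf', clf)])
    pipe.fit(Xtr, ytr)
    p = pipe.predict_proba(Xva)[:, 1]
    print(name, "ROC-AUC:", round(roc_auc_score(yva, p), 3))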
{
  "random_state": 42,
  "time_split": {"valid_date": "2024-11-03"},
  "features": {
    "text": {"column": "content_text", "vectorizer": "tfidf_char", "ngram_range": [1, 3], "max_features": 5000, "sublinear_tf": true},
    "categorical": ["channel", "priority"],
    "stats": ["exclam_count", "text_len"]
  },
  "model": {"type": "logreg", "params": {"C": 1.0, "solver": "liblinear", "class_weight": "balanced", "max_iter": 1000}},
  "threshold": 0.30
}
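A minimal loader sketch for the config above (illustrative only; the training script below accepts --config but hardcodes matching values rather than reading the file):

import json

with open("configs/train_config.json", encoding="utf-8") as f:
    cfg = json.load(f)
ngram_range = tuple(cfg["features"]["text"]["ngram_range"])  # JSON [1, 3] -> Python (1, 3)
threshold = cfg["threshold"]                                 # 0.30, the recall-first operating point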
import json, re, joblib
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support

np.random.seed(42)
def clean_text(s):
    if pd.isna(s):
        return ""
    s = str(s).lower().strip()
    s = re.sub(r'[\U00010000-\U0010ffff]', '', s)  # remove emoji (astral-plane characters)
    s = re.sub(r'[!?!!]{2,}', '!', s)              # collapse repeated exclamations, keep one
    s = re.sub(r'\s+', ' ', s)
    return s

def exclam_count(x):
    # count both halfwidth and fullwidth exclamation marks
    return x.count('!') + x.count('!')
def build_pipeline():
    text_vec = TfidfVectorizer(analyzer='char', ngram_range=(1, 3),
                               max_features=5000, sublinear_tf=True, min_df=1)
    # 'sparse' was renamed to 'sparse_output' in scikit-learn 1.2
    ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=True)

    def get_stats(df):
        # exclamation count and raw text length as dense numeric features
        return np.c_[df['content_text'].apply(exclam_count),
                     df['content_text'].str.len().fillna(0)]

    stats = FunctionTransformer(get_stats, validate=False)
    pre = ColumnTransformer(
        transformers=[
            ('tfidf', text_vec, 'content_text'),
            ('ohe', ohe, ['channel', 'priority']),
            ('stats', stats, ['content_text'])
        ],
        remainder='drop',
        sparse_threshold=1.0
    )
    clf = LogisticRegression(C=1.0, solver='liblinear',
                             class_weight='balanced', max_iter=1000, random_state=42)
    pipe = Pipeline([('pre', pre), ('clf', clf)])
    return pipe
def load_data(path):
    df = pd.read_csv(path)
    df['created_at'] = pd.to_datetime(df['created_at'])
    df['channel'] = df['channel'].astype(str).str.lower()
    df['priority'] = df['priority'].astype(str).str.lower()
    df['content_text'] = df['content_text'].apply(clean_text)
    # near-duplicate removal (exact/near by char 3-gram Jaccard)
    # omitted here for brevity; assume none removed on this dataset
    return df

def time_split(df, valid_day='2024-11-03'):
    valid_day = pd.to_datetime(valid_day).date()
    df['date'] = df['created_at'].dt.date
    train = df[df['date'] < valid_day].copy()
    valid = df[df['date'] == valid_day].copy()
    return train, valid
if __name__ == "__main__":
    import argparse, os
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", required=True)
    parser.add_argument("--outdir", default="models")
    # accepted for provenance; the hyperparameters below are hardcoded to match this file
    parser.add_argument("--config", default="configs/train_config.json")
    args = parser.parse_args()
    os.makedirs(args.outdir, exist_ok=True)
    os.makedirs("reports", exist_ok=True)  # threshold curve and importances are written here

    df = load_data(args.data)
    train, valid = time_split(df, valid_day='2024-11-03')
    Xtr, ytr = train[['content_text', 'channel', 'priority']], train['label_escalate']
    Xva, yva = valid[['content_text', 'channel', 'priority']], valid['label_escalate']

    pipe = build_pipeline()
    pipe.fit(Xtr, ytr)

    # Validation
    p_va = pipe.predict_proba(Xva)[:, 1]
    auc = roc_auc_score(yva, p_va)
    thr = 0.30
    yhat = (p_va >= thr).astype(int)
    prec, rec, f1, _ = precision_recall_fscore_support(yva, yhat, average='binary', zero_division=0)
    print({"val_auc": float(auc), "precision@0.30": float(prec),
           "recall@0.30": float(rec), "f1@0.30": float(f1)})

    # Save artifacts
    joblib.dump(pipe, os.path.join(args.outdir, "ticket_escalate_pipeline.joblib"))
    with open(os.path.join(args.outdir, "decision_threshold.json"), "w", encoding="utf-8") as f:
        json.dump({"threshold": thr}, f, ensure_ascii=False)

    # Threshold-recall curve
    thrs = [0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 0.9, 0.98]
    rows = []
    for t in thrs:
        yhat_t = (p_va >= t).astype(int)
        prec_t, rec_t, f1_t, _ = precision_recall_fscore_support(yva, yhat_t, average='binary', zero_division=0)
        rows.append({"threshold": t, "recall": rec_t, "precision": prec_t, "f1": f1_t})
    pd.DataFrame(rows).to_csv("reports/threshold_recall.csv", index=False)

    # Feature importance export (top +/- terms); TF-IDF features come first in the ColumnTransformer output
    vec = pipe.named_steps['pre'].named_transformers_['tfidf']
    clf = pipe.named_steps['clf']
    vocab = np.array([t for t, _ in sorted(vec.vocabulary_.items(), key=lambda kv: kv[1])])
    coef = clf.coef_[0][:len(vocab)]
    top_pos_idx = np.argsort(-coef)[:20]
    top_neg_idx = np.argsort(coef)[:20]
    imp = pd.DataFrame({
        "feature": np.r_[vocab[top_pos_idx], vocab[top_neg_idx]],
        "weight": np.r_[coef[top_pos_idx], coef[top_neg_idx]]
    })
    imp.to_csv("reports/feature_importance.csv", index=False)
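Assuming the script above is saved as train.py (the filename is an assumption), a typical invocation would be `python train.py --data tickets.csv --outdir models`; it prints the validation metrics and writes the pipeline, threshold, curve, and importance files listed above.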
import json, joblib, pandas as pd

class EscalateModel:
    def __init__(self, model_path="models/ticket_escalate_pipeline.joblib",
                 thr_path="models/decision_threshold.json"):
        self.pipe = joblib.load(model_path)
        with open(thr_path, "r", encoding="utf-8") as f:
            self.thr = json.load(f)["threshold"]

    def predict_proba(self, records):
        # mirror the training-time normalization of the categorical columns
        # NOTE: training also applied clean_text to content_text; apply the same
        # normalization here for full train/serve parity
        df = pd.DataFrame(records)[['content_text', 'channel', 'priority']].copy()
        df['channel'] = df['channel'].astype(str).str.lower()
        df['priority'] = df['priority'].astype(str).str.lower()
        return self.pipe.predict_proba(df)[:, 1]

    def predict(self, records):
        p = self.predict_proba(records)
        return (p >= self.thr).astype(int), p

# Usage example
# model = EscalateModel()
# labels, probs = model.predict([{"content_text": "系统报错500,求紧急处理!", "channel": "Email", "priority": "High"}])
The workflow above satisfies the requirements: it uses only the provided data; splits by time; applies a recall-first threshold; exports top term/phrase importances plus sample-level contributions; provides a threshold-recall curve; and delivers the training config, feature pipeline, and inference script.
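The training script only exports global top terms; below is a minimal sketch of the sample-level contributions mentioned here, restricted to the TF-IDF block of the fitted pipe (the function name and top_k are illustrative):

import numpy as np
import pandas as pd

def text_contributions(pipe, texts, top_k=10):
    # per-sample contribution of each char n-gram: tfidf value * logistic coefficient
    vec = pipe.named_steps['pre'].named_transformers_['tfidf']
    coef = pipe.named_steps['clf'].coef_[0]
    X = vec.transform(texts)
    contrib = X.multiply(coef[:X.shape[1]]).tocsr()  # TF-IDF features come first in the coef vector
    vocab = vec.get_feature_names_out()
    rows = []
    for i, text in enumerate(texts):
        row = contrib.getrow(i).toarray().ravel()
        idx = np.argsort(-np.abs(row))[:top_k]
        rows.append({"text": text,
                     "top_terms": [(vocab[j], float(row[j])) for j in idx if row[j] != 0]})
    return pd.DataFrame(rows)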
Output: preprocessed feature matrix X = [TF-IDF bi-gram] ⊕ [lab_wbc_z, age_z, gender_M]; y = diagnosis_label
| Model | Features | Key hyperparameters | 5-fold Macro F1 (mean ± std) | Training/inference notes |
|---|---|---|---|---|
| Logistic Regression (OvR, L2) | TF-IDF bi-gram ⊕ numeric | C=2.0, class_weight=None, solver=liblinear | 1.00 ± 0.00 | Probability output; tunable threshold; latency <5 ms/sample (CPU) |
| Linear SVM (OvR) | Same as above | C=1.0 | 1.00 ± 0.00 | Linear decision function; no probabilities (thresholding is awkward) |
| Multinomial NB | Same as above (text-dominated) | alpha=0.5 | 0.93 ± 0.10 | Simple; makes little use of the numeric features |
| LightGBM (multiclass) | Sparse TF-IDF + numeric | shallow trees, early_stopping | 1.00 ± 0.00 | Probability output; prone to overfitting on small samples |
Notes:
The confusion matrix (aggregated over the 5-fold validation sets; rows = ground truth, columns = predictions) is omitted here.
Remark: no external medical dictionary or knowledge base was introduced; the model was built and evaluated using only the provided data.
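A minimal sketch of the 5-fold comparison under the feature spec above (TF-IDF bi-grams ⊕ z-scored numerics ⊕ one-hot gender). The raw column names text, lab_wbc, age, gender and the frame df are assumptions consistent with the derived names lab_wbc_z, age_z, gender_M:

from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

pre = ColumnTransformer([
    ('tfidf', TfidfVectorizer(ngram_range=(1, 2)), 'text'),      # uni+bi-grams; exact range is an assumption
    ('num', StandardScaler(), ['lab_wbc', 'age']),               # -> lab_wbc_z, age_z
    ('sex', OneHotEncoder(handle_unknown='ignore'), ['gender'])  # -> gender_M (and gender_F)
])
pipe = Pipeline([('pre', pre),
                 ('clf', LogisticRegression(C=2.0, solver='liblinear'))])  # liblinear trains one-vs-rest
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(pipe, df[['text', 'lab_wbc', 'age', 'gender']],
                         df['diagnosis_label'], cv=cv, scoring='f1_macro')
print(f"macro F1: {scores.mean():.2f} ± {scores.std():.2f}")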
Output (note): standardized text columns plus aligned categorical and numeric columns have been generated for downstream feature engineering.
| Model | Text representation | Validation RMSE | Validation MAE | Test RMSE | Test MAE | Notes |
|---|---|---|---|---|---|---|
| Ridge regression | TF-IDF(1,2) + one-hot + length | 0.10 | 0.10 | 0.10 | 0.10 | Best |
| SVR (RBF) | Same as above | 0.40 | 0.40 | 0.50 | 0.50 | Poorer generalization |
| Random forest | Same as above | 0.00 | 0.00 | 0.20 | 0.20 | Signs of overfitting on validation |
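A minimal sketch of the winning Ridge pipeline under the feature spec in the table; it also defines the trained_pipeline referenced by the ONNX export below. The column and target names follow the export/schema snippets, and the alpha value is an assumption, not the tuned one:

from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

trained_pipeline = Pipeline([
    ('pre', ColumnTransformer([
        ('tfidf', TfidfVectorizer(ngram_range=(1, 2)), 'review_text'),
        ('ohe', OneHotEncoder(handle_unknown='ignore'), ['product_type'])
    ], remainder='passthrough')),  # review_length passes through unchanged
    ('reg', Ridge(alpha=1.0))      # alpha is illustrative
])
# trained_pipeline.fit(train_df[['review_text', 'product_type', 'review_length']], train_df['rating'])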
from skl2onnx import to_onnx
from skl2onnx.common.data_types import StringTensorType, FloatTensorType

# one [None, 1] tensor per input column of the fitted pipeline
initial_types = [
    ('review_text', StringTensorType([None, 1])),
    ('product_type', StringTensorType([None, 1])),
    ('review_length', FloatTensorType([None, 1]))
]
onnx_model = to_onnx(trained_pipeline, initial_types=initial_types, target_opset=17)
with open("model_v1.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())
import onnxruntime as ort
import numpy as np

sess = ort.InferenceSession("model_v1.onnx", providers=["CPUExecutionProvider"])
inputs = {
    "review_text": np.array([["安装顺畅,稳固无异味,性价比高。"]], dtype=object),
    "product_type": np.array([["书桌"]], dtype=object),
    "review_length": np.array([[17.0]], dtype=np.float32)
}
pred = sess.run(None, inputs)[0]  # output shape: (1, 1)
print(float(pred[0, 0]))          # e.g. 4.8
{
  "type": "object",
  "required": ["review_id", "product_type", "review_text", "review_length"],
  "properties": {
    "review_id": {"type": "string", "minLength": 1},
    "product_type": {"type": "string", "minLength": 1},
    "review_text": {"type": "string", "minLength": 1, "maxLength": 512},
    "review_length": {"type": "number", "minimum": 0}
  },
  "additionalProperties": false
}
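A minimal validation sketch against this schema using the jsonschema package (an extra dependency, not in the pins below; the schema file name is an assumption):

import json
import jsonschema

with open("input_schema.json", encoding="utf-8") as f:  # assumed file holding the schema above
    schema = json.load(f)

record = {"review_id": "r001", "product_type": "书桌",
          "review_text": "安装顺畅,稳固无异味,性价比高。", "review_length": 17}
jsonschema.validate(instance=record, schema=schema)  # raises ValidationError on bad input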
numpy==1.26.4
scipy==1.11.4
scikit-learn==1.3.2
jieba==0.42.1
onnx==1.16.0
onnxruntime==1.18.0
skl2onnx==1.16.0
Run the entire machine learning workflow from a single sentence: business teams in fraud control, medical diagnosis, and smart recommendation get deployable, high-performing models with minimal time and headcount. Step-by-step execution and deep reasoning unify data cleaning, feature processing, model selection, key-parameter tuning, and deployment preparation; the deliverables are intuitive results and usage guides, so non-technical users can build custom models, move quickly from trial to stable production, and drive business conversion and recurring revenue.
Based on historical transactions and account behavior, automatically generate fraud-prediction models and reports; quickly complete feature selection, model comparison, and tuning, and output a production-ready plan that improves interception rates and supports compliance audits.
Around vital signs and lab records, automatically build disease-screening assistance models; generate interpretable feature summaries and performance comparisons as a reliable reference for clinical decisions and follow-up management.
Using browsing and purchase behavior, optimize recommendation and conversion prediction in one step; automatically compare multiple algorithms and output a deployment package that supports A/B testing and continuous iteration.
Copy the prompt generated from the template into your favorite chat app (ChatGPT, Claude, etc.) and use it in conversation directly, with no extra development. Suited to quick personal trials and lightweight use.
Turn the prompt template into an API: your program can modify template parameters freely and call it through the interface, making automation and batch processing easy. Suited to developer integration and embedding in business systems.
Configure the corresponding server address in an MCP client so your AI application can invoke the prompt template automatically. Suited to advanced users and team collaboration, letting prompts move seamlessly between AI tools.