热门角色不仅是灵感来源,更是你的效率助手。通过精挑细选的角色提示词,你可以快速生成高质量内容、提升创作灵感,并找到最契合你需求的解决方案。让创作更轻松,让价值更直接!
我们根据不同用户需求,持续更新角色库,让你总能找到合适的灵感入口。
本提示词旨在根据用户提供的核心业务变量,自动生成数据挖掘领域的相关矩阵分析。通过结构化输入,引导用户明确分析目标与数据基础,从而输出精准、专业的矩阵关系图及挖掘建议,适用于业务洞察、特征工程、策略制定等场景,提升数据驱动决策的效率与深度。
说明
变量清单
方法选择与注意事项
Python 实现(可直接运行) 依赖: pandas, numpy, scipy, pingouin, statsmodels
# Numeric metric columns shared by all correlation analyses below
# (exposure, CTR, add-to-cart, traffic, conversion, refund, ... metrics).
num_cols = ['曝光量','点击率','加购率','收藏率','新客比例','优惠券领取数','页面停留时长_秒','首购转化率','客单价','活动UV','付费广告消耗','自然流量UV','访问深度','复购天数','退款率']
# Work on a copy so downstream transforms never mutate the source frame.
df_num = df[num_cols].copy()
def spearman_corr_and_p(dfX):
    """Pairwise Spearman correlation and p-value matrices for dfX's columns.

    Returns (R, P): two symmetric DataFrames indexed by column name.
    R's diagonal is 1, P's diagonal is 1 (self-correlation left untested).
    Missing values are dropped pairwise via nan_policy='omit'.
    """
    names = dfX.columns
    k = len(names)
    R = pd.DataFrame(np.eye(k), index=names, columns=names)
    P = pd.DataFrame(np.ones((k, k)), index=names, columns=names)
    for a in range(k):
        for b in range(a + 1, k):
            rho, pv = stats.spearmanr(dfX.iloc[:, a], dfX.iloc[:, b], nan_policy='omit')
            R.iat[a, b] = R.iat[b, a] = rho
            P.iat[a, b] = P.iat[b, a] = pv
    return R, P
# Full-sample Spearman matrix with raw p-values.
R_all, P_all = spearman_corr_and_p(df_num)

# BH-FDR adjust only the upper-triangle p-values (each pair counted once),
# then mirror the adjusted values back so P_fdr stays symmetric.
mask = np.triu(np.ones_like(P_all, dtype=bool), 1)
pvec = P_all.where(mask).stack()
rej, p_fdr, _, _ = multipletests(pvec.values, alpha=0.05, method='fdr_bh')
P_fdr = P_all.copy()
for (row, col), padj in zip(pvec.index, p_fdr):
    P_fdr.loc[row, col] = padj
    P_fdr.loc[col, row] = padj

# Targets and control variables for the partial-correlation step
# (extend or trim as needed).
targets = ['首购转化率','客单价']
controls = ['活动UV','付费广告消耗','自然流量UV']
def partial_corr_table(dfX, y, controls, method='spearman'):
    """Partial correlation of every column of dfX with target y, given controls.

    Uses pingouin.partial_corr per feature; the result table is sorted by
    absolute partial correlation, strongest association first.
    """
    records = []
    for feat in dfX.columns:
        if feat == y:
            continue
        stats_row = pg.partial_corr(data=dfX, x=feat, y=y, covar=controls, method=method)
        records.append({
            'feature': feat,
            'r_partial': stats_row['r'].iat[0],
            'p_partial': stats_row['p-val'].iat[0],
        })
    table = pd.DataFrame(records)
    return table.sort_values(by='r_partial', key=lambda s: s.abs(), ascending=False)
# Partial correlations against each target, controlling for traffic/spend.
pcorr_firstbuy = partial_corr_table(df_num, '首购转化率', controls)
pcorr_aov = partial_corr_table(df_num, '客单价', controls)

# Joint BH-FDR adjustment across both target tables, then split back.
pvec = pd.concat([pcorr_firstbuy['p_partial'], pcorr_aov['p_partial']])
rej, p_fdr, _, _ = multipletests(pvec.values, alpha=0.05, method='fdr_bh')
pcorr_firstbuy['p_partial_fdr'] = p_fdr[:len(pcorr_firstbuy)]
pcorr_aov['p_partial_fdr'] = p_fdr[len(pcorr_firstbuy):]

# Within-group centering (channel x category x date) removes group-level
# effects before re-running the Spearman matrix on the residuals.
group_keys = ['渠道类型_编码','品类_编码','日期']
df_within = df.copy()
df_within[num_cols] = df_within.groupby(group_keys)[num_cols].transform(lambda x: x - x.mean())
R_within, P_within = spearman_corr_and_p(df_within[num_cols])
def correlation_ratio(categories, measurements):
    """Correlation ratio eta = sqrt(SS_between / SS_total).

    Rows with NaN in either input are dropped; empty category levels are
    skipped. Returns NaN when fewer than two valid rows remain or when
    the measurements have zero total variance.
    """
    c = pd.Series(categories).astype('category')
    y = pd.Series(measurements).astype(float)
    mask = y.notna() & c.notna()
    y = y[mask]
    c = c[mask]
    if len(y) <= 1:
        return np.nan
    grand_mean = y.mean()
    ss_between = 0.0
    for level in c.cat.categories:
        g = y[c == level].values
        if len(g) > 0:
            ss_between += len(g) * (g.mean() - grand_mean) ** 2
    ss_total = ((y - grand_mean) ** 2).sum()
    if ss_total > 0:
        return np.sqrt(ss_between / ss_total)
    return np.nan
# Eta (correlation ratio) between each encoded categorical driver and each target.
cat_codes = ['渠道类型_编码','地区_省份_编码','设备类型_编码']
eta_results = []
for cat in cat_codes:
    for y in ['首购转化率','客单价']:
        eta = correlation_ratio(df[cat], df[y])
        eta_results.append({'category': cat, 'target': y, 'eta': eta})
# BUG FIX: the original `pd.DataFrame(...).sort_by = ['target','eta']` did not
# sort anything — DataFrame has no `sort_by`, so the statement simply bound the
# list ['target','eta'] to eta_df. Sort by target, strongest eta first.
eta_df = pd.DataFrame(eta_results).sort_values(by=['target', 'eta'], ascending=[True, False])
判读建议(与后续建模衔接)
如需,我可以在你提供一小段样本数据(含列名)后,直接返回数值型相关矩阵与目标变量的偏相关排名表。
以下方案生成并解释一套可用于“新增用户(30天窗口)”的相关矩阵,覆盖混合类型变量,并满足:中位数填充、长尾对数变换、统一量纲、显著性与多重检验。输出包含三部分:
一、变量与类型设定
注意:若分析7日留存,建议同步构造“前7日内的行为特征”版本,避免目标泄漏;30日留存/付费试用开启可用前30日特征。
二、预处理与变换
三、相关矩阵设计与显著性
四、Python实现示例(可直接运行) 说明:将中文列名替换为你的实际列名;df为用户级DataFrame,已限定观察窗口为注册后30天。
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
# 1) Column lists and types
# Binary outcome/flag columns (0/1); these double as targets downstream.
binary_cols = ['付费试用开启', '7日留存', '30日留存']
# Long-tailed count/amount columns: log1p-transformed before correlation.
count_longtail_cols = [
'首日活跃时长_分钟','首周功能触达数','关键功能使用次数','消息发送数',
'团队成员邀请数','模板使用数','移动端登录次数','充值金额','客服互动次数','Bug暴露数'
]
# Ratio / score columns, clipped into [0, 1] before use.
ratio_score_cols = ['新手引导完成率','版本更新提示点击率','NPS评分']
# Categorical column handled separately (eta / Cramér's V / dummy covariates).
cat_col = '注册来源'
# Every numeric column that enters the Spearman matrix.
all_numeric_cols = binary_cols + count_longtail_cols + ratio_score_cols
# 2) Population filter (example flag names — replace as needed)
def filter_population(df):
    """Drop internal / staff / test accounts when the flag columns exist.

    Any of the flag columns that is absent is simply skipped, so the
    function is safe to call on frames without these flags.
    """
    for flag in ('is_internal', 'is_staff', 'is_test'):
        if flag in df.columns:
            df = df[df[flag] != 1]
    return df
# 3) Missing-value imputation
def median_impute(df, cols):
    """Fill NaNs in each listed column with that column's median (in place).

    Column names not present in df are ignored. Returns df for chaining.
    """
    for col in (c for c in cols if c in df.columns):
        df[col] = df[col].fillna(df[col].median())
    return df
# 4) Transforms
def log1p_transform(df, cols):
    """Apply log1p to the listed long-tail columns, flooring negatives at 0.

    The clip guards against bad input (log1p of x < -1 is undefined).
    Missing columns are skipped; returns df for chaining.
    """
    for col in (c for c in cols if c in df.columns):
        df[col] = np.log1p(df[col].clip(lower=0))
    return df
def clip_ratio(df, cols):
    """Clamp ratio/score columns into the [0, 1] interval (in place).

    Missing columns are skipped; returns df for chaining.
    """
    for col in cols:
        if col not in df.columns:
            continue
        df[col] = df[col].clip(0, 1)
    return df
def standardize(df, cols):
    """Z-score the listed columns in place using sklearn's StandardScaler.

    Columns absent from df are skipped. Guard added: with no matching
    columns the frame is returned untouched — StandardScaler.fit_transform
    raises on a zero-feature input, so the original crashed in that case.
    Returns df for chaining.
    """
    exist = [c for c in cols if c in df.columns]
    if not exist:  # nothing to scale; avoid StandardScaler's empty-input error
        return df
    scaler = StandardScaler()
    df[exist] = scaler.fit_transform(df[exist])
    return df
# 5) Spearman correlation matrix + p-values + FDR
def spearman_corr_with_p(df, cols):
    """Pairwise Spearman matrix with raw and BH-FDR-adjusted p-values.

    Parameters
    ----------
    df : DataFrame holding the columns to correlate.
    cols : candidate column names; names absent from df are skipped.

    Returns
    -------
    (rho, pval, qval) : symmetric DataFrames indexed by the retained
    columns. qval holds Benjamini-Hochberg adjusted p-values for the
    off-diagonal pairs; the diagonal is left at 1 (self-correlation is
    not a tested hypothesis).
    """
    cols = [c for c in cols if c in df.columns]
    n = len(cols)
    rho = pd.DataFrame(np.nan, index=cols, columns=cols)
    pval = pd.DataFrame(np.nan, index=cols, columns=cols)
    for i in range(n):
        for j in range(i, n):
            r, p = stats.spearmanr(df[cols[i]], df[cols[j]], nan_policy='omit')
            rho.iat[i, j] = rho.iat[j, i] = r
            pval.iat[i, j] = pval.iat[j, i] = p
    # Benjamini-Hochberg adjustment: q_(k) = min_{k' >= k} p_(k') * m / k'.
    # BUG FIX 1: the running minimum must be taken from the LARGEST p-value
    # downward; the original ascending pass propagated small q-values upward
    # and understated the adjusted p of weakly significant pairs.
    flat_p = pval.values[np.triu_indices(n, 1)]
    m = len(flat_p)
    order = np.argsort(flat_p)
    q = np.empty(m)
    prev = 1.0
    for rank in range(m, 0, -1):
        idx = order[rank - 1]
        prev = min(prev, flat_p[idx] * m / rank)
        q[idx] = prev
    # BUG FIX 2: the original initialized qmat with np.ones_like and then
    # symmetrized with qmat + qmat.T, so every off-diagonal cell came out
    # as 1 + q. Build from zeros and set the diagonal to 1 explicitly.
    qmat = np.zeros((n, n))
    qmat[np.triu_indices(n, 1)] = q
    qmat = qmat + qmat.T + np.eye(n)
    qval = pd.DataFrame(qmat, index=cols, columns=cols)
    return rho, pval, qval
# 6) 注册来源关联:η与Cramér's V
from scipy.stats import chi2_contingency
def correlation_ratio(categories, measurements):
    """Correlation ratio eta = sqrt(SS_between / SS_total) between a
    categorical variable and a numeric one (0 = no association, 1 = the
    category fully determines the value).

    Robustness fixes over the naive version (and consistent with the
    other correlation_ratio in this file): rows with NaN in either input
    are dropped, and category levels with zero observations are skipped —
    an empty group's mean is NaN and previously poisoned SS_between.
    Returns 0.0 for degenerate input (no valid rows or zero variance).
    """
    codes = pd.Categorical(categories).codes  # -1 marks a missing category
    y = np.asarray(pd.Series(measurements), dtype=float)
    keep = (codes != -1) & ~np.isnan(y)
    codes, y = codes[keep], y[keep]
    if y.size == 0:
        return 0.0
    grand = y.mean()
    ss_total = ((y - grand) ** 2).sum()
    if ss_total == 0:
        return 0.0
    # np.unique only yields observed codes, so empty levels never contribute.
    ss_between = sum(
        y[codes == c].size * (y[codes == c].mean() - grand) ** 2
        for c in np.unique(codes)
    )
    return np.sqrt(ss_between / ss_total)
def cramers_v(x, y):
    """Bias-corrected Cramér's V between two categorical series.

    Uses the Bergsma correction (phi², row and column counts are all
    shrunk toward their expectation under independence). Returns 0.0
    when the corrected denominator is non-positive.
    """
    table = pd.crosstab(x, y)
    chi2, _, _, _ = chi2_contingency(table, correction=False)
    n = table.values.sum()
    n_rows, n_cols = table.shape
    phi2 = chi2 / n
    # Bias-corrected phi² and effective table dimensions.
    phi2_corr = max(0, phi2 - (n_cols - 1) * (n_rows - 1) / (n - 1))
    rows_corr = n_rows - (n_rows - 1) ** 2 / (n - 1)
    cols_corr = n_cols - (n_cols - 1) ** 2 / (n - 1)
    denom = min(cols_corr - 1, rows_corr - 1)
    return np.sqrt(phi2_corr / denom) if denom > 0 else 0.0
def source_association(df, source_col, numeric_cols, binary_cols):
    """Association of the source column with every variable.

    Numeric columns are scored with the correlation ratio (eta), binary
    columns with bias-corrected Cramér's V. Columns absent from df are
    skipped. Returns a Series indexed by variable name.
    """
    src = df[source_col]
    scores = {}
    for col in numeric_cols:
        if col in df.columns:
            scores[col] = correlation_ratio(src, df[col])
    for col in binary_cols:
        if col in df.columns:
            scores[col] = cramers_v(src, df[col].astype(int))
    return pd.Series(scores, name='注册来源关联')
# 7) 偏相关:目标 vs 其他,控制注册来源
import pingouin as pg
def partial_corr_targets(df, target_cols, feature_cols, source_col):
    """Spearman partial correlation of each feature with each target,
    controlling for registration source (dummy-encoded covariates), with
    Benjamini-Hochberg FDR-adjusted q-values in column 'q'.

    Failing pairs (e.g. constant columns) are skipped silently, matching
    the original best-effort behaviour. Returns an empty DataFrame when
    no pair could be computed.
    """
    out = []
    dummies = pd.get_dummies(df[source_col].astype('category'), prefix=source_col, drop_first=True)
    base = pd.concat([df[feature_cols + target_cols], dummies], axis=1).dropna()
    covars = list(dummies.columns)
    for tgt in target_cols:
        for feat in feature_cols:
            if feat == tgt:
                continue
            try:
                pc = pg.partial_corr(data=base, x=feat, y=tgt, covar=covars, method='spearman')
                out.append({
                    'target': tgt, 'feature': feat,
                    'rho_partial': pc['r'].iloc[0],
                    'p': pc['p-val'].iloc[0]
                })
            except Exception:
                pass
    res = pd.DataFrame(out)
    if not res.empty:
        # BUG FIX: Benjamini-Hochberg is q_(k) = p_(k) * m / k followed by a
        # reverse cumulative minimum (from the largest p downward). The
        # original computed (rank / m) * p and cummin'd in the ascending
        # direction, producing systematically deflated q-values.
        res = res.sort_values('p').reset_index(drop=True)
        m = len(res)
        raw_q = res['p'].to_numpy() * m / np.arange(1, m + 1)
        res['q'] = np.minimum(np.minimum.accumulate(raw_q[::-1])[::-1], 1.0)
    return res
# 8) Main pipeline
def build_correlation_outputs(df):
    """Run the full preprocessing + correlation pipeline on a user-level frame.

    Steps: filter population -> median-impute -> log1p long-tail columns ->
    clip ratios -> standardize -> (A) numeric Spearman matrix with FDR,
    (B) registration-source associations, (C) target-feature partial
    correlations controlling for source. B and C are None when the source
    column is missing.

    Returns (rho, p, q, assoc, part).
    """
    df = filter_population(df).copy()
    has_source = cat_col in df.columns
    # Ensure the registration-source column is categorical.
    if has_source:
        df[cat_col] = df[cat_col].astype('category')
    present = [c for c in all_numeric_cols if c in df.columns]
    # Missing values -> column medians.
    df = median_impute(df, present)
    # log1p for long-tailed counts; clamp ratios/scores into [0, 1].
    df = log1p_transform(df, [c for c in count_longtail_cols if c in df.columns])
    df = clip_ratio(df, [c for c in ratio_score_cols if c in df.columns])
    # Standardize every numeric column.
    df = standardize(df, present)
    # Matrix A: Spearman among numeric columns (registration source excluded).
    rho, p, q = spearman_corr_with_p(df, present)
    assoc = None
    part = None
    if has_source:
        # Matrix B: association of registration source with each variable.
        assoc = source_association(
            df, cat_col,
            numeric_cols=[c for c in present if c not in binary_cols],
            binary_cols=[c for c in binary_cols if c in df.columns],
        )
        # Matrix C: target-feature partial correlations, source controlled.
        targets = [c for c in binary_cols if c in df.columns]
        features = [c for c in present if c not in targets]
        part = partial_corr_targets(df, targets, features, cat_col)
    return rho, p, q, assoc, part
# 使用示例:
# rho, p, q, assoc, part = build_correlation_outputs(df)
# 其中:
# - rho 为矩阵A(Spearman相关系数)
# - q 为矩阵A的FDR校正p值矩阵
# - assoc 为注册来源与各变量的关联度(矩阵B的向量)
# - part 为偏相关结果明细(矩阵C)
五、结果解读与后续动作
六、交付与可视化
说明
说明
变量与类型假设
数据形态
一、Python:Spearman 与 Pearson 相关矩阵、显著性与簇自助法置信区间
示例代码(Python)
# Column set for the correlation analysis; ordinal codes listed separately
# so rank-aware methods can treat them appropriately.
cols = [
    "月交易笔数","平均客单价","品类多样性指数","近30天退货笔数","促销敏感度评分","浏览到购转化率",
    "会员等级_编码","优惠券使用频次","到店频次","线上偏好指数","年龄段_编码","城市等级_编码",
    "节假日购买占比","跨品类联购数","最近一次购买距今天数","客服咨询次数"
]
ordinals = ["会员等级_编码","年龄段_编码","城市等级_编码"]
metrics = [c for c in cols if c not in ordinals]

X = df[cols].copy()

# One-shot full Spearman matrix; nan_policy='omit' drops missing pairwise.
rho, pval = spearmanr(X, nan_policy='omit')
spearman_corr = pd.DataFrame(rho, index=cols, columns=cols)
spearman_p = pd.DataFrame(pval, index=cols, columns=cols)
def pearson_pairwise(df, cols):
    """Pairwise-complete Pearson correlation and p-value matrices.

    Each pair uses only the rows where both columns are non-null; pairs
    with fewer than 3 complete observations are reported as NaN.
    Returns (R, P) as DataFrames indexed by column name.
    """
    n = len(cols)
    R = np.eye(n)
    P = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            a = df[cols[i]]
            b = df[cols[j]]
            both = a.notna() & b.notna()
            if both.sum() >= 3:
                r, pv = pearsonr(a[both], b[both])
            else:
                r, pv = np.nan, np.nan
            R[i, j] = R[j, i] = r
            P[i, j] = P[j, i] = pv
    return (pd.DataFrame(R, index=cols, columns=cols),
            pd.DataFrame(P, index=cols, columns=cols))
pearson_corr, pearson_p = pearson_pairwise(X, cols)

# Persist all four matrices; utf-8-sig keeps the Chinese headers readable in Excel.
spearman_corr.to_csv("corr_spearman.csv", encoding="utf-8-sig")
spearman_p.to_csv("corr_spearman_pvalues.csv", encoding="utf-8-sig")
pearson_corr.to_csv("corr_pearson.csv", encoding="utf-8-sig")
pearson_p.to_csv("corr_pearson_pvalues.csv", encoding="utf-8-sig")
from tqdm import trange
def spearman_block_bootstrap(df, cols, user_col, B=500, ci=0.95, random_state=42):
    """Cluster (user-level) bootstrap confidence intervals for pairwise
    Spearman correlations.

    Users are resampled with replacement; all rows of a drawn user enter
    the replicate. Pairs with fewer than 3 complete observations in a
    replicate contribute NaN and are dropped from the percentile step.

    Returns (L, U, MED): lower CI bound, upper CI bound, and bootstrap
    median, each as a symmetric DataFrame indexed by `cols`.
    """
    rng = np.random.RandomState(random_state)
    users = df[user_col].dropna().unique()
    n = len(cols)
    boot_store = {(i, j): [] for i in range(n) for j in range(i + 1, n)}
    # Pre-split rows by user so each replicate can repeat a user's rows.
    by_user = {u: g for u, g in df.groupby(user_col)}
    for _ in trange(B):
        samp_users = rng.choice(users, size=len(users), replace=True)
        # BUG FIX: the original used df[df[user_col].isin(samp_users)], which
        # deduplicates users drawn more than once. A valid cluster bootstrap
        # must include the rows of a user once per draw.
        d = pd.concat([by_user[u] for u in samp_users], ignore_index=True)
        for i in range(n):
            for j in range(i + 1, n):
                a = d[cols[i]]
                b = d[cols[j]]
                mask = a.notna() & b.notna()
                if mask.sum() >= 3:
                    r, _ = spearmanr(a[mask], b[mask])
                else:
                    r = np.nan
                boot_store[(i, j)].append(r)
    low = (1 - ci) / 2
    high = 1 - low
    L = np.full((n, n), np.nan)
    U = np.full((n, n), np.nan)
    MED = np.eye(n)
    for i in range(n):
        for j in range(i + 1, n):
            arr = np.array([v for v in boot_store[(i, j)] if pd.notna(v)])
            if arr.size > 0:
                L[i, j] = L[j, i] = np.nanpercentile(arr, 100 * low)
                U[i, j] = U[j, i] = np.nanpercentile(arr, 100 * high)
                MED[i, j] = MED[j, i] = np.nanmedian(arr)
    return (pd.DataFrame(L, index=cols, columns=cols),
            pd.DataFrame(U, index=cols, columns=cols),
            pd.DataFrame(MED, index=cols, columns=cols))
# Run the user-level bootstrap and persist CI bounds and bootstrap medians.
L, U, MED = spearman_block_bootstrap(df, cols, user_col="user_id", B=500, ci=0.95, random_state=42)
L.to_csv("corr_spearman_ci_low.csv", encoding="utf-8-sig")
U.to_csv("corr_spearman_ci_high.csv", encoding="utf-8-sig")
MED.to_csv("corr_spearman_boot_median.csv", encoding="utf-8-sig")
二、R:异质相关矩阵(连续-有序-二元混合)
示例代码(R)
library(polycor)  # hetcor
library(dplyr)
# Variable set for the heterogeneous (continuous / ordinal / binary) matrix.
cols <- c("月交易笔数","平均客单价","品类多样性指数","近30天退货笔数","促销敏感度评分","浏览到购转化率",
          "会员等级_编码","优惠券使用频次","到店频次","线上偏好指数","年龄段_编码","城市等级_编码",
          "节假日购买占比","跨品类联购数","最近一次购买距今天数","客服咨询次数")
ordinals <- c("会员等级_编码","年龄段_编码","城市等级_编码")

# Declare ordinal codes as ordered factors so hetcor picks the
# polyserial / polychoric estimators for them.
df2 <- df %>% mutate(across(all_of(ordinals), ~ordered(.)))

# Heterogeneous correlation matrix with ML estimates and standard errors.
hc <- hetcor(df2[, cols], ML = TRUE, std.err = TRUE, use = "pairwise.complete.obs")
corr_het <- hc$correlations
se_het <- hc$std.errors
write.csv(corr_het, "corr_heterogeneous.csv", row.names = TRUE, fileEncoding = "UTF-8")
write.csv(se_het, "corr_heterogeneous_se.csv", row.names = TRUE, fileEncoding = "UTF-8")
三、稳健性与显著性控制
四、将相关矩阵用于客户细分的具体操作
五、注意事项
交付物清单(运行上述脚本后)
如提供样本数据(含 user_id、month 与上述16列),我可以直接运行并返回数值相关矩阵与可视化结果。
让 AI 充当你的数据洞察顾问,快速生成清晰易读的相关矩阵,并输出面向业务的可执行建议。帮助市场、产品、运营、风控等团队用更少时间找到关键驱动因素、验证假设、明确优先级,直接用于汇报与落地决策,提升分析效率与转化效果。
快速生成相关矩阵,锁定增长驱动与抑制因子;优化渠道投放,制定A/B优先级与成效预期。
评估埋点与核心指标关系,识别冗余与缺口;沉淀指标体系与模板化报告,加速需求评审。
筛选稳健特征,识别共线与噪声;输出候选变量清单与取舍建议,支持评分卡与策略优化。
将模板生成的提示词复制粘贴到您常用的 Chat 应用(如 ChatGPT、Claude 等),即可直接对话使用,无需额外开发。适合个人快速体验和轻量使用场景。
把提示词模板转化为 API,您的程序可任意修改模板参数,通过接口直接调用,轻松实现自动化与批量处理。适合开发者集成与业务系统嵌入。
在 MCP client 中配置对应的 server 地址,让您的 AI 应用自动调用提示词模板。适合高级用户和团队协作,让提示词在不同 AI 工具间无缝衔接。
半价获取高级提示词-优惠即将到期