模型可视化方法及工具

时间：2023-11-19 理论教育版权反馈

【摘要】：在对各类模型进行可视化时，我们固然可以利用一些方便函数。但有时，为了满足细节上的特定需要，就必须手动提取或计算出一些数值，并据此绘制图表。

模型可视化方法及工具

在对各类模型进行可视化时，我们固然可以利用一些方便函数。但有时，为了满足细节上的特定需要，就必须手动提取或计算出一些数值，并据此绘制图表。

一、回归模型

涉及回归模型的图表，主要有各种基于残差的诊断图、置信区间图、边际效应或交互效应图等。

# install.packages(c("car Data", "ggeffects", "effects", "sj Plot")) #需同时安装ggeffects和effects

library(car Data) # 使用数据Prestige

library(ggeffects) # 计算边际效应或交互效应

library(ggplot2)

library(sj Plot) # 使用作图函数

dat=Prestige # 请查阅?Prestige了解各变量的含义

m=lm(prestige~education+type*income, data=dat)

## 用ggplot绘制原本用plot(m)生成的模型诊断图

r=residuals(m) # 残差

fitv=fitted(m) # 拟合值

rsta=rstandard(m) # 标准化残差

rsta2=sqrt(abs(rsta))

ggplot(mapping=aes(fitv, r))+

geom_point()+

geom_smooth(color="red", fill=NA, method="loess")+

labs(x="Fitted Values", y="Residuals")

ggplot(mapping=aes(sample=rsta))+

geom_qq()+ # 绘制QQ图需同时用geom_qq和geom_qq_line添加点和直线

geom_qq_line()+

labs(x="Theoretical Quantiles", y="Standardized Residuals")

ggplot(mapping=aes(fitv, rsta2))+

geom_point()+

geom_smooth(color="red", fill=NA, method="loess")+

labs(x="Fitted Values", y=parse(text="sqrt(abs(Standardized~Res iduals))"))

## 置信区间图可用sj Plot包中的plot_model绘制

plot_model(m) # 读者亦可尝试用coefficients和confint从模型中提取数值并手动绘制

## 边际效应或交互效应图

mag=ggeffect(m, terms="education", ci.lvl=0.95)

plot(mag) # 直接绘制

mag=ggeffect(m, terms=c("income", "type"))

plot(mag) # 直接绘制

value=as.data.frame(mag) # 提取数值并手动绘制

ggplot(value)+

facet_wrap(vars(group), nrow=1)+

geom_line(aes(x, predicted))+

geom_ribbon(aes(x, ymin=conf.low, ymax=conf.high), alpha=0.2)

二、MCMC模型

我们以基于MCMC的Logistic模型为例讲解相关图表。

# install.packages("MCMCpack")

library(MCMCpack) # 使用MCMClogit

library(car Data) # 使用数据CES11

library(reshape2)

library(dplyr)

dat=CES11 # 请查阅?CES11了解各变量的含义

dat$abortion=ifelse(dat$abortion=="Yes", 1, 0) # 将因变量转化为1和0

m=MCMClogit(abortion~gender+importance+education+urban, data=dat, burnin=200, mcmc=10000, thin=10, seed=1) # 此处使用较小的mcmc值仅供示范

v=as.data.frame(m) # 转化为数据框

names(v)[1]="Intercept"

# 用melt将数据框转为ggplot可用的形式

v2=data.frame(iter=1: nrow(v), v)

v2=melt(v2, id.vars="iter")

## 密度图（图8-3-1）

p1=ggplot(v2)+

facet_wrap(vars(variable), nrow=3, scales="free")+

geom_area(aes(x=value),stat="density", alpha=0.5, fill="#9B1B30", color="black")+

labs(title="MCMC Density Plot")

图8-3-1 MCMC模型密度图

## 轨迹图（图8-3-2）

p2=ggplot(v2)+

facet_wrap(vars(variable), nrow=3, scales="free")+

geom_path(aes(x=iter, y=value), color="#9B1B30")+

labs(title="MCMC Trace Plot")

图8-3-2 MCMC模型轨迹图

## 均值图（图8-3-3）

图8-3-3 MCMC模型均值图(www.xing528.com)

run=group_by(v2, variable)

run=mutate(run, runmean=cummean(value))

p3=ggplot(run)+

facet_wrap(vars(variable), nrow=3, scales="free")+

geom_path(aes(x=iter, y=runmean), color="#9B1B30")+

labs(title="MCMC Running Means Plot")

## HPD图（图8-3-4）

图8-3-4 MCMC模型HPD图

sm=summary(m, quantiles=c(0.0005, 0.9995))

est=cbind(sm$statistics[, 1], sm$quantiles)

est=data.frame(rownames(est), est)

colnames(est)=c("Variable", "Mean", "lower.CI", "upper.CI")

est$Variable=factor(est$Variable, levels=rev(as.character(est$Variable)))

est$color=factor(ifelse(est$Mean<0, -1, 1))

p4=ggplot(est)+

geom_vline(aes(xintercept=0), linetype=3)+

geom_segment(aes(x=lower.CI, xend=upper.CI, y=Variable, yend=Variable, color=color), size=1)+

geom_point(aes(x=Mean, y=Variable, color=color), size=3)+

scale_color_manual(values=c("-1"="#2A4B7C", "1"="#9B1B30"))+

labs(title="HPD")

mytheme=theme_minimal()+

theme(axis.title=element_blank(),

strip.background=element_blank(),

plot.title=element_text(size=20),

strip.text=element_text(size=12)

)

p1+mytheme

p2+mytheme

p3+mytheme

p4+mytheme+theme(axis.text=element_text(size=13), legend.position="none")

三、变量重要性

条形图可用来呈现机器学习模型中的变量重要性，我们以随机森林模型为例进行讲解。

# install.packages(c("random Forest", "Ecdat"))

library(random Forest)

library(Ecdat) # 使用数据Hmda

library(grid Extra)

dat=Hmda # 请查阅?Hmda了解各变量的含义

dat=na.omit(dat)

dat$uria=NULL

dat$condominium=NULL

set.seed(1)

m=random Forest(deny~., data=dat, importance=TRUE) # 务必设置importance= TRUE

## 我们可以用var Imp Plot(m)作图，但亦可手动绘制更美观的图表（图8-3-5）

imp=m$importance[, c("Mean Decrease Accuracy", "Mean Decrease Gini")] #提取重要性指标

# 指标1

imp1=data.frame(Variable=rownames(imp), Value=imp[, "Mean Decrease Accuracy"])

imp1$Variable=reorder(imp1$Variable, imp1$Value) # 根据数值的大小重排因子水平

# 指标2

imp2=data.frame(Variable=rownames(imp), Value=imp[, "Mean Decrease Gini"])

imp2$Variable=reorder(imp2$Variable, imp2$Value)

p1=ggplot(imp1)+

geom_bar(aes(y=Variable, x=Value), stat="identity", fill="#9B1B30", orientation="y")+

geom_text(aes(Value, Variable, label=round(Value, 4)), family="serif", size=5, hjust=-0.1)+

scale_x_continuous(expand=expansion(c(0.02, 0.2)))+

labs(title="Mean Decrease Accuracy")

p2=ggplot(imp2)+

geom_bar(aes(y=Variable, x=Value), stat="identity", fill="#9B1B30", orientation="y")+

geom_text(aes(Value, Variable, label=round(Value, 4)), family="serif", size=5, hjust=-0.1)+

scale_x_continuous(expand=expansion(c(0.02, 0.2)))+

labs(title="Mean Decrease Gini")

mytheme=theme_bw(base_family="serif")+

theme(axis.title=element_blank(), axis.text=element_text (size=16), plot.title=element_text(size=19))

p1=p1+mytheme

p2=p2+mytheme

grid.arrange(p1, p2, nrow=1)

图8-3-5 随机森林变量重要性

免责声明：以上内容源自网络，版权归原作者所有，如有侵犯您的原创版权请告知，我们将尽快删除相关内容。

我要反馈

研究成果

注意事项

解决方案

计算方法

发展趋势

中国古代

社会主义

心理健康

控制系统

建筑工程

经济发展

传统文化

民事诉讼

中国传统

程序设计

轨道交通

解决方法

工程施工

研究结果

建设工程

文史资料

持续发展

计算机网

使用方法

模型可视化方法及工具

相关推荐

模型可视化方法及工具

有关基于ggplot的政经数据可视化的文章

相关推荐