Model ComparisonEducational Tool for Teaching Statistics
Learn the base recipe for model building that subsumes all of the statistical recipes found in the most advanced cookbooks
$$ {\LARGE Data = Model + Error} $$
$$ {\LARGE Y_{i} = Y'_{i} + error} $$
\(Null \, Model : Y_{i}=b_{0}\)
\(Full \, Model : Y'_{i} = b_{0} + b_{1}x_{i1} + ... b_{j}x_{ij}\)''We know but often forget that the problem of inductive inference has no single solution. There is no uniformly most powerful test, that is, no method that is best for every problem. Statistical theory has provided us with a toolbox with effective instruments, which require judgment about when it is right to use them.'' ''Judgement is part of the art of statistics.'' Gigerenzer (2004), p. 604 Simple Linear RegressionNull ModelFull ModelInferenceNull model T-StatisticFull model T-StatisticF-Statistic |
#####PACKAGES##########
library(shiny)
library(ggplot2)
library(calibrate)
library(MASS)
library(shinyBS)
atdata<-data.frame(attitude)
#######SHINY#################
shinyApp(
ui = fluidPage(
includeScript("../../../Matomo-tquant.js"),
withMathJax(),
fluidRow(
splitLayout(column(7,
h1("Model Comparison")),
column(7, img(src = "tquant100.png", align = "right", width="60%"))
)),
tabsetPanel(id = "inTabset",
tabPanel("Home", value = "Home",
br(),
h3("Educational Tool for Teaching Statistics"),
fluidRow(
column(12,
p(h4("Learn the base recipe for model building that subsumes all of the statistical recipes found in the most advanced cookbooks"), align = "middle")),
br(),
br(),
column(12,
withMathJax("$$ {\\LARGE Data = Model + Error} $$")),
column(12,
withMathJax("$$ {\\LARGE Y_{i} = Y'_{i} + error} $$")),
column(12,
p(h4("\\(Null \\, Model : Y_{i}=b_{0}\\)"),
bsButton("q1", label = "", icon = icon("question"),size = "extra-small"),
bsTooltip(id = "q1", title = " b0 = Best estimation of data when there is no effect of an IV to be modulated", placement = "right", trigger = "hover"))),
column(12,
p(h4("\\(Full \\, Model : Y'_{i} = b_{0} + b_{1}x_{i1} + ... b_{j}x_{ij}\\)"),
bsButton("q2", label = "", icon = icon("question"),size = "extra-small"),
bsTooltip(id = "q2", title = "bj*Xij = Estimate that represent the weight of j IVs for the individual i", placement = "right", trigger = "hover"))),
column(12,
p("''We know but often forget that the problem of inductive inference has no single solution. There is no uniformly most powerful test,
that is, no method that is best for every problem. Statistical theory has provided us with a toolbox with effective instruments,
which require judgment about when it is right to use them.'' ''Judgement is part of the art of statistics.'' Gigerenzer (2004), p. 604"))
)), #end of tab one
tabPanel("Model Estimation", value = "E",
br(),
h4("Simple Linear Regression"),
splitLayout(
verticalLayout(
radioButtons(inputId = "IV",
label = "Choose your research question:",
c("Effect of learning"="learning","Effect of complaints"="complaints")),
radioButtons(inputId = "model",
label = "Select the model that you want visualize:",
c("Null"="null","Full"="full","Both"="both"),selected=NULL),
radioButtons(inputId = "plot",
label = "Visualising Error",
c("Scatterplot"="scatter","Histogram"="hist"))),
column(6,
h3("Null Model"),
tableOutput(outputId = "nulltop")),
column(6,
h3("Full Model"),
tableOutput(outputId = "fulltop"))),
hr(),
splitLayout(
fluidRow(
tableOutput('table')),
fluidRow(
plotOutput(outputId = "plot")))
),#end of tab 2
tabPanel("Inference", value = "S",
br(),
h4("Inference"),
splitLayout(
verticalLayout(radioButtons(inputId = "IVs",
label = "Choose your research question:",
c("Effect of learning"="learning","Effect of complaints"="complaints")),
radioButtons(inputId = "plots",
label = "Choose the statistic you want to generate:",
c("T-Statistic" = "tstat", "F-Statistic" = "fstat"))),
verticalLayout(h4("Null model T-Statistic"),
tableOutput('tablets')),
verticalLayout(h4("Full model T-Statistic"),
tableOutput('tabletsf')),
verticalLayout(h4("F-Statistic"),
tableOutput('tablefs')))
, #insert table here
hr(),
splitLayout(
plotOutput(outputId = "sdist"),
uiOutput("formula")) #Formula in Latex
###plot and formula
)
),#end of tabs
br(),
splitLayout(
fluidRow(
column(4, img(src = "lisbon.png", align = "left", width="105%")),
column(4, img(src = "glasgow.jpg", width="70%")),
column(4, img(src = "padua.png", align = "left",width="85%"))
))
), #end of app
#######SERVER###############
server = function(input, output) {
getIV<- reactive({ switch(input$IV,
learning = as.data.frame(atdata$learning),
complaints = as.data.frame(atdata$complaints))
}) ## Indipendente variable choices
getIVs <- reactive( switch(input$IVs,
learning = as.data.frame(atdata$learning),
complaints = as.data.frame(atdata$complaints)))
getmodel <- reactive(switch(input$model,
both = "0",
null = "2",
full = "1")) ## Model choices
getplot <- reactive(switch(input$plot,
scatter = "0",
hist = "1"))
getplotS <-reactive(switch(input$plots,
tstat = "0",
fstat = "1"))
Ymodelerror<-reactive(atdata$rating-mean(atdata$rating)) #Residual of y
Xmodel<- reactive(sum(getIV()/lengths(getIV()))) #Mean of x
Xmodelerror<-reactive(getIV()-Xmodel()) #Residual of x
PExy<-reactive(Xmodelerror()*Ymodelerror() )
Covxy<-reactive(sum(PExy())/lengths(getIV())) #Covariance
var_X <- reactive(sum(Xmodelerror()^2)/lengths(getIV()))
b1<-reactive (Covxy()/var_X()) # slope
b0<-reactive( sum(atdata$rating-getIV()*b1())/length(atdata$rating))#intercept
prediction<-reactive(b0()+getIV()*b1())
fullmodelerror<-reactive(atdata$rating -prediction())
SSEfull<-reactive(sum(fullmodelerror()^2))
SSEnull<-reactive(sum(Ymodelerror()^2))
Ymodelerror<-reactive(atdata$rating-mean(atdata$rating)) #Residual of y
Xmodel<- reactive(sum(getIVs()/lengths(getIVs()))) #Mean of x
Xmodelerror<-reactive(getIVs()-Xmodel()) #Residual of x
PExy<-reactive(Xmodelerror()*Ymodelerror() )
Covxy<-reactive(sum(PExy())/lengths(getIVs())) #Covariance
var_X <- reactive(sum(Xmodelerror()^2)/lengths(getIVs()))
b1<-reactive (Covxy()/var_X()) # slope
b0<-reactive( sum(atdata$rating-getIV()*b1())/length(atdata$rating))#intercept
prediction<-reactive(b0()+getIVs()*b1())
fullmodelerror<-reactive(atdata$rating -prediction())
SSEfull<-reactive(sum(fullmodelerror()^2))
SSEnull<-reactive(sum(Ymodelerror()^2))
SSR<-reactive(sum((prediction()-mean(atdata$rating))^2))
F_sta<-reactive((SSR()/1)/(SSEnull()/length(atdata$rating)-1)) #F statistic
P_v<-reactive(pf(F_sta(),df1=1, df2=length(atdata$rating)-1 ))
#B0 test statistics
ssex <- reactive(sum(Xmodelerror()^2))
se <- reactive(sqrt(SSEfull()/(lengths(getIV())-2))) # standard error of the full model
seb0 <- reactive(se()*(sqrt
(
(1/lengths(getIV()))+
(Xmodel())^2/ssex()
)
))
b0_t_statistics <- reactive(b0()/seb0()) #B0 t-value
# b1 test statistics
seb1 <- reactive(se()/sqrt(ssex()))
b1_t_statistics <- reactive(b1()/seb1()) #B1 t-value
output$nulltop <- renderTable ({ #Upper table for Null model
tablenm <- data.frame(SSEnull(),(mean(atdata$rating)))
colnames(tablenm) <- (c("SSE", "B0"))
tablenm})
output$fulltop <- renderTable ({ #Upper table for Null model
tablefm <- data.frame(SSEfull(),b0(),b1())
colnames(tablefm) <- (c("SSE", "B0", "B1"))
tablefm})
output$table <- renderTable({
if (getmodel()=="2"){
cc <- data.frame(atdata$rating, getIV(),mean(atdata$rating),Ymodelerror() )##Print table null model
colnames(cc) <- c("y", "x", "Null Model", "Residuals Null Model")
cc
}else if (getmodel()=="1") {
cc <- data.frame(atdata$rating, getIV(),prediction(),Xmodelerror()) ##Print table full model
colnames(cc) <- c("y", "x", "Predictions", "Residuals Full Model")
cc
}else {
cc <-data.frame(atdata$rating, getIV(),mean(atdata$rating),Ymodelerror(),prediction(),Xmodelerror()) ##Print table of null model and full model
colnames(cc) <- c("y", "x", "Null Model", "Residuals Null Model" ,"Predictions", "Residuals Full Model")
cc
}
})
output$plot <- renderPlot({
if (getplot()=="0") {
if (getmodel()=="2"){
plot(data.frame(getIV() , atdata$rating),textxy(getIV(), atdata$rating, Ymodelerror(), col="red"))
abline(h=mean(atdata$rating),col="blue")
} else if(getmodel()=="1") {
plot(data.frame(getIV() , atdata$rating),textxy(getIV(), atdata$rating, Xmodelerror(), col="red"))
abline( a=b0(), b=b1(),col="red")
} else {
plot(data.frame(getIV() , atdata$rating),textxy(getIV(), atdata$rating, Xmodelerror(), col="red"))
abline( a=b0(), b=b1(),h=,col="red")
abline(h=mean(atdata$rating),col="blue")
}
}else {
if (getmodel()=="2"){
barplot(c(SSEnull(),SSEfull(),SSEnull()-SSEfull()),col=c("red","gray","gray"), names.arg =(c("Null model error","Reduction error","Full model error")) )
} else if(getmodel()=="1") {
barplot(c(SSEnull(),SSEfull(),SSEnull()-SSEfull()),col=c("gray","orange","skyblue"), names.arg =(c("Null model error","Reduction error","Full model error")) )
} else {
barplot(c(SSEnull(),SSEfull(),SSEnull()-SSEfull()),col=c("red","orange","skyblue"), names.arg =(c("Null model error","Reduction error","Full model error")) )
}
}
}
) #print plot
output$tablefs <- renderTable ({ #Upper table for F-statiscs
tableff <- data.frame(Covxy(),1,length(atdata$rating) -1,F_sta(),P_v() )
colnames(tableff) <- (c("CovXY", "df1" ,"df2" ,"F","p_value"))
tableff})
output$tablets <- renderTable ({
#Upper table for T-statiscs
tablet <- data.frame(b0(), (length(atdata$rating)-1), b0_t_statistics(), P_v0())
colnames(tablet) <- (c("B0", "df" ,"t" ,"p"))
tablet})
output$tabletsf <-renderTable({
tabletf<-data.frame(b1(),(length(atdata$rating)-1),b1_t_statistics(), P_v1())
colnames(tabletf) <- (c("B1","df","t","p"))
tabletf
})
output$sdist <- renderPlot({ ## T and F distribution plot
if (getplotS()=="0") {
##Prints t distribution plot
curve(dt(x, df=(lengths(getIV())-1)), from=-5, to=5, main="T-student's distribution")
abline(v=b0_t_statistics(),col="blue")
abline(v=b1_t_statistics(),col="red")
}
else {
curve(df(x, df1=1, df2=(length(atdata$rating)-1)), from=0, to= F_sta()+2 , main="Fisher's distribution")
abline(v=F_sta(),col="red" )
}})
seb0 <- reactive(se()*(sqrt
(
(1/lengths(getIVs()))+
(Xmodel())^2/ssex())
)
)
b0_t_statistics <- reactive(b0()/seb0()) #B0 t-value
# b1 test statistics
seb1 <- reactive(se()/sqrt(ssex()))
b1_t_statistics <- reactive(b1()/seb1()) #B1 t-value
P_v0<-reactive(1-pt(b0_t_statistics(),df=(length(atdata$rating)-1 ))) # F p-value
P_v1<-reactive(1-pt(b1_t_statistics(),df=(length(atdata$rating)-1 ))) # F p-value
output$formula <- renderUI({
if (getplotS()== "1") {
withMathJax("$$ {\\huge F\\,=\\, \\frac{\\frac{SSR}{P_{full}-P_{null}}} {\\frac{SSE_{null}}{N-P_{full}}} \\rightarrow
\\frac{\\frac{",round(SSR(),digits=2),"}{1}}{\\frac{",round(SSEnull(),digits=2) ,"}{",length(atdata$rating)-1, "}} = ", round(F_sta(),digits=2) ,"} $$")
} else {
withMathJax("$$ {\\huge t_{b0}\\,=\\, \\frac{b_{0}} {SE_{b0}} \\rightarrow
\\frac{",round(b0(), digits=2),"}{",round(seb0(), digits=2),"} = ", round(b0_t_statistics() ,digits=2) ,"}\\\\ \\\\
{\\huge t_{b1}\\,=\\, \\frac{b_{1}} {SE_{b1}} \\rightarrow
\\frac{",round(b1(), digits=2),"}{",round(seb1(), digits=2),"} = ", round(b1_t_statistics() ,digits=2) ,"}$$")
}})
}
)#close app