Model Comparison


Educational Tool for Teaching Statistics

Learn the base recipe for model building that subsumes all of the statistical recipes found in the most advanced cookbooks



$$ {\LARGE Data = Model + Error} $$
$$ {\LARGE Y_{i} = Y'_{i} + error} $$

\(Null \, Model : Y_{i}=b_{0}\)

\(Full \, Model : Y'_{i} = b_{0} + b_{1}x_{i1} + ... b_{j}x_{ij}\)

''We know but often forget that the problem of inductive inference has no single solution. There is no uniformly most powerful test, that is, no method that is best for every problem. Statistical theory has provided us with a toolbox with effective instruments, which require judgment about when it is right to use them.'' ''Judgement is part of the art of statistics.'' Gigerenzer (2004), p. 604


Simple Linear Regression

Null Model

Full Model



Inference

Null model T-Statistic

Full model T-Statistic

F-Statistic



show with app
#####PACKAGES##########
library(shiny)
library(ggplot2)
library(calibrate)
library(MASS)
library(shinyBS)
atdata<-data.frame(attitude)
#######SHINY#################
shinyApp(
  ui = fluidPage(
    includeScript("../../../Matomo-tquant.js"),
    withMathJax(),
    fluidRow(
      splitLayout(column(7,
             
             h1("Model Comparison")),
               column(7, img(src = "tquant100.png", align = "right", width="60%"))
             
             
    )),
    tabsetPanel(id = "inTabset",
                tabPanel("Home", value = "Home", 
                         br(),
                         h3("Educational Tool for Teaching Statistics"),
                         
                         fluidRow(
                           column(12,
                                  p(h4("Learn the base recipe for model building that subsumes all of the statistical recipes found in the most advanced cookbooks"), align = "middle")),
                                  
                         br(),
                         br(),
                         column(12,
                         withMathJax("$$ {\\LARGE Data = Model + Error} $$")),
                column(12,
                       withMathJax("$$ {\\LARGE Y_{i} = Y'_{i}  + error} $$")),
                
                column(12,
                       p(h4("\\(Null \\, Model : Y_{i}=b_{0}\\)"),
                         bsButton("q1", label = "", icon = icon("question"),size = "extra-small"),
                         bsTooltip(id = "q1", title = " b0 = Best estimation of data when there is no effect of an IV to be modulated", placement = "right", trigger = "hover"))),
                column(12,
                p(h4("\\(Full \\, Model :  Y'_{i} = b_{0} + b_{1}x_{i1} + ... b_{j}x_{ij}\\)"),
                  bsButton("q2", label = "", icon = icon("question"),size = "extra-small"),
                  bsTooltip(id = "q2", title = "bj*Xij =  Estimate that represent the weight of j IVs for the individual i", placement = "right", trigger = "hover"))),
                column(12,
                       p("''We know but often forget that the problem of inductive inference has no single solution. There is no uniformly most powerful test,
                                             that is, no method that is best for every problem. Statistical theory has provided us with a toolbox with effective instruments,
                                             which require judgment about when it is right to use them.'' ''Judgement is part of the art of statistics.''  Gigerenzer (2004), p. 604"))
                                    
                                  )), #end of tab one
                tabPanel("Model Estimation", value = "E",
                         br(),
                         h4("Simple Linear Regression"),
                         
                         splitLayout(
                            verticalLayout( 
                         radioButtons(inputId = "IV", 
                                      label = "Choose your research question:",
                                      c("Effect of learning"="learning","Effect of complaints"="complaints")),
                         radioButtons(inputId = "model", 
                                      label = "Select the model that you want visualize:",
                                      c("Null"="null","Full"="full","Both"="both"),selected=NULL),
                         radioButtons(inputId = "plot", 
                                      label = "Visualising Error",
                                      c("Scatterplot"="scatter","Histogram"="hist"))),
                         
                        column(6,
                        h3("Null Model"),
                         tableOutput(outputId = "nulltop")),
                         column(6,
                                h3("Full Model"),
                                tableOutput(outputId = "fulltop"))),
                         
                           hr(),
                           splitLayout(
                            fluidRow(  
                             tableOutput('table')),
                             fluidRow(
                               
                               plotOutput(outputId = "plot")))
                           
              
                           
                           
                         ),#end of tab 2
                
                tabPanel("Inference", value = "S",
                         br(),
                         h4("Inference"),
                         splitLayout(
                             verticalLayout(radioButtons(inputId = "IVs", 
                                          label = "Choose your research question:",
                                          c("Effect of learning"="learning","Effect of complaints"="complaints")),
                                          radioButtons(inputId = "plots",
                                                      label = "Choose the statistic you want to generate:",
                                                      c("T-Statistic" = "tstat", "F-Statistic" = "fstat"))),
                            
                            verticalLayout(h4("Null model T-Statistic"),
                        tableOutput('tablets')),
                     
                           verticalLayout(h4("Full model T-Statistic"),
                                          tableOutput('tabletsf')),
                        verticalLayout(h4("F-Statistic"),
                        tableOutput('tablefs')))
                        , #insert table here
                        hr(),
                        splitLayout(
                          plotOutput(outputId = "sdist"),
                                    uiOutput("formula")) #Formula in Latex
                          
                          ###plot and formula
                          
                        )
                      
                        
    
  ),#end of tabs
  
  br(),
  splitLayout(
    fluidRow(
    column(4, img(src = "lisbon.png", align = "left", width="105%")),
    column(4, img(src = "glasgow.jpg", width="70%")),
    column(4, img(src = "padua.png", align = "left",width="85%"))
    ))
  
  ), #end of app
  
  #######SERVER###############
  server = function(input, output) {
    
    
    getIV<- reactive({ switch(input$IV,
                              learning = as.data.frame(atdata$learning),
                              complaints = as.data.frame(atdata$complaints))
      
    }) ## Indipendente variable choices
    getIVs <- reactive( switch(input$IVs,
                               learning = as.data.frame(atdata$learning),
                             complaints = as.data.frame(atdata$complaints)))
    
    
    getmodel <- reactive(switch(input$model,
                                both = "0",
                                null = "2",
                                full = "1")) ## Model choices
    
    getplot <- reactive(switch(input$plot,
                               scatter = "0",
                               hist = "1"))
    
    getplotS <-reactive(switch(input$plots,
                              tstat = "0",
                              fstat = "1"))
    
    Ymodelerror<-reactive(atdata$rating-mean(atdata$rating)) #Residual of y
    Xmodel<- reactive(sum(getIV()/lengths(getIV()))) #Mean of x
    Xmodelerror<-reactive(getIV()-Xmodel()) #Residual of x
    PExy<-reactive(Xmodelerror()*Ymodelerror() )
    Covxy<-reactive(sum(PExy())/lengths(getIV())) #Covariance
    var_X <- reactive(sum(Xmodelerror()^2)/lengths(getIV()))
    b1<-reactive (Covxy()/var_X()) # slope
    b0<-reactive( sum(atdata$rating-getIV()*b1())/length(atdata$rating))#intercept
    prediction<-reactive(b0()+getIV()*b1())
    fullmodelerror<-reactive(atdata$rating -prediction())
    SSEfull<-reactive(sum(fullmodelerror()^2))
    SSEnull<-reactive(sum(Ymodelerror()^2))
    
    
    Ymodelerror<-reactive(atdata$rating-mean(atdata$rating)) #Residual of y
    Xmodel<- reactive(sum(getIVs()/lengths(getIVs()))) #Mean of x
    Xmodelerror<-reactive(getIVs()-Xmodel()) #Residual of x
    PExy<-reactive(Xmodelerror()*Ymodelerror() )
    Covxy<-reactive(sum(PExy())/lengths(getIVs())) #Covariance
    var_X <- reactive(sum(Xmodelerror()^2)/lengths(getIVs()))
    b1<-reactive (Covxy()/var_X()) # slope
    b0<-reactive( sum(atdata$rating-getIV()*b1())/length(atdata$rating))#intercept
    prediction<-reactive(b0()+getIVs()*b1())
    fullmodelerror<-reactive(atdata$rating -prediction())
    SSEfull<-reactive(sum(fullmodelerror()^2))
    SSEnull<-reactive(sum(Ymodelerror()^2))
    SSR<-reactive(sum((prediction()-mean(atdata$rating))^2))
    F_sta<-reactive((SSR()/1)/(SSEnull()/length(atdata$rating)-1)) #F statistic 
    P_v<-reactive(pf(F_sta(),df1=1, df2=length(atdata$rating)-1 )) 
    
    #B0 test statistics
    ssex <- reactive(sum(Xmodelerror()^2))
    se <- reactive(sqrt(SSEfull()/(lengths(getIV())-2))) # standard error of the full model
    seb0 <- reactive(se()*(sqrt
                           (
                           (1/lengths(getIV()))+
                             (Xmodel())^2/ssex() 
                           )
    ))
    b0_t_statistics <- reactive(b0()/seb0()) #B0 t-value
    
    # b1 test statistics
    seb1 <- reactive(se()/sqrt(ssex())) 
    b1_t_statistics <- reactive(b1()/seb1()) #B1 t-value
    
    
    output$nulltop <- renderTable ({ #Upper table for Null model
      
      tablenm <- data.frame(SSEnull(),(mean(atdata$rating)))
      colnames(tablenm) <- (c("SSE", "B0"))
      
      
      tablenm})
    
    output$fulltop <- renderTable ({   #Upper table for Null model
      
      tablefm <- data.frame(SSEfull(),b0(),b1())
      colnames(tablefm) <- (c("SSE", "B0", "B1"))
      
      
      tablefm})
    
    
    output$table <- renderTable({
      if (getmodel()=="2"){
        cc <- data.frame(atdata$rating, getIV(),mean(atdata$rating),Ymodelerror() )##Print table null model
        colnames(cc) <- c("y", "x", "Null Model", "Residuals Null Model")
        cc
      }else if (getmodel()=="1") { 
        cc <- data.frame(atdata$rating, getIV(),prediction(),Xmodelerror()) ##Print table full model
        colnames(cc) <- c("y", "x", "Predictions", "Residuals Full Model")
        cc
      }else { 
        cc <-data.frame(atdata$rating, getIV(),mean(atdata$rating),Ymodelerror(),prediction(),Xmodelerror()) ##Print table of null model and full model
        colnames(cc) <- c("y", "x", "Null Model", "Residuals Null Model" ,"Predictions", "Residuals Full Model")
        cc
        
      }
      
    })
    
    
    
    output$plot <- renderPlot({
      if (getplot()=="0") {
     if (getmodel()=="2"){
        plot(data.frame(getIV() , atdata$rating),textxy(getIV(), atdata$rating, Ymodelerror(), col="red"))
        
        abline(h=mean(atdata$rating),col="blue")
      } else if(getmodel()=="1") { 
        plot(data.frame(getIV() , atdata$rating),textxy(getIV(), atdata$rating, Xmodelerror(), col="red"))
        abline( a=b0(), b=b1(),col="red")
      }  else {
        plot(data.frame(getIV() , atdata$rating),textxy(getIV(), atdata$rating, Xmodelerror(), col="red"))
        abline( a=b0(), b=b1(),h=,col="red")
        abline(h=mean(atdata$rating),col="blue")
      }
      }else {
        
        if (getmodel()=="2"){
          barplot(c(SSEnull(),SSEfull(),SSEnull()-SSEfull()),col=c("red","gray","gray"), names.arg =(c("Null model error","Reduction error","Full model error")) )
        } else if(getmodel()=="1") { 
          barplot(c(SSEnull(),SSEfull(),SSEnull()-SSEfull()),col=c("gray","orange","skyblue"), names.arg =(c("Null model error","Reduction error","Full model error")) )
        }  else {
          barplot(c(SSEnull(),SSEfull(),SSEnull()-SSEfull()),col=c("red","orange","skyblue"), names.arg =(c("Null model error","Reduction error","Full model error")) )
          
        }
      }
    }
    ) #print plot 
    
   output$tablefs <- renderTable ({ #Upper table for F-statiscs
      
    tableff <- data.frame(Covxy(),1,length(atdata$rating) -1,F_sta(),P_v() )
      colnames(tableff) <- (c("CovXY", "df1" ,"df2" ,"F","p_value"))
      
      
      tableff})
    
    output$tablets <- renderTable ({ 
      #Upper table for T-statiscs
      
      tablet <- data.frame(b0(), (length(atdata$rating)-1), b0_t_statistics(), P_v0())
      colnames(tablet) <- (c("B0", "df" ,"t" ,"p"))
      
      
      tablet})
    
    output$tabletsf <-renderTable({
      tabletf<-data.frame(b1(),(length(atdata$rating)-1),b1_t_statistics(), P_v1())
      colnames(tabletf) <- (c("B1","df","t","p"))
    
      tabletf
      })
    
    
    
    
    output$sdist <- renderPlot({ ## T and F distribution plot
      if (getplotS()=="0") {
    
      ##Prints t distribution plot 
      curve(dt(x, df=(lengths(getIV())-1)), from=-5, to=5, main="T-student's distribution")
      abline(v=b0_t_statistics(),col="blue")
      abline(v=b1_t_statistics(),col="red")
      
    }
      else {
        curve(df(x, df1=1, df2=(length(atdata$rating)-1)), from=0, to= F_sta()+2 , main="Fisher's distribution")
        abline(v=F_sta(),col="red" )
        
        
        
      }})
    seb0 <- reactive(se()*(sqrt
                           (
                           (1/lengths(getIVs()))+
                             (Xmodel())^2/ssex()) 
    )
    )
    b0_t_statistics <- reactive(b0()/seb0()) #B0 t-value
    
    # b1 test statistics
    seb1 <- reactive(se()/sqrt(ssex())) 
    b1_t_statistics <- reactive(b1()/seb1()) #B1 t-value
    P_v0<-reactive(1-pt(b0_t_statistics(),df=(length(atdata$rating)-1 ))) # F p-value
    P_v1<-reactive(1-pt(b1_t_statistics(),df=(length(atdata$rating)-1 ))) # F p-value
    
  
      output$formula <- renderUI({
        if (getplotS()== "1") {
          
          withMathJax("$$ {\\huge F\\,=\\, \\frac{\\frac{SSR}{P_{full}-P_{null}}} {\\frac{SSE_{null}}{N-P_{full}}} \\rightarrow 
                      \\frac{\\frac{",round(SSR(),digits=2),"}{1}}{\\frac{",round(SSEnull(),digits=2) ,"}{",length(atdata$rating)-1, "}} = ", round(F_sta(),digits=2) ,"} $$")
      } else {
        
        withMathJax("$$ {\\huge t_{b0}\\,=\\, \\frac{b_{0}} {SE_{b0}} \\rightarrow 
                    \\frac{",round(b0(), digits=2),"}{",round(seb0(), digits=2),"} = ", round(b0_t_statistics() ,digits=2) ,"}\\\\   \\\\
                    {\\huge t_{b1}\\,=\\, \\frac{b_{1}} {SE_{b1}} \\rightarrow 
                    \\frac{",round(b1(), digits=2),"}{",round(seb1(), digits=2),"} = ", round(b1_t_statistics() ,digits=2) ,"}$$")    
      }})
  
    }
)#close app