#####PACKAGES##########
library(shiny)
library(ggplot2)
library(calibrate)
library(MASS)
library(shinyBS)
library(broom)
library(DT)
library(shinydashboard)
#######SERVER###############
server = function(input, output, session) {
######## GENERATE DATA ########
## Set a (lot of) seed(s) to obtain the same data everytime
set.seed(1234)
y <- round(rnorm(30, 270, 48)) # marathon performance
id <- 1:30
## Set while loops to find out which seed to use in order to obtain the
## necessary data for our pre-defined correlation values
# i <- 0 # number used for the seed
# rho <- 0 # correlation
# while(rho != -0.75){
#
# i <- i+1
# set.seed(i)
# rho <- round(cor(y, rnorm(30, 45, 5)),2)
#
# }
# i # shows which seed achieved a correlation of -0.75
set.seed(220851)
x1 <- round(rnorm(30, 45, 5),2) # hours of training
# i <- 0 # number used for the seed
# rho <- 0 # correlation
# while( rho != 0.07){
#
# i <- i+1
# set.seed(i)
# rho <- round(cor(y, rnorm(30, 180, 10)), 2)
#
# }
# i
set.seed(5)
x2 <- round(rnorm(30, 180, 10), 2) # body height
#i <- 0 # number used for the seed
#rho <- 0 # correlation
#while( rho != .42){
#
# i <- i+1
# set.seed(i)
# rho <- round(cor(y, rnorm(30, 22, 2)),2)
#
#}
#i
set.seed(1589)
x3 <- round(rnorm(30, 22, 2), 2) # bmi
# Now, bind them in one dataframe
df <- data.frame(cbind(id, y, x1, x2, x3))
# create a fourth variable to use in the ANOVA vs regression
ethnicity <- rep(c("European", "American", "Asian"), each=10) # first create the vector
df <- df [order (df$y),]
df$x4 <- ethnicity # sort it by response and add the ethnicity vector so we can guarantee
# an effect
df <- df [order (df$id),] # for aesthetic purposes sort the dataframe back by id
# Now we define some labels to our variables
idlabel <- "Participant"
ylabel <- "Finishing time [mins]"
x1label <- "Hours training"
x2label <- "Height"
x3label <- "Body Mass Index"
x4label <- "Ethnicity"
######## GET USER CHOICES ########
rv <- reactiveValues(getmodel = 1)
observeEvent(input$null, {rv$getmodel <- 1 })
observeEvent(input$full, {rv$getmodel <- 2 })
observeEvent(input$sse, {rv$getmodel <- 3 })
getplot <- reactive(switch(input$plot_type_null,
scatter = "0",
hist = "1"))
getplotprop1 <- reactive(switch(input$plot_type_prop1,
scatter = "0",
hist = "1"))
getplotprop2 <- reactive(switch(input$plot_type_prop2,
scatter = "0",
hist = "1"))
getplotprop3 <- reactive(switch(input$plot_type_prop3,
scatter = "0",
hist = "1"))
getplotS <-reactive(switch(input$plots,
tstat = "0",
fstat = "1"))
######## PREPARING ALL VARIABLES THAT WILL BE CALLED IN THE SERVER ########
#### NULL AND X MODELS ####
## x model
xmod <- lm(df$y ~ df$x1)
xmod.2 <- lm(df$y ~ df$x2)
xmod.3 <- lm(df$y ~ df$x3)
##get the summary outputs into a data.frame
xmodel <- tidy(xmod)
xmodel.2 <- tidy(xmod.2)
xmodel.3 <- tidy(xmod.3)
colnames(xmodel) <- (c("Term", "Estimate", "Std. Error", "t value", "p-value"))
rownames(xmodel) <- (c("Intercept (b0)", "Slope (b1)"))
colnames(xmodel.2) <- (c("Term", "Estimate", "Std. Error", "t value", "p-value"))
rownames(xmodel.2) <- (c("Intercept (b0)", "Slope (b1)"))
colnames(xmodel.3) <- (c("Term", "Estimate", "Std. Error", "t value", "p-value"))
rownames(xmodel.3) <- (c("Intercept (b0)", "Slope (b1)"))
# null model - mean of y (our dv)
## error or residuals for the null model
Ymodelerror <- round(df$y - mean(df$y), 2)
#### PARAMETERS ESTIMATION ####
#b1, which is also our slope
b1 <- cor(df$x1, df$y) * (sd(df$y) / sd(df$x1))
b1.2 <- cor(df$x2, df$y) * (sd(df$y) / sd(df$x2))
b1.3 <- cor(df$x3, df$y) * (sd(df$y) / sd(df$x3))
#or we can just extract it from the data.frame that we created which contains the lm model output
## b1.1 <- xmodel.1[2,2]
#and b0, our intercept
b0 <- mean(df$y) - b1 * mean(df$x1)
b0.2 <- mean(df$y) - b1.2 * mean(df$x2)
b0.3 <- mean(df$y) - b1.3 * mean(df$x3)
#similarly here we can just extract it b0.1 <- xmodel.1[1,2]
#### PROPOSED MODEL ERROR ####
## so, now we can calculate both the prediction and, hence, the proposed model error:
prediction <- round(b0 + df$x1*b1, 2)
fullmodelerror <- round(residuals(xmod), 2)
prediction.2 <- round(b0.2 + df$x2*b1.2, 2)
fullmodelerror.2 <- round(residuals(xmod.2), 2)
prediction.3 <- round(b0.3 + df$x3*b1.3, 2)
fullmodelerror.3 <- round(residuals(xmod.3), 2)
## sum of squared errors for all models, null, x, and proposed:
SSEnull <- sum(Ymodelerror^2)
#ssex <- sum(Xmodelerror^2)
SSEfull <- sum(fullmodelerror^2)
SSEfull.2 <- sum(fullmodelerror.2^2)
SSEfull.3 <- sum(fullmodelerror.3^2)
## sum of squared residuals:
SSR <- sum(prediction - mean(df$y))^2
SSR.2 <- sum(prediction.2 - mean(df$y))^2
SSR.3 <- sum(prediction.3 - mean(df$y))^2
SS <- c(SSEnull, SSEfull, SSEnull - SSEfull)
SS.2 <- c(SSEnull, SSEfull.2, SSEnull - SSEfull.2)
SS.3 <- c(SSEnull, SSEfull.3, SSEnull - SSEfull.3)
#### DATA ANALYSIS ####
#here, we are calculating both the t tests for our parameters and the f statistics for the proposed model
#these will be needed for the statistical inference tab
##B0 t-test statistics
#we need the standard error of the full model
se <- sigma(xmod)
se.2 <- sigma(xmod.2)
se.3 <- sigma(xmod.3)
#the standard error of b0
seb0 <- xmodel[1, 3]
seb0.2 <- xmodel.2[1, 3]
seb0.3 <- xmodel.3[1, 3]
### to avoid any mistakes of using manual calculations, all of the values are obtained from the lm output
#the t-value!
b0_t_statistics <- xmodel[1, 4]
b0_t_statistics.2 <- xmodel.2[1, 4]
b0_t_statistics.3 <- xmodel.3[1, 4]
#and p value!
P_v0 <- xmodel[1, 5]
P_v0.2 <- xmodel.2[1, 5]
P_v0.3 <- xmodel.3[1, 5]
##B1 t-test statistics
#we need the standard error of the x model
seb1 <- xmodel[2, 3]
seb1.2 <- xmodel.2[2, 3]
seb1.3 <- xmodel.3[2, 3]
#so we can calculate the t-value for b1:
b1_t_statistics <- xmodel[2, 4]
b1_t_statistics.2 <- xmodel.2[2, 4]
b1_t_statistics.3 <- xmodel.3[2, 4]
#and, now, its p-value:
P_v1 <- xmodel[2, 5]
P_v1.2 <- xmodel.2[2, 5]
P_v1.3 <- xmodel.3[2, 5]
## F STATISTICS FOR THE PROPOSED MODEL
# F STATISTICS
F_sta <- anova(xmod)[[4]][1]
F_sta.2 <- anova(xmod.2)[[4]][1]
F_sta.3 <- anova(xmod.3)[[4]][1]
# P VALUE
P_v <- anova(xmod)[[5]][1]
P_v.2 <- anova(xmod.2)[[5]][1]
P_v.3 <- anova(xmod.3)[[5]][1]
# DFs
dfF1 <- anova(xmod)[[1]][1]
dfF1 <- anova(xmod)[[1]][1]
dfF1 <- anova(xmod)[[1]][1]
dfF2 <- anova(xmod)[[1]][2]
dfF2.2 <- anova(xmod.2)[[1]][2]
dfF2.3 <- anova(xmod.3)[[1]][2]
dft <- length(df$x1) - 1
dft.2 <- length(df$x2) - 1
dft.3 <- length(df$x3) - 1
## covariance
Covxy <- cov(df$x1, df$y)
Covxy.2 <- cov(df$x2, df$y)
Covxy.3 <- cov(df$x3, df$y)
### creating a data.frame that contains the mean of the data to plot error for the null
my <- mean(df$y)
meany <- rep(my, 30)
###########################
###### TABLE HEADERS ######
###########################
nullmodlabel <- HTML('<a data-toggle="popover" data-trigger="focus" class="bootpop" title="Null model predictions">Null Y\'<span style="position: relative; top: 0.3em; font-size: 0.8em;">i</span></a>')
nullreslabel <- HTML('<a data-toggle="popover" data-trigger="focus" class="bootpop" title="Null model errors">Null e\'<span style="position: relative; top: 0.3em; font-size: 0.8em;">i</span></a>')
fullmodlabel <- HTML('<a data-toggle="popover" data-trigger="focus" class="bootpop" title="Proposed model predictions">Proposed Y\'<span style="position: relative; top: 0.3em; font-size: 0.8em;">i</span></a>')
fullreslabel <- HTML('<a data-toggle="popover" data-trigger="focus" class="bootpop" title="Proposed model errors">Proposed e\'<span style="position: relative; top: 0.3em; font-size: 0.8em;">i</span></a>')
#########################
### Next page buttons ###
#########################
observeEvent(input$Next1, {
updateTabItems(session, "tabs", "nullmodel")
})
observeEvent(input$Next2, {
updateTabItems(session, "tabs", "propmodel")
})
observeEvent(input$Next3, {
updateTabItems(session, "tabs", "comparison")
})
observeEvent(input$Next4, {
updateTabItems(session, "tabs", "inference")
})
###################################
#### INTRODUCTION page outputs ####
###################################
output$tableData <- shiny::renderDataTable({
cc <- data.frame(df$id, df$y, df$x1, df$x2, df$x3 )
colnames(cc) <- c(idlabel, ylabel, x1label, x2label, x3label)
cc
}, options = list(searching = FALSE,
pageLength = 10,
lengthChange = FALSE
)
, escape = FALSE)
#################################
#### Null model page outputs ####
#################################
output$tableNull <- shiny::renderDataTable({
aa <-data.frame(df$id, df$y, round(mean(df$y), 2), Ymodelerror)
colnames(aa) <- c(idlabel, ylabel, nullmodlabel, nullreslabel)
aa
},
options = list(searching = FALSE,
pageLength = 10,
lengthChange = FALSE
), escape = FALSE)
output$plotNull <- renderPlot({
#####################
#### SCATTERPLOT ####
#####################
if (getplot()==0) {
plot(data.frame(df$id , df$y), xlab = idlabel, ylab = ylabel, pch= 16, ylim=c(150, 400), xlim=c(1, 30),
# make Tom and Jerry's points bigger and colorful
col = ifelse(df$y == 249, "green3" , ifelse(df$y == 157, "red", "black")),
cex = ifelse(df$y == 249 | df$y < 170, 1.5 , 1)
)
abline(h = mean(df$y), col="#337ab7")
points(meany~df$id, bg = '#337ab7', pch= 19,
col = ifelse(df$y == 249, "green3" , ifelse(df$y == 157, "red", "#337ab7")),
cex = ifelse(df$y == 249 | df$y == 157, 1.5 , 1))
segments(df$id, df$y, df$id, meany,
col = ifelse(df$y == 249, "green3" , ifelse(df$y == 157, "red", "black")))
text(23, 230, "Tom :)", col = "green3")
text(6, 200, "Jerry :(", col = "red")
}
###################
#### HISTOGRAM ####
###################
else {
barplot(SSEnull,col="red", names.arg =("Null model error"), ylab = "SSE", xlim = c(0, 4), ylim=c(0, max(SSEnull)))
legend("topright",
inset=.05,
cex = 1,
title="SSE",
paste(round(SSEnull,2)),
horiz=FALSE,
lty=c(1,1),
lwd=c(2,2),
col=c("red"))
}
})
###########################################################
#############Scatterplot and SSE captions##################
###########################################################
output$Cap <- renderText({
#### If user selected only data ####
if (getplot()==0) {
paste("This plot shows the finishing time for each particpant with an addition of the line that represents the null model.The black lines from each data point to the line represent the residual error for each point if the null model is fitted.", sep = "")
#### If user selected the histogram ####
} else {
paste("This histogram shows the Sum Squared Errors (SSE) of the null model", sep = "")
}
})
############################################################
#### Proposed model + Alternative proposed page outputs ####
############################################################
output$tableProposed <- shiny::renderDataTable({
aa <-data.frame(df$id, df$y, df$x1,prediction,fullmodelerror)
colnames(aa) <- c(idlabel, ylabel, x1label, fullmodlabel, fullreslabel)
aa
},
options = list(searching = FALSE,
pageLength = 10,
lengthChange = FALSE
), escape = FALSE)
output$tableProposed.2 <- shiny::renderDataTable({
aa <-data.frame(df$id, df$y, df$x2,prediction.2,fullmodelerror.2)
colnames(aa) <- c(idlabel, ylabel, x2label, fullmodlabel, fullreslabel)
aa
},
options = list(searching = FALSE,
pageLength = 10,
lengthChange = FALSE
), escape = FALSE)
output$tableProposed.3 <- shiny::renderDataTable({
aa <-data.frame(df$id, df$y, df$x3,prediction.3,fullmodelerror.3)
colnames(aa) <- c(idlabel, ylabel, x3label, fullmodlabel, fullreslabel)
aa
},
options = list(searching = FALSE,
pageLength = 10,
lengthChange = FALSE
), escape = FALSE)
output$plotProposed <- renderPlot({
#####################
#### SCATTERPLOT ####
#####################
if (getplotprop1()==0) {
plot(data.frame(df$x1 , df$y), xlab = x1label, ylab = ylabel, pch= 16, ylim=c(150, 400), xlim=c(30, 60),
col = ifelse(df$y == 249, "green3" , ifelse(df$y == 157, "orange", "black")),
cex = ifelse(df$y == 249 | df$y < 170, 1.5 , 1))
abline( a=b0, b=b1,h=,col="#FA8072")
text(10, 46, paste('Intercept (b0) =', round(xmodel[1,2],2)), pos = 4)
text(10, 41, paste('Slope (b1) =', round(xmodel[2,2],2)), pos = 4)
points(prediction ~ df$x1, bg = '#FA8072', pch= 19,
col = ifelse(df$y == 249, "green3" , ifelse(df$y == 157, "orange", "#FA8072")),
cex = ifelse(df$y == 249 | df$y == 157, 1.5 , 1))
segments(df$x1, df$y, df$x1, prediction,
col = ifelse(df$y == 249, "green3" , ifelse(df$y == 157, "orange", "black")))
text(46, 200, "Tom", col = "green3")
text(56, 170, "Jerry", col = "orange")
}
###################
#### HISTOGRAM ####
###################
else {
barplot(SSEfull,col="blue", names.arg =("Proposed model error"), ylab = "SSE", xlim = c(0, 4), ylim=c(0, max(SSEnull)))
legend("topright",
inset=.05,
cex = 1,
title="SSE",
legend = round(SSEfull,2),
horiz=FALSE,
lty=c(1,1),
lwd=c(2,2),
col="blue")
}
})
############################################
#########PLOT CAPTIONS######################
############################################
output$Cap.P1 <- renderText({
if (getplotprop1()==0) {
paste("This plot shows the finishing time for each particpant with an addition of the line of best fit with 'Hours of training' as a predictor variable.The black lines from each data point to the line represent the residual error for each point if this model is fitted.", sep = "")
} else {
paste("This histogram shows the Sum Squared Errors (SSE) when the model with 'Hours of Training' as a predictor is fitted.", sep = "")
}
})
output$plotProposed.2 <- renderPlot({
#####################
#### SCATTERPLOT ####
#####################
if (getplotprop2()==0) {
plot(data.frame(df$x2 , df$y), xlab = x2label, ylab = ylabel, pch= 16, ylim=c(150, 400), xlim=c(150, 200),
col = ifelse(df$y == 249, "green3" , ifelse(df$y == 157, "red", "black")),
cex = ifelse(df$y == 249 | df$y < 170, 1.5 , 1))
abline( a=b0.2, b=b1.2,h=,col="lightgrey")
text(10, 46, paste('Intercept (b0) =', round(xmodel.2[1,2],2)), pos = 4)
text(10, 41, paste('Slope (b1) =', round(xmodel.2[2,2],2)), pos = 4)
points(prediction.2 ~ df$x2, bg = '#FA8072', pch= 19,
col = ifelse(df$y == 249, "green3" , ifelse(df$y == 157, "red", "lightgrey")),
cex = ifelse(df$y == 249 | df$y == 157, 1.5 , 1))
segments(df$x2, df$y, df$x2, prediction.2,
col = ifelse(df$y == 249, "green3" , ifelse(df$y == 157, "red", "black")))
text(195, 200, "Tom", col = "green3")
text(177, 175, "Jerry", col = "red")
}
###################
#### HISTOGRAM ####
###################
else {
barplot(SSEfull.2,col="blue", names.arg = paste(x2label,"model error"), ylab = "SSE", xlim = c(0, 4), ylim=c(0, max(SSEnull)))
legend("topright",
inset=.05,
cex = 1,
title="SSE",
legend = round(SSEfull.2,2),
horiz=FALSE,
lty=c(1,1),
lwd=c(2,2),
col="blue")
}
})
############################################
#########PLOT CAPTIONS######################
############################################
output$Cap.P2 <- renderText({
if (getplotprop2()==0) {
paste("This plot shows the finishing time for each particpant with an addition of the line of best fit for the model with Height as a predictor.
It appears that fitting the Height as a predictor does not greatly improve the error in prediction as compared to the null model(the line is fairly flat).
The black lines from each data point to the line of best fit shows that the error is fairly large if this model is fitted", sep = "")
#### If user selected the histogram ####
} else {
paste("This histogram shows the Sum Squared Errors (SSE) when the model with 'Height' as a predictor is fitted.", sep = "")
}
})
output$plotProposed.3 <- renderPlot({
#####################
#### SCATTERPLOT ####
#####################
if (getplotprop3()==0) {
plot(data.frame(df$x3 , df$y), xlab = x3label, ylab = ylabel, pch= 16, ylim=c(150, 400), xlim=c(15, 28),
col = ifelse(df$y == 249, "orange" , ifelse(df$y == 157, "darkorange2", "black")),
cex = ifelse(df$y == 249 | df$y < 170, 1.5 , 1))
abline( a=b0.3, b=b1.3,h=,col="lightgrey")
text(10, 46, paste('Intercept (b0) =', round(xmodel.3[1,2],2)), pos = 4)
text(10, 41, paste('Slope (b1) =', round(xmodel.3[2,2],2)), pos = 4)
points(prediction.3 ~ df$x3, bg = '#FA8072', pch= 19,
col = ifelse(df$y == 249, "orange" , ifelse(df$y == 157, "darkorange2", "lightgrey")),
cex = ifelse(df$y == 249 | df$y == 157, 1.5 , 1))
segments(df$x3, df$y, df$x3, prediction.3,
col = ifelse(df$y == 249, "orange" , ifelse(df$y == 157, "darkorange2", "black")))
text(24.5, 225, "Tom", col = "orange")
text(18.4, 180, "Jerry", col = "darkorange2")
}
###################
#### HISTOGRAM ####
###################
else {
barplot(SSEfull.3,col="blue", names.arg = paste(x3label,"model error"), ylab = "SSE", xlim = c(0,4), ylim=c(0, max(SSEnull)))
legend("topright",
inset=.05,
cex = 1,
title="SSE",
legend = round(SSEfull.3,2),
horiz=FALSE,
lty=c(1,1),
lwd=c(2,2),
col="blue")
}
})
output$Cap.P3 <- renderText({
#### If user selected only data ####
if (getplotprop3()==0) {
paste("This plot shows the finishing time for each particpant with an addition of the line of best fit for the model with BMI as a predictor variable.
It appears that fitting the BMI as a predictor reduces the residual error more than Height.", sep = "")
#### If user selected the histogram ####
} else {
paste("This histogram shows the Sum Squared Errors (SSE) when the model with 'BMI' as a predictor is fitted.", sep = "")
}
})
##################################
#### Model Comparison outputs ####
##################################
output$tableComparison <- shiny::renderDataTable({
aa <-data.frame(df$id, df$y, df$x1, round(mean(df$y), 2), Ymodelerror, prediction,fullmodelerror)
colnames(aa) <- c(idlabel, ylabel, x1label, nullmodlabel, nullreslabel, fullmodlabel, fullreslabel)
aa
},
options = list(searching = FALSE,
pageLength = 10,
lengthChange = FALSE
), escape = FALSE)
output$plotComparison <- renderPlot({
#####################
#### SCATTERPLOT ####
#####################
if (rv$getmodel==1) {
plot(data.frame(df$x1 , df$y), xlab = x1label, ylab = ylabel, pch= 16, ylim=c(150, 400), xlim=c(30, 60))
abline( a=b0, b=b1,h=,col="#FA8072")
abline(h=mean(df$y),col="#337ab7")
#text(50, 380, paste('Intercept (b0) =', round(xmodel[1,2],2)), pos = 4)
#text(50, 360, paste('Slope (b1) =', round(xmodel[2,2],2)), pos = 4)
points(meany ~ df$x1, col ='#337ab7', bg = '#337ab7', pch= 19)
segments(df$x1, df$y, df$x1, meany)
}
#full#
else if (rv$getmodel==2){
plot(data.frame(df$x1 , df$y), xlab = x1label, ylab = ylabel, pch= 16, ylim=c(150, 400), xlim=c(30, 60))
abline( a=b0, b=b1,h=,col="#FA8072")
abline(h=mean(df$y),col="#337ab7")
#text(50, 380, paste('Intercept (b0) =', round(xmodel[1,2],2)), pos = 4)
#text(50, 360, paste('Slope (b1) =', round(xmodel[2,2],2)), pos = 4)
points(prediction ~ df$x1, col ='#FA8072', bg = '#FA8072', pch= 19)
segments(df$x1, df$y, df$x1, prediction)
}
###################
#### HISTOGRAM ####
###################
else if (rv$getmodel==3) {
diff = SSEnull-SSEfull
t2 = matrix(c(SSEfull,diff))
tx <- barplot(SSEnull,width= 1.5,space = 0, col=c("#337ab7"), ylab = "SSE",beside=FALSE,xlim=c(0,8), names.arg =(c("Null model \n error")), ylim=c(0, max(SSEnull)))
txx <-barplot(t2,width=1.5,space = 1.5 ,col=c("#FA8072","grey"),ylab = "SSE", beside = FALSE, add = TRUE,xlim=c(0,8), names.arg =(c("Proposed model \n error")), ylim=c(0, max(SSEnull)))
text(x = txx,y = 40000, "error \n reduction")
legend("topright",
inset=.05,
cex = 1,
title="SSE",
legend = round(c(SSEnull, SSEfull, diff),2),
horiz=FALSE,
lty=c(1,1),
lwd=c(2,2),
col=c("#337ab7","#FA8072","grey"))
}
})
output$Cap.Com <- renderText({
#### null ####
if (rv$getmodel==1) {
paste("This plot shows the finishing time for each participant with an addition of the line of best fit for the null model. Click to see how the residual error is reduced when a predictor is added to the model", sep = "")
}
#### proposed ####
else if (rv$getmodel==2){
paste("This plot shows the finishing time for each participant with an addition of the line of best fit when a model with 'Hours of Training' is fitted. It is clear that additon of the predictor reduces the error", sep = "")
}
#### hist ####
else if (rv$getmodel==3){
paste("This histogram shows the Sum Squared Errors (SSE) of the null model and the model with 'Hours of Training'as a predictor. It also shows how additon of the predicotr reduces the SSE.", sep = "")
}
})
################################
#### Inference page outputs ####
################################
#### Let's create a function to make some buttons
shinyInput <- function(FUN, len, id, ...) {
inputs <- character(len)
for (i in seq_len(len)) {
inputs[i] <- as.character(FUN(paste0(id, i), ...))
}
inputs
}
#### Table headers ####
Term <- HTML('<a data-toggle="popover" data-trigger="focus" class="bootpop" title="Our parameters: b0 and b1"<center>Term</center>')
Estimate <- HTML('<a data-toggle="popover" data-trigger="focus" class="bootpop" title="Estimation for each parameter"<center>Estimate</center>')
S_Error <- HTML('<a data-toggle="popover" data-trigger="focus" class="bootpop" title="Standard error for each estimated parameter"<center>Std. Error</center>')
T_Value <- HTML('<a data-toggle="popover" data-trigger="focus" class="bootpop" title="Test statistics (ratio between systematic and unsystematic variance)"<center>t value</center>')
P_Value <- HTML('<a data-toggle="popover" data-trigger="focus" class="bootpop" title="Probability of obtaining an equal or more extreme t value if H0 is true."<center>p-value</center>')
#### Then mannually create the content of the table so we can format every element
labelsc <- c(Term, Estimate, S_Error, T_Value, P_Value)
if (xmodel[1,5] > 0.0005) {
pvalueb1 <- sprintf("%.2f",round(xmodel[1,5], 3))
} else {
pvalueb1 <- "< 0.001"
}
if (xmodel[2,5] > 0.001) {
pvalueb2 <- sprintf("%.2f",round(xmodel[2,5], 3))
} else {
pvalueb2 <- "< 0.001"
}
row1 <- c(shinyInput(actionButton, 1, 'button_1', label = "Intercept", onclick = 'Shiny.onInputChange(\"select_button\", this.id)'),
sprintf("%.2f",round(xmodel[1,2],2)),
sprintf("%.2f",round(xmodel[1,3],2)),
sprintf("%.2f",round(xmodel[1,4],2)),
pvalueb1
)
row2 <- c(shinyInput(actionButton, 1, 'button_2', label = x1label, onclick = 'Shiny.onInputChange(\"select_button\", this.id)' ),
sprintf("%.2f",round(xmodel[2,2],2)),
sprintf("%.2f",round(xmodel[2,3],2)),
sprintf("%.2f",round(xmodel[2,4],2)),
pvalueb2
)
tablecof <- rbind(row1, row2)
colnames(tablecof) <- labelsc
rownames(tablecof) <- NULL
## Then the button ##
buttonRV <- reactiveValues(RV = NULL)
observeEvent(input$showTstat, {
buttonRV$RV <- TRUE
})
# Let's compile the introductory text for the Coeff table
output$introCoeff <- renderUI({
if (is.null(buttonRV$RV)) return()
HTML('Below is the regression summary table:<br>')
})
#### Full Model output table ###
output$tableTS <- DT::renderDataTable({
if (is.null(buttonRV$RV)) return()
tablecof}
, class = 'compact'
, options = list(searching = FALSE,
pageLength = 10,
lengthChange = FALSE,
ordering = FALSE,
paging= FALSE,
info = FALSE
)
, server = FALSE, escape = FALSE, selection = 'none')
#### Initiate the variables that the buttons will change ####
inferenceV <- reactiveValues(betas = '')
#### Observe the button clicking #####
observeEvent(input$select_button, {
selectedRow <- as.numeric(strsplit(input$select_button, "_")[[1]][2])
if (selectedRow == 11) {
inferenceV$betas <- "b0"
}
else if (selectedRow == 21) {
inferenceV$betas <- "b1"
}
})
f <- summary(xmod)$fstatistic
Radj <- summary(xmod)$adj.r.squared
R <- summary(xmod)$r.squared
pF <- round(pf(q=f[1], df1=f[2], df2=f[3], lower.tail=FALSE))
if (pF < 0.0001) {
pF <- "< 0.0001"
}
# Let's compile the R^2 text
output$R2 <- renderUI({
if (is.null(buttonRV$RV)) return()
HTML(
paste("Multiple R-squared: ", round(R, 4), ", Adjusted R-squared: ", round(Radj, 4), sep=""),
"<br>",
paste("F-statistic: ", round(f[1],4), " on ", round(f[2],4), " and ", round(f[3],4), " DF, p-value: ", pF, sep ="")
)
})
#### SCATTER PLOTS #####
output$betaplot <- renderPlot({
#### When b0 is selected ####
if (inferenceV$betas =="b0"){
plot(data.frame(df$x1 , df$y), main = "Estimate", xlab = x1label, ylab = ylabel, xlim=c(0, max(x1)), ylim=c(min(y), 533), pch= 16)
abline( a=b0, b=b1,h=,col="gray")
text(0, (xmodel[1,2]-5), paste('Intercept (b0) =', round(xmodel[1,2],2)), pos = 4, col="blue")
points(0,xmodel[1,2],pch=22,col="blue", bg = "blue")
}
#### When b1 is selected ####
else if (inferenceV$betas == "b1"){
plot(data.frame(df$x1 , df$y), main = "Estimate", xlab = x1label, ylab = ylabel, xlim=c(0, max(x1)), ylim=c(min(y), 533), pch= 16)
abline( a=b0, b=b1,h=,col="gray")
text(0, (xmodel[1,2]-1), paste(' Slope (b1) =', round(xmodel[2,2],2)), pos= 4, col="chartreuse4")
segments(0,xmodel[1,2],1,xmodel[1,2],col="chartreuse4")
segments(1,xmodel[1,2],1,(xmodel[1,2]+xmodel[2,2]), col="chartreuse4")
}
})
###################################################
#### T PLOTS ####
####Function for shading the critical value area###
###################################################
colorArea <- function(from, to, density, ..., col="blue", dens=NULL){
y_seq <- seq(from, to, length.out=500)
d <- c(0, density(y_seq, ...), 0)
polygon(c(from, y_seq, to), d, col=col, density=dens)
}
#### T PLOTS #####
output$tplot <- renderPlot({
#### When t0 is selected ####
if (inferenceV$betas =="b0"){
##Prints t distribution plot
curve(dt(x, df=dft), from=-12, to=12, main="t value", xlab = "T-student's distribution", ylab = 'Probability Density')
abline(v=b0_t_statistics,col="blue")
abline(v=(qt(c(.975), df=dft)),col="red")
abline(v=-(qt(c(.975), df=dft)),col="red")
text(b0_t_statistics,0.2, paste('t value =', round(b0_t_statistics,2)), pos= 2, col="blue")
text(6.5,0.1, paste('critical value = [', round(qt(c(.975), df=dft),2), ",", round(-qt(c(.975), df=dft),2), "]"), pos=3, offset=2, col="red")
colorArea(from=qt(c(.975), df=dft), to=3.75, dnorm, mean=0, sd=1, col=2, dens=65)
colorArea(from= qt(c(.025), df=dft), to= -3.75, dnorm, mean=0, sd=1, col=2, dens=65)
}
#### When t1 is selected ####
else if (inferenceV$betas =="b1"){
curve(dt(x, df=dft), from=-12, to=12, main="t value", xlab = "T-student's distribution", ylab = 'Probability Density')
abline(v=b1_t_statistics,col="chartreuse4")
abline(v=(qt(c(.975), df=dft)),col="red")
abline(v=-(qt(c(.975), df=dft)),col="red")
text(b1_t_statistics,0.2, paste('t value =', round(b1_t_statistics,2)), pos= 2, col="chartreuse4")
text(6.5,0.1, paste('critical value = [', round(qt(c(.975), df=dft),2), ",", round(-qt(c(.975), df=dft),2), "]"), pos=3, offset=2, col="red")
colorArea(from=qt(c(.975), df=dft), to=3.75, dnorm, mean=0, sd=1, col=2, dens=65)
colorArea(from= qt(c(.025), df=dft), to= -3.75, dnorm, mean=0, sd=1, col=2, dens=65)
}
})
output$plotsLabel <- renderUI({
if (inferenceV$betas =="b0"){
HTML(paste("<br><p style=\"color:blue\">The effect here is ", round(xmodel[1,2],2), " and represents the marathon finishing time of someone who hasn\'t trained at all (i.e., trained for 0 hours). The error for the intercept is ", round(xmodel[1,3],2), ". In this sense the test statistics is ", round(xmodel[1,2],2), " / ", round(xmodel[1,3],2), " = ", round(xmodel[1,4],2), ". The likelihood to have a test statistics of ", round(xmodel[1,4],2), " or a more extreme test statistic if H<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">0</span> (i.e., the value of the intercept isn't different from zero) is true is is ", pvalueb1, ".<br></p>", sep=""))
}
else if (inferenceV$betas =="b1"){
HTML(paste("<br><p style=\"color:green\">The effect here is ", round(xmodel[2,2],2), " and represents the improvement in finishing time per hour of trainning (i.e., if you train for one hour, how many minutes earlier should you expect to finish?). The error for the intercept is ", round(xmodel[2,3],2), ". In this sense the test statistics is ", round(xmodel[2,2],2), " / ", round(xmodel[2,3],2), " = ", round(xmodel[2,4],2), ". The likelihood to have a test statistics of ", round(xmodel[2,4],2), " or a more extreme test statistic if H<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">0</span> (i.e., the value of the slope isn't different from zero) is true is ", pvalueb2, ".<br></p>", sep=""))
}
})
####REPORTING RESULTS BOX####
output$reporting1 <- renderUI ({
HTML(paste("<p style=\"font-size:large\"><b>The unstandardized coefficient way:</b> The results show a significant negative effective of hours of training on finishing time in the marathon, <i>b</i> = ", round(xmodel[2,2],2), ", <i>t</i>(", dft, ") = ", round(xmodel[2,4],2), ", <i>p</i> = .001.", sep=""))
})
betaST <- round(summary(lm(scale(y) ~ scale(x1)))$coefficients[[2]], 2)
tforcompare <- round(summary(lm(scale(y) ~ scale(x1)))$coefficients[2,3], 2)
output$reporting2 <- renderUI ({
HTML(paste("<p style=\"font-size:large\"><b>The standardized coefficient way:</b> The results show a significant negative effective of hours of training on finishing time in the marathon, β = ", betaST, ", <i>t</i>(", dft, ") = ", round(xmodel[2,4],2), ", <i>p</i> = .001.", sep=""))
})
# Let's show R2
output$RtoPrint <- renderUI({
HTML(paste('In this case there is no need for that because the test for the effect size is the same as the test for the <i>b</i><span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span> as there is only one quantitative independent variable (hours of training). Don\'t believe us? Here is the proof: <br>Note that the <i>F</i> distribution is simply a squared <i>t</i> distribution. So, if all the effect size is due to <i>b</i><span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span>, the square <i>t</i> value for <i>b</i><span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span> should be the same as the <i>F</i> value! Do the math and see for yourself (a little discrepancy is to expected due to rounding):',
"<ul><li><i>t</i> value of <i>b</i><span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span> = ",
tforcompare,
"</li><li><i>F</i> value of <i>R</i><span style=\"position: relative; bottom: 0.3em; font-size: 0.8em;\">2</span> = ",
round(f[1],2), "</li>", sep=""))
})
output$regressionStats <- renderPrint({
summary(lm(y ~ x1))
})
############################################
###############Glossary Table###############
############################################
term<- c('Null model','Model Error', 'Intercept','Slope','Sum of Squared Errors (SSE)','Standard Error','t statistic','p-value', 'Critical Value','F-statistic')
definition <- c("Null model is a model in which ","The error of the model (also knows as the residual) is the difference between the value that is predicted by a model and the value observed in the data.
The smaller the error the better the model fits the data ", "In a regression, an intercept is the expected mean value of outcome variable (Y) when all of the predictor varioables are at 0 (X=0).", "The slope of a regression line represents the rate of change in the outcome variable (y) as the predictor variable (x) changes.The greater the magnitude of the slope, the steeper the line and the greater the rate of change.",
"Is an estimate of the total spread of a set of observations around a parameter i.e. mean. It is calculated by getting the deviance for each score which is then squarted. The sum of the squared deviance is the sum of squares.",
"It provides us a measure of the statistical accuracy of an estimate. Larger values indicate that a given statistic i.e. the mean might not accurately reflect the population from which the sample came from.",
"A test statistic with a t-distribution (see t-statistic plot). In linear regression it is used to test whether the model explains the variance. It is calcualted by dividing the coefficient by its standard error. ", "The p-value in a regression each term tests the coefficients are equal to zero (no effect). A low p-value (< 0.05) indicates that you can reject the null hypothesis suggesting that the inclusion of the variable is a meaningful addition to your model.","Critical values are cut-off values that define regions where the test statistic is unlikely to lie if the null hypothesis is true. The critical value depends on the alpha level and the test statistic used with the alpha of 0.05 frequently applied. The null hypothesis is rejected if the test statistic lies within this region", "A test statistic with a known F-distribution (see F distribution plot). It is a ratio of the average variability in the data that a model explains to the average variability of unexplained variance in the same model. It is used to test the overall fit of the model in regression analysis")
glossary_data <- data.frame(term, definition)
#####table for glossary tab####
output$tablegloss <- shiny::renderDataTable({
ff <- data.frame(term, definition)
colnames(ff) <- c("Term", "Definiton")
ff
}, options = list(searching = FALSE,
pageLength = 10,
lengthChange = FALSE,
ordering = TRUE,
paging = FALSE,
info = FALSE,
stateSave = FALSE,
order = list(c(0, 'asc'))
)
, escape = FALSE)
############################################
########## Test your knowledge #############
############################################
#### The statistics ####
# Create a mode function
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
output$A1Statistics <- renderUI({
if(input$dataStatistics == "mode" & input$dataVariables == "Finishing time [mins]" ){HTML(Mode(y))}
else if(input$dataStatistics == "mode" & input$dataVariables == "Hours training" ){HTML(Mode(x1))}
else if(input$dataStatistics == "mode" & input$dataVariables == "Height" ){HTML(Mode(x2))}
else if(input$dataStatistics == "mode" & input$dataVariables == "Body Mass Index" ){HTML(Mode(x3))}
else if(input$dataStatistics == "median" & input$dataVariables == "Finishing time [mins]" ){HTML(median(y))}
else if(input$dataStatistics == "median" & input$dataVariables == "Hours training" ){HTML(median(x1))}
else if(input$dataStatistics == "median" & input$dataVariables == "Height" ){HTML(median(x2))}
else if(input$dataStatistics == "median" & input$dataVariables == "Body Mass Index" ){HTML(median(x3))}
else if(input$dataStatistics == "mean" & input$dataVariables == "Finishing time [mins]" ){HTML(mean(y))}
else if(input$dataStatistics == "mean" & input$dataVariables == "Hours training" ){HTML(mean(x1))}
else if(input$dataStatistics == "mean" & input$dataVariables == "Height" ){HTML(mean(x2))}
else if(input$dataStatistics == "mean" & input$dataVariables == "Body Mass Index" ){HTML(mean(x3))}
})
rvB <- reactiveValues(a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0)
observeEvent(input$answersData, {
{rvB$a1 <- 1 }
})
observeEvent(input$answersNull, {
{rvB$a2 <- 1 }
})
observeEvent(input$answersProposed, {
{rvB$a3 <- 1 }
})
observeEvent(input$answersComparison, {
{rvB$a4 <- 1 }
})
observeEvent(input$answersInference, {
{rvB$a5 <- 1 }
})
output$answersText1 <- renderUI({
if(rvB$a1 == 1) {
HTML("
<br>
<b>Answers</b>:
<ol>
<li>30 people</li>
<li>247 minutes</li>
<li>48.42 minutes</li>
<li>255.7 minutes</li>
<li>245.5 minutes</li>
<li>212 minutes</li>
<li>Participant 4</li>
<li>Participant 20</li>
</ol>")
}
})
output$answersText2 <- renderUI({
if(rvB$a2 == 1) {
HTML("
<b>Answers</b>:
<ol>
<li>54632.3</li>
<li>The mean.</li>
<li>Individual finishing time.</li>
</ol>")
}
})
output$answersText3 <- renderUI({
if(rvB$a3 == 1) {
HTML("
<b>Answers</b>:
<ol>
<li>44944.16</li>
<li>54349.53</li>
<li>BMI</li>
<li>Hours of training</li>
</ol>")
}
})
output$answersText4 <- renderUI({
if(rvB$a4 == 1) {
HTML("
<b>Answers</b>:
<ol>
<li>30406.18</li>
<li>Participant 20 (off by 130.3)</li>
<li>Participant 23 (off by 6.7)</li>
<li>Participant 29 (off by 56.77)</li>
<li>Participant 21 (off by 3.62)</li>
</ol>")
}
})
output$answersText5 <- renderUI({
if(rvB$a5 == 1) {
HTML("
<b>Answers</b>:
<ol>
<li>532.33</li>
<li>520.31</li>
<li>A <i>t</i> of -5.93</li>
<li>29</li>
<li>30</li>
</ol>")
}
})
}
#####updated version #####
#####PACKAGES##########
library(shiny)
library(ggplot2)
library(calibrate)
library(MASS)
library(shinyBS)
library(broom)
library(DT)
library(shinydashboard)
############################
###### USER INTERFACE ######
############################
ui = dashboardPage(
dashboardHeader(title = "Model Comparison", titleWidth = 180),
dashboardSidebar(
width = 180,
####################
### SIDEBAR MENU ###
####################
sidebarMenu(id="tabs",
menuItem("Introduction", tabName = "intro", icon = icon("question")
),
menuItem("Null Model", tabName = "nullmodel", icon = icon("th")
),
menuItem("Proposed Model", tabName = "propmodel", icon = icon("signal")
),
menuItem("Model Comparison", tabName = "comparison", icon = icon("clone")
),
menuItem("Inference", tabName = "inference", icon = icon("hourglass-half")
),
menuItem("Glossary", tabName = "glossary", icon = icon("list"))
)),
dashboardBody(
tags$style(HTML("
.box-body, .box-header, .nav-tabs-custom, .tab-pane, .nav-tabs-custom > .tab-content, .nav-tabs-custom > .nav-tabs > li.active:hover > a, .nav-tabs-custom > .nav-tabs > li.active > a {
background:#EBF4FA;
}
hr {
color: #d2d6de;
background: #d2d6de;
height: 3px;
}
")),
includeScript("../../../Matomo-tquant.js"),
fluidRow(
tags$head(
tags$link(rel = "stylesheet", type = "text/css", href = "formatboot.css")),
######################
## 1st PAGE - INTRO ##
######################
tabItems(
tabItem("intro",
### Intro box ###
box(width=12, title = h3("WHAT IS THIS ALL ABOUT?",align="center"),
collapsible = T,
p("With this app you can explore basic statistics concepts, compare different models and see how they all boil down to the same recipe:"),
br(),
HTML('<p style="text-align:center; font-size:20px"> Data = model + error </p>'),
br(),
p("The curious human being that's inside yourself asks you questions all the times about phenomena, and you really would like to satisfy your unstoppable thirst for knowledge, somehow. We have a tool that might help you out.")
,p("Imagine soon a marathon will take place close where you live, and all your friends are taking part in it. You would like to join them, but you don't want to suck. So you decide to look around for what is important in order to succeed in a loooong run. You have some ideas, maybe your BMI is important? Maybe your height? Or is it more about how much you have been training?"),
p("You are lucky, the organizers of last year marathon collected information, and you can access this dataset! But what can you do with all these numbers?"),
br(),
p(img(src = "marathonRunner.jpg", width="50%"), align="middle")
),
### Table with data box ###
box(width=12, title = h3(HTML("THE DATASET"), align="middle"),
collapsible = T,
collapsed = T,
HTML('So, let\'s first have a look at the information about the last year\'s marathon.'),
br(),br(),
shiny::dataTableOutput("tableData")),
### Quiz box ###
box(width=12, title = h3("TEST YOUR KNOWLEDGE", align="center"),
collapsible = T, collapsed = T,
fluidRow(
column(8,
tags$head(
tags$style(type="text/css", "#inline label{ display: table-cell; text-align: center; vertical-align: middle; font-size:110%; padding: 8px}
#inline .form-group { display: table-row;}")
),
HTML('Here is a challenge for you! Based on the table and the menus on your left, try to answer these questions:'),
br(),br(),
tags$div(id = "inline", textInput("answer1f", HTML("1.How many people took part in the marathon last year?"))),
tags$div(id = "inline", textInput("answer1f", HTML("2.How much time did it take for participant number eleven to complete the run?"))),
tags$div(id = "inline", textInput("answer1f", HTML("3.How many hours a week did she train?"))),
tags$div(id = "inline", textInput("answer1f", HTML("4.What is the average finishing time?"))),
tags$div(id = "inline", textInput("answer1f", HTML("5.Fifty percent of the marathon runners finished before how many minutes?"))),
tags$div(id = "inline", textInput("answer1f", HTML("6.What was the most frequent finishing time in minutes?"))),
tags$div(id = "inline", textInput("answer1f", HTML("7.Who was first to finish the race?"))),
tags$div(id = "inline", textInput("answer1f", HTML("8.Who finished last?"))),
br()),
column(2, br(), htmlOutput("answersText1")),
column(2,
br(), br(),
selectInput("dataStatistics", label = "Statistic:", choices = c("mode", "median", "mean")),
selectInput("dataVariables", label = "Variable:", choices = c("Finishing time [mins]", "Hours training", "Height", "Body Mass Index")),
htmlOutput("A1Statistics"))
),
actionButton("answersData", "See answers"),
br()
),
### Summary box ###
box(width=12, title = h3("SUMMARY", align="center"),
collapsible = T, collapsed = T,
HTML("<b>What have you learned so far?</b>"),
HTML("<p> The basic recipe for any model is: Data = model + error. By now you should be familiar with the first part of the recipe (Data):
<ul>
<li>You gained some insights into your research question and how to go to answer it.</li>
<li>You have seen the available data, and briefly explored it. But this is just the first step!</li>
</ul>"),
HTML("<b>What are you going to learn next? </b>
<ul>
<li>You are going to get to know the second part of the recipe: what is a MODEL? What is ERROR?</li>
<li>What is the null model?</li>
<li>What are the parameters of the null model?</li>
</ul>"),
HTML("<p><b>Research question:</b><br>You aim at understanding what you should do to prepare for the marathon. A couple of your friends participated last year, and you decide to get some inspiration by looking at what they did.<p>")
)
,p(
HTML('<a href="#top">'),
actionButton(inputId="Next1", label=HTML('Next page <i class="fa fa-arrow-right"></i>')),
HTML("</a>")
, align="right")
),
##########################
#### 2nd - Null MODEL ####
##########################
tabItem("nullmodel",
### Intro box ###
box(width=12, title = h3('YOUR FIRST GUESS',align="center"),
collapsible=T,
HTML("<p> You are curious about how your friends Tom and Jerry performed last year, but you can't find their name in the dataset in which only participants'numbers are displayed. You don't have many information about them apart from the fact that they were there. What is the best guess you can make about their time? <br>
You can now use your recipe to answer this question. The 'i' subscript indicates the individual partcipant. </p>"),
HTML('<p style="text-align:center; font-size:20px"> Data<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">i</span> = model + error<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">i</span> </p>'),
HTML("<p>The best guess would probably be the most frequent time people spent to complete the marathon. In statistical language this is called 'the mode', and in special cases (e.g. when data are normally distributed), the mode equals the mean value. Let's assume that this is the case, you calculate the mean time and you find that it was about 4 hours and 15 minutes, so you go for that. </p>
</p>"),
HTML('<p style="text-align:center; font-size:20px"> Time<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">Jerry</span> = mean time + error<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">Jerry</span> </p>'),
HTML('<p style="text-align:center; font-size:20px"> Time<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">Tom</span> = mean time + error<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">Tom</span> </p>'),
br(),
HTML("<p> When you meet your friends, you decide to actually ask them for the real time. Jerry seems to be quite offended by your guess, he had performed way better! He made it in a little more than 2 hours and a half!!
However, Tom was happily surprised by your guess, it took 4 hours and 9 minutes for him!
Maybe you can improve your guess (model) by considering more information and adding it into your model.
</p>")),
### Null Model box ###
box(width=12, title = h3("WHAT IS AN ERROR?", align="center"),
collapsible = T,
collapsed = T,
HTML("Your prediction was better for Tom than for Jerry, so we can say that you made a greater error at predicting Jerry's time than Tom's.
You can look at the next section to get a better grasp of what an error is.
Look at the graph below. On the y-axis is represented last year participant's time. On the x-axis you find the participant number (id).
Your model (the mean) is represented as a straight line, and it is the time that you would predict for each participant to finish the marathon. For a lot of participants, you made an error using this model.
The red line represents the error you commited in your prediction for Jerry's time, whereas the green line is the error associated with the time you predicted for Tom. <br> <br>
As you can see the model is not that good. Maybe you can improve your guess (model) by considering more information to add into your model.
"),
br(),br(),
fluidRow(
column(6,
shiny::dataTableOutput("tableNull")),
column(6,
radioButtons(inputId = "plot_type_null",
label = "How do you wish to visualize it?",
c("Scatterplot"="scatter","Histogram(SSE)"="hist")),
plotOutput('plotNull'),
textOutput("Cap")
)))
,
#####Quiz box######
box(width=12, title=h3("TEST YOUR KNOWLEDGE", align="center"),
collapsible=T, collapsed=T,
column(9,
tags$div(id = "inline", textInput("answer1f", HTML("1.What is the sum of standard error when the null model is fitted?"))),
tags$div(id = "inline", textInput("answer1f", HTML("2.What estimate is used to fit the null model?"))),
tags$div(id = "inline", textInput("answer1f", HTML("3.What is your model trying to predict?"))),
actionButton("answersNull", "See answers")
),
column(3,
htmlOutput("answersText2"))
),
### Summary box ###
box(width=12, title=h3("SUMMARY", align="center"),
collapsible=T, collapsed=T,
HTML("<p><b>What have you learned so far?</b><br>
By now you should be familiar with the null model and the error in the model. The basic recipe for any model is: Data<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">i</span> = model + error<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">i</span>.<p>"),
HTML("<p><b>What are you going to learn next? </b>
<ul>
<li>What is the proposed model?</li>
<li>What are the parameters of the proposed model?</li>
<li>How good is the model?</li>
</ul>
"),
HTML("<p><b>Research question:</b><br>
Don't forget our research question! You want to understand what is important in order to succeed in a marathon. </p>")),
p(
HTML('<a href="#top">'),
actionButton(inputId="Next2", label=HTML('Next page <i class="fa fa-arrow-right"></i>')),
HTML("</a>")
, align="right")
),
##########################
## 3rd - PROPOSED MODEL ##
##########################
tabItem("propmodel",
tabBox(
tabPanel("Proposed Model",
br(),
### Tab1, Intro box ###
box(width=14, title=h3("IMPROVING YOUR GUESSES", align="center"),
collapsible=T,
HTML("<p> As you saw in the previous section, your model was not very good at predicting your friends'performance. How can you improve it?
In the dataset of last year's marathon, you found other information which might be useful, such as the hours a week each participant spent on training, BMI and their height. <br>
<br>You decide to start by looking if the hours spent on training might be relevant for the performance in the marathon.
You can now build a new model with this information and see if your predictions improve: </p>"),
h4("Marathon Finish Time = Intercept + effect of training * training + Error", align="middle"),
HTML('<p style="text-align:center; font-size:20px">Y<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">i</span> = b<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">0</span> + b<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span>*x<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span> + Error</p>'),
HTML('In which Y<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">i</span> = the outcome variable, b<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">0</span> = the intercept or b<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">0</span> coefficient, b<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span> = the slope or b<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span> coefficient, and X<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span> = the predictor variable.'),
br(),
HTML('<br><b>Intercept (b<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">0</span>)</b>:<br>
You assume that the relationship between the two variables (time and training) can be described with a line. A person who did not train at all, has a value of 0 hours on the x-axis, and according to this model she will have to run for 7265 minutes to finish the marathon. This value is known as the intercept (b0) in the model and is represented by the point in which the line crosses the y-axis.'),
br(),
HTML('<br><b>Effect of training (b<span style=\"position: relative; top: 0.3em; font-size: 0.8em;\">1</span>)</b>:<br>
You would like to see if training is important for succeeding in the marathon. The answer to this question lies in the b1 coefficient. This coefficient (slope of the line) represents the strength and the direction of the relationship between the two variables, in particular it represents how much the predictor has to change in order to see a change of one unit in the outcome.'
)),
### Tab1, Proposed model box ###
box(width=14, title = h3("FITTING THE MODEL TO THE DATA", align="center"),
collapsible = TRUE,
collapsed = T,
HTML("<p>Now you can visually compare the length of the lines.
The model seems to better predict both the times of your friends (the error is reduced). But most of all, it seems that the predictor 'Hours of training a week' is significantly related to the outcome variable (performance at the marathon). To know if that's the case you will have to compute a significance test, which you will find in the inference section.
At this point you are interested to know if your predictions improved. What about comparing the null model with this new one? Go to the the 'Model comparison' section and have a look.
</p>"),
br(), br(),
fluidRow(
column(6,
shiny::dataTableOutput('tableProposed')),
column(6,
radioButtons(inputId = "plot_type_prop1",
label = "How do you wish to visualize it?",
c("Scatterplot"="scatter","Histogram"="hist")),
plotOutput('plotProposed'),
textOutput('Cap.P1')
))
),
##### Tab1, Quiz box######
box(width=14, title=h3("TEST YOUR KNOWLEDGE", align="center"),
collapsible=T, collapsed=T,
HTML("<b>Note</b>: Your answers will not be erased by using another tab in this page."),
br(),br(),
column(10,
tags$div(id = "inline", textInput("answer1f", HTML("1.What is the SSE when BMI is used as a predictor of finishing time?"))),
tags$div(id = "inline", textInput("answer1f", HTML("2.What is the SEE when Height is used as a predtor of finishing time?"))),
tags$div(id = "inline", textInput("answer1f", HTML("3.Out of those two predicotrs which provides a better estimation of finishing time?"))),
tags$div(id = "inline", textInput("answer1f", HTML("4.Overall what is the best predictor of finishing time?"))),
actionButton("answersProposed", "See answers")
),
column(2,
htmlOutput("answersText3"))
),
#### Tab1, Summary box ####
box(width=14, title = h3("SUMMARY", align="center"),
collapsible = TRUE, collapsed = T,
HTML("<p><b>What have you learned so far? </b><br>
By now you should be familiar with the null model and various models with one variable as a predictor.<br> </p>"),
HTML("<p><b>What are you going to learn next? </b>
<ul>
<li>Compare the null model with the proposed model;</li>
<li>Understand how to evaluate the model.</li>
</ul>"),
HTML("<p><b>Research question:</b><br>
Don't forget our research question! You want to understand what is important in order to succeed in a marathon. </p>")
)),
#### Tab 2 ####
tabPanel("Other alternative models",
hr(),
br(),
HTML("<p><center>Ok, so seems like hours you've spent training is a pretty good predictor for your marathon finish time!<br>
Those who spend more time training are faster and have a smaller finish time for the marathon.<bR>
Jerry was more dedicated to the training than Tom, so he finished earlier.<bR><br>
But what about the other variables? <br>
Do height and body mass index tell you something about the time people need to finish the marathon?<bR>You still have more data to look at!
<br><br><b>Click on the tabs 'Height' and 'BMI' to look at the other variables.</b></center></p>")),
#### Tab 3, X2 ####
tabPanel("Height",
hr(),
br(),
HTML("<p><center>When you use the participants height to estimate their marathon finishing time, the model looks like this:<br><br>
<b>Marathon Finish Time = Intercept + effect of height * height + Error</b><bR><br>
which translates to:
</center></p>"),
withMathJax(),
"$$Y_{i} = \\beta_{0.2} + \\beta_{1.2} * x_{2i} + e_{i}$$",
br(), br(),
fluidRow(
column(6,
shiny::dataTableOutput('tableProposed.2')),
column(6,
radioButtons(inputId = "plot_type_prop2",
label = "How do you wish to visualize it?",
c("Scatterplot"="scatter","Histogram (SSE)"="hist")),
plotOutput('plotProposed.2'),
textOutput('Cap.P2')
)
),
br(),
br(),
HTML("<p>Oh no! This doesn't look good! Your estimate for Jerry is worse again! And the errors seem to also be bigger. Let's look if BMI is a better estimator for marathon finishing time! <b>Click on the next tab on top called 'BMI'.</b></p>")
),
#### Tab 4, X3 ####
tabPanel("BMI",
hr(),
br(),
HTML("<p><center>When you use the participants BMI to estimate their marathon finishing time, the model looks like this:<br><br>
<b>Marathon Finish Time = Intercept + effect of BMI * BMI + Error</b><bR><br>
which translates to:
</center></p>") ,
withMathJax(),
"$$Y_{i} = \\beta_{0.3} + \\beta_{1.3} * x_{3i} + e_{i}$$"
,
br(), br(),
fluidRow(
column(6,
shiny::dataTableOutput('tableProposed.3')),
column(6,
radioButtons(inputId = "plot_type_prop3",
label = "How do you wish to visualize it?",
c("Scatterplot"="scatter","Histogram (SSE)"="hist")),
plotOutput('plotProposed.3'),
textOutput("Cap.P3")
)),
br(),br(),br(),
HTML("When you use the BMI for predicting the marathon performance, your estimation for Jerry is better than in the Null Model!
However, the estimation for Tom got a little worse... You should probably stick to using the hours put into training for your model!"),
br(),
p(
HTML('<a href="#top">'),
actionButton(inputId="Next3", label=HTML('Next page <i class="fa fa-arrow-right"></i>')),
HTML("</a>")
, align="right")
)
), style='width:200%;'),
######################
## 4th - COMPARISON ##
######################
tabItem(tabName = "comparison",
### Intro box ###
box(width=12, title = h3("EVALUATING YOUR MODEL - Part I", align = "center"), collapsible = T,
HTML('<p>Let us now directly compare the null model (mean finishing time) and the model with your best predictor - hours of training.
We will do so by visualising the null and the proposed model and how the additon of the predictor improves your model by visualising the reduction in sum squared errors (SSE).</p>'),
br(),
HTML('Remember that you are comparing the following models:'),
br(),
br(),
h4("Marathon Finish Time = Mean Finishing Time + Error", align="middle"),
br(),
h4("Marathon Finish Time = Intercept + Effect of training + Error", align="middle"),
br()),
#######################
#### MODEL DETAILS ####
#######################
box(width = 12, title = h3("MODEL DETAILS", align = "center"),
collapsible = T,
collapsed = T,
HTML('<p>In the graph below you can now see the two models: the <span style="color:red">red line</span>, representing the proposed model, and the <span style="color:blue">blue line</span>, representing the null model.
Which one of the two models allows you to make better predictions?</p>'),
br(),br(),
fluidRow(
column(7,
shiny::dataTableOutput('tableComparison')),
column(5,
br(),
fluidRow(
column(4,
p(actionButton("null", "Null Model",style='padding:12px; font-size:110%; background-color: #337ab7; font-weight: bold; color:#FFFFFF; border-color:#000000'), align="center")
),
column(4,
p(actionButton("full", "Proposed Model", style='padding:12px; font-size:110%; background-color: #FA8072; font-weight: bold; color:#FFFFFF; border-color:#000000'), align="center")
),
column(4,
p(actionButton("sse", "Error reduction", style='padding:12px; font-size:110%; background-color: grey; font-weight: bold; color:#FFFFFF; border-color: #000000; border-weight:bold'), align="center")
)
),
plotOutput('plotComparison'),
textOutput('Cap.Com')
)),
column(12,
br(),
br(),
HTML("You can answer this question more precisely if you are able to quantify which model has the least error. You can take each participant's time and subtract the predicted time based on your model. You do this for each participant, and sum up the results. What do you obtain? <br>
<br>
The result is zero! This does not help, in fact to avoid cancelling out opposite values and in order to give each one a weight, we have to square each difference before summing them up. At this point, we have the so called 'sum of squared errors (SSE)', which we need to divide for the number of participants to obtain the 'mean sum of squared errors' (MSSE).
<br><br>
To compare how good each model is, we need to calculate the so called 'R-squared' which is the ratio between the explained variation that the model explains (SSM) and the total variation (sum of SSE and SSM)."
),
br()
)),
#####Quiz box######
box(width=12, title=h3("TEST YOUR KNOWLEDGE", align="center"),
collapsible=T, collapsed=T,
column(10,
tags$div(id = "inline", textInput("answer1f", HTML("1.How much squared error is reduced by using the proposed model instead of the null model?"))),
tags$div(id = "inline", textInput("answer1f", HTML("2.The null model's highest error is in predicting which participant?"))),
tags$div(id = "inline", textInput("answer1f", HTML("3.The null model's lowest error is in predicting which participant?"))),
tags$div(id = "inline", textInput("answer1f", HTML("4.The proposed model's highest error is in predicting which participant?"))),
tags$div(id = "inline", textInput("answer1f", HTML("5.The proposed model's lowest error is in predicting which participant?"))),
actionButton("answersComparison", "See answers")
),
column(2,
htmlOutput("answersText4"))
),
### Summary box ###
box(width=12, title=h3("SUMMARY", align="center"),
collapsible = T, collapsed = T,
HTML('<b>What have you learned so far?</b>
<ul>
<li>The basic equation for model analysis: Data = Model + Error;</li>
<li>Having a look at the raw data.</li>
<li>The proposed model, in which we try to use hours of training to predict marathon performance.</li>
<li>The parameters of the proposed model: The intercept (b<span style="position: relative; top: 0.3em; font-size: 0.8em;">0</span>), which represents the performance of a runner with zero hours of training, and the slope (b<span style="position: relative; top: 0.3em; font-size: 0.8em;">1</span>), which represents the gain in performance for each increment of 1 hour of training.</li>
<li>The error of the proposed model: We calculate the Sum of Squared Errors in order to have a measure the amount of error we make in our predictions using the proposed model.</li>
<li>The null model: Calculating the simplest model possible, which is just using mean performance to guess performance.</li>
<li>The paramter of the null model: The intercept (b<span style="position: relative; top: 0.3em; font-size: 0.8em;">0</span>), which represents mean performance.</li>
<li>The error of the null model: We again calculate the Sum of Squared Errors in order to have a measure of the amount of error we make in our predictions using the null model.</li>
<li>The proportion of the reduction of the error (PRE): We then see the proportion of error of the null model that we\'re able to reduce by using the proposed model instead of the null model.</li>
</ul>'),
HTML('<b>What are you going to learn next?</b>
<ul>
<li>Test statistics;</li>
<li>The p-value;</li>
<li>Hypothesis testing;</li>
<li>Data reporting in APA style.</li>
</ul>'),
HTML('<b>Research question:</b></br>
Keep our research question in mind! We want to understand what is important in order to succeed in a marathon.
')
),
p(
HTML('<a href="#top">'),
actionButton(inputId="Next4", label=HTML('Next page <i class="fa fa-arrow-right"></i>')),
HTML("</a>")
, align="right")
),
##########################
## 5th - INFERENCE PAGE ##
##########################
tabItem(tabName = "inference",
### Intro box ###
box(width=12,
title = h3("EVALUATING YOUR MODEL - Part II", align="center"),
h4("Test statistic = Estimated effect / Error of the estimation", align="middle"),
collapsible = T,
br(),
p("This is the basic equation underlying test statistics. Test statistics basically represent an effect in the unit if a specific theoretical distribution (here, T-Student and F distributions). When we compute a test statistics we can evaluate how likely that value (or a more extreme value) is to occur. And with this we can say if an effect is happening by chance or not."),
br(),
conditionalPanel(condition="input.showTstat==0",
p(actionButton(inputId="showTstat", label="Press to see the relevant test statistics"), align="right")
),
uiOutput("introCoeff"),
DT::dataTableOutput("tableTS"),
uiOutput("R2"),
conditionalPanel(condition="input.select_button",
uiOutput("plotsLabel"),
fluidRow(
column(6, plotOutput("betaplot")),
column(6, plotOutput("tplot"))
)),
conditionalPanel(condition="input.select_button"
)),
### Reporting results box ###
box(width=12, title = h3("REPORTING RESULTS", align="center"),
collapsible = TRUE, collapsed = T,
p("Here's how R would print the results of our regression."),
verbatimTextOutput("regressionStats"),
HTML('How would you report your results <a href="http://apastyle.org/">(APA style)</a>? The important effect here is given by the b1 parameter. So the report of the results will be on this parameter. Two different ways to do this:'),
hr(),
uiOutput("reporting1"),
hr(),
uiOutput("reporting2"),
hr(),
HTML("<b>What about <i>R</i><span style=\"position: relative; bottom: 0.3em; font-size: 0.8em;\">2</span>? Shouldn't we be reporting <i>R</i><span style=\"position: relative; bottom: 0.3em; font-size: 0.8em;\">2</span> as well?</b>"),
uiOutput("RtoPrint")
),
#####Quiz box######
box(width=12, title=h3("TEST YOUR KNOWLEDGE", align="center"),
collapsible=T, collapsed=T,
column(10,
tags$div(id = "inline", textInput("answer1f", HTML("1.What finishing time would you expect for a person who didn't train?"))),
tags$div(id = "inline", textInput("answer1f", HTML("2.What finishing time would you expect for a person who trained 2 hours?"))),
tags$div(id = "inline", textInput("answer1f", HTML("3.What is the test statistic for the hours of training coefficient?"))),
tags$div(id = "inline", textInput("answer1f", HTML("4.How many degrees of freedom does that test have?"))),
tags$div(id = "inline", textInput("answer1f", HTML("5.And what was the sample size of that test?"))),
actionButton("answersInference", "See answers")
),
column(2,
htmlOutput("answersText5"))
),
### Summary box ###
box(width=12, title=h3("SUMMARY", align="center"),
collapsible = T, collapsed = T,
HTML('<b>What have you learned so far?</b>
<ul>
<li>The basic equation for model analysis: Data = Model + Error;</li>
<li>Having a look at the raw data.</li>
<li>The proposed model, in which we try to use hours of training to predict marathon performance.</li>
<li>The parameters of the proposed model: The intercept (b<span style="position: relative; top: 0.3em; font-size: 0.8em;">0</span>), which represents the performance of a runner with zero hours of training, and the slope (b<span style="position: relative; top: 0.3em; font-size: 0.8em;">1</span>), which represents the gain in performance for each increment of 1 hour of training.</li>
<li>The error of the proposed model: We calculate the Sum of Squared Errors in order to have a measure the amount of error we make in our predictions using the proposed model.</li>
<li>The null model: Calculating the simplest model possible, which is just using mean performance to guess performance.</li>
<li>The paramter of the null model: The intercept (b<span style="position: relative; top: 0.3em; font-size: 0.8em;">0</span>), which represents mean performance.</li>
<li>The error of the null model: We again calculate the Sum of Squared Errors in order to have a measure of the amount of error we make in our predictions using the null model.</li>
<li>The proportion of the reduction of the error (PRE): We then see the proportion of error of the null model that we\'re able to reduce by using the proposed model instead of the null model.</li>
</ul>'),
HTML('<b>What are you going to learn next?</b>
<ul>
<li>Test statistics;</li>
<li>The p-value;</li>
<li>Hypothesis testing;</li>
<li>Data reporting in APA style.</li>
</ul>'),
HTML('<b>Research question:</b></br>
Keep our research question in mind! We want to understand what is important in order to succeed in a marathon.
'))
),
##############
## GLOSSARY ##
##############
tabItem(tabName = "glossary",
box(width=12, title = h3("GLOSSARY", align="center"),
collapsible = T,
p("Here you will find the definitions of the key statistical terms that you have covered whilst using this app.", align="center"),
br(),
shiny::dataTableOutput("tablegloss")),
box(width = 12, title = h3("RECOMMENDED READING", align="center"),
collapsible = T,
HTML('Judd, C. M., & McClelland, G. H. (2017). <i>Data analysis: A model comparison approach</i> (3rd ed.). San Diego, CA: Harcourt, Brace, Jovanovich.')
)
)
))))