The researchers were interested in determining if a relationship exists between blood pressure and age, weight, body surface area, duration, pulse rate and/or stress level.
rm(list=ls())
bloodpress <- read.table("./bloodpress.txt", header = T)
( cor.bp <- round ( cor(bloodpress[,-1]) ,3))
## BP Age Weight BSA Dur Pulse Stress
## BP 1.000 0.659 0.950 0.866 0.293 0.721 0.164
## Age 0.659 1.000 0.407 0.378 0.344 0.619 0.368
## Weight 0.950 0.407 1.000 0.875 0.201 0.659 0.034
## BSA 0.866 0.378 0.875 1.000 0.131 0.465 0.018
## Dur 0.293 0.344 0.201 0.131 1.000 0.402 0.312
## Pulse 0.721 0.619 0.659 0.465 0.402 1.000 0.506
## Stress 0.164 0.368 0.034 0.018 0.312 0.506 1.000
cor.bp *(cor.bp>0.8)
## BP Age Weight BSA Dur Pulse Stress
## BP 1.000 0 0.950 0.866 0 0 0
## Age 0.000 1 0.000 0.000 0 0 0
## Weight 0.950 0 1.000 0.875 0 0 0
## BSA 0.866 0 0.875 1.000 0 0 0
## Dur 0.000 0 0.000 0.000 1 0 0
## Pulse 0.000 0 0.000 0.000 0 1 0
## Stress 0.000 0 0.000 0.000 0 0 1
regress BP on Weight and BSA, check out the changes when a new variable which is correlated to the existing variable is added to the model.
##
## Call:
## lm(formula = BP ~ Weight, data = bloodpress)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6933 -0.9318 -0.4935 0.7703 4.8656
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.20531 8.66333 0.255 0.802
## Weight 1.20093 0.09297 12.917 1.53e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.74 on 18 degrees of freedom
## Multiple R-squared: 0.9026, Adjusted R-squared: 0.8972
## F-statistic: 166.9 on 1 and 18 DF, p-value: 1.528e-10
##
## Call:
## lm(formula = BP ~ BSA, data = bloodpress)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.314 -1.963 -0.197 1.934 4.831
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.183 9.392 4.811 0.00014 ***
## BSA 34.443 4.690 7.343 8.11e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.79 on 18 degrees of freedom
## Multiple R-squared: 0.7497, Adjusted R-squared: 0.7358
## F-statistic: 53.93 on 1 and 18 DF, p-value: 8.114e-07
##
## Call:
## lm(formula = BP ~ Weight + BSA, data = bloodpress)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8932 -1.1961 -0.4061 1.0764 4.7524
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.6534 9.3925 0.602 0.555
## Weight 1.0387 0.1927 5.392 4.87e-05 ***
## BSA 5.8313 6.0627 0.962 0.350
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.744 on 17 degrees of freedom
## Multiple R-squared: 0.9077, Adjusted R-squared: 0.8968
## F-statistic: 83.54 on 2 and 17 DF, p-value: 1.607e-09
##
## Call:
## lm(formula = BP ~ BSA + Weight, data = bloodpress)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8932 -1.1961 -0.4061 1.0764 4.7524
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.6534 9.3925 0.602 0.555
## BSA 5.8313 6.0627 0.962 0.350
## Weight 1.0387 0.1927 5.392 4.87e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.744 on 17 degrees of freedom
## Multiple R-squared: 0.9077, Adjusted R-squared: 0.8968
## F-statistic: 83.54 on 2 and 17 DF, p-value: 1.607e-09
## Analysis of Variance Table
##
## Response: BP
## Df Sum Sq Mean Sq F value Pr(>F)
## Weight 1 505.47 505.47 166.86 1.528e-10 ***
## Residuals 18 54.53 3.03
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Variance Table
##
## Response: BP
## Df Sum Sq Mean Sq F value Pr(>F)
## BSA 1 419.86 419.86 53.927 8.114e-07 ***
## Residuals 18 140.14 7.79
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Variance Table
##
## Response: BP
## Df Sum Sq Mean Sq F value Pr(>F)
## Weight 1 505.47 505.47 166.1648 3.341e-10 ***
## BSA 1 2.81 2.81 0.9251 0.3496
## Residuals 17 51.71 3.04
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Variance Table
##
## Response: BP
## Df Sum Sq Mean Sq F value Pr(>F)
## BSA 1 419.86 419.86 138.021 1.391e-09 ***
## Weight 1 88.43 88.43 29.069 4.871e-05 ***
## Residuals 17 51.71 3.04
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model1 <- lm(BP~., data = bloodpress[,-1])
summary(model1)
##
## Call:
## lm(formula = BP ~ ., data = bloodpress[, -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.93213 -0.11314 0.03064 0.21834 0.48454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.870476 2.556650 -5.034 0.000229 ***
## Age 0.703259 0.049606 14.177 2.76e-09 ***
## Weight 0.969920 0.063108 15.369 1.02e-09 ***
## BSA 3.776491 1.580151 2.390 0.032694 *
## Dur 0.068383 0.048441 1.412 0.181534
## Pulse -0.084485 0.051609 -1.637 0.125594
## Stress 0.005572 0.003412 1.633 0.126491
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4072 on 13 degrees of freedom
## Multiple R-squared: 0.9962, Adjusted R-squared: 0.9944
## F-statistic: 560.6 on 6 and 13 DF, p-value: 6.395e-15
#install.packages("lme4")
#install.packages("car")
library(lme4)
## Loading required package: Matrix
library(car)
car::vif(model1)
## Age Weight BSA Dur Pulse Stress
## 1.762807 8.417035 5.328751 1.237309 4.413575 1.834845
crates <- data.frame(list(
Day = 1:9,
x1 = c(1,1,1,0,0,1,0,1,1),
x2 = c(0,1,0,1,1,1,1,0,1),
y = c(48,51,39,24,24,27,12,27,24)
))
#a)
Model.1 <- lm(y~ x1+x2 -1, crates)
summary(Model.1)
##
## Call:
## lm(formula = y ~ x1 + x2 - 1, data = crates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18 -3 9 12 18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x1 30.000 6.503 4.613 0.00245 **
## x2 12.000 6.503 1.845 0.10749
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.79 on 7 degrees of freedom
## Multiple R-squared: 0.8635, Adjusted R-squared: 0.8245
## F-statistic: 22.14 on 2 and 7 DF, p-value: 0.0009404
res <- crates$y - Model.1$fitted.values
sum(crates$x1*res)
## [1] 7.105427e-15
sum(crates$x2*res)
## [1] 5.329071e-15
## test sum of slope =30?
X <- model.matrix((Model.1))
var.12 <- sum(solve(t(X)%*%X)) * sum(res^2)/(9-2)
t = (sum(Model.1$coefficients)-30)/sqrt(var.12)
(1- pt(q = t, df = 7))*2
## [1] 0.1074942
## a new factor variable U
crates$U <- as.factor(c(1,3,1,2,2,3,2,1,3))
Model.2 <- lm(y~U, crates)
anova(Model.2)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## U 2 536 268 2.127 0.2003
## Residuals 6 756 126
#(f)
newdata <- crates
newdata $y.hat <- mean(crates$y)
newdata$'y.bar- y.hat' <- Model.2$fitted.values -newdata$y.hat
newdata$'y - y.bar' <- newdata$y - Model.2$fitted.values
newdata
## Day x1 x2 y U y.hat y.bar- y.hat y - y.bar
## 1 1 1 0 48 1 30.66667 7.333333 10
## 2 2 1 1 51 3 30.66667 3.333333 17
## 3 3 1 0 39 1 30.66667 7.333333 1
## 4 4 0 1 24 2 30.66667 -10.666667 4
## 5 5 0 1 24 2 30.66667 -10.666667 4
## 6 6 1 1 27 3 30.66667 3.333333 -7
## 7 7 0 1 12 2 30.66667 -10.666667 -8
## 8 8 1 0 27 1 30.66667 7.333333 -11
## 9 9 1 1 24 3 30.66667 3.333333 -10
#(g)
Model.3 <- lm(y~ x1 + x2+ x1*x2 +Day -1 ,crates)
summary(Model.3)
##
## Call:
## lm(formula = y ~ x1 + x2 + x1 * x2 + Day - 1, data = crates)
##
## Residuals:
## 1 2 3 4 5 6 7 8 9
## -0.4639 4.2108 -2.4880 -0.6506 2.8373 -5.8373 -2.1867 2.9518 1.6265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x1 51.9518 3.2112 16.178 1.64e-05 ***
## x2 38.6024 3.7466 10.303 0.000148 ***
## Day -3.4880 0.5471 -6.375 0.001405 **
## x1:x2 -36.7892 4.5376 -8.108 0.000463 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.07 on 5 degrees of freedom
## Multiple R-squared: 0.9915, Adjusted R-squared: 0.9847
## F-statistic: 146 on 4 and 5 DF, p-value: 2.31e-05
anova(Model.3)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 7776.0 7776.0 469.4218 3.886e-06 ***
## x2 1 648.0 648.0 39.1185 0.001532 **
## Day 1 160.3 160.3 9.6769 0.026527 *
## x1:x2 1 1088.9 1088.9 65.7333 0.000463 ***
## Residuals 5 82.8 16.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfcol=c(2,2))
plot(Model.3, which = 1)
plot(Model.3, which = 2)
plot(1:9, Model.3$residuals,type = "b")
#(h)
par(mfcol=c(2,2))
plot(Model.1, which =1)
plot(Model.1, which =2)
plot(1:9, Model.1$residuals,type = "b")
par(mfcol=c(2,2))
plot(Model.2, which =1)
plot(Model.2, which =2)
plot(1:9, Model.2$residuals,type = "b")
## time trend is very clear
## catergorical and continuous variables
Model.4 <- lm(y ~ U+Day+Day*U, data = crates)
summary(Model.4)
##
## Call:
## lm(formula = y ~ U + Day + Day * U, data = crates)
##
## Residuals:
## 1 2 3 4 5 6 7 8 9
## 1.3462 2.4324 -1.8846 -1.7143 2.5714 -5.6757 -0.8571 0.5385 3.2432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.538 4.519 10.961 0.00163 **
## U2 -6.681 12.603 -0.530 0.63274
## U3 6.975 7.458 0.935 0.41868
## Day -2.885 0.910 -3.170 0.05048 .
## U2:Day -1.401 2.333 -0.601 0.59043
## U3:Day -1.088 1.304 -0.835 0.46522
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.64 on 3 degrees of freedom
## Multiple R-squared: 0.95, Adjusted R-squared: 0.8667
## F-statistic: 11.4 on 5 and 3 DF, p-value: 0.03626
# verifying the Least square estimate
X <- model.matrix(Model.1)
solve(t(X)%*%X) %*% t(X)%*% crates$y
## [,1]
## x1 30
## x2 12