Survey							
                            
		                
* Your assessment is very important for improving the work of artificial intelligence, which forms the content of this project.
#
# sta-671-ch8-10-11-notes-R-18nov06.doc
#
# --------------------------------------------------------#
# Ex 8.1 - phosphorous in leaves from 3 varieties of trees
#
# (p. 389)
#
# ANOVA TEST
#
#H0: mu1 = mu2 = mu3
#Ha: at least two means differ
#
#TS Fobs = MS(Between)/MS(Within)
#
#P-value = Pr[F(g-1, n-g) > Fobs]
# Entering the data: phosphorous content of 15 leaves, 5 per tree variety.
# c() is used instead of interactive scan() so the script also runs
# non-interactively (scan() with no file only reads from the console).
phosphor <- c(0.35, 0.40, 0.58, 0.50, 0.47,   # variety 1
              0.65, 0.70, 0.90, 0.84, 0.79,   # variety 2
              0.60, 0.80, 0.75, 0.73, 0.66)   # variety 3
# Variety labels 1, 2, 3 repeated 5 times each, matching the data order.
variety <- rep(c(1, 2, 3), c(5, 5, 5))
ex8p1.df <- data.frame(P = phosphor, Variety = factor(variety))
# NOTE(review): attach() is generally discouraged (search-path masking),
# but it is kept because later commands in these notes use P and Variety
# as free names.
attach(ex8p1.df)
# generating numeric summary statistics for the three varieties
# (mean, variance, and SD of P within each Variety level; P and Variety
# come from attach(ex8p1.df) above — output shown below)
by(P,Variety,function(x) c(mean(x),var(x),sd(x)))
INDICES: 1
[1] 0.46000000 0.00795000 0.08916277
---------------------------------------------
INDICES: 2
[1] 0.7760000 0.0103300 0.1016366
---------------------------------------------
INDICES: 3
[1] 0.70800000 0.00617000 0.07854935
# fitting an anova model …
# lm() fits P ~ Variety by least squares; anova() on the fit prints the
# one-way ANOVA F-table reproduced below.
aov.8p1<- lm(P~Variety)
anova(aov.8p1)
Analysis of Variance Table

Response: P
          Df  Sum Sq Mean Sq F value    Pr(>F)
Variety    2 0.27664 0.13832  16.972 0.0003175 ***
Residuals 12 0.09780 0.00815
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# plots
#
# Variety is a factor, so plot() with a formula produces side-by-side
# boxplots of P for the three varieties.
plot(P ~ Variety,ylim=c(0,1),
xlab="Tree Variety",ylab="Phosphorous Content")
[Figure: plot output — image placeholder not rendered in this text export.]
# Scatterplot of P against variety (as a numeric axis), with each group's
# mean drawn as a short horizontal segment.
plot(P ~ as.numeric(Variety), ylim = c(0, 1),
     xlab = "Tree Variety", ylab = "Phosphorous Content",
     xlim = c(0, 4), xaxt = "n")
axis(1, at = c(1, 2, 3), labels = c("1", "2", "3"))
# Group means computed from the data via tapply rather than hard-coded
# (.46, .776, .708) — same values, but stays correct if the data change.
xbars <- tapply(P, Variety, mean)
# One segment of half-width 0.25 centered at each variety position.
for (i in 1:3) {
  segments(i - 0.25, xbars[i], i + 0.25, xbars[i], lwd = 2)
}
[Figure: plot output — image placeholder not rendered in this text export.]
# --------------------------------------------------------#
# Z-test for testing proportions
#
# H0: pi = pi_0
#
# TEST STATISTIC
# zstat = [ pihat – pi_0 ] / [sqrt( pi_0*(1-pi_0)/n )]
#
#
#
# Example 10.4 (p. 475) - n = 150 sports cars
#
# 60 of 150 sports cars failed inspection
#
# Research hypothesis: more than 30% fail
#
# Ha: pi > .30
#
# One-sample proportion test (continuity correction on by default);
# the one-sided alternative matches Ha: pi > 0.30.
prop.test(x=60,n=150,p=.30,alternative="greater")
1-sample proportions test with continuity correction
data: 60 out of 150, null probability 0.3
X-squared = 6.6746, df = 1, p-value = 0.00489
alternative hypothesis: true p is greater than 0.3
95 percent confidence interval:
0.3333552 1.0000000
sample estimates:
p
0.4
#
# CI for pi
#
# Default arguments (p = 0.5, two-sided); this call is shown for the
# 95 percent confidence interval in the printout below.
prop.test(x=60, n=150)
1-sample proportions test with continuity correction
data: 60 out of 150, null probability 0.5
X-squared = 5.6067, df = 1, p-value = 0.01789
alternative hypothesis: true p is not equal to 0.5
95 percent confidence interval:
0.3218997 0.4833039
sample estimates:
p
0.4
#
#
#
#
#
#
# --------------------------------------------------------
# H0: pi1 = pi2
#
# TEST STATISTIC
# zstat = [ pi1hat - pi2hat ] /
#         sqrt[ pi1hat*(1-pi1hat)/n1 + pi2hat*(1-pi2hat)/n2 ]
# Example 10.6 – p. 485
# number pass out of number in study
# (npass/ntotal are reused by the later prop.test calls in these notes)
npass <- c(94, 113)
ntotal <- c(125, 175)
# One-sided two-sample test of H0: pi1 = pi2 vs Ha: pi1 > pi2
# (continuity correction applied by default).
prop.test(x=npass, n=ntotal, alternative="greater")
2-sample test for equality of proportions with
continuity correction
data: npass out of ntotal
X-squared = 3.3701, df = 1, p-value = 0.03320
alternative hypothesis: greater
95 percent confidence interval:
 0.01240338 1.00000000
sample estimates:
   prop 1    prop 2
0.7520000 0.6457143
#
# CI for pi1-pi2
#
# Two-sided call used here for the 95 percent CI on the difference
# in proportions shown below.
prop.test(x=npass, n=ntotal)
2-sample test for equality of proportions with
continuity correction
data: npass out of ntotal
X-squared = 3.3701, df = 1, p-value = 0.06639
alternative hypothesis: two.sided
95 percent confidence interval:
-0.004268342 0.216839771
sample estimates:
   prop 1    prop 2
0.7520000 0.6457143
#
# OL text presents the NON-continuity-corrected version
# of the proportion test and confidence interval
# (the "#" on the wrapped comment line was lost in the text extraction)
#
prop.test(x=npass, n=ntotal, correct=FALSE)
2-sample test for equality of proportions without
continuity correction
data: npass out of ntotal
X-squared = 3.8509, df = 1, p-value = 0.04972
alternative hypothesis: two.sided
95 percent confidence interval:
0.002588801 0.209982628
sample estimates:
   prop 1    prop 2
0.7520000 0.6457143
# ---------------------------------------------------------
5
#
#
#
# Testing for independence between two classification factors
# H0: two factors are independent
# Ha: two factors are dependent
# X2 = sum( (O-E)^2 / E )
# Example 10.14: counts by policy group (rows 1-5) and preferred energy
# source (oil/gas, coal, other). Built with list()/c() instead of
# interactive scan(what = list(...)) so the script runs non-interactively;
# the original transcript also wrapped two counts onto their own lines.
ex10p14 <- list(
  policy = as.numeric(rep(1:5, each = 3)),
  states = rep(c("oil/gas", "coal", "other"), times = 5),
  count  = c(50, 59, 161,
             88, 20, 40,
             56, 52, 188,
              4,  3,   5,
              2, 66,   6)
)
ex10p14
attach(ex10p14)
# 5 x 3 contingency table: rows = policy group, cols = oil/gas, coal, other.
matrix(count, nrow = 5, byrow = TRUE)
     [,1] [,2] [,3]
[1,]   50   59  161
[2,]   88   20   40
[3,]   56   52  188
[4,]    4    3    5
[5,]    2   66    6
chisq.test(matrix(count,nrow=5,byrow=T))
Pearson's Chi-squared test
data: matrix(count, nrow = 5, byrow = T)
X-squared = 289.2229, df = 8, p-value < 2.2e-16
Warning message:
Chi-squared approximation may be incorrect in:
chisq.test(matrix(count, nrow = 5, byrow = T))
#
# as an alternative to X2: Monte Carlo p-value (2000 replicates by default),
# useful here because the warning above flags small expected cell counts
#
chisq.test(matrix(count, nrow = 5, byrow = TRUE), simulate.p.value = TRUE)
Pearson's Chi-squared test with simulated p-value
(based on 2000 replicates)
data: matrix(count, nrow = 5, byrow = T)
X-squared = 289.2229, df = NA, p-value = 0.0004998
6
#
# Example 10.15 (p. 513)
#
# Display the 4x3 contingency table (byrow = TRUE written out, not T).
matrix(c(5, 8, 9, 8, 10, 5, 14, 7, 4, 13, 5, 2), byrow = TRUE, nrow = 4)
     [,1] [,2] [,3]
[1,]    5    8    9
[2,]    8   10    5
[3,]   14    7    4
[4,]   13    5    2
# Save the 4x3 table for reuse. The original transcript dropped the "-"
# from "<-" during text extraction, leaving a broken "<" comparison.
ex10p15.mat <- matrix(c(5, 8, 9,
                        8, 10, 5,
                        14, 7, 4,
                        13, 5, 2),
                      byrow = TRUE, nrow = 4)
ex10p15.mat
     [,1] [,2] [,3]
[1,]    5    8    9
[2,]    8   10    5
[3,]   14    7    4
[4,]   13    5    2
chisq.test(ex10p15.mat)
Pearson's Chi-squared test
data: ex10p15.mat
X-squared = 12.1104, df = 6, p-value = 0.05955
Warning message:
Chi-squared approximation may be incorrect in:
chisq.test(ex10p15.mat)
# --------------------------------------------------------
# Linear Regression
#
# Example 11.4: absences per 100 workers (y) vs. experience in months (x).
# c() is used instead of interactive scan() so the script runs
# non-interactively.
y.absences.per.100workers <- c(31.5, 33.1, 27.4, 24.5, 27.0,
                               27.8, 23.3, 24.7, 16.9, 18.1)
x.experience.mos <- c(18.1, 20.0, 20.8, 21.5, 22.0,
                      22.4, 22.9, 24.0, 25.4, 27.3)
# fit least squares regression line
lmfit <- lm(y.absences.per.100workers ~ x.experience.mos)
# common hypothesis test of interest here …
# Model Y = beta0 + beta1*x + epsilon
# H0: beta1 = 0
# Ha: beta1 NE 0
# TS: tstat = (b1)/SE(b1)
#     compare to t distribution with appropriate DF
# Full regression summary: residual quartiles, coefficient table with
# t-tests, residual standard error, and R-squared (printed below).
summary(lmfit)
# compare to p. 548
Call:
lm(formula = y.absences.per.100workers ~ x.experience.mos)
Residuals:
     Min       1Q   Median       3Q      Max
-3.35372 -1.47105 -0.04869  1.79075  3.40307
Coefficients:
                 Estimate Std. Error t value Pr(>|t|)
(Intercept)       64.6718     6.7621   9.564 1.18e-05 ***
x.experience.mos  -1.7487     0.2995  -5.840 0.000387 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.388 on 8 degrees of freedom
Multiple R-Squared: 0.81,
Adjusted R-squared: 0.7862
F-statistic: 34.1 on 1 and 8 DF, p-value: 0.0003873
#
# scatterplot of the data along with the fitted regression line
# superimposed
#
plot(x.experience.mos, y.absences.per.100workers,
xlab= " Experience (months)", ylab= "Absences (per 100 workers)")
# abline() on an lm object draws the line from lmfit's coefficients.
abline(lmfit)
8
[Figure: plot output — image placeholder not rendered in this text export.]
9