Crab data--compare individual data and tabled (grouped) data > crab = read.table(file="D:\\stat141\\horseshoecrab.dat", header = T) > crab$satell [1] 8 0 9 0 4 0 0 0 0 0 0 0 11 0 14 8 1 1 0 5 4 3 1 2 3 0 3 5 [29] 0 0 4 0 0 8 5 0 0 6 0 6 3 5 6 5 9 4 6 4 3 3 5 5 6 4 5 15 [57] 3 3 0 0 0 5 3 5 1 8 10 0 0 3 7 1 0 6 0 0 3 4 0 5 0 0 0 4 [85] 0 3 0 0 0 0 5 0 0 0 0 1 0 1 1 1 1 1 1 4 1 1 1 1 2 4 3 6 [113] 0 2 2 0 12 0 5 6 6 2 0 2 3 0 3 4 2 6 6 0 4 10 7 0 5 5 6 6 [141] 7 3 3 0 0 8 4 4 10 9 4 0 0 0 0 4 0 2 0 4 4 3 8 0 7 0 0 2 [169] 3 4 0 0 0 > length(crab$satell) #we have 173 observations on numb satells [1] 173 > attach(crab) #just refer to var names > mean(satell) [1] 2.919075 > median(satell) #you can see this from the grouped data [1] 2 > sd(satell) #computed from the 173 obs [1] 3.148336 > var(satell) [1] 9.912018 > #calc mean,var from tabled data on handout > m = (16 +2*9 + 3*19 +4*19 + 5*15 + 6*13 + 7*4 + 8*6 + 9*3 + 10*3 + 11 + 12 + 14 + 15)/173 > m [1] 2.919075 > v = (62*(0-m)^2 + 16*(1-m)^2 + 9*(2-m)^2 + 19*(3-m)^2 + 19*(4-m)^2 + 15*(5-m)^2 + 13*(6-m)^2 + + 4*(7-m)^2 + 6*(8-m)^2 + 3*(9-m)^2 + 3*(10-m)^2 + (11-m)^2 + (12-m)^2 + (14-m)^2 +(15-m)^2)/172 > v [1] 9.912018 > sqrt(v) [1] 3.148336 > detach(crab) ========================================================================= Dichotomizing a measured variable (Y/N data) Crickets example > singtime [1] 4.3 24.1 6.6 7.3 4.0 2.6 4.0 3.9 9.4 6.2 1.6 6.5 0.2 2.7 17.4 5.6 2.0 [18] 3.8 1.2 0.7 1.6 2.3 3.7 0.8 0.5 4.5 11.5 3.5 0.8 5.2 2.0 0.7 1.7 5.0 [35] 2.8 1.5 3.9 3.7 4.5 1.8 1.2 0.7 0.7 4.2 4.7 2.2 1.4 14.1 8.6 3.7 3.5 > sort(singtime) [1] 0.2 0.5 0.7 0.7 0.7 0.7 0.8 0.8 1.2 1.2 1.4 1.5 1.6 1.6 1.7 1.8 2.0 [18] 2.0 2.2 2.3 2.6 2.7 2.8 3.5 3.5 3.7 3.7 3.7 3.8 3.9 3.9 4.0 4.0 4.2 [35] 4.3 4.5 4.5 4.7 5.0 5.2 5.6 6.2 6.5 6.6 7.3 8.6 9.4 11.5 14.1 17.4 24.1 > singtime >10 [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE [16] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 
FALSE FALSE TRUE FALSE FALSE FALSE [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [46] FALSE FALSE TRUE FALSE FALSE FALSE > singtime >= 5 [1] FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE FALSE FALSE TRUE [16] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE [31] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [46] FALSE FALSE TRUE TRUE FALSE FALSE > sum(singtime >= 10) [1] 4 > sum(singtime >= 5) [1] 13 > sum(singtime > 5) [1] 12 > length(singtime) [1] 51 > 12/51 [1] 0.2352941 > mean(singtime > 5) # proportion is mean of dichot (0,1) vars [1] 0.2352941 > var(singtime > 5) [1] 0.1835294 > (12/51)*(39/51) # p(1-p) [1] 0.1799308 > (12/51)*(39/51)*(51/50) # p(1-p)(n/(n-1)) [1] 0.1835294 =========================================================================== > #ordered categorical variable, US evolution > #show page9 of supp > # 3 level categorical variable 2005 US n=1484, props (.4,.21,.39) T NS F TableS2 insert approximate counts > round(.4*1484) > round(.21*1484) > round(.39*1484) [1] 594 [1] 312 [1] 579 intro rep command > rep("a",10) [1] "a" "a" "a" "a" "a" "a" "a" "a" "a" "a" make exemplar categorical data n=100 > us100dat = c(rep("T", 40), rep("NS", 21), rep("F", 39)) > us100dat [1] "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" [18] "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" "T" [35] "T" "T" "T" "T" "T" "T" "NS" "NS" "NS" "NS" "NS" "NS" "NS" "NS" "NS" "NS" "NS" [52] "NS" "NS" "NS" "NS" "NS" "NS" "NS" "NS" "NS" "NS" "F" "F" "F" "F" "F" "F" "F" [69] "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" [86] "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" > table(us100dat) us100dat F NS T 39 21 40 also see display > # n=2066 for 2003 data US page2 of supp (5 ordered cat display)