###################################################### #the following packages are necessary for specific robust analyses #they must be first installed # for the robust library, use Packages > Install package(s) # for the WRS library, go to https://r-forge.r-project.org/R/?group_id=468 to download it # or type: install.packages("WRS", repos="http://R-Forge.R-project.org") library(robust) # necessary for M-estimation, robust regression and ANOVA library(WRS) # necessary only for Winsorized mean and variance ## For reproduceable research, we present here all commands that led ## to the figures of the tutorial. ## As users, the following steps are NOT useful: (3), (8), (9), (10), (14). ###################################################### ## section 1: Central tendency and dispersion #(1) reading the data on cigarette dependence and time to first cigarette cig = read.csv("cigArt.csv", header=T) #(2) creates figure 1 windows() boxplot(cig$ttf) #(3) creates figure 2 windows() par(mar=c(5, 10, 4, 4)+.1) x = c(2, 3, 4, 4, 5, 5, 6, 6, 6, 20) # simulated data with one clear outlier plot(2:20, c(rep(c(0, 8), 9), 3), type="n", xlab="", ylab="", yaxt="n") axis(2, at=c(7, 5.5, 4, 2.5, 1),c("Mean","Median","20% trimmed\n mean","20% Winsorized\n mean","M-estimator"), las=1, cex.axis=1.4) text(c(2, 3, 3.9, 4.1, 4.9, 5.1, 5.8, 6, 6.2, 20), y=-0, "|", cex=2) lines(c(2, 2, 20, 20), c(6, 7, 7, 6)+.5, lwd=2) #mean polygon(x=c(mean(x)-.1, mean(x), mean(x)+.1), y=c(7.3, 7, 7.3)+.5, col="black") lines(c(2, 4.8, 4.8, 5.2, 20), c(3, 3, 4, 4, 3)+2, type="S", lwd=2) #median polygon(x=c(median(x)-.1, median(x), median(x)+.1), y=c(4.3, 4, 4.3)+2, col="black") lines(c(2, 4, 4, 6, 20), c(0, 0, 1, 1, 0)+3.5, type="S", lwd=2) #20% trimmed mean polygon(x=c(mean(x, tr=.2)-.1, mean(x, tr=.2), mean(x, tr=.2)+.1), y=c(1.3, 1, 1.3)+3.5, col="black") lines(c(2, seq(2, 4, .1), seq(6, 20, .1), 20), c(0, 1/(5-seq(2, 4, .1)), 1/abs(5-seq(6, 20, .1)), 0)+2, lwd=2) #20% winsorized mean polygon(x=c(mean(x, tr=.2)-.1, mean(x, tr=.2), mean(x, tr=.2)+.1), y=c(1.3, 1, 1.3)+2, col="black") lines(c(2, seq(2, 2*4.594, .1), 20), c(0, ((((seq(2, 2*4.594, .1)-4.594)/4.594)^2)-1)^2, 0)+.5, lwd=2) #M-estimator of central tendency polygon(x=c(lmrob(x~1)$coeff-.1, lmrob(x~1)$coeff, lmrob(x~1)$coeff+.1), y=c(1.3, 1, 1.3)+.5, col="black") #(4) estimation of central tendency and dispersion of time to first cigarette ttfNoNA = na.omit(cig$ttf) # Winsorized mean does not deal correctly with missing values # so it is necessary to take out all missing values mean(ttfNoNA) # mean median(ttfNoNA) # median mean(ttfNoNA, trim=.2) # trimmed mean win(ttfNoNA) # Winsorized mean summary(lmRob(cig$ttf~1, data=cig)) # M-estimator of central tendency = intercept sd(ttfNoNA) # standard deviation IQR(ttfNoNA) # IQR mad(ttfNoNA) # MAD ttfNoNA20 = ttfNoNA[ttfNoNA>quantile(ttfNoNA, prob=.2) & ttfNoNA