### This script was written by Dr. Heather Merk and accompanies the "Introduction to R Statistical Software: Application to Plant Breeding" webinar presented by Dr. Merk in September 2011.
## Read in a .csv dataset
# Run only the line that matches your operating system, and edit the path so
# that it points to where the file is saved on your computer.
# For PC
OHColor <- read.csv("C:/Users/merk.9/Desktop/Documents/2011 Webinar Series/2009OHColorSample.csv", header = TRUE)
# For Mac
OHColor <- read.csv("/Users/heathermerk/Documents/eXtension/2011 Webinar Series/2009OHColorSample.csv", header = TRUE)
## Read in a .txt dataset
# Base R has no read.txt() function; read.table() is the reader for
# delimited text files. header = TRUE takes the column names from the first row.
# For PC
OHColor <- read.table("C:/Users/merk.9/Desktop/Documents/2011 Webinar Series/2009OHColorSample.txt", header = TRUE)
# For Mac
OHColor <- read.table("/Users/heathermerk/Documents/eXtension/2011 Webinar Series/2009OHColorSample.txt", header = TRUE)
## Check that data was imported successfully
# Small data sets can simply be displayed in full.
# Typing the object name shows the whole data set
OHColor
# print() shows the same full listing
print(x = OHColor)
# summary() gives a per-column summary instead of the raw rows
summary(object = OHColor)
# Large data sets are easier to check a few rows at a time.
# First six rows, shown under the column headers
head(OHColor, n = 6L)
# Last six rows, shown under the column headers
tail(OHColor, n = 6L)
# Structure of the data set: number of observations, number of variables,
# variable names, and the type (and factor levels, if any) of each variable
str(object = OHColor)
## Create a histogram for a numeric variable
hist(OHColor$Param1)
## Create a histogram for a numeric variable with custom axes, color
# freq = FALSE draws the bars on the density scale (total area = 1) instead of
# as counts, which is what allows the density curve below to be overlaid on the
# same axes. The y-axis is therefore labelled as a density, not a fruit count.
hist(
  OHColor$Avggreen,
  freq = FALSE,
  xlab = "Average Green",
  ylab = "Density",
  main = "Average Green Histogram for OHIO 2009 Processing",
  col = 3
)
# Overlay a kernel density estimate (bandwidth 2), ignoring missing values
lines(density(OHColor$Avggreen, na.rm = TRUE, bw = 2))
## Test for differences in average green between lines
# Fit a linear model with line, treated as a factor, as the only predictor
fit1 <- lm(formula = OHColor$Avggreen ~ as.factor(OHColor$Line))
# ANOVA table for the model
anova(fit1)
# Coefficient estimates and overall fit statistics
summary(fit1)
## Simplifying the linear model
# Store the response and the predictor under short names for ease of use
AVGGREEN <- OHColor$Avggreen
LINE <- as.factor(OHColor$Line)
# The same model written with the short names
fit1a <- lm(formula = AVGGREEN ~ LINE)
anova(fit1a)
## Check assumptions for ANOVA - plots
# Draws the standard diagnostic plots for the fitted model
plot(fit1a)
## Calculate mean for a numeric variable and ignore missing data
mean(AVGGREEN, na.rm = TRUE)
## Calculate standard deviation for a numeric variable and ignore missing data
sd(AVGGREEN, na.rm = TRUE)
## Calculate mean by rep for a numeric variable
# tapply(values, grouping factor, function, ...): the trailing na.rm = TRUE is
# handed on to mean() so missing data is ignored within each rep
tapply(AVGGREEN, as.factor(OHColor$Rep), mean, na.rm = TRUE)
## Does a line have a higher average green value than the overall mean?
# t.test() works on numeric vectors.
# AVGGREEN already is one, so only the values for the line of interest
# need to be pulled out into their own numeric vector
sct0006 <- OHColor$Avggreen[OHColor$Line == "SCT_0006"]
# Display the vector to confirm the right values were selected
sct0006
# One-sided, equal-variance two-sample t-test: is the line's mean greater?
t.test(
  sct0006,
  AVGGREEN,
  alternative = "greater",
  var.equal = TRUE
)
## Create a boxplot to look at average green by line
# One box per level of LINE
boxplot(AVGGREEN ~ LINE)
## Working with multi year data
# Import Dataset with Ohio color data from 2010
# Run only the line that matches your operating system, and edit the path so
# that it points to where the file is saved on your computer.
# For PC
OHColor2010 <- read.csv("C:/Users/merk.9/Desktop/Documents/2011 Webinar Series/2010OHColorSample.csv", header = TRUE)
# For Mac
OHColor2010 <- read.csv("/Users/heathermerk/Documents/eXtension/2011 Webinar Series/2010OHColorSample.csv", header = TRUE)
# Check that data was imported correctly
str(OHColor2010)
## Combine data from both years
# rbind() stacks the rows of the 2010 data underneath the first data set;
# both data sets must have the same columns
CombinedColor <- rbind(OHColor, OHColor2010)
# Check the first rows, the last rows, and the structure of the combined data
head(CombinedColor)
tail(CombinedColor)
str(CombinedColor)
# Store the model variables under short names; line, rep and year are
# converted to factors and average green to a numeric vector
LINE <- as.factor(CombinedColor$Line)
REP <- as.factor(CombinedColor$Rep)
YEAR <- as.factor(CombinedColor$Year)
AVGGREEN <- as.numeric(CombinedColor$Avggreen)
# Create and test model: line, year, rep nested within year (%in% is the
# nesting operator in a model formula), and the line-by-year interaction
fit2 <- lm(formula = AVGGREEN ~ LINE + YEAR + REP %in% YEAR + LINE:YEAR)
anova(fit2)
## Estimating variance components
# This requires the lme4 package. The first time you want to use this package,
# you need to install it, either with install.packages("lme4") or through the
# package installer in the GUI.
# Load the lme4 package
library(lme4)
# Create model with every term fitted as a random intercept.
# Rep nested within year is written as YEAR:REP. Inside a random-effects term
# the grouping expression after "|" is evaluated as ordinary R code, so
# REP %in% YEAR would be treated as the value-matching operator (is each rep
# value found among the year values?) rather than as nesting.
fit3 <- lmer(AVGGREEN ~ (1 | LINE) + (1 | YEAR) + (1 | YEAR:REP) + (1 | LINE:YEAR))
# The "Random effects" section of the summary lists the variance components
summary(fit3)
## Exporting a table
# Run only the line that matches your operating system.
# col.names = NA writes the header row with a blank first entry, so the
# column names line up with the data when row names are written
# For PC
write.table(
  CombinedColor,
  file = "C:/Users/merk.9/Desktop/Documents/2011 Webinar Series/CombinedColorData.txt",
  col.names = NA
)
# For Mac
write.table(
  CombinedColor,
  file = "/Users/heathermerk/Documents/eXtension/2011 Webinar Series/CombinedColorData.txt",
  col.names = NA
)
## for loop
# Compact form: a single-statement body needs no braces
for (x in seq_len(10)) print(sqrt(x))
# Braced form: prints the same ten square roots
for (x in seq_len(10)) {
  print(sqrt(x))
}
## while loop - Fibonacci series where each number is the sum of the previous two numbers (0,1,1,2,3...).
# Start from the first two numbers of the series
a <- 0
b <- 1
# The leading 0 is printed before the loop; the loop prints every later number
print(a)
# Keep going for as long as the next number to print is below 50
while (b < 50) {
  print(b)
  # Hold the sum in temp so that a can take b's old value before b is replaced
  temp <- a + b
  a <- b
  b <- temp
}