Bioconductor

Affymetrix example: estrogen

library(affy)
library(estrogen)

datadir <- system.file("extdata", package="estrogen")
dir(datadir)
setwd(datadir)

pd <- read.phenoData("estrogen.txt", header=T, row.names=1)
pData(pd)

raw <- ReadAffy(filenames=rownames(pData(pd)), phenoData=pd, verbose=T)
norm <- justRMA(filenames=rownames(pData(pd)))

### CEL file image
image(raw[,1])
image(ReadAffy("bad.cel"))

### Histogram
hist(intensity(raw[,4]), breaks=100, col="blue")
hist(log2(intensity(raw[,4])), breaks=100, col="blue")

### Boxplot
boxplot(raw, col="red")
boxplot(data.frame(exprs(norm)), col="blue")

### Scatterplot
plot(exprs(raw)[,1:2], log="xy", pch=".", main="all")
plot(pm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(mm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(exprs(norm)[,1:2], pch=".", main="normalized")

### Heatmap
library(genefilter)
rsd <- rowSds(exprs(norm)) # rsd <- apply(exprs(norm),1,sd)
sel <- order(rsd, decreasing=T)[1:50]
heatmap(exprs(norm)[sel,], col=gentlecol(256))

### ANOVA
lm.coef <- function(y) lm(y~estrogen*time.h, data=pData(pd))$coefficients
eff <- esApply(norm, 1, lm.coef)
affyids <- colnames(eff)

lowest <- sort(eff[2,], decreasing=F)[1:3]
mget(names(lowest), hgu95av2GENENAME)
highest <- sort(eff[2,], decreasing=T)[1:3]
mget(names(highest), hgu95av2GENENAME)

### MA plot
MAplot(raw, pairs=T)

### Probeset
gn <- geneNames(raw)
ps <- probeset(raw, gn[1:2])
ps[[1]]
cbind(pm(ps[[1]])[,1],mm(ps[[1]])[,1])

SNPassoc

library(SNPassoc)
data(SNPs)

### Bonferroni.sig
datSNP <- setupSNP(SNPs, 6:40, sep="")
ans <- WGassociation(protein~1, data=datSNP, model="all")
Bonferroni.sig(ans, model="codominant", alpha=0.05, include.all.SNPs=F)

### SNPs.info.pos
data(SNPs.info.pos)
SNPs.info.pos

### Table.N.Per
Table.N.Per(SNPs$snp10001, SNPs$casco)
Table.N.Per(SNPs$snp10001, SNPs$casco, SNPs$sex=="Male")
Table.N.Per(dominant(snp(SNPs$snp10001,sep="")), SNPs$casco)

### Table.mean.se
Table.mean.se(SNPs$snp10001, SNPs$protein)
Table.mean.se(SNPs$snp10001, SNPs$protein, SNPs$sex=="Male")
Table.mean.se(dominant(snp(SNPs$snp10001,sep="")), SNPs$protein, SNPs$sex=="Male")
#tapply(SNPs$protein, SNPs$snp10001, mean)

### model = c("codominant", "dominant", "recessive", "overdominant", "log-additive", "all")
ansCoAd <- WGassociation(protein~1, data=datSNP, model=c("codominant","log-add"))
plot(ansCoAd)
summary(ansCoAd)

phenoData & exprSet

library(Biobase)

fake.data <-matrix(rnorm(8*200), ncol=8)

sample.info<-data.frame(spl=paste('A',1:8,sep=''), stat=rep(c('healthy','cancer'),c(4,4)))

pheno<-new("phenoData", pData=sample.info, varLabels=list('Sample Name', 'Cancer Status'))

my.experiments <- new("exprSet", exprs=fake.data, phenoData=pheno)

CDF data packages, probe-specific access

library(affy)
library(affydata)

data(Dilution)
annotation(Dilution)
[1] "hgu95av2"
data(hgu95av2cdf)

# get gene names from a CDF file
gnames <- ls(env=hgu95av2cdf)

# get a data with the 1st gene name
get(gnames[1], env=hgu95av2cdf)

# Read a CEL file
celegans <- make.cdf.env("celegans.cdf")
gnames <- ls(env=celegans)

Cluster Analysis of Genomic Data

library(cluster)
data(agriculture)

### pam() - Partitioning Around Medoids
part <- pam(agriculture, k=2)
plot(part, which.plots=1, labels=3, col.clus=3, lwd=2, main="PAM")

### diana() - Divisive Hierarchical Algorithm
hier <- diana(agriculture)
plot(hier, which.plots=2, lwd=2, main="DIANA")

### agnes() - Agglomerative Hierarchical Algorithm

### Visualizing clustering result
heatmap(as.matrix(t(agriculture)), Rowv=NA, labRow=c("GNP", "% in Agriculture"), cexRow=1,xlab="Country")

Marray

library(marray)

datadir <- system.file("swirldata", package="marray")
dir(datadir)
swirlTargets <- read.marrayInfo(file.path(datadir, "SwirlSample.txt"))

# To read Spot files (.spot)
mraw <- read.Spot(targets=swirlTargets, path=datadir)

# To read GenePix files (.gpr)
data <- read.GenePix(targets=swirlTargets)

galinfo <- read.Galfile("fish.gal", path=datadir)
mraw@maLayout <- galinfo$layout
mraw@maGnames <- galinfo$gnames

# Array quality assessment
library(arrayQuality)
maQualityPlots(mraw)
image(mraw[,1])
boxplot(mraw)
plot(mraw[,2])

# Normalization
normdata <- maNorm(mraw)
summary(normdata)

# Write normalized data
write.marray(normdata)

# data analysis with limma
LMres <- lmFit(normdata, design=c(1,-1,-1,1), weights=NULL)
LMres <- eBayes(LMres)
restable <- toptable(LMres, number=50, genelist=maGeneTable(normdata), resort.by="M")

# output
table2html(restable, disp="file")

Limma

library(limma)

setwd("d:/agti/BioCat/swirl")

targets <- readTargets("SwirlSample.txt")

# source <- c( "genepix", "genepix.median", "imagene", "quantarray", "smd.old", "smd", "spot", "spot.close.open")

RG <- read.maimages(targets$FileName, source="spot")

RG$genes <- readGAL("fish.gal")

spottypes <- readSpotTypes("SpotTypes.txt") # information spot types

RG$genes$Status <- controlStatus(spottypes, RG)

RG$printer <- getLayout(RG$genes)

# Normalize Within
MAw <- normalizeWithinArrays(RG) # bacground correction method = "subtract"

# method <- c("none", "subtract", "half", "minimum", "movingmin", "edwards", "normexp")
RGb <- backgroundCorrect(RG, method="subtract")
MAw <- normalizeWithinArrays(RGb) # bacground correction method = "subtract"

# Normalize Between
MAwb <- normalizeBetweenArrays(MAw)

### Unnormalized Data
plotMA(RG[,2]) # Color-Coded M A Plot (for one slide) for the unnormalized data
plotPrintTipLoess(RG[,2]) # Print-Tip Group M A Plot (for one slide)
imageplot(log2(RG$R[,1]), RG$printer, low="white", high="red") # image array plot
boxplot(RG$R~col(RG$R))

### Normalized Data
plotMA(MAw[,2])
plotPrintTipLoess(MAw)
imageplot(MAw$M[,1], RG$printer, zlim=c(-3,3))
boxplot(MAw$M~col(MAw$M), names=colnames(MAw$M))
boxplot(MAwb$M~col(MAwb$M), names=colnames(MAwb$M))

### Linear Model
design <- c(-1, 1, -1, 1)
fit <- lmFit(MAwb, design)

# ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma
fiteb <- eBayes(fit)

options(digits=3)

### Number of genes in table
ngene = c(10,30,50,100,nrow(fit2))
mgene = ngene[menu(ngene, graphics=F, title="Number of genes in table?")]

### Sort by
sortby = c("M","A","t","P","B")
msortby = sortby[menu(sortby, graphics=F, title="Sort by?")]

### Adjust method
adjust = c("bonferroni","holm","hochberg","hommel","fdr","none")
madjust = adjust[menu(adjust, graphics=F, title="Adjust method?")]
topTable(fiteb, coef=1, number=mgene, sort.by=msortby, adjust=madjust)

# MA Plot (using fitted M values)
plotMA(fiteb)
ord <- order(fiteb$lods, decreasing=T)
top <- ord[1:mgene]
text(fiteb$Amean[top], fiteb$coef[top], labels=fiteb$genes[top,"Name"], cex=0.8, col="blue")

# Log Odds Plot
volcanoplot(fiteb, highlight=8, names=fiteb$genes$Name)

# Quantile-Quantile t statistic plot
qqt(fiteb$t, df=fiteb$df.prior+fiteb$df.residual, pch=16, cex=0.2)
abline(0,1)