Sunday, October 28, 2007

Affy

### Introduction
probe: oligonucleotides of 25 base pair length used to probe RNA targets
perfect match: probes intended to match perfectly the target sequence
mismatch: the probes having one base mismatch with the target sequence intended to account for non-specific binding
probe pair: a unit composed of a perfect match and its mismatch
affyID: an identification for a probe set
probe pair set: PMs and MMs related to a common affyID
CEL file contain measured intended intensities and locations for an array that has been hybridized
CDF file contain the information relating probe pair sets to locations on the array

### Quick Start
> library(affy)
> input <- c("day_7a_amp.CEL","day_7b_amp.CEL","day_7c_amp.CEL", "day_7a_HIV_amp.CEL", "day_7b_HIV_amp.CEL","day_7c_HIV_amp.CEL")
> data <- ReadAffy(filenames=input)
> hiv <- rma(data) # the following will do the 2 previous steps
> hiv <- justRMA(filenames=input) // read and normalize, very quickly > exprs2excel(hiv.e, file="hiv.csv")
> write.exprs(hiv.e, file="hiv.txt")

### Normalization with expresso
> hiv.raw <- ReadAffy(filenames=input) > bgcorrect.methods
[1] "mas" "none" "rma" "rma2"

# "rma" and "rma2" should be used with pmcorrect.method="pmonly"
> normalize.methods(affybatch.example)
[1] "constant" "contrasts" "invariantset" "loess"
[5] "qspline" "quantiles" "quantiles.robust"

> pmcorrect.methods
[1] "mas" "pmonly" "subtractmm"

> express.summary.stat.methods
[1] "avgdiff" "liwong" "mas" "medianpolish" "playerout"

# Expression value with "medianpolish" will be in log2 scale, so it should be used with pmcorrct.method="subtractmm", which can cause negative value

# expresso for rma
hiv <- expresso(hiv.raw, bgcorrect.method="rma", normalize.method="quantiles", pmcorrect.method="pmonly", summary.method="medianpolish")

# one more example
hiv <- expresso(hiv.raw, bgcorrect.method="mas", normalize.method="qspline", pmcorrect.method="subtractmm", summary.method="playerout")

### Save expressed values as a csv file
> write.csv(exprs(hiv), "hiv.csv")

### Read a saved expressed file
> hiv <- read.csv("hiv.csv", row.names=1)

### Quality Control
> MAplot(hiv.raw, pairs=T)
> hist(hiv.raw[,1:2])

> par(mfrow=c(3,2))
> image(hiv.raw)

> par(mfrow=c(1,1))
> boxplot(hiv.raw, col=1:6) // 6 colors

> phenoData(hiv.raw)

> pData(hiv.raw)

> pm(hiv.raw)
> mm(hiv.raw)
> probeNames(hiv.raw)
> geneNames(hiv.raw) // extracts the unique affyIDs
> sampleNames(hiv.raw)
> intensity(hiv.raw)

### Location to ProbeSet Mapping
> gn <- geneNames(hiv.raw)
> ps <- probeset(hiv.raw, gn[1:2])
> show(ps) // same as just typing ps
> pm(ps[[1]])

> index <- indexProbes(hiv.raw, which="pm", genenames=gn[1])[[1]]
// get indexs for the PM probes for the affyID "AB002365_at", which=c("pm", "mm", "both")

> intensity(hiv.raw)[index,]

No comments: