library(affy)
library(estrogen)
datadir <- system.file("extdata", package="estrogen")
dir(datadir)
setwd(datadir)
pd <- read.phenoData("estrogen.txt", header=T, row.names=1)
pData(pd)
raw <- ReadAffy(filenames=rownames(pData(pd)), phenoData=pd, verbose=T)
norm <- justRMA(filenames=rownames(pData(pd)))
### CEL file image
image(raw[,1])
image(ReadAffy("bad.cel"))
### Histogram
hist(intensity(raw[,4]), breaks=100, col="blue")
hist(log2(intensity(raw[,4])), breaks=100, col="blue")
### Boxplot
boxplot(raw, col="red")
boxplot(data.frame(exprs(norm)), col="blue")
### Scatterplot
plot(exprs(raw)[,1:2], log="xy", pch=".", main="all")
plot(pm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(mm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(exprs(norm)[,1:2], pch=".", main="normalized")
### Heatmap
library(genefilter)
rsd <- rowSds(exprs(norm)) # rsd <- apply(exprs(norm),1,sd)
sel <- order(rsd, decreasing=T)[1:50]
heatmap(exprs(norm)[sel,], col=gentlecol(256))
### ANOVA
lm.coef <- function(y) lm(y~estrogen*time.h, data=pData(pd))$coefficients
eff <- esApply(norm, 1, lm.coef)
affyids <- colnames(eff)
lowest <- sort(eff[2,], decreasing=F)[1:3]
mget(names(lowest), hgu95av2GENENAME)
highest <- sort(eff[2,], decreasing=T)[1:3]
mget(names(highest), hgu95av2GENENAME)
### MA plot
MAplot(raw, pairs=T)
### Probeset
gn <- geneNames(raw)
ps <- probeset(raw, gn[1:2])
ps[[1]]
cbind(pm(ps[[1]])[,1],mm(ps[[1]])[,1])
Sunday, October 28, 2007
SNPassoc
library(SNPassoc)
data(SNPs)
### Bonferroni.sig
datSNP <- setupSNP(SNPs, 6:40, sep="")
ans <- WGassociation(protein~1, data=datSNP, model="all")
Bonferroni.sig(ans, model="codominant", alpha=0.05, include.all.SNPs=F)
### SNPs.info.pos
data(SNPs.info.pos)
SNPs.info.pos
### Table.N.Per
Table.N.Per(SNPs$snp10001, SNPs$casco)
Table.N.Per(SNPs$snp10001, SNPs$casco, SNPs$sex=="Male")
Table.N.Per(dominant(snp(SNPs$snp10001,sep="")), SNPs$casco)
### Table.mean.se
Table.mean.se(SNPs$snp10001, SNPs$protein)
Table.mean.se(SNPs$snp10001, SNPs$protein, SNPs$sex=="Male")
Table.mean.se(dominant(snp(SNPs$snp10001,sep="")), SNPs$protein, SNPs$sex=="Male")
#tapply(SNPs$protein, SNPs$snp10001, mean)
### model = c("codominant", "dominant", "recessive", "overdominant", "log-additive", "all")
ansCoAd <- WGassociation(protein~1, data=datSNP, model=c("codominant","log-add"))
plot(ansCoAd)
summary(ansCoAd)
data(SNPs)
### Bonferroni.sig
datSNP <- setupSNP(SNPs, 6:40, sep="")
ans <- WGassociation(protein~1, data=datSNP, model="all")
Bonferroni.sig(ans, model="codominant", alpha=0.05, include.all.SNPs=F)
### SNPs.info.pos
data(SNPs.info.pos)
SNPs.info.pos
### Table.N.Per
Table.N.Per(SNPs$snp10001, SNPs$casco)
Table.N.Per(SNPs$snp10001, SNPs$casco, SNPs$sex=="Male")
Table.N.Per(dominant(snp(SNPs$snp10001,sep="")), SNPs$casco)
### Table.mean.se
Table.mean.se(SNPs$snp10001, SNPs$protein)
Table.mean.se(SNPs$snp10001, SNPs$protein, SNPs$sex=="Male")
Table.mean.se(dominant(snp(SNPs$snp10001,sep="")), SNPs$protein, SNPs$sex=="Male")
#tapply(SNPs$protein, SNPs$snp10001, mean)
### model = c("codominant", "dominant", "recessive", "overdominant", "log-additive", "all")
ansCoAd <- WGassociation(protein~1, data=datSNP, model=c("codominant","log-add"))
plot(ansCoAd)
summary(ansCoAd)
phenoData & exprSet
library(Biobase)
fake.data <-matrix(rnorm(8*200), ncol=8)
sample.info<-data.frame(spl=paste('A',1:8,sep=''), stat=rep(c('healthy','cancer'),c(4,4)))
pheno<-new("phenoData", pData=sample.info, varLabels=list('Sample Name', 'Cancer Status'))
my.experiments <- new("exprSet", exprs=fake.data, phenoData=pheno)
fake.data <-matrix(rnorm(8*200), ncol=8)
sample.info<-data.frame(spl=paste('A',1:8,sep=''), stat=rep(c('healthy','cancer'),c(4,4)))
pheno<-new("phenoData", pData=sample.info, varLabels=list('Sample Name', 'Cancer Status'))
my.experiments <- new("exprSet", exprs=fake.data, phenoData=pheno)
CDF data packages, probe-specific access
library(affy)
library(affydata)
data(Dilution)
annotation(Dilution)
[1] "hgu95av2"
data(hgu95av2cdf)
# get gene names from a CDF file
gnames <- ls(env=hgu95av2cdf)
# get a data with the 1st gene name
get(gnames[1], env=hgu95av2cdf)
# Read a CEL file
celegans <- make.cdf.env("celegans.cdf")
gnames <- ls(env=celegans)
library(affydata)
data(Dilution)
annotation(Dilution)
[1] "hgu95av2"
data(hgu95av2cdf)
# get gene names from a CDF file
gnames <- ls(env=hgu95av2cdf)
# get a data with the 1st gene name
get(gnames[1], env=hgu95av2cdf)
# Read a CEL file
celegans <- make.cdf.env("celegans.cdf")
gnames <- ls(env=celegans)
Cluster Analysis of Genomic Data
library(cluster)
data(agriculture)
### pam() - Partitioning Around Medoids
part <- pam(agriculture, k=2)
plot(part, which.plots=1, labels=3, col.clus=3, lwd=2, main="PAM")
### diana() - Divisive Hierarchical Algorithm
hier <- diana(agriculture)
plot(hier, which.plots=2, lwd=2, main="DIANA")
### agnes() - Agglomerative Hierarchical Algorithm
### Visualizing clustering result
heatmap(as.matrix(t(agriculture)), Rowv=NA, labRow=c("GNP", "% in Agriculture"), cexRow=1,xlab="Country")
data(agriculture)
### pam() - Partitioning Around Medoids
part <- pam(agriculture, k=2)
plot(part, which.plots=1, labels=3, col.clus=3, lwd=2, main="PAM")
### diana() - Divisive Hierarchical Algorithm
hier <- diana(agriculture)
plot(hier, which.plots=2, lwd=2, main="DIANA")
### agnes() - Agglomerative Hierarchical Algorithm
### Visualizing clustering result
heatmap(as.matrix(t(agriculture)), Rowv=NA, labRow=c("GNP", "% in Agriculture"), cexRow=1,xlab="Country")
Marray
library(marray)
datadir <- system.file("swirldata", package="marray")
dir(datadir)
swirlTargets <- read.marrayInfo(file.path(datadir, "SwirlSample.txt"))
# To read Spot files (.spot)
mraw <- read.Spot(targets=swirlTargets, path=datadir)
# To read GenePix files (.gpr)
data <- read.GenePix(targets=swirlTargets)
galinfo <- read.Galfile("fish.gal", path=datadir)
mraw@maLayout <- galinfo$layout
mraw@maGnames <- galinfo$gnames
# Array quality assessment
library(arrayQuality)
maQualityPlots(mraw)
image(mraw[,1])
boxplot(mraw)
plot(mraw[,2])
# Normalization
normdata <- maNorm(mraw)
summary(normdata)
# Write normalized data
write.marray(normdata)
# data analysis with limma
LMres <- lmFit(normdata, design=c(1,-1,-1,1), weights=NULL)
LMres <- eBayes(LMres)
restable <- toptable(LMres, number=50, genelist=maGeneTable(normdata), resort.by="M")
# output
table2html(restable, disp="file")
datadir <- system.file("swirldata", package="marray")
dir(datadir)
swirlTargets <- read.marrayInfo(file.path(datadir, "SwirlSample.txt"))
# To read Spot files (.spot)
mraw <- read.Spot(targets=swirlTargets, path=datadir)
# To read GenePix files (.gpr)
data <- read.GenePix(targets=swirlTargets)
galinfo <- read.Galfile("fish.gal", path=datadir)
mraw@maLayout <- galinfo$layout
mraw@maGnames <- galinfo$gnames
# Array quality assessment
library(arrayQuality)
maQualityPlots(mraw)
image(mraw[,1])
boxplot(mraw)
plot(mraw[,2])
# Normalization
normdata <- maNorm(mraw)
summary(normdata)
# Write normalized data
write.marray(normdata)
# data analysis with limma
LMres <- lmFit(normdata, design=c(1,-1,-1,1), weights=NULL)
LMres <- eBayes(LMres)
restable <- toptable(LMres, number=50, genelist=maGeneTable(normdata), resort.by="M")
# output
table2html(restable, disp="file")
Limma
library(limma)
setwd("d:/agti/BioCat/swirl")
targets <- readTargets("SwirlSample.txt")
# source <- c( "genepix", "genepix.median", "imagene", "quantarray", "smd.old", "smd", "spot", "spot.close.open")
RG <- read.maimages(targets$FileName, source="spot")
RG$genes <- readGAL("fish.gal")
spottypes <- readSpotTypes("SpotTypes.txt") # information spot types
RG$genes$Status <- controlStatus(spottypes, RG)
RG$printer <- getLayout(RG$genes)
# Normalize Within
MAw <- normalizeWithinArrays(RG) # bacground correction method = "subtract"
# method <- c("none", "subtract", "half", "minimum", "movingmin", "edwards", "normexp")
RGb <- backgroundCorrect(RG, method="subtract")
MAw <- normalizeWithinArrays(RGb) # bacground correction method = "subtract"
# Normalize Between
MAwb <- normalizeBetweenArrays(MAw)
### Unnormalized Data
plotMA(RG[,2]) # Color-Coded M A Plot (for one slide) for the unnormalized data
plotPrintTipLoess(RG[,2]) # Print-Tip Group M A Plot (for one slide)
imageplot(log2(RG$R[,1]), RG$printer, low="white", high="red") # image array plot
boxplot(RG$R~col(RG$R))
### Normalized Data
plotMA(MAw[,2])
plotPrintTipLoess(MAw)
imageplot(MAw$M[,1], RG$printer, zlim=c(-3,3))
boxplot(MAw$M~col(MAw$M), names=colnames(MAw$M))
boxplot(MAwb$M~col(MAwb$M), names=colnames(MAwb$M))
### Linear Model
design <- c(-1, 1, -1, 1)
fit <- lmFit(MAwb, design)
# ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma
fiteb <- eBayes(fit)
options(digits=3)
### Number of genes in table
ngene = c(10,30,50,100,nrow(fit2))
mgene = ngene[menu(ngene, graphics=F, title="Number of genes in table?")]
### Sort by
sortby = c("M","A","t","P","B")
msortby = sortby[menu(sortby, graphics=F, title="Sort by?")]
### Adjust method
adjust = c("bonferroni","holm","hochberg","hommel","fdr","none")
madjust = adjust[menu(adjust, graphics=F, title="Adjust method?")]
topTable(fiteb, coef=1, number=mgene, sort.by=msortby, adjust=madjust)
# MA Plot (using fitted M values)
plotMA(fiteb)
ord <- order(fiteb$lods, decreasing=T)
top <- ord[1:mgene]
text(fiteb$Amean[top], fiteb$coef[top], labels=fiteb$genes[top,"Name"], cex=0.8, col="blue")
# Log Odds Plot
volcanoplot(fiteb, highlight=8, names=fiteb$genes$Name)
# Quantile-Quantile t statistic plot
qqt(fiteb$t, df=fiteb$df.prior+fiteb$df.residual, pch=16, cex=0.2)
abline(0,1)
setwd("d:/agti/BioCat/swirl")
targets <- readTargets("SwirlSample.txt")
# source <- c( "genepix", "genepix.median", "imagene", "quantarray", "smd.old", "smd", "spot", "spot.close.open")
RG <- read.maimages(targets$FileName, source="spot")
RG$genes <- readGAL("fish.gal")
spottypes <- readSpotTypes("SpotTypes.txt") # information spot types
RG$genes$Status <- controlStatus(spottypes, RG)
RG$printer <- getLayout(RG$genes)
# Normalize Within
MAw <- normalizeWithinArrays(RG) # bacground correction method = "subtract"
# method <- c("none", "subtract", "half", "minimum", "movingmin", "edwards", "normexp")
RGb <- backgroundCorrect(RG, method="subtract")
MAw <- normalizeWithinArrays(RGb) # bacground correction method = "subtract"
# Normalize Between
MAwb <- normalizeBetweenArrays(MAw)
### Unnormalized Data
plotMA(RG[,2]) # Color-Coded M A Plot (for one slide) for the unnormalized data
plotPrintTipLoess(RG[,2]) # Print-Tip Group M A Plot (for one slide)
imageplot(log2(RG$R[,1]), RG$printer, low="white", high="red") # image array plot
boxplot(RG$R~col(RG$R))
### Normalized Data
plotMA(MAw[,2])
plotPrintTipLoess(MAw)
imageplot(MAw$M[,1], RG$printer, zlim=c(-3,3))
boxplot(MAw$M~col(MAw$M), names=colnames(MAw$M))
boxplot(MAwb$M~col(MAwb$M), names=colnames(MAwb$M))
### Linear Model
design <- c(-1, 1, -1, 1)
fit <- lmFit(MAwb, design)
# ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma
fiteb <- eBayes(fit)
options(digits=3)
### Number of genes in table
ngene = c(10,30,50,100,nrow(fit2))
mgene = ngene[menu(ngene, graphics=F, title="Number of genes in table?")]
### Sort by
sortby = c("M","A","t","P","B")
msortby = sortby[menu(sortby, graphics=F, title="Sort by?")]
### Adjust method
adjust = c("bonferroni","holm","hochberg","hommel","fdr","none")
madjust = adjust[menu(adjust, graphics=F, title="Adjust method?")]
topTable(fiteb, coef=1, number=mgene, sort.by=msortby, adjust=madjust)
# MA Plot (using fitted M values)
plotMA(fiteb)
ord <- order(fiteb$lods, decreasing=T)
top <- ord[1:mgene]
text(fiteb$Amean[top], fiteb$coef[top], labels=fiteb$genes[top,"Name"], cex=0.8, col="blue")
# Log Odds Plot
volcanoplot(fiteb, highlight=8, names=fiteb$genes$Name)
# Quantile-Quantile t statistic plot
qqt(fiteb$t, df=fiteb$df.prior+fiteb$df.residual, pch=16, cex=0.2)
abline(0,1)
Subscribe to:
Comments (Atom)
