Tuesday, February 3, 2009

haplo.stats

library(haplo.stats)

data(hla.demo); attach(hla.demo)

geno<-hla.demo[,c(17,18,21:24)]
label<-c("DQB","DRB","B")

geno.desc<-summaryGeno(geno, miss.val=c(0,NA))
print(geno.desc[c(1:10,80:85,135:140),])

table(geno.desc[,3])
miss.all<-which(geno.desc[,3]==3)
hla.demo.updated<-hla.demo[-miss.all,]

save.em<-haplo.em(geno=geno,locus.label=label,miss.val=c(0,NA))

summary(save.em, nlines=7)

Wednesday, January 28, 2009

GenABEL

library(GenABEL)
data(srdta)

phdata<-srdta@phdata

library(genetics)
summary(datSNP$snp10001)
summary(datSNP$snp10002)
LD(datSNP$snp10001,datSNP$snp10002)
table(datSNP$snp10001,datSNP$snp10002)

### Structure of gwaa.data
# Phenotypic data
srdta@phdata
# number of SNPs
srdta@gtdata@nsnps
# IDs of SNPs
srdta@gtdata@snpnames
# Chromosome
srdta@gtdata@chromosome
# SNPs map positions
srdta@gtdata@map
# number of subjects
srdta@gtdata@nids
# IDs of subjects
srdta@gtdata@idnames
# gender
srdta@gtdata@male
# Genotypic data
srdta@gtdata@gtps

### exercise
# SNPs after 2,490,000 pb
srdta@gtdata@snpnames[srdta@gtdata@map>2490000]

### Sub-setting

# the first 5 subject and 3 SNPs
ssubs<-srdta[1:5,1:3]
srdta[c("p141","p147","p2000"),]@phdata
srdta[c("p141","p147","p2000"),c("rs10","rs29")]

# the first character is the consensus
as.character(ssubs@gtdata@coding)
# homozygotes for the first character is "0"
as.character(ssubs@gtdata@strand)

as.character(ssubs@gtdata)
as.numeric(ssubs@gtdata)

snps<-as.character(srdta@gtdata)

# convert data to the format for "genetics" library
as.genotype(ssubs@gtdata)

# convert data to the format for "haplo.stats" library
as.hsgeno(ssubs@gtdata)

# exercise 26
table(as.character(srdta[,"rs114"]))

### Exploring
summary(ssubs)

Sunday, October 28, 2007

Affymetrix example: estrogen

library(affy)
library(estrogen)

datadir <- system.file("extdata", package="estrogen")
dir(datadir)
setwd(datadir)

pd <- read.phenoData("estrogen.txt", header=T, row.names=1)
pData(pd)

raw <- ReadAffy(filenames=rownames(pData(pd)), phenoData=pd, verbose=T)
norm <- justRMA(filenames=rownames(pData(pd)))

### CEL file image
image(raw[,1])
image(ReadAffy("bad.cel"))

### Histogram
hist(intensity(raw[,4]), breaks=100, col="blue")
hist(log2(intensity(raw[,4])), breaks=100, col="blue")

### Boxplot
boxplot(raw, col="red")
boxplot(data.frame(exprs(norm)), col="blue")

### Scatterplot
plot(exprs(raw)[,1:2], log="xy", pch=".", main="all")
plot(pm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(mm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(exprs(norm)[,1:2], pch=".", main="normalized")

### Heatmap
library(genefilter)
rsd <- rowSds(exprs(norm)) # rsd <- apply(exprs(norm),1,sd)
sel <- order(rsd, decreasing=T)[1:50]
heatmap(exprs(norm)[sel,], col=gentlecol(256))

### ANOVA
lm.coef <- function(y) lm(y~estrogen*time.h, data=pData(pd))$coefficients
eff <- esApply(norm, 1, lm.coef)
affyids <- colnames(eff)

lowest <- sort(eff[2,], decreasing=F)[1:3]
mget(names(lowest), hgu95av2GENENAME)
highest <- sort(eff[2,], decreasing=T)[1:3]
mget(names(highest), hgu95av2GENENAME)

### MA plot
MAplot(raw, pairs=T)

### Probeset
gn <- geneNames(raw)
ps <- probeset(raw, gn[1:2])
ps[[1]]
cbind(pm(ps[[1]])[,1],mm(ps[[1]])[,1])

SNPassoc

library(SNPassoc)
data(SNPs)

### Bonferroni.sig
datSNP <- setupSNP(SNPs, 6:40, sep="")
ans <- WGassociation(protein~1, data=datSNP, model="all")
Bonferroni.sig(ans, model="codominant", alpha=0.05, include.all.SNPs=F)

### SNPs.info.pos
data(SNPs.info.pos)
SNPs.info.pos

### Table.N.Per
Table.N.Per(SNPs$snp10001, SNPs$casco)
Table.N.Per(SNPs$snp10001, SNPs$casco, SNPs$sex=="Male")
Table.N.Per(dominant(snp(SNPs$snp10001,sep="")), SNPs$casco)

### Table.mean.se
Table.mean.se(SNPs$snp10001, SNPs$protein)
Table.mean.se(SNPs$snp10001, SNPs$protein, SNPs$sex=="Male")
Table.mean.se(dominant(snp(SNPs$snp10001,sep="")), SNPs$protein, SNPs$sex=="Male")
#tapply(SNPs$protein, SNPs$snp10001, mean)

### model = c("codominant", "dominant", "recessive", "overdominant", "log-additive", "all")
ansCoAd <- WGassociation(protein~1, data=datSNP, model=c("codominant","log-add"))
plot(ansCoAd)
summary(ansCoAd)

phenoData & exprSet

library(Biobase)

fake.data <-matrix(rnorm(8*200), ncol=8)

sample.info<-data.frame(spl=paste('A',1:8,sep=''), stat=rep(c('healthy','cancer'),c(4,4)))

pheno<-new("phenoData", pData=sample.info, varLabels=list('Sample Name', 'Cancer Status'))

my.experiments <- new("exprSet", exprs=fake.data, phenoData=pheno)

CDF data packages, probe-specific access

library(affy)
library(affydata)

data(Dilution)
annotation(Dilution)
[1] "hgu95av2"
data(hgu95av2cdf)

# get gene names from a CDF file
gnames <- ls(env=hgu95av2cdf)

# get a data with the 1st gene name
get(gnames[1], env=hgu95av2cdf)

# Read a CEL file
celegans <- make.cdf.env("celegans.cdf")
gnames <- ls(env=celegans)

Cluster Analysis of Genomic Data

library(cluster)
data(agriculture)

### pam() - Partitioning Around Medoids
part <- pam(agriculture, k=2)
plot(part, which.plots=1, labels=3, col.clus=3, lwd=2, main="PAM")

### diana() - Divisive Hierarchical Algorithm
hier <- diana(agriculture)
plot(hier, which.plots=2, lwd=2, main="DIANA")

### agnes() - Agglomerative Hierarchical Algorithm

### Visualizing clustering result
heatmap(as.matrix(t(agriculture)), Rowv=NA, labRow=c("GNP", "% in Agriculture"), cexRow=1,xlab="Country")