library(haplo.stats)
data(hla.demo); attach(hla.demo)
geno<-hla.demo[,c(17,18,21:24)]
label<-c("DQB","DRB","B")
geno.desc<-summaryGeno(geno, miss.val=c(0,NA))
print(geno.desc[c(1:10,80:85,135:140),])
table(geno.desc[,3])
miss.all<-which(geno.desc[,3]==3)
hla.demo.updated<-hla.demo[-miss.all,]
save.em<-haplo.em(geno=geno,locus.label=label,miss.val=c(0,NA))
summary(save.em, nlines=7)
Tuesday, February 3, 2009
Wednesday, January 28, 2009
GenABEL
library(GenABEL)
data(srdta)
phdata<-srdta@phdata
library(genetics)
summary(datSNP$snp10001)
summary(datSNP$snp10002)
LD(datSNP$snp10001,datSNP$snp10002)
table(datSNP$snp10001,datSNP$snp10002)
### Structure of gwaa.data
# Phenotypic data
srdta@phdata
# number of SNPs
srdta@gtdata@nsnps
# IDs of SNPs
srdta@gtdata@snpnames
# Chromosome
srdta@gtdata@chromosome
# SNPs map positions
srdta@gtdata@map
# number of subjects
srdta@gtdata@nids
# IDs of subjects
srdta@gtdata@idnames
# gender
srdta@gtdata@male
# Genotypic data
srdta@gtdata@gtps
### exercise
# SNPs after 2,490,000 pb
srdta@gtdata@snpnames[srdta@gtdata@map>2490000]
### Sub-setting
# the first 5 subject and 3 SNPs
ssubs<-srdta[1:5,1:3]
srdta[c("p141","p147","p2000"),]@phdata
srdta[c("p141","p147","p2000"),c("rs10","rs29")]
# the first character is the consensus
as.character(ssubs@gtdata@coding)
# homozygotes for the first character is "0"
as.character(ssubs@gtdata@strand)
as.character(ssubs@gtdata)
as.numeric(ssubs@gtdata)
snps<-as.character(srdta@gtdata)
# convert data to the format for "genetics" library
as.genotype(ssubs@gtdata)
# convert data to the format for "haplo.stats" library
as.hsgeno(ssubs@gtdata)
# exercise 26
table(as.character(srdta[,"rs114"]))
### Exploring
summary(ssubs)
data(srdta)
phdata<-srdta@phdata
library(genetics)
summary(datSNP$snp10001)
summary(datSNP$snp10002)
LD(datSNP$snp10001,datSNP$snp10002)
table(datSNP$snp10001,datSNP$snp10002)
### Structure of gwaa.data
# Phenotypic data
srdta@phdata
# number of SNPs
srdta@gtdata@nsnps
# IDs of SNPs
srdta@gtdata@snpnames
# Chromosome
srdta@gtdata@chromosome
# SNPs map positions
srdta@gtdata@map
# number of subjects
srdta@gtdata@nids
# IDs of subjects
srdta@gtdata@idnames
# gender
srdta@gtdata@male
# Genotypic data
srdta@gtdata@gtps
### exercise
# SNPs after 2,490,000 pb
srdta@gtdata@snpnames[srdta@gtdata@map>2490000]
### Sub-setting
# the first 5 subject and 3 SNPs
ssubs<-srdta[1:5,1:3]
srdta[c("p141","p147","p2000"),]@phdata
srdta[c("p141","p147","p2000"),c("rs10","rs29")]
# the first character is the consensus
as.character(ssubs@gtdata@coding)
# homozygotes for the first character is "0"
as.character(ssubs@gtdata@strand)
as.character(ssubs@gtdata)
as.numeric(ssubs@gtdata)
snps<-as.character(srdta@gtdata)
# convert data to the format for "genetics" library
as.genotype(ssubs@gtdata)
# convert data to the format for "haplo.stats" library
as.hsgeno(ssubs@gtdata)
# exercise 26
table(as.character(srdta[,"rs114"]))
### Exploring
summary(ssubs)
Sunday, October 28, 2007
Affymetrix example: estrogen
library(affy)
library(estrogen)
datadir <- system.file("extdata", package="estrogen")
dir(datadir)
setwd(datadir)
pd <- read.phenoData("estrogen.txt", header=T, row.names=1)
pData(pd)
raw <- ReadAffy(filenames=rownames(pData(pd)), phenoData=pd, verbose=T)
norm <- justRMA(filenames=rownames(pData(pd)))
### CEL file image
image(raw[,1])
image(ReadAffy("bad.cel"))
### Histogram
hist(intensity(raw[,4]), breaks=100, col="blue")
hist(log2(intensity(raw[,4])), breaks=100, col="blue")
### Boxplot
boxplot(raw, col="red")
boxplot(data.frame(exprs(norm)), col="blue")
### Scatterplot
plot(exprs(raw)[,1:2], log="xy", pch=".", main="all")
plot(pm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(mm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(exprs(norm)[,1:2], pch=".", main="normalized")
### Heatmap
library(genefilter)
rsd <- rowSds(exprs(norm)) # rsd <- apply(exprs(norm),1,sd)
sel <- order(rsd, decreasing=T)[1:50]
heatmap(exprs(norm)[sel,], col=gentlecol(256))
### ANOVA
lm.coef <- function(y) lm(y~estrogen*time.h, data=pData(pd))$coefficients
eff <- esApply(norm, 1, lm.coef)
affyids <- colnames(eff)
lowest <- sort(eff[2,], decreasing=F)[1:3]
mget(names(lowest), hgu95av2GENENAME)
highest <- sort(eff[2,], decreasing=T)[1:3]
mget(names(highest), hgu95av2GENENAME)
### MA plot
MAplot(raw, pairs=T)
### Probeset
gn <- geneNames(raw)
ps <- probeset(raw, gn[1:2])
ps[[1]]
cbind(pm(ps[[1]])[,1],mm(ps[[1]])[,1])
library(estrogen)
datadir <- system.file("extdata", package="estrogen")
dir(datadir)
setwd(datadir)
pd <- read.phenoData("estrogen.txt", header=T, row.names=1)
pData(pd)
raw <- ReadAffy(filenames=rownames(pData(pd)), phenoData=pd, verbose=T)
norm <- justRMA(filenames=rownames(pData(pd)))
### CEL file image
image(raw[,1])
image(ReadAffy("bad.cel"))
### Histogram
hist(intensity(raw[,4]), breaks=100, col="blue")
hist(log2(intensity(raw[,4])), breaks=100, col="blue")
### Boxplot
boxplot(raw, col="red")
boxplot(data.frame(exprs(norm)), col="blue")
### Scatterplot
plot(exprs(raw)[,1:2], log="xy", pch=".", main="all")
plot(pm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(mm(raw)[,1:2], log="xy", pch=".", main="pm")
plot(exprs(norm)[,1:2], pch=".", main="normalized")
### Heatmap
library(genefilter)
rsd <- rowSds(exprs(norm)) # rsd <- apply(exprs(norm),1,sd)
sel <- order(rsd, decreasing=T)[1:50]
heatmap(exprs(norm)[sel,], col=gentlecol(256))
### ANOVA
lm.coef <- function(y) lm(y~estrogen*time.h, data=pData(pd))$coefficients
eff <- esApply(norm, 1, lm.coef)
affyids <- colnames(eff)
lowest <- sort(eff[2,], decreasing=F)[1:3]
mget(names(lowest), hgu95av2GENENAME)
highest <- sort(eff[2,], decreasing=T)[1:3]
mget(names(highest), hgu95av2GENENAME)
### MA plot
MAplot(raw, pairs=T)
### Probeset
gn <- geneNames(raw)
ps <- probeset(raw, gn[1:2])
ps[[1]]
cbind(pm(ps[[1]])[,1],mm(ps[[1]])[,1])
SNPassoc
library(SNPassoc)
data(SNPs)
### Bonferroni.sig
datSNP <- setupSNP(SNPs, 6:40, sep="")
ans <- WGassociation(protein~1, data=datSNP, model="all")
Bonferroni.sig(ans, model="codominant", alpha=0.05, include.all.SNPs=F)
### SNPs.info.pos
data(SNPs.info.pos)
SNPs.info.pos
### Table.N.Per
Table.N.Per(SNPs$snp10001, SNPs$casco)
Table.N.Per(SNPs$snp10001, SNPs$casco, SNPs$sex=="Male")
Table.N.Per(dominant(snp(SNPs$snp10001,sep="")), SNPs$casco)
### Table.mean.se
Table.mean.se(SNPs$snp10001, SNPs$protein)
Table.mean.se(SNPs$snp10001, SNPs$protein, SNPs$sex=="Male")
Table.mean.se(dominant(snp(SNPs$snp10001,sep="")), SNPs$protein, SNPs$sex=="Male")
#tapply(SNPs$protein, SNPs$snp10001, mean)
### model = c("codominant", "dominant", "recessive", "overdominant", "log-additive", "all")
ansCoAd <- WGassociation(protein~1, data=datSNP, model=c("codominant","log-add"))
plot(ansCoAd)
summary(ansCoAd)
data(SNPs)
### Bonferroni.sig
datSNP <- setupSNP(SNPs, 6:40, sep="")
ans <- WGassociation(protein~1, data=datSNP, model="all")
Bonferroni.sig(ans, model="codominant", alpha=0.05, include.all.SNPs=F)
### SNPs.info.pos
data(SNPs.info.pos)
SNPs.info.pos
### Table.N.Per
Table.N.Per(SNPs$snp10001, SNPs$casco)
Table.N.Per(SNPs$snp10001, SNPs$casco, SNPs$sex=="Male")
Table.N.Per(dominant(snp(SNPs$snp10001,sep="")), SNPs$casco)
### Table.mean.se
Table.mean.se(SNPs$snp10001, SNPs$protein)
Table.mean.se(SNPs$snp10001, SNPs$protein, SNPs$sex=="Male")
Table.mean.se(dominant(snp(SNPs$snp10001,sep="")), SNPs$protein, SNPs$sex=="Male")
#tapply(SNPs$protein, SNPs$snp10001, mean)
### model = c("codominant", "dominant", "recessive", "overdominant", "log-additive", "all")
ansCoAd <- WGassociation(protein~1, data=datSNP, model=c("codominant","log-add"))
plot(ansCoAd)
summary(ansCoAd)
phenoData & exprSet
library(Biobase)
fake.data <-matrix(rnorm(8*200), ncol=8)
sample.info<-data.frame(spl=paste('A',1:8,sep=''), stat=rep(c('healthy','cancer'),c(4,4)))
pheno<-new("phenoData", pData=sample.info, varLabels=list('Sample Name', 'Cancer Status'))
my.experiments <- new("exprSet", exprs=fake.data, phenoData=pheno)
fake.data <-matrix(rnorm(8*200), ncol=8)
sample.info<-data.frame(spl=paste('A',1:8,sep=''), stat=rep(c('healthy','cancer'),c(4,4)))
pheno<-new("phenoData", pData=sample.info, varLabels=list('Sample Name', 'Cancer Status'))
my.experiments <- new("exprSet", exprs=fake.data, phenoData=pheno)
CDF data packages, probe-specific access
library(affy)
library(affydata)
data(Dilution)
annotation(Dilution)
[1] "hgu95av2"
data(hgu95av2cdf)
# get gene names from a CDF file
gnames <- ls(env=hgu95av2cdf)
# get a data with the 1st gene name
get(gnames[1], env=hgu95av2cdf)
# Read a CEL file
celegans <- make.cdf.env("celegans.cdf")
gnames <- ls(env=celegans)
library(affydata)
data(Dilution)
annotation(Dilution)
[1] "hgu95av2"
data(hgu95av2cdf)
# get gene names from a CDF file
gnames <- ls(env=hgu95av2cdf)
# get a data with the 1st gene name
get(gnames[1], env=hgu95av2cdf)
# Read a CEL file
celegans <- make.cdf.env("celegans.cdf")
gnames <- ls(env=celegans)
Cluster Analysis of Genomic Data
library(cluster)
data(agriculture)
### pam() - Partitioning Around Medoids
part <- pam(agriculture, k=2)
plot(part, which.plots=1, labels=3, col.clus=3, lwd=2, main="PAM")
### diana() - Divisive Hierarchical Algorithm
hier <- diana(agriculture)
plot(hier, which.plots=2, lwd=2, main="DIANA")
### agnes() - Agglomerative Hierarchical Algorithm
### Visualizing clustering result
heatmap(as.matrix(t(agriculture)), Rowv=NA, labRow=c("GNP", "% in Agriculture"), cexRow=1,xlab="Country")
data(agriculture)
### pam() - Partitioning Around Medoids
part <- pam(agriculture, k=2)
plot(part, which.plots=1, labels=3, col.clus=3, lwd=2, main="PAM")
### diana() - Divisive Hierarchical Algorithm
hier <- diana(agriculture)
plot(hier, which.plots=2, lwd=2, main="DIANA")
### agnes() - Agglomerative Hierarchical Algorithm
### Visualizing clustering result
heatmap(as.matrix(t(agriculture)), Rowv=NA, labRow=c("GNP", "% in Agriculture"), cexRow=1,xlab="Country")
Subscribe to:
Posts (Atom)