Main / WikiSandbox
        
Go-Statistics
| 2005 | 2008 | ||||||
| category | BP | MF | CC | BP | MF | CC | |
|---|---|---|---|---|---|---|---|
| go-ids | 8924 | 6929 | 1397 | 14659 | 8260 | 2064 | |
| obsoletes | 330 | 526 | 113 | 471 | 566 | 117 | |
| MetaCyc | 517 | 3460 | 0 | 680 | 3524 | 1 | |
| EC | 1 | 4306 | 0 | 1 | 4762 | 0 | |
| slim-generic | 51 | 41 | 36 | 53 | 42 | 36 | |
| slim-plant | 52 | 28 | 29 | 51 | 27 | 27 | |
| slim-yeast | 35 | 23 | 25 | 35 | 23 | 25 | |
exprdata=read.table("http://goblet.molgen.mpg.de/data/R/ALLs-data.tab",header=TRUE)
head(exprdata)
summary(exprdata)
cnames=colnames(exprdata)
all1=grep("ALL1",cnames)
e2a=grep("E2A",cnames)
exprdata=exprdata[,c(all1,e2a)]
head(exprdata)
cnames=colnames(exprdata)
all1=grep("ALL1",cnames)
e2a=grep("E2A",cnames)
cols=rep(4,length(cnames))
cols[e2a]=2
cols
pdf("exprdata.pdf")
boxplot(exprdata,col=cols)
par(mfrow=c(2,2),mai=c(0.5,0.5,0.5,0.5))
for (mdist in c("manhattan","euclidian")) {
  for (mclust in c("single","complete")) {
    plot(hclust(dist(t(exprdata),method=mdist),method=mclust),main=paste(mclust,mdist))
  }
}
par(mfrow=c(1,1))
mtest = function (row) {
  return(t.test(row[all1],row[e2a])$p.value)
}
mtest = function (row) {
  return(t.test(row[all1],row[e2a])$p.value)
}
lfc=function(row) {
  return(log2(median(2^row[all1])/median(2^row[e2a])))
}
idx=which(apply(exprdata,1,IQR)>0.9)
pvals=apply(exprdata[idx,],1,mtest)
head(pvals)
adj=p.adjust(pvals,method="BH")
lfc=apply(exprdata[idx,],1,lfc)
res.df=data.frame(pval=pvals,adjp=adj,lfc=lfc,abs=abs(lfc))
head(res.df)
dim(with(res.df,res.df[adjp<0.05& abs>1,]))
diff.genes=with(res.df,res.df[adjp<0.05& abs>1,])
top20=head(diff.genes[order(1/diff.genes$abs),],n=20)
top20
heatmap(as.matrix(exprdata[rownames(top20),]))
plot(pvals~lfc,pch="+")
hist(pvals,main="plot rawp")
write.table(top20,file="top20.tab",quote=F,sep="\t")
# PCA
opar=par(mfrow=c(3,3),omi=c(0.2,0.2,0.5,0.2),mai=c(0.1,0.1,0.1,0.1),las=1)
pca.res=prcomp(t(exprdata))
plotPCA= function (mtitle) {
  pca.vars=summary(pca.res)[6]$importance[2,]
  pca.xx=pca.res$x
  for (i in 1:3) {
    for (j in 1:3) {
      if (i == j) {
        plot(-1:1,-1:1,col="white",pch=4,xlab="",ylab="",yaxt="n",xaxt="n",main="")
        text(0,0,paste("PC",j, "\n",format(pca.vars[i],digits=3)))
      } else {
         plot(pca.xx[,i],pca.xx[,j],col=cols,pch=cols)
      }
   }
  }
  mtext(mtitle, 3,outer=TRUE)
}
plotPCA("all genes")
pca.res=prcomp(t(exprdata[rownames(res.df),]))
plotPCA("diff genes")
par(opar)
dev.off()