Log in Page Discussion History Go to the site toolbox

540Project:KimrcodeLeon

From BluWiki

#Determine which genes play a key role in each phase (i.e. which genes we expect to be clustered together, since their expression levels will be similar at the same time, i.e. they will have similar peaks)
rm(list=ls(all=TRUE))
source("readarff.r") 
source("writearff.R") 
cho.phase<-as.matrix(read.table("http://faculty.washington.edu/kayee/cluster/log_cellcycle_384_17.txt",skip=1)[,1:2])
earlyG1<-rownames(cho.phase[(cho.phase[,2]=="1"),]) #1-67
lateG1<-rownames(cho.phase[(cho.phase[,2]=="2"),]) #68-202
S<-rownames(cho.phase[(cho.phase[,2]=="3"),]) #203-277
G2<-rownames(cho.phase[(cho.phase[,2]=="4"),]) #278-329
M<-rownames(cho.phase[(cho.phase[,2]=="5"),]) #330-384

library(rama)
numGenes<-384
numTimePoints <-17
numClust <-5

#Set up our matrix U for the adjusted random index measure
U<-list()
U[[1]]<-c(1:67)
U[[2]]<-c(68:202)
U[[3]]<-c(203:277)
U[[4]]<-c(278:329)
U[[5]]<-c(330:384)

cho.data<-as.matrix(read.table("http://faculty.washington.edu/kayee/cluster/log_cellcycle_384_17.txt",skip=1)[,3:19])

library(mva)
cho.data.std<-(cho.data-mat.mean(cho.data)[,1])/mat.mean(cho.data)[,2]


#Get PMSE, Perform k-means clustering on 2-17 PCS, and get ARI
km.cho<-list()
ari<-rep(0,numTimePoints-2)

for(k in 1:15) { 
  filename <- paste("normalizedChoMKLk",k,".arff",sep="") 
  print(filename) 
  x <- read.arff(filename) #remove first column 
  x <- x[,2:(k+3)] #sort em 
  x <- x[order(as.numeric(x[,1])),] #cut out the label now that its sorted 
  x <- x[,2:(k+2)] #kim changed this line
  rownames(x) <- c(1:dim(x)[1]) 
 a<-as.matrix(x[,2:(k+1)])
  D.cho<-dist(a, method = "euclidean")
 hc.average<-hclust(D.cho, method = "average", members=NULL)
 class.average<-cutree(hc.average, k = numClust)
 
 if (k==1){
   initial <-  tapply(a, list(cutree(hc.average, numClust),rep(1,nrow(a))),mean)
 }else{
    initial <- tapply(a, list(rep(cutree(hc.average, numClust), ncol(a)),col(a)),mean)
 }
 km.cho[[k]]<-kmeans(a, initial) 

  #Get ARI
  V<-list()
  
  for (m in 1:numClust)
  {  V[[m]]<-as.numeric(rownames(cho.data.std[km.cho[[k]]$cluster==m,]))
  }

  nij<-matrix(0,length(U),length(V))
  for (i in 1:numClust){
    for (j in 1:numClust)
    {  nij[i,j]<-length(intersect(U[[i]],V[[j]]))
    }
  }
  nidot<-apply(nij,1,sum)
  ndotj<-apply(nij,2,sum)
  n=sum(nij)
  ari[k]<- (sum(choose(nij,2)) - (sum(choose(nidot,2))*sum(choose(ndotj,2)))/(choose(n,2)))/(0.5*(sum(choose(nidot,2)) + sum(choose(ndotj,2))) - (sum(choose(nidot,2))*sum(choose(ndotj,2)))/((choose(n,2))))
} 

plot(1:15,ari,ylab="ARI  measure - how good clustering is",xlab="Number of PCs")
lines(1:15,ari)

Site Toolbox:

Personal tools
GNU Free Documentation License 1.2
This page was last modified on 2 April 2006, at 16:47.
Disclaimers - About BluWiki