Download Simulated Example: Prop1=95, Power1=7

Survey
yes no Was this document useful for you?
   Thank you for your participation!

* Your assessment is very important for improving the workof artificial intelligence, which forms the content of this project

Document related concepts

Quantitative comparative linguistics wikipedia , lookup

Transcript
Simulated Example:
Prop1 = 95, Power1 = 7, Sizes 100, 200, 300
Tova Fuller, Steve Horvath, Bin Zhang
Correspondence to [email protected]
This is an R tutorial explaining a variant of the simulated example in Zhang and Horvath
(2005). To cite this tutorial, please use the following reference.
Zhang B, Horvath S (2005) A General Framework for Weighted Gene Co-Expression
Network Analysis. Statistical Applications in Genetics and Molecular Biology.
See also our webpage at
http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/
The original simulated network example may be found at the following URL:
http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/SimulatedNetworkHor
vath.doc
Further details regarding module construction can be found at the tutorial referenced
above. The purpose of this tutorial is to demonstrate the behavior of both unweighted
and weighted networks with power of signal increased relative to the power of noise. To
accomplish this, we set power1 = 7. In this example, the power of the noise was not
modified from its value of 5.
In this example, we find a higher power1 value yields less robust results than with
power1 = 5. We obtain less robust biological signal to the choice of AF parameter, and
our connectivity measures perform poorly for high values of beta. This finding is
consistent with previous results suggesting a decreased value of power1 leads to a more
robust signal.
if(exists("modulecreation") )rm(modulecreation);
modulecreation=function(size1,prop1=.95,power1=7) {
# Note abolve prop1=1
# noise "background"
seq1=seq(1,size1)/size1
vnoise=( .85+(.95-.85)*seq1)^power1
SIMILARITY1=outer(vnoise,vnoise,FUN="*")
# this specifies the red noise rectangle of highly connected noise genes
SIMILARITY1[c((prop1*size1):size1), c((prop1*size1):size1)] =.95
# this specificies the signal part of the similarity matrix
vsignal=(1-.3*seq1)^power1
# signal part
SIMILARITY1[c(1:(size1*prop1)) ,c(1:(size1*prop1)) ]=
outer(vsignal,vsignal,FUN="pmin")[c(1:(size1*prop1)) , c(1:(size1*prop1)) ]
diag(SIMILARITY1)=1
list(ModuleCOR= SIMILARITY1, GS= vsignal, k=apply(SIMILARITY1,2,sum),
CC=ClusterCoef.fun, lastColumn = SIMILARITY1[,size1])
}
library(class)
library(cluster)
library(sma) # this is needed for plot.mat below
1
library(scatterplot3d)
source("C:/Documents and Settings/SHorvath/My
Documents/ADAG/JunDong/YEAST/YEASTtutorialApril2005/NetworkFunctions.txt")
#Memory
# check the maximum memory that can be allocated
memory.size(TRUE)
# increase the available memory
memory.limit(size=3448)
set.seed(1)
# module 1
n1=100;
par(mfrow=c(3,2))
mod1= modulecreation(n1)
module1=mod1[[1]];GS1= mod1[[2]];k1= mod1[[3]];CC1=mod1[[4]]
seq1=mod1$lastColumn
# This allows you to look at the constructed module
par(mfrow=c(1,1),mar=c(0,0,0,0))
reverse1=rank(-c(1:n1))
image(1-t(module1[reverse1,]))
# This bar reports the Gene significance measure of the genes.
signifBar = cbind(1-GS1, 1-GS1,1-GS1,1-GS1)
par(mfrow=c(1,1), mar=c(0, 0, 0, 0))
heatmap(as.matrix(t(signifBar)),Rowv=NA,Colv=NA,scale="none", xlab="", ylab="",
margins = c(0,0))
# module 2
n2=200;mod1= modulecreation(n2)
module2=mod1[[1]];GS2= mod1[[2]];k2= mod1[[3]];CC2=mod1[[4]]
seq2=mod1$lastColumn
# module 3
n3=300;mod1= modulecreation(n3)
module3=mod1[[1]];GS3= mod1[[2]];k3= mod1[[3]];CC3=mod1[[4]]
seq3=mod1$lastColumn
# grey genes
n4=500
2
nTotal=n1+n2+n3+n4
seqTotal= seq(1:nTotal)/(nTotal+1)
Similarity1= matrix(0 , nrow=nTotal, ncol=nTotal)
index1=c(1:n1);index2=c((n1+1):(n1+n2)); index3=c((n1+n2+1):(n1+n2+n3))
index4=c((n1+n2+n3+1):(n1+n2+n3+n4))
Similarity1[index1,index1]=module1; Similarity1[index2,index2]=module2
Similarity1[index3,index3]=module3
# now we add noise to the correlation matrix to make it more realistic
# the noise is added in a way to ensure that the entries lie between 0 and 1.
Similarity1=Similarity1+(1-Similarity1)*matrix(
runif(nTotal*nTotal)^5,nrow=nTotal,ncol=nTotal)
# here we symmetrize the similarity matrix
Similarity1=( t(Similarity1)+ Similarity1)/2
diag(Similarity1)=1
# here we verify that the entries lie between 0 and 1
summary(c(as.dist(Similarity1 )))
# true underlying module label
truemodule=rep(c("brown", "blue","turquoise","grey"),c(n1,n2,n3,n4))
# This is a color-coded picture of the similarity matrix
reverse1=rank(-c(1:nTotal))
par(mfrow=c(1,1),mar=c(0,0,0,0))
image(1-t(Similarity1[reverse1,]))
# The following shows the simulated module membership.
barplot(rep(1,nTotal),col=as.character(truemodule),space=0,border=
as.character(truemodule),offset=0)
# SCALE FREE TOPOLOGY CHECK FOR SOFT AND HARD Thresholding
# no.breaks1=specifies the number of points in a scale free plot.
# Unfortunately R^2 depends on this to some extent --> opportunity for research
no.breaks1=9
powers1=seq(1,9,by=1)
TableSoft=data.frame(matrix(NA,nrow=length(powers1),ncol=4))
names(TableSoft)=c("powers", "ScaleFreeRsquared", "Slope", "TruncatedRsquared")
par(mfrow=c(3,3), mar= c(2, 2, 2, 2) )
for (i in c(1:length(powers1)) ){
TableSoft[i,1]=powers1[i]
ADJsoft=abs(Similarity1)^powers1[i];diag(ADJsoft)=0;ksoft=apply(ADJsoft,2,sum)
3
TableSoft[i, 2:4]=ScaleFreePlot1(ksoft, truncated=T,AF1=
paste("beta=",as.character(powers1[i])), no.breaks=no.breaks1)
}
thresholds1= seq(from=.1,to=.9,by=0.1)
TableHard=data.frame(matrix(NA,nrow=length(thresholds1),ncol=4))
names(TableHard)=c("thresholds", "ScaleFreeRsquared", "Slope",
"TruncatedRsquared")
par(mfrow=c(3,3), mar= c(2, 2, 2, 2) )
for (i in c(1:length(thresholds1)) ){
TableHard[i,1]=thresholds1[i]
ADJhard=abs(Similarity1)>thresholds1[i];diag(ADJhard)=0;khard=apply(ADJhard,2,s
um)
TableHard[i, 2:4]=ScaleFreePlot1(khard, truncated=T,AF1=
paste("tau=",as.character(thresholds1[i])) ,no.breaks=no.breaks1)
}
4
par(mfrow=c(1,1),mar= c(5, 4, 4, 2) +0.1)
plot(1:length(powers1),
-sign(TableHard[,3])*TableHard[,2], type="n",ylab="Scale Free Topology
R^2",xlab="Adjacency Parameter Number", ylim=range(min(c(sign(TableSoft[,3])*TableSoft[,1], sign(TableHard[,3])*TableHard[,1]),na.rm=T),1) )
text(1:length(thresholds1), -sign(TableHard[,3])*TableHard[,2], labels=
thresholds1, col="black")
points(1:length(powers1), -sign(TableSoft[,3])*TableSoft[,2], type="n")
text(1:length(powers1), -sign(TableSoft[,3])*TableSoft[,2], labels=
as.character(powers1), col="red")
abline(h=.8)
# Based on where the "kink" occurs, the scale free topology
# criterion would lead us to choose the following parameters
# for the soft and hard adjacency function
# R^2>0.8
beta1=3 # this is the the power adjacency function parameter in power(s,beta)
tau1=0.7 # this parameter is hard threshold parameter in the signum function.
# Creating Scale Free Topology Plots
par(mfrow=c(2,1))
ADJsoft=abs(Similarity1)^beta1;diag(ADJsoft)=0;ksoft=apply(ADJsoft,2,sum)
ScaleFreePlot1(ksoft, truncated=T,AF1= paste("beta=",as.character(beta1)),
,no.breaks=no.breaks1)
ADJhard=I(Similarity1>tau1)+0.0; diag(ADJhard)=0;khard=apply(ADJhard,2,sum)
ScaleFreePlot1(khard,truncated=T,AF1=paste("tau=",as.character(tau1))
,no.breaks=no.breaks1 )
5
# Now we define the gene significance variable for the brown module
# We permute the GS measures in the other modules to make the example realistic
GS=c(GS1,sample(GS2)/2,sample(GS3)/2, sample(GS1,n4,replace=T)/2 )
# The following code assumes that we can identify the true underlying brown module.
# In reality this is not the case, which is why we revisit this issue below.
datconnectivitiesSoft=data.frame(matrix(NA,nrow=sum(truemodule=="brown"),ncol=l
ength(powers1)))
names(datconnectivitiesSoft)=paste("kWithinPower",powers1,sep="")
for (i in c(1:length(powers1)) ) {
datconnectivitiesSoft[,i]=apply(abs(Similarity1[truemodule=="brown",
truemodule=="brown"])^powers1[i],1,sum)}
SpearmanCorrelationsSoft=signif(cor(GS[ truemodule=="brown"],
datconnectivitiesSoft, method="s",use="p"))
datconnectivitiesHard=data.frame(matrix(NA,nrow=sum(truemodule=="brown"),ncol=l
ength(thresholds1)))
names(datconnectivitiesHard)=paste("kWithinPower",thresholds1,sep="")
for (i in c(1:length(thresholds1)) ) {
datconnectivitiesHard[,i]=apply(abs(Similarity1[truemodule=="brown",
truemodule=="brown"])>thresholds1[i],1,sum)}
SpearmanCorrelationsHard=signif(cor(GS[ truemodule=="brown"],
datconnectivitiesHard, method="s",use="p"))
par(mfrow=c(2,2))
plot(powers1, SpearmanCorrelationsSoft,type="n", main="Soft")
text(powers1, SpearmanCorrelationsSoft,labels=powers1,col="red")
abline(v=beta1,col="red")
plot(thresholds1, SpearmanCorrelationsHard,type="n",
main="Hard" ,ylim= range( c(SpearmanCorrelationsSoft,
SpearmanCorrelationsHard),na.rm=T) )
text(thresholds1, SpearmanCorrelationsHard,labels=thresholds1,col="black")
abline(v=tau1,col="red")
6
plot(-sign(TableHard[,3])*TableHard[,2],
SpearmanCorrelationsHard,type="n",ylim=range(c(SpearmanCorrelationsHard,
SpearmanCorrelationsSoft),na.rm=T),
xlim=range(c(-sign(TableHard[,3])*TableHard[,2], sign(TableSoft[,3])*TableSoft[,2]),na.rm=T))
text(-sign(TableHard[,3])*TableHard[,2] ,
SpearmanCorrelationsHard,label=as.character(thresholds1))
points(-sign(TableSoft[,3])*TableSoft[,2]
, SpearmanCorrelationsSoft, pch=as.character(powers1),col="red")
7
## TOM PLOT
# The following code computes the topological overlap matrix based on
# adjacency matrix.
dissGTOM1soft=TOMdist1(ADJsoft)
collect_garbage()
dissGTOM1hard=TOMdist1(ADJhard)
collect_garbage()
# resulting clustering tree will be used to define gene modules.
hierGTOM1soft <- hclust(as.dist(dissGTOM1soft),method="average");
hierGTOM1hard <- hclust(as.dist(dissGTOM1hard),method="average");
par(mfrow=c(1,2))
plot(hierGTOM1soft,labels=F,sub="",xlab="")
plot(hierGTOM1hard,labels=F,sub="",xlab="")
# here we chose the height cut-off such that many "true" brown module genes are
# included in the designated brown module.
h1soft= .95
h1hard= .8175
colorh1soft=as.character(modulecolor2(hierGTOM1soft,h1= h1soft, minsize1=20))
#colorh1soft=ifelse(colorh1soft=="grey","brown",colorh1soft)
table(colorh1soft)
colorh1hard=as.character(modulecolor2(hierGTOM1hard,h1= h1hard,minsize1=20))
#colorh1hard=ifelse(colorh1hard=="grey","brown",colorh1hard)
table(colorh1hard)
par(mfrow=c(3,2))
plot(hierGTOM1soft,labels=F,sub="",xlab="")
plot(hierGTOM1hard,labels=F,sub="",xlab="")
hclustplot1(hierGTOM1soft,truemodule,title1="True module colors")
hclustplot1(hierGTOM1hard,truemodule,title1="True module colors")
hclustplot1(hierGTOM1soft, colorh1soft,title1="Identified module colors")
hclustplot1(hierGTOM1hard, colorh1hard,title1="Identified module colors")
table(colorh1soft,truemodule)
table(colorh1hard,truemodule)
> table(colorh1soft,truemodule)
truemodule
colorh1soft blue brown grey turquoise
blue
172
0
9
0
brown
0
69
2
0
grey
28
30 450
4
turquoise
0
1
39
296
8
> table(colorh1hard,truemodule)
truemodule
colorh1hard blue brown grey turquoise
blue
62
0
4
0
brown
0
25
0
0
grey
138
75 495
205
turquoise
0
0
1
95
Soft thresholding does a better job at cluster retrieval.
Note the appearance of the true module color plots – this demonstrates the effect of
raising power of signal relative to the power of noise.
9
par(mfrow=c(1,1))
# if you have patience and enough stack memory, try to run this code
#TOMplot1(dissGTOM1soft , hierGTOM1soft, colorh1soft)
#TOMplot1(dissGTOM1hard , hierGTOM1hard, colorh1hard)
# Instead, I will restrict the TOM plot to the most connected genes.
restrict1= ksoft>32.733
table(restrict1)
>table(restrict1)
restrict1
FALSE TRUE
500
600
dissGTOM1softrest=TOMdist1(ADJsoft[restrict1,restrict1])
collect_garbage()
dissGTOM1hardrest=TOMdist1(ADJhard[restrict1,restrict1])
hierGTOM1softrest <- hclust(as.dist(dissGTOM1softrest),method="average");
hierGTOM1hardrest <hclust(as.dist(dissGTOM1hardrest),method="average");
10
# SOFT TOM PLOT
par(mfrow=c(1,1))
TOMplot1(dissGTOM1softrest , hierGTOM1softrest, truemodule[restrict1])
11
# HARD TOM PLOT
TOMplot1(dissGTOM1hardrest , hierGTOM1hardrest, colorh1hard[restrict1])
12
# We also propose to use classical multi-dimensional scaling plots
# for visualizing the network. Here we chose 2 scaling dimensions
cmd1=cmdscale(as.dist(dissGTOM1soft),2)
cmd2=cmdscale(as.dist(dissGTOM1hard),2)
par(mfrow=c(1,2))
plot(cmd1, col=as.character(colorh1soft),
Axis 1", ylab="Scaling Axis 2")
plot(cmd2, col=as.character(colorh1hard),
Axis 1", ylab="Scaling Axis 2")
main="Soft MDS plot",xlab="Scaling
main="Hard MDS plot",xlab="Scaling
13
# Just to be sure let’s re-define the adjacency matrix and connectivities used.
ADJhard=I(Similarity1>tau1)+0.0; diag(ADJhard)=0;khard=apply(ADJhard,2,sum)
ADJsoft=abs(Similarity1)^beta1;diag(ADJsoft)=0;ksoft=apply(ADJsoft,2,sum)
ADJhardbrown=I(abs(Similarity1[colorh1hard=="brown", colorh1hard=="brown"])
>tau1)+0.0; diag(ADJhardbrown)=0;khardbrown=apply(ADJhardbrown,2,sum)
ADJsoftbrown=abs(Similarity1[colorh1soft=="brown",
colorh1soft=="brown"])^beta1;diag(ADJsoftbrown)=0;ksoftbrown=apply(ADJsoftbrown
,2,sum)
# This computes the intramodular connectivities
AllDegreesSoft=DegreeInOut(ADJsoft,colorh1soft)
AllDegreesHard=DegreeInOut(ADJhard,colorh1hard)
# Here we compute the soft and the hard cluster coefficients
cluster.coefSoft= ClusterCoef.fun(ADJsoft)
cluster.coefHard= ClusterCoef.fun(ADJhard)
# Now we plot cluster coefficient versus connectivity
# for all genes. We use the true module coloring
par(mfrow=c(2,1))
plot(ksoft, cluster.coefSoft,col=truemodule,ylab="Cluster
Coefficient",xlab="Connectivity",main="Soft")
#whichmodule="turquoise"
#plot(AllDegreesSoftkWithin[truemodule==whichmodule],
#cluster.coefSoft[truemodule==whichmodule],col=whichmodule,ylab="Cluster
#Coefficient",xlab="Connectivity",main=whichmodule)
plot(khard, cluster.coefHard,col=truemodule,ylab="Cluster
Coefficient",xlab="Connectivity",main="Hard")
14
Comment: for the high connectivity genes in each module CC is inversely related to K in
unweighted networks (hard thresholding). In contrast, there is a slightly positive (or
constant) relationship among hub genes in weighted networks.
#Gene significance analysis
# Now we study how the gene significance is related to connectivity in the brown module
datconnectivitiesSoft=data.frame(matrix(666,nrow=sum(colorh1soft=="brown"),ncol
=length(powers1)))
names(datconnectivitiesSoft)=paste("kWithinPower",powers1,sep="")
for (i in c(1:length(powers1)) ) {
datconnectivitiesSoft[,i]=apply(abs(Similarity1[colorh1soft=="brown",
colorh1soft=="brown"])^powers1[i],1,sum)}
SpearmanCorrelationsSoft=signif(cor(GS[ colorh1soft=="brown"],
datconnectivitiesSoft, method="s",use="p"))
datconnectivitiesHard=data.frame(matrix(666,nrow=sum(colorh1hard=="brown"),ncol
=length(thresholds1)))
names(datconnectivitiesHard)=paste("kWithinPower",thresholds1,sep="")
for (i in c(1:length(thresholds1)) ) {
datconnectivitiesHard[,i]=apply(abs(Similarity1[colorh1hard=="brown",
colorh1hard=="brown"])>thresholds1[i],1,sum)}
SpearmanCorrelationsHard=signif(cor(GS[ colorh1hard=="brown"],
datconnectivitiesHard, method="s",use="p"))
par(mfrow=c(2,2))
plot(powers1, SpearmanCorrelationsSoft,type="n" ,xlab="Power (beta)" ,
main="Soft")
text(powers1, SpearmanCorrelationsSoft,labels=powers1,col="red")
abline(v=beta1,col="red")
plot(thresholds1, SpearmanCorrelationsHard,type="n" ,ylim= range(
c(SpearmanCorrelationsHard),na.rm=T) ,xlab="Thresholds (tau)" ,main="Hard")
text(thresholds1, SpearmanCorrelationsHard,labels=thresholds1,col="black")
abline(v=tau1,col="red")
plot(-sign(TableHard[,3])*TableHard[,2], SpearmanCorrelationsHard,type="n" ,
ylim=range(c(SpearmanCorrelationsHard, SpearmanCorrelationsSoft),na.rm=T),
xlim=range(c(-sign(TableHard[,3])*TableHard[,2], sign(TableSoft[,3])*TableSoft[,2]),na.rm=T),xlab="Signed Scale Free
R^2",ylab="Correlation(k, Gene Significance)" )
text(-sign(TableHard[,3])*TableHard[,2],
SpearmanCorrelationsHard,label=as.character(thresholds1))
points(-sign(TableSoft[,3])*TableSoft[,2], SpearmanCorrelationsSoft, type="n")
text(-sign(TableSoft[,3])*TableSoft[,2],
SpearmanCorrelationsSoft,labels=as.character(powers1),col="red")
15
Our results here demonstrate that increasing power1 to 7 leads to less robust results. In
addition, note that spearman correlation is highest for beta = 1, and is a monotonically
decreasing function. By contrast, with power1 = 3, spearman correlation is highest for
beta = 4.
# Now we want to see how the correlation between kWithin and gene significance
# changes for different hard threshold values tau within the BROWN module.
whichmodule="brown"
# the following data frame contains the intramodular connectivities
# corresponding to different hard thresholds
datconnectivitiesHard=data.frame(matrix(666,nrow=sum(colorh1hard==which
module),ncol=length(thresholds1)))
names(datconnectivitiesHard)=paste("kWithinTau",thresholds1,sep="")
for (i in c(1:length(thresholds1)) ) {
datconnectivitiesHard[,i]=apply(abs(Similarity1[colorh1hard==whichmodul
e, colorh1hard==whichmodule])>=thresholds1[i],1,sum)}
SpearmanCorrelationsHard=signif(cor(GS[ colorh1hard==whichmodule],
datconnectivitiesHard, method="s",use="p"))
# Now we define the new connectivity measure omega based on the TOM
matrix
# It simply considers TOM as adjacency matrix...
datOmegaINHard=data.frame(matrix(666,nrow=sum(colorh1hard==whichmodule)
,ncol=length(thresholds1)))
names(datOmegaINHard)=paste("omegaWithinHard",thresholds1,sep="")
for (i in c(1:length(thresholds1)) ) {
datconnectivitiesHard[,i]=apply(
16
1-TOMdist1(abs(Similarity1[colorh1hard==whichmodule,
colorh1hard==whichmodule])>thresholds1[i]),1,sum)}
SpearmanCorrelationsOmegaHard=as.vector(signif(cor(GS[
colorh1hard==whichmodule], datconnectivitiesHard, method="s",use="p")))
# Now we define the intramodular cluster coefficient
datCCinHard=data.frame(matrix(666,nrow=sum(colorh1hard==whichmodule),nc
ol=length(thresholds1)))
names(datCCinHard)=paste("CCinHard",thresholds1,sep="")
for (i in c(1:length(thresholds1)) ) {
datCCinHard[,i]=
ClusterCoef.fun(abs(Similarity1[colorh1hard==whichmodule,
colorh1hard==whichmodule])>thresholds1[i])}
SpearmanCorrelationsCCinHard=as.vector(signif(cor(GS[
colorh1hard==whichmodule], datCCinHard, method="s",use="p")))
# Now we compare the performance of the connectivity measures (k.in,
# omega.in, cluster coefficience) across different hard thresholds when it comes to
# predicting prognostic genes in the brown module
dathelpHard=data.frame(signedRsquared=-sign(TableHard[,3])*TableHard[,2],
corGSkINHard =as.vector(SpearmanCorrelationsHard), corGSwINHard=
as.vector(SpearmanCorrelationsOmegaHard),corGSCCHard=as.vector(SpearmanCorrelat
ionsCCinHard))
# Now we focus on soft thresholding
# the following data frame contains the intramodular connectivities
# corresponding to different soft powers
datconnectivitiesSoft=data.frame(matrix(666,nrow=sum(colorh1soft==which
module),ncol=length(powers1)))
names(datconnectivitiesSoft)=paste("kWithinTau",powers1,sep="")
for (i in c(1:length(powers1)) ) {
datconnectivitiesSoft[,i]=apply(abs(Similarity1[colorh1soft==whichmodul
e, colorh1soft==whichmodule])^powers1[i],1,sum)}
SpearmanCorrelationsSoft=signif(cor(GS[ colorh1soft==whichmodule],
datconnectivitiesSoft, method="s",use="p"))
# Now we define the new connectivity measure omega based on the TOM
#matrix
# It simply considers TOM as adjacency matrix...
datOmegaINSoft=data.frame(matrix(666,nrow=sum(colorh1soft==whichmodule)
,ncol=length(powers1)))
names(datOmegaINSoft)=paste("omegaWithinSoft",powers1,sep="")
for (i in c(1:length(powers1)) ) {
datconnectivitiesSoft[,i]=apply(
1-TOMdist1(abs(Similarity1[colorh1soft==whichmodule,
colorh1soft==whichmodule])^powers1[i]),1,sum)}
SpearmanCorrelationsOmegaSoft=as.vector(signif(cor(GS[
colorh1soft==whichmodule], datconnectivitiesSoft, method="s",use="p")))
# Now we define the intramodular cluster coefficient
datCCinSoft=data.frame(matrix(666,nrow=sum(colorh1soft==whichmodule),nc
ol=length(powers1)))
17
names(datCCinSoft)=paste("CCinSoft",powers1,sep="")
for (i in c(1:length(powers1)) ) {
datCCinSoft[,i]=
ClusterCoef.fun(abs(Similarity1[colorh1soft==whichmodule,
colorh1soft==whichmodule])^powers1[i])}
SpearmanCorrelationsCCinSoft=as.vector(signif(cor(GS[
colorh1soft==whichmodule], datCCinSoft, method="s",use="p")))
# Now we compare the performance of the connectivity measures (k.in,
# omega.in, cluster coefficience) across different soft powers when it comes to predicting
# prognostic genes in the brown module
dathelpSoft=data.frame(signedRsquared=-sign(TableSoft[,3])*TableSoft[,2],
corGSkINSoft =as.vector(SpearmanCorrelationsSoft), corGSwINSoft=
as.vector(SpearmanCorrelationsOmegaSoft),corGSCCSoft=as.vector(SpearmanCorrelat
ionsCCinSoft))
par(mfrow=c(1,1))
matplot(thresholds1,dathelpHard,type="l",lty=1,lwd=3,col=c("black","red","blue"
,"green"),ylab="",xlab="tau",main="Hard")
legend(0.65,0.2, c("signed R^2","r(GS,k.in)","r(GS,omega.in)","r(GS,cc.in)"),
col=c("black","red","blue","green"), lty=1,lwd=3,ncol = 1, cex=1)
abline(v=tau1,col="red")
matplot(powers1,dathelpSoft,type="l",lty=1,lwd=3,col=c("black","red","blue","gr
een"),ylab="",xlab="beta",main="Soft")
legend(6.8,0.65, c("signed R^2","r(GS,k.in)","r(GS,omega.in)","r(GS,cc.in)"),
col=c("black","red","blue","green"), lty=1,lwd=3,ncol = 1, cex=1)
abline(v=beta1,col="red")
18
Yet again, we find that increasing power1 to 7 leads to less robust results. This finding is
consistent with the fact that decreasing power1 to 3 leads to more robust results.
Specifically, k.in, omega.in, and cc.in only perform well for low values of beta, and
perform worse at high values of beta than previously seen for power1 = 5.
THE END
19