## ----message = FALSE,warning=FALSE,eval = FALSE------------------------------- # ### Prepare a TCGA gene expression dataset for analysis. # library(CancerSubtypes) # #library("RTCGA.mRNA") # rm(list = ls()) # data(BRCA.mRNA) # mRNA=t(as.matrix(BRCA.mRNA[,-1])) # colnames(mRNA)=BRCA.mRNA[,1] # # ###To observe the mean, variance and Median Absolute Deviation distribution of the dataset, it helps users to get the distribution characteristics of the data, e.g. To evaluate whether the dataset fits a normal distribution or not. # data.checkDistribution(mRNA) ## ----eval = FALSE------------------------------------------------------------- # index=which(is.na(mRNA)) # res1=data.imputation(mRNA,fun="median") # res2=data.imputation(mRNA,fun="mean") # res3=data.imputation(mRNA,fun="microarray") ## ----eval = FALSE------------------------------------------------------------- # result1=data.normalization(mRNA,type="feature_Median",log2=FALSE) # result2=data.normalization(mRNA,type="feature_zscore",log2=FALSE) ## ----eval = FALSE------------------------------------------------------------- # ###The top 1000 most variance features will be selected. # data1=FSbyVar(mRNA, cut.type="topk",value=1000) # ###The features with (variance>0.5) are selected. # data2=FSbyVar(mRNA, cut.type="cutoff",value=0.5) ## ----eval = FALSE------------------------------------------------------------- # data1=FSbyMAD(mRNA, cut.type="topk",value=1000) # data2=FSbyMAD(mRNA, cut.type="cutoff",value=0.5) ## ----eval = FALSE------------------------------------------------------------- # mRNA1=data.imputation(mRNA,fun="microarray") # data1=FSbyPCA(mRNA1, PC_percent=0.9,scale = TRUE) ## ----eval = FALSE------------------------------------------------------------- # data(GeneExp) # data(time) # data(status) # data1=FSbyCox(GeneExp,time,status,cutoff=0.05) ## ----eval = FALSE------------------------------------------------------------- # ### The input dataset is single gene expression matrix. # data(GeneExp) # result=ExecuteCC(clusterNum=3,d=GeneExp,maxK=10,clusterAlg="hc",distance="pearson",title="GBM") # # ### The input dataset is multi-genomics data as a list # data(GeneExp) # data(miRNAExp) # GBM=list(GeneExp=GeneExp,miRNAExp=miRNAExp) # result=ExecuteCC(clusterNum=3,d=GBM,maxK=10,clusterAlg="hc",distance="pearson",title="GBM") ## ----eval = FALSE------------------------------------------------------------- # ### The input dataset is single gene expression matrix. # data(GeneExp) # result=ExecuteCNMF(GeneExp,clusterNum=3,nrun=30) # # ### The input dataset is multi-genomics data as a list # data(GeneExp) # data(miRNAExp) # GBM=list(GeneExp=GeneExp,miRNAExp=miRNAExp) # result=ExecuteCNMF(GBM,clusterNum=3,nrun=30) ## ----eval = FALSE------------------------------------------------------------- # data(GeneExp) # data(miRNAExp) # data1=FSbyVar(GeneExp, cut.type="topk",value=1000) # data2=FSbyVar(miRNAExp, cut.type="topk",value=300) # GBM=list(GeneExp=data1,miRNAExp=data2) # result=ExecuteiCluster(datasets=GBM, k=3, lambda=list(0.44,0.33,0.28)) ## ----eval = FALSE------------------------------------------------------------- # data(GeneExp) # data(miRNAExp) # GBM=list(GeneExp=GeneExp,miRNAExp=miRNAExp) # result=ExecuteSNF(GBM, clusterNum=3, K=20, alpha=0.5, t=20) ## ----eval = FALSE------------------------------------------------------------- # data(GeneExp) # data(miRNAExp) # data(time) # data(status) # data1=FSbyCox(GeneExp,time,status,cutoff=0.05) # data2=FSbyCox(miRNAExp,time,status,cutoff=0.05) # GBM=list(GeneExp=data1,miRNAExp=data2) # result=ExecuteSNF.CC(GBM, clusterNum=3, K=20, alpha=0.5, t=20,maxK = 10, pItem = 0.8,reps=500, # title = "GBM", plot = "png", finalLinkage ="average") ## ----eval = FALSE------------------------------------------------------------- # data(GeneExp) # data(miRNAExp) # data(Ranking) # ####Retrieve there feature ranking for genes # gene_Name=rownames(GeneExp) # index1=match(gene_Name,Ranking$mRNA_TF_miRNA.v21_SYMBOL) # gene_ranking=data.frame(gene_Name,Ranking[index1,],stringsAsFactors=FALSE) # index2=which(is.na(gene_ranking$ranking_default)) # gene_ranking$ranking_default[index2]=min(gene_ranking$ranking_default,na.rm =TRUE) # # ####Retrieve there feature ranking for genes # miRNA_ID=rownames(miRNAExp) # index3=match(miRNA_ID,Ranking$mRNA_TF_miRNA_ID) # miRNA_ranking=data.frame(miRNA_ID,Ranking[index3,],stringsAsFactors=FALSE) # index4=which(is.na(miRNA_ranking$ranking_default)) # miRNA_ranking$ranking_default[index4]=min(miRNA_ranking$ranking_default,na.rm =TRUE) # ###Clustering # ranking1=list(gene_ranking$ranking_default ,miRNA_ranking$ranking_default) # GBM=list(GeneExp,miRNAExp) # result=ExecuteWSNF(datasets=GBM, feature_ranking=ranking1, beta = 0.8, clusterNum=3, # K = 20,alpha = 0.5, t = 20, plot = TRUE) ## ----eval = FALSE------------------------------------------------------------- # data(GeneExp) # data(miRNAExp) # GBM=list(GeneExp=GeneExp,miRNAExp=miRNAExp) # result=ExecuteSNF(GBM, clusterNum=3, K=20, alpha=0.5, t=20,plot = FALSE) # # ###Similarity smaple matrix # sil=silhouette_SimilarityMatrix(result$group, result$distanceMatrix) # plot(sil) ## ----eval = FALSE------------------------------------------------------------- # sil1=silhouette(result$group, result$distanceMatrix) # plot(sil1) ##wrong result ## ----eval = FALSE------------------------------------------------------------- # data(GeneExp) # data(miRNAExp) # data(time) # data(status) # data1=FSbyCox(GeneExp,time,status,cutoff=0.05) # data2=FSbyCox(miRNAExp,time,status,cutoff=0.05) # GBM=list(GeneExp=data1,miRNAExp=data2) # # #### 1.ExecuteSNF # result1=ExecuteSNF(GBM, clusterNum=3, K=20, alpha=0.5, t=20,plot = FALSE) # group1=result1$group # distanceMatrix1=result1$distanceMatrix # p_value=survAnalysis(mainTitle="GBM1",time,status,group1, # distanceMatrix1,similarity=TRUE) ## ----eval = FALSE,results="hide",message=FALSE-------------------------------- # #### 2.ExecuteSNF.CC # result2=ExecuteSNF.CC(GBM, clusterNum=3, K=20, alpha=0.5, t=20, # maxK = 5, pItem = 0.8,reps=500, # title = "GBM2", plot = "png", # finalLinkage ="average") ## ----eval = FALSE------------------------------------------------------------- # group2=result2$group # distanceMatrix2=result2$distanceMatrix # p_value=survAnalysis(mainTitle="GBM2",time,status,group2, # distanceMatrix2,similarity=TRUE) # ## ----eval = FALSE,warning=FALSE----------------------------------------------- # data(GeneExp) # data(miRNAExp) # data(time) # data(status) # GBM=list(GeneExp=GeneExp,miRNAExp=miRNAExp) # result=ExecuteSNF(GBM, clusterNum=3, K=20, alpha=0.5, t=20,plot = FALSE) # group=result$group # sigclust=sigclustTest(miRNAExp,group, nsim=1000, nrep=1, icovest=1) # sigclust ## ----results="hide",message=FALSE,fig.show="hide",warning=FALSE,eval = FALSE---- # # library("RTCGA.mRNA") # #require(TCGAbiolinks) # rm(list = ls()) # data(BRCA.mRNA) # mRNA=t(as.matrix(BRCA.mRNA[,-1])) # colnames(mRNA)=BRCA.mRNA[,1] # mRNA1=data.imputation(mRNA,fun="microarray") # mRNA1=FSbyMAD(mRNA1, cut.type="topk",value=5000) # # ###Split the normal and tumor samples # index=which(as.numeric(substr(colnames(mRNA1),14,15))>9) # mRNA_normal=mRNA1[,index] # mRNA_tumor=mRNA1[,-index] # # ### Remove the duplicate samples # index1=which(as.numeric(substr(colnames(mRNA_tumor),14,15))>1) # mRNA_tumor=mRNA_tumor[,-index1] # # ##### Identify cancer subtypes # result=ExecuteCC(clusterNum=5,d=mRNA_tumor,maxK=5,clusterAlg="hc",distance="pearson",title="BRCA") # group=result$group # res=DiffExp.limma(Tumor_Data=mRNA_tumor,Normal_Data=mRNA_normal,group=group,topk=NULL,RNAseq=FALSE) ## ----eval = FALSE------------------------------------------------------------- # ## Differently expression genes in subtype 1 # head(res[[1]]) # # ## Differently expression genes in subtype 2 # head(res[[2]])