update

jyyulab · Jul 30, 2019 · c393410 · c393410
1 parent 579ef16
commit c393410
Show file tree

Hide file tree

Showing 9 changed files with 49 additions and 30 deletions.
diff --git a/NetBID2_0.1.2.pdf b/NetBID2_0.1.2.pdf
diff --git a/R/pipeline_functions.R b/R/pipeline_functions.R
@@ -2618,8 +2618,10 @@ generate.masterTable <- function(use_comp=NULL,DE=NULL,DA=NULL,
     gene_label <-base::paste(tmp1[rn,'external_gene_name'],funcType,sep = '_')
   }
   #
-  label_info <- data.frame('gene_label'=gene_label,'geneSymbol'=geneSymbol,
-                           'originalID'=rn,'originalID_label'=rn_label,'funcType'=funcType,'Size'=use_size,stringsAsFactors=FALSE)
+  #label_info <- data.frame('gene_label'=gene_label,'geneSymbol'=geneSymbol,
+  #                         'originalID'=rn,'originalID_label'=rn_label,'funcType'=funcType,'Size'=use_size,stringsAsFactors=FALSE)
+  label_info <- data.frame('originalID_label'=rn_label,'originalID'=rn,'gene_label'=gene_label,'geneSymbol'=geneSymbol,
+                           'funcType'=funcType,'Size'=use_size,stringsAsFactors=FALSE)
   add_info <- tmp1[rn,]
   #
   combine_info <- lapply(use_comp,function(x){
@@ -5692,7 +5694,7 @@ draw.funcEnrich.cluster <- function(funcEnrich_res=NULL,top_number=30,Pv_col='Or
   funcEnrich_res <- funcEnrich_res[which(funcEnrich_res[,Pv_col]<=Pv_thre),]
   if(nrow(funcEnrich_res)>top_number) funcEnrich_res <- funcEnrich_res[1:top_number,]
   pv_val <- funcEnrich_res[,Pv_col]; names(pv_val) <- rownames(funcEnrich_res)
-  all_g2s <- sapply(funcEnrich_res[,item_col],function(x1)unlist(strsplit(x1,';')))
+  all_g2s <- lapply(funcEnrich_res[,item_col],function(x1)unlist(strsplit(x1,';')))
   names(all_g2s) <- funcEnrich_res[,name_col]
   mat1 <- t(list2mat(all_g2s))
   mat1 <- mat1[rev(funcEnrich_res[,name_col]),]
@@ -6355,10 +6357,11 @@ draw.GSEA <- function(rank_profile=NULL,use_genes=NULL,use_direction=NULL,main="
       }
       annotation <- sprintf("%s\nKS test p-value:%s",es,pv)
     }
+    pp <- par()$usr
     if(es_res$RES[base::which.max(abs(es_res$RES))]>0)
-      graphics::text(r_len,base::max(y2)-par.char2pos()[2],annotation,pos=2,cex=annotation_cex,xpd=TRUE)
+      graphics::text(r_len,pp[4]-2*par.char2pos()[2],annotation,pos=2,cex=annotation_cex,xpd=TRUE)
     else
-      graphics::text(0,base::min(y2)+par.char2pos()[2],annotation,pos=4,cex=annotation_cex,xpd=TRUE)
+      graphics::text(0,pp[3]+2*par.char2pos()[2],annotation,pos=4,cex=annotation_cex,xpd=TRUE)
   }
   if(is.null(pdf_file)==FALSE){plot_part(ori=TRUE);dev.off()} else {plot_part()}
   return(TRUE)
@@ -7346,7 +7349,7 @@ draw.targetNet <- function(source_label="",source_z=NULL,edge_score=NULL,
                            label_cex=0.7,source_cex=1,
                            pdf_file=NULL,arrow_direction='out',n_layer=1,alphabetical_order=FALSE){
   #
-  all_input_para <- c('source_label','source_z','edge_score','label_cex','source_cex',
+  all_input_para <- c('source_label','edge_score','label_cex','source_cex',
                       'arrow_direction','n_layer','alphabetical_order')
   check_res <- sapply(all_input_para,function(x)check_para(x,envir=environment()))
   if(min(check_res)==0){message('Please check and re-try!');return(FALSE)}

diff --git a/README.md b/README.md
@@ -61,7 +61,7 @@ devtools::install_deps(pkg = ".", dependencies = TRUE) ## Install package depend
 download the directory to your workspace and then run:
 
 ```R
-install.packages('NetBID2_0.1.2.tar.gz',repos=NULL,dependencies=TRUE) ## 
+devtools::install_local('NetBID2_0.1.2.tar.gz') ## 
 ```
 
 # Manual & Tutorial

diff --git a/demo_scripts/analysis_and_plot_demo1.R b/demo_scripts/analysis_and_plot_demo1.R
@@ -110,27 +110,31 @@ draw.heatmap(mat=exp_mat,use_genes=ms_tab[driver_list,'originalID'],use_gene_lab
              use_samples=colnames(exp_mat),use_sample_label=phe_info[colnames(exp_mat),'geo_accession'],
              phenotype_info=phe_info,use_phe=c('gender','pathology','subgroup','age'),main='Expression for Top drivers',scale='row',
              cluster_rows=TRUE,cluster_columns=TRUE,clustering_distance_rows='pearson',clustering_distance_columns='pearson',
-             row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo1.pdf',analysis.par$out.dir.PLOT))
+             row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo1.pdf',analysis.par$out.dir.PLOT),
+             pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 
 draw.heatmap(mat=ac_mat,use_genes=driver_list,use_gene_label=ms_tab[driver_list,'gene_label'],
              use_samples=colnames(exp_mat),use_sample_label=phe_info[colnames(exp_mat),'geo_accession'],
              phenotype_info=phe_info,use_phe=c('gender','pathology','subgroup','age'),main='Activity for Top drivers',scale='row',
              cluster_rows=TRUE,cluster_columns=TRUE,clustering_distance_rows='pearson',clustering_distance_columns='pearson',
-             row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo2.pdf',analysis.par$out.dir.PLOT))
+             row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo2.pdf',analysis.par$out.dir.PLOT),
+             pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 
 # Draw heatmaps using top DE genes
 gene_list <- rownames(sig_gene)
 draw.heatmap(mat=exp_mat,use_genes=ms_tab[gene_list,'originalID'],use_gene_label=ms_tab[gene_list,'geneSymbol'],
              use_samples=colnames(exp_mat),use_sample_label=phe_info[colnames(exp_mat),'geo_accession'],
              phenotype_info=phe_info,use_phe=c('gender','pathology','subgroup','age'),main='Expression for Top drivers',scale='row',
              cluster_rows=TRUE,cluster_columns=TRUE,clustering_distance_rows='pearson',clustering_distance_columns='pearson',
-             row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo3.pdf',analysis.par$out.dir.PLOT))
+             row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo3.pdf',analysis.par$out.dir.PLOT),
+             pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 
 draw.heatmap(mat=ac_mat,use_genes= gene_list,use_gene_label=ms_tab[gene_list,'gene_label'],
              use_samples=colnames(exp_mat),use_sample_label=phe_info[colnames(exp_mat),'geo_accession'],
              phenotype_info=phe_info,use_phe=c('gender','pathology','subgroup','age'),main='Activity for Top drivers',scale='row',
              cluster_rows=TRUE,cluster_columns=TRUE,clustering_distance_rows='pearson',clustering_distance_columns='pearson',
-             row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo4.pdf',analysis.par$out.dir.PLOT))
+             row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo4.pdf',analysis.par$out.dir.PLOT),
+             pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 
 ### QI.4: What are the biological functions of these top DA drivers ?
 
@@ -262,16 +266,19 @@ test.targetNet.overlap(source1_label=ms_tab[use_driver,'gene_label'],source2_lab
 use_obs_class <- get_obs_label(phe_info = phe_info,'subgroup')
 draw.categoryValue(ac_val=ac_mat[use_driver,],exp_val=exp_mat[ms_tab[use_driver,'originalID'],],use_obs_class=use_obs_class,
                    class_order=c('WNT','SHH','G4'),class_srt=30,main_ac = ms_tab[use_driver,'gene_label'],main_exp=ms_tab[use_driver,'geneSymbol'],
-                   pdf_file=sprintf('%s/categoryValue_demo1.pdf',analysis.par$out.dir.PLOT))
+                   pdf_file=sprintf('%s/categoryValue_demo1.pdf',analysis.par$out.dir.PLOT),
+                   pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 #
 draw.categoryValue(ac_val=ac_mat[use_driver,],exp_val=NULL,use_obs_class=use_obs_class,class_order=c('WNT','SHH','G4'),
-                   pdf_file=sprintf('%s/categoryValue_demo2.pdf',analysis.par$out.dir.PLOT))
+                   pdf_file=sprintf('%s/categoryValue_demo2.pdf',analysis.par$out.dir.PLOT),
+                   pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 
 #
 use_obs_class <- get_obs_label(phe_info = phe_info,c('subgroup','gender'))
 draw.categoryValue(ac_val=ac_mat[use_driver,],exp_val=exp_mat[ms_tab[use_driver,'originalID'],],use_obs_class=use_obs_class,
                    class_srt=30,main_ac = ms_tab[use_driver,'gene_label'],main_exp=ms_tab[use_driver,'geneSymbol'],
-                   pdf_file=sprintf('%s/categoryValue_demo3.pdf',analysis.par$out.dir.PLOT))
+                   pdf_file=sprintf('%s/categoryValue_demo3.pdf',analysis.par$out.dir.PLOT),
+                   pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 
 ### QII.4: What are the functions of the target genes of this selected driver ?
 
@@ -317,7 +324,8 @@ sig_gs <- draw.volcanoPlot(dat= DA_gs_bid,label_col='ID',logFC_col='logFC',
 
 # Draw heatmap for top significant gene sets
 draw.heatmap(mat=ac_gs[sig_gs$ID,],pdf_file=sprintf('%s/heatmap_GS.pdf',analysis.par$out.dir.PLOT),scale='row',
-             phenotype_info=phe_info,use_phe=c('gender','subgroup'))
+             phenotype_info=phe_info,use_phe=c('gender','subgroup'),
+             pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 
 # Draw GSEA plot for top significant gene sets
 DE <- analysis.par$DE[[comp_name]]
@@ -354,7 +362,8 @@ draw.GSEA(rank_profile=DE_profile,use_genes=use_target_genes,
 use_obs_class <- get_obs_label(phe_info = phe_info,'subgroup')
 draw.categoryValue(ac_val=ac_gs[use_gs,],use_obs_class=use_obs_class,
                    class_order=c('WNT','SHH','G4'),class_srt=30,pdf_file=sprintf('%s/categoryValue_GS_demo1.pdf',analysis.par$out.dir.PLOT),
-                   main_ac= use_gs,main_cex=0.8)
+                   main_ac= use_gs,main_cex=0.8,
+                   pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 
 ### QIII.2: How to find drivers share significantly overlapped target genes ?
 

diff --git a/demo_scripts/pipeline_analysis_demo1.R b/demo_scripts/pipeline_analysis_demo1.R
@@ -54,7 +54,9 @@ analysis.par$merge.ac.eset <- generate.eset(exp_mat=ac_mat,phenotype_info=pData(
                                             feature_info=NULL,annotation_info='activity in net-dataset')
 
 # QC plot for activity eset
-draw.eset.QC(analysis.par$merge.ac.eset,outdir=analysis.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='AC_')
+draw.eset.QC(analysis.par$merge.ac.eset,outdir=analysis.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='AC_',
+             pre_define=c('WNT'='blue','SHH'='red','G4'='green'),pca_plot_type='2D.interactive')
+
 
 # Save Step 2 analysis.par as RData
 NetBID.saveRData(analysis.par=analysis.par,step='act-get')

diff --git a/demo_scripts/pipeline_network_demo1.R b/demo_scripts/pipeline_network_demo1.R
@@ -29,7 +29,8 @@ net_eset <- update_eset.phenotype(use_eset=net_eset,use_phenotype_info=pData(net
 network.par$net.eset <- net_eset
 
 # QC for the raw eset
-draw.eset.QC(network.par$net.eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='beforeQC_')
+draw.eset.QC(network.par$net.eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='beforeQC_',
+             pre_define=c('WNT'='blue','SHH'='red','G4'='green'),pca_plot_type='2D.interactive')
 
 # Save Step 1 network.par as RData
 NetBID.saveRData(network.par = network.par,step='exp-load')
@@ -73,7 +74,8 @@ net_eset <- generate.eset(exp_mat=mat, phenotype_info=pData(network.par$net.eset
 network.par$net.eset <- net_eset
 
 # QC for the normalized eset
-draw.eset.QC(network.par$net.eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='afterQC_')
+draw.eset.QC(network.par$net.eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='afterQC_',
+             pre_define=c('WNT'='blue','SHH'='red','G4'='green'),pca_plot_type='2D.interactive')
 
 # Save Step 2 network.par as RData
 NetBID.saveRData(network.par = network.par,step='exp-QC')
@@ -94,7 +96,8 @@ mat <- mat[choose1,]
 tmp_net_eset <- generate.eset(exp_mat=mat, phenotype_info=pData(network.par$net.eset)[colnames(mat),],
                               feature_info=fData(network.par$net.eset)[rownames(mat),], annotation_info=annotation(network.par$net.eset))
 # QC plot for IQR filtered eset
-draw.eset.QC(tmp_net_eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='Cluster_')
+draw.eset.QC(tmp_net_eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='Cluster_',
+             pre_define=c('WNT'='blue','SHH'='red','G4'='green'),pca_plot_type='2D.interactive')
 
 # The following scripts provide various ways to visualize and check if the IQR filter selected genes
 # can be used to perform good sample cluster analysis (predicted labels vs. real labels). Figures will be displayed instead of saving as files.
@@ -106,7 +109,8 @@ intgroup <- get_int_group(network.par$net.eset)
 # Cluster analysis using Kmean and plot result using PCA biplot (pca+kmeans in 2D)
 for(i in 1:length(intgroup)){
   print(intgroup[i])
-  pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,intgroup[i]))
+  pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,intgroup[i]),
+                                pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 }
 # Cluster analysis using Kmeans and plot result using PCA (pca+kmeans in 3D)
 for(i in 1:length(intgroup)){
@@ -116,10 +120,10 @@ for(i in 1:length(intgroup)){
 }
 # Pick one phenotype column "subgroup" from the demo eset and show various plots NetBID2 can create
 use_int <- 'subgroup'
-pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D')
-pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D.ellipse')
-pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D.text')
-pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='3D')
+pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D',pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
+pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D.ellipse',pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
+pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D.text',pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
+pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='3D',pre_define=c('WNT'='blue','SHH'='red','G4'='green'))
 print(table(list(pred_label=pred_label,obs_label=get_obs_label(phe, use_int))))
 draw.clustComp(pred_label,obs_label=get_obs_label(phe,use_int),outlier_cex=1,low_K=10)
 

diff --git a/docs/docs/driver_estimation/driver_estimation.md b/docs/docs/driver_estimation/driver_estimation.md
@@ -419,11 +419,12 @@ NetBID.saveRData(analysis.par=analysis.par,step='ms-tab')
 
 - `out2excel()` can save multiple master tables as excel sheets in one EXCEL file.
 - For one master table, it consists of three parts:
-  - The first six columns are `gene_label`, `geneSymbol`, `originalID`, `originalID_label`, `funcType` and `Size`.
+  - The first six columns are `originalID_label`, `originalID`, `gene_label`, `geneSymbol`, `funcType` and `Size`.
+    - `originalID_label` is the original ID type with suffix "_TF" or "_SIG", which should match the the ID type in `analysis.par$merge.network`, 
+    - `originalID` is the original ID type used in network construction, which should match the ID type in `analysis.par$cal.eset`, `analysis.par$DE`.
     - `gene_label` is the driver's gene symbol or transcript symbol, with suffix "_TF" or "_SIG" to show driver's type. 
     - `geneSymbol` is the driver's gene symbol or transcript symbol, without suffix.
-    - `originalID` is the original ID type used in network construction, which should match the ID type in `analysis.par$cal.eset`, `analysis.par$DE`.
-    - `originalID_label` is the original ID type with suffix "_TF" or "_SIG", which should match the the ID type in `analysis.par$merge.network`, `analysis.par$merge.ac.eset`,`analysis.par$DA`.
+`analysis.par$merge.ac.eset`,`analysis.par$DA`.
     - **`originalID_label`** is the only column to ensure unique ID for row record.
     - `funcType` is either "TF" or "SIG" to mark driver's type. 
     - `Size` is number of target genes for the driver. 

diff --git a/inst/Rmd/eset_QC.Rmd b/inst/Rmd/eset_QC.Rmd
@@ -136,7 +136,7 @@ if('correlation' %in% choose_plot){
 
 ```{r echo=FALSE,fig.width=8, fig.height=8}
 if('meansd' %in% choose_plot){
-  draw.meanSdPlot(eset)
+  res <- draw.meanSdPlot(eset)
 }else{
   message('No selection for MeanSd plot!')
 }

diff --git a/man/test.targetNet.overlap.Rd b/man/test.targetNet.overlap.Rd