diff --git a/NetBID2_0.1.2.pdf b/NetBID2_0.1.2.pdf index f2b5599..d6f59c0 100755 Binary files a/NetBID2_0.1.2.pdf and b/NetBID2_0.1.2.pdf differ diff --git a/R/pipeline_functions.R b/R/pipeline_functions.R index 4721407..dc56448 100755 --- a/R/pipeline_functions.R +++ b/R/pipeline_functions.R @@ -2618,8 +2618,10 @@ generate.masterTable <- function(use_comp=NULL,DE=NULL,DA=NULL, gene_label <-base::paste(tmp1[rn,'external_gene_name'],funcType,sep = '_') } # - label_info <- data.frame('gene_label'=gene_label,'geneSymbol'=geneSymbol, - 'originalID'=rn,'originalID_label'=rn_label,'funcType'=funcType,'Size'=use_size,stringsAsFactors=FALSE) + #label_info <- data.frame('gene_label'=gene_label,'geneSymbol'=geneSymbol, + # 'originalID'=rn,'originalID_label'=rn_label,'funcType'=funcType,'Size'=use_size,stringsAsFactors=FALSE) + label_info <- data.frame('originalID_label'=rn_label,'originalID'=rn,'gene_label'=gene_label,'geneSymbol'=geneSymbol, + 'funcType'=funcType,'Size'=use_size,stringsAsFactors=FALSE) add_info <- tmp1[rn,] # combine_info <- lapply(use_comp,function(x){ @@ -5692,7 +5694,7 @@ draw.funcEnrich.cluster <- function(funcEnrich_res=NULL,top_number=30,Pv_col='Or funcEnrich_res <- funcEnrich_res[which(funcEnrich_res[,Pv_col]<=Pv_thre),] if(nrow(funcEnrich_res)>top_number) funcEnrich_res <- funcEnrich_res[1:top_number,] pv_val <- funcEnrich_res[,Pv_col]; names(pv_val) <- rownames(funcEnrich_res) - all_g2s <- sapply(funcEnrich_res[,item_col],function(x1)unlist(strsplit(x1,';'))) + all_g2s <- lapply(funcEnrich_res[,item_col],function(x1)unlist(strsplit(x1,';'))) names(all_g2s) <- funcEnrich_res[,name_col] mat1 <- t(list2mat(all_g2s)) mat1 <- mat1[rev(funcEnrich_res[,name_col]),] @@ -6355,10 +6357,11 @@ draw.GSEA <- function(rank_profile=NULL,use_genes=NULL,use_direction=NULL,main=" } annotation <- sprintf("%s\nKS test p-value:%s",es,pv) } + pp <- par()$usr if(es_res$RES[base::which.max(abs(es_res$RES))]>0) - graphics::text(r_len,base::max(y2)-par.char2pos()[2],annotation,pos=2,cex=annotation_cex,xpd=TRUE) + graphics::text(r_len,pp[4]-2*par.char2pos()[2],annotation,pos=2,cex=annotation_cex,xpd=TRUE) else - graphics::text(0,base::min(y2)+par.char2pos()[2],annotation,pos=4,cex=annotation_cex,xpd=TRUE) + graphics::text(0,pp[3]+2*par.char2pos()[2],annotation,pos=4,cex=annotation_cex,xpd=TRUE) } if(is.null(pdf_file)==FALSE){plot_part(ori=TRUE);dev.off()} else {plot_part()} return(TRUE) @@ -7346,7 +7349,7 @@ draw.targetNet <- function(source_label="",source_z=NULL,edge_score=NULL, label_cex=0.7,source_cex=1, pdf_file=NULL,arrow_direction='out',n_layer=1,alphabetical_order=FALSE){ # - all_input_para <- c('source_label','source_z','edge_score','label_cex','source_cex', + all_input_para <- c('source_label','edge_score','label_cex','source_cex', 'arrow_direction','n_layer','alphabetical_order') check_res <- sapply(all_input_para,function(x)check_para(x,envir=environment())) if(min(check_res)==0){message('Please check and re-try!');return(FALSE)} diff --git a/README.md b/README.md index 6f2e890..ccad1db 100755 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ devtools::install_deps(pkg = ".", dependencies = TRUE) ## Install package depend download the directory to your workspace and then run: ```R -install.packages('NetBID2_0.1.2.tar.gz',repos=NULL,dependencies=TRUE) ## +devtools::install_local('NetBID2_0.1.2.tar.gz') ## ``` # Manual & Tutorial diff --git a/demo_scripts/analysis_and_plot_demo1.R b/demo_scripts/analysis_and_plot_demo1.R index f0d7abf..36c1d08 100755 --- a/demo_scripts/analysis_and_plot_demo1.R +++ b/demo_scripts/analysis_and_plot_demo1.R @@ -110,13 +110,15 @@ draw.heatmap(mat=exp_mat,use_genes=ms_tab[driver_list,'originalID'],use_gene_lab use_samples=colnames(exp_mat),use_sample_label=phe_info[colnames(exp_mat),'geo_accession'], phenotype_info=phe_info,use_phe=c('gender','pathology','subgroup','age'),main='Expression for Top drivers',scale='row', cluster_rows=TRUE,cluster_columns=TRUE,clustering_distance_rows='pearson',clustering_distance_columns='pearson', - row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo1.pdf',analysis.par$out.dir.PLOT)) + row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo1.pdf',analysis.par$out.dir.PLOT), + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) draw.heatmap(mat=ac_mat,use_genes=driver_list,use_gene_label=ms_tab[driver_list,'gene_label'], use_samples=colnames(exp_mat),use_sample_label=phe_info[colnames(exp_mat),'geo_accession'], phenotype_info=phe_info,use_phe=c('gender','pathology','subgroup','age'),main='Activity for Top drivers',scale='row', cluster_rows=TRUE,cluster_columns=TRUE,clustering_distance_rows='pearson',clustering_distance_columns='pearson', - row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo2.pdf',analysis.par$out.dir.PLOT)) + row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo2.pdf',analysis.par$out.dir.PLOT), + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) # Draw heatmaps using top DE genes gene_list <- rownames(sig_gene) @@ -124,13 +126,15 @@ draw.heatmap(mat=exp_mat,use_genes=ms_tab[gene_list,'originalID'],use_gene_label use_samples=colnames(exp_mat),use_sample_label=phe_info[colnames(exp_mat),'geo_accession'], phenotype_info=phe_info,use_phe=c('gender','pathology','subgroup','age'),main='Expression for Top drivers',scale='row', cluster_rows=TRUE,cluster_columns=TRUE,clustering_distance_rows='pearson',clustering_distance_columns='pearson', - row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo3.pdf',analysis.par$out.dir.PLOT)) + row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo3.pdf',analysis.par$out.dir.PLOT), + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) draw.heatmap(mat=ac_mat,use_genes= gene_list,use_gene_label=ms_tab[gene_list,'gene_label'], use_samples=colnames(exp_mat),use_sample_label=phe_info[colnames(exp_mat),'geo_accession'], phenotype_info=phe_info,use_phe=c('gender','pathology','subgroup','age'),main='Activity for Top drivers',scale='row', cluster_rows=TRUE,cluster_columns=TRUE,clustering_distance_rows='pearson',clustering_distance_columns='pearson', - row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo4.pdf',analysis.par$out.dir.PLOT)) + row_names_gp = gpar(fontsize = 12),pdf_file=sprintf('%s/heatmap_demo4.pdf',analysis.par$out.dir.PLOT), + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) ### QI.4: What are the biological functions of these top DA drivers ? @@ -262,16 +266,19 @@ test.targetNet.overlap(source1_label=ms_tab[use_driver,'gene_label'],source2_lab use_obs_class <- get_obs_label(phe_info = phe_info,'subgroup') draw.categoryValue(ac_val=ac_mat[use_driver,],exp_val=exp_mat[ms_tab[use_driver,'originalID'],],use_obs_class=use_obs_class, class_order=c('WNT','SHH','G4'),class_srt=30,main_ac = ms_tab[use_driver,'gene_label'],main_exp=ms_tab[use_driver,'geneSymbol'], - pdf_file=sprintf('%s/categoryValue_demo1.pdf',analysis.par$out.dir.PLOT)) + pdf_file=sprintf('%s/categoryValue_demo1.pdf',analysis.par$out.dir.PLOT), + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) # draw.categoryValue(ac_val=ac_mat[use_driver,],exp_val=NULL,use_obs_class=use_obs_class,class_order=c('WNT','SHH','G4'), - pdf_file=sprintf('%s/categoryValue_demo2.pdf',analysis.par$out.dir.PLOT)) + pdf_file=sprintf('%s/categoryValue_demo2.pdf',analysis.par$out.dir.PLOT), + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) # use_obs_class <- get_obs_label(phe_info = phe_info,c('subgroup','gender')) draw.categoryValue(ac_val=ac_mat[use_driver,],exp_val=exp_mat[ms_tab[use_driver,'originalID'],],use_obs_class=use_obs_class, class_srt=30,main_ac = ms_tab[use_driver,'gene_label'],main_exp=ms_tab[use_driver,'geneSymbol'], - pdf_file=sprintf('%s/categoryValue_demo3.pdf',analysis.par$out.dir.PLOT)) + pdf_file=sprintf('%s/categoryValue_demo3.pdf',analysis.par$out.dir.PLOT), + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) ### QII.4: What are the functions of the target genes of this selected driver ? @@ -317,7 +324,8 @@ sig_gs <- draw.volcanoPlot(dat= DA_gs_bid,label_col='ID',logFC_col='logFC', # Draw heatmap for top significant gene sets draw.heatmap(mat=ac_gs[sig_gs$ID,],pdf_file=sprintf('%s/heatmap_GS.pdf',analysis.par$out.dir.PLOT),scale='row', - phenotype_info=phe_info,use_phe=c('gender','subgroup')) + phenotype_info=phe_info,use_phe=c('gender','subgroup'), + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) # Draw GSEA plot for top significant gene sets DE <- analysis.par$DE[[comp_name]] @@ -354,7 +362,8 @@ draw.GSEA(rank_profile=DE_profile,use_genes=use_target_genes, use_obs_class <- get_obs_label(phe_info = phe_info,'subgroup') draw.categoryValue(ac_val=ac_gs[use_gs,],use_obs_class=use_obs_class, class_order=c('WNT','SHH','G4'),class_srt=30,pdf_file=sprintf('%s/categoryValue_GS_demo1.pdf',analysis.par$out.dir.PLOT), - main_ac= use_gs,main_cex=0.8) + main_ac= use_gs,main_cex=0.8, + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) ### QIII.2: How to find drivers share significantly overlapped target genes ? diff --git a/demo_scripts/pipeline_analysis_demo1.R b/demo_scripts/pipeline_analysis_demo1.R index ca1a609..842e582 100755 --- a/demo_scripts/pipeline_analysis_demo1.R +++ b/demo_scripts/pipeline_analysis_demo1.R @@ -54,7 +54,9 @@ analysis.par$merge.ac.eset <- generate.eset(exp_mat=ac_mat,phenotype_info=pData( feature_info=NULL,annotation_info='activity in net-dataset') # QC plot for activity eset -draw.eset.QC(analysis.par$merge.ac.eset,outdir=analysis.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='AC_') +draw.eset.QC(analysis.par$merge.ac.eset,outdir=analysis.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='AC_', + pre_define=c('WNT'='blue','SHH'='red','G4'='green'),pca_plot_type='2D.interactive') + # Save Step 2 analysis.par as RData NetBID.saveRData(analysis.par=analysis.par,step='act-get') diff --git a/demo_scripts/pipeline_network_demo1.R b/demo_scripts/pipeline_network_demo1.R index 9a7f25b..74b4d33 100755 --- a/demo_scripts/pipeline_network_demo1.R +++ b/demo_scripts/pipeline_network_demo1.R @@ -29,7 +29,8 @@ net_eset <- update_eset.phenotype(use_eset=net_eset,use_phenotype_info=pData(net network.par$net.eset <- net_eset # QC for the raw eset -draw.eset.QC(network.par$net.eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='beforeQC_') +draw.eset.QC(network.par$net.eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='beforeQC_', + pre_define=c('WNT'='blue','SHH'='red','G4'='green'),pca_plot_type='2D.interactive') # Save Step 1 network.par as RData NetBID.saveRData(network.par = network.par,step='exp-load') @@ -73,7 +74,8 @@ net_eset <- generate.eset(exp_mat=mat, phenotype_info=pData(network.par$net.eset network.par$net.eset <- net_eset # QC for the normalized eset -draw.eset.QC(network.par$net.eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='afterQC_') +draw.eset.QC(network.par$net.eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='afterQC_', + pre_define=c('WNT'='blue','SHH'='red','G4'='green'),pca_plot_type='2D.interactive') # Save Step 2 network.par as RData NetBID.saveRData(network.par = network.par,step='exp-QC') @@ -94,7 +96,8 @@ mat <- mat[choose1,] tmp_net_eset <- generate.eset(exp_mat=mat, phenotype_info=pData(network.par$net.eset)[colnames(mat),], feature_info=fData(network.par$net.eset)[rownames(mat),], annotation_info=annotation(network.par$net.eset)) # QC plot for IQR filtered eset -draw.eset.QC(tmp_net_eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='Cluster_') +draw.eset.QC(tmp_net_eset,outdir=network.par$out.dir.QC,intgroup=NULL,do.logtransform=FALSE,prefix='Cluster_', + pre_define=c('WNT'='blue','SHH'='red','G4'='green'),pca_plot_type='2D.interactive') # The following scripts provide various ways to visualize and check if the IQR filter selected genes # can be used to perform good sample cluster analysis (predicted labels vs. real labels). Figures will be displayed instead of saving as files. @@ -106,7 +109,8 @@ intgroup <- get_int_group(network.par$net.eset) # Cluster analysis using Kmean and plot result using PCA biplot (pca+kmeans in 2D) for(i in 1:length(intgroup)){ print(intgroup[i]) - pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,intgroup[i])) + pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,intgroup[i]), + pre_define=c('WNT'='blue','SHH'='red','G4'='green')) } # Cluster analysis using Kmeans and plot result using PCA (pca+kmeans in 3D) for(i in 1:length(intgroup)){ @@ -116,10 +120,10 @@ for(i in 1:length(intgroup)){ } # Pick one phenotype column "subgroup" from the demo eset and show various plots NetBID2 can create use_int <- 'subgroup' -pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D') -pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D.ellipse') -pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D.text') -pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='3D') +pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D',pre_define=c('WNT'='blue','SHH'='red','G4'='green')) +pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D.ellipse',pre_define=c('WNT'='blue','SHH'='red','G4'='green')) +pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='2D.text',pre_define=c('WNT'='blue','SHH'='red','G4'='green')) +pred_label <- draw.pca.kmeans(mat=mat,all_k = NULL,obs_label=get_obs_label(phe,use_int),plot_type='3D',pre_define=c('WNT'='blue','SHH'='red','G4'='green')) print(table(list(pred_label=pred_label,obs_label=get_obs_label(phe, use_int)))) draw.clustComp(pred_label,obs_label=get_obs_label(phe,use_int),outlier_cex=1,low_K=10) diff --git a/docs/docs/driver_estimation/driver_estimation.md b/docs/docs/driver_estimation/driver_estimation.md index 6017427..94c37db 100755 --- a/docs/docs/driver_estimation/driver_estimation.md +++ b/docs/docs/driver_estimation/driver_estimation.md @@ -419,11 +419,12 @@ NetBID.saveRData(analysis.par=analysis.par,step='ms-tab') - `out2excel()` can save multiple master tables as excel sheets in one EXCEL file. - For one master table, it consists of three parts: - - The first six columns are `gene_label`, `geneSymbol`, `originalID`, `originalID_label`, `funcType` and `Size`. + - The first six columns are `originalID_label`, `originalID`, `gene_label`, `geneSymbol`, `funcType` and `Size`. + - `originalID_label` is the original ID type with suffix "_TF" or "_SIG", which should match the the ID type in `analysis.par$merge.network`, + - `originalID` is the original ID type used in network construction, which should match the ID type in `analysis.par$cal.eset`, `analysis.par$DE`. - `gene_label` is the driver's gene symbol or transcript symbol, with suffix "_TF" or "_SIG" to show driver's type. - `geneSymbol` is the driver's gene symbol or transcript symbol, without suffix. - - `originalID` is the original ID type used in network construction, which should match the ID type in `analysis.par$cal.eset`, `analysis.par$DE`. - - `originalID_label` is the original ID type with suffix "_TF" or "_SIG", which should match the the ID type in `analysis.par$merge.network`, `analysis.par$merge.ac.eset`,`analysis.par$DA`. +`analysis.par$merge.ac.eset`,`analysis.par$DA`. - **`originalID_label`** is the only column to ensure unique ID for row record. - `funcType` is either "TF" or "SIG" to mark driver's type. - `Size` is number of target genes for the driver. diff --git a/inst/Rmd/eset_QC.Rmd b/inst/Rmd/eset_QC.Rmd index 93848d1..d8f67d8 100755 --- a/inst/Rmd/eset_QC.Rmd +++ b/inst/Rmd/eset_QC.Rmd @@ -136,7 +136,7 @@ if('correlation' %in% choose_plot){ ```{r echo=FALSE,fig.width=8, fig.height=8} if('meansd' %in% choose_plot){ - draw.meanSdPlot(eset) + res <- draw.meanSdPlot(eset) }else{ message('No selection for MeanSd plot!') } diff --git a/man/test.targetNet.overlap.Rd b/man/test.targetNet.overlap.Rd index 698a087..d6387a7 100755 --- a/man/test.targetNet.overlap.Rd +++ b/man/test.targetNet.overlap.Rd @@ -20,7 +20,7 @@ test.targetNet.overlap(source1_label = NULL, source2_label = NULL, If input is a vector of characters, it is the background list of all possible target genes.} } \value{ -Return statistics of the testing, including the \code{P.Value}, \code{Odds_Ratio} and \code{Intersected_Nuumber}. +Return statistics of the testing, including the \code{P.Value}, \code{Odds_Ratio} and \code{Intersected_Number}. } \description{ \code{test.targetNet.overlap} performs Fisher's exact test to see whether the target genes from two drivers are significantly intersected.