AlexsLemonade · allyhawkins · Jan 31, 2025 · Jan 31, 2025 · Feb 3, 2025 · Feb 3, 2025
@@ -42,6 +42,7 @@ jobs:
         run: |
           conda activate openscpca-cell-type-ewings
           ./download-data.py --test-data --format "sce,anndata" --project SCPCP000015
+          ./download-results.py --test-data --modules merge-sce --projects SCPCP000015
 
       - name: Run analysis
         run: |

@@ -212,6 +212,7 @@ The full list of gene signatures used can be found in [the references `README.md
 
 `AUCell` is run for each gene signature, and AUC values along with the AUC threshold reported by `AUCell` are saved to a TSV file. 
 By default, `AUCell` is run with an `aucMaxRank` value equal to 1% of the detected genes in the processed object. 
+Results from `AUCell` will be generated for each library and for the merged object containing all samples in `SCPCP000015`. 
 
 ### Usage
 

@@ -3,7 +3,8 @@
 # This script is used to get AUC values for a set of EWS-FLI1 high/low gene signatures using AUCell
 # The input gene signatures are those saved in `references/gene_signatures` and MSigDB gene sets noted in `references/msigdb-gene-sets.tsv`
 
-# For each library, a TSV is saved with the following columns: 
+# AUCell is run on each library and the merged object 
+# The output is a TSV for each library/object with the following columns: 
 # `gene_set`, `barcodes`, `auc`, and `auc_threshold`
 
 # Usage: ./run-aucell-ews-signatures.sh
@@ -28,9 +29,13 @@ max_rank_threshold=${max_rank_threshold:-0.01}
 
 # set up input and output file paths
 data_dir="../../data/current/SCPCP000015"
+merged_sce_file="../../data/current/results/merge-sce/SCPCP000015/SCPCP000015_merged.rds"
+
 results_dir="results/aucell-ews-signatures"
 mkdir -p ${results_dir}
 
+merged_results_file="${results_dir}/SCPCP000015_auc-ews-gene-signatures.tsv"
+
 # gene signature directory 
 gene_signatures_dir="${module_dir}/references/gene_signatures"
 msigdb_geneset_file="${module_dir}/references/msigdb-gene-sets.tsv"
@@ -64,3 +69,16 @@ for sample_id in $sample_ids; do
           --seed $seed
       done
 done
+
+
+# run AUCell on merged object 
+echo "Running AUCell for merged object" 
+Rscript scripts/aucell-ews-signatures/01-aucell.R \
+    --sce_file $merged_sce_file \
+    --custom_geneset_dir $gene_signatures_dir \
+    --msigdb_genesets $msigdb_geneset_file \
+    --max_rank_threshold $max_rank_threshold \
+    --is_merged \
+    --output_file $merged_results_file \
+    --threads $threads \
+    --seed $seed
@@ -38,6 +38,12 @@ option_list <- list(
     help = "Percentage of genes detected to set as the `aucMaxRank`. 
       Must be a number between 0 and 1."
   ),
+  make_option(
+    opt_str = c("--is_merged"),
+    action = "store_true",
+    default = FALSE,
+    help = "Indicate whether or not the SCE object is a merged object."
+  ),
   make_option(
     opt_str = c("--output_file"),
     type = "character",
@@ -96,8 +102,19 @@ fs::dir_create(output_dir)
 sce <- readr::read_rds(opt$sce_file)
 
 # remove genes that are not detected from SCE object 
-genes_to_remove <- rowData(sce)$detected > 0 
-filtered_sce <- sce[genes_to_remove , ]
+if(!opt$is_merged){
+  genes_to_remove <- rowData(sce)$detected > 0 
+} else {
+
+  # if merged object then need to sum all columns to find genes not present in the object at all 
+  genes_to_remove <- rowData(sce) |> 
+    as.data.frame() |>
+    dplyr::select(ends_with("detected")) |> 
+    rowSums() > 0
+
+}
+
+filtered_sce <- sce[genes_to_remove , ] 
 
 # read in gene sets to use with msigdb
 genesets_df <- readr::read_tsv(opt$msigdb_genesets)
@@ -171,7 +188,9 @@ collection <- all_genes_list |>
 # Run AUCell -------------------------------------------------------------------
 
 # run AUCell
-counts_mtx <- counts(sce)
+counts_mtx <- counts(filtered_sce) |>
+  as("dgCMatrix") # account for merged objects not being sparse
+
 max_rank <- ceiling(opt$max_rank_threshold*nrow(counts_mtx))
 
 auc_results <- AUCell::AUCell_run(