AlexsLemonade · maud-p · Aug 9, 2024 · Aug 9, 2024 · Aug 9, 2024 · Aug 9, 2024
@@ -0,0 +1,62 @@
+#!/usr/bin/env Rscript
+
+# Run the Label transfer from two fetal references ------------------------------
+
+# get list of samples in the library --------------------------------------------
+# Repository root, located through its .git directory
+root_dir <- rprojroot::find_root(criterion = rprojroot::is_git_root)
+
+# Tab-separated metadata table of project SCPCP000006
+sample_metadata_file <- file.path(
+  root_dir, "data", "current", "SCPCP000006", "single_cell_metadata.tsv"
+)
+metadata <- read.table(file = sample_metadata_file, header = TRUE, sep = "\t")
+
+# set path to this module--------------------------------------------------------
+module_base <- file.path(root_dir, "analyses", "cell-type-wilms-tumor-06")
+
+# Download and create the fetal kidney reference (Stewart et al) ----------
+source(
+  file.path(module_base, "scripts", "download-and-create-fetal-kidney-ref.R")
+)
+
+# Characterize the two fetal references -----------------------------------------
+
+# Characterize the fetal full reference (Cao et al.)
+# To be done, next PR
+
+# Characterize the fetal kidney reference (Stewart et al.)
+# The HTML report is written to <module>/notebook/00-reference
+rmarkdown::render(
+  input = file.path(
+    module_base,
+    "notebook_template",
+    "00b_characterize_fetal_kidney_reference_Stewart.Rmd"
+  ),
+  output_format = "html_document",
+  output_file = "00b_characterization_fetal_kidney_reference_Stewart.html",
+  output_dir = file.path(module_base, "notebook", "00-reference")
+)
+
-# Characterize the fetal kidney reference (Stewart et al.)
-rmarkdown::render(input = file.path(module_base, "notebook_template", "00b_characterize_fetal_kidney_reference_Stewart.Rmd"),
-                  output_format = "html_document",
-                  output_file = "00b_characterization_fetal_kidney_reference_Stewart.html",
-                  output_dir = file.path(module_base, "notebook","00-reference"))
+# Base name shared by the reference notebook template and its HTML report
+reference_notebook <- "00b_characterize_fetal_kidney_reference_Stewart"
+# Characterize the fetal kidney reference (Stewart et al.)
+# NOTE(review): `notebook_template_dir` and `notebook_output_dir` are not
+# defined in the visible part of the script -- confirm they are set before
+# this call.
+# The `file.path()` call building `input` is now closed, so the remaining
+# arguments are passed to `rmarkdown::render()` and the call parses.
+rmarkdown::render(input = file.path(notebook_template_dir, paste0(reference_notebook, ".Rmd")),
+                  output_format = "html_document",
+                  output_file = paste0(reference_notebook, ".html"),
+                  output_dir = file.path(notebook_output_dir, "00-reference"))
+
-# Characterize the fetal kidney reference (Stewart et al.)
-rmarkdown::render(input = file.path(module_base, "notebook_template", "00b_characterize_fetal_kidney_reference_Stewart.Rmd"),
-                  output_format = "html_document",
-                  output_file = "00b_characterization_fetal_kidney_reference_Stewart.html",
-                  output_dir = file.path(module_base, "notebook","00-reference"))
+# Base name shared by the reference notebook template and its HTML report
+reference_notebook <- "00b_characterize_fetal_kidney_reference_Stewart"
+# Characterize the fetal kidney reference (Stewart et al.)
+# NOTE(review): `notebook_template_dir` and `notebook_output_dir` are not
+# defined in the visible part of the script -- confirm they are set before
+# this call.
+# The `file.path()` call building `input` is now closed, so the remaining
+# arguments are passed to `rmarkdown::render()` and the call parses.
+rmarkdown::render(input = file.path(notebook_template_dir, paste0(reference_notebook, ".Rmd")),
+                  output_format = "html_document",
+                  output_file = paste0(reference_notebook, ".html"),
+                  output_dir = file.path(notebook_output_dir, "00-reference"))
+
+
+# Run the workflow for (all) samples in the project -----------------------------
+
+# Keep the original cap of 11 metadata entries. Unlike `[1:11]`, `head()` does
+# not pad the vector with NA when the table has fewer rows, and `unique()`
+# avoids processing the same sample twice if its ID appears on several rows.
+sample_ids <- unique(head(metadata$scpca_sample_id, 11))
+
+# `rmarkdown::render()` evaluates a notebook in the calling environment by
+# default, so the loop variables use distinctive names to limit the risk of
+# being overwritten by objects created inside the notebooks.
+for (current_sample in sample_ids) {
+
+  # project the current sample belongs to (looked up once, de-duplicated)
+  current_project <- unique(
+    metadata$scpca_project_id[metadata$scpca_sample_id == current_sample]
+  )
+  # parameters shared by the three notebooks rendered for this sample
+  current_params <- list(
+    scpca_project_id = current_project,
+    sample_id = current_sample
+  )
+
+  # create a directory to save the pre-processed and labeled `Seurat` objects
+  # (`recursive` creates missing parents; no warning when re-running)
+  dir.create(
+    file.path(module_base, "results", current_sample),
+    recursive = TRUE, showWarnings = FALSE
+  )
+  # create a directory to save the notebooks
+  current_notebook_dir <- file.path(module_base, "notebook", current_sample)
+  dir.create(current_notebook_dir, recursive = TRUE, showWarnings = FALSE)
+
+  # Pre-process the data - `Seurat` workflow
+  rmarkdown::render(
+    input = file.path(module_base, "notebook_template", "01_seurat-processing.Rmd"),
+    params = current_params,
+    output_format = "html_document",
+    output_file = paste0("01_Clustering_", current_sample, ".html"),
+    output_dir = current_notebook_dir
+  )
+
+  # Label transfer from the Cao reference using Azimuth
+  rmarkdown::render(
+    input = file.path(module_base, "notebook_template", "02a_label-transfer_fetal_full_reference_Cao.Rmd"),
+    params = current_params,
+    output_format = "html_document",
+    output_file = paste0("02a_fetal_all_reference_Cao_", current_sample, ".html"),
+    output_dir = current_notebook_dir
+  )
+
+  # Label transfer from the Stewart reference using Seurat
+  rmarkdown::render(
+    input = file.path(module_base, "notebook_template", "02b_label-transfer_fetal_kidney_reference_Stewart.Rmd"),
+    params = current_params,
+    output_format = "html_document",
+    output_file = paste0("02b_fetal_kidney_reference_Stewart_", current_sample, ".html"),
+    output_dir = current_notebook_dir
+  )
+}
+
+
+
@@ -16,8 +16,8 @@ Based on the provided annotation, we would like to additionally provide a refere
 The analysis is/will be divided as the following:
 
 - [x] Metadata file: compilation of a metadata file of marker genes for expected cell types that will be used for validation at a later step
-- [ ] Script: clustering of cells across a set of parameters for few samples
-- [ ] Script: label transfer from the fetal kidney atlas reference using runAzimuth
+- [x] Script: clustering of cells across a set of parameters for few samples
+- [x] Script: label transfer from the fetal kidney atlas reference using runAzimuth
 - [ ] Script: run InferCNV
 - [ ] Notebook: explore results from steps 2 to 4 for about 5 to 10 samples
 - [ ] Script: compile scripts 2 to 4 in an RMarkdown file with required adjustments and render it across all samples
@@ -67,11 +67,22 @@ Some differenices are expected, some marker genes or pathways are associated wit
 
 ## Output files
 
+for each of the steps, we have two types of `output`:
+
+- the `notebook` saved in the `notebook` directory, with a subfolder for each sample. 
+
+- the created objects saved in `results` directory, with a subfolder for each sample. 
+
+
+# Analysis
+
 ## Marker sets
 
-This folder is a resource for later validation of the annotated cell types.
+We first build a resource for later validation of the annotated cell types. 
+We gather from the literature marker genes and specific genomic alterations that could help us characterize the Wilms tumor ecosystem, including cancer and non-cancer cells. 
 
 ### The table CellType_metadata.csv contains the following column and information:
+
 - "gene_symbol" contains the symbol of the described gene, using the HUGO Gene Nomenclature
 - ENSEMBL_ID contains the stable identifier from the ENSEMBL database
 - cell_class is either "malignant" for marker genes specific to malignant population, or "non-malignant" for markers genes specific to non-malignant tissue or "both" for marker genes that can be found in malignant as well as non-malignant tissue but are still informative in respect to the cell type.
@@ -106,6 +117,7 @@ This folder is a resource for later validation of the annotated cell types.
 
 
 ### The table GeneticAlterations_metadata.csv contains the following column and information:
+
 - alteration contains the number and portion of the affected chromosome
 - gain_loss contains the information regarding the gain or loss of the corresponding genetic alteration
 - cell_class is "malignant"
@@ -122,6 +134,61 @@ This folder is a resource for later validation of the annotated cell types.
 |1q|gain|malignant|NA|10.1016/S0002-9440(10)63982-X|NA|Associated_with_relapse|
 
 
+## Clustering and label transfer from fetal references
+
+R Script to be rendered : `00_run_workflow.R`
+
+### Introduction
+
+The `00_run_workflow.R` contains the following steps:
+
+- define paths
+
+- download and create the fetal kidney reference: `download-and-create-fetal-kidney-ref.R` in `scripts`
+
+- characterize the fetal kidney reference: `00b_characterize_fetal_kidney_reference_Stewart.Rmd` in `notebook_template`
+
+- loop over each sample:
+
+  - `Seurat workflow`, normalization and clustering: `01_seurat-processing.Rmd` in `notebook_template`
+  - `Azimuth` label transfer from the fetal full reference (Cao et al.) in `notebook_template`
+  - `Azimuth` label transfer from the fetal kidney reference (Stewart et al.) in `notebook_template`
+
+### Justification 
+
+The use of the right reference is crucial. 
+It is recommended that the cell types in the reference be representative of the cell types to be annotated in the query.
+
+Wilms tumors can contain up to three histologies that resemble fetal kidney: blastema, stroma, and epithelia [1-2].
+Because of their histological similarity to fetal kidneys, Wilms tumors are thought to arise from developmental derangements in embryonic renal progenitors.
+
+We thus decided to test and compare two fetal (kidney) references that could be used in the analysis module.
+
+##### Human fetal kidney atlas Stewart et al.
+
+We first wanted to try the human fetal kidney atlas to transfer labels to the Wilms tumor samples using `Azimuth`. 
+You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [3]
+
+##### Human Azimuth fetal reference from Cao et al.
+
+Azimuth also provides a human fetal atlas as a reference [4]. 
+
+The data can be found on Zenodo: 
+https://zenodo.org/records/4738021#.YJIW4C2ZNQI
+
+The reference contains cells from 15 organs, including kidney, from fetal samples. 
+Here we will use `Azimuth` to transfer labels from the reference.
+
+### Input and outputs
+
+We start with the `_process.Rds` data to run `01_seurat-processing.Rmd`. 
+The output of `01_seurat-processing.Rmd` is saved in `results` in a subfolder for each sample and is the input of the second step `02a_label-transfer_fetal_full_reference_Cao.Rmd`.
+The output of `02a_label-transfer_fetal_full_reference_Cao.Rmd` is then the input of `02b_label-transfer_fetal_kidney_reference_Stewart.Rmd`.
+
+At the end of the workflow, we have a `Seurat` object that contains:
+- normalization and clustering, dimensional reductions
+- label transfer from the fetal full reference
+- label transfer from the fetal kidney reference
 
 ## Software requirements
 
@@ -178,3 +245,13 @@ The `renv` lockfile is used to install R packages in the Docker image.
 ## Computational resources
 
 
+## References 
+
+- [1] https://www.ncbi.nlm.nih.gov/books/NBK373356/ 
+
+- [2] https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9915828/ 
+
+- [3] https://www.science.org/doi/10.1126/science.aat5031 
+
+- [4] https://www.science.org/doi/10.1126/science.aba7721
+
diff --git a/analyses/cell-type-wilms-tumor-06/notebook/SCPCS000168/01_Clustering_SCPCS000168.html b/analyses/cell-type-wilms-tumor-06/notebook/SCPCS000168/01_Clustering_SCPCS000168.html
diff --git a/...ell-type-wilms-tumor-06/notebook/SCPCS000168/02a_fetal_all_reference_Cao_SCPCS000168.html b/...ell-type-wilms-tumor-06/notebook/SCPCS000168/02a_fetal_all_reference_Cao_SCPCS000168.html
diff --git a/...e-wilms-tumor-06/notebook/SCPCS000168/02b_fetal_kidney_reference_Stewart_SCPCS000168.html b/...e-wilms-tumor-06/notebook/SCPCS000168/02b_fetal_kidney_reference_Stewart_SCPCS000168.html
@@ -0,0 +1,201 @@
+---
+title: "Characterize the fetal kidney reference from the kidney cell atlas"
+author: "Maud PLASCHKA"
+date: '2024-08-07'
+params:
+  url: https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds
+  padj_threshold: 0.05
+  lfc_threshold: 1
+  rate1_threshold: 0.5
+  seed: 12345
+output: 
+  html_document: 
+    toc: yes
+    toc_float: yes
+    code_folding: hide
+    highlight: pygments
+    df_print: paged
+    self_contained: yes
+    mode: selfcontained
+---
+
+```{r setup, include=FALSE}
+# Global chunk options: show the code, hide messages and warnings in the report.
+# The knitr option is `warning` (singular); `warnings` is not a knitr chunk
+# option, so it had no effect on the rendered output.
+knitr::opts_chunk$set(
+  echo = TRUE,
+  message = FALSE,
+  warning = FALSE
+)
+```
+
+
+# Introduction
+
+The aim is to characterize the human fetal kidney from the kidney cell atlas.
+You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [1]
+The rds data can be downloaded using the download link https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds
+The azimuth compatible reference has been downloaded and created in the `R script` `download-and-create-fetal-kidney-ref.R`
+
+## Packages
+
+Load required packages in the following chunk, if needed.
+Do not install packages here; only load them with the `library()` function.
+
+```{r packages, message=FALSE, warning=FALSE}
+# Packages attached for the analysis and the figures of this notebook
+library(Seurat)
+library(Azimuth)
+library(SCpubr)
+library(tidyverse)
+library(patchwork)
+
+# Seed taken from the notebook parameters
+set.seed(params$seed)
+# Raise the `future.globals.maxSize` option used by the `future` framework
+options(future.globals.maxSize = 891289600000)
+```
+
+
+## Base directories
+
+```{r base paths, eval=TRUE, include=TRUE}
+# Root of the OpenScPCA repository, located through its (hidden) .git directory
+repository_base <- rprojroot::find_root(criterion = rprojroot::is_git_root)
+
+# Directory of this analysis module inside the repository
+module_base <- file.path(
+  repository_base,
+  "analyses",
+  "cell-type-wilms-tumor-06"
+)
+
+```
+
+
+## Input files
+
+The input file is the output of the `R script` `download-and-create-fetal-kidney-ref.R`
+
+```{r path_to_data}
+# Location of the fetal kidney reference inside the module's scratch directory
+path_to_data <- file.path(
+  module_base,
+  "scratch",
+  "fetal_kidney.rds"
+)
+
+# Make sure the destination directory exists before anything is written to it
+dir.create(dirname(path_to_data), recursive = TRUE, showWarnings = FALSE)
+
+# Only fetch the reference when it is not already on disk, so that re-rendering
+# the notebook does not download it again; delete the file to force a fresh
+# download. `mode = "wb"` requests a binary transfer for the .rds file.
+if (!file.exists(path_to_data)) {
+  download.file(url = params$url, destfile = path_to_data, mode = "wb")
+}
+```
+
+## Output file
+
+We will save the results of the differential expression analyses in `results/references/` as `00a_marker_compartment_fetal_kidney_Stewart.csv` and `00a_marker_cell-type_fetal_kidney_Stewart.csv`.
+Notebook is saved in the `notebook/00-reference` directory
+
+```{r path_to_output}
+# Directory receiving the marker-gene tables written by this notebook
+path_to_output <- file.path(module_base, "results", "references")
+# Create it up front (parents included): `write_csv()` does not create
+# missing directories, and no warning is raised if it already exists
+dir.create(path_to_output, recursive = TRUE, showWarnings = FALSE)
+```
+
+# Analysis
+
+## Load the reference
+
+```{r pre_process, echo=TRUE, fig.height=7, fig.width=12, message=FALSE, warning=FALSE, out.width='100%'}
+# Read the reference object from disk
+fetal_kidney <- readRDS(file = path_to_data)
+
+# UMAP labelled by compartment (labels drawn on the plot, legend removed)
+umap_compartment <- do_DimPlot(
+  fetal_kidney,
+  reduction = "umap",
+  dims = c(1, 2),
+  group.by = "compartment",
+  label = TRUE,
+  repel = TRUE
+) +
+  NoLegend()
+
+# UMAP labelled by cell type, same settings
+umap_cell_type <- do_DimPlot(
+  fetal_kidney,
+  reduction = "umap",
+  dims = c(1, 2),
+  group.by = "cell_type",
+  label = TRUE,
+  repel = TRUE
+) +
+  NoLegend()
+
+# Show both panels side by side
+umap_compartment | umap_cell_type
+```
+
+## Characterization of compartment and cell types in the reference
+
+Here, we use an unbiased approach to find transcripts that characterize the different compartments and cell types.
+
+This is just to get marker genes of the different populations, in case some could be of interest for the Wilms tumor annotations. 
+
+We run DElegate::FindAllMarkers2 to find markers of the different clusters and manually check if they do make sense. 
+DElegate::FindAllMarkers2 is an improved version of Seurat::FindAllMarkers based on pseudobulk differential expression method. 
+Please check the preprint from Christoph Hafemeister: https://www.biorxiv.org/content/10.1101/2023.03.28.534443v1
+and tool described here: https://github.com/cancerbits/DElegate 
+
+### Find marker genes for each of the compartment
+
+
+```{r markers_compatment, fig.width=8, fig.height=7, out.width='100%'}
+# Differential expression between compartments with DElegate
+# (stray trailing comma removed from the call: it passed an empty argument)
+de_results <- DElegate::FindAllMarkers2(fetal_kidney, group_column = "compartment")
+
+# filter the most relevant markers: significant, up-regulated and detected at
+# a sufficient rate, using the thresholds from the notebook parameters
+is_marker <- de_results$padj < params$padj_threshold &
+  de_results$log_fc > params$lfc_threshold &
+  de_results$rate1 > params$rate1_threshold
+s.markers <- de_results[is_marker, ]
+
+# interactive table with CSV / Excel export buttons
+DT::datatable(
+  s.markers,
+  caption = "marker genes",
+  extensions = "Buttons",
+  options = list(dom = "Bfrtip", buttons = c("csv", "excel"))
+)
+
+# Select top 5 genes for heatmap plotting
+s.markers <- na.omit(s.markers)
+# left-assignment instead of `->`, and ungroup so no grouping leaks downstream
+top5 <- s.markers %>%
+  group_by(group1) %>%
+  top_n(n = 5, wt = log_fc) %>%
+  ungroup()
+
+# subset for plotting: downsample the cells of each compartment and scale only
+# the genes shown in the heatmap
+Idents(fetal_kidney) <- fetal_kidney$compartment
+cells <- WhichCells(fetal_kidney, downsample = 100)
+ss <- subset(fetal_kidney, cells = cells)
+ss <- ScaleData(ss, features = top5$feature)
+
+# p1: UMAP by compartment, p2: heatmap of the top markers, p3: cell counts
+p1 <- SCpubr::do_DimPlot(
+  fetal_kidney,
+  reduction = "umap", group.by = "compartment", label = TRUE, repel = TRUE
+) +
+  ggtitle("compartment")
+p2 <- DoHeatmap(ss, features = top5$feature, cells = cells, group.by = "compartment") +
+  NoLegend() +
+  scale_fill_gradientn(
+    colors = c("#01665e", "#35978f", "darkslategray3", "#f7f7f7", "#fee391", "#fec44f", "#F9AD03")
+  )
+p3 <- ggplot(fetal_kidney@meta.data, aes(compartment, fill = compartment)) +
+  geom_bar() +
+  NoLegend()
+
+# assemble the figure: UMAP over bar plot on the left, heatmap on the right
+common_title <- sprintf(
+  "Unsupervised clustering %s, %d cells",
+  fetal_kidney@meta.data$orig.ident[1], ncol(fetal_kidney)
+)
+left_column <- (p1 / p3) + plot_layout(heights = c(3, 2))
+show(
+  (left_column | p2) +
+    plot_layout(widths = c(1, 2)) +
+    plot_layout(heights = c(3, 1)) +
+    plot_annotation(title = common_title)
+)
+
+# save the full (unfiltered) differential expression table
+write_csv(de_results, file = file.path(path_to_output, "00a_marker_compartment_fetal_kidney_Stewart.csv"))
+
+
+```
+
+
+### Find marker genes for each of the cell types
+
+
+```{r markers_cell, fig.width=17, fig.height=20, out.width='100%'}
+# Differential expression between cell types with DElegate
+de_results <- DElegate::FindAllMarkers2(fetal_kidney, group_column = "cell_type")
+
+# keep the most relevant markers, using the thresholds from the parameters
+passes_thresholds <- de_results$padj < params$padj_threshold &
+  de_results$log_fc > params$lfc_threshold &
+  de_results$rate1 > params$rate1_threshold
+s.markers <- de_results[passes_thresholds, ]
+
+# interactive table with CSV / Excel export buttons
+DT::datatable(
+  s.markers,
+  caption = "marker genes",
+  extensions = "Buttons",
+  options = list(dom = "Bfrtip", buttons = c("csv", "excel"))
+)
+
+# top 5 genes per cell type (by log fold change) for the heatmap
+s.markers <- na.omit(s.markers)
+top5 <- top_n(group_by(s.markers, group1), n = 5, wt = log_fc)
+
+# downsample the cells of each cell type and scale only the plotted genes
+Idents(fetal_kidney) <- fetal_kidney$cell_type
+cells <- WhichCells(fetal_kidney, downsample = 100)
+ss <- ScaleData(subset(fetal_kidney, cells = cells), features = top5$feature)
+
+# p1: UMAP by cell type, p2: heatmap of the top markers, p3: cell counts
+p1 <- SCpubr::do_DimPlot(
+  fetal_kidney,
+  reduction = "umap", group.by = "cell_type", label = TRUE, repel = TRUE
+) +
+  ggtitle("cell_type") +
+  NoLegend()
+heatmap_palette <- c("#01665e", "#35978f", "darkslategray3", "#f7f7f7", "#fee391", "#fec44f", "#F9AD03")
+p2 <- DoHeatmap(ss, features = top5$feature, cells = cells, group.by = "cell_type") +
+  NoLegend() +
+  scale_fill_gradientn(colors = heatmap_palette)
+p3 <- ggplot(fetal_kidney@meta.data, aes(cell_type, fill = cell_type)) +
+  geom_bar() +
+  NoLegend() +
+  scale_x_discrete(guide = guide_axis(angle = 90))
+
+# assemble the figure: UMAP over bar plot on the left, heatmap on the right
+common_title <- sprintf(
+  "Unsupervised clustering %s, %d cells",
+  fetal_kidney@meta.data$orig.ident[1], ncol(fetal_kidney)
+)
+left_column <- (p1 / p3) + plot_layout(heights = c(3, 2))
+show(
+  (left_column | p2) +
+    plot_layout(widths = c(1, 1)) +
+    plot_layout(heights = c(3, 1)) +
+    plot_annotation(title = common_title)
+)
+
+# save the full (unfiltered) differential expression table
+write_csv(de_results, file = file.path(path_to_output, "00a_marker_cell-type_fetal_kidney_Stewart.csv"))
+
+```
+
+## References 
+
+- [1] https://www.science.org/doi/10.1126/science.aat5031 
+
+## Session info
+
+```{r }
+# Print the R session details (R version, attached packages) for this render
+sessionInfo()
+```
+