-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding 00-reference to build azimuth kidney reference #706
Changes from 12 commits
6a23d16
20f04e0
4691f7d
bd2a7e0
7fb795d
0f2bc73
557df2c
d02eaa8
9deb67c
3d8268e
3ab1e5e
dad3312
4282e29
6c70fbe
cc22690
7d60c65
b081722
c7deac5
470d073
a47e8c2
3d32a5b
f6daa9b
4c3ba05
9ce015e
b9f5eff
8eecdf5
430b080
c480e83
4eef88a
4cf4b70
ce31417
c8fffe1
b1f6b84
7060f5c
01b81b8
70d61b4
513f7de
1c3dda4
9aeda5b
3f7458f
9cd70dd
8c4e6f4
fd0a140
9abb234
0e55ed5
c01a3ad
0705141
4b761a4
268c648
f05dc97
0fcaaed
6d0f854
878dc04
f98da21
36fa88e
578f2bc
4d96949
41549c0
f6d6aec
036e799
0ed2e63
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,62 @@ | ||||||||||||||||||||||||||
#!/usr/bin/env Rscript | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Run the Label transfer from two fetal references ------------------------------ | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# get list of samples in the library -------------------------------------------- | ||||||||||||||||||||||||||
root_dir <- rprojroot::find_root(rprojroot::is_git_root) | ||||||||||||||||||||||||||
sample_metadata_file <- file.path(root_dir, "data", "current", "SCPCP000006", "single_cell_metadata.tsv") | ||||||||||||||||||||||||||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
metadata <- read.table(sample_metadata_file, sep = "\t", header = TRUE) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# set path to this module-------------------------------------------------------- | ||||||||||||||||||||||||||
module_base <- file.path(root_dir, "analyses", "cell-type-wilms-tumor-06") | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Download and create the fetal kidney reference (Stewart et al) ---------- | ||||||||||||||||||||||||||
source(file.path(module_base,"scripts", "download-and-create-fetal-kidney-ref.R")) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Characterize the two fetal references ----------------------------------------- | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Characterize the fetal full reference (Cao et al.) | ||||||||||||||||||||||||||
# To be done, next PR | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
# Characterize the fetal kidney reference (Stewart et al.) | ||||||||||||||||||||||||||
rmarkdown::render(input = file.path(module_base, "notebook_template", "00b_characterize_fetal_kidney_reference_Stewart.Rmd"), | ||||||||||||||||||||||||||
output_format = "html_document", | ||||||||||||||||||||||||||
output_file = "00b_characterization_fetal_kidney_reference_Stewart.html", | ||||||||||||||||||||||||||
output_dir = file.path(module_base, "notebook","00-reference")) | ||||||||||||||||||||||||||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But what I would do to help avoid a bug where you are naming the output something different than the input is something like:
Suggested change
☝🏻 Combining that with the idea of saving the directories as variables |
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Run the workflow for (all) samples in the project ----------------------------- | ||||||||||||||||||||||||||
for (i in metadata$scpca_sample_id[1:11]) { | ||||||||||||||||||||||||||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# create a directory to save the pre-processed and labeled `Seurat` objects | ||||||||||||||||||||||||||
dir.create(file.path(module_base, "results", i)) | ||||||||||||||||||||||||||
# create a directory to save the notebooks | ||||||||||||||||||||||||||
dir.create(file.path(module_base, "notebook", i)) | ||||||||||||||||||||||||||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Pre-process the data - `Seurat` workflow | ||||||||||||||||||||||||||
rmarkdown::render(input = file.path(module_base, "notebook_template", "01_seurat-processing.Rmd"), | ||||||||||||||||||||||||||
params = list(scpca_project_id = metadata$scpca_project_id[metadata$scpca_sample_id ==i], sample_id = i), | ||||||||||||||||||||||||||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
output_format = "html_document", | ||||||||||||||||||||||||||
output_file = paste0("01_Clustering_",i, ".html"), | ||||||||||||||||||||||||||
output_dir = file.path(module_base, "notebook", i)) | ||||||||||||||||||||||||||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Label transfer from the Cao reference using Azimuth | ||||||||||||||||||||||||||
rmarkdown::render(input = file.path(module_base, "notebook_template", "02a_label-transfer_fetal_full_reference_Cao.Rmd"), | ||||||||||||||||||||||||||
params = list(scpca_project_id = metadata$scpca_project_id[metadata$scpca_sample_id ==i], sample_id = i), | ||||||||||||||||||||||||||
output_format = "html_document", | ||||||||||||||||||||||||||
output_file = paste0("02a_fetal_all_reference_Cao_",i, ".html"), | ||||||||||||||||||||||||||
output_dir = file.path(module_base, "notebook", i)) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Label transfer from the Stewart reference using Seurat | ||||||||||||||||||||||||||
rmarkdown::render(input = file.path(module_base, "notebook_template", "02b_label-transfer_fetal_kidney_reference_Stewart.Rmd"), | ||||||||||||||||||||||||||
params = list(scpca_project_id = metadata$scpca_project_id[metadata$scpca_sample_id ==i],sample_id = i), | ||||||||||||||||||||||||||
output_format = "html_document", | ||||||||||||||||||||||||||
output_file = paste0("02b_fetal_kidney_reference_Stewart_",i, ".html"), | ||||||||||||||||||||||||||
output_dir = file.path(module_base, "notebook", i)) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
|
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
--- | ||
title: "Characterize the fetal kidney reference from the kidney cell atlas" | ||
author: "Maud PLASCHKA" | ||
date: '2024-08-07' | ||
params: | ||
url: https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds | ||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||
padj_threshold: 0.05 | ||
lfc_threshold: 1 | ||
rate1_threshold: 0.5 | ||
seed: 12345 | ||
output: | ||
html_document: | ||
toc: yes | ||
toc_float: yes | ||
code_folding: hide | ||
highlight: pygments | ||
df_print: paged | ||
self_contained: yes | ||
mode: selfcontained | ||
--- | ||
|
||
```{r setup, include=FALSE} | ||
knitr::opts_chunk$set(echo = TRUE, | ||
message=FALSE, | ||
warnings=FALSE) | ||
``` | ||
|
||
|
||
# Introduction | ||
|
||
The aim is to characterize the human fetal kidney from the kidney cell atlas. | ||
You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [1] | ||
The rds data can be download using the download link https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds | ||
The azimuth compatible reference has been downloaded and created in the `R script` `download-and-create-fetal-kidney-ref.R` | ||
|
||
## Packages | ||
|
||
Load required packages in the following chunk, if needed. | ||
Do not install packages here; only load them with the `library()` function. | ||
|
||
```{r packages, message=FALSE, warning=FALSE} | ||
library("Seurat") | ||
library(Azimuth) | ||
library(SCpubr) | ||
library(tidyverse) | ||
library(patchwork) | ||
|
||
set.seed(params$seed) | ||
options(future.globals.maxSize= 891289600000) | ||
``` | ||
|
||
|
||
## Base directories | ||
|
||
```{r base paths, eval=TRUE, include=TRUE} | ||
# The base path for the OpenScPCA repository, found by its (hidden) .git directory | ||
repository_base <- rprojroot::find_root(rprojroot::is_git_root) | ||
|
||
# The path to this module | ||
module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06") | ||
|
||
``` | ||
|
||
|
||
## Input files | ||
|
||
The input file is the output of the `R script` `download-and-create-fetal-kidney-ref.R` | ||
|
||
```{r path_to_data} | ||
path_to_data <- file.path( | ||
module_base, | ||
"scratch", | ||
"fetal_kidney.rds" | ||
) | ||
download.file(url = params$url, destfile = path_to_data) | ||
maud-p marked this conversation as resolved.
Show resolved
Hide resolved
|
||
``` | ||
|
||
## Output file | ||
|
||
We will save the result of the differential expression analysis in results/references/00b_marker_genes_fetal_kidney_Stewart.csv | ||
Notebook is saved in the `notebook/00-reference` directory | ||
|
||
```{r path_to_output} | ||
path_to_output <- file.path(module_base, "results", "references") | ||
``` | ||
|
||
# Analysis | ||
|
||
## Load the reference | ||
|
||
```{r pre_process, echo=TRUE, fig.height=7, fig.width=12, message=FALSE, warning=FALSE, out.width='100%'} | ||
fetal_kidney <- readRDS(path_to_data) | ||
|
||
d1 <- do_DimPlot(fetal_kidney, reduction = "umap", dims = c(1,2), group.by = "compartment", label = TRUE, repel = TRUE) + NoLegend() | ||
d2 <- do_DimPlot(fetal_kidney, reduction = "umap", dims = c(1,2), group.by = "cell_type", label = TRUE, repel = TRUE) + NoLegend() | ||
|
||
d1 | d2 | ||
``` | ||
|
||
## Characterization of compartment and cell types in the reference | ||
|
||
Here, we use an unbiased approach to find transcripts that characterized the different compartments and cell types. | ||
|
||
This is just to get markers genes of the different population, in case some could be of interest for the Wilms tumor annotations. | ||
|
||
We run DElegate::FindAllMarkers2 to find markers of the different clusters and manually check if they do make sense. | ||
DElegate::FindAllMarkers2 is an improved version of Seurat::FindAllMarkers based on pseudobulk differential expression method. | ||
Please check the preprint from Chistoph Hafemeister: https://www.biorxiv.org/content/10.1101/2023.03.28.534443v1 | ||
and tool described here: https://github.com/cancerbits/DElegate | ||
|
||
### Find marker genes for each of the compartment | ||
|
||
|
||
```{r markers_compatment, fig.width=8, fig.height=7, out.width='100%'} | ||
de_results <- DElegate::FindAllMarkers2(fetal_kidney, group_column = "compartment",) | ||
|
||
#filter the most relevant markers | ||
s.markers <- de_results[de_results$padj < params$padj_threshold & de_results$log_fc > params$lfc_threshold & de_results$rate1 > params$rate1_threshold,] | ||
|
||
DT::datatable(s.markers, caption = ("marker genes"), | ||
extensions = 'Buttons', | ||
options = list( dom = 'Bfrtip', | ||
buttons = c( 'csv', 'excel'))) | ||
|
||
# Select top 5 genes for heatmap plotting | ||
s.markers <- na.omit(s.markers) | ||
s.markers %>% | ||
group_by(group1) %>% | ||
top_n(n = 5, wt = log_fc) -> top5 | ||
|
||
# subset for plotting | ||
Idents(fetal_kidney) <- fetal_kidney$compartment | ||
cells <- WhichCells(fetal_kidney, downsample = 100) | ||
ss <- subset(fetal_kidney, cells = cells) | ||
ss <- ScaleData(ss, features = top5$feature) | ||
|
||
p1 <- SCpubr::do_DimPlot(fetal_kidney, reduction="umap", group.by = "compartment", label = TRUE, repel = TRUE) + ggtitle("compartment") | ||
p2 <- DoHeatmap(ss, features = top5$feature, cells = cells, group.by = "compartment") + NoLegend() + | ||
scale_fill_gradientn(colors = c("#01665e","#35978f",'darkslategray3', "#f7f7f7", "#fee391","#fec44f","#F9AD03")) | ||
p3 <- ggplot(fetal_kidney@meta.data, aes(compartment, fill = compartment)) + geom_bar() + NoLegend() | ||
|
||
|
||
common_title <- sprintf("Unsupervised clustering %s, %d cells", fetal_kidney@meta.data$orig.ident[1], ncol(fetal_kidney)) | ||
show((((p1 / p3) + plot_layout(heights = c(3,2)) | p2) ) + plot_layout(widths = c(1, 2)) + plot_layout(heights = c(3,1)) + plot_annotation(title = common_title)) | ||
|
||
write_csv(de_results, file = file.path(path_to_output, "00a_marker_compartment_fetal_kidney_Stewart.csv")) | ||
|
||
|
||
``` | ||
|
||
|
||
### Find marker genes for each of the cell types | ||
|
||
|
||
```{r markers_cell, fig.width=17, fig.height=20, out.width='100%'} | ||
de_results <- DElegate::FindAllMarkers2(fetal_kidney, group_column = "cell_type") | ||
|
||
#filter the most relevant markers | ||
s.markers <- de_results[de_results$padj < params$padj_threshold & de_results$log_fc > params$lfc_threshold & de_results$rate1 > params$rate1_threshold,] | ||
|
||
|
||
DT::datatable(s.markers, caption = ("marker genes"), | ||
extensions = 'Buttons', | ||
options = list( dom = 'Bfrtip', | ||
buttons = c( 'csv', 'excel'))) | ||
|
||
# Select top 5 genes for heatmap plotting | ||
s.markers <- na.omit(s.markers) | ||
s.markers %>% | ||
group_by(group1) %>% | ||
top_n(n = 5, wt = log_fc) -> top5 | ||
|
||
# subset for plotting | ||
Idents(fetal_kidney) <- fetal_kidney$cell_type | ||
cells <- WhichCells(fetal_kidney, downsample = 100) | ||
ss <- subset(fetal_kidney, cells = cells) | ||
ss <- ScaleData(ss, features = top5$feature) | ||
|
||
p1 <- SCpubr::do_DimPlot(fetal_kidney, reduction="umap", group.by = "cell_type", label = TRUE, repel = TRUE) + ggtitle("cell_type") + NoLegend() | ||
p2 <- DoHeatmap(ss, features = top5$feature, cells = cells, group.by = "cell_type") + NoLegend() + | ||
scale_fill_gradientn(colors = c("#01665e","#35978f",'darkslategray3', "#f7f7f7", "#fee391","#fec44f","#F9AD03")) | ||
p3 <- ggplot(fetal_kidney@meta.data, aes(cell_type, fill = cell_type)) + geom_bar() + NoLegend() + scale_x_discrete(guide = guide_axis(angle = 90)) | ||
|
||
|
||
common_title <- sprintf("Unsupervised clustering %s, %d cells", fetal_kidney@meta.data$orig.ident[1], ncol(fetal_kidney)) | ||
show((((p1 / p3) + plot_layout(heights = c(3,2)) | p2) ) + plot_layout(widths = c(1, 1)) + plot_layout(heights = c(3,1)) + plot_annotation(title = common_title)) | ||
|
||
write_csv( de_results, file = file.path(path_to_output, "00a_marker_cell-type_fetal_kidney_Stewart.csv")) | ||
|
||
``` | ||
|
||
## References | ||
|
||
- [1] https://www.science.org/doi/10.1126/science.aat5031 | ||
|
||
## Session info | ||
|
||
```{r } | ||
sessionInfo() | ||
``` | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just wanted to leave a general comment here - this looks good to me as is, except the
01_clustering
output name issue and we probably want to have the script that goes in run all samples 😄Most of the inline comments are potential enhancements based on my experience with buggy code I've written myself 😳