-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemo_lochness.R
135 lines (116 loc) · 5.81 KB
/
demo_lochness.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
library(ggplot2)
library(dplyr)
library(tidyverse)
library(Seurat)
library(FNN)
library(plyr)
library(scales)
library(reshape2)
safe_column_scale = function(bmat, scale_vector) {
bmat@x <- bmat@x / rep.int(scale_vector, diff(bmat@p))
return(bmat)
}
safe_column_multiply = function(bmat, scale_vector) {
bmat@x <- bmat@x * rep.int(scale_vector, diff(bmat@p))
return(bmat)
}
wd = 'demo_data/'
seurat_object = readRDS(paste0(wd, "demo_Seurat.rds"))
# The processed seurat object should have precomtuted PCA and UMAP coordinates
kadj = round(0.5 * sqrt(ncol(seurat_object)))
seurat_object = seurat_object %>% Seurat::L2Dim(reduction='pca')
###########################
#Manuscriot implementation#
###########################
# The current implementation of lochNESS is a simple point estimate,
# considering all wildtype cells as one entity.
dff = data.frame()
for (j in unique(seurat_object$RT_group)) {
#split cells from one embryos and other embryos
mt_cells = colnames(seurat_object)[seurat_object$RT_group==j]
ot_cells = colnames(seurat_object)[seurat_object$RT_group!=j]
mt.mtx = as.matrix(t(as.data.frame(seurat_object@reductions$pca.l2@cell.embeddings)[mt_cells, ]))
ot.mtx = as.matrix(t(as.data.frame(seurat_object@reductions$pca.l2@cell.embeddings)[ot_cells, ]))
# Run KNN and assign labels based on majority of NNs
knn = get.knnx(t(ot.mtx), t(mt.mtx), k=kadj, algorithm="kd_tree")
knn$label <- mapvalues(knn$nn.index,from=seq(1, ncol(ot.mtx)),to=as.character(seurat_object[,ot_cells]$Mutant),warn_missing=FALSE)
# calculate lochNESS
mt_counts = as.data.frame(table(seurat_object[,ot_cells]$Mutant))
mutant_mask = ifelse(knn$label=="WT", 0, 1)
mutant_neighbors = rowSums(mutant_mask)
pca_mutant_score = as.data.frame((mutant_neighbors / kadj) / (1 - mt_counts$Freq[mt_counts$Var1=='WT'] / sum(mt_counts$Freq)) - 1)
rownames(pca_mutant_score) = mt_cells
colnames(pca_mutant_score) = c("lochNESS")
dff = rbind(dff,pca_mutant_score)
}
#saving..
df_umap = as.data.frame(seurat_object[['umap']]@cell.embeddings)
df_umap$lochNESS = dff[rownames(df_umap),]
df_umap$sub_trajectory = seurat_object$sub_trajectory
#saveRDS(df_umap,paste0(wd,'demo_lochness_scores.rds'))
############################
#Alternative implementation#
############################
# Alternatively, if the dataset contains sufficient cells, one can
# calculate lochNESS_i (one score per wildtype sample), and assess
# the variability of lochNESS when using different wildtype samples.
# Wildtype RT IDs are 42-44, and mutant RT IDs are 94-97.
wildtype_ids = unique(seurat_object[,seurat_object$Mutant=="WT"]$RT_group)
mutant_ids = unique(seurat_object[,seurat_object$Mutant!="WT"]$RT_group)
df_list = list()
for (ot_rt in wildtype_ids) { # calculate a score for one wildtype sample
df = data.frame()
seurat_obj = seurat_object[,seurat_object$RT_group %in% c(mutant_ids,ot_rt)] # specifiy which samples go into the analysis
for (j in mutant_ids) {
#split mutant and wildtype cells
mt_cells = colnames(seurat_obj)[seurat_obj$RT_group==j]
ot_cells = colnames(seurat_obj)[seurat_obj$RT_group!=j]
mt.mtx = as.matrix(t(as.data.frame(seurat_obj@reductions$pca.l2@cell.embeddings)[mt_cells, ]))
ot.mtx = as.matrix(t(as.data.frame(seurat_obj@reductions$pca.l2@cell.embeddings)[ot_cells, ]))
# Run KNN and assign labels based on majority of NNs
knn = get.knnx(t(ot.mtx), t(mt.mtx), k=kadj, algorithm="kd_tree")
knn$label <- mapvalues(knn$nn.index,from=seq(1, ncol(ot.mtx)),to=as.character(seurat_obj[,ot_cells]$Mutant),warn_missing=FALSE)
# calculate lochNESS
mt_counts = as.data.frame(table(seurat_obj[,ot_cells]$Mutant))
mutant_mask = ifelse(knn$label=="WT", 0, 1)
mutant_neighbors = rowSums(mutant_mask)
pca_mutant_score = as.data.frame((mutant_neighbors / kadj) / (1 - mt_counts$Freq[mt_counts$Var1=='WT'] / sum(mt_counts$Freq)) - 1)
rownames(pca_mutant_score) = mt_cells
colnames(pca_mutant_score) = c(paste0("lochNESS_",ot_rt))
df = rbind(df,pca_mutant_score)
}
df_list[[ot_rt]] = df
}
cells_to_plot = colnames(seurat_object)[seurat_object$RT_group %in% mutant_ids]
df_umap = as.data.frame(seurat_object[['umap']]@cell.embeddings[cells_to_plot,])
#plotting and comparing the two versions
df_umap$lochNESS = dff[rownames(df_umap),]
df_umap$sub_trajectory = seurat_object$sub_trajectory[cells_to_plot]
plot1 <- ggplot(df_umap, aes(x=UMAP_1, y=UMAP_2,color=lochNESS)) +
geom_point(size=0.05) +
scale_color_gradientn(
colours = c(muted("red"), "white", muted("blue")),
values = rescale(c(min(df_umap$lochNESS),median(df_umap$lochNESS),max(df_umap$lochNESS))),
guide = "colorbar", limits=c(min(df_umap$lochNESS),max(df_umap$lochNESS))
) + theme_classic()
#ggsave(plot1,filename = paste0(wd,'demo_lochness_umap2d.pdf'),width=6,height=6)
plot2 <- ggplot(df_umap, aes(x=UMAP_1, y=UMAP_2,color=sub_trajectory)) +
geom_point(size=0.2) + theme_classic()
#ggsave(plot2,filename = paste0(wd,'demo_subtrajectory_umap2d.pdf'),width=8,height=6)
for (ot_rt in wildtype_ids) {
temp = df_list[[ot_rt]][cells_to_plot,paste0("lochNESS_",ot_rt)]
df_umap[[paste0("lochNESS_",ot_rt)]] = temp
plot <- ggplot(df_umap, aes(x=UMAP_1, y=UMAP_2,color=temp)) +
geom_point(size=0.05) +
scale_color_gradientn(
colours = c(muted("red"), "white", muted("blue")),
values = rescale(c(min(temp),median(temp),max(temp))),
guide = "colorbar", limits=c(min(temp),max(temp))
) + theme_classic()
#ggsave(plot,filename = paste0(wd,'demo_lochness_',ot_rt,'_alt_umap2d.pdf'),width=6,height=6)
}
#plotting correlation matrix
cormat <- round(cor(df_umap[,grep("lochNESS", names(df_umap), value = TRUE)]),2)
melted_cormat <- melt(cormat)
plot <- ggplot(data = melted_cormat, aes(x=Var1, y=Var2, fill=value)) + geom_tile() + theme_classic()
#ggsave(plot,filename = paste0(wd,'demo_lochness_corr.pdf'),width=7,height=6)