Skip to content

Commit

Permalink
removing extra stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
maddyduran committed Nov 13, 2023
1 parent 455aa68 commit d076361
Show file tree
Hide file tree
Showing 16 changed files with 236 additions and 3,232 deletions.
297 changes: 0 additions & 297 deletions R/cell_count_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -354,302 +354,6 @@ new_cell_count_set <- function(cds,
}


#' resamples the ccs counts using a multinomial distribution
#' @param ccs
#' @param random.seed
#' @noRd
bootstrap_ccs = function(ccs, random.seed=NULL) {
count_mat = counts(ccs)
num_cols = dim(count_mat)[2]
set.seed(random.seed)
count_list = lapply(seq(1,num_cols), function(i){
counts = count_mat[,i]
size = sum(count_mat[,i])
prob = counts/size
sample_counts = rmultinom(1, size, prob)

})
new_count_mat = do.call(cbind,count_list)
colnames(new_count_mat) = colnames(count_mat)
counts(ccs) = new_count_mat
return(ccs)
}


#'
#' @param ccs
#' @param full_model_formula_str
#' @param best_full_model
#' @param reduced_pln_model
#' @param pseudocount
#' @param initial_penalties
#' @param pln_min_ratio
#' @param pln_num_penalties
#' @param random.seed
#' @noRd
bootstrap_model = function(ccs,
full_model_formula_str,
best_full_model,
reduced_pln_model,
pseudocount,
initial_penalties,
pln_min_ratio,
pln_num_penalties,
random.seed,
covariance_type,
backend = c('nlopt', 'torch')) {

assertthat::assert_that(
tryCatch(expr = ifelse(match.arg(backend) == "", TRUE, TRUE),
error = function(e) FALSE),
msg = paste('Argument backend must be one of "nlopt" or "torch".'))
backend <- match.arg(backend)

# resample the counts
sub_ccs = bootstrap_ccs(ccs, random.seed = random.seed)

norm_method = ccs@info$norm_method
if (norm_method == "size_factors") {
norm_method = monocle3::size_factors(sub_ccs)
}

# remake data from new ccs
sub_pln_data <- PLNmodels::prepare_data(counts = counts(sub_ccs) + pseudocount,
covariates = colData(sub_ccs) %>% as.data.frame,
offset = norm_method)
# rerun the model using the same initial parameters
# as the original, non bootstrapped model

if (backend == "torch") {
# bge (20221227): notes:
# o I am trying to track the code in the PLNmodels master branch at Github
# o the PLNmodels::PLN() function versions that I see do not include
# the arguments min.ratio, nPenalties, vcov_est, and penalty_weights. I
# am confused...
# o I revert to the original because the PLNmodels changes break hooke.
sub_full_model = do.call(PLNmodels::PLN, args=list(full_model_formula_str,
data = sub_pln_data,
# penalties = reduced_pln_model$penalties,
control = PLNmodels::PLN_param(backend = 'torch',
trace = ifelse(FALSE, 2, 0),
covariance = covariance_type,
inception = best_full_model,
config_optim = list(maxevel = 10000,
ftol_rel = 1e-8,
xtol_rel = 1e-6))))

# bge (20221227): notes
# o I restore the PLN call for the earlier PLNmodels version commit 022d59d
# o the earlier version of PLNmodels was 'PLNmodels * 0.11.7-9600 2022-11-29 [1] Github (PLN-team/PLNmodels@022d59d)'
# sub_full_model = do.call(PLNmodels::PLN, args=list(full_model_formula_str,
# data = sub_pln_data,
# # penalties = reduced_pln_model$penalties,
# control = list(min.ratio=pln_min_ratio,
# nPenalties=pln_num_penalties,
# vcov_est = "none",
# inception = best_full_model,
# backend = "torch",
# penalty_weights=initial_penalties,
# trace = ifelse(FALSE, 2, 0))))

} else {
# bge (20221227): notes:
# o I am trying to track the code in the PLNmodels master branch at Github
# o I revert to the original because the PLNmodels changes break hooke.
sub_full_model = do.call(PLNmodels::PLNnetwork, args=list(full_model_formula_str,
data = sub_pln_data,
penalties = reduced_pln_model$penalties,
control = PLNmodels::PLNnetwork_param(backend = 'nlopt',
trace = ifelse(FALSE, 2, 0),
covariance = covariance_type,
n_penalties = pln_num_penalties,
min_ratio = pln_min_ratio,
penalty_weights = initial_penalties,
config_optim = list(algorithm = 'CCSAQ',
maxeval = 10000,
ftol_rel = 1e-8,
xtol_rel = 1e-6,
ftol_out = 1e-6,
maxit_out = 50,
ftol_abs = 0.0,
xtol_abs = 0.0,
maxtime = -1))))

# bge (20221227): notes
# o I restore the PLN call for the earlier PLNmodels version commit 022d59d
# o the earlier version of PLNmodels was 'PLNmodels * 0.11.7-9600 2022-11-29 [1] Github (PLN-team/PLNmodels@022d59d)'
# sub_full_model = do.call(PLNmodels::PLNnetwork, args=list(full_model_formula_str,
# data = sub_pln_data,
# penalties = reduced_pln_model$penalties,
# control_init=list(min.ratio=pln_min_ratio,
# nPenalties=pln_num_penalties,
# penalty_weights=initial_penalties),
# control_main=list(trace = ifelse(FALSE, 2, 0))))
}

return(sub_full_model)
}


#
# Removed 20230104 bge PLNmodels v1.0.0 removed get_vcov_hat
#' @noRd
# compute_vhat = function(model, model_family, type) {
#
# if (model$d > 0) {
# ## self$fisher$mat : Fisher Information matrix I_n(\Theta) = n * I(\Theta)
# ## safe inversion using Matrix::solve and Matrix::diag and error handling
#
# X = model_family$responses
# Y = model_family$covariates
# model$get_vcov_hat(type, X, Y)
#
# vhat = vcov(model)
#
# # zero out everything not on block diagonal
# if (type == "sandwich") {
#
# num_coef = dim(coef(model))[2]
# num_blocks =dim(vhat)[1]/num_coef
#
# blocks = lapply(1:num_blocks, function(i) {
# start = num_coef*(i-1) + 1
# end = num_coef*i
# vhat[start:end,start:end]
# })
#
# vhat = Matrix::bdiag(blocks) %>% as.matrix()
# }
#
# # vcov_mat = vcov(model)
#
# # vhat <- matrix(0, nrow = nrow(vcov_mat), ncol = ncol(vcov_mat))
# #
# # #dimnames(vhat) <- dimnames(vcov_mat)
# # safe_rows = safe_cols = Matrix::rowSums(abs(vcov_mat)) > 0
# # vcov_mat = vcov_mat[safe_rows, safe_cols]
# #
# # out <- tryCatch(Matrix::solve(vcov_mat),
# # error = function(e) {e})
# # row.names(out) = colnames(out) = names(safe_rows[safe_rows])
# # if (is(out, "error")) {
# # warning(paste("Inversion of the Fisher information matrix failed with following error message:",
# # out$message,
# # "Returning NA",
# # sep = "\n"))
# # vhat <- matrix(NA, nrow = model$p, ncol = model$d)
# # } else {
# # row.names(out) = colnames(out) = names(safe_rows[safe_rows])
# # row.names(vhat) = colnames(vhat) = row.names(vcov(model))
# # vhat[safe_rows, safe_cols] = as.numeric(out) #as.numeric(out) #%>% sqrt %>% matrix(nrow = self$d) %>% t()
# # }
# # #dimnames(vhat) <- dimnames(vcov_mat)
# } else {
# vhat <- NULL
# }
# vhat = methods::as(vhat, "dgCMatrix")
# vhat
# }


#' computes the avg vhat across n bootstraps
#' @param ccm
#' @param num_bootstraps
#' @noRd
bootstrap_vhat = function(ccs,
full_model_formula_str,
best_full_model,
best_reduced_model,
reduced_pln_model,
pseudocount,
initial_penalties,
pln_min_ratio,
pln_num_penalties,
verbose,
num_bootstraps,
backend,
covariance_type) {
# to do: parallelize

get_bootstrap_coef = function(random.seed,
ccs,
full_model_formula_str,
best_full_model,
best_reduced_model,
reduced_pln_model,
pseudocount,
initial_penalties,
pln_min_ratio,
pln_num_penalties,
backend,
covariance_type) {

bootstrapped_model = bootstrap_model(ccs,
full_model_formula_str,
best_full_model,
reduced_pln_model,
pseudocount,
initial_penalties,
pln_min_ratio,
pln_num_penalties,
random.seed = random.seed,
covariance_type=covariance_type,
backend = backend)

if (backend == "torch") {
coef(bootstrapped_model) %>%
as.data.frame() %>% t() %>%
tibble::rownames_to_column("cell_group")
} else {
best_bootstrapped_model = PLNmodels::getModel(bootstrapped_model, var=best_reduced_model$penalty)
coef(best_bootstrapped_model) %>% t() %>%
as.data.frame() %>%
tibble::rownames_to_column("cell_group")
}
}

coef_df = data.frame(seed = seq(1, num_bootstraps)) %>%
mutate(coef = purrr::map(.f = purrr::possibly(get_bootstrap_coef, NA_character_),
.x = seed,
ccs,
full_model_formula_str,
best_full_model,
best_reduced_model,
reduced_pln_model,
pseudocount,
initial_penalties,
pln_min_ratio,
pln_num_penalties,
backend,
covariance_type))

coef_df = coef_df %>% filter(!is.na(coef))

# compute the covariance of the parameters
get_cov_mat = function(data, cell_group) {

cov_matrix = cov(data)
rownames(cov_matrix) = paste0(cell_group, "_", rownames(cov_matrix))
colnames(cov_matrix) = paste0(cell_group, "_", colnames(cov_matrix))
return(cov_matrix)
}

bootstrapped_df = do.call(rbind, coef_df$coef) %>%
group_by(cell_group) %>%
tidyr::nest() %>%
mutate(cov_mat = purrr::map2(.f = get_cov_mat,
.x = data,
.y = cell_group))

bootstrapped_vhat = Matrix::bdiag(bootstrapped_df$cov_mat) %>% as.matrix()
names = lapply(bootstrapped_df$cov_mat, function(m){ colnames(m)}) %>% unlist()
rownames(bootstrapped_vhat) = names
colnames(bootstrapped_vhat) = names

bootstrapped_vhat = methods::as(bootstrapped_vhat, "dgCMatrix")
return(bootstrapped_vhat)
}


#' Create a new cell_count_model object.
#'
Expand Down Expand Up @@ -1193,4 +897,3 @@ build_interval_formula <- function(ccs, num_breaks, interval_var="timepoint", in

return(interval_formula_str)
}
#debug(build_interval_formula)
87 changes: 0 additions & 87 deletions R/deg.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,90 +80,3 @@ add_covariate <- function(ccs, pb_cds, covariate) {
return(pb_cds)
}


fit_cell_models <- function(ccs,
model_formula_str = "~ 1",
cores = 1,
cell_groups = NULL,
... ) {
pb_cds = pseudobulk_ccs_for_states(ccs)

# make sure that the model terms are in the ccs
model_formula_str = stringr::str_replace_all(model_formula_str, "~", "")
model_formula_str = gsub("[[:space:]]", "", model_formula_str)
model_terms = stringr::str_split(model_formula_str, pattern="\\+") %>%
unlist() %>%
stringr::str_split(pattern="\\*") %>%
unlist() %>%
stringr::str_split(pattern="\\:") %>%
unlist() %>% unique()

for (term in model_terms) {
if (term %in% colnames(colData(ccs))) {
pb_cds = add_covariate(ccs, pb_cds, term)
}
}

group_models = monocle3::fit_models(cg_pb_cds,
weights = colData(cg_pb_cds)$num_cells_in_group,
model_formula_str = paste0("~",model_formula_str),
cores = cores,
... )

}


# run DEGs across all cell groups in the ccs
# for a given contrast
#' @param ccs
#' @param model_formula_str
#' @param cores
fit_cell_group_models <- function(ccs,
model_formula_str = "~ 1",
cores = 1,
cell_groups = NULL,
... ) {

pb_cds = pseudobulk_ccs_for_states(ccs)

# make sure that the model terms are in the ccs
model_formula_str = stringr::str_replace_all(model_formula_str, "~", "")
model_formula_str = gsub("[[:space:]]", "", model_formula_str)
model_terms = stringr::str_split(model_formula_str, pattern="\\+") %>%
unlist() %>%
stringr::str_split(pattern="\\*") %>%
unlist() %>%
stringr::str_split(pattern="\\:") %>%
unlist() %>% unique()

for (term in model_terms) {
if (term %in% colnames(colData(ccs))) {
pb_cds = add_covariate(ccs, pb_cds, term)
}
}

# subset to genes that are expressed over a certain min value
expr_over_thresh = normalized_counts(pb_cds, "size_only", pseudocount = 0)
genes_to_test = which(Matrix::rowSums(expr_over_thresh) >= 1)
pb_cds = pb_cds[genes_to_test,]

if (is.null(cell_groups)) {
cell_groups = rownames(counts(ccs))
}

# fit models in every cell group
group_models = lapply(cell_groups, function(cell_group){

cg_pb_cds = pb_cds[, colData(pb_cds)$cell_group == cell_group]
cg_group_models = monocle3::fit_models(cg_pb_cds,
weights = colData(cg_pb_cds)$num_cells_in_group,
model_formula_str = paste0("~",model_formula_str),
cores = cores,
... )
cg_group_models
# fit_coefs = coefficient_table(cg_group_models)
# fit_coefs
})

return(group_models)
}
Loading

0 comments on commit d076361

Please sign in to comment.