removing extra stuff

cole-trapnell-lab · Nov 13, 2023 · d076361 · d076361
1 parent 455aa68
commit d076361
Show file tree

Hide file tree

Showing 16 changed files with 236 additions and 3,232 deletions.
diff --git a/R/cell_count_model.R b/R/cell_count_model.R
@@ -354,302 +354,6 @@ new_cell_count_set <- function(cds,
 }
 
 
-#' resamples the ccs counts using a multinomial distribution
-#' @param ccs
-#' @param random.seed
-#' @noRd
-bootstrap_ccs = function(ccs, random.seed=NULL) {
-  count_mat = counts(ccs)
-  num_cols = dim(count_mat)[2]
-  set.seed(random.seed)
-  count_list = lapply(seq(1,num_cols), function(i){
-    counts = count_mat[,i]
-    size = sum(count_mat[,i])
-    prob = counts/size
-    sample_counts = rmultinom(1, size, prob)
-
-  })
-  new_count_mat = do.call(cbind,count_list)
-  colnames(new_count_mat) = colnames(count_mat)
-  counts(ccs) = new_count_mat
-  return(ccs)
-}
-
-
-#'
-#' @param ccs
-#' @param full_model_formula_str
-#' @param best_full_model
-#' @param reduced_pln_model
-#' @param pseudocount
-#' @param initial_penalties
-#' @param pln_min_ratio
-#' @param pln_num_penalties
-#' @param random.seed
-#' @noRd
-bootstrap_model = function(ccs,
-                           full_model_formula_str,
-                           best_full_model,
-                           reduced_pln_model,
-                           pseudocount,
-                           initial_penalties,
-                           pln_min_ratio,
-                           pln_num_penalties,
-                           random.seed,
-                           covariance_type,
-                           backend = c('nlopt', 'torch')) {
-
-  assertthat::assert_that(
-    tryCatch(expr = ifelse(match.arg(backend) == "", TRUE, TRUE),
-             error = function(e) FALSE),
-    msg = paste('Argument backend must be one of "nlopt" or "torch".'))
-  backend <- match.arg(backend)
-
-  # resample the counts
-  sub_ccs = bootstrap_ccs(ccs, random.seed = random.seed)
-
-  norm_method = ccs@info$norm_method
-  if (norm_method == "size_factors") {
-    norm_method = monocle3::size_factors(sub_ccs)
-  }
-
-  # remake data from new ccs
-  sub_pln_data <- PLNmodels::prepare_data(counts = counts(sub_ccs) + pseudocount,
-                                          covariates = colData(sub_ccs) %>% as.data.frame,
-                                          offset = norm_method)
-  # rerun the model using the same initial parameters
-  # as the original, non bootstrapped model
-
-  if (backend == "torch") {
-# bge (20221227): notes:
-#                   o I am trying to track the code in the PLNmodels master branch at Github
-#                   o the PLNmodels::PLN() function versions that I see do not include
-#                     the arguments min.ratio, nPenalties, vcov_est, and penalty_weights. I
-#                     am confused...
-#                   o I revert to the original because the PLNmodels changes break hooke.
-    sub_full_model = do.call(PLNmodels::PLN, args=list(full_model_formula_str,
-                                                       data = sub_pln_data,
-                                                       # penalties = reduced_pln_model$penalties,
-                                                       control = PLNmodels::PLN_param(backend = 'torch',
-                                                                                      trace = ifelse(FALSE, 2, 0),
-                                                                                      covariance = covariance_type,
-                                                                                      inception = best_full_model,
-                                                                                      config_optim = list(maxevel  = 10000,
-                                                                                                          ftol_rel = 1e-8,
-                                                                                                          xtol_rel = 1e-6))))
-
-# bge (20221227): notes
-#                   o I restore the PLN call for the earlier PLNmodels version commit 022d59d
-#                   o the earlier version of PLNmodels was 'PLNmodels    * 0.11.7-9600 2022-11-29 [1] Github (PLN-team/PLNmodels@022d59d)'
-#     sub_full_model = do.call(PLNmodels::PLN, args=list(full_model_formula_str,
-#                                                        data = sub_pln_data,
-#                                                        # penalties = reduced_pln_model$penalties,
-#                                                        control = list(min.ratio=pln_min_ratio,
-#                                                                       nPenalties=pln_num_penalties,
-#                                                                       vcov_est = "none",
-#                                                                       inception = best_full_model,
-#                                                                       backend = "torch",
-#                                                                       penalty_weights=initial_penalties,
-#                                                                       trace = ifelse(FALSE, 2, 0))))
-
-  } else {
-# bge (20221227): notes:
-#                   o I am trying to track the code in the PLNmodels master branch at Github
-#                   o I revert to the original because the PLNmodels changes break hooke.
-    sub_full_model = do.call(PLNmodels::PLNnetwork, args=list(full_model_formula_str,
-                                                              data = sub_pln_data,
-                                                              penalties = reduced_pln_model$penalties,
-                                                              control = PLNmodels::PLNnetwork_param(backend = 'nlopt',
-                                                                                                    trace = ifelse(FALSE, 2, 0),
-                                                                                                    covariance = covariance_type,
-                                                                                                    n_penalties = pln_num_penalties,
-                                                                                                    min_ratio = pln_min_ratio,
-                                                                                                    penalty_weights = initial_penalties,
-                                                                                                    config_optim = list(algorithm = 'CCSAQ',
-                                                                                                                        maxeval = 10000,
-                                                                                                                        ftol_rel = 1e-8,
-                                                                                                                        xtol_rel = 1e-6,
-                                                                                                                        ftol_out = 1e-6,
-                                                                                                                        maxit_out = 50,
-                                                                                                                        ftol_abs = 0.0,
-                                                                                                                        xtol_abs = 0.0,
-                                                                                                                        maxtime = -1))))
-
-# bge (20221227): notes
-#                   o I restore the PLN call for the earlier PLNmodels version commit 022d59d
-#                   o the earlier version of PLNmodels was 'PLNmodels    * 0.11.7-9600 2022-11-29 [1] Github (PLN-team/PLNmodels@022d59d)'
-#     sub_full_model = do.call(PLNmodels::PLNnetwork, args=list(full_model_formula_str,
-#                                                               data = sub_pln_data,
-#                                                               penalties = reduced_pln_model$penalties,
-#                                                               control_init=list(min.ratio=pln_min_ratio,
-#                                                                                 nPenalties=pln_num_penalties,
-#                                                                                 penalty_weights=initial_penalties),
-#                                                               control_main=list(trace = ifelse(FALSE, 2, 0))))
-  }
-
-  return(sub_full_model)
-}
-
-
-#
-# Removed 20230104 bge PLNmodels v1.0.0 removed get_vcov_hat
-#' @noRd
-# compute_vhat = function(model, model_family, type) {
-#
-#     if (model$d > 0) {
-#       ## self$fisher$mat : Fisher Information matrix I_n(\Theta) = n * I(\Theta)
-#       ## safe inversion using Matrix::solve and Matrix::diag and error handling
-#
-#       X = model_family$responses
-#       Y = model_family$covariates
-#       model$get_vcov_hat(type, X, Y)
-#
-#       vhat = vcov(model)
-#
-#       # zero out everything not on block diagonal
-#       if (type == "sandwich") {
-#
-#         num_coef = dim(coef(model))[2]
-#         num_blocks =dim(vhat)[1]/num_coef
-#
-#         blocks = lapply(1:num_blocks, function(i) {
-#           start = num_coef*(i-1) + 1
-#           end = num_coef*i
-#           vhat[start:end,start:end]
-#         })
-#
-#         vhat = Matrix::bdiag(blocks) %>% as.matrix()
-#       }
-#
-#       # vcov_mat = vcov(model)
-#
-#     #   vhat <- matrix(0, nrow = nrow(vcov_mat), ncol = ncol(vcov_mat))
-#     #
-#     #   #dimnames(vhat) <- dimnames(vcov_mat)
-#     #   safe_rows = safe_cols = Matrix::rowSums(abs(vcov_mat)) > 0
-#     #   vcov_mat = vcov_mat[safe_rows, safe_cols]
-#     #
-#     #   out <- tryCatch(Matrix::solve(vcov_mat),
-#     #                   error = function(e) {e})
-#     #   row.names(out) = colnames(out) = names(safe_rows[safe_rows])
-#     #   if (is(out, "error")) {
-#     #     warning(paste("Inversion of the Fisher information matrix failed with following error message:",
-#     #                   out$message,
-#     #                   "Returning NA",
-#     #                   sep = "\n"))
-#     #     vhat <- matrix(NA, nrow = model$p, ncol = model$d)
-#     #   } else {
-#     #     row.names(out) = colnames(out) = names(safe_rows[safe_rows])
-#     #     row.names(vhat) = colnames(vhat) = row.names(vcov(model))
-#     #     vhat[safe_rows, safe_cols] = as.numeric(out) #as.numeric(out) #%>% sqrt %>% matrix(nrow = self$d) %>% t()
-#     #   }
-#     #   #dimnames(vhat) <- dimnames(vcov_mat)
-#     } else {
-#       vhat <- NULL
-#     }
-#     vhat = methods::as(vhat, "dgCMatrix")
-#     vhat
-# }
-
-
-#' computes the avg vhat across n bootstraps
-#' @param ccm
-#' @param num_bootstraps
-#' @noRd
-bootstrap_vhat = function(ccs,
-                          full_model_formula_str,
-                          best_full_model,
-                          best_reduced_model,
-                          reduced_pln_model,
-                          pseudocount,
-                          initial_penalties,
-                          pln_min_ratio,
-                          pln_num_penalties,
-                          verbose,
-                          num_bootstraps,
-                          backend,
-                          covariance_type) {
-  # to do: parallelize
-
-  get_bootstrap_coef = function(random.seed,
-                                ccs,
-                                full_model_formula_str,
-                                best_full_model,
-                                best_reduced_model,
-                                reduced_pln_model,
-                                pseudocount,
-                                initial_penalties,
-                                pln_min_ratio,
-                                pln_num_penalties,
-                                backend,
-                                covariance_type) {
-
-    bootstrapped_model = bootstrap_model(ccs,
-                                         full_model_formula_str,
-                                         best_full_model,
-                                         reduced_pln_model,
-                                         pseudocount,
-                                         initial_penalties,
-                                         pln_min_ratio,
-                                         pln_num_penalties,
-                                         random.seed = random.seed,
-                                         covariance_type=covariance_type,
-                                         backend = backend)
-
-    if (backend == "torch") {
-      coef(bootstrapped_model) %>%
-        as.data.frame() %>% t() %>%
-        tibble::rownames_to_column("cell_group")
-    } else {
-      best_bootstrapped_model = PLNmodels::getModel(bootstrapped_model, var=best_reduced_model$penalty)
-      coef(best_bootstrapped_model) %>% t() %>%
-        as.data.frame() %>%
-        tibble::rownames_to_column("cell_group")
-    }
-  }
-
-  coef_df = data.frame(seed = seq(1, num_bootstraps)) %>%
-    mutate(coef = purrr::map(.f = purrr::possibly(get_bootstrap_coef, NA_character_),
-                             .x = seed,
-                             ccs,
-                             full_model_formula_str,
-                             best_full_model,
-                             best_reduced_model,
-                             reduced_pln_model,
-                             pseudocount,
-                             initial_penalties,
-                             pln_min_ratio,
-                             pln_num_penalties,
-                             backend,
-                             covariance_type))
-
-  coef_df = coef_df %>% filter(!is.na(coef))
-
-  # compute the covariance of the parameters
-  get_cov_mat = function(data, cell_group) {
-
-    cov_matrix = cov(data)
-    rownames(cov_matrix) = paste0(cell_group, "_", rownames(cov_matrix))
-    colnames(cov_matrix) = paste0(cell_group, "_", colnames(cov_matrix))
-    return(cov_matrix)
-  }
-
-  bootstrapped_df = do.call(rbind, coef_df$coef) %>%
-    group_by(cell_group) %>%
-    tidyr::nest() %>%
-    mutate(cov_mat = purrr::map2(.f = get_cov_mat,
-                                 .x = data,
-                                 .y = cell_group))
-
-  bootstrapped_vhat = Matrix::bdiag(bootstrapped_df$cov_mat) %>% as.matrix()
-  names = lapply(bootstrapped_df$cov_mat, function(m){ colnames(m)}) %>% unlist()
-  rownames(bootstrapped_vhat) = names
-  colnames(bootstrapped_vhat) = names
-
-  bootstrapped_vhat = methods::as(bootstrapped_vhat, "dgCMatrix")
-  return(bootstrapped_vhat)
-}
-
 
 #' Create a new cell_count_model object.
 #'
@@ -1193,4 +897,3 @@ build_interval_formula <- function(ccs, num_breaks, interval_var="timepoint", in
 
   return(interval_formula_str)
 }
-#debug(build_interval_formula)
diff --git a/R/deg.R b/R/deg.R
@@ -80,90 +80,3 @@ add_covariate <- function(ccs, pb_cds, covariate) {
   return(pb_cds)
 }
 
-
-fit_cell_models <- function(ccs,
-                            model_formula_str = "~ 1",
-                            cores = 1,
-                            cell_groups = NULL,
-                            ... ) {
-  pb_cds = pseudobulk_ccs_for_states(ccs)
-
-  # make sure that the model terms are in the ccs
-  model_formula_str = stringr::str_replace_all(model_formula_str, "~", "")
-  model_formula_str = gsub("[[:space:]]", "", model_formula_str)
-  model_terms = stringr::str_split(model_formula_str, pattern="\\+") %>%
-                unlist() %>%
-                stringr::str_split(pattern="\\*") %>%
-                unlist() %>%
-                stringr::str_split(pattern="\\:") %>%
-                unlist() %>% unique()
-
-  for (term in model_terms) {
-    if (term %in% colnames(colData(ccs))) {
-      pb_cds = add_covariate(ccs, pb_cds, term)
-    }
-  }
-
-  group_models = monocle3::fit_models(cg_pb_cds,
-                                      weights = colData(cg_pb_cds)$num_cells_in_group,
-                                      model_formula_str = paste0("~",model_formula_str),
-                                      cores = cores,
-                                      ... )
-
-}
-
-
-# run DEGs across all cell groups in the ccs
-# for a given contrast
-#' @param ccs
-#' @param model_formula_str
-#' @param cores
-fit_cell_group_models <- function(ccs,
-                                  model_formula_str = "~ 1",
-                                  cores = 1,
-                                  cell_groups = NULL,
-                                  ... ) {
-
-  pb_cds = pseudobulk_ccs_for_states(ccs)
-
-  # make sure that the model terms are in the ccs
-  model_formula_str = stringr::str_replace_all(model_formula_str, "~", "")
-  model_formula_str = gsub("[[:space:]]", "", model_formula_str)
-  model_terms = stringr::str_split(model_formula_str, pattern="\\+") %>%
-                unlist() %>%
-                stringr::str_split(pattern="\\*") %>%
-                unlist() %>%
-                stringr::str_split(pattern="\\:") %>%
-                unlist() %>% unique()
-
-  for (term in model_terms) {
-    if (term %in% colnames(colData(ccs))) {
-      pb_cds = add_covariate(ccs, pb_cds, term)
-    }
-  }
-
-  # subset to genes that are expressed over a certain min value
-  expr_over_thresh = normalized_counts(pb_cds, "size_only", pseudocount = 0)
-  genes_to_test = which(Matrix::rowSums(expr_over_thresh) >= 1)
-  pb_cds = pb_cds[genes_to_test,]
-
-  if (is.null(cell_groups)) {
-    cell_groups = rownames(counts(ccs))
-  }
-
-  # fit models in every cell group
-  group_models = lapply(cell_groups, function(cell_group){
-
-    cg_pb_cds = pb_cds[, colData(pb_cds)$cell_group == cell_group]
-    cg_group_models = monocle3::fit_models(cg_pb_cds,
-                                           weights = colData(cg_pb_cds)$num_cells_in_group,
-                                           model_formula_str = paste0("~",model_formula_str),
-                                           cores = cores,
-                                           ... )
-    cg_group_models
-    # fit_coefs = coefficient_table(cg_group_models)
-    # fit_coefs
-  })
-
-  return(group_models)
-}