Skip to content

Commit

Permalink
Merge pull request #196 from gyorilab/background_set
Browse files (browse the repository at this point in the history)
Expose background gene list option in analysis functions
  • Loading branch information
bgyori authored Jan 24, 2025
2 parents 16bb5bc + 3b3c69e commit 46780de
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
17 changes: 15 additions & 2 deletions src/indra_cogex/analysis/gene_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def discrete_analysis(
minimum_evidence_count: int = 1,
minimum_belief: float = 0,
indra_path_analysis: bool = False,
background_gene_list: List[str] = None,
*,
client: Neo4jClient
) -> Dict[str, Union[pd.DataFrame, None]]:
Expand All @@ -59,6 +60,10 @@ def discrete_analysis(
Minimum belief score for filtering, by default 0.
indra_path_analysis : bool, optional
Whether to perform INDRA pathway analysis, by default False.
background_gene_list : List[str], optional
A list of background genes, of which the query gene list is a subset,
used to constrain the space of possible genes considered when
calculating enrichment statistics. If None or empty, no background
gene set is passed to the individual analyses.
client : Neo4jClient, optional
The Neo4j client, managed automatically by the autoclient decorator.
Expand All @@ -74,6 +79,12 @@ def discrete_analysis(
f"Failed to parse the following gene identifiers: {', '.join(errors)}"
)

if background_gene_list:
background_genes, _ = parse_gene_list(background_gene_list)
background_gene_ids = list(background_genes)
else:
background_gene_ids = None

results = {}
for analysis_name, analysis_func in [
("go", go_ora),
Expand All @@ -87,7 +98,8 @@ def discrete_analysis(
if analysis_name in {"go", "wikipathways", "reactome", "phenotype"}:
analysis_result = analysis_func(
client=client, gene_ids=gene_set, method=method, alpha=alpha,
keep_insignificant=keep_insignificant
keep_insignificant=keep_insignificant,
background_gene_ids=background_gene_ids
)
else:
# Run INDRA analysis if enabled
Expand All @@ -96,7 +108,8 @@ def discrete_analysis(
client=client, gene_ids=gene_set, method=method, alpha=alpha,
keep_insignificant=keep_insignificant,
minimum_evidence_count=minimum_evidence_count,
minimum_belief=minimum_belief
minimum_belief=minimum_belief,
background_gene_ids=background_gene_ids
)
else:
continue
Expand Down
20 changes: 16 additions & 4 deletions src/indra_cogex/client/enrichment/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,9 @@ def go_ora(
if not background_gene_ids
else len(background_gene_ids)
)
return _do_ora(get_go(client=client), query=gene_ids, count=count, **kwargs)
bg_genes = frozenset(background_gene_ids) if background_gene_ids else None
return _do_ora(get_go(client=client, background_gene_ids=bg_genes),
query=gene_ids, count=count, **kwargs)


def wikipathways_ora(
Expand Down Expand Up @@ -249,8 +251,10 @@ def wikipathways_ora(
if not background_gene_ids
else len(background_gene_ids)
)
bg_genes = frozenset(background_gene_ids) if background_gene_ids else None
return _do_ora(
get_wikipathways(client=client), query=gene_ids, count=count, **kwargs
get_wikipathways(client=client, background_gene_ids=bg_genes),
query=gene_ids, count=count, **kwargs
)


Expand Down Expand Up @@ -285,7 +289,9 @@ def reactome_ora(
if not background_gene_ids
else len(background_gene_ids)
)
return _do_ora(get_reactome(client=client), query=gene_ids, count=count, **kwargs)
bg_genes = frozenset(background_gene_ids) if background_gene_ids else None
return _do_ora(get_reactome(client=client, background_gene_ids=bg_genes),
query=gene_ids, count=count, **kwargs)


@autoclient()
Expand Down Expand Up @@ -321,8 +327,10 @@ def phenotype_ora(
if not background_gene_ids
else len(background_gene_ids)
)
bg_genes = frozenset(background_gene_ids) if background_gene_ids else None
return _do_ora(
get_phenotype_gene_sets(client=client), query=gene_ids, count=count, **kwargs
get_phenotype_gene_sets(client=client, background_gene_ids=bg_genes),
query=gene_ids, count=count, **kwargs
)


Expand Down Expand Up @@ -367,11 +375,13 @@ def indra_downstream_ora(
if not background_gene_ids
else len(background_gene_ids)
)
bg_genes = frozenset(background_gene_ids) if background_gene_ids else None
return _do_ora(
get_entity_to_regulators(
client=client,
minimum_evidence_count=minimum_evidence_count,
minimum_belief=minimum_belief,
background_gene_ids=bg_genes
),
query=gene_ids,
count=count,
Expand Down Expand Up @@ -420,11 +430,13 @@ def indra_upstream_ora(
if not background_gene_ids
else len(background_gene_ids)
)
bg_genes = frozenset(background_gene_ids) if background_gene_ids else None
return _do_ora(
get_entity_to_targets(
client=client,
minimum_evidence_count=minimum_evidence_count,
minimum_belief=minimum_belief,
background_gene_ids=bg_genes
),
query=gene_ids,
count=count,
Expand Down

0 comments on commit 46780de

Please sign in to comment.