From ac3030bc9b1924941a965d78ca56d94ca2b1c39b Mon Sep 17 00:00:00 2001 From: actapia Date: Mon, 26 Aug 2024 17:45:35 -0400 Subject: [PATCH] Use autodocs? --- .github/workflows/markdown-autodocs.yml | 16 +++ docs/tutorials/reads2tree/README.md | 159 ++++-------------------- 2 files changed, 38 insertions(+), 137 deletions(-) create mode 100644 .github/workflows/markdown-autodocs.yml diff --git a/.github/workflows/markdown-autodocs.yml b/.github/workflows/markdown-autodocs.yml new file mode 100644 index 0000000..aa1f466 --- /dev/null +++ b/.github/workflows/markdown-autodocs.yml @@ -0,0 +1,16 @@ +name: markdown-autodocs + +on: + push: + branches: ["main", "autodocs"] + +jobs: + auto-update-readme: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Markdown autodocs + uses: dineshsonachalam/markdown-autodocs@v1.0.7 + with: + output_file_paths: '[./docs/tutorials/reads2tree/README.md]' + commit_message: Update code examples in tutorials. diff --git a/docs/tutorials/reads2tree/README.md b/docs/tutorials/reads2tree/README.md index 9710777..2960eb6 100644 --- a/docs/tutorials/reads2tree/README.md +++ b/docs/tutorials/reads2tree/README.md @@ -243,46 +243,15 @@ ls "$TUTORIAL_DIR/rna_clique_out/graph.pkl" ### Getting a tree If you want a tree, you can create one using RNA-clique and Biopython. The code -below, also found in `docs/tutorial/make_tree.py`, computes the distance matrix -from the `graph.pkl` and `od2/*.pkl` files and constructs a tree using the -neighbor-joining algorithm. The tree is also rooted at its midpoint. The tree is -saved to `nj_tree.tree`, and a visualization is saved to `nj_tree.svg` in the -`rna_clique_out` directory. - -```python -import os -import Bio.Phylo - -from pathlib import Path -from Bio.Phylo.TreeConstruction import DistanceTreeConstructor, DistanceMatrix -from matplotlib import pyplot as plt -from filtered_distance import SampleSimilarity -from phylo_utils import tril_jagged, draw_tree -from path_to_sample import path_to_sample - -rna_clique_out_dir = Path(os.environ["TUTORIAL_DIR"]) / "rna_clique_out" - -def main(): - similarity_computer = SampleSimilarity.from_filenames( - rna_clique_out_dir / "graph.pkl", - (rna_clique_out_dir / "od2").glob("*.pkl") - ) - nj_tree = DistanceTreeConstructor().nj( - DistanceMatrix( - [path_to_sample(p) for p in similarity_computer.samples], - tril_jagged(similarity_computer.get_dissimilarity_matrix()) - ) - ) - nj_tree.root_at_midpoint() - for c in nj_tree.get_nonterminals(): - c.name = None - Bio.Phylo.write(nj_tree, rna_clique_out_dir / "nj_tree.tree", "newick") - draw_tree(nj_tree) - plt.savefig(rna_clique_out_dir / "nj_tree.svg") - -if __name__ == "__main__": - main() -``` +below, also found in `docs/tutorials/reads2tree/make_tree.py`, computes the +distance matrix from the `graph.pkl` and `od2/*.pkl` files and constructs a tree +using the neighbor-joining algorithm. The tree is also rooted at its +midpoint. The tree is saved to `nj_tree.tree`, and a visualization is saved to +`nj_tree.svg` in the `rna_clique_out` directory. + + + + The script requires some modules found in the root of the RNA-clique repository, so you can run it as follows: @@ -299,62 +268,12 @@ stored at `$RNA_CLIQUE/docs/tutorials/reads2tree/tall_fescue_accs.csv`. The code below draws a 3D and 2D PCoA plot and stores the results as SVG files in the `rna_clique_out` directory as `pcoa_3d.svg` and `pcoa_2d.svg`, -respectively. The code can also be found at `docs/tutorial/make_pcoa.py`. - -```python -import os -from pathlib import Path - -import skbio as skb -import pandas as pd -from matplotlib import pyplot as plt - -from IPython import embed -from filtered_distance import SampleSimilarity -from path_to_sample import path_to_sample - -tutorial_doc_dir = Path(os.environ["RNA_CLIQUE"]) / "docs" / "tutorials" -rna_clique_out_dir = Path(os.environ["TUTORIAL_DIR"]) / "rna_clique_out" - -def main(): - sample_metadata = pd.read_csv(tutorial_doc_dir / "tall_fescue_accs.csv") - similarity_computer = SampleSimilarity.from_filenames( - rna_clique_out_dir / "graph.pkl", - (rna_clique_out_dir / "od2").glob("*.pkl") - ) - dis_df = similarity_computer.get_dissimilarity_df().rename( - index=path_to_sample, - columns=path_to_sample, - ) - # 3D PCoA - pcoa_results = skb.stats.ordination.pcoa( - skb.DistanceMatrix(dis_df, ids=dis_df.columns) - ) - pcoa_results.plot( - df=sample_metadata.set_index("accession"), - column="genotype", - ) - plt.savefig(rna_clique_out_dir / "pcoa_3d.svg") - # 2D PCoA - pcoa_results_2d = skb.stats.ordination.pcoa( - skb.DistanceMatrix(dis_df, ids=dis_df.columns), - number_of_dimensions=2 - ) - plt.figure() - for g, df in sample_metadata.join( - pcoa_results_2d.samples[["PC1","PC2"]], - "accession" - ).groupby("genotype"): - plt.scatter(df["PC1"], df["PC2"], label=g) - plt.xlabel("PC1") - plt.ylabel("PC2") - plt.legend() - plt.savefig(rna_clique_out_dir / "pcoa_2d.svg") - - -if __name__ == "__main__": - main() -``` +respectively. The code can also be found at +`docs/tutorials/reads2tree/make_pcoa.py`. + + + + The example can be run as follows from the root of the RNA-clique repository. @@ -369,47 +288,13 @@ distance matrix as a heatmap. The function uses the Seaborn `heatmap` function behind the scenes, and arbitrary arguments given to `draw_heatmap` will be passed to Seaborn. -The code below is also found in `docs/tutorials/reads2tree/make_heatmap.py`. It draws a -heatmap and saves the resulting figure in the `rna_clique_out` directory as -`distance_heatmap.svg`. - -```python -import os -from pathlib import Path - -import pandas as pd -from matplotlib import pyplot as plt - -from filtered_distance import SampleSimilarity -from path_to_sample import path_to_sample -from heatmap import draw_heatmap - -tutorial_doc_dir = Path(os.environ["RNA_CLIQUE"]) / "docs" / "tutorials" -rna_clique_out_dir = Path(os.environ["TUTORIAL_DIR"]) / "rna_clique_out" - -def main(): - sample_metadata = pd.read_csv(tutorial_doc_dir / "tall_fescue_accs.csv") - similarity_computer = SampleSimilarity.from_filenames( - rna_clique_out_dir / "graph.pkl", - (rna_clique_out_dir / "od2").glob("*.pkl") - ) - dis_df = similarity_computer.get_dissimilarity_df().rename( - index=path_to_sample, - columns=path_to_sample, - ) - draw_heatmap( - dis_df, - sample_metadata=sample_metadata, - sample_name_column="accession", - order_by="genotype", - cmap="mako_r" - ) - plt.savefig(rna_clique_out_dir / "distance_heatmap.svg") - - -if __name__ == "__main__": - main() -``` +The code below is also found in `docs/tutorials/reads2tree/make_heatmap.py`. It +draws a heatmap and saves the resulting figure in the `rna_clique_out` directory +as `distance_heatmap.svg`. + + + + To generate a heatmap using this code, you can run the Python script as follows from the RNA-clique repository root.