From 15d38abf46fc3afe2a525e3d4ed3acb49d491a94 Mon Sep 17 00:00:00 2001 From: iquasere Date: Tue, 19 Dec 2023 11:42:24 +0000 Subject: [PATCH] ID mapping columns now must be properly inputted If some column is not in return fields, UPIMAPI will exit with a code different from 0 Added parameters for showing fields --- README.md | 8 ++++++-- upimapi.py | 21 ++++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 73ae5b5..cd83ae8 100644 --- a/README.md +++ b/README.md @@ -68,13 +68,17 @@ To input a custom database into UPIMAPI, specify it as ```--database path/to/dat ### Columns of information from UniProt -UniProt provides information for many different fields of information and cross-references. For the user's convenience, a default selection is provided: ```Entry```, ```Entry name```, ```Gene names```, ```Protein names```, ```EC number```, ```Function[CC]```, ```Pathway```, ```Keywords```, ```Protein existence```, ```Gene ontology (GO)```, ```Protein families```, ```Taxonomic lineage```, ```Organism```, ```Organism ID```, ```BioCyc```, ```BRENDA```, ```CDD```, ```eggNOG```, ```Ensembl```, ```InterPro```, ```KEGG```, ```Pfam```, ```Reactome```, ```RefSeq``` and ```UniPathway``` +UniProt provides information for many different fields of information and cross-references. For the user's convenience, a default selection is provided: ```Entry```, ```Entry name```, ```Gene Names```, ```Protein names```, ```EC number```, ```Function[CC]```, ```Pathway```, ```Keywords```, ```Protein existence```, ```Gene Ontology (GO)```, ```Protein families```, ```Taxonomic lineage```, ```Organism```, ```Organism ID```, ```BioCyc```, ```BRENDA```, ```CDD```, ```eggNOG```, ```Ensembl```, ```InterPro```, ```KEGG```, ```Pfam```, ```Reactome```, ```RefSeq``` and ```UniPathway```. 
If another selection of columns/databases is desired, it can be specified, for example, as ``` --columns "Coiled coil&Compositional bias" ``` -where ```--columns``` takes as input the names of the fields of information required. The complete list of fields available can be consulted at [UniProtKB return fields](https://www.uniprot.org/help/return_fields). +where ```--columns``` takes as input the names of the fields of information required. Valid values for the columns can be consulted at [UniProtKB return fields](https://www.uniprot.org/help/return_fields). + +#### Sometimes the return fields are not properly updated + +If the columns were entered correctly according to the [return fields page](https://www.uniprot.org/help/return_fields) and UPIMAPI still complains that "\[COL] is not a valid column name for ID mapping", the return fields page itself may be out of date. If that happens, running `upimapi --show-available-fields` will list the currently valid fields. 
#### UPIMAPI offers a few additional columns for taxonomic information diff --git a/upimapi.py b/upimapi.py index c8ced33..cc36f7a 100644 --- a/upimapi.py +++ b/upimapi.py @@ -28,7 +28,7 @@ from functools import partial import re -__version__ = '1.12.3' +__version__ = '1.13.0' def load_api_info(): @@ -165,10 +165,29 @@ def get_arguments(): diamond_args.add_argument( '--diamond-mode', help="Mode to run DIAMOND with [fast]", default='fast', choices=['fast', 'mid_sensitive', 'sensitive', 'more_sensitive', 'very_sensitive', 'ultra_sensitive']) + + special_functions = parser.add_argument_group('Special functions') + special_functions.add_argument( + "--show-available-fields", action="store_true", default=False, + help="Outputs the fields available from the API.") + args = parser.parse_args() + if args.show_available_fields: + sys.exit('\n'.join(columns_dict.keys())) + args.output = args.output.rstrip('/') args.resources_directory = args.resources_directory.rstrip('/') args.columns = args.columns.split('&') if args.columns else None + + columns_fine = True + for col in args.columns: + if col not in columns_dict.keys(): + print( + f'ERR: [{col}] is not a valid column name for ID mapping. For more information, check ' + f'https://github.com/iquasere/UPIMAPI/tree/master#sometimes-the-return-fields-are-not-properly-updated') + columns_fine = False + if not columns_fine: + sys.exit(1) if args.taxids is not None: args.taxids = args.taxids.split(',') return args