diff --git a/Forming_valid_erddap_requests.ipynb b/Forming_valid_erddap_requests.ipynb new file mode 100644 index 0000000..1a382d7 --- /dev/null +++ b/Forming_valid_erddap_requests.ipynb @@ -0,0 +1,451 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This example demonstrates requesting information about at dataset to help make valid data requests. Datasets vary in structure and this aims to provide users with tools to help make requests from ERDDAP. We will first request information about the dataset, then find specific information about time, latitude, longitude and altitude coverage and then use that to make a data request. In this example we download a netcdf file, but the same workflow could be used for image requests or any other format that ERDDAP provides." + ] + }, + { + "cell_type": "code", + "execution_count": 208, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import urllib3\n", + "import json\n", + "import certifi\n", + "from netCDF4 import Dataset as netcdf_dataset\n", + "import math" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# A function that fetches metadata from an ERDDAP server\n", + "# Provides details about the dataset that we can use to\n", + "# formulate our data request. We do this to determine the available\n", + "# extent (time, lat and lon) of the dataset which prevents us from making \n", + "# unreasonable data requests that the server will reject\n", + "\n", + "#Input: ERDDAP ID\n", + "#Output: Info about dataset for checking extents and forming data query\n", + "\n", + "def getDatasetInfo(datasetId):\n", + "\n", + " http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())\n", + "\n", + " metadataUrl = 'https://polarwatch.noaa.gov/erddap/info/'+datasetId+'/index.json'\n", + "\n", + " try:\n", + " response = http.request('GET', metadataUrl)\n", + " except HTTPError as e:\n", + " print('The server couldnt fullfill the request')\n", + " print('Error Code: ', e.code)\n", + " except URLError as e:\n", + " print('Failed to reach erddap server for: '+ metadataUrl)\n", + " print('reason: ', e.reason)\n", + " else:\n", + " # load information about dataset\n", + " datasetMeta = json.loads(response.data.decode('utf-8'))\n", + " \n", + " return datasetMeta" + ] + }, + { + "cell_type": "code", + "execution_count": 210, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# A function that specifically pulls out dimension info from the \n", + "# broader metadata info returned from the ERDDAP server\n", + "\n", + "def getDimensions(datasetInfo):\n", + "\n", + " dimensions = []\n", + " for x in datasetInfo['table']['rows']:\n", + " \n", + " dimension = {}\n", + " # pull out info from first line in dimension\n", + " if x[0] == 'dimension':\n", + " dimension = {}\n", + " dimension[\"name\"] = x[1]\n", + " dh = x[4].split(',')\n", + " for dhi in dh:\n", + " dhi_split = dhi.split('=')\n", + " dhi_key = dhi_split[0].lstrip()\n", + " dimension[dhi_key] = dhi_split[1]\n", + " dimensions.append(dimension)\n", + " \n", + " # Get dimension detials\n", + " for x in datasetInfo['table']['rows']:\n", + " for dimension in dimensions:\n", + " if x[0] == 'attribute' and x[1] == dimension[\"name\"]:\n", + " dimensionFieldValue=x[4].split(',')\n", + " if len(dimensionFieldValue) > 1:\n", + " dimension[x[2]] = dimensionFieldValue\n", + " elif len(dimensionFieldValue) == 1:\n", + " dimension[x[2]] = dimensionFieldValue[0]\n", + " else:\n", + " pass\n", + "\n", + " return dimensions" + ] + }, + { + "cell_type": "code", + "execution_count": 211, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# A function that specifically pulls out global attribute info from the \n", + "# broader metadata info returned from the ERDDAP server\n", + "# input is ERDDAP dataset info as json object\n", + "# In this example we use this to get the time_coverage_end for our time query\n", + "\n", + "def getGlobalAttribute(datasetInfo, attributeName):\n", + " for x in datasetInfo['table']['rows']:\n", + " if x[0] == 'attribute' and x[1] == 'NC_GLOBAL':\n", + " if x[2] == attributeName:\n", + " attributeValue = x[4]\n", + " return attributeValue" + ] + }, + { + "cell_type": "code", + "execution_count": 212, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# A function that specifically pulls out parameter info from the \n", + "# broader metadata info returned from the ERDDAP server\n", + "# input is ERDDAP dataset info as json object\n", + "# output is a list of parameters (variables) for the dataset\n", + "\n", + "def getParameters(datasetInfo):\n", + " parameters = []\n", + " for x in datasetInfo['table']['rows']:\n", + " if x[0] == 'variable':\n", + " parameters.append(x[1])\n", + " return parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 213, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Here are two different datasets from the PolarWatch ERDDAP\n", + "# You can uncomment the second option to see how the url requests for \n", + "# each of these datasets are formed differently\n", + "\n", + "datasetId = 'jplMURSST41'\n", + "#datasetId = 'erdTAssh1day_LonPM180'\n", + "\n", + "# Get information about this dataset from ERDDAP\n", + "# Info is returned as an object that we can parse for relevant info\n", + "datasetInfo = getDatasetInfo(datasetId)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 214, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2018-04-03T09:00:00Z\n", + "[(2018-04-03T09:00:00Z):1:(2018-04-03T09:00:00Z)]\n" + ] + } + ], + "source": [ + "# Use the information returned by ERDDAP to piece together a valid time query\n", + "# Here we look for the latest available timestamp \n", + "# and make the corresponding query string\n", + "\n", + "latestTime = getGlobalAttribute(datasetInfo, 'time_coverage_end')\n", + "print(latestTime)\n", + "\n", + "timeQuery = '[(%s):%s:(%s)]' % (latestTime,'1',latestTime)\n", + "print(timeQuery)" + ] + }, + { + "cell_type": "code", + "execution_count": 215, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(-89.99):35.000000:(89.99)]\n", + "[(-179.99):72:(180.0)]\n" + ] + } + ], + "source": [ + "# Here we use the dimension info returned by ERDDAP to form\n", + "# the altitude, latitude and longitude queries\n", + "\n", + "datasetDimensions = getDimensions(datasetInfo)\n", + "\n", + "altQuery = 0\n", + "\n", + "# Search through the dimension list for the ones we are interested in\n", + "for dimension in datasetDimensions:\n", + " \n", + " if dimension['standard_name'] == 'altitude':\n", + " # create a default altitude, for satellite data expect there to be one altitude specified, if at all\n", + " defaultAlt = dimension['actual_range'][0].strip()\n", + " altQuery = '[(%s):%s:(%s)]' % (defaultAlt,'1',defaultAlt)\n", + " \n", + " if dimension['standard_name'] == 'latitude':\n", + " \n", + " \n", + " # to make sure we aren't requesting to large of a file we can check the number of \n", + " # data points along this dimension and reduce our request if needed\n", + " # here I reduce it to be no larger than 500 values\n", + " if float(dimension['nValues']) > 500:\n", + " spacing = math.floor(float(dimension['nValues'])/500)\n", + " else:\n", + " spacing = 1\n", + " \n", + " # Latitude values can either be increasing or decreasing depending on the dataset\n", + " # We can use averageSpacing to determine which way this dataset is setup\n", + " # and use that to form our query string appropriately\n", + " # Here we query the full spatial extent of the data \n", + " \n", + " if float(dimension['averageSpacing']) >= 0:\n", + " startLat = dimension['actual_range'][0].strip()\n", + " endLat = dimension['actual_range'][1].strip()\n", + " \n", + " else:\n", + " startLat = dimension['actual_range'][1].strip()\n", + " endLat = dimension['actual_range'][0].strip()\n", + " \n", + " latQuery = '[(%s):%f:(%s)]' % (startLat, spacing, endLat)\n", + " \n", + " if dimension['standard_name'] == 'longitude':\n", + " # to make sure we aren't requesting to large of a file we can check the number of \n", + " # data points along this dimension and reduce our request if needed\n", + " # here I reduce it to be no larger than 500 values\n", + " if float(dimension['nValues']) > 500:\n", + " spacing = math.floor(float(dimension['nValues'])/500)\n", + " else:\n", + " spacing = 1\n", + " startLon = dimension['actual_range'][0].strip()\n", + " endLon = dimension['actual_range'][1].strip()\n", + " lonQuery = '[(%s):%s:(%s)]' % (startLon, spacing, endLon)\n", + "\n", + "if altQuery != 0:\n", + " print('This dataset has an altitude dimension')\n", + " \n", + "print(latQuery)\n", + "print(lonQuery)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 216, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "analysed_sst\n" + ] + } + ], + "source": [ + "# Get a variable list for this dataset\n", + "# For this demo we use the first variable in the list\n", + "\n", + "datasetParameters = getParameters(datasetInfo)\n", + "\n", + "param = datasetParameters[0]\n", + "\n", + "print(param)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Put all our query strings together and form our request url\n", + "\n", + "if altQuery != 0:\n", + " query = param + timeQuery + altQuery + latQuery + lonQuery\n", + "else:\n", + " query = param + timeQuery + latQuery + lonQuery\n", + " \n", + "base_url = 'https://polarwatch.noaa.gov/erddap/griddap/'+ datasetId +'.nc?'\n", + "\n", + "requestUrl = base_url + query\n", + "\n", + "print(requestUrl)" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://polarwatch.noaa.gov/erddap/griddap/jplMURSST41.nc?analysed_sst[(2018-04-03T09:00:00Z):1:(2018-04-03T09:00:00Z)][(-89.99):35.000000:(89.99)][(-179.99):72:(180.0)]\n", + "\n", + "root group (NETCDF3_CLASSIC data model, file format NETCDF3):\n", + " acknowledgment: Please acknowledge the use of these data with the following statement: These data were provided by JPL under support by NASA MEaSUREs program.\n", + " cdm_data_type: Grid\n", + " comment: MUR = \"Multi-scale Ultra-high Resolution\"\n", + " Conventions: CF-1.6, COARDS, ACDD-1.3\n", + " creator_email: ghrsst@podaac.jpl.nasa.gov\n", + " creator_name: JPL MUR SST project\n", + " creator_type: group\n", + " creator_url: https://mur.jpl.nasa.gov\n", + " date_created: 2018-04-04T02:05:40Z\n", + " Easternmost_Easting: 179.29\n", + " file_quality_level: 1\n", + " gds_version_id: 2.0\n", + " geospatial_lat_max: 89.91\n", + " geospatial_lat_min: -89.99\n", + " geospatial_lat_resolution: 0.01\n", + " geospatial_lat_units: degrees_north\n", + " geospatial_lon_max: 179.29\n", + " geospatial_lon_min: -179.99\n", + " geospatial_lon_resolution: 0.01\n", + " geospatial_lon_units: degrees_east\n", + " history: created at nominal 4-day latency; replaced nrt (1-day latency) version.\n", + "Data is downloaded daily from ftp://podaac-ftp.jpl.nasa.gov/allData/ghrsst/data/GDS2/L4/GLOB/JPL/MUR/v4.1/ to NOAA NMFS SWFSC ERD by erd.data@noaa.gov .\n", + "The data for the most recent 4 days is usually revised everyday. The data for other days is sometimes revised.\n", + "2018-04-05T00:06:48Z (local files)\n", + "2018-04-05T00:06:48Z http://coastwatch.pfeg.noaa.gov/erddap/griddap/jplMURSST41.nc?analysed_sst%5B(2018-04-03T09:00:00Z):1:(2018-04-03T09:00:00Z)%5D%5B(-89.99):35.000000:(89.99)%5D%5B(-179.99):72:(180.0)%5D\n", + " id: MUR-JPL-L4-GLOB-v04.1\n", + " infoUrl: https://podaac.jpl.nasa.gov/dataset/MUR-JPL-L4-GLOB-v4.1\n", + " institution: NASA JPL\n", + " keywords: analysed, analysed_sst, analysis, analysis_error, area, binary, composite, daily, data, day, deviation, distribution, Earth Science > Oceans > Ocean Temperature > Sea Surface Temperature, error, estimated, field, final, foundation, fraction, ghrsst, high, ice, ice distribution, identifier, jet, jpl, laboratory, land, land_binary_mask, mask, multi, multi-scale, mur, nasa, ocean, oceans, product, propulsion, resolution, scale, sea, sea ice area fraction, sea/land, sea_ice_fraction, sea_surface_foundation_temperature, sst, standard, statistics, surface, temperature, time, ultra, ultra-high\n", + " keywords_vocabulary: GCMD Science Keywords\n", + " license: These data are available free of charge under data policy of JPL PO.DAAC.\n", + "\n", + "The data may be used and redistributed for free but is not intended\n", + "for legal use, since it may contain inaccuracies. Neither the data\n", + "Contributor, ERD, NOAA, nor the United States Government, nor any\n", + "of their employees or contractors, makes any warranty, express or\n", + "implied, including warranties of merchantability and fitness for a\n", + "particular purpose, or assumes any legal liability for the accuracy,\n", + "completeness, or usefulness, of this information.\n", + " naming_authority: org.ghrsst\n", + " netcdf_version_id: 4.1\n", + " Northernmost_Northing: 89.91\n", + " platform: Terra, Aqua, GCOM-W, NOAA-19, MetOp-A\n", + " processing_level: L4\n", + " product_version: 04.1nrt\n", + " project: NASA Making Earth Science Data Records for Use in Research Environments (MEaSUREs) Program\n", + " publisher_email: ghrsst-po@nceo.ac.uk\n", + " publisher_name: GHRSST Project Office\n", + " publisher_url: https://www.ghrsst.org\n", + " references: https://podaac.jpl.nasa.gov/Multi-scale_Ultra-high_Resolution_MUR-SST\n", + " sensor: MODIS, AMSR2, AVHRR\n", + " source: MODIS_T-JPL, MODIS_A-JPL, AMSR2-REMSS, AVHRR19_G-NAVO, AVHRRMTA_G-NAVO, Ice_Conc-OSISAF\n", + " sourceUrl: (local files)\n", + " Southernmost_Northing: -89.99\n", + " spatial_resolution: 0.01 degrees\n", + " standard_name_vocabulary: CF Standard Name Table v29\n", + " summary: This is a merged, multi-sensor L4 Foundation Sea Surface Temperature (SST) analysis product from Jet Propulsion Laboratory (JPL). This daily, global, Multi-scale, Ultra-high Resolution (MUR) Sea Surface Temperature (SST) 1-km data set, Version 4.1, is produced at JPL under the NASA MEaSUREs program. For details, see https://podaac.jpl.nasa.gov/dataset/MUR-JPL-L4-GLOB-v4.1 . This dataset is part of the Group for High-Resolution Sea Surface Temperature (GHRSST) project. The data for the most recent 4 days is usually revised everyday. The data for other days is sometimes revised.\n", + " testOutOfDate: now-3days\n", + " time_coverage_end: 2018-04-03T09:00:00Z\n", + " time_coverage_start: 2018-04-03T09:00:00Z\n", + " title: Multi-scale Ultra-high Resolution (MUR) SST Analysis fv04.1, Global, 0.01°, 2002-present, Daily\n", + " Westernmost_Easting: -179.99\n", + " dimensions(sizes): time(1), latitude(515), longitude(500)\n", + " variables(dimensions): float64 \u001b[4mtime\u001b[0m(time), float32 \u001b[4mlatitude\u001b[0m(latitude), float32 \u001b[4mlongitude\u001b[0m(longitude), float64 \u001b[4manalysed_sst\u001b[0m(time,latitude,longitude)\n", + " groups: \n", + "\n" + ] + } + ], + "source": [ + "# Request the data from ERDDAP\n", + "\n", + "# Filename, location to store the requested data\n", + "file = 'dataset.nc'\n", + "\n", + "http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())\n", + "r = http.request('GET', requestUrl, preload_content=False)\n", + "\n", + "with open(file, 'wb') as out:\n", + " while True:\n", + " data = r.read(1024*1024)\n", + " if not data:\n", + " break\n", + " out.write(data)\n", + "\n", + "r.release_conn()\n", + "\n", + "#netcdf data object\n", + "dataset = netcdf_dataset(file)\n", + "\n", + "# Now we have a netcdf dataset that we can work with\n", + "print(dataset)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "geo3jupyter", + "language": "python", + "name": "geo3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}