# =============================================================================
# IMPORTS
# =============================================================================
import os
import numpy as np
import astropy
import astropy.table
from astroquery.mast import Mast
import requests
import tqdm
import logging
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
# =============================================================================
# MAIN
# =============================================================================
[docs]
def set_params(parameters):
"""
Utility function for making dicts used in MAST queries.
"""
return [{'paramName': p, 'values': v} for p, v in parameters.items()]
[docs]
def query_coron_datasets(inst,
filt=None,
mask=None,
kind=None,
program=None,
obsnum=None,
channel=None,
ignore_cal=True,
ignore_ta=True,
verbose=False,
level=None,
ignore_exclusive_access=False,
exp_type=None,
return_filenames=False):
"""
Query MAST to make a summary table of existing JWST coronagraphic datasets.
Parameters
----------
inst : str
'NIRCam' or 'MIRI'. Required.
filt : str
Filter name, like 'F356W'. Required.
mask : str
Coronagraph mask name, like 'MASKA335R' or '4QPM_1550'. Optional.
If provided, must exactly match the keyword value as used in MAST,
which isn't always what you might expect. In particular, you need to
explicitly include the "A" for the NIRCam A module in e.g.,
'MASKA335R'.
kind : str
'SCI' for science targets, 'REF' for PSF references, or 'BKG' for
backgrounds.
channel : str
For NIRCam only, channel name, "SW" or "SHORT" versus "LW" or "LONG".
Leave blank for both.
ignore_ta : bool
Ignore/exclude any target acquisition exposures. This can be
useful to only include science and reference images.
ignore_cal : bool
Ignore/exclude any category='CAL' calibration programs. This can be
desirable to ignore the NIRCam coronagraphic flux calibration data
sets, which otherwise look like science data to this query (programs
1537 and 1538 for example).
exp_type : list of strings, or None
By default, the value for the MAST query field on exposure type is
determined automatically, based on whether or not ignore_ta is set.
Set this optional parameter if you want to control the exp_type value
used in the query directly.
ignore_exclusive_access : bool
Whether or not to ignore (filter out from query results) any data which
is still under exclusive access restrictions to the original proposing team.
For example, query for kind='REF', ignore_exclusive_access=True to find
only the publicly-available PSF references that can be downloaded by anyone.
return_filenames : bool
Return a shorter summary table of observations, versus returning a
more comprehensive longer table of individual exposures and filenames?
level : str
Desired JWST data product level for filenames.
'1b' or 'uncal', '2a' or 'rate', etc.
For historical reasons it's not possible to easily query MAST directly for
products earlier than level 2b (cal/calints); MAST "hides" lower level files
once higher level products are available. Thus this works by querying for the
level 2b products, and performing filename transformation on them
Returns
-------
None.
"""
# Perform MAST query to find all available datasets for a given
# filter/occulter.
if inst.upper() == 'MIRI':
service = 'Mast.Jwst.Filtered.Miri'
template = 'MIRI Coronagraphic Imaging'
else:
service = 'Mast.Jwst.Filtered.NIRCam'
template = 'NIRCam Coronagraphic Imaging'
keywords = {
'template': [template],
'productLevel': ['2b'],
}
if filt is not None:
keywords['filter'] = [filt,]
if obsnum is not None:
keywords['observtn'] = obsnum if isinstance(obsnum, list) else [obsnum,]
keywords['observtn'] = [str(a) for a in keywords['observtn']] # must be string type!
if mask is not None:
keywords['coronmsk'] = [mask]
if ignore_cal:
keywords['category'] = ['COM', 'ERS', 'GTO', 'GO', 'DD',] # but not CAL
if ignore_ta:
keywords['exp_type'] = ['NRC_CORON', 'MIR_LYOT', 'MIR_4QPM'] # but not NRC_TACQ or MIRI_TACQ or NRC_TACONFIRM
if exp_type:
# Optional, allow user to custom override exp_type, for instance to query specifically for TA_CONFIRM exposures
keywords['exp_type'] = exp_type
# Optional, restrict to one apt program
if program is not None:
keywords['program'] = [f'{program:05d}',]
# Optional, restrict to one kind of coronagraphic data
if kind is not None:
if kind.upper() == 'SCI':
keywords['is_psf'] = ['f']
keywords['bkgdtarg'] = ['f']
elif kind.upper() == 'REF':
keywords['is_psf'] = ['t']
keywords['bkgdtarg'] = ['f']
elif kind.upper() == 'BKG':
keywords['is_psf'] = ['f']
keywords['bkgdtarg'] = ['t']
if inst.upper()=='NIRCAM' and channel is not None:
if channel.upper().startswith('S'):
keywords['channel'] = ['SHORT',]
elif channel.upper().startswith('L'):
keywords['channel'] = ['LONG', ]
else:
raise RuntimeError("Invalid channel")
# Method note: we query MAST for much more than we actually need/want, and
# then filter down afterward. This is not entirely necessary, but leaves
# room for future expansion to add options for more verbose output, etc.
# Currently, this works by retrieving all level2b (i.e., cal/calints)
# files, including all dithers etc., and then trimming to one unique row
# per observation.
collist = 'filename, productLevel, filter, coronmsk, targname, duration, effexptm, effinttm, exp_type, bkgdtarg, bstrtime, is_psf, nexposur, nframes, nints, numdthpt, obs_id, observtn, obslabel, pi_name, program, subarray, template, title, visit_id, visitsta, vststart_mjd, isRestricted, publicReleaseDate_mjd'
all_columns = False
parameters = {'columns': '*' if all_columns else collist,
'filters': set_params(keywords)}
if verbose:
print("MAST query parameters:")
print(parameters)
responsetable = Mast.service_request(service, parameters)
responsetable.sort(keys='bstrtime')
# Add the initial V to visit ID.
responsetable['visit_id'] = astropy.table.Column(responsetable['visit_id'], dtype=np.dtype('<U12'))
for row in responsetable:
row['visit_id'] = 'V' + row['visit_id']
# Add a summary for which kind of observation each is.
kind = np.zeros(len(responsetable), dtype='a3')
kind[:] = 'SCI' # by default assume SCI, then check for REF and BKG and TA
kind[responsetable.columns['is_psf'] == 't'] = 'REF'
kind[responsetable.columns['bkgdtarg'] == 't'] = 'BKG'
for ta_type in ['NRC_TACQ', 'NRC_TACONFIRM', 'MIRI_TACQ']:
kind[responsetable.columns['exp_type'] == ta_type] = 'TA'
kind[kind == ''] = 'SCI'
responsetable.add_column(astropy.table.Column(kind), index=2, name='kind')
if ignore_exclusive_access:
mjd_now = astropy.time.Time.now().mjd
public_data = responsetable['publicReleaseDate_mjd'] < mjd_now
responsetable = responsetable[public_data]
if return_filenames:
if level is not None and level.lower() != 'cal' and level.lower() != '2b':
# Transform filenames to either rate or uncal files
# This may not be robust to all possible scenarios yet...
if level.lower()=='rate' or level.lower()=='2a':
responsetable['filename'] = [f.replace('_cal', '_rate') for f in responsetable['filename']]
responsetable['productLevel'] = '2a'
elif level.lower() == 'uncal' or level.lower() == '1b':
responsetable['filename'] = [f.replace('_calints', '_uncal').replace('_cal', '_uncal') for f in responsetable['filename']]
responsetable['productLevel'] = '1b'
return responsetable
# Summarize the distinct observations conveniently.
cols_to_keep = ['visit_id', 'filter', 'coronmsk', 'targname', 'obslabel',
'duration', 'numdthpt', 'program', 'title', 'pi_name']
summarytable = astropy.table.Table([responsetable.columns[cname].filled() for cname in cols_to_keep],
masked=False,
copy=True)
summarytable.add_column(astropy.table.Column(astropy.time.Time(responsetable['vststart_mjd'], format='mjd').iso, dtype=np.dtype('<U16')),
index=1,
name='start time')
summarytable = astropy.table.unique(summarytable)
summarytable.sort(keys='start time')
return summarytable
[docs]
def get_mast_filename(filename, outputdir='.',
overwrite=False, exists_ok=True,
progress=False, verbose=True,
mast_api_token=None):
"""Download any specified filename from MAST, writing to outputdir
If a file exists already, default is to not download.
Set overwrite=True to overwrite existing output file.
or set exists_ok=False to raise ValueError.
Set progress=True to show a progress bar.
verbose toggles on/off minor informative text output
Other parameters are less likely to be useful:
Default mast_api_token comes from MAST_API_TOKEN environment variable.
Adapted from example code originally by Rick White, STScI, via archive help desk.
"""
if not mast_api_token:
mast_api_token = os.environ.get('MAST_API_TOKEN')
if mast_api_token is None:
raise ValueError("Must define MAST_API_TOKEN env variable or specify mast_api_token parameter")
assert '/' not in filename, "Filename cannot include directories"
mast_url = "https://mast.stsci.edu/api/v0.1/Download/file"
if not os.path.exists(outputdir):
os.makedirs(outputdir)
elif not os.path.isdir(outputdir):
raise ValueError(f"Output location {outputdir} is not a directory")
elif not os.access(outputdir, os.W_OK):
raise ValueError(f"Output directory {outputdir} is not writable")
outfile = os.path.join(outputdir, filename)
if (not overwrite) and os.path.exists(outfile):
if exists_ok:
if verbose:
print(" ALREADY DOWNLOADED: "+outfile)
return
else:
raise ValueError(f"{outfile} exists, not overwritten")
r = requests.get(mast_url, params=dict(uri=f"mast:JWST/product/{filename}"),
headers=dict(Authorization=f"token {mast_api_token}"), stream=True)
r.raise_for_status()
total_size_in_bytes = int(r.headers.get('content-length', 0))
block_size = 1024000
if progress:
progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
csize = 0
with open(outfile, 'wb') as fd:
for data in r.iter_content(chunk_size=block_size):
fd.write(data)
if progress:
# use the size before uncompression
dsize = r.raw.tell()-csize
progress_bar.update(dsize)
csize += dsize
if progress:
progress_bar.close()
if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
print("ERROR, something went wrong")
if verbose:
print(" DOWNLOAD SUCCESSFUL: "+outfile)
[docs]
def download_files(product_table, outputdir='.', verbose=True, **kwargs):
"""Retrieve data products from MAST
Parameters
----------
product_table : astropy.table
Table of MAST products, as returned by astroquery.Mast
outputdir : str
Directory where to save the output products
verbose : bool
Toggle text information output
Other kwargs are passed to get_mast_filename
"""
# depending on which mast service is used, the filename column is inconsistently labeled
# in particular the Observations search vs. keyword search interfaces.
# in returned products
if 'filename' in product_table.colnames:
fn_key = 'filename'
elif 'productFilename' in product_table.colnames:
fn_key = 'productFilename'
else:
raise RuntimeError("Cannot find filename column in that table")
product_table.sort(keys=fn_key)
for row in product_table:
if verbose:
if 'size' in row.colnames:
print(f"{row[fn_key]} : {row['size']/(1024**2):.2f} MB")
get_mast_filename(row[fn_key], outputdir=outputdir, verbose=verbose,
**kwargs)