Source code for dustmaps.iphas

#!/usr/bin/env python
#
# iphas.py
# Reads the IPHAS 3D dust map of Sale et al. (2014).
#
# Copyright (C) 2016  Gregory M. Green
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

from __future__ import print_function, division

import numpy as np
import h5py
import os

import astropy.coordinates as coordinates
import astropy.units as units

from .std_paths import *
from .map_base import DustMap, ensure_flat_galactic
from .unstructured_map import UnstructuredDustMap
from . import fetch_utils


class IPHASQuery(UnstructuredDustMap):
    """
    The 3D dust map of Sale et al. (2014), based on IPHAS imaging in the
    Galactic plane. The map covers 30 deg < l < 115 deg, -5 deg < b < 5 deg.
    """
    def __init__(self, map_fname=None):
        """
        Args:
            map_fname (Optional[str]): Filename at which the map is stored.
                Defaults to ``None``, meaning that the default filename is
                used.
        """
        if map_fname is None:
            map_fname = os.path.join(data_dir(), 'iphas', 'iphas.h5')

        with h5py.File(map_fname, 'r') as f:
            self._data = f['samples'][:]

        self._n_pix = self._data.size
        self._n_dists = self._data['A0'].shape[1]
        self._n_samples = self._data['A0'].shape[2]

        # All pixels share the same distance bins
        self._dists = self._data['dist'][0]

        # Don't query more than this angular distance from any point
        max_pix_scale = 0.5 * units.deg

        # Tessellate the sphere
        coords = coordinates.SkyCoord(
            self._data['l'],
            self._data['b'],
            unit='deg',
            frame='galactic')

        super(IPHASQuery, self).__init__(coords, max_pix_scale, metric_p=2)
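
    # Layout of the record array loaded above (this follows from the dtype
    # written by ``ascii2h5`` further down in this module; the exact pixel
    # and sample counts are properties of the released data files):
    #
    #     self._data['l']     # Galactic longitude of each pixel, in deg
    #     self._data['b']     # Galactic latitude of each pixel, in deg
    #     self._data['dist']  # distance bins, shape (n_pix, n_dists)
    #     self._data['A0']    # extinction samples, shape (n_pix, n_dists, n_samples)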
    @ensure_flat_galactic
    def query(self, coords, mode='random_sample'):
        """
        Returns A0 at the given coordinates. There are several different query
        modes, which handle the probabilistic nature of the map differently.

        Args:
            coords (``astropy.coordinates.SkyCoord``): The coordinates to
                query.
            mode (Optional[str]): Five different query modes are available:
                'random_sample', 'random_sample_per_pix', 'samples', 'median'
                and 'mean'. The ``mode`` determines how the output will
                reflect the probabilistic nature of the IPHAS dust map.

        Returns:
            Monochromatic extinction, A0, at the specified coordinates, in
            mags.

            The shape of the output depends on the ``mode``, and on whether
            ``coords`` contains distances.

            If ``coords`` does not specify distance(s), then the shape of the
            output begins with ``coords.shape``. If ``coords`` does specify
            distance(s), then the shape of the output begins with
            ``coords.shape + ([number of distance bins],)``.

            If ``mode`` is 'random_sample', then at each coordinate/distance,
            a random sample of reddening is given.

            If ``mode`` is 'random_sample_per_pix', then the sample chosen for
            each angular pixel of the map will be consistent. For example, if
            two query coordinates lie in the same map pixel, then the same
            random sample will be chosen from the map for both query
            coordinates.

            If ``mode`` is 'median', then at each coordinate/distance, the
            median reddening is returned.

            If ``mode`` is 'mean', then at each coordinate/distance, the mean
            reddening is returned.

            Finally, if ``mode`` is 'samples', then at each
            coordinate/distance, all samples are returned.
        """
        # Check that the query mode is supported
        valid_modes = [
            'random_sample',
            'random_sample_per_pix',
            'samples',
            'median',
            'mean']

        if mode not in valid_modes:
            raise ValueError(
                '"{}" is not a valid `mode`. Valid modes are:\n'
                '  {}'.format(mode, valid_modes))

        n_coords_ret = coords.shape[0]

        # Determine if distance has been requested
        has_dist = hasattr(coords.distance, 'kpc')
        d = coords.distance.pc if has_dist else None

        # Convert coordinates to pixel indices
        pix_idx = self._coords2idx(coords)

        # Determine which coordinates are out of bounds
        mask_idx = (pix_idx == self._n_pix)
        if np.any(mask_idx):
            pix_idx[mask_idx] = 0

        # Which samples to extract
        if mode == 'random_sample':
            samp_idx = np.random.randint(0, self._n_samples, pix_idx.size)
            n_samp_ret = 1
        elif mode == 'random_sample_per_pix':
            samp_idx = np.random.randint(0, self._n_samples, self._n_pix)[pix_idx]
            n_samp_ret = 1
        else:
            samp_idx = slice(None)
            n_samp_ret = self._n_samples

        # Which distances to extract
        if has_dist:
            dist_idx_ceil = np.searchsorted(self._dists, d)

            if isinstance(samp_idx, slice):
                ret = np.empty((n_coords_ret, n_samp_ret), dtype='f4')
            else:
                ret = np.empty((n_coords_ret,), dtype='f4')

            # d < d(nearest distance slice)
            idx_near = (dist_idx_ceil == 0)
            if np.any(idx_near):
                a = d[idx_near] / self._dists[0]
                if isinstance(samp_idx, slice):
                    ret[idx_near] = (
                        a[:,None]
                        * self._data['A0'][pix_idx[idx_near], 0, samp_idx])
                else:
                    ret[idx_near] = (
                        a[:]
                        * self._data['A0'][pix_idx[idx_near], 0, samp_idx[idx_near]])

            # d > d(farthest distance slice)
            idx_far = (dist_idx_ceil == self._n_dists)
            if np.any(idx_far):
                if isinstance(samp_idx, slice):
                    ret[idx_far] = self._data['A0'][pix_idx[idx_far], -1, samp_idx]
                else:
                    ret[idx_far] = self._data['A0'][pix_idx[idx_far], -1, samp_idx[idx_far]]

            # d(nearest distance slice) < d < d(farthest distance slice)
            idx_btw = ~idx_near & ~idx_far
            if np.any(idx_btw):
                d_ceil = self._dists[dist_idx_ceil[idx_btw]]
                d_floor = self._dists[dist_idx_ceil[idx_btw]-1]
                a = (d_ceil - d[idx_btw]) / (d_ceil - d_floor)
                if isinstance(samp_idx, slice):
                    ret[idx_btw] = (
                        (1.-a[:,None])
                        * self._data['A0'][pix_idx[idx_btw], dist_idx_ceil[idx_btw], samp_idx]
                        + a[:,None]
                        * self._data['A0'][pix_idx[idx_btw], dist_idx_ceil[idx_btw]-1, samp_idx])
                else:
                    ret[idx_btw] = (
                        (1.-a[:])
                        * self._data['A0'][pix_idx[idx_btw], dist_idx_ceil[idx_btw], samp_idx[idx_btw]]
                        + a[:]
                        * self._data['A0'][pix_idx[idx_btw], dist_idx_ceil[idx_btw]-1, samp_idx[idx_btw]])
        else:
            # TODO: Harmonize order of distances & samples with Bayestar.
            ret = self._data['A0'][pix_idx, :, samp_idx]

        # Reduce the samples in the requested manner
        samp_axis = 1 if has_dist else 2

        if mode == 'median':
            ret = np.median(ret, axis=samp_axis)
        elif mode == 'mean':
            ret = np.mean(ret, axis=samp_axis)

        if np.any(mask_idx):
            ret[mask_idx] = np.nan

        return ret
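
    # Example usage (an illustrative sketch, not part of the original module;
    # it assumes the map has already been downloaded with
    # ``dustmaps.iphas.fetch()``):
    #
    #     from astropy.coordinates import SkyCoord
    #     import astropy.units as u
    #     from dustmaps.iphas import IPHASQuery
    #
    #     iphas = IPHASQuery()
    #     c = SkyCoord(l=45.*u.deg, b=0.*u.deg, distance=2.*u.kpc,
    #                  frame='galactic')
    #     A0_median = iphas.query(c, mode='median')    # scalar, in mags
    #     A0_samples = iphas.query(c, mode='samples')  # one value per sample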
    @property
    def distances(self):
        """
        Returns the distance bins that the map uses. The return type is
        ``astropy.units.Quantity``, which stores unit-full quantities.
        """
        # The distance bins are stored in pc (see ``ascii2h5`` and the
        # conversion to pc in ``query`` above).
        return self._dists * units.pc
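
    # Example (a sketch, continuing the usage example above): combining
    # ``distances`` with a query that omits distance yields the full
    # extinction profile along a sightline:
    #
    #     c = SkyCoord(l=60.*u.deg, b=1.*u.deg, frame='galactic')
    #     profile = iphas.query(c, mode='median')  # shape (n_dists,)
    #     for dist, A0 in zip(iphas.distances, profile):
    #         print(dist, A0)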
def ascii2h5(dirname, output_fname):
    """
    Converts a directory of tarballed ASCII ".samp" files (the original
    release format of the map) into a single HDF5 file.

    Args:
        dirname (str): Directory containing the downloaded
            ``A_samp_*.tar.gz`` files.
        output_fname (str): Filename of the HDF5 file to write.
    """
    import tarfile
    import sys

    from glob import glob
    from contextlib import closing

    # The datatype that will be used to store extinction, A0
    A0_dtype = 'float16'

    def load_samp_file(f, fname):
        # Parse the sightline's (l, b) from the filename
        fname_chunks = os.path.split(fname)[1].split('_')
        l = float(fname_chunks[0])
        b = float(fname_chunks[1])

        # Load ASCII data
        data_raw = np.loadtxt(f, dtype='float64')

        n_samples = data_raw.shape[1] - 1
        n_dists = data_raw.shape[0]

        # Construct the output record array
        dtype = [
            ('dist', 'int32'),
            ('A0', A0_dtype, (n_samples,))]
        data = np.empty(n_dists, dtype=dtype)
        data['dist'][:] = data_raw[:,0]
        data['A0'][:,:] = data_raw[:,1:]

        return (l,b), data

    def process_tarball(tarball_fname):
        # Write to the progress bar
        print('.', end='')
        sys.stdout.flush()

        with closing(tarfile.open(tarball_fname, mode='r:gz')) as f_tar:
            fnames = f_tar.getnames()

            # Use the first file to determine the array dimensions
            f = f_tar.extractfile(fnames[0])
            (l,b), data = load_samp_file(f, fnames[0])
            n_dists, n_samples = data['A0'].shape

            n_coords = len(fnames)
            dtype = [
                ('l', 'float32'),
                ('b', 'float32'),
                ('dist', 'int32', (n_dists,)),
                ('A0', A0_dtype, (n_dists, n_samples))]
            data_combined = np.empty(n_coords, dtype=dtype)

            for k,fn in enumerate(fnames):
                # print('File {: >4d} of {:d}'.format(k+1, n_coords))
                f = f_tar.extractfile(fn)
                (l,b), data = load_samp_file(f, fn)

                data_combined['l'][k] = l
                data_combined['b'][k] = b
                data_combined['dist'][k] = data['dist']
                data_combined['A0'][k] = data['A0']

        return data_combined

    def save_data(data, fname):
        with closing(h5py.File(fname, 'w')) as f:
            f.create_dataset(
                'samples',
                data=data,
                chunks=True,
                compression='gzip',
                compression_opts=3)

    print('Progress: ', end='')
    sys.stdout.flush()

    tar_fname_list = glob(os.path.join(dirname, 'A_samp_*.tar.gz'))

    d = np.hstack([process_tarball(fn) for fn in tar_fname_list])

    print('+', end='')
    sys.stdout.flush()

    save_data(d, output_fname)

    print('')
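
# Example (illustrative paths only; ``fetch()`` below performs this step
# automatically after downloading the tarballs): repack manually downloaded
# tarballs into the HDF5 file that ``IPHASQuery`` expects:
#
#     ascii2h5('/path/to/A_samp_tarballs',
#              '/path/to/A_samp_tarballs/iphas.h5')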
def fetch(clobber=False):
    """
    Downloads the IPHAS 3D dust map of Sale et al. (2014).

    Args:
        clobber (Optional[bool]): If ``True``, any existing file will be
            overwritten, even if it appears to match. If ``False`` (the
            default), ``fetch()`` will attempt to determine if the dataset
            already exists. This determination is not 100% robust against
            data corruption.
    """
    dest_dir = os.path.join(data_dir(), 'iphas')
    url_pattern = 'http://www.iphas.org/data/extinction/A_samp_{:03d}.tar.gz'
    fname_pattern = os.path.join(dest_dir, 'A_samp_') + '{:03d}.tar.gz'

    # Check if the file already exists
    if not clobber:
        h5_fname = os.path.join(dest_dir, 'iphas.h5')
        h5_size = 227817543 # Guess, in Bytes
        h5_dsets = {
            'samples': (61130,)
        }

        if fetch_utils.h5_file_exists(h5_fname, h5_size, dsets=h5_dsets):
            print('File appears to exist already. Call `fetch(clobber=True)` '
                  'to force overwriting of existing file.')
            return

    # Expected MD5 sums of .samp files
    file_md5sum = {
        30: 'dd531e397622bc97d4ff92b6c7863ade',
        40: 'b0f925eb3e46b77876e4054a26ad5b52',
        50: 'ea3b9500f0419d66dd92d9f9c127c2b5',
        60: 'cccf136f4e2306a6038e8093499216fd',
        70: 'a05fe2f815086686056c18087cc5410b',
        80: '799bf618c8827b3d7250c884ec66ec49',
        90: 'd2a302d917da768bacf6ea74cb9dcfad',
        100: '2c75e31ad9320818556c4c9964b6af65',
        110: '742ea8de6f5f8a7e549f6c56b0088789',
        120: '9beabfa2c9634f953adadb5016eab072',
        130: '7cd7313f466eb60e8318d0f1bd32e035',
        140: 'fb6d09e4d939081b891e245c30b791f1',
        150: '8e9b6dc1561183aeadc64f41c85a64a8',
        160: '8a35828457b7b1d53d06998114553674',
        170: '7ffb29ec23e2f625dcfaaa84c293821d',
        180: 'c737da479d132b88483d6ddab5b25fc8',
        190: '9bc5fc7f7ba55f36a167473bb3679601',
        200: '7d8ffc4aa2f7c7026d8aa3ffb670d48e',
        210: 'e31b04964b7970b81fc90c120b4ebc24'
    }

    # Download the .samp files
    for key in file_md5sum:
        url = url_pattern.format(key)
        print('Downloading {}'.format(url))

        fetch_utils.download_and_verify(
            url,
            file_md5sum[key],
            fname_pattern.format(key))

    # Convert from ASCII to HDF5 format
    print('Repacking files...')
    ascii2h5(dest_dir, os.path.join(dest_dir, 'iphas.h5'))

    # Cleanup
    print('Removing original files...')
    for key in file_md5sum:
        os.remove(fname_pattern.format(key))
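
# Example (a sketch of the typical end-to-end workflow):
#
#     from dustmaps.iphas import fetch, IPHASQuery
#
#     fetch()               # download and repack the map (run once)
#     iphas = IPHASQuery()  # load iphas.h5 from the dustmaps data directory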