#!/usr/bin/env python
#
# iphas.py
# Reads the IPHAS 3D dust map of Sale et al. (2014).
#
# Copyright (C) 2016 Gregory M. Green
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
from __future__ import print_function, division
import numpy as np
import h5py
import os
import astropy.coordinates as coordinates
import astropy.units as units
from .std_paths import *
from .map_base import DustMap, ensure_flat_galactic
from .unstructured_map import UnstructuredDustMap
from . import fetch_utils
class IPHASQuery(UnstructuredDustMap):
    """
    The 3D dust map of Sale et al. (2014), based on IPHAS imaging in the
    Galactic plane. The map covers 30 deg < l < 115 deg, -5 deg < b < 5 deg.
    """

    def __init__(self, map_fname=None):
        """
        Args:
            map_fname (Optional[str]): Filename at which the map is stored.
                Defaults to `None`, meaning that the default filename is used.
        """
        if map_fname is None:
            map_fname = os.path.join(data_dir(), 'iphas', 'iphas.h5')

        with h5py.File(map_fname, 'r') as f:
            self._data = f['samples'][:]

        self._n_pix = self._data.size
        self._n_dists = self._data['A0'].shape[1]
        self._n_samples = self._data['A0'].shape[2]

        # All the distance bins are the same
        self._dists = self._data['dist'][0]

        # Don't query more than this angular distance from any point
        max_pix_scale = 0.5 * units.deg

        # Tesselate the sphere
        coords = coordinates.SkyCoord(
            self._data['l'],
            self._data['b'],
            unit='deg',
            frame='galactic')

        super(IPHASQuery, self).__init__(coords, max_pix_scale, metric_p=2)

    @ensure_flat_galactic
    def query(self, coords, mode='random_sample'):
        """
        Returns A0 at the given coordinates. There are several different query
        modes, which handle the probabilistic nature of the map differently.

        Args:
            coords (``astropy.coordinates.SkyCoord``): The coordinates to query.
            mode (Optional[str]): Five different query modes are available:
                'random_sample', 'random_sample_per_pix' 'samples', 'median' and
                'mean'. The ``mode`` determines how the output will reflect the
                probabilistic nature of the IPHAS dust map.

        Returns:
            Monochromatic extinction, A0, at the specified coordinates, in mags.
            The shape of the output depends on the ``mode``, and on whether
            ``coords`` contains distances.

            If ``coords`` does not specify distance(s), then the shape of the
            output begins with `coords.shape`. If `coords` does specify
            distance(s), then the shape of the output begins with
            ``coords.shape + ([number of distance bins],)``.

            If ``mode`` is 'random_sample', then at each coordinate/distance, a
            random sample of reddening is given.

            If ``mode`` is 'random_sample_per_pix', then the sample chosen for
            each angular pixel of the map will be consistent. For example, if
            two query coordinates lie in the same map pixel, then the same
            random sample will be chosen from the map for both query
            coordinates.

            If ``mode`` is 'median', then at each coordinate/distance, the
            median reddening is returned.

            If ``mode`` is 'mean', then at each coordinate/distance, the mean
            reddening is returned.

            Finally, if ``mode`` is 'samples', then all at each
            coordinate/distance, all samples are returned.

        Raises:
            ValueError: If ``mode`` is not one of the supported query modes.
        """
        # Check that the query mode is supported
        valid_modes = [
            'random_sample',
            'random_sample_per_pix',
            'samples',
            'median',
            'mean']

        if mode not in valid_modes:
            raise ValueError(
                '"{}" is not a valid `mode`. Valid modes are:\n'
                '  {}'.format(mode, valid_modes))

        n_coords_ret = coords.shape[0]

        # Determine if distance has been requested
        has_dist = hasattr(coords.distance, 'kpc')

        # Convert coordinates to pixel indices
        pix_idx = self._coords2idx(coords)

        # Determine which coordinates are out of bounds. Out-of-bounds
        # coordinates are temporarily clamped to pixel 0, and the
        # corresponding results are overwritten with NaN at the end.
        mask_idx = (pix_idx == self._n_pix)
        if np.any(mask_idx):
            pix_idx[mask_idx] = 0

        # Which samples to extract
        if mode == 'random_sample':
            # One independent draw per query coordinate
            samp_idx = np.random.randint(0, self._n_samples, pix_idx.size)
            n_samp_ret = 1
        elif mode == 'random_sample_per_pix':
            # One draw per map pixel, shared by all coordinates in that pixel
            samp_idx = np.random.randint(0, self._n_samples, self._n_pix)[pix_idx]
            n_samp_ret = 1  # Fixed: was misspelled `n_sample_ret`
        else:
            samp_idx = slice(None)
            n_samp_ret = self._n_samples

        # Which distances to extract
        if has_dist:
            # The map's distance bins are stored in pc
            d = coords.distance.pc
            dist_idx_ceil = np.searchsorted(self._dists, d)

            if isinstance(samp_idx, slice):
                ret = np.empty((n_coords_ret, n_samp_ret), dtype='f4')
            else:
                ret = np.empty((n_coords_ret,), dtype='f4')

            # d < d(nearest distance slice): scale the first bin's
            # extinction linearly down toward zero at d = 0.
            idx_near = (dist_idx_ceil == 0)
            if np.any(idx_near):
                a = d[idx_near] / self._dists[0]
                if isinstance(samp_idx, slice):
                    ret[idx_near] = a[:,None] * self._data['A0'][pix_idx[idx_near], 0, samp_idx]
                else:
                    ret[idx_near] = a[:] * self._data['A0'][pix_idx[idx_near], 0, samp_idx[idx_near]]

            # d > d(farthest distance slice): clamp to the last bin.
            idx_far = (dist_idx_ceil == self._n_dists)
            if np.any(idx_far):
                if isinstance(samp_idx, slice):
                    ret[idx_far] = self._data['A0'][pix_idx[idx_far], -1, samp_idx]
                else:
                    ret[idx_far] = self._data['A0'][pix_idx[idx_far], -1, samp_idx[idx_far]]

            # d(nearest distance slice) < d < d(farthest distance slice):
            # interpolate linearly between the two bracketing bins.
            idx_btw = ~idx_near & ~idx_far
            if np.any(idx_btw):
                d_ceil = self._dists[dist_idx_ceil[idx_btw]]
                d_floor = self._dists[dist_idx_ceil[idx_btw]-1]
                a = (d_ceil - d[idx_btw]) / (d_ceil - d_floor)
                if isinstance(samp_idx, slice):
                    ret[idx_btw] = (
                        (1.-a[:,None]) * self._data['A0'][pix_idx[idx_btw], dist_idx_ceil[idx_btw], samp_idx]
                        + a[:,None] * self._data['A0'][pix_idx[idx_btw], dist_idx_ceil[idx_btw]-1, samp_idx])
                else:
                    ret[idx_btw] = (
                        (1.-a[:]) * self._data['A0'][pix_idx[idx_btw], dist_idx_ceil[idx_btw], samp_idx[idx_btw]]
                        + a[:] * self._data['A0'][pix_idx[idx_btw], dist_idx_ceil[idx_btw]-1, samp_idx[idx_btw]])
        else:
            # TODO: Harmonize order of distances & samples with Bayestar.
            ret = self._data['A0'][pix_idx, :, samp_idx]

        # Reduce the samples in the requested manner
        samp_axis = 1 if has_dist else 2

        if mode == 'median':
            ret = np.median(ret, axis=samp_axis)
        elif mode == 'mean':
            ret = np.mean(ret, axis=samp_axis)

        # Flag out-of-bounds coordinates
        if np.any(mask_idx):
            ret[mask_idx] = np.nan

        return ret

    @property
    def distances(self):
        """
        Returns the distance bins that the map uses. The return type is
        ``astropy.units.Quantity``, which stores unit-full quantities.
        """
        # The bins are stored in pc (cf. query(), which compares them
        # against coords.distance.pc). Was incorrectly labeled as kpc.
        return self._dists * units.pc
def ascii2h5(dirname, output_fname):
    """
    Converts from a directory of tarballed ASCII ".samp" files to a single
    HDF5 file. Essentially, converts from the original release format to a
    single HDF5 file.
    """
    import tarfile
    import sys
    from glob import glob

    # Extinction samples are stored at reduced precision to save space
    ext_dtype = 'float16'

    def parse_samp(fobj, fname):
        """Read a single ".samp" ASCII table. Returns ((l, b), data)."""
        # The sightline's Galactic coordinates are encoded in the filename
        pieces = os.path.split(fname)[1].split('_')
        gal_l = float(pieces[0])
        gal_b = float(pieces[1])

        # First column is distance; remaining columns are extinction samples
        raw = np.loadtxt(fobj, dtype='float64')
        n_dists = raw.shape[0]
        n_samples = raw.shape[1] - 1

        samp_dtype = [
            ('dist', 'int32'),
            ('A0', ext_dtype, (n_samples,))]
        out = np.empty(n_dists, dtype=samp_dtype)
        out['dist'][:] = raw[:, 0]
        out['A0'][:, :] = raw[:, 1:]

        return (gal_l, gal_b), out

    def read_tarball(tarball_fname):
        """Load every ".samp" file in one tarball into a structured array."""
        # One dot per tarball serves as a progress indicator
        print('.', end='')
        sys.stdout.flush()

        with tarfile.open(tarball_fname, mode='r:gz') as f_tar:
            members = f_tar.getnames()

            # Peek at the first file to size the combined array
            fobj = f_tar.extractfile(members[0])
            (gal_l, gal_b), first = parse_samp(fobj, members[0])
            n_dists, n_samples = first['A0'].shape

            combined_dtype = [
                ('l', 'float32'),
                ('b', 'float32'),
                ('dist', 'int32', (n_dists,)),
                ('A0', ext_dtype, (n_dists, n_samples))]
            combined = np.empty(len(members), dtype=combined_dtype)

            for j, member in enumerate(members):
                fobj = f_tar.extractfile(member)
                (gal_l, gal_b), entry = parse_samp(fobj, member)
                combined['l'][j] = gal_l
                combined['b'][j] = gal_b
                combined['dist'][j] = entry['dist']
                combined['A0'][j] = entry['A0']

        return combined

    def write_h5(data, fname):
        """Write the combined samples to a compressed HDF5 dataset."""
        with h5py.File(fname, 'w') as f:
            f.create_dataset(
                'samples',
                data=data,
                chunks=True,
                compression='gzip',
                compression_opts=3)

    print('Progress: ', end='')
    sys.stdout.flush()

    tarballs = glob(os.path.join(dirname, 'A_samp_*.tar.gz'))
    combined = np.hstack([read_tarball(fn) for fn in tarballs])

    print('+', end='')
    sys.stdout.flush()

    write_h5(combined, output_fname)
    print('')
def fetch(clobber=False):
    """
    Downloads the IPHAS 3D dust map of Sale et al. (2014).

    Args:
        clobber (Optional[bool]): If ``True``, any existing file will be
            overwritten, even if it appears to match. If ``False`` (the
            default), ``fetch()`` will attempt to determine if the dataset
            already exists. This determination is not 100% robust against data
            corruption.
    """
    # Fixed: `dest_dir = fname_pattern = ...` chained a dead binding of
    # `fname_pattern` that was immediately overwritten below.
    dest_dir = os.path.join(data_dir(), 'iphas')
    url_pattern = 'http://www.iphas.org/data/extinction/A_samp_{:03d}.tar.gz'
    fname_pattern = os.path.join(dest_dir, 'A_samp_') + '{:03d}.tar.gz'

    # Check if the repacked HDF5 file already exists
    if not clobber:
        h5_fname = os.path.join(dest_dir, 'iphas.h5')
        h5_size = 227817543  # Guess, in Bytes
        h5_dsets = {
            'samples': (61130,)
        }
        if fetch_utils.h5_file_exists(h5_fname, h5_size, dsets=h5_dsets):
            print('File appears to exist already. Call `fetch(clobber=True)` '
                  'to force overwriting of existing file.')
            return

    # Expected MD5 sums of .samp files, keyed by Galactic longitude
    file_md5sum = {
        30: 'dd531e397622bc97d4ff92b6c7863ade',
        40: 'b0f925eb3e46b77876e4054a26ad5b52',
        50: 'ea3b9500f0419d66dd92d9f9c127c2b5',
        60: 'cccf136f4e2306a6038e8093499216fd',
        70: 'a05fe2f815086686056c18087cc5410b',
        80: '799bf618c8827b3d7250c884ec66ec49',
        90: 'd2a302d917da768bacf6ea74cb9dcfad',
        100: '2c75e31ad9320818556c4c9964b6af65',
        110: '742ea8de6f5f8a7e549f6c56b0088789',
        120: '9beabfa2c9634f953adadb5016eab072',
        130: '7cd7313f466eb60e8318d0f1bd32e035',
        140: 'fb6d09e4d939081b891e245c30b791f1',
        150: '8e9b6dc1561183aeadc64f41c85a64a8',
        160: '8a35828457b7b1d53d06998114553674',
        170: '7ffb29ec23e2f625dcfaaa84c293821d',
        180: 'c737da479d132b88483d6ddab5b25fc8',
        190: '9bc5fc7f7ba55f36a167473bb3679601',
        200: '7d8ffc4aa2f7c7026d8aa3ffb670d48e',
        210: 'e31b04964b7970b81fc90c120b4ebc24'
    }

    # Download the .samp files, verifying each against its MD5 sum
    for key in file_md5sum:
        url = url_pattern.format(key)
        print('Downloading {}'.format(url))

        fetch_utils.download_and_verify(
            url,
            file_md5sum[key],
            fname_pattern.format(key))

    # Convert from ASCII to HDF5 format
    print('Repacking files...')
    ascii2h5(dest_dir, os.path.join(dest_dir, 'iphas.h5'))

    # Cleanup: remove the original tarballs now that the HDF5 file exists
    print('Removing original files...')
    for key in file_md5sum:
        os.remove(fname_pattern.format(key))