# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os,re
import sys
from scipy import ndimage
from astropy.io.fits import Header
import astropy.io.ascii as asciitable
from astropy.table import Table, Column
import math
import numpy as np
np.random.seed(seed=1234)
# LOCAL IMPORTS
from .__version__ import __version__
from .math_utils import merge_where_nan, median_clip, safe_exp
from .instruments import *
from .hyperspectral_cube import HyperspectralCube as HyperCube
from .string_stdout import StringStdOut
from .disk_model import DiskModel
from .galaxy_parameters import GalaxyParameters, GalaxyParametersError
from .plot_utilities import Runner
DefaultModel = DiskModel
OII = {'wave': [3726.2, 3728.9], 'ratio':[0.8,1.0]}
# LOGGING CONFIGURATION
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('GalPaK')
logger.info(' Running galpak ' + __version__)
# OPTIONAL IMPORTS
try:
import bottleneck as bn
except ImportError:
logger.info("bottleneck (optional) not installed, performances will be degraded")
import numpy as bn
try:
import pyfftw
except ImportError:
logger.info("PyFFTW (optional) not installed, performances will be degraded")
#Python3 compatibility
try:
basestring
except NameError:
basestring = str
[docs]class GalPaK3D(Runner):
"""
GalPaK3D is a tool to extract Galaxy Parameters and Kinematics from
3-Dimensional data, using reverse deconvolution with Bayesian analysis
Markov Chain Monte Carlo. (random walk)
cube: HyperspectralCube|string
The actual data on which we'll work ; it should contain only one galaxy.
Can be a HyperspectralCube object, a string filename to a FITS file, or
even MPDAF's ``mpdaf.obj.Cube``.
seeing: float
Aka the Point Spread Function's Full Width Half Maximum.
This convenience parameter, when provided, will override the FWHM value
of the instrument's PSF.
line: None[default] | dict to fit doublets, use a dictionary with
line['wave']=[l, lref] eg. [3726.2, 3728.9] # Observed or Rest
line['ratio']=[0.8, 1.0] eg. [0.8, 1.0] # The primary line for redshifts is the reddest
instrument: Instrument
The instrument configuration to use when simulating convolution.
The default is :class:`MUSE <galpak.MUSE>`.
crval3: float
A value for the cube's header's CRVAL3 when it is missing.
You should update your cube's header.
crpix3: float
A value for the cube's header's CRPIX3 when it is missing.
You should update your cube's header.
cunit3: float
A value for the cube's header's CUNIT3 when it is missing.
You should update your cube's header.
cdelt3: float
A value for the cube's header's CDELT3 when it is missing.
You should update your cube's header.
cunit1: float
A value for the cube's header's CUNIT1 (&2) when it is missing.
You should update your cube's header.
force_header_update: bool
Set to True to force the update of the above header cards,
when their values are not missing.
Note: These will not be saved into the FITS file. (if the cube is one)
"""
def __init__(self, cube, variance=None, line=None,
seeing=None, instrument=None, flux_image=None,
crval3=None, crpix3=None, cunit3=None, cdelt3=None, ctype3=None, cunit1=None,
force_header_update=False):
# DEVS : If you change the signature above,
# remember to update the run() in api.py
# Assign the logger to a property for convenience, and set verbosity
self.logger = logger
self.version = __version__
# Set up the input data cube
if isinstance(cube, basestring):
self.logger.info('Reading cube from %s' % (cube))
cube = HyperCube.from_file(cube)
elif isinstance(cube, HyperCube):
self.logger.info('Provided cube is a HyperSpectral Object')
else:
import mpdaf
if isinstance(cube, mpdaf.obj.Cube):
self.logger.info('Provided Cube is a mpdaf Cube object')
cube = HyperCube.from_mpdaf(cube)
else:
raise TypeError("Provided cube is not a HyperspectralCube "
"nor mpdaf's Cube")
if cube.is_empty():
raise ValueError("Provided cube is empty")
self.cube = cube
if not self.cube.has_header():
self.logger.info("Reading hyperspectral cube without header. "
"Creating a minimal one.")
self.cube.header = Header()
# GalPaK needs a sane Cube
#self.cube.sanitize()
# Set up the variance
if variance is not None:
self.logger.info('Using user-provided variance input')
if isinstance(variance, basestring):
self.logger.info("Read provided variance cube %s into HyperCube." % (variance))
variance = HyperCube.from_file(variance)
variance_cube = variance.data
elif isinstance(variance, HyperCube):
if variance.data is None:
self.logger.warning("Provided variance cube is empty.")
else:
self.logger.info("Saving variance into varianceCube")
variance_cube = variance.data
elif not (isinstance(variance,np.float) or isinstance(variance, HyperCube)):
raise TypeError("Provided variance is not a string nor HyperCube nor a float")
else:
variance_cube = self.cube.var
#@fixme will be removed
# Set up the measure context
self.line = line
# Set up the instrument's context
if instrument is None:
instrument = MUSE()
self.logger.warning('Using the MUSE instrument per default. '
'You should specify your own instrument.')
if isinstance(instrument, basestring):
raise ValueError("Instrument needs to be an instance of "
"Instrument, not a string.")
if not isinstance(instrument, Instrument):
raise ValueError("Instrument needs to be an instance of Instrument")
self.instrument = instrument
## Set default cube specs from instrument when missing from headers
# as we don't want to rely on the input fits having properly set headers.
# So, we aggregate in the cube our own specs (in " and µm) for our personal use :
# - xy_step
# - z_step
# - z_central
# 1. Patch up the Cube's missing values
try:
self.cube.patch(
crval3=crval3, crpix3=crpix3, cunit3=cunit3,
cdelt3=cdelt3, ctype3=ctype3, cunit1=cunit1, force=force_header_update
)
except ValueError:
raise ValueError("The cube already has one of the header cards "
"you're trying to provide. "
"Use force_header_update=True to override.")
self.logger.debug('Header after patch : %s' % self.cube)
#set xy_step z_step and z_central
# 2. Set cube metadata from the the instrument if header is incomplete
self.cube.defaults_from_instrument(instrument=instrument)
# 3.
self.cube.initialize_self_cube()
# 4. Calibrate the instrument with the cube
self.instrument.use_pixelsize_from_cube(self.cube)
self.logger.debug('z central : %4.e' % (self.cube.z_central) )
# Override the PSF FWHM (aka. seeing) if provided
if seeing is not None and self.instrument.psf is not None:
try:
self.instrument.psf.fwhm = seeing
except AttributeError:
raise IOError("You provided a seeing but your instrument's PSF has no FWHM.")
# Prepare output attributes
self.acceptance_rate = 100.
self.galaxy = GalaxyParameters()
self.stdev = GalaxyParameters()
self.chain = None
self.sub_chain = None
self.psf3d = None
self.convolved_cube = None
self.deconvolved_cube = None
self.residuals_cube = None
self.residuals_map = None
self.variance_cube = None
#true intrinsic maps
self.true_flux_map = None
self.true_velocity_map = None
self.true_disp_map = None
self.error_maps = False
self.true_flux_map_error = None
self.true_velocity_map_error = None
self.true_disp_map_error = None
self.chain_flux_map = []
self.chain_velocity_map = []
self.chain_dispersion_map = []
#observed maps
self.obs_flux_map = None
self.obs_velocity_map = None
self.obs_disp_map = None
self.max_iterations = None
self.method = None
self.chain_fraction = None
self.percentile = None
self.initial_parameters = None
self.min_boundaries = None
self.max_boundaries = None
self.known_parameters = None
self.random_scale = None
self.model = None
self.redshift = None
# Handle the variance, when provided, or generate one
if variance_cube is not None:
self.logger.info("Replacing 0s in the variance cube by 1e12")
variance_cube = np.where(variance_cube == 0.0, 1e12, variance_cube)
else:
# Clip data, and collect standard deviation sigma
self.logger.warning("No variance provided. Estimating Variance from edge statistics")
clipped_data, clip_sigma, __ = median_clip(self.cube.data[:, 2:-4, 2:4], 2.5)#yband at x[2:4]
self.logger.info("Computed stdev from the edges: Var=%.e" % clip_sigma)
# Adjust stdev margin if it is zero, as we'll divide with it later on
if not np.isfinite(clip_sigma):
clipped_data, clip_sigma, __ = median_clip(self.cube.data, 2.5)
self.logger.info("reComputing stdev from the whole cube: Var=%.e" % clip_sigma)
if np.size(clip_sigma) == 1 and clip_sigma == 0:
clip_sigma = 1e-20
variance_cube = clip_sigma ** 2 *np.ones_like(self.cube.data)
self.logger.info('Variance estimated is %s ' % (str(clip_sigma)))
# Save the variance cube
self.variance_cube = variance_cube # cube of sigma^2
self.error_cube = np.sqrt(self.variance_cube) # cube of sigmas
# Provide the user with some feedback
self.logger.info("Setting up with the following setup : %s" % self.instrument)
#self.logger.info("""Will use hyperspectral cube attributes :
#xy_step = %.3e "
#z_step = %.3e %s
#z_central = %.3e %s
#""" % (self.cube.xy_step, self.cube.z_step, self.cube.z_cunit, self.cube.z_central, self.cube.z_cunit))
# We're sometimes dividing by zero, on purpose (its FASTER)
# Uncomment this to annihilate console logs -- but it's dirty
#previous_error_status = np.seterr(all='ignore')
#np.seterr(**previous_error_status)
[docs] def run_mcmc(self, max_iterations=15000,
method_chain='last',
last_chain_fraction=60,
percentile=95,
model=None,
flux_image=None,
chi_stat='gaussian',
min_boundaries=None,
max_boundaries=None,
known_parameters=None,
initial_parameters=None,
min_acceptance_rate=5.0,
random_scale=None,
verbose=True,
save_error_maps=True):
# DEVS : If you change the signature above,
# remember to update the run() in api.py
"""
Main method_chain of GalPak, computes and returns the galaxy parameters
as a :class:`GalaxyParameters <galpak.GalaxyParameters>` object
using reverse deconvolution with a MCMC.
Also fills up the following attributes :
- chain
- psf3d
- deconvolved_cube
- convolved_cube
- residuals_cube (Data-Model in units of sigma)
- residuals_map (average of data-model in units of sigma or = 1/N_z.
Sum_z Residuals_cube. sqrt(Nz) )
- acceptance_rate
- galaxy (same object as returned value)
with Vmax forced to be positive [and 180 added to PA]
- stdev (also available as galaxy.stdev)
- true_flux_map
- true_velocity_map
- true_disp_map
Stops iteration if acceptance rate drops below ``min_acceptance_rate`` %
or when ``max_iterations`` are reached.
max_iterations: int
Maximum number of useful iterations.
method_chain: 'chi_sorted'|'chi_min'|'last'
Method used to determine the best parameters from the chain.
- 'last' (default) : mean of the last_chain_fraction(%) last parameters of the chain
- 'chi_sorted' : mean of the last_chain_fraction(%) best fit parameters of the chain
- 'chi_min' : mean of last_chain_fraction(%) of the chain around the min chi
last_chain_fraction: int
Last Chain fraction (in %) used to compute the best parameters.
Defaults to 60.
model = DiskModel()
see class DiskModel
flux_image: string
filename of user provided flux image (fits) to be used.
UNSUPPORTED
chi_stat: 'gaussian' [default] | 'Mighell' | 'Neyman'
The chi2 statitics
https://heasarc.gsfc.nasa.gov/xanadu/xspec/manual/XSappendixStatistics.html
Mighell http://adsabs.harvard.edu/abs/1999ApJ...518..380M
Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H
- 'gaussian' (default): Sum (D - M)^2 / e
- 'Neyman' Sum (D - M )^2 / max(D,1)
- 'Mighell' Sum (D + min(D,1) - M)^2 / (D+1)
min_boundaries: ndarray|GalaxyParameters
The galaxy parameters will never be less than these values.
Will override the default minimum boundaries for the parameters.
If any of these values are NaN, they will be replaced by the default ones.
max_boundaries: ndarray|GalaxyParameters
The galaxy parameters will never be more than these values.
Will override the default minimum boundaries for the parameters.
If any of these values are NaN, they will be replaced by the default ones.
known_parameters: ndarray|GalaxyParameters
All set parameters in this array will be skipped in the MCMC,
the algorithm will not try to guess them.
initial_parameters: ndarray|GalaxyParameters
The initial galaxy parameters of the MCMC chain.
If None, will use the inital parameters provided by the model.
The galaxy parameters not initialized by the model or by this
parameter will be set to the mean of the boundaries.
min_acceptance_rate: float
In %, the acceptance rate is the number of useful iterations divided
by the total number of iterations.
If it gets below the `failure_rate` treshold, iterations stop.
random_scale: float
Scale the amplitude of the MCMC sampling by these values.
This is an important parameter to adjust for reasonable acceptance rate.
The acceptance rate should be around 30-50%.
If the acceptance rate is <20-30% (too low), decrease random_scale
IF the acceptance rate is >50-60% (too high), increase random_scale
verbose: boolean
Set to True to output a detailed log of the process.
The run is faster when this is left to False.
"""
# DEVS : If you change the signature above,
# remember to update the run() in api.py
# Save the parameters (the animation uses them)
self.max_iterations = max_iterations
self.method = method_chain
self.chain_fraction = last_chain_fraction
self.percentile = percentile
self.error_maps = save_error_maps #save intermediate model maps
# Set up the simulation model
if model is None:
model = DiskModel()
#if not isinstance(model, DiskModel):
# raise TypeError("The `model=` must inherit `Model`.")
self.model = model
self.model_dict = model.__dict__
#For backwards compatibility:
## will be removed in the future
if self.model.line is None:
self.model.line = self.line
else:
self.line = self.model.line
#save attribute
#For backwards compatibility:
## will be removed in the future
## if not None, computes Mdyn(Re)
if self.model.redshift is not None:
self.redshift = self.model.redshift
#important:
self.model.pixscale = self.cube.xy_step
# Sanitize data and set arbitrary big stdev where NaNs are
cube_data = self.cube.data
cube_data = np.nan_to_num(cube_data)
# Set verbosity
self._set_verbose(verbose)
# Provide the user with some feedback
#self.logger.info("Starting deconvolution with the following instrument "
# "properties : %s" % self.instrument)
self.logger.info("Model: %s", self.model_dict)
# Memoize various dimensions and shapes
shape = np.shape(cube_data)
dim_d = np.size(cube_data)
dim_p = np.size(GalaxyParameters())
# In fraction of boundary space, an arbitrarily
# small value for closeness to boundaries
eps = 0.01
# Compute a flux estimation
# fixme: add weighted sum with variance if present
self.flux_est = bn.nansum(self.cube.data)
if self.flux_est < 0:
self.logger.warning(
"WARNING: Initial flux (%4.2e) is <0 -- "
"likely wrong, will recompute it ignoring <0 values"
% self.flux_est
)
self.flux_est = np.sum(np.where(self.cube.data > 0, self.cube.data, 0))
self.logger.warning('Initial flux is now %4.2e' % self.flux_est)
self.logger.info('TIP: use `initial_parameters=` to set the flux')
else:
self.logger.info('Initial flux is %4.2e' % self.flux_est)
# Default boundaries
self.min_boundaries = self.model.min_boundaries(self)
self.max_boundaries = self.model.max_boundaries(self)
# Merge provided boundaries (if any) with default boundaries
if min_boundaries is not None:
min_boundaries = np.copy(min_boundaries)
merge_where_nan(min_boundaries, self.min_boundaries) #this returns a ndarray
self.min_boundaries = GalaxyParameters().from_ndarray(min_boundaries)
if max_boundaries is not None:
max_boundaries = np.copy(max_boundaries)
merge_where_nan(max_boundaries, self.max_boundaries) #this returns a ndarray
self.max_boundaries = GalaxyParameters().from_ndarray(max_boundaries)
#if abs(self.min_boundaries['maximum_velocity']) != \
# abs(self.max_boundaries['maximum_velocity']):
# raise ValueError('For Vmax, please use identical boundaries [Min/Max]')
bug_boundaries = self.min_boundaries > self.max_boundaries
if bug_boundaries.any():
self.logger.debug("Min Boundaries : %s", self.min_boundaries)
self.logger.debug("Max Boundaries : %s", self.max_boundaries)
raise ValueError("Boundaries are WRONG, because min > max")
# Default initial parameters
self.initial_parameters = self.model.initial_parameters(self)
# Create initial galaxy parameters using mean and provided values
mean_parameters = (self.max_boundaries + self.min_boundaries) / 2.
#complete default values with mean parameters
merge_where_nan(self.initial_parameters, mean_parameters)
# Merge provided initial parameters (if any) with the defaults
if initial_parameters is not None:
#complete input with default values
merge_where_nan(initial_parameters, self.initial_parameters)
self.initial_parameters = initial_parameters
galaxy = self.initial_parameters #assign
self.logger.info("Initial parameters : %s", self.initial_parameters)
# By default, try to guess all parameters
should_guess_flags = np.ones(dim_p) # 0: we know it / 1: try to guess
#if using input image
# @fixme; this is unused
if self.model_dict['flux_profile'] == 'user':
self.flux_map_user = flux_image
if known_parameters is None:
self.logger.WARNING(
"With an input image it is advised to freeze "
"the `inclination`, using `known_parameters=`.")
# Flag parameters that we manually specified and don't need to guess
if known_parameters is not None:
if len(known_parameters) < dim_p:
raise ValueError("known_parameters must have a length >= %d", dim_p)
else:
known_parameters = GalaxyParameters()
self.known_parameters = known_parameters
#fixes some known parameters
for idx in range(dim_p):
parameter = known_parameters[idx]
if not math.isnan(parameter):
should_guess_flags[idx] = 0
sign = math.copysign(1., parameter)
self.min_boundaries[idx] = parameter * (1 - eps * sign) - eps
self.max_boundaries[idx] = parameter * (1 + eps * sign) + eps
self.initial_parameters[idx] = parameter
# The setup is finished, let's dump some information
self.logger.info("Initial parameters : %s", self.initial_parameters)
self.logger.info("Min : %s", self.min_boundaries)
self.logger.info("Max : %s", self.max_boundaries)
## The actual MCMC #####################################################
# The setup is done, we can now start the MCMC loop.
# Tweak the random amplitude vector (Kp coeff, as pid)
# that we can document Model.setup_random_amplitude() adequately
random_amplitude = np.sqrt(
(self.min_boundaries - self.max_boundaries) ** 2 / 12.
) * dim_p / dim_d
# Let the model adjust the random amplitude of the parameter jump
self.model.setup_random_amplitude(random_amplitude)
# Scale MCMC if needed // allowing vectors
if random_scale is not None:
if np.size(random_scale) != 1:
merge_where_nan(random_scale, np.ones_like(random_amplitude))
random_amplitude = random_amplitude * random_scale
self.random_scale = random_scale
# Zero random amplitude where parameters are known
random_amplitude = random_amplitude * should_guess_flags
self.random_amplitude = random_amplitude
self.logger.info("Random amplitude : %s", str(random_amplitude))
# NotImplemented (yet)
fit_2d = None
# Chi2 satistics, default
if (chi_stat == 'gaussian'):
compute_chi = lambda g: \
np.nansum(self._compute_error_gauss(g) ** 2
)
self.Ndegree = (dim_d - dim_p - 1) # degree of freedom
# https://heasarc.gsfc.nasa.gov/xanadu/xspec/manual/XSappendixStatistics.html
# Mighell http://adsabs.harvard.edu/abs/1999ApJ...518..380M
# Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H
# Mighell
if (chi_stat == 'Mighell'):
compute_chi = lambda g: \
np.nansum(self._compute_error_Mighell(g)
)
self.Ndegree = (dim_d - dim_p )
# C-statistique
if (chi_stat == 'Cstat'):
compute_chi = lambda g: \
np.nansum(self._compute_error_cstat(g)
)
self.Ndegree = (dim_d - dim_p )
# Neyman
if (chi_stat == 'Neyman'):
compute_chi = lambda g: \
np.nansum(self._compute_error_Neyman(g)
)
self.Ndegree = (dim_d - dim_p )
# Pearson
if (chi_stat == 'Pearson'):
compute_chi = lambda g: \
np.nansum(self._compute_error_Pearson(g)
)
self.Ndegree =(dim_d - dim_p )
# Some useful vars
count_alpha_ok = 0
total_iterations = 0
ki_min = sys.maxsize # that way, it can only go down
galaxy = self.initial_parameters
galaxy_old = galaxy.copy()
galaxy_new = galaxy_old.copy()
chi_old = compute_chi(galaxy)
self.logger.info("Starting with χ² = %f", chi_old / self.Ndegree)
self.logger.info("Starting with χ² = %f", chi_old / self.Ndegree)
# Create empty chain with galaxy parameters and reduced chi
names_param = []
names_param.extend(GalaxyParameters.names)
names_param.append('reduced_chi')
# names_param = [
# 'x', 'y', 'z', 'flux', 'radius', 'inclination', 'pa',
# 'turnover_radius', 'maximum_velocity', 'velocity_dispersion',
# 'reduced_chi',
# ]
# types_param = ['float32' for i in range(dim_p + 1)]
#dt = zip(names_param, types_param)
#chain = np.zeros(max_iterations, dtype=dt) # intensive operation !
# chain = Table(names=names_param,dtype=types_param)
chain_rows = []
rate = 0.
while (total_iterations < max_iterations * 20 or rate > min_acceptance_rate) and (count_alpha_ok < max_iterations):
# Update loop conditions
total_iterations += 1
rate = count_alpha_ok * 100. / total_iterations
# Cauchy jumping
random_uniforms = np.random.uniform(-np.pi / 2., np.pi / 2., size=dim_p)
galaxy_new = galaxy_old + random_amplitude * np.tan(random_uniforms)
# Sanitize the parameters
self.model.sanitize_parameters(galaxy_new) # to deal with circularity
# Check that parameters are not too close to the boundaries
too_close_to_max = np.abs((galaxy_new - self.max_boundaries) / self.max_boundaries) < eps
too_close_to_min = np.abs((galaxy_new - self.min_boundaries) / self.min_boundaries) < eps
too_close_to_boundaries = too_close_to_min + too_close_to_max
# Make sure we're inside the preset boundaries
in_boundaries = (galaxy_new >= self.min_boundaries) * (galaxy_new <= self.max_boundaries)
if in_boundaries.all():
# Computing convolved model with new parameters
chi_new = compute_chi(galaxy_new)
if chi_new<1e-3:
#print chi_new,galaxy_new
self.logger.error('Chi2 is 0, quit')
exit()
# Compute ratio of likelihood a posteriori
# exp( old-new / 2 )
likelihood = safe_exp(-0.5 * (chi_new - chi_old))
# likelihood > 1 => new is better
# likelihood < 1 => accept or reject
alpha = np.random.rand()
# Conservation test
if alpha < likelihood:
count_alpha_ok += 1
# Save minimum
if ki_min > chi_new:
ki_min = chi_new
count_min = count_alpha_ok
galaxy_old = galaxy_new
chi_old = chi_new
# vect = np.append(galaxy_new, chi_new / self.Ndegree)
#chain[count_alpha_ok - 1] = np.float32(vect)
# chain.add_row(vect)
chain_rows.append(list(galaxy_new) +
[chi_new / self.Ndegree])
if verbose:
info = "{count:5d} MIN={count_min:5d} {rate:2.0f}% " \
"χ²={ki:3.6f}>{ki_min:3.6f} {params:s}"
info = info.format(
count=count_alpha_ok,
count_min=count_min,
ki=chi_new / self.Ndegree,
ki_min=ki_min / self.Ndegree,
rate=rate,
params=galaxy_new.colored_info(too_close_to_boundaries)
)
if self.redshift is not None:
mdyn = self.model._compute_Mdyn_at_Rhalf(galaxy_new.radius, galaxy_new.rv, galaxy_new.max_vel, self.cube.xy_step)
info += "mdyn={mass:4.2e}"
info = info.format(mass=mdyn)
print(info)
chain = Table(names=names_param, rows=chain_rows)
# Raise if no useful iteration was run
if rate == 0.:
self.logger.debug("Last Galaxy Parameters : %s", galaxy_new)
raise RuntimeError("No useful iteration was run. "
"Try with higher max_iterations?")
# Sanitize the chain
self.model.sanitize_chain(chain)
# Report
self.logger.info("Iterations report : %d Total, %d OK, %d%% Rate",
total_iterations, count_alpha_ok, rate)
self.logger.info("Storing results as parameters...")
# Acceptance Rate
self.logger.info("self.acceptance_rate : useful iterations count / total iterations count")
self.acceptance_rate = rate
# Store chain
self.logger.info("self.chain : full Markov chain")
good_idx = np.where(chain['reduced_chi']!=0)
self.chain = Table(chain[good_idx])
# Store PSF 3D, which may not be defined
try:
self.psf3d = HyperCube(self.instrument.psf3d)
except AttributeError:
pass
# Extract Galaxy Parameters from chain, and store them
self.logger.info("Extracting best parameters (medians) from chain")
self.best_parameters_from_chain(method_chain, last_fraction=last_chain_fraction, percentile=percentile)
# Create output cubes
self.logger.info("self.convolved_cube : simulated convolved cube from found galaxy parameters")
self.convolved_cube = self.create_convolved_cube(self.galaxy, shape)
self.convolved_cube.header = self.cube.header
self.logger.info("self.deconvolved_cube : deconvolved cube from found galaxy parameters")
self.deconvolved_cube = self.create_clean_cube(self.galaxy, shape, final=True)
self.deconvolved_cube.header = self.cube.header
self.logger.info("self.residuals_cube : diff between actual data and convolved cube, scaled by stdev margin")
self.residuals_cube = (self.cube - self.convolved_cube) / self.error_cube # * np.mean(variance_cube)
self.residuals_cube.header = self.cube.header
#compute observed maps
_ = self._make_moment_maps(self.convolved_cube, mask=True) # make moment maps with cube convolved with 3DPSF
# Average of residuals, normalized to sigma_mu
nz = self.residuals_cube.shape[0]
self.residuals_map = (self.residuals_cube.data.sum(0) / nz) * np.sqrt(nz)
# Compute the χ²
ki_reduced = compute_chi(self.galaxy) / (dim_d - dim_p - 1)
self.logger.info("χ² at best parameters: %f", ki_reduced)
self.best_chisq = np.min(self.chain['reduced_chi'])
self.logger.info("Best χ², %f ", self.best_chisq)
# Show a plot of the chain if verbose, to draw attention to the chain
if verbose:
# Sometimes, when the number of iterations is low, plotting fails.
# It is a complex issue with matplotlib, so we're 'try'-wrapping it.
try:
self.plot_mcmc(adapt_range='5stdev')
except:
pass
try:
self.plot_geweke()
except:
pass
return self.galaxy
def best_parameters_from_chain(self, method_chain, last_fraction, percentile):
"""
Computes best fit galaxy parameters from chain, using medians from a specified method_chain.
method_chain: string
The method to use to extract the fittest parameters from the chain.
'last' (default) : mean of the last_fraction(%) last parameters of the chain
'chi_sorted' : mean of the last_fraction(%) best fit parameters of the chain
'chi_min' : mean of last_fraction(%) of the chain around the min chi
last_fraction: float %
Fraction of the end of the chain used in determining the parameters.
percentile: float % (None as default)
None: the method to use to compute the errors on the parameter is the standard deviation of the median
float: the percentile (the 68th, or 95th percentile) to be used for the errors on the parameters
#fixme: in which case returns the lower and upper values.
Returns the galaxy parameters and the stdev
"""
self.method = method_chain
self.chain_fraction = last_fraction
self.percentile = percentile
if self.chain is None:
raise RuntimeError("No chain! Run .run_mcmc() first.")
# Data correction for Vmax
#vmax_sign = (self.chain['maximum_velocity'] < 0)
#pa_correction = np.where(vmax_sign, self.chain['pa'] + 180., self.chain['pa'])
#pa_correction = np.where(pa_correction > 180, pa_correction - 360, pa_correction)
#self.chain['pa'] = pa_correction
#self.chain['maximum_velocity'] = np.abs(self.chain['maximum_velocity'])
cols = [ Column(data=np.cos(np.radians(self.chain['pa'])), name='cospa'),
Column(data=np.cos(np.radians(self.chain['pa']*2)),name='cos2pa'),
Column(data=np.sin(np.radians(self.chain['pa'])), name='sinpa'),
Column(data=np.sin(np.radians(self.chain['pa']*2)),name='sin2pa')
]
chain_full = self.chain.copy()
chain_full.add_columns(list(cols))
#extract subchain
chain_size = np.size(chain_full)
n = int(chain_size * last_fraction / 100.) # number of samples (last_fraction(%) of total)
idx = chain_full.argsort('reduced_chi')
self.chain.idxsorted = idx
if method_chain == 'chi_min':
min_chi_index = self._get_min_chi_index()
sub_chain = chain_full[min_chi_index - n // 2: min_chi_index + n // 2]
self.chain.xmin = np.max([0, min_chi_index - n // 2])
self.chain.xmax = np.min([chain_size, min_chi_index + n // 2])
elif method_chain == 'last':
sub_chain = chain_full[-n:]
self.chain.xmin = chain_size - n
self.chain.xmax = chain_size
elif method_chain == 'chi_sorted':
chain_full.sort('reduced_chi')
sub_chain = chain_full[idx][:n]
sub_idx = idx[:n]
self.chain.xmin = 0
self.chain.xmax = n
else:
raise ValueError("Unsupported chain mean `method_chain` '%s'"
% method_chain)
#compute error model maps
if self.error_maps:
if method_chain == 'chi_min':
tmp_f = np.array(self.chain_flux_map)[min_chi_index - n // 2: min_chi_index + n // 2]
tmp_v = np.array(self.chain_velocity_map)[min_chi_index - n // 2: min_chi_index + n // 2]
tmp_s = np.array(self.chain_dispersion_map)[min_chi_index - n // 2: min_chi_index + n // 2]
elif method_chain == 'last':
tmp_f = np.array(self.chain_flux_map)[-n:]
tmp_v = np.array(self.chain_velocity_map)[-n:]
tmp_s = np.array(self.chain_dispersion_map)[-n:]
elif method_chain == 'chi_sorted':
tmp_f = np.array(self.chain_flux_map)[sub_idx]
tmp_v = np.array(self.chain_velocity_map)[sub_idx]
tmp_s = np.array(self.chain_dispersion_map)[sub_idx]
self.true_flux_map_error = HyperCube(
np.percentile(tmp_f, 50 + percentile/2., axis=0) -
np.percentile(tmp_f, 50 - percentile/2., axis=0)
)
self.true_velocity_map_error = HyperCube(
np.percentile(tmp_v, 50 + percentile/2., axis=0) -
np.percentile(tmp_v, 50 - percentile/2., axis=0)
)
self.true_disp_map_error = HyperCube(
np.percentile(tmp_s, 50 + percentile/2., axis=0) -
np.percentile(tmp_s, 50 - percentile/2., axis=0)
)
# Compute best_parameters
parameter_names = GalaxyParameters.names
tmp_chain = np.array(sub_chain[parameter_names].as_array().tolist()) #convert astropy Table into array
best_parameter = np.median(tmp_chain, axis=0)
# Compute errors to parameters
sigma_parameter = np.std(tmp_chain, axis=0)
#handle PA edges at +/-180
pa_idx = sub_chain.index_column('pa')
#following https://ncss-wpengine.netdna-ssl.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Circular_Data_Analysis.pdf
cos1=np.median(sub_chain['cospa'])
sin1=np.median(sub_chain['sinpa'])
invtan = np.arctan2(sin1,cos1)
pa_circular_best = np.degrees(invtan)
best_parameter[pa_idx] = pa_circular_best
r1 = np.sqrt(sub_chain['cospa'].sum()**2+sub_chain['sinpa'].sum()**2)
r1 = r1/np.size(sub_chain)
pa_circular_std = np.sqrt(-2.*np.log(r1))
cos2=np.median(sub_chain['cos2pa'])
sin2=np.median(sub_chain['sin2pa'])
#r2 = np.sqrt(sub_chain['cos2pa']**2+sub_chain['sin2pa']**2)
#r2 = np.median(r2)
invtan2= np.arctan2(sin2,cos2)
angle2= np.degrees(invtan2)
## this is??
#pa_circular_dispersion = (1-angle2)/(2*r1**2)
#print "pa disp %.3f " % (pa_circular_dispersion)
#center chain['pa']
sub_chain_pa = sub_chain['pa'] - best_parameter[pa_idx]
sub_chain_pa = np.where(sub_chain_pa > 180, sub_chain_pa - 360, sub_chain_pa)
sub_chain_pa = np.where(sub_chain_pa < -180, sub_chain_pa + 360, sub_chain_pa)
std_pa = np.std(sub_chain_pa)
#This is identical to pa_circular_std
#print "pa sigma %.3f" % (std_pa)
#print "pa stdev %.3f " % (np.degrees(pa_circular_std))
sigma_parameter[pa_idx] = std_pa
sub_chain['pa'] = sub_chain_pa + best_parameter[pa_idx]
self.sub_chain = sub_chain[parameter_names] #will be used for correlation plots
#Save
self.galaxy = GalaxyParameters.from_ndarray(best_parameter)
self.galaxy.stdev = GalaxyParametersError.from_ndarray(sigma_parameter)
self.stdev = self.galaxy.stdev
# Compute percentiles
if percentile is not None:
self.logger.info('Setting %2d percentiles ' % (percentile))
error_parameter_upper = np.percentile(self.sub_chain.as_array().tolist(), 50. + percentile/2., axis=0)
error_parameter_lower = np.percentile(self.sub_chain.as_array().tolist(), 50. - percentile/2., axis=0)
self.galaxy.upper = GalaxyParametersError.from_ndarray(error_parameter_upper)
self.galaxy.lower = GalaxyParametersError.from_ndarray(error_parameter_lower)
self.galaxy.ICpercentile = percentile
# Store galaxy parameters and stdev
self.logger.info("self.galaxy : fittest parameters : %s", repr(self.galaxy))
self.logger.info("self.stdev : parameters stdev : %s", str(self.stdev))
return None
[docs] def create_clean_cube(self, galaxy, shape, final=False):
"""
Creates a cube containing a clean simulation of a galaxy according to
the provided model.
galaxy: GalaxyParameters
The parameters upon which the simulated galaxy will be built.
shape: Tuple of 3
The 3D (z, y, x) shape of the resulting cube.
Eg: (21, 21, 21)
Returns a HyperspectralCube
"""
# Create radial velocities and radial dispersions
#flux_cube, vz, vz_map, s_map, sig_map, sigz_disk_map, sig_intr = \
# self.model._compute_galaxy_model(galaxy, shape)
## normalize to real flux
#flux_map = flux_cube.sum(0)
#flux_map = galaxy.flux * flux_map / flux_map.sum()
flux_map, vz_map, s_map = self.model._create_maps(galaxy, shape)
if self.error_maps:
self.chain_flux_map.append(flux_map)
self.chain_velocity_map.append(vz_map)
self.chain_dispersion_map.append(s_map)
# This is too expensive ! We create Cubes on each iteration...
# There are ways to optimize this, as only the last one is used.
if final is True:
self.true_flux_map = HyperCube(flux_map)
self.true_velocity_map = HyperCube(vz_map)
self.true_disp_map = HyperCube(s_map)
modelcube = self.model._create_cube(shape, flux_map, vz_map, s_map,
self.instrument.z_step_kms, zo=galaxy.z)
#is this used?
modelcube.xy_step = self.cube.xy_step
modelcube.z_step = self.cube.z_step
modelcube.z_central = self.cube.z_central
return modelcube
[docs] def create_convolved_cube(self, galaxy, shape):
"""
Creates a cube containing a convolved simulation of a galaxy according
to the provided model.
The convolution is done by the instrument you provided upon
instantiation of this class.
galaxy: GalaxyParameters
The parameters upon which the simulated galaxy will be built.
shape: Tuple of 3
The 3D (Z, Y, X) shape of the resulting cube.
Eg: (21, 21, 21)
Returns a HyperspectralCube
"""
clean_cube = self.create_clean_cube(galaxy, shape)
return self.instrument.convolve(clean_cube)
def import_chain(self, filepath, compute_best_params=False):
"""
Imports the chain stored in a .dat file so that you may plot.
compute_best_parameters False[default] | True
if True will use 'last' method and 60%
use best_parameters_from_chain method to customize
"""
with open(filepath, 'r') as chain_data:
self.chain = asciitable.read(chain_data.read(), Reader=asciitable.FixedWidth)
if compute_best_params is True:
self.best_parameters_from_chain(method_chain='last', last_fraction=60, percentile=95)
NO_CHAIN_ERROR = "No chain to plot! Run .run_mcmc() or .import_chain() " \
"first."
[docs] def save(self, name, overwrite=False):
"""
Saves the results of the MCMC to files :
- <name>_galaxy_parameters.txt
A plain text representation of the parameters of the galaxy.
- <name>_galaxy_parameters.dat
An asciitable representation of the parameters of the galaxy.
- <name>_chain.dat
An asciitable representation of the Markov Chain.
Each line holds one set of galaxy parameters and its associated reduced chi.
- <name>_run_parameters.txt
A plain text representation of the run_parameters.
- <name>_instrument.txt
A plain text representation of the instrument parameters.
- <name>_convolved_cube.fits
A FITS file containing the PSF-convolved result cube.
- <name>_deconvolved_cube.fits
A FITS file containing the pre-convolution clean cube.
- <name>_residuals_cube.fits
A FITS file containing the diff between input data and simulation.
- <name>_true_flux_map.fits
A FITS file containing the true flux map [intrinsic]
- <name>_true_vel_map.fits
A FITS file containing the true velocity map [intrinsic]
- <name>_true_sig_map.fits
A FITS file containing the true dispersion map [intrinsic]
- <name>_obs_flux_map.fits
A FITS file containing the observed flux map [intrinsic]
- <name>_obs_vel_map.fits
A FITS file containing the observed velocity map [intrinsic]
- <name>_obs_sig_map.fits
A FITS file containing the observed dispersion map [intrinsic]
- <name>_images.png
A PNG image generated by the ``plot_images`` method.
Note: the overwrite option is always true for this file.
- <name>_mcmc.png
A PNG image generated by the ``plot_mcmc`` method.
Note: the overwrite option is always true for this file.
- <name>_correlations.png
A PNG image generated by the ``plot_correlations`` method.
Note: the overwrite option is always true for this file.
- <name>true_maps.png
A PNG image generated by the ``plot_true_vfield`` method.
Note: the overwrite option is always true for this file.
The .dat files can be easily read using asciitable and its
``asciitable.FixedWidth`` Reader : ::
asciitable.read('example.chain.dat', Reader=asciitable.FixedWidth)
.. warning::
The generated files are not compressed and may take up a lot of disk
space.
name: string
An absolute or relative name that will be used as prefix for the
save files.
Eg: 'my_run', or '/home/me/science/my_run'.
overwrite: bool
When set to true, will OVERWRITE existing files.
"""
if self.chain is None:
raise RuntimeError("Nothing to save! Run .run_mcmc() first.")
filename = '%s_galaxy_parameters.txt' % name
self._save_to_file(filename, self.galaxy.long_info(), overwrite)
filename = '%s_galaxy_parameters.dat' % name
self._save_to_file(filename, self.galaxy.structured_info(), overwrite)
filename = '%s_chain.dat' % name
self._save_to_file(filename, self._chain_as_asciitable(), overwrite)
filename = '%s_run_parameters.txt' % name
self._save_to_file(filename, self.__str__(), overwrite)
filename = '%s_instrument.txt' % name
self._save_to_file(filename, self.instrument.__str__(), overwrite)
filename = '%s_convolved_cube.fits' % name
self.convolved_cube.write_to(filename, overwrite)
filename = '%s_deconvolved_cube.fits' % name
self.deconvolved_cube.write_to(filename, overwrite)
filename = '%s_residuals_cube.fits' % name
self.residuals_cube.write_to(filename, overwrite)
filename = '%s_3Dkernel.fits' % name
self.psf3d.write_to(filename, overwrite)
filename = '%s_obs_flux_map.fits' % name
self.obs_flux_map.write_to(filename, overwrite)
filename = '%s_obs_vel_map.fits' % name
self.obs_velocity_map.write_to(filename, overwrite)
filename = '%s_obs_disp_map.fits' % name
self.obs_disp_map.write_to(filename, overwrite)
filename = '%s_true_flux_map.fits' % name
self.true_flux_map.write_to(filename, overwrite)
filename = '%s_true_vel_map.fits' % name
self.true_velocity_map.write_to(filename, overwrite)
filename = '%s_true_disp_map.fits' % name
self.true_disp_map.write_to(filename, overwrite)
#for quick display only
filename = '%s_true_maps' % name
self.plot_true_vfield(filename + '.png')
self.plot_true_vfield(filename + '.pdf')
filename = '%s_obs_maps' % name
self.plot_obs_vfield(filename + '.png')
self.plot_obs_vfield(filename + '.pdf')
if self.error_maps:
filename = '%s_true_flux_map_error.fits' % name
self.true_flux_map_error.write_to(filename, overwrite)
filename = '%s_true_vel_map_error.fits' % name
self.true_velocity_map_error.write_to(filename, overwrite)
filename = '%s_true_disp_map_error.fits' % name
self.true_disp_map_error.write_to(filename, overwrite)
filename = '%s_images' % name
self.plot_images(filename + '.png')
self.plot_images(filename + '.pdf')
filename = '%s_mcmc' % name
self.plot_mcmc(filename + '.png')
self.plot_mcmc(filename + '.pdf')
filename = '%s_correlations' % name
self.plot_correlations(filename + '.png')
self.plot_correlations(filename + '.pdf')
try:
filename = '%s_corner' % name
corner=self.plot_corner(filename + '.png',nsigma=4)
_ = self.plot_corner(filename + '.pdf',nsigma=4)
except:
self.logger.warning("plot corner failed ")
filename = '%s_geweke' % name
self.plot_geweke(filename + '.png')
self.plot_geweke(filename + '.pdf')
self.logger.info("Saved files in %s" % os.getcwd())
#fixme: to do
# def read_files(self, name):
def __str__(self):
"""
Return information about this run in a multiline string.
"""
return """
galpak_version = %s
instrument = %s
iterations = %s
parameters method = %s, chain_fraction: %s,
CI percentile: %s,
Model: %s
min_boundaries = %s
max_boundaries = %s
known_parameters = %s
initial_parameters = %s
random_scale = %s
final_parameters = \n %s
best_chi2 = %s
acceptance_rate = %s
""" % (
self.version,
self.instrument, self.max_iterations, self.method,
self.chain_fraction, self.percentile, self.model,
self.min_boundaries, self.max_boundaries,
self.known_parameters, self.initial_parameters,
self.random_scale, self.galaxy.structured_info(),
self.best_chisq,
self.acceptance_rate
)
def _chain_as_asciitable(self):
"""
Exports the chain as an `asciitable`.
See the public API `import_chain()` for the reverse operation of loading
the chain from an `asciitable` file.
"""
out = StringStdOut()
asciitable.write(self.chain,
output=out,
Writer=asciitable.FixedWidth,
names=self.chain.dtype.names)
return out.content
def _save_to_file(self, filename, contents, clobber):
if not clobber and os.path.isfile(filename):
raise IOError("The file '%s' already exists. Specify clobber=True to overwrite it.")
with open(filename, 'w') as f:
f.write(contents)
def _read_file(self, filename):
with open(filename, 'r') as f:
contents=f.read()
return contents
def _get_min_chi_index(self):
"""
Gets the index in the chain of the parameters with the minimal chi.
"""
if self.chain is None:
raise RuntimeError("No chain! Run `run_mcmc()` first.")
idx = self.chain.idxsorted[0]
return idx
def _compute_error_gauss(self, galaxy):
"""
It computes the difference between the measured cube
and the computed cube from given galaxy parameters.
returns ( D - M ) / stdev
"""
# Compute convolved cube for given galaxy parameters
cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape)
# Diff. between measured cube and convolved cube, scaled by the error
difference = (self.cube - cube_convolved) / self.error_cube
return np.ndarray.flatten(difference.data)
def _compute_error_Mighell(self, galaxy):
"""
Modified Statistic Mighell 1998
Sum ( D + min(1,D) - M)^2 / D + 1
"""
# Compute convolved cube for given galaxy parameters
cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape)
tmp_cube = HyperCube(np.where(self.cube.data >= 1, 1., self.cube.data))
difference = (self.cube + tmp_cube - cube_convolved ) **2 \
/ (self.cube + 1.0)
return np.ndarray.flatten(difference.data)
def _compute_error_Neyman(self, galaxy):
"""
Modified Neyman statistic
Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H
Sum ( M - D )^2 / max(D,1)
"""
# Compute convolved cube for given galaxy parameters
cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape)
tmp_cube = HyperCube(np.where(self.cube.data <= 1, 1., self.cube.data))
difference = (cube_convolved - self.cube)**2 / tmp_cube
return np.ndarray.flatten(difference.data)
def _compute_error_Pearson(self, galaxy):
"""
Pearson statistic
Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H
Sum ( M - D )^2 / M
"""
# Compute convolved cube for given galaxy parameters
cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape)
difference = (cube_convolved - self.cube)**2 / (cube_convolved)
return np.ndarray.flatten(difference.data)
def _compute_error_cstat(self, galaxy):
"""
Cash statistique for Poisson noise
Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H
Sum ( M - D + D * log(D/M) )
"""
# Compute convolved cube for given galaxy parameters
cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape)
tmp_cube = np.log(self.cube.data/cube_convolved)
difference = (cube_convolved - self.cube) + self.cube * tmp_cube
return np.ndarray.flatten(difference.data)
#############################################
#
# Private methods, modeling
#
#############################################
def _set_verbose(self, verbose):
"""
Update the logger's status
"""
if verbose is True:
self.logger.disabled = False
self.model.logger.disabled = False
np.seterr(all='warn')
else:
self.logger.disabled = True
self.model.logger.disabled = True
np.seterr(all='ignore')