Source code for galpak.lib.galpak3d

# coding=utf-8

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os,re
import sys

from scipy import ndimage
from astropy.io.fits import Header
import astropy.io.ascii as asciitable
from astropy.table import Table, Column

import math
import numpy as np
np.random.seed(seed=1234)

# LOCAL IMPORTS
from .__version__ import __version__
from .math_utils import merge_where_nan, median_clip, safe_exp

from .instruments import *
from .hyperspectral_cube import HyperspectralCube as HyperCube
from .string_stdout import StringStdOut

from .disk_model import DiskModel
from .galaxy_parameters import GalaxyParameters, GalaxyParametersError
from .plot_utilities import Runner

DefaultModel = DiskModel
OII = {'wave': [3726.2, 3728.9], 'ratio':[0.8,1.0]}

# LOGGING CONFIGURATION
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('GalPaK')
logger.info(' Running galpak ' + __version__)



# OPTIONAL IMPORTS
try:
    import bottleneck as bn
except ImportError:
    logger.info("bottleneck (optional) not installed, performances will be degraded")
    import numpy as bn
try:
    import pyfftw
except ImportError:
    logger.info("PyFFTW (optional) not installed, performances will be degraded")

#Python3 compatibility
try:
  basestring
except NameError:
  basestring = str



[docs]class GalPaK3D(Runner): """ GalPaK3D is a tool to extract Galaxy Parameters and Kinematics from 3-Dimensional data, using reverse deconvolution with Bayesian analysis Markov Chain Monte Carlo. (random walk) cube: HyperspectralCube|string The actual data on which we'll work ; it should contain only one galaxy. Can be a HyperspectralCube object, a string filename to a FITS file, or even MPDAF's ``mpdaf.obj.Cube``. seeing: float Aka the Point Spread Function's Full Width Half Maximum. This convenience parameter, when provided, will override the FWHM value of the instrument's PSF. line: None[default] | dict to fit doublets, use a dictionary with line['wave']=[l, lref] eg. [3726.2, 3728.9] # Observed or Rest line['ratio']=[0.8, 1.0] eg. [0.8, 1.0] # The primary line for redshifts is the reddest instrument: Instrument The instrument configuration to use when simulating convolution. The default is :class:`MUSE <galpak.MUSE>`. crval3: float A value for the cube's header's CRVAL3 when it is missing. You should update your cube's header. crpix3: float A value for the cube's header's CRPIX3 when it is missing. You should update your cube's header. cunit3: float A value for the cube's header's CUNIT3 when it is missing. You should update your cube's header. cdelt3: float A value for the cube's header's CDELT3 when it is missing. You should update your cube's header. cunit1: float A value for the cube's header's CUNIT1 (&2) when it is missing. You should update your cube's header. force_header_update: bool Set to True to force the update of the above header cards, when their values are not missing. Note: These will not be saved into the FITS file. (if the cube is one) """ def __init__(self, cube, variance=None, line=None, seeing=None, instrument=None, flux_image=None, crval3=None, crpix3=None, cunit3=None, cdelt3=None, ctype3=None, cunit1=None, force_header_update=False): # DEVS : If you change the signature above, # remember to update the run() in api.py # Assign the logger to a property for convenience, and set verbosity self.logger = logger self.version = __version__ # Set up the input data cube if isinstance(cube, basestring): self.logger.info('Reading cube from %s' % (cube)) cube = HyperCube.from_file(cube) elif isinstance(cube, HyperCube): self.logger.info('Provided cube is a HyperSpectral Object') else: import mpdaf if isinstance(cube, mpdaf.obj.Cube): self.logger.info('Provided Cube is a mpdaf Cube object') cube = HyperCube.from_mpdaf(cube) else: raise TypeError("Provided cube is not a HyperspectralCube " "nor mpdaf's Cube") if cube.is_empty(): raise ValueError("Provided cube is empty") self.cube = cube if not self.cube.has_header(): self.logger.info("Reading hyperspectral cube without header. " "Creating a minimal one.") self.cube.header = Header() # GalPaK needs a sane Cube #self.cube.sanitize() # Set up the variance if variance is not None: self.logger.info('Using user-provided variance input') if isinstance(variance, basestring): self.logger.info("Read provided variance cube %s into HyperCube." % (variance)) variance = HyperCube.from_file(variance) variance_cube = variance.data elif isinstance(variance, HyperCube): if variance.data is None: self.logger.warning("Provided variance cube is empty.") else: self.logger.info("Saving variance into varianceCube") variance_cube = variance.data elif not (isinstance(variance,np.float) or isinstance(variance, HyperCube)): raise TypeError("Provided variance is not a string nor HyperCube nor a float") else: variance_cube = self.cube.var #@fixme will be removed # Set up the measure context self.line = line # Set up the instrument's context if instrument is None: instrument = MUSE() self.logger.warning('Using the MUSE instrument per default. ' 'You should specify your own instrument.') if isinstance(instrument, basestring): raise ValueError("Instrument needs to be an instance of " "Instrument, not a string.") if not isinstance(instrument, Instrument): raise ValueError("Instrument needs to be an instance of Instrument") self.instrument = instrument ## Set default cube specs from instrument when missing from headers # as we don't want to rely on the input fits having properly set headers. # So, we aggregate in the cube our own specs (in " and µm) for our personal use : # - xy_step # - z_step # - z_central # 1. Patch up the Cube's missing values try: self.cube.patch( crval3=crval3, crpix3=crpix3, cunit3=cunit3, cdelt3=cdelt3, ctype3=ctype3, cunit1=cunit1, force=force_header_update ) except ValueError: raise ValueError("The cube already has one of the header cards " "you're trying to provide. " "Use force_header_update=True to override.") self.logger.debug('Header after patch : %s' % self.cube) #set xy_step z_step and z_central # 2. Set cube metadata from the the instrument if header is incomplete self.cube.defaults_from_instrument(instrument=instrument) # 3. self.cube.initialize_self_cube() # 4. Calibrate the instrument with the cube self.instrument.use_pixelsize_from_cube(self.cube) self.logger.debug('z central : %4.e' % (self.cube.z_central) ) # Override the PSF FWHM (aka. seeing) if provided if seeing is not None and self.instrument.psf is not None: try: self.instrument.psf.fwhm = seeing except AttributeError: raise IOError("You provided a seeing but your instrument's PSF has no FWHM.") # Prepare output attributes self.acceptance_rate = 100. self.galaxy = GalaxyParameters() self.stdev = GalaxyParameters() self.chain = None self.sub_chain = None self.psf3d = None self.convolved_cube = None self.deconvolved_cube = None self.residuals_cube = None self.residuals_map = None self.variance_cube = None #true intrinsic maps self.true_flux_map = None self.true_velocity_map = None self.true_disp_map = None self.error_maps = False self.true_flux_map_error = None self.true_velocity_map_error = None self.true_disp_map_error = None self.chain_flux_map = [] self.chain_velocity_map = [] self.chain_dispersion_map = [] #observed maps self.obs_flux_map = None self.obs_velocity_map = None self.obs_disp_map = None self.max_iterations = None self.method = None self.chain_fraction = None self.percentile = None self.initial_parameters = None self.min_boundaries = None self.max_boundaries = None self.known_parameters = None self.random_scale = None self.model = None self.redshift = None # Handle the variance, when provided, or generate one if variance_cube is not None: self.logger.info("Replacing 0s in the variance cube by 1e12") variance_cube = np.where(variance_cube == 0.0, 1e12, variance_cube) else: # Clip data, and collect standard deviation sigma self.logger.warning("No variance provided. Estimating Variance from edge statistics") clipped_data, clip_sigma, __ = median_clip(self.cube.data[:, 2:-4, 2:4], 2.5)#yband at x[2:4] self.logger.info("Computed stdev from the edges: Var=%.e" % clip_sigma) # Adjust stdev margin if it is zero, as we'll divide with it later on if not np.isfinite(clip_sigma): clipped_data, clip_sigma, __ = median_clip(self.cube.data, 2.5) self.logger.info("reComputing stdev from the whole cube: Var=%.e" % clip_sigma) if np.size(clip_sigma) == 1 and clip_sigma == 0: clip_sigma = 1e-20 variance_cube = clip_sigma ** 2 *np.ones_like(self.cube.data) self.logger.info('Variance estimated is %s ' % (str(clip_sigma))) # Save the variance cube self.variance_cube = variance_cube # cube of sigma^2 self.error_cube = np.sqrt(self.variance_cube) # cube of sigmas # Provide the user with some feedback self.logger.info("Setting up with the following setup : %s" % self.instrument) #self.logger.info("""Will use hyperspectral cube attributes : #xy_step = %.3e " #z_step = %.3e %s #z_central = %.3e %s #""" % (self.cube.xy_step, self.cube.z_step, self.cube.z_cunit, self.cube.z_central, self.cube.z_cunit)) # We're sometimes dividing by zero, on purpose (its FASTER) # Uncomment this to annihilate console logs -- but it's dirty #previous_error_status = np.seterr(all='ignore') #np.seterr(**previous_error_status)
[docs] def run_mcmc(self, max_iterations=15000, method_chain='last', last_chain_fraction=60, percentile=95, model=None, flux_image=None, chi_stat='gaussian', min_boundaries=None, max_boundaries=None, known_parameters=None, initial_parameters=None, min_acceptance_rate=5.0, random_scale=None, verbose=True, save_error_maps=True): # DEVS : If you change the signature above, # remember to update the run() in api.py """ Main method_chain of GalPak, computes and returns the galaxy parameters as a :class:`GalaxyParameters <galpak.GalaxyParameters>` object using reverse deconvolution with a MCMC. Also fills up the following attributes : - chain - psf3d - deconvolved_cube - convolved_cube - residuals_cube (Data-Model in units of sigma) - residuals_map (average of data-model in units of sigma or = 1/N_z. Sum_z Residuals_cube. sqrt(Nz) ) - acceptance_rate - galaxy (same object as returned value) with Vmax forced to be positive [and 180 added to PA] - stdev (also available as galaxy.stdev) - true_flux_map - true_velocity_map - true_disp_map Stops iteration if acceptance rate drops below ``min_acceptance_rate`` % or when ``max_iterations`` are reached. max_iterations: int Maximum number of useful iterations. method_chain: 'chi_sorted'|'chi_min'|'last' Method used to determine the best parameters from the chain. - 'last' (default) : mean of the last_chain_fraction(%) last parameters of the chain - 'chi_sorted' : mean of the last_chain_fraction(%) best fit parameters of the chain - 'chi_min' : mean of last_chain_fraction(%) of the chain around the min chi last_chain_fraction: int Last Chain fraction (in %) used to compute the best parameters. Defaults to 60. model = DiskModel() see class DiskModel flux_image: string filename of user provided flux image (fits) to be used. UNSUPPORTED chi_stat: 'gaussian' [default] | 'Mighell' | 'Neyman' The chi2 statitics https://heasarc.gsfc.nasa.gov/xanadu/xspec/manual/XSappendixStatistics.html Mighell http://adsabs.harvard.edu/abs/1999ApJ...518..380M Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H - 'gaussian' (default): Sum (D - M)^2 / e - 'Neyman' Sum (D - M )^2 / max(D,1) - 'Mighell' Sum (D + min(D,1) - M)^2 / (D+1) min_boundaries: ndarray|GalaxyParameters The galaxy parameters will never be less than these values. Will override the default minimum boundaries for the parameters. If any of these values are NaN, they will be replaced by the default ones. max_boundaries: ndarray|GalaxyParameters The galaxy parameters will never be more than these values. Will override the default minimum boundaries for the parameters. If any of these values are NaN, they will be replaced by the default ones. known_parameters: ndarray|GalaxyParameters All set parameters in this array will be skipped in the MCMC, the algorithm will not try to guess them. initial_parameters: ndarray|GalaxyParameters The initial galaxy parameters of the MCMC chain. If None, will use the inital parameters provided by the model. The galaxy parameters not initialized by the model or by this parameter will be set to the mean of the boundaries. min_acceptance_rate: float In %, the acceptance rate is the number of useful iterations divided by the total number of iterations. If it gets below the `failure_rate` treshold, iterations stop. random_scale: float Scale the amplitude of the MCMC sampling by these values. This is an important parameter to adjust for reasonable acceptance rate. The acceptance rate should be around 30-50%. If the acceptance rate is <20-30% (too low), decrease random_scale IF the acceptance rate is >50-60% (too high), increase random_scale verbose: boolean Set to True to output a detailed log of the process. The run is faster when this is left to False. """ # DEVS : If you change the signature above, # remember to update the run() in api.py # Save the parameters (the animation uses them) self.max_iterations = max_iterations self.method = method_chain self.chain_fraction = last_chain_fraction self.percentile = percentile self.error_maps = save_error_maps #save intermediate model maps # Set up the simulation model if model is None: model = DiskModel() #if not isinstance(model, DiskModel): # raise TypeError("The `model=` must inherit `Model`.") self.model = model self.model_dict = model.__dict__ #For backwards compatibility: ## will be removed in the future if self.model.line is None: self.model.line = self.line else: self.line = self.model.line #save attribute #For backwards compatibility: ## will be removed in the future ## if not None, computes Mdyn(Re) if self.model.redshift is not None: self.redshift = self.model.redshift #important: self.model.pixscale = self.cube.xy_step # Sanitize data and set arbitrary big stdev where NaNs are cube_data = self.cube.data cube_data = np.nan_to_num(cube_data) # Set verbosity self._set_verbose(verbose) # Provide the user with some feedback #self.logger.info("Starting deconvolution with the following instrument " # "properties : %s" % self.instrument) self.logger.info("Model: %s", self.model_dict) # Memoize various dimensions and shapes shape = np.shape(cube_data) dim_d = np.size(cube_data) dim_p = np.size(GalaxyParameters()) # In fraction of boundary space, an arbitrarily # small value for closeness to boundaries eps = 0.01 # Compute a flux estimation # fixme: add weighted sum with variance if present self.flux_est = bn.nansum(self.cube.data) if self.flux_est < 0: self.logger.warning( "WARNING: Initial flux (%4.2e) is <0 -- " "likely wrong, will recompute it ignoring <0 values" % self.flux_est ) self.flux_est = np.sum(np.where(self.cube.data > 0, self.cube.data, 0)) self.logger.warning('Initial flux is now %4.2e' % self.flux_est) self.logger.info('TIP: use `initial_parameters=` to set the flux') else: self.logger.info('Initial flux is %4.2e' % self.flux_est) # Default boundaries self.min_boundaries = self.model.min_boundaries(self) self.max_boundaries = self.model.max_boundaries(self) # Merge provided boundaries (if any) with default boundaries if min_boundaries is not None: min_boundaries = np.copy(min_boundaries) merge_where_nan(min_boundaries, self.min_boundaries) #this returns a ndarray self.min_boundaries = GalaxyParameters().from_ndarray(min_boundaries) if max_boundaries is not None: max_boundaries = np.copy(max_boundaries) merge_where_nan(max_boundaries, self.max_boundaries) #this returns a ndarray self.max_boundaries = GalaxyParameters().from_ndarray(max_boundaries) #if abs(self.min_boundaries['maximum_velocity']) != \ # abs(self.max_boundaries['maximum_velocity']): # raise ValueError('For Vmax, please use identical boundaries [Min/Max]') bug_boundaries = self.min_boundaries > self.max_boundaries if bug_boundaries.any(): self.logger.debug("Min Boundaries : %s", self.min_boundaries) self.logger.debug("Max Boundaries : %s", self.max_boundaries) raise ValueError("Boundaries are WRONG, because min > max") # Default initial parameters self.initial_parameters = self.model.initial_parameters(self) # Create initial galaxy parameters using mean and provided values mean_parameters = (self.max_boundaries + self.min_boundaries) / 2. #complete default values with mean parameters merge_where_nan(self.initial_parameters, mean_parameters) # Merge provided initial parameters (if any) with the defaults if initial_parameters is not None: #complete input with default values merge_where_nan(initial_parameters, self.initial_parameters) self.initial_parameters = initial_parameters galaxy = self.initial_parameters #assign self.logger.info("Initial parameters : %s", self.initial_parameters) # By default, try to guess all parameters should_guess_flags = np.ones(dim_p) # 0: we know it / 1: try to guess #if using input image # @fixme; this is unused if self.model_dict['flux_profile'] == 'user': self.flux_map_user = flux_image if known_parameters is None: self.logger.WARNING( "With an input image it is advised to freeze " "the `inclination`, using `known_parameters=`.") # Flag parameters that we manually specified and don't need to guess if known_parameters is not None: if len(known_parameters) < dim_p: raise ValueError("known_parameters must have a length >= %d", dim_p) else: known_parameters = GalaxyParameters() self.known_parameters = known_parameters #fixes some known parameters for idx in range(dim_p): parameter = known_parameters[idx] if not math.isnan(parameter): should_guess_flags[idx] = 0 sign = math.copysign(1., parameter) self.min_boundaries[idx] = parameter * (1 - eps * sign) - eps self.max_boundaries[idx] = parameter * (1 + eps * sign) + eps self.initial_parameters[idx] = parameter # The setup is finished, let's dump some information self.logger.info("Initial parameters : %s", self.initial_parameters) self.logger.info("Min : %s", self.min_boundaries) self.logger.info("Max : %s", self.max_boundaries) ## The actual MCMC ##################################################### # The setup is done, we can now start the MCMC loop. # Tweak the random amplitude vector (Kp coeff, as pid) # that we can document Model.setup_random_amplitude() adequately random_amplitude = np.sqrt( (self.min_boundaries - self.max_boundaries) ** 2 / 12. ) * dim_p / dim_d # Let the model adjust the random amplitude of the parameter jump self.model.setup_random_amplitude(random_amplitude) # Scale MCMC if needed // allowing vectors if random_scale is not None: if np.size(random_scale) != 1: merge_where_nan(random_scale, np.ones_like(random_amplitude)) random_amplitude = random_amplitude * random_scale self.random_scale = random_scale # Zero random amplitude where parameters are known random_amplitude = random_amplitude * should_guess_flags self.random_amplitude = random_amplitude self.logger.info("Random amplitude : %s", str(random_amplitude)) # NotImplemented (yet) fit_2d = None # Chi2 satistics, default if (chi_stat == 'gaussian'): compute_chi = lambda g: \ np.nansum(self._compute_error_gauss(g) ** 2 ) self.Ndegree = (dim_d - dim_p - 1) # degree of freedom # https://heasarc.gsfc.nasa.gov/xanadu/xspec/manual/XSappendixStatistics.html # Mighell http://adsabs.harvard.edu/abs/1999ApJ...518..380M # Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H # Mighell if (chi_stat == 'Mighell'): compute_chi = lambda g: \ np.nansum(self._compute_error_Mighell(g) ) self.Ndegree = (dim_d - dim_p ) # C-statistique if (chi_stat == 'Cstat'): compute_chi = lambda g: \ np.nansum(self._compute_error_cstat(g) ) self.Ndegree = (dim_d - dim_p ) # Neyman if (chi_stat == 'Neyman'): compute_chi = lambda g: \ np.nansum(self._compute_error_Neyman(g) ) self.Ndegree = (dim_d - dim_p ) # Pearson if (chi_stat == 'Pearson'): compute_chi = lambda g: \ np.nansum(self._compute_error_Pearson(g) ) self.Ndegree =(dim_d - dim_p ) # Some useful vars count_alpha_ok = 0 total_iterations = 0 ki_min = sys.maxsize # that way, it can only go down galaxy = self.initial_parameters galaxy_old = galaxy.copy() galaxy_new = galaxy_old.copy() chi_old = compute_chi(galaxy) self.logger.info("Starting with χ² = %f", chi_old / self.Ndegree) self.logger.info("Starting with χ² = %f", chi_old / self.Ndegree) # Create empty chain with galaxy parameters and reduced chi names_param = [] names_param.extend(GalaxyParameters.names) names_param.append('reduced_chi') # names_param = [ # 'x', 'y', 'z', 'flux', 'radius', 'inclination', 'pa', # 'turnover_radius', 'maximum_velocity', 'velocity_dispersion', # 'reduced_chi', # ] # types_param = ['float32' for i in range(dim_p + 1)] #dt = zip(names_param, types_param) #chain = np.zeros(max_iterations, dtype=dt) # intensive operation ! # chain = Table(names=names_param,dtype=types_param) chain_rows = [] rate = 0. while (total_iterations < max_iterations * 20 or rate > min_acceptance_rate) and (count_alpha_ok < max_iterations): # Update loop conditions total_iterations += 1 rate = count_alpha_ok * 100. / total_iterations # Cauchy jumping random_uniforms = np.random.uniform(-np.pi / 2., np.pi / 2., size=dim_p) galaxy_new = galaxy_old + random_amplitude * np.tan(random_uniforms) # Sanitize the parameters self.model.sanitize_parameters(galaxy_new) # to deal with circularity # Check that parameters are not too close to the boundaries too_close_to_max = np.abs((galaxy_new - self.max_boundaries) / self.max_boundaries) < eps too_close_to_min = np.abs((galaxy_new - self.min_boundaries) / self.min_boundaries) < eps too_close_to_boundaries = too_close_to_min + too_close_to_max # Make sure we're inside the preset boundaries in_boundaries = (galaxy_new >= self.min_boundaries) * (galaxy_new <= self.max_boundaries) if in_boundaries.all(): # Computing convolved model with new parameters chi_new = compute_chi(galaxy_new) if chi_new<1e-3: #print chi_new,galaxy_new self.logger.error('Chi2 is 0, quit') exit() # Compute ratio of likelihood a posteriori # exp( old-new / 2 ) likelihood = safe_exp(-0.5 * (chi_new - chi_old)) # likelihood > 1 => new is better # likelihood < 1 => accept or reject alpha = np.random.rand() # Conservation test if alpha < likelihood: count_alpha_ok += 1 # Save minimum if ki_min > chi_new: ki_min = chi_new count_min = count_alpha_ok galaxy_old = galaxy_new chi_old = chi_new # vect = np.append(galaxy_new, chi_new / self.Ndegree) #chain[count_alpha_ok - 1] = np.float32(vect) # chain.add_row(vect) chain_rows.append(list(galaxy_new) + [chi_new / self.Ndegree]) if verbose: info = "{count:5d} MIN={count_min:5d} {rate:2.0f}% " \ "χ²={ki:3.6f}>{ki_min:3.6f} {params:s}" info = info.format( count=count_alpha_ok, count_min=count_min, ki=chi_new / self.Ndegree, ki_min=ki_min / self.Ndegree, rate=rate, params=galaxy_new.colored_info(too_close_to_boundaries) ) if self.redshift is not None: mdyn = self.model._compute_Mdyn_at_Rhalf(galaxy_new.radius, galaxy_new.rv, galaxy_new.max_vel, self.cube.xy_step) info += "mdyn={mass:4.2e}" info = info.format(mass=mdyn) print(info) chain = Table(names=names_param, rows=chain_rows) # Raise if no useful iteration was run if rate == 0.: self.logger.debug("Last Galaxy Parameters : %s", galaxy_new) raise RuntimeError("No useful iteration was run. " "Try with higher max_iterations?") # Sanitize the chain self.model.sanitize_chain(chain) # Report self.logger.info("Iterations report : %d Total, %d OK, %d%% Rate", total_iterations, count_alpha_ok, rate) self.logger.info("Storing results as parameters...") # Acceptance Rate self.logger.info("self.acceptance_rate : useful iterations count / total iterations count") self.acceptance_rate = rate # Store chain self.logger.info("self.chain : full Markov chain") good_idx = np.where(chain['reduced_chi']!=0) self.chain = Table(chain[good_idx]) # Store PSF 3D, which may not be defined try: self.psf3d = HyperCube(self.instrument.psf3d) except AttributeError: pass # Extract Galaxy Parameters from chain, and store them self.logger.info("Extracting best parameters (medians) from chain") self.best_parameters_from_chain(method_chain, last_fraction=last_chain_fraction, percentile=percentile) # Create output cubes self.logger.info("self.convolved_cube : simulated convolved cube from found galaxy parameters") self.convolved_cube = self.create_convolved_cube(self.galaxy, shape) self.convolved_cube.header = self.cube.header self.logger.info("self.deconvolved_cube : deconvolved cube from found galaxy parameters") self.deconvolved_cube = self.create_clean_cube(self.galaxy, shape, final=True) self.deconvolved_cube.header = self.cube.header self.logger.info("self.residuals_cube : diff between actual data and convolved cube, scaled by stdev margin") self.residuals_cube = (self.cube - self.convolved_cube) / self.error_cube # * np.mean(variance_cube) self.residuals_cube.header = self.cube.header #compute observed maps _ = self._make_moment_maps(self.convolved_cube, mask=True) # make moment maps with cube convolved with 3DPSF # Average of residuals, normalized to sigma_mu nz = self.residuals_cube.shape[0] self.residuals_map = (self.residuals_cube.data.sum(0) / nz) * np.sqrt(nz) # Compute the χ² ki_reduced = compute_chi(self.galaxy) / (dim_d - dim_p - 1) self.logger.info("χ² at best parameters: %f", ki_reduced) self.best_chisq = np.min(self.chain['reduced_chi']) self.logger.info("Best χ², %f ", self.best_chisq) # Show a plot of the chain if verbose, to draw attention to the chain if verbose: # Sometimes, when the number of iterations is low, plotting fails. # It is a complex issue with matplotlib, so we're 'try'-wrapping it. try: self.plot_mcmc(adapt_range='5stdev') except: pass try: self.plot_geweke() except: pass return self.galaxy
def best_parameters_from_chain(self, method_chain, last_fraction, percentile): """ Computes best fit galaxy parameters from chain, using medians from a specified method_chain. method_chain: string The method to use to extract the fittest parameters from the chain. 'last' (default) : mean of the last_fraction(%) last parameters of the chain 'chi_sorted' : mean of the last_fraction(%) best fit parameters of the chain 'chi_min' : mean of last_fraction(%) of the chain around the min chi last_fraction: float % Fraction of the end of the chain used in determining the parameters. percentile: float % (None as default) None: the method to use to compute the errors on the parameter is the standard deviation of the median float: the percentile (the 68th, or 95th percentile) to be used for the errors on the parameters #fixme: in which case returns the lower and upper values. Returns the galaxy parameters and the stdev """ self.method = method_chain self.chain_fraction = last_fraction self.percentile = percentile if self.chain is None: raise RuntimeError("No chain! Run .run_mcmc() first.") # Data correction for Vmax #vmax_sign = (self.chain['maximum_velocity'] < 0) #pa_correction = np.where(vmax_sign, self.chain['pa'] + 180., self.chain['pa']) #pa_correction = np.where(pa_correction > 180, pa_correction - 360, pa_correction) #self.chain['pa'] = pa_correction #self.chain['maximum_velocity'] = np.abs(self.chain['maximum_velocity']) cols = [ Column(data=np.cos(np.radians(self.chain['pa'])), name='cospa'), Column(data=np.cos(np.radians(self.chain['pa']*2)),name='cos2pa'), Column(data=np.sin(np.radians(self.chain['pa'])), name='sinpa'), Column(data=np.sin(np.radians(self.chain['pa']*2)),name='sin2pa') ] chain_full = self.chain.copy() chain_full.add_columns(list(cols)) #extract subchain chain_size = np.size(chain_full) n = int(chain_size * last_fraction / 100.) # number of samples (last_fraction(%) of total) idx = chain_full.argsort('reduced_chi') self.chain.idxsorted = idx if method_chain == 'chi_min': min_chi_index = self._get_min_chi_index() sub_chain = chain_full[min_chi_index - n // 2: min_chi_index + n // 2] self.chain.xmin = np.max([0, min_chi_index - n // 2]) self.chain.xmax = np.min([chain_size, min_chi_index + n // 2]) elif method_chain == 'last': sub_chain = chain_full[-n:] self.chain.xmin = chain_size - n self.chain.xmax = chain_size elif method_chain == 'chi_sorted': chain_full.sort('reduced_chi') sub_chain = chain_full[idx][:n] sub_idx = idx[:n] self.chain.xmin = 0 self.chain.xmax = n else: raise ValueError("Unsupported chain mean `method_chain` '%s'" % method_chain) #compute error model maps if self.error_maps: if method_chain == 'chi_min': tmp_f = np.array(self.chain_flux_map)[min_chi_index - n // 2: min_chi_index + n // 2] tmp_v = np.array(self.chain_velocity_map)[min_chi_index - n // 2: min_chi_index + n // 2] tmp_s = np.array(self.chain_dispersion_map)[min_chi_index - n // 2: min_chi_index + n // 2] elif method_chain == 'last': tmp_f = np.array(self.chain_flux_map)[-n:] tmp_v = np.array(self.chain_velocity_map)[-n:] tmp_s = np.array(self.chain_dispersion_map)[-n:] elif method_chain == 'chi_sorted': tmp_f = np.array(self.chain_flux_map)[sub_idx] tmp_v = np.array(self.chain_velocity_map)[sub_idx] tmp_s = np.array(self.chain_dispersion_map)[sub_idx] self.true_flux_map_error = HyperCube( np.percentile(tmp_f, 50 + percentile/2., axis=0) - np.percentile(tmp_f, 50 - percentile/2., axis=0) ) self.true_velocity_map_error = HyperCube( np.percentile(tmp_v, 50 + percentile/2., axis=0) - np.percentile(tmp_v, 50 - percentile/2., axis=0) ) self.true_disp_map_error = HyperCube( np.percentile(tmp_s, 50 + percentile/2., axis=0) - np.percentile(tmp_s, 50 - percentile/2., axis=0) ) # Compute best_parameters parameter_names = GalaxyParameters.names tmp_chain = np.array(sub_chain[parameter_names].as_array().tolist()) #convert astropy Table into array best_parameter = np.median(tmp_chain, axis=0) # Compute errors to parameters sigma_parameter = np.std(tmp_chain, axis=0) #handle PA edges at +/-180 pa_idx = sub_chain.index_column('pa') #following https://ncss-wpengine.netdna-ssl.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Circular_Data_Analysis.pdf cos1=np.median(sub_chain['cospa']) sin1=np.median(sub_chain['sinpa']) invtan = np.arctan2(sin1,cos1) pa_circular_best = np.degrees(invtan) best_parameter[pa_idx] = pa_circular_best r1 = np.sqrt(sub_chain['cospa'].sum()**2+sub_chain['sinpa'].sum()**2) r1 = r1/np.size(sub_chain) pa_circular_std = np.sqrt(-2.*np.log(r1)) cos2=np.median(sub_chain['cos2pa']) sin2=np.median(sub_chain['sin2pa']) #r2 = np.sqrt(sub_chain['cos2pa']**2+sub_chain['sin2pa']**2) #r2 = np.median(r2) invtan2= np.arctan2(sin2,cos2) angle2= np.degrees(invtan2) ## this is?? #pa_circular_dispersion = (1-angle2)/(2*r1**2) #print "pa disp %.3f " % (pa_circular_dispersion) #center chain['pa'] sub_chain_pa = sub_chain['pa'] - best_parameter[pa_idx] sub_chain_pa = np.where(sub_chain_pa > 180, sub_chain_pa - 360, sub_chain_pa) sub_chain_pa = np.where(sub_chain_pa < -180, sub_chain_pa + 360, sub_chain_pa) std_pa = np.std(sub_chain_pa) #This is identical to pa_circular_std #print "pa sigma %.3f" % (std_pa) #print "pa stdev %.3f " % (np.degrees(pa_circular_std)) sigma_parameter[pa_idx] = std_pa sub_chain['pa'] = sub_chain_pa + best_parameter[pa_idx] self.sub_chain = sub_chain[parameter_names] #will be used for correlation plots #Save self.galaxy = GalaxyParameters.from_ndarray(best_parameter) self.galaxy.stdev = GalaxyParametersError.from_ndarray(sigma_parameter) self.stdev = self.galaxy.stdev # Compute percentiles if percentile is not None: self.logger.info('Setting %2d percentiles ' % (percentile)) error_parameter_upper = np.percentile(self.sub_chain.as_array().tolist(), 50. + percentile/2., axis=0) error_parameter_lower = np.percentile(self.sub_chain.as_array().tolist(), 50. - percentile/2., axis=0) self.galaxy.upper = GalaxyParametersError.from_ndarray(error_parameter_upper) self.galaxy.lower = GalaxyParametersError.from_ndarray(error_parameter_lower) self.galaxy.ICpercentile = percentile # Store galaxy parameters and stdev self.logger.info("self.galaxy : fittest parameters : %s", repr(self.galaxy)) self.logger.info("self.stdev : parameters stdev : %s", str(self.stdev)) return None
[docs] def create_clean_cube(self, galaxy, shape, final=False): """ Creates a cube containing a clean simulation of a galaxy according to the provided model. galaxy: GalaxyParameters The parameters upon which the simulated galaxy will be built. shape: Tuple of 3 The 3D (z, y, x) shape of the resulting cube. Eg: (21, 21, 21) Returns a HyperspectralCube """ # Create radial velocities and radial dispersions #flux_cube, vz, vz_map, s_map, sig_map, sigz_disk_map, sig_intr = \ # self.model._compute_galaxy_model(galaxy, shape) ## normalize to real flux #flux_map = flux_cube.sum(0) #flux_map = galaxy.flux * flux_map / flux_map.sum() flux_map, vz_map, s_map = self.model._create_maps(galaxy, shape) if self.error_maps: self.chain_flux_map.append(flux_map) self.chain_velocity_map.append(vz_map) self.chain_dispersion_map.append(s_map) # This is too expensive ! We create Cubes on each iteration... # There are ways to optimize this, as only the last one is used. if final is True: self.true_flux_map = HyperCube(flux_map) self.true_velocity_map = HyperCube(vz_map) self.true_disp_map = HyperCube(s_map) modelcube = self.model._create_cube(shape, flux_map, vz_map, s_map, self.instrument.z_step_kms, zo=galaxy.z) #is this used? modelcube.xy_step = self.cube.xy_step modelcube.z_step = self.cube.z_step modelcube.z_central = self.cube.z_central return modelcube
[docs] def create_convolved_cube(self, galaxy, shape): """ Creates a cube containing a convolved simulation of a galaxy according to the provided model. The convolution is done by the instrument you provided upon instantiation of this class. galaxy: GalaxyParameters The parameters upon which the simulated galaxy will be built. shape: Tuple of 3 The 3D (Z, Y, X) shape of the resulting cube. Eg: (21, 21, 21) Returns a HyperspectralCube """ clean_cube = self.create_clean_cube(galaxy, shape) return self.instrument.convolve(clean_cube)
def import_chain(self, filepath, compute_best_params=False): """ Imports the chain stored in a .dat file so that you may plot. compute_best_parameters False[default] | True if True will use 'last' method and 60% use best_parameters_from_chain method to customize """ with open(filepath, 'r') as chain_data: self.chain = asciitable.read(chain_data.read(), Reader=asciitable.FixedWidth) if compute_best_params is True: self.best_parameters_from_chain(method_chain='last', last_fraction=60, percentile=95) NO_CHAIN_ERROR = "No chain to plot! Run .run_mcmc() or .import_chain() " \ "first."
[docs] def save(self, name, overwrite=False): """ Saves the results of the MCMC to files : - <name>_galaxy_parameters.txt A plain text representation of the parameters of the galaxy. - <name>_galaxy_parameters.dat An asciitable representation of the parameters of the galaxy. - <name>_chain.dat An asciitable representation of the Markov Chain. Each line holds one set of galaxy parameters and its associated reduced chi. - <name>_run_parameters.txt A plain text representation of the run_parameters. - <name>_instrument.txt A plain text representation of the instrument parameters. - <name>_convolved_cube.fits A FITS file containing the PSF-convolved result cube. - <name>_deconvolved_cube.fits A FITS file containing the pre-convolution clean cube. - <name>_residuals_cube.fits A FITS file containing the diff between input data and simulation. - <name>_true_flux_map.fits A FITS file containing the true flux map [intrinsic] - <name>_true_vel_map.fits A FITS file containing the true velocity map [intrinsic] - <name>_true_sig_map.fits A FITS file containing the true dispersion map [intrinsic] - <name>_obs_flux_map.fits A FITS file containing the observed flux map [intrinsic] - <name>_obs_vel_map.fits A FITS file containing the observed velocity map [intrinsic] - <name>_obs_sig_map.fits A FITS file containing the observed dispersion map [intrinsic] - <name>_images.png A PNG image generated by the ``plot_images`` method. Note: the overwrite option is always true for this file. - <name>_mcmc.png A PNG image generated by the ``plot_mcmc`` method. Note: the overwrite option is always true for this file. - <name>_correlations.png A PNG image generated by the ``plot_correlations`` method. Note: the overwrite option is always true for this file. - <name>true_maps.png A PNG image generated by the ``plot_true_vfield`` method. Note: the overwrite option is always true for this file. The .dat files can be easily read using asciitable and its ``asciitable.FixedWidth`` Reader : :: asciitable.read('example.chain.dat', Reader=asciitable.FixedWidth) .. warning:: The generated files are not compressed and may take up a lot of disk space. name: string An absolute or relative name that will be used as prefix for the save files. Eg: 'my_run', or '/home/me/science/my_run'. overwrite: bool When set to true, will OVERWRITE existing files. """ if self.chain is None: raise RuntimeError("Nothing to save! Run .run_mcmc() first.") filename = '%s_galaxy_parameters.txt' % name self._save_to_file(filename, self.galaxy.long_info(), overwrite) filename = '%s_galaxy_parameters.dat' % name self._save_to_file(filename, self.galaxy.structured_info(), overwrite) filename = '%s_chain.dat' % name self._save_to_file(filename, self._chain_as_asciitable(), overwrite) filename = '%s_run_parameters.txt' % name self._save_to_file(filename, self.__str__(), overwrite) filename = '%s_instrument.txt' % name self._save_to_file(filename, self.instrument.__str__(), overwrite) filename = '%s_convolved_cube.fits' % name self.convolved_cube.write_to(filename, overwrite) filename = '%s_deconvolved_cube.fits' % name self.deconvolved_cube.write_to(filename, overwrite) filename = '%s_residuals_cube.fits' % name self.residuals_cube.write_to(filename, overwrite) filename = '%s_3Dkernel.fits' % name self.psf3d.write_to(filename, overwrite) filename = '%s_obs_flux_map.fits' % name self.obs_flux_map.write_to(filename, overwrite) filename = '%s_obs_vel_map.fits' % name self.obs_velocity_map.write_to(filename, overwrite) filename = '%s_obs_disp_map.fits' % name self.obs_disp_map.write_to(filename, overwrite) filename = '%s_true_flux_map.fits' % name self.true_flux_map.write_to(filename, overwrite) filename = '%s_true_vel_map.fits' % name self.true_velocity_map.write_to(filename, overwrite) filename = '%s_true_disp_map.fits' % name self.true_disp_map.write_to(filename, overwrite) #for quick display only filename = '%s_true_maps' % name self.plot_true_vfield(filename + '.png') self.plot_true_vfield(filename + '.pdf') filename = '%s_obs_maps' % name self.plot_obs_vfield(filename + '.png') self.plot_obs_vfield(filename + '.pdf') if self.error_maps: filename = '%s_true_flux_map_error.fits' % name self.true_flux_map_error.write_to(filename, overwrite) filename = '%s_true_vel_map_error.fits' % name self.true_velocity_map_error.write_to(filename, overwrite) filename = '%s_true_disp_map_error.fits' % name self.true_disp_map_error.write_to(filename, overwrite) filename = '%s_images' % name self.plot_images(filename + '.png') self.plot_images(filename + '.pdf') filename = '%s_mcmc' % name self.plot_mcmc(filename + '.png') self.plot_mcmc(filename + '.pdf') filename = '%s_correlations' % name self.plot_correlations(filename + '.png') self.plot_correlations(filename + '.pdf') try: filename = '%s_corner' % name corner=self.plot_corner(filename + '.png',nsigma=4) _ = self.plot_corner(filename + '.pdf',nsigma=4) except: self.logger.warning("plot corner failed ") filename = '%s_geweke' % name self.plot_geweke(filename + '.png') self.plot_geweke(filename + '.pdf') self.logger.info("Saved files in %s" % os.getcwd())
#fixme: to do # def read_files(self, name): def __str__(self): """ Return information about this run in a multiline string. """ return """ galpak_version = %s instrument = %s iterations = %s parameters method = %s, chain_fraction: %s, CI percentile: %s, Model: %s min_boundaries = %s max_boundaries = %s known_parameters = %s initial_parameters = %s random_scale = %s final_parameters = \n %s best_chi2 = %s acceptance_rate = %s """ % ( self.version, self.instrument, self.max_iterations, self.method, self.chain_fraction, self.percentile, self.model, self.min_boundaries, self.max_boundaries, self.known_parameters, self.initial_parameters, self.random_scale, self.galaxy.structured_info(), self.best_chisq, self.acceptance_rate ) def _chain_as_asciitable(self): """ Exports the chain as an `asciitable`. See the public API `import_chain()` for the reverse operation of loading the chain from an `asciitable` file. """ out = StringStdOut() asciitable.write(self.chain, output=out, Writer=asciitable.FixedWidth, names=self.chain.dtype.names) return out.content def _save_to_file(self, filename, contents, clobber): if not clobber and os.path.isfile(filename): raise IOError("The file '%s' already exists. Specify clobber=True to overwrite it.") with open(filename, 'w') as f: f.write(contents) def _read_file(self, filename): with open(filename, 'r') as f: contents=f.read() return contents def _get_min_chi_index(self): """ Gets the index in the chain of the parameters with the minimal chi. """ if self.chain is None: raise RuntimeError("No chain! Run `run_mcmc()` first.") idx = self.chain.idxsorted[0] return idx def _compute_error_gauss(self, galaxy): """ It computes the difference between the measured cube and the computed cube from given galaxy parameters. returns ( D - M ) / stdev """ # Compute convolved cube for given galaxy parameters cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape) # Diff. between measured cube and convolved cube, scaled by the error difference = (self.cube - cube_convolved) / self.error_cube return np.ndarray.flatten(difference.data) def _compute_error_Mighell(self, galaxy): """ Modified Statistic Mighell 1998 Sum ( D + min(1,D) - M)^2 / D + 1 """ # Compute convolved cube for given galaxy parameters cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape) tmp_cube = HyperCube(np.where(self.cube.data >= 1, 1., self.cube.data)) difference = (self.cube + tmp_cube - cube_convolved ) **2 \ / (self.cube + 1.0) return np.ndarray.flatten(difference.data) def _compute_error_Neyman(self, galaxy): """ Modified Neyman statistic Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H Sum ( M - D )^2 / max(D,1) """ # Compute convolved cube for given galaxy parameters cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape) tmp_cube = HyperCube(np.where(self.cube.data <= 1, 1., self.cube.data)) difference = (cube_convolved - self.cube)**2 / tmp_cube return np.ndarray.flatten(difference.data) def _compute_error_Pearson(self, galaxy): """ Pearson statistic Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H Sum ( M - D )^2 / M """ # Compute convolved cube for given galaxy parameters cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape) difference = (cube_convolved - self.cube)**2 / (cube_convolved) return np.ndarray.flatten(difference.data) def _compute_error_cstat(self, galaxy): """ Cash statistique for Poisson noise Humphrey 2009, http://adsabs.harvard.edu/abs/2009ApJ...693..822H Sum ( M - D + D * log(D/M) ) """ # Compute convolved cube for given galaxy parameters cube_convolved = self.create_convolved_cube(galaxy, self.cube.shape) tmp_cube = np.log(self.cube.data/cube_convolved) difference = (cube_convolved - self.cube) + self.cube * tmp_cube return np.ndarray.flatten(difference.data) ############################################# # # Private methods, modeling # ############################################# def _set_verbose(self, verbose): """ Update the logger's status """ if verbose is True: self.logger.disabled = False self.model.logger.disabled = False np.seterr(all='warn') else: self.logger.disabled = True self.model.logger.disabled = True np.seterr(all='ignore')