Source code for spaceKLIP.expjumpramp

from __future__ import division

from jwst.stpipe import Step
from jwst import datamodels
from jwst.datamodels import dqflags, RampModel
from stcal.ramp_fitting.utils import set_if_total_ramp
from jwst.ramp_fitting.ramp_fit_step import create_image_model, create_integration_model

import astropy.io.fits as fits
import numpy as np
from scipy import special
import warnings

from multiprocessing import Pool

[docs] class ExperimentalJumpRampStep(Step): """ Experimental step based on Tim Brandt's jump detection and ramp fitting algorithm. """ class_alias = "experimental_jumpramp" spec = """ use = boolean(default=False) # Use regular pipeline by default nproc = integer(default=4) # Number of processes to use noise_scale = float(default=1) #Scale factor for noise estimate nan_dnu = boolean(default=True) #Set do not use pixels to NaN? """
[docs] def process(self, input): """ Perform jump detection and ramp fitting Parameters ---------- input : JWST pipeline input Appropriate input to JWST pipeline step, e.g. fits file name or preloaded JWST datamodel. Returns ------- img_model : JWST datamodel JWST datamodel equivalent to a "rate.fits" file. int_model : JWST datamodel JWST datamodel equivalent to a "rateints.fits" file. """ with datamodels.RampModel(input) as input_model: # Run the jump detection and ramp fitting ramp_results = self.produce_ramp_images(input_model, self.noise_scale) # Update DQ arrays with jumps input_model = self.update_groupdq(input_model, ramp_results['flagged_diffs']) # Convert DQ arrays to 3D dq_ints = self.create_dq_ints(input_model) # Want to set do not use pixels to NaN if self.nan_dnu: ramp_results = self.set_dnu_to_nan(ramp_results, dq_ints) # Define integration info integ_info = (ramp_results['sci'], dq_ints, ramp_results['var_poisson'], ramp_results['var_rdnoise'], ramp_results['err']) # Create integration model int_model = create_integration_model(input_model, integ_info, input_model.int_times) int_model.meta.bunit_data = 'DN/s' int_model.meta.bunit_err = 'DN/s' int_model.meta.cal_step.jump = 'COMPLETE' int_model.meta.cal_step.ramp_fit = 'COMPLETE' # Define image info im_sci, im_vpo, im_vrd, im_err = self.weighted_average(ramp_results['sci'], ramp_results['var_poisson'], ramp_results['var_rdnoise'], ramp_results['err']) image_info = (im_sci, input_model.pixeldq, im_vpo, im_vrd, im_err) # Create image model img_model = create_image_model(input_model, image_info) img_model.meta.bunit_data = 'DN/s' img_model.meta.bunit_err = 'DN/s' img_model.meta.cal_step.jump = 'COMPLETE' img_model.meta.cal_step.ramp_fit = 'COMPLETE' return img_model, int_model
[docs] def produce_ramp_images(self, datamodel, noise_scale): """ Function to assemble relevant data and run jump detection and ramp fitting. Parameters ---------- datamodel : JWST datamodel JWST ramp datamodel noise_scale : float Scale factor to apply to the readnoise Returns ------- results : dict Dictionary containing equivalents of the integration level 'SCI', 'ERR', 'VAR_POISSON', and 'VAR_RDNOISE' arrays. """ # Assemble covariance and differenced frames. C = self.create_covar(datamodel) diffs = self.create_diffs(datamodel, C) # Grab reference files and create uncertainties gain = self.get_subarray_ref(datamodel, 'gain') rn = self.get_subarray_ref(datamodel, 'readnoise') sig = noise_scale*gain*rn/2**0.5 # Need to multiply data by gain for this method, as it works in electrons # Must undo this at the end!!! diffs *= gain # Get some values ncols = datamodel.meta.subarray.xsize nrows = datamodel.meta.subarray.ysize if '-seg' in datamodel.meta.filename: # This is a multi-segment dataset int_s = datamodel.meta.exposure.integration_start int_e = datamodel.meta.exposure.integration_end nints = (int_e - int_s) + 1 else: # This is a single file dataset nints = datamodel.meta.exposure.nints # Define arrays to save the results to rate_ints = np.empty([nints, nrows, ncols]) uncert_ints = np.empty([nints, nrows, ncols]) var_po_ints = np.empty([nints, nrows, ncols]) var_rd_ints = np.empty([nints, nrows, ncols]) # Define array to flag which diff frames to use all_diffs2use = self.create_alldiffs2use(datamodel, diffs.shape) # Loop over each integration, run one ramp at a time for int_i in range(nints): print('--> Fitting Integration {}/{}...'.format(int_i+1, nints)) # Get the diffs for this integration int_diffs = diffs[int_i] # Get array to save which differences we will use int_diffs2use = all_diffs2use[int_i] # Now loop over each column using multiprocessing colrange = range(ncols) with Pool(processes=self.nproc) as pool: results = pool.map(jumpramp_column_helper, [(i, int_diffs, C, sig, int_diffs2use) for i in colrange]) pool.close() pool.join() # Unpack results into the arrays for i, res in enumerate(results): res_rate, res_uncert, res_poisson, res_rdnoise, int_diffs2use = res rate_ints[int_i, :, i] = res_rate uncert_ints[int_i, :, i] = res_uncert var_po_ints[int_i, :, i] = res_poisson var_rd_ints[int_i, :, i] = res_rdnoise all_diffs2use[int_i, :, :, i] = int_diffs2use # Explictly remove the gain scaling rate_ints /= gain uncert_ints /= gain var_po_ints /= gain**2 var_rd_ints /= gain**2 results = {'sci':rate_ints, 'err':uncert_ints, 'var_poisson':var_po_ints, 'var_rdnoise':var_rd_ints, 'flagged_diffs':all_diffs2use} return results
[docs] def create_covar(self, datamodel): """ Create the covariance matrix Parameters ---------- datamodel : JWST datamodel JWST ramp datamodel Returns ------- C : Covar() object Covariance matrix object """ # Get readtimes array dt = datamodel.meta.exposure.frame_time ngroups = datamodel.meta.exposure.ngroups readtimes = dt*(np.arange(ngroups)+1) # Produce covariance C = Covar(readtimes) return C
[docs] def create_diffs(self, datamodel, C): """ Create the differenced frames Parameters ---------- datamodel : JWST datamodel JWST ramp datamodel C : Covar() object Covariance matrix object Returns ------- diffs : ndarray Scaled differenced frames of a given set of ramp images. """ im = datamodel.data # Difference images diffs = im[:,1:,:,:]-im[:,:-1,:,:] # Scale using covariance diffs /= C.delta_t[np.newaxis, :, np.newaxis, np.newaxis] return diffs
[docs] def get_subarray_ref(self, datamodel, filename, trim=True): """ Get a reference file and extract on subarray Parameters ---------- datamodel : JWST datamodel JWST ramp datamodel filename : str File name string for the given reference file, e.g. 'gain' or 'readnoise' trim : bool Boolean for trimming reference file to the subarray of the provided datamodel. Returns ------- subref : ndarray Loaded reference file array. """ file = self.get_reference_file(datamodel, filename) with fits.open(file) as hdul: full = hdul['SCI'].data if trim == True: # Trim full array to subarray for this data xstrt = datamodel.meta.subarray.xstart-1 #SUBSTRT1 ystrt = datamodel.meta.subarray.ystart-1 #SUBSTRT2 xsize = datamodel.meta.subarray.xsize #SUBSIZE1 ysize = datamodel.meta.subarray.ysize #SUBSIZE2 subref = full[ystrt:ystrt+ysize, xstrt:xstrt+xsize] else: subref = full return subref
[docs] def create_alldiffs2use(self, datamodel, shape): """ Parameters ---------- datamodel : JWST datamodel JWST ramp datamodel shape : list Shape of the corresponding diffs array Returns ------- all_diffs2use : ndarray Array flagging which diffs should be used in the jump detection and ramp fitting """ # Make initial diffs2use all_diffs2use = np.ones(shape, dtype=np.uint8) # Want to preflag any do not use pixels dnu = dqflags.pixel['DO_NOT_USE'] grp_dq = datamodel.groupdq.copy() # Don't adjust actual groupdq dnu_loc = np.bitwise_and(grp_dq, dnu) # Locations of do not use pixels per group dnu_mask = np.where(dnu_loc > 0) good_mask = np.where(dnu_loc == 0) grp_dq[dnu_mask] = 0 # These are bad grp_dq[good_mask] = 1 # These are good # Add rather than subtract diffs dq_diffs = grp_dq[:,1:,:,:]+grp_dq[:,:-1,:,:] # By adding, any diffs with a value <2 have at least one DNU pixel, so shouldn't be used. diff_mask = np.where(dq_diffs < 2) # Set the diff mask diffs to zero so that aren't used. all_diffs2use[diff_mask] = 0 return all_diffs2use
[docs] def update_groupdq(self, datamodel, flagged_diffs): """ Update group DQ based on detected jumps Parameters ---------- datamodel : JWST datamodel JWST ramp datamodel flagged_diffs : ndarray Flagged array of size the same as differenced frames, with jumps marked. Returns ------- updated_datamodel : JWST datamodel JWST ramp datamodel with updated groupdq """ # Create array to hold the jump information and define jump flag dq_grp = datamodel.groupdq.copy() jump = dqflags.pixel['JUMP_DET'] # Loop over each integration and create jump array for int_i, integ in enumerate(flagged_diffs): for i, diff in enumerate(integ): # Locate the jumps in the differenced frames, Tim's code sets to 0. jump_check = np.where(diff == 0) # If jump is in differenced frame N, then flag as a jump in real frames N and N+1 this_dq_n = dq_grp[int_i, i] this_dq_np1 = dq_grp[int_i, i+1] this_dq_n[jump_check] = np.bitwise_or(this_dq_n[jump_check], jump) this_dq_np1[jump_check] = np.bitwise_or(this_dq_np1[jump_check], jump) dq_grp[int_i, i] = this_dq_n dq_grp[int_i, i+1] = this_dq_np1 # Apply jump array to groupdq datamodel.groupdq = dq_grp return datamodel
[docs] def create_dq_ints(self, datamodel): """ Turn the 2D and 4D pixel DQ to 3D DQ Parameters ---------- datamodel : JWST datamodel JWST ramp datamodel Returns ------- dqints : ndarray 3D data quality array, corresponding to each integration """ # Get 2D/4D arrays dq_pix_base = datamodel.pixeldq dq_grp = datamodel.groupdq # Define some DQ flags sat = dqflags.pixel['SATURATED'] jump = dqflags.pixel['JUMP_DET'] dnu = dqflags.pixel['DO_NOT_USE'] # Define array to save values to ncols = datamodel.meta.subarray.xsize nrows = datamodel.meta.subarray.ysize if '-seg' in datamodel.meta.filename: # This is a multi-segment dataset int_s = datamodel.meta.exposure.integration_start int_e = datamodel.meta.exposure.integration_end nints = (int_e - int_s) + 1 else: # This is a single file dataset nints = datamodel.meta.exposure.nints dq_ints = np.empty([nints, nrows, ncols], dtype=np.uint32) # Loop over integrations for int_i in range(nints): # Make copy of Pixel DQ dq_pix = dq_pix_base.copy() # This integration DQ dq_grp_int = dq_grp[int_i] # Check Saturated and Do Not Use set_if_total_ramp(dq_pix, dq_grp_int, sat | dnu, dnu) # Assume complete saturation if group 0 saturated dq_grp0_sat = np.bitwise_and(dq_grp_int[0], sat) dq_pix[dq_grp0_sat != 0] = np.bitwise_or(dq_pix[dq_grp0_sat != 0], sat | dnu) # If jump occurs mark the appropriate flag. jump_loc = np.bitwise_and(dq_grp_int, jump) jump_check = np.where(jump_loc.sum(axis=0) > 0) dq_pix[jump_check] = np.bitwise_or(dq_pix[jump_check], jump) # Save to main array dq_ints[int_i] = dq_pix return dq_ints
[docs] def set_dnu_to_nan(self, ramp_results, dq_ints): ''' Set any do not use pixels to NaNs Parameters ---------- results : dict Dictionary containing equivalents of the integration level 'SCI', 'ERR', 'VAR_POISSON', and 'VAR_RDNOISE' arrays. dq_ints : ndarray 3D data quality array Returns ------- results : dict Dictionary containing equivalents of the integration level 'SCI', 'ERR', 'VAR_POISSON', and 'VAR_RDNOISE' arrays. ''' # Define do not use pixel check dnu = dqflags.pixel['DO_NOT_USE'] dnu_loc = np.bitwise_and(dq_ints, dnu) dnu_check = np.where(dnu_loc > 0) # Assign NaNs to arrays ramp_results['sci'][dnu_check] = np.nan ramp_results['var_poisson'][dnu_check] = np.nan ramp_results['var_rdnoise'][dnu_check] = np.nan ramp_results['err'][dnu_check] = np.nan return ramp_results
[docs] def weighted_average(self, sci, var_poisson, var_rdnoise, err): ''' Compute a weighted average of a 3D multi-integration dataset Parameters ---------- sci : ndarray Array of science data var_poisson : ndarray Array of poission variance var_rdnoise : ndarray Array of readnoise variance err : ndarray Array of uncertainties Returns ------- w_av_sci : ndarray Weighted average array of science data w_av_vpo : ndarray Weighted average array of poisson variance w_av_vrd : ndarray Weighted average array of readnoise variance w_av_err : ndarray Weighted average array of uncertainties ''' # Remove NaNs sci = np.nan_to_num(sci) var_poisson = np.nan_to_num(var_poisson) var_rdnoise = np.nan_to_num(var_rdnoise) err = np.nan_to_num(err) # Get normalised weights using variances and normalize variance = var_poisson + var_rdnoise weights = 1 / variance weights /= np.sum(weights, axis=0) # Produce averaged results w_av_sci = np.average(sci, weights=weights, axis=0) w_av_err = np.sqrt(np.average(err**2, weights=weights, axis=0)) w_av_vpo = np.average(var_poisson, weights=weights, axis=0) w_av_vrd = np.average(var_rdnoise, weights=weights, axis=0) return w_av_sci, w_av_vpo, w_av_vrd, w_av_err
[docs] def jumpramp_column_helper(args): """ Function to unpack parameters in multiprocessing Parameters ---------- args : list List of arguments to be passed to the jumpramp calculation Returns ------- result.countrate : ndarray Array of countrates from ramp fitting. result.uncert : ndarray Array of uncertainties from ramp fitting. result.var_poisson : ndarray Array of poisson variance from ramp fitting. result.var_rdnoise : ndarray Array of readnoise from ramp fitting. int_diffs2use : ndarray The integration level differenced frames to be used during ramp fitting. """ i, int_diffs, C, sig, int_diffs2use = args result, int_diffs2use = jumpramp_column(i, int_diffs, C, sig, int_diffs2use) return result.countrate, result.uncert, result.var_poisson, result.var_rdnoise, int_diffs2use
[docs] def jumpramp_column(i, int_diffs, C, sig, int_diffs2use): """ Function to run of jump/ramp on specific column of pixels, assumes an input 4D array of data at the group level. Parameters ---------- i : int Index of the integration to run ramp fitting over. int_diffs : ndarray Array of differenced frames C : Covar() object Covariance matrix object sig : ndarray Uncertainty on the data int_diffs2use : ndarray The integration level differenced frames to be used during ramp fitting. Returns ------- result : Ramp_Result() object Output object from ramp fitting algorithm int_diffs2use : ndarray Updated integration level differenced frames to be used during ramp fitting. """ # Run jump detection int_diffs2use, countrates = mask_jumps(int_diffs[:, :, i], C, sig[:, i], diffs2use=int_diffs2use[:, :, i]) # Run ramp fitting result = fit_ramps(int_diffs[:, :, i], C, sig[:, i], diffs2use=int_diffs2use, countrateguess=countrates*(countrates > 0)) return result, int_diffs2use
[docs] class Covar: """ class Covar holding read and photon noise components of alpha and beta and the time intervals between the resultant midpoints """ def __init__(self, read_times, pedestal=False): """ Compute alpha and beta, the diagonal and off-diagonal elements of the covariance matrix of the resultant differences, and the time intervals between the resultant midpoints. Parameters ---------- 1. readtimes [list of values or lists for the times of reads. If a list of lists, times for reads that are averaged together to produce a resultant.] Optional arguments: 2. pedestal [boolean: does the covariance matrix include the terms for the first resultant? This is needed if fitting for the pedestal (i.e. the reset value). Default False. ] """ mean_t = [] # mean time of the resultant as defined in the paper tau = [] # variance-weighted mean time of the resultant N = [] # Number of reads per resultant for times in read_times: mean_t += [np.mean(times)] if hasattr(times, "__len__"): N += [len(times)] k = np.arange(1, N[-1] + 1) tau += [1/N[-1]**2*np.sum((2*N[-1] + 1 - 2*k)*np.array(times))] else: tau += [times] N += [1] mean_t = np.array(mean_t) tau = np.array(tau) N = np.array(N) delta_t = mean_t[1:] - mean_t[:-1] self.pedestal = pedestal self.delta_t = delta_t self.mean_t = mean_t self.tau = tau self.Nreads = N self.alpha_readnoise = (1/N[:-1] + 1/N[1:])/delta_t**2 self.beta_readnoise = -1/N[1:-1]/(delta_t[1:]*delta_t[:-1]) self.alpha_phnoise = (tau[:-1] + tau[1:] - 2*mean_t[:-1])/delta_t**2 self.beta_phnoise = (mean_t[1:-1] - tau[1:-1])/(delta_t[1:]*delta_t[:-1]) # If we want the reset value we need to include the first # resultant. These are the components of the variance and # covariance for the first resultant. if pedestal: self.alpha_readnoise = np.array([1/N[0]/mean_t[0]**2] + list(self.alpha_readnoise)) self.beta_readnoise = np.array([-1/N[0]/mean_t[0]/delta_t[0]] + list(self.beta_readnoise)) self.alpha_phnoise = np.array([tau[0]/mean_t[0]**2] + list(self.alpha_phnoise)) self.beta_phnoise = np.array([(mean_t[0] - tau[0])/mean_t[0]/delta_t[0]] + list(self.beta_phnoise))
[docs] def calc_bias(self, countrates, sig, cvec, da=1e-7): """ Calculate the bias in the best-fit count rate from estimating the covariance matrix. This calculation is derived in the paper. Parameters ---------- 1. countrates [array of count rates at which the bias is desired] 2. sig [float, single read noise] 3. cvec [weight vector on resultant differences for initial estimation of count rate for the covariance matrix] Optional argument: 4. da [float, fraction of the count rate plus sig**2 to use for finite difference estimate of the derivative.] Returns ------- 1. bias [array, bias of the best-fit count rate from using cvec plus the observed resultants to estimate the covariance matrix] """ if self.pedestal: raise ValueError("Cannot compute bias with a Covar class that includes a pedestal fit.") alpha = countrates[np.newaxis, :]*self.alpha_phnoise[:, np.newaxis] alpha += sig**2*self.alpha_readnoise[:, np.newaxis] beta = countrates[np.newaxis, :]*self.beta_phnoise[:, np.newaxis] beta += sig**2*self.beta_readnoise[:, np.newaxis] # we only want the weights; it doesn't matter what the count rates are. n = alpha.shape[0] z = np.zeros((len(cvec), len(countrates))) result_low_a = fit_ramps(z, self, sig, countrateguess=countrates) # try to avoid problems with roundoff error da_incr = da*(countrates[np.newaxis, :] + sig**2) dalpha = da_incr*self.alpha_phnoise[:, np.newaxis] dbeta = da_incr*self.beta_phnoise[:, np.newaxis] result_high_a = fit_ramps(z, self, sig, countrateguess=countrates+da_incr) # finite difference approximation to dw/da dw_da = (result_high_a.weights - result_low_a.weights)/da_incr bias = np.zeros(len(countrates)) c = cvec/np.sum(cvec) for i in range(len(countrates)): C = np.zeros((n, n)) for j in range(n): C[j, j] = alpha[j, i] for j in range(n - 1): C[j + 1, j] = C[j, j + 1] = beta[j, i] bias[i] = np.linalg.multi_dot([c[np.newaxis, :], C, dw_da[:, i]]) sig_a = np.sqrt(np.linalg.multi_dot([c[np.newaxis, :], C, c[:, np.newaxis]])) bias[i] *= 0.5*(1 + special.erf(countrates[i]/sig_a/2**0.5)) return bias
[docs] class Ramp_Result: def __init__(self): self.countrate = None self.chisq = None self.uncert = None self.var_poisson = None self.var_rdnoise = None self.weights = None self.pedestal = None self.uncert_pedestal = None self.covar_countrate_pedestal = None self.countrate_twoomit = None self.chisq_twoomit = None self.uncert_twoomit = None self.countrate_oneomit = None self.jumpval_oneomit = None self.jumpsig_oneomit = None self.chisq_oneomit = None self.uncert_oneomit = None
[docs] def fill_masked_reads(self, diffs2use): """ Replace countrates, uncertainties, and chi squared values that are NaN because resultant differences were doubly omitted. For these cases, revert to the corresponding values in with fewer omitted resultant differences to get the correct values without double-coundint omissions. Parameters ---------- 1. diffs2use [a 2D array matching self.countrate_oneomit in shape with zero for resultant differences that were masked and one for differences that were not masked] This function replaces the relevant entries of self.countrate_twoomit, self.chisq_twoomit, self.uncert_twoomit, self.countrate_oneomit, and self.chisq_oneomit in place. It does not return a value. """ # replace entries that would be nan (from trying to # doubly exclude read differences) with the global fits. omit = diffs2use == 0 ones = np.ones(diffs2use.shape) self.countrate_oneomit[omit] = (self.countrate*ones)[omit] self.chisq_oneomit[omit] = (self.chisq*ones)[omit] self.uncert_oneomit[omit] = (self.uncert*ones)[omit] omit = diffs2use[1:] == 0 self.countrate_twoomit[omit] = (self.countrate_oneomit[:-1])[omit] self.chisq_twoomit[omit] = (self.chisq_oneomit[:-1])[omit] self.uncert_twoomit[omit] = (self.uncert_oneomit[:-1])[omit] omit = diffs2use[:-1] == 0 self.countrate_twoomit[omit] = (self.countrate_oneomit[1:])[omit] self.chisq_twoomit[omit] = (self.chisq_oneomit[1:])[omit] self.uncert_twoomit[omit] = (self.uncert_oneomit[1:])[omit]
[docs] def fit_ramps(diffs, Cov, sig, countrateguess=None, diffs2use=None, detect_jumps=False, resetval=0, resetsig=np.inf, rescale=True, dn_scale=10): """ Function fit_ramps. Fits ramps to read differences using the covariance matrix for the read differences as given by the diagonal elements and the off-diagonal elements. Parameters ---------- 1. diffs [resultant differences, shape (ndiffs, npix)] 2. Cov [class Covar, holds the covariance matrix information] 3. sig [read noise, 1D array, shape (npix)] Optional Arguments: 4. countrateguess [array of shape (npix): count rates to be used to estimate the covariance matrix. Default None, in which case the average difference will be used, replacing negative means with zeros.] 5. diffs2use [shape (ndiffs, npix), boolean mask of whether to use each resultant difference for each pixel. Default None] 6. detect_jumps [run the jump detection machinery leaving out single and consecutive resultant differences. Default False] 7. resetval [float or array of shape (npix): priors on the reset values. Default 0. Irrelevant unless Cov.pedestal is True.] 8. resetsig [float or array of shape (npix): uncertainties on the reset values. Default np.inf, i.e., reset values have flat priors. Irrelevant unless Cov.pedestal is True.] 9. rescale [boolean, scale the covariance matrix internally to avoid possible overflow/underflow problems for long ramps. Slightly increases computational cost. Default True. ] Returns: Class Ramp_Result holding lots of information """ if diffs2use is None: diffs2use = np.ones(diffs.shape, np.uint8) if countrateguess is None: # initial guess for count rate is the average of the unmasked # resultant differences unless otherwise specified. if Cov.pedestal: countrateguess = np.sum((diffs*diffs2use)[1:], axis=0)/np.sum(diffs2use[1:], axis=0) else: countrateguess = np.sum((diffs*diffs2use), axis=0)/np.sum(diffs2use, axis=0) countrateguess *= countrateguess > 0 # Elements of the covariance matrix alpha_phnoise = countrateguess*Cov.alpha_phnoise[:, np.newaxis] alpha_readnoise = sig**2*Cov.alpha_readnoise[:, np.newaxis] alpha = alpha_phnoise + alpha_readnoise beta_phnoise = countrateguess*Cov.beta_phnoise[:, np.newaxis] beta_readnoise = sig**2*Cov.beta_readnoise[:, np.newaxis] beta = beta_phnoise + beta_readnoise # Mask resultant differences that should be ignored. This is half # of what we need to do to mask these resultant differences; the # rest comes later. d = diffs*diffs2use beta = beta*diffs2use[1:]*diffs2use[:-1] ndiffs, npix = alpha.shape # Rescale the covariance matrix to a determinant of 1 to # avoid possible overflow/underflow. The uncertainty and chi # squared value will need to be scaled back later. Note that # theta[-1] is the determinant of the covariance matrix. # # The method below uses the fact that if all alpha and beta # are multiplied by f, theta[i] is multiplied by f**i. Keep # a running track of these factors to construct the scale at # the end, and keep scaling throughout so that we never risk # overflow or underflow. if rescale: theta = np.ones((ndiffs + 1, npix)) theta[1] = alpha[0] scale = theta[0]*1 for i in range(2, ndiffs + 1): theta[i] = alpha[i - 1]/scale*theta[i - 1] - beta[i - 2]**2/scale**2*theta[i - 2] # Scaling every ten steps is safe for alpha up to 1e20 # or so and incurs a negligible computational cost for # the fractional power. if i % int(dn_scale) == 0 or i == ndiffs: f = theta[i]**(1/i) scale *= f theta[i - 1] /= theta[i]/f theta[i - 2] /= theta[i]/f**2 theta[i] = 1 else: scale = 1 alpha /= scale beta /= scale # All definitions and formulas here are in the paper. theta = np.ones((ndiffs + 1, npix)) theta[1] = alpha[0] for i in range(2, ndiffs + 1): theta[i] = alpha[i - 1]*theta[i - 1] - beta[i - 2]**2*theta[i - 2] #print(np.mean(theta[-1]), np.std(theta[-1])) phi = np.ones((ndiffs + 1, npix)) phi[ndiffs - 1] = alpha[ndiffs - 1] for i in range(ndiffs - 2, -1, -1): phi[i] = alpha[i]*phi[i + 1] - beta[i]**2*phi[i + 2] sgn = np.ones((ndiffs, npix)) sgn[::2] = -1 Phi = np.zeros((ndiffs, npix)) for i in range(ndiffs - 2, -1, -1): Phi[i] = Phi[i + 1]*beta[i] + sgn[i + 1]*beta[i]*phi[i + 2] # This one is defined later in the paper and is used for jump # detection and pedestal fitting. PhiD = np.zeros((ndiffs, npix)) for i in range(ndiffs - 2, -1, -1): PhiD[i] = (PhiD[i + 1] + sgn[i + 1]*d[i + 1]*phi[i + 2])*beta[i] Theta = np.zeros((ndiffs, npix)) Theta[0] = -theta[0] for i in range(1, ndiffs): Theta[i] = Theta[i - 1]*beta[i - 1] + sgn[i]*theta[i] ThetaD = np.zeros((ndiffs + 1, npix)) ThetaD[1] = -d[0]*theta[0] for i in range(1, ndiffs): ThetaD[i + 1] = beta[i - 1]*ThetaD[i] + sgn[i]*d[i]*theta[i] beta_extended = np.ones((ndiffs, npix)) beta_extended[1:] = beta # C' and B' in the paper dC = sgn/theta[ndiffs]*(phi[1:]*Theta + theta[:-1]*Phi) dC *= diffs2use dB = sgn/theta[ndiffs]*(phi[1:]*ThetaD[1:] + theta[:-1]*PhiD) # {\cal A}, {\cal B}, {\cal C} in the paper A = 2*np.sum(d*sgn/theta[-1]*beta_extended*phi[1:]*ThetaD[:-1], axis=0) A += np.sum(d**2*theta[:-1]*phi[1:]/theta[ndiffs], axis=0) B = np.sum(d*dC, axis=0) C = np.sum(dC, axis=0) r = Ramp_Result() # Finally, save the best-fit count rate, chi squared, uncertainty # in the count rate, and the weights used to combine the # resultants. if not Cov.pedestal: r.countrate = B/C r.chisq = (A - B**2/C)/scale r.uncert = np.sqrt(scale/C) r.weights = dC/C r.var_poisson = np.sum(r.weights**2*alpha_phnoise, axis=0) r.var_poisson += 2*np.sum(r.weights[1:]*r.weights[:-1]*beta_phnoise, axis=0) r.var_rdnoise = np.sum(r.weights**2*alpha_readnoise, axis=0) r.var_rdnoise += 2*np.sum(r.weights[1:]*r.weights[:-1]*beta_readnoise, axis=0) # If we are computing the pedestal, then we use the other formulas # in the paper. else: dt = Cov.mean_t[0] Cinv_11 = theta[0]*phi[1]/theta[ndiffs] # Calculate the pedestal and slope using the equations in the paper. # Do not compute weights for this case. b = dB[0]*C*dt - B*dC[0]*dt + dt**2*C*resetval/resetsig**2 b /= C*Cinv_11 - dC[0]**2 + dt**2*C/resetsig**2 a = B/C - b*dC[0]/C/dt r.pedestal = b r.countrate = a r.chisq = A + a**2*C + b**2/dt**2*Cinv_11 r.chisq += - 2*b/dt*dB[0] - 2*a*B + 2*a*b/dt*dC[0] r.chisq /= scale # elements of the inverse covariance matrix M = [C, dC[0]/dt, Cinv_11/dt**2 + 1/resetsig**2] detM = M[0]*M[-1] - M[1]**2 r.uncert = np.sqrt(scale*M[-1]/detM) r.uncert_pedestal = np.sqrt(scale*M[0]/detM) r.covar_countrate_pedestal = -scale*M[1]/detM # The code below computes the best chi squared, best-fit slope, # and its uncertainty leaving out each resultant difference in # turn. There are ndiffs possible differences that can be # omitted. # # Then do it omitting two consecutive reads. There are ndiffs-1 # possible pairs of adjacent reads that can be omitted. # # This approach would need to be modified if also fitting the # pedestal, so that condition currently triggers an error. The # modifications would make the equations significantly more # complicated; the matrix equations to be solved by hand would be # larger. if detect_jumps: # The algorithms below do not work if we are computing the # pedestal here. if Cov.pedestal: raise ValueError("Cannot use jump detection algorithm when fitting pedestals.") # Diagonal elements of the inverse covariance matrix Cinv_diag = theta[:-1]*phi[1:]/theta[ndiffs] Cinv_diag *= diffs2use # Off-diagonal elements of the inverse covariance matrix # one spot above and below for the case of two adjacent # differences to be masked Cinv_offdiag = -beta*theta[:-2]*phi[2:]/theta[ndiffs] # Equations in the paper: best-fit a, b # # Catch warnings in case there are masked resultant # differences, since these will be overwritten later. No need # to warn about division by zero here. with warnings.catch_warnings(): warnings.simplefilter('ignore') a = (Cinv_diag*B - dB*dC)/(C*Cinv_diag - dC**2) b = (dB - a*dC)/Cinv_diag r.countrate_oneomit = a r.jumpval_oneomit = b # Use the best-fit a, b to get chi squared r.chisq_oneomit = A + a**2*C - 2*a*B + b**2*Cinv_diag - 2*b*dB + 2*a*b*dC # invert the covariance matrix of a, b to get the uncertainty on a r.uncert_oneomit = np.sqrt(Cinv_diag/(C*Cinv_diag - dC**2)) r.jumpsig_oneomit = np.sqrt(C/(C*Cinv_diag - dC**2)) r.chisq_oneomit /= scale r.uncert_oneomit *= np.sqrt(scale) r.jumpsig_oneomit *= np.sqrt(scale) # Now for two omissions in a row. This is more work. Again, # all equations are in the paper. I first define three # factors that will be used more than once to save a bit of # computational effort. cpj_fac = dC[:-1]**2 - C*Cinv_diag[:-1] cjck_fac = dC[:-1]*dC[1:] - C*Cinv_offdiag bcpj_fac = B*dC[:-1] - dB[:-1]*C with warnings.catch_warnings(): warnings.simplefilter('ignore') # best-fit a, b, c c = (bcpj_fac/cpj_fac - (B*dC[1:] - dB[1:]*C)/cjck_fac) c /= cjck_fac/cpj_fac - (dC[1:]**2 - C*Cinv_diag[1:])/cjck_fac b = (bcpj_fac - c*cjck_fac)/cpj_fac a = (B - b*dC[:-1] - c*dC[1:])/C r.countrate_twoomit = a # best-fit chi squared r.chisq_twoomit = A + a**2*C + b**2*Cinv_diag[:-1] + c**2*Cinv_diag[1:] r.chisq_twoomit -= 2*a*B + 2*b*dB[:-1] + 2*c*dB[1:] r.chisq_twoomit += 2*a*b*dC[:-1] + 2*a*c*dC[1:] + 2*b*c*Cinv_offdiag r.chisq_twoomit /= scale # uncertainty on the slope from inverting the (a, b, c) # covariance matrix fac = Cinv_diag[1:]*Cinv_diag[:-1] - Cinv_offdiag**2 term2 = dC[:-1]*(dC[:-1]*Cinv_diag[1:] - Cinv_offdiag*dC[1:]) term3 = dC[1:]*(dC[:-1]*Cinv_offdiag - Cinv_diag[:-1]*dC[1:]) r.uncert_twoomit = np.sqrt(fac/(C*fac - term2 + term3)) r.uncert_twoomit *= np.sqrt(scale) r.fill_masked_reads(diffs2use) return r
[docs] def mask_jumps(diffs, Cov, sig, threshold_oneomit=20.25, threshold_twoomit=23.8, diffs2use=None): """ Function mask_jumps implements a likelihood-based, iterative jump detection algorithm. Parameters ---------- 1. diffs [resultant differences] 2. Cov [class Covar, holds the covariance matrix information. Must be based on differences alone (i.e. without the pedestal)] 3. sig [read noise, 1D array] Optional arguments: 4. threshold_oneomit [float, minimum chisq improvement to exclude a single resultant difference. Default 20.25, i.e., 4.5 sigma] 5. threshold_twoomit [float, minimum chisq improvement to exclude two sequential resultant differences. Default 23.8, i.e., 4.5 sigma] 6. diffs2use [a 2D array of the same shape as d, one for resultant differences that appear ok and zero for resultant differences flagged as contaminated. These flagged differences will be ignored throughout jump detection, which will only flag additional differences and overwrite the data in this array. Default None] Returns ------- 1. diffs2use [a 2D array of the same shape as d, one for resultant differences that appear ok and zero for resultant differences flagged as contaminated.] 2. countrates [a 1D array of the count rates after masking the pixels and resultants in diffs2use.] """ if Cov.pedestal: raise ValueError("Cannot mask jumps with a Covar class that includes a pedestal fit.") # Force a copy of the input array for more efficient memory access. d = diffs*1 # We can use one-omit searches only where the reads immediately # preceding and following have just one read. If a readout # pattern has more than one read per resultant but significant # gaps between resultants then a one-omit search might still be a # good idea even with multiple-read resultants. oneomit_ok = Cov.Nreads[1:]*Cov.Nreads[:-1] >= 1 oneomit_ok[0] = oneomit_ok[-1] = True # Other than that, we need to omit two. If a resultant has more # than two reads, we need to omit both differences containing it # (one pair of omissions in the differences). twoomit_ok = Cov.Nreads[1:-1] > 1 # This is the array to return: one for resultant differences to # use, zero for resultant differences to ignore. if diffs2use is None: diffs2use = np.ones(d.shape, np.uint8) # We need to estimate the covariance matrix. I'll use the median # here for now to limit problems with the count rate in reads with # jumps (which is what we are looking for) since we'll be using # likelihoods and chi squared; getting the covariance matrix # reasonably close to correct is important. countrateguess = np.median(d, axis=0)[np.newaxis, :] countrateguess *= countrateguess > 0 # boolean arrays to be used later recheck = np.ones(d.shape[1]) == 1 dropped = np.ones(d.shape[1]) == 0 for j in range(d.shape[0]): # No need for indexing on the first pass. if j == 0: result = fit_ramps(d, Cov, sig, countrateguess=countrateguess, diffs2use=diffs2use, detect_jumps=True) # Also save the count rates so that we can use them later # for debiasing. countrate = result.countrate*1. else: result = fit_ramps(d[:, recheck], Cov, sig[recheck], countrateguess=countrateguess[:, recheck], diffs2use=diffs2use[:, recheck], detect_jumps=True) # Chi squared improvements dchisq_two = result.chisq - result.chisq_twoomit dchisq_one = result.chisq - result.chisq_oneomit # We want the largest chi squared difference best_dchisq_one = np.amax(dchisq_one*oneomit_ok[:, np.newaxis], axis=0) best_dchisq_two = np.amax(dchisq_two*twoomit_ok[:, np.newaxis], axis=0) # Is the best improvement from dropping one resultant # difference or two? Two drops will always offer more # improvement than one so penalize them by the respective # thresholds. Then find the chi squared improvement # corresponding to dropping either one or two reads, whichever # is better, if either exceeded the threshold. onedropbetter = (best_dchisq_one - threshold_oneomit > best_dchisq_two - threshold_twoomit) best_dchisq = best_dchisq_one*(best_dchisq_one > threshold_oneomit)*onedropbetter best_dchisq += best_dchisq_two*(best_dchisq_two > threshold_twoomit)*(~onedropbetter) # If nothing exceeded the threshold set the improvement to # NaN so that dchisq==best_dchisq is guaranteed to be False. best_dchisq[best_dchisq == 0] = np.nan # Now make the masks for which resultant difference(s) to # drop, count the number of ramps affected, and drop them. # If no ramps were affected break the loop. dropone = dchisq_one == best_dchisq droptwo = dchisq_two == best_dchisq drop = np.any([np.sum(dropone, axis=0), np.sum(droptwo, axis=0)], axis=0) if np.sum(drop) == 0: break # Store the updated counts with omitted reads new_cts = np.zeros(np.sum(recheck)) i_d1 = np.sum(dropone, axis=0) > 0 new_cts[i_d1] = np.sum(result.countrate_oneomit*dropone, axis=0)[i_d1] i_d2 = np.sum(droptwo, axis=0) > 0 new_cts[i_d2] = np.sum(result.countrate_twoomit*droptwo, axis=0)[i_d2] # zero out count rates with drops and add their new values back in countrate[recheck] *= drop == 0 countrate[recheck] += new_cts # Drop the read (set diffs2use=0) if the boolean array is True. diffs2use[:, recheck] *= ~dropone diffs2use[:-1, recheck] *= ~droptwo diffs2use[1:, recheck] *= ~droptwo # No need to repeat this on the entire ramp, only re-search # ramps that had a resultant difference dropped this time. dropped[:] = False dropped[recheck] = drop recheck[:] = dropped # Do not try to search for bad resultants if we have already # given up on all but one, two, or three resultant differences # in the ramp. If there are only two left we have no way of # choosing which one is "good". If there are three left we # run into trouble in case we need to discard two. recheck[np.sum(diffs2use, axis=0) <= 3] = False return diffs2use, countrate
[docs] def getramps(countrate, sig, readtimes, nramps=10): """ Function getramps: make some synthetic ramps Parameters ---------- 1. countrate [electrons/time] 2. sig [single read read noise, electrons] 3. readtimes [list of values or lists for the times of reads. If a list of lists, times for reads that are averaged together to produce a resultant.] Optional Arguments: 4. nramps [number of ramps desired, default 10] Returns ------- 1. counts [electrons in each read, shape (nreads, nramps)] """ t_last = 0 counts = np.zeros((len(readtimes), nramps)) # resultant values to return counts_total = np.zeros(nramps) # Running total of electrons for k in range(len(readtimes)): t = readtimes[k] counts_last = counts_total*1. counts_resultant = counts_last*0 if hasattr(t, "__len__"): for i in range(len(t)): lam = countrate*(t[i] - t_last) # expected number of electrons counts_total += np.random.poisson(lam, nramps) counts[k] += counts_total/len(t) t_last = t[i] # add read noise counts[k] += np.random.randn(nramps)*sig/np.sqrt(len(t)) else: if t_last is None: t_last = t lam = countrate*(t - t_last) # expected number of electrons counts_total += np.random.poisson(lam, nramps) counts[k] = counts_total t_last = t # add read noise counts[k] += np.random.randn(nramps)*sig return counts