Source code for fiducia.loader

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar  8 09:20:37 2019

Utilities for loading DANTE measurement and response function data.

@author: Pawel M. Kozlowski
"""

# python modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.sparse as sparse


# custom modules
from fiducia.misc import find_nearest
import fiducia.pltDefaults


# listing all functions declared in this file so that sphinx-automodapi
# correctly documents them and doesn't document imported functions.
__all__ = ["cleanupHeader",
           "loadResponses",
           "loadResponseUncertainty",
           "readDanProcessed",
           "signalsAtTime",
           "signalInt",
           "readDanteData",
           ]


def __responseName__(channelNum):
    r"""
    Convenience function for generating response function file name
    given the DANTE channel number.
    
    Parameters
    ----------
    channelNum: int
        DANTE channel number
    
    Returns
    -------
    fileName: str
        The file name of the response file for channel 'channelNum'
        
    Notes
    -----
    
    See also
    --------
    
    Examples
    --------
    """
    baseName = 'DanteDataS_1_'
    extension = '.dat'
    fileName = baseName + str(channelNum) + extension
    return fileName


def __readResponse__(channelNum, directory):
    r"""
    Read a single DANTE channel response function file given the channel
    number and path to the directory containing the response function files.
    
    Parameters
    ----------
    channelNum: int
        DANTE channel number
        
    directory: str
        Path to channel response function files
        
    Raises
    ------
    Exception
        If file does not exist.
    
    Returns
    -------
        
    Notes
    -----
    
    See also
    --------
    
    Examples
    --------
    """
    try:
        fileName = __responseName__(channelNum)
        responseArr = np.loadtxt(directory + fileName)
        return responseArr
    except IOError:
        # file doesnt exist, return dummy data
        Exception(f"File {fileName} does not exist!")


[docs]def cleanupHeader(dataFrame): r""" Strip whitespace and rename DataFrame headers. Parameters ---------- dataFrame : pandas.core.frame.DataFrame DataFrame to be cleaned. Returns ------- cleanedDataFrame : pandas.core.frame.DataFrame DataFrame with stripped and renamed channel headers. Notes ----- See also -------- Examples -------- """ # stripping whitespace from header names colNames = dataFrame.columns colNamesStripped = [header.strip() for header in colNames] renameStripDict = {colName: colNamesStripped[idx] for idx, colName in enumerate(colNames)} df1 = dataFrame.rename(columns=renameStripDict) # renaming channel headers from e.g. Ch2 to just 2 allChannels = np.arange(18) + 1 renameDict = {} for channel in allChannels: renameDict['Ch' + str(channel)] = channel cleanedDataFrame = df1.rename(columns=renameDict) return cleanedDataFrame
[docs]def loadResponses(channels, fileName, solid=True): r""" Load DANTE measurement data from files given the channels and path to the directory containing the response function files. Returns a dataframe with the data. Parameters ---------- channels: list, numpy.ndarray List or array of relevant channels fileName: str Full path and filename of .csv file containing DANTE respones functions. solid: Bool, optional Includes solid angle in response function value if true. The default is true. Returns ------- responseFrame: pandas.core.frame.DataFrame DataFrame with the response function data for the 'channels' requested Notes ----- See also -------- Examples -------- """ solidAngles = fiducia.misc.solidAngles # loading all the response functions dataFrame = pd.read_csv(fileName) #clean headers cleanedFrame = cleanupHeader(dataFrame) # filtering for channels we care about # for this particular shot colFilter = ['Energy(eV)'] + channels responseFrame = cleanedFrame[colFilter].copy() # convert energy column from strings to floats (if necessary) if type(responseFrame['Energy(eV)'][0]) == str: energyFloats = responseFrame['Energy(eV)'].str.replace(',', '').astype(float) responseFrame.loc[:,'Energy(eV)'] = energyFloats else: energyFloats = responseFrame['Energy(eV)'].astype(float) responseFrame.loc[:,'Energy(eV)'] = energyFloats if solid: for chan in channels: #multiply each element by the corresponding channel solid angle responseFrame.loc[:, chan] *= solidAngles[chan-1] #save metadata that we already include solid angle responseFrame.solid = True return responseFrame
[docs]def loadResponseUncertainty(responseFrame, fileName): r""" Load uncertainty percentages into a DataFrame. Parameters ---------- responseFrame: pandas.core.frame.DataFrame DataFrame to base the respones uncertainty frame on. fileName: str Full path and filename of .csv file containing DANTE response uncertainty percentages functions. Returns ------- responseUncertaintyFrame : pandas.core.frame.DataFrame DataFrame with each column being a channel and each element being the channel's uncertainty percentage. Extended to match the photon energy range in the response frame. Notes ----- See also -------- Examples -------- """ #read raw data channelUncertaintyFrame = pd.read_csv(fileName) #clean headers cleanedFrame = cleanupHeader(channelUncertaintyFrame) # filtering for columns we care about channels = list(responseFrame.columns.values) channels.remove('Energy(eV)') responseUncertaintyFrame = responseFrame.copy() #go through each column and fill each element with corresponding uncertainty for chan in channels: #multiply each responseFrame element by the percent uncertainty responseUncertaintyFrame.loc[:, chan] *= cleanedFrame.loc[0, chan]/100 return responseUncertaintyFrame
[docs]def readDanProcessed(channels, directory): r""" Loads DANTE measurement data from files given the channels and path to the directory containing the reduced and aligned DANTE data. Returns a dataframe with the data. Note that this is *not* for raw data. It is for reading DANTE signals that have already been processed by Dan Barnak's scripts. Parameters ---------- channels: list, numpy.ndarray List or array of relevant channels directory: str Path to channel response function files Returns ------- dataFrame : pandas.core.frame.DataFrame Dataframe of aligned signals from Dan's analysis. Notes ----- See also -------- Examples -------- """ # initialize finder for length of data array for longest channel longestChLen = 0 # load all the arrays into a dict responseDict = {} for channel in channels: responseData = __readResponse__(channel, directory) responseDict[channel] = responseData # getting length of array for longest channel responseLen = len(responseData) if responseLen > longestChLen: longestChLen = responseLen # initialize pandas dataframe indices = np.arange(longestChLen) dataFrame = pd.DataFrame(index=indices) # add index axis label dataFrame.index.name = 'indices' # write channel response data into dataframe for channel in channels: chData = responseDict[channel] # photon energy dataFrame['Time' + str(channel)] = chData[:,0] # channel response dataFrame['Signal' + str(channel)] = chData[:,1] return dataFrame
[docs]def signalsAtTime(time, measurementFrame, channels, plot=False, method="interp"): r""" Get DANTE signals from each channel at a particular time. Default is to return an interpolated value of the signal at the given time. Alternatively, this function can return the nearest value in the signal data array for the given time. Parameters ---------- time: float Time for which we want DANTE signals (in ns). measurementFrame: pandas.core.frame.DataFrame Pandas dataframe containing DANTE measurement data. See readDanteData() and readDanProcessed(). plot: Bool When True, plots DANTE signals vs channel index at a particular time. method: str Either 'nearest' or 'interp'. 'nearest' finds the nearest point in the DANTE signal to the given time. 'interp' returns an interpolated signal value for the given time. Default is 'interp'. Returns ------- signals : numpy.ndarray Dante signals for each channel at a particular time step. Notes ----- See also -------- Examples -------- """ chLen = len(channels) signals = np.zeros(chLen) for idx, channel in enumerate(channels): if method == "nearest": timeIdx, _ = find_nearest(array=measurementFrame['Time' + str(channel)], value=time) signals[idx] = measurementFrame['Signal' + str(channel)][timeIdx] elif method == "interp": signals[idx] = np.interp(x=time, xp=measurementFrame['Time' + str(channel)], fp=measurementFrame['Signal' + str(channel)]) else: raise Exception(f"Method {method} not found!") if plot: plt.scatter(channels, signals) plt.xticks(channels) plt.xlabel('DANTE channel') plt.ylabel('Signal (V)') plt.title(f'DANTE signals @ t = {time} ns') plt.show() return signals
[docs]def signalInt(channels, measurementFrame, tStart, tEnd): r""" Get time-integrated Dante signals for a specified time interval. Used in getting time-integrated spectrum from the unfold. Parameters ---------- measurementFrame: pandas.core.frame.DataFrame Pandas dataframe containing DANTE measurement data. See loadDanteData(). tStart: float Lower bound for time integration. tEnd: float Upper bound for time integration Returns ------- signalInt: numpy.ndarray Time integrated Dante signals for each channel. Notes ----- See also -------- Examples -------- """ from scipy import integrate chLen = len(channels) signalInt = np.zeros(chLen) for idx, channel in enumerate(channels): timeseries = measurementFrame['Time' + str(channel)] chanseries = measurementFrame['Signal' + str(channel)] timeIdx1, _ = find_nearest(array=timeseries, value=tStart) timeIdx2, _ = find_nearest(array=timeseries, value=tEnd) signalInt[idx] = integrate.simps(y=chanseries[timeIdx1:timeIdx2], x=timeseries[timeIdx1:timeIdx2]) return signalInt
[docs]def readDanteData(filePath): r""" Reads Dante .dat file and returns header info and channel signals as two separate pandas dataframes. Parameters ---------- filePath: str Full path to the Dante .dat file. Returns ------- headerFrame: pandas.core.frame.DataFrame Header of Dante data file. This typically include information about the various components used in each Dante channel, such as oscilloscopes, XRDs, etc. dataFrame: pandas.core.frame.DataFrame Dante data. Notes ----- See also -------- Examples -------- """ # reading the entire dante file dataAndHeaderFrame = pd.read_csv(filePath, sep='\t', header=None) # generating dante dataframe header names headerNames1st = [str(num) for num in np.arange(18) + 1] headerNames2nd = [str(num) + ' bkg' for num in np.arange(18) + 1] headerName = headerNames1st + headerNames2nd # replacing dataframe column header names for more intuitive access dataAndHeaderFrame.columns = headerName # splitting into header frame and measurement data frame headerLen = 18 headerFrame = dataAndHeaderFrame[:][:headerLen] dataFrame = dataAndHeaderFrame[:][headerLen:] # replacing row names for header frame indexNamesReplace = {0:'Signal Cable', 1:'Attenuator 1', 2:'Attenuator 2', 3:'Attenuator 3', 4:'Attenuator 4', 5:'Jumper Cable', 6:'XRD SN', 7:'Mirror SN', 8:'Filter 1 SN', 9:'Filter 2 SN', 10:'Filter 3 SN', 11:'Fiducial T', 12:'Scope type', 13:'Full scale Hor time', 14:'#Hor Pts', 15:'Full Scale Vert mV', 16:'HV bias for XRDs', 17:'(unused field)'} headerFrame.rename(index=indexNamesReplace, inplace=True) return headerFrame, dataFrame