Source code for fiducia.rawProcess

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 13 16:43:39 2019

Utilities for processing raw DANTE data. Typical steps include:
    - attenuator correction
    - background shot subtraction
    - channel alignment (via e.g. peak finding)
    - temporal axis calibration

@author: Pawel M. Kozlowski
"""

# python modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
#from skimage import feature

# custom modules
from fiducia.misc import find_nearest
from fiducia.loader import readDanteData
import fiducia.pltDefaults


# listing all functions declared in this file so that sphinx-automodapi
# correctly documents them and doesn't document imported functions.
__all__ = ["noScope",
           "noXRD",
           "onChannels",
           "timesScope",
           "voltageScale",
           "bkgCorrect",
           "offsetCorrect",
           "attenuationFactors",
           "attenuationCorrect",
           "timeAvgBkg",
           "avgBkgCorrect",
           "polyBkg",
           "signalEdges",
           "polyBkgFrame",
           "highestPeak",
           "highestN",
           "getPeaks",
           "alignPeaks",
           "constructMeasurementFrame",
           "loadCorrected",
           "hysteresisCorrect",
           "align",
           ]


[docs]def noScope(hf): r""" Given a header frame, return a list of channels with no scope. These are the channels that are off. Parameters ---------- hf: pandas.core.frame.DataFrame Header frame from DANTE measurement data. See readDanteData(). Returns ------- set Set of channels corresponding to oscilloscopes marked as "off" in the Dante data file header. Notes ----- See also -------- Examples -------- """ totalChannels = int(np.shape(hf)[1] / 2) offScope = [] for idx in range(totalChannels): ch = idx + 1 chStr = str(ch) if hf[chStr]['Scope type'] == 9999: offScope.append(ch) return set(offScope)
[docs]def noXRD(hf): r""" Given a header frame, return a list of channels with no XRD. If there is a scope, then these channels may still register a signal! Parameters ---------- hf: pandas.core.frame.DataFrame Header frame from DANTE measurement data. See readDanteData(). Returns ------- set Set of channels corresponding to no XRDs marked in the Dante data file header. Notes ----- See also -------- Examples -------- """ totalChannels = int(np.shape(hf)[1] / 2) offXRD = [] for idx in range(totalChannels): ch = idx + 1 chStr = str(ch) if hf[chStr]['XRD SN'] == 0: offXRD.append(ch) return set(offXRD)
[docs]def onChannels(hf): r""" Given a header frame, return a list of which dante channels were on for the shot. Parameters ---------- hf: pandas.core.frame.DataFrame Header frame from DANTE measurement data. See readDanteData(). Returns ------- set Set of channels corresponding oscilloscopes and XRDs marked as on within the Dante data file header. Notes ----- See also -------- Examples -------- """ offXRDSet = noXRD(hf) offScopeSet = noScope(hf) allOff = offXRDSet.union(offScopeSet) allChSet = set(np.arange(18) + 1) onChSet = allChSet - allOff return onChSet
def chStatus(hf): r""" Generates a list of statuses for each channel (on, no scope, or no XRD). Parameters ---------- hf: pandas.core.frame.DataFrame Header frame from DANTE measurement data. See readDanteData(). Returns ------- status: dict Statuses of Dante channels according to data file header. Notes ----- See also -------- Examples -------- """ offXRDSet = noXRD(hf) offScopeSet = noScope(hf) allOff = offXRDSet.union(offScopeSet) allChArr = np.arange(18) + 1 allChSet = set(allChArr) onChSet = allChSet - allOff status = {} for chNum in allChArr: if chNum in onChSet: status[chNum] = "On" elif chNum in offXRDSet: status[chNum] = "No XRD" elif chNum in offScopeSet: status[chNum] = "No Scope" else: status[chNum] = "Off?" return status
[docs]def timesScope(hf): r""" Given a headerFrame, returns a timesFrame containing an array of oscilloscope times for each channel and background shot in the headerFrame. Parameters ---------- hf: pandas.core.frame.DataFrame Header frame from DANTE measurement data. See readDanteData(). Returns ------- timesFrame: pandas.core.frame.DataFrame Returns a timesFrame containing the corresponding times for each oscilloscope trace contained in the header frame. Notes ----- See also -------- Examples -------- """ timesFrame = pd.DataFrame(index=np.arange(1024), columns=hf.keys()) for key in hf.keys(): timeCode = str(hf[key]['Full scale Hor time']) # if the time code is 99, it means the scope was off if timeCode == '99': print(f"No timespan for channel {key}.") # extracting total time spanned by the oscilloscope trace based on the # time code. For example, a time code of 208 means 2.0e-8 seconds. else: # assuming decimal point comes after first number, and all but the last # number are part of the significand. timeSpan = float(timeCode[0] + '.' + timeCode[1:-1] + 'e-' + timeCode[-1]) # getting total number of points recorded by oscilloscope scopePts = hf[key]['#Hor Pts'] # getting length of time steps between points by dividing the total time # over which oscilloscope trace is recorded by the total number of # time steps. There is one fewer step than number of points. timeStep = timeSpan / (scopePts - 1) # constructing array of times for convience in analysis/plotting timesFrame[key] = np.arange(scopePts) * timeStep return timesFrame
[docs]def voltageScale(hf, df): r""" Scales voltage (vertical) axis of dante signals based on information contained in the header. Returns a dataframe with the dante signals in units of volts. Also returns an errors/uncertainties frame in units of volts, where the uncertainty due to the 11-bit ADC converter has been calculated. Parameters ---------- hf: pandas.core.frame.DataFrame Header dataframe from dante .dat file. See readDanteData(). df: pandas.core.frame.DataFrame Dante dataframe. See readDanteData(). Returns ------- dfScaled: pandas.core.frame.DataFrame Dante dataframe with signals in units of volts. errFrame: pandas.core.frame.DataFrame Correspoding errors for dfScaled. Also in units of volts. Notes ----- See also -------- Examples -------- """ # number of bits in ADC converter bits = 11 # number of bins/segments that voltage range is divided into. This # defines the uncertainty in the measurements due to the oscilloscope # and is one less than the total number of points spanning the range # of the oscilloscope's ADC. bitRange = 2 ** bits - 1 # initializing scaled dataframe and error frame with same structure # as input dataframe. dfScaled = pd.DataFrame().reindex_like(df) errFrame = pd.DataFrame().reindex_like(df) for key in hf.keys(): # channel voltage scale in units of volts maxVoltage = hf[key]['Full Scale Vert mV'] / 1e3 # scale conversion factor countsToVolts = maxVoltage / bitRange # converting oscilloscope traces from counts to volts dfScaled[key] = df[key] * countsToVolts # uncertainties are constant and based on the ADC resolution # for each channel errFrame[key] = np.ones_like(df[key]) * countsToVolts return dfScaled, errFrame
[docs]def bkgCorrect(df, timesFrame): r""" Give a Dante data frame containing measurement data and background shot data, remove the background from the data and return the corrected data as a dataframe. Note that the returned dataframe is different in a few ways from the input dataframe. First, the returned dataframe is assumed to have strings as column headers, whereas the returned dataframe will have integers (corresponding to dante channel number) as the column headers. In addition, the input dataframe will start indexing at some number above 0 (usually 18, due to the header length), whereas the returned dataframe is re-indexed to begin at 0. A dataframe with corresponding time scales to df is also passed to this function for reindexing from strings to integers. This also acts as a placeholder in case it is necessary to interpolate values if the background shot and measurement shot timescales are not the same. Though this type of interpolation is not currently implemented. Parameters ---------- df: pandas.core.frame.DataFrame Dataframe of raw dante data. This should contain both the shot measurement and the shot background as columns. See readDanteData(). The columns in this dataframe are assumed to be strings. timesFrame: pandas.core.frame.DataFrame Dataframe containing time axis corresponding to dante signals in df dataframe. See timesScope(). Returns ------- timesBkg: pandas.core.frame.DataFrame Returns a dataframe of times corresponding to dfCorrected signals. The columns in this dataframe are integers corresponding to Dante channel number. dfCorrected: pandas.core.frame.DataFrame Returns a dataframe of background subtracted dante signals. The columns in this dataframe are integers corresponding to Dante channel number. Notes ----- See also -------- Examples -------- """ # reassign index (dataframe currently starts at 18 or so due to header) dfnew = df.set_index(np.arange(np.shape(df)[0])) # get total number of channels in dataframe totalChannels = int(np.shape(dfnew)[1] / 2) # produce a list of all channels channels = np.arange(totalChannels) + 1 # get the total number of datapoints in the dataframe dataRange = np.arange(np.shape(dfnew)[0]) # initialize an empty dataframe for storing background corrected values. dfCorrected = pd.DataFrame(index=dataRange, columns=channels) # initializing a new timesframe timesBkg = pd.DataFrame(index=dataRange, columns=channels) for ch in channels: # subtract shot background from shot measurement. dfCorrected[ch] = dfnew[str(ch)] - dfnew[str(ch) + ' bkg'] # passing through time values to new frame timesBkg[ch] = timesFrame[str(ch)] return timesBkg, dfCorrected
[docs]def offsetCorrect(df, timesFrame, offsetsFile): r""" Reads given offset correction file (.xls) and applies offsets to dante measurement data given in dataframe. The input dataframe should already be background corrected and scaled to units of volts, see bkgCorrect() and voltageScale(). Note that although timing offsets are also applied, they are not as relevant since timing should be realigned to a fiducial peak anyway. Additional attenuation is not implemented and an error will be raised if the offsets file contains attenuation values other than 1. Returns a dataframe with applied offsets. Parameters ---------- df: pandas.core.frame.DataFrame Dante dataframe with background corrected values and scaled to units of volts. See readDanteData(), bkgCorrect() and voltageScale(). timesFrame: pandas.core.frame.DataFrame Dataframe containing time axis corresponding to dante signals in df dataframe. See timesScope() and bkgCorrect(). Returns ------- offsetsFile: str Full path to .xls file containing dante channel offsets. dfOffset: pandas.core.frame.DataFrame Dante dataframe with applied offset corrections Notes ----- See also -------- Examples -------- """ # read excel file with offsets offsetsFrame = pd.read_excel(offsetsFile) offsets = offsetsFrame.set_index(np.arange(np.shape(offsetsFrame)[0]) + 1) # initialize dataframe for storing offset corrected values dfOffset = pd.DataFrame().reindex_like(df) timeOffset = pd.DataFrame().reindex_like(timesFrame) for key in df.keys(): # correct for channel offset # Offsets are added to signal dfOffset[key] = df[key] + offsets['OffsetV (V)'][key] if offsets['additional attenuation'][key] != 1: raise NotImplementedError("Non-zero attenuation in offsets " "file not implemented!") # applying time offsets # need to scale by 1e-9 since timesFrame is given in seconds, whereas # offsets are given in nanoseconds. timeOffset[key] = timesFrame[key] + 1e-9 * offsets['OffsetT (ns)'][key] return timeOffset, dfOffset
def __getAttenuation__(attenuatorsFrame, serialNum): r""" Given dataframe with attenuator info and a serial number, returns the attenuation factor Parameters ---------- attenuatorsFrame: serialNum: Returns ------- factorVal: Notes ----- See also -------- Examples -------- """ factor = attenuatorsFrame[attenuatorsFrame['#'] == serialNum]['Factor'] factorVal = factor.values return factorVal
[docs]def attenuationFactors(hf, channels, attenuatorsPath): r""" Given a header frame, return the attenuation factors applied to each channel. Parameters ---------- hf: pandas.core.frame.DataFrame Pandas dataframe containing dante header information. See readDanteData(). channels: set Set of channels to be analyzed. attenuatorsPath: str Full path to excel file containing attenuator serial numbers and corresponding attenuation factors. Returns ------- chFactors Notes ----- See also -------- Examples -------- """ # load attenuators dataframe attenuatorsFrame = pd.read_excel(attenuatorsPath) # list of pandas dataframe indices containing attenuator serial numbers. attenuatorIdxs = ['Attenuator 1', 'Attenuator 2', 'Attenuator 3', 'Attenuator 4'] # initializing array to store attenuation factors corresponding to each # channel. chFactors = pd.DataFrame(1, index=['Attenuation Factor'], columns=channels) # cycling over channels for ch in channels: # getting attenuator serial numbers serials = hf[str(ch)][attenuatorIdxs] # filtering null values (no attenuator) realSerials = serials[serials != 0] # fetching attenuation factors factors = [__getAttenuation__(attenuatorsFrame, sn) for sn in realSerials] # product sum of attenuation factors and saving to pandas # dataframe. If the list is empty then prod will return a value # of 1, which is exactly what we want! chFactors[ch] = np.prod(factors) return chFactors
[docs]def attenuationCorrect(attenuatorsFile, hf, df, channels): r""" Given a Dante data frame and header frame, return a data frame with attenuation corrections applied to each channel. Parameters ---------- attenuatorsFile: str Full path to .xls file containing attenuator serial numbers and corresponding attenuation factors. See attenuationFactors(). hf: pandas.core.frame.DataFrame Header dataframe from dante .dat file. See readDanteData(). df: pandas.core.frame.DataFrame Dante dataframe. This frame should already be voltage scaled, background corrected, and offset corrected. See readDanteData(). channels: list List of dante channels in df to be analyzed. Returns ------- dfAtten: pandas.core.frame.DataFrame Returns dataframe with attenuation corrected signal values for the given channels. Notes ----- See also -------- Examples -------- """ # getting attenuation factors attFactors = attenuationFactors(hf, channels, attenuatorsFile) # initializing dataframe for attenuation corrected values dfAtten = pd.DataFrame(index=np.arange(len(df)), columns=channels) for ch in channels: # applying attenuation factors dfAtten[ch] = df[ch] * attFactors[ch]['Attenuation Factor'] return dfAtten
[docs]def timeAvgBkg(times, signals, timeStart, timeEnd): r""" Calculates time averaged background for given data. Parameters ---------- times: signals: timeStart: timeEnd: Returns ------- avg: Notes ----- See also -------- Examples -------- """ # getting nearest indices to start and stop times idxStart, valueStart = find_nearest(times, timeStart) idxEnd, valueEnd = find_nearest(times, timeEnd) # averaging avg = np.mean(signals[idxStart:idxEnd]) return avg
[docs]def avgBkgCorrect(timesFrame, df, channels, timeLength=1e-9): r""" Applies background correction to bring the signal down to zero, based on averaging the signal background over a section of time from earliest time contained in timesFrame to earliest time plus timeLength. Parameters ---------- timesFrame: pandas.core.frame.DataFrame Dataframe of time axis values corresponding to signals in df. These should be in units of seconds. df: pandas.core.frame.DataFrame Dataframe of dante signals. These should already be attenuation corrected and in units of volts. channels: set Set of channels to be analyzed. timeLength: float Duration of time from initial time over which to take the average. In units of seconds. dfAvg: pandas.core.frame.DataFrame Returns a dataframe containing average background corrected signals. Returns ------- dfAvg: Notes ----- See also -------- Examples -------- """ dfAvg = pd.DataFrame().reindex_like(df) for ch in channels: # getting earliest time in frame. timeStart = timesFrame[ch][0] # added duration to earliest time to get end time for average. timeEnd = timeStart + timeLength # calculating average background by grabbing a section of the signal avgBkg = timeAvgBkg(times=timesFrame[ch].values, signals=df[ch].values, timeStart=timeStart, timeEnd=timeEnd) # applying correction dfAvg[ch] = df[ch] - avgBkg return dfAvg
[docs]def polyBkg(time, signal, lowerEdge, upperEdge, order=3, lowerLength=None, upperLength=None, plot=False): r""" Fit polynomial function to ends of the signal as an estimate of the background signal + hyesteresis. Default is cubic fit. Parameters ---------- time: numpy.ndarray array of times corresponding to signal signal: numpy.ndarray array of signal values for a single dante channel lowerEdge: int Index of time array corresponding to lower edge of detected signal. See signalEdges(). upperEdge: int Index of time array corresponding to upper edge of detected signal. See signalEdges(). order: int Order of polynomial to be fitted to estimated background/hysteresis. lowerLength: int Length over which to take the polynomial background fit on the lower end (earlier in time) segment of the signal, with respect to lowerEdge. Default is None, which just takes the first point in the signal. upperLength: int Length over which to take the polynomial background fit on the upper end (later in time) segment of the signal, with respect to upperEdge. Defualt is None, which then just picks the second to last point in the signal. plot: bool Flag for plotting polynomial fitted background signal. Default is False. Returns ------- time: fitSignal: Notes ----- See also -------- Examples -------- """ if lowerLength == None: lowerMin = 0 else: lowerMin = lowerEdge - lowerLength lowerMax = lowerEdge upperMin = upperEdge if upperLength == None: upperMax = -1 else: upperMax = upperEdge + upperLength dataEdgeSignal = np.concatenate((signal[lowerMin:lowerMax], signal[upperMin:upperMax])) dataEdgeTime = np.concatenate((time[lowerMin:lowerMax], time[upperMin:upperMax])) # fit polynomial fit = np.polyfit(dataEdgeTime, dataEdgeSignal, order) polyObj = np.poly1d(fit) fitSignal = polyObj(time) # plotting if plot: plt.plot(time, signal, label='signal') plt.plot(time, fitSignal, '--', label='fit') plt.scatter(dataEdgeTime, dataEdgeSignal, label='bkg') plt.legend() plt.show() return time, fitSignal
[docs]def signalEdges(timesFrame, df, channels, sigmaMult=3, plot=False, prominence=0.1, width=10, avgMult=1): r""" Determines locations and widths of peaks above the mean of the signal for each dante channel. Edges of the signal containing region are then obtained by moving sigmaMult peak widths away from the earliest and latest peaks. Returns these lower and upper bound edges of the signal containing region as a dataframe. These edges are useful for fitting and removing the background/hysteresis. Parameters ---------- timesFrame: pandas.core.frame.DataFrame A dataframe containing time axis values corresponding to signals in df. df: pandas.core.frame.DataFrame A dataframe of corrected/calibrated dante signal measurements. channels: list List of channels in df for which edges will be determined. sigmaMult: float Multiplier factor by which the lower and upper bounds of the signal containing region are determined. The lower bound is determined by sigmaMult times the width of the earliest peak away from the earliest peak. The upper bound is determined by sigmaMult times the width of the latest peak away from the latest peak. Default is 3 for approximately 3*sigma away from each peak. plot: bool Flag for plotting peak locations and widths. Default is False. edgesFrame: pandas.core.frame.DataFrame Lower and upper bound edges of the signal containing region for each dante channel. The lower bound is in 0 index and the upper bound is in 1 index. The bounds are given in index coordinates and they have been rounded to the nearest point. prominence: float Prominence threshold for identifying peaks in scipy's find_peaks(). width: int Width in index units for identifying peaks in scipy's find_peaks(). avgMult: float Multiplicative factor for setting minimum intensity threshold for indentifying peaks in scipy's find_peaks(). This is a multiple of the signal average. Returns ------- edgesFrame: Notes ----- See also -------- Examples -------- """ edgesFrame = pd.DataFrame(index=[0,1], columns=df.keys()) for ch in channels: times = timesFrame[ch] signal = df[ch] # finding peaks which are above the mean of the signal peaks, properties = find_peaks(signal, height=avgMult * np.mean(signal), prominence=prominence, width=width) # determining lower and upper bounds of signal region by taking first # and last peaks and shifting by distance of peak width width1 = properties["widths"][0] # width / 2 is kind of like sigma, and then that multiplied by 3 is # like 3 sigma away. lowerBnd = peaks[0] - sigmaMult * width1 / 2 width2 = properties["widths"][-1] upperBnd = peaks[-1] + sigmaMult * width2 / 2 # saving these edges to the dataframe which is to be returned edgesFrame[ch][0] = int(round(lowerBnd)) edgesFrame[ch][1] = int(round(upperBnd)) if plot: # plotting plt.plot(times, signal) plt.plot(times[peaks], signal[peaks], "x") plt.vlines(x=times[peaks], ymin=signal[peaks] - properties["prominences"], ymax = signal[peaks], color = "C1") plt.hlines(y=properties["width_heights"], xmin=times[properties["left_ips"]], xmax=times[properties["right_ips"]], color = "C1") plt.hlines(y=avgMult * np.mean(signal), xmin=times[0], xmax=times[len(signal) - 1], color = "C2", linestyles='--') plt.scatter(times[edgesFrame[ch][0]], signal[edgesFrame[ch][0]], color="C3") plt.scatter(times[edgesFrame[ch][1]], signal[edgesFrame[ch][1]], color="C3") plt.title(f"Ch {ch}") plt.xlabel("Time (s)") plt.ylabel("Signal (V)") plt.show() return edgesFrame
[docs]def polyBkgFrame(timesFrame, df, edgesFrame, channels, order=3, plot=False): r""" Parameters ---------- timesFrame: pandas.core.frame.DataFrame A dataframe containing time axis values corresponding to signals in df. df: pandas.core.frame.DataFrame A dataframe of corrected/calibrated dante signal measurements. edgesFrame: pandas.core.frame.DataFrame Dataframe describing edges of the signal containing region, outside of which should be just background. This function will fit to these two early time and late time background containing regions. See signalEdges(). channels: list A list of channels for which to apply analyis. order: int Order of polynomial to be fitted to estimated background/hysteresis. plot: bool Flag for plotting fitted background and background subtracted signal. Default is False. Returns ------- dfPoly: Notes ----- See also -------- Examples -------- """ dfPoly = pd.DataFrame().reindex_like(df) for ch in channels: # getting polynomial fit of background/hysteresis fitTimes, fitSignals = polyBkg(time=timesFrame[ch], signal=df[ch], lowerEdge=edgesFrame[ch][0], upperEdge=edgesFrame[ch][1], order=order, lowerLength=None, upperLength=None, plot=plot) # subtracting hysteresis/background and saving to dataframe dfPoly[ch] = df[ch] - fitSignals if plot: plt.plot(timesFrame[ch], dfPoly[ch]) plt.title(f'Channel {ch}') plt.xlabel('Time (s)') plt.ylabel('Signal (V)') plt.show() return dfPoly
[docs]def highestPeak(signal, peakIdxs): r""" Find the highest peak, and return list of peaks with the highest peak removed from the list. Parameters ---------- signal: pandas.core.series.Series A data series consisting of signals from a single dante channel. peakIdxs: list A list of indices corresponding to peaks identified in the signal by using scipy's find_peaks() function. Returns ------- peakHighestIdx: int Returns the index corresponding to the highest peak. peakIdxs2: list Returns a list of peak index locations with the highest peak removed from the list. This makes it easier for highestN() to apply highestPeak() iteratively to find the N highest peaks. Notes ----- See also -------- Examples -------- """ # search for first tallest peak peakSignals = signal[peakIdxs] peakHighestIdx = pd.Series.idxmax(peakSignals) # finding index of tallest peak in peakIdxs array (since this is a # numpy array and is indexed differently from peakSignals). delIdx = np.where(peakIdxs==peakHighestIdx) # remove first tallest peak peakIdxs2 = np.delete(peakIdxs, delIdx) return peakHighestIdx, peakIdxs2
[docs]def highestN(signal, peakIdxs, peaksNum=2): r""" Select the N tallest peaks. Parameters ---------- signal: pandas.core.series.Series A data series consisting of signals from a single dante channel. peakIdxs: list A list of indices corresponding to peaks identified in the signal by using scipy's find_peaks() function. peaksNum: int Number of peaks to grab from peakIdxs. This function will grab just the N tallest peaks where N=peaksNum. Returns ------- highestPeaks: numpy.ndarray Array of indices corresponding to the highest peaks. Peaks are ordered from highest to lowest. Notes ----- See also -------- Examples -------- """ if peaksNum > len(peakIdxs): raise Exception("Number of peaks to grab should be less than or " "equal to total number of peaks provided!") # initialize input to find highest peak first time around peakIdxsInput = peakIdxs # initialize array containing highest peaks highestPeaks = np.zeros(peaksNum, dtype=int) # loop over finding the N highest peaks for idx in range(peaksNum): # find the highest peak in the given list of peaks peakHighestIdx, peakIdxs2 = highestPeak(signal=signal, peakIdxs=peakIdxsInput) # reassign input to be the list of peaks without the most # recently found peak. peakIdxsInput = peakIdxs2 # saving highest peak highestPeaks[idx] = peakHighestIdx return highestPeaks
[docs]def getPeaks(timesFrame, df, channels, peaksNum=2, plot=False, prominence=0.1, width=10, avgMult=1): r""" Parameters ---------- timesFrame: pandas.core.frame.DataFrame A dataframe containing time axis values corresponding to signals in df. df: pandas.core.frame.DataFrame A dataframe of corrected/calibrated dante signal measurements. channels: list A list of channels for which to apply analyis. peaksNum: int Number of peaks to grab from peakIdxs. This function will grab just the N tallest peaks where N=peaksNum. peaksNum: int Number of peaks to grab from peakIdxs. This function will grab just the N tallest peaks where N=peaksNum. plot: bool Flag for plotting identified peaks, with prominences, and widths, overlaid with the corresponding dante signal, and the average dante signal. peaksFrame: pandas.core.frame.DataFrame Returns a dataframe containing indices of the identified peaks sorted from the peak that occurs earliest in time to the latest in time. prominence: float Prominence threshold for identifying peaks in scipy's find_peaks(). width: int Width in index units for identifying peaks in scipy's find_peaks(). avgMult: float Multiplicative factor for setting minimum intensity threshold for indentifying peaks in scipy's find_peaks(). This is a multiple of the signal average. Returns ------- peaksFrame: Notes ----- See also -------- Examples -------- """ peaksFrame = pd.DataFrame(index=np.arange(peaksNum), columns=df.keys()) for ch in channels: times = timesFrame[ch] signal = df[ch] # finding peaks which are above the mean of the signal peaks, properties = find_peaks(signal, height=avgMult * np.mean(signal), prominence=prominence, width=width) # selecting the N tallest peaks highestPeakIdxs = highestN(signal=signal, peakIdxs=peaks, peaksNum=peaksNum) # ordering peaks by which comes earliest peaksOrdered = np.sort(highestPeakIdxs) # saving these peaks to the dataframe which is to be returned peaksFrame[ch] = peaksOrdered if plot: # plotting plt.plot(times, signal) plt.plot(times[peaks], signal[peaks], "x") plt.vlines(x=times[peaks], ymin=signal[peaks] - properties["prominences"], ymax = signal[peaks], color = "C1") plt.hlines(y=properties["width_heights"], xmin=times[properties["left_ips"]], xmax=times[properties["right_ips"]], color = "C1") plt.hlines(y=avgMult * np.mean(signal), xmin=times[0], xmax=times[len(signal) - 1], color = "C2", linestyles='--') plt.title(f"Ch {ch}") plt.xlabel("Time (s)") plt.ylabel("Signal (V)") plt.show() return peaksFrame
[docs]def alignPeaks(timesFrame, df, peaksFrame, channels, peakAlignIdx=0, referenceTime=1e-9, plot=False): r""" Parameters ---------- timesFrame: pandas.core.frame.DataFrame A dataframe containing time axis values corresponding to signals in df. df: pandas.core.frame.DataFrame A dataframe of corrected/calibrated dante signal measurements. peaksFrame: pandas.core.frame.DataFrame Dataframe containing positions of N highest peaks and sorted from earliest in time to latest in time. See getPeaks(). peakAlignIdx: int Picks which peak to align to. 0 is first peak, 1 is second peak in peaksFrame, etc. referenceTime: float Time in s to which align peaks. Default is 1e-9 s or 1 ns. plot: bool Flag for plotting aligned dante signals. Default is False. Returns ------- timesAligned: pandas.core.frame.DataFrame Returns a dataframe identical in shape to timesFrame, but with the times for each dante channel offset such that the selected peaks are temporally aligned. Notes ----- See also -------- Examples -------- """ peakAlignIdx = 0 # 0 is first peak, 1 is 2nd peak, etc. referenceTime = 1e-9 # set peaks to occur at 1 ns timesAligned = pd.DataFrame().reindex_like(timesFrame) for ch in channels: time = timesFrame[ch] # of the two highest peaks, grab the one with the lowest index (earliest # in time) # align all signals to reference time, by setting first peak to be at # reference time. # Get index of peak relative to signals dataframe peakIdx = peaksFrame[ch][peakAlignIdx] # get time at which peak occurs peakTime = time[peakIdx] # calculate necessary shift in peak to bring it to the reference time offsetTime = peakTime - referenceTime # generate new time scales for shifted peaks # print(f'times: \n{time}') # print(f'offset time: {offsetTime}') timesAligned[ch] = time - offsetTime # plotting if plot: for ch in channels: plt.plot(timesAligned[ch], df[ch], label=ch) plt.xlabel('Time (s)') plt.ylabel('Signal (V)') plt.title('Aligned') plt.legend(frameon=False, labelspacing=0.001, borderaxespad=0.1) plt.show() return timesAligned
[docs]def constructMeasurementFrame(timesFrame, df, channels): r""" Takes out put timesFrame and dataFrame from rawProcess.py functions and generates a measurementFrame that can be passed to analyzeStreak() and other main.py functions. Converts units from seconds to nanoseconds. Parameters ---------- timesFrame: pandas.core.frame.DataFrame A dataframe containing time axis values corresponding to signals in df. df: pandas.core.frame.DataFrame A dataframe of corrected/calibrated dante signal measurements. channels: list A list of channels for which to apply analyis. Returns ------- measurementFrame: pandas.core.frame.DataFrame Returns a measurementFrame which can be passed to main.py functions such as analyzeSpectrum() and analyzeStreak(). Notes ----- See also -------- Examples -------- """ # generating list of column names for measurementFrame colsList= [] for ch in channels: timeStr = 'Time' + str(ch) signalStr = 'Signal' + str(ch) colsList.append(timeStr) colsList.append(signalStr) # initialize dataframe measurementFrame = pd.DataFrame(index=np.arange(len(df)), columns=colsList) for ch in channels: # writing data into measurement frame columns measurementFrame['Time' + str(ch)] = timesFrame[ch] * 1e9 measurementFrame['Signal' + str(ch)] = df[ch] return measurementFrame
[docs]def loadCorrected(danteFile, attenuatorsFile, offsetsFile, cut=None, plot=False, addCh=[]): r""" Given a dante data file, an attenuators file, and an offsets file, reads the file and applies background correction, attenuation correction, and channel offset correction. Returns the corrected data traces as a pandas dataframe. The row indices of this dataframe also contain the correct time scaling given the oscilloscope settings, but note that the channels are not aligned. User must apply alignment correction using some measured signal as a temporal fiducial. Parameters ---------- danteFile: str Full path to .dat file containing raw dante traces. attenuatorsFile: str Full path to the .xls file containing attenuator serial numbers and corresponding attenuation factors. offsetsFile: str Full path to .xls file containing dante channel offsets. cut: int Number of points to cut from leading and trailing end of each Dante channel trace. This is used to remove noise that occurs at the edges of the signal. Default is None, which means no cut is applied. plot: bool Flag for plotting data after each calibration/correction step. Default is False. addCh: list Add channels to analyze. This is used to override which channels are listed as on in the header of the data dante data file. Returns ------- timeOffset: dfAvg: onChList: hf: dfVolt: Notes ----- See also -------- Examples -------- """ # load data hf, df = readDanteData(danteFile) # get set of all dante channels with useful data onCh = onChannels(hf) print(f'Analyzing channels {onCh}') # converting from set to list because some functions are implemented # assuming a list of channels instead of a set. if addCh: onChList = list(onCh) + addCh else: onChList = list(onCh) # apply voltage scaling dfVolt, errVolt = voltageScale(hf, df) # time axis calibration timesFrame = timesScope(hf) # shot background correction # Note: Here is where we switch from string indices to integer indices # since we no longer need the bkg columns in our dataframe. timesBkg, dfBkg = bkgCorrect(df=dfVolt, timesFrame=timesFrame) # Applying offset correction timeOffset, dfOffset = offsetCorrect(df=dfBkg, timesFrame=timesBkg, offsetsFile=offsetsFile) # Applying attenuation correction dfAtten = attenuationCorrect(attenuatorsFile, hf, dfOffset, onChList) # applying an averaged background correction dfAvg = avgBkgCorrect(timesFrame=timeOffset, df=dfAtten, channels=onCh, timeLength=1e-9) # plotting each calibration/correction step if plot: plt.plot(df) plt.title('raw dante data, fresh off the load') plt.xlabel('Time (counts)') plt.ylabel('Signal (counts)') plt.show() plt.plot(dfVolt) plt.title('Voltage scaled') plt.xlabel('Time (counts)') plt.ylabel('Signal (V)') plt.show() for ch in onChList: plt.plot(timesFrame[str(ch)], dfVolt[str(ch)], label=ch) plt.title('time and voltage scaled') plt.xlabel('Time (s)') plt.ylabel('Signal (V)') plt.legend(frameon=False, labelspacing=0.001, borderaxespad=0.1) plt.show() for ch in onChList: plt.plot(timesBkg[ch], dfBkg[ch], label=ch) plt.title('bkg corrected') plt.xlabel('Time (s)') plt.ylabel('Signal (V)') plt.legend(frameon=False, labelspacing=0.001, borderaxespad=0.1) plt.show() for ch in onChList: plt.plot(timeOffset[ch], dfOffset[ch], label=ch) plt.title('offset corrected') plt.xlabel('Time (s)') plt.ylabel('Signal (V)') plt.legend(frameon=False, labelspacing=0.001, borderaxespad=0.1) plt.show() for ch in onChList: plt.plot(timeOffset[ch], dfAtten[ch], label=ch) plt.title('attenuation corrected') plt.xlabel('Time (s)') plt.ylabel('Signal (V)') plt.legend(frameon=False, labelspacing=0.001, borderaxespad=0.1) plt.show() for ch in onChList: plt.plot(timeOffset[ch], dfAvg[ch], label=ch) plt.title('averaged background corrected') plt.xlabel('Time (s)') plt.ylabel('Signal (V)') plt.legend(frameon=False, labelspacing=0.001, borderaxespad=0.1) plt.show() return timeOffset, dfAvg, onChList, hf, dfVolt
[docs]def hysteresisCorrect(timesFrame, df, channels, order=5, prominence=0.2, width=10, avgMult=1): r""" Corrects for hysteresis by detecting edges of signal containing region and fitting a polynomial background to regions that do not belong to signal. This background is then subtracted. Parameters ---------- timesFrame: pandas.core.frame.DataFrame Time corresponding to df df: pandas.core.frame.DataFrame Dataframe of dante signals. See loadCorrected(). channels: list A list of channels for which to apply analyis. order: int Polynomial order to be fitted to hysteresis/background. prominence: float Prominence threshold for identifying peaks in scipy's find_peaks(). width: int Width in index units for identifying peaks in scipy's find_peaks(). avgMult: float Multiplicative factor for setting minimum intensity threshold for indentifying peaks in scipy's find_peaks(). This is a multiple of the signal average. Returns ------- dfPoly: pandas.core.frame.DataFrame Returns a dataframe of hysteresis corrected dante signals. Notes ----- See also -------- Examples -------- """ # getting edges of signal containing region edgesFrame = signalEdges(timesFrame=timesFrame, df=df, channels=channels, plot=False, prominence=prominence, width=width, avgMult=avgMult) # removing hysteresis and background with a polynomial fit dfPoly = polyBkgFrame(timesFrame=timesFrame, df=df, edgesFrame=edgesFrame, channels=channels, order=order, plot=False) return dfPoly
[docs]def align(timesFrame, df, channels, peaksNum=1, peakAlignIdx=0, referenceTime=1e-9, prominence=0.01, width=10, avgMult=1.5): r""" Aligns dante signals based on peak finding. Parameters ---------- timesFrame: pandas.core.frame.DataFrame Time corresponding to df df: pandas.core.frame.DataFrame Dataframe of dante signals. See loadCorrected(). channels: list A list of channels for which to apply analyis. peaksNum: int Number of peaks to grab from peakIdxs. This function will grab just the N tallest peaks where N=peaksNum. peakAlignIdx: int Picks which peak to align to. 0 is first peak, 1 is second peak in peaksFrame, etc. referenceTime: float Time in s to which align peaks. Default is 1e-9 s or 1 ns. prominence: float Prominence threshold for identifying peaks in scipy's find_peaks(). width: int Width in index units for identifying peaks in scipy's find_peaks(). avgMult: float Multiplicative factor for setting minimum intensity threshold for indentifying peaks in scipy's find_peaks(). This is a multiple of the signal average. Returns ------- timesAligned: pandas.core.frame.DataFrame Returns a dataframe of times corresponding to signals in df, such that the signals are now aligned to the given peak. Notes ----- See also -------- Examples -------- """ if peakAlignIdx > peaksNum - 1: raise Exception(f"Cannot align to {peakAlignIdx}th peak if there" f" are only {peaksNum} peaks.") # finding tallest peaks in the signal peaksFrame = getPeaks(timesFrame=timesFrame, df=df, channels=channels, peaksNum=peaksNum, plot=True, prominence=prominence, width=width, avgMult=avgMult) # aligning to selected peak timesAligned = alignPeaks(timesFrame=timesFrame, df=df, peaksFrame=peaksFrame, channels=channels, peakAlignIdx=peakAlignIdx, referenceTime=referenceTime, plot=True) return timesAligned
#----------------------------------------- # PURGATORY FOR DEPRECATED METHOD ATTEMPTS #----------------------------------------- #def signalEdges(time, # signal, # sigma, # windowMin=0, # windowMax=1, # title='', # plot=False): # """ # Gets the signal edges via Canny edge detection method # # time: numpy.ndarray # Array of times corresponding to signal. # # signal: numpy.ndarray # Array of signal intensities. # # sigma: float # Width of Gaussian filter used in Canny edge detection. This should # be close to the width of the features you want to detect. # # windowMin: int # Lower bound index of window over which to search for edges in signal. # Default is 0. # # windowMax: int # Distance in index units from end of signal to upper bound of window # over which to search for edges in signal. Default is 1. # # plot: bool # Flag for plotting signal with detected edges. Default is False. # """ # timeWin = time[windowMin:-windowMax] # signalWin = signal[windowMin:-windowMax] # # stacking signals to form an "image" so that scikit-image can process it. # sigStack = np.vstack((signalWin, signalWin, signalWin)) # edgeCanny = feature.canny(sigStack, # sigma=sigma) # # getting the actual edge points instead of just the indices # edgeTimes = timeWin[edgeCanny[1,:]] # edges = signalWin[edgeCanny[1,:]] # # if plot: # # plotting detected edges # plt.plot(time, signal, label='signal') # plt.scatter(edgeTimes, edges, color='red', label='edges') # plt.legend() # plt.title(title) # plt.show() # # return edgeTimes, edges # # # # # #def highestTwo(signal, peakIdxs): # """ # Get the two highest peaks given signal, and peak # indices found using a peaking finder such as find_peaks_cwt(). # """ # # search for first tallest peak # peakSignals1 = signal[peakIdxs] # peak1Idx = np.argmax(peakSignals1) # # convert back into index for signals # peak1SigIdx = peakIdxs[peak1Idx] # # remove first tallest peak # peakIdxs2 = np.delete(peakIdxs, peak1Idx) # peakSignals2 = signal[peakIdxs2] # # search for second tallest peak # peak2Idx = np.argmax(peakSignals2) # # convert back into index for signals # peak2SigIdx = peakIdxs2[peak2Idx] # # return positions of two tallest peaks in order of which is first # if peak1SigIdx < peak2SigIdx: # return peak1SigIdx, peak2SigIdx # else: # return peak2SigIdx, peak1SigIdx