Source code for psychopy.data.experiment

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import sys
import copy
import pickle
import atexit
import pandas as pd

from psychopy import constants, clock
from psychopy import logging
from psychopy.data.trial import TrialHandler2
from psychopy.tools.filetools import (openOutputFile, genDelimiter,
                                      genFilenameFromDelimiter)
from psychopy.localization import _translate
from .utils import checkValidFilePath
from .base import _ComparisonMixin


[docs]class ExperimentHandler(_ComparisonMixin): """A container class for keeping track of multiple loops/handlers Useful for generating a single data file from an experiment with many different loops (e.g. interleaved staircases or loops within loops :usage: exp = data.ExperimentHandler(name="Face Preference",version='0.1.0') """ def __init__(self, name='', version='', extraInfo=None, runtimeInfo=None, originPath=None, savePickle=True, saveWideText=True, sortColumns=False, dataFileName='', autoLog=True, appendFiles=False): """ :parameters: name : a string or unicode As a useful identifier later version : usually a string (e.g. '1.1.0') To keep track of which version of the experiment was run extraInfo : a dictionary Containing useful information about this run (e.g. {'participant':'jwp','gender':'m','orientation':90} ) runtimeInfo : :class:`psychopy.info.RunTimeInfo` Containing information about the system as detected at runtime originPath : string or unicode The path and filename of the originating script/experiment If not provided this will be determined as the path of the calling script. dataFileName : string This is defined in advance and the file will be saved at any point that the handler is removed or discarded (unless .abort() had been called in advance). The handler will attempt to populate the file even in the event of a (not too serious) crash! savePickle : True (default) or False saveWideText : True (default) or False sortColumns : str or bool How (if at all) to sort columns in the data file, if none is given to saveAsWideText. Can be: - "alphabetical", "alpha", "a" or True: Sort alphabetically by header name - "priority", "pr" or "p": Sort according to priority - other: Do not sort, columns remain in order they were added autoLog : True (default) or False """ self.loops = [] self.loopsUnfinished = [] self.name = name self.version = version self.runtimeInfo = runtimeInfo if extraInfo is None: self.extraInfo = {} else: self.extraInfo = extraInfo self.originPath = originPath self.savePickle = savePickle self.saveWideText = saveWideText self.dataFileName = dataFileName self.sortColumns = sortColumns self.thisEntry = {} self.entries = [] # chronological list of entries self._paramNamesSoFar = [] self.dataNames = ['thisRow.t', 'notes'] # names of all the data (eg. resp.keys) self.columnPriority = { 'thisRow.t': constants.priority.CRITICAL - 1, 'notes': constants.priority.MEDIUM - 1, } self.autoLog = autoLog self.appendFiles = appendFiles self.status = constants.NOT_STARTED if dataFileName in ['', None]: logging.warning('ExperimentHandler created with no dataFileName' ' parameter. No data will be saved in the event ' 'of a crash') else: # fail now if we fail at all! checkValidFilePath(dataFileName, makeValid=True) atexit.register(self.close) def __del__(self): self.close() @property def currentLoop(self): """ Return the loop which we are currently in, this will either be a handle to a loop, such as a :class:`~psychopy.data.TrialHandler` or :class:`~psychopy.data.StairHandler`, or the handle of the :class:`~psychopy.data.ExperimentHandler` itself if we are not in a loop. """ # If there are unfinished (aka currently active) loops, return the most recent if len(self.loopsUnfinished): return self.loopsUnfinished[-1] # If we are not in a loop, return handle to experiment handler return self
[docs] def addLoop(self, loopHandler): """Add a loop such as a :class:`~psychopy.data.TrialHandler` or :class:`~psychopy.data.StairHandler` Data from this loop will be included in the resulting data files. """ self.loops.append(loopHandler) self.loopsUnfinished.append(loopHandler) # keep the loop updated that is now owned loopHandler.setExp(self)
[docs] def loopEnded(self, loopHandler): """Informs the experiment handler that the loop is finished and not to include its values in further entries of the experiment. This method is called by the loop itself if it ends its iterations, so is not typically needed by the user. """ if loopHandler in self.loopsUnfinished: self.loopsUnfinished.remove(loopHandler)
[docs] def _getAllParamNames(self): """Returns the attribute names of loop parameters (trialN etc) that the current set of loops contain, ready to build a wide-format data file. """ names = copy.deepcopy(self._paramNamesSoFar) # get names (or identifiers) for all contained loops for thisLoop in self.loops: theseNames, vals = self._getLoopInfo(thisLoop) for name in theseNames: if name not in names: names.append(name) return names
[docs] def _getExtraInfo(self): """Get the names and vals from the extraInfo dict (if it exists) """ if type(self.extraInfo) != dict: names = [] vals = [] else: names = list(self.extraInfo) vals = list(self.extraInfo.values()) return names, vals
[docs] def _getLoopInfo(self, loop): """Returns the attribute names and values for the current trial of a particular loop. Does not return data inputs from the subject, only info relating to the trial execution. """ names = [] vals = [] name = loop.name # standard attributes for attr in ('thisRepN', 'thisTrialN', 'thisN', 'thisIndex', 'stepSizeCurrent'): if hasattr(loop, attr): attrName = name + '.' + attr.replace('Current', '') # append the attribute name and the current value names.append(attrName) vals.append(getattr(loop, attr)) # method of constants if hasattr(loop, 'thisTrial'): trial = loop.thisTrial if hasattr(trial, 'items'): # is a TrialList object or a simple dict for attr, val in list(trial.items()): if attr not in self._paramNamesSoFar: self._paramNamesSoFar.append(attr) names.append(attr) vals.append(val) # single StairHandler elif hasattr(loop, 'intensities'): names.append(name + '.intensity') if len(loop.intensities) > 0: vals.append(loop.intensities[-1]) else: vals.append(None) return names, vals
[docs] def addData(self, name, value, row=None, priority=None): """ Add the data with a given name to the current experiment. Typically the user does not need to use this function; if you added your data to the loop and had already added the loop to the experiment then the loop will automatically inform the experiment that it has received data. Multiple data name/value pairs can be added to any given entry of the data file and is considered part of the same entry until the nextEntry() call is made. e.g.:: # add some data for this trial exp.addData('resp.rt', 0.8) exp.addData('resp.key', 'k') # end of trial - move to next line in data output exp.nextEntry() Parameters ---------- name : str Name of the column to add data as. value : any Value to add row : int or None Row in which to add this data. Leave as None to add to the current entry. priority : int Priority value to set the column to - higher priority columns appear nearer to the start of the data file. Use values from `constants.priority` as landmark values: - CRITICAL: Always at the start of the data file, generally reserved for Routine start times - HIGH: Important columns which are near the front of the data file - MEDIUM: Possibly important columns which are around the middle of the data file - LOW: Columns unlikely to be important which are at the end of the data file - EXCLUDE: Always at the end of the data file, actively marked as unimportant """ if name not in self.dataNames: self.dataNames.append(name) # could just copy() every value, but not always needed, so check: try: hash(value) except TypeError: # unhashable type (list, dict, ...) == mutable, so need a copy() value = copy.deepcopy(value) # if value is a Timestamp, resolve to a simple value if isinstance(value, clock.Timestamp): value = value.resolve() # get entry from row number entry = self.thisEntry if row is not None: entry = self.entries[row] entry[name] = value # set priority if given if priority is not None: self.setPriority(name, priority)
[docs] def getPriority(self, name): """ Get the priority value for a given column. If no priority value is stored, returns best guess based on column name. Parameters ---------- name : str Column name Returns ------- int The priority value stored/guessed for this column, most likely a value from `constants.priority`, one of: - CRITICAL (30): Always at the start of the data file, generally reserved for Routine start times - HIGH (20): Important columns which are near the front of the data file - MEDIUM (10): Possibly important columns which are around the middle of the data file - LOW (0): Columns unlikely to be important which are at the end of the data file - EXCLUDE (-10): Always at the end of the data file, actively marked as unimportant """ if name not in self.columnPriority: # store priority if not specified already self.columnPriority[name] = self._guessPriority(name) # return stored priority return self.columnPriority[name]
[docs] def _guessPriority(self, name): """ Get a best guess at the priority of a column based on its name Parameters ---------- name : str Name of the column Returns ------- int One of the following: - HIGH (19): Important columns which are near the front of the data file - MEDIUM (9): Possibly important columns which are around the middle of the data file - LOW (-1): Columns unlikely to be important which are at the end of the data file NOTE: Values returned from this function are 1 less than values in `constants.priority`, columns whose priority was guessed are behind equivalently prioritised columns whose priority was specified. """ # if there's a dot, get attribute name if "." in name: name = name.split(".")[-1] # start off assuming low priority priority = constants.priority.LOW # if name is one of identified likely high priority columns, it's medium priority if name in [ "keys", "rt", "x", "y", "leftButton", "numClicks", "numLooks", "clip", "response", "value", "frameRate", "participant" ]: priority = constants.priority.MEDIUM return priority - 1
[docs] def setPriority(self, name, value=constants.priority.HIGH): """ Set the priority of a column in the data file. Parameters ---------- name : str Name of the column, e.g. `text.started` value : int Priority value to set the column to - higher priority columns appear nearer to the start of the data file. Use values from `constants.priority` as landmark values: - CRITICAL (30): Always at the start of the data file, generally reserved for Routine start times - HIGH (20): Important columns which are near the front of the data file - MEDIUM (10): Possibly important columns which are around the middle of the data file - LOW (0): Columns unlikely to be important which are at the end of the data file - EXCLUDE (-10): Always at the end of the data file, actively marked as unimportant """ self.columnPriority[name] = value
[docs] def addAnnotation(self, value): """ Add an annotation at the current point in the experiment Parameters ---------- value : str Value of the annotation """ self.addData("notes", value)
[docs] def timestampOnFlip(self, win, name, format=float): """Add a timestamp (in the future) to the current row Parameters ---------- win : psychopy.visual.Window The window object that we'll base the timestamp flip on name : str The name of the column in the datafile being written, such as 'myStim.stopped' format : str, class or None Format in which to return time, see clock.Timestamp.resolve() for more info. Defaults to `float`. """ # make sure the name is used when writing the datafile if name not in self.dataNames: self.dataNames.append(name) # tell win to record timestamp on flip win.timeOnFlip(self.thisEntry, name, format=format)
@property def status(self): return self._status @status.setter def status(self, value): """ Status of this experiment, from psychopy.constants. Parameters ---------- value : int One of the values from psychopy.constants. """ # log change valStr = { constants.NOT_STARTED: "NOT_STARTED", constants.STARTED: "STARTED", constants.PAUSED: "PAUSED", constants.RECORDING: "RECORDING", constants.STOPPED: "STOPPED", constants.SEEKING: "SEEKING", constants.STOPPING: "STOPPING", constants.INVALID: "INVALID" }[value] logging.exp(f"{self.name}: status = {valStr}", obj=self) # make change self._status = value
[docs] def pause(self): """ Set status to be PAUSED. """ # warn if experiment is already paused if self.status == constants.PAUSED: logging.warn(_translate( "Attempted to pause experiment '{}', but it is already paused. " "Status will remain unchanged.".format(self.name) )) # set own status self.status = constants.PAUSED
[docs] def resume(self): """ Set status to be STARTED. """ # warn if experiment is already running if self.status == constants.STARTED: logging.warn(_translate( "Attempted to resume experiment '{}', but it is not paused. " "Status will remain unchanged.".format(self.name) )) # set own status self.status = constants.STARTED
[docs] def stop(self): """ Set status to be FINISHED. """ # warn if experiment is already paused if self.status == constants.FINISHED: logging.warn(_translate( "Attempted to stop experiment '{}', but it is already stopping. " "Status will remain unchanged.".format(self.name) )) # set own status self.status = constants.STOPPED
[docs] def skipTrials(self, n=1): """ Skip ahead n trials - the trials inbetween will be marked as "skipped". If you try to skip past the last trial, will log a warning and skip *to* the last trial. Parameters ---------- n : int Number of trials to skip ahead """ # return if there isn't a TrialHandler2 active if not isinstance(self.currentLoop, TrialHandler2): return # skip trials in current loop self.currentLoop.skipTrials(n)
[docs] def rewindTrials(self, n=1): """ Skip ahead n trials - the trials inbetween will be marked as "skipped". If you try to skip past the last trial, will log a warning and skip *to* the last trial. Parameters ---------- n : int Number of trials to skip ahead """ # return if there isn't a TrialHandler2 active if not isinstance(self.currentLoop, TrialHandler2): return # rewind trials in current loop self.currentLoop.rewindTrials(n)
[docs] def getAllTrials(self): """ Returns all trials (elapsed, current and upcoming) with an index indicating which trial is the current trial. Returns ------- list[Trial] List of trials, in order (oldest to newest) int Index of the current trial in this list """ # return None if there isn't a TrialHandler2 active if not isinstance(self.currentLoop, TrialHandler2): return [None], 0 # get all trials from current loop return self.currentLoop.getAllTrials()
[docs] def getCurrentTrial(self): """ Returns the current trial (`.thisTrial`) Returns ------- Trial The current trial """ # return None if there isn't a TrialHandler2 active if not isinstance(self.currentLoop, TrialHandler2): return None return self.currentLoop.getCurrentTrial()
[docs] def getFutureTrial(self, n=1): """ Returns the condition for n trials into the future, without advancing the trials. Returns 'None' if attempting to go beyond the last trial in the current loop, or if there is no current loop. """ # return None if there isn't a TrialHandler2 active if not isinstance(self.currentLoop, TrialHandler2): return None # get future trial from current loop return self.currentLoop.getFutureTrial(n)
[docs] def getFutureTrials(self, n=1, start=0): """ Returns Trial objects for a given range in the future. Will start looking at `start` trials in the future and will return n trials from then, so e.g. to get all trials from 2 in the future to 5 in the future you would use `start=2` and `n=3`. Parameters ---------- n : int, optional How many trials into the future to look, by default 1 start : int, optional How many trials into the future to start looking at, by default 0 Returns ------- list[Trial or None] List of Trial objects n long. Any trials beyond the last trial are None. """ # blank list to store trials in trials = [] # iterate through n trials for i in range(n): # add each to the list trials.append( self.getFutureTrial(start + i) ) return trials
[docs] def nextEntry(self): """Calling nextEntry indicates to the ExperimentHandler that the current trial has ended and so further addData() calls correspond to the next trial. """ this = self.thisEntry # fetch data from each (potentially-nested) loop for thisLoop in self.loopsUnfinished: self.updateEntryFromLoop(thisLoop) # add the extraInfo dict to the data if type(self.extraInfo) == dict: this.update(self.extraInfo) self.entries.append(this) # add new entry with its self.thisEntry = {}
[docs] def updateEntryFromLoop(self, thisLoop): """ Add all values from the given loop to the current entry. Parameters ---------- thisLoop : BaseLoopHandler Loop to get fields from """ # for each name and value in the current trial... names, vals = self._getLoopInfo(thisLoop) for n, name in enumerate(names): # add/update value self.thisEntry[name] = vals[n] # make sure name is in data names if name not in self.dataNames: self.dataNames.append(name)
[docs] def getAllEntries(self): """Fetches a copy of all the entries including a final (orphan) entry if that exists. This allows entries to be saved even if nextEntry() is not yet called. :return: copy (not pointer) to entries """ # check for orphan final data (not committed as a complete entry) entries = copy.copy(self.entries) if self.thisEntry: # thisEntry is not empty entries.append(self.thisEntry) return entries
[docs] def saveAsWideText(self, fileName, delim='auto', matrixOnly=False, appendFile=None, encoding='utf-8-sig', fileCollisionMethod='rename', sortColumns=None): """Saves a long, wide-format text file, with one line representing the attributes and data for a single trial. Suitable for analysis in R and SPSS. If `appendFile=True` then the data will be added to the bottom of an existing file. Otherwise, if the file exists already it will be kept and a new file will be created with a slightly different name. If you want to overwrite the old file, pass 'overwrite' to ``fileCollisionMethod``. If `matrixOnly=True` then the file will not contain a header row, which can be handy if you want to append data to an existing file of the same format. :Parameters: fileName: if extension is not specified, '.csv' will be appended if the delimiter is ',', else '.tsv' will be appended. Can include path info. delim: allows the user to use a delimiter other than the default tab ("," is popular with file extension ".csv") matrixOnly: outputs the data with no header row. appendFile: will add this output to the end of the specified file if it already exists. encoding: The encoding to use when saving a the file. Defaults to `utf-8-sig`. fileCollisionMethod: Collision method passed to :func:`~psychopy.tools.fileerrortools.handleFileCollision` sortColumns : str or bool How (if at all) to sort columns in the data file. Can be: - "alphabetical", "alpha", "a" or True: Sort alphabetically by header name - "priority", "pr" or "p": Sort according to priority - other: Do not sort, columns remain in order they were added """ # set default delimiter if none given delimOptions = { 'comma': ",", 'semicolon': ";", 'tab': "\t" } if delim == 'auto': delim = genDelimiter(fileName) elif delim in delimOptions: delim = delimOptions[delim] if appendFile is None: appendFile = self.appendFiles # create the file or send to stdout fileName = genFilenameFromDelimiter(fileName, delim) f = openOutputFile(fileName, append=appendFile, fileCollisionMethod=fileCollisionMethod, encoding=encoding) names = self._getAllParamNames() for name in self.dataNames: if name not in names: names.append(name) # names from the extraInfo dictionary names.extend(self._getExtraInfo()[0]) if len(names) < 1: logging.error("No data was found, so data file may not look as expected.") # if sort columns not specified, use default from self if sortColumns is None: sortColumns = self.sortColumns # sort names as requested if sortColumns in ("alphabetical", "alpha", "a", True): # sort alphabetically names.sort() elif sortColumns in ("priority", "pr" or "p"): # map names to their priority priorityMap = [] for name in names: priority = self.columnPriority.get(name, self._guessPriority(name)) priorityMap.append((priority, name)) names = [name for priority, name in sorted(priorityMap, reverse=True)] # write a header line if not matrixOnly: for heading in names: f.write(u'%s%s' % (heading, delim)) f.write('\n') # write the data for each entry for entry in self.getAllEntries(): for name in names: if name in entry: ename = str(entry[name]) if ',' in ename or '\n' in ename: fmt = u'"%s"%s' else: fmt = u'%s%s' f.write(fmt % (entry[name], delim)) else: f.write(delim) f.write('\n') if f != sys.stdout: f.close() logging.info('saved data to %r' % f.name)
[docs] def saveAsPickle(self, fileName, fileCollisionMethod='rename'): """Basically just saves a copy of self (with data) to a pickle file. This can be reloaded if necessary and further analyses carried out. :Parameters: fileCollisionMethod: Collision method passed to :func:`~psychopy.tools.fileerrortools.handleFileCollision` """ # Store the current state of self.savePickle and self.saveWideText # for later use: # We are going to set both to False before saving, # so PsychoPy won't try to save again after loading the pickled # .psydat file from disk. # # After saving, the initial state of self.savePickle and # self.saveWideText is restored. # # See # https://groups.google.com/d/msg/psychopy-dev/Z4m_UX88q8U/UGuh1eeyjMEJ savePickle = self.savePickle saveWideText = self.saveWideText self.savePickle = False self.saveWideText = False origEntries = self.entries self.entries = self.getAllEntries() # otherwise use default location if not fileName.endswith('.psydat'): fileName += '.psydat' with openOutputFile(fileName=fileName, append=False, fileCollisionMethod=fileCollisionMethod) as f: pickle.dump(self, f) if (fileName is not None) and (fileName != 'stdout'): logging.info('saved data to %s' % f.name) self.entries = origEntries # revert list of completed entries post-save self.savePickle = savePickle self.saveWideText = saveWideText
[docs] def getJSON(self, priorityThreshold=constants.priority.EXCLUDE+1): """ Get the experiment data as a JSON string. Parameters ---------- priorityThreshold : int Output will only include columns whose priority is greater than or equal to this value. Use values in psychopy.constants.priority as a guideline for priority levels. Default is -9 (constants.priority.EXCLUDE + 1) Returns ------- str JSON string with the following fields: - 'type': Indicates that this is data from an ExperimentHandler (will always be "trials_data") - 'trials': `list` of `dict`s representing requested trials data - 'priority': `dict` of column names """ # get columns which meet threshold cols = [col for col in self.dataNames if self.getPriority(col) >= priorityThreshold] # convert just relevant entries to a DataFrame trials = pd.DataFrame(self.entries, columns=cols).fillna(value="") # put in context context = { 'type': "trials_data", 'thisTrial': self.thisEntry, 'trials': trials.to_dict(orient="records"), 'priority': self.columnPriority, 'threshold': priorityThreshold, } return json.dumps(context, indent=True, allow_nan=False, default=str)
[docs] def close(self): if self.dataFileName not in ['', None]: if self.autoLog: msg = 'Saving data for %s ExperimentHandler' % self.name logging.debug(msg) if self.savePickle: self.saveAsPickle(self.dataFileName) if self.saveWideText: self.saveAsWideText(self.dataFileName + '.csv') self.abort() self.autoLog = False
[docs] def abort(self): """Inform the ExperimentHandler that the run was aborted. Experiment handler will attempt automatically to save data (even in the event of a crash if possible). So if you quit your script early you may want to tell the Handler not to save out the data files for this run. This is the method that allows you to do that. """ self.savePickle = False self.saveWideText = False

Back to top