Source code for pygacity.generate.answerset

# Author: Cameron F. Abrams, <cfa22@drexel.edu>
"""
A class for handling answer sets in pygacity
"""

import yaml
import os
from collections import UserList, UserDict
import pandas as pd
import logging

logger = logging.getLogger(__name__)


class AnswerSet(UserDict):
    """
    A mapping from question index to a list of answer entries.

    Each answer entry is a plain dict with the keys ``label``, ``value``,
    ``units``, ``formatter`` and ``group`` (see :meth:`register`).

    Parameters
    ----------
    data : dict, optional
        initial mapping of question index to list of answer entries
    serial : int, optional
        integer serial number; used to build the dump filename
    serialstr : str, optional
        string form of the serial (default is ``str(serial)``)
    """

    _keys = ['label', 'value', 'units', 'formatter']

    def __init__(self, data: dict = None, serial: int = 0, serialstr: str = None):
        self.serial = serial
        self.serialstr = serialstr if serialstr is not None else str(serial)
        self.dumpname = f'answers-{serial:08d}.yaml'
        self.first_index = None
        # UserDict copies the mapping it is given; None yields an empty set
        super().__init__(data)

    def __len__(self):
        return len(self.data)

    @classmethod
    def from_yaml(cls, filename: str, delete: bool = False):
        """
        Create an AnswerSet instance by loading from a YAML file of the
        same format as that generated by the to_yaml() method.

        Parameters
        ----------
        filename : str
            the YAML filename to load; its base name must be of the form
            ``answers-<serial#>.yaml`` (or ``.yml``)
        delete : bool, optional
            whether to delete the YAML file after loading (default is False)

        Returns
        -------
        AnswerSet
            An AnswerSet instance populated with data from the YAML file.

        Raises
        ------
        AssertionError
            if the filename does not have the expected extension or format
        ValueError
            if the serial part of the filename is not an integer
        """
        # Parse only the base name so that hyphens in directory names do not
        # confuse the "answers-<serial#>" check.
        root, ext = os.path.splitext(os.path.basename(filename))
        assert ext in ['.yaml', '.yml'], f'{filename} does not end in .yaml or .yml'
        tokens = root.split('-')
        assert len(tokens) == 2, f'{filename} should be of the format "answers-<serial#>.yaml"'
        serial = int(tokens[1])
        R = cls(serial=serial)
        # to_yaml() writes UTF-8, so read with the same encoding
        with open(filename, 'r', encoding='utf-8') as f:
            # safe_load returns None for an empty document; keep data a dict
            R.data = yaml.safe_load(f) or {}
        if delete:
            os.remove(filename)
        return R

    def register(self, index: object, label: str = None, value: object = None,
                 units: str = None, formatter: str = None, group: int = None):
        """
        Register an answer entry for a particular question index.

        Parameters
        ----------
        index : any
            the question index
        label : str, optional
            the label for the answer entry
        value : any, optional
            the value of the answer entry
        units : str, optional
            the units of the answer entry
        formatter : str, optional
            a format string for displaying the value
        group : int, optional
            a group identifier for the answer entry
        """
        # compare against None so a falsy first index (e.g. 0) is remembered
        if self.first_index is None:
            self.first_index = index
        entries = self.data.setdefault(index, [])
        # if value is a pint.Quantity, extract magnitude and units
        if hasattr(value, 'magnitude') and hasattr(value, 'units'):
            if units is None:
                units = f'{value.units:~P}'
            value = value.magnitude
        # if value is a numpy data type, convert to native python type
        if hasattr(value, 'item'):
            value = value.item()
        entries.append(dict(
            label=label,
            value=value,
            units=units,
            formatter=formatter,
            group=group))
        logger.debug(
            'AnswerSet.register index=%s label=%s value=%s units=%s formatter=%s group=%s',
            index, label, value, units, formatter, group)

    def display(self, index: object, element: int = 0):
        """
        Returns a formatted string for a particular answer entry.

        Parameters
        ----------
        index : any
            the question index
        element : int, optional
            the element number within the index (default is 0)

        Returns
        -------
        str
            ``'<label> = <value> <units>'`` when the entry has both a label
            and a value, the bare label when it has no value, and an empty
            string when the entry has no label or *element* is out of range.
        """
        entries = self.data[index]
        if element >= len(entries):
            return ''
        entry = entries[element]
        if not entry:
            return ''
        fmt = entry.get('formatter', None)
        val = entry.get('value', None)
        label = entry.get('label', None)
        units = entry.get('units', None)
        vstr = ''
        # test against None: 0, 0.0 and False are legitimate answers
        if val is not None:
            vstr = fmt.format(val) if fmt else str(val)
            if units:
                vstr += f' {units}'
        if not label:
            return ''
        return f'{label} = {vstr}' if vstr else label

    def to_yaml(self):
        """
        Dumps the AnswerSet to a YAML file named ``self.dumpname``.

        Any prefix common to the string forms of all indices is removed
        from the keys before dumping.  When such a prefix exists,
        ``self.data`` itself is replaced by the re-keyed dict, so the
        instance is keyed by the shortened (string) indices afterwards.
        """
        common_prefix = os.path.commonprefix([str(x) for x in self.data])
        logger.debug(f'AnswerSet.to_yaml common prefix: "{common_prefix}"')
        if common_prefix:
            cut = len(common_prefix)
            self.data = {str(index)[cut:]: entries
                         for index, entries in self.data.items()}
        with open(self.dumpname, 'w', encoding='utf-8') as f:
            yaml.safe_dump(self.data, f)
class AnswerSuperSet(UserList[AnswerSet]):
    """
    A collection of AnswerSet instances with methods to convert to pandas
    DataFrames and LaTeX tables.

    After construction, ``self.groups`` maps a group name to a dict holding
    a ``df`` (pandas DataFrame, one row per AnswerSet) and its
    ``formatters``.  When no answer entry names a group, a single group
    called ``'base'`` is created.  An empty collection has no groups.

    Parameters
    ----------
    initial : list[AnswerSet], optional
        the AnswerSet instances to collect
    """

    def __init__(self, initial: list[AnswerSet] = None):
        super().__init__(initial)
        if not self._check_congruency():
            logger.error('Error: There is a lack of congruency among answer sets')
        self._make_dfs()

    @classmethod
    def from_dumpfiles(cls, files: list[str] = None, delete: bool = False):
        """
        Create an **AnswerSuperSet** instance by loading multiple AnswerSet
        instances from YAML files.

        Parameters
        ----------
        files : list[str]
            list of YAML filenames to load
        delete : bool, optional
            whether to delete the YAML files after loading (default is False)

        Returns
        -------
        AnswerSuperSet
            An AnswerSuperSet instance populated with AnswerSet instances
            from the YAML files.
        """
        data = [AnswerSet.from_yaml(f, delete=delete) for f in (files or [])]
        return cls(initial=data)

    def to_latex(self):
        """
        Converts the **AnswerSuperSet** to a LaTeX formatted string.

        Returns
        -------
        str
            LaTeX formatted string representing the **AnswerSuperSet**;
            one longtable per group, concatenated in group order
        """
        tables = []
        for group_name, group_data in self.groups.items():
            df = group_data['df']
            formatters = group_data.get('formatters', None)
            logger.debug(f'AnswerSuperSet.to_latex group "{group_name}" with formatters: {formatters}')
            tables.append(df.to_latex(formatters=formatters, index=False, longtable=True))
        return ''.join(tables)

    def _check_congruency(self):
        """
        Checks that all **AnswerSet** instances in the collection have the
        same indices, in the same order, with the same number of entries
        per index.

        Returns
        -------
        bool
            True if all **AnswerSet** instances are congruent (or the
            collection is empty), False otherwise
        """
        if not self.data:
            return True
        reference = self.data[0].data
        ref_indices = list(reference.keys())
        for other in self.data[1:]:
            # compare whole lists: zip() would silently ignore extra or
            # missing indices
            if list(other.data.keys()) != ref_indices:
                return False
            if any(len(other.data[i]) != len(reference[i]) for i in ref_indices):
                return False
        return True

    @staticmethod
    def _column_key(index, entry: dict, single: bool) -> str:
        """Build the DataFrame column name for one answer entry."""
        # the index prefix is dropped when there is only one AnswerSet
        index_pref = '' if single else f'{index}-'
        key = f'{index_pref}{entry["label"]}'
        if entry.get('units'):
            key += f' ({entry["units"]})'
        return key

    def _make_dfs(self):
        """
        Constructs pandas DataFrames for the answer data in the collection.

        Sets ``self.formatters`` and ``self.groups``.  As a side effect the
        collection is sorted by serial and every AnswerSet's ``data`` is
        re-keyed with the common index prefix removed.
        """
        self.formatters = {}
        self.groups = {}
        if not self.data:
            # nothing to tabulate; avoids indexing an empty collection
            return

        # sort by integer serial so row order is always numerically ascending
        self.data.sort(key=lambda x: x.serial)
        serialstrs = [x.serialstr for x in self.data]
        values = {'serials': serialstrs}
        # keys in first AnswerSet form the pattern all sets follow
        pattern = self.data[0]
        single = len(self.data) == 1

        # keys may be prepended with a common prefix; remove it
        common_prefix = os.path.commonprefix([str(x) for x in pattern.data.keys()])
        logger.debug(f'Overall common prefix: "{common_prefix}"')
        cut = len(common_prefix)
        for dataset in self.data:
            dataset.data = {str(index)[cut:]: entries
                            for index, entries in dataset.data.items()}

        # first pass: create one empty column per answer entry of the pattern
        for index, entries in pattern.data.items():
            for a in entries:
                key = self._column_key(index, a, single)
                group = a.get('group', None)
                if group:
                    if group not in self.groups:
                        self.groups[group] = dict(
                            formatters={}, df=None, values={'serials': serialstrs})
                    column_store = self.groups[group]['values']
                    formatter_store = self.groups[group]['formatters']
                else:
                    column_store = values
                    formatter_store = self.formatters
                column_store[key] = []
                if 'formatter' in a:
                    formatter_store[key] = a['formatter']

        # second pass: append each AnswerSet's values to the columns
        for inst in self.data:
            for index, entries in inst.data.items():
                for a in entries:
                    key = self._column_key(index, a, single)
                    group = a.get('group', None)
                    column_store = self.groups[group]['values'] if group else values
                    column_store[key].append(a['value'])

        if not self.groups:
            self.groups['base'] = dict(formatters=self.formatters, df=pd.DataFrame(values))
        else:
            # NOTE(review): when any entry names a group, ungrouped columns
            # collected in `values` are not turned into a DataFrame --
            # confirm this is intended.
            for gname, gdata in self.groups.items():
                logger.debug(f'Building DataFrame for group {gname} with values: {gdata["values"]}')
                gdata['df'] = pd.DataFrame(gdata['values'])