Source code for dachs.readers

#!/usr/bin/env python
# coding: utf-8

"""
Contains readers for loading and interpreting the excel files of Glen and the log files of RoWaN
"""

__author__ = "Brian R. Pauw"
__contact__ = "brian@stack.nl"
__license__ = "GPLv3+"
__date__ = "2022/12/12"
__status__ = "beta"

# import numpy as np


import logging
from pathlib import Path
from typing import List, Optional, Union

import chempy
import pandas as pd

from dachs import ureg
from dachs.equipment import PV, Equipment
from dachs.helpers import whitespaceCleanup
from dachs.metaclasses import ExperimentalSetupClass
from dachs.reagent import Chemical, Reagent
from dachs.synthesis import RawLogMessage, synthesisStep

# from pandas import Timestamp



[docs]
def readExperimentalSetup(filename: Path, SetupName: str = "AMSET_6") -> ExperimentalSetupClass:
    """
    Reads the equipment list and one setup configuration from an excel logbook.

    The "Equipment" sheet is read first: every row with an "Equipment ID" becomes an
    Equipment entry, and the rows directly following it that carry a "PV ID" are attached
    to that entry as its PVs. Then the row of the "Setup" sheet whose SetupID equals
    SetupName is looked up, and every equipment ID found in its "ID_*" columns that is
    also present in the equipment list is added to the returned setup.

    filename: path to the excel logbook, must exist
    SetupName: value to look for in the SetupID column of the "Setup" sheet
    returns: the ExperimentalSetupClass describing the requested setup
    raises: AssertionError if the file does not exist, or if there is not exactly one
        row for SetupName in the "Setup" sheet
    """
    import traceback

    assert filename.exists()

    # read equipment list:
    eq = pd.read_excel(filename, sheet_name="Equipment", index_col=None, header=0)
    # renumber after dropping the blank rows, so that the row label handed out by
    # iterrows() is also a valid position for the iloc look-ahead below
    eq = eq.dropna(how="all").reset_index(drop=True)
    nRows = len(eq)
    eqDict = {}
    for rowi, equip in eq.iterrows():
        if pd.isnull(equip["Equipment ID"]):
            continue  # skip incomplete equipment, PVs are read after each eqp
        try:
            # optional column: a missing or empty cell must become None, not the string "nan"
            priceDate = equip.get("PriceDate")
            eqItem = Equipment(
                ID=str(equip["Equipment ID"]),
                EquipmentID=str(equip["Equipment ID"]),
                EquipmentName=str(equip["Equipment Name"]),
                Manufacturer=str(equip["Manufacturer"]),
                ModelName=str(equip["Model Name"]),
                ModelNumber=str(equip["Model Number"]),
                PriceDate=None if (pd.isnull(priceDate) or not priceDate) else str(priceDate),
                UnitPrice=ureg.Quantity(str(equip["Unit Price"]) + " " + str(equip["Price Unit"])),
                UnitSize=ureg.Quantity(str(equip["Unit Size"]) + " " + str(equip["Unit"])),
                Description=equip["Description"],
                PVs={},
            )
            # look for PVs in the following rows; stop at the end of the sheet so that
            # the last equipment item does not run off the table
            pvRow = rowi + 1
            while pvRow < nRows and not pd.isnull(eq.iloc[pvRow]["PV ID"]):
                pvRec = eq.iloc[pvRow]
                pv = PV(
                    ID=pvRec["PV ID"],
                    PVName=pvRec["PV Name"],
                    Description=pvRec["PV Description"],
                    CalibrationFactor=pvRec.get("Calibration Factor"),
                    CalibrationOffset=pvRec["Calibration Offset"],
                )
                eqItem.PVs[pv.ID] = pv
                pvRow += 1
            if not pd.isnull(eqItem.ID):
                eqDict[str(equip["Equipment ID"])] = eqItem
        except Exception as e:
            # best effort: report the broken entry and carry on with the remaining equipment
            traceback.print_exc()
            print(f'Failure reading {equip["Equipment ID"]=}\n {str(e)}')

    # read setup configuration:
    df = pd.read_excel(filename, sheet_name="Setup", index_col=None, header=0)
    df = df.dropna(how="all")  # drops only the rows in which every value is NA
    dfRow = df.loc[df.SetupID == SetupName].copy()
    # the .item() calls below need exactly one matching row
    assert len(dfRow) == 1, f"More or less than one entry found for {SetupName=} in {filename=}"
    # get all equipment for the setup
    itemList = [dfRow[i].item() for i in dfRow.keys() if "ID_" in i]
    eqList = [eqDict[item] for item in itemList if item in eqDict]
    expSetup = ExperimentalSetupClass(
        ID="ExperimentalSetup",  # this gets used to name the thing in the HDF5 structure,
        # but I want the original name dfRow.SetupID.item()
        ExperimentalSetupID=dfRow.SetupID.item(),
        SetupName=dfRow.Name.item(),
        Description=whitespaceCleanup(dfRow.Description.item()),
        EquipmentList=eqList,
    )
    return expSetup




[docs]
def readRawMessageLog(filename: Path) -> List:
    """
    Loads the raw message log from sheet "Sheet1" of an excel file.

    Rows that are completely empty are dropped and the remainder is sorted by the
    "Time" column. Every remaining row is turned into a RawLogMessage whose Index is
    its position in the sorted table and whose TimeStamp is the "Time" value converted
    to a UTC timestamp.

    filename: path to the excel log file, must exist (checked with an assert)
    returns: list of RawLogMessage, in order of ascending time
    """
    assert filename.exists()
    rawLog = pd.read_excel(filename, sheet_name="Sheet1", index_col=None, header=0, parse_dates=["Time"])
    rawLog = rawLog.dropna(how="all").sort_values(by="Time", ignore_index=True)
    return [
        RawLogMessage(
            Index=position,
            TimeStamp=pd.to_datetime(entry["Time"], utc=True),
            MessageLevel=entry["Info"],
            ExperimentID=entry["ExperimentID"],
            SampleID=entry["SampleNumber"],
            Message=entry["Readout"],
            Unit=entry["Unit"],
            Value=entry["Value"],
            Using=entry.get("Using"),  # optional column, None when absent
        )
        for position, entry in rawLog.iterrows()
    ]




[docs]
def ReadStartingCompounds(filename) -> List:
    """
    Reads the "Chemicals" sheet of an excel logbook and returns one Reagent per row.

    Completely empty rows are dropped. The "Open Date" column is converted value by
    value to UTC timestamps (day first, mixed formats), where values that cannot be
    parsed are coerced rather than raising. The molar mass is not taken from the sheet
    but derived from the chempy Substance built from the "Formula" column.

    filename: path to the excel logbook, must exist (checked with an assert)
    returns: list of Reagent, in sheet order
    """
    assert filename.exists()
    chemicals = pd.read_excel(filename, sheet_name="Chemicals", index_col=None, header=0)
    chemicals = chemicals.dropna(how="all")

    def _toUtcTimestamp(rawDate):
        # dates are parsed one at a time, so every cell may use its own format
        return pd.to_datetime(rawDate, dayfirst=True, format="mixed", utc=True, errors="coerce")

    chemicals.loc[:, "Open Date"] = chemicals.loc[:, "Open Date"].apply(_toUtcTimestamp)

    # Turn the specified chemicals into a list of starting compounds
    reagents = []
    for _, record in chemicals.iterrows():
        substance = chempy.Substance.from_formula(record["Formula"])
        chemical = Chemical(
            ChemicalID=record["Reagent ID"],
            ChemicalName=record["Name"],
            ChemicalFormula=record["Formula"],
            Substance=substance,
            MolarMass=ureg.Quantity(str(substance.molar_mass())).to("g/mol"),
            Density=ureg.Quantity(str(record["Density"]) + " g/cm^3"),
        )
        reagents.append(
            Reagent(
                ID=str(record["Reagent ID"]),
                Chemical=chemical,
                CASNumber=record["CAS-Number"],
                Brand=record["Brand"],
                UNNumber=record["UN-Number"],
                MinimumPurity=assert_unit(record["Purity"], "percent"),
                OpenDate=record["Open Date"],
                StorageConditions=record["Storage Conditions"],
                UnitPrice=assert_unit(record["Unit Price"], "euro"),
                UnitSize=assert_unit(record["Unit Size"], record["Unit"]),
            )
        )
    return reagents




[docs]
def assert_unit(value, default_unit: str) -> str:
    """
    Makes sure a value comes with a unit, for interpretation by pint.

    A value that already is a string is taken to carry its own unit and is returned
    unchanged. Anything else is converted to a string and gets the default unit
    appended, separated by a single space.
    """
    if isinstance(value, str):
        return value
    return " ".join((str(value), str(default_unit)))




[docs]
def find_trigger_in_log(logEntry: synthesisStep, triggerList=("Mass",)) -> bool:
    """
    Interprets a synthesis step. If a word in the triggerList is found
    in the RawMessage of the step, it returns True, otherwise False.

    logEntry: the synthesis step whose RawMessage is searched
    triggerList: list or tuple of trigger words, the match is case-sensitive.
        The default is an immutable tuple so it cannot be altered between calls.
    returns: True as soon as one trigger is found, False if none is (or the list is empty)
    """
    return any(trigger in logEntry.RawMessage for trigger in triggerList)




[docs]
def find_reagent_in_rawmessage(searchString: str, ReagentList: List[Reagent]) -> Optional[Reagent]:
    """
    Looks for a known Reagent in a piece of text.

    The reagents are tried in list order, and the first one whose ID occurs as a
    substring of searchString is returned. Returns None if no ID occurs in it.
    """
    candidates = (reagent for reagent in ReagentList if reagent.ID in searchString)
    return next(candidates, None)




[docs]
def find_in_log(
    log: List[RawLogMessage],
    searchString: Union[str, list],
    excludeString: Optional[Union[str, list]] = None,
    Highlander: bool = True,  # there can be only one if Highlander is True
    Which: str = "first",  # if highlander, specify if first or last
    raiseWarning: bool = True,  # raises a logging.warning if it cannot be found
) -> Union[RawLogMessage, list[RawLogMessage], None]:  # Optional[Union[RawLogMessage, list[RawLogMessage]]]:
    """
    Searches the raw log for messages that contain given text.

    A log entry matches if its Message contains every entry of searchString and none
    of the entries of excludeString. The comparison is case-insensitive.

    log: the raw log messages to search, in order
    searchString: a string, or a list of strings that must all be present
    excludeString: a string, or a list of strings of which none may be present
    Highlander: if True a single message is returned, otherwise a list of all matches
    Which: only used if Highlander is True. "first" (case-insensitive) returns the
        first match, any other value returns the last match
    raiseWarning: if True, a logging.warning is issued when nothing matches
    returns: the matching message, or the list of matching messages, or None if
        nothing matches (also when Highlander is False)
    """
    if isinstance(searchString, str):
        searchString = [searchString]
    if excludeString is None:
        excludeString = []
    elif isinstance(excludeString, str):
        excludeString = [excludeString]
    # lower-case the search terms once instead of once per log entry
    wanted = [term.lower() for term in searchString]
    unwanted = [term.lower() for term in excludeString]

    matches = []
    for RLM in log:
        message = RLM.Message.lower()
        if not all(term in message for term in wanted):
            continue
        if any(term in message for term in unwanted):
            continue
        if Highlander and Which.lower() == "first":
            return RLM
        matches.append(RLM)

    # decide on the list of matches itself, never by comparing a message object with []
    if not matches:
        if raiseWarning:
            logging.warning(f"A message with {searchString=} and {excludeString=} was not found in the raw log.")
        return None
    return matches[-1] if Highlander else matches