
# -*- coding: utf-8 -*-
# datastore.py

import filecmp
import getpass
import tempfile
import warnings
from pathlib import Path

from pybis import Openbis


class DataStore:
    """Convenience wrapper around an openBIS connection (via *pybis*) for looking up
    and creating projects, collections, objects and datasets."""

    url = None
    _availObj = None
    _userspace = None
    def __init__(self, url, username=None):
        self.url = url
        self.username = username
        if self.username is None:
            self.username = getpass.getuser()
        print(f"Working as user '{self.username}'.")
        # to generate a PAT you need to log in normally first
        self.ds = Openbis(url=self.url, verify_certificates=True)
        # arg. *save_token* saves the openBIS token to ~/.pybis permanently
        self.ds.login(
            self.username,
            getpass.getpass(prompt=f"Password for {self.username}: "),
            save_token=False,
        )
        # create the PAT with the given name, don't store it
        self.ds.get_or_create_personal_access_token("test-session")
    @property
    def userspace(self):
        uspace = self._userspace
        if uspace is None:
            allspaces = self.ds.get_spaces()
            uspace = allspaces.df[
                allspaces.df.code.str.endswith(self.username.upper())
            ].code.values[0]
            self._userspace = uspace
        return uspace

    @userspace.setter
    def userspace(self, name):
        name = name.upper()
        if name in self.ds.get_spaces().df.code.values:
            self._userspace = name

    @staticmethod
    def identifier(objects, code):
        return objects[objects.code == code].identifier.tolist()[0]
    def createProject(self, projectName, space, spacePrefix=None):
        """Finds the requested project in the DataStore. Matching project names can be
        limited to a given *spacePrefix*. If the project is not found, a new project
        with the given code in the given space is created."""
        # get available projects, accessible by the current user
        projectsAvail = self.ds.get_projects()
        if spacePrefix:
            projectsAvail = [prj for prj in projectsAvail if f"/{spacePrefix}_" in prj.identifier]
        projects = [prj for prj in projectsAvail if prj.code == projectName]
        assert len(projects) <= 1, f"Multiple projects found for '{projectName}'"
        dsProject = None
        if len(projects):
            # get the existing object
            dsProject = projects[0]
        else:
            # create it, if not found
            print(f"Creating project '{projectName}'")
            dsProject = self.ds.new_project(code=projectName, space=space)
            dsProject.save()
        assert dsProject
        return dsProject
    def createCollection(self, collName, projectObj, defaultObjType=None):
        collections = self.ds.get_collections(project=projectObj)
        dsColl = [coll for coll in collections if coll.code == collName.upper()]
        if len(dsColl):
            dsColl = dsColl[0]
        else:
            # create it, if not found
            print(f"Creating collection '{collName}'")
            dsColl = self.ds.new_collection(
                code=collName, type="COLLECTION", project=projectObj, props={"$name": collName}
            )
            dsColl.save()
        assert dsColl
        # update properties (name, default view and object type) if not set
        props = dsColl.props.all()  # props as dict
        propKey = "$name"
        if propKey in props and props[propKey] is None:
            props[propKey] = collName
        propKey = "$default_collection_view"
        if propKey in props.keys() and props[propKey] is None:
            propVal = [
                item
                for item in self.ds.get_vocabulary(propKey + "s").get_terms().df.code
                if "list" in item.lower()
            ]
            assert len(propVal)
            props[propKey] = propVal[0]
        if defaultObjType:
            propKey = "$default_object_type"
            if propKey in props.keys() and props[propKey] is None:
                props[propKey] = defaultObjType
        # print(f"Setting '{collName}' properties:\n    {props}")
        dsColl.set_props(props)
        dsColl.save()
        return dsColl

    def createObject(
        self,
        projectName: str,
        collectionName: str = None,
        space: str = None,
        spacePrefix: str = None,
        objType: str = None,
        props: dict = None,
    ):
        dsProject = self.createProject(projectName, space, spacePrefix=spacePrefix)
        dsColl = None
        if collectionName is None:
            # collectionName is required
            return None
        dsColl = self.createCollection(collectionName, dsProject, defaultObjType=objType)
        obj = self.ds.get_objects(type=objType, where={"$name": props["$name"]}).objects
        if len(obj):
            obj = obj[0]
            prefix = objType
            msg = "'{}' exists already in {}! Updating ...".format(
                obj.props["$name"], obj.project.identifier
            )
            warnings.warn_explicit(msg, UserWarning, prefix, 0)
        else:
            # does not exist yet
            objName = f" '{props['$name']}'" if len(props.get("$name", "")) else ""
            print(f"Creating new {objType}{objName} in {dsColl.identifier}")
            obj = self.ds.new_object(type=objType, props=props, collection=dsColl)
        obj.set_props(props)
        return obj

    def findObjects(self, *args, **kwargs):
        return self.ds.get_objects(**kwargs)

    def uploadDataset(self, obj, datasetType, fpaths=[]):
        def _checkFile(localPath, remoteFiles):
            remoteFile = [f for f in remoteFiles if f.name == localPath.name]
            if not len(remoteFile):
                # file does not exist in the dataset
                return False
            return filecmp.cmp(localPath, remoteFile[0], shallow=False)

        if not len(fpaths):
            return  # nothing to do
        for dataset in obj.get_datasets(type=datasetType):
            with tempfile.TemporaryDirectory() as tempdir:
                dataset.download(destination=tempdir)
                dsFiles = [f for f in Path(tempdir).rglob("*") if f.is_file()]
                if len(fpaths) == len(dsFiles):
                    if all([_checkFile(fpath, dsFiles) for fpath in fpaths]):
                        print(
                            f"All local files of {datasetType} match files in dataset, "
                            "not updating."
                        )
                        continue  # skip deletion below
                print(f"Dataset {datasetType} needs update, deleting existing dataset:")
                dataset.delete("Needs update")
        if not len(obj.get_datasets(type=datasetType)):
            # didn't exist yet or all deleted
            dataset = self.ds.new_dataset(
                type=datasetType, collection=obj.collection, object=obj, files=fpaths
            )
            dataset.save()
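
# A minimal usage sketch of the DataStore class above. The server URL, user name and
# the project/collection/object-type codes are hypothetical placeholders, not part of
# this module; adjust them to match your openBIS instance.
if __name__ == "__main__":
    store = DataStore("https://openbis.example.org", username="jdoe")
    # create (or update) an object in a collection of the given project
    sample = store.createObject(
        projectName="MY_PROJECT",
        collectionName="MEASUREMENTS",
        space=store.userspace,
        objType="EXPERIMENTAL_STEP",
        props={"$name": "test measurement"},
    )
    if sample is not None:
        sample.save()
        # attach local files as a dataset, replacing an outdated existing one
        store.uploadDataset(sample, "RAW_DATA", fpaths=[Path("data/measurement1.dat")])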