def getWorkDir(workDir=None, skip=False):
    """Find a local work dir for temporary files, created during analysis.

    The default is *$HOME/data*. The directory, including any missing parent
    directories, is created if it does not exist yet.

    Parameters
    ----------
    workDir: str or pathlib.Path, optional
        Desired working directory. If *None* or empty, *$HOME/data* is used.
    skip: bool
        True: do not use a separate work dir, stay in the current directory.

    Returns
    -------
    The absolute path of the current directory as *str* if *skip* is set,
    otherwise the working directory as *pathlib.Path*.
    """
    if skip:
        # stay in the current directory if desired
        return os.path.abspath(".")
    # Plain truthiness covers None and "" and, unlike len(), also accepts Path objects.
    if not workDir:
        workDir = Path.home() / "data"
    else:
        workDir = Path(workDir).resolve()
    # Creates missing parents too and does not fail if the directory already
    # exists; still raises FileExistsError if the path is a regular file.
    workDir.mkdir(parents=True, exist_ok=True)
    print("Using '{}' as working directory.".format(workDir))
    return workDir
def prepareWorkDir(workDir, srcDir, useExisting=False):
    """Create a temporary working directory and copy the input data (series)
    to it if not already present.

    Parameters
    ----------
    workDir: str or path-like
        Existing base directory in which the working directory is created.
        If it refers to the current directory, no copy is made.
    srcDir: str or path-like
        Existing directory containing the input data.
    useExisting: bool
        True: reuse a directory in *workDir* whose name starts with the name
        of *srcDir* followed by an underscore, if there is one.

    Returns
    -------
    The resolved *srcDir* if *workDir* is the current directory, otherwise
    the path of the reused or newly created working directory.

    Raises
    ------
    RuntimeError
        If *srcDir* is not an existing directory.
    """
    # source dir has to exist
    if not os.path.isdir(srcDir):
        raise RuntimeError("Provided source directory '{}' not found!".format(srcDir))
    srcDir = os.path.realpath(srcDir)
    # no separate work dir requested?
    if os.path.samefile(workDir, os.getcwd()):
        print("Working in current directory '{}'.".format(os.getcwd()))
        return srcDir  # nothing to do
    prefix = os.path.basename(srcDir) + "_"
    if useExisting:  # use an existing work dir, avoid copying
        # Escape glob metacharacters in the directory names so they match literally.
        pattern = glob.escape(os.path.join(workDir, prefix)) + "*"
        # Only directories qualify as work dirs; sort for a reproducible choice.
        dirs = sorted(path for path in glob.glob(pattern) if os.path.isdir(path))
        if dirs:
            return dirs[0]  # use the first match
        print("No existing work dir found, creating a new one.")
    # copy all data from src dir to a newly created work dir
    workDir = tempfile.mkdtemp(dir=workDir, prefix=prefix)
    print("Copying data to {}:".format(workDir))
    for dn in os.listdir(srcDir):
        srcPath = os.path.join(srcDir, dn)
        dstPath = os.path.join(workDir, dn)
        if os.path.isdir(srcPath):
            shutil.copytree(srcPath, dstPath)
            print(indent, dn)
        elif os.path.isfile(srcPath):
            shutil.copy(srcPath, dstPath)
            print(indent, dn)
    print("Done preparing work dir.")
    return workDir
def getDataDirs(dataDir, noWorkDir=False, reuseWorkDir=True, workDir=None):
    """Set up a local work dir holding a copy of the input data and list its
    directories.

    (Data might reside in synced folders which creates massive traffic once
    batch processing results get replaced repeatedly.)

    Parameters
    ----------
    dataDir: str or path-like
        Directory containing the input data, handed to *prepareWorkDir()*
        as source directory.
    noWorkDir: bool
        False: Copy input data to a new working dir (default),
        True: otherwise, use data where it is.
    reuseWorkDir: bool
        False: Create a new working dir each time,
        True: reuse the work dir if it exists already (default).
    workDir: str or path-like, optional
        Base directory for work dirs, handed to *getWorkDir()*.

    Returns
    -------
    A list of directory paths: the sorted subdirectories of the work dir,
    followed by the work dir itself.
    """
    baseDir = getWorkDir(workDir=workDir, skip=noWorkDir)
    workDir = prepareWorkDir(baseDir, dataDir, useExisting=reuseWorkDir)
    print("Entering '{}':".format(workDir))
    root = Path(workDir)
    # immediate subdirectories first, in sorted order ...
    subDirs = [entry for entry in root.iterdir() if entry.is_dir()]
    subDirs.sort()
    # ... and the work dir itself at the end
    subDirs.append(root)
    printFileList(subDirs, numParts=1)
    return subDirs
def getDataFiles(dataDirs, include=None, exclude=None):
    """Return the sorted paths of the files found in the given directories.

    Parameters
    ----------
    dataDirs: path or list of paths
        Directory or directories to search; each is joined with the
        *include* patterns, so the resulting paths start with it.
    include: str or list of str, optional
        Glob pattern(s) a file name has to match, everything ("*") if omitted.
    exclude: str or list of str, optional
        Paths containing any of these substrings are dropped.

    Returns
    -------
    A sorted list of the matching paths.
    """

    def matchesIn(folder, patterns=None):
        # No pattern given means everything; a single pattern is accepted as well.
        patterns = patterns or "*"
        if not isList(patterns):
            patterns = (patterns,)
        found = []
        for pattern in patterns:
            found.extend(glob.glob(os.path.join(folder, pattern)))
        return found

    # Normalize the arguments to sequences.
    exclude = exclude or ()
    if not isList(exclude):
        exclude = (exclude,)
    if not isList(dataDirs):
        dataDirs = (dataDirs,)

    files = []
    for folder in dataDirs:
        for fn in matchesIn(folder, include):
            # skip paths containing any of the excluded substrings
            if any([(token in fn) for token in exclude]):
                continue
            files.append(fn)
    print("{} files to be analyzed in subdirectories.".format(len(files)))
    return sorted(files)