Usage Examples¶
Basic Entity Operations¶
from bam_masterdata.datamodel.object_types import Sample
from bam_masterdata.datamodel.dataset_types import RawData
from bam_masterdata.datamodel.collection_types import MeasurementsCollection
# Create entities
sample = Sample(code="SAMPLE_001", name="Test Sample")
dataset = RawData(code="DATA_001", name="Raw measurement data")
collection = MeasurementsCollection(code="COLL_001", name="Test Collection")
# Convert to different formats
sample_dict = sample.to_dict()
sample_json = sample.to_json()
openbis_sample = sample.to_openbis()
Excel Integration¶
from bam_masterdata.excel.excel_to_entities import MasterdataExcelExtractor
import openpyxl
# Load Excel file
workbook = openpyxl.load_workbook("masterdata.xlsx")
# Extract entities
extractor = MasterdataExcelExtractor()
entities = extractor.excel_to_entities(workbook)
# Access extracted data
object_types = entities.get("object_types", {})
dataset_types = entities.get("dataset_types", {})
openBIS Operations¶
from bam_masterdata.openbis.login import ologin
from bam_masterdata.openbis.get_entities import OpenbisEntities
# Connect to openBIS
openbis = ologin(url="https://openbis.example.com", username="user", password="pass")
# Retrieve entities
openbis_entities = OpenbisEntities(openbis)
object_dict = openbis_entities.get_object_dict()
dataset_dict = openbis_entities.get_dataset_dict()
Validation¶
from bam_masterdata.checker.masterdata_validator import MasterdataValidator
# Validate entities
validator = MasterdataValidator()
entities_dict = {
    "object_types": object_types,
    "dataset_types": dataset_types
}
result = validator.validate(entities_dict)
if not result.is_valid:
    for error in result.errors:
        print(f"Validation error: {error}")
Code Generation¶
from bam_masterdata.cli.fill_masterdata import MasterdataCodeGenerator
# Generate Python code from entity definitions
generator = MasterdataCodeGenerator(
    objects=object_types,
    datasets=dataset_types,
    collections=collection_types,
    vocabularies=vocabulary_types
)
# Generate code for different entity types
object_code = generator.generate_object_types()
dataset_code = generator.generate_dataset_types()
vocab_code = generator.generate_vocabulary_types()
Type Definitions¶
Entity Definitions¶
The package uses several base definition classes that define the structure of entities:
EntityDef: Base definition class for all entities
ObjectTypeDef: Defines object type structure
DatasetTypeDef: Defines dataset type structure
CollectionTypeDef: Defines collection type structure
VocabularyTypeDef: Defines vocabulary structure
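For example, a dataset type can be declared by attaching a DatasetTypeDef to an entity class, mirroring the ObjectTypeDef pattern shown under "How to Create Custom Entity Types" below. The following sketch is illustrative only: the DatasetType base class and the exact keyword arguments are assumed by analogy with that pattern.
from bam_masterdata.metadata.entities import DatasetType  # assumed analogue of ObjectType
from bam_masterdata.metadata.definitions import DatasetTypeDef
class ProcessedData(DatasetType):
    # defs carries the openBIS code and description of the type
    defs = DatasetTypeDef(
        code="PROCESSED_DATA",
        description="Derived data produced from raw measurements"
    )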
Property Definitions¶
Properties are defined using:
PropertyTypeDef: Basic property definition
PropertyTypeAssignment: Property assignment to entity types
VocabularyTerm: Individual terms in controlled vocabularies
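PropertyTypeAssignment is demonstrated under "How to Create Custom Entity Types" below. For VocabularyTerm, a controlled vocabulary might be declared as in the following sketch; the VocabularyType base class and the exact keyword arguments are assumptions based on the same declaration pattern.
from bam_masterdata.metadata.entities import VocabularyType  # assumed base class
from bam_masterdata.metadata.definitions import VocabularyTypeDef, VocabularyTerm
class StorageFormat(VocabularyType):
    defs = VocabularyTypeDef(
        code="STORAGE_FORMAT",
        description="Supported storage formats"
    )
    # Each VocabularyTerm becomes one selectable value of the vocabulary
    zip_format = VocabularyTerm(code="ZIP", label="ZIP archive", description="Compressed ZIP archive")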
Data Types¶
The system supports several data types for properties:
VARCHAR: Variable-length character strings
MULTILINE_VARCHAR: Multi-line text
INTEGER: Whole numbers
REAL: Floating-point numbers
BOOLEAN: True/false values
TIMESTAMP: Date and time values
CONTROLLEDVOCABULARY: Values from controlled vocabularies
XML: Structured XML data
Configuration¶
The package can be configured through various mechanisms:
Environment Variables¶
BAM_MASTERDATA_CONFIG_PATH: Path to configuration file
OPENBIS_URL: Default openBIS server URL
OPENBIS_USERNAME: Default username for openBIS
LOG_LEVEL: Logging level (DEBUG, INFO, WARNING, ERROR)
Configuration File¶
# config.yaml
openbis:
  url: "https://openbis.example.com"
  username: "default_user"
  timeout: 30
validation:
  strict_mode: true
  required_properties: ["name", "description"]
logging:
  level: "INFO"
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
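When no configuration file is available, the environment variables listed above can also be read directly with the standard library; a minimal sketch (the fallback values are placeholders):
import os
# Fall back to illustrative defaults when a variable is not set
openbis_url = os.environ.get("OPENBIS_URL", "https://openbis.example.com")
openbis_user = os.environ.get("OPENBIS_USERNAME", "default_user")
log_level = os.environ.get("LOG_LEVEL", "INFO")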
Programmatic Configuration¶
from bam_masterdata.config import Configuration
# Load configuration
config = Configuration.load_from_file("config.yaml")
# Override specific settings
config.set("openbis.url", "https://my-openbis.com")
config.set("validation.strict_mode", False)
# Apply configuration
Configuration.apply(config)
Working with Entity Types¶
How to Create Custom Entity Types¶
from bam_masterdata.metadata.entities import ObjectType
from bam_masterdata.metadata.definitions import ObjectTypeDef, PropertyTypeAssignment
class MyCustomObject(ObjectType):
    defs = ObjectTypeDef(
        code="MY_CUSTOM_OBJECT",
        description="A custom object type for specific needs"
    )
    custom_property = PropertyTypeAssignment(
        code="CUSTOM_PROP",
        data_type="VARCHAR",
        property_label="Custom Property",
        description="A custom property for this object",
        mandatory=True,
        section="Custom Section"
    )
How to Query Available Properties¶
from bam_masterdata.datamodel.object_types import Sample
# Get all available properties for a Sample
sample = Sample()
properties = sample.get_property_metadata()
for prop_code, metadata in properties.items():
    print(f"Property: {prop_code}")
    print(f"  Label: {metadata.property_label}")
    print(f"  Type: {metadata.data_type}")
    print(f"  Mandatory: {metadata.mandatory}")
    print()
How to Validate Entity Data¶
from bam_masterdata.checker.masterdata_validator import MasterdataValidator
# Create validator
validator = MasterdataValidator()
# Validate your entity data
entities_dict = {"object_types": {"SAMPLE_001": sample.to_dict()}}
validation_result = validator.validate(entities_dict)
if validation_result.is_valid:
    print("✓ Validation passed")
else:
    print("✗ Validation failed:")
    for error in validation_result.errors:
        print(f"  - {error}")
Data Import and Export¶
How to Import from Excel Files¶
[Image placeholder: Excel spreadsheet template showing the correct structure for masterdata import with headers, entity types, properties, and sample data rows.]
from bam_masterdata.excel.excel_to_entities import MasterdataExcelExtractor
# Initialize extractor
extractor = MasterdataExcelExtractor()
# Load workbook
import openpyxl
workbook = openpyxl.load_workbook("masterdata.xlsx")
# Extract entities
entities = extractor.excel_to_entities(workbook)
# Access extracted data
object_types = entities.get("object_types", {})
dataset_types = entities.get("dataset_types", {})
print(f"Loaded {len(object_types)} object types")
print(f"Loaded {len(dataset_types)} dataset types")
How to Export to Excel¶
from bam_masterdata.cli.entities_to_excel import entities_to_excel
from bam_masterdata.metadata.entities_dict import EntitiesDict
# Prepare your entities data
entities_dict = EntitiesDict({
    "object_types": {"SAMPLE_001": sample.to_dict()},
    "dataset_types": {},
    "collection_types": {},
    "vocabulary_types": {}
})
# Export to Excel
entities_to_excel(entities_dict, "output.xlsx")
print("✓ Exported to output.xlsx")
How to Export to JSON¶
import json
# Single entity to JSON
entity_json = sample.to_json()
# Multiple entities to JSON
entities_dict = {
    "object_types": {
        "SAMPLE_001": sample.to_dict()
    }
}
with open("entities.json", "w") as f:
    json.dump(entities_dict, f, indent=2)
How to Export to RDF¶
from bam_masterdata.cli.entities_to_rdf import entities_to_rdf, rdf_graph_init
# Initialize RDF graph
graph = rdf_graph_init()
# Add entities to graph
entities_to_rdf(entities_dict, graph)
# Save to file
graph.serialize(destination="entities.ttl", format="turtle")
print("✓ Exported to entities.ttl")
Working with openBIS¶
How to Connect to openBIS¶
from bam_masterdata.openbis.login import ologin
# Connect to an openBIS instance
openbis = ologin(
    url="https://your-openbis-instance.com",
    username="your_username",
    password="your_password"
)
print(f"✓ Connected to openBIS: {openbis.get_server_information()}")
How to Retrieve Entities from openBIS¶
from bam_masterdata.openbis.get_entities import OpenbisEntities
# Initialize entities extractor
entities_extractor = OpenbisEntities(openbis)
# Get all object types
object_types = entities_extractor.get_object_dict()
print(f"Retrieved {len(object_types)} object types from OpenBIS")
# Get specific vocabulary
vocabularies = entities_extractor.get_vocabulary_dict()
storage_formats = vocabularies.get("STORAGE_FORMAT", {})
print(f"Storage format terms: {list(storage_formats.get('terms', {}).keys())}")
How to Push Data to openBIS¶
[Image placeholder: Screenshot of the openBIS interface showing uploaded masterdata with entity browser and property views.]
from bam_masterdata.metadata.entities import ObjectType
# Create entities in openBIS
for obj_code, obj_data in object_types.items():
    obj_type = ObjectType.from_dict(obj_data)
    openbis_obj = obj_type.to_openbis()
    # Register with openBIS
    result = openbis.create_object_type(openbis_obj)
    print(f"✓ Created object type: {obj_code}")
Command Line Operations¶
How to Use the CLI for Bulk Operations¶
# Export all masterdata to Excel
bam_masterdata export_to_excel masterdata_export.xlsx
# Export specific entity types to JSON
bam_masterdata export_to_json --entity-types object_types dataset_types output.json
# Run consistency checker
bam_masterdata checker --verbose
# Fill masterdata from openBIS
# environment variables OPENBIS_USERNAME and OPENBIS_PASSWORD are required for authentication
bam_masterdata fill_masterdata --url https://openbis.example.com
How to Generate Code from Masterdata¶
from bam_masterdata.cli.fill_masterdata import MasterdataCodeGenerator
# Initialize code generator with entities data
generator = MasterdataCodeGenerator(
    objects=object_types,
    datasets=dataset_types,
    collections=collection_types,
    vocabularies=vocabulary_types
)
# Generate Python code for dataset types
dataset_code = generator.generate_dataset_types()
# Save to file
with open("generated_dataset_types.py", "w") as f:
f.write(dataset_code)
print("✓ Generated dataset types code")
Advanced Usage¶
How to Create Custom Validators¶
from bam_masterdata.checker.masterdata_validator import MasterdataValidator
class CustomValidator(MasterdataValidator):
    def validate_custom_rules(self, entities_dict):
        """Add custom validation rules."""
        errors = []
        # Example: check that all samples have required properties
        for obj_code, obj_data in entities_dict.get("object_types", {}).items():
            if obj_data.get("code", "").startswith("SAMPLE_"):
                required_props = ["material_type", "dimensions"]
                for prop in required_props:
                    if prop not in obj_data.get("properties", {}):
                        errors.append(f"Sample {obj_code} missing required property: {prop}")
        return errors
# Use custom validator
validator = CustomValidator()
custom_errors = validator.validate_custom_rules(entities_dict)
How to Handle Large Datasets¶
from concurrent.futures import ProcessPoolExecutor
from bam_masterdata.metadata.entities import ObjectType
def process_entity_batch(entities_batch):
    """Process a batch of entities."""
    results = []
    for entity_data in entities_batch:
        # Process individual entity
        entity = ObjectType.from_dict(entity_data)
        results.append(entity.to_dict())
    return results
# Process entities in parallel
entities_list = list(object_types.values())
batch_size = 100
batches = [entities_list[i:i + batch_size] for i in range(0, len(entities_list), batch_size)]
with ProcessPoolExecutor() as executor:
    results = list(executor.map(process_entity_batch, batches))
print(f"✓ Processed {len(entities_list)} entities in {len(batches)} batches")
How to Extend Entity Relationships¶
# Add relationships between entities
# (assuming Instrument is defined alongside Sample in the datamodel)
from bam_masterdata.datamodel.object_types import Sample, Instrument
sample = Sample(code="SAMPLE_001")
instrument = Instrument(code="INSTR_001")
# Create relationship
sample.add_relationship("measured_with", instrument)
# Access relationships
relationships = sample.get_relationships()
print(f"Sample relationships: {relationships}")
Troubleshooting Common Issues¶
Memory Issues with Large Files¶
# Use streaming for large Excel files
# (pandas.read_excel does not support chunked reading; openpyxl's read-only mode streams rows instead)
import openpyxl
workbook = openpyxl.load_workbook("large_file.xlsx", read_only=True)
sheet = workbook.active
# Read rows in chunks
chunk_size = 1000
chunk = []
for row in sheet.iter_rows(values_only=True):
    chunk.append(row)
    if len(chunk) >= chunk_size:
        # Process chunk and save incrementally, then discard it to keep memory bounded
        entities_batch = extractor.process_chunk(chunk)
        chunk = []
if chunk:
    entities_batch = extractor.process_chunk(chunk)
Performance Optimization¶
# Use caching for repeated operations
from functools import lru_cache
@lru_cache(maxsize=128)
def get_cached_entity(entity_code):
    return entity_registry.get(entity_code)
# Batch operations instead of individual calls
entities_to_create = []
for entity_data in batch_data:
    entities_to_create.append(ObjectType.from_dict(entity_data))
# Create all at once
result = openbis.create_objects(entities_to_create)