Source code for bripipetools.model.documents

"""
Classes representing documents in the GenLIMS database.
"""
import logging
import re
import datetime

from .. import util
from .. import parsing

logger = logging.getLogger(__name__)


def convert_keys(obj):
    """
    Recursively rename dictionary keys from snake_case to camelCase.

    Keys beginning with '_id' are kept exactly as they are; every other
    key has its leading underscores stripped before being passed to
    ``util.to_camel_case``. Lists are walked element by element, and any
    value that is neither a list nor a dict is returned untouched.

    :type obj: dict, list
    :param obj: A dict or list of dicts with string keys to be
        converted.

    :rtype: dict, list
    :return: A dict or list of dicts with string keys converted from
        snake_case to camelCase.
    """
    if isinstance(obj, dict):
        renamed = {}
        for key, value in obj.items():
            if re.search('^_id', key):
                # identifier fields keep their original spelling
                new_key = key
            else:
                new_key = util.to_camel_case(key.lstrip('_'))
            renamed[new_key] = convert_keys(value)
        return renamed

    if isinstance(obj, list):
        return [convert_keys(item) for item in obj]

    # scalars (and any other type) pass through unchanged
    return obj
class TG3Object(object):
    """
    Generic functions for objects in TG3 collections.

    :type _id: str
    :param _id: unique object identifier in the GenLIMS/TG3 Mongo
        database

    :type type: str
    :param type: field indicating object type in a collection

    :type is_mapped: bool
    :param is_mapped: flag indicating whether class instance was
        mapped from a database object (True) or created from
        scratch (False)
    """
    def __init__(self, _id=None, type=None, is_mapped=False):
        self._id = _id
        self.type = type
        # both timestamps start out identical; only ``last_updated`` is
        # ever moved forward (see ``update_attrs``)
        self.date_created = datetime.datetime.now()
        self.last_updated = self.date_created
        self.is_mapped = is_mapped

    def update_attrs(self, attr_map, force=False):
        """
        Given a dictionary of key-value pairs for attribute names with
        new values, update each attribute. Always update empty
        ('None') attributes and set any new attributes; update all
        modified attributes if force option is 'True'.

        When at least one attribute was set and the object is not
        flagged as mapped, ``last_updated`` is refreshed to the current
        time. A debug message is logged for each attribute set (except
        'gene_counts'), and "no attributes updated" is logged only when
        nothing was set.

        :type attr_map: dict
        :param attr_map: a dict with key-value pairs representing
            object attributes and values to which they should be set

        :type force: bool
        :param force: force overwrite of object fields in database,
            if they already exist
        """
        updated = False
        for attr, val in attr_map.items():
            if hasattr(self, attr):
                current = getattr(self, attr)
                # existing, non-empty values are only replaced when
                # forced and actually different; ``force`` is checked
                # first so the comparison is skipped when not needed
                if current is not None and not (force and current != val):
                    continue

            # 'gene_counts' is deliberately left out of the log
            # (presumably because the payload is large -- TODO confirm)
            if attr != 'gene_counts':
                logger.debug("setting attribute '%s' as '%s'", attr, val)
            setattr(self, attr, val)
            updated = True

        if not updated:
            logger.debug("no attributes updated")
        elif not self.is_mapped:
            # mapped objects keep the timestamp they were created with
            self.last_updated = datetime.datetime.now()

    def to_json(self):
        """
        Return object attributes as dictionary with keys formatted as
        camel case.

        All instance attributes in ``__dict__`` are included; key
        renaming is delegated to ``convert_keys``.

        :rtype: dict
        :return: a dict containing class instance attributes, with all
            field names converted from snake case to camel case
        """
        return convert_keys(self.__dict__)
class GenericSample(TG3Object):
    """
    GenLIMS object in the 'samples' collection

    :type project_id: int
    :param project_id: Genomics Core project number

    :type subproject_id: int
    :param subproject_id: Genomics Core sub-project number

    :type protocol_id: str
    :param protocol_id: unique ID of a protocol object in the
        GenLIMS database (in the 'protocols' collection)

    :type parent_id: str
    :param parent_id: unique ID of a sample object in the GenLIMS
        database from which the current sample was derived (in the
        'samples' collection)
    """
    def __init__(self, project_id=None, subproject_id=None,
                 protocol_id=None, parent_id=None, **kwargs):
        # sample-specific fields are stored first; everything else is
        # forwarded to the base-class initializer afterwards
        self.project_id, self.subproject_id = project_id, subproject_id
        self.protocol_id, self.parent_id = protocol_id, parent_id
        super(GenericSample, self).__init__(**kwargs)
class Library(GenericSample):
    """
    GenLIMS object in 'samples' collection of type 'library'

    The ``type`` field is fixed to 'library'; all keyword arguments
    are forwarded unchanged to ``GenericSample``.
    """
    def __init__(self, **kwargs):
        super(Library, self).__init__(type='library', **kwargs)
class SequencedLibrary(GenericSample):
    """
    GenLIMS object in 'samples' collection of type 'sequenced library'

    :type run_id: str
    :param run_id: unique ID of a run object in the GenLIMS database
    """
    def __init__(self, run_id=None, **kwargs):
        self.run_id = run_id
        # backing list for the ``raw_data`` property; starts empty
        self._raw_data = []
        super(SequencedLibrary, self).__init__(
            type='sequenced library', **kwargs
        )

    @property
    def raw_data(self):
        """
        List of dictionaries with information about each raw data
        file (i.e., FASTQ) for a sequenced library.
        """
        return self._raw_data

    @raw_data.setter
    def raw_data(self, value):
        """
        Replace the stored raw data with ``value``.
        """
        self._raw_data = value
class ProcessedLibrary(GenericSample):
    """
    GenLIMS object in 'samples' collection of type 'processed library'
    """
    def __init__(self, **kwargs):
        # backing list for the ``processed_data`` property; starts empty
        self._processed_data = []
        super(ProcessedLibrary, self).__init__(
            type='processed library', **kwargs
        )

    @property
    def processed_data(self):
        """
        List of dictionaries with information about each set of data
        processing outputs (i.e., from workflow batches).
        """
        return self._processed_data

    @processed_data.setter
    def processed_data(self, value):
        """
        Replace the stored processed data with ``value``.
        """
        self._processed_data = value
class GeneCounts(TG3Object):
    """
    Research Database object in 'counts' collection of type
    'gene counts'
    """
    def __init__(self, **kwargs):
        # backing list for the ``gene_counts`` property; starts empty
        self._gene_counts = []
        super(GeneCounts, self).__init__(type='gene counts', **kwargs)

    @property
    def gene_counts(self):
        """
        List of dictionaries with information about each library's
        gene counts.
        """
        return self._gene_counts

    @gene_counts.setter
    def gene_counts(self, value):
        """
        Replace the stored gene counts with ``value``.
        """
        self._gene_counts = value
class Metrics(TG3Object):
    """
    Research Database object in 'metrics' collection of type 'metrics'

    The ``metrics`` property reads and writes the ``_htseq`` instance
    attribute.
    """
    def __init__(self, **kwargs):
        # one-word metric name we can use as an indicator
        self._htseq = []
        super(Metrics, self).__init__(type='metrics', **kwargs)

    @property
    def metrics(self):
        """
        List of dictionaries with information about each library's
        metrics.
        """
        return self._htseq

    @metrics.setter
    def metrics(self, value):
        """
        Replace the stored metrics data with ``value``.
        """
        self._htseq = value
class GenericRun(TG3Object):
    """
    GenLIMS object in the 'runs' collection

    :type protocol_id: str
    :param protocol_id: unique ID of a protocol object in the
        GenLIMS database (in the 'protocols' collection)

    :type date: str
    :param date: string indicating date of the run in ISO 8601 format
    """
    def __init__(self, protocol_id=None, date=None, **kwargs):
        # run-specific fields are stored first; remaining keyword
        # arguments go to the base-class initializer
        self.protocol_id, self.date = protocol_id, date
        super(GenericRun, self).__init__(**kwargs)
class FlowcellRun(GenericRun):
    """
    GenLIMS object in the 'runs' collection of type 'flowcell'.

    The instrument ID, run number, flowcell ID, flowcell position and
    run date are all taken from the result of
    ``parsing.parse_flowcell_run_id`` applied to the ``_id`` keyword
    argument.
    """
    # class-level default; only becomes an instance attribute once the
    # ``flowcell_path`` setter is used
    _flowcell_path = None

    def __init__(self, **kwargs):
        parsed = parsing.parse_flowcell_run_id(kwargs.get('_id'))
        for field in ('instrument_id', 'run_number',
                      'flowcell_id', 'flowcell_position'):
            setattr(self, field, parsed[field])
        super(FlowcellRun, self).__init__(
            type='flowcell', date=parsed['date'], **kwargs
        )

    @property
    def flowcell_path(self):
        """
        Root-agnostic path to flowcell data folder.
        """
        return self._flowcell_path

    @flowcell_path.setter
    def flowcell_path(self, value):
        """
        Replace the stored flowcell path with ``value``.
        """
        self._flowcell_path = value
class GenericWorkflow(TG3Object):
    """
    GenLIMS object in the 'workflows' collection

    Adds no fields of its own; every keyword argument is handed to
    ``TG3Object``.
    """
    def __init__(self, **kwargs):
        super(GenericWorkflow, self).__init__(**kwargs)
class GlobusGalaxyWorkflow(TG3Object):
    """
    GenLIMS object in 'workflows' collection of type
    'Globus Galaxy workflow'

    The ``type`` field is fixed; all keyword arguments are forwarded
    unchanged to ``TG3Object``.
    """
    def __init__(self, **kwargs):
        super(GlobusGalaxyWorkflow, self).__init__(
            type='Globus Galaxy workflow', **kwargs
        )
class GenericWorkflowBatch(TG3Object):
    """
    GenLIMS object in the 'workflow batches' collection

    Adds no fields of its own; every keyword argument is handed to
    ``TG3Object``.
    """
    def __init__(self, **kwargs):
        super(GenericWorkflowBatch, self).__init__(**kwargs)
class GalaxyWorkflowBatch(TG3Object):
    """
    GenLIMS object in 'workflow batches' collection of type
    'Galaxy workflow batch'

    :type workflowbatch_file: str
    :param workflowbatch_file: path to file describing samples and
        parameters of Globus Galaxy workflow batch
    """
    def __init__(self, workflowbatch_file=None, **kwargs):
        self.workflowbatch_file = workflowbatch_file
        super(GalaxyWorkflowBatch, self).__init__(
            type='Galaxy workflow batch', **kwargs
        )