"""
Classify / provide details for sequenced libraries (outputs of a
flowcell sequencing run) and the associated raw data.
"""
import logging
import os
import re
from .. import parsing
from .. import database
from .. import model as docs
# Module-level logger, named after this module; used by the class below.
logger = logging.getLogger(__name__)
class SequencedLibraryAnnotator(object):
    """
    Identifies, stores, and updates information about a sequenced library.

    The sequenced library ID is built as ``<library_id>_<flowcell_id>``,
    where the library ID comes from ``parsing.get_library_id`` and the
    flowcell ID from ``parsing.parse_flowcell_run_id``.

    :param path: directory holding the raw data files for the library;
        its entries are listed with ``os.listdir``.
    :param library: library name/label, passed to
        ``parsing.get_library_id``.
    :param project: project name/label, passed to
        ``parsing.get_project_label``.
    :param run_id: flowcell run ID, passed to
        ``parsing.parse_flowcell_run_id``; the parsed result must contain
        a ``'flowcell_id'`` key.
    :param db: database handle passed through to
        ``database.get_genomicsSamples``.
    """

    def __init__(self, path, library, project, run_id, db):
        logger.debug("creating `SequencedLibraryAnnotator` instance "
                     "for library '{}'".format(library))
        self.path = path
        self.db = db
        self.library_id = parsing.get_library_id(library)
        self.project_label = parsing.get_project_label(project)
        self.run_id = run_id
        self.run_items = parsing.parse_flowcell_run_id(run_id)
        self.seqlib_id = '{}_{}'.format(self.library_id,
                                        self.run_items['flowcell_id'])
        # Must come last: the lookup below reads ``self.db`` and
        # ``self.seqlib_id``.
        self.sequencedlibrary = self._init_sequencedlibrary()

    def _init_sequencedlibrary(self):
        """
        Try to retrieve data for the sequenced library from GenLIMS;
        if unsuccessful, create new ``SequencedLibrary`` object.

        :returns: the object mapped from the first matching database
            record, or a new ``SequencedLibrary`` carrying only ``_id``
            when no record matches.
        """
        logger.debug("initializing `SequencedLibrary` instance")
        logger.debug("getting `SequencedLibrary` from GenLIMS")
        # Only the ``[0]`` lookup is guarded: an ``IndexError`` here means
        # "no matching record". An ``IndexError`` raised while mapping the
        # record would be a real bug and must not be mistaken for a miss.
        try:
            record = database.get_genomicsSamples(
                self.db, {'_id': self.seqlib_id})[0]
        except IndexError:
            logger.debug("creating new `SequencedLibrary` object")
            return docs.SequencedLibrary(_id=self.seqlib_id)
        return database.map_to_object(record)

    def _get_raw_data(self):
        """
        Locate and store details about raw data for sequenced library.

        Every entry of ``self.path`` whose name does not contain the
        substring ``'empty'`` is joined to ``self.path`` and handed to
        ``parsing.parse_fastq_filename``.

        :returns: list of the parsed results, ordered by file name.
        """
        logger.debug("collecting raw data details for library '{}'"
                     .format(self.library_id))
        # ``os.listdir`` returns entries in arbitrary order; sort so the
        # resulting list is the same from one run to the next.
        return [parsing.parse_fastq_filename(os.path.join(self.path, name))
                for name in sorted(os.listdir(self.path))
                if 'empty' not in name]

    def _update_sequencedlibrary(self):
        """
        Set project, run, parent, and raw data fields on the
        ``SequencedLibrary`` object.
        """
        logger.debug("updating `SequencedLibrary` object attributes")
        project_items = parsing.parse_project_label(self.project_label)
        update_fields = {'project_id': project_items['project_id'],
                         'subproject_id': project_items['subproject_id'],
                         'run_id': self.run_id,
                         'parent_id': self.library_id,
                         'raw_data': self._get_raw_data()}
        # NOTE(review): ``is_mapped`` is cleared before the update and
        # ``force=True`` is passed; presumably this makes ``update_attrs``
        # overwrite existing values -- confirm against the model class.
        self.sequencedlibrary.is_mapped = False
        self.sequencedlibrary.update_attrs(update_fields, force=True)

    def get_sequenced_library(self):
        """
        Return sequenced library object with updated fields.

        Refreshes the object's fields (which re-lists ``self.path``) on
        every call before returning it.
        """
        self._update_sequencedlibrary()
        logger.debug("returning sequenced library object info: {}".format(
            self.sequencedlibrary.__dict__)
        )
        return self.sequencedlibrary