Source code for bripipetools.io.htseqmetrics

"""
Class for reading and parsing htseq-count metrics files.
"""
import logging

logger = logging.getLogger(__name__)


class HtseqMetricsFile(object):
    """
    Parser to read tables of metrics generated by the htseq-count tool,
    stored in a tab-delimited text file.

    Parsed content is kept in ``self.data``: the ``'raw'`` key holds the
    list of lines read from the file and the ``'table'`` key holds the
    dictionary mapping each metric name to its integer value.

    :param path: path to the htseq-count metrics file
    """

    def __init__(self, path):
        self.path = path
        self.data = {}

    def _read_file(self):
        """
        Read file into list of raw strings, stored under ``data['raw']``.
        """
        logger.debug("reading file '{}' to raw string list".format(self.path))
        with open(self.path) as f:
            self.data['raw'] = f.readlines()

    def _parse_lines(self):
        """
        Get key-value pairs from the raw text lines and store them as a
        dictionary under ``data['table']``.

        Each non-empty line is split on tabs; the first field (with any
        leading underscores removed) becomes the key and the second field
        is cast to ``int``. Blank or whitespace-only lines are skipped.

        :raises IndexError: if a non-empty line has no second tab-separated
            field
        :raises ValueError: if the second field is not an integer
        """
        table = {}
        for line in self.data['raw']:
            stripped = line.strip()
            if not stripped:
                # tolerate blank lines (e.g., a trailing empty line) rather
                # than failing on the missing value field
                continue
            fields = stripped.split('\t')
            # note: lstrip treats its argument as a set of characters, so
            # this removes every leading underscore from the metric name
            key = fields[0].lstrip('__')
            # note that htseq metrics are all ints, so cast values as ints
            table[key] = int(fields[1])
        self.data['table'] = table

    def parse(self):
        """
        Parse metrics table and return dictionary.

        Reads the file at ``self.path``, parses its lines, and returns the
        resulting ``data['table']`` dictionary.
        """
        self._read_file()
        self._parse_lines()
        return self.data['table']