# Source code for bripipetools.io.tophatstats

"""
Class for reading and parsing Tophat Stats metrics files.
"""
import logging

logger = logging.getLogger(__name__)


class TophatStatsFile(object):
    """
    Parser to read tables of metrics generated by custom Tophat Stats PE
    tool, stored in a tab-delimited text file.

    Each non-blank line is read as ``<value>\\t<label>``: the first
    tab-separated field is the numeric value (optionally carrying ``%``
    characters) and the second field is the label that is looked up in
    ``_METRIC_KEYS``.

    Results are accumulated in ``self.data``: the ``'raw'`` entry holds
    the lines as read from disk and the ``'table'`` entry holds the
    parsed ``{metric_name: float}`` dictionary.
    """

    # Maps the label found in the second column of the file to the key
    # used in the parsed table.
    _METRIC_KEYS = {
        'total reads in fastq file': 'fastq_total_reads',
        'reads aligned in sam file': 'reads_aligned_sam',
        'aligned': 'aligned',
        'reads with multiple alignments': 'reads_with_mult_align',
        'of aligned segments had multiple alignments':
            'algn_seg_with_mult_algn',
    }

    def __init__(self, path):
        """
        Store the file location and create the empty results container.

        :param path: path of the metrics file to parse
        """
        self.path = path
        self.data = {}

    def _read_file(self):
        """
        Read file into list of raw strings, stored under
        ``self.data['raw']``.
        """
        logger.debug("reading file '%s' to raw string list", self.path)
        with open(self.path) as f:
            self.data['raw'] = f.readlines()

    def _parse_lines(self):
        """
        Get key-value pairs from text lines and store them as a
        dictionary under ``self.data['table']``.

        Blank or whitespace-only lines are skipped. Any ``%`` character
        in the value field is removed before conversion to ``float``.

        :raises KeyError: if a line's label is not in ``_METRIC_KEYS``
        :raises IndexError: if a non-blank line has no tab-separated
            label field
        :raises ValueError: if the value field is not a valid number
        """
        logger.debug("%s", self.data['raw'])

        table = {}
        for raw_line in self.data['raw']:
            line = raw_line.strip()
            if not line:
                # A stray empty line (e.g. at the end of the file) has
                # no fields to index, so skip it instead of failing.
                continue
            fields = line.split('\t')
            value = float(fields[0].replace('%', ''))
            table[self._METRIC_KEYS[fields[1]]] = value

        self.data['table'] = table

    def parse(self):
        """
        Parse metrics table and return dictionary.

        :return: dictionary mapping metric names to float values
        """
        self._read_file()
        self._parse_lines()
        return self.data['table']