"""
Class for reading and parsing htseq-count metrics files.
"""
import logging
logger = logging.getLogger(__name__)
class HtseqMetricsFile(object):
    """
    Parser to read tables of metrics generated by the htseq-count tool,
    stored in a tab-delimited text file.

    Each non-blank line is expected to hold a name and an integer value
    separated by a tab; leading underscores are removed from the name.

    :param path: path to the metrics file to read
    """
    def __init__(self, path):
        self.path = path
        # populated lazily: 'raw' by _read_file(), 'table' by _parse_lines()
        self.data = {}

    def _read_file(self):
        """
        Read file into list of raw strings, stored under ``self.data['raw']``.
        """
        logger.debug("reading file '{}' to raw string list".format(self.path))
        with open(self.path) as f:
            self.data['raw'] = f.readlines()

    def _parse_lines(self):
        """
        Get key-value pairs from the raw text lines and store them as a
        dictionary under ``self.data['table']``.

        Blank and whitespace-only lines are skipped.

        :raises IndexError: if a non-blank line has no tab-separated value
        :raises ValueError: if a value cannot be converted to an int
        """
        table = {}
        for line in self.data['raw']:
            stripped = line.strip()
            if not stripped:
                # e.g. a trailing empty line; nothing to parse
                continue
            fields = stripped.split('\t')
            # str.lstrip takes a set of characters, so this removes *all*
            # leading underscores (e.g. '__no_feature' -> 'no_feature')
            key = fields[0].lstrip('_')
            # note that htseq metrics are all ints, so cast values as ints
            table[key] = int(fields[1])
        self.data['table'] = table

    def parse(self):
        """
        Read the file, parse the metrics table and return it as a dictionary
        mapping metric names to integer values.
        """
        self._read_file()
        self._parse_lines()
        return self.data['table']