Source code for emva1288.process.parser

# -*- coding: utf-8 -*-
# Copyright (c) 2014 The EMVA1288 Authors. All rights reserved.
# Use of this source code is governed by a GNU GENERAL PUBLIC LICENSE that can
# be found in the LICENSE file.

"""EMVA 1288 descriptor parser
This class takes an EMVA1288 descriptor file and loads its content into a
python dictionary.

An EMVA1288 descriptor file is a file that contains the description of an
EMVA1288 test including exposure times, photon count and corresponding images
"""

import numpy as np
import os
import pprint
import logging


class ParseEmvaDescriptorFile(object):
    """Take an image descriptor file and transform it into a usable dictionary.

    After construction the parsed content is available through:

    ``version``
        The string found on the ``v`` line, or ``None`` if there is none.
    ``format``
        Dict with the integer keys ``bits``, ``width`` and ``height`` taken
        from the ``n`` line (empty if the file has no ``n`` line).
    ``images``
        ``{'temporal': {...}, 'spatial': {...}}`` where each inner dict has
        the form ``{exposure: {photons: [fname1, fname2, ...]}}``.
        Dark series are stored with a photon count of ``0.0``.
    """

    def __init__(self, filename, path=None, loglevel=logging.INFO):
        """Parser init method.

        Uses a :class:`python:logging.Logger` object to print infos of the
        parsing process.

        This method :meth:`loads <_load_file>` the file and
        :meth:`gets <_fill_info>` the information from it.

        Parameters
        ----------
        filename : str
            The descriptor file's name or the complete path to it.
        path : str, optional
            The path to the descriptor file. If omitted, the directory part
            of ``filename`` is used to build the image paths.
        loglevel : int, optional
            The logger level.
        """
        # Base directory that is prepended to every image path.
        self._path = path
        self.format = {}  # bits, width, height
        self.version = None
        # exposure: {photons: [fname1, fname2, ...]}; dark series use 0.0
        self.images = {'temporal': {}, 'spatial': {}}

        logging.basicConfig()
        self.log = logging.getLogger('Parser')
        self.log.setLevel(loglevel)

        self._load_file(filename)
        self._fill_info()
        self.log.debug(pprint.pformat(self.images))

    def _get_images_filenames(self):
        """Consume the consecutive ``i filename`` lines of the current series.

        Lines are popped from ``self._lines`` until a non-image line is met;
        that line is pushed back so the caller can process it.

        Returns
        -------
        list of str
            The image paths, each joined onto ``self._path``.

        Raises
        ------
        SyntaxError
            If an image line does not have exactly two fields, or if fewer
            than two image lines were found.
        """
        fnames = []
        while self._lines:
            line = self._lines.pop()
            fields = self._split_line(line)
            if fields[0] != 'i':
                # End of the image series: give the line back to the caller.
                self._lines.append(line)
                break
            if len(fields) != 2:  # pragma: no cover
                raise SyntaxError('Wrong format: "%s" should be "i filename"'
                                  % line)
            # Split on backslashes so that paths written with Windows
            # separators are rebuilt with the local separator.
            npath = os.path.normpath(fields[1])
            fnames.append(os.path.join(self._path, *npath.split('\\')))

        if len(fnames) < 2:  # pragma: no cover
            raise SyntaxError('Each image series, has to '
                              'have at least two images')
        return fnames

    def _get_kind(self, fnames):
        """Guess the kind of data from the number of images.

        Returns ``'temporal'`` for exactly two images per measurement point
        and ``'spatial'`` for any other count.
        """
        return 'temporal' if len(fnames) == 2 else 'spatial'

    def _add_pcount(self, exposure, photons, fnames):
        """Register the images of one exposure/photon-count pair.

        Parameters
        ----------
        exposure : float
            Exposure time used as first-level key.
        photons : float
            Photon count used as second-level key.
        fnames : list of str
            Image paths stored under ``self.images[kind][exposure][photons]``.

        Raises
        ------
        SyntaxError
            If an entry already exists for this exposure and photon count,
            so that existing data is never overwritten.
        """
        kind = self._get_kind(fnames)
        per_exposure = self.images[kind].setdefault(exposure, {})
        if photons in per_exposure:  # pragma: no cover
            raise SyntaxError('Only one set of images exp %.3f photons %.3f'
                              % (exposure, photons))
        per_exposure[photons] = fnames

    def _fill_info(self):
        """Parse every loaded line according to its first field.

        Fills ``self.version``, ``self.format`` and ``self.images``.
        Lines whose first field is not recognised are reported with a
        warning and skipped.

        Raises
        ------
        SyntaxError
            If a recognised line has the wrong number of fields, or if more
            than one ``n`` line is present.
        """
        # descriptor file supposed format
        # v version
        # n bits width height
        # b exposureTime(ns) numberPhotons (bright image)
        # i relativePathToTheImage
        # d exposureTime(ns) (dark image)

        # Reverse once so that pop() hands out the lines in file order and
        # _get_images_filenames can consume the following image lines.
        self._lines.reverse()
        while self._lines:
            line = self._lines.pop()
            fields = self._split_line(line)
            command = fields[0]

            if command == 'v':
                self.version = fields[1]
                self.log.info('Version ' + fields[1])
                continue

            if command == 'n':
                if len(fields) != 4:  # pragma: no cover
                    raise SyntaxError('Wrong format: "%s" should be "n bits '
                                      'width height"' % line)
                if self.format:  # pragma: no cover
                    # A second format line would silently change the format.
                    raise SyntaxError('Only one "n bits width height" is '
                                      'allowed per file')
                self.format['bits'] = int(fields[1])
                self.format['width'] = int(fields[2])
                self.format['height'] = int(fields[3])
                continue

            if command == 'b':
                if len(fields) != 3:  # pragma: no cover
                    raise SyntaxError('Wrong format: "%s" should be "b '
                                      'exposure photons"' % line)
                # Accept a decimal comma as well as a decimal point.
                exposure = float(fields[1].replace(',', '.'))
                photons = float(fields[2].replace(',', '.'))
                self._add_pcount(exposure, photons,
                                 self._get_images_filenames())
                continue

            if command == 'd':
                if len(fields) != 2:  # pragma: no cover
                    raise SyntaxError('Wrong format: "%s" should be "d '
                                      'exposure"' % line)
                exposure = float(fields[1].replace(',', '.'))
                # Dark images are stored with a photon count of 0.0.
                self._add_pcount(exposure, float(0.0),
                                 self._get_images_filenames())
                continue

            self.log.warning('Unknown command ' + line)  # pragma: no cover

    def _split_line(self, line):
        """Split a descriptor line into its whitespace-separated fields.

        Raises
        ------
        SyntaxError
            If the line has fewer than two fields.
        """
        fields = line.split()
        if len(fields) < 2:  # pragma: no cover
            raise SyntaxError('Wrong format line: %s' % line)
        return fields

    def _load_file(self, filename):
        """Load the descriptor file into ``self._lines``.

        Blank lines and comment lines (starting with ``#``) are dropped and
        the remaining lines are stripped. If no path was given to the
        constructor, ``self._path`` is set to the directory of ``filename``.

        Raises
        ------
        UnicodeDecodeError
            If the file content cannot be decoded; the reason names the
            offending file.
        """
        self.log.info('Opening ' + filename)
        try:
            # The context manager guarantees the handle is closed, even when
            # decoding fails half way through the file.
            with open(filename, 'r') as f:
                if self._path is None:
                    self._path = os.path.dirname(filename)
                stripped = (raw.strip() for raw in f)
                self._lines = [text for text in stripped
                               if text and not text.startswith('#')]
        except UnicodeDecodeError as err:  # pragma: no cover
            # UnicodeDecodeError needs all five constructor arguments, so
            # reuse the original ones and only replace the reason.
            raise UnicodeDecodeError(
                err.encoding, err.object, err.start, err.end,
                "File: '%s', has non-utf8 characters."
                "Find them and kill them!" % filename) from err