# Source code for p4.treefilelite

from p4.p4exceptions import P4Error
from p4.tree import Tree
from p4.nexus import Nexus
from p4.var import var
import sys

import os
import string
import io
import copy



# [docs]
class TreeFileLite(object):

    """Get trees in big files without reading the lot into memory.

    P4 Tree objects are a little obese, and large tree files will
    flood your RAM.  This class addresses that problem by reading in
    the file as text, and then creating Tree objects only on demand.
    If the trees are not saved then there should not be such a problem
    with memory.

    Instantiate with a file name.  It can handle mcmc output from p4,
    Beast, and MrBayes, and phylip format.  A file whose first line
    starts with '#' is taken to be nexus; anything else is taken to be
    phylip, with one tree per (non-blank) line.

    This can handle nexus tree descriptions with line breaks.
    However, it does not know about nexus-style 'commenting-out' [ie
    with square brackets, like this].  The 'tree', 'translate', and
    'end' keywords are recognized regardless of case, but each must be
    the first thing on its line, and the entries of a translate
    command must start on the line after the 'translate' keyword.  So
    unless your file conforms to the expectations of TreeFileLite, it
    would be best to use read().

    A nexus file that lacks its closing 'end;' (eg from an mcmc run
    that is still going) is read up to the end of the file; a final
    tree that is cut off before its ';' is ignored.

    To decrease bloat, it is not loaded by default when you start up
    p4.  To access it, you need to do::

      from p4.treefilelite import TreeFileLite

    The only method is getTree(), although you can get the tLines if
    you want.  The instance attributes are

    - fName, verbose -- as given to the constructor
    - tLines -- a list of tree descriptions, as text.  For nexus files
      the leading 'tree ' has been removed; for phylip files the lines
      are kept as they are in the file
    - nSamples -- the length of tLines
    - header -- for nexus files, the text before the first tree line,
      otherwise None
    - translationHash -- whatever Nexus.readTranslateCommand() made
      of the translate command, or None if there was none

    Eg to just get a few Tree objects::

      from p4.treefilelite import TreeFileLite 
      tfl = TreeFileLite('mcmc_trees_0.nex')
      for i in [23, 45, 67]:
          t = tfl.getTree(i)
          t.draw()

    or, to write some trees, as text (not as Tree objects), to a new
    file::

      from p4.treefilelite import TreeFileLite
      tfl = TreeFileLite('myBigFile.nex')
      f = open('mySmallerFile.nex', 'w')
      f.write(tfl.header)
      for i in range(24000,25000):
          f.write('tree %s\\n' % tfl.tLines[i])
      f.write('end;')
      f.close()


    """

    def __init__(self, fName=None, verbose=1):
        """Read the tree file fName, keeping the trees as text.

        With verbose >= 1 the number of samples is printed.  Raises
        P4Error if the file cannot be opened or if no trees are found.
        """

        gm = ["TreeFileLite()  init"]
        self.fName = fName
        self.verbose = verbose
        self.translationHash = None
        self.tLines = []
        self.header = None
        self._readTreeFile()

        self.nSamples = len(self.tLines)
        if not self.nSamples:
            gm.append("Got 0 tree samples.")
            raise P4Error(gm)
        if self.verbose >= 1:
            print("Got %i samples." % self.nSamples)

    def getTree(self, treeNum):
        """Make and return a Tree object from self.tLines[treeNum].

        Descriptions that start with '(' are parsed as newick, all
        others as nexus.  With verbose >= 3 the text is printed first.
        An out-of-range treeNum raises IndexError.
        """
        tLine = self.tLines[treeNum]
        if self.verbose >= 3:
            print(tLine)
        f = io.StringIO(tLine)
        t = Tree()
        if tLine.startswith("("):
            t.parseNewick(f, translationHash=self.translationHash)
            t.setPreAndPostOrder()
        else:
            t.parseNexus(f, translationHash=self.translationHash)
        return t

    @staticmethod
    def _isTreeLine(aLine):
        """Say whether the stripped aLine begins a nexus tree command."""
        return aLine[:5].lower() == "tree "

    @staticmethod
    def _isEndLine(aLine):
        """Say whether the stripped aLine ends the nexus trees block."""
        # The startswith("END") test is kept as it was, so that
        # everything that used to end the block still does.
        if aLine.startswith("END"):
            return True
        return aLine.lower().startswith(("end;", "endblock;"))

    def _readTreeFile(self):
        """Fill self.tLines, and for nexus self.header and self.translationHash.

        Raises P4Error if the file cannot be opened, or if a translate
        command is not terminated.  Finding no trees is not an error
        here; self.tLines is left empty and __init__() complains.
        """
        gm = ["TreeFileLite._readTreeFile()"]
        # Read in the trees
        try:
            f = open(self.fName)
        except IOError:
            gm.append("Can't find tree file '%s'" % self.fName)
            raise P4Error(gm)
        with f:
            fLines = f.readlines()

        # An empty file has no trees.
        if not fLines:
            return

        # If it is not a nexus file, it must be a phylip file, so we
        # are done.  Blank lines are not trees, so leave them out.
        if not fLines[0].strip().startswith("#"):
            self.tLines = [aLine for aLine in fLines if aLine.strip()]
            return

        stripped = [aLine.strip() for aLine in fLines]
        nLines = len(stripped)

        # So assume it is nexus.  Get the 'header', which might be
        # useful.  It's everything up to the first tree line.
        headerLines = []
        for aLine in fLines:
            aLine = aLine.lstrip()
            if aLine.lower().startswith("tree"):
                break
            headerLines.append(aLine)
        else:
            headerLines = []  # no tree line at all; something went wrong ...
        if headerLines:
            self.header = ''.join(headerLines)

        # Look for the translate command, if it exists.  It needs to
        # come before the first tree line.
        translateNum = None
        for i in range(1, nLines):
            lowLine = stripped[i].lower()
            if lowLine.startswith("translate"):
                translateNum = i
                break
            if lowLine.startswith("tree"):  # then we have gone too far
                break

        # If we got a translate line, then parse the translate
        # command.  Only the lines after the 'translate' line are
        # used, up to and including the first that ends with ';'.
        lNum = 0
        if translateNum is not None:
            translateLines = []
            lNum = translateNum + 1
            while lNum < nLines:
                translateLines.append(stripped[lNum])
                if stripped[lNum].endswith(";"):
                    break
                lNum += 1
            else:
                gm.append("The translate command in '%s' is not terminated "
                          "with a ';'" % self.fName)
                raise P4Error(gm)
            translateFlob = io.StringIO(' '.join(translateLines))
            nx = Nexus()
            self.translationHash = nx.readTranslateCommand(translateFlob)

        # Skip ahead to the first tree line.  If there is none then
        # the loop below does nothing, and we end up with no trees.
        while lNum < nLines and not self._isTreeLine(stripped[lNum]):
            lNum += 1

        # Get the tree lines.  We stop at the end of the trees block,
        # or at the end of the file if the block was never closed.
        self.tLines = []
        while lNum < nLines:
            aLine = stripped[lNum]
            if self._isEndLine(aLine):
                break
            if self._isTreeLine(aLine):
                pieces = [aLine]
                # accommodate trees with line breaks.
                while ";" not in aLine:
                    lNum += 1
                    if lNum >= nLines:
                        # The file stops part-way through this tree,
                        # so it cannot be used.
                        return
                    aLine = stripped[lNum]
                    pieces.append(aLine)
                # [5:] takes off the leading 'tree '
                self.tLines.append(''.join(pieces)[5:])
            lNum += 1