Source code for p4.treefilelite
from p4.p4exceptions import P4Error
from p4.tree import Tree
from p4.nexus import Nexus
from p4.var import var
import sys
import os
import string
import io
import copy
class TreeFileLite(object):
    """Get trees in big files without reading the lot into memory.

    P4 Tree objects are a little obese, and large tree files will
    flood your RAM.  This class addresses that problem by reading in
    the file as text, and then creating Tree objects only on demand.
    If the trees are not saved then there should not be such a problem
    with memory.

    Instantiate with a file name.  It can handle mcmc output from p4,
    Beast, and MrBayes, and phylip format.

    This can handle tree descriptions with line breaks, and the nexus
    ``tree`` and ``end`` keywords are recognised whatever their case.
    A nexus file that stops before its ``end;`` line (eg a tree file
    that is still being written) is read as far as it goes; a final
    tree description with no terminating semicolon is ignored.
    However, it does not know about nexus-style 'commenting-out' [ie
    with square brackets, like this].  So unless your file conforms to
    the expectations of TreeFileLite, it would be best to use read().

    To decrease bloat, it is not loaded by default when you start up
    p4.  To access it, you need to do::

        from p4.treefilelite import TreeFileLite

    The only method is getTree(), although you can get the tLines if
    you want.

    Eg to just get a few Tree objects::

        from p4.treefilelite import TreeFileLite
        tfl = TreeFileLite('mcmc_trees_0.nex')
        for i in [23, 45, 67]:
            t = tfl.getTree(i)
            t.draw()

    or, to write some trees, as text (not as Tree objects), to a new
    file::

        from p4.treefilelite import TreeFileLite
        tfl = TreeFileLite('myBigFile.nex')
        f = open('mySmallerFile.nex', 'w')
        f.write(tfl.header)
        for i in range(24000,25000):
            f.write('tree %s\\n' % tfl.tLines[i])
        f.write('end;')
        f.close()

    Attributes set by the constructor:

    - ``fName`` and ``verbose``, as given.
    - ``tLines``, a list with the text of one tree per item.  For
      nexus files the leading ``tree`` keyword is removed.
    - ``nSamples``, the length of ``tLines``.
    - ``header``, for nexus files the text of everything before the
      first tree line (each line stripped of leading whitespace), or
      None.
    - ``translationHash``, whatever Nexus.readTranslateCommand()
      returned for the translate command, or None if the file had no
      translate command.
    """

    def __init__(self, fName=None, verbose=1):
        """Read the tree file *fName*.

        With *verbose* at 1 or more the number of samples is printed.
        Raises P4Error if the file cannot be read or if no tree
        samples are found in it.
        """
        gm = ["TreeFileLite() init"]
        self.fName = fName
        self.verbose = verbose
        self.translationHash = None
        self.tLines = []
        self.header = None

        self._readTreeFile()

        self.nSamples = len(self.tLines)
        if not self.nSamples:
            gm.append("Got 0 tree samples.")
            raise P4Error(gm)
        if self.verbose >= 1:
            print("Got %i samples." % self.nSamples)

    def getTree(self, treeNum):
        """Make and return a Tree object from sample number *treeNum*.

        *treeNum* is an index into self.tLines.  Text that starts with
        an opening parenthesis is parsed as a Newick description (and
        then setPreAndPostOrder() is called on the tree), anything
        else is parsed as the body of a nexus tree command.  With
        self.verbose at 3 or more the text is printed first.
        """
        tLine = self.tLines[treeNum]
        if self.verbose >= 3:
            print(tLine)
        f = io.StringIO(tLine)
        t = Tree()
        if tLine.startswith("("):
            t.parseNewick(f, translationHash=self.translationHash)
            t.setPreAndPostOrder()
        else:
            t.parseNexus(f, translationHash=self.translationHash)
        return t

    @staticmethod
    def _isTreeLine(aLine):
        """Say whether stripped line *aLine* starts a nexus tree command.

        That is the word 'tree', in any case, followed by a whitespace
        character, so that slicing off the first five characters
        leaves the tree name and description.
        """
        return aLine[:4].lower() == "tree" and aLine[4:5].isspace()

    def _readTreeFile(self):
        """Fill self.tLines, self.header and self.translationHash.

        Raises P4Error if there is no file name, if the file cannot be
        opened, or if a translate command is never terminated.  A file
        in which no trees are found simply leaves self.tLines empty.
        """
        gm = ["TreeFileLite._readTreeFile()"]

        if self.fName is None:
            gm.append("No tree file name was given.")
            raise P4Error(gm)

        # Read in the file, as text.
        try:
            f = open(self.fName)
        except IOError:
            gm.append("Can't find tree file '%s'" % self.fName)
            raise P4Error(gm)
        with f:
            fLines = f.readlines()

        if not fLines:
            # An empty file.  Leave tLines empty, and the caller
            # complains about getting no samples.
            return

        # If it is not a nexus file, it must be a phylip file, with
        # one tree per line, so we are done.  Blank lines are not
        # trees, so they are not kept.
        if not fLines[0].strip().startswith("#"):
            self.tLines = [aLine for aLine in fLines if aLine.strip()]
            return

        # So assume it is nexus.  Make one pass over the top of the
        # file, up to the first line that starts with 'tree', noting
        # where the translate command is, if there is one.
        nLines = len(fLines)
        firstTreeLNum = None
        translateLNum = None
        for lNum, aLine in enumerate(fLines):
            lowLine = aLine.strip().lower()
            if lowLine.startswith("tree"):
                firstTreeLNum = lNum
                break
            if translateLNum is None and lowLine.startswith("translate"):
                translateLNum = lNum

        # The 'header', which might be useful.  It's everything up to
        # the first tree line.  If there was no tree line then there
        # is no header.
        if firstTreeLNum is not None:
            self.header = ''.join(
                aLine.lstrip() for aLine in fLines[:firstTreeLNum])

        # Parse the translate command, if it exists.  Its content is
        # taken to be the lines that follow the translate line, up to
        # and including the first line that ends with a semicolon.
        lNum = 0
        if translateLNum is not None:
            translateLines = []
            lNum = translateLNum
            while 1:
                lNum += 1
                if lNum >= nLines:
                    gm.append("The translate command in file '%s' is not "
                              "terminated with a semicolon." % self.fName)
                    raise P4Error(gm)
                aLine = fLines[lNum].strip()
                translateLines.append(aLine)
                if aLine.endswith(";"):
                    break
            translateFlob = io.StringIO(' '.join(translateLines))
            nx = Nexus()
            self.translationHash = nx.readTranslateCommand(translateFlob)

        # Skip ahead to the first tree line.  Lines that start with
        # 'end' are not looked for here, as earlier blocks in the file
        # have their own 'end' lines.
        while lNum < nLines and not self._isTreeLine(fLines[lNum].strip()):
            lNum += 1

        # Get the tree lines, up to the end of the trees block, or to
        # the end of the file if that comes first.
        self.tLines = []
        while lNum < nLines:
            aLine = fLines[lNum].strip()
            if self._isTreeLine(aLine):
                # Accommodate trees with line breaks, by gathering
                # lines until we get one with a semicolon.
                pieces = [aLine]
                while ";" not in aLine:
                    lNum += 1
                    if lNum >= nLines:
                        # The file ended part way through a tree.
                        pieces = None
                        break
                    aLine = fLines[lNum].strip()
                    pieces.append(aLine)
                if pieces is None:
                    break
                # Chop off the 'tree' keyword and the character after it.
                self.tLines.append(''.join(pieces)[5:])
            elif aLine.lower().startswith("end"):
                break
            lNum += 1