import string
import io
import sys
import os
from p4.tree import Tree
from p4.node import Node, NodePart, NodeBranchPart
from p4.trees import Trees
from p4.nexus import Nexus, NexusData
from p4.p4exceptions import P4Error
from p4.nexustoken import nextTok, safeNextTok, nexusSkipPastNextSemiColon
import p4.func
from p4.var import var
longMessage1 = """
This table shows, for splits that were used in the cons tree (ie not
necessarily all splits), which of those splits had the root on it in
the original bifurcating input trees. The tree as returned from this
method is rooted on a node on the branch with the highest biRootCount
(or if there were several highest, it is rooted on a node arbitrarily
on the first of those).
"""
longMessage2 = """
This table shows, for nodes derived from splits that were used in the
cons tree (ie not necessarily all splits), which of those nodes were
roots in the original input trees. The tree as returned from this
method is rooted on the node with the highest rootCount (or if there
are several highest, it is rooted arbitrarily on the first of
those).
"""
class SplitModelUsage(object):
def __init__(self, TPMI):
if not isinstance(TPMI, TreePartitionsModelInfo):
gm = ["SplitModelUsage init."]
gm.append("Expecting a TreePartitionsModelInfo instance.")
raise P4Error(gm)
self.nParts = TPMI.nParts
self.parts = []
for i in range(self.nParts):
SMTP = SplitModelUsagePart()
SMTP.compCounts = [0] * TPMI.parts[i].nComps
SMTP.rMatrixCounts = [0] * TPMI.parts[i].nRMatrices
SMTP.gdasrvCounts = [0] * TPMI.parts[i].nGdasrvs
self.parts.append(SMTP)
def dump(self):
print(" modelUsage nParts=%s" % self.nParts)
for pNum in range(self.nParts):
print(" part %s" % pNum)
print("%35s = %s" % ('compCounts', self.parts[pNum].compCounts))
print("%35s = %s" % ('rMatrixCounts', self.parts[pNum].rMatrixCounts))
print("%35s = %s" % ('gdasrvCounts', self.parts[pNum].gdasrvCounts))
def zero(self):
for p in self.parts:
for i in range(len(p.compCounts)):
p.compCounts[i] = 0
for i in range(len(p.rMatrixCounts)):
p.rMatrixCounts[i] = 0
for i in range(len(p.gdasrvCounts)):
p.gdasrvCounts[i] = 0
class SplitModelUsagePart(object):
def __init__(self):
self.compCounts = None
self.rMatrixCounts = None
self.gdasrvCounts = None
class Split(object):
# def __init__(self, nParts, nModels):
def __init__(self):
# self.set is a 1-based set of integers representing taxa on
# one side of the split. So naturally the taxa have to be
# ordered. It is 1-based because when I make a consensus, the
# first thing I do is make a comb tree, and the terminal node
# numbers in that tree and the resulting cons tree will
# correspond to these 1-based set numbers.
self.set = None
self.key = None
self.string = None
self.count = 0.0
self.rootCount = 0.0
# rootCount2 is needed only for non-biRoot trees that are
# rooted on the parent of the first taxon.
self.rootCount2 = 0.0
self.proportion = 0.0
self.cumBrLen = 0.0
self.biRootCumBrLen = 0.0
self.modelUsage = None
self.biRootModelUsage = None
self.rootModelUsage = None
def dump(self):
print(" Split dump:")
print("%25s = %s" % ('set', self.set))
print("%25s = %s" % ('key', self.key))
print("%25s = %s" % ('string', self.string))
print("%25s = %s" % ('count', self.count))
print("%25s = %s" % ('rootCount', self.rootCount))
print("%25s = %s" % ('proportion', self.proportion))
print("%25s = %s" % ('cumBrLen', self.cumBrLen))
if self.modelUsage:
self.modelUsage.dump()
if self.rootModelUsage:
print(" rootModelUsage.dump()")
# self.rootModelUsage.dump()
for pNum in range(self.rootModelUsage.nParts):
print(" part %s" % pNum)
print("%35s = %s" % ('compCounts', self.rootModelUsage.parts[pNum].compCounts))
def combineWith(self, otherSplitObject):
# no need to do proportion -- that is handled by finishSplits()
# model stuff is ignored -- actually, wiped! -- due to lazy
# programming.
oso = otherSplitObject
assert self.key == oso.key
self.count += oso.count
self.rootCount += oso.rootCount
self.rootCount2 += oso.rootCount2
self.cumBrLen += oso.cumBrLen
self.biRootCumBrLen += oso.biRootCumBrLen
self.modelUsage = None
self.biRootModelUsage = None
self.rootModelUsage = None
class TreePartitionsModelInfo(object):
def __init__(self):
self.nParts = 0
self.parts = [] # TreePartitionsModelInfoPart objects
def dump(self):
print("TreePartitionsModelInfo.dump() nParts=%s" % self.nParts)
for i in range(self.nParts):
self.parts[i].dump()
def check(self):
gm = ['TreePartitionsModelInfo.check()']
if self.nParts < 1:
gm.append("No parts.")
raise P4Error(gm)
isHetero = 0
for pNum in range(self.nParts):
p = self.parts[pNum]
if p.partNum != pNum:
self.dump()
gm.append("partNum is wrong.")
raise P4Error(gm)
if p.nComps < 1:
gm.append("Incomplete model info. Part %s, no nComps." % pNum)
raise P4Error(gm)
elif p.nComps > 1:
isHetero = 1
if p.nRMatrices < 1:
gm.append(
"Incomplete model info. Part %s, no nRMatrices." % pNum)
raise P4Error(gm)
elif p.nRMatrices > 1:
isHetero = 1
if p.nGdasrvs < 0:
gm.append(
"Incomplete model info. Part %s, nGdasrvs not specified." % pNum)
raise P4Error(gm)
elif p.nGdasrvs > 1:
isHetero = 1
if isHetero:
return 2
else:
return 1
class TreePartitionsModelInfoPart(object):
def __init__(self):
self.partNum = -1
self.nComps = 0
self.nRMatrices = 0
self.nGdasrvs = -1
def dump(self):
# print " TreePartitionsModelInfoPart.dump partNum=%s" %
# self.partNum
print(" partNum=%s" % self.partNum)
print(" nComps = %s" % self.nComps)
print(" nRMatrices = %s" % self.nRMatrices)
print(" nGdasrvs = %s" % self.nGdasrvs)
def _getModelInfo(theComment):
# Eg [&&p4 models p1 c0.2 r0.2 g0.1]
# or [&&p4 models p2 c0.2 r0.2 g0.1 c1.1 r1.1 g1.1]
gm = ['TreePartitions._getModelInfo()']
# print "_getModelInfo(), from theComment = %s" % theComment
#flob = io.BytesIO(theComment)
flob = io.StringIO(theComment)
flob.seek(1, 0) # past the [
tok = nextTok(flob)
if not tok or tok != '&&p4':
print("a got tok=%s" % tok)
flob.close()
return # not an error, just the wrong kind of comment
tok = nextTok(flob)
if not tok or tok != 'models':
print("b got tok=%s" % tok)
flob.close()
gm.append("Expecting 'models'. Got %s" % tok)
raise P4Error(gm)
tok = nextTok(flob)
if tok[0] != 'p':
gm.append("Expecting 'pN'. Got %s" % tok)
raise P4Error(gm)
try:
nParts = int(tok[1:])
if nParts > 0:
theModelInfo = TreePartitionsModelInfo()
theModelInfo.nParts = nParts
for i in range(nParts):
mp = TreePartitionsModelInfoPart()
mp.partNum = i
theModelInfo.parts.append(mp)
else:
theModelInfo = None
except ValueError:
gm.append("Failed to parse model comment.")
raise P4Error(gm)
tok = safeNextTok(flob)
while 1:
# print "top of loop, tok = %s" % tok
if tok == ']':
break
elif tok[0] in ['c', 'r', 'g']:
ending = tok[1:]
splitEnding = ending.split('.')
# print "got splitEnding = %s" % splitEnding
try:
firstNum = int(splitEnding[0])
secondNum = int(splitEnding[1])
except ValueError:
gm.append("Failed to parse numbers in model comment.")
raise P4Error(gm)
if tok[0] == 'c':
theModelInfo.parts[firstNum].nComps = secondNum
elif tok[0] == 'r':
theModelInfo.parts[firstNum].nRMatrices = secondNum
elif tok[0] == 'g':
theModelInfo.parts[firstNum].nGdasrvs = secondNum
else:
gm.append("Bad token %s")
raise P4Error(gm)
tok = safeNextTok(flob)
# theModelInfo.dump()
return theModelInfo
class TP_TinyTax(object):
def __init__(self, taxName):
self.name = taxName
self.rawSplitKey = None
self.splitKey = None
[docs]
class TreePartitions(object):
"""A container for tree bipartitions or splits.
Start it up like this::
tp = TreePartitions(inThing)
where inThing can be a file name, a Tree instance, or a list of
Tree objects, or a Trees instance.
If you are reading from a file (generally a bootstrap or mcmc
output), you can skip some trees at the beginning, and optionally
after that read only a maximum number of trees, like this::
tp = TreePartitions('myFile', skip=1000, max=500)
Then you can do::
tp.writeSplits() # in dot-star notation, like .***..***
t = tp.consensus()
All the input trees need to have the same taxa.
If your input trees are in more than one place, you can read in
more trees with the method::
read(whatever, skip, max)
The default setting in p4 is to use any tree weights supplied.
This will cause consensus trees created by p4 to differ from ones
created using Paup as the default in Paup is to not consider tree
weights. P4s behavior can be modified to mimic Paup using the
following statement before creating a TreePartitions object. ::
var.nexus_getWeightCommandComments = 0
tp = TreePartitions(inThing)
If you want to restore the default behavior issue this command::
var.nexus_getWeightCommandComments = 1
**Making consensus trees**
P4 makes majority rule consensus trees with extra compatible
splits. It is like PAUP does when you do the ``contree`` command
like the following::
contree all/strict=no majrule=yes percent=50 le50=yes useTreeWts=yes;
or what MrBayes does when you do a ``sumt contype=allcompat``.
Making cons trees uses this class, TreePartitions, which takes
trees apart into their 'splits', aka components or tree
bipartitions. So if you have a tree file with trees from an MCMC
output or from a bootstrap, you can make a cons tree by the
following::
tp = TreePartitions('yourFile')
t = tp.consensus()
If you want to skip some trees at the beginning of a file (often
the burn-in for an MCMC), or if you want to read in a maximum
number of trees (which might be useful for convergence testing
when using an MCMC), you do something like::
tp = TreePartitions('yourFile', skip=100, max=200)
When you get a cons tree with the ``consensus()`` method, the
support for splits is placed in Node.br.support attributes. This
allows some flexibility in how the support is displayed. To see
those support values when you draw the tree to the screen, you
will need to transfer the support information to the node names,
like this::
for n in t.iterInternalsNoRoot():
n.name = "%.2f" % n.br.support # you can specify the precision
For nice eps drawings, you might want to put the support on the
tree as the branch.name rather than the node.name, and you can do
that with something like this::
for n in t.iterInternalsNoRoot():
n.br.name = "%.0f" % (n.br.support * 100.) # convert to percent
"""
def __init__(self, inThing=None, skip=0, max=None, taxNames=None):
# self.trees = [] no, we do not want this. Too big. Need to process trees as they come.
self.nTrees = 0
self.taxNames = None
self.nTax = 0 # Merely len(taxNames)
self.consTree = None
self.splits = []
self.splitsDict = {}
# biSplits and biSplitsDict for the "even" side of the biRoot
# bifurcation.
self.biSplits = []
self.biSplitsDict = {}
self.isBiRoot = None # Is it a bifurcating root?
self.doModelComments = 0
self.modelInfo = None
if inThing:
self.read(inThing, skip, max, taxNames)
[docs]
def read(self, inThing, skip=0, max=None, taxNames=None):
"""Read in a tree, or some trees, or a file of trees.
Arg inThing can be a file name, a Tree instance, or a list of
Tree objects, or a Trees instance. Args skip and max only
come into play if inThing is a file.
"""
gm = ['TreePartitions.read()']
if not inThing:
gm.append("No input?")
raise P4Error(gm)
# print("inThing = %s, type %s" % (inThing, type(inThing)))
# If we already have self.taxNames from a previous read()
if self.taxNames:
if taxNames:
assert self.taxNames == taxNames, "Mismatched tax names."
else:
if taxNames:
self.taxNames = taxNames
self.nTax = len(taxNames)
if 0:
self.splits = []
self.splitsDict = {}
self.biSplits = []
self.biSplitsDict = {}
if isinstance(inThing, str):
if not os.path.isfile(inThing):
gm.append("The inThing is a string, but does not appear to be a file name.")
raise P4Error(gm)
assumeIsPhylip = None
f = open(inThing)
c = ' '
while c in string.whitespace:
c = f.read(1)
f.close()
if c == '(':
assumeIsPhylip = True
if assumeIsPhylip:
if not taxNames and not self.taxNames:
gm.append("File inThing %s" % inThing)
gm.append("assumed to be a phylip tree file.")
gm.append("Needs arg taxNames-- an ordered taxNames list.")
raise P4Error(gm)
self._readPhylipTreeFile(inThing, skip, max, self.taxNames)
else:
self._readNexusFile(inThing, skip, max)
elif isinstance(inThing, Tree):
if skip or max:
gm.append("Args skip and and max only come into play when reading from files.")
raise P4Error(gm)
# if not inThing.taxNames:
# gm.append("If inThing is a Tree object, it needs taxNames.")
# raise P4Error(gm)
# if not self.taxNames:
# self.taxNames = inThing.taxNames
# self.nTax = len(self.taxNames)
self.getSplitsFromTree(inThing)
elif isinstance(inThing, list):
if skip or max:
gm.append("Args skip and and max only come into play when reading from files.")
raise P4Error(gm)
for it in inThing:
assert isinstance(it, Tree)
self.getSplitsFromTree(it)
elif isinstance(inThing, Trees):
if skip or max:
gm.append(
"Args skip and and max only come into play when reading from files.")
raise P4Error(gm)
# if not inThing.taxNames:
# gm.append("If inThing is a Trees object, it needs a taxNames attached.")
# gm.append("(Try 'theTrees.setTaxNames()')")
# raise P4Error(gm)
# self.taxNames = inThing.taxNames
# self.nTax = len(self.taxNames)
# does this work?
if hasattr(inThing.trees[0], "modelInfo"):
self.doModelComments = 1
self.modelInfo = inThing.trees[0].modelInfo
for t in inThing.trees:
self.getSplitsFromTree(t)
else:
gm.append(
"Sorry-- I can't grok your input. What is '%s'?" % inThing)
raise P4Error(gm)
def _readPhylipTreeFile(self, fName, skip, max, taxNames):
gm = ['TreePartitions._readPhylipTreeFile()']
f = open(fName)
# print 'TreePartitions._readPhylipTreeFile().
# Skip over trees
if skip:
for i in range(skip):
nexusSkipPastNextSemiColon(f)
#tok = safeNextTok(f)
#lowTok = tok.lower()
# Read in the trees
while 1:
t = Tree()
if 0:
curPos = f.tell()
fileSize = f.seek(0, 2) # go to the end of the file. Seek returns the position.
f.seek(curPos)
print("About to t.parseNewick, curPos %i, fileSize %i" % (curPos, fileSize))
t.parseNewick(f, translationHash=None)
if not t.nodes:
break
t._initFinish()
t.taxNames = taxNames
if 0:
print(t)
print("got tree:", end=' ')
t.write()
self.getSplitsFromTree(t)
if max and self.nTrees >= max:
break
f.close()
def _readNexusFile(self, fName, skip, max):
gm = ['TreePartitions._readNexusFile()']
f = open(fName)
nf = Nexus() # provides nf.readBlock(), nf.readTranslateCommand()
# Read the #nexus
tok = nextTok(f)
if tok:
lowTok = tok.lower()
else:
gm.append("Empty file?!?")
raise P4Error(gm)
if lowTok != '#nexus':
gm.append("Hmmm..., this doesn't appear to be a nexus file. ")
gm.append("The first token is not '#NEXUS'.")
raise P4Error(gm)
# Get everything before the trees block.
inBlock = 0
while 1:
tok = safeNextTok(f)
lowTok = tok.lower()
if not inBlock:
if lowTok != 'begin':
gm.append("Expecting the 'begin' of a nexus block.")
raise P4Error(gm)
inBlock = 1
else:
if lowTok == 'trees':
break
elif lowTok == 'taxa':
# print "TreePartitions._readNexusFile() Entering taxa
# block ..."
nf.nexusData = NexusData()
nf.nexusData.readBlock(f, 'taxa')
nexusSkipPastNextSemiColon(f)
# print "TreePartitions._readNexusFile() Finished taxa
# block."
# If we have a taxa block, we can use it to get taxNames
self.taxNames = nf.nexusData.taxNames
self.nTax = len(self.taxNames)
inBlock = 0
else:
# Can't I just skip un-usable blocks?
gm.append("Unexpected '%s' block." % tok)
raise P4Error(gm)
# At this point we have read the 'trees' token in the 'begin trees;'
# line.
translationHash = None
nexusSkipPastNextSemiColon(f)
tok = safeNextTok(f)
# print "t got tok = %s" % tok
lowTok = tok.lower()
if lowTok == 'translate':
translationHash = nf.readTranslateCommand(f)
if not self.taxNames:
# If we did not get taxNames from a taxa block, we can
# get taxNames from the translate command. News -- this fails
# for odd keys, anything other than 1-nTax.
try:
self.taxNames = []
for i in range(len(translationHash)):
self.taxNames.append(translationHash['%s' % (i + 1)])
except KeyError:
#self.taxNames = translationHash.values()
gm.append("No taxNames were supplied, so I am getting the taxNames from the translation.")
gm.append("However, the keys of the translation do not appear to be numbers from 1 to nTax.")
gm.append("I am easily confused, and I don't want to make taxNames in an arbitrary order.")
gm.append("So you should supply a taxNames list when you invoke a TreePartitions object.")
raise P4Error(gm)
self.nTax = len(self.taxNames)
# We might expect a p4 command comment if there has been a
# translate command.
savedState = var.nexus_getP4CommandComments
var.nexus_getP4CommandComments = 1
while lowTok != 'tree':
tok = safeNextTok(f)
if tok[0] == '[':
# print("got comment: %s" % tok)
self.modelInfo = _getModelInfo(tok)
if self.modelInfo:
# self.modelInfo.check() returns
# 1 for complete info, but no heterogeneity over the tree
# 2 for complete info, and heterogeneity over the tree
ret = self.modelInfo.check()
if ret == 1:
pass # self.doModelComments remains 0
elif ret == 2:
self.doModelComments = 1
# print("doModelComments set to %s" % self.doModelComments)
else:
lowTok = tok.lower()
var.nexus_getP4CommandComments = savedState
if not self.taxNames or not self.nTax:
gm.append("No taxa block or translation command.")
gm.append("This file is not suitable for file input.")
gm.append("(Maybe input it as a Trees object.)")
raise P4Error(gm)
# At this point, lowTok should be 'tree'
if lowTok != 'tree':
if lowTok in ['end', 'endblock']:
gm.append("Got %s, expecting 'tree' command. No trees in %s?" % (tok, fName))
raise P4Error(gm)
else:
gm.append("I can't grok %s. I was expecting a 'trees' command." % tok)
raise P4Error(gm)
# Skip over trees
if skip:
for i in range(skip):
nexusSkipPastNextSemiColon(f)
tok = safeNextTok(f)
lowTok = tok.lower()
# Read in the trees
while 1:
if lowTok == 'tree':
t = Tree()
if self.doModelComments:
t.parseNexus(f, translationHash=translationHash, doModelComments=self.modelInfo.nParts)
else:
t.parseNexus(f, translationHash=translationHash)
if self.taxNames:
t.taxNames = self.taxNames
if not t.taxNames:
gm.append('Input tree does not have a taxNames list. Fix me.')
raise P4Error(gm)
self.getSplitsFromTree(t)
# t.dump(tree=True)
if max and self.nTrees >= max:
break
elif lowTok == 'end' or lowTok == 'endblock':
break
else:
gm.append("I don't understand (lowercased) token '%s'" % lowTok)
gm.append("I was expecting 'tree' or 'end'.")
raise P4Error(gm)
tok = safeNextTok(f)
lowTok = tok.lower()
# print "xx got tok %s" % tok
f.close()
[docs]
def makeSplits_Off(self):
"""Make, or re-make splits
"""
gm = ['TreePartitions.makeSplits()']
self.splits = []
self.splitsDict = {}
self.biSplits = []
self.biSplitsDict = {}
if not self.taxNames:
gm.append("Need to set taxNames")
raise P4Error(gm)
self.isBiRoot = False
firstTree = self.trees[0]
rootNChildren = firstTree.root.getNChildren()
if rootNChildren == 1:
gm.append("Cannot handle trees rooted on a single taxon yet. Fix me.")
raise P4Error(gm)
elif rootNChildren == 2:
self.isBiRoot = True
if self.isBiRoot:
for t in self.trees:
rootNChildren = t.root.getNChildren()
if rootNChildren != 2:
t.write()
gm.append("First tree had a bifurcating root, and so all trees must as well.")
raise P4Error(gm)
else:
for t in self.trees:
rootNChildren = t.root.getNChildren()
if rootNChildren < 3:
t.write()
gm.append("First tree did not have a bifurcating root.")
gm.append("All subsequent trees may not either.")
raise P4Error(gm)
# if self.isBiRoot:
# for t in self.trees:
# if hasattr(t, "tempRooter") and t.tempRooter:
# pass
# else:
# # Add a rooter node to aTree.nodes, not "in" the tree
# n = Node()
# n.isLeaf = 1
# n.name = 'tempRooter'
# n.nodeNum = var.NO_ORDER
# t.nodes.append(n)
# t.setPreAndPostOrder()
# t.tempRooter = n
# if 0 and self.isBiRoot:
# for t in self.trees:
# assert t.tempRooter
# t.attachRooter()
for t in self.trees:
t.taxNames = self.taxNames
self.nTax = len(self.taxNames)
for t in self.trees:
self._getSplitsFromTree(t)
[docs]
def finishSplits(self):
"""Having made all the splits, now do some finalizing adjustments.
- Make split.string
- Make split.set
- Make split.proportion
- Order the splits based on proportion and the string.
"""
gm = ['TreePartitions.finishSplits()']
# print "self.nTrees = %s" % self.nTrees
for spl in self.splits:
spl.string = p4.func.getSplitStringFromKey(spl.key, self.nTax)
listForSet = []
for i in range(len(spl.string)):
if spl.string[i] == '*':
# It is 1-based because the first thing that I do
# when I make a consensus tree is to make a comb
# tree. Numbers on that comb tree then correspond
# to the numbers in the 1-based sets. Otherwise
# it does not matter--- it just makes the making
# of cons trees easier to debug. If it is changed
# to 0-based, it still works.
listForSet.append(i + 1) # 1-based
spl.set = set(listForSet)
spl.proportion = spl.count / float(self.nTrees)
# We want the splits to be ordered on both the reverse
# proportion, and the split string, with stars before dots
# (arbitrarily, so it looks nice, and is consistent)
for spl in self.splits:
spl.proportion = -1.0 * spl.proportion
self.splits = p4.func.sortListOfObjectsOn2Attributes(
self.splits, 'proportion', 'string')
#self.splits = p4.func.sortListOfObjectsOn2Attributes(self.splits, 'proportion', 'key')
for spl in self.splits:
spl.proportion = -1.0 * spl.proportion
# Finish self.biSplits
for spl in self.biSplits:
spl.string = p4.func.getSplitStringFromKey(spl.key, self.nTax)
spl.proportion = spl.count / float(self.nTrees)
[docs]
def getSplitsFromTree(self, theTree):
"""Make split objects from theTree, append to self.splits"""
gm = ['TreePartitions.getSplitsFromTree()']
assert isinstance(theTree, Tree)
if self.doModelComments and theTree.recipWeight and theTree.recipWeight != 1:
gm.append("doModelComments is set, but the tree has a weight-- should not have both.")
raise P4Error(gm)
theWeight = 1.0
if theTree.recipWeight:
if theTree.recipWeight == 1:
pass
else:
theWeight = 1.0 / theTree.recipWeight
# self.isBiRoot is initialized to None. Then, when we read
# the first tree, it is set to True or False
if self.isBiRoot == None: # to start
# It is the first tree, so get the taxNames, nTax, and isBiRoot
if not self.taxNames:
# Even if it is the first tree, taxNames may have been set already by read()
assert theTree.taxNames
self.taxNames = theTree.taxNames
self.nTax = len(self.taxNames)
rootNChildren = theTree.root.getNChildren()
#if rootNChildren == 1:
# gm.append("Cannot handle trees rooted on a single taxon yet. Fix me.")
# raise P4Error(gm)
if rootNChildren == 2:
self.isBiRoot = True
else:
self.isBiRoot = False
else:
# All subsequent trees must agree with the first tree regarding isBiRoot.
rootNChildren = theTree.root.getNChildren()
if self.isBiRoot:
if rootNChildren != 2:
theTree.write()
gm.append("First tree had a bifurcating root, and so all trees must as well.")
raise P4Error(gm)
else:
if rootNChildren == 2:
theTree.write()
gm.append("First tree did not have a bifurcating root.")
gm.append("All subsequent trees may not either.")
raise P4Error(gm)
theTree.taxNames = self.taxNames # Tree.taxNames is a property, triggers checking.
# This next method call, makeSplitKeys(), checks whether any
# internal nodes have exactly 1 child. That would be bad cuz
# it would mean duped splits.
theTree.makeSplitKeys()
for n in theTree.iterNodesNoRoot():
theSKey = n.br.splitKey
# print "node %s, splitKey %s parent=%s" % (n.nodeNum, theSKey,
# n.parent.nodeNum)
# Either we have seen this splitKey before, in which case
# we get the Split object from self.splitsDict, or we have
# to make a new Split object.
if theSKey in self.splitsDict:
theSplit = self.splitsDict[theSKey]
else:
theSplit = Split()
self.splits.append(theSplit)
self.splitsDict[theSKey] = theSplit
theSplit.key = theSKey
if not self.isBiRoot:
theSplit.count += theWeight
theSplit.cumBrLen += n.br.len * theWeight
else:
if n.parent != theTree.root:
theSplit.count += theWeight
theSplit.cumBrLen += n.br.len * theWeight
else:
# We will be here twice, cuz both children of the
# root have the same splitKey. We want to only
# count it once. So we count it only if the
# rawSplitKey is odd.
if 1 & n.br.rawSplitKey:
theSplit.count += theWeight
if theSKey in self.biSplitsDict:
theBiSplit = self.biSplitsDict[theSKey]
else:
theBiSplit = Split()
self.biSplits.append(theBiSplit)
self.biSplitsDict[theSKey] = theBiSplit
theBiSplit.key = theSKey
if 1 & n.br.rawSplitKey:
theBiSplit.count += theWeight
# We need to save the brLens. If the rawSplitKey
# is odd, we save to theBiSplit.cumBrLen, and if
# it is even, we save to biRootCumBrLen.
if 1 & n.br.rawSplitKey:
theBiSplit.cumBrLen += n.br.len * theWeight
else:
theBiSplit.biRootCumBrLen += n.br.len * theWeight
if self.doModelComments:
# We save the model stuff on the "odd" branch
# to theBiSplit.modelUsage. We save the model
# stuff on the "even" side to
# theBiSplit.biRootModelUsage. We save the
# rootModelUsage to theBiSplit.rootModelUsage.
if 1 & n.br.rawSplitKey:
if theBiSplit.modelUsage:
pass
else:
theBiSplit.modelUsage = SplitModelUsage(
self.modelInfo)
smt = theBiSplit.modelUsage
for pNum in range(self.modelInfo.nParts):
if n.parts[pNum].compNum >= 0:
smt.parts[pNum].compCounts[
n.parts[pNum].compNum] += 1
if n.br.parts[pNum].rMatrixNum >= 0:
smt.parts[pNum].rMatrixCounts[
n.br.parts[pNum].rMatrixNum] += 1
if n.br.parts[pNum].gdasrvNum >= 0:
smt.parts[pNum].gdasrvCounts[
n.br.parts[pNum].gdasrvNum] += 1
# If the rawSplitKey is odd, which it is, get
# rootModelUsage
if theBiSplit.rootModelUsage:
pass
else:
theBiSplit.rootModelUsage = SplitModelUsage(
self.modelInfo)
smt = theBiSplit.rootModelUsage
for pNum in range(self.modelInfo.nParts):
if theTree.root.parts[pNum].compNum >= 0:
smt.parts[pNum].compCounts[
theTree.root.parts[pNum].compNum] += 1
else:
if theBiSplit.biRootModelUsage:
pass
else:
theBiSplit.biRootModelUsage = SplitModelUsage(
self.modelInfo)
smt = theBiSplit.biRootModelUsage
for pNum in range(self.modelInfo.nParts):
if n.parts[pNum].compNum >= 0:
smt.parts[pNum].compCounts[
n.parts[pNum].compNum] += 1
if n.br.parts[pNum].rMatrixNum >= 0:
smt.parts[pNum].rMatrixCounts[
n.br.parts[pNum].rMatrixNum] += 1
if n.br.parts[pNum].gdasrvNum >= 0:
smt.parts[pNum].gdasrvCounts[
n.br.parts[pNum].gdasrvNum] += 1
if self.doModelComments:
# print "node %i, self.isBiRoot=%s" % (n.nodeNum, self.isBiRoot)
# print "node %i, self.modelInfo=%s" % (n.nodeNum, self.modelInfo)
# We can be sure that theWeight is 1.0
if theSplit.modelUsage:
pass
else:
theSplit.modelUsage = SplitModelUsage(self.modelInfo)
smt = theSplit.modelUsage
for pNum in range(self.modelInfo.nParts):
if n.parts[pNum].compNum >= 0:
smt.parts[pNum].compCounts[n.parts[pNum].compNum] += 1
if n.br.parts[pNum].rMatrixNum >= 0:
smt.parts[pNum].rMatrixCounts[
n.br.parts[pNum].rMatrixNum] += 1
if n.br.parts[pNum].gdasrvNum >= 0:
smt.parts[pNum].gdasrvCounts[
n.br.parts[pNum].gdasrvNum] += 1
# self.dump()
# sys.exit()
if not self.isBiRoot:
for n in theTree.iterNodesNoRoot():
theSplit = self.splitsDict[n.br.splitKey]
# If the split is off the root and its rawSplitKey contains a
# 1 (ie has the first taxon), then the corresponding split will have
# its rootCount incremented.
# That means that when a consensus tree is made and
# rooted on the first taxon, the root should be placed
# on the node with the split with the highest rootCount.
if n.parent == theTree.root and (1 & n.br.rawSplitKey):
if n.br.rawSplitKey == 1:
# We do *not* want to do this next line.
#theSplit.rootCount += theWeight
# Hack alert! This is an inelegant solution. This
# tree is rooted on the parent of the first taxon.
# We want to save the rootCount, but saving it on
# this split is wrong, because this split is
# guarranteed to be in every cons tree. If this
# particular rooting does not make it to the cons
# tree then we do not want its rootCount (and we
# *will* get its rootCount if we put its root
# count on this split).
# We (generally) increment the rootCount of the
# split of the node with the clade containing the
# first taxon, but the reason is arbitrary,
# ultimately. We could increment the rootCount of
# another split, as long as we only do it for one
# node. So that is what we do here-- we store the
# info in another split-- another split that is
# off the root. But it is not that simple--
# instead of incrementing the rootCount of
# theSplit, we increment the rootCount2 of some
# other split off the root. That at least saves
# the information that the parent of that split
# was a root.
# Later, after we make a cons tree, if that split
# does not make it into the cons tree, then its
# rootCount2 is lost, as it should be. If it does
# make it to the cons tree, then we increment the
# rootCount of its parent.
aNode = None
for bNode in theTree.root.iterChildren():
# We want a child of the root, that is an internal
# node.
if bNode != n and not bNode.isLeaf:
aNode = bNode
break
if aNode:
self.splitsDict[
aNode.br.splitKey].rootCount2 += theWeight
else:
pass
#gm.append("This tree appears to have no splits!")
#raise P4Error(gm)
else:
theSplit.rootCount += theWeight
if self.doModelComments:
# We can be sure that theWeight is 1.0
# Now we want to get the root model info.
# That info is currently residing in
# theTree.root.modelUsage, but I want to store
# it in theSplit.rootModelUsage.
if theSplit.rootModelUsage:
pass
else:
theSplit.rootModelUsage = SplitModelUsage(
self.modelInfo)
smt = theSplit.rootModelUsage
for pNum in range(self.modelInfo.nParts):
if theTree.root.parts[pNum].compNum >= 0:
smt.parts[pNum].compCounts[
theTree.root.parts[pNum].compNum] += 1
self.nTrees += theWeight
[docs]
def dump(self):
# print "\nTreePartitions dump: \n\t(to get the full translationHash and splits, do 'writeSplits')"
#tH = '%s' % self.translationHash
# if len(tH) > 30:
# tH = tH[:30] + ' ...'
# print "%25s = %s' % ('translationHash", tH)
self.finishSplits()
if 1:
print("\nTreePartitions.dump()")
print("%25s = %s" % ('nTax', self.nTax))
print("%25s = %s" % ('nTrees', self.nTrees))
print("%25s = %s" % ('number of splits', len(self.splits)))
print("%25s = %s" % ('isBiRoot', self.isBiRoot))
# print "%25s = %s' % ('nParts", self.nParts)
# print "%25s = %s' % ('modelNames", self.modelNames)
if 0:
for s in self.splits:
s.dump()
if 1:
print()
print("%12s %12s %12s %12s %12s %12s %12s" % (
'string', 'key', 'count', 'rootCount', 'rootCount2', 'cumBrLen', 'biRtCumBrLen'))
for s in self.splits:
print("%12s %12s %12s %12s %12s %12.4f %12s" % (
s.string, s.key, s.count, s.rootCount, s.rootCount2, s.cumBrLen, '-'))
if len(self.biSplits):
print("biSplits")
for s in self.biSplits:
print("%12s %12s %12s %12s %12s %12.4f" % (
s.string, s.key, s.count, s.rootCount, s.rootCount2, s.cumBrLen), end=' ')
if s.biRootCumBrLen:
print("%12.4f" % s.biRootCumBrLen, end=' ')
else:
print("%12s" % " - ", end=' ')
print()
if 0:
print()
print("%12s %12s %12s %12s %12s %12s" % (
'string', 'key', 'count', 'modelUsage', 'biRtMdlUsg', 'rtModelUsage'))
for s in self.splits:
print("%12s %12s %12s" % (
s.string, s.key, s.count), end=' ')
if s.modelUsage:
print("%12s" % s.modelUsage.nParts, end=' ')
else:
print("%12s" % "None", end=' ')
if s.biRootModelUsage:
print("%12s" % s.biRootModelUsage.nParts, end=' ')
else:
print("%12s" % "None", end=' ')
if s.rootModelUsage:
print("%12s" % s.rootModelUsage.nParts)
else:
print("%12s" % "None")
if len(self.biSplits):
print("biSplits")
for s in self.biSplits:
print("%12s %12s %12s" % (
s.string, s.key, s.count), end=' ')
if s.modelUsage:
print("%12s" % s.modelUsage.nParts, end=' ')
else:
print("%12s" % "None", end=' ')
if s.biRootModelUsage:
print("%12s" % s.biRootModelUsage.nParts, end=' ')
else:
print("%12s" % "None", end=' ')
if s.rootModelUsage:
print("%12s" % s.rootModelUsage.nParts)
else:
print("%12s" % "None")
[docs]
def writeSplits(self, fName=None, minimumProportion=0.05, doLeaves=True):
"""Write a table of splits, in dot-star notation.
Writes all the splits with a proportion >= the minimumProportion."""
# print "\nTree bipartitions."
self.finishSplits()
if not fName:
f = sys.stdout
else:
f = open(fName, 'w')
for i in range(len(self.taxNames)):
f.write(" %5i %s\n" % ((i + 1), self.taxNames[i]))
# Print headings for the table
f.write("\nSplits\n======\n")
firstColWidth = self.nTax
if firstColWidth < 12:
firstColWidth = 12
firstColSig = '%%-%is' % firstColWidth
if not self.isBiRoot:
f.write(firstColSig % ' ')
f.write(" count proportion brLen\n")
nSplits = len(self.splits)
if 0:
for i in range(nSplits):
p = self.splits[i]
f.write(firstColSig % p.string)
f.write("%9.3f %5.3f %5.3f\n" %
(p.count, p.proportion, (p.cumBrLen / p.count)))
print("-" * 50)
for i in range(nSplits):
p = self.splits[i]
if not doLeaves:
theStarCount = p.string.count('*')
if theStarCount == 1 or theStarCount == self.nTax - 1:
continue
if p.proportion >= minimumProportion:
f.write(firstColSig % p.string)
f.write("%9.3f %5.3f %5.3f\n" %
(p.count, p.proportion, (p.cumBrLen / p.count)))
else:
break
if i < nSplits - 1:
f.write("(plus %i splits with proportion less than %.3f)\n" % (
(nSplits - i), minimumProportion))
else:
# This needs to be fixed to take into account minimumProportion
# The brLen depends on where its rooted, so don't report it.
f.write(firstColSig % ' ')
f.write(" count proportion\n")
for p in self.splits:
f.write(firstColSig % p.string)
f.write("%9.3f %5.3f\n" % (p.count, p.proportion))
if len(self.biSplits):
f.write(
"\nBiSplits (ie splits involving a bifurcating root)\n========\n")
f.write(firstColSig % ' ')
f.write(" count proportion\n")
for p in self.biSplits:
f.write(firstColSig % p.string)
f.write("%9.3f %5.3f\n" % (p.count, p.proportion))
# This needs to be fixed up to show modelUsage for biSplits.
if 0 and self.modelInfo:
f.write('\n')
self.modelInfo.dump()
f.write('\n')
for pNum in range(self.modelInfo.nParts):
f.write("partNum %i\n" % pNum)
mip = self.modelInfo.parts[pNum]
if mip.nComps > 1 or mip.nRMatrices > 1 or mip.nGdasrvs > 1:
for p in self.splits:
f.write("%s " % p.string)
pmt = p.modelUsage
if pmt:
if mip.nComps > 1:
f.write("compCounts=%s " %
pmt.parts[pNum].compCounts)
if mip.nRMatrices > 1:
f.write("rMatrixCounts=%s " %
pmt.parts[pNum].rMatrixCounts)
if mip.nGdasrvs > 1:
f.write("gdasrvCounts=%s " %
pmt.parts[pNum].gdasrvCounts)
else:
#f.write("no modelUsage for this split.")
pass
print()
else:
f.write("part %i is tree-homogeneous\n" % pNum)
f.write('\n')
if f != sys.stdout:
f.close()
[docs]
def compareSplits(self, otherTP, noLeaves=True, minimumProportion=0.1, bothMustMeetMinimum=False):
"""Compare split support with another TreePartitions.
It returns a list of comparisons, each comparison being a list of 3 things:
1. The split key
2. The split string
3. A list of the 2 supports
Lets say you have 2 MCMC runs, and you want to compare the
split supports for them. You make a TreePartitions object for
each, and then use this method to get a list of comparisons.
A single comparison is the support from each of the two MCMC
runs for a particular split. If one MCMC run has a split but
the other does not, it will still make a pair, with one of the
elements being zero.
noLeaves
means that trivial splits, ie for leaves,
are excluded.
minimumProportion
- If bothMustMeetMinimum is turned off,
which is now the default (and is more
informative), then a comparison is not
included unless one support value is
greater than or equal to the minimum
proportion
- If bothMustMeetMinimum is turned on,
then a comparison is not included unless
both supports are at or above the
minimumProportion.
"""
gm = ['TreePartitions.compareSplits()']
self.finishSplits()
otherTP.finishSplits()
if self.nTax != otherTP.nTax:
gm.append("Mismatched taxa.")
raise P4Error(gm)
for i in range(len(self.taxNames)):
if self.taxNames[i] != otherTP.taxNames[i]:
gm.append("Mismatched taxa.")
raise P4Error(gm)
if 0:
self.writeSplits(
minimumProportion=minimumProportion, doLeaves=False)
otherTP.writeSplits(
minimumProportion=minimumProportion, doLeaves=False)
pairs = []
# Compare all the splits in self with otherTP
for s in self.splits:
theString = s.string
if noLeaves:
theStarCount = theString.count('*')
if theStarCount == 1 or theStarCount == self.nTax - 1:
continue
theKey = s.key
prop1 = s.proportion
if theKey in otherTP.splitsDict:
prop2 = otherTP.splitsDict[theKey].proportion
else:
prop2 = 0.0
if bothMustMeetMinimum:
# The following is my old way, where both must meet the
# minimum. Changed 18 May 2011.
if (prop1 < minimumProportion) or (prop2 < minimumProportion):
continue
else:
# The following is more informative -- at least one split
# greater than minpartfreq
if (prop1 < minimumProportion) and (prop2 < minimumProportion):
continue
# print "%s %6.3f %6.3f" % (theKey, prop1, prop2)
pairs.append([theKey, theString, [prop1, prop2]])
# otherTP might have splits that self does not have. Compare those.
for s in otherTP.splits:
theString = s.string
if noLeaves:
theStarCount = theString.count('*')
if theStarCount == 1 or theStarCount == self.nTax - 1:
continue
theKey = s.key
if theKey not in self.splitsDict:
prop1 = 0.0
prop2 = s.proportion
if bothMustMeetMinimum:
if (prop1 < minimumProportion) or (prop2 < minimumProportion):
continue
else:
if (prop1 < minimumProportion) and (prop2 < minimumProportion):
continue
# print "%s %6.3f %6.3f" % (theKey, prop1, prop2)
pairs.append([theKey, theString, [prop1, prop2]])
return pairs
[docs]
def consensus(self, conTreeName='consensus', showRootInfo=False, minimumProportion=None):
"""Make a consensus tree.
This method assembles a Tree object from the tree partitions
contained in self.splits. It makes a 'majority-rule consensus
with extra compatible splits'. It will use splits that have
more than minimumProportion support (note that
minimumProportion is given as a number between 0 and 1.0,
often it will be 0.5, meaning 50%).
It returns the tree. Do it like this::
tp = TreePartitions('myTreeFile.nex')
tp.writeSplits() # If you want ...
t = tp.consensus()
The support for the splits (often the Bayesian posterior
support or bootstrap support, as given in the attribute
split.proportion) is put on the tree as node.br.support as a
float, not a string. If you want to draw it later showing the
support, you can format the float as you like (eg as percent)
and put it on node.br.name (for example) for making a picture,
or as node.name if you want to save it in Nexus format with
that info included. You can do that like this::
for n in t.iterInternalsNoRoot():
if n.br.support != None:
# The signature '%.0f' gives zero decimal places;
# change that if you want more precision.
n.name = '%.0f' % (100. * n.br.support)
t.writeNexus('consTreeWithSupport.nex')
A feature of this consensus method is that the resulting tree
is rooted on the majority root position of the input trees.
(If there is more than one majority root position, it is
rooted on the first majority root position, arbitrarily.) If
showRootInfo is set, a table of root positions is printed out.
The resulting con tree is decorated with the rootCounts. With
trifurcating roots, they get put in the node.rootCount, and
that shows directly which nodes were roots. If it is a
bifurcating root the root counts get put in
node.br.biRootCount, and shows how many times, for those
splits that made it into the cons tree, that the root was
found on that branch.
Since the resulting tree often contains more info than can be
accommodated in Newick format, you may want to save it by
pickling it. Use::
t.tPickle()
"""
gm = ['TreePartitions.consensus()']
# if self.nTax <= 3:
# gm.append(
# "Only %i taxa? -- too few to consider for a consensus" % self.nTax)
# raise P4Error(gm)
self.finishSplits()
#############################
# Make a star tree
#############################
conTree = Tree()
conTree.name = conTreeName
# Make a comb tree. Right after I finish making it, the tree
# will then be rooted on the first taxon, but for construction
# purposes here it will be rooted on the single internal node.
conTree.root = Node()
conTree.root.nodeNum = 0
conTree.root.isLeaf = 0
conTree.nodes.append(conTree.root)
n = Node()
n.nodeNum = 1
conTree.nodes.append(n)
conTree.root.leftChild = n
n.parent = conTree.root
n.isLeaf = 1
n.name = self.taxNames[0]
n.seqNum = 0
n.br.splitKey = 2 ** self.nTax - 2
previous = n
for i in range(self.nTax - 1):
n = Node()
n.nodeNum = i + 2
conTree.nodes.append(n)
previous.sibling = n
n.parent = conTree.root
n.isLeaf = 1
n.name = self.taxNames[i + 1]
n.seqNum = i + 1
n.br.splitKey = 2 ** (i + 1)
previous = n
conTree.reRoot(1, moveInternalName=False)
# Now its a comb on a stick.
if 0:
conTree.preAndPostOrderAreValid = 0
conTree.draw()
for n in conTree.nodes:
if n != conTree.root:
print("%3i %s" % (n.nodeNum, n.br.splitKey))
else:
print("%3i root" % n.nodeNum)
conTree.preOrder = None
conTreePostOrder = None
conTree.preAndPostOrderAreValid = 0
# sys.exit()
# #####################################
# Add splits
# #####################################
#nTaxSet = set(range(self.nTax+ 1)[1:])
nInternalNodes = 1
for spl in self.splits:
# Skip the trivial splits-- they are already in the star tree.
if len(spl.set) <= 1 or len(spl.set) == (self.nTax - 1):
# print "...skipping split %s" % spl.set
continue
# If the tree is fully resolved, stop.
if nInternalNodes >= self.nTax - 2:
# print "Tree is fully resolved, so stop."
break
# If the split proportion is less than minimumProportion ...
if minimumProportion:
if spl.proportion < minimumProportion:
# print " ... is less than minimumProportion, so break"
break
# print "\nLooking at split %s, %s, %s" % (spl.string, spl.key,
# spl.set)
# Check whether spl is compatible with the tree splits so
# far. (This was not in the Page code, that I could find,
# and its absence leads to grief and awkwardness.)
# The first one will be compatible for sure.
if nInternalNodes > 1:
# print " Checking for compatibility."
isCompatible = 1
# No need to check the first trivial nodes.
for n in conTree.nodes[self.nTax + 1:]:
nSet = self.splitsDict[n.br.splitKey].set
# print " ...checking %s %s" % (n.br.splitKey,
# nSet)
# A split is incompatible (for our purposes here) if:
#
# 1. The sets overlap, ie the intersection is non-empty
# 2. nSet.difference(spl.set) is non-empty
# 3. spl.set.difference(nSet) is non-empty
# (For completeness, the other criterion for
# incompatibility is that both splits (nSet and
# spl.set) have at least one shared taxon missing.
# Well, both are missing taxon 1, so that is
# always fulfilled, and does not need to be
# checked. Thanks to Mark Wilkinson for defining
# split compatibility like this.)
if len(nSet.intersection(spl.set)) \
and len(nSet.difference(spl.set)) \
and len(spl.set.difference(nSet)):
# print " %s is incompatible with" % n.br.splitKey
# print " %s" % spl.string
isCompatible = 0
break
if not isCompatible:
continue # next spl
# If we made it this far, spl is compatible.
# Find a place in the tree in which to insert the split.
# This is the crux of Page's code.
# Thanks, Rod!
p = conTree.root
q = p.leftChild
isDone = 0
addThisNode = 0
while q and not isDone:
relationship = None
# print "q.br.splitKey = %s" % q.br.splitKey
qSet = self.splitsDict[q.br.splitKey].set
# print "qSet = %s" % qSet
# print "spl.set = %s" % spl.set
# 1. spl is identical to q, but that should never happen
if q.br.splitKey == spl.key:
gm.append(
"Candidate split is identical to an existing split.")
gm.append("This should never happen. Programming error.")
raise P4Error(gm)
# 2. spl is a subset of q, then choose a new p and q (as
# children of previous p and q) and try again
elif spl.set.issubset(qSet):
relationship = 'SUBSET'
p = q
q = q.leftChild
# 3. spl is disjoint to q, then choose a new p (child of p) and
# q (sib of q) and try again
elif not len(spl.set.intersection(qSet)):
relationship = 'DISJOINT'
p = q
q = q.sibling
# 4. spl is a superset of q, in which case make a new node
# between p and q
elif spl.set.issuperset(qSet):
# print "spl.set (%s) is a superset of qSet (%s)" % (spl.set.__getstate__()[0].keys(),
# qSet.__getstate__()[0].keys())
relationship = 'SUPERSET'
isDone = 1
addThisNode = 1
# 5. spl overlaps q, so spl cant be part of the tree
elif len(spl.set.intersection(qSet)):
#relationship = 'OVERLAPPING'
# print "...Got overlapping..."
isDone = 1
else:
gm.append("xxx Why would this happen?")
raise P4Error(gm)
# print "a relationship is %s" % relationship
if addThisNode:
# print " Adding a new node. spl.string= %s" % spl.string
if not q:
gm.append("Programming error. q does not exist.")
raise P4Error(gm)
n = Node()
n.nodeNum = len(conTree.nodes)
conTree.nodes.append(n)
nInternalNodes = nInternalNodes + 1
n.br.splitKey = spl.key
# I don't think that either SUBSET nor DISJOINT will
# happen. It will never be SUBSET or DISJOINT if I
# start with a star tree. However, I'll leave this
# code in, in case I ever decide to not start with a
# star tree.
if relationship == 'SUBSET':
# print "b relationship is SUBSET"
p.leftChild = n
n.parent = p
elif relationship == 'DISJOINT':
# print "b relationship is DISJOINT"
p.sibling = n
n.parent = p.parent
elif relationship == 'SUPERSET':
# print "b relationship is SUPERSET"
if q == p.leftChild:
# print "here 1"
p.leftChild = n
n.leftChild = q
n.parent = p
q.parent = n
else:
# print "here 2. p = %s, n = %s, q = %s" % (p.nodeNum,
# n.nodeNum, q.nodeNum)
p.sibling = n
n.leftChild = q
n.parent = p.parent
q.parent = n
tmp = q
s = q.sibling
t = q.parent
while s:
sSet = self.splitsDict[s.br.splitKey].set
if s.br.splitKey == spl.key:
print("why would this ever happen?")
raise P4Error(gm)
elif sSet.issubset(spl.set):
# print "here 3"
s.parent = n
tmp = s
s = s.sibling
else:
# print "here 4"
t.sibling = s
tmp.sibling = s.sibling
t = s
t.sibling = None
s = tmp.sibling
if tmp == q:
gm.append("Programming error. This shouldn't happen.")
gm.append(
"No further subsets have been found. A useless node has been added")
raise P4Error(gm)
###################################################
###################################################
# At this point, we have a (perhaps fully resolved?) tree,
# rooted on the first taxon.
if 0:
conTree.draw()
print("%12s %12s %12s %12s %12s %12s" % ('nodeNum', '', 'count', 'rootCount', 'rootCount2', 'cumBrLen'))
for n in conTree.iterNodesNoRoot():
s = self.splitsDict[n.br.splitKey]
print("%12s %12s %12s %12s %12s %12s" % (
n.nodeNum, s.string, s.count, s.rootCount, s.rootCount2, s.cumBrLen))
sys.exit()
# For convenience, temporarily attach the split objects to
# node.br's; then I don't need to keep looking up the split
# in the self.splitsDict. The n.br.split's are deleted at the
# end of this method, below.
conTree.setPreAndPostOrder()
for n in conTree.iterNodesNoRoot():
n.br.split = self.splitsDict[n.br.splitKey]
# We need to deal with the rootCount2 counts. When the tree
# is rooted on the first taxon, which it is, then the parent
# of any nodes with a rootCount2 should have its rootCount
# incremented. Its a hack originating in
# getSplitsFromTree(), above.
if not self.isBiRoot:
for n in conTree.iterNodesNoRoot():
# The single child of the root will never have a rootCount2.
if n.br.split.rootCount2:
#print("node %i rootCount2 %i added to node %i" % (n.nodeNum, n.br.split.rootCount2, n.parent.nodeNum))
n.parent.br.split.rootCount += n.br.split.rootCount2
# Get br.lens
if self.isBiRoot:
for n in conTree.iterNodesNoRoot():
# This is a little tricky. We are only dealing with
# non-root splits here. The cumBrLen's only come from
# non-root splits, so we can use it. However, the
# counts are composed of non-root splits plus root
# splits; we only want the non-root splits. We can
# get the contribution of the root splits from the
# count from the biRootHash.
biRootCountContribution = 0.0
if n.br.split.key in self.biSplitsDict:
biRootCountContribution = self.biSplitsDict[
n.br.split.key].count
theCount = n.br.split.count - biRootCountContribution
if theCount:
n.br.len = n.br.split.cumBrLen / theCount
else:
for n in conTree.iterNodesNoRoot():
n.br.len = n.br.split.cumBrLen / n.br.split.count
# At this point, node.br.support does not exist, so set it to None
for n in conTree.iterNodesNoRoot():
n.br.support = None
# Get splitSupport
for n in conTree.nodes[2:]: # skip the (terminal) root, and the node 0
if not n.isLeaf:
#print("setting node %i support to %s" % (n.nodeNum,n.br.split.proportion))
n.br.support = n.br.split.proportion
#conTree.nodes[0].br.support = -1.0
# Get rootCount and biRootCount
if self.isBiRoot:
for n in conTree.iterNodesNoRoot():
if n.br.split.key in self.biSplitsDict:
n.br.biRootCount = self.biSplitsDict[n.br.split.key].count
else:
n.br.biRootCount = 0.0
n.rootCount = None
#n.br.support = -1.0
else:
for n in conTree.iterNodesNoRoot():
if n.br.split.rootCount:
n.rootCount = n.br.split.rootCount
if 0:
conTree.preAndPostOrderAreValid = 0
conTree.draw()
print("%12s %12s %12s %12s %12s %12s" % (
'nodeNum', '', 'n.br.len', 'n.br.support', 'n.rootCount', 'n.br.biRtCnt'))
for n in conTree.iterNodesNoRoot():
print("%12s %12s %12s" % (
n.nodeNum, n.br.split.string, "%.6f" % n.br.len), end=' ')
if hasattr(n.br, "support"):
if isinstance(n.br.support, float):
print("%12.4f" % n.br.support, end=' ')
else:
print("%12s" % n.br.support, end=' ')
else:
print("%12s" % " - ", end=' ')
if hasattr(n, "rootCount"):
print("%12s" % n.rootCount, end=' ')
else:
print("%12s" % " - ", end=' ')
if hasattr(n.br, "biRootCount"):
print("%12s" % n.br.biRootCount, end=' ')
else:
print("%12s" % " - ", end=' ')
print()
#sys.exit()
######################
# Re-root
######################
# Put together a maxRootNodes list, in 2 steps.
# First, without identifying which node or nodes have it, find
# the the maxRootCount. Then find which nodes in the conTree
# have that maxRootCount. There will be at least one, and
# maybe more. Then re-root.
if self.isBiRoot:
sumBiRootCount = 0
for n in conTree.iterNodesNoRoot():
sumBiRootCount += n.br.biRootCount
conTree.sumBiRootCount = sumBiRootCount # ad hoc
nodesWithBiRootProportions = []
for n in conTree.iterNodesNoRoot():
if n.br.biRootCount:
n.biRootProportion = n.br.biRootCount / sumBiRootCount # should be on the n.br, not the node, but I need it on the node to sort it
nodesWithBiRootProportions.append(n)
nodesWithBiRootProportions = p4.func.sortListOfObjectsOnAttribute(nodesWithBiRootProportions, "biRootProportion")
nodesWithBiRootProportions.reverse()
maxRootCount = nodesWithBiRootProportions[0].br.biRootCount
#print("Got maxRootCount %i" % maxRootCount)
# Now that the nodes are sorted, move biRootProportion to the n.br, where it belongs.
maxRootNodes = []
for nNum,n in enumerate(nodesWithBiRootProportions):
if n.br.biRootCount == maxRootCount:
maxRootNodes.append(n)
n.br.biRootProportion = n.biRootProportion
del(n.biRootProportion)
n.br.biRootRank = nNum
if 0 and showRootInfo:
print("maxRootCount = %i" % maxRootCount)
print("maxRootNodes (nodeNums) = ", end=' ')
for n in maxRootNodes:
print(n.nodeNum, end=' ')
print()
if len(maxRootNodes) == 1:
print("There is only one maxRootNode; rooting on it.")
else:
print("There are %s maxRootNodes. Choosing the first to root on." % len(maxRootNodes))
biRootChild = maxRootNodes[0]
theBiSplit = self.biSplitsDict[biRootChild.br.splitKey]
#print("Max root node, ie biRootChild, is node %i" % biRootChild.nodeNum)
# Add a root node.
theBiRoot = conTree.addNodeBetweenNodes(biRootChild, biRootChild.parent)
# At this point, theBiRoot.br has a biRootCount, but it should not, so zero it.
theBiRoot.br.biRootCount = 0
if 0:
print()
conTree.preAndPostOrderAreValid = 0
conTree.draw()
print("%14s %14s" % ('nodeNum', 'biRootCount'))
for n in conTree.iterNodesNoRoot():
print("%14i %14i" % (n.nodeNum, n.br.biRootCount))
if 0:
print()
conTree.preAndPostOrderAreValid = 0
conTree.draw()
print("%14s %14s" % ('nodeNum', 'br.support'))
for n in conTree.iterNodesNoRoot():
print("%14i %14s" % (n.nodeNum, n.br.support))
theBiRoot.br.split = Split()
theBiRoot.br.len = theBiSplit.cumBrLen / theBiSplit.count
theBiRoot.br.split.modelUsage = theBiSplit.modelUsage
biRootChild.br.len = theBiSplit.biRootCumBrLen / theBiSplit.count
biRootChild.br.split.modelUsage = theBiSplit.biRootModelUsage
if 0:
print("biRootChild is node %i" % biRootChild.nodeNum)
print("theBiRoot.rootModelUsage = %s" % theBiRoot.rootModelUsage)
sys.exit()
conTree.reRoot(theBiRoot, moveInternalName=False)
# Might be None, if there was no model info
theBiRoot.rootModelUsage = theBiSplit.rootModelUsage
else:
# Not biRoot
maxRootCount = 0
for n in conTree.iterNodesNoRoot():
if 0:
print("%3i %i" % (n.nodeNum, n.br.split.rootCount))
if n.br.split.rootCount > maxRootCount:
maxRootCount = n.br.split.rootCount
maxRootNodes = []
for n in conTree.iterNodesNoRoot():
if n.br.split.rootCount == maxRootCount:
maxRootNodes.append(n)
if 1 and showRootInfo:
print(gm[0])
if len(maxRootNodes) > 1:
print(" There are %s maxRootNodes. Choosing the first to root on." % len(maxRootNodes))
maxRootNodes[0].rootModelUsage = maxRootNodes[
0].br.split.rootModelUsage
conTree.reRoot(maxRootNodes[0], moveInternalName=False)
if 0:
# Put the nodes in pre-order, only for cosmetic purposes.
conTree.setPreAndPostOrder()
newNodes = []
for i in conTree.preOrder:
newNodes.append(conTree.nodes[int(i)])
for i in range(len(newNodes)):
newNodes[int(i)].nodeNum = int(i)
conTree.nodes = newNodes
conTree.setPreAndPostOrder()
if 0:
conTree.preAndPostOrderAreValid = 0
conTree.draw()
###################################################
# Now tabulate model usage. Temporarily turned off
###################################################
if 0 and self.modelInfo:
for pNum in range(self.modelInfo.nParts):
print("\nPartition %i" % pNum)
if self.modelInfo.parts[pNum].nComps > 1:
print("\nc%i.%i" % (pNum, self.modelInfo.parts[pNum].nComps))
print(" Composition Numbers")
print("%8s" % 'NodeNum', end=' ')
for cNum in range(self.modelInfo.parts[pNum].nComps):
print("%4s " % cNum, end=' ')
print()
print("%8s" % '=======', end=' ')
for cNum in range(self.modelInfo.parts[pNum].nComps):
print(" %4s" % '===', end=' ')
print()
for n in conTree.nodes:
if n != conTree.root:
print("%6i " % n.nodeNum, end=' ')
theCompCounts = n.br.split.modelUsage.parts[
pNum].compCounts
for cNum in range(self.modelInfo.parts[pNum].nComps):
print("%4i " % theCompCounts[cNum], end=' ')
print()
else:
print("%6i " % n.nodeNum, end=' ')
#theCompCounts = maxRootNodes[0].br.split.rootModelUsage.parts[pNum].compCounts
theCompCounts = conTree.root.rootModelUsage.parts[pNum].compCounts
for cNum in range(self.modelInfo.parts[pNum].nComps):
print("%4i " % theCompCounts[cNum], end=' ')
print()
else:
print("Comps in this partition are homogeneous.")
if self.modelInfo.parts[pNum].nRMatrices > 1:
print("\nr%i.%i" % (pNum, self.modelInfo.parts[pNum].nRMatrices))
print(" rMatrix Numbers")
print("%8s" % 'NodeNum', end=' ')
for rNum in range(self.modelInfo.parts[pNum].nRMatrices):
print("%4s " % rNum, end=' ')
print()
print("%8s" % '=======', end=' ')
for rNum in range(self.modelInfo.parts[pNum].nRMatrices):
print(" %4s" % '===', end=' ')
print()
for n in conTree.nodes:
if n != conTree.root:
print("%6i " % n.nodeNum, end=' ')
theRMatrixCounts = n.br.split.modelUsage.parts[
pNum].rMatrixCounts
for rNum in range(self.modelInfo.parts[pNum].nRMatrices):
print("%4i " % theRMatrixCounts[rNum], end=' ')
print()
else:
print("rMatrices in this partition are homogeneous.")
# Find the majority compNum's and rMatrixNum's. Temporarily off
if 0 and self.modelInfo:
# First make a place to put them. Initialize n.compNum
# etc with -1's.
for n in conTree.nodes:
if n.parts:
gm.append('node already has parts.')
raise P4Error(gm)
for pNum in range(self.modelInfo.nParts):
n.parts.append(NodePart())
if n != conTree.root:
for pNum in range(self.modelInfo.nParts):
n.br.parts.append(NodeBranchPart())
# Now find the majority
for pNum in range(self.modelInfo.nParts):
if self.modelInfo.parts[pNum].nComps > 1:
for n in conTree.nodes:
if n != conTree.root:
theCompCounts = n.br.split.modelUsage.parts[
pNum].compCounts
n.parts[pNum].compNum = theCompCounts.index(
max(theCompCounts))
else:
theCompCounts = conTree.root.rootModelUsage.parts[
pNum].compCounts
n.parts[pNum].compNum = theCompCounts.index(
max(theCompCounts))
if self.modelInfo.parts[pNum].nRMatrices > 1:
for n in conTree.iterNodesNoRoot():
theRMatrixCounts = n.br.split.modelUsage.parts[
pNum].rMatrixCounts
n.br.parts[pNum].rMatrixNum = theRMatrixCounts.index(
max(theRMatrixCounts))
# Draw them
conTree.setPreAndPostOrder() # Needed.
for pNum in range(self.modelInfo.nParts):
if self.modelInfo.parts[pNum].nComps > 1:
modelKeyHash = {}
for n in conTree.iterNodesNoRoot():
try:
n.br.textDrawSymbol = var.modelSymbols[n.parts[pNum].compNum]
except IndexError:
n.br.textDrawSymbol = "-"
if n.parts[pNum].compNum not in modelKeyHash:
modelKeyHash[n.parts[pNum].compNum] = n.br.textDrawSymbol
print()
conTree.draw()
print("\nThe drawing above shows majority comp numbers in partition %i" % pNum)
kk = list(modelKeyHash.keys())
kk.sort()
for k in kk:
print(" comp %2i - %s" % (k, modelKeyHash[k]))
print(" Root has comp %i" % conTree.root.parts[pNum].compNum)
if self.modelInfo.parts[pNum].nRMatrices > 1:
modelKeyHash = {}
for n in conTree.iterNodesNoRoot():
try:
n.br.textDrawSymbol = var.modelSymbols[n.br.parts[pNum].rMatrixNum]
except IndexError:
n.br.textDrawSymbol = "-"
if n.br.parts[pNum].rMatrixNum not in modelKeyHash:
modelKeyHash[
n.br.parts[pNum].rMatrixNum] = n.br.textDrawSymbol
print()
conTree.draw()
print("\nThe drawing above shows majority rMatrix numbers in partition %i" % pNum)
kk = list(modelKeyHash.keys())
kk.sort()
for k in kk:
print(" rMatrix %2i - %s" % (k, modelKeyHash[k]))
print()
conTree.preAndPostOrderAreValid = 0
conTree.taxNames = self.taxNames
#############
# Root info
#############
if 1 and showRootInfo:
conTree.setPreAndPostOrder()
#conTree.draw()
# Print out a table showing what nodes or branches had the root.
if self.isBiRoot:
print()
print(gm[0])
print(longMessage1) # see top of file
nnn = []
for n in conTree.iterNodesNoRoot():
if n.br.biRootCount:
n.biRootRank = n.br.biRootRank # moved to n for sorting
nnn.append(n)
nnn = p4.func.sortListOfObjectsOnAttribute(nnn, 'biRootRank')
print("node br.biRootCount proportion rank")
for n in nnn:
if not n.br.biRootCount: # None or zero
pass
else:
if n == biRootChild:
# Point out, with the arrow, that it has been rooted on that branch.
print("%4i %11.1f %5.3f %5i <==" % (
n.nodeNum, n.br.biRootCount, n.br.biRootProportion, n.br.biRootRank))
else:
print("%4i %11.1f %5.3f %5i" % (
n.nodeNum, n.br.biRootCount, n.br.biRootProportion, n.br.biRootRank))
if n.br.biRootCount:
if n.isLeaf:
n.oldName = n.name
n.name += "_BiR_%5.3f_%i" % (n.br.biRootProportion, n.br.biRootRank)
else:
n.name = "BiR_%5.3f_%i" % (n.br.biRootProportion, n.br.biRootRank)
print("Total biRootCount is %i, for %i trees" % (conTree.sumBiRootCount, self.nTrees))
#conTree.draw()
if 0 and isinstance(showRootInfo, str): # this does not work because leaf node names are modified.
if showRootInfo.endswith(".nex"):
rootOutFileName = showRootInfo
else:
rootOutFileName = showRootInfo + ".nex"
print("Writing the cons tree, decorated with root counts,")
print("to nexus tree file '%s'" % rootOutFileName)
conTree.writeNexus(rootOutFileName)
else:
conTree.draw(width=130)
print("Cons tree with root proportions and ranks --- picture above, and tree string below ---")
print("Roots were sampled on branches leading towards the root")
print(" from the nodes (including leaves) with BiR numbers.")
conTree.write()
print()
# Undo the root decoration
for n in conTree.iterNodesNoRoot():
if n.br.biRootCount:
if n.isLeaf:
n.name = n.oldName
del(n.oldName)
else:
n.name = None
else:
# not biRoot
print()
print(gm[0])
print(longMessage2) # see top of file.
sumRootCount = 0
print("node rootCount")
for n in conTree.iterNodes():
if hasattr(n, 'rootCount'):
if n.rootCount == None:
pass
else:
if n == conTree.root:
print("%4i %6.1f <==" % (n.nodeNum, n.rootCount))
else:
print("%4i %6.1f" % (n.nodeNum, n.rootCount))
n.name = "%i" % n.rootCount
sumRootCount += n.rootCount
print("Total rootCount is %i, for %i trees" % (sumRootCount, self.nTrees))
if isinstance(showRootInfo, str):
if showRootInfo.endswith(".nex"):
rootOutFileName = showRootInfo
else:
rootOutFileName = showRootInfo + ".nex"
print("Writing the cons tree, decorated with root counts,")
print("to nexus tree file '%s'" % rootOutFileName)
conTree.writeNexus(rootOutFileName)
else:
conTree.draw()
print("Cons tree with root counts --- picture above, and tree string below ---")
conTree.write()
print()
# Remove the decoration
for n in conTree.iterInternalsNoRoot():
n.name = None
conTree.root.name = None
# Attaching splits to node.br's was only temporary.
for n in conTree.iterNodesNoRoot():
if hasattr(n.br, 'split'): # theBiRoot doesnt have one
del(n.br.split)
n.br.splitKey = None
# So was the root rootModelUsage
if hasattr(conTree.root, 'rootModelUsage'):
del(conTree.root.rootModelUsage)
# At the moment, the textDrawSymbols are whatever was left
# after drawing the model. Put them back to default.
for n in conTree.iterNodesNoRoot():
n.br.textDrawSymbol = '-'
self.conTree = conTree
return conTree
[docs]
def makeTreeFromPartitions(self, partitions, taxNames=None, zeroBasedNumbering=True):
"""Make a tree from a list of partitions.
Each partition in the list of partitions is a list of taxon
numbers (zero-based, or 1-based if the arg zeroBasedNumbering
is set to False).
It needs taxNames set, which can be done as an arg in this
method, or set before.
"""
gm = ['TreePartitions.makeTreeFromPartitions()']
if not isinstance(partitions, list):
gm.append('The partitions should be a list of lists of ints.')
raise P4Error(gm)
for it in partitions:
if not isinstance(it, list):
gm.append('The partitions should be a list of lists of ints.')
raise P4Error(gm)
for it2 in it:
if not isinstance(it2, int):
gm.append(
'The partitions should be a list of lists of ints.')
raise P4Error(gm)
if zeroBasedNumbering:
zbParts = partitions
else:
zbParts = []
for p in partitions:
cp = p[:]
for i in range(len(cp)):
cp[i] -= 1
zbParts.append(cp)
# print zbParts
if taxNames:
self.taxNames = taxNames
self.nTax = len(self.taxNames)
self.nTrees = 1
allOnes = 2 ** (self.nTax) - 1
txx = []
for tNum in range(self.nTax):
tName = self.taxNames[tNum]
tx = TP_TinyTax(tName)
tx.rawSplitKey = 1 << tNum
txx.append(tx)
for tx in txx:
spl = Split()
self.splits.append(spl)
spl.rawSplitKey = tx.rawSplitKey
spl.count = 1.0
for zbPart in zbParts:
rsk = 0
for it in zbPart:
rsk = rsk | txx[it].rawSplitKey
spl = Split()
self.splits.append(spl)
spl.rawSplitKey = rsk
spl.count = 1.0
for spl in self.splits:
if (1 & spl.rawSplitKey):
spl.key = allOnes ^ spl.rawSplitKey
else:
spl.key = spl.rawSplitKey
toRemoveFromSplits = []
for spl in self.splits:
if not self.splitsDict.get(spl.key):
self.splitsDict[spl.key] = spl
else:
toRemoveFromSplits.append(spl)
self.biSplits.append(spl)
self.biSplitsDict[spl.key] = spl
if toRemoveFromSplits:
self.isBiRoot = True
for spl in toRemoveFromSplits:
self.splits.remove(spl)
# print '------'
# for spl in self.splits:
# print "%3i %3i" % (spl.rawSplitKey, spl.key)
# print "biSplits ====="
# for spl in self.biSplits:
# print "%3i %3i" % (spl.rawSplitKey, spl.key)
if self.isBiRoot and len(self.biSplits) != 1:
gm.append('It appears to be a bifurcationg root.')
gm.append("But I got %i 'biSplits' -- should be 1." %
len(self.biSplits))
gm.append("Possibly a badly-formed matrix?")
gm.append("Possibly a duplicated site?")
raise P4Error(gm)
self.finishSplits()
# self.dump()
t = self.consensus()
t.stripBrLens()
t.name = 'treeFromPartitions'
# t.write()
return t
[docs]
def combineWith(self, otherTreePartitionsObject):
"""Combine the info from another TreePartitions object into self.
Hack alert!
Only for topology info, not for model info. Attribute
doModelComments from self needs to be set to zero, and
attribute modelInfo from self needs to be set to None (by you,
the user) or else it won't work.
"""
otp = otherTreePartitionsObject
gm = ["TreePartitions.combineWith()"]
# Checks.
if self.nTax != otp.nTax:
gm.append("nTax differs. self %i, other %i" %
(self.nTax, otp.nTax))
raise P4Error(gm)
for tNum in range(len(self.taxNames)):
if self.taxNames[tNum] != otp.taxNames[tNum]:
gm.append("taxNames differ.")
raise P4Error(gm)
if self.doModelComments or otp.doModelComments:
gm.append("self.doModelComments = %s, other.doModelComments = %s" % (
self.doModelComments, otp.doModelComments))
raise P4Error(gm)
if self.modelInfo:
gm.append("self.modelInfo exists -- set it to None.")
raise P4Error(gm)
# Combine.
self.nTrees += otp.nTrees
for ospl in otp.splits:
sspl = self.splitsDict.get(ospl.key)
if sspl:
sspl.combineWith(ospl)
else:
self.splits.append(ospl)
self.splitsDict[ospl.key] = ospl
for ospl in otp.biSplits:
sspl = self.biSplitsDict.get(ospl.key)
if sspl:
sspl.combineWith(ospl)
else:
self.biSplits.append(ospl)
self.biSplitsDict[ospl.key] = ospl
self.finishSplits()
[docs]
def getSplitForTaxNames(self, txNames):
k = p4.func.getSplitKeyFromTaxNames(self.taxNames, txNames)
return self.splitsDict.get(k)
[docs]
def getProportionRange(self, verbose=False):
"""Return (min,max) of non-leaf split proportions
This does not look at biSplits.
"""
theMinSupport = 1.0
theMaxSupport = 0.0
for s in self.splits:
theString = s.string
theStarCount = theString.count('*')
if theStarCount == 1 or theStarCount == self.nTax - 1:
continue
theKey = s.key
if s.proportion < theMinSupport:
theMinSupport = s.proportion
if s.proportion > theMaxSupport:
theMaxSupport = s.proportion
if verbose:
print(f"TreePartitions.getProportionRange() min: {theMinSupport:.4f} max: {theMaxSupport:.4f}")
return (theMinSupport, theMaxSupport)