Source code for p4.tree

import sys
import string
import io
import math
import copy
import os
import p4.func
import time
import glob
from p4.var import var
from p4.p4exceptions import P4Error
from p4.node import Node, NodePart, NodeBranch, NodeBranchPart
from p4.nexustoken import nextTok, safeNextTok
from p4.distancematrix import DistanceMatrix

import numpy
import p4.pf as pf
from p4.model import Model
from p4.data import Data
from p4.alignment import Part
import random

[docs]class Tree(object):

    """A phylogenetic tree.


**Some instance variables**

    * ``fName``, if the Tree was read from a file
    * ``name``, the name of the tree.
    * ``root``, the root node
    * ``nodes``, a list of nodes.
    * ``preOrder`` and ``postOrder``, lists of node numbers
    * ``recipWeight``, the weight, if it exists, is usually 1/something, so the reciprocal looks nicer ...
    * ``nexusSets``, if it exists, a NexusSets object.

**Properties**

    * ``taxNames``, a list of names.  Usually the order is important!
    * ``data``, a :class:`Data.Data` object
    * ``model``, a :class:`Model.Model` object
    * ``nTax``, the number of taxa
    * ``nInternalNodes``, the number of non-leaf nodes

**The node method**

    You often will want to refer to a node in a tree.  This can be
    done via its name, or its nodeNum, or as an object, via the method
    :meth:`~p4.tree.Tree.node`.  For example, from a Tree ``t`` you can
    get node number 3 via::

        n = t.node(3)

    and you can get the node that is the parent of the Mastodon via::

        n = t.node('Mastodon').parent

    For many methods that require specifying a node, the method argument is *nodeSpecifier*, eg::

        t.reRoot(23)

    ``reRoots``'s the tree to node number 23.

**Describe, draw, and get information about the tree**

    .. autosummary::
       :nosignatures:

       ~Tree.dump
       ~Tree.draw
       ~Tree.textDrawList
       ~Tree.tv
       ~Tree.btv
       ~Tree.isFullyBifurcating
       ~Tree.taxSetIsASplit
       ~Tree.getAllLeafNames
       ~Tree.getChildrenNums
       ~Tree.getDegree
       ~Tree.getLen
       ~Tree.getNodeNumsAbove
       ~Tree.getPreAndPostOrderAbove
       ~Tree.getPreAndPostOrderAboveRoot
       ~Tree.getSeqNumsAbove
       ~Tree.subTreeIsFullyBifurcating
       ~Tree.summarizeModelComponentsNNodes
       ~Tree.verifyIdentityWith

**Write**

    .. autosummary::
       :nosignatures:

       ~Tree.write
       ~Tree.writeNewick
       ~Tree.writeNexus
       ~Tree.writePhylip
       ~Tree.tPickle

    See also Trees methods :meth:`p4.trees.Trees.writeNexus` and
    :meth:`p4.trees.Trees.writeNewick` for doing trees by the bunch.

**Iteration over the nodes**

    Sometimes you don't want to just iterate over the self.nodes list,
    because after some manipulations a node might be in self.nodes but
    not actually in the tree; using these 'iter' methods takes care of
    that, skipping such nodes.

    .. autosummary::
       :nosignatures:

       ~Tree.iterInternals
       ~Tree.iterInternalsNoRoot
       ~Tree.iterInternalsNoRootPostOrder
       ~Tree.iterInternalsNoRootPreOrder
       ~Tree.iterInternalsPostOrder
       ~Tree.iterLeavesNoRoot
       ~Tree.iterLeavesPostOrder
       ~Tree.iterLeavesPreOrder
       ~Tree.iterNodes
       ~Tree.iterNodesNoRoot
       ~Tree.iterPostOrder
       ~Tree.iterPreOrder
       ~Tree.nextNode

    See also :class:`~p4.node.Node` methods that do similar things starting from a given node.

**Copy**

    .. autosummary::
       :nosignatures:

       ~Tree.dupe
       ~Tree.copyToTree
       ~Tree.dupeSubTree

**In combination with Data and Model**

     .. autosummary::
        :nosignatures:

        ~Tree.calcLogLike
        ~Tree.optLogLike
        ~Tree.simulate
        ~Tree.getSiteLikes
        ~Tree.ancestralStateDraw
        ~Tree.bigXSquaredSubM
        ~Tree.compStatFromCharFreqs
        ~Tree.compoTestUsingSimulations
        ~Tree.modelFitTests
        ~Tree.modelSanityCheck
        ~Tree.simsForModelFitTests
        ~Tree.getEuclideanDistanceFromSelfDataToExpectedComposition

**Setting a model**

     .. autosummary::
        :nosignatures:

        ~Tree.newComp
        ~Tree.newRMatrix
        ~Tree.newGdasrv
        ~Tree.setPInvar
        ~Tree.setRelRate
        ~Tree.setModelComponentOnNode
        ~Tree.setModelComponentsOnNodesRandomly
        ~Tree.setModelComponentsNNodes
        ~Tree.summarizeModelComponentsNNodes
        ~Tree.setNGammaCat
        ~Tree.setTextDrawSymbol


**Tree manipulation**

     .. autosummary::
        :nosignatures:

        ~Tree.addLeaf
        ~Tree.addNodeBetweenNodes
        ~Tree.addSibLeaf
        ~Tree.addSubTree
        ~Tree.allBiRootedTrees
        ~Tree.collapseNode
        ~Tree.collapseClade
        ~Tree.ladderize
        ~Tree.lineUpLeaves
        ~Tree.nni
        ~Tree.nni2
        ~Tree.pruneSubTreeWithoutParent
        ~Tree.pruneSubTreeWithParent
        ~Tree.randomSpr
        ~Tree.randomizeTopology
        ~Tree.reRoot
        ~Tree.reconnectSubTreeWithoutParent
        ~Tree.reconnectSubTreeWithParent
        ~Tree.removeEverythingExceptCladeAtNode
        ~Tree.removeNode
        ~Tree.removeAboveNode
        ~Tree.removeRoot
        ~Tree.renameForPhylip
        ~Tree.restoreDupeTaxa
        ~Tree.restoreNamesFromRenameForPhylip
        ~Tree.rotateAround
        ~Tree.spr
        ~Tree.stripBrLens

**Misc**

     .. autosummary::
        :nosignatures:

        ~Tree.checkDupedTaxonNames
        ~Tree.checkSplitKeys
        ~Tree.checkTaxNames
        ~Tree.checkThatAllSelfNodesAreInTheTree
        ~Tree.inputTreesToSuperTreeDistances
        ~Tree.makeSplitKeys
        ~Tree.readBipartitionsFromPaupLogFile
        ~Tree.recalculateSplitKeysOfNodeFromChildren
        ~Tree.setNexusSets
        ~Tree.topologyDistance
        ~Tree.tvTopologyCompare
        ~Tree.patristicDistanceMatrix



    """

    from p4.tree_manip import node, rotateAround, reRoot, removeRoot, removeNode, removeAboveNode, collapseNode, collapseClade, pruneSubTreeWithoutParent, reconnectSubTreeWithoutParent, pruneSubTreeWithParent, reconnectSubTreeWithParent, addNodeBetweenNodes, allBiRootedTrees, ladderize, randomizeTopology, readBipartitionsFromPaupLogFile, renameForPhylip, restoreNamesFromRenameForPhylip, restoreDupeTaxa, lineUpLeaves, removeEverythingExceptCladeAtNode, dupeSubTree, addSubTree, addLeaf, addSibLeaf, subTreeIsFullyBifurcating, nni, nni2, checkThatAllSelfNodesAreInTheTree, spr, randomSpr, inputTreesToSuperTreeDistances
    from p4.tree_optsim import __del__, deleteCStuff, _allocCStuff, setCStuff, _commonCStuff, calcLogLike, optLogLike, optTest, simulate, ancestralStateDraw, getSiteLikes
    from p4.tree_model import data, model, _checkModelThing, newComp, newRMatrix, newGdasrv, setPInvar, setRelRate, setModelComponentOnNode, setModelThingsRandomly, setModelComponentsOnNodesRandomly, setModelComponentsNNodes, summarizeModelComponentsNNodes, setTextDrawSymbol, setNGammaCat, modelSanityCheck, setEmpiricalComps
    from p4.tree_write import patristicDistanceMatrix, tPickle, writeNexus, write, writePhylip, writeNewick, _getMcmcCommandComment, draw, textDrawList, eps
    from p4.tree_fit import simsForModelFitTests, modelFitTests, compoTestUsingSimulations, bigXSquaredSubM, compStatFromCharFreqs, getEuclideanDistanceFromSelfDataToExpectedComposition

    def __init__(self):
        self.fName = None   # The name of the file it came from
        self.name = None
        self.root = None
        self.nodes = []
        # nodeNums of nodes root -> tips   A numpy array
        self.preOrder = None
        # nodeNums of nodes tips -> root   A numpy array
        self.postOrder = None
        self.preAndPostOrderAreValid = 0
        # Usually weight is 1/N, so the reciprocal looks nicer
        self.recipWeight = None
        # self.weight = None               # Only for floating point weights,
        # so not usually ...
        # An ordered list.  self.taxNames is a property
        self._taxNames = []
        # A Data object.  self.data is a property
        self._data = None
        self.cTree = None                 # A pointer to a c-struct
        self.logLike = None
        self.partLikes = None
        # A Model object.  self.model is a property
        self._model = None
        # self.nTax, a property
        self._nTax = 0
        self._nInternalNodes = -1
        # self.nInternalNodes, a property
        self.doDataPart = 0
        self.nexusSets = None
        self.nodeForSplitKeyDict = None

    #########################################################
    # Properties: data, model in Tree_model
    #########################################################

    #########################################################
    # Properties: taxNames, nTax, nInternalNodes
    #########################################################

    @property
    def taxNames(self):
        """(property) taxNames"""
        return self._taxNames

    @taxNames.setter
    def taxNames(self, theTaxNames):
        gm = ['Tree.taxNames']
        if not isinstance(theTaxNames, list):
            gm.append("You can only set property 'taxNames' to a list.")
            gm.append("Got attempt to set to '%s'" % theTaxNames)
            raise P4Error(gm)
        self._taxNames = theTaxNames
        if theTaxNames:
            self.checkTaxNames()
    
    @taxNames.deleter
    def taxNames(self):
        gm = ['Tree.taxNames']
        gm.append("    Caught an attempt to delete self.taxNames, but")
        gm.append("self.taxNames is a property, so you can't delete it.")
        gm.append("But you can set it to an empty list if you like.")
        raise P4Error(gm)


    def _setTaxNamesFromLeaves(self):
        tax = []
        for n in self.iterNodes():
            if n.isLeaf and n.name:
                tax.append(n.name)
            # This next line should not be needed, as root leaves should be
            # leaves.
            # terminal root that has a taxName
            elif n == self.root and n.name and n.getNChildren() < 2:
                tax.append(n.name)
            tax.sort()
        self._taxNames = tax
        if self._taxNames:
            self.checkTaxNames()

    def _getNTax(self):
        # We can't rely on len(self.taxNames), cuz it might not exist.
        # if hasattr(self, '_nTax') and self._nTax:
        if self._nTax:
            return self._nTax
        else:
            nTax = 0
            if self.nodes:
                for n in self.iterNodes():
                    if n.isLeaf:
                        nTax += 1
            self._nTax = nTax
            return nTax

    nTax = property(_getNTax)
    """(property) nTax"""

    def _getNInternalNodes(self):
        if self._nInternalNodes >= 0:
            return self._nInternalNodes
        elif not self.nodes:
            return 0
        else:
            self._nInternalNodes = len([n for n in self.iterInternalsNoRoot()])
            if not self.root.isLeaf:
                self._nInternalNodes += 1
            return self._nInternalNodes

    def _setNInternalNodes(self, theNInternalNodes):
        gm = ['Tree._setNInternalNodes()']
        gm.append("Caught an attempt to set self.nInternalNodes, but")
        gm.append(
            "self.nInternalNodes is a property, so you shouldn't do that.")
        raise P4Error(gm)

    def _delNInternalNodes(self):
        self._nInternalNodes = -1

    nInternalNodes = property(_getNInternalNodes, _setNInternalNodes, _delNInternalNodes)
    """(property) nInternalNodes"""

    ##################################################
    ##################################################

[docs]    def dupe(self):
        """Duplicates self, but with no c-pointers.  And no data object.

        If there is a model, it is duped.

        Returns a copy of self.
        """

        storedData = None
        if self.data:
            storedData = self.data
            # We don't want to have to copy a big data object, now do we?
            self.data = None
        dupe = copy.deepcopy(self)
        # print 'Tree.dupe()   self.root=%s, dupe.root=%s' % (self.root,
        # dupe.root)

        # Delete cPointers
        for n in dupe.nodes:
            if n.cNode:
                n.cNode = None
        if dupe.cTree:
            dupe.cTree = None
        if dupe.model and dupe.model.cModel:
            dupe.model.cModel = None
        if storedData:
            self.data = storedData
        return dupe

[docs]    def parseNexus(self, flob, translationHash=None, doModelComments=0):
        """Start parsing nexus format newick tree description.

        From just after the command word 'tree', to the first paren of
        the Newick part of the tree.

        Args:
            flob: an open file or file-like object

            translationHash (dict): associates short names or numbers with long
            proper taxon names

            doModelComments (bool): whether to parse p4-specific model command
            comments in the tree description

        Returns:
            None

        """

        gm = ['Tree.parseNexus()']  # re-defined below
        if 0:
            print('Tree.parseNexus() translationHash = %s' % translationHash)
            print('    doModelComments = %s (nParts)' % doModelComments)
            print('    flob type is %s' % type(flob))

        tok = safeNextTok(flob, 'Tree.parseNexus()')
        # print 'parseNexus() tok = %s' % tok
        tok = p4.func.nexusUnquoteName(tok)
        if tok == '*':
            print(gm[0])
            print("    Ignoring '*' in tree description")
            tok = safeNextTok(flob, 'Tree.parseNexus()')
        if not p4.func.nexusCheckName(tok):
            gm.append("Bad tree name: '%s'" % tok)
            raise P4Error(gm)
        self.name = tok
        # print "got name: '%s'" % tok
        # print "%s" % tok
        gm = ["Tree.parseNexus() '%s'" % self.name]  # re-defining
        tok = safeNextTok(flob, gm[0])
        if tok != '=':
            gm.append("Tree name must be followed by '='")
            raise P4Error(gm)

        # Generally this is the beginning of the newick tree
        # description.  But we have to look ahead to see if there is a
        # weight comment.
        savedPos = flob.tell()
        while 1:
            beforeSafeNextTokPosn = flob.tell()
            #print('mnop var.nexus_getAllCommandComments = %s' % var.nexus_getAllCommandComments)

            tok = safeNextTok(flob, gm[0])          # skips [&U] if there is one
            #print("parseNexus: tok after '=' is '%s'.  Before pos %i, after pos %i" % (
            #    tok, beforeSafeNextTokPosn, flob.tell()))

            # This next bit will only happen if either var.nexus_getWeightCommandComments
            # or var nexus_getAllCommandComments is set.
            if tok[0] == '[':
                self.getWeightCommandComment(tok)
            elif tok == '(':
                flob.seek(beforeSafeNextTokPosn)
                #print("wxy var.nexus_getAllCommandComments is %s" % var.nexus_getAllCommandComments)
                self.parseNewick(flob, translationHash, doModelComments)
                # self._initFinish()
                break
            elif tok == ';':
                gm.append("Got ';' before any tree description.")
                raise P4Error(gm)
            elif tok[0] in string.ascii_letters + string.digits + '_' + "'":
                flob.seek(savedPos, 0)
                self.parseNewick(flob, translationHash, doModelComments)
                # self._initFinish()
                break
            else:
                gm.append('Expecting a newick tree description.')
                raise P4Error(gm)
        self._initFinish()
        # print 'finished Tree.parseNexus()'

[docs]    def getWeightCommandComment(self, tok):
        if 0:
            print('var.nexus_getWeightCommandComments = %s' % var.nexus_getWeightCommandComments)
            print('var.nexus_getAllCommandComments = %s' % var.nexus_getAllCommandComments)
            print("Got comment '%s' (type %s), checking if it is a 'weight' comment." % (tok, type(tok)))
        gm = ["Tree.getWeightCommandComment()"]
        cFlob = io.StringIO(tok)
        cFlob.seek(1)  # The [
        cTok = nextTok(cFlob)
        if not cTok:
            # print "no cTok -- returning nothing"
            return
        lowCTok = cTok.lower()
        if lowCTok in ['&r', '&u']:
            # print "got %s -- returning nothing" % cTok
            return
        if lowCTok != '&w':
            gm.append('Expecting a weight comment.  Got %s' % tok)
            raise P4Error(gm)
        cTok = nextTok(cFlob)
        # It might be a float, or the more usual 1/something
        if ("." in cTok):
            # A float?
            try:
                self.weight = float(cTok)
            except:
                gm.append("I can't grok '%s' in weight comment %s" %
                          (cTok, tok))
                raise P4Error(gm)

        # Should check for scientific notation?

        else:
            try:
                theNumerator = int(cTok)
                if theNumerator != 1:
                    gm.append(
                        'Expecting a numerator 1 in weight comment %s' % tok)
                    raise P4Error(gm)
                # print 'got theNumerator %i' % theNumerator
            except ValueError:
                gm.append('Expecting a numerator 1 in weight comment %s' % tok)
                raise P4Error(gm)
            cTok = nextTok(cFlob)
            if cTok == '/':
                cTok = safeNextTok(cFlob, 'Getting weight comment %s' % tok)
                try:
                    self.recipWeight = int(cTok)
                except ValueError:
                    gm.append('Bad denominator in weight comment %s' % tok)
                    raise P4Error(gm)
            elif cTok == ']':
                # self.recipWeight = theNumerator # ie 1, might as well leave
                # it as None
                pass
            else:
                gm.append("I can't grok '%s' in weight comment %s" %
                          (cTok, tok))
                raise P4Error(gm)
        cFlob.close()
        # print 'got recipWeight = %s' % self.recipWeight


# def printStack(self, theStack):  # only used for debugging parseNewick()
# print 'stack = ',
# for n in theStack:
# print "%i['%s'] " % (n.nodeNum, n.name),
# print ''

[docs]    def parseNewick(self, flob, translationHash, doModelComments=0):
        """Parse Newick tree descriptions.

        This is stack-based, and does not use recursion.
        """

        if 0:
            print('parseNewick here. doModelComments=%s' % doModelComments)
            print("    translationHash=%s, self.taxNames=%s" % (translationHash, self.taxNames))
            print("    flob type is %s, pos is %i" % (type(flob), flob.tell()))
            print("wyy var.nexus_getAllCommandComments is %s" % var.nexus_getAllCommandComments)
            print(f"    var.punctuation is {var.punctuation}")

        if self.name:
            gm = ["Tree.parseNewick(), tree '%s'" % self.name]
        else:
            gm = ['Tree.parseNewick()']

        if hasattr(flob, 'name') and flob.name:
            self.fName = flob.name
            gm[0] += ", File %s" % self.fName


        stack = []
        isAfterParen = 1  # to start, even tho its not true
        isAfterComma = 0
        parenNestLevel = 0
        lastPopped = None
        
        tok = nextTok(flob)          # skip over [&U] if there is one.
        # print("xx Got tok %s" % tok)
        if not tok:
            return
        isQuotedTok = False
        if tok.startswith("'"):
            isQuotedTok = True
        # Should generally be the opening paren, except if its a single-node
        # tree.
        tok = p4.func.nexusUnquoteName(tok)

        if doModelComments:
            # Turn on var.nexus_getAllCommandComments in order 
            # to be able to read model info on nodes, eg [& c0.1]
            # restore at end
            # We need this after the safeNextTok() above, in case there is a [&U] to be ignored.
            savedP4Nexus_getAllCommandComments = var.nexus_getAllCommandComments
            var.nexus_getAllCommandComments = 1

        while tok != ';':
            #print("top of loop tok '%s', isQuotedTok=%s, tok[0] is '%s'" % (tok, isQuotedTok, tok[0]))
            if tok == '(':
                # print "Got '(': new node (%i)." % len(self.nodes)
                if not (isAfterParen or isAfterComma):
                    gm.append(
                        'Got badly-placed paren, not after a paren or comma.')
                    raise P4Error(gm)
                newNode = Node()
                if doModelComments:
                    for pNum in range(doModelComments):
                        newNode.parts.append(NodePart())
                        newNode.br.parts.append(NodeBranchPart())

                # self.printStack(stack)
                if len(stack):
                    newNode.parent = stack[-1]
                    if newNode.parent.leftChild == None:
                        newNode.parent.leftChild = newNode
                    else:
                        newNode.parent.rightmostChild().sibling = newNode
                else:
                    if len(self.nodes) == 0:
                        self.root = newNode
                        # Sometimes. Generally not true-- corrected at the end.
                        newNode.isLeaf = 1
                    else:
                        gm.append('Something is wrong. Stack is empty.')
                        gm.append('Extra paren?')
                        raise P4Error(gm)
                newNode.nodeNum = len(self.nodes)
                self.nodes.append(newNode)
                stack.append(newNode)
                isAfterParen = 1
                parenNestLevel += 1

            elif tok == ',':
                if isAfterParen:
                    gm.append('Got comma after paren.')
                    raise P4Error(gm)
                elif isAfterComma:
                    gm.append('Got comma after comma.')
                    raise P4Error(gm)
                # self.printStack(stack)
                try:
                    lastPopped = stack.pop()
                except IndexError:
                    gm.append('Empty stack.  Out of place comma?')
                    raise P4Error(gm)
                isAfterComma = 1
                if len(stack) == 0:
                    gm.append('Empty stack.  Out of place comma?')
                    raise P4Error(gm)

            elif tok == ')':
                try:
                    lastPopped = stack.pop()
                except IndexError:
                    gm.append('Empty stack.  Out of place unparen?')
                    raise P4Error(gm)

                isAfterParen = 0
                isAfterComma = 0
                parenNestLevel = parenNestLevel - 1
                if parenNestLevel < 0:
                    gm.append('Unmatched unparen.')
                    raise P4Error(gm)
                if len(stack) == 0 and len(self.nodes) > 1:
                    gm.append('Empty stack.  Out of place unparen?')
                    raise P4Error(gm)

            elif tok[0] in string.ascii_letters or tok[0] in string.digits or tok[0] in var.nexus_safeChars \
                 or isQuotedTok or tok[0] in ['_', '#', '\\', '/', '"', '(', ')']:
                # A single-node tree, not ()aName, rather just aName.
                if len(self.nodes) == 0:
                    isAfterParen = 1
                if not (isAfterParen or isAfterComma):
                    # Probably a name of an internal node.
                    if len(stack):
                        # if stack[-1].isLeaf and stack[-1].name != '(':
                        if stack[-1].name:
                            if not var.newick_allowSpacesInNames:
                                # a second name after a node name, eg (A foo, B)   =>foo is bad
                                # or eg (A, B)foo bar    => bar is bad
                                gm.append("Badly placed token '%s'." % tok)
                                gm.append("Appears to be a second node name, after '%s'" % stack[-1].name)
                                gm.append('Missing comma maybe?  Or punctuation or spaces in an unquoted name?')
                                gm.append("To allow reading Newick (or Nexus) with spaces, ")
                                gm.append("turn var.newick_allowSpacesInNames on")
                                raise P4Error(gm)
                            else:
                                stack[-1].name += ' '
                                stack[-1].name += tok
                        else:
                            # Usually this...
                            # print "naming node %i as '%s'" % (stack[-1].nodeNum, tok)
                            # We allow bad names on internal nodes, ie we do
                            # not nexusCheckName(tok)
                            stack[-1].name = tok

                    else:    # len(stack) == 0
                        if lastPopped and lastPopped.name == None:  # ()A
                            # print "naming lastPopped node %i with '%s'" %
                            # (lastPopped.nodeNum, tok)
                            lastPopped.isLeaf = 1
                            #lastPopped.label = tok
                            lastPopped.name = tok
                        else:
                            gm.append("Badly placed token '%s' in tree description." % tok)
                            raise P4Error(gm)

                else:
                    # A new terminal node.
                    if tok[0] in string.ascii_letters or tok[0] in ['_']:
                        if translationHash and tok in translationHash:
                            # print 'got key %s, val is %s' % (tok,
                            # translationHash[tok])
                            tok = translationHash[tok]

                    elif tok[0] in string.digits:
                        if var.nexus_allowAllDigitNames:
                            if translationHash and tok in translationHash:
                                # print 'got key %s, val is %s' % (tok,
                                # translationHash[tok])
                                tok = translationHash[tok]
                        else:
                            try:
                                tok = int(tok)
                                if translationHash and repr(tok) in translationHash:
                                    tok = translationHash[repr(tok)]
                                elif translationHash and repr(tok) not in translationHash:
                                    gm.append("There is a 'translation' for this tree, but the")
                                    gm.append("number '%i' in the tree description" % tok)
                                    gm.append('is not included in that translate command.')
                                    raise P4Error(gm)
                                elif self.taxNames:
                                    try:
                                        tok = self.taxNames[tok - 1]
                                    except IndexError:
                                        gm.append("Can't make sense out of token '%s' for a new terminal node." % tok)
                                        gm.append('There is no translate command, and the taxNames does not')
                                        gm.append('have a value for that number.')
                                        raise P4Error(gm)
                                else:
                                    gm.append("We have a taxon name '%s', composed only of numerals." % tok)
                                    gm.append(" ")
                                    gm.append('The Nexus format allows tree specifications with no')
                                    gm.append('translate command to use integers to refer to taxa.')
                                    gm.append('That is possible because in a proper Nexus file the')
                                    gm.append('taxa are defined before the trees.  P4, however, does')
                                    gm.append('not require definition of taxa before the trees, and in')
                                    gm.append('the present case no definition was made.  Deal with it.')
                                    raise P4Error(gm)
                            except ValueError:
                                if translationHash and repr(tok) in translationHash:
                                    tok = translationHash[repr(tok)]
                                # else:  # starts with a digit, but it is not an int.
                                #    gm.append('Problem token %s' % tok)
                                #    raise P4Error(gm)

                    #print("Got terminal node '%s'" % tok)
                    newNode = Node()
                    if doModelComments:
                        for pNum in range(doModelComments):
                            newNode.parts.append(NodePart())
                            newNode.br.parts.append(NodeBranchPart())

                    newNode.isLeaf = 1
                    if p4.func.nexusCheckName(tok):
                        newNode.name = tok
                        # print 'got newNode.name = %s' % tok
                    else:
                        gm.append("Bad name '%s'" % tok)
                        raise P4Error(gm)
                    if len(stack):
                        newNode.parent = stack[-1]
                        if newNode.parent.leftChild == None:
                            newNode.parent.leftChild = newNode
                        else:
                            newNode.parent.rightmostChild().sibling = newNode
                    newNode.nodeNum = len(self.nodes)
                    if len(self.nodes) == 0:
                        self.root = newNode
                    self.nodes.append(newNode)
                    stack.append(newNode)
                    isAfterParen = 0
                    isAfterComma = 0

            elif tok == ':':
                #  Looking for a br.len number, which might be eg 0.234 or -1.23e-05
                # It might be a multi-token operation.  Accumulate tok's in
                # theNum
                theNum = nextTok(flob)
                if not theNum:
                    gm.append('Tree description ended with a colon.  Bad!')
                    raise P4Error(gm)
                # print "  Got token after colon:  '%s'" % theNum
                if theNum == '-' or theNum == '+':
                    tok = nextTok(flob)
                    # print "  Got tok: '%s' after '%s'" % (tok, theNum)
                    if not tok:
                        gm.append('Trying to deal with a branch length.')
                        gm.append("It didn't work, tho.")
                        gm.append("Got this after colon:  '%s'" % theNum)
                        gm.append('followed by nothing.')
                        raise P4Error(gm)
                    theNum += tok
                try:
                    # If it is a simple number like 0.123 or -23, then we are
                    # finished.
                    # Won't work if it ends in 'e'
                    stack[-1].br.len = float(theNum)
                    #print('  Successfully got br.len %f' % stack[-1].br.len)
                except ValueError:
                    # The first bit after the colon is hopefully something like
                    # +1.23e
                    if theNum[-1] not in ['e', 'E']:
                        gm.append(
                            'Trying to deal with a branch length after a colon, but I am totally confused.')
                        gm.append("Can't make sense out of '%s'" % theNum)
                        raise P4Error(gm, 'newick_badBranchLength')
                    try:
                        float(theNum[:-1])
                    except ValueError:
                        gm.append(
                            'Trying to deal with a branch length after a colon, but I am totally confused.')
                        gm.append("Can't make sense out of '%s'" % theNum)
                        raise P4Error(gm, 'newick_badBranchLength')

                    # Now we are sure that the first bit *is* something like +1.23e
                    # We do not allow spaces after the 'e', so we do not use nextTok().
                    # That introduces a bug, where comments inserted in the number don't get ignored.   <<== unfixed bug!
                    # The first thing must be a '+' or a '-'.
                    c = flob.read(1)
                    if not c:
                        gm.append(
                            'Trying to deal with a branch length, possibly in scientific notation.')
                        gm.append(
                            "Got '%s' after the colon, but then nothing." % theNum)
                        raise P4Error(gm)
                    if c not in ['+', '-']:
                        gm.append(
                            'Trying to deal with a branch length, possibly in scientific notation.')
                        gm.append("Got '%s' after the colon." % theNum)
                        gm.append(
                            "Expecting a '+' or '-' after that (no spaces allowed).")
                        gm.append("Got '%s'." % c)
                        raise P4Error(gm)
                    # Accumulate characters in 'theExp'.  We need at least one
                    # digit.
                    theExp = c
                    c = flob.read(1)
                    if not c:
                        gm.append('Trying to deal with a branch length, possibly in scientific notation.')
                        gm.append("Got '%s%s' after the colon, but then nothing." % (theNum, theExp))
                        raise P4Error(gm)
                    if c not in string.digits:
                        gm.append("Trying to deal with a branch length, possibly in scientific notation.")
                        gm.append("Got '%s%s' after the colon." % (theNum, theExp))
                        gm.append('Expecting one or more digits.')
                        gm.append("Got '%s'" % c)
                        raise P4Error(gm)
                    theExp += c
                    # So we got one good digit.  Are there any more?
                    while 1:
                        positionBeforeRead = flob.tell()
                        c = flob.read(1)
                        if not c:
                            gm.append('Trying to deal with a branch length, possibly in scientific notation.')
                            gm.append("Got '%s%s' after the colon, but then nothing." % (theNum, theExp))
                            raise P4Error(gm)
                        # We got something.  If its a digit, add it to
                        # theExp.  If its anything else, back up one
                        # space and then break
                        if c in string.digits:
                            theExp += c
                        else:
                            flob.seek(positionBeforeRead)
                            break
                    #print("  At this point, theNum='%s' and theExp='%s'" % (theNum, theExp))
                    try:
                        # print "  Trying to see if theExp '%s' can be
                        # converted to an int." % theExp
                        int(theExp)
                        try:
                            theBrLen = float(theNum + theExp)
                            # print '  Successfully got br.len %g (from %s%s)'
                            # % (theBrLen, theNum, theExp)
                            stack[-1].br.len = theBrLen
                        except ValueError:
                            gm.append('Trying to deal with a branch length, possibly in scientific notation.')
                            gm.append("It didn't work, tho.")
                            gm.append("Got these after colon: '%s' and '%s'" % (theNum, theExp))
                            gm.append('And they could not be converted to an exponential float.')
                            raise P4Error(gm)
                    except ValueError:
                        gm.append('Trying to deal with a branch length, possibly in scientific notation.')
                        gm.append("It didn't work, tho.")
                        gm.append("Got these after colon: '%s' and '%s'." % (theNum, theExp))
                        gm.append('And the latter does not appear to be an int.')
                        raise P4Error(gm)

            elif tok[0] == '[':
                # print("openSquareBracket.  Got tok '%s'" % tok)
                # if doModelComments is set, it should be set to nParts.
                if doModelComments:
                    # eg [& c0.1 r0.0]
                    n = stack[-1]
                    # print('got comment %s, node %i' % (tok, n.nodeNum))
                    cFlob = io.StringIO(tok)
                    cFlob.seek(2)
                    tok2 = safeNextTok(cFlob)
                    while 1:
                        if tok2 == ']':
                            break
                        elif tok2[0] in ['c', 'r', 'g']:
                            ending = tok2[1:]
                            splitEnding = ending.split('.')
                            try:
                                firstNum = int(splitEnding[0])
                                secondNum = int(splitEnding[1])
                            except ValueError:
                                gm.append('Bad command comment %s' % tok)
                                raise P4Error(gm)
                            if tok2[0] == 'c':
                                n.parts[firstNum].compNum = secondNum
                            if tok2[0] == 'r':
                                n.br.parts[firstNum].rMatrixNum = secondNum
                            if tok2[0] == 'g':
                                n.br.parts[firstNum].gdasrvNum = secondNum
                        else:
                            gm.append('Bad command comment %s' % tok)
                            raise P4Error(gm)
                        tok2 = safeNextTok(cFlob)
                elif 0:
                    # Ugly hack for RAxML trees with bootstrap
                    # supports in square brackets after the br len, on
                    # internal nodes.  First modify the eg [100] to be
                    # [&100], set var.nexus_getAllCommandComments =
                    # True, and turn on this elif section.
                    myNode = stack[-1]
                    assert not myNode.isLeaf
                    assert not myNode.name
                    mySupportString = tok[2:-1]
                    # print mySupportString
                    myNode.name = mySupportString
                elif var.nexus_readBeastTreeCommandComments:
                    n = stack[-1]
                    i = 2
                    while i < (len(tok) - 1):
                        j = i
                        inBraces = False
                        while 1:
                            j += 1
                            # print tok[j]
                            if tok[j] == ']':
                                break
                            if tok[j] == '{':
                                inBraces = True
                            if inBraces:
                                if tok[j] == '}':
                                    inBraces = False
                            else:
                                if tok[j] == ',':
                                    break
                        substring = tok[i:j]
                        # print(substring)
                        splSS = substring.split('=')
                        theNameString = splSS[0].strip()
                        if '%' in theNameString:
                            theNameString = theNameString.replace('%', '')
                        theValString = splSS[1]
                        if '{' in theValString:
                            theValString = theValString.replace('{', '(')
                            theValString = theValString.replace('}', ')')
                        if theValString == 'true':
                            theVal = True
                        elif theValString == 'false':
                            theVal = False
                        else:
                            theVal = eval(theValString)
                        assert isinstance(theVal, (float, tuple, bool))
                        n.__setattr__(theNameString, theVal)
                        i = j + 1
                elif 0:
                    # branch colours, after the branch length, eg '[&!color=#8caaf4]'
                    if tok.startswith("[&!color="):
                        myNode = stack[-1]
                        theColour = tok[9:-1]
                        #print(theColour)
                        myNode.br.colour = theColour

                    
                    
            else:
                gm.append("I can't make sense of the token '%s'" % tok)
                if len(tok) == 1:
                    if tok[0] in var.punctuation:
                        gm.append("The token is in var.punctuation. If you don't think it should")
                        gm.append("be, you can modify what p4 thinks that punctuation is.")
                        gm.append("So you might do this:")
                        gm.append("var.punctuation = var.phylip_punctuation")
                        gm.append("(or use your own definition -- see var.py)")
                        gm.append("read('yourWackyTreeFile.phy')")
                        gm.append("That might work.")
                    if tok[0] not in var.punctuation:
                        gm.append("The token is not in your current var.punctuation.")

                #gm.append("tok[0] is '%s'" % tok[0])
                raise P4Error(gm)

            sTok = safeNextTok(flob, 'Tree init, reading tree string')
            if sTok.startswith("'"):
                isQuotedTok = True
            else:
                isQuotedTok = False
            tok = p4.func.nexusUnquoteName(sTok)
            #print('got tok for next round = %s' % tok)
            # This is the end of the "while tok != ';':" loop

        # print '\n*** Stack len = %i ***' % len(stack)

        if parenNestLevel > 0:
            gm.append('Unmatched paren.')
            raise P4Error(gm)
        elif parenNestLevel < 0:
            gm.append('Unmatched unparen.')
            raise P4Error(gm)

        if len(stack) == 0:
            if len(self.nodes) == 1:
                pass
            else:
                gm.append(
                    "Got an oddly-placed ';' in the tree %s description." % self.name)
                self.dump(treeInfo=0, nodeInfo=1)
                raise P4Error(gm)
        elif len(stack) > 1:
            gm.append(
                "Got an oddly-placed ';' in the tree %s description." % self.name)
            # gm.append('(stack len = %i)' % len(stack)
            #self.dump(tree=0, node=1)
            raise P4Error(gm)

        if self.root.leftChild and self.root.leftChild.sibling:  # usually this
            self.root.isLeaf = 0

        # Should a root on a stick be a leaf?  If it is just for
        # display purposes, then it should be ok to not be a leaf.
        # But if you are going to re-Root, then it will cause trouble.
        # So by default, a root on a stick should be a leaf.  I think.

        # Hopefully if you are dealing with this, then you know what
        # you are doing and what you want, and how to modify things to
        # get it.

        # Uncomment this next line to make it always non-leaf, even if it is a
        # leaf.

        # self.root.isLeaf = 0 # do this to always have a non-leaf
        # root-on-a-stick   <-- Potential Trouble!!!
        self.root.br = None
        # self.draw()
        #self.dump(tree=0, node=1, treeModel=0)

        if doModelComments:
            # restore the value of var.nexus_getAllCommandComments, which was
            # saved above.
            var.nexus_getAllCommandComments = savedP4Nexus_getAllCommandComments

    def _initFinish(self):

        if self.name:
            gm = ["Tree._initFinish()   tree '%s'" % self.name]
        else:
            gm = ['Tree._initFinish()']

        # Checking for duped taxon names used to be here, but it was
        # pushed further ahead, so that self.taxNames can be corrected
        # also, if need be.  At this point, self does not have
        # taxNames set.

        # Check that all terminal nodes have names
        for item in self.nodes:
            if item.isLeaf:
                # print 'leaf name %s' % item.name
                if not item.name:
                    if item == self.root:
                        if var.warnAboutTerminalRootWithNoName:
                            print('Tree._initFinish()')
                            print('    Non-fatal warning: the root is terminal, but has no name.')
                            print('    This may be what you wanted.  Or not?')
                            print('    (To get rid of this warning, turn off var.warnAboutTerminalRootWithNoName)')
                    else:
                        gm.append('Got a terminal node with no name.')
                        raise P4Error(gm)

        self.preOrder = numpy.array(
            [var.NO_ORDER] * len(self.nodes), numpy.int32)
        self.postOrder = numpy.array(
            [var.NO_ORDER] * len(self.nodes), numpy.int32)

        if len(self.nodes) > 1:
            self.setPreAndPostOrder()

[docs]    def checkDupedTaxonNames(self):

        # Called by p4.func._tryToReadNexusFile() and p4.func._tryToReadPhylipFile()
        # Check for duped names
        if self.name:
            gm = ["Tree.checkDupedTaxonNames()   tree '%s'" % self.name]
        else:
            gm = ['Tree.checkDupedTaxonNames()']
        if self.fName:
            gm[0] += ' file=%s' % self.fName

        hasDupedName = 0
        loNames = []
        for n in self.nodes:
            if n.isLeaf and n.name:
                loNames.append(n.name.lower())
        for loName in loNames:
            if loNames.count(loName) > 1:
                if var.allowDupedTaxonNames:
                    pass
                elif not var.doRepairDupedTaxonNames:
                    gm.append(
                        "Got duplicated taxon (lowercased) name '%s'." % loName)
                    gm.append(
                        'Since var.doRepairDupedTaxonNames is not turned on, p4 will not fix duplications.')
                    gm.append('To repair duplications verbosely, set ')
                    gm.append('var.doRepairDupedTaxonNames = 1')
                    gm.append('To repair duplications silently, set')
                    gm.append('var.doRepairDupedTaxonNames = 2')
                    raise P4Error(gm)
                hasDupedName = 1
                break

        if hasDupedName:
            # print self.name
            if var.allowDupedTaxonNames:
                # more hacking ...
                if var.allowDupedTaxonNames == 2:  # ie silently.
                    pass
                else:
                    complainedAlready = []
                    for loName in loNames:
                        if loNames.count(loName) > 1 and loName not in complainedAlready:
                            if self.name:
                                print("        Tree %s. Duped tax name (lowercased) '%s'" % (
                                    self.name, loName))
                            else:
                                print("        Duped tax name (lowercased) '%s'" % loName)
                            complainedAlready.append(loName)

            elif var.doRepairDupedTaxonNames:
                repairedNames = []
                for loName in loNames:
                    if loNames.count(loName) > 1 and n not in repairedNames:
                        repairCounter = 1
                        repairCounter2 = 1
                        for n in self.nodes:
                            if n.isLeaf:
                                if n.name and n.name.lower() == loName:
                                    newName = '%s_%i' % (n.name, repairCounter)
                                    if var.doRepairDupedTaxonNames == 1:
                                        if self.name:
                                            print("        Tree %s. Changing '%s' to '%s'" % (
                                                self.name, n.name, newName))
                                        else:
                                            print("        Changing '%s' to '%s'" % (n.name, newName))
                                    n.name = newName
                                    repairedNames.append(loName)
                                    repairCounter += 1
                        if self.taxNames:
                            for tNameNum in range(len(self.taxNames)):
                                tName = self.taxNames[tNameNum]
                                if tName.lower() == loName:
                                    newName = '%s_%i' % (tName, repairCounter2)
                                    self.taxNames[tNameNum] = newName
                                    repairCounter2 += 1
                            assert repairCounter == repairCounter2, "Got a problem with re-naming duped taxa."

    ##############
    ##############
    #
    # dump()
    #
    ##############
    ##############

[docs]    def dump(self, tree=0, node=0, model=0, all=0):
        """Print rubbish about self.

        tree
            is the default, showing basic info about the tree.
        node
            shows info about all the nodes.
        model
            shows which model component number goes on which node.
            (which you can also get by drawing the tree)

        (If you want the info about the model itself, do a
        aTree.model.dump() instead.)

        """

        if all:
            self._doTreeInfo()
            self._doNodeInfo()
            self._doNodeModelInfo()
        elif not tree and not node and not model:
            self._doTreeInfo()
        else:
            if tree:
                self._doTreeInfo()
            if node:
                self._doNodeInfo()
            if model:
                self._doNodeModelInfo()

    def _doTreeInfo(self):
        if self.name:
            print("Tree '%s' dump" % self.name)
        else:
            print('Tree dump.  No name.')
        if self.fName:
            print("    From file '%s'" % self.fName)
        else:
            print("    From an unknown file, or no file.")
        if self.root:
            print('    Node %i is root' % self.root.nodeNum)
        else:
            print('    There is no root')
        if self.recipWeight:
            print('    The tree recipWeight is %s' % self.recipWeight)
        else:
            print('    There is no recipWeight')
        print('    There are %i nodes' % len(self.nodes))
        terminals = 0
        for i in self.nodes:
            if i.isLeaf:
                terminals += 1
        print('        of which %i are terminal nodes' % terminals)
        if self.data:
            print('    There is a data object, with %i parts.' % self.data.nParts)
        else:
            print('    There is no data object.')

        if self.data:
            print('    The data came from file(s):')
            for a in self.data.alignments:
                if a.fName:
                    print('        %s' % a.fName)

        if self.model:
            print('    There is a model object, with %i parts.' % self.model.nParts)
            if self.model.cModel:
                print('    model.cModel is %i' % self.model.cModel)
            else:
                print('    There is no cModel.')
        else:
            print('    There is no model object.')

        if self.taxNames:
            print('    There is a taxNames list.')
        else:
            print('    There is no taxNames list.')
        if self.cTree:
            print('    cTree is %i' % self.cTree)
        else:
            print('    There is no cTree.')

    def _doNodeInfo(self):
        """Basic rubbish about nodes."""

        print('\n-------- nodes -----------------------------------------')
        print('%7s %6s %6s %6s %6s %7s %6s  %4s' % ('nodeNum', 'isLeaf', 'parent', 'leftCh',
                                                    'siblng', 'br.len', 'seqNum', 'name'))
        for n in self.nodes:
            print('%7s %6s' % (n.nodeNum, n.isLeaf), end=' ')
            if n.parent:
                print('%6s' % n.parent.nodeNum, end=' ')
            else:
                print('%6s' % 'None', end=' ')

            if n.leftChild:
                print('%6s' % n.leftChild.nodeNum, end=' ')
            else:
                print('%6s' % 'None', end=' ')

            if n.sibling:
                print('%6s' % n.sibling.nodeNum, end=' ')
            else:
                print('%6s' % 'None', end=' ')

            if n.br and (n.br.len or n.br.len == 0.0):
                print('%7.3f' % n.br.len, end=' ')
            else:
                print('%7s' % 'None', end=' ')

            if n.seqNum or n.seqNum == 0:
                print('%6s' % n.seqNum, end=' ')
            else:
                print('%6s' % 'None', end=' ')

            if n.name:
                print('  %s' % n.name)
            else:
                print('  %s' % 'None')
        print('--------------------------------------------------------\n')

        doMore = 0
        for n in self.iterNodesNoRoot():
            if hasattr(n.br, 'name') and n.br.name:
                doMore = 1
                break
            elif hasattr(n.br, 'uName') and n.br.uName:
                doMore = 1
                break
            elif hasattr(n.br, 'support') and n.br.support:
                doMore = 1
                break

        if doMore:
            print('\n-------- more node stuff -------------------------------')
            print('%7s   %10s %10s %10s   %4s' % ('nodeNum', 'br.name', 'br.uName', 'br.support', 'name'))
            for n in self.nodes:
                print('%7s' % n.nodeNum, end=' ')

                if n.br and hasattr(n.br, 'name') and n.br.name:
                    print('%10s' % n.br.name, end=' ')
                else:
                    print('%10s' % '-', end=' ')

                if n.br and hasattr(n.br, 'uName') and n.br.uName:
                    print('%10s' % n.br.uName, end=' ')
                else:
                    print('%10s' % '-', end=' ')

                if n.br and hasattr(n.br, 'support') and n.br.support:
                    print('%10.4f' % n.br.support, end=' ')
                else:
                    print('%10s' % '-', end=' ')

                if n.name:
                    print('    %s' % n.name)
                else:
                    print('    %s' % 'None')
            print('--------------------------------------------------------\n')

        doMore = 0
        for n in self.nodes:
            if hasattr(n, 'rootCount') and n.rootCount:
                doMore = 1
                break
            if n.br:
                if hasattr(n.br, 'color') and n.br.color:
                    doMore = 1
                    break
                elif hasattr(n.br, 'biRootCount') and n.br.biRootCount:
                    doMore = 1
                    break

        if doMore:
            print('\n-------- even more node stuff --------------------------')
            print('%7s   %10s %14s %10s   %4s' % ('nodeNum', 'br.color', 'br.biRootCount', 'rootCount', 'name'))
            for n in self.nodes:
                print('%7s' % n.nodeNum, end=' ')

                if n.br and hasattr(n.br, 'color') and n.br.color:
                    print('%10s' % n.br.color, end=' ')
                else:
                    print('%10s' % '-', end=' ')

                if n.br and hasattr(n.br, 'biRootCount') and n.br.biRootCount:
                    print('%14s' % n.br.biRootCount, end=' ')
                else:
                    print('%14s' % '-', end=' ')

                if hasattr(n, 'rootCount') and n.rootCount:
                    print('%10s' % n.rootCount, end=' ')
                else:
                    print('%10s' % '-', end=' ')

                if n.name:
                    print('    %s' % n.name)
                else:
                    print('    %s' % 'None')
            print('--------------------------------------------------------\n')

    def _doNodeModelInfo(self):
        if not self.model:
            print('\n****** Node Model Info.  No model.')
            if not self.data:
                print('(no data attached, either)')
        else:
            print('\n****** Node Model Info.  nParts=%s' % self.model.nParts)
            if not self.data:
                print('no data')
            if not self.model.nParts:
                return

            print('\nComps in the nodes:')
            print(' %13s' % 'nodeNum', end=' ')
            for i in range(self.model.nParts):
                print(' %8s' % 'part%i' % i, end=' ')
            print('')
            for n in self.nodes:
                print(' %13i' % n.nodeNum, end=' ')
                for i in range(self.model.nParts):
                    print('%8i' % n.parts[i].compNum, end=' ')
                print('')

            print('\nrMatrices in the nodes:')
            print(' %13s' % 'nodeNum', end=' ')
            for i in range(self.model.nParts):
                print(' %8s' % 'part%i' % i, end=' ')
            print('')
            for n in self.iterNodesNoRoot():
                print(' %13i' % n.nodeNum, end=' ')
                for i in range(self.model.nParts):
                    print('%8i' % n.br.parts[i].rMatrixNum, end=' ')
                print('')

            print('\ngdasrvs in the nodes:')
            print(' %13s' % '', end=' ')
            for i in range(self.model.nParts):
                print(' %8s' % 'part%i' % i, end=' ')
            print('')
            print(' %13s' % 'nGammaCats ->', end=' ')
            for i in range(self.model.nParts):
                print('%8i' % self.model.parts[i].nGammaCat, end=' ')
            print('\n')

            print(' %13s' % 'nodeNum', end=' ')
            for i in range(self.model.nParts):
                print(' %8s' % 'part%i' % i, end=' ')
            print('')
            for n in self.iterNodesNoRoot():
                print(' %13i' % n.nodeNum, end=' ')
                for i in range(self.model.nParts):
                    print('%8i' % n.br.parts[i].gdasrvNum, end=' ')
                print('')

    ###########################
    #
    # Set a NexusSets object, for taxSets ...
    #
    ###########################

[docs]    def setNexusSets(self):
        """Set self.nexusSets from var.nexusSets.

        A deepcopy is made of var.nexusSets, only if it exists.  If
        var.nexusSets does not yet exist, a new blank one is not made
        (cf this method in Alignment class, where it would be
        made).

        Important!  This method depends on a correct taxNames.

        """

        assert self.taxNames, "This method requires correct taxNames, in the correct order."
        gm = ["Tree.setNexusSets()"]
        if not var.nexusSets:
            return
        self.nexusSets = copy.deepcopy(var.nexusSets)
        self.nexusSets.taxNames = self.taxNames
        self.nexusSets.nTax = self.nTax
        self.nexusSets.constant = None
        self.nexusSets.gapped = None
        self.nexusSets.charSets = []
        self.nexusSets.charPartitions = []

        if self.nexusSets.taxSets:
            # print "%s. There are %i taxSets." % (gm[0], len(self.nexusSets.taxSets))
            # Check that no taxSet name is a taxName
            lowSelfTaxNames = [txName.lower()
                               for txName in self.taxNames]
            for ts in self.nexusSets.taxSets:
                if ts.lowName in lowSelfTaxNames:
                    gm.append(
                        "Can't have taxSet names that are the same (case-insensitive) as a tax name")
                    gm.append(
                        "Lowercased taxSet name '%s' is the same as a lowcased taxName." % ts.name)
                    raise P4Error(gm)
            self.nexusSets.lowTaxNames = lowSelfTaxNames

            # If it is standard format,
            # convert triplets to numberTriplets, and then mask
            for ts in self.nexusSets.taxSets:
                if ts.format == 'standard':
                    ts.setNumberTriplets()
                    ts.setMask()
                elif ts.format == 'vector':
                    assert ts.mask
                    if len(ts.mask) != self.nTax:
                        gm.append("taxSet %s" % ts.name)
                        gm.append(
                            "It is vector format, but the length is wrong.")
                        gm.append(
                            "taxSet mask is length %i, but self nTax is %i" % (len(ts.mask), self.nTax))
                        raise P4Error(gm)
                else:
                    gm.append("taxSet %s" % ts.name)
                    gm.append("unknown format %s" % ts.format)
                    raise P4Error(gm)

                # Now set ts.taxNames from the mask
                ts.taxNames = [self.taxNames[i] for i,c in enumerate(ts.mask) if c == '1']


    ###########################
    #
    # Get lists of nodeNums ...
    #
    ###########################

[docs]    def getNodeNumsAbove(self, nodeSpecifier, leavesOnly=0):
        """Gets a list of nodeNums, in postOrder, above nodeSpecifier.

        The node specified is not included.
        """
        x, y = self.getPreAndPostOrderAbove(nodeSpecifier)
        if leavesOnly:
            tOnly = []
            for i in y[:-1]:
                if self.nodes[i].isLeaf:
                    tOnly.append(i)
            return tOnly
        return y[:-1]

[docs]    def getSeqNumsAbove(self, nodeSpecifier):
        """Gets a list of seqNums above nodeSpecifier."""
        x, y = self.getPreAndPostOrderAbove(nodeSpecifier)
        seqNums = []
        for nNum in y[:-1]:
            n = self.nodes[nNum]
            if n.isLeaf:
                seqNums.append(n.seqNum)
        return seqNums

    # def getAllChildrenNums(self, specifier):
    #     """Returns a list of the nodeNums of all children of the specified node
    #     Ambiguous, unused.
    #     """
    #     theNode = self.node(specifier)
    #     if not theNode:
    #         gm = ['Tree.getChildrenNums()']
    #         gm.append('Bad node specifier')
    #         raise P4Error(gm)
    #     ret = []
    #     x, y = self.getPreAndPostOrderAbove(specifier)
    #     for i in y[:-1]:
    #         ret.append(self.nodes[i].nodeNum)
    #     return ret

[docs]    def getAllLeafNames(self, specifier):
        """Returns a list of the leaf names of all children 
        """
        theNode = self.node(specifier)
        if not theNode:
            gm = ['Tree.getChildrenNums()']
            gm.append('Bad node specifier')
            raise P4Error(gm)
        ret = []
        x, y = self.getPreAndPostOrderAbove(specifier)
        for i in y[:-1]:
            if self.nodes[i].isLeaf:
                ret.append(self.nodes[i].name)
        return ret

[docs]    def getChildrenNums(self, specifier):
        """Returns a list of nodeNums of children of the specified node.

        See also Node.getNChildren()"""

        theNode = self.node(specifier)
        if not theNode:
            gm = ['Tree.getChildrenNums()']
            gm.append('Bad node specifier')
            raise P4Error(gm)
        ret = []
        c = theNode.leftChild
        while 1:
            if c:
                ret.append(c.nodeNum)
                c = c.sibling
            else:
                return ret

[docs]    def setPreAndPostOrder(self):
        """Sets or re-sets self.preOrder and self.postOrder lists of node numbers.

        PreOrder starts from the root and goes to the tips; postOrder
        starts from the tips and goes to the root."""

        self.getPreAndPostOrderAboveRoot()
        self.preAndPostOrderAreValid = 1

[docs]    def getPreAndPostOrderAbove(self, nodeSpecifier):
        """Returns 2 lists of node numbers, preOrder and postOrder.

        This uses a stack, not recursion, so it should work for large
        trees without bumping into the recursion limit.  The 2 lists
        are relative to the node specified, and include the node
        specified.  PreOrder starts from theNode and goes to the tips;
        postOrder starts from the tips and goes to theNode."""

        gm = ['Tree.getPreAndPostOrderAbove()']
        theNode = self.node(nodeSpecifier)
        preOrder = []  # nodeNum's
        postOrder = []
        if not theNode.leftChild:
            preOrder.append(theNode.nodeNum)
            postOrder.append(theNode.nodeNum)
            return preOrder, postOrder
        stack = []    # nodes
        stack.append(theNode)
        preOrder.append(theNode.nodeNum)
        while len(stack):

            if stack[-1].leftChild:
                # print 'leftChild: %i' % stack[-1].leftChild.nodeNum
                theNodeNum = stack[-1].leftChild.nodeNum
                stack.append(stack[-1].leftChild)
                preOrder.append(theNodeNum)
            elif stack[-1].sibling:
                # print 'sibling: %i' % stack[-1].sibling.nodeNum
                theNodeNum = stack[-1].sibling.nodeNum
                theSib = stack[-1].sibling
                # print '                 postOrder appending u %i' %
                # stack[-1].nodeNum
                postOrder.append(stack[-1].nodeNum)
                stack.pop()
                stack.append(theSib)
                preOrder.append(theNodeNum)
            else:
                # print '                 postOrder appending  v %i' %
                # stack[-1].nodeNum
                postOrder.append(stack[-1].nodeNum)
                stack.pop()
                if len(stack) == 0:
                    break
                # print '                 postOrder appending  w %i' %
                # stack[-1].nodeNum
                postOrder.append(stack[-1].nodeNum)
                theNode = stack.pop()
                while not theNode.sibling:
                    if len(stack) == 0:
                        break
                    # print '                 postOrder appending x %i' %
                    # stack[-1].nodeNum
                    postOrder.append(stack[-1].nodeNum)
                    theNode = stack.pop()
                if len(stack) == 0:
                    break
                if theNode.sibling:
                    stack.append(theNode.sibling)
                    preOrder.append(theNode.sibling.nodeNum)
                else:
                    gm.append('Problemo.')
                    gm.append('xxx got theNode %s' % theNode.nodeNum)
                    raise P4Error(gm)

        return preOrder, postOrder

[docs]    def getPreAndPostOrderAboveRoot(self):
        """Sets self.preOrder and self.postOrder.

        This uses a stack, not recursion, so it should work for large
        trees without bumping into the recursion limit.  PreOrder
        starts from the root and goes to the tips; postOrder starts
        from the tips and goes to the root."""

        gm = ['Tree.getPreAndPostOrderAboveRoot()']
        theNode = self.root
        preOrdIndx = 0
        postOrdIndx = 0
        if self.preOrder is None or self.postOrder is None or len(self.preOrder) != len(self.nodes) or len(self.postOrder) != len(self.nodes):
            self.preOrder = numpy.array(
                [var.NO_ORDER] * len(self.nodes), numpy.int32)
            self.postOrder = numpy.array(
                [var.NO_ORDER] * len(self.nodes), numpy.int32)

        # print "xx self.preOrder=%s" % self.preOrder
        if not theNode.leftChild:
            self.preOrder[preOrdIndx] = theNode.nodeNum
            preOrdIndx += 1
            self.postOrder[postOrdIndx] = theNode.nodeNum
            postOrdIndx += 1
        else:
            stack = []    # nodes
            stack.append(theNode)
            self.preOrder[preOrdIndx] = theNode.nodeNum
            preOrdIndx += 1
            while len(stack):

                if stack[-1].leftChild:
                    # print 'leftChild: %i  (%s)' %
                    # (stack[-1].leftChild.nodeNum, [n.nodeNum for n in stack])
                    theNodeNum = stack[-1].leftChild.nodeNum
                    stack.append(stack[-1].leftChild)
                    self.preOrder[preOrdIndx] = theNodeNum
                    preOrdIndx += 1
                elif stack[-1].sibling:
                    # print 'sibling: %i  (%s)' % (stack[-1].sibling.nodeNum,
                    # [n.nodeNum for n in stack])
                    theNodeNum = stack[-1].sibling.nodeNum
                    theSib = stack[-1].sibling
                    # print '                 postOrder appending u %i' %
                    # stack[-1].nodeNum
                    self.postOrder[postOrdIndx] = stack[-1].nodeNum
                    postOrdIndx += 1
                    stack.pop()
                    stack.append(theSib)
                    try:
                        self.preOrder[preOrdIndx] = theNodeNum
                    except IndexError:
                        gm.append("preOrdIndx=%s, theNodeNum=%i" %
                                  (preOrdIndx, theNodeNum))
                        gm.append("preOrder = %s" % self.preOrder)
                        raise P4Error(gm)
                    preOrdIndx += 1
                else:
                    # print '                 postOrder appending  v %i' %
                    # stack[-1].nodeNum
                    self.postOrder[postOrdIndx] = stack[-1].nodeNum
                    postOrdIndx += 1
                    stack.pop()
                    if len(stack) == 0:
                        break
                    # print '                 postOrder appending  w %i' %
                    # stack[-1].nodeNum
                    self.postOrder[postOrdIndx] = stack[-1].nodeNum
                    postOrdIndx += 1
                    theNode = stack.pop()
                    while not theNode.sibling:
                        if len(stack) == 0:
                            break
                        # print '                 postOrder appending x %i' %
                        # stack[-1].nodeNum
                        self.postOrder[postOrdIndx] = stack[-1].nodeNum
                        postOrdIndx += 1
                        theNode = stack.pop()
                    if len(stack) == 0:
                        break
                    if theNode.sibling:
                        stack.append(theNode.sibling)
                        # print "self.preOrder = %s, preOrdIndx=%i" %
                        # (self.preOrder, preOrdIndx)
                        self.preOrder[preOrdIndx] = theNode.sibling.nodeNum
                        preOrdIndx += 1
                    else:
                        gm.append('Problemo.')
                        gm.append('xxx got theNode %s' % theNode.nodeNum)
                        raise P4Error(gm)
        if 1:
            assert preOrdIndx == postOrdIndx
            # print "a preOrdIndx = %i, len(self.nodes) = %i" % (preOrdIndx,
            # len(self.nodes))
            if preOrdIndx != len(self.nodes):
                pOI = preOrdIndx
                for i in range(preOrdIndx, len(self.nodes)):
                    self.preOrder[i] = var.NO_ORDER
                    self.postOrder[i] = var.NO_ORDER
                    preOrdIndx += 1
                    postOrdIndx += 1
            # print "b preOrdIndx = %i, len(self.nodes) = %i" % (preOrdIndx,
            # len(self.nodes))
        assert preOrdIndx == len(self.nodes) and postOrdIndx == len(self.nodes)

[docs]    def iterPreOrder(self):
        """Node generator.  Assumes preAndPostOrderAreValid."""

        for i in self.preOrder:
            j = int(i)           # this speeds things up a lot!  Two-fold!
            if j != var.NO_ORDER:
                # yield self.nodes[int(i)]
                yield self.nodes[j]

[docs]    def iterPostOrder(self):
        """Node generator.  Assumes preAndPostOrderAreValid."""

        for i in self.postOrder:
            j = int(i)
            if j != var.NO_ORDER:
                yield self.nodes[j]

[docs]    def iterNodes(self):
        """Node generator, in preOrder.  Assumes preAndPostOrderAreValid."""

        for i in self.preOrder:
            j = int(i)
            if j != var.NO_ORDER:
                yield self.nodes[j]

[docs]    def iterNodesNoRoot(self):
        """Node generator, skipping the root. PreOrder."""

        for i in self.preOrder:
            j = int(i)
            if j != var.NO_ORDER:
                n = self.nodes[j]
                if n != self.root:
                    yield n

[docs]    def iterLeavesNoRoot(self):
        """Leaf node generator, skipping the root.  PreOrder."""

        for i in self.preOrder:
            j = int(i)
            if j != var.NO_ORDER:
                n = self.nodes[j]
                if n != self.root and n.isLeaf:
                    yield n

[docs]    def iterLeavesPreOrder(self):

        for i in self.preOrder:
            j = int(i)
            if j != var.NO_ORDER:
                n = self.nodes[j]
                if n.isLeaf:
                    yield n

[docs]    def iterLeavesPostOrder(self):

        for i in self.postOrder:
            j = int(i)
            if j != var.NO_ORDER:
                n = self.nodes[j]
                if n.isLeaf:
                    yield n

[docs]    def iterInternalsNoRootPreOrder(self):
        """Internal post order node generator, skipping the root.  Assumes preAndPostOrderAreValid."""

        for i in self.preOrder:
            j = int(i)
            if j != var.NO_ORDER:
                n = self.nodes[j]
                if n != self.root and not n.isLeaf:
                    yield n

[docs]    def iterInternalsNoRootPostOrder(self):
        """Internal post order node generator, skipping the root.  Assumes preAndPostOrderAreValid."""

        for i in self.postOrder:
            j = int(i)
            if j != var.NO_ORDER:
                n = self.nodes[j]
                if n != self.root and not n.isLeaf:
                    yield n

[docs]    def iterInternalsPostOrder(self):
        """Internal post order node generator.  Assumes preAndPostOrderAreValid."""

        for i in self.postOrder:
            j = int(i)
            if j != var.NO_ORDER:
                n = self.nodes[j]
                if not n.isLeaf:
                    yield n

[docs]    def iterInternalsNoRoot(self):
        """Internal node generator, skipping the root.  PreOrder"""

        for i in self.preOrder:
            j = int(i)
            if j != var.NO_ORDER:
                n = self.nodes[j]
                if n != self.root and not n.isLeaf:
                    yield n

[docs]    def iterInternals(self):
        """Internal node generator. PreOrder.  Including the root, if it is internal."""

        for i in self.preOrder:
            j = int(i)
            if j != var.NO_ORDER:
                n = self.nodes[j]
                if not n.isLeaf:
                    yield n

    ################################################
    #

[docs]    def stripBrLens(self):
        """Sets all node.br.len's to 0.1, the default in p4.

        Then, if you were to write it out in Nexus or Newick format,
        no branch lengths would be printed.
        """
        for n in self.iterNodesNoRoot():
            n.br.len = 0.1

[docs]    def getLen(self):
        """Return the sum of all br.len's."""
        if self.cTree:
            # About 0.01 msec for a tree with 1000 leaves
            return pf.p4_getTreeLen(self.cTree)
        else:
            # About 2 msec for a tree with 1000 leaves.
            total = 0.0
            for n in self.iterNodesNoRoot():
                total += n.br.len
            return total

    # def lenInternals(self):
    #     """Return the sum of all internal br.len's."""
    #     total = 0.0
    #     for n in self.iterInternalsNoRoot():
    #         total += n.br.len
    #     return total

    # def stemminess(self):
    #     """Ratio of internal branches to overall tree length.

    #     Also called 'treeness'.  Via Phillips and Penny, MPE 2003, but
    #     they cite Lanyon 1988."""

    #     total = self.len()
    #     internals = self.lenInternals()
    #     return internals/total

    def _makeRCSplitKeys(self, splitList=None):
        """Make long integer-valued split keys.
        This is dependent on that the leaf bitkeys are already set, ie this method should only 
        be used by the reduced consensus supertree class
        """
        if not self.preAndPostOrderAreValid:
            self.setPreAndPostOrder()

        # self.draw()

#        allOnes = 2**(self.nTax) - 1
        # print 'nTax = %i, allOnes = %i' % (self.nTax, allOnes)

#        for n in self.iterInternalsPostOrder():
        for n in self.iterInternalsNoRootPostOrder():
            #            if n != self.root:
                # print 'doing node %s' % n.nodeNum
            #            if not n.isLeaf():
            if n.leftChild:

                childrenNums = self.getChildrenNums(n)
#                    x = childrenNums[0]
                x = self.nodes[childrenNums[0]].br.rc
                for i in childrenNums[1:]:
                    x = x | self.nodes[i].br.rc
#                        x = x | i
                n.br.rc = x
                if splitList != None:
                    splitList.append([x, 0])
                n.br.rcList = [n.br.rc]

[docs]    def makeSplitKeys(self, makeNodeForSplitKeyDict=True):
        """Make long integer-valued split keys.

        This needs to have self.taxNames set.

        We make 2 kinds of split keys-- rawSplitKeys and splitKeys.
        Both are attributes of node.br, so we have eg node.br.splitKey.

        Raw split keys for terminal nodes are 2**n, where n is the index
        of the taxon name.  Eg for the first taxon, the rawSplitKey will
        be 1, for the 3rd taxon the rawSplitKey will be 4.

        RawSplitKeys for internal nodes are the rawSplitKey's for the
        children, bitwise OR'ed together.

        SplitKeys, cf rawSplitKeys, are in 'standard form', where the
        numbers are even, ie do not contain the 1-bit.  Having it in
        standard form means that you can compare splits among trees.
        If the rawSplitKey is even, then the splitKey is simply that,
        unchanged.  If, however, the rawSplitKey is odd, then the
        splitKey is the rawSplitKey bit-flipped.  For example, if
        there are 5 taxa, and one of the rawSplitKeys is 9 (odd), we
        can calculate the splitKey by bit-flipping, as::

            01001 =  9   rawSplitKey
            10110 = 22   splitKey

        (Bit-flipping is done by exclusive-or'ing (xor) with 11111.)

        The splitKey is readily converted to a splitString for
        display, as 22 becomes '.**.*' (note the '1' bit is now on the
        left).  It is conventional that the first taxon, on the left,
        is always a dot.  (I don't know where the convention comes
        from.)

        The root has no rawSplitKey or splitKey.

        For example, the tree::

                +-------2:B (rawSplitKey = 2)
            +---1
            |   +---------3:C  (rawSplitKey = 4)
            |
            0-------------4:E  (rawSplitKey = 16)
            |
            |    +-----6:A  (rawSplitKey = 1)
            +----5
                 +-----------7:D  (rawSplitKey = 8)

        has 2 internal splits, on nodes 1 and 5.

        ::

            Node      n.br.rawSplitKey     n.br.splitKey
            1             6                    6
            5             9                   22

        There should be no duplicated rawSplitKeys, but if the tree
        has a bifurcating root then there will be a duped splitKey.

        This method will fail for trees with internal nodes that have
        only one child, because that will make duplicated splits.

        If arg *makeNodeForSplitKeyDict* is set, then it will make a
        dictionary ``nodeForSplitKeyDict`` where the keys are the
        splitKeys and the values are the corresponding nodes.

        """

        gm = ['Tree.makeSplitKeys()']
        #raise P4Error(gm)
        if not self.taxNames:
            gm.append('No taxNames.')
            raise P4Error(gm)

        if not self.preAndPostOrderAreValid:
            self.setPreAndPostOrder()

        # self.draw()
        if makeNodeForSplitKeyDict:
            self.nodeForSplitKeyDict = {}

        allOnes = 2 ** (self.nTax) - 1
        # print 'nTax = %i, allOnes = %i' % (self.nTax, allOnes)

        for n in self.iterPostOrder():
            if n != self.root:
                # print 'doing node %s' % n.nodeNum

                if not n.leftChild:
                    try:
                        n.br.rawSplitKey = 1 << self.taxNames.index(
                            n.name)  # "<<" is left-shift
                    except ValueError:
                        gm.append('node.name %s' % n.name)
                        gm.append('is not in taxNames %s' % self.taxNames)
                        raise P4Error(gm)
                    # n.br.rawSplitKey = 1 << self.taxNames.index(n.name)  #
                    # "<<" is left-shift

                    if n.br.rawSplitKey == 1:
                        n.br.splitKey = allOnes - 1
                    else:
                        n.br.splitKey = n.br.rawSplitKey

                    # print 'upward leaf   node %s, rawSplitKey %s, splitKey
                    # %s' % (n.nodeNum, n.br.rawSplitKey, n.br.splitKey)
                else:
                    childrenNums = self.getChildrenNums(n)
                    if len(childrenNums) == 1:
                        gm.append(
                            'Internal node has only one child.  That will make a duped split.')
                        raise P4Error(gm)
                    x = self.nodes[childrenNums[0]].br.rawSplitKey
                    for i in childrenNums[1:]:
                        y = self.nodes[i].br.rawSplitKey
                        x = x | y  # '|' is bitwise "OR".
                    n.br.rawSplitKey = x

                    # Make node.br.splitKey's in "standard form", ie
                    # without the first taxon, ie without a 1.  To do that
                    # we use the '&' operator, to bitwise "and" with 1.
                    # Ie "Does rawSplitKey contain a 1?" or "Is rawSplitKey
                    # odd?"
                    if 1 & n.br.rawSplitKey:
                        # "^" is xor, a bit-flipper.
                        n.br.splitKey = allOnes ^ n.br.rawSplitKey
                    else:
                        n.br.splitKey = n.br.rawSplitKey
                    # print 'intern node %s, rawSplitKey %s, splitKey %s' %
                    # (n.nodeNum, n.br.rawSplitKey, n.br.splitKey)
                if makeNodeForSplitKeyDict:
                    self.nodeForSplitKeyDict[n.br.splitKey] = n

        if 0:
            # There should be no duped rawSplitKeys
            theKeys = []
            for n in self.iterNodesNoRoot():
                theKeys.append(n.br.rawSplitKey)
            for k in theKeys:
                if theKeys.count(k) > 1:
                    gm.append('Duped rawSplitKey %i.' % k)
                    for n in self.nodes:
                        if n != self.root:
                            print('%7s     %4s       %4s' % (n.nodeNum, n.br.rawSplitKey, n.br.splitKey))
                    raise P4Error(gm)

            # Any duped splitKeys?  There will be if the tree is bi-Rooted.
            if 0:
                theKeys = []
                for n in self.iterNodesNoRoot():
                    theKeys.append(n.br.splitKey)
                for k in theKeys:
                    if theKeys.count(k) > 1:
                        gm.append('Duped splitKey %i.' % k)
                        for n in self.iterNodesNoRoot():
                            print('%7s     %4s       %4s' % (n.nodeNum, n.br.rawSplitKey, n.br.splitKey))
                        raise P4Error(gm)

        if 0:
            print(gm[0])
            print(self.taxNames)
            print('nodeNum  rawSplitKey  splitKey')
            for n in self.iterNodesNoRoot():
                print('%7s     %4s       %4s        %s' % (
                    n.nodeNum, n.br.rawSplitKey, n.br.splitKey, p4.func.getSplitStringFromKey(n.br.splitKey, self.nTax)))

[docs]    def recalculateSplitKeysOfNodeFromChildren(self, aNode, allOnes):
        children = [n for n in aNode.iterChildren()]
        x = children[0].br.rawSplitKey
        for n in children[1:]:
            x = x | n.br.rawSplitKey   # '|' is bitwise "OR".
        aNode.br.rawSplitKey = x
        # Ie "Does rawSplitKey contain a 1?" or "Is rawSplitKey odd?"
        if 1 & aNode.br.rawSplitKey:
            # "^" is xor, a bit-flipper.
            aNode.br.splitKey = allOnes ^ aNode.br.rawSplitKey
        else:
            aNode.br.splitKey = aNode.br.rawSplitKey

[docs]    def checkSplitKeys(self, useOldName=False, glitch=True, verbose=True):
        gm = ['Tree.checkSplitKeys()']

        allOnes = 2 ** (self.nTax) - 1
        # print 'nTax = %i, allOnes = %i' % (self.nTax, allOnes)

        isBad = False
        for n in self.iterPostOrder():
            if n != self.root:
                # print 'doing node %s' % n.nodeNum

                if not n.leftChild:
                    # A long int, eg 1, has no upper limit on its value
                    try:
                        if useOldName:
                            rawSplitKey = 1 << self.taxNames.index(
                                n.oldName)  # "<<" is left-shift
                        else:
                            rawSplitKey = 1 << self.taxNames.index(
                                n.name)  # "<<" is left-shift
                    except ValueError:
                        if useOldName:
                            gm.append('node.name %s' % n.oldName)
                        else:
                            gm.append('node.name %s' % n.name)
                        gm.append('is not in taxNames %s' % self.taxNames)
                        raise P4Error(gm)
                    # n.br.rawSplitKey = 1 << self.taxNames.index(n.name)  #
                    # "<<" is left-shift

                    if rawSplitKey == 1:
                        splitKey = allOnes - 1
                    else:
                        splitKey = rawSplitKey

                    # print 'upward leaf   node %s, rawSplitKey %s, splitKey
                    # %s' % (n.nodeNum, n.br.rawSplitKey, n.br.splitKey)
                else:
                    childrenNums = self.getChildrenNums(n)
                    if len(childrenNums) == 1:
                        gm.append(
                            'Internal node has only one child.  That will make a duped split.')
                        raise P4Error(gm)
                    x = self.nodes[childrenNums[0]].br.rawSplitKey
                    for i in childrenNums[1:]:
                        y = self.nodes[i].br.rawSplitKey
                        x = x | y  # '|' is bitwise "OR".
                    rawSplitKey = x

                    # Make node.br.splitKey's in "standard form", ie
                    # without the first taxon, ie without a 1.  To do that
                    # we use the '&' operator, to bitwise "and" with 1.
                    # Ie "Does rawSplitKey contain a 1?" or "Is rawSplitKey
                    # odd?"
                    if 1 & rawSplitKey:
                        # "^" is xor, a bit-flipper.
                        splitKey = allOnes ^ rawSplitKey
                    else:
                        splitKey = rawSplitKey
                    # print 'intern node %s, rawSplitKey %s, splitKey %s' %
                    # (n.nodeNum, n.br.rawSplitKey, n.br.splitKey)
                if n.br.rawSplitKey != rawSplitKey:
                    print("checkSplitKeys node %2i rawSplitKey: existing %s, calculated %s" % (n.nodeNum, n.br.rawSplitKey, rawSplitKey))
                    isBad = True
                if n.br.splitKey != splitKey:
                    print("checkSplitKeys node %2i splitKey: existing %s, calculated %s" % (n.nodeNum, n.br.splitKey, splitKey))
                    isBad = True
        if glitch and isBad:
            raise P4Error(gm)
        if verbose and not isBad:
            print("checkSplitKeys().  ok")

[docs]    def taxSetIsASplit(self, taxSetName):
        """Asks whether a nexus taxset is a split in the tree.

        Args:
            taxSetName (str): The name of the taxset. Case does not matter.

        Returns:
            Node: the node in self if the taxset is a split, or else None

        """

        gm = ["Tree.taxSetIsASplit(%s)" % taxSetName]
        assert self.nexusSets
        assert self.taxNames
        assert self.nexusSets.taxSets
        lowArgTaxSetName = taxSetName.lower()
        theTS = None
        for ts in self.nexusSets.taxSets:
            if ts.lowName == lowArgTaxSetName:
                theTS = ts
                break
        assert theTS, "Could not find the taxSet named %s" % taxSetName
        # theTS.dump()
        assert theTS.mask

        # Make sure that self splitKey has been set. Check the first internal node.
        needsDoing = False
        for n in self.iterInternalsNoRoot():
            if not n.br.splitKey:
                needsDoing = True
            break
        if needsDoing:
            self.makeSplitKeys()

        rawSplitKey = 0
        for i in range(len(theTS.mask)):
            # print i, theTS.mask[i]
            if theTS.mask[i] == '1':
                rawSplitKey += (1 << i)
        # Ie "Does rawSplitKey contain a 1?" or "Is rawSplitKey odd?"
        if 1 & rawSplitKey:
            allOnes = 2 ** (self.nTax) - 1
            splitKey = allOnes ^ rawSplitKey  # "^" is xor, a bit-flipper.
        else:
            splitKey = rawSplitKey
        # print "got splitKey %s" % splitKey

        for n in self.nodes:
            if n.br and not n.isLeaf:
                assert n.br.splitKey, "Needs node.br.splitKey on all internal nodes."
                # print "  %2i  %s  %s" % (n.nodeNum, n.br.splitKey, splitKey)
                if n.br.splitKey == splitKey:
                    # self.draw()
                    # return n.nodeNum
                    return n
        # Was -1 before, when n.nodeNum was returned for hits.  Now a node is
        # returned.
        return None

[docs]    def checkTaxNames(self):
        """Check that all taxNames are in the tree, and vice versa."""

        # If var.allowTreesWithDifferingTaxonSets is set to True we will not check
        # the taxnames. This is primarily used to read trees for supertree and
        # leafstability calcuations.

        # Peter comments that Tobias added this, but it is not needed,
        # and messes other things up -- so comment out until it is
        # sorted.
        # if var.allowTreesWithDifferingTaxonSets:
        #     return

        # self.taxNames is a property, so when it is set, it calls this method

        if self.name:
            gm = ['Tree.checkTaxNames()   tree %s' % self.name]
        else:
            gm = ['Tree.checkTaxNames()']
        if self.fName:
            gm.append("From file '%s' " % self.fName)

        if not self.taxNames:
            gm.append('No taxNames.')
            raise P4Error(gm)

        tax = []
        for n in self.iterNodes():
            if n.isLeaf and n.name:
                tax.append(n.name)
            # This next line should not be needed, as root leaves should be
            # leaves.
            # terminal root that has a taxName
            elif n == self.root and n.name and n.getNChildren() < 2:
                tax.append(n.name)

        # print 'tax from tree = %s' % tax
        # print 'self.taxNames = %s' % self.taxNames

        # Check for same number of taxa
        if len(tax) != len(self.taxNames):
            # self.draw()
            # self.dump(node=1)
            gm.append('Mismatch.  Length of self.taxNames is wrong.')
            gm.append('The tree has %i leaves with names, but len(self.taxNames) = %i' % (
                len(tax), len(self.taxNames)))
            gm.append('leaves on the tree = %s' % tax)
            gm.append('self.taxNames = %s' % self.taxNames)
            gm.append('symmetric_difference is %s' %
                      set(tax).symmetric_difference(set(self.taxNames)))
            # print '    Setting invalid taxNames to an empty list.'
            #self.taxNames = []
            raise P4Error(gm, 'tree_badTaxNamesLength')

        # Check for mis-matched taxNames
        isBad = 0
        taxSet = set(tax)
        selfTaxNamesSet = set(self.taxNames)

        s = taxSet.difference(selfTaxNamesSet)
        if len(s):
            isBad = 1
            print(gm[0])
            print('TaxName mismatch between the tree and self.taxNames.')
            print('These taxa are found in the tree but not in self.taxNames:')
            print(s)
        s = selfTaxNamesSet.difference(taxSet)
        if len(s):
            isBad = 1
            print(gm[0])
            print('TaxName mismatch between the tree and self.taxNames.')
            print('These taxa are found in the self.taxNames but not in the tree:')
            print(s)

        if isBad:
            raise P4Error(gm, 'tree_taxNamesMisMatch')

    ################################################
    # Copy and Verify

[docs]    def copyToTree(self, otherTree):

        gm = ['Tree.copyToTree()']

        if len(self.nodes) != len(otherTree.nodes):
            gm.append('Different number of nodes.')
            raise P4Error(gm)

        # node relations (parent, child, sib)
        for nNum in range(len(self.nodes)):
            selfNode = self.nodes[nNum]
            otherNode = otherTree.nodes[nNum]

            # parent
            if selfNode.parent:
                otherNode.parent = otherTree.nodes[selfNode.parent.nodeNum]
            else:
                # print "otherNode.parent", otherNode.parent
                otherNode.parent = None

            # leftChild
            if selfNode.leftChild:
                otherNode.leftChild = otherTree.nodes[
                    selfNode.leftChild.nodeNum]
            else:
                # print "otherNode.leftChild", otherNode.leftChild
                otherNode.leftChild = None

            # sibling
            if selfNode.sibling:
                otherNode.sibling = otherTree.nodes[selfNode.sibling.nodeNum]
            else:
                # print "otherNode.sibling", otherNode.sibling
                otherNode.sibling = None

        # root
        otherTree.root.br = otherTree.nodes[self.root.nodeNum].br
        otherTree.nodes[self.root.nodeNum].br = None
        otherTree.root = otherTree.nodes[self.root.nodeNum]

        # brLens and splitKeys
        for nNum in range(len(self.nodes)):
            if self.nodes[nNum] != self.root:
                otherTree.nodes[nNum].br.len = self.nodes[nNum].br.len
                otherTree.nodes[nNum].br.lenChanged = self.nodes[
                    nNum].br.lenChanged
                #otherTree.nodes[nNum].br.flag = self.nodes[nNum].flag
                otherTree.nodes[nNum].br.splitKey = self.nodes[
                    nNum].br.splitKey
                otherTree.nodes[nNum].br.rawSplitKey = self.nodes[
                    nNum].br.rawSplitKey

        # model usage numbers
        if self.model:
            for nNum in range(len(self.nodes)):
                selfNode = self.nodes[nNum]
                otherNode = otherTree.nodes[nNum]
                if selfNode.parts and otherNode.parts:
                    for pNum in range(self.model.nParts):
                        otherNode.parts[pNum].compNum = selfNode.parts[pNum].compNum
                        if selfNode != self.root:
                            if selfNode.br.parts and otherNode.br.parts:
                                otherNode.br.parts[pNum].rMatrixNum = selfNode.br.parts[pNum].rMatrixNum
                                otherNode.br.parts[pNum].gdasrvNum = selfNode.br.parts[pNum].gdasrvNum

        # pre- and postOrder
        for i in range(len(self.preOrder)):
            otherTree.preOrder[i] = self.preOrder[i]
            otherTree.postOrder[i] = self.postOrder[i]

        # partLikes
        if self.model:
            for pNum in range(self.model.nParts):
                otherTree.partLikes[pNum] = self.partLikes[pNum]

        otherTree._nInternalNodes = self._nInternalNodes

[docs]    def verifyIdentityWith(self, otherTree, doSplitKeys=False, doMore=False):
        """For MCMC debugging.  Verifies that two trees are identical."""

        complaintHead = '\nTree.verifyIdentityWith()'  # keep

        if len(self.nodes) != len(otherTree.nodes):
            print(complaintHead)
            print('    Different number of nodes.')
            return var.DIFFERENT

        # check node relations (parent, child, sib)
        isBad = 0
        for nNum in range(len(self.nodes)):
            selfNode = self.nodes[nNum]
            otherNode = otherTree.nodes[nNum]

            # parent
            if selfNode.parent:
                if otherNode.parent:
                    if otherNode.parent.nodeNum != selfNode.parent.nodeNum:
                        isBad = 1
                else:
                    isBad = 1
            else:
                if otherNode.parent:
                    isBad = 1

            # leftChild
            if selfNode.leftChild:
                if otherNode.leftChild:
                    if otherNode.leftChild.nodeNum != selfNode.leftChild.nodeNum:
                        isBad = 1
                else:
                    isBad = 1
            else:
                if otherNode.leftChild:
                    isBad = 1

            # sibling
            if selfNode.sibling:
                if otherNode.sibling:
                    if otherNode.sibling.nodeNum != selfNode.sibling.nodeNum:
                        isBad = 1
                else:
                    isBad = 1
            else:
                if otherNode.sibling:
                    isBad = 1

            if isBad:
                print(complaintHead)
                print('    Node %i, relations differ.' % nNum)
                self.write()
                otherTree.write()
                return var.DIFFERENT

        if self.root.nodeNum != otherTree.root.nodeNum:
            print(complaintHead)
            print('    Roots differ.')
            return var.DIFFERENT

        # brLens, lenChanged, and node.flag. and splitKeys
        for nNum in range(len(self.nodes)):
            if self.nodes[nNum] != self.root:
                # if self.nodes[nNum].br.len != otherTree.nodes[nNum].br.len:
                if math.fabs(self.nodes[nNum].br.len - otherTree.nodes[nNum].br.len) > 1.e-8:
                    print(complaintHead)
                    print('    BrLens differ.')
                    return var.DIFFERENT
                if self.nodes[nNum].br.lenChanged != otherTree.nodes[nNum].br.lenChanged:
                    print(complaintHead)
                    print('    br.lenChanged differs.')
                    return var.DIFFERENT
                if self.nodes[nNum].flag != otherTree.nodes[nNum].flag:
                    print(complaintHead)
                    print('    flag differs, nodeNum %i.  %s vs %s' % (nNum, self.nodes[nNum].flag, otherTree.nodes[nNum].flag))
                    return var.DIFFERENT
                if doSplitKeys:
                    if self.nodes[nNum].br.splitKey != otherTree.nodes[nNum].br.splitKey:
                        print(complaintHead)
                        print('    SplitKeys differ.')
                        return var.DIFFERENT
                    if self.nodes[nNum].br.rawSplitKey != otherTree.nodes[nNum].br.rawSplitKey:
                        print(complaintHead)
                        print('    rawSplitKeys differ.')
                        return var.DIFFERENT

        if self.model:
            # model usage numbers
            isBad = 0
            for pNum in range(self.model.nParts):
                for nNum in range(len(self.nodes)):
                    selfNode = self.nodes[nNum]
                    otherNode = otherTree.nodes[nNum]
                    if selfNode.parts and otherNode.parts:
                        if selfNode.parts[pNum].compNum != otherNode.parts[pNum].compNum:
                            isBad = 1
                        if self.nodes[nNum] != self.root:
                            if selfNode.br.parts and otherNode.br.parts:
                                if selfNode.br.parts[pNum].rMatrixNum != otherNode.br.parts[pNum].rMatrixNum:
                                    isBad = 1
                                elif selfNode.br.parts[pNum].gdasrvNum != otherNode.br.parts[pNum].gdasrvNum:
                                    isBad = 1

                    if isBad:
                        print(complaintHead)
                        print('    Node %i, model usage info does not match.' % nNum)
                        return var.DIFFERENT

            # pre- and postOrder
            isBad = 0
            for i in range(len(self.preOrder)):
                if self.preOrder[i] != otherTree.preOrder[i]:
                    isBad = 1
                    break
                elif self.postOrder[i] != otherTree.postOrder[i]:
                    isBad = 1
                    break
            if isBad:
                print(complaintHead)
                print('    Pre- or postOrder do not match.')
                return var.DIFFERENT

            if self.nInternalNodes != otherTree.nInternalNodes:
                print(complaintHead)
                print('    nInternalNodes differ.')
                return var.DIFFERENT

            # partLikes
            for pNum in range(self.model.nParts):
                # if otherTree.partLikes[pNum] != self.partLikes[pNum]:
                myDiff = math.fabs(otherTree.partLikes[pNum] - self.partLikes[pNum])
                if myDiff > 1.e-5:
                    print(complaintHead)
                    print("    partLikes differ by %.8f (%g).  (%.8f, (%g) %.8f (%g)" % (
                        myDiff, myDiff,
                        otherTree.partLikes[pNum], otherTree.partLikes[pNum], self.partLikes[pNum], self.partLikes[pNum]))
                    return var.DIFFERENT

        if doMore:  # some more
            for nNum in range(len(self.nodes)):
                selfNode = self.nodes[nNum]
                otherNode = otherTree.nodes[nNum]
                if selfNode.nodeNum != otherNode.nodeNum:
                    print(complaintHead)
                    print('    nodeNum differs')
                    return var.DIFFERENT
                if selfNode.seqNum != otherNode.seqNum:
                    print(complaintHead)
                    print('    seqNum differs')
                    return var.DIFFERENT
                if selfNode.name != otherNode.name:
                    print(complaintHead)
                    print('    name differs')
                    return var.DIFFERENT
                if selfNode.isLeaf != otherNode.isLeaf:
                    print(complaintHead)
                    print('    isLeaf differs')
                    return var.DIFFERENT

        return var.SAME

    ############################################


[docs]    def isFullyBifurcating(self, verbose=False, biRoot=False):
        """Returns True if the tree is fully bifurcating.  Else False. 

        If arg biRoot is True, then it is required that the tree be
        biRoot'ed (ie have a bifurcating root)

        If arg biRoot is False, then biRoot'ed trees are not allowed,
        but trees rooted on a leaf are allowed.

        """

        rootNChildren = self.root.getNChildren()
        if rootNChildren > 3:
            if verbose:
                print("isFullyBifurcating() returning False, due to root with %i children." % rootNChildren)
            return False
        if not biRoot:
            if rootNChildren == 2:
                if verbose:
                    print("isFullyBifurcating() returning False, due to root with %i children." % rootNChildren)
                return False
            
            elif rootNChildren == 1 or rootNChildren == 3:   # rooting on a leaf is OK
                if rootNChildren == 1:
                    if not self.root.isLeaf:
                        if verbose:
                            print("isFullyBifurcating()", end=" ")
                            print("returning False, because the single-child root is not a leaf.")
                        return False

                for n in self.iterInternalsNoRoot():
                    if n.leftChild and n.leftChild.sibling:
                        if n.leftChild.sibling.sibling:
                            if verbose:
                                print("isFullyBifurcating()", end=" ")
                                print("returning False, due to node %i having 3 or more children." % n.nodeNum)
                            return False
                    else:
                        if verbose:
                            print("isFullyBifurcating()", end=" ")
                            print("returning False, due to non-leaf node %i having 1 or fewer children." % n.nodeNum)
                        return False
                return True
            else:
                raise P4Error("Tree.isFullyBifurcating() --- this should not happen; fix me.")

        if biRoot:
            # root on a leaf not allowed on a biRoot tree, as the root is explicitly not a leaf.
            if rootNChildren != 2:    
                if verbose:
                    print("isFullyBifurcating(biRoot=True)", end=" ")
                    print("returning False, due to root with %i children." % rootNChildren)
                return False

            for n in self.iterInternalsNoRoot():
                if n.leftChild and n.leftChild.sibling:
                    if n.leftChild.sibling.sibling:
                        if verbose:
                            print("isFullyBifurcating(biRoot=True)", end=" ")
                            print("returning False, due to node %i having 3 or more children." % n.nodeNum)
                        return False
                else:
                    if verbose:
                        print("isFullyBifurcating(biRoot=True)", end=" ")
                        print("returning False, due to non-leaf node %i having 1 or fewer children." % n.nodeNum)
                    return False
            return True

    # These next two are for the eTBR implementation that I got from Jason
    # Evans' Crux.  Thanks Jason!
[docs]    def getDegree(self, nodeSpecifier):
        n = self.node(nodeSpecifier)
        if n.isLeaf:
            if n.parent:
                return 1
            else:
                assert n == self.root
                if n.leftChild:   # a tree-on-a-stick
                    return 1
                else:             # a single isolated node
                    return 0
        else:
            #assert n.leftChild
            deg = 1  # the leftChild
            if n.parent:
                deg += 1
            s = n.leftChild.sibling
            while s:
                deg += 1
                s = s.sibling
            return deg

[docs]    def nextNode(self, spokeSpecifier, hubSpecifier):
        """Get next node cycling around a hub node.

        A bit of a hack to make a p4 Node behave sorta like a
        Felsenstein node.  Imagine cycling around the branches
        emanating from a node like spokes on a hub, starting from
        anywhere, with no end.

        The hub node would usually be the parent of the spoke, or the
        spoke would be the hub itself.  Usually
        self.nextNode(spoke,hub) delivers spoke.sibling.  What happens
        when the siblings run out is that
        self.nextNode(rightmostSibling, hub) delivers hub itself, and
        of course its branch (spoke) points toward the hub.parent.
        (Unless hub is the root, of course, in which case
        self.nextNode(rightmostSibling, hub) delivers hub.leftChild.)
        In the usual case of the hub not being the root, the next node
        to be delivered by nextNode(spokeIsHub, hub) is usually the
        leftChild of the hub.  Round and round, clockwise.

        """

        spoke = self.node(spokeSpecifier)
        hub = self.node(hubSpecifier)

        if spoke.parent == hub or hub == spoke:
            if spoke == hub:
                if spoke.leftChild:
                    return spoke.leftChild
                else:
                    return hub
            else:
                if spoke.sibling:
                    return spoke.sibling
                else:
                    if hub.parent:
                        return hub
                    else:
                        return hub.leftChild
        else:
            print("*=" * 25)
            self.draw()
            gm = ["Tree.nextNode() spoke=%i, hub=%i" %
                  (spoke.nodeNum, hub.nodeNum)]
            gm.append(
                "Need to have either spoke.parent == hub or hub == spoke.")
            raise P4Error(gm)

[docs]    def topologyDistance(self, tree2, metric='sd', resetSplitKeySet=False):
        """Compares the topology of self with tree2.

        The complete list of metrics is given in
        var.topologyDistanceMetrics

        For most metrics using this method, taxNames needs to be set,
        to the same in the two trees.  If the taxa differ, this method
        simply returns -1

        The 'metric' can be one of 'sd' (symmetric difference), 'wrf'
        (weighted Robinson-Foulds), 'bld' (Felsenstein's branch-
        length distance), or 'diffs'.  The unweighted Robinson-Foulds
        metric would be the same as the symmetric difference.

        There is also an experimental scqdist, but that needs the
        scqdist.so module, in the QDist directory.

        See Felsenstein 2004 Inferring Phylogenies, Pg 529.

        The default metric is the very simple 'sd', symmetric
        difference.  Using this metric, if the 2 trees share the same
        set of splits, they are deemed to be the same topology; branch
        lengths are not compared.  This method returns the number of
        splits that are in self that are not in tree2 plus the number
        of splits that are in tree2 that are not in self.  So it would
        return 0 for trees that are the same.

        The 'wrf' and 'bld' metrics take branch lengths into account.
        Bifurcating roots complicate things, so they are not allowed
        for weighted distance calculations.

        In the unweighted case (ie metric='sd'), whether the trees
        compared have bifurcating roots or not is ignored.  So the
        trees (A,B,(C,D)) and ((A,B),(C,D)) will be deemed to have the
        same topology, since they have the same splits.

        The measurement 'diffs', which returns a tuple of 2 numbers --
        both are set differences.  The first is the number of splits
        in self that are not in tree2, and the second is the number of
        splits in tree2 that are not in self.  (Consider it as the the
        symmetric difference split into its 2 parts.)

        If you calculate a distance and then make a topology change, a
        subsequent sd topologyDistance calculation will be wrong, as
        it uses previous splits.  So then you need to
        'resetSplitKeySet'.

        The 'scqdist' metric calculates quartet distances.  The code
        was written by Anders Kabell Kristensen for his Masters degree
        at Aarhus University, 2010.
        http://www.cs.au.dk/~dalko/thesis/ It has two versions -- a
        pure Python version (that needs scipy) that I do not include
        here, and a fast C++ version, that I wrapped in python.  Its
        speedy -- the 'sc' in 'scqdist' is for 'sub-cubic', ie better
        than O(n^3).

        I have also incorporated the tqDist v1.0.2 code, from 2014,
        also for quartet distance calculations, from Christian N.  S.
        Pedersen and his group at BiRC in Aarhus.  See
        https://users-cs.au.dk/cstorm/software/tqdist/ 2014.
        It is available here via the 'tqdist' metric.

        """

        gm = ['Tree.topologyDistance()']

        if metric not in var.topologyDistanceMetrics:
            gm.append("Got a request for unknown metric '%s'" % metric)
            gm.append("The 'metric' arg should be one of %s" %
                      var.topologyDistanceMetrics)
            raise P4Error(gm)
        if metric == 'scqdist':  # no need for taxNames
            try:
                import p4.scqdist as scqdist
            except ImportError:
                gm.append("Could not find the 'scqdist' module needed for this metric.")
                gm.append("See the instructions for making it in the p4 source, in the Qdist directory.")
                raise P4Error(gm)
            tsSelf = self.writeNewick(toString=True)
            tsTree2 = tree2.writeNewick(toString=True)
            return scqdist.qdist(tsSelf, tsTree2)

        elif metric == 'tqdist':  # no need for taxNames
            try:
                import p4.pytqdist as pytqdist
            except ImportError:
                gm.append("Could not find the 'tqdist' module needed for this metric.")
                gm.append("See the instructions for making it in the p4 source, in the tqDist directory.")
                raise P4Error(gm)
            tsSelf = self.writeNewick(toString=True, spaceAfterComma=False)
            tsTree2 = tree2.writeNewick(toString=True, spaceAfterComma=False)
            return pytqdist.qdistFromStrings(tsSelf, tsTree2)

        if not self.taxNames or not tree2.taxNames:
            gm.append("This method requires taxNames to be set.")
            raise P4Error(gm)
        if self.taxNames != tree2.taxNames:
            gm.append("The taxNames are different for the two trees.")
            gm.append("Self:  %s" % self.taxNames)
            gm.append("tree2: %s" % tree2.taxNames)
            raise P4Error(gm)
        if (self.root.getNChildren() == 2 or tree2.root.getNChildren() == 2) and (metric in ['wrf', 'bld']):
            gm.append('One of the input trees has a bifurcating root.')
            gm.append(
                'Weighted tree distance calculations do not work on bi-rooted trees.')
            raise P4Error(gm)

        # I might be doing a lot of these, and I don't want to make
        # splitKeys and make the splitKeys set over and over again.
        # So store it as a Tree.attribute.

        if resetSplitKeySet:
            if hasattr(self, 'splitKeySet'):
                del(self.splitKeySet)
            if hasattr(tree2, 'splitKeySet'):
                del(tree2.splitKeySet)

        if not hasattr(self, 'splitKeySet'):
            self.makeSplitKeys()
            self.splitKeySet = set(
                [n.br.splitKey for n in self.iterNodesNoRoot()])

        if not hasattr(tree2, 'splitKeySet'):
            tree2.makeSplitKeys()
            tree2.splitKeySet = set(
                [n.br.splitKey for n in tree2.iterNodesNoRoot()])

        if metric == 'sd':
            # Symmetric difference.  The symmetric_difference method
            # returns all elements that are in exactly one of the sets.
            theSD = len(self.splitKeySet.symmetric_difference(tree2.splitKeySet))
            return theSD

        # The difference method returns the difference of two sets as
        # a new Set.  I.e. all elements that are in self and not in
        # the other.
        selfHasButTree2DoesNot = self.splitKeySet.difference(tree2.splitKeySet)
        tree2HasButSelfDoesNot = tree2.splitKeySet.difference(self.splitKeySet)

        if metric == 'diffs':
            return len(selfHasButTree2DoesNot), len(tree2HasButSelfDoesNot)

        if metric in ['wrf', 'bld']:
            self.splitKeyHash = {}
            for n in self.iterNodesNoRoot():
                self.splitKeyHash[n.br.splitKey] = n
            tree2.splitKeyHash = {}
            for n in tree2.iterNodesNoRoot():
                tree2.splitKeyHash[n.br.splitKey] = n

        if metric == 'wrf':
            theSum = 0.0
            for k in self.splitKeySet.intersection(tree2.splitKeySet):
                # print '%s - %s' % (self.splitKeyHash[k].br.len,
                # tree2.splitKeyHash[k].br.len)
                theSum += abs(self.splitKeyHash[k].br.len -
                              tree2.splitKeyHash[k].br.len)
            for k in selfHasButTree2DoesNot:
                # print 'x %s' % self.splitKeyHash[k].br.len
                theSum += self.splitKeyHash[k].br.len
            for k in tree2HasButSelfDoesNot:
                # print 'y %s' % tree2.splitKeyHash[k].br.len
                theSum += tree2.splitKeyHash[k].br.len
            return theSum
        elif metric == 'bld':
            theSum = 0.0
            for k in self.splitKeySet.intersection(tree2.splitKeySet):
                theDiff = self.splitKeyHash[
                    k].br.len - tree2.splitKeyHash[k].br.len
                theSum += theDiff * theDiff
            for k in selfHasButTree2DoesNot:
                theSum += self.splitKeyHash[k].br.len * \
                    self.splitKeyHash[k].br.len
            for k in tree2HasButSelfDoesNot:
                theSum += tree2.splitKeyHash[k].br.len * \
                    tree2.splitKeyHash[k].br.len
            # print 'branch score =', theSum
            return math.sqrt(theSum)

    ##################################################

[docs]    def tv(self):
        """Tree Viewer. Show the tree in a gui window.

        Needs Tkinter.

        If you have nexus taxsets defined, you can show them.
        """
        from p4.btv import TV
        #import os
        #os.environ['PYTHONINSPECT'] = '1'
        TV(self)

[docs]    def btv(self):
        """Big Tree Viewer. Show the tree in a gui window.

        This is for looking at big trees.  The viewer has 2 panels --
        one for an overall view of the whole tree, and one for a
        zoomed view, controlled by a selection rectangle on the whole
        tree view.

        Needs Tkinter.

        If you have nexus taxsets defined, you can show them.
        """
        from p4.btv import BTV
        #import os
        #os.environ['PYTHONINSPECT'] = '1'
        BTV(self)

[docs]    def tvTopologyCompare(self, treeB):
        """Graphically show topology differences.

        The taxNames need to be set, and need to be the same for both
        trees.

        (If the red lines don't show up right away, try adjusting the
        size of the windows slightly.)

        """

        sd = self.topologyDistance(treeB)
        if sd == 0:
            print("The trees are the same. No tv.")
            return
        # for sk in self.splitKeyHash.keys():
        #    if sk not in treeB.splitKeyHash:
        #        print "self has sk

        self.splitKeyHash = {}
        for n in self.iterInternalsNoRoot():
            self.splitKeyHash[n.br.splitKey] = n
        treeB.splitKeyHash = {}
        for n in treeB.iterNodesNoRoot():
            treeB.splitKeyHash[n.br.splitKey] = n
        assert self.splitKeySet
        assert treeB.splitKeySet
        selfHasButTreeBDoesnt = self.splitKeySet.difference(treeB.splitKeySet)
        treeBHasButSelfDoesnt = treeB.splitKeySet.difference(self.splitKeySet)

        from p4.btv import TV
        #import os
        #os.environ['PYTHONINSPECT'] = '1'
        TV(self, title='TV self')
        TV(treeB, title='TV treeB')
        for spl in selfHasButTreeBDoesnt:
            n = self.splitKeyHash[spl]
            n.br.color = 'red'
        for spl in treeBHasButSelfDoesnt:
            n = treeB.splitKeyHash[spl]
            n.br.color = 'orange'

    ##################################################

[docs]    def drawTopologyCompare(self, treeB, showNodeNums=False):
        """Graphically show topology differences between two trees.

        The two trees (self and treeB) are drawn as text, with differences
        highlighted with a thick branch.

        The taxNames need to be set, and need to be the same for both
        trees.

        """

        sd = self.topologyDistance(treeB)
        if sd == 0:
            print("The trees are the same. ")
            return

        self.splitKeyHash = {}
        for n in self.iterInternalsNoRoot():
            self.splitKeyHash[n.br.splitKey] = n
        treeB.splitKeyHash = {}
        for n in treeB.iterNodesNoRoot():
            treeB.splitKeyHash[n.br.splitKey] = n
        assert self.splitKeySet
        assert treeB.splitKeySet
        selfHasButTreeBDoesnt = self.splitKeySet.difference(treeB.splitKeySet)
        treeBHasButSelfDoesnt = treeB.splitKeySet.difference(self.splitKeySet)

        for spl in selfHasButTreeBDoesnt:
            n = self.splitKeyHash[spl]
            n.br.textDrawSymbol = '='
        for spl in treeBHasButSelfDoesnt:
            n = treeB.splitKeyHash[spl]
            n.br.textDrawSymbol = '='

        self.draw(showNodeNums=showNodeNums)
        treeB.draw(showNodeNums=showNodeNums)


[docs]    def getNodeOnReferenceTreeCorrespondingToSelfRoot(self, refTree, verbose=True):
        """Find, on a ref tree, a node corresponding to where the self root is.

        This works for both bifurcating and non-bifurcating roots (of
        self).

        The refTree should not be bi-rooted.

        To facilitate doing a lot of (self) trees, a counter in the
        corresponding node in the refTree is incremented.  The refTree
        is rooted on the first taxon.  For bi-rooted trees, the
        node.br.biRootCount is incremented, and for non-bi-rooted
        trees the node.rootCount is incremented.  The root counts
        should therefore be immune to reRoot()'ing.

        """

        gm = ["Tree.getNodeOnReferenceTreeCorrespondingToSelfRoot()"]
        assert self is not refTree
        assert self.taxNames
        assert refTree.taxNames
        assert self.taxNames == refTree.taxNames

        isBiRoot = False
        # sr = self.root; srnChildren = nChildren of self.root
        srnChildren = self.root.getNChildren()
        if srnChildren == 1:
            gm.append("Self root has only one child; does not work")
            raise P4Error(gm)
        elif srnChildren == 2:
            isBiRoot = True
            for n in refTree.iterNodesNoRoot():
                if not hasattr(n.br, "biRootCount"):
                    n.br.biRootCount = 0
        else:
            for n in refTree.iterNodes():
                if not hasattr(n, "rootCount"):
                    n.rootCount = 0

        #self.draw()
        refTree.reRoot(self.taxNames[0])
        #refTree.draw()

        self.makeSplitKeys()  # default makeNodeForSplitKeyDict=True
        refTree.makeSplitKeys()

        for ch in self.root.iterChildren():
            if ch.br.rawSplitKey & 1:
                break
        # ch clade has the first taxon, ch.parent is self.root
        #print("self root child %i clade has the first taxon" % ch.nodeNum)
        refNode = refTree.nodeForSplitKeyDict.get(ch.br.splitKey)
        if refNode:
            if isBiRoot:
                refNode.br.biRootCount += 1
                if verbose:
                    #print("When the refTree is rooted on the first taxon,")
                    #print("the bi-root is on the branch on refTree node %i" % refNode.nodeNum)
                    print("Incrementing refTree node %i br.biRootCount by 1" % refNode.nodeNum)

            else:
                refNode.rootCount += 1
                if verbose:
                    #print("When the refTree is rooted on the first taxon,")
                    #print("the root is at refTree node %i" % refNode.nodeNum)
                    print("Incrementing refTree node %i rootCount by 1" % refNode.nodeNum)

            return refNode
        else:
            if verbose:
                print("There is no node in the refTree corresponding to the self.root")
            return None
                
                

[docs]    def readPhyloXmlFile(self, fName, verbose=False):
        """Start with an empty Tree, read in a phyloxml file"""

        assert not self.nodes, "The tree should be empty, with no nodes"
        import xml.etree.ElementTree as ET
        xAll = ET.parse(fName)
        for el in xAll.iter():
            if '}' in el.tag:
                el.tag = el.tag.split('}', 1)[1]  # strip all namespaces

        xroot = xAll.getroot()
        xphylogeny = xroot.find('phylogeny')
        assert xphylogeny
        rootclade = xphylogeny.find('clade')
        assert rootclade

        def recurseClade(theClade):
            global currentNode
            for el in theClade:
                #print(el.tag)
                if el.tag == 'clade':
                    n = Node()
                    n.nodeNum = len(self.nodes)
                    el.attrib['p4Node'] = n
                    n.parent = theClade.attrib['p4Node']
                    if n.parent:
                        if n.parent.leftChild == None:
                            n.parent.leftChild = n
                        else:
                            rmCh = n.parent.rightmostChild()
                            assert rmCh, "Can't get rightmostChild"
                            rmCh.sibling = n

                    self.nodes.append(n)
                    currentNode = n
                    recurseClade(el)
                elif el.tag == "branch_length":
                    currentNode.br.len = float(el.text)
                elif el.tag == 'name':
                    #print(el.text)
                    currentNode.name = el.text
                    if verbose:
                        print("Got node name", el.text)
                elif el.tag == 'width':
                    currentNode.br.width = float(el.text)
                    if verbose:
                        print("Got node width, as node.br.width", el.text)
                elif el.tag == 'color':
                    thiscol = {}
                    for el2 in el:
                        thiscol[el2.tag] = int(el2.text)
                    cols = ['red', 'green', 'blue']
                    for col in cols:
                        assert thiscol.get(col)
                    myHexString = ""
                    for col in cols:
                        bit = thiscol.get(col)
                        hx = hex(bit)[2:].upper()
                        myHexString += hx
                    #print(myHexString)
                    currentNode.br.hexColor = myHexString
                    if verbose:
                        print("Got node color (as attribute node.br.hexColor)", currentNode.br.hexColor)
                else:
                    print("Fixme. Unhandled/unused element with tag %s" % el.tag)


        n = Node()
        n.nodeNum = 0
        n.br = None
        self.root = n
        self.nodes.append(n)

        rootclade.attrib['p4Node'] = n

        recurseClade(rootclade)

        for n in self.nodes:
            if not n.leftChild:
                n.isLeaf = 1
        self.setPreAndPostOrder()

[docs]    def attachRooter(self):
        rNode = self.nodes[-1]
        assert rNode.name == 'tempRooter'
        assert rNode.nodeNum == var.NO_ORDER
        rtMostCh = self.root.rightmostChild()
        rtMostCh.sibling = rNode
        rNode.sibling = None
        rNode.parent = self.root
        rNode.nodeNum = len(self.nodes) - 1
        if self.taxNames:
            # taxnames can be shared among trees, so check whether it has already been added
            if rNode.name not in self.taxNames:
                self.taxNames.append(rNode.name)
        self.preAndPostOrderAreValid = False
        self.setPreAndPostOrder()
        #if self._nTax:
        #    self._nTax += 1
        return rNode


[docs]    def detachRooter(self):
        rNode = self.nodes[-1]
        assert rNode.parent == self.root
        safety = 0
        while self.root.leftChild.sibling.sibling != rNode:
            self.rotateAround(self.root)
            safety += 1
            if safety >= 5:
                self.draw()
                raise P4Error("Tree.detachRooter() Too many rotations.")

        assert self.root.leftChild.sibling.sibling == rNode
        self.root.leftChild.sibling.sibling = None
        rNode.parent = None
        rNode.nodeNum = var.NO_ORDER
        self.preAndPostOrderAreValid = False
        self.setPreAndPostOrder()
        if self.taxNames:
            # taxnames can be shared among trees, so check whether it is in
            if rNode.name == self.taxNames[-1]:
                self.taxNames.pop()
        #if self._nTax:
        #    self._nTax -= 1

[docs]    def isBiRoot(self):
        """Answers whether self has a bifurcating root"""

        rootNChildren = self.root.getNChildren()
        if rootNChildren == 2:
            return True
        return False

[docs]    def isTriRoot(self):
        """Answers whether self has a trifurcating root"""

        rootNChildren = self.root.getNChildren()
        if rootNChildren == 3:
            return True
        return False
p4

Navigation

Related Topics

Source code for p4.tree