Source code for p4.trees

import sys
import os
import math
import p4.func
from p4.var import var
from p4.tree import Tree
from p4.p4exceptions import P4Error


[docs]class Trees(object):

    """A bunch of trees, all with the same taxNames.

    This class would be good for doing things that you might want to
    do with a bunch of trees rather than just a single tree.  For
    example, you might want to write out the trees to a nexus file but
    use a translation to keep the size of the file down -- this class
    knows how to do that.

    You can start this up in two ways::

        tt = Trees()           # All the trees in var.trees
        tt = Trees(myTreeList) # All the trees in myTreeList

    This class insists on knowing the taxNames.  If the list of
    taxNames is provided when you instantiate a Trees object, then it
    uses that list.  (A good place to get such a list might be an
    alignment object, as myAlignment.taxNames).

      tt = Trees(taxNames=myTaxNamesList)

    Otherwise it looks for a taxNames attribute in the first tree.
    """

    def __init__(self, trees=None, taxNames=[]):
        gm = ['Trees()']
        if trees == None:
            if not var.trees:
                gm.append(
                    "Arg trees is not given or is empty, and var.trees is empty.  No trees.")
                raise P4Error(gm)
            else:
                self.trees = var.trees
        else:
            if not isinstance(trees, list):
                gm.append("If provided, the trees arg should be a list.")
                raise P4Error(gm)
            if not len(trees):
                gm.append("The list of trees appears to be empty.")
                raise P4Error(gm)
            if not isinstance(trees[0], Tree):
                gm.append(
                    "The first item in the input list is not a Tree object.")
                raise P4Error(gm)
            self.trees = trees
        if taxNames:
            if isinstance(taxNames, list) and len(taxNames) and isinstance(taxNames[0], str):
                pass
            else:
                gm.append(
                    "If provided, arg taxNames should be a list of (at least one) string(s).")
        else:
            if trees:
                if trees[0].taxNames:
                    taxNames = trees[0].taxNames
            else:
                if var.trees[0].taxNames:
                    taxNames = var.trees[0].taxNames
        if not taxNames:
            gm.append("I can't find a taxNames list in the input trees. ")
            gm.append(
                "In this case you need to feed this a taxNames list when you instantiate.")
            gm.append(
                "Often you can get a good taxNames from yourAlignment.taxNames.")
            gm.append(
                "(The order often matters, and generally should be same for all your analyses.)")
            raise P4Error(gm)
        self.taxNames = None
        self.setTaxNames(taxNames)
        self.data = None
        self.writeBranchLengths = True

    @property
    def nTax(self):
        """(property) nTax"""
        return len(self.taxNames)

[docs]    def setTaxNames(self, theTaxNames=None):
        """Sets and checks taxNames for self and all trees in self.

        You can provide the taxNames as the arg theTaxNames, or if you
        do not then it looks for a taxNames list in the first tree.
        If it does not find one there, it makes a list from the first
        tree, and sorts it.

        This propagates self.taxNames to all trees in self.trees, and
        then calls self.checkTaxNames()."""

        gm = ['Trees.setTaxNames()']
        if 0:
            print(gm[0])
            print('    theTaxNames=%s' % theTaxNames)

        if theTaxNames:
            self.taxNames = theTaxNames
        else:
            # Get a list from the first tree.
            t = None
            if len(self.trees):
                t = self.trees[0]
            if not t:
                print(gm[0])
                print("    No trees?")
            if t.taxNames:
                self.taxNames = t.taxNames
            else:
                # Make up a list from node names.
                self.taxNames = []
                for n in t.nodes:
                    if n.isLeaf and n.name:
                        self.taxNames.append(n.name)
                self.taxNames.sort()

        # propagate and check
        for t in self.trees:
            t.taxNames = self.taxNames
        self.checkTaxNames()

[docs]    def checkTaxNames(self):
        """Check that all trees have all taxNames."""

        gm = ['Trees.checkTaxNames()']
        if not self.taxNames:
            gm.append("No taxNames.")
            raise P4Error(gm)
        for t in self.trees:
            if t.taxNames != self.taxNames:
                if t.name:
                    gm.append(
                        "Tree %s taxNames is not the same object as self.taxNames." % t.name)
                else:
                    gm.append(
                        "Tree taxNames is not the same object as self.taxNames.")
                raise P4Error(gm)
        for t in self.trees:
            t.checkTaxNames()

[docs]    def dump(self):
        """Print summary info about self."""

        print("Trees dump.")
        print("  There are %i trees." % len(self.trees))
        print("  nTax is %i" % self.nTax)
        print("  taxNames")
        if self.nTax < 11:
            for i in range(self.nTax):
                print("    %s" % self.taxNames[i])
        else:
            for i in range(10):
                print("    %s" % self.taxNames[i])
            print("    ... (and more)")
        if self.data:
            print("  There is a data object attached.")
        else:
            print("  There is no data object attached.")

[docs]    def writeNexus(self, fName=None, append=0, withTranslation=0, writeTaxaBlock=1, likeMcmc=0):
        """Write the trees out in NEXUS format, in a trees block.

        If *fName* is None, the default, it is written to `sys.stdout`.

        #NEXUS is written unless we are *append*-ing.
        """

        gm = ['Trees.writeNexus()']

        if withTranslation:
            if not self.taxNames:
                gm.append(
                    "No taxNames.  Set taxNames if you want withTranslation.")
                raise P4Error(gm)

            translationHash = {}
            i = 1
            for tName in self.taxNames:
                translationHash[tName] = i
                i += 1
        else:
            translationHash = None

        if fName == None or fName == sys.stdout:
            f = sys.stdout
            if not append:
                f.write('#NEXUS\n\n')
        else:
            if append:
                import os
                if os.path.isfile(fName):
                    try:
                        f = open(fName, 'a')
                    except IOError:
                        gm.append("Can't open %s for appending." % fName)
                        raise P4Error(gm)
                else:
                    if 0:
                        print(gm[0])
                        print("    'append' is requested,")
                        print("    but '%s' is not a regular file (doesn't exist?)." \
                              % fName)
                        print("    Writing to a new file instead.")
                    try:
                        f = open(fName, 'w')
                        f.write('#NEXUS\n\n')
                    except IOError:
                        gm.append("Can't open %s for writing." % fName)
                        raise P4Error(gm)

            else:
                try:
                    f = open(fName, 'w')
                    f.write('#NEXUS\n\n')
                except IOError:
                    gm.append("Can't open %s for writing." % fName)
                    raise P4Error(gm)

        if writeTaxaBlock:
            if self.taxNames:
                f.write('begin taxa;\n')
                f.write('  dimensions ntax=%s;\n' % self.nTax)
                f.write('  taxlabels')
                for tN in self.taxNames:
                    f.write(' %s' % p4.func.nexusFixNameIfQuotesAreNeeded(tN))
                f.write(';\nend;\n\n')
            else:
                gm.append(
                    "writeTaxaBlock is set, but there is no taxNames.  How did *that* happen?!?")
                raise P4Error(gm)

        f.write('begin trees;\n')

        # write the "translate" command
        if withTranslation:
            f.write('    translate\n')
            for i in range(self.nTax - 1):
                f.write('        %3i %s,\n' % (
                    i + 1, p4.func.nexusFixNameIfQuotesAreNeeded(self.taxNames[i])))
            f.write('        %3i %s\n' % (
                self.nTax, p4.func.nexusFixNameIfQuotesAreNeeded(self.taxNames[-1])))
            f.write('    ;\n')

        # write the models comment
        if self.trees:
            first = self.trees[0]
            if likeMcmc:
                if not withTranslation:
                    gm.append(
                        "withTranslation is turned off, but likeMcmc is turned on.")
                    gm.append(
                        "This will cause grief in TreePartitions, so is prohibited.")
                    gm.append("Both on or both off, please.")
                    raise P4Error(gm)
                f.write('    [&&p4 models p%i' % first.model.nParts)
                for pNum in range(first.model.nParts):
                    f.write(' c%i.%i' % (pNum, first.model.parts[pNum].nComps))
                    f.write(' r%i.%i' %
                            (pNum, first.model.parts[pNum].nRMatrices))
                    f.write(' g%i.%i' %
                            (pNum, first.model.parts[pNum].nGdasrvs))
                f.write(']\n')

        # write each tree
        for t in self.trees:
            if not self.writeBranchLengths:
                t.stripBrLens()

            if t.logLike:
                f.write('    [logLike for tree %s is %f]\n' %
                        (t.name, t.logLike))

            f.write('    tree %s = [&U] ' %
                    p4.func.nexusFixNameIfQuotesAreNeeded(t.name))
            if t.recipWeight:
                # if t.recipWeight == 1:
                #    f.write('[&W 1] ')
                # else:
                f.write('[&W 1/%i] ' % t.recipWeight)
            if hasattr(t, 'weight'):
                f.write('[&W %f] ' % t.weight)

            t.writeNewick(f, withTranslation=withTranslation,
                          translationHash=translationHash, doMcmcCommandComments=likeMcmc)

        f.write('end;\n\n')
        if f != sys.stdout:
            f.close()

[docs]    def writeNewick(self, fName='intree'):
        """Write out the list of trees in Newick format.

        Good for phylip and puzzle.  Long names are not modified,
        which might get you into trouble.
        """

        gm = ['Trees.writeNewick()']
        if fName == None or fName == sys.stdout:
            flob = sys.stdout
        else:
            try:
                flob = open(fName, 'w')
            except IOError:
                gm.append("Can't open %s for writing." % fName)
                raise P4Error(gm)

        #flob.write(' %i\n' % len(self.trees))
        for t in self.trees:
            t.writeNewick(flob)
        # flob.write('\n')
        if fName != sys.stdout:
            flob.close()

[docs]    def getTreesWithSplit(self, someTaxa):
        """Return a list of trees that have a split.

        The arg ``someTaxa`` is a list of taxNames that define the split
        that you are looking for.  It returns a list, not a Trees
        object.  (If no trees are found, it returns an empty list.)
        """

        sk = p4.func.getSplitKeyFromTaxNames(self.taxNames, someTaxa)
        foundTrees = []
        for t in self.trees:
            t.makeSplitKeys()
            for n in t.nodes:
                if n.br:
                    if n.br.splitKey:
                        if n.br.splitKey == sk:
                            foundTrees.append(t)
                            break
                    else:
                        gm = ['Trees.getTreesWithSplit()']
                        gm.append("No node.br.splitKey?")
                        raise P4Error(gm)
        return foundTrees

[docs]    def topologyDistanceMatrix(self, metric='sd', resetSplitKeySet=False):
        """Returns a DistanceMatrix object showing topology distances.

        Uses the :meth:`p4.tree.Tree.topologyDistance` method to compare
        trees.  That method returns distances between pairs of trees,
        and this method simply collates those distances into a
        :class:`p4.distancematrix.DistanceMatrix` object, which is
        returned.

        See :meth:`p4.tree.Tree.topologyDistance` for an explanation of the
        different metrics.  
        """

        from p4.distancematrix import DistanceMatrix
        d = DistanceMatrix()
        d.names = []
        for t in self.trees:
            d.names.append(t.name)
        d.dim = len(d.names)
        # print d.names
        # sys.exit()
        d.matrix = []
        for i in range(d.dim):
            d.matrix.append([0.0] * d.dim)

        for i in range(d.dim):
            t1 = self.trees[i]
            for j in range(i + 1, d.dim):
                t2 = self.trees[j]
                theDist = t1.topologyDistance(t2, metric=metric, resetSplitKeySet=resetSplitKeySet)
                d.matrix[i][j] = theDist
                d.matrix[j][i] = theDist
        return d

[docs]    def inputTreesToSuperTreeDistances(self, inputTrees, latex=False, verbose=True):
        """Make a table of supertree distances.

        See the method
        :meth:`Tree.Tree.inputTreesToSuperTreeDistances`, which gives
        supertree distances from a set of input trees to a single
        Tree.  This Trees method does the same for a bunch of trees,
        outputting the results in a table.

        And it returns the results as a list of lists as well.
        """
        nn = []
        sdd = []
        qdd = []
        for t in self.trees:
            sd, qd = t.inputTreesToSuperTreeDistances(inputTrees)
            nn.append(t.name)
            sdd.append(sd)
            qdd.append(qd)

        if verbose:
            longest = 0
            for n in nn:
                if len(n) > longest:
                    longest = len(n)
            name_sig = " %%-%is" % longest

            longest = 0
            for n in sdd:
                s = "%s" % n
                if len(s) > longest:
                    longest = len(s)
            sd_sig = " %%-%is" % longest

            longest = 0
            for n in qdd:
                s = "%s" % n
                if len(s) > longest:
                    longest = len(s)
            qd_sig = " %%-%is" % longest

            if latex:
                print("\\begin{center}")
                print("\\begin{tabular}{lrr} \\toprule")
                sig = "%s & %s & %s \\\\" % (name_sig, sd_sig, qd_sig)
            else:
                sig = "%s %s  %s" % (name_sig, sd_sig, qd_sig)

        results = []
        for tNum in range(len(self.trees)):
            nm = nn[tNum]
            sd = sdd[tNum]
            qd = qdd[tNum]
            if verbose:
                print(sig % (nm, sd, qd))
            results.append([nm, sd, qd])
        
        if verbose and latex:
            print("\\bottomrule")
            print("\\end{tabular}")
            print("\\end{center}")
        return results

[docs]    def treeProbabilities(self, writeNexus=False):
        """Order the trees in self by frequency of occurrence.

        This provides a posterior probability for the tree topology if
        trees are sampled from an MCMC. The tree with the highest number
        of occurrences is the MAP tree.

        If *writeNexus* is True a Nexus tree file called
        ``treeProbabilities.nex`` is written with tree posterior
        probabilities as weights.  If that file exists, it will be
        silently over-written!

        The tree probs list of lists is returned.

        (Thanks to Cymon Cox for this method.)

        """

        gm = ["Trees.treeProbabilities()"]

        # Make the splitKey dictionary
        skd = {}
        for t in self.trees:
            t.makeSplitKeys()
            skk = [n.br.splitKey for n in t.iterNodesNoRoot()]
            skk.sort()
            skk = tuple(skk)
            it = skd.get(skk)
            if it:
                it[0] += 1
            else:
                dTree = t.dupe()     # Don't mess with the trees in self
                dTree.stripBrLens()
                skd[skk] = [1, dTree]

        # for v in skd.values():
        #    print v

        # Flatten to a list, order by counts
        skl = skd.values()
        skl = p4.func.sortListOfListsOnListElementNumber(skl, 0)
        skl.reverse()

        # Check that we have all the trees
        nTrees = sum([i[0] for i in skl])
        if nTrees != len(self.trees):
            gm.append("Programming error: all trees are not accounted for...")
            raise P4Error(gm)

        # Calculate frequency from counts
        trprobs = [(float(count) / float(nTrees), tree) for count, tree in skl]
        if writeNexus:
            count = 1
            for prob, tree in trprobs:
                tree.weight = prob
                tree.name = "t_%i" % count
                count += 1
            tt = Trees([i[1] for i in trprobs])
            tt.writeNexus(fName="treeProbabilities.nex", withTranslation=True)

        return trprobs

[docs]    def consel(self, rankByInputOrder=False, clobber=False, quiet=1, tidy=1, returnResults=False, seed=0):
        """Use Shimo's consel programs to compare trees.

        The trees in self should all be optimized, and all should have
        models attached.

        Self, the Trees object, should have a Data object attached, as
        self.data.

        The default is to rank the output by support.  You might
        rather have the output be in the same order as the input.
        Which output format is determined by the 'rankByInputOrder'
        arg.

        The analysis produces various files.  Whether they are
        overwritten if they already exist is controlled by the arg
        'clobber'.  The default is to refuse to overwrite.

        The consel programs produce lots of informative rubbish on the
        screen.  Setting 'quiet=1' or 'quiet=True', (the default)
        sends that output to the bit bucket.  The output of consel is
        a table, and that table is output to the screen, unless you
        set 'quiet=2', in which case it is not output to the screen.

        The 'tidy' arg says whether to delete intermediate files,
        including consel_out.ci, consel_out.pv, consel_out.rmt,
        consel_out.vt, and the site likelihoods 'sitelh' file.  By
        default it is 'tidy=1', (equivalent to 'tidy=True'), meaning
        delete them.  If you set 'tidy=2' then even the ouput file
        'conselOut' is deleted.

        Usually the results table will be printed to the screen
        (unless quiet=2), and also put in a file 'conselOut' (unless
        tidy=2), and nothing is returned.  If you want the numbers,
        set returnResults=True, and then this method will return a
        list of tuples of strings of the results table contents.

        Setting arg seed to a number sets the random number generator
        for makermt.  The default is zero, which then takes the seed
        from the system clock.
        """

        gm = ['Trees.consel()']

        if not self.trees or len(self.trees) == 0:
            gm.append("No trees?")
            raise P4Error(gm)

        # Check if consel is installed
        progs = ['makermt', 'consel', 'catpv']
        for progName in progs:
            if p4.func.which2(progName):
                pass
            else:
                gm.append("The programs")
                for p in progs:
                    gm.append("        %s" % p)
                gm.append("need to be in your path.")
                gm.append("Can't find %s" % progName)
                raise P4Error(gm)

        # Check for bad arg vals
        # False equates to zero, and True equates to 1.  True does not equate
        # to 2
        if quiet not in [0, 1, 2]:
            gm.append(
                "arg 'quiet' should be set to one of 0 (or False), 1 (or True), or 2.  Got '%s'" % quiet)
            raise P4Error(gm)
        if tidy not in [0, 1, 2]:
            gm.append(
                "arg 'tidy' should be set to one of 0 (or False), 1 (or True), or 2.  Got '%s'" % tidy)
            raise P4Error(gm)

        # Can we make siteLikes?
        allTreesHaveData = True
        for t in self.trees:
            if not t.data:
                allTreesHaveData = False
                break
        if not allTreesHaveData:
            if not self.data:
                gm.append("You need to 'myTreesObject.data = myDataObject'")
                raise P4Error(gm)
        for t in self.trees:
            if not t.model:
                gm.append("Tree %s has no model." % t.name)
                raise P4Error(gm)

        # Attach self.data to the trees, if needed
        if not allTreesHaveData:
            for t in self.trees:
                if not t.data:
                    t.data = self.data

        # Are we about to clobber?
        # for fName in ['siteLikes.mt', 'consel_out.ci', 'consel_out.pv',
        # 'consel_out.rmt', 'consel_out.vt']:
        for fName in ['siteLikes.sitelh', 'consel_out.ci', 'consel_out.pv', 'consel_out.rmt', 'consel_out.vt']:
            if os.path.exists(fName):
                if clobber:
                    os.remove(fName)
                else:
                    gm.append("Refusing to overwrite file %s" % fName)
                    raise P4Error(gm)

        # print "Calculating siteLikes ..."
        for t in self.trees:
            t.getSiteLikes()
            # Be memory efficient, but there is still a lot of inefficient
            # re-malloc'ing.
            t.deleteCStuff()
        nSiteLikes = len(self.trees[0].siteLikes)

        # Write the site likes to a file in xx.sitelh, ie puzzle format
        # print "Writing siteLikes ..."
        f = open('siteLikes.sitelh', 'w')
        f.write('%i %i\n' % (len(self.trees), nSiteLikes))
        for i in range(len(self.trees)):
            f.write('t%i\t' % i)
            t = self.trees[i]
            f.write('%12.8f' % math.log(t.siteLikes[0]))
            for j in range(1, nSiteLikes):
                f.write(' %12.8f' % math.log(t.siteLikes[j]))
            f.write('\n')
        f.close()

        assert seed >= 0

        # print "Invoking makermt, consel, and catpv ..."
        if quiet:
            os.system(f'makermt -s {seed} --puzzle siteLikes consel_out > /dev/null')
            os.system('consel consel_out > /dev/null')
        else:
            os.system(f'makermt -s {seed} --puzzle siteLikes consel_out')
            os.system('consel consel_out')
        if rankByInputOrder:
            if quiet in [0, 1]:
                os.system('catpv -s 1 consel_out |tee conselOut')
            else:  # quiet=2:
                os.system('catpv -s 1 consel_out > conselOut')
        else:
            if quiet in [0, 1]:
                os.system('catpv consel_out |tee conselOut')
            else:  # quiet=2
                os.system('catpv consel_out > conselOut')

        if returnResults:
            fh = open("conselOut", "r")
            lines = fh.readlines()
            # for l in lines:
            #    print "x ", l,
            fh.close()

            # The output from catpv has both leading and trailing blank lines.

            # The header differs depending on whether rankByInputOrder is
            # turned on -- if so then there is an extra header line, as
            # shown below ("# 0 1+ 2 9 ...") that is not there when
            # rankByInputOrder is not turned on.

            """

            # reading consel_out.pv
            #   0    1+     2      9     10  |     3      4      5      6      7      8 
            # rank item    obs     au     np |     bp     pp     kh     sh    wkh    wsh |
            #    1    1 -1594.0  1.000  1.000 |  1.000  1.000  1.000  1.000  1.000  1.000 |
            #    2    2 1594.0  2e-06  2e-06 |      0      0      0      0      0      0 |

            """
            for line in lines:
                if not line.startswith("#"):
                    lines.remove(line)
            linesToSkip = 2
            if rankByInputOrder:
                linesToSkip += 1
            results = []
            for line in lines[linesToSkip:]:
                a, b = line.split("|")[:2]
                results.append(tuple(a.split()[1:] + b.split()))

        if tidy:
            #os.system('rm consel_out.ci consel_out.pv consel_out.rmt consel_out.vt siteLikes.mt')
            os.system(
                'rm consel_out.ci consel_out.pv consel_out.rmt consel_out.vt siteLikes.sitelh')
            if tidy == 2:
                os.system('rm conselOut')
        if returnResults:
            return results

    # def rell(self, bootCount=10000, seedIsPid=1):
    #     """This compares several trees by the RELL method.

    #     The trees in self should all be optimized, and all should have
    #     models attached.

    #     Self, the Trees object, should have a Data object attached,
    #     as self.data.

    #     Calculations are done in C, so it is fast.  It uses the gsl random
    #     number generator.  Setting the *seedIsPid* (ie turning it on) is
    #     appropriate, and is the default.  """

    #     gm = ['Trees.rell()']

    #     if not self.trees or len(self.trees) == 0:
    #         gm.append("No trees?")
    #         raise P4Error(gm)

    #     # Can we make siteLikes?
    #     if not self.data:
    #         gm.append(
    #             "No data.  You need to 'myTreesObject.data = myDataObject'")
    #         raise P4Error(gm)
    #     for t in self.trees:
    #         if not t.model:
    #             gm.append("Tree %s has no model." % t.name)
    #             raise P4Error(gm)

    #     # Attach self.data to the trees, if needed
    #     for t in self.trees:
    #         if not t.data:
    #             t.data = self.data

    #     # print "Calculating siteLikes ..."
    #     for t in self.trees:
    #         t.getSiteLikes()
    #         # Be memory efficient, but there is still a lot of inefficient
    #         # re-malloc'ing.
    #         t.deleteCStuff()

    #     nTrees = len(self.trees)
    #     for t in self.trees:
    #         t.siteLikes = [math.log(sl) for sl in t.siteLikes]
    #     nChar = len(self.trees[0].siteLikes)

    #     theSeed = 0
    #     if seedIsPid:
    #         theSeed = os.getpid()

    #     # print "Setting C-memory..."
    #     rStuff = pf.setRellMemory(nTrees, nChar, theSeed)
    #     for i in range(nTrees):
    #         t = self.trees[i]
    #         for j in range(nChar):
    #             pf.pokeRellMemory(i, j, t.siteLikes[j], rStuff)

    #     # print "Doing bootstrap ..."
    #     winners = pf.rell(bootCount, rStuff)  # see data.c
    #     pf.freeRellMemory(rStuff)

    #     # print winners

    #     print("\nRELL bootstrap results")
    #     print("======================")
    #     for i in range(nTrees):
    #         t = self.trees[i]
    #         print("%3i   %20s  %1.3f" % (i, t.name, (float(winners[i]) / float(bootCount))))

[docs]    def trackSplitsFromTree(self, theTree, windowSize=200, stride=100, fName='trackSplitsOut.py'):
        """See how slits from theTree changes over the trees in self.

        This looks at how some splits change over the trees in self (self
        is a Trees object).  The splits that are tracked are the ones in
        the arg theTree.  If you only wanted to track one split, you could
        supply such a tree with only that one split.

        It uses a moving window, given by the windowSize arg.  The stride
        is the distance between centers of the windows.  So in the
        default, with windowSize=200 and stride=100, the first trees
        looked at will be from 0--199, and the second window will be from
        100 -- 299.  Note that because the stride is less than the
        windowSize, there are overlapping windows.

        The output is to a python file, which you can examine or plot later.
        """

        theTree.makeSplitKeys()

        # Decorate the internal nodes of theTree with splitKeys
        for n in theTree.iterInternalsNoRoot():
            if n.name:
                n.name += " (%s)" % n.br.splitKey
            else:
                n.name = "(%s)" % n.br.splitKey
        theTree.draw()
        print("The drawing above shows the splitKeys that are being tracked.")

        if fName:
            f = open(fName, 'w')
            textDrawList = theTree.textDrawList()
            for l in textDrawList:
                f.write("#  %s\n" % l)
            f.write(
                "#\n# The drawing above shows the splitKeys that are being tracked.\n")
            f.write("#\n# windowSize=%s, stride=%s.\n" % (windowSize, stride))

        # Remove the decoration from above.
        for n in theTree.iterInternalsNoRoot():
            splitKeyStringWithBlank = " (%s)" % n.br.splitKey
            if n.name.endswith(splitKeyStringWithBlank):
                n.name = n.name[: -(len(splitKeyStringWithBlank))]
            else:
                n.name = None
        # theTree.draw()

        # Determine whether we need to makeSplitKeys()
        t = self.trees[0]
        if hasattr(t, "splitKeys") and t.splitKeys:
            pass
        else:
            for t in self.trees:
                t.makeSplitKeys()
                t.splitKeys = [n.br.splitKey for n in t.iterNodesNoRoot()]
                # print '\nsplitKeys = %s' % t.splitKeys
        tracks = {}
        kk = []
        for n in theTree.iterInternalsNoRoot():
            theSplitKey = n.br.splitKey
            print("=" * 50)
            print("Looking at split %s" % theSplitKey)
            kk.append(theSplitKey)
            tracks[theSplitKey] = []
            startTNum = 0
            while len(self.trees) - startTNum >= windowSize:
                print('trees %4i to %4i: ' % (startTNum, (startTNum + windowSize) - 1), end=' ')
                theTrees = self.trees[startTNum:(startTNum + windowSize)]

                nTrees = len(theTrees)
                splitCount = 0
                for t in theTrees:
                    if theSplitKey in t.splitKeys:
                        splitCount += 1
                print(' nTrees=%3i, splitCount= %3i' % (nTrees, splitCount))
                tracks[theSplitKey].append(
                    [startTNum + (0.5 * windowSize), (float(splitCount) / nTrees)])
                startTNum += stride
        if fName:
            f.write("kk = %s\n" % kk)
            f.write("tracks = %s\n" % tracks)
            f.close()

    def _trackModelThingsFromTree(self, theTree, windowSize=200, stride=100, fName='trackModelThings.py'):

        complaintHead = '\nTrees.trackModelThingsFromTree()'
        gm = [complaintHead]

        gm.append("This method is not working yet.")
        raise P4Error(gm)

        theTree.makeSplitKeys()

        # Decorate theTree with splitKeys for all nodes.
        for n in theTree.iterNodesNoRoot():
            if n.name:
                n.name += " (%s)" % n.br.splitKey
            else:
                n.name = "(%s)" % n.br.splitKey
        theTree.splitKeys = [n.br.splitKey for n in theTree.iterNodesNoRoot()]
        theTree.draw()
        print("The drawing above shows the splitKeys that are being tracked.")

        f = open(fName, 'w')
        textDrawList = theTree.textDrawList()
        for l in textDrawList:
            f.write("#  %s\n" % l)
        f.write(
            "#\n# The drawing above shows the splitKeys that are being tracked.\n")
        f.write("#\n# windowSize=%s, stride=%s.\n" % (windowSize, stride))

        # Remove the decoration from above.
        for n in theTree.iterNodesNoRoot():
            splitKeyStringWithBlank = " (%s)" % n.br.splitKey
            if n.name.endswith(splitKeyStringWithBlank):
                n.name = n.name[: -(len(splitKeyStringWithBlank))]
            else:
                n.name = None
        # theTree.draw()

        # We need to have read in the model comments when we read in the trees
        # for self.
        if not hasattr(self.trees[0], "modelInfo"):
            gm.append("The first tree has no modelInfo.")
            gm.append("The trees should have been read in with")
            gm.append("var.doTreeReadMcmcModelUsageComments turned on.")
            raise P4Error(gm)

        mi = self.trees[0].modelInfo

        # Determine whether we need to makeSplitKeys()
        t = self.trees[0]
        if hasattr(t, "splitKeys") and t.splitKeys:
            pass
        else:
            for t in self.trees:
                t.makeSplitKeys()
                t.splitKeys = [n.br.splitKey for n in t.iterNodesNoRoot()]
                # print '\nsplitKeys = %s' % t.splitKeys

        from treepartitions import TreePartitions

        # The root buisiness is not implemented yet.  The way I do it
        # should be guided by theTree, the reference tree.  It is rooted
        # on a certain node.  I should print out how many of the input
        # trees were rooted on that node, and what the compCounts were.
        startTNum = 0
        while len(self.trees) - startTNum >= windowSize:
            print("Trees %6i -- %6i" % (startTNum, (startTNum + windowSize)))
            tt2 = Trees(
                self.trees[startTNum:(startTNum + windowSize)], taxNames=self.taxNames)
            tp = TreePartitions(tt2)
            for pNum in range(mi.nParts):
                print("  Part %i, nComps=%i" % (pNum, mi.parts[pNum].nComps))
                if mi.parts[pNum].nComps > 1:
                    # for s in tp.splits:
                    #    if s.key in theTree.splitKeys:
                    # print "    Split key: %12s, compCounts=%s" % (s.key,
                    # s.modelUsage.parts[pNum].compCounts)
                    for k in theTree.splitKeys:
                        s = tp.splitsHash.get(k)
                        if s:
                            print("    Split key: %12s, compCounts=%s" % (k, s.modelUsage.parts[pNum].compCounts))
            startTNum += stride

        f.close()
    
[docs]    def getNodesOnReferenceTreeCorrespondingToSelfRoots(self, refTree, verbose=False, drawWidth=150, printNodeNumsList=False):

        gm = ["Trees.getNodesOnReferenceTreeCorrespondingToSelfRoots()"]

        rtnChildren = refTree.root.getNChildren()
        if rtnChildren == 2:
            gm.append("The refTree should not be bi-rooted")
            gm.append("You can do refTree.removeRoot() ")
            raise P4Error(gm)
            
        assert refTree.taxNames
        assert refTree.taxNames == self.taxNames

        nodeNums = []
        for t in self.trees:
            ret = t.getNodeOnReferenceTreeCorrespondingToSelfRoot(refTree, verbose=verbose)
            if ret:
                nodeNums.append(ret.nodeNum)
            else:
                nodeNums.append(-1)

        # sanity check
        rootCountAttrs = 0
        biRootCountAttrs = 0
        for n in refTree.iterNodes():
            if n.br:
                if hasattr(n.br, 'biRootCount'):
                    biRootCountAttrs += 1
            if hasattr(n, "rootCount"):
                rootCountAttrs += 1
        assert rootCountAttrs or biRootCountAttrs
        if rootCountAttrs and biRootCountAttrs:
            gm.append("refTree has %i rootCount's and %i biRootCount's." % (rootCountAttrs, biRootCountAttrs))
            gm.append("It should not have both.")
            raise P4Error(gm)

        if rootCountAttrs:
            nodesList = []
            maxRootCount = 0
            for n in refTree.iterNodes():
                if n.rootCount:
                    n.name = n.rootCount
                    nodesList.append(n)
                    if n.rootCount > maxRootCount:
                        maxRootCount = n.rootCount
            print("non-bi-root maxRootCount is %i" % maxRootCount)
            nodesInOrder = p4.func.sortListOfObjectsOnAttribute(nodesList, 'rootCount')
            nodesInOrder.reverse()
            sumRootCount = 0
            print("node     rootCount")
            print("----     --------")
            for n in nodesInOrder:
                print("%3i       %i" % (n.nodeNum, n.rootCount))
                sumRootCount += n.rootCount
            print("The sum of rootCounts is %i, for %i trees" % (sumRootCount, len(self.trees)))
            refTree.draw(width=drawWidth)
            if printNodeNumsList:
                print(nodeNums)
            return nodeNums

        elif biRootCountAttrs:
            nodesList = []
            maxRootCount = 0
            for n in refTree.iterNodes():
                if n.br and n.br.biRootCount:
                    #n.name = n.rootCount
                    nodesList.append(n)
                    if n.br.biRootCount > maxRootCount:
                        maxRootCount = n.br.biRootCount
            print("bi-root maxRootCount is %i" % maxRootCount)
            tList = [[n.nodeNum, n.br.biRootCount] for n in nodesList]
            nodeNumsInOrder = p4.func.sortListOfListsOnListElementNumber(tList, 1)
            nodeNumsInOrder.reverse()
            sumBiRootCount = 0
            print("node     biRootCount")
            print("----     -----------")
            for n in nodeNumsInOrder:
                print("%3i       %i" % (n[0], n[1]))
                sumBiRootCount += n[1]
            print("The sum of biRootCounts is %i, for %i trees" % (sumBiRootCount, len(self.trees)))
            refTree.draw(width=drawWidth)
            if printNodeNumsList:
                print(nodeNums)
            return nodeNums
p4

Navigation

Related Topics

Source code for p4.trees