import sys
import csv
import random
from p4.tree import Tree
from p4.func import read
from p4.var import var
from p4.p4exceptions import P4Error
from p4.reducedstrictconsensus import Intersection, TreeBuilderFromSplits
class SuperTreeInputTrees(object):
def __init__(self, inputTree, distributionTrees=None):
"""
SuperTreeInputTrees is a utility to create sets of input trees.
The input trees are primarily to be used to evaluate super tree
construction methods.
Invocation removing a fixed number of taxa from each prospective input tree:
stit = SuperTreeInputTrees(inputTree)
stit.writeInputTreesToFile = True
stit.outputFile = 'myInputtrees.tre'
stit.noTaxaToRemove = 32
stit.noOutputTrees = 10
stit.generateInputTrees()
Invocation using built in distribution gathered from real world super tree cases::
stit = SuperTreeInputTrees(inputTree)
stit.writeInputTreesToFile = True
stit.outputFile = 'myInputtrees.tre'
stit.useTaxonDistribution = True
stit.generateInputTrees()
The user can generate a distribution of their own by supplying a list of p4 trees or a tree file.
The order of the trees is important, supertree and then all other trees. This goes for both list and
file. Like so::
stit = SuperTreeInputTrees(inputTree, distributionTrees='myTreefile.nex')
stit.writeInputTreesToFile = True
stit.outputFile = 'myInputtrees.tre'
stit.useTaxonDistribution = True
stit.generateInputTrees()
Placeholders which allow access to data after completed computations::
stit.outputTrees
stit.dist
"""
self.writeInputTreesToFile = False
self.outputFile = 'inputtrees.tre'
# Set to False if you want to have a set number of taxa in the output
# trees
self.useTaxonDistribution = False
# Only meaningful if setting useTaxonDistribution = False
self.noTaxaToRemove = 32
self.noOutputTrees = 10
gm = ['SuperTreeInputTrees()']
if isinstance(inputTree, Tree):
self.inputTree = inputTree # not a list.
elif isinstance(inputTree, str):
var.trees = []
read(inputTree)
if len(var.trees) > 1:
gm.append('Sorry, supply only one tree as supertree')
raise P4Error(gm)
# this was originally a list, ie [var.trees.pop()]
self.inputTree = var.trees.pop()
else:
gm.append("Input tree was neither a p4 Tree nor a valid filename")
gm.append("Got %s" % inputTree)
raise P4Error(gm)
if not self.inputTree._taxNames:
self.inputTree._setTaxNamesFromLeaves()
self.outputTrees = []
self.normalizedDist = []
# Distributions gathered from real world supertree input
# The dists are first a list of input tree taxon set sizes and the supertree taxon set size
# Using this data we can normalize the dists to fit the size of trees
# we want
# BunnyRSVNormal set from Wilkinson et al 2005, Syst Biol 54:823
# self.dist = [[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 13, 13, 13, 14, 14, 15, 17, 17, 18, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 21, 22, 22, 23, 24, 25, 25, 25, 25, 25, 25, 26, 27, 28, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 37, 38, 38, 40, 40, 41, 47, 51, 51, 52, 52, 52, 68, 70, 78, 78, 79, 80, 80], 80]
# CanidaeRVS set from Wilkinson et al 2005, Syst Biol 54:823
#self.dist = [[3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 7, 8, 8, 9, 10, 11, 11, 11, 12, 16, 16, 20, 23, 24, 30, 30, 33, 34, 34, 34, 34, 34], 34]
# CarnivoraRVS set from Wilkinson et al 2005, Syst Biol 54:823
#self.dist = [[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12], 12]
# DavideDinoMRP set from Wilkinson et al 2005, Syst Biol 54:823
#self.dist = [[4, 4, 4, 5, 6, 6, 6, 7, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 17, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 20, 20, 20, 22, 23, 23, 24, 24, 25, 26, 27, 27, 28, 28, 29, 29, 29, 29, 29, 30, 30, 30, 31, 31, 31, 31, 33, 33, 33, 33, 36, 37, 37, 38, 38, 39, 42, 45, 47, 48, 50, 53, 53, 66, 70, 71, 74, 74, 75, 75, 76, 78, 78, 80, 86, 86, 92, 94, 96, 100, 101, 102, 102, 103, 105, 110, 111, 111, 139, 148, 149, 153, 173, 199, 204, 217, 240, 269, 270, 271, 272, 272, 273, 273, 273, 273, 274, 275], 277]
# FelidaeRVS set from Wilkinson et al 2005, Syst Biol 54:823
self.dist = [[3, 3, 3, 3, 3, 4, 4, 4, 5, 6, 6, 6, 7, 7, 7, 7, 9, 9, 10, 10, 14,
16, 17, 24, 25, 28, 29, 29, 30, 30, 32, 34, 36, 36, 36, 36, 36, 36, 36, 36], 36]
# KennedyPageData set from Wilkinson et al 2005, Syst Biol 54:823
#self.dist = [[14, 16, 17, 20, 30, 30, 90], 122]
# ViverridaeRVS set from Wilkinson et al 2005, Syst Biol 54:823
#self.dist = [[4, 5, 10, 16, 19, 33, 34, 34, 34], 34]
if distributionTrees:
self.useTaxonDistribution = True
if isinstance(distributionTrees, list):
for t in distributionTrees:
if not isinstance(t, Tree):
gm.append(
"Input trees should be a list of p4 Tree objects. Got %s" % t)
raise P4Error(gm)
superTree = distributionTrees.pop(0)
inputTrees = distributionTrees
elif isinstance(distributionTrees, list):
var.trees = []
read(distributionTrees)
if len(var.trees) < 1:
gm.append(
'Sorry, at least one tree must be supplied as input tree')
raise P4Error(gm)
superTree = var.trees.pop(0)
inputTrees = var.trees
self._generateDistribution(superTree, inputTrees)
def _generateDistribution(self, superTree, inputTrees):
if not superTree._taxNames:
superTree._setTaxNamesFromLeaves()
distribution = []
for tree in inputTrees:
if not tree._taxNames:
tree._setTaxNamesFromLeaves()
distribution.append(len(tree.taxNames))
distribution.sort()
self.dist = [distribution, len(superTree.taxNames)]
# print [distribution,len(superTree.taxNames)]
# Prepares the dist by normalizing and changing it to reflect instead the no of taxa to remove.
# This change is done simply to ease the other calculations
def _prepareDistribution(self, noOfTaxaInSuperTree):
# print 'Supertree taxa: ',noOfTaxaInSuperTree
# print 'Dist supertree taxa: ',dist[1]
normalizationIndex = self.dist[1] / float(noOfTaxaInSuperTree)
# print 'Normalization index: ',normalizationIndex
# print 'Dist supertree taxa N:',dist[1]/normalizationIndex
for size in self.dist[0]:
normalized = size / normalizationIndex
if int(noOfTaxaInSuperTree - normalized) > 2:
# print '%s, %s ' % (size, int(noOfTaxaInSuperTree-normalized))
self.normalizedDist.append(
int(noOfTaxaInSuperTree - normalized))
# Determines how we deal with the no of output trees
# In the simple case this is a set number, but we can also use a
# distribution
def _noOutputTrees(self):
return self.noOutputTrees
# Determines how we deal with the no of taxa to remove
# In the simple case this is a set number, but we can also use a
# distribution
def _noTaxaToRemove(self):
if self.useTaxonDistribution:
return random.choice(self.normalizedDist)
else:
return self.noTaxaToRemove
# Randomizes a taxon for removal, does so without regard for the shape of
# the tree
def _getTaxaToRemove(self, nodes):
node = nodes[random.randint(0, len(nodes) - 1)]
if node.isLeaf:
return node
else:
while not node.isLeaf:
node = nodes[random.randint(0, len(nodes) - 1)]
return node
def generateInputTrees(self):
gm = ['SuperTreeInputTrees.generateInputTrees()']
# Check if input values are valid
if not self.useTaxonDistribution:
if self.noTaxaToRemove >= len(self.inputTree.taxNames) - 2:
gm.append(
'The number of taxa to remove would leave less than 3 taxa in the tree, quite uninformative')
raise P4Error(gm)
# Prepare the distribution by normalizing it to the size of the input tree
if self.useTaxonDistribution:
self._prepareDistribution(len(self.inputTree.taxNames))
# Creates the output trees and removes taxa from them accoring to the settings
# Checks if the output tree has the correct number of taxa as a precation
for i in range(self._noOutputTrees()):
tree = self.inputTree.dupe()
tree.name = 'inputtree' + str(i + 1)
taxa2Remove = self._noTaxaToRemove()
for j in range(taxa2Remove):
tree.removeNode(self._getTaxaToRemove(tree.nodes))
tree._setTaxNamesFromLeaves()
if len(tree.taxNames) == len(self.inputTree.taxNames) - taxa2Remove:
self.outputTrees.append(tree)
else:
print('Bugger, the correct number of taxa were not removed, taxa remaining: ', len(tree.taxNames))
print('Expected: ', len(self.inputTree.taxNames) - taxa2Remove)
# Writes the trees to file
if self.writeInputTreesToFile:
for tree in self.outputTrees:
# tree.draw()
tree. writeNewick(fName=self.outputFile, withTranslation=0,
translationHash=None, doMcmcCommandComments=0, toString=False, append=True)
[docs]
class SuperTreeSupport(object):
"""Supertree support measures
Super tree support can be used to calculate a number of support measures for a set of trees and
a supertree. The measures can be at split level and placed on the supertree for image production or
at tree level with a number of summary measures.
The support of the input trees for a supertree is measured by counting the number of
input trees that support(S), conflict(Q), permits(P) or are relevant(R) with the splits in the supertree.
Supply a supertree and the input trees used to create it. Filenames or trees will do.
A single supertree and a list of input trees.
For example::
sts = SuperTreeSupport('supertree.nex', 'input.nex')
or::
read('input.nex')
inputTrees = var.trees
sts = SuperTreeSupport('supertree.nex', inputTrees)
sts.doSaveDecoratedTree = True
sts.decoratedFilename='mytree.nex'
sts.doSaveIndexTree=False
sts.indexFilename='mytreeIndex.nex'
sts.csvFilename='mytreeIndex.csv'
sts.doDrawTree=True
sts.verbose=1
sts.superTreeSupport()
After completing the analysis there are a number of placeholders that allows access to the resulting data::
sts.decoratedSuperTree
sts.indexSuperTree
sts.csvList
sts.T, no. of input trees;
sts.L, no. of leaves;
sts.C, coverage (average proportion of leaves in the input tree);
sts.mean, mean taxon overlap among input trees
sts.median, median taxon overlap among input trees
sts.SC, number of supertree clades;
sts.U, no. of unsupported supertree clades;
sts.UC, no. of unsupported supertree clades that conflict with at least one input tree;
sts.UCC, no. of unsupported clades conflicting with all relevant input trees;
sts.QS, average qualitative support for supertree clades. Figures in parentheses are ranges.
sts.S, average support
sts.P, average permitted
sts.Q, average conflict
sts.R, average relevance
sts.V, average V for supertree clades V = (s minus q)/(s + q)
sts.VV, V+ = (s - q + p)/(s + q + p)
sts.Vv, V minus = (s - q - p)/(s + q + p)
sts.wV, wV = (ws minus q)/(ws + q)
sts.wVV, wVV = (ws minus q +wp)/(ws + q + wp)
sts.wVv, wVv = (ws minus q minus wp)/(ws + q + wp)
These can be used for further analysis.
Examples of support, conflict, relevance and permission:
**Support**::
supertree:
+--------1:A
+--------5:100
| +--------2:B
0
|--------3:C
|
+--------4:D
input tree:
+--------1:A
+--------5:100
| +--------2:B
0
|--------3:C
**Conflict**::
supertree:
+--------1:A
+--------5:100
| +--------2:B
0
|--------3:C
|
+--------4:D
input tree:
+--------1:A
+--------5:100
| +--------2:C
0
|--------3:B
|
+--------4:D
**Relevance**, i.e an input tree split that is irrelevant to the supertree split::
supertree:
+--------1:A
+--------7:50
| +--------2:B
|
|--------3:C
0
|--------4:D
|
|--------5:E
|
+--------6:F
input tree:
+--------1:E
+--------7:50
| +--------2:F
|
|--------3:C
0
|--------4:D
|
|--------5:A
**Permission**, i.e. an input trees split that permits the supertree split but does not support it::
supertree:
+--------1:A
+--------7:50
| +--------2:B
|
|--------3:C
0
|--------4:D
|
|--------5:E
|
+--------6:F
input tree:
+--------1:A
+--------7:50
| +--------2:B
| |
0 |--------3:C
|
|--------4:D
|
|--------5:E
The analysed supertree can be decorated in three different ways.
I. V = (s minus q)/(s + q) in standard mode, i.e. values between 0 - 100.
II. V = (s minus q)/(s + q) in supertree mode, i.e. values between -1 - 1
III. With indices that can be correlated to a csv file holding the support values.
This allows for further analysis of the support values and post mapping to the tree.
"""
def __init__(self, supertree, inputTrees):
# There are two ways of decorating the supertree with the support values.
# Standard conforms to the consensus tree tradition, i.e. values are presented between
# 0 to 100 percent. Non standard adhears to the few supertree papers regarding support values
# i.e -1 to 1.
self.doStandardDecoration = True
# The decorated supertree can be saved to file
self.doSaveDecoratedTree = False
self.decoratedFilename = 'superTreeSupport.nex'
# There is a option to save a supertree decorated with index values instead of support values.
# This can then be used with a csv file containing the support values for each index.
# Further analysis of the support values can be performed and then matched to the indecies in the
# decorated supertree
self.doSaveIndexTree = False
self.indexFilename = 'supertreeIndex.nex'
self.csvFilename = 'supertreeIndex.csv'
# Draws the decorated supertree to screen
self.doDrawTree = False
# Produces output to screen
self.verbose = 1
# Placeholders that allows access to the data after completing
# calculations
self.decoratedSuperTree = None
self.indexSuperTree = None
self.csvList = None
# Keeps track of splits for producing output
self.indexIntersections = []
self.csvValues = []
self.intersections = []
# Let t be the number of input trees,
# s the number of input trees supporting a supertree clade,
# r the number of input trees that are irrelevant to the supertree clade,
# q the number of input trees that conflict with the supertree clade,
# p the number of input trees that permit the supertree clade,
# so that t = p + q + r + s.
self.T = 0 # no. of input trees;
self.L = 0 # no. of leaves;
# coverage (average proportion of leaves in the input tree);
self.C = 0.0
self.SC = 0 # number of supertree clades;
self.U = 0 # no. of unsupported supertree clades;
# no. of unsupported supertree clades that conflict with at least one
# input tree;
self.UC = 0
# no. of unsupported clades conflicting with all relevant input trees;
self.UCC = 0
# average qualitative support for supertree clades. Figures in
# parentheses are ranges.
self.QS = 0.0
self.S = 0.0 # average support
self.P = 0.0 # average permitted
self.Q = 0.0 # average conflict
self.R = 0.0 # average relevance
self.wS = 0.0 # average weighted support
self.wP = 0.0 # average weighted permitance
self.V = 0.0 # average V for supertree cladesV = (s minus q)/(s + q)
self.VV = 0.0 # V+ = (s minus q +p)/(s + q + p)
self.Vv = 0.0 # V minus = (s minus q minus p)/(s + q + p)
self.wV = 0.0 # wV = (ws minus q)/(ws + q)
self.wVV = 0.0 # wVV = (ws minus q +wp)/(ws + q + wp)
self.wVv = 0.0 # wVv = (ws minus q minus wp)/(ws + q + wp)
gm = ['SuperTreeSupport()']
var.warnReadNoFile = False
if isinstance(inputTrees, list):
for t in inputTrees:
if not isinstance(t, Tree):
gm.append(
"Input trees should be a list of p4 Tree objects. Got %s" % t)
raise P4Error(gm)
self.inputTrees = inputTrees
elif isinstance(inputTrees, str):
var.trees = []
read(inputTrees)
if len(var.trees) < 1:
gm.append(
'Sorry, at least one tree must be supplied as input tree')
raise P4Error(gm)
self.inputTrees = var.trees
else:
gm.append(
"Input trees are neither a list of p4 Tree objects nor a valid filename.")
raise P4Error(gm)
if isinstance(supertree, Tree):
self.supertree = supertree # not a list.
elif isinstance(supertree, str):
var.trees = []
read(supertree)
if len(var.trees) > 1:
gm.append('Sorry, supply only one tree as supertree')
raise P4Error(gm)
# this was originally a list, ie [var.trees.pop()]
self.supertree = var.trees.pop()
else:
gm.append("Supertree was neither a p4 Tree nor a valid filename")
gm.append("Got %s" % supertree)
raise P4Error(gm)
for tree in self.inputTrees:
if not tree._taxNames:
tree._setTaxNamesFromLeaves()
# Mean and median overlap of the input trees
overlapList = []
meanOverlap = 0.0
index = 0
for i in range(0, len(self.inputTrees) - 1):
for j in range(i + 1, len(self.inputTrees)):
overlap = len(set(self.inputTrees[i].taxNames).intersection(
set(self.inputTrees[j].taxNames)))
overlapList.append(overlap)
meanOverlap += overlap
index += 1
if index == 0:
self.mean = 0
self.median = 0
else:
self.mean = meanOverlap / index
overlapList.sort()
self.median = overlapList[len(overlapList) / 2]
commonLeafSet = CommonLeafSet()
self.splits = commonLeafSet.updateTreesToCommonLeafSet(
[self.inputTrees, [self.supertree]])
self.bitkeys = commonLeafSet.getCommonBitkeys()
self.taxnames = commonLeafSet.getCommonTaxNames()
self.taxa2Bitkey = commonLeafSet.getCommonTaxa2Bitkey()
[docs]
def superTreeSupport(self):
"""
Perform analyses on a number of input trees and the resulting supertree.
"""
gm = ['superTreeSupport()']
allOnes = 2 ** (len(self.bitkeys)) - 1
inputSplits = self.splits[0]
self.T = len(inputSplits)
self.L = len(self.bitkeys)
# if 'ROOT' in self.taxnames:
# self.L -= 1
# if 'mrp_outgroup' in self.taxnames:
# self.L -= 1
coverage = 0.0
for splits in inputSplits:
t = self.L - self.popcount(splits[0][1], self.bitkeys)
if 'ROOT' in self.taxnames and splits[0][1] & self.taxa2Bitkey['ROOT']:
t += 1
coverage += t
self.C = (coverage / self.T) / self.L
# print "self.splits = %s" % self.splits
if self.splits[1]:
supertreeSplits = self.splits[1][0]
else:
supertreeSplits = []
self.SC = len(supertreeSplits)
if self.verbose > 1:
print('Identifying supporting, conflicting and neutral input trees,')
sys.stdout.flush()
"""
A dictionary supertreeDict = {} will keep track of the supertree splits and their support
The indeces from the supertreeSplits list will be used to keep track of the splits
i.e. supertreeSplits[1] info will be in supertreeDict[1]
Each entry in the dict will be a list formated like so
[input trees hard support, input trees soft support(entailed), input trees neutral, input trees conflict]
"""
def isPowerOf2(number):
return (not(number & (number - 1)))
supertreeDict = {}
for i in range(self.SC):
# Used to store each supertree clades support values
# [S, P, R, Q, WS, WP, V, V+, V-, wV, wV+,wV-]
supertreeDict[i] = [
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
supertreeSplitDict = {}
for j in range(self.SC):
supertreeSplitDict[j] = -1
for splitSet in inputSplits:
supportedSupertreeSplits = []
permittedSupertreeSplits = []
taxSplit = splitSet[0][1] ^ allOnes
allSuperTreeSplitsResovled = False
for split in splitSet:
if allSuperTreeSplitsResovled:
break
allSuperTreeSplitsResovled = True
for i in range(self.SC):
if supertreeSplitDict[i] < 0:
allSuperTreeSplitsResovled = False
workingSplit = (
supertreeSplits[i][0] & splitSet[0][1]) ^ supertreeSplits[i][0]
if not self.popcountLimit(taxSplit & workingSplit, 2) or not self.popcountLimit(taxSplit ^ workingSplit, 2):
supertreeSplitDict[i] = 2
else:
ones = split[0] & workingSplit
one1 = 0
if ones > 0:
if not isPowerOf2(ones):
one1 = 2
else:
one1 = 1
# print 'one1: ', one1
zeroOnes = (split[0] ^ workingSplit) & workingSplit
zero1 = 0
if zeroOnes > 0:
if not isPowerOf2(zeroOnes):
zero1 = 2
else:
zero1 = 1
# print 'zero1: ', zero1
oneZeroes = (split[0] ^ workingSplit) & split[0]
one0 = 0
if oneZeroes > 0:
if not isPowerOf2(oneZeroes):
one0 = 2
else:
one0 = 1
# print 'one0: ', one0
zeroZeroes = (
(workingSplit ^ allOnes) & (split[0] ^ allOnes)) ^ split[1]
zero0 = 0
if zeroZeroes > 0:
if not isPowerOf2(zeroZeroes):
zero0 = 2
else:
zero0 = 1
# print 'zero0: ', zero0
if one1 and zero1 and one0 and zero0:
supertreeSplitDict[i] = 3
if one1 and zero0 and (not zero1 and not one0):
#self.printSplitList([split, supertreeSplits[i]], self.bitkeys)
supportedSupertreeSplits.append(i)
supertreeSplitDict[i] = 0
if zero1 and one0 and (not one1 and not zero0):
#self.printSplitList([split, supertreeSplits[i]], self.bitkeys)
supportedSupertreeSplits.append(i)
supertreeSplitDict[i] = 0
for i in range(self.SC):
if supertreeSplitDict[i] < 0:
supertreeDict[i][1] = supertreeDict[i][1] + 1
permittedSupertreeSplits.append(i)
supertreeDict[i][supertreeSplitDict[i]] = supertreeDict[
i][supertreeSplitDict[i]] + 1
supertreeSplitDict[i] = -1
if len(permittedSupertreeSplits) > 0:
wp = 1.0 / len(permittedSupertreeSplits)
for index in permittedSupertreeSplits:
supertreeDict[index][5] = supertreeDict[index][5] + wp
if len(supportedSupertreeSplits) > 0:
ws = 1.0 / len(supportedSupertreeSplits)
for index in supportedSupertreeSplits:
supertreeDict[index][4] = supertreeDict[index][4] + ws
if self.verbose > 1:
print('done.')
"""
How many input trees containing the taxa in the supertree split, conflict, agree, neutral.
"""
if self.verbose > 1:
print('Creating statistics and supertree with support values,')
self._calcSupport(supertreeDict)
if self.doSaveIndexTree:
self.saveIndexTree(supertreeSplits, supertreeDict)
if self.doDrawTree or self.doSaveDecoratedTree:
self.buildDecoratedTree(supertreeSplits, supertreeDict)
if self.doDrawTree:
self.decoratedSuperTree.draw()
print('')
if self.doSaveDecoratedTree:
self.decoratedSuperTree.writeNexus(self.decoratedFilename)
if self.verbose == 1:
self.printReducedOutput()
elif self.verbose > 1:
self.printReducedOutput(printInstructions=True)
[docs]
def buildDecoratedTree(self, supertreeSplits, supertreeDict):
for key, value in supertreeDict.items():
inter = Intersection(
supertreeSplits[key][0], supertreeSplits[key][1], 0, 0)
name = ''
if self.doStandardDecoration:
name = str(int(round(100.0 * (1.0 + value[9]) / 2.0)))
else:
name = str(value[9])
inter.name = name
self.intersections.append(inter)
treeBuilderFromSplits = TreeBuilderFromSplits(
self.bitkeys, self.taxnames)
self.decoratedSuperTree = treeBuilderFromSplits.buildTreeFromInformativeList(
self.intersections, treeName='decoratedSupertree')
[docs]
def saveIndexTree(self, supertreeSplits, supertreeDict):
for key, value in supertreeDict.items():
inter = Intersection(
supertreeSplits[key][0], supertreeSplits[key][1], 0, 0)
if self.doStandardDecoration:
self.csvValues.append([key, int(round(100.0 * (value[0] / self.T))), int(round(100.0 * (value[1] / self.T))),
int(round(
100.0 * (value[2] / self.T))), int(round(100.0 * (value[3] / self.T))),
int(round(
100.0 * (value[4] / self.T))), int(round(100.0 * (value[5] / self.T))),
value[6], value[7], value[8], value[9], value[10], value[11]])
else:
self.csvValues.append([key, value[0] + value[1], value[0], value[1], value[2], value[
3], value[4], value[5], value[6], value[7], value[8], value[9], value[10], value[11]])
inter.name = str(key)
self.indexIntersections.append(inter)
treeBuilderFromSplits = TreeBuilderFromSplits(
self.bitkeys, self.taxnames)
self.indexSuperTree = treeBuilderFromSplits.buildTreeFromInformativeList(
self.indexIntersections, treeName='indexSupertree')
self.indexSuperTree.writeNexus(self.indexFilename)
writer = csv.writer(open(self.csvFilename, "wb"))
writer.writerows(self.csvValues)
def _calcSupport(self, supertreeDict):
for value in supertreeDict.values():
S = value[0]
P = value[1]
Q = value[3]
R = value[2]
# print 'S+P+Q+R: %s, %s, %s, %s' % (S, P, Q, R)
wS = value[4]
wP = value[5]
self.S += S
self.P += P
self.Q += Q
self.R += R
self.wS += wS
self.wP += wP
if S < 1:
self.U += 1
if S < 1 and Q > 0:
self.UC += 1
if S < 1 and Q > 0 and Q == self.T - R:
self.UCC += 1
if S + Q > 0:
value[6] = (S - Q) / (S + Q)
self.V += value[6]
if wS + Q > 0:
value[9] = (wS - Q) / (wS + Q)
self.wV += value[9]
if S + Q + P > 0:
value[7] = (S - Q + P) / (S + Q + P)
self.VV += value[7]
value[8] = (S - Q - P) / (S + Q + P)
self.Vv += value[8]
if wS + Q + wP > 0:
value[10] = (wS - Q + wP) / (wS + Q + wP)
self.wVV += value[10]
value[11] = (wS - Q - wP) / (wS + Q + wP)
self.wVv += value[11]
if 0:
print("self.SC =", self.SC)
print("self.S =", self.S)
print("self.P =", self.P)
print("self.Q =", self.Q)
print("self.R =", self.R)
print("self.V =", self.V)
print("self.VV =", self.VV)
print("self.Vv =", self.Vv)
print("self.wV =", self.wV)
print("self.wVV =", self.wVV)
print("self.wVv =", self.wVv)
if self.SC:
self.S = self.S / self.SC
self.P = self.P / self.SC
self.Q = self.Q / self.SC
self.R = self.R / self.SC
self.V = self.V / self.SC
self.VV = self.VV / self.SC
self.Vv = self.Vv / self.SC
self.wV = self.wV / self.SC
self.wVV = self.wVV / self.SC
self.wVv = self.wVv / self.SC
[docs]
def printReducedOutput(self, printInstructions=False):
if printInstructions:
print('I = no. of input trees;')
print('L = no. of leaves;')
print('Mean = mean taxon overlap among input trees')
print('Medi = median taxon overlap among input trees')
print('C = coverage (average proportion of leaves in the input tree);')
print('SC = number of supertree clades;')
print('U = no. of unsupported supertree clades;')
print('U* = no. of unsupported supertree clades that conflict with at least one input tree;')
print('U** = no. of unsupported clades conflicting with all relevant input trees;')
print('V = avg of support-conflict / support+conflict;')
print('V+ = avg of support-conflict+permitting / support+conflict+permitting ;')
print('V- = avg of support-conflict-permitting / support+conflict+permitting ;')
print('wV = avg of weighted support-conflict / weighted support+conflict;')
print('wV+ = avg of weighted support-conflict+weighted permitting / weighted support+conflict+weighted permitting ;')
print('wV- = avg of weighted support-conflict-weighted permitting / weighted support+conflict+weighted permitting ;')
print('I L Mean Medi C SC U U* U** V V+ V- wV wV+ wV-')
print(repr(self.T).ljust(6)[:5], end=' ')
print(repr(self.L).ljust(6)[:5], end=' ')
print(repr(self.mean).ljust(6)[:5], end=' ')
print(repr(self.median).ljust(6)[:5], end=' ')
print(repr(self.C).ljust(6)[:5], end=' ')
print(repr(self.SC).ljust(6)[:5], end=' ')
print(repr(self.U).ljust(6)[:5], end=' ')
print(repr(self.UC).ljust(6)[:5], end=' ')
print(repr(self.UCC).ljust(6)[:5], end=' ')
print(repr(self.V).ljust(6)[:5], end=' ')
print(repr(self.VV).ljust(6)[:5], end=' ')
print(repr(self.Vv).ljust(6)[:5], end=' ')
print(repr(self.wV).ljust(6)[:5], end=' ')
print(repr(self.wVV).ljust(6)[:5], end=' ')
print(repr(self.wVv).ljust(6)[:5])
[docs]
def popcount(self, n, bitkeys):
count = 0
for bk in bitkeys:
if n < bk:
return count
if bk & n:
count += 1
return count
[docs]
def popcountLimit(self, n, limit):
count = 0
for bk in self.bitkeys:
if bk & n:
count += 1
if n >= limit:
return True
return False
[docs]
def printSplitList(self, list, bitkeys):
for split in list:
intersection = ''
for bk in bitkeys:
if bk & split[0]:
intersection = intersection + '1'
elif bk & split[1]:
intersection = intersection + '?'
else:
intersection = intersection + '*'
print('Split: %s' % (intersection))
print(' ')
class CommonLeafSet(object):
def updateToCommonLeafSetWithTaxa(self, tfl, dict, bitkeys, taxnames):
gm = ['updateToCommonLeafSetWithTaxa(tfl, dict, bitkeys, taxnames)']
uniqueSet = set()
for i in range(tfl.nSamples):
t = tfl.getTree(i)
if not t._taxNames:
t._setTaxNamesFromLeaves()
for name in t.taxNames:
uniqueSet.add(name)
list = []
for name in uniqueSet:
list.append(name)
for name in list:
if not name in taxnames:
gm.append(
'Found taxa in that does not appear in the supplied taxa list: ' + name)
raise P4Error(gm)
for name in taxnames:
if not name in list:
gm.append(
'Found taxa in supplied taxa list that does not appear in the supertree: ' + name)
raise P4Error(gm)
self.weights = []
splits = []
for i in range(tfl.nSamples):
t = tfl.getTree(i)
if not t._taxNames:
t._setTaxNamesFromLeaves()
t.missingTaxa = 0
for name in taxnames:
if not t.taxNames.count(name):
t.missingTaxa = t.missingTaxa | dict[name]
for n in t.iterLeavesNoRoot():
n.br.rc = dict[n.name]
t._makeRCSplitKeys()
weight = 1.0
if hasattr(t, "weight"):
if t.weight != None:
weight = t.weight
elif hasattr(t, "recipWeight"):
if t.recipWeight != None:
weight = 1.0 / int(t.recipWeight)
self.weights.append(weight)
t.splits = []
for n in t.iterInternalsNoRoot():
for m in n.br.rcList:
t.splits.append([m, t.missingTaxa])
splits.append(t.splits)
return splits
def updateTreesToCommonLeafSet(self, trees):
"""
Creates common leafset
Requires input to be a list like so: [[inputtrees][supertree]]
"""
uniqueSet = set()
sampler = float(len(trees)) / 10
index = 0.0
for treeList in trees:
for t in treeList:
if t.root.getNChildren() <= 2:
t.addSibLeaf(t.root, "ROOT")
index += 0.5
if not t._taxNames:
t._setTaxNamesFromLeaves()
for name in t.taxNames:
uniqueSet.add(name)
if index % sampler == 0:
print('%s trees processed ' % (int(index)))
sys.stdout.flush()
self.list = []
for name in uniqueSet:
self.list.append(name)
self.list.sort()
self.dict = {}
self.bitkeys = []
for i in range(len(self.list)):
self.bitkeys.append(1 << i)
self.dict[self.list[i]] = 1 << i
self.weights = []
self.treeNames = []
tflsSplits = []
for treeList in trees:
splits = []
for t in treeList:
index += 0.5
self.treeNames.append(t.name)
if not t._taxNames:
t._setTaxNamesFromLeaves()
t.missingTaxa = 0
for name in self.list:
if not t.taxNames.count(name):
t.missingTaxa = t.missingTaxa | self.dict[name]
for n in t.iterLeavesNoRoot():
n.br.rc = self.dict[n.name]
t._makeRCSplitKeys()
weight = 1.0
if hasattr(t, "weight"):
if t.weight != None:
weight = t.weight
elif hasattr(t, "recipWeight"):
if t.recipWeight != None:
weight = 1.0 / int(t.recipWeight)
self.weights.append(weight)
t.splits = []
for n in t.iterInternalsNoRoot():
for m in n.br.rcList:
t.splits.append([m, t.missingTaxa])
if len(t.splits) > 0:
# for split in t.splits:
# intersection = ''
# for bk in self.bitkeys:
# if bk & split[0]:
# intersection = intersection + '1'
# elif bk & split[1]:
# intersection = intersection + '?'
# else:
# intersection = intersection + '*'
# print 'Split: %s' % (intersection)
# print ''
splits.append(t.splits)
if index % sampler == 0:
print('%s trees processed ' % (int(index)))
sys.stdout.flush()
tflsSplits.append(splits)
return tflsSplits
def updateToCommonLeafSet(self, tfls):
uniqueSet = set()
trees = 0
for tfl in tfls:
trees += tfl.nSamples
sampler = float(trees) / 10
index = 0.0
for tfl in tfls:
for i in range(tfl.nSamples):
index += 0.5
t = tfl.getTree(i)
if not t._taxNames:
t._setTaxNamesFromLeaves()
for name in t.taxNames:
uniqueSet.add(name)
if index % sampler == 0:
print('%s trees processed ' % (int(index)))
sys.stdout.flush()
self.list = []
for name in uniqueSet:
self.list.append(name)
self.list.sort()
self.dict = {}
self.bitkeys = []
for i in range(len(self.list)):
self.bitkeys.append(1 << i)
self.dict[self.list[i]] = 1 << i
self.weights = []
self.treeNames = []
tflsSplits = []
for tfl in tfls:
splits = []
for i in range(tfl.nSamples):
index += 0.5
t = tfl.getTree(i)
self.treeNames.append(t.name)
if not t._taxNames:
t._setTaxNamesFromLeaves()
t.missingTaxa = 0
for name in self.list:
if not t.taxNames.count(name):
t.missingTaxa = t.missingTaxa | self.dict[name]
for n in t.iterLeavesNoRoot():
n.br.rc = self.dict[n.name]
t._makeRCSplitKeys()
weight = 1.0
if hasattr(t, "weight"):
if t.weight != None:
weight = t.weight
elif hasattr(t, "recipWeight"):
if t.recipWeight != None:
weight = 1.0 / int(t.recipWeight)
self.weights.append(weight)
t.splits = []
for n in t.iterInternalsNoRoot():
for m in n.br.rcList:
t.splits.append([m, t.missingTaxa])
splits.append(t.splits)
if index % sampler == 0:
print('%s trees processed ' % (int(index)))
sys.stdout.flush()
tflsSplits.append(splits)
return tflsSplits
def getCommonTaxa2Bitkey(self):
return self.dict
def getCommonTaxNames(self):
return self.list
def getCommonBitkeys(self):
return self.bitkeys
def getWeights(self):
return self.weights
def getTreeNames(self):
return self.treeNames