import p4.pf as pf
import p4.func
from p4.var import var
import math
import random
import string
import sys
import time
import copy
import os
import pickle
from p4.chain import Chain
from p4.p4exceptions import P4Error
from p4.treepartitions import TreePartitions
from p4.constraints import Constraints
from p4.node import Node
from p4.pnumbers import Numbers
import datetime
import numpy
import logging
import statistics
from collections import deque
# for proposal probs
fudgeFactor = {}
fudgeFactor['local'] = 1.0
fudgeFactor['brLen'] = 1.0
fudgeFactor['eTBR'] = 1.0
fudgeFactor['allBrLens'] = 1.0
fudgeFactor['polytomy'] = 1.0
fudgeFactor['root3'] = 0.05
fudgeFactor['root3n'] = 0.1
fudgeFactor['root2'] = 2.0
fudgeFactor['compLocation'] = 0.01
fudgeFactor['rMatrixLocation'] = 0.01
fudgeFactor['gdasrvLocation'] = 0.01
fudgeFactor['allCompsDir'] = 1.0
fudgeFactor['allRMatricesDir'] = 1.0
fudgeFactor['ndch2comp'] = 0.2
fudgeFactor['ndch2alpha'] = 0.04
fudgeFactor['ndch2root3nInternalCompsDir'] = 0.05
class McmcTuningsPart(object):
def __init__(self, partNum):
self.num = partNum
self.default = {}
# It is no longer changed depending on the dim
self.default['comp'] = 0.3
# This would depend on the dim; this is done in Mcmc.__init__()
self.default['compDir'] = 100.
self.default['allCompsDir'] = 500.
self.default['ndch2_leafCompsDir'] = 200.
self.default['ndch2_internalCompsDir'] = 100.
self.default['ndch2_leafCompsDirAlpha'] = 2.0 * math.log(1.2)
self.default['ndch2_internalCompsDirAlpha'] = 2.0 * math.log(3.0)
self.default['ndch2_root3n_internalCompsDir'] = 100.
# rMatrix with sliders no longer changed depending on the dim (ie size of rMatrix)
self.default['rMatrix'] = 0.3
# rMatrixDir would depend on the dim; this is done in Mcmc.__init__()
self.default['rMatrixDir'] = 200.
self.default['allRMatricesDir'] = 500.
self.default['twoP'] = 50.
self.default['gdasrv'] = 2.0 * math.log(1.5) # 0.811
self.default['pInvar'] = 0.5
#self.default['rMatrixLocation'] = 0.0
class McmcTunings(object):
def __init__(self, nParts):
self.nParts = nParts
self.parts = []
for pNum in range(nParts):
self.parts.append(McmcTuningsPart(pNum))
self.default = {}
self.default['relRate'] = 0.5
# This next tuning is set so that by default the brLens go up or down
# maximum 10%, ie from 0.909 to 1.1
self.default['local'] = 2.0 * math.log(1.1) # 0.1906
self.default['brLen'] = 2.0 * math.log(2.0) # 1.386
# Crux has 2.0 * math.log(1.6) as self.default
self.default['etbrLambda'] = 2.0 * math.log(1.6)
self.default['etbrPExt'] = 0.8
self.default['brLenPriorLambda'] = 10.0
self.default['brLenPriorLambdaForInternals'] = 1000.0
self.default['doInternalBrLenPrior'] = False
self.default['brLenPriorType'] = 'exponential'
self.default['allBrLens'] = 2.0 * math.log(1.02) # 0.0396
self.default['root2'] = 0.1
#self.default['root3'] = 10.0
#self.default['root3n'] = 10.0
class McmcProposalProbs(dict):
"""User-settable relative proposal probabilities.
An instance of this class is made as Mcmc.prob, where you can
do, for example::
yourMcmc.prob.local = 2.0
These are relative proposal probs, that do not sum to 1.0, and
affect the calculation of the final proposal probabilities (ie the
kind that do sum to 1). It is a relative setting, and the default
is 1.0. Setting it to 0 turns it off. For small
probabilities, setting it to 2.0 doubles it. For bigger
probabilities, setting it to 2.0 makes it somewhat bigger.
Check the effect that it has by doing::
yourMcmc.writeProposalIntendedProbs()
which prints out the final calculated probabilities.
"""
def __init__(self):
#object.__setattr__(self, 'comp', 1.0)
object.__setattr__(self, 'compDir', 0.0)
object.__setattr__(self, 'allCompsDir', 1.0)
object.__setattr__(self, 'ndch2_leafCompsDir', 0.0)
object.__setattr__(self, 'ndch2_internalCompsDir', 0.0)
object.__setattr__(self, 'ndch2_leafCompsDirAlpha', 0.0)
object.__setattr__(self, 'ndch2_internalCompsDirAlpha', 0.0)
#object.__setattr__(self, 'ndch2_root3n_internalCompsDir', 0.0)
#object.__setattr__(self, 'rMatrix', 1.0)
object.__setattr__(self, 'rMatrixDir', 0.0)
object.__setattr__(self, 'allRMatricesDir', 1.0)
object.__setattr__(self, 'gdasrv', 1.0)
object.__setattr__(self, 'pInvar', 1.0)
object.__setattr__(self, 'local', 1.0)
object.__setattr__(self, 'brLen', 0.0)
object.__setattr__(self, 'allBrLens', 1.0)
object.__setattr__(self, 'eTBR', 1.0)
object.__setattr__(self, 'polytomy', 0.0)
object.__setattr__(self, 'root3', 0.0)
object.__setattr__(self, 'root3n', 0.0)
object.__setattr__(self, 'root2', 0.0)
object.__setattr__(self, 'compLocation', 0.0)
object.__setattr__(self, 'rMatrixLocation', 0.0)
object.__setattr__(self, 'relRate', 1.0)
def __setattr__(self, item, val):
# complaintHead = "\nMcmcProposalProbs.__setattr__()"
gm = ["\nMcmcProposalProbs(). (set %s to %s)" % (item, val)]
theKeys = self.__dict__.keys()
if item in theKeys:
try:
val = float(val)
if val < 1e-15:
val = 0
object.__setattr__(self, item, val)
except:
gm.append("Should be a float. Got '%s'" % val)
raise P4Error(gm)
else:
self.dump()
gm.append("Can't set '%s'-- no such proposal." % item)
gm.append("See the list of proposal probabilities above for valid proposals.")
raise P4Error(gm)
def reprString(self):
stuff = [
"\nUser-settable relative proposal probabilities, from yourMcmc.prob"]
stuff.append(" To change it, do eg ")
stuff.append(" yourMcmc.prob.comp = 0.0 # turns comp proposals off")
stuff.append(" Current settings:")
theKeys = list(self.__dict__.keys())
theKeys.sort()
for k in theKeys:
stuff.append(" %30s: %s" % (k, getattr(self, k)))
return '\n'.join(stuff)
def dump(self):
print(self.reprString())
def __repr__(self):
return self.reprString()
class Proposal(object):
def __init__(self, theMcmc=None):
self.name = None
self.variant = 'gtr' # only for rMatrix. 2p or gtr
self.mcmc = theMcmc
self.nChains = theMcmc.nChains
self.pNum = -1
#self.mtNum = -1
self.weight = 1.0
self.tuning = None
self.tuningLimitHi = None
self.tuningLimitLo = None
self.tunings = {}
self.nProposals = [0] * theMcmc.nChains
self.nAcceptances = [0] * theMcmc.nChains
self.accepted = 0
self.topologyChanged = 0
self.nTopologyChangeAttempts = [0] * theMcmc.nChains
self.nTopologyChanges = [0] * theMcmc.nChains
self.doAbort = False
self.nAborts = [0] * theMcmc.nChains
self.tnSampleSize = 250
self.tnNSamples = [0] * theMcmc.nChains
self.tnNAccepts = [0] * theMcmc.nChains
self.tnAccVeryHi = None
self.tnAccHi = None
self.tnAccLo = None
self.tnAccVeryLo = None
self.tnFactorVeryHi = None
self.tnFactorHi = None
self.tnFactorLo = None
self.tnFactorVeryLo = None
self.prior = None
def dump(self):
print("proposal name=%-10s pNum=%2s, weight=%s, tuning=%s" % (
'%s,' % self.name, self.pNum, self.weight, self.tuning))
#print("proposal name=%-10s pNum=%2i, weight=%5.1f, tuning=%7.2f" % (
# '%s,' % self.name, self.pNum, self.weight, self.tuning))
#print(" nProposals by temperature: %s" % self.nProposals)
#print(" nAcceptances by temperature: %s" % self.nAcceptances)
def tune(self, tempNum):
assert self.tnSampleSize >= 100.
assert self.tnNSamples[tempNum] >= self.tnSampleSize
acc = self.tnNAccepts[tempNum] / self.tnNSamples[tempNum]
doMessage = False
if acc > self.tnAccHi:
oldTn = self.tuning[tempNum]
if acc > self.tnAccVeryHi:
self.tuning[tempNum] *= self.tnFactorVeryHi
else:
self.tuning[tempNum] *= self.tnFactorHi
doMessage = True
elif acc < self.tnAccLo:
oldTn = self.tuning[tempNum]
if acc < self.tnAccVeryLo:
self.tuning[tempNum] *= self.tnFactorVeryLo
else:
self.tuning[tempNum] *= self.tnFactorLo
doMessage = True
extraMessage = None
if doMessage and self.tuningLimitHi and (self.tuning[tempNum] > self.tuningLimitHi):
self.tuning[tempNum] = self.tuningLimitHi
extraMessage = " (tuningLimitHi)"
self.tnNSamples[tempNum] = 0
self.tnNAccepts[tempNum] = 0
if var.mcmc_logTunings and doMessage:
message = "%s tune gen=%i tempNum=%i acceptance=%.3f " % (self.name, self.mcmc.gen, tempNum, acc)
message += "(target %.3f -- %.3f) " % (self.tnAccLo, self.tnAccHi)
message += "Adjusting tuning from %g to %g" % (oldTn, self.tuning[tempNum])
if extraMessage:
message += extraMessage
#print(message)
self.mcmc.logger.info(message)
class Proposals(object):
def __init__(self):
self.proposals = []
self.topologyProposalsDict = {}
self.propWeights = []
self.cumPropWeights = []
self.totalPropWeights = 0.0
self.intended = None
self.pDict = {}
def summary(self):
print("There are %i proposals" % len(self.proposals))
for p in self.proposals:
print("proposal name=%-10s pNum=%2s, weight=%s, tuning=%s" % (
'%s,' % p.name, p.pNum, p.weight, p.tuning))
def calculateWeights(self):
gm = ["Proposals.calculateWeights()"]
self.propWeights = []
for p in self.proposals:
#print("%s: %s" % (p.name, p.weight))
self.propWeights.append(p.weight)
#print(self.propWeights)
self.cumPropWeights = [self.propWeights[0]]
for i in range(len(self.propWeights))[1:]:
self.cumPropWeights.append(
self.cumPropWeights[i - 1] + self.propWeights[i])
self.totalPropWeights = sum(self.propWeights)
if self.totalPropWeights < 1e-9:
gm.append("No proposal weights?")
raise P4Error(gm)
self.intended = self.propWeights[:]
for i in range(len(self.intended)):
self.intended[i] /= self.totalPropWeights
if math.fabs(sum(self.intended) - 1.0 > 1e-14):
raise P4Error("bad sum of intended proposal probs. %s" % sum(self.intended))
#print(self.intended)
def chooseProposal(self, equiProbableProposals):
if equiProbableProposals:
return random.choice(self.proposals)
else:
theRan = random.uniform(0.0, self.totalPropWeights)
for i in range(len(self.cumPropWeights)):
if theRan < self.cumPropWeights[i]:
break
return self.proposals[i]
def writeProposalIntendedProbs(self):
"""Tabulate the intended proposal probabilities"""
spacer = ' ' * 4
print("\nIntended proposal probabilities (%)")
print("There are %i proposals" % len(self.proposals))
print("%2s %11s %35s %5s %12s" % ('', 'intended(%)', 'proposal', 'part', 'tuning'))
for i in range(len(self.proposals)):
print("%2i" % i, end=' ')
p = self.proposals[i]
print(" %6.2f " % (100. * self.intended[i]), end=' ')
print(" %32s" % p.name, end=' ')
if p.pNum != -1:
print(" %3i " % p.pNum, end=' ')
else:
print(" - ", end=' ')
if p.tuning == None:
print(" %12s "% ' - ', end=' ')
else:
if p.tuning[0] < 0.1:
print(" %12.4g" % p.tuning[0], end=' ')
elif p.tuning[0] < 1.0:
print(" %12.4f" % p.tuning[0], end=' ')
elif p.tuning[0] < 10.0:
print(" %12.3f" % p.tuning[0], end=' ')
elif p.tuning[0] < 1000.0:
print(" %12.1f" % p.tuning[0], end=' ')
else:
print(" %12.2g " % p.tuning[0], end=' ')
print()
def writeTunings(self):
print("Proposal tunings:")
print("%20s %12s" % ("proposal name", "tuning"))
for p in self.proposals:
print("%20s" % p.name, end=' ')
if p.tuning:
# if p.tuning < 10.0:
# print("%12.3f" % p.tuning, end=' ')
# else:
# print("%12.1f" % p.tuning, end=' ')
print(p.tuning)
else:
print(" %4s " % '-', end=' ')
print()
class SwapTuner(object):
"""Continuous tuning for swap temperature, matrix version"""
def __init__(self, sampleSize):
assert sampleSize >= 100
self.sampleSize = sampleSize
self.swaps01_nAttempts = 0
self.swaps01_nSwaps = 0
self.tnAccVeryHi = 0.18
self.tnAccHi = 0.12
self.tnAccLo = 0.04
self.tnAccVeryLo = 0.01
self.tnFactorVeryHi = 1.4
self.tnFactorHi = 1.2
self.tnFactorLo = 0.9
self.tnFactorVeryLo = 0.6
self.tnFactorZero = 0.4
def tune(self, theMcmc):
"""A bad idea, at least as implemented. It is unstable."""
assert self.swaps01_nAttempts >= self.sampleSize
acc = float(self.swaps01_nSwaps) / self.swaps01_nAttempts # float() for Py2
#print("SwapTuner.tune() nSwaps %i, nAttemps %i, acc %s" % (
# self.swaps01_nSwaps, self.swaps01_nAttempts, acc))
doMessage = False
direction = None
if acc > self.tnAccHi:
oldTn = theMcmc.chainTemp
if acc > self.tnAccVeryHi:
theMcmc.chainTemp *= self.tnFactorVeryHi
else:
theMcmc.chainTemp *= self.tnFactorHi
doMessage = True
direction = 'Increase'
elif acc < self.tnAccLo:
oldTn = theMcmc.chainTemp
if acc == 0.0: # no swaps at all
theMcmc.chainTemp *= self.tnFactorZero
elif acc < self.tnAccVeryLo:
theMcmc.chainTemp *= self.tnFactorVeryLo
else:
theMcmc.chainTemp *= self.tnFactorLo
doMessage = True
direction = 'Decrease'
self.swaps01_nAttempts = 0
self.swaps01_nSwaps = 0
if doMessage:
message = "%s tune gen=%i acceptance=%.3f " % ('chainTemp', theMcmc.gen, acc)
message += "(target %.3f -- %.3f) " % (self.tnAccLo, self.tnAccHi)
message += "%s chainTemp from %g to %g" % (direction, oldTn, theMcmc.chainTemp)
#print(message)
theMcmc.logger.info(message)
class SwapTunerV(object):
"""Continuous tuning for swap temperature, vector version"""
def __init__(self, theMcmc):
assert var.mcmc_swapTunerSampleSize >= 100
self.mcmc = theMcmc
self.nChains = theMcmc.nChains
# These are for adjacent pairs. Eg for attempts between chains 0 and 1,
# we increment self.nAttempts[0], ie it is indexed with the lower number
# in the pair.
self.nAttempts = [0] * self.nChains
self.nSwaps = [0] * self.nChains
self.tnAccVeryHi = 0.30
self.tnAccHi = 0.25
self.tnAccLo = 0.10
self.tnAccVeryLo = 0.05
self.tnFactorVeryHi = 1.4
self.tnFactorHi = 1.2
self.tnFactorLo = 0.9
self.tnFactorVeryLo = 0.6
self.tnFactorZero = 0.4
#self.tnLimitHi = var.mcmc_swapTunerVTnLimitHi
#self.tnLimitLo = var.mcmc_swapTunerVTnLimitLo
def tune(self, theTempNum):
assert self.nAttempts[theTempNum] >= var.mcmc_swapTunerSampleSize
acc = float(self.nSwaps[theTempNum]) / self.nAttempts[theTempNum] # float() for Py2
# print("SwapTunerV.tune() theTempNum %i, nSwaps %i, nAttemps %i, acc %s" % (
# theTempNum, self.nSwaps[theTempNum], self.nAttempts[theTempNum], acc))
# print("tempDiffs %s" % self.mcmc.chainTempDiffs)
# print("temps %s" % self.mcmc.chainTemps)
tnLimitHi = 1.0e5
doMessage = False
direction = None
oldTn = self.mcmc.chainTempDiffs[theTempNum]
if acc > self.tnAccHi:
if self.mcmc.chainTempDiffs[theTempNum] >= tnLimitHi:
message = f"chainTemp tune gen={self.mcmc.gen} tempNum={theTempNum} diff %g >= tnLimitHi %g --- no change" % (
self.mcmc.chainTempDiffs[theTempNum], tnLimitHi)
self.mcmc.logger.info(message)
direction = "no change"
else:
if acc > self.tnAccVeryHi:
self.mcmc.chainTempDiffs[theTempNum] *= self.tnFactorVeryHi
else:
self.mcmc.chainTempDiffs[theTempNum] *= self.tnFactorHi
doMessage = True
direction = 'Increase'
elif acc < self.tnAccLo:
#if self.mcmc.chainTempDiffs[theTempNum] <= self.tnLimitLo:
# direction = "no change"
#else:
if acc == 0.0: # no swaps at all
self.mcmc.chainTempDiffs[theTempNum] *= self.tnFactorZero
elif acc < self.tnAccVeryLo:
self.mcmc.chainTempDiffs[theTempNum] *= self.tnFactorVeryLo
else:
self.mcmc.chainTempDiffs[theTempNum] *= self.tnFactorLo
doMessage = True
direction = 'Decrease'
self.nAttempts[theTempNum] = 0
self.nSwaps[theTempNum] = 0
if direction != "no change":
if doMessage:
message = "%s tune gen=%i tempNum=%i acceptance=%.3f " % ('chainTemp', self.mcmc.gen, theTempNum, acc)
message += "(target %.3f -- %.3f) " % (self.tnAccLo, self.tnAccHi)
message += "%s chainTempDiff from %g to %g" % (direction, oldTn, self.mcmc.chainTempDiffs[theTempNum])
#print(message)
self.mcmc.logger.info(message)
# Make chainTemps from chainTempDiffs
if self.mcmc.doHeatingHack:
self.mcmc.chainTemps = [self.mcmc.heatingHackTemperature]
else:
self.mcmc.chainTemps = [0.0]
for dNum in range(self.mcmc.nChains - 1):
self.mcmc.chainTemps.append(self.mcmc.chainTempDiffs[dNum] + self.mcmc.chainTemps[-1])
if doMessage:
message = "new chainTemps gen=%i " % (self.mcmc.gen)
for cT in self.mcmc.chainTemps:
message += "%10.2f" % cT
self.mcmc.logger.info(message)
class SimTempTemp(object):
def __init__(self, temp):
self.temp = temp
#self.pi = 1.0
#self.logPi = 0.0
self.logPiDiff = 0.0
self.occupancy = 0
self.nProposalsUp = 0
self.nAcceptedUp = 0
self.rValsUp = []
self.nProposalsDn = 0
self.nAcceptedDn = 0
self.rValsDn = []
[docs]class Mcmc(object):
"""An MCMC for molecular sequences.
aTree
The tree should have a model and data attached.
nChains
The number of chains in the MCMCMC, default 4
runNum
You may want to do more than one 'run' in the same directory,
to facilitate convergence testing. The first runNum would be
0, and samples, likelihoods, and checkPoints are written to
files with that number.
sampleInterval
Interval at which the cold chain is sampled,
including writing a tree, the logLike, and perhaps
doing a simulation.
checkPointInterval
Intervals at which the MCMC is checkpointed,
meaning that the whole thing is written to a pickle
file. You can re-start from a checkpoint, eg in the
event of a crash, or if you just want to make the
MCMC run longer. You can turn off checkPointing by
setting it to zero or None.
simulate
a 'binary' number from 1-31 that says what posterior
predictive simulation test quantities to collect.
This week, we have
1. multinomial log likelihood
2. bigXSquared
4. meanNCharsPerSite
8. c_m, the compStatFromCharFreqs (2 numbers per part--sim and curTree)
16. constant sites count (not proportion)
So if you say simulate=1, you get multinomial log
like. If you say simulate=2 you get bigXSquared.
If you say simulate=6 you get bigXSquared and
meanNCharsPerSite. If you say simulate=31 you get
all 5.
writePrams
write changeable model parameters to a file.
constraints
If you want to constrain the topology, say so with
a Constraints object here. A constraints object may
enforce multiple and nested constraints.
To prepare for a run, instantiate an Mcmc object::
m = Mcmc(t, sampleInterval=1000, checkPointInterval=200000)
To start it running, do this::
m.run(1000000) # Tell it the number of generations to do
You will want to make the last generation land on a
*checkPointInterval*. So in this case 1000000 is ok, but 900000 is
not.
As it runs, it saves trees and likelihoods at sampleInterval
intervals (actually whenever the current generation number is
evenly divisible by the sampleInterval).
Whenever the current generation number is evenly divisible by the
checkPointInterval it will write a checkPoint file. A checkPoint
file is the whole MCMC without the data, and without C-structs.
Using a checkPoint, you can re-start an Mcmc from the point you
left off. Or, in the event of a crash, you can restart from the
latest checkPoint. You can query checkPoints to get information
about how the chain has been running, and about convergence
diagnostics.
In order to restart the MCMC from the end of a previous run, you
need the data::
read('yourData.nex')
d = Data()
# read the last checkPoint file
m = func.unPickleMcmc(0, d) # runNum 0
m.run(20000)
Its that easy if your previous run finished properly. However, if
your previous run has crashed and you want to restart it from a
checkPoint, then you will need to repair the sample output files
to remove samples that were taken after the last checkPoint, but
before the crash. Fix the trees, likelihoods, prams, and sims.
(You probably do not need to beware of confusing gen (eg 9999) and
gen+1 (eg 10000) issues.) When you remove trees from the tree
files to restart, each tree file should have the word ``end;`` at
the end after the last tree, with a single return after that.
Other files (prams and so on) would end more simply with a single
return.
The checkPoints can help with convergence testing. To help with
that, you can use the McmcCheckPointReader class. It will print
out a table of average standard deviations of split supports
between 2 runs, or between 2 checkPoints from the same run. It
will print out tables of proposal acceptances to show whether they
change over the course of the MCMC.
Another way to monitor convergence of split supports is to use the
Trees method trackSplitSupports(). That method requires a
reference tree to tell it the splits that it should track; an
obvious reference tree to use is the consensus tree, but it need
not be; and the reference tree need not be fully resolved. Here
is how it might be used::
# read in all 1000 trees in the file
read('mcmc_trees_0.nex')
tt = Trees()
# Make a cons tree only from the last 500 trees
tt2 = Trees(trees=var.trees[500:])
tp = TreePartitions(tt2)
t = tp.consensus()
# Track the splits in the cons tree
tt.trackSplitsFromTree(t)
This makes a verbose output, and also writes the numbers to a file
in case you want to use them later.
To make a consensus from the trees from more than one run, you can
add trees to an existing TreePartitions object, like this::
tp = TreePartitions('mcmc_trees_0.nex', skip=500)
tp.read('mcmc_trees_1.nex', skip=500) # add trees from a second run
t = tp.consensus()
t.reRoot(t.node('OutgroupTaxon').parent)
for n in t.iterInternalsNoRoot():
n.name = '%.0f' % (100. * n.br.support)
t.writeNexus('cons.nex')
"""
def __init__(self, aTree, nChains=4, runNum=0, sampleInterval=100, checkPointInterval=10000, simulate=None, writePrams=True, constraints=None, verbose=True, simTemp=None, simTempMax=10.0):
gm = ['Mcmc.__init__()']
self.verbose = verbose
if aTree and aTree.model and aTree.data:
pass
else:
gm.append("The tree that you feed to this class should have a model and data attached.")
raise P4Error(gm)
self.isBiRoot = False
rootNChildren = aTree.root.getNChildren()
if var.mcmc_allowUnresolvedStartingTree:
if rootNChildren == 2:
self.isBiRoot = True
else:
if rootNChildren == 2:
ret = aTree.isFullyBifurcating(verbose=True, biRoot=True)
if not ret:
gm.append("The tree has a bifurcating root, but otherwise is not fully bifurcating.")
raise P4Error(gm)
self.isBiRoot = True
# Add a rooter node to aTree.nodes, not "in" the tree
n = Node()
n.isLeaf = 1
n.name = 'tempRooter'
n.nodeNum = var.NO_ORDER
aTree.nodes.append(n)
aTree.setPreAndPostOrder()
elif rootNChildren == 3:
ret = aTree.isFullyBifurcating(verbose=True, biRoot=False)
if not ret:
gm.append("The tree has a trifurcating root, but otherwise is not fully bifurcating.")
raise P4Error(gm)
self.isBiRoot = False
else:
gm.append("Mcmc only allows trifurcating or bifurcating roots. This tree root has %i children" % rootNChildren)
raise P4Error(gm)
self.tree = aTree
self.constraints = constraints
if self.constraints:
self.tree.makeSplitKeys()
assert isinstance(self.constraints, Constraints)
mySplitKeys = []
for n in self.tree.iterInternalsNoRoot():
mySplitKeys.append(n.br.splitKey)
# print "input tree splitKeys = %s" % mySplitKeys
for sk in self.constraints.constraints:
if sk not in mySplitKeys:
# self.tree.draw()
gm.append('Constraint %s' % p4.func.getSplitStringFromKey(sk, self.tree.nTax))
gm.append('is not in the starting tree.')
gm.append('Maybe you want to make a randomTree with constraints?')
raise P4Error(gm)
try:
nChains = int(nChains)
except (ValueError, TypeError):
gm.append("nChains should be an int, 1 or more. Got %s" % nChains)
raise P4Error(gm)
if nChains < 1:
gm.append("nChains should be an int, 1 or more. Got %s" % nChains)
raise P4Error(gm)
self.nChains = nChains
self.chains = []
self.gen = -1
self.startMinusOne = -1
self.chainTemp = 1.0
# If we are doing simulated tempering ...
self.simTemp = None
if simTemp:
try:
self.simTemp = int(simTemp)
except (ValueError, TypeError):
gm.append("If set, simTemp should be an int, 2 or more. Got %s" % simTemp)
raise P4Error(gm)
if self.simTemp < 2:
gm.append("If set, simTemp should be an int, 2 or more. Got %s" % simTemp)
raise P4Error(gm)
if self.nChains != 1:
gm.append("If simTemp is set, nChains should only be 1. Got %i" % self.nChains)
self.simTemp_temps = []
self.simTemp_curTemp = 0
self.simTemp_nTempChangeProposals = 0
self.simTemp_nTempChangeAccepts = 0
self.simTemp_tNums = []
self.simTemp_tempChangeProposeFreq = 1
self.simTemp_tunerSamples = []
self.simTemp_tunerSampleSize = 100
self.simTemp_longTNumSampleSize = 5000
if self.simTemp:
self.simTemp_longSampleTunings = [1.0] * self.simTemp
try:
thisSimTempMax = float(simTempMax)
except (ValueError, TypeError):
gm.append("If doing simTemp, simTempMax should be set to a float more than zero. Got %s" % simTempMax)
raise P4Error(gm)
if thisSimTempMax <= 0.0:
gm.append("If doing simTemp, simTempMax should be set to a float more than zero. Got %s" % simTempMax)
raise P4Error(gm)
self.simTempMax = thisSimTempMax
if 0:
# This makes the temperatures evenly spaced.
stepSize = self.simTempMax / (self.simTemp - 1)
self.simTemp_temps = [SimTempTemp(0.0)]
for tNum in range(1,self.simTemp):
tmp = SimTempTemp(tNum * stepSize)
self.simTemp_temps.append(tmp)
if 1:
# This makes more small temperatures and fewer big temperatures
# The bigger the logBase, the more curvey the curve. Smaller is more linear
logBase = var.mcmc_simTemp_tempCurveLogBase
factor = self.simTempMax / (math.pow(logBase, self.simTemp - 1) - 1.0)
self.simTemp_temps = [SimTempTemp(0.0)]
for tNum in range(1,self.simTemp):
tmp = SimTempTemp((math.pow(logBase, tNum) - 1.) * factor)
self.simTemp_temps.append(tmp)
if 0:
print("Initial settings of temperatures in Mcmc.__init__()")
for tNum,tmp in enumerate(self.simTemp_temps):
print("%2i %10.3f" % (tNum, tmp.temp))
self.simTemp_longTNumSample = deque([-1] * self.simTemp_longTNumSampleSize)
# Check that branch lengths are neither too short nor too long
for n in self.tree.iterNodesNoRoot():
if n.br.len < var.BRLEN_MIN:
gm.append("Mcmc.__init__() node %i brlen (%g)is too short." %
(n.nodeNum, n.br.len))
raise P4Error(gm)
elif n.br.len > var.BRLEN_MAX:
gm.append("Mcmc.__init__() node %i brlen (%f)is too long." %
(n.nodeNum, n.br.len))
raise P4Error(gm)
# Get the run number
try:
runNum = int(runNum)
except (ValueError, TypeError):
gm.append("runNum should be an int, 0 or more. Got %s" % runNum)
raise P4Error(gm)
if runNum < 0:
gm.append("runNum should be an int, 0 or more. Got %s" % runNum)
raise P4Error(gm)
self.runNum = runNum
self.loggerPrinter = None
self.logger = None
self._setLogger()
# Check that we are not going to over-write good stuff
ff = os.listdir(os.getcwd())
hasPickle = False
for fName in ff:
if fName.startswith("mcmc_checkPoint_%i." % self.runNum):
hasPickle = True
break
if hasPickle:
gm.append("runNum is set to %i" % self.runNum)
gm.append("There is at least one mcmc_checkPoint_%i.xxx file in this directory." % self.runNum)
gm.append("This is a new Mcmc, and I am refusing to over-write exisiting files.")
gm.append("Maybe you want to re-start from the latest mcmc_checkPoint_%i file?" % self.runNum)
gm.append("Otherwise, get rid of the existing mcmc_xxx_%i.xxx files and start again." % self.runNum)
raise P4Error(gm)
if var.strictRunNumberChecking:
# We want to start runs with number 0, so if runNum is more than
# that, check that there are other runs.
if self.runNum > 0:
for runNum2 in range(self.runNum):
hasTrees = False
for fName in ff:
if fName.startswith("mcmc_trees_%i" % runNum2):
hasTrees = True
break
if not hasTrees:
gm.append("runNum is set to %i" % self.runNum)
gm.append("runNums should go from zero up.")
gm.append("There are no mcmc_trees_%i.nex files to show that run %i has been done." % (
runNum2, runNum2))
gm.append("Set the runNum to that, first.")
gm.append("(To get rid of this requirement, turn off var.strictRunNumberChecking.)")
raise P4Error(gm)
self.sampleInterval = sampleInterval
self.checkPointInterval = checkPointInterval
self.props = Proposals()
self.tunableProps = """allBrLens allCompsDir brLen compDir
gdasrv local ndch2_internalCompsDir ndch2_root3n_internalCompsDir
ndch2_internalCompsDirAlpha ndch2_leafCompsDir
ndch2_leafCompsDirAlpha pInvar rMatrixDir allRMatricesDir relRate """.split()
# maybeTunableButNotNow compLocation eTBR polytomy root3 root3n rMatrixLocation root2
self.treePartitions = None
self.likesFileName = "mcmc_likes_%i" % runNum
self.treeFileName = "mcmc_trees_%i.nex" % runNum
self.simFileName = "mcmc_sims_%i" % runNum
self.pramsFileName = "mcmc_prams_%i" % runNum
self.hypersFileName = "mcmc_hypers_%i" % runNum
self.simTempFileName = "mcmc_simTemp_%i" % runNum
self.ssLikesFileName = "mcmc_ssLikes_%i" % runNum
self.writePrams = writePrams
self.writeHypers = True
self.lastTimeCheck = None
if simulate:
try:
simulate = int(simulate)
except (ValueError, TypeError):
gm.append("Arg 'simulate' should be an int, 1-31, inclusive.")
raise P4Error(gm)
if simulate <= 0 or simulate > 31:
gm.append("Arg 'simulate' should be an int, 1-31, inclusive.")
raise P4Error(gm)
self.simulate = simulate
if self.simulate:
self.simTree = self.tree.dupe()
self.simTree.data = self.tree.data.dupe()
self.simTree.calcLogLike(verbose=False)
else:
self.simTree = None
if self.nChains > 1:
self.swapMatrix = []
for i in range(self.nChains):
self.swapMatrix.append([0] * self.nChains)
else:
self.swapMatrix = None
self.swapTuner = None
self.stickyRootComp = False
# check the tree, and tree+model+data
if not aTree.taxNames:
gm.append("The tree that you supply should have a 'taxNames' attribute.")
gm.append("The taxNames should be in the same order as the data.")
raise P4Error(gm)
aTree.calcLogLike(verbose=False)
if 0:
# print complaintHead
print(" logLike of the input tree is %s" % aTree.logLike)
# Default tunings
self._tunings = McmcTunings(self.tree.model.nParts)
self.polytomyUseResolutionClassPrior = False
self.polytomyPriorLogBigC = 0.0
self.prob = McmcProposalProbs()
# New tunings --- compDir and rMatrixDir, seem to depend on the dim.
# And now allCompsDir
for pNum in range(self._tunings.nParts):
theDim = self.tree.model.parts[pNum].dim
nRates = ((theDim * theDim) - theDim) / 2
self._tunings.parts[pNum].default['compDir'] = 50. * theDim
self._tunings.parts[pNum].default['allCompsDir'] = 100. * theDim
self._tunings.parts[pNum].default['ndch2_leafCompsDir'] = 2000. * theDim
self._tunings.parts[pNum].default['ndch2_internalCompsDir'] = 500. * theDim
self._tunings.parts[pNum].default['ndch2_root3n_internalCompsDir'] = 500. * theDim
self._tunings.parts[pNum].default['rMatrixDir'] = 50. * nRates
self._tunings.parts[pNum].default['allRMatricesDir'] = 100. * nRates
# Zap internal node names
for n in aTree.root.iterInternals():
if n.name:
n.name = None
# If we need relRate, turn it on, and say so.
if self.tree.model.nParts > 1 and self.tree.model.relRatesAreFree:
self.prob.relRate = 1.0
if self.verbose:
print("\nInitiating across-data heterogeneous model...")
print("\n%23s" % "Additional proposals:")
print(" relative partition rate = on")
print("\n %s" % "[You can turn it off by setting")
print(" %s" % "yourMcmc.prob.relRate=0.0]")
else:
self.prob.relRate = 0.0
nNodes = len(list(self.tree.iterNodes()))
for pNum in range(self.tree.model.nParts):
mp = self.tree.model.parts[pNum]
dp = self.tree.data.parts[pNum]
if mp.ndch2:
if mp.nComps != nNodes:
gm.append("Model part %i, ndch2 is on, nNodes is %i, nComps is %i" % (
pNum, nNodes, mp.nComps))
gm.append("For ndch2 there should be one comp for each node.")
raise P4Error(gm)
if mp.nRMatrices > 1:
gm.append("Model part %i, ndch2 is on, nNodes is %i, nRMatrices is %i" % (
pNum, nNodes, mp.nRMatrices))
gm.append("This week, for ndch2 there should be only one rMatrix.")
raise P4Error(gm)
if mp.nGdasrvs > 1:
gm.append("Model part %i, ndch2 is on, nNodes is %i, nGdasrvs is %i" % (
pNum, nNodes, mp.nGdasrvs))
gm.append("This week, for ndch2 there should be only one gdasrv.")
raise P4Error(gm)
for n in self.tree.iterNodes():
assert n.nodeNum == n.parts[pNum].compNum
# Set leaf node mt.empiricalComp
for n in self.tree.iterLeavesNoRoot():
mt = mp.comps[n.parts[pNum].compNum]
# print(n.name, mt.val, "seqNum", n.seqNum)
# print("part[seq] composition:", dp.composition([n.seqNum]))
mt.empiricalComp = numpy.array(dp.composition([n.seqNum]))
while mt.empiricalComp.min() < var.PIVEC_MIN:
for i in range(mp.dim):
if mt.empiricalComp[i] < var.PIVEC_MIN:
mt.empiricalComp[i] += (1.0 + (1.1 * random.random())) * var.PIVEC_MIN
mt.empiricalComp /= mt.empiricalComp.sum()
mp.ndch2_globalComp = numpy.array(dp.composition())
while mp.ndch2_globalComp.min() < var.PIVEC_MIN:
for i in range(mp.dim):
if mp.ndch2_globalComp[i] < var.PIVEC_MIN:
mp.ndch2_globalComp[i] += (1.0 + (1.1 * random.random())) * var.PIVEC_MIN
mp.ndch2_globalComp /= mp.ndch2_globalComp.sum()
# ususal comp proposals should not be on if we are doing ndch2
self.prob.compDir = 0.0
self.prob.allCompsDir = 0.0
if self.tree.model.isHet:
props_on = []
if verbose:
print("\nInitiating across-tree heterogeneous model...")
self.tree.setModelComponentsNNodes()
if self.verbose:
self.tree.summarizeModelComponentsNNodes()
for pNum in range(self.tree.model.nParts):
mp = self.tree.model.parts[pNum]
if mp.nComps > 1 and mp.nComps < nNodes:
self.prob.compLocation = 1.0
if "composition location" not in props_on:
props_on.append("composition location")
if mp.nRMatrices > 1 and mp.nRMatrices < nNodes:
self.prob.rMatrixLocation = 1.0
if "rate matrix location" not in props_on:
props_on.append("rate matrix location")
if mp.nGdasrvs > 1 and mp.nGdasrvs < nNodes:
self.prob.gdasrvLocation = 1.0
if "gamma rate location" not in props_on:
props_on.append("gamma rate location")
if mp.ndch2:
self.prob.ndch2_leafCompsDir = 1.0
thisMString = "ndch2_leafCompsDir"
if thisMString not in props_on:
props_on.append(thisMString)
self.prob.ndch2_internalCompsDir = 1.0
thisMString = "ndch2_internalCompsDir"
if thisMString not in props_on:
props_on.append(thisMString)
self.prob.ndch2_leafCompsDirAlpha = 1.0
thisMString = "ndch2_leafCompsDirAlpha"
if thisMString not in props_on:
props_on.append(thisMString)
self.prob.ndch2_internalCompsDirAlpha = 1.0
thisMString = "ndch2_internalCompsDirAlpha"
if thisMString not in props_on:
props_on.append(thisMString)
if 0:
# experimental. doesn't work well
self.prob.ndch2_root3n_internalCompsDir = 1.0
thisMString = "ndch2_root3n_internalCompsDir"
if thisMString not in props_on:
props_on.append(thisMString)
if self.isBiRoot:
self.prob.root2 = 1.0
thisMString = "root2 (root location)"
if thisMString not in props_on:
props_on.append(thisMString)
else:
self.prob.root3 = 1.0
thisMString = "root3 (root location)"
if thisMString not in props_on:
props_on.append(thisMString)
self.prob.root3n = 1.0
thisMString = "root3n (root location, neighbours)"
if thisMString not in props_on:
props_on.append(thisMString)
if verbose:
print("\n%23s" % "Additional proposals:")
for prop in props_on:
print("%30s = on" % prop)
print("\n %s" % "[You can of course turn them")
print(" %s" % "off again before Mcmc.run()]\n")
else:
self.prob.root3 = 0.0
self.prob.root3n = 0.0
self.prob.root2 = 0.0
self.prob.compLocation = 0.0
self.prob.rMatrixLocation = 0.0
#self.prob.gdasrvLocation = 0.0
splash = p4.func.splash2(verbose=False)
for aLine in splash:
print(aLine)
self.logger.info(aLine)
# Check the data for blank sequences, partition by partition
hasBlanks = False
blankSeqNums = []
for partNum in range(self.tree.data.nParts):
p = self.tree.data.parts[partNum]
partBlankSeqNums = []
for taxNum in range(self.tree.data.nTax):
nSites = pf.partSequenceSitesCount(p.cPart, taxNum) # no gaps, no missings
# print(f"Mcmc.__init__() partNum {partNum} taxNum {taxNum} nSites {nSites}")
if not nSites:
partBlankSeqNums.append(taxNum)
if partBlankSeqNums:
hasBlanks = True
blankSeqNums.append(partBlankSeqNums)
if hasBlanks:
self.logger.info("Blank sequences were found. For each partition, the sequence numbers are ---")
self.logger.info(f"{blankSeqNums}")
self.logger.info("These will be skipped in bigXSq simulations.")
self.blankSeqNums = blankSeqNums
else:
self.blankSeqNums = None
self.swapVector = True
if self.nChains > 1:
print("%-16s: %s" % ('swapVector', "on"))
self.swapTuner = SwapTunerV(self)
print("%-16s: %s" % ('swapTuner', "on"))
# Hidden experimental hacking
self.doHeatingHack = False
self.heatingHackTemperature = 5.0
self.originalHeatingHackTemperature = 5.0
self.heatingHackRamper = None
# Whether logging from the Pf module is turned on.
# When it is turned on, a callback is set up to self._logFromPfModule()
self.setupPfLogging = False
# Whether to do root3 and root3n tuning or not
# self.doRoot3Tuning = False
# self.doRoot3nTuning = False
if not var.gsl_rng:
var.gsl_rng = pf.gsl_rng_get()
pf.gsl_rng_set(var.gsl_rng, int(time.time()))
the_gsl_rng_size = pf.gsl_rng_size(var.gsl_rng) # size of the state
# A place to store the state. Empty to start. It is stored during a checkpoint.
self.gsl_rng_state_ndarray = numpy.array(['0'] * the_gsl_rng_size, numpy.dtype('B')) # B is unsigned byte
self.randomState = None # for python random library
# For re-starts, if these values have changed or have been forgotten, allow checking and warning in Mcmc.run()
self.init_PIVEC_MIN = var.PIVEC_MIN
self.init_RATE_MIN = var.RATE_MIN
self.init_BRLEN_MIN = var.BRLEN_MIN
self.init_GAMMA_SHAPE_MIN = var.GAMMA_SHAPE_MIN
# For stepping stone power posteriors
self.doSteppingStone = False
self.ssBeta = None
def _setLogger(self):
"""Make a logger."""
# Need to reset, or else basicConfig will be ignored
log = logging.getLogger() # root logger
for hdlr in log.handlers[:]: # remove all old handlers
log.removeHandler(hdlr)
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(message)s',
datefmt='[%Y-%m-%d %H:%M]',
filename="mcmc_log_%i" % self.runNum,
filemode='a')
# This logger only logs to the file, not to stderr.
self.logger = logging.getLogger("logFileOnly")
def _logFromPfModule(self, treeAddress, message):
tinfo = None
for chNum,ch in enumerate(self.chains):
if ch.curTree.cTree == treeAddress:
tinfo = "gen %i, chain %i, curTree; " % (self.gen, chNum)
elif ch.propTree.cTree == treeAddress:
tinfo = "gen %i, chain %i, propTree; " % (self.gen, chNum)
if not tinfo:
tinfo = "unknown tree; "
message = tinfo + message
#print(treeAddress, message)
self.logger.info(message)
def _makeProposals(self):
"""Make proposals for the mcmc."""
gm = ['Mcmc._makeProposals()']
# The weight determines how often proposals are made. The
# weight is the product of 4 things:
#
# 1. User-settable self.prob.something. Its going to be
# 1.0 by default, if it is on.
#
# 2. The inherent complexity of the proposal. Eg if there
# is only one parameter, like pInvar, that would have a
# complexity of 1, while a DNA composition proposal
# would have a complexity of 3, and a protein
# composition would have a complexity of 19.
#
# 3. In the NDCH or NDRH model, the number of comps or rMatrices in
# the partition. So two comps doubles the weight compared to one
# comp.
#
# 4. A fudge factor. Arbitrary witchcraft.
#
# So for example, for the 'local' proposal, the inherent
# complexity is the number of nodes in the tree. That is the
# same complexity as 'root3', but I don't think that root3
# needs to be proposed nearly as often as local, so I upweight
# local using a fudgeFactor. So the weight will be
# p.weight = self.prob.local * len(self.tree.nodes) * fudgeFactor['local']
# brLen
if self.prob.brLen:
p = Proposal(self)
p.name = 'brLen'
p.tuning = [self._tunings.default[p.name]] * self.nChains
p.brLenPriorType = self._tunings.default['brLenPriorType']
p.brLenPriorLambda = self._tunings.default['brLenPriorLambda']
p.weight = self.prob.brLen * \
(len(self.tree.nodes) - 1) * fudgeFactor['brLen']
p.tnAccVeryHi = 0.7
p.tnAccHi = 0.6
p.tnAccLo = 0.1
p.tnAccVeryLo = 0.05
p.tnFactorVeryHi = 1.6
p.tnFactorHi = 1.2
p.tnFactorLo = 0.8
p.tnFactorVeryLo = 0.7
self.props.proposals.append(p)
# allBrLens
if self.prob.allBrLens:
p = Proposal(self)
p.name = 'allBrLens'
p.tuning = [self._tunings.default[p.name]] * self.nChains
p.tuningLimitHi = 0.2
p.brLenPriorType = self._tunings.default['brLenPriorType']
p.brLenPriorLambda = self._tunings.default['brLenPriorLambda']
p.weight = self.prob.allBrLens * \
(len(self.tree.nodes) - 1) * fudgeFactor['allBrLens']
p.tnAccVeryHi = 0.4
p.tnAccHi = 0.15
p.tnAccLo = 0.05
p.tnAccVeryLo = 0.03
p.tnFactorVeryHi = 1.6
p.tnFactorHi = 1.2
p.tnFactorLo = 0.8
p.tnFactorVeryLo = 0.7
self.props.proposals.append(p)
# eTBR
if self.prob.eTBR:
p = Proposal(self)
p.name = 'eTBR'
p.etbrLambda = self._tunings.default['etbrLambda']
p.etbrPExt = self._tunings.default['etbrPExt']
p.brLenPriorType = self._tunings.default['brLenPriorType']
p.brLenPriorLambda = self._tunings.default['brLenPriorLambda']
p.weight = self.prob.eTBR * \
(len(self.tree.nodes) - 1) * fudgeFactor['eTBR']
self.props.proposals.append(p)
# local
if self.prob.local:
p = Proposal(self)
p.name = 'local'
p.tuning = [self._tunings.default[p.name]] * self.nChains
p.brLenPriorType = self._tunings.default['brLenPriorType']
p.brLenPriorLambda = self._tunings.default['brLenPriorLambda']
p.weight = self.prob.local * \
(len(self.tree.nodes) - 1) * fudgeFactor['local']
p.tnAccVeryHi = 0.7
p.tnAccHi = 0.6
p.tnAccLo = 0.05
p.tnAccVeryLo = 0.03
p.tnFactorVeryHi = 1.6
p.tnFactorHi = 1.2
p.tnFactorLo = 0.8
p.tnFactorVeryLo = 0.7
self.props.proposals.append(p)
# polytomy
if self.prob.polytomy:
p = Proposal(self)
p.name = 'polytomy'
p.brLenPriorType = self._tunings.default['brLenPriorType']
p.brLenPriorLambda = self._tunings.default['brLenPriorLambda']
p.polytomyUseResolutionClassPrior = self.polytomyUseResolutionClassPrior
p.polytomyPriorLogBigC = self.polytomyPriorLogBigC
p.weight = self.prob.polytomy * \
(len(self.tree.nodes) - 1) * fudgeFactor['polytomy']
self.props.proposals.append(p)
# root3
if self.prob.root3:
if len(self.tree.taxNames) <= 3:
pass
else:
p = Proposal(self)
p.name = 'root3'
#p.tuning = [self._tunings.default[p.name]] * self.nChains
p.weight = self.prob.root3 * \
self.tree.nInternalNodes * fudgeFactor['root3']
p.tnAccVeryHi = 0.4
p.tnAccHi = 0.3
p.tnAccLo = 0.05
p.tnAccVeryLo = 0.03
p.tnFactorVeryHi = 0.7
p.tnFactorHi = 0.8
p.tnFactorLo = 1.2
p.tnFactorVeryLo = 1.6
self.props.proposals.append(p)
# root3n
if self.prob.root3n:
if len(self.tree.taxNames) <= 3:
pass
else:
p = Proposal(self)
p.name = 'root3n'
#p.tuning = [self._tunings.default[p.name]] * self.nChains
p.weight = self.prob.root3n * \
self.tree.nInternalNodes * fudgeFactor['root3n']
p.tnAccVeryHi = 0.5
p.tnAccHi = 0.4
p.tnAccLo = 0.1
p.tnAccVeryLo = 0.06
p.tnFactorVeryHi = 0.7
p.tnFactorHi = 0.8
p.tnFactorLo = 1.2
p.tnFactorVeryLo = 1.6
self.props.proposals.append(p)
# root2
if self.prob.root2:
if len(self.tree.taxNames) <= 3:
pass
else:
p = Proposal(self)
p.name = 'root2'
p.tuning = [self._tunings.default[p.name]] * self.nChains
p.weight = self.prob.root2 * self.tree.nInternalNodes * fudgeFactor['root3n']
p.tnAccVeryHi = 0.5
p.tnAccHi = 0.4
p.tnAccLo = 0.1
p.tnAccVeryLo = 0.06
p.tnFactorVeryHi = 1.6
p.tnFactorHi = 1.2
p.tnFactorLo = 0.8
p.tnFactorVeryLo = 0.7
self.props.proposals.append(p)
# relRate
if self.prob.relRate:
if self.tree.model.nParts == 1:
pass
if self.tree.model.doRelRates and self.tree.model.relRatesAreFree:
p = Proposal(self)
p.name = 'relRate'
p.tuning = [self._tunings.default[p.name]] * self.nChains
p.weight = self.prob.relRate * self.tree.model.nParts
p.tnAccVeryHi = 0.7
p.tnAccHi = 0.6
p.tnAccLo = 0.1
p.tnAccVeryLo = 0.06
p.tnFactorVeryHi = 1.6
p.tnFactorHi = 1.2
p.tnFactorLo = 0.8
p.tnFactorVeryLo = 0.7
self.props.proposals.append(p)
# comp, rMatrix, gdasrv, pInvar, modelComponentLocations ...
for pNum in range(self.tree.model.nParts):
mp = self.tree.model.parts[pNum]
# # comp
# if self.prob.comp:
# if mp.comps and mp.comps[0].free:
# for mtNum in range(mp.nComps):
# assert mp.comps[mtNum].free
# p = Proposal(self)
# p.name = 'comp'
# p.tuning = self._tunings.parts[pNum].default[p.name]
# p.weight = self.prob.comp * (mp.dim - 1) * mp.nComps
# p.pNum = pNum
# p.tnAccVeryHi = 0.7
# p.tnAccHi = 0.6
# p.tnAccLo = 0.1
# p.tnAccVeryLo = 0.06
# p.tnFactorVeryHi = 1.6
# p.tnFactorHi = 1.2
# p.tnFactorLo = 0.8
# p.tnFactorVeryLo = 0.7
# self.props.proposals.append(p)
# compDir
if self.prob.compDir:
if mp.comps and mp.comps[0].free:
for mtNum in range(mp.nComps):
assert mp.comps[mtNum].free
p = Proposal(self)
p.name = 'compDir'
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.compDir * (mp.dim - 1) * mp.nComps
p.pNum = pNum
p.tnAccVeryHi = 0.4
p.tnAccHi = 0.15
p.tnAccLo = 0.05
p.tnAccVeryLo = 0.03
p.tnFactorVeryHi = 0.7
p.tnFactorHi = 0.8
p.tnFactorLo = 1.2
p.tnFactorVeryLo = 1.6
self.props.proposals.append(p)
# allCompsDir
if self.prob.allCompsDir:
if mp.comps and mp.comps[0].free:
for mtNum in range(mp.nComps):
assert mp.comps[mtNum].free
p = Proposal(self)
p.name = 'allCompsDir'
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.allCompsDir * (mp.dim - 1) * mp.nComps * fudgeFactor['allCompsDir']
p.pNum = pNum
p.tnAccVeryHi = 0.4
p.tnAccHi = 0.15
p.tnAccLo = 0.05
p.tnAccVeryLo = 0.03
p.tnFactorVeryHi = 0.7
p.tnFactorHi = 0.8
p.tnFactorLo = 1.2
p.tnFactorVeryLo = 1.4
self.props.proposals.append(p)
# ndch2_leafCompsDir
if self.prob.ndch2_leafCompsDir:
for mtNum in range(mp.nComps):
assert mp.comps[mtNum].free
p = Proposal(self)
p.name = 'ndch2_leafCompsDir'
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.ndch2_leafCompsDir * (mp.dim - 1) * mp.nComps * fudgeFactor['ndch2comp']
p.pNum = pNum
p.tnAccVeryHi = 0.4
p.tnAccHi = 0.15
p.tnAccLo = 0.05
p.tnAccVeryLo = 0.03
p.tnFactorVeryHi = 0.7
p.tnFactorHi = 0.8
p.tnFactorLo = 1.2
p.tnFactorVeryLo = 1.4
self.props.proposals.append(p)
# ndch2_internalCompsDir
if self.prob.ndch2_internalCompsDir:
for mtNum in range(mp.nComps):
assert mp.comps[mtNum].free
p = Proposal(self)
p.name = 'ndch2_internalCompsDir'
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.ndch2_internalCompsDir * (mp.dim - 1) * mp.nComps * fudgeFactor['ndch2comp']
p.pNum = pNum
p.tnAccVeryHi = 0.4
p.tnAccHi = 0.15
p.tnAccLo = 0.05
p.tnAccVeryLo = 0.03
p.tnFactorVeryHi = 0.7
p.tnFactorHi = 0.8
p.tnFactorLo = 1.2
p.tnFactorVeryLo = 1.4
self.props.proposals.append(p)
# # ndch2_root3n_internalCompsDir, only for single data-partition runs
# if self.prob.ndch2_root3n_internalCompsDir:
# p = Proposal(self)
# p.name = 'ndch2_root3n_internalCompsDir'
# p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
# p.weight = self.prob.ndch2_root3n_internalCompsDir * (mp.dim - 1) * mp.nComps * fudgeFactor['ndch2root3nInternalCompsDir']
# p.pNum = pNum
# p.tnAccVeryHi = 0.4
# p.tnAccHi = 0.15
# p.tnAccLo = 0.05
# p.tnAccVeryLo = 0.03
# p.tnFactorVeryHi = 0.7
# p.tnFactorHi = 0.8
# p.tnFactorLo = 1.2
# p.tnFactorVeryLo = 1.4
# self.props.proposals.append(p)
# ndch2_leafCompsDirAlpha
if self.prob.ndch2_leafCompsDirAlpha:
p = Proposal(self)
p.name = 'ndch2_leafCompsDirAlpha'
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.ndch2_leafCompsDirAlpha * (mp.dim - 1) * mp.nComps * fudgeFactor['ndch2alpha']
p.pNum = pNum
p.tnAccVeryHi = 0.7
p.tnAccHi = 0.6
p.tnAccLo = 0.1
p.tnAccVeryLo = 0.06
p.tnFactorVeryHi = 1.6
p.tnFactorHi = 1.2
p.tnFactorLo = 0.8
p.tnFactorVeryLo = 0.7
self.props.proposals.append(p)
# ndch2_internalCompsDirAlpha
if self.prob.ndch2_internalCompsDirAlpha:
p = Proposal(self)
p.name = 'ndch2_internalCompsDirAlpha'
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.ndch2_internalCompsDirAlpha * (mp.dim - 1) * mp.nComps * fudgeFactor['ndch2alpha']
p.pNum = pNum
p.tnAccVeryHi = 0.7
p.tnAccHi = 0.6
p.tnAccLo = 0.1
p.tnAccVeryLo = 0.06
p.tnFactorVeryHi = 1.6
p.tnFactorHi = 1.2
p.tnFactorLo = 0.8
p.tnFactorVeryLo = 0.7
self.props.proposals.append(p)
# rMatrixDir
if self.prob.rMatrixDir:
if mp.rMatrices and mp.rMatrices[0].free:
for mtNum in range(mp.nRMatrices):
assert mp.rMatrices[mtNum].free
p = Proposal(self)
p.name = 'rMatrixDir'
if mp.rMatrices[mtNum].spec == '2p':
p.weight = self.prob.rMatrixDir # Is this enough?
p.variant = '2p'
p.tuning = [self._tunings.parts[pNum].default['twoP']] * self.nChains
else:
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.rMatrixDir * mp.nRMatrices * \
((((mp.dim * mp.dim) - mp.dim) / 2) - 1)
p.pNum = pNum
p.tnAccVeryHi = 0.4
p.tnAccHi = 0.15
p.tnAccLo = 0.05
p.tnAccVeryLo = 0.03
p.tnFactorVeryHi = 0.7
p.tnFactorHi = 0.8
p.tnFactorLo = 1.2
p.tnFactorVeryLo = 1.4
self.props.proposals.append(p)
# allRMatricesDir
if self.prob.allRMatricesDir:
if mp.rMatrices and mp.rMatrices[0].free:
for mtNum in range(mp.nRMatrices):
assert mp.rMatrices[mtNum].free
p = Proposal(self)
p.name = 'allRMatricesDir'
if mp.rMatrices[mtNum].spec == '2p':
raise P4Error("Proposal allRMatricesDir is not yet working for 2p. Fixme.")
else:
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.allRMatricesDir * mp.nRMatrices * \
((((mp.dim * mp.dim) - mp.dim) / 2) - 1) * fudgeFactor['allRMatricesDir']
p.pNum = pNum
p.tnAccVeryHi = 0.4
p.tnAccHi = 0.15
p.tnAccLo = 0.05
p.tnAccVeryLo = 0.03
p.tnFactorVeryHi = 0.7
p.tnFactorHi = 0.8
p.tnFactorLo = 1.2
p.tnFactorVeryLo = 1.4
self.props.proposals.append(p)
# gdasrv
if self.prob.gdasrv:
if mp.nGdasrvs and mp.gdasrvs[0].free:
assert mp.nGdasrvs == 1
p = Proposal(self)
p.name = 'gdasrv'
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.gdasrv
p.pNum = pNum
p.tnAccVeryHi = 0.7
p.tnAccHi = 0.6
p.tnAccLo = 0.15
p.tnAccVeryLo = 0.10
p.tnFactorVeryHi = 2.0
p.tnFactorHi = 1.5
p.tnFactorLo = 0.75
p.tnFactorVeryLo = 0.5
self.props.proposals.append(p)
# pInvar
if self.prob.pInvar:
if mp.pInvar.free:
p = Proposal(self)
p.name = 'pInvar'
p.tuning = [self._tunings.parts[pNum].default[p.name]] * self.nChains
p.weight = self.prob.pInvar
p.pNum = pNum
p.tnAccVeryHi = 0.7
p.tnAccHi = 0.6
p.tnAccLo = 0.1
p.tnAccVeryLo = 0.06
p.tnFactorVeryHi = 1.6
p.tnFactorHi = 1.2
p.tnFactorLo = 0.8
p.tnFactorVeryLo = 0.7
self.props.proposals.append(p)
# compLocation
if self.prob.compLocation:
if mp.nComps > 1:
p = Proposal(self)
p.name = 'compLocation'
p.tuning = None #self._tunings.parts[pNum].default[p.name]
p.weight = self.prob.compLocation * mp.nComps * len(self.tree.nodes) * \
fudgeFactor['compLocation']
p.pNum = pNum
self.props.proposals.append(p)
# rMatrixLocation
if self.prob.rMatrixLocation:
if mp.nRMatrices > 1:
p = Proposal(self)
p.name = 'rMatrixLocation'
p.tuning = None #self._tunings.parts[pNum].default[p.name]
p.weight = self.prob.rMatrixLocation * mp.nRMatrices * (len(self.tree.nodes) - 1) * \
fudgeFactor['rMatrixLocation']
p.pNum = pNum
self.props.proposals.append(p)
# # gdasrvLocation
# if self.prob.gdasrvLocation:
# if mp.nGdasrvs > 1:
# p = Proposal(self)
# p.name = 'gdasrvLocation'
# p.weight = self.prob.gdasrvLocation * mp.nGdasrvs * (len(self.tree.nodes) - 1) * \
# fudgeFactor['gdasrvLocation']
# p.pNum = pNum
# self.props.proposals.append(p)
if not self.props.proposals:
gm.append("No proposals?")
raise P4Error(gm)
for p in self.props.proposals:
if p.name in ['local', 'eTBR', 'polytomy']:
self.props.topologyProposalsDict[p.name] = p
self.props.calculateWeights()
def _refreshProposalProbs(self):
"""Adjust proposals after a restart."""
gm = ['Mcmc._refreshProposalProbs()']
for p in self.props.proposals:
# brLen
if p.name == 'brLen':
p.weight = self.prob.brLen * \
(len(self.tree.nodes) - 1) * fudgeFactor['brLen']
# allBrLens
if p.name == 'allBrLens':
p.weight = self.prob.allBrLens * \
(len(self.tree.nodes) - 1) * fudgeFactor['allBrLens']
# eTBR
if p.name == 'eTBR':
p.weight = self.prob.eTBR * \
(len(self.tree.nodes) - 1) * fudgeFactor['eTBR']
# local
if p.name == 'local':
p.weight = self.prob.local * \
(len(self.tree.nodes) - 1) * fudgeFactor['local']
# # treeScale
# if p.name == 'treeScale':
# p.weight = self.prob.treeScale * \
# (len(self.tree.nodes) - 1) * fudgeFactor['treeScale']
# polytomy
if p.name == 'polytomy':
p.weight = self.prob.polytomy * \
(len(self.tree.nodes) - 1) * fudgeFactor['polytomy']
# root3
if p.name == 'root3':
p.weight = self.prob.root3 * \
self.tree.nInternalNodes * fudgeFactor['root3']
# root3n
if p.name == 'root3n':
p.weight = self.prob.root3n * \
self.tree.nInternalNodes * fudgeFactor['root3n']
# relRate
if p.name == 'relRate':
p.weight = self.prob.relRate * self.tree.model.nParts
# # comp
# if p.name == 'comp':
# mp = self.tree.model.parts[p.pNum]
# p.weight = self.prob.comp * float(mp.dim - 1) * mp.nComps
# compDir
if p.name == 'compDir':
mp = self.tree.model.parts[p.pNum]
p.weight = self.prob.compDir * (mp.dim - 1) * mp.nComps
# allCompsDir
if p.name == 'allCompsDir':
mp = self.tree.model.parts[p.pNum]
p.weight = self.prob.allCompsDir * (mp.dim - 1) * mp.nComps * fudgeFactor['allCompsDir']
# ndch2_leafCompsDir
if p.name == 'ndch2_leafCompsDir':
mp = self.tree.model.parts[p.pNum]
p.weight = self.prob.ndch2_leafCompsDir * (mp.dim - 1) * mp.nComps * fudgeFactor['ndch2comp']
# ndch2_internalCompsDir
if p.name == 'ndch2_internalCompsDir':
mp = self.tree.model.parts[p.pNum]
p.weight = self.prob.ndch2_internalCompsDir * (mp.dim - 1) * mp.nComps * fudgeFactor['ndch2comp']
# ndch2_leafCompsDirAlpha
if p.name == 'ndch2_leafCompsDirAlpha':
mp = self.tree.model.parts[p.pNum]
p.weight = self.prob.ndch2_leafCompsDirAlpha * (mp.dim - 1) * mp.nComps * fudgeFactor['ndch2alpha']
# ndch2_internalCompsDirAlpha
if p.name == 'ndch2_internalCompsDirAlpha':
mp = self.tree.model.parts[p.pNum]
p.weight = self.prob.ndch2_internalCompsDirAlpha * (mp.dim - 1) * mp.nComps * fudgeFactor['ndch2alpha']
# # rMatrix
# if p.name == 'rMatrix':
# mp = self.tree.model.parts[p.pNum]
# if mp.rMatrices[0].spec == '2p':
# p.weight = self.prob.rMatrix
# else:
# p.weight = self.prob.rMatrix * mp.nRMatrices * \
# ((((mp.dim * mp.dim) - mp.dim) / 2) - 1)
# rMatrixDir
if p.name == 'rMatrixDir':
mp = self.tree.model.parts[p.pNum]
if mp.rMatrices[0].spec == '2p':
p.weight = self.prob.rMatrixDir
else:
p.weight = self.prob.rMatrixDir * mp.nRMatrices * \
((((mp.dim * mp.dim) - mp.dim) / 2.) - 1)
# allRMatricesDir
if p.name == 'allRMatricesDir':
mp = self.tree.model.parts[p.pNum]
#if mp.rMatrices[0].spec == '2p':
# p.weight = self.prob.rMatrixDir
#else:
p.weight = self.prob.allRMatricesDir * mp.nRMatrices * \
((((mp.dim * mp.dim) - mp.dim) / 2.) - 1.) * fudgeFactor['allRMatricesDir']
# gdasrv
if p.name == 'gdasrv':
p.weight = self.prob.gdasrv
# pInvar
if p.name == 'pInvar':
p.weight = self.prob.pInvar
# compLocation
if p.name == 'compLocation':
mp = self.tree.model.parts[p.pNum]
p.weight = self.prob.compLocation * mp.nComps * \
len(self.tree.nodes) * fudgeFactor['compLocation']
# rMatrixLocation
if p.name == 'rMatrixLocation':
mp = self.tree.model.parts[p.pNum]
p.weight = self.prob.rMatrixLocation * mp.nRMatrices * (len(self.tree.nodes) - 1) * \
fudgeFactor['rMatrixLocation']
# # gdasrvLocation
# if p.name == 'gdasrvLocation':
# mp = self.tree.model.parts[p.pNum]
# p.weight = self.prob.gdasrvLocation * mp.nGdasrvs * (len(self.tree.nodes) - 1) * \
# fudgeFactor['gdasrvLocation']
self.props.calculateWeights()
[docs] def writeProposalAcceptances(self):
"""Pretty-print the proposal acceptances."""
if (self.gen - self.startMinusOne) <= 0:
print("\nwriteProposalAcceptances() There is no info in memory. ")
print(" Maybe it was just emptied after writing to a checkpoint? ")
print("If so, read the checkPoint and get the proposalAcceptances from there.")
return
spacer = ' ' * 8
print("\nProposal acceptances, run %i, for %i gens, from gens %i to %i, inclusive." % (
self.runNum, (self.gen - self.startMinusOne), self.startMinusOne + 1, self.gen))
print("%s %30s %5s %10s %13s%10s" % (spacer, 'proposal', 'part', 'nProposals', 'acceptance(%)', 'tuning'))
for p in self.props.proposals:
print("%s" % spacer, end=' ')
print("%30s" % p.name, end=' ')
if p.pNum != -1:
print(" %3i " % p.pNum, end=' ')
else:
print(" - ", end=' ')
print("%10i" % p.nProposals[0], end=' ')
if p.nProposals[0]: # Don't divide by zero
print(" %5.1f " % (100.0 * float(p.nAcceptances[0]) / float(p.nProposals[0])), end=' ')
else:
print(" - ", end=' ')
if p.tuning == None:
print(" -", end=' ')
elif p.tuning[0] < 2.0:
print(" %8.4f" % p.tuning[0], end=' ')
elif p.tuning[0] < 20.0:
print(" %8.3f" % p.tuning[0], end=' ')
elif p.tuning[0] < 200.0:
print(" %8.1f" % p.tuning[0], end=' ')
else:
print(" %8.3g" % p.tuning[0], end=' ')
print()
# Tabulate topology changes for 'local', if any were attempted.
doTopol = 0
p = self.props.topologyProposalsDict.get('local')
if p:
for tNum in range(self.nChains):
if p.nTopologyChangeAttempts[tNum]:
doTopol = 1
break
if doTopol:
print("'Local' proposal-- attempted topology changes")
print("%s tempNum nProps nAccepts percent nTopolChangeAttempts nTopolChanges percent" % spacer)
for tNum in range(self.nChains):
print("%s" % spacer, end=' ')
print("%4i " % tNum, end=' ')
print("%9i" % p.nProposals[tNum], end=' ')
print("%8i" % p.nAcceptances[tNum], end=' ')
print(" %5.1f" % (100.0 * float(p.nAcceptances[tNum]) / float(p.nProposals[tNum])), end=' ')
print("%20i" % p.nTopologyChangeAttempts[tNum], end=' ')
print("%13i" % p.nTopologyChanges[tNum], end=' ')
print(" %5.1f" % (100.0 * float(p.nTopologyChanges[tNum]) / float(p.nTopologyChangeAttempts[tNum])))
else:
print("%sFor the 'local' proposals, there were no attempted" % spacer)
print("%stopology changes in any of the chains." % spacer)
# do the same for eTBR
doTopol = 0
p = self.props.topologyProposalsDict.get('eTBR')
if p:
for tNum in range(self.nChains):
if p.nTopologyChangeAttempts[tNum]:
doTopol = 1
break
if doTopol:
print("'eTBR' proposal-- attempted topology changes")
print("%s tempNum nProps nAccepts percent nTopolChangeAttempts nTopolChanges percent" % spacer)
for tNum in range(self.nChains):
print("%s" % spacer, end=' ')
print("%4i " % tNum, end=' ')
print("%9i" % p.nProposals[tNum], end=' ')
print("%8i" % p.nAcceptances[tNum], end=' ')
print(" %5.1f" % (100.0 * float(p.nAcceptances[tNum]) / float(p.nProposals[tNum])), end=' ')
print("%20i" % p.nTopologyChangeAttempts[tNum], end=' ')
print("%13i" % p.nTopologyChanges[tNum], end=' ')
print(" %5.1f" % (100.0 * float(p.nTopologyChanges[tNum]) / float(p.nTopologyChangeAttempts[tNum])))
else:
print("%sFor the 'eTBR' proposals, there were no attempted" % spacer)
print("%stopology changes in any of the chains." % spacer)
# Check for aborts.
p = self.props.topologyProposalsDict.get('local')
if p:
if hasattr(p, 'nAborts'):
if p.nAborts[0]:
print("The 'local' proposal had %i aborts. (Not counted in nProps above.)" % p.nAborts[0])
print("(Aborts might be due to brLen proposals too big or too small)")
if self.constraints:
print("(Or, more likely, due to violated constraints.)")
else:
print("The 'local' proposal had no aborts (either due to brLen proposals")
print("too big or too small, or due to violated constraints).")
p = self.props.topologyProposalsDict.get('eTBR')
if p:
if hasattr(p, 'nAborts'):
if p.nAborts[0]:
print("The 'eTBR' proposal had %i aborts. (Not counted in nProps above.)" % p.nAborts[0])
assert self.constraints
print("(Aborts due to violated constraints)")
else:
if self.constraints:
print("The 'eTBR' proposal had no aborts (due to violated constraints).")
for pN in ['polytomy']:
for p in self.props.proposals:
if p.name == pN:
if hasattr(p, 'nAborts'):
print("The %15s proposal had %5i aborts." % (p.name, p.nAborts[0]))
for pN in ['compLocation', 'rMatrixLocation', 'gdasrvLocation']:
for p in self.props.proposals:
if p.name == pN:
if hasattr(p, 'nAborts'):
print("The %15s proposal (part %i) had %5i aborts." % (p.name, p.pNum, p.nAborts[0]))
if self.nChains > 1:
print("\n\nAcceptances and tunings by temperature")
print("%s %30s %5s %5s %10s %13s%10s" % (
spacer, 'proposal', 'part', 'tempNum', 'nProposals', 'acceptance(%)', 'tuning'))
for p in self.props.proposals:
for tempNum in range(self.nChains):
print("%s" % spacer, end=' ')
print("%30s" % p.name, end=' ')
if p.pNum != -1:
print(" %3i " % p.pNum, end=' ')
else:
print(" - ", end=' ')
print(" %3i " % tempNum, end=' ')
print("%10i" % p.nProposals[tempNum], end=' ')
if p.nProposals[tempNum]: # Don't divide by zero
print(" %5.1f " % (
100.0 * float(p.nAcceptances[tempNum]) / float(p.nProposals[tempNum])), end=' ')
else:
print(" - ", end=' ')
if p.tuning == None:
print(" -", end=' ')
elif p.tuning[tempNum] < 2.0:
print(" %8.4f" % p.tuning[tempNum], end=' ')
elif p.tuning[tempNum] < 20.0:
print(" %8.3f" % p.tuning[tempNum], end=' ')
elif p.tuning[tempNum] < 200.0:
print(" %8.1f" % p.tuning[tempNum], end=' ')
else:
print(" %8.3g" % p.tuning[tempNum], end=' ')
print()
[docs] def writeSwapMatrix(self):
gm = ["Mcmc.writeSwapMatrix()"]
if self.swapVector:
gm.append("yourMcmc.swapVector is turned on, so you should use Mcmc.writeSwapVector() instead.")
gm.append("or yourMcmcCheckPointReader.writeSwapVectors()")
raise P4Error(gm)
# swapVector is default now, so this would not be used.
print("\nChain swapping, for %i gens, from gens %i to %i, inclusive." % (
(self.gen - self.startMinusOne), self.startMinusOne + 1, self.gen))
#print(" Swaps are presented as a square matrix, nChains * nChains.")
print(" Upper triangle is the number of swaps proposed between two chains.")
print(" Lower triangle is the percent swaps accepted.")
#print(" The chainTemp is %s\n" % self.chainTemp)
print(" " * 10, end=' ')
for i in range(self.nChains):
print("%7i" % i, end=' ')
print()
print(" " * 10, end=' ')
for i in range(self.nChains):
print(" ----", end=' ')
print()
for i in range(self.nChains):
print(" " * 7, "%2i" % i, end=' ')
for j in range(self.nChains):
if i < j: # upper triangle
print("%7i" % self.swapMatrix[i][j], end=' ')
elif i == j:
print(" -", end=' ')
else:
if self.swapMatrix[j][i] == 0: # no proposals
print(" -", end=' ')
else:
print(" %5.1f" % (100.0 * float(self.swapMatrix[i][j]) / float(self.swapMatrix[j][i])), end=' ')
print()
[docs] def writeSwapVector(self):
assert self.swapVector
assert self.nChains > 1
# swapVector is default now. However, self.swapMatrix is used and has the info.
print("\nMcmc.writeSwapVector(). Chain swapping, for %i gens, from gens %i to %i, inclusive." % (
(self.gen - self.startMinusOne), self.startMinusOne + 1, self.gen))
print(" swapVector is on, so swaps occur only between adjacent chains.")
chTmpString = ' '.join([f"{it:6.3f}" for it in self.chainTemps])
print(f" last chainTemps {chTmpString}")
chTmpDiffsString = ' '.join([f"{it:6.3f}" for it in self.chainTempDiffs[:-1]])
print(f" last chainTempDiffs {chTmpDiffsString}")
# This may be set differently in the run, but it is not saved, so this could be invalid. Fix this!
#print(f" var.mcmc_swapTunerSampleSize is {var.mcmc_swapTunerSampleSize}")
print()
print(f"{'chains':10}", end=' ')
for i in range(1, self.nChains):
s = f"{i-1}--{i}"
print(f"{s:>9s}", end=' ')
print()
print(f"{' ':10}", end=' ')
for i in range(1, self.nChains):
print(f"{'------':>9s}", end=' ')
print()
print(f"{'proposed':10}", end=' ')
for i in range(1, self.nChains):
j = i - 1
print(f"{self.swapMatrix[j][i]:9}", end=' ')
print()
print(f"{'accepted':10}", end=' ')
for i in range(1, self.nChains):
j = i - 1
print(f"{self.swapMatrix[i][j]:9}", end=' ')
print()
print(f"{'accepted':10}", end=' ')
for i in range(1, self.nChains):
j = i - 1
if self.swapMatrix[j][i] == 0: # no proposals
myProportion = "-"
else:
myProportion = f"{self.swapMatrix[i][j] / self.swapMatrix[j][i]:.6f}"
print(f"{myProportion:>9}", end=' ')
print()
def _makeChainsAndProposals(self):
"""Make chains and proposals."""
gm = ['Mcmc._makeChainsAndProposals()']
# random.seed(0)
# Make chains, if needed
if not self.chains:
self.chains = []
for chNum in range(self.nChains):
aChain = Chain(self)
# Temperature. Set this way to start, but it changes.
aChain.tempNum = chNum
self.chains.append(aChain)
if not self.props.proposals:
self._makeProposals()
# If we are going to be doing the resolution class prior
# in the polytomy move, we want to pre-compute the logs of
# T_{n,m}. Its a vector with indices (ie m) from zero to
# nTax-2 inclusive.
p = self.props.topologyProposalsDict.get('polytomy')
if p and self.polytomyUseResolutionClassPrior:
bigT = p4.func.nUnrootedTreesWithMultifurcations(self.tree.nTax)
p.logBigT = [0.0] * (self.tree.nTax - 1)
for i in range(1, self.tree.nTax - 1):
p.logBigT[i] = math.log(bigT[i])
# print p.logBigT
# Make a dictionary. If there is more than one partition,
# some proposals may have the same name, so it is a double
# index pDict[proposal.pNum][proposal.name].
# Some props (eg topology moves) have a pNum of -1, so that is extra.
self.props.pDict = {}
self.props.pDict[-1] = {}
for pNum in range(self.tree.data.nParts):
self.props.pDict[pNum] = {}
for pr in self.props.proposals:
self.props.pDict[pr.pNum][pr.name] = pr
def _setOutputTreeFile(self):
"""Setup the (output) tree file for the mcmc."""
gm = ['Mcmc._setOutputTreeFile()']
# Check if it exists.
if os.path.isfile(self.treeFileName):
gm.append(f"The file '{self.treeFileName}' already exists.")
raise P4Error(gm)
# Write the preamble for the trees outfile.
treeFile = open(self.treeFileName, 'w')
treeFile.write('#nexus\n\n')
treeFile.write('begin taxa;\n')
treeFile.write(' dimensions ntax=%s;\n' % self.tree.nTax)
treeFile.write(' taxlabels')
for tN in self.tree.taxNames:
treeFile.write(' %s' % p4.func.nexusFixNameIfQuotesAreNeeded(tN))
treeFile.write(';\nend;\n\n')
treeFile.write('begin trees;\n')
self.translationHash = {}
i = 1
for tName in self.tree.taxNames:
self.translationHash[tName] = i
i += 1
treeFile.write(' translate\n')
for i in range(self.tree.nTax - 1):
treeFile.write(' %3i %s,\n' % (
i + 1, p4.func.nexusFixNameIfQuotesAreNeeded(self.tree.taxNames[i])))
treeFile.write(' %3i %s\n' % (
self.tree.nTax, p4.func.nexusFixNameIfQuotesAreNeeded(self.tree.taxNames[-1])))
treeFile.write(' ;\n')
# write the models comment
if self.tree.model.isHet:
if (not self.tree.model.parts[0].ndch2) or (self.tree.model.parts[0].ndch2 and self.tree.model.parts[0].ndch2_writeComps):
treeFile.write(' [&&p4 models p%i' % self.tree.model.nParts)
for pNum in range(self.tree.model.nParts):
treeFile.write(' c%i.%i' % (pNum, self.tree.model.parts[pNum].nComps))
treeFile.write(' r%i.%i' % (pNum, self.tree.model.parts[pNum].nRMatrices))
treeFile.write(' g%i.%i' % (pNum, self.tree.model.parts[pNum].nGdasrvs))
treeFile.write(']\n')
treeFile.write(' [Tree numbers are gen+1]\nend;\n')
treeFile.close()
if 0:
self.prob.dump()
self.tunings.dump()
self.writeProposalProbs()
if 0:
for p in self.proposals:
p.dump()
# return
[docs] def simTemp_trialAndError(self, nGensToDo, showTable=True, verbose=False):
gm = ['Mcmc.simTemp_trialAndError()']
savedGenNum = self.gen
assert self.simTemp
self.run(nGensToDo, verbose=False, equiProbableProposals=False, writeSamples=False)
if showTable:
print("simTemp_trialAndError(). After %i gens, showing occupancy, before tuning" % nGensToDo)
self.simTemp_dumpTemps()
self.simTemp_tunePseudoPriors_longSample()
self.gen = savedGenNum
[docs] def simTemp_tunePseudoPriors_longSample(self, flob=sys.stdout):
occs = [self.simTemp_longTNumSample.count(i) for i in range(self.simTemp)]
expected = self.simTemp_longTNumSampleSize / self.simTemp
rats = [occs[i]/expected for i in range(self.simTemp)]
if 0:
for tNum in range(self.simTemp):
ratio = rats[tNum]
if ratio > 1.2 or ratio < 0.7:
if ratio > 3.:
adjust = 1.6
elif ratio > 1.2:
adjust = 1.2
elif ratio < 0.7:
adjust = 0.8
self.simTemp_longSampleTunings[tNum] *= adjust
# Adjusting the tunings to below 1 seems to lead to overshooting.
if self.simTemp_longSampleTunings[tNum] < 1.0:
self.simTemp_longSampleTunings[tNum] = 1.0
if 1:
#print(rats)
for tNum in range(self.simTemp):
ratio = rats[tNum]
self.simTemp_longSampleTunings[tNum] *= ratio
# This seems to work reasonably well. Remaining problem
# is that the highest temp gets too much occupancy. If
# that is the case, do a hack --- multiply it by the ratio obs/expected again.
#tNum = self.simTemp -1
#if max(occs) == occs[-1]:
# self.simTemp_longSampleTunings[-1] *= rats[-1]
if 1:
# Adjusting the tunings to below 1 seems to lead to
# overshooting, so check, and if there are any then raise
# all
minTuning = min([tn for tn in self.simTemp_longSampleTunings])
if minTuning < 1.0:
adjust = 1.0 - minTuning
for tNum in range(self.simTemp):
self.simTemp_longSampleTunings[tNum] += adjust
print("\nlongSampleTunings (after tuning):", file=flob)
for tNum in range(self.simTemp):
print(f"m.simTemp_longSampleTunings[{tNum}] = {self.simTemp_longSampleTunings[tNum]:7.2f}", file=flob)
[docs] def simTemp_tunePseudoPriors(self):
occs = [self.simTemp_longTNumSample.count(i) for i in range(self.simTemp)]
expected = self.simTemp_longTNumSampleSize / self.simTemp
if 1:
totalAdjusts = 0.0
# To increase the occupation of the cold temp, boost the cold logPiDiff
# To decrease the occupation, decrease it.
tNum = 0
cTuning = self.simTemp_longSampleTunings[tNum]
if cTuning > 1.0:
self.simTemp_temps[tNum].logPiDiff -= cTuning
totalAdjusts += cTuning
# To increase the occupation of the hottest temp, decrease the hottest-1 logPiDiff
# To decrease the occupation, increase the hottest-1 logPiDiff
tNum = self.simTemp - 1
cTuning = self.simTemp_longSampleTunings[tNum]
if cTuning > 1.0:
self.simTemp_temps[tNum - 1].logPiDiff += cTuning
# Is this needed? It does not make an obvious difference. Is it in the right direction?
#totalAdjusts -= cTuning
for tNum in range(1, self.simTemp - 1):
cTuning = self.simTemp_longSampleTunings[tNum]
if cTuning > 1.0:
self.simTemp_temps[tNum].logPiDiff -= cTuning
self.simTemp_temps[tNum - 1].logPiDiff += cTuning
#for tNum in range(self.simTemp - 1):
# self.simTemp_temps[tNum].logPiDiff + totalAdjusts/self.simTemp
if 1:
myFudge = -10.0
# Want to divide the logPi by the occupancy. Total of occs is sampleSize.
myMax = math.log(10/expected)
logOccs = []
for tNum in range(self.simTemp):
if occs[tNum] > 10:
rat = occs[tNum]/expected
logRat = math.log(rat)
else:
logRat = myMax
logOccs.append(logRat)
#print(self.gen, "logOccs", logOccs)
# center at zero
meanLogOccs = statistics.mean(logOccs)
for tNum in range(self.simTemp):
logOccs[tNum] -= meanLogOccs
for tNum in range(self.simTemp):
if tNum == 0:
# To increase the occupation of the cold temp, boost the cold logPiDiff
# To decrease the occupation, decrease it.
self.simTemp_temps[tNum].logPiDiff += (logOccs[tNum] * myFudge)
elif tNum == self.simTemp - 1:
# To increase the occupation of the hottest temp, decrease the hottest-1 logPiDiff
# To decrease the occupation, increase the hottest-1 logPiDiff
self.simTemp_temps[tNum-1].logPiDiff -= (logOccs[tNum] * myFudge)
else:
# To increase the occupation of a middle temp, tNum,
# - increase the tNum logPiDiff
# - decrease the tNum-1 logPiDiff
# To decrease the occupation of the middle temp, tNum
# - decrease tNum logPiDiff
# - increase tNum-1 logPiDiff
howMuch = logOccs[tNum] * myFudge
self.simTemp_temps[tNum].logPiDiff += howMuch
self.simTemp_temps[tNum-1].logPiDiff -= howMuch
if 0:
for tNum in range(self.simTemp -1):
self.simTemp_temps[tNum].logPiDiff += 10.0
[docs] def simTemp_dumpTemps(self, flob=sys.stdout):
"""Arg flob should be a file-like object."""
print("%4s %10s %12s %10s %10s %10s %10s %10s %10s %10s" % (
"indx", "temp", "logPiDiff", "occupancy", "nPropsUp", "accptUp", "meanLnR_Up", "nPropsDn", "accptDn", "meanLnR_Dn"),
file=flob)
totalOcc = 0
highestOccupancy = self.simTemp_temps[0].occupancy
lowestOccupancy = self.simTemp_temps[0].occupancy
for i, tmp in enumerate(self.simTemp_temps):
print("%4s %10.3f %12.4f %10i %10i" % (
i,
tmp.temp,
#tmp.logPi,
tmp.logPiDiff,
tmp.occupancy,
tmp.nProposalsUp), file=flob, end='')
if tmp.nProposalsUp:
#myNum = Numbers(tmp.rValsUp)
#myAMean = myNum.arithmeticMeanOfLogs()
myAMean = statistics.mean(tmp.rValsUp)
print(" %10.4f %10.4f" % (
(tmp.nAcceptedUp/tmp.nProposalsUp),
myAMean), file=flob, end='')
else:
print(" %10s %10s" % ("-", "-"), file=flob, end='')
print(" %10i" % tmp.nProposalsDn, file=flob, end='')
if tmp.nProposalsDn:
#myNum = Numbers(tmp.rValsDn)
#myAMean = myNum.arithmeticMeanOfLogs()
myAMean = statistics.mean(tmp.rValsDn)
print(" %10.4f %10.4f" % (
(tmp.nAcceptedDn/tmp.nProposalsDn),
myAMean), file=flob, end='')
else:
print(" %10s %10s" % ("-", "-"), file=flob, end='')
print(file=flob)
totalOcc += tmp.occupancy
if tmp.occupancy > highestOccupancy:
highestOccupancy = tmp.occupancy
if tmp.occupancy < lowestOccupancy:
lowestOccupancy = tmp.occupancy
targetOcc = totalOcc / self.simTemp
if lowestOccupancy: # it might be zero
rat = highestOccupancy/lowestOccupancy
print(f"occupancy target:{targetOcc} highest:{highestOccupancy}, lowest:{lowestOccupancy}, highest/lowest {rat}", file=flob)
else:
print(f"occupancy target:{targetOcc} highest:{highestOccupancy}, lowest:{lowestOccupancy}", file=flob)
[docs] def simTemp_proposeTempChange(self):
# Method and symbols are from:
# Geyer, C. J., and Thompson, E. A. (1995). Annealing Markov
# chain Monte Carlo with applications to ancestral
# inference. Journal of the American Statistical Association,
# 90, 909–920.
i = self.simTemp_curTemp # index
nTemps = self.simTemp
ch = self.chains[0]
if i == 0:
j = 1
qij = 1.
if nTemps == 2:
qji = 1.0
else:
qji = 0.5
elif i == nTemps - 1:
j = nTemps - 2
qij = 1.
if nTemps == 2:
qji = 1.0
else:
qji = 0.5
else:
if random.random() < 0.5:
j = i - 1
else:
j = i + 1
qij = 0.5
if j == 0:
qji = 1.0
elif j == nTemps - 1:
qji = 1.0
else:
qji = 0.5
# tmp's are SimTempTemp objects
tmpI = self.simTemp_temps[i]
tmpJ = self.simTemp_temps[j]
beta_i = 1.0 / (1.0 + tmpI.temp)
beta_j = 1.0 / (1.0 + tmpJ.temp)
# h_i_Atx is notation from Geyer and Thompson. I want the log form
# print("curr log like %f, beta_i %f, beta_j %f" % (ch.curTree.logLike, beta_i, beta_j))
#log_h_i_Atx = ch.curTree.logLike * beta_i
#log_h_j_Atx = ch.curTree.logLike * beta_j
#logTempRatio = log_h_j_Atx - log_h_i_Atx
logTempRatio = ch.curTree.logLike * (beta_j - beta_i)
#logPseudoPriorRatio = math.log(tmpJ.pi / tmpI.pi)
if i < j:
logPseudoPriorRatio = -tmpI.logPiDiff
else:
logPseudoPriorRatio = tmpJ.logPiDiff
#logPseudoPriorRatio = tmpJ.logPi - tmpI.logPi
# print("to calc logPseudoPriorRatio:", tmpJ.logPi, "-", tmpI.logPi, "=", logPseudoPriorRatio)
logHastingsRatio = math.log(qji / qij)
if 0:
print("hastingsRatio gen %i: i=%i, j=%i, hastingsRatio %f, logHastingsRatio %f" % (
self.gen, i, j, (qji/qij), logHastingsRatio))
# Geyer and Thompson use "r". I want the log form
log_r = logTempRatio + logPseudoPriorRatio + logHastingsRatio
acceptMove = False
if log_r < -100.0:
acceptMove = False
elif log_r >= 0.0:
acceptMove = True
else:
r = math.exp(log_r)
if random.random() < r:
acceptMove = True
#print("log_r %f, acceptMove %s" % (log_r, acceptMove))
#if 0:
if 0 and i==0 and j==1:
print("attemptSwap gen %i: i=%i, j=%i, log_r=%f logTempRatio=%f logPseudoPriorRatio %f logHastingsRatio %f acceptMove=%s" % (
self.gen, i, j, log_r, logTempRatio, logPseudoPriorRatio, logHastingsRatio, acceptMove))
if acceptMove:
self.simTemp_curTemp = j
self.simTemp_nTempChangeAccepts += 1
self.simTemp_nTempChangeProposals += 1
if j > i:
tmpI.nProposalsUp += 1
if acceptMove:
tmpI.nAcceptedUp += 1
tmpI.rValsUp.append(log_r)
else:
tmpI.nProposalsDn += 1
if acceptMove:
tmpI.nAcceptedDn += 1
tmpI.rValsDn.append(log_r)
[docs] def simTemp_approximatePi(self, logLike=None):
gm = ["Mcmc.simTemp_approximatePi()"]
if logLike:
thisLogLike = logLike
else:
ch = self.chains[0]
thisLogLike = ch.curTree.logLike
for i,tmpI in enumerate(self.simTemp_temps[:-1]):
j = i + 1
tmpJ = self.simTemp_temps[j]
#print(i, j)
beta_i = 1.0 / (1.0 + tmpI.temp)
beta_j = 1.0 / (1.0 + tmpJ.temp)
logTempRatio = (thisLogLike * beta_j) - (thisLogLike * beta_i)
# Here I tried to take the hastings ratio into account ...
# But it did not turn out well, so deleted. It decreased
# the first and last occupancies too much.
tmpI.logPiDiff = logTempRatio
[docs] def simTemp_resetSimTempMax(self, newSimTempMax):
newVal = float(newSimTempMax)
print(f"Mcmc.simTemp_resetSimTempMax(), changing from old value {self.simTempMax} to new value {newVal}")
self.simTempMax = newVal
logBase = var.mcmc_simTemp_tempCurveLogBase
factor = self.simTempMax / (math.pow(logBase, self.simTemp - 1) - 1.0)
for tNum in range(1,self.simTemp):
newTemp = (math.pow(logBase, tNum) - 1.) * factor
tmp = self.simTemp_temps[tNum]
tmp.temp = newTemp
if 1:
print("New settings of simTemp temperatures ---")
for tNum,tmp in enumerate(self.simTemp_temps):
print(f"{tNum:2} {tmp.temp:10.3f}")
self.logger.info(f"Resetting simulated tempering MCMC, with new highest temperature {self.simTempMax:.2f}")
[docs] def run(self, nGensToDo, verbose=True, equiProbableProposals=False, writeSamples=True):
"""Start the Mcmc running."""
gm = ['Mcmc.run()']
# Keep track of the first gen of this call to run(), maybe restart
firstGen = self.gen + 1
# Don't forget to set the PIVEC_MIN etc in a restart.
# The hasattr() part is for older checkpoints that do not have self.init_PIVEC_MIN
if var.mcmc_doCheck_PIVEC_MIN_etc and hasattr(self, "init_PIVEC_MIN"):
doWarning = False
if var.PIVEC_MIN != self.init_PIVEC_MIN:
gm.append(f"Initial var.PIVEC_MIN for this run was {self.init_PIVEC_MIN}, but now is {var.PIVEC_MIN}")
doWarning = True
if var.RATE_MIN != self.init_RATE_MIN:
gm.append(f"Initial var.RATE_MIN for this run was {self.init_RATE_MIN}, but now is {var.RATE_MIN}")
doWarning = True
if var.BRLEN_MIN != self.init_BRLEN_MIN:
gm.append(f"Initial var.BRLEN_MIN for this run was {self.init_BRLEN_MIN}, but now is {var.BRLEN_MIN}")
doWarning = True
if var.GAMMA_SHAPE_MIN != self.init_GAMMA_SHAPE_MIN:
gm.append(f"Initial var.GAMMA_SHAPE_MIN for this run was {self.init_GAMMA_SHAPE_MIN}, but now is {var.GAMMA_SHAPE_MIN}")
doWarning = True
if doWarning:
gm.append("Were these values forgotten in a restart? If so, add them to the restart script.")
gm.append("Are these values intentionally different? If so, you can turn off checking by setting, in your script ---")
gm.append("var.mcmc_doCheck_PIVEC_MIN_etc = False")
raise P4Error(gm)
# Hidden experimental hack
if self.doHeatingHack:
print("Heating hack is turned on.")
print("Heating hack temperature is %.2f" % self.heatingHackTemperature)
#print("Heating hack affects proposals %s" % self.heatingHackProposalNames)
self.originalHeatingHackTemperature = self.heatingHackTemperature
if self.simTemp:
if self.doHeatingHack:
gm.append("If we are doing simTemp, then doHeatingHack should be off.")
raise P4Error(gm)
if not self.simTemp_temps:
gm.append("If we are doing simTemp, we should have SimTempTemp objects in m.simTemp_temps.")
raise P4Error(gm)
if len(self.simTemp_temps) != self.simTemp:
gm.append("If we are doing simTemp, we should have %i SimTempTemp objects in m.simTemp_temps." % self.simTemp)
raise P4Error(gm)
for it in self.simTemp_temps:
if not isinstance(it, SimTempTemp):
gm.append("Each item in m.simTemp_temps should be a SimTempTemp object.")
raise P4Error(gm)
self.simTemp_nTempChangeProposals = 0
self.simTemp_nTempChangeAccepts = 0
self.simTemp_tNums = []
self.simTemp_tunerSamples = []
for tmp in self.simTemp_temps:
tmp.occupancy = 0
tmp.nProposalsUp = 0
tmp.nAcceptedUp = 0
tmp.rValsUp = []
tmp.nProposalsDn = 0
tmp.nAcceptedDn = 0
tmp.rValsDn = []
if self.prob.polytomy:
for pNum in range(self.tree.model.nParts):
mp = self.tree.model.parts[pNum]
if mp.ndch2:
gm.append("Part %i uses ndch2" % pNum)
gm.append("Ndch2 does not work with the polytomy move.")
raise P4Error(gm)
if self.doSteppingStone:
# self.logger.info("doSteppingStone is on, so turn writing checkpoints off")
self.checkPointInterval = False
if writeSamples and self.checkPointInterval:
# Check that checkPointInterval makes sense.
# We want a couple of things:
# 1. The last gen should be on checkPointInterval. For
# example, if the checkPointInterval is 200, then doing
# 100 or 300 generations will not be allowed cuz the
# chain would continue past the checkPoint-- bad. Or if
# you re-start after 500 gens and change to a
# checkPointInterval of 200, then you won't be allowed to
# do 500 gens.
# if ((self.gen + 1) + nGensToDo) % self.checkPointInterval == 0:
if nGensToDo % self.checkPointInterval == 0:
pass
else:
gm.append("With the current settings, the last generation won't be on a checkPointInterval.")
gm.append("self.gen+1=%i, nGensToDo=%i, checkPointInterval=%i" % ((self.gen + 1),
nGensToDo, self.checkPointInterval))
raise P4Error(gm)
# 2. We also want the checkPointInterval to be evenly
# divisible by the sampleInterval.
if self.checkPointInterval % self.sampleInterval == 0:
pass
else:
gm.append("The checkPointInterval (%i) should be evenly divisible" % self.checkPointInterval)
gm.append("by the sampleInterval (%i)." % self.sampleInterval)
raise P4Error(gm)
if self.props.proposals: # It is a re-start
# The probs and tunings may have been changed by the user.
self._refreshProposalProbs()
# This stuff below should be the same as is done after pickling,
# see below.
self.startMinusOne = self.gen
# Start the tree partitions over.
self.treePartitions = None
# Zero the proposal counts
for p in self.props.proposals:
p.nProposals = [0] * self.nChains
p.nAcceptances = [0] * self.nChains
p.nTopologyChangeAttempts = [0] * self.nChains
p.nTopologyChanges = [0] * self.nChains
# Zero the swap matrix
if self.nChains > 1:
self.swapMatrix = []
for i in range(self.nChains):
self.swapMatrix.append([0] * self.nChains)
else:
# The swap vector is just the diagonal of the swap matrix
if self.swapVector and self.nChains > 1:
print("======= Resetting chainTempDiffs and chainTemps")
# These are differences in temperatures between adjacent chains. The last one is not used.
self.chainTempDiffs = [self.chainTemp] * self.nChains
# These are cumulative, summed over the diffs. This needs to be done whenever the diffs change
self.chainTemps = [0.0]
for dNum in range(self.nChains - 1):
self.chainTemps.append(self.chainTempDiffs[dNum] + self.chainTemps[-1])
self._makeChainsAndProposals()
self._setOutputTreeFile()
if self.simulate:
self._writeSimFileHeader(self.tree)
if self.setupPfLogging:
for ch in self.chains:
pf.setMcmcTreeCallback(ch.curTree.cTree, self._logFromPfModule)
pf.setMcmcTreeCallback(ch.propTree.cTree, self._logFromPfModule)
# Should I do self.tree and self.simTree as well?
if verbose:
self.props.writeProposalIntendedProbs()
sys.stdout.flush()
if self.stickyRootComp:
print("stickyRootComp is turned on.")
# compLocation should be turned off. Is it?
if self.prob.compLocation > 0.0:
gm.append("If stickyRootComp is turned on, then compLocation should be off.")
gm.append("Turn it off, by eg (for Mcmc object m)")
gm.append("m.prob.compLocation = 0.0")
raise P4Error(gm)
# Find the cold chain, the one where tempNum is 0
coldChainNum = -1
for i in range(len(self.chains)):
if self.chains[i].tempNum == 0:
coldChainNum = i
break
if coldChainNum == -1:
gm.append("Unable to find which chain is the cold chain. That is Bad.")
raise P4Error(gm)
# # If polytomy is turned on, then it is possible to get a star
# # tree, in which case local will not work. So if we have both
# # polytomy and local proposals, we should also have brLen.
# if 'polytomy' in self.proposalsHash and 'local' in self.proposalsHash:
# if 'brLen' not in self.proposalsHash:
# gm.append("If you have polytomy and local proposals, you should have a brLen proposal as well.")
# gm.append("It can have a low proposal probability, but it needs to be there.")
# gm.append("Turn it on by eg yourMcmc.prob.brLen = 0.001")
# raise P4Error(gm)
if writeSamples and not self.doSteppingStone:
# it is a re-start, so we need to back over the "end;" in the tree
# files.
f2 = open(self.treeFileName, 'r+b')
# print(f2, type(f2), f2.tell())
# <_io.BufferedRandom name='mcmc_trees_0.nex'> <class '_io.BufferedRandom'> 0
pos = -1
while 1:
f2.seek(pos, 2)
c = f2.read(1)
if c == b';':
break
pos -= 1
# print "pos now %i" % pos
pos -= 3 # end;
f2.seek(pos, 2)
c = f2.read(4)
# print "got c = '%s'" % c
if c != b"end;":
gm.append("Mcmc.run(). Failed to find and remove the 'end;' at the end of the tree file.")
raise P4Error(gm)
else:
f2.seek(pos, 2)
f2.truncate()
f2.close()
if self.gen > -1:
self.logger.info("Re-starting the MCMC run %i from gen=%i" % (self.runNum, self.gen))
if verbose:
print()
print("Re-starting the MCMC run %i from gen=%i" % (self.runNum, self.gen))
if not writeSamples:
print("Arg 'writeSamples' is off" )
print("Set to do %i more generations." % nGensToDo)
if writeSamples and self.writePrams:
if self.chains[0].curTree.model.nFreePrams == 0:
print("There are no free prams in the model, so I am turning writePrams off.")
self.writePrams = False
sys.stdout.flush()
self.startMinusOne = self.gen
else:
self.logger.info("Starting the MCMC %s run %i" % ((self.constraints and "(with constraints)" or ""), self.runNum))
if self.simTemp:
self.logger.info("Using simulated tempering MCMC, with %i temperatures, with highest temperature %.2f" % (
self.simTemp, self.simTempMax))
if verbose:
if self.nChains > 1:
print("Using Metropolis-coupled MCMC, with %i chains." % self.nChains)
if var.mcmc_swapTunerDoTuning:
print("Temperatures are tuned (var.mcmc_swapTunerDoTuning is on)")
#print("Temperatures are tuned, but not if the difference in temperature")
#print(f"between adjacent chains is bigger than {var.mcmc_swapTunerVTnLimitHi}")
else:
print("var.mcmc_swapTunerDoTuning is turned off, so temperatures will not be tuned.")
else:
print("Not using Metropolis-coupled MCMC.")
if self.simTemp:
print("Using simulated tempering MCMC, with %i temperatures, with highest temperature %.2f" % (
self.simTemp, self.simTempMax))
print("Starting the MCMC %s run %i" % ((self.constraints and "(with constraints)" or ""), self.runNum))
print("Set to do %i generations." % nGensToDo)
if self.writePrams:
if self.chains[0].curTree.model.nFreePrams == 0:
if verbose:
print("There are no free prams in the model, so I am turning writePrams off.")
self.writePrams = False
else:
pramsFile = open(self.pramsFileName, 'w')
self.chains[0].curTree.model.writePramsProfile(pramsFile, self.runNum)
pramsFile.write("genPlus1")
self.chains[0].curTree.model.writePramsHeaderLine(pramsFile)
pramsFile.close()
if self.writeHypers:
if not self.tree.model.parts[0].ndch2: # and therefore all model parts, this week
self.writeHypers = False
else:
hypersFile = open(self.hypersFileName, 'w')
hypersFile.write('genPlus1')
self.chains[0].curTree.model.writeHypersHeaderLine(hypersFile)
hypersFile.close()
if not writeSamples:
self.logger.info("Mcmc.run() arg 'writeSamples' is off, so samples are not being written")
if equiProbableProposals:
self.logger.info("Mcmc.run() arg 'equiProbableProposals' is turned on")
if self.doSteppingStone:
self.logger.info("Mcmc.run() doSteppingStone is turned on")
if verbose:
if writeSamples:
print("Sampling every %i." % self.sampleInterval)
else:
print("Arg 'writeSamples' is off, so samples are not being written")
if equiProbableProposals:
print("Arg 'equiProbableProposals' is turned on")
if self.checkPointInterval:
print("CheckPoints written every %i." % self.checkPointInterval)
else:
print("CheckPoints will not be written")
if nGensToDo <= 20000:
print("One dot is 100 generations.")
else:
print("One dot is 1000 generations.")
sys.stdout.flush()
self.treePartitions = None
realTimeStart = time.time()
self.lastTimeCheck = time.time()
for ch in self.chains:
# 1 means do all
pf.p4_copyCondLikes(ch.curTree.cTree, ch.propTree.cTree, 1)
# 1 means do all
pf.p4_copyBigPDecks(ch.curTree.cTree, ch.propTree.cTree, 1)
ch.verifyIdentityOfTwoTreesInChain()
abortableProposals = ['local', 'polytomy', 'compLocation',
'rMatrixLocation', 'gdasrvLocation']
##################################################
############### Main loop ########################
##################################################
for gNum in range(nGensToDo):
self.gen += 1
# Do an initial time estimate based on 100 gens
if nGensToDo > 100 and self.gen - firstGen == 100:
diff_secs = time.time() - realTimeStart
total_secs = (float(nGensToDo) / float(100)) * float(diff_secs)
deltaTime = datetime.timedelta(seconds=int(round(total_secs)))
print("Estimated completion time: %s days, %s" % (
deltaTime.days, time.strftime("%H:%M:%S", time.gmtime(deltaTime.seconds))))
# Above is a list of proposals where it is possible to abort.
# When a gen(aProposal) is made, below, aProposal.doAbort
# might be set, in which case we want to skip it for this
# gen. But we want to start each 'for chNum' loop with
# doAborts all turned off.
for chNum in range(self.nChains):
failure = True
nAttempts = 0
while failure:
# Choose a proposal
gotIt = False
safety = 0
while not gotIt:
# equiProbableProposals is True or False. Usually False.
aProposal = self.props.chooseProposal(equiProbableProposals)
if aProposal:
gotIt = True
if aProposal.name == 'local':
# Can't do local on a star tree.
if self.chains[chNum].curTree.nInternalNodes == 1:
#aProposal = self.proposalsHash['brLen']
gotIt = False
elif aProposal.name in ['root3', 'root3n']:
# Can't do root3 or root3n on a star tree.
if self.chains[chNum].curTree.nInternalNodes == 1:
gotIt = False
if aProposal.doAbort:
gotIt = False
safety += 1
if safety > 1000:
gm.append("Could not find a proposal after %i attempts." % safety)
gm.append("Possibly a programming error.")
gm.append("Or possibly it is just a pathologically frustrating Mcmc.")
raise P4Error(gm)
if 0:
print("==== gNum=%i, chNum=%i, aProposal=%s (part %i)" % (
gNum, chNum, aProposal.name, aProposal.pNum), end=' ')
sys.stdout.flush()
# print gNum,
# success returns None
failure = self.chains[chNum].gen(aProposal)
#failure = None
if failure:
myWarn = "Mcmc.run() main loop. Proposal %s generated a 'failure'. Why?" % aProposal.name
self.logger.warning(myWarn)
nAttempts += 1
if nAttempts > 1000:
gm.append("Was not able to do a successful generation after %i attempts." % nAttempts)
raise P4Error(gm)
# Continuous tuning. We have a tuning, and propose/accept
# tallies for each temperature, kept separately. Note that
# since chNum does not equal tempNum, the most recently
# incremented values (and the ones we want to tune now) will
# be aProposal.tnNSamples[tempNum] and
# aProposal.tnNAccepts[tempNum], where tempNum will most
# likely not be chNum. So we get the tempNum from this
# chNum, and tune it.
# tunables = """allBrLens allCompsDir brLen compDir
# gdasrv local ndch2_internalCompsDir
# ndch2_internalCompsDirAlpha ndch2_leafCompsDir
# ndch2_leafCompsDirAlpha pInvar rMatrixDir allRMatricesDir relRate """.split()
# maybeTunablesButNotNow compLocation eTBR polytomy root3 rMatrixLocation root2
if aProposal.name in self.tunableProps:
tempNum = self.chains[chNum].tempNum
if aProposal.tnNSamples[tempNum] >= aProposal.tnSampleSize:
aProposal.tune(tempNum)
# elif aProposal.name == 'root3' and self.doRoot3Tuning:
# tempNum = self.chains[chNum].tempNum
# if aProposal.tnNSamples[tempNum] >= aProposal.tnSampleSize:
# aProposal.tune(tempNum)
# elif aProposal.name == 'root3n' and self.doRoot3nTuning:
# tempNum = self.chains[chNum].tempNum
# if aProposal.tnNSamples[tempNum] >= aProposal.tnSampleSize:
# aProposal.tune(tempNum)
# print " Mcmc.run(). finished a gen on chain %i" % (chNum)
for p in self.props.proposals:
if p.name in abortableProposals:
p.doAbort = False
if self.nChains == 1:
if self.simTemp:
self.simTemp_temps[self.simTemp_curTemp].occupancy += 1
if (self.gen + 1) % self.simTemp_tempChangeProposeFreq == 0:
self.simTemp_proposeTempChange()
self.simTemp_tNums.append(self.simTemp_curTemp)
# this is a deque. Append on the left and pop on the right
self.simTemp_longTNumSample.appendleft(self.simTemp_curTemp)
self.simTemp_longTNumSample.pop() # from the right, of course
# tunerSamplSize is small, so this adjustment is high frequency (eg every 100 gens)
self.simTemp_tunerSamples.append(self.chains[0].curTree.logLike)
if len(self.simTemp_tunerSamples) >= self.simTemp_tunerSampleSize:
meanLogLike = statistics.mean(self.simTemp_tunerSamples)
self.simTemp_approximatePi(meanLogLike)
self.simTemp_tunerSamples = []
# tunePseudoPriors() uses longTNumSample, so it should be full
if self.simTemp_longTNumSample[-1] != -1: # ie it is full
self.simTemp_tunePseudoPriors()
else:
# Propose swap, if there is more than 1 chain.
self._proposeSwapChainsInMcmcmc()
# =====================================
# If it is a writeInterval, write stuff
# =====================================
doWrite = False
if self.simTemp:
if self.simTemp_curTemp == 0:
# Thinning the simTemp chain is awkward. Here is a hack.
# var.mcmc_simTemp_thinning is a list of digit strings, by default ['0']
if var.mcmc_simTemp_thinning:
genString = f"{self.gen + 1}"
for ending in var.mcmc_simTemp_thinning:
if genString.endswith(ending):
doWrite = True
else:
doWrite = True
else:
if ((self.gen + 1) % self.sampleInterval) == 0:
doWrite = True
if doWrite and self.doSteppingStone:
if writeSamples:
ssLikesFile = open(self.ssLikesFileName, 'a')
ssLikesFile.write('%f\n' % self.chains[coldChainNum].curTree.logLike)
ssLikesFile.close()
if doWrite and not self.doSteppingStone:
if writeSamples:
self._writeSample(coldChainNum=coldChainNum)
# Do a simulation
if self.simulate:
# print "about to simulate..."
self._doSimulate(self.chains[coldChainNum].curTree)
# print "...finished simulate."
# Do other stuff.
if hasattr(self, 'hook'):
self.hook(self.chains[coldChainNum].curTree)
if 0 and self.constraints:
print("Mcmc x1c")
print(self.chains[0].verifyIdentityOfTwoTreesInChain())
print("b checking curTree ..")
self.chains[0].curTree.checkSplitKeys()
print("b checking propTree ...")
self.chains[0].propTree.checkSplitKeys()
print("Mcmc xxx")
# Add curTree to treePartitions
if self.treePartitions:
self.treePartitions.getSplitsFromTree(
self.chains[coldChainNum].curTree)
else:
self.treePartitions = TreePartitions(
self.chains[coldChainNum].curTree)
# After _getSplitsFromTree, need to follow, at some point,
# with _finishSplits(). Do that when it is pickled, or at the
# end of the run.
# Checking and debugging constraints
if 0 and self.constraints:
print("Mcmc x1d")
print(self.chains[coldChainNum].verifyIdentityOfTwoTreesInChain())
print("c checking curTree ...")
self.chains[coldChainNum].curTree.checkSplitKeys()
print("c checking propTree ...")
self.chains[coldChainNum].propTree.checkSplitKeys()
# print "c checking that all constraints are present"
#theSplits = [n.br.splitKey for n in self.chains[0].curTree.iterNodesNoRoot()]
# for sk in self.constraints.constraints:
# if sk not in theSplits:
# gm.append("split %i is not present in the curTree." % sk)
# raise P4Error(gm)
print("Mcmc zzz")
# Check that the curTree has all the constraints
if self.constraints:
splitsInCurTree = [
n.br.splitKey for n in self.chains[coldChainNum].curTree.iterInternalsNoRoot()]
for sk in self.constraints.constraints:
if sk not in splitsInCurTree:
gm.append("Programming error.")
gm.append(
"The current tree (the last tree sampled) does not contain constraint")
gm.append(
"%s" % p4.func.getSplitStringFromKey(sk, self.tree.nTax))
raise P4Error(gm)
# Do checkpoints
doCheckPoint = False
if not writeSamples:
doCheckPoint = False
elif self.doSteppingStone:
doCheckPoint = False
else:
if self.checkPointInterval and (gNum + 1) % self.checkPointInterval == 0:
doCheckPoint = True
if doCheckPoint:
# print(f"writing checkpoint at self.gen {self.gen}, gNum {gNum}")
self.checkPoint()
# The stuff below needs to be done in a re-start as well.
# See above "if self.proposals:"
self.startMinusOne = self.gen
# Start the tree partitions over.
self.treePartitions = None
# Zero the proposal counts
for p in self.props.proposals:
p.nProposals = [0] * self.nChains
p.nAcceptances = [0] * self.nChains
p.nTopologyChangeAttempts = [0] * self.nChains
p.nTopologyChanges = [0] * self.nChains
p.nAborts = [0] * self.nChains
# Zero the swap matrix
if self.nChains > 1:
self.swapMatrix = []
for i in range(self.nChains):
self.swapMatrix.append([0] * self.nChains)
if self.simTemp:
fout = open(self.simTempFileName, 'a')
print("-" * 50, file=fout)
print("gen+1 %11i" % (self.gen + 1), file=fout)
self.simTemp_dumpTemps(flob=fout)
# tuning results are written to self.simTempFileName
self.simTemp_tunePseudoPriors_longSample(flob=fout)
fout.close()
self.simTemp_nTempChangeProposals = 0
self.simTemp_nTempChangeAccepts = 0
self.simTemp_tNums = []
#self.simTemp_tunerSamples = []
for tmp in self.simTemp_temps:
tmp.occupancy = 0
tmp.nProposalsUp = 0
tmp.nAcceptedUp = 0
tmp.rValsUp = []
tmp.nProposalsDn = 0
tmp.nAcceptedDn = 0
tmp.rValsDn = []
# Reassuring pips ...
# We want to skip the first gen of every call to run()
if firstGen != self.gen:
if nGensToDo <= 20000:
if (self.gen - firstGen) % 1000 == 0:
if verbose:
deltaTime = self._doTimeCheck(
nGensToDo, firstGen, 1000)
if deltaTime.days:
timeString = "%s days, %s" % (
deltaTime.days, time.strftime("%H:%M:%S", time.gmtime(deltaTime.seconds)))
else:
timeString = time.strftime(
"%H:%M:%S", time.gmtime(deltaTime.seconds))
print("%10i - %s" % (self.gen, timeString))
else:
sys.stdout.write(".")
sys.stdout.flush()
elif (self.gen - firstGen) % 100 == 0:
sys.stdout.write(".")
sys.stdout.flush()
else:
if (self.gen - firstGen) % 50000 == 0:
if verbose:
deltaTime = self._doTimeCheck(
nGensToDo, firstGen, 50000)
if deltaTime.days:
timeString = "%s days, %s" % (
deltaTime.days, time.strftime("%H:%M:%S", time.gmtime(deltaTime.seconds)))
else:
timeString = time.strftime(
"%H:%M:%S", time.gmtime(deltaTime.seconds))
print("%10i - %s" % (self.gen, timeString))
else:
sys.stdout.write(".")
sys.stdout.flush()
elif (self.gen - firstGen) % 1000 == 0:
sys.stdout.write(".")
sys.stdout.flush()
# End of the Main loop. Gens finished. Clean up.
print()
if verbose:
print("Finished %s generations." % nGensToDo)
if writeSamples and not self.doSteppingStone:
# Write "end;" regardless of writeSamples, or else subsequent attempts to remove it will fail.
# No, that does not work for simTemp trialAndError. Why not?
treeFile = open(self.treeFileName, 'a')
treeFile.write('end;\n\n')
treeFile.close()
def _writeSample(self, coldChainNum=0):
likesFile = open(self.likesFileName, 'a')
likesFile.write(
'%11i %f\n' % (self.gen + 1, self.chains[coldChainNum].curTree.logLike))
likesFile.close()
# Check the likelihood every write interval
if 0:
oldLike = self.chains[coldChainNum].curTree.logLike
print("gen+1 %11i %f " % (
self.gen+1,
self.chains[coldChainNum].curTree.logLike), end=' ')
self.chains[coldChainNum].curTree.calcLogLike(verbose=False)
newLike = self.chains[coldChainNum].curTree.logLike
print("%f" % self.chains[coldChainNum].curTree.logLike, end=' ')
likeDiff = math.fabs(oldLike - newLike)
if likeDiff > 1e-14:
print("%f" % likeDiff)
else:
print()
treeFile = open(self.treeFileName, 'a')
treeFile.write(" tree t_%i = [&U] " % (self.gen + 1))
if self.tree.model.parts[0].ndch2: # and therefore all model parts
if self.tree.model.parts[0].ndch2_writeComps:
self.chains[coldChainNum].curTree.writeNewick(treeFile,
withTranslation=1,
translationHash=self.translationHash,
doMcmcCommandComments=True)
else:
self.chains[coldChainNum].curTree.writeNewick(treeFile,
withTranslation=1,
translationHash=self.translationHash,
doMcmcCommandComments=False)
else:
self.chains[coldChainNum].curTree.writeNewick(treeFile,
withTranslation=1,
translationHash=self.translationHash,
doMcmcCommandComments=self.tree.model.isHet)
treeFile.close()
if self.writePrams:
pramsFile = open(self.pramsFileName, 'a')
#pramsFile.write("%12i " % (self.gen + 1))
pramsFile.write("%12i" % (self.gen + 1))
self.chains[coldChainNum].curTree.model.writePramsLine(pramsFile)
pramsFile.close()
if self.writeHypers:
hypersFile = open(self.hypersFileName, 'a')
hypersFile.write("%12i" % (self.gen + 1))
self.chains[coldChainNum].curTree.model.writeHypersLine(hypersFile)
hypersFile.close()
def _proposeSwapChainsInMcmcmc(self):
if self.swapVector:
rTempNum1 = random.randrange(self.nChains - 1)
rTempNum2 = rTempNum1 + 1
chain1 = None
chain2 = None
for ch in self.chains:
if ch.tempNum == rTempNum1:
chain1 = ch
elif ch.tempNum == rTempNum2:
chain2 = ch
assert chain1 and chain2
# Use the upper triangle of swapMatrix for nAttempts
self.swapMatrix[chain1.tempNum][chain2.tempNum] += 1
lnR = (1.0 / (1.0 + (self.chainTemps[chain1.tempNum]))
) * chain2.curTree.logLike
lnR += (1.0 / (1.0 + (self.chainTemps[chain2.tempNum]))
) * chain1.curTree.logLike
lnR -= (1.0 / (1.0 + (self.chainTemps[chain1.tempNum]))
) * chain1.curTree.logLike
lnR -= (1.0 / (1.0 + (self.chainTemps[chain2.tempNum]))
) * chain2.curTree.logLike
if lnR < -100.0:
r = 0.0
elif lnR >= 0.0:
r = 1.0
else:
r = math.exp(lnR)
acceptSwap = 0
if random.random() < r:
acceptSwap = 1
# for continuous temperature tuning with self.swapTuner
if self.swapTuner:
# Index the nAttempts and nSwaps with the lower of the two tempNum's, which would be chain1.tempNum
self.swapTuner.nAttempts[chain1.tempNum] += 1
if acceptSwap:
self.swapTuner.nSwaps[chain1.tempNum] += 1
if var.mcmc_swapTunerDoTuning:
if self.swapTuner.nAttempts[chain1.tempNum] >= var.mcmc_swapTunerSampleSize:
self.swapTuner.tune(chain1.tempNum)
# tune() zeros nAttempts and nSwaps counters
if acceptSwap:
# Use the lower triangle of swapMatrix to keep track of
# nAccepted's
assert chain1.tempNum < chain2.tempNum
self.swapMatrix[chain2.tempNum][chain1.tempNum] += 1
# Do the swap
chain1.tempNum, chain2.tempNum = chain2.tempNum, chain1.tempNum
else: # swap matrix
# Chain swapping stuff was lifted from MrBayes. Thanks again.
chain1, chain2 = random.sample(self.chains, 2)
thisCh1Temp = None
thisCh2Temp = None
# Use the upper triangle of swapMatrix for nProposed's
if chain1.tempNum < chain2.tempNum:
self.swapMatrix[chain1.tempNum][chain2.tempNum] += 1
thisCh1Temp = chain1.tempNum
thisCh2Temp = chain2.tempNum
else:
self.swapMatrix[chain2.tempNum][chain1.tempNum] += 1
thisCh1Temp = chain2.tempNum
thisCh2Temp = chain1.tempNum
lnR = (1.0 / (1.0 + (self.chainTemp * chain1.tempNum))
) * chain2.curTree.logLike
lnR += (1.0 / (1.0 + (self.chainTemp * chain2.tempNum))
) * chain1.curTree.logLike
lnR -= (1.0 / (1.0 + (self.chainTemp * chain1.tempNum))
) * chain1.curTree.logLike
lnR -= (1.0 / (1.0 + (self.chainTemp * chain2.tempNum))
) * chain2.curTree.logLike
if lnR < -100.0:
r = 0.0
elif lnR >= 0.0:
r = 1.0
else:
r = math.exp(lnR)
acceptSwap = 0
if random.random() < r:
acceptSwap = 1
# # for continuous temperature tuning with self.swapTuner
# if self.swapTuner and thisCh1Temp == 0 and thisCh2Temp == 1:
# self.swapTuner.swaps01_nAttempts += 1
# if acceptSwap:
# self.swapTuner.swaps01_nSwaps += 1
# if self.swapTuner.swaps01_nAttempts >= self.swapTuner.sampleSize:
# self.swapTuner.tune(self)
# # tune() zeros nAttempts and nSwaps counters
if acceptSwap:
# Use the lower triangle of swapMatrix to keep track of
# nAccepted's
if chain1.tempNum < chain2.tempNum:
self.swapMatrix[chain2.tempNum][chain1.tempNum] += 1
else:
self.swapMatrix[chain1.tempNum][chain2.tempNum] += 1
# Do the swap
chain1.tempNum, chain2.tempNum = chain2.tempNum, chain1.tempNum
# Find the cold chain, the one where tempNum is 0
coldChainNum = -1
for i in range(len(self.chains)):
if self.chains[i].tempNum == 0:
coldChainNum = i
break
if coldChainNum == -1:
gm.append("Unable to find which chain is the cold chain. Bad.")
raise P4Error(gm)
def _doTimeCheck(self, nGensToDo, firstGen, genInterval):
"""Time check
firstGen is the first generation of this call to Mcmc.run() else
timing fails on restart"""
nowTime = time.time()
diff_secs = nowTime - self.lastTimeCheck
total_secs = (float(nGensToDo - (self.gen - firstGen)) /
float(genInterval)) * float(diff_secs)
deltaTime = datetime.timedelta(seconds=int(round(total_secs)))
self.lastTimeCheck = nowTime
return deltaTime
def _writeSimFileHeader(self, curTree):
simFile = open(self.simFileName, 'a')
simFile.write(" genPlus1")
# If self.simulate contains a 1, do unconstrained log like
if 1 & self.simulate:
for pNum in range(self.simTree.data.nParts):
simFile.write(' uncLike%i' % pNum)
if 2 & self.simulate: # If self.simulate contains a 2, do bigX^2
for pNum in range(self.simTree.model.nParts):
simFile.write(' bigXSq%i' % pNum)
# If self.simulate contains a 4, do meanNCharPerSite
if 4 & self.simulate:
for pNum in range(self.simTree.model.nParts):
simFile.write(' meanNCharsPerSite%i' % pNum)
# If self.simulate contains an 8, do c_m, the compStatFromCharFreqs
if 8 & self.simulate:
for pNum in range(self.simTree.model.nParts):
simFile.write(' c_mSim%i c_mOrig%i' % (pNum, pNum))
# If self.simulate contains a 16, do constant sites count
if 16 & self.simulate:
for pNum in range(self.simTree.model.nParts):
simFile.write(' nConstSites%i' % pNum)
simFile.write('\n')
simFile.close()
def _doSimulate(self, curTree):
curTree.copyToTree(self.simTree)
curTree.model.copyValsTo(self.simTree.model)
self.simTree.simulate()
simFile = open(self.simFileName, 'a')
simFile.write(" %11i" % (self.gen + 1))
# If self.simulate contains a 1, do unconstrained log like
if 1 & self.simulate:
for p in self.simTree.data.parts:
simFile.write(' %f' % pf.getUnconstrainedLogLike(p.cPart))
if 2 & self.simulate: # If self.simulate contains a 2, do bigX^2
if self.blankSeqNums == None:
ret = self.simTree.data.simpleBigXSquared()
for pNum in range(self.simTree.model.nParts):
simFile.write(' %f' % ret[pNum])
else:
# We do not want sims corresponding to blank seqs to contribute to the bigXSq
ret2 = self.simTree.data.compoChiSquaredTest(verbose=0, skipTaxNums=self.blankSeqNums, skipColumnZeros=True)
for pNum in range(self.simTree.model.nParts):
simFile.write(' %f' % ret2[pNum][0])
# check ...
# for i in range(len(ret)):
# if math.fabs(ret[i][0] - ret2[i]) > 0.000001:
# print "The two methods of bigXSquared calculations differed. %f
# and %f" % (ret[i], ret2[i])
# If self.simulate contains a 4, do meanNCharPerSite
if 4 & self.simulate:
ret = self.simTree.data.meanNCharsPerSite()
# ret is a list, one number per part
for pNum in range(self.simTree.model.nParts):
simFile.write(' %f' % ret[pNum])
# If self.simulate contains an 8, do c_m, the compStatFromCharFreqs
if 8 & self.simulate:
ret = self.simTree.compStatFromCharFreqs()
ret2 = curTree.compStatFromCharFreqs()
# ret is a list, one number per part
for pNum in range(self.simTree.model.nParts):
simFile.write(' %f %f' % (ret[pNum], ret2[pNum]))
# print ' compStatFromCharFreqs: %f %f' % (ret[pNum],
# ret2[pNum])
# If self.simulate contains a 16, do constant sites count
if 16 & self.simulate:
ret = self.simTree.data.simpleConstantSitesCount()
# ret is a list, one number per part
for pNum in range(self.simTree.model.nParts):
simFile.write(' %i' % ret[pNum])
simFile.write('\n')
simFile.close()
[docs] def checkPoint(self):
if 0:
for chNum in range(self.nChains):
ch = self.chains[chNum]
print("chain %i ==================" % chNum)
ch.curTree.summarizeModelComponentsNNodes()
#print("Before removing data", end=' ')
#self.chains[0].curTree.calcLogLike(verbose=True, resetEmpiricalComps=False)
# Make a copy of self, but with no cStuff.
# But we don't want to copy data or logger. So detach them.
savedData = self.tree.data
self.tree.data = None
# The logger does not pickle
savedLogger = self.logger
self.logger = None
savedLoggerPrinter = self.loggerPrinter
self.loggerPrinter = None
# gsl_rng state
the_gsl_rng_size = pf.gsl_rng_size(var.gsl_rng) # size of the state
assert self.gsl_rng_state_ndarray.shape[0] == the_gsl_rng_size
pf.gsl_rng_getstate(var.gsl_rng, self.gsl_rng_state_ndarray)
# random (python module) state
self.randomState = random.getstate()
if self.setupPfLogging:
# Get rid of the mcmcTreeCallbacks
for ch in self.chains:
pf.unsetMcmcTreeCallback(ch.curTree.cTree)
pf.unsetMcmcTreeCallback(ch.propTree.cTree)
if self.simulate:
savedSimData = self.simTree.data
self.simTree.data = None
for chNum in range(self.nChains):
ch = self.chains[chNum]
ch.curTree.data = None
ch.curTree.savedLogLike = ch.curTree.logLike
ch.propTree.data = None
theCopy = copy.deepcopy(self)
# Re-attach data and logger to self.
self.tree.data = savedData
self.logger = savedLogger
self.loggerPrinter = savedLoggerPrinter
self.tree.calcLogLike(verbose=False, resetEmpiricalComps=False)
if self.simulate:
self.simTree.data = savedSimData
self.simTree.calcLogLike(verbose=False, resetEmpiricalComps=False)
for chNum in range(self.nChains):
ch = self.chains[chNum]
ch.curTree.data = savedData
#print("After restoring data", end=' ')
ch.curTree.calcLogLike(verbose=False, resetEmpiricalComps=False)
theDiff = math.fabs(ch.curTree.savedLogLike - ch.curTree.logLike)
if theDiff > 0.01:
theMessage = "Mcmc.checkPoint(), chainNum %i. " % chNum
theMessage += "Bad likelihood calculation just before writing the checkpoint. "
theMessage += "Old curTree.logLike %g, new curTree.logLike %g, diff %g" % (ch.curTree.savedLogLike, ch.curTree.logLike, theDiff)
self.logger.info(theMessage)
print()
print(theMessage)
sys.stdout.flush()
ch.propTree.data = savedData
#ch.propTree.dump(node=True)
#ch.propTree.draw()
ch.propTree.calcLogLike(verbose=False, resetEmpiricalComps=False)
if self.setupPfLogging:
for ch in self.chains:
pf.setMcmcTreeCallback(ch.curTree.cTree, self._logFromPfModule)
pf.setMcmcTreeCallback(ch.propTree.cTree, self._logFromPfModule)
# Get rid of c-pointers in the copy
theCopy.tree.deleteCStuff()
theCopy.tree.data = None
if self.simulate:
theCopy.simTree.deleteCStuff()
theCopy.simTree.data = None
#theCopy.treePartitions._finishSplits()
theCopy.likesFile = None
theCopy.treeFile = None
for chNum in range(theCopy.nChains):
ch = theCopy.chains[chNum]
ch.curTree.deleteCStuff()
#ch.curTree.data = None
ch.propTree.deleteCStuff()
#ch.propTree.data = None
# Pickle it.
fName = "mcmc_checkPoint_%i.%i" % (self.runNum, self.gen + 1)
f = open(fName, 'wb')
pickle.dump(theCopy, f, pickle.HIGHEST_PROTOCOL)
f.close()
[docs] def writeProposalProbs(self, makeDict=False):
"""(Another) Pretty-print the proposal probabilities.
See also Mcmc.writeProposalAcceptances().
"""
nProposals = len(self.props.proposals)
if not nProposals:
print("Mcmc.writeProposalProbs(). No proposals (yet?).")
return
nAttained = [0] * nProposals
nAccepted = [0] * nProposals
for i in range(nProposals):
nAttained[i] = self.props.proposals[i].nProposals[0]
nAccepted[i] = self.props.proposals[i].nAcceptances[0]
sumAttained = float(sum(nAttained)) # should be zero or nGen
if not sumAttained:
print("Mcmc.writeProposalProbs(). No proposals have been made.")
print("Possibly, due to it being a checkPoint interval, nProposals have all been set to zero.")
return
# assert int(sumAttained) == self.gen + 1, "sumAttained is %i, should be gen+1, %i." % (
# int(sumAttained), self.gen + 1)
probAttained = []
for i in range(len(nAttained)):
probAttained.append(100.0 * float(nAttained[i]) / sumAttained)
if math.fabs(sum(probAttained) - 100.0 > 1e-13):
raise P4Error("bad sum of attained proposal probs. %s" % sum(probAttained))
if not makeDict:
spacer = ' ' * 4
print("\nProposal probabilities (%)")
# print "There are %i proposals" % len(self.proposals)
print("For %i gens, from gens %i to %i, inclusive." % (
(self.gen - self.startMinusOne), self.startMinusOne + 1, self.gen))
print("%2s %11s %11s %11s %11s %10s %30s %5s" % ('', 'nProposals',
'intended(%)', 'proposed(%)',
'accepted(%)', 'tuning',
'proposal', 'part'))
for i in range(len(self.props.proposals)):
print("%2i" % i, end=' ')
p = self.props.proposals[i]
print(" %7i " % self.props.proposals[i].nProposals[0], end=' ')
print(" %5.1f " % (100.0 * self.props.intended[i]), end=' ')
print(" %5.1f " % probAttained[i], end=' ')
if nAttained[i]:
print(" %5.1f " % (100.0 * float(nAccepted[i]) / float(nAttained[i])), end=' ')
else:
print(" - ", end=' ')
if p.tuning == None:
print(" - ", end=' ')
elif p.tuning[0] < 2.0:
print(" %7.3f " % p.tuning[0], end=' ')
else:
print(" %9.3g " % p.tuning[0], end=' ')
print(" %27s" % p.name, end=' ')
if p.pNum != -1:
print(" %3i " % p.pNum, end=' ')
else:
print(" - ", end=' ')
print()
else:
rd = {}
for i in range(len(self.props.proposals)):
p = self.props.proposals[i]
if p.pNum != -1:
pname = p.name + "_%i" % p.pNum
else:
pname = p.name
rd[pname] = []
rd[pname].append(p.nProposals[0])
rd[pname].append(probAttained[i])
if nAttained[i]:
rd[pname].append((100.0 * float(nAccepted[i]) / float(nAttained[i])))
else:
rd[pname].append(None)
if p.tuning == None:
rd[pname].append(None)
elif p.tuning < 2.0:
rd[pname].append(p.tuning)
else:
rd[pname].append(p.tuning)
return rd