diff --git a/Project.toml b/Project.toml
index fedaaad..be318a5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -11,6 +11,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
+LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1"
@@ -22,10 +23,11 @@ NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
ParameterHandling = "2412ca09-6db7-441c-8e3a-88d5709968c5"
Phylo = "aea672f4-3940-5932-aa44-993d1c3ff149"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SimpleUnPack = "ce78b400-467f-4804-87d8-8f486da07d0a"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
[compat]
-MolecularEvolution = "0.2"
+MolecularEvolution = "0.2.3"
julia = "1.10"
diff --git a/docs/src/figures/dnds_plot.svg b/docs/src/figures/dnds_plot.svg
new file mode 100644
index 0000000..7db93e6
--- /dev/null
+++ b/docs/src/figures/dnds_plot.svg
@@ -0,0 +1,644 @@
+
+
diff --git a/docs/src/figures/tagged_simtree_phylo.svg b/docs/src/figures/tagged_simtree_phylo.svg
new file mode 100644
index 0000000..a972c9a
--- /dev/null
+++ b/docs/src/figures/tagged_simtree_phylo.svg
@@ -0,0 +1,1215 @@
+
+
diff --git a/docs/src/fitness_OU.md b/docs/src/fitness_OU.md
new file mode 100644
index 0000000..c41e705
--- /dev/null
+++ b/docs/src/fitness_OU.md
@@ -0,0 +1,61 @@
+# Simulating with time-varying fitness
+
+Uses [this tree](simtree_tagged.svg) as input.
+
+```julia
+#Read in a tree where the Newick string has tags (eg. "{G1}" or "{G2}") on branches.
+#Can be tagged with: https://murrellgroup.github.io/WebWidgets/phylotagger.html
+ts, tags = import_labeled_phylotree_newick("simtree_tagged.tre")
+tree = gettreefromnewick(ts, FelNode)
+
+#First viz at the tree with tags
+for (i,n) in enumerate(getnodelist(tree))
+ (length(n.children) > 0) && (n.name = "N$(i)_"*n.name) #Make internal node names unique (only needed for plotting)
+ n.node_data = Dict("g" => (occursin(tags[2], n.name) ? 1.0 : 0.0))
+end
+pl = plot(get_phylo_tree(tree), showtips = false, marker_z = "g", line_z = "g", markerstrokewidth = 0, colorbar = false, linecolor = :darkrainbow, markercolor = :darkrainbow)
+savefig("tagged_simtree_phylo.svg")
+```
+
+![](figures/tagged_simtree_phylo.svg)
+
+```julia
+#500 codon sites in total, where 100 will undergo a fitness shift:
+S, nsel = 500, 100
+selsites = sort(sample(1:S, nsel, replace = false))
+nucm = CodonMolecularEvolution.demo_nucmat #Using a nuc matrix derived from a flu dataset
+
+#Compute std dev of fitnesses for the target peak dN/dS, aiming for a mean dN/dS of 2.0 at the fitness shift boundary:
+σ = maxdNdS2std(2.0)
+m0fs = randn(20,S) .* σ
+m1fs = copy(m0fs)
+m1fs[:,selsites] .= randn(20,nsel) .* σ
+
+#Plot analytical expectations for background dN/dS, and for peak dN/dS at the host change boundary.
+scatter([HBdNdS(m0fs[:,i], nucm = nucm) for i in 1:S], size = (1200,500), yscale = :log10, label = "HBω0", msw = 0, color = "blue", xlabel = "Site", ylabel = "dN/dS", margins = 1Plots.cm)
+scatter!(selsites, [HBdNdS(m0fs[:,i], m1fs[:,i], nucm = nucm) for i in selsites], size = (1200,500), label = "HBω1", msw = 0, color = "red")
+savefig("dnds_plot.svg")
+```
+
+![](figures/dnds_plot.svg)
+
+```julia
+#Specify two models, m1 and m2, sharing alpha but with different fitnesses for the `selsites` sites:
+using Distributions
+
+alphas = rand(Gamma(10,0.1), S)
+m1 = ShiftingHBSimModel(S, alphas, [PiecewiseOUModel(m0fs[:,i]) for i in 1:S], nucm)
+m2 = ShiftingHBSimModel(S, alphas, [PiecewiseOUModel(m1fs[:,i]) for i in 1:S], nucm)
+
+#Use model m1 when "{G1}" occurs in the sequence name. Models are in [] because we have an array of models per partition, when passing a function:
+mfunc(n) = occursin("{G2}", n.name) ? [m2] : [m1]
+
+#The process should be at equilibrium for whatever the root model is, and this will use the root model:
+internal_message_init!(tree, ShiftingHBSimPartition(m1))
+
+#Simeulate under this process:
+sample_down!(tree, mfunc)
+
+#Write sequences to a .fasta file:
+write_fasta("simu_seqs.fasta", leaf_samples(tree), seq_names = leaf_names(tree))
+```
diff --git a/docs/src/simtree_tagged.tre b/docs/src/simtree_tagged.tre
new file mode 100644
index 0000000..7d28226
--- /dev/null
+++ b/docs/src/simtree_tagged.tre
@@ -0,0 +1 @@
+(((tax191{G1}:0.0876725806761636,tax190{G1}:0.0894607524107736){G1}:0.0036036666634083787,(tax198{G1}:0.025252958193682482,tax193{G1}:0.03016546613009762){G1}:0.057534435998472355){G1}:0.006715581998474342,((tax195{G1}:0.054546694927162916,(tax197{G1}:0.042645415031909606,tax194{G1}:0.04684513626333345){G1}:0.008401926724847584){G1}:0.03256956841111309,((tax192{G1}:0.07556019898355605,(tax196{G1}:0.05891033225204943,(tax189{G1}:0.05390345129831925,(tax188{G1}:0.06147337561398739,tax200{G1}:0.03206787896971761){G1}:0.0060425316294520715){G1}:0.01951839530844825){G1}:0.0121173468320627){G1}:0.010204999026043481,(tax199{G1}:0.04983038898089238,(((tax187{G1}:0.00605246557691153,tax176{G1}:0.09272170119191411){G1}:0.0012070179531218476,((tax175{G1}:0.06094439789197623,tax183{G1}:0.04553879703312431){G1}:0.0010182662793623157,(tax173{G1}:0.05999815475460662,(tax170{G1}:0.048216085854531326,(tax179{G1}:0.020074720760078317,(tax174{G1}:0.006107108423158383,tax171{G1}:0.014792158669170264){G1}:0.023688320695480738){G1}:0.00646722385262868){G1}:0.018395822561943883){G1}:0.008019459408802363){G1}:0.033019091075075335){G1}:0.012395507424137479,(((tax180{G1}:0.013848404760419483,tax169{G1}:0.0369630169585345){G1}:0.022670053329334052,((tax168{G1}:0.04496012425354975,(tax177{G1}:0.021472973238438074,tax166{G1}:0.06263073402087617){G1}:0.0024339037573345066){G1}:0.011158692505368292,((tax143{G2}:0.06302811880607842,(tax159{G2}:0.004924925729958462,(tax154{G2}:0.0008642848579845001,tax147{G2}:0.01290712838119032){G2}:0.011866515397821152){G2}:0.033016616222806085){G2}:0.029989142213964405,(tax163{G2}:0.0064225191509397145,(((tax158{G2}:0.01631955821078443,(tax139{G2}:0.005871165959912681,tax140{G2}:0.002985255157995405){G2}:0.04403375259941184){G2}:0.02253345288390942,(tax156{G2}:0.03970735945508374,(tax149{G2}:0.04422249505169464,(tax136{G2}:0.04664894494688254,tax141{G2}:0.020248571941254175){G2}:0.035147521747436024){G2}:0.007244581394779825){G2}:0.002279922537576455){G2}:0.00166399660059368,((tax151{G2}:0.03999793978873527,tax150{G2}:0.04048223687901386){G2}:0.007266936795155956,(tax160{G2}:0.012743206837900343,(tax152{G2}:0.008853942259116216,(tax132{G2}:0.017024896310785638,(tax111{G2}:0.03439254079209499,(tax112{G2}:0.024031067680197544,(tax125{G2}:0.006586436970727454,(tax103{G2}:0.010699233745566934,(tax105{G2}:0.000685467702129,tax102{G2}:0.00817592367180866){G2}:0.0029281341987468564){G2}:0.0385102965977885){G2}:0.0011200942097729834){G2}:0.008836558438275677){G2}:0.02896094421324713){G2}:0.10267088431053402){G2}:0.03442976526435304){G2}:0.0033528262448352035){G2}:0.002995191269348195){G2}:0.008590680413237012){G2}:0.02253345487706806){G2}:0.11043701695983364){G1}:0.005265173856082336){G1}:0.03033683707932,(((tax165{G1}:0.09067522652901837,(tax172{G1}:0.02381994587783149,tax181{G1}:0.004624412781357271){G1}:0.009226476572802863){G1}:0.026621416634713468,(tax186{G1}:0.018251778024604783,(tax185{G1}:0.018673986950570747,tax184{G1}:0.02047706217606258){G1}:0.002394163315112266){G1}:0.012047810545505994){G1}:0.00860806298143007,(tax182{G1}:0.04185317378802211,(tax178{G1}:0.025466558102832403,(tax167{G1}:0.015437885717647455,(((tax153{G1}:0.06476661664614816,(tax162{G1}:0.001976926210361459,(tax138{G1}:0.07540623292712258,tax146{G1}:0.04898847283987697){G1}:0.007199883112776702){G1}:0.016580333784091916){G1}:0.0034770067150903714,(tax164{G1}:0.0019148358003295155,(tax137{G1}:0.09610433509857566,(tax142{G1}:0.037472154209657625,(tax148{G1}:0.022076483502409334,(tax144{G1}:0.0248953412408692,tax145{G1}:0.02481586695905884){G1}:0.0026822584277028084){G1}:0.006370364076504646){G1}:0.03194123100883159){G1}:0.013667102694273252){G1}:0.0006382799316124873){G1}:0.03280798557171663,(tax161{G1}:0.0070682515465140565,(tax157{G1}:0.028277958140741136,(tax155{G1}:0.003951370805649514,(((tax126{G2}:0.01512745094011466,(tax122{G2}:0.02534487656712116,tax123{G2}:0.022590591620297425){G2}:0.0008344777032263679){G2}:0.05630755773955415,((tax116{G2}:0.06586934229235016,tax133{G2}:0.01648099469586556){G2}:0.017362650017230596,(((tax109{G2}:0.047399009811782994,tax124{G2}:0.023524413027162735){G2}:0.012807778119859932,(tax117{G2}:0.04427715315185631,tax127{G2}:0.02388452172834673){G2}:0.0017434668342086266){G2}:0.0030324447657491082,(tax119{G2}:0.044326820211354125,(tax131{G2}:0.01410171880999806,(tax104{G2}:0.06610279981490084,((tax129{G2}:0.00394639935842983,tax108{G2}:0.04320177060864035){G2}:0.003958815908072366,(tax114{G2}:0.0050640121885027,((tax99{G2}:0.030610041449712674,tax106{G2}:0.009877173507022387){G2}:0.009651166853159707,((tax88{G2}:0.009050146142266036,tax90{G2}:0.005357069973789267){G2}:0.06418296441989188,(tax72{G2}:0.11928351314710331,((tax73{G2}:0.019361942183140364,tax75{G2}:0.014106700942076026){G2}:0.058937669086599816,((tax86{G2}:0.04038786524823333,tax83{G2}:0.04255105737646524){G2}:0.0010306844249740013,((tax50{G2}:0.05626536813501043,((tax55{G2}:0.031444526461345046,tax53{G2}:0.033560533112315286){G2}:0.020201385860911376,(tax37{G2}:0.06992749047048774,tax44{G2}:0.04840236819461594){G2}:0.00974554718942256){G2}:0.0013411332955716043){G2}:0.011531232629155525,(tax70{G2}:0.006367880889844938,((tax35{G2}:0.07372239259508534,(tax66{G2}:0.01015285170050917,(tax32{G2}:0.004838002015680466,tax33{G2}:0.001351065293841831){G2}:0.0754286118347052){G2}:0.0011995633454741595){G2}:0.01825675666135404,(tax61{G2}:0.031193684543797746,((tax40{G2}:0.032686317398696124,tax39{G2}:0.03741752265888065){G2}:0.02511140875413781,((tax51{G2}:0.0006482146669855776,tax46{G2}:0.0029107515452417546){G2}:0.044808634748058844,(tax56{G2}:0.021927473629553433,(tax34{G2}:0.042041928421269185,tax52{G2}:0.008635389657038797){G2}:0.016597725366462802){G2}:0.02003001486529744){G2}:0.0006730502749348101){G2}:0.0013212652385009545){G2}:0.013955203709092267){G2}:0.005379417198292695){G2}:0.0007326552510388004){G2}:0.12551976056386938){G2}:0.01308345928810787){G2}:0.006608785811363131){G2}:0.0205267247845345){G2}:0.08664688087057545){G2}:0.0034074666771270537){G2}:0.028588420939849767){G2}:0.005453925612809456){G2}:0.00722222470011554){G2}:0.002106070665884691){G2}:0.0022103809036173763){G2}:0.033828743369384386){G2}:0.003762612973023957){G2}:0.011719975943519175,(((tax135{G2}:0.0005761901030815657,(tax110{G2}:0.014141473322690912,tax107{G2}:0.021331419227653636){G2}:0.049676436823741926){G2}:0.0008071626190684138,((tax113{G2}:0.010085802898481941,(tax98{G2}:0.031176294406067465,tax100{G2}:0.020263477178334148){G2}:0.023161812872482927){G2}:0.03499354786196447,(tax128{G2}:0.004269264497034189,(tax118{G2}:0.024778622198206576,(tax115{G2}:0.016130816021713366,tax101{G2}:0.04707862485403731){G2}:0.01090289315772599){G2}:0.0009611429436207396){G2}:0.01614073838139694){G2}:0.012388066970050425){G2}:0.009137062156414862,(tax134{G1}:0.001410670240155791,((tax130{G1}:0.002863559223434444,((tax120{G1}:0.002895851046563675,tax97{G1}:0.06242461495498733){G1}:0.018517529607584054,((tax94{G1}:0.04062876618398045,(tax93{G1}:0.04087464018063941,(tax84{G1}:0.012867391369594167,(tax85{G1}:0.0037079768850811017,tax74{G1}:0.02564786547837911){G1}:0.008180885859363152){G1}:0.04519854610854034){G1}:0.004559839557940254){G1}:0.04200961845139506,((tax89{G1}:0.023812499268666836,(tax78{G1}:0.0023469781827387052,tax77{G1}:0.0041053484202984505){G1}:0.04500979604104831){G1}:0.04302044196050195,((tax95{G1}:0.029124856787997415,tax80{G1}:0.06119772126319971){G1}:0.01190873276637045,((tax91{G1}:0.032557155445312826,tax76{G1}:0.05902708482286135){G1}:0.008853934227170651,(tax81{G1}:0.04070576252869059,tax92{G1}:0.01926010456019834){G1}:0.019441405412938432){G1}:0.012144673305076462){G1}:0.013095874015112018){G1}:0.02442344574515401){G1}:0.08921243957295114){G1}:0.007142756792580704){G1}:0.008829097679061156,(tax121{G2}:0.014774781177450664,(tax96{G1}:0.013691931863796266,(((tax69{G2}:0.032264093685494315,tax71{G2}:0.01407440129503382){G2}:0.010021215207215758,((tax38{G2}:0.07768866895527832,(tax59{G2}:0.03007607272804251,(tax65{G2}:0.011195944187118974,(tax45{G2}:0.019404170260284494,tax36{G2}:0.041746390093208456){G2}:0.016456154539289003){G2}:0.011335027207775628){G2}:0.012904655646124256){G2}:0.0004669125681408879,((tax58{G2}:0.03271363496227432,tax63{G2}:0.021090507233771053){G2}:0.004614472077992055,((tax49{G2}:0.013227517325014258,tax54{G2}:0.009986450711016556){G2}:0.02697409391356988,(tax41{G2}:0.025704997006058716,tax62{G2}:0.003308118027352456){G2}:0.01895960156374354){G2}:0.00520556363817697){G2}:0.013354184394797531){G2}:0.03949873663232511){G2}:0.08814944527383035,(tax82{G1}:0.03711451491374063,(tax79{G1}:0.04063125764334151,(tax87{G1}:0.008990539400523329,(((tax67{G1}:0.0001639165953327414,tax43{G1}:0.039866335285394974){G1}:0.014966012568011966,(tax68{G1}:0.00017385178199190687,(tax64{G1}:0.012425316025450092,(tax48{G1}:0.013788807213186071,tax47{G1}:0.014576101052456136){G1}:0.021068153428553657){G1}:0.013257316749151744){G1}:0.0029877401718464897){G1}:0.01106432294943367,(tax60{G1}:0.02893859673216692,(tax42{G1}:0.02490528669948449,(tax57{G1}:0.003459619742458121,((tax30{G1}:0.014645625890914893,tax31{G1}:0.0033702088940325546){G1}:0.003703007475892835,(tax29{G1}:0.039252857831692634,(((tax15{G1}:0.01983878171613616,tax16{G1}:0.017986037633735248){G1}:0.03525928722320944,(tax5{G1}:0.027761376380018053,(tax1{G1}:0.02782098728356268,tax11{G1}:0.0030324431640132944){G1}:0.010510477048623698){G1}:0.055820789241652714){G1}:0.03767331148992319,((tax24{G1}:0.007175047458096855,tax23{G1}:0.011235689593367536){G1}:0.06351984936512417,(((tax7{G1}:0.07532677083945756,tax18{G1}:0.046291321503111875){G1}:0.007048377205223803,((tax26{G1}:0.023422580363221448,tax28{G1}:0.007664303828376792){G1}:0.012320999489054891,(tax12{G1}:0.04030342918148052,tax25{G1}:0.010483165760666734){G1}:0.026395406999833324){G1}:0.0005687391979881872){G1}:0.028111554584236965,((tax19{G1}:0.05052581164061923,(tax27{G1}:0.0023767815598638156,(tax6{G1}:0.052376073350481764,tax2{G1}:0.06500255878737643){G1}:0.006012735920981375){G1}:0.02677539270541881){G1}:0.004152535117488408,((tax14{G1}:0.038199846424995014,(tax22{G1}:0.016508314844179722,(tax13{G1}:0.00945744491118101,tax3{G1}:0.03904921577462191){G1}:0.02687723385841604){G1}:0.002590370072479059){G1}:0.027955091353178634,(tax9{G1}:0.06089721615838432,((tax4{G1}:0.05589281221412982,tax20{G1}:0.01405205928231837){G1}:0.01387572818399487,(tax10{G1}:0.05158630023548844,(tax21{G1}:0.013761482957405551,(tax17{G1}:0.022059098678221492,tax8{G1}:0.04853646685896379){G1}:0.0014181210151387098){G1}:0.005687386318420702){G1}:0.006318210150167166){G1}:0.0018850318609619052){G1}:0.02002504347665198){G1}:0.0032708653946059425){G1}:0.02246390781058619){G1}:0.004502717400081152){G1}:0.0013932860029520725){G1}:0.011767166200922805){G1}:0.007073216714774059){G1}:0.051486961702847){G1}:0.010140432079996714){G1}:0.023750411533181325){G1}:0.01765324667827937){G1}:0.10272056344940823){G1}:0.0172359917929785){G1}:0.0015944532923444857){G1}:0.0028014708861646745){G1}:0.018045636509271913){G1}:0.13405332669329278){G1}:0.022501175901579677){G1}:0.012435250833861461){G1}:0.011834226521399056){G1}:0.033818800966523836){G1}:0.06371357523230421){G1}:0.02570000743694953){G1}:0.01465804764681862){G1}:0.05150432888672839){G1}:0.037146787704057695){G1}:0.04446588901007182){G1}:0.02381249496749753){G1}:0.004147566559762147){G1}:0.016098515785999){G1}:0.031186215545207105){G1}:0.09766150222274297){G1}:0.030565323611596966){G1}:0.002587886196604572){G1}:0.006439903107940489){G1}:0;
\ No newline at end of file
diff --git a/src/CodonMolecularEvolution.jl b/src/CodonMolecularEvolution.jl
index 7f39416..6a75e05 100644
--- a/src/CodonMolecularEvolution.jl
+++ b/src/CodonMolecularEvolution.jl
@@ -1,6 +1,6 @@
module CodonMolecularEvolution
-using FASTX, MolecularEvolution, Measures, Plots, StatsBase, Distributions, DataFrames, CSV, NLopt, ParameterHandling, LinearAlgebra, Phylo
+using FASTX, MolecularEvolution, Measures, Plots, StatsBase, Distributions, DataFrames, CSV, NLopt, ParameterHandling, LinearAlgebra, Phylo, LaTeXStrings, Random
using NNlib, Distributions, Zygote, AdvancedHMC, LogDensityProblems, SimpleUnPack, AbstractMCMC, LogDensityProblemsAD, Interpolations, MCMCChains
abstract type difFUBARGrid end
@@ -14,6 +14,7 @@ include("FUBAR/FUBAR.jl")
include("smoothFUBAR/smoothFUBAR.jl")
include("simulations/alphabeta/alphabeta.jl")
+include("simulations/ou_hb.jl")
export
difFUBARBaseline,
@@ -21,6 +22,15 @@ export
difFUBARTreesurgery,
difFUBARTreesurgeryAndParallel,
FUBAR,
- smoothFUBAR
-
+ smoothFUBAR,
+ dNdS,
+ HBdNdS,
+ std2maxdNdS,
+ maxdNdS2std,
+ HB98AA_matrix,
+ ShiftingHBSimModel,
+ ShiftingHBSimPartition,
+ PiecewiseOUModel,
+ shiftingHBviz,
+ HBviz
end
diff --git a/src/FUBAR/FUBAR.jl b/src/FUBAR/FUBAR.jl
index ab7ce88..3ba6b3c 100644
--- a/src/FUBAR/FUBAR.jl
+++ b/src/FUBAR/FUBAR.jl
@@ -183,6 +183,7 @@ function alternating_maximize(f, a_bounds, b_bounds; final_tol = 1e-20, max_iter
return a,b,-m
end
+#Generalize this to work with any grid dimensions!
function interpolating_LRS(grid)
itp = interpolate(grid, BSpline(Cubic(Line(OnGrid()))))
@@ -207,7 +208,7 @@ end
z = @. itp(x', y)
contour(x, y, z, levels=30, color=:turbo, fill=true, linewidth = 0, colorbar = false, size = (400,400))
=#
-function fifeFUBAR(f::FUBARgrid, outpath; verbosity=1, exports=true)
+function FIFE(f::FUBARgrid, outpath; verbosity=1, exports=true)
exports && init_path(outpath)
LLmatrix = reshape(f.LL_matrix, length(f.grid_values),length(f.grid_values),:)
#Note: dim1 = beta, dim2 = alpha, so we transpose going in:
@@ -221,4 +222,4 @@ function fifeFUBAR(f::FUBARgrid, outpath; verbosity=1, exports=true)
return df_results
end
-export fifeFUBAR
+export FIFE
diff --git a/src/simulations/alphabeta/alphabeta.jl b/src/simulations/alphabeta/alphabeta.jl
index c8a830f..b5c7be0 100644
--- a/src/simulations/alphabeta/alphabeta.jl
+++ b/src/simulations/alphabeta/alphabeta.jl
@@ -19,17 +19,6 @@ const demo_nucmat = [ -0.256236 0.0697056 0.152411 0.034119;
0.152411 0.0596187 -0.251381 0.0393506;
0.034119 0.144795 0.0393506 -0.218264]
-#Should maybe be in MolecularEvolution.jl
-function standard_tree_sim(ntaxa)
- n(t) = (10*ntaxa)/(1+exp(t-10))
- return sim_tree(ntaxa,n,ntaxa/5, mutation_rate = 0.05)
-end
-function ladder_tree_sim(ntaxa)
- n(t) = ntaxa/10
- return sim_tree(ntaxa,n,1.0, mutation_rate = 1/sqrt(ntaxa))
-end
-
-export standard_tree_sim, ladder_tree_sim
"""
sim_alphabeta_seqs(alphavec::Vector{Float64}, betavec::Vector{Float64}, singletree, nucmat::Array{Float64,2}, f3x4::Array{Float64,2};
diff --git a/src/simulations/ou_hb.jl b/src/simulations/ou_hb.jl
new file mode 100644
index 0000000..0a56720
--- /dev/null
+++ b/src/simulations/ou_hb.jl
@@ -0,0 +1,704 @@
+#Halpern and Bruno model where amino acid fitnesses evolve over time using a piecewise constant approximation to an OU process.
+#Authors: Hassan Sadiq and Ben Murrell
+
+#To do:
+#- Check that an alternate genetic code will thread through all of this.
+#- Introduce a jump model with an additional class of jumps to independent draws from the equilibrium fits, and make it easy to use this instead
+#- Allow viz functions to have both kids of offsets (include the AA offsets in the fitness plot, but not the codon offsets)
+
+abstract type CodonSimulationPartition <: Partition end #Must have .sites::Int64, .codons::Vector{Int64}, and .code::GeneticCode
+
+#Does nothing because the `forward!` function implicitly samples.
+function MolecularEvolution.sample_partition!(p::CodonSimulationPartition)
+end
+
+function MolecularEvolution.partition2obs(part::CodonSimulationPartition)
+ return join([part.code.sense_codons[part.codons[i]] for i in 1:part.sites])
+end
+
+const d_code = MolecularEvolution.universal_code
+
+"""
+ HB_fixation_rate(from_codon, to_codon)
+
+ Returns the fixation rate of a mutation from `from_codon` to `to_codon` under the HB98 model.
+"""
+function HB_fixation_rate(from_codon, to_codon)
+ diff = to_codon - from_codon
+ if abs(diff) < 0.001
+ f_ab = 1/12 * (12 + diff*(6 + diff))
+ else
+ f_ab = diff/(1-exp(-diff))
+ end
+end
+
+#=
+#Confirming local approx
+t0(diff) = diff/(1-exp(-diff))
+t1(diff) = 1/12 * (12 + diff*(6 + diff))
+r = -0.01:0.000001:0.01
+plot(r, t0.(r) .- t1.(r), label = "1/12 * (12 + diff*(6 + diff))")
+=#
+
+"""
+ HB98AA_matrix(alpha, nuc_matrix, AA_fitness; genetic_code = MolecularEvolution.universal_code)
+
+Returns the rate matrix for a codon model using the HB98 model where each AA has a different fitness. `alpha` is the synonymous rate, `nuc_matrix` is the symmetric nucleotide substitution matrix,
+and `AA_fitness` is the fitness of each amino acid.
+"""
+function HB98AA_matrix(alpha, nuc_matrix, AA_fitness; genetic_code = MolecularEvolution.universal_code)
+ codon_matrix = zeros(length(genetic_code.sense_codons), length(genetic_code.sense_codons))
+ for p in genetic_code.syn_positions
+ codon_matrix[p[1][1], p[1][2]] = alpha * nuc_matrix[p[2][2], p[2][3]]
+ end
+ for p in genetic_code.nonsyn_positions
+ f_ab = HB_fixation_rate(AA_fitness[genetic_code.codon2AA_pos[p[1][1]]], AA_fitness[genetic_code.codon2AA_pos[p[1][2]]])
+ codon_matrix[p[1][1], p[1][2]] = alpha * nuc_matrix[p[2][2], p[2][3]] * f_ab
+ end
+ for i = 1:length(genetic_code.sense_codons)
+ codon_matrix[i, i] = -sum(codon_matrix[i, :])
+ end
+ return codon_matrix
+end
+
+#Assumption: nuc matrix is symmetric
+function HB98_eqfreqs(fitnesses) #Result from HB98
+ v = exp.(fitnesses .- maximum(fitnesses))
+ return v ./ sum(v)
+end
+HB98AA_eqfreqs(AA_fitness; genetic_code = MolecularEvolution.universal_code) = HB98_eqfreqs(AA_fitness[genetic_code.codon2AA_pos])
+function HB98AA_expected_subs_per_site(nuc_matrix, AA_fitness; genetic_code = MolecularEvolution.universal_code)
+ eqfreqs = HB98AA_eqfreqs(AA_fitness, genetic_code = genetic_code)
+ Q = HB98AA_matrix(1.0, nuc_matrix, AA_fitness, genetic_code = genetic_code)
+ return expected_subs_per_site(Q, eqfreqs)
+end
+HB98AA_neutral_scale_nuc_matrix(nucm; genetic_code = MolecularEvolution.universal_code) = nucm ./= HB98AA_expected_subs_per_site(nucm, zeros(20), genetic_code = genetic_code)
+#/Assumption
+
+"""
+ PiecewiseOUModel(event_rate::Float64, eq_std::Float64, mixing::Float64; delta_t = 1.0)
+ PiecewiseOUModel(offsets::Vector{Float64})
+ PiecewiseOUModel(event_rate::Float64, eq_std::Float64, mixing::Float64, mu::Union{Float64,Vector{Float64}}, offsets::Union{Float64,Vector{Float64}}, codon_offsets::Union{Float64,Vector{Float64}})
+
+A piecewise constant approximation to an OU process, intended to simulate fitnesses evolving over phylogenies.
+The equilibrium standard deviation is directly parameterized (`eq_std`), as is the rate at which the process mixes to equilibrium (`mixing`).
+`event_rate` controls how often the fitness changes occur, where the mixing rate is scaled to compensate for the increased rate of change to achieve
+approximately the same amount of change per unit time even as the `event_rate` changes. A very high `event_rate` will behave more like continuous diffusion,
+but will be more computationally expensive to sample from. `mu` can also be set to control the mean fitnesses.
+The model also permits `offsets`, which are added to the fitnesses as they are passed into the model. For a single process, these are confounded with the mean `mu`
+but if the offsets change (eg. from one branch to another) the effective fitnesses will immidiately change, whereas if `mu` changes the fitnesses will drift towards `mu`.
+"""
+mutable struct PiecewiseOUModel{M<:Union{Float64,AbstractVector{Float64}},O<:Union{Float64,AbstractVector{Float64}},C<:Union{Float64,AbstractVector{Float64}}}
+ mu::M #OU mean. Scalar (in which case it is the same for all AAs) or vector (one per AA)
+ delta_t::Float64 #Scales the size of the fitness jumps
+ var::Float64 #OU instantaneous diffusion variance
+ reversion_rate::Float64 #Rate of reversion to the mean
+ event_rate::Float64 #Rate of fitness jumps
+ offsets::O #These do not change, thus allowing preferred codons. Incorporated right at the end.
+ eq_std::Float64 #Standard deviation of the equilibrium distribution
+ codon_offsets::C #These do not change, thus allowing preferred codons.
+ function PiecewiseOUModel(event_rate::Float64, eq_std::Float64, mixing::Float64; delta_t = 1.0)
+ new{Float64,Float64,Float64}(0.0, delta_t, 2*(mixing/event_rate)*(eq_std^2), mixing/event_rate, event_rate, 0.0, eq_std, 0.0)
+ end
+ #Need to test this constructor:
+ function PiecewiseOUModel(event_rate::Float64, eq_std::Float64, mixing::Float64, mu::M, offsets::O, codon_offsets::C) where {M,O,C}
+ new{M,O,C}(mu, 1.0, 2*(mixing/event_rate)*(eq_std^2), mixing/event_rate, event_rate, offsets, eq_std, codon_offsets)
+ end
+ function PiecewiseOUModel(offsets::Vector{Float64}) #Constructor for static model, using just offsets for fitnesses
+ new{Float64,Vector{Float64}, Float64}(0.0, 1.0, 1e-15, 1.0, 0.0, offsets, 1e-15, 0.0)
+ end
+end
+
+"""
+ jump(x_source, m::PiecewiseOUModel)
+
+Evolves values over time using a piecewise constant approximation to an OU process, where this function computes the new distribution for a single discrete jump.
+`x_source` is the vector of fitnesses, and m is the PiecewiseOUModel.
+
+"""
+function jump(x_source, x_rand, m::PiecewiseOUModel)
+ a = (m.reversion_rate * m.delta_t)
+ return x_rand .* (m.eq_std * sqrt(1 - exp(-2*a))) .+ m.mu .+ ((x_source .- m.mu) .* exp(-a))
+end
+jump(x_source::Float64, m::PiecewiseOUModel) = jump(x_source, randn(), m)
+jump(x_source::Vector{Float64}, m::PiecewiseOUModel) = jump(x_source, randn(length(x_source)), m)
+
+#=
+m = PiecewiseOUModel(100.0, 10.0, 10.0)
+CodonMolecularEvolution.jump(zeros(20), m)
+=#
+
+"""
+ HB98AA_row(current_codon, alpha, nuc_matrix, AA_fitness; genetic_code=MolecularEvolution.universal_code)
+
+Returns the rate row for a codon model using the HB98 model where each AA has a different fitness. `current_codon` is the current codon, `alpha` is the synonymous rate,
+`nuc_matrix` is the symmetric nucleotide substitution matrix, and `AA_fitness` is the fitness of each amino acid.
+"""
+function HB98_row(current_codon, alpha, nuc_matrix, fitness; genetic_code=MolecularEvolution.universal_code)
+ row = zeros(genetic_code.num_sense)
+ codon_aa_i = fitness[current_codon]
+ for j in 1:genetic_code.num_sense
+ if genetic_code.codon_to_nuc_map[current_codon,j] != (-1,-1,-1)
+ c2n_map = genetic_code.codon_to_nuc_map[current_codon,j]
+ f_ab = HB_fixation_rate(codon_aa_i,fitness[j])
+ row[j] = alpha * nuc_matrix[c2n_map[2],c2n_map[3]] * f_ab
+ end
+ end
+ return row
+end
+
+HB98AA_row(current_codon, alpha, nuc_matrix, AA_fitness, codon_fitness_offsets; genetic_code=MolecularEvolution.universal_code) = HB98_row(current_codon, alpha, nuc_matrix, AA_fitness[genetic_code.codon2AA_pos] .+ codon_fitness_offsets, genetic_code = genetic_code)
+
+#= Check the row matches the matrix:
+fs = randn(20)
+c = rand(1:61)
+r1 = CodonMolecularEvolution.HB98AA_row(c, 1.23, CodonMolecularEvolution.demo_nucmat, fs, 0.0)
+q1 = CodonMolecularEvolution.HB98AA_matrix(1.23, CodonMolecularEvolution.demo_nucmat, fs)
+q1[c,c] = 0
+@assert isapprox(sum(q1[c,:] .- r1), 0.0)
+=#
+
+"""
+ jumpy_HB_codon_evolve(fitnesses, codon, ou_model, nuc_matrix, alpha, time;
+ genetic_code = MolecularEvolution.universal_code, push_into = nothing)
+
+Evolves fitnesses and codons over time using the HB98 model. `fitnesses` is the vector of fitnesses, `codon` is the current codon, `ou_model` is the OU model,
+`nuc_matrix` is the symmetric nucleotide substitution matrix, `alpha` is the synonymous rate, and `time` is the total time to evolve over.
+"""
+function jumpy_HB_codon_evolve(fitnesses, codon, scaled_fitness_model, nuc_matrix, alpha, time;
+ genetic_code = MolecularEvolution.universal_code, push_into = nothing)
+ codon_jumps = 0
+ fitness_jumps = 0
+ current_fits = copy(fitnesses)
+ current_codon = codon
+ t = 0.0
+ next_event = 0.0
+ while t+next_event < time
+ HBrow = HB98AA_row(current_codon, alpha, nuc_matrix, current_fits .+ scaled_fitness_model.offsets, scaled_fitness_model.codon_offsets, genetic_code=genetic_code)
+ sum_HBrow = sum(HBrow)
+ rOU,rHB = (scaled_fitness_model.event_rate,sum_HBrow)
+ total_rate = rOU+rHB
+ next_event = randexp()/total_rate
+ t = t+next_event
+ if t < time
+ event_index = sample(1:2,Weights([rOU,rHB]))
+ if event_index == 1 # Fitness jump event
+ fitness_jumps += 1
+ current_fits = jump(current_fits, scaled_fitness_model)
+ else # Codon substitution event
+ codon_jumps += 1
+ current_codon = sample(1:length(HBrow),Weights(HBrow))
+ end
+ end
+ if !isnothing(push_into)
+ push!(push_into,(t,current_codon,copy(current_fits)))
+ end
+ end
+ return current_fits, current_codon, codon_jumps, fitness_jumps
+end
+
+
+"""
+ ShiftingHBSimModel(sites, alphas, ou_params, nuc_matrix; rescale = true)
+
+A model for simulating fitnesses evolving over phylogenies using the HB98 model. `sites` is the number of sites, `alphas` is a vector of synonymous rates (one per site),
+`ou_params` is a vector of `PiecewiseOUModel`s (one per site), and `nuc_matrix` is the symmetric nucleotide substitution matrix (shared across sites).
+If 'rescale' is true, then the nuc matrix is scaled so that, when `alpha=1` and the fitnesses`=0`, the HB model expects one substitution per site per unit time.
+"""
+mutable struct ShiftingHBSimModel <: MolecularEvolution.SimulationModel
+ sites::Int64
+ alphas::Vector{Float64}
+ ou_params::Vector{PiecewiseOUModel}
+ nuc_matrix::Matrix{Float64}
+ code::MolecularEvolution.GeneticCode
+ function ShiftingHBSimModel(s,a,oup,n; rescale = true, code = MolecularEvolution.universal_code)
+ new(s,a,oup,rescale ? HB98AA_neutral_scale_nuc_matrix(n, genetic_code = code) : n, code)
+ end
+end
+
+ShiftingHBSimModel(nuc_matrix::Matrix{Float64}, ou_params::Vector{PiecewiseOUModel{A,B,C}}; alpha = ones(length(ou_params)), rescale = true, code = MolecularEvolution.universal_code) where {A,B,C} = ShiftingHBSimModel(length(ou_params), alpha, ou_params, nuc_matrix, rescale = rescale, code = code)
+
+
+"""
+ ShiftingHBSimPartition(model::ShiftingHBSimModel; burnin_time = 100.0, code = MolecularEvolution.universal_code)
+
+Constructs a partition that tracks evolving fitnesses and codons. Only useable for sampling (not likelihood calculations).
+"""
+mutable struct ShiftingHBSimPartition <: CodonSimulationPartition
+ sites::Int64
+ fitnesses::Matrix{Float64}
+ codons::Vector{Int64}
+ code::MolecularEvolution.GeneticCode
+ function ShiftingHBSimPartition(sites; code = MolecularEvolution.universal_code)
+ new(sites,zeros(length(code.amino_acids),sites),ones(Int64,sites), code)
+ end
+ function ShiftingHBSimPartition(model::ShiftingHBSimModel; burnin_time = 100.0)
+ fits = zeros(length(model.code.amino_acids),length(model.ou_params))
+ for (i,m) in enumerate(model.ou_params)
+ fits[:,i] .= randn(length(model.code.amino_acids)) .* m.eq_std .+ m.mu
+ end
+ #Starting codons ignore codon_offsets. Should be ok because burn-in, but should adjust.
+ codons = [sample(1:61, Weights(HB98AA_eqfreqs(fits[:,i] .+ model.ou_params[i].codon_offsets))) for i in 1:length(model.ou_params)]
+ for (i,m) in enumerate(model.ou_params)
+ f, c, _, _ = jumpy_HB_codon_evolve(fits[:,i], codons[i], m, model.nuc_matrix, model.alphas[i], burnin_time, genetic_code = model.code)
+ fits[:,i] .= f
+ codons[i] = c
+ end
+ new(length(model.ou_params), fits, codons, model.code)
+ end
+end
+
+
+function MolecularEvolution.forward!(dest::ShiftingHBSimPartition,
+ source::ShiftingHBSimPartition,
+ model::ShiftingHBSimModel,
+ node::FelNode)
+ for site in 1:model.sites
+ fitnesses = source.fitnesses[:,site]
+ codon = source.codons[site]
+ ou_model = model.ou_params[site]
+ alpha = model.alphas[site]
+ fitnesses,codon,_,_ = jumpy_HB_codon_evolve(fitnesses,
+ codon,
+ ou_model,
+ model.nuc_matrix,
+ alpha,
+ node.branchlength,
+ genetic_code = model.code)
+ dest.fitnesses[:,site] .= fitnesses
+ dest.codons[site] = codon
+ end
+end
+
+"""
+ dNdS(q1, q0, p; code = MolecularEvolution.universal_code)
+ dNdS(q1, q0; code = MolecularEvolution.universal_code)
+
+Returns an analytic expectation of the dN/dS ratio for a codon model. `q1` is the rate matrix where selection is active (eg. a Halpern and Bruno model with a set of fitnesses),
+and `q0` is the corresponding rate matrix where selection is inactive (eg. a Halpern and Bruno model with all fitnesses equal).
+`p` is the frequency distribution over codons that the dN/dS ratio is computed against. If not provided, this is computed as the equilibrium from `q1`.
+If only a vector of fitnesses are provided, then the `q1`, `q0`, and `p` are computed assuming a Halpern and Bruno model.
+```
+fs = randn(20)
+nucm = CodonMolecularEvolution.demo_nucmat
+q1 = HB98AA_matrix(1.0, nucm, fs)
+q0 = HB98AA_matrix(1.0, nucm, zeros(20))
+dNdS(q1, q0)
+```
+"""
+function dNdS(q1, q0, p::Vector{Float64}; code = d_code)
+ nsi = [CartesianIndex(i[1][1], i[1][2]) for i in code.nonsyn_positions]
+ si = [CartesianIndex(i[1][1], i[1][2]) for i in code.syn_positions]
+ dN_numerator = sum((q1 .* p)[nsi])
+ dN_denominator = sum((q0 .* p)[nsi])
+ dS_numerator = sum((q1 .* p)[si])
+ dS_denominator = sum((q0 .* p)[si])
+ dn = (dN_numerator/dN_denominator)
+ ds = (dS_numerator/dS_denominator)
+ return dn/ds
+end
+
+dNdS(q1, q0; code = MolecularEvolution.universal_code) = dNdS(q1, q0, exp(q1 * 100)[1,:], code = code)
+
+"""
+ HBdNdS(fs::Vector{Float64}; code = MolecularEvolution.universal_code, nucm = CodonMolecularEvolution.demo_nucmat)
+ HBdNdS(fs_pre::Vector{Float64}, fs_post::Vector{Float64}; code = MolecularEvolution.universal_code, nucm = CodonMolecularEvolution.demo_nucmat)
+
+Returns the expected dN/dS ratio for a Halpern and Bruno model with a vector of fitnesses. If two vectors are provided, then the dN/dS ratio is computed for the shift from `fs_pre` to `fs_post`.
+"""
+function HBdNdS(fs::Vector{Float64}; code = d_code, nucm = CodonMolecularEvolution.demo_nucmat)
+ q1 = HB98AA_matrix(1.0, nucm, fs, genetic_code = code)
+ q0 = HB98AA_matrix(1.0, nucm, zeros(20), genetic_code = code)
+ return dNdS(q1, q0, HB98AA_eqfreqs(fs, genetic_code = code), code = code)
+end
+
+HBdNdS(fs_pre::Vector{Float64}, fs_post::Vector{Float64}; code = d_code, nucm = CodonMolecularEvolution.demo_nucmat) = dNdS(HB98AA_matrix(1.0, nucm, fs_post, genetic_code = code), HB98AA_matrix(1.0, nucm, zeros(20), genetic_code = code), exp(HB98AA_matrix(1.0, nucm, fs_pre, genetic_code = code) * 100)[1,:], code = code)
+
+"""
+ std2maxdNdS(σ)
+
+Approximation for the maximum dN/dS ratio as a function of the standard deviation of the fitnesses, assuming Gaussian fitnesses and a Halpern and Bruno model,
+where the fitnesses have just shifted from one Gaussian sample to another. Note: this is not an analytical solution, but a serindipitously good approximation.
+
+```
+function monte_carlo_maxdNdS(σ; N=100_000)
+ sum_val = 0.0
+ for _ in 1:N
+ f_i = σ * randn()
+ f_j = σ * randn()
+ sum_val += HB_fixation_rate(f_i, f_j)
+ end
+ return sum_val / N
+end
+vs = 0:0.01:10
+plot(vs, monte_carlo_maxdNdS.(vs), label = "Monte Carlo", alpha = 0.8)
+plot!(vs, std2maxdNdS.(vs), label = "Approx", linestyle = :dash, alpha = 0.8)
+```
+"""
+std2maxdNdS(σ) = sqrt(σ^2 + π) / sqrt(π)
+
+"""
+ maxdNdS2std(ω)
+
+Inverse of std2maxdNdS(σ). Estimates the standard deviation of the fitnesses that will produce, in expectation, a dN/dS ratio of `ω`, assuming Gaussian fitnesses and a Halpern and Bruno model,
+where the fitnesses have just shifted from one Gaussian sample to another. Note: this is not an analytical solution, but a serindipitously good approximation.
+"""
+maxdNdS2std(ω) = sqrt(π * (ω^2 - 1))
+
+"""
+ time_varying_HB_freqs(ts, T, fst, init_freqs; nucm = CodonMolecularEvolution.demo_nucmat, alpha = 1.0, delta_t = 0.002, prezero_delta_t = 0.5)
+
+Compute the time-varying codon frequencies and expected dN/dS over time for a sequence of fitnesses, under the Halpern-Bruno model.
+`ts` is a vector of times, `T` is the total time, `fst` is a vector of vector of fitnesses, `init_freqs` is the initial codon frequencies,
+`nucm` is the nucleotide substitution matrix, `alpha` is the alpha parameter, `delta_t` is the discretization time step for the simulation,
+and `prezero_delta_t` is the time step used before `t=0`. `fst[i]` is assumed to be the fitness between `t = ts[i]` and `t = ts[i+1]`.
+"""
+function time_varying_HB_freqs(ts, T, fst, init_freqs; nucm = CodonMolecularEvolution.demo_nucmat, alpha = 1.0, delta_t = 0.002, prezero_delta_t = 0.5)
+ cod_freq_collection = []
+ t_collection = []
+ dnds_collection = []
+ ind = 1
+ ts = [ts; Inf]
+ t = ts[ind]
+ codon_freqs = copy(init_freqs)
+ while (t < T) && (ind + 1 <= length(ts))
+ if t < 0-prezero_delta_t
+ dt = min(prezero_delta_t, ts[ind + 1] - t)
+ else
+ dt = min(delta_t, ts[ind + 1] - t)
+ end
+ Q = HB98AA_matrix(alpha, nucm, fst[ind])
+ P = exp(Q * dt)
+ codon_freqs = (codon_freqs' * P)'
+ push!(cod_freq_collection, copy(codon_freqs))
+ push!(dnds_collection, dNdS(Q, HB98AA_matrix(alpha, nucm, zeros(20)), codon_freqs))
+ if t + dt >= ts[ind + 1]
+ ind += 1
+ end
+ t += dt
+ push!(t_collection, t)
+ end
+ return cod_freq_collection, t_collection, dnds_collection
+end
+
+
+function piecewise_linear_plot!(t_vec, fs_vec, tmax; shift = NaN, colors = nothing, kw...)
+ fs = zeros(length(fs_vec[1]), length(fs_vec)*3)
+ ts = zeros(length(fs_vec)*3)
+ push!(t_vec, tmax)
+ for i in 1:length(fs_vec)
+ fs[:, 3i-2:3i-1] .= fs_vec[i]
+ fs[:, 3i] .= fs_vec[i] .+ shift
+ ts[3i-2:3i] .= [t_vec[i], t_vec[i+1], t_vec[i+1]]
+ end
+ for i in 1:size(fs, 1)
+ plot!(ts, fs[i,:], label = :none, color = colors[i]; kw...)
+ end
+end
+
+function add_muller!(pos_mean_matrix; x_ax = 1:size(pos_mean_matrix)[2], plot_theme = nothing, plot_perm = 1:size(pos_mean_matrix)[1],
+ edge_pad = 6, fillalpha = 0.8, colormap = 1:size(pos_mean_matrix)[1])
+ pmm = pos_mean_matrix[plot_perm,:]
+ cum_freq = zeros(size(pmm)[2])
+ for i in 1:size(pmm)[1]
+ plot!(x_ax,1 .- cum_freq, fillrange = 1 .- (cum_freq .+ pmm[i,:]), fillalpha = fillalpha, linewidth = 0, color = plot_theme[colormap[plot_perm[i]]], label = :none)
+ cum_freq .+= pmm[i,:]
+ end
+end
+
+"""
+ HBviz(ts, fst, T, alp, nucm)
+
+Visualize over time the fitness trajectory, the codon frequencies, and the expected dN/dS. `ts` is a vector of times, `fst` is a vector of fitnesses, `T` is the total time, `alp` is the alpha parameter, and `nucm` is the nucleotide substitution matrix.
+
+```julia
+σ = 2.0
+alpha = 1.0
+nucm = CodonMolecularEvolution.demo_nucmat
+fst = [randn(20) .* σ, randn(20) .* σ]
+ts = [-100.0, 1.0]
+T = 2.0
+HBviz(ts, fst, T, alpha, nucm)
+```
+"""
+function HBviz(ts::Vector{Float64}, fst::Vector{Vector{Float64}}, T::Float64, alp, nucm;
+ scram = [8, 4, 11, 17, 9, 14, 18, 2, 10, 13, 16, 20, 12, 6, 19, 7, 1, 15, 5, 3],
+ viz_size = (1200,500), σ = nothing)
+ cfc, tc, dndses = time_varying_HB_freqs(ts, T, fst, ones(61)/61, alpha = alp)
+ #Removing burnin:
+ prezero = findlast(tc .<= 0)
+ prezero = prezero == nothing ? 1 : prezero
+ tc = tc[prezero:end]
+ cfc = cfc[prezero:end]
+ dndses = dndses[prezero:end]
+ fl = findlast(ts .<= 0)
+ fl = fl == nothing ? 1 : fl
+ ts = ts[fl:end]
+ fst = fst[fl:end]
+ ###
+ perm = sortperm(MolecularEvolution.universal_code.amino_acid_lookup)
+ AA_nums = MolecularEvolution.universal_code.codon2AA_pos[perm]
+ colorv = zeros(Int, 61)
+ c = 1
+ for i in 1:20
+ s = findall(AA_nums .== i)
+ colorv[c:c+length(s)-1] .= 10*(scram[i]-1) .+ collect(1:length(s)) .* 2 .- 1
+ c += length(s)
+ end
+ plot_theme_exploded = cgrad(:darkrainbow, 200, categorical = true)
+ AA_plot_theme = plot_theme_exploded[1 .+ (10 .* (scram .- 1))]
+ plot_theme = plot_theme_exploded[colorv]
+ pl1 = plot(tc, zeros(length(tc)), size = viz_size, xlim = (0, T), linestyle = :dash, color = "black", alpha = 0.0,
+ ylabel = L"f_t", xtickfontcolor = RGBA(0,0,0,0), legend=false)
+ extr = maximum([maximum(abs.(f)) for f in fst]) * 1.1
+ piecewise_linear_plot!(ts, fst, T, colors = AA_plot_theme, ylims = (-extr, extr))
+ pl2 = plot(tc, zeros(length(tc)), size = viz_size, alpha = 0.0, xlim = (0, T), ylabel = L"C_t", xtickfontcolor = RGBA(0,0,0,0), legend=false, top_margin = -12Plots.mm)
+ add_muller!(stack(cfc)[perm,:], plot_theme = plot_theme, x_ax = tc)
+ pl3 = plot(tc, dndses, size = viz_size, xlim = (0, T), xlabel = "Time", ylabel = L"E_C_t[dN/dS|f_t]", ylim = (0, maximum(dndses[tc .>= 0]) + 0.1),
+ label = :none, top_margin = -12Plots.mm, color = "black")
+ plot!([0,T], [1,1], color = "black", linestyle = :dash, alpha = 0.5, label = :none)
+ if !isnothing(σ)
+ maxdnds = std2maxdNdS(σ)
+ plot!([0,T], [maxdnds,maxdnds], color = "red", linestyle = :dash, alpha = 0.5, label = :none)
+ end
+ return plot(pl1, pl2, pl3, layout=(3, 1), link=:x, margins = 8Plots.mm, plot_layout = :tight, widen=false, tickdirection=:out)
+end
+
+"""
+ shiftingHBviz(T, event_rate, σ, mixing_rate, alpha, nucm; T0 = -20)
+
+Visualize the fitness trajectory, codon frequencies, and expected dN/dS over time for a shifting HB process.
+`T` is the total time, `event_rate` is the rate of fitness shifts, `σ` is the standard deviation of the fitnesses,
+`mixing_rate` is the rate of mixing between fitnesses, `alpha` is the alpha parameter, and `nucm` is the nucleotide substitution matrix.
+`T0` controls the burnin time, to ensure the process is at equilibrium at `t=0`.
+
+```julia
+T = 2.0
+mix = 1.0
+σ = 5.0
+event_rate = 100.0
+alpha = 1.0
+nucm = CodonMolecularEvolution.demo_nucmat
+shiftingHBviz(T, event_rate, σ, mix, alpha, nucm)
+```
+"""
+function shiftingHBviz(T, event_rate, σ, mixing_rate, alpha, nucm; T0 = -20)
+ fs = randn(20) .* σ
+ m = PiecewiseOUModel(event_rate, σ, mixing_rate)
+ coll = []
+ codon = sample(1:61, Weights(HB98AA_eqfreqs(fs)))
+ newfs, _, _, jumps = jumpy_HB_codon_evolve(fs, codon, m, nucm, alpha, T-T0, push_into = coll)
+ prepend!(coll, [(0.0, codon, fs)])
+ ts = [c[1] for c in coll] .+ T0
+ fst = [c[3] for c in coll]
+ return HBviz(ts, fst, T, alpha, nucm, σ = σ)
+end
+
+
+
+###############################################################################
+#A model for evolving log effective pop size & unscaled site fitnesses
+###############################################################################
+
+# jumpy_NeHB_codon_evolve is currently fine for a single site. However, to get this version working over a
+#branch for multiple sites, we need to:
+# 1) First sample the Ne jumps over the branch (since these apply for all sites)
+# 2) One site at a time, sample the fitness jumps and codon substitutions between each Ne jump event
+# ShiftingNeHBSimModel and ShiftingNeHBSimPartition are commented out until this works.
+"""
+ jumpy_NeHB_codon_evolve(fitnesses, logNe, codon, fitness_model, logNe_model, nuc_matrix, alpha, time;
+ genetic_code = MolecularEvolution.universal_code, push_into = nothing)
+
+Evolves codons, unscaled site-fitness, and log-pop-size together.
+"""
+function jumpy_NeHB_codon_evolve(fitnesses, logNe, codon, fitness_model, logNe_model, nuc_matrix, alpha, time;
+ genetic_code = MolecularEvolution.universal_code, push_into = nothing)
+ codon_jumps = 0
+ fitness_jumps = 0
+ logNe_jumps = 0
+ current_fits = copy(fitnesses)
+ current_codon = codon
+ current_logNe = logNe
+ t = 0.0
+ next_event = 0.0
+ while t+next_event < time
+ HBrow = HB98AA_row(current_codon, alpha, nuc_matrix, (current_fits .+ fitness_model.offsets) .* exp(current_logNe) .* 2, fitness_model.codon_offsets .* exp(current_logNe) .* 2, genetic_code=genetic_code)
+ sum_HBrow = sum(HBrow)
+ rOU, rNe, rHB = (fitness_model.event_rate, logNe_model.event_rate, sum_HBrow)
+ total_rate = rOU+rHB+rNe
+ next_event = randexp()/total_rate
+ t = t+next_event
+ if t < time
+ event_index = sample(1:3,Weights([rOU,rNe,rHB]))
+ if event_index == 1 # Fitness jump event
+ fitness_jumps += 1
+ current_fits = jump(current_fits, fitness_model)
+ elseif event_index == 2 # pop-size jump event
+ logNe_jumps += 1
+ current_logNe = jump(current_logNe, logNe_model)
+ else # Codon substitution event
+ codon_jumps += 1
+ current_codon = sample(1:length(HBrow),Weights(HBrow))
+ end
+ end
+ if !isnothing(push_into)
+ push!(push_into,(t,current_codon,copy(current_fits),current_logNe))
+ end
+ end
+ return current_logNe, current_fits, current_codon, codon_jumps, fitness_jumps, logNe_jumps
+end
+
+"""
+ shiftingNeHBviz(T, f_event_rate, f_σ, f_mixing_rate, logNe_event_rate, logNe_σ, logNe_mean, logNe_mixing_rate, alpha, nucm; T0 = -20)
+
+Visualize the fitness trajectory, codon frequencies, and expected dN/dS over time for a shifting Ne HB process.
+"""
+function shiftingNeHBviz(T, f_event_rate, f_σ, f_mixing_rate, logNe_event_rate, logNe_σ, logNe_mean, logNe_mixing_rate, alpha, nucm; T0 = -20)
+ fs = randn(20) .* f_σ
+ logNe = randn() .* logNe_σ .+ logNe_mean
+ f_ou = PiecewiseOUModel(f_event_rate, f_σ, f_mixing_rate)
+ log_ne_ou = PiecewiseOUModel(logNe_event_rate, logNe_σ, logNe_mixing_rate, logNe_mean, 0.0, 0.0)
+ coll = []
+ codon = sample(1:61, Weights(HB98AA_eqfreqs(fs .* exp(logNe) .* 2)))
+ CodonMolecularEvolution.jumpy_NeHB_codon_evolve(fs, logNe, codon, f_ou, log_ne_ou, nucm, alpha, T-T0, push_into = coll)
+ prepend!(coll, [(0.0, codon, fs, logNe)])
+ ts = [c[1] for c in coll] .+ T0
+ fst = [c[3] .* exp(c[4]) .* 2 for c in coll] #2Ne*s
+ return HBviz(ts, fst, T, alpha, nucm)
+end
+
+#=
+
+mutable struct ShiftingNeHBSimModel <: MolecularEvolution.SimulationModel
+ sites::Int64
+ alphas::Vector{Float64} # per-site synonymous rates
+ unscaled_ou_params::Vector{PiecewiseOUModel} # OU process for each site's unscaled fitness
+ logNe_model::PiecewiseOUModel # OU process for log(pop size)
+ nuc_matrix::Matrix{Float64}
+ code::MolecularEvolution.GeneticCode
+end
+
+"""
+ ShiftingNeHBSimModel(nuc_matrix, unscaled_ou_params, logNe_model; alpha, rescale, code)
+
+Create a model with one OU process per site (unscaled_ou_params) and one OU process for log(N_e).
+`alpha` can be a scalar or a vector of the same length as unscaled_ou_params.
+If `rescale` is true, the nuc_matrix is scaled (HB98 neutral scaling).
+"""
+function ShiftingNeHBSimModel(
+ nuc_matrix::Matrix{Float64},
+ unscaled_ou_params::Vector{PiecewiseOUModel{A,B,C}},
+ logNe_model::PiecewiseOUModel;
+ alpha = ones(length(unscaled_ou_params)),
+ rescale::Bool = true,
+ code::MolecularEvolution.GeneticCode = MolecularEvolution.universal_code) where {A,B,C}
+ sites = length(unscaled_ou_params)
+ @assert length(alpha) == sites || length(alpha) == 1 "alpha must match number of sites or be a single value"
+
+ # Copy or scale the nuc matrix if requested
+ mat = rescale ? HB98AA_neutral_scale_nuc_matrix(copy(nuc_matrix), genetic_code = code) : copy(nuc_matrix)
+
+ return ShiftingNeHBSimModel(
+ sites,
+ length(alpha) == 1 ? fill(alpha[1], sites) : alpha,
+ unscaled_ou_params,
+ logNe_model,
+ mat,
+ code
+ )
+end
+
+mutable struct ShiftingNeHBSimPartition <: CodonSimulationPartition
+ sites::Int64
+ logNe::Float64
+ fitnesses::Matrix{Float64} # size: (number_of_AAs, sites)
+ codons::Vector{Int64}
+ code::MolecularEvolution.GeneticCode
+end
+
+"""
+ ShiftingNeHBSimPartition(model; burnin_time)
+
+Constructs a partition for the `ShiftingNeHBSimModel`.
+Initializes by:
+ - Drawing random unscaled fitnesses from each site's OU equilibrium.
+ - Drawing random logNe from its OU equilibrium.
+ - "Burning in" each site along the branch of length `burnin_time`.
+"""
+function ShiftingNeHBSimPartition(
+ model::ShiftingNeHBSimModel;
+ burnin_time::Float64 = 100.0
+)
+ # 1) sample an initial logNe from equilibrium
+ # The equilibrium of an OU in this piecewise-constant approximation is roughly Normal(mean=mu, sd=eq_std).
+ lp = randn() * model.logNe_model.eq_std + model.logNe_model.mu
+
+ # 2) sample initial unscaled fitness for each site
+ nAAs = length(model.code.amino_acids)
+ fits = zeros(nAAs, model.sites)
+ for s in 1:model.sites
+ oup = model.unscaled_ou_params[s]
+ fits[:,s] .= randn(nAAs) .* oup.eq_std .+ oup.mu
+ end
+
+ # 3) pick initial codons (arbitrary, say the first sense codon)
+ c_init = [rand(1:length(model.code.sense_codons)) for i in 1:model.sites]
+
+ # 4) "burn in" by evolving over burnin_time
+ #jumpy_NeHB_codon_evolve(fitnesses, logNe, codon, fitness_model, logNe_model, nuc_matrix, alpha, time
+ new_logNe, new_fits, new_codons, _, _, _ = jumpy_NeHB_codon_evolve(
+ fits, lp, c_init, model.unscaled_ou_params, model.logNe_model, model.nuc_matrix, model.alphas, burnin_time, genetic_code = model.code)
+
+ return ShiftingNeHBSimPartition(
+ model.sites,
+ new_logNe,
+ new_fits,
+ new_codons,
+ model.code
+ )
+end
+
+# Minimal constructor for placeholders (if one needs a zero-time partition, for instance)
+function ShiftingNeHBSimPartition(
+ sites::Int;
+ code::MolecularEvolution.GeneticCode = MolecularEvolution.universal_code
+)
+ nAAs = length(code.amino_acids)
+ return ShiftingNeHBSimPartition(
+ sites,
+ 0.0,
+ zeros(nAAs, sites),
+ ones(Int, sites),
+ code
+ )
+end
+
+"""
+ forward!(dest, source, model, node)
+
+Evolves `source` partition along `node.branchlength` under `model`, storing the result in `dest`.
+"""
+function MolecularEvolution.forward!(
+ dest::ShiftingNeHBSimPartition,
+ source::ShiftingNeHBSimPartition,
+ model::ShiftingNeHBSimModel,
+ node::FelNode
+)
+ bl = node.branchlength
+ new_lp, new_fits, new_codons, _, _, _ = jumpy_NeHB_codon_evolve(
+ copy(source.fitnesses),
+ source.logNe,
+ copy(source.codons),
+ model.unscaled_ou_params,
+ model.logNe_model,
+ model.nuc_matrix,
+ model.alphas,
+ bl;
+ genetic_code = model.code
+ )
+
+ dest.logNe = new_lp
+ dest.fitnesses .= new_fits
+ dest.codons .= new_codons
+end
+=#