diff --git a/Project.toml b/Project.toml index fedaaad..be318a5 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1" @@ -22,10 +23,11 @@ NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" ParameterHandling = "2412ca09-6db7-441c-8e3a-88d5709968c5" Phylo = "aea672f4-3940-5932-aa44-993d1c3ff149" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SimpleUnPack = "ce78b400-467f-4804-87d8-8f486da07d0a" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] -MolecularEvolution = "0.2" +MolecularEvolution = "0.2.3" julia = "1.10" diff --git a/docs/src/figures/dnds_plot.svg b/docs/src/figures/dnds_plot.svg new file mode 100644 index 0000000..7db93e6 --- /dev/null +++ b/docs/src/figures/dnds_plot.svg @@ -0,0 +1,644 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/src/figures/tagged_simtree_phylo.svg b/docs/src/figures/tagged_simtree_phylo.svg new file mode 100644 index 0000000..a972c9a --- /dev/null +++ b/docs/src/figures/tagged_simtree_phylo.svg @@ -0,0 +1,1215 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/src/fitness_OU.md b/docs/src/fitness_OU.md new file mode 100644 index 0000000..c41e705 --- /dev/null +++ b/docs/src/fitness_OU.md @@ -0,0 +1,61 @@ +# Simulating with time-varying fitness + +Uses [this tree](simtree_tagged.svg) as input. + +```julia +#Read in a tree where the Newick string has tags (eg. "{G1}" or "{G2}") on branches. +#Can be tagged with: https://murrellgroup.github.io/WebWidgets/phylotagger.html +ts, tags = import_labeled_phylotree_newick("simtree_tagged.tre") +tree = gettreefromnewick(ts, FelNode) + +#First viz at the tree with tags +for (i,n) in enumerate(getnodelist(tree)) + (length(n.children) > 0) && (n.name = "N$(i)_"*n.name) #Make internal node names unique (only needed for plotting) + n.node_data = Dict("g" => (occursin(tags[2], n.name) ? 1.0 : 0.0)) +end +pl = plot(get_phylo_tree(tree), showtips = false, marker_z = "g", line_z = "g", markerstrokewidth = 0, colorbar = false, linecolor = :darkrainbow, markercolor = :darkrainbow) +savefig("tagged_simtree_phylo.svg") +``` + +![](figures/tagged_simtree_phylo.svg) + +```julia +#500 codon sites in total, where 100 will undergo a fitness shift: +S, nsel = 500, 100 +selsites = sort(sample(1:S, nsel, replace = false)) +nucm = CodonMolecularEvolution.demo_nucmat #Using a nuc matrix derived from a flu dataset + +#Compute std dev of fitnesses for the target peak dN/dS, aiming for a mean dN/dS of 2.0 at the fitness shift boundary: +σ = maxdNdS2std(2.0) +m0fs = randn(20,S) .* σ +m1fs = copy(m0fs) +m1fs[:,selsites] .= randn(20,nsel) .* σ + +#Plot analytical expectations for background dN/dS, and for peak dN/dS at the host change boundary. +scatter([HBdNdS(m0fs[:,i], nucm = nucm) for i in 1:S], size = (1200,500), yscale = :log10, label = "HBω0", msw = 0, color = "blue", xlabel = "Site", ylabel = "dN/dS", margins = 1Plots.cm) +scatter!(selsites, [HBdNdS(m0fs[:,i], m1fs[:,i], nucm = nucm) for i in selsites], size = (1200,500), label = "HBω1", msw = 0, color = "red") +savefig("dnds_plot.svg") +``` + +![](figures/dnds_plot.svg) + +```julia +#Specify two models, m1 and m2, sharing alpha but with different fitnesses for the `selsites` sites: +using Distributions + +alphas = rand(Gamma(10,0.1), S) +m1 = ShiftingHBSimModel(S, alphas, [PiecewiseOUModel(m0fs[:,i]) for i in 1:S], nucm) +m2 = ShiftingHBSimModel(S, alphas, [PiecewiseOUModel(m1fs[:,i]) for i in 1:S], nucm) + +#Use model m1 when "{G1}" occurs in the sequence name. Models are in [] because we have an array of models per partition, when passing a function: +mfunc(n) = occursin("{G2}", n.name) ? [m2] : [m1] + +#The process should be at equilibrium for whatever the root model is, and this will use the root model: +internal_message_init!(tree, ShiftingHBSimPartition(m1)) + +#Simeulate under this process: +sample_down!(tree, mfunc) + +#Write sequences to a .fasta file: +write_fasta("simu_seqs.fasta", leaf_samples(tree), seq_names = leaf_names(tree)) +``` diff --git a/docs/src/simtree_tagged.tre b/docs/src/simtree_tagged.tre new file mode 100644 index 0000000..7d28226 --- /dev/null +++ b/docs/src/simtree_tagged.tre @@ -0,0 +1 @@ +(((tax191{G1}:0.0876725806761636,tax190{G1}:0.0894607524107736){G1}:0.0036036666634083787,(tax198{G1}:0.025252958193682482,tax193{G1}:0.03016546613009762){G1}:0.057534435998472355){G1}:0.006715581998474342,((tax195{G1}:0.054546694927162916,(tax197{G1}:0.042645415031909606,tax194{G1}:0.04684513626333345){G1}:0.008401926724847584){G1}:0.03256956841111309,((tax192{G1}:0.07556019898355605,(tax196{G1}:0.05891033225204943,(tax189{G1}:0.05390345129831925,(tax188{G1}:0.06147337561398739,tax200{G1}:0.03206787896971761){G1}:0.0060425316294520715){G1}:0.01951839530844825){G1}:0.0121173468320627){G1}:0.010204999026043481,(tax199{G1}:0.04983038898089238,(((tax187{G1}:0.00605246557691153,tax176{G1}:0.09272170119191411){G1}:0.0012070179531218476,((tax175{G1}:0.06094439789197623,tax183{G1}:0.04553879703312431){G1}:0.0010182662793623157,(tax173{G1}:0.05999815475460662,(tax170{G1}:0.048216085854531326,(tax179{G1}:0.020074720760078317,(tax174{G1}:0.006107108423158383,tax171{G1}:0.014792158669170264){G1}:0.023688320695480738){G1}:0.00646722385262868){G1}:0.018395822561943883){G1}:0.008019459408802363){G1}:0.033019091075075335){G1}:0.012395507424137479,(((tax180{G1}:0.013848404760419483,tax169{G1}:0.0369630169585345){G1}:0.022670053329334052,((tax168{G1}:0.04496012425354975,(tax177{G1}:0.021472973238438074,tax166{G1}:0.06263073402087617){G1}:0.0024339037573345066){G1}:0.011158692505368292,((tax143{G2}:0.06302811880607842,(tax159{G2}:0.004924925729958462,(tax154{G2}:0.0008642848579845001,tax147{G2}:0.01290712838119032){G2}:0.011866515397821152){G2}:0.033016616222806085){G2}:0.029989142213964405,(tax163{G2}:0.0064225191509397145,(((tax158{G2}:0.01631955821078443,(tax139{G2}:0.005871165959912681,tax140{G2}:0.002985255157995405){G2}:0.04403375259941184){G2}:0.02253345288390942,(tax156{G2}:0.03970735945508374,(tax149{G2}:0.04422249505169464,(tax136{G2}:0.04664894494688254,tax141{G2}:0.020248571941254175){G2}:0.035147521747436024){G2}:0.007244581394779825){G2}:0.002279922537576455){G2}:0.00166399660059368,((tax151{G2}:0.03999793978873527,tax150{G2}:0.04048223687901386){G2}:0.007266936795155956,(tax160{G2}:0.012743206837900343,(tax152{G2}:0.008853942259116216,(tax132{G2}:0.017024896310785638,(tax111{G2}:0.03439254079209499,(tax112{G2}:0.024031067680197544,(tax125{G2}:0.006586436970727454,(tax103{G2}:0.010699233745566934,(tax105{G2}:0.000685467702129,tax102{G2}:0.00817592367180866){G2}:0.0029281341987468564){G2}:0.0385102965977885){G2}:0.0011200942097729834){G2}:0.008836558438275677){G2}:0.02896094421324713){G2}:0.10267088431053402){G2}:0.03442976526435304){G2}:0.0033528262448352035){G2}:0.002995191269348195){G2}:0.008590680413237012){G2}:0.02253345487706806){G2}:0.11043701695983364){G1}:0.005265173856082336){G1}:0.03033683707932,(((tax165{G1}:0.09067522652901837,(tax172{G1}:0.02381994587783149,tax181{G1}:0.004624412781357271){G1}:0.009226476572802863){G1}:0.026621416634713468,(tax186{G1}:0.018251778024604783,(tax185{G1}:0.018673986950570747,tax184{G1}:0.02047706217606258){G1}:0.002394163315112266){G1}:0.012047810545505994){G1}:0.00860806298143007,(tax182{G1}:0.04185317378802211,(tax178{G1}:0.025466558102832403,(tax167{G1}:0.015437885717647455,(((tax153{G1}:0.06476661664614816,(tax162{G1}:0.001976926210361459,(tax138{G1}:0.07540623292712258,tax146{G1}:0.04898847283987697){G1}:0.007199883112776702){G1}:0.016580333784091916){G1}:0.0034770067150903714,(tax164{G1}:0.0019148358003295155,(tax137{G1}:0.09610433509857566,(tax142{G1}:0.037472154209657625,(tax148{G1}:0.022076483502409334,(tax144{G1}:0.0248953412408692,tax145{G1}:0.02481586695905884){G1}:0.0026822584277028084){G1}:0.006370364076504646){G1}:0.03194123100883159){G1}:0.013667102694273252){G1}:0.0006382799316124873){G1}:0.03280798557171663,(tax161{G1}:0.0070682515465140565,(tax157{G1}:0.028277958140741136,(tax155{G1}:0.003951370805649514,(((tax126{G2}:0.01512745094011466,(tax122{G2}:0.02534487656712116,tax123{G2}:0.022590591620297425){G2}:0.0008344777032263679){G2}:0.05630755773955415,((tax116{G2}:0.06586934229235016,tax133{G2}:0.01648099469586556){G2}:0.017362650017230596,(((tax109{G2}:0.047399009811782994,tax124{G2}:0.023524413027162735){G2}:0.012807778119859932,(tax117{G2}:0.04427715315185631,tax127{G2}:0.02388452172834673){G2}:0.0017434668342086266){G2}:0.0030324447657491082,(tax119{G2}:0.044326820211354125,(tax131{G2}:0.01410171880999806,(tax104{G2}:0.06610279981490084,((tax129{G2}:0.00394639935842983,tax108{G2}:0.04320177060864035){G2}:0.003958815908072366,(tax114{G2}:0.0050640121885027,((tax99{G2}:0.030610041449712674,tax106{G2}:0.009877173507022387){G2}:0.009651166853159707,((tax88{G2}:0.009050146142266036,tax90{G2}:0.005357069973789267){G2}:0.06418296441989188,(tax72{G2}:0.11928351314710331,((tax73{G2}:0.019361942183140364,tax75{G2}:0.014106700942076026){G2}:0.058937669086599816,((tax86{G2}:0.04038786524823333,tax83{G2}:0.04255105737646524){G2}:0.0010306844249740013,((tax50{G2}:0.05626536813501043,((tax55{G2}:0.031444526461345046,tax53{G2}:0.033560533112315286){G2}:0.020201385860911376,(tax37{G2}:0.06992749047048774,tax44{G2}:0.04840236819461594){G2}:0.00974554718942256){G2}:0.0013411332955716043){G2}:0.011531232629155525,(tax70{G2}:0.006367880889844938,((tax35{G2}:0.07372239259508534,(tax66{G2}:0.01015285170050917,(tax32{G2}:0.004838002015680466,tax33{G2}:0.001351065293841831){G2}:0.0754286118347052){G2}:0.0011995633454741595){G2}:0.01825675666135404,(tax61{G2}:0.031193684543797746,((tax40{G2}:0.032686317398696124,tax39{G2}:0.03741752265888065){G2}:0.02511140875413781,((tax51{G2}:0.0006482146669855776,tax46{G2}:0.0029107515452417546){G2}:0.044808634748058844,(tax56{G2}:0.021927473629553433,(tax34{G2}:0.042041928421269185,tax52{G2}:0.008635389657038797){G2}:0.016597725366462802){G2}:0.02003001486529744){G2}:0.0006730502749348101){G2}:0.0013212652385009545){G2}:0.013955203709092267){G2}:0.005379417198292695){G2}:0.0007326552510388004){G2}:0.12551976056386938){G2}:0.01308345928810787){G2}:0.006608785811363131){G2}:0.0205267247845345){G2}:0.08664688087057545){G2}:0.0034074666771270537){G2}:0.028588420939849767){G2}:0.005453925612809456){G2}:0.00722222470011554){G2}:0.002106070665884691){G2}:0.0022103809036173763){G2}:0.033828743369384386){G2}:0.003762612973023957){G2}:0.011719975943519175,(((tax135{G2}:0.0005761901030815657,(tax110{G2}:0.014141473322690912,tax107{G2}:0.021331419227653636){G2}:0.049676436823741926){G2}:0.0008071626190684138,((tax113{G2}:0.010085802898481941,(tax98{G2}:0.031176294406067465,tax100{G2}:0.020263477178334148){G2}:0.023161812872482927){G2}:0.03499354786196447,(tax128{G2}:0.004269264497034189,(tax118{G2}:0.024778622198206576,(tax115{G2}:0.016130816021713366,tax101{G2}:0.04707862485403731){G2}:0.01090289315772599){G2}:0.0009611429436207396){G2}:0.01614073838139694){G2}:0.012388066970050425){G2}:0.009137062156414862,(tax134{G1}:0.001410670240155791,((tax130{G1}:0.002863559223434444,((tax120{G1}:0.002895851046563675,tax97{G1}:0.06242461495498733){G1}:0.018517529607584054,((tax94{G1}:0.04062876618398045,(tax93{G1}:0.04087464018063941,(tax84{G1}:0.012867391369594167,(tax85{G1}:0.0037079768850811017,tax74{G1}:0.02564786547837911){G1}:0.008180885859363152){G1}:0.04519854610854034){G1}:0.004559839557940254){G1}:0.04200961845139506,((tax89{G1}:0.023812499268666836,(tax78{G1}:0.0023469781827387052,tax77{G1}:0.0041053484202984505){G1}:0.04500979604104831){G1}:0.04302044196050195,((tax95{G1}:0.029124856787997415,tax80{G1}:0.06119772126319971){G1}:0.01190873276637045,((tax91{G1}:0.032557155445312826,tax76{G1}:0.05902708482286135){G1}:0.008853934227170651,(tax81{G1}:0.04070576252869059,tax92{G1}:0.01926010456019834){G1}:0.019441405412938432){G1}:0.012144673305076462){G1}:0.013095874015112018){G1}:0.02442344574515401){G1}:0.08921243957295114){G1}:0.007142756792580704){G1}:0.008829097679061156,(tax121{G2}:0.014774781177450664,(tax96{G1}:0.013691931863796266,(((tax69{G2}:0.032264093685494315,tax71{G2}:0.01407440129503382){G2}:0.010021215207215758,((tax38{G2}:0.07768866895527832,(tax59{G2}:0.03007607272804251,(tax65{G2}:0.011195944187118974,(tax45{G2}:0.019404170260284494,tax36{G2}:0.041746390093208456){G2}:0.016456154539289003){G2}:0.011335027207775628){G2}:0.012904655646124256){G2}:0.0004669125681408879,((tax58{G2}:0.03271363496227432,tax63{G2}:0.021090507233771053){G2}:0.004614472077992055,((tax49{G2}:0.013227517325014258,tax54{G2}:0.009986450711016556){G2}:0.02697409391356988,(tax41{G2}:0.025704997006058716,tax62{G2}:0.003308118027352456){G2}:0.01895960156374354){G2}:0.00520556363817697){G2}:0.013354184394797531){G2}:0.03949873663232511){G2}:0.08814944527383035,(tax82{G1}:0.03711451491374063,(tax79{G1}:0.04063125764334151,(tax87{G1}:0.008990539400523329,(((tax67{G1}:0.0001639165953327414,tax43{G1}:0.039866335285394974){G1}:0.014966012568011966,(tax68{G1}:0.00017385178199190687,(tax64{G1}:0.012425316025450092,(tax48{G1}:0.013788807213186071,tax47{G1}:0.014576101052456136){G1}:0.021068153428553657){G1}:0.013257316749151744){G1}:0.0029877401718464897){G1}:0.01106432294943367,(tax60{G1}:0.02893859673216692,(tax42{G1}:0.02490528669948449,(tax57{G1}:0.003459619742458121,((tax30{G1}:0.014645625890914893,tax31{G1}:0.0033702088940325546){G1}:0.003703007475892835,(tax29{G1}:0.039252857831692634,(((tax15{G1}:0.01983878171613616,tax16{G1}:0.017986037633735248){G1}:0.03525928722320944,(tax5{G1}:0.027761376380018053,(tax1{G1}:0.02782098728356268,tax11{G1}:0.0030324431640132944){G1}:0.010510477048623698){G1}:0.055820789241652714){G1}:0.03767331148992319,((tax24{G1}:0.007175047458096855,tax23{G1}:0.011235689593367536){G1}:0.06351984936512417,(((tax7{G1}:0.07532677083945756,tax18{G1}:0.046291321503111875){G1}:0.007048377205223803,((tax26{G1}:0.023422580363221448,tax28{G1}:0.007664303828376792){G1}:0.012320999489054891,(tax12{G1}:0.04030342918148052,tax25{G1}:0.010483165760666734){G1}:0.026395406999833324){G1}:0.0005687391979881872){G1}:0.028111554584236965,((tax19{G1}:0.05052581164061923,(tax27{G1}:0.0023767815598638156,(tax6{G1}:0.052376073350481764,tax2{G1}:0.06500255878737643){G1}:0.006012735920981375){G1}:0.02677539270541881){G1}:0.004152535117488408,((tax14{G1}:0.038199846424995014,(tax22{G1}:0.016508314844179722,(tax13{G1}:0.00945744491118101,tax3{G1}:0.03904921577462191){G1}:0.02687723385841604){G1}:0.002590370072479059){G1}:0.027955091353178634,(tax9{G1}:0.06089721615838432,((tax4{G1}:0.05589281221412982,tax20{G1}:0.01405205928231837){G1}:0.01387572818399487,(tax10{G1}:0.05158630023548844,(tax21{G1}:0.013761482957405551,(tax17{G1}:0.022059098678221492,tax8{G1}:0.04853646685896379){G1}:0.0014181210151387098){G1}:0.005687386318420702){G1}:0.006318210150167166){G1}:0.0018850318609619052){G1}:0.02002504347665198){G1}:0.0032708653946059425){G1}:0.02246390781058619){G1}:0.004502717400081152){G1}:0.0013932860029520725){G1}:0.011767166200922805){G1}:0.007073216714774059){G1}:0.051486961702847){G1}:0.010140432079996714){G1}:0.023750411533181325){G1}:0.01765324667827937){G1}:0.10272056344940823){G1}:0.0172359917929785){G1}:0.0015944532923444857){G1}:0.0028014708861646745){G1}:0.018045636509271913){G1}:0.13405332669329278){G1}:0.022501175901579677){G1}:0.012435250833861461){G1}:0.011834226521399056){G1}:0.033818800966523836){G1}:0.06371357523230421){G1}:0.02570000743694953){G1}:0.01465804764681862){G1}:0.05150432888672839){G1}:0.037146787704057695){G1}:0.04446588901007182){G1}:0.02381249496749753){G1}:0.004147566559762147){G1}:0.016098515785999){G1}:0.031186215545207105){G1}:0.09766150222274297){G1}:0.030565323611596966){G1}:0.002587886196604572){G1}:0.006439903107940489){G1}:0; \ No newline at end of file diff --git a/src/CodonMolecularEvolution.jl b/src/CodonMolecularEvolution.jl index 7f39416..6a75e05 100644 --- a/src/CodonMolecularEvolution.jl +++ b/src/CodonMolecularEvolution.jl @@ -1,6 +1,6 @@ module CodonMolecularEvolution -using FASTX, MolecularEvolution, Measures, Plots, StatsBase, Distributions, DataFrames, CSV, NLopt, ParameterHandling, LinearAlgebra, Phylo +using FASTX, MolecularEvolution, Measures, Plots, StatsBase, Distributions, DataFrames, CSV, NLopt, ParameterHandling, LinearAlgebra, Phylo, LaTeXStrings, Random using NNlib, Distributions, Zygote, AdvancedHMC, LogDensityProblems, SimpleUnPack, AbstractMCMC, LogDensityProblemsAD, Interpolations, MCMCChains abstract type difFUBARGrid end @@ -14,6 +14,7 @@ include("FUBAR/FUBAR.jl") include("smoothFUBAR/smoothFUBAR.jl") include("simulations/alphabeta/alphabeta.jl") +include("simulations/ou_hb.jl") export difFUBARBaseline, @@ -21,6 +22,15 @@ export difFUBARTreesurgery, difFUBARTreesurgeryAndParallel, FUBAR, - smoothFUBAR - + smoothFUBAR, + dNdS, + HBdNdS, + std2maxdNdS, + maxdNdS2std, + HB98AA_matrix, + ShiftingHBSimModel, + ShiftingHBSimPartition, + PiecewiseOUModel, + shiftingHBviz, + HBviz end diff --git a/src/FUBAR/FUBAR.jl b/src/FUBAR/FUBAR.jl index ab7ce88..3ba6b3c 100644 --- a/src/FUBAR/FUBAR.jl +++ b/src/FUBAR/FUBAR.jl @@ -183,6 +183,7 @@ function alternating_maximize(f, a_bounds, b_bounds; final_tol = 1e-20, max_iter return a,b,-m end +#Generalize this to work with any grid dimensions! function interpolating_LRS(grid) itp = interpolate(grid, BSpline(Cubic(Line(OnGrid())))) @@ -207,7 +208,7 @@ end z = @. itp(x', y) contour(x, y, z, levels=30, color=:turbo, fill=true, linewidth = 0, colorbar = false, size = (400,400)) =# -function fifeFUBAR(f::FUBARgrid, outpath; verbosity=1, exports=true) +function FIFE(f::FUBARgrid, outpath; verbosity=1, exports=true) exports && init_path(outpath) LLmatrix = reshape(f.LL_matrix, length(f.grid_values),length(f.grid_values),:) #Note: dim1 = beta, dim2 = alpha, so we transpose going in: @@ -221,4 +222,4 @@ function fifeFUBAR(f::FUBARgrid, outpath; verbosity=1, exports=true) return df_results end -export fifeFUBAR +export FIFE diff --git a/src/simulations/alphabeta/alphabeta.jl b/src/simulations/alphabeta/alphabeta.jl index c8a830f..b5c7be0 100644 --- a/src/simulations/alphabeta/alphabeta.jl +++ b/src/simulations/alphabeta/alphabeta.jl @@ -19,17 +19,6 @@ const demo_nucmat = [ -0.256236 0.0697056 0.152411 0.034119; 0.152411 0.0596187 -0.251381 0.0393506; 0.034119 0.144795 0.0393506 -0.218264] -#Should maybe be in MolecularEvolution.jl -function standard_tree_sim(ntaxa) - n(t) = (10*ntaxa)/(1+exp(t-10)) - return sim_tree(ntaxa,n,ntaxa/5, mutation_rate = 0.05) -end -function ladder_tree_sim(ntaxa) - n(t) = ntaxa/10 - return sim_tree(ntaxa,n,1.0, mutation_rate = 1/sqrt(ntaxa)) -end - -export standard_tree_sim, ladder_tree_sim """ sim_alphabeta_seqs(alphavec::Vector{Float64}, betavec::Vector{Float64}, singletree, nucmat::Array{Float64,2}, f3x4::Array{Float64,2}; diff --git a/src/simulations/ou_hb.jl b/src/simulations/ou_hb.jl new file mode 100644 index 0000000..0a56720 --- /dev/null +++ b/src/simulations/ou_hb.jl @@ -0,0 +1,704 @@ +#Halpern and Bruno model where amino acid fitnesses evolve over time using a piecewise constant approximation to an OU process. +#Authors: Hassan Sadiq and Ben Murrell + +#To do: +#- Check that an alternate genetic code will thread through all of this. +#- Introduce a jump model with an additional class of jumps to independent draws from the equilibrium fits, and make it easy to use this instead +#- Allow viz functions to have both kids of offsets (include the AA offsets in the fitness plot, but not the codon offsets) + +abstract type CodonSimulationPartition <: Partition end #Must have .sites::Int64, .codons::Vector{Int64}, and .code::GeneticCode + +#Does nothing because the `forward!` function implicitly samples. +function MolecularEvolution.sample_partition!(p::CodonSimulationPartition) +end + +function MolecularEvolution.partition2obs(part::CodonSimulationPartition) + return join([part.code.sense_codons[part.codons[i]] for i in 1:part.sites]) +end + +const d_code = MolecularEvolution.universal_code + +""" + HB_fixation_rate(from_codon, to_codon) + + Returns the fixation rate of a mutation from `from_codon` to `to_codon` under the HB98 model. +""" +function HB_fixation_rate(from_codon, to_codon) + diff = to_codon - from_codon + if abs(diff) < 0.001 + f_ab = 1/12 * (12 + diff*(6 + diff)) + else + f_ab = diff/(1-exp(-diff)) + end +end + +#= +#Confirming local approx +t0(diff) = diff/(1-exp(-diff)) +t1(diff) = 1/12 * (12 + diff*(6 + diff)) +r = -0.01:0.000001:0.01 +plot(r, t0.(r) .- t1.(r), label = "1/12 * (12 + diff*(6 + diff))") +=# + +""" + HB98AA_matrix(alpha, nuc_matrix, AA_fitness; genetic_code = MolecularEvolution.universal_code) + +Returns the rate matrix for a codon model using the HB98 model where each AA has a different fitness. `alpha` is the synonymous rate, `nuc_matrix` is the symmetric nucleotide substitution matrix, +and `AA_fitness` is the fitness of each amino acid. +""" +function HB98AA_matrix(alpha, nuc_matrix, AA_fitness; genetic_code = MolecularEvolution.universal_code) + codon_matrix = zeros(length(genetic_code.sense_codons), length(genetic_code.sense_codons)) + for p in genetic_code.syn_positions + codon_matrix[p[1][1], p[1][2]] = alpha * nuc_matrix[p[2][2], p[2][3]] + end + for p in genetic_code.nonsyn_positions + f_ab = HB_fixation_rate(AA_fitness[genetic_code.codon2AA_pos[p[1][1]]], AA_fitness[genetic_code.codon2AA_pos[p[1][2]]]) + codon_matrix[p[1][1], p[1][2]] = alpha * nuc_matrix[p[2][2], p[2][3]] * f_ab + end + for i = 1:length(genetic_code.sense_codons) + codon_matrix[i, i] = -sum(codon_matrix[i, :]) + end + return codon_matrix +end + +#Assumption: nuc matrix is symmetric +function HB98_eqfreqs(fitnesses) #Result from HB98 + v = exp.(fitnesses .- maximum(fitnesses)) + return v ./ sum(v) +end +HB98AA_eqfreqs(AA_fitness; genetic_code = MolecularEvolution.universal_code) = HB98_eqfreqs(AA_fitness[genetic_code.codon2AA_pos]) +function HB98AA_expected_subs_per_site(nuc_matrix, AA_fitness; genetic_code = MolecularEvolution.universal_code) + eqfreqs = HB98AA_eqfreqs(AA_fitness, genetic_code = genetic_code) + Q = HB98AA_matrix(1.0, nuc_matrix, AA_fitness, genetic_code = genetic_code) + return expected_subs_per_site(Q, eqfreqs) +end +HB98AA_neutral_scale_nuc_matrix(nucm; genetic_code = MolecularEvolution.universal_code) = nucm ./= HB98AA_expected_subs_per_site(nucm, zeros(20), genetic_code = genetic_code) +#/Assumption + +""" + PiecewiseOUModel(event_rate::Float64, eq_std::Float64, mixing::Float64; delta_t = 1.0) + PiecewiseOUModel(offsets::Vector{Float64}) + PiecewiseOUModel(event_rate::Float64, eq_std::Float64, mixing::Float64, mu::Union{Float64,Vector{Float64}}, offsets::Union{Float64,Vector{Float64}}, codon_offsets::Union{Float64,Vector{Float64}}) + +A piecewise constant approximation to an OU process, intended to simulate fitnesses evolving over phylogenies. +The equilibrium standard deviation is directly parameterized (`eq_std`), as is the rate at which the process mixes to equilibrium (`mixing`). +`event_rate` controls how often the fitness changes occur, where the mixing rate is scaled to compensate for the increased rate of change to achieve +approximately the same amount of change per unit time even as the `event_rate` changes. A very high `event_rate` will behave more like continuous diffusion, +but will be more computationally expensive to sample from. `mu` can also be set to control the mean fitnesses. +The model also permits `offsets`, which are added to the fitnesses as they are passed into the model. For a single process, these are confounded with the mean `mu` +but if the offsets change (eg. from one branch to another) the effective fitnesses will immidiately change, whereas if `mu` changes the fitnesses will drift towards `mu`. +""" +mutable struct PiecewiseOUModel{M<:Union{Float64,AbstractVector{Float64}},O<:Union{Float64,AbstractVector{Float64}},C<:Union{Float64,AbstractVector{Float64}}} + mu::M #OU mean. Scalar (in which case it is the same for all AAs) or vector (one per AA) + delta_t::Float64 #Scales the size of the fitness jumps + var::Float64 #OU instantaneous diffusion variance + reversion_rate::Float64 #Rate of reversion to the mean + event_rate::Float64 #Rate of fitness jumps + offsets::O #These do not change, thus allowing preferred codons. Incorporated right at the end. + eq_std::Float64 #Standard deviation of the equilibrium distribution + codon_offsets::C #These do not change, thus allowing preferred codons. + function PiecewiseOUModel(event_rate::Float64, eq_std::Float64, mixing::Float64; delta_t = 1.0) + new{Float64,Float64,Float64}(0.0, delta_t, 2*(mixing/event_rate)*(eq_std^2), mixing/event_rate, event_rate, 0.0, eq_std, 0.0) + end + #Need to test this constructor: + function PiecewiseOUModel(event_rate::Float64, eq_std::Float64, mixing::Float64, mu::M, offsets::O, codon_offsets::C) where {M,O,C} + new{M,O,C}(mu, 1.0, 2*(mixing/event_rate)*(eq_std^2), mixing/event_rate, event_rate, offsets, eq_std, codon_offsets) + end + function PiecewiseOUModel(offsets::Vector{Float64}) #Constructor for static model, using just offsets for fitnesses + new{Float64,Vector{Float64}, Float64}(0.0, 1.0, 1e-15, 1.0, 0.0, offsets, 1e-15, 0.0) + end +end + +""" + jump(x_source, m::PiecewiseOUModel) + +Evolves values over time using a piecewise constant approximation to an OU process, where this function computes the new distribution for a single discrete jump. +`x_source` is the vector of fitnesses, and m is the PiecewiseOUModel. + +""" +function jump(x_source, x_rand, m::PiecewiseOUModel) + a = (m.reversion_rate * m.delta_t) + return x_rand .* (m.eq_std * sqrt(1 - exp(-2*a))) .+ m.mu .+ ((x_source .- m.mu) .* exp(-a)) +end +jump(x_source::Float64, m::PiecewiseOUModel) = jump(x_source, randn(), m) +jump(x_source::Vector{Float64}, m::PiecewiseOUModel) = jump(x_source, randn(length(x_source)), m) + +#= +m = PiecewiseOUModel(100.0, 10.0, 10.0) +CodonMolecularEvolution.jump(zeros(20), m) +=# + +""" + HB98AA_row(current_codon, alpha, nuc_matrix, AA_fitness; genetic_code=MolecularEvolution.universal_code) + +Returns the rate row for a codon model using the HB98 model where each AA has a different fitness. `current_codon` is the current codon, `alpha` is the synonymous rate, +`nuc_matrix` is the symmetric nucleotide substitution matrix, and `AA_fitness` is the fitness of each amino acid. +""" +function HB98_row(current_codon, alpha, nuc_matrix, fitness; genetic_code=MolecularEvolution.universal_code) + row = zeros(genetic_code.num_sense) + codon_aa_i = fitness[current_codon] + for j in 1:genetic_code.num_sense + if genetic_code.codon_to_nuc_map[current_codon,j] != (-1,-1,-1) + c2n_map = genetic_code.codon_to_nuc_map[current_codon,j] + f_ab = HB_fixation_rate(codon_aa_i,fitness[j]) + row[j] = alpha * nuc_matrix[c2n_map[2],c2n_map[3]] * f_ab + end + end + return row +end + +HB98AA_row(current_codon, alpha, nuc_matrix, AA_fitness, codon_fitness_offsets; genetic_code=MolecularEvolution.universal_code) = HB98_row(current_codon, alpha, nuc_matrix, AA_fitness[genetic_code.codon2AA_pos] .+ codon_fitness_offsets, genetic_code = genetic_code) + +#= Check the row matches the matrix: +fs = randn(20) +c = rand(1:61) +r1 = CodonMolecularEvolution.HB98AA_row(c, 1.23, CodonMolecularEvolution.demo_nucmat, fs, 0.0) +q1 = CodonMolecularEvolution.HB98AA_matrix(1.23, CodonMolecularEvolution.demo_nucmat, fs) +q1[c,c] = 0 +@assert isapprox(sum(q1[c,:] .- r1), 0.0) +=# + +""" + jumpy_HB_codon_evolve(fitnesses, codon, ou_model, nuc_matrix, alpha, time; + genetic_code = MolecularEvolution.universal_code, push_into = nothing) + +Evolves fitnesses and codons over time using the HB98 model. `fitnesses` is the vector of fitnesses, `codon` is the current codon, `ou_model` is the OU model, +`nuc_matrix` is the symmetric nucleotide substitution matrix, `alpha` is the synonymous rate, and `time` is the total time to evolve over. +""" +function jumpy_HB_codon_evolve(fitnesses, codon, scaled_fitness_model, nuc_matrix, alpha, time; + genetic_code = MolecularEvolution.universal_code, push_into = nothing) + codon_jumps = 0 + fitness_jumps = 0 + current_fits = copy(fitnesses) + current_codon = codon + t = 0.0 + next_event = 0.0 + while t+next_event < time + HBrow = HB98AA_row(current_codon, alpha, nuc_matrix, current_fits .+ scaled_fitness_model.offsets, scaled_fitness_model.codon_offsets, genetic_code=genetic_code) + sum_HBrow = sum(HBrow) + rOU,rHB = (scaled_fitness_model.event_rate,sum_HBrow) + total_rate = rOU+rHB + next_event = randexp()/total_rate + t = t+next_event + if t < time + event_index = sample(1:2,Weights([rOU,rHB])) + if event_index == 1 # Fitness jump event + fitness_jumps += 1 + current_fits = jump(current_fits, scaled_fitness_model) + else # Codon substitution event + codon_jumps += 1 + current_codon = sample(1:length(HBrow),Weights(HBrow)) + end + end + if !isnothing(push_into) + push!(push_into,(t,current_codon,copy(current_fits))) + end + end + return current_fits, current_codon, codon_jumps, fitness_jumps +end + + +""" + ShiftingHBSimModel(sites, alphas, ou_params, nuc_matrix; rescale = true) + +A model for simulating fitnesses evolving over phylogenies using the HB98 model. `sites` is the number of sites, `alphas` is a vector of synonymous rates (one per site), +`ou_params` is a vector of `PiecewiseOUModel`s (one per site), and `nuc_matrix` is the symmetric nucleotide substitution matrix (shared across sites). +If 'rescale' is true, then the nuc matrix is scaled so that, when `alpha=1` and the fitnesses`=0`, the HB model expects one substitution per site per unit time. +""" +mutable struct ShiftingHBSimModel <: MolecularEvolution.SimulationModel + sites::Int64 + alphas::Vector{Float64} + ou_params::Vector{PiecewiseOUModel} + nuc_matrix::Matrix{Float64} + code::MolecularEvolution.GeneticCode + function ShiftingHBSimModel(s,a,oup,n; rescale = true, code = MolecularEvolution.universal_code) + new(s,a,oup,rescale ? HB98AA_neutral_scale_nuc_matrix(n, genetic_code = code) : n, code) + end +end + +ShiftingHBSimModel(nuc_matrix::Matrix{Float64}, ou_params::Vector{PiecewiseOUModel{A,B,C}}; alpha = ones(length(ou_params)), rescale = true, code = MolecularEvolution.universal_code) where {A,B,C} = ShiftingHBSimModel(length(ou_params), alpha, ou_params, nuc_matrix, rescale = rescale, code = code) + + +""" + ShiftingHBSimPartition(model::ShiftingHBSimModel; burnin_time = 100.0, code = MolecularEvolution.universal_code) + +Constructs a partition that tracks evolving fitnesses and codons. Only useable for sampling (not likelihood calculations). +""" +mutable struct ShiftingHBSimPartition <: CodonSimulationPartition + sites::Int64 + fitnesses::Matrix{Float64} + codons::Vector{Int64} + code::MolecularEvolution.GeneticCode + function ShiftingHBSimPartition(sites; code = MolecularEvolution.universal_code) + new(sites,zeros(length(code.amino_acids),sites),ones(Int64,sites), code) + end + function ShiftingHBSimPartition(model::ShiftingHBSimModel; burnin_time = 100.0) + fits = zeros(length(model.code.amino_acids),length(model.ou_params)) + for (i,m) in enumerate(model.ou_params) + fits[:,i] .= randn(length(model.code.amino_acids)) .* m.eq_std .+ m.mu + end + #Starting codons ignore codon_offsets. Should be ok because burn-in, but should adjust. + codons = [sample(1:61, Weights(HB98AA_eqfreqs(fits[:,i] .+ model.ou_params[i].codon_offsets))) for i in 1:length(model.ou_params)] + for (i,m) in enumerate(model.ou_params) + f, c, _, _ = jumpy_HB_codon_evolve(fits[:,i], codons[i], m, model.nuc_matrix, model.alphas[i], burnin_time, genetic_code = model.code) + fits[:,i] .= f + codons[i] = c + end + new(length(model.ou_params), fits, codons, model.code) + end +end + + +function MolecularEvolution.forward!(dest::ShiftingHBSimPartition, + source::ShiftingHBSimPartition, + model::ShiftingHBSimModel, + node::FelNode) + for site in 1:model.sites + fitnesses = source.fitnesses[:,site] + codon = source.codons[site] + ou_model = model.ou_params[site] + alpha = model.alphas[site] + fitnesses,codon,_,_ = jumpy_HB_codon_evolve(fitnesses, + codon, + ou_model, + model.nuc_matrix, + alpha, + node.branchlength, + genetic_code = model.code) + dest.fitnesses[:,site] .= fitnesses + dest.codons[site] = codon + end +end + +""" + dNdS(q1, q0, p; code = MolecularEvolution.universal_code) + dNdS(q1, q0; code = MolecularEvolution.universal_code) + +Returns an analytic expectation of the dN/dS ratio for a codon model. `q1` is the rate matrix where selection is active (eg. a Halpern and Bruno model with a set of fitnesses), +and `q0` is the corresponding rate matrix where selection is inactive (eg. a Halpern and Bruno model with all fitnesses equal). +`p` is the frequency distribution over codons that the dN/dS ratio is computed against. If not provided, this is computed as the equilibrium from `q1`. +If only a vector of fitnesses are provided, then the `q1`, `q0`, and `p` are computed assuming a Halpern and Bruno model. +``` +fs = randn(20) +nucm = CodonMolecularEvolution.demo_nucmat +q1 = HB98AA_matrix(1.0, nucm, fs) +q0 = HB98AA_matrix(1.0, nucm, zeros(20)) +dNdS(q1, q0) +``` +""" +function dNdS(q1, q0, p::Vector{Float64}; code = d_code) + nsi = [CartesianIndex(i[1][1], i[1][2]) for i in code.nonsyn_positions] + si = [CartesianIndex(i[1][1], i[1][2]) for i in code.syn_positions] + dN_numerator = sum((q1 .* p)[nsi]) + dN_denominator = sum((q0 .* p)[nsi]) + dS_numerator = sum((q1 .* p)[si]) + dS_denominator = sum((q0 .* p)[si]) + dn = (dN_numerator/dN_denominator) + ds = (dS_numerator/dS_denominator) + return dn/ds +end + +dNdS(q1, q0; code = MolecularEvolution.universal_code) = dNdS(q1, q0, exp(q1 * 100)[1,:], code = code) + +""" + HBdNdS(fs::Vector{Float64}; code = MolecularEvolution.universal_code, nucm = CodonMolecularEvolution.demo_nucmat) + HBdNdS(fs_pre::Vector{Float64}, fs_post::Vector{Float64}; code = MolecularEvolution.universal_code, nucm = CodonMolecularEvolution.demo_nucmat) + +Returns the expected dN/dS ratio for a Halpern and Bruno model with a vector of fitnesses. If two vectors are provided, then the dN/dS ratio is computed for the shift from `fs_pre` to `fs_post`. +""" +function HBdNdS(fs::Vector{Float64}; code = d_code, nucm = CodonMolecularEvolution.demo_nucmat) + q1 = HB98AA_matrix(1.0, nucm, fs, genetic_code = code) + q0 = HB98AA_matrix(1.0, nucm, zeros(20), genetic_code = code) + return dNdS(q1, q0, HB98AA_eqfreqs(fs, genetic_code = code), code = code) +end + +HBdNdS(fs_pre::Vector{Float64}, fs_post::Vector{Float64}; code = d_code, nucm = CodonMolecularEvolution.demo_nucmat) = dNdS(HB98AA_matrix(1.0, nucm, fs_post, genetic_code = code), HB98AA_matrix(1.0, nucm, zeros(20), genetic_code = code), exp(HB98AA_matrix(1.0, nucm, fs_pre, genetic_code = code) * 100)[1,:], code = code) + +""" + std2maxdNdS(σ) + +Approximation for the maximum dN/dS ratio as a function of the standard deviation of the fitnesses, assuming Gaussian fitnesses and a Halpern and Bruno model, +where the fitnesses have just shifted from one Gaussian sample to another. Note: this is not an analytical solution, but a serindipitously good approximation. + +``` +function monte_carlo_maxdNdS(σ; N=100_000) + sum_val = 0.0 + for _ in 1:N + f_i = σ * randn() + f_j = σ * randn() + sum_val += HB_fixation_rate(f_i, f_j) + end + return sum_val / N +end +vs = 0:0.01:10 +plot(vs, monte_carlo_maxdNdS.(vs), label = "Monte Carlo", alpha = 0.8) +plot!(vs, std2maxdNdS.(vs), label = "Approx", linestyle = :dash, alpha = 0.8) +``` +""" +std2maxdNdS(σ) = sqrt(σ^2 + π) / sqrt(π) + +""" + maxdNdS2std(ω) + +Inverse of std2maxdNdS(σ). Estimates the standard deviation of the fitnesses that will produce, in expectation, a dN/dS ratio of `ω`, assuming Gaussian fitnesses and a Halpern and Bruno model, +where the fitnesses have just shifted from one Gaussian sample to another. Note: this is not an analytical solution, but a serindipitously good approximation. +""" +maxdNdS2std(ω) = sqrt(π * (ω^2 - 1)) + +""" + time_varying_HB_freqs(ts, T, fst, init_freqs; nucm = CodonMolecularEvolution.demo_nucmat, alpha = 1.0, delta_t = 0.002, prezero_delta_t = 0.5) + +Compute the time-varying codon frequencies and expected dN/dS over time for a sequence of fitnesses, under the Halpern-Bruno model. +`ts` is a vector of times, `T` is the total time, `fst` is a vector of vector of fitnesses, `init_freqs` is the initial codon frequencies, +`nucm` is the nucleotide substitution matrix, `alpha` is the alpha parameter, `delta_t` is the discretization time step for the simulation, +and `prezero_delta_t` is the time step used before `t=0`. `fst[i]` is assumed to be the fitness between `t = ts[i]` and `t = ts[i+1]`. +""" +function time_varying_HB_freqs(ts, T, fst, init_freqs; nucm = CodonMolecularEvolution.demo_nucmat, alpha = 1.0, delta_t = 0.002, prezero_delta_t = 0.5) + cod_freq_collection = [] + t_collection = [] + dnds_collection = [] + ind = 1 + ts = [ts; Inf] + t = ts[ind] + codon_freqs = copy(init_freqs) + while (t < T) && (ind + 1 <= length(ts)) + if t < 0-prezero_delta_t + dt = min(prezero_delta_t, ts[ind + 1] - t) + else + dt = min(delta_t, ts[ind + 1] - t) + end + Q = HB98AA_matrix(alpha, nucm, fst[ind]) + P = exp(Q * dt) + codon_freqs = (codon_freqs' * P)' + push!(cod_freq_collection, copy(codon_freqs)) + push!(dnds_collection, dNdS(Q, HB98AA_matrix(alpha, nucm, zeros(20)), codon_freqs)) + if t + dt >= ts[ind + 1] + ind += 1 + end + t += dt + push!(t_collection, t) + end + return cod_freq_collection, t_collection, dnds_collection +end + + +function piecewise_linear_plot!(t_vec, fs_vec, tmax; shift = NaN, colors = nothing, kw...) + fs = zeros(length(fs_vec[1]), length(fs_vec)*3) + ts = zeros(length(fs_vec)*3) + push!(t_vec, tmax) + for i in 1:length(fs_vec) + fs[:, 3i-2:3i-1] .= fs_vec[i] + fs[:, 3i] .= fs_vec[i] .+ shift + ts[3i-2:3i] .= [t_vec[i], t_vec[i+1], t_vec[i+1]] + end + for i in 1:size(fs, 1) + plot!(ts, fs[i,:], label = :none, color = colors[i]; kw...) + end +end + +function add_muller!(pos_mean_matrix; x_ax = 1:size(pos_mean_matrix)[2], plot_theme = nothing, plot_perm = 1:size(pos_mean_matrix)[1], + edge_pad = 6, fillalpha = 0.8, colormap = 1:size(pos_mean_matrix)[1]) + pmm = pos_mean_matrix[plot_perm,:] + cum_freq = zeros(size(pmm)[2]) + for i in 1:size(pmm)[1] + plot!(x_ax,1 .- cum_freq, fillrange = 1 .- (cum_freq .+ pmm[i,:]), fillalpha = fillalpha, linewidth = 0, color = plot_theme[colormap[plot_perm[i]]], label = :none) + cum_freq .+= pmm[i,:] + end +end + +""" + HBviz(ts, fst, T, alp, nucm) + +Visualize over time the fitness trajectory, the codon frequencies, and the expected dN/dS. `ts` is a vector of times, `fst` is a vector of fitnesses, `T` is the total time, `alp` is the alpha parameter, and `nucm` is the nucleotide substitution matrix. + +```julia +σ = 2.0 +alpha = 1.0 +nucm = CodonMolecularEvolution.demo_nucmat +fst = [randn(20) .* σ, randn(20) .* σ] +ts = [-100.0, 1.0] +T = 2.0 +HBviz(ts, fst, T, alpha, nucm) +``` +""" +function HBviz(ts::Vector{Float64}, fst::Vector{Vector{Float64}}, T::Float64, alp, nucm; + scram = [8, 4, 11, 17, 9, 14, 18, 2, 10, 13, 16, 20, 12, 6, 19, 7, 1, 15, 5, 3], + viz_size = (1200,500), σ = nothing) + cfc, tc, dndses = time_varying_HB_freqs(ts, T, fst, ones(61)/61, alpha = alp) + #Removing burnin: + prezero = findlast(tc .<= 0) + prezero = prezero == nothing ? 1 : prezero + tc = tc[prezero:end] + cfc = cfc[prezero:end] + dndses = dndses[prezero:end] + fl = findlast(ts .<= 0) + fl = fl == nothing ? 1 : fl + ts = ts[fl:end] + fst = fst[fl:end] + ### + perm = sortperm(MolecularEvolution.universal_code.amino_acid_lookup) + AA_nums = MolecularEvolution.universal_code.codon2AA_pos[perm] + colorv = zeros(Int, 61) + c = 1 + for i in 1:20 + s = findall(AA_nums .== i) + colorv[c:c+length(s)-1] .= 10*(scram[i]-1) .+ collect(1:length(s)) .* 2 .- 1 + c += length(s) + end + plot_theme_exploded = cgrad(:darkrainbow, 200, categorical = true) + AA_plot_theme = plot_theme_exploded[1 .+ (10 .* (scram .- 1))] + plot_theme = plot_theme_exploded[colorv] + pl1 = plot(tc, zeros(length(tc)), size = viz_size, xlim = (0, T), linestyle = :dash, color = "black", alpha = 0.0, + ylabel = L"f_t", xtickfontcolor = RGBA(0,0,0,0), legend=false) + extr = maximum([maximum(abs.(f)) for f in fst]) * 1.1 + piecewise_linear_plot!(ts, fst, T, colors = AA_plot_theme, ylims = (-extr, extr)) + pl2 = plot(tc, zeros(length(tc)), size = viz_size, alpha = 0.0, xlim = (0, T), ylabel = L"C_t", xtickfontcolor = RGBA(0,0,0,0), legend=false, top_margin = -12Plots.mm) + add_muller!(stack(cfc)[perm,:], plot_theme = plot_theme, x_ax = tc) + pl3 = plot(tc, dndses, size = viz_size, xlim = (0, T), xlabel = "Time", ylabel = L"E_C_t[dN/dS|f_t]", ylim = (0, maximum(dndses[tc .>= 0]) + 0.1), + label = :none, top_margin = -12Plots.mm, color = "black") + plot!([0,T], [1,1], color = "black", linestyle = :dash, alpha = 0.5, label = :none) + if !isnothing(σ) + maxdnds = std2maxdNdS(σ) + plot!([0,T], [maxdnds,maxdnds], color = "red", linestyle = :dash, alpha = 0.5, label = :none) + end + return plot(pl1, pl2, pl3, layout=(3, 1), link=:x, margins = 8Plots.mm, plot_layout = :tight, widen=false, tickdirection=:out) +end + +""" + shiftingHBviz(T, event_rate, σ, mixing_rate, alpha, nucm; T0 = -20) + +Visualize the fitness trajectory, codon frequencies, and expected dN/dS over time for a shifting HB process. +`T` is the total time, `event_rate` is the rate of fitness shifts, `σ` is the standard deviation of the fitnesses, +`mixing_rate` is the rate of mixing between fitnesses, `alpha` is the alpha parameter, and `nucm` is the nucleotide substitution matrix. +`T0` controls the burnin time, to ensure the process is at equilibrium at `t=0`. + +```julia +T = 2.0 +mix = 1.0 +σ = 5.0 +event_rate = 100.0 +alpha = 1.0 +nucm = CodonMolecularEvolution.demo_nucmat +shiftingHBviz(T, event_rate, σ, mix, alpha, nucm) +``` +""" +function shiftingHBviz(T, event_rate, σ, mixing_rate, alpha, nucm; T0 = -20) + fs = randn(20) .* σ + m = PiecewiseOUModel(event_rate, σ, mixing_rate) + coll = [] + codon = sample(1:61, Weights(HB98AA_eqfreqs(fs))) + newfs, _, _, jumps = jumpy_HB_codon_evolve(fs, codon, m, nucm, alpha, T-T0, push_into = coll) + prepend!(coll, [(0.0, codon, fs)]) + ts = [c[1] for c in coll] .+ T0 + fst = [c[3] for c in coll] + return HBviz(ts, fst, T, alpha, nucm, σ = σ) +end + + + +############################################################################### +#A model for evolving log effective pop size & unscaled site fitnesses +############################################################################### + +# jumpy_NeHB_codon_evolve is currently fine for a single site. However, to get this version working over a +#branch for multiple sites, we need to: +# 1) First sample the Ne jumps over the branch (since these apply for all sites) +# 2) One site at a time, sample the fitness jumps and codon substitutions between each Ne jump event +# ShiftingNeHBSimModel and ShiftingNeHBSimPartition are commented out until this works. +""" + jumpy_NeHB_codon_evolve(fitnesses, logNe, codon, fitness_model, logNe_model, nuc_matrix, alpha, time; + genetic_code = MolecularEvolution.universal_code, push_into = nothing) + +Evolves codons, unscaled site-fitness, and log-pop-size together. +""" +function jumpy_NeHB_codon_evolve(fitnesses, logNe, codon, fitness_model, logNe_model, nuc_matrix, alpha, time; + genetic_code = MolecularEvolution.universal_code, push_into = nothing) + codon_jumps = 0 + fitness_jumps = 0 + logNe_jumps = 0 + current_fits = copy(fitnesses) + current_codon = codon + current_logNe = logNe + t = 0.0 + next_event = 0.0 + while t+next_event < time + HBrow = HB98AA_row(current_codon, alpha, nuc_matrix, (current_fits .+ fitness_model.offsets) .* exp(current_logNe) .* 2, fitness_model.codon_offsets .* exp(current_logNe) .* 2, genetic_code=genetic_code) + sum_HBrow = sum(HBrow) + rOU, rNe, rHB = (fitness_model.event_rate, logNe_model.event_rate, sum_HBrow) + total_rate = rOU+rHB+rNe + next_event = randexp()/total_rate + t = t+next_event + if t < time + event_index = sample(1:3,Weights([rOU,rNe,rHB])) + if event_index == 1 # Fitness jump event + fitness_jumps += 1 + current_fits = jump(current_fits, fitness_model) + elseif event_index == 2 # pop-size jump event + logNe_jumps += 1 + current_logNe = jump(current_logNe, logNe_model) + else # Codon substitution event + codon_jumps += 1 + current_codon = sample(1:length(HBrow),Weights(HBrow)) + end + end + if !isnothing(push_into) + push!(push_into,(t,current_codon,copy(current_fits),current_logNe)) + end + end + return current_logNe, current_fits, current_codon, codon_jumps, fitness_jumps, logNe_jumps +end + +""" + shiftingNeHBviz(T, f_event_rate, f_σ, f_mixing_rate, logNe_event_rate, logNe_σ, logNe_mean, logNe_mixing_rate, alpha, nucm; T0 = -20) + +Visualize the fitness trajectory, codon frequencies, and expected dN/dS over time for a shifting Ne HB process. +""" +function shiftingNeHBviz(T, f_event_rate, f_σ, f_mixing_rate, logNe_event_rate, logNe_σ, logNe_mean, logNe_mixing_rate, alpha, nucm; T0 = -20) + fs = randn(20) .* f_σ + logNe = randn() .* logNe_σ .+ logNe_mean + f_ou = PiecewiseOUModel(f_event_rate, f_σ, f_mixing_rate) + log_ne_ou = PiecewiseOUModel(logNe_event_rate, logNe_σ, logNe_mixing_rate, logNe_mean, 0.0, 0.0) + coll = [] + codon = sample(1:61, Weights(HB98AA_eqfreqs(fs .* exp(logNe) .* 2))) + CodonMolecularEvolution.jumpy_NeHB_codon_evolve(fs, logNe, codon, f_ou, log_ne_ou, nucm, alpha, T-T0, push_into = coll) + prepend!(coll, [(0.0, codon, fs, logNe)]) + ts = [c[1] for c in coll] .+ T0 + fst = [c[3] .* exp(c[4]) .* 2 for c in coll] #2Ne*s + return HBviz(ts, fst, T, alpha, nucm) +end + +#= + +mutable struct ShiftingNeHBSimModel <: MolecularEvolution.SimulationModel + sites::Int64 + alphas::Vector{Float64} # per-site synonymous rates + unscaled_ou_params::Vector{PiecewiseOUModel} # OU process for each site's unscaled fitness + logNe_model::PiecewiseOUModel # OU process for log(pop size) + nuc_matrix::Matrix{Float64} + code::MolecularEvolution.GeneticCode +end + +""" + ShiftingNeHBSimModel(nuc_matrix, unscaled_ou_params, logNe_model; alpha, rescale, code) + +Create a model with one OU process per site (unscaled_ou_params) and one OU process for log(N_e). +`alpha` can be a scalar or a vector of the same length as unscaled_ou_params. +If `rescale` is true, the nuc_matrix is scaled (HB98 neutral scaling). +""" +function ShiftingNeHBSimModel( + nuc_matrix::Matrix{Float64}, + unscaled_ou_params::Vector{PiecewiseOUModel{A,B,C}}, + logNe_model::PiecewiseOUModel; + alpha = ones(length(unscaled_ou_params)), + rescale::Bool = true, + code::MolecularEvolution.GeneticCode = MolecularEvolution.universal_code) where {A,B,C} + sites = length(unscaled_ou_params) + @assert length(alpha) == sites || length(alpha) == 1 "alpha must match number of sites or be a single value" + + # Copy or scale the nuc matrix if requested + mat = rescale ? HB98AA_neutral_scale_nuc_matrix(copy(nuc_matrix), genetic_code = code) : copy(nuc_matrix) + + return ShiftingNeHBSimModel( + sites, + length(alpha) == 1 ? fill(alpha[1], sites) : alpha, + unscaled_ou_params, + logNe_model, + mat, + code + ) +end + +mutable struct ShiftingNeHBSimPartition <: CodonSimulationPartition + sites::Int64 + logNe::Float64 + fitnesses::Matrix{Float64} # size: (number_of_AAs, sites) + codons::Vector{Int64} + code::MolecularEvolution.GeneticCode +end + +""" + ShiftingNeHBSimPartition(model; burnin_time) + +Constructs a partition for the `ShiftingNeHBSimModel`. +Initializes by: + - Drawing random unscaled fitnesses from each site's OU equilibrium. + - Drawing random logNe from its OU equilibrium. + - "Burning in" each site along the branch of length `burnin_time`. +""" +function ShiftingNeHBSimPartition( + model::ShiftingNeHBSimModel; + burnin_time::Float64 = 100.0 +) + # 1) sample an initial logNe from equilibrium + # The equilibrium of an OU in this piecewise-constant approximation is roughly Normal(mean=mu, sd=eq_std). + lp = randn() * model.logNe_model.eq_std + model.logNe_model.mu + + # 2) sample initial unscaled fitness for each site + nAAs = length(model.code.amino_acids) + fits = zeros(nAAs, model.sites) + for s in 1:model.sites + oup = model.unscaled_ou_params[s] + fits[:,s] .= randn(nAAs) .* oup.eq_std .+ oup.mu + end + + # 3) pick initial codons (arbitrary, say the first sense codon) + c_init = [rand(1:length(model.code.sense_codons)) for i in 1:model.sites] + + # 4) "burn in" by evolving over burnin_time + #jumpy_NeHB_codon_evolve(fitnesses, logNe, codon, fitness_model, logNe_model, nuc_matrix, alpha, time + new_logNe, new_fits, new_codons, _, _, _ = jumpy_NeHB_codon_evolve( + fits, lp, c_init, model.unscaled_ou_params, model.logNe_model, model.nuc_matrix, model.alphas, burnin_time, genetic_code = model.code) + + return ShiftingNeHBSimPartition( + model.sites, + new_logNe, + new_fits, + new_codons, + model.code + ) +end + +# Minimal constructor for placeholders (if one needs a zero-time partition, for instance) +function ShiftingNeHBSimPartition( + sites::Int; + code::MolecularEvolution.GeneticCode = MolecularEvolution.universal_code +) + nAAs = length(code.amino_acids) + return ShiftingNeHBSimPartition( + sites, + 0.0, + zeros(nAAs, sites), + ones(Int, sites), + code + ) +end + +""" + forward!(dest, source, model, node) + +Evolves `source` partition along `node.branchlength` under `model`, storing the result in `dest`. +""" +function MolecularEvolution.forward!( + dest::ShiftingNeHBSimPartition, + source::ShiftingNeHBSimPartition, + model::ShiftingNeHBSimModel, + node::FelNode +) + bl = node.branchlength + new_lp, new_fits, new_codons, _, _, _ = jumpy_NeHB_codon_evolve( + copy(source.fitnesses), + source.logNe, + copy(source.codons), + model.unscaled_ou_params, + model.logNe_model, + model.nuc_matrix, + model.alphas, + bl; + genetic_code = model.code + ) + + dest.logNe = new_lp + dest.fitnesses .= new_fits + dest.codons .= new_codons +end +=#