Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jasnap committed Oct 2, 2017
0 parents commit 1541aea
Show file tree
Hide file tree
Showing 29 changed files with 1,022 additions and 0 deletions.
44 changes: 44 additions & 0 deletions BrB.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
%----------------- BrB --------------------
% Branch-and-bound search for the maximum-parsimony tree.
%
% Every partial tree id returned by partial_treeID is scored with
% FitchScoring. A partial tree whose score is not below the best score found
% so far is pruned. Otherwise the tree ids derived from it (produced one at
% a time by gen_complete_tree) are scored, and the best one is remembered.
% On ties the tree scored last wins. The winning tree is drawn with
% tree_plot before the function returns.
%
% Input:
%   set_of_seq  - Matrix with one row per input sequence, in the encoding
%                 expected by FitchScoring
%   name_matrix - Passed unchanged to tree_plot (presumably the sequence
%                 names used as labels - confirm in tree_plot)
% Output:
%   optimal_score - Lowest score found
%   optimal_model - Model returned by FitchScoring for the tree with that
%                   score
%
% Raises BrB:noTreeFound when no derived tree was scored, instead of
% failing later on an undefined variable.

function [optimal_score, optimal_model] = BrB(set_of_seq, name_matrix)

    num_seq = size(set_of_seq, 1);
    last_id = last_tree(num_seq);
    partID = partial_treeID(num_seq);

    best_score = Inf;
    best_model = [];
    model = [];
    found = false;

    for i = 1:size(partID, 1)
        % The partial tree itself is only used as a bound, never as a result.
        first_id = partID(i, :);
        [~, first_score] = FitchScoring(first_id, set_of_seq);

        if first_score < best_score
            % Score every tree derived from this partial tree.
            prev = first_id;
            for j = 1:last_id(end)
                new_id = gen_complete_tree(prev, last_id);
                if isequal(new_id, 0)
                    % gen_complete_tree returns the scalar 0 once the last
                    % derivable tree has been passed; 0 is not a tree id,
                    % so stop instead of scoring it.
                    break
                end
                prev = new_id;

                [new_model, new_score] = FitchScoring(new_id, set_of_seq);
                if new_score <= best_score
                    best_score = new_score;
                    best_model = new_model;
                    % Unscored model of the same tree, needed by tree_plot.
                    model = treeModelGen(new_id);
                    found = true;
                end
            end
        end
    end

    if ~found
        error('BrB:noTreeFound', ...
              'BrB: no complete tree could be scored for %d sequence(s).', num_seq);
    end

    optimal_score = best_score;
    optimal_model = best_model;
    tree_plot(optimal_model, model, optimal_score, name_matrix);
end

33 changes: 33 additions & 0 deletions ExhaustiveSearch.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
%------------ ExhaustiveSearch -----------
% Exhaustive maximum-parsimony search.
%
% Walks over every tree id returned by treeID, scores with FitchScoring each
% id for which tree_type returns 2, and keeps the lowest-scoring tree. On
% ties the tree scored last wins. The winning tree is drawn with tree_plot
% before the function returns.
%
% Input:
%   set_of_seq  - Matrix with one row per input sequence, in the encoding
%                 expected by FitchScoring
%   name_matrix - Passed unchanged to tree_plot (presumably the sequence
%                 names used as labels - confirm in tree_plot)
% Output:
%   optimal_score - Best (lowest) score of all scored trees
%   optimal_model - Model returned by FitchScoring for the best tree
%
% Raises ExhaustiveSearch:noTreeFound when no id was scored, instead of
% failing later on an undefined variable.

function [optimal_score, optimal_model] = ExhaustiveSearch(set_of_seq, name_matrix)
    num_seq = size(set_of_seq, 1);
    id = treeID(num_seq);

    best_score = Inf;
    best_model = [];
    model = [];
    found = false;

    for i = 1:size(id, 1)
        candidate = id(i, :);
        % Only ids of type 2 are scored (presumably 2 marks a complete
        % tree - confirm in tree_type).
        if tree_type(candidate) == 2
            [out_model, out_score] = FitchScoring(candidate, set_of_seq);
            if out_score <= best_score
                best_score = out_score;
                best_model = out_model;
                % Unscored model of the same tree, needed by tree_plot.
                model = treeModelGen(candidate);
                found = true;
            end
        end
    end

    if ~found
        error('ExhaustiveSearch:noTreeFound', ...
              'ExhaustiveSearch: no complete tree could be scored for %d sequence(s).', ...
              num_seq);
    end

    optimal_model = best_model;
    optimal_score = best_score;
    tree_plot(optimal_model, model, optimal_score, name_matrix);
end



55 changes: 55 additions & 0 deletions FitchScoring.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
% -------------- FitchScoring------------
% Scores the tree identified by id with the Fitch algorithm.
%
% The tree model is obtained from treeModelGen(id). Negative entries of the
% model are treated as unresolved internal nodes. Each one is resolved by
% merging the sequences of its two children (see Merge) once both children
% are positive, i.e. refer to existing rows of set_of_seq. The merged
% sequence is appended to a local copy of set_of_seq as a new row, and the
% row number replaces the internal node in the model. The scores of all
% merges are summed.
%
% Input:
% id - Id of the tree to score
% set_of_seq - Matrix of input sequences, one row per sequence
% Output:
% out_model - Tree model with the internal nodes replaced by row numbers
% out_score - Sum of the scores of all merges
%
% NOTE(review): if the model contains no negative entry, int_nodes1 is never
% assigned and the call to sort fails.
% NOTE(review): if a full pass over int_nodes resolves no node, the outer
% while loop never terminates.

function [out_model, out_score] = FitchScoring(id, set_of_seq)
model = treeModelGen(id);
out_model = model;
out_score = 0;
% Row numbers for merged sequences continue after the last model entry
% (presumably the highest leaf number - confirm in treeModelGen).
new_node = model(end);
k = 1;
flag = 1;
% Collect the negative model entries (internal nodes).
for i = 1:length(model)
if model(i) < 0
int_nodes1(1, k) = model(1,i);
k = k+1;
end
end
% Ascending order, so the most negative node is visited first.
int_nodes = sort(int_nodes1);
% Repeat passes over the unresolved nodes until none is left.
while flag == 1
i = 1;
while i<= length(int_nodes)
temp = out_model;
[ch1, ch2] = children(temp, int_nodes(i));

% Both children must already be rows of set_of_seq.
if(ch1 > 0 && ch2 >0)
[score, new_seq] = Merge(set_of_seq(ch1, :),set_of_seq(ch2, :));
new_node = new_node + 1;
% Replace every occurrence of the internal node by its new row number.
temp(temp == int_nodes(i)) = new_node;
set_of_seq(new_node, :) = new_seq;
out_score = out_score + score;
% Removing the entry shifts the next node into position i. Because i is
% still incremented below, that node is skipped in this pass and picked up
% in a later one.
int_nodes(i) = [];

end
out_model = temp;
i = i +1;
end
if(isempty(int_nodes))
flag = 0;
end
end
% Final merge of sequence 1 with an already merged row.
% NOTE(review): this uses row new_node - 1, not the most recently created
% row new_node - confirm this is intended.
[score, new_seq] = Merge(set_of_seq(1, :),set_of_seq(new_node - 1, :));
new_node = new_node + 1;
set_of_seq(new_node, :) = new_seq;
out_score = out_score + score;
% The first model entry receives the row number of the final merge.
out_model(1) = new_node;
end



26 changes: 26 additions & 0 deletions Merge.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
%-------------- Merge -------------
% Merges two child sequences into their parent sequence (Fitch step).
%
% Each element is treated as a bit set. Where the two children share at
% least one bit, the parent gets the intersection (bitand). Where they share
% none, the parent gets the union (bitor) and the score grows by one.
%
% Input:
%   seq1 - First child sequence (non-negative integers)
%   seq2 - Second child sequence, at least as long as seq1; only its first
%          length(seq1) elements are used
% Output:
%   score   - Number of positions where the children share no bit
%   out_seq - Merged parent sequence, a row vector with length(seq1) elements
%
% Empty input yields score 0 and an empty out_seq.

function [score, out_seq] = Merge (seq1, seq2)

    n = length(seq1);
    % Work on row vectors so the result has the same shape the element-wise
    % loop used to produce.
    a = reshape(seq1(1:n), 1, []);
    b = reshape(seq2(1:n), 1, []);

    common = bitand(a, b);
    disjoint = (common == 0);

    out_seq = common;
    out_seq(disjoint) = bitor(a(disjoint), b(disjoint));
    score = nnz(disjoint);
end
65 changes: 65 additions & 0 deletions P_BrB.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
%---------------- P_BrB ------------------------
% Parallel (CPU, spmd) branch-and-bound search for the maximum-parsimony
% tree.
%
% The partial tree ids returned by partial_treeID are split among the
% workers of the parallel pool. Each worker runs the same branch-and-bound
% loop as BrB on its share, with its own bound. The per-worker results are
% then compared on the client and the overall best tree is drawn once with
% tree_plot. The elapsed time of the parallel part is printed via tic/toc.
%
% Input:
%   set_of_seq  - Matrix with one row per input sequence, in the encoding
%                 expected by FitchScoring
%   name_matrix - Passed unchanged to tree_plot (presumably the sequence
%                 names used as labels - confirm in tree_plot)
% Output:
%   optimal_score - Result of gather on the per-worker best scores: one
%                   entry per worker, indexed with {}. An entry is Inf for
%                   a worker that recorded no tree.
%   optimal_model - Result of gather on the per-worker best models, indexed
%                   the same way. An entry is [] for a worker that recorded
%                   no tree.
% NOTE(review): the outputs are per-worker collections, not reduced to the
% single best value - confirm what callers expect before changing this.
%
% Raises P_BrB:noTreeFound when no worker scored a derived tree.

function [optimal_score, optimal_model] = P_BrB(set_of_seq, name_matrix)
    num_seq = size(set_of_seq, 1);
    last_id = last_tree(num_seq);
    partID = partial_treeID(num_seq);

    % Defined on the client so that every worker starts with the same state
    % and a worker that never improves still has defined results.
    best_score = Inf;
    best_model = [];
    model = [];

    tic
    % Rotating turns the rows (one partial tree each) into columns before
    % distributing, and back afterwards, so each worker ends up with whole
    % partial trees (presumably because the default distribution splits
    % along columns - confirm against the distributed documentation).
    a = distributed(rot90(partID));
    spmd
        p = rot90(getLocalPart(a), -1);
    end

    spmd
        for i = 1:size(p, 1)
            % The partial tree itself is only used as a bound.
            first_id = p(i, :);
            [~, first_score] = FitchScoring(first_id, set_of_seq);

            if first_score < best_score
                % Score every tree derived from this partial tree. A while
                % loop with a flag avoids a break statement inside spmd.
                prev = first_id;
                exhausted = false;
                j = 1;
                while j <= last_id(end) && ~exhausted
                    new_id = gen_complete_tree(prev, last_id);
                    if isequal(new_id, 0)
                        % Scalar 0 is the "no more trees" marker of
                        % gen_complete_tree, not a tree id.
                        exhausted = true;
                    else
                        prev = new_id;
                        [new_model, new_score] = FitchScoring(new_id, set_of_seq);
                        if new_score <= best_score
                            best_score = new_score;
                            best_model = new_model;
                            % Unscored model of the same tree, for tree_plot.
                            model = treeModelGen(new_id);
                        end
                    end
                    j = j + 1;
                end
            end
        end
    end
    toc

    optimal_model = gather(best_model);
    optimal_score = gather(best_score);
    temp_umodel = gather(model);

    % Pick the best tree over all workers; on ties the later worker wins.
    [~, num_workers] = size(optimal_score);
    temp_score = Inf;
    temp_model = [];
    umodel = [];
    found = false;
    for i = 1:num_workers
        if ~isempty(temp_umodel{i}) && optimal_score{i} <= temp_score
            temp_score = optimal_score{i};
            temp_model = optimal_model{i};
            umodel = temp_umodel{i};
            found = true;
        end
    end

    if ~found
        error('P_BrB:noTreeFound', ...
              'P_BrB: no complete tree could be scored for %d sequence(s).', num_seq);
    end

    % Plot only the overall winner, not every intermediate improvement.
    tree_plot(temp_model, umodel, temp_score, name_matrix);

end
18 changes: 18 additions & 0 deletions children.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
%---------------- children ----------
% Looks up the two children of a node in a tree model.
%
% The children of node are stored next to each other in the model, at
% positions 2*|node| + 1 and 2*|node| + 2.
%
% Input:
%   model - Tree model (vector)
%   node  - Parent node; only its absolute value is used
% Output:
%   child1 - Model entry at position 2*|node| + 1
%   child2 - Model entry at position 2*|node| + 2

function [child1, child2] = children(model,node)

    base = 2*abs(node);
    child1 = model(base + 1);
    child2 = model(base + 2);

end

40 changes: 40 additions & 0 deletions factd.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
function [f] = factd(n)
%FACTD Double factorial, n!!
%
%   f = factd(n) returns the double factorial of every element of n.
%   f has the same size as n.
%
%       n!! = n*(n-2)*(n-4)*...*5*3*1   for odd n
%       n!! = n*(n-2)*(n-4)*...*6*4*2   for even n
%
%   The value comes from a closed-form expression built on gamma. Results
%   for real integers n >= -1 are rounded to the nearest integer. Real even
%   integers below -1 give Inf.
%
%   The original notes (tested under version 5.3.1) state that n may be
%   complex when a complex gamma routine is supplied; that routine is not
%   part of this file.
%
%   See also: Gamma, Fact
%
%   Original author: Paul Godfrey, pgodfrey@conexant.com, 8-29-00

    out_size = size(n);
    x = n(:);

    % cos(pi*x) - 1 is 0 for even integers and -2 for odd integers.
    parity = cos(pi*x) - 1;

    pow2 = 2.^((-parity + x + x)/4);
    powpi = pi.^(parity/4);
    f = pow2 .* powpi .* gamma(1 + x/2);

    % Real integers from -1 upwards: remove floating-point noise.
    exact = round(x) == x & imag(x) == 0 & real(x) >= -1;
    f(exact) = round(f(exact));

    % Real even integers below -1.
    poles = round(x/2) == (x/2) & imag(x) == 0 & real(x) < -1;
    f(poles) = Inf;

    f = reshape(f, out_size);
end
34 changes: 34 additions & 0 deletions fasta_rd.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
%------------- fasta_rd --------------
% Preprocesses input FASTA sequences.
%
% The sequences are aligned with multialign and stacked into a character
% matrix with one row per sequence. Every character other than an
% upper-case 'A', 'C', 'T' or 'G' is replaced by '-'.
%
% Input:
%   f - Sequences as accepted by multialign (e.g. the result of reading a
%       FASTA file)
% Output:
%   data - Character matrix, one aligned sequence per row, containing only
%          'A', 'C', 'T', 'G' and '-'
%
% All sequences are used whether multialign returns its struct array as a
% row or as a column. A character-matrix result is used as is.
function data = fasta_rd(f)

    aligned = multialign(f);

    if ischar(aligned)
        data = aligned;
    else
        % Expanding the Sequence field visits every element of the struct
        % array. Stacking fails if the sequences differ in length.
        data = char(vertcat(aligned.Sequence));
    end

    % Anything that is not one of the four bases becomes a gap.
    data(~ismember(data, 'ACTG')) = '-';
end


24 changes: 24 additions & 0 deletions gen_complete_tree.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
%------------ gen_complete_tree(previous, last)----------------------
%
% Produces the tree id that follows previous by increasing its last element
% by one.
%
% Input:
%   previous - Tree id that was scored last
%   last     - Last tree id that can be derived from the given partial
%              tree; only its last element is used
% Output:
%   t - previous with its last element incremented. When the last element
%       of previous already equals the last element of last, the message
%       't is zero' is displayed and the scalar 0 is returned instead.

function t = gen_complete_tree(previous, last)
    if previous(end) == last(end)
        % Nothing left to generate after the last tree.
        disp('t is zero')
        t = 0;
        return
    end

    t = previous;
    t(end) = t(end) + 1;

end

21 changes: 21 additions & 0 deletions get_row_count.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
%------- get_row_count --------
% Returns the row count used for a set of n sequences:
%
%     1 + (2*(n-1)-5)!! + (2*n-5)!!      for n > 2
%     0                                  otherwise
%
% where !! is the double factorial computed by factd. According to the
% original description this is the number of all possible trees for the
% sequences.
%
% Input:
%   n - Number of input sequences
% Output:
%   number - Row count as defined above

function number = get_row_count(n)
    number = 0;
    if n > 2
        trees_without_last = factd(2*(n-1) - 5);
        trees_with_all = factd(2*n - 5);
        number = 1 + trees_without_last + trees_with_all;
    end
end





17 changes: 17 additions & 0 deletions get_row_odd.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
%----------- get_row_odd-------------
% Returns the odd number 1 + 2*k, where k is the number of steps in the
% range 1:(n-3). For an integer n >= 3 this is 2*n - 5; for smaller n it
% is 1.
%
% Input:
%   n - Number of input sequences
% Output:
%   odd - The odd number described above


function odd = get_row_odd(n)
    steps = numel(1:(n - 3));
    odd = 1 + 2*steps;
end

Loading

0 comments on commit 1541aea

Please sign in to comment.