Skip to content

Commit

Permalink
Merge branches 'rescore' and 'densityRec' of https://github.com/ding-…
Browse files Browse the repository at this point in the history
  • Loading branch information
AdamDS committed Sep 30, 2017
2 parents a65f29a + 6bf3a23 commit d23084d
Show file tree
Hide file tree
Showing 7 changed files with 335 additions and 86 deletions.
Binary file renamed HotSpot3D-1.8.0.2.tar.gz → HotSpot3D-1.8.0.tar.gz
Binary file not shown.
4 changes: 2 additions & 2 deletions bin/hotspot3d
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
#----------------------------------
# $Authors: Beifang Niu & Adam D Scott
# $Date: 2013-08-08 13:22:08 -0500 (Thu Aug 8 13:22:08 CDT 2013) $
# $Revision: 1.8.0.2 $
# $Revision: 1.8.0 $
# $URL: $
#----------------------------------
use strict;
use warnings;

our $VERSION = 'V1.8.0.2';
our $VERSION = 'V1.8.0';

use Carp;
use FileHandle;
Expand Down
2 changes: 1 addition & 1 deletion dist.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = HotSpot3D
author = Beifang Niu, John Wallis, Adam D Scott, Sohini Sengupta, Amila Weerasinghe, & Matthew H Bailey from McDonnell Genome Institute of Washington University at St. Louis
version = 1.8.0.2
version = 1.8.0
license = Perl_5
copyright_holder = McDonnell Genome Institute at Washington University
copyright_year = 2017
Expand Down
99 changes: 80 additions & 19 deletions lib/TGI/Mutpro/Main/Cluster.pm
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ sub new {
$this->{'MinPts'} = undef;
$this->{'number_of_runs'} = undef;
$this->{'probability_cut_off'} = undef;

$this->{'JSON_status'} = undef;
$this->{'mutations_json_hash'} = undef;
$this->{'distance_matrix_json_hash'} = undef;
$this->{'siteVertexMap_json_hash'} = undef;

bless $this, $class;
$this->process();
return $this;
Expand Down Expand Up @@ -252,6 +258,10 @@ sub setOptions {
'minPts=f' => \$this->{'MinPts'},
'number-of-runs=f' => \$this->{'number_of_runs'},
'probability-cut-off=f' => \$this->{'probability_cut_off'},
'use-JSON' => \$this->{'JSON_status'},
'mutations-hash-json-file=s' => \$this->{'mutations_json_hash'},
'distance-matrix-json-file=s' => \$this->{'distance_matrix_json_hash'},
'siteVertexMap-json-file=s' => \$this->{'siteVertexMap_json_hash'},

'help' => \$help,
);
Expand Down Expand Up @@ -348,10 +358,22 @@ sub setOptions {
} else {
warn "HotSpot3D::Cluster::setOptions warning: no pairwise-file included (cannot produce mutation-mutation clusters)!\n";
}
if ( defined $this->{'JSON_status'} ) {
$this->{'JSON_status'} = 1;
warn "HotSpot3D::Cluster::setOptions warning: use-JSON flag used (will not look for pairwise data or maf file)\n";
if ( not defined $this->{'mutations_json_hash'} or not defined $this->{'distance_matrix_json_hash'} or not defined $this->{'siteVertexMap_json_hash'} ) {
die "HotSpot3D::Cluster::setOptions Error: use-JSON flag is used, but json file locations are not provided!\n";
}
elsif ( not -e $this->{'mutations_json_hash'} or not -e $this->{'mutations_json_hash'} or not -e $this->{'siteVertexMap_json_hash'} ) {
die "HotSpot3D::Cluster::setOptions Error: use-JSON flag is used, but the provided JSON files do not exist!\n";
}
}
else { $this->{'JSON_status'} = 0; }

if ( not defined $this->pairwiseFile() and
not defined $this->sitePairsFile() and
not defined $this->musitePairsFile() and
not defined $this->drugsCleanFile() ) {
not defined $this->drugsCleanFile() and not $this->{'JSON_status'} ) {
warn "HotSpot3D::Cluster::setOptions error: no pair file provided. Need at least one of *.pairwise, *.clean, *.sites, *.musites.\n";
die $this->help_text();
}
Expand Down Expand Up @@ -527,8 +549,8 @@ sub launchClustering {

sub vertexFilter {
#$this->vertexFilter( $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix );
my ( $this , $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix ) = @_;
if ( $this->{'vertex_type'} eq $SITE ) {
my ( $this , $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix, $siteVertexMap ) = @_;
if ( $this->{'vertex_type'} eq $SITE or $this->{'clustering'} eq $DENSITY ) {
print STDOUT "Filtering vertices\n";
#TODO if using a different .maf from search step, then some mutations can be missed
my $vertexMap = {}; #a hash to map isSameProteinPosition vertices (and others to their selves)-- map=f()
Expand All @@ -539,13 +561,15 @@ sub vertexFilter {
next if not exists $temp_mutations->{$mutationKey1};
foreach my $mutationKey2 ( @mKeys ) {
next if not exists $temp_mutations->{$mutationKey2};
if ( $mutationKey1 eq $mutationKey2 ) {
if ( $mutationKey1 eq $mutationKey2 ) { # this if condition is important to capture mk2=mk1 cases to $siteVertexMap hash
$vertexMap->{$mutationKey2} = $mutationKey1;
$siteVertexMap->{$mutationKey1}->{$mutationKey2} = $temp_mutations->{$mutationKey2};
# print "ACSW::VertexFilter::Equal $mutationKey2 \=\=\> $mutationKey1\n";
next;
}
elsif ( $this->isSameProteinPosition( $temp_mutations , $mutationKey1 , $mutationKey2 ) ) { #if same site
$vertexMap->{$mutationKey2} = $mutationKey1;
$siteVertexMap->{$mutationKey1}->{$mutationKey2} = $temp_mutations->{$mutationKey2};
print "ACSW::VertexFilter::SameSite $mutationKey2 \=\=\> $mutationKey1\n";
delete $temp_mutations->{$mutationKey2};
}
Expand All @@ -555,8 +579,14 @@ sub vertexFilter {

#generate representative annotations
foreach my $mutationKey ( keys %{$temp_mutations} ) {
my ( $ra , $pk ) = $this->getARepresentativeAnnotation( $temp_mutations , $mutationKey );
$mutations->{$mutationKey}->{$ra}->{$pk} = 1;
my ( $ra , $pk, $highestRecurrence, $totalRecurrence ) = $this->getARepresentativeAnnotation( $temp_mutations , $mutationKey, $siteVertexMap );
if ( $this->{'vertex_type'} eq $SITE ) {
$mutations->{$mutationKey}->{$ra}->{$pk} = 1; # weight = 1 for SITE
}
else {
$mutations->{$mutationKey}->{$ra}->{$pk} = $totalRecurrence; # weight = total recurrence/weight for RECURRENCE/WEIGHT
}

}
print "ACSW::VertexFilter::mutations representative annotation done\n";

Expand All @@ -580,30 +610,56 @@ sub vertexFilter {
}
}
}
# print "vertex_map\n";
# print Dumper $vertexMap;
} else {
%{$mutations} = %{$temp_mutations};
%{$distance_matrix} = %{$temp_distance_matrix};
}
$temp_mutations = undef;
$temp_distance_matrix = undef;
# print "distance_matrix\n";
# print Dumper $distance_matrix;

return;
}

sub getARepresentativeAnnotation {
my ( $this , $mutations , $mutationKey ) = @_;
my $ra = ".:.";
my $pk = ".:p.";
foreach my $refAlt ( keys %{$mutations->{$mutationKey}} ) {
$ra = $refAlt;
foreach my $proteinKey ( keys %{$mutations->{$mutationKey}->{$refAlt}} ) {
$pk = $proteinKey;
last;
sub getARepresentativeAnnotation { # choose a representative out of all the mutations detected as same protein position
my ( $this , $mutations , $mutationKey, $siteVertexMap ) = @_;
# my $ra = ".:.";
# my $pk = ".:p.";
my ($ra, $pk, $highestRecurrence ) = undef;
my $totalRecurrence = 0;

foreach my $mk ( keys %{$siteVertexMap->{$mutationKey}} ) {
foreach my $refAlt ( keys %{$siteVertexMap->{$mutationKey}->{$mk}} ) {
my $lastPK; # to store one proteinKey from one ref:alt
foreach my $proteinKey ( keys %{$siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}} ) {
$lastPK = $proteinKey;
if ( $mk eq $mutationKey ) { # to make sure our selected refAlt and pKey are members of the mutationKey used in downstream
if ( not defined $highestRecurrence ) { # assigning to the first entry
$ra = $refAlt;
$pk = $proteinKey;
$highestRecurrence = $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$proteinKey};
}
elsif ( $highestRecurrence < $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$proteinKey} ) { # this recurrence is larger than everything seen so far
$ra = $refAlt;
$pk = $proteinKey;
$highestRecurrence = $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$proteinKey};
}
}
}
$totalRecurrence += $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$lastPK}; # add the recurrence of one proteinKey per ref:alt
}
last;
}
return ( $ra , $pk );

# foreach my $refAlt ( keys %{$mutations->{$mutationKey}} ) {
# $ra = $refAlt;
# foreach my $proteinKey ( keys %{$mutations->{$mutationKey}->{$refAlt}} ) {
# $pk = $proteinKey;
# last;
# }
# last;
# }
return ( $ra , $pk, $highestRecurrence, $totalRecurrence );
}

sub checkPartners {
Expand Down Expand Up @@ -1921,6 +1977,11 @@ Usage: hotspot3d cluster [options]
--max-processes Set if using parallel type local (CAUTION: make sure you know your max CPU processes)
--gene-list-file Choose mutations from the genes given in this list
--structure-list-file Choose mutations from the structures given in this list
--use-JSON Use pre-encoded mutations and distance-matrix hashes in json format, default (no flag): do not use json
--mutations-hash-json-file JSON encoded mutations hash file produced by a previous cluster run
--distance-matrix-json-file JSON encoded distance-matrix hash file produced by a previous cluster run
--help this message
Expand Down
37 changes: 36 additions & 1 deletion lib/TGI/Mutpro/Main/ColorScore.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,49 @@
#!/usr/bin/env Rscript

# Authors : Amila Weerasinghe
# Generates a reachability plot colored by weight (*.Horiz.weights.pdf)

args = commandArgs(trailingOnly=TRUE)
d <- read.table(args[1], header=FALSE, sep = "\t") # data table with variant,RD,genomic_annotations,weight

segData = read.table(args[2], sep = "\t") # data table with start,stop,epsilon',clusterID

#################################################################
### process the segData so that cluster labels do not overlap ###
#################################################################

# make a column to show process status
segData$Processed = 0

# display singleton clusters at RD=0.1
segData[segData$V1==segData$V2,"V3"] = 0.1
segData[segData$V1==segData$V2,"Processed"] = 1

segData$textOffset = 0

# now start from the first un-processed row and see if there are nearby levels
unprocessed = segData[segData$Processed==0,]

while ( length(unprocessed$V1) != 0 ) {
# take the first un-processed one and the nearby stuff
tab = segData[segData$V3<unprocessed$V3[1]+0.5 & segData$V3>=unprocessed$V3[1] & segData$V2==unprocessed$V2[1],]
offset = 0.1
# go through each row in tab and add offset
for (i in c(1:length(tab$V1))) {
segData[segData$V3<unprocessed$V3[1]+0.5 & segData$V3>=unprocessed$V3[1] & segData$V2==unprocessed$V2[1],][i,"textOffset"] = offset
offset = offset + 0.1
}
segData[segData$V3<unprocessed$V3[1]+0.5 & segData$V3>=unprocessed$V3[1] & segData$V2==unprocessed$V2[1],]$Processed = 1
unprocessed = segData[segData$Processed==0,] # update the unprocessed table
}
#################################################################

y0<-segData[[3]]
x0<-segData[[1]]+1
y1<-segData[[3]]
x1<-segData[[2]]+1
Cluster<-segData[[5]]
labelOffset <- segData$textOffset

names(d)[1] <- "variant"
names(d)[2] <- "RD"
Expand Down Expand Up @@ -37,7 +72,7 @@ for (i in values) {
p <- p + geom_segment(x=x0[i], y=y0[i], xend=x1[i], yend=y1[i])
}

p <- p + annotate("text", x = x1+1, y = y0, label = Cluster, size = 2)
p <- p + annotate("text", x = x1+labelOffset, y = y0, label = Cluster, size = 2)

p <- p + ggtitle(paste("Reachability Plot with weights: Epsilon=",args[4],"MinPts=",args[5]))

Expand Down
Loading

0 comments on commit d23084d

Please sign in to comment.