diff --git a/HotSpot3D-1.7.3.tar.gz b/HotSpot3D-1.7.3.tar.gz
index f343c59..e38f862 100644
Binary files a/HotSpot3D-1.7.3.tar.gz and b/HotSpot3D-1.7.3.tar.gz differ
diff --git a/lib/TGI/Mutpro/Main/Cluster.pm b/lib/TGI/Mutpro/Main/Cluster.pm
index 3c2def7..8b8c103 100644
--- a/lib/TGI/Mutpro/Main/Cluster.pm
+++ b/lib/TGI/Mutpro/Main/Cluster.pm
@@ -107,6 +107,7 @@ sub new {
 	$this->{'JSON_status'} = undef;
 	$this->{'mutations_json_hash'} = undef;
 	$this->{'distance_matrix_json_hash'} = undef;
+	$this->{'siteVertexMap_json_hash'} = undef;
 
     bless $this, $class;
     $this->process();
@@ -250,6 +251,7 @@ sub setOptions {
 		'use-JSON' => \$this->{'JSON_status'},
 		'mutations-hash-json-file=s' => \$this->{'mutations_json_hash'},
 		'distance-matrix-json-file=s' => \$this->{'distance_matrix_json_hash'},
+		'siteVertexMap-json-file=s' => \$this->{'siteVertexMap_json_hash'},
 
         'help' => \$help,
     );
@@ -349,10 +351,10 @@ sub setOptions {
 	if ( defined $this->{'JSON_status'} ) {
 		$this->{'JSON_status'} = 1;
 		warn "HotSpot3D::Cluster::setOptions warning: use-JSON flag used (will not look for pairwise data or maf file)\n";
-		if ( not defined $this->{'mutations_json_hash'} or not defined $this->{'distance_matrix_json_hash'}  ) {
+		if ( not defined $this->{'mutations_json_hash'} or not defined $this->{'distance_matrix_json_hash'} or not defined $this->{'siteVertexMap_json_hash'}  ) {
 			die "HotSpot3D::Cluster::setOptions Error: use-JSON flag is used, but json file locations are not provided!\n";
 		}
-		elsif ( not -e $this->{'mutations_json_hash'} or not -e $this->{'mutations_json_hash'} ) {	
+		elsif ( not -e $this->{'mutations_json_hash'} or not -e $this->{'mutations_json_hash'} or not -e $this->{'siteVertexMap_json_hash'} ) {	
 			die "HotSpot3D::Cluster::setOptions Error: use-JSON flag is used, but the provided JSON files do not exist!\n";
 		}
 	}
@@ -530,8 +532,8 @@ sub launchClustering {
 
 sub vertexFilter {
 #$this->vertexFilter( $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix );
-	my ( $this , $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix ) = @_;
-	if ( $this->{'vertex_type'} eq $SITE ) {
+	my ( $this , $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix, $siteVertexMap ) = @_;
+	if ( $this->{'vertex_type'} eq $SITE or $this->{'clustering'} eq $DENSITY ) {
 		print STDOUT "Filtering vertices\n";
 #TODO if using a different .maf from search step, then some mutations can be missed
 		my $vertexMap = {}; #a hash to map isSameProteinPosition vertices (and others to their selves)-- map=f()
@@ -542,13 +544,15 @@ sub vertexFilter {
 			next if not exists $temp_mutations->{$mutationKey1};
 			foreach my $mutationKey2 ( @mKeys ) {
 				next if not exists $temp_mutations->{$mutationKey2};
-				if ( $mutationKey1 eq $mutationKey2 ) {
+				if ( $mutationKey1 eq $mutationKey2 ) { # this if condition is important to capture mk2=mk1 cases to $siteVertexMap hash
 					$vertexMap->{$mutationKey2} = $mutationKey1;
+					$siteVertexMap->{$mutationKey1}->{$mutationKey2} = $temp_mutations->{$mutationKey2};
 					# print "ACSW::VertexFilter::Equal $mutationKey2 \=\=\> $mutationKey1\n";
 					next;
 				}
 				elsif ( $this->isSameProteinPosition( $temp_mutations , $mutationKey1 , $mutationKey2 ) ) { #if same site
 					$vertexMap->{$mutationKey2} = $mutationKey1;
+					$siteVertexMap->{$mutationKey1}->{$mutationKey2} = $temp_mutations->{$mutationKey2};
 					print "ACSW::VertexFilter::SameSite $mutationKey2 \=\=\> $mutationKey1\n";
 					delete $temp_mutations->{$mutationKey2};
 				}
@@ -558,8 +562,14 @@ sub vertexFilter {
 
 		#generate representative annotations
 		foreach my $mutationKey ( keys %{$temp_mutations} ) {
-			my ( $ra , $pk ) = $this->getARepresentativeAnnotation( $temp_mutations , $mutationKey );
-			$mutations->{$mutationKey}->{$ra}->{$pk} = 1;
+			my ( $ra , $pk, $highestRecurrence, $totalRecurrence ) = $this->getARepresentativeAnnotation( $temp_mutations , $mutationKey, $siteVertexMap );
+			if ( $this->{'vertex_type'} eq $SITE ) {
+				$mutations->{$mutationKey}->{$ra}->{$pk} = 1; # weight = 1 for SITE
+			}
+			else {
+				$mutations->{$mutationKey}->{$ra}->{$pk} = $totalRecurrence; # weight = total recurrence/weight for RECURRENCE/WEIGHT
+			}
+			
 		}
 		print "ACSW::VertexFilter::mutations representative annotation done\n";
 
@@ -583,30 +593,56 @@ sub vertexFilter {
 				}
 			}
 		}
+		# print "vertex_map\n";
+		# print Dumper $vertexMap;
 	} else {
 		%{$mutations} = %{$temp_mutations};
 		%{$distance_matrix} = %{$temp_distance_matrix};
 	}
 	$temp_mutations = undef;
 	$temp_distance_matrix = undef;
-	# print "distance_matrix\n";
-	# print Dumper $distance_matrix;
+	
 	return;
 }
 
-sub getARepresentativeAnnotation {
-	my ( $this , $mutations , $mutationKey ) = @_;
-	my $ra = ".:.";
-	my $pk = ".:p.";
-	foreach my $refAlt ( keys %{$mutations->{$mutationKey}} ) {
-		$ra = $refAlt;
-		foreach my $proteinKey ( keys %{$mutations->{$mutationKey}->{$refAlt}} ) {
-			$pk = $proteinKey;
-			last;
+sub getARepresentativeAnnotation { # choose a representative out of all the mutations detected as same protein position
+	my ( $this , $mutations , $mutationKey, $siteVertexMap ) = @_;
+	# my $ra = ".:.";
+	# my $pk = ".:p.";
+	my ($ra, $pk, $highestRecurrence ) = undef;
+	my $totalRecurrence = 0;
+
+	foreach my $mk ( keys %{$siteVertexMap->{$mutationKey}} ) {
+		foreach my $refAlt ( keys %{$siteVertexMap->{$mutationKey}->{$mk}} ) {
+			my $lastPK; # to store one proteinKey from one ref:alt
+			foreach my $proteinKey ( keys %{$siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}} ) {
+				$lastPK = $proteinKey;
+				if ( $mk eq $mutationKey ) { # to make sure our selected refAlt and pKey are members of the mutationKey used in downstream
+					if ( not defined $highestRecurrence ) { # assigning to the first entry
+						$ra = $refAlt;
+						$pk = $proteinKey;
+						$highestRecurrence = $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$proteinKey};
+					}
+					elsif ( $highestRecurrence < $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$proteinKey} ) { # this recurrence is larger than everything seen so far
+						$ra = $refAlt;
+						$pk = $proteinKey;
+						$highestRecurrence = $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$proteinKey};
+					}
+				}
+			}
+			$totalRecurrence += $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$lastPK}; # add the recurrence of one proteinKey per ref:alt
 		}
-		last;
 	}
-	return ( $ra , $pk );
+		
+	# foreach my $refAlt ( keys %{$mutations->{$mutationKey}} ) {
+	# 	$ra = $refAlt;
+	# 	foreach my $proteinKey ( keys %{$mutations->{$mutationKey}->{$refAlt}} ) {
+	# 		$pk = $proteinKey;
+	# 		last;
+	# 	}
+	# 	last;
+	# }
+	return ( $ra , $pk, $highestRecurrence, $totalRecurrence );
 }
 
 sub checkPartners {
diff --git a/lib/TGI/Mutpro/Main/ColorScore.R b/lib/TGI/Mutpro/Main/ColorScore.R
index b5dacea..fc02f61 100644
--- a/lib/TGI/Mutpro/Main/ColorScore.R
+++ b/lib/TGI/Mutpro/Main/ColorScore.R
@@ -1,14 +1,49 @@
 #!/usr/bin/env Rscript
+
+# Authors : Amila Weerasinghe
+# Generates a reachability plot colored by weight (*.Horiz.weights.pdf)
+
 args = commandArgs(trailingOnly=TRUE)
 d <- read.table(args[1], header=FALSE, sep = "\t") # data table with variant,RD,genomic_annotations,weight
 
 segData = read.table(args[2], sep = "\t") # data table with start,stop,epsilon',clusterID
 
+#################################################################
+### process the segData so that cluster labels do not overlap ###
+#################################################################
+
+# make a column to show process status
+segData$Processed = 0
+
+# display singleton clusters at RD=0.1
+segData[segData$V1==segData$V2,"V3"] = 0.1
+segData[segData$V1==segData$V2,"Processed"] = 1
+
+segData$textOffset = 0
+
+# now start from the first un-processed row and see if there are nearby levels
+unprocessed = segData[segData$Processed==0,]
+
+while ( length(unprocessed$V1) != 0 ) {
+  # take the first un-processed one and the nearby stuff
+  tab = segData[segData$V3<unprocessed$V3[1]+0.5 & segData$V3>=unprocessed$V3[1] & segData$V2==unprocessed$V2[1],]
+  offset = 0.1
+  # go through each row in tab and add offset
+  for (i in c(1:length(tab$V1))) {
+    segData[segData$V3<unprocessed$V3[1]+0.5 & segData$V3>=unprocessed$V3[1] & segData$V2==unprocessed$V2[1],][i,"textOffset"] = offset
+    offset = offset + 0.1
+  }
+  segData[segData$V3<unprocessed$V3[1]+0.5 & segData$V3>=unprocessed$V3[1] & segData$V2==unprocessed$V2[1],]$Processed = 1
+  unprocessed = segData[segData$Processed==0,] # update the unprocessed table
+}
+#################################################################
+
 y0<-segData[[3]]
 x0<-segData[[1]]+1
 y1<-segData[[3]]
 x1<-segData[[2]]+1
 Cluster<-segData[[5]]
+labelOffset <- segData$textOffset
 
 names(d)[1] <- "variant"
 names(d)[2] <- "RD"
@@ -37,7 +72,7 @@ for (i in values) {
   p <- p + geom_segment(x=x0[i], y=y0[i], xend=x1[i], yend=y1[i])
 }
 
-p <- p + annotate("text", x = x1+1, y = y0, label = Cluster, size = 2)
+p <- p + annotate("text", x = x1+labelOffset, y = y0, label = Cluster, size = 2)
 
 p <- p + ggtitle(paste("Reachability Plot with weights: Epsilon=",args[4],"MinPts=",args[5]))
 
diff --git a/lib/TGI/Mutpro/Main/Density.pm b/lib/TGI/Mutpro/Main/Density.pm
index b2124e5..00d4694 100644
--- a/lib/TGI/Mutpro/Main/Density.pm
+++ b/lib/TGI/Mutpro/Main/Density.pm
@@ -3,7 +3,7 @@ package TGI::Mutpro::Main::Density;
 #----------------------------------
 # $Authors: Amila Weerasinghe
 # $Date: 2016-11-28 14:34:50 -0500 (Mon Nov 28 14:34:50 CST 2016) $
-# $Revision:  
+# $Revision: 2017-09-28 by A.W. (adding the recurrence and weight based clustering) 
 # $URL: $
 # $Doc: $ Determine density-based clusters from Hotspot3D pairwise data.
 # 
@@ -33,6 +33,11 @@ my $DEPENDENT = "dependent";
 my $LOCAL = "local";
 my $NONE = "none";
 
+my $WEIGHT = "weight";
+my $RECURRENCE = "recurrence";
+my $UNIQUE = "unique";
+my $SITE = "site";
+
 sub new {
     my $class = shift;
     my $this = shift;
@@ -50,6 +55,7 @@ sub process {
     my $temp_mutations = {};
     my $distance_matrix = {};
     my $mutations = {};
+    my $siteVertexMap = {}; # a hash to store refAlt and pKey info when "SITE" specific clustering is done.
     my $WEIGHT = "weight";
 
     if ( not defined $this->{'Epsilon'} ) {
@@ -73,27 +79,42 @@ sub process {
     #####################################################
     
     $this->GetFileName(); # just to get only the name from the pairwise file destination
-    $this->setRWD(); # to retieve the path to TGI::Mutpro::Main. Used to call RScripts using perl
+    $this->setRWD(); # to retrieve the path to TGI::Mutpro::Main. Used to call RScripts using perl
 
     #####################################################
 
 	if ( $this->{'JSON_status'} ) {
 		$mutations = readHash( $this->{'mutations_json_hash'} );
-		$distance_matrix = readHash( $this->{'distance_matrix_json_hash'} );	
+		$distance_matrix = readHash( $this->{'distance_matrix_json_hash'} );
+        $siteVertexMap = readHash( $this->{'siteVertexMap_json_hash'} );	
 	}
 	else {
 		unless( $this->{'pairwise_file'} ) { warn 'You must provide a pairwise file! ', "\n"; die $this->help_text(); }
     	unless( -e $this->{'pairwise_file'} ) { warn "The input pairwise file (".$this->{'pairwise_file'}.") does not exist! ", "\n"; die $this->help_text(); }
 
 		$this->readMAF( $temp_mutations );
+        # print "temp_mutations\n";
+        # print Dumper $temp_mutations;
 		$this->getDrugMutationPairs( $temp_distance_matrix );
 		$this->getMutationMutationPairs( $temp_distance_matrix );
-		$this->vertexFilter( $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix );
+		$this->vertexFilter( $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix, $siteVertexMap );
+        # $this->setSameSiteDistancesToZero( $distance_matrix, $mutations );
   
 		printHash( $distance_matrix, "distance_matrix_hash" );
 		printHash( $mutations, "mutations_hash" );
+        printHash( $siteVertexMap, "siteVertexMap_hash" );
+
+        # print "mutations\n";
+        # print Dumper $mutations;
+        # print "distance_matrix\n";
+        # print Dumper $distance_matrix;
+        # print "siteVertexMap\n";
+        # print Dumper $siteVertexMap;
 	}
 
+    $this->{"siteVertexMap"} = $siteVertexMap; # store the reference to siteVertexMap
+    $this->{"mutationsHashRef"} = $mutations; # store the reference to mutations 
+
     #print "distance matrix\n";
     #print Dumper $distance_matrix;
     #print "mutations\n";
@@ -352,7 +373,7 @@ sub GetNeighbors { # retrieves the epsilon-neighborhood
 sub GetCoreDistance {
     my ( $Obj, $neighbors, $MinPts, $mutations ) = @_;
 
-    $neighbors->{$Obj} = 0; # adding the object so that it's counted towards the MinPts
+    $neighbors->{$Obj} = 0; # adding the object so that it is counted towards the MinPts
     my $CoreDist = undef;
     my $neighbors_count = 0;
     my @keys = sort { $neighbors->{$a} <=> $neighbors->{$b} } keys %{$neighbors}; # sort keys according to distances
@@ -360,9 +381,14 @@ sub GetCoreDistance {
     # print Dumper \@keys;
 
     for (my $i = 0; $i < scalar @keys; $i++) {
-        # print "$keys[$i]\n";
-        # print "\tneighbors_count prev = $neighbors_count\n";
-        $neighbors_count = $neighbors_count + scalar keys %{$mutations->{$keys[$i]}}; # add the number of Ref:Alt for the key
+        my $mk = $keys[$i]; # neighboring mutation key
+        foreach my $refAlt ( keys %{$mutations->{$mk}} ) {
+            # print "$keys[$i]\n";
+            # print "\tneighbors_count prev = $neighbors_count\n";
+            my $pk = ( keys %{$mutations->{$mk}->{$refAlt}} )[0]; # get the first protein key corresponding to the ref:alt (there will be just one)
+            my $weight = $mutations->{$mk}->{$refAlt}->{$pk}; # get the weight (for SITE=1, RECURRENCE=total frequency, WEIGHT=total weight)
+            $neighbors_count += $weight; # add the number of "mutations" to the count (the number depends on what you're clustering on; as explained above)
+        }
         #print "\tneighbors_count after = $neighbors_count\n";
         if ( $neighbors_count >= $MinPts ) {
             $CoreDist = $neighbors->{$keys[$i]};
@@ -471,11 +497,10 @@ sub GetFileName {
 
     my @tempArray = split( "/",$this->{'pairwise_file'}) ;
 
-    if ( not defined @tempArray ) {
-    	$this->{'pairwise_file_name_only'} = 'HS3D.DensityClustering';
-    }
-    else {
-    	$this->{'pairwise_file_name_only'} = pop @tempArray;
+    $this->{'pairwise_file_name_only'} = pop @tempArray;
+
+    if ( not defined $this->{'pairwise_file_name_only'} or $this->{'pairwise_file_name_only'} eq '' ) {
+        $this->{'pairwise_file_name_only'} = 'HS3D.DensityClustering';
     }
 
     return $this;
@@ -502,18 +527,18 @@ sub RunSuperClustersID {
         if ( $InitialSet[$i][1] == 10 ) {
             #print "RD($i)=10\n";
 
-            if ( $InitialSet[$i-1][1] == 10 ) {
-                $scs = $i;
-            }
-            else {
+            # if ( $InitialSet[$i-1][1] == 10 ) {
+            #     $scs = $i;
+            # }
+            # else {
                 $Clusters{SuperClusters}{$scs} = $i;
                 $scs = $i;
-            }       
+            # }       
         }
     }
     #print Dumper \%Clusters;
 
-    # Go inside each super cluster and start scanning from smallest r(x):
+    # Go inside each super cluster and start scanning from the smallest r(x):
     my $idSuperCluster = -1;
 
     foreach my $superC ( sort { $Clusters{SuperClusters}{$a} <=> $Clusters{SuperClusters}{$b} } keys %{$Clusters{SuperClusters}} ) {
@@ -533,15 +558,22 @@ sub RunSuperClustersID {
         }
 
         # Recording the Super Cluster
-        if ($Clusters{SuperClusters}{$superC} - $superC >= $MinPts) {
+        my $superClusterCount = 0; # a counter to record the number of weights (recurrence/pathogenicity) inside a super cluster
+        for (my $i = $superC; $i <= $Clusters{SuperClusters}{$superC}-1 ; $i++) {
+            $superClusterCount += $InitialSet[$i][4]; # add the weight (for SITE=1, RECURRENCE=frequency, WEIGHT=weight)
+        }
+        # check if the current super cluster satisfies the MinPts condition
+        if ( $superClusterCount >= $MinPts ) {
             $idSuperCluster++;
             my $ClusTot = 0;
             for (my $t = $superC+1; $t <= $Clusters{SuperClusters}{$superC}-1; $t++) { # start+1 b/c I want to get rid of the first tall peak, (-1 b/c end has included the start of the next)
-                $ClusTot = $ClusTot + $InitialSet[$t][1];
+                $ClusTot += $InitialSet[$t][1];
             }
-            my $ClusAvg = $ClusTot/($Clusters{SuperClusters}{$superC}-1 - $superC);
+            if ( $ClusTot == 0 ) { # according to the above calculation singletons will have a defined $ClusTot value equals to zero
+                $ClusTot = 0.1 * $superClusterCount; # this way, singleton super clusters will have avg cluster density = 0.1
+            }
+            my $ClusAvg = $ClusTot/$superClusterCount;
             push @ClusterArray, [$superC,$Clusters{SuperClusters}{$superC}-1,9.9,$ClusAvg,$idSuperCluster."."."0"."."."0"]; # Artificially recording the super cluster. Randomly picked 9.9. (-1 b/c end has included the start of the next)
-            
         }
 
         # Start scanning from epsilon prime= minimum r(x) to maximum r(x):
@@ -551,22 +583,27 @@ sub RunSuperClustersID {
         my $TempSubClusterRef;
         my $idLevel = 1;
 
-        for (my $Cutoff = $InitialSet[$MinRDat][1] + 0.1; $Cutoff <=  $MaxRD + 0.5 ; $Cutoff += 0.1) {
+        for (my $Cutoff = $InitialSet[$MinRDat][1] ; $Cutoff <=  $MaxRD + 0.5 ; $Cutoff += 0.1) {
             my $idSubCluster = 1;
             my $n = $superC;
             my $counter = 0;
             my $s = $superC;
             while ($n < $Clusters{SuperClusters}{$superC}) { # n < (end of the super cluster) bc we don't want the last bar in the super cluster in a sub cluster
                 if ($InitialSet[$n][1] < $Cutoff) {
-                    $counter++;
+                    $counter += $InitialSet[$n][4]; # add the weight (for SITE=1, RECURRENCE=frequency, WEIGHT=weight)
+                    # $counter++;
                     if ($counter >= $MinPts) {
                         $Clusters{SubClusters}{$superC}{$s} = $n;
                     }
-                $n++;
+                    $n++;
                 }
                 else {
+                    if ( $Cutoff == $InitialSet[$MinRDat][1] and $InitialSet[$n][4] >= $MinPts ) { # allowing singleton sub clusters to form at the lowest level
+                        $Clusters{SubClusters}{$superC}{$n} = $n; 
+                    }
+                    # regular sub cluster stuff
                     $s = $n;
-                    $counter = 0;
+                    $counter = $InitialSet[$n][4]; # reset $counter with the next bar having RD>Cutoff
                     $n++;
                 }
             }
@@ -583,10 +620,18 @@ sub RunSuperClustersID {
                     my @OrderedE = @{$Clusters{SubClusters}{$superC}}{@OrderedS};
                     foreach my $start (@OrderedS) {
                         my $ClusTot = 0;
-                        for (my $t = $start+1; $t <= $Clusters{SubClusters}{$superC}{$start}; $t++) { # start+1 b/c I want to get rid of the first tall peak
-                            $ClusTot = $ClusTot + $InitialSet[$t][1];
+                        my $tempCounter = 0;
+                        if ( $start == $Clusters{SubClusters}{$superC}{$start} ) { # this sub cluster is a singleton
+                            $ClusTot = 0.1; # singletons are given cluster avg = 0.1 #$InitialSet[$start][1];
+                            $tempCounter = 1;
                         }
-                        my $ClusAvg = $ClusTot/($Clusters{SubClusters}{$superC}{$start}-$start);
+                        else { # this sub cluster is NOT a singleton
+                            for (my $t = $start+1; $t <= $Clusters{SubClusters}{$superC}{$start}; $t++) { # start+1 b/c I want to get rid of the first tall peak
+                                $ClusTot = $ClusTot + ($InitialSet[$t][1] * $InitialSet[$t][4]);
+                                $tempCounter += $InitialSet[$t][4];
+                            }
+                        }
+                        my $ClusAvg = $ClusTot/$tempCounter;
                         push @ClusterArray, [$start,$Clusters{SubClusters}{$superC}{$start},$Cutoff,$ClusAvg,$idSuperCluster.".".$idLevel.".".$idSubCluster];
                         $idSubCluster++;
                     }
@@ -607,19 +652,21 @@ sub RunSuperClustersID {
                             $es = $Clusters{SubClusters}{$superC}{$subC} - $subCpre;
                             $ee = $Clusters{SubClusters}{$superC}{$subC} - $TempSubClusterRef->{$subCpre};
                             #print "\t\tFor subC=$subC, subCpre=$subCpre: ($ss, $es, $se, $ee)";
-                            if (($ss*$es)> 0 && ($se*$ee) > 0) {
-                                $OverlapCheck = 0; # one of new sub clusters does not overlap with the previous sub cluster in consideration, check the next previous sub cluster
-                                #print "\tOverlapCheck = $OverlapCheck  continue checking\n";
-                            }
-                            else { # one of new sub clusters overlaps with one of the previous sub clusters
-                                if ( (($Clusters{SubClusters}{$superC}{$subC} - $subC) / ($TempSubClusterRef->{$subCpre} - $subCpre)) >= 2 ) { # the expansion of the cluster is more than 100% --> record
-                                    $OverlapCheck = 0;
+                            if ( $Clusters{SubClusters}{$superC}{$subC} - $subC != 0 and $TempSubClusterRef->{$subCpre} - $subCpre != 0 ) { # singletons are not considered for the overlap check
+                                if (($ss*$es)> 0 && ($se*$ee) > 0) {
+                                    $OverlapCheck = 0; # one of new sub clusters does not overlap with the previous sub cluster in consideration, check the next previous sub cluster
+                                    #print "\tOverlapCheck = $OverlapCheck  continue checking\n";
                                 }
-                                else {
-                                    $OverlapCheck = 1;
-                                    #print "\tOverlapCheck = $OverlapCheck  stop checking, found one overlap\n";
+                                else { # one of new sub clusters overlaps with one of the previous sub clusters (if check is necessary to avoid dividing by zeros)
+                                    if ( (($Clusters{SubClusters}{$superC}{$subC} - $subC) / ($TempSubClusterRef->{$subCpre} - $subCpre)) >= 2 ) { # the expansion of the cluster is more than 100% --> record
+                                        $OverlapCheck = 0;
+                                    }
+                                    else {
+                                        $OverlapCheck = 1;
+                                        #print "\tOverlapCheck = $OverlapCheck  stop checking, found one overlap\n";
+                                    }
+                                    last; # one of new sub clusters overlaps with one of the previous sub clusters (or its size increased by more than 100%), stop checking
                                 }
-                                last; # one of new sub clusters overlaps with one of the previous sub clusters (or its size increased by more than 100%), stop checking
                             }
                         }
                         if ($OverlapCheck == 0) {
@@ -632,10 +679,18 @@ sub RunSuperClustersID {
                         my @OrderedE = @{$Clusters{SubClusters}{$superC}}{@OrderedS};
                         foreach my $start (@OrderedS) {
                             my $ClusTot = 0;
-                            for (my $t = $start+1; $t <= $Clusters{SubClusters}{$superC}{$start}; $t++) { # start+1 b/c I want to get rid of the first tall peak
-                                $ClusTot = $ClusTot + $InitialSet[$t][1];
+                            my $tempCounter = 0;
+                            if ( $start == $Clusters{SubClusters}{$superC}{$start} ) { # this sub cluster is a singleton
+                                $ClusTot = 0.1; # singletons are given cluster avg = 0.1 #$InitialSet[$start][1];
+                                $tempCounter = 1;
                             }
-                            my $ClusAvg = $ClusTot/($Clusters{SubClusters}{$superC}{$start} - $start);
+                            else { # this sub cluster is NOT a singleton
+                                for (my $t = $start+1; $t <= $Clusters{SubClusters}{$superC}{$start}; $t++) { # start+1 b/c I want to get rid of the first tall peak
+                                    $ClusTot = $ClusTot + ($InitialSet[$t][1] * $InitialSet[$t][4]);
+                                    $tempCounter += $InitialSet[$t][4];
+                                }
+                            }
+                            my $ClusAvg = $ClusTot/$tempCounter;
                             push @ClusterArray, [$start,$Clusters{SubClusters}{$superC}{$start},$Cutoff,$ClusAvg,$idSuperCluster.".".$idLevel.".".$idSubCluster];
                             $idSubCluster++;
                         }
@@ -847,10 +902,10 @@ sub getClusterProbabilities{
 
     for (my $i = 0; $i < scalar @InitialCuts; $i++) {
         $InitialCuts[$i][0] =~ /(\d+)\.(\d+)\.(\d+)/g;
-        if ($2 != 0) {
+        if ($2 != 0) { # sub clusters
             $this->{"InitialCuts"}->{$1}->{$2} = $InitialCuts[$i][3];
         }
-        else {
+        else { # super clusters, put epsilon'=10
             $this->{"InitialCuts"}->{$1}->{$2} = 10;
         }
         $this->{"Variants"}->{$InitialCuts[$i][1].":".$InitialCuts[$i][2]}->{"run0"}->{$1}->{$2}->{$3}->{"AverageDensity"} = $InitialCuts[$i][4];
@@ -871,7 +926,7 @@ sub getClusterProbabilities{
 
         $this->MainOPTICS( $structure , $distance_matrix, $mutations ); # Generate a random ordred RD set (random OPTICS)
 
-        $this->GetSuperClusters($this->{CurrentRDarray}); # Identify super clusters
+        $this->GetSuperClusters($this->{CurrentRDarray}, $MinPts); # Identify super clusters
         # print "CurrentRDarray\n";
         # print Dumper $this->{CurrentRDarray};
         # print "CurrentSuperClusters\n";
@@ -915,6 +970,9 @@ sub getClusterProbabilities{
 
     } # end of run
 
+    # print "memberships\n";
+    # print Dumper $this->{"Memberships"};
+
     # Data to generate the Cluster Membership Probability Plot
     my $FinalDataFile1 = "./ProbabilityData.$this->{'pairwise_file_name_only'}";
     open (OUT, ">$FinalDataFile1");
@@ -940,8 +998,10 @@ sub getClusterProbabilities{
     # Clusters output file (will be used in the visual)
     my $FinalDataFile2 = "./$Epsilon.$MinPts.$this->{'pairwise_file_name_only'}.Prob.$this->{'probability_cut_off'}.clusters";
     open (OUT, ">$FinalDataFile2");
-        print OUT "Cluster\tGene/Drug\tMutation/Gene\tDegree_Connectivity\tCloseness_Centrality\tGeodesic_From_Centroid\tRecurrence\tEpsilon_prime\tAvg_density\tCovering_clusters\tChromosome\tStart\tStop\tReference\tAlternate\tTranscript\tAlternative_Transcripts\n";
+        print OUT "Cluster\tGene/Drug\tMutation/Gene\tDegree_Connectivity\tCloseness_Centrality\tGeodesic_From_Centroid\tRecurrence/Weight\tEpsilon_prime\tAvg_density\tCovering_clusters\tChromosome\tStart\tStop\tReference\tAlternate\tTranscript\tAlternative_Transcripts\n";
 
+        # print "this->variants\n";
+        # print Dumper $this->{Variants};
         for (my $SCID = 0; $SCID < scalar keys %{$this->{Memberships}}; $SCID++) {
             for (my $levelID = 0; $levelID < scalar keys %{$this->{Memberships}->{$SCID}}; $levelID++) {
                 for (my $SubID = 0; $SubID < scalar keys %{$this->{Memberships}->{$SCID}->{$levelID}}; $SubID++) {
@@ -951,8 +1011,43 @@ sub getClusterProbabilities{
                             my $CurrentAvgDensity = $this->{Variants}->{$variant}->{run0}->{$SCID}->{$levelID}->{$SubID}->{AverageDensity};
                             my $CoveringClusters =  $this->{Variants}->{$variant}->{run0}->{$SCID}->{$levelID}->{$SubID}->{CoveringClusters};
                             my $genomicAnnotation = $this->{Variants}->{$variant}->{run0}->{$SCID}->{$levelID}->{$SubID}->{Annotations};
-                            $variant =~ /(\w+)\:(\D\.\D+\d+\D)/g;
-                            print OUT "$SCID.$levelID.$SubID\t$1\t$2\t0\t0\t0\t0\t$CurrentEpsilon\t$CurrentAvgDensity\t$CoveringClusters\t$genomicAnnotation\n";
+                            $variant =~ /(\w+)\:(\D\.\D+\d+\D+)/g;
+                            # print OUT "$SCID.$levelID.$SubID\t$1\t$2\t0\t0\t0\t0\t$CurrentEpsilon\t$CurrentAvgDensity\t$CoveringClusters\t$genomicAnnotation\n";
+
+                            # print clusters with other mutations (not only representative vertices, but all)
+                            
+                            my $gene = $1;
+                            my ( $chromosome, $start, $stop, $ref, $alt, $transcript ) = ( split /\t/, $genomicAnnotation )[0,1,2,3,4,5];
+                            my $mKey1 = join( ":", $gene, $chromosome, $start, $stop );
+                            my $refAlt1 = join( ":", $ref, $alt );
+                            my $pKey1 = join( ":", $transcript, $2 );
+                            my $recurrence1 = $this->{"siteVertexMap"}->{$mKey1}->{$mKey1}->{$refAlt1}->{$pKey1};
+                            print OUT "$SCID.$levelID.$SubID\t$1\t$2\t0\t0\t0\t$recurrence1\t$CurrentEpsilon\t$CurrentAvgDensity\t$CoveringClusters\t$genomicAnnotation\n"; # representative vertex
+
+                            foreach my $mKey2 ( keys %{$this->{"siteVertexMap"}->{$mKey1}} ) {
+                                my ( $chromosome2, $start2, $stop2 ) = (split /:/, $mKey2)[1,2,3];
+                                
+                                foreach my $refAlt2 ( keys %{$this->{"siteVertexMap"}->{$mKey1}->{$mKey2}} ) {
+                                    my ( $ref2, $alt2 ) = split /:/, $refAlt2;
+                                    
+                                    foreach my $pKey2 ( keys %{$this->{"siteVertexMap"}->{$mKey1}->{$mKey2}->{$refAlt2}} ) {
+                                        
+                                        my ( $transcript2, $aa2 ) =  split /:/, $pKey2;
+                                        my $recurrence2 = $this->{"siteVertexMap"}->{$mKey1}->{$mKey2}->{$refAlt2}->{$pKey2};
+                                        my $genomicAnnotation2 = join( "\t", $chromosome2, $start2, $stop2, $ref2, $alt2, $transcript2, "1" ); # included "1" in the alternateTrans field
+                                        
+                                        if ( $mKey2 eq $mKey1 and $refAlt1 eq $refAlt2 ) { # if two mutation keys and ref:alt's are equal, output all the other pKeys
+                                            if ( $pKey2 ne $pKey1 ) {
+                                                $recurrence1 = $this->{"siteVertexMap"}->{$mKey1}->{$mKey2}->{$refAlt2}->{$pKey2};
+                                                print OUT "$SCID.$levelID.$SubID\t$1\t$2\t0\t0\t0\t$recurrence1\t$CurrentEpsilon\t$CurrentAvgDensity\t$CoveringClusters\t$genomicAnnotation2\n";
+                                            }
+                                        }
+                                        else { # other mutations which are at the same site (alternate ones)
+                                            print OUT "$SCID.$levelID.$SubID\t$gene\t$aa2\t0\t0\t0\t$recurrence2\t$CurrentEpsilon\t$CurrentAvgDensity\t$CoveringClusters\t$genomicAnnotation2\n";
+                                        }
+                                    }
+                                }
+                            }
                         }
                     }
                 }
@@ -990,18 +1085,23 @@ sub getClusterProbabilities{
 ##   Probability sub-methods
 #####
 
-sub GetSuperClusters { # Identify super clusters:
-    my ($this, $arrayRef) = @_;
+sub GetSuperClusters { # Identify super clusters: (singleton super clusters are possible to form)
+    my ($this, $arrayRef, $MinPts) = @_;
 
     my $scs = 0; # super cluster start
+    my $superClusterCount = ${$arrayRef}[$scs][4]; # count the weight (for SITE=1, RECURRENCE=frequency, WEIGHT=weight)
     for (my $i = 1; $i < scalar @{$arrayRef}; $i++) {
         #print "i=$i\n";
         if ( ${$arrayRef}[$i][1] == 10 ) {
             #print "RD($i)=10\n";
-            $scs = $i;  
+            if ( $superClusterCount >= $MinPts ) {
+                $this->{"CurrentSuperClusters"}->{$scs} = $i-1;
+            }
+            $scs = $i; # reset the super cluster starting point
+            $superClusterCount = ${$arrayRef}[$scs][4]; # reset the count  
         }
         else {
-            $this->{"CurrentSuperClusters"}->{$scs} = $i;
+            $superClusterCount += ${$arrayRef}[$i][4]; # add the weight (for SITE=1, RECURRENCE=frequency, WEIGHT=weight)
         }   
     }
     return 1;
@@ -1055,15 +1155,20 @@ sub GetSubClusters {
             my $s = $nStart;
             while ($n <= $this->{SuperClusterMap}->{$SCID}->{$nStart}) {
                 if (${$this->{CurrentRDarray}}[$n][1] < $EpsilonCut) {
-                    $counter++;
+                    $counter += ${$this->{CurrentRDarray}}[$n][4]; # add the weight (for SITE=1, RECURRENCE=frequency, WEIGHT=weight)
+                    # $counter++;
                     if ($counter >= $MinPts) {
                         $this->{SubClusters}->{$SCID}->{$levelID}->{$s} = $n;
                     }
                     $n++;
                 }
                 else {
+                    if ( $levelID == 1 and ${$this->{CurrentRDarray}}[$n][4] >= $MinPts ) { # allowing singleton sub clusters to form at the lowest level
+                        $this->{SubClusters}->{$SCID}->{$levelID}->{$n} = $n; 
+                    }
+                    # regular sub cluster stuff
                     $s = $n;
-                    $counter = 0;
+                    $counter = ${$this->{CurrentRDarray}}[$s][4]; # reset the count
                     $n++;
                 }
             }
diff --git a/lib/TGI/Mutpro/Main/HorizClustersLines.R b/lib/TGI/Mutpro/Main/HorizClustersLines.R
index d7921f8..f97ba6d 100644
--- a/lib/TGI/Mutpro/Main/HorizClustersLines.R
+++ b/lib/TGI/Mutpro/Main/HorizClustersLines.R
@@ -6,6 +6,8 @@ z = read.table(args[2], sep = "\t")
 RD<-y[[2]]
 ID<-y[[1]]
 
+z[z$V1==z$V2,"V3"] = 0.1 # show singletons at RD=0.1
+
 y0<-z[[1]] 
 x0<-z[[3]] 
 y1<-z[[2]]+1