From 735c1f73ea8da26613f905b6138a14ed0e343e75 Mon Sep 17 00:00:00 2001 From: Cyriac Kandoth Date: Thu, 29 Sep 2016 12:52:22 -0400 Subject: [PATCH] Fix ref matching for long indels --- maf2vcf.pl | 4 +++- vcf2maf.pl | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/maf2vcf.pl b/maf2vcf.pl index d01896d..b754f90 100644 --- a/maf2vcf.pl +++ b/maf2vcf.pl @@ -101,7 +101,9 @@ push( @regions_split, [ splice( @regions, 0, 100000 ) ] ) while @regions; map{ my $loci = join( " ", @{$_} ); $lines .= `$samtools faidx $ref_fasta $loci` } @regions_split; foreach my $line ( grep( length, split( ">", $lines ))) { - my ( $locus, $bps ) = split( "\n", $line ); + # Carefully split this FASTA entry, properly chomping newlines for long indels + my ( $locus, $bps ) = split( "\n", $line, 2 ); + $bps =~ s/\r|\n//g; if( $bps ){ $bps = uc( $bps ); $flanking_bps{$locus} = $bps; diff --git a/vcf2maf.pl b/vcf2maf.pl index 768fd30..4756d35 100644 --- a/vcf2maf.pl +++ b/vcf2maf.pl @@ -248,7 +248,9 @@ sub GetBiotypePriority { push( @regions_split, [ splice( @regions, 0, 100000 ) ] ) while @regions; map{ my $loci = join( " ", @{$_} ); $lines .= `$samtools faidx $ref_fasta $loci` } @regions_split; foreach my $line ( grep( length, split( ">", $lines ))) { - my ( $locus, $bps ) = split( "\n", $line ); + # Carefully split this FASTA entry, properly chomping newlines for long indels + my ( $locus, $bps ) = split( "\n", $line, 2 ); + $bps =~ s/\r|\n//g; if( $bps ){ $bps = uc( $bps ); $flanking_bps{$locus} = $bps;