#! /usr/bin/perl

use warnings;

while (defined($line = <>)) {
    if ($line =~ /^\#/) {
	# Skip comment
    } else {
	chop $line;
	@fields = split /\t/,$line;
	if ($fields[2] eq "SNV") {
	    $chr = $fields[0];
	    $chrpos = $fields[3];	# GVF file is 1-based

	    undef $rsid;
	    undef $alleleA;
	    undef $alleleB;

	    foreach $item (split ";",$fields[8]) {
		if ($item =~ /Dbxref=(\S+)/) {
		    $rsid = $1;
		    $rsid =~ s/.*://;
		} elsif ($item =~ /Reference_seq=(.)/) {
		    $alleleA = $1;
		} elsif ($item =~ /Variant_seq=(.)/) {
		    $alleleB = $1;
		}
	    }
	    if (($snp_strand = $fields[6]) eq "-") {
		$alleleA = $refcomp{$alleleA};
		$alleleB = $revcomp{$alleleB};
	    }
	    if ($alleleA le $alleleB) {
		$snp_type = $alleleA . $alleleB;
	    } else {
		$snp_type = $alleleB . $alleleA;
	    }

	    if (!defined($rsid)) {
		# Skip
	    } elsif ($snp_strand ne "+" && $snp_strand ne "=") {
		# Skip
	    } elsif (!defined($alleleA) || !defined($alleleB)) {
		# Skip
	    } elsif (!defined($acgt{$alleleA}) || !defined($acgt{$alleleB})) {
		print STDERR "$rsid has alleles $fields[9] with non-ACGT character\n";
	    } else {
		print ">$rsid $chr:$chrpos $snp_type $snp_strand\n";
	    }
	}
    }
}

exit;


BEGIN {
    %acgt = ("A" => 1, "C" => 1, "G" => 1, "T" => 1);

    %revcomp = ("A" => "T", "C" => "G", "G" => "C", "T" => "A");
}


