#! /usr/bin/perl -w

# Script for calculation of LIveBench contact A and B scores for two structures, model and target, assuming exact correspondence of residue numbering
# in input files.

if($#ARGV<5) { die "Usage: contact_ab.pl -t <target> -m <model> -o <output>\n(Model and target should have the same numbering of CA atoms.)\n"; }

my $LN2 = 0.693147180559945;

while (@ARGV and $ARGV[0] =~ /^-/) {
        $_ = shift;
        last if /^--$/;
        if (/^-m/) {$_ = shift; $model = $_;}
        if (/^-t/) {$_ = shift; $target = $_;}
        if (/^-o/) {$_ = shift; $outfile = $_;}
}

my @mpath = split /\//, $model;
my $mname = $mpath[$#mpath];
my @tpath = split /\//, $target;
my $tname = $tpath[$#tpath];


# read CA coordinates (x1,y1,z1){$i} for model
my %x1=();
my %y1=();
my %z1=();
open(MOD, $model) || die "Cannot open input pdb file with model $model : $!\n";
while(my $line=<MOD>) {
	chomp($line);
	if($line=~/^ATOM/){
		
	}else{
		next;
	}		
	my $type=substr($line,12,4);

	if($type eq " CA "){ # pdb line format, e.g: ATOM      2  CA  MET     1     489.230 945.691 891.975  1.00  5.00
		my $xpos=substr($line,30,8);
		if($xpos=~/(-?\d+\.?\d+)/){
				$xpos=$1;
		}	
		my $ypos=substr($line,38,8);
		if($ypos=~/(-?\d+\.?\d+)/){
				$ypos=$1;
		}	
		my $zpos=substr($line,46,8);
		if($zpos=~/(-?\d+\.?\d+)/){
				$zpos=$1;
		}	
		
		my $n=substr($line,22,5);
		if($n=~/(-?\w+)/){
			$n=$1;
		}	
		$x1{$n} = $xpos; 
		$y1{$n} = $ypos; 
		$z1{$n} = $zpos; 
	}			
}
close(MOD); 



# read CA coordinates (x2,y2,z2){$i} for target
my %x2=();
my %y2=();
my %z2=();
open(TARG, $target) || die "Cannot open input pdb file with target $target : $!\n";
while(my $line=<TARG>) {
	chomp($line);
	if($line=~/^ATOM/){
		
	}else{
		next;
	}	
	my $type=substr($line,12,4);
	if($type eq " CA "){ # pdb line format, e.g: ATOM      2  CA  MET     1     489.230 945.691 891.975  1.00  5.00
		my $xpos=substr($line,30,8);
		if($xpos=~/(-?\d+\.?\d+)/){
				$xpos=$1;
		}	
		my $ypos=substr($line,38,8);
		if($ypos=~/(-?\d+\.?\d+)/){
				$ypos=$1;
		}	
		my $zpos=substr($line,46,8);
		if($zpos=~/(-?\d+\.?\d+)/){
				$zpos=$1;
		}	
		my $n=substr($line,22,5);
		if($n=~/(-?\w+)/){
			$n=$1;
		}	
		$x2{$n} = $xpos; 
		$y2{$n} = $ypos; 
		$z2{$n} = $zpos; 

	}			
}
close(TARG); 


my $contacta = 0.0;
my $contactb = 0.0;
my $minsumb = 0.0;
my $avsumb = 0.0;
my $common_len = 0;
for $i(sort {$a<=>$b} keys %x1) { 
	next unless defined $x2{$i};
	$common_len++;

	my $minsuma = 0.0;
	my $avsuma = 0.0;
	for $j(sort {$a<=>$b} keys %x1) { # traverse model CA pairs

		next unless defined $x2{$j};

		if( abs($i-$j) >= 6) {
			my $dx1 = $x1{$i}-$x1{$j};	
			my $dy1 = $y1{$i}-$y1{$j};	
			my $dz1 = $z1{$i}-$z1{$j};
			my $d1 = sqrt($dx1*$dx1 + $dy1*$dy1 + $dz1*$dz1);
			my $ds1 = exp( -$LN2 *$d1*$d1 / (3*3) );	


			my $dx2 = $x2{$i}-$x2{$j};	
			my $dy2 = $y2{$i}-$y2{$j};	
			my $dz2 = $z2{$i}-$z2{$j};	
			my $d2 = sqrt($dx2*$dx2 + $dy2*$dy2 + $dz2*$dz2);	
			my $ds2 = exp( -$LN2 *$d2*$d2 / (3*3) );	

			$minsuma = $ds1 < $ds2 ? $minsuma + $ds1 : $minsuma + $ds2;
			$avsuma += 0.5 * ($ds1+$ds2);
		}

	}
	$contacta += $minsuma / $avsuma;
	$minsumb += $minsuma;
	$avsumb += $avsuma;
}
$contactb = ($minsumb / $avsumb) * $common_len;

open(FOUT, ">> $outfile") || die "Cannot open output $outfile :$!\n";
print FOUT "$tname\t$mname\t";
printf FOUT "%.3f\t%.3f\n", $contacta, $contactb;
close(FOUT);
