#!/usr/bin/perl

# Used to make the $convert_pairs file (tab-separated "id<TAB>representative id" lines).

# Command-line arguments (all five are required, in this order):
#   $scoptab            - tab-separated table consulted when building nonhead.tab
#   $liblist            - list whose lines start with a 7-character representative id
#   $liblist_tab        - table that is copied to head.tab
#   $lib_scop_dircla_ss - sequence file with '>' header lines
#   $convert_pairs      - output file receiving "id<TAB>representative id" lines
# They are package globals because the rest of the script refers to them by name.
die "usage: $0 scoptab liblist liblist_tab lib_scop_dircla_ss convert_pairs\n"
  if @ARGV < 5;
($scoptab, $liblist, $liblist_tab, $lib_scop_dircla_ss, $convert_pairs) = @ARGV;

# Read the whole representative list into @all_list (one element per line).
# The first seven characters of each line are used later as a representative id.
open(my $list_fh, '<', $liblist) or die "cannot open $liblist: $!\n";
@all_list = <$list_fh>;
close($list_fh);

# Make the sequence set for blastclust: copy $lib_scop_dircla_ss to
# seqs_for_blastclust, cutting every '>' header line down to its first eight
# characters; all other lines are copied unchanged.
open(my $seq_in, '<', $lib_scop_dircla_ss)
  or die "cannot open $lib_scop_dircla_ss: $!\n";
open(my $seq_out, '>', 'seqs_for_blastclust')
  or die "cannot create seqs_for_blastclust: $!\n";
while (my $line = <$seq_in>) {
  if ($line =~ /^>/) {
    my $header = substr($line, 0, 8);
    # A header shorter than eight characters still carries its line ending;
    # strip it so the output does not gain a blank line.
    $header =~ s/[\r\n]+\z//;
    print {$seq_out} "$header\n";
  }
  else {
    print {$seq_out} $line;
  }
}
close($seq_in);
# Buffered write errors only show up when the handle is closed.
close($seq_out) or die "error writing seqs_for_blastclust: $!\n";

# Easy assignments: cluster with blastclust at 60% sequence identity (-S 60)
# and 60% length coverage (-L 0.60); the clusters are written to blstclst.60.
# The length-coverage threshold is the upper-case -L option. Lower-case -l
# names an id-list file that is only used when reclustering with -r, so the
# previous "-l 0.60" did not set the coverage threshold.
`blastclust -i seqs_for_blastclust -S 60 -L 0.60 -o blstclst.60`;
warn "blastclust exited with wait status $?\n" if $? != 0;
# Everything below depends on the cluster file; stop rather than produce an
# empty or misleading pairs file.
die "blastclust did not produce blstclst.60\n" unless -e 'blstclst.60';

# First pass over the blastclust output. Each line of blstclst.60 is one
# cluster of space-separated ids.
#   * If an id from @all_list occurs in the line, every member of the cluster
#     is written to $convert_pairs as "member<TAB>representative".
#   * Otherwise the line is copied to nonbc.60 for the second pass.
# OUT keeps its bareword name and stays open: later code writes more pairs to it.
open(my $bc_fh, '<', 'blstclst.60') or die "cannot open blstclst.60: $!\n";
open(my $bcn_fh, '>', 'nonbc.60') or die "cannot create nonbc.60: $!\n";
open(OUT, '>', $convert_pairs) or die "cannot create $convert_pairs: $!\n";

# A representative id is the first seven characters of a list line; compute
# them once instead of once per cluster.
my @rep_ids = map { substr($_, 0, 7) } @all_list;

while (my $bcline = <$bc_fh>) {
  chomp $bcline;

  # The first listed id that occurs in the cluster line is its representative.
  my $rep;
  foreach my $rep_id (@rep_ids) {
    # Literal substring test: an id interpolated into a regex would have any
    # metacharacters in it (for example '.') treated as wildcards.
    if (index($bcline, $rep_id) >= 0) { $rep = $rep_id; last; }
  }

  if (!defined $rep) {
    print {$bcn_fh} "$bcline\n";
    next;
  }
  print OUT "$_\t$rep\n" foreach split(/ /, $bcline);
}
close($bcn_fh) or die "error writing nonbc.60: $!\n";
close($bc_fh);

# Make head.tab and nonhead.tab.
#   head.tab    - a copy of $liblist_tab.
#   nonhead.tab - one line per cluster in nonbc.60: the first seven characters
#                 of the cluster line, followed by columns 6, 8 and 10 of the
#                 first $scoptab row whose third column contains characters
#                 2-7 of the cluster line. Clusters with no such row are
#                 omitted.
open(my $tab_fh, '<', $scoptab) or die "cannot open $scoptab: $!\n";
# Split every table row once instead of once per cluster.
my @tab_rows = map { [ split(/\t/, $_) ] } <$tab_fh>;
close($tab_fh);

# List-form system() hands the path to cp without going through a shell.
system('cp', $liblist_tab, 'head.tab') == 0
  or die "cannot copy $liblist_tab to head.tab\n";

open(my $nhout_fh, '>', 'nonhead.tab') or die "cannot create nonhead.tab: $!\n";
open(my $bcnin_fh, '<', 'nonbc.60') or die "cannot open nonbc.60: $!\n";
while (my $line = <$bcnin_fh>) {
  # A line too short to hold a full id would yield an empty search key.
  next if length($line) < 7;
  my $first_id = substr($line, 0, 7);
  my $search_key = substr($line, 1, 6);
  foreach my $row (@tab_rows) {
    # Literal substring test so that characters such as '.' in the key are
    # not treated as regex wildcards.
    next unless defined $row->[2] && index($row->[2], $search_key) >= 0;
    print {$nhout_fh} "$first_id\t$row->[5]\t$row->[7]\t$row->[9]\n";
    last;
  }
}
close($nhout_fh) or die "error writing nonhead.tab: $!\n";
close($bcnin_fh);

# Load the inputs of the second pass into memory. The handles keep their
# bareword names because they are closed by name at the end of the script.
#   @all_seq  - every line of the sequence file
#   @all_nh   - every line of nonhead.tab
#   @all_head - every line of head.tab
open(SEQ, '<', $lib_scop_dircla_ss)
  or die "cannot open $lib_scop_dircla_ss: $!\n";
@all_seq = <SEQ>;

open(NH, '<', 'nonhead.tab') or die "cannot open nonhead.tab: $!\n";
@all_nh = <NH>;

open(HEAD, '<', 'head.tab') or die "cannot open head.tab: $!\n";
@all_head = <HEAD>;

# Second pass: choose a representative for every row of nonhead.tab (@all_nh).
#
# The candidates for a row are the head.tab rows (@all_head) whose columns 2-4
# equal the row's columns 2-4, in file order.
#   * One candidate: it is the representative.
#   * Several candidates: the row's sequence is aligned against each candidate
#     with ./align0. For each alignment the first output line containing
#     "% identity" is taken, and the candidate whose line ends in the largest
#     number (last space-separated token) wins. The first candidate wins ties
#     and is the fallback when no positive value is found.
#   * No candidate: a warning is printed and the cluster is left unassigned.
#     Previously this case reused candidates left over from an earlier row, or
#     wrote an empty representative.
# Every id on the nonbc.60 line(s) containing the row's id is then written to
# OUT as "member<TAB>representative".

# Map each record id (characters 2-8 of a '>' header line) to the full text of
# the first record carrying that id, so sequences can be fetched without
# rescanning the whole file for every candidate.
sub _index_fasta_records {
  my ($lines) = @_;
  my %record_for;
  my $current_id;    # undef while skipping a repeated id or before any header
  foreach my $line (@$lines) {
    if ($line =~ /^>/) {
      my $id = substr($line, 1, 7);
      if (exists $record_for{$id}) {
        $current_id = undef;
      }
      else {
        $record_for{$id} = '';
        $current_id = $id;
      }
    }
    $record_for{$current_id} .= $line if defined $current_id;
  }
  return \%record_for;
}

# Write $text (or nothing when it is undefined) to the file $path.
sub _write_fasta_file {
  my ($path, $text) = @_;
  open(my $fh, '>', $path) or die "cannot create $path: $!\n";
  print {$fh} $text if defined $text;
  close($fh) or die "error writing $path: $!\n";
  return;
}

# Run ./align0 on two files and return the first output line that contains
# "% identity", without its newline; returns undef when there is none.
sub _align0_identity_line {
  my ($query_fa, $target_fa) = @_;
  # List-form pipe open: the file names are passed to align0 directly rather
  # than through a shell command line.
  my $aln_fh;
  if (!open($aln_fh, '-|', './align0', $query_fa, $target_fa)) {
    warn "cannot run ./align0 $query_fa $target_fa: $!\n";
    return;
  }
  my $identity_line;
  while (my $out = <$aln_fh>) {
    next if defined $identity_line;    # keep reading so align0 can finish
    next unless $out =~ /\% identity/;
    chomp $out;
    $identity_line = $out;
  }
  close($aln_fh);
  return $identity_line;
}

# Build the comparison key for columns 2-4 of a row; missing columns count as
# empty strings, which is how the string comparison treated them before.
sub _class_key {
  my @fields = @_[0 .. 2];
  return join("\t", map { defined($_) ? $_ : '' } @fields);
}

my $record_for = _index_fasta_records(\@all_seq);

# Group the candidate ids of head.tab by their columns 2-4.
my %heads_in_class;
foreach my $hline (@all_head) {
  my $row = $hline;
  chomp $row;
  my ($head_id, @class) = (split(/\t/, $row))[0 .. 3];
  next unless defined $head_id && length $head_id;
  push @{ $heads_in_class{ _class_key(@class) } }, $head_id;
}

# Cluster lines left unassigned by the first pass.
open(my $nonbc_fh, '<', 'nonbc.60') or die "cannot open nonbc.60: $!\n";
my @nonbc_lines = <$nonbc_fh>;
close($nonbc_fh);
chomp @nonbc_lines;

foreach my $nhline (@all_nh) {
  my $row = $nhline;
  chomp $row;
  my ($query_id, @class) = (split(/\t/, $row))[0 .. 3];
  next unless defined $query_id && length $query_id;

  # Members of the cluster: ids on every nonbc.60 line that contains the
  # row's id (matched literally rather than as a grep pattern).
  my @members = map { split(' ', $_) }
                grep { index($_, $query_id) >= 0 } @nonbc_lines;

  my @candidates = @{ $heads_in_class{ _class_key(@class) } || [] };
  if (!@candidates) {
    warn "no head.tab entry matches $query_id; its cluster is left unassigned\n";
    next;
  }

  my $best = 0;    # index into @candidates
  if (@candidates > 1) {
    my $query_fa = "$query_id.fa";
    my @temp_files = ($query_fa);
    _write_fasta_file($query_fa, $record_for->{$query_id});

    my $best_value = 0;
    foreach my $i (0 .. $#candidates) {
      my $cand_fa = "$candidates[$i].fa";
      push @temp_files, $cand_fa;
      _write_fasta_file($cand_fa, $record_for->{ $candidates[$i] });

      my $identity_line = _align0_identity_line($query_fa, $cand_fa);
      next unless defined $identity_line;
      # The value compared is the last space-separated token of the line.
      my $value = (split(/ /, $identity_line))[-1];
      if (defined $value && $value > $best_value) {
        $best_value = $value;
        $best = $i;
      }
    }
    # Remove the per-row scratch files directly instead of generating and
    # running a shell script.
    unlink @temp_files;
  }

  print OUT "$_\t$candidates[$best]\n" foreach @members;
}

# Close the second-pass inputs and the pairs file, then remove the scratch
# files created along the way.
close(SEQ);
close(NH);
close(HEAD);
# Buffered write errors only surface at close; remember the outcome so the
# scratch files are still removed before the failure is reported.
my $out_closed = close(OUT);
my $out_error = $!;

unlink('blstclst.60', 'nonbc.60', 'head.tab', 'nonhead.tab', 'seqs_for_blastclust');

die "error writing $convert_pairs: $out_error\n" unless $out_closed;
