#!/usr/bin/perl

# Positional command-line arguments (described in the usage text below).
# $date is the query list file name; it is also used as the run label in the
# names of the files and directories created by this script.
$date = $ARGV[0];
$setupfile = $ARGV[1];
$input_type = $ARGV[2];
$mode = $ARGV[3];

# Exactly three or four arguments are accepted; anything else prints the usage and aborts.
if ($#ARGV != 2 && $#ARGV != 3) {
  print "Syntax: SCOPmap_v2.pl \$querylist \$setupfile \$input_type \$mode\n";
  print "   \$querylist = list of query structures\n";
  print "   \$setupfile = file containing user-defined library paths and variables\n";
  print "   \$input_type = EXIST (for a structure already in PDB) or NEW (for a structure not in PDB)\n";
  print "   \$mode = RESTART if this run is a continuation of a stalled run (optional)\n";
  die "\n";
}

if (!-e $setupfile) { die "Setup file \"$setupfile\" defining paths and libraries does not exist.\n"; }
if (-z $setupfile) { die "Setup file \"$setupfile\" is empty.\n"; }
if ($input_type ne "NEW" && $input_type ne "EXIST") { die "Define input type as either NEW or EXIST.\n"; }

# $findwhere selects the pipeline stage to start from: 0 means a fresh run,
# -1 is a placeholder that is replaced once the restart point is looked up.
# Both spellings of the restart keyword are accepted; $mode is normalised to
# "RESTART" so the later exact-match check on $mode also sees a lowercase request.
if (defined $mode && ($mode eq "RESTART" || $mode eq "restart")) {
  $mode = "RESTART";
  $findwhere = -1;
}
else { $findwhere = 0; }   # was "$findwhere == 0", a comparison with no effect

# Extra-output switch; the alternative value is kept commented out below.
$extraoutputoption = "no";
#$extraoutputoption = "yes";

# Read the setup file. The helper's return list is treated as the names of
# variables that the setup file failed to define; any entry is fatal.
@undefined = SUB_get_variables_from_setupfile ( "$setupfile" );
if (@undefined) {
  print "The following variables are not defined in $setupfile:\n";
  print "\$$_\n" for @undefined;
  die "Exiting program.\n";
}
SUB_check_setup ();

# Refuse to run when the output directory for this run label is already present.
if (-d "SM_$date") {
  print "Output directory for $date (SM_$date) already exists.  Exiting.\n";
  exit;
}


##########
#
# script name: blanewpdb2SCOP.pl
# - runs blastclust and chooses representatives
# - runs gapped BLAST, RPS-BLAST, and PSI-BLAST
# - finds hits that meet both coverage and score criteria, hits that meet only score criteria, and potential hits for DaliLite
#

# E-value cutoffs handed to SUB_chkBr_14fields for the simple BLAST,
# RPS-BLAST and domain PSI-BLAST steps respectively.
$SIMPLE_BLAST_EVALUE_CUTOFF  = 0.005;
$RPS_BLAST_EVALUE_CUTOFF     = 0.005;
$DOM_PSI_BLAST_EVALUE_CUTOFF = 1e-04;
# Parameters for later stages (not referenced in the stages directly below).
$CSV_WINDOW_SIZE = 3;
$CSV_PCT         = 25;
$dalisets        = 10;

# Result accumulators: every list starts empty ...
@frag_list = ();
@unmap_rep = ();
@simple_rep = ();
@step1 = ();
@step2 = ();
@step3 = ();
@step5 = ();
@step6m = ();
@step6d = ();
@step7 = ();
@step8 = ();
@step9 = ();
# ... and every per-step counter starts at -1, i.e. "no entry yet".
$step1ct = $step2ct = $step3ct = $step5ct = $step6mct = $step6dct = $step7ct = $step8ct = $step9ct = -1;

# For a restarted run, ask the helper which stage to resume from.
$findwhere = SUB_find_restart () if ($mode eq "RESTART");

# Prepare the working directory ./pdb_$date and the list of query identifiers.
#
# NEW input:   every "name.pdb" listed in $date is copied under a random
#              pseudonym (101z .. 999z); the name<TAB>pseudonym pairs are kept
#              in @newstr_pseudo and written to KEY_$date_newstructure_pseudonyms.
# EXIST input: the first four characters of each unique line of $date are
#              written to pdb_$date/$date.newpdb.list.
# On exit the current directory is ./pdb_$date whenever $findwhere is 0 or 1.
if ($input_type eq "NEW") {
  @newstr_pseudo = ();
  if ($findwhere != 0) {
    # Restarted run: reload the pseudonym key saved by the original run.
    # A missing key file is reported but, as before, not fatal.
    if (open(K1, "<", "KEY\_$date\_newstructure_pseudonyms")) {
      @k1 = <K1>;
      foreach $line (@k1) { chomp $line; push @newstr_pseudo, $line; }
      close(K1);
    }
    else { warn "WARNING: cannot read KEY\_$date\_newstructure_pseudonyms: $!\n"; }
  }
  if ($findwhere == 0) {
    open(DATELOG, ">", "$date.log") or die "Cannot write log file $date.log: $!\n";
    $currdir = `pwd`; chomp $currdir;
    mkdir "./pdb\_$date" or warn "WARNING: cannot create ./pdb\_$date: $!\n";
    # An unreadable query list used to be treated as an empty list; fail loudly instead.
    open(NLIN, "<", $date) or die "ERROR: cannot read query list $date: $!\n";
    @nlin = <NLIN>;
    if ($#nlin > 897) { print "Too many entries to give each a unique pseduonym (101z.pdb thru 999z.pdb); reduce size of input list.  Exiting program.\n"; exit; }
    foreach $nline (@nlin) {
      chomp $nline;
      if ($nline !~ /\.pdb/) { print "Entries in list \"$date\" must be named as \"structurename.pdb\". Please format input list appropriately.  Exiting program.\n"; exit; }
      if (!-e $nline) { print "New structure $nline not found in working directory.  Please move a copy of the structure (PDB-style format) to the current directory or adjust $date list accordingly.  Exiting program.\n"; exit; }
      @ar1 = split/\.pdb/, $nline;
      # Draw random pseudonyms until one is found that is not already in use.
      $wt = 0;
      while ($wt == 0) {
        $pseud = int(rand 899) + 101;
        $pseud .= "z";
        $f1 = 0;
        foreach $prev (@newstr_pseudo) { if ($prev =~ /$pseud/) { $f1 = 1; last; } }
        if ($f1 == 0) {
          push @newstr_pseudo, "$ar1[0]\t$pseud";
          print DATELOG "pseudo key\t$ar1[0]\t$pseud\n";
          # List-form system: the file name is passed to cp without going through a shell.
          system "cp", $nline, "SAVE.$pseud.SAVE";
          system "cp", $nline, "pdb\_$date/$pseud.pdb";
          $wt = 1;
        }
      }
    }
    close(NLIN);
    chdir "./pdb\_$date";
    open(O1, ">", "$date.newpdb.list") or die "Cannot write $date.newpdb.list: $!\n";
    open(O2, ">", "../KEY\_$date\_newstructure_pseudonyms") or die "Cannot write KEY\_$date\_newstructure_pseudonyms: $!\n";
    foreach $nlist (@newstr_pseudo) {
      @ar1 = split/\t/, $nlist;
      print O1 "$ar1[1]\n";
      print O2 "$nlist\n";
    }
    close(O1); close(O2);
  }
  if ($findwhere == 1) {
    # Restart inside the BLAST stage: rebuild pdb_$date from the SAVE.* copies.
    $currdir = `pwd`; chomp $currdir;
    mkdir "./pdb\_$date" or warn "WARNING: cannot create ./pdb\_$date: $!\n";
    foreach $q (@newstr_pseudo) {
      @arx = split/\t/, $q;
      system "cp", "SAVE.$arx[1].SAVE", "pdb_$date/$arx[1].pdb";
    }
    chdir "./pdb_$date";
  }
}
else {
  if ($findwhere == 0 || $findwhere == 1) {
    open(DATELOG, ">", "$date.log") or die "Cannot write log file $date.log: $!\n";
    if( -e "$date" ){
      if( -e "./pdb\_$date" ) { print DATELOG "WARNING: ./pdb\_$date/ already exists. Exiting program.\n"; exit; }
      else{
        mkdir "./pdb\_$date" or warn "WARNING: cannot create ./pdb\_$date: $!\n";
        chdir "./pdb\_$date";
        system "cat ../$date | sort -u | cut -c1-4 > $date.newpdb.list";
      }
    }
    else { print DATELOG "ERROR: cannot find query list $date\n"; exit; }
  }
}
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

# BLAST stage (fresh run, or restart inside this stage).
# - clusters the query chains with blastclust and picks representatives (@bc_rep)
# - per representative: PSI-BLAST on nr (profile), simple BLAST, RPS-BLAST and
#   profile PSI-BLAST against the SCOP libraries
# - representatives without an accepted step-1 hit are collected in @unmap_rep
# - on a restart ($findwhere == 1) the fresh results are merged with the ones
#   held in TMP_HOLDING_$date and @step1..@step3 are rebuilt from ~.blast2 files
# Runs inside ./pdb_$date and returns to the parent directory at the end.
# NOTE(review): DATELOG is assumed to be open here; that does not appear to be
# the case for a restarted NEW-type run -- confirm against SUB_find_restart.
if ($findwhere == 0 || $findwhere == 1) {
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
#run blastclust, get representatives, and cut ~.pdb and ~.ca files for them
  system "mkdir new_domain_str";
  system "cp $path_bin/.ncbirc .";
  @date_list = `cat $date.newpdb.list | cut -c1-4`;
  $report = SUB_blstclst95 ( \@date_list );
  print DATELOG "After blstclst95.pl:\n$report";
  SUB_fix_bc_output ();
  @bc_rep = `cat $date.newpdb.list.bc | cut -c1-6`;
  SUB_cut_new_chains( \@bc_rep );

#combine blstclst.fa, blstclst1.fa, $date.newpdb.list.bc.pre, and $date.newpdb.list.bc into one file: blstclst.fa.bc
#each part is preceded by a "!^(NEW_FILE): <name>" header line
  open( BO, ">blstclst.fa.bc" );
  print BO "!^(NEW_FILE): blstclst.fa\n";
  open( IN, "blstclst.fa" ); @in = <IN>; print BO @in; close IN;
  print BO "!^(NEW_FILE): blstclst1.fa\n";
  open( IN, "blstclst1.fa" ); @in = <IN>; print BO @in; close IN;
  print BO "!^(NEW_FILE): $date.newpdb.list.bc.pre\n";
  open( IN, "$date.newpdb.list.bc.pre" ); @in = <IN>; print BO @in; close IN;
  print BO "!^(NEW_FILE): $date.newpdb.list.bc\n";
  open( IN, "$date.newpdb.list.bc" ); @in = <IN>; print BO @in; close IN;
  close BO;
  `rm -f blstclst.fa blstclst1.fa $date.newpdb.list.bc.pre $date.newpdb.list.bc $date.newpdb.list`;

#leave a marker file and start the stall checker in the background
  open( S, ">blast_started" );
  print S "signal\n";
  close S;
  system "$path_bin/chk_for_stall.pl $userid &";

  foreach $pdb_chain (@bc_rep){
    $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
    chomp( $pdb_chain );

    # On a restart, skip representatives whose ~.blast2 is already in the holding directory.
    next if ($findwhere == 1 && -e "../TMP_HOLDING_$date/$pdb_chain.blast2");

    # Number of PSI-BLAST iterations (-j); reduced to 2 for the representative
    # named in $quitrep on a restart ($quitrep is set outside this block).
    $it = 5;
    if ($findwhere == 1 && $pdb_chain eq $quitrep) { $it = 2; }

    ## 0th step: PSI-BLAST on nr, to get check_point_file (profile) and effective_nr_length.
    print DATELOG "\n$pdb_chain\t\t0) PSI-BLAST on nr to generate profile\n";
    $nrcmd = "blastpgp -i $pdb_chain.fa -d nr -o $pdb_chain-nr.br -e 0.001 -m 6 -v 1000000 -j $it -b 1000000 -h 0.001 -C $pdb_chain.chk -a 4";
    system( $nrcmd );
    if( -z "$pdb_chain-nr.br" ){    # empty output: retry once
      system( $nrcmd );
    }
    if( -z "$pdb_chain-nr.br" ){
      print DATELOG "ERROR: PSI-BLAST on nr for $pdb_chain to generate profile failed!\n";
    }
    ($nr_hit_count, $effective_nr_length) = SUB_psiblast_seqnum ( "$pdb_chain-nr.br" );
    chomp( $nr_hit_count );
    chomp( $effective_nr_length );
    if( !defined( $effective_nr_length ) ){
       print DATELOG "ERROR: PSI-BLAST on nr for $pdb_chain to generate profile failed!\n";
       push @unmap_rep, $pdb_chain;
       next;
    }
    # Fall back to a fixed database length when the report did not provide one.
    if( $effective_nr_length eq '' ){ $effective_nr_length = 303989637; }
    print DATELOG "effective_nr_length = $effective_nr_length\n";
    print DATELOG "Number of sequences used in profile_building: $nr_hit_count\n";


## first step: do simple blast on whole pdb seqs that are in SCOP
    print DATELOG "$pdb_chain\t\t1) simple-blast\n";
    $simplecmd = "blastpgp -i $pdb_chain.fa -d $SIMPLE_BLAST_DB -o $pdb_chain-simple.br -e 100 -a 4 -z $effective_nr_length -v 1000000 -b 1000000";
    system( $simplecmd );
    if( -z "$pdb_chain-simple.br" ){    # empty output: retry once
      system( $simplecmd );
    }
    if( -z "$pdb_chain-simple.br" ){
      print DATELOG "ERROR: Simple-BLAST for $pdb_chain failed!\n";
    }
    $sendvar_1[0] = $pdb_chain; $sendvar_1[1] = "$pdb_chain-simple.br"; $sendvar_1[2] = 1; $sendvar_1[3] = $SIMPLE_BLAST_EVALUE_CUTOFF;
    ($return1, $return2) = SUB_chkBr_14fields ( @sendvar_1 );
    if( $return1 eq "No_hits_found" ){
        print DATELOG "step-1 'No_hits_found'\n";
        push @unmap_rep, $pdb_chain;
    }
    elsif( $return1 eq "UA" ){ push @unmap_rep, $pdb_chain; }
    else {
      push @simple_rep, $return1;
      print DATELOG "$return2";
#X0X1
      open( L1, ">label1"); print L1 "!^(NEW_FILE): $pdb_chain-nr.br\n"; close(L1);
      open( L2, ">label2"); print L2 "!^(NEW_FILE): $pdb_chain-simple.br\n"; close(L2);
      `cat label1 $pdb_chain-nr.br label2 $pdb_chain-simple.br >> $pdb_chain.blast`;
      `rm label1 label2 $pdb_chain-nr.br $pdb_chain-simple.br`;
      open( Q1, ">>$pdb_chain.blast2");
      if (-z "$pdb_chain.daliconfirm") { system "rm -f $pdb_chain.daliconfirm"; }
      if (-e "$pdb_chain.daliconfirm") {
        open(IN, "$pdb_chain.daliconfirm"); @in = <IN>;
        print Q1 "!^(NEW_FILE): $pdb_chain.daliconfirm\n"; print Q1 @in;
        close IN; system "rm -f $pdb_chain.daliconfirm";
      }
      close Q1;
      next;
#X0X2
      # push @unmap_rep, $pdb_chain;
      ## To continue with all following steps regardless of the step-1 result,
      ## uncomment the previous line and comment out the lines between X0X1 and X0X2.
    }


## second step: do RPS-BLAST on the library of SCOP domain profiles
    print DATELOG "$pdb_chain\t\t2) rps-blast against SCOP domain profiles\n";
    $rpscmd = "rpsblast -i $pdb_chain.fa -d $RPS_DB  -e 100 -o $pdb_chain-rps.br -a 4 -z $effective_nr_length -v 1000000 -b 1000000";
    system( $rpscmd );
    # Re-run while the output is empty, but give up after $rps_max_rerun
    # attempts: the former unbounded loop never ended on a persistent failure.
    $rps_max_rerun = 5;
    for ($rps_rerun = 0; $rps_rerun < $rps_max_rerun && -z "$pdb_chain-rps.br"; $rps_rerun++) {
      print DATELOG "$pdb_chain-rps.br size = 0. re-run rps-blast\n";
      system( $rpscmd );
    }
    if( -z "$pdb_chain-rps.br" ){
      print DATELOG "ERROR: RPS-BLAST for $pdb_chain failed!\n";
    }
    $sendvar_1[0] = $pdb_chain; $sendvar_1[1] = "$pdb_chain-rps.br"; $sendvar_1[2] = 2; $sendvar_1[3] = $RPS_BLAST_EVALUE_CUTOFF;
    ($return1, $return2) = SUB_chkBr_14fields ( @sendvar_1 );
    if( $return1 eq "No_hits_found" ){
       print DATELOG "step-2 'No_hits_found'\n";
    }
    print DATELOG "$return2";

## third step: PSI-BLAST on nr to generate profile and then use it to do PSI-BLAST on SCOP domain seq
## 2003-08-25: E-value cutoff changed to 1e-04 because 1ni3_A has a wrong hit at E-value 6e-04.
## After checking all test files (newset1-5), only one output (1m0i) is affected: its confidence
## level goes from 2 to 1. The change removes one false positive at the cost of one confidence level.

    print DATELOG "$pdb_chain\t\t3) PSI-blast against SCOP domain\n";
    $scopdcmd = "blastpgp -i $pdb_chain.fa -d $SCOP_DOMSEQ_DB -o $pdb_chain-SCOPd.br -e 100 -R $pdb_chain.chk -a 4 -z $effective_nr_length -v 1000000 -b 1000000";
    system( $scopdcmd );
    if( -z "$pdb_chain-SCOPd.br" ){    # empty output: retry once
       system( $scopdcmd );
    }
    if( -z "$pdb_chain-SCOPd.br" ){
      print DATELOG "ERROR: PSI-BLAST on SCOP-domain-seq for $pdb_chain failed!\n";
    }

    $sendvar_1[0] = $pdb_chain; $sendvar_1[1] = "$pdb_chain-SCOPd.br"; $sendvar_1[2] = 3; $sendvar_1[3] = $DOM_PSI_BLAST_EVALUE_CUTOFF;
    ($return1, $return2) = SUB_chkBr_14fields ( @sendvar_1 );
    if( $return1 eq "No_hits_found" ){
       print DATELOG "step-3 'No_hits_found'\n";
    }
    print DATELOG "$return2";

    # Bundle the four raw reports into ~.blast (each behind a NEW_FILE header)
    # and append any DaliLite confirmation to ~.blast2.
    open( L1, ">label1"); print L1 "!^(NEW_FILE): $pdb_chain-nr.br\n"; close(L1);
    open( L2, ">label2"); print L2 "!^(NEW_FILE): $pdb_chain-simple.br\n"; close(L2);
    open( L3, ">label3"); print L3 "!^(NEW_FILE): $pdb_chain-rps.br\n"; close(L3);
    open( L4, ">label4"); print L4 "!^(NEW_FILE): $pdb_chain-SCOPd.br\n"; close(L4);
    `cat label1 $pdb_chain-nr.br label2 $pdb_chain-simple.br label3 $pdb_chain-rps.br label4 $pdb_chain-SCOPd.br >> $pdb_chain.blast`;
    `rm label1 label2 label3 label4 $pdb_chain-nr.br $pdb_chain-simple.br $pdb_chain-rps.br $pdb_chain-SCOPd.br`;
    open(Q1, ">>$pdb_chain.blast2");
    if (-z "$pdb_chain.daliconfirm") { system "rm -f $pdb_chain.daliconfirm"; }
    if (-e "$pdb_chain.daliconfirm") {
      open(IN, "$pdb_chain.daliconfirm"); @in = <IN>;
      print Q1 "!^(NEW_FILE): $pdb_chain.daliconfirm\n"; print Q1 @in;
      close IN; system "rm -f $pdb_chain.daliconfirm";
    }
    close Q1;
  }

  if ($findwhere == 0) {
    # Fresh run: clean up and archive the checkpoint files.
    system "rm -f blast_started error.log *.fa rpsblast.log";
    system "rm -f core.*";
    system "tar cvf $date.chk.tar *.chk";
    system "bzip2 $date.chk.tar";
    system "rm -f *.chk";
    system "rm -f *.pdb";
    chdir "..";
  }
  else {
    # Restart: fold the new results into TMP_HOLDING_$date, which then replaces pdb_$date.
    chdir "..";
    `mv pdb_$date/*.blast2 TMP_HOLDING_$date`;
    `mv pdb_$date/*.blast TMP_HOLDING_$date`;
    `mv pdb_$date/*.chk TMP_HOLDING_$date`;
    `cat pdb_$date/seqali.pos-pos >> TMP_HOLDING_$date/seqali.pos-pos`;
    `rm -r -f pdb_$date`;
    `mv TMP_HOLDING_$date pdb_$date`;
    # Append the tail of this run's log (from the "Number of sequences whose
    # length is" line onward, or all but the first 3 lines) to the held log.
    close(DATELOG);
    $dlines = `cat $date.log`;
    @ard1 = split/\n/, $dlines;
    $pt = 0;
    for ($p=0; $p!=($#ard1+1); $p++) { if ($ard1[$p] =~ /Number of sequences whose length is/) { $pt = $p; last; } }
    if ($pt == 0) { $pt = 3; }
    $lc = $#ard1 - $pt;
    `tail -$lc $date.log >> pdb_$date/$date.log`;
    `mv pdb_$date/$date.log .`;
    open(DATELOG, ">>$date.log");
    chdir "pdb_$date";
    system "tar cvf $date.chk.tar *.chk";
    system "bzip2 $date.chk.tar";
    system "rm -f *.chk";
    # Rebuild @unmap_rep and @step1..@step3 from the out1/out2/out3 sections of each ~.blast2.
    # $rep is quoted (\Q..\E) and the dot escaped so the header is matched literally.
    @unmap_rep = (); @step1 = (); @step2 = (); @step3 = (); $step1ct = -1; $step2ct = -1; $step3ct = -1;
    foreach $rep (@bc_rep) {
      open(BL2, "$rep.blast2");
      @bl2 = <BL2>;
      $s1 = 0;
      $s1ct = -1;
      foreach $line (@bl2) {
        if ($s1 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s1ct++;
          if ($s1ct == 0) { $step1ct++; }
          $step1[$step1ct][$s1ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): \Q$rep\E\.out1/) { $s1 = 1; } }
      }
      # No step-1 section means the representative is still unmapped.
      if ($s1ct == -1) { push @unmap_rep, "$rep"; }
      $s2 = 0;
      $s2ct = -1;
      foreach $line (@bl2) {
        if ($s2 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s2ct++;
          if ($s2ct == 0) { $step2ct++; }
          $step2[$step2ct][$s2ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): \Q$rep\E\.out2/) { $s2 = 1; } }
      }
      $s3 = 0;
      $s3ct = -1;
      foreach $line (@bl2) {
        if ($s3 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s3ct++;
          if ($s3ct == 0) { $step3ct++; }
          $step3[$step3ct][$s3ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): \Q$rep\E\.out3/) { $s3 = 1; } }
      }
      close(BL2);
    }
    chdir "..";
  }

  `cp $path_bin/blosum62.* .`;
  `mv pdb_$date/new_domain_str .`;
  print DATELOG "STATUS: finished running BLAST programs\n";
  # Closing flushes the log so the grep below sees everything written so far.
  close(DATELOG);

#make list of fragments from the query set (less than 20 residues)
  $grepall = `grep "Do not consider" $date.log | grep "length = "`;
  @tmp = split/\n/, $grepall;
  for ($i=0; $i!=($#tmp+1); $i++) {
    $ch = substr($tmp[$i], 0, 6);
    @ar1a = split/length \= /, $tmp[$i];
    @ar1b = split/ /, $ar1a[1];
    push @frag_list, "$ch $ar1b[0]";
  }
  $grepall = "";

  # The LAST_CHECK_TIME print that used to follow here wrote to the closed
  # DATELOG handle and therefore had no effect; it has been dropped.
}
# (Re)open the run log in append mode for the remaining stages. A failure is
# reported on STDERR instead of silently discarding every later log line.
open(DATELOG, ">>", "$date.log") or warn "WARNING: cannot append to $date.log: $!\n";
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";


##########
#
# scripts: scopmap_5_6.pl, run_step5.pl, run_step6.pl
# - runs COMPASS (step5), DaliLite (step6), CSV (step7), and DaliLite/xBLAST agreement (step8)

# Working names for values taken from setup-file variables.
$databasepath = $compass_db;   # database passed to compass_vs_db
$datedir = "pdb_$date";
$replist = $liblist;           # list of library representatives read by the MAMMOTH stage
# TODO: does the $replist definition need adjusting for the "exist sfam" option?


# Stage 2: make sure every unmapped representative has cut structure files and
# a PSI-BLAST-derived alignment (new_domain_aln/~.br.aln). Representatives
# with a usable alignment are queued in @compass_rep.
if (2 >= $findwhere) {
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#make sure all chains in @unmap_rep have been cut to ~.pdb and ~.ca files
  `mkdir new_domain_str` unless (-d "new_domain_str");
  foreach $newline (@unmap_rep) {
    next if (-e "new_domain_str/$newline.ca");
    SUB_other_new_cut ( "$newline" );
  }

#make multiple alignment from PSI-BLAST results
  `mkdir new_domain_aln` unless (-d "new_domain_aln");
  foreach $newline (@unmap_rep) {
    if (-e "new_domain_aln/$newline.br.aln") {
      # An alignment is already present: empty means nothing to do, and one
      # that does not start with "QUERY" is accepted as it stands.
      next if (-z "new_domain_aln/$newline.br.aln");
      $q5 = `head -1 new_domain_aln/$newline.br.aln | cut -c1-5`; chomp $q5;
      unless ($q5 eq "QUERY") { push @compass_rep, $newline; next; }
    }
    else {
      # No alignment yet: build it from the BLAST report unless that report is too long.
      $ct = `wc -l pdb_$date/$newline.blast`; chomp $ct; @ct1 = split/\s/, $ct;
      next if ($ct1[0] > 3000000);
      SUB_get_br_aln_specific ($newline);
      if (-z "new_domain_aln/$newline.br.aln") { print DATELOG "No profile available for $newline\n"; next; }
    }

    # Run the alignment through prep_psiblastali; if the result still starts
    # with "QUERY" the file is truncated to empty, otherwise the rep is queued.
    `$path_bin/prep_psiblastali -i new_domain_aln/$newline.br.aln -o $newline.adj.aln`;
    `mv $newline.adj.aln new_domain_aln/$newline.br.aln`;
    $q5 = `head -1 new_domain_aln/$newline.br.aln | cut -c1-5`; chomp $q5;
    if ($q5 ne "QUERY") { push @compass_rep, $newline; }
    else { open(PSEUDOUT, ">new_domain_aln/$newline.br.aln"); close (PSEUDOUT); }
  }

  # Remove the alignments that ended up empty.
  foreach $newline (@unmap_rep) {
    next unless (-e "new_domain_aln/$newline.br.aln" && -z "new_domain_aln/$newline.br.aln");
    `rm new_domain_aln/$newline.br.aln`;
  }
  SUB_get_10res_pts_newpsi ();
  print DATELOG "STATUS: finished making query alignments from PSI-BLAST results\n";
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
}


# Stage 3: run COMPASS for every representative in @compass_rep, using a pool
# of at most $maxload concurrent child processes.
#   $runct   index of the job most recently started
#   $loadct  number of children currently running
#   @pidlist one slot per running child (0 = free slot)
if (3 >= $findwhere) {
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
#for each unmapped rep, run COMPASS against library database
  $maxload = 7; $runct = -1; $loadct = 0; @pidlist = (); $finpid = "null";
  for ($k=0; $k<$maxload; $k++) { $pidlist[$k] = 0; }
  while ($#compass_rep > $runct) {
    # Advance the index before forking so the child sees its own job number.
    $runct ++;
    $pid = fork;
    # fork returns undef on failure; undef must not be mistaken for the child's 0.
    if (!defined $pid) { die "ERROR: fork failed while running COMPASS for $compass_rep[$runct]: $!\n"; }
    if ($pid == 0) {
      # Child: run one comparison unless its output already exists, then leave.
      $rep = $compass_rep[$runct];
      if (!-e "$rep.compass") {
        system "$path_bin/compass_vs_db -i new_domain_aln/$rep.br.aln -db $databasepath -o $rep.compass";
      }
      exit;
    }
    # Parent: record the child in the first free slot.
    for ($k=0; $k<$maxload; $k++) {
      if ($pidlist[$k] == 0) { $pidlist[$k] = $pid; last; }
    }
    $loadct ++;
    sleep 1;
    # Pool full: block until one child finishes and release its slot.
    if ($loadct >= $maxload) {
      $finpid = wait;
      for ($k=0; $k<$maxload; $k++) { if ($pidlist[$k] == $finpid) { $pidlist[$k] = 0; $loadct --; last; } }
      $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
    }
  }
  # Reap the remaining children; wait returns -1 once none are left.
  while ($finpid != -1) { $finpid = wait; }
  `rm core.*`;
  print DATELOG "STATUS: finished running compass\n";
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
}


# Stage 4: turn each ~.compass output into ~.compass2 (pool of at most
# $maxload children), then scan every ~.compass2 for accepted hits.
# Accepted hits are appended to ~.compass2 under a "~.out5" header and pushed
# onto @step5; representatives without accepted hits get a "~.fraghit5"
# section from SUB_find_pass_fragments instead.
if (4 >= $findwhere) {
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
#get results of each COMPASS comparison (Evalue and region covered), output to ~.compass2
  $maxload = 7; $runct = -1; $loadct = 0; @pidlist = (); $finpid = "null";
  for ($k=0; $k<$maxload; $k++) { $pidlist[$k] = 0; }
  while ($#compass_rep > $runct) {
    # Advance the index before forking so parent and child refer to the same job.
    $runct ++;
    # Drop an empty COMPASS output before its child looks for the file.
    if (-z "$compass_rep[$runct].compass") { `rm $compass_rep[$runct].compass`; }
    $pid = fork;
    # fork returns undef on failure; undef must not be mistaken for the child's 0.
    if (!defined $pid) { die "ERROR: fork failed while making $compass_rep[$runct].compass2: $!\n"; }
    if ($pid == 0) {
      if (-e "$compass_rep[$runct].compass") { SUB_make_inf_file ( "$compass_rep[$runct]" ); }
      exit;
    }
    # Parent: record the child in the first free slot.
    for ($k=0; $k<$maxload; $k++) {
      if ($pidlist[$k] == 0) { $pidlist[$k] = $pid; last; }
    }
    $loadct ++;
    sleep 1;
    # Pool full: block until one child finishes and release its slot.
    if ($loadct >= $maxload) {
      $finpid = wait;
      for ($k=0; $k<$maxload; $k++) { if ($pidlist[$k] == $finpid) { $pidlist[$k] = 0; $loadct --; last; } }
      $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
    }
  }
  # Reap the remaining children; wait returns -1 once none are left.
  while ($finpid != -1) { $finpid = wait; }
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#find which compass outputs have e-value less than 1e-10 and cover all but 10 residues at both termini
#if no accepted hits, check for fragments or hits to repeats
  foreach $rep (@compass_rep) {
    next if (!-e "$rep.compass2");
    open(IN, "$rep.compass2");
    @in = <IN>;
    @hit5 = ();
    # The first line of ~.compass2 is skipped; data lines are tab-separated.
    for ($i=1; $i!=($#in+1); $i++) {
      chomp $in[$i];
      @ar4 = split/\t/, $in[$i];
      # Field 1 is compared against the e-value cutoff; field 2 must read "yes".
      next unless ($ar4[1] < 1e-10 && $ar4[1] ne "" && $ar4[2] eq "yes");
      @ar5 = split/\.X\./, $ar4[0];
      $querydom = $rep;
      $scopdom = substr($ar5[1], 0, 7);
      # An empty domain id would leave grep without a pattern (it would then read stdin).
      next if ($scopdom eq "");
      $grepline1 = `grep $scopdom $lib_scop_dircla`;
      @grepar1 = split/\t/, $grepline1;
      $superfamid = $grepar1[3];
      @ar5a = split/\,/, $ar4[3];
      @ar5b = split/\-/, $ar5a[1];
      $queryrangenum = "$ar5b[0].$ar5b[1]";
      $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$querydom.ca" );
      $hitqueryrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$querydom.ca", "$queryrangenum" );
      @ar5c = split/\,/, $ar4[4];
      @ar5d = split/\-/, $ar5c[1];
      $scoprangenum = "$ar5d[0].$ar5d[1]";
      $wholescoprange = SUB_get_wholerange ( "$path_str/$scopdom.ca" );
      $hitscoprangeres = SUB_convert_range_pos_to_res ( "$path_str/$scopdom.ca", "$scoprangenum") ;
      push @hit5, "$querydom\t$hitqueryrangeres\t\t$wholequeryrange\t$scopdom\t$superfamid\t$hitscoprangeres\t\t$wholescoprange\t$ar4[1]\t\t\t\t5\n";
    }
    close(IN); @in = ();
    if ( $#hit5 == -1) {
#      @foroutput = SUB_get_repeats_inf ( $rep );
#      open(OUT, ">>$rep.compass2");
#      print OUT "!^(NEW_FILE): $rep.repeatinf\n"; if (@foroutput[0] ne "") { print OUT "@foroutput"; } close(OUT);
#      @foroutput = SUB_find_repeats_hits ( $rep );
#      open(OUT, ">>$rep.compass2");
#      print OUT "!^(NEW_FILE): $rep.rephit5\n"; if (@foroutput[0] ne "") { print OUT "@foroutput"; } close(OUT);
      @foroutput=  SUB_find_pass_fragments ( $rep );
      open(OUT, ">>$rep.compass2");
      print OUT "!^(NEW_FILE): $rep.fraghit5\n"; if ($foroutput[0] ne "") { print OUT "@foroutput"; } close(OUT);
    }
    else {
      open(OUT, ">>$rep.compass2");
      print OUT "!^(NEW_FILE): $rep.out5\n";
      foreach $line (@hit5) { print OUT "$line"; }
      close(OUT);
      $step5ct ++;
      push @step5, [@hit5];
    }
  }
  @ar4 = (); @ar5 = (); @ar5a = (); @ar5b = (); @ar5c = (); @ar5d = (); @foroutput = ();

  # Fetch protein names unless query_$date already holds a protlist section.
  $gc = `grep "protlist\.$date" query_$date | grep "NEW.FILE" | wc -l`; chomp $gc;
  if ($gc == 0) { SUB_get_protnames (); }
  print DATELOG "STATUS: finished making ~.compass2 files and protein names found\n";
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
}


# Stage 5: run MAMMOTH for every unmapped representative against each library
# domain listed in $replist, using a pool of at most $maxload children.
# Each child works in its own scratch directory dir_<rep> and leaves
# <rep>.mammoth in the current directory.
if (5 >= $findwhere) {
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
#run mammoth for each new chain/domain against each library domain (CA-only)
  $maxload = 10; $runct = -1; $loadct = 0; @pidlist = (); $finpid = "null";
  for ($k=0; $k<$maxload; $k++) { $pidlist[$k] = 0; }
  while ($#unmap_rep > $runct) {
    # Advance the index before forking so the child sees its own job number.
    $runct ++;
    $pid = fork;
    # fork returns undef on failure; undef must not be mistaken for the child's 0.
    if (!defined $pid) { die "ERROR: fork failed while running MAMMOTH for $unmap_rep[$runct]: $!\n"; }
    if ($pid == 0) {
      $rep = $unmap_rep[$runct];
      if (!-e "$rep.mammoth") {
        `mkdir dir_$rep`;
        # Read the library list before changing into the scratch directory.
        open(IN, "$replist");
        @in = <IN>;
        chdir "dir_$rep";
        open(OUT, ">$rep.mammoth");
        print OUT "!^(NEW_FILE): all MAMMOTH pairwise comparisons\n";
        close(OUT);
        foreach $scoprep (@in) {
          chomp $scoprep;
          # The library entry name is the part of the line before ".br".
          @ar1 = split/\.br/, $scoprep;
          next if (!-e "$path_str/$ar1[0].ca");
          system "$path_bin/mammoth -p ../new_domain_str/$rep.ca -e $path_str/$ar1[0].ca >> $rep.mammoth";
        }
        close(IN); @in = ();
        `mv $rep.mammoth ..`;
        chdir "..";
        `rm -r -f dir_$rep`;
      }
      exit;
    }
    # Parent: record the child in the first free slot.
    for ($k=0; $k<$maxload; $k++) {
      if ($pidlist[$k] == 0) { $pidlist[$k] = $pid; last; }
    }
    $loadct ++;
    sleep 1;
    # Pool full: block until one child finishes and release its slot.
    if ($loadct >= $maxload) {
      $finpid = wait;
      for ($k=0; $k<$maxload; $k++) { if ($pidlist[$k] == $finpid) { $pidlist[$k] = 0; $loadct --; last; } }
      $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
    }
  }
  # Reap the remaining children; wait returns -1 once none are left.
  while ($finpid != -1) { $finpid = wait; }
  print DATELOG "STATUS: finished running MAMMOTH\n";
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

}


# Stage 6: call SUB_find_zbc_scores for every unmapped representative, using
# a pool of at most $maxload concurrent child processes.
if (6 >= $findwhere) {
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
#for each mammoth query/library pair: get z-score, make pairwise alignment, get blosum score, and coverage (%) of library domain
  $maxload = 7; $runct = -1; $loadct = 0; @pidlist = (); $finpid = "null";
  for ($k=0; $k<$maxload; $k++) { $pidlist[$k] = 0; }
  while ($#unmap_rep > $runct) {
    # Advance the index before forking so the child sees its own job number.
    $runct ++;
    $pid = fork;
    # fork returns undef on failure; undef must not be mistaken for the child's 0.
    if (!defined $pid) { die "ERROR: fork failed while scoring MAMMOTH results for $unmap_rep[$runct]: $!\n"; }
    if ($pid == 0) {
      SUB_find_zbc_scores ( $unmap_rep[$runct] );
      exit;
    }
    # Parent: record the child in the first free slot.
    for ($k=0; $k<$maxload; $k++) {
      if ($pidlist[$k] == 0) { $pidlist[$k] = $pid; last; }
    }
    $loadct ++;
    sleep 1;
    # Pool full: block until one child finishes and release its slot.
    if ($loadct >= $maxload) {
      $finpid = wait;
      for ($k=0; $k<$maxload; $k++) { if ($pidlist[$k] == $finpid) { $pidlist[$k] = 0; $loadct --; last; } }
      $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
    }
  }
  # Reap the remaining children; wait returns -1 once none are left.
  while ($finpid != -1) { $finpid = wait; }
  print DATELOG "STATUS: finished making all ~.mammoth2 files\n";
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
}


if (7 >= $findwhere) { # START OF "re-start at 6m-csres/6d/7/8"
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#find which pairs can be classified (superfam level) by mammoth criteria:
#(if Z>=4) and (coverage >= 0.5) and (blosum score >= 0.3 or blosum score >= 1/sqrt(z-score) - 0.24 or Z>=22)
#ALSO: (if Z>=4) and (coverage >= 0.5) but fails other crit, check w/compass (skip queries with ~.br.aln longer than 10000 lines)
@hit6m = ();
foreach $newline (@unmap_rep) {
  $dom1 = $newline;
  next if ($newline eq "");
  $bralnwc = `wc -l new_domain_aln/$dom1.br.aln`; chomp $bralnwc;
  @bralar = split/new\_domain\_aln/, $bralnwc; $bralar[0] =~ s/ //g;
  if ($bralar[0] > 10000) { $nocompass = 1; }
  else { $nocompass = 0; }
  open(IN, "$dom1.mammoth2");
  @in = <IN>;
  $start = 0;
  $gct = `grep comp_zcc_$dom1 $dom1.mammoth2 | grep NEW_FILE | wc -l`; chomp $gct;
  foreach $line (@in) {
    if ($start == 1) {
      chomp $line;
      last if ($line =~ /\!\^\(NEW\_FILE/);
      @ar1 = split/\t/, $line;
      @ar2 = split/\.M\./, $ar1[0];
      $dom2 = substr($ar2[1], 0, 7);
      $z = $ar1[1];
      $bs = $ar1[2];
      $cov = $ar1[3];
      next if ($z eq "error" | $bs eq "error" | $bs eq "inf");
      if ($z > 0) { $y = 0.75/$z + 0.1; }
      else { $y = 1000000; }
      if ($z >= 4  && ($cov >= 0.5 | $cov eq "error") && $dom2 ne "") {
        open(OUT, ">$dom1.$dom2.mpa");
        for ($i=0; $i!=($#in+1); $i++) {
          next unless ($in[$i] =~ /^\!\^\(NEW\_FILE\)\: $dom1.M.$dom2.mpa/);
          print OUT "$in[$i+1]"; print OUT "$in[$i+2]"; last;
        }
        close(OUT);
        if (-z "$dom1.$dom2.mpa" == 1) { `rm $dom1.$dom2.mpa`; }
        if ($bs >= 0.4 | $bs >= $y | $z >= 22) { push @hit6m, "$line"; }
        else {
          $run = 1;
          $grpln = `grep $dom2 $path_aln/rps_scop_aln_list`; chomp $grpln;
          if (-e "new_domain_aln/$dom1.br.aln" == 0 | -z "new_domain_aln/$dom1.br.aln" == 1 | -e "$dom1.$dom2.mpa" == 0 | $grpln eq "" | $nocompass == 1) { $run = 0; }
          if ($run == 1 && $gct == 0) { push @hit6forcomp, "$dom1\t$dom2\t$grpln\t$z\t$cov"; }
        }
      }
    }
    else { if ($line =~ /\!\^\(NEW\_FILE\)\: all_zbc_$newline/) { $start = 1; } }
  }
  close(IN); @in = ();
}
@ar1 = (); @ar2 = ();
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#if query is not assigned by MAMMOTH/BLSM scores, run compscores_givenali for pairs with Z>=4 and
#coverage>=0.5 but fail MAMMOTH/BLSM score cutoff
$run_comp_for_mam = 0; #skip this part -- slow and rarely provides additional hits
  if ($run_comp_for_mam == 1) {
  $maxload = 20; $runct = -1; $loadct = 0; @pidlist = (); $finpid = "null";
  for ($k=0; $k<$maxload; $k++) { $pidlist[$k] = 0; }
  while ($#hit6forcomp > $runct) {
    if ($pid = fork) {
      for ($k=0; $k<$maxload; $k++) {
        if ($pidlist[$k] == 0) { $pidlist[$k] = $pid; }
        $loadct ++; $runct ++; 
        if ($runct/$maxload == (int ($runct/$maxload)) && $runct != 0) {
          $wt = 1;
          while ($wt == 1) {
            $psct1 = `ps | grep compscores | wc -l`; chomp $psct1;
            $psct2 = `ps | grep compscores | grep defunct | wc -l`; chomp $psct2;
            if (($psct1-$psct2) < 15) { $wt = 0; }
            else { sleep 3; }
          }
        }
        sleep 1; last;
      }
    }
    elsif ($pid == 0) {
      @ar0 = split/\t/, $hit6forcomp[$runct];
      $dom1 = $ar0[0]; $dom2 = $ar0[1]; $dom2alnfile = $ar0[2]; $zscore = $ar0[3]; $cov = $ar0[4]; 
      $csout = `$path_bin/compscores_givenali -i new_domain_aln/$dom1.br.aln -j $path_aln/$dom2alnfile -a $dom1.$dom2.mpa`;
      @ar1a = split/\n/, $csout;
      @ar1b = split/Evalue \=/, $ar1a[5];
      if ($ar1a[5] !~ /Evalue /) { $cscore = "error"; }
      else { $cscore = $ar1b[1]; $cscore =~ s/ //g; }
      if ($cscore ne "error" && $cscore ne "inf" && (1 >= $cscore)) {
        open(OUTH, ">$dom1.$dom2.compreshit");
        print OUTH "$dom1.M.$dom2\t$zscore\t$cscore\t$cov\n";
        close(OUTH);
      }
      open(OUT, ">$dom1.$dom2.csres");
      print OUT "$dom1.M.$dom2\t$zscore\t$cscore\t$cov\n";
      close(OUT);
      @ar0 = (); @ar1a = (); @ar1b = ();
      exit;
    }
    if ($loadct == $maxload) {
      $finpid = wait;
      for ($k=0; $k<$maxload; $k++) { if ($pidlist[$k] == $finpid) { $pidlist[$k] = 0; $loadct --; last; } }
      $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
    }
  }
  while ($finpid != -1) { $finpid = wait; }
}
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
@hit6forcomp = ();

#append compass comparison scores to ~.mammoth2
foreach $rep (@unmap_rep) {
  $foradd = `cat $rep.*.csres`;
  if ($foradd ne "") {
    open(OUT, ">>$rep.mammoth2");
    print OUT "!^(NEW_FILE): comp_zcc_$rep\n";
    print OUT "$foradd";
    close(OUT);
  }
  `rm -f $rep*.csres`;
  $foradd = "";
}

#append hits found by compass comparison to @hit6m
$cat1 = `cat *.compreshit`;
@ar1 = split/\n/, $cat1;
foreach $line (@ar1) { push @hit6m, "$line\n"; }
`rm -f *.compreshit`;
$cat1 = ""; @ar1 = ();

#format accepted hits for ~.out6m
foreach $rep (@unmap_rep) {
  @hit6tmp = ();
  foreach $ahitline (@hit6m) {
    next unless ($ahitline =~ /^$rep/);
    chomp $ahitline;
    @ar0 = split/\t/,$ahitline;
    @ar1 = split/\.M\./, $ar0[0];
    $scopdom = substr($ar1[1], 0, 7);
    next if ($scopdom eq "");
    $zscore = $ar0[1];
    $seqscore = $ar0[2];
    ($qrangenum, $srangenum) = SUB_get_ranges_mpa ( "$rep.$scopdom.mpa" );
    $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$rep.ca", $qrangenum );
    $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$scopdom.ca", $srangenum );
    $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$rep.ca" );
    $wholescoprange = SUB_get_wholerange ( "$path_str/$scopdom.ca" );
    $grepline1 = `grep $scopdom $lib_scop_dircla`;
    @grepar1 = split/\t/, $grepline1;
    $superfamid = $grepar1[3];
    push @hit6tmp, "$rep\t$qrangeres\t\t$wholequeryrange\t$scopdom\t$superfamid\t$srangeres\t\t$wholescoprange\t\t$zscore\t\t$seqscore\t6\n";
  }
  if ($hit6tmp[0] ne "") { $step6mct ++; push @step6m, [@hit6tmp]; }
  `rm -f $rep*.mpa $rep*.cs $rep*.csres`;
}
@ar0 = (); @ar1 = (); @hit6tmp = ();
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#find all queries with a >20 residue region that has no mammoth hits
#(the test below is actually "longest uncovered stretch >= 20")
#A query goes into @urlist when it has no @step6m entry at all, or when the
#longest run of positions inside its whole range that is not covered by any
#accepted hit range reaches 20.
@urlist = ();
foreach $rep (@unmap_rep) {
  $pt = -1;
  #every @step6m record starts with "$rep\t"; compare that field exactly so a
  #query name occurring inside another record cannot select the wrong entry
  for ($i=0; $i<=$step6mct; $i++) { if (index($step6m[$i][0], "$rep\t") == 0) { $pt = $i; last; } }
  if ($pt == -1) { push @urlist, "$rep"; next; }
  @ar0 = split/\t/, $step6m[$pt][0];
  $wholequeryrangepos = SUB_convert_range_res_to_pos ( "new_domain_str/$rep.ca", "$ar0[3]" );
  @ar1a = split/\./, $wholequeryrangepos;
  #@pos: -1 = outside the query range, 0 = inside but uncovered, 1 = covered by a hit
  #loops use "<=" instead of "!=" so an inverted or malformed range cannot spin forever
  @pos = ();
  for ($i=0; $i<=$ar1a[1]; $i++) { $pos[$i] = -1; }
  for ($i=($ar1a[0]); $i<=$ar1a[1]; $i++) { $pos[$i] = 0; }
  for ($j=0; $j<=$#{$step6m[$pt]}; $j++) {
    @ar1b = split/\t/, $step6m[$pt][$j];
    $thisrangepos = SUB_convert_range_res_to_pos ( "new_domain_str/$rep.ca", "$ar1b[1]" );
    @ar1c = split/\./, $thisrangepos;
    for ($i=($ar1c[0]); $i<=$ar1c[1]; $i++) { $pos[$i] = 1; }
  }
  #longest run of uncovered positions inside the query range
  $currentct = 0; $longestct = 0;
  for ($i=($ar1a[0]); $i<=$ar1a[1]; $i++) {
    if ($pos[$i] == 0) { $currentct ++; }
    else {
      if ($currentct > $longestct) { $longestct = $currentct; }
      $currentct = 0;
    }
  }
  if ($currentct > $longestct) { $longestct = $currentct; }
  if ($longestct >= 20) { push @urlist, "$rep"; }
}
@ar0 = (); @ar1a = (); @ar1b = (); @ar1c = (); @pos = ();
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";


#for any query with a >20 residue region with no mammoth hits, find potential pairs
#to check with Dali (criteria: coverage > 0.4 AND blosum score >= -0.01*Z-score + 0.14 or
#(bl score is "inf" and zscore >= 4)) [ Z-SCORE MUST BE > 0]
#Result: @pairsfordali holds one list of "query\tlibrary_domain\n" strings per
#query that has at least one candidate; $pairsfdct is its last index.
@pairsfordali = ();
$pairsfdct = -1;
foreach $rep (@urlist) {
  @dpairs = ();
  @dpairsinfo = ();
  $dpct = 0;
  open(IN, "$rep.mammoth2") or warn "WARNING: cannot read $rep.mammoth2: $!\n";
  @in = <IN>;
  $start = 0;
  #only the "all_zbc_$rep" section of the file is evaluated
  foreach $line (@in) {
    if ($start == 1) {
      last if ($line =~ /\!\^\(NEW\_FILE\)/);
      chomp $line;
      @ar0 = split/\t/, $line;
      @ar1 = split/\.M\./, $ar0[0]; $scopdom = substr($ar1[1], 0, 7);
      $zscore = $ar0[1]; $bscore = $ar0[2]; $cov = $ar0[3];
      #every pair is remembered with a Y/N flag so the fallbacks below can rank all of them
      if ($bscore eq "error" || $bscore eq "nan" || $zscore eq "error" || 0.4 > $cov || 0 > $zscore) { push @dpairsinfo, "$rep\t$scopdom\t$zscore\tN\n"; next; }
      $y = -0.01*$zscore + 0.14;
      if (($bscore >= $y) || ($zscore >= 4 && $bscore eq "inf")) { push @dpairsinfo, "$rep\t$scopdom\t$zscore\tY\n"; $dpct ++; }
      else { push @dpairsinfo, "$rep\t$scopdom\t$zscore\tN\n"; }
    }
    else { if ($line =~ /\!\^\(NEW\_FILE\)\: all_zbc_$rep/) { $start = 1; } }
  }
  close(IN);
#if no potential pairs, find domains with top 50 MAMMOTH Z-scores
  if ($dpct == 0) {
    @tmp = @dpairsinfo;
    for ($i=0; $i!=50; $i++) {
      $big = -100; $pt = -1;
      for ($j=0; $j!=($#dpairsinfo+1); $j++) {
        next if ($tmp[$j] eq "null");
        @ar2 = split/\t/, $tmp[$j];
        if ($ar2[2] > $big) { $pt = $j; $big = $ar2[2]; $sd = $ar2[1]; }
      }
      #stops when nothing is left ($big still -100) or only negative Z-scores remain
      last if (0 > $big);
      push @dpairs, "$rep\t$sd\n";
      $tmp[$pt] = "null";
    }
  }
#if >200 potential pairs, choose only top 200 MAMMOTH Z-scores greater than 0
  elsif ($dpct > 200) {
    @tmp = @dpairsinfo;
    for ($i=0; $i!=200; $i++) {
      $big = -100; $pt = -1;
      for ($j=0; $j!=($#dpairsinfo+1); $j++) {
        next if ($tmp[$j] eq "null");
        chomp $tmp[$j];
        @ar2 = split/\t/, $tmp[$j];
        if ($ar2[3] eq "Y" && $ar2[2] > $big) { $pt = $j; $big = $ar2[2]; $sd = $ar2[1]; }
      }
      last if (0 > $big);
      push @dpairs, "$rep\t$sd\n";
      $tmp[$pt] = "null";
    }
  }
  else {
    for ($i=0; $i!=($#dpairsinfo+1); $i++) {
      chomp $dpairsinfo[$i];
      @ar2 = split/\t/, $dpairsinfo[$i];
      next if ($ar2[$#ar2] eq "N");
      push @dpairs, "$rep\t$ar2[1]\n";
    }
  }
#check for the protein name in the scoptab file; if there are any hits, find the sfam reps in $replist and add to *.fordali
  $protname = "";
  open(IN, "query_$date") or warn "WARNING: cannot read query_$date: $!\n";
  @in = <IN>;
  $start = 0;
  foreach $iline (@in) {
    if ($start == 1) {
      last if ($iline =~ /\!\^\(NEW\_FILE\)/);
      next unless ($iline =~ /^$rep/);
      chomp $iline;
      @prot1 = split/\t/, $iline;
      @prot2 = split/\;  /, $prot1[1];
      $protname = $prot2[0];
      last;
    }
    else { if ($iline =~ /\!\^\(NEW\_FILE\)\: protlist.$date/) { $start = 1; } }
  }
  close(IN);
  if ($protname ne "") {
    $protname =~ s/\(//g; $protname =~ s/\)//g;
    open(ST, "$scoptab") or warn "WARNING: cannot read $scoptab: $!\n";
    @all_st = <ST>;
    $sfct = -1;
    $addct = -1;
    #collect the distinct values of column 10 for all scoptab lines mentioning the protein name;
    #the name is matched literally (\Q..\E): other metacharacters such as "[" or "+" used to be
    #compiled as regex syntax and could abort the whole run
    LP_M1: foreach $stline (@all_st) {
      if ($stline =~ /\Q$protname\E/i) {
        @sar1 = split/\t/, $stline;
        for ($i=0; $i!=($sfct+1); $i++) { next LP_M1 if ($sar1[9] eq $sfamlist[$i]); }
        $sfct += 1;
        $sfamlist[$sfct] = $sar1[9];
      }
    }
    if ($sfct != -1) {
      foreach $stline (@all_st) {
        @sar2 = split/\t/, $stline;
        for ($i=0; $i!=($sfct+1); $i++) {
          if ($sar2[9] eq $sfamlist[$i] && $sar2[2] ne "") {
            #only domains listed in $replist are added, and each only once per query
            $sg1 = `grep $sar2[2] $replist`; chomp $sg1;
            if ($sg1 ne "") {
              $dup = 0;
              foreach $dp (@dpairs) { if ($dp =~ /\Q$sar2[2]\E/) { $dup = 1; last; } }
              if ($dup == 0) { push @dpairs, "$rep\t$sar2[2]\n"; last; }
            }
          }
        }
      }
    }
    close(ST); @all_st = ();
  }
  if ($dpairs[0] ne "") { $pairsfdct ++; push @pairsfordali, [@dpairs]; }
}
@dpairs = (); @dpairsinfo = (); @in = (); @tmp = ();
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

@querydalidump = ();
#run DaliLite for selected pairs for each query
#For every query in @pairsfordali this loop
#  1. prepares the query structure and writes the DaliLite command list (pairstorun),
#  2. spreads the commands over $dalisets temporary directories and runs them in the background,
#  3. collects the *.dccp results and scores both alignment directions of every pair,
#  4. records step-6 (DaliLite) and step-9 (Z-score ratio) hits, or lists "close" hits if none pass,
#  5. runs the CSV analysis for the best-scoring pairs in forked workers (step 7),
#  6. appends all results to $qch.dali1 and archives the DaliLite files.
for ($i=0; $i!=($pairsfdct+1); $i++) {
  @ar1 = split/\t/, $pairsfordali[$i][0];
  $qch = $ar1[0];
  `mkdir dali_output`;
  open(T1, ">querydomains.list"); print T1 "$qch.pdb\n"; close(T1);
  #close T2 (not T1 again) so the file is flushed before the helpers below run
  open(T2, ">querydom2dali"); print T2 "$qch.pdb 1000\n"; close(T2);
  SUB_renum_2 ( "1000", "new_domain_str" );
  SUB_get5N ();
  `cat querydom2dali_5N $path_str/scoplib2dali_5N > all2dali_5N`;
  #no 1000*dat file means the query produced no DaliLite input; remember it for the MAMMOTH-based CSV pass
  $ls1 = `ls -1 1000*dat | wc -l`; chomp $ls1;
  if ($ls1 == 0) { push @querydalidump, "$qch"; }
#make list of pairs to run (x2)
  open(KEY, "all2dali_5N");
  @all_key = <KEY>;
  open(TORUN, ">pairstorun");
  LP_M2: for ($j=0; $j!=($#{$pairsfordali[$i]}+1); $j++) {
    chomp $pairsfordali[$i][$j];
    @ar0 = split/\t/, $pairsfordali[$i][$j];
    $query = $ar0[0]; $lib = $ar0[1];
    $id1 = "null"; $id2 = "null";
    foreach $line1 (@all_key) {
      next unless ($line1 =~ /^$query/);
      chomp $line1;
      @ar1 = split/ /, $line1;
      $id1 = $ar1[1];
      $id1four = substr($id1, 0, 4);
      next LP_M2 if (-e "$id1.dat" == 0);
      `cp $id1four* dali_output`;
      last;
    }
    foreach $line2 (@all_key) {
      next unless ($line2 =~ /^$lib/);
      chomp $line2;
      @ar2 = split/ /, $line2;
      $id2 = $ar2[1];
      $id2four = substr($id2, 0, 4);
      next LP_M2 if (-e "$path_dalidat/$id2.dat" == 0);
      `cp $path_dalidat/$id2four* dali_output`;
      last;
    }
    next if ($id1 eq "null" || $id2 eq "null");
    #each pair is aligned in both directions; every command is followed by a lock cleanup
    print TORUN "./DaliLite -align $id1 $id2 ; mv $id1.dccp $query.$lib.dccp\n";
    print TORUN "rm dali.lock\n";
    print TORUN "./DaliLite -align $id2 $id1 ; mv $id2.dccp $lib.$query.dccp\n";
    print TORUN "rm dali.lock\n";
  }
  close(KEY); @all_key = ();
  close(TORUN);
  system "mv all2dali_5N dali_output";
  #"wc -l" prints "<count> pairstorun"; only the leading number matters in the numeric test below
  $ptrct = `wc -l pairstorun`;
  chomp $ptrct;
#$dalisets = 7;
  if ($ptrct != 0) {
#split list into $dalisets pieces and set up temp directories
    for ($k=1; $k!=($dalisets+1); $k++) { `mkdir tempdir.$k`; }
    open(RUNLIST, "pairstorun");
    @all_run = <RUNLIST>;
    $N = (int (($#all_run+1)/$dalisets)) + 1;
    for ($k=1; $k!=($dalisets+1); $k++) {
      $ln = $k * $N;
      if ($ln > ($#all_run+1)) {
        $prevln = ($k-1)*$N;
        $left = $#all_run + 1 - $prevln;
        last if ($left == 0);
        system "tail -$left pairstorun > pairs.$k";
        `mv pairs.$k tempdir.$k`;
        last;
      }
      else {
        system "head -$ln pairstorun | tail -$N > pairs.$k";
        `mv pairs.$k tempdir.$k`;
      }
    }
    close(RUNLIST); @all_run = ();
#run DaliLite for pairs
    for ($k=1; $k!=($dalisets+1); $k++) {
      system "cp $path_bin/DaliLite tempdir.$k";
      chdir "tempdir.$k";
      if (-e "pairs.$k" == 1) {
        open(READPRS, "pairs.$k");
        @allrdpr = <READPRS>;
        foreach $rpline (@allrdpr) {
          next if ($rpline =~ /rm dali.lock/);
          #fields 2 and 3 of a command line are the two DaliLite ids; copy their input files
          @rpar = split/ /, $rpline;
          $str1 = substr($rpar[2], 0, 4);
          $str2 = substr($rpar[3], 0, 4);
          `cp ../dali_output/$str1* .`;
          `cp ../dali_output/$str2* .`;
        }
        close(READPRS); @allrdpr = ();
        open(ADDRM, ">>pairs.$k");
        print ADDRM "rm dali.lock\n";
        close(ADDRM);
        system "chmod +x pairs.$k";
        system "./pairs.$k &";
      }
      chdir "..";
    }
    $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
#wait for dali to finish, then move to new directory
    sleep 10;
    $wt = 0;
    while ($wt == 0) {
      $ovct = `ps | grep "DaliLite" | wc -l`;
      chomp $ovct;
      if ($ovct == 0) { $wt = 1; }
      else { SUB_chk_for_stalled_dali (); }
    }
    for ($k=1; $k!=($dalisets+1); $k++) {
      chdir "tempdir.$k";
      `mv *.dccp ../dali_output`;
      `rm -f *`;
      chdir "..";
    }
    `rm -r -f tempdir.*`;
#extract alignments and get Z-scores
    system "cp $path_bin/DaliLite dali_output";
    chdir "dali_output";
    $ls = `ls -1 *.dccp`; @lslist = split/\n/, $ls;
    @s6zscores = ();
    foreach $dccpfile (@lslist) {
      if (-z "$dccpfile" == 1) { `rm $dccpfile`; next; }
      $zscore = SUB_mk_dpa_from_dccp ( "$dccpfile", "$qch", "all2dali_5N" );
      push @s6zscores, "$dccpfile\t$zscore";
    }
    chdir "..";
  }
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

  @hit6d = ();  @forconsinfo = (); @hit9 = (); @s9inf = ();
  open(DAOUT, ">$qch.dali1");
  print DAOUT "!^(NEW_FILE): all_dali_zbc_$qch\n";
#see if the pairs pass criteria in either comparison: ($bl >= -0.01*$zscore + 0.27 and $zscore >= 4) or $zscore >= 14 or $bl >= 0.4
#also, make @forconsinfo
  for ($j=0; $j!=($#{$pairsfordali[$i]}+1); $j++) {
    chomp $pairsfordali[$i][$j];
    @ar0 = split/\t/, $pairsfordali[$i][$j];
    $qdom = $ar0[0]; $ldom = $ar0[1];
    #start from "error" so a Z-score of the previous pair can never leak into this one
    $z1 = "error"; $z2 = "error";
    foreach $zp (@s6zscores) {
      if ($zp =~ /$qdom\.$ldom\./) { @za = split/\s+/, $zp; $z1 = $za[$#za]; }
      if ($zp =~ /$ldom\.$qdom\./) { @za = split/\s+/, $zp; $z2 = $za[$#za]; }
    }
    if (-e "dali_output/$qdom.$ldom.dpa" == 0) { $bl1 = "error"; $cov1 = "error"; $z1 = "error"; }
    else {
      $bl1 = `$path_bin/blsm_scores -i dali_output/$qdom.$ldom.dpa`; chomp $bl1;
      $cov1 = SUB_cover_dpa ( "dali_output/$qdom.$ldom.dpa" );
    }
    if (-e "dali_output/$ldom.$qdom.dpa" == 0) { $bl2 = "error"; $cov2 = "error"; $z2 = "error"; }
    else {
      $bl2 = `$path_bin/blsm_scores -i dali_output/$ldom.$qdom.dpa`; chomp $bl2;
      $cov2 = SUB_cover_dpa ( "dali_output/$ldom.$qdom.dpa" );
    }
    $hit1 = 0;
    $hit2 = 0;
    $ranges1 = "\t";
    $ranges2 = "\t";
    if ($z1 ne "error" && $bl1 ne "error" && $cov1 ne "error" && $bl1 ne "inf") {
      $y1 = -0.01*$z1 + 0.27;
      if ($bl1 >= 0.4 || ($bl1 >= $y1 && $z1 >= 4) || $z1 >= 14) {
        if ($cov1 >= 0.5) { $hit1 = 1; }
      }
      $ranges1 = SUB_get_ranges_dpa ( "dali_output/$qdom.$ldom.dpa", "0", "1" );
    }
    if ($z2 ne "error" && $bl2 ne "error" && $cov2 ne "error" && $bl2 ne "inf") {
      $y2 = -0.01*$z2 + 0.27;
      if ($bl2 >= 0.4 || ($bl2 >= $y2 && $z2 >= 4) || $z2 >= 14) {
        if ($cov2 >= 0.5) { $hit2 = 1; }
      }
      $ranges2 = SUB_get_ranges_dpa ( "dali_output/$ldom.$qdom.dpa", "1", "0" );
    }
    print DAOUT "$qdom.D.$ldom\t$z1\t$z2\t$bl1\t$bl2\t$cov1\t$cov2\t$ranges1\t$ranges2\n";
    if ($z1 ne "error" && $z1 >= 2) { push @forconsinfo, "$ldom\t$z1"; }
    #when both directions pass, the one with the higher Z-score is reported
    $usehit = 0;
    if ($hit1 == 1 && $hit2 == 1) {
      if ($z1 >= $z2) { $usehit = 1; }
      else { $usehit = 2; }
    }
    elsif ($hit1 == 1 && $hit2 == 0) { $usehit = 1; }
    elsif ($hit1 == 0 && $hit2 == 1) { $usehit = 2; }
    if ($usehit == 1) {
      $zscore = $z1;
      $bscore = $bl1;
      @ar0 = split/\t/, $ranges1;
    }
    if ($usehit == 2) {
      $zscore = $z2;
      $bscore = $bl2;
      @ar0 = split/\t/, $ranges2;
    }
    if ($usehit != 0 && $ldom ne "") {
      $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$qdom.ca", "$ar0[0]" );
      $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$qdom.ca" );
      $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$ldom.ca", "$ar0[1]" );
      $wholescoprange = SUB_get_wholerange ( "$path_str/$ldom.ca" );
      $grepline1 = `grep $ldom $lib_scop_dircla`;
      @grepar1 = split/\t/, $grepline1;
      $superfamid = $grepar1[3];
      push @hit6d, "$qch\t\t$qrangeres\t$wholequeryrange\t$ldom\t$superfamid\t\t$srangeres\t$wholescoprange\t\t\t$zscore\t$bscore\t6\n";
    }
    #pick the direction with the usable (or higher) Z-score for the Z-ratio test
    if ($z1 eq "error" && $z2 ne "error") { $zsr = 2; }
    elsif ($z1 ne "error" && $z2 eq "error") { $zsr = 1; }
    else {
      if ($z1 ne "error" && $z2 ne "error") {
        if ($z1 >= $z2) { $zsr = 1; }
        else { $zsr = 2; }
      }
      else { $zsr = "no"; }
    }
    if ($zsr ne "no" && $ldom ne "") {
      #elsif is required: $zsr is overwritten with the Z-score itself, and a Z-score of
      #exactly 2 would otherwise be mistaken for "direction 2" by the second test
      if ($zsr == 1) { $zsr = $z1; @ar0 = split/\t/, $ranges1; }
      elsif ($zsr == 2) { $zsr = $z2; @ar0 = split/\t/, $ranges2; }
      $g_rat = `grep "$ldom" $path_str/zratinfo`; chomp $g_rat;
      @arh1 = split/\t/, $g_rat;
      $hitlen = $arh1[1];
      $hitselfz = $arh1[2];
      #the self Z-score may be "X", zero or missing (domain not in zratinfo);
      #dividing by it would abort the run, so the ratio is flagged as -1 instead
      if ($hitselfz ne "X" && $hitselfz != 0) { $zrat = $zsr/$hitselfz; }
      else { $zrat = -1; }
      if ($zsr > 4 && $zsr < 14 && $hitlen > 0 && $hitlen < 150 && $zrat > 0.3) {
        $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$qdom.ca", "$ar0[0]" );
        $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$qdom.ca" );
        $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$ldom.ca", "$ar0[1]" );
        $wholescoprange = SUB_get_wholerange ( "$path_str/$ldom.ca" );
        $grepline1 = `grep $ldom $lib_scop_dircla`;
        @grepar1 = split/\t/, $grepline1;
        $superfamid = $grepar1[3];
        push @hit9, "$qch\t\t$qrangeres\t$wholequeryrange\t$ldom\t$superfamid\t\t$srangeres\t$wholescoprange\t\t$zrat\t$zsr\t\t9\n";
      }
      push @s9inf, "$qch\t$ldom\t$hitlen\t$zsr\t$zrat";
    }
  }
  #string comparison: the numeric "!=" treated every entry not starting with a nonzero digit as empty
  if ($s9inf[0] ne "") {
    print DAOUT "!^(NEW_FILE): dali_z_ratio_info_$qch\n";
    foreach $a (@s9inf) { print DAOUT "$a\n"; }
  }
  close(DAOUT);

  if ($hit9[0] ne "") { $step9ct++; push @step9, [@hit9]; }
  if ($hit6d[0] ne "") { $step6dct ++; push @step6d, [@hit6d]; }
#if there are no passing DaliLite hits to the query chain, find if there were any "close" Dali hits (good enough to have Dali output, but don't pass criteria)
  else {
    @otherdali = ();
    open(IN, "$qch.dali1");
    @in = <IN>;
    #line 0 is the section header; "<=" keeps the loop finite even if the file could not be read
    for ($k=1; $k<=$#in; $k++) {
      @ar1 = split/\t/, $in[$k];
      if ($ar1[1] eq "error" || $ar1[3] eq "error" || $ar1[5] eq "error") { $skip1 = 1; }
      else { $skip1 = 0; }
      if ($ar1[2] eq "error" || $ar1[4] eq "error" || $ar1[6] eq "error") { $skip2 = 1; }
      else { $skip2 = 0; }
      $usehit = 0;
      if ($skip1 == 0 && $skip2 == 0) { if ($ar1[1] >= $ar1[2]) { $usehit = 1; } else { $usehit = 2; } }
      elsif ($skip1 == 1 && $skip2 == 0) { $usehit = 2; }
      elsif ($skip1 == 0 && $skip2 == 1) { $usehit = 1; }
      if ($usehit == 1) {
        $zscore = $ar1[1]; $bscore = $ar1[3]; $cov = $ar1[5];
        $qrangenum = $ar1[7]; $srangenum = $ar1[8];
      }
      if ($usehit == 2) {
        $zscore = $ar1[2]; $bscore = $ar1[4]; $cov = $ar1[6];
        $qrangenum = $ar1[9]; $srangenum = $ar1[10];
      }
      #$ldom is derived from this line below; the former "$ldom ne ''" test here looked at a
      #stale value from the previous loop and could suppress every close hit
      if ($usehit != 0) {
        @ar2 = split/\.D\./, $ar1[0];
        $qdom = $ar2[0];
        $ldom = substr($ar2[1], 0, 7);
        #lines without a ".D." pair id (section headers, z-ratio lines) end up here
        next if ($ldom eq "");
        $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$qdom.ca", "$qrangenum" );
        $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$qdom.ca" );
        $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$ldom.ca", "$srangenum" );
        $wholescoprange = SUB_get_wholerange ( "$path_str/$ldom.ca" );
        $grepline1 = `grep $ldom $lib_scop_dircla`;
        @grepar1 = split/\t/, $grepline1;
        $superfamid = $grepar1[3];
        push @otherdali, "$qch\t\t$qrangeres\t$wholequeryrange\t$ldom\t$superfamid\t\t$srangeres\t$wholescoprange\t\t\t$zscore\t$bscore\t6\n";
      }
    }
    close(IN);
#sort @otherdali by Z-score
    @othertmp = @otherdali;
    @sorted_otherdali = ();
    for ($k=0; $k!=($#otherdali+1); $k++) {
      $big = -100; $pt = -1;
      for ($m=0; $m!=($#otherdali+1); $m++) {
        next if ($othertmp[$m] eq "null");
        @ar3 = split/\t/, $othertmp[$m];
        if ($ar3[11] > $big) { $big = $ar3[11]; $pt = $m; }
      }
      push @sorted_otherdali, "$otherdali[$pt]";
      $othertmp[$pt] = "null";
    }
    if ($#sorted_otherdali != -1) {
      open(DAOUT, ">>$qch.dali1");
      print DAOUT "!^(NEW_FILE): $qch.otherdali\n";
      foreach $oline (@sorted_otherdali) { print DAOUT "$oline"; }
      close(DAOUT);
    }
    @sorted_otherdali = (); @othertmp = (); @otherdali = ();
  }
  `rm pairstorun querydatlist querydom2dali querydom2dali_5N querydomains.list 1*dat 1*dssp core.*`;
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#choose pairs for CSV analysis: sort pairs in @forconsinfo by Z-score, choose all with z>=4 or top 20 with z>=2
#skip any library domains with <5 seqs in multiple alignment
  @tmpf = @forconsinfo;
  @sorted_forcons = ();
  @forcons = ();
  for ($k=0; $k!=($#tmpf+1); $k++) {
    $big = -100; $pt = -1; $ldom = "";
    for ($m=0; $m!=($#tmpf+1); $m++) {
      next if ($tmpf[$m] eq "null");
      @ar1 = split/\t/, $tmpf[$m];
      if ($ar1[1] > $big) { $pt = $m; $big = $ar1[1]; $ldom = $ar1[0]; }
    }
    last if ($pt == -1 || $ldom eq "");
    push @sorted_forcons, "$tmpf[$pt]";
    $tmpf[$pt] = "null";
  }
  $fct = 0;
  for ($k=0; $k!=($#sorted_forcons+1); $k++) {
    @ar1 = split/\t/, $sorted_forcons[$k];
    next if ($ar1[0] eq "");
    $g1 = `grep $ar1[0] $path_aln/seqct_in_aln`; chomp $g1;
    @ar2 = split/\t/, $g1;
    next if ($ar2[1] < 5);
    last if ($fct >= 20 && $ar1[1] < 4);
    push @forcons, "$sorted_forcons[$k]";
    $fct ++;
  }
  @forconsinfo = (); @tmpf = (); @sorted_forcons = ();
#calculate CSV scores for selected query/library pairs
  @hit7 = ();
  if ($forcons[0] ne "") {
    if (-d "new_domain_csv" == 0) { `mkdir new_domain_csv`; }
    system "$path_bin/al2co -i new_domain_aln/$qch.br.aln -o new_domain_csv/$qch.win$CSV_WINDOW_SIZE.csv -w $CSV_WINDOW_SIZE";
    #at most $maxload workers at a time; @pidlist holds the pids of the running ones (0 = free slot)
    $maxload = 5; $runct = -1; $loadct = 0; @pidlist = (); $finpid = "null";
    for ($k=0; $k<$maxload; $k++) { $pidlist[$k] = 0; }
    while ($#forcons > $runct) {
      $pid = fork;
      #a failed fork must not fall into the worker branch (the main script would exit there)
      if (!defined $pid) { die "Cannot fork CSV worker for $qch: $!\n"; }
      if ($pid == 0) {
        #worker: $runct still has its pre-increment value here, so the first worker takes
        #index -1 (the last element) and the following ones take 0 .. n-2; every pair is done once
        $dom1 = $qch;
        @arf = split/\t/, $forcons[$runct];
        $dom2 = $arf[0];
        $daliz = $arf[1];
        $d2aln = `grep $dom2 $path_aln/rps_scop_aln_list`;
        chomp $d2aln;
        if ($d2aln eq "") { $d2aln = "nullfile"; }
        SUB_mk_dpamod ( $dom1, $dom2, $CSV_WINDOW_SIZE );
        #remove an empty result using the same path that was tested
        if (-z "dali_output/$dom1.$dom2.dpamod" == 1) { `rm dali_output/$dom1.$dom2.dpamod`; }
        $catmod = `cat dali_output/$dom1.$dom2.dpamod`; chomp $catmod;
        if ($catmod eq "") { $csvoutput = "CSV ERROR"; }
        else {
          if (-e "new_domain_aln/$dom1.br.aln" == 1 && -e "$path_aln/$d2aln" == 1) {
            $csvoutput = SUB_calc_csv_score ( "$dom1", "$dom2", "$CSV_WINDOW_SIZE", "$CSV_PCT" );
          }
          else { $csvoutput = "CSV ERROR"; }
        }
        if ($csvoutput =~ /^error/ || $csvoutput eq "CSV ERROR") {
          $csvscore = "error";
          $compscore = "error";
          $posnumeach = "X";
          $posnumboth = "X";
          $posnumtot = "X";
          $posdiffpercent = "X";
        }
        else {
          @csvar1 = split/\t/, $csvoutput;
          $csvscore = sprintf("%.6f", $csvar1[0]);
          if ($csvar1[1] eq "error") { $compscore = $csvar1[1]; }
          else { $compscore = sprintf("%1.6e", $csvar1[1]); }
          $posdiffpercent = $csvar1[2];
          $posnumeach = $csvar1[3];
          $posnumboth = $csvar1[4];
          $posnumtot = $csvar1[5];
        }
        open(OUT, ">$dom1.$dom2.fromcons");
        print OUT "$dom1\t$dom2\t$daliz\t$csvscore\t$compscore\t$posdiffpercent\t($posnumeach, $posnumboth, $posnumtot)\n";
        close(OUT);
        exit;
      }
      #parent: store the pid in the first FREE slot (the old loop only ever looked at slot 0,
      #so most pids were lost and $loadct could run past $maxload)
      for ($k=0; $k<$maxload; $k++) {
        if ($pidlist[$k] == 0) { $pidlist[$k] = $pid; last; }
      }
      sleep 1; $loadct ++; $runct ++;
      if ($loadct == $maxload) {
        $finpid = wait;
        for ($k=0; $k<$maxload; $k++) { if ($pidlist[$k] == $finpid) { $pidlist[$k] = 0; $loadct --; last; } }
        $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
      }
    }
    #wait returns -1 once no child is left
    while ($finpid != -1) { $finpid = wait; }
    $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
#append all CSV score results to ~.dali1 and find any CSV hits
    $ls1 = `ls -1 $qch*fromcons`; @ar1 = split/\n/, $ls1;
    if ($#ar1 != -1) {
      open(OUT, ">>$qch.dali1");
      print OUT "!^(NEW_FILE): $qch.csvscores.win$CSV_WINDOW_SIZE.pct$CSV_PCT\n";
      for ($s=0; $s!=($#ar1+1); $s++) {
        open(IN, "$ar1[$s]"); @in = <IN>; close(IN); chomp $in[0];
        print OUT "$in[0]\n";
        @ar2 = split/\t/, $in[0];
        $scopdom = $ar2[1];
        $dz = $ar2[2];
        $csvmatrix = $ar2[3];
        $csvcompass = $ar2[4];
        $keep = 0;
        next if ($dz eq "error");
        if ($csvmatrix ne "inf" && $csvmatrix ne "error" && (($csvmatrix >= 0.1 && $dz >= 5) || ($csvmatrix >= 0.25 && $dz >= 2))) { $keep = 1; }
        if ($csvcompass ne "inf" && $csvcompass ne "error" && (($csvcompass >= 0.4 && $dz >= 5) || ($csvcompass >= 0.5 && $dz >= 2))) { $keep = 1; }
        next if ($keep == 0 || $scopdom eq "");
        $cranges = SUB_get_ranges_dpa ( "dali_output/$qch.$scopdom.dpa", "0", "1" );
        @car3 = split/\t/, $cranges;
        $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$qch.ca", "$car3[0]" );
        $wholeqrange = SUB_get_wholerange ( "new_domain_str/$qch.ca" );
        $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$scopdom.ca", "$car3[1]" );
        $wholesrange = SUB_get_wholerange ( "$path_str/$scopdom.ca" );
        $grepline1 = `grep $scopdom $lib_scop_dircla`;
        @grepar1 = split/\t/, $grepline1;
        $superfamid = $grepar1[3];
        push @hit7, "$qch\t\t$qrangeres\t$wholeqrange\t$scopdom\t$superfamid\t\t$srangeres\t$wholesrange\t$csvcompass\t\t$dz\t$csvmatrix\t7\n";
      }
      close(OUT);
    }
    if ($hit7[0] ne "") { $step7ct ++; push @step7, [@hit7]; }
  }
  @forcons = ();
  #append the accepted hits of steps 6d, 9 and 7 and all alignment files to $qch.dali1
  open(DAOUT, ">>$qch.dali1");
  if ($hit6d[0] ne "") {
    print DAOUT "!^(NEW_FILE): $qch.out6d\n";
    foreach $line (@hit6d) { print DAOUT "$line"; }
  }
  if ($hit9[0] ne "") {
    print DAOUT "!^(NEW_FILE): $qch.out9\n";
    foreach $line (@hit9) { print DAOUT "$line"; }
  }
  if ($hit7[0] ne "") {
    print DAOUT "!^(NEW_FILE): $qch.out7\n";
    foreach $line (@hit7) { print DAOUT "$line"; }
  }
  $ls1 = `ls -1 dali_output/*.dpa`;
  @lsar = split/\n/, $ls1;
  foreach $dpafile (@lsar) {
    chomp $dpafile;
    @lsar2 = split/dali\_output\//, $dpafile;
    open(IN, "$dpafile"); @in = <IN>;
    print DAOUT "!^(NEW_FILE): $lsar2[1]\n"; foreach $line (@in) { print DAOUT "$line"; } close(IN);
  }
  $ls1 = `ls -1 dali_output/*.dpamod`;
  @lsar = split/\n/, $ls1;
  foreach $dpamodfile (@lsar) {
    chomp $dpamodfile;
    @lsar2 = split/dali\_output\//, $dpamodfile;
    open(IN, "$dpamodfile"); @in = <IN>;
    print DAOUT "!^(NEW_FILE): $lsar2[1]\n"; foreach $line (@in) { print DAOUT "$line"; } close(IN);
  }
  close(DAOUT);
  @hit6d = (); $ls1 = ""; @lsar = ();
  #clean up and archive the DaliLite input/output files of this query
  `rm -f *.fromcons`;
  `rm -f dali_output/*.dpa dali_output/*.dpamod`;
  `mkdir tmpdalidir`;
  `mv dali_output/1000*.dat tmpdalidir`;
  `mv dali_output/1000*.dssp tmpdalidir`;
  `mv dali_output/*.dccp tmpdalidir`;
  chdir "tmpdalidir";
  `tar cvf $qch.dalifiles1.tar *`;
  `mv $qch.dalifiles1.tar ..`;
  chdir "..";
  `rm -r -f dali_output`;
  `rm -r -f tmpdalidir`;
  `bzip2 $qch.dalifiles1.tar`;
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
}

#if query rep was dumped by DaliLite, run CSV using MAMMOTH alignments for each query-library pair in @pairsfordct
#Only queries listed in @querydalidump are processed.  For each of their pairs the parent
#prepares the MAMMOTH-based alignment file and a forked worker computes the CSV score;
#the results are appended to $qch.dali1 and accepted pairs become step-7 hits.
for ($f=0; $f!=($pairsfdct+1); $f++) {
  @hit7 = ();
  @ar1 = split/\t/, $pairsfordali[$f][0];
  $qch = $ar1[0];
  $dump = 0;
  for ($j=0; $j!=($#querydalidump+1); $j++) { if ($querydalidump[$j] eq $qch) { $dump = 1; } }
  next unless ($dump == 1);
  if (-d "new_domain_csv" == 0) { `mkdir new_domain_csv`; }
  if (-e "new_domain_csv/$qch.win$CSV_WINDOW_SIZE.csv" == 0) {
    system "$path_bin/al2co -i new_domain_aln/$qch.br.aln -o new_domain_csv/$qch.win$CSV_WINDOW_SIZE.csv -w $CSV_WINDOW_SIZE";
  }
  #at most $maxload workers at a time; @pidlist holds the pids of the running ones (0 = free slot)
  $maxload = 5; $runct = -1; $loadct = 0; @pidlist = (); $finpid = "null"; @h7maminfo = ();
  for ($k=0; $k<$maxload; $k++) { $pidlist[$k] = 0; }
  while ($#{$pairsfordali[$f]} > $runct) {
    #Prepare the next pair BEFORE forking so the worker inherits its data.  Previously the
    #parent prepared it after the fork: the first worker saw an empty list and the last
    #pair was never scored.
    my $nextct = $runct + 1;
    chomp $pairsfordali[$f][$nextct];
    @ar0 = split/\t/, $pairsfordali[$f][$nextct];
    $dom1 = $ar0[0]; $dom2 = $ar0[1];
    $d2aln = `grep $dom2 $path_aln/rps_scop_aln_list`; chomp $d2aln;
    #MAMMOTH Z-score of the pair = 2nd column of the first line starting with "$dom1.M.$dom2"
    $zgrep = `grep "$dom1.M.$dom2" $dom1.mammoth2`;
    @zga = split/\n/, $zgrep;
    $mz = "error";
    for ($z=0; $z!=($#zga+1); $z++) {
      next unless ($zga[$z] =~ /^$dom1\.M\.$dom2/);
      @zgb = split/\t/, $zga[$z];
      $mz = $zgb[1];
      last;
    }
    SUB_mk_mpaconf ( $dom1, $dom2, $CSV_WINDOW_SIZE );
    if (-z "$dom1.$dom2.mpaconf" == 1) { `rm $dom1.$dom2.mpaconf`; }
    #one entry per loop pass, so this entry lands at index $nextct
    push @h7maminfo, "$dom1\t$dom2\t$d2aln\t$mz";
    $pid = fork;
    #a failed fork must not fall into the worker branch (the main script would exit there)
    if (!defined $pid) { die "Cannot fork CSV worker for $qch: $!\n"; }
    if ($pid == 0) {
      #worker: score the pair that was just prepared, write the result file and exit
      @ar0 = split/\t/, $h7maminfo[$nextct];
      $dom1 = $ar0[0]; $dom2 = $ar0[1]; $d2aln = $ar0[2]; $mz = $ar0[3];
      if (-e "new_domain_aln/$dom1.br.aln" == 1 && -e "$path_aln/$d2aln" == 1 && -e "$dom1.$dom2.mpaconf" == 1) {
         $csvoutput = SUB_calc_csv_score_mam ( "$dom1", "$dom2", "$CSV_WINDOW_SIZE", "$CSV_PCT" );
      }
      else { $csvoutput = "CSV ERROR"; }
      if ($csvoutput =~ /^error/ || $csvoutput eq "CSV ERROR") {
        $csvscore = "error";
        $compscore = "error";
        $posnumeach = "X";
        $posnumboth = "X";
        $posnumtot = "X";
        $posdiffpercent = "X";
      }
      else {
        @csvar1 = split/\t/, $csvoutput;
        $csvscore = sprintf("%.6f", $csvar1[0]);
        if ($csvar1[1] eq "error") { $compscore = $csvar1[1]; }
        else { $compscore = sprintf("%1.6e", $csvar1[1]); }
        $posdiffpercent = $csvar1[2];
        $posnumeach = $csvar1[3];
        $posnumboth = $csvar1[4];
        $posnumtot = $csvar1[5];
      }
      open(OUT, ">$dom1.$dom2.fromcons");
      print OUT "$dom1\t$dom2\t$mz\t$csvscore\t$compscore\t$posdiffpercent\t($posnumeach, $posnumboth, $posnumtot)\n";
      close(OUT);
      exit;
    }
    #parent: store the pid in the first FREE slot (the old loop only ever looked at slot 0)
    for ($k=0; $k<$maxload; $k++) {
      if ($pidlist[$k] == 0) { $pidlist[$k] = $pid; last; }
    }
    sleep 1; $loadct ++; $runct ++;
    if ($loadct == $maxload) {
      $finpid = wait;
      for ($k=0; $k<$maxload; $k++) { if ($pidlist[$k] == $finpid) { $pidlist[$k] = 0; $loadct --; last; } }
      $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
    }
  }
  #wait returns -1 once no child is left
  while ($finpid != -1) { $finpid = wait; }
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
#append all CSV score results to ~.dali1 and find any CSV hits
  $ls1 = `ls -1 $qch*fromcons`; @ar1 = split/\n/, $ls1;
  if ($#ar1 != -1) {
    open(OUT, ">>$qch.dali1");
    print OUT "!^(NEW_FILE): $qch.mammothcsvscores.win$CSV_WINDOW_SIZE.pct$CSV_PCT\n";
    for ($s=0; $s!=($#ar1+1); $s++) {
      open(IN, "$ar1[$s]"); @in = <IN>; close(IN); chomp $in[0];
      print OUT "$in[0]\n";
      @ar2 = split/\t/, $in[0];
      $scopdom = $ar2[1];
      $mz = $ar2[2];
      $csvmatrix = $ar2[3];
      $csvcompass = $ar2[4];
      $keep = 0;
      next if ($mz eq "error");
      if ($csvmatrix ne "inf" && $csvmatrix ne "error" && $csvmatrix >= 0.3 && $mz >= 4) { $keep = 1; }
      if ($csvcompass ne "inf" && $csvcompass ne "error" && $csvcompass >= 0.4 && $mz >= 4) { $keep = 1; }
      next if ($keep == 0 || $scopdom eq "");
      $cranges = SUB_get_ranges_mpa ( "$qch.$scopdom.mpaconf" );
      @car3 = split/\t/, $cranges;
      $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$qch.ca", "$car3[0]" );
      $wholeqrange = SUB_get_wholerange ( "new_domain_str/$qch.ca" );
      $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$scopdom.ca", "$car3[1]" );
      $wholesrange = SUB_get_wholerange ( "$path_str/$scopdom.ca" );
      $grepline1 = `grep $scopdom $lib_scop_dircla`;
      @grepar1 = split/\t/, $grepline1;
      $superfamid = $grepar1[3];
      push @hit7, "$qch\t$qrangeres\t\t$wholeqrange\t$scopdom\t$superfamid\t$srangeres\t\t$wholesrange\t$csvcompass\t$mz\t\t$csvmatrix\t7\n";
    }
    close(OUT);
  }
  if ($hit7[0] ne "") {
    $step7ct ++; push @step7, [@hit7];
    open(DAOUT, ">>$qch.dali1");
    print DAOUT "!^(NEW_FILE): $qch.out7\n";
    foreach $line (@hit7) { print DAOUT "$line"; }
    close(DAOUT);
  }
  @ls = `ls -1 *.mpaconf`;
  if ($#ls != -1) {
    open(DAOUT, ">>$qch.dali1");
    foreach $lfile (@ls) {
      chomp $lfile;
      print DAOUT "!^(NEW_FILE): $lfile\n";
      @lscat = `cat $lfile`;
      foreach $lline (@lscat) { print DAOUT "$lline"; }
    }
    #close only after ALL files are written (it used to sit inside the loop, losing every file but the first)
    close(DAOUT);
  }
  `rm *.fromcons *.mpaconf *.mpaconfpre`;
}
@hit7 = (); @ar1 = (); $ls1 = ""; @pairsfordali = (); @querydalidump = ();
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#run dali/blast comparison for all pairs in ~.daliconfirm (list of pairs located in pdb_$date/$rep.blast2)
foreach $rep (@unmap_rep) {
  @forconfirminf = (); @fromconfirm = (); @hit8 = ();
  open(REPLIST, "$replist");
  @all_reps = <REPLIST>;
  open(IN, "pdb_$date/$rep.blast2");
  @in = <IN>;
  $start = 0;
  foreach $line (@in) {
    if ($start == 1) {
      last if ($line =~ /\!\^\(NEW\_FILE/);
      chomp $line;
      @car = split/\t/, $line;
      next if ($car[0] eq "");
      if ($car[2] =~ /simple/) {
        $pdbid = substr($car[0], 0, 4);
        $chain = substr($car[0], -1);
        foreach $rline (@all_reps) {
          next unless ($rline =~ /^d$pdbid$chain/i);
          $ldom = substr($rline, 0, 7);
          push @forconfirminf, "$ldom\t$car[0]\t$car[1]\t$car[2]";
        }
      }
      else {
        $g1 = `grep $car[0] $replist`; chomp $g1;
        next if ($g1 eq "");
        push @forconfirminf, "$car[0]\t$car[0]\t$car[1]\t$car[2]";
      }
    }
    else { if ($line =~ /\!\^\(NEW\_FILE\)\: $rep.daliconfirm/) { $start = 1; } }
  }
  close(IN); @in = ();
  close(REPLIST); @all_reps = ();
  next if ($#forconfirminf == -1);
  $qch = $rep;
  `mkdir dali_output`;
  open(T1, ">querydomains.list"); print T1 "$qch.pdb\n"; close(T1);
  open(T2, ">querydom2dali"); print T2 "$qch.pdb 9000\n"; close(T1);
  SUB_renum_2 ( "9000", "new_domain_str" );
  SUB_get5N ();
  `cat querydom2dali_5N $path_str/scoplib2dali_5N > all2dali_conf_5N`;
#make list of pairs to run (x2)
  open(KEY, "all2dali_conf_5N");
  @all_key = <KEY>;
  open(TORUN, ">pairstorun");
  LP_M3: foreach $cpair (@forconfirminf) {
    @ar0 = split/\t/, $cpair;
    $query = $qch; $lib = $ar0[0];
    $id1 = "null"; $id2 = "null";
    foreach $line1 (@all_key) {
      next unless ($line1 =~ /^$query/);
      chomp $line1;
      @ar1 = split/ /, $line1;
      $id1 = $ar1[1];
      $id1four = substr($id1, 0, 4);
      next LP_M3 if (-e "$id1.dat" == 0);
      `cp $id1four* dali_output`;
      last;
    }
    foreach $line2 (@all_key) {
      next unless ($line2 =~ /^$lib/);
      chomp $line2;
      @ar2 = split/ /, $line2;
      $id2 = $ar2[1];
      $id2four = substr($id2, 0, 4);
      next LP_M3 if (-e "$path_dalidat/$id2.dat" == 0);
      `cp $path_dalidat/$id2four* dali_output`;
      last;
    }
    next if ($id1 eq "null" | $id2 eq "null");
    print TORUN "./DaliLite -align $id1 $id2 ; mv $id1.dccp $query.$lib.dccp\n";
    print TORUN "./DaliLite -align $id2 $id1 ; mv $id2.dccp $lib.$query.dccp\n";
  }
  close(KEY); @all_key = ();
  close(TORUN);
  system "mv all2dali_conf_5N dali_output";
  $ptrct = `wc -l pairstorun`;
  chomp $ptrct;
  if ($ptrct != 0) {
#make non-redundant pairstorun
    # Make pairstorun non-redundant: keep the first occurrence of every command
    # line (original order preserved) and follow each kept line with
    # "rm dali.lock".  A hash lookup replaces the original rescan of every line
    # kept so far, so the pass is linear in the number of lines.
    open(PTR1, "pairstorun");
    @all_ptr1 = <PTR1>;
    close(PTR1);
    my %ptrseen = ();
    @ptrar = ();
    foreach $ptrline (@all_ptr1) {
      chomp $ptrline;
      next if ($ptrseen{$ptrline}++);
      push @ptrar, $ptrline;
    }
    # $ptrct keeps its original meaning: index of the last unique line.
    $ptrct = $#ptrar;
    @all_ptr1 = ();
    open(PTR2, ">ptr2");
    foreach $ptrline (@ptrar) { print PTR2 "$ptrline\nrm dali.lock\n"; }
    close(PTR2);
    `mv ptr2 pairstorun`;
#$dalisets = 7;
    for ($k=1; $k!=($dalisets+1); $k++) { `mkdir tempdir.$k`; }
    open(RUNLIST, "pairstorun");
    @all_run = <RUNLIST>;
    $N = (int (($#all_run+1)/$dalisets)) + 1;
    for ($k=1; $k!=($dalisets+1); $k++) {
      $ln = $k * $N;
      if ($ln > ($#all_run+1)) {
        $prevln = ($k-1)*$N;
        $left = $#all_run + 1 - $prevln;
        last if ($left == 0);
        system "tail -$left pairstorun > pairs.$k";
        `mv pairs.$k tempdir.$k`;
        last;
      }
      else {
        system "head -$ln pairstorun | tail -$N > pairs.$k";
        `mv pairs.$k tempdir.$k`;
      }
    }
    close(RUNLIST); @all_run = ();
#run dali for pairs
    for ($k=1; $k!=($dalisets+1); $k++) {
      system "cp $path_bin/DaliLite tempdir.$k";
      chdir "tempdir.$k";
      if (-e "pairs.$k" == 1) {
        open(READPRS, "pairs.$k");
        @allrdpr = <READPRS>;
        foreach $rpline (@allrdpr) {
          next if ($rpline =~ /rm dali.lock/);
          @rpar = split/ /, $rpline;
          $str1 = substr($rpar[2], 0, 4);
          $str2 = substr($rpar[3], 0, 4);
          `cp ../dali_output/$str1* .`;
          `cp ../dali_output/$str2* .`;
        }
        close(READPRS); @allrdpr = ();
        open(ADDRM, ">>pairs.$k");
        print ADDRM "rm dali.lock\n";
        close(ADDRM);
        system "chmod +x pairs.$k";
        system "./pairs.$k &";
      }
      chdir "..";
    }
#wait for dali to finish, then move to new directory
    sleep 10;
    $wt = 0;
    while ($wt == 0) {
      $ovct = `ps | grep "DaliLite" | wc -l`;
      chomp $ovct;
      if ($ovct == 0) { $wt = 1; }
      else { SUB_chk_for_stalled_dali (); }
    }
    for ($k=1; $k!=($dalisets+1); $k++) {
      chdir "tempdir.$k";
      `mv *.dccp ../dali_output`;
      `rm -f *`;
      chdir "..";
    }
    `rm -r -f tempdir.*`;
    system "cp $path_bin/DaliLite dali_output";
    chdir "dali_output";
    $ls = `ls -1 *.dccp`; @lslist = split/\n/, $ls;
    @s6zscores = ();
    foreach $dccpfile (@lslist) {
      if (-z "$dccpfile" == 1) { `rm $dccpfile`; next; }
      $zscore = SUB_mk_dpa_from_dccp ( "$dccpfile", "$qch", "all2dali_conf_5N" );
      push @s6zscores, "$dccpfile\t$zscore";
    }
    chdir "..";
  }
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#output dali result information to ~.dali2 ; make this non-redundant
  open(DAOUT, ">$qch.dali2");
  print DAOUT "!^(NEW_FILE): all_dali_conf_zbc_$qch\n";
  @alreadyused = ();
  LP_M5: foreach $cpair (@forconfirminf) {
    @ar0 = split/\t/, $cpair;
    $qdom = $qch; $ldom = $ar0[0];
    for ($i=0; $i!=($#alreadyused+1); $i++) { next LP_M5 if ($alreadyused[$i] eq $ldom); }
    foreach $zp (@s6zscores) {
      if ($zp =~ /$qdom\.$ldom\./) { @za = split/\s+/, $zp; $z1 = $za[$#za]; }
      if ($zp =~ /$ldom\.$qdom\./) { @za = split/\s+/, $zp; $z2 = $za[$#za]; }
    }
    if (-e "dali_output/$qdom.$ldom.dpa" == 0) { $bl1 = "error"; $cov1 = "error"; $z1 = "error"; }
    else {
      $bl1 = `$path_bin/blsm_scores -i dali_output/$qdom.$ldom.dpa`; chomp $bl1;
      $cov1 = SUB_cover_dpa ( "dali_output/$qdom.$ldom.dpa" );
    }
    if (-e "dali_output/$ldom.$qdom.dpa" == 0) { $bl2 = "error"; $cov2 = "error"; $z2 = "error"; }
    else {
      $bl2 = `$path_bin/blsm_scores -i dali_output/$ldom.$qdom.dpa`; chomp $bl2;
      $cov2 = SUB_cover_dpa ( "dali_output/$ldom.$qdom.dpa" );
    }
    $ranges1 = "\t";
    $ranges2 = "\t";
    if ($z1 ne "error" && $bl1 ne "error" && $cov1 ne "error" && $bl1 ne "inf") {
      $ranges1 = SUB_get_ranges_dpa ( "dali_output/$qdom.$ldom.dpa", "0", "1" );
    }
    if ($z2 ne "error" && $bl2 ne "error" && $cov2 ne "error" && $bl2 ne "inf") {
      $ranges2 = SUB_get_ranges_dpa ( "dali_output/$ldom.$qdom.dpa", "1", "0" );
    }
    print DAOUT "$qdom.D.$ldom\t$z1\t$z2\t$bl1\t$bl2\t$cov1\t$cov2\t$ranges1\t$ranges2\n";
    push @alreadyused, "$ldom";
  }
  close(DAOUT);
#get pairwise alignments from BLAST/RPS-BLAST/PSI-BLAST hit and DaliLite hit (or MAMMOTH if DaliLite dumped query domain)
  $lsct = `ls -1 9000*dat | wc -l`; chomp $lsct;
  if ($lsct == 0) { $type = "M"; }
  else { $type = "D"; }
  open(IN, "pdb_$date/$rep.blast");
  @in = <IN>;
  @alreadyused = ();
  LP_M6: foreach $cpair (@forconfirminf) {  
    $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
    @ar0 = split/\t/, $cpair;
    $query = $qch; $hitid = $ar0[0]; $hit_x = $ar0[1]; $dbevalue = $ar0[2]; $seqtype = $ar0[3];
    next if ($hitid eq "");
    if ($type eq "D") {
      $g1 = `grep $hitid $query.dali2`; chomp $g1;
      @ar0 = split/\t/, $g1;
      next if ($g1 eq "" | ($ar0[1] eq "error" && $ar0[2] eq "error"));
      $whichz = 1;
      $zscore = $ar0[1];
      $qrangepos = $ar0[7], $srangepos = $ar0[8];
      if ($ar0[1] eq "error" | $ar0[2] > $ar0[1]) {
        $zscore = $ar0[2]; $whichz = 2;
        $qrangepos = $ar0[9], $srangepos = $ar0[10];
      }
    }
    else {
      $whichz = 1;
      $g1 = `grep $hitid $query.mammoth2`;
      @gar1 = split/\n/, $g1;
      $zscore = "";
      foreach $gline (@gar1) {
        next if ($gline =~ /NEW\_FILE/ | $gline =~ /^$hitid/);
        @gar2 = split/\t/, $gline;
        $zscore = $gar2[1];
        last;
      }
      next if ($zscore eq "");
    }
    $track = 0;
    $qpsS = -1; $hpsS = -1;
    $startpos_q = "null"; $startpos_h = "null";
    $start = 0;
    # Walk the concatenated BLAST report held in @in and pull out the aligned
    # query/subject strings for the current hit.
    #   $start : 0 until the "!^(NEW_FILE): $query$seqtype" section marker is seen
    #   $track : 0 = looking for the hit's ">" header line,
    #            1 = header found, looking for the " Expect = " line whose value
    #                equals $dbevalue,
    #            2 = inside that alignment, collecting Query:/Sbjct: rows
    # Results: @seq_q/@seq_h (one character per element, last indices $qpsS and
    # $hpsS) and $startpos_q/$startpos_h (second space-separated field of the
    # first Query:/Sbjct: row).
    foreach $sq1 (@in) {
      if ($start == 1) {
        # The next section marker ends the section that belongs to this query.
        last if ($sq1 =~ /\!\^\(NEW\_FILE/);
        # Match the header of this particular hit.  The original interpolated
        # $hit, which is not set anywhere near here, so every ">" line matched;
        # $hit_x (field 1 of the @forconfirminf entry) is the hit name.  \Q..\E
        # keeps characters such as "." or "|" in the name literal.
        if ($track == 0) { if ($sq1 =~ /^\>\Q$hit_x\E/i) { $track = 1; next; } }
        if ($track == 1) {
          if ($sq1 =~ / Expect \= /) {
            chomp $sq1;
            @ar1a = split/ Expect \= /, $sq1;
            if ($dbevalue == $ar1a[1]) { $track = 2; next; }
          }
        }
        if ($track == 2) {
          if ($sq1 =~ /^Query\:/) {
            @ar1b = split/ /, $sq1;
            if ($startpos_q eq "null") { $startpos_q = $ar1b[1]; }
            # The aligned residues are the second-to-last space-separated field.
            for ($i=0; $i!=101; $i++) {
              last if (substr($ar1b[$#ar1b-1], $i, 1) eq "");
              $qpsS += 1;
              $seq_q[$qpsS] = substr($ar1b[$#ar1b-1], $i, 1);
            }
          }
          if ($sq1 =~ /^Sbjct\:/) {
            @ar1c = split/ /, $sq1;
            if ($startpos_h eq "null") { $startpos_h = $ar1c[1]; }
            for ($i=0; $i!=101; $i++) {
              last if (substr($ar1c[$#ar1c-1], $i, 1) eq "");
              $hpsS += 1;
              $seq_h[$hpsS] = substr($ar1c[$#ar1c-1], $i, 1);
            }
          }
          # Stop at the next hit header, or at the next " Score = " line once at
          # least one query character has been collected.
          last if (($sq1 =~ /^\>/) | ($qpsS != -1 && $sq1 =~ /^ Score \= /));
        }
      }
      else { if ($sq1 =~ /\!\^\(NEW\_FILE\)\: $query$seqtype/) { $start = 1; } }
    }
    next if ($qpsS == -1);
    if ($type eq "D") {
      if ($whichz == 1) { open(D1, "dali_output/$query.$hitid.dpa"); }
      if ($whichz == 2) { open(D1, "dali_output/$hitid.$query.dpa"); }
      @all_d1 = <D1>;
      $qpsD = -1; $hpsD = -1;
      chomp $all_d1[0]; chomp $all_d1[1];
      if ($whichz == 1) {
        @ar2a = split/\t/, $all_d1[0];
        @ar2b = split/\t/, $all_d1[1];
      }
      if ($whichz == 2) {
        @ar2b = split/\t/, $all_d1[0];
        @ar2a = split/\t/, $all_d1[1];
      }
      close(D1);
      for ($i=0; $i!=9999999; $i++) {
        last if (substr($ar2a[1], $i, 1) eq "");
        $qpsD += 1; 
        $str_q[$qpsD] = substr($ar2a[1], $i, 1);
      }
      for ($i=0; $i!=9999999; $i++) {
        last if (substr($ar2b[1], $i, 1) eq "");
        $hpsD += 1;
        $str_h[$hpsD] = substr($ar2b[1], $i, 1);
      }
    }
    else {
      open(D1, "$query.mammoth2");
      @all_d1 = <D1>;
      for ($m=0; $m!=($#all_d1+1); $m++) {
        next unless ($all_d1[$m] =~ /^\!\^\(NEW\_FILE\)\: $query.M.$hitid.mpa/);
        $keep_d1_0 = $all_d1[$m+1];
        $keep_d1_1 = $all_d1[$m+2];
        last;
      }
      close(D1); @all_d1 = ();
      $qpsD = -1; $hpsD = -1;
      chomp $keep_d1_0; chomp $keep_d1_1;
      open(MPAOUT, ">$query.$hitid.mpa");
      print MPAOUT "$keep_d1_0\n$keep_d1_1\n";
      close(MPAOUT);
      @ar2a = split/ /, $keep_d1_0;
      @ar2b = split/ /, $keep_d1_1;
      for ($i=0; $i!=9999999; $i++) {
        last if (substr($ar2a[$#ar2a], $i, 1) eq "");
        $qpsD += 1;
        $str_q[$qpsD] = substr($ar2a[$#ar2a], $i, 1);
        if ($str_q[$qpsD] eq ".") { $str_q[$qpsD] = "-"; }
      }
      # Explode the hit row of the MAMMOTH alignment (last space-separated field
      # of $keep_d1_1) into @str_h, one character per element, rewriting "." as
      # "-" exactly as the query loop just above does for @str_q.
      for ($i=0; $i!=9999999; $i++) {
        last if (substr($ar2b[$#ar2b], $i, 1) eq "");
        $hpsD += 1;
        $str_h[$hpsD] = substr($ar2b[$#ar2b], $i, 1);
        # Index with $hpsD.  The original tested $str_h[$qpsD], the final index
        # of the query row, so "." characters in the hit row were left in place.
        if ($str_h[$hpsD] eq ".") { $str_h[$hpsD] = "-"; }
      }
    }
    open(T1, ">qseqD.fa");
    print T1 ">$query\n"; for ($i=0; $i!=($qpsD+1); $i++) { print T1 "$str_q[$i]"; }
    print T1 "\n"; close(T1);
    open(T2, ">qseqS.fa");
    print T2 ">$query\n"; for ($i=0; $i!=($qpsS+1); $i++) { print T2 "$seq_q[$i]"; }
    print T2 "\n"; close(T2);
    open(T3, ">hseqD.fa");
    print T3 ">$hitid\n"; for ($i=0; $i!=($hpsD+1); $i++) { print T3 "$str_h[$i]"; }
    print T3 "\n"; close(T3);
    open(T4, ">hseqS.fa");
    print T4 ">$hitid\n"; for ($i=0; $i!=($hpsS+1); $i++) { print T4 "$seq_h[$i]"; }
    print T4 "\n"; close(T4);
    # Write two two-record FASTA files for the blastclust check that follows:
    #   forbc1 = query string from the structural alignment (@str_q) and from
    #            the sequence alignment (@seq_q)
    #   forbc2 = the same two strings for the hit (@str_h, @seq_h)
    # The record names are the id with "1"/"2" appended.  The braces matter:
    # the original "$query1", "$query2", "$hitid1", "$hitid2" were parsed as
    # four separate (unset) variables, leaving every header as a bare ">".
    open(FB1, ">forbc1");
    print FB1 ">${query}1\n"; for ($i=0; $i!=($qpsD+1); $i++) { print FB1 "$str_q[$i]"; }
    print FB1 "\n>${query}2\n"; for ($i=0; $i!=($qpsS+1); $i++) { print FB1 "$seq_q[$i]"; }
    print FB1 "\n";
    close (FB1);
    open(FB2, ">forbc2");
    print FB2 ">${hitid}1\n"; for ($i=0; $i!=($hpsD+1); $i++) { print FB2 "$str_h[$i]"; }
    print FB2 "\n>${hitid}2\n"; for ($i=0; $i!=($hpsS+1); $i++) { print FB2 "$seq_h[$i]"; }
    print FB2 "\n";
    close(FB2);
#are these alignments to the same region?
    $ct1 = `blastclust -i forbc1 -S 90 -L 0.5 -b F | wc -l`; chomp $ct1;
    $ct2 = `blastclust -i forbc2 -S 90 -L 0.5 -b F | wc -l`; chomp $ct2;
    if ($ct1 > 2 | $ct2 > 2) { $not_the_same = 1; }
    else { $not_the_same = 0; }
    `rm forbc1 forbc2`;
    if ($not_the_same == 0) {
#deal with gaps in the two pairwise alignments relative to each other...
      $align0query = `$path_bin/align0 qseqD.fa qseqS.fa`;
      $align0hit = `$path_bin/align0 hseqD.fa hseqS.fa`;
      @alnq = split/\n/, $align0query;
      $s1 = ""; $s2 = "";
      for ($i=0; $i!=($#alnq+1); $i++) {
        if ($alnq[$i] =~ /qseqD\./) {
          @alnq_2a = split/ /, $alnq[$i];
          $s1 .= $alnq_2a[1];
          @alnq_2b = split/ /, $alnq[$i+2];
          $s2 .= $alnq_2b[1];
        }
      }
      $q1fct = 0; $q2fct = 0;
      $b1 = 0; $act1 = 0;
      $b2 = 0; $act2 = 0;
      $mctq1 = -1; $mctq2 = -1;
      for ($i=0; $i!=999999; $i++) {
        $aa1 = substr($s1, $i, 1);
        $aa2 = substr($s2, $i, 1);
        last if ($aa1 eq "");
        if ($aa1 ne "-") { $b1 = 1; $act1 += 1; }
        if ($aa2 ne "-") { $b2 = 1; $act2 += 1; }
        if ($b1 == 0 && $b2 == 1) { $q2fct += 1; }
        if ($b2 == 0 && $b1 == 1) { $q1fct += 1; }
        if ($b1 == 1 && $b2 == 1) {
          if ($aa1 eq "-") { $mctq2 += 1; $mq2[$mctq2] = $act2; }
          if ($aa2 eq "-") { $mctq1 += 1; $mq1[$mctq1] = $act1; }
        }
      }
      @alnh = split/\n/, $align0hit;
      $s3 = ""; $s4 = "";
      for ($i=0; $i!=($#alnh+1); $i++) {
        if ($alnh[$i] =~ /hseqD\./) {
          @alnh_2a = split/ /, $alnh[$i];
          $s3 .= $alnh_2a[1];
          @alnh_2b = split/ /, $alnh[$i+2];
          $s4 .= $alnh_2b[1];
        }
      }
      $h1fct = 0;
      $h2fct = 0;
      $b3 = 0; $act3 = 0;
      $b4 = 0; $act4 = 0;
      $mcth1 = -1; $mcth2 = -1;
      for ($i=0; $i!=999999; $i++) {
        $aa1 = substr($s3, $i, 1);
        $aa2 = substr($s4, $i, 1);
        last if ($aa1 eq "");
        if ($aa1 ne "-") { $b3 = 1; $act3 += 1; }
        if ($aa2 ne "-") { $b4 = 1; $act4 += 1; }
        if ($b3 == 0 && $b4 == 1) { $h2fct += 1; }
        if ($b4 == 0 && $b3 == 1) { $h1fct += 1; }
        if ($b3 == 1 && $b4 == 1) {
          if ($aa1 eq "-") { $mcth2 += 1; $mh2[$mcth2] = $act4; }
          if ($aa2 eq "-") { $mcth1 += 1; $mh1[$mcth1] = $act3; }
        }
      }
      $startpos_qS = $q1fct + 1;
      $startpos_hS = $h1fct + 1;
      $startpos_qD = $q2fct + 1;
      $startpos_hD = $h2fct + 1;
      $qduse = ""; $qsuse = "";
      $hduse = ""; $hsuse = "";
      $cct1 = 0; $p1 = -1;
      LP_M4a: for ($i=0; $i!=($qpsD+1); $i++) {
        if ($i != 0) { $p1 = $cct1; }
        if ($str_q[$i] ne "-") { $cct1 += 1; }
        if ($p1 ne $cct1) { for ($j=0; $j!=($mctq1+1); $j++) { if ($cct1 eq $mq1[$j]) { $qduse .= "-"; next LP_M4a; } } }
       $qduse .= $str_q[$i];
      }
      $cct1 = 0; $p1 = -1;
      LP_M4b: for ($i=0; $i!=($hpsD+1); $i++) {
        if ($i != 0) { $p1 = $cct1; }
        if ($str_h[$i] ne "-") { $cct1 += 1; }
        if ($p1 ne $cct1) { for ($j=0; $j!=($mcth1+1); $j++) { if ($cct1 eq $mh1[$j]) { $hduse .= "-"; next LP_M4b; } } }
        $hduse .= $str_h[$i];
      }
      $cct1 = 0; $p1 = -1;
      LP_M4c: for ($i=0; $i!=($qpsS+1); $i++) {
        if ($i != 0) { $p1 = $cct1; }
        if ($seq_q[$i] ne "-") { $cct1 += 1; }
        if ($p1 ne $cct1) { for ($j=0; $j!=($mctq2+1); $j++) { if ($cct1 eq $mq2[$j]) { $qsuse .= "-"; next LP_M4c; } } }
        $qsuse .= $seq_q[$i];
      }
      $cct1 = 0; $p1 = -1;
      LP_M4d: for ($i=0; $i!=($hpsS+1); $i++) {
        if ($i != 0) { $p1 = $cct1; }
        if ($seq_h[$i] ne "-") { $cct1 += 1; }
        if ($p1 ne $cct1) { for ($j=0; $j!=($mcth2+1); $j++) { if ($cct1 eq $mh2[$j]) { $hsuse .= "-"; next LP_M4d; } } }
        $hsuse .= $seq_h[$i];
      }
      @str_q = (); @str_h = (); @seq_h = (); @seq_q = ();
      open(OUT1, ">$query.$hitid.struse");
      printf OUT1 "%-10s %5s $qduse\n", $query, $startpos_qD;
      printf OUT1 "%-10s %5s $hduse\n", $hitid, $startpos_hD;
      close(OUT1);
      open(OUT2, ">$query.$hitid.sequse");
      printf OUT2 "%-10s %5s $qsuse\n", $query, $startpos_qS;
      printf OUT2 "%-10s %5s $hsuse\n", $hitid, $startpos_hS;
      close(OUT2);
#find number of correctly aligned positions (using Dalite or MAMMOTH aln as template)
      $naccoutput = `$path_bin/compgivenseqs_naccs -i $query.$hitid.sequse -p $query.$hitid.struse`;
      chomp $naccoutput;
      @ar3a = split/\t/, $naccoutput;
      @ar3b1 = split/ \= /, $ar3a[0];
      @ar3b2 = split/ \= /, $ar3a[1];
      $nacc1 = $ar3b1[1];
      $nacc2 = $ar3b2[1];
      push @fromconfirm, "$query\t$hitid\t$zscore\t$dbevalue\t$nacc1\t$nacc2\t$seqtype\t$whichz";
      `rm *.sequse *.struse`;
    }
    else { $nacc1 = -1; push @fromconfirm, "$query\t$hitid\t$zscore\t$dbevalue\t-1\t-1\t$seqtype\t$whichz"; }
    `rm hseqS.fa hseqD.fa qseqS.fa qseqD.fa`;
#get hits from dali/blast comparison: cutoffs are e-value =< 100, z-score > 0, and nacc1 >= 15
    for ($i=0; $i!=($#alreadyused+1); $i++) { next LP_M6 if ($alreadyused[$i] eq "$hitid"); }
    if ($type eq "D") { 
      next unless ($zscore > 0 && 100 >= $dbevalue && $nacc1 >= 15);
      $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$rep.ca", "$qrangepos" );
      $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$hitid.ca", "$srangepos" );
      $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$rep.ca" );
      $wholescoprange = SUB_get_wholerange ( "$path_str/$hitid.ca" );
      $grepline1 = `grep $hitid $lib_scop_dircla`;
      @grepar1 = split/\t/, $grepline1;
      $superfamid = $grepar1[3];
      push @hit8, "$rep\t\t$qrangeres\t$wholequeryrange\t$hitid\t$superfamid\t\t$srangeres\t$wholescoprange\t$dbevalue\t$nacc1\t$zscore\t\t8\n";
      push @alreadyused, "$hitid";
    }
    else { 
#if used MAMMOTH instead of DaliLite, cutoffs are e-value =< 100, z-score > 2, and nacc1 >= 15
      next unless ($zscore > 2 && 100 >= $dbevalue && $nacc1 >= 15);
      ($qrangenum, $srangenum) = SUB_get_ranges_mpa ( "$rep.$hitid.mpa" );
      $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$rep.ca", $qrangenum );
      $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$hitid.ca", $srangenum );
      $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$rep.ca" );
      $wholescoprange = SUB_get_wholerange ( "$path_str/$hitid.ca" );
      $grepline1 = `grep $hitid $lib_scop_dircla`;
      @grepar1 = split/\t/, $grepline1;
      $superfamid = $grepar1[3];
      push @hit8, "$rep\t$qrangeres\t\t$wholequeryrange\t$hitid\t$superfamid\t$srangeres\t\t$wholescoprange\t$dbevalue\t$nacc1\t$zscore\t\t8\n";
      push @alreadyused, "$hitid";
    }
  }
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
  `rm -f *.mpa`;
  close(IN); @in = ();
  if ($hit8[0] ne "") { $step8ct ++; push @step8, [@hit8]; }
  open(OUT, ">>$rep.dali2");
  print OUT "!^(NEW_FILE): $rep.forconfirm_inf\n";
  foreach $aline (@forconfirminf) { print OUT "$aline\n"; }
  if ($type eq "D") { print OUT "!^(NEW_FILE): all_dali_blast_$rep\n"; }
  else { print OUT "!^(NEW_FILE): all_mammoth_blast_$rep\n"; }
  foreach $aline (@fromconfirm) { print OUT "$aline\n"; }
  if ($hit8[0] ne "") {
    print OUT "!^(NEW_FILE): $rep.out8\n";
    foreach $aline (@hit8) { print OUT "$aline"; }
  }
  $ls1 = `ls -1 dali_output/*.dpa`;
  @lsar = split/\n/, $ls1;
  foreach $dpafile (@lsar) {
    chomp $dpafile;
    @lsar2 = split/dali\_output\//, $dpafile;
    open(IN, "$dpafile"); @in = <IN>;
    print OUT "!^(NEW_FILE): $lsar2[1]\n"; foreach $line (@in) { print OUT "$line"; } close(IN);
  }
  close(OUT);
  $ls1 = ""; @lsar = ();
  `rm pairstorun querydatlist querydom2dali querydom2dali_5N querydomains.list 9000*`;
  `rm -f dali_output/*.dpa`;
  `mkdir tmpdalidir`;
  `mv dali_output/9000*.dat tmpdalidir`;
  `mv dali_output/9000*.dssp tmpdalidir`;
  `mv dali_output/*.dccp tmpdalidir`;
  chdir "tmpdalidir";
  `tar cvf $rep.dalifiles2.tar *`;
  `mv $rep.dalifiles2.tar ..`;
  chdir "..";
  `rm -r -f dali_output`;
  `rm -r -f tmpdalidir`;
  `bzip2 $rep.dalifiles2.tar`;
}
@forconfirminf = (); @fromconfirm = (); @hit8 = (); @alreadyused = ();
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#get DaliLite ranges for all accepted hit via MAMMOTH (first check if DaliLite already run for this pair in either step6d or step8)
foreach $rep (@unmap_rep) {
  next if ($rep eq "");
  $tpt = -1;
  for ($i=0; $i!=($step6mct+1); $i++) { if ($step6m[$i][0] =~ /$rep/) { $tpt = $i; last; } }
  next if ($tpt == -1);
  # Go through every entry of this representative's @step6m row.  Hits with no
  # "$rep.D.$hitid" line in either $rep.dali1 or $rep.dali2 are queued in
  # @fordalirange; hits that have one get their @step6m entry rewritten in
  # place with residue ranges converted from that line.
  @fordalirange = ();
  for ($j=0; $j!=($#{$step6m[$tpt]}+1); $j++) {
    @ar0 = split/\t/, $step6m[$tpt][$j];
    $hitid = $ar0[4];
    # Look in $rep.dali1 first and fall back to $rep.dali2 only if that is empty.
    $g1 = `grep "$rep.D.$hitid" $rep.dali1`; chomp $g1;
    if ($g1 eq "") { $g2 = `grep "$rep.D.$hitid" $rep.dali2`; chomp $g2; }
    else { $g2 = $g1; }
    if ($g2 eq "") { push @fordalirange, "$hitid"; }
    else { 
      # Only the first matching line is used.
      @ar1a = split/\n/, $g2;
      @ar1b = split/\t/, $ar1a[0];
      # Tab fields 1 and 2 are the two scores of the pair (z1/z2 as written to
      # $rep.dali2 in this file; assumes $rep.dali1 has the same layout -- TODO
      # confirm).  Use range fields 9/10 when field 1 is "error" or field 2 is
      # the larger score, otherwise fields 7/8.
      if ($ar1b[1] eq "error" | ($ar1b[2] ne "error" && $ar1b[2] > $ar1b[1])) { $qrangepos = $ar1b[9], $srangepos = $ar1b[10]; }
      else { $qrangepos = $ar1b[7], $srangepos = $ar1b[8]; }
      # Leave the @step6m entry untouched when either range is missing or "error".
      next if ($qrangepos eq "" | $qrangepos =~ /error/ | $srangepos eq "" | $srangepos =~ /error/); 
      $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$rep.ca", "$qrangepos" );
      $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$hitid.ca", "$srangepos" );
      # Rebuild the entry: fields 2 and 7 become the converted ranges, fields 9
      # and 11 are emptied, and the last field is set to 6.
      $step6m[$tpt][$j] = "$rep\t$ar0[1]\t$qrangeres\t$ar0[3]\t$hitid\t$ar0[5]\t$ar0[6]\t$srangeres\t$ar0[8]\t\t$ar0[10]\t\t$ar0[12]\t6\n";
    }
  }
#run DaliLite for pairs with ranges still needed
  if ($#fordalirange != -1) {
    $qch = $rep;
    `mkdir dali_output`;
    # Describe the query domain for this DaliLite range run: its PDB file name
    # goes into querydomains.list, and querydom2dali pairs that file name with
    # the number 8000 (the same number handed to SUB_renum_2 just below).
    open(T1, ">querydomains.list"); print T1 "$qch.pdb\n"; close(T1);
    # Close T2 itself. The original closed T1 a second time, which left
    # querydom2dali open (and possibly unflushed) while the helper subs ran.
    # NOTE(review): presumably SUB_get5N reads querydom2dali -- confirm.
    open(T2, ">querydom2dali"); print T2 "$qch.pdb 8000\n"; close(T2);
    SUB_renum_2 ( "8000", "new_domain_str" );
    SUB_get5N ();
    `cat querydom2dali_5N $path_str/scoplib2dali_5N > all2dali_hit_5N`;
#make list of pairs to run (x2)
    open(KEY, "all2dali_hit_5N");
    @all_key = <KEY>;
    open(TORUN, ">pairstorun");
    LP_M7: foreach $cpair (@fordalirange) {
      $query = $qch; $lib = $cpair;
      $id1 = "null"; $id2 = "null";
      foreach $line1 (@all_key) {
        next unless ($line1 =~ /^$query/);
        chomp $line1;
        @ar1 = split/ /, $line1;
        $id1 = $ar1[1];
        $id1four = substr($id1, 0, 4);
        next LP_M7 if (-e "$id1.dat" == 0);
        `cp $id1four* dali_output`;
        last;
      }
      foreach $line2 (@all_key) {
        next unless ($line2 =~ /^$lib/);
        chomp $line2;
        @ar2 = split/ /, $line2;
        $id2 = $ar2[1];
        $id2four = substr($id2, 0, 4);
        next LP_M7 if (-e "$path_dalidat/$id2.dat" == 0);
        `cp $path_dalidat/$id2four* dali_output`;
        last;
      }
      next if ($id1 eq "null" | $id2 eq "null");
      print TORUN "./DaliLite -align $id1 $id2 ; mv $id1.dccp $query.$lib.dccp\n";
      print TORUN "rm dali.lock\n";
      print TORUN "./DaliLite -align $id2 $id1 ; mv $id2.dccp $lib.$query.dccp\n";
      print TORUN "rm dali.lock\n";
    }
    close(KEY); @all_key = ();
    close(TORUN);
    system "mv all2dali_hit_5N dali_output";
    $ptrct = `wc -l pairstorun`;
    chomp $ptrct;
#$dalisets = 7;
    if ($ptrct != 0) {
#split list into $dalisets pieces and set up temp directories
      for ($k=1; $k!=($dalisets+1); $k++) { `mkdir tempdir.$k`; }
      open(RUNLIST, "pairstorun");
      @all_run = <RUNLIST>;
      $N = (int (($#all_run+1)/$dalisets)) + 1;
      for ($k=1; $k!=($dalisets+1); $k++) {
        $ln = $k * $N;
        if ($ln > ($#all_run+1)) {
          $prevln = ($k-1)*$N;
          $left = $#all_run + 1 - $prevln;
          last if ($left == 0);
          system "tail -$left pairstorun > pairs.$k";
          `mv pairs.$k tempdir.$k`;
          last;
        }
        else {
          system "head -$ln pairstorun | tail -$N > pairs.$k";
          `mv pairs.$k tempdir.$k`;
        }
      }
      close(RUNLIST); @all_run = ();
#run dali for pairs
      for ($k=1; $k!=($dalisets+1); $k++) {
        system "cp $path_bin/DaliLite tempdir.$k";
        chdir "tempdir.$k";
        if (-e "pairs.$k" == 1) {
          open(READPRS, "pairs.$k");
          @allrdpr = <READPRS>;
          foreach $rpline (@allrdpr) {
            next if ($rpline =~ /rm dali.lock/);
            @rpar = split/ /, $rpline;
            $str1 = substr($rpar[2], 0, 4);
            $str2 = substr($rpar[3], 0, 4);
            `cp ../dali_output/$str1* .`;
            `cp ../dali_output/$str2* .`;
          }
          close(READPRS); @allrdpr = ();
          open(ADDRM, ">>pairs.$k");
          print ADDRM "rm dali.lock\n";
          close(ADDRM);
          system "chmod +x pairs.$k";
          system "./pairs.$k &";
        }
        chdir "..";
      }
#wait for dali to finish, then move to new directory
      sleep 10;
      $wt = 0;
      while ($wt == 0) {
        $ovct = `ps | grep "DaliLite" | wc -l`;
        chomp $ovct;
        if ($ovct == 0) { $wt = 1; }
        else { SUB_chk_for_stalled_dali (); }
      }
      for ($k=1; $k!=($dalisets+1); $k++) {
        chdir "tempdir.$k";
        `mv *.dccp ../dali_output`;
        `rm -f *`;
        chdir "..";
      }
      `rm -r -f tempdir.*`;
#extract alignments and get Z-scores
      system "cp $path_bin/DaliLite dali_output";
      chdir "dali_output";
      $ls = `ls -1 *.dccp`; @lslist = split/\n/, $ls;
      @s6zscores = ();
      foreach $dccpfile (@lslist) {
        if (-z "$dccpfile" == 1) { `rm $dccpfile`; next; }
        $zscore = SUB_mk_dpa_from_dccp ( "$dccpfile", "$qch", "all2dali_hit_5N" );
        push @s6zscores, "$dccpfile\t$zscore";
      }
      chdir "..";
    }
    @dalirangeinfo = ();
    open(DAOUT, ">>$qch.dali2");
    print DAOUT "!^(NEW_FILE): all_dali_hit_zbc_$qch\n";
    foreach $scophit (@fordalirange) {
      $qdom = $qch; $ldom = $scophit;
      foreach $zp (@s6zscores) {
        if ($zp =~ /$qdom\.$ldom\./) { @za = split/\s+/, $zp; $z1 = $za[$#za]; }
        if ($zp =~ /$ldom\.$qdom\./) { @za = split/\s+/, $zp; $z2 = $za[$#za]; }
      }
      if (-e "dali_output/$qdom.$ldom.dpa" == 0) { $bl1 = "error"; $cov1 = "error"; $z1 = "error"; }
      else {
        $bl1 = `$path_bin/blsm_scores -i dali_output/$qdom.$ldom.dpa`; chomp $bl1;
        $cov1 = SUB_cover_dpa ( "dali_output/$qdom.$ldom.dpa" );
      }
      if (-e "dali_output/$ldom.$qdom.dpa" == 0) { $bl2 = "error"; $cov2 = "error"; $z2 = "error"; }
      else {
        $bl2 = `$path_bin/blsm_scores -i dali_output/$ldom.$qdom.dpa`; chomp $bl2;
        $cov2 = SUB_cover_dpa ( "dali_output/$ldom.$qdom.dpa" );
      }
      $ranges1 = "\t";
      $ranges2 = "\t";
      if ($z1 ne "error" && $bl1 ne "error" && $cov1 ne "error" && $bl1 ne "inf") {
        $ranges1 = SUB_get_ranges_dpa ( "dali_output/$qdom.$ldom.dpa", "0", "1" );
      }
      if ($z2 ne "error" && $bl2 ne "error" && $cov2 ne "error" && $bl2 ne "inf") {
        $ranges2 = SUB_get_ranges_dpa ( "dali_output/$ldom.$qdom.dpa", "1", "0" );
      }
      print DAOUT "$qdom.D.$ldom\t$z1\t$z2\t$bl1\t$bl2\t$cov1\t$cov2\t$ranges1\t$ranges2\n";
      if ($z1 eq "error" && $z2 eq "error") { push @dalirangeinfo, "$ldom\terror"; }
      else {
        if ($z1 eq "error") { push @dalirangeinfo, "$ldom\t$ranges2"; }
        elsif ($z2 eq "error") { push @dalirangeinfo, "$ldom\t$ranges1"; }
        else {
          if ($z1 >= $z2) { push @dalirangeinfo, "$ldom\t$ranges1"; }
          else { push @dalirangeinfo, "$ldom\t$ranges2"; }
        }
      }
    }
#add DaliLite ranges for these pairs to @step6m
    # For each entry of this representative's @step6m row, look up the hit in
    # @dalirangeinfo ("$ldom\t<ranges>" or "$ldom\terror", built just above) and,
    # when ranges are available, rewrite the entry with residue ranges.
    for ($j=0; $j!=($#{$step6m[$tpt]}+1); $j++) {
      @ar0 = split/\t/, $step6m[$tpt][$j];
      $hitid = $ar0[4];
      foreach $infline (@dalirangeinfo) {
        next unless ($infline =~ /^$hitid/);
        # An "error" record for this hit ends the search and leaves the entry as is.
        last if ($infline =~ /error/);
        @ar1 = split/\t/, $infline;
        # Field 1 is converted against the query .ca file, field 2 against the
        # hit's .ca file.
        $qrangeres = SUB_convert_range_pos_to_res ( "new_domain_str/$rep.ca", "$ar1[1]" );
        $srangeres = SUB_convert_range_pos_to_res ( "$path_str/$hitid.ca", "$ar1[2]" );
        # Rebuild the entry: fields 2 and 7 become the converted ranges, fields 9
        # and 11 are emptied, and the last field is set to 6.
        $step6m[$tpt][$j] = "$rep\t$ar0[1]\t$qrangeres\t$ar0[3]\t$hitid\t$ar0[5]\t$ar0[6]\t$srangeres\t$ar0[8]\t\t$ar0[10]\t\t$ar0[12]\t6\n";
      }
    }
    $ls1 = `ls -1 dali_output/*.dpa`;
    @lsar = split/\n/, $ls1;
    foreach $dpafile (@lsar) {
      chomp $dpafile;
      @lsar2 = split/dali\_output\//, $dpafile;
      open(IN, "$dpafile"); @in = <IN>;
      print DAOUT "!^(NEW_FILE): $lsar2[1]\n"; foreach $line (@in) { print DAOUT "$line"; }
      close(IN);
    }
    close(DAOUT);
    $ls1 = ""; @lsar = ();
    `rm pairstorun querydatlist querydom2dali querydom2dali_5N querydomains.list 8000*`;
    `rm -f dali_output/*.dpa`;
    `mkdir tmpdalidir`;
    `mv dali_output/8000*.dat tmpdalidir`;
    `mv dali_output/8000*.dssp tmpdalidir`;
    `mv dali_output/*.dccp tmpdalidir`;
    chdir "tmpdalidir";
    `tar cvf $rep.dalifiles3.tar *`;
    `mv $rep.dalifiles3.tar ..`;
    chdir "..";
    `rm -r -f dali_output`;
    `rm -r -f tmpdalidir`;
    `bzip2 $rep.dalifiles3.tar`;
  }
  @fordalirange = (); @dalirangeinfo = ();
}
#get all DaliLite results in one file (~.dali2)
#append MAMMOTH hits with adjusted (DaliLite) ranges to ~.mammoth2
foreach $rep (@unmap_rep) {
  # Append $rep.dali2 to $rep.dali1 and keep the combined file under the name
  # $rep.dali2 (so the dali1 content comes first); remove it if it is empty.
  `cat $rep.dali2 >> $rep.dali1`;
  `mv $rep.dali1 $rep.dali2`;
  if (-z "$rep.dali2" == 1) { `rm $rep.dali2`; }

  # Find this representative's row in @step6m: the first row whose first entry
  # begins with $rep.  Representatives without a row are skipped.
  $pt = -1;
  for ($i=0; $i!=($step6mct+1); $i++) { if ($step6m[$i][0] =~ /^$rep/) { $pt = $i; last; } }
  next if ($pt == -1);
  # Append every entry of that row to $rep.mammoth2 under an out6m marker.
  open(OUT, ">>$rep.mammoth2");
  print OUT "!^(NEW_FILE): $rep.out6m\n";
  # Bound the loop with $pt, the row actually printed.  The original used $i,
  # which only worked because $i still equals $pt after the "last" above.
  for ($j=0; $j!=($#{$step6m[$pt]}+1); $j++) { print OUT "$step6m[$pt][$j]"; }
  close(OUT);
}

print DATELOG "STATUS: finished running all comparison tools, ready to start assignments\n";
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

} # END OF "re-start at 6m-csres/6d/7/8"


##########
#
# scripts: choose_sfs.pl and domainbound.pl
# - determine appropriate superfamily assignment for regions within the query domain
# - determine domain boundaries of assignments


if (8 >= $findwhere) { # START OF "re-start at choose sfam and make outputs"
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

$cushion = 3;
@unres_rep = ();
@dom_assi = ();
@dom_assi_fold = ();
@dom_assi_unmap = ();
@forps = ();

LP_M8: foreach $rep (@bc_rep) {
  # Call SUB_other_new_cut only when this representative has no .ca file yet.
  # The original tested "new_domain_str/$line.ca"; $line is not the loop
  # variable here, so the test looked at whatever an earlier loop left in it.
  # NOTE(review): assumes SUB_other_new_cut is what produces the .ca file -- confirm.
  if (-e "new_domain_str/$rep.ca" == 0) { SUB_other_new_cut ( $rep ); }
  @allhitinf = (); @allhitnr = (); @allhituse = (); @skiphit = ();
#make list of all hits from all methods: get range of query hit, scop hit, step number, range type (Sequence/MAMMMOTH/DaliLite), 
#and whether hit falls above (Z, E) or below (z, e) confidence cutoff
  $act = -1;
  #step 1: field 1 of a row may hold several comma-separated query ranges; every range becomes its own
  #hit line, and all pieces of one row carry the same space-separated list of hit ids
  for ($i = 0; $i <= $step1ct; $i++) {
    if ($step1[$i][0] !~ /^$rep/) { next; }
    for ($j = 0; $j <= $#{$step1[$i]}; $j++) {
      @ar1 = split(/\t/, $step1[$i][$j]);
#E = e-value (field 9) below 1e-10, e = otherwise
      $cnf = ($ar1[9] < 1e-10) ? "E" : "e";
      @ar2a = split(/\./, $ar1[5]);
      @ar2b = split(/\,/, $ar1[1]);
      for ($z = 0; $z <= $#ar2b; $z++) {
        $thisact = $act + $z + 1;
        $h = join("\t", $ar1[0], $ar2b[$z], $ar1[4], "$ar2a[0].$ar2a[1].$ar2a[2]", 1, "S", $thisact) . "\t";
#ids of all pieces that came from this row
        for ($y = 0; $y <= $#ar2b; $y++) {
          $pt = $act + 1 + $y;
          $h .= ($y == 0) ? "$pt" : " $pt";
        }
        $h .= "\t" . join("\t", $cnf, "$i $j", $ar1[9]);
        push(@allhitinf, $h);
      }
      $act += $#ar2b + 1;
    }
  }
  #step 2: one hit line per entry of every @step2 row that belongs to this representative
  for ($i = 0; $i <= $step2ct; $i++) {
    if ($step2[$i][0] !~ /^$rep/) { next; }
    for ($j = 0; $j <= $#{$step2[$i]}; $j++) {
      $act += 1;
      @ar1 = split(/\t/, $step2[$i][$j]);
#E = e-value (field 9) below 1e-10, e = otherwise
      $cnf = ($ar1[9] < 1e-10) ? "E" : "e";
      @ar2a = split(/\./, $ar1[5]);
      push(@allhitinf, join("\t", $ar1[0], $ar1[1], $ar1[4], "$ar2a[0].$ar2a[1].$ar2a[2]", 2, "S", $act, $act, $cnf, "$i $j", $ar1[9]));
    }
  }
  #step 3: one hit line per entry of every @step3 row that belongs to this representative
  for ($i = 0; $i <= $step3ct; $i++) {
    if ($step3[$i][0] !~ /^$rep/) { next; }
    for ($j = 0; $j <= $#{$step3[$i]}; $j++) {
      $act += 1;
      @ar1 = split(/\t/, $step3[$i][$j]);
#E = e-value (field 9) below 1e-10, e = otherwise
      $cnf = ($ar1[9] < 1e-10) ? "E" : "e";
      @ar2a = split(/\./, $ar1[5]);
      push(@allhitinf, join("\t", $ar1[0], $ar1[1], $ar1[4], "$ar2a[0].$ar2a[1].$ar2a[2]", 3, "S", $act, $act, $cnf, "$i $j", $ar1[9]));
    }
  }
  #step 5: one hit line per entry of every @step5 row that belongs to this representative
  for ($i = 0; $i <= $step5ct; $i++) {
    if ($step5[$i][0] !~ /^$rep/) { next; }
    for ($j = 0; $j <= $#{$step5[$i]}; $j++) {
      $act += 1;
      @ar1 = split(/\t/, $step5[$i][$j]);
#E = e-value (field 9) below 1e-10, e = otherwise
      $cnf = ($ar1[9] < 1e-10) ? "E" : "e";
      @ar2a = split(/\./, $ar1[5]);
      push(@allhitinf, join("\t", $ar1[0], $ar1[1], $ar1[4], "$ar2a[0].$ar2a[1].$ar2a[2]", 5, "S", $act, $act, $cnf, "$i $j", $ar1[9]));
    }
  }
  #step 6, rows of @step6m: field 10 decides the confidence letter (Z if > 14, else z) and is written as the score
  for ($i=0; $i!=($step6mct+1); $i++) {
    next unless ($step6m[$i][0] =~ /^$rep/);
    for ($j=0; $j!=($#{$step6m[$i]}+1); $j++) {
      @ar1 = split/\t/, $step6m[$i][$j];
      @ar2a = split/\./, $ar1[5];
      if ($ar1[10] > 14) { $cnf = "Z"; }
      else { $cnf = "z"; }
#no ":" in field 2: a single hit with range type M, using field 1 as the query range
      if ($ar1[2] !~ /\:/) { $act++; push @allhitinf, "$ar1[0]\t$ar1[1]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t6\tM\t$act\t$act\t$cnf\t$i $j\t$ar1[10]"; }
      else {
#otherwise range type D: field 2 is the range when SUB_get_dali_range answers "no gaps",
#else its tab-separated answer is split and every piece becomes its own hit line
        $drange = SUB_get_dali_range ( "$ar1[0]", "$ar1[4]" );
        if ($drange =~ /no gaps/) { $act++; push @allhitinf, "$ar1[0]\t$ar1[2]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t6\tD\t$act\t$act\t$cnf\t$i $j\t$ar1[10]"; }
        else {
          @gapar = split/\t/, $drange;
          for ($z=0; $z!=($#gapar+1); $z++) {
            $thisact = $act + $z + 1;
            $h = "$ar1[0]\t$gapar[$z]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t6\tD\t$thisact\t";
#every piece lists the ids of all pieces of the same hit (space separated)
            for ($y=0; $y!=($#gapar+1); $y++) {
              $pt = $act + 1 + $y;
              if ($y == 0) { $h .= "$pt"; }
              else { $h .= " $pt"; }
            }
            $h .= "\t$cnf\t$i $j\t$ar1[10]";
            push @allhitinf, "$h";
          }
          $act += $#gapar + 1;
        }
      }
    }
  }
  #step 6, rows of @step6d: always range type D; field 11 decides the confidence letter (Z if > 14, else z) and is written as the score
  for ($i=0; $i!=($step6dct+1); $i++) {
    next unless ($step6d[$i][0] =~ /^$rep/);
    for ($j=0; $j!=($#{$step6d[$i]}+1); $j++) {
      @ar1 = split/\t/, $step6d[$i][$j];
      @ar2a = split/\./, $ar1[5];
      if ($ar1[11] > 14) { $cnf = "Z"; }
      else { $cnf = "z"; }
#field 2 is the range when SUB_get_dali_range answers "no gaps",
#else its tab-separated answer is split and every piece becomes its own hit line
      $drange = SUB_get_dali_range ( "$ar1[0]", "$ar1[4]" );
      if ($drange =~ /no gaps/) { $act++; push @allhitinf, "$ar1[0]\t$ar1[2]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t6\tD\t$act\t$act\t$cnf\t$i $j\t$ar1[11]"; }
      else {
        @gapar = split/\t/, $drange;
        for ($z=0; $z!=($#gapar+1); $z++) {
          $thisact = $act + $z + 1;
          $h = "$ar1[0]\t$gapar[$z]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t6\tD\t$thisact\t";
#every piece lists the ids of all pieces of the same hit (space separated)
          for ($y=0; $y!=($#gapar+1); $y++) {
            $pt = $act + 1 + $y;
            if ($y == 0) { $h .= "$pt"; }
            else { $h .= " $pt"; }
          }
          $h .= "\t$cnf\t$i $j\t$ar1[11]";
          push @allhitinf, "$h";
        }
        $act += $#gapar + 1;
      }
    }
  }
  #step 7, rows of @step7: rows without ":" in field 2 are range type M, all others range type D
  for ($i=0; $i!=($step7ct+1); $i++) {
    next unless ($step7[$i][0] =~ /^$rep/);
    for ($j=0; $j!=($#{$step7[$i]}+1); $j++) {
      @ar1 = split/\t/, $step7[$i][$j];
      @ar2a = split/\./, $ar1[5];
      if ($ar1[2] !~ /\:/) {
#range type M: the confidence letter is taken from field 10, the same field that is written as the score
#(this is how step 8 treats its M rows; NOTE(review): confirm the column layout of step 7 rows)
        if ($ar1[10] > 14) { $cnf = "Z"; }
        else { $cnf = "z"; }
        $act++; push @allhitinf, "$ar1[0]\t$ar1[1]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t7\tM\t$act\t$act\t$cnf\t$i $j\t$ar1[10]";
      }
      else {
#range type D: confidence letter and score both come from field 11
        if ($ar1[11] > 14) { $cnf = "Z"; }
        else { $cnf = "z"; }
#field 2 is the range when SUB_get_dali_range answers "no gaps",
#else its tab-separated answer is split and every piece becomes its own hit line
        $drange = SUB_get_dali_range ( "$ar1[0]", "$ar1[4]" );
        if ($drange =~ /no gaps/) { $act++; push @allhitinf, "$ar1[0]\t$ar1[2]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t7\tD\t$act\t$act\t$cnf\t$i $j\t$ar1[11]"; }
        else {
          @gapar = split/\t/, $drange;
          for ($z=0; $z!=($#gapar+1); $z++) {
            $thisact = $act + $z + 1;
            $h = "$ar1[0]\t$gapar[$z]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t7\tD\t$thisact\t";
#every piece lists the ids of all pieces of the same hit (space separated)
            for ($y=0; $y!=($#gapar+1); $y++) {
              $pt = $act + 1 + $y;
              if ($y == 0) { $h .= "$pt"; }
              else { $h .= " $pt"; }
            }
            $h .= "\t$cnf\t$i $j\t$ar1[11]";
            push @allhitinf, "$h";
          }
          $act += $#gapar + 1;
        }
      }
    }
  }
  #step 8, rows of @step8: rows without ":" in field 2 are range type M (score in field 10), all others range type D (score in field 11)
  for ($i=0; $i!=($step8ct+1); $i++) {
    next unless ($step8[$i][0] =~ /^$rep/);
    for ($j=0; $j!=($#{$step8[$i]}+1); $j++) {
      @ar1 = split/\t/, $step8[$i][$j];
      @ar2a = split/\./, $ar1[5];
      if ($ar1[2] !~ /\:/) {
#range type M: Z if field 10 > 14, else z; field 1 is the query range
        if ($ar1[10] > 14) { $cnf = "Z"; }
        else { $cnf = "z"; }
        $act++; push @allhitinf, "$ar1[0]\t$ar1[1]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t8\tM\t$act\t$act\t$cnf\t$i $j\t$ar1[10]";
      }
      else {
#range type D: Z if field 11 > 14, else z
        if ($ar1[11] > 14) { $cnf = "Z"; }
        else { $cnf = "z"; }
#field 2 is the range when SUB_get_dali_range answers "no gaps",
#else its tab-separated answer is split and every piece becomes its own hit line
        $drange = SUB_get_dali_range ( "$ar1[0]", "$ar1[4]" );
        if ($drange =~ /no gaps/) { $act++; push @allhitinf, "$ar1[0]\t$ar1[2]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t8\tD\t$act\t$act\t$cnf\t$i $j\t$ar1[11]"; }
        else {
          @gapar = split/\t/, $drange;
          for ($z=0; $z!=($#gapar+1); $z++) {
            $thisact = $act + $z + 1;
            $h = "$ar1[0]\t$gapar[$z]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t8\tD\t$thisact\t";
#every piece lists the ids of all pieces of the same hit (space separated)
            for ($y=0; $y!=($#gapar+1); $y++) {
              $pt = $act + 1 + $y;
              if ($y == 0) { $h .= "$pt"; }
              else { $h .= " $pt"; }
            }
            $h .= "\t$cnf\t$i $j\t$ar1[11]";
            push @allhitinf, "$h";
          }
          $act += $#gapar + 1;
        }
      }
    }
  }
  #step 9, rows of @step9: always range type D, always below the confidence cutoff
  for ($i=0; $i!=($step9ct+1); $i++) {
    next unless ($step9[$i][0] =~ /^$rep/);
    for ($j=0; $j!=($#{$step9[$i]}+1); $j++) {
      @ar1 = split/\t/, $step9[$i][$j];
      @ar2a = split/\./, $ar1[5];
      $cnf = "z"; # Z-score cannot be greater than 14, so confidence cannot be "Z"
#field 2 is the range when SUB_get_dali_range answers "no gaps",
#else its tab-separated answer is split and every piece becomes its own hit line
      $drange = SUB_get_dali_range ( "$ar1[0]", "$ar1[4]" );
      if ($drange =~ /no gaps/) { $act++; push @allhitinf, "$ar1[0]\t$ar1[2]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t9\tD\t$act\t$act\t$cnf\t$i $j\t$ar1[11]"; }
      else {
        @gapar = split/\t/, $drange;
        for ($z=0; $z!=($#gapar+1); $z++) {
          $thisact = $act + $z + 1;
#the pieces carry step number 9, the same as the ungapped case above
          $h = "$ar1[0]\t$gapar[$z]\t$ar1[4]\t$ar2a[0].$ar2a[1].$ar2a[2]\t9\tD\t$thisact\t";
#every piece lists the ids of all pieces of the same hit (space separated)
          for ($y=0; $y!=($#gapar+1); $y++) {
            $pt = $act + 1 + $y;
            if ($y == 0) { $h .= "$pt"; }
            else { $h .= " $pt"; }
          }
          $h .= "\t$cnf\t$i $j\t$ar1[11]";
          push @allhitinf, "$h";
        }
        $act += $#gapar + 1;
      }
    }
  }
#build @allhitnr: a hit is left out when an earlier hit has the same query range (field 1),
#the same scop hit (field 2) and the same step number (field 4)
  LP_M9: for ($i = 0; $i <= $#allhitinf; $i++) {
    @ar1 = split(/\t/, $allhitinf[$i]);
    for ($j = 0; $j < $i; $j++) {
      @ar2 = split(/\t/, $allhitinf[$j]);
      if ($ar1[4] == $ar2[4] && $ar1[2] eq $ar2[2] && $ar1[1] eq $ar2[1]) { next LP_M9; }
    }
    push(@allhitnr, $allhitinf[$i]);
  }
#nothing left for this query: move on to the next representative
  if ($#allhitnr == -1) { next LP_M8; }
#start ~.combined with two sections: every hit (@allhitinf), then the non-redundant hits (@allhitnr)
  open(OUT, ">$rep.combined");
  print OUT "!^(NEW_FILE): $rep.outinf_all\n";
  foreach $line (@allhitinf) {
    print OUT $line . "\n";
  }
  print OUT "!^(NEW_FILE): $rep.outinf_nr\n";
  foreach $line (@allhitnr) {
    print OUT $line . "\n";
  }
  close(OUT);
#copy every non-redundant hit into @hl (one row per hit) and set the $repeaty and $largeins flags;
#the search for different superfamilies assigned to a shared range of > 50% is done by the loops further down
  $repeaty = 0; $largeins = 0;
  for ($i=0; $i!=($#allhitnr+1); $i++) {
    @ar0 = split/\t/, $allhitnr[$i];
#$hl[$i][0] is the range, $hl[$i][1] is the superfam id, $hl[$i][2] is the method/step #,
#$hl[$i][3] is the range in position format instead of pdb-residue-name format
#$hl[$i][4] is the type of step (D/M/S), $hl[$i][5] is the number id of the hit, $hl[$i][6] is linked "domains"
#$hl[$i][7] is E if evalue < 1e-10, e if evalue >= 1e-10, Z if zscore > 14, or z if zscore <= 14
#$hl[$i][8] is the scop library hit
    $hl[$i][0] = $ar0[1];
    $hl[$i][1] = $ar0[3];
    $hl[$i][2] = $ar0[4];
    $hl[$i][3] = SUB_convert_range_res_to_pos ( "new_domain_str/$rep.ca", "$hl[$i][0]" );
    $hl[$i][4] = $ar0[5];
    $hl[$i][5] = $ar0[6];
    $hl[$i][6] = $ar0[7];
    $hl[$i][7] = $ar0[8];
    $hl[$i][8] = $ar0[2];
#count the lines of $path_bin/REPEATS/repeats_list that contain the scop hit name; any match sets $repeaty
    $greprep = `grep $ar0[2] $path_bin/REPEATS/repeats_list | wc -l`; chomp $greprep;
    if ($greprep > 0) { $repeaty = 1; }
#a space in the linked-id list means the hit was recorded as more than one piece
    if ($hl[$i][6] =~ / /) { $largeins = 1; }
  }
#toss any hits where coverage of query domain (by set, not by piece/line!) is less than 20 residues total
#a set is the list of hit ids in $hl[$i][6]; SUB_resct_in_range is called once per id and the results are summed
  for ($i=0; $i!=($#allhitnr+1); $i++) {
    next if ($hl[$i][0] eq "null");
    @h6ar_1 = split/ /, $hl[$i][6];
    $resct_tot = 0;
    for ($j=0; $j!=($#h6ar_1+1); $j++) {
#find the row whose id ($hl[$k][5]) equals this linked id
#NOTE(review): when no row carries that id, $pt keeps its earlier value and that row's range is counted
#instead - confirm whether a linked piece can be missing after the redundancy filter
      for ($k=0; $k!=($#allhitnr+1); $k++) { next unless ($hl[$k][5] == $h6ar_1[$j]); $pt = $k; last; }
      $thisresct = SUB_resct_in_range ( "$rep", "$hl[$pt][0]" );
      $resct_tot += $thisresct;
    }
#mark every row of a too-short set as removed by overwriting its range with "null"
    if ($resct_tot < 20) {
      for ($j=0; $j!=($#h6ar_1+1); $j++) {
        for ($k=0; $k!=($#allhitnr+1); $k++) { next unless ($hl[$k][5] == $h6ar_1[$j]); $hl[$k][0] = "null"; }
      }
    }
  }
#attempt to choose correct sfam
#every pass of the while loop starts with no recorded conflict ($conf1 = $conf2 = "XX"), walks the kept hits,
#and for a hit that overlaps (>= 50% of either range) a kept hit of another superfamily removes one of the two
#by setting its range to "null"; a query whose conflict cannot be decided is pushed to @unres_rep and skipped
#NOTE(review): $conf1/$conf2 are only reset at the top of the while loop, so once a conflict has been recorded
#the resolution code runs again for every later kept $i of the same pass - confirm this is intended
  $wt = 0;
  while ($wt == 0) {
    $conf1 = "XX";
    $conf2 = "XX";
#find hits to same region but different SCOP superfamilies
    for ($i=0; $i!=($#allhitnr+1); $i++) {
      next if ($hl[$i][0] eq "null");
      for ($j=0; $j!=($#allhitnr+1); $j++) {
        next if ($i == $j || $hl[$j][0] eq "null" || $hl[$i][1] eq $hl[$j][1]);
        ($ov1, $ov2) = SUB_get_ov ( "$hl[$i][3]", "$hl[$j][3]" );
        next if ($ov1 < 0.5 && $ov2 < 0.5);
        $conf1 = $i; $conf2 = $j; last;
      }
      if ($conf1 eq "XX" && $conf2 eq "XX") { $wt = 1; }
      else {
#count number of times each step (1-9) hits the $conf1 superfam covering at least 75% of length of that hit
#determine if at least 1 hit for that step is above confidence (z > 14 or e < 1e-10)
        for ($s=0; $s!=10; $s++) { $step_c1[$s] = 0; }
        for ($s=0; $s!=10; $s++) { $chcf_c1[$s] = 0; }
        for ($s=1; $s!=10; $s++) {
          for ($k=0; $k!=($#allhitnr+1); $k++) {
            next unless ($hl[$k][2] == $s && $hl[$k][1] eq $hl[$conf1][1] && $hl[$k][0] ne "null");
            ($ov1, $ov2) = SUB_get_ov ( "$hl[$k][3]", "$hl[$conf1][3]" );
            if ($ov1 >= 0.75 && $ov2 >= 0.75) {
              $step_c1[$s] += 1;
              if ($hl[$k][7] eq "Z" || $hl[$k][7] eq "E") { $chcf_c1[$s] = 1; }
            }
          }
        }
#count number of times each step (1-9) hits the $conf2 superfam covering at least 75% of length of that hit
#determine if at least 1 hit for that step is above confidence (z > 14 or e < 1e-10)
        for ($s=0; $s!=10; $s++) { $step_c2[$s] = 0; }
        for ($s=0; $s!=10; $s++) { $chcf_c2[$s] = 0; }
        for ($s=1; $s!=10; $s++) {
          for ($k=0; $k!=($#allhitnr+1); $k++) {
            next unless ($hl[$k][2] == $s && $hl[$k][1] eq $hl[$conf2][1] && $hl[$k][0] ne "null");
            ($ov1, $ov2) = SUB_get_ov ( "$hl[$k][3]", "$hl[$conf2][3]" );
            if ($ov1 >= 0.75 && $ov2 >= 0.75) {
              $step_c2[$s] += 1;
              if ($hl[$k][7] eq "Z" || $hl[$k][7] eq "E") { $chcf_c2[$s] = 1; }
            }
          }
        }
#decide which superfam hit to keep:
#1) keep the hit that was found by more steps (same superfam, 75% overlap to hit)
#2) if same # of steps, make decision based on priority of steps (1, then 2/3, then 6, then 5, then 7/8/9)
#   (the code also tests step 4 between 6 and 5; no step-4 hits are collected in this section)
#3) if both are found by steps at the same priority level, check if only 1 of the 2 superfams have hits above confidence cutoff
#4) if one sfam is low res (class i.~ in SCOP) and one is not, choose non-low-res sfam
#5) if cannot be resolved, flag for manual inspection
        $c1ct = 0;
        $c2ct = 0;
        for ($s=1; $s!=10; $s++) {
          if ($step_c1[$s] != 0) { $c1ct += 1; }
          if ($step_c2[$s] != 0) { $c2ct += 1; }
        }
        if ($c1ct > $c2ct) { $hl[$conf2][0] = "null"; }
        elsif ($c1ct < $c2ct) { $hl[$conf1][0] = "null"; }
        else {
#$toss: 1 = remove the $conf1 hit, 2 = remove the $conf2 hit, 0 = still undecided
          if ($step_c1[1] != 0 && $step_c2[1] == 0) { $toss = 2; }
          elsif ($step_c1[1] == 0 && $step_c2[1] != 0) { $toss = 1; }
          else {
            if ($step_c1[2] != 0 || $step_c1[3] != 0) { $d1 = 1; }
            else { $d1 = 0; }
            if ($step_c2[2] != 0 || $step_c2[3] != 0) { $d2 = 1; }
            else { $d2 = 0; }
            if ($d1 == 1 && $d2 == 0) { $toss = 2; }
            elsif ($d1 == 0 && $d2 == 1) { $toss = 1; }
            else {
              if ($step_c1[6] != 0 && $step_c2[6] == 0) { $toss = 2; }
              elsif ($step_c1[6] == 0 && $step_c2[6] != 0) { $toss = 1; }
              else {
                if ($step_c1[4] != 0 && $step_c2[4] == 0) { $toss = 2; }
                elsif ($step_c1[4] == 0 && $step_c2[4] != 0) { $toss = 1; }
                else {
                  if ($step_c1[5] != 0 && $step_c2[5] == 0) { $toss = 2; }
                  elsif ($step_c1[5] == 0 && $step_c2[5] != 0) { $toss = 1; }
                  else {
                    if ($step_c1[7] != 0 || $step_c1[8] != 0 || $step_c1[9] != 0) { $d1b = 1; }
                    else { $d1b = 0; }
                    if ($step_c2[7] != 0 || $step_c2[8] != 0 || $step_c2[9] != 0) { $d2b = 1; }
                    else { $d2b = 0; }
                    if ($d1b == 1 && $d2b == 0) { $toss = 2; }
                    elsif ($d1b == 0 && $d2b == 1) { $toss = 1; }
                    else { $toss = 0; }
                  }
                }
              }
            }
          }
#same priority level on both sides: prefer the superfamily that has a hit above the confidence cutoff at that level
          if ($toss == 0) {
            if ($step_c1[1] != 0 && $step_c2[1] != 0) {
              if ($chcf_c1[1] == 1 && $chcf_c2[1] != 1) { $toss = 2; }
              elsif ($chcf_c1[1] != 1 && $chcf_c2[1] == 1) { $toss = 1; }
            }
            elsif (($step_c1[2] != 0 || $step_c1[3] != 0 || $step_c1[4] != 0) && ($step_c2[2] != 0 || $step_c2[3] != 0 || $step_c2[4] != 0)) {
              if (($chcf_c1[2] == 1 || $chcf_c1[3] == 1 || $chcf_c1[4] == 1) && ($chcf_c2[2] != 1 && $chcf_c2[3] != 1 && $chcf_c2[4] != 1)) { $toss = 2; }
              elsif (($chcf_c1[2] != 1 && $chcf_c1[3] != 1 && $chcf_c1[4] != 1) && ($chcf_c2[2] == 1 || $chcf_c2[3] == 1 || $chcf_c2[4] == 1)) { $toss = 1; }
            }
            elsif ($step_c1[6] != 0 && $step_c2[6] != 0) {
              if ($chcf_c1[6] == 1 && $chcf_c2[6] != 1) { $toss = 2; }
              elsif ($chcf_c1[6] != 1 && $chcf_c2[6] == 1) { $toss = 1; }
            }
            elsif ($step_c1[5] != 0 && $step_c2[5] != 0) {
              if ($chcf_c1[5] == 1 && $chcf_c2[5] != 1) { $toss = 2; }
              elsif ($chcf_c1[5] != 1 && $chcf_c2[5] == 1) { $toss = 1; }
            }
          }
#still undecided: a superfamily id starting with "i" loses against one that does not
          if ($toss == 0) {
            if ($hl[$conf1][1] =~ /^i/ && $hl[$conf2][1] !~ /^i/) { $toss = 1; }
            if ($hl[$conf2][1] =~ /^i/ && $hl[$conf1][1] !~ /^i/) { $toss = 2; }
          }
          if ($toss == 1) { $hl[$conf1][0] = "null"; }
          if ($toss == 2) { $hl[$conf2][0] = "null"; }
          if ($toss == 0) { push @unres_rep, "$rep"; next LP_M8; }
        }
      }
    }
#when every hit is already "null" the for loop above skips all rows and never sets $wt,
#so end the search here instead of repeating the empty pass forever
    if ($conf1 eq "XX" && $conf2 eq "XX") { $wt = 1; }
  }
#make list of row numbers (indices into @hl) of all hits to be removed: if any parts of a linked domain are being taken out, also get rid of the rest of the linked domain
#$hl[$k][6] holds hit ids, which are compared with $hl[$m][5]; an id is not a row number once the
#redundancy filter has dropped a hit, so every linked id is looked up before it is added to @tossar
  @tossar = ();
  for ($k=0; $k!=($#allhitnr+1); $k++) {
    if ($hl[$k][0] eq "null") {
      push @tossar, "$k";
      if ($hl[$k][6] =~ / /) {
        @ar7 = split/ /, $hl[$k][6];
        for ($z=0; $z!=($#ar7+1); $z++) {
#a linked id that no longer has a row is simply ignored
          for ($m=0; $m!=($#allhitnr+1); $m++) { if ($hl[$m][5] == $ar7[$z]) { push @tossar, "$m"; } }
        }
      }
    }
  }
  for ($z=0; $z!=($#tossar+1); $z++) { $pt = $tossar[$z]; $hl[$pt][0] = "null"; }
#sort the non-redundant hits into @skiphit (range set to "null") and @allhituse (everything else)
  for ($k = 0; $k <= $#allhitnr; $k++) {
    if ($hl[$k][0] eq "null") { push(@skiphit, $allhitnr[$k]); }
    else { push(@allhituse, $allhitnr[$k]); }
  }
#add both lists to ~.combined as two further sections
  open(OUT, ">>$rep.combined");
  print OUT "!^(NEW_FILE): $rep.outinf_use\n";
  foreach $line (@allhituse) {
    print OUT $line . "\n";
  }
  print OUT "!^(NEW_FILE): $rep.outinf_removed\n";
  foreach $line (@skiphit) {
    print OUT $line . "\n";
  }
  close(OUT);
#determine domain boundaries for these assignments...
#if contains repeats, use repeatbound.pl to find domain boundaries
#boundaries of repeats found a different way:
#1) all hits to the same superfamily are checked for overlaps (no ranking of steps or structure vs sequence)
#2) there is no overlap-between-superfamilies refinement (i may go back and update this later, though...)
  if ($repeaty == 1) {
#separate hits into superfamilies
#$sfa[$j][0] is the first kept hit of group $j, $sfa[$j][1..$sfect[$j]] are its other members
#start with no group at all ($sfct = -1): groups are only opened by hits that are not "null",
#so a removed hit can never contribute its range or superfamily to an assignment
    $sfct = -1;
    LP_M10: for ($i=0; $i!=($#allhitnr+1); $i++) {
      next if ($hl[$i][0] eq "null");
      for ($j=0; $j!=($sfct+1); $j++) {
        $pt = $sfa[$j][0];
        next unless ($hl[$i][1] eq $hl[$pt][1]);
        $sfect[$j] += 1;
        $sfa[$j][$sfect[$j]] = $i;
        next LP_M10;
      }
      $sfct += 1;
      $sfa[$sfct][0] = $i;
      $sfect[$sfct] = 0;
    }
#find ranges for each superfamily of repeats
#$checkline is a space-separated list of "start.end" position ranges; the first pair that touches or overlaps
#is merged into one range and the scan starts over, until no pair overlaps any more
    for ($z=0; $z!=($sfct+1); $z++) {
      $checkline = "$hl[$sfa[$z][0]][3]";
      for ($i=1; $i!=($sfect[$z]+1); $i++) { $checkline .= " $hl[$sfa[$z][$i]][3]"; }
      $again = 1;
      while ($again == 1) {
        @newrs = split/ /, $checkline;
        $newct = $#newrs;
        if ($newct == 0) { $again = 0; $finalcheckline = $checkline; }
        else {
          LP_M11: for ($i=0; $i!=($newct+1); $i++) {
            @ar3 = split/\./, $newrs[$i];
            LP2: for ($j=($i+1); $j!=($newct+1); $j++) {
              @ar4 = split/\./, $newrs[$j];
#range j lies inside range i: keep range i
              if ($ar4[0] >= $ar3[0] && $ar3[1] >= $ar4[1]) {
                $newcheckline = "$ar3[0].$ar3[1]";
                for ($m=0; $m!=($newct+1); $m++) { next if ($m == $i | $m == $j); $newcheckline .= " $newrs[$m]"; }
              }
#range i lies strictly inside range j: keep range j
              if ($ar4[0] < $ar3[0] && $ar3[1] < $ar4[1]) {
                $newcheckline = "$ar4[0].$ar4[1]";
                for ($m=0; $m!=($newct+1); $m++) { next if ($m == $i | $m == $j); $newcheckline .= " $newrs[$m]"; }
              }
#range j starts inside range i and ends after it: join from start of i to end of j
              if ($ar4[0] >= $ar3[0] && $ar3[1] >= $ar4[0] && $ar3[1] < $ar4[1]) {
                $newcheckline = "$ar3[0].$ar4[1]";
                for ($m=0; $m!=($newct+1); $m++) { next if ($m == $i | $m == $j); $newcheckline .= " $newrs[$m]"; }
              }
#range j starts before range i and ends inside it: join from start of j to end of i
              if ($ar3[1] >= $ar4[1] && $ar4[1] >= $ar3[0] && $ar4[0] < $ar3[0]) {
                $newcheckline = "$ar4[0].$ar3[1]";
                for ($m=0; $m!=($newct+1); $m++) { next if ($m == $i | $m == $j); $newcheckline .= " $newrs[$m]"; }
              }
#the two ranges do not touch: nothing to merge for this pair
              if ($ar4[1] < $ar3[0] | $ar3[1] < $ar4[0]) { $newcheckline = $checkline; }
#after a merge restart the scan on the shorter list; the list is final once the last pair was left unchanged
              if ($newcheckline ne $checkline) { $checkline = $newcheckline;  last LP_M11; }
              else { if ($i == ($newct - 1) && $j == $newct) { $again = 0; $finalcheckline = $newcheckline; } }
            }
          }
        }
      }
      @ar5 = split/ /, $finalcheckline;
#determine confidence for each domain assignment
#$stepsabove / $stepsbelow collect the step numbers of kept hits of this superfamily that overlap the merged
#range by >= 75% both ways, split by whether the hit is above (Z/E) or below (z/e) the confidence cutoff
      for ($k=0; $k!=($#ar5+1); $k++) {
        $posrange = $ar5[$k];
        $sfid = $hl[$sfa[$z][0]][1];
        $stepsabove = ""; $stepsbelow = ""; $abovect = 0; $belowct = 0;
        for ($i=0; $i!=($#allhitnr+1); $i++) {
          next unless ($hl[$i][0] ne "null" && $hl[$i][1] eq $sfid);
          ($ov1, $ov2) = SUB_get_ov ( "$posrange", "$hl[$i][3]" );
          if ($ov1 >= 0.75 && $ov2 >= 0.75) {
            if ($hl[$i][7] eq "Z" | $hl[$i][7] eq "E") { if ($stepsabove !~ /$hl[$i][2]/) { $stepsabove .= "$hl[$i][2]"; } }
            else { if ($stepsbelow !~ /$hl[$i][2]/) { $stepsbelow .= "$hl[$i][2]"; } }
          }
        }
#count the distinct steps (one character each) in both strings
        for ($i=0; $i!=10; $i++) { last if (substr($stepsabove, $i, 1) eq ""); $abovect++; }
        for ($i=0; $i!=10; $i++) { last if (substr($stepsbelow, $i, 1) eq ""); $belowct++; }
#4: several steps above the cutoff or step 1 above it, 3: exactly one step above it,
#2: several steps below it, 1: anything else
        if ($abovect > 1 | $stepsabove =~ /1/) { $domconf = 4; }
        else {
          if ($abovect == 1) { $domconf = 3; }
          else {
            if ($belowct > 1) { $domconf = 2; }
            else { $domconf = 1; }
          }
        }
        push @dom_assi, "$rep\t$posrange\t$sfid\t$domconf";
      }
    }
    next LP_M8;
  }
#deal with non-"repeat"-superfamily assignments...
#separate hits (from *.outinf_use) into domains
#same domain if hits cover 65% length of both hits (or if 1 is completely within the other) and are from same superfamily
#$dl[$j][0] is the first hit put into domain $j and the one every later hit is compared with,
#$dl[$j][1..$dlct[$j]] are the hits added afterwards; $dct is the index of the last domain (-1 = none)

  $dct = -1;
  LP_M12: for ($i=0; $i!=($#allhitnr+1); $i++) {
    next if ($hl[$i][0] eq "null");
    for ($j=0; $j!=($dct+1); $j++) {
      next unless ($hl[$i][1] eq $hl[$dl[$j][0]][1]);
      ($ov1, $ov2) = SUB_get_ov ( "$hl[$i][3]", "$hl[$dl[$j][0]][3]" );
      if ($ov1 == 1 | $ov2 == 1 | ($ov1 >= 0.65 && $ov2 >= 0.65)) {
        $dlct[$j] += 1;
        $dl[$j][$dlct[$j]] = $i;
        next LP_M12;
      }
    }
#no existing domain accepted this hit: it opens a new one
    $dct += 1;
    $dl[$dct][0] = $i;
    $dlct[$dct] = 0;
  }
#order the domains from N to C
#$nave[$i] is the truncated mean start position of the hits in domain $i; the domains are then copied to
#@dlo / @dloct in order of increasing $nave (for equal values the later domain is taken first)
  $dlo[0][0] = "null";
  $dloct[0] = "null";
  for ($i = 0; $i <= $dct; $i++) {
    $ntot = 0;
    for ($j = 0; $j <= $dlct[$i]; $j++) {
      $pt = $dl[$i][$j];
      @ar3 = split(/\./, $hl[$pt][3]);
      $ntot = $ntot + $ar3[0];
    }
    $nave[$i] = int($ntot / ($dlct[$i] + 1));
  }
  for ($i = 0; $i <= $dct; $i++) {
    $small = 0;
    for ($j = 0; $j <= $dct; $j++) {
      $small = $j if ($nave[$small] >= $nave[$j]);
    }
    for ($j = 0; $j <= $dlct[$small]; $j++) {
      $dlo[$i][$j] = $dl[$small][$j];
    }
    $dloct[$i] = $dlct[$small];
#a very large value takes the chosen domain out of the following rounds
    $nave[$small] = 10000000000;
  }
#find which domains are linked (from same hit: large insertion)
#@linkdomp: for every kept hit with more than one linked id, the ordered-domain numbers its pieces fall in ("y y ...")
#@linkdom:  the distinct strings of @linkdomp; $linkct is the index of the last one
#@linkinf:  one entry per final unit - either a @linkdom string or the number of a single unlinked domain
  for ($i=0; $i!=500; $i++) { @linkdomp[$i] = ""; @linkdom[$i] = ""; @linkinf[$i] = ""; }
  $linkctp = -1;  @linkdomp[0] = "null";
  for ($i=0; $i!=($#allhitnr+1); $i++) {
    next if ($hl[$i][0] eq "null");
    next unless ($hl[$i][6] =~ / /);
    @linkar1 = split/ /, $hl[$i][6];
    $linkctp += 1;
    @linkdomp[$linkctp] = "";
#for every linked id take the first ordered domain that contains a hit with that id
    for ($j=0; $j!=($#linkar1+1); $j++) { FINDL: for ($y=0; $y!=($dct+1); $y++) { for ($x=0; $x!=($dloct[$y]+1); $x++) { if ($hl[$dlo[$y][$x]][5] == $linkar1[$j]) { @linkdomp[$linkctp] .= "$y "; last FINDL; } } } }
  }
#remove duplicate strings
  if ($linkctp != -1) {
    $linkct = 0; @linkdom[0] = $linkdomp[0];
    LL2: for ($i=1; $i!=($linkctp+1); $i++) {
      for ($j=0; $j!=($linkct+1); $j++) { next LL2 if ($linkdom[$j] eq $linkdomp[$i]); }
      $linkct += 1; $linkdom[$linkct] = $linkdomp[$i];
    }
  }
  $linkict = -1; @linkinf[0] = "null";
#@trackdl marks the domains that already belong to an entry of @linkinf
  for ($i=0; $i!=($dct+1); $i++) { @trackdl[$i] = "N"; }
  LL3: for ($i=0; $i!=($dct+1); $i++) {
    next LL3 if ($trackdl[$i] eq "Y");
#NOTE(review): $linkct is only assigned above when a linked hit exists; otherwise it keeps whatever value it
#had before, and the test below then walks @linkdom entries that were reset to "" - confirm that is intended
    if ($linkct != -1) {
      for ($j=0; $j!=($linkct+1); $j++) {
        @linkar2 = split/ /, $linkdom[$j];
        $inc = "N";
        for ($k=0; $k!=($#linkar2+1); $k++) { if ($i == $linkar2[$k]) { $inc = "Y"; } }
        if ($inc eq "Y") {
          $linkict += 1; $linkinf[$linkict] = $linkdom[$j];
          for ($k=0; $k!=($#linkar2+1); $k++) { $trackdl[$linkar2[$k]] = "Y"; }
          next LL3;
        }
      }
    }
#domain $i is in no link group: it forms an entry of its own
    $linkict += 1; $linkinf[$linkict] = $i;
    $trackdl[$i] = "Y";
  }

#refine external domain boundaries (N-term of domain1 and C-term of domainN)
#the range types are tried in the order D (dali), M (mammoth), S (sequence; csv hits count as sequence here)
#and the first type that has hits in the domain decides the boundary:
#smallest start position for the first domain, largest end position for the last domain
  for ($i = 0; $i <= $dct; $i++) {
    ($n_dom[$i], $c_dom[$i], $def_n[$i], $def_c[$i]) = ("", "", "", "");
  }
#N-term of the first ordered domain
  for ($z = 0; $z < 3; $z++) {
    $str = ("D", "M", "S")[$z];
    $zct = -1;
    $zar[0] = "null";
    for ($i = 0; $i <= $dloct[0]; $i++) {
      $pt = $dlo[0][$i];
      if ($hl[$pt][4] ne $str) { next; }
      $zct += 1;
      $zar[$zct] = $pt;
    }
    if ($zct == -1) { next; }
    for ($i = 0; $i <= $zct; $i++) {
      $pt = $zar[$i];
      @ar4 = split(/\./, $hl[$pt][3]);
      if ($i == 0 || $ar4[0] < $n_dom[0]) { $n_dom[0] = $ar4[0]; }
    }
    if ($n_dom[0] ne "") { last; }
  }
#C-term of the last ordered domain
  for ($z = 0; $z < 3; $z++) {
    $str = ("D", "M", "S")[$z];
    $zct = -1;
    $zar[0] = "null";
    for ($i = 0; $i <= $dloct[$dct]; $i++) {
      $pt = $dlo[$dct][$i];
      if ($hl[$pt][4] ne $str) { next; }
      $zct += 1;
      $zar[$zct] = $pt;
    }
    if ($zct == -1) { next; }
    for ($i = 0; $i <= $zct; $i++) {
      $pt = $zar[$i];
      @ar5 = split(/\./, $hl[$pt][3]);
      if ($i == 0 || $ar5[1] > $c_dom[$dct]) { $c_dom[$dct] = $ar5[1]; }
    }
    if ($c_dom[$dct] ne "") { last; }
  }
#refine internal boundaries
#     1) if there is a dali range, use it 
#else 2) if there is a mammoth range, use it 
#else 3) use the sequence ranges
#
#separate out set of hits from D, M, or S (depending on where you are in the loop)
#sort N-term domain ranges by decreasing C-term positions
#sort C-term domain ranges by increasing N-term positions
#
#if one domain has D/M/S of higher priority than the other domain, determine the boundary of the
#higher priority end then find the other domain boundary that doesn't overlap past the cushion
#
#if the two domains have the same D/M/S priority, find the longest ranges that don't overlap 
#past the cushion
#
#if a round fails to find boundaries, go to next step down in priorities and try again
#
#if the boundaries can't be determined without going past the cushion, disregard D/M/S rankings
#and try again
#
#if the boundaries can't be determined without going past the cushion using the combined D/M/S sets, 
#flag as unresolved boundaries problem in output
#
  for ($i=0; $i!=$dct; $i++) {
    #split the hits of domain $i (side 0) and domain $i+1 (side 1) by range type: column 0 = D, 1 = M, 2 = S
    #$rct[side][col] is the index of the last hit stored in $rar[side][col] (-1 = none)
    my %typecol = ("D" => 0, "M" => 1, "S" => 2);
    for ($z = 0; $z < 3; $z++) { $rct[0][$z] = -1; $rct[1][$z] = -1; }
    for ($j = 0; $j <= $dloct[$i]; $j++) {
      $pt = $dlo[$i][$j];
      next unless (exists $typecol{$hl[$pt][4]});
      my $col = $typecol{$hl[$pt][4]};
      $rct[0][$col] += 1;
      $rar[0][$col][$rct[0][$col]] = $pt;
    }
    for ($j = 0; $j <= $dloct[$i+1]; $j++) {
      $pt = $dlo[$i+1][$j];
      next unless (exists $typecol{$hl[$pt][4]});
      my $col = $typecol{$hl[$pt][4]};
      $rct[1][$col] += 1;
      $rar[1][$col][$rct[1][$col]] = $pt;
    }
#sort ranges in each set (D/M/S):  domain1 - decreasing C-term pos, domain2 - increasing N-term pos
#@trar is a working copy of @rar; each round picks the best remaining entry, appends it to @srar
#and overwrites it with "null" in @trar so it is not picked again (for equal positions the later entry wins)
    for ($z2=0; $z2!=3; $z2++) {
      for ($z1=0; $z1!=($rct[0][$z2]+1); $z1++) { $trar[0][$z2][$z1] = $rar[0][$z2][$z1]; }
      $srar[0][$z2][0] = "null";
      for ($k=0; $k!=($rct[0][$z2]+1); $k++) {
        $big = -1000;
        for ($n=0; $n!=($rct[0][$z2]+1); $n++) {
          next if ($trar[0][$z2][$n] eq "null");
          $pt1 = $trar[0][$z2][$n];
          @ar6 = split/\./, $hl[$pt1][3];
          if ($ar6[1] >= $big) { $bigpt = $n; $big = $ar6[1]; }
        }
        $srar[0][$z2][$k] = $trar[0][$z2][$bigpt];
        $trar[0][$z2][$bigpt] = "null";
      }
    }
#same selection for the second domain, picking the smallest start position each round
    for ($z2=0; $z2!=3; $z2++) {
      for ($z1=0; $z1!=($rct[1][$z2]+1); $z1++) { $trar[1][$z2][$z1] = $rar[1][$z2][$z1]; }
      $srar[1][$z2][0] = "null";
      for ($k=0; $k!=($rct[1][$z2]+1); $k++) {
        $small = 1000000000000;
        for ($n=0; $n!=($rct[1][$z2]+1); $n++) {
          next if ($trar[1][$z2][$n] eq "null");
          $pt1 = $trar[1][$z2][$n];
          @ar7 = split/\./, $hl[$pt1][3];
          if ($small >= $ar7[0]) { $smallpt = $n; $small = $ar7[0]; }
        }
        $srar[1][$z2][$k] = $trar[1][$z2][$smallpt];
        $trar[1][$z2][$smallpt] = "null";
      }
    }
#sort all sets (D+M+S) together
#$arar[0] lists every hit of domain $i by decreasing end position, $arar[1] every hit of domain $i+1 by
#increasing start position; @tarar is the working copy in which picked entries are overwritten with "null"
    for ($y=0; $y!=($dloct[$i]+1); $y++) { $tarar[0][$y] = $dlo[$i][$y]; }
    $arar[0][0] = "null";
    for ($z=0; $z!=($dloct[$i]+1); $z++) {
      $big = -1000;
      for ($y=0; $y!=($dloct[$i]+1); $y++) {
        next if ($tarar[0][$y] eq "null");
        @ar8a = split/\./, $hl[$tarar[0][$y]][3];
        if ($ar8a[1] >= $big) { $bigpt = $y; $big = $ar8a[1]; }
      }
      $arar[0][$z] = $tarar[0][$bigpt];
      $tarar[0][$bigpt] = "null";
    }
    for ($y=0; $y!=($dloct[$i+1]+1); $y++) { $tarar[1][$y] = $dlo[$i+1][$y]; }
    $arar[1][0] = "null";
    for ($z=0; $z!=($dloct[$i+1]+1); $z++) {
      $small = 1000000000000;
      for ($y=0; $y!=($dloct[$i+1]+1); $y++) {
        next if ($tarar[1][$y] eq "null");
        @ar8b = split/\./, $hl[$tarar[1][$y]][3];
        if ($small >= $ar8b[0]) { $smallpt = $y; $small = $ar8b[0]; }
      }
      $arar[1][$z] = $tarar[1][$smallpt];
      $tarar[1][$smallpt] = "null";
    }
#the last entry of each sorted list gives $def_n (start position of the last entry for domain $i+1)
#and $def_c (end position of the last entry for domain $i)
    @ptar1 = split/\./, $hl[$arar[1][$dloct[$i+1]]][3];
    $def_n[$i+1] = $ptar1[0];
    @ptar2 = split/\./, $hl[$arar[0][$dloct[$i]]][3];
    $def_c[$i] = $ptar2[1];
#determine internal boundaries
    $wt = 0;
    $prevpri[0] = -1;
    $prevpri[1] = -1;
    while ($wt == 0) {
      for ($j=0; $j!=3; $j++) { if ($rct[0][$j] != -1 && $j > $prevpri[0]) { $newpri[0] = $j; last; } }
      for ($j=0; $j!=3; $j++) { if ($rct[1][$j] != -1 && $j > $prevpri[1]) { $newpri[1] = $j; last; } }
      if ($newpri[0] == $prevpri[0] | $newpri[1] == $prevpri[1]) { $wt = 1; }
      else {
        if ($newpri[0] == $newpri[1]) {
#these 2 domains have same D/M/S priority; only run if D-D or M-M
          if ($newpri[0] != 2) {
            $iwt = 0; $x = 0; $y = 0;
            $xmax = $rct[0][$newpri[0]];
            $ymax = $rct[1][$newpri[1]];
            while ($iwt == 0) {
              $pt1 = $srar[0][$newpri[0]][$x];
              @ar9a = split/\./, $hl[$pt1][3];
              $thisdom1_C = $ar9a[1];
              $xct = 0;
              $pt2 = $srar[1][$newpri[1]][$y];
              @ar9b = split/\./, $hl[$pt2][3];
              $thisdom2_N = $ar9b[0];
              $yct = 0;
              for ($k=$y; $k!=($ymax+1); $k++) {
                $pt = $srar[1][$newpri[1]][$k];
                @ar9c = split/\./, $hl[$pt][3];
                if ($thisdom1_C >= ($ar9c[0]+$cushion)) { $xct += 1; }
              }
              for ($k=$x; $k!=($xmax+1); $k++) {
                $pt = $srar[0][$newpri[0]][$k];
                @ar9d = split/\./, $hl[$pt][3];
                if (($ar9d[1]-$cushion) >= $thisdom2_N) { $yct += 1; }
              }
              if ($xct == 0 && $yct == 0) {
                $n_dom[$i+1] = $thisdom2_N;
                $c_dom[$i] = $thisdom1_C;
                $iwt = 1;
                $wt = 1;
              }
              else {
                if ($x == $xmax && $y == $ymax) { $iwt = 1; }
                else {
                  if ($x == $xmax) { $y += 1; }
                  elsif ($y == $ymax) { $x += 1; }
                  else {
                    if ($xct > $yct) { $x += 1; }
                    if ($yct > $xct) { $y += 1; }
                    if ($xct == $yct) { $x += 1; $y += 1; }
                  }
                }
              }
            }
          }
          $prevpri[0] = $newpri[0];
          $prevpri[1] = $newpri[1];
        }
        else {
#domains have different D/M/S priority
          if ($newpri[0] < $newpri[1]) {
            $xmax = $rct[0][$newpri[0]];
            $ymax = $rct[1][$newpri[1]];
            XP1A: for ($k=0; $k!=($xmax+1); $k++) {
              $pt1 = $srar[0][$newpri[0]][$k];
              @ar10a = split/\./, $hl[$pt1][3];
              $thisdom1_C = $ar10a[1];
              XP1B: for ($m=0; $m!=($ymax+1); $m++) {
                $pt2 = $srar[1][$newpri[1]][$m];
                @ar10b = split/\./, $hl[$pt2][3];
                next XP1B if ($thisdom1_C >= ($ar10b[0]+$cushion));
                $n_dom[$i+1] = $ar10b[0];
                $c_dom[$i] = $thisdom1_C;
                last XP1A;
              }
            }
          }
          if ($newpri[1] < $newpri[0]) {
#domain i+1 has the lower priority index here: walk its hits ($srar[1]) in the outer loop and,
#for each, take the first hit of domain i ($srar[0]) whose C-terminus minus $cushion stays below
#that hit's N-terminus; the pair then defines $c_dom[$i] and $n_dom[$i+1]
            $xmax = $rct[0][$newpri[0]];
            $ymax = $rct[1][$newpri[1]];
            XP1C: for ($k=0; $k!=($ymax+1); $k++) {
              $pt1 = $srar[1][$newpri[1]][$k];
              @ar10c = split/\./, $hl[$pt1][3];
              $thisdom2_N = $ar10c[0];
#the inner loop indexes $srar[0], so it is bounded by $xmax (last index of that list);
#it used to be bounded by $ymax, which belongs to $srar[1]
              XP1D: for ($m=0; $m!=($xmax+1); $m++) {
                $pt2 = $srar[0][$newpri[0]][$m];
                @ar10d = split/\./, $hl[$pt2][3];
                next XP1D if (($ar10d[1]-$cushion) >= $thisdom2_N);
                $n_dom[$i+1] = $thisdom2_N;
                $c_dom[$i] = $ar10d[1];
                last XP1C;
              }
            }
          }
          $wt = 1;
        }
      }
    }
#check if separating hits by D/M/S priority failed to determine domain boundaries
#ignore D/M/S priority and use all hits to determine longest ranges that do not overlap past cushion
    if ($n_dom[$i+1] eq "" | $c_dom[$i] eq "") {
      $iwt = 0; $x = 0; $y = 0;
      $xmax = $dloct[$i];
      $ymax = $dloct[$i+1];
      while ($iwt == 0) {
        $pt1 = $arar[0][$x];
        @ar11a = split/\./, $hl[$pt1][3];
        $thisdom1_C = $ar11a[1];
        $xct = 0;
        $pt2 = $arar[1][$y];
        @ar11b = split/\./, $hl[$pt2][3];
        $thisdom2_N = $ar11b[0];
        $yct = 0;
        for ($k=$y; $k!=($ymax+1); $k++) {
          $pt = $arar[1][$k];
          @ar11c = split/\./, $hl[$pt][3];
          if ($thisdom1_C >= ($ar11c[0]+$cushion)) { $xct += 1; }
        }
        for ($k=$x; $k!=($xmax+1); $k++) {
          $pt = $arar[0][$k];
          @ar11d = split/\./, $hl[$pt][3];
          if (($ar11d[1]-$cushion) >= $thisdom2_N) { $yct += 1; }
        }
        if ($xct == 0 && $yct == 0) {
          $n_dom[$i+1] = $thisdom2_N;
          $c_dom[$i] = $thisdom1_C;
          $iwt = 1;
        }
        else {
          if ($x == $xmax && $y == $ymax) { $iwt = 1; }
          else {
            if ($x == $xmax) { $y += 1; }
            elsif ($y == $ymax) { $x += 1; }
            else {
              if ($xct > $yct) { $x += 1; }
              if ($yct > $xct) { $y += 1; }
              if ($xct == $yct) { $x += 1; $y += 1; }
            }
          }
        }
      }
    }
  }
#determine confidence of each domain assignment
  $unres = 0; @predom = ();
  for ($k=0; $k!=($linkict+1); $k++) {
    @linkar3 = split/ /, $linkinf[$k];
    $sfid = $hl[$dlo[$linkar3[0]][0]][1];
    $posrange = "";
    for ($j=0; $j!=($#linkar3+1); $j++) {
      if ($n_dom[$linkar3[$j]] ne "" && $c_dom[$linkar3[$j]] ne "") {
        if ($j==0) { $posrange = "$n_dom[$linkar3[$j]].$c_dom[$linkar3[$j]]"; }
        else { $posrange .= ",$n_dom[$linkar3[$j]].$c_dom[$linkar3[$j]]"; }
      }
      else { $unres = 1; }
    }
    $stepsabove = ""; $stepsbelow = ""; $abovect = 0; $belowct = 0;
    for ($i=0; $i!=($#allhitnr+1); $i++) {
      next unless ($hl[$i][0] ne "null" && $hl[$i][1] eq $sfid);
      ($ov1, $ov2) = SUB_get_ov ( "$posrange", "$hl[$i][3]" );
      if ($ov1 >= 0.75 && $ov2 >= 0.75) {
        if ($hl[$i][7] eq "Z" | $hl[$i][7] eq "E") { if ($stepsabove !~ /$hl[$i][2]/) { $stepsabove .= "$hl[$i][2]"; } }
        else { if ($stepsbelow !~ /$hl[$i][2]/) { $stepsbelow .= "$hl[$i][2]"; } }
      }
    }
    for ($i=0; $i!=10; $i++) { last if (substr($stepsabove, $i, 1) eq ""); $abovect++; }
    for ($i=0; $i!=10; $i++) { last if (substr($stepsbelow, $i, 1) eq ""); $belowct++; }
    if ($abovect > 1 | $stepsabove =~ /1/) { $domconf = 4; }
    else {
      if ($abovect == 1) { $domconf = 3; }
      else {
        if ($belowct > 1) { $domconf = 2; }
        else { $domconf = 1; }
      }
    }
    push @predom, "$rep\t$posrange\t$sfid\t$domconf";
  }
  if ($unres == 1) { push @unres_rep, "$rep"; }
  else { for ($i=0; $i!=($#predom+1); $i++) { push @dom_assi, "$predom[$i]"; } }
}
#empty every working array used by the assignment stage, then stamp the log
foreach my $work_array (
  \@allhitinf, \@allhitnr, \@allhituse, \@skiphit, \@hl,
  \@step_c1, \@step_c2, \@chcf_c1, \@chcf_c2, \@tossar,
  \@sfa, \@sfect, \@dl, \@dlct, \@dlo, \@dloct, \@trackdl,
  \@linkdomp, \@linkdom, \@linkinf, \@n_dom, \@c_dom, \@def_n, \@def_c,
  \@zar, \@rct, \@rar, \@arar, \@tarar, \@srar, \@trar, \@ptar1, \@ptar2,
  \@predom
) {
  @{$work_array} = ();
}
$t = localtime();
print DATELOG "LAST_CHECK_TIME " . $t . "\n";


##########
#
# make outputs for each chain...


open(BCIN, "pdb_$date/blstclst.fa.bc");
@bcin = <BCIN>;
open(QIN, "query_$date");
@qin = <QIN>;
$start1 = 0;
$oc = 0;
foreach $clustline (@bcin) {
  $t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
  if ($start1 == 1) {
    if ($clustline =~ /\!\^\(NEW/) { close(OUT); last; }
    chomp $clustline;
    @ar0 = split/ /, $clustline;
    $head = $ar0[0];
    $start2 = 0;
    foreach $qline (@qin) {
      if ($start2 == 1) {
        last if ($qline =~ /\!\^\(NEW/);
        next unless ($qline =~ /^$head/);
        $protname = $qline;
        $hprotname = $qline;
        last;
      }
      else { if ($qline =~ /\!\^\(NEW\_FILE\)\: protlist.$date/) { $start2 = 1; } }
    }
    if (-e "$head.chainlog" == 0) { open(OUT, ">$head.chainlog"); $xc = 0; }
    else { $xc = 1; }
    print OUT "$protname\nGapped BLAST results:\n";
    #print the stored output lines of one step for this head.
    #  $step_hits  - reference to a step array (each element is a list of stored lines)
    #  $last_index - last used index of that array (-1 when the step produced nothing)
    #only entries whose first line starts with $head are printed; empty tab fields appear as "x"
    my $print_step_hits = sub {
      my ($step_hits, $last_index) = @_;
      foreach my $hit_no (0 .. $last_index) {
        next unless ($step_hits->[$hit_no][0] =~ /^$head/);
        foreach my $stored_line (@{$step_hits->[$hit_no]}) {
          my @fields = map { $_ eq "" ? "x" : $_ } split(/\t/, $stored_line);
          #join by position: the old code decided on the tab by comparing each field's value
          #with the last field's value, so a field equal to the last one lost its separator
          print OUT join("\t", @fields);
        }
      }
    };
    $print_step_hits->(\@step1, $step1ct);
    print OUT "\nResults from steps 2-8:\n";
    foreach my $step (
      [\@step2, $step2ct], [\@step3, $step3ct], [\@step5, $step5ct],
      [\@step6m, $step6mct], [\@step6d, $step6dct],
      [\@step7, $step7ct], [\@step8, $step8ct], [\@step9, $step9ct]
    ) {
      $print_step_hits->(@{$step});
    }
    #list the hits recorded in the "$head.outinf_removed" section of $head.combined
    open(RIN, "$head.combined");
    @rin = <RIN>;
    close(RIN);
    print OUT "\nRemoved hits:\n";
    #step code (field 4, plus field 5 for step 6) -> array holding that step's stored lines
    my %removed_source = (
      "1" => \@step1, "2" => \@step2, "3" => \@step3, "5" => \@step5,
      "6M" => \@step6m, "6D" => \@step6d,
      "7" => \@step7, "8" => \@step8, "9" => \@step9,
    );
    $start3 = 0;
    foreach $rline (@rin) {
      if ($start3 != 1) {
        #still looking for the section header
        if ($rline =~ /\!\^\(NEW\_FILE\): $head.outinf_removed/) { $start3 = 1; }
        next;
      }
      last if ($rline =~ /\!\^\(NEW/);
      @lar1 = split/\t/, $rline;
      @lar2 = split/ /, $lar1[$#lar1-1];
      next if ($lar1[7] !~ /^$lar1[6]/);
      #numify the step code so the lookup agrees with a numeric comparison
      my $step_key = $lar1[4] + 0;
      if ($step_key == 6) { $step_key .= $lar1[5]; }
      my $source = $removed_source{$step_key};
      if (defined $source) { print OUT "$source->[$lar2[0]][$lar2[1]]"; }
    }
    @rin = ();
    print OUT "\nResults:\n";
#output assignments made at superfamily level (to head)
    #@hitreg collects every position of $head covered by a superfamily-level assignment
    @hitreg = ();
    DA1: for ($i=0; $i!=($#dom_assi+1); $i++) {
      next unless ($dom_assi[$i] =~ /^$head/);
      #@dom_assi entry: rep, comma-separated "start.end" position ranges, superfamily id, confidence
      @sfar0 = split/\t/, $dom_assi[$i];
      $sfid = $sfar0[2]; $domconf = $sfar0[3];
      @ar1 = split/\,/, $sfar0[1];
      #check every segment before recording anything: an assignment with a segment whose start
      #is not below its end is dropped from @dom_assi, and none of its positions may reach
      #@hitreg (previously the segments seen before the bad one stayed marked as mapped)
      for ($j=0; $j!=($#ar1+1); $j++) {
        @chk1 = split/\./, $ar1[$j];
        next if ($chk1[0] < $chk1[1]);
        splice(@dom_assi, $i, 1);
        #step back so the entry that moved into slot $i is examined next
        $i--;
        next DA1;
      }
      for ($j=0; $j!=($#ar1+1); $j++) {
        @ar1b = split/\./, $ar1[$j];
        for ($m=$ar1b[0]; $m!=($ar1b[1]+1); $m++) { push @hitreg, "$m"; }
        $resrange = SUB_convert_range_pos_to_res ( "new_domain_str/$head.ca", "$ar1[$j]" );
        if ($j==0) { $rr = $resrange; }
        else { $rr .= ",$resrange"; }
      }
      $sfaminfo = SUB_convert_from_sfamid ( "$sfid" );
      print OUT "$head,$rr\t[ $sfaminfo  ($sfid) ]\t[conf: $domconf]\n";
    }
    if ($#hitreg == -1) {
      foreach $rp (@unres_rep) { 
        if ($rp eq $head) {
          print OUT "Unresolved superfamily choice!!! Manually assess output of individual steps to determine most appropriate assignment!!!\n";
          SUB_make_insight_logs ( "$head", "unresolved" );
          print OUT "See $head.unresolved.inslog.tar for insight logs of representative superfamily domains superimposed with the query structure.\n";
          last;
        }
      }
    }
#find unmapped regions more than 20 residues in length (in head)
    $wholeheadrangeres = SUB_get_wholerange ( "new_domain_str/$head.ca" );
    $wholeheadrangepos = SUB_convert_range_res_to_pos ( "new_domain_str/$head.ca", "$wholeheadrangeres" );
    @ar1 = split/\./, $wholeheadrangepos;
    $headnpos = $ar1[0]; $headcpos = $ar1[1];
    #scan positions $headnpos..$headcpos and collect, in @posmiss, every run of positions that
    #is absent from @hitreg and longer than 20, stored as "$thisn.$thisc"
    @posmiss = ();
    $prev = 0; $thisn = "null"; $thisc = "null";
    for ($i=$headnpos; $i!=($headcpos+1); $i++) {
      #$f is 1 when position $i is listed in @hitreg; $prev carries $f of the previous position
      $f = 0;
      foreach $pos (@hitreg) { if ($i == $pos) { $f = 1; } }
      #a run starts when an unlisted position follows a listed one, or at position 1
      #NOTE(review): the start test is hard-coded as $i==1; if $headnpos is ever not 1, a run
      #beginning at $headnpos would keep $thisn eq "null" - confirm positions always start at 1
      if ( ($f==0 && $prev==1) | ($f==0 && $i==1) ) { $thisn = $i; }
      elsif ($f==0 && $prev==0) { 
        #run continues; if this is the last position, close it here
        $thisc = $i;
        if ($i==$headcpos) { $sizemiss=$thisc-$thisn+1; if ($sizemiss > 20) { push @posmiss, "$thisn.$thisc"; } }
      }
      #a listed position after an unlisted one ends the run at $i-1
      elsif ($f==1 && $prev==0) { $thisc = $i-1; $sizemiss=$thisc-$thisn+1; if ($sizemiss > 20) { push @posmiss, "$thisn.$thisc"; } }
      if ($f==0) { $prev=0; }
      else { $prev=1; }
    }
    if ($#posmiss != -1) {
      print OUT "\nRegions not mapped (>20 residues):\n";
      foreach $r (@posmiss) {
        $rr = SUB_convert_range_pos_to_res ( "new_domain_str/$head.ca", "$r" );
        print OUT "$head,$rr\n";
      }
    }
#find fold level assignments to unmapped regions (in head):  best hit (Z>10) to region that covers at least 50% of corresponding scop domain (allowed overlap = 10)
    @foldassi = ();
    open(MMIN, "$head.mammoth2");
    @mmin = <MMIN>;
    $start4 = 0;
    $havefoldhit = 0;
    $formakeins_fold = "";
    for ($i=0; $i!=($#mmin+1); $i++) {
      if ($start4 == 1) {
        last if ($mmin[$i] =~ /\!\^\(NEW/);
        @mar1 = split/\t/, $mmin[$i];
        $qrangepos = SUB_convert_range_res_to_pos ( "new_domain_str/$head.ca", "$mar1[1]" );
        @mar1b = split/\./, $qrangepos;
        $ovct = 0;
        for ($j=$mar1b[0]; $j!=($mar1b[1]+1); $j++) { foreach $pos (@hitreg) { if ($pos==$j) { $ovct++; last; } } }
        next if ($ovct > 10);
        $srangeposhit = SUB_convert_range_res_to_pos ( "$path_str/$mar1[4]", "$mar1[6]" );
        $srangeposwhole = SUB_convert_range_res_to_pos ( "$path_str/$mar1[4]", "$mar1[8]" );
        ($ov1, $ov2) = SUB_get_ov ( "$srangeposhit", "$srangeposwhole" );
        next if ($ov2 < 0.5);
        if ($havefoldhit==0) { print OUT "\nBest fold level hits to unmapped regions:\n"; }
        print OUT "$mmin[$i]";
        @mar1c = split/\./, $mar1[5];
        push @foldassi, "$qrangepos\t$mar1[1]\t$mar1c[0].$mar1c[1].$mar1c[2]";
        push @dom_assi_fold, "$head\t$qrangepos\t$mar1c[0].$mar1c[1].$mar1c[2]\t0";
        $formakeins_fold .= "$mar1[4] $mar1c[0].$mar1c[1].$mar1c[2]\t";
        for ($j=$mar1b[0]; $j!=($mar1b[1]+1); $j++) {
          $f = 0;
          foreach $pos (@hitreg) { if ($pos==$j) { $f=1; last; } }
          if ($f==0) { push @hitreg, "$j"; }
        }
        $havefoldhit++;
      }
      else { if ($mmin[$i] =~ /\!\^\(NEW\_FILE\)\: $head.foldlevel/) { $start4 = 1; } }
    }
    close(MMIN); @mmin = ();
    if ($#foldassi != -1) {
      print OUT "\nFold level assignments:\n";
      foreach $fa (@foldassi) {
        @mar2 = split/\t/, $fa;
        $sfaminfo = SUB_convert_from_sfamid ( "$mar2[2]" );
        @mar3a = split/\, SF\: /, $sfaminfo;
        @mar3b = split/\./, $mar2[2];
        print OUT "$head,$mar2[1]\t[ $mar3a[0]  ($mar3b[0].$mar3b[1]) ]\n";
      }
      SUB_make_insight_logs ( "$head", "$formakeins_fold" );
      print OUT "See $head.fold.inslog.tar for insight logs of these fold hits superimposed with the query structure.\n";
    }
#output list of non-head queries in blastclust cluster
    print OUT "\nUsed as head for: ";
    for ($i=1; $i!=($#ar0+1); $i++) { print OUT "$ar0[$i] "; }
    print OUT "\n";
#output assignment made at superfamily level (to non-heads)
    for ($n=1; $n!=($#ar0+1); $n++) {
      SUB_other_new_cut ( $ar0[$n] );
      $start2 = 0;
      foreach $qline (@qin) {
        if ($start2 == 1) {
          last if ($qline =~ /\!\^\(NEW/);
          next unless ($qline =~ /^$ar0[$n]/);
          $protname = $qline;
          last;
        }
        else { if ($qline =~ /\!\^\(NEW\_FILE\)\: protlist.$date/) { $start2 = 1; } }
      }
      print OUT "\n$protname";
      for ($p=0; $p!=($#dom_assi+1); $p++) {
        next unless ($dom_assi[$p] =~ /^$head/);
        @sfar0 = split/\t/, $dom_assi[$p];
        $sfid = $sfar0[2]; $domconf = $sfar0[3];
        @ar1 = split/\,/, $sfar0[1];
        for ($m=0; $m!=($#ar1+1); $m++) {
          $posrange = SUB_compare_to_head ( $head, $ar1[$m], $ar0[$n] );
          if ($posrange eq "error") { $resrange = "error"; }
          else { $resrange = SUB_convert_range_pos_to_res ( "new_domain_str/$ar0[$n].ca", "$posrange" ); }
          if ($m==0) { $rr = "$resrange"; }
          else { $rr .= ",$resrange"; }
        }
        $sfaminfo = SUB_convert_from_sfamid ( "$sfid" );
        print OUT "sfam assignment:$ar0[$n],$rr\t[ $sfaminfo  ($sfid) ]\t[conf: $domconf]\n";
      }
#find unmapped regions >20 residues in length (in non-heads)
      if ($#posmiss != -1) {
        foreach $r (@posmiss) {
          $posrange = SUB_compare_to_head ( $head, $r, $ar0[$n] );
          $rr = SUB_convert_range_pos_to_res ( "new_domain_str/$ar0[$n].ca", "$posrange" );
          print OUT "unmapped region:$ar0[$n],$rr\n";
        }
      }
#make fold level assignments to non-heads
      if ($#foldassi != -1) {
        foreach $fa (@foldassi) {
          @mar2 = split/\t/, $fa;
          $posrange = SUB_compare_to_head ( $head, $mar2[0], $ar0[$n] );
          $rr = SUB_convert_range_pos_to_res ( "new_domain_str/$ar0[$n].ca", "$posrange" );
          $sfaminfo = SUB_convert_from_sfamid ( "$mar2[2]" );
          @mar3a = split/\, SF\: /, $sfaminfo;
          @mar3b = split/\./, $mar2[2];
          print OUT "fold assignment:$ar0[$n],$rr\t[ $mar3a[0]  ($mar3b[0].$mar3b[1]) ]\n";
        }
      }
    }
#update unmapped regions (@posmiss to @posmiss2) with fold level assignments
    @posmiss2 = ();
    #@hitreg2 is @hitreg in ascending numeric order (built-in sort replaces the former
    #hand-written quadratic selection sort)
    @hitreg2 = sort { $a <=> $b } @hitreg;
    #same scan as for @posmiss, now against @hitreg2 (which also holds fold-level hit positions):
    #every run of unlisted positions longer than 20 is stored in @posmiss2 as "$thisn.$thisc"
    $prev = 0; $thisn = "null"; $thisc = "null";
    for ($i=$headnpos; $i!=($headcpos+1); $i++) {
      #$f is 1 when position $i is listed; $prev carries $f of the previous position
      $f = 0;
      foreach $pos (@hitreg2) { if ($i == $pos) { $f = 1; } }
      #a run starts when an unlisted position follows a listed one, or at position 1
      if ( ($f==0 && $prev==1) | ($f==0 && $i==1) ) { $thisn = $i; }
      elsif ($f==0 && $prev==0) {
        #run continues; close it here when the last position is reached
        $thisc = $i;
        if ($i==$headcpos) { $sizemiss=$thisc-$thisn+1; if ($sizemiss > 20) { push @posmiss2, "$thisn.$thisc"; } }
      }
      #a listed position after an unlisted one ends the run at $i-1
      elsif ($f==1 && $prev==0) { $thisc = $i-1; $sizemiss=$thisc-$thisn+1; if ($sizemiss > 20) { push @posmiss2, "$thisn.$thisc"; } }
      if ($f==0) { $prev=0; }
      else { $prev=1; }
    }
    foreach $r (@posmiss2) { push @dom_assi_unmap, "$head\t$r\tunmapped region\t0"; }
#get information for making ~.ps
    $unresmarker = "";
    chomp $hprotname;
    foreach $rp (@unres_rep) { next unless ($rp =~ /$head/); $unresmarker = "unres"; last; }
    for ($i=0; $i!=($#dom_assi+1); $i++) {
      next unless ($dom_assi[$i] =~ /^$head/);
      @sfar0 = split/\t/, $dom_assi[$i];
      push @forps, "$hprotname\t$sfar0[1]\t$sfar0[3]\t$sfar0[2]\tassi";
    }
    for ($i=0; $i!=($#dom_assi_fold+1); $i++) {
      next unless ($dom_assi_fold[$i] =~ /^$head/);
      @sfar0 = split/\t/, $dom_assi_fold[$i];
      push @forps, "$hprotname\t$sfar0[1]\t0\t$sfar0[2]\tunmapfold$unresmarker";
    }
    for ($i=0; $i!=($#dom_assi_unmap+1); $i++) {
      next unless ($dom_assi_unmap[$i] =~ /^$head/);
      @sfar0 = split/\t/, $dom_assi_unmap[$i];
      push @forps, "$hprotname\t$sfar0[1]\t0\tX\tunmap$unresmarker";
    }
    next if ($xc == 1);
#for any query with region >20 residues unmapped (at superfamily level!), find fragment hits (steps 2,3,5), 
#other dali hits (z>0), and potential repeats identified via COMPASS
    if ($#posmiss != -1) {
      #return the lines that follow the first line matching $marker, stopping at the next
      #line that contains "!^(NEW"
      my $read_section = sub {
        my ($lines, $marker) = @_;
        my $collected = "";
        $start5 = 0;
        foreach $bline (@{$lines}) {
          if ($start5 != 1) {
            if ($bline =~ $marker) { $start5 = 1; }
            next;
          }
          last if ($bline =~ /\!\^\(NEW/);
          $collected .= "$bline";
        }
        return $collected;
      };
      $incmpout = ""; $potrepeatout = ""; $otherdaliout = "";
      #sections incmp2 and incmp3 of $head.blast2
      open(BL2, "$head.blast2");
      @bl2 = <BL2>;
      close(BL2);
      for ($i=2; $i!=4; $i++) {
        $incmpout .= $read_section->(\@bl2, qr/\!\^\(NEW\_FILE\)\: $head.incmp$i/);
      }
      @bl2 = ();
      #sections fraghit and rephit of $head.compass2
      open(C2, "$head.compass2");
      @c2 = <C2>;
      close(C2);
      $incmpout .= $read_section->(\@c2, qr/\!\^\(NEW\_FILE\)\: $head.fraghit/);
      $potrepeatout .= $read_section->(\@c2, qr/\!\^\(NEW\_FILE\)\: $head.rephit/);
      @c2 = ();
      #section otherdali of $head.dali2
      open(D2, "$head.dali2");
      @d2 = <D2>;
      close(D2);
      $otherdaliout .= $read_section->(\@d2, qr/\!\^\(NEW\_FILE\)\: $head.otherdali/);
      @d2 = ();
      if ($incmpout ne "") { print OUT "\nSequence method hits to SCOP superfamilies (good E-value, inadequate coverage):\n$incmpout"; }
      if ($otherdaliout ne "") { print OUT "\nOther potential hits to head found by dali:\n$otherdaliout"; }
      if ($potrepeatout ne "") { print OUT "\nHit to repeat superfamily (found by compass step):\n$potrepeatout"; }
    }
#if this is a "new week" run (or if user has other use for more "close hit" outputs), find the top 5 hit by each
#comparison tool (BLAST, RPS-BLAST, PSI-BLAST, COMPASS, MAMMOTH, DALI) to each unmapped region (>20 residues)
    if ($extraoutputoption eq "yes") {
      $urct = 0;
      foreach $uqp_range_pos (@posmiss) { 
        $urct++;
        $rr = SUB_convert_range_pos_to_res ( "new_domain_str/$head.ca", $uqp_range_pos );
        print OUT "\n"; for ($z=0; $z!=101; $z++) { print OUT "-"; } print OUT "\n";
        print OUT "\nUNMAPPED REGION $urct: $head,$rr \[$uqp_range_pos\]\n\nTop 5 DaliLite hits to region $urct:\n";
        $top5_6d = SUB_rundali_rankunmap ( $head, $rr, $urct, $oc );
        if ($top5_6d eq "no dat file") { print OUT "Unmapped region $head.$urct dumped by DaliLite.\n"; }
        else { print OUT "$top5_6d"; $oc = 1; }
        $top5_1 = SUB_topseqhits ( $head, $uqp_range_pos, "pdb_$date/$head.blast", "$head-simple.br", $urct );
        print OUT "\n\nTop 5 gapped BLAST hits to region $urct (cover at least 25% of unmapped segment):\n";
        if ($top5_1 eq "") { print OUT "No hits to unmapped region $head.$urct with E-value < 100\n"; }
        else { print OUT "\n$top5_1"; }
        $top5_2 = SUB_topseqhits ( $head, $uqp_range_pos, "pdb_$date/$head.blast", "$head-rps.br", $urct );
        print OUT "\n\nTop 5 RPS-BLAST hits to region $urct (cover at least 25% of unmapped segment):\n";
        if ($top5_2 eq "") { print OUT "No hits to unmapped region $head.$urct with E-value < 100\n"; }
        else { print OUT "\n$top5_2"; }
        $top5_3 = SUB_topseqhits ( $head, $uqp_range_pos, "pdb_$date/$head.blast", "$head-SCOPd.br", $urct );
        print OUT "\n\nTop 5 PSI-BLAST hits to region $urct (cover at least 25% of unmapped segment):\n";
        if ($top5_3 eq "") { print OUT "No hits to unmapped region $head.$urct with E-value < 100\n"; }
        else { print OUT "\n$top5_3"; }
        $top5_5 = SUB_topcompasshits ( $head, $uqp_range_pos, $urct );
        print OUT "\n\nTop 5 COMPASS hits to region $urct (cover at least 25% of unmapped segment):\n\n$top5_5";
      }
    }
#for any query with region >20 residues unmapped (at superfamily level!), output hits from all methods sorted by scores
    if ($#posmiss != -1) {
#only reached when @posmiss is non-empty; writes one section per method to OUT,
#framed by rules of 101 dashes
      print OUT "\n"; for ($z=0; $z!=101; $z++) { print OUT "-"; } print OUT "\n";
#gapped BLAST hits with E-value < 10
      $fromgapped = SUB_outputbr ( "-simple.br", 10, $head );
      print OUT "\nGapped BLAST Alignments with E-value < 10.0:\n\n$fromgapped";
#RPS-BLAST hits with E-value < 10
      $fromrps = SUB_outputbr ( "-rps.br", 10, $head );
      print OUT "\nRPS-BLAST Alignments with E-value < 10.0:\n\n$fromrps";
#PSI-BLAST hits with E-value < 10
      $frompsi = SUB_outputbr ( "-SCOPd.br", 10, $head );
      print OUT "\nPSI-BLAST Alignments with E-value < 10.0:\n\n$frompsi";
#COMPASS hits with E-value < 1e-5
      $fromcomp = SUB_compass_addon ( $head, "1e-5" );
      print OUT "\nCOMPASS Alignments with E-value < 1e-5:\n\n$fromcomp";
#MAMMOTH hits with Z-score > 3
      $frommammoth = SUB_mammoth_addon ( $head, "3" );
      print OUT "\nMAMMOTH Results with Z-score > 3:\n\n$frommammoth";
#DaliLite hits with Z-score > 0
      $fromdali = SUB_dali_addon ( $head, "0" );
      print OUT "\nDaliLite Results with Z-score > 0:\n\n$fromdali";
      print OUT "\n"; for ($z=0; $z!=101; $z++) { print OUT "-"; }
#PSI-BLAST multiple alignment: copy new_domain_aln/$head.br.aln to OUT line by line
      print OUT "\n\nPSI-BLAST alignment for entire query chain $head:\n\n";
      open(PSIALN, "new_domain_aln/$head.br.aln");
      @psialn = <PSIALN>;
      foreach $pline (@psialn) { print OUT "$pline"; }
      close(PSIALN); @psialn = ();
      print OUT "\n";
    }
    close(OUT);
    open(TT, ">>tmptrack");
    print TT "$head.chainlog completed\n";
    close(TT);
  }
  else { if ($clustline =~ /\!\^\(NEW\_FILE\)\: $date.newpdb.list.bc/ && $clustline !~ /\.bc\.pre/) { $start1 = 1; } }
}
close(BCIN); @bcin = (); @hitreg = (); @hitreg2 = (); @posmiss = (); @posmiss2 = (); @foldassi = ();
if (-d "dali_output") { `rm -r -f dali_output`; }
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#write a stub chainlog for every entry of @frag_list; no mapping is attempted for these.
#each entry is "<chain id> <residue count>" separated by a single space
foreach $frag (@frag_list) {
  @far0 = split/ /, $frag;
  SUB_other_new_cut ( $far0[0] );
  $wholerange = SUB_get_wholerange ( "new_domain_str/$far0[0].ca" );
#look up the fragment's line in the protlist.$date section of the query file;
#$protname keeps its previous value when no line matches
  $start2 = 0;
  foreach $qline (@qin) {
    if ($start2 == 1) {
      last if ($qline =~ /\!\^\(NEW/);
#match on the chain id; this used to read $far0[$0], which indexed the array with the
#program name and only worked because that name numifies to 0
      next unless ($qline =~ /^$far0[0]/);
      $protname = $qline;
      last;
    }
    else { if ($qline =~ /\!\^\(NEW\_FILE\)\: protlist.$date/) { $start2 = 1; } }
  }
#an existing chainlog is not reopened, so the prints below go to a closed handle and are lost
#NOTE(review): presumably intended so a restarted run keeps earlier chainlogs - confirm
  if (-e "$far0[0].chainlog" == 0) {  open(OUT, ">$far0[0].chainlog"); }
  print OUT "$protname\nGapped BLAST results:\n\nResults from steps 2-8:\n\nRemoved hits:\n\nResults:\n\n";
  print OUT "Unmapped regions:\n$far0[0],$wholerange\tMapping not attempted: $far0[1] residue fragment\n\n";
  print OUT "Regions not mapped:\n$far0[0],$wholerange\n\n";
  close(OUT);
#record the whole chain as an unmapped fragment for the summary outputs
  $r = SUB_convert_range_res_to_pos ( "new_domain_str/$far0[0].ca", "$wholerange" );
  push @dom_assi_unmap, "$far0[0]\t$r\t$far0[1] residue fragment\t0";
  chomp $protname;
  push @forps, "$protname\t$r\t0\t$far0[1] residue fragment\tunmapfrag";
}
close(QIN); @qin = ();

#append domain assignment lists to query_$date
#each non-empty list is appended to query_$date as its own section: header line, then one entry per line
open(QDO, ">>query_$date");
foreach my $section (
  [ "!^(NEW_FILE): all_dom_assi_$date\n",       \@dom_assi ],
  [ "!^(NEW_FILE): all_dom_assi_fold_$date\n",  \@dom_assi_fold ],
  [ "!^(NEW_FILE): all_dom_assi_unmap_$date\n", \@dom_assi_unmap ],
  [ "!^(NEW_FILE): unres_rep_$date\n",          \@unres_rep ],
) {
  my ($header, $entries) = @{$section};
  next if (scalar(@{$entries}) == 0);
  print QDO $header;
  foreach my $entry (@{$entries}) { print QDO "$entry\n"; }
}
close(QDO);

#append domain assignment results by query rep to query_$date
#append one "all_results_$date" section to query_$date: for every entry of @bc_rep, its
#superfamily assignments, fold level assignments, unmapped regions and unresolved flag
open(QDO, ">>query_$date");
if ($#bc_rep != -1) { print QDO "!^(NEW_FILE): all_results_$date\n"; }
#$start is set here but not read anywhere in this section
$start = 0;
foreach $rep (@bc_rep) {
#superfamily level: @dom_assi entries are rep, comma-separated position ranges, superfamily id, confidence
#$h makes sure the heading is printed once, and only if at least one entry matches
  $h = 0;
  for ($i=0; $i!=($#dom_assi+1); $i++) {
    next unless ($dom_assi[$i] =~ /^$rep/);
    @ar1 = split/\t/, $dom_assi[$i];
    @ar2 = split/\,/, $ar1[1];
#convert every position range to a residue range and rejoin them with commas
    for ($j=0; $j!=($#ar2+1); $j++) {
      $resr = SUB_convert_range_pos_to_res ( "new_domain_str/$rep.ca", "$ar2[$j]" );
      if ($j==0) { $resrange = $resr; }
      else { $resrange .= ",$resr"; }
    }
    $sfaminfo = SUB_convert_from_sfamid ( "$ar1[2]" );
    if ($h == 0) { print QDO "$rep assigned domains:\n"; $h=1; }
    print QDO "$resrange\t[ $sfaminfo ($ar1[2]) ]\t[conf: $ar1[3]]\n";
  }
#fold level: same layout, but only the text before ", SF: " and the first two
#dot-separated parts of the id are printed
  $h = 0;
  for ($i=0; $i!=($#dom_assi_fold+1); $i++) {
    next unless ($dom_assi_fold[$i] =~ /^$rep/);
    @ar1 = split/\t/, $dom_assi_fold[$i];
    @ar2 = split/\,/, $ar1[1];
    for ($j=0; $j!=($#ar2+1); $j++) {
      $resr = SUB_convert_range_pos_to_res ( "new_domain_str/$rep.ca", "$ar2[$j]" );
      if ($j==0) { $resrange = $resr; }
      else { $resrange .= ",$resr"; }
    }
    $sfaminfo = SUB_convert_from_sfamid ( "$ar1[2]" );
    @ar3a = split/\, SF\: /, $sfaminfo;
    @ar3b = split/\./, $ar1[2];
    if ($h == 0) { print QDO "$rep fold level assignments:\n"; $h=1; }
    print QDO "$resrange\t[ $ar3a[0] ($ar3b[0].$ar3b[1]) ]\t[conf: $ar1[3]]\n";
  }
#unmapped regions: only the residue ranges are printed
  $h = 0;
  for ($i=0; $i!=($#dom_assi_unmap+1); $i++) {
    next unless ($dom_assi_unmap[$i] =~ /^$rep/);
    @ar1 = split/\t/, $dom_assi_unmap[$i];
    @ar2 = split/\,/, $ar1[1];
    for ($j=0; $j!=($#ar2+1); $j++) {
      $resr = SUB_convert_range_pos_to_res ( "new_domain_str/$rep.ca", "$ar2[$j]" );
      if ($j==0) { $resrange = $resr; }
      else { $resrange .= ",$resr"; }
    }
    if ($h == 0) { print QDO "$rep unmapped regions:\n"; $h=1; }
    print QDO "$resrange\n";
  }
#flag the rep when any @unres_rep entry contains its name (unanchored match)
  $this = "";
  for ($i=0; $i!=($#unres_rep+1); $i++) { next unless ($unres_rep[$i] =~ /$rep/); $this = $rep; last; }
  if ($this ne "") { print QDO "$rep domain assignment are unresolved at the superfamily level!\n"; }
  print QDO "\n";
}
close(QDO);
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";

#make outputs:  1) list of all domain pieces (assigned OR unassigned)
#               2) fasta-style for assigned at superfamily level

#write $date.output.domainlist (one line per @forps entry) and $date.output.assigned.fa
#(one fasta record per entry tagged "assi")
open(ASSILIST, ">$date.output.domainlist");
open(ASSIFA, ">$date.output.assigned.fa");
foreach $line (@forps) {
  chomp $line;
#tab-separated fields as used below: [0] names the .ca file, [1] is printed as is,
#[2] comma-separated position ranges, [3] confidence, [4] mapping id or fragment text,
#last field is the entry type tag
  @f1 = split/\t/, $line;
  @f2 = split/\,/, $f1[2];
#convert every position range to a residue range and rejoin them with commas
  for ($i=0; $i!=($#f2+1); $i++) {
    $p = SUB_convert_range_pos_to_res ( "new_domain_str/$f1[0].ca", "$f2[$i]" );
    if ($i == 0) { $rr = $p; }
    else { $rr .= "\,$p"; }
  }
  if ($f1[$#f1] eq "assi") {
#assigned at superfamily level: also emit the domain sequence in fasta form
    $descr = "assigned sfam";
    $conf = "conf: $f1[3]";
    $map = $f1[4];
    $thisdomseq = SUB_get_domain_seq ( "$f1[0]", "$rr" );
    print ASSIFA ">$f1[0]\_$rr\_$map\_confidence\_$f1[3]\n$thisdomseq\n";
  }
  else {
    if ($f1[$#f1] =~ /fold/) {
#fold level only: map is the first two dot-separated parts of field 4;
#a tag containing "unres" adds a trailing "*" to the description
      $descr = "assigned fold, unassigned sfam";
      if ($f1[$#f1] =~ /unres/) { $descr .= "\*"; }
      $conf = "conf: 0";
      @f3 = split/\./, $f1[4];
      $map = "$f3[0].$f3[1]";
    }
    elsif ($f1[$#f1] =~ /frag/) {
#fragment: field 4 holds the fragment description
      $descr = "$f1[4]; mapping not attempted";
      $conf = "  X  "; $map = "   X   ";
    }
    else {
#any other tag: unmapped region
      $descr = "unassigned fold, unassigned sfam";
      if ($f1[$#f1] =~ /unres/) { $descr .= "\*"; }
      $conf = "  X  "; $map = "   X   ";
    }
  }
  print ASSILIST "$f1[0]\t$f1[1]\t$rr\t$map\t$conf\t$descr\n";
}
close(ASSILIST); close(ASSIFA);


print DATELOG "STATUS: finished with assignments, ready to start cleanup\n";
$t = localtime; print DATELOG "LAST_CHECK_TIME $t\n";
close(DATELOG);
} # END OF "re-start at choose sfam and make outputs"

#mv user outputs to OUTPUT
# (skipped when OUTPUT already exists in the working directory or under pdb_$date)
if (-d "OUTPUT" == 0 && -d "pdb_$date/OUTPUT" == 0) {
  `mkdir OUTPUT`;
  `mv *.chainlog OUTPUT`;
  `mv *.inslog.tar OUTPUT`;
  `mv $date.output.assigned.fa OUTPUT`;
  `mv $date.output.domainlist OUTPUT`;
}

#tar all files for each query rep into 1 file: $rep.allfiles.tar.bz2
# One child process is forked per entry of @bc_rep; at most $maxload children run at once.
# $runct starts at -1, so the first child archives $bc_rep[-1] (the last entry) and the
# following children archive $bc_rep[0] .. $bc_rep[$#bc_rep-1]; every entry is handled once.
$maxload = 5; $runct = -1; $loadct = 0; @pidlist = (); $finpid = "null";
for ($k=0; $k<$maxload; $k++) { $pidlist[$k] = 0; }
while ($#bc_rep > $runct) {
  if ($pid = fork) {
    # parent: remember the child pid in the first free slot, so that the wait below can
    # match the finished pid and lower $loadct again (the search must only stop once a
    # free slot has been found, otherwise slots 1..$maxload-1 are never used)
    for ($k=0; $k<$maxload; $k++) {
      if ($pidlist[$k] == 0) { $pidlist[$k] = $pid; last; }
    }
    sleep 1; $loadct ++; $runct ++;
  }
  elsif (defined $pid) {
    # child: bundle every file of this query rep, then exit
    $rep = $bc_rep[$runct];
    if (-e "$rep.allfiles.tar.bz2" == 0) {
      `mkdir dir_$rep`;
      `mv $rep.* dir_$rep`;
      `mv pdb_$date/$rep.* dir_$rep`;
      `tar cvf $rep.allfiles.tar dir_$rep/*`;
      `rm -r -f dir_$rep`;
      `bzip2 $rep.allfiles.tar`;
      `mv $rep.allfiles.tar.bz2 pdb_$date`;
    }
    exit;
  }
  else {
    # fork returned undef: stop with a message instead of running the child branch
    # (and its exit) inside the parent process
    die "Could not fork to archive query files: $!\n";
  }
  # all slots busy: block until one child finishes and free its slot
  if ($loadct == $maxload) {
    $finpid = wait;
    for ($k=0; $k<$maxload; $k++) { if ($pidlist[$k] == $finpid) { $pidlist[$k] = 0; $loadct --; last; } }
  }
}
# reap the remaining children (wait returns -1 when none are left)
while ($finpid != -1) { $finpid = wait; }

#clean up working directory
close(DATELOG);
`mv OUTPUT pdb_$date`;
`mkdir misc`;
`mv query_$date misc`;
`mv $date.log misc`;
`mv $date misc`;
`mv pdb_$date/seqali.pos-pos misc`;
`mv pdb_$date/blstclst.fa.bc misc`;
`mv pdb_$date/$date.chk.tar.bz2 misc`;
`mv output_int.tar.bz2 misc`;
`rm blosum62.qij blosum62.sij pdb_$date/.ncbirc`;
`mv misc pdb_$date`;
# archive the three per-query working directories, then remove them
`tar cvf query_alndir.tar new_domain_aln/*`;
`bzip2 query_alndir.tar`;
`mv query_alndir.tar.bz2 pdb_$date`;
`tar cvf query_csvdir.tar new_domain_csv/*`;
`bzip2 query_csvdir.tar`;
`mv query_csvdir.tar.bz2 pdb_$date`;
`tar cvf query_strdir.tar new_domain_str/*`;
`bzip2 query_strdir.tar`;
`mv query_strdir.tar.bz2 pdb_$date`;
`rm tmptrack`;
`rm -r -f new_domain_aln new_domain_str new_domain_csv`;
if ($input_type eq "NEW") {
  # for NEW input: keep the pseudonym key with the outputs and rename each SAVE.<x>.SAVE
  # file to <y>.pdb, where <y> and <x> are the first and second tab-separated fields of
  # the corresponding @newstr_pseudo entry
  `mv KEY_$date\_newstructure_pseudonyms pdb_$date/OUTPUT`;
  foreach $nline (@newstr_pseudo) {
    @ar1 = split/\t/, $nline;
    `mv SAVE.$ar1[1].SAVE $ar1[0].pdb`;
  }
}
`mv pdb_$date SM_$date`;

print "\n\n\nAll jobs completed.  Results for query list $date at SM_$date/OUTPUT.\n\n";


#-------------------------------------------------------------------------------------------------------------------
#   END OF MAIN SCRIPT
#-------------------------------------------------------------------------------------------------------------------


##########
#
# subroutine use: SUB_blstclst95 ( \@date_list );
# script: blstclst95.pl
# - the argument is not read; the ids are taken from the global @date_list
# - calls SUB_get_pdbseq_faout for every id and collects the chain FASTA files into blstclst.fa,
#   leaving out chains whose length is less than or equal to $LENGTH_CUTOFF residues
# - copies blstclst.fa to blstclst1.fa, appending "1" to headers whose chain letter is lowercase
# - runs blastclust at the 95% level for similarity and length
# - returns message to append to $date.log

sub SUB_blstclst95 {
  $LENGTH_CUTOFF = 20;
  system "rm blstclst.fa" if ( -e "blstclst.fa" );
  ($total_seq, $lesslong_seq) = (0, 0);
  $retmess1 = "";
  foreach $pdbid (@date_list) {
    chomp $pdbid;
    # the callee reads the global $pdbid set by this loop
    $tmp = SUB_get_pdbseq_faout ( $pdbid );
    $chlist = "";
    # an ERROR reply is passed on to the log message and the id is skipped
    if ($tmp =~ /ERROR/) { $retmess1 .= $tmp; next; }
    @tmp = split /\n/, $tmp;
    foreach $pdb_ch ( @tmp ) {
      # each reply line is "<pdbid>_<chain> <length>"
      @fd = split / /, $pdb_ch;
      # only the first entry reported for a chain is used
      next if ($chlist =~ /$fd[0]/);
      $chlist .= "$fd[0] ";
      # chains of length 0 or 1 are not counted and their FASTA file is removed
      unless ( $fd[1] > 1 ) { system "rm -f $fd[0].fa"; next; }
      $total_seq++;
      if ( $fd[1] > $LENGTH_CUTOFF ) { system "cat $fd[0].fa >> blstclst.fa"; next; }
      $lesslong_seq++;
      $retmess1 .= "$fd[0] length = $fd[1] (<=$LENGTH_CUTOFF). Do not consider.\n";
    }
  }
  $retmess1 .= "Total number of sequences for the pdb_list: $total_seq\nNumber of sequences whose length is <= $LENGTH_CUTOFF : $lesslong_seq\n\n";
  open( BC, "blstclst.fa" );
  open( BC1, ">blstclst1.fa" );
  while ( <BC> ) {
    chomp;
    if ( /^>/ ) {
      # sixth character after ">" is the chain letter of "<4-char id>_<chain>"
      $chain = substr( $', 5, 1 );
      if ( $chain =~ /[a-z]/ ) {
        # lowercase chain: tag the header with a trailing "1" (undone by SUB_fix_bc_output)
        $chain1 = $_.'1';
        print BC1 "$chain1\n";
        next;
      }
    }
    print BC1 "$_\n";
  }
  close (BC);
  close (BC1);
  system "blastclust -i blstclst1.fa -S 95 -l 0.95 -o $date.newpdb.list.bc.pre";
  return $retmess1;
}
### END OF SUB_blstclst95


##########
#
# subroutine use: SUB_get_pdbseq_faout ( $pdbid )
# script: get_pdbseq_faout.pl
# - the argument is not read; the id is taken from the global $pdbid set by the caller
# - fetches $pdbid.pdb if it is not in the working directory (copy of SAVE.$pdbid.SAVE for
#   NEW input, $path_bin/pdbcp.pl otherwise)
# - gets atom-based sequences for each chain in query pdb file (output each to ~.fa);
#   only the first model is read, one residue per CA atom
# - returns one "<pdbid>_<chain> <length>" line per chain, or an ERROR message

sub SUB_get_pdbseq_faout {
  chomp $pdbid;
  $retmess2 = "";
  if (-e "$pdbid.pdb" == 0) {
    if ($input_type eq "NEW") { `cp $currdir/SAVE.$pdbid.SAVE $pdbid.pdb`; }
    else { `$path_bin/pdbcp.pl $pdbid`; }
  }
  if (-e "$pdbid.pdb" == 0) { $retmess2 .= "ERROR: Could not find $pdbid.\n"; }
  else {
    open(PDBIN, "$pdbid.pdb");
    @all_pdbin = <PDBIN>;
    $start = 1;
    $prev_chain = "null";
    $prev_resnum = "null";
    $bad_chain = "null";
    %all_chains = ();
    $len = 0;
    PDBLINE: foreach $line (@all_pdbin) {
      last if ($line =~ /^ENDMDL/);
      next unless ( ($line =~ /^ATOM/ ) | ($line =~ /^HETATM/) );
      next unless (substr($line, 12, 4) eq " CA ");
      $chain = substr($line, 21, 1);
      # a blank chain id is written as "_"
      if( $chain eq ' ' ) { $chain = '_'; }
      next if ($chain eq $bad_chain);
      if ($chain ne $prev_chain) {
        if ($start == 1) {
          # first chain of the file; an existing ~.fa file is not overwritten
          if (-e "$pdbid\_$chain.fa" == 0) {
            open( OUT0, ">$pdbid\_$chain.fa" );
            print OUT0 ">$pdbid\_$chain\n";
          }
          $this_chain = $chain;
          $len = 0;
          $prev_chain = $chain;
          $all_chains{$chain} = 1;
          $start = 0;
        }
        else {
          # A chain id that was already processed shows up again: skip this record and all
          # following records of that chain.  The skip has to leave the per-line loop (hence
          # the label); otherwise the chain in progress is closed early and the repeated
          # chain is reported a second time with a length of 1.
          if (exists $all_chains{$chain}) {
            $bad_chain = $chain;
            $prev_chain = $chain;
            next PDBLINE;
          }
          # a new chain starts: finish the sequence of the previous one and report its length
          print OUT0 "\n";
          close(OUT0);
          $retmess2 .= "$pdbid\_$this_chain $len\n";
          if (-e "$pdbid\_$chain.fa" == 0) {
            open( OUT0, ">$pdbid\_$chain.fa" );
            print OUT0 ">$pdbid\_$chain\n";
          }
          $this_chain = $chain;
          $len = 0;
          $prev_chain = $chain;
          $all_chains{$chain} = 1;
        }
      }
      $resnum = substr($line, 22, 6);
      # same residue number as the previous CA record: only counted again when the
      # occupancy column reads "1.00"
      if ($prev_resnum eq $resnum) { next if (substr($line, 56, 4) ne "1.00"); }
      $prev_resnum = $resnum;
      $letter3 = substr($line, 17, 3);
      $letter1 = SUB_convert_3letter_1letter ( $letter3 );
      print OUT0 "$letter1";
      $len ++;
    }
    # finish and report the last chain
    print OUT0 "\n";
    close(OUT0);
    $retmess2 .= "$pdbid\_$this_chain $len\n";
    close(PDBIN); @all_pdbin = ();
    %all_chains = ();
  }
  return $retmess2;
}
### END OF SUB_get_pdbseq_faout


##########
#
# subroutine use: SUB_convert_3letter_1letter ( $letter3 );
# - converts 3-letter amino acid code to 1-letter code (MSE is treated like MET)
# - returns 1-letter amino acid code, or "X" for any code that is not in the table
# - also leaves the argument and the result in the globals $letter3 and $letter1

sub SUB_convert_3letter_1letter {
  $letter3 = $_[0];
  my %one_letter_for = (
    ALA => "A", CYS => "C", ASP => "D", GLU => "E", PHE => "F",
    GLY => "G", HIS => "H", ILE => "I", LYS => "K", LEU => "L",
    MET => "M", MSE => "M", ASN => "N", PRO => "P", GLN => "Q",
    ARG => "R", SER => "S", THR => "T", VAL => "V", TRP => "W",
    TYR => "Y",
  );
  $letter1 = exists $one_letter_for{$letter3} ? $one_letter_for{$letter3} : "X";
  return $letter1;
}
### END OF SUB_convert_3letter_1letter


##########
#
# subroutine use: SUB_cut_new_chains ( \@bc_rep )
# script: cut_new_chains.pl
# - the argument is not read; the chain ids are taken from the global @bc_rep
# - cuts *.pdb and *.ca files for query chains and moves them to ./new_domain_str
# - the first 4 characters of each id are used as pdb id, character 6 as chain id
# - the fetched $pdbid.pdb is removed again after it has been copied to a temporary file

sub SUB_cut_new_chains {
  foreach $newdom (@bc_rep) {
    $pdbid = substr($newdom, 0, 4);
    if(-e "$pdbid.pdb" == 0){
      if ($input_type eq "NEW") { `cp $currdir/SAVE.$pdbid.SAVE $pdbid.pdb`; }
      else { `$path_bin/pdbcp.pl $pdbid`; }
    }
    $domainid = substr($newdom, 0, 6);
    next if (-e "$pdbid.pdb" == 0);
    $chain_id = substr($newdom, 5, 1);
    open(DOM, ">$domainid.pdb");
    print DOM "HEADER   domain from $pdbid\n";
    print DOM "REMARK 99 Source-PDB: $pdbid\n";
    print DOM "REMARK 99 Region: chain $chain_id\n";
    open(CAO, ">$domainid.ca");
    print CAO "HEADER   domain from $pdbid\n";
    print CAO "REMARK 99 Source-PDB: $pdbid\n";
    print CAO "REMARK 99 Region: chain $chain_id\n";
    open(TF1, "$pdbid.pdb");
    @all_tf1 = <TF1>;
#find if pdb has more than one model; if so, delete all except first model
    open(TF2, ">pdbtemp1");
    foreach $cline (@all_tf1) {
      print TF2 "$cline";
      if ($cline =~ /^ENDMDL/) { print TF2 "END\n"; last; }
    }
    close(TF1); @all_tf1 = ();
    `rm $pdbid.pdb`;
    close(TF2);
    open(TFA, "pdbtemp1");
    @all_tfa = <TFA>;
    $prev_resnum = "null";
    # "_" in the id stands for a blank chain column in the pdb file
    if ($chain_id eq "_") { $chain_id = " "; }
    # ~.ca file: one CA record per residue of the chain
    foreach $tfline (@all_tfa) {
      next unless (substr($tfline, 13, 2) eq "CA");
      next unless (substr($tfline, 21, 1) eq $chain_id);
      next unless ($tfline =~ /^ATOM/ | $tfline =~ /^HETATM/);
      $resnum = substr($tfline, 22, 6);
      # same residue number as the previous CA record: kept only when the occupancy
      # column reads "1.00"
      if ($prev_resnum eq $resnum) { next if (substr($tfline, 56, 4) ne "1.00"); }
      $prev_resnum = $resnum;
      print CAO "$tfline";
    }
    print CAO "END\n";
    close(CAO);
    # ~.pdb file: all ATOM records of the chain; HETATM records of MSE are rewritten as
    # ATOM records of MET
    foreach $tfline (@all_tfa) {
      next unless (substr($tfline, 21, 1) eq $chain_id);
      if ($tfline =~ /^ATOM/) { print DOM "$tfline"; }
      elsif ($tfline =~ /MSE/) {
        if ($tfline =~ /^HETATM/) {
          $str1 = substr($tfline, 6, 11);
          # everything after the residue name (columns 18-20).  Counting from the start of
          # the line gives the same result as the former substr(..., -61) for records of
          # 80 columns plus newline, and keeps the columns aligned for records of any
          # other length (unpadded lines, CRLF line ends).
          $str2 = substr($tfline, 20);
          print DOM "ATOM  ${str1}MET$str2";
        }
        else { print DOM "$tfline"; }
      }
    }
    print DOM "END\n";
    close(DOM);
    close(TFA); @all_tfa = ();
    `rm pdbtemp1`;
    `mv $domainid.pdb ./new_domain_str/.`;
    `mv $domainid.ca ./new_domain_str/.`;
  }
}
### END OF SUB_cut_new_chains


##########
#
# subroutine use: SUB_fix_bc_output ()
# script: fix_bc_output.pl
# - addresses problem of upper/lowercase chains in blastclust
# - copies $date.newpdb.list.bc.pre to $date.newpdb.list.bc; every name that ends in a letter
#   followed by "1" is replaced by its first 5 characters plus that letter in lowercase,
#   all other names are copied unchanged

sub SUB_fix_bc_output {
  open(IN, "$date.newpdb.list.bc.pre");
  @all_in = <IN>;
  open(OUT, ">$date.newpdb.list.bc");
  foreach $line (@all_in) {
    chomp $line;
    @ar = split/ /, $line;
    for ($i=0; $i<=$#ar; $i++) {
      # the last two characters decide: "<letter>1" (either case) marks a lowercase chain
      if (substr($ar[$i], -2) =~ /([a-z])1/i) { $dn = substr($ar[$i], 0, 5) . lc($1); }
      else { $dn = "null"; }
      if ($dn ne "null") { print OUT "$dn "; }
      else { print OUT "$ar[$i] "; }
    }
    print OUT "\n";
  }
  close(IN); @all_in = ();
  close(OUT);
}
### END OF SUB_fix_bc_output


##########
#
# subroutine use: SUB_psiblast_seqnum ( $pdb_chain-nr.br )
# script: psiblast_seqnum.pl
# - the argument is not read; the file "$pdb_chain-nr.br" is opened using the global $pdb_chain
# - returns info on iteration and convergence as a list of two strings:
#   "<hit lines counted in the last completed round>, number_of_iter: <round>, Converged.\n"
#   (or "Not Converged.\n"), and the effective database length without commas followed by "\n"
#   (just "\n" when the report has no "effective length of database" line)

sub SUB_psiblast_seqnum {
  $count = 0;
  $round_count = 0;
  $n_round = 0;
  $converged = 0;
  $effective_nr_len = 0;
  # reset as well, so that a report without the database-length line does not return the
  # value left behind by an earlier call
  $effective_nr_len_1 = "";
  $flag_s = 0;
  open(IN, "$pdb_chain-nr.br");
  @all_in = <IN>;
  foreach $line (@all_in) {
    next if ($line eq "\n");
    if( $line =~ /^effective length of database: / ){
      # strip the thousands separators from the number after the colon
      $effective_nr_len = $';
      chomp( $effective_nr_len );
      @tmp = split /,/, $effective_nr_len;
      $effective_nr_len_1 = join '', @tmp;
    }
    if ($line =~ /round /){
      # start of a round: whatever follows "round " is kept as the round number
      $n_round = $';
      chomp( $n_round );
      $flag_s = 1;
      next;
    }
    if( $flag_s == 1){
      if($line =~ /CONVERGED/ ){ $converged = 1; }
      if($line =~ /^QUERY/ | $line =~ /CONVERGED/ ){
        # end of the hit list of this round: store the count and stop counting
        $round_count = $count;
        $count = 0;
        $flag_s = 0;
        next;
      }
      # table headings are not hits; every other non-blank line is counted
      elsif($line =~ /Score    E/ ){ next; }
      elsif($line =~ /Sequences producing significant alignments/ ){ next; }
      elsif($line =~ /Sequences used in model and found again/){ next; }
      elsif($line =~ /Sequences not found previously or not previously below threshold/ ){ next; }
      else{ $count ++; }
    }
  }
  $retmess3a = "$round_count, number_of_iter: $n_round, " . (($converged == 1) ? "Converged" : "Not Converged") . ".\n";
  $retmess3b = "$effective_nr_len_1\n";
  close(IN); @all_in = ();
  return ($retmess3a, $retmess3b);
}
### END OF SUB_psiblast_seqnum


##########
#
# subroutine use: SUB_chkBr_14fields ( @sendvar_1 )
# script: chkBr_14fields.pl
# - finds outputs from the gapped BLAST, RPS-BLAST, and PSI-BLAST steps
# - arguments: [0] query chain id, [1] name of the report file to parse, [2] method step (1, 2 or 3),
#   [3] E-value cutoff, [4] optional superfamily id; when given, hits to that superfamily are left out
#   and query residue ranges are mapped with ./new_domain_str/<query>.ca
# - appends accepted hits to $pdb_chain.blast2 and pushes them onto @step1 / @step2 / @step3;
#   hits that fail the E-value/coverage test are appended to $pdb_chain.daliconfirm
# - returns ($retmess5a, $retmess5b): $retmess5a is "No_hits_found", "UA" (step 1 without an accepted
#   hit), "<query>\t<subject>\t<evalue>" of the first accepted step-1 hit, or empty;
#   $retmess5b collects WARNING lines about disregarded step-1 hits
# - all working variables are package globals shared with the rest of the script

sub SUB_chkBr_14fields {
  $pdb_chain = $_[0];
  $br_filename = $_[1];
  $method_step = $_[2];
  $evalue_cutoff = $_[3];
  $retmess5a = "";
  $retmess5b = "";
  if ($_[4] ne "") { $eSuperfam_id = $_[4]; $exist_flag = 1; }
  else { $exist_flag = 0; }
  # $MAX_END: how many positions an alignment may stop short of either end and still count as covering
  # $MAX_HIT: number of accepted hits kept (per superfamily in steps 2/3, overall in step 1)
  $MAX_END = 10;
  $MAX_HIT = 10;
# hash of arrays. key: superfam number; value: arrays of hits, array subscripts range: 1..$MAX_HIT(or less).
# hash. key: superfam number; value: number of good_hits (passed both Evalue and length cutoff).
# hash. key: superfam number; value: number of bad_hits (keep the first bad hit for each superfamily).
# br: boundary_refinement; fa: further_analysis.
  %hoa_superfam = ();
  %n_good_hit = ();
  %n_bad_hit = ();
  $good_hit = 0;
  $bad_hit = 0;
  @br = ();
  @fa = ();
  $nhit = 0;
  $ever_hit = 0;
  if( ($method_step == 2) || ($method_step == 3) ){
    # parse the report with a fixed E-value ceiling of 100; the real cutoff is applied below
    @sendvar_2[0] = $br_filename;
    @sendvar_2[1] = 100;
    if( $exist_flag == 0 ){ @sendvar_2[2] = ""; $tmp1 = SUB_br_qs_range ( @sendvar_2 ); }
    elsif ($exist_flag == 1) { @sendvar_2[2] = $eSuperfam_id; $tmp1 = SUB_br_qs_range ( @sendvar_2 ); }
    if ($tmp1 eq "No_hits_found" ){ $retmess5a = "No_hits_found"; return ($retmess5a, $retmess5b); }
    # one hit per line, tab-separated fields as written by SUB_br_qs_range:
    # [0] superfamily, [1] query start, [2] query end, [3] query length, [4] E-value,
    # [5] subject start, [6] subject end, [7] subject length, [8] subject id, [9] sccs
    @hits = split( /\n/, $tmp1 );
    # pass 1: initialise the per-superfamily counters
    foreach $hit (@hits){
      @fds = split( /\t/, $hit );
      $n_good_hit{$fds[0]} = 0;
      $n_bad_hit{$fds[0]} = 0;
      $prev_q_domain_starting{ $fds[0] } = $fds[1];
    }
    # pass 2: every hit that does not both meet the E-value cutoff and cover the subject
    # end-to-end (within $MAX_END) is logged to the daliconfirm file
    foreach $hit (@hits){
      @fds = split( /\t/, $hit );
      if( $fds[4] <= $evalue_cutoff ){
        if( ($fds[5] <= $MAX_END) && ($fds[6] >= ($fds[7] - $MAX_END)) ) { next; }
      }
      open(DALICONFIRMOUT, ">>$pdb_chain.daliconfirm");
      if( $method_step == 2 ){ print DALICONFIRMOUT "$fds[8]\t$fds[4]\t\-rps.br\n"; }
      else { print DALICONFIRMOUT "$fds[8]\t$fds[4]\t\-SCOPd.br\n"; }
      close(DALICONFIRMOUT);
    }
    # pass 3: collect "good" hits (E-value and subject coverage both met) and, for superfamilies
    # that have no good hit yet, the first "bad" hit (E-value met, coverage not met)
    foreach $hit (@hits){
      @fds = split( /\t/, $hit );
      if( $fds[4] > $evalue_cutoff ){ next; }
      $q_domain_starting{ $fds[0] } = $fds[1];
      # beyond $MAX_HIT good hits, a hit is only looked at when its query start lies at least
      # 50 positions away from the start recorded in %prev_q_domain_starting
      if( ($n_good_hit{$fds[0]} < $MAX_HIT) || ( abs($prev_q_domain_starting{$fds[0]} - $q_domain_starting{$fds[0]}) >= 50) ){
        if( ($fds[5] <= $MAX_END) && ($fds[6] >= ($fds[7] - $MAX_END)) ){
          # translate query positions to residue numbers; "X" makes SUB_map_pdbseq use the
          # pdb file of the query, otherwise the given .ca file is used
          if( $exist_flag == 0 ){
             $q_pdbrange = SUB_map_pdbseq ( $pdb_chain, "$fds[1].$fds[2]", X );
             $qlength_range = SUB_map_pdbseq ( $pdb_chain, "1.$fds[3]", X );
          }elsif( $exist_flag == 1 ){
             $q_pdbrange = SUB_map_pdbseq ( $pdb_chain, "$fds[1].$fds[2]", "./new_domain_str/$pdb_chain.ca" );
             $qlength_range = SUB_map_pdbseq ( $pdb_chain, "1.$fds[3]", "./new_domain_str/$pdb_chain.ca" );
          }
          chomp( $q_pdbrange );
          chomp( $qlength_range );
          # subject side: "<7-character id>\t<residue range>" for the aligned part and for the full length
          $s_pdbrange = SUB_convert_nonlibpos_to_libres ("$path_str/$fds[8].ca", "$fds[5].$fds[6]");
          @tmp = split /\t/, $s_pdbrange;
          $my7digit_identifier = $tmp[0];
          $scop_res_range = $tmp[1];
          $s_scoplength_range = SUB_convert_nonlibpos_to_libres ("$path_str/$fds[8].ca", "1.$fds[7]");
          @tmp = split /\t/, $s_scoplength_range;
          $scop_length_res_range = $tmp[1];
          $prev_q_domain_starting{$fds[0]} = $q_domain_starting{$fds[0]};
          $good_hit = 1;
          $n_good_hit{$fds[0]}++;
          $n = $n_good_hit{$fds[0]};
          $hoa_superfam{$fds[0]}[$n] = "$pdb_chain\t$q_pdbrange\t\t$qlength_range\t$my7digit_identifier\t$fds[9]\t$scop_res_range\t\t$scop_length_res_range\t$fds[4]\t\t\t\t$method_step\n";
        }
        else{
          # coverage not met: remember only the first such hit of a superfamily without good hits
          if( ($n_good_hit{$fds[0]} == 0) && ($n_bad_hit{$fds[0]} == 0) ){
            if( $exist_flag == 0 ){
              $q_pdbrange = SUB_map_pdbseq ($pdb_chain, "$fds[1].$fds[2]", X );
              $qlength_range = SUB_map_pdbseq ($pdb_chain, "1.$fds[3]", X );
            }
            elsif( $exist_flag == 1 ){
              $q_pdbrange = SUB_map_pdbseq ($pdb_chain, "$fds[1].$fds[2]", "./new_domain_str/$pdb_chain.ca");
              $qlength_range = SUB_map_pdbseq ($pdb_chain, "1.$fds[3]", "./new_domain_str/$pdb_chain.ca");
            }
            chomp( $q_pdbrange );
            chomp( $qlength_range );
            $s_pdbrange = SUB_convert_nonlibpos_to_libres ("$path_str/$fds[8].ca", "$fds[5].$fds[6]");
            @tmp = split /\t/, $s_pdbrange;
            $my7digit_identifier = $tmp[0];
            $scop_res_range = $tmp[1];
            $s_scoplength_range = SUB_convert_nonlibpos_to_libres ("$path_str/$fds[8].ca", "1.$fds[7]");
            @tmp = split /\t/, $s_scoplength_range;
            $scop_length_res_range = $tmp[1];
            $bad_hit = 1;
            $n_bad_hit{$fds[0]} = 1;
            $hoa_superfam{$fds[0]}[1] = "$pdb_chain\t$q_pdbrange\t\t$qlength_range\t$my7digit_identifier\t$fds[9]\t$scop_res_range\t\t$scop_length_res_range\t$fds[4]\t\t\t\t$method_step\n";
          }
        }
      }
    }    
    # good hits go to the blast2 file under an ".out<step>" marker and onto @step2 / @step3
    if( $good_hit == 1){
      open( QO, ">>$pdb_chain.blast2");
      print QO "!^(NEW_FILE): $pdb_chain.out$method_step\n";
      @tmphit1 = ();
      foreach $superfam (keys %hoa_superfam){
        if( $n_good_hit{$superfam} > 0){
          for $i (1..$#{ $hoa_superfam{$superfam} }){
            print QO "$hoa_superfam{$superfam}[$i]";
            push @tmphit1, "$hoa_superfam{$superfam}[$i]";
          }
        }
      }
      if ($method_step == 2) { $step2ct ++; push @step2, [@tmphit1]; }
      if ($method_step == 3) { $step3ct ++; push @step3, [@tmphit1]; }
      close QO ;
    }
    # bad hits of superfamilies without any good hit go under an ".incmp<step>" marker
    if( $bad_hit == 1){
      open( QO, ">>$pdb_chain.blast2");
      print QO "!^(NEW_FILE): $pdb_chain.incmp$method_step\n";
      foreach $superfam (keys %hoa_superfam){
        if( ($n_good_hit{$superfam} == 0) && ($n_bad_hit{$superfam} > 0) ){ print QO "$hoa_superfam{$superfam}[1]"; }
      }
      close QO ;
    }
  } # END OF METHOD_STEPS 2 & 3
  elsif ( $method_step == 1) {
    $sendvar_3[0] = $br_filename;
    $sendvar_3[1] = 100;
    $tmp_p = SUB_br_qs_range_p ( @sendvar_3 );
    if( $tmp_p eq "No_hits_found" ){ $retmess5a = "No_hits_found"; return ($retmess5a, $retmess5b); }
    # one hit per line, tab-separated; as used below: [0] subject pdb chain, [1] query start,
    # [2] query end, [3] query length, [4] E-value, [5] subject start, [6] subject end,
    # [7] subject length, [8] identity
    @hits = split( /\n/, $tmp_p );
    # pass 1: log every hit that fails the acceptance test to the daliconfirm file.  A hit is
    # accepted when E-value <= cutoff, identity >= 25, the query is covered end-to-end and
    # either the subject is covered end-to-end or identity >= 80.
    foreach $hit (@hits){
      @fds = split( /\t/, $hit );
      if( $fds[4] <= $evalue_cutoff ){
        if($fds[8] >= 25 ){
          if( ($fds[1] <= $MAX_END ) && ($fds[2] >= ($fds[3] - $MAX_END)) ){
            if( ( ($fds[5] <= $MAX_END) && ($fds[6] >= ($fds[7] - $MAX_END)) ) || ($fds[8] >= 80) ) { next; }
          }
        }
      }
      open(DALICONFIRMOUT, ">>$pdb_chain.daliconfirm");
      print DALICONFIRMOUT "$fds[0]\t$fds[4]\t\-simple.br\n";
      close(DALICONFIRMOUT);
    }
    # pass 2: turn up to $MAX_HIT accepted hits into per-domain output lines
    foreach $hit (@hits){
      @fds = split( /\t/, $hit );
      $s_pdb_ch = $fds[0];
      $qstart = $fds[1];
      $qend = $fds[2];
      $qlen = $fds[3];
      $evalue = $fds[4];
      $sstart = $fds[5];
      $send = $fds[6];
      $slen = $fds[7];
      $identity = $fds[8];
      next if( $evalue > $evalue_cutoff ) ;
      if( ($nhit < $MAX_HIT) || ($ever_hit == 0) ){
        if($identity >= 25 ){
          if( ($qstart <= $MAX_END ) && ($qend >= ($qlen - $MAX_END)) ){
            if( ( ($sstart <= $MAX_END) && ($send >= ($slen - $MAX_END)) ) || ($identity >= 80) ){
              if( $exist_flag == 0 ){
                $q_pdbrange = SUB_map_pdbseq ($pdb_chain, "$qstart.$qend", X );
                $qlength_range = SUB_map_pdbseq ($pdb_chain, "1.$qlen", X);
              }
              elsif( $exist_flag == 1 ){
                $q_pdbrange = SUB_map_pdbseq ($pdb_chain, "$qstart.$qend", "./new_domain_str/$pdb_chain.ca");
                $qlength_range = SUB_map_pdbseq ($pdb_chain, "1.$qlen", "./new_domain_str/$pdb_chain.ca");
              }
              chomp( $q_pdbrange );
              chomp( $qlength_range );
              $s_pdbrange = SUB_map_pdbseq ( $s_pdb_ch, "$sstart.$send", X);
              $s_scoplength_range = SUB_map_pdbseq ( $s_pdb_ch, "1.$slen", X);
              chomp( $s_pdbrange );
              chomp( $s_scoplength_range );
              # SUB_map_pdbseq returns "no_pdb" when the subject pdb file cannot be obtained
              if( $s_pdbrange eq "no_pdb" ){ $retmess5b .= "WARNING: in $pdb_chain step 1, disregard hit to $s_pdb_ch because of obsolete pdbid\n"; next; }
              @sendvar_4[0] = $pdb_chain; @sendvar_4[1] = $q_pdbrange; @sendvar_4[2] = $qlength_range; @sendvar_4[3] = $s_pdb_ch; @sendvar_4[4] = $s_pdbrange; @sendvar_4[5] = $s_scoplength_range; @sendvar_4[6] = $evalue; @sendvar_4[7] = $method_step;
              @br_1 = SUB_wholeseq2domainoutput ( @sendvar_4 );
              if( !defined($br_1[0]) ){ $retmess5b .= "WARNING: in $pdb_chain step 1, disregard hit to $s_pdb_ch because it is not in SCOP\n"; next; }
              if( $exist_flag == 1 ){
                # keep only the domain lines whose superfamily (first three parts of field [5])
                # differs from $eSuperfam_id
                @br_11 = ();
                foreach $dbr (@br_1){
                  chomp $dbr;
                  @tmp = split /\t/, $dbr;
                  @tmpa = split /\./, $tmp[5];
                  $superfam = join ".", $tmpa[0], $tmpa[1], $tmpa[2];
                  if( $superfam ne $eSuperfam_id ){ @br_11 = (@br_11, $dbr); }
                }
                next if( !defined( $br_11[0] ) ); ## all domains belong to the same eSuperfamily ##
                @br_1 = @br_11;
              }
              @br = ( @br, @br_1 );
              $nhit++;
              $ever_hit = 1;
              if( $nhit == 1 ){ $retmess5a .= "$pdb_chain\t$s_pdb_ch\t$evalue"; }
            }
          }
        }
      }
    }    
    if( $ever_hit == 0 ) { $retmess5a = "UA"; return ($retmess5a, $retmess5b); }
    if( $ever_hit == 1 ) {
      open( QO, ">>$pdb_chain.blast2" );
      @tmphit1 = ();
      print QO "!^(NEW_FILE): $pdb_chain.out$method_step\n";
      foreach $r (@br){ print QO "$r\n"; push @tmphit1, "$r\n"; }
      close QO;
      $step1ct ++; push @step1, [@tmphit1];
    }
    # $skip is set to 1 right before it is tested, so the block below never runs
    $skip = 1;
    if( $skip == 0 ){
      if( $ever_hit == 0 ){
        $nhit = 0;
        foreach $hit (@hits){
          @fds = split( /\t/, $hit );
          if( $nhit < $MAX_HIT ){
            if( $exist_flag == 0 ){
              $q_pdbrange = SUB_map_pdbseq ( $pdb_chain, "$fds[1].$fds[2]", X);
              $qlength_range = SUB_map_pdbseq ( $pdb_chain, "1.$fds[3]", X);
            }
            elsif( $exist_flag == 1 ){
              $q_pdbrange = SUB_map_pdbseq ( $pdb_chain, "$fds[1].$fds[2]", "./new_domain_str/$pdb_chain.ca");
              $qlength_range = SUB_map_pdbseq ( $pdb_chain, "1.$fds[3]", "./new_domain_str/$pdb_chain.ca");
            }
            chomp( $q_pdbrange );
            chomp( $qlength_range );
            $s_pdbrange = SUB_map_pdbseq ( $fds[0], "$fds[5].$fds[6]", X);
            $s_scoplength_range = SUB_map_pdbseq ( $fds[0], "1.$fds[7]", X);
            chomp( $s_pdbrange );
            chomp( $s_scoplength_range );
            $s_pdb_ch = $fds[0];
            $evalue = $fds[4];
            @sendvar_4[0] = $pdb_chain; @sendvar_4[1] = $q_pdbrange; @sendvar_4[2] = $qlength_range; @sendvar_4[3] = $s_pdb_ch; @sendvar_4[4] = $s_pdbrange; @sendvar_4[5] = $s_scoplength_range; @sendvar_4[6] = $evalue; @sendvar_4[7] = $method_step;
            @fa_1 = SUB_wholeseq2domainoutput ( @sendvar_4 );
            @fa = ( @fa, @fa_1 );
            $nhit++;
            $ever_hit = 2;
          }
        }    
      }
      if( $ever_hit == 2 ) {
        open( QO, ">>$pdb_chain.blast2" );
        print QO "!^(NEW_FILE): $pdb_chain.incmp$method_step\n";
        foreach $r (@fa){ print QO "$r\n"; }
        close(QO);
      }
    } ## end of skip
  }
  # release the working arrays/hashes before returning
  @hits = (); @br = (); @fa = (); %hoa_superfam = (); %n_good_hit = (); %n_bad_hit = (); @tmphit1 = (); @fa_1 = ();
  return ($retmess5a, $retmess5b);
}
### END OF SUB_chkBr_14fields


##########
#
# subroutine use: SUB_br_qs_range ( @sendvar_2 )
# script: br_qs_range.pl
# - arguments: [0] report file name, [1] E-value ceiling, [2] superfamily id to leave out ("" for none)
# - function: reads the ~-RPS.br/~-SCOPd.br file (of -j==1) and, for every reported alignment, outputs
#	      the superfamily taken from the first word (sccs) after the name of the subject sequence,
#	      the query's aligned range (start, end), the query length, the E-value, the subject's
#	      aligned range, the length of the subject sequence, the subject id and the sccs
# - alignments with an E-value above the ceiling are dropped; subjects of the given superfamily are skipped
# - returns one tab-separated line per alignment, or "No_hits_found"

sub SUB_br_qs_range {
  $br_file = $_[0];
  $evalue_cutoff_2 = $_[1];
  $eSuperfam_id = $_[2];
  if ($eSuperfam_id eq "") { $exist_flag_s = 0; }
  else { $exist_flag_s = 1; }
  open( BR, "$br_file" ); @all_br = <BR>;
  # $flag_s: 0 = current lines are ignored, 1 = inside a subject entry, waiting for the first
  # Query/Sbjct pair of an alignment, 2 = alignment in progress (an output line is pending)
  $flag_s = 0;
  $retmess4 = "";
  foreach $brline (@all_br) {
    chomp $brline;
    if($brline =~ /letters\)/ ){
      # query length: the text between "(" and " letters)"
      @tmpa = split( /\(/, $` );
      $length = $tmpa[1];
      next;
    }
    if($brline =~ /No hits found/ ){ $retmess4 = "No_hits_found"; close (BR); @all_br = (); return $retmess4; }
    if($brline =~ /^>/ ){
      # a new subject starts: first emit the alignment still pending from the previous subject
      if ($flag_s == 2) { $retmess4 .= "$superfam\t$qstart\t$qend\t$length\t$evalue\t$sstart\t$send\t$slen\t$suid\t$sccs\n"; }
      @tmp = split/ /, $brline;
      $sccs = $tmp[1];
      $suid1 = $tmp[0];
      # subject id: the 7 characters following ">"
      $suid = substr( $suid1, 1, 7 );
      @tmpb = split( /\./, $sccs );
      # superfamily: the first three dot-separated parts of the sccs
      $superfam = $tmpb[0].".".$tmpb[1].".".$tmpb[2];
      if( $exist_flag_s == 1 ){ if( $superfam eq $eSuperfam_id ){ $flag_s = 0; next; } }
      $flag_s = 1;
      next;
    }
    if( ($flag_s == 1) && ($brline =~ /Length = /) ){ $slen = $'; next; }
    if( ($flag_s == 1) && ($brline =~ /Expect = /) ){ 
      $evalue = $';
      # a value written without mantissa (starting with "e") gets a leading "1"
      if( $evalue =~ /^e/ ){ 
        $evalue1 = "1".$evalue;
        $evalue = $evalue1;
      }       
      if( $evalue > $evalue_cutoff_2 ){ $flag_s = 0; next; }
      next;
    }
    if( ($flag_s == 2) && ($brline =~ /Expect = /) ){
      # a further alignment of the same subject starts: emit the pending one first
      $retmess4 .= "$superfam\t$qstart\t$qend\t$length\t$evalue\t$sstart\t$send\t$slen\t$suid\t$sccs\n";
      $evalue = $';
      if( $evalue =~ /^e/ ){ 
        $evalue1 = "1".$evalue;
        $evalue = $evalue1;
      }       
      if( $evalue > $evalue_cutoff_2 ){ $flag_s = 0; next; }
      $flag_s = 1;
      next;
    } 
    # first Query/Sbjct pair of an alignment sets start and end; later pairs only move the end
    if( ($flag_s == 1) && ($brline =~ /^Query/ ) ){ @ztmp1 = split/ /, $brline; $qstart = $ztmp1[1]; $qend = $ztmp1[$#ztmp1]; next; }
    if( ($flag_s == 1) && ($brline =~ /^Sbjct/ ) ){ @ztmp = split/ /, $brline; $sstart = $ztmp[1]; $send = $ztmp[$#ztmp]; $flag_s = 2; next; }
    if( ($flag_s == 2) && ($brline =~ /^Query/ ) ){ @ztmp2 = split/ /, $brline; $qend = $ztmp2[$#ztmp2]; next; }
    if( ($flag_s == 2) && ($brline =~ /^Sbjct/ ) ){ @ztmp = split/ /, $brline; $send = $ztmp[$#ztmp]; next; }
  }
  # emit the alignment that was still in progress at the end of the file
  if( $flag_s == 2){ $retmess4 .= "$superfam\t$qstart\t$qend\t$length\t$evalue\t$sstart\t$send\t$slen\t$suid\t$sccs\n"; }
  close (BR); @all_br = (); @ztmp = (); @ztmp1 = (); @ztmp2 = ();
  return $retmess4;
}
### END OF SUB_br_qs_range


##########
#
# subroutine use: SUB_map_pdbseq ( $pdb_chain, $seq_range, $input_pdb )
# script: map_pdbseq.pl
# - converts the residue range in sequence number(1.10) to pdb_residue_number(-1.9)
# - $pdb_chain is "<pdbid>_<chain>" (a missing chain part stands for a blank chain column)
# - $input_pdb is either "X" (read <pdbid>.pdb, fetching it first if it is not present) or the
#   path of a file to read instead (temporarily moved to <pdb_chain>.pdb while it is read)
# - residues are counted over the CA records of the chain in the first model
# - returns "<chain>:<first>.<chain>:<last>\n", or "no_pdb" when the pdb file cannot be obtained
# - $myfirst_res / $mylast_res are globals that are only set when the requested position is
#   reached, so for a range beyond the end of the chain they keep the value of an earlier call

sub SUB_map_pdbseq {
  ($pdb_chain_t, $seq_range_t, $input_pdb) = @_[0 .. 2];
  $texist_flag = ($input_pdb eq "X") ? 0 : 1;
  @tmp2 = split( /_/, $pdb_chain_t );
  ($pdbid_t, $chain_t) = @tmp2[0, 1];
  $chain_t = ' ' if ( $chain_t eq '' );
  @tmp3 = split( /\./, $seq_range_t );
  ($first_res, $last_res) = @tmp3[0, 1];
  if( $texist_flag == 1 ){
    # read the caller's file under the name <pdb_chain>.pdb; it is moved back before returning
    `mv $input_pdb $pdb_chain_t.pdb`;
    open(PDBIN, "$pdb_chain_t.pdb");
  }
  else {
    if (-e "$pdbid_t.pdb" == 0) {
      if ($input_type eq "NEW" && -e "$currdir/SAVE.$pdbid_t.SAVE" == 1) { `cp $currdir/SAVE.$pdbid_t.SAVE $pdbid_t.pdb`; }
      else { `$path_bin/pdbcp.pl $pdbid_t`; }
    }
    return "no_pdb" if (-e "$pdbid_t.pdb" == 0);
    open(PDBIN, "$pdbid_t.pdb");
  }
  @all_pdbin = <PDBIN>;
  ($residue_count, $prev_resnum) = (0, "null");
  foreach $msline (@all_pdbin) {
    last if ($msline =~ /^ENDMDL/);
    next unless ($msline =~ /^(?:ATOM|HETATM)/);
    next if (substr($msline, 12, 4) ne " CA ");
    $this_chain_t = substr($msline, 21, 1);
    next if ($this_chain_t ne $chain_t);
    $resnum = substr($msline, 22, 6);
    # a repeated residue number is only counted again when its occupancy column reads "1.00"
    next if ($prev_resnum eq $resnum && substr($msline, 56, 4) ne "1.00");
    $prev_resnum = $resnum;
    $residue_count++;
    ($myfirst_res = $resnum) =~ s/ //g if ($residue_count == $first_res);
    if ($residue_count == $last_res) {
      ($mylast_res = $resnum) =~ s/ //g;
      last;
    }
  }
  # a blank chain is reported as "_"
  $chain_t = '_' if ( $chain_t eq ' ' );
  $retmess6 = "$chain_t:$myfirst_res.$chain_t:$mylast_res\n";
  close(PDBIN); @all_pdbin = ();
  `mv $pdb_chain_t.pdb $input_pdb` if ( $texist_flag == 1 );
  return $retmess6;
}
###END OF SUB_map_pdbseq


############
#
# subroutine use: SUB_convert_nonlibpos_to_libres ( $dompath, $range )
# script: convert_nonlibpos_to_libres.pl
# - converts scop domain range to pdbcut format from "n1.n2" where n1 and n2 are position
#   of the residue in pdb domain file (sequentially); either position may be given in parentheses
# - when $dompath exists, the range is converted directly with that file
# - otherwise the domain is looked up in $convert_pairs to find its library rep, both sequences are
#   taken from $lib_scop_dircla_ss and aligned with $path_bin/align0, and the range is carried
#   over to the rep before it is converted with $path_str/<rep>.ca
# - returns 7-digit identifier of library scop rep and mapped residue range from that rep
#   (tab-separated), or "error" when the sequences or the aligned range cannot be found

sub SUB_convert_nonlibpos_to_libres {
  $dompath = @_[0];
  $range_t = @_[1];
  if (-e "$dompath" == 1) {
    $resrange = SUB_convert_range_pos_to_res ( $dompath, "$range_t");
    @ar0 = split/\//, $dompath;
    $domainid = substr($ar0[$#ar0], 0, 7);
    $retmess7 = "$domainid\t$resrange";
  }
  else {
    @ar1 = split/\//, $dompath;
    $domainid = substr($ar1[$#ar1], 0, 7);
    # the second tab-separated field of the matching $convert_pairs line is the library rep
    $grep1 = `grep $domainid $convert_pairs`;
    chomp $grep1;
    @ar2 = split/\t/, $grep1;
    $rep = $ar2[1];
    @ar3 = split/\./, $range_t;
    # strip parentheses from either end of the range, remembering them so that they can be
    # put back around the mapped positions
    if ($ar3[0] =~ /\(/) {
      @ar4 = split/\(/, $ar3[0];
      @ar5 = split/\)/, $ar4[1];
      $npos = $ar5[0];
      $n_par = 1;
    }
    else {
      $npos = $ar3[0];
      $n_par = 0;
    }
    if ($ar3[1] =~ /\(/) {
      @ar4 = split/\(/, $ar3[1];
      @ar5 = split/\)/, $ar4[1];
      $cpos = $ar5[0];
      $c_par = 1;
    }
    else {
      $cpos = $ar3[1];
      $c_par = 0;
    }
    $range_t = "$npos.$cpos";
    open(IN, "$lib_scop_dircla_ss");
    @all_clustfa = <IN>;
    # write header and sequence line of the library rep.  Both lookups compare the 6 characters
    # at offset 2 of a line with the 6 characters following the first character of the id;
    # the rep lookup used to take 7 characters from the line, which can never equal a
    # 6-character string, so the rep was never found and this branch always returned "error".
    open(LIBFA, ">$rep.fa");
    for ($j=0; $j!=($#all_clustfa+1); $j++) {
      next unless (substr(@all_clustfa[$j], 2, 6) eq substr($rep, 1, 6));
      print LIBFA "@all_clustfa[$j]";
      print LIBFA "@all_clustfa[$j+1]";
      last ;
    }
    close(LIBFA);
    # same for the non-library domain
    open(NONLIBFA, ">$domainid.fa");
    for ($j=0; $j!=($#all_clustfa+1); $j++) {
      next unless (substr(@all_clustfa[$j], 2, 6) eq substr($domainid, 1, 6));
      print NONLIBFA "@all_clustfa[$j]";
      print NONLIBFA "@all_clustfa[$j+1]";
      last ;
    }
    close(NONLIBFA);
    close(IN); @all_clustfa = ();
    # both files must hold exactly two lines (header + sequence) before they are aligned
    $lnct1 = `cat $domainid.fa | wc -l`; chomp $lnct1;
    $lnct2 = `cat $rep.fa | wc -l`; chomp $lnct2;
    if ($lnct1 == 2 && $lnct2 == 2) {
      system "$path_bin/align0 $domainid.fa $rep.fa > pair_aln_temp";
      $hitrange = SUB_find_range_hit_from_head ( $domainid, $rep, pair_aln_temp, $range_t );
      if ($hitrange =~ /\./) {
        @ar6 = split/\./, $hitrange;
        if ($n_par == 1) { $p1 = "($ar6[0])"; }
        else { $p1 = "$ar6[0]"; }
        if ($c_par == 1) { $p2 = "($ar6[1])"; }
        else { $p2 = "$ar6[1]"; }
        $hitrange = "$p1.$p2";
        $resrange = SUB_convert_range_pos_to_res ( "$path_str/$rep.ca", "$hitrange" );
        $retmess7 = "$rep\t$resrange";
      }
      else { $retmess7 = "error"; }
    }
    else { $retmess7 = "error"; }
    `rm $domainid.fa $rep.fa pair_aln_temp`;
  }
  @all_clustfa = ();
  return $retmess7;
}
### END OF SUB_convert_nonlibpos_to_libres


##########
#
# subroutine use: SUB_convert_range_pos_to_res ( $dompath, $range )
# script: convert_range_pos_to_res.pl
# - converts range to pdbcut format from "n1.n2" where n1 and n2 are position of residue in
#   pdb domain file (sequentially)

sub SUB_convert_range_pos_to_res {
  # Arguments:
  #   $_[0]  path to a PDB-format coordinate file
  #   $_[1]  position range "n1.n2", counted (1-based) over the CA records that
  #          survive the duplicate filter below; either end may be
  #          parenthesised, e.g. "5.(120)"
  # Returns "chain:res.chain:res" with the parentheses carried over and a blank
  # chain written as "_".  Returns "" when the file does not exist.
  # When n2 is never reached, the last residue seen that is not a non-MSE
  # HETATM is used for the C-terminal end.
  $dompath2 = $_[0];
  $range_t2 = $_[1];
  # These are package globals: clear whatever an earlier call left behind so a
  # position that is not found cannot silently reuse an old residue.
  $retmess8 = "";
  $zfirst_res = ""; $zfirst_chain = "";
  $zlast_res = ""; $zlast_chain = "";
  $zover_res = ""; $zover_chain = "";
  if (-e "$dompath2" == 1) {
    open(DOM, "$dompath2");
    @all_dom = <DOM>;
    close(DOM);
    @zar0 = split/\./, $range_t2;
    if ($zar0[0] =~ /\(/) {
      @zpar1a = split/\(/, $zar0[0];
      @zpar1b = split/\)/, $zpar1a[1];
      $zn1 = $zpar1b[0];
      $zn1par = 1;
    }
    else {
      $zn1 = $zar0[0];
      $zn1par = 0;
    }
    # The original tested $ar0[1], an unrelated global, so a parenthesised
    # C-terminal position was never unwrapped and never matched numerically.
    if ($zar0[1] =~ /\(/) {
      @zpar2a = split/\(/, $zar0[1];
      @zpar2b = split/\)/, $zpar2a[1];
      $zn2 = $zpar2b[0];
      $zn2par = 1;
    }
    else {
      $zn2 = $zar0[1];
      $zn2par = 0;
    }
    $zct = 0;
    $zprevres = "null";
    foreach $zline (@all_dom) {
      next unless (substr($zline, 13, 2) eq "CA");
      next unless ($zline =~ /^ATOM/ || $zline =~ /^HETATM/);
      # A repeated residue number is only counted again when its occupancy
      # field reads "1.00".
      if (substr($zline, 22, 6) eq $zprevres) {
        next if (substr($zline, 56, 4) ne "1.00");
      }
      $zct += 1;
      if ($zn1 == $zct) {
        $zfirst_res = substr($zline, 22, 6);
        $zfirst_res =~ s/ //g;
        $zfirst_chain = substr($zline, 21, 1);
      }
      if ($zn2 == $zct) {
        $zlast_res = substr($zline, 22, 6);
        $zlast_res =~ s/ //g;
        $zlast_chain = substr($zline, 21, 1);
      }
      $zprevres = substr($zline, 22, 6);
      # Track the fallback end residue, ignoring HETATM records other than MSE.
      next if ($zline =~ /^HETATM/ && $zline !~ /MSE/);
      $zover_res = substr($zline, 22, 6);
      $zover_res =~ s/ //g;
      $zover_chain = substr($zline, 21, 1);
    }
    if ($zn1par == 1) { $retmess8 .= "("; }
    if ($zfirst_chain ne " ") { $retmess8 .= "$zfirst_chain:"; }
    else { $retmess8 .= "_:"; }
    $retmess8 .= "$zfirst_res";
    if ($zn1par == 1) { $retmess8 .= ")."; }
    else { $retmess8 .= "."; }
    if ($zn2par == 1) { $retmess8 .= "("; }
    if ($zlast_chain eq "") {
      # n2 was never reached: fall back to the last usable residue.
      if ($zover_chain ne " ") { $retmess8 .= "$zover_chain:"; }
      else { $retmess8 .= "_:"; }
      $retmess8 .= "$zover_res";
    }
    else {
      if ($zlast_chain ne " ") { $retmess8 .= "$zlast_chain:"; }
      else { $retmess8 .= "_:"; }
      $retmess8 .= "$zlast_res";
    }
    if ($zn2par == 1) { $retmess8 .= ")"; }
    @all_dom = ();
  }
  return $retmess8;
}
### END OF SUB_convert_range_pos_to_res


##########
#
# subroutine use: SUB_find_range_hit_from_head ( $fulldom1, $fulldom2, $alnfile, $headrange )
# script: find_range_hit_from_head.pl 

sub SUB_find_range_hit_from_head {
  # Map a position range on the first sequence of a pairwise alignment onto
  # the second sequence.
  #   $_[0]  id of the first sequence; its first six characters must start the
  #          alignment rows that carry it
  #   $_[1]  id of the second sequence
  #   $_[2]  alignment file
  #   $_[3]  range "first.last" in positions of the first sequence
  # Returns "n.c" in positions of the second sequence, or "error" when either
  # end could not be located.
  # The parser expects "<name> <sequence>" rows, the second sequence two lines
  # below the first, and blocks repeating every six lines.
  # NOTE(review): that layout is read off the index arithmetic below; confirm
  # it against real align0 output.
  $fulldom1 = $_[0];
  $dom1_t = substr($fulldom1, 0, 6);
  $fulldom2 = $_[1];
  $dom2_t = substr($fulldom2, 0, 6);
  $alnfile = $_[2];
  $headrange = $_[3];
  @yar0 = split/\./, $headrange;
  $yfirstres = $yar0[0];
  $ylastres = $yar0[1];
  open(YIN, "$alnfile");
  @all_yin = <YIN>;
  close(YIN);
  # Count the rows of the first sequence and remember where the first one is.
  $startln = "null";
  $yrep = -1;
  for ($i=0; $i!=($#all_yin+1); $i++) {
    # \Q..\E: the id is literal text, not a pattern.
    next unless ($all_yin[$i] =~ /^\Q$dom1_t\E/);
    $yrep += 1;
    if ($startln eq "null") { $startln = $i; }
  }
  $yseq1ct = 0;
  $yseq2ct = 0;
  $ynpos = "null";
  $ycpos = "null";
  CHUNK: for ($i=0; $i!=($yrep+1); $i++) {
    @yar1 = split/ /, $all_yin[$startln+(6*$i)];
    $yseqln1 = $yar1[1];
    @yar2 = split/ /, $all_yin[$startln+2+(6*$i)];
    $yseqln2 = $yar2[1];
    for ($j=0; $j!=80; $j++) {
      $yaa1 = substr($yseqln1, $j, 1);
      $yaa2 = substr($yseqln2, $j, 1);
      last if ($yaa1 eq "\n");
      if ($yaa1 ne "-") { $yseq1ct += 1; }
      if ($yaa2 ne "-") { $yseq2ct += 1; }
      if ($yseq1ct == $yfirstres) {
        if ($ynpos eq "null") {
          # Opposite a gap, start at the next residue of the second sequence.
          if ($yaa2 eq "-") { $ynpos = $yseq2ct + 1; }
          else { $ynpos = $yseq2ct; }
        }
      }
      # Stop scanning altogether once the C-terminal end is found.  The
      # original only left the inner loop, so later blocks could overwrite
      # $ycpos using counters that had skipped the rest of this row.
      if ($yseq1ct == $ylastres) { $ycpos = $yseq2ct; last CHUNK; }
    }
  }
  if ($ycpos eq "null" || $ynpos eq "null") { $retmess9 = "error"; }
  # The original returned "$npos.$cpos", i.e. the caller's untranslated input
  # positions, instead of the values computed here.
  else { $retmess9 = "$ynpos.$ycpos"; }
  @all_yin = ();
  return $retmess9;
}
### END OF SUB_find_range_hit_from_head


##########
#
# subroutine use: SUB_br_qs_range_p ( @sendvar_3 )
# script: br_qs_range_p.pl
# - function: read the ~-SCOPp.br file (of -j==1), output the first word (sccs) after the name of the seq (can be changed),
#	      the corresponding's query's aligned range ($start\t$end), the aligned query length, and the 
#	      subject's aligned range, and the length of the subject seq. 3 for each sccs
# - only check for Evalue_cutoff, does not check for exist_same_superfamily (checked in the SUB_chkBr_14fields ($method_step == 1))

# Scan a BLAST text report and return one tab-separated line per reported
# alignment whose "Expect" value passes the cutoff:
#   subject id, query start, query end, query length, e-value,
#   subject start, subject end, subject length, identity
# Arguments:
#   $_[0]  report file name; it is split on "-", the first piece becomes the
#          query id and the second selects the method step
#   $_[1]  e-value cutoff
# Returns the accumulated lines, or "No_hits_found" as soon as that phrase is
# seen in the report.
# Side effect: for the first alignment of each subject, the aligned
# subject-position / query-position pairs are appended to "seqali.pos-pos"
# under a "!^<query>-<subject>.pos<step>" header line.
sub SUB_br_qs_range_p {
  $br_file = @_[0];
  $evalue_cutoff_2 = @_[1];
  @tmp3 = split /-/, $br_file;
  $q_pdb_ch = $tmp3[0];
  # $method_step keeps its previous (global) value when neither word matches.
  if( $tmp3[1] =~ /simple/ ){ $method_step = 1; }
  elsif( $tmp3[1] =~ /SCOPp/ ){ $method_step = 4; }
  open( BR, "$br_file" ); @all_br = <BR>;
  open( PO, ">>seqali.pos-pos");
  # $flag_s: 0 = not inside an accepted hit,
  #          1 = hit header seen, first Query/Sbjct rows not yet read,
  #          2 = at least one Query/Sbjct row pair of the current alignment read.
  $flag_s = 0;
  $retmess10 = "";
  foreach $brline (@all_br) {
    chomp $brline;
    # Query length: the text between "(" and "letters)".
    if($brline =~ /letters\)/ ){
      @tmpa = split( /\(/, $` );
      $q_len = $tmpa[1];
    }
    if($brline =~ /No hits found/ ){ $retmess10 = "No_hits_found"; close(PO); close(BR); @all_br = (); return $retmess10; }
    # A ">" line starts a new subject; flush the alignment that was in progress.
    if($brline =~ /^>/ ){
      if( $flag_s == 2){ $retmess10 .= "$s_pdb_ch\t$qstart\t$qend\t$q_len\t$evalue\t$sstart\t$send\t$slen\t$identity\n"; }
      @tmp = split( />/, $brline );
      $s_pdb_ch = $tmp[1];
      @tmp = split( / /, $s_pdb_ch );
      $s_pdb_ch = $tmp[0];
      $repeat = 0;
      $flag_s = 1;
      next;
    }
    if( ($flag_s == 1) && ($brline =~ /Length = /) ){ $slen = $'; next; }
    if( ($flag_s == 1) && ($brline =~ /Expect = /) ){
      $evalue = $';
      # A value written as "e-NN" gets a leading "1" so it compares numerically.
      if( $evalue =~ /^e/ ){ 
        $evalue1 = "1".$evalue;
        $evalue = $evalue1;
      }       
      if( $evalue > $evalue_cutoff_2 ){ $flag_s = 0; next; }
      next;
    }
    if( ($flag_s == 1) && ($brline =~ /Identities = /) ){
      @tmp = split /%\), Positives = /, $';
      @tmpa = split /\(/, $tmp[0];
      $identity = $tmpa[1];
      next;
    }
    # A second "Expect" under the same subject: flush the finished alignment
    # and start another one; $repeat = 1 suppresses position output for it.
    if( ($flag_s == 2) && ($brline =~ /Expect = /) ){
      $retmess10 .= "$s_pdb_ch\t$qstart\t$qend\t$q_len\t$evalue\t$sstart\t$send\t$slen\t$identity\n";
      $evalue = $';
      if( $evalue =~ /^e/ ){ 
        $evalue1 = "1".$evalue;
        $evalue = $evalue1;
      }       
      if( $evalue > $evalue_cutoff_2 ){
        $flag_s = 0;
        next;
      }
      $repeat = 1;
      $flag_s = 1;
      next;
    } 
    # First Query row of an alignment: start, end and sequence text are taken
    # from the space-separated fields.
    if( ($flag_s == 1) && ($brline =~ /^Query/ ) ){
      @tmp1 = split/ /, $brline;
      $qstart = $tmp1[1];
      $qend = $tmp1[$#tmp1];
      $q_seq = $tmp1[$#tmp1-1];
      next;
    }
    # First Sbjct row: write the header and the position pairs for this row.
    if( ($flag_s == 1) && ($brline =~ /^Sbjct/ ) ){
      @tmp = split/ /, $brline;
      $sstart = $tmp[1];
      $send = $tmp[$#tmp];
      $s_seq = $tmp[$#tmp-1];
      if( $repeat == 0 ){
        print PO "!^$q_pdb_ch-$s_pdb_ch.pos$method_step\n";
        @q_chars = split //, $q_seq;
        @s_chars = split //, $s_seq;
        $s_count = 0;
        $q_count = 0;
        # One output line per subject residue; a query gap repeats the last
        # query position.
        for $i (0..$#s_chars){
          if( $s_chars[$i] ne '-' ){
            $s_count++;
            if( $q_chars[$i] ne '-' ){ $q_count++; }
            $s_now = $sstart + $s_count -1;
            $q_now = $qstart + $q_count -1;
            print PO "$s_now\t$q_now\n";
          }
          else { if( $q_chars[$i] ne '-' ){ $q_count++; } }
        }
      }
      $flag_s = 2;
      next;
    }
    # Continuation rows of the same alignment only extend the end coordinates.
    if( ($flag_s == 2) && ($brline =~ /^Query/ ) ){
      @tmp2 = split/ /, $brline;
      $qend = $tmp2[$#tmp2];
      $q_seq = $tmp2[$#tmp2-1];
      next;
    }
    if( ($flag_s == 2) && ($brline =~ /^Sbjct/ ) ){
      @tmp = split/ /, $brline;
      $send = $tmp[$#tmp];
      $s_seq = $tmp[$#tmp-1];
      if( $repeat == 0 ){
	@q_chars = split //, $q_seq;
        @s_chars = split //, $s_seq;
        # $s_count and $q_count carry on from the previous row.
        for $i (0..$#s_chars){
          if( $s_chars[$i] ne '-' ){
            $s_count++;
            if( $q_chars[$i] ne '-' ){ $q_count++; }
            $s_now = $sstart + $s_count -1;
            $q_now = $qstart + $q_count -1;
            print PO "$s_now\t$q_now\n";
          }
          else { if( $q_chars[$i] ne '-' ){ $q_count++; } }
        }
      }
      next; 
    }
  }
  # Flush the alignment that was still open at end of file.
  if( $flag_s == 2){ $retmess10 .= "$s_pdb_ch\t$qstart\t$qend\t$q_len\t$evalue\t$sstart\t$send\t$slen\t$identity\n"; }
  close (PO);
  close (BR); @all_br = ();
  @tmp3 = (); @tmpa = (); @tmp = (); @tmp1 = (); @q_chars = (); @s_chars = (); @tmp2 = ();
  return $retmess10;
}
### END OF SUB_br_qs_range_p


##########
#
# subroutine use: SUB_wholeseq2domainoutput ( @sendvar_4 )
# script: wholeseq2domainoutput.pl
# - function: used in steps 1 and 4, for checking domain ranges in a whole chain_seq alignment.

sub SUB_wholeseq2domainoutput {
  # Turn one whole-chain hit into per-domain output records.
  # Eight positional arguments: query id, query hit range, query length range,
  # subject id, subject hit range, subject domain-length range, e-value and
  # method step.  They are stored in the script-wide globals of the same names.
  # Returns a list with one tab-separated record per SCOP domain found for the
  # subject range (empty when none is found).
  ($q_pdb_ch, $q_hit_pdbrange, $q_length_pdbrange, $s_pdb_ch,
   $s_hit_pdbrange, $s_scopdomainlength_pdbrange, $evalue_t, $method_step) = @_[0 .. 7];
  my @domain_output = ();
  @domain = SUB_y_pdbrange2SCOPdomain ( $s_pdb_ch, $s_hit_pdbrange );
  if (scalar(@domain) == 1) {
    # Exactly one domain: the whole-chain ranges are reported unchanged.
    @result = split /\t/, $domain[0];
    ($my7digit, $domain_name) = @result[0, 1];
    push @domain_output, "$q_pdb_ch\t$q_hit_pdbrange\t\t$q_length_pdbrange\t$my7digit\t$domain_name\t$s_hit_pdbrange\t\t$s_scopdomainlength_pdbrange\t$evalue_t\t\t\t\t$method_step";
  }
  elsif (scalar(@domain) > 1) {
    # Several domains: translate each subject segment into query residues.
    # The loop variable must stay the global $i: foreach localises it, which
    # keeps the caller's $i intact although get_pospos assigns to $i.
    foreach $i (@domain){
      @result = split /\t/, $i;
      ($my7digit, $domain_name, $hit_domain_range, $scop_domain_range) = @result[0, 1, 3, 4];
      SUB_other_new_cut ( $s_pdb_ch);
      @tmpra = split /,/, $hit_domain_range;
      $qhitrange = "";
      my @query_segments = ();
      for( $r2 = 0; $r2 <= $#tmpra; $r2++){
        # subject residues -> subject positions -> query positions -> query residues
        $s_pos_range = SUB_convert_range_res_to_pos ("./new_domain_str/$s_pdb_ch.ca", $tmpra[$r2]);
        @tmpd = split /\./, $s_pos_range;
        ($s_posrange_start_2, $s_posrange_end_2) = @tmpd[0, 1];
        ($q_posrange_start_2, $q_posrange_end_2) = get_pospos( $q_pdb_ch, $s_pdb_ch, $method_step, $s_posrange_start_2, $s_posrange_end_2 );
        $qhp2_pre = SUB_convert_range_pos_to_res ("./new_domain_str/$q_pdb_ch.ca", "$q_posrange_start_2.$q_posrange_end_2");
        push @query_segments, "$qhp2_pre";
      }
      $qhitrange = join(",", @query_segments);
      push @domain_output, "$q_pdb_ch\t$qhitrange\t\t$q_length_pdbrange\t$my7digit\t$domain_name\t$hit_domain_range\t\t$scop_domain_range\t$evalue_t\t\t\t\t$method_step";
    }
  }
  @domain = (); @result = (); @tmpra = (); @tmpd = ();
  return @domain_output;
}
sub  get_pospos {
  # Translate a subject position range into query positions using the pairs
  # stored in "seqali.pos-pos" (current directory).
  #   $_[0] query id, $_[1] subject id, $_[2] method step,
  #   $_[3] subject start position, $_[4] subject end position
  # Returns ($real_start, $real_end): the second column of the lines whose
  # first column equals the start / end position, taken from the first block
  # headed "!^<query>-<subject>.pos<step>".  An end that is not found comes
  # back undefined.
  $qpdbch = $_[0]; $spdbch = $_[1]; $method_step = $_[2]; $startp = $_[3]; $endp = $_[4];
  # Globals: forget the previous call's answer so a failed lookup cannot
  # hand back stale positions.
  $real_start = undef; $real_end = undef;
  open( PI, "seqali.pos-pos" );
  @pos = ();
  $foundp = 0;
  while( <PI>){
    # Any header line after the wanted one ends the block.
    last if( /^\!\^/ && $foundp == 1 );
    # \Q..\E: the ids are literal text, and the dot before "pos" is a real dot.
    if( /\!\^\Q$qpdbch-$spdbch\E\.pos\Q$method_step\E/ ){ $foundp = 1; next; }
    if( $foundp == 1){ push @pos, $_; }
  }
  close(PI);
  for( $i = 0; $i<= $#pos; $i++){
    chomp( $pos[$i]);
    @tmpp = split /\t/, $pos[$i];
    # No "next" after the start match: a one-residue range (start eq end)
    # must be able to set both values from the same line.
    if( $tmpp[0] eq $startp ){ $real_start = $tmpp[1]; }
    if( $tmpp[0] eq $endp ){ $real_end = $tmpp[1]; last; }
  }
  @pos = ();
  return ($real_start, $real_end);
}
### END OF SUB_wholeseq2domainoutput


##########
#
# subroutine use: SUB_y_pdbrange2SCOPdomain ( $pdbid_ch, $myrange)
# script: y_pdbrange2SCOPdomain.pl
# - output: its corresponding 7_digit_identifier (e.g. d1gl5a_), and SCOP_range, and the SCOP domain name (e.g. b.34.2.1)
# - detailed output: print tab-delimited the following 5 elements: [0]:7_digit_identifier, [1]:SCOP_domain_name(b.43.2.1), [2]:chain_id, [3]:range_domain_start_residue_number.range_domain_end_residue_number, [4]:SCOP_domain_start_residue_number.SCOP_domain_end_residue_number
#                    if there are 2 parts of range of one domain, added in [3] and [4] by  a comma ",".
#		     If there are more than 1 domain in the range, print each domain as a separate line.

sub SUB_y_pdbrange2SCOPdomain {
  # List the SCOP domains of a PDB chain that a residue range falls into.
  #   $_[0]  "pdbid_chain" (e.g. "1gl5_A"); without a chain part "_" is used
  #   $_[1]  residue range "chain:start.chain:end"
  # Returns one tab-separated record per domain:
  #   7-digit id, domain name, chain, clipped hit range(s), SCOP range(s)
  # Rows of $scoptab are read with the PDB id in column 4, the 7-digit id in
  # column 3 and the SCOP region in column 5.
  ($pdbid_ch, $myrange) = @_[0, 1];
  $DOMAIN_EXT = 5;
  @retmess11 = ();
  @tmp = split /\_/, $pdbid_ch;
  ($tpdb, $tchain) = @tmp[0, 1];
  $tchain = '_' unless defined( $tchain );
  $tmp0 = SUB_y_resolve_myrange ( $myrange );
  chomp( $tmp0 );
  @tmp = split /\t/, $tmp0;
  ($my_start_res, $my_end_res) = @tmp[1, 2];
  open( SCOP, "$scoptab" );
  while(<SCOP>){
    chomp;
    @tmp = split /\t/;
    next unless( $tmp[3] eq $tpdb );
    $my_7digit = $tmp[2];
    $scop_range1 = SUB_parseSCOPdomainRange_1 ($tchain, $tmp[4]);
    chomp( $scop_range1 );
    next if ($scop_range1 =~ /ERROR/);
    @scop_range = split /\t/, $scop_range1;
    # Code 0: the region belongs to another chain.
    next if( $scop_range[0] == 0 );
    if( $scop_range[0] == 1 ){
      # Code 1: the domain covers the whole chain, so both range columns are
      # simply the requested range.
      $domain_name = SUB_y_7digit2domainName ( $my_7digit);
      chomp( $domain_name );
      push @retmess11, "$my_7digit\t$domain_name\t$tchain\t$tchain:$my_start_res.$tchain:$my_end_res\t$tchain:$my_start_res.$tchain:$my_end_res";
      next;
    }
    next unless( $scop_range[0] == 2 );
    # Code 2: one or more explicit residue segments follow.
    $domain_name = SUB_y_7digit2domainName ( $my_7digit);
    chomp( $domain_name );
    $qsranges1 = ""; $qsranges2 = "";
    @tmpa = split /\./, $scop_range[1];
    # Skip the domain unless the requested range overlaps its first segment
    # by at least $DOMAIN_EXT residues on both sides.
    next if (($tmpa[1] - $my_start_res < $DOMAIN_EXT ) || ($my_end_res - $tmpa[0] < $DOMAIN_EXT));
    my @clipped_segments = ();
    my @scop_segments = ();
    for( $r9 = 1; $r9 <= $#scop_range; $r9++){
      @tmpa = split /\./, $scop_range[$r9];
      ($scop_start_res, $scop_end_res) = @tmpa[0, 1];
      # Clip the requested range to this segment.
      $range_start_res = ( $my_start_res > $scop_start_res ) ? $my_start_res : $scop_start_res;
      $range_end_res = ( $my_end_res < $scop_end_res ) ? $my_end_res : $scop_end_res;
      push @clipped_segments, "$tchain:$range_start_res.$tchain:$range_end_res";
      push @scop_segments, "$tchain:$scop_start_res.$tchain:$scop_end_res";
    }
    $qsranges1 = join(",", @clipped_segments);
    $qsranges2 = join(",", @scop_segments);
    push @retmess11, "$my_7digit\t$domain_name\t$tchain\t$qsranges1\t$qsranges2";
  }
  close(SCOP);
  return @retmess11;
}
### END OF SUB_y_pdbrange2SCOPdomain


##########
#
# subroutine use: SUB_other_new_cut ( $newdom )
# script: other_new_cut.pl

sub SUB_other_new_cut {
  # Cut one chain out of a PDB entry and store it as
  #   new_domain_str/<newdom>.pdb  (ATOM records, MSE HETATMs rewritten as MET)
  #   new_domain_str/<newdom>.ca   (CA records only)
  # Nothing is done when either file already exists.
  #   $_[0]  chain id string; characters 0-3 are used as the PDB id and
  #          character 5 as the chain ("_" meaning a blank chain column)
  # NOTE(review): files are written under the first six characters of the
  # argument but moved under the full argument, so the argument is presumably
  # always six characters long - confirm against callers.
  $newdom = $_[0];
  if (-e "new_domain_str/$newdom.ca" == 0 && -e "new_domain_str/$newdom.pdb" == 0) {
    $thispdbid = substr($newdom, 0, 4);
    # Fetch the source PDB file into the working directory if it is not there.
    if(-e "$thispdbid.pdb" == 0){
      if ($input_type eq "NEW") { `cp $currdir/SAVE.$thispdbid.SAVE $thispdbid.pdb`; }
      else { `$path_bin/pdbcp.pl $thispdbid`; }
    }
    $domainid = substr($newdom, 0, 6);
    $chain_id = substr($newdom, 5, 1);
    open(DOM, ">$domainid.pdb");
    print DOM "HEADER   domain from $thispdbid\n";
    print DOM "REMARK 99 Source-PDB: $thispdbid\n";
    print DOM "REMARK 99 Region: chain $chain_id\n";
    open(CAO, ">$domainid.ca");
    print CAO "HEADER   domain from $thispdbid\n";
    print CAO "REMARK 99 Source-PDB: $thispdbid\n";
    print CAO "REMARK 99 Region: chain $chain_id\n";
    # Keep only the first model: copy up to and including the first ENDMDL.
    open(TF1, "$thispdbid.pdb");
    @all_tf1 = <TF1>;
    open(TF2, ">pdbtemp1");
    foreach $cline (@all_tf1) {
      print TF2 "$cline";
      if ($cline =~ /^ENDMDL/) { print TF2 "END\n"; last; }
    }
    close(TF1); @all_tf1 = ();
    `rm $thispdbid.pdb`;
    close(TF2);
    open(TFA, "pdbtemp1");
    @all_tfa = <TFA>;
    $prev_resnum = "null";
    if ($chain_id eq "_") { $chain_id = " "; }
    # CA file: one CA record per residue of the chain; a repeated residue
    # number is only written again when its occupancy field reads "1.00".
    foreach $tfline (@all_tfa) {
      next unless (substr($tfline, 13, 2) eq "CA");
      next unless (substr($tfline, 21, 1) eq $chain_id);
      next unless ($tfline =~ /^ATOM/ || $tfline =~ /^HETATM/);
      $resnum = substr($tfline, 22, 6);
      if ($prev_resnum eq $resnum) { next if (substr($tfline, 56, 4) ne "1.00"); }
      $prev_resnum = $resnum;
      print CAO "$tfline";
    }
    print CAO "END\n";
    close(CAO);
    # Full-atom file: ATOM records of the chain plus any line mentioning MSE.
    foreach $tfline (@all_tfa) {
      next unless (substr($tfline, 21, 1) eq $chain_id);
      if ($tfline =~ /^ATOM/) { print DOM "$tfline"; }
      elsif ($tfline =~ /MSE/) {
        if ($tfline =~ /^HETATM/) {
          # Rewrite as an ATOM record named MET: keep columns 6-16, replace
          # the residue name in columns 17-19, keep everything from column 20.
          # The original took the tail with substr(..., -61), which is only
          # column 20 when the line is exactly 80 characters plus newline.
          $str1 = substr($tfline, 6, 11);
          $str2 = substr($tfline, 20);
          print DOM "ATOM  ${str1}MET$str2";
        }
        else { print DOM "$tfline"; }
      }
    }
    print DOM "END\n";
    close(DOM);
    close(TFA); @all_tfa = ();
    `rm pdbtemp1`;
    `mv $newdom.pdb new_domain_str/.`;
    `mv $newdom.ca new_domain_str/.`;
  }
}
### END OF SUB_other_new_cut


##########
#
# subroutine use: SUB_y_resolve_myrange ( $range )
# script: y_resolve_myrange.pl
# - function: input e.g.: $range = "B:12.B:77";
#             output: one tab-separated string "chain\tstart\tend\n" (e.g. "B\t12\t77\n"): the chain_id, the starting residue, and the ending residue.

sub SUB_y_resolve_myrange {
  # Break "chain:start.chain:end" (e.g. "B:12.B:77") into its parts and return
  # them as the single string "chain\tstart\tend\n".  The chain is taken from
  # the first half of the range only.
  $zrange = $_[0];
  @z1tmp = split /\./, $zrange;
  ($zfirst, $zsecond) = @z1tmp[0, 1];
  @z2tmp = split /:/, $zfirst;
  ($zchain, $zstart) = @z2tmp[0, 1];
  @z3tmp = split /:/, $zsecond;
  $zend = $z3tmp[1];
  $retmess12 = join("\t", $zchain, $zstart, $zend) . "\n";
  @z1tmp = (); @z2tmp = (); @z3tmp = ();
  return $retmess12;
}
### END OF SUB_y_resolve_myrange  


##########
#
# subroutine use: SUB_y_7digit2domainName ( $my_7digit )
# script: y_7digit2domainName.pl
# - return: the SCOP_domain_name that $my_7digit is in. (e.g. b.34.2.1)

sub SUB_y_7digit2domainName {
  # Look up a 7-digit SCOP identifier in $lib_scop_dircla (tab-separated) and
  # return the fourth column of the first row whose first column matches.
  # The value is returned as read, so it carries a trailing newline when the
  # fourth column is the last one; callers chomp it.
  # Returns nothing (undef in scalar context) when the id is not listed or
  # the file cannot be opened.
  $my_7digit = $_[0];
  open( DIR, "$lib_scop_dircla" ) or return;
  while(<DIR>){
    @ztmp = split /\t/;
    if( $ztmp[0] eq $my_7digit ){
      # Close before returning; the original left the handle open on a hit.
      close(DIR);
      return $ztmp[3];
    }
  }
  close(DIR);
  return;
}
### END OF SUB_y_7digit2domainName


##########
#
# subroutine use: SUB_convert_range_res_to_pos ($dompath, $resrange)
# script: convert_range_res_to_pos.pl
# - converts range for query domain from pdb-residue-name format (A:1.A:100) to position format 1.100

sub SUB_convert_range_res_to_pos {
  # Arguments:
  #   $_[0]  path to a PDB-format coordinate file
  #   $_[1]  residue range "chain:res.chain:res"; either end may be
  #          parenthesised, and chain "_" stands for a blank chain column
  # Returns "n1.n2" (parentheses carried over), where n1/n2 are the 1-based
  # counts of CA records (ATOM or HETATM) at which the residues were seen.
  # When a residue occurs more than once, the last occurrence wins.
  # An end that is not found is left empty; a missing file gives "".
  $dompath2 = $_[0];
  $resrange2 = $_[1];
  # $zn1, $zn2 and $retmess13 are package globals (the first two shared with
  # SUB_convert_range_pos_to_res): reset them so a residue that is not found
  # cannot report a position left over from an earlier call.
  $retmess13 = "";
  $zn1 = ""; $zn2 = "";
  if (-e "$dompath2" == 1) {
    open(DOM, "$dompath2");
    @all_dom = <DOM>;
    close(DOM);
    @zar0 = split/\./, $resrange2;
    if ($zar0[0] =~ /\(/) {
      @zpar1a = split/\(/, $zar0[0];
      @zpar1b = split/\)/, $zpar1a[1];
      $zar0[0] = $zpar1b[0];
      $zn1par = 1;
    }
    else { $zn1par = 0; }
    if ($zar0[1] =~ /\(/) {
      @zpar2a = split/\(/, $zar0[1];
      @zpar2b = split/\)/, $zpar2a[1];
      $zar0[1] = $zpar2b[0];
      $zn2par = 1;
    }
    else { $zn2par = 0; }
    @zar0a = split/\:/, $zar0[0];
    $zchain1 = $zar0a[0];
    $zfirstres = $zar0a[1];
    @zar0b = split/\:/, $zar0[1];
    $zchain2 = $zar0b[0];
    $zlastres = $zar0b[1];
    if ($zchain1 eq "_") { $zchain1 = " "; }
    if ($zchain2 eq "_") { $zchain2 = " "; }
    $zct = 0;
    foreach $zline (@all_dom) {
      next unless (substr($zline, 13, 2) eq "CA");
      next unless ($zline =~ /^ATOM/ || $zline =~ /^HETATM/);
      $zct += 1;
      $zthisres = substr($zline, 22, 6);
      $zthisres =~ s/ //g;
      if ($zchain1 eq substr($zline, 21, 1)) {
        if ($zthisres eq $zfirstres) { $zn1 = $zct; }
      }
      if ($zchain2 eq substr($zline, 21, 1)) {
        if ($zthisres eq $zlastres) { $zn2 = $zct; }
      }
    }
    if ($zn1par == 1) { $retmess13 .= "($zn1)."; }
    else { $retmess13 .= "$zn1."; }
    if ($zn2par == 1) { $retmess13 .= "($zn2)"; }
    else { $retmess13 .= "$zn2"; }
    @all_dom = ();
  }
  return $retmess13;
}
### END OF SUB_convert_range_res_to_pos


##########
#
# subroutine use: SUB_parseSCOPdomainRange_1 ($chain_id, $SCOP_range)
# script: parseSCOPdomainRange_1.pl
# - function: parse the residue range (column 5) in scop_1.6~.tab, and compare the chain_id with the input chain_id. if the same chain, return the parsed residue range; if not the same chain, return 0.
# - return: if the SCOP_range chain_id and the input chain_id are not the same, return 0.
#           if the chain_id's are the same, see below:
#              if a whole chain (or the whole chain is part of the SCOP_domain, e.g. 1i7c_A), return = 1;
#              if there is a residue range, return element[0] = 2, element[1] = "resid1.resid2";
#              and if there are more than 1 range, return element[2] = "resid3.resid4".
#           if there is no chain name, the chain is represented as (and compared with) "_".

sub SUB_parseSCOPdomainRange_1 {
  # Parse a SCOP region string and report what it says about one chain.
  #   $_[0]  chain id to look for ("_" for "no chain name")
  #   $_[1]  SCOP region, e.g. "A:", "A:1-50", "1-50", "A:1-50,A:80-120"
  # Returns
  #   "0"                          the region does not mention this chain
  #   "1"                          the region covers the whole chain
  #   "2\tstart.end[\tstart.end]\n"  explicit segment(s) on this chain
  #   "ERROR: ..."                 the region could not be split as expected
  # A leading "-" on a start residue (negative numbering) is preserved.
  $mychain_id = $_[0];
  $scop_range = $_[1];
  @sprange = ();
  $special_case_1 = -1;
  if( $scop_range =~ /,/ ){
    # Several regions: collect every segment that lies on the wanted chain.
    @rs = split /,/, $scop_range;
    foreach $r1 (@rs){
      # Reset per region; the original let a whole-chain marker from an
      # earlier region leak into a later region written without a colon.
      $special_case_1 = -1;
      if( $r1 =~ /:/ ){
        $ch1 = $`;
        $reg1 = $';
        if( $reg1 eq '' ){ $special_case_1 = 1; }
        else{
          $special_case_1 = 0;
          @tmp = split /-/, $reg1;
          if( substr( $reg1, 0, 1 ) ne '-' ){
            $s1 = $tmp[0];
            $e1 = $tmp[1];
          }
          else {
            $s1 = '-'.$tmp[1];
            $e1 = $tmp[2];
          }
        }
      }
      else{
        @tmp = split /-/, $r1;
        $ch1 = '_';
        if( substr( $r1, 0, 1 ) ne '-' ){
          $s1 = $tmp[0];
          $e1 = $tmp[1];
        }
        else{
          $s1 = '-'.$tmp[1];
          $e1 = $tmp[2];
        }
      }
      if( ($special_case_1 == 1) && ($mychain_id eq $ch1) ) { return "1"; }
      if( $mychain_id eq $ch1 ){ push @sprange, "$s1.$e1"; }
    }
    # Decide on what was collected.  The original looked only at the chain of
    # the last region, so a domain spanning chains A and B answered "0" for
    # chain A even though segments on A had been found.
    if( @sprange ){
      $sb = join("\t", "2", @sprange) . "\n";
      return $sb;
    }
    else { return "0"; }
  }
  elsif( $scop_range =~ /:/ ){
    # One region with a chain name: "A:" or "A:start-end".
    @tmp = split /:/, $scop_range;
    if( $#tmp == 1){
      $ch = $tmp[0];
      @tmpa = split /-/, $tmp[1];
      if( substr( $tmp[1], 0, 1) ne '-' ){
        $s = $tmpa[0];
        $e = $tmpa[1];
      }
      else{
        $s = '-'.$tmpa[1];
        $e = $tmpa[2];
      }
      if( $mychain_id eq $ch ){  $sb = "2\t$s.$e\n"; return $sb; }
      else { return "0"; }
    }
    elsif( $#tmp == 0 ){
      $ch = $tmp[0];
      if( $mychain_id eq $ch ){ return "1"; }
      else { return "0"; }
    }
    else { $sb = "ERROR: in range: no ',' and ':' split more than 2 cases\n       $scop_range\n"; return $sb; }
  }
  else {
    # One region without a chain name; the chain is "_".  An empty string or a
    # lone "-" splits into nothing and means the whole chain.
    @tmpb = split /-/, $scop_range;
    if( $#tmpb == -1 ){
      $ch = '_';
      if( $mychain_id eq $ch ){ return "1"; }
      else { return "0"; }
    }
    elsif ( $#tmpb == 1 ){
      $ch = '_';
      $s = $tmpb[0];
      $e = $tmpb[1];
      if( $mychain_id eq $ch ){ $sb = "2\t$s.$e\n"; return $sb; }
      else { return "0"; }
    }
    elsif ( $#tmpb == 2 ){
      $ch = '_';
      $s = '-'.$tmpb[1];
      $e = $tmpb[2];
      if( $mychain_id eq $ch ){ $sb = "2\t$s.$e\n"; return $sb; }
      else { return "0"; }
    }
    else { $sb = "ERROR: in range: no ',' and no ':', '-' split more than 3 cases\n       $scop_range\n"; return $sb; }
  }
}
### END OF SUB_parseSCOPdomainRange_1


##########
#
# subroutine use: SUB_get_br_aln_specific ( $thisdomid )
# script: get_br_aln_specific.pl
# - make multiple alignment from PSI-BLAST results

# Extract the multiple alignment printed after the last PSI-BLAST round for
# one domain and write it to "new_domain_aln/<domainid>.br.aln".
#   $_[0]  domain id; its report is looked for inside "$datedir/<domainid>.blast"
#          after the marker line "!^(NEW_FILE): <domainid>-nr.br"
# Output rows are "<label>/<start>-<end>   <sequence>", one blank line after
# each alignment block.  Returns nothing useful.
# NOTE(review): the meaning of the alignment-row fields (start / sequence /
# end) is inferred from how they are used here; confirm against a real report.
sub SUB_get_br_aln_specific {
  my $domainid = @_[0];
  $dvar = `cat $datedir/$domainid.blast`;
  @all_br = split/\n/, $dvar;
  open(OUT, ">new_domain_aln/$domainid.br.aln");
  $round = 0;
  $flag_s = -1;
  # Pass 1: find this domain's marker line ($flag_s = its index) and count the
  # "Results from round" lines between it and the next NEW_FILE marker.
  for ($i=0; $i!=($#all_br+1); $i++) {
    if ($flag_s > -1) { last if ($all_br[$i] =~ /\!\^\(NEW\_FILE\)\:/); next unless ($all_br[$i] =~ /^Results from round/); $round += 1; }
    else { if ($all_br[$i] =~ /\!\^\(NEW\_FILE\)\: $domainid\-nr\.br/) { $flag_s = $i; } }
  }
  # No round found: leave the (empty) output file and stop.
  if ($round == 0) { close(OUT); $dvar = ""; @all_br = (); return; }
  $findround = 0;
  $start = 0;
  $hitct = 0;
  @list[0] = "QUERY";
  $chunkct = -1;
  # Pass 2: once the last "Results from round" line has been passed, collect
  # one identifier per line containing "|" into @list (slot 0 is "QUERY").
  # The first line beginning with QUERY ends the hit list; its index is kept
  # in $startpt as the start of the alignment.
  for ($i=$flag_s; $i!=($#all_br+1); $i++) {
    if ($findround == $round) {
      if ($start == 0) { if (@all_br[$i] =~ /\|/) { $start += 1; } }
      if ($start == 1) {
        if (@all_br[$i] =~ /^QUERY/) { $startpt = $i; last; }
        next unless (@all_br[$i] =~ /\|/);
        $hitct += 1;
        @ar = split/\|/, @all_br[$i];
        # Identifier: the second "|" field, or the first word of the third
        # field when the second is empty.
        if (@ar[1] eq "") { @ar2 = split/ /, @ar[2]; $list[$hitct] = @ar2[0]; }
        else {
          if (@ar[1] =~ / /) { @anar = split/ /, @ar[1]; $list[$hitct] = @anar[0]; }
          else {
            $list[$hitct] = @ar[1];
            # Lines starting with "pdb": append "_" and the first character of
            # the third field unless that character is a space.
            if (@all_br[$i] =~ /^pdb/) {
              $pdbchainid = substr(@ar[2], 0, 1);
              if ($pdbchainid ne " ") {
                $list[$hitct] .= "_";
                $list[$hitct] .= "$pdbchainid";
              }
            }
          }
        }
      }
    }
    if (@all_br[$i] =~ /^Results from round/) {$findround += 1;}
  }
  $newhitct = -1;
  # Pass 3: label each row of the first alignment block.  The row's first word
  # is matched against @list in either direction (as a pattern); unmatched
  # rows are labelled "?".
  for ($i=$startpt; $i!=($#all_br+1); $i++) {
    last if ($all_br[$i] eq "" | $all_br[$i] =~ /^Searching/ | $all_br[$i] =~ /^  Database: nr/ | $all_br[$i] =~ /\!\^\(NEW\_FILE\)/);
    $newhitct += 1;
    @ar4 = split/ /, @all_br[$i];
    $f1 = 0;
    for ($j=0; $j!=($hitct+1); $j++) { if (@list[$j] =~ /@ar4[0]/) { $brhits[0][$i-$startpt] = @list[$j]; $f1 = 1; last; } }
    if ($f1 == 0) { for ($j=0; $j!=($hitct+1); $j++) { if (@ar4[0] =~ /@list[$j]/) { $brhits[0][$i-$startpt] = @list[$j]; $f1 = 1; last; } } }
    if ($f1 == 0) { $brhits[0][$i-$startpt] = "?"; }
  }
  for ($i=0; $i!=($newhitct+1); $i++) { $brhits[0][$i] .= "/"; }
  # Pass 4: walk every alignment block.  Each QUERY line opens a new block
  # ($pt = its index, $chunkct = block number); rows are addressed by their
  # offset from $pt.
  for ($i=$startpt; $i!=($#all_br+1); $i++) {
    if (@all_br[$i] =~ /^QUERY/) { $pt = $i; $chunkct += 1; }
    next if ($all_br[$i] eq "");
    last if ($all_br[$i] =~ /^Searching/ | $all_br[$i] =~ /^  Database: nr/ | $all_br[$i] =~ /\!\^\(NEW\_FILE\)/);
    chomp $all_br[$i];
    @ar3 = split/ /, @all_br[$i];
    $nonemptyct = 0;
    for ($j=0; $j!=($#ar3+1); $j++) { if (@ar3[$j] ne "") { $nonemptyct += 1; } }
    # Rows without exactly four non-empty fields carry no coordinates: the
    # last field is taken as the sequence and both coordinates become "?".
    if ($nonemptyct != 4) {
      $brhits[2][$i-$pt] = "?";
      $brhits[1][$i-$pt] = "?";
      $seq[$chunkct][$i-$pt] = @ar3[$#ar3];
    }
    else {
      # Four fields: last field -> $brhits[2], the one before it -> sequence.
      # $brhits[1] is taken from the first non-empty field after the name, but
      # only in the first block or while it is still "?".
      $brhits[2][$i-$pt] = @ar3[$#ar3];
      $seq[$chunkct][$i-$pt] = @ar3[$#ar3-1];
      if ($chunkct == 0 | $brhits[1][$i-$pt] eq "?") { for ($j=1; $j!=($#ar3+1); $j++) { if (@ar3[$j] ne "") { $brhits[1][$i-$pt] = @ar3[$j]; last; } } }
    }
  }
  # Build the final row labels: "<label>/<brhits[1]>-<brhits[2]>".
  for ($j=0; $j!=($newhitct+1); $j++) {
    $hitid[$j] = $brhits[0][$j];
    $hitid[$j] .= "$brhits[1][$j]-$brhits[2][$j]";
  }
  # Write the blocks in order, one blank line after each.
  for ($j=0; $j!=($chunkct+1); $j++) {
    for ($k=0; $k!=($newhitct+1); $k++) { printf OUT "%-25s   %s\n", $hitid[$k], $seq[$j][$k]; }
    print OUT "\n";
  }
  close(OUT);
  $dvar = ""; @all_br = (); @hitid = (); @brhits = (); @seq = (); @list = ();
}
### END OF SUB_get_br_aln_specific


##########
#
# subroutine use: SUB_get_10res_pts_newpsi
# script: get_10res_pts_newpsi.pl
# - NOTE: do not need 10res_pts_newpsi file (alignments adjusted so columns corresponding to gaps in query seq are deleted)
# - do still need queryseqs_newpsi though!

sub SUB_get_10res_pts_newpsi {
  # Append a "queryseqs_newpsi" section to "query_$date": one line per
  # *.br.aln file in new_domain_aln, holding the file name, a tab, and the
  # concatenated last field of every row that starts with QUERY.
  opendir(DIR, "new_domain_aln");
  @allfiles = readdir DIR;
  closedir(DIR);
  open(OUT, ">>query_$date");
  print OUT "!^(NEW_FILE): queryseqs_newpsi\n";
  foreach $file (@allfiles) {
    if ($file =~ /\.br\.aln/) {
      open(THIS, "new_domain_aln/$file");
      @all_this = <THIS>;
      close(THIS);
      print OUT "$file\t";
      # Index loop on the global $i, as in the rest of the script.
      $i = 0;
      while ($i != ($#all_this+1)) {
        if ($all_this[$i] =~ /^QUERY/) {
          chomp $all_this[$i];
          @ar = split / /, $all_this[$i];
          print OUT "$ar[$#ar]";
        }
        $i++;
      }
      print OUT "\n";
    }
  }
  close(OUT);
  @all_this = (); @allfiles = ();
}
### END OF SUB_get_10res_pts_newpsi


##########
#
# subroutine use: SUB_make_inf_file ( $rep )
# script: make_inf_file.pl

sub SUB_make_inf_file {
  $nline = @_[0];
  $dvar0 = `cat $nline.compass`;
  @all_compin = split/\n/, $dvar0;
  $dvar0 = "";
  open(COMPOUT, ">$nline.compass2");
  print COMPOUT "!^(NEW_FILE): $nline.inf\n";
  $stopcomp = 0;
  for ($m=0; $m!=1000000; $m++) {
    @comppair = ();
    for ($n=1; $n!=($#all_compin+1); $n++) {
      if ($n==$#all_compin) { $cpt = $n+1; $stopcomp = 1; last; }
      if ($all_compin[$n] =~ /^Ali1\:/) { $cpt = $n; last; }
    }
    @comppair = splice(@all_compin, 0, $cpt);
    @compar0 = split/Ali2\: /, @comppair[0];
    @compar1 = split/\/new_domain_aln\//, $compar0[0];
    $querydom = substr($compar1[1], 0, 6);
    if ($compar0[1] =~ /\//) {
      @compar2 = split/\//, $compar0[1];
      $scopdom = substr($compar2[$#compar2], 0, 7);
    }
    else { $scopdom = substr($compar0[1], 0, 7); }
    $g1 = `grep $scopdom $replist`; chomp $g1;
    if ($g1 eq "") { last if ($stopcomp == 1); next; }
    $foroutput = "";
    @car1 = split/=/, $comppair[2];
    @car3 = split/\t/, $car1[3];
    $length = $car3[0];
    @car7 = split/\t/, $car1[1];
    $newlength = $car7[0];
#find the compass e-value
    foreach $eline (@comppair) {
      next unless ($eline =~ /Evalue =/);
      chomp $eline;
      @evar = split/Evalue =/, $eline;
      @evar[1] =~ s/ //g;
      $compassevalue = @evar[1];
    }
#count residues in scop domain seq of compass alignment
    $pstart = 0;
    $thisi = 0;
    $ct_p = 0;
    $lead = "null";
    for ($a=0; $a!=($#comppair+1); $a++) {
      if ($pstart == 1) {
        last if ($thisi > $#comppair);
        next unless ($a == $thisi);
        @car2 = split/\s+/, @comppair[$a];
        if ($lead eq "null") { $lead = $car2[1]; }
        for ($b=0; $b!=101; $b++) {
          $this_str = substr($car2[$#car2], $b, 1);
          last if ($this_str eq "");
#include these gaps in count: . and -
#exclude these gaps in count: = and ~
          next if ($this_str eq "=" | $this_str eq "~");
          $ct_p += 1;
        }
        $thisi += 5;
      }
      else { if ($comppair[$a] eq "") { $pstart = 1; $thisi = $a+3; } }
    }
    $lead -= 1;
    $tail_length = $length-$lead-$ct_p;
    if ($tail_length < 0) { $tail_length = 0; }
    $keep = 1;
    if ($lead > 10) {$keep = 0;}
    if ($tail_length > 10) { $keep = 0; }
    $percent = sprintf("%.4f", ($ct_p/$length));
#count residues in new (query) seq of compass alignment
    $pstart = 0;
    $thisi = 0;
    $newct_p = 0;
    $newlead = "null";
    for ($a=0; $a!=($#comppair+1); $a++) {
      if ($pstart == 1) {
        last if ($thisi > $#comppair);
        next unless ($a == $thisi);
        @car6 = split/\s+/, $comppair[$a];
        if ($newlead eq "null") { $newlead = $car6[1]; }
        for ($b=0; $b!=101; $b++) {
          $this_str = substr(@car6[$#car6], $b, 1);
          last if ($this_str eq "");
          next if ($this_str eq "=" | $this_str eq "~");
          $newct_p += 1;
        }
        $thisi += 5;
      }
      else { if ($comppair[$a] eq "") { $pstart = 1; $thisi = $a+1; } }
    }
    $newlead -= 1;
    $newtail_length = $newlength-$newlead-$newct_p;
    if ($newtail_length < 0) { $newtail_length = 0; }
    $scop_Nbound_prof = $lead+1;
    $scop_Cbound_prof = $length - $tail_length;
    $new_Nbound_prof = $newlead+1;
    $new_Cbound_prof = $newlength - $newtail_length;
    $g1 = `grep $scopdom $replist | wc -l`; chomp $g1;
    if ($g1 != 0) {
      if ($keep == 1) { $compcover = "yes"; }
      else { $compcover = $percent; }
# no need to convert "profile positions" to "sequence positions" -- these should be equivalent because columns corresponding to gaps in the query sequence were removed from the multiple alignment
      print COMPOUT "$nline.X.$scopdom\t$compassevalue\t$compcover\t$newlead,$new_Nbound_prof-$new_Cbound_prof,$newtail_length\t$lead,$scop_Nbound_prof-$scop_Cbound_prof,$tail_length\n";
    }
    last if ($stopcomp == 1);
  }
  close(COMPOUT);
  @all_compin = (); @comppair = ();
}
### END OF SUB_make_inf_file


##########
#
# subroutine use: SUB_get_wholerange ( $repcafile )
# script: get_wholerange.pl

# SUB_get_wholerange ( $repcafile )
# Scans a PDB-format coordinate file and reports the span of its C-alpha
# records as "<chain>:<resnum>.<chain>:<resnum>" (first CA, then last CA).
#   $repcafile : path of the coordinate file to scan
# Returns the range string.  A blank chain identifier is written as "_".
# If the file holds no CA record (or cannot be opened) the result is
# ":null.:"; if it holds exactly one, that residue is both ends of the range.
# All variables are package globals, like everywhere else in this script.
sub SUB_get_wholerange {
  $repcafile = @_[0];
  # Start every call from a clean slate.  These globals used to keep the
  # values of the previous call, so a file with fewer than two CA records
  # silently reported the end (or chain) of some other domain.
  $firstres = "null"; $firstchain = "";
  $lastres = ""; $lastchain = "";
  @in_s = ();
  if (open(CAFILE, "$repcafile")) {
    @in_s = <CAFILE>;
    close(CAFILE);
  }
  foreach $cline (@in_s) {
    # offsets 13-14 must read "CA" (atom name field of an ATOM/HETATM record)
    next unless (substr($cline, 13, 2) eq "CA");
    next unless ($cline =~ /^HETATM/ || $cline =~ /^ATOM/);
    # every CA record is a candidate for the end of the range ...
    $lastres = substr($cline, 22, 6);
    $lastres =~ s/ //g;
    $lastchain = substr($cline, 21, 1);
    # ... and the first one seen also fixes its start
    if ($firstres eq "null") { $firstres = $lastres; $firstchain = $lastchain; }
  }
  if ($firstchain eq " ") { $firstchain = "_"; }
  if ($lastchain eq " ") { $lastchain = "_"; }
  $retrange = "$firstchain:$firstres.$lastchain:$lastres";
  @in_s = ();
  return $retrange;
}
### END OF SUB_get_wholerange


##########
#
# subroutine use: SUB_get_repeats_inf ( $newdom )
# script: get_repeats_inf.pl

# SUB_get_repeats_inf ( $newdom )
# For each of the 16 repeat-unit tables $path_bin/REPEATS/units.<i>.n.2 and
# each SCOP domain listed in them, looks up the query-vs-domain row in
# "$newdom.compass2" (via grep) and counts how many repeat units of the SCOP
# domain lie completely inside the aligned SCOP range.
#   $newdom : query domain name; "$newdom.compass2" must be in the current dir
# Returns one text block: per table, one line
#   "<newdom>.X.<scopdom>\t<evalue>\t<units covered>\t<qN>.<qC>\t<sN>.<sC>"
# per domain, followed by an empty line that closes the table's group.
# Units-table line format, as read by the parsing below:
#   "<scopdom> <start.end> <start.end> ...\t<minsize> <start.end>"
sub SUB_get_repeats_inf {
  $newdom = @_[0];
  $retmess15 = "";
  for ($i=1; $i!=17; $i++) {
    @all_repu = ();
    if (open(REPUNITS, "$path_bin/REPEATS/units.$i.n.2")) {
      @all_repu = <REPUNITS>;
      close(REPUNITS);
    }
    foreach $uline (@all_repu) {
      chomp $uline;
      # A blank line carries no domain.  It used to leave @ar1 empty, which
      # made the "$j != $#ar1+1" unit loop below count upwards forever.
      next unless ($uline =~ /\S/);
      @ar0 = split/\t/, $uline;
      @ar1 = split/ /, $ar0[0];
      $grepln = `grep "$newdom.X.$ar1[0]" $newdom.compass2`; chomp $grepln;
      @ar2a = split/\t/, $grepln;
#get evalue of the comparison
      $cmp_evalue = $ar2a[1];
#get query range (field is "lead,N-C,tail")
      @ar3a = split/\,/, $ar2a[3];
      @ar3b = split/\-/, $ar3a[1];
      $query_n = $ar3b[0];
      $query_c = $ar3b[1];
#get scop range (same layout)
      @ar4a = split/\,/, $ar2a[4];
      @ar4b = split/\-/, $ar4a[1];
      $scop_n = $ar4b[0];
      $scop_c = $ar4b[1];
#find how many repeat units are covered from the scop domain
      $repct = 0;
      for ($j=1; $j<=$#ar1; $j++) {
        @ar5 = split/\./, $ar1[$j];
        if ($ar5[0] >= $scop_n && $scop_c >= $ar5[1]) { $repct += 1; }
      }
      $scoprangesize = $ar4b[1] - $ar4b[0] + 1;
      if ($repct == 0) {
        # no whole unit covered: score half a unit when the aligned range is
        # at least the size given in the second tab field and lies inside
        # the span given next to it
        @ar6 = split/ /, $ar0[1];
        @ar7 = split/\./, $ar6[1];
        if ($scoprangesize >= $ar6[0] && $scop_n >= $ar7[0] && $ar7[1] >= $scop_c) { $repct = 0.5; }
      }
      $retmess15 .= "$newdom.X.$ar1[0]\t$cmp_evalue\t$repct\t$query_n.$query_c\t$scop_n.$scop_c\n";
    }
    @all_repu = ();
    $retmess15 .= "\n";
  }
  return $retmess15;
}
### END OF SUB_get_repeats_inf


##########
#
# subroutine use: SUB_find_repeats_hits ( $newdom )
# script: find_repeats_hits.pl

# SUB_find_repeats_hits ( $newdom )
# Reads the "$newdom.repeatinf" section of "$newdom.compass2" (it starts at the
# "!^(NEW_FILE): $newdom.repeatinf" marker and ends at the next "!^(NEW"
# marker).  Rows are grouped by empty lines.  Within a group, rows whose
# second field is below 1e-10 and whose third field is positive are kept as
# hits; at the end of the group their fourth fields ("start.end" ranges) are
# fused wherever they nest or overlap, and one result line is produced.
#   $newdom : query domain name
# Returns the accumulated result lines ($retmess16); each line ends in the
# level code "5".
# NOTE(review): the row layout appears to be the one written by
# SUB_get_repeats_inf (name, evalue, repeat count, query range, scop range)
# -- confirm against the code that assembles the compass2 file.
sub SUB_find_repeats_hits {
  $newdom = @_[0];
  $retmess16 = "";
  open(IN, "$newdom.compass2");
  @in = <IN>;
  # $ct1 is the index of the last hit stored in @hitar for the current group
  $ct1 = -1;
  $hitar[0][0] = "null";
  $pstart = 0;
  foreach $line (@in) {
    if ($pstart == 1) {
      last if ($line =~ /\!\^\(NEW/);
      # an empty line closes a group; only groups with at least one hit are reported
      if ($line eq "\n" && $ct1 != -1) {
        # space-separated list of the hits' ranges (field 3 of each hit)
        $checkline = "$hitar[0][3]";
        for ($i=1; $i!=($ct1+1); $i++) { $checkline .= " $hitar[$i][3]"; }
        # Repeatedly fuse the first pair of ranges that nest or overlap and
        # restart the scan on the shortened list, until a full scan finds
        # nothing left to fuse.
        $again = 1;
        while ($again == 1) {
          @newrs = split/ /, $checkline;
          $newct = $#newrs;
          if ($newct == 0) { $again = 0; $finalcheckline = $checkline; }
          else {
            LP_1: for ($i=0; $i!=($newct+1); $i++) {
              @ar3 = split/\./, $newrs[$i];
              for ($j=($i+1); $j!=($newct+1); $j++) {
                @ar4 = split/\./, $newrs[$j];
                # range j lies inside range i: keep i
                if ($ar4[0] >= $ar3[0] && $ar3[1] >= $ar4[1]) { 
                  $newcheckline = "$ar3[0].$ar3[1]";
                  for ($m=0; $m!=($newct+1); $m++) { next if ($m == $i | $m == $j); $newcheckline .= " $newrs[$m]"; }
                }
                # range i lies strictly inside range j: keep j
                if ($ar4[0] < $ar3[0] && $ar3[1] < $ar4[1]) { 
                  $newcheckline = "$ar4[0].$ar4[1]";
                  for ($m=0; $m!=($newct+1); $m++) { next if ($m == $i | $m == $j); $newcheckline .= " $newrs[$m]"; }
                }
                # j starts inside i and extends past its end: i's start to j's end
                if ($ar4[0] >= $ar3[0] && $ar3[1] >= $ar4[0] && $ar3[1] < $ar4[1]) { 
                  $newcheckline = "$ar3[0].$ar4[1]";
                  for ($m=0; $m!=($newct+1); $m++) { next if ($m == $i | $m == $j); $newcheckline .= " $newrs[$m]"; }
                }
                # j starts before i and ends inside it: j's start to i's end
                if ($ar3[1] >= $ar4[1] && $ar4[1] >= $ar3[0] && $ar4[0] < $ar3[0]) { 
                  $newcheckline = "$ar4[0].$ar3[1]";
                  for ($m=0; $m!=($newct+1); $m++) { next if ($m == $i | $m == $j); $newcheckline .= " $newrs[$m]"; }
                }
                # disjoint ranges: nothing to fuse for this pair
                if ($ar4[1] < $ar3[0] | $ar3[1] < $ar4[0]) { $newcheckline = $checkline; }
                # a fusion happened: adopt the shorter list and rescan from the start
                if ($newcheckline ne $checkline) { $checkline = $newcheckline; last LP_1; }
                # the last possible pair was reached without a fusion: done
                else { if ($i == ($newct - 1) && $j == $newct) { $again = 0; $finalcheckline = $newcheckline; } }
              }
            }
          }
        }
        # @ar2 still holds the name parts of the last hit stored for this group
        $querydom = substr($ar2[0], 0, 6);
        $retmess16 .= "$querydom\t";
        @ar5 = split/ /, $finalcheckline;
        # each fused range is converted by SUB_convert_range_pos_to_res against the query .ca file
        for ($k=0; $k!=($#ar5+1); $k++) {
          $hitqueryrangeres = SUB_convert_range_pos_to_res ("new_domain_str/$querydom.ca", "$ar5[$k]");
          $retmess16 .= "$hitqueryrangeres";
          if ($k != $#ar5) { $retmess16 .= ", "; }
        }
        $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$querydom.ca" );
        # fourth tab field of the first hit's row in $lib_scop_dircla
        # (presumably the SCOP classification string -- TODO confirm)
        $grepline = `grep $hitar[0][0] $lib_scop_dircla`;
        @grepar1 = split/\t/, $grepline;
        $retmess16 .= "\t\t$wholequeryrange\t\t$grepar1[3]\t\t\t\t\t\t\t\t5\n";
        $ct1 = -1;
      }
      else {
        chomp $line;
        @ar1 = split/\t/, $line;
        # keep rows with field 1 below 1e-10 and a positive field 2
        if ($ar1[1] < 1e-10 && $ar1[2] > 0) {
          $ct1 += 1;
          @ar2 = split/\.X\./, $ar1[0];
          $hitar[$ct1][0] = $ar2[1];
          $hitar[$ct1][1] = $ar1[1];
          $hitar[$ct1][2] = $ar1[2];
          $hitar[$ct1][3] = $ar1[3];
          $hitar[$ct1][4] = $ar1[4];
        }
      }
    }
    else { if ($line =~ /\!\^\(NEW\_FILE\)\: $newdom.repeatinf/) { $pstart = 1; } }
  }
  close(IN); @in = (); @hitar = ();
  return $retmess16;
}
### END OF SUB_find_repeats_hits


##########
#
# subroutine use: SUB_find_pass_fragments ( $rep )
# script: find_pass_fragments.pl
# - finds scop domains that pass evalue cutoff (1e-10) and alignment covers all but 10 residues at the ends of the QUERY domain

# SUB_find_pass_fragments ( $rep )
# Walks the "$rep.inf" section of "$rep.compass2" and reports every SCOP
# domain whose row has an e-value of at most 1e-10 and whose query-side lead
# and tail (unaligned ends) are both at most 10 residues long.
#   $rep : query representative name
# Returns the report lines ($retmess17), one per passing domain, each ending
# in the level code "5".
sub SUB_find_pass_fragments {
  ($rep) = @_;
  open(IN, "$rep.compass2");
  @in = <IN>;
  $retmess17 = "";
  $pstart = 0;
  for ($i=0; $i!=($#in+1); $i++) {
    # skip everything up to and including the section marker
    if ($pstart != 1) {
      if ($in[$i] =~ /\!\^\(NEW\_FILE\)\: $rep.inf/) { $pstart = 1; }
      next;
    }
    # the next marker ends the section
    last if ($in[$i] =~ /\!\^\(NEW\_FILE/);
    chomp $in[$i];
    @ar0 = split(/\t/, $in[$i]);
    # e-value filter (rows without an e-value are dropped as well)
    next if ($ar0[1] eq "" || $ar0[1] > 1e-10);
    # query field is "lead,N-C,tail"
    @ar1 = split(/\,/, $ar0[3]);
    ($lead, $tail) = ($ar1[0], $ar1[2]);
    next if ($lead > 10 || $tail > 10);
    # row name is "<querydom>.X.<scopdom>"
    @ar5 = split(/\.X\./, $ar0[0]);
    $querydom = substr($ar5[0], 0, 6);
    $scopdom = substr($ar5[1], 0, 7);
    # query side: aligned range and whole range
    @ar2a = split(/\-/, $ar1[1]);
    $querynumrange = join(".", $ar2a[0], $ar2a[1]);
    $queryresrange = SUB_convert_range_pos_to_res ( "new_domain_str/$querydom.ca", "$querynumrange");
    $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$querydom.ca" );
    # SCOP side: aligned range and whole range
    @ar2b = split(/\,/, $ar0[4]);
    @ar2c = split(/\-/, $ar2b[1]);
    $scopnumrange = join(".", $ar2c[0], $ar2c[1]);
    $scopresrange = SUB_convert_range_pos_to_res ( "$path_str/$scopdom.ca", "$scopnumrange");
    $wholescoprange = SUB_get_wholerange ( "$path_str/$scopdom.ca" );
    # fourth tab field of the domain's row in $lib_scop_dircla
    $grepline1 = `grep $scopdom $lib_scop_dircla`;
    @grepar1 = split(/\t/, $grepline1);
    $superfamid = $grepar1[3];
    $retmess17 .= "$querydom\t$queryresrange\t\t$wholequeryrange\t$scopdom\t$superfamid\t$scopresrange\t\t$wholescoprange\t$ar0[1]\t\t\t\t5\n";
  }
  close(IN);
  @in = ();
  return $retmess17;
}
### END OF SUB_find_pass_fragments


##########
#
# subroutine use: SUB_get_protnames ()
# script: get_protnames.pl

# SUB_get_protnames ()
# Appends a "protlist.$date" section to the file "query_$date": one line
# "<domain>\t<protein name>" for every domain named in the
# "$date.newpdb.list.bc" section of pdb_$date/blstclst.fa.bc and for every
# entry of @frag_list.  The name is taken from the COMPND records of
# "<pdbid>.pdb" in the current directory (fetched first if absent) and is left
# empty when no record for the domain's chain is found.
# Takes no arguments and returns nothing useful.  Reads the globals $date,
# $input_type, $currdir, $path_bin and @frag_list.
# Side effect: removes every *.pdb file from the current directory when done.
sub SUB_get_protnames {
  open(IN1, "pdb_$date/blstclst.fa.bc");
  @in1 = <IN1>;
  $ct = -1;
  $domlist[0] = "null";
  $pstart = 0;
  foreach $line (@in1) {
    if ($pstart == 1) {
      last if ($line =~ /\!\^\(NEW/);
      chomp $line;
      # one cluster per line, member domains separated by single blanks
      @ar0 = split/ /, $line;
      for ($i=0; $i!=($#ar0+1); $i++) {
        $ct += 1;
        $domlist[$ct] = $ar0[$i];
        $pdbid = substr($ar0[$i], 0, 4);
        $chain = substr($ar0[$i], 5, 1);
        $protlist[$ct] = SUB_get_protnames_lookup ( $pdbid, $chain, 0 );
      }
    }
    else { if ($line =~ /\!\^\(NEW\_FILE\)\: $date.newpdb.list.bc/ && $line !~ /\.bc\.pre/) { $pstart = 1; } }
  }
  close(IN1); @in1 = ();
#now do the same thing for any fragments (because these are not included in ~.newpdb.list.bc)
  foreach $line (@frag_list) {
    chomp $line;
    $ct += 1;
    $domlist[$ct] = substr($line, 0, 6);
    $pdbid = substr($line, 0, 4);
    $chain = substr($line, 5, 1);
    # for fragments a "FRAGMENT:" record is accepted as well as "MOLECULE:"
    $protlist[$ct] = SUB_get_protnames_lookup ( $pdbid, $chain, 1 );
  }
  open(OUT, ">>query_$date");
  print OUT "!^(NEW_FILE): protlist.$date\n";
  for ($i=0; $i!=($ct+1); $i++) { print OUT "$domlist[$i]\t$protlist[$i]\n"; }
  close(OUT);
  `rm -f *.pdb`;
  @domlist = (); @protlist = ();
}

# SUB_get_protnames_lookup ( $pdbid, $chain, $use_fragment )
# Private helper of SUB_get_protnames: returns the text that follows
# "MOLECULE: " (or, when $use_fragment is true, "FRAGMENT: ") on the first
# COMPND record that belongs to chain $chain of "<pdbid>.pdb", or "" if there
# is none.  A chain of "_" matches the first such record.  For any other
# chain, the first "CHAIN:" line within the five lines after the record must
# list it.
sub SUB_get_protnames_lookup {
  my ($lk_pdbid, $lk_chain, $lk_use_fragment) = @_;
  # fetch the coordinate file if it is not in the working directory yet
  if (-e "$lk_pdbid.pdb" == 0) {
    if ($input_type eq "NEW") { `cp $currdir/SAVE.$lk_pdbid.SAVE $lk_pdbid.pdb`; }
    else { `$path_bin/pdbcp.pl $lk_pdbid`; }
  }
  my $lk_name = "null";
  @all_pdb = ();
  if (open(PDB, "$lk_pdbid.pdb")) {
    @all_pdb = <PDB>;
    close(PDB);
  }
  LP_2: for ($j=0; $j!=($#all_pdb+1); $j++) {
    # The header ends where the coordinates begin.  This test used to sit
    # behind the COMPND filter, where it could never be true.
    last if ($all_pdb[$j] =~ /^ATOM/);
    next unless ($all_pdb[$j] =~ /^COMPND/);
    my $lk_tag;
    if ($all_pdb[$j] =~ /MOLECULE\:/) { $lk_tag = "MOLECULE"; }
    elsif ($lk_use_fragment && $all_pdb[$j] =~ /FRAGMENT\:/) { $lk_tag = "FRAGMENT"; }
    else { next; }
    chomp $all_pdb[$j];
    @ar1 = split/$lk_tag\: /, $all_pdb[$j];
    if ($lk_chain eq "_") { $lk_name = $ar1[1]; last; }
    # Take the first CHAIN: line among the next five lines.  When there is
    # none the record is skipped.  The old code wrote '$chainline eq "null"'
    # (a comparison, not an assignment) and so went on to test the CHAIN
    # line left over from an earlier record or an earlier file.
    $chainline = "null";
    foreach my $lk_off (1 .. 5) {
      if ($all_pdb[$j+$lk_off] =~ /CHAIN\:/) { $chainline = $all_pdb[$j+$lk_off]; last; }
    }
    next if ($chainline eq "null");
    @ar2 = split/CHAIN\: /, $chainline;
    @ar3a = split/\n/, $ar2[1];
    # drop the terminating ";" and anything behind it, then all blanks
    if ($ar3a[0] =~ /\;/) { @ar3 = split/\;/, $ar3a[0]; }
    else { $ar3[0] = $ar3a[0]; }
    $ar3[0] =~ s/ //g;
    if ($ar3[0] =~ /\,/) {
      # several chains share this molecule
      @ar4 = split/\,/, $ar3[0];
      for ($k=0; $k!=($#ar4+1); $k++) {
        $ar4[$k] =~ s/ //g;
        if ($ar4[$k] eq $lk_chain) { $lk_name = $ar1[1]; last LP_2; }
      }
    }
    else { if ($ar3[0] eq $lk_chain) { $lk_name = $ar1[1]; last LP_2; } }
  }
  @all_pdb = ();
  if ($lk_name eq "null") { $lk_name = ""; }
  return $lk_name;
}
### END OF SUB_get_protnames


##########
#
# subroutine use: SUB_find_zbc_scores ( $rep )
# script: find_zbc_scores.pl
# - get z-score, blsm score, pct coverage, make ~.mpa file, and find fold level assignments

# SUB_find_zbc_scores ( $rep )
# Splits "$rep.mammoth" into one record per structure comparison (a record
# ends where the next " Predicted path" line begins) and, for each record,
# extracts the library domain name, its length, the Z-score and the two
# aligned sequence strings.  Writes "$rep.mammoth2" with three sections:
#   all_zbc_$rep   : "<rep>.M.<scopdom>.mpa\t<Z>\t<blsm score>\t<coverage>"
#   $rep.foldlevel : fold-level pairs (Z >= 10), sorted by descending Z
#                    (section omitted when there are none)
#   <rep>.M.<scopdom>.mpa : the pairwise alignments themselves
#   $rep : query representative name; "$rep.mammoth" is read from the
#          current directory
# Returns nothing useful.  Uses $path_bin/blsm_scores and the sibling subs
# SUB_get_ranges_mpa, SUB_convert_range_pos_to_res and SUB_get_wholerange.
sub SUB_find_zbc_scores {
  $rep = @_[0];
  $mamzbc = "";
  $mpapairs = "";
  $foldpairs = "";
  @mamtotal = ();
  $dumptovar1 = `cat $rep.mammoth`;
  @mamtotal = split/\n/, $dumptovar1;
  $dumptovar1 = "";
  $stopm = 0;
  for ($i=0; $i!=1000000; $i++) {
    @allmam = ();
    # Cut the next record off the front of @mamtotal.  By default everything
    # that is left forms the last record; a " Predicted path" line before
    # the final line marks an earlier cut.  (The old "$j != $#mamtotal+1"
    # test never became false for an empty or one-line file, so a missing
    # or empty .mammoth file made this sub spin forever.)
    $mpt = $#mamtotal+1; $stopm = 1;
    for ($j=1; $j<$#mamtotal; $j++) {
      if ($mamtotal[$j] =~ /^ Predicted path/) { $mpt = $j; $stopm = 0; last; }
    }
    @allmam = splice(@mamtotal, 0, $mpt);
    $zscore = "error"; $seq1 = ""; $seq2 = ""; $new = 0; $len = 0; $scopdom = "";
    $skipnext = 0;
    for ($m=0; $m!=($#allmam+1); $m++) {
      if ($skipnext == 1) { $skipnext = 0; next; }
      if ($allmam[$m] =~ /^\=\=\> EXPERIMENT\:/) {
        # two lines down: path of the library .ca file; three lines down: "<label>: <length>"
        chomp $allmam[$m+2]; @ar1a = split/\//, $allmam[$m+2]; @ar1b = split/\.ca/, $ar1a[$#ar1a]; $scopdom = $ar1b[0];
        chomp $allmam[$m+3]; @ar1c = split/\:/, $allmam[$m+3]; $ar1c[1] =~ s/ //g; $len = $ar1c[1];
      }
      if ($allmam[$m] =~ /^Z\-score\=/) { $zscore = substr($allmam[$m], 8, 20); $zscore =~ s/ //g; }
      if ($allmam[$m] =~ /^Prediction/) {
        # blank-separated alignment chunks; the partner line is four lines down
        $skipnext = 1;
        chomp $allmam[$m]; @ar2a = split/ /, $allmam[$m];
        chomp $allmam[$m+4]; @ar2b = split/ /, $allmam[$m+4];
        for ($n=1; $n!=($#ar2a+1); $n++) { $seq1 .= "$ar2a[$n]"; $seq2 .= "$ar2b[$n]"; }
      }
    }
    if ($scopdom eq "") { last if ($stopm == 1); next; }
    if ($seq1 eq "" | $seq2 eq "") { $mamzbc .= "$rep.M.$scopdom.mpa\t$zscore\terror\t0\n"; }
    else {
      # sequences are passed as printf arguments so that a stray "%" in
      # them cannot be taken for a format directive
      open(TMP, ">$rep.mpatmp");
      printf TMP "%-12s  %s\n%-12s  %s\n", $rep, $seq1, $scopdom, $seq2;
      close(TMP);
      $bscore = `$path_bin/blsm_scores -i $rep.mpatmp`; chomp $bscore;
      if ($bscore eq "NaN" | $bscore eq "nan" | $bscore eq "") { $bscore = "error"; }
      $l2 = sprintf("%-12s  %s\n%-12s  %s\n", $rep, $seq1, $scopdom, $seq2);
      $mpapairs .= "!^(NEW_FILE): $rep.M.$scopdom.mpa\n$l2";
      # coverage = share of $len not lost to leading/trailing "." in seq1
      $total = 0; $seq1_head = 0; $seq1_tail = 0;
      for ($z=0; $z!=999999; $z++) { last if (substr($seq1, $z, 1) eq ""); $total ++; }
      for ($z=0; $z!=$total; $z++) { last if (substr($seq1, $z, 1) ne "."); $seq1_head ++; }
      for ($z=($total-1); $z!=-1; $z--) { last if (substr($seq1, $z, 1) ne "."); $seq1_tail ++; }
      if ($seq1_tail == $total) { $seq1_tail = 0; }
      if ($len == 0) { $covpct = "error"; }
      else { $covpct = sprintf("%-1.3f", ($len-$seq1_head-$seq1_tail)/$len); }
      $mamzbc .= "$rep.M.$scopdom.mpa\t$zscore\t$bscore\t$covpct\n";
      if ($zscore >= 10) {
        # SUB_get_ranges_mpa hands back ONE string, "<query range>\t<scop
        # range>".  It must be split here; assigning it straight to a
        # two-element list left $srangepos undefined.
        ($qrangepos, $srangepos) = split/\t/, SUB_get_ranges_mpa ( "$rep.mpatmp" );
        $qrangres = SUB_convert_range_pos_to_res ( "new_domain_str/$rep.ca", "$qrangepos" );
        $srangres = SUB_convert_range_pos_to_res ( "$path_str/$scopdom.ca", "$srangepos" );
        $wholequeryrange = SUB_get_wholerange ( "new_domain_str/$rep.ca" );
        $wholescoprange = SUB_get_wholerange ( "$path_str/$scopdom.ca" );
        $grepline1 = `grep $scopdom $lib_scop_dircla`;
        @grepar1 = split/\t/, $grepline1;
        $superfamid = $grepar1[3];
        $foldpairs .= "$rep\t$qrangres\t\t$wholequeryrange\t$scopdom\t$superfamid\t$srangres\t\t$wholescoprange\t\t$zscore\t\t$bscore\t6\n";
      }
      `rm $rep.mpatmp`;
    }
    last if ($stopm == 1);
  }
  @allmam = (); @mamtotal = ();
#sort fold pairs by Z-score (selection sort; used-up entries are overwritten with "null")
  @far1 = split/\n/, $foldpairs;
  $sortedfoldpairs = "";
  for ($i=0; $i!=($#far1+1); $i++) {
    $big = -100; $pt = -1;
    for ($j=0; $j!=($#far1+1); $j++) {
      next if ($far1[$j] eq "null");
      @far2 = split/\t/, $far1[$j];
      if ($far2[10] > $big) { $big = $far2[10]; $pt = $j; }
    }
    last if ($pt == -1);
    $sortedfoldpairs .= "$far1[$pt]\n";
    $far1[$pt] = "null";
  }
  open(OUT, ">$rep.mammoth2");
  print OUT "!^(NEW_FILE): all_zbc_$rep\n$mamzbc";
  if ($sortedfoldpairs ne "") { print OUT "!^(NEW_FILE): $rep.foldlevel\n$sortedfoldpairs"; }
  print OUT "$mpapairs";
  close(OUT);
  $foldpairs = ""; $sortedfoldpairs = ""; @far1 = (); $mpapairs = ""; $mamzbc = "";
}
### END OF SUB_find_zbc_scores


##########
#
# subroutine use: SUB_get_ranges_mpa ( $mpafile )
# script: get_ranges_mpa.pl

# SUB_get_ranges_mpa ( $mpafile )
# Reads a two-line pairwise alignment ("<name><blanks><sequence>" per line,
# "." marking a gap) and finds, for each sequence, the ordinal of its first
# and of its last residue that is aligned to a residue of the other one.
#   $mpafile : path of the alignment file
# Returns ONE string "<n1>.<c1>\t<n2>.<c2>" (sequence 1 range, tab, sequence 2
# range); callers have to split it on the tab.  All four numbers are 0 when no
# residue pair is aligned or the file cannot be read.
sub SUB_get_ranges_mpa {
  $mpafile = @_[0];
  @allmpa = ();
  if (open(MPAIN, "$mpafile")) {
    @allmpa = <MPAIN>;
    close(MPAIN);
  }
  # Take the sequence as the second blank-separated word.  The header used to
  # be cut off at a fixed 13 characters, but the alignments written by this
  # script have a 14-character header ("%-12s" plus two blanks), so a leading
  # blank was counted as an aligned residue pair: the N-terminal bound was
  # always 1 and the C-terminal bound one too high.
  $seq1 = ""; $seq2 = "";
  if ($allmpa[0] =~ /^\S+\s+(\S+)/) { $seq1 = $1; }
  if ($allmpa[1] =~ /^\S+\s+(\S+)/) { $seq2 = $1; }
  # reset the results so that nothing survives from a previous call
  $n1 = 0; $n2 = 0; $c1 = 0; $c2 = 0;
  # running residue counts of sequence 1 and 2
  $cct1 = 0; $cct2 = 0;
  my $alnlen = (length($seq1) < length($seq2)) ? length($seq1) : length($seq2);
  # a private loop index: the caller's own $i is left alone
  for (my $pos=0; $pos<$alnlen; $pos++) {
    $res1 = substr($seq1, $pos, 1);
    $res2 = substr($seq2, $pos, 1);
    if ($res1 ne ".") { $cct1 += 1; }
    if ($res2 ne ".") { $cct2 += 1; }
    next if ($res1 eq "." || $res2 eq ".");
    # an aligned pair: the first one sets the N bounds, every one moves the C bounds
    if ($n1 == 0) { $n1 = $cct1; $n2 = $cct2; }
    $c1 = $cct1; $c2 = $cct2;
  }
  @allmpa = ();
  return "$n1.$c1\t$n2.$c2";
}
### END OF SUB_get_ranges_mpa


##########
#
# subroutine use: SUB_renum_2 ( $startval, $pdbdir )
# script: renum_2_update.pl

# SUB_renum_2 ( $startval, $pdbdir )
# Runs "DaliLite -readbrk" on every structure named in querydomains.list and
# records in "querydatlist" the 5-character code of a resulting .dat file.
#   $startval : code handed to DaliLite; a .dat file is accepted when its
#               first four characters compare numerically equal to it
#   $pdbdir   : directory that holds the structures named in the list
# Works in the current directory; core files and dali.lock are cleaned up.
sub SUB_renum_2 {
  ($startval, $pdbdir) = @_;
  open(IN, "querydomains.list");
  @in = <IN>;
  open(OUTa1, ">querydatlist");
  foreach $line (@in) {
    chomp $line;
    $pdbid = $line;
    # a lock left behind by an earlier run would block DaliLite
    `rm dali.lock` if (-e "dali.lock");
    system ("$path_bin/DaliLite -readbrk $pdbdir/$pdbid $startval");
    # clean up after a crashed run
    $ls = `ls -1 core* | wc -l`;
    chomp $ls;
    `rm core* dali.lock` if ($ls != 0);
    # look for a .dat file that carries the requested code
    opendir(DIR, ".");
    @allfiles = readdir DIR;
    closedir(DIR);
    $yes = 0;
    foreach $file (@allfiles) {
      next unless ($file =~ /\.dat/);
      $str = substr($file, 0, 4);
      next unless ($str == $startval);
      $yes = 1;
      $newstr = substr($file, 0, 5);
      last;
    }
    print OUTa1 "$newstr\n" if ($yes != 0);
  }
  close(OUTa1);
  close(IN);
  @in = ();
  @allfiles = ();
}
### END OF SUB_renum_2


##########
#
# subroutine use: SUB_get5N ()
# script: get5N.pl

# SUB_get5N ()
# Joins "querydom2dali" (lines "<domain> <4-char code>") with "querydatlist"
# (one code per line): for every domain whose code equals the first four
# characters of a querydatlist line, writes "<domain> <that line>" to
# "querydom2dali_5N".  When several lines share a code the first one is used.
# Takes no arguments; all three files live in the current directory.
sub SUB_get5N {
  open(IN1, "querydom2dali");
  @all_in1 = <IN1>;
  open(IN2, "querydatlist");
  @all_in2 = <IN2>;
  open(OUTb1, ">querydom2dali_5N");
  # index the list once by its 4-character prefix, first occurrence wins
  chomp @all_in2;
  my %first_with_code;
  foreach $line2 (@all_in2) {
    my $code = substr($line2, 0, 4);
    $first_with_code{$code} = $line2 unless (exists $first_with_code{$code});
  }
  foreach $line1 (@all_in1) {
    chomp $line1;
    @ar1 = split(/ /, $line1);
    next unless (exists $first_with_code{$ar1[1]});
    print OUTb1 "$ar1[0] $first_with_code{$ar1[1]}\n";
  }
  close(IN1);
  close(IN2);
  close(OUTb1);
  @all_in1 = ();
  @all_in2 = ();
}
### END OF SUB_get5N


##########
#
# subroutine use: SUB_chk_for_stalled_dali

# SUB_chk_for_stalled_dali ()
# Takes two "ps | grep DaliLite" snapshots 300 seconds apart and kills every
# process whose ps line is byte-for-byte the same in both.
# NOTE(review): this presumably relies on ps printing accumulated CPU time,
# so that an unchanged line means the process made no progress -- confirm for
# the ps variant in use.
# Takes no arguments and returns nothing useful; blocks for five minutes.
sub SUB_chk_for_stalled_dali {
  $g1 = `ps | grep DaliLite`; @ga1 = split/\n/, $g1;
  sleep 300;
  $g2 = `ps | grep DaliLite`; @ga2 = split/\n/, $g2;
  LPZ: for ($z=0; $z!=($#ga2+1); $z++) {
    for ($y=0; $y!=($#ga1+1); $y++) {
      next unless ($ga2[$z] eq $ga1[$y]);
      # The PID is the first word of the line.  ps right-aligns it, and
      # split/\s/ turned the leading blanks into an empty first field, so
      # "kill" was run without a PID.  split(' ') skips leading whitespace.
      @ga3 = split(' ', $ga2[$z]);
      if ($ga3[0] =~ /^\d+$/) { `kill $ga3[0]`; }
      next LPZ;
    }
  }
}
### END OF SUB_chk_for_stalled_dali


##########
#
# subroutine use: SUB_mk_dpa_from_dccp ( $dccpfile, $qch, $p2dfile )

# SUB_mk_dpa_from_dccp ( $dccpfile, $qch, $p2dfile )
# Turns the best-scoring record of a .dccp file into a two-line pairwise
# alignment file (~.dpa, "<name>\t<aligned sequence>" per line, "-" for gaps).
#   $dccpfile : name of the .dccp file; either "<qch>.<libdom>.dccp" or
#               "<libdom>.<qch>...."
#   $qch      : code of the query; used to tell query from library domain
#   $p2dfile  : file that is grep'ed for the two codes of the chosen record;
#               the first blank-separated word of the matching line becomes
#               the name printed in the .dpa file
# Returns the best score found, as a string ("-1" when no record qualified).
# The output file is "<qch>.<libdom>.dpa" or "<libdom>.<qch>.dpa", following
# the order of the two names in $dccpfile.
# NOTE(review): the record layout is taken from the fixed offsets used below;
# presumably this is DaliLite's DCCP format -- confirm against its docs.
sub SUB_mk_dpa_from_dccp {
  $dccpfile = @_[0];
  $qch = @_[1];
  $p2dfile = @_[2];
  # work out the library domain name and which of the two names comes first
  if ($dccpfile =~ /^$qch\./) {
    @ar1 = split/$qch\./, $dccpfile;
    @ar2 = split/\.dccp/, $ar1[1];
    $libdom = $ar2[0];
    $first = $qch;
  }
  else {
    @ar1 = split/\.$qch\./, $dccpfile;
    $libdom = $ar1[0];
    $first = $libdom;
  }
#find best Z-score
  # read the file, retrying up to five times (one second apart) while its
  # first line is still empty
  for ($z=0; $z!=5; $z++) {
    open(DCCPIN, "$dccpfile");
    @dcin = <DCCPIN>;
    last if ($dcin[0] ne "");
    sleep 1;
  }
  $max = -1;
  foreach $dline (@dcin) {
    # record header lines carry "DCCP" at offset 1
    next unless (substr($dline, 1, 4) eq "DCCP");
    chomp $dline;
    @ar1 = split/\s+/, $dline;
    # skip records whose last two words (the two codes) are identical
    next if ($ar1[$#ar1] eq $ar1[$#ar1-1]);
    # the score is the 5-character field at offset 29
    $thisz = substr($dline, 29, 5); $thisz =~ s/ //g;
    if ($thisz > $max) { $max = $thisz; $svline = $dline;}
  }
#get equivalent residues
  @ar1 = split/\s+/, $svline;
  $dom1 = $ar1[$#ar1-1]; $dom2 = $ar1[$#ar1];
  @eqs = ();
  for ($z=0; $z!=($#dcin+1); $z++) {
    $dline = $dcin[$z]; chomp $dline;
    next unless ($dline eq "$svline");
    # $n = number of lines in each of the two range blocks of this record,
    # derived from the distance to the next header line or to the end of file
    for ($y=1; $y!=999999; $y++) {
      if ($dcin[$z+$y] =~ /^ DCCP/) { $n = ($y-2)/2; last; }
      if (($z+$y) == $#dcin) { $n = ($y-1)/2; last; }
    }
    # block 1 (lines 2 .. 1+n after the header) and block 2 (the next n lines)
    $all1 = ""; $all2 = "";
    for ($y=2; $y!=(2+$n); $y++) { $all1 .= $dcin[$z+$y]; }
    for ($y=(2+$n); $y!=(2+2*$n); $y++) { $all2 .= $dcin[$z+$y]; }
    @ar1a = split/\n/, $all1;
    @ar1b = split/\n/, $all2;
    # each line holds 10-character cells: start at offsets 0-3, end at 6-9;
    # cell k of block 1 pairs with cell k of block 2
    for ($x=0; $x!=($#ar1a+1); $x++) {
      for ($y=0; $y!=999; $y++) {
        $s1a = substr($ar1a[$x], $y*10, 10); $chk = $s1a; $chk =~ s/ //g; last if ($chk eq "");
        $s1b = substr($ar1b[$x], $y*10, 10);
        $s2a1 = substr($s1a, 0, 4); $s2a1 =~ s/ //g;
        $s2a2 = substr($s1a, 6, 4); $s2a2 =~ s/ //g;
        $s2b1 = substr($s1b, 0, 4); $s2b1 =~ s/ //g;
        $s2b2 = substr($s1b, 6, 4); $s2b2 =~ s/ //g;
        # one entry per aligned segment: "start1 end1\tstart2 end2"
        push @eqs, "$s2a1 $s2a2\t$s2b1 $s2b2";
      }
    }
    last;
  }
#get sequences from ~.dat
  # second word of the "-sequence" line, quotes removed
  $g1 = `grep "\-sequence" $dom1*dat`; chomp $g1;
  @g1a = split/\s+/, $g1; $seq1 = $g1a[1]; $seq1 =~ s/\"//g;
  $g2 = `grep "\-sequence" $dom2*dat`; chomp $g2;
  @g2a = split/\s+/, $g2; $seq2 = $g2a[1]; $seq2 =~ s/\"//g;
  # @s1/@s2 hold the residues 1-based (element 0 stays unused)
  @s1 = (); @s2 = ();
  for ($z=0; $z!=9999999; $z++) { $aa = substr($seq1, $z, 1); last if ($aa eq ""); $s1[$z+1] = $aa; }
  for ($z=0; $z!=9999999; $z++) { $aa = substr($seq2, $z, 1); last if ($aa eq ""); $s2[$z+1] = $aa; }
#add N-term gaps to aln
  # residues before the first aligned segment: sequence 1 first, then sequence 2
  $aln1 = ""; $aln2 = "";
  @ar1 = split/\s+/, $eqs[0];
  if ($ar1[0] != 1) { for ($z=1; $z!=$ar1[0]; $z++) { $aln1 .= "$s1[$z]"; $aln2 .= "-"; } }
  if ($ar1[2] != 1) { for ($z=1; $z!=$ar1[2]; $z++) { $aln1 .= "-"; $aln2 .= "$s2[$z]"; } }
#add aligned regions and internal gaps
  for ($y=0; $y!=($#eqs+1); $y++) {
    @ar1 = split/\s+/, $eqs[$y];
    # the segment length is taken from sequence 1's start and end
    $num = $ar1[1] - $ar1[0];
    for ($z=0; $z!=($num+1); $z++) { $aln1 .= "$s1[$ar1[0]+$z]"; $aln2 .= "$s2[$ar1[2]+$z]"; }
    last if ($y == $#eqs);
    # unaligned residues up to the next segment, again sequence 1 first
    @ar2 = split/\s+/, $eqs[$y+1];
    for ($z=1; $z!=999999; $z++) { last if (($ar1[1]+$z) == $ar2[0]); $aln1 .= "$s1[$ar1[1]+$z]"; $aln2 .= "-"; }  
    for ($z=1; $z!=999999; $z++) { last if (($ar1[3]+$z) == $ar2[2]); $aln1 .= "-"; $aln2 .= "$s2[$ar1[3]+$z]"; }
  }
#add C-term gaps to aln
  # $#s1 / $#s2 equal the sequence lengths because the arrays are 1-based
  @ar1 = split/\s+/, $eqs[$#eqs];
  if ($ar1[1] != $#s1) {
    $num = $#s1-$ar1[1];
    for ($z=1; $z!=($num+1); $z++) { $aln1 .= "$s1[$ar1[1]+$z]"; $aln2 .= "-"; }
  }
  if ($ar1[3] != $#s2) {
    $num = $#s2-$ar1[3];
    for ($z=1; $z!=($num+1); $z++) { $aln1 .= "-"; $aln2 .= "$s2[$ar1[3]+$z]"; }
  }
#make output ~.dpa
  $g1 = `grep $dom1 $p2dfile`; chomp $g1;
  $g2 = `grep $dom2 $p2dfile`; chomp $g2;
  @ar1 = split/\s+/, $g1;
  @ar2 = split/\s+/, $g2;
  # decide which of the two aligned sequences belongs to the query
  if ($g1 =~ /$qch/) {
    $qchline = "$ar1[0]\t$aln1";
    $libdomline = "$ar2[0]\t$aln2";
  }
  else {
    $qchline = "$ar2[0]\t$aln2";
    $libdomline = "$ar1[0]\t$aln1";
  }
  # the line order in the output follows the order of the names in $dccpfile
  if ($qch eq $first) {
    open(DPAOUT, ">$qch.$libdom.dpa");
    print DPAOUT "$qchline\n$libdomline\n";
    close(DPAOUT);
  }
  else {
    open(DPAOUT, ">$libdom.$qch.dpa");
    print DPAOUT "$libdomline\n$qchline\n";
    close(DPAOUT);
  }
  close(DCCPIN);
  return "$max";
}
### END OF SUB_mk_dpa_from_dccp


##########
#
# subroutine use: SUB_cover_dpa ( $dpafile )
# script: cover_dpa.pl

# SUB_cover_dpa ( $dpafile )
# Computes how much of a sequence is covered by a two-line ~.dpa alignment
# ("<name>\t<aligned sequence>" per line, "-" for gaps): aligned residues
# divided by all residues of that sequence.
#   $dpafile : path of the form "<dir>/<first>.<second>.dpa"
# Returns the coverage formatted "%1.4f".  The coverage of the SECOND
# sequence is returned when new_domain_str/<first>.pdb exists, otherwise that
# of the first.  A sequence without residues (or an unreadable file) has
# coverage 0.  A length mismatch between the two lines is reported on STDOUT.
sub SUB_cover_dpa {
  $infile = @_[0];
  @all_in = ();
  if (open(IN, "$infile")) {
    @all_in = <IN>;
    close(IN);
  }
  @ar2 = split/\t/, @all_in[0];
  @ar3 = split/\t/, @all_in[1];
  $line1 = $ar2[1];
  $line2 = $ar3[1];
  if (!defined($line1)) { $line1 = ""; }
  if (!defined($line2)) { $line2 = ""; }
  # Count up to the end of the string.  The old loops only stopped at "\n"
  # and counted up to 100000 "residues" for a last line without a newline.
  chomp $line1; chomp $line2;
  # dct = gap columns, act = residues
  $dct1 = ($line1 =~ tr/-//);
  $act1 = length($line1) - $dct1;
  $dct2 = ($line2 =~ tr/-//);
  $act2 = length($line2) - $dct2;
  # the message used to print the unrelated global $file
  if (($dct1 + $act1) != ($dct2 + $act2)) { print "ERROR: uneven sequence lengths in $infile\n"; }
  # a residue is aligned unless it faces a gap in the other sequence
  $aligned_aa1 = $act1 - $dct2;
  $aligned_aa2 = $act2 - $dct1;
  # an empty sequence must not abort the run with a division by zero
  $cover1 = ($act1 > 0) ? $aligned_aa1/$act1 : 0;
  $cover2 = ($act2 > 0) ? $aligned_aa2/$act2 : 0;
  @pairpre = split/\//, $infile;
  @pair = split/\./, $pairpre[1];
  if (-e "new_domain_str/$pair[0].pdb" == 1) { $coveruse = sprintf ("%1.4f", $cover2); }
  else { $coveruse = sprintf ("%1.4f", $cover1); }
  @all_in = ();
  return $coveruse;
}
### END OF SUB_cover_dpa


##########
#
# subroutine use: SUB_get_ranges_dpa ( $dpafile, $qs, $ls )
# script: get_ranges_dpa.pl

# SUB_get_ranges_dpa ( $dpafile, $qs, $ls )
# Reads a ~.dpa alignment ("<name>\t<aligned sequence>" per line, "-" for
# gaps) and finds, for both sequences, the ordinal of the first and of the
# last residue that is aligned to a residue of the other sequence.
#   $dpafile : path of the alignment file
#   $qs      : index (0 or 1) of the line that becomes sequence 1
#   $ls      : index (0 or 1) of the line that becomes sequence 2
# Returns ONE string "<n1>.<c1>\t<n2>.<c2>".  All four numbers are 0 when no
# residue pair is aligned or the file cannot be read.
sub SUB_get_ranges_dpa {
  $infile = @_[0];
  $qs = @_[1];
  $ls = @_[2];
#$qs = 0 means query seq is first line in alignment, $qs = 1 means library seq is first line
  @all_in = ();
  if (open(IN, "$infile")) {
    @all_in = <IN>;
    close(IN);
  }
  @line1 = split/\t/, @all_in[$qs];
  @line2 = split/\t/, @all_in[$ls];
  $seq1 = @line1[1];
  $seq2 = @line2[1];
  if (!defined($seq1)) { $seq1 = ""; }
  if (!defined($seq2)) { $seq2 = ""; }
  # work on the bare sequences so that a missing final newline cannot make
  # the scan run past the end of the string
  chomp $seq1; chomp $seq2;
  # Reset all four results.  $c1/$c2 used to keep the values of the previous
  # call whenever the alignment had no aligned residue pair.
  $n1 = 0; $n2 = 0; $c1 = 0; $c2 = 0;
  # running residue counts of sequence 1 and 2
  $cct1 = 0; $cct2 = 0;
  my $alnlen = (length($seq1) < length($seq2)) ? length($seq1) : length($seq2);
  for (my $pos=0; $pos<$alnlen; $pos++) {
    $res1 = substr($seq1, $pos, 1);
    $res2 = substr($seq2, $pos, 1);
    if ($res1 ne "-") { $cct1 += 1; }
    if ($res2 ne "-") { $cct2 += 1; }
    next if ($res1 eq "-" || $res2 eq "-");
    # an aligned pair: the first one sets the N bounds, every one moves the C bounds
    if ($n1 == 0) { $n1 = $cct1; $n2 = $cct2; }
    $c1 = $cct1; $c2 = $cct2;
  }
  @all_in = ();
  return "$n1.$c1\t$n2.$c2";
}
### END OF SUB_get_ranges_dpa


##########
#
# subroutine use: SUB_mk_dpamod ($dom1, $dom2, $CSV_WINDOW_SIZE)

# SUB_mk_dpamod ( $dom1, $dom2, $window )
# Derives dali_output/$dom1.$dom2.dpamod from dali_output/$dom1.$dom2.dpa so
# that the second (library) sequence of the alignment agrees with the
# sequence listed in $path_csv/$dom2.win$window.csv.  The two versions of the
# library sequence are aligned with $path_bin/align0; residues present only in
# the CSV file are inserted (facing a gap in sequence 1), and positions absent
# from it are turned into gaps or dropped.
#   $dom1, $dom2 : the two domain names that make up the .dpa file name
#   $window      : window size that is part of the CSV file name
# Returns nothing.  When the two versions already agree the .dpa file is
# simply copied.  Otherwise the .dpamod file is written only if the corrected
# library sequence really equals the CSV sequence.
# NOTE(review): the parsing of the align0 output assumes lines of the form
# "rcsv.<x> <sequence chunk>" with the partner chunk two lines further down
# -- confirm against the align0 build in use.
sub SUB_mk_dpamod {
  $dom1 = @_[0];
  $dom2 = @_[1];
  $window = @_[2];
  $alld = `cat dali_output/$dom1.$dom2.dpa`;
  @dd = split/\n/, $alld;
  # the aligned sequence is the last whitespace-separated word of each line
  @s1 = split/\s/, $dd[0]; @s2 = split/\s/, $dd[1];
  $seq1 = $s1[$#s1];
  $seq2 = $s2[$#s2];
  # library sequence according to the CSV file: the character at offset 6 of
  # every line before the "gap fraction" line, "-" being written as "X"
  open(CSV2, "$path_csv/$dom2.win$window.csv");
  @csv2 = <CSV2>;
  $csvhseq = "";
  foreach $csvline (@csv2) {
    last if ($csvline =~ /gap fraction/);
    $aa = substr($csvline, 6, 1);
    if ($aa eq "-") { $csvhseq .= "X"; }
    else { $csvhseq .= $aa; }
  }
  close(CSV2);
  open(F1, ">rcsv.$dom1.$dom2.fa"); print F1 ">rcsv\n$csvhseq\n"; close(F1);
  open(F2, ">rdal.$dom1.$dom2.fa"); $dhseq = $seq2; $dhseq =~ s/\-//g; print F2 ">rdal\n$dhseq\n"; close(F2);
  if ($dhseq eq "") { `rm rcsv.$dom1.$dom2.fa rdal.$dom1.$dom2.fa`; return; }
  # The first argument must be the file written above.  It used to be given
  # as "csv.$dom1.$dom2.fa", a file that is never created, so align0 produced
  # nothing and the .dpa file was always copied unchanged.
  $align0hit = `$path_bin/align0 rcsv.$dom1.$dom2.fa rdal.$dom1.$dom2.fa`;
  @alnh = split/\n/, $align0hit;
  # $s3 = CSV sequence as aligned, $s4 = DaliLite sequence as aligned
  $s3 = ""; $s4 = "";
  for ($i=0; $i!=($#alnh+1); $i++) {
    if ($alnh[$i] =~ /rcsv\./) {
      @alnh_2a = split/ /, $alnh[$i];
      $s3 .= $alnh_2a[1];
      @alnh_2b = split/ /, $alnh[$i+2];
      $s4 .= $alnh_2b[$#alnh_2b];
    }
  }
  # list the differences, keyed by the 0-based index ($mct) of the last
  # DaliLite residue seen: "add" = residue only in the CSV sequence,
  # "delete" = residue only in the DaliLite sequence
  @errs = (); $mct = -1; $last_s2 = 99999;
  for ($i=0; $i!=99999; $i++) {
    $aa1 = substr($s3, $i, 1);
    $aa2 = substr($s4, $i, 1);
    last if ($aa1 eq "");
    if ($aa2 ne "-") { $mct++; }
    if ($aa2 eq "-") { push @errs, "add\t$mct\t$aa1"; $last_s2 = $mct; next; }
    if ($aa1 eq "-") { push @errs, "delete\t$mct\tX"; $last_s2 = $mct; next; }
  }
  `rm rcsv.$dom1.$dom2.fa rdal.$dom1.$dom2.fa`;
  # no difference found: the alignment can be used as it is
  if ($last_s2 == 99999) { `cp dali_output/$dom1.$dom2.dpa dali_output/$dom1.$dom2.dpamod`; return; }
  # replay the alignment column by column and apply the corrections at the
  # library residue they are keyed to
  $mct = -1; $seq1u = ""; $seq2u = "";
  for ($i=0; $i!=99999; $i++) {
    $a = substr($seq1, $i, 1);
    $b = substr($seq2, $i, 1);
    if ($b ne "\-") { $mct++; }
    last if ($a eq "");
    if ($last_s2 >= $mct) {
      for ($j=0; $j!=($#errs+1); $j++) {
        @era = split/\t/, $errs[$j];
        next unless ($era[1] == $mct);
        # insert the missing library residue behind this column
        if ($era[0] eq "add") { $a .= "-"; $b .= "$era[2]"; $errs[$j] = "null\t-1\tnull"; }
        # blank the surplus library residue, or drop the column when
        # sequence 1 has a gap there anyway
        if ($era[0] eq "delete") {
          if ($a ne "-") { $b = "-"; }
          else { $a = ""; $b = ""; }
          $errs[$j] = "null\t-1\tnull";
        }
      }
    }
    $seq1u .= "$a"; $seq2u .= "$b";
  }
  # safety net: only publish a result that reproduces the CSV sequence
  $chkseq = $seq2u; $chkseq =~ s/\-//g;
  if ($chkseq eq $csvhseq) {
    open(OUTDMU, ">dali_output/$dom1.$dom2.dpamod");
    # print, not printf: the sequences are data, not a format string
    print OUTDMU "$dom1.pdb\t$seq1u\n$dom2.ent\t$seq2u\n";
    close(OUTDMU);
  }
  return;
}
### END OF SUB_mk_dpamod


##########
#
# subroutine use: SUB_calc_csv_score ( $dom1, $dom2, $window, $topX )
# script: calc_csv_score.pl

sub SUB_calc_csv_score {
#computes two normalized similarity scores for the domain pair $dom1 / $dom2 over the
#positions aligned in dali_output/$dom1.$dom2.dpamod:
#  S  = (Sn-Srand)/(Sself-Srand)      from the index-pair matrix read from conservation.h
#  CS = (CSn-CSrand)/(CSself-CSrand)  from the matrices written by scoremat_chosenpos
#arguments: $dom1, $dom2, $window (used in the *.win$window.csv file names; 1 or 3 selects the
#           table read from conservation.h), $topX (percent of the aligned positions taken from
#           the top of each sequence's score ranking)
#returns:   "error" when an input file is missing, nothing is aligned, no position is chosen or
#           the S denominator is zero; otherwise the string
#           "$S\t$CS\t$percentdiff\t$topcteach\t$topctboth\t$upostot_fromblocks"
#           ($CS is "error" when a compass matrix is missing or its denominator is zero)
#note:      every variable here is a package global; $path_csv, $path_bin, $path_aln and $d2aln
#           are read but not set in this subroutine
  $dom1 = $_[0];
  $dom2 = $_[1];
  $window = $_[2];
  $topX = $_[3];
  if (-e "new_domain_csv/$dom1.win$window.csv" == 0 | -e "$path_csv/$dom2.win$window.csv" == 0) { return "error"; }
#read the pairwise alignment: line 1 and line 2 each carry the aligned sequence after a tab;
#an unreadable alignment file is reported as "error" instead of being scanned as an empty sequence
  unless (open(DPAIN, "dali_output/$dom1.$dom2.dpamod")) { return "error"; }
  @all_dpa = <DPAIN>;
  close(DPAIN);
  $uposmaln[0][0] = "null";
  $uposct = -1;
  @dd1 = split/\t/, $all_dpa[0]; $seq1 = $dd1[1];
  @dd2 = split/\t/, $all_dpa[1]; $seq2 = $dd2[1];
  @all_dpa = ();
  $sct1 = 0;
  $sct2 = 0;
#walk the alignment columns; columns without a gap in either sequence are recorded as
#(position in seq1, position in seq2), both 1-based.  The walk stops at the newline and is
#bounded by the length of $seq1, so a missing sequence or missing newline cannot run it on
  my $alnlen = defined($seq1) ? length($seq1) : 0;
  for ($s=0; $s<$alnlen; $s++) {
    last if (substr($seq1, $s, 1) eq "\n");
    $skip = 0;
    if (substr($seq1, $s, 1) ne "-") { $sct1 += 1; }
    else { $skip = 1; }
    if (substr($seq2, $s, 1) ne "-") { $sct2 += 1; }
    else { $skip = 1; }
    next if ($skip == 1);
    $uposct += 1;
    $uposmaln[0][$uposct] = $sct1;
    $uposmaln[1][$uposct] = $sct2;
  }
  if ($uposct == -1) { return "error"; }
#get position scores for positions aligned by DaliLite
#change highly-gapped position scores to -100001 so that these positions will be at end
#after sorting (will change these position scores to 0 after sorting is completed)
#NOTE(review): the test below reads $scoresctr, which is never assigned ($scorestr is), so the
#-100001 branch is never taken and the 6-character score is always used.  Left unchanged on
#purpose: correcting the name changes the ranking and needs the sort floor ($big = -1000)
#lowered at the same time - confirm the intended scoring before touching it
  open(CSV1A, "new_domain_csv/$dom1.win$window.csv");
  @all_csv1a = <CSV1A>;
  open(CSV1B, "$path_csv/$dom2.win$window.csv");
  @all_csv1b = <CSV1B>;
  $uposcsv[0][0] = "-1";
  for ($m=0; $m!=($uposct+1); $m++) {
    for ($n=0; $n!=($#all_csv1a+1); $n++) {
      next unless ($all_csv1a[$n] =~ /^$uposmaln[0][$m] /);
      $scorestr = substr($all_csv1a[$n], 13, 8);
      if ($scoresctr eq "-1.000 *") { $uposcsv[0][$m] = -100001; }
      else { $uposcsv[0][$m] = substr($all_csv1a[$n], 13, 6); }
      last;
    }
    for ($n=0; $n!=($#all_csv1b+1); $n++) {
      next unless ($all_csv1b[$n] =~ /^$uposmaln[1][$m] /);
      $scorestr = substr($all_csv1b[$n], 13, 8);
      if ($scoresctr eq "-1.000 *") { $uposcsv[1][$m]= -100001; }
      else { $uposcsv[1][$m] = substr($all_csv1b[$n], 13, 6); }
      last;
    }
  }
  close(CSV1A); @all_csv1a = ();
  close(CSV1B); @all_csv1b = ();
#sort positions in each sequence by scores (selection sort, highest score first);
#$sortuposblock[x][rank] holds the index of the aligned column with that rank
  for ($m=0; $m!=($uposct+1); $m++) { $temp1[$m] = $uposcsv[0][$m]; $temp2[$m] = $uposcsv[1][$m]; }
  $sortuposcsv[0][0] = "null";
  $sortuposmaln[0][0] = "null";
  $sortuposblock[0][0] = "null";
  for ($m=0; $m!=($uposct+1); $m++) {
    $big = -1000;
    for ($n=0; $n!=($uposct+1); $n++) {
      next if ($temp1[$n] eq "null");
      if ($temp1[$n] > $big) { $big = $temp1[$n]; $pt = $n; }
    }
    $sortuposcsv[0][$m] = $uposcsv[0][$pt];
    $sortuposmaln[0][$m] = $uposmaln[0][$pt];
    $sortuposblock[0][$m] = $pt;
    $temp1[$pt] = "null";
  }
  for ($m=0; $m!=($uposct+1); $m++) {
    $big = -1000;
    for ($n=0; $n!=($uposct+1); $n++) {
      next if ($temp2[$n] eq "null");
      if ($temp2[$n] > $big) { $big = $temp2[$n]; $pt = $n; }
    }
    $sortuposcsv[1][$m] = $uposcsv[1][$pt];
    $sortuposmaln[1][$m] = $uposmaln[1][$pt];
    $sortuposblock[1][$m] = $pt;
    $temp2[$pt] = "null";
  }
#change position scores of highly-gapped regions to 0
  for ($m=0; $m!=($uposct+1); $m++) {
    if ($sortuposcsv[0][$m] eq "-100001") { $sortuposcsv[0][$m] = 0; }
    if ($sortuposcsv[1][$m] eq "-100001") { $sortuposcsv[1][$m] = 0; }
  }
#get score-to-index conversion table and matrix
#(21 upper bounds from "double $ins1[] = {" and a 21x21 matrix from "double $ins2[21][21] = {")
  open(CONH, "$path_bin/conservation.h");
  @all_conh = <CONH>;
  if ($window == 1) { $ins1 = "csvbound"; $ins2 = "csvmatrix0_15";}
  if ($window == 3) { $ins1 = "csv3bound"; $ins2 = "csv3matrix0_15";}
  $start = 0;
  $highcut[0] = "null";
  $hct = -1;
  foreach $line7a (@all_conh) {
    if ($start == 1) {
      $hct += 1;
#the 21st row is taken by fixed width rather than split on a comma
      if ($hct == 20) { $highcut[$hct] = substr($line7a, 0, 10); last; }
      @ar1a = split/\,/, $line7a;
      $highcut[$hct] = $ar1a[0];
    }
    else { if ($line7a =~ /^double $ins1\[\] \= \{/) { $start = 1; next; } }
  }
  $start = 0;
  $matrix[0][0] = 0;
  $hct = -1;
  foreach $line7b (@all_conh) {
    if ($start == 1) {
      $hct += 1;
      @ar1a = split/\{/, $line7b;
      @ar1b = split/\}/, $ar1a[1];
      @ar1c = split/\,/, $ar1b[0];
      for ($m=0; $m!=21; $m++) { $matrix[$hct][$m] = $ar1c[$m]; }
      last if ($hct == 20);
    }
    else { if ($line7b =~ /^double $ins2\[21\]\[21\] \= \{/) { $start = 1; next; } }
  }
  close(CONH); @all_conh = ();
#convert position scores to index scores (index = first bound that the score is below)
  $uposind[0][0] = "null";
  for ($m=0; $m!=($uposct+1); $m++) {
    for ($n=0; $n!=21; $n++) { if ($uposcsv[0][$m] < $highcut[$n]) { $uposind[0][$m] = $n; last; } }
    for ($n=0; $n!=21; $n++) { if ($uposcsv[1][$m] < $highcut[$n]) { $uposind[1][$m] = $n; last; } }
  }
#find which positions are in the $topX of either sequence
  $topcteach = sprintf("%.0f", $topX*($uposct+1)/100);
  $toplist[0] = "null";
  $topct = -1;
  LP_6: for ($m=0; $m!=($uposct+1); $m++) {
    for ($n=0; $n!=$topcteach; $n++) { if ($m == $sortuposblock[0][$n]) { $topct += 1; $toplist[$topct] = $m; next LP_6; } }
    for ($n=0; $n!=$topcteach; $n++) { if ($m == $sortuposblock[1][$n]) { $topct += 1; $toplist[$topct] = $m; next LP_6; } }
  }
  $topctboth = $topct + 1;
#with no chosen position every normalisation below would divide by zero; report "error"
#(clearing the same work arrays as the normal exit does)
  if ($topctboth == 0) {
    @posmaln = (); @uposmaln = (); @sortuposcsv = ();
    @sortuposmaln = (); @sortuposblock = (); @matrix = (); @toplist = (); @uposind = ();
    return "error";
  }
#output list of chosen positions for compass
  open(SET1, ">$dom1.vs.$dom2.chosenposlist1");
  open(SET2, ">$dom1.vs.$dom2.chosenposlist2");
  for ($m=0; $m!=($topctboth); $m++) {
    $pt = $toplist[$m];
    print SET1 "$uposmaln[0][$pt] ";
    print SET2 "$uposmaln[1][$pt] ";
  }
  close(SET1);
  close(SET2);
#find Sn: sum of pairscores for chosen positions (seq1-vs-seq2)
#find S1: sum of pairscores for chosen positions (seq1-vs-seq1)
#find S2: sum of pairscores for chosen positions (seq2-vs-seq2)
  $Sn = 0;
  $S1 = 0;
  $S2 = 0;
  for ($m=0; $m!=$topctboth; $m++) {
    for ($n=0; $n!=($uposct+1); $n++) {
      next unless ($toplist[$m] == $sortuposblock[0][$n]);
      $pt = $toplist[$m];
      $ind1 = $uposind[0][$pt];
      last;
    }
    for ($n=0; $n!=($uposct+1); $n++) {
      next unless ($toplist[$m] == $sortuposblock[1][$n]);
      $pt = $toplist[$m];
      $ind2 = $uposind[1][$pt];
      last;
    }
    $Sn += $matrix[$ind1][$ind2];
    $S1 += $matrix[$ind1][$ind1];
    $S2 += $matrix[$ind2][$ind2];
  }
  $Sself = ($S1+$S2)/2;
#find Srand: sum of all-against-all position pairscores (normalized over length)
  $Srandtot = 0;
  for ($m=0; $m!=$topctboth; $m++) {
    for ($n=0; $n!=($uposct+1); $n++) {
      next unless ($toplist[$m] == $sortuposblock[0][$n]);
      $pt = $toplist[$m];
      $ind1 = $uposind[0][$pt];
      last;
    }
    for ($q=0; $q!=$topctboth; $q++) {
      for ($r=0; $r!=($uposct+1); $r++) {
        next unless ($toplist[$q] == $sortuposblock[1][$r]);
        $pt2 = $toplist[$q];
        $ind2 = $uposind[1][$pt2];
        last;
      }
      $Srandtot += $matrix[$ind1][$ind2];
    }
  }
  $Srand = $Srandtot/$topctboth;
#calculate csv score for pair (Sn-Srand)/(Sself-Srand)
#an undefined ratio (Sself equal to Srand) is reported as "error" rather than dying on the division
  if (($Sself-$Srand) == 0) {
    unlink("$dom1.vs.$dom2.chosenposlist1", "$dom1.vs.$dom2.chosenposlist2");
    @posmaln = (); @uposmaln = (); @sortuposcsv = ();
    @sortuposmaln = (); @sortuposblock = (); @matrix = (); @toplist = (); @uposind = ();
    return "error";
  }
  $S = ($Sn-$Srand)/($Sself-$Srand);
#get matrix of compass scores for chosen position pairs
  $domain1aln = "new_domain_aln/$dom1.br.aln";
  $domain2aln = "$path_aln/$d2aln";
  $domain1pos = "$dom1.vs.$dom2.chosenposlist1";
  $domain2pos = "$dom1.vs.$dom2.chosenposlist2";
  system "$path_bin/scoremat_chosenpos -i $domain1aln -j $domain2aln -p1 $domain1pos -p2 $domain2pos -g 1.0 -o $dom1.$dom2.compassmatrix";
  system "$path_bin/scoremat_chosenpos -i $domain1aln -j $domain1aln -p1 $domain1pos -p2 $domain1pos -g 1.0 -o $dom1.vs.$dom2.compassmatrix_self1";
  system "$path_bin/scoremat_chosenpos -i $domain2aln -j $domain2aln -p1 $domain2pos -p2 $domain2pos -g 1.0 -o $dom1.vs.$dom2.compassmatrix_self2";
#all three matrices must exist, otherwise the compass score is "error"
  $ckp = 1;
  if (-e "$dom1.$dom2.compassmatrix" == 0) { $ckp = 0; }
  if (-e "$dom1.vs.$dom2.compassmatrix_self1" == 0) { $ckp = 0; }
  if (-e "$dom1.vs.$dom2.compassmatrix_self2" == 0) { $ckp = 0; }
  if ($ckp == 0) { $CS = "error"; }
  else {
#each matrix file is read as rows of single-space separated values
    open(CIN1, "$dom1.$dom2.compassmatrix");
    @rows = <CIN1>;
    for ($m=0; $m!=($#rows+1); $m++) {
      chomp $rows[$m];
      @colar1 = split/ /, $rows[$m];
      for ($n=0; $n!=($#colar1+1); $n++) { $compassmatrix[$m][$n] = $colar1[$n]; }
    }
    open(CIN2, "$dom1.vs.$dom2.compassmatrix_self1");
    @rows1 = <CIN2>;
    for ($m=0; $m!=($#rows1+1); $m++) {
      chomp $rows1[$m];
      @colar2 = split/ /, $rows1[$m];
      for ($n=0; $n!=($#colar2+1); $n++) { $compass_self1[$m][$n] = $colar2[$n]; }
    }
    open(CIN3, "$dom1.vs.$dom2.compassmatrix_self2");
    @rows2 = <CIN3>;
    for ($m=0; $m!=($#rows2+1); $m++) {
      chomp $rows2[$m];
      @colar3 = split/ /, $rows2[$m];
      for ($n=0; $n!=($#colar3+1); $n++) { $compass_self2[$m][$n] = $colar3[$n]; }
    }
#find CSn: sum of compass pairscores for chosen positions (seq1-vs-seq2) [ diagonal of *.compassmatrix ]
#find CS1: sum of compass pairscores for chosen positions (seq1-vs-seq1) [ diagonal of dom1 self matrix ]
#find CS2: sum of compass pairscores for chosen positions (seq2-vs-seq2) [ diagonal of dom2 self matrix ]
    $CSn = 0;
    $CS1 = 0;
    $CS2 = 0;
    for ($m=0; $m!=$topctboth; $m++) {
      $CSn += $compassmatrix[$m][$m];
      $CS1 += $compass_self1[$m][$m];
      $CS2 += $compass_self2[$m][$m];
    }
    $CSself = ($CS1+$CS2)/2;
#find CSrand: sum of all-against-all compass position pairscores (normalized over length)
    $CSrandtot = 0;
    for ($m=0; $m!=$topctboth; $m++) { for ($n=0; $n!=$topctboth; $n++) {  $CSrandtot += $compassmatrix[$m][$n]; } }
    $CSrand = $CSrandtot/$topctboth;
#calculate compass score for pair (CSn-CSrand)/(CSself-CSrand); "error" when the ratio is undefined
    if (($CSself-$CSrand) == 0) { $CS = "error"; }
    else { $CS = ($CSn-$CSrand)/($CSself-$CSrand); }
    `rm $dom1.$dom2.compassmatrix $dom1.vs.$dom2.compassmatrix_self1 $dom1.vs.$dom2.compassmatrix_self2`;
    `rm $dom1.vs.$dom2.chosenposlist1 $dom1.vs.$dom2.chosenposlist2`;
  }
#$percentdiff: chosen positions beyond $topcteach, as a percentage of all aligned positions
  $upostot_fromblocks = $uposct+1;
  $percentdiff = sprintf("%.3f", ($topctboth-$topcteach)/$upostot_fromblocks*100);
  close(CIN1); close(CIN2); close(CIN3); @rows = (); @rows1 = (); @rows2 = ();
  @posmaln = (); @uposmaln = (); @sortuposcsv = ();
  @sortuposmaln = (); @sortuposblock = (); @matrix = (); @toplist = (); @uposind = ();
  @compassmatrix = (); @compass_self1 = (); @compass_self2 = ();
  return "$S\t$CS\t$percentdiff\t$topcteach\t$topctboth\t$upostot_fromblocks";
}
### END OF SUB_calc_csv_score


##########
#
# subroutine use: SUB_get_dali_range ( $rep, $scopdom )
# script: includes dpa_gaps.pl

sub SUB_get_dali_range {
#inspects the DaliLite alignment of $query_is against $hit_is stored in $query_is.dali2 and
#decides whether the query range has to be trimmed or split because of gaps
#arguments: $query_is (query domain), $hit_is (library domain)
#returns:   "no gaps" when nothing has to change (or nothing usable is found), otherwise the
#           residue range(s) produced by SUB_convert_range_pos_to_res, each followed by a tab
#note:      all variables are package globals
  $query_is = $_[0];
  $hit_is = $_[1];
  if ($hit_is eq "" | $query_is eq "") { return "no gaps"; }
  $g1 = `grep "$query_is.D.$hit_is" $query_is.dali2`; @gar1 = split/\n/, $g1;
  if ($g1 eq "") { return "no gaps"; }
  @ar3a = split/\t/, $gar1[0];
#the alignment block is named query.hit.dpa unless field 1 is "error" or field 2 is the larger value
  $file = "$query_is.$hit_is.dpa";
  if ($ar3a[1] eq "error" | ($ar3a[2] > $ar3a[1] && $ar3a[1] ne "error" && $ar3a[2] ne "error")) { $file = "$hit_is.$query_is.dpa"; }
#start from empty sequences: $seq1/$seq2 are globals also written by other subroutines, and a
#missing alignment block must not be analysed with whatever an earlier call left behind
  $seq1 = ""; $seq2 = "";
  open(IN, "$query_is.dali2");
  @in = <IN>;
  for ($k=0; $k!=($#in+1); $k++) {
    next unless ($in[$k] =~ /^\!\^\(NEW\_FILE\)\: $file/);
    @ar3b = split/\t/, $in[$k+1];
    @ar3c = split/\t/, $in[$k+2];
#$seq1 is always the query sequence, $seq2 the hit sequence
    if ($file =~ /^$query_is/) { $seq1 = $ar3b[1]; $seq2 = $ar3c[1]; }
    else { $seq1 = $ar3c[1]; $seq2 = $ar3b[1]; }
    last;
  }
  close(IN); @in = ();
  if ($seq1 eq "" | $seq2 eq "" | $seq1 eq "\n" | $seq2 eq "\n") { return "no gaps"; }
#check if there is a gap >= 100 residues in length
#(for every internal gap in $seq2: $gap_n = first gap column (1-based), $gap_c = 0-based column
#of the first residue after the gap, $gap_s = gap length)
  $prevstr = "null";
  $gap_n[0] = "null";
  $gap_c[0] = "null";
  $gap_s[0] = "null";
  $ct = -1;
  for ($k=0; $k!=999999; $k++) {
    $str = substr($seq2, $k, 1);
    if ($str eq "\n") { if ($ct != -1 && $str ne "-" && $prevstr eq "-") { $gap_c[$ct] = $k; } last; }
#leading gaps are skipped until the first residue of $seq2 is seen
    if ($prevstr eq "null") { if ($str ne "-") { $prevstr = $str; } next; }
    if ($str eq "-" && $prevstr ne "-") { $ct += 1; $gapct = 1; $gap_n[$ct] = $k + 1; }
    if ($str eq "-" && $prevstr eq "-") { $gapct += 1; }
    if ($str ne "-" && $prevstr eq "-") { $gap_s[$ct] = $gapct; $gap_c[$ct] = $k; }
    $prevstr = $str;
  }
  $g100ct = -1;
  $g100[0] = "null";
  if ($ct != -1) { for ($k=0; $k!=($ct+1); $k++) { if ($gap_s[$k] >= 100) { $g100ct += 1; $g100[$g100ct] = $k; } } }
#if there is not a gap >= 100 residues, check the N- and C-termini for aligned regions <= 12 residues separated
#by a gap of >= 25 residues from the rest of the aligned regions; if such a region is found, adjust assigned range
#to exclude these short stretches (this will help reduce number of unresolved assignments)
  if ($g100ct == -1) {
    $sgq1 = -1; $sgq2 = -1;
    $bad_small_gap = 0;
#check N-terminus (scan stops at the 13th aligned pair)
    $alict = 0; $sgct = 0; $prevstr = "null";
    $sgq1 = -1; $sgq2 = -1;
    for ($k=0; $k!=999999; $k++) {
      $str1 = substr($seq1, $k, 1);
      $str2 = substr($seq2, $k, 1);
      last if ($str1 eq "\n" | $str2 eq "\n");
      if ($str1 ne "-" && $str2 ne "-") { $alict++; }
      if ($alict == 1) { $sgq1 = $k; }
      if ($alict == 13) { last; }
      if ($alict != 0) {
        if ($str2 eq "-" && $prevstr ne "-") { $sgct = 1; }
        if ($str2 eq "-" && $prevstr eq "-") { $sgct++; }
        if ($str2 ne "-" && $prevstr eq "-") {
          if ($sgct >= 25) { $sgq1 = $k; $bad_small_gap = 1; }
        }
        $prevstr = $str2;
      }
    }
#check C-terminus (same scan, walking the columns backwards)
    $alict = 0; $sgct = 0; $prevstr = "null";
    @allstr1 = (); @allstr2 = ();
    for ($k=0; $k!=999999; $k++) { last if (substr($seq1, $k, 1) eq "\n"); $allstr1[$k] = substr($seq1, $k, 1); $allstr2[$k] = substr($seq2, $k, 1); }
    for ($k=$#allstr1; $k!=-1; $k--) {
      $str1 = $allstr1[$k];
      $str2 = $allstr2[$k];
      if ($str1 ne "-" && $str2 ne "-") { $alict++; }
      if ($alict == 1) { $sgq2 = $k; }
      if ($alict == 13) { last; }
      if ($alict != 0) {
        if ($str2 eq "-" && $prevstr ne "-") { $sgct = 1; }
        if ($str2 eq "-" && $prevstr eq "-") { $sgct++; }
        if ($str2 ne "-" && $prevstr eq "-") { if ($sgct >= 25) { $sgq2 = $k; $bad_small_gap = 1; } }
        $prevstr = $str2;
      }
    }
#convert the two alignment columns into query sequence positions (count of query residues)
    $qpt = 0;
    for ($k=0; $k!=($#allstr1+1); $k++) {
      if ($allstr1[$k] ne "-") { $qpt++; }
      last if ($k >= $sgq1 && $allstr1[$k] ne "-");
    }
    $sgq1 = $qpt;
    $qpt = 0;
    for ($k=0; $k!=($sgq2+1); $k++) { if ($allstr1[$k] ne "-") { $qpt++; } }
    $sgq2 = $qpt;
#if there is a bad small gap, return new ranges; otherwise, return "no gaps"
    if ($bad_small_gap == 1) {
      $qresrange = SUB_convert_range_pos_to_res ( "new_domain_str/$query_is.ca", "$sgq1.$sgq2" );
      return "$qresrange\t";
    }
    else { return "no gaps"; }
  }
#if there is a gap >= 100 residues, split the assigned range accordingly (if any piece is <= 12 residues, do not include it)
#($posb_n/$posb_c are the first/last alignment column of each piece)
  $posb_n[0] = 1;
  for ($k=0; $k!=($g100ct+1); $k++) {
    $posb_c[$k] = $gap_n[$g100[$k]] - 1;
    $posb_n[$k+1] = $gap_c[$g100[$k]] + 1;
  }
  for ($k=0; $k!=999999; $k++) {
    $str = substr($seq2, $k, 1);
    if ($str eq "\n") { $posb_c[$g100ct+1] = $k; last; }
  }
#get scop ranges and query ranges (pos format)
  @smallbitarray = ();
  for ($m=0; $m!=($g100ct+2); $m++) {
#N-terminal end of the piece: count residues up to the piece start, then move inwards
#until a column aligned in both sequences is reached
    $sct = 0;
    $qct = 0;
    for ($k=0; $k!=($posb_n[$m]); $k++) {
      if (substr($seq2, $k, 1) ne "-") { $sct++; }
      if (substr($seq1, $k, 1) ne "-") { $qct++; }
    }
    $spos_n[$m] = $sct;
    $qpos_n[$m] = $qct;
    for ($k=0; $k!=99999999; $k++) {
      $pt = $posb_n[$m] + $k;
      last if (substr($seq2, ($pt-1), 1) ne "-" && substr($seq1, ($pt-1), 1) ne "-");
      if (substr($seq2, $pt, 1) ne "-") { $spos_n[$m]++; }
      if (substr($seq1, $pt, 1) ne "-") { $qpos_n[$m]++; }
    }
#C-terminal end of the piece: same idea, moving backwards
    $sct = 0;
    $qct = 0;
    for ($k=0; $k!=($posb_c[$m]); $k++) {
      if (substr($seq2, $k, 1) ne "-") { $sct++; }
      if (substr($seq1, $k, 1) ne "-") { $qct++; }
    }
    $spos_c[$m] = $sct;
    $qpos_c[$m] = $qct;
    for ($k=0; $k!=99999999; $k++) {
      $pt = $posb_c[$m] - $k;
      last if (substr($seq2, ($pt-1), 1) ne "-" && substr($seq1, ($pt-1), 1) ne "-");
      if (substr($seq2, ($pt-2), 1) ne "-") { $spos_c[$m]--; }
      if (substr($seq1, ($pt-2), 1) ne "-") { $qpos_c[$m]--; }
    }
    $piecelen = $qpos_c[$m] - $qpos_n[$m];
    if ($piecelen >= 0 && $piecelen < 13) { $smallbitarray[$m] = 1; }
    else { $smallbitarray[$m] = 0; }
  }
#keep only pieces with valid, increasing, long-enough ranges
  $r = "";
  for ($k=0; $k!=($g100ct+2); $k++) {
    next if ($qpos_n[$k] == "-1" | $qpos_c[$k] == "-1" | $spos_n[$k] == "-1" | $spos_c[$k] == "-1" | $qpos_n[$k] == "0" | $qpos_c[$k] == "0" | $spos_n[$k] == "0" | $spos_c[$k] == "0");
    next if ($qpos_n[$k] >= $qpos_c[$k] | $spos_n[$k] >= $spos_c[$k]);
    next if ($smallbitarray[$k] == 1);
    $qresrange = SUB_convert_range_pos_to_res ( "new_domain_str/$query_is.ca", "$qpos_n[$k].$qpos_c[$k]" );
    $r .= "$qresrange\t";
  }
  @gap_n = (); @gap_c = (); @gap_s = (); @g100 = (); @posb_c = (); @posb_n = ();
  @spos_n = (); @qpos_n = (); @spos_c = (); @qpos_c = ();
  return "$r";
}
### END OF SUB_get_dali_range


##########
#
# subroutine use: SUB_resct_in_range ( $rep, $range )
#

sub SUB_resct_in_range {
#counts the CA atom records of new_domain_str/$domain.ca that lie between the first and the
#last residue of $range (both included)
#arguments: $domain, $range in the form "chain:res.chain:res" (chain "_" stands for a blank chain)
#returns:   the count as a string
  ($domain, $range) = @_[0, 1];
  open(IN, "new_domain_str/$domain.ca");
  @all_in = <IN>;
  close(IN);
#split the range into its N- and C-terminal "chain:residue" halves
  @ar1 = split(/\./, $range);
  @ar2 = split(/\:/, $ar1[0]);
  ($nchain, $nres) = @ar2[0, 1];
  $nchain = " " if ($nchain eq "_");
  @ar3 = split(/\:/, $ar1[1]);
  ($cchain, $cres) = @ar3[0, 1];
  $cchain = " " if ($cchain eq "_");
  $ct = 0;
  $start = 0;
  foreach $tfline (@all_in) {
#only CA atoms of ATOM/HETATM records are considered
    next if (substr($tfline, 13, 2) ne "CA");
    next if ($tfline !~ /^ATOM/ && $tfline !~ /^HETATM/);
    my $chain_here = substr($tfline, 21, 1);
    ($thisres = substr($tfline, 22, 6)) =~ s/ //g;
#counting switches on at the N-terminal residue and the scan ends at the C-terminal one
    $start = 1 if ($chain_here eq $nchain && $thisres eq $nres);
    $ct += 1 if ($start == 1);
    last if ($chain_here eq $cchain && $thisres eq $cres);
  }
  @all_in = ();
  return "$ct";
}
### END OF SUB_resct_in_range


##########
#
# subroutine use: SUB_get_ov ( $rangesdom1, $rangesdom2 )

sub SUB_get_ov {
#measures how much two sets of ranges overlap
#arguments: two strings of comma-separated "start.end" ranges
#returns:   a two-element list: overlap divided by the total length of the first set, and
#           overlap divided by the total length of the second set (0 when a total is 0)
  ($dom1ranges, $dom2ranges) = @_[0, 1];
  @ar2 = split(/\,/, $dom1ranges);
  @ar4 = split(/\,/, $dom2ranges);
  $rct = 0;
  $atot = 0;
  $btot = 0;
#total length of each range set
  for ($m=0; $m<=$#ar2; $m++) { @adom = split(/\./, $ar2[$m]); $atot += $adom[1] - $adom[0] + 1; }
  for ($m=0; $m<=$#ar4; $m++) { @bdom = split(/\./, $ar4[$m]); $btot += $bdom[1] - $bdom[0] + 1; }
#overlap summed over every pair of ranges
  for ($m=0; $m<=$#ar2; $m++) {
    @adom = split(/\./, $ar2[$m]);
    for ($n=0; $n<=$#ar4; $n++) {
      @bdom = split(/\./, $ar4[$n]);
#disjoint ranges contribute nothing
      next if ($bdom[1] < $adom[0] || $adom[1] < $bdom[0]);
#ranges that only touch at an end point share one position
      if ($bdom[1] == $adom[0] || $adom[1] == $bdom[0]) { $rct += 1; next; }
#otherwise the shared span runs from the larger start to the smaller end
      my $span_start = ($adom[0] > $bdom[0]) ? $adom[0] : $bdom[0];
      my $span_end = ($adom[1] < $bdom[1]) ? $adom[1] : $bdom[1];
      $rct += $span_end - $span_start + 1;
    }
  }
  $pct1 = ($atot == 0) ? 0 : $rct/$atot;
  $pct2 = ($btot == 0) ? 0 : $rct/$btot;
  @adom = (); @bdom = ();
  return "$pct1", "$pct2";
}
### END OF SUB_get_ov


##########
#
# subroutine use: SUB_convert_from_sfamid ( $sfamid )
# script: convert_from_sfamid.pl

sub SUB_convert_from_sfamid {
#looks up the class, fold and superfamily text for a superfamily id
#argument: $sfamid, dot-separated; only its first three fields are compared
#returns:  "C: <field 5>, F: <field 7>, SF: <field 9>" taken from the $scoptab line of the first
#          matching domain that has a non-empty field 9; the three values are empty when no
#          line of $lib_scop_dircla matches
#note:     $lib_scop_dircla and $scoptab are file names set elsewhere; both files are read
#          through grep and split on tabs
  $sfamid = $_[0];
#forget the result of any earlier call so that a failed lookup cannot return stale values
  @sfar3 = ();
  @sfar0 = split/\./, $sfamid;
  $greplines = `grep "$sfamid" $lib_scop_dircla`;
  @sfar1 = split/\n/, $greplines;
  for ($k=0; $k!=($#sfar1+1); $k++) {
    $sfamrep = "XXXXXXX";
    @sfar2 = split/\t/, $sfar1[$k];
    @sfar2b = split/\./, $sfar2[3];
#grep may also match longer or unrelated ids, so the first three fields are compared exactly
    if ("$sfar2b[0].$sfar2b[1].$sfar2b[2]" eq "$sfar0[0].$sfar0[1].$sfar0[2]") {
      $sfamrep = substr($sfar2[0], 0, 7);
      $grep2 = `grep $sfamrep $scoptab`;
      @sfar3 = split/\t/, $grep2;
      last if ($sfar3[9] ne "");
    }
  }
  return "C: $sfar3[5], F: $sfar3[7], SF: $sfar3[9]";
}
### END OF SUB_convert_from_sfamid


##########
#
# subroutine use: SUB_compare_to_head ( $head, $headrange, $nonhead )
# script: compare_to_head.pl

sub SUB_compare_to_head {
#aligns a cluster head against another sequence and maps the head's range onto that sequence
#arguments: $head, $headrange ("n.c", either end may be written with a value in parentheses),
#           $nonhead
#returns:   the string returned by SUB_find_range_hit_from_head for the align0 output
#side effects: writes and removes $head.fa, $nonhead.fa and pair_aln_temp in the current directory
  ($head, $headrange, $nonhead) = @_[0, 1, 2];
  @car0 = split(/\./, $headrange);
#N-terminal end: take the value inside the parentheses when there are any
  if ($car0[0] !~ /\(/) { $npos = $car0[0]; $n_par = 0; }
  else {
    @car1 = split(/\(/, $car0[0]);
    @car2 = split(/\)/, $car1[1]);
    $npos = $car2[0];
    $n_par = 1;
  }
#C-terminal end, same rule
  if ($car0[1] !~ /\(/) { $cpos = $car0[1]; $c_par = 0; }
  else {
    @car1 = split(/\(/, $car0[1]);
    @car2 = split(/\)/, $car1[1]);
    $cpos = $car2[0];
    $c_par = 1;
  }
  $headrange = "$npos.$cpos";
#copy the two-line record of each sequence out of the blastclust fasta file; a record is
#found by the six characters that follow the first character of a line
  open(INCL, "pdb_$date/blstclst.fa.bc");
  @all_clustfa = <INCL>;
  close(INCL);
  open(HEADFA, ">$head.fa");
  for ($j=0; $j<=$#all_clustfa; $j++) {
    if (substr($all_clustfa[$j], 1, 6) eq $head) {
      print HEADFA "$all_clustfa[$j]";
      print HEADFA "$all_clustfa[$j+1]";
      last;
    }
  }
  close(HEADFA);
  open(HITFA, ">$nonhead.fa");
  for ($j=0; $j<=$#all_clustfa; $j++) {
    if (substr($all_clustfa[$j], 1, 6) eq $nonhead) {
      print HITFA "$all_clustfa[$j]";
      print HITFA "$all_clustfa[$j+1]";
      last;
    }
  }
  close(HITFA);
  @all_clustfa = ();
#pairwise alignment, then range transfer
  system "$path_bin/align0 $head.fa $nonhead.fa > pair_aln_temp";
  $hitrange = SUB_find_range_hit_from_head ( $head, $nonhead, "pair_aln_temp", $headrange );
  `rm $head.fa $nonhead.fa pair_aln_temp`;
  return "$hitrange";
}
### END OF SUB_compare_to_head


##########
#
# subroutine use: SUB_rundali_rankunmap ( $qch, $resrange, $piecenumber, $onecomplete )
# script: rundali_rankunmap.pl

sub SUB_rundali_rankunmap {
#runs DaliLite for one unmapped piece of a query chain against the library listed in
#$path_str/scoplib2dali_5N and ranks the library domains by Z-score
#arguments: $qch (query chain), $resrange (residue range(s) of the piece), $piecenumber,
#           $onecomplete (0 on the first call: dali_output is created and the library
#           dat/dssp archives are unpacked into it)
#returns:   "no dat file" when no 9950*dat file was produced for the piece, otherwise a text
#           block with the five best hits and their alignments
#side effects: writes $qch.$piecenumber.dali3 (all ranked hits) and
#           $qch.$piecenumber.dalifiles.dpa.tar.bz2, creates and removes several work files and
#           directories in the current directory, starts up to $dalisets background jobs
#note:      all variables are package globals; $path_str, $path_bin, $scoptab and $dalisets
#           are set elsewhere
  $qch = $_[0];
  $resrange = $_[1];
  $piecenumber = $_[2];
  $onecomplete = $_[3];
#name of the piece; set here because the early-exit cleanup below needs it
  $querypiece = "$qch.$piecenumber";
  if ($onecomplete == 0) { `mkdir dali_output`; }
  SUB_cut_unmap_piece_fordali ( $qch, $resrange, $piecenumber );
  open(LISTOUT, ">querydomains.list"); print LISTOUT "$qch.pdb\n"; close(LISTOUT);
  `mkdir temp_pdb_dir2`;
  `cp ./$qch.$piecenumber.pdb temp_pdb_dir2/$qch.pdb`;
  open(QDOUT, ">querydom2dali"); print QDOUT "$qch.pdb 9950\n"; close(QDOUT);
  SUB_renum_2 ( "9950", "temp_pdb_dir2" );
  SUB_get5N ();
  `cat querydom2dali_5N $path_str/scoplib2dali_5N > all2dali_5N`;
  $lsdat = `ls -1 9950*dat | wc -l`; chomp $lsdat;
  if ($lsdat == 0) {
#nothing to align: remove the work files, including the piece written above
    `rm 9950.dssp all2dali_5N dali.default domains.puu puu.default subunits.puu units.puu dali.lock`;
    `rm dumped_query_domains querydatlist querydom2dali querydom2dali_5N querydomains.list`;
    `rm -r -f temp_pdb_dir2 $querypiece.pdb`;
    if ($onecomplete == 0) { `rm -r -f dali_output`; }
    return "no dat file";
  }
  `cp $path_str/pairslist .`;
  `cp $path_str/scoplib2dali_5N .`;
#make list of pairs to run (not x2)
  open(KEY, "all2dali_5N");
  @all_key = <KEY>;
  open(TORUN, ">pairstorun");
  open(IN, "scoplib2dali_5N");
  @all_in = <IN>;
#the second field of the query's line in all2dali_5N is the id used on the DaliLite command line
  foreach $line1 (@all_key) {
    next unless ($line1 =~ /^$qch/);
    chomp $line1;
    @ar1 = split/ /, $line1;
    $id1 = $ar1[1];
    $id1four = substr($id1, 0, 4);
    last;
  }
  if (-e "$id1.dat" != 0) {
    `cp $id1four* dali_output`;
    foreach $line (@all_in) {
      chomp $line;
      @ar2 = split/ /, $line;
      $lib = substr($ar2[0], 0, 7);
      print TORUN "./DaliLite -align $id1 $ar2[1] ; mv $id1.dccp $qch.$lib.dccp\n";
      print TORUN "rm dali.lock\n";
    }
  }
  close(IN); @all_in = ();
  close(KEY); @all_key = ();
  close(TORUN);
  if ($onecomplete == 0) {
    `cp $path_str/tarscoplibdat.tar.bz2 dali_output`;
    `cp $path_str/tarscoplibdssp.tar.bz2 dali_output`;
    chdir "dali_output";
    `bunzip2 tarscoplibdat.tar.bz2`;
    `tar xvf tarscoplibdat.tar`;
    `rm tarscoplibdat.tar`;
    `bunzip2 tarscoplibdssp.tar.bz2`;
    `tar xvf tarscoplibdssp.tar`;
    `rm tarscoplibdssp.tar`;
    chdir "..";
  }
#$dalisets = 7;
#split list into $dalisets pieces and set up temp directories
  $ptrct = `wc -l pairstorun`; chomp $ptrct;
  if ($ptrct != 0) {
    for ($i=1; $i!=($dalisets+1); $i++) { `mkdir tempdir.$i`; }
    open(RUNLIST, "pairstorun");
    @all_run = <RUNLIST>;
    $N = (int (($#all_run+1)/$dalisets)) + 1;
    for ($i=1; $i!=($dalisets+1); $i++) {
      $ln = $i * $N;
      if ($ln > ($#all_run+1)) {
#last, shorter chunk
        $prevln = ($i-1)*$N;
        $left = $#all_run + 1 - $prevln;
        last if ($left == 0);
        system "tail -$left pairstorun > pairs.$i";
        `mv pairs.$i tempdir.$i`;
        last;
      }
      else {
        system "head -$ln pairstorun | tail -$N > pairs.$i";
        `mv pairs.$i tempdir.$i`;
      }
    }
    close(RUNLIST); @all_run = ();
#run dali for pairs (each chunk is executed as a background shell script in its own directory)
    for ($i=1; $i!=($dalisets+1); $i++) {
      system "cp $path_bin/DaliLite tempdir.$i";
      system "cp all2dali_5N tempdir.$i";
      chdir "tempdir.$i";
      if (-e "pairs.$i" == 1) {
        open(READPRS, "pairs.$i");
        @allrdpr = <READPRS>;
        $f = 0;
        foreach $rpline (@allrdpr) {
          next if ($rpline =~ /rm dali.lock/);
#copy the input files of both structures (first four id characters) into the chunk directory
          @rpar = split/ /, $rpline;
          $str1 = substr($rpar[2], 0, 4);
          $str2 = substr($rpar[3], 0, 4);
          if ($f == 0) { `cp ../dali_output/$str1* .`; $f = 1; }
          `cp ../dali_output/$str2* .`;
        }
        close(READPRS); @allrdpr = ();
        open(ADDRM, ">>pairs.$i");
        print ADDRM "rm dali.lock\n";
        close(ADDRM);
        system "chmod +x pairs.$i";
        system "./pairs.$i &";
      }
      chdir "..";
    }
#wait for dali to finish
    sleep 5;
    $wt = 0;
    while ($wt == 0) {
      $ovct = `ps | grep "DaliLite" | wc -l`;
      chomp $ovct;
      if ($ovct == 0) { $wt = 1; }
      else { SUB_chk_for_stalled_dali (); }
    }
#collect the results of every chunk: one "library domain \t Z-score" entry per .dccp file
    @slzscores = ();
    `mkdir tmpdalidir`;
    for ($i=1; $i!=($dalisets+1); $i++) {
      chdir "tempdir.$i";
      $ls = `ls -1 *.dccp`; @lslist = split/\n/, $ls;
      foreach $dccpfile (@lslist) {
        if (-z "$dccpfile" == 1) { `rm $dccpfile`; next; }
        $zscore = SUB_mk_dpa_from_dccp ( "$dccpfile", "$qch", "all2dali_5N" );
        @ar1a = split/$qch\./, $dccpfile;
        @ar1b = split/\.dccp/, $ar1a[1];
        push @lib_and_z, "$ar1b[0]\t$zscore";
      }
      `mv *.dpa ../tmpdalidir`;
      chdir "..";
    }
    `rm -r -f tempdir.*`;
  }
#get Z-score for each comparison
#(selection sort, highest Z-score first; every hit is written to the .dali3 file and the
#first five are also formatted, 100 alignment columns per row, into the return value)
  open(DAOUT, ">$qch.$piecenumber.dali3");
  print DAOUT "!^(NEW_FILE): $qch.$piecenumber.unmap_dalivslib\n";
  @tmp1 = @lib_and_z;
  $top5_6d_output = "";
  for ($i=0; $i!=($#tmp1+1); $i++) {
    $big = -1000; $pt = -1;
    for ($j=0; $j!=($#tmp1+1); $j++) {
      @tmp2 = split/\t/, $tmp1[$j];
      next if ($tmp2[1] eq "null" | $tmp2[1] eq "error" );
      if ($tmp2[1] > $big) { $pt = $j; $big = $tmp2[1]; }
    }
    last if ($pt == -1);
    @tmp3 = split/\t/, $tmp1[$pt];
    open(DAIN, "tmpdalidir/$qch.$tmp3[0].dpa");
    @dain = <DAIN>;
    chomp $dain[0]; chomp $dain[1];
    @ar3_0 = split/\t/, $dain[0];
    @ar3_1 = split/\t/, $dain[1];
    $stop = 0;
    $scopgrep = `grep $tmp3[0] $scoptab`; chomp $scopgrep;
    @sgar = split/\t/, $scopgrep;
    print DAOUT "$qch.$piecenumber vs $tmp3[0]\tZ-score = $tmp3[1]\n";
    print DAOUT "$tmp3[0] belongs to CLASS $sgar[5], FOLD $sgar[7], SUPERFAMILY $sgar[9]\n";
    print DAOUT "$dain[0]\n$dain[1]\n\n";
    if ($i < 5) {
      $querypiece = "$qch.$piecenumber";
      if ($i==0) { $top5_6d_output = "$querypiece vs $tmp3[0]\tZ-score = $tmp3[1]\n$tmp3[0] belongs to CLASS $sgar[5], FOLD $sgar[7], SUPERFAMILY $sgar[9]\n\n"; }
      else { $top5_6d_output .= "$querypiece vs $tmp3[0]\tZ-score = $tmp3[1]\n$tmp3[0] belongs to CLASS $sgar[5], FOLD $sgar[7], SUPERFAMILY $sgar[9]\n\n"; }
      for ($j=0; $j!=999999; $j++) {
        last if ($stop == 1);
        $p = sprintf ( "%-12s  ", $querypiece );
        $top5_6d_output .= "$p";
        for ($k=0; $k!=100; $k++) {
          $aa1 = substr($ar3_0[$#ar3_0], ($j*100+$k), 1);
#the end of the query row also ends the outer row loop
          if ($aa1 eq "") { $stop = 1; last; }
          $top5_6d_output .= "$aa1";
        }
        $p = sprintf ( "%-12s  ", $tmp3[0] );
        $top5_6d_output .= "\n$p";
        for ($k=0; $k!=100; $k++) {
          $aa2 = substr($ar3_1[$#ar3_1], ($j*100+$k), 1);
          last if ($aa2 eq "");
          $top5_6d_output .= "$aa2";
        }
        $top5_6d_output .= "\n\n";
      }
      $top5_6d_output .= "\n";
    }
    $tmp1[$pt] = "used\tnull";
    close(DAIN); @dain = ();
  }
  close(DAOUT);
  @tmp1 = (); @lib_and_z = ();

  `rm pairstorun querydatlist querydom2dali querydom2dali_5N querydomains.list 9950* pairslist scoplib2dali_5N all2dali_5N`;
  `rm $qch.$piecenumber.pdb dali_output/9*`;
#archive the alignments; tmpdalidir only exists when pairs were run, and without this check
#a failed chdir would archive the working directory and the chdir ".." would leave it
  if (-d "tmpdalidir" && chdir("tmpdalidir")) {
    `tar cvf $qch.$piecenumber.dalifiles.dpa.tar *`;
    `bzip2 $qch.$piecenumber.dalifiles.dpa.tar`;
    `mv $qch.$piecenumber.dalifiles.dpa.tar.bz2 ..`;
    chdir "..";
  }
  `rm -r -f tmpdalidir`;
  `rm -r -f temp_pdb_dir2`;
  `rm core.*`;
  return $top5_6d_output;
}
### END OF SUB_rundali_rankunmap


##########
#
# subroutine use: SUB_cut_unmap_piece_fordali ( $qch, $resrange, $piecenumber )
# script: cut_unmap_piece_fordali.pl

sub SUB_cut_unmap_piece_fordali {
#writes $qch.$piecenumber.pdb: the first three lines of new_domain_str/$qch.pdb, the
#ATOM/HETATM records of the requested residue range(s), and a closing END line
#arguments: $qch (its sixth character is the chain, "_" for a blank chain), $resrange
#           (comma-separated "chain:res.chain:res" ranges; anything from a ")" on is dropped
#           from a residue), $piecenumber
#HETATM records are written as ATOM records with the residue name replaced by ALA
  ($qch, $resrange, $piecenumber) = @_[0, 1, 2];
  $resrange =~ s/ //g;
  @ar3 = split(/\,/, $resrange);
  $totresct = 0;
  open(INP, "new_domain_str/$qch.pdb");
  @all_inp = <INP>;
  close(INP);
  open(OUTP, ">$qch.$piecenumber.pdb");
#header: first three input lines are copied as they are
  for my $hdr (0 .. 2) { print OUTP "$all_inp[$hdr]"; }
  $cact = -1;
  for ($i=0; $i<=$#ar3; $i++) {
#first and last residue of this range
    @ar4a = split(/\./, $ar3[$i]);
    @ar4b = split(/\:/, $ar4a[0]);
    @ar4c = split(/\:/, $ar4a[1]);
    if ($ar4b[1] !~ /\)/) { $n_res = $ar4b[1]; }
    else { @ar4e = split(/\)/, $ar4b[1]); $n_res = $ar4e[0]; }
    if ($ar4c[1] !~ /\)/) { $c_res = $ar4c[1]; }
    else { @ar4f = split(/\)/, $ar4c[1]); $c_res = $ar4f[0]; }
    $ch = (substr($qch, 5, 1) eq "_") ? " " : substr($qch, 5, 1);
#$start: 0 = before the range, 1 = inside it, 2 = inside the last residue
    $start = 0;
    foreach $tfline (@all_inp) {
      next if (substr($tfline, 21, 1) ne $ch);
      next if ($tfline !~ /^ATOM/ && $tfline !~ /^HETATM/);
      ($resnum = substr($tfline, 22, 6)) =~ s/ //g;
      if ($start == 1 && $resnum eq $c_res) { $start = 2; }
      elsif ($start == 0 && $resnum eq $n_res) { $start = 1; }
      next if ($start == 0);
#the first record after the last residue ends this range
      last if ($start == 2 && $resnum ne $c_res);
      if ($tfline !~ /^HETATM/) { print OUTP "$tfline"; }
      else {
        $midstr = substr($tfline, 6, 11);
        print OUTP "ATOM  $midstr";
        $endstr = substr($tfline, 20, 100);
        print OUTP "ALA$endstr";
      }
    }
  }
  print OUTP "END\n";
  close(OUTP);
  @all_inp = ();
}
### END OF SUB_cut_unmap_piece_fordali


##########
#
# subroutine use: SUB_topseqhits ( $head, $posrange, $file1, $filename, $piecenumber )
# script: from topseqhits_c2.pl

sub SUB_topseqhits {
#lists the best sequence hits that cover a piece of the query
#arguments: $qch (query chain), $posrange ("n.c" positions of the piece), $file1 (file holding
#           several outputs, each introduced by a "!^(NEW_FILE): name" line), $filename (which
#           of those outputs to read), $piecenumber
#returns:   nothing when the section has no hit at all; otherwise a text block with up to five
#           hits that overlap at least $ovpct_cutoff percent of the piece, lowest E-value first
#           (for file names not containing "simple" the hit's line in $scoptab is added)
#note:      all variables are package globals
  $qch = $_[0];
  $posrange = $_[1];
  $file1 = $_[2];
  $filename = $_[3];
  $piecenumber = $_[4];
  $ovpct_cutoff = 25;
  @par1 = split/\./, $posrange;
  $nterm = $par1[0];
  $cterm = $par1[1];
  open(RIN, "$file1");
  @rin = <RIN>;
  $retmess18 = "";
  $startb = 0;
  @getall = (); @getall2 = ();
#get all hits that cover at least $ovpct_cutoff of querypiece
  $thishit = "firsthit";
  foreach $bline (@rin) {
    if ($startb == 1) {
      last if ($bline =~ /\!\^\(NEW/);
#a new ">" header, or the closing "Database:"/"Lambda" line, completes the hit collected so far
      if (($bline =~ /^  Database\: / | $bline =~ /^\>/ | $bline =~ /^Lambda/) && $thishit !~ /^firsthit/) {
        $ov = 0;
        if ($startpos ne "" && $stoppos ne "") {
          if ($nterm >= $stoppos | $startpos >= $cterm) { $ov = 0; }
          elsif ($nterm >= $startpos && $stoppos >= $cterm) { $ov = 100; }
          else {
#partial overlap: the shared span runs from the larger start to the smaller end.  This also
#covers a hit that shares one end point with the piece, which used to be scored as 0
            my $ovstart = ($nterm > $startpos) ? $nterm : $startpos;
            my $ovstop = ($cterm < $stoppos) ? $cterm : $stoppos;
            $ov = ($ovstop-$ovstart+1)/($cterm-$nterm+1)*100;
          }
          if ($ov >= $ovpct_cutoff) {
#skip exact repeats (same E-value, overlap and identity line)
            $dup = 0;
            for ($j=0; $j!=($#getall+1); $j++) { @dar1 = split/\t/, $getall[$j]; if ($thisev eq $dar1[0] && $ov eq $dar1[1] && $thisidline eq $dar1[2]) { $dup = 1; last; } }
            if ($dup == 0) { push @getall, "$thisev\t$ov\t$thisidline"; push @getall2, "$thishit"; }
          }
        }
      }
      last if ($bline =~ /^  Database\: / | $bline =~ /No hits found/ | $bline =~ /^Lambda/);
#$thishit accumulates the full text of the current hit
      if ($bline =~ /^\>/) { $thishit = $bline; $startpos = ""; $stoppos = ""; }
      else { $thishit .= $bline; }
      if ($bline =~ / Expect \= /) { chomp $bline; @ar1 = split/ Expect \= /, $bline; $thisev = $ar1[1]; $thisev =~ s/ //g; }
#query span of the hit: first number of the first "Query:" line to last number of the last one
      if ($bline =~ /Query: /) {
        chomp $bline;
        @ar2 = split/ /, $bline;
        if ($startpos eq "") { $startpos = $ar2[1]; }
        $stoppos = $ar2[$#ar2];
      }
      if ($bline =~ /^ Identities \= /) { $thisidline = $bline; chomp $thisidline; }
    }
    else { if ($bline =~ /\!\^\(NEW\_FILE\)\: $filename/) { $startb = 1; } }
  }
  if ($thishit =~ /^firsthit/) { close(RIN); @rin = (); return; }
#sort by e-value (selection sort, smallest first; equal values keep their input order)
  @tmp1 = @getall;
  @sortgetall = (); @sortgetall2 = ();
  for ($j=0; $j!=($#getall+1); $j++) {
    $small = 100000000; $pt = -1;
    for ($k=0; $k!=($#getall+1); $k++) {
      @tmp2 = split/\t/, $tmp1[$k];
      next if ($tmp2[0] eq "null");
#field 0 is the E-value (the index used to be $0, the program name)
      if ($tmp2[0] < $small) { $pt = $k; $small = $tmp2[0]; }
    }
    last if ($pt == -1);
    push @sortgetall, "$getall[$pt]";
    push @sortgetall2, "$getall2[$pt]";
    $tmp1[$pt] = "null\tX\tX";
  }
#make output
  if (($#sortgetall+1) < 5) { $repeat = $#sortgetall+1; }
  else { $repeat = 5; }
  for ($j=0; $j!=$repeat; $j++) {
    @tmp3 = split/\t/, $sortgetall[$j];
    if ($filename =~ /simple/) {
#hit name: the six characters after the ">"
      $hitname = substr($sortgetall2[$j], 1, 6);
      $retmess18 .= "$qch.$piecenumber vs $hitname\tE-value = $tmp3[0]\n";
    }
    else {
#hit name: the seven characters after the ">", looked up in $scoptab
      $hitname = substr($sortgetall2[$j], 1, 7);
      $scopgrep = `grep $hitname $scoptab`;  chomp $scopgrep;
      @sgar = split/\t/, $scopgrep;
      $retmess18 .= "$qch.$piecenumber vs $hitname\tE-value = $tmp3[0]\n";
      $retmess18 .= "$hitname belongs to CLASS $sgar[5], FOLD $sgar[7], SUPERFAMILY $sgar[9]\n";
    }
    $retmess18 .= "$sortgetall2[$j]\n";
  }
  close(RIN); @rin = ();
  @tmp1 = (); @getall = (); @sortgetall = (); @sortgetall2 = ();
  return $retmess18;
}
### END OF SUB_topseqhits


##########
#
# subroutine use: SUB_topcompasshits ( $head, $posrange, $piecenumber )
# script: from topseqhits_c2.pl

sub SUB_topcompasshits {
# Purpose: build a text report of the (at most five) lowest-E-value COMPASS hits
#   that overlap one piece of a query chain.
# Arguments:
#   $_[0]  query chain name ($qch); "$qch.compass2" and "$qch.compass" are read
#   $_[1]  position range of the query piece, written as "nterm.cterm"
#   $_[2]  piece number; only used to label the report lines
# Returns: the report text ($retmess19); an empty string when nothing qualifies.
# Globals read: $scoptab (file handed to grep to look up each hit).
# All working variables are package globals, like everywhere else in this script.
  $qch = @_[0];
  $posrange = @_[1];
  $piecenumber = @_[2];
  $ovpct_cutoff = 25;
  @par1 = split/\./, $posrange;
  $nterm = $par1[0];
  $cterm = $par1[1];
  open(RIN, "$qch.compass2");
  @rin = <RIN>;
  $retmess19 = "";
  $startb = 0;
#get all hits that cover at least $ovpct_cutoff of querypiece
# (only the section that follows the "!^(NEW_FILE): $qch.inf" marker is scanned)
  @getall = ();
  foreach $bline (@rin) {
    if ($startb == 1) {
      last if ($bline =~ /\!\^\(NEW\_FILE/);
      chomp $bline;
      @ar3 = split/\t/, $bline;
      @ar3a = split/\,/, $ar3[3]; @ar3aa = split/\-/, $ar3a[1];
      $startpos = $ar3aa[0]; $stoppos = $ar3aa[1];
# $ov = percentage of the query piece [nterm,cterm] covered by the hit [startpos,stoppos]
      if ($nterm >= $stoppos || $startpos >= $cterm) { $ov = 0; }
      elsif ($nterm >= $startpos && $stoppos >= $cterm) { $ov = 100; }
      else {
# partial overlap: length of the intersection over length of the piece.
# Computed from the intersection bounds so that hits sharing exactly one
# boundary with the piece get their own value instead of inheriting the
# $ov left over from the previous hit.
        my $ovfirst = ($startpos > $nterm) ? $startpos : $nterm;
        my $ovlast = ($stoppos < $cterm) ? $stoppos : $cterm;
        $ov = ($ovlast-$ovfirst+1)/($cterm-$nterm+1)*100;
      }
      if ($ov >= $ovpct_cutoff) {
        @ar3b = split/\.X\./, $ar3[0];
        push @getall, "$ar3b[1]\t$ar3[1]";
      }
    }
    else { if ($bline =~ /\!\^\(NEW\_FILE\)\: \Q$qch\E\.inf/) { $startb = 1; } }
  }
#sort by e-value (selection sort; entries already taken are overwritten with "null")
  @tmp1 = @getall; @sorted_getall = ();
  for ($i=0; $i!=($#getall+1); $i++) {
    $small = 9999999999; $pt = -1;
    for ($j=0; $j!=($#getall+1); $j++) {
      @tmp2 = split/\t/, $tmp1[$j];
      next if ($tmp2[1] eq "null" || $tmp2[1] eq "error" || $tmp2[1] eq "");
      if ($tmp2[1] < $small) { $small = $tmp2[1]; $pt = $j; }
    }
    last if ($pt == -1);
    push @sorted_getall, "$getall[$pt]";
    $tmp1[$pt] = "X\tnull";
  }
  close(RIN); @rin = ();
#print to output: header lines for each of the top hits, followed by the
#matching alignment section copied from "$qch.compass"
  open(COMPIN, "$qch.compass");
  @all_comp = <COMPIN>;
  if (($#sorted_getall+1) < 5) { $repeat = $#sorted_getall+1; }
  else { $repeat = 5; }
  for ($j=0; $j!=$repeat; $j++) {
    @tmp3 = split/\t/, $sorted_getall[$j];
    $hitname = $tmp3[0]; $hitev = $tmp3[1];
    $scopgrep = `grep $hitname $scoptab`;  chomp $scopgrep;
    @sgar = split/\t/, $scopgrep;
    $retmess19 .= "$qch.$piecenumber vs $hitname\tE-value = $hitev\n$hitname belongs to CLASS $sgar[5], FOLD $sgar[7], SUPERFAMILY $sgar[9]\n";
    $startp = 0;
    for ($k=0; $k!=($#all_comp+1); $k++) {
# copying starts at the "Ali1: " line naming this hit and stops at the next
# "Ali1: " line that names a different hit
      if ($all_comp[$k] =~ /^Ali1\: / && $all_comp[$k] =~ /\Q$hitname\E/) { $startp = 1; }
      if ($startp == 1) {
        last if ($all_comp[$k] =~ /^Ali1\: / && $all_comp[$k] !~ /\Q$hitname\E/);
        $retmess19 .= "$all_comp[$k]";
      }
    }
    $retmess19 .= "\n";
  }
  close(COMPIN); @all_comp = ();
  @tmp1 = (); @getall = (); @sorted_getall = ();
  return $retmess19;
}
### END OF SUB_topcompasshits


##########
#
# subroutine use: SUB_make_insight_logs ( $rep, $type )
# script: make_insight_logs.pl

sub SUB_make_insight_logs {
# Purpose: pick representative hits for a query chain, write one pairwise
#   alignment file and one INSIGHT log per representative (through
#   SUB_aln2insightlog), and bundle everything into a tar file.
# Arguments:
#   $_[0]  $rep   query chain name; used to build every file name below
#   $_[1]  $type  ""               -> nothing is done
#                 begins "unres"   -> hits are read from "$rep.combined"
#                 anything else    -> tab-separated list of "id value" entries
#                                     (split on a single space); each entry is
#                                     treated as a MAM pair
# Returns: nothing useful.
# Globals read: $path_str, $date.  All working variables are package globals.
# Side effects: creates/removes PDBDIR and an INSIGHT_LOGS_* directory in the
#   current directory via shell commands and writes a *.inslog.tar file.
  $rep = @_[0];
  $type = @_[1];
  if ($type eq "") { return; }
  if ($type =~ /^unres/) {
#get all hits to the $rep query chain
# $hitinfo[0][$i] = 7-digit-scop-id
# $hitinfo[1][$i] = superfam id
# $hitinfo[2][$i] = residue range
# $hitinfo[3][$i] = position range
# $hitinfo[4][$i] = hit type (RPS, PSI, COM, MAM, DAL)
# $hitinfo[5][$i] = E-value or Z-score
# Only the section of "$rep.combined" between the "!^(NEW_FILE): $rep.outinf_nr"
# marker and the next "!^(NEW" marker is read.
    $hct = -1;
    $startb = 0;
    open(IA, "$rep.combined");
    @allia = <IA>;
    foreach $iline (@allia) {
      if ($startb == 1) {
        last if ($iline =~ /\!\^\(NEW/);
        chomp $iline;
        @ari1 = split/\t/, $iline;
        $pr = SUB_convert_range_res_to_pos ( "new_domain_str/$rep.ca", "$ari1[2]" );
# a line whose column 7 does not begin with column 6 extends the current hit
# with one more segment; any other line opens a new hit
# NOTE(review): column 2 is converted as a residue range above and appended as
# one here, yet it is stored as the scop id for a new hit below, where column 1
# is stored as the residue range -- confirm the column layout of this section.
        if ($ari1[7] !~ /^$ari1[6]/) {
          $hitinfo[2][$hct] .= ",$ari1[2]";
          $hitinfo[3][$hct] .= ",$pr";
        }
        else {
          $hct++;
          $hitinfo[0][$hct] = $ari1[2];
          $hitinfo[1][$hct] = $ari1[3];
          $hitinfo[2][$hct] = $ari1[1];
          $hitinfo[3][$hct] = $pr;
# column 4 is a numeric code that selects the hit type
          if ($ari1[4] == 2) { $hitinfo[4][$hct] = "RPS"; }
          elsif ($ari1[4] == 3) { $hitinfo[4][$hct] = "PSI"; }
          elsif ($ari1[4] == 5) { $hitinfo[4][$hct] = "COM"; }
          elsif ($ari1[4] == 6 && $ari1[7] eq "M") { $hitinfo[4][$hct] = "MAM"; }
          else { $hitinfo[4][$hct] = "DAL"; }
# the score (E-value or Z-score) is taken from the last column
          $hitinfo[5][$hct] = $ari1[$#ari1];
        }
      }
      else { if ($iline =~ /\!\^\(NEW\_FILE\)\: $rep.outinf_nr/) { $startb = 1; } }
    }
    close(IA); @allia = ();
#split hits by superfamily
    $sfct = -1;
    LP_S1: for ($i=0; $i!=($hct+1); $i++) {
      for ($j=0; $j!=($sfct+1); $j++) { if ($hitinfo[1][$i] eq $sfam[0][$j]) { $sfam[1][$j] += 1; $sfamar[$j][$sfam[1][$j]] = $i; next LP_S1; } }
      $sfct += 1;
      $sfam[0][$sfct] = $hitinfo[1][$i];
      $sfam[1][$sfct] = 0;
      $sfamar[$sfct][0] = $i;
    }
# $sfam[0][X] is superfamid (a.1.1)
# $sfam[1][X] is the index of the last hit stored for that superfamid
#   (number of hits minus one)
# $sfamar[X][Z] are the $hct pointers to the hits to that superfamid
#
# $regct[X] is the index of the last region for superfamid X
# $reghitct[X][Y] is the index of the last hit in region Y of superfamid X
# $reg[X][Y][Z] are the $hct pointers to the hits to region Y in superfamid X
#
#in each superfamily, split hits by region (all hits to a region overlap by at least 75% with each other)
    for ($i=0; $i!=($sfct+1); $i++) {
      for ($j=0; $j!=($sfam[1][$i]+1); $j++) {
        $assi = 0;
        if ($j==0) {
# the first hit of a superfamily always opens region 0
          $regct[$i] = 0;
          $reghitct[$i][0] = 0;
          $reg[$i][0][0] = $sfamar[$i][$j];
          $assi = 1;
        }
        else {
# decide if each hit belongs to an existing region or a new region...
          LP_S2: for ($k=0; $k!=($regct[$i]+1); $k++) {
            for ($m=0; $m!=($reghitct[$i][$k]+1); $m++) {
              $pt1 = $sfamar[$i][$j];
              $pt2 = $reg[$i][$k][$m];
# determine if domains pointed at by $pt1 and $pt2 overlap by 75%; if not, go on to next region (next LP_S2)
# both position ranges ("a.b,c.d,...") are expanded into explicit position lists
              $len1 = -1; $len2 = -1;
              @ar5a1 = split/\,/, $hitinfo[3][$pt1];
              for ($n=0; $n!=($#ar5a1+1); $n++) {
                next if ($ar5a1[$n] eq "");
                @ar5b1 = split/\./, $ar5a1[$n];
                for ($p=$ar5b1[0]; $p!=($ar5b1[1]+1); $p++) { $len1 += 1; $pos1[$len1] = $p; }
              }
              @ar5a2 = split/\,/, $hitinfo[3][$pt2];
              for ($n=0; $n!=($#ar5a2+1); $n++) {
                next if ($ar5a2[$n] eq "");
                @ar5b2 = split/\./, $ar5a2[$n];
                for ($p=$ar5b2[0]; $p!=($ar5b2[1]+1); $p++) { $len2 += 1; $pos2[$len2] = $p; }
              }
# count positions of hit 1 that also occur in hit 2
              $sharect = 0;
              for ($n=0; $n!=($len1+1); $n++) { for ($p=0; $p!=($len2+1); $p++) { if ($pos1[$n] == $pos2[$p]) { $sharect += 1; last; } } }
# NOTE(review): if a hit has no parsed positions, $len1 or $len2 stays -1 and
# the divisions below divide by zero.
              $pct1 = $sharect*100/($len1+1);
              $pct2 = $sharect*100/($len2+1);
              next LP_S2 unless ($pct1 >= 75 && $pct2 >= 75);
# the hit is added to region $k only after it passed the test against the
# last member, i.e. against every member of the region
              if ($m == $reghitct[$i][$k]) {
                $reghitct[$i][$k] += 1;
                $reg[$i][$k][$reghitct[$i][$k]] = $pt1;
                $assi = 1; last LP_S2;
              }
            }
          }
# no existing region accepted the hit: open a new region with it
          if ($assi == 0) {
            $regct[$i] += 1;
            $reghitct[$i][$regct[$i]] = 0;
            $reg[$i][$regct[$i]][0] = $sfamar[$i][$j];
          }
        }
      }
    }
#choose best hit for each region-set
# if str hit w/Z-score >= 14, use aln with highest Z-score
# else, if seq hit w/E-value < e-10, use aln with lowest E-value
# else, if str hit, use aln with highest Z-score
# else, use seq hit with lowest E-value
    @pairsforview = ();
    for ($i=0; $i!=($sfct+1); $i++) {
      for ($j=0; $j!=($regct[$i]+1); $j++) {
# $keeprep is the $hct pointer of the chosen hit ("null" while nothing is chosen)
        $keeprep = "null";
        $keepscore = "null";
# check if each is a DAL or MAM hit with Z-score >= 14, choose best Z-score
        for ($k=0; $k!=($reghitct[$i][$j]+1); $k++) {
          $pt = $reg[$i][$j][$k];
          next unless (($hitinfo[4][$pt] eq "MAM" | $hitinfo[4][$pt] eq "DAL") && $hitinfo[5][$pt] >= 14);
          next unless ($keeprep eq "null" | ($keepscore ne "null" && $hitinfo[5][$pt] > $keepscore));
          $keeprep = $pt;
          $keepscore = $hitinfo[5][$pt];
        }
# check if each is a RPS, PSI, or COM hit with E-value < 1e-10, choose best E-value
        if ($keeprep eq "null") {
          for ($k=0; $k!=($reghitct[$i][$j]+1); $k++) {
            $pt = $reg[$i][$j][$k];
            next unless ($hitinfo[4][$pt] eq "RPS" | $hitinfo[4][$pt] eq "PSI" | $hitinfo[4][$pt] eq "COM");
            next unless ($hitinfo[5][$pt] < 1e-10);
            next unless ($keeprep eq "null" | ($keepscore ne "null" && $hitinfo[5][$pt] < $keepscore));
            $keeprep = $pt;
            $keepscore = $hitinfo[5][$pt];
          }
        }
# check if each is a DAL or MAM hit, choose best Z-score
        if ($keeprep eq "null") {
          for ($k=0; $k!=($reghitct[$i][$j]+1); $k++) {
            $pt = $reg[$i][$j][$k];
            next unless ($hitinfo[4][$pt] eq "MAM" | $hitinfo[4][$pt] eq "DAL");
            next unless ($keeprep eq "null" | ($keepscore ne "null" && $hitinfo[5][$pt] > $keepscore));
            $keeprep = $pt;
            $keepscore = $hitinfo[5][$pt];
          }
        }
# check if each is a RPS, PSI, or COM hit, choose best E-value
        if ($keeprep eq "null") {
          for ($k=0; $k!=($reghitct[$i][$j]+1); $k++) {
            $pt = $reg[$i][$j][$k];
            next unless ($hitinfo[4][$pt] eq "RPS" | $hitinfo[4][$pt] eq "PSI" | $hitinfo[4][$pt] eq "COM");
            next unless ($keeprep eq "null" | ($keepscore ne "null" && $hitinfo[5][$pt] < $keepscore));
            $keeprep = $pt;
            $keepscore = $hitinfo[5][$pt];
          }
        }
# record: query, hit id, hit type, superfamily id, region number
        push @pairsforview, "$rep\t$hitinfo[0][$keeprep]\t$hitinfo[4][$keeprep]\t$sfam[0][$i]\t$j";
      }
    }
  }
  else { 
# $type is not "unres...": every non-empty tab-separated entry of $type becomes
# one MAM pair; the entry is split on a space into the hit id and a second
# field that is printed in brackets in the info file below
    @pairsforview = ();
    @ari3 = split/\t/, $type;
    for ($i=0; $i!=($#ari3+1); $i++) {
      next if ($ari3[$i] eq "");
      @ari4 = split/ /, $ari3[$i];
      push @pairsforview, "$rep\t$ari4[0]\tMAM\t$ari4[1]\t$i";
    }
  }
#set up for making insight log file: copy pdb files to PDBDIR and make pairwise alignment file
  if ($#pairsforview != -1) {
    open(IAINF, ">$rep.insightlog_info");
    `mkdir PDBDIR`;
    for ($m=0; $m!=($#pairsforview+1); $m++) {
      @ari5 = split/\t/, $pairsforview[$m];
      if ($type =~ /^unres/) { print IAINF "superfamily: $ari5[3], region $ari5[4]; Representative: $ari5[1]\n"; }
      else { print IAINF "region $ari5[4]; Fold representative: $ari5[1] \[$ari5[3]\]\n"; }
      $scopid = $ari5[1];
      `cp new_domain_str/$rep.pdb PDBDIR`;
      `cp $path_str/$scopid.ent PDBDIR/$scopid.pdb`;
# Each branch below fills $qseq (query row) and $hseq (hit row) of the pairwise
# alignment from the result file that belongs to the hit type.
      if ($ari5[2] eq "RPS" | $ari5[2] eq "PSI") {
# BLAST-style output: find the section for $filename, then the ">$scopid"
# entry, and concatenate the third non-empty space-separated field of every
# "Query:" / "Sbjct:" line
        open(GETPW, "pdb_$date/$rep.blast"); @all_br = <GETPW>;
        if ($ari5[2] eq "RPS") { $filename = "$rep-rps.br"; }
        else { $filename = "$rep-SCOPd.br"; }
        $s1 = 0; $s2 = 0; $qseq = ""; $hseq = "";
        for ($i=0; $i!=($#all_br+1); $i++) {
          if ($s1 == 1 && $s2 == 1) {
            last if ($all_br[$i] =~ /^>/ | $all_br[$i] =~ /\!\^\(NEW/);
            @ar6a = split/ /, $all_br[$i];
            $nonspacect = 0;
            for ($j=0; $j!=($#ar6a+1); $j++) {
              if ($ar6a[$j] ne "") { $nonspacect += 1; }
              if ($nonspacect == 3) { $arpt = $j; last; }
            }
            if ($all_br[$i] =~ /^Query\:/) { $qseq .= "$ar6a[$arpt]"; }
            if ($all_br[$i] =~ /^Sbjct\:/) { $hseq .= "$ar6a[$arpt]"; }
          }
          else {
            if ($all_br[$i] =~ /\!\^\(NEW\_FILE\)\: $filename/) { $s1 = 1; next; }
            if ($s1 ==1 && $all_br[$i] =~ /^>$scopid/) { $s2 = 1; }
          }
        }
        close(GETPW); @all_br = ();
      }
      if ($ari5[2] eq "COM") {
# COMPASS output: the alignment starts 6 lines after the "Ali1:" line that
# names $scopid and is read in steps of 5 lines (line $lpt and line $lpt+2),
# taking up to 75 characters starting at offset 21 of each line, until a line
# beginning "Threshold of effective gap content" is reached
        open(GETPW, "$rep.compass");
        @all_get = <GETPW>;
        $s = 0; $qseq = ""; $hseq = "";
        for ($i=0; $i!=($#all_get+1); $i++) { if ($all_get[$i] =~ /^Ali1\:/ && $all_get[$i] =~ /$scopid/) { $spt = $i+6; last; } }
        for ($i=0; $i!=100000; $i++) {
          $lpt = $spt + $i*5;
          last if ($all_get[$lpt] =~ /^Threshold of effective gap content/);
          chomp $all_get[$lpt]; chomp $all_get[$lpt+2];
          $str1 = substr($all_get[$lpt], 21, 75);
          $str2 = substr($all_get[$lpt+2], 21, 75);
          for ($j=0; $j!=76; $j++) {
            $aa1 = substr($str1, $j, 1);
            $aa2 = substr($str2, $j, 1);
            last if ($aa1 eq "" | $aa2 eq "");
# any character that does not match one of the 20 listed upper-case letters is written as a gap
            if ($aa1 eq "." | "ACDEFGHIKLMNPQRSTVWY" !~ /$aa1/) { $qseq .= "-"; }
            else { $qseq .= $aa1; }
            if ($aa2 eq "." | "ACDEFGHIKLMNPQRSTVWY" !~ /$aa2/) { $hseq .= "-"; }
            else { $hseq .= $aa2; }
          }
        }
        close(GETPW); @all_get = ();
      }
      if ($ari5[2] eq "MAM") {
# MAMMOTH output: the two lines after the "!^(NEW_FILE): $rep.M.$scopid.mpa"
# marker hold the aligned rows; the last space-separated field of each is used
        open(GETPW, "$rep.mammoth2");
        @all_get = <GETPW>;
        $qseq = ""; $hseq = "";
        for ($i=0; $i!=($#all_get+1); $i++) {
          next unless ($all_get[$i] =~ /^\!\^\(NEW\_FILE\)\: $rep.M.$scopid.mpa/);
          chomp $all_get[$i+1]; chomp $all_get[$i+2];
          @ar7a = split/ /, $all_get[$i+1];
          @ar7b = split/ /, $all_get[$i+2];
          last;
        }
        close(GETPW); @all_get = ();
        for ($j=0; $j!=100000; $j++) {
          $aa1 = substr($ar7a[$#ar7a], $j, 1);
          $aa2 = substr($ar7b[$#ar7b], $j, 1);
          last if ($aa1 eq "" | $aa2 eq "");
          if ($aa1 eq "." | "ACDEFGHIKLMNPQRSTVWY" !~ /$aa1/) { $qseq .= "-"; }
          else { $qseq .= $aa1; }
          if ($aa2 eq "." | "ACDEFGHIKLMNPQRSTVWY" !~ /$aa2/) { $hseq .= "-"; }
          else { $hseq .= $aa2; }
        }
      }
      if ($ari5[2] eq "DAL") {
# Dali output: look for "$rep.$scopid.dpa" first; if it is absent, look for the
# reversed pair "$scopid.$rep.dpa" and swap which of the two following lines is
# the query row.  The last tab-separated field of each row is used.
        open(GETPW, "$rep.dali2");
        @all_get = <GETPW>;
        $s = 0;
        for ($i=0; $i!=($#all_get+1); $i++) {
          next unless ($all_get[$i] =~ /^\!\^\(NEW\_FILE\)\: $rep.$scopid.dpa/);
          chomp $all_get[$i+1]; chomp $all_get[$i+2];
          $qline = $all_get[$i+1]; $hline = $all_get[$i+2];
          $s = 1; last;
        }
        if ($s == 0) {
          for ($i=0; $i!=($#all_get+1); $i++) {
            next unless ($all_get[$i] =~ /^\!\^\(NEW\_FILE\)\: $scopid.$rep.dpa/);
            chomp $all_get[$i+1]; chomp $all_get[$i+2];
            $qline = $all_get[$i+2]; $hline = $all_get[$i+1];
            last;
          }
        }
        $qseq = ""; $hseq = "";
        @ar8a = split/\t/, $qline;
        @ar8b = split/\t/, $hline;
        for ($j=0; $j!=100000; $j++) {
          $aa1 = substr($ar8a[$#ar8a], $j, 1);
          $aa2 = substr($ar8b[$#ar8b], $j, 1);
          last if ($aa1 eq "" | $aa2 eq "");
          if ($aa1 eq "." | "ACDEFGHIKLMNPQRSTVWY" !~ /$aa1/) { $qseq .= "-"; }
          else { $qseq .= $aa1; }
          if ($aa2 eq "." | "ACDEFGHIKLMNPQRSTVWY" !~ /$aa2/) { $hseq .= "-"; }
          else { $hseq .= $aa2; }
        }
        close(GETPW); @all_get = ();
      }
# two-row alignment file: name left-justified in 15 columns, then the sequence
      open(PW, ">$rep.$scopid.aln_for_ins");
      printf PW "%-15s $qseq\n", $rep;
      printf PW "%-15s $hseq\n", $scopid;
      close(PW);
#make insight log
      SUB_aln2insightlog ( "$rep", "$scopid" );
    }
    close(IAINF);
# gather all products in one directory, tar it, then delete the directory
    if ($type =~ /^unres/) { $dirfv = "INSIGHT_LOGS_unresolved_$rep"; }
    else { $dirfv = "INSIGHT_LOGS_fold_$rep"; }
    `mkdir $dirfv`;
    `mv PDBDIR $dirfv`;
    `mv *.aln_for_ins $dirfv`;
    `mv *.ins.log $dirfv`;
    `mv *.insightlog_info $dirfv`;
    if ($type =~ /^unres/) { `tar cvf $rep.unresolved.inslog.tar $dirfv/*`; }
    else { `tar cvf $rep.fold.inslog.tar $dirfv/*`; }
    `rm -r -f $dirfv`;
  }
# release the global working tables
  @hitinfo = (); @sfam = (); @sfamar = (); @regct = (); @reghitct = (); @reg = (); @pairsforview = ();
}
### END OF SUB_make_insight_logs


##########
#
# subroutine use: SUB_aln2insightlog ( $rep, $scopid )
# script: aln2insightlog.pl (modified for use in SCOPmap!)
# - function: write an INSIGHT log for superimposing based on given pairwise alignment

sub SUB_aln2insightlog {
# Purpose: write "$dom1.$dom2.ins.log", a list of INSIGHT commands that load
#   both structures from PDBDIR and superimpose them on the residue ranges
#   derived from the pairwise alignment "$dom1.$dom2.aln_for_ins".
# Arguments:
#   $_[0]  name of the first (reference) structure
#   $_[1]  name of the second structure
# Returns: nothing useful.
# Side effects: SUB_aln2frag and SUB_pos2res create "$dom1.fa", "$dom2.fa" and
#   "$dom1-$dom2.frag"; these are removed at the end.
  $dom1 = @_[0];
  $dom2 = @_[1];
  $aln = "$dom1.$dom2.aln_for_ins";
  open( ILOG, ">$dom1.$dom2.ins.log" );
# $fraglist[0] is the name of the first sequence; every further element is
# "name1 nameN" for one aligned pair
  @fraglist = SUB_aln2frag ( $aln );
  chomp( $fraglist[0] );
  print ILOG "Get Molecule PDB User PDBDIR/$fraglist[0].pdb P_$fraglist[0]  Heteroatom -Keep_Alternates -Use_Segids -Keep_All_Frames -Reference_Object\n";
  print ILOG "Display Molecule Only Atoms Trace P_$fraglist[0]\n";
  for $i (1..$#fraglist){
    chomp( $fraglist[$i] );
    @pdb_chs = split / /, $fraglist[$i];
    $pdb_ch1 = $pdb_chs[0];
    $pdb_ch2 = $pdb_chs[1];
# $range is "ranges_of_structure_1 ranges_of_structure_2", each a comma list
    $range = SUB_pos2res ( "$pdb_ch1", "$pdb_ch2" );
    @tmp = split / /, $range;
    @pieces_0 = split/\,/, $tmp[0];
    @pieces_1 = split/\,/, $tmp[1];
    $range_string_0 = ""; $range_string_1 = "";
    for ($j=0; $j!=($#pieces_0+1); $j++) {
# a bare "-" means the range could not be mapped on one side: drop the pair
      next if ($pieces_0[$j] eq "-" | $pieces_1[$j] eq "-");
      if ($range_string_0 eq "") { $range_string_0 = "$pieces_0[$j]"; }
      else { $range_string_0 .= ",$pieces_0[$j]"; }
      if ($range_string_1 eq "") { $range_string_1 = "$pieces_1[$j]"; }
      else { $range_string_1 .= ",$pieces_1[$j]"; }
    }
    print ILOG "Get Molecule PDB User PDBDIR/$pdb_ch2.pdb P_$pdb_ch2  Heteroatom -Keep_Alternates -Use_Segids -Keep_All_Frames -Reference_Object\n";
    print ILOG "Display Molecule Only Atoms Trace P_$pdb_ch2\n";
    print ILOG "Superimpose -End_Definition Trace -\"Label Mode\" P_$pdb_ch2:$range_string_1 P_$pdb_ch1:$range_string_0\n"; 
    print ILOG "Superimpose End_Definition\n";
  }
# Close the log so its buffered contents are on disk before the caller moves
# and archives the *.ins.log files.
  close( ILOG );
  `rm $dom1.fa $dom2.fa $dom1\-$dom2.frag`;
}
###END OF SUB_aln2insightlog


##########
#
# subroutine use: SUB_aln2frag ( $aln )
# script: aln2frag.pl

sub SUB_aln2frag {
# Purpose: cut a multiple alignment into ungapped fragments.  The first line of
#   the alignment file is the reference; every later line is compared with it
#   column by column.
# Argument: $_[0]  alignment file; each line is "name sequence".
# Output files: for every non-reference line, "<ref>-<name>.frag" with one line
#   "ref_fragment other_fragment length" per fragment, longest first (equal
#   lengths keep their order of appearance).
# Returns: list whose first element is the reference name, followed by one
#   "ref name" string per compared sequence.
  $aln = $_[0];
  open(AINFILE, "$aln");
  @forret = ();
  $nseq = 0;
  while (<AINFILE>) {
    chomp;
    @tmp = split;
    ($pdb_ch, $seq) = ($tmp[0], $tmp[1]);
    $nseq++;
# first line: remember the reference sequence and go on
    if ($nseq == 1) {
      @seq_1 = split //, $seq;
      $pdb_ch_1 = $pdb_ch;
      push @forret, "$pdb_ch_1";
      next;
    }
# every other line: collect runs of columns where both rows hold an upper-case letter
    @seq_n = split //, $seq;
    @frag_len = (0);
    @frag_seq_1 = ();
    @frag_seq_n = ();
    $nfrag = 0;
    $prev_frag = 0;
    foreach my $col (0..$#seq_1) {
      my ($res_1, $res_n) = ($seq_1[$col], $seq_n[$col]);
      if ($res_1 =~ /[A-Z]/ && $res_n =~ /[A-Z]/) {
        if ($prev_frag == 0) {
          $nfrag++;
          $frag_seq_1[$nfrag] = "";
          $frag_seq_n[$nfrag] = "";
          $frag_len[$nfrag] = 0;
        }
        $frag_seq_1[$nfrag] .= $res_1;
        $frag_seq_n[$nfrag] .= $res_n;
        $frag_len[$nfrag]++;
        $prev_frag = 1;
      }
# a column that is a gap in both rows does not interrupt the running fragment
      elsif ($res_1 eq "-" && $res_n eq "-") { next; }
# anything else (gap on one side only, other characters) ends it
      else { $prev_frag = 0; }
    }
# write fragments 1..$nfrag by decreasing length, ties in order of appearance
    open( OUTF, ">$pdb_ch_1\-$pdb_ch.frag" );
    foreach my $idx (sort { $frag_len[$b] <=> $frag_len[$a] || $a <=> $b } 1..$nfrag) {
      print OUTF "$frag_seq_1[$idx] $frag_seq_n[$idx] $frag_len[$idx]\n";
    }
    close OUTF;
    push @forret, "$pdb_ch_1 $pdb_ch";
  }
  close(AINFILE);
  return @forret;
}
### END OF SUB_aln2frag


##########
#
# subroutine use: SUB_pos2res ( $pdb_ch1, $pdb_ch2 )
# script: pos2res.pl

sub SUB_pos2res {
# Purpose: turn the aligned fragments of "$pdb_ch1-$pdb_ch2.frag" into residue
#   ranges of the two structures in ./PDBDIR.
# Arguments:
#   $_[0]  name of the first structure  (./PDBDIR/<name>.pdb)
#   $_[1]  name of the second structure (./PDBDIR/<name>.pdb)
# Returns: "r1,r2,... s1,s2,..." -- the comma-joined ranges of structure 1, one
#   space, the comma-joined ranges of structure 2 (a single space when no
#   fragment could be mapped).
# Side effects: SUB_get_domseq_faout writes "<name>.fa" for both structures.
  $pdb_ch1 = @_[0];
  $pdb_ch2 = @_[1];
  $forret2 = "";
  $seq_len1 = SUB_get_domseq_faout ( "./PDBDIR/$pdb_ch1.pdb" );
  $seq_len2 = SUB_get_domseq_faout ( "./PDBDIR/$pdb_ch2.pdb" );
# the sequence is the second line of each freshly written FASTA file
  open( SEQ1, "$pdb_ch1.fa" );
  @tmp = <SEQ1>; $seq1 = $tmp[1]; chomp( $seq1 );
  close( SEQ1 );
  open( SEQ2, "$pdb_ch2.fa" );
  @tmp = <SEQ2>; $seq2 = $tmp[1]; chomp( $seq2 );
  close( SEQ2 );
  $ffile = `cat $pdb_ch1\-$pdb_ch2.frag`; @fragfile1 = split/\n/, $ffile;
  $nfrag = 0;
  $npair = 0;
  @range1 = ();
  @range2 = ();
  foreach $fline (@fragfile1) {
    @tmp = split / /, $fline;
    $frag1 = $tmp[0];
    $frag2 = $tmp[1]; 
    $frag_len = $tmp[2];
    $nfrag++; 
    @match1 = SUB_stringMatching ( $seq1, $seq_len1, $frag1, $frag_len );
    @match2 = SUB_stringMatching ( $seq2, $seq_len2, $frag2, $frag_len );
# keep only fragments that occur exactly once in both sequences
    if( ($#match1 > 0) || ($#match2 > 0) ){ next; }
    if( ($match1[0] eq "No occurance") || ($match2[0] eq "No occurance") ){ next; }
# at most 7 fragment pairs are used
    if( $npair < 7 ){
      $pdb_range1 = SUB_map_domseq ( "./PDBDIR/$pdb_ch1.pdb", "$match1[0]" );
      $pdb_range2 = SUB_map_domseq ( "./PDBDIR/$pdb_ch2.pdb", "$match2[0]" );
      push @range1, "$pdb_range1";
      push @range2, "$pdb_range2";
      $npair++;
    }
    else { last; }
  }
# each list is joined on its own length (the second list used to be indexed
# with the last index of the first one)
  $forret2 = join(",", @range1) . " " . join(",", @range2);
  $ffile = ""; @fragfile1 = ();
  return $forret2;
}
### END OF SUB_pos2res


##########
#
# subroutine use: SUB_get_domseq_faout ( $pdbfile )
# script: get_domseq_faout.pl

sub SUB_get_domseq_faout {
# Purpose: extract the one-letter sequence of the CA atoms of a coordinate file
#   and write it as a two-line FASTA file "<root>.fa" in the current directory.
# Argument: $_[0]  path of the coordinate file; <root> is the file name up to
#   its first ".pdb", ".ent" or ".ca" (the whole file name when it has none).
# Returns: the number of residues written.
  $domainpath = @_[0];
  @dar1 = split/\//, $domainpath;
  $domain = $dar1[$#dar1];
# reset first so that a name without a known extension cannot pick up the
# extension left over from an earlier call
  $tail = "";
  if ($domain =~ /\.pdb/) { $tail = ".pdb"; }
  elsif ($domain =~ /\.ent/) { $tail = ".ent"; }
  elsif ($domain =~ /\.ca/) { $tail = ".ca"; }
# \Q..\E: the extension must be matched literally; as a bare pattern its "."
# matches any character, which truncated names such as "d1pdba_.pdb" to "d"
  if ($tail ne "") { @dar2 = split/\Q$tail\E/, $domain; $domain_root = $dar2[0]; }
  else { $domain_root = $domain; }
  open(PDBIN, "$domainpath");
  @all_pdbin = <PDBIN>;
  $start = 1;
  $prev_resnum = "null";
  $len = 0;
  open(OUT0, ">$domain_root.fa"); print OUT0 ">$domain_root\n";
  foreach $line (@all_pdbin) {
    last if ($line =~ /^END/);
    next unless ($line =~ /^ATOM/ | $line =~ /^HETATM/);
    next unless (substr($line, 12, 4) eq " CA ");
    $resnum = substr($line, 22, 6);
# a CA line repeating the previous residue number is skipped unless columns
# 56-59 read "1.00" (presumably the occupancy field -- confirm)
    if ($prev_resnum eq $resnum) { next if (substr($line, 56, 4) ne "1.00"); }  
    $prev_resnum = $resnum;
    $letter3 = substr($line, 17, 3);
    $letter1 = SUB_convert_3letter_1letter ( $letter3 );
    print OUT0 "$letter1";
    $len++;
  }
  print OUT0 "\n";
  close( OUT0 );
  close(PDBIN); @all_pdbin = ();
  return $len;
}
###END OF SUB_get_domseq_faout


##########
#
# subroutine use: SUB_stringMatching ( $seq1, $seq_len1, $frag1, $frag_len )
# script: stringMatching.pl

sub SUB_stringMatching {
# Purpose: find every occurrence of a fragment inside a sequence.  An "X" in
#   the sequence matches any fragment character.
# Arguments:
#   $_[0]  sequence to search      $_[1]  its length
#   $_[2]  fragment to look for    $_[3]  its length
# Returns: list of 1-based "start.end" positions, one per occurrence, or the
#   single element "No occurance" when there is none.
  ($text, $n1, $pattern, $n2) = @_;
  @text = split //, $text;
  @pattern = split //, $pattern;
  @forret3 = ();
  $found = 0;
  OFFSET: foreach my $offset (0 .. $n1 - $n2) {
# compare the fragment with the window that starts at $offset
    foreach my $k (0 .. $n2 - 1) {
      my $seen = $text[$offset + $k];
      next OFFSET unless ($pattern[$k] eq $seen || $seen eq "X");
    }
    $start = $offset + 1;
    $end = $offset + $n2;
    $found = 1;
    push @forret3, "$start.$end";
  }
  push @forret3, "No occurance" if ($found == 0);
  return @forret3;
}
### END OF SUB_stringMatching


##########
#
# subroutine use: SUB_map_domseq ( $pdbfile, $seq_range )
# script: map_domseq.pl

sub SUB_map_domseq {
# Purpose: translate a range of sequence positions (counted over the CA atoms
#   of a coordinate file) into the chain/residue labels found in that file.
# Arguments:
#   $_[0]  path of the coordinate file
#   $_[1]  sequence range written as "first.last" (1-based CA counts)
# Returns: "<chain><resnum>-<chain><resnum>" with blanks removed.  A position
#   that is not reached in the file leaves its side empty, so a range that
#   lies completely outside the file yields "-".
  $domainpath = @_[0];
  $seq_range = @_[1];
  @tmp = split( /\./, $seq_range );
  $first_res = $tmp[0];
  $last_res = $tmp[1];
# start from empty labels so that a position that is not found cannot return
# the residue labels left over from a previous call
  $myfirst_res = "";
  $mylast_res = "";
  open(PDBIN, "$domainpath");
  @all_pdbin = <PDBIN>;
  $residue_count = 0;
  $prev_resnum = "null";
  foreach $line (@all_pdbin) {
    last if ($line =~ /^END/);
    next unless ( ($line =~ /^ATOM/ ) || ($line =~ /^HETATM/) );
    next unless (substr($line, 12, 4) eq " CA ");
    $resnum = substr($line, 22, 6);
# a CA line repeating the previous residue number is skipped unless columns
# 56-59 read "1.00" (presumably the occupancy field -- confirm)
    if ($prev_resnum eq $resnum) { next if (substr($line, 56, 4) ne "1.00"); }
    $prev_resnum = $resnum;
    $residue_count++;
    if ($residue_count == $first_res) {
      $chain = substr($line, 21, 1);
      $myfirst_res = "$chain$resnum";
      $myfirst_res =~ s/ //g;
    }
    if ($residue_count == $last_res) {
      $chain = substr($line, 21, 1);
      $mylast_res = "$chain$resnum";
      $mylast_res =~ s/ //g;
      last;
    }
  }
  close(PDBIN); @all_pdbin = ();
  return "$myfirst_res\-$mylast_res";
}
### END OF SUB_map_domseq


##########
#
# subroutine use: SUB_cut_output_domain ( $pdbch, $domnum, $posrange )
# script: cut_output_doms.pl

sub SUB_cut_output_domain {
# Purpose: write "output_pieces/$pdbch.$domnum.pdb" holding the CA atoms of the
#   requested segments of a chain plus every non-water HETATM of the full
#   entry that lies closer than 8 A to (but not exactly on) one of those atoms.
# Arguments:
#   $_[0]  chain name ($pdbch); "new_domain_str/$pdbch.ca" is the CA source
#   $_[1]  domain number, only used in the output file name
#   $_[2]  comma-separated list of position ranges
# Globals read: $input_type, $currdir, $path_bin.
# Side effects: may create output_pieces; fetches and afterwards deletes
#   "<first 4 characters of $pdbch>.pdb" in the current directory.
  ($pdbch, $domnum, $posrange) = @_;
  unless (-d "output_pieces") { `mkdir output_pieces`; }
  @ar3 = split/\,/, $posrange;
#get all CA atoms in the domain
  open(CAIN, "new_domain_str/$pdbch.ca");
  @all_cain = <CAIN>;
  open(OUTC, ">output_pieces/$pdbch.$domnum.pdb");
# the first three lines of the CA file are copied unchanged
  print OUTC "$all_cain[0]$all_cain[1]$all_cain[2]";
  $cact = -1;
  @cax = (); @cay = (); @caz = ();
  for ($i=0; $i!=($#ar3+1); $i++) {
    $resrange = SUB_convert_range_pos_to_res ( "new_domain_str/$pdbch.ca", "$ar3[$i]" );
# pull the first and last residue number out of the converted range: text
# after ":" in each "."-separated half, cut at a ")" when there is one
    @ar4a = split/\./, $resrange;
    @ar4b = split/\:/, $ar4a[0];
    @ar4c = split/\:/, $ar4a[1];
    $n_res = $ar4b[1];
    if ($n_res =~ /\)/) { @ar4e = split/\)/, $n_res; $n_res = $ar4e[0]; }
    $c_res = $ar4c[1];
    if ($c_res =~ /\)/) { @ar4f = split/\)/, $c_res; $c_res = $ar4f[0]; }
# chain letter is character 5 of the chain name; "_" stands for a blank chain
    $ch = substr($pdbch, 5, 1);
    if ($ch eq "_") { $ch = " "; }
    $start = 0;
    foreach $tfline (@all_cain) {
      next unless ($tfline =~ /^ATOM/ || $tfline =~ /^HETATM/);
      next unless (substr($tfline, 13, 2) eq "CA" && substr($tfline, 21, 1) eq $ch);
      $resnum = substr($tfline, 22, 6); $resnum =~ s/ //g;
# nothing is written before the first residue of the segment is reached
      next if ($start != 1 && $resnum ne $n_res);
# HETATM records are rewritten as ATOM records of residue ALA
      if ($tfline =~ /^HETATM/) {
        $midstr = substr($tfline, 6, 11);
        $endstr = substr($tfline, 20, 100);
        print OUTC "ATOM  ${midstr}ALA$endstr";
      }
      else { print OUTC "$tfline"; }
# remember the coordinates for the distance test below
      $cact += 1;
      ($cax[$cact] = substr($tfline, 30, 8)) =~ s/ //g;
      ($cay[$cact] = substr($tfline, 38, 8)) =~ s/ //g;
      ($caz[$cact] = substr($tfline, 46, 8)) =~ s/ //g;
# the end of the segment is only tested on lines after its first line
      if ($start == 1) { last if ($resnum eq $c_res); }
      else { $start = 1; }
    }
  }
  close(CAIN); @all_cain = ();
#get HETATMs within 8 A of any CA in the cut domain
  $pdb4id = substr($pdbch, 0, 4);
  unless (-e "$pdb4id.pdb") {
    if ($input_type eq "NEW") { `cp $currdir/SAVE.$pdb4id.SAVE $pdb4id.pdb`; }
    else {`$path_bin/pdbcp.pl $pdb4id`; }
  }
  open(WP, "$pdb4id.pdb");
  @all_wp = <WP>;
  foreach $wpline (@all_wp) {
    next unless ($wpline =~ /^HETATM/ && substr($wpline, 17, 3) ne "HOH");
    ($thisx = substr($wpline, 30, 8)) =~ s/ //g;
    ($thisy = substr($wpline, 38, 8)) =~ s/ //g;
    ($thisz = substr($wpline, 46, 8)) =~ s/ //g;
    for ($i=0; $i!=($cact+1); $i++) {
      my $dx = $cax[$i]-$thisx;
      my $dy = $cay[$i]-$thisy;
      my $dz = $caz[$i]-$thisz;
# squared distance; 64 = 8 A squared, and 0 (same position) is excluded
      $dist_2 = $dx*$dx + $dy*$dy + $dz*$dz;
      if ($dist_2 != 0 && $dist_2 < 64) { print OUTC "$wpline"; last; }
    }
  }
  close(WP);
  `rm $pdb4id.pdb`;
  print OUTC "END\n";
  close(OUTC);
  @cax = (); @cay = (); @caz = ();
}
### END OF SUB_cut_output_domain


##########
#
# subroutine use: SUB_outputbr ( $type, $evalue_cutoff, $pdb_ch )
# script: y_outputbr_single.pl

sub SUB_outputbr {
# Purpose: copy, from the combined BLAST file "pdb_$date/$head.blast", the hits
#   of one section whose E-value is below a cutoff.
# Arguments:
#   $_[0]  section suffix ($type)      $_[1]  E-value cutoff
#   $_[2]  chain name ($pdb_ch); the section searched is "$pdb_ch$type"
# Globals read: $date, $head.
# Returns: the copied text ($retmess20); nothing when the file has no lines.
  ($type, $evalue_cutoff, $pdb_ch) = @_;
  $filename = "$pdb_ch$type";
  open(BR, "pdb_$date/$head.blast");
  @allbr = <BR>;
  return if ($#allbr == -1);
  $retmess20 = "";
# $hold: -1 before the first hit, 1 while a hit header is being buffered,
#         0 once the current hit has been accepted
  $hold = -1;
  $start = 0;
  foreach $bline (@allbr) {
# skip everything up to the marker line of the wanted section
    if ($start != 1) {
      $start = 1 if ($bline =~ /\!\^\(NEW\_FILE\)\: $filename/);
      next;
    }
    last if ($bline =~ /\!\^\(NEW\_FILE/);
    chomp $bline;
    if ($bline =~ /No hits found/ ) { $retmess20 .= "No hits found\n"; last; }
# a ">" line opens a new hit: start buffering its header
    if ($bline =~ /^>/) { @buf = ($bline); $hold = 1; next; }
    if ($hold == 1) {
      push @buf, $bline;
      if ($bline =~ /Expect = /) {
        @brar1 = split/Expect = /, $bline;
        $evalue = $brar1[$#brar1];
# values written like "e-100" get a leading 1 so they read as numbers
        if ($evalue =~ /^e/) { $evalue1 = "1".$evalue; $evalue = $evalue1; }
# the first hit at or above the cutoff ends the scan
        last unless ($evalue < $evalue_cutoff);
        $hold = 0;
        $retmess20 .= join("", map { "$_\n" } @buf);
        next;
      }
    }
    elsif ($hold == 0) {
      last if ($bline =~ /^  Database:/ || $bline =~ /^Lambda/);
      $retmess20 .= "$bline\n";
    }
  }
  close(BR); @allbr = ();
  return $retmess20;
}
### END OF SUB_outputbr


##########
#
# subroutine use: SUB_compass_addon ( $pdbch, $evalue_cutoff )
# script: compass_addon.pl

sub SUB_compass_addon {
# Purpose: collect from "$chain.compass" the alignment text of every hit whose
#   E-value is below a cutoff and return those texts ordered by E-value.
# Arguments:
#   $_[0]  chain name ($chain)      $_[1]  E-value cutoff
# Returns: the alignment texts, lowest E-value first, each followed by an
#   empty line; nothing when the file has fewer than two lines.
  ($chain, $evalue_cutoff) = @_;
  open(CPS, "$chain.compass");
  @all_cps = <CPS>;
  return if ($#all_cps < 1);
  $retmess21 = "";
  $mrk = 0;
  $rct1 = -1;
  $evar[0] = "null";
  $alnar[0] = "null";
#get all hits below evalue cutoff
  for ($i = 0; $i <= $#all_cps; $i++) {
# while a hit is open ($mrk == 1) every line is appended to it, up to the
# next "Ali1: " line
    if ($mrk == 1) {
      if ($all_cps[$i] =~ /^Ali1\: /) { $mrk = 0; next; }
      $alnar[$rct1] .= "$all_cps[$i]";
    }
    next unless ($all_cps[$i] =~ /Evalue \= /);
    @ar1 = split/Evalue \= /, $all_cps[$i];
    chomp $ar1[1];
    next unless ($ar1[1] < $evalue_cutoff);
# a qualifying hit starts with the E-value line and the four lines before it
    $mrk = 1;
    $rct1 += 1;
    $evar[$rct1] = $ar1[1];
    $alnar[$rct1] = join("", @all_cps[$i-4 .. $i]);
  }
#sort by e-value (selection sort; used entries are overwritten with "null")
  for ($i = 0; $i <= $rct1; $i++) {
    $small = 10; $pt = -1;
    for ($j = 0; $j <= $rct1; $j++) {
      next if ($evar[$j] eq "null");
      next if ($small < $evar[$j]);
      $pt = $j; $small = $evar[$j];
    }
    last if ($pt == -1);
    $retmess21 .= "$alnar[$pt]\n";
    $evar[$pt] = "null";
  }
  close(CPS); @all_cps = ();
  @alnar = (); @evar = ();
  return $retmess21;
}
### END OF SUB_compass_addon


##########
#
# subroutine use: SUB_mammoth_addon ( $pdbch, $zscore_cutoff )
# script: mammoth_addon.pl

# Builds the chain-log report of MAMMOTH hits for $chain.
#   $_[0]  chain id; data come from "<chain>.mammoth2" and "<chain>.dali2"
#   $_[1]  Z-score cutoff (a hit is kept when its Z-score is strictly larger)
# Hits are reported by decreasing Z-score.  Each report holds the scores, the
# class/fold/superfamily fields grepped from $scoptab, and the pairwise
# alignment printed 100 columns per row.  Returns the report as one string.
# All variables are package globals, following the convention of this file.
sub SUB_mammoth_addon {
  ($chain, $zscore_cutoff) = @_[0, 1];
  $retmess22 = "";
  open(MM2, "$chain.mammoth2");
  @allmm2 = <MM2>;
  open(D2, "$chain.dali2");
  @alld2 = <D2>;
#collect "domain<TAB>Z<TAB>BLOSUM" for every hit in the all_zbc section with Z > cutoff
  @zar = ();
  $start = 0;
  foreach $mline (@allmm2) {
    if ($start != 1) {
      $start = 1 if ($mline =~ /\!\^\(NEW\_FILE\)\: all_zbc_$chain/);
      next;
    }
    last if ($mline =~ /\!\^\(NEW/);
    @ar1 = split(/\t/, $mline);
    next unless ($ar1[1] > $zscore_cutoff);
    @ar2a = split(/\.M\./, $ar1[0]);
    @ar2b = split(/\.mpa/, $ar2a[1]);
    push(@zar, join("\t", $ar2b[0], $ar1[1], $ar1[2]));
  }
#report the hits one at a time, always taking the best remaining Z-score
  @tmp = @zar;
  for ($m = 0; $m <= $#zar; $m++) {
    $big = -1000;
    $pt = -1;
    for ($n = 0; $n <= $#zar; $n++) {
      next if ($tmp[$n] =~ /^null/);
      @tmp2 = split(/\t/, $tmp[$n]);
      next unless ($tmp2[1] > $big);
      $pt = $n;
      $big = $tmp2[1];
    }
    if ($pt == -1) { last; }
    @tmp2 = split(/\t/, $tmp[$pt]);
#CSV score: 4th tab field of the matching line in the mammothcsvscores section
    $start = 0;
    $csv = "NA";
    foreach $dline (@alld2) {
      if ($start != 1) {
        $start = 1 if ($dline =~ /\!\^\(NEW\_FILE\)\: $chain.mammothcsvscores.win/);
        next;
      }
      last if ($dline =~ /\!\^\(NEW/);
      next unless ($dline =~ /$tmp2[0]/);
      @tmp3 = split(/\t/, $dline);
      $csv = $tmp3[3];
    }
#mammoth/blast comparison: 5th field, filed under the method named in the 7th field
    $start = 0;
    $nga = "X";
    $ngb = "X";
    $ngc = "X";
    foreach $dline (@alld2) {
      if ($start != 1) {
        $start = 1 if ($dline =~ /\!\^\(NEW\_FILE\)\: all_mammoth_blast_$chain/);
        next;
      }
      last if ($dline =~ /\!\^\(NEW/);
      next unless ($dline =~ /$tmp2[0]/);
      @tmp3 = split(/\t/, $dline);
      $nga = $tmp3[4] if ($tmp3[6] =~ /simple/);
      $ngb = $tmp3[4] if ($tmp3[6] =~ /rps/);
      $ngc = $tmp3[4] if ($tmp3[6] =~ /SCOPd/);
    }
#$start is still 0 when the comparison section is absent
    $nacc1 = ($start == 0) ? "NA" : "$nga, $ngb, $ngc";
    $retmess22 .= "\nZ-score = $tmp2[1]\tBLOSUM score = $tmp2[2]\tCSV score = $csv\tMAMMOTH/BLAST nacc1 = $nacc1\n";
    $grepscoptab = `grep $tmp2[0] $scoptab`;
    chomp $grepscoptab;
    @greparst = split(/\t/, $grepscoptab);
    $retmess22 .= "$tmp2[0] belongs to CLASS $greparst[5], FOLD $greparst[7], SUPERFAMILY $greparst[9]\n";
#locate the pairwise alignment: the two lines after the section marker of this pair
    for ($i = 0; $i <= $#allmm2; $i++) {
      next unless ($allmm2[$i] =~ /\!\^\(NEW\_FILE\)\: $chain.M.$tmp2[0].mpa/);
      $spt = $i;
      last;
    }
    chomp $allmm2[$spt+1];
    chomp $allmm2[$spt+2];
    @mm1 = split(/ /, $allmm2[$spt+1]);
    @mm2 = split(/ /, $allmm2[$spt+2]);
#print the last space-separated field of both lines, 100 columns per row
    $stop = 0;
    for ($j = 0; $j < 999999; $j++) {
      if ($stop == 1) {
        $retmess22 .= "\n\n";
        last;
      }
      $h = sprintf("%-12s  ", $chain);
      $retmess22 .= "\n$h";
      for ($k = 0; $k < 100; $k++) {
        $aa1 = substr($mm1[$#mm1], ($j*100+$k), 1);
        if ($aa1 eq "") {
          $stop = 1;
          last;
        }
        $retmess22 .= $aa1;
      }
      $h = sprintf("%-12s  ", $tmp2[0]);
      $retmess22 .= "\n$h";
      for ($k = 0; $k < 100; $k++) {
        $aa2 = substr($mm2[$#mm2], ($j*100+$k), 1);
        if ($aa2 eq "") { last; }
        $retmess22 .= $aa2;
      }
      $retmess22 .= "\n";
    }
#mark this hit as reported
    $tmp[$pt] = "null\tX\tX";
  }
  close(MM2);
  @allmm2 = ();
  close(D2);
  @alld2 = ();
  @tmp = ();
  @zar = ();
  return $retmess22;
}
### END OF SUB_mammoth_addon


##########
#
# subroutine use: SUB_dali_addon ( $chain, $zscore_cutoff )
# script: dali_addon.pl

# Builds the chain-log report of DaliLite hits for $chain.
#   $_[0]  chain id; all data come from "<chain>.dali2"
#   $_[1]  Z-score cutoff (a hit is dropped when its Z-score is below it)
# Each line of the all_dali_zbc section carries two Z-scores (fields 2 and 3);
# the larger usable one is kept, together with the name of the matching
# alignment section.  Hits are reported by decreasing Z-score.
# Returns nothing when the file holds fewer than two lines, otherwise the
# report as one string.
# All variables are package globals, following the convention of this file.
sub SUB_dali_addon {
  ($chain, $zscore_cutoff) = @_[0, 1];
  open(D2, "$chain.dali2");
  @alld2 = <D2>;
  return if ($#alld2 < 1);
  $retmess23 = "";
#collect "domain<TAB>Z<TAB>BLOSUM<TAB>alignment-name" for every hit at or above the cutoff
  @zar = ();
  $start = 0;
  foreach $dline (@alld2) {
    if ($start != 1) {
      $start = 1 if ($dline =~ /\!\^\(NEW\_FILE\)\: all_dali_zbc_$chain/);
      next;
    }
    last if ($dline =~ /\!\^\(NEW/);
    @ar1 = split(/\t/, $dline);
    @ar2 = split(/\.D\./, $ar1[0]);
    next if ($ar1[1] eq "error" && $ar1[2] eq "error");
    if ($ar1[1] eq "error" || ($ar1[2] ne "error" && $ar1[2] > $ar1[1])) {
#second score wins: its alignment section is named <domain>.<chain>.dpa
      next if ($ar1[2] < $zscore_cutoff);
      push(@zar, "$ar2[1]\t$ar1[2]\t$ar1[4]\t$ar2[1].$chain.dpa");
    }
    else {
#first score wins: its alignment section is named <chain>.<domain>.dpa
      next if ($ar1[1] < $zscore_cutoff);
      push(@zar, "$ar2[1]\t$ar1[1]\t$ar1[3]\t$chain.$ar2[1].dpa");
    }
  }
#report the hits one at a time, always taking the best remaining Z-score
  @tmp = @zar;
  for ($m = 0; $m <= $#zar; $m++) {
    $big = -1000;
    $pt = -1;
    for ($n = 0; $n <= $#zar; $n++) {
      next if ($tmp[$n] =~ /^null/);
      @tmp2 = split(/\t/, $tmp[$n]);
      next unless ($tmp2[1] > $big);
      $pt = $n;
      $big = $tmp2[1];
    }
    if ($pt == -1) { last; }
    @tmp2 = split(/\t/, $tmp[$pt]);
#CSV score: 4th tab field of the matching line in the csvscores section
    $start = 0;
    $csv = "NA";
    foreach $dline (@alld2) {
      if ($start != 1) {
        $start = 1 if ($dline =~ /\!\^\(NEW\_FILE\)\: $chain.csvscores.win/);
        next;
      }
      last if ($dline =~ /\!\^\(NEW/);
      next unless ($dline =~ /$tmp2[0]/);
      @tmp3 = split(/\t/, $dline);
      $csv = $tmp3[3];
    }
#dali/blast comparison: 5th field, filed under the method named in the 7th field
    $start = 0;
    $nga = "X";
    $ngb = "X";
    $ngc = "X";
    foreach $dline (@alld2) {
      if ($start != 1) {
        $start = 1 if ($dline =~ /\!\^\(NEW\_FILE\)\: all_dali_blast_$chain/);
        next;
      }
      last if ($dline =~ /\!\^\(NEW/);
      next unless ($dline =~ /$tmp2[0]/);
      @tmp3 = split(/\t/, $dline);
      $nga = $tmp3[4] if ($tmp3[6] =~ /simple/);
      $ngb = $tmp3[4] if ($tmp3[6] =~ /rps/);
      $ngc = $tmp3[4] if ($tmp3[6] =~ /SCOPd/);
    }
#$start is still 0 when the comparison section is absent
    $nacc1 = ($start == 0) ? "NA" : "$nga, $ngb, $ngc";
    $retmess23 .= "\nZ-score = $tmp2[1]\tBLOSUM score = $tmp2[2]\tCSV score = $csv\tDALI/BLAST nacc1 = $nacc1\n";
    $grepscoptab = `grep $tmp2[0] $scoptab`;
    chomp $grepscoptab;
    @greparst = split(/\t/, $grepscoptab);
    $retmess23 .= "$tmp2[0] belongs to CLASS $greparst[5], FOLD $greparst[7], SUPERFAMILY $greparst[9]\n";
#locate the pairwise alignment: the two lines after the section marker of this pair
    for ($i = 0; $i <= $#alld2; $i++) {
      next unless ($alld2[$i] =~ /\!\^\(NEW\_FILE\)\: $tmp2[3]/);
      $spt = $i;
      last;
    }
    chomp $alld2[$spt+1];
    chomp $alld2[$spt+2];
    @mm1 = split(/\t/, $alld2[$spt+1]);
    @mm2 = split(/\t/, $alld2[$spt+2]);
#print the last tab-separated field of both lines, 100 columns per row
    $stop = 0;
    for ($j = 0; $j < 999999; $j++) {
      if ($stop == 1) {
        $retmess23 .= "\n\n";
        last;
      }
      $h = sprintf("%-12s ", $chain);
      $retmess23 .= "\n$h";
      for ($k = 0; $k < 100; $k++) {
        $aa1 = substr($mm1[$#mm1], ($j*100+$k), 1);
        if ($aa1 eq "") {
          $stop = 1;
          last;
        }
        $retmess23 .= $aa1;
      }
      $h = sprintf("%-12s ", $tmp2[0]);
      $retmess23 .= "\n$h";
      for ($k = 0; $k < 100; $k++) {
        $aa2 = substr($mm2[$#mm2], ($j*100+$k), 1);
        if ($aa2 eq "") { last; }
        $retmess23 .= $aa2;
      }
      $retmess23 .= "\n";
    }
#mark this hit as reported
    $tmp[$pt] = "null\tX\tX\tX";
  }
  close(D2);
  @alld2 = ();
  @tmp = ();
  @zar = ();
  return $retmess23;
}
### END OF SUB_dali_addon


##########
#
# subroutine use: SUB_mk_mpaconf ( $dom1, $dom2, $CSV_WINDOW_SIZE )

# Writes the alignment file "$dom1.$dom2.mpaconf" for a query/hit pair.
#   $_[0]  $dom1   - first domain; its MAMMOTH output is read from "$dom1.mammoth"
#   $_[1]  $dom2   - second domain; its profile is "$path_csv/$dom2.win<window>.csv"
#   $_[2]  $window - window size used in the CSV file name
# Steps: (1) extract the pairwise alignment of $dom1/$dom2 from the MAMMOTH
# output into "$dom1.$dom2.mpaconfpre"; (2) align the $dom2 sequence found in
# the CSV file against the $dom2 sequence found in the MAMMOTH alignment with
# the external "align0" program; (3) if they differ, patch the alignment
# columns and write the result only when the patched $dom2 row reproduces the
# CSV sequence.  No value is returned; the output is the files written.
# All variables are package globals.
sub SUB_mk_mpaconf {
  $dom1 = @_[0];
  $dom2 = @_[1];
  $window = @_[2];
  open(OUTMC, ">$dom1.$dom2.mpaconfpre");
  $allm = `cat $dom1.mammoth`;
  @mm = split/\n/, $allm;
  $startm = 0; $seq1 = ""; $seq2 = ""; $astseq = ""; $skipnext = 0;
#scan the MAMMOTH output: parsing starts at the "Filename:" line that mentions $dom2
#and stops at a "  Timings" or " Predicted path:" line
  for ($s=0; $s!=($#mm+1); $s++) {
    if ($startm == 1) {
      if ($skipnext == 1) { $skipnext = 0; next; }
      last if ($mm[$s] =~ /^  Timings/ | $mm[$s] =~ /^ Predicted path\:/);
#each alignment block starts with a "Prediction " line; the line after it is skipped
      if ($mm[$s] =~ /^Prediction /) {
        $skipnext = 1;
        chomp @mm[$s];
        chomp @mm[$s+4];
        chomp @mm[$s+5];
#row for seq1 is this line, row for seq2 is 4 lines below; field 0 is dropped
        @mar1 = split/ /, @mm[$s];
        @mar2 = split/ /, @mm[$s+4];
#NOTE(review): the field count of @mar1 also drives the indexing of @mar2
        for ($t=1; $t!=($#mar1+1); $t++) { $seq1 .= $mar1[$t]; $seq2 .= $mar2[$t]; }
#marker row is 5 lines below: five 10-character chunks taken at offsets 11, 22, 33, 44, 55
        for ($t=1; $t!=6; $t++) { $s2 = substr($mm[$s+5], ($t*11), 10); $astseq .= $s2; }
      }    
    }
    else { if ($mm[$s] =~ /Filename\:/ && $mm[$s] =~ /$dom2/) { $startm = 1; } }
  }  
#preliminary alignment: three rows labelled $dom1, $dom2 and "asterisk"
  printf OUTMC "%-12s $seq1\n%-12s $seq2\n%-12s $astseq\n", $dom1, $dom2, "asterisk";
  close(OUTMC);
  $allm = ""; @mm = ();

#build the $dom2 sequence from the CSV file: character at offset 6 of every line
#before the "gap fraction" line, with "-" replaced by "X"
  open(CSV2, "$path_csv/$dom2.win$window.csv");
  @csv2 = <CSV2>;
  $csvhseq = "";
  foreach $csvline (@csv2) {
    last if ($csvline =~ /gap fraction/);
    $aa = substr($csvline, 6, 1);
    if ($aa eq "-") { $csvhseq .= "X"; }
    else { $csvhseq .= $aa; }
  }
  close(CSV2);
#write both versions of the $dom2 sequence as FASTA; the MAMMOTH one is seq2 without "."
  open(F1, ">rcsv.$dom1.$dom2.fa"); print F1 ">rcsv\n$csvhseq\n"; close(F1);
  open(F2, ">rmam.$dom1.$dom2.fa"); $mamhseq = $seq2; $mamhseq =~ s/\.//g; print F2 ">rmam\n$mamhseq\n"; close(F2);
#nothing was extracted for $dom2: clean up and give up (no .mpaconf is written)
  if ($mamhseq eq "") { `rm rcsv.$dom1.$dom2.fa rmam.$dom1.$dom2.fa`; return; }
  $align0hit = `$path_bin/align0 rcsv.$dom1.$dom2.fa rmam.$dom1.$dom2.fa`;
  @alnh = split/\n/, $align0hit;
#collect the two aligned rows from the align0 output: 2nd field of each line
#matching "rcsv." and last field of the line two below it
  $s3 = ""; $s4 = "";
  for ($i=0; $i!=($#alnh+1); $i++) {
    if ($alnh[$i] =~ /rcsv\./) {
      @alnh_2a = split/ /, $alnh[$i];
      $s3 .= $alnh_2a[1];
      @alnh_2b = split/ /, $alnh[$i+2];
      $s4 .= $alnh_2b[$#alnh_2b];
    }
  }
#list the differences; $mct counts the non-gap characters of $s4 seen so far
#  "add"    = $s4 has a gap where $s3 has residue $aa1
#  "delete" = $s3 has a gap where $s4 has a residue
#$last_s2 remembers the position of the last difference (99999 = none found)
  @errs = (); $mct = -1; $last_s2 = 99999;
  for ($i=0; $i!=99999; $i++) {
    $aa1 = substr($s3, $i, 1);
    $aa2 = substr($s4, $i, 1);
    last if ($aa1 eq "");
    if ($aa2 ne "-") { $mct++; }
    if ($aa2 eq "-") { push @errs, "add\t$mct\t$aa1"; $last_s2 = $mct; next; }
    if ($aa1 eq "-") { push @errs, "delete\t$mct\tX"; $last_s2 = $mct; next; }
  }
  `rm rcsv.$dom1.$dom2.fa rmam.$dom1.$dom2.fa`;
#no difference: the preliminary alignment is the final one
  if ($last_s2 == 99999) { `cp $dom1.$dom2.mpaconfpre $dom1.$dom2.mpaconf`; return; }
#replay the differences onto the alignment, one column at a time;
#$mct counts the non-"." characters of seq2 seen so far.
#note that $a and $b here are the package globals that sort() also uses
  $mct = -1; $seq1u = ""; $seq2u = ""; $astsequ = "";
  for ($i=0; $i!=99999; $i++) {
    $a = substr($seq1, $i, 1);
    $b = substr($seq2, $i, 1);
    $c = substr($astseq, $i, 1);
    last if ($a eq "");
    if ($b ne "\.") { $mct++; }
    if ($last_s2 >= $mct) {
      for ($j=0; $j!=($#errs+1); $j++) {
        @era = split/\t/, $errs[$j];
        next unless ($era[1] == $mct);
#add: append an extra column (gap in seq1, the missing residue in seq2, blank marker)
        if ($era[0] eq "add") { $a .= "."; $b .= "$era[2]"; $c .= " "; $errs[$j] = "null\t-1\tnull"; }
#delete: turn the seq2 residue into a gap, or drop the column when seq1 is a gap too
        if ($era[0] eq "delete") {
          if ($a ne "\.") { $b = "."; $c = " "; }
          else { $a = ""; $b = ""; $c = ""; }
          $errs[$j] = "null\t-1\tnull";
        }
      }
    }
    $seq1u .= "$a"; $seq2u .= "$b"; $astsequ .= "$c";
  }
#accept the patched alignment only if its seq2 row now equals the CSV sequence;
#otherwise no .mpaconf file is written
  $chkseq = $seq2u; $chkseq =~ s/\.//g;
  if ($chkseq eq $csvhseq) {
    open(OUTMCU, ">$dom1.$dom2.mpaconf");
    printf OUTMCU "%-12s $seq1u\n%-12s $seq2u\n%-12s $astsequ\n", $dom1, $dom2, "asterisk";
    close(OUTMCU);
  }
}
### END OF SUB_mk_mpaconf


##########
#
# subroutine use: SUB_calc_csv_score_mam ( $dom1, $dom2, $window, $topX )
# script: calc_csv_score_adj_mam_ast.pl

# Scores the alignment in "$dom1.$dom2.mpaconf" using per-position scores from
# the two CSV files and using the external "scoremat_chosenpos" program.
#   $_[0]  $dom1   - first domain  (CSV file: new_domain_csv/$dom1.win<window>.csv)
#   $_[1]  $dom2   - second domain (CSV file: $path_csv/$dom2.win<window>.csv)
#   $_[2]  $window - window size; selects the CSV files and the tables read
#                    from conservation.h (only 1 and 3 are handled)
#   $_[3]  $topX   - percentage of aligned positions to keep from each sequence
# Returns "error" when a CSV file is missing or no usable aligned position is
# found; otherwise the tab-separated string
#   "$S\t$CS\t$percentdiff\t$topcteach\t$topctboth\t$upostot_fromblocks"
# in which $CS is itself "error" when a matrix file was not produced.
# All variables are package globals.
sub SUB_calc_csv_score_mam {
  $dom1 = @_[0];
  $dom2 = @_[1];
  $window = @_[2];
  $topX = @_[3];
  if (-e "new_domain_csv/$dom1.win$window.csv" == 0 | -e "$path_csv/$dom2.win$window.csv" == 0) { return "error"; }
#find positions in each sequence that are aligned (in *.mpaconf); use only those positions marked with an asterisk
  open(MPAIN, "$dom1.$dom2.mpaconf");
  @all_mpa = <MPAIN>;
  $uposmaln[0][0] = "null";
  $uposct = -1;
#the three rows of the file; the text of each row starts at column 13
  $seq1 = substr($all_mpa[0], 13, 99999999); chomp $seq1;
  $seq2 = substr($all_mpa[1], 13, 99999999); chomp $seq2;
  $astseq = substr($all_mpa[2], 13, 99999999); chomp $astseq;
#$sct1/$sct2 count the non-"." characters seen so far in each row (1-based positions)
  $sct1 = 0;
  $sct2 = 0;
  for ($s=0; $s!=99999999; $s++) {
    last if (substr($seq1, $s, 1) eq "");
    $skip = 0;
    if (substr($seq1, $s, 1) ne ".") { $sct1 += 1; }
    else { $skip = 1; }
    if (substr($seq2, $s, 1) ne ".") { $sct2 += 1; }
    else { $skip = 1; }
    next unless ($skip == 0 && substr($astseq, $s, 1) eq "*");
    $uposct += 1;
    $uposmaln[0][$uposct] = $sct1;
    $uposmaln[1][$uposct] = $sct2;
  }
  close(MPAIN); @all_mpa = ();
  if ($uposct == -1) { return "error"; }
#get position scores for the aligned positions from the two CSV files
#change highly-gapped position scores to -100001 so that these positions will be at end
#after sorting (will change these position scores to 0 after sorting is completed)
#NOTE(review): the test below reads $scoresctr, which is not assigned anywhere in this
#subroutine, while $scorestr is assigned and never read -- this looks like a typo, and as
#written the -100001 branch would not be taken unless $scoresctr is set elsewhere
  open(CSV1A, "new_domain_csv/$dom1.win$window.csv");
  @all_csv1a = <CSV1A>;
  open(CSV1B, "$path_csv/$dom2.win$window.csv");
  @all_csv1b = <CSV1B>;
  $uposcsv[0][0] = "-1";
  for ($m=0; $m!=($uposct+1); $m++) {
#first CSV line that starts with the position number followed by a blank
    for ($n=0; $n!=($#all_csv1a+1); $n++) {
      next unless ($all_csv1a[$n] =~ /^$uposmaln[0][$m] /);
      $scorestr = substr($all_csv1a[$n], 13, 8);
      if ($scoresctr eq "-1.000 *") { $uposcsv[0][$m] = -100001; }
      else { $uposcsv[0][$m] = substr($all_csv1a[$n], 13, 6); }
      last;
    }
    for ($n=0; $n!=($#all_csv1b+1); $n++) {
      next unless ($all_csv1b[$n] =~ /^$uposmaln[1][$m] /);
      $scorestr = substr($all_csv1b[$n], 13, 8);
      if ($scoresctr eq "-1.000 *") { $uposcsv[1][$m]= -100001; }
      else { $uposcsv[1][$m] = substr($all_csv1b[$n], 13, 6); }
      last;
    }
  }
#NOTE(review): the arrays filled above are @all_csv1a/@all_csv1b; the two arrays cleared
#here carry different names, so the file contents stay in memory
  close(CSV1A); @all_csv1 = ();
  close(CSV1B); @all_csv2 = ();
#sort positions in each sequence by scores
#(repeatedly pick the largest remaining score; $sortuposblock keeps the original index)
#NOTE(review): $big starts at -1000, so a score of -1000 or less is never picked and
#$pt then keeps the value left by the previous pick
  for ($m=0; $m!=($uposct+1); $m++) { @temp1[$m] = $uposcsv[0][$m]; @temp2[$m] = $uposcsv[1][$m]; }
  $sortuposcsv[0][0] = "null";
  $sortuposmaln[0][0] = "null";
  $sortuposblock[0][0] = "null";
  for ($m=0; $m!=($uposct+1); $m++) {
    $big = -1000;
    for ($n=0; $n!=($uposct+1); $n++) {
      next if ($temp1[$n] eq "null");
      if ($temp1[$n] > $big) { $big = $temp1[$n]; $pt = $n; }
    }
    $sortuposcsv[0][$m] = $uposcsv[0][$pt];
    $sortuposmaln[0][$m] = $uposmaln[0][$pt];
    $sortuposblock[0][$m] = $pt;
    $temp1[$pt] = "null";
  }
  for ($m=0; $m!=($uposct+1); $m++) {
    $big = -1000;
    for ($n=0; $n!=($uposct+1); $n++) {
      next if ($temp2[$n] eq "null");
      if ($temp2[$n] > $big) { $big = $temp2[$n]; $pt = $n; }
    }
    $sortuposcsv[1][$m] = $uposcsv[1][$pt];
    $sortuposmaln[1][$m] = $uposmaln[1][$pt];
    $sortuposblock[1][$m] = $pt;
    $temp2[$pt] = "null";
  }
#change position scores of highly-gapped regions to 0
  for ($m=0; $m!=($uposct+1); $m++) {
    if ($sortuposcsv[0][$m] eq "-100001") { $sortuposcsv[0][$m] = 0; }
    if ($sortuposcsv[1][$m] eq "-100001") { $sortuposcsv[1][$m] = 0; }
  }
#get score-to-index conversion table and matrix
#(both are parsed from $path_bin/conservation.h; the names depend on the window size,
#and $ins1/$ins2 are only set here for window 1 and window 3)
  open(CONH, "$path_bin/conservation.h");
  @all_conh = <CONH>;
  if ($window == 1) { $ins1 = "csvbound"; $ins2 = "csvmatrix0_15";}
  if ($window == 3) { $ins1 = "csv3bound"; $ins2 = "csv3matrix0_15";}
#bounds: the 21 lines after "double <ins1>[] = {"; text before the first comma for
#entries 0-19, the first 10 characters of the line for entry 20
  $start = 0;
  $highcut[0] = "null";
  $hct = -1;
  foreach $line7a (@all_conh) {
    if ($start == 1) {
      $hct += 1;
      if ($hct == 20) { $highcut[$hct] = substr($line7a, 0, 10); last; }
      @ar1a = split/\,/, $line7a;
      $highcut[$hct] = $ar1a[0];
    }
    else { if ($line7a =~ /^double $ins1\[\] \= \{/) { $start = 1; next; } }
  }
#matrix: the 21 lines after "double <ins2>[21][21] = {"; each row is the
#comma-separated list found between "{" and "}"
  $start = 0;
  $matrix[0][0] = 0;
  $hct = -1;
  foreach $line7b (@all_conh) {
    if ($start == 1) {
      $hct += 1;
      @ar1a = split/\{/, $line7b;
      @ar1b = split/\}/, $ar1a[1];
      @ar1c = split/\,/, $ar1b[0];
      for ($m=0; $m!=21; $m++) { $matrix[$hct][$m] = $ar1c[$m]; }
      last if ($hct == 20);
    }
    else { if ($line7b =~ /^double $ins2\[21\]\[21\] \= \{/) { $start = 1; next; } }
  }
  close(CONH); @all_conh = ();
#convert position scores to index scores
#(index = first n in 0..20 whose bound is greater than the score)
  $uposind[0][0] = "null";
  for ($m=0; $m!=($uposct+1); $m++) {
    for ($n=0; $n!=21; $n++) { if ($uposcsv[0][$m] < $highcut[$n]) { $uposind[0][$m] = $n; last; } }
    for ($n=0; $n!=21; $n++) { if ($uposcsv[1][$m] < $highcut[$n]) { $uposind[1][$m] = $n; last; } }
  }
#find which positions are in the $topX of either sequence
#($topcteach = $topX percent of the aligned positions, rounded; a position is chosen when
#it is among the first $topcteach entries of either sorted list)
  $topcteach = sprintf("%.0f", $topX*($uposct+1)/100);
  @toplist[0] = "null";
  $topct = -1;
  LP_6: for ($m=0; $m!=($uposct+1); $m++) {
    for ($n=0; $n!=$topcteach; $n++) { if ($m == $sortuposblock[0][$n]) { $topct += 1; $toplist[$topct] = $m; next LP_6; } }
    for ($n=0; $n!=$topcteach; $n++) { if ($m == $sortuposblock[1][$n]) { $topct += 1; $toplist[$topct] = $m; next LP_6; } }
  }
  $topctboth = $topct + 1;
#output list of chosen positions for compass
  open(SET1, ">$dom1.vs.$dom2.chosenposlist1");
  open(SET2, ">$dom1.vs.$dom2.chosenposlist2");
  for ($m=0; $m!=($topctboth); $m++) {
    $pt = $toplist[$m];
    print SET1 "$uposmaln[0][$pt] ";
    print SET2 "$uposmaln[1][$pt] ";
  }
  close(SET1);
  close(SET2);
#find Sn: sum of pairscores for chosen positions (seq1-vs-seq2)
#find S1: sum of pairscores for chosen positions (seq1-vs-seq1)
#find S2: sum of pairscores for chosen positions (seq2-vs-seq2)
  $Sn = 0;
  $S1 = 0;
  $S2 = 0;
  for ($m=0; $m!=$topctboth; $m++) {
#$ind1/$ind2 are only refreshed when the chosen position is found in the sorted list
    for ($n=0; $n!=($uposct+1); $n++) {
      next unless ($toplist[$m] == $sortuposblock[0][$n]);
      $pt = $toplist[$m];
      $ind1 = $uposind[0][$pt];
      last;
    }
    for ($n=0; $n!=($uposct+1); $n++) {
      next unless ($toplist[$m] == $sortuposblock[1][$n]);
      $pt = $toplist[$m];
      $ind2 = $uposind[1][$pt];
      last;
    }
    $Sn += $matrix[$ind1][$ind2];
    $S1 += $matrix[$ind1][$ind1];
    $S2 += $matrix[$ind2][$ind2];
  }
  $Sself = ($S1+$S2)/2;
#find Srand: sum of all-against-all position pairscores (normalized over length)
  $Srandtot = 0;
  for ($m=0; $m!=$topctboth; $m++) {
    for ($n=0; $n!=($uposct+1); $n++) {
      next unless ($toplist[$m] == $sortuposblock[0][$n]);
      $pt = $toplist[$m];
      $ind1 = $uposind[0][$pt];
      last;
    }
    for ($q=0; $q!=$topctboth; $q++) {
      for ($r=0; $r!=($uposct+1); $r++) {
        next unless ($toplist[$q] == $sortuposblock[1][$r]);
        $pt2 = $toplist[$q];
        $ind2 = $uposind[1][$pt2];
        last;
      }
      $Srandtot += $matrix[$ind1][$ind2];
    }
  }
#NOTE(review): no guard here -- Perl dies on division by zero if $topctboth is 0
#(possible when $topcteach rounds to 0), or below if $Sself equals $Srand
  $Srand = $Srandtot/$topctboth;
#calculate csv score for pair (Sn-Srand)/(Sself-Srand)
  $S = ($Sn-$Srand)/($Sself-$Srand);
#get matrix of compass scores for chosen position pairs
#NOTE(review): $d2aln is not assigned in this subroutine -- presumably a global set by
#the caller; confirm
  $domain1aln = "new_domain_aln/$dom1.br.aln";
  $domain2aln = "$path_aln/$d2aln";
  $domain1pos = "$dom1.vs.$dom2.chosenposlist1";
  $domain2pos = "$dom1.vs.$dom2.chosenposlist2";
  system "$path_bin/scoremat_chosenpos -i $domain1aln -j $domain2aln -p1 $domain1pos -p2 $domain2pos -g 1.0 -o $dom1.$dom2.compassmatrix";
  system "$path_bin/scoremat_chosenpos -i $domain1aln -j $domain1aln -p1 $domain1pos -p2 $domain1pos -g 1.0 -o $dom1.vs.$dom2.compassmatrix_self1"; 
  system "$path_bin/scoremat_chosenpos -i $domain2aln -j $domain2aln -p1 $domain2pos -p2 $domain2pos -g 1.0 -o $dom1.vs.$dom2.compassmatrix_self2";
#all three matrix files must exist, otherwise the compass score is reported as "error"
  $ckp = 1;
  if (-e "$dom1.$dom2.compassmatrix" == 0) { $ckp = 0; }
  if (-e "$dom1.vs.$dom2.compassmatrix_self1" == 0) { $ckp = 0; }
  if (-e "$dom1.vs.$dom2.compassmatrix_self2" == 0) { $ckp = 0; }
  if ($ckp == 0) { $CS = "error"; }
  else {
#load the three matrices: one row per line, columns separated by single blanks
    open(CIN1, "$dom1.$dom2.compassmatrix");
    @rows = <CIN1>;
    for ($m=0; $m!=($#rows+1); $m++) {
      chomp $rows[$m];
      @colar1 = split/ /, $rows[$m];
      for ($n=0; $n!=($#colar1+1); $n++) { $compassmatrix[$m][$n] = $colar1[$n]; }
    }
    open(CIN2, "$dom1.vs.$dom2.compassmatrix_self1");
    @rows1 = <CIN2>;
    for ($m=0; $m!=($#rows1+1); $m++) {
      chomp $rows1[$m];
      @colar2 = split/ /, $rows1[$m];
      for ($n=0; $n!=($#colar2+1); $n++) { $compass_self1[$m][$n] = $colar2[$n]; }
    }
    open(CIN3, "$dom1.vs.$dom2.compassmatrix_self2");
    @rows2 = <CIN3>;
    for ($m=0; $m!=($#rows2+1); $m++) {
      chomp $rows2[$m];
      @colar3 = split/ /, $rows2[$m];
      for ($n=0; $n!=($#colar3+1); $n++) { $compass_self2[$m][$n] = $colar3[$n]; }
    }
#find CSn: sum of compass pairscores for chosen positions (seq1-vs-seq2) [ diagonal of *.compassmatrix ]
#find CS1: sum of compass pairscores for chosen positions (seq1-vs-seq1) [ diagonal of dom1 self matrix ]
#find CS2: sum of compass pairscores for chosen positions (seq2-vs-seq2) [ diagonal of dom2 self matrix ]
    $CSn = 0;
    $CS1 = 0;
    $CS2 = 0;
    for ($m=0; $m!=$topctboth; $m++) {
      $CSn += $compassmatrix[$m][$m];
      $CS1 += $compass_self1[$m][$m];
      $CS2 += $compass_self2[$m][$m];
    }
    $CSself = ($CS1+$CS2)/2;
#find CSrand: sum of all-against-all compass position pairscores (normalized over length)
    $CSrandtot = 0;
    for ($m=0; $m!=$topctboth; $m++) { for ($n=0; $n!=$topctboth; $n++) {  $CSrandtot += $compassmatrix[$m][$n]; } }
    $CSrand = $CSrandtot/$topctboth;
#calculate compass score for pair (CSn-CSrand)/(CSself-CSrand)
#NOTE(review): dies on division by zero if $CSself equals $CSrand
    $CS = ($CSn-$CSrand)/($CSself-$CSrand);
#the temporary matrix and position-list files are only removed on this (success) path
    `rm $dom1.$dom2.compassmatrix $dom1.vs.$dom2.compassmatrix_self1 $dom1.vs.$dom2.compassmatrix_self2`;
    `rm $dom1.vs.$dom2.chosenposlist1 $dom1.vs.$dom2.chosenposlist2`;
  }
#$percentdiff: chosen positions beyond $topcteach, as a percentage of all usable positions
  $upostot_fromblocks = $uposct+1;
  $percentdiff = sprintf("%.3f", ($topctboth-$topcteach)/$upostot_fromblocks*100);
#release the work arrays (note: @uposcsv, @temp1, @temp2 and @highcut are not reset here)
  close(CIN1); close(CIN2); close(CIN3); @rows = (); @rows1 = (); @rows2 = ();
  @posmaln = (); @uposmaln = (); @sortuposcsv = ();
  @sortuposmaln = (); @sortuposblock = (); @matrix = (); @toplist = (); @uposind = ();
  @compassmatrix = (); @compass_self1 = (); @compass_self2 = ();
  return "$S\t$CS\t$percentdiff\t$topcteach\t$topctboth\t$upostot_fromblocks";
}
### END OF SUB_calc_csv_score_mam


##########
#
# subroutine use: SUB_get_variables_from_setupfile ( "$setupfile" )

# Loads the required library paths/variables from the user setup file into
# their package globals ($userid, $path_bin, ... $SCOP_DOMSEQ_DB).
#   $_[0]  path of the setup file
# A setting is a line that starts with "$" and contains "=", for example
#   $path_bin = "/opt/scopmap/bin";
# Whitespace, double quotes and semicolons are removed from the value, so the
# quoted/terminated Perl-like form and the bare form "$path_bin = /opt/bin"
# give the same result.  When a name is defined more than once, the first
# definition wins.
# Returns the names of the required variables that are missing or empty, in a
# fixed order (also left in the global @undefined).  If the file cannot be
# opened, every required variable is reported.
sub SUB_get_variables_from_setupfile {
  $setupfile = $_[0];
  @undefined = ();
#required library paths/variables, in the order in which they are reported
  my @required = (
    [ "userid",             \$userid ],
    [ "path_bin",           \$path_bin ],
    [ "liblist",            \$liblist ],
    [ "liblist_tab",        \$liblist_tab ],
    [ "compass_db",         \$compass_db ],
    [ "path_str",           \$path_str ],
    [ "path_aln",           \$path_aln ],
    [ "path_csv",           \$path_csv ],
    [ "path_dalidat",       \$path_dalidat ],
    [ "convert_pairs",      \$convert_pairs ],
    [ "lib_scop_dircla",    \$lib_scop_dircla ],
    [ "lib_scop_dircla_ss", \$lib_scop_dircla_ss ],
    [ "scoptab",            \$scoptab ],
    [ "SIMPLE_BLAST_DB",    \$SIMPLE_BLAST_DB ],
    [ "RPS_DB",             \$RPS_DB ],
    [ "SCOP_DOMSEQ_DB",     \$SCOP_DOMSEQ_DB ],
  );
#start from a clean slate so that a value from an earlier call cannot survive
  foreach my $req (@required) { ${ $req->[1] } = ""; }

#read all "$name = value" lines
  my %first_value = ();
  if (open(SETUPIN, "<", $setupfile)) {
    while (my $line = <SETUPIN>) {
      next unless ($line =~ /^\$/ && $line =~ /\=/);
      chomp $line;
      my @s1 = split(/\=/, $line);
      my @s2 = split(/\$/, $s1[0]);
      my $name  = defined $s2[1] ? $s2[1] : "";
      my $value = defined $s1[1] ? $s1[1] : "";
      $name =~ s/\s//g;
#the cleanup must be applied to the value itself: the previous code ran the
#quote/semicolon substitutions on an unrelated scalar, so they had no effect
      $value =~ s/\s//g; $value =~ s/\"//g; $value =~ s/\;//g;
      next if (exists $first_value{$name});
      $first_value{$name} = $value;
    }
    close(SETUPIN);
  }

#store every setting in its global and note the ones that are still empty
  foreach my $req (@required) {
    my ($name, $ref) = @{$req};
    if (exists $first_value{$name}) { $$ref = $first_value{$name}; }
    if ($$ref eq "") { push @undefined, $name; }
  }
  return @undefined;
}
### END OF SUB_get_variables_from_setupfile


##########
#
# subroutine use: SUB_get_domain_seq ( $pdbch, $resrange );

# Returns the one-letter sequence of a domain, read from the CA records in
# "new_domain_str/<pdbch>.ca".
#   $_[0]  $pdbch - chain id; its 6th character is the chain letter ("_" = blank chain)
#   $_[1]  $rr    - comma-separated residue ranges; each range has two parts
#                   separated by "." and the residue number is taken after the
#                   ":" of each part
# For every range, residues are collected from the first CA record whose
# residue number equals the start, up to and including the one equal to the end.
# All variables are package globals, following the convention of this file.
sub SUB_get_domain_seq {
  ($pdbch, $rr) = @_[0, 1];
  open(CAIN, "new_domain_str/$pdbch.ca");
  @all_cain = <CAIN>;
  $dseq = "";
  @rra1 = split(/\,/, $rr);
  foreach $p (@rra1) {
#first and last residue number of this range
    @ar4a = split(/\./, $p);
    @ar4b = split(/\:/, $ar4a[0]);
    $n_res = $ar4b[1];
    @ar4c = split(/\:/, $ar4a[1]);
    $c_res = $ar4c[1];
    $ch = (substr($pdbch, 5, 1) eq "_") ? " " : substr($pdbch, 5, 1);
    $start = 0;
    foreach $tfline (@all_cain) {
#only CA atoms of the wanted chain, from ATOM or HETATM records
      next if (substr($tfline, 13, 2) ne "CA");
      next if (substr($tfline, 21, 1) ne $ch);
      next unless ($tfline =~ /^ATOM/ || $tfline =~ /^HETATM/);
      $resnum = substr($tfline, 22, 6);
      $resnum =~ s/ //g;
      $start = 1 if ($resnum eq $n_res);
      if ($start == 1) {
        $letter3 = substr($tfline, 17, 3);
        $letter1 = SUB_convert_3letter_1letter ( $letter3 );
        $dseq .= $letter1;
      }
#the end residue stops the scan even if the start residue was never seen
      if ($resnum eq $c_res) { last; }
    }
  }
  close(CAIN);
  return "$dseq";
}
### END OF SUB_get_domain_seq


##########
#
# subroutine use: SUB_check_setup

sub SUB_check_setup {

#check setup on system...
# 1) are user-defined paths to library components correct?
# 2) do library components have nonzero size?
# 3) are required programs in the correct place and are they executable?
#
# Reads the globals $path_bin, $path_str, $path_aln, $path_csv, $path_dalidat
# and the library/database file names pushed onto @fileanddblist below.
# Every problem found is collected in @errorlist; if any exist they are all
# printed and the program dies.  Takes no arguments, returns nothing.
# Side effects kept from the original: $currdir holds the working directory,
# $w1/$w2/$w3 hold the "which" results, @fileanddblist stays populated.

  print "Checking setup...\n";
  $currdir = `pwd`;
  chomp $currdir;
  @errorlist = ();
  @fileanddblist = ();
  @scrlist = ();
  if (-d "$path_bin" == 0) { push @errorlist, "Cannot find $path_bin.\n"; }

  # Library directories are scanned with opendir/readdir instead of
  # "chdir + ls <glob> | wc -l": an unreadable directory can no longer make
  # the check silently inspect the current directory, and a very large
  # library cannot overflow the shell's argument list.
  if (-d "$path_str" == 0) { push @errorlist, "Cannot find $path_str.\n"; }
  else {
    if (SUB_check_setup_count_files ( "$path_str", ".ca" ) == 0) { push @errorlist, "There are no ~.ca files in $path_str.\n"; }
    # Coordinate files may be named either ~.ent or ~pdb.
    if (SUB_check_setup_count_files ( "$path_str", ".ent" ) == 0 && SUB_check_setup_count_files ( "$path_str", "pdb" ) == 0) {
      push @errorlist, "There are no ~.ent/~.pdb files in $path_str.\n";
    }
  }
  if (-d "$path_aln" == 0) { push @errorlist, "Cannot find $path_aln.\n"; }
  elsif (SUB_check_setup_count_files ( "$path_aln", ".aln" ) == 0) { push @errorlist, "There are no ~.aln files in $path_aln.\n"; }
  if (-d "$path_csv" == 0) { push @errorlist, "Cannot find $path_csv.\n"; }
  elsif (SUB_check_setup_count_files ( "$path_csv", ".csv" ) == 0) { push @errorlist, "There are no ~.csv files in $path_csv.\n"; }
  if (-d "$path_dalidat" == 0) { push @errorlist, "Cannot find $path_dalidat.\n"; }
  else {
    if (SUB_check_setup_count_files ( "$path_dalidat", "dssp" ) == 0) { push @errorlist, "There are no ~.dssp files in $path_dalidat.\n"; }
    if (SUB_check_setup_count_files ( "$path_dalidat", "dat" ) == 0) { push @errorlist, "There are no ~.dat files in $path_dalidat.\n"; }
  }

  # Library files and databases must exist and be non-empty.
  push @fileanddblist, "$compass_db", "$SIMPLE_BLAST_DB", "$RPS_DB", "$SCOP_DOMSEQ_DB", "$liblist", "$liblist_tab", "$convert_pairs", "$lib_scop_dircla", "$lib_scop_dircla_ss", "$scoptab";
  foreach $fdb (@fileanddblist) {
    if (-e "$fdb" == 0) { push @errorlist, "$fdb does not exist.\n"; }
    elsif (-z "$fdb" == 1) { push @errorlist, "$fdb has size zero.\n"; }
  }

  # BLAST programs are looked up through the shell's search path.
  $w1 = `which blastclust`; chomp $w1; if ($w1 eq "") { push @errorlist, "Cannot find path to blastclust.\n"; }
  $w2 = `which blastpgp`; chomp $w2; if ($w2 eq "") { push @errorlist, "Cannot find path to blastpgp.\n"; }
  $w3 = `which rpsblast`; chomp $w3; if ($w3 eq "") { push @errorlist, "Cannot find path to rpsblast.\n"; }

  # Helper programs expected in $path_bin.  "compscores_givenali" used to be
  # listed twice, which reported the same problem twice.
  push @scrlist, "DaliLite", "mammoth", "align0", "al2co", "blsm_scores", "compscores_givenali", "compass_vs_db", "compgivenseqs_naccs", "prep_psiblastali", "scoremat_chosenpos";
  foreach $scr (@scrlist) {
    if (-e "$path_bin/$scr" == 0) { push @errorlist, "$path_bin/$scr does not exist.\n"; }
    elsif (-z "$path_bin/$scr" == 1) { push @errorlist, "$path_bin/$scr has size zero.\n"; }
    elsif (-x "$path_bin/$scr" == 0) { push @errorlist, "$path_bin/$scr is not executable.\n"; }
  }

  # pdbcp.pl is additionally exercised: it must produce a non-empty 1h73.pdb
  # in the current directory, which is then removed again.
  if (-e "$path_bin/pdbcp.pl" == 0) { push @errorlist, "$path_bin/pdbcp.pl does not exist.\n"; }
  elsif (-z "$path_bin/pdbcp.pl" == 1) { push @errorlist, "$path_bin/pdbcp.pl has size zero.\n"; }
  elsif (-x "$path_bin/pdbcp.pl" == 0) { push @errorlist, "$path_bin/pdbcp.pl is not executable.\n"; }
  else {
    `$path_bin/pdbcp.pl 1h73`;
    if (-e "1h73.pdb" == 1 && -z "1h73.pdb" != 1) { unlink "1h73.pdb"; }
    else { push @errorlist, "$path_bin/pdbcp.pl cannot retrieve test structure PDB|1h73.\n"; }
  }
  if ($#errorlist != -1) {
    print "\n";
    foreach $er (@errorlist) { print "$er"; }
    die "\nProgram exiting due to setup errors.  Please address the preceding comments.  See the SCOPMAP_SETUP_TEXT file for help.\n\n";
  }
  @scrlist = (); @errorlist = ();
  print "Check finished.  Starting SCOPmap...\n";
  return;
}

# Helper for SUB_check_setup: counts the non-hidden entries of directory
# $dir whose names end in $suffix (the equivalent of the shell glob
# "*$suffix").  Returns 0 when the directory cannot be opened.
sub SUB_check_setup_count_files {
  my ($dir, $suffix) = @_;
  my $count = 0;
  if (opendir(my $dh, $dir)) {
    while (defined(my $entry = readdir($dh))) {
      next if ($entry =~ /^\./);    # a shell "*" never matches dot-files
      $count++ if ($entry =~ /\Q$suffix\E\z/);
    }
    closedir($dh);
  }
  return $count;
}
### END OF SUB_check_setup


##########
#
# subroutine use: SUB_find_restart ()

sub SUB_find_restart {
  # Purpose: work out how far a stalled run got, reload the results of the
  # completed stages into the global result tables, remove the partial output
  # of the interrupted stage, and tell the caller where to resume.
  #
  # Reads:   $date, $path_aln, $date.log, pdb_$date/blstclst.fa.bc and the
  #          per-representative ~.blast2 / ~.compass2 / ~.mammoth2 / ~.dali2 files.
  # Fills:   @bc_rep, @frag_list, @unmap_rep, @compass_rep, @step1..@step9
  #          (with their $stepNct counters).
  # Returns: $rspt, the number (1-9) of the last stage found completed.
  #          Prints a message and exits when no restart point can be found.
  #
  # NOTE: scratch variables are left as package globals, as in the rest of
  # this file.
  print "\nEvaluating current directory to determine where re-start should begin...\n";
  if (-e "$date.log" == 0 || -z "$date.log" == 1) {  print "$date.log file could not be found.  This query set must be competely re-run.  Exiting program...\n"; exit; }
  open(DATELOG, "$date.log");
  @loglines = <DATELOG>;
  $rspt = -1;
  if (-e "pdb_$date/blstclst.fa.bc" == 1) { $rspt = 1; }
  # Each STATUS line in the log raises the restart point; the highest one wins.
  foreach $line (@loglines) {
    if ($line =~ /STATUS: finished running BLAST programs/ && $rspt < 2) { $rspt = 2; }
    if ($line =~ /STATUS: finished making query alignments from PSI-BLAST results/ && $rspt < 3) { $rspt = 3; }
    if ($line =~ /STATUS: finished running compass/ && $rspt < 4) { $rspt = 4; }
    if ($line =~ /STATUS: finished making ~.compass2 files and protein names found/ && $rspt < 5) { $rspt = 5; }
    if ($line =~ /STATUS: finished running MAMMOTH/ && $rspt < 6) { $rspt = 6; }
    if ($line =~ /STATUS: finished making all ~.mammoth2 files/ && $rspt < 7) { $rspt = 7; }
    if ($line =~ /STATUS: finished running all comparison tools, ready to start assignments/ && $rspt < 8) { $rspt = 8; }
    if ($line =~ /STATUS: finished with assignments, ready to start cleanup/ && $rspt < 9) { $rspt = 9; }
  }
  close(DATELOG); @loglines = ();

  if ($rspt == -1) { print "$date.log does not contain the information necessary for determining where re-start should begin.  This query set must be completely re-run.  Exiting program...\n"; exit; }

  # Representatives: the first 6 characters of every line in the
  # "$date.newpdb.list.bc" section of the blastclust archive, up to the next
  # "!^(NEW" marker.
  open(BC, "pdb_$date/blstclst.fa.bc");
  @allbc = <BC>;
  $start = 0;
  foreach $line (@allbc) {
    if ($start == 1) {
      last if ($line =~ /\!\^\(NEW/);
      $this = substr($line, 0, 6);
      push @bc_rep, "$this";
    }
    else { if ($line =~ /\!\^\(NEW\_FILE\): $date.newpdb.list.bc/ && $line !~ /\.bc\.pre/) { $start = 1; } }
  }
  close(BC);

  if ($rspt >= 2) {
    # Fragment list: chain (first 6 characters) and the number following "length = ".
    $grepall = `grep "Do not consider" $date.log | grep "length = "`;
    @tmp = split/\n/, $grepall;
    for ($i=0; $i!=($#tmp+1); $i++) {
      $ch = substr($tmp[$i], 0, 6);
      @ar1a = split/length \= /, $tmp[$i];
      @ar1b = split/ /, $ar1a[1];
      push @frag_list, "$ch $ar1b[0]";
    }
    # For every representative, copy the ~.out1/~.out2/~.out3 sections of its
    # ~.blast2 archive into @step1/@step2/@step3.  A section runs from its
    # "!^(NEW_FILE): <name>" header to the next "!^(NEW" marker; the row
    # counter only advances when the section has at least one line.
    foreach $rep (@bc_rep) {
      open(BL2, "pdb_$date/$rep.blast2");
      @bl2 = <BL2>;
      $s1 = 0;
      $s1ct = -1;
      foreach $line (@bl2) {
        if ($s1 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s1ct++;
          if ($s1ct == 0) { $step1ct++; }
          $step1[$step1ct][$s1ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): $rep.out1/) { $s1 = 1; } }
      }
      # No ~.out1 content: the representative is recorded in @unmap_rep.
      if ($s1ct == -1) { push @unmap_rep, "$rep"; }
      $s2 = 0;
      $s2ct = -1;
      foreach $line (@bl2) {
        if ($s2 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s2ct++;
          if ($s2ct == 0) { $step2ct++; }
          $step2[$step2ct][$s2ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): $rep.out2/) { $s2 = 1; } }
      }
      $s3 = 0;
      $s3ct = -1;
      foreach $line (@bl2) {
        if ($s3 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s3ct++;
          if ($s3ct == 0) { $step3ct++; }
          $step3[$step3ct][$s3ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): $rep.out3/) { $s3 = 1; } }
      }
      close(BL2);
    }
  }
  if ($rspt >= 3) {
    @compass_rep = ();
    @compass_rep_pre = ();
    # Only step back up when the step down worked; an unconditional
    # chdir ".." after a failed chdir would leave the working directory.
    if (chdir "new_domain_aln") {
      @compass_rep_pre = `ls -1 *.aln | cut -c1-6`;
      foreach $rep (@compass_rep_pre) { chomp $rep; push @compass_rep, "$rep"; }
      chdir "..";
    }
    else { warn "SUB_find_restart: cannot enter new_domain_aln: $!\n"; }
  }
  if ($rspt >= 5) {
    foreach $rep (@bc_rep) {
      open(C2, "$rep.compass2");
      @c2 = <C2>;
      $s5 = 0;
      $s5ct = -1;
      foreach $line (@c2) {
        if ($s5 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s5ct++;
          if ($s5ct == 0) { $step5ct++; }
          $step5[$step5ct][$s5ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): $rep.out5/) { $s5 = 1; } }
      }
      close(C2);
    }
  }
  if ($rspt >= 8) {
    foreach $rep (@bc_rep) {
      open(M2, "$rep.mammoth2");
      @m2 = <M2>;
      $s6m = 0;
      $s6mct = -1;
      foreach $line (@m2) {
        if ($s6m == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s6mct++;
          if ($s6mct == 0) { $step6mct++; }
          $step6m[$step6mct][$s6mct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): $rep.out6/) { $s6m = 1; } }
      }
      close(M2);
      open(DR2, "$rep.dali2");
      @dr2 = <DR2>;
      $s6d = 0;
      $s6dct = -1;
      foreach $line (@dr2) {
        if ($s6d == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s6dct++;
          if ($s6dct == 0) { $step6dct++; }
          $step6d[$step6dct][$s6dct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): $rep.out6/) { $s6d = 1; } }
      }
      $s7 = 0;
      $s7ct = -1;
      foreach $line (@dr2) {
        if ($s7 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s7ct++;
          if ($s7ct == 0) { $step7ct++; }
          $step7[$step7ct][$s7ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): $rep.out7/) { $s7 = 1; } }
      }
      $s8 = 0;
      $s8ct = -1;
      foreach $line (@dr2) {
        if ($s8 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s8ct++;
          if ($s8ct == 0) { $step8ct++; }
          $step8[$step8ct][$s8ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): $rep.out8/) { $s8 = 1; } }
      }
      $s9 = 0;
      $s9ct = -1;
      foreach $line (@dr2) {
        if ($s9 == 1) {
          last if ($line =~ /\!\^\(NEW/);
          $s9ct++;
          if ($s9ct == 0) { $step9ct++; }
          $step9[$step9ct][$s9ct] = $line;
        }
        else { if ($line =~ /\!\^\(NEW\_FILE\): $rep.out9/) { $s9 = 1; } }
      }
      close(DR2);
    }
  }

  if ($rspt == 1) {
    if (chdir "pdb_$date") {
      # The first ~.br file names the representative whose output is discarded.
      $ls1 = `ls -1 *.br`; $quitrep = substr($ls1, 0, 6);
      # Without a ~.br file $quitrep is empty and "rm -f $quitrep*" would
      # expand to "rm -f *", wiping the finished results that are saved
      # below.  Clean up only for a well-formed 6-character name.
      if ($quitrep =~ /\A\S{6}\z/) {
        `rm -f $quitrep*`;
        $g = `grep "$quitrep" seqali.pos-pos | wc -l`; chomp $g;
        if ($g != 0) {
          # Keep only the part of seqali.pos-pos that precedes the first
          # line mentioning the interrupted representative.
          open(SP1, "seqali.pos-pos"); @sp1 = <SP1>;
          open(SP2, ">seqali.mod");
          foreach $line (@sp1) { last if ($line =~ /$quitrep/); print SP2 "$line"; }
          close(SP1); close(SP2);
          `mv seqali.mod seqali.pos-pos`;
        }
      }
      chdir "..";
    }
    else { warn "SUB_find_restart: cannot enter pdb_$date: $!\n"; }
    if (-d "TMP_HOLDING_$date" == 0) {
      `mkdir TMP_HOLDING_$date`;
      `mv pdb_$date/new_domain_str TMP_HOLDING_$date`;
      `mv pdb_$date/blstclst.fa.bc TMP_HOLDING_$date`;
      `mv $date.log TMP_HOLDING_$date`;
    }
    else {
      # A holding area already exists: append to the saved log only the
      # lines that follow the "Number of sequences whose length is" line.
      $pt = 0;
      $dlines = `cat $date.log`; @ard1 = split/\n/, $dlines;
      # Bug fix: the position was taken from the unrelated global $j
      # instead of this loop's index $p.
      for ($p=0; $p!=($#ard1+1); $p++) { if ($ard1[$p] =~ /Number of sequences whose length is/) { $pt = $p; last; } }
      if ($pt == 0) { $pt = 3; }
      $lc = $#ard1 - $pt;
      # "tail -n" replaces the obsolete "tail -N"; nothing to append when
      # the count is not positive.
      if ($lc > 0) { `tail -n $lc $date.log >> TMP_HOLDING_$date/$date.log`; }
    }
    `mv pdb_$date/*.blast2 TMP_HOLDING_$date`;
    `mv pdb_$date/*.blast TMP_HOLDING_$date`;
    `mv pdb_$date/*.chk TMP_HOLDING_$date`;
    `cat pdb_$date/seqali.pos-pos >> TMP_HOLDING_$date/seqali.pos-pos`;
    `rm -r -f pdb_$date`;
    print "gapped BLAST, RPS-BLAST, and PSI-BLAST jobs were not completed.  Continuing from here.\n";
  }

  if ($rspt == 2) { `rm *.adj.aln`; print "Query alignments from PSI-BLAST outputs not made.  Continuing from here.\n"; }

  if ($rspt == 3) {
    # A ~.compass file is kept only if its number of "Evalue" lines exceeds
    # 99.5% of the line count of $path_aln/rps_scop_aln_list.
    $librepct = `wc -l $path_aln/rps_scop_aln_list`; chomp $librepct;
    $ls1 = `ls -1 *.compass`; @lsar1 = split/\n/, $ls1;
    # A missing or empty list used to abort with "Illegal division by zero".
    if ($#lsar1 != -1 && $librepct == 0) { die "Cannot determine the number of entries in $path_aln/rps_scop_aln_list; unable to evaluate existing COMPASS results.\n"; }
    foreach $rep (@lsar1) {
      $ct2 = `grep Evalue $rep | wc -l`; chomp $ct2;
      next if ($ct2/$librepct > 0.995);
      `rm $rep`;
    }
    print "COMPASS jobs not completed.  Continuing from here.\n";
  }

  if ($rspt == 4) { `rm -f *.compass2`; print "Extracting information from COMPASS results not completed. Continuing from here.\n"; }

  if ($rspt == 5) { `rm -r -f dir_*`; print "MAMMOTH jobs not completed.  Continuing from here.\n"; }

  if ($rspt == 6) { `rm -f *.mammoth2 *.mpatmp`; print "Extracting information from MAMMOTH results not completed. Continuing from here.\n"; }

  if ($rspt == 7) {
    `rm -f *.mpa *.csres *.compreshit`;
    `rm -f *.dali* *.dat *.dssp queryd* pairstorun`;
    `rm -f *.fromcons *.mpaconf *.fa *.struse *.sequse *.chosenposlist1 *.chosenposlist2`;
    `rm -r -f temp_pdb_dir`;
    `rm -r -f dali_output`;
    `rm -r -f tempdir.*`;
    `rm -r -f tmpdalidir`;
    `rm -r -f new_domain_csv`;
    print "Continuing at COMPASS evaluation of MAMMOTH results.\n";
  }

  if ($rspt == 8) {
    # Chain logs without a "<chain>.chainlog compl" entry in tmptrack are
    # removed together with their inslog archives.
    $ls1 = `ls -1 *chainlog`; @lsa1 = split/\n/, $ls1;
    foreach $c (@lsa1) {
      $ch = substr($c, 0, 6);
      $g = `grep "$ch.chainlog compl" tmptrack | wc -l`; chomp $g;
      if ($g == 0) { `rm -f $ch.chainlog $ch.*.inslog.tar`; }
    }
    `rm -f *.combined`;
    `rm -r -f output_pieces`;
    `rm -r -f tempdir.1*`;
    `rm -r -f tempdir.*`;
    `rm -r -f temp_pdb_dir`;
    `rm -r -f temp_pdb_dir2`;
    `rm -r -f dali_output`;
    `rm -r -f queryd* pairslist pairstorun *.dssp *.dat *_5N`;
    print "Results compilation not completed.  Continuing from here.\n";
  }

  if ($rspt == 9) { print "Cleanup of working area not completed.  Continuing from here.\n"; }

  return $rspt;
}
### END OF SUB_find_restart


