/*   scoremat_chosenpos.c
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*             Howard Hughes Medical Institute, and Department of Biochemistry  
*		University of Texas Southwestern Medical Center at Dallas
*
*  This software is freely available to the public for use. We have not placed
*  any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the authors do not and cannot 
*  warrant the performance or results that may be obtained by using this 
*  software. The authors disclaim all warranties, express or implied,
*  including warranties of performance, merchantability or fitness for 
*  any particular purpose.
*
*  Please cite the authors in any work or product based on this material.
*
* ===========================================================================
*
* File Name:  scoremat_chosenpos.c
*
* Authors:  Ruslan Sadreyev, Nick Grishin

* Version Creation Date:  7/24/2003 
*
* File Description:
*       A program for making all-to-all matrix of scores between specified positions
*       in two submitted alignments    
*       (Modified from compass1_24.c)
*
* --------------------------------------------------------------------------*/


/* Version 1.22 : added input of Db length (as -d ), in order to calculate Evalue for DB searches
   Version 1.24 : 
in cases when lambda is not found, simple score rescaling is used and Evalue is calculated;
in such cases, Evalue is given with a warning;
  
in cases when no positively scoring alignment can be found, 
corresponding message is printed after the header

*/


#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include <malloc.h>
#include <stddef.h>

/* NR_END and FREE_ARG are not used in the visible part of this file;
   presumably they serve the vector/matrix allocators declared below
   (TODO confirm against their definitions). */
#define NR_END 1
#define FREE_ARG char*
#define SQUARE(a) ((a)*(a))
/* Size of the shared line buffer str[] and of the chosen-position vectors
   allocated in main. */
#define MAXSTR   100001
/* Maximum number of bisection steps performed by lambdaAll(). */
#define JMAX 40                                         
/* NOTE: SWAP expands to three separate statements and relies on a variable
   named `temp` being in scope at the point of use; it is not safe as the
   body of an unbraced if/else. */
#define SWAP(a,b) temp=(a);(a)=(b);(b)=temp;
/* M and NSTACK are not referenced in the visible part of the file;
   presumably tuning constants of sort()/sort_int() (TODO confirm). */
#define M 7
#define NSTACK 50

/* Natural logarithm of 2; main multiplies the substitution matrix by it. */
#define LN2 0.69314718055994528623
/* Default value of the ungapped lambda (option -L in main). */
#define LAMB_UNG 0.3176
/* Not referenced in the visible part of the file. */
#define BLOSUM_EXPECT -0.5209

/* Shared line buffer; main uses it with fgets() when reading position files. */
static char str[MAXSTR+1];

/* Error reporting and vector/matrix allocators taking inclusive index
   bounds (nl..nh, nrl..nrh x ncl..nch).  Definitions are not in the visible
   part of the file. */
void nrerror(char error_text[]);
char *cvector(long nl, long nh);
int *ivector(long nl, long nh);
double *dvector(long nl, long nh);
char **cmatrix(long nrl, long nrh, long ncl, long nch);
int **imatrix(long nrl, long nrh, long ncl, long nch);
double **dmatrix(long nrl, long nrh, long ncl, long nch);
char **cmatrix(long nrl, long nrh, long ncl, long nch);

/* Matching deallocators; callers pass the same bounds used at allocation. */
void free_ivector(int *v, long nl, long nh);
void free_dvector(double *v, long nl, long nh);
void free_cvector(char *v, long nl, long nh);
void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch);
void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch);
void free_cmatrix(char **m, long nrl, long nrh, long ncl, long nch);

/* String/buffer helpers (definitions not visible here). */
static void *mymalloc(int size);
char *strsave(char *str);
char *strnsave(char *str, int l);
static char **incbuf(int n, char **was);
static int *incibuf(int n, int *was);

/* Reads a residue matrix (s_ij or q_ij) from an open stream. */
double  **read_aa_dmatrix(FILE *fmat);
/* Prints the usage text to stderr. */
void argument();

/* Residue letter -> integer code conversions (definitions not visible here). */
int am2num(int c);
int am2numBZX(int c);

/* Alignment input/output. main calls read_alignment2int() and afterwards
   reads the globals nal, alilen, astart, aname and aseq. */
void err_readali(int err_num);
void readali(char *filename);
static void printali_ali(FILE *fpp, int chunk, int n1, int n2, int len, char **aname1, char **aname2,
char **aseqGap1, char **aseqGap2, int *start1, int *start2, int *positive, int **col_score, int score);
int **ali_char2int(char **aseq,int start_num, int start_seq);
int **read_alignment2int(char *filename,int start_num,int start_seq);

/* Effective count of the sequences flagged in marks[]; called once per
   residue type and column by neffsForChosenCol_maskGapReg(). */
double effective_number_nogaps(int **ali, int *marks, int n, int start, int end);

/* Per-column effective counts for the chosen columns, with gap filtering. */
void **neffsForChosenCol_maskGapReg(int **ali, int nchpos, int *chosenpos, int n, int len, double effgapmax, double effgapRegionMin, double **n_effAa, double *sum_eff_let, int *maskgapRegion, int *apos_filtr, int *len_lowgaps, double *nef);

/* Mixes observed frequencies with q_ij-based pseudocounts. */
void **pseudoCounts(double **matrix, double n_eff, int len, double **pseudoCnt);

/* Column-pair score matrix filled by main (1-based in both dimensions).
   score_matrix_srt, lambda_al and score_scale are only referenced from
   commented-out code in main. */
double **score_matrix, *score_matrix_srt;
double lambda_al, score_scale;

/* Column-to-column score formulas; main selects one through option -f. */
double ScoreForTwoRows_smat3_21(int pos1, int pos2);
double ScoreForTwoRows_smat3_22(int pos1, int pos2);
double ScoreForTwoRows_smat3_23(int pos1, int pos2);
double ScoreForTwoRows_smat3_27(int pos1, int pos2);
double ScoreForTwoRows_smat3_28(int pos1, int pos2);

/* Lambda estimation over all column pairs, plus the sort helpers. */
double funcAl(double x,int len1,int len2, double *score_matrix_srt);
double lambdaAll(int len1, int len2, double *score_matrix_srt);
void sort(int n, double arr[]);
void sort_int(int n, int arr[]);

/* Gap penalties; main sets them to f*ARG_GO and f*ARG_GE. */
int gap_open, gap_extend;

typedef struct _score_Vector{
	int *noGap, *gapExists, *noGapOld, *gapExistsOld, *prevScoreGapQueryOld, *noGapStore, *gapExistsStore, *prevScoreGapQueryOldStore;
} score_Vector;

/* Alignment routines and their state. Not used by the visible main();
   presumably inherited from compass1_24.c (TODO confirm). */
int queryEnd, dbEnd, queryStart,dbStart;
int gapOpen, gapExtend, dbLength, queryLength;
static int SmithWatermanScore( double **score_matrix, int queryLength, int dbLength, int gapOpen, int gapextend, int queryEnd, int dbEnd, int **tracebackDir, 
int **flagNewGapQuery, int **flagNewGapDb);

int score, End1, End2, Start1, Start2;
static int SmithWatermanFindStart( double **score_matrix, int queryLength,  int dbLength,  int gapOpen, int gapExtend, int queryEnd, int dbEnd, int
score, int queryStart, int dbStart);

/* Gap-score functions and the pointers through which a variant is chosen;
   the visible main() never assigns these pointers. */
double Sgap6_smat(int pos1, int pos2, double b);
double Sgap6_smat_off(int pos1, int pos2, double b);
double (*sgapfcn)(int pos1, int pos2, double b);

double GapExtend1(int pos2, double b);
double GapExtend1_off(int pos2, double b);
double (*g_e1)(int pos2, double b);
double GapExtend2(int pos1, double b);
double GapExtend2_off(int pos1, double b);
double (*g_e2)(int pos1, double b);

int *ScoreOverColumn (int colScore, int flag1, int flag2, int flag3, int flag4, int flag5, int flag6, int *column_Score);

void **traceback_outputPos(int start_ali1, int start_ali2, int end_ali1, int end_ali2, int **tracebackDir, int **flagNewGapQuery, int **flagNewGapDb, int *apos1, int *apos2);

/* Residue letter case conversions (definitions not visible here). */
char am2lower(char inchr);
char am2upper(char inchr);
		

/* Robinson frequencies and BLOSUM matrices correspond to aa order  WFYMLIVACGPTSNQDEHRK.
   p_rbnsn is 0-based (20 entries); pseudoCounts() divides by p_rbnsn[k-1]
   for the 1-based residue index k. */
double p_rbnsn[]={0.013298, 0.038556, 0.032165, 0.022425, 0.090191, 0.05142, 0.064409, 0.078047, 0.019246, 0.073772, 0.052028, 0.058413,
0.071198, 0.044873, 0.042644, 0.05364, 0.062949, 0.021992, 0.051295, 0.057438};

/* Built-in residue pair frequency (q_ij) table, 0-based 20x20.
   main copies it into the 1-based qmatrix when no readable -q file is
   given; its messages call this the BLOSUM62 default. */
double q_blosum62[20][20] = {
{0.0065, 0.0008, 0.0009, 0.0002, 0.0007, 0.0004, 0.0004, 0.0004, 0.0001, 0.0004, 0.0001, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003 },
{0.0008, 0.0183, 0.0042, 0.0012, 0.0054, 0.0030, 0.0026, 0.0016, 0.0005, 0.0012, 0.0005, 0.0012, 0.0012, 0.0008, 0.0005, 0.0008, 0.0009, 0.0008, 0.0009, 0.0009 },
{0.0009, 0.0042, 0.0102, 0.0006, 0.0022, 0.0014, 0.0015, 0.0013, 0.0003, 0.0008, 0.0005, 0.0009, 0.0010, 0.0007, 0.0007, 0.0006, 0.0009, 0.0015, 0.0009, 0.0010 },
{0.0002, 0.0012, 0.0006, 0.0040, 0.0049, 0.0025, 0.0023, 0.0013, 0.0004, 0.0007, 0.0004, 0.0010, 0.0009, 0.0005, 0.0007, 0.0005, 0.0007, 0.0004, 0.0008, 0.0009 },
{0.0007, 0.0054, 0.0022, 0.0049, 0.0371, 0.0114, 0.0095, 0.0044, 0.0016, 0.0021, 0.0014, 0.0033, 0.0024, 0.0014, 0.0016, 0.0015, 0.0020, 0.0010, 0.0024, 0.0025 },
{0.0004, 0.0030, 0.0014, 0.0025, 0.0114, 0.0184, 0.0120, 0.0032, 0.0011, 0.0014, 0.0010, 0.0027, 0.0017, 0.0010, 0.0009, 0.0012, 0.0012, 0.0006, 0.0012, 0.0016 },
{0.0004, 0.0026, 0.0015, 0.0023, 0.0095, 0.0120, 0.0196, 0.0051, 0.0014, 0.0018, 0.0012, 0.0036, 0.0024, 0.0012, 0.0012, 0.0013, 0.0017, 0.0006, 0.0016, 0.0019 },
{0.0004, 0.0016, 0.0013, 0.0013, 0.0044, 0.0032, 0.0051, 0.0215, 0.0016, 0.0058, 0.0022, 0.0037, 0.0063, 0.0019, 0.0019, 0.0022, 0.0030, 0.0011, 0.0023, 0.0033 },
{0.0001, 0.0005, 0.0003, 0.0004, 0.0016, 0.0011, 0.0014, 0.0016, 0.0119, 0.0008, 0.0004, 0.0009, 0.0010, 0.0004, 0.0003, 0.0004, 0.0004, 0.0002, 0.0004, 0.0005 },
{0.0004, 0.0012, 0.0008, 0.0007, 0.0021, 0.0014, 0.0018, 0.0058, 0.0008, 0.0378, 0.0014, 0.0022, 0.0038, 0.0029, 0.0014, 0.0025, 0.0019, 0.0010, 0.0017, 0.0025 },
{0.0001, 0.0005, 0.0005, 0.0004, 0.0014, 0.0010, 0.0012, 0.0022, 0.0004, 0.0014, 0.0191, 0.0014, 0.0017, 0.0009, 0.0008, 0.0012, 0.0014, 0.0005, 0.0010, 0.0016 },
{0.0003, 0.0012, 0.0009, 0.0010, 0.0033, 0.0027, 0.0036, 0.0037, 0.0009, 0.0022, 0.0014, 0.0125, 0.0047, 0.0022, 0.0014, 0.0019, 0.0020, 0.0007, 0.0018, 0.0023 },
{0.0003, 0.0012, 0.0010, 0.0009, 0.0024, 0.0017, 0.0024, 0.0063, 0.0010, 0.0038, 0.0017, 0.0047, 0.0126, 0.0031, 0.0019, 0.0028, 0.0030, 0.0011, 0.0023, 0.0031 },
{0.0002, 0.0008, 0.0007, 0.0005, 0.0014, 0.0010, 0.0012, 0.0019, 0.0004, 0.0029, 0.0009, 0.0022, 0.0031, 0.0141, 0.0015, 0.0037, 0.0022, 0.0014, 0.0020, 0.0024 },
{0.0002, 0.0005, 0.0007, 0.0007, 0.0016, 0.0009, 0.0012, 0.0019, 0.0003, 0.0014, 0.0008, 0.0014, 0.0019, 0.0015, 0.0073, 0.0016, 0.0035, 0.0010, 0.0025, 0.0031 },
{0.0002, 0.0008, 0.0006, 0.0005, 0.0015, 0.0012, 0.0013, 0.0022, 0.0004, 0.0025, 0.0012, 0.0019, 0.0028, 0.0037, 0.0016, 0.0213, 0.0049, 0.0010, 0.0016, 0.0024 },
{0.0003, 0.0009, 0.0009, 0.0007, 0.0020, 0.0012, 0.0017, 0.0030, 0.0004, 0.0019, 0.0014, 0.0020, 0.0030, 0.0022, 0.0035, 0.0049, 0.0161, 0.0014, 0.0027, 0.0041 },
{0.0002, 0.0008, 0.0015, 0.0004, 0.0010, 0.0006, 0.0006, 0.0011, 0.0002, 0.0010, 0.0005, 0.0007, 0.0011, 0.0014, 0.0010, 0.0010, 0.0014, 0.0093, 0.0012, 0.0012 },
{0.0003, 0.0009, 0.0009, 0.0008, 0.0024, 0.0012, 0.0016, 0.0023, 0.0004, 0.0017, 0.0010, 0.0018, 0.0023, 0.0020, 0.0025, 0.0016, 0.0027, 0.0012, 0.0178, 0.0062 },
{0.0003, 0.0009, 0.0010, 0.0009, 0.0025, 0.0016, 0.0019, 0.0033, 0.0005, 0.0025, 0.0016, 0.0023, 0.0031, 0.0024, 0.0031, 0.0024, 0.0041, 0.0012, 0.0062, 0.0161 }
};

/* Built-in residue substitution score (s_ij) table, 0-based 20x20.
   main copies it into the 1-based smatrix when no readable -s file is
   given, then multiplies every entry by LN2 (the comment in main says the
   substitution matrix should be in bits). */
double s_blosum62[20][20] = {
{5.2520, 0.4588, 1.0771, -0.7124, -0.8159, -1.2903, -1.4171, -1.2634, -1.1521, -1.2457, -1.8271, -1.2145, -1.3759, -1.8480, -0.9732, -2.1072, -1.4177, -1.1711, -1.3397, -1.4782 },
{0.4588, 3.0230, 1.4696, 0.0063, 0.2074, -0.0804, -0.4245, -1.1050, -1.1877, -1.5537, -1.7986, -1.0538, -1.1845, -1.4970, -1.5822, -1.7419, -1.5962, -0.6171, -1.3932, -1.5393 },
{1.0771, 1.4696, 3.2975, -0.4974, -0.5310, -0.6657, -0.6038, -0.8820, -1.2036, -1.5199, -1.4599, -0.8030, -0.8429, -1.0409, -0.7105, -1.5325, -1.0102, 0.8463, -0.8469, -0.9100 },
{-0.7124, 0.0063, -0.4974, 2.6963, 0.9959, 0.5634, 0.3436, -0.4676, -0.7099, -1.3383, -1.2382, -0.3331, -0.7404, -1.0754, -0.2105, -1.5293, -0.9990, -0.7756, -0.6836, -0.6774 },
{-0.8159, 0.2074, -0.5310, 0.9959, 1.9247, 0.7608, 0.3942, -0.7323, -0.6387, -1.8135, -1.4300, -0.5987, -1.2213, -1.6895, -1.0670, -1.8028, -1.4232, -1.3934, -1.0773, -1.2234 },
{-1.2903, -0.0804, -0.6657, 0.5634, 0.7608, 1.9993, 1.2735, -0.6609, -0.6138, -1.8624, -1.3783, -0.3588, -1.1741, -1.6085, -1.3848, -1.5606, -1.5972, -1.6158, -1.4951, -1.3351 },
{-1.4171, -0.4245, -0.6038, 0.3436, 0.3942, 1.2735, 1.8845, -0.0947, -0.4038, -1.5694, -1.1744, -0.0278, -0.8231, -1.4382, -1.0992, -1.5713, -1.2211, -1.5587, -1.2513, -1.1312 },
{-1.2634, -1.1050, -0.8820, -0.4676, -0.7323, -0.6609, -0.0947, 1.9646, -0.2043, 0.0798, -0.4071, -0.0227, 0.5579, -0.7654, -0.4020, -0.8767, -0.4319, -0.8126, -0.7068, -0.3670 },
{-1.1521, -1.1877, -1.2036, -0.7099, -0.6387, -0.6138, -0.4038, -0.2043, 4.2911, -1.2502, -1.3976, -0.4333, -0.4375, -1.3299, -1.4509, -1.7300, -1.8062, -1.4939, -1.6946, -1.5182 },
{-1.2457, -1.5537, -1.5199, -1.3383, -1.8135, -1.8624, -1.5694, 0.0798, -1.2502, 2.7816, -1.0668, -0.7877, -0.1462, -0.2114, -0.8926, -0.6568, -1.0551, -1.0204, -1.1521, -0.7640 },
{-1.8271, -1.7986, -1.4599, -1.2382, -1.4300, -1.3783, -1.1744, -0.4071, -1.3976, -1.0668, 3.6823, -0.5376, -0.4045, -1.0002, -0.6410, -0.7401, -0.5581, -1.0805, -1.0543, -0.5068 },
{-1.2145, -1.0538, -0.8030, -0.3331, -0.5987, -0.3588, -0.0278, -0.0227, -0.4333, -0.7877, -0.5376, 2.2727, 0.6906, -0.0230, -0.3377, -0.5254, -0.4316, -0.8429, -0.5612, -0.3348 },
{-1.3759, -1.1845, -0.8429, -0.7404, -1.2213, -1.1741, -0.8231, 0.5579, -0.4375, -0.1462, -0.4045, 0.6906, 1.9422, 0.3005, -0.0506, -0.1305, -0.0735, -0.4408, -0.3824, -0.1017 },
{-1.8480, -1.4970, -1.0409, -1.0754, -1.6895, -1.6085, -1.4382, -0.7654, -1.3299, -0.2114, -1.0002, -0.0230, 0.3005, 2.8266, 0.0008, 0.6358, -0.1340, 0.2892, -0.2199, -0.0895 },
{-0.9732, -1.5822, -0.7105, -0.2105, -1.0670, -1.3848, -1.0992, -0.4020, -1.4509, -0.8926, -0.6410, -0.3377, -0.0506, 0.0008, 2.6426, -0.1567, 0.9273, 0.2240, 0.4914, 0.6363 },
{-2.1072, -1.7419, -1.5325, -1.5293, -1.8028, -1.5606, -1.5713, -0.8767, -1.7300, -0.6568, -0.7401, -0.5254, -0.1305, 0.6358, -0.1567, 2.8871, 0.7552, -0.5595, -0.8029, -0.3509 },
{-1.4177, -1.5962, -1.0102, -0.9990, -1.4232, -1.5972, -1.2211, -0.4319, -1.8062, -1.0551, -0.5581, -0.4316, -0.0735, -0.1340, 0.9273, 0.7552, 2.4514, -0.0588, -0.0577, 0.3877 },
{-1.1711, -0.6171, 0.8463, -0.7756, -1.3934, -1.6158, -1.5587, -0.8126, -1.4939, -1.0204, -1.0805, -0.8429, -0.4408, 0.2892, 0.2240, -0.5595, -0.0588, 3.7555, -0.1249, -0.3605 },
{-1.3397, -1.3932, -0.8469, -0.6836, -1.0773, -1.4951, -1.2513, -0.7068, -1.6946, -1.1521, -1.0543, -0.5612, -0.3824, -0.2199, 0.4914, -0.8029, -0.0577, -0.1249, 2.7367, 1.0544 },
{-1.4782, -1.5393, -0.9100, -0.6774, -1.2234, -1.3351, -1.1312, -0.3670, -1.5182, -0.7640, -0.5068, -0.3348, -0.1017, -0.0895, 0.6363, -0.3509, 0.3877, -0.3605, 1.0544, 2.2523 },
};

/* blosum/qBlosum are not referenced in the visible part of the file.
   smatrix (s_ij, scaled by LN2) and qmatrix (q_ij) are set up in main and
   indexed 1..20 in both dimensions. */
double **blosum, **qBlosum;
double **smatrix, **qmatrix;

/* Alignment data. main reads aname/aseq/astart/alen/nal/alilen after each
   read_alignment2int() call, copies them into the *1 / *2 variants and
   frees the unsuffixed arrays. */
char **aname, **aname1, **aname2, **aseq, **aseq1, **aseq2;
int nal, nal1, nal2, nalmerge, alilen, alilen1, alilen2, maxalilen,
*astart, *astart1, *astart2, *alen;

/* alilen_mat1/alilen_mat2 receive the number of columns retained by
   neffsForChosenCol_maskGapReg(); the other variables in this group are not
   used by the visible main(). */
int **align_mat1, **align_mat2;
int n_lowgaps, alilen_mat1, alilen_mat2;
char **aseq_mat1, **aseq_mat2;
char **aseqGapTr1, **aseqGapTr2;
int **tracebackDir;
int **flagNewGapQuery, **flagNewGapDb;
int *positive, **col_score;
int posGp, segment_len;
int *apos1, *apos2;

/* Integer-coded alignments returned by read_alignment2int(). */
int **alignment1, **alignment2;
/* Residue alphabet: index 0 is the gap '-', 1..20 the residues in the order
   WFYMLIVACGPTSNQDEHRK, followed by B Z X * . and the lower-case letters.
   Each lower-case letter sits 25 positions after its upper-case form, which
   matches the "-25" folding in neffsForChosenCol_maskGapReg()
   (presumably the integer codes are indices into this string -- confirm
   against am2num()). */
char *am="-WFYMLIVACGPTSNQDEHRKBZX*.wfymlivacgptsnqdehrkbzx";
int **count;
int *maskgaps, *maskgaps1, *maskgaps2, *maskgapRegion, *maskgapRegion1, *maskgapRegion2;
/* Per-column profiles filled in main: target frequencies (pseudoCnt*),
   effective counts per residue (n_effAa*), their per-column sums
   (sum_eff_let*), and the original column number of each retained column
   (apos_filtr*). */
double **pseudoCnt1, **pseudoCnt2;
double n_eff1, n_eff2;
double **n_effAa1, **n_effAa2; 
double *sum_eff_let1, *sum_eff_let2;
int *apos_filtr1, *apos_filtr2;
int scoreGivenEnd, score_final;

double Evalue;
double lambda_len[] = {0.277, 2.25}; /* coefficients for linear approximation of lambda(len) and K(len) */ 
double K_len[] = {0.044, 7.4};
double lambda_est, K_est; /* estimated lambda_g and K_g values */

/* main sets lambda_u = ARG_L/f and scales the gap penalties by f. */
double lambda_u;
double b = 1.0;
double f= 32.0;

int **fV_RepeatOpenGapQuery, **fV_RepeatOpenGapDb;
int **fV_DbInClosestNewGapDb, **fV_QInClosestNewGapQuery;
int flag_errread = 0;

char **aseq_out1, **aseq_out2;
int *positive_out;
int inputpos1, inputpos2, outputpos, pos_beforegap1, pos_beforegap2, step1, step2;




main(int argc, char *argv[])
{
	char ARG_I1[200],ARG_I2[200],ARG_O[200]={'\0'},ARG_S[100]={'\0'},ARG_Q[100]={'\0'};
	char ARG_P1[200], ARG_P2[200]; /* files with single lines of space delimited numbers for positions to consider */
	int ARG_F=3,ARG_V=0,ARG_B=60, ARG_N=1;
	int ARG_GO = 10, ARG_GE = 1; /* penalties for gap opening and extension */  
	int ARG_E = 0; /* switch for adjustment of gap extension penalty according to gap content in opposite column */
	int ARG_R = 0; /* switch for reduction of column-column scores according to gap content */
	double ARG_G=0.5; /* threshold of gap content for column excision */
	double ARG_T=1.0; /* threshold of gap content for "gapped regions" with waiving of 1st gap_open penalty */
	double ARG_L = LAMB_UNG; /* Ungapped lambda */
	int dblen, ARG_D = 0; /* database length; 0 means using length of Ali2 for Evalue calculation */
	
	
	FILE *smatrixfile,*qmatrixfile, *bl62file, *qbl62file; 
	FILE *fout, *fpdb,*fp,*ft, *fpos1, *fpos2;
	int i,j,k,l;
	int *chosenpos1, *chosenpos2, npos1, npos2;
	char *t1;
	char *bl62qijLocation = "./blosum62.qij";
	char *bl62Location = "./blosum62.sij";
	double (*scorefcn)(int pos1, int pos2);
	double av_sij, std_sij, av_score, std_score, sigma_ratio;
	double av_abssij, av_absscore;
	
	int len_out;

	/*read input arguments */
        if(argc<=4) { argument(); exit(0);}
	for(i=1;i<argc;i++) {
	    if(strcmp(argv[i],"-i")==0) {strcpy(ARG_I1,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-j")==0) {strcpy(ARG_I2,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-o")==0) {strcpy(ARG_O,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-p1")==0) {strcpy(ARG_P1,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-p2")==0) {strcpy(ARG_P2,argv[i+1]);i++;continue;}

	    if(strcmp(argv[i],"-s")==0) {strcpy(ARG_S,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-q")==0) {strcpy(ARG_Q,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-e")==0) {sscanf(argv[i+1],"%d",&ARG_E);i++;continue;}
	    if(strcmp(argv[i],"-r")==0) {sscanf(argv[i+1],"%d",&ARG_R);i++;continue;}
	    if(strcmp(argv[i],"-f")==0) {sscanf(argv[i+1],"%d",&ARG_F);i++;continue;}
    	    if(strcmp(argv[i],"-O")==0) {sscanf(argv[i+1],"%d",&ARG_GO);i++;continue;}
	    if(strcmp(argv[i],"-E")==0) {sscanf(argv[i+1],"%d",&ARG_GE);i++;continue;}  
	    if(strcmp(argv[i],"-d")==0) {sscanf(argv[i+1],"%d",&ARG_D);i++;continue;}  
	    if(strcmp(argv[i],"-L")==0) {sscanf(argv[i+1],"%lf",&ARG_L);i++;continue;}  
	    if(strcmp(argv[i],"-b")==0) {sscanf(argv[i+1],"%d",&ARG_B);i++;continue;}
    	    if(strcmp(argv[i],"-n")==0) {sscanf(argv[i+1],"%d",&ARG_N);i++;continue;}
	    if(strcmp(argv[i],"-v")==0) {sscanf(argv[i+1],"%d",&ARG_V);i++;continue;}
	    if(strcmp(argv[i],"-g")==0) {sscanf(argv[i+1],"%lf",&ARG_G);i++;continue;}
	    if(strcmp(argv[i],"-t")==0) {sscanf(argv[i+1],"%lf",&ARG_T);i++;continue;}
	    
	}
	
        if((ARG_F>8)||(ARG_F<0)){fprintf(stderr,"column-column score calculation method(-f): \n1, 3_21; 2, 3_22 ...., 8, 3_28\n");
                    exit(0);}
        if((ARG_E>1)||(ARG_E<0)){fprintf(stderr,"adjustment of gap extension penalty depending on gap content in opposite column (-e): \n0, no adjustment; 1, adjust gap extension\n");
                exit(0);}
        if((ARG_R>1)||(ARG_R<0)){fprintf(stderr,"reduction of column-column scores according to gap content (-r): \n0, no reduction; 1, adjust scores\n");
                exit(0);}
	if((ARG_G>1.0)||(ARG_G<=0)){fprintf(stderr,"gap content(-g) to eliminate a column must be no more than 1 and more than 0 \n");
		    exit(0);}
	
	gap_open = f*ARG_GO; 
	gap_extend = f*ARG_GE;
	lambda_u = ARG_L/f;

	/* substitution matrix: should be bits (BLOSUM62 if reading from input failed) */
        if((smatrixfile=fopen(ARG_S,"r"))==NULL){
                if(strlen(ARG_S)!=0) { fprintf(stderr, "Using default residue substitution matrix: BLOSUM62\n",ARG_S); }
                smatrix = dmatrix(1,20,1,20);
                for(i=1;i<=20;i++) {
			for(j=1;j<=20;j++) { smatrix[i][j] = s_blosum62[i-1][j-1]; }
		}
       } else {
       		smatrix=read_aa_dmatrix(smatrixfile);
	     	fclose(smatrixfile);
	}
	
	for(i=1;i<=20;i++) {
		for(j=1;j<=20;j++) {
			smatrix[i][j] *= LN2;
		}
	}
	
	/* q_ij matrix:  qij for BLOSUM62 if reading from input failed */
        if((qmatrixfile=fopen(ARG_Q,"r"))==NULL){
                if(strlen(ARG_Q)!=0) { fprintf(stderr, "Using default residue q_ij matrix: BLOSUM62\n",ARG_Q); }
                qmatrix = dmatrix(1,20,1,20);
                for(i=1;i<=20;i++) {
			for(j=1;j<=20;j++) { qmatrix[i][j] = q_blosum62[i-1][j-1]; }
		}
        } else {
        	qmatrix=read_aa_dmatrix(qmatrixfile);
	     	fclose(qmatrixfile);
	 }


/* read alignments */
	alignment1=read_alignment2int(ARG_I1,1,1);
	if(alignment1==NULL){
		fprintf(stderr, "alignment1 file not readable\n");
	}

	alilen1 = alilen;
	nal1 = nal;
	astart1 = ivector(0,nal1);
	for (i=0; i<nal1; i++) {
		if(astart[i]) {astart1[i] = astart[i];}
		else { astart1[i] = 1; }
	}
	
	aname1 = cmatrix(0, nal1, 0, 100);  

	for (i=0; i<nal1; i++) {
		strcpy(aname1[i], aname[i]); 
	}

	aseq1 = cmatrix(0, nal1, 0, alilen1);
	for (j=0;j<alilen1;j++) {
		for (i=0; i<nal1; i++) {aseq1[i][j] = aseq[i][j];}			
	}
	
	free (astart);
	free (alen);
	free (aseq);
	free (aname);

	alignment2=read_alignment2int(ARG_I2,1,1);
	if(alignment2==NULL){
		fprintf(stderr, "alignment2 file not readable\n");
	}

	alilen2 = alilen;
	nal2 = nal;
	astart2 = ivector(0,nal2);
	for (i=0; i<nal2; i++) {
		if(astart[i]) {astart2[i] = astart[i];}
		else { astart2[i] = 1; }
	}
	aname2=cmatrix(0,nal2, 0, 100);
	for (i=0; i<nal2; i++) {
		strcpy(aname2[i], aname[i]); 
	}
	  
	aseq2 = cmatrix(0, nal2, 0, alilen2);
	for (j=0;j<alilen2;j++) {
		for (i=0; i<nal2; i++) {aseq2[i][j] = aseq[i][j];}			
	}

	free (astart);
	free (alen);
	free (aseq);
	free (aname);	

/* read sets of position numbers to consider */
	if((fpos1=fopen(ARG_P1,"r"))==NULL){ nrerror("Cannot open file with positions in alignment 1\n"); exit(0); }
	chosenpos1 = ivector(1, MAXSTR);
	npos1 = 0;
	while (fgets(str, MAXSTR, fpos1) != NULL) {
		for (t1 = strtok(str," "); t1 != NULL; t1 = strtok(NULL, " ")) {
			chosenpos1[++npos1] = atoi(t1);
		}
	}
	fclose(fpos1);

	sort_int(npos1, chosenpos1);

	if((fpos2=fopen(ARG_P2,"r"))==NULL){ nrerror("Cannot open file with positions in alignment 2\n"); exit(0); }
	chosenpos2 = ivector(1, MAXSTR);
	npos2 = 0;
	while (fgets(str, MAXSTR, fpos2) != NULL) {
		for (t1 = strtok(str," "); t1 != NULL; t1 = strtok(NULL, " ")) {
			chosenpos2[++npos2] = atoi(t1);
		}
	}
	fclose(fpos2);

	sort_int(npos2, chosenpos2);
	
/* check switch for optional modes of comparison:
 1st seq VS full ali; full ali VS 1st seq; 1st seq VS 1st seq 
*/ 	
	switch (ARG_V) {
			case 1:
				nal1 = 1; break;
			case 2: 
				nal2 = 1; break;
			case 3:
				nal1 =1; nal2= 1; break;
			}

/* filter "gapped" columns, get total n_eff, n_eff for each aa in each column and mask moderately "gapped regions" */
	apos_filtr1 = ivector(1,alilen1);
	n_effAa1 = dmatrix(1,alilen1,0,20);
	sum_eff_let1 = dvector(1,alilen1);			
	maskgapRegion1 = ivector(0,alilen1+1);
	neffsForChosenCol_maskGapReg(alignment1, npos1, chosenpos1, nal1, alilen1, ARG_G, ARG_T, n_effAa1, sum_eff_let1, maskgapRegion1, apos_filtr1, &alilen_mat1, &n_eff1);

	apos_filtr2 = ivector(1,alilen2);	
	n_effAa2 = dmatrix(1,alilen2,0,20);
	sum_eff_let2 = dvector(1,alilen2);			
	maskgapRegion2 = ivector(0,alilen2+1);
	
	neffsForChosenCol_maskGapReg(alignment2, npos2, chosenpos2, nal2, alilen2, ARG_G, ARG_T, n_effAa2, sum_eff_let2, maskgapRegion2, apos_filtr2, &alilen_mat2, &n_eff2);

/* Calculate target frequencies */
	pseudoCnt1 = dmatrix(1,alilen_mat1, 0, 20);
	pseudoCnt2 = dmatrix(1,alilen_mat2, 0, 20);
	pseudoCounts(n_effAa1, n_eff1, alilen_mat1, pseudoCnt1);
	pseudoCounts(n_effAa2, n_eff2, alilen_mat2, pseudoCnt2);
	
	fprintf(stderr,".");

/* Choose column-to-column score formula to use */
		switch (ARG_F) {
			case 1:
				scorefcn = ScoreForTwoRows_smat3_21; break;
			case 2: 
				scorefcn = ScoreForTwoRows_smat3_22; break;
			case 3:
				scorefcn = ScoreForTwoRows_smat3_23; break;
			case 7:
				scorefcn = ScoreForTwoRows_smat3_27; break;
			case 8:
				scorefcn = ScoreForTwoRows_smat3_28; break;	
			default:	
				scorefcn = ScoreForTwoRows_smat3_23;
			}

/* compute matrix of scores for column pairs, correct them with lambda */
	score_matrix = dmatrix(1,alilen_mat1,1,alilen_mat2);

	if((fout=fopen(ARG_O,"w"))==NULL){
                if(strlen(ARG_O)!=0){fprintf(stderr,"default output to stdout\n");}
		fout=stdout;
	}	

	for (i=1; i<=alilen_mat1; i++){
		for (j=1;j<=alilen_mat2;j++){			
			score_matrix[i][j] = scorefcn(i, j);
			fprintf(fout, "%f ", score_matrix[i][j]);
		}
		fprintf(fout, "\n");
	}
					
	free_dmatrix(n_effAa1,1,alilen1,0,20);
	free_dvector(sum_eff_let1,1,alilen1);
	free_dmatrix(pseudoCnt1,1,alilen1,0,20);
	free_dmatrix(n_effAa2,1,alilen2,0,20);
	free_dvector(sum_eff_let2,1,alilen2);
	free_dmatrix(pseudoCnt2,1,alilen2,0,20);

/*				
	lambda_al = lambdaAll(alilen_mat1,alilen_mat2,score_matrix_srt);
	if(lambda_al == -1.0) {      */       /* Lambda could not be found */


/* Rescale scores by forcing average absolute value of scores to that of substitution matrix s_ij */
                                                                
/*
                av_abssij = 0.0;
                for(i=1;i<=20;i++) {  
                        for(j=1;j<=20;j++)  av_abssij += fabs(smatrix[i][j]);
                }
                av_abssij /= 400.0;                               
                                
                av_absscore = 0.0;
                for(k=1;k<=alilen_mat1*alilen_mat2;k++)  av_absscore += fabs(score_matrix_srt[k]);
                av_absscore /= 1.0*alilen_mat1*alilen_mat2;
                                                        
                score_scale = f*av_abssij/av_absscore;   

	} else { score_scale = lambda_al/ lambda_u; }
*/
				
/* Print out header */ 
/*
				score_final = rint(score/32.0);
				if((fout=fopen(ARG_O,"a"))==NULL){
                			if(strlen(ARG_O)!=0){fprintf(stderr,"default output to stdout\n");}
	                		fout=stdout;
           			}	

				fprintf (fout, "Ali1: %s\tAli2: %s\n",ARG_I1,ARG_I2);
				fprintf (fout, "Threshold of effective gap content in columns: %.1f\n", ARG_G);
				fprintf (fout, "length1=%d\tfiltered_length1=%d\tlength2=%d\tfiltered_length2=%d\n", alilen1,alilen_mat1,alilen2,alilen_mat2);
				fprintf (fout, "Nseqs1=%d\tNeff1=%.3f\tNseqs2=%d\tNeff2=%.3f\n", nal1,n_eff1,nal2,n_eff2);
                                if(lambda_al == -1.0) { fprintf (fout, "Lambda equation unsolvable\n"); }
				else { fprintf (fout, "Lambda = %e\n", lambda_al);
*/

/* Print score matrix */
/*
	for (i=1; i<=alilen_mat1; i++){
		for (j=1;j<=alilen_mat2;j++){ fprintf(fout, "%e ", score_matrix[i][j]); }
		fprintf(fout, "\n");
	}
*/

	exit(0);
}


/* From a given alignment with aa as numbers, compute effective aa counts for
 * the chosen columns and keep only the columns whose EFFECTIVE gap content is
 * below the threshold effgapmax.
 *
 * Inputs:
 *   ali              alignment as integer codes, read as ali[1..n][j]
 *   nchpos           number of entries in chosenpos[1..nchpos]
 *   chosenpos        1-based column numbers to examine; entries outside
 *                    1..len are reported on stderr and skipped
 *   n, len           number of sequences and columns of ali
 *   effgapmax        a column is kept only if it contains at least one
 *                    residue and its gap fraction is < effgapmax
 *   effgapRegionMin  kept columns with gap fraction >= this value get
 *                    maskgapRegion = 1, the others 0
 *
 * Outputs (all 1-based over the kept columns, in chosenpos order):
 *   n_effAa[c][0..20]  effective count of gap (0) and of each residue (1..20)
 *   sum_eff_let[c]     sum of n_effAa[c][1..20]
 *   apos_filtr[c]      original column number of kept column c
 *   maskgapRegion      flags as described above; entries 0 and
 *                      (*len_lowgaps)+1 are set to 0
 *   *len_lowgaps       number of kept columns
 *   *nef               average number of distinct symbols (gap included)
 *                      per kept column; 0 when no column is kept
 *
 * At most len columns are kept, matching how main sizes the output arrays.
 * The return value carries no information (always NULL).
 */
void **neffsForChosenCol_maskGapReg(int **ali, int nchpos, int *chosenpos, int n, int len, double effgapmax, double effgapRegionMin, double **n_effAa, double *sum_eff_let, int *maskgapRegion, int *apos_filtr, int *len_lowgaps, double *nef)
{
	int i,j,k, jind;
	int alilen_mat, nsymbols_col, nsymbols;
	int ele;
	double *effnu;
	double sum_let, gapfrac;
	int *mark;
	int flagmark;

	fprintf(stderr,".");

	effnu = dvector(0,20);
	mark = ivector(0,n+10);
	alilen_mat = 0;
	nsymbols = 0;
	for(jind=1;jind<=nchpos;jind++) {

		j=chosenpos[jind];
		if (j < 1 || j > len) {
			/* not a column of this alignment: reading ali[i][j] would be out of bounds */
			fprintf(stderr, "position %d is outside the alignment (1..%d); skipped\n", j, len);
			continue;
		}
		if (alilen_mat >= len) break;	/* output arrays are full (possible with repeated positions) */

		nsymbols_col = 0;
		sum_let=0;

		for(k=0;k<=20;++k){
			/* Mark sequences that have amino acid k (or gap, k=0) in this jth position.
			   Codes 25 higher than k are folded onto k as well. */
			flagmark =0;
			for(i=1;i<=n;++i){
				mark[i]=0;

				ele=ali[i][j];
				if(ele==k){mark[i]=1; flagmark =1;}
				ele=ali[i][j]-25;
				if(ele==k) {mark[i]=1; flagmark =1;}
			}

			/* If aa k (or gap) is present in this position compute the k-th effective count */
			if (flagmark == 1) {
				effnu[k]=effective_number_nogaps(ali,mark,n,1,len);
				nsymbols_col++;
			} else { effnu[k] = 0.0; }

			if (k>0) sum_let += effnu[k];
		}

		if (sum_let <= 0) continue;	/* column holds no residues */

		gapfrac = 1.0*effnu[0]/(sum_let + effnu[0]);
		if (gapfrac < effgapmax) {
			alilen_mat++;
			for (k=0; k<=20; k++) {
				n_effAa[alilen_mat][k] = effnu[k];
			}
			sum_eff_let[alilen_mat] = sum_let;
			apos_filtr[alilen_mat] = j;
			nsymbols += nsymbols_col;

			if(gapfrac < effgapRegionMin) {
				maskgapRegion[alilen_mat] = 0;
			} else {
				maskgapRegion[alilen_mat] = 1;
			}
		}
	}

	/* avoid 0/0 when every chosen column was filtered out */
	*nef = (alilen_mat > 0) ? 1.0*nsymbols/alilen_mat : 0.0;
	*len_lowgaps = alilen_mat;

	maskgapRegion[0] = maskgapRegion[alilen_mat+1] = 0;

	free_dvector(effnu,0,20);
	free_ivector(mark,0,n+10);

	return NULL;
}


/* Convert per-column effective residue counts into target frequencies.
 *
 * For every column i in 1..len the counts matrix[i][1..20] are normalised to
 * frequencies f, a pseudocount term g[j] = sum_k qmatrix[j][k]*f[k]/p_rbnsn[k-1]
 * is formed, and pseudoCnt[i][j] = (alpha*f[j] + beta*g[j])/(alpha+beta) is
 * stored for j in 1..20, with alpha = n_eff-1 and beta = 10.
 * pseudoCnt[i][0] is not written.
 *
 * A column whose counts sum to zero yields non-finite values; the caller in
 * this file only passes columns that contain at least one residue.
 * The return value carries no information (always NULL).
 */
void **pseudoCounts(double **matrix, double n_eff, int len, double **pseudoCnt)
{
	int i,j,k;
	double freq[21];	/* 1-based residue frequencies; on the stack, so nothing to free */
	double g;
	double sumN;
	double alpha, beta;

	alpha = n_eff-1;
	beta = 10.0;

	for (i=1;i<=len;i++) {
		sumN = 0;
		for (j=1;j<=20;j++) sumN += matrix[i][j];
		for (j=1;j<=20;j++) {
			freq[j] = 1.0*matrix[i][j]/sumN;
		}
		for (j=1;j<=20;j++) {
			g = 0;
			for (k=1;k<=20;k++) g += qmatrix[j][k]*freq[k]/p_rbnsn[k-1];
			pseudoCnt[i][j]= (alpha*freq[j] + beta*g)/(alpha+beta);
		}
	}

	return NULL;
}
	
/** Solve funcAl(lambda) = 0 for the all-to-all column comparison scores.
 *
 *  The len1*len2 scores in score_matrix_srt[1..] are first sorted in place
 *  (so that the exponentials are summed in sorted order), then the root is
 *  searched by bisection on [1e-10, 1.0] with an accuracy of 1e-10 and at
 *  most JMAX halvings.
 *
 *  Returns the root, or -1.0 when the function does not change sign over
 *  the interval or the iteration limit is reached.
 ***/
double lambdaAll(int len1, int len2, double *score_matrix_srt)
{
	const double lower = 1e-10;
	const double upper = 1.0;
	const double tolerance = 1e-10;
	double f_lower, f_probe;
	double root, step, probe;
	int iter;

	sort(len1*len2, score_matrix_srt);

	f_lower = funcAl(lower,len1,len2,score_matrix_srt);
	f_probe = funcAl(upper,len1,len2,score_matrix_srt);

	/* TO DO: in case of failure, make a standard lambda */
	if (f_lower*f_probe >= 0.0) {
		return -1.0;
	}

	/* Orient the search so that the function is negative at `root`. */
	if (f_lower < 0.0) {
		step = upper - lower;
		root = lower;
	} else {
		step = lower - upper;
		root = upper;
	}

	for (iter = 1; iter <= JMAX; iter++) {
		step *= 0.5;
		probe = root + step;
		f_probe = funcAl(probe,len1,len2,score_matrix_srt);
		if (f_probe <= 0.0) {
			root = probe;
		}
		if (fabs(step) < tolerance || f_probe == 0.0) {
			return root;
		}
	}

	fprintf(stderr, "Too many bisections in rtbis\n");
	return -1.0;
}
	
/** Calculation of the left part of equation: funcAl(lambda) = 0  **/
/** Based on all column combinations from two alignments **/
double funcAl(double x, int len1, int len2, double *score_matrix_srt) {
	int i,j,k;
	double f, ffin;
	f=0.0;
	for (i=1; i<=len1*len2; i++) { f += exp(x*score_matrix_srt[i]);	}	
	ffin = f/(len1*len2) - 1.0;
	return ffin;
}


/* Read a residue matrix from the open text stream fmat.
 *
 * Accepted layout, as implemented by the state machine below:
 *   - lines starting with '#' before the header are comments;
 *   - the header is the first line that starts with a space; every non-space
 *     character on it is a column letter, converted with am2numBZX()
 *     (at most 24 columns);
 *   - each following line is one matrix row made of space-separated numbers.
 *     Rows carry no label: the k-th row belongs to the residue of the k-th
 *     header column, and its m-th number goes to the m-th header column.
 *
 * After reading, the entry below the diagonal (in header order) is copied
 * over the one above it, so a lower-triangular file yields a symmetric
 * matrix.
 *
 * Returns a matrix allocated with dmatrix(0,25,0,25), zero-initialised and
 * indexed by the am2numBZX() codes.  Malformed input (too many columns or
 * rows, a row with more numbers than header columns, a number longer than
 * 31 characters) is reported through nrerror() and terminates the program.
 *
 * NOTE(review): a last number that is not followed by a space or newline is
 * not stored, and an empty line inside the matrix body is counted as a row;
 * both behaviours are unchanged from the original implementation.
 */
double  **read_aa_dmatrix(FILE *fmat){

int i,ncol,ri,rj,c,flag,j;
int col[31];
char stri[32];	/* current numeric token, always NUL-terminated */
double t;
double **mat;

mat=dmatrix(0,25,0,25);
for(i=0;i<=25;++i)for(j=0;j<=25;++j)mat[i][j]=0.0;
for(i=0;i<=30;++i)col[i]=0;
stri[0]='\0';

ncol=0;
i=0;
ri=0;
rj=0;
j=0;
/* flag: 0 start of line before header, -1 inside '#' comment, 1 header,
   10 start of a matrix row, 3 inside a number, 2 between numbers */
flag=0;

while( (c=getc(fmat)) != EOF){

if(flag==0 && c=='#'){flag=-1;continue;}
else if(flag==-1 && c=='\n'){flag=0;continue;}
else if(flag==-1){continue;}
else if(flag==0 && c==' '){flag=1;continue;}
else if(flag==1 && c=='\n'){flag=10;  continue;}
else if(flag==1 && c==' '){continue;}
else if(flag==1){
                ++ncol;
                if(ncol>=25){nrerror("matrix has more than 24 columns: FATAL");exit(0);}
                col[ncol]=am2numBZX(c);
                continue;
                }
else if(flag==10 && c!=' ' && c!='#'){
                /* first character of a new row starts its first number */
                ri=0;
                ++rj;
                if(rj>=25){nrerror("matrix has more than 24 rows: FATAL");exit(0);}
                stri[0]=c;
                stri[1]='\0';
                j=0;
                flag=3;
                continue;
                }
else if (flag==2 && c==' '){stri[0]='\0';j=0;continue;}
else if (flag==2 && c=='\n'){flag=10;continue;}
else if (flag==2){
                /* start a fresh token so no characters of the previous number survive */
                flag=3;
                j=0;
                stri[0]=c;
                stri[1]='\0';
                continue;
                }
else if (flag==3 && (c==' ' || c=='\n')){
                        /* end of a number: store it at (row rj, column ri) */
                        j=0;
                        ++ri;
                        if(rj>ncol || ri>ncol){nrerror("matrix row or value outside the header columns: FATAL");exit(0);}
                        t=atof(stri);
                        mat[col[rj]][col[ri]]=t;
                        if (c=='\n')flag=10;else flag=2;
                        continue;
                        }
else if (flag==3){
                /* keep room for the terminating NUL */
                if(j>=30){nrerror("string too long:FATAL");exit(0);}
                stri[++j]=c;
                stri[j+1]='\0';
                continue;
                }

}

/* mirror the lower triangle (in header order) onto the upper one */
for(i=1;i<=ncol;i++) {
		for(j=i+1;j<=ncol;j++) {
			mat[col[i]][col[j]]=mat[col[j]][col[i]];
		}
	}
return mat;
}

	
/* Print the command-line usage text to stderr. */
void argument()
{
/* one entry per original output call; emitted in order, unchanged */
static const char *const usage[] = {
"COMPASS 1.24\n",
"COMPASS is the program for the comparison of two multiple protein alignments\n",
"Arguments:\n\n",
"  -i    Input alignment 1 [File in]\n",
"        Format: ClustalW, STOCKHOLM, or simple alignment format\n",
"  -j    Input alignment 2 [File in]\n",
"        Format: ClustalW, STOCKHOLM, or simple alignment format\n",
"\n",
"Optional arguments:\n\n",
"  -o    Output file (default = STDOUT) with COMPASS result:\n",
"        a header\n",
"        and resulting alignment (top sequences shown in both input alignments):\n",
"        pluses (+) denote matches with positive scores;\n",
"        Capital letters and dashes (-) denote the profile positions\n",
"        that were used for alignment construction.\n",
"        Lower-case letters and dots (.) denote the profile positions with high gap content\n",
"        that were disregarded in the process of alignment construction\n",
"        (they may be aligned with gaps (~) that are not scored).\n",
"        Equal signs (=) denote the gaps introduced in profiles.\n",
"  -s    Path to the residue substitution matrix (s_ij)\n",
"        Default = BLOSUM62\n",
"  -q	Path to the matrix of residue pair frequencies (q_ij)\n",
"        Default = BLOSUM62\n",
"  -f    Formula for the scores between profile columns (variants 1-8)\n",
"        Default: 3\n",
"  -g	Threshold of gap content to disregard 'gapped' columns (0.0 to 1.0)\n",
"        Default = 0.5\n",
"  -t 	Threshold of gap content for marking 'moderately gapped' regions,\n",
"        where penalty for the 1st gap opening is waived (0.0 to 1.0)\n",
"        Default = 1.0 (no waiving of gap opening penalty)\n",
"  -e 	Switch (0/1) for adjustment of gap extension penalty\n",
"        according to gap content in opposite column\n",
"        Default = 0 (no adjustment)\n",
"  -r 	Switch (0/1) for reduction of column-column scores according to gap content\n",
"        Default = 0 (no reduction)\n",
"  -O	Penalty for gap opening (integer)\n",
"        Default = 10\n",
"  -E    Penalty for gap extension (integer)\n",
"        Default = 1\n",
"  -L    Ungapped lambda for a standard sequence-sequence scoring system\n",
"        used for rescaling of lambda for profile-profile comparison\n",
"        Default = 0.3176 (ungapped lambda for BLOSUM62)\n",
"  -d    Database length (including only columns with gap content lower than the threshold (-g))\n",
"        used for Evalue calculation\n",
"        Default = length of alignment 2\n",
"  -v    Variant of the comparison mode:\n",
"        Profile-profile (0), 1st sequence-profile (1), profile-1st sequence (2)\n",
"        or 1st sequence-1st sequence (3)\n",
"        Default = 0\n",
"  -b    Length of alignment chunks to print out in lines\n",
"        Default = 60\n",
"  -n    Number of sequences to print out in each of the aligned alignments\n",
"        Default = 1\n"
};
size_t line;

for (line = 0; line < sizeof usage / sizeof usage[0]; line++) {
	fputs(usage[line], stderr);
}
}

/*
 * Fatal-error handler: writes error_text and a termination notice to stderr,
 * then ends the process with exit status 1.  Never returns.
 */
void nrerror(char error_text[]){
FILE *sink = stderr;

fprintf(sink,"%s\n",error_text);
fputs("FATAL - execution terminated\n",sink);
exit(1);
}


/*
 * Allocate a char vector addressable as v[nl..nh].
 * The returned pointer is offset from the malloc'ed block by NR_END - nl;
 * release it with free_cvector() using the same nl.
 * Calls nrerror() if the allocation fails.
 */
char *cvector(long nl, long nh){
char *v;
/* NOTE(review): the block is sized with sizeof(int) per element, which is more
   than a char vector needs.  Left unchanged here in case callers depend on
   the extra room - confirm before shrinking it to sizeof(char). */
v=(char *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(int)));
if (!v) nrerror("allocation failure in cvector()");
return v-nl+NR_END;
}


/*
 * Allocate an int vector addressable as v[nl..nh].
 * The returned pointer is offset from the malloc'ed block by NR_END - nl.
 * Calls nrerror() if the allocation fails.
 */
int *ivector(long nl, long nh){
size_t bytes=(size_t) ((nh-nl+1+NR_END)*sizeof(int));
int *base=(int *)malloc(bytes);

if (base==NULL) nrerror("allocation failure in ivector()");
return base-nl+NR_END;
}

/*
 * Allocate a long vector addressable as v[nl..nh].
 * The returned pointer is offset from the malloc'ed block by NR_END - nl.
 * Calls nrerror() if the allocation fails.
 */
long *lvector(long nl, long nh){
size_t bytes=(size_t) ((nh-nl+1+NR_END)*sizeof(long int));
long int *base=(long int *)malloc(bytes);

if (base==NULL) nrerror("allocation failure in lvector()");
return base-nl+NR_END;
}

/*
 * Allocate a double vector addressable as v[nl..nh].
 * The returned pointer is offset from the malloc'ed block by NR_END - nl.
 * Calls nrerror() if the allocation fails.
 */
double *dvector(long nl, long nh){
size_t bytes=(size_t) ((nh-nl+1+NR_END)*sizeof(double));
double *base=(double *)malloc(bytes);

if (base==NULL) nrerror("allocation failure in dvector()");
return base-nl+NR_END;
}

/*
 * Allocate a char matrix addressable as m[nrl..nrh][ncl..nch].
 * Two blocks are allocated: an array of row pointers and one contiguous block
 * holding all cells; each row pointer is set ncol cells after the previous
 * one.  Calls nrerror() if either allocation fails.
 */
char **cmatrix(long nrl, long nrh, long ncl, long nch){
long r;
long nrow=nrh-nrl+1;
long ncol=nch-ncl+1;
char **rows;

/* row-pointer array, shifted so that rows[nrl] is the first usable slot */
rows=(char **)malloc((size_t)((nrow+NR_END)*sizeof(char*)));
if (rows==NULL) nrerror("allocation failure 1 in cmatrix()");
rows += NR_END;
rows -= nrl;

/* cell storage, shifted so that rows[nrl][ncl] is the first usable cell */
rows[nrl]=(char *)malloc((size_t)((nrow*ncol+NR_END)*sizeof(char)));
if (rows[nrl]==NULL) nrerror("allocation failure 2 in cmatrix()");
rows[nrl] += NR_END;
rows[nrl] -= ncl;

r=nrl+1;
while(r<=nrh){
	rows[r]=rows[r-1]+ncol;
	r++;
}

return rows;
}

/*
 * Allocate an int matrix addressable as m[nrl..nrh][ncl..nch].
 * Two blocks are allocated: an array of row pointers and one contiguous block
 * holding all cells; each row pointer is set ncol cells after the previous
 * one.  Calls nrerror() if either allocation fails.
 */
int **imatrix(long nrl, long nrh, long ncl, long nch){
long r;
long nrow=nrh-nrl+1;
long ncol=nch-ncl+1;
int **rows;

/* row-pointer array, shifted so that rows[nrl] is the first usable slot */
rows=(int **)malloc((size_t)((nrow+NR_END)*sizeof(int*)));
if (rows==NULL) nrerror("allocation failure 1 in imatrix()");
rows += NR_END;
rows -= nrl;

/* cell storage, shifted so that rows[nrl][ncl] is the first usable cell */
rows[nrl]=(int *)malloc((size_t)((nrow*ncol+NR_END)*sizeof(int)));
if (rows[nrl]==NULL) nrerror("allocation failure 2 in imatrix()");
rows[nrl] += NR_END;
rows[nrl] -= ncl;

r=nrl+1;
while(r<=nrh){
	rows[r]=rows[r-1]+ncol;
	r++;
}

return rows;
}

/*
 * Allocate a double matrix addressable as m[nrl..nrh][ncl..nch].
 * Two blocks are allocated: an array of row pointers and one contiguous block
 * holding all cells; each row pointer is set ncol cells after the previous
 * one.  Calls nrerror() if either allocation fails.
 */
double **dmatrix(long nrl, long nrh, long ncl, long nch){
long r;
long nrow=nrh-nrl+1;
long ncol=nch-ncl+1;
double **rows;

/* row-pointer array, shifted so that rows[nrl] is the first usable slot */
rows=(double **)malloc((size_t)((nrow+NR_END)*sizeof(double*)));
if (rows==NULL) nrerror("allocation failure 1 in dmatrix()");
rows += NR_END;
rows -= nrl;

/* cell storage, shifted so that rows[nrl][ncl] is the first usable cell */
rows[nrl]=(double *)malloc((size_t)((nrow*ncol+NR_END)*sizeof(double)));
if (rows[nrl]==NULL) nrerror("allocation failure 2 in dmatrix()");
rows[nrl] += NR_END;
rows[nrl] -= ncl;

r=nrl+1;
while(r<=nrh){
	rows[r]=rows[r-1]+ncol;
	r++;
}

return rows;
}

void free_ivector(int *v, long nl, long nh)
/* free an int vector allocated with ivector() */
{
	free((FREE_ARG) (v+nl-NR_END));
}

void free_cvector(char *v, long nl, long nh)
/* free an unsigned char vector allocated with cvector() */
{
	free((FREE_ARG) (v+nl-NR_END));
}

void free_dvector(double *v, long nl, long nh)
/* free a double vector allocated with dvector() */
{
	free((FREE_ARG) (v+nl-NR_END));
}



void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch)
/* free a double matrix allocated by dmatrix() */
{
	free((FREE_ARG) (m[nrl]+ncl-NR_END));
	free((FREE_ARG) (m+nrl-NR_END));
}

void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch)
/* free an int matrix allocated by imatrix() */
{
	free((FREE_ARG) (m[nrl]+ncl-NR_END));
	free((FREE_ARG) (m+nrl-NR_END));
}

void free_cmatrix(char **m, long nrl, long nrh, long ncl, long nch)
/* free a double matrix allocated by dmatrix() */
{
	free((FREE_ARG) (m[nrl]+ncl-NR_END));
	free((FREE_ARG) (m+nrl-NR_END));
}


/*
 * Translate a one-letter amino-acid code to its numeric index.
 * Upper- and lower-case letters are accepted; the numbering is
 *   W=1 F=2 Y=3 M=4 L=5 I=6 V=7 A=8 C=9 G=10
 *   P=11 T=12 S=13 N=14 Q=15 D=16 E=17 H=18 R=19 K=20.
 * Every other character (gap symbols, B, Z, X, ...) yields 0.
 *
 * The parameter type is spelled out explicitly: the former definition relied
 * on implicit int, which is no longer valid C.
 */
int am2num(int c)
{
switch (c) {
	case 'W': case 'w': return 1;
	case 'F': case 'f': return 2;
	case 'Y': case 'y': return 3;
	case 'M': case 'm': return 4;
	case 'L': case 'l': return 5;
	case 'I': case 'i': return 6;
	case 'V': case 'v': return 7;
	case 'A': case 'a': return 8;
	case 'C': case 'c': return 9;
	case 'G': case 'g': return 10;
	case 'P': case 'p': return 11;
	case 'T': case 't': return 12;
	case 'S': case 's': return 13;
	case 'N': case 'n': return 14;
	case 'Q': case 'q': return 15;
	case 'D': case 'd': return 16;
	case 'E': case 'e': return 17;
	case 'H': case 'h': return 18;
	case 'R': case 'r': return 19;
	case 'K': case 'k': return 20;
	default: return 0;
	}
}


/*
 * Translate a one-letter residue code to its numeric index, including the
 * ambiguity codes.  Upper- and lower-case letters are accepted:
 *   W=1 F=2 Y=3 M=4 L=5 I=6 V=7 A=8 C=9 G=10
 *   P=11 T=12 S=13 N=14 Q=15 D=16 E=17 H=18 R=19 K=20
 *   B=21 Z=22 X=23 and '*'=24.
 * Every other character yields 0.
 *
 * The parameter type is spelled out explicitly: the former definition relied
 * on implicit int, which is no longer valid C.
 */
int am2numBZX(int c)
{
switch (c) {
	case 'W': case 'w': return 1;
	case 'F': case 'f': return 2;
	case 'Y': case 'y': return 3;
	case 'M': case 'm': return 4;
	case 'L': case 'l': return 5;
	case 'I': case 'i': return 6;
	case 'V': case 'v': return 7;
	case 'A': case 'a': return 8;
	case 'C': case 'c': return 9;
	case 'G': case 'g': return 10;
	case 'P': case 'p': return 11;
	case 'T': case 't': return 12;
	case 'S': case 's': return 13;
	case 'N': case 'n': return 14;
	case 'Q': case 'q': return 15;
	case 'D': case 'd': return 16;
	case 'E': case 'e': return 17;
	case 'H': case 'h': return 18;
	case 'R': case 'r': return 19;
	case 'K': case 'k': return 20;
	case 'B': case 'b': return 21;
	case 'Z': case 'z': return 22;
	case 'X': case 'x': return 23;
	case '*': return 24;
	default: return 0;
	}
}


/*
 * Convert an alignment symbol to its "lower" form: '-' becomes '.', and the
 * upper-case residue letters W F Y M L I V A C G P T S N Q D E H R K B Z X
 * become lower case.  Any other character is returned unchanged.
 */
char am2lower(char inchr)
{
/* position k of `from` maps to position k of `to` */
static const char from[] = "-WFYMLIVACGPTSNQDEHRKBZX";
static const char to[]   = ".wfymlivacgptsnqdehrkbzx";
const char *hit;

if (inchr == '\0') return inchr;	/* strchr() would match the terminator */
hit = strchr(from, inchr);
if (hit == NULL) return inchr;
return to[hit - from];
}

/*
 * Convert an alignment symbol to its "upper" form: '.' becomes '-', and the
 * lower-case residue letters w f y m l i v a c g p t s n q d e h r k b z x
 * become upper case.  Any other character is returned unchanged.
 */
char am2upper(char inchr)
{
/* position k of `from` maps to position k of `to` */
static const char from[] = ".wfymlivacgptsnqdehrkbzx";
static const char to[]   = "-WFYMLIVACGPTSNQDEHRKBZX";
const char *hit;

if (inchr == '\0') return inchr;	/* strchr() would match the terminator */
hit = strchr(from, inchr);
if (hit == NULL) return inchr;
return to[hit - from];
}





/* File-scope integer matrix.  NOTE(review): nothing in the nearby code shows
   where it is filled or read - confirm its use before relying on it. */
int **alignment;



/* Allocation and string helpers defined below: mymalloc() exits the program
   when malloc fails; strsave()/strnsave() return newly allocated copies;
   incbuf()/incibuf() return an array grown by one slot. */
static void *mymalloc(int size);
char *strsave(char *str);
char *strnsave(char *str, int l);
static char **incbuf(int n, char **was);
static int *incibuf(int n, int *was);

/* Alignment input: readali() fills file-level alignment variables from a
   file, ali_char2int() converts the stored sequences to an int matrix, and
   read_alignment2int() does both. */
void readali(char *filename);
int **ali_char2int(char **aseq,int start_num, int start_seq);
int **read_alignment2int(char *filename,int start_num,int start_seq);

/* Effective-number-of-sequences estimators. */
double effective_number(int **ali, int *marks, int n, int start, int end);
double effective_number_nogaps(int **ali, int *marks, int n, int start, int end);



/*
 * malloc() wrapper that never returns NULL: on failure it reports the
 * requested size on stderr and ends the process with exit status 1.
 */
static void *mymalloc(int size)
{
	void *mem = malloc(size);

	if (mem != NULL) {
		return mem;
	}
	fprintf(stderr, "Not enough memory: %d\n", size);
	exit(1);
}

/* Return a freshly allocated copy of the NUL-terminated string str
   (allocated through mymalloc()). */
char *strsave(char *str)
{
	int nchars = strlen(str);
	char *copy = mymalloc(nchars + 1);	/* room for the terminator */

	strcpy(copy, str);
	return copy;
}

/* Return a freshly allocated, NUL-terminated copy of the first l bytes of
   str (allocated through mymalloc()). */
char *strnsave(char *str, int l)
{
	char *copy = mymalloc(l + 1);

	copy[l] = '\0';
	memcpy(copy, str, l);
	return copy;
}

/*
 * Grow an array of n string pointers by one slot.
 * A new array of n+1 pointers is allocated; when n > 0 the old entries are
 * copied over and the old array `was` is freed.  The new last slot is NULL.
 * When n == 0, `was` is neither read nor freed.
 */
static char **incbuf(int n, char **was)
{
	char **grown = mymalloc((n+1) * sizeof(grown[0]));

	grown[n] = NULL;
	if (n > 0) {
		memcpy(grown, was, n * sizeof(was[0]));
		free(was);
	}
	return grown;
}

/*
 * Grow an array of n ints by one slot.
 * A new array of n+1 ints is allocated; when n > 0 the old entries are copied
 * over and the old array `was` is freed.  The new last slot is 0.
 * When n == 0, `was` is neither read nor freed.
 */
static int *incibuf(int n, int *was)
{
	int *grown = mymalloc((n+1) * sizeof(grown[0]));

	grown[n] = 0;
	if (n > 0) {
		memcpy(grown, was, n * sizeof(was[0]));
		free(was);
	}
	return grown;
}
/* Report on stderr the numeric code of an alignment-reading error. */
void err_readali(int err_num)
{
	FILE *sink = stderr;

	fprintf(sink, "Error with reading alignment: %d\n", err_num);
}

/*
 * Read a multiple alignment from the text file `filename` into the
 * file-level variables nal (number of sequences), alilen (number of columns),
 * aname[], aseq[], astart[] and alen[].
 *
 * Format handled here:
 *   - an optional first line starting with "CLUSTAL " is discarded;
 *   - lines starting with '#' or "//" are skipped;
 *   - a data line is "name [start] sequence"; blocks of data lines are
 *     separated by blank lines and every block must list the same names in
 *     the same order, each with the same number of columns;
 *   - once the column where the sequence text begins is known (from the first
 *     data line), lines whose first non-blank character lies at or beyond
 *     that column are skipped.
 *
 * On any error a message and a numeric code (via err_readali) are printed,
 * flag_errread is set to 1, the file is closed and the function returns.
 */
void readali(char *filename)

{
	FILE *fp;
	char *s, *ss, *seqbuf;
	int n, l, len, len0 = 0;
	int ii = 0, mark = 1;	/* ii: column of the sequence text; mark: ii not yet known */

	if ((fp = fopen(filename, "r")) == NULL) {
		fprintf(stderr, "No such file: \"%s\"\n", filename);
		err_readali(1);
		flag_errread=1;
		return;
	}

	alilen = 0;
	nal = 0;
	n = 0;

	/* drop a leading "CLUSTAL ..." banner; any other first line is re-read as data */
	if(fgets(str, MAXSTR, fp) != NULL) {
		if(strncmp(str,"CLUSTAL ",8)!=0){rewind(fp);}
	}

	while (fgets(str, MAXSTR, fp) != NULL) {

		if (*str=='#' || strncmp(str,"//",2) == 0) {continue;}
		for (ss = str; isspace((unsigned char)*ss); ss++) ;
		/* mark is tested first so ii is only read after it has been set */
		if ((mark==0)&&(ii<=ss-str)) {continue;}
		if (*ss == '\0') {
			/* blank line: end of a block (if any sequence was read in it) */
			if (n == 0) {
				continue;
			}
			if (nal == 0) {
				nal = n;
			} else if (n != nal) {
				fprintf(stderr, "Wrong nal, was: %d, now: %d\n", nal, n);
				err_readali(3);
				flag_errread=1;
				fclose(fp);
				return;
			}
			n = 0;
			continue;
		}
		/* isolate the sequence name */
		for (s = ss; *s != '\0' && !isspace((unsigned char)*s); s++) ;
		/* do not step past the terminator when the line ends right after the name */
		if (*s != '\0') {
			*s++ = '\0';
		}

		if (nal == 0) {
			/* first block: grow the per-sequence arrays by one entry */
			astart = incibuf(n, astart);
			alen = incibuf(n, alen);
			aseq = incbuf(n, aseq);
			aname = incbuf(n, aname);
			aname[n] = strsave(ss);

		} else {
			if (n < 0 || n >= nal) {
				fprintf(stderr, "Bad sequence number: %d of %d\n", n, nal);
				err_readali(4);
				flag_errread=1;
				fclose(fp);
				return;
			}
			if (strcmp(ss, aname[n]) != 0) {
				fprintf(stderr, "Names do not match");
				fprintf(stderr, ", was: %s, now: %s\n", aname[n], ss);
				err_readali(5);
				flag_errread=1;
				fclose(fp);
				return;
			}
		}
		for (ss = s; isspace((unsigned char)*ss); ss++);
		if(mark==1){
			ii = ss-str;
			mark=0;
		}

		/* optional start number in front of the sequence text */
		for (s = ss; isdigit((unsigned char)*s); s++) ;
		if (isspace((unsigned char)*s)) {
			if (nal == 0) {
				astart[n] = atoi(ss);
			}
			for (ss = s; isspace((unsigned char)*ss); ss++);
		}
		/* l counts letters only; len counts letters and gap symbols */
		for (s = ss, len=0, l = 0; *s != '\0' && !isspace((unsigned char)*s); s++) {
			if (isalpha((unsigned char)*s)) {
				l++;
			}

/*** Calculate len -- the full number of aa and gaps, excluding position numbers in the end ***/
			if (isalpha((unsigned char)*s) || *s == '-' || *s == '.') {
				len++;
			}
		}

		if (n == 0) {
			len0 = len;
			alilen += len;
		} else if (len != len0) {
			fprintf(stderr, "wrong len for %s", aname[n]);
			fprintf(stderr, ", was: %d, now: %d\n", len0, len);
			err_readali(6);
			flag_errread=1;
			fclose(fp);
			return;
		}

		alen[n] += l;
		if (aseq[n] == NULL) {
			aseq[n] = strnsave(ss, len);
		} else {
			/* later block: append this chunk to the sequence read so far */
			seqbuf = mymalloc(alilen+1);
			memcpy(seqbuf, aseq[n], alilen-len);
			free(aseq[n]);
			aseq[n] = seqbuf;
			memcpy(seqbuf+alilen-len, ss, len);
			seqbuf[alilen] = '\0';
		}
		n++;
	}
	if (nal == 0) {
		if (n == 0) {
			fprintf(stderr, "No alignments read\n");
			err_readali(7);
			flag_errread=1;
			fclose(fp);
			return;
		}
		nal = n;
	} else if (n != 0 && n != nal) {
		fprintf(stderr, "Wrong nal, was: %d, now: %d\n", nal, n);
		err_readali(8);
		flag_errread=1;
		fclose(fp);
		return;
	}

	fclose(fp);
}

/*** Print alignment to file ****/

/*
 * Helper for printali_ali(): print one chunk of nseq sequences.
 * Each line holds the name left-justified in a field of name_width
 * characters, then (first chunk only, offset == 0) the start number, padding
 * up to 7 characters, and the symbols seqs[i][offset+1 .. offset+chunk]
 * limited to column len.
 */
static void printali_rows(FILE *fpp, int nseq, char **names, char **seqs, int *starts,
int name_width, int offset, int len, int chunk)
{
	int i, j, used;
	char *sq;

	for (i=0; i < nseq; i++) {
		/* the name is passed as data, never as a format string */
		fprintf(fpp, "%-*s", name_width, names[i]);

		used = 0;
		if (offset==0) {
			/* fprintf reports how many characters the number took */
			used = fprintf(fpp, "%d", starts[i]);
			if (used < 0) used = 0;
		}
		for (; used<7; used++) fputc(' ', fpp);

		sq = seqs[i] + offset;
		for (j=1; j+offset <= len && j <= chunk; j++) {
			fputc(sq[j], fpp);
		}
		fputc('\n', fpp);
	}
}

/*
 * Write two aligned alignments to fpp in chunks of `chunk` columns, then
 * close fpp.
 *
 * For every chunk the first n1 sequences of alignment 1 are printed, then a
 * marker line with '+' under every column j where positive[j] is non-zero,
 * then the first n2 sequences of alignment 2.  Sequences and `positive` are
 * read 1-based (columns 1..len).  The start number of each sequence is
 * printed in the first chunk only.
 *
 * Side effects: start1[0..n1-1] and start2[0..n2-1] are shifted in place by
 * apos1[1]-1 and apos2[1]-1; fpp is closed on return.
 * aname1[0] is read unconditionally, so n1 is expected to be at least 1.
 * col_score and score are not used.
 */
static void printali_ali(FILE *fpp, int chunk, int n1, int n2, int len, char **aname1, char **aname2,
char **aseqGap1, char **aseqGap2, int *start1, int *start2, int *positive, int **col_score, int score)
{
	int i, j, jj, mlen, namelen;
	int *isq;

	/* each alignment is shifted over its own number of printed sequences */
	for (i=0; i<n1; i++) {
		start1[i] += apos1[1]-1;
	}
	for (i=0; i<n2; i++) {
		start2[i] += apos2[1]-1;
	}

	/* longest name over both alignments sets the width of the name column */
	mlen = (int)strlen(aname1[0]);
	for (i=1; i < n1; i++) {
		namelen = (int)strlen(aname1[i]);
		if (mlen < namelen) {
			mlen = namelen;
		}
	}
	for (i=0; i < n2; i++) {
		namelen = (int)strlen(aname2[i]);
		if (mlen < namelen) {
			mlen = namelen;
		}
	}

	/* a non-positive chunk length would never advance; print one single chunk */
	if (chunk < 1) {
		chunk = (len > 0) ? len : 1;
	}

	jj = 0;

	do {

/* Print the chunk of the first alignment */
		if (jj > 0) {
			fprintf(fpp, "\n");
		}
		printali_rows(fpp, n1, aname1, aseqGap1, start1, mlen+3, jj, len, chunk);

		/* marker line, indented by name column (mlen+3) plus number column (7) */
		fprintf(fpp, "%*s", mlen+10, "");
		isq = positive + jj;
		for (j=1; j+jj <= len && j <= chunk; j++) {
			fputc(isq[j] ? '+' : ' ', fpp);
		}
		fprintf(fpp, "\n");

/*Print the chunk of the second alignment*/
		printali_rows(fpp, n2, aname2, aseqGap2, start2, mlen+3, jj, len, chunk);
		fprintf(fpp, "\n");

		jj += chunk;

	} while (jj < len);

	fclose(fpp);
}

/*
 * Build an integer alignment from the character sequences
 * aseq[0..nal-1][0..alilen-1]: the result, allocated with imatrix(), is
 * addressable as ali[start_num..start_num+nal-1][start_seq..start_seq+alilen-1]
 * and holds am2num() of each character.
 */
int **ali_char2int(char **aseq, int start_num, int start_seq){
int row,pos;
const int last_num=start_num+nal-1;
const int last_seq=start_seq+alilen-1;
int **ali=imatrix(start_num,last_num,start_seq,last_seq);

for(row=start_num;row<=last_num;++row){
	for(pos=start_seq;pos<=last_seq;++pos){
		ali[row][pos]=am2num(aseq[row-start_num][pos-start_seq]);
	}
}
return ali;
}

/*
 * Read the alignment in `filename` with readali() and convert it with
 * ali_char2int(), using start_num / start_seq as the first row / column
 * subscripts of the result.
 * Returns NULL when flag_errread is 1 after reading (readali() sets it on
 * every error path).
 */
int **read_alignment2int(char *filename,int start_num,int start_seq){
readali(filename);

/* a value must be returned here: the function is declared to return int ** */
if (flag_errread==1) return NULL;

return ali_char2int(aseq,start_num,start_seq);
}

/*
 * Effective number of sequences for the columns start..end of the alignment
 * ali[1..n][...], using only the sequences i with marks[i] == 1.
 *
 * For every column the number of different residue codes 1..20 present among
 * the marked sequences is counted (code 0 is tallied but not counted).
 * Columns with at least one residue contribute to the average number of
 * different letters per site, and
 *     Neff = -ln(1 - 0.05 * letters_per_site) / 0.05129329438755
 * Columns where marked sequences have code 0 are not excluded.
 */
double effective_number_nogaps(int **ali, int *marks, int n, int start, int end){

int seq,pos,aa;
int distinct_total=0,used_sites=0;
int *tally;
double per_site,neff;

tally=ivector(0,20);
for(pos=start;pos<=end;++pos){
	int distinct_here=0;

	for(aa=0;aa<=20;++aa)tally[aa]=0;
	for(seq=1;seq<=n;++seq){
		if(marks[seq]==1)tally[ali[seq][pos]]++;
	}
	for(aa=1;aa<=20;++aa){
		if(tally[aa]>0)distinct_here++;
	}
	if(distinct_here>0){
		distinct_total+=distinct_here;
		used_sites++;
	}
}

if(used_sites==0){
	per_site=0;
} else {
	per_site=1.0*distinct_total/used_sites;
}

neff=-log(1.0-0.05*per_site)/0.05129329438755;

free_ivector(tally,0,20);
return neff;
}

/*
 * For every column j = 1..len of the alignment ali[1..n][1..len], compute
 * per-symbol effective counts:
 *   n_effAa[j][k]  (k = 0..20) - effective_number_nogaps() over columns
 *                    1..len for the sequences whose value in column j is k or
 *                    k+25; 0.0 when no sequence has that value;
 *   sum_eff_let[j] - sum of n_effAa[j][k] over k = 1..20.
 *
 * Always returns NULL; results are delivered through n_effAa and sum_eff_let.
 * The work vectors are released before returning.
 *
 * NOTE(review): the k+25 test suggests ali may hold values above 20, while
 * effective_number_nogaps() indexes a 0..20 table with the raw ali values -
 * confirm the value range used by callers.
 */
void **neffsForEachCol(int **ali, int n, int len, double **n_effAa, double *sum_eff_let)
{
	int i,j,k;
	double *effnu;
	double sum_let;
	int *mark;
	int flagmark;

	effnu = dvector(0,20);
	mark = ivector(0,n+10);

	for(j=1;j<=len;j++) {
		sum_let=0;

		for(k=0;k<=20;++k){
/* Mark sequences that have amino acid  k (or gap, k=0) in this jth position */
			flagmark = 0;
			for(i=1;i<=n;++i){
				if(ali[i][j]==k || ali[i][j]-25==k){
					mark[i]=1;
					flagmark=1;
				} else {
					mark[i]=0;
				}
			}

/* If aa k (or gap) is present in this position call compute k-th effective count */
			if (flagmark == 1) effnu[k]=effective_number_nogaps(ali,mark,n,1,len);
			else effnu[k] = 0.0;

			if (k>0) sum_let += effnu[k];
		}

		for (k=0; k<=20; k++) {
			n_effAa[j][k] = effnu[k];
		}
		sum_eff_let[j] = sum_let;
	}

	/* release the work vectors with the same bounds they were allocated with */
	free_ivector(mark,0,n+10);
	free_dvector(effnu,0,20);

	return NULL;
}


/*computes Smith-Waterman local alignment score and returns the
  evalue
  query is the query sequence
  queryLength is the length of query in amino acids
  dbSequence is the sequence corresponding to some matrix profile
  dbLength is the length of dbSequence
  matrix is the position-specific matrix associated with dbSequence
  gapOpen is the cost of opening a gap
  gapExtend is the cost of extending an existing gap by 1 position
  queryEnd returns the final position in the query of an optimal
   local alignment
  dbEnd returns the final position in dbSequence of an optimal
   local alignment
  queryEnd and dbEnd can be used to run the local alignment in reverse
   to find optimal starting positions
  score is used to pass back the optimal score
  kbp holds the Karlin-Altschul parameters
  L holds an intermediate term for E-value computation
  adjustedDbLength is the adjusted database length used for e-value computation
  minGappedK holds the minimum gapped K for all matrices in the
  database, and is used for e-value computation */


 static int SmithWatermanScore(double **score_matrix,  int queryLength, int dbLength, int gapOpen, int gapextend, int queryEnd, int dbEnd, int **tracebackDir,
int **flagNewGapQuery, int **flagNewGapDb)
{
   int bestScore; /*best score seen so far*/
   int newScore;  /* score of next entry*/
   int bestQueryPos, bestDbPos; /*position ending best score in
                           query and database sequences*/
   int newGapCost; /*cost to have a gap of one character*/
   int gapExtend;
   int prevScoreNoGapQuery; /*score one row and column up
                               with no gaps*/
   int prevScoreGapQuery;   /*score if a gap already started in query*/
   int continueGapScore; /*score for continuing a gap in dbSequence*/
   int queryPos, dbPos; /*positions in query and dbSequence*/
/*   Nlm_FloatHi returnEvalue; /*e-value to return*/
   score_Vector scoreVector; /*keeps one row of the Smith-Waterman matrix
                           overwrite old row with new row*/
	int RowScore; /*score for match of two positions*/
	int gapDb2NoGap, gapQuery2NoGap, noGap2NoGap, score2NoGap;
	
	
	
/********************** Introduce arrays and variables:

int *fV_RepeatOpenGapQuery[queryPos] -- the current row of flagRepeatOpenGapQuery;

int *fV_RepeatOpenGapDb[queryPos] -- the current row of flagRepeatOpenGapDb;

int *fV_RepeatOpenGapDb1[queryPos] -- previous row of flagRepeatOpenGapDb, overwrite with the new row after
the SW matrix row is passed;

int *fV_RepeatOpenGapDb2[queryPos] -- the row of flagRepeatOpenGapDb 2 positions higher, overwrite with the
fV_RepeatOpenGapQuery1 after the SW matrix row is passed;

int *fV_RepeatOpenGapQuery1[queryPos] -- previous row of flagRepeatOpenGapQuery, overwrite with the new row after   
the SW matrix row is passed;

int *fV_RepeatOpenGapQuery2[queryPos] -- the row of flagRepeatOpenGapQuery 2 positions higher, overwrite with the
fV_RepeatOpenGapQuery1 after the SW matrix row is passed;

int  *fV_QInClosestNewGapDb, *fV_DbInClosestNewGapQuery - current rows of the flags equal to the flags in the closest starting points of new gaps in Db (closest at the vertical queryPos = const) and Query (closest at the horizontal dbPos = const). They are used to calculate flagRepeatOpenGapQuery and flagRepeatOpenGapDb, respectively.

int  *fV_QInClosestNewGapDb1 - previous row of flagInClosestNewGapDb, overwrite with the new flagInClosestNewGapDb after the SW matrix row is passed;

int *fV_DbInClosestNewGapQuery1 -- previous row of flagInClosestNewGapQuery, overwrite with the new flagInClosestNewGapDb after the SW matrix row is passed;

??? Some of arrays fV_InClosestNewGap... can be probably replaced by some variables, since we use only positions (-1,-1), (0,-1), (-1,0) and (0,0), or we can use the scheme analogous to scoreVector.nogap and scoreVector.gapExist.

*****************************/
int **fV_QInClosestNewGapDb, **fV_DbInClosestNewGapQuery;
int flagRepeatOpenGapDb, flagRepeatOpenGapQuery;

   scoreVector.noGap = ivector (1,queryLength);
   scoreVector.gapExists = ivector (1,queryLength);
   
   fV_RepeatOpenGapQuery = imatrix (0,queryLength,0, dbLength);
   fV_RepeatOpenGapDb = imatrix (0,queryLength,0, dbLength);
   fV_QInClosestNewGapDb = imatrix (0,queryLength,0, dbLength);
   fV_DbInClosestNewGapQuery = imatrix (0,queryLength,0, dbLength);
   
   fV_DbInClosestNewGapDb = imatrix (0,queryLength,0, dbLength);
   fV_QInClosestNewGapQuery = imatrix (0,queryLength,0, dbLength);
  
   bestQueryPos = 0;
   bestDbPos = 0;
   bestScore = 0;
   
   for (queryPos = 1; queryPos <= queryLength; queryPos++) {
     scoreVector.noGap[queryPos] = 0;
     scoreVector.gapExists[queryPos] = -(gapOpen);
	}
   
   for (queryPos = 0; queryPos <= queryLength; queryPos++) {
	
	fV_RepeatOpenGapDb[queryPos][0] = 0;
	fV_RepeatOpenGapQuery[queryPos][0] = 0;
	
	fV_QInClosestNewGapDb[queryPos][0] = 0;
	fV_DbInClosestNewGapQuery[queryPos][0] = 0;
	fV_DbInClosestNewGapDb[queryPos][0] = 0;
	fV_QInClosestNewGapQuery[queryPos][0] = 0;
	
	fV_QInClosestNewGapDb[queryPos][1] = 0;
	fV_DbInClosestNewGapQuery[queryPos][1] = 0;
	fV_DbInClosestNewGapDb[queryPos][1] = 0;
	fV_QInClosestNewGapQuery[queryPos][1] = 0;
   }

   for(dbPos = 1; dbPos <= dbLength; dbPos++) {  

     newScore = 0;
     noGap2NoGap = 0;
     prevScoreGapQuery = -(gapOpen);
     
        fV_RepeatOpenGapDb[0][dbPos]= 0;
	fV_RepeatOpenGapQuery[0][dbPos]= 0;
	
	fV_QInClosestNewGapDb[0][dbPos]= 0;
	fV_DbInClosestNewGapQuery[0][dbPos]= 0;
	fV_DbInClosestNewGapDb[0][dbPos]= 0;
	fV_QInClosestNewGapQuery[0][dbPos]= 0;
	        
     for(queryPos = 1; queryPos <= queryLength; queryPos++) {
	
		flagNewGapQuery[queryPos][dbPos] = 0;
		flagNewGapDb[queryPos][dbPos] = 0;

/*** Check if we are in the gapped region of query and no gaps in db were opened against this region before;
	if TRUE eliminate gapOpen penalty; if this is the first position of the gapped region, reward the extending 
	previous gap by compensating gapOpen in gapExtend ***/

	gapExtend = rint(g_e2(queryPos,b));
	
	flagRepeatOpenGapDb = fV_RepeatOpenGapDb[queryPos-1][dbPos];
	
	if (maskgapRegion1[queryPos]==1 && flagRepeatOpenGapDb==0) {newGapCost = gapExtend;}
	 else {	newGapCost = gapOpen + gapExtend; }
	
	if (maskgapRegion1[queryPos-1]==0 && maskgapRegion1[queryPos]==1 && fV_DbInClosestNewGapDb[queryPos-1][dbPos]==0) {
			gapExtend -= gapOpen;
	}  
       
       /*testing scores with a gap in DB, either starting a new
         gap or extending an existing gap*/
       
       if ((newScore = newScore - newGapCost) >
	   (prevScoreGapQuery = prevScoreGapQuery - gapExtend)) {
         	prevScoreGapQuery = newScore;
         	flagNewGapQuery[queryPos][dbPos] = 1;
         }
  

/*** Check if we are in the gapped region of Db and no gaps were opened in query against this region before;
	if TRUE, eliminate gapOpen penalty ***/ 
         gapExtend = rint(g_e1(dbPos,b));
	
	flagRepeatOpenGapQuery = fV_RepeatOpenGapQuery[queryPos][dbPos-1];
	
	if (maskgapRegion2[dbPos]==1 && flagRepeatOpenGapQuery==0) {newGapCost = gapExtend;}
	else {newGapCost = gapOpen + gapExtend;}

/***	if this is the first position of the gapped region
 reward the extending previous gap by compensating gapOpen in gapExtend ***/
	if (maskgapRegion2[dbPos]==1 && maskgapRegion2[dbPos-1]==0 && fV_QInClosestNewGapQuery[queryPos][dbPos-1]==0) {
			gapExtend -= gapOpen;
		}
	     
       /*testing scores with a gap in Query, either starting a new
         gap or extending an existing gap*/
         
       if ((newScore = scoreVector.noGap[queryPos] - newGapCost) >
           (continueGapScore = scoreVector.gapExists[queryPos] - gapExtend)) {
         continueGapScore = newScore;
         flagNewGapDb[queryPos][dbPos] = 1;
        }
        
       /*compute new score extending one position in query and db*/
          
       RowScore = rint(score_matrix[queryPos][dbPos]*score_scale - sgapfcn(queryPos,dbPos,b)); 
      
       newScore = noGap2NoGap + RowScore;
       
       if (newScore < 0)
       newScore = 0; /*Smith-Waterman locality condition*/
       
/*** Assign direction for traceback: ***/       
       
	if (RowScore>0) {tracebackDir[queryPos][dbPos] = 6;}
	 else {	tracebackDir[queryPos][dbPos] = 5;}

	if (maskgapRegion1[queryPos] == 0) fV_RepeatOpenGapDb[queryPos][dbPos] = 0;
	else fV_RepeatOpenGapDb[queryPos][dbPos] = fV_RepeatOpenGapDb[queryPos-1][dbPos-1];
	
	if (maskgapRegion2[dbPos] == 0) fV_RepeatOpenGapQuery[queryPos][dbPos] = 0;
	else fV_RepeatOpenGapQuery[queryPos][dbPos] = fV_RepeatOpenGapQuery[queryPos-1][dbPos-1];
	
/**** Assign the flags coming from the closest NewGap in Query ******/

	if (flagNewGapDb[queryPos][dbPos] == 1) {
		 fV_DbInClosestNewGapQuery[queryPos][dbPos] = fV_RepeatOpenGapDb[queryPos][dbPos-1];
		 fV_QInClosestNewGapQuery[queryPos][dbPos] = fV_RepeatOpenGapQuery[queryPos][dbPos-1];
	} else {
	        fV_DbInClosestNewGapQuery[queryPos][dbPos] = fV_DbInClosestNewGapQuery[queryPos][dbPos-1];
        	fV_QInClosestNewGapQuery[queryPos][dbPos] = fV_QInClosestNewGapQuery[queryPos][dbPos-1];
        }
        
        if (maskgapRegion2[dbPos]==1) {fV_QInClosestNewGapQuery[queryPos][dbPos] = 1;}
        if (maskgapRegion1[queryPos]==1) {fV_DbInClosestNewGapQuery[queryPos][dbPos] = 1;}
	
/**** Assign the flags coming from the closest NewGap in Db ******/

	if (flagNewGapQuery[queryPos][dbPos] == 1) {
		fV_QInClosestNewGapDb[queryPos][dbPos] = fV_RepeatOpenGapQuery[queryPos-1][dbPos];
		fV_DbInClosestNewGapDb[queryPos][dbPos] = fV_RepeatOpenGapDb[queryPos-1][dbPos];
          } else {
        	fV_QInClosestNewGapDb[queryPos][dbPos] = fV_QInClosestNewGapDb[queryPos-1][dbPos];
        	fV_DbInClosestNewGapDb[queryPos][dbPos] = fV_DbInClosestNewGapDb[queryPos-1][dbPos];
        }

	if (maskgapRegion1[queryPos]==1) {fV_DbInClosestNewGapDb[queryPos][dbPos] = 1;}
	if (maskgapRegion2[dbPos]==1) {fV_QInClosestNewGapDb[queryPos][dbPos] = 1;}

	
       /*test two alternatives*/

/*** Gap in Db ***/

   if (newScore < prevScoreGapQuery) {
         newScore = prevScoreGapQuery;

/**** Determine tracebackDir pointer and the flags fV_RepeatOpenGapQuery and fV_RepeatOpenGapDb ******/

         if (flagNewGapQuery[queryPos][dbPos] == 1) { tracebackDir[queryPos][dbPos] = 1;}
          else {tracebackDir[queryPos][dbPos] = 2;}

	fV_RepeatOpenGapQuery[queryPos][dbPos] = fV_QInClosestNewGapDb[queryPos][dbPos];
	fV_RepeatOpenGapDb[queryPos][dbPos] = fV_DbInClosestNewGapDb[queryPos][dbPos];
	
   }

/*** Gap in Query ***/

   if (newScore < continueGapScore) {
         newScore = continueGapScore;
        
/**** Determine tracebackDir pointer and the flags fV_RepeatOpenGapQuery and fV_RepeatOpenGapDb ******/   	
               
         if (flagNewGapDb[queryPos][dbPos] == 1) {tracebackDir[queryPos][dbPos] = 3;}
         else {tracebackDir[queryPos][dbPos] = 4;}

	fV_RepeatOpenGapDb[queryPos][dbPos] = fV_DbInClosestNewGapQuery[queryPos][dbPos];
	fV_RepeatOpenGapQuery[queryPos][dbPos] = fV_QInClosestNewGapQuery[queryPos][dbPos];

       }  

       noGap2NoGap = scoreVector.noGap[queryPos]; 
       scoreVector.noGap[queryPos] = newScore;
       scoreVector.gapExists[queryPos] = continueGapScore;

       if (newScore > bestScore) {
         bestScore = newScore;
         bestDbPos = dbPos;
         bestQueryPos = queryPos;
       }

	fprintf(stderr,"");
    }

 }

	fprintf(stderr,".");

   if (bestScore < 0)
     bestScore = 0;
   End1 = bestQueryPos;
   End2 = bestDbPos;

   return (bestScore);
}


/*computes where optimal Smith-Waterman local alignment starts given the
  ending positions
  query is the query sequence
  queryLength is the length of query in amino acids
  dbSequence is the sequence corresponding to some matrix profile
  dbLength is the length of dbSequnece
  matrix is the position-specific matrix associated with dbSequence
  gapOpen is the cost of opening a gap
  gapExtend is the cost of extending an exisiting gap by 1 position
  queryEnd is the final position in the query of an optimal
   local alignment
  dbEnd is the final position in dbSequence of an optimal
   local alignment
  queryEnd and dbEnd can be used to run the local alignment in reverse
   to find optimal starting positions
  these are passed back in queryStart and dbStart
  the optimal score is passed in to check when it has
   been reached going backwards
  the score is also returned
  */

	static int SmithWatermanFindStart( double **score_matrix, int
queryLength, int dbLength, int gapOpen, int gapExtend,int queryEnd, int dbEnd, int score, int queryStart, int dbStart)
{

   int bestScore; /*best score seen so far*/
   int newScore;  /* score of next entry*/
   int bestQueryPos, bestDbPos; /*position starting best score in
                           query and database sequences*/
   int newGapCost; /*cost to have a gap of one character*/
   int prevScoreNoGapQuery; /*score one row and column up
                               with no gaps*/
   int prevScoreGapQuery;   /*score if a gap already started in query*/
   int continueGapScore; /*score for continuing a gap in dbSequence*/
   int queryPos, dbPos; /*positions in query and dbSequence*/

   score_Vector scoreVector; /*keeps one row of the Smith-Waterman matrix
                           overwrite old row with new row*/
   
   int flagNewGapQuery_Rev, flagNewGapDb_Rev;


/********************** Introduce arrays and variables:

int *fV_RepeatOpenGapQuery[queryPos] -- the current row of flagRepeatOpenGapQuery;

int *fV_RepeatOpenGapDb[queryPos] -- the current row of flagRepeatOpenGapDb;

int *fV_RepeatOpenGapDb1[queryPos] -- previous row of flagRepeatOpenGapDb, overwrite with the new row after
the SW matrix row is passed;

int *fV_RepeatOpenGapDb2[queryPos] -- the row of flagRepeatOpenGapDb 2 positions higher, overwrite with the
fV_RepeatOpenGapQuery1 after the SW matrix row is passed;

int *fV_RepeatOpenGapQuery1[queryPos] -- previous row of flagRepeatOpenGapQuery, overwrite with the new row after   
the SW matrix row is passed;

int *fV_RepeatOpenGapQuery2[queryPos] -- the row of flagRepeatOpenGapQuery 2 positions higher, overwrite with the
fV_RepeatOpenGapQuery1 after the SW matrix row is passed;

int  *fV_QInClosestNewGapDb, *fV_DbInClosestNewGapQuery - current rows of the flags equal to the flags in the closest starting points of new gaps in Db (closest at the vertical queryPos = const) and Query (closest at the horizontal dbPos = const). They are used to calculate flagRepeatOpenGapQuery and flagRepeatOpenGapDb, respectively.

int  *fV_QInClosestNewGapDb1 - previous row of flagInClosestNewGapDb, overwrite with the new flagInClosestNewGapDb after the SW matrix row is passed;

int *fV_DbInClosestNewGapQuery1 -- previous row of flagInClosestNewGapQuery, overwrite with the new flagInClosestNewGapDb after the SW matrix row is passed;

??? Some of arrays fV_InClosestNewGap... can be probably replaced by some variables, since we use only positions (-1,-1), (0,-1), (-1,0) and (0,0), or we can use the scheme analogous to scoreVector.nogap and scoreVector.gapExist.

*****************************/
int *fV_RepeatOpenGapQuery, *fV_RepeatOpenGapDb, *fV_RepeatOpenGapDb1, *fV_RepeatOpenGapDb2, *fV_RepeatOpenGapQuery1, *fV_RepeatOpenGapQuery2; 
int *fV_QInClosestNewGapDb, *fV_DbInClosestNewGapQuery, *fV_QInClosestNewGapDb1 , *fV_DbInClosestNewGapQuery1;

int *fV_DbInClosestNewGapDb, *fV_QInClosestNewGapQuery, *fV_DbInClosestNewGapDb1 , *fV_QInClosestNewGapQuery1;

int flagRepeatOpenGapDb, flagRepeatOpenGapQuery;


   scoreVector.noGap = ivector (1,queryLength);
   scoreVector.gapExists = ivector (1,queryLength);
   
   fV_RepeatOpenGapQuery = ivector (1,queryLength+1);
   fV_RepeatOpenGapDb = ivector (1,queryLength+1);
   fV_RepeatOpenGapDb1 = ivector (1,queryLength+1);
   fV_RepeatOpenGapDb2 = ivector (1,queryLength+1);
   fV_RepeatOpenGapQuery1 = ivector (1,queryLength+1);
   fV_RepeatOpenGapQuery2 = ivector (1,queryLength+1);
   fV_QInClosestNewGapDb = ivector (1,queryLength+1);
   fV_QInClosestNewGapDb1 = ivector (1,queryLength+1);
   fV_DbInClosestNewGapQuery = ivector (1,queryLength+1);
   fV_DbInClosestNewGapQuery1 = ivector (1,queryLength+1);

   fV_DbInClosestNewGapDb = ivector (1,queryLength+1);
   fV_DbInClosestNewGapDb1 = ivector (1,queryLength+1);
   fV_QInClosestNewGapQuery = ivector (1,queryLength+1);
   fV_QInClosestNewGapQuery1 = ivector (1,queryLength+1);
  

   bestQueryPos = 0;
   bestDbPos = 0;
   bestScore = 0;

   for (queryPos = 1; queryPos <= queryLength; queryPos++) {
     scoreVector.noGap[queryPos] = 0;
     scoreVector.gapExists[queryPos] = -(gapOpen);
   }
   
      for (queryPos = 1; queryPos <= queryLength+1; queryPos++) {
	fV_RepeatOpenGapDb[queryPos] = 0;
	fV_RepeatOpenGapQuery[queryPos] = 0;
	
	fV_RepeatOpenGapDb1[queryPos] = 0;
   	fV_RepeatOpenGapDb2[queryPos] = 0;
	fV_RepeatOpenGapQuery1[queryPos] = 0;
	fV_RepeatOpenGapQuery2[queryPos] = 0;
	
	fV_QInClosestNewGapDb1[queryPos] = 0;
	fV_DbInClosestNewGapQuery1[queryPos] = 0;
	fV_DbInClosestNewGapDb1[queryPos] = 0;
	fV_QInClosestNewGapQuery1[queryPos] = 0;
	
	fV_QInClosestNewGapDb[queryPos] = 0;
	fV_DbInClosestNewGapQuery[queryPos] = 0;
	fV_DbInClosestNewGapDb[queryPos] = 0;
	fV_QInClosestNewGapQuery[queryPos] = 0;

   }

   for(dbPos = dbEnd; dbPos >= 1; dbPos--) {  
     
     newScore = 0;
     prevScoreNoGapQuery = 0;
     prevScoreGapQuery = -(gapOpen);

     for(queryPos = queryEnd; queryPos >= 1; queryPos--) { 
	flagNewGapQuery_Rev = 0;
	flagNewGapDb_Rev = 0;
	
/*** Check if we are in the gapped region of query and no gaps were opened against this region before;
	if TRUE, eliminate gapOpen penalty;
	if this is the first position of the gapped region, reward the extending 
	previous gap by compensating gapOpen in gapExtend ***/

	gapExtend = rint(g_e2(queryPos,b));
	
	flagRepeatOpenGapDb = fV_RepeatOpenGapDb[queryPos+1];
	
	if (maskgapRegion1[queryPos]==1 && flagRepeatOpenGapDb==0) {newGapCost = gapExtend;}
	 else {newGapCost = gapOpen + gapExtend;}
	
	if (maskgapRegion1[queryPos+1]==0 && maskgapRegion1[queryPos]==1 && fV_DbInClosestNewGapDb[queryPos+1]==0) {
			gapExtend -= gapOpen;
	}  

       /*testing scores with a gap in DB, either starting a new
         gap or extending an existing gap*/

      if ((newScore = newScore - newGapCost) > 
	   (prevScoreGapQuery = prevScoreGapQuery - gapExtend)) {
         prevScoreGapQuery = newScore;
         flagNewGapQuery_Rev = 1;
       }
       
         	 
/*** Check if we are in the gapped region of Db and no gaps were opened against this region before;
	if TRUE, eliminate gapOpen penalty; 
	if this is the first position of the gapped region, reward the extending 
	previous gap by compensating gapOpen in gapExtend ***/
        gapExtend = rint(g_e1(dbPos,b));

	flagRepeatOpenGapQuery = fV_RepeatOpenGapQuery1[queryPos];
	
	if (maskgapRegion2[dbPos]==1 && flagRepeatOpenGapQuery==0) {newGapCost = gapExtend;}
	 else {	newGapCost = gapOpen + gapExtend;}

	if (maskgapRegion2[dbPos]==1 && maskgapRegion2[dbPos+1]==0 && fV_QInClosestNewGapQuery1[queryPos]==0) {
			gapExtend -= gapOpen;
		}

       /*testing scores with a gap in Query, either starting a new
         gap or extending an existing gap*/ 

       if ((newScore = scoreVector.noGap[queryPos] - newGapCost) >
           (continueGapScore = scoreVector.gapExists[queryPos] -gapExtend)) {
         continueGapScore = newScore;
         flagNewGapDb_Rev = 1;
        }
        
       /*compute new score extending one position in query and dbSequence*/
       
       newScore = prevScoreNoGapQuery + rint(score_matrix[queryPos][dbPos]*score_scale - sgapfcn(queryPos, dbPos, b));

       if (newScore < 0)
       newScore = 0; /*Smith-Waterman locality condition*/
           
	if (maskgapRegion1[queryPos] == 0) fV_RepeatOpenGapDb[queryPos] = 0;
	else fV_RepeatOpenGapDb[queryPos] = fV_RepeatOpenGapDb1[queryPos+1];
	
	if (maskgapRegion2[dbPos] == 0) fV_RepeatOpenGapQuery[queryPos] = 0;
	else fV_RepeatOpenGapQuery[queryPos] = fV_RepeatOpenGapQuery1[queryPos+1];
	
/**** Assign the flags coming from the closest NewGap in Query ******/

	if (flagNewGapDb_Rev == 1) {	
		fV_DbInClosestNewGapQuery[queryPos] = fV_RepeatOpenGapDb1[queryPos];
		fV_QInClosestNewGapQuery[queryPos] = fV_RepeatOpenGapQuery1[queryPos];
	} else {
		fV_DbInClosestNewGapQuery[queryPos] = fV_DbInClosestNewGapQuery1[queryPos];
		fV_QInClosestNewGapQuery[queryPos] = fV_QInClosestNewGapQuery1[queryPos];
	}
	
	if (maskgapRegion2[dbPos]==1) {fV_QInClosestNewGapQuery[queryPos] = 1;}
        if (maskgapRegion1[queryPos]==1) {fV_DbInClosestNewGapQuery[queryPos] = 1;}
	
	
/**** Assign the flags coming from the closest NewGap in Db ******/

	if (flagNewGapQuery_Rev == 1) {
		fV_QInClosestNewGapDb[queryPos] = fV_RepeatOpenGapQuery[queryPos+1];
		fV_DbInClosestNewGapDb[queryPos] = fV_RepeatOpenGapDb[queryPos+1];
	} else {
		fV_QInClosestNewGapDb[queryPos] = fV_QInClosestNewGapDb[queryPos+1];
		fV_DbInClosestNewGapDb[queryPos] = fV_DbInClosestNewGapDb[queryPos+1];
	}
	
	if (maskgapRegion1[queryPos]==1) {fV_DbInClosestNewGapDb[queryPos] = 1;}
	if (maskgapRegion2[dbPos]==1) {fV_QInClosestNewGapDb[queryPos] = 1;}
     
       /*test two alternatives*/
       if (newScore < prevScoreGapQuery) {
         newScore = prevScoreGapQuery;
	fV_RepeatOpenGapQuery[queryPos] = fV_QInClosestNewGapDb[queryPos];
	fV_RepeatOpenGapDb[queryPos] = fV_DbInClosestNewGapDb[queryPos];
       }
       
       
       if (newScore < continueGapScore) {
         newScore = continueGapScore;
	fV_RepeatOpenGapDb[queryPos] = fV_DbInClosestNewGapQuery[queryPos];
	fV_RepeatOpenGapQuery[queryPos] = fV_QInClosestNewGapQuery[queryPos];
       } 
        
       prevScoreNoGapQuery = scoreVector.noGap[queryPos]; 
       scoreVector.noGap[queryPos]= newScore;
       scoreVector.gapExists[queryPos] = continueGapScore;
       
       if (newScore > bestScore) {
         bestScore = newScore;
         bestDbPos = dbPos;
         bestQueryPos = queryPos;
       }
     
       if (bestScore >= score) break;

     }
     if (bestScore >= score)  break;
       
/*Reassignments in "the array stack" of flags for previous rows of SW matrix */
 	
	for (queryPos=1; queryPos<=queryLength; queryPos++) {
		fV_RepeatOpenGapQuery2[queryPos] = fV_RepeatOpenGapQuery1[queryPos];
		fV_RepeatOpenGapQuery1[queryPos] = fV_RepeatOpenGapQuery[queryPos];
		fV_RepeatOpenGapDb2[queryPos] = fV_RepeatOpenGapDb1[queryPos];
		fV_RepeatOpenGapDb1[queryPos] = fV_RepeatOpenGapDb[queryPos];
		
		fV_QInClosestNewGapDb1[queryPos] = fV_QInClosestNewGapDb[queryPos];
		fV_DbInClosestNewGapQuery1[queryPos] = fV_DbInClosestNewGapQuery[queryPos];
		
		fV_DbInClosestNewGapDb1[queryPos] = fV_DbInClosestNewGapDb[queryPos];
		fV_QInClosestNewGapQuery1[queryPos] = fV_QInClosestNewGapQuery[queryPos];		
	}

   } 

   free(scoreVector.noGap);
   free(scoreVector.gapExists);
   
   if (bestScore < 0)
     bestScore = 0;
  
   Start1 = bestQueryPos;
   Start2 = bestDbPos; 
	
   scoreGivenEnd = bestScore;	

   return(bestScore); 
}

/* Traces back the best alignment path using tracebackDir[][], flagNewGapDb[][]
   and flagNewGapQuery[][], starting at (end_ali1, end_ali2) and walking
   towards lower indices until the running score sctrl reaches `score`
   or either position drops to 0.

   start_ali1, start_ali2  not used by this function
   end_ali1, end_ali2      cell where the traceback begins
   tracebackDir            direction code per cell (see table below)
   flagNewGapQuery,
   flagNewGapDb            cells whose value is 1 end an extended-gap run
   apos1, apos2            output, filled from index 1: the apos_filtr1 /
                           apos_filtr2 entry of each aligned column, or 0
                           where that side has a gap

   Also written (none of these is declared in this function): segment_len
   (number of columns), posGp, positive[] (1 for direction 6, else 0) and
   col_score[0..9][] (the ten values produced by ScoreOverColumn for each
   column).

   The declared return type is void **; callers get NULL.
*/
void **traceback_outputPos(int start_ali1, int start_ali2, int end_ali1, int end_ali2, int **tracebackDir, int **flagNewGapQuery, int **flagNewGapDb, int *apos1, int *apos2)
{
	int pos1, pos2, posGapped, dir, i, j;
	int *positiveInt, *apos1Int, *apos2Int;
	int **col_scoreInt;
	int gapOpen, gapExtend, newGapCost, colScore;
	int sctrl;      /* running alignment score, clamped at 0 */
	int ascore[10];

	int flagRepeatOpenGap1, flagRepeatOpenGap2;

	/* Scratch buffers are filled from the high end downwards. */
	positiveInt = ivector(0,alilen_mat1+alilen_mat2);

	apos1Int = ivector(0,alilen_mat1+alilen_mat2);
	apos2Int = ivector(0,alilen_mat1+alilen_mat2);

	col_scoreInt = imatrix(0,alilen_mat1+alilen_mat2, 0,9);

	gapOpen = gap_open;

	sctrl = 0;
	segment_len = 0;
	flagRepeatOpenGap1 = 0;
	flagRepeatOpenGap2 = 0;
	pos1 = end_ali1;
	pos2 = end_ali2;
	posGapped = alilen_mat1+alilen_mat2;


/*** TraceBackDir: 6 - positive match; 5 - non-positive match;
1 - previousScoreGapQuery wins, the gap is new; 2- previousScoreGapQuery wins, the gap is extended from existing;
3 - continueGapScore wins, the gap is new; 4- continueGapScore wins, the gap is extended from existing;
***/

	do {
		dir = tracebackDir[pos1][pos2];

		if (dir==3) {
			/* single newly opened gap column; only pos2 advances */
			positiveInt[posGapped]=0;

			apos1Int[posGapped] = 0;
			apos2Int[posGapped] = apos_filtr2[pos2];

			gapExtend = rint(g_e1(pos2,b));
			newGapCost = gapOpen + gapExtend;
			colScore = -newGapCost;
			if (maskgapRegion2[pos2]==1 && flagRepeatOpenGap1==0) {
				colScore += gapOpen;
				flagRepeatOpenGap1 = 1;
			}
			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;

			ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
			for (i=0; i<=9; i++) 	col_scoreInt[posGapped][i] = ascore[i];

			pos2--;
			posGapped--;
			segment_len ++;

		} else if (dir==4) {
			/* extended gap: emit extension columns until the cell that
			   opened the gap is reached, then emit the opening column.
			   NOTE(review): unlike the dir==2 loop this one has no
			   sctrl<score test and no lower bound on pos2. */
			do {
				positiveInt[posGapped]=0;

				apos1Int[posGapped] = 0;
				apos2Int[posGapped] = apos_filtr2[pos2];

				gapExtend = rint(g_e1(pos2,b));
				colScore = -gapExtend;
				if (maskgapRegion2[pos2]==1 && maskgapRegion2[pos2-1]==0 && flagRepeatOpenGap1 == 0) {
					colScore += gapOpen;
					flagRepeatOpenGap1 = 1;
				}

				sctrl += colScore;
				if (sctrl < 0) sctrl = 0;

				ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
				for (i=0; i<=9; i++) 	col_scoreInt[posGapped][i] = ascore[i];

				pos2--;
				posGapped--;
				segment_len ++;

			} while (flagNewGapDb[pos1][pos2]!= 1);

			positiveInt[posGapped]=0;

			apos1Int[posGapped] = 0;
			apos2Int[posGapped] = apos_filtr2[pos2];

			gapExtend = rint(g_e1(pos2,b));
			newGapCost = gapOpen + gapExtend;
			colScore = -newGapCost;
			if (maskgapRegion2[pos2]==1 && flagRepeatOpenGap1==0) {
				colScore += gapOpen;
				flagRepeatOpenGap1 = 1;
			}

			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;

			ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
			for (i=0; i<=9; i++) 	col_scoreInt[posGapped][i] = ascore[i];

			pos2--;
			posGapped--;
			segment_len ++;

		} else if (dir==1) {
			/* single newly opened gap column; only pos1 advances */
			positiveInt[posGapped]=0;

			apos1Int[posGapped] = apos_filtr1[pos1];
			apos2Int[posGapped] = 0;

			gapExtend = rint(g_e2(pos1,b));
			newGapCost = gapOpen + gapExtend;
			colScore = -newGapCost;
			if (maskgapRegion1[pos1]==1 && flagRepeatOpenGap2==0) {
				colScore += gapOpen;
				flagRepeatOpenGap2 = 1;
			}

			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;

			ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
			for (i=0; i<=9; i++) 	col_scoreInt[posGapped][i] = ascore[i];

			pos1--;
			posGapped--;
			segment_len ++;

		} else if (dir==2) {
			/* extended gap along pos1: extension columns first, then the
			   column that opened the gap */
			do {

				positiveInt[posGapped]=0;

				apos1Int[posGapped] = apos_filtr1[pos1];
				apos2Int[posGapped] = 0;

				gapExtend = rint(g_e2(pos1,b));
				colScore = -gapExtend;
				if (maskgapRegion1[pos1]==1 && maskgapRegion1[pos1-1]==0 && flagRepeatOpenGap2 == 0) {
					colScore += gapOpen;
					flagRepeatOpenGap2 = 1;
				}

				sctrl += colScore;
				if (sctrl < 0) sctrl = 0;

				ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
				for (i=0; i<=9; i++) 	col_scoreInt[posGapped][i] = ascore[i];

				pos1--;
				posGapped--;
				segment_len ++;

			} while (flagNewGapQuery[pos1][pos2] != 1 && sctrl<score);

			positiveInt[posGapped]=0;

			apos1Int[posGapped] = apos_filtr1[pos1];
			apos2Int[posGapped] = 0;

			gapExtend = rint(g_e2(pos1,b));
			newGapCost = gapOpen + gapExtend;
			colScore = -newGapCost;
			if (maskgapRegion1[pos1]==1 && flagRepeatOpenGap2==0) {
				colScore += gapOpen;
				flagRepeatOpenGap2 = 1;
			}

			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;

			ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
			for (i=0; i<=9; i++) 	col_scoreInt[posGapped][i] = ascore[i];

			pos1--;
			posGapped--;
			segment_len ++;

		} else if (dir==5) {
			/* non-positive match column */
			positiveInt[posGapped]=0;

			apos1Int[posGapped] = apos_filtr1[pos1];
			apos2Int[posGapped] = apos_filtr2[pos2];

			colScore = rint(score_matrix[pos1][pos2]*score_scale - sgapfcn(pos1,pos2,b));

			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;

			/* Record the column exactly as the dir==6 branch does.  Without
			   this, col_scoreInt[posGapped] stayed unwritten and the copy
			   into col_score[][] below propagated indeterminate values. */
			ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
			for (i=0; i<=9; i++) 	col_scoreInt[posGapped][i] = ascore[i];

			if (maskgapRegion2[pos2] == 0) flagRepeatOpenGap1 = 0;
			if (maskgapRegion1[pos1] == 0) flagRepeatOpenGap2 = 0;

			pos2--;
			pos1--;
			posGapped--;

			segment_len++;

		} else if (dir==6) {
			/* positive match column */
			positiveInt[posGapped]=1;

			apos1Int[posGapped] = apos_filtr1[pos1];
			apos2Int[posGapped] = apos_filtr2[pos2];

			colScore = rint(score_matrix[pos1][pos2]*score_scale - sgapfcn(pos1,pos2,b));

			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;

			ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
			for (i=0; i<=9; i++) 	col_scoreInt[posGapped][i] = ascore[i];

			if (maskgapRegion2[pos2] == 0) flagRepeatOpenGap1 = 0;
			if (maskgapRegion1[pos1] == 0) flagRepeatOpenGap2 = 0;

			pos2--;
			pos1--;
			posGapped--;

			segment_len++;

		} else {
			/* Unknown direction code: nothing would advance, so the loop
			   condition could never change.  Stop instead of spinning. */
			break;
		}

	} while (sctrl<score && pos1>0 && pos2>0);

	/* Copy the filled tail of the scratch buffers to the 1-based outputs. */
	posGp = posGapped+1;
	for (j=posGp;j<posGp+segment_len;j++) {
		positive[j-posGp+1] = positiveInt[j];

		apos1[j-posGp+1] = apos1Int[j];
		apos2[j-posGp+1] = apos2Int[j];

		for (i=0;i<=9; i++) col_score[i][j-posGp+1] = col_scoreInt[j][i];
	}

	free_ivector(positiveInt, 0,alilen_mat1+alilen_mat2);
	free_imatrix(col_scoreInt, 0,alilen_mat1+alilen_mat2, 0,9);
	free_ivector(apos1Int, 0,alilen_mat1+alilen_mat2);
	free_ivector(apos2Int, 0,alilen_mat1+alilen_mat2);

	/* The function is declared to return void **; give callers a defined value. */
	return NULL;
}

/* Column-column score between position pos1 of alignment 1 and position
   pos2 of alignment 2 (formula 3_21), divided by lambda_u.

   When both sum_eff_let1[pos1] and sum_eff_let2[pos2] are within 1e-5 of 1,
   the score is smatrix[k1][k2], where k1 / k2 is the highest index in 1..20
   whose n_effAa1 / n_effAa2 entry is non-zero (1 if none is).  Otherwise the
   two cross log-odds sums are weighted and normalised as shown below.
*/
double ScoreForTwoRows_smat3_21(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double comp1, comp2;
	double s1,s2;
	s1=s2=0.0;

	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;

	/* fabs, not abs: the integer abs() truncated its argument, so the
	   tolerance test passed for every |comp| < 1. */
	if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
		k1=k2=1;
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}

		s = smatrix[k1][k2];
	} else {
		for (i=1;i<=20;i++) {
			/* p_rbnsn is indexed from 0, the per-position tables from 1 */
			s1 +=  n_effAa1[pos1][i]*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]);
			s2 +=  n_effAa2[pos2][i]*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
		}

/*** Do normalization of s: ***/
		s = s1*(sum_eff_let2[pos2]-1)/sum_eff_let1[pos1] + s2*(sum_eff_let1[pos1]-1)/sum_eff_let2[pos2];
		s= s/(sum_eff_let1[pos1]+sum_eff_let2[pos2]-2);

	}
	s = s/lambda_u;

	return s;
}

/* Column-column score between position pos1 of alignment 1 and position
   pos2 of alignment 2 (formula 3_22), divided by lambda_u.

   When both sum_eff_let1[pos1] and sum_eff_let2[pos2] are within 1e-5 of 1,
   the score is smatrix[k1][k2], where k1 / k2 is the highest index in 1..20
   whose n_effAa1 / n_effAa2 entry is non-zero (1 if none is).  Otherwise the
   weighted cross log-odds terms are summed without a final normalisation.
*/
double ScoreForTwoRows_smat3_22(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double comp1, comp2;
	s=0.0;

	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;

	/* fabs, not abs: the integer abs() truncated its argument, so the
	   tolerance test passed for every |comp| < 1. */
	if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
		k1=k2=1;
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}
		s = smatrix[k1][k2];
	} else {
		for (i=1;i<=20;i++) {
			/* p_rbnsn is indexed from 0, the per-position tables from 1 */
			s+= n_effAa1[pos1][i]*(sum_eff_let2[pos2]-1)/sum_eff_let1[pos1]*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]) + n_effAa2[pos2][i]*(sum_eff_let1[pos1]-1)/sum_eff_let2[pos2]*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
		}

/*** No normalization of s ***/
	}
	s = s/lambda_u;

	return s;
}

/* Column-column score between position pos1 of alignment 1 and position
   pos2 of alignment 2 (formula 3_23), divided by lambda_u.

   When both sum_eff_let1[pos1] and sum_eff_let2[pos2] are within 1e-5 of 1,
   the score is smatrix[k1][k2], where k1 / k2 is the highest index in 1..20
   whose n_effAa1 / n_effAa2 entry is non-zero (1 if none is).  Otherwise the
   cross log-odds terms are weighted by (opposite sum - 1), summed, and
   divided by (sum_eff_let1 + sum_eff_let2 - 2).
*/
double ScoreForTwoRows_smat3_23(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double comp1, comp2;

	s=0.0;
	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;

	/* fabs, not abs: the integer abs() truncated its argument, so the
	   tolerance test passed for every |comp| < 1. */
	if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
		k1=k2=1;
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}
		s = smatrix[k1][k2];

	} else {
		for (i=1;i<=20;i++) {
/* No division by the opposite sum_eff_let in each of two terms (formula 3_23): */
			s+= n_effAa1[pos1][i]*(sum_eff_let2[pos2]-1)*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]) + n_effAa2[pos2][i]*(sum_eff_let1[pos1]-1)*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
		}

/*** Do normalization of s: ***/
		s= s/(sum_eff_let1[pos1]+sum_eff_let2[pos2]-2);
	}
	s = s/lambda_u;

	return s;
}


/* Column-column score between position pos1 of alignment 1 and position
   pos2 of alignment 2 (formula 3_27), divided by lambda_u.

   When both sum_eff_let1[pos1] and sum_eff_let2[pos2] are within 1e-5 of 1,
   the score is smatrix[k1][k2], where k1 / k2 is the highest index in 1..20
   whose n_effAa1 / n_effAa2 entry is non-zero (1 if none is).  Otherwise the
   two unweighted cross log-odds sums are added, with no normalisation.
*/
double ScoreForTwoRows_smat3_27(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double comp1, comp2;

	s=0.0;
	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;

	/* fabs, not abs: the integer abs() truncated its argument, so the
	   tolerance test passed for every |comp| < 1. */
	if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
		k1=k2=1;
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}
		s = smatrix[k1][k2];
	} else {
		for (i=1;i<=20;i++) {

/* Simplest scoring formula 3_27 */
			s+= n_effAa1[pos1][i]*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]) + n_effAa2[pos2][i]*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
		}

/*** No normalization of s ***/
	}
	s = s/lambda_u;

	return s;
}

/* Column-column score between position pos1 of alignment 1 and position
   pos2 of alignment 2 (formula 3_28), divided by lambda_u.

   When both sum_eff_let1[pos1] and sum_eff_let2[pos2] are within 1e-5 of 1,
   the score is smatrix[k1][k2], where k1 / k2 is the highest index in 1..20
   whose n_effAa1 / n_effAa2 entry is non-zero (1 if none is).  Otherwise the
   cross log-odds terms are weighted by (opposite sum - 1) and summed, with
   no normalisation.
*/
double ScoreForTwoRows_smat3_28(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double comp1, comp2;
	s=0.0;

	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;

	/* fabs, not abs: the integer abs() truncated its argument, so the
	   tolerance test passed for every |comp| < 1. */
	if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
		k1=k2=1;
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}
		s = smatrix[k1][k2];
	} else {
		for (i=1;i<=20;i++) {
/* NO normalization and no division by the opposite sum_eff_let in each of two terms (formula 3_28): */
			s+= n_effAa1[pos1][i]*(sum_eff_let2[pos2]-1)*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]) + n_effAa2[pos2][i]*(sum_eff_let1[pos1]-1)*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
		}
/*** No normalization of s ***/
	}
	s = s/lambda_u;

	return s;
}

/* S_g switched off: the gap-content reduction is always zero, whatever
   the positions or the value of b. */
double Sgap6_smat_off(int pos1, int pos2, double b)
{
	return 0.0;
}

/* Computes S_g, the amount by which a column-column score is reduced
   because of gap content.  For each column the fraction
   n_effAa[pos][0] / (n_effAa[pos][0] + sum_eff_let[pos]) is formed
   (slot 0 presumably holds the gap weight -- confirm where the table is
   filled); the result is f*b times the sum of the two cross products
   (1-g1)*g2 and (1-g2)*g1. */
double Sgap6_smat(int pos1, int pos2, double b)
{
	double gapFrac1 = n_effAa1[pos1][0]/(n_effAa1[pos1][0]+sum_eff_let1[pos1]);
	double gapFrac2 = n_effAa2[pos2][0]/(n_effAa2[pos2][0]+sum_eff_let2[pos2]);
	double reduction = f*b*((1-gapFrac1)*gapFrac2 + (1-gapFrac2)*gapFrac1);

	return reduction;
}


/* Gap extension penalty in alignment 1, scaled by column pos2 of
   alignment 2: gap_extend * b * sum_eff_let2 / (n_effAa2[..][0] + sum_eff_let2). */
double GapExtend1(int pos2, double b)
{
	double penalty = gap_extend*b*(sum_eff_let2[pos2]/(n_effAa2[pos2][0]+sum_eff_let2[pos2]));

	return penalty;
}

/* Constant variant of GapExtend1: ignores both arguments and returns
   gap_extend unchanged. */
double GapExtend1_off(int pos2, double b)
{
	double penalty = gap_extend;

	return penalty;
}

/* Gap extension penalty in alignment 2, scaled by column pos1 of
   alignment 1: gap_extend * b * sum_eff_let1 / (n_effAa1[..][0] + sum_eff_let1). */
double GapExtend2(int pos1, double b)
{
	double penalty = gap_extend*b*(sum_eff_let1[pos1]/(n_effAa1[pos1][0]+sum_eff_let1[pos1]));

	return penalty;
}

/* Constant variant of GapExtend2: ignores both arguments and returns
   gap_extend unchanged. */
double GapExtend2_off(int pos1, double b)
{
	double penalty = gap_extend;

	return penalty;
}

/* Three-way comparison of two doubles for qsort (ascending order). */
static int sort_compare_doubles(const void *lhs, const void *rhs)
{
	double x = *(const double *)lhs;
	double y = *(const double *)rhs;

	return (x > y) - (x < y);
}

/* Sorts arr[1..n] into ascending order in place; arr[0] is not touched.

   n    number of elements to sort; values below 2 leave the array as is
   arr  1-based array holding at least n+1 doubles

   Uses the standard library qsort instead of the former hand-written
   quicksort, whose unsigned index arithmetic wrapped around for n == 0 and
   which aborted when its fixed-size explicit stack overflowed. */
void sort(int n, double arr[])
{
	if (n < 2) return;

	qsort(arr + 1, (size_t)n, sizeof arr[0], sort_compare_doubles);
}

/* Three-way comparison of two ints for qsort (ascending order). */
static int sort_int_compare(const void *lhs, const void *rhs)
{
	int x = *(const int *)lhs;
	int y = *(const int *)rhs;

	return (x > y) - (x < y);
}

/* Sorts arr[1..n] into ascending order in place; arr[0] is not touched.

   n    number of elements to sort; values below 2 leave the array as is
   arr  1-based array holding at least n+1 ints

   Uses the standard library qsort instead of the former hand-written
   quicksort, whose unsigned index arithmetic wrapped around for n == 0 and
   which aborted when its fixed-size explicit stack overflowed. */
void sort_int(int n, int arr[])
{
	if (n < 2) return;

	qsort(arr + 1, (size_t)n, sizeof arr[0], sort_int_compare);
}


/* Packs one alignment column into ten ints.

   colScore      column score; stored as a sign flag plus decimal digits
   flag1..flag6  copied through to fixed slots (see below)
   column_score  output array with room for at least 10 ints

   Layout written to column_score:
     [0]  1 if colScore >= 0, otherwise 0
     [1]  |colScore| / 100   (not limited to one digit when |colScore| >= 1000)
     [2]  tens digit of |colScore|
     [3]  units digit of |colScore|
     [4]  flag1    [5]  flag2
     [6]  flag4    [7]  flag3     (flag3 and flag4 land in swapped slots)
     [8]  flag5    [9]  flag6

   Returns column_score.  The function was declared int * but used to fall
   off the end without returning anything. */
int *ScoreOverColumn (int colScore, int flag1, int flag2, int flag3, int flag4, int flag5, int flag6, int *column_score)
{
	int d0, d1, d2, d3;

	if (colScore>=0) d0 = 1;
	else d0 = 0;
	d1 = abs(colScore)/100;
	d2 = abs(colScore)/10 - 10*d1;
	d3 = abs(colScore) - 100*d1 - 10*d2;
	column_score[0] = d0;
	column_score[1] = d1;
	column_score[2] = d2;
	column_score[3] = d3;

	column_score[4] = flag1;
	column_score[5] = flag2;
	column_score[7] = flag3;
	column_score[6] = flag4;

	column_score[8] = flag5;
	column_score[9] = flag6;

	return column_score;
}


