* ===========================================================================
*
* File Name:  contacta.c
*
* Author:  Ruslan Sadreyev

* File Description:
*       A program for the calcultaion of LiveBench contact A score for two structures 
*
* --------------------------------------------------------------------------*/


#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include <malloc.h>
#include <stddef.h>

#define NR_END 1
#define FREE_ARG char*
#define SQUARE(a) ((a)*(a))
#define MAXSTR   100001
#define JMAX 40                                         
#define SWAP(a,b) temp=(a);(a)=(b);(b)=temp;
#define M 7
#define NSTACK 50

void nrerror(char error_text[]);
char *cvector(long nl, long nh);
int *ivector(long nl, long nh);
double *dvector(long nl, long nh);
char **cmatrix(long nrl, long nrh, long ncl, long nch);
int **imatrix(long nrl, long nrh, long ncl, long nch);
double **dmatrix(long nrl, long nrh, long ncl, long nch);
char **cmatrix(long nrl, long nrh, long ncl, long nch);

void free_ivector(int *v, long nl, long nh);
void free_dvector(double *v, long nl, long nh);
void free_cvector(char *v, long nl, long nh);
void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch);
void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch);
void free_cmatrix(char **m, long nrl, long nrh, long ncl, long nch);

static void *mymalloc(int size);
char *strsave(char *str);
char *strnsave(char *str, int l);
static char **incbuf(int n, char **was);
static int *incibuf(int n, int *was);

double  **read_aa_dmatrix(FILE *fmat);
void argument();

int am2num(int c);
int am2numBZX(int c);
double lambu20(double **s, double *p, double low, double hi);
double flambu20(double x, double **s, double *p, double *df); 

void err_readali(int err_num);
void readali_or_seq(char *filename);

static void printali_ali_positive_char_allstarts(FILE *fpp, int argc, int chunk, int n1, int n2, int len, int **name1, int **name2, \
int **seqs1, int **seqs2, int *start1, int *start2, int *positive);

int **ali_char2int(char **aseq,int start_num, int start_seq);
int **read_alignment2int(char *filename,int start_num,int start_seq);

int *mark_redundant_seqs(int **ali, int len, int n, double id);
void **neffsForEachCol_maskGapReg(int **ali, int n, int len, double effgapmax, double effgapRegionMin, double **n_effAa, double *sum_eff_let, int *maskgapRegion, int *apos_filtr, int *len_lowgaps, double *nef);
double effective_number_nogaps(int **ali, int *marks, int n, int start, int end);
void **pssm(double **matrix, double n_eff, int len, double **lnqp);
int *qtarget2cons_maxq(int *apos_filtr, int len, double **lnqp, int alilen_mat);

double **score_matrix, *score_matrix_srt;
int **score_imatrix;
double lambda_al, score_scale;
double transform_a, transform_b;


double **calc_prof_scores(double **nAa1, double **nAa2, double *sumlet1, double *sumlet2, double **sterm1, double **sterm2, int len1, int len2); 
double **calc_seq_scores_nef(double **n_effAa1, double **n_effAa2, int len1, int len2); 
int **transform_colscores(double **score_matrix, int len1, int len2, double lamb_u, double blen0, int *rescaled); 
double variance(double *score_matrix_srt, int size);
int *bin_count(double *score_matrix_srt, int size, double *binend, int nbins, double blen0); 

double ScoreForTwoRows_smat3_21(int pos1, int pos2);
double ScoreForTwoRows_smat3_22(int pos1, int pos2);
double ScoreForTwoRows_smat3_23(int pos1, int pos2);
double ScoreForTwoRows_smat3_27(int pos1, int pos2);
double ScoreForTwoRows_smat3_28(int pos1, int pos2);

double funcAl(double x,int len1,int len2, double *score_matrix_srt);
double lambdaAll(int len1, int len2, double *score_matrix_srt);
double funcAl_a(double x,int len1,int len2, double *score_matrix_srt, double mean_score);
double lambdaAll_a(int len1, int len2, double *score_matrix_srt, double mean_score);
double lambda_bins_newt(double *binctr, int *bincnt, int n, double low, double hi, int len1, int len2); 
double func_bins(double x, double *binctr, int *bincnt, int n, double norm_coef); 
void funcbins_d(double x, double *binctr, int *bincnt, int n, double norm_coef, double *f,double *df); 

int compare_doubles(const double *a, const double *b);
void sort(int n, double arr[]);

int gap_open, gap_extend;

typedef struct _score_Vector{
	int *noGap, *gapExists, *noGapOld, *gapExistsOld, *prevScoreGapQueryOld, *noGapStore, *gapExistsStore, *prevScoreGapQueryOldStore;
} score_Vector;

int queryEnd, dbEnd, queryStart,dbStart;
int gapOpen, gapExtend, dbLength, queryLength;
static int SmithWatermanScore( int **score_imatrix, int queryLength, int dbLength, int gapOpen, int gapExtend, int queryEnd, int dbEnd, int **tracebackDir, 
int **flagNewGapQuery, int **flagNewGapDb);

int score, End1, End2, Start1, Start2;


void **traceback_outputPos(int start_ali1, int start_ali2, int end_ali1, int end_ali2, int **tracebackDir, int **flagNewGapQuery, int **flagNewGapDb, int *apos1, int *apos2);

double funcab(double a, double n, double len, double *p);
void funcab_d(double a, double n, double len, double *p, double *f, double *df);
double *pevd_ms_approx261(double *p, double n, double len); 
double *pevd_ab_approx261(double *p, double n, double len); 

double powerevd_integr_ab1ms(double x1, double x2, double *fpars, int npars);
double powerevd_limitarg_ab1ms_1(double x, double *fpars, int npars ); 
double qtrap_npars(double (*func)(double, double *, int), double *fpars, int npars, double a, double b);
double midpnt_npars(double (*func)(double, double *, int), double *fpars, int npars, double a, double b, int n);

int **strings2int(char **seqs, int n, int n0, int ntot, int len);
void fill_gapped_regions_check_fullygapped(int *apos1, int *apos2, int *positive, int **ali_in1, int **ali_in2, int segment_len, int nseqs1, int nseqs2, int cutgaps, int **aseq_out1, int **aseq_out2, int *positive_out, int lout, int *outpos); 
void mark_identaa(int *positive, int *seq1, int *seq2, int len);
int alipos2seqpos(int alipos, int seqstart, int *seq);	


char am2lower(char inchr);
char am2upper(char inchr);
		

/* Robinson frequencies and BLOSUM matrices correpond to aa order  WFYMLIVACGPTSNQDEHRK  */
double p_rbnsn[]={0.013298, 0.038556, 0.032165, 0.022425, 0.090191, 0.05142, 0.064409, 0.078047, 0.019246, 0.073772, 0.052028, 0.058413,
0.071198, 0.044873, 0.042644, 0.05364, 0.062949, 0.021992, 0.051295, 0.057438};

double q_blosum62[20][20] = {
{0.0065, 0.0008, 0.0009, 0.0002, 0.0007, 0.0004, 0.0004, 0.0004, 0.0001, 0.0004, 0.0001, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003 },
{0.0008, 0.0183, 0.0042, 0.0012, 0.0054, 0.0030, 0.0026, 0.0016, 0.0005, 0.0012, 0.0005, 0.0012, 0.0012, 0.0008, 0.0005, 0.0008, 0.0009, 0.0008, 0.0009, 0.0009 },
{0.0009, 0.0042, 0.0102, 0.0006, 0.0022, 0.0014, 0.0015, 0.0013, 0.0003, 0.0008, 0.0005, 0.0009, 0.0010, 0.0007, 0.0007, 0.0006, 0.0009, 0.0015, 0.0009, 0.0010 },
{0.0002, 0.0012, 0.0006, 0.0040, 0.0049, 0.0025, 0.0023, 0.0013, 0.0004, 0.0007, 0.0004, 0.0010, 0.0009, 0.0005, 0.0007, 0.0005, 0.0007, 0.0004, 0.0008, 0.0009 },
{0.0007, 0.0054, 0.0022, 0.0049, 0.0371, 0.0114, 0.0095, 0.0044, 0.0016, 0.0021, 0.0014, 0.0033, 0.0024, 0.0014, 0.0016, 0.0015, 0.0020, 0.0010, 0.0024, 0.0025 },
{0.0004, 0.0030, 0.0014, 0.0025, 0.0114, 0.0184, 0.0120, 0.0032, 0.0011, 0.0014, 0.0010, 0.0027, 0.0017, 0.0010, 0.0009, 0.0012, 0.0012, 0.0006, 0.0012, 0.0016 },
{0.0004, 0.0026, 0.0015, 0.0023, 0.0095, 0.0120, 0.0196, 0.0051, 0.0014, 0.0018, 0.0012, 0.0036, 0.0024, 0.0012, 0.0012, 0.0013, 0.0017, 0.0006, 0.0016, 0.0019 },
{0.0004, 0.0016, 0.0013, 0.0013, 0.0044, 0.0032, 0.0051, 0.0215, 0.0016, 0.0058, 0.0022, 0.0037, 0.0063, 0.0019, 0.0019, 0.0022, 0.0030, 0.0011, 0.0023, 0.0033 },
{0.0001, 0.0005, 0.0003, 0.0004, 0.0016, 0.0011, 0.0014, 0.0016, 0.0119, 0.0008, 0.0004, 0.0009, 0.0010, 0.0004, 0.0003, 0.0004, 0.0004, 0.0002, 0.0004, 0.0005 },
{0.0004, 0.0012, 0.0008, 0.0007, 0.0021, 0.0014, 0.0018, 0.0058, 0.0008, 0.0378, 0.0014, 0.0022, 0.0038, 0.0029, 0.0014, 0.0025, 0.0019, 0.0010, 0.0017, 0.0025 },
{0.0001, 0.0005, 0.0005, 0.0004, 0.0014, 0.0010, 0.0012, 0.0022, 0.0004, 0.0014, 0.0191, 0.0014, 0.0017, 0.0009, 0.0008, 0.0012, 0.0014, 0.0005, 0.0010, 0.0016 },
{0.0003, 0.0012, 0.0009, 0.0010, 0.0033, 0.0027, 0.0036, 0.0037, 0.0009, 0.0022, 0.0014, 0.0125, 0.0047, 0.0022, 0.0014, 0.0019, 0.0020, 0.0007, 0.0018, 0.0023 },
{0.0003, 0.0012, 0.0010, 0.0009, 0.0024, 0.0017, 0.0024, 0.0063, 0.0010, 0.0038, 0.0017, 0.0047, 0.0126, 0.0031, 0.0019, 0.0028, 0.0030, 0.0011, 0.0023, 0.0031 },
{0.0002, 0.0008, 0.0007, 0.0005, 0.0014, 0.0010, 0.0012, 0.0019, 0.0004, 0.0029, 0.0009, 0.0022, 0.0031, 0.0141, 0.0015, 0.0037, 0.0022, 0.0014, 0.0020, 0.0024 },
{0.0002, 0.0005, 0.0007, 0.0007, 0.0016, 0.0009, 0.0012, 0.0019, 0.0003, 0.0014, 0.0008, 0.0014, 0.0019, 0.0015, 0.0073, 0.0016, 0.0035, 0.0010, 0.0025, 0.0031 },
{0.0002, 0.0008, 0.0006, 0.0005, 0.0015, 0.0012, 0.0013, 0.0022, 0.0004, 0.0025, 0.0012, 0.0019, 0.0028, 0.0037, 0.0016, 0.0213, 0.0049, 0.0010, 0.0016, 0.0024 },
{0.0003, 0.0009, 0.0009, 0.0007, 0.0020, 0.0012, 0.0017, 0.0030, 0.0004, 0.0019, 0.0014, 0.0020, 0.0030, 0.0022, 0.0035, 0.0049, 0.0161, 0.0014, 0.0027, 0.0041 },
{0.0002, 0.0008, 0.0015, 0.0004, 0.0010, 0.0006, 0.0006, 0.0011, 0.0002, 0.0010, 0.0005, 0.0007, 0.0011, 0.0014, 0.0010, 0.0010, 0.0014, 0.0093, 0.0012, 0.0012 },
{0.0003, 0.0009, 0.0009, 0.0008, 0.0024, 0.0012, 0.0016, 0.0023, 0.0004, 0.0017, 0.0010, 0.0018, 0.0023, 0.0020, 0.0025, 0.0016, 0.0027, 0.0012, 0.0178, 0.0062 },
{0.0003, 0.0009, 0.0010, 0.0009, 0.0025, 0.0016, 0.0019, 0.0033, 0.0005, 0.0025, 0.0016, 0.0023, 0.0031, 0.0024, 0.0031, 0.0024, 0.0041, 0.0012, 0.0062, 0.0161 }
};

double s_blosum62[20][20] = {
{5.2520, 0.4588, 1.0771, -0.7124, -0.8159, -1.2903, -1.4171, -1.2634, -1.1521, -1.2457, -1.8271, -1.2145, -1.3759, -1.8480, -0.9732, -2.1072, -1.4177, -1.1711, -1.3397, -1.4782 },
{0.4588, 3.0230, 1.4696, 0.0063, 0.2074, -0.0804, -0.4245, -1.1050, -1.1877, -1.5537, -1.7986, -1.0538, -1.1845, -1.4970, -1.5822, -1.7419, -1.5962, -0.6171, -1.3932, -1.5393 },
{1.0771, 1.4696, 3.2975, -0.4974, -0.5310, -0.6657, -0.6038, -0.8820, -1.2036, -1.5199, -1.4599, -0.8030, -0.8429, -1.0409, -0.7105, -1.5325, -1.0102, 0.8463, -0.8469, -0.9100 },
{-0.7124, 0.0063, -0.4974, 2.6963, 0.9959, 0.5634, 0.3436, -0.4676, -0.7099, -1.3383, -1.2382, -0.3331, -0.7404, -1.0754, -0.2105, -1.5293, -0.9990, -0.7756, -0.6836, -0.6774 },
{-0.8159, 0.2074, -0.5310, 0.9959, 1.9247, 0.7608, 0.3942, -0.7323, -0.6387, -1.8135, -1.4300, -0.5987, -1.2213, -1.6895, -1.0670, -1.8028, -1.4232, -1.3934, -1.0773, -1.2234 },
{-1.2903, -0.0804, -0.6657, 0.5634, 0.7608, 1.9993, 1.2735, -0.6609, -0.6138, -1.8624, -1.3783, -0.3588, -1.1741, -1.6085, -1.3848, -1.5606, -1.5972, -1.6158, -1.4951, -1.3351 },
{-1.4171, -0.4245, -0.6038, 0.3436, 0.3942, 1.2735, 1.8845, -0.0947, -0.4038, -1.5694, -1.1744, -0.0278, -0.8231, -1.4382, -1.0992, -1.5713, -1.2211, -1.5587, -1.2513, -1.1312 },
{-1.2634, -1.1050, -0.8820, -0.4676, -0.7323, -0.6609, -0.0947, 1.9646, -0.2043, 0.0798, -0.4071, -0.0227, 0.5579, -0.7654, -0.4020, -0.8767, -0.4319, -0.8126, -0.7068, -0.3670 },
{-1.1521, -1.1877, -1.2036, -0.7099, -0.6387, -0.6138, -0.4038, -0.2043, 4.2911, -1.2502, -1.3976, -0.4333, -0.4375, -1.3299, -1.4509, -1.7300, -1.8062, -1.4939, -1.6946, -1.5182 },
{-1.2457, -1.5537, -1.5199, -1.3383, -1.8135, -1.8624, -1.5694, 0.0798, -1.2502, 2.7816, -1.0668, -0.7877, -0.1462, -0.2114, -0.8926, -0.6568, -1.0551, -1.0204, -1.1521, -0.7640 },
{-1.8271, -1.7986, -1.4599, -1.2382, -1.4300, -1.3783, -1.1744, -0.4071, -1.3976, -1.0668, 3.6823, -0.5376, -0.4045, -1.0002, -0.6410, -0.7401, -0.5581, -1.0805, -1.0543, -0.5068 },
{-1.2145, -1.0538, -0.8030, -0.3331, -0.5987, -0.3588, -0.0278, -0.0227, -0.4333, -0.7877, -0.5376, 2.2727, 0.6906, -0.0230, -0.3377, -0.5254, -0.4316, -0.8429, -0.5612, -0.3348 },
{-1.3759, -1.1845, -0.8429, -0.7404, -1.2213, -1.1741, -0.8231, 0.5579, -0.4375, -0.1462, -0.4045, 0.6906, 1.9422, 0.3005, -0.0506, -0.1305, -0.0735, -0.4408, -0.3824, -0.1017 },
{-1.8480, -1.4970, -1.0409, -1.0754, -1.6895, -1.6085, -1.4382, -0.7654, -1.3299, -0.2114, -1.0002, -0.0230, 0.3005, 2.8266, 0.0008, 0.6358, -0.1340, 0.2892, -0.2199, -0.0895 },
{-0.9732, -1.5822, -0.7105, -0.2105, -1.0670, -1.3848, -1.0992, -0.4020, -1.4509, -0.8926, -0.6410, -0.3377, -0.0506, 0.0008, 2.6426, -0.1567, 0.9273, 0.2240, 0.4914, 0.6363 },
{-2.1072, -1.7419, -1.5325, -1.5293, -1.8028, -1.5606, -1.5713, -0.8767, -1.7300, -0.6568, -0.7401, -0.5254, -0.1305, 0.6358, -0.1567, 2.8871, 0.7552, -0.5595, -0.8029, -0.3509 },
{-1.4177, -1.5962, -1.0102, -0.9990, -1.4232, -1.5972, -1.2211, -0.4319, -1.8062, -1.0551, -0.5581, -0.4316, -0.0735, -0.1340, 0.9273, 0.7552, 2.4514, -0.0588, -0.0577, 0.3877 },
{-1.1711, -0.6171, 0.8463, -0.7756, -1.3934, -1.6158, -1.5587, -0.8126, -1.4939, -1.0204, -1.0805, -0.8429, -0.4408, 0.2892, 0.2240, -0.5595, -0.0588, 3.7555, -0.1249, -0.3605 },
{-1.3397, -1.3932, -0.8469, -0.6836, -1.0773, -1.4951, -1.2513, -0.7068, -1.6946, -1.1521, -1.0543, -0.5612, -0.3824, -0.2199, 0.4914, -0.8029, -0.0577, -0.1249, 2.7367, 1.0544 },
{-1.4782, -1.5393, -0.9100, -0.6774, -1.2234, -1.3351, -1.1312, -0.3670, -1.5182, -0.7640, -0.5068, -0.3348, -0.1017, -0.0895, 0.6363, -0.3509, 0.3877, -0.3605, 1.0544, 2.2523 },
};

double **blosum, **qBlosum;
double **smatrix, **qmatrix, *paa, SMAT_EXPECT;

char **aname, **aname1, **aname2, **aseq, **aseq1, **aseq2;
int nal, nal1, nal1_1, nal2, nal2_1, alilen, alilen1, alilen2, 
*astart, *astart1, *astart2, *alen;

int n_lowgaps, alilen_mat1, alilen_mat2;
char **aseq_mat1, **aseq_mat2;
char **aseqGapTr1, **aseqGapTr2;
int **tracebackDir;
int **flagNewGapQuery, **flagNewGapDb;
int *positive;
int posGp, segment_len;
int *apos1, *apos2;

int **alignment1, **alignment1_1, **alignment2, **alignment2_1;
char *am="-WFYMLIVACGPTSNQDEHRKBZX*.wfymlivacgptsnqdehrkbzx";
int **count;
int *maskgaps, *maskgaps1, *maskgaps2, *maskgapRegion, *maskgapRegion1, *maskgapRegion2;
double **lnqp1, **lnqp2;
double **score_term1, **score_term2; 
double n_eff1, n_eff2;
double **n_effAa1, **n_effAa2; 
double *sum_eff_let1, *sum_eff_let2;
int *apos_filtr1, *apos_filtr2;
int scoreGivenEnd, score_final;

double Evalue;
/* coefficients for linear approximation of lambda(len) and K(len) */ 
double lam0=2.385933e-01, lam1=4.287176, K0=-3.906895e-03, K1=5.08363;
double nef, lenterm; 
double lambda_est, K_est; /* estimated lambda_g and K_g values */

double lambda_u, recipr_lambda_u;
double b = 1.0;
double f= 32.0;

int flag_errread = 0;

/* a,b parameters:
{d1, d2, d3, \[Kappa], \[Omega], \[Mu], \[Kappa]1, \[Omega]1, \[Mu]1, z, w1, 
w2}

{d1 -> 2.231815894988936`, d2 -> 1.843708152354934`, 
    d3 -> 0.16850628871445064`, \[Kappa] -> 
      4.371260304380246`*^-6, \[Omega] -> \(-0.005989881535598766`\), \[Mu] \
-> 0.00016663218529647955`, \[Kappa]1 -> 5.393433126953284`*^-8, \[Omega]1 -> 
      0.0010494809536282035`, \[Mu]1 -> \(-0.006828960229085754`\), 
    z -> 1.7475130753608696`, w1 -> 0.12742993602001498`, 
    w2 -> 0.00024754857255565084`}
*/
double pab[] = {2.231815894988936, 1.843708152354934, 0.16850628871445064, 4.371260304380246e-6, -0.005989881535598766, 0.00016663218529647955, 5.393433126953284e-8, 0.0010494809536282035, -0.006828960229085754, 1.7475130753608696, 0.12742993602001498, 0.00024754857255565084};



/* m,s parameters:
{afi, bfi, cfi, aT, bT, cT, aU, bU, cU, aA, bA, cA, dA, aB, bB, cB, aC, bC, 
cC, dC}

{afi -> 2.1091310404949697`, bfi -> 3.030063285093589`*^-7, 
    cfi -> 0.07473837110885179`, aT -> \(-1.3538000760997745`*^-7\), 
    bT -> 1.169674649231483`*^-6, cT -> 1.553017634619331`, 
    aU -> \(-0.00001965088272669018`\), bU -> 0.00789596724053364`, 
    cU -> \(-3.687983656288619`\), aA -> 0.0019103675816153555`, 
    bA -> 0.0020454196243591396`, cA -> \(-0.5536656098648951`\), 
    dA -> 0.0009361660640736545`, aB -> 33.05188901279279`, 
    bB -> 0.1273044998841928`, cB -> \(-40.18903007084032`\), 
    aC -> 0.00031011960848257637`, bC -> 1.9454327299531495`*^-8, 
    cC -> \(-1.5707822698388576`\), dC -> 22.491850537504448`}
*/
double pms[] = {2.1091310404949697, 3.030063285093589e-7, 0.07473837110885179, -1.3538000760997745e-7, 1.169674649231483e-6, 1.553017634619331, -0.00001965088272669018, 0.00789596724053364, -3.687983656288619, 0.0019103675816153555, 0.0020454196243591396, -0.5536656098648951, 0.0009361660640736545, 33.05188901279279, 0.1273044998841928, -40.18903007084032, 0.00031011960848257637, 1.9454327299531495e-8, -1.5707822698388576, 22.491850537504448};


main(int argc, char *argv[])
{
	char ARG_I1[200],ARG_I2[200],ARG_O[200]={'\0'},ARG_S[100]={'\0'},ARG_Q[100]={'\0'};
	int ARG_B=60;
	int ARG_N1 = 1, ARG_N2 = 1; /* number of top sequences of each input alignment to be displayed in the result */ 
	int ARG_C = 1; /* Display consensus sequences in the resulting alignment ? (1/0) */ 
	int ARG_U = 0; /* In the output alignments, cut out position matches that are fully gapped */
			/* in both representative sets of sequences ? (1/0) */
	int ARG_GO = 10, ARG_GE = 1; /* penalties for gap opening and extension */  
	double ARG_G=0.5; /* threshold of gap content for column excision */
	double ARG_T=1.0; /* threshold of gap content for "gapped regions" with waiving of 1st gap_open penalty */
	double ARG_L = 0.0; /* Ungapped lambda */
	double ARG_X = BIG; /* expected value for substitution matrix, to be used in score transformation */
	int dblen, ARG_Z = 0; /* database length; 0 means using length of Ali2 for Evalue calculation */
	
	double ARG_H = 0.03; /* bin length for the rightmost bin in the score histogram for lambda calculation */

	double purge_id = 0.97; /* ID cutoff to purge redundant sequences in input alignments */
		
	FILE *smatrixfile,*qmatrixfile, *bl62file, *qbl62file; 
	FILE *flen, *fout, *fpdb,*fp, *fdbQ, *fdb;
	int i,j,k,l, flag_headQ, flag_head, nposn, nposq, nsym, nsymbols, posini;
	int *topurge;
	int ndone, nd, naln2;
	char *bl62qijLocation = "./blosum62.qij";
	char *bl62Location = "./blosum62.sij";
	char flen_name[200];
	char aliname2[200], *t1;
	int rescaled;
	double summin1, summin2;
	int *cons1, *cons2;
	char cname1[] = "CONSENSUS_1";
	char cname2[] = "CONSENSUS_2";
	int nseqout1, nseqout2, **ali_in1, **ali_in2, *seqstart1, *seqstart2;
	int **aname_in1, **aname_in2; 
	int **aseq_out1, **aseq_out2, *positive_out;
	int len_out, outputpos;

	double *parab, *parms;
	double m,s;
	double m0, m00=-16.6642, m01=0.412233, m02=-0.0367164;
	double m1, m10=0.00851920, m11=-0.000865546, m12=0.0000608894;
	double m2, m20=7.83847, m21=-0.0558511, m22=0.00362532;
	double s0=-0.75;
	double s1, s10=0.000785406, s11=-0.000205592, s12=0.0000196777;
	double s2, s20=0.825386, s21=-0.00571187, s22=0.000440545;
	double lcorr, termL12a, termL12b, termL12c;
	double bA=0.0004803558962774157, cA=0.11123344623676754, aB=0.0037811147667704548, bB=-0.0010169702986395635, cB=-0.0072927707563071285, bC=0.12514343589814347, cC=-4.410830425863369;
	double *pars;
	double max, norm;


	/*read input arguments */
        if(argc<=4) { argument(); exit(0);}
	for(i=1;i<argc;i++) {
	    if(strcmp(argv[i],"-i")==0) {strcpy(ARG_I1,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-j")==0) {strcpy(ARG_I2,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-o")==0) {strcpy(ARG_O,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-s")==0) {strcpy(ARG_S,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-q")==0) {strcpy(ARG_Q,argv[i+1]);i++;continue;}
    	    if(strcmp(argv[i],"-O")==0) {sscanf(argv[i+1],"%d",&ARG_GO);i++;continue;}
	    if(strcmp(argv[i],"-E")==0) {sscanf(argv[i+1],"%d",&ARG_GE);i++;continue;}  
	    if(strcmp(argv[i],"-z")==0) {sscanf(argv[i+1],"%d",&ARG_Z);i++;continue;}  
	    if(strcmp(argv[i],"-L")==0) {sscanf(argv[i+1],"%lf",&ARG_L);i++;continue;}  
	    if(strcmp(argv[i],"-b")==0) {sscanf(argv[i+1],"%d",&ARG_B);i++;continue;}
    	    if(strcmp(argv[i],"-n1")==0) {sscanf(argv[i+1],"%d",&ARG_N1);i++;continue;}
    	    if(strcmp(argv[i],"-n2")==0) {sscanf(argv[i+1],"%d",&ARG_N2);i++;continue;}
    	    if(strcmp(argv[i],"-c")==0) {sscanf(argv[i+1],"%d",&ARG_C);i++;continue;}
    	    if(strcmp(argv[i],"-u")==0) {sscanf(argv[i+1],"%d",&ARG_U);i++;continue;}
	    if(strcmp(argv[i],"-g")==0) {sscanf(argv[i+1],"%lf",&ARG_G);i++;continue;}
	    if(strcmp(argv[i],"-t")==0) {sscanf(argv[i+1],"%lf",&ARG_T);i++;continue;}
	    if(strcmp(argv[i],"-x")==0) {sscanf(argv[i+1],"%lf",&ARG_X);i++;continue;}
       	    if(strcmp(argv[i],"-h")==0) {sscanf(argv[i+1],"%lf",&ARG_H);i++;continue;}
	    
	}
	
	if((ARG_G>1.0)||(ARG_G<=0)){fprintf(stderr,"gap content(-g) to eliminate a column must be no more than 1 and more than 0 \n");
		    exit(0);}
	
	if(ARG_C == 0) {
		if( (ARG_N1 == 0 && ARG_N2 !=0) || (ARG_N2 == 0 && ARG_N1 !=0) ) {
			fprintf(stderr, "With no consensus displayed, numbers of displayed top sequences\n"); 
			fprintf(stderr, "should be either both non-zero (headers and alignments diplayed)\n");
			fprintf(stderr, "or both zero (only headers displayed).\n");
			exit(0);
		}
	}
	
	gap_open = f*ARG_GO; 
	gap_extend = f*ARG_GE;

	/* q_ij matrix:  qij for BLOSUM62 if reading from input failed */
	paa = dvector(1,20);
	smatrix = dmatrix(1,20,1,20);
        if((qmatrixfile=fopen(ARG_Q,"r"))==NULL){
                if(strlen(ARG_Q)!=0) fprintf(stderr, "Using default residue q_ij matrix: BLOSUM62\n",ARG_Q); 

                qmatrix = dmatrix(1,20,1,20);
                for(i=1;i<=20;i++) {
			paa[i]=p_rbnsn[i-1];
			for(j=1;j<=20;j++) {
				qmatrix[i][j] = q_blosum62[i-1][j-1]; 
				smatrix[i][j] = 2.0* s_blosum62[i-1][j-1];
			}
		}
		SMAT_EXPECT = BLOSUM62_EXPECT;

		if(ARG_L > 0.0) lambda_u = ARG_L;
		else lambda_u = LAMB_UNG62;

        } else {
        	qmatrix=read_aa_dmatrix(qmatrixfile);
	     	fclose(qmatrixfile);

		/* Background aa frequencies */
		for(i=1;i<=20;i++) {
			paa[i]=0.0;
			for(j=1;j<=20;j++) paa[i] += qmatrix[i][j];
		}

		/* s_ij matrix in half-bits */
		/* (corresponds to BLOSUM 62 with gap penalties 10+1 ) */
		SMAT_EXPECT = 0.0;
		for(i=1;i<=20;i++) {
			for(j=1;j<=20;j++) {
				smatrix[i][j] = 2.0*(1.0/LN2) * log( qmatrix[i][j]*(1.0/(paa[i]*paa[j])) );
				SMAT_EXPECT += 0.5 * smatrix[i][j]*paa[i]*paa[j];
			}  
		}

		/* Derive lambda */
		if(ARG_L > 0.0) lambda_u = ARG_L;
		else lambda_u = lambu20(smatrix, paa, 1.0e-4, 1.0);
	}
	recipr_lambda_u = 1.0/lambda_u;

	if(ARG_X != BIG) /* desired expected value for indiv. scores is specified */
		SMAT_EXPECT = ARG_X;


	/* substitution matrix: should be bits (BLOSUM62 if reading from input failed) */
        if((smatrixfile=fopen(ARG_S,"r"))==NULL){
                if(strlen(ARG_S)!=0) { fprintf(stderr, "Using default residue substitution matrix: BLOSUM62\n",ARG_S); }
                smatrix = dmatrix(1,20,1,20);
                for(i=1;i<=20;i++) {
			for(j=1;j<=20;j++) { smatrix[i][j] = s_blosum62[i-1][j-1]; }
		}
       } else {
       		smatrix=read_aa_dmatrix(smatrixfile);
	     	fclose(smatrixfile);
	}
	
	for(i=1;i<=20;i++) {
		for(j=1;j<=20;j++) {
			 smatrix[i][j] *= LN2; 
		}
	}
	
	/* q_ij matrix:  qij for BLOSUM62 if reading from input failed */
        if((qmatrixfile=fopen(ARG_Q,"r"))==NULL){
                if(strlen(ARG_Q)!=0) { fprintf(stderr, "Using default residue q_ij matrix: BLOSUM62\n",ARG_Q); }
                qmatrix = dmatrix(1,20,1,20);
                for(i=1;i<=20;i++) {
			for(j=1;j<=20;j++) { qmatrix[i][j] = q_blosum62[i-1][j-1]; }
		}
        } else {
        	qmatrix=read_aa_dmatrix(qmatrixfile);
	     	fclose(qmatrixfile);
	 }


	fprintf(stderr, "Generating profile from %s...\n", ARG_I1);

	/* read alignment */
	alignment1=read_alignment2int(ARG_I1,1,1);
	if(alignment1==NULL){ fprintf(stderr, "alignment1 file not readable\n"); }

	alilen1 = alilen;
	nal1 = nal;
	astart1 = ivector(0,nal1);
	for (i=0; i<nal1; i++) {
		if(astart[i]) {astart1[i] = astart[i];}
		else { astart1[i] = 1; }
	}

	aname1 = cmatrix(0, nal1, 0, 100);  
	for (i=0; i<nal1; i++) { strcpy(aname1[i], aname[i]); }

	aseq1 = cmatrix(0, nal1, 0, alilen1);
	for (j=0;j<alilen1;j++) { for (i=0; i<nal1; i++) {aseq1[i][j] = aseq[i][j];} }
	free (astart);
	free (alen);
	free (aseq);
	free (aname);

	/* purge highly redundant sequences */  
	topurge = mark_redundant_seqs (alignment1, alilen1, nal1, purge_id);

	alignment1_1 = imatrix(1, nal1, 1, alilen1);
	nal1_1 = 0;
	for(i=1; i<=nal1; i++) {
		if(topurge[i]) continue;
		nal1_1++;
		for(j=1; j<=alilen1; j++) alignment1_1[nal1_1][j] = alignment1[i][j];
	}
	free_imatrix(alignment1, 1, nal1, 1, alilen1);
	free_ivector(topurge, 1, nal1);
		
	/* filter "gapped" columns, get total n_eff, n_eff for each aa in each column and mask moderately "gapped regions" */
	apos_filtr1 = ivector(1,alilen1);
	n_effAa1 = dmatrix(1,alilen1,0,20);
	sum_eff_let1 = dvector(1,alilen1);			
	neffsForEachCol_maskGapReg(alignment1_1, nal1_1, alilen1, ARG_G, ARG_T, n_effAa1, sum_eff_let1, maskgapRegion1, apos_filtr1, &alilen_mat1, &n_eff1);

	/* Calculate target frequencies for Ali1 */
	lnqp1 = dmatrix(1,alilen_mat1, 0, 20);
	pssm(n_effAa1, n_eff1, alilen_mat1, lnqp1);
	
	score_term1 = dmatrix(1,alilen_mat1, 0, 20);
	for (i=1; i<=alilen_mat1; i++){
		summin1 = sum_eff_let1[i] - 1.0;
		for (j=1; j<=20; j++) {
			score_term1[i][j] = summin1*lnqp1[i][j];
		}
	}
	
	fprintf(stderr, "Generating profile from %s...\n", ARG_I2);

	/* read alignment */
	alignment2=read_alignment2int(ARG_I2,1,1);
	if(alignment2==NULL){ fprintf(stderr, "alignment2 file not readable\n"); }

	alilen2 = alilen;
	nal2 = nal;
	astart2 = ivector(0,nal2);
	for (i=0; i<nal2; i++) {
		if(astart[i]) {astart2[i] = astart[i];}
		else { astart2[i] = 1; }
	}

	aname2 = cmatrix(0, nal2, 0, 100);  
	for (i=0; i<nal2; i++) { strcpy(aname2[i], aname[i]); }

	aseq2 = cmatrix(0, nal2, 0, alilen2);
	for (j=0;j<alilen2;j++) { for (i=0; i<nal2; i++) {aseq2[i][j] = aseq[i][j];} }
	free (astart);
	free (alen);
	free (aseq);
	free (aname);

	/* purge highly redundant sequences */  
	topurge = mark_redundant_seqs (alignment2, alilen2, nal2, purge_id);

	alignment2_1 = imatrix(1, nal2, 1, alilen2);
	nal2_1 = 0;
	for(i=1; i<=nal2; i++) {
		if(topurge[i]) continue;
		nal2_1++;
		for(j=1; j<=alilen2; j++) alignment2_1[nal2_1][j] = alignment2[i][j];
	}
	free_imatrix(alignment2, 1, nal2, 1, alilen2);
	free_ivector(topurge, 1, nal2);
		
	/* filter "gapped" columns, get total n_eff, n_eff for each aa in each column and mask moderately "gapped regions" */
	apos_filtr2 = ivector(1,alilen2);
	n_effAa2 = dmatrix(1,alilen2,0,20);
	sum_eff_let2 = dvector(1,alilen2);			
	neffsForEachCol_maskGapReg(alignment2_1, nal2_1, alilen2, ARG_G, ARG_T, n_effAa2, sum_eff_let2, maskgapRegion2, apos_filtr2, &alilen_mat2, &n_eff2);

	/* Calculate target frequencies for Ali2 */
	lnqp2 = dmatrix(1,alilen_mat2, 0, 20);
	pssm(n_effAa2, n_eff2, alilen_mat2, lnqp2);
	
	score_term2 = dmatrix(1,alilen_mat2, 0, 20);
	for (i=1; i<=alilen_mat2; i++){
		summin2 = sum_eff_let2[i] - 1.0;
		for (j=1; j<=20; j++) {
			score_term2[i][j] = summin2*lnqp2[i][j];
		}
	}
	
	
	if(n_eff2 - 1.0 < TINY && n_eff1 - 1.0 < TINY ) { /* sequence to sequence comparison */

		score_matrix = calc_seq_scores_nef(n_effAa1, n_effAa2, alilen_mat1, alilen_mat2); 

	} else { /* profile-profile or seq-profile comparison */

		score_matrix = calc_prof_scores( n_effAa1, n_effAa2, sum_eff_let1, sum_eff_let2, score_term1, score_term2, alilen_mat1, alilen_mat2); 

	}

	/* transfrom scores to the same mean and lambda as BLOSUM */
	score_imatrix = transform_colscores(score_matrix, alilen_mat1, alilen_mat2, lambda_u, ARG_H, &rescaled);

	tracebackDir = imatrix(1,alilen_mat1,1,alilen_mat2);
	flagNewGapQuery = imatrix(1,alilen_mat1,1,alilen_mat2);
	flagNewGapDb = imatrix(1,alilen_mat1,1,alilen_mat2);

	score = SmithWatermanScore(score_imatrix , alilen_mat1,
	alilen_mat2, gap_open, gap_extend, End1, End2, tracebackDir, flagNewGapQuery, flagNewGapDb); 
	score_final = rint(score*(1.0/f));

	/* Prepare header to print out */ 

	if((fout=fopen(ARG_O,"a"))==NULL){
		if(strlen(ARG_O)!=0){fprintf(stderr,"default output to stdout\n");}
		fout=stdout;
	}	

	fprintf(fout, "COMPASS 3.0\n");
	fprintf (fout, "Ali1: %s\tAli2: %s\n",ARG_I1,ARG_I2);
	fprintf (fout, "Threshold of effective gap content in columns: %.1f\n", ARG_G);
	fprintf (fout, "length1=%d\tfiltered_length1=%d\tlength2=%d\tfiltered_length2=%d\n", alilen1,alilen_mat1,alilen2,alilen_mat2);
	fprintf (fout, "Nseqs1=%d\tNeff1=%.3f\tNseqs2=%d\tNeff2=%.3f\n", nal1,n_eff1,nal2,n_eff2);
	fprintf (fout, "Smith-Waterman score = %d\t ", score_final);

	if(score==0) { 
		fprintf (fout, "\n\nNo alignment with positive score is found\n\n"); 
		fclose(fout);
		exit(0);

	} else { /* calculate and print Evalue */

		if(ARG_Z == 0) { dblen = alilen_mat2;}
		else { dblen = ARG_Z; }

		/* 
		calculate L  as a function of L1,2; n1,2:
		fnAfin[x1_, x2_] :=  bA (x1 + x2) + cA
		fnBfin[x1_, x2_] := Abs[aB(x1 - x2)] + bB (x1 + x2) + cB
		fnCfin[x1_, x2_] := bC (x1 + x2) + cC
		LapprFin[L1_, L2_, n1_, n2_] := L_harm + Abs[ fnAfin[n1, n2](L1 - L2)] + fnBfin[n1, n2](L1+L2) + fnCfin[n1, n2];			

		Then fix n to 0.5(n1+n2)
		*/

		nef = 0.5*(n_eff1 + n_eff2);
		termL12a = bA*(n_eff1 + n_eff2) + cA;
		termL12b = fabs( aB*(n_eff1 - n_eff2) ) + bB*(n_eff1 + n_eff2) + cB;
		termL12c = bC*(n_eff1 + n_eff2) + cC;
		lcorr = 2.0/(1.0/alilen_mat1 + 1.0/alilen_mat2) + fabs(termL12a*(alilen_mat1-alilen_mat2)) + termL12b*(alilen_mat1+alilen_mat2) + termL12c ;

		/* calculate a,b,m,s */
		parab = pevd_ab_approx261(pab,nef, lcorr);
		parms = pevd_ms_approx261(pms, nef, lcorr);

		if( lcorr>1000.0 || parab[0]<0.0 || parab[1]<0.0 || parms[0]<5.0 || parms[1]<1.0 || parab[1]*exp( parab[0]* log(parms[0]* (1.0/parms[1])) ) > 200.0 ) { 
			/* Use EVD with non-linear m,s for thick and long */

			m0 = m00 + m01*nef + m02*nef*nef;
			m1 = m10 + m11*nef + m12*nef*nef;
			m2 = m20 + m21*nef + m22*nef*nef;
			m = m0 + m1*lcorr + m2*log(lcorr);

			s0 = -0.75; 
			s1 = s10 + s11*nef + s12*nef*nef;
			s2 = s20 + s21*nef + s22*nef*nef;
			s = s0 + s1*lcorr + s2*log(lcorr);

			Evalue = dblen * (1.0/alilen_mat2)* exp( (m-score)*(1.0/s) );
	
		} else {

			pars = dvector(1,4);
			pars[0] = parab[0]; pars[1] = parab[1]; 
			pars[2] = parms[0]; pars[3] = parms[1]; 

			max = pars[2] + 100.0*pars[3];
			if(score_final >= max) { Evalue = 0.0; }
			else {
				norm = powerevd_integr_ab1ms(0.0, max, pars, 4);	
				Evalue = dblen * (1.0/alilen_mat2) * (1.0/norm) * powerevd_integr_ab1ms(score_final, max, pars, 4);
			}
		
		}

		if(rescaled) {  /* Lambda was found successfully and scores were rescaled */
			fprintf (fout, "Evalue = %.2e\n\n", Evalue);
		} else {
			fprintf (fout, "Evalue = %.2e ** WARNING: lambda could not be found; Evalue may be inaccurate **\n\n", Evalue);
		}

	}


	/* Get apos1,2[] from apos_filtr1,2[] : positions in the initial (input) ali */		
	apos1 = ivector(1, alilen_mat1+alilen_mat2);
	apos2 = ivector(1, alilen_mat1+alilen_mat2);
	positive = ivector(0,alilen_mat1+alilen_mat2);
	traceback_outputPos(Start1, Start2, End1, End2, tracebackDir, flagNewGapQuery, flagNewGapDb, apos1, apos2);

	free_imatrix(score_imatrix,1,alilen_mat1,1,alilen_mat2);
	free_imatrix(tracebackDir, 1,alilen_mat1,1,alilen_mat2);
	free_imatrix(flagNewGapQuery, 1,alilen_mat1,1,alilen_mat2);
	free_imatrix(flagNewGapDb, 1,alilen_mat1,1,alilen_mat2);

	/* Arrays for top names and starts for output (possibly with consensus added) */
	aname_in1 = imatrix(0, ARG_N1, 0, NAMELEN);  
	aname_in2 = imatrix(0, ARG_N2, 0, NAMELEN);  
	seqstart1 = ivector(0,ARG_N1);
	seqstart2 = ivector(0,ARG_N2);

	/* Prepare top subalignments to be used in output, */
	/* with consensus if specified */
	if(ARG_C==0) { /* no consensus output */
		nseqout1 = ARG_N1 > nal1 ? nal1 : ARG_N1;
		ali_in1 = strings2int(aseq1, nseqout1, 0, nseqout1, alilen1);						
		for(i=0; i<nseqout1; i++) 
			for(j=0; j<=strlen(aname1[i]); j++) aname_in1[i][j] = aname1[i][j];

		nseqout2 = ARG_N2 > nal2 ? nal2 : ARG_N2;
		ali_in2 = strings2int(aseq2, nseqout2, 0, nseqout2, alilen2);						
		for(i=0; i<nseqout2; i++)
			for(j=0; j<=strlen(aname2[i]); j++) aname_in2[i][j] = aname2[i][j];

	} else { /* Add consensus on the top of query and the bottom of subject */
		nseqout1 = ARG_N1 > nal1 ? nal1+1 : ARG_N1+1;
		/* Derive consensus sequence of the query alignment */
		cons1 = qtarget2cons_maxq(apos_filtr1, alilen1, lnqp1, alilen_mat1);

		ali_in1 = strings2int(aseq1, nseqout1-1, 0, nseqout1, alilen1);
		for(i=0; i<alilen1; i++) ali_in1[nseqout1-1][i] = cons1[i];				

		for(i=0; i<nseqout1-1; i++)
			for(j=0; j<=strlen(aname1[i]); j++) aname_in1[i][j] = aname1[i][j];

		for(j=0; j<=strlen(cname1); j++) aname_in1[nseqout1-1][j] = cname1[j];


		nseqout2 = ARG_N2 > nal2 ? nal2+1 : ARG_N2+1;
		/* Derive consensus sequence of the subject alignment */
		cons2 = qtarget2cons_maxq(apos_filtr2, alilen2, lnqp2, alilen_mat2);

		ali_in2 = strings2int(aseq2, nseqout2-1, 1, nseqout2, alilen2);
		for(i=0; i<alilen2; i++) ali_in2[0][i] = cons2[i];				

		for(i=0; i<nseqout2-1; i++)
			for(j=0; j<=strlen(aname2[i]); j++) aname_in2[i+1][j] = aname2[i][j];

		for(j=0; j<=strlen(cname2); j++) aname_in2[0][j] = cname2[j];
	}


	/* Fill the output alignments (in lower case are parts filtered out by the gap filter ),
	output arrays for positive matches */
	len_out = (apos1[segment_len]-apos1[1]+1)+(apos2[segment_len]-apos2[1]+1);

	aseq_out1 = imatrix(0,nseqout1,1,len_out);
	aseq_out2 = imatrix(0,nseqout2,1,len_out);
	positive_out = ivector(1,len_out);
	fill_gapped_regions_check_fullygapped(apos1, apos2, positive, ali_in1, ali_in2, segment_len, nseqout1, nseqout2, ARG_U, aseq_out1, aseq_out2, positive_out, len_out, &outputpos);

	if(ARG_C) { 

		/* for identical residues in consensus seqs, */
		/* put residue symbol in positive_out[] */
		mark_identaa(positive_out, aseq_out1[nseqout1-1], aseq_out2[0], outputpos);

		/* Make start for consensus1 = starting position for the whole alignment1 */
		seqstart1[nseqout1-1] = apos1[1];
		/* Make start for consensus2 = starting position for the whole alignment2 */
		seqstart2[0] = apos2[1];	

		/* Based on alignments ali_in1,2 and their starting sequence positions astart1,2[], */
		/* get seqstart1,2[] -- starting positions of sequences in profile-profile alignment */ 
		for(i=0; i<nseqout1-1; i++) 
			seqstart1[i] = alipos2seqpos(apos1[1], astart1[i], ali_in1[i]);	

		for(i=0; i<nseqout2-1; i++) 
			seqstart2[i+1] = alipos2seqpos(apos2[1], astart2[i], ali_in2[i+1]);
		
	} else {

		/* Based on alignments ali_in1,2 and their starting sequence positions astart1,2[], */
		/* get seqstart1,2[] -- starting positions of sequences in profile-profile alignment */ 
		for(i=0; i<nseqout1; i++) 
			seqstart1[i] = alipos2seqpos(apos1[1], astart1[i], ali_in1[i]);	

		for(i=0; i<nseqout2; i++) 
			seqstart2[i] = alipos2seqpos(apos2[1], astart2[i], ali_in2[i]);	
	}

	free_imatrix(ali_in1, 0, nseqout1-1, 0, alilen1-1);
	free_imatrix(ali_in2, 0, nseqout2-1, 0, alilen2-1);

	printali_ali_positive_char_allstarts(fout, ARG_C, ARG_B, nseqout1, nseqout2, outputpos, aname_in1, aname_in2, aseq_out1, aseq_out2, seqstart1, seqstart2, positive_out); 

	free_ivector(apos1, 1, alilen_mat1+alilen_mat2);
	free_ivector(apos2, 1, alilen_mat1+alilen_mat2);
	free_ivector(positive, 0,alilen_mat1+alilen_mat2);

	free_imatrix(aseq_out1, 0,nseqout1,1,len_out);
	free_imatrix(aseq_out2, 0,nseqout2,1,len_out);
	free_ivector(positive_out,1,len_out);

	fprintf(fout, "\nParameters:\n");		
	fprintf(fout, "Lambda_ungapped: %.4e\n", lambda_u);		
	fprintf(fout, "Expected value of positional scores: %.4e\n", BLOSUM62_EXPECT);		
	fprintf(fout, "Gap opening: %d\n", ARG_GO);		
	fprintf(fout, "Gap extension: %d\n\n", ARG_GE);		

	fclose(fout);


	exit(0);
}


/* In alignment, marks sequences that have a similar sequence (ID> specified threshold)
 located above in the alignment
(gaps considered as 21st symbol).
Returns array of marks for each seq (0 is non-marked, 1 is marked). 
*/ 
int *mark_redundant_seqs (int **ali, int len, int n, double id)
{
	int i,j,k,l, flag;
	int min_dif, ndif;
	int *mark;
	
	min_dif = (1.0-id)*len + 1;
	
	mark = ivector(1,n);
	for (i=1;i<=n;i++) mark[i] = 0;
			
	for (i=1;i<n;i++) {
		for (j=i+1;j<=n;j++) {
			if(mark[j]) continue;
			
			flag=0;
			ndif = 0;
			for (k=0;k<len;k++) {
				if (ali[i][k] != ali[j][k]) {
					if ( (++ndif) >= min_dif ) {flag=1; break;}
				}
			}
			if (flag==0) mark[j] = 1;
		}
	}
	
	return mark;
}



/* from given alignment with aa as numbers, computes effective aa counts (PSIC->our formula)
and marks the columns with EFFECTIVE content of gaps > threshold (effgapmax) */
void **neffsForEachCol_maskGapReg(int **ali, int n, int len, double effgapmax, double effgapRegionMin, double **n_effAa, double *sum_eff_let, int *maskgapRegion, int *apos_filtr, int *len_lowgaps, double *nef)
{
	int i,j,k,l;
	int alilen_mat, nsymbols_col, nsymbols;
	double nef_loc;
	int ele;
        double *effnu;
        double sum_let;
        int *mark;
	int flagmark;
	
	effnu = dvector(0,20);
        mark = ivector(0,n+10);  
	alilen_mat = 0;
	nsymbols = 0;
        for(j=1;j<=len;j++) {

                nsymbols_col = 0;
		sum_let=0;

                for(k=0;k<=20;++k){   			
/* Mark sequences that have amino acid  k (or gap, k=0) in this jth position */ 
                	 flagmark =0;     	
                        for(i=1;i<=n;++i){
                                mark[i]=0;

                                ele=ali[i][j];
                                if(ele==k){mark[i]=1; flagmark =1;}
                                ele=ali[i][j]-25;
                                if(ele==k) {mark[i]=1; flagmark =1;}
                        }

/* If aa k (or gap) is present in this position call compute k-th effective count */
                      if (flagmark == 1) { 
			
				effnu[k]=effective_number_nogaps(ali,mark,n,1,len);
				nsymbols_col++;

			} else { effnu[k] = 0.0; }
            
                       if (k>0) sum_let += effnu[k];
                }


		if ( sum_let > 0 && 1.0*effnu[0]/(sum_let + effnu[0]) < effgapmax ) {
			alilen_mat++;
			for (k=0; k<=20; k++) {
				n_effAa[alilen_mat][k] = effnu[k];
			}
			sum_eff_let[alilen_mat] = sum_let;
			apos_filtr[alilen_mat] = j;
			nsymbols += nsymbols_col;
			
		}
		
		
	}
	

	nef_loc = 1.0*nsymbols/alilen_mat;
	*nef = nef_loc;
	*len_lowgaps = alilen_mat;
	
	free_dvector(effnu,0,20);
        free_ivector(mark,0,n+10); 
		
}


double effective_number_nogaps(int **ali, int *marks, int n, int start, int end){

/* from the alignment of n sequences ali[1..n][1..l]
calculates effective number of sequences that are marked by 1 in mark[1..n]
for the segment of positions ali[][start..end]
Neff=ln(1-0.05*N-of-different-letters-per-site)/ln(0.95)
*/

int i,k,a,flag;
int *amco,lettercount=0,sitecount=0;
double letpersite=0,neff;

 amco=ivector(0,20); 
for(k=start;k<=end;++k){	
/******************DUMP the condition "consider only positions without gaps in the marked seqs" ***********/
/*****	flag=0;for(i=1;i<=n;++i)if(marks[i]==1 && ali[i][k]==0)flag=1;
	if(flag==1)continue;
*****/	
	for(a=0;a<=20;++a)amco[a]=0;
	for(i=1;i<=n;++i)if(marks[i]==1)amco[ali[i][k]]++;
	flag=0;for(a=1;a<=20;++a)if(amco[a]>0){flag=1;lettercount++;}
	if(flag==1)sitecount++;
			       }
if(sitecount==0)letpersite=0;
else letpersite=1.0*lettercount/sitecount;

 neff=-log(1.0-0.05*letpersite)/0.05129329438755; 

 free_ivector(amco,0,20);
return neff;
}


/* Given array of correspondence between position numbers in "filtered" profile
   (with highly gapped positions removed)
   and positions in original alignment, plus PSSM values ( = ln(Q_i/p_i) )
   for each aa at positions of filtered profile,
   produces a consensus sequence that has a gap at the highly gapped positions
   and  aa with biggest Q_i/p_i at other positions.
   Returns the resulting string as integer vector (characters as integers) [0..len-1] 
*/ 
int *qtarget2cons_maxq(int *apos_filtr, int len, double **lnqp, int alilen_mat)
{
	int i, j, pos, aa;
	double *lnp, lnq, max;
	int *cons;

	lnp = dvector(1,20);
	for(i=1;i<=20; i++) lnp[i] = log(paa[i]);

	cons = ivector(0, len-1);
	pos = 0;
	for(i=1; i<=alilen_mat; i++) {
		while( pos < apos_filtr[i]-1 ) cons[pos++] = '-';
		/* find aa with highest q */
		max = -BIG;
		aa = 0;
		for(j=1; j<=20; j++) { 
			lnq = lnqp[i][j] + lnp[j];	
			if(lnq > max) {
				max = lnq;
				aa = j;
			}
		} 

		cons[pos++] = am[aa];
	}
	
	while(pos < len) cons[pos++] = '-';

	return cons;
}


void **pssm(double **matrix, double n_eff, int len, double **lnqp)
{
	int i,j,k;
	double *f, *g;
	double sumN;
	double alpha, beta;
	double pcnt;	

	alpha = n_eff-1;
	beta = 10.0;
	f = dvector(0,20);
	g = dvector(0,20);
	for (i=1;i<=len;i++) {
		sumN = 0;
		for (j=1;j<=20;j++) sumN += matrix[i][j];
		for (j=1;j<=20;j++) {
			f[j] = 1.0*matrix[i][j]/sumN;
		}
		for (j=1;j<=20;j++) {
			g[j] = 0;	
			for (k=1;k<=20;k++) g[j]+= qmatrix[j][k]*f[k]/p_rbnsn[k-1];	
			pcnt = (alpha*f[j] + beta*g[j])/(alpha+beta);		
			lnqp[i][j] = log(pcnt/p_rbnsn[j-1]); 
		}
	}

}



int **transform_colscores(double **score_matrix, int len1, int len2, double lamb_u, double blen0, int *rescaled) {

	int i,j,k,l, k1,k2, size, ratio;
	double *score_matrix_srt;
	double min_score, max_score, clen, lnexpr, lamb_bins;
	double mean_score, disp, lamb_norm;
	int nsteps, flag, nbins, *bincnt, nbins1, *bincnt1;
	double binlen, *binend, *binctr, *binctr1;
	double transform_a, transform_b, lamscale;
	int **score_imatrix;

	size = len1*len2;
	score_matrix_srt = dvector(1,size);
	/* Get mean and assign linear array */
	k=1;
	mean_score = 0.0; 
	for(i=1; i<=len1; i++) {
		for(j=1; j<=len2; j++) {
			score_matrix_srt[k++] = score_matrix[i][j];
			mean_score += score_matrix[i][j];
		}
	}			
	mean_score *= (1.0/size);

	/* Shift score_matrix_srt[] ( use ONLY for score rescaling with fixed mean ) */
	max_score = -BIG;
	min_score = BIG;
	for (i=1; i<=size; i++) {
		score_matrix_srt[i] -= mean_score;
		if(score_matrix_srt[i] > max_score) max_score = score_matrix_srt[i];
		if(score_matrix_srt[i] < min_score) min_score = score_matrix_srt[i];
	}


	transform_a = -1.0;

	/* Calculate variance for the scores */
	disp = variance(score_matrix_srt, size);

	if(disp != 0) {

		lamb_norm = sqrt(-2.0* lamb_u * SMAT_EXPECT / disp);	

	/* Using lamb_norm, split range into bins; put counts for all populated bins in a separate array; 
	calculate lambda on these bins */ 
		flag = 1;
		nsteps = 0;
		binend = dvector(0, MAXBINS);
		binend[0] = max_score;
		while(flag == 1 && nsteps++ < 10) { 
			flag = 0;
			clen = 	(1.0 - exp(-blen0*lamb_norm) ) * exp( lamb_norm * max_score);
		
			nbins=0;
			while(binend[nbins] > min_score) {
				
				lnexpr = 1.0 - clen * exp(-lamb_norm*binend[nbins]);
				if( lnexpr < exp( -lamb_norm* (binend[nbins] - min_score) ) ) { 
					/* next bin goes below min_score; force its boundary to (min_score-1) */
					binlen = binend[nbins] - min_score +1.0;
				} else { /* make next binlen an integer number of blen0's */	
					ratio = rint( -(1.0/(blen0*lamb_norm) ) * log( lnexpr ) );
					binlen = ratio * blen0;

		
				}	
				binend[nbins+1] = binend[nbins] - binlen;
				if(++nbins > MAXBINS) { flag=1; break; }
			}

			if(flag==1) break; /* too many bins; binning failed */

			binctr = dvector(0, nbins-1);
			for(j=0;j<nbins;j++) binctr[j] = 0.5*( binend[j] + binend[j+1] ); 

			bincnt = bin_count(score_matrix_srt, len1*len2, binend, nbins, blen0);
			if(bincnt == NULL) { /* too many bins already; stop further binning */ 
				flag=1;
				free_dvector(binctr, 0, nbins-1);
				break;
			}

			for(j=0; j<nbins; j++) {
				if(bincnt[j] > 0.3 * size) { /* overpopulaed bin */
					blen0 *= 0.1;
					flag = 1; 
					free_dvector(binctr, 0, nbins-1);
					free_ivector(bincnt, 0,nbins-1);
					break;
				}
			}
		}

		if(flag==0) { /* Binning successful */

			bincnt1 = ivector(0, nbins-1);
			binctr1 = dvector(0,nbins-1);
			k=0;	
			for(j=0;j<nbins;j++) { 
				if( bincnt[j] != 0 ) { bincnt1[k] = bincnt[j];  binctr1[k++] = binctr[j]; }
			}
			nbins1 = k;

			transform_a = lambda_bins_newt(binctr1, bincnt1, nbins1-1, 0.5*lamb_norm, 2.0*lamb_norm, len1, len2);

			free_dvector(binctr, 0, nbins-1);
			free_ivector(bincnt, 0, nbins-1);
			free_dvector(binctr1, 0, nbins-1);
			free_ivector(bincnt1, 0, nbins-1);
		}
		
		free_dvector(binend, 0, MAXBINS);

	}

	if(transform_a == -1.0) {
		*rescaled = 0;
		transform_a = 1.0;
	} else {
		*rescaled = 1; 
	}

	lamscale = transform_a * recipr_lambda_u;
	transform_b=SMAT_EXPECT-(lamscale*mean_score);

/* rescale scores by forcing their mean and lambda to that of BLOSUM62 */
	score_imatrix = imatrix(1,len1,1,len2);
	for(i=1;i<=len1;i++) {
		for(j=1;j<=len2;j++) {	
			score_imatrix[i][j]= rint( f * ( lamscale * score_matrix[i][j] + transform_b) );
		} 
	} 

	free_dmatrix(score_matrix, 1, len1, 1, len2);
	free_dvector(score_matrix_srt, 1, size);

	return score_imatrix;
}


double **calc_prof_scores(double **nAa1, double **nAa2, double *sumlet1, double *sumlet2, double **sterm1, double **sterm2, int len1, int len2) {

	int i,j,k,l, k1,k2;
	double s, comp1, comp2, **scoremat;

	scoremat = dmatrix(1, len1, 1, len2);
	i=1;
	do {
		j=1;
		do {
			s=0.0;
			s+= nAa1[i][1]*sterm2[j][1] + nAa2[j][1]*sterm1[i][1];
			s+= nAa1[i][2]*sterm2[j][2] + nAa2[j][2]*sterm1[i][2];
			s+= nAa1[i][3]*sterm2[j][3] + nAa2[j][3]*sterm1[i][3];
			s+= nAa1[i][4]*sterm2[j][4] + nAa2[j][4]*sterm1[i][4];
			s+= nAa1[i][5]*sterm2[j][5] + nAa2[j][5]*sterm1[i][5];
			s+= nAa1[i][6]*sterm2[j][6] + nAa2[j][6]*sterm1[i][6];
			s+= nAa1[i][7]*sterm2[j][7] + nAa2[j][7]*sterm1[i][7];
			s+= nAa1[i][8]*sterm2[j][8] + nAa2[j][8]*sterm1[i][8];
			s+= nAa1[i][9]*sterm2[j][9] + nAa2[j][9]*sterm1[i][9];
			s+= nAa1[i][10]*sterm2[j][10] + nAa2[j][10]*sterm1[i][10];
			s+= nAa1[i][11]*sterm2[j][11] + nAa2[j][11]*sterm1[i][11];
			s+= nAa1[i][12]*sterm2[j][12] + nAa2[j][12]*sterm1[i][12];
			s+= nAa1[i][13]*sterm2[j][13] + nAa2[j][13]*sterm1[i][13];
			s+= nAa1[i][14]*sterm2[j][14] + nAa2[j][14]*sterm1[i][14];
			s+= nAa1[i][15]*sterm2[j][15] + nAa2[j][15]*sterm1[i][15];
			s+= nAa1[i][16]*sterm2[j][16] + nAa2[j][16]*sterm1[i][16];
			s+= nAa1[i][17]*sterm2[j][17] + nAa2[j][17]*sterm1[i][17];
			s+= nAa1[i][18]*sterm2[j][18] + nAa2[j][18]*sterm1[i][18];
			s+= nAa1[i][19]*sterm2[j][19] + nAa2[j][19]*sterm1[i][19];
			s+= nAa1[i][20]*sterm2[j][20] + nAa2[j][20]*sterm1[i][20];
			/*** Do normalization of s: ***/
			comp1 = sumlet1[i]-1.0;
			comp2 = sumlet2[j]-1.0;	
			s *= 1.0/(comp1+comp2);   

			scoremat[i][j] = s;

		} while(++j <= len2);
	} while(++i <= len1);

	return scoremat;
}


double **calc_seq_scores_nef(double **n_effAa1, double **n_effAa2, int len1, int len2) {
	int i, j, k, *iseq1, *iseq2;
	double **scoremat;

	iseq1 = ivector(1,alilen_mat1);
	for(i=1;i<=alilen_mat1;i++) {
		for(j=1;j<=20;j++) {
			if(n_effAa1[i][j] != 0.0) { iseq1[i]=j; break; }
		}
	}

	iseq2 = ivector(1,alilen_mat2);
	for(i=1;i<=alilen_mat2;i++) {
		for(j=1;j<=20;j++) {
			if(n_effAa2[i][j] != 0.0) { iseq2[i]=j; break; }
		}
	}

	scoremat = dmatrix(1,alilen_mat1, 1,alilen_mat2);	
	for(i=1;i<=len1;i++) {
		for(j=1;j<=len2;j++) {
			scoremat[i][j] = smatrix[ iseq1[i] ][ iseq2[j] ];
		}
	}

	free_ivector(iseq1, 1,alilen_mat1);
	free_ivector(iseq2, 1,alilen_mat2);

	return scoremat;
} 

double variance(double *score_matrix_srt, int size) {
	int i;	
	double disp;

	disp=0.0;
	for (i=1; i<=size; i++) {
		disp += score_matrix_srt[i] * score_matrix_srt[i];
	}
	disp *= 1.0/(size);

	return disp;
}


int *bin_count(double *score_matrix_srt, int size, double *binend, int nbins, double blen0) {

	int i, j, jmax, j1, j2, k, *cnt1, *cnt;
	double iblen0;

	iblen0 = 1.0/blen0;
	cnt = ivector(0,nbins-1);
	for(j=0;j<nbins;j++) cnt[j]= 0.0;

	jmax = (binend[0]-binend[nbins-1])*iblen0 +1;
	cnt1 = ivector(0,jmax);
	for(j=0;j<=jmax;j++)  cnt1[j] = 0.0;
/*	i = 1;
	do {
*/
	for(i=1; i<=size; i++) {

		j = ( score_matrix_srt[i]-binend[nbins-1] ) * iblen0;
		if(j>=0) j++; /* numbering of intervals > binend[nbins-1] starts with j=1 */
		else j = 0; /* everything <= binend[nbins-1] is in the lowest bin (corresponding to j=0) */ 

		cnt1[j]++;

/*	} while(++i <= size);
*/
	}




	cnt = ivector(0,nbins-1);
	cnt[nbins-1] = cnt1[0]; /* lowest bin, i.e. <= binend[nbins-1] */
	j2 = jmax;  /* higher bound, in interval units */
	for(k=0;k<nbins-1;k++)  { /* populate higher bins */
		cnt[k] = 0;
		j1 = ( binend[k+1] - binend[nbins-1] ) * iblen0 + 1; /* lower bound, in interval units */
		for(j=j1+1; j<=j2; j++) cnt[k] += cnt1[j];

		j2 = j1;
	}
	
	free_ivector(cnt1,0,jmax);

	return cnt;
}



/*

\!\(m -> \(1\/\(n + 
            u\ Cos[\[Theta]]\)\) \((1\/8\ Cos[\[Phi]]\ Csc[\[Theta]]\^3\ \((3\
\ c + 8\ a\ n\^4 + 24\ a\ n\^2\ u\^2 + 3\ a\ u\^4 + 
                8\ a\ n\ u\ \((4\ n\^2 + 3\ u\^2)\)\ Cos[\[Theta]] + 
                4\ \((\(-c\) + 6\ a\ n\^2\ u\^2 + a\ u\^4)\)\ Cos[
                    2\ \[Theta]] + 
                8\ a\ n\ u\^3\ Cos[3\ \[Theta]] + \((c + a\ u\^4)\)\ Cos[
                    4\ \[Theta]] + 6\ b\ n\ Sin[\[Theta]] + 
                2\ b\ u\ Sin[2\ \[Theta]] - 2\ b\ n\ Sin[3\ \[Theta]] - 
                b\ u\ Sin[4\ \[Theta]])\) - \((u + 
                n\ Cos[\[Theta]])\)\ \((n + 
                u\ Cos[\[Theta]])\)\ Csc[\[Theta]]\ Sin[\[Phi]])\), 
  s -> \((u + 
            n\ Cos[\[Theta]])\)\ Cos[\[Phi]]\ Csc[\[Theta]] + \(1\/\(8\ \((n \
+ u\ Cos[\[Theta]])\)\)\) \((Csc[\[Theta]]\^3\ \((3\ c + 8\ a\ n\^4 + 
                24\ a\ n\^2\ u\^2 + 3\ a\ u\^4 + 
                8\ a\ n\ u\ \((4\ n\^2 + 3\ u\^2)\)\ Cos[\[Theta]] + 
                4\ \((\(-c\) + 6\ a\ n\^2\ u\^2 + a\ u\^4)\)\ Cos[
                    2\ \[Theta]] + 
                8\ a\ n\ u\^3\ Cos[3\ \[Theta]] + \((c + a\ u\^4)\)\ Cos[
                    4\ \[Theta]] + 6\ b\ n\ Sin[\[Theta]] + 
                2\ b\ u\ Sin[2\ \[Theta]] - 2\ b\ n\ Sin[3\ \[Theta]] - 
                b\ u\ Sin[4\ \[Theta]])\)\ Sin[\[Phi]])\)\)


*/


/* Full m,s formulas:
\!\({m -> \((\(-\((n + \((cU + bU\ len + aU\ len\^2)\)\ Cos[
                        cT + bT\ len + aT\ len\^2])\)\)\ \((cU + bU\ len + 
                  aU\ len\^2 + n\ Cos[cT + bT\ len + aT\ len\^2])\)\ Csc[
                cT + bT\ len + aT\ len\^2]\ Sin[
                cfi + afi\/\@len + bfi\ len\^2] + 
            1\/8\ Cos[
                cfi + afi\/\@len + 
                  bfi\ len\^2]\ Csc[cT + bT\ len + aT\ len\^2]\^3\ \((6\ \
\((cB + aB\ len\^bB)\)\ n\ Sin[cT + bT\ len + aT\ len\^2] + 
                  2\ \((cU + bU\ len + aU\ len\^2)\)\ \((cB + 
                        aB\ len\^bB)\)\ Sin[
                      2\ \((cT + bT\ len + aT\ len\^2)\)] - 
                  2\ \((cB + aB\ len\^bB)\)\ n\ Sin[
                      3\ \((cT + bT\ len + aT\ len\^2)\)] - \((cU + bU\ len + 
                        aU\ len\^2)\)\ \((cB + aB\ len\^bB)\)\ Sin[
                      4\ \((cT + bT\ len + aT\ len\^2)\)] + 
                  3\ \((cU + bU\ len + aU\ len\^2)\)\^4\ \((dA + 
                        aA\ Tan[cA + bA\ len])\) + 
                  24\ \((cU + bU\ len + aU\ len\^2)\)\^2\ n\^2\ \((dA + 
                        aA\ Tan[cA + bA\ len])\) + 
                  8\ n\^4\ \((dA + aA\ Tan[cA + bA\ len])\) + 
                  8\ \((cU + bU\ len + 
                        aU\ len\^2)\)\ n\ \((3\ \((cU + bU\ len + aU\ len\^2)\
\)\^2 + 4\ n\^2)\)\ Cos[
                      cT + bT\ len + aT\ len\^2]\ \((dA + 
                        aA\ Tan[cA + bA\ len])\) + 
                  8\ \((cU + bU\ len + aU\ len\^2)\)\^3\ n\ Cos[
                      3\ \((cT + bT\ len + aT\ len\^2)\)]\ \((dA + 
                        aA\ Tan[cA + bA\ len])\) + 
                  4\ Cos[2\ \((cT + bT\ len + 
                            aT\ len\^2)\)]\ \((\(-dC\) + \((cU + bU\ len + aU\
\ len\^2)\)\^4\ \((dA + aA\ Tan[cA + bA\ len])\) + 
                        6\ \((cU + bU\ len + aU\ len\^2)\)\^2\ n\^2\ \((dA + 
                              aA\ Tan[cA + bA\ len])\) - 
                        aC\ Tan[cC + bC\ len])\) + 
                  3\ \((dC + aC\ Tan[cC + bC\ len])\) + 
                  Cos[4\ \((cT + bT\ len + 
                            aT\ len\^2)\)]\ \((dC + \((cU + bU\ len + aU\ len\
\^2)\)\^4\ \((dA + aA\ Tan[cA + bA\ len])\) + 
                        aC\ Tan[cC + bC\ len])\))\))\)/\((n + \((cU + 
                  bU\ len + aU\ len\^2)\)\ Cos[cT + bT\ len + aT\ len\^2])\), 
    s -> \((cU + bU\ len + aU\ len\^2 + 
              n\ Cos[cT + bT\ len + aT\ len\^2])\)\ Cos[
            cfi + afi\/\@len + bfi\ len\^2]\ Csc[
            cT + bT\ len + 
              aT\ len\^2] + \((Csc[cT + bT\ len + aT\ len\^2]\^3\ Sin[
                cfi + afi\/\@len + 
                  bfi\ len\^2]\ \((6\ \((cB + aB\ len\^bB)\)\ n\ Sin[
                      cT + bT\ len + aT\ len\^2] + 
                  2\ \((cU + bU\ len + aU\ len\^2)\)\ \((cB + 
                        aB\ len\^bB)\)\ Sin[
                      2\ \((cT + bT\ len + aT\ len\^2)\)] - 
                  2\ \((cB + aB\ len\^bB)\)\ n\ Sin[
                      3\ \((cT + bT\ len + aT\ len\^2)\)] - \((cU + bU\ len + 
                        aU\ len\^2)\)\ \((cB + aB\ len\^bB)\)\ Sin[
                      4\ \((cT + bT\ len + aT\ len\^2)\)] + 
                  3\ \((cU + bU\ len + aU\ len\^2)\)\^4\ \((dA + 
                        aA\ Tan[cA + bA\ len])\) + 
                  24\ \((cU + bU\ len + aU\ len\^2)\)\^2\ n\^2\ \((dA + 
                        aA\ Tan[cA + bA\ len])\) + 
                  8\ n\^4\ \((dA + aA\ Tan[cA + bA\ len])\) + 
                  8\ \((cU + bU\ len + 
                        aU\ len\^2)\)\ n\ \((3\ \((cU + bU\ len + aU\ len\^2)\
\)\^2 + 4\ n\^2)\)\ Cos[
                      cT + bT\ len + aT\ len\^2]\ \((dA + 
                        aA\ Tan[cA + bA\ len])\) + 
                  8\ \((cU + bU\ len + aU\ len\^2)\)\^3\ n\ Cos[
                      3\ \((cT + bT\ len + aT\ len\^2)\)]\ \((dA + 
                        aA\ Tan[cA + bA\ len])\) + 
                  4\ Cos[2\ \((cT + bT\ len + 
                            aT\ len\^2)\)]\ \((\(-dC\) + \((cU + bU\ len + aU\
\ len\^2)\)\^4\ \((dA + aA\ Tan[cA + bA\ len])\) + 
                        6\ \((cU + bU\ len + aU\ len\^2)\)\^2\ n\^2\ \((dA + 
                              aA\ Tan[cA + bA\ len])\) - 
                        aC\ Tan[cC + bC\ len])\) + 
                  3\ \((dC + aC\ Tan[cC + bC\ len])\) + 
                  Cos[4\ \((cT + bT\ len + 
                            aT\ len\^2)\)]\ \((dC + \((cU + bU\ len + aU\ len\
\^2)\)\^4\ \((dA + aA\ Tan[cA + bA\ len])\) + 
                        aC\ Tan[cC + bC\ len])\))\))\)/\((8\ \((n + \((cU + 
                        bU\ len + aU\ len\^2)\)\ Cos[
                      cT + bT\ len + aT\ len\^2])\))\)}\)
*/

/* m,s parameters:
{afi, bfi, cfi, aT, bT, cT, aU, bU, cU, aA, bA, cA, dA, aB, bB, cB, aC, bC, 
cC, dC}
*/

double *pevd_ms_approx261(double *p, double n, double len) {

	double a,b,c,u,fi,teta; /* parameters of dependency on neff; each depends on len */
	/* this dependency on len is defined by those parameters: */
	double afi, bfi, cfi, aT, bT, cT, aU, bU, cU, aA, bA, cA, dA, aB, bB, cB, aC, bC, cC, dC;
	double cscT, cscT3, u4, sum1, term1, term2; /* intermediate variables */
	double *ms;

	afi = p[0]; bfi = p[1]; cfi = p[2];
	aT = p[3]; bT = p[4]; cT = p[5];
	aU = p[6]; bU = p[7]; cU = p[8];
	aA = p[9]; bA = p[10]; cA = p[11]; dA = p[12];
	aB = p[13]; bB = p[14]; cB = p[15];
	aC = p[16]; bC = p[17]; cC = p[18]; dC = p[19]; 

	fi = afi*1.0/(sqrt(len)) + bfi*SQUARE(len) + cfi;
	teta = aT*SQUARE(len) + bT*len + cT;
	u = aU*SQUARE(len) + bU*len + cU;
	a = aA * tan(cA + bA*len) + dA;
	b = aB * exp( bB*log(len) ) + cB;
	c = aC * tan(cC + bC*len) + dC;

	cscT = 1.0/sin(teta);
	cscT3= cscT*cscT*cscT;
	u4 = SQUARE(SQUARE(u));
	sum1 = 3.0*c + 8.0*a*SQUARE(SQUARE(n)) + 24.0 *a*SQUARE(n)*SQUARE(u) + 3.0*a*u4 + 8.0*a*n*u*( 4.0*SQUARE(n)+3.0*SQUARE(u) )*cos(teta) + 4.0*(-c + 6.0*a*SQUARE(n)*SQUARE(u) + a*u4 ) * cos(2.0*teta) + 8.0*a*n*u*u*u * cos(3.0*teta) + (c + a*u4) * cos(4.0*teta) + 6.0*b*n*sin(teta) + 2.0*b*u*sin(2.0*teta) - 2.0*b*n*sin(3.0*teta) - b*u*sin(4.0*teta);
	term1 = 0.125 * (1.0/(n+u*cos(teta))) * cscT3 * sum1;
	term2 = (u + n*cos(teta)) * cscT ; 

	ms = dvector(0,1);

	ms[0] = cos(fi) * term1 - sin(fi) * term2;  
	ms[1] = sin(fi) * term1 + cos(fi) * term2; 

	return ms;
}





/* a,b parameters:
{d1, d2, d3, \[Kappa], \[Omega], \[Mu], \[Kappa]1, \[Omega]1, \[Mu]1, z, w1, 
w2}

*/

/* a,b calculation:

\!\(\(func0[x_] := d1\ \((\ x - d2)\)\^4 + \ d3;\)\[IndentingNewLine]
  \(func1[neff_, len_]\  := 
      w1\ \ Exp[\[Kappa]1\ len\^2\ neff\^2 + \[Omega]1\ neff\^2\  + 
              w2\ len\ neff\  + \ \[Mu]1\ len\ ] + \[Kappa]\ len\ neff + \
\[Omega]\ neff\  + \[Mu]\ len + \ z;\)\)



FACTOR=1.5;

\[Alpha] = 
    NSolve[Evaluate[\[Alpha] + FACTOR func0[\[Alpha]] - func1[n, len] /. 
              parsub] == 0]\[LeftDoubleBracket]4, 1, 2\[RightDoubleBracket];
\[Beta] = func0[\[Alpha]] /. parsub;


*/

double funcab(double a, double n, double len, double *p)
{
	double factor=1.5;
	double d1, d2, d3;
	double kappa, omega, mu, kappa1, omega1, mu1, w1, w2, z;
	double nlen, func0, func1, f;

	d1 = p[0]; d2 = p[1]; d3 = p[2];
	kappa = p[3]; omega = p[4]; mu = p[5];
	kappa1 = p[6]; omega1 = p[7]; mu1 = p[8];
	z = p[9]; w1 = p[10]; w2 = p[11];
	
	func0 = d1 * SQUARE( SQUARE(a - d2) ) + d3;
	nlen = n*len;
	func1 = w1 * exp( kappa1*SQUARE(nlen) + omega1*SQUARE(n) + w2*nlen + mu1*len ) +  kappa*nlen + omega*n + mu*len + z;
	f = a + factor*func0 - func1; 

	return f;
}


void funcab_d(double a, double n, double len, double *p, double *f, double *df)
{

	double factor=1.5;
	double d1, d2, d3;
	double kappa, omega, mu, kappa1, omega1, mu1, w1, w2, z;
	double nlen, func0, dfunc0, func1;

	d1 = p[0]; d2 = p[1]; d3 = p[2];
	kappa = p[3]; omega = p[4]; mu = p[5];
	kappa1 = p[6]; omega1 = p[7]; mu1 = p[8];
	z = p[9]; w1 = p[10]; w2 = p[11];
	
	func0 = d1 * SQUARE( SQUARE(a - d2) ) + d3;
	dfunc0 = 4.0*d1 * SQUARE(a - d2)*(a - d2); 
	nlen = n*len;
	func1 = w1 * exp( kappa1*SQUARE(nlen) + omega1*SQUARE(n) + w2*nlen + mu1*len ) +  kappa*nlen + omega*n + mu*len + z;
	*f = a + factor*func0 - func1; 
	*df = 1.0 + factor * dfunc0;

}


double *pevd_ab_approx261(double *p, double n, double len) {

	int i, j, k=0;
	double low= 0.45;
	double hi = 0.5;
	double alpha=-1.0, beta=-1.0, *ab;
	double x1, x2, xacc;
	double f, fmid, rtn, df, dx;

        /* bracket around zero, identify sign change */
        x1=low;
        x2=hi;

        f=funcab(x1, n, len, p);
        fmid=funcab(x2, n, len, p);
        while(f*fmid>=0.0 && ++k<=JMAX){
            
		x1+=0.025; x2+=0.025;
                f=funcab(x1,n,len,p);
                fmid=funcab(x2,n,len,p);
        }

        if(k>JMAX) {
                /* fprintf(stderr, "WARNING: Maximum number of expansions exceeded in pevd_ab_approx261\n");
		*/
		ab = dvector(0,1);
		ab[0]=ab[1]=-1.0;
                return ab;
        }

        xacc = 1e-4;
	x1 -= 0.05; x2 += 0.05;
        rtn=0.5*(x1+x2);
        for (j=1;j<=JMAX;j++) {
                funcab_d(rtn, n, len, p, &f,&df);
                dx=f/df;
                rtn -= dx;
                if ((x1-rtn)*(rtn-x2) < 0.0) {
                        fprintf(stderr, "Jumped out of brackets in pevd_ab_approx261");
			ab = dvector(0,1);
			ab[0]=ab[1]=-1.0;
			return ab;
		}
                if (fabs(dx) < xacc) { alpha=rtn; break; }
        }

        if(alpha==-1.0) { 
/*		fprintf(stderr, "WARNING: Maximum number of iterations exceeded in pevd_ab_approx261\n");
		fprintf(stderr, "         for n=%.3e  len=%.2e\n", n, len);
*/
	} else {
		/* beta = func0(a) = d1*(a-d2)^4 + d3 */
		beta = p[0] * SQUARE( SQUARE(alpha - p[1]) ) + p[2]; 
	} 


	ab = dvector(0,1);
	ab[0] = alpha;
	ab[1] = beta;

	return ab;

}


/* Calculates integral on the range (x1;x2) for "Power-EVD" function with this pdf:
f(x) = <norm>* exp[ -exp( -(x-m)/s ) - b (x^a - m^a)/s^a ],

*/
double powerevd_integr_ab1ms(double x1, double x2, double *fpars, int npars)
{
	double a,b,m,s;
	double xa, ma, sa, msa, rat, ratmax, y1, y2, ymin, f;

	a = fpars[0];
	b = fpars[1];
	m = fpars[2];
	s = fpars[3];

	y1 = (x1-m)*(1.0/s);	

	if(x2<0.0) { y2 = 100.0; } 
	else { 
		/* new variable : y = (x - m)/s  */
		y2 = (x2-m)*(1.0/s);	
	}

	if( y2 < 0.0 && powerevd_limitarg_ab1ms_1(y2, fpars, npars) == 0.0 ) 
		/* Far left tail */
		return 0.0; 
	else if( y1 > 0.0 && powerevd_limitarg_ab1ms_1(y1, fpars, npars) == 0.0 ) 
		/* Far right tail */
		return 0.0; 
	
	f = qtrap_npars( powerevd_limitarg_ab1ms_1, fpars, npars, y1, y2); 

	return f;
} 


/* Function for integration of "power-EVD" function, with changed argument 
 y = (x - m)/s    
*/ 
double powerevd_limitarg_ab1ms_1(double x, double *fpars, int npars ) {
	double a, b, m, s;
	double ma, sa, rat, ms, c, z,f;

	a = fpars[0];
	b = fpars[1];
	m = fpars[2];
	s = fpars[3];

	/* Intermediate parameter */
	ma = exp(a*log(m));
	sa = exp(a*log(s));
	z = exp( a*log(m+x*s) );
	rat = (z-ma)*(1.0/sa);

	f = s* exp(-exp(-x) - b*rat);	

/*	fprintf(stderr, "powerevd_limitarg(%e): z=%e b=%e kmn=%e  m=%e ma=%e sa=%e  rat=%e f=%e\n", x,z,b,fpars[3],m,ma, sa, rat,f);
*/	
	return f;
}

double qtrap_npars(double (*func)(double, double *, int), double *fpars, int npars, double a, double b)
{
	void nrerror(char error_text[]);
	int j;
	double s,olds;

	olds = -1.0e30;
	for (j=1;j<=JMAX;j++) {
		s=midpnt_npars(func,fpars,npars,a,b,j);


/*		fprintf(stderr, "qtrap_npars(a=%.3e to b=%.3e, j=%d):  s=%e\n", a,b, j ,s);
*/

		if (fabs(s-olds) < EPS*fabs(olds)) return s;
		olds=s;
	}

	if(s > 1e-30)
		nrerror("Too many steps in routine qtrap");

	return 0.0;
}


#define FUNC(x) ((*func)(x, fpars, npars))
double midpnt_npars(double (*func)(double, double *, int), double *fpars, int npars, double a, double b, int n)
{
	double x,tnm,sum,del,ddel;
	static double s;
	int it,j;

	if (n == 1) {
		return (s=(b-a)*FUNC(0.5*(a+b)));
	} else {
		for(it=1,j=1;j<n-1;j++) it *= 3;
		tnm=it;
		del=(b-a)/(3.0*tnm);
		ddel=del+del;
		x=a+0.5*del;
		sum=0.0;
		for (j=1;j<=it;j++) {
			sum += FUNC(x);
			x += ddel;
			sum += FUNC(x);
			x += del;
		}
		s=(s+(b-a)*sum/tnm)/3.0;
		return s;
	}
}
#undef FUNC





/* Processes top n sequences of an alignment (characters) and puts them 
into integer matrix [0..ntot-1][0..len-1], starting with row n0.
*/
int **strings2int(char **seqs, int n, int n0, int ntot, int len)
{
	int i, j;
	int **ali;

	if(ntot < n0+n) nrerror("strings2int : number of rows in matrix is insufficient for recording alignment");
	
	ali = imatrix(0, ntot-1, 0, len-1);
	for(i=0; i<n; i++) 
		for(j=0; j<len; j++)
			ali[i+n0][j] = seqs[i][j]; 

	return ali;
}						

/* From arrays of numbers of aligned positions of two profiles and nseqs representative sequences for each profile,
   creates the output profile-profile alignment:
   1. Fills in the "gapped" positions of the two input alignments that were disregarded in aligning profiles.
   2. If specified by cutgaps, removes parts of final profile-profile alignment that contain only gaps in all representative sequences. 

   Input:
   arrays of aligned positions of two profiles filtered by gap content (apos1,2[1..segment_len]);
   array of positive matches for these profiles (positive[1..segment_len]);
   nseqs aligned sequences for each profile, characters as integers (ali_in1,2[0..nseqs-1][0..alilen1,2-1]);
   flags: cutgaps (for removing fully gapped output positions)
   and idaa (for marking (in positive_out[]) of identical residue between the bottom sequence of ali 1 and the top sequence of alignment 2).
   Output: 
   output arrays of aligned residues (nseqs residues in each column) of the two alignments, chars as integers (aseq_out1,2[0..nseqs1,2][0..lout-1]);
   output array of positive matches for these residues (positive_out[0..lout-1]), chars as integers : ' ' for non-positive matches, '+' for positive;
   ( where lout is maximal potentially possible length of the output alignment, given lengths of apos1,2[] ); 
   actual length of the output alignment (*outpos).

*/ 
void fill_gapped_regions_check_fullygapped(int *apos1, int *apos2, int *positive, int **ali_in1, int **ali_in2, int segment_len, int nseqs1, int nseqs2, int cutgaps, int **aseq_out1, int **aseq_out2, int *positive_out, int lout, int *outpos) {

	int i,j,k,l, skip=0;
	int inputpos1, inputpos2, outputpos, pos_beforegap1, pos_beforegap2, step1, step2;

	inputpos1 = apos1[1]-2;
	inputpos2 = apos2[1]-2;
	pos_beforegap1 = apos1[1]-1;
	pos_beforegap2 = apos2[1]-1;
	outputpos=0;

	for(i=1;i<=segment_len;i++){

		if(apos1[i] != 0) { step1 = apos1[i]-pos_beforegap1; pos_beforegap1 = apos1[i];}
		else { step1 = 1; }
		if(apos2[i] != 0) { step2 = apos2[i]-pos_beforegap2; pos_beforegap2 = apos2[i];}
		else { step2 = 1; }

		if(step1 > 1 || step2 > 1) { /* something was filtered out in one of the alignments */

			if(step1 >= step2) {

				for(l=1;l<=step1-step2;l++) { /*spacer in 2nd output set */
					inputpos1++;
					if(cutgaps) {
						skip = 1;
						for(j=0; j<nseqs1; j++) { /* check if position is fully gapped in 1st output set */
							if( isalpha(ali_in1[j][inputpos1]) ) { skip=0; break; }
						}
					} 

					if(skip==0) { 
						positive_out[outputpos]=' ';
						for(j=0;j<nseqs1;j++) 
							aseq_out1[j][outputpos] =  am2lower( ali_in1[j][inputpos1] );
						for(j=0;j<nseqs2;j++)
							aseq_out2[j][outputpos] = '~';

						outputpos++;
					}
				}

				for(l=step1-step2+1;l<step1;l++) { /* put back gapped positions in both */
					inputpos1++;
					inputpos2++;
					if(cutgaps) {
						skip = 1;
						/*check if position is fully gapped in both output sets */
						for(j=0; j<nseqs1; j++)
							if( isalpha(ali_in1[j][inputpos1]) ) { skip=0; break; }
						for(j=0; j<nseqs2; j++) 
							if( isalpha(ali_in2[j][inputpos2]) ) { skip=0; break; }
					}

					if(skip==0) {
						positive_out[outputpos]=' ';
						for(j=0;j<nseqs1;j++) 
							aseq_out1[j][outputpos] = am2lower( ali_in1[j][inputpos1] );
						for(j=0;j<nseqs2;j++) 
							aseq_out2[j][outputpos] = am2lower( ali_in2[j][inputpos2] );

						outputpos++;
					}
				}

			} else {
			
				for(l=1;l<=step2-step1;l++) { /* spacer in 1st output set */
					inputpos2++;
					if(cutgaps) {
						skip = 1;
						for(j=0; j<nseqs2; j++) { /* check if position is fully gapped in 2nd output set */
							if( isalpha(ali_in2[j][inputpos2]) ) { skip=0; break; }
						}
					}

					if(skip==0) {
						positive_out[outputpos]=' ';
						for(j=0;j<nseqs2;j++)
							aseq_out2[j][outputpos] =   am2lower( ali_in2[j][inputpos2] ) ;
						for(j=0;j<nseqs1;j++)
							aseq_out1[j][outputpos] = '~';

						outputpos++;
					}
				}

				for(l=step2-step1+1;l<step2;l++) {  /* put back gapped positions in both */
					inputpos1++;
					inputpos2++;
					if(cutgaps) {
						skip = 1;
						/* check if position is fully gapped in both output sets */
						for(j=0; j<nseqs1; j++) 
							if( isalpha(ali_in1[j][inputpos1]) ) { skip=0; break; }
						for(j=0; j<nseqs2; j++)
							if( isalpha(ali_in2[j][inputpos2]) ) { skip=0; break; }
					}

					if(skip==0) {
						positive_out[outputpos]=' ';
						for(j=0;j<nseqs1;j++)
							aseq_out1[j][outputpos] =  am2lower( ali_in1[j][inputpos1] );
						for(j=0;j<nseqs2;j++)
							aseq_out2[j][outputpos] =  am2lower( ali_in2[j][inputpos2] );

						outputpos++;
					}
				}

			}
		}


/* Fill in capital letters in the end of 'lower case' region, or if no columns were filtered out */
		if (apos1[i] == 0) { /* gap in 1 alignment inserted by Smith-Waterman */
			inputpos2++;
			if(cutgaps) {
				skip = 1;
				for(j=0; j<nseqs2; j++) { /* check if position is fully gapped in 2nd output set */
					if( isalpha(ali_in2[j][inputpos2]) ) { skip=0; break; }
				}
			}

			if(skip==0) {
				positive_out[outputpos] = positive[i];
				for(j=0;j<nseqs1;j++)
					aseq_out1[j][outputpos] = '='; 
				for(j=0;j<nseqs2;j++)
					aseq_out2[j][outputpos] = am2upper( ali_in2[j][inputpos2] );

				outputpos++;
			}

		} else if (apos2[i] == 0) { /* gap in 2 alignment inserted by Smith-Waterman */
			inputpos1++;
			if(cutgaps) {
				skip = 1;
				for(j=0; j<nseqs1; j++) { /* check if position is fully gapped in first output set */
					if( isalpha(ali_in1[j][inputpos1]) ) { skip=0; break; }
				}
			}

			if(skip==0) {
				positive_out[outputpos] = positive[i];
				for(j=0;j<nseqs1;j++) 
					aseq_out1[j][outputpos] = am2upper( ali_in1[j][inputpos1] );
				for(j=0;j<nseqs2;j++) 
					aseq_out2[j][outputpos] = '='; 

				outputpos++;
			}

		} else {
			inputpos1++;
			inputpos2++;
			if(cutgaps) {
				skip = 1;
				/* check if position is fully gapped in both output sets */
				for(j=0; j<nseqs1; j++) 
					if( isalpha(ali_in1[j][inputpos1]) ) { skip=0; break; }
				for(j=0; j<nseqs2; j++)
					if( isalpha(ali_in2[j][inputpos2]) ) { skip=0; break; }
			}

			if(skip==0) {
				positive_out[outputpos] = positive[i];
				for(j=0;j<nseqs1;j++)
					aseq_out1[j][outputpos] = am2upper( ali_in1[j][inputpos1] );
				for(j=0;j<nseqs2;j++)
					aseq_out2[j][outputpos] = am2upper( ali_in2[j][inputpos2] );

				outputpos++;
			}
		}		
	
	}				
	*outpos = outputpos;

}

/* Given two aligned sequences (chars as integers) seq1[0..len-1] and seq2[0..len-1],
   finds identical matches and puts residue symbols (as integers) in the  array
   of positive matches.
*/
void mark_identaa(int *positive, int *seq1, int *seq2, int len)
{
	int i;
	for(i=0; i<len; i++) {
		if( seq1[i] == seq2[i] && isalpha(seq1[i]) ) positive[i]=seq1[i];
	}
}


int alipos2seqpos(int alipos, int seqstart, int *seq)	
{
	int i=0, p=seqstart-1;

	while(i++ < alipos) 
		if( isalpha(seq[i-1]) ) p++;

	i--;
	if( !isalpha(seq[i-1]) ) /* gap at starting alignment position */
		p++; /* roll up to the next residue */

	return p;
}


double lambda_bins_newt(double *binctr, int *bincnt, int n, double low, double hi, int len1, int len2) {

	int i,j,k;
	double dx,f,df,fmid,rtn;
	double x1, x2, xacc, norm_coef;

	norm_coef = 1.0* len1*len2;
	/* bracket around zero, identify sign change */
	x1=low;
	x2=hi;

	f=func_bins(x1,binctr, bincnt, n, norm_coef);
	fmid=func_bins(x2,binctr, bincnt, n, norm_coef);
	/* fprintf(stderr,"init x1: %f x2: %f f: %f fmid: %f\n",x1,x2,f,fmid); */
	while(f*fmid>=0.0){
		x1-=fabs(x1*2.0); x2+=(x2*2.0);
		f=func_bins(x1,binctr, bincnt, n, norm_coef);
		fmid=func_bins(x2,binctr, bincnt, n, norm_coef);
		fprintf(stderr,"expand x1: %f x2: %f f: %f fmid: %f\n",x1,x2,f,fmid);
	}

	xacc = 1e-4;
	rtn=0.5*(x1+x2);
	for (j=1;j<=JMAX;j++) {
		funcbins_d(rtn, binctr, bincnt, n, norm_coef, &f,&df);
		dx=f/df;
		rtn -= dx;
		if ((x1-rtn)*(rtn-x2) < 0.0)
			nrerror("Jumped out of brackets in rtnewt");
		if (fabs(dx) < xacc) return rtn;
	}
	nrerror("Maximum number of iterations exceeded in rtnewt");
	return -1.0;
}

double func_bins(double x, double *binctr, int *bincnt, int n, double norm_coef) {

	int i;
	double f, ffin;
	f=0.0;
	for (i=0; i<=n; i++) { f += 1.0* bincnt[i] * exp(x*binctr[i]); } /* sum exponents weighted by counts in each bin */ 
	ffin = (f/(norm_coef*exp(-lambda_u*SMAT_EXPECT))) - 1.0; /* blosum_lambda_al or lambda_u, blosum_mean or SMAT_EXPECT */
	return ffin;
}

void funcbins_d(double x, double *binctr, int *bincnt, int n, double norm_coef, double *f,double *df) {

	int i;
	double func, der, t, c;
	func = der = 0.0;
	for (i=0; i<=n; i++) { 
		/* sum exponents weighted by counts in each bin */ 
		t = 1.0* bincnt[i] * exp(x*binctr[i]); 
		func += t;
		der += binctr[i] * t;
	}

	c = exp(lambda_u*SMAT_EXPECT) * (1.0/ norm_coef);
	*f = func * c - 1.0; 
	*df = der * c; 

}

/* Calculates lambda given a score matrix s[1..20][1..20] and
   background aa frequencies p[1..20], using Newton-Raphson method
*/
double lambu20(double **s, double *p, double low, double hi)
{
	int i,j,k;
	double dx,f,df,fmid,rtn;
	double x1=low, x2=hi, xacc=1e-4;

	/* bracket around zero, identify sign change */
	f=flambu20(x1,s,p, &df);
	fmid=flambu20(x2,s,p, &df);
	while(f*fmid>=0.0){
		x1-=fabs(x1*2.0); x2+=(x2*2.0);
		f=flambu20(x1,s,p, &df);
		fmid=flambu20(x2,s,p, &df);
		fprintf(stderr,"expand x1: %f x2: %f f: %f fmid: %f\n",x1,x2,f,fmid);
	}

	rtn=0.5*(x1+x2);
	for (j=1;j<=JMAX;j++) {
		f = flambu20(rtn,s,p, &df);
		dx=f/df;
		rtn -= dx;
		if ((x1-rtn)*(rtn-x2) < 0.0)
			nrerror("Jumped out of brackets in lambu20");
		if (fabs(dx) < xacc) return rtn;
	}

	nrerror("Maximum number of iterations exceeded in lambu20");
}

double flambu20(double x, double **s, double *p, double *df) {

	int i, j;
	double f, t;
	f = *df = 0.0;
	for (i=1; i<=20; i++) { 
		for (j=1; j<=20; j++) {
			t = p[i]*p[j]*exp( x*s[i][j] );
			f += t;
			*df += s[i][j] * t;
		}
	}

	return f-1.0;
}


/* read matrix from file *fmat */
double  **read_aa_dmatrix(FILE *fmat){
	
int i,ncol,ri,rj,c,flag,j;
int col[31],row[31];
char stri[31];
double t;
double **mat;

mat=dmatrix(0,25,0,25);
for(i=0;i<=25;++i)for(j=0;j<=25;++j)mat[i][j]=0.0;

ncol=0;
i=0;
ri=0;
rj=0;
flag=0;

while( (c=getc(fmat)) != EOF){

if(flag==0 && c=='#'){flag=-1;continue;}
else if(flag==-1 && c=='\n'){flag=0;continue;}
else if(flag==-1){continue;}
else if(flag==0 && c==' '){flag=1;continue;}
else if(flag==1 && c=='\n'){flag=10;  continue;}
else if(flag==1 && c==' '){continue;}
else if(flag==1){
                ++ncol;
                if(ncol>=25){nrerror("matrix has more than 24 columns: FATAL");exit(0);}
                col[ncol]=am2numBZX(c);
                continue;
                }
else if(flag==10 && c!=' ' && c!='#'){
                ri=0;
                ++rj;
                if(rj>=25){nrerror("matrix has more than 24 rows: FATAL");exit(0);}
                row[rj]=col[rj];
                for(i=0;i<=30;++i){stri[i]=' ';}
                stri[0]=c; 
                j=0;
                flag=3;
                continue;
                }
else if (flag==2 && c==' '){for(i=0;i<=30;++i){stri[i]=' ';}j=0;continue;}
else if (flag==2 && c=='\n'){flag=10;continue;}
else if (flag==2){flag=3;stri[j]=c;if(j>30){nrerror("string too long:FATAL");exit(0);}continue;}
else if (flag==3 && c==' ' || flag==3 && c=='\n'){
                        j=0;
                        ++ri;
                        t=atof(stri);
                        mat[row[rj]][col[ri]]=t;
                        if (c=='\n')flag=10;else flag=2;
                        continue;
                        }
else if (flag==3){stri[++j]=c;continue;}

}

for(i=1;i<=ncol;i++) {
		for(j=i+1;j<=ncol;j++) {
			mat[col[i]][col[j]]=mat[col[j]][col[i]];
		}
	}
return mat;
}


void argument()
{
fprintf(stderr,"COMPASS 3.0\n");
fprintf(stderr,"COMPASS is the program for the comparison of two multiple protein alignments\n");
fprintf(stderr,"Arguments:\n\n");
fprintf(stderr,"  -i   First input alignment or single sequence[File in]\n");
fprintf(stderr,"       (ClustalW, Stockholm, FASTA formats, or simple format:\n");
fprintf(stderr,"        <name1> [<start1>] <sequence segment> [<end1>]\n");
fprintf(stderr,"        <name2> [<start2>] <sequence segment> [<end2>]\n");
fprintf(stderr,"        ....\n");
fprintf(stderr,"        Single sequence can also be simply a single or multiple\n");
fprintf(stderr,"        lines, with or without a name:\n");
fprintf(stderr,"        [<name>] <sequence segment>\n"); 
fprintf(stderr,"        [<sequence segment>]\n"); 
fprintf(stderr,"        ....\n");
fprintf(stderr,"\n");
fprintf(stderr,"  -j   Second input alignment or single sequence[File in]\n");
fprintf(stderr,"\n");
fprintf(stderr,"Optional arguments:\n\n");
fprintf(stderr,"  -o    Output file (default = STDOUT) with COMPASS result:\n");
fprintf(stderr,"        List of hits with E-value below cutoff and list of produced alignments\n");
fprintf(stderr,"        (top sequences and consensus sequences (optional) shown in both input alignments):\n");
fprintf(stderr,"        pluses (+) denote matches with positive scores;\n");
fprintf(stderr,"        Capital letters and dashes (-) denote the profile positions\n");
fprintf(stderr,"        that were used for alignment construction.\n");
fprintf(stderr,"        Lower-case letters and dots (.) denote the profile positions with high gap content\n");
fprintf(stderr,"        that were disregarded in the process of alignment construction\n");
fprintf(stderr,"        (they may be aligned with gaps (~) that are not scored).\n");
fprintf(stderr,"        Equal signs (=) denote the gaps introduced in profiles.\n");
fprintf(stderr,"  -n1   Number of top sequences to print out in aligned alignment 1 (query)\n");
fprintf(stderr,"        Default = 1\n");
fprintf(stderr,"  -n2   Number of top sequences to print out in aligned alignment 2 (subject)\n");
fprintf(stderr,"        Default = 1\n");
fprintf(stderr,"  -c    Display consensus sequences for both aligned alignments? (0/1)\n");
fprintf(stderr,"        Default = 1\n");
fprintf(stderr,"  -u    cUt out position matches that are fully gapped in the output sets of sequences\n");
fprintf(stderr,"        for both aligned alignments? (0/1)\n");
fprintf(stderr,"        Default = 1\n");
fprintf(stderr,"  -b    Length of alignment segment in the output format\n");
fprintf(stderr,"        Default = 60\n");
fprintf(stderr,"  -q	Path to the matrix of residue pair frequencies (q_ij)\n");
fprintf(stderr,"        Default = BLOSUM62\n");
fprintf(stderr,"  -g	Threshold of gap content to disregard 'gapped' columns (0.0 to 1.0)\n");
fprintf(stderr,"        Default = 0.5\n");
fprintf(stderr,"  -O	Penalty for gap opening (integer)\n");
fprintf(stderr,"        Default = 10\n");
fprintf(stderr,"  -E    Penalty for gap extension (integer)\n");
fprintf(stderr,"        Default = 1\n");
fprintf(stderr,"  -L    Ungapped lambda for a standard sequence-sequence scoring system\n");
fprintf(stderr,"        used in the score rescaling for profile-profile comparison\n");
fprintf(stderr,"        Default = 0.3176 (ungapped lambda for BLOSUM62)\n");
fprintf(stderr,"  -x    Expected value of individual positional scores for a standard sequence-sequence\n");
fprintf(stderr,"        scoring system; used in the score rescaling for profile-profile comparison\n");
fprintf(stderr,"        Default = expected value for BLOSUM62\n");
fprintf(stderr,"  -z    Database length (including only columns with gap content lower than the threshold (-g))\n");
fprintf(stderr,"        used for E-value calculation\n");
fprintf(stderr,"        Default = length of alignment 2\n");
}


void nrerror(char error_text[]){
fprintf(stderr,"%s\n",error_text);
fprintf(stderr,"FATAL - execution terminated\n");
exit(1);
}


char *cvector(long nl, long nh){
char *v;
v=(char *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(int)));
if (!v) nrerror("allocation failure in ivector()");
return v-nl+NR_END;
}


int *ivector(long nl, long nh){
int *v;
v=(int *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(int)));
if (!v) nrerror("allocation failure in ivector()");
return v-nl+NR_END;
}

long *lvector(long nl, long nh){
long int *v;
v=(long int *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(long int)));
if (!v) nrerror("allocation failure in lvector()");
return v-nl+NR_END;
}

double *dvector(long nl, long nh){
double *v;
v=(double *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(double)));
if (!v) nrerror("allocation failure in dvector()");
return v-nl+NR_END;
}

char **cmatrix(long nrl, long nrh, long ncl, long nch){
long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
char **m;
m=(char **)malloc((size_t)((nrow+NR_END)*sizeof(char*)));
if (!m) nrerror("allocation failure 1 in cmatrix()");
m += NR_END;
m -= nrl;

m[nrl]=(char *)malloc((size_t)((nrow*ncol+NR_END)*sizeof(char)));
if (!m[nrl]) nrerror("allocation failure 2 in cmatrix()");
m[nrl] += NR_END;
m[nrl] -= ncl;

for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;

return m;

}

int **imatrix(long nrl, long nrh, long ncl, long nch){
long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
int **m;
m=(int **)malloc((size_t)((nrow+NR_END)*sizeof(int*)));
if (!m) nrerror("allocation failure 1 in imatrix()");
m += NR_END;
m -= nrl;

m[nrl]=(int *)malloc((size_t)((nrow*ncol+NR_END)*sizeof(int)));
if (!m[nrl]) nrerror("allocation failure 2 in imatrix()");
m[nrl] += NR_END;
m[nrl] -= ncl;

for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;

return m;

}

double **dmatrix(long nrl, long nrh, long ncl, long nch){
long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
double **m;
m=(double **)malloc((size_t)((nrow+NR_END)*sizeof(double*)));
if (!m) nrerror("allocation failure 1 in dmatrix()");
m += NR_END;
m -= nrl;

m[nrl]=(double *)malloc((size_t)((nrow*ncol+NR_END)*sizeof(double)));
if (!m[nrl]) nrerror("allocation failure 2 in dmatrix()");
m[nrl] += NR_END;
m[nrl] -= ncl;

for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;

return m;
}

void free_ivector(int *v, long nl, long nh)
/* free an int vector allocated with ivector() */
{
	free((FREE_ARG) (v+nl-NR_END));
}

void free_cvector(char *v, long nl, long nh)
/* free an unsigned char vector allocated with cvector() */
{
	free((FREE_ARG) (v+nl-NR_END));
}

void free_dvector(double *v, long nl, long nh)
/* free a double vector allocated with dvector() */
{
	free((FREE_ARG) (v+nl-NR_END));
}



void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch)
/* free a double matrix allocated by dmatrix() */
{
	free((FREE_ARG) (m[nrl]+ncl-NR_END));
	free((FREE_ARG) (m+nrl-NR_END));
}

void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch)
/* free an int matrix allocated by imatrix() */
{
	free((FREE_ARG) (m[nrl]+ncl-NR_END));
	free((FREE_ARG) (m+nrl-NR_END));
}

void free_cmatrix(char **m, long nrl, long nrh, long ncl, long nch)
/* free a double matrix allocated by dmatrix() */
{
	free((FREE_ARG) (m[nrl]+ncl-NR_END));
	free((FREE_ARG) (m+nrl-NR_END));
}



int am2num(c)
{
switch (c) {
           	 case 'W': case 'w':
                	c=1; break;
           	 case 'F': case 'f':
                	c=2; break;
           	 case 'Y': case 'y':
                	c=3; break;
           	 case 'M': case 'm':
                	c=4; break;
           	 case 'L': case 'l':
                	c=5; break;
           	 case 'I': case 'i':
          		c=6; break;
           	 case 'V': case 'v':
           		c=7; break;
          	 case 'A': case 'a': 
			c=8; break;
           	 case 'C': case 'c':
                	c=9; break;
		 case 'G': case 'g':
			c=10; break;
           	 case 'P': case 'p':
             	 	c=11; break;
       		 case 'T': case 't':
			c=12; break;
	         case 'S': case 's':
			c=13; break;
           	 case 'N': case 'n':
                	c=14; break;
           	 case 'Q': case 'q':
                	c=15; break;
           	 case 'D': case 'd':
                	c=16; break;
           	 case 'E': case 'e':
                	c=17; break;
           	 case 'H': case 'h':
                	c=18; break;
           	 case 'R': case 'r':
                	c=19; break;
           	 case 'K': case 'k':
                	c=20; break;

		/* read special letters as alanine */
                 case 'B': case 'b':
                        c=8; break;
                 case 'J': case 'j':
                        c=8; break;
                 case 'U': case 'u':
                        c=8; break;
                 case 'Z': case 'z':
                        c=8; break;
                 case 'X': case 'x':
                        c=8; break;
           	 default : 
			c=0; 
		}
return (c);
}


int am2numBZX(c)
{
switch (c) {
                 case 'W': case 'w':
                        c=1; break;
                 case 'F': case 'f':
                        c=2; break;
                 case 'Y': case 'y':
                        c=3; break;
                 case 'M': case 'm':
                        c=4; break;
                 case 'L': case 'l':
                        c=5; break;
                 case 'I': case 'i':
                        c=6; break;
                 case 'V': case 'v':
                        c=7; break;
                 case 'A': case 'a':
                        c=8; break;
                 case 'C': case 'c':
                        c=9; break;
                 case 'G': case 'g':
                        c=10; break;
                 case 'P': case 'p':
                        c=11; break;
                 case 'T': case 't':
                        c=12; break;
                 case 'S': case 's':
                        c=13; break;
                 case 'N': case 'n':
                        c=14; break;
                 case 'Q': case 'q':
                        c=15; break;
                 case 'D': case 'd':
                        c=16; break;
                 case 'E': case 'e':
                        c=17; break;
                 case 'H': case 'h':
                        c=18; break;
                 case 'R': case 'r':
                        c=19; break;
                 case 'K': case 'k':
                        c=20; break;
                 case 'B': case 'b':
                        c=21; break;
                 case 'Z': case 'z':
                        c=22; break;
                 case 'X': case 'x':
                        c=23; break;
                 case '*':
                        c=24; break;
                 default :
                        c=0;
                }
return (c);
}


char am2lower(char inchr)
{
char c;
switch (inchr) {
              	 case '-':
                 	c='.'; break;           	 
           	 case 'W': 
                	c='w'; break;
           	 case 'F': 
                	c='f'; break;
           	 case 'Y': 
                	c='y'; break;
           	 case 'M': 
                	c='m'; break;
           	 case 'L': 
                	c='l'; break;
           	 case 'I': 
          		c='i'; break;
           	 case 'V': 
           		c='v'; break;
          	 case 'A':  
			c='a'; break;
           	 case 'C': 
                	c='c'; break;
		 case 'G': 
			c='g'; break;
           	 case 'P': 
             	 	c='p'; break;
       		 case 'T': 
			c='t'; break;
	         case 'S': 
			c='s'; break;
           	 case 'N': 
                	c='n'; break;
           	 case 'Q': 
                	c='q'; break;
           	 case 'D': 
                	c='d'; break;
           	 case 'E': 
                	c='e'; break;
           	 case 'H': 
                	c='h'; break;
           	 case 'R': 
                	c='r'; break;
           	 case 'K': 
                	c='k'; break;
                 case 'B':
                 	c='b'; break;
                 case 'Z':
                 	c='z'; break;
                 case 'X':
                 	c='x'; break; 
		 default :
                        c=inchr;
		}
return (c);
}

char am2upper(char inchr)
{
char c;
switch (inchr) {
              	 case '.':
                 	c='-'; break;           	 
           	 case 'w': 
                	c='W'; break;
           	 case 'f': 
                	c='F'; break;
           	 case 'y': 
                	c='Y'; break;
           	 case 'm': 
                	c='M'; break;
           	 case 'l': 
                	c='L'; break;
           	 case 'i': 
          		c='I'; break;
           	 case 'v': 
           		c='V'; break;
          	 case 'a':  
			c='A'; break;
           	 case 'c': 
                	c='C'; break;
		 case 'g': 
			c='G'; break;
           	 case 'p': 
             	 	c='P'; break;
       		 case 't': 
			c='T'; break;
	         case 's': 
			c='S'; break;
           	 case 'n': 
                	c='N'; break;
           	 case 'q': 
                	c='Q'; break;
           	 case 'd': 
                	c='D'; break;
           	 case 'e': 
                	c='E'; break;
           	 case 'h': 
                	c='H'; break;
           	 case 'r': 
                	c='R'; break;
           	 case 'k': 
                	c='K'; break;
                 case 'b':
                 	c='B'; break;
                 case 'z':
                 	c='Z'; break;
                 case 'x':
                 	c='X'; break; 
		 default :
                        c=inchr;
		}
return (c);
}





int **alignment;



static void *mymalloc(int size);
char *strsave(char *str);
char *strnsave(char *str, int l);
static char **incbuf(int n, char **was);
static int *incibuf(int n, int *was);

void readali_or_seq(char *filename);
int **ali_char2int(char **aseq,int start_num, int start_seq);
int **read_alignment2int(char *filename,int start_num,int start_seq);

double effective_number(int **ali, int *marks, int n, int start, int end);

static void *mymalloc(size)
int size;
{
	void *buf;

	if ((buf = malloc(size)) == NULL) {
		fprintf(stderr, "Not enough memory: %d\n", size);
		exit(1);
	}
	return buf;
}

char *strsave(str)
char *str;
{
	char *buf;
	int l;

	l = strlen(str);
	buf = mymalloc(l + 1);
	strcpy(buf, str);
	return buf;
}

char *strnsave(str, l)
char *str;
int l;
{
	char *buf;

	buf = mymalloc(l + 1);
	memcpy(buf, str, l);
	buf[l] = '\0';
	return buf;
}

static char **incbuf(n, was)
int n;
char **was;
{
	char **buf;
	char *aaa;

	buf = mymalloc((n+1) * sizeof(buf[0]));
	if (n > 0) {
		memcpy(buf, was, n * sizeof(was[0]));
		free(was);
	}
	buf[n] = NULL;
	return buf;
}

static int *incibuf(n, was)
int n, *was;
{
	int *ibuf;

	ibuf = mymalloc((n+1) * sizeof(ibuf[0]));
	
	if (n > 0) {
		memcpy(ibuf, was, n * sizeof(was[0]));
		free(was);
	}
	ibuf[n] = 0;
	return ibuf;
}
void err_readali(int err_num)
{
	fprintf(stderr,"Error with reading alignment: %d\n",err_num);
}



void readali_or_seq(char *filename)

{
	FILE *fp;
	char *s, *ss, *seqbuf;
	char *s1;
	int n, l, len, len0, lenrow=0;
	int ii,mark=1;
	int noname=0, fasta=0, first = 1;
	
	if ((fp = fopen(filename, "r")) == NULL) {
		fprintf(stderr, "No such file: \"%s\"\n", filename);
		err_readali(1);
		flag_errread=1;
		return;
	}
	
	alilen = 0;
	nal = 0;
	n = 0;
		
	if(fgets(str, MAXSTR, fp) != NULL) {
		if(strncmp(str,"CLUSTAL ",8)!=0){rewind(fp);}
					}
	
	while (fgets(str, MAXSTR, fp) != NULL) {
		
		if (*str=='#' || strncmp(str,"//",2) == 0) {continue;}
		if( *str == '>') { /* FASTA name, read 1st word (cut at 30 symbols ) */

			if(fasta) { /* some FASTA sequences are already read */
				if(n==0) {
					alilen=lenrow;
				} else if(lenrow != alilen) {
					fprintf(stderr, "wrong len for %s", aname[n]);
					fprintf(stderr, ", was: %d, now: %d\n", alilen, lenrow);
					err_readali(6);
					flag_errread=1;
					return;
				}
				n++;
				lenrow=0;
			} else {
				fasta = 1;
			}
			first = 1;
			aname = incbuf(n, aname);
			for (ss = str+1; isspace(*ss); ss++) ;
			for(s = ss; !isspace(*s) && *s != '\0'; s++);
			*s = '\0';
			aname[n] = strnsave(ss, 30);
			continue;
		}

		for (ss = str; isspace(*ss); ss++) ;
		if ((ii<=ss-str)&&(mark==0)) {continue;}
		if (*ss == '\0') {
			if (n == 0 || fasta) {
				continue;
			}
			if (nal == 0) {
				if (n == 0) {
					fprintf(stderr, "No alignments read\n");
					err_readali(2);
					flag_errread=1;
					return;
				}
				nal = n;
			} else if (n != nal) {
				fprintf(stderr, "Wrong nal, was: %d, now: %d\n", nal, n);
				err_readali(3); 
				flag_errread=1;
				return;
			}
			n = 0;
			continue;
		}
		for (s = ss; *s != '\0' && !isspace(*s); s++) ;

		/* Check if there is a single stretch of symbols in the line */
		/* if true, add sequence segment to aseq[0] */
		for(s1=s; isspace(*s1); s1++); /*  next spacer */
		if( *s1 == '\0') { /* only one continuous stretch of symbols is in the line */
			noname = 1;
	
			if(n != 0 && !fasta) { /* single sequence, previous chunk is already read with name */ 
				first = 0;
				n=0;
				lenrow=alilen; /* alilen is already non-zero */
			} 
			if(first) { /* first chunk of (potentially) multiple consecutive chunks */
				    /* of the current sequence */
				astart = incibuf(n, astart);
				alen = incibuf(n, alen);
				aseq = incbuf(n, aseq);

				if(! fasta) { /* Not a FASTA format; thus no name was read */
					aname = incbuf(n, aname);
					aname[n] = strnsave("QUERY", 5);
				}
				first = 0;
			}

			/* Calculate chunk length; update full length */
			for (s = ss, len=0; *s != '\0' && !isspace(*s); s++) {
			/*** Calculate len -- the full number of aa and gaps, excluding position numbers in the end ***/
			/* Include all symbols (weird symbols will be considered as gaps in the numerical representation of alignment) */
				len++;
			/* 	if (isalpha(*s) || *s == '-' || *s == '.') len++;
			*/
			}
			lenrow += len;

			/* add the stretch to aseq[n] */
			if (aseq[n] == NULL) {
				aseq[n] = strnsave(ss, len);
			} else {
				seqbuf = mymalloc(lenrow+1);
				memcpy(seqbuf, aseq[n], lenrow-len);
				free(aseq[n]);
				aseq[n] = seqbuf;
				memcpy(seqbuf+lenrow-len, ss, len);
				seqbuf[lenrow] = '\0';
			}

			continue;
		}	


		/* At least two symbol stretches exist in the line; do normal alignment reading */
		*s++ = '\0';
		
		if (nal == 0) {
						
			astart = incibuf(n, astart);
			alen = incibuf(n, alen);
			aseq = incbuf(n, aseq);
			aname = incbuf(n, aname);
			aname[n] = strsave(ss);

		} else {
			if (n < 0 || n >= nal) {
				fprintf(stderr, "Bad sequence number: %d of %d\n", n, nal);
				err_readali(4);  
				flag_errread=1;
				return;
			}
			if (strcmp(ss, aname[n]) != 0) {
				fprintf(stderr, "Names do not match");
				fprintf(stderr, ", was: %s, now: %s\n", aname[n], ss);
				err_readali(5); 
				flag_errread=1;
				return;
			}
		}
		for (ss = s; isspace(*ss); ss++);
		if(mark==1){
		ii = ss-str;
		mark=0;}
				
		for (s = ss; isdigit(*s); s++) ;
		if (isspace(*s)) {
			if (nal == 0) {
				astart[n] = atoi(ss);
			}
			for (ss = s; isspace(*ss); ss++);
		}
		for (s = ss, len=0, l = 0; *s != '\0' && !isspace(*s); s++) {
			if (isalpha(*s)) {
				l++;
			}
		
/*** Calculate len -- the full number of aa and gaps, excluding position numbers in the end ***/			
			/* Include all symbols (weird symbols will be considered as gaps in the numerical representation of alignment) */
			len++;
			/* if (isalpha(*s) || *s == '-' || *s == '.') len++;
			*/
		
		}
		
		if (n == 0) {
			len0 = len;
			alilen += len;
		} else if (len != len0) {
			fprintf(stderr, "wrong len for %s", aname[n]);
			fprintf(stderr, ", was: %d, now: %d\n", len0, len);
			err_readali(6);
			flag_errread=1;
			return;
		}

		alen[n] += l;
		if (aseq[n] == NULL) {
			aseq[n] = strnsave(ss, len);
		} else {
			seqbuf = mymalloc(alilen+1);
			memcpy(seqbuf, aseq[n], alilen-len);
			free(aseq[n]);
			aseq[n] = seqbuf;
			memcpy(seqbuf+alilen-len, ss, len);
			seqbuf[alilen] = '\0';
		}
		n++;
	}
	if(noname) { /* sequence lines with no names were processed (i.e. FASTA or simple single sequence) */

		if(n==0) { /* single sequence was read as a set of consecutive chunks */
				     /* or a single FASTA record */
			alilen=lenrow;
		} else if(fasta && lenrow != alilen) { /* len of the last FASTA sequence differs from previous */
			fprintf(stderr, "wrong len for %s", aname[n]);
			fprintf(stderr, ", was: %d, now: %d\n", alilen, lenrow);
			err_readali(6);
			flag_errread=1;
			return;
		}
		n++;
			
	} 
	

	if (nal == 0) {
		if (n == 0) {
			fprintf(stderr, "No alignments read\n");
			err_readali(7);
			flag_errread=1;
			return;
		}

		nal = n;
	} else if (n != 0 && n != nal) {
		fprintf(stderr, "Wrong nal, was: %d, now: %d\n", nal, n);
		err_readali(8);  
		flag_errread=1;
		return;
	}

	fclose(fp);
}



/*** Print alignment of two alignments (n1 and n2 seqs deep) to file ****/
/* positive[] contains characters (recorded as integers): 
   +'s, spaces, letters (for identical matches) and (potentially) other symbols.
*/
static void printali_ali_positive_char_allstarts(FILE *fpp, int argc, int chunk, int n1, int n2, int len, int **name1, int **name2,
int **seqs1, int **seqs2, int *start1, int *start2, int *positive)
{
        int i, j, k, jj, mlen, str_len;
	int *pos1, *pos2, ch_start, ch_end, cons=0;
        int *sq;
	int *isq;
	char *sqn;
        
	mlen = 0; 
        for (i=0; i < n1; i++) {
		for(k=0; k<NAMELEN && name1[i][k] != '\0'; k++);  
                if (mlen < k) mlen = k;
	}
        for (i=0; i < n2; i++) {
		for(k=0; k<NAMELEN && name2[i][k] != '\0'; k++);
                if (mlen < k) mlen = k;
	}

	pos1 = ivector(0, n1-1);
	for (i=0; i < n1; i++) pos1[i] = start1[i]-1; 

	pos2 = ivector(0, n2-1);
	for (i=0; i < n2; i++) pos2[i] = start2[i]-1; 

        jj = 0;
        do {

		/* Print chunk of the first alignment */
                if (jj > 0) fprintf(fpp, "\n");

                for (i=0; i < n1; i++) {
			cons = 0;

			for(k=0; k<NAMELEN && name1[i][k] != '\0'; k++)  fprintf(fpp, "%c", name1[i][k]);
			if(i==n1-1) {
				if(argc==1) cons=1; /* consensus as bottom sequence in query */
			} 

			str_len = k;
     			for(k=str_len;k<mlen+3;k++) fprintf(fpp," ");

			ch_start = pos1[i]+1;	
			fprintf(fpp, "%-7d", ch_start);

			sq = seqs1[i] + jj;

			for (j=0; j+jj < len && j < chunk; j++) {
				fprintf(fpp, "%c", sq[j]);

				if(cons) { /* consensus as bottom sequence in query */
					/* count alignment positions */
					if( sq[j] != '=' && sq[j] != '~' ) pos1[i]++;
				} else { /* count sequence positions */
					if( isalpha(sq[j]) ) pos1[i]++;
				}

			}
			if(pos1[i]==ch_start-1) ch_end=ch_start;
			else ch_end=pos1[i];
			fprintf(fpp, "  %d\n", ch_end);
                }

		/* Print marks of positive scores and identical matches */
		for(k=0;k<mlen+10;k++) fprintf(fpp," ");
		isq = positive + jj ;
		for (j=0; j+jj < len && j < chunk; j++) fprintf(fpp,"%c", isq[j]); 
		fprintf(fpp, "\n");
			
		/*Print chunk of the second alignment*/
		for (i=0; i < n2; i++) {
			cons = 0;
			for(k=0; k<NAMELEN && name2[i][k] != '\0'; k++)  fprintf(fpp, "%c", name2[i][k]);
			if(i==0) {
				if(argc==1) cons=1; /* consensus as top sequence in subject */
			} 
			str_len = k;
     			for(k=str_len;k<mlen+3;k++) fprintf(fpp," ");

			ch_start = pos2[i]+1;	
			fprintf(fpp, "%-7d", ch_start);

			sq = seqs2[i] + jj;
			for (j=0; j+jj < len && j < chunk; j++) {
				fprintf(fpp, "%c", sq[j]);

				if(cons) { /* consensus as top sequence in subject */
					/* count alignment positions */
					if( sq[j] != '=' && sq[j] != '~' ) pos2[i]++;
				} else { /* count sequence positions */
					if( isalpha(sq[j]) ) pos2[i]++;
				}

			}
			
			if(pos2[i]==ch_start-1) ch_end=ch_start;
			else ch_end=pos2[i];
			fprintf(fpp, "  %d\n", ch_end);
                }

                jj += chunk;

        } while (jj < len);

}


int **ali_char2int(char **aseq, int start_num, int start_seq){
/* fills the alignment ali[start_num..start_num+nal-1][start_seq..start_seq+alilen-1]
convetring charater to integer from aseq[0..nal-1][0..alilen-1]
*/

int i,j,end_num,end_seq;
int **ali;
end_num=start_num+nal-1;
end_seq=start_seq+alilen-1;
ali=imatrix(start_num,end_num,start_seq,end_seq);
for(i=start_num;i<=end_num;++i)for(j=start_seq;j<=end_seq;++j)ali[i][j]=am2num(aseq[i-start_num][j-start_seq]);
return ali;
}

int **read_alignment2int(char *filename,int start_num,int start_seq){
int **ali;
readali_or_seq(filename);

if (flag_errread==1) return;

ali=ali_char2int(aseq,start_num,start_seq);
return ali;
}



/*computes Smith-Waterman local alignment score and returns the
  evalue
  query is the query sequence
  queryLength is the length of query in amino acids
  dbSequence is the sequence corresponding to some matrix profile
  dbLength is the length of dbSequnece
  matrix is the position-specific matrix associated with dbSequence
  gapOpen is the cost of opening a gap
  gapExtend is the cost of extending an exisiting gap by 1 position
  queryEnd returns the final position in the query of an optimal
   local alignment
  dbEnd returns the final position in dbSequence of an optimal
   local alignment
  queryEnd and dbEnd can be used to run the local alignment in reverse
   to find optimal starting positions
  score is used to pass back the optimal score
  kbp holds the Karlin-Altschul paramters
  L holds an intermediate term for E-value computation
  adjustedDbLength is the adjusted database length used for e-value computation
  minGappedK holds the minimum gapped K for all matrices in the
  database, and is used for e-value computation */

 static int SmithWatermanScore(int **score_imatrix,  int queryLength, int dbLength, int gapOpen, int gapExtend, int queryEnd, int dbEnd, int **tracebackDir,
int **flagNewGapQuery, int **flagNewGapDb)
{
   int bestScore; /*best score seen so far*/
   int newScore;  /* score of next entry*/
   int bestQueryPos, bestDbPos; /*position ending best score in
                           query and database sequences*/
   int newGapCost; /*cost to have a gap of one character*/
   int prevScoreNoGapQuery; /*score one row and column up
                               with no gaps*/
   int prevScoreGapQuery;   /*score if a gap already started in query*/
   int continueGapScore; /*score for continuing a gap in dbSequence*/
   int queryPos, dbPos; /*positions in query and dbSequence*/
   score_Vector scoreVector; /*keeps one row of the Smith-Waterman matrix
                           overwrite old row with new row*/
	int RowScore; /*score for match of two positions*/
	int gapDb2NoGap, gapQuery2NoGap, noGap2NoGap, score2NoGap;
	

   scoreVector.noGap = ivector (1,queryLength);
   scoreVector.gapExists = ivector (1,queryLength);
   
   bestQueryPos = 0;
   bestDbPos = 0;
   bestScore = 0;
   
   for (queryPos = 1; queryPos <= queryLength; queryPos++) {
     scoreVector.noGap[queryPos] = 0;
     scoreVector.gapExists[queryPos] = -(gapOpen);
	}

   newGapCost = gapOpen + gapExtend;
   
   for(dbPos = 1; dbPos <= dbLength; dbPos++) {  

     newScore = 0;
     noGap2NoGap = 0;
     prevScoreGapQuery = -(gapOpen);
     
     for(queryPos = 1; queryPos <= queryLength; queryPos++) {
	
	flagNewGapQuery[queryPos][dbPos] = 0;
	flagNewGapDb[queryPos][dbPos] = 0;
	     
	 
       /*testing scores with a gap in DB, either starting a new
         gap or extending an existing gap*/
       
         if ((newScore = newScore - newGapCost) >
	   (prevScoreGapQuery = prevScoreGapQuery - gapExtend)) {
         	prevScoreGapQuery = newScore;
         	flagNewGapQuery[queryPos][dbPos] = 1;
         }

       /*testing scores with a gap in Query, either starting a new
         gap or extending an existing gap*/
         
       if ((newScore = scoreVector.noGap[queryPos] - newGapCost) >
           (continueGapScore = scoreVector.gapExists[queryPos] - gapExtend)) {
         continueGapScore = newScore;
         flagNewGapDb[queryPos][dbPos] = 1;
	   }
        
       /*compute new score extending one position in query and db*/
       RowScore = score_imatrix[queryPos][dbPos]; 
       newScore = noGap2NoGap + RowScore;
       
       if (newScore < 0)
       newScore = 0; /*Smith-Waterman locality condition*/
       
/*** Assign direction for traceback: ***/       
	if (RowScore>0) {tracebackDir[queryPos][dbPos] = 6;}
	 else {	tracebackDir[queryPos][dbPos] = 5;}
	 
       /*test two alternatives*/
/*** Gap in Db ***/
   if (newScore < prevScoreGapQuery) {
         newScore = prevScoreGapQuery;

/**** Determine tracebackDir pointer ******/
         if (flagNewGapQuery[queryPos][dbPos] == 1) { tracebackDir[queryPos][dbPos] = 1;}
          else {tracebackDir[queryPos][dbPos] = 2;}
   }

/*** Gap in Query ***/
   if (newScore < continueGapScore) {
         newScore = continueGapScore;
        
/**** Determine tracebackDir pointer ******/   	
         if (flagNewGapDb[queryPos][dbPos] == 1) {tracebackDir[queryPos][dbPos] = 3;}
         else {tracebackDir[queryPos][dbPos] = 4;}
       }  

       noGap2NoGap = scoreVector.noGap[queryPos]; 
       scoreVector.noGap[queryPos] = newScore;
       scoreVector.gapExists[queryPos] = continueGapScore;

       if (newScore > bestScore) {
         bestScore = newScore;
         bestDbPos = dbPos;
         bestQueryPos = queryPos;
       }

    }

 }
   
   if (bestScore < 0)
     bestScore = 0;
   End1 = bestQueryPos;
   End2 = bestDbPos;

   return (bestScore);
}


/* Traces back the best alignment path using tracebackDir[][], flagNewGapDb[][] and flagNewGapQuery[][];
output is the set of arrays: aligned portions of the aseq... arrays, with gaps inserted,
scores for each position in alignment,
flags for positive matches,
positions in the initial alignments that are aligned
*/  

	void **traceback_outputPos(int start_ali1, int start_ali2, int end_ali1, int end_ali2, int **tracebackDir, int **flagNewGapQuery, int **flagNewGapDb, int *apos1, int *apos2)
{
	int pos1, pos2, posGapped, dir, i, j;
	char **aseqGapTrInt1, **aseqGapTrInt2;
	int *positiveInt, *apos1Int, *apos2Int;
	int gapOpen, gapExtend, newGapCost, colScore, d0, d1, d2, d3;
	int sctrl;
	int jnogp1, jnogp2;
	
	positiveInt = ivector(0,alilen_mat1+alilen_mat2);

	apos1Int = ivector(0,alilen_mat1+alilen_mat2);
	apos2Int = ivector(0,alilen_mat1+alilen_mat2);

	gapOpen = gap_open;
	gapExtend = gap_extend;
	
	sctrl = 0;
	segment_len = 0;
	pos1 = end_ali1;
	pos2 = end_ali2;
	posGapped = alilen_mat1+alilen_mat2;


/*** TraceBackDir: 6 - positive match; 5 - non-positive match; 
1 - previousScoreGapQuery wins, the gap is new; 2- previousScoreGapQuery wins, the gap is extended from existing;
3 - continueGapScore wins, the gap is new; 4- continueGapScore wins, the gap is extended from existing;
***/
 	
	do { 
		dir = tracebackDir[pos1][pos2];
		if (dir==3) {
			
			positiveInt[posGapped]=' ';

			apos1Int[posGapped] = 0;
			apos2Int[posGapped] = apos_filtr2[pos2];
						
			newGapCost = gapOpen + gapExtend;
			colScore = -newGapCost;
			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;
	
			pos2--;
			posGapped--;
			segment_len ++;
			
		}	
		
		if (dir==4) {
			do {
			positiveInt[posGapped]=' ';
			
			apos1Int[posGapped] = 0;
			apos2Int[posGapped] = apos_filtr2[pos2];
			
			colScore = -gapExtend;
			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;			

			pos2--;
			posGapped--;
			segment_len ++;
			
			} while (flagNewGapDb[pos1][pos2]!= 1);
	
			positiveInt[posGapped]=' ';
			
			apos1Int[posGapped] = 0;
			apos2Int[posGapped] = apos_filtr2[pos2];			
                        
			newGapCost = gapOpen + gapExtend;
		        colScore = -newGapCost;
		         
			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;			
		
			pos2--;
			posGapped--;
			segment_len ++;
		
		}	
	
		if (dir==1) {
			
			positiveInt[posGapped]=' ';

			apos1Int[posGapped] = apos_filtr1[pos1];
			apos2Int[posGapped] = 0;			

			newGapCost = gapOpen + gapExtend;
		        colScore = -newGapCost;
         
			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;			
		
			pos1--;
			posGapped--;
			segment_len ++;
		}
		
		if (dir==2) {
		do {
							
			positiveInt[posGapped]=' ';

			apos1Int[posGapped] = apos_filtr1[pos1];
			apos2Int[posGapped] = 0;
							
			colScore = -gapExtend;
		
			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;			
			
			pos1--;
			posGapped--;
			segment_len ++;
					
		} while (flagNewGapQuery[pos1][pos2] != 1 && sctrl<score);
		
			positiveInt[posGapped]=' ';
			
			apos1Int[posGapped] = apos_filtr1[pos1];
			apos2Int[posGapped] = 0;			
                        
		        newGapCost = gapOpen + gapExtend;
		        colScore = -newGapCost;
                        
			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;			
			
			pos1--;
			posGapped--;
			segment_len ++;

		}		
	
		if (dir==5) {
			
			positiveInt[posGapped]=' ';
			
			apos1Int[posGapped] = apos_filtr1[pos1];
			apos2Int[posGapped] = apos_filtr2[pos2];
			
			colScore = score_imatrix[pos1][pos2];

			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;			

			pos2--;
			pos1--;
			posGapped--;
			
			segment_len++;
		}
		
		if (dir==6) {
			
			positiveInt[posGapped]='+';
			
			apos1Int[posGapped] = apos_filtr1[pos1];
			apos2Int[posGapped] = apos_filtr2[pos2];
			
			colScore = score_imatrix[pos1][pos2];


			sctrl += colScore;
			if (sctrl < 0) sctrl = 0;			

			pos2--;
			pos1--;
			posGapped--;
			
			segment_len++;
		}
		

/***	} while ((pos1>=start_ali1) && (pos2>=start_ali2)); ***/
	} while (sctrl<score && pos1>0 && pos2>0); 
	
	posGp = posGapped+1;
	jnogp1 = jnogp2 = 1;
	for (j=posGp;j<posGp+segment_len;j++) {
		positive[j-posGp+1] = positiveInt[j];
		
		apos1[j-posGp+1] = apos1Int[j];
		apos2[j-posGp+1] = apos2Int[j];

	}

	free_ivector(positiveInt, 0,alilen_mat1+alilen_mat2);
	free_ivector(apos1Int, 0,alilen_mat1+alilen_mat2);
	free_ivector(apos2Int, 0,alilen_mat1+alilen_mat2);

}



double ScoreForTwoRows_smat3_21(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double ngap1, ngap2, g1, g2;
	double comp1, comp2;
	double s1,s2;
	s1=s2=0.0;

	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;
	
	if (abs(comp1)<1e-5 && abs(comp2)<1e-5) {		
		k1=k2=1;
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}

		s = smatrix[k1][k2];
	} else {
		for (i=1;i<=20;i++) {

		s1 +=  n_effAa1[pos1][i]*lnqp2[pos2][i];
		s2 +=  n_effAa2[pos2][i]*lnqp1[pos1][i];
		
		}

/*** Do normalization of s: ***/
		s = s1*(sum_eff_let2[pos2]-1)/sum_eff_let1[pos1] + s2*(sum_eff_let1[pos1]-1)/sum_eff_let2[pos2];
		s= s/(sum_eff_let1[pos1]+sum_eff_let2[pos2]-2);   

	}
	s = s/lambda_u;
	
	return s;
}

double ScoreForTwoRows_smat3_22(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double ngap1, ngap2, g1, g2;
	double comp1, comp2;
	s=0.0;

	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;	
	if (abs(comp1)<1e-5 && abs(comp2)<1e-5) {
		k1=k2=1;
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}
		s = smatrix[k1][k2];
	} else {
		for (i=1;i<=20;i++) {
		s+= n_effAa1[pos1][i]*(sum_eff_let2[pos2]-1)/sum_eff_let1[pos1]*lnqp2[pos2][i] + n_effAa2[pos2][i]*(sum_eff_let1[pos1]-1)/sum_eff_let2[pos2]*lnqp1[pos1][i];
		}

/*** No normalization of s ***/
	}
	s = s/lambda_u;

	return s;
}

double ScoreForTwoRows_smat3_23(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double comp1, comp2;

	s=0.0;
	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;	
	if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
		k1=k2=1;			
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}
		s = smatrix[k1][k2];
		
	} else {
		for (i=1;i<=20;i++) {
/* No division by the opposite sum_eff_let in each of two terms (formula 3_23): */ 
		s+= n_effAa1[pos1][i]*score_term2[pos2][i] + n_effAa2[pos2][i]*score_term1[pos1][i];
		}

/*** Do normalization of s: ***/
		s *= 1.0/(comp1+comp2);   
	}

/*	s *= recipr_lambda_u;
 */
	
	return s;
}


double ScoreForTwoRows_smat3_27(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double ngap1, ngap2, g1, g2;
	double comp1, comp2;

	s=0.0;
	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;	
	if (abs(comp1)<1e-5 && abs(comp2)<1e-5) {
		k1=k2=1;
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}
		s = smatrix[k1][k2];
	} else {
		for (i=1;i<=20;i++) {

/* Simplest scoring formula 3_27 */
		s+= n_effAa1[pos1][i]*lnqp2[pos2][i] + n_effAa2[pos2][i]*lnqp1[pos1][i];
		}

/*** No normalization of s ***/
	}
	s = s/lambda_u;

	return s;
}

double ScoreForTwoRows_smat3_28(int pos1, int pos2)
{
	int i, k1, k2;
	double s;
	double ngap1, ngap2, g1, g2;
	double comp1, comp2;
	s=0.0;

	comp1 = sum_eff_let1[pos1]-1.0;
	comp2 = sum_eff_let2[pos2]-1.0;	
	if (abs(comp1)<1e-5 && abs(comp2)<1e-5) {
		k1=k2=1;
		for (i=1;i<=20;i++) {
			if (n_effAa1[pos1][i]!= 0.0) k1=i;
			if (n_effAa2[pos2][i]!= 0.0) k2=i;
		}
		s = smatrix[k1][k2];
	} else {
		for (i=1;i<=20;i++) {
/* NO normalization and no division by the opposite sum_eff_let in each of two terms (formula 3_28): */ 
		s+= n_effAa1[pos1][i]*(sum_eff_let2[pos2]-1)*lnqp2[pos2][i] + n_effAa2[pos2][i]*(sum_eff_let1[pos1]-1)*lnqp1[pos1][i];
		}
/*** No normalization of s ***/
	}
	s = s/lambda_u;

	return s;
}


void sort(int n, double arr[])
{
	unsigned long i,ir=n,j,k,l=1;
	int jstack=0,*istack;
	double a,temp;

	istack=ivector(1,NSTACK);
	for (;;) {
		if (ir-l < M) {
			for (j=l+1;j<=ir;j++) {
				a=arr[j];
				for (i=j-1;i>=1;i--) {
					if (arr[i] <= a) break;
					arr[i+1]=arr[i];
				}
				arr[i+1]=a;
			}
			if (jstack == 0) break;
			ir=istack[jstack--];
			l=istack[jstack--];
		} else {
			k=(l+ir) >> 1;
			SWAP(arr[k],arr[l+1])
			if (arr[l+1] > arr[ir]) {
				SWAP(arr[l+1],arr[ir])
			}
			if (arr[l] > arr[ir]) {
				SWAP(arr[l],arr[ir])
			}
			if (arr[l+1] > arr[l]) {
				SWAP(arr[l+1],arr[l])
			}
			i=l+1;
			j=ir;
			a=arr[l];
			for (;;) {
				do i++; while (arr[i] < a);
				do j--; while (arr[j] > a);
				if (j < i) break;
				SWAP(arr[i],arr[j]);
			}
			arr[l]=arr[j];
			arr[j]=a;
			jstack += 2;
			if (jstack > NSTACK) nrerror("NSTACK too small in sort.");
			if (ir-i+1 >= j-l) {
				istack[jstack]=ir;
				istack[jstack-1]=i;
				ir=j-1;
			} else {
				istack[jstack]=j-1;
				istack[jstack-1]=l;
				l=i;
			}
		}
	}
	free (istack);
}

