/*** compgivenseqs_naccs.c reads a sequence-sequence alignment and compares it to the reference
alignment of the same two sequences, using the starting positions given in the evaluated and in the
template (reference) alignment as offsets.

Calculates 2 measures of alignment quality: Nacc1 (number of correct matches) and
Nacc2 (weighted measure that also gives partial credit to shifted matches).

Prints one tab-delimited line to stdout: Nacc1, Nacc2.

Modified from compgivenseqs.c: the output parameters were changed from 6 Qs to 2 Naccs.

 ***/


#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include <malloc.h>
#include <stddef.h>
/**** #include "nrutil.h" ****/


#define SQUARE(a) ((a)*(a))
#define NUM_METHOD 9
#define MAX_WINDOW 20
#define MAX_DELTASITE 20
#define MAXSTR   10001
#define INDI -100

#define JMAX 40                                         
#define IA 16807
#define IM 2147483647
#define AM (1.0/IM)
#define IQ 127773
#define IR 2836
#define NTAB 32
#define NDIV (1+(IM-1)/NTAB)
#define EPS 1.2e-7
#define RNMX (1.0-EPS)

#define NRANSI
#define SWAP(a,b) temp=(a);(a)=(b);(b)=temp;
#define M 7
#define NSTACK 50

#define LAMB_UNG 0.009925

char *digit="0123456789";
void nrerror(char error_text[]);
char *cvector(long nl, long nh);
int *ivector(long nl, long nh);
double *dvector(long nl, long nh);
char **cmatrix(long nrl, long nrh, long ncl, long nch);
int **imatrix(long nrl, long nrh, long ncl, long nch);
double **dmatrix(long nrl, long nrh, long ncl, long nch);
char **cmatrix(long nrl, long nrh, long ncl, long nch);


int a3let2num(char *let);
int am2num_c(int c);
int am2num(int c);
int am2numBZX(int c);

static void *mymalloc(int size);
char *strsave(char *str);
char *strnsave(char *str, int l);
static char **incbuf(int n, char **was);
static int *incibuf(int n, int *was);

void err_readali(int err_num);
void readali(char *filename);

int **ali_char2int(char **aseq,int start_num, int start_seq);
int **read_alignment2int(char *filename,int start_num,int start_seq);

double n_c;
void **freqIntMaskGaps(int **ali,int nal, int alilen, int **f, int
*effindiarr,double gap_threshold, double gapRegionMin, double *p_comp);

/* Bundle of per-alignment working arrays and counters.
   No function visible in this file reads any of these fields.
   NOTE(review): the "1"/"2" suffixes presumably refer to the first and second
   alignment, and the fq / hfq / icfq members to frequency tables -- this is
   inferred from the names only; confirm against the code this struct came from. */
typedef struct _conv_info{
        double **fq1, **fq2, **hfq1, **hfq2, **icfq1, **icfq2;
        char *alifilename1, *alifilename2;
        int alignlen;
	int nali;
	int *ngap1, *ngap2;
	int gapless50;
	double eff_num_seq;
	double *over_all_frq;
	int  *eff_indi_arr1, *eff_indi_arr2;
	double *avc,*csi;
        double ***conv;
            } conv_info;


	int *dbSequence, queryEnd, dbEnd, queryStart,dbStart;
	int gapOpen, gapExtend, dbLength, queryLength;

int score;

void *ReadRef (char *inputfile);
int CompareAlnVsReferenceAln_calculateNacc (int *apos1, int *apos2, int *aposref1, int *aposref2, int start_ref1, int  start_ref2, int end_ref1, int end_ref2 /* , int coverage1, int coverage2, int accuracy1, int accuracy2*/ );

double **blosum;
double **qBlosum;

char **aname, **aname1, **aname2, **aseq, **aseq1, **aseq2;
int nal, nal1, nal2, nalmerge, alilen, alilen1, alilen2, maxalilen,
*astart, *astart1, *astart2, *alen;
int *aposnogp1, *aposnogp2;
int **align_mat1, **align_mat2;
int n_lowgaps, alilen_mat1, alilen_mat2;
char **aseq_mat1, **aseq_mat2;
int segment_len;

int *apos1, *apos2, *aposref1, *aposref2;
int start_ref1, start_ref2, end_ref1, end_ref2, reflen_nogp;
double coverage1, coverage2, falsecov, accuracy1, accuracy2;
double q_modeller, q_developer, q_combined;

int **alignment1, **alignment2;
double **u_oaf,**h_oaf;
char *am="-WFYMLIVACGPTSNQDEHRKBZX*.wfymlivacgptsnqdehrkbzx";
/* Three-letter residue codes. Entry i corresponds to the i-th character of the
   one-letter alphabet string `am` ("-WFYMLIVACGPTSNQDEHRKBZX*."), i.e. 26 entries.
   Every entry needs its own trailing comma: adjacent string literals without a
   comma are concatenated by the compiler into a single element. */
char *am3[]={
"---",
"TRP",
"PHE",
"TYR",
"MET",
"LEU",
"ILE",
"VAL",
"ALA",
"CYS",
"GLY",
"PRO",
"THR",
"SER",
"ASN",
"GLN",
"ASP",
"GLU",
"HIS",
"ARG",
"LYS",
"ASX",
"GLX",
"UNK",
"***",
"...",
};

void argument();

double **score_matrix, *score_matrix_srt;
int **matrix1, **matrix2;

int **count;
int *maskgaps, *maskgapRegion;
double **n_effAa1, **n_effAa2;
double *sum_eff_let1, *sum_eff_let2;

double b = 1.0;
double f= 32.0;

main(int argc, char *argv[])
{

	FILE *fp,*ft;
	conv_info convinfo;
	int i,j,k,l,nt=0;
	int jposnogp1, jposnogp2, jmat;
	int markali[2000];
	char ARG_I[100],ARG_P[50];
	double ARG_G=1.0, ARG_T=1.0;
	int *csv_index;
	
	/*read input arguments */
        if(argc<=2) { argument(); exit(0);}
	for(i=1;i<argc;i++) {
	    if(strcmp(argv[i],"-i")==0) {strcpy(ARG_I,argv[i+1]);i++;continue;}
	    if(strcmp(argv[i],"-p")==0) {strcpy(ARG_P,argv[i+1]);i++;continue;}
				}
	
	if((ARG_G>1.0)||(ARG_G<=0)){fprintf(stderr,"gap percentage(-g) to eliminate a column must be no more than 1 and more than 0 \n");
		    exit(0);}
	
	/* read alignments */
	alignment1=read_alignment2int(ARG_I,1,1);

	if(alignment1==NULL){
		fprintf(stderr, "alignment1 file not readable\n");
			    }

	alilen1 = alilen;
	nal1 = nal;
	astart1 = ivector(0,nal1);
	for (i=0; i<nal1; i++) {astart1[i] = astart[i];}
	
	aname1 = cmatrix(0, nal1, 0, 100);  

	for (i=0; i<nal1; i++) {
		strcpy(aname1[i], aname[i]); 
	}

	aseq1 = cmatrix(0, nal1, 0, alilen1);
	for (j=0;j<alilen1;j++) {
		for (i=0; i<nal1; i++) {aseq1[i][j] = aseq[i][j];}			
	}
				 
	free (astart);
	free (alen);
	free (aseq);
	free (aname);
	
	/* memory allocation for the elements in convinfo */
	convinfo.ngap1=ivector(0,alilen1);
	convinfo.ngap2=ivector(0,alilen2);
	convinfo.eff_indi_arr1=ivector(0,alilen1+1);
	convinfo.eff_indi_arr2=ivector(0,alilen2+1);
	convinfo.fq1=dmatrix(0,20,0,alilen1);
	convinfo.fq2=dmatrix(0,20,0,alilen2);
	convinfo.hfq1=dmatrix(0,20,0,alilen1);
	convinfo.hfq2=dmatrix(0,20,0,alilen2);
	convinfo.icfq1=dmatrix(0,20,0,alilen1);
	convinfo.icfq2=dmatrix(0,20,0,alilen2);
	csv_index=ivector(0,alilen1);
	for (i=0; i<alilen; i++) csv_index[i]=0;

	matrix1 = imatrix(1,alilen1,0,20);
	matrix2 = imatrix(1,alilen2,0,20);

	alilen_mat1 = alilen1;
	
	aseq_mat1 = cmatrix(0,nal1,0,alilen_mat1);
	align_mat1 = imatrix (0,nal1,0,alilen_mat1);
	aposnogp1 = ivector(1, alilen1);
	aposnogp2 = ivector(1, alilen1);

	jmat = 0;
	if (astart1[0]!=0) jposnogp1 = astart1[0]; 
		else jposnogp1 = 1;
			
	if (astart1[1]!=0) jposnogp2 = astart1[1];
		else jposnogp2 = 1; 

	for(j=0;j<alilen1;j++){
				for (i=0; i<nal1; i++){
					aseq_mat1[i][jmat] = aseq1[i][j];
					align_mat1[i+1][jmat+1] = alignment1[i+1][j+1];
				}

				if(aseq_mat1[0][jmat]=='-' || aseq_mat1[0][jmat]=='.') {aposnogp1[jmat+1]=0;}
				else {
					if (islower(aseq_mat1[0][jmat])) {aposnogp1[jmat+1] = 0;}
					else {aposnogp1[jmat+1] = jposnogp1;}
					
					jposnogp1++;
				}

				if(aseq_mat1[1][jmat]=='-' || aseq_mat1[1][jmat]=='.') {aposnogp2[jmat+1]=0;}
				else {

					if (islower(aseq_mat1[1][jmat])) {aposnogp2[jmat+1] = 0;}
					else {aposnogp2[jmat+1] = jposnogp2;}

					jposnogp2++;

				}
				
				jmat++;
	}
							
		ReadRef (ARG_P);
		
		segment_len = alilen_mat1;		
		CompareAlnVsReferenceAln_calculateNacc (aposnogp1, aposnogp2, aposref1, aposref2, start_ref1, start_ref2,  end_ref1, end_ref2 /* coverage1, coverage2, accuracy1, accuracy2 */);
		
		fprintf (stdout, "Nacc1 = %.0f\tNacc2 = %e\n",accuracy1, accuracy2);

	exit(0);
}
	
/* Write the command-line usage summary to stderr. */
void argument()
{
	static const char *const usage_lines[] = {
		"      compgivenseqs   arguments:\n",
		"\n",
		"  -i    Input alignment file [File in]\n",
		"        Format: ClustalW or simple alignment format\n",
		"  -p    File with template alignment\n",
	};
	size_t line;

	for (line = 0; line < sizeof usage_lines / sizeof usage_lines[0]; line++) {
		fputs(usage_lines[line], stderr);
	}
}

#define NR_END 1
#define FREE_ARG char*

/* Report a fatal error: print error_text and a termination notice to stderr,
   then end the process with exit status 1. Does not return. */
void nrerror(char error_text[])
{
	fprintf(stderr, "%s\nFATAL - execution terminated\n", error_text);
	exit(1);
}


/* Allocate a char vector addressable as v[nl..nh].
   NR_END extra leading elements are allocated and the returned pointer is
   offset so that index nl maps just past them. Calls nrerror() (which exits)
   when the allocation fails. */
char *cvector(long nl, long nh){
	char *v;

	/* element type is char, so size the block with sizeof(char) */
	v = (char *)malloc((size_t)(nh - nl + 1 + NR_END) * sizeof(char));
	if (!v) nrerror("allocation failure in cvector()");
	return v - nl + NR_END;
}


/* Allocate an int vector addressable as v[nl..nh].
   NR_END extra leading elements are allocated and the returned pointer is
   offset accordingly. Calls nrerror() (which exits) on allocation failure. */
int *ivector(long nl, long nh){
	long slots = nh - nl + 1 + NR_END;
	int *base = (int *)malloc((size_t)(slots * sizeof(int)));

	if (base == NULL) nrerror("allocation failure in ivector()");
	return base - nl + NR_END;
}

/**** DUMP IN FAVOR OF NRUTIL.H ****/

/* Allocate a long vector addressable as v[nl..nh].
   NR_END extra leading elements are allocated and the returned pointer is
   offset accordingly. Calls nrerror() (which exits) on allocation failure. */
long *lvector(long nl, long nh){
	long slots = nh - nl + 1 + NR_END;
	long int *base = (long int *)malloc((size_t)(slots * sizeof(long int)));

	if (base == NULL) nrerror("allocation failure in lvector()");
	return base - nl + NR_END;
}

/* Allocate a double vector addressable as v[nl..nh].
   NR_END extra leading elements are allocated and the returned pointer is
   offset accordingly. Calls nrerror() (which exits) on allocation failure. */
double *dvector(long nl, long nh){
	long slots = nh - nl + 1 + NR_END;
	double *base = (double *)malloc((size_t)(slots * sizeof(double)));

	if (base == NULL) nrerror("allocation failure in dvector()");
	return base - nl + NR_END;
}

/* Allocate a char matrix addressable as m[nrl..nrh][ncl..nch].
   One array of row pointers and one contiguous block of cells are allocated;
   each row pointer is set to its slice of the cell block. Calls nrerror()
   (which exits) if either allocation fails. */
char **cmatrix(long nrl, long nrh, long ncl, long nch){
	long rows = nrh - nrl + 1;
	long cols = nch - ncl + 1;
	long r;
	char **rowptr;

	/* row-pointer array, shifted so that rowptr[nrl] is the first usable slot */
	rowptr = (char **)malloc((size_t)((rows + NR_END) * sizeof(char*)));
	if (rowptr == NULL) nrerror("allocation failure 1 in cmatrix()");
	rowptr = rowptr + NR_END - nrl;

	/* cell storage, shifted so that rowptr[nrl][ncl] is the first usable cell */
	rowptr[nrl] = (char *)malloc((size_t)((rows * cols + NR_END) * sizeof(char)));
	if (rowptr[nrl] == NULL) nrerror("allocation failure 2 in cmatrix()");
	rowptr[nrl] = rowptr[nrl] + NR_END - ncl;

	for (r = nrl + 1; r <= nrh; r++) {
		rowptr[r] = rowptr[r - 1] + cols;
	}

	return rowptr;
}

/* Allocate an int matrix addressable as m[nrl..nrh][ncl..nch].
   One array of row pointers and one contiguous block of cells are allocated;
   each row pointer is set to its slice of the cell block. Calls nrerror()
   (which exits) if either allocation fails. */
int **imatrix(long nrl, long nrh, long ncl, long nch){
	long rows = nrh - nrl + 1;
	long cols = nch - ncl + 1;
	long r;
	int **rowptr;

	/* row-pointer array, shifted so that rowptr[nrl] is the first usable slot */
	rowptr = (int **)malloc((size_t)((rows + NR_END) * sizeof(int*)));
	if (rowptr == NULL) nrerror("allocation failure 1 in imatrix()");
	rowptr = rowptr + NR_END - nrl;

	/* cell storage, shifted so that rowptr[nrl][ncl] is the first usable cell */
	rowptr[nrl] = (int *)malloc((size_t)((rows * cols + NR_END) * sizeof(int)));
	if (rowptr[nrl] == NULL) nrerror("allocation failure 2 in imatrix()");
	rowptr[nrl] = rowptr[nrl] + NR_END - ncl;

	for (r = nrl + 1; r <= nrh; r++) {
		rowptr[r] = rowptr[r - 1] + cols;
	}

	return rowptr;
}


/* Allocate a double matrix addressable as m[nrl..nrh][ncl..nch].
   One array of row pointers and one contiguous block of cells are allocated;
   each row pointer is set to its slice of the cell block. Calls nrerror()
   (which exits) if either allocation fails. */
double **dmatrix(long nrl, long nrh, long ncl, long nch){
	long rows = nrh - nrl + 1;
	long cols = nch - ncl + 1;
	long r;
	double **rowptr;

	/* row-pointer array, shifted so that rowptr[nrl] is the first usable slot */
	rowptr = (double **)malloc((size_t)((rows + NR_END) * sizeof(double*)));
	if (rowptr == NULL) nrerror("allocation failure 1 in dmatrix()");
	rowptr = rowptr + NR_END - nrl;

	/* cell storage, shifted so that rowptr[nrl][ncl] is the first usable cell */
	rowptr[nrl] = (double *)malloc((size_t)((rows * cols + NR_END) * sizeof(double)));
	if (rowptr[nrl] == NULL) nrerror("allocation failure 2 in dmatrix()");
	rowptr[nrl] = rowptr[nrl] + NR_END - ncl;

	for (r = nrl + 1; r <= nrh; r++) {
		rowptr[r] = rowptr[r - 1] + cols;
	}

	return rowptr;
}

/*****
char **cmatrix(long nrl, long nrh, long ncl, long nch){
long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
char **m;
m=(char **)malloc((size_t)((nrow+NR_END)*sizeof(char*)));
if (!m) nrerror("allocation failure 1 in dmatrix()");
m += NR_END;
m -= nrl;

m[nrl]=(char *)malloc((size_t)((nrow*ncol+NR_END)*sizeof(char)));
if (!m[nrl]) nrerror("allocation failure 2 in dmatrix()");
m[nrl] += NR_END;
m[nrl] -= ncl;

for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;

return m;
}
*******/

/* Map a one-letter amino-acid code (either case) to its index 1..20 in the
   order W F Y M L I V A C G P T S N Q D E H R K.
   Any other character, including gaps and B/Z/X, maps to 0. */
int am2num(int c)
{
	switch (c) {
	case 'W': case 'w': return 1;
	case 'F': case 'f': return 2;
	case 'Y': case 'y': return 3;
	case 'M': case 'm': return 4;
	case 'L': case 'l': return 5;
	case 'I': case 'i': return 6;
	case 'V': case 'v': return 7;
	case 'A': case 'a': return 8;
	case 'C': case 'c': return 9;
	case 'G': case 'g': return 10;
	case 'P': case 'p': return 11;
	case 'T': case 't': return 12;
	case 'S': case 's': return 13;
	case 'N': case 'n': return 14;
	case 'Q': case 'q': return 15;
	case 'D': case 'd': return 16;
	case 'E': case 'e': return 17;
	case 'H': case 'h': return 18;
	case 'R': case 'r': return 19;
	case 'K': case 'k': return 20;
	default: return 0;
	}
}


/* Map a one-letter amino-acid code (either case) to its index:
   1..20 for W F Y M L I V A C G P T S N Q D E H R K, then
   B = 21, Z = 22, X = 23 and '*' = 24.
   Any other character, including gaps, maps to 0. */
int am2numBZX(int c)
{
	switch (c) {
	case 'W': case 'w': return 1;
	case 'F': case 'f': return 2;
	case 'Y': case 'y': return 3;
	case 'M': case 'm': return 4;
	case 'L': case 'l': return 5;
	case 'I': case 'i': return 6;
	case 'V': case 'v': return 7;
	case 'A': case 'a': return 8;
	case 'C': case 'c': return 9;
	case 'G': case 'g': return 10;
	case 'P': case 'p': return 11;
	case 'T': case 't': return 12;
	case 'S': case 's': return 13;
	case 'N': case 'n': return 14;
	case 'Q': case 'q': return 15;
	case 'D': case 'd': return 16;
	case 'E': case 'e': return 17;
	case 'H': case 'h': return 18;
	case 'R': case 'r': return 19;
	case 'K': case 'k': return 20;
	case 'B': case 'b': return 21;
	case 'Z': case 'z': return 22;
	case 'X': case 'x': return 23;
	case '*': return 24;
	default: return 0;
	}
}

static char str[MAXSTR+1];

char **aname, **aseq;
int nal, alilen, *astart, *alen;
int **alignment;



static void *mymalloc(int size);
char *strsave(char *str);
char *strnsave(char *str, int l);
static char **incbuf(int n, char **was);
static int *incibuf(int n, int *was);

void readali(char *filename);
int **ali_char2int(char **aseq,int start_num, int start_seq);
int **read_alignment2int(char *filename,int start_num,int start_seq);


/* malloc() wrapper that never returns NULL: on failure it prints the
   requested size to stderr and exits with status 1. */
static void *mymalloc(int size)
{
	void *mem = malloc(size);

	if (mem == NULL) {
		fprintf(stderr, "Not enough memory: %d\n", size);
		exit(1);
	}
	return mem;
}

/* Return a freshly allocated copy of the NUL-terminated string str.
   Allocation goes through mymalloc(), which exits on failure. */
char *strsave(char *str)
{
	int length = strlen(str);
	char *copy = mymalloc(length + 1);

	strcpy(copy, str);
	return copy;
}

/* Return a freshly allocated, NUL-terminated copy of the first l bytes of str.
   Allocation goes through mymalloc(), which exits on failure. */
char *strnsave(char *str, int l)
{
	char *copy = mymalloc(l + 1);

	memcpy(copy, str, l);
	copy[l] = '\0';
	return copy;
}

/* Grow an array of string pointers from n to n+1 entries.
   A new array is allocated, the n existing pointers are copied into it, the
   old array is freed, and the new last entry is set to NULL. When n is 0 the
   old array `was` is neither read nor freed. */
static char **incbuf(int n, char **was)
{
	char **grown = mymalloc((n+1) * sizeof(grown[0]));
	int slot;

	for (slot = 0; slot < n; slot++) {
		grown[slot] = was[slot];
	}
	if (n > 0) {
		free(was);
	}
	grown[n] = NULL;
	return grown;
}

/* Grow an int array from n to n+1 entries.
   A new array is allocated, the n existing values are copied into it, the
   old array is freed, and the new last entry is set to 0. When n is 0 the
   old array `was` is neither read nor freed. */
static int *incibuf(int n, int *was)
{
	int *grown = mymalloc((n+1) * sizeof(grown[0]));
	int slot;

	for (slot = 0; slot < n; slot++) {
		grown[slot] = was[slot];
	}
	if (n > 0) {
		free(was);
	}
	grown[n] = 0;
	return grown;
}
/* Print the numeric code of an alignment-reading error to stderr.
   Only reports; the caller decides whether to exit. */
void err_readali(int err_num)
{
	fputs("Error with reading alignment: ", stderr);
	fprintf(stderr, "%d\n", err_num);
}



/* Read a multiple alignment from `filename` into the file-scope globals.
 *
 * On return:
 *   nal       number of sequences
 *   alilen    number of alignment columns (letters, '-' and '.')
 *   aname[i]  name of sequence i
 *   aseq[i]   NUL-terminated aligned sequence i, blocks concatenated
 *   astart[i] start number found between name and sequence in the first
 *             block, or 0 when there is none
 *   alen[i]   number of letters in sequence i
 *
 * Input handling:
 *   - If the first line starts with "BLAST", lines are skipped up to and
 *     including the first one starting with " Identities = ".
 *   - If the first line starts with "CLUSTAL W" it is consumed as a header;
 *     otherwise the file is rewound and the first line is parsed as data.
 *   - Lines starting with '#' or "//" are ignored.
 *   - A blank line ends the current block of sequences.
 *   - Once the first sequence line has been parsed, any line whose first
 *     non-blank character is at or beyond the column where that sequence
 *     started is ignored.
 *
 * Any format error prints a message plus an err_readali() code to stderr and
 * exits with status 1. The previous contents of the globals are not freed.
 */
void readali(char *filename)
{
	FILE *fp;
	char *s, *ss, *seqbuf;
	int n, l, len, len0 = 0;
	int ii = 0;	/* column where the first sequence started; valid once mark == 0 */
	int mark = 1;	/* 1 until ii has been recorded */

	if ((fp = fopen(filename, "r")) == NULL) {
		fprintf(stderr, "No such file: \"%s\"\n", filename);
		err_readali(1);
		exit(1);
	}

	alilen = 0;
	nal = 0;
	n = 0;
	if (fgets(str, MAXSTR, fp) != NULL) {
		if (strncmp(str, "BLAST", 5) == 0) {
			do {
				if (fgets(str, MAXSTR, fp) == NULL) {
					fprintf(stderr, "No alignments read\n");
					err_readali(2);
					exit(1);
				}
			} while (strncmp(str, " Identities = ", 14) != 0);
		}
		else if (strncmp(str, "CLUSTAL W", 9) != 0) { rewind(fp); }
	}

	while (fgets(str, MAXSTR, fp) != NULL) {
		if (*str == '#' || strncmp(str, "//", 2) == 0) { continue; }
		/* <ctype.h> functions need an unsigned char value; plain char may be negative */
		for (ss = str; isspace((unsigned char)*ss); ss++) ;
		/* test mark first so ii is only consulted after it has been set */
		if (mark == 0 && ii <= ss - str) { continue; }
		if (*ss == '\0') {
			/* blank line: end of a block */
			if (n == 0) {
				continue;
			}
			if (nal == 0) {
				nal = n;
			} else if (n != nal) {
				fprintf(stderr, "Wrong nal, was: %d, now: %d\n", nal, n);
				err_readali(3); exit(1);
			}
			n = 0;
			continue;
		}

		/* isolate the sequence name */
		for (s = ss; *s != '\0' && !isspace((unsigned char)*s); s++) ;
		/* only step over a separator; never step past the end of the line */
		if (*s != '\0') {
			*s++ = '\0';
		}
		if (nal == 0) {
			/* first block: grow every per-sequence array by one entry */
			astart = incibuf(n, astart);
			alen = incibuf(n, alen);
			aseq = incbuf(n, aseq);
			aname = incbuf(n, aname);
			aname[n] = strsave(ss);
		} else {
			if (n < 0 || n >= nal) {
				fprintf(stderr, "Bad sequence number: %d of %d\n", n, nal);
				/* aname[n] does not exist here; report the name just read,
				   and never use file data as a format string */
				fprintf(stderr, "%s\n", ss);
				err_readali(4);  exit(1);
			}
			if (strcmp(ss, aname[n]) != 0) {
				fprintf(stderr, "Names do not match");
				fprintf(stderr, ", was: %s, now: %s\n", aname[n], ss);
				err_readali(5);  exit(1);
			}
		}
		for (ss = s; isspace((unsigned char)*ss); ss++);
		if (mark == 1) {
			ii = (int)(ss - str);
			mark = 0;
		}

		/* optional start number between the name and the sequence */
		for (s = ss; isdigit((unsigned char)*s); s++) ;
		if (isspace((unsigned char)*s)) {
			if (nal == 0) {
				astart[n] = atoi(ss);
			}
			for (ss = s; isspace((unsigned char)*ss); ss++);
		}

		/* l counts letters; len counts letters plus '-' and '.' */
		for (s = ss, len = 0, l = 0; *s != '\0' && !isspace((unsigned char)*s); s++) {
			if (isalpha((unsigned char)*s)) {
				l++;
			}
			if (isalpha((unsigned char)*s) || *s == '-' || *s == '.') {
				len++;
			}
		}

		if (n == 0) {
			/* the first sequence of a block fixes the block width */
			len0 = len;
			alilen += len;
		} else if (len != len0) {
			fprintf(stderr, "wrong len for %s", aname[n]);
			fprintf(stderr, ", was: %d, now: %d\n", len0, len);
			err_readali(6); exit(1);
		}
		alen[n] += l;
		if (aseq[n] == NULL) {
			aseq[n] = strnsave(ss, len);
		} else {
			/* append this block to what was read in earlier blocks */
			seqbuf = mymalloc(alilen+1);
			memcpy(seqbuf, aseq[n], alilen-len);
			free(aseq[n]);
			aseq[n] = seqbuf;
			memcpy(seqbuf+alilen-len, ss, len);
			seqbuf[alilen] = '\0';
		}
		n++;
	}
	if (nal == 0) {
		if (n == 0) {
			fprintf(stderr, "No alignments read\n");
			err_readali(7);  exit(1);
		}
		nal = n;
	} else if (n != 0 && n != nal) {
		fprintf(stderr, "Wrong nal, was: %d, now: %d\n", nal, n);
		err_readali(8);  exit(1);
	}
	fclose(fp);
}


/* Convert the character alignment aseq[0..nal-1][0..alilen-1] into a newly
   allocated integer matrix
   ali[start_num..start_num+nal-1][start_seq..start_seq+alilen-1],
   translating each character with am2num(). nal and alilen are the
   file-scope globals. */
int **ali_char2int(char **aseq, int start_num, int start_seq){
	int row, col;
	int last_row = start_num + nal - 1;
	int last_col = start_seq + alilen - 1;
	int **ali = imatrix(start_num, last_row, start_seq, last_col);

	for (row = 0; row < nal; row++) {
		for (col = 0; col < alilen; col++) {
			ali[start_num + row][start_seq + col] = am2num(aseq[row][col]);
		}
	}
	return ali;
}

/* Read the alignment in `filename` with readali() and return it converted to
   an integer matrix by ali_char2int(), using the given row and column index
   origins. */
int **read_alignment2int(char *filename,int start_num,int start_seq){
	readali(filename);
	return ali_char2int(aseq, start_num, start_seq);
}


int *letters; 


/* Count residues per column of an integer-coded alignment.
 *
 * ali          alignment, read as ali[1..nal][1..alilen]; codes 0..20 are
 *              counted directly (0 being the gap code), codes 26..45 are
 *              counted as code-25; any other code is a fatal error
 * f            output counts: f[j][0] is the gap count of column j; the
 *              residue counts of the columns whose gap fraction is below
 *              gapmax are packed consecutively into f[1..n_lowgaps][1..20]
 * effindiarr   output: [0] holds the number of columns, [1..alilen] the
 *              column indices, and the following slot the terminator INDI
 * gapmax       gap fraction at or above which maskgaps[j] is set to 1
 * gapRegionMin gap fraction at or above which maskgapRegion[j] is set to 1
 *              (only evaluated for columns below gapmax)
 * p_comp       output: p_comp[1..20], each residue's share of all non-gap
 *              cells
 *
 * Also writes the globals letters (newly allocated on every call; letters[j]
 * is the non-gap count of column j), n_lowgaps and n_c (mean number of
 * distinct codes per column, gap code included). The globals maskgaps and
 * maskgapRegion must already point to arrays indexable up to alilen.
 *
 * Always returns NULL; the return type is kept for compatibility with the
 * existing prototype. Exits with status 1 on an invalid residue code.
 */
void **freqIntMaskGaps(int **ali,int nal, int alilen, int **f, int
*effindiarr,double gapmax, double gapRegionMin, double *p_comp)
{
	int i, j, k, jnew, effnumind, sumNC, fullCountNogaps;
	int count[21], sum_comp[21];

	letters = ivector(0, alilen+1);
	letters[0] = 1;

	effnumind = 0;
	sumNC = 0;
	jnew = 0;
	for (i = 0; i <= 20; i++) sum_comp[i] = 0;

	for (j = 1; j <= alilen; j++) {
		double gap_fraction;

		for (i = 0; i <= 20; i++) count[i] = 0;
		for (i = 1; i <= nal; i++) {
			int code = ali[i][j];

			/* reject negative codes too: they would index count[] out of bounds */
			if (code >= 0 && code <= 20) {
				count[code]++;
			} else if (code > 25 && code <= 45) {
				count[code - 25]++;
			} else {
				fprintf(stderr, "not good number for AA\n%d\n%d", i, j);
				exit(1);
			}
		}

		/* accumulate the number of distinct symbols (for n_c) and the
		   overall per-symbol totals */
		for (i = 0; i <= 20; i++) {
			if (count[i] > 0) {
				sumNC++;
				sum_comp[i] += count[i];
			}
		}

		f[j][0] = count[0];
		if (f[j][0] > nal) {
			fprintf(stderr, "gap number>total number\n");
			exit(1);
		}

		effnumind++;
		effindiarr[effnumind] = j;
		count[0] = nal - count[0];	/* from here on: number of non-gap cells */
		letters[j] = count[0];

		gap_fraction = 1.0*f[j][0]/nal;
		if (gap_fraction >= gapmax) {
			maskgaps[j] = 1;
		} else {
			maskgaps[j] = 0;
			maskgapRegion[j] = (gap_fraction >= gapRegionMin) ? 1 : 0;
			jnew++;
			/* raw counts, not frequencies, packed by low-gap column index */
			for (k = 1; k <= 20; k++) {
				f[jnew][k] = count[k];
			}
		}
	}
	n_lowgaps = jnew;
	n_c = sumNC;
	n_c = n_c/alilen;

	fullCountNogaps = alilen*nal - sum_comp[0];
	for (i = 1; i <= 20; i++) p_comp[i] = 1.0*sum_comp[i]/fullCountNogaps;
	effindiarr[effnumind+1] = INDI;	/* negative terminator after the last index */
	effindiarr[0] = effnumind;

	return NULL;
}


void *ReadRef (char *inputfile)
{
	int i, pos, pos1, pos2;
	
	readali(inputfile);
	
	aposref1 = ivector (0,alilen);
	aposref2 = ivector (0,alilen);	

	pos = 0;
	pos1 = astart[0]-1;
	pos2 = astart[1]-1;

	for (i=0; i<alilen; i++) {
			if (isalpha(aseq[0][i])) pos1++;
			if (isalpha(aseq[1][i])) pos2++;
			if (isupper(aseq[0][i]) && isupper(aseq[1][i])) {
				if (pos==0) {start_ref1 = pos1; start_ref2 = pos2;}
				pos++;
				aposref1[pos]=pos1;
				aposref2[pos]=pos2;
				end_ref1 = pos1; end_ref2 = pos2;
			}
		}
	
	reflen_nogp = pos;
}

/* Index (1-based) of the first non-zero entry of pos[1..len], or 0 if none. */
static int first_numbered_column(const int *pos, int len)
{
	int i;

	for (i = 1; i <= len; i++) {
		if (pos[i] != 0) return i;
	}
	return 0;
}

/* Index (1-based) of the last non-zero entry of pos[1..len], or 0 if none. */
static int last_numbered_column(const int *pos, int len)
{
	int i;

	for (i = len; i >= 1; i--) {
		if (pos[i] != 0) return i;
	}
	return 0;
}

/* Score the evaluated alignment against the reference alignment.
 *
 * apos1, apos2        residue numbers of sequence 1 and 2 in each column of
 *                     the evaluated alignment, read as [1..segment_len];
 *                     0 marks a column without a numbered residue
 * aposref1, aposref2  residue numbers of the reference aligned pairs, read
 *                     as [1..reflen_nogp]
 * start_ref1/2, end_ref1/2
 *                     first and last residue number covered by the reference
 *
 * segment_len and reflen_nogp are read from the file-scope globals.
 *
 * Results are written to globals:
 *   accuracy1   (Nacc1) number of reference pairs that sit in the same
 *               column of the evaluated alignment
 *   accuracy2   (Nacc2) as accuracy1, plus exp(-factor*shift) for every
 *               reference pair whose two residues are `shift` columns apart
 *   coverage1, coverage2, falsecov
 *               overlap statistics between the residue ranges of the two
 *               alignments
 *   q_modeller, q_developer, q_combined
 *               accuracy1 divided by, respectively, the number of evaluated
 *               pairs, the number of reference pairs, and the size of their
 *               union
 *
 * If either evaluated row has no numbered residue, or the residue ranges of
 * the evaluated and reference alignments do not overlap, coverage1,
 * coverage2, accuracy1 and accuracy2 are set to 0 and the other outputs are
 * left untouched.
 *
 * Always returns 0.
 */
int CompareAlnVsReferenceAln_calculateNacc(int *apos1, int *apos2, int *aposref1, int *aposref2, int start_ref1, int  start_ref2, int end_ref1, int end_ref2)
{
	const double factor = 0.6931472;	/* decay rate per column of shift; numerically ln 2 */
	int i, j, k;
	int first1, first2, last1, last2;
	int start_ali1, start_ali2, end_ali1, end_ali2;
	int start_common1, end_common1, start_common2, end_common2;
	int len_common1, len_common2;
	int num_pairs_evaluated_ali;

	/* Residue range covered by each evaluated sequence. The scans are bounded
	   so that a row without any numbered residue cannot run off the arrays. */
	first1 = first_numbered_column(apos1, segment_len);
	first2 = first_numbered_column(apos2, segment_len);
	if (first1 == 0 || first2 == 0) {
		coverage1 = coverage2 = accuracy1 = accuracy2 = 0.0;
		return 0;
	}
	last1 = last_numbered_column(apos1, segment_len);
	last2 = last_numbered_column(apos2, segment_len);

	start_ali1 = apos1[first1];
	start_ali2 = apos2[first2];
	end_ali1 = apos1[last1];
	end_ali2 = apos2[last2];

	/* Intersection of the evaluated and reference residue ranges */
	end_common1 = (end_ali1 < end_ref1) ? end_ali1 : end_ref1;
	end_common2 = (end_ali2 < end_ref2) ? end_ali2 : end_ref2;
	start_common1 = (start_ali1 > start_ref1) ? start_ali1 : start_ref1;
	start_common2 = (start_ali2 > start_ref2) ? start_ali2 : start_ref2;

	len_common1 = end_common1 - start_common1 + 1;
	len_common2 = end_common2 - start_common2 + 1;

	if (len_common1 <= 0 || len_common2 <= 0) {
		coverage1 = coverage2 = accuracy1 = accuracy2 = 0.0;
		return 0;
	}

	coverage1 = 1.0*(len_common1 + len_common2)/((end_ref1 - start_ref1 + 1) + (end_ref2 - start_ref2 + 1));
	coverage2 = 0.5*len_common1/(end_ref1 - start_ref1 + 1) + 0.5*len_common2/(end_ref2 - start_ref2 + 1);
	falsecov = 1.0*((end_ali1-start_ali1+1) - len_common1 + (end_ali2-start_ali2+1) - len_common2)/ (len_common1+len_common2);

	/* For every reference pair, find the column j holding its first residue
	   and the column k holding its second residue in the evaluated alignment.
	   The whole arrays are searched rather than only the intersection. */
	accuracy1 = accuracy2 = 0.0;
	for (i = 1; i <= reflen_nogp; i++) {
		for (j = 1; j <= segment_len; j++) {
			/* numbered entries increase along the row, so stop once past the target */
			if (apos1[j] > aposref1[i]) break;
			if (apos1[j] != aposref1[i]) continue;

			for (k = 1; k <= segment_len; k++) {
				if (apos2[k] > aposref2[i]) break;
				if (apos2[k] != aposref2[i]) continue;

				if (k == j) {
					/* exact match: both residues share a column */
					accuracy1 += 1.0;
					accuracy2 += 1.0;
				} else {
					/* shifted match: partial credit decaying with the shift */
					accuracy2 += exp(-factor*(abs(k-j)));
				}
			}
		}
	}

	/* Number of columns of the evaluated alignment with a residue in both rows */
	num_pairs_evaluated_ali = 0;
	for (i = 1; i <= segment_len; i++) {
		if (apos1[i] && apos2[i]) { num_pairs_evaluated_ali++; }
	}

	q_modeller = 1.0*accuracy1/num_pairs_evaluated_ali;
	q_developer = 1.0*accuracy1/reflen_nogp;
	/* Q_combined = ratio of N correctly aligned pairs to N pairs aligned by either FSSP or evaluated ali */
	q_combined = 1.0*accuracy1/(num_pairs_evaluated_ali + reflen_nogp - accuracy1);

	return 0;
}

