/* A program for taking a (possibly arbitrary) alignment score matrix * and back-calculating the implied target frequencies p_ab. * * Doing this requires solving for a nonzero lambda in: * \sum_ab f_a f_b e^{\lambda s_ab} = 1 * and this is a good excuse to demo two methods of root-finding: * bisection search and the Newton/Raphson method. * * The program is ANSI C, and should compile on any machine with a * C compiler: * % cc -o lambda lambda.c -lm * * You need two datafiles. The lists the background * residue frequencies; it is also used to determine what alphabet * you're using (DNA, 4 letters; or amino acids, 20 letters). The * is the score matrix. The format of this is the same as * any BLAST scoring matrix. (Any letters that aren't in your * backgroundfile are ignored; this lets you read matrices that * include scores for IUPAC ambiguity codes.) * * Example formats: * a : * A 0.25 * C 0.25 * G 0.25 * T 0.25 * * a : * A C G T * A +1 -2 -2 -2 * C -2 +1 -2 -2 * G -2 -2 +1 -2 * T -2 -2 -2 +1 * * See the bottom of this file for amino acid examples you can * cut out and save to files. * * To run the program: * % ./lambda * * The program prints the implied target frequencies for the matrix, * and some other statistics. * *-------------------------------------------------------------------- * * Organization of the code: * * section 1: main(), the main body of the program. * * section 2: The root-finding methods, bisection() and * newtonraphson(). Look here for an example of * how these methods work. * * section 3: output routines. Boring. But if you want to * hack something else into the output, either hack * main(), or hack one of the routines here. * * section 4: a couple of allocation routines. * * section 5: file format parsers. Equally boring. But if you * are having trouble with file formats, look here * for more format documentation. * * section 6: example file formats. Copies of the BLOSUM62 score * matrix, and the background frequencies used in * constructing BLOSUM62. * *-------------------------------------------------------------------- * * SRE, Sat Jun 26 09:47:59 2004 * This code is explicitly assigned to the public domain. You may * reuse it as you wish. * CVS $Id: lambda.c,v 1.7 2004/07/01 13:44:33 eddy Exp $ */ #include #include #include #include #include #define MAXABET 20 /* sets maximum alphabet size */ #define TOLERANCE 1e-6 /* how close to 0 root-finders must get */ /* Function declarations (in order of appearance) */ static float val(float **s, float *f, int K, float lambda); static float deriv(float **s, float *f, int K, float lambda); static float bisection(float **s, float *f, int K); static float newtonraphson(float **s, float *f, int K); static void output_alphalabeled_matrix(float **mx, int K, char *alphabet); static void output_alphalabeled_vector(float *f, int K, char *alphabet); static float **FMX2Alloc(int rows, int cols); static void FMX2Free(float **mx); static int ParseBackgroundFile(FILE *bfp, char *alphabet, int *ret_K, float *fq); static int ParseScoreMatrixFile(FILE *mfp, char *alphabet, int K, float **mx); int main(int argc, char **argv) { char *backgroundfile; char *matrixfile; FILE *fp; char alphabet[MAXABET+1]; /* alphabet order (e.g. ACGT) */ int K; /* size of alphabet (4 or 20) */ int a, b; /* indices of two residues */ float lambda; /* the lambda we've solved for */ float **s; /* score matrix s_ab */ float *f; /* background frequencies f_a, f_b */ float **pab; /* solved target frequencies p_ab */ float x; /* generic single number solution */ float *vec; /* generic vector solution */ float **mx; /* generic matrix solution */ /* Parse the command line. */ if (argc != 3) { fprintf(stderr, "Usage: ./lambda \n"); exit (1); } backgroundfile = argv[1]; matrixfile = argv[2]; /* Allocations. */ if ((f = malloc(sizeof(float) * MAXABET)) == NULL) { fprintf(stderr, "malloc failed\n"); exit(1); } if ((s = FMX2Alloc(MAXABET, MAXABET)) == NULL) { fprintf(stderr, "malloc failed\n"); exit(1); } if ((pab = FMX2Alloc(MAXABET, MAXABET)) == NULL) { fprintf(stderr, "malloc failed\n"); exit(1); } if ((vec = malloc(sizeof(float) * MAXABET)) == NULL) { fprintf(stderr, "malloc failed\n"); exit(1); } if ((mx = FMX2Alloc(MAXABET, MAXABET)) == NULL) { fprintf(stderr, "malloc failed\n"); exit(1); } /* Read in the background frequencies. * This also sets K (alphabet size; 4 or 20) and * the order of the alphabet (maybe ACGT or ACDEFGHIKLMNPQRSTVWY). */ if ((fp = fopen(backgroundfile, "r")) == NULL) { fprintf(stderr, "Failed to open background file %s for reading\n", backgroundfile); exit(1); } if (! ParseBackgroundFile(fp, alphabet, &K, f)) { fprintf(stderr, "Failed to parse background file %s\n", backgroundfile); exit(1); } fclose(fp); printf("Your input background frequencies:\n"); output_alphalabeled_vector(f, K, alphabet); printf("\n"); /* Read in the scoring matrix, which must be KxK. * Print it out (so user can make sure it's what they thought.) */ if ((fp = fopen(matrixfile, "r")) == NULL) { fprintf(stderr, "Failed to open matrix file %s for reading\n", matrixfile); exit(1);} if (! ParseScoreMatrixFile(fp, alphabet, K, s)) { fprintf(stderr, "Failed to parse matrix file %s\n", matrixfile); exit(1); } fclose(fp); printf("Your input scoring matrix:\n"); output_alphalabeled_matrix(s, K, alphabet); printf("\n"); /* Verify the two necessary conditions for the matrix: * 1. There must be at least one positive score. * 2. The expected score must be negative. */ for (a = 0; a < K; a++) for (b = 0; b < K; b++) if (s[a][b] > 0.) goto HAVE_POSITIVE; fprintf(stderr, "Score matrix has no positive score: can't solve lambda\n"); exit(1); HAVE_POSITIVE: /* one of few excuses for a goto: break out of nested loop. */ x = 0.; for (a = 0; a < K; a++) for (b = 0; b < K; b++) x += f[a] * f[b] * s[a][b]; if (x >= 0.) { fprintf(stderr, "Score matrix has nonnegative expectation: can't solve lambda\n"); exit(1); } /* Find lambda by Newton/Raphson algorithm. * Replace w/ bisection(s,f,K) if you want to play with the other * rootfinding method. */ lambda = newtonraphson(s, f, K); printf("Solve for lambda (Newton/Raphson method):\n"); printf("lambda = %f\n", lambda); printf("f(lambda) = %f ~= 0\n", val(s, f, K, lambda)); printf("\n"); /* Use lambda to backcalculate target frequencies p_ab: * p_ab = f_a f_b e^{lambda s_ab} */ for (a = 0; a < K; a++) for (b = 0; b < K; b++) pab[a][b] = f[a]*f[b]* exp(lambda * s[a][b]); printf("The implied target probabilities p_ab:\n"); output_alphalabeled_matrix(pab, K, alphabet); printf("\n"); /* The marginal probabilities of residues p_a in * the query sequence: not necessarily the same as * the background frequencies. * p_a = \sum_b p_ab */ for (a = 0; a < K; a++) { vec[a] = 0.; for (b = 0; b < K; b++) vec[a] += pab[a][b]; } printf("The implied marginal query composition p_a:\n"); output_alphalabeled_vector(vec, K, alphabet); printf("\n"); /* The conditional probabilities p(b | a) for * probability of target residue b given query residue a: * p(b | a) = p_ab / p_a */ for (a = 0; a < K; a++) for (b = 0; b < K; b++) mx[a][b] = pab[a][b] / vec[a]; printf("Implied conditional probabilities p(b|a):\n"); output_alphalabeled_matrix(mx, K, alphabet); printf("\n"); /* Find the expected score of the score matrix: * \sum_ab f_a f_b s_ab * Report in bits: so, x lambda / log 2. * (We already made sure this was negative, but now that we * know lambda, we can report it in units of bits.) * * Note: for BLOSUM62, result is -.46 bits; but header of BLOSUM62 * file says expect = -.52. Discrepancy arises from a roundoff * error. matblas (the program that constructed BLOSUM62) calculated * the expected score using an unrounded version of s_ab, but later * scaled and rounded to integers to get the BLOSUM62 we know and * love. Same applies for discrepancy in lambda: should be .3466 * (log 2/2), but the actual integer BLOSUM62 gives .3208. The * original unrounded blosum62.sij can be found on the Net. */ x = 0.; for (a = 0; a < K; a++) for (b = 0; b < K; b++) x += f[a] * f[b] * s[a][b]; x *= lambda / log(2.); printf("Expected score = %.4f bits\n", x); /* Find the relative entropy (average score in true alignments) * H = \sum_ab p_ab s_ab * Report in bits. */ x = 0.; for (a = 0; a < K; a++) for (b = 0; b < K; b++) x += pab[a][b] * s[a][b]; x *= lambda / log(2.); /* convert to bits */ printf("Relative entropy = %.4f bits\n", x); /* Find the expected % sequence identity in homologous * alignments: */ x = 0.; for (a = 0; a < K; a++) x += pab[a][a]; printf("Target %% identity = %.1f\n", x*100.); FMX2Free(s); FMX2Free(pab); FMX2Free(mx); free(f); free(vec); exit(0); } /* ================================================================ * Section 2: Solving for lambda with rootfinding methods. * ================================================================ * * We're looking for the nonzero \lambda that satisfies: * \sum_ab f_a f_b e^{\lambda s_ab} - 1 = 0 * * And if s_ab has at least one positive score, and has a negative * expectation, there is guaranteed to be one and only one nonzero * solution. * * This is a rootfinding problem (find x that satisfies f(x) = 0). * It sets up a nice, simple example for seeing how the bisection and * Newton/Raphson methods work. * * val() calculates f(lambda) for a given lambda. This is the * function that we're trying to find a root for. * * deriv() is the first derivative f'(lambda) at a given lambda. * We need first derivative info for the Newton/Raphson * method. * * bisection() implements the bisection search, and * newtonraphson() implements Newton/Raphson. * * * A good general reference on numerical root finding methods is * _Numerical Recipes in C_, WH Press et al., Cambridge Univ Press 1993. */ /* Calculate and return f(lambda): \sum_ab f_a f_b e^{lambda s_ab} - 1 */ static float val(float **s, float *f, int K, float lambda) { int a,b; float total = -1.; for (a = 0; a < K; a++) for (b = 0; b < K; b++) total += f[a] * f[b] * exp(lambda * s[a][b]); return total; } /* First derivative is: \sum_ab f_a f_b s_ab e^{lambda s_ab} */ static float deriv(float **s, float *f, int K, float lambda) { int a,b; float deriv = 0.; for (a = 0; a < K; a++) for (b = 0; b < K; b++) deriv += f[a] * f[b] * s[a][b] * exp(lambda * s[a][b]); return deriv; } /* The bisection method. * * The key idea of bisection is this: you bracket the root, finding an * x1 for which f(x1) < 0, and an x2 for which f(x2) > 0 -- so the * solution where f(x)=0 must lie in the interval (x1,x2). (Here, we * also happen to know that x1 < x2; f(x) is an increasing function * around the root we care about.) Then you guess a point x3 in * between, x1 < x3 < x2. If f(x3) > 0, then the solution is still to * our left; we reset the bracketed interval to (x1,x3). If f(x3) < 0, * the solution is to our right, and we reset the interval to * (x3,x2). We iterate this until we reach a suitably close f(x) ~ 0. * * Bisection is very robust; it is guaranteed to find at least one * root, if at least one root exists. * * Numerically, we can't expect to reach f(x) = 0 exactly. * We may reach two "adjacent" x's in the computer's floating point * representation that bracket the root. We can either check * that f(x) is "close enough" (within some TOLERANCE, say 1e-5, of 0) * or we can directly test that x1 and x2 are adjacent floating * point numbers: if (x1+x2)/2 = x2 || (x1+x2)/2 == x2, this is * the case. We use both tests here. */ static float bisection(float **s, float *f, int K) { float initial_guess; float left_lambda, right_lambda, mid_lambda; float fx; int iter; /* Our initial guess can be anything, but it doesn't * hurt to be close. */ initial_guess = 0.2; /* Find lambda that gives a result f(lambda) < 0; * this is the left side of our initial bracket. */ left_lambda = initial_guess; for (iter = 0; iter < 100; iter++) { if (val(s, f, K, left_lambda) < 0) break; left_lambda /= 2.; } if (iter == 100) { fprintf(stderr, "Failed to find left bracket\n"); exit(1); } /* Find lambda that gives a result of f(lambda) > 0; * this is the right side of our initial bracket. */ right_lambda = initial_guess; for (iter = 0; iter < 100; iter++) { if (val(s, f, K, right_lambda) > 0) break; right_lambda *= 2.; } if (iter == 100) { fprintf(stderr, "Failed to find right bracket\n"); exit(1); } /* Now, the iterative bisection search. */ for (iter = 0; iter < 100; iter++) { mid_lambda = (left_lambda + right_lambda) / 2; /* check if interval is as tight as numerically possible: */ if (mid_lambda == left_lambda || mid_lambda == right_lambda) break; fx = val(s, f, K, mid_lambda); /* check if we're suitably close to f(lambda) == 0 */ if (fabs(fx) < TOLERANCE) break; /* narrow the bracket: */ if (fx > 0) right_lambda = mid_lambda; else left_lambda = mid_lambda; } if (iter == 100) { fprintf(stderr, "Bisection search failed\n"); exit(1); } return mid_lambda; } /* The Newton/Raphson method. * * The basic idea is that for any guess of x, the first derivative f'x * should tell us which direction the root is in; and we can even make * a good guess where, by assuming f(x) is a straight line defined by * the slope f'(x). Our new guess for x is x - f(x)/f'(x). (If you * don't see why, draw the situation on a graph; it should be easy to * see geometrically.) We can iterate this until we find the root. * * Newton/Raphson is not robust in general; there are pathological * f(x)'s that make it blow up. We have to be careful that our * f(x) is amenable to Newton/Raphson. Here, f(lambda) is well behaved. * The only trick is that we have two roots (one of them is lambda=0, * and one of them is the lambda we want to find), so we don't * want Newton/Raphson to find the lambda=0 solution. It's sufficient * to make sure that we start with an initial guess to the right * of our desired root, where f(lambda) > 0. * * Again, numerical convergence must be tested, finding when * f(lambda) is within TOLERANCE (1e-5 or so) of 0. */ static float newtonraphson(float **s, float *f, int K) { float lambda, newlambda; int iter; float fx, dfx; /* There's two zeros, not one: lambda=0 is a solution. * To find the nonzero zero (ahem), we've got to make sure * we start to the right of it: where the function is positive. * Start with anything, then move right until fx > 0. */ lambda = 0.1; for (iter = 0; iter < 100; iter++) { if (val(s, f, K, lambda) > 0.) break; lambda *= 2.; } if (iter == 100) { fprintf(stderr, "failed to find a start pt for newton/raphson\n"); exit(1); } /* Now, starting from there, come back in with the Newton/Raphson * algorithm. At least in theory, because we know what f(x) looks * like, this should be well-behaved, smoothly converging to the * solution. */ for (iter = 0; iter < 100; iter++) { fx = val(s, f, K, lambda); if (fabs(fx) < TOLERANCE) break; /* success! */ dfx = deriv(s, f, K, lambda); newlambda = lambda - fx / dfx; /* This update defines Newton/Raphson */ if (newlambda <= 0) newlambda = 0.000001; /* this shouldn't happen */ lambda = newlambda; } if (iter == 100) { fprintf(stderr, "Newton/Raphson search failed\n"); exit(1); } return lambda; } /* ================================================================ * Section 3: Output routines. * ================================================================ */ /* Output an alphabet-labeled square matrix of floating point #'s. */ static void output_alphalabeled_matrix(float **mx, int K, char *alphabet) { int a,b; printf(" "); for (b = 0; b < K; b++) printf(" %c ", alphabet[b]); printf("\n"); for (a = 0; a < K; a++) { printf("%c ", alphabet[a]); for (b = 0; b < K; b++) printf("%8.4f ", mx[a][b]); printf("\n"); } } /* Output an alphabet-labeled vector of floating point #'s. */ static void output_alphalabeled_vector(float *f, int K, char *alphabet) { int a; printf(" "); for (a = 0; a < K; a++) printf(" %c ", alphabet[a]); printf("\n"); printf(" "); for (a = 0; a < K; a++) printf("%8.4f ", f[a]); printf("\n"); } /* ================================================================ * Section 4: Allocation routines. * ================================================================ */ static float ** FMX2Alloc(int rows, int cols) { float **mx; int r; if ((mx = (float **) malloc(sizeof(float *) * rows)) == NULL) return NULL; if ((mx[0] = (float *) malloc(sizeof(float) * rows * cols)) == NULL) { free(mx); return NULL; } for (r = 1; r < rows; r++) mx[r] = mx[0] + r*cols; return mx; } static void FMX2Free(float **mx) { free(mx[0]); free(mx); } /* ================================================================ * Section 5: File parsers. * ================================================================ */ /* Function: ParseBackgroundFile() * Incept: SRE, Tue Jun 22 15:03:47 2004 [St. Louis] * * Purpose: Parse a file of background residue frequencies. * * File format: * Blank lines are ignored. Lines w/ # as their * first non-whitespace are comments, and are also * ignored. * * Expects lines in format: * * * Example: * ----------------------- * A 0.33 * C 0.17 * G 0.17 * T 0.33 * ----------------------- * * This gets parsed into fq[], in the residue * order specified by alphabet. The parser also * stores the alphabet[], and the size of the * alphabet K. * * Example: * ----------------------- * float fq[4]; * char alphabet[MAXABET+1]; * int K; * FILE *bfp; * * bfp = fopen("datafile", "r"); * ParseBackgroundFile(bfp, alphabet, &K, fq); * fclose(bfp); * ----------------------- * * after this, fq[0] is f_A; fq[1] is f_C; etc; * K=4; and alphabet="ACGT". * * Args: bfp - open file ptr for reading. * alphabet - RETURN: residue alphabet, in order of storage in fq; * caller allocates for MAXABET+1, MAXABET is the maximum * alphabet size (20). * ret_K - RETURN: # of residues in alphabet * fq - RETURN: frequencies, 0..K-1 * Caller allocates this, and is responsible * for free'ing it. * * Returns: 1 on success; 0 on failure. * * Xref: STL8 p.50 */ static int ParseBackgroundFile(FILE *bfp, char *alphabet, int *ret_K, float *fq) { char buffer[512]; /* input buffer for lines from fp */ char *s1; /* tmp ptr into buffer, field 1 (residue) */ char *s2; /* tmp ptr into buffer, field 2 (frequency) */ int i; int n; float sum; n = 0; while (fgets(buffer, 512, bfp) != NULL) { if ((s1 = strtok(buffer, " \t\n")) == NULL) continue; /* blank */ if (*s1 == '#') continue; /* comment*/ if ((s2 = strtok(NULL, " \t\n")) == NULL) return 0; /* oops, eol*/ if (n >= MAXABET) return 0; /* out of space */ alphabet[n] = *s1; fq[n] = atof(s2); n++; } alphabet[n] = '\0'; *ret_K = n; /* Make sure it looks like a remotely plausible frequency * distribution (it should sum to one); if it's at all close, * normalize it. */ sum = 0.; for (i = 0; i < n; i++) sum += fq[i]; if (sum < 0.9 || sum > 1.1) return 0; for (i = 0; i < n; i++) fq[i] /= sum; return 1; } /* Function: ParseScoreMatrixFile() * Incept: SRE, Tue Jun 22 13:17:16 2004 [St. Louis] * * Purpose: Takes an open score matrix file, and parses * it into an allocated matrix "mx", in the * order given by symbols in "alphabet". * * Scores for symbols other than those in "alphabet" * are ignored. (For instance, score matrices often * include scores for IUPAC ambiguity codes and '*', * and maybe we only want to keep unambiguous standard * symbols like ACGT.) * * Allows up to 30 rows/cols in the matrix. * * Format of datafile: * Lines starting with '#' as their first non-blank * character are considered to be comments, and are * ignored. Blank lines are also ignored. * * First data line is a header with residue symbols, * labeling the columns. The number of symbols in * the score matrix is determined from this line. * * Next "nsymbols" valid lines are the data lines * of the matrix. They contain either nsymbols or * nsymbols+1 fields, because they are allowed to * have a leading residue label for the row (this * label, if present, is ignored). These rows * must be in the same order that the columns were * in. * * Example: * * -------------------------------- * A C G T * * A 4 -2 -2 -2 -4 * C -2 4 -2 -2 -4 * G -2 -2 4 -2 -4 * T -2 -2 -2 4 -4 * * -4 -4 -4 -4 -4 * -------------------------------- * * This has nsymbols=5. * * Example of parsing it is: * * --------------------------------- * float mx[4][4]; * FILE *mfp; * * mfp = fopen("datafile", "r"); * ParseScoreMatrixFile(mfp, "ATGC", 4, mx); * fclose(mfp); * --------------------------------- * * now mx[1][1] is the score for T/T; * mx[2][2] is the score for G/G; etc. * (that is, the rows/cols are in the order * specified by "ATGC", rearranged from how * they were in the file.) * * Args: mfp - open score matrix file for reading; * Caller must open; caller must close. * alphabet - which letters are to be parsed, in what order; * e.g. "ACGT" * K - # of residues in the parsed alphabet. * mx - RETURN: the score matrix. * Caller must allocate this KxK; and caller * is responsible for free'ing it. * * Returns: 1 on success; 0 on failure. * * Xref: STL8 p.50. */ static int ParseScoreMatrixFile(FILE *mfp, char *alphabet, int K, float **mx) { char buffer[512]; /* input buffer for lines from fp */ int order[30]; /* order of fields, from header */ char *sptr; /* tmp ptr into the buffer */ char *s2; /* tmp ptr into alphabet */ int nsymbols; /* total # of symbols in the mx */ int row,col; /* counters for rows, columns */ int i,n; for (i = 0; i < 30; i++) order[i] = -1; /* Look for the first non-blank, non-comment line in the file. * It gives single-letter codes in the order the PAM matrix * is arrayed in the file. * (fgets() and strtok() are both deprecated in real C * applications; only used here for a simple ANSI C example.) */ do { if (fgets(buffer, 512, mfp) == NULL) return 0; } while ((sptr = strtok(buffer, " \t\n")) == NULL || *sptr == '#'); /* Parse that line; figure out which rows/columns we're going * to store, and in what order. (This lets the caller request only * unambiguous, standard codes like a 4x4 matrix indexed for "ACGT", * even if the matrix file contains scores for IUPAC ambiguity * codes, and/or w/ symbols in a different order.) */ i = 0; do { s2 = strchr(alphabet, *sptr); /* is this a residue we'll store? */ if (s2 != NULL) order[i] = (int) (s2-alphabet); /* yes */ i++; } while ((sptr = strtok(NULL, " \t\n")) != NULL); nsymbols = i; /* Doublecheck that we're going to store a KxK matrix. */ n=0; for (i = 0; i < 30; i++) if (order[i] != -1) n++; if (n != K) return 0; /* The next nsymbols rows are the score matrix. * The first field of each may or may not be a letter, depending * on what format the matrix is in; if it's a letter, ignore it. */ row = 0; do { if (fgets(buffer, 512, mfp) == NULL) return 0; /* oops, eof */ if ((sptr = strtok(buffer, " \t\n")) == NULL) continue; /* blank line */ if (*sptr == '#') continue; /* comment */ if (isalpha(*sptr)) { /* skip char */ if ((sptr = strtok(NULL, " \t\n")) == NULL) return 0; /* bad line */ } col = 0; do { /* parse each field on the line */ if (sptr == NULL) return 0; /* oops, eol */ if (order[row] != -1 && order[col] != -1) /* a keeper? */ mx[order[row]][order[col]] = atof(sptr); /* yes. */ sptr = strtok(NULL, " \t\n"); col++; } while (col < nsymbols); row++; } while (row < nsymbols); return 1; } /* ================================================================ * Section 6: Example data files. * ================================================================ */ /* ---------------------------------------------------------------- * Example amino acid score matrix file (BLOSUM62) * Cut and save in your own file. * * ---------------------- ---------------------------------- # Matrix made by matblas from blosum62.iij # * column uses minimum score # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units # Blocks Database = /data/blocks_5.0/blocks.dat # Cluster Percentage: >= 62 # Entropy = 0.6979, Expected = -0.5209 A R N D C Q E G H I L K M F P S T W Y V B Z X * A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 * ---------------------- ---------------------------------- * */ /* Example background frequency file. * [These were the background frequencies used to construct BLOSUM62; * from blosum62.out, the matblas output file from 1992.] * Cut and save to a file. * ---------------------- ---------------------------------- A 0.074 C 0.025 D 0.054 E 0.054 F 0.047 G 0.074 H 0.026 I 0.068 L 0.099 K 0.058 M 0.025 N 0.045 P 0.039 Q 0.034 R 0.052 S 0.057 T 0.051 V 0.073 W 0.013 Y 0.034 * ---------------------- ---------------------------------- * */