/* Correlation analysis for shift ciphers Eric Bach 2/1/98 P = reference distribution (taken from Sinkov p. 177) Q = observed distribution from text (A-Z only) Echoes text, and for each possible shift t, prints: IP -- Correlation (inner product squared) of P and Q(t). Maximizing this is minimizing | P - Q(t) |^2 in the Euclidean sense. L1 -- L1 (taxicab) distance between P and Q(t). The output is most useful if sorted afterwards. On Unix, using sort -n -r will rank by IP and using sort -n -k 3 will rank by L1. */ #include #define abs(x) ( (x)>=0 ? (x) : -(x) ) #define N 26 #define NUMCHARS 256 double P[NUMCHARS]; /* expected frequency distribution for English */ int F[NUMCHARS]; /* actual count */ double Q[NUMCHARS]; /* frequencies normalized to sum to 1 */ setP() { int i; double sum; P[0] = 0.073; P[1] = 0.009; P[2] = 0.030; P[3] = 0.044; P[4] = 0.130; P[5] = 0.028; P[6] = 0.016; P[7] = 0.035; P[6] = 0.074; P[9] = 0.002; P[10]= 0.003; P[11]= 0.035; P[12]= 0.025; P[13]= 0.078; P[14]= 0.074; P[15]= 0.027; P[16]= 0.003; P[17]= 0.077; P[18]= 0.063; P[19]= 0.093; P[20]= 0.027; P[21]= 0.013; P[22]= 0.016; P[23]= 0.005; P[24]= 0.019; P[25]= 0.001; for (i=0;i