\documentclass[11pt]{article}
\include{lecture}
\usepackage{subfigure}
\begin{document}
\lecture{13}{10/4/10}{Factoring Integers}{Mark Wellons}
%\draft
In this lecture, we review order finding and use this to develop a
method for factoring integers efficiently. With the exception of
order finding, none of today's derivations rely on quantum computing.
\section{Order Finding}
In the previous class we covered order finding, which solves the following problem: Given integers $a,M>0$ and $0<a<M$, find the smallest integer $r>0$ such that $a^r \equiv 1 \mod M$.
Recall from the previous lecture that we used eigenvalue estimation to develop a quantum procedure that runs in time poly-log$(M + N)$ and returns $\tilde{\omega}_j$ such that $j$ is uniformly distributed in $\{ 1, 2, \dots, r\}$ and
\begin{equation}\label{13:equ:eigenvalue estimation}
\Pr \left[ \left| \tilde{\omega}_j - \frac{j}{r} \right| \le \frac{1}{N} \right] \ge \frac{8}{\pi^2} \approx 0.81.
\end{equation}
Here $N$ is defined as
\begin{equation}
N = 2^n,
\end{equation}
where $n$ is the number of qubits used in the eigenvalue estimation and with larger $n$ comes greater accuracy. There are two important facts about this procedure that we can exploit.
\subsection{Fact 1}
If the eigenvalue estimation is precise enough that
\begin{equation}\label{13:equ:eigenvalue precision}
\left| \tilde{\omega}_j - \frac{j}{r} \right| \le \frac{1}{2M^2}
\end{equation}
then we can recover $j/r$ in reduced terms in time poly-log$ (M )$. By reduced terms, we mean that we can find $j'$ and $r'$ such that $\gcd(j',r') = 1$ and $j'/r' = j/r$.
To recover $j/r$ in reduced terms, we use continued fraction expansion (CFE). By definition, a continued fraction takes the form
\begin{equation}
a_0 + \frac{b_1}{a_1 + \frac{b_2}{a_2 + \frac{b_3}{\dots} }}
\end{equation}
where $a_i, b_i \in \mathbb{Z}$. A continued fraction is sometimes denoted as
\begin{equation}
\sum_{i=0}^\infty \frac{b_i \rvert}{\lvert a_i}
\end{equation}
and the $k$th convergent is
\begin{equation}\label{13:equ:kth convergence}
\sum_{i=0}^k \frac{b_i \rvert}{\lvert a_i} = \frac{p_k}{q_k}
\end{equation}
where
$p_k, q_k \in \mathbb{Z}$ and $\gcd(p_k, q_k ) = 1$.
To construct the CFE of some $x \in \mathbb{R}$, we write $x$ as
\begin{equation}
x = \lfloor x \rfloor + \left(x - \lfloor x \rfloor \right) = \lfloor x \rfloor +\frac{1}{1/ (x - \lfloor x \rfloor )}.
\end{equation}
Since ${1}/ (x - \lfloor x \rfloor ) \ge 1$, it itself can be expanded into a CFE. Eventually, the expansion will end if for some iteration $x - \lfloor x \rfloor = 0$, which will happen if and only if $x$ is rational. If $x$ is irrational, this expansion continues forever but the sequence of convergents quickly converges to $x$. As an example of CFE, consider the case where $x = \pi$.
\begin{eqnarray*}
\pi & = & 3.14\dots\\
\pi & = & 3 + 0.14\dots \Rightarrow \frac{p_0}{q_0} = 3 \\
\pi & = & 3 + \frac{1}{1/0.14\dots} \\
\pi & = & 3 + \frac{1}{7 +0.06\dots} \Rightarrow \frac{p_1}{q_1} = 3 + \frac{1}{7} = \frac{22}{7}
\end{eqnarray*}
\subsubsection{Properties of Continued Fraction}
Recall from equation (\ref{13:equ:kth convergence}) that $q_k$ is the denominator of the $k$th convergent. It will always be true that
\begin{equation}\label{13:equ:Properties of CRE}
q_{k+2} \ge 2q_k
\end{equation}
and
\begin{equation}
\left| \frac{p_k}{q_k} - x \right| \le \frac{1}{q_k^2}.
\end{equation}
From these two equations, it should be clear that CFE converges very quickly.
Additionally, if
\begin{equation} \label{13:equ:CFE convergence}
\left| \frac{p}{q} - x \right| \le \frac{1}{2q^2}
\end{equation}
and $\gcd(p,q)=1$ then $p/q$ appears as a convergent for some iteration of the CFE of $x$. Note the similarity between equation (\ref{13:equ:CFE convergence}) and equation (\ref{13:equ:eigenvalue precision}): since the reduced denominator satisfies $r' \le r < M$, the bound $1/(2M^2)$ is at most $1/(2(r')^2)$. If we set $N =2M^2$ and perform the order finding procedure to get some $\tilde{\omega}_j$, we can therefore use CFE on $\tilde{\omega}_j$ to recover $j/r$ in reduced terms, that is, $j'$ and $r'$. It follows from equation (\ref{13:equ:Properties of CRE}) that the number of convergents we need to calculate is logarithmic in the size of $M$.
\subsection{Fact 2}
If we pick two integers, $j_1$ and $j_2$, independently and uniformly at random from $\{1,2, \dots, r \}$ then
\begin{equation}\label{13:equ:fact2}
\Pr \left[ \gcd \left(j_1, j_2 \right) = 1 \right] \ge 1 - \sum_{p \in \textrm{prime}}^r\frac{1}{p^2} > 1 - \sum_{p \in \textrm{prime}}^\infty\frac{1}{p^2} \ge 0.54.
\end{equation}
To show the inequality, consider that for any $j$ we pick, the probability that it is divisible by a given prime $p$ is asymptotically $1/p$, and is always $\le 1/p$. Since $j_1$ and $j_2$ are picked independently, the probability that they are both divisible by the prime $p$ is $\le 1/p^2$. Summing over all primes gives, by the union bound, an upper bound on the probability that they share \textit{any} prime factor, thus the inequality shown in equation (\ref{13:equ:fact2}).
In the case that $j_1$ and $j_2$ are relatively prime, we have $r = \textrm{lcm}(r'_1, r'_2)$. Indeed, when the values $j_1$ and $j_2$ underlying the two runs of eigenvalue estimation are relatively prime, any factor of $r$ that was canceled in reducing $j_1/r$ to $j_1'/r_1'$ could not also have been canceled in reducing $j_2/r$ to $j_2'/r_2'$. Thus every factor of $r$ survives in $r_1'$ or $r_2'$, and $r = \textrm{lcm}(r'_1, r'_2)$.
\subsection{Quantum Algorithm}
We can now describe our order finding algorithm. We first run the
eigenvalue estimation twice and get a $\tilde{\omega}_1$, and
$\tilde{\omega}_2$. Using CFE, we can determine $r_1'$ and $r'_2$ and
compute $r = \textrm{lcm}(r'_1, r'_2)$. Using modular exponentiation,
we check whether $a^r \equiv 1 \bmod M$ in time polylog$(M)$. If so,
we know that $r$ equals the order of $a$ modulo $M$, or is a
nontrivial multiple. The probability of the former is at least the
probability that we have success in equation
(\ref{13:equ:eigenvalue estimation}) for both independent runs, and
success in equation (\ref{13:equ:fact2}). This
puts the total probability of success above $0.35$ provided $N\ge
2M^2$. With this probability, we can simply repeat this algorithm
several times and output the smallest $r$ retained. This gives the
correct result with very high probability.
Let us consider what the total running time of this algorithm is. We naturally choose $N = 2M^2$, so the running time is in terms of $M$, and using the naive implementation of multiplication, this runs in $\mathcal O \left( (\log M)^3 \right)$. The most efficient known algorithm runs in time $\mathcal O \left( (\log M)^2 (\log \log M) (\log \log \log M) \right)$.
\section{Factoring Integers}
When asked to factor some number $M$, we should first check if it is a
prime or a prime power. This check can be done in polynomial time, and if $M$ is a prime or prime power, we are done. If $M$ is composite, we need only a means to find a single nontrivial factor, as we can simply divide $M$ by this factor and repeat our factoring algorithm as needed. To find a nontrivial factor, we use two lemmas.
\subsection{Necessary Lemmas}
The first lemma lets us factor $M$ if we can find some $x$ such that $x^2 \equiv 1 \mod M$ and $x \not \equiv \pm 1 \mod M$.
\begin{lemma}\label{13:lemma:gcd}
If integers $x, M > 0$ satisfy $x^2 \equiv 1 \mod M$ and $x \not \equiv \pm 1 \mod M$, then $\gcd(x \pm 1, M) $ is a nontrivial factor of $M$.
\end{lemma}
\begin{proof}
Since
\begin{equation}
x^2 \equiv 1 \mod M,
\end{equation}
this implies that
\begin{equation}
x^2 -1 \equiv 0 \mod M.
\end{equation}
Factoring the left side gives
\begin{equation}
(x -1)(x+1) \equiv 0 \mod M.
\end{equation}
Thus $M$ divides the product $(x -1)(x+1)$. Furthermore we know that $x \not \equiv \pm 1 \mod M$, so $M$ divides neither $x - 1$ nor $x + 1$; in particular $\gcd(x \pm 1, M) \ne M$. If we had $\gcd(x - 1, M) = 1$, then $M$ would have to divide $x + 1$, a contradiction, and symmetrically for $\gcd(x + 1, M)$; hence $\gcd(x \pm 1, M) \ne 1$. Thus $\gcd(x \pm 1, M) $ is a nontrivial factor of $M$.
\end{proof}
The second lemma, combined with our order-finding algorithm, lets us find the $x$ that we use in lemma \ref{13:lemma:gcd}.
\begin{lemma}
If $M$ has $k $ distinct prime factors then the probability that
$\textrm{order}_M(y)$ is even and that $y^{\textrm{order}_M(y)/2} \not \equiv \pm 1 \bmod
M$ is at least $1 - 1/{2^{k-1}}$, where $y$ is picked uniformly at
random from the set of integers modulo $M$ that are relatively prime to
$M$.
\end{lemma}
\begin{proof}
We omit the proof, but it uses the Chinese remainder theorem. The full proof can be found in appendix four of \cite{book}.
\end{proof}
\subsection{Factoring Algorithm}
We can now describe our factoring algorithm for an integer $M$ with $k$ distinct prime factors. If $k =1$, then $M$ must be either prime or a prime power. However, checking that $M$ is prime or prime power can be done in polynomial time. If $M$ is composite, we pick $y \in \{1, 2, \dots,M-1 \}$ uniformly at random.
If $\gcd(y, M) \ne 1$, then we are done, as the GCD is the nontrivial factor. Otherwise, we check that order$_M(y)$ is even, and if so, compute $\gcd(y^{\textrm{order}_M(y)/2}+1, M)$ and see if this is a non-trivial factor of $M$. If so, we are done. Otherwise, we pick another $y$ and try again.
This algorithm can only fail if the order$_M(y)$ is not even or
$y^{\textrm{order}_M(y)/2} \equiv \pm 1 \mod M$, which occurs with
probability at most $1/{2^{k-1}}$. As $k$ is at least 2, the
probability of failure is at most $1/2$.
\section{Breaking RSA}
Besides purely academic interest in factoring numbers, there are also
some applications for this algorithm, particularly in cryptographic
systems. The best known is breaking the RSA public-key system, which is
widely used in electronic commerce protocols.
If Bob wants to communicate with Alice using the RSA system, Alice will generate two keys. The first is a public key, which she will publish and Bob will use to send messages. The second is a private key which Alice shares with no one.
\subsection{Construction}
The private key consists of two distinct primes $p$ and $q$ and an integer $d$ such that
\begin{equation}\label{13:equ:distinct_primes}
\gcd\left(d, (p-1)(q-1) \right) = 1.
\end{equation}
$p$ and $q$ are typically chosen at random in a manner that an eavesdropper would have difficulty guessing. Equation (\ref{13:equ:distinct_primes}) implies the existence of integer $e$ such that $de \equiv 1 \mod ((p-1)(q-1))$. Alice can classically compute $e$ efficiently using Euclid's algorithm.
The public key that Alice publishes consists of simply $e$ and $n$, where $n =pq$.
\subsection{Encryption}
Suppose that Bob wants to send a message $M$ to Alice where $M \in \{0, 1, \dots, n-1 \}$, but is concerned somebody might eavesdrop on the communication channel and discover $M$. Instead of sending $M$ itself, Bob will compute the cyphertext
\begin{equation}\label{13:equ:cyphertext}
C = M^e \mod n
\end{equation}
and send $C$ to Alice.
Alice then computes
\begin{eqnarray}
C^d \mod n & \equiv& \left(M^e \right)^d \mod n \\
& \equiv & M^{1 + k(p-1)(q-1)} \mod n \label{13:equ:step2}
\end{eqnarray}
where $k$ is the integer for which $de = 1 + k(p-1)(q-1)$. Recall Fermat's little theorem, which states that if $p$ is a prime and $a$ is an integer coprime with $p$, then
\begin{equation}\label{13:equ:little}
a^{p-1} \equiv 1 \mod p.
\end{equation}
Additionally, note that if $\gcd(p,q) =1$ and $n =pq$, then
\begin{equation}\label{13:equ:mod}
a \equiv b \mod n \Leftrightarrow a \equiv b \mod p \textrm{ and } a \equiv b \mod q.
\end{equation}
From these two equations, we can simplify equation (\ref{13:equ:step2}) to
\begin{eqnarray}
C^d \mod n & \equiv & M \mod n\label{13:equ:step3} \\
& = & M.\label{13:equ:step4}
\end{eqnarray}
Going from equation (\ref{13:equ:step3}) to (\ref{13:equ:step4}) is
trivially true as $M < n$.
To illustrate RSA consider the following figure, which shows Bob
sending a message to Alice, but there is an eavesdropper listening in
on the communication channel.
% The process begins with Alice sending Bob the public key, but an
% eavesdropper receives this as well, as show in figure
% \ref{13:fig:RSA_1}.
% \begin{figure}[h!]
% \begin{center}
% \includegraphics[height=2.5 cm]{RSA_1}
% \end{center}
% \caption{Alice transmits the public key on an unsecured channel.
% Bob receives it, but so does Eve, the
% eavesdropper. } \label{13:fig:RSA_1}
% \end{figure}
Bob encrypts his message $M$ as $C$ using equation
(\ref{13:equ:cyphertext}), and sends it through the channel as shown
in the figure.
\begin{figure}[h!]
\begin{center}
\includegraphics[height=2.5 cm]{RSA_2}
\end{center}
\caption{Bob transmits $C$ back to Alice. Alice and Eve both receive it. } \label{13:fig:RSA_2}
\end{figure}
At this point Alice can recover $M$ using equation
(\ref{13:equ:step4}). Eve, having access to $C, e$ and $n$, can in
principle recover $M$, but the only known ways to do so involve
factoring $n$ into $p$ and $q$, for which no efficient classical
algorithm is known.
However, if Eve has a quantum computer, she \textit{can} efficiently factor $n$, and thus recover $M$. Therefore, efficient factoring breaks RSA, as we can factor $n$ into $p$ and $q$. From $p$ and $q$, we can use $e$ to compute $d$ giving us Alice's private key.
This has far reaching implications, as a malicious party with a sufficiently powerful quantum computer can break many modern electronic commerce and email encryption systems.
\begin{thebibliography}{}
\bibitem{book}
Michael A. Nielsen and Isaac L. Chuang.
\newblock {Quantum Computation and Quantum Information}.
\newblock {\em Cambridge}, 2000.
\end{thebibliography}
\end{document}