\documentclass[11pt]{article}
\input{lecture}
\begin{document}
\lecture{10}{9/27/2010}{Eigenvalue Estimation}{Dalibor Zelen\'y}
Last time we discussed the quantum Fourier transform, and introduced the
problem of phase estimation. Today we conclude the discussion
of phase estimation, and apply it to the problem of eigenvalue estimation.
\section{Phase Estimation}
Recall that in the phase estimation problem, we are given a state
$\ket{\psi}$ of the form
$$\ket{\psi} = \frac{1}{\sqrt{N}} \sum_x e^{2\pi i \omega x} \ket{x}$$
for some real $0 \le \omega < 1$.
Our goal is to determine the approximate value of $\omega$.
In order to find $\omega$, we apply the inverse Fourier transform to the
state $\ket{\psi}$ and observe the system. We interpret the observation as
an integer $z$, and output $z/N$ as our approximation of $\omega$.
Define $\Delta$ to be the smallest real number $d$ (in terms of absolute value)
such that $e^{2 \pi i (z/N + d) x} = e^{2 \pi i \omega x}$, and note that
$|\Delta| \le 1/2$. We define $\Delta$ this way to make some facts easier
to state, and to make their proofs simpler.
We saw last time that if $\omega = z/N$ for some integer $z$, this algorithm
finds $\omega$ exactly.
When $\omega$ doesn't have the form $z/N$ for any integer $z$, we can only
approximate $\omega$ by outputting a value $z/N$ that's close to $\omega$
in the sense that $e^{2\pi i \omega x}$ and $e^{2\pi i (z/N) x}$ are close.
The best we can hope to find is a $z$ that minimizes $|\Delta|$, i.e.,
for which $|\Delta| \le 1/2N$.
The reason we cannot do better is that we only have a limited number (namely
$n$) qubits to work with. We showed last time that
\begin{equation}
\label{10:eq:phase_approximation}
F^{-1}\ket{\psi} = \sum_z \alpha_z \ket{z} \quad \textrm{with} \quad
\alpha_z = \frac{1}{N} \cdot \frac{1 - e^{2\pi i \Delta N}}{1 - e^{2\pi i\Delta}}.
\end{equation}
Unlike in the case where $\omega$ has the form $z/N$, we are not guaranteed
that we observe $z$ that minimizes $|\Delta|$. We observe a good $z$ with
high probability, as shown in the following claims. We prove the first and
the third claim. The proof of the second claim is left as an exercise.
\begin{claim}
\label{10:claim:optimal}
$\Pr[\textrm{We observe } z \textrm{ that minimizes } |\Delta|] \ge 4/\pi^2$.
\end{claim}
\begin{claim}
\label{10:claim:exercise}
$\Pr[\textrm{We observe } z \textrm{ such that } |\Delta| \le 1/N] \ge 8/\pi^2$.
\end{claim}
\begin{claim}
\label{10:claim:bad}
$\Pr[\textrm{We observe } z \textrm{ such that } |\Delta| \ge \delta] \le O(1/\delta N)$.
\end{claim}
\begin{proof}[Proof of Claim \ref{10:claim:optimal}]
When we observe a $z$ that minimizes $|\Delta|$, we have $|\Delta| \le 1/(2N)$.
The probability of observing this $z$ is $|\alpha_z|^2$. We give a lower
bound on this probability using \eqref{10:eq:phase_approximation} and a
geometric argument.
We get from \eqref{10:eq:phase_approximation} that
\begin{equation}
\label{10:eq:proof1}
|\alpha_z| = \frac{1}{N} \cdot \frac{|1 - e^{2 \pi i \Delta N}|}{|1 - e^{2 \pi i \Delta}|}.
\end{equation}
The numerator in \eqref{10:eq:proof1} is the distance between points
$a$ and $b$ in Figure \ref{10:fig:circle} with
$b = e^{2 \pi i \Delta N} = e^{i \theta}$, where $-\pi \le \theta \le \pi$.
Taking the right triangle formed by the points $0$, $a$, and $c$,
we see that $|b-a| = 2|\sin(\theta/2)|$. Note that for any
$\theta \in [-\pi,\pi]$,
we have $|\sin(\theta/2)| \ge |\theta / 2| / (\pi / 2) = |\theta|/\pi$, so
$|1 - e^{2 \pi i \Delta N}| = 2|\sin(\theta/2)| \ge 2|\theta| / \pi$.
Therefore, $|1 - e^{2 \pi i \Delta N}| \ge 2 \cdot (2 \pi |\Delta| N) / \pi =
4 |\Delta| N$. For the denominator, since the arc between two points is
longer than the line segment between
those two points, we have $|1 - e^{2 \pi i \Delta}| \le 2\pi|\Delta|$.
Combining the two yields
$$|\alpha_z| \ge \frac{1}{N} \cdot \frac{4 |\Delta| N}{2 \pi |\Delta|} = \frac{2}{\pi},$$
so $|\alpha_z|^2 \ge 4/\pi^2$ as we wanted.
\end{proof}
\begin{figure}[htb]
\centering
\input{10.fig.circle.pstex_t}
\caption{A geometric aid for the proof of Claims \ref{10:claim:optimal} and
\ref{10:claim:bad}. The circle has radius $1$, and
we have $a = 1$ and $b = e^{i \theta}$.
The point $c$ is in the middle of the line segment from $a$ to $b$.}
\label{10:fig:circle}
\end{figure}
\begin{exercise}
Prove Claim \ref{10:claim:exercise}.
\end{exercise}
\begin{proof}[Proof of Claim \ref{10:claim:bad}]
We need an upper bound for $|\alpha_z|$ now.
First note that the numerator in \eqref{10:eq:proof1} is at
most $2$ because it's the distance between two points on a unit circle.
Since $|\Delta| \le 1/2$, we have $|1 - e^{2 \pi i \Delta}| =
2 |\sin(\pi\Delta)| \ge 2 (\pi|\Delta|)/(\pi/2) = 4|\Delta|$, so
$|\alpha_z| \le 2/(|\Delta| N)$, and
\begin{equation}
\label{10:eq:bad}
|\alpha_z|^2 \le \left(\frac{2}{|\Delta| N}\right)^2.
\end{equation}
We need to sum \eqref{10:eq:bad} over all $z$ that cause a large value of
$\Delta$. The smallest $|\Delta|$ can be in order to count towards that sum
is $\delta$. Since we output integers, the next possible values of
$|\Delta|$ are $\delta + 1/N$, $\delta + 2/N$, and so on. Each of those
values occurs for two values of $z$ (once in an overestimate and once in an
underestimate). The smallest value of $|\Delta| N$ is then $\delta N$, and
the other possible values are $\delta N + k$ for positive integers $k$.
Since the summand is decreasing in $k$, we can bound the tail of the sum by
an integral after separating out the first term:
\begin{align*}
\Pr[\textrm{We observe } z \textrm{ such that } |\Delta| \ge \delta] &\le
2 \sum_{k = 0}^\infty \left(\frac{2}{N\delta + k}\right)^2 \\
&\le 2 \left(\frac{2}{N\delta}\right)^2 + 2 \int_{x = 0}^\infty \left(\frac{2}{N\delta + x}\right)^2 dx \\
&= \frac{8}{(N\delta)^2} + 2 \int_{x = N\delta}^\infty \left(\frac{2}{x}\right)^2 dx \\
&\le O\left(\frac{1}{N\delta}\right).
\end{align*}
\end{proof}
\section{Eigenvalue Estimation}
In eigenvalue estimation, we are given a unitary operator $U$ acting on $m$
qubits and an eigenvector $\ket{\varphi}$ of $U$. Since $\ket{\varphi}$ is
an eigenvector of $U$, it follows that
\begin{equation}
\label{10:eq:eig}
U\ket{\varphi} = e^{2 \pi i \omega}\ket{\varphi} \quad \textrm{for some } \omega \in [0,1).
\end{equation}
Our goal is to estimate the eigenvalue corresponding to $\ket{\varphi}$, which
really means we just need a good estimate of $\omega$ in \eqref{10:eq:eig}.
As we will see soon, we can use phase estimation to find $\omega$.
Before we can find $\omega$, we need to create a superposition that admits
the use of phase estimation, namely something that looks like a Fourier
transform. We do so using an idea similar to phase
kickback---we inject the eigenvalue into the amplitude.
We apply a controlled $U$ operator to construct the necessary superposition.
The new operator, $CU$, has the following behavior on an eigenvector
$\ket{\varphi}$:
\begin{align*}
(CU)\ket{0}\ket{\varphi} &= \ket{0}\ket{\varphi} \\
(CU)\ket{1}\ket{\varphi} &= \ket{1} U\ket{\varphi} = e^{2\pi i \omega} \ket{1} \ket{\varphi}
\end{align*}
Then if we apply $CU$ to the superposition $\ket{+}\ket{\varphi}$, we get
$$(CU)\ket{+}\ket{\varphi} = \frac{\ket{0} + e^{2 \pi i \omega}\ket{1}}{\sqrt{2}} \ket{\varphi}.$$
Recall that our goal is to get something that looks like a Fourier transform.
To that end, construct $\ket{y_j}$ as follows:
$$\ket{y_j}\ket{\varphi} = (CU)^{2^j}\ket{+}\ket{\varphi} = \frac{\ket{0} + e^{2 \pi i 2^j \omega} \ket{1}}{\sqrt{2}} \ket{\varphi},$$
so we have
\begin{equation}
\label{10:eq:superposition}
\ket{y}\ket{\varphi} = \left(\bigotimes_{j = 0}^{n-1} \frac{\ket{0} + e^{2 \pi i 2^j \omega} \ket{1}}{\sqrt{2}}\right)\ket{\varphi} = \frac{1}{\sqrt{N}} \sum_x e^{2 \pi i \omega x} \ket{x}\ket{\varphi}.
\end{equation}
Figure \ref{10:fig:circuit} shows the circuit that produces the superposition
\eqref{10:eq:superposition}.
We construct it as a concatenation of $CU$ gates
raised to powers of two from $1$ to $2^{n-1}$, each controlled by a different
qubit in the $\ket{+}$ state. After that,
the $n$ control qubits are in the right superposition for phase estimation,
so we apply the
inverse Fourier transform to them, make an observation, and get an
approximation of $\omega$ like we did in phase estimation. Note that
the three claims we stated for phase estimation carry over to this setting.
\begin{figure}[htb]
\centering
\[
\Qcircuit @C=.7em @R=.4em {
\lstick{\ket{+}} & \qw & \ctrl{4} & \qw & \qw & \cdots & & \qw & \multigate{3}{F^{-1}} & \meter & \qw \\
\lstick{\ket{+}} & \qw & \qw & \ctrl{3} & \qw & \cdots & & \qw & \ghost{F^{-1}} & \meter & \qw \\
\vdots & & & & & & & & \ghost{F^{-1}} & \vdots & \\
\lstick{\ket{+}} & \qw & \qw & \qw & \qw & \cdots & & \ctrl{1} & \ghost{F^{-1}} & \meter & \qw \\
\lstick{\ket{\psi}} & {/^m} \qw & \gate{U} & \gate{U^2} & \qw & \cdots & & \gate{U^{2^{n-1}}} & \qw & \qw & \qw
}
\]
\caption{The Eigenvalue Estimation Circuit}
\label{10:fig:circuit}
\end{figure}
We make a few remarks about the circuit in Figure \ref{10:fig:circuit}. First,
this is efficient only if we can construct higher powers of the controlled
$U$ gates efficiently. For example, if we only have oracle access to $U$, we
are out of luck and need $k$ consecutive applications of the $CU$ gate to
get $(CU)^k$. But even that may be sufficient in some applications, as we will
see in the next section.
Second, when we apply eigenvalue estimation, we aren't always
going to have access to an eigenvector $\ket{\varphi}$, so let's see what
happens when
we use some general state $\ket{\psi}$ instead. We can write this state as
a linear combination $\sum_j \alpha_j \ket{\varphi_j}$ of eigenvectors of $U$.
After we apply the inverse Fourier transform in Figure \ref{10:fig:circuit},
the state is $\sum_j \alpha_j \ket{\widetilde{\omega_j}} \ket{\varphi_j}$.
With probability $|\alpha_j|^2$,
we observe a good approximation of $\omega_j$. Thus, we get
an estimation of some eigenvalue out of the algorithm. Whether this is useful
or not depends on the application. In the next section we will see some
applications where this is useful information.
\section{Applications of Eigenvalue Estimation}
Eigenvalue estimation has many applications. We list a few here.
\begin{itemize}
\item{An implementation of Grover's algorithm}
\item{Approximating the Fourier transform over $\mathbb{Z}_N$ for $N$ other than powers of $2$}
\item{Solving well-conditioned sparse systems of linear equations}
\item{Order finding and integer factorization}
\item{Computing discrete logarithms}
\end{itemize}
We describe the first application today. We will discuss the other applications
in the coming lectures.
\subsection{Grover's Algorithm}
Recall that in Grover's algorithm, we are given oracle access to
$f : \{0,1\}^m \to \{0,1\}$ and our goal is to find an input $x$ such
that $f(x) = 1$. During the analysis, we noted that all positive inputs
(those where $f(x) = 1$) had the same amplitude, and also that all negative
inputs (those where $f(x) = 0$) had the same amplitude. Let $t$ be the number
of positive inputs. We defined superpositions
$\ket{B} = \frac{1}{\sqrt{M - t}} \sum_{f(x) = 0} \ket{x}$ and
$\ket{C} = \frac{1}{\sqrt{t}} \sum_{f(x) = 1} \ket{x}$ representing all the
negative and all the positive inputs, respectively, and viewed the state as
a superposition of $\ket{B}$ and $\ket{C}$. The goal of the algorithm
was to increase the amplitude of the positive inputs and decrease the amplitude
of the negative inputs. We achieved that by describing an operator $G$ and
applying it the right number of times. For the analysis, we viewed the
$B$ component of our state on the horizontal axis, the $C$ component
on the vertical axis, and the state itself as a point on the unit circle.
In fact, looking at Figure \ref{10:fig:circle}, the state would be at
point $b$ and would have an angle of $\theta$ with the positive $B$ axis.
Applying $G$ had the effect of rotating the state counterclockwise by
$2\theta$.
\begin{exercise}
\label{10:ex:grover_eigenvalues}
The eigenvalues of $G$ and their corresponding eigenvectors are
\begin{align*}
% Also \begin{ugly hack to get the two columns closer to each other}
\lambda_+ &= e^{2 i \theta}, & \hspace{-3cm} \ket{\varphi_+} &= \frac{1}{\sqrt{2}} \ket{B} + \frac{i}{\sqrt{2}} \ket{C} \\
\lambda_- &= e^{-2 i \theta}, & \hspace{-3cm} \ket{\varphi_-} &= \frac{i}{\sqrt{2}} \ket{B} + \frac{1}{\sqrt{2}} \ket{C}
\end{align*}
\end{exercise}
Using the eigenvectors from Exercise \ref{10:ex:grover_eigenvalues} above, we
can write the state as
$\ket{\psi} = \alpha_+ \ket{\varphi_+} + \alpha_- \ket{\varphi_-}$.
We apply the eigenvalue estimation algorithm to
$\ket{+}^{\otimes{n}}\ket{\psi}$ to get
\begin{equation}
\alpha_+ \ket{\widetilde{2\theta}}\ket{\varphi_+} + \alpha_- \ket{\widetilde{-2\theta}}\ket{\varphi_-}
\end{equation}
Therefore, we observe a good estimate $\gamma$ that is within $\delta$ of
either $2 \theta$ or $-2 \theta$ with probabilities $|\alpha_+|^2$ and
$|\alpha_-|^2$, respectively (modulo a loss of a tiny constant factor
depending on $\delta$ coming from Claim \ref{10:claim:bad}).
Finally, we approximate the number of positive inputs $t$ by
$\tilde{t} = M \cdot \sin^2(\gamma/2)$,
which is the same regardless of which of the two angles $\gamma$ was
approximating.
The actual size of the set of positive inputs is $t$, and we would like to
bound the difference $|t - \tilde{t}|$.
\begin{align}
|t - \tilde{t}| &= M |\sin^2 \theta - \sin^2 (\gamma/2)| \nonumber \\
&\le M \cdot \left(2 \sin \theta + \frac{\delta}{2}\right) \frac{\delta}{2} \label{10:eq:grover_lipschitz} \\
&\le \delta \sqrt{tM} + M \frac{\delta^2}{4} \label{10:eq:grover_t} \\
&= O(\sqrt{t}) \quad \left(\textrm{ for } \delta = \frac{1}{\sqrt{M}}\right) \label{10:eq:grover_bound}
\end{align}
We get \eqref{10:eq:grover_lipschitz} by factoring the line above and
using the fact that the sine is Lipschitz
continuous with the Lipschitz constant less than $1$. We get
\eqref{10:eq:grover_t} by definition of $t$.
If we pick $\delta$ as in \eqref{10:eq:grover_bound}, we get an estimate of $t$
within an additive factor of $\sqrt{t}$, which is a very good approximation.
We need $\sqrt{M}$ applications of $G$ (and thus $\sqrt{M}$ queries to $f$)
to get this accuracy. Then to run Grover's algorithm, we can approximate $t$
with $\tilde{t}$, and then apply $G$ $\tilde{t}$ times to do the search, and
make an observation. To do the approximation, we initialize $\ket{\psi}$ in
Figure \ref{10:fig:circuit} with $\ket{+}^{\otimes m}$, and use the controlled
version of $G$ in place of $CU$.
\section{Next Time}
Now suppose we observe the bottom $m$ wires in the circuit from Figure
\ref{10:fig:circuit} instead of the top $n$ wires. The state has the form
$\alpha_+ \ket{\widetilde{2\theta}}\ket{\varphi_+} +
\alpha_- \ket{\widetilde{-2\theta}}\ket{\varphi_-}$,
and the two components of the state are almost orthogonal,
so they do not interfere with each other too much.
Now both $\ket{\varphi_+}$ and
$\ket{\varphi_-}$ cause an observation of a positive example with probability
$1/2$ because they are both uniform superpositions of positive and negative
inputs. Then we might as well not use the inverse Fourier transform on the top
wires because it has no effect when performing Grover's algorithm.
%% But now we have exactly the Grover's algorithm
%% we described in a previous lecture. Since the top wires now represent a
%% uniform superposition, they essentially pick a number of iterations of $G$
%% from the uniform distribution, which is something we wanted in our
%% implementation of Grover's algorithm.
We make this intuition more formal in the next lecture.
\end{document}
% LocalWords: qubits qubit Lipschitz