\documentclass[11pt]{article}
\usepackage{amssymb,amsmath,amsthm,url}
\usepackage{graphicx}
%uncomment to get hyperlinks
%\usepackage{hyperref}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Some macros (you can ignore everything until "end of macros")
\newif\ifpdf
\ifx\pdfoutput\undefined \else
\ifx\pdfoutput\relax
\else
\ifcase\pdfoutput
\else
\pdftrue
\fi
\fi
\fi
\topmargin 0pt \advance \topmargin by -\headheight \advance
\topmargin by -\headsep
\textheight 8.9in
\oddsidemargin 0pt \evensidemargin \oddsidemargin \marginparwidth
0.5in
\textwidth 6.5in
%%%%%%
\newcommand{\getsr}{\gets_{\mbox{\tiny R}}}
\newcommand{\bits}{\{0,1\}}
\newcommand{\Ex}{\mathbb{E}}
\newcommand{\eqdef}{\stackrel{def}{=}}
\newcommand{\To}{\rightarrow}
\newcommand{\e}{\epsilon}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\indist}{\approx}
\newcommand{\dist}{\Delta}
\newcommand{\PRG}{\mathsf{G}}
\newcommand{\Enc}{\mathsf{E}}
\newcommand{\Dec}{\mathsf{D}}
\newcommand{\Adv}{\mathsf{A}}
\newtheorem{theorem}{Theorem}
\theoremstyle{definition}
\newtheorem{exercise}{Exercise}
\newcommand{\angles}[1]{\langle #1 \rangle}
% end of macros
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{\vspace{-8ex} Handout 3: Computational Indistinguishability}
\author{Boaz Barak}
\date{Exercises due Tuesday October 11th, 2005 1:30pm \\
Total of 120 points (not including additional extra credit question
of 20 points).}
\begin{document}
\maketitle
Recall that $X$ and $Y$ are $(T,\e)$-computationally
indistinguishable, denoted by $X \indist_{T,\e} Y$ if for every
$T$-sized Boolean circuit $C$, $ \left| \Pr_X[C(X)=1] - \Pr_Y [
C(Y)=1 ] \right| \leq \e $
%
%\begin{exercise}[10 points] Prove that the following variant of the computational
%indistinguishability definition is equivalent to the original
%definition: We say that $X$ is
%$(T,\e)$-computationally-indistinguishable$'$ from $Y$, denoted by
%$X \indist'_{T,\e} Y$ if for every $T$-sized circuit, $C$,
%\[
% \Pr_X[ C(X)=1 ] - \Pr_Y[ C(Y)=1] \leq \e
%\]
%(That is, we dropped the absolute value).
%
%Prove that the two definitions are essentially equivalent. That is,
%prove that:
%\begin{itemize}
%\item If $X \indist_{T,\e} Y$ then $X
%\indist'_{T,\e} Y$
%
%\item If $X \indist'_{T,\e} Y$ then $X \indist_{T-1,\e} Y$.
%\end{itemize}
%
%\end{exercise}
%\begin{exercise}[20 points] Prove the transitivity of computational
%indistinguishability. That is, prove the following: Let $X$, $Y$,
%$Z$ be distributions such that $X \indist_{T,\e} Y$ and $Y
%\indist_{T,\e'} Z$. Then $X \indist_{T,\e+\e'} Z$.
%\end{exercise}
%\begin{exercise}[20 points] Prove that computational indistinguishability is
%maintained under composition with efficient functions. That is,
%prove the following: Let $X,Y$ over $\bits^m$ such that $X
%\indist_{T,\e} Y$ and let $f:\bits^m \To \bits^{m'}$ be a function
%computable by a $t$ circuit (for $t100n$). Let $\circ$ denote the concatenation operator (that is
for two strings $x,y$ $x\circ y$ is the concatenation of $x$ and
$y$). Then, $X \circ Y \indist_{T/5 , 5\e } X' \circ Y'$
\noindent\textbf{Hint:} Find a suitable intermediate distribution
and then use the transitivity / triangle inequality property of
computational indistinguishable.
\end{exercise}
\begin{exercise}[15 points] Prove the existence of a (not necessarily efficiently
computable) pseudorandom generator. That is, prove that for every
(sufficiently large) $n$, there exist a
$(2^{n/10},2^{-n/10})$-pseudorandom generator $g:\bits^n \To
\bits^{2^{n/20}}$. See footnote for hint\footnote{\textbf{Hint:}
\tiny Show that a random function $g(\cdot)$ will satisfy this
property with high probability. Do this by using the \emph{Chernoff
bound} (see first handout) to bound the probability of $g(\cdot)$
failing with respect to a fixed $2^{n/10}$-sized circuit $C$ (that
is, consider the experiment where first $C$ is fixed and \emph{then}
$g$ is chosen at random). Since the probability that $g(\cdot)$ is
not a PRG --- i.e. that it fails for \emph{some} circuit (that can
depend on the choice of $g(\cdot)$) --- is the union of these bad
events with respect to all possible circuits, you can use the
\emph{union bound} to bound it.}
Is the output of your function (i.e., $g(U_n)$) also
\emph{statistically close} to the uniform distribution on $2^{n/20}$
bits?
\end{exercise}
%\begin{exercise}[10 points]
%Prove that allowing \emph{probabilistic} distinguishers does not
%change the definition of computational indistinguishability: For
%$X$,$Y$ over $\bits^n$, we say that $X$ is
%$(T,\e)$-computationally-indistinguishable$'$ from $Y$, denoted by
%$X \indist'_{T,\e} Y$ if for every $T$-sized circuit, $C$ that gets
%$n+m$ inputs (for $m\leq T$)
%\[
% \Pr_{X,U_m}[ C(X,U_m)=1 ] - \Pr_{Y,U_m}[ C(Y,U_m)=1] \leq \e
%\]
%(In this equation the two occurrences of $U_m$ are two distinct
%independent random variables that are uniformly distributed over
%$\bits^m$. This is always our convention that occurrences of random
%variables inside different $\Pr[ \cdots ]$ statements are
%independent.)
%
%Prove that the two definitions are essentially equivalent. That is,
%prove that:
%\begin{itemize}
%\item If $X \indist'_{T,\e} Y$ then $X
%\indist_{T,\e} Y$
%
%\item If $X \indist_{T,\e} Y$ then $X \indist'_{T/10,\e} Y$.
%\end{itemize}
%\end{exercise}
%
\ifpdf
\begin{figure}[t]
\begin{center}
\includegraphics*{secureid.jpg}
\end{center}
\caption{RSA SecurID Device.} \label{fig:secureid}
\end{figure}
\fi
\begin{exercise}[15 points] The RSA SecurID card \ifpdf (see Figure~\ref{fig:secureid}) \fi
is a credit-card sized device that displays $6$ digits that change
every minute. The idea is that when you log into your account
remotely (say when you want to log into your UNIX account in
Princeton from an Internet Cafe) then you have to type the numbers
that appear in the card in addition to your PIN or password.
\begin{enumerate}
\item What is the security advantage of such a card over traditional
password? That is, what sort of attack can this card resist which
cannot be resisted using a standard password mechanism. (Assume that
it's possible for users to remember a $6$-digits PIN or a password
with similar security.)
\item Describe how you would implement such a scheme using
pseudorandom functions.
Assume that the PRF family takes a seed of size $n$, and that the
number of possible devices is $m$ (for $m<2^n$). How many bits of
storage does your implementation use at the server and each of the
devices? (there is an implementation that uses at most $O(n)$ bits).
\item Try to \emph{define} what it means that such a scheme
is \emph{secure} and sketch a proof that your construction satisfies
it (you don't have to formally define and prove if you don't want to
--- you can use English but try to be precise). Say how the security
depends on $n$ - the number of bits that
the device stores in memory (where its running time is polynomial in
$n$) and on $k$ - the number of digits that we display to the user.
\end{enumerate}
\end{exercise}
\begin{exercise}[15 points] Recall that in class we gave a construction of a \emph{probabilistic}
CPA-secure encryption scheme (i.e., the function $\Enc$ used extra
randomness in computing the encryption).
\begin{enumerate}
\item Show that there is no \emph{deterministic} and
\emph{stateless} CPA-secure encryption scheme.
\item Give a construction for a \emph{deterministic stateful}
CPA-secure encryption scheme. (Note that CPA security implies in
particular that the total number of messages is longer than the key
length.) A stateful deterministic encryption scheme is often called
a \emph{stream cipher}.
\end{enumerate}
\end{exercise}
\begin{exercise}[15 points] In these two questions you'll show that if we have
a pseudorandom function family with particular input and output
sizes, we can easily obtain a family that handles larger inputs and
outputs. (It's easy to handle smaller outputs and inputs by
truncation and padding.)
\begin{enumerate}
\item \emph{(Changing PRFs output size)} Prove that if there exists a
collection $\{ f_s \}$ of pseudorandom functions with
$f_s:\bits^{|s|} \To \bits$ (i.e., one-bit output) then there exists
a collection $\{ f'_s \}$ with $f'_s:\bits^{|s|}\To\bits^{|s|}$.
See footnote for hint.\footnote{\textbf{Hint:} \tiny First come up
with a pseudorandom family with output longer than $1$ but shorter
than $|s|$. For example, if $s \in \bits^{n^2}$ then the output can
be $n$. Then show that existence of PRF implies existence of
pseudorandom generators and use that to expand your output.}
\item \emph{(Changing PRFs input size)} Prove that if there exists a
collection $\{ f_s \}$ of pseudorandom functions with
$f_s:\bits^{|s|}\To\bits^{|s|}$ then there exists a collection $\{
f'_s \}$ with $f'_s:\bits^{*}\To\bits^{|s|}$ (i.e., $f'_s$ for a
random $s\in\bits^n$ is indistinguishable from a random function
from $\bits^*$ to $\bits^n$. See footnote for
hint\footnote{\textbf{Hint:} \tiny (This is definitely not the only
approach to do this.) First note that such a PRF family implies
immediately a family where $f_s:\bits^{|s|} \To \bits^{|s|/2}$. Then
try to use this to get a function $f'_s$ that works only for inputs
whose size is a multiple of $|s|/2$. Then try to get a function that
works for every finite length string.}
\end{enumerate}
\end{exercise}
%\begin{exercise}[20 points] Let $G:\bits^n \To \bits^{n+1}$ be a
%$(T,\e)$-pseudorandom generator. Let $y \in \bits^{n+1}$ be some
%string. Prove that the distribution $G(U_n) \oplus y$ is
%$(T/10,10\e)$-pseudorandom.
%\end{exercise}
\section*{Additional Exercises.}
\begin{exercise}[15 points] Recall that we defined a function $T:\N\To\N$ to be
\emph{super-polynomial} if $T(n)=n^{\omega(1)}$ or in other words,
for every constant $c>0$ there exists $N>0$ such that for every
$n>N$, $T(n)>cn^c$.\footnote{\textbf{Reminder of O notations:} (We
don't need all of these but it's good to know.) Let $f,g:\N\To\N$ be
two functions. We say that $f(n)=O(g(n))$ (or sometimes just
$f=O(g)$) if there is a constant $c>0$ and a number $N$ such that
for every $n>N$, $f(n) \leq c\cdot g(n)$. We say that
$f(n)=\Omega(g(n))$ if $g(n)=O(f(n))$ or equivalently, there's a
constant $c>0$ such that for every large enough $n$, $f(n) \geq
c\cdot g(n)$. We say that $f(n)=\Theta(g(n))$ if $f(n)=O(g(n))$ and
$f(n)=\Omega(g(n))$. We say that $f(n)=o(g(n))$ if $f(n)=O(g(n))$
but $f(n) \neq \Omega(g(n))$. In other words $f(n)=o(g(n))$ if for
\emph{every} constant $c>0$ there's an $N$ such that for every
$n>N$, $f(n)0$ if $n$ is large enough then
$f(n)
> c\cdot g(n)$.}
\begin{enumerate}
\item For each the following functions say (no need to prove)
whether it is super-polynomial or not.
\begin{enumerate}
\item $f_1(n)= 2^{\sqrt{n}}$.
\item $f_2(n) = n^{\log n}$
\item $f_3(n) = n\log n$.
\item
\[
f_4(n) = \begin{cases}
2^n & \text{$n$ even} \\
n^2 & \text{$n$ odd}
\end{cases}
\]
\end{enumerate}
\item Prove that for every super-polynomial function $T:\N\To\N$ the
function $T':\N\To\N$ defined as follows $T'(n) =
\frac{T(n^{1/3})^{1/3}}{n^3}$ (with all values rounded to integers
if they are not integers) is also super-polynomial.
\end{enumerate}
\end{exercise}
\paragraph{Pseudorandom permutations.} Recall that we define a
collection of permutations $\{ p_k \}_{k \in \bits^*}$ where for
$k\in\bits^n$, $p_k$ is a permutation over $\bits^{m(n)}$ to be a
\emph{pseudorandom-permutation collection} if it
satisfies:\footnote{We assume that $m$ and $n$ are polynomially
related. That is, $n^{1/c} < m(n) < n^c$ for some constant $c$.}
%for some polynomially bounded $m(\cdot)$ (i.e.,
%$n^{1/c} < m(n) < m^c$ for some constant $c$ and large enough $n$)
\vspace{-2ex}
\begin{itemize}
\item \emph{(Efficient computation)} The functions $(k,x) \mapsto p_k(x)$
and $(k,y) \mapsto p_k^{-1}(y)$ are efficiently computable (i.e., in
polynomial time).
\vspace{-1ex}
\item \emph{(Pseudorandomness)} There are super polynomial functions $T,\e$
such that for every $T(n)$ time adversary $\Adv$,
\[
\left| \Pr_{k \getsr \bits^n}\left[
\Adv^{p_k(\cdot),p_k^{-1}(\cdot)}(1^m)=1 \right] - \Pr_{P \getsr
\bits^m \stackrel{\text{\tiny 1-1}}{\To} \bits^m}\left[
\Adv^{P(\cdot),P^{-1}(\cdot)}(1^m)=1 \right] \right| < \e(n)
\]
(The notation $\Adv^{f,g}$ means that the adversary is given oracle
access to the functions $f(\cdot)$ , $g(\cdot)$ which naturally it
can query for at most $T$ times. If an event happens with
probability at most $1/n^{\omega(1)}$ then we say it happens with
\emph{negligible} probability.)
\end{itemize}
\begin{exercise}[15 points] Let $\{ p_k \}_{k\in\bits^*}$ be a pseudorandom permutation
collection, where for $k \in \bits^n$, $p_k$ is a permutation over
$\bits^m$.
\begin{enumerate}
\item Consider the following encryption scheme $(\Enc,\Dec)$:
$\Enc_k(x) = p_k(x)$ , $\Dec_k(y)=p_k^{-1}(y)$. Prove that this
scheme is \emph{not} a CPA-secure encryption.
\item Consider the following scheme $(\Enc,\Dec)$ that encrypts
$m/2$-bit messages in the following way: on input $x\in\bits^{m/2}$,
$\Enc_k$ chooses $r \getsr\bits^{m/2}$ and outputs $p_k(x,r)$ (where
comma denotes concatenation), on input $y\in\bits^{m/2}$, $\Dec_k$
computes $(x,r) = p^{-1}_k(y)$ and outputs $x$. Prove that
$(\Enc,\Dec)$ \emph{is} a CPA-secure encryption scheme.
\noindent\textbf{Hint:} You can prove first that this scheme
satisfies the weaker notion of \emph{multiple message security}.
That is, for every polynomial $p=p(n)$ and
$x_1,\ldots,x_p,x'_1,\ldots,x'_p \in \bits^{m/2}$ adversaries the
two sequences of random variables $\angles{ Enc_K(x_1) ,\ldots,
\Enc_K(x_p) }$ and $\angles{ \Enc_K(x'_1) , \ldots, \Enc_K(x'_p)}$
are computationally indistinguishable (where $K$ and $K'$ are two
independent random variables distributed uniformly over $\bits^n$).
\end{enumerate}
\end{exercise}
\newcommand{\CBC}[2]{\mathsf{CBC}_{#1}\angles{#2}}
\begin{exercise}[15 points] The CBC construction is often used to get
an encryption for larger message size. If $p:\bits^m\To\bits^m$ is a
permutation, then $\CBC{\ell}{p}$ is a permutation from
$\bits^{\ell\cdot m}$ to $\bits^{\ell\cdot m}$ defined in the
following way: for $x_1,\ldots,x_{\ell} \in \bits^m$, let $y_0 =
0^n$ and define $y_i = p(y_{i-1} \oplus x_i)$. Then,
$\CBC{\ell}{p}(x_1,\ldots,x_{\ell}) =
(y_1,\ldots,y_{\ell})$.\footnote{Often a different public value
instead of $0^m$ is chosen for $y_0$ although this does not make a
lot of difference for security. This value is called IV or
initialization vector.} Note that the inverse of $\CBC{\ell}{p}$ can
be computed in a similar way using the inverse of $p(\cdot)$.
Let $\{ p_k \}$ be a pseudorandom permutation collection. Determine
the CPA-security of the following two encryption schemes which are
based on the CBC construction. That is, for each scheme either prove
that it is CPA-secure or give an attack showing that it is not. For
simplicity, we consider only the 3-block variant of the scheme (i.e.
$\ell=3$).
\begin{enumerate}
\item\emph{(Padding in the end)} Given $p_k:\bits^m \To \bits^m$ and a
message $x=x_1,x_2 \in\bits^{2m}$, $\Enc_k$ chooses $r \getsr
\bits^m$ and outputs $\CBC{3}{p_k}(x_1,x_2,r)$. Decrypting done in
the obvious way.
\item\emph{(Padding in the start)} Given $p_k:\bits^m \To \bits^m$ and a
message $x=x_1,x_2 \in\bits^{2m}$, $\Enc_k$ chooses $r \getsr
\bits^m$ and outputs $\CBC{3}{p_k}(r,x_1,x_2)$. Decrypting done in
the obvious way.
\end{enumerate}
\end{exercise}
\newcommand{\kmas}{k_{\text{\sf\tiny master}}}
\begin{exercise}[Extra credit question --- do only if you have energy and time --- 20 points.]
Suppose that we designed a cryptographic chip implementing a
pseudorandom permutation and wanted to insert a hidden ``back door''
into it. More formally, we want to construct a collection $\{
P_{\kmas,k} \}$ that has two keys: the ``normal'' key $k$ and a
\emph{master} key $\kmas$. We will choose $\kmas$ at random and know
it, and give to our unsuspecting clients two black-boxes
$F_{\kmas}(\cdot,\cdot)$ and $B_{\kmas}(\cdot,\cdot)$ (for forward
and backward) that allow the client to specify $k$ and compute
$P_{\kmas,k}$ forwards and backwards (i.e.,
$F_{\kmas}(k,x)=P_{\kmas,k}(x)$ and
$B_{\kmas}(k,y)=P_{\kmas,k}^{-1}(y)$).\footnote{Actually, we can
allow a slight relaxation: $P_{\kmas,k}$ does not necessarily have
to be a permutation as long as without knowledge of $\kmas$ it is
not possible to find an $x$ on which the inversion algorithm fails.
That is, for some $x$'s it may be $B_{\kmas}(k,F_{\kmas}(k,x))\neq
x$ but these inputs should be hard to find by the client.}
To anyone not knowing $\kmas$
this should behave like a normal pseudorandom permutation, but we
should be able to break it.
That is, on the one hand if $\kmas$ and $k$ are chosen at random
then access to the algorithms $F_{\kmas}(k,\cdot)$ and
$B_{\kmas}(k,\cdot)$ is indistinguishable from access to a random
permutation and its inverse.
On the other hand, we'd like to be able to break the scheme and
recover the client's key $k$ using the master key $\kmas$. This
naturally leads to the following questions:
\begin{enumerate}
\item \emph{(Insecurity for chosen inputs)} Show a construction for such a
scheme where there exists a polynomial-time algorithm $A$ that if
$\kmas$ and $k$ are chosen at random and $A$ is given $\kmas$ as
input and access to a black box computing $P_{\kmas,k}$ then it can
recover $k$ with probability at least $0.9$.
\item \emph{(Insecurity for known inputs)}\footnote{Solving this took me a bit
of time, a slightly cumbersome construction, and also required me to
use the relaxation that $P_{\kmas,k}$ may not be completely a
permutation (but rather for a random $\kmas$, polynomial algorithms
can't find inputs $k,x$ on which $B_{\kmas}(F_{\kmas}(k,x))\neq x$
with non-negligible probability).} Show a construction for such a
scheme where there exists a polynomial-time algorithm $A$ and a
polynomial $q=q(n)$ such that if $\kmas$ and $k$ are chosen at
random, and $x_1,\ldots,x_q$ are also chosen uniformly and
independently at random, and $A$ is given $\kmas$ and the sequence
of pairs $\angles{x_1,P_{\kmas,k}(x_1)}$, $\ldots$,
$\angles{x_q,P_{\kmas,k}(x_q)}$ as input, then $A$ outputs $k$ with
probability at least $0.9$.
\end{enumerate}
\end{exercise}
\end{document}