% No 'submit' option for the problems by themselves.
\documentclass{cos302}
% Use the 'submit' option when you submit your solutions.
%\documentclass[submit]{cos302}
% Put in your full name and email address.
\name{Your Name}
\email{email@princeton.edu}
\discussants{Marie Curie \\ Leonhard Euler}
% You don't need to change these.
\course{COS302-F23}
\assignment{Assignment \#8}
\duedate{6:00pm Wednesday 15 November 2023}
\dropboxurl{https://www.gradescope.com/courses/606160/assignments/3624075}
\usepackage{bm} % for bold math
\usepackage{mathtools} % colon-equals
\usepackage{multirow}
\begin{document}
% IMPORTANT: Uncomment the \documentclass command above with the [submit] option and put your name where it says ``your name''.
\begin{center}
Assignments in COS 302 should be done individually. See the \href{https://www.cs.princeton.edu/courses/archive/fall23/cos302/files/syllabus.pdf}{course syllabus} for the collaboration policy.
\vspace{\baselineskip}
\textcolor{red}{%
Remember to append your Colab PDF as explained in the first homework, with all outputs visible.\\
When you print to PDF it may be helpful to scale at 95\% or so to get everything on the page.
}
\end{center}
\begin{problem}[18pts]
A coin is weighted so that its probability of landing on heads is 0.2. Suppose the coin is flipped 20 times.
\begin{enumerate}[(A)]
\item Compute the probability that the coin lands on heads at least 16 times.
\item Use Markov's inequality to bound the probability that the coin lands on heads at least 16 times.
\item Use Chebyshev's inequality to bound the probability that the coin lands on heads at least 16 times.
\end{enumerate}
\end{problem}
\newpage
\begin{problem}[30pts]
Lots of problems in science and engineering boil down to computing difficult integrals.
Monte Carlo methods are fundamentally about exactly this: numerical estimation of challenging integrals.
One really interesting and general-purpose Monte Carlo trick is called \emph{importance sampling}.
The idea is this: imagine that we have a definite integral on some domain~$\Omega$:
\begin{align*}
I = \int_{\Omega} g(x)\,dx\,.
\end{align*}
This could correspond to solving a difficult partial differential equation or a Bayesian machine learning problem, for example.
Importance sampling introduces a random variable~$X$ with a probability density function~$f_X(x)$ that has~$\Omega$ for its support, and it multiplies and divides the integrand by the density:
\begin{align*}
I = \int_{\Omega} g(x)\,dx = \int_{\Omega} \frac{f_X(x)}{f_X(x)}g(x)\,dx\,.
\end{align*}
This is an okay thing to do generally as long as~$f_X(x)$ is not zero anywhere that~$g(x)$ is nonzero.
We call~$f_X(x)$ the \emph{proposal distribution} and it turns this integral into something we can consider an expectation under~$f_X(x)$.
\begin{enumerate}[(A)]
\item Explain how the integral can be understood as an expectation.
\item Here's a concrete example of an annoying integral:
\begin{align*}
I = \int_0^1 e^{-5x^3}\,dx \,.
\end{align*}
Why might the beta family of distributions be a good choice for the proposal distribution in this case?
\item Plot the integrand.
\item Plot the beta PDF for each of the following~$(\alpha,\beta)$ pairs:~$(1,1),(1,3),(3,1),(2,5),(1,1.5)$.
\item For each of the pairs above, estimate this integral by drawing 10,000 random beta variates and computing the expectation.
Report the mean and variance for each estimate.
Which proposal distribution seemed to work the best? Why do you think that is?
(In this problem it is fine to use the functions in the \href{https://docs.scipy.org/doc/scipy/reference/stats.html}{scipy.stats} package to generate the samples and to compute the density function.)
\end{enumerate}
\end{problem}
\newpage
\begin{problem}[25pts]
The moment generating function (MGF) for a random variable $X$ is:
\begin{align*}
M_X(t) &= \mathbb{E}[e^{tX}]\,.
\end{align*}
Although this probably looks obnoxious, it's really just a function that corresponds to expectations for different values of~$t$.
Given a~$t$ you would just write down the sum or integral like you normally would to compute any other expectation.
If you've seen the Fourier or Laplace transforms before, the MGF should sort of remind you of them, as they are closely related.
One useful property of moment generating functions is that they make it relatively easy to compute weighted sums of independent random variables:
\begin{align*}
Z &= \alpha X + \beta Y & M_Z(t) &= M_X(\alpha t)M_Y(\beta t)\,.
\end{align*}
\begin{enumerate}[(A)]
\item Derive the MGF for a Poisson random variable $X$ with parameter $\lambda$.
\item Let $X$ be a Poisson random variable with parameter $\lambda$, as above, and let $Y$ be a Poisson random variable with parameter $\gamma$. $X$ and $Y$ are independent. Use the MGF to show that their sum is also Poisson. What is the parameter of the resulting distribution?
\item Derive the MGF for an exponential random variable~$X$ with parameter $\lambda$.
For what values of~$t$ does the MGF $M_X(t)$ exist?
\item Let $X$ be an exponential random variable with parameter $\lambda$, and let $\alpha>0$ be a constant. Show that the random variable $\alpha X$ is also exponentially distributed. What is its parameter?
\item Imagine that you have $k$ i.i.d.\ random variables $X_1,X_2,\ldots,X_k$, each of which is exponentially distributed with parameter $\lambda$. Show that the sum $\sum_{i=1}^{k} X_i$ is gamma distributed and compute its parameters. This will require finding the MGF of the gamma distribution. Use the following parameterization of the gamma density:
\begin{align*}
f(z\,;\,\alpha,\beta) &= \frac{\beta^\alpha}{\Gamma(\alpha)}z^{\alpha-1}e^{-\beta z}\,.
\end{align*}
\end{enumerate}
\end{problem}
\newpage
\begin{problem}[25pts]
Consider the following joint distribution over random variables $X$ and $Y$:
\begin{center}
\begin{tabular}{c c | l | l | l | l | l |}
\cline{3-7}
\multirow{4}{*}{$Y$} & $y_1$ & 0.01 & 0.02 & 0.03 & 0.1 & 0.1 \\ \cline{3-7}
& $y_2$ & 0.05 & 0.1 & 0.05 & 0.07 & 0.2 \\\cline{3-7}
& $y_3$ & 0.1 & 0.05 & 0.03 & 0.05 & 0.04\\\cline{3-7}
& \multicolumn{1}{c}{}& \multicolumn{1}{c}{$x_1$} & \multicolumn{1}{c}{$x_2$} & \multicolumn{1}{c}{$x_3$} & \multicolumn{1}{c}{$x_4$} & \multicolumn{1}{c}{$x_5$}\\
& \multicolumn{1}{c}{}& \multicolumn{5}{ c }{$X$}
\end{tabular}
\end{center}
Use Colab to compute the following quantities.
Remember to compute these in \textbf{bits}, which means using base 2 for logarithms.
Here's the PMF in Python to make life a bit easier:
\begin{lstlisting}[style=python]
PXY = np.array([[0.01, 0.02, 0.03, 0.1, 0.1],
[0.05, 0.1, 0.05, 0.07, 0.2],
[0.1, 0.05, 0.03, 0.05, 0.04]])
\end{lstlisting}
\begin{enumerate}[(A)]
\item What is the \href{https://en.wikipedia.org/wiki/Entropy_(information_theory)}{entropy}~$H(X)$?
\item What is the \href{https://en.wikipedia.org/wiki/Entropy_(information_theory)}{entropy}~$H(Y)$?
\item What is the \href{https://en.wikipedia.org/wiki/Conditional_entropy}{conditional entropy}~$H(X \,|\, Y)$?
\item What is the \href{https://en.wikipedia.org/wiki/Conditional_entropy}{conditional entropy}~$H(Y \,|\, X)$?
\item What is the \href{https://en.wikipedia.org/wiki/Joint_entropy}{joint entropy}~$H(X, Y)$?
\item What is the \href{https://en.wikipedia.org/wiki/Mutual_information}{mutual information}~$I(X\,;\,Y)$? Compute it once using the PMF and then once using relationships between the quantities you used in (A)-(E) to verify your answer.
\end{enumerate}
\end{problem}
\newpage
\begin{problem}[2pts]
Approximately how many hours did this assignment take you to complete?
\end{problem}
My notebook URL:
{\small \url{https://colab.research.google.com/XXXXXXXXXXXXXXXXXXXXXXX}}
\subsection*{Changelog}
\begin{itemize}
\item 1 Nov 2023 -- Initial F23 version
\end{itemize}
% \includepdf[pages=-]{mynotebook.pdf}
\end{document}