% No 'submit' option for the problems by themselves.
\documentclass{cos302}
% Use the 'submit' option when you submit your solutions.
%\documentclass[submit]{cos302}
% Put in your full name and email address.
\name{Your Name}
\email{email@princeton.edu}
\discussants{Marie Curie \\ Leonhard Euler}
% You don't need to change these.
\course{COS302-F23}
\assignment{Assignment \#4}
\duedate{6:00pm Wednesday 4 October 2023}
\dropboxurl{https://www.gradescope.com/courses/606160/assignments/3377028}
\usepackage{bm} % for bold math
\usepackage{mathtools} % colon-equals
\begin{document}
% IMPORTANT: Uncomment the \documentclass command above with the [submit] option and put your name where it says ``your name''.
\begin{center}
Assignments in COS 302 should be done individually. See the \href{https://www.cs.princeton.edu/courses/archive/fall23/cos302/files/syllabus.pdf}{course syllabus} for the collaboration policy.
\vspace{\baselineskip}
\textcolor{red}{%
Remember to append your Colab PDF as explained in the first homework, with all outputs visible.\\
When you print to PDF it may be helpful to scale at 95\% or so to get everything on the page.}
\end{center}
\begin{problem}[20pts]
\begin{enumerate}[(A)]
\item Compute an orthonormal basis of the kernel of
\begin{align*}
\bm{A} &=
\begin{bmatrix}
1 & -1 & 1 & -1 & 1\\
1 & 1 & -1 & 1 & 1
\end{bmatrix}
\end{align*}
\item Write down an orthonormal basis for the image of~$\bm{A}$.
\end{enumerate}
\end{problem}
\newpage
\begin{problem}[23pts]
You've encountered power series before in other classes, but one thing you may not have realized is that you can construct \emph{matrix functions} from \emph{matrix power series}.
That is, if you have a function~$f(\cdot)$ that has a convergent power series representation:
\begin{align*}
f(x) &= \sum_{i=0}^\infty a_i x^i
\end{align*}
then you can generally write a similar matrix version for square symmetric matrices~$\bm{X}$ using the same~$a_i$:
\begin{align*}
F(\bm{X}) &= \sum_{i=0}^\infty a_i\bm{X}^i
\end{align*}
\begin{enumerate}[(A)]
\item The matrix version~$F$ turns out to just apply the scalar~$f$ to each eigenvalue independently.
Explain why. (Hint: Write down the diagonalization of~$\bm{X}$ and then multiply it by itself. What happens? How would a diagonalized version of~$\bm{X}$ interact with the power series?)
\item In power series there is a notion of \href{https://en.wikipedia.org/wiki/Radius_of_convergence}{radius of convergence}.
How would you expect this concept to generalize to square symmetric matrices?
\item One important example is where the function~$f(x)$ is the exponential function.
I can take any square symmetric matrix and if I compute its matrix exponential, I get a positive definite matrix. Explain why.
\end{enumerate}
\end{problem}
\newpage
\begin{problem}[25pts]
Consider the following set of vectors in $\mathbb{R}^3$:
\begin{align*}
v_1 &= \begin{bmatrix} 1 \\ 2 \\ 1 \end{bmatrix} &
v_2 &= \begin{bmatrix} 1 \\ 0 \\ -1 \end{bmatrix} &
v_3 &= \begin{bmatrix} 2 \\ 1 \\ 3 \end{bmatrix}.
\end{align*}
\begin{enumerate}[(A)]
\item Perform Gram--Schmidt orthogonalization on this set of vectors to obtain an orthonormal set \(\{u_1, u_2, u_3\}\). Show all your intermediate steps and calculations.
\item Once you have the vectors, verify that they are orthogonal by calculating the dot products between all possible pairs of vectors in the set.
\end{enumerate}
\end{problem}
\newpage
\begin{problem}[30pts]
One of the most important algorithms in data analysis is \href{https://en.wikipedia.org/wiki/Principal_component_analysis}{principal component analysis} or PCA.
PCA tries to find a way to represent high-dimensional data in a low-dimensional way so that human brains can reason about it.
It tries to identify the ``important'' directions in a data set and represent the data just in that basis.
PCA does this by computing the empirical covariance matrix of the data (we'll learn more about that in a couple of weeks), and then looking at the eigenvectors of it that correspond to the largest eigenvalues.
\begin{enumerate}[(A)]
\item Load \href{https://www.cs.princeton.edu/courses/archive/fall23/cos302/files/mnist2000.pkl}{\texttt{mnist2000.pkl}} into a Colab notebook.
Take the $2000 \times 28 \times 28$ tensor of training data and reshape it so that it is a~$2000 \times 784$ matrix, where the rows are ``unrolled'' image vectors.
Typically in PCA, one first centers the data.
Center the data by subtracting off the mean image; you did a very similar procedure in HW2.
\item Now compute the ``scatter matrix'', which is the $784 \times 784$ matrix you get from multiplying the data matrix by its transpose, making sure that the data dimension is the one being summed over.
\item This scatter matrix is square and symmetric, so use the \href{https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eigh.html}{\texttt{eigh}} function in the \texttt{numpy.linalg} package to compute the eigenvalues and eigenvectors.
Plot the eigenvalues in decreasing order.
\item Read the documentation for \texttt{eigh} and figure out how to get the ``big'' eigenvectors.
For each of the top five eigenvectors, reshape them into $28 \times 28$ images and use \href{https://matplotlib.org/3.1.3/api/_as_gen/matplotlib.pyplot.imshow.html}{\texttt{imshow}} to render them.
\item Now, create a low-dimensional representation of the data.
Take the $2000 \times 784$ matrix and multiply it by each of the top two eigenvectors.
This takes all 2000 data points, each of which is 784-dimensional, and gives them two-dimensional coordinates.
Make a scatter plot of these two-dimensional coordinates.
\item That scatter plot doesn't really give you much of a visualization.
Here's some starter code to build a more interesting figure.
It takes the two-dimensional projection and builds a ``scatter plot'' where the images themselves are rendered instead of dots.
Here I have the projections in a $2000 \times 2$ matrix called \texttt{proj}, which I modify so that all the values are in~$[0,1]$.
\begin{lstlisting}[style=python]
# Make the projections into [0,1]
proj = proj - np.min(proj, axis=0)
proj = proj / np.max(proj, axis=0)
# Create a 12" x 12" figure.
viz_fig = pl.figure(figsize=(12.,12.))
# Get the figure width and height in pixels.
width, height = viz_fig.get_size_inches()*viz_fig.dpi
pl.plot() # Colab seems to require this to render.
# Loop over images. Could do all 2000 but it's crowded.
for ii in range(400):
    # Render each image in a location on the figure.
    pl.figimage(train_images[ii,:,:],
                xo=proj[ii,1]*width,
                yo=(proj[ii,0]*height-150), # hack to make visible
                origin='upper')
\end{lstlisting}
Modify this code to work with your projections and make a visualization of the MNIST digits.
Do you see any interesting structure?
\end{enumerate}
\end{problem}
\newpage
\begin{problem}[2pts]
Approximately how many hours did this assignment take you to complete?
\end{problem}
My notebook URL:
{\small \url{https://colab.research.google.com/XXXXXXXXXXXXXXXXXXXXXXX}}
\subsection*{Changelog}
\begin{itemize}
\item 20 Sept 2023 -- Initial F23 version
\end{itemize}
% \includepdf[pages=-]{mynotebook.pdf}
\end{document}