\documentclass[11pt]{amsart}
\usepackage{amsmath,amsthm,amsfonts,amssymb}
\usepackage[usenames]{xcolor}
\usepackage{enumerate}
\usepackage[ruled,noline,noend]{algorithm2e}
\usepackage{fullpage}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{proposition}{Proposition}
\theoremstyle{definition}
\newtheorem{definition}{Definition}
\theoremstyle{plain}
\DeclareMathOperator{\E}{\mathrm{E}}
\newcommand{\replacethistext}[1]{{\color{red} #1}}
\title{
CS 466/666: Algorithm Design and Analysis \\
Problem Set 3}
\author{Due on: October 18, 2017}
\begin{document}
\begin{flushright}
\textbf{
\replacethistext{Your name} \\
\replacethistext{Your student number}
}
\end{flushright}
\medskip
\maketitle
\section*{Acknowledgements.}
\replacethistext{Acknowledge {\bf all collaborators} with whom you discussed any of the problems in this problem set and {\bf all external sources} that you may have used in the completion of the assignment. See the website for the full homework policy.}
%%% PROBLEM 1 %%%
\section{Problem 1: Hashing and chaining}
\begin{enumerate}[(a)]
\item Show that we don't even need linked lists when the number of hash values is large enough.
\begin{theorem}
Let $H = \{h : U \to [5n^2]\}$ be a universal family of hash functions. For any sequence $x_1,\ldots,x_n \in U$ of $n$ items, when $h \in H$ is drawn uniformly at random then with probability at least $\frac9{10}$ all the hash values $h(x_1),\ldots,h(x_n)$ are distinct.
\end{theorem}
\begin{proof}
\replacethistext{Enter your answer here.}
\end{proof}
\item A family $H$ of hash functions mapping $U$ to $[n]$ is \emph{pairwise independent} if for every distinct $x,x' \in U$ and every $y \in [n]$, $\Pr_{h \in H}[ h(x) = h(x') = y ] = \frac1{n^2}$.\footnote{As an aside: how does this definition compare to universality?}
\begin{theorem}
Let $H = \{h : U \to [n]\}$ be a pairwise independent family of hash functions. For any sequence $x_1,\ldots,x_n \in U$ of $n$ items, when $h \in H$ is drawn uniformly at random then with probability at least $\frac{99}{100}$, every $y \in [n]$ satisfies $|\{i \le n : h(x_i) = y\}| \le 10\sqrt{n}$.
\end{theorem}
\begin{proof}
\replacethistext{Enter your answer here.}
\end{proof}
\item {\bf(Bonus.)} Show that the bound in the last part is tight.
\begin{theorem}
There exists a pairwise independent family of hash functions $H = \{h : U \to [n]\}$ and a sequence $x_1,\ldots,x_n \in U$ of $n$ items such that when $h \in H$ is drawn uniformly at random then with probability at least $\frac9{10}$, there is some value $y \in [n]$ such that $|\{i \le n : h(x_i) = y\}| = \Omega(\sqrt{n})$.
\end{theorem}
\begin{proof}
\replacethistext{Enter your answer here.}
\end{proof}
\end{enumerate}
%%% PROBLEM 2 %%%
\newpage
\section{Problem 2: Bloom filters}
Recall that we say a randomized algorithm \emph{solves} the \textsc{Set Membership} problem if, after seeing $n$ objects $x_1,\ldots,x_n \in U$ and then receiving a query $a \in U$, it (i) correctly answers YES when $a = x_i$ for some $i \le n$, and (ii) correctly answers NO with probability at least $\frac23$ when $a$ is distinct from the $n$ elements $x_1,\ldots,x_n$. In class, we saw a simple algorithm that solves the \textsc{Set Membership} problem:
\begin{algorithm}
\caption{Simple set membership algorithm}
Draw $h : U \to [n]$ uniformly at random from the universal family $H$\;
Initialize $T \in \{0,1\}^{3n}$ to zero\;
\For{$i=1,\ldots,n$}{
Set $T\big[ h(x_i) \big] = 1$\;
}
On query $a \in U$, output YES iff $T\big[ h(a) \big] = 1$\;
\end{algorithm}
That algorithm also requires $3n$ bits of memory to store the table $T$. Show that it is possible to modify the algorithm to use a smaller hash table.
\begin{enumerate}[(a)]
\item Establish the following theorem (by definining the algorithm and completing its analysis).
\begin{theorem}
For every pairwise independent\footnote{See Problem 1 for the definition of pairwise independent families of hash functions.} family of hash functions $H = \{h : U \to [2\sqrt{3}n]\}$, there is a variant of Algorithm 1 that solves the \textsc{Set Membership} problem with a hash table $T$ of size $\lceil 2 \sqrt{3} n \rceil$.
\end{theorem}
\begin{proof}
\replacethistext{Enter your answer here.}
\end{proof}
\item Is this actually an improvement over the simple set membership algorithm?
\bigskip
\replacethistext{Enter your answer here.}
\bigskip
\item {\bf (Bonus.)} Show that there is some constant $c < 3$ such that when $H_{\rm all} = \{h : U \to [c n]\}$ is the family that includes all functions mapping $U$ to $[c n]$, then there is another variant of Algorithm 1 that solves the \textsc{Set Membership} problem with a hash table $T$ of size $\lceil c n \rceil$.
\bigskip
\replacethistext{Enter your theorem and proof here.}
\bigskip
\end{enumerate}
%%% PROBLEM 3 %%%
\newpage
\section{Problem 3: Almost-universal families of hash functions}
Chernoff's bound gives exponentially small probabilities that the sum of independent random variables ends up being far from its expected value. One of the forms of this bound is as follows.
\begin{lemma}[Chernoff (lower) bound]
Let $X = X_1 + X_2 + \cdots + X_t$ where $X_1,\ldots,X_t$ are independent random variables that are each drawn from the same distribution, and let $\mu = \E[X]$. Then for any $\delta > 0$,
\[
\Pr[ X < (1-\delta) \mu] \le e^{-\mu \delta^2/2}.
\]
\end{lemma}
\begin{enumerate}[(a)]
\item Use Chernoff's bound to prove the following strengthening of the claim made in class:
\begin{theorem}
There exists a $\frac34$-universal family of hash functions $H = \{h : \{0,1\}^n \to \{0,1\}\}$ of size $|H| = 16 n$.
\end{theorem}
\begin{proof}
\replacethistext{Enter your answer here.}
\end{proof}
\bigskip
\noindent \emph{Remark.} Note also that the family obtained in this way is of size \emph{linear} in $n$, not $O(n^2)$, as we originally claimed. Does this make a significant difference in applications, or at least in the ones we have seen?
\bigskip
\item \textbf{(Bonus.)} For any prime $p \ge 2$ and integers $m \ge n \ge 2$, define the hash family $H_{p,n} = \{ h_a : [m] \to [n]\}_{0 \le a < p}$ by setting
\[
h_a(x) = \big( a \cdot x \pmod{p} \big) \pmod{n}
\]
for each $a = 0,1,2,\ldots,p-1$. Prove that $H_{p,n}$ is $\frac2{n}$-universal.
\begin{theorem}
The family of hash functions $H_{p,n}$ defined above is $\frac{2}{n}$-universal.
\end{theorem}
\begin{proof}
\replacethistext{Enter your answer here.}
\end{proof}
\bigskip
\noindent \emph{Extra-bonus question.} Modify the definition of $H_{p,n}$ to obtain a (truly) universal family of hash functions.
\end{enumerate}
\end{document}