% New Appendices for the Purple Book
% (hardcopy documents filed in 1984, forgotten in 1990, found in 1995)

% first, a subset of the macros used to generate the third edition in 1990

\hsize=4.5 in \vsize=7.0 in
\parindent=10pt
\font\rh=cmcsc10

\newdimen\vu \vu=12pt
\parskip .5\vu
\mathcode`\@="8000 {\catcode`\@=\active \gdef@{\mkern1mu}} % for tidy math

\def \psmajor #1{
  \penalty -500
  \vskip .6\vu
  \noindent{\bf #1}
  \par\vskip .5\vu}

\def \psminor #1{
  \penalty -500
  \vskip .5\vu 
  \noindent{\bf #1}
  \par\vskip .5\vu
  \mark {\lowercase {#1}}}

\def \pssub #1{
  \penalty -250
  \vskip .3\vu 
  \noindent{\bf #1}
  \par\vskip .3\vu}

\input picmac % subset of LaTeX picture mode ... found in CTAN archives

\nopagenumbers
\newif\ifinhead
\headline={\inheadtrue\ifodd\pageno\rh\hfill\botmark\hskip30pt\tenrm A\folio
   \else\tenrm A\folio\hskip30pt\rh greene and knuth: new appendices\hfill\fi}

% OK, now here's the new stuff.
% I haven't updated the index ... sorry.

\psmajor{Appendix J: Midterm Exam IV and Solutions}
\psminor{Midterm Exam IV\noexpand\ifinhead\noexpand\else
   \ \ \rm(A. C. Yao, 1984)\noexpand\fi}
\noindent {\bf Problem 1.}\enspace
Let ${\cal S}$ be a recurrence relation of the form
$$\displaylines{x_0=x_1=2\,,\cr
\noalign{\vskip-2pt}
a(n)@x_{n+2}+b(n)@x_{n+1}+c(n)@x_n=1\qquad\hbox{for $n\geq 0\,$,}\cr}$$
where $a$, $b$, $c$ are given polynomials with complex coefficients. We say
that ${\cal S}$ is {\sl reducible}, if there exist polynomials (with
complex coefficients) $s$, $t$, $l$, $m$ such that
$$l(n)@u_{n+1}+m(n)@u_n=a(n)@x_{n+2}+b(n)@x_{n+1}+c(n)@x_n\,,$$
where $u_n=s(n)@x_{n+1}+t(n)@x_n$.

For each of the following ${\cal S}$, prove or disprove that ${\cal S}$ is
reducible.

\smallskip\noindent
(A) [10 points]. $a(n)=4n^2-4n-15$, $b(n)=8n^2-18n+4$, and
$c(n)=3n^2-7n+4$.

\smallskip\noindent
(B) [30 points]. $a(n)=4(n^2+n+1)$, $b(n)=3n^3-4n^2+8n+19$, and
$c(n)=6n^2+7n-3$.

\medskip
\noindent{\bf Problem 2.}\enspace
Let $n=2^k$ for some integer $k>0$. We will first define a class of
permutations on $\{0,2,3,\ldots,n-1\}$. A~{\sl signed-tree of order\/}~$n$
is a complete balanced binary tree $T$ with $n$~leaves, with the two
edges out of each internal node labeled with distinct Boolean values. The
leaves from left to right are denoted $v_0$, $v_1$, \dots,~$v_{n-1}$. Let
$\sigma_T(v_i)$ denote the integer $0\leq j<n$ whose binary representation
is the concatenation of the labels from the root to~$v_i$. Let us regard
$\sigma_T$ as the permutation $\bigl(\sigma_T(v_0),\sigma_T(v_1),\ldots,
\sigma_T(v_{n-1})\bigr)$. An example of a signed-tree~$T$ of order~8 is
shown below, in which $\sigma_T(v_3)=6$ because 110 is the string of
labels read from the root to~$v_3$; $\sigma_T$~is $(4,5,7,6,2,3,1,0)$ in
this example.
\unitlength=15pt
$$\def\lfbox
  {\vbox to0pt{\moveleft3pt\hbox{\vbox{\hrule
  \hbox to 6pt{\vrule height 6pt\hfill\vrule}
  \hrule}}\vss}}
\beginpicture(14,8)(0,-1)
\put(7,6){\disk{.2}}
\put(3,4){\disk{.2}}
\put(11,4){\disk{.2}}
\put( 1,2){\disk{.2}}
\put( 5,2){\disk{.2}}
\put( 9,2){\disk{.2}}
\put(13,2){\disk{.2}}
\put(0,0){\lfbox}
\put(2,0){\lfbox}
\put(4,0){\lfbox}
\put(6,0){\lfbox}
\put(8,0){\lfbox}
\put(10,0){\lfbox}
\put(12,0){\lfbox}
\put(14,0){\lfbox}
\put(0,-1){\makebox(0,0){$v_0$}}
\put(2,-1){\makebox(0,0){$v_1$}}
\put(4,-1){\makebox(0,0){$v_2$}}
\put(6,-1){\makebox(0,0){$v_3$}}
\put(8,-1){\makebox(0,0){$v_4$}}
\put(10,-1){\makebox(0,0){$v_5$}}
\put(12,-1){\makebox(0,0){$v_6$}}
\put(14,-1){\makebox(0,0){$v_7$}}
\put(0,0){\line(1,2){1}}
\put(4,0){\line(1,2){1}}
\put(8,0){\line(1,2){1}}
\put(12,0){\line(1,2){1}}
\put(2,0){\line(-1,2){1}}
\put(6,0){\line(-1,2){1}}
\put(10,0){\line(-1,2){1}}
\put(14,0){\line(-1,2){1}}
\put(1,2){\line(1,1){2}}
\put(9,2){\line(1,1){2}}
\put(5,2){\line(-1,1){2}}
\put(13,2){\line(-1,1){2}}
\put(3,4){\line(2,1){4}}
\put(11,4){\line(-2,1){4}}
\put(4.1,5.1){\makebox(0,0){1}}
\put(9.9,5.1){\makebox(0,0){0}}
\put(1.4,3){\makebox(0,0){0}}
\put(4.6,3){\makebox(0,0){1}}
\put(9.4,3){\makebox(0,0){1}}
\put(12.6,3){\makebox(0,0){0}}
\put(0.05,1){\makebox(0,0){0}}\put(1.95,1){\makebox(0,0){1}}
\put(4.05,1){\makebox(0,0){1}}\put(5.95,1){\makebox(0,0){0}}
\put(8.05,1){\makebox(0,0){0}}\put(9.95,1){\makebox(0,0){1}}
\put(12.05,1){\makebox(0,0){1}}\put(13.95,1){\makebox(0,0){0}}
\endpicture$$

Let $\Sigma_n=\{\,\sigma_T\mid T\hbox{ is a signed-tree of order $n$}\,\}$.
Consider a random $\sigma_T$ chosen uniformly from~$\Sigma_n$.

\smallskip\noindent
(A) [5 points]. What is the expected number of inversions in a random
$\sigma_T$? 

\smallskip\noindent
(B) [10 points]. Determine the expected number of element exchanges in each
pass of a $(4,2,1)$-Shellsort applied to a random~$\sigma_T$.

\smallskip\noindent
(C) [15 points]. Determine the expected value and the variance of the
number of right-to-left maxima in a random~$\sigma_T$.

\smallskip\noindent
(D) [40 points]. Determine the expected value of $B$ in the `straight
selection sort' (Algorithm~5.2.3S in [Knuth III]), applied to a random
$\sigma_T$.

\smallskip\noindent
(E) [20 points]. Determine the expected number of cycles in a random
$\sigma_T$. Determine the expected {\sl order\/} of a random~$i$ ($0\leq
i<n$) in a random~$\sigma_T$. [The order of $i$ in a permutation $\sigma$
is the length of the cycle that contains~$i$.
For example, the order of 2 in $\sigma=(34)(0521)$ is~4.]

\medskip
\let\sc=\rh
\noindent{\bf Problem 3.}\enspace
It's the year 2525, and the making of biochips is a big industry.
A synthetic organism
known as {\sc maybesort} can be produced cheaply, and we would like to
study its suitability as a sorting device.

{\sc maybesort} consists of an array $A[1:n]$. Its relevant behavior
under infrared light can be described as follows. Let $\alpha=a_1a_2\ldots
a_n$ be an $n$-bit string. Initially (at $\tau=0$), let $A[j]=a_j$ for $1\leq
j\leq n$. At each subsequent integer~$\tau$, a~random integer $1\leq i<n$
is selected and the contents of $A[i]$ and $A[i+1]$ are compared and
exchanged if necessary to make $A[i]\leq A[i+1]$. Let $T_n(\alpha)$ be the
random variable corresponding to the time when array~$A$ first becomes
sorted. Let $t_n(\alpha)$ be the expected value of $T_n(\alpha)$, and let
$\beta_n$ denote the string $1^20^{n-2}$.

\smallskip\noindent
(A) [10 points]. Show that $t_n(\beta_n)\geq t_n(\alpha)$ for all $\alpha$
with exactly two~1's.

\smallskip\noindent
(B) [20 points]. Determine $t_n(\beta_n)$ up to an additive term of
$O(n^{1.5})$ or better.

\smallskip\noindent
{\sl Comment:\/} Suppose $n=10^6$ and one time unit for {\sc maybesort} is
one picosec ($10^{-12}$~sec). How would you rate its performance against
today's computers in sorting binary strings with exactly two~1's?

\vfill\eject
\psminor{Solutions to Midterm Exam IV}
\par\vskip-.3\vu\rm
\pssub{Solution to Problem 1.}
\noindent(A) We are looking for possible solutions to
$$\eqalign{l(n)@s(n+1)&=4n^2-4n-15\,,\cr
l(n)@t(n+1)+m(n)@s(n)&=8n^2-18n+4\,,\cr
m(n)@t(n)&=3n^2-7n+4\,.\cr}$$
Trial and error gives the solution
$$\vcenter{\halign{$\hfil{#}\;$&${#}\hfil$\qquad&$\hfil{#}\;$&${#}\hfil$\cr
s(n)&=2n+1\,,&t(n)&=3n-4,\cr
l(n)&=2n-5\,,&m(n)&=n-1\,.\cr}}$$
So, it is reducible.

\smallskip
\noindent(B)
When $a(n)=4(n^2+n+1)$, $b(n)=3n^3-4n^2+8n+19$, and $c(n)=6n^2+7n-3$,
the corresponding recurrence relation is {\sl irreducible}, because
there is no solution to
$$\eqalignno{l(n)@s(n+1)&=a(n)\,,&(1.1)\cr
l(n)@t(n+1)+m(n)@s(n)&=b(n)\,,&(1.2)\cr
m(n)@t(n)&=c(n)\,.&(1.3)\cr}$$
Here each of $a(n)$ and $c(n)$ has only two linear factors.
A direct attack would
examine all the possible assignments in (1.1) and~(1.3) to $l$, $m$,
$s$, $t$ (with some coefficients left unspecified), and show that it's
impossible to satisfy~(1.2). The following clever alternative method,
due to Richard Beigel, is much more concise.

\smallskip
We use the notation $f(n)\;\hbox{div}\;g(n)$ to denote the unique
polynomial $h(n)$ such
that $f(n)=g(n)h(n)+($polynomial with degree less than that of~$g)$.
First observe that as $b(n)$
is cubic, there are only two possibilities:
\smallskip\noindent
{\sl Case~1\/}: $l$ and $s$
are linear, $m$ and~$t$ are one quadratic and one constant;
\smallskip\noindent
{\sl Case~2\/}: $m$~and $t$ are linear, $l$~and~$s$ are one quadratic
and one constant.

In Case 1, if $m$ is quadratic, consider the linear polynomial $h(n)$
defined as $b(n)\;\hbox{div}\;m(n)$. As $l(n)t(n+1)$ is linear, $h(n)$ has
to be exactly~$s(n)$. But $b(n)\;\hbox{div}\;m(n)=\lambda\;b(n)\;\hbox{div}\;c(n)$ 
for
some constant~$\lambda$, and hence is of the form $\lambda(\lambda'n+\lambda'')$,
where both $\lambda'$ and $\lambda''$ are real; $s(n)$ cannot be of this
form, as $s(n+1)$ is a factor of the polynomial $n^2+n+1$, which is
irreducible over the reals. The case when $t$ is quadratic is ruled
out by a similar argument.

In Case 2, if $l$ is quadratic, we know from (1.2) that
$$t(n+1)=\lambda\bigl(b(n)\;\hbox{div}\;l(n)\bigr)=\lambda(3n-7)\,,$$
i.e., $t(n)=(3n-10)\lambda$. But $3n-10$ is not a factor of~$c(n)$. If $s$
is quadratic, we know that $m(n)=\lambda\bigl(b(n)\;\hbox{div}\;s(n)\bigr)
=\lambda(3n-7)$. But $3n-7$ is not a factor of~$c(n)$. [Note that Case~1
can also be argued this way.] We have ruled out all the possibilities.

\medskip
\pssub{Solution to Problem 2(A).}
Let $a_n$ be the expected number of inversions. Clearly,
$$a_2={1\over 2}\,.$$
Let $n=2^k>2$. Take a random $\sigma_T\in\Sigma_n$, write $\sigma_T=
(\sigma_{T_1},\sigma_{T_2})$.
$$\beginpicture(6,4)(0,0)
\put(3,4){\makebox(0,0){$r$}}
\put(3,3.5){\disk{.2}}
\put(1,2.5){\line(2,1){2}}
\put(5,2.5){\line(-2,1){2}}
\put(1,1.5){\makebox(0,0){$T_1$}}
\put(5,1.5){\makebox(0,0){$T_2$}}
\put(0,1){\line(2,3){1}}
\put(2,1){\line(-2,3){1}}
\put(0,1){\line(1,0){2}}
\put(4,1){\line(2,3){1}}
\put(6,1){\line(-2,3){1}}
\put(4,1){\line(1,0){2}}
\put(1,.2){\makebox(0,0){\vbox{\hbox to 2\unitlength{\upbracefill}
                               \hbox to 2\unitlength{\hss$n/2$\hss}}}}
\put(5,.2){\makebox(0,0){\vbox{\hbox to 2\unitlength{\upbracefill}
                               \hbox to 2\unitlength{\hss$n/2$\hss}}}}
\endpicture$$
With probability 1/2, we have 
$$\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){0}}
\put(2,.6){\makebox(0,0){1}}
\endpicture$}},$$
in which case there is no inversion between numbers $\sigma_{T_1}$ and~$\sigma_{T_2}$.
With probability $1/2$, we have
$$\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){1}}
\put(2,.6){\makebox(0,0){0}}
\endpicture$}},$$
in which case there are exactly $(n/2)^2$ inversions between members
in $\sigma_{T_1}$ and~$\sigma_{T_2}$. Now $\sigma_{T_1}$ and~$\sigma_{T_2}$ 
are both random signed-trees of order $n/2$, except that one of them will
be a permutation of the numbers $\{{n\over 2},\allowbreak
{{n\over 2}+1},\allowbreak\ldots,n-1\}$.
Thus, we have
$$a_n={1\over 2}\bigl(2a_{n/2}\bigr)+{1\over 2}\bigl((n/2)^2+2a_{n/2}\bigr)
={1\over 8}n^2+2a_{n/2}\,,$$
and it follows that $a_n={1\over 4}n(n-1)$ for $n\ge 2$.

The simple form of this answer suggests that a simpler proof should be
possible. And indeed, it is easy to see that the label strings for any two
given leaves yield an inversion with probability 1/2. Thus the expected number
of inversions of labels strings generated by any $n$-leaf signed tree~$T$ is
${1\over2}{n\choose2}$, regardless of whether $n$ is a power of~2 or
$T$ is complete or balanced.

\pssub{Solution to Problem 2(B).}
Assume $n\geq 4$ and $a_1=0$.

{\sl First Pass\/}: Four files $F_i$ ($1\le i\le 4$) are separately sorted. Each
$F_i$ consists of $n/4$ numbers with distinct first $k-2$ bits. In fact,
the relative ordering of numbers in $F_i$ gives a random permutation
from $\Sigma_{n/4}$. Thus, the average number of inversions in $F_i$ is
$a_{n/4}={1\over 4}\,{n\over 4}\bigl(\,{n\over 4}-1\bigr)$. The total
average number of exchanges is thus $4a_{n/4}={n\over 4}\bigl(\,{n\over 4}-1\bigr)$.

\smallskip
{\sl Second Pass\/}: After the first pass, the permutation will be a 
$\sigma_T\in\Sigma_n$, with the top $k-2$ levels having all left branches
labeled~0, and right branches labeled~1. Each of the $n/4$
height-2 trees at the bottom is a random signed-tree of order~4;
call these $lh2$-{\sl trees\/} (lowest height-2 trees). The only inversions
are among elements in the same $lh2$-trees.

Each of the two subfiles to be sorted in this pass has its number of
inversions equal to the number of $lh2$--trees with configuration
$$\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){1}}
\put(2,.6){\makebox(0,0){0}}
\endpicture$}}.$$
The expected number of exchanges is thus $2\cdot\bigl(\,{1\over 2}\cdot
{n\over 4}\,\bigr)={n\over 4}$.

\smallskip
{\sl Third Pass\/}: Similar to the second pass; the expected number of exchanges
is equal to the expected number of lowest height-1 trees with configuration
$$\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){1}}
\put(2,.6){\makebox(0,0){0}}
\endpicture$}}.$$
This is ${1\over 2}\cdot{n\over 2}={n\over 4}$.

{\sl Comment}. Suppose Shellsort were used with $l$ increments
$(2^{l-1},\ldots,2,1)$, where $l\le k$. A similar argument shows that the
average number of exchanges on the first pass would be ${n\over4}
(n/2^{l-1}-1)$, and each subsequent pass would have $n\over4$ on the average.

\pssub{Solution to Problem 2(C).}
Let $g_n(z)=\sum_{k\ge 0}a_{nk}z^k$, where $a_{nk}$ is the probability
that a random $\sigma_T\in\Sigma_n$ has exactly $k$ RL-max. Considering
the two cases
$$\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){0}}
\put(2,.6){\makebox(0,0){1}}
\endpicture$}}\qquad\hbox{and}\qquad
\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){1}}
\put(2,.6){\makebox(0,0){0}}
\endpicture$}},$$
we get for $n\ge 1$
$$a_{2n,k}={1\over 2}a_{n,k}+{1\over 2}\sum_{l\ge 0}a_{n,l}a_{n,k-l}\,.$$
This leads to
$$g_{2n}(z)={1\over 2}g_n(z)+{1\over 2}\bigl(g_n(z)\bigr)^2\,,\qquad
\hbox{for }n\ge 1\,.\eqno(2\hbox{C}.1)$$
Note that $g_1(z)=z$, and
$$g'_{2n}(z)={1\over 2}g'_n(z)+g_n(z)g'_n(z)\,.\eqno(2\hbox{C}.2)$$
Thus, using the fact $g_n(1)=1$, we have
$$g'_{2n}(1)={3\over 2}g'_n(1)\,,$$
which implies
$$g'_n(1)=\left(\,{3\over 2}\,\right)^k\,,\qquad
k=\lg n\,.\eqno(2\hbox{C}.3)$$
This is the expected number of RL-max; it can also be written in the
form $n^{\lg(3/2)}\approx n^{0.585}$.

To get the variance, recall that
$$\hbox{Var}_n=g''_n(1)+g'_n(1)-\bigl(g'_n(1)\bigr)^2\,.\eqno(2\hbox{C}.4)$$
Now, from (2C.2),
$$g''_{2n}(z)={1\over 2}g''_n(z)+\bigl(g'_n(z)\bigr)^2+g_n(z)g''_n(z)\,.$$
Thus, using (2C.3),
$$g''_{2n}(1)={3\over 2}g''_n(1)+\left(\,{3\over 2}\,\right)^{2k}\,.$$
Therefore
$$g''_n(1)={4\over 3}\bigl((3/2)^{2k}-(3/2)^k\bigr)\,,\eqno(2\hbox{C}.5)$$
and formulas (2C.3), (2C.4), (2C.5) give
$$\hbox{Var}_n={1\over 3}\bigl((3/2)^{2k}-(3/2)^k\bigr)
={1\over3}(n^{\lg(9/4)}-n^{\lg(3/2)})\,.$$

\pssub{Solution to Problem 2(D).}
\smallskip
\noindent {\bf I. Some useful facts about $\Sigma_n$.}
\smallskip
{\bf Fact 1.} {\sl $\Sigma_n$ is a permutation group.}
\smallskip\noindent
{\sl Proof.} Call an $n\times n$ matrix $A=(a_{ij})$ a {\sl permutation
matrix\/} if $A$ contains exactly one~1 in each column, and in each row;
all other entries are~0. Let $\Gamma_n$ be the set of $n\times n$ permutation
matrices defined recursively as the set of permutation matrices of the
form $\left({B\ 0\atop 0\ C}\right)$ or $\left({0\ B\atop C\ 0}\right)$
with $B,C\in\Gamma_{n/2}$ and 0 being the matrix of all~0's;
$\Gamma_1=\{(1)\}$. It is easy to show by induction that $\Gamma_n$ forms
a subgroup of matrices under matrix multiplication.
Now, associate each permutation $\rho$ on $\{0,1,\ldots,n-1\}$ with a unique
$n\times n$ matrix $A_{\rho}=(a_{ij})$ where $a_{ij}=1$ iff
$\rho(i)=j$. It is clear that $A_{\rho\rho'}=A_{\rho'}\cdot A_{\rho}$;
also $A_{\rho}$ is the identity matrix if $\rho$ is the identity permutation.
Thus, this association $\rhoA_{\rho}$ is an isomorphism. As $\Gamma_n$ is
the image of $\Sigma_n$ under this isomorphism, we have the desired result.

\smallskip
{\bf Fact 2.} {\sl
If $\rho$, $\rho'$ are random permutations uniformly chosen from
$\Sigma_n$, then $\rho\cdot\rho'$ is random (uniformly over~$\Sigma_n$).}
\smallskip\noindent
{\sl Proof.} This follows from Fact 1 (even if the distribution of
$\rho'$ is nonuniform).

\medskip\noindent
{\bf II. About Algorithm S.}

Algorithm S works in $n-1$ passes for a file of $n$ numbers. Let $b_j$ be
the number of RL-max, before the $j$-th pass starts, in the file
consisting of the first $n-j+1$ locations in the current array. Then the
quantity $B$ in the analysis of Algorithm 5.2.3S is
$(b_1-1)+(b_2-1)+\cdots +(b_{n-1}-1)$. Let us imagine that the
algorithm has an extra, $n$-th, pass with $b_n=1$. Let $K_n$ denote the
expected value of $\sum_{1\le i\le n}b_i$; then the answer we want is given by
$$B_n=K_n-n\,.\eqno(2\hbox{D}.1)$$
We will concentrate on finding $K_n$.

Consider a random $\sigma_T\in\Sigma_n$ being sorted by
Algorithm~S. If the root of $T$ is
$$\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){0}}
\put(2,.6){\makebox(0,0){1}}
\endpicture$}},$$
then $S$ can be considered as working first on the right subtree, then
the left subtree, separately; this contributes
${1\over 2}\cdot 2K_{n/2}$ to~$K_n$. If the root of $T$ is
$$\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){1}}
\put(2,.6){\makebox(0,0){0}}
\endpicture$}},$$
the situation is more complicated, and we say the tree is {\sl inverted\/}
at the root. Let
$L_n$ denote the expected value of the total number of RL-max encountered
in the first $n/2$ passes when sorting a random inverted tree.
 At the end of the $(n/2)$-th pass, the
left half of the array contains a permutation $\rho$ of
$\{0,1,\ldots,{n\over2}-1\}$,
and we will show that $\rho$ is a random permutation uniformly chosen
from $\Sigma_{n/2}$. This will imply that the second $n/2$ passes
have an expected RL-max numbering $K_{n/2}$. To show $\rho$ to be random,
let $T_1$, $T_2$ denote the left and right subtrees of~$T$. At the end
of the first $n/2$ passes, array location $\sigma_T^{-1}(n-i)$ contains
the number $\sigma_T(n-i)$ for $1\le i\le n/2$. Or, equivalently, location
$j=\sigma_{T_1}^{-1}({n\over 2}-i)$
contains the number
$\sigma_{T_2}({n\over 2}-i)$,
i.e.,
$\rho(j)=\sigma_{T_2}\bigl(\sigma_{T_1}(j)\bigr)$. Thus $\rho=\sigma_{T_2}\cdot
\sigma_{T_1}$, and is random by Fact~2.

Summing both cases, we have shown that
$$\eqalignno{K_n&={1\over 2}(2K_{n/2})+{1\over 2}(L_n+K_{n/2})\cr
\noalign{\smallskip}
&={3\over 2}K_{n/2}+{1\over 2}L_n\,.&(2\hbox{D}.2)\cr}$$
We will find $L_n$ first. For example, $L_2=2$, 
and $L_4=(6+5+5+5)/4={21\over4}$.

\medskip\noindent
{\bf III. More preliminary calculations.}

Let $M_n$ be the expected number of RL-max in a random $\sigma_T$ from $\Sigma_n$.
We know from 2(C) that, for $n=2^k$,
$$M_n=(3/2)^k\,.\eqno(2\hbox{D}.3)$$

For a random $\sigma_T \in\Sigma_n$, 
let $t_{nj}$ be the expected number of
RL-max in $\bigl(\sigma_T(0),\sigma_T(1),\ldots,\sigma_T(n-j-1)\bigr)$,
which is the file obtained by deleting the $j$ rightmost numbers from $\sigma_T$.
Let $T_n$ denote $\sum_{0\le j<n}t_{nj}$. By considering the two possible
different ways of labeling at the root, we obtain the following recurrence:
$$\eqalign{T_n&={1\over 2}(2T_{n/2})+{1\over 2}\bigl(T_{n/2}+
  (n/2)M_{n/2}+T_{n/2}\bigr)\cr
\noalign{\smallskip}
&=2\,T_{n/2}+{n\over 4}M_{n/2}\,.\cr}$$
Using (2D.3), we can solve the recurrence to obtain
$$T_n={1\over 2}(3^k+2^k)\,.\eqno(2\hbox{D}.4)$$

We need one more definition besides $M_n$, $T_n$. Let $\sigma_T^{(j)}$ denote
the sequence of $n-j$ numbers obtained from $\sigma_T\in\Sigma_n$ by
deleting the largest $j$ elements. For example, if $\sigma_T=(2\,3\,1\,0)$,
then $\sigma_T^{(1)}=(2\,1\,0)$ and $\sigma_T^{(2)}=(1\,0)$. For
a random~$\sigma_T$, let $v_{nj}$ denote the expected number of RL-max
in~$\sigma_T^{(j)}$. Let $V_n=\sum_{0\le j<n}v_{nj}$. Arguing
as in the last paragraph, we can determine~$V_n$. In fact,
$$V_n={1\over 2}(3^k+2^k)\,.\eqno(2\hbox{D}.5)$$

\medskip\noindent
{\bf IV. Determining $L_n$.}

Take a random $\sigma_T\in\Sigma_n$ with inversion at the root.
 There are four cases, each occurring
with probability $1/4$. Let us write down the expected number of RL-max
in each case.

\smallskip\noindent{\sl Case 1.}
$$\beginpicture(7,1.5)(0,0)
\put(3,2){\disk{.2}}
\put(1,1){\disk{.2}}
\put(5,1){\disk{.2}}
\put(1,1){\line(2,1){2}}
\put(5,1){\line(-2,1){2}}
\put(1.8,1.9){\makebox(0,0){1}}
\put(4.2,1.9){\makebox(0,0){0}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){1}}
\put(2,.6){\makebox(0,0){0}}
\put(4,0){\line(1,1){1}}
\put(6,0){\line(-1,1){1}}
\put(4,.6){\makebox(0,0){1}}
\put(6,.6){\makebox(0,0){0}}
\put(0,-.4){\makebox(0,0){$A$}}
\put(2,-.4){\makebox(0,0){$B$}}
\put(4,-.4){\makebox(0,0){$C$}}
\put(6,-.4){\makebox(0,0){$D$}}
\endpicture$$
\medskip\noindent
In the first $n/4$ passes, the elements of $D$ and $A$ will be exchanged.
As elements of $B$ and $C$ are greater than those of~$D$, the elements of $D$
once swapped to $A$ will not give any RL-max. Thus, the expected number of
RL-max in these $n/4$ passes will be equal to $T_{n/4}+{n\over 4}M_{n/4}
+{n\over 4}M_{n/4}+V_{n/4}$ (contributed by elements in $D$, $C$, $B$,
$A$, respectively).

In the second $n/4$ passes, Algorithm S works on the array
$(D',B,C)$, where $D'$ is $D$ swapped and deformed. The expected
number of RL-max is $L_{n/2}$ (contributed by $B$ and~$C$).

\smallskip\noindent{\sl Case 2.}
$$\beginpicture(7,1.5)(0,0)
\put(3,2){\disk{.2}}
\put(1,1){\disk{.2}}
\put(5,1){\disk{.2}}
\put(1,1){\line(2,1){2}}
\put(5,1){\line(-2,1){2}}
\put(1.8,1.9){\makebox(0,0){1}}
\put(4.2,1.9){\makebox(0,0){0}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){0}}
\put(2,.6){\makebox(0,0){1}}
\put(4,0){\line(1,1){1}}
\put(6,0){\line(-1,1){1}}
\put(4,.6){\makebox(0,0){1}}
\put(6,.6){\makebox(0,0){0}}
\endpicture$$
$$\vcenter{\halign{{#}\hfil\quad&${#}\hfil$\cr
First $n/4$ passes:&T_{n/4}+{n\over 4}M_{n/4}+V_{n/4}\,.\cr
Second $n/4$ passes:&L_{n/2}\,.\cr}}$$

\smallskip\noindent{\sl Case 3.}
$$\beginpicture(7,1.5)(0,0)
\put(3,2){\disk{.2}}
\put(1,1){\disk{.2}}
\put(5,1){\disk{.2}}
\put(1,1){\line(2,1){2}}
\put(5,1){\line(-2,1){2}}
\put(1.8,1.9){\makebox(0,0){1}}
\put(4.2,1.9){\makebox(0,0){0}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){1}}
\put(2,.6){\makebox(0,0){0}}
\put(4,0){\line(1,1){1}}
\put(6,0){\line(-1,1){1}}
\put(4,.6){\makebox(0,0){0}}
\put(6,.6){\makebox(0,0){1}}
\endpicture$$
\bigskip\centerline{Same result as in Case 2.}

\smallskip\noindent{\sl Case 4.}
$$\beginpicture(7,1.5)(0,0)
\put(3,2){\disk{.2}}
\put(1,1){\disk{.2}}
\put(5,1){\disk{.2}}
\put(1,1){\line(2,1){2}}
\put(5,1){\line(-2,1){2}}
\put(1.8,1.9){\makebox(0,0){1}}
\put(4.2,1.9){\makebox(0,0){0}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){0}}
\put(2,.6){\makebox(0,0){1}}
\put(4,0){\line(1,1){1}}
\put(6,0){\line(-1,1){1}}
\put(4,.6){\makebox(0,0){0}}
\put(6,.6){\makebox(0,0){1}}
\endpicture$$
$$\vcenter{\halign{{#}\hfil\quad&${#}\hfil$\cr
First $n/4$ passes:&L_{n/2}\,.\cr
Second $n/4$ passes:&V_{n/4}+{n\over 4}M_{n/4}+T_{n/4}\,.\cr}}$$
(The first $n/2$ passes leave $(A,D',C)$, where $D'$ is random by the
argument of part~II.)

\medskip
Summing over all four cases, we obtain
$$L_n=L_{n/2}+V_{n/4}+T_{n/4}+{5\over 16}n\,M_{n/4}\,,$$
which simplifies because of (2D.3), (2D.4), and (2D.5):
$$\eqalignno{L_n&=L_{n/2}+3^{k-2}+2^{k-2}+{5\over 16}2^k(3/2)^{k-2}\cr
\noalign{\smallskip}
&=L_{n/2}+{1\over 4}(3^k+2^k)\,.&(2\hbox{D}.6)\cr}$$
With $L_2=2$, we solve (2D.6) to obtain for $n=2^k\ge 2$,
$$L_n={1\over 8}3^{k+1}+2^{k-1}-{1\over 8}\,.\eqno(2\hbox{D}.7)$$

\medskip\noindent
{\bf V. The Answer.}
From (2D.7) and (2D.2), we have
$$K_n={3\over 2}K_{n/2}+{1\over16}3^{k+1}+2^{k-2}-{1\over16}\,,\qquad
 n\ge2.$$
With $K_2=5/2$, we solve the recurrence to obtain
$$K_n={1\over 8}3^{k+1}+2^k-{1\over2}(3/2)^k+{1\over8}\,,\qquad
 n\ge1.$$
And from (2D.1), we obtain the final answer:
$$B_n={1\over 8}3^{k+1}-{1\over2}(3/2)^k+{1\over 8}\,,\qquad
 k=\lg n.$$

\pssub{Solution to Problem 2(E).}
Let us first derive some connections between the cycle structure of a random
permutation in $\Sigma_{2n}$ and of one in~$\Sigma_n$. Write $n=2^k$. Let
us generate a random $\sigma_T\in\Sigma_{2n}$ in two steps: First generate
a random $\sigma_{T'}$; and later replace each leaf of $T'$ by either
$$\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){0}}
\put(2,.6){\makebox(0,0){1}}
\endpicture$}}\qquad\hbox{or}\qquad
\vcenter{\hbox{$\beginpicture(2.5,1.5)(0,0)
\put(1,1.4){\makebox(0,0){$r$}}
\put(1,1){\disk{.2}}
\put(0,0){\line(1,1){1}}
\put(2,0){\line(-1,1){1}}
\put(0,.6){\makebox(0,0){1}}
\put(2,.6){\makebox(0,0){0}}
\endpicture$}}$$
with equal probability. Take such a $T'$, and let $(i_1i_2\ldots i_s)$
be a cycle in~$\sigma_{T'}$, where each $i_j$ is a $k$-bit integer. If
we take a random $\sigma_T\in\Sigma_{2n}$ derived from~$T'$, then the
successor of $i_j0$ must be either $i_{j+1}0$ or $i_{j+1}1$, and similarly
for~$i_j1$. It follows that the $2s$ integers $\{i_j0,i_j1\}$ will constitute
either one cycle or two cycles in~$\sigma_T$. In fact, if we form the
cycle sequence $i_10i_2b_2i_3b_3\cdots i_sb_s$ in~$T$, we will get one
cycle if $b_{s+1}=1$, and two cycles if $b_{s+1}=0$; each possibility happens
with probability 1/2. The above happens
independently for each cycle in~$\sigma_{T'}$.

We can conclude two facts: First, all cycles in $\Sigma_n$ have lengths
that are powers of~2; this, of course, follows also from the fact that
$\Sigma_n$ is a group of $2^{n-1}$ elements (see part~D).
Second, if we write $g_n(z)=\sum_{l\ge 0}a_{nl}z^l$
where $a_{nl}$ is the average number of cycles of length~$2^l$ for a random
permutation in~$\Sigma_n$, then $g_{2n}(z)$ can be obtained from $g_n(z)$
by replacing each power $z^l$ with
 ${1\over 2}(z^{l+1}+2z^l)=z^l\bigl(1+{1\over 2}z)$.
This implies $g_{2n}(z)=\bigl(1+{z\over 2}\,\bigr)g_n(z)$.

As $g_1(z)=1$, we have, for $n=2^k$, $g_n(z)=\bigl(1+{1\over 2}z\bigr)^k$.
The average number of cycles for a random permutation in $\Sigma_n$ is thus
equal to $\sum_{l\ge 0}a_{nl}=g_n(1)=(3/2)^k$.

The average order for a random integer for a $\sigma_T\in\Sigma_n$ is
equal to
$${1\over n}\sum_{l\ge 0}a_{nl}2^l\cdot 2^l={1\over n}g_n(4)
=(3/2)^k\,.$$

\pssub{Solution to Problem 3.}

(A) Let $I_n$ be the set of all binary strings of length~$n$ with
exactly two~1's. For any nonnegative integer~$m$, let $J_{n,m}$ be the set
of all strings of the form $j_1j_2\ldots j_m$, where $1\le j_i<n$. For
each $\alpha\in I_n$ and $\rho=j_1j_2\ldots j_m\in J_{n,m}$,
a~{\sc maybesort} array~$A$ initially containing $\alpha$ is transformed
by the sequence of ``comparison--exchanges''
$A[j_1]:A[j_1+1],A[j_2]:A[j_2+1],\ldots,A[j_m]:A[j_m+1]$; let $f(\alpha,\rho)$
denote the resulting binary string in~$A$. Take a random $\rho$ uniformly
from $J_{n,m}$ and let $q(\alpha,m)$ denote the probability that
$f(\alpha,\rho)0^{n-2}1^2$, i.e., the probability that $\alpha$
is not yet sorted after $m$ steps in {\sc maybesort}. Then 
$$t_n(\alpha)=\sum_{m\ge 0}q(\alpha,m)\,.\eqno(3.1)$$

Write the string $\alpha\in I_n$, with the $k$-th and $l$-th bits
being~1, as $[k,l]$, $1\le k<l\le n$. Write $[k,l]\le[k',l']$ if $k\le k'$
and $l\le l'$. A simple induction on~$m$ proves

\smallskip\noindent
{\bf Lemma 3.1.} {\sl For any $\rho\in J_{n,m}$, 
$$f([k,l],\rho)\le f([k',l'],\rho)\qquad\hbox{if}\quad [k,l]\le[k',l']\,.$$
}
\smallskip
It follows from Lemma 3.1 that, for any $\alpha\in I_n$,
$$q(\alpha,m)\le q(\beta_n,m)\,.\eqno(3.2)$$
Combining (3.1) and (3.2), we obtain
$t_n(\alpha)\le t_n(\beta_n)$ as desired

\smallskip
(B) Consider the process of sorting a string $\alpha\in I_n$ with
{\sc maybesort} as the movement of the two~1's from left to right in the
array~$A$; we name the two~1's as distinct objects~$S$ and $L$ so that, when
$S$ and $L$ occupy consecutive positions $j$ and $j+1$, the 
``comparison--exchange'' $A[j]:A[j+1]$ has no effect on the array. Note that
$L$ is always to the right of~$S$.

Let us divide the sorting process into two phases: The first phase
ends when $L$ reaches $A[n]$, and the rest is the second phase. In the
sorting process, an operation ``$A[j]:A[j+1]$'' in the first phase
is called a {\sl hit\/} if it produces a movement of~$L$;  it is
a {\sl hit\/} in the second phase 
if it produces a movement of~$S$.

Write 
$$t_n(\beta_n)=F_n+G_n\,,\eqno(3.3)$$
where $F_n$ is the average number of steps used in the first phase, and
$G_n$ is that in the second phase. In the first phase, exactly $n-2$ hits
will occur. Let $F_n=C_0+C_1+\cdots +C_{n-2}$, where $C_i$ is the average
number of steps used after the $i$-th and up to the $(i+1)$-st hits. Clearly,
$C_i=n-1$. Thus,
$$F_n=(n-1)(n-2)\,.\eqno(3.4)$$

Now, at the end of phase 1, suppose $S$ is in location $z$; then $n-1-z$
hits will occur in phase~2, and the expected number of steps will be exactly
$(n-1-z)\cdot (n-1)$. Let $Z$ denote the random variable corresponding
to~$z$; we have
$$G_n=E(n-1-Z)\cdot (n-1)\,.\eqno(3.5)$$
We will evaluate $E(n-1-Z)$; then (3.3), (3.4), and (3.5) will give the
answer to $t_n(\beta_n)$.

Let us represent the movements of $S$ and $L$ during the first phase on
a two-dimensional lattice. If currently $L$ is in $A[l+2]$ and $S$
in in $A[s+1]$, we say that the configuration is the point $(l,s)$
on the lattice. Initially, the configuration is (0,0).
$$\unitlength=10pt
\beginpicture(13,10)(-2,-2)
\multiput(-1,-1)(.2,.2){37}{\disk{.05}}
\put(0,0){\vector(0,1){7}}
\put(0,0){\vector(1,0){9}}
\multiput(1,0)(1,0)5{\line(0,1){5}}
\multiput(0,1)(0,1)5{\line(1,0){5}}
\thicklines
\put(0,0){\line(1,0)2}
\put(2,0){\line(0,1)2}
\put(2,2){\line(1,0)2}
\put(4,2){\line(0,1)1}
\put(4,3){\line(1,0)1}
\put(-2.5,5){\makebox(0,0){$(0,n-2)$}}
\put(8.4,4.9){\makebox(0,0){$(n-2,n-2)$}}
\put(8.4,3){\makebox(0,0){$(n-2,Z-1)$}}
\put(.8,-.9){\makebox(0,0){$(0,0)$}}
\put( 7,-.9){\makebox(0,0){$(n-2,0)$}}
\put(11.5,0){\makebox(0,0){$l$}}
\put(0,7.5){\makebox(0,0){$s$}}
\put(7.1,7.1){\makebox(0,0){$s=l$}}
\endpicture$$
The sequence of movements of $S$ and $L$ in phase 1 traces a path on the
lattice from (0,0) to a point having coordinates
$(l,s)=(n-2,Z-1)$.

Let $m=n-2$.
We can describe the random path as generated by the following process:
Start at (0,0); at each step, say in configuration $(l,s)$, move to
$(l+1,s)$ or $(l,s+1)$ with equal probability except when $s=l$, in which
case move to $(l+1,s)$; the path ends when $l=m$. We are interested
in the random variable $n-1-Z=m-(Z-1)$, the difference between the
two coordinates when the path ends.

It is simpler to consider an alternative
process by removing the exception rule at $s=l$. Generate a path
starting at (0,0); at each step move rightward or upward with equal
probability, until one of $l$ or $s$ is equal to $m$.
Let $(X,Y)$ denote the
final coordinates $(l,s)$. Then $|X-Y|$ has the same distribution as
$m-(Z-1)$ in the more complicated process.

But the probability of reaching $(X,Y)=(m,k)$ is $2^{-m-k}$ times the number
of paths from $(0,0)$ to $(m-1,k)$, and there are $m-1+k\choose m-1$ such
paths. The probability of reaching $(X,Y)=(k,m)$ is the same as the
probability of
reaching $(m,k)$, for $0\le k<m$. Hence the probability that $\vert X-Y\vert
=d$ is
$${2\over 2^{m+(m-d)}}{m-1+m-d\choose m-1}={2^d\over 2^{2m-1}}
{2m-1-d\choose m-1}\,,\eqno(3.6)$$
for $1\le d\le m$.

The expected value is
$$\medmuskip=1mu
\eqalignno{{1\over2^{2m-1}}\sum_{d=1}^m d\cdot2^d{2m-1-d\choose m-1}
&={1\over2^{2m-1}}\sum_{k=0}^{m-1}(m-k)\cdot2^{m-k}{m-1+k\choose m-1}\cr
&={1\over2^{2m-1}}\biggl(\,\sum_{k=0}^{m-1}2m\cdot2^{m-k}{m-1+k\choose m-1}\cr
&\hskip5em{}
 -\sum_{k=0}^m m\cdot2^{m-k}{m+k\choose m}+m{2m\choose m}\biggr)\cr
&={1\over2^{2m-1}}\biggl(2m\cdot2^{2m-1}-m\cdot2^m\cdot2^m+m{2m\choose m}
 \biggr)\cr
&={2m\over2^{2m}}{2m\choose m}\,;&(3.7)\cr}$$
here we have used the identity
$$\sum_{k=0}^m {m+k\choose k}2^{-k}=2^m,\eqno(3.8)$$
which holds because the sum of probabilities (3.6) must be~1.

Therefore, from (3.3), (3.4), and (3.7),
$$t_n(\beta_n)=(n-1)(n-2)\biggl(1+{1\over2^{2n-5}}{2n-4\choose n-2}\biggr).
\eqno(3.9)$$
Asymptotically, this gives
$$t_n(\beta_n)=n^2+{2\over\sqrt{\pi}}n^{3/2}+O(n)\,.\eqno(3.10)$$

\smallskip\noindent
{\sl Comments.} 
If $n=10^6$ and one time unit for {\sc maybesort} is one picosecond
($10^{-12}$ sec), it will take about a second to do the sort. This is about
the same as for one of today's computers if we assume that the sort
will take about $n$ microseconds.

If {\sc maybesort} is applied to an arbitrary bit string, it takes an average
of $O(n)$ steps to reduce the number of inversions by 1. Hence $t_n(\alpha)
=O(n^3)$ for all $\alpha$.

\vfill\eject
\psmajor{Appendix K: Final Exam IV and Solutions}
\psminor{Final Exam IV\noexpand\ifinhead\noexpand\else
   \ \ \rm(A. C. Yao, 1984)\noexpand\fi}

\noindent {\bf Problem 1.}\enspace
Let $f(x)=\sum_{p\leq x}\,{1\over p\ln p}$, where the sum is over the
primes~$p$.

\smallskip\noindent 
(A) [5 points]. Prove that the limit $N=\lim_{x\rightarrow\infty} f(x)$
exists and is finite. Use elementary arguments (without Stieltjes
integrals) if you can.

\smallskip\noindent
(B) [10 points]. Determine the asymptotic value of $f(x)-N$ up to an
additive $O\bigl((\log x)^{-10}\bigr)$ error term.

\medskip\noindent
{\bf Problem 2.}\enspace[50 points total]

\noindent(A) [15 points]. Let $(2-e^z)^{1/2}=\sum_{n\geq 0}a_nz^n$.
Determine the asymptotic value of~$a_n$. Give your answer in the form of
$g(n)\bigl(1+O\bigl(h(n)\bigr)\bigr)$, where $g$ and $h$ are familiar
functions.

\noindent(B) [15 points]. Let $(1+e^z)^{-1}=\sum_{n\geq
0}a_nz^n$. Determine the asymptotic value of $a_n$ to within a
multiplicative factor of $1+O(100^{-n})$ or better.

\noindent(C) [20 points]. Let
$\lambda=\exp\bigl(-\pi/\tan(\pi/\sqrt{7}\,)\bigr)$, and $f(n)=
\sum_{k\geq0_{\mathstrut}}(-\lambda)^kk^n$.
Determine the asymptotic value of $f(n)$ to within a
multiplicative factor of $1+o(1)$.

{\sl Comment:\/} You may find useful the fact that there exists a constant
$K>0$ such that $\vert p/q-\sqrt{7}\,\vert >K/q^2$ for all integers $p$
and~$q$. (This is a consequence of Liouville's Theorem; see, e.g., Hardy
and Wright, {\sl The Theory of Numbers}, Section 11.7.)

\medskip\noindent
{\bf Problem 3.}\enspace
(The Modest Cookie Monster) [35 points]. 
Consider a monster whose appetite for cookies decreases as its size
increases. When we throw a cookie to such a monster with $k$~cookies in its
belly, with probability $1-pk^2$ the monster will grow to size $k+1$.

Let $n>0$ be an integer and $p=1/n^2$. Initially the monster has an empty
stomach. Let $X_{n,m}$ be the random variable corresponding to the size of
the monster after $m$~cookies have been thrown. Let $\alpha>0$ be any fixed
real number. {\sl Question:\/} Determine the asymptotic mean and variance of
$X_{n,\lceil \alpha n\rceil}$ to
within a multiplicative factor of $1+o(1)$, as $n\rightarrow\infty$.

\smallskip\noindent
{\sl Comment:\/} This question arises in the analysis of a modified version
of hashing with separate chaining, in which each key has two random hashing
addresses. A~key will be added to the list headed by its first hashing
address, only if both addresses are nonempty.

\vfill\eject
\psminor{Solutions to Final Exam IV}
\par\vskip-.3\vu\rm
\pssub{Solution to Problem 1.}

\noindent(A) There are $O(2^n\!/n)$ primes between $2^n$ and $2^{n+1}$, and
each term of the sum in this range of~$p$ is $O(2^{-n}\!/n)$. Hence the
total is $\sum_{n\ge1}O(1/n^2)$.

\smallskip
\noindent(B) The difference is
$$\eqalign{\int_0^\infty {d\pi(t)\over t\ln t}
&=\int_x^\infty{1\over(\ln t)^{m+1}}\,
  d\int_{1.5}^t{(\ln u)^m\,d\pi(u)\over u}
  =\int_x^\infty{dC_m(t)\over(\ln t)^{m+1}}\cr
&={C_m(t)\over(\ln t)^{m+1}}\bigg\vert_x^\infty
    +(m+1)\int_x^\infty{C_m(t)\,dt\over t(\ln t)^{m+2}}\cr
&=-{1\over m\ln x}+O\biggl({1\over(\log x)^{m+1}}\biggr)
    +{m+1\over m}\int_x^\infty{dt\over t(\ln t)^2}\cr
&={1\over\ln x}+O\biggl({1\over(\log x)^{m+1}}\biggr)
    \qquad\hbox{for all $m>0$.}\cr}$$

\medskip
\pssub{Solution to Problem 2.}
\noindent
(A) Let $w=z/\ln 2$. We have $2-e^z=2-e^{w\ln 2}=(\ln4)(1-w)f(w)$, where
$$f(w)=1-(1-w){\ln2\over2!}+(1-w)^2{(\ln 2)^2\over3!}-\cdots$$
is analytic and nonzero for $\vert w\vert<\vert1+2\pi i/\ln2\vert$.
Thus the function
$\sqrt{@2{-}e^z}\mkern-1mu=\sqrt{@\ln4}\sqrt{@1-w}f(w)^{1/2}$ has coefficients
rather like those of $\sqrt{@\ln4}\sqrt{@1-w}$, and
$a_n=\sqrt{@\ln4}(-1/\ln2)^n{1/2\choose n}\bigl(1+O(n^{-1})\bigr)
=-C(\ln2)^{-n}n^{-3/2}\*\bigl(1+O(n^{-1})\bigr)$, $C=\sqrt{@\ln2/2\pi}$.

\smallskip\noindent
(B) These coefficients are known in closed form:
$$\sum_{n\ge0}a_nz^n={1\over e^z+1}={1\over e^z-1}-{2\over e^{2z}-1}
 =\sum_{n\ge1}{(1-2^n)B_n z^{n-1}\over n!},$$
so $a_n=(1-2^{n+1})B_{n+1}/(n+1)!$. We have $a_n=0$ for even $n>0$, and
$a_{2n-1}=(-1)^{n-1}\pi^{-2n}2^{1-2n}H_\infty^{(2n)}
         =(-1)^{n-1}\pi^{-2n}2^{1-2n}(1+2^{-2n}+\cdots+99^{-2n})
  \bigl(1+O(100^{-(2n-1)})\bigr)$.

\smallskip\noindent
(C) The strange behavior of this function is evident from the values
$f(10)=9.474$, $f(11)=-3.899$, $f(12)=-119.3$, $f(13)=-289.5$, $f(14)=997.1$.
But its exponential generating function is quite simple:
$$\sum_{n\ge0}f(n){z^n\over n!}=\sum_{k,n\ge0}(-\lambda)^kk^n{z^n\over n!}
=\sum_{k\ge0}(-\lambda e^z)^k={1\over1+\lambda e^z}.$$
There are poles at $z=x+iy$ where $y$ is an odd multiple of $\pi$ and
$x=-\ln\lambda$. We have
$${1\over1+\lambda e^z}={1\over x+iy-z}+{1\over2}+{B_2\over2!}(x+iy-z)
 +{B_4\over4!}(x+iy-z)^3+\cdots$$
near $z=x+iy$, hence the asymptotic value of $f(n)$ is
$$n!\biggl({1\over(x+i\pi)^{n+1}}+{1\over(x-i\pi)^{n+1}}
              +O\biggl({1\over\vert x+3i\pi\vert^{n+1}}\biggr)\biggr).$$
For the stated $\lambda$ we have $x+i\pi=e^{i\theta}\pi/\sin\theta$,
where $\theta=\pi/\sqrt7$. Hence
$$f(n)\sim{2\,n!\,(\sin\theta)^{n+1}\cos(n+1)\theta\over\pi^{n+1}},$$
provided that the cosine factor is large enough to keep this term from
being swamped by the $O$ term.

To complete the proof, we will show that $\vert\cos n\theta\vert
=\Omega(1/n)$. Suppose $c$ is a constant such that $\vert\cos n\theta\vert<c/n$
for infinitely many~$n$. Then there are
infinitely many pairs of integers $(m,n)$ with
$\vert n\pi/\sqrt7-(m+{1\over2})\pi\vert<2c/n$. But this yields
infinitely many rational $p/q=2n/(2m+1)$ with
$$\biggl\vert\,{p\over q}-\sqrt7\,\biggr\vert=
{2\sqrt7\over(2m+1)\pi}\biggl\vert{n\pi\over\sqrt7}-{(2m+1)\pi\over2}
\biggr\vert<{28c\over\pi n^2},$$
contradicting Liouville's theorem if $c\le\pi K/28$.

\pssub{Solution to Problem 3.}
\noindent
Eigenoperator methods do not seem to apply to this problem, so we must
work harder. First, let's proceed heuristically: Suppose there is a function
$M$ such that approximately $M(\beta)n$ cookies are required to make the
monster's size $\beta n$. Then $1/(1-\beta^2)$ further cookies are needed
to make it reach size $\beta n+1$. So $M'(\beta)=1/(1-\beta^2)$, and we
suspect that the function
$$M(\beta)=\int_0^\beta{dx\over1-x^2}={1\over2}\ln{1+\beta\over1-\beta}
=\hbox{arctanh}\,\beta$$
has the stated property. For example, let $\beta={1\over2}$; about ${\ln3\over
2}n$ cookies should make the monster half full.

Conversely, after $\alpha n$ cookies have been thrown, this argument suggests
that the monster's size should be about $M^{-1}(\alpha)n$, where
$$M^{-1}(\alpha)=\tanh\alpha={e^\alpha-e^{-\alpha}\over
                              e^\alpha+e^{-\alpha}}.$$

There probably is a way to make this heuristic argument rigorous, and more
generally to convert any ``coupon collecting'' result about the approximate
distribution of the time needed to collect a given number of coupons
into a dual result about the approximate distribution of the number of coupons
collected up to a given time. The following derivation, however, takes a
different tack: First we find formulas for the exact distribution of size
at a given time, then we apply asymptotic methods to these formulas in
order to compute the mean and variance. This approach involves lengthy but
interesting formula manipulation that provides more information than
requested in the problem.

Let $g_m(z)$ be the probability generating function for $X_{n,m}$, so that
$p_{m,l}=[z^l]\,g_m(z)$ is the probability that the monster's size is~$l$ after
$m$~cookies. Thus $g_1(z)=z$, $g_2(z)=pz+(1-p)z^2$, $g_3(z)=p^2z+
5p{(1-p)}z+{(1-p)(1-4p)}z^3$. In general we have
$$g_m(z)=\sum_{l=0}^m a_{m,l} p^{m-l}\prod_{k=1}^{l-1}(1-k^2p)\,z^l\eqno(1)$$
where the integers $a_{m+1,l}=l^2 a_{m,l}+a_{m,l-1}$ form a triangular array
that begins as follows:
$$\vbox{\halign{&\hbox to 1.5em{\hss#\hss}\cr
&&&&&1\cr
&&&&1&&1\cr
&&&1&&5&&1\cr
&&1&&21&&14&&1\cr
&1&&85&&147&&30&&1\cr
1&&341&&1408&&627&&55&&1\cr
}}$$
The law of formation shows that
$$a_{m,l}=[z^{m-l}]\,{1\over(1-z)(1-4z)\ldots(1-l^2z)}\,.\eqno(2)$$
We can simplify formula (1) for the probability $p_{m,l}=[z^l]\,g_m(z)$,
because $1-k^2p=(1-k/n)(1+k/n)$; this yields
$$p_{m,l}={a_{m,l}\,(n+l-1)!\over n^{2m-1}\,(n-l)!}\,.\eqno(3)$$
If we knew the approximate size of $a_{m,l}$ we would therefore know
the approximate probability $p_{m,l}$. The partial fraction expansion
$${1\over(1-z)(1-2^2z)\ldots(1-l^2z)}={2\over(2l)!}\sum_{k=0}^l
  {2l\choose l+k}{k^{2l}(-1)^{l-k}\over 1-k^2z}$$
gives us the asymptotic formula
$$a_{m,l}={2\over(2l)!}\,l^{2m}+O\bigl((l-1)^{2m}\bigr)$$
for fixed $l$ as $m\to\infty$. But this is not good enough, because we need to
know $a_{m,l}$ when $l$ grows with~$m$; simply expanding the generating
function around its dominant singularity does not in this case provide the
necessary information. The saddle-point method now comes to our rescue:

\smallskip\noindent
{\bf Lemma.} {\sl Given a real number $0<\rho<1$, there are functions
$a(\rho)$
and $c(\rho)$ such that
$$a_{m,l}=c(\rho)\,a(\rho)^{2l}\,l^{2m-2l-1/2}\,\bigl(1+O(1/m)\bigr)\eqno(4)$$
when $l$ and $m$ approach $\infty$ with ratio $m/l=\rho$.}

\noindent{\sl Proof.} Writing $n$ for $l$, we want to estimate
$$a_{\rho n,n}=[z^{(\rho-1)n}]\,{1\over\prod_{k=1}^n(1-k^2z)}
 ={1\over2\pi i}\oint e^{nf(z)}{dz\over z}\,,\eqno(5)$$
where
$$f(z)=(\rho-1)\ln{1\over z}+{1\over n}\sum_{k=1}^n{1\over1-k^2z}\eqno(6)$$
and the path of integration encloses
 the origin inside $\vert z\vert<1/n^2$. The
substitutions $z=w^2\!/n^2$ and $f(z)=2g(w)$ change this to
$$a_{\rho n,n}={1\over\pi}\int_{-\pi/2}^{\pi/2}e^{2ng(w)}\,dt\,,
 \qquad w=re^{it};\eqno(7)$$
thus we choose a radius $r<1$ and integrate around a semicircle. Here
$$\eqalignno{g(w)
&=(\rho-1)\ln{n\over w}+{1\over n}\sum_{k=1}^n\sum_{j=1}^{\infty}
   {k^{2j}\over n^{2j}}{w^{2j}\over2j}\cr
&=(\rho-1)\ln{n\over w}+\sum_{j=1}^\infty{1\over2j+1}{w^{2j}\over 2j}
 \sum_{l=0}^\infty{2j+1\choose l}B_l\left(-1\over n\right)^{\!l}\cr
&=(\rho-1)\ln n-(\rho-1)\ln w+h(w)+{1\over4n}\ln{1\over1-w^2}+O(n^{-2}),
 &(8)\cr}$$
where
$$h(w)=\sum_{j=1}^\infty{w^{2j}\over(2j+1)(2j)}
 =1+{(1-w)\ln(1-w)-(1+w)\ln(1+w)\over2w}\,.\eqno(9)$$
As in most applications of the saddle point method, it is convenient to use
the identity
$$g(re^s)=g(r)+{s\over1!}\vartheta g(r)+{s^2\over2!}\vartheta^2\!g(r)
             +{s^3\over3!}\vartheta^3\!g(r)+\cdots\,,\eqno(10)$$
where $\vartheta$ is the operator $w{d\over dw}$; cf.~[Knuth II,
third edition, exercise 3.3.2--30]. For if we choose the radius of
integration $r$ near the saddle point, so that $\vartheta g(r)=O(n^{-1})$,
and if we apply this identity with $s=it$ and $t=u/\sqrt n$, our integral (7)
takes the form
$$\eqalignno{a_{\rho n,n}&={1\over\pi\sqrt n}\int_{-\pi\sqrt n/2}
 ^{\pi\sqrt n/2}\exp\bigl(2ng(r)+iuO(n^{-1/2})-u^2\vartheta^2\!g(r)\cr
&\hskip10em{}-iu^3O(n^{-1/2})+u^4O(n^{-1})+\cdots\,\bigr)\,du\,.&(11)\cr}$$
Now
$$\eqalign{
\vartheta g(w)&=1-\rho+\sum_{j\ge1}{1\over2j+1}w^{2j}+O(n^{-1}),\cr
\vartheta^2\!g(w)&=\phantom{1-\rho+{}}\sum_{j\ge1}{2j\over2j+1}w^{2j}
  +O(n^{-1}),\cr
\vartheta^3\!g(w)&=\phantom{1-\rho+{}}\sum_{j\ge1}{4j^2\over2j+1}w^{2j}
  +O(n^{-1}),\cr}$$
etc. To make $\vartheta g(r)$ small we choose $r$ so that $\sum_{j\ge1}
r^{2j}\!/(2j+1)=\rho-1$:
$$\rho r=r+{r^3\over3}+{r^5\over5}+\cdots\,=\hbox{arctanh}\,r=M(r)\,.
  \eqno(12)$$
Notice that this choice of $r$ also makes
$$\eqalign{\vartheta^2\!g(r)&=\vartheta g(r)+\vartheta^2\!g(r)+O(n^{-1})\cr
&=1-\rho+\sum_{j\ge1}r^{2j}+O(n^{-1})=1/(1-r^2)-\rho+O(n^{-1}).\cr}$$
Plugging this into (11) and using $\int_{-\infty}^\infty e^{-au^2}\,du
=\sqrt{\pi/a}$ yields
$$\eqalign{a_{\rho n,n}
&={e^{2ng(r)}\over\pi\sqrt n}\int_{-\infty}^\infty
  e^{-\vartheta^2\!g(r)u^2}\,du\,\bigl(1+O(n^{-1})\bigr)\cr
&={n^{2(\rho-1)n}\over r^{2(\rho-1)n}}{e^{2nh(r)}\over
  \sqrt{\pi n(1-(1-r^2)\rho)}}\bigl(1+O(n^{-1})\bigr)\,.\cr}$$
This completes the proof of the lemma, establishing (4) with $l=n$,
$a(\rho)=e^{h(r)}\!/r^{\rho-1}$, and
$c(\rho)=1/\sqrt{\pi(1-(1-r^2)\rho)}$.

For example, the true value of $a_{20,10}$ is 5,992,171,630,749,371,157,181;
the approximation in the lemma, with $2r=M(r)$ for $r\approx.9575$, gives
$6.05\times10^{21}$. The true value of $a_{100,70}$ is the 124-digit number
$6{,}018{,}\ldots,\!616$; the lemma estimates $6.04\times10^{123}$.
Carrying further accuracy in the proof of (4) would yield a complete
asymptotic series, with the $O(n^{-1})$ terms replaced by
$b_1(\rho)/n+b_2(\rho)/n^2+\cdots\,$.

Returning to probabilities, our formula (3) can now be converted to
an asymptotic estimate, using Stirling's formula to write $(2l)!\sim
(2l/e)^{2l}\sqrt{4\pi l}$:
$$p_{m,l}={n\over n+l}\left(l\over n\right)^{\!2m}{n+l\choose2l}\,
  C(\rho)\,A(\rho)^{2l}\,\biggl(1+O\Bigl({1\over n}\Bigr)\biggr),\eqno(13)$$
where $\rho=m/l$, assuming that $l$, $m$, and $n$
approach infinity at approximately proportional rates. Here
$$\eqalignno{
A(\rho)&=2e^{-1}a(\rho)=2e^{h(r)-1}/r^{\rho-1},&(14)\cr
C(\rho)&=2\sqrt\pi\, c(\rho)=2/\sqrt{1-(1-r^2)\rho},&(15)\cr}$$
and $r$ is chosen so that
$$\rho r=\hbox{arctanh}\,r={1\over2}\ln{1+r\over1-r}\,.\eqno(16)$$
Formulas (9) and (16) imply that
$$e^{h(r)-1}=\left(1-r\over1+r\right)^{\!(1+r)/2r}{1\over1-r}
={e^{-\rho(r+1)}\over1-r}={e^{-\rho}\over\sqrt{1-r^2}}\,,$$
so we can considerably simplify the formula for $A(\rho)$:
$$A(\rho)=2e^{-\rho}/\sqrt{1-r^2}\, r^{\rho-1}\,.\eqno(17)$$

The heuristic discussion we began with suggests that the probability will be
negligible except when $l$ and $m$ are approximately $\beta n$ and
$M(\beta)n$ for some~$\beta$. For example, let $n=100$ and $\beta={1\over2}$;
then $M(\beta)=\ln\sqrt3\approx.5493$, so if $m=55$ we expect the value
of~$l$ to be near~50. Indeed, $p_{55,40}$ is very small, approximately
$8\times10^{-7}$, when $n=100$, and the only probabilities $\ge.01$ are
$$\vbox{\halign{$\hfil#$&&$\ \;\hfil#\hfil$\cr
             l\ =& 46& 47& 48& 49& 50& 51& 52& 53& 54\cr
p_{55,l}\ \approx&.02&.05&.10&.17&.21&.21&.14&.07&.02\cr}}$$

Let therefore $m=\alpha n$ and $\beta=\tanh\alpha$, so that $\alpha=M(\beta)$.
We will express $p_{m,l}$ with respect to $\beta n$ as a central point of
reference: Let
$$P(\delta)={p_{m,l}\over p_{m,\beta n}},\qquad l=(\beta+\delta)\,n.
\eqno(18)$$
Our goal will be to show that this ratio $P(\delta)$ is ``bell-shaped'';
more precisely, we will prove that
$$P(\delta)=\exp(f_1\delta+f_2\delta^2+f_3\delta^3+\cdots\,)\eqno(19)$$
where each $f_k$ is $O(n)$ and where $f_1$ is $O(1)$. The value of $f_2$
will be $-cn+O(1)$. Once we have established this, the mean and variance
desired in Problem~3 can be found. For we have
$$\eqalign{1
=\sum_{l=0}^m p_{m,l}
&=p_{m,\beta n}\sum_{l=-\infty}^\infty P\Bigl({l\over n}-\beta\Bigr)\cr
&\sim p_{m,\beta n}\int_{-\infty}^\infty P\Bigl({t\over n}\Bigr)\,dt\cr
&=\sqrt n\,p_{m,\beta n}\int_{-\infty}^\infty
            P\Bigl({u\over\sqrt n}\Bigr)\,du\cr
&=\sqrt n\,p_{m,\beta n}\int_{-\infty}^\infty\exp\bigl(-cu^2+O(n^{-1/2})\bigr)
   \,du\cr
&=\sqrt{\pi n\over c}p_{m,\beta n}\bigl(1+O(n^{-1/2})\bigr);\cr}$$
hence $p_{m,\beta n}\sim\sqrt{c/\pi n}$. Next,
$$\eqalign{E(X_{m,\alpha n})-\beta n
&=\sum_{l=0}^m(l-\beta n)p_{m,l}\cr
&=p_{m,\beta n}\sum_{l=-\infty}^\infty(l-\beta n)P\Bigl({l\over n}-\beta
        \Bigr)\cr
&\sim\sqrt{c\over\pi n}\int_{-\infty}^\infty t\,P\Bigl({t\over n}\Bigr)\,dt\cr
&=\sqrt{cn\over\pi}\int_{-\infty}^\infty u\,P\Bigl({u\over\sqrt n}\Bigr)\,du\cr
&=\sqrt{cn\over\pi}\int_{-\infty}^\infty u\exp\bigl(-cu^2+O(n^{-1/2})\bigr)
     \,du=O(1).\cr}$$
And in a similar fashion,
$$\eqalign{E\bigl((X_{n,\alpha n}-\overline X)^2\bigr)
&=\sum_l\bigl(l-\beta n+O(1)\bigr)^2 p_{m,l}\cr
&\sim\sqrt{c\over\pi n}\int_{-\infty}^\infty\bigl(t+O(1)\bigr)^2
          P\Bigl({t\over n}\Bigr)\,dt\cr
&=n\sqrt{c\over\pi}\int_{-\infty}^\infty\bigl(u+O(n^{-1/2})\bigr)^2
          P\Bigl({u\over\sqrt n}\Bigr)\,du\cr
&={n\over2c}\bigl(1+O(n^{-1/2})\bigr)\,.\cr}$$
To complete the solution, therefore, we want to establish (19).
Formula (13) already shows that (19) holds with each $f_k=O(n)$, so the
remaining problem is to prove that $f_1=O(1)$.

When $m=\alpha n$ and $\beta=\tanh\alpha$ and $l=(\beta+\delta)n$, the
value of $r=\beta+q\delta+O(\delta^2)$ satisfies
$${\alpha\over\beta+\delta}(\beta+q\delta)=M(\beta+q\delta)
=M(\beta)+q\delta M'(\beta)+O(\delta^2)\,;$$
hence
$$q={\alpha\over\alpha-\beta/(1-\beta^2)}=-{\alpha(1-\beta^2)\over
    \beta-\alpha(1-\beta^2)}\,.$$
We can now calculate $\ln(p_{m,l}/p_{m,\beta n})$, ignoring low-order terms:
$$\eqalign{&O(1)+O(\delta^2)\cr
&{\quad}+2m\ln(\beta+\delta)+\ln(n+l)!-\ln(n-l)!-\ln(2l)!
 +2l\ln A\bigl(\alpha/(\beta+\delta)\bigr)\cr
&{\quad}-2m\ln\beta-\ln(n+\beta n)!+\ln(n-\beta n)!+\ln(2\beta n)!
 -2\beta n\ln A\bigl(\alpha/\beta\bigr)\cr
&=O(1)+O(\delta^2)+n\delta\biggl({2\alpha\over\beta}+1+\ln(1+\beta)
     +1+\ln(1-\beta)-2-2\ln2\beta\cr
&\hskip9em{}+2\ln2+{2\beta q\over1-\beta^2}-\ln(1-\beta^2)-{2\alpha q\over
    \beta}+2q+2\ln\beta\biggr),\cr}$$
and everything cancels out beautifully.

To find the coefficient $c$ of $-n\delta^2$, we can extend the accuracy
to $O(\delta^3)$, with
$r=\beta+q\delta+p\delta^2+O(\delta^3)$
when $p=(\alpha-\beta)q/(\beta-\alpha(1-\beta^2))^2$. Or, we can use the
relation $p_{m,\beta n}\sim\sqrt{c/\pi n}$ derived earlier. Both approaches
give the same result,
$$c={1\over(1-\beta^2)(\beta-\alpha(1-\beta^2))};\eqno(20)$$
this provides an excellent check on the calculations. The variance is
$\sim n/2c$.

\bye



