%%% Formated for "World Scientific Publishing Co"
	\magnification=\magstep1

%***************  TO GET SMALLER FONT FAMILIES	*****************
\newskip\ttglue

% ********** EIGHT POINT **************

\def\eightpoint{\def\rm{\fam0\eightrm}  
  \textfont0=\eightrm \scriptfont0=\sixrm \scriptscriptfont0=\fiverm
  \textfont1=\eighti  \scriptfont1=\sixi  \scriptscriptfont1=\fivei
  \textfont2=\eightsy  \scriptfont2=\sixsy  \scriptscriptfont2=\fivesy
  \textfont3=\tenex  \scriptfont3=\tenex  \scriptscriptfont3=\tenex
  \textfont\itfam=\eightit  \def\it{\fam\itfam\eightit}
  \textfont\slfam=\eightsl  \def\sl{\fam\slfam\eightsl}
  \textfont\ttfam=\eighttt  \def\tt{\fam\ttfam\eighttt}
  \textfont\bffam=\eightbf  \scriptfont\bffam=\sixbf
    \scriptscriptfont\bffam=\fivebf  \def\bf{\fam\bffam\eightbf}
  \tt  \ttglue=.5em plus.25em minus.15em
  \normalbaselineskip=9pt
  \setbox\strutbox=\hbox{\vrule height7pt depth2pt width0pt}
  \let\sc=\sixrm  \let\big=\eightbig \normalbaselines\rm}

\font\eightrm=cmr8 \font\sixrm=cmr6 \font\fiverm=cmr5
\font\eighti=cmmi8  \font\sixi=cmmi6   \font\fivei=cmmi5
\font\eightsy=cmsy8  \font\sixsy=cmsy6 \font\fivesy=cmsy5
\font\eightit=cmti8  \font\eightsl=cmsl8  \font\eighttt=cmtt8
\font\eightbf=cmbx8  \font\sixbf=cmbx6 \font\fivebf=cmbx5

\def\eightbig#1{{\hbox{$\textfont0=\ninerm\textfont2=\ninesy
	\left#1\vbox to6.5pt{}\right.\enspace$}}}

%************** NINE POINT *****************
\def\ninepoint{\def\rm{\fam0\ninerm}  
  \textfont0=\ninerm \scriptfont0=\sixrm \scriptscriptfont0=\fiverm
  \textfont1=\ninei  \scriptfont1=\sixi  \scriptscriptfont1=\fivei
  \textfont2=\ninesy  \scriptfont2=\sixsy  \scriptscriptfont2=\fivesy
  \textfont3=\tenex  \scriptfont3=\tenex  \scriptscriptfont3=\tenex
  \textfont\itfam=\nineit  \def\it{\fam\itfam\nineit}
  \textfont\slfam=\ninesl  \def\sl{\fam\slfam\ninesl}
  \textfont\ttfam=\ninett  \def\tt{\fam\ttfam\ninett}
  \textfont\bffam=\ninebf  \scriptfont\bffam=\sixbf
    \scriptscriptfont\bffam=\fivebf  \def\bf{\fam\bffam\ninebf}
  \tt  \ttglue=.5em plus.25em minus.15em
  \normalbaselineskip=11pt
  \setbox\strutbox=\hbox{\vrule height8pt depth3pt width0pt}
  \let\sc=\sevenrm  \let\big=\ninebig \normalbaselines\rm}

\font\ninerm=cmr9 \font\sixrm=cmr6 \font\fiverm=cmr5
\font\ninei=cmmi9  \font\sixi=cmmi6   \font\fivei=cmmi5
\font\ninesy=cmsy9  \font\sixsy=cmsy6 \font\fivesy=cmsy5
\font\nineit=cmti9  \font\ninesl=cmsl9  \font\ninett=cmtt9
\font\ninebf=cmbx9  \font\sixbf=cmbx6 \font\fivebf=cmbx5
\def\ninebig#1{{\hbox{$\textfont0=\tenrm\textfont2=\tensy
	\left#1\vbox to7.25pt{}\right.$}}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
	\overfullrule=0pt 
	\hsize=6.0truein
	\vsize=8.5truein
	\parindent=3truepc
%%%% NO PAGE NUMBERS ON FINAL COPY  !!!!!!!!!!!!!!!!!!!!
	%\nopagenumbers
%%% Added macros (combs)
	% for Remarks, Proofs, Definitions, etc. 
	\def\demo#1.{\medskip \noindent {\it #1.}\enspace}
	\def\section#1{\vskip.6truecm\noindent{\bf#1}\vskip.4truecm}
	\def\qed{~\hfill\eop\medskip}
	\def\eop{\hbox{\vrule width6pt height7pt depth1pt}} 
	\def\cite#1{$^{#1}$}
	\def\myskip{\noalign{\vskip6pt}}	% extra space multi-line eqns.
	\def\frac#1#2{\textstyle{#1\over#2}}
	\def\bigmid{{\ \Big|\ }}
	\def\un#1{{\underline{#1}}}
%	\def\balpha{{\underline{\alpha}}}
   \def\balpha{\mathop{\alpha\kern-.6em{\alpha}}\nolimits}   % poormans' bold	
   \def\bpsi{\mathop{\psi\kern-.73em{\psi}}\nolimits}	     % poormans' bold 
   \def\bvarphi{\mathop{\varphi\kern-.71em{\varphi}}\nolimits}% poormans' bold 
   \def\bomega{\mathop{\omega\kern-.59em{\omega}}\nolimits}
	\def\Ball{\mathop{\rm Ball}\nolimits} 
	\def\diam{\mathop{\rm diam}\nolimits} 
	\def\dist{\mathop{\rm dist}\nolimits} 
	\def\Id{\mathop{\rm Id}\nolimits} 
	\def\Im{\mathop{\rm Im}\nolimits} 
	\def\mod{\mathop{\rm mod}\nolimits} 
	\def\Vol{\mathop{\rm Vol}\nolimits} 
	\def\triv{{|\!|\!|}}		%  ||| 
	\def\varep{\varepsilon}
	\def\nat{{\bf N}} 			% natural 
	\def\real{{\bf R}}			% real  
	\def\zed{{\bf Z}} 			% integer 
	\def\integer{\zed}
\def\complex{\kern.1em{\raise.47ex\hbox{
            $\scriptscriptstyle |$}}\kern-.40em{\rm C}}
	\def\bzero{{\bf 0}}
	\def\bA{{\bf A}} 
	\def\bG{{\bf G}} 
	\def\bY{{\bf Y}} 
	\def\ba{{\bf a}} 
	\def\bb{{\bf b}}
	\def\bk{{\bf k}}
	\def\bp{{\bf p}}
	\def\bq{{\bf q}}
	\def\bx{{\bf x}}
	\def\bz{{\bf z}}
	\def\A{{\cal A}}
	\def\B{{\cal B}} 
	\def\C{{\cal C}} 
	\def\F{{\cal F}}
	\def\G{{\cal G}}
	\def\H{{\cal H}}
	\def\L{{\cal L}}
	\def\R{{\cal R}} 
%%% End of added macros 
%Updated with effect from: 5 Sept 1991
%\headline={\ifnum\pageno=1\firstheadline\else
%\ifodd\pageno\rightheadline \else\leftheadline\fi\fi}
%\def\firstheadline{\hfil}
%\def\rightheadline{\hfil}
%\def\leftheadline{\hfil}
%        \footline={\ifnum\pageno=1\firstfootline\else\otherfootline\fi}
%\def\firstfootline{\rm\hss\folio\hss}
%\def\otherfootline{\hfil}
%\font\tenbf=cmbx10
%\font\tenrm=cmr10
%\font\tenit=cmti10
%\font\elevenbf=cmbx10 scaled\magstep 1
%\font\elevenrm=cmr10 scaled\magstep 1
%\font\elevenit=cmti10 scaled\magstep 1
%\font\ninebf=cmbx9
%\font\ninerm=cmr9
%\font\nineit=cmti9
%\font\eightbf=cmbx8
%\font\eightrm=cmr8
%\font\eightit=cmti8
%\font\sevenrm=cmr7
%\TagsOnRight
%%%% NO PAGE NUMBERS ON FINAL COPY  !!!!!!!!!!!!!!!!!!!!
	%\nopagenumbers
%\line{\hfil }
%\vglue 1cm
\topinsert\vskip1truecm\endinsert 
\centerline{{\ninebf INTRODUCTION TO K.A.M. THEORY}*}
\vskip 1.0truecm
\centerline{\ninerm RAFAEL DE LA LLAVE} 
\vskip 1pt
\centerline{\nineit Mathematics Department, The University of Texas at Austin}
\centerline{\nineit Austin, Texas 78712-1082, USA}
\vskip 0.8truecm
%\centerline{\ninerm ABSTRACT}
%\vskip 0.3truecm
%{\narrower\noindent 
% \ninerm			
%%%%% put text here if there is an abstract 
% \noindent
%\vskip 0.8truecm }
\baselineskip=14pt
\section{1. Introduction}
In this lecture, we will discuss the K.A.M. theorem about persistence of 
quasi-periodic motions under small perturbations of the Hamiltonian. 

Rather than presenting a proof of the full theorem, we will give a proof 
of a caricature that goes half-way in the difficulty and give a sketch 
of the modifications necessary to obtain the complete result. Very 
pedagogical proofs of K.A.M.\ theorem exist 
\cite{10,14,15,35}. 
A proof that contains very precise information about 
differentiability properties is \cite{29}. 

In our model problem we
 will study diffeomorphisms of the torus $T^{2d} = \{ (q_i,p_i), 
i=1,\ldots,d\}$ which preserve the symplectic form $\omega = \sum dp_i \wedge 
dq_i$. Preserving a symplectic form is one of the key features of Hamiltonian 
systems, and is what gives the characteristic flavor to most of the 
problems and techniques of Hamiltonian mechanics. 

Take a translation in the torus $E_{\balpha} : T^{2d} \to 
T^{2d}\quad E_{\balpha} (\bx) = \bx  + \balpha (\mod 1)$. 
Clearly, this preserves the symplectic form $\omega$. Now consider a 
perturbation of it, that is, a family of symplectic transformations 
$f_\varep : T^{2d}\to T^{2d}$, such that it is analytic in $\varep$ 
and $f_0 \equiv E_\alpha$. 

The question is: When can this perturbation be eliminated by a change 
of coordinates; in more analytic terms, when can we find another 
analytic family of invertible symplectic transformations such that 
$$\displaylines{ \hfill
f_\varep \circ g_\varep = g_\varep \circ E_{\balpha}  \hfill
(1)}$$
The most effective way (even computationally) of making perturbation 
calculations in mechanics is probably to try to absorb as much of the 
perturbation as possible in a change of coordinates (see 
\cite{8} for very 
practical computations not necessarily in Hamiltonian mechanics). 

The advantage of such a method with respect to other more straightforward 
methods --- like trying to make perturbation around each orbit --- is that 
we can obtain global information about the whole phase portrait; we also 
obtain control --- uniform in all initial conditions --- of the range of 
validity of the expansions. This takes care of secular terms which are  
difficult to interpret, and a computational hazard (e.g., if we try to 
match powers in the problem $\ddot x = - (\omega +\varep)^2 x$; $x(0) =0$; 
$\dot x (0) = \omega +\varep$ we are led to 
$x = \sin (\omega t) +\omega  \cos (\omega t) \varep t+ \omega^2 
\sin (\omega t) {(\varep t)^2 \over 2!} - \omega^3 \cos (\omega t) 
{(\varep t)^3 \over 3!}$
which is, of course, $\sin (\omega +\varep) t$. However, if we try to sum 
the series for large values of $\varep t$, we are led to unstable computations. 
Notice also that the periodicity of the perturbed solution is not apparent 
in any order of the perturbation theory. Those terms first appeared in the 
computation of rotation of perihelia and they become appreciable --- for 
Mercury --- when $t$ is of the order of centuries, hence the name secular. 
The classical ways of dealing with secular terms were to resume most of 
them in recognizable forms (Lindstet, etc.) \cite{28}). 

\demo Remark. (Relation to commutator groups)\quad
If we rewrite $f_\varep = \hat f_\varep \circ E_{\balpha}$, then Eq.~(1) 
becomes 
$$\displaylines{
\hfill \hat f_\varep E_{\balpha} g_\varep = g_\varep E_{\balpha}
\hfill\cr 
\hfill \hat f_\varep = g_\varep E_{\balpha} g_\varep^{-1} 
E_{\balpha}^{-1}\ .\hfill 
\llap{(2)}\cr}$$ 

This means that we rewrite $\hat f_\varep$ (close to the identity) as a 
commutator. The problem of characterizing the group of diffeomorphisms 
generated by commutators is very important, since it is related to 
geometric properties of the manifold \cite{13}. So, the study of which 
diffeomorphisms are commutators has considerable interest. 

Even if Eq.~(1) and Eq.~(2) are equivalent, from the point of view of 
analysis, Eq.~(1) is nicer because it has ``group structure,'' so that it 
can be studied by hard implicit function theorems. The idea of using this 
strategy for this problem of commutators appeared in \cite{20}.

If our interest were to study the problem of the commutator subgroup, it 
is worth remarking that the K.A.M.\ strategy we are going to discuss fails 
in $C^r$ when $r\ne \omega,\infty$ because we will only be able to 
conclude that $g\in C^{r-r_0}$ ($r_0 >0$), that is, we cannot show that 
$g$ is in the group of $C^r$ diffeomorphisms. However, there is another 
strategy of proof which works even in this case \cite{6} 
(see also \cite{23,24,25}). 
This suggests that K.A.M.\ strategy is not always the finest tool 
--- as should be expected from a technique that works for so many problems. 
It would be quite interesting if the analogy between the two problems 
could be pushed far enough to give a proof of the K.A.M.\ theorem not 
based in K.A.M.\ strategy. 

\demo Remark.
Looking for analogues of K.A.M.\ theorem, it would be more natural, perhaps, 
to study flows rather than diffeomorphisms. The problem for flows is simpler 
than for diffeomorphisms and handling the latter is useful in mechanics 
anyway. 

\section{A. Formal considerations} 
Before stating the theorem that solves the problem, we will pause to 
develop some formalism; this will help to find some hypothesis that this 
theorem should have. 

We will develop several formalisms in the hope of enabling  the reader 
to understand the relationships between several results in the literature 
and to serve as a motivation for the last formulation --- the one I 
personally favor. So, it will be quite possible --- indeed, even sensible 
in a first reading --- to skip quite a lot of what comes next. 

The first difficulty that stands in the way of solving the problem is how 
to write diffeomorphisms in a way which allows effective computation. In 
the case of the torus, we have an obvious way: to use coordinates. However, 
this is a bad solution because there are many manifolds that do not admit 
such global coordinates; besides in coordinates the symplectic character 
appears as a set of non-linear non-local constraints. A technique that 
solves the problem of keeping the symplectic character (but does not help 
with the problem of requiring a global system of coordinates --- it makes 
it worse) is the use of generating functions; this has the side advantage 
that rather than dealing with $2d$ functions of $2d$ variables linked by 
(horrid) constraints, we will only have to deal with one function of $2d$ 
variables. We will consider the variables as ranging in $\real^{2d}$ and 
keep track of periodicity conditions.  

We observe that $E_{\balpha}$ admits a generating function of the type 
$(\bq,\bp')$.  (It does not admit one of the form $(\bq,\bq')$ or $(\bp,
\bp')$ which are the most natural.  
$$\bq' = \bq + \ba = {\partial F_0\over \partial \bp'} \quad ;\quad
\bp = \bp' - \bb = {\partial F_0\over \partial\bq}\ .$$ 
Hence, $F_0 = \bp' \bq + \ba \bp' - \bb \bq$ up to a constant which we will 
fix as zero without loss of generality. Since the construction of a 
generating function out of a diffeomorphism is quite explicit, we can 
conclude that $f_\varep$ will have a family of generating functions 
$F_\varep (\bq,\bp')$ which will be as smooth both in the coordinates and 
in $\varep$ as $f_\varep$ (provided that we choose the constants adequately). 

Likewise, $g_\varep$, if it exists at all, will have a family of 
generating functions $G_\varep (\bq,\bp')$; $G_0 (\bq,\bp') = \bp',\bq$. 
We systematically will write 
$$\eqalign{
G_\varep & = G_0 + \varep G_1 + \varep^2 G_2 +\cdots \cr
G_\varep^{[\le N]} & = \sum_{k=0}^N \varep^k G_k\ .\cr}$$ 

We can write Eq.~(1) as an equation for $G_\varep$ in terms of  $F_\varep$ 
and, then, match powers to obtain a hierarchy of equations of the $G_n$'s
in terms of the $F_n$'s. We will only observe that the first equation of the 
hierarchy is 
$$G_1 (\bx + \balpha) - G_1 (\bx) = F_1 (\bx) 
\eqno(3)_1$$ 
and that the other equations  of the hierarchy are 
$$G_n (\bx + \balpha) - G_n (\bx) = F_n (\bx) + R_n 
\eqno(3)_n$$ 
where $R_n$ is an (awful) expression involving only $G_1,\ldots, G_{n-1}$, 
$F_1,\ldots,F_{n-1}$ and their derivatives. 

>From the structure of Eq.~$(3)_n$, we can proceed inductively,  examining 
recursively the equations, provided that we have a theory for equations 
of the form 
$$G(\bx + \balpha) - G(\bx) = F(\bx)\ .$$ 

We should notice that since $G_\varep$, $F_\varep$ are generating functions 
of a transformation in the torus, they are obtained integrating a periodic 
function and, hence, they are the sum of a periodic function and a linear 
one. We should also remember that they are defined up to constants. These 
restrictions on the class of functions that can appear are a translation 
of the geometric content of the problem. 

If the linear part of $F$ were non-null, then $G$ would have a quadratic term 
and, hence, be out of the class we are interested in. 

The periodic part can be studied by using Fourier components; we are led to 
$$(e^{2\pi i\bk \balpha}-1) G_{\bk} = F_{\bk}$$ 
so that there is a solution provided $F_{\bk} = 0$ whenever $\bk \balpha 
\in \zed$. We will call ``resonant'' those $\bk$'s such that $\bk\balpha 
\in \zed$, $\bk\ne 0$. 

The solution is not unique for the $F_{\bk}$'s with $\bk \cdot \balpha =0$. 
When $\bk =0$ this does not matter, because it is precisely the natural 
ambiguity of the $G$'s --- as generating functions defined up to constants. 
Moreover it requires some computation to become convinced, but is 
nevertheless true, that the ambiguities making different choices for 
constant terms at some stage do not affect the solvability of the 
equations of higher order in the hierarchy and only affect the solution up 
to constants. 

Some $\balpha$, enjoy the remarkable property that there are no 
resonant terms. This can be expressed as the components being ``linearly 
independent over the rationals.'' (For example $\balpha = (1,\sqrt2,
\sqrt5)\balpha \bk \in \zed$, $\bk \in \zed^3 \Rightarrow \bk =0$.) 


Summing up, the necessary conditions for the existence of a formal solution  
are that $F_n + R_n$ do not have either a linear part or resonant terms for any 
$n$. The absence of resonant terms is automatic if $\balpha$ is such that 
there are no resonances. The absence of linear part depends on $\hat f_\varep$ 
(and, even if mysterious in this formalism, it will turn out to be quite 
simple when  interpreted geometrically). 

Discussing convergence, even in the case of rationally independent 
$\balpha$ seems, at first sight, to be quite hopeless because 
$G_{\bk} = (1/(e^{2\pi i\bk\balpha} -1)) F_{\bk}$ and the 
denominators become arbitrarily small, since $\bk\cdot\balpha$ will become 
arbitrarily close to integers as $\bk$ ranges over $\zed^{2d}$. 
Moreover, this is only the linear approximation!.

For the moment, we will postpone all discussions 
about convergence and proceed to develop
other formalisms in perturbation theories.
The next method is based on 
the idea of writing all symplectic diffeomorphisms close to the identity 
as time one maps of vector fields. 

The idea is very analogous to the very successful use of Lie algebras 
in the study of problems of conjugacy and commutators for finite 
dimensional Lie groups. 

We think of symplectic diffeomorphisms as an infinite-dimensional Lie group 
(the group operation is composition) then the elements of the Lie algebras 
(in old fashioned language the ``infinitesimal canonical transformations'') 
are the locally Hamiltonian vector fields. 

Unfortunately, the relation of infinite dimensional Lie groups and 
their Lie algebras is much more complicated than for the finite dimensional 
ones. 

For example, if the manifold is not compact, there could be vector fields 
some of whose trajectories get out of the manifold in a finite time so that 
those vector fields do not define a flow (i.e., there is no exponential 
for all the elements of the Lie algebra). 
This is not a very serious difficulty because for compact manifolds, 
all vector fields have time one map (i.e., we can define the exponential 
of the Lie algebra). 

Much worse is the fact that, even when the exponential is well defined, 
it does not need to cover a whole neighborhood of the origin. That is, 
there are diffeomorphisms as close to the identity as we please which cannot 
be expressed as time one maps. This shows there is no hope of getting a 
logarithm (the inverse of the exponential) which works without problems. 

\demo Example.
We will show that if $f:S^1\to S^1$ without fixed points is the time one map 
of a vector field, then the  derivatives of $f^n$ are bounded independently 
of $n$. 

Since arbitrarily close to the identity there are diffeomorphisms with 
expansive periodic points $f^N (x_0) = x_0$ $|f^{N'} (x_0)| >1$ we have 
$f^{nN} (x_0) = (f^{N'} (x_0))^n$ which is arbitrarily large. 



In effect, if $f$ without fixed points in the time-one map of the vector 
field $X$, $X$ cannot have any zeros. $t_0 = \int_S{d\theta\over |X(\theta)|}$ 
is finite and, the time-$t_0$ map is the identity.

So that 
$$f^n = T_n = T_{t_0[n/t_0] +r(n)} = T_{r(n)}$$ 
but $0\le r(n)\le t_0$ so that $T_{r(n)}$ is uniformly bounded in $C^1$. 

Therefore, for the circle, the exponential of the Lie algebra of the group 
of diffeomorphisms does not cover any $C^\infty$
neighborhood of the identity. 

\demo Remark. 
Something slightly weaker  is however true: all diffeomorphisms of class 
$C^r$, $r\notin \zed$, $r\ne n+1$ ($n$ dimension of the manifold) can be 
written as the product of a finite number of time one maps (this number, 
however depends on the diffeomorphism) of $C^r$ vector fields. 
This is because of the results of Mather 
\cite{23,24,25}  and Epstein \cite{13}, 
who show that the group of diffeomorphisms homotopic to the identity with this 
regularity is simple.
We observe that the set of all  diffeomorphisms that can
 be written as a finite product 
of time one maps of $C^r$ vector fields is a group.

Moreover, it is normal (if $h$ is time-one map of the vector field $X$, 
$g^{-1}hg$ is the time-one map of the vector field obtain changing 
coordinates in $X$) so  by the results of
Epstein and Mather, it should be the whole component of the identity. 
\vskip1pt
But, even if the machinery of finite dimensional Lie groups does not carry 
over,  it is true that many of the calculations are done in the spirit 
suggested by it and it can be a very strong guide to know which is the 
right framework in which to perform analysis (to identity the obstructions 
and decide which properties should the conjugation elements have) 
\cite{34,22}. 
\vskip1pt
But, if we do not want bona-fide convergence the problems are much easier. 
Formal power expansions work pretty much like finite dimensional Lie groups. 

In particular, we have 

\proclaim Proposition. 
Given a family $f_\varep $ of smooth diffeomorphisms $f_0 = \Id$, we can 
find a sequence of vector fields $\L_1,\ldots,\L_n,\ldots$ such that 
$$d\biggl( f_\varep, \exp \biggl[ \sum_{i=1}^n \varep^i \L_i \biggr]
\biggr) = O (\varep^{n+1})\ \forall\ n\ .$$ 

\demo Proof. 
Suffices to show that we can match enough derivatives of $f_\varep$ at 
$\varep =0$. 
A simple computation shows that 
$$\Bigl( {d\over d\varep}\Bigr)^k \exp \biggl[ \sum_{i=1}^n \varep^i L_i 
\biggr]\Big|_{\varep=0} = i! \L_k + \R_k$$ 
where $\R_k$ depends complicatedly on $\L_1,\ldots,\L_{k-1}$ but 
{\it not\/} on any $\L$ of higher index. 

So that recursively we can adjust the $\L_k$'s so that those derivatives 
agree with those of $f_\varep$.
The same result holds true when $f_\varep$ preserves the symplectic form 
and we restrict the $\L_i$'s to be locally Hamiltonian.

Once we have a logarithm, (with some reasonable properties with respect 
to derivatives, etc. which are easy in this case) there are many pieces 
of machinery that can be imported. Notably Campbell-Hausdorff formula. 
(It suffices to walk through the proof of \cite{12} 
reproduced e.g., in \cite{30}.) 
$$\exp \A \exp \B = \exp \C$$ 
where 
$$\C = \A + \B + \sum_{n>0} {(-1)^{n+1} \over n(n+1)} \sum 
{\overbrace{[\A,[,\ldots,\A,}^{r_1} 
[\overbrace{\B,[\ldots \B_1}^{s_1} 
\overbrace{[\A_1[\ldots\A\ldots\ldots}^{r_2} 
[\overbrace{\A\ldots\A}^{r_n},\B]\ldots] 
\over 
(s_1+\cdots + s_{n-1}+1) r_1!\ldots r_{n!} s_1!\ldots s_n!}$$ 
which will hold in the sense of asymptotic expansions when the manifolds 
are compact. 

Taking into account that $E_{\balpha}$ is a canonical transformation, 
Eq.~(2) can be written as 
$$\exp \bigl[ \varep \F_1 + \varep^2 \F_2 +\cdots\bigr] 
= \exp \bigl[ \varep \G_1 +\varep^2 \G_2 +\cdots] 
\exp \bigl[ -\varep E_{\balpha}^* \G_1 - \varep^2 E_\alpha^* 
\G_2\ldots\bigr]\ .$$ 
(``${}^*$'' is the usual push forward) 

Using the Campbell-Hausdorff formula to express the second term, we obtain 
matching powers 
$$\eqalign{
\F_1 & = \G_1 - E_{\balpha}^* \G_1\cr 
\F_2 & = \G_2 - E_{\balpha}^* \G_2 -\frac12 \bigl[ \G_1,E_{\balpha}^* 
\G_2\bigr]\ .\cr}$$ 

Notice that all the nonlinear terms in the hierarchy are going to be 
commutators. 

Again, we obtain a perturbation theory that has all the features desirable 
of a perturbation expansion; that is, it can be solved step  by set (and  
renormalizability). 

Now, we can investigate a little bit more the obstructions for solution. 

\proclaim Lemma 1. 
If $\G$ is a locally Hamiltonian vector field in a torus, then 
$\G - E_{\balpha}^* \G$ is globally Hamiltonian. 

\proclaim Lemma 2. 
If $\G,\H$ are locally Hamiltonian vector fields $[\G,\H]$ is globally 
Hamiltonian with Hamiltonian $\omega (\G,\H)$. 

\demo Proof. 
Lemma 1 can be established quickly (we will see another proof later) noticing 
that $\G$ is globally Hamiltonian in $\real^{2d}$ and the Hamiltonian is 
periodic and linear. $\G- E_{\balpha}^* \G$ will also have a Hamiltonian 
in $\real^{2d}$ and, when  we compute it, the linear parts drop. Hence, 
the Hamiltonian in $\real^{2d}$ is periodic and that is equivalent to being 
globally Hamiltonian. 

Lemma 2 is much more important than Lemma 1; it is true for all manifolds 
and \cite{36} 
showed the converse, that is, that all globally Hamiltonian vector 
fields are commutators of locally Hamiltonian ones. This means that we have 
found the commutator subalgebra of the Lie algebra tangent to the Lie group. 

Here we present a coordinate independent proof similar to 
the one that can 
be found in \cite{2}. 

Given a vector field $X$ we call $i_\omega X$ the one form defined by 
$(i_\omega  X) (Y) = \omega (X,Y) Y$. 

$X$ is a globally Hamiltonian vector field of Hamiltonian $H$ and can be 
written as $i_\omega H= dH$ and $X$ being a locally Hamiltonian vector field 
can be written as $d(i_\omega X) =0$. 

We can express $\omega (Z[X,Y]) = \alpha_z ([X,Y]) = X(\alpha_Z(Y)) 
- Y(\alpha_Z (X))$. 
Since $d\omega =0$, we also have, for all vector fields $X_1,X_2,Y$ 
$$\eqalign{0& = (d\omega (X_1,X_2,Y)\cr 
& = Y\omega (X_1X_2) + X_2 \omega (Y,X_1) +X_1 \omega (Y,X_2) \cr 
&\qquad + \omega \bigl( [Y,X_1],X_2\bigr) 
+ \omega \bigl( [X_2,Y],X_1\bigr) 
+ \omega \bigl( [X_1X_2],Y\bigr)\ .\cr}$$ 

If $X_1\,X_2$ are locally Hamiltonian, we can substitute the previous 
identity and derive 
$$\eqalign{ \omega (X_1,X_2) & = \omega \bigl( [X_1X_2],Y\bigr)\cr 
d\bigl(\omega (X_1,X_2)\bigr) & = \bigl( [X_1X_2],Y\bigr)\cr }$$ 
so that indeed, $[X_1X_2]$ is the Hamiltonian vector field associated to 
the function $\omega (X_1,X_2)$. 

A coordinate dependent proof is also quite feasible. The theorem can be 
reduced to show that in any set of coordinate patches covering the 
manifold, the commutator and the Hamiltonian flow agree. We can take 
coordinate patches in such a way that $\omega = \sum_i dp_1\wedge dq_i$ 
(Darboux theorem) and we can compute both objects and show that they are 
the same, using the identities implied by local Hamiltonian property for 
the derivatives of the components of $\G$ and $\H$. 

A s a corollary of the Lemmas we clearly obtain 

\proclaim Proposition. 
In order that a solution of Eq.~(2) exists, it is necessary that all 
$\F_i$ be globally Hamiltonian. 

In that case, it suffices to look for $\G_i$ only among globally 
Hamiltonian vector fields and the equations of the hierarchy are 
$$\eqalign{ F_1 (\bx) & = G_1 (\bx) - G_1 (\bx + \balpha) \cr 
F_2 (\bx) & =  G_2 (\bx) - G_2 (x + \balpha) + R_2 (\bx)\cr}$$ 
where $F_i$ and $G_i$ are the Hamiltonians of $\F_i$ and $\G_i$ and the 
equalities are up to constants and $R_n$, as before, can be computed 
from the previous terms. 

So far, we have only been working in the formal power series context and 
the obstructions for solvability can be quite obscure if left in this form. 
So, we will have to work a bit more of formalism. This formalism will 
also become quite useful to establish convergence, because a direct 
proof estimating the coefficients of $\varep$ in the expansion is quite 
impractical and, in the second case, even false, due to the poor 
properties of log in infinite dimensional Lie algebras. 

The idea is to interpolate all the $f_\varep$ by a vector field 
$${d\over d\varep} f_\varep = \F_\varep \circ f_\varep$$ 
and similarly for $g_\varep$ and use the vector fields to express the 
conjugacy equation (1). As is well known, the $\F_\varep$ will be locally 
Hamiltonian vector fields. 

If we take derivatives with respect to $\varep$ in Eq.~(1), we obtain 
$$\F_\varep (f_\varep \circ g_\varep) + f_\varep^* \ _\varep(f_\varep 
g_\varep) = \G_\varep (g_\varep E_\varep)\ .
\eqno(4)$$ 
And using Eq.~(1) again 
$$\F_\varep = \G_\varep - f_\varep^* \G_\varep\ . 
\eqno(5)$$ 
This implies that $F_\varep$ should not only be locally Hamiltonian, but 
also globally Hamiltonian. In some particular cases, we had already 
established that in Lemma~1. A more geometric proof is as follows. 
Globally Hamiltonian  means $[i_\omega (\F_\varep)] =0$ (that is, the 
result of identifying the vector field with a one-form produces an exact 
differential -cohomology zero-). On the other hand, we just observe that 
$f_\varep$ preserves the symplectic form, so that we can write  
$$[i_\omega f_\varep^* \G_\varep ] 
= [f_\varep^* i_\omega \G_\varep] 
= f_{\varep\#} [i_\omega \G_\varep]\ .$$ 

Moreover, since $f_\varep$ is connected to the identity, its action on 
cohomologies is the identity

Hence,  
$$[i_\omega \F_\varep] = [i_\omega \G_\varep] 
- f_{\varep\#} [i_\omega \G_\varep] = 0\ .$$ 

The previous calculation make it reasonable to 
introduce the following definition. 

\demo Definition. 
We can say that $f_\varep$ is a globally Hamiltonian isotopy (G.H.I.) when 
$${d\over d\varep} f_\varep = \F_\varep \circ f_\varep$$ 
and $\F_\varep$ is a globally Hamiltonian vector field. That is, we obtain 
$f_\varep$ as a time one map of a {\it globally\/} Hamiltonian vector field 
which also depends on time. 

Clearly, a globally  Hamiltonian isotopy is determined by the initial point 
and $F_\varep$ the Hamiltonian function of $\F_\varep$. Conversely, a 
G.H.I.\ determines uniquely $\F_\varep$ and this determines $F_\varep$ 
up to a constant, which we will fix by requiring $\int F_\varep =0$. We 
will call $F_\varep$ the Hamiltonian of $f_\varep$. 

As we will discuss at the end, the G.H.I.'s enjoy quite remarkable 
properties. For the moment, we will only work out some formal properties. 

Notice that G.H.I.'s allow to express transformations by a simple function; 
in that respect, they are similar to generating functions, but, however, 
they are natural geometrical objects and, so, they can be defined in all 
manifolds; besides, the equations we will have to solve are simpler. 

\proclaim Lemma. 
Let $f_\varep$, $g_\varep$ be G.H.I.\ of Hamiltonians $F_\varep$, $G_\varep$, 
then $f_\varep\circ g_\varep$ has Hamiltonian $F_\varep + G_\varep 
(f_\varep^{-1})$. 

The proof is very simple proceeding along the same lines as in the 
calculations  of Eq.~(5). We just have to observe that, since $f_\varep$ 
is a symplectic transformation, we can just transform the Hamiltonian. 

Notice that, in particular, this Lemma shows that all the 
diffeomorphisms connected to the identity by a G.H.I.\ form a group. 

If we decide to search for the $g_\varep$ among G.H.I., Eq.~(5), --- which 
is a necessary condition for Eq.~(1) --- becomes 
$$F_\varep = G_\varep - G_\varep (f_\varep^{-1})\ . 
\eqno(6)$$ 

However, it turns out that, in spite of the fact that we used Eq.~(1) twice  
to derive Eq.~(5), and, therefore could fail to the sufficient on Eq.~(6) 
is equivalent to Eq.~(1). 

If effect, calling 
$$\Delta_\varep = f_\varep \circ g_\varep \circ f_0^{-1} \circ g_\varep$$ 
we can show that provided Eq.~(6) holds 
$${d\over d\varep} \Delta_\varep = 0\ \hbox{ when }\ \Delta_\varep = \Id\ .$$ 
Since clearly $\Delta_0 = \Id$ and everything is so smooth that uniqueness 
of O.D.E.\ holds, we have $\Delta_\varep = \Id$. 
\vfill\eject 

\section{B. Convergence} 
We now tackle the problem of actually constructing the solutions. More 
precisely, we want to prove the following. 

\proclaim Theorem. 
Let $f_\varep$ be an analytic globally Hamiltonian isotopy  
$f_0 = E_{\balpha}$, where $\balpha$ satisfies 
$$\displaylines{\hfill 
|(\balpha ,\bk) - n|^{-1} \le \exp 
{A|\bk| \over \log |\bk| \cdot \log\log |\bk| \times \cdots \times 
\underbrace{\log\ldots \log}_{\ell\, \hbox{\sevenrm times}}{}^{1+\nu} 
|\bk|} + B\hfill \cr 
\hfill n\in \zed\quad ,\quad \bk \in \zed^{2d}\hfill\cr}$$ 
for some $A,B,\nu >0$ (\ $|\bk|$ means $\sum_i |k_i|$\ ). 

Then, for sufficiently small $\varep$, there is an analytic globally 
Hamiltonian isotopy $g_\varep$ satisfying Eq.~(1). 

For convenience, we will call 
$$\exp {A|\bk| \over \log |\bk| \cdot\log\log |\bk| \times \cdots \log 
\cdots \log^{1+\nu} |\bk|} + B\equiv \Omega (|\bk|)$$ 
following a notation due to R\"ussmann. 
We will discuss later how abundant  are the
$\balpha$ for which these conditions are satisfied.


\demo Remark. 
We will disregard what happens  for $|\bk|$ so small that one of the 
iterated log does not make sense. What is going to be relevant for the 
proof is that this inequality holds from a certain $|\bk|$ on.  If we were 
systematic about using that, we could do with the $B$, but it takes a bit 
of effort --- a good exercise to check it --- that the $|\bk|$ big 
enough that we take can be uniform in all steps, so we still keep the $B$. 

Given an analytic function in the torus, we will denote by 
$$\|F\|_r = \sup_{|\Im z_i| \le r} |F(\bz)|$$ 
and given an analytic family of analytic functions we will denote by 
$$\triv F_\varep \triv_f = \sup_{|\varep|\in [0,1]} \|F_\varep \|_r\ .$$ 

More precisely, what we are going to show is that, given $r$, $u$, there 
is some $S^* (r,u,\balpha)$ such that when $\triv F_\varep\triv_r \le S^*$ 
the conclusions of the theorem hold, when $G_\varep$ analytic in a slightly 
smaller domain. We remark that the loss of analyticity domains can be made 
as small as we please by imposing smallness conditions in $\triv F_\varep 
\triv_r$. It is also important to remark that $\balpha$ only enters in $S^*$ 
through $\Omega$ so that the proof holds at the same time for all the 
$\balpha$ with the same $\Omega$. 

Even if $F_\varep$ was defined in a smaller neighborhood in the variable, 
we can scale $\varep$ so that it is defined for $\varep \in [0,1]$. 
Moreover, by scaling enough, we can adjust any smallness conditions in 
$\triv F_\varep\triv$. So that the theorem can as well be enunciated for 
sufficiently small 

(1) can be written as 
$$g_\varep^{-1} \circ f_\varep \circ g_\varep = E_\alpha$$ 
and, if we apply repeatedly the formula for the Hamiltonian of a composition 
of G.H.I., we see that imposing that the Hamiltonian of the left hand side 
be equal to zero we obtain 
$$F_\varep \circ g_\varep - G_\varep \circ g_\varep + G_\varep \circ 
f_\varep^{-1} \circ g_\varep = 0 
\eqno(7)$$ 
(which, of course, is to hold in some appropriate complex extension of the 
torus).  

Even if a direct solution of Eq.~(7) is beyond hope, we can observe that 
there is a similar equation we can solve using Fourier coefficients. 
$$F_\varep - G_\varep^0 + G_\varep^0 \circ E_{\balpha}^{-1} = 0\ . 
\eqno(8)_0$$ 
Then, $G_\varep^0$ will be commensurate, in some sense, to $F_\varep$. 
Then, $g_\varep^0 - \Id$ will also be controlled by $F_\varep$ and, 
finally, $E_{\balpha}^{-1} - f_\varep^{-1}$ will also be controlled by  
$F_\varep$. 

If we substitute all this in the left hand side of Eq.~(7), we obtain 
something which, even if not exactly zero, is ``quadratic in $F_\varep$.'' 

The crucial fact is that Eq.~(7) has some structure (usually called 
``group structure'' \cite{38}) that allows us to take advantage of approximate 
solutions. 

In effect, if we could solve Eq.~(7) with $(g_\varep^0)^{-1}\circ f_\varep 
\circ g_\varep^0 = f^1$ in place of $f_\varep$, we could solve the original 
problem. But, as we argued before, $(g_\varep^0)^{-1} f_\varep g_\varep^0$ 
is much more favorable.  

Of course, we still do not know how to solve the exact equation, so what we 
will do is to solve Eq.~$(8)_1$ again and find another approximation 
$g_\varep^1$ and keep repeating the process, hoping that the remainders 
will go to zero; $g= g_\varep^0 \circ g_\varep^1 \circ g_\varep^2\cdots$ 
will, then, be a solution of the problem. 

The details of the proof of how this ``quadratic convergence'' is enough 
to beat the small denominators are quite fascinating and are the basis of a 
whole theory useful for many problems \cite{38}. 

A very suggestive way of thinking about this technique is 
suggested in \cite{14}. 
We could think of each one of our inductive steps as recognizing that a 
substantial part of the perturbation expansion can be absorbed by a change 
in the Hamiltonian, which is then closer to ``free.'' This is quite analogous 
to the perturbative application of renormalization group and, indeed, this 
analogy can be exploited to obtain results in statistical mechanics of quantum 
field theory problems by methods reminiscent of K.A.M.. The theories 
involved have much more structure, their treatment has to include more 
technique. See \cite{17} 
for a formulation of renormalization group close to this 
idea and for references about other points of view or results in 
this field. 

The next four Lemmas will be a systematic control of the inductive step; then, 
we will show we can iterate indefinitely and that the iteration converges. 

\proclaim Lemma. 
Assume $F$ is analytic in $T^{2d} \int F=0$ and $\balpha$ as before. 
Then, there is a unique $G$, analytic, $\int G=0$, such that 
$$F(\bx) = G(\bx) - G(\bx + \balpha)\ .$$ 
Moreover, 
$$\|G\|_{r-u} \le e^{B+3n(u)} \|F\|_r$$ 
$r,u$ sufficiently small where the smallness depends only on $\Omega$ 
($n(u)$ to be defined later, is an explicit function, involving only 
$\Omega$).

To avoid cluttering the proof with different constants, we will denote 
by $C$ several constants which only depend on $\balpha$ and are 
independent of $r$, $F$, and all the other  parameters of the proof. 

\demo Proof. 
The equation can be solved equating Fourier coefficients 
$$\eqalign{ G_{\bk} & = (e^{2\pi i(\bk, \balpha)} -1)^{-1} F_k\qquad 
\bk\ne 0\cr 
G_{\bzero} & = 0\ .\cr}$$ 
Now, we have 
$$\displaylines{\qquad {\rm i)}\hfill 
|e^{2\pi i(\bk,\balpha)}-1|^{-1} \le \Omega (|\bk|)\hfill} $$ 
because 
$$|e^{2\pi ix}-1|^{-1} \le Cd (x,\zed)^{-1}$$ 
($x=\frac12$ gives the worst constant which is $C=1$) 
also, because of Cauchy inequalities, 
$$\displaylines{\qquad {\rm ii)}\hfill 
|F_{\bk} | \le e^{-2\pi r|\bk|} 
\|F\|_r\hfill}$$ 
and the triangle inequality gives 
$$\displaylines{\qquad {\rm iii)}\hfill 
\|G\|_{r-u} \le \sum |G_{\bk}| e^{2\pi (r-u)|\bk|}\ .\hfill }$$ 

Hence, 
$$\eqalign{ \| G\|_{r-u} & \le \| F\|_r \sum e^{-2\pi u|k|} (|k|) \cr 
&\le \|F\|_r C \sum_{n\in \nat} e^{-2\pi un} \Omega (n) n^{2d-1}\cr}$$ 
where $C$ is the area of the unit sphere with respect to 
$| \un{\enspace }|$ in $2^d$ dimensions. 
$$\sum_{n\in \nat} e^{-2\pi un} n^{2d-1} \Omega (n) 
\le \biggl( \sum_{n\in \nat} e^{-\pi un}\biggr) 
\biggl( \max_{n\in \nat} e^{-\pi un} \Omega (n) n^{2d-1}\biggr) \ .$$ 

The sum can be done explicitly and bounded by $Cu^{-1}$. The maximum will 
be reached at the point that the log reaches  the maximum and this can be 
computed taking derivatives 
$$\eqalign{
0 & = {d\over dn} \left[ -\pi un + B + 
{An\over \log n\log \log n\ldots \log \ldots \log^{1+\nu} n} 
+ (2d-1) \log n\right] \cr 
&= -\pi u + 
{A\over \log n\log \log n\ldots \log \ldots \log^{1+\nu} n} 
- {A\over \log^2 n\log \log^2 n\ldots \log \ldots \log^{1+\nu} n} \cr 
&\qquad\qquad 
- {A\over \log^2 n \log \log^2 n \ldots \log \ldots \log^{1+\nu} n}\cr 
&\qquad\qquad - {A(1+\nu) \over 
\log^2 n\log \log^2 n\ldots \log^2 \ldots \log^{(1-\nu)2}n} 
+ {2d-1\over n} \cr 
& = -u + {A\over \log n\ldots \log \ldots \log^{1+\nu} n} 
\left[ 1- {1\over \log n} - {1\over \log\log n} -\cdots - 
{1+\nu\over \log \ldots \log^{1+\nu} n} \right] \cr 
&\qquad\qquad + {2d-1\over n}\ .\cr}$$ 

So, we see that for $u$ sufficiently small we have just one solution 
$n(u)$; the exact form of $n(u)$ will not be terribly important for the 
moment, we only have to observe that $n(u)\to\infty$ as $u\to0$ 
$|\ell n(u)| = o(n(u))$. The maximum of the exponent will then be bounded by 
$$\eqalign{ & - \pi un (u) + B + 
{An(u) \over \log n(u)\ldots \log \ldots \log^{1+\nu} n(u)} \cr  
&= B + n (u) \left[ {A\over \log n(u)\ldots \log\ldots\log^{1+\nu}n(u)} 
- u\right]\ .\cr}$$ 

And using the equation satisfied by $n(u)$ 
$$\eqalign{&= B + n (u) {A\over \log n(u)\ldots \log\ldots \log^{1+\nu}n(u)} 
\Bigl( {1\over \log n(u)} + {1\over \log\log n(u)} +\cdots \cr 
&\qquad \qquad + {1+ \over \log \ldots\log^{1+\nu} n(u)} 
\le B+2n (u)\cr} $$ 
provided that $n(u)$ is large enough, that is, $u$ small enough. 
We also observe that $e^{n(u)}\gg u^{-1}$ as $u\to0$ so that the $u^{-1}$ 
factor can be absorbed in adding a small coefficient of $e^{n(u)}$. Since 
everything was uniform with respect to extra parameters, the same result 
with the same bounds is true for families. 

\proclaim Lemma.  
$$\triv F_\varep - F_\varep \circ g_\varep \triv_{r-u} 
\le Cu^{-2} \triv F\triv_r \triv G_\varep \triv_r$$ 
provided 
$$\triv G_\varep \triv_r \le 3u^2 C\ .$$ 

\demo Proof. 
Using analyticity estimates, we have that $\triv \nabla G_\varep\triv_{r-u/2} 
\le Cu^{-1} \triv G_\varep\triv_r$. Since $g_\varep$ is obtained solving 
Hamilton's equations 
$$\triv \bx - g_\varep (\bx) \triv_{r-u} \le 
\triv \nabla G_\varep \triv_{r-u/2} \le Cu^{-1} \| G_\varep \|_r$$ 
where the first inequality assumes that $g_\varep (\bx)$ is always contained 
in  $\{ |\Im (\bz) | \le r-u/2\}$ whenever $|\Im (\bx) | \le r$. 
But this is true because of the smallness assumption on $\triv G_\varep 
\triv_r$ we included  in the hypothesis of the lemma, which make the last 
bound $\le u/3$. (We showed that provided $g_\varep (x)$ is contained 
in $\{\Im (z) \le r-u/2\}$ it can not leave $\{\Im (\bz) \le r-u +u/3\}$.) 

We also have 
$$\triv F_\varep - F_\varep \circ g_\varep\triv_{r-u} 
\le\triv\nabla F_\varep \triv_{r-u/2} \Vert \bx - g_\varep (\bx) \triv_{r-u}$$  
and we can bound the first factor by analyticity estimates.  

Applying the previous lemma twice and the formula of composition, we obtain 

\proclaim Lemma. 
$$\triv H_\varep (f_0^{-1}) - H_\varep (f_\varep^{-1}\circ g_\varep)\triv_{r-u} 
\le Cu^{-4} \triv H_\varep \triv_r \bigl( \triv F_\varep\triv_r + 
\triv G_\varep \triv_r\bigr)$$ 
provided 
$$\eqalign{ \triv F_\varep\triv_r & \le u^2 C\cr 
\triv G_\varep \triv_r & \le u^2 C\ .\cr}$$ 

Now, we can bound the left hand side of Eq.~(7) provided that $G_\varep$ is 
obtained from $F_\varep$ by solving Eq.~(8) (we will assume that the 
conditions  to apply the lemma hold). 
$$\eqalign{ 
&\triv F_\varep g_\varep - G_\varep g_\varep + G_\varep f_\varep^{-1} 
g_\varep \triv_{r-u} \cr 
&\qquad = \triv F_\varep g_\varep - G_\varep g_\varep + G_\varep f_\varep^{-1} 
g_\varep - F_\varep + G_\varep - G_\varep f^{-1} \triv_{r-1} \cr 
&\qquad \le \triv F_\varep \circ g_\varep - F_\varep \triv_{r-u} 
+ \triv G_\varep \circ g_\varep - G_\varep \triv_{r-u} 
+ \triv G_\varep \circ f_\varep^{-1} \circ g_\varep - G_\varep \circ 
f^{-1}\triv_{r-u}\le \cr}$$ 

Since we want, later, to bound $G$ in terms of $F$, we will use only bounds 
of $G$ in a slightly smaller domain. This amounts to taking $u/2$ in the 
previous lemma and, therefore, to multiplying some constants by $2^4$ 
$$ \le Cu^{-2}\triv F_\varep\triv_r \triv G_\varep \triv_{r-u/2} 
+ Cu^{-2} \triv G_\varep \triv_{r-u/2} 
+ Cu^{-4} \triv G_\varep \triv_{r-u/2} 
\quad \bigl( \triv F_\varep \triv_r + \triv G_\varep\triv_{r-u/2}\bigr)\ .$$ 

Now (being incredibly wasteful), we bound all this by 
$$Ce^{B+4n(u/2)} \triv F_\varep\triv_r^2\ .$$ 
We have to assume $e^{n(u/2)} >u^{-4} u^{-2}$, which is certainly true for 
sufficiently small $u$. 

Now, we have to show that this step can be iterated. (The only things to 
check are the smallness conditions we assumed for $\triv G\triv,\triv F\triv$) 
and that the iterates converge. Since each of the steps have  a choice for 
$u$, we will have to show that there is a choice for the $u$'s at each step 
so that indefinite iteration and convergence holds. 

So, we pick once and for all a fixed sequence $(u_k)$ in such a way that 
$n(u_k/2) = n_0 (3/2)^k$ ($n_0$ to be adjusted later). 

Notice that these choices imply (using the equation for $n(u)$) that 
$$\eqalign{ \sum_k u_k & = \sum_k {2\over \pi}\ 
{A\over \log n_0 (3/2)^k\ldots \log \ldots\log^{1+\nu} n_0 (3/2)^k} \cr 
\myskip 
&\qquad \left( 1- {1\over \log n_0(3/2)^k} - \cdots - 
{1+\over \log \ldots \log^{1+\nu} n_0 (3/2)^k}\right)\cr}$$ 
which converges by comparison with 
$$\sum {1\over k\log k\ldots \log \ldots \log^{1+\nu} k}$$ 
which converges by the integral test. (The factor in brackets clearly tends 
to $1$ when $k\to\infty$.) 

Moreover, this sum is arbitrarily small provided $n_0$ is sufficiently 
large. Since $r-\sum u_k$ is going to be the measure of analyticity domain 
of $G$, we see that, if we can pick $n_0$ large, the analyticity loss will 
be a small factor of the original domain. 

Calling $S_n = \triv F_\varep \triv_{r_n}$ after performing $n$ inductive 
steps, we have shown that 
$$S_{k+1} \le e^{B+3n} \Bigl( {uk\over2}\Bigr) S_k^2 \equiv 
e^B e^{3n_0} \left( \Bigl( {3\over2}\Bigr)^k\right) S_k^2$$ 
(assuming that the conditions to perform a step hold) so that 
$$\eqalign{ 
S_{k+1} & \le e^{B+3n_0 (3/2)^k} e^{2B+2n_0(3/2)^{k-1}} S_{k-1}^{2^2} \cr 
&\le e^{B(1+2+\cdots +2^{k+1})} e^{2n_0((3/2)^k} 
(1 + 2\Bigl( {3\over2}\Bigr)^{k-1} +\cdots + 2^k ) S_0^{2^{k+1}}\ .\cr}$$ 

\noindent Now, 
$$\eqalign{
1+2+\cdots + 2^{k+1} 
&\le 2^{{k+1}} \Bigl( {3\over2}\Bigr)^k + 2\Bigl({3\over2}\Bigr)^k 
+\cdots + 2^{k+1} \cr 
&= 2^k \sum_{\ell=0}^k \Bigl({3\over2}\Bigr)^{k-\ell} 
\Bigl({1\over2}\Bigr)^{\ell-1} \cr 
&\le 2^k \sum_{\ell=0}^\infty \Bigl({3\over4}\Bigr)^\ell 
\le C2^{k+1}\cr}$$ 
so that 
$$S_{k+1} \le \left( S_0 e^{B+Cn_0}\right)^{2^{k+1}}\ .$$ 

Reading the formula for $u_k$ we obtain 
$$u_k \le {C(\nu) \over (\log (n_0)+ (\log {3\over2})k)^2}$$ 
where $C$ depends only on $\nu$. 

The conditions to iterate that we found for $G$ are implied then by 
$$e^{-n(u_k/2)} S_k \le C_k^2$$ 
which are, in turn, implied by 
$$e^{-(3/2)^k} \left( S_0 e^{B+Cn_0}\right)^{2k+1} 
\le {C(\nu)\over [\log (n_0) + \log (3/2) k]^2}\ .$$ 

We can see that, given a certain $n_0$, we can find $S_0$ small enough so 
that the conditions hold for all $k$. Therefore, if they hold for the 
first step, the iteration keeps them valid. 

Analogous, even easier, considerations hold for the other condition of 
the inductive step. 

So, the inductive step can be iterated indefinitely. The remainder converges 
to zero and $n_0$ can be taken as big as we want provided $S_0$ is small 
enough. 

Convergence of $g_\varep^0 \circ g_\varep^1 \cdots \circ g_\varep^k$ 
as $k\to\infty$ in the domain given by $r-\sum u_k$ can now be very easily 
established. At each step of the iteration, the same bounds we proved for 
the inductive hypothesis show that all the compositions make sense. 
We also have very good bounds on $G_\varep^k$.  Using them, the formula for 
the composition of G.H.I.\ and proceeding as in Lemma~6, we are done. 

\section{C. About the nonresonance conditions} 
The idea of introducing a function $\Omega$ to measure the non-resonance and 
try to determine the weakest conditions can be attributed to Russman 
\cite{31}. 

Rather than calculate at each step of the process as he did, he left all 
the calculations for the end. The conditions he found are 
$$\sum {\Omega (2^n) \over 2^{n+1}} <\infty $$ 
(plus monotonicity and logarithmic convexity). 

The same conditions were derived by other methods by Brjuno \cite{9}. 
These conditions are weaker than those of the proof we presented. 
For the proof we presented here, the left hand side of the inequality 
above becomes 
$$\sum {1\over n(\log n) (\log\log n)\ldots (\log\ldots\log^{1+\nu}n)}$$ 
which is not, of course, the slowest convergent series, but is probably 
the slowest which is comfortable to write. 

In the next paragraph we will present some examples of $\balpha$'s 
failing to satisfy the condition and for which the theorem is false. 

It is interesting to remark that there is a no man's land of numbers for which 
it is not known either how to prove the theorem or how to construct 
counterexamples. 
Certainly this no man's land is small in any sensible sense of the word 
``small'' for uncountable sets. (It has zero measure, the complement is 
residual, has zero Hausdorff dimension, etc..) But nevertheless, its 
existence is vexing (it is quite similar to ruling out the existence of 
singular continuous spectrum whose presence prevents having a well behaved 
scattering theory).  For a similar model problem, studying the 
conjugacion of analytic maps to a rotation in a vicinity
of a fixed point with derivative of unit modulus, there are
examples \cite{39} that show that for a number failing to
satisfy the contitions used in \cite{9} to 
prove linearization, the  conclusions of the theorem are false.
See also \cite{40} for some extensions of the methods.

What we will do now is to study the set of $\balpha$'s for which the 
conditions 
$$\Omega (|k|) \le |\balpha \bk|^{-1}$$ 
are satisfied. 

We want to show it is a big set (it has positive measure, as close to full 
as we want by taking $\Omega \to \lambda\Omega$, $\lambda$ big) and also 
some geometric properties that will be useful in the proof of the full 
K.A.M.\ theorem. 

We will introduce the set of ``almost resonances of order $\bk$'' 
$$R_{\bk} = \left\{ \balpha \bigmid  |\balpha \bk| \le 
{1\over \Omega (|k|)}\right\}\ .$$ 

Each one of those is a strip bounded by two planes perpendicular to $\bk$ 
and separated by a distance 
$${2\over \Omega (|k|) (\sum^d k_i^2)^{1/2} }\ .$$ 
The set in which the non-resonance conditions hold is the complement 
of $\bigcup_{\bk\in\zed^d}  R_{\bk}$. 

If we restrict ourselves to a ball in $\balpha$ space, 
$$\eqalign{ 
\Vol (R_{\bk}) & \le \diam (\Ball) {2\over 
\Omega (|k|) (\sum_{i=1}^d k_i^2)^{1/2}} \cr 
\myskip 
& \le {\diam (\Ball) 2\sqrt{d} \over 
\Omega (|k|) |\bk|}\ .\cr}$$ 
(By Cauchy Schwartz we have 
$|\bk|\le \sqrt{d} (\sum_{i=1}^d |k_i|^2)^{1/2})$.  
We then see that 
$$\Vol \biggl( \bigcup_{\bk} R_{\bk}\biggr) \le 
\sum_{\bk\in\zed^d} {\diam (\Ball) 2\sqrt{d} \over 
\Omega (|k|) |k|}$$ 
which, for our choice of $\Omega (|k|)$ certainly is finite. Moreover, by 
choosing big enough constants we can make this measure of the excluded 
set as small as we wish. 

But remember that the proof could go through for arbitrarily large 
constants in $\Omega$ provided that we took small enough perturbations. 

This is one of the key ingredients in the proof that, for the full K.A.M.\ 
theorem, the proportion of phase space occupied by invariant tori is close to 
one, if the perturbation is sufficiently small. 

The actual proof is slightly more complicated and involves other slightly 
stronger statements about how big the set is; something more is necessary 
to control the changes introduced by the partial conjugacies. The ``bad'' 
set is either those of resonances or the things that can be moved into 
resonances by a movement of the size of the perturbations at the 
corresponding stage of the iterative process. 

We call 
$$\widetilde{R}_{\bk,u} = \bigl\{ \balpha \mid \dist (\balpha,R_k) 
\le u\bigr\}\ .$$ 
In the $\ell^{\rm th}$ step of the proof the ``bad'' set will turn out to be 
$$\bigcup_{|\bk| \le n(u_\ell)} \widetilde{R}_{\bk,\varep_\ell} 
\ \hbox{ where }\ \bigl( \varep_\ell = \|F^\ell\|_{r_\ell}\bigr)\ .$$ 

These sets still have small measure when restricted to a ball because 
$\widetilde{R}_{\bk,u_\ell}$ is still bounded by two planes perpendicular 
to $\bk$ and separated by a distance bounded above by 
$({2\sqrt{d}\over\Omega (|k|)|\bk|} + 2\varep_\ell)$ and the sum of the measure 
of the bad sets is the same as before plus  
$$\sum_\ell\ \sum_{|j|\le |k|\le n_{\ell+1}} \!\!\!\! 
u_\ell \le C\sum_\ell n_{\ell+1}^{d-1} \varep_\ell$$ 
which also converges and is as small as we wish provided $\varep_0$ is small. 

Notice that, given a Cantor set even of large measure, it is by 
no means obvious that we can enlarge the complement at each of the stages of 
the construction and still obtain something non-empty. 

\section{D. A counterexample}
Here we construct a G.H.I.  $f_\varep$ in the torus $T^2$ that satisfies 
all the conditions of the previous theorem except for non-resonance and, 
such that it is not topologically conjugate for any $\varep$ to $f_0$. 

We can find an $a \in  \real$ such that  
$\limsup_{k\to\infty} \exp (-Ak) \min_{n\in\zed} |ak-n|^{-1}\to\infty$. 
An explicit construction of such an $a$ can be achieved by, e.g., taking 
a number with a decimal expression 
$0.1\overbrace{0\ldots0\ldots}^{n_1}01\overbrace{0\ldots0}^{n_2}1\ldots$ 
we have 
$$[10^{n_1+\cdots +n_j+j} a-1]^{-1} = 10^{(n_1+\cdots+ n_{j-}n_{j+1} +j+1)}$$ 
we can recursively choose the $n_{j+1}$ so that the limit of the resonances  
along this sequence is $\infty$. A more elegant way could be using continued  
fractions. 

Notice that the construction can be modified to accommodate any arbitrary 
finite sequence of numbers before starting with the $0$'s and $1$'s. So, 
these counterexamples are dense (although we showed
before that they are of zero 
Lebesgue measure). 

We will be considering families 
$$f_\varep = \bigl( x+a,y +b +\varep \psi (x)\bigr) $$ 
which are G.H.I.'s whenever $\int_0^1 \psi (x)\,dx = 0$. $b$ will not play 
any role in what follows and we can choose it in any way we like. 
In order to get a counterexample as strong as possible we will choose it in 
such a way that the resonances of the pair $(a,b)$ are as weak as possible. 
(It turns out that we can make them as weak as those of $a$.) 

We will call  $\H_A$ the set of functions analytic in $S^1$ integrating 
to zero, and with finite norm $\|\psi\|_A = \sup_{k\in\zed^2} e^{kA} 
|\hat\psi_k|$. ($\hat\psi_k$ is the $k^{\rm th}$ Fourier coefficient) 

\proclaim Proposition. 
There is a residual set in $\H_A$ such that when $\psi$ belongs to this 
set $f_\varep$ is not topologically conjugate to $f_0$ for any $\varep$. 

\demo Proof. 
We will just establish disconjugacy of $(x+\alpha, y+\beta + \psi (x))$ 
to $(x+\alpha,y+\beta)$ in a residual set which will also be invariant 
by multiplication by scalars. 

We observe that the set $\{f_0^k\}$ is equicontinuous and, if $f_1$ were 
topologically conjugate to $f_0$ so would be $\{f_1^k\}$. We show that 
this is not the case. 
$$f_1^k (x,y) = \biggl( x+k\alpha, y+k\beta + 
\sum_{\ell=1}^k \psi (x+\ell\alpha)\biggr) $$ 
and it suffices to show that $\{ C_k \psi (x) \equiv \sum_{\ell=1}^k 
\psi (x+\ell\alpha)\}$ are not equicontinuous for a residual set of $\psi$. 

To establish the theorem it suffices  to show that for the operators 
$\C_k $, $\|\C_k\|_{\H_A \to C^0}$  is unbounded, and invoke Baire 
Category theorem. (See e.g., \cite{31}, p.44.) 

We have 
$$\eqalign{ 
C_k e^{2\pi inx} & = e^{2\pi inx} \sum_{\ell=0}^{k-1} e^{2\pi i\ell n\alpha}\cr 
\myskip 
&= e^{2\pi inx} {3^{2\pi ikn} -1\over e^{2\pi in\alpha}-1}\cr 
\myskip 
& \sup_k \|\C_k\|_{\H_A \to C^0} 
\ge \sup_k \sup_n e^{-An} {|e^{2\pi ikn}-1| \over 
|e^{2\pi in} -1|} \cr 
\myskip 
& = \sup_n {e^{-An} \over |e^{2\pi in\alpha}-1|}  
\sup_k |e^{2\pi ikn\alpha} -1|\cr 
\myskip 
&= \infty\ .\cr}$$ 

It is quite easy to modify the construction of the example so that it 
shows disconjugacy generically in $C^r$ when the growth of resonances 
is faster than $k^r$. 

Notice also that the conjugacy would be provided by function 
$g(x,y) = (x,y+\eta (y))$ where $\eta(y)$ satisfies $\eta (y)-\eta (y+\alpha) 
= \psi (x)$ so that one can also play the game of finding resonance 
conditions so that there is conjugacy in one regularity class and not 
in another. 

Some partial converses of this counterexample are possible. 

\proclaim Theorem. 
If $f:T^d \to T^d$ is an area preserving $C^k$ diffeomorphisms $k>2$ 
and $\|f^{n(k}\|_{C^0}$ is bounded independently of $n$, then there exists 
a $C^{k-1}$ area preserving $h^* :T^d\to T^d$ satisfying 
$$h^* \bigl( f(x)\bigr) = h^* (x) + \ba$$ 
where $\ba = \int_{T^d} f(\bx) - \bx\,dx$. (The integral is to be interpreted 
as the functions $f$ and the vectors taking values in $\real^d$ --- 
the universal cover of $T^d$ and the integral ranging over a fundamental 
domain.  $f^{(k}$ means now $k^{\rm th}$ derivative.)  

We start by observing that since 
$$f(\bx) = x+\ba + \varphi (\bx) \ ,\ \hbox{ where }\ \int_{T^d} \varphi 
(x) = 0$$ 
we have 
$$f^2 (x) = x+2\ba + \varphi \bigl( f(x)\bigr) + \varphi (x)$$ 
in general 
$$f^n (x) = x+n\ba + \sum_{\ell=0}^{n-1} \varphi \bigl( f^\ell (x)\bigr)$$ 
since $f$ preserves area 
$$\int_{T^d} \varphi \bigl( f^\ell (x)\bigr) 
= \int_{T^d} \varphi (x) = \bzero\ .$$ 
Hence 
$$\int_{T^{2d}} f^n (\bx ) - \bx = n\ba\ .$$ 
Now, if we form 
$$h_n (\bx) = {1\over n} \sum_{\ell=0}^{n-1} \bigl( f^\ell (\bx) - \ell 
\ba \bigr)$$ 
we see that $\|h_n^{(k}\|_{C^0}$ is also bounded independently of $n$. 
Moreover, since 
$$\int_{T^d} h_n (\bx) -\bx = \bzero$$ 
it also follows that 
$$\sup_{x\in T^d} |h_n (\bx) - n\ba|$$ 
is also uniformly bounded. 

Then, we have a set of equicontinuous equibounded $h_n$ and we can select 
a convergent subsequence 
$$h_{n_i} \to h^*$$ 
but 
$$h_n \bigl( f(x)\bigr) = h_n (x) + a + {1\over n} \bigl[ f^n (x) - n\ba 
-x\bigr]\ .$$ 
Again $f^n (\bx) - n\ba -\bx$ has $k$ derivative bounded independently of 
$n$ and integrate to zero. 

Hence, the bracket converges to zero pointwise so that $h^*$ satisfies the 
equation of the theorem. 

The fact that $h^*$ preserves area follows from the fact that $h_n$ does, and 
the convergence $h_{n_i}\to h^*$ is uniform $C^{k-1}$ convergence so that 
the property of conserving the area is preserved under those limits. 

This argument is very useful in many contexts. It shows that in order to 
produce smooth solutions of conjugacy equations, it suffices to obtain 
uniform bounds for the iterates in a smooth norm. 

(Similar arguments appear very often in scattering theory, and are very 
similar to the Krylov--Bogoliubov argument \cite{33}.) 

Notice however that we had to use here many properties of the space, like the 
fact that the cover is $\real^d$ so that we can add. In particular,  I do 
not know how to prove by the same method existence of solutions  to 
$$f\bigl( h^{**} (\bx)\bigr) = h^{**} (\bx + \ba)\ .$$
(This, however follows if we can guarantee $h^*$ is invertible which 
can be established assuming that $\| D(f^n) - \Id\|_{C^0}$ is small.) 

\section{Some side remarks about the proof}

\demo Remark 1. 
We can think about the previous proof as an implicit function theorem. 
Eq.~(7) is a particular case of a relation between $F$ and $G$ (which we 
write $\varphi (F,G) =0$). $F$ and $G$ range on a linear space and $\varphi$, 
even if it is very complicated, has the advantage that we know that 
$F=0$, $G=0$ is a solution and that we know how to make first order 
perturbation theory around it. That is, the relation $\varphi$ is 
differentiable in some sense and we know how to invert $D_2(0,0)$. 

The standard way of proceeding is to produce a scheme of successive 
approximations and show it converges when all the differentials, etc.\ 
are taken in the appropriate sense. 

For example if we took the scheme 
$$\eqalign{ & G_{n+1} = -D_2^{-1} (0,0)\  \bigl[\varphi (F,G_n)\bigr] +G_n\cr 
&G_0 = 0\ .\cr}$$ 
Provided that $F$ and $G$ range in a Banach space, that $\varphi$ is 
differentiable in the strong sense, that the differential is continuous 
and that existence of the inverse is supposed to mean existence of a 
bounded inverse, it is possible to show that this scheme is a contraction 
(see e.g., \cite{11}). 

This is very similar to what we did, but such a simple framework does not 
suffice. For our problem $D_2^{-1} (0,0)$ is not bounded in any space of 
functions, and moreover, the derivative is not continuous in almost any sense. 

So, we have to use some extra structure of our problem. We had a natural scale 
of Banach spaces, the spaces of analytic functions on strips, and the 
operator $D_2\varphi (0,0)^{-1}$ was bounded from one of those spaces to 
the others with smaller domain; the norm of those operators gets unboundedly 
large if the loss of domain gets small (as it should if we are to be able to 
iterate indefinitely). 

The saving grace for our scheme was the ``quadratic convergence'': 
The new remainder was obtained from the old by multiplying by a factor that 
contains not only the bad factor due to the analyticity loss, but also a 
term proportional to the error. This combination is less than one, and 
we keep on improving. 

Notice that the scheme we proposed for the easy implicit function theorem 
does not have this quadratic convergence in Banach spaces. The new error 
$\|\varphi (F,G_{n+1})\|$ can be bounded only by 
$\|D_2^{-1}\varphi (0,0)\|\ \|D^2 \varphi\|\ \|\varphi (F,G_n)\|$ which, even 
if a contraction in Banach space for small $F$, is not quadratic. 

The best known scheme with quadratic convergence is Newton's method 
$$G_{n+1} = -D_2^{-1} \varphi (F,G_n) \varphi (F,G_n) + G_n\ .$$ 
However, this was not the one we used and, indeed, is unsuitable for our 
problem, because it requires to invert the linearized equation  away from 
$(0,0)$, a task that we could not accomplish by using Fourier analysis. 

What we did was to use the scheme involving only the solution of 
$D_2 \varphi (0,0)$ but we used this knowledge to improve $F$ (make $F$ 
closer to zero) so that we can get quadratic convergence even if not using 
Newton's scheme. 

This is something that cannot  be done for all equations, only for those 
that have some ``group structure.'' 

To summarize: we can prove the implicit function theorem in a more general 
framework that in Banach space, but we need some structure on the equation: 
either solvability in an open neighborhood or group structure. We also need 
some structure on the framework in which we set up the problem (the 
scale of Banach spaces). 

Since non-linear functional equations are pervasive in applications and the 
implicit function theorem is a very natural tool, there has been lots of 
thought given to the problem of finding more and more general settings. 
For Dynamical Systems, probably the most useful setting is that of 
Zehnder \cite{38} but there are others, probably more useful in geometry 
\cite{19}. 
There are still other approaches (e.g., \cite{18}). 

There is however lots of room for improvement. In particular the results 
of \cite{26} which are a kind of implicit function theorem do not fit in any of 
the schemes above. 

Besides the example we already discussed it is worth keeping the following 
counterexample in mind. In comes from \cite{2}. 

Consider the Frechet space of functions analytic in the unit disk endowed 
with the usual topology of uniform convergence on compact sets. 

If $f(z) = \sum_{k\ge 0} f_kz^k$ define $\varphi$ by $(\varphi (f)) (z) = 
\sum_{k\ge0} f_k^2 z^k$ (which is again a function of radius of convergence 
at least one). 

The function  $f^* = 1/1-z$ is a fixed point of $\bvarphi$ and, moreover, 
$D\varphi (f^*)=2$. 

However, in any neighborhood, there are functions $\tilde f$ different 
from $f^*$ so that $\varphi (\tilde f) = f^*$ (just take $\tilde f_k = 1$ 
if $k\ne N$, $\tilde f_N =1$), one can produce even an uncountable number 
of them in any neighborhood (so there is no inverse function; the example can 
be modified to be a counterexample of the implicit function theorem). 

\demo Remark 2. 
Giovanni Gallavotti has repeatedly drawn attention to the analogy 
between this treatment and the renormalization group approach to the 
obtention of uniform estimates in quantum field theory. 
There one also uses the group  structure of the problem. The perturbation 
theory is used to guess a transformation that substitutes our problem by 
one closer to ``free.'' 

\demo Remark 3. 
Observe that the quadratic convergence does not get spoiled if instead 
of using the full solution of Eq.~(8) we only use an approximate one, 
solving Eq.~(8) up to an error comparable to the square of $\|F\|$. 

In the full K.A.M.\ theorem, or in the standard proof of the toy theorem 
discussed here with a finite number of derivatives, it becomes quite 
useful to use a truncation 
$$G^{[\le N]} = \sum_{|\bk|\le N} e^{2ri \bk\bx} G_{\bk}\ .$$ 
Using the triangle inequality 
$$\|G-G^{[\le N]} \|_{A-\delta} 
\le \sum_{|\bk| >N} |G_k | e^{(A-\delta) |\bk|}\ .$$ 
By Cauchy estimates 
$$\eqalign{
&\le \sum_{|\bk| >N} e^{-\delta |\bk|} \|G\|_A \le \cr 
\myskip 
&\le \|G \|_A e^{-\delta N} C \sum_\zed \ell^{d-1} e^{-\delta \ell}\cr 
\myskip 
&\le \|G \|_A C e^{-\delta N} \delta^{-(d-1)}\ .\cr}$$ 

We can, at each step, accommodate an extra loss of domain and an $N$ in 
such a way that 
$$e^{-\delta N} \le \|F\|_A\ .$$ 

If we fix that $N$ grows exponentially with the number of steps this makes a 
$\delta$ which is comparable to the other losses of domains and the proof 
of infinite iteration and convergence needs no modification. 

\section{E. The full K.A.M. Theorem} 
The most famous of the applications of these techniques is the theorem of 
persistence of quasi-periodic motions in perturbations of integrable systems, 
that is, systems that have coordinates $\bA$, $\bvarphi$ ($\bvarphi$ are 
angles) such that the Hamiltonian is only a function of $\bA$. 
The equations of motion for an integrable system are 
$$\eqalign{
\dot \varphi_i &= {\partial H\over\partial A_i} = \bomega (\bA)\cr 
\myskip 
\dot A_i & =- {\partial H\over\partial \varphi_i} = 0\cr}$$ 
so that the solutions are 
$$\eqalign{
\bvarphi & = \bvarphi (0) + \bomega (\bA) t\cr 
\bA & = \bA (0)\cr}$$ 
and all of them are quasi-periodic. 

The theorem states that, provided that $\bomega$ appears in the range of 
$\bomega (A)$ (and some other conditions that we will discuss later) and 
satisfies irrationally conditions, any sufficiently small perturbation 
$H$ will have an invariant torus of dimension $2d$. The motion on this torus 
will be conjugate to the irrational flow of angular frequencies $\bomega$. 

The conditions we need can be motivated by the following example 
$$H_\varep = (\bomega_0 + \varep \bomega_1 )\bA\ .$$ 
Hamilton's equation for this Hamiltonian can be readily solved but there 
are no motions with frequency $\bomega_0$. This shows that we need some 
conditions that ``clamp'' the frequency forcing it to appear in the motions 
of $H_\varep$. 

Conditions which are sufficient are that 
$\det ({\partial^2 H_0\over\partial\bA \partial\bA}) \ne 0$. 
Heuristically, the range of $\bomega (\bA) \equiv \nabla_A H$ is 
persistent and those are the conditions under which the theorem is proved 
most often. 

Unfortunately these hypotheses  do not apply to the most interesting case, 
when $H_0$ is the Hamiltonian of the solar system with the interactions 
between the planets turned off. (As is very well known the frequency of orbit 
does not depend on the angular momentum so that the Hessian has less rank 
than the dimension.) However, there are weaker hypotheses that apply to 
this case \cite{4} and allow us to prove the theorem. 

There are basically two strategies to prove this theorem: 

The first --- we will call it Kolmogorov's strategy --- starts with the 
observation that Hamiltonians of the form 
$$H(\bA ,\bvarphi) = \bomega \bA + 0 (|\bA|^2)$$ 
admit quasi-periodic solutions of frequency $\bomega$ 
$$\eqalign{
\varphi (t) & = \bvarphi (0) + \bomega t\cr 
\dot \bA (t) & = \bzero\ .\cr}$$ 

However, if we take coordinates in which $\omega (0) = \bomega_0$, the most 
general perturbation of our integrable system is 
$$H(\bA,\bvarphi) = F(\bvarphi) + \bomega_0 \bA + \bG (\bvarphi) 
\bigl( \bA + o (|\bA|^2)\bigr)$$ 
(as can be readily seen by expanding in Fourier series) where $F$, $G$ are 
assumed to be small. 
So, we try to make a change of variables in which the terms $F$, $G$ are 
absent. 

If we use the variables $A'$, $\varphi'$ given by $\exp \L$, where $\L$ is 
a globally Hamiltonian  vector field of Hamiltonian $L$, we see that 
``up to higher order terms in $L$, $F$, $G$,'' the Hamiltonian in the new 
variables is 
$$H(A',\varphi' ) = F(\varphi') + \bomega_0 A' + G(\varphi') \bA' + 
\bigl\{ \bomega_0 \bA', L(A'\varphi')\bigr\}$$ 
where $\{\enspace\}$ means Poisson Bracket. 

So, we try to determine $L$ in such a way that it cancels as many as possible 
undesirable terms in first order. 

The equations  that imply this cancellation can be studied because the Poisson 
bracket means either derivative $\bomega\,\bA'$ along the Hamiltonian 
field of $L$ or minus derivative of $L$ along the field of Hamiltonian 
$\bomega\,A'$. In this later interpretation, we can solve by taking Fourier 
coefficients eliminating all Fourier coefficients except the constants. 

That is, we can find $L$ so that 
$$\eqalign{
H(A',\varphi') &= \biggl( \int F(\bvarphi') d^d\varphi'\biggr) 
+ \bomega \bA' + \biggl( \int \bG(\varphi') d^d \bvarphi'\biggr) \bA 
+ o(|A|^2)\cr
\myskip 
&\qquad\qquad \hbox{ (up to quadratic in $F$, $G$)}\ .\cr}$$ 
We can omit the constant $\int F(\varphi')$ because adding a constant to 
the Hamiltonian does not change the dynamics. 

As it stands now, the step is impossible to iterate because $(\omega + 
\int \bG)$ could fail to satisfy the irrationality conditions. 

But we now  try to make a change of coordinates $A'' = A' + \bY (\bvarphi')$ 
(which is clearly symplectic) and that restores the form we started with 
$$H(\bA'' ,\bvarphi'') = \tilde F(\bvarphi'') + \bomega \bA + 
\tilde\bG (\bvarphi'') A'' + o(A^2)$$  
but with $\tilde F$, $\tilde G$ which are ``quadratic'' in $F$, $G$. 

We can indeed find such a $\bY (\bvarphi)$ because of the hypothesis on 
the Hessian. Moreover, the size of $\bY$ will be commensurate with that of 
$\int \bG$. (This only requires the standard implicit function theorem 
in $\real^n$.) 

To prove convergence, we will proceed as before, but with the extra induction  
hypothesis that the Hessian is uniformly bounded away from zero; again, 
this is no problem because controlling the change of the Hessian involves 
controlling only three derivatives which, in the previous notation means 
that controlling $\sum u_k^{-3} S_k$ which follows from estimates we had 
already done. 

Notice that since the hypothesis of the theorem include that $\bomega (A)$ 
covers an open set when $\bA$ ranges in an open set, there will be, 
for sufficiently small perturbations, many invariant tori. However, the proof 
we gave requires a different transformation for each one of them. This makes 
it very hard to answer questions about the nature of the sets of invariant 
tori. 

For example, it is known that there is a set of smooth invariant tori which 
can be embedded in a differentiable family of tori and which occupy a 
fraction of phase space arbitrarily close to 1 for sufficiently small 
perturbations. (It is, in principle, possible that there are some 
invariant tori, corresponding to frequencies closely approximated by 
rationals, which do not fit in this smooth family, or which are not smooth, 
or such that the motion of them is not conjugate, to a linear flow. 
I will be very curious to know whether the same techniques in the 
counterexample provided here can be used to provide counterexamples for 
flows.) 

To prove these results about families of tori, it is necessary to use 
another strategy, due to Arnold \cite{3}.  We try to reduce the Hamiltonian 
flow to linear  simultaneously in several places of the phase space. 
Again, we proceed inductively. 

At stage $\ell$, we try to construct the conjugacy in a domain of points 
that satisfy 
$$|\bk \bomega (A)|^{-1} \le \Omega (|\bk|) \qquad 
0\le |\bk| < (3/2)^\ell\ ,$$ 
Then, we solve the linearized conjugacy equation in Fourier coefficients up 
to order $(3/2)^k$. As explained before, this does not affect the 
quadratic convergence. However, the structure of the set where we are 
solving is very complicated and we should worry that each of the conjugacy 
does not  take us out of the space for the next.  The idea is to use the 
anisochromy condition to argue that the geometry of the resonant set, 
in $\bA$, is very similar to that of the resonant set in $\bomega$ which 
we discussed. This produces the estimates in the fraction of the volume 
preserved and, taking care of keeping all the estimates uniform, also the 
smoothness of the family interpolating the invariant tori. For the details 
and quantitative statements, the reader should look at 
\cite{14,29}. 

Besides the above statements about the global appearance of a large set of 
invariant tori the previous argument can be modified to yield other results: 

If we stop the elimination procedure at a certain stage, we can still show 
that in non-resonant regions (not just in tori), the motion is up to a 
change of coordinates, extremely similar to a linear flow. In particular, 
it does not move off for an extremely long time. 

This can be combined with an analysis of the motion in the resonant region 
to show that the motion is essentially perpendicular to the resonance line to 
conclude that small perturbations of integrable systems do no wander off for 
extremely long times, much longer than what a naive dimensional  argument 
would suggest. This is the content of a famous theorem proved by 
Nekhoroshev \cite{27}. (See simpler proofs in \cite{7}.) 

The fact that for even longer times, this recurrence disappears and points  
eventually do wander off was established in some examples 
\cite{5,1}, 
conjecturing this type of behavior is the rule. Some people claim that 
they have observed and measured this Arnold diffusion in some systems. 
Nevertheless, the effect to be measured is several orders of magnitude 
smaller than the truncation error due to the numerical methods and quite 
comparable to round-off and the results are not quite reproducible. 

\section{F. Computational Aspects}
Most of the proofs based on the previous strategy suffer the shortcoming 
that the smallness conditions required are much stronger than those that 
are relevant for practical applications (we recommend \cite{41} for a 
discussion of the applicability of K.A.M.\ theory specifically to the 
solar system). 

Of course, one could argue that it is possible that the method of proof is not
optimized to produce larger constants. Presumably, the figure of merit that has 
been optimized in many of the proofs is the number of derivatives. Since 
the K.A.M.\ theorem makes statements for whole classes of functions and this 
is an infinite dimensional space it is not even  clear what does it mean 
to optimize the constants in general because there are many different ways to 
measure distances and sizes of functions. 

There are two basic numerical methods to study directly the existence of 
quasi-periodic orbits lying on smooth tori. One of them is to write down 
the equation for conjugacy and then apply a non-linear equation solver 
(e.g., a Newton or continuation method) and the other is to implement 
numerically the perturbation expansions that we discussed in the previous 
section. 

The numerical literature about implementation of perturbation series 
expansions is quite extended. Let us mention, among others\cite{42} 
for an implementation of perturbation series for
the quasiperiodic orbit   (sometimes called
Lindstedt series) or   \cite{43} (and references there), \cite{44} 
for a discussion of the calculation of the 
approximate chage of variables that integrates the whole system
approximately. 
For numerical implementation of direct solutions of the equation, 
and a very lucid discussion of the difficulties,
let us mention \cite{44}. 

The validity of these numerical methods has been very controversial. Of course, 
all numerical methods are subject to round-off and truncation so that, 
strictly speaking, all numerical results are not mathematically 
conclusive and, if not accompanied by an error analysis, suspect. 

For the computation of quasi-periodic orbits this has been a little bit more 
so because all the counterexamples point to the effect that even very small 
values of derivatives of relatively high order can make a difference. It is 
well known that many numerical discretizations of functions --- specifically 
of several variables --- do not allow numerically stable evaluation of 
derivatives. (The only method we know that allows stable discretization of 
derivatives is the storage of Fourier coefficients.) Also the examples point 
out that the irrationality properties of the frequencies play a significant 
role. Of course, all the numbers stored in a computer are rational and it is 
not obvious how one could attempt to discretize such problems. 

In practice, the methods based on the perturbation series expansions are 
somewhat easier to implement since the recursion expansion is always done 
by solving the first order expansion around the unperturbed case. The 
Newton method requires one to solve a system of linear equations of  
dimension the number of discrete variables. Even if with todays computers 
solving a system of equations with 1000 variables is quite feasible, 
it is a computation of the order of one hour in a typical workstation. 
Even if shortcuts are possible, this remains a calculation much slower than 
one step of a recursion. An important reason why these methods based on the 
solving of functional equations are feasible is that there is a natural 
variational formulation of the problem. 

It is well known that numerically, variational methods --- if the variation 
principle is reasonable --- are more reliable and faster than direct 
solution of the variational equations. 
Moreover, expansions give you information for several values of the 
parameter.

On the other hand perturbation expansions in powers of the parameter have 
several shortcomings. First notice that power series expansions always converge 
in circles and diverge outside of them. So that if the domain of analyticity 
of the function we are studying is not a circle, there are values 
of the parameter for which the result is true but which cannot be studied 
by the perturbation expansion. It could very well happen that some of 
these inaccessible values are real --- i.e., physical --- because there are 
some unphysical singularities for complex values very close to the origin. 

Also, for many of the perturbation expansions of periodic orbits the 
coefficients differ greatly in size and involve large amounts of 
cancellations. Especially close to the domain of convergence, evaluation 
may be quite unstable. 

The methods of successive changes of variables used frequently in the 
mathematical literature are not very convenient for numerical implementations. 
The main reason is that the difficulty of discretizing a function increases 
very rapidly with the number of variables. Since the tori are half the 
dimension of phase space, the embedding of the torus in the phase space 
involves many less variables than studying functions in the phase space. 

>From the point of view of numerical applications, one of the most 
successful methods to compute invariant tori has been its approximation 
by periodic orbits. This indirect method,
even if it has very good empirical 
justification \cite{45} and can be justified from
the point of view of
renormalization \cite{46} is hard to justify 
mathematically (see \cite{47}, \cite{48} for some partial results). 

Recently, there has been considerable progress in the study of the validity 
of numerical computations of K.A.M.\ tori. 

At the root is the use of interval arithmetic and its extension to function 
spaces \cite{49}. The basic idea of interval arithmetic is very simple. 
Rather than represent a real quantity by a machine number that we hope 
approximates its value, we represent it by two machine numbers that are 
upper and lower bounds for its value. 

One can write operations among pairs of representable numbers that bound 
the usual arithmetic operations. That is, the sum of two intervals is another 
interval that is guaranteed to contain the possible values of the sums 
of two numbers, whenever these two numbers belong to the summand intervals. 
(If it is impossible to find a pair of representable numbers with this 
property, the operation should just raise an exception.) 

Then, given an algebraic expression, by translating these algebraic 
operations into calls to the corresponding interval routines one obtains a 
routine that produces rigorous bounds for the values of algebraic 
expression given bounds on the variables. 

Similarly, for all the functions for which one knows rational approximations 
and explicit bounds on the error, one can also obtain routines that bound 
them by enlarging by a convenient amount the result returned by the routines 
that bound the polynomial approximation. 

A similar circle of ideas can be extended to function spaces. We need to 
identify families  of sets that can be finitely specified and for 
which it is nevertheless easy to implement operations that bound the usual 
operations of analysis. 

There are many such families of sets that one could consider. One, 
introduced in \cite{50} that has been found to be particularly useful in 
the type of questions discussed in these lectures  is  the family of sets:

$$\eqalign{ B_{I_{-N}\cdots I_N;\varep}  = &\Bigl\{ f:T^1\to 
\complex \ \big|\ \ f= f_p + f_e\cr 
&f_p (\theta)  = \sum_{k=-N}^N \hat f_{p,k} e^{2\pi ik\theta}\ ,\ 
\hat f_{p,k} \in I_i\hfill\cr 
&\hfill f_e (\theta) = \sum_{k=-\infty}^\infty 
\hat f_{e,k} e^{2\pi ik\theta}\ ,\ 
\sum |\hat f_{e,k}| e^{-\delta |k|} \le \varep\Bigr\}\hfill\cr}$$ 

Roughly speaking, we are fixing the main Fourier coefficients 
to lie in some intervals but 
allowing some error. 

It is quite possible to bound many of the operations of analysis --- including, 
of course, the elementary arithmetic ones --- in this class of functions. 
Roughly, one performs the truncated operations in the Fourier coefficients 
and then, estimates the errors incurred either because of the uncertainties 
in the original data or because  of the truncation process. 

Notice that the norm in this particular example satisfies the Banach 
algebra property $\|fg\| \le \|f\|\, \|g\|$. Another important property of such 
discretizations is that they contain information about all derivatives and 
allow for reliable evaluation of them. 

There are many variations that improve several practical considerations. 
For example, one can keep several error terms, some of which enjoy special 
properties (e.g., having only coefficients of high order). 
Or, one can keep more detailed information about the error terms than that 
afforded by just one norm. For example, keeping  estimates both 
$\sum |\hat f_k| e^{\delta k}$ and 
$\sum |\hat f_k|\, |k| e^{\delta k}$ 
leads to a more efficient proof of the K.A.M.\ theorem (see \cite{51}) 
and hence, it is a good idea to incorporate it in the algorithms to compute
how good teh approximate solutions are.

There are other discretizations of spaces of functions in the literature. 
For example \cite{52} discusses in detail discretizations based on pointwise 
bounds of the functions. These methods are much more efficient than the 
one above in some problems such as integral equations. They are not so 
useful when one requires derivatives. 

Once one has a powerful enough interval arithmetic package it is possible 
to show that a specific trigonometric polynomial is an approximate 
solution of (1). One can verify that the right and left hand sides differ 
by a small amount and one can also obtain estimates of the properties 
of an analytic extension. 

It is possible to prove a theorem --- more or less along the same lines 
as the one discussed in the text --- that shows that if a function satisfies 
(1) with an error sufficiently small with respect to other analyticity of 
properties, then a Newton-like method started on it will converge to a 
true solution. 

Hence, one can verify that there is a true solution of (1) and that it is 
close to our initial guess. 

Notice that the way that the solution is produced is irrelevant for the 
verification that a Newton method started on it converges. In practice, 
the solution is produced using a numerical method that may include as many 
shortcuts as are considered expedient. The verification step, guarantees 
that this is a solution. 

The relevant theorems and implementation for some cases is described in 
\cite{Ra}, \cite{53} where it is shown that this way of proceeding --- given 
enough time --- can explore as much as desired of the region of validity 
of the K.A.M.\ theorem. The actual implementation was run in some concrete 
problems and got verified the results for some values of $\varep$ bigger 
than 95\% of values for which it is known that the conclusions of the 
theorem are false. 

Another different method to take advantage of interval arithmetic is considered 
in \cite{54}. There, interval arithmetic is used to obtain bounds of each 
of the coefficients in the perturbative expansions and a theorem is proved 
stating that certain properties of the perturbative expansion imply lower 
bounds for the radius of convergence. 

As mentioned before, there are reasons why power series expansions cannot 
cover the whole domain of analyticity. Nevertheless, they provide information 
for whole ranges of parameters, which may be preferable in some practical 
situations  where the relevant values of parameters are covered. 

One should also point out that the use of interval arithmetic prevents 
the use of the cancellations that provide the convergence of this series so 
that one should expect that the method reaches smaller values than the 
true ones. 

One improvement of the method in which the coefficients of the power series 
expansions are computed without using interval arithmetic is \cite{55}. 

The validity of the K.A.M.\ theorem can be delineated with the help of 
counterexamples to the conclusions. For twist maps (two dimensional) there are 
several counterexamples to the differentiability and irrationality properties 
\cite{56}, \cite{57}, \cite{58}. 
For the numerical values allowed in the perturbation 
\cite{59}, \cite{60}, \cite{61}. 
The latter two  involve computer assisted calculations. 
The situation in higher dimensions is more confusing since tori can be 
notably more complicated \cite{62}. 
Computer assisted counterexamples to several possible statements 
for sufficiently large values of the parameter can be found in \cite{Ma}. 

For the model problem of iteration of analytic functions near a fixed point 
with derivative of modulus~1, a very satisfactory description of the 
boundaries of validity exists \cite{39}, \cite{40}.  


\frenchspacing 
\section{References}

\item{1.} V. A. Arnold  and A. Avez, 
{\it Ergodic Problems of Classical Mechanics}, 
(Benjamin, Reading, 1968). 

\item{2.} R. Abraham and J. Marsden, 
{\it Foundations of Mechanics}, 
(Addison-Wesley, New York, 1978). 

\item{3.} V. A. Arnold, 
Proof of A. N. Kolmogorov's theorem on the preservation of quasi-periodic 
motions under small perturbations of the hamiltonian, 
{\it Russ. Mat. Surv.}  {\bf18} (1965). 

\item{4.} V. A. Arnold, 
Small divisor problems in classical and celestial mechanics, 
{\it Russ. Mat. Surv.} {\bf18} (1965). 

\item{5.} V. A. Arnold, 
Instability of dynamical systems with several degrees of freedom, 
{\it Dok. Aad Nauk} {\bf156} (1964). 

\item{6.} A. Banyaga, 
Sur le structure du groupe de diffeomorphism qui preservent une 
forme symplectique, 
{\it Comm. Mat. Helv.} {\bf53} (1978). 

\item{7.} G. Benettin, L. Galgani and A. Giogilli, 
A proof of Nekhoroshev's theorem for the stability times in nearly 
integrable systems, 
preprint.  

\item{8.} N. Bogoliubov and A. Mitropolskii, 
{\it Asymptotic Methods in the Theory of Non-Linear Oscillations}, 
(Gordon and Breach, New York, 1961). 

\item{9.} A. D. Brjuno, 
Analytic form of differential equations,  
I. {\it Trans. Moscow Math. Soc.} {\bf25} (1971); 
II. {\it Trans. Moscow Math. Soc.} {\bf26} (1972). 

\item{10.} L. Chierchia and G. Gallavotti, 
Smooth prime integrals for quasi-integrable Hamiltonian systems, 
{\it Nuovo Cimento} {\bf67B} (1982). 

\item{11.} J. Dieudonne, 
{\it Foundations of Modern Analysis}, 
(Academic Press, New York, 1960). 

\item{12.} E. Dynkin, 
The structure of semi-simple Lie algebras, 
{\it Trans. A.M.S.} Ser.1 {\bf9} (1950). 

\item{13.} D. B. A. Epstein,  
The simplicity of certain groups of homeomorphisms, 
{\it Compositio Matematica} {\bf22} (1970). 

\item{14.} G. Gallavotti, 
in {\it Scaling and Self-Similarity in Physics}, 
ed. J.~Fr\"ohlich\hfill\break 
(Birkhauser, Boston, 1983).  

\item{15.} G. Gallavotti, 
in {\it Regular and Chaotic Motions in Dynamic Systems}, \hfill\break
eds. G.~Velo, A.~S.~Wightan 
(Plenum, New York, 1985). 

\item{16.} G. Gallavotti, 
A criterion of integrability for perturbed non-resonant harmonic 
oscillators. ``Wick ordering'' of the perturbations in classical 
mechanics and invariance of the frequency spectrum, 
{\it Comm. Math. Phys.} {\bf87} (1982). 

\item{17.} G. Gallavotti, 
Renormalization theory and ultraviolet stability for scalar fields 
via renormalization group methods, 
{\it Rev. Mod. Phys.} {\bf57} (1985). 

\item{18.} R. Graff, 
Elements of non-linear functional analysis, 
{\it Memoirs A.M.S.} {\bf206} (1978). 

\item{19.} D. Hamilton, 
The implicit function theorems of Nash and Moser, 
{\it Bull. A.M.S.} {\bf7} (1982). 

\item{20.} M. Herman, 
Sur les diffeomorphisms du tore, 
{\it Ann. Inst. Fourier} {\bf23} (1974). 

\item{21.} R. de la Llave, J. M. Marco and R. Moriyon, 
Canonical perturbation theory for Anosov diffeomorphisms and 
regularity results for Livsic cohomology equation, 
{\it Ann. of Math.} in print. 

\item{22.} J. Moser, 
{\it Stable and Random Motions in Dynamical Systems With 
Special Emphasis on Celestial Mechanics} 
(Princeton Univ. Press, Princeton, 1973). 

\item{23.} J. Mather, 
Commutators of diffeomorphisms, I. 
{\it Comm. Math. Helv.} {\bf49} (1974). 

\item{24.} J. Mather, 
Commutators of diffeomorphisms, II. 
{\it Comm. Math. Helv.} {\bf50} (1975). 

\item{25.} J. Mather, 
Commutators of diffeomorphisms, III. 
{\it Comm. Math. Helv.} {\bf60} (1985). 

\item{26.} J. Mather, 
Stability of $C^\infty$ mappings, II, Infinitesimal stability implies 
stability, 
{\it Ann. of Math.} {\bf89} (1969). 

\item{27.} N. N. Nekhoroshev, 
An exponential estimate of the time of stability of nearly 
integrable systems, 
{\it Russ. Math. Surv.} {\bf32} (1977). 

\item{28.} H. Poincar\'e, 
{\it Les Methodes Nouvelles de la Mecanique Celeste} 
(Gauthier Villars, Paris, 1892). 

\item{29.} J. Poschel, 
Integrability of Hamiltonian systems on Cantor sets, 
{\it Comm. Pure and Appl. Math.} {\bf35} (1982). 

\item{30.} R. D. Ritchmyer, 
{\it Principles of Advanced Mathematical Physics, II} 
(Springer, New York, 1981). 

\item{31.} W. Rudin, 
{\it Functional Analysis} 
(McGraw-Hill, New York, 1973). 

\item{32.} H. R\"usmann, 
On the one dimensional Schr\"odinger equation with 
a quasiperiodic potential, 
{\it Ann. New York Acad. Sci.} {\bf357} (1980). 

\item{33.} Y. G. Sinai, 
{\it Introduction to Ergodic Theory}
(Princeton Univ. Press, Princeton, 1976). 

\item{34.} S. Sternberg, 
Infinite Lie groups and the formal aspects of mechanics, 
{\it Jour. Math. Mech.} {\bf10} (1961). 

\item{35.} W. Thirring, 
{\it A Course in Mathematical Physics, Vol. 1, 
Classical Dynamical Systems} 
(Springer Verlag, New York, 1978). 

\item{36.} A. Weinstein, 
Lectures on symplectic manifolds, 
{\it C.B.M.S. Conf. Series A.M.S.} {\bf29} (1977). 

\item{37.} A. Wintner, 
{\it The Analytical Foundations of Celestial Mechanics} 
(Princeton Univ. Press, Princeton, 1941). 

\item{38.} E. Zehnder, 
Generalized implicit function theorems with applications to 
some small divisor problems, 
I. {\it Comm. Pure and Appl. Math.} {\bf28} (1975); 
II. {\it Comm. Pure and Appl. Math.} {\bf29} (1975). 

\item{39.} J.~C. Yoccoz,
Theoreme de Siegel, polynomes quadratiques et nombres de Brjuno,
preprint (1988).

\item{40.} R. Perez-Marco
Sur la structure des germes holomorphes non-linearizables,
{\it C. R. Acad. Sci. Paris} {\bf 312} (1991).

\item{41.} J. Moser
Is the solar system stable?,
{\it Math Intelligencer} {\bf 1} (1978).

\item{42.} W. H. Pressler, R. Broucke,
Computerized formal solutions of dynamica systems 
with two degrees of freedom and an application to the Contopoulos
Potential,
I,II {\it Comp. \& Maths. with Appl. } {\bf 7} (1981).


\item{43.} S. Coffey, A. Deprit, E. Deprit, L. Healy, B. Miller
A toolbox for non-linear dynamics.
{|it Computer aided Proof in Analysis, K.R. Meyer, D. S. Schmidt ed.}
Srpinger (1991).

\item{44.} D. Braess, E. Zehnder,
On the numerical treatment of a small divisor problem,
{\it Numer. Math. } (1982).

\item{44.} A.J. Dragt, J. Finn
Lie Series and invariant functions for analytic
symplectic maps 
{\it Jour. Math. Phys.} {\bf 17} (1976).

\item{45.} J. Greene
A method for determining a stochastic transition
{\it J. Math. Phys.}{\bf 20} (1979).

\item{46.} R. S. MkKay
Renormalization in area preserving maps.
{\it Princeton U. thesis} (1982).

\item{47.} C. Falcolini, R. de la Llave
A rigorous partial justification of Greene's criterion
{\it Jour. Stat. Phys} {\bf 67} (1992).

\item{48.} R. S. McKay
On Greene's residue criterion
{\it Nonlinearity} {\bf 5} (1992).


\item{49.} R. Moore
{\it Methods and applications of interval analysis}
Siam, Philadelphia (1979).

\item{50.} O. E. Lanford
Computer assisted proofs in analysis
{\it Physica} {\bf 124A} (1984).

\item{51.} D. Rana
Proof of accurate upper and lower bounds to stability domains in small denominator problems
{\it Princeton thesis} (1987).


\item{52.} E. W. Kaucher,  W. Miranker
Self-Validating numerics for function space problems
{\it Academic Press} (1984).

\item{53.} R. de la Llave, D. Rana
Accurate stategies for K.~A.~M. bounds and their implementation.
{\it In Computer Aided Proofs in Analysis, K. Meyer, D. Schmidt eds.}
{\it Springer} (1991).

\item{54.} A. Celletti, L. Chierchia
Construction of analytic K.~A.~M. surfaces and effective stability bounds
{\it Comm. Math. Phys.} {\bf 118} (1988).

\item{55.} A. Celletti, L. Chierchia
Invariant curves for area-preserving twist maps far from integrable
{\it Jour. Stat. Phys.} {\bf 65} (1991).


\item{56.} J. Mather
Destruction of invariant circles
{\it Erg. Theo. \& Dyn. Syst.} {\bf 8} (1998).

\item{57.} M. R. Herman
Sur les courbes invarinates par les diff\'eomor\-phi\-smes de l'ann\-eau vol 2
{\it Asterisque} {\bf 144} (1986).

\item{58.} G. Forni
Analytic destruction of invariant circles
{\it Princeton Univ. Preprint} (1992).

\item{59.} J. Mather
Non-existence of invarinat circles,
{\it Erg. Theo. \& Dyn. Syst.} {\bf 4} (1984).


\item{60.} R. S. McKay, I. Percival
Converse K.~A.~M.: theory and practice
{\it Comm. Math. Phys.} {\bf 98} (1985)

\item{61.} I. Jungreis
A method for proving that monotone twist maps have no invariant circles
{\it Erg. Theo. \& Dyn. Syst.} {\bf 11} (1991).


\item{62.} M. R. Herman
Dynamics connected with indefinite torsion
{\it Ecole Polytechnique preprint} (1991).

\item{63.} M. Muldoon
Ghosts of order on the frontier of chaos
{\it Caltech thesis} (1988).
