%INSTRUCTIONS:
%This paper needs to be tex'ed twice.
%The first time it generates an auxiliary called d.aux
%which contains references etc.
%ENDINSTRUCTIONS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Page geometry and paragraph parameters, one assignment per line.
% \magnification comes first: in plain TeX it resets \hsize and \vsize,
% so the explicit values below must follow it.
\magnification=\magstep1
\hoffset=0.cm
\voffset=-0.5truecm
\hsize=16.5truecm
\vsize=24.truecm
\baselineskip=14pt plus0.1pt minus0.1pt
\parindent=12pt
\lineskip=4pt
\lineskiplimit=0.1pt
\parskip=0.1pt plus1pt
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%%%%%GRECO%%%%%%%%%
%
% Short aliases for Greek letters (math mode), one per line.
% Several of these names replace plain TeX text accents/letters of the
% same name (e.g. \b, \c, \d, \t, \l, \o, \L, \O, \P).
% Lowercase:
\let\a=\alpha
\let\b=\beta
\let\g=\gamma
\let\d=\delta
\let\e=\varepsilon
\let\z=\zeta
\let\h=\eta
\let\th=\vartheta
\let\k=\kappa
\let\l=\lambda
\let\m=\mu
\let\n=\nu
\let\x=\xi
\let\p=\pi
\let\r=\rho
\let\s=\sigma
\let\t=\tau
\let\iu=\upsilon
\let\f=\varphi
\let\c=\chi
\let\ps=\psi
\let\o=\omega
\let\y=\upsilon
% Uppercase:
\let\G=\Gamma
\let\D=\Delta
\let\Th=\Theta
\let\L=\Lambda
\let\X=\Xi
\let\P=\Pi
\let\Si=\Sigma
\let\F=\Phi
\let\Ps=\Psi
\let\O=\Omega
\let\U=\Upsilon
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Counters used by the equation-labelling macros below: \numsec is the
% current section number and \numfor the current formula number (both are
% set by hand at the start of each section).
\global\newcount\numsec\global\newcount\numfor
% Depth of the plain TeX strut box; used by the draft-mode margin label
% defined in \BOZZA.
\gdef\profonditastruttura{\dp\strutbox}
% \senondefinito{name}: opens a conditional that is true when
% \csname name\endcsname is undefined (i.e. equal to \relax).  The caller
% must supply the matching \else ... \fi.
\def\senondefinito#1{\expandafter\ifx\csname#1\endcsname\relax}
% \SIA prefix,label,value<space>: if \csname prefixlabel\endcsname is still
% undefined, define it globally as the fully expanded value (\xdef);
% otherwise emit a "already defined" warning through \write16.
% The third argument is delimited by a space, so the value itself must not
% contain one.
\def\SIA #1,#2,#3 {\senondefinito{#1#2}
\expandafter\xdef\csname #1#2\endcsname{#3} \else
\write16{???? ma #1,#2 e' gia' stato definito !!!!} \fi}
% \etichetta(label): typesets the current equation number as
% (section.formula), records that number globally under the key e<label>
% via \SIA, advances \numfor by one, queues a line "\FU(label){number}"
% for output stream 15 and reports the pairing through \write16.
\def\etichetta(#1){(\veroparagrafo.\veraformula)
\SIA e,#1,(\veroparagrafo.\veraformula)
 \global\advance\numfor by 1
 \write15{\string\FU (#1){\equ(#1)}}
 \write16{ EQ \equ(#1) == #1  }}
% \FU(label){number}: stores number under the key fu<label>.  These calls
% are the lines that \etichetta writes to stream 15.
\def \FU(#1)#2{\SIA fu,#1,#2 }
% \etichettaa(label): same as \etichetta, but the printed and stored number
% carries an "A" prefix, (A<section>.<formula>) -- presumably for equations
% in the appendices.
\def\etichettaa(#1){(A\veroparagrafo.\veraformula)
 \SIA e,#1,(A\veroparagrafo.\veraformula)
 \global\advance\numfor by 1
 \write15{\string\FU (#1){\equ(#1)}}
 \write16{ EQ \equ(#1) == #1  }}
% \BOZZA: draft-mode switch.  Calling it redefines \alato(label) so that the
% label name is printed in \scriptstyle, shifted left by \hsize+1.2truecm
% from the current position, inside a \vtop whose height is the strut depth.
\def\BOZZA{\def\alato(##1){
 {\vtop to \profonditastruttura{\baselineskip
 \profonditastruttura\vss
 \rlap{\kern-\hsize\kern-1.2truecm{$\scriptstyle##1$}}}}}}
% Default (non-draft) \alato: prints nothing.
\def\alato(#1){}
% \veroparagrafo / \veraformula: the current values of \numsec and \numfor
% as digits.
\def\veroparagrafo{\number\numsec}\def\veraformula{\number\numfor}
% Equation-numbering commands (label given in parentheses):
%   \Eq(label)   number placed with \eqno, registered through \etichetta;
%   \eq(label)   same, without \eqno (the caller provides the number slot);
%   \Eqa(label), \eqa(label)   the "A"-prefixed variants using \etichettaa.
% Each one also calls \alato(label), which prints the label only in draft
% mode (after \BOZZA).
\def\Eq(#1){\eqno{\etichetta(#1)\alato(#1)}}
\def\eq(#1){\etichetta(#1)\alato(#1)}
\def\Eqa(#1){\eqno{\etichettaa(#1)\alato(#1)}}
\def\eqa(#1){\etichettaa(#1)\alato(#1)}
% \eqv(label): expands to the number stored under fu<label>; if there is
% none, prints a club-suit symbol followed by the raw label and writes
% "No translation for <label>" through \write16.
\def\eqv(#1){\senondefinito{fu#1}$\clubsuit$#1\write16{No translation for #1}%
\else\csname fu#1\endcsname\fi}
% Earlier variant of \eqv without the log message, kept for reference:
%\def\eqv(#1){\senondefinito{fu#1}$\clubsuit$#1\else\csname fu#1\endcsname\fi}
% \equ(label): reference to an equation.  Uses the number stored under
% e<label> when it is already defined, otherwise falls back to \eqv(label).
\def\equ(#1){\senondefinito{e#1}\eqv(#1)\else\csname e#1\endcsname\fi}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Alternative names for the equation-numbering commands.
\let\EQS=\Eq
\let\EQ=\Eq
\let\eqs=\eq
\let\Eqas=\Eqa
\let\eqas=\eqa
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \V x: vector accent (\vec) on the next token; \dpr: partial-derivative
% symbol; \ciao: alias of \bye.
\def\V#1{\vec#1}\let\dpr=\partial\let\ciao=\bye
% \io and \i both give the infinity symbol (\i thereby replaces plain TeX's
% dotless i).
\let\io=\infty\let\i=\infty
% \ii and \ig: integral sign.
\let\ii=\int\let\ig=\int
% \media{x}: x enclosed in angle brackets.
\def\media#1{\langle{#1}\rangle}
% \guida: dotted leaders filling the available horizontal space.
\def\guida{\leaders\hbox to 1em{\hss.\hss}\hfill}
% \tende{x}: a stretchable right arrow with x set in \scriptstyle on the
% row beneath it (built as a two-row \ialign inside a \vtop).
\def\tende#1{\vtop{\ialign{##\crcr\rightarrowfill\crcr
              \noalign{\kern-1pt\nointerlineskip}
              \hglue3.pt${\scriptstyle #1}$\hglue3.pt\crcr}}}
% \otto: \leftarrow and \to kerned together (a two-headed arrow).
\def\otto{{\kern-1.truept\leftarrow\kern-5.truept\to\kern-1.truept}}
% \pagina: fill the page and eject it; \acapo: forced line break.
\def\pagina{\vfill\eject}\def\acapo{\hfill\break}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%LATINORUM
% Latin abbreviations, set in italics inside an unbreakable \hbox.
% All except \etc end with an explicit space (\ ).
\def\etc{\hbox{\it etc}}
\def\eg{\hbox{\it e.g.\ }}
\def\ap{\hbox{\it a priori\ }}
\def\aps{\hbox{\it a posteriori\ }}
\def\ie{\hbox{\it i.e.\ }}
% \fiat expands to an empty group.
\def\fiat{{}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%DEFINIZIONI LOCALI
% Paper-specific shorthands, one definition per line.
% Redundant repeated definitions (\ff, \zz, \nn, \mb, \q, and the \let form
% of \lis) have been dropped; for each macro only the definition that was
% last, and therefore effective, is kept.
% NOTE: \AA, \aa, \tt and \= replace the plain TeX commands of the same
% name (the letters A-ring/a-ring, the typewriter font switch and the macron accent).
%
% --- vectors (accent supplied by \V) ---
\def\AA{{\V A}}
\def\aa{{\V\a}}
\def\bv{{\V\b}}
\def\bB{{\V\b}}
\def\bb{{\V\b}}
\def\dd{{\V\d}}
\def\ff{{\V\f}}
\def\nn{{\V\n}}
\def\oo{{\V\o}}
\def\tt{{\V\th}}
\def\zz{{\V z}}
\def\FF{{\V F}}
\def\xx{{\V x}}
\def\yy{{\V y}}
\def\mm{{\V m}}
\def\Dpr{{\V\dpr}}
\def\0{{\V0}}
\def\pps{{\V\ps{\,}}}
% --- calligraphic letters ---
\def\UU{{\cal U}}
\def\BB{{\cal B}}
\def\DD{{\cal D}}
\def\CC{{\cal C}}
\def\II{{\cal I}}
\def\EE{{\cal E}}
\def\MM{{\cal M}}
\def\LL{{\cal L}}
\def\Sol{{\cal S}}
\def\TT{{\cal T}}
\def\RR{{\cal R}}
\def\PP{{\cal P}}
% --- overlined, barred, hatted and tilded symbols ---
% \lis takes one argument and overlines it inside its own group.
\def\lis#1{{\overline #1}}
\def\mb{{\bar\m}}
\def\kb{{\bar\k}}
\def\rb{{\bar\r}}
\def\xb{{\bar\x}}
\def\cb{{\bar c}}
\def\thb{{{\bar \th}}}
\def\fb{{{\bar \f}}}
\def\psb{{{\bar \ps}}}
\def\xc{{\hat\x}}
\def\hB{{\hat B}}
\def\ct{{\tilde \CC}}
\def\rt{{\tilde\r}}
\def\mt{{\tilde\m}}
\def\kt{{\tilde\k}}
% --- recurring expressions ---
\def\q{{{q_0/2}}}
\def\WI{{W_+}}
\def\WS{{W_-}}
\def\BAK{{{{\lis A}^2\over\lis K}}}
\def\bak{{\bar A^2\over\bar K}}
\def\bul{{l\bar u\e^{-2}}}
% --- relations, arrows, alternative letter names ---
\def\={{ \; \equiv \; }}
\def\su{{\uparrow}}
\def\giu{{\downarrow}}
\let\ch=\chi
% --- upright operator names ---
\def\sign{{\rm sign\,}}
\def\Im{{\rm\,Im\,}}
\def\Re{{\rm\,Re\,}}
\def\atan{{\,\rm arctg\,}}
%%%%%%%%%%%%%%%%%%
% Cross-reference file d.aux (hence the need to run TeX twice).
% Probe for the file on input stream 14; when it exists, read it so that
% the \FU lines written by the previous run define the equation numbers.
% The probe stream is closed before the file is read: it is needed only for
% the existence test and would otherwise stay open on d.aux while the same
% file is \input and then opened for writing.
\openin14=d.aux
\ifeof14 \relax \else
  \closein14
  \input d.aux
\fi
% Open d.aux for output on stream 15; \etichetta and \etichettaa write the
% \FU lines of the current run to it.
\openout15=d.aux
%%%%%%%%%%%%%%%%%%%%%%
% Redefine \ap without the \hbox wrapper of the earlier definition in the
% LATINORUM section -- presumably so that "a priori" may break across lines.
\vskip0.pt\def\ap{{\it a priori\ }}
\vglue1.truecm
{\centerline {\bf DRIFT AND DIFFUSION IN PHASE SPACE} }
\vskip.5truecm\numsec=1\numfor=1

\centerline{
Luigi Chierchia\footnote{${}^1$}{Dip. di Matematica, $II^a$
Universit\`a di Roma, ``Tor Vergata",  via della Ricerca Scientifica,
00133 Roma, Italia},
Giovanni Gallavotti\footnote{${}^2$}{Dip. di Fisica,
Universit\`a di Roma, ``La Sapienza", P. Moro 5, 00185 Roma, Italia}
}
\vskip1.truecm

\noindent
{\bf Abstract:}
{\it The problem of stability of the action variables (\ie of the adiabatic
invariants) in perturbations
of completely integrable (real analytic) hamiltonian systems with more than
two degrees of freedom is considered. Extending the analysis of {\rm [A]},
we work out a general quantitative theory, from the point of view of
{\sl dimensional analysis}, for {\sl a priori unstable systems}
(\ie systems for which
the unperturbed integrable part possesses separatrices), proving, in
general, the existence of the so--called Arnold's diffusion
and establishing upper bounds on the time needed for
the perturbed action variables to {\sl drift} by an amount of $O(1)$.

\noindent
The above theory can be extended so as to cover cases of {\sl a priori
stable systems} (\ie systems for which separatrices are generated
near the resonances by the perturbation).
As an example we consider the ``D'Alembert precession problem
in Celestial Mechanics"
(a planet modelled by a rigid rotational ellipsoid with small
``flatness" $\h$, revolving on a given Keplerian orbit of eccentricity
$e=\h^c$, $c>1$, around a fixed star and subject only to Newtonian
gravitational forces) proving in such a case the existence of
Arnold's drift and diffusion; this means that
there exist initial data for which, for any $\h\neq 0$ small enough,
the planet changes, in due ($\h$--dependent) time, the inclination of the
precession cone by an amount of $O(1)$. The homo/heteroclinic angles
(introduced in general and discussed in detail together with homoclinic
splittings  and scatterings) in the D'Alembert problem are not
exponentially small with $\h$ (in spite of first order predictions based
upon Melnikov type  integrals).
}

\vskip1.truecm
\noindent {\bf Key words:}
{\it perturbed hamiltonian systems, stability theory, Arnold's diffusion,
homoclinic splitting, heteroclinic trajectories, KAM theory, whiskered
tori, dimensional estimates, Celestial Mechanics, D'Alembert Equinox
Precession problem.
}

\vfill\eject

\vglue2.truecm

\noindent{\bf CONTENTS} \vskip.5truecm

\line{\sl  {\bf\S1\phantom{2}}
\qquad Introduction and description of the results
\quad\dotfill\quad 3}
\line{\sl  {\bf\S2\phantom{2}}
\qquad A priori unstable systems. Regularity assumptions
\quad\dotfill\quad 8}
\line{\sl  {\bf\S3\phantom{2}}
\qquad The free system. Diffusion paths and whisker ladders
\quad\dotfill\quad 11}
\line{\sl  {\bf\S4\phantom{2}}
\qquad Motion on the separatrices. Melnikov integrals
\quad\dotfill\quad 14}
\line{\sl  {\bf\S5\phantom{2}}
\qquad Existence of ladders of whiskers
\quad\dotfill\quad 17}
\line{\sl  {\bf\S6\phantom{2}}
\qquad Large whiskers. Homoclinic points and their angles
\quad\dotfill\quad 36}
\line{\sl  {\bf\S7\phantom{2}}
\qquad Whisker ladders and rounds density
\quad\dotfill\quad 49}
\line{\sl  {\bf\S8\phantom{2}}
\qquad Heteroclinic intersections. Drift and diffusion along
directly open paths
\quad\dotfill\quad 52}
\line{\sl  {\bf\S9\phantom{2}}
\qquad A class of exactly soluble homoclines
\quad\dotfill\quad 60}
\line{\sl  {\bf\S10}
\qquad Homoclinic scattering. Large separatrix splitting
\quad\dotfill\quad 63}
\line{\sl  {\bf\S11}
\qquad Variable coefficients. Fast mode averaging
\quad\dotfill\quad 72}
\line{\sl  {\bf\S12}
\qquad Planetary precession. Existence of drift and diffusion
\quad\dotfill\quad 79}

\vskip.5truecm
\noindent{\bf Appendices:}
\vskip.3truecm

\line{\sl  {\bf\S A1\phantom{2}}
\qquad Resonances: Nekhorossev theorem
\quad\dotfill\quad 89}
\line{\sl  {\bf\S A2\phantom{2}}
\qquad Diffusion paths  and diophantine conditions
\quad\dotfill\quad 90}
\line{\sl  {\bf\S A3\phantom{2}}
\qquad Normal hyperbolic coordinates for a pendulum
\quad\dotfill\quad 92}
\line{\sl  {\bf\S A4\phantom{2}}
\qquad Diffusion sheets. Relative size of the time scales
\quad\dotfill\quad 99}
\line{\sl  {\bf\S A5\phantom{2}}
\qquad Divisor bounds
\quad\dotfill\quad 100}
\line{\sl  {\bf\S A6\phantom{2}}
\qquad The equinox precession
\quad\dotfill\quad 101}
\line{\sl  {\bf\S A7\phantom{2}}
\qquad Application to the Earth precession
\quad\dotfill\quad 101}
\line{\sl  {\bf\S A8\phantom{2}}
\qquad Trigonometry of the Andoyer-Deprit angles
\quad\dotfill\quad 103}
\line{\sl  {\bf\S A9\phantom{2}}
\qquad Determinants, wronskians, Jacobi's map
\quad\dotfill\quad 103}
\line{\sl  {\bf\S A10}
\qquad High order perturbation theory and averaging
\quad\dotfill\quad 111}
\line{\sl  {\bf\S A11}
\qquad Scattering phase shifts and intrinsic angles
\quad\dotfill\quad 118}
\line{\sl  {\bf\S A12}
\qquad Compatibility. Homoclinic identities
\quad\dotfill\quad 120}
\line{\sl  {\bf\S A13}
\qquad Second (and third) order whiskers and phase shifts
\quad\dotfill\quad 123}
\line{\sl  {\bf\S A14}
\qquad Development of the perturbatrix
\quad\dotfill\quad 131}
\vskip.3truecm

\line{\sl  \phantom{{\bf\S A14}}
\qquad References \quad\dotfill\quad 134}

\vfill\eject

{\bf\S1 Introduction and description of the results}
\vskip1.truecm\numsec=1\numfor=1
A typical question about diffusion in phase space is the following:
could the Earth axis tilt? To put the question in mathematical form we
consider a model for the Earth precession, well known since D' Alembert,
[L].

Let a planet $\EE$ be a homogeneous rigid body with rotational symmetry
about its $N$-$S$ axis and with polar and equatorial inertia moments
$J_3,J_1$: hence with {\it mechanical} polar {\it flattening}
$\h=(J_3-J_1)/J_3$, which is supposed to be small. Let the planet move on a
keplerian orbit $t\to\V r_T(t)$, with eccentricity $e$, about a fixed
heavenly body $\Sol$ with mass $m_\Sol$; also $e$ will be supposed to be small
and in fact we shall assume that {\it the eccentricity and the flattening
coefficient are related by a power law}: $e=\h^c$ for some positive
constant $c$.  Wishing to be closer to reality
one could also assume that $\EE$ had a satellite $\MM$: what follows could
be adapted to this stranger situation (in the case of the $\EE$arth this is
particularly relevant as the $\MM$oon accounts for $2/3$ of the lunisolar
precession).  But here, as it will be far too clear below, we are
addressing a purely conceptual question and we have no pretension that our
results apply directly to the solar system or subsystems thereof.

Regarding the flattening $\h$ (and hence the eccentricity $e$)
as a (non vanishing)
parameter, we consider initial conditions close to those in which the planet
is rotating around its symmetry axis, at a daily angular velocity $\o_D$,
and precessing around the normal to the orbit, at an angular velocity
denoted $\o_p\equiv-\h\o$, on a cone with inclination $i$.  And we ask
whether, {\it no matter how small the flattening coefficient $\h$ may be}
(below some $\h_0$), there is an
initial condition such that, after due time, one can find the planet
precessing on a cone with inclination $i'\ne i$, with $i,i'$ fixed {\it a
priori, independent of} $\h$.  Such a phenomenon will be called {\it
drift} in phase space.

We have not worried, above, about fine points like the distinction between
the symmetry axis of $\EE$ and the angular momentum or the angular velocity
axes: such a distinction is not a minor one and is of course relevant to a
rigorous analysis of the problem which we defer to \S12.

Closely related to the drift in phase space is the {\it diffusion}: we
shall see that the same mechanism that we discuss to show the existence of
drift also shows the existence of orbits along which the inclination does
not increase monotonically (on average) from $i$ to $i'$ but rather it
evolves, on a suitably large scale of time, so as to either increase or
decrease the inclination by an amount $O(\e)$ according to a prefixed
pattern at least for a number of time steps of order $\gg O(\e^{-1})$, for
some $\e$ small compared to $\h$.  If one chooses the initial datum
randomly and with equal distribution among the initial data of the above
orbits, one will see the inclination change as a brownian motion, at least
as long as it takes to reach the target value $i'$ (or its symmetric value
with respect to $i$).

This work is a generalization of the well known example
given by Arnold, [A]. The basic feature of Arnold's example was that the
drift took place around invariant tori of dimension $l-1$ if $l$ is the
number of degrees of freedom of the system and that the system considered had a
very special form: the tori around which the diffusion took place
were explicit exact solutions of the equations of motion.  This is a
property which does not hold in general and a fraction of the work in this
paper is devoted to a detailed construction of the tori and of the flow
around them (an analysis started in [M]). Furthermore the
instability of the tori is also explicit in the model in [A].  The general
system, however, will be such that most of the tori will have dimension $l$
and the unstable tori arise near resonances.

Some details of the mechanism generating unstable tori of dimension $l-1$
along which diffusion takes place may be quite involved, in general.

The point of view of this work has been to see if, starting with the ideas
in the well known example of Arnold, one could develop the theory to a
point to make it applicable to the above celestial problem (for which the
invariant tori arise only near resonances).  We felt that
such a precise goal, if pursued without further simplifying hypotheses,
would provide a natural selection of possible assumptions
(which could, otherwise, appear as {\it ad hoc} to the reader).

To achieve such a goal several intermediate problems had to be solved.

1) In \S2 we define precisely a class of systems that we study: it is a
system of $l-1$ rotators coupled to a pendulum. Arnold's example is in
this class, but not so the D' Alembert model for the Earth precession.
The simplifying aspect of the systems in such a class is that it is obvious from
their definition that they are unstable (the instability simply occurs near
the pendulum separatrix): thus we call them {\it \ap unstable}. A
detailed theory of such systems is necessary to attack the far harder \ap
{\it stable} systems (defined below).

2) In \S3 we point out the main (easy) properties of the
uncoupled ({\it free}) systems of a pendulum and several rotators.

3) In \S4 we introduce the key notion of {\it diffusion path}: it is a
curve in the rotator action space, along which the free rotators angular
velocities form a vector with suitable diophantine properties. It will play
the role of marking the projection in action space of a drifting or
diffusing motion.

4) In \S5 we prove that the points of the diffusion curves can be
interpreted as $l-1$ dimensional invariant tori: most of them persist after
the perturbation (\ie the coupling between the pendulum and the rotators)
is switched on. The stability of low dimensional tori has been studied in
the literature by various authors: we present it from scratch because we
need very detailed bounds and analyticity properties of the perturbed tori
equations and a simple {\it normal form} for the motion of a large
class ($l+1$ dimensional) of nearby points. The bounds must be general and
at the same time simple enough to be applicable
to the harder cases that we analyze later (like the D' Alembert model).
Hence we need results stated in terms of the few really important features
of the hamiltonian. We therefore proceed by identifying the relevant
parameters (basically ratios of the independent time scales that govern the
motions) and produce a proof in which the only ingredient is the use of
the Cauchy theorem to bound the derivative of a holomorphic function by the
ratio between the maximum modulus, in the considered analyticity domain, and
the distance to the boundary of the analyticity domain. We call, for
obvious reasons, such bounds {\it dimensional bounds}, see lemmata 1,1'
of \S 5. The normal coordinates that we describe are a generalization of the
celebrated Jacobi coordinates near the unstable equilibrium point of the
pendulum (see lemma 0 of \S 5, and appendix 9 for a description of the classical
Jacobi map).

5) In \S6 we develop the perturbation theory of the asymptotic
manifolds of the invariant tori constructed in \S5; following Arnold,
we call such manifolds {\it whiskers}.
The theory is discussed to arbitrary order of
perturbation theory: such a generality is necessary only if one has in
sight applications to {\it a priori} stable systems (such as
the celestial one of D' Alembert).
Such analysis requires establishing, for the purpose of a consistency check,
some remarkable {\it homoclinic identities}, established in appendix A12.
\hfill\break      %
For the models in the class of the \ap unstable systems the theory to
first order is sufficient and we deduce that the homoclinic angles (\ie
the angles between tangent vectors to the stable and to the unstable
whiskers) are, no wonder, described by a tensor (that we call the {\it
intersection tensor}) related to the Melnikov integrals, reproducing
results of Melnikov which are well known, [Me].

6) In \S7 we show that, given a diffusion path, if the perturbation has
suitable properties (expressed in terms of some explicit condition of
absence of low order resonant harmonics in the Fourier development of the
perturbation at path points) then the set of points along the path
representing invariant tori (for the full hamiltonian) is so dense that one
can find a sequence of them spaced by an amount far smaller than the size
of the homoclinic angles.

7) In \S8, using the normal form described in \S5 in a very essential way,
we show that in the assumptions of \S7 the diffusion path is {\it open for
diffusion} and show the existence of initial conditions which evolve in
time so that the projection of the motion in action space follows the
diffusion path. We also  find an {\it explicit} estimate of the time
needed by the drifting motions to reach the other extreme of the
diffusion path. The path is independent of the size $\m$ of the perturbation
and it is non trivial (\ie not a single point) if $l\ge3$
(no diffusion or drift are possible if $l=2$ by the KAM stability).

The time it takes is of the order $O(\exp-k\m^{-2})$: Arnold's example
is covered by the theorem, but our result is less general than Arnold's one
as it can be applied to diffusion paths which are segments of length of $O(1)$
but not arbitrarily placed on the action axis: this is the price that we
have to pay to get concrete bounds on the drift time (and not only a
finiteness result). We do not know if this restriction would also be
present by using Arnold's method (\ie whether Arnold's method could give,
in his example, actual constructive upper bounds on the diffusion time).

8) In \S9 we begin to worry about the fact that the above analysis does
not cover \ap unstable systems in which the pendulum Lyapunov exponent
(\ie in physical terms the gravity acceleration), that we call here
$\h$, is not fixed but it is linked to the perturbation size (usually
much smaller) that we call $\m$.  The reason is that in such cases the
first order of perturbation theory is ``degenerate" in the sense that it
predicts homoclinic splitting with some angles of size
$O(\m\exp-k\h^{-1/2})$, for some $k>0$.  This leads essentially to a
situation in which the first order perturbation theory is not
sufficient, even to establish the existence of the homoclinic
splitting, not to speak of the existence of drift: it is well known
that there are examples in which the situation does not improve by
going to higher order (see \eg [La2]).

In fact the problem is already quite hard in the case of a forced pendulum
(\ie $l=2$) and with the rotator being a clock model, perfectly
isochronous; this means that the rotator action $B$ appears in the form of
an additive term in the hamiltonian equal to $\o B$ and the rest of the
hamiltonian depends only on the pendulum coordinates $I,\f$ and on the
conjugate angle $\l$, ``position of the clock arm".  If the perturbation
size is supposed $\m=O(\h^d)$ for some $d>0$ the problem is non trivial (a
case reducible to the ones treated in \S6,\S7,\S8 would be if
$\m=O(\exp-{c\h^{-b}})$ with $b>1/2$: but this is, unfortunately, a case of
little interest in view of the expected size of $\m$ in the applications).

If $l>2$ the angles are in general rather hard to describe:
we find some rather implicit expressions for them, in general, but we can
make use of them in the one case with $l=3$ which motivated our work
(\ie the D' Alembert equinox precession model). Actually we point out an
ambiguity about what one defines to be the homoclinic angles of splitting
as there are at least two different interesting sets of coordinates that
can be considered. To relate them we introduce the concept of homoclinic
phase shift (a quite remarkable notion in itself: see item 13) below for
a qualitative description of it).

In general, in the cases with an exponentially small splitting to first
order, we {\it do
not} discuss a proof of the existence of a homoclinic point: although
the results that we have developed are probably sufficient for
constructing a proof.  The reason is not only to cut a little shorter
this paper but mainly because the theory is, nevertheless, not empty:
in fact we can apply it to a special but wide class of models for which
the homoclinic point problem is (well known to be) exactly soluble (in
the sense that one can show the existence, and locate exactly the
position, of the homoclinic point).  We call such class the {\it even
models}: as the property is based on a symmetry of such hamiltonians.
Many models of forced pendula fall in this class that we introduce and
treat, for completeness, in \S9.

9) In \S10 we discuss in more detail the notion of homoclinic phase
shift particularly in the case of even models with $l=3$, in which one
of the rotators is a clock and the other is ``slow", \ie its free
angular velocity is of order $\h$ while also the pendulum gravity
constant is of order $\h$.  The introduced formalism allows us to show
that the phenomenon of {\it large homoclinic splitting} takes place
even in presence of fast rotations, {\it as
long as there is at least one slow among them}: this property
holds only in systems with $l\ge3$ (and generically it does happen,
as we show) and in spite of first order (Melnikov type) computations
(which predict exponentially small splittings).
Some detailed calculations are performed in appendix A13
and they are interesting by themselves.

The existence of one fast rotation and other slow ones looks very
special but we show in \S12 that the D' Alembert precession model,
which is \ap stable, is reducible to such a case: this is due to the
extra degeneracies present in all celestial problems.

10) The actual application of the theory to even models with $l=3$, relevant
for the precession problem, requires some extra work performed in \S11 and
the technique is also an illustration of a rigorous application of the
usually qualitative {\it averaging methods}.

11) In \S12 we finally study the \ap stable D' Alembert precession
model.  The original D' Alembert model took the planet orbit to be
circular: in this case the model has $l=2$ and diffusion is not
possible.  Therefore we take the orbit to be keplerian with
eccentricity $e>0$; this leads to a large class of models obtained by
truncating the eccentricity series to order $k$; we study for
simplicity only the case $k=2$: the general case ($k$ arbitrary), does
not seem to offer more difficulties, except notational ones.  The work
having been organized in order to treat this case, the discussion is
rather simple.

We choose in our example as diffusion path a line which has the
physical interpretation of a $1:2$ resonance between the ``day" period
and the ``year" period, and is such that a motion along it has the
interpretation of changing the size of the angle between the ecliptic
and the angular momentum of the planet (``inclination").  We just have
to check that the model can be reduced, by a suitable change of
coordinates, to a $l=3$ system of a pendulum with small gravity of
order $\h$ forced by a fast clock and by a slow anisochronous rotator;
the perturbation parameter is the eccentricity $e$ of the orbit, which
we have to take small with $\h$, \eg $e=\h^c$, for some convenient
$c>0$.  The model is even, in the sense of \S9,\S10, and the theory of
\S9,\S11 fully applies at least to portions of $O(1)$ of the diffusion
path: for many of them we thus get the existence of drift (and
diffusion).

13) The notion of {\it homoclinic scattering} and {\it phase shifts}
arises naturally as a byproduct of the analysis performed to describe
the phase shifts occurring on the homoclinic motion and near it.
Calling $\aa$ the rotators angular coordinates and $\f$ the pendulum
angle suppose that at some arbitrarily fixed reference angle
$\f=\bar\f$ there is a homoclinic point at $\aa=\aa_0$.  Two points
starting at $t=0,\f=\bar\f$, one on the stable whisker and one on the
unstable whisker of some invariant torus with the same position
coordinates $\aa$, will evolve towards the invariant torus
(respectively forward and backward in time) so that their asymptotic
motion gives two points which move quasi periodically keeping a time
independent {\it phase} with respect to the homoclinic motion.  It will
be a function of the distance of the initial points to the homoclinic
point, \ie of $\aa$.  The difference $\V\s[\aa]$ between such phases
evaluated at $t=\pm\io$ will be the {\it phase shift}.  The
``scattering" will be the family of derivatives of $\V\s[\aa]$ at
$\aa=\aa_0$.  In other words we use the homoclinic point as a gauge to
fix the origin of the angles on the standard torus on which the quasi
periodic motion is linear and we look at the trajectory starting on the
unstable whisker at $t=-\io$ infinitesimally close to the invariant
torus and evolving into a point with $\f=\bar \f$ and some $\aa$ at
$t=0$, ``jump" on the stable whisker (keeping the values of $\aa,\f$),
and evolve towards the invariant torus again.  The trajectory will be
asymptotically lagging behind the homoclinic trajectory by an amount
$\pps$, say, at $t=-\io$ and by an amount $\pps+\V\s[\aa]$ at $t=+\io$.
The notion of $\V\s[\aa]$ is intrinsic as the coordinates on which the
motion on the torus appears as linear and which are ``close" to the
corresponding unperturbed ones are uniquely defined.

In presence of perturbations the phase shift is a non trivial function
of the distance to the homoclinic point.  We define the phase shifts
analytically in \S10 and briefly discuss, in \S10 and in appendix A11,
how they are related to the homoclinic splitting.

We present all details in a self contained way.  Some of the details are,
however, exposed in a series of appendices.  Some of the appendices also
contain classical results not so easy to find in the literature in the form
in which we need them. Some (very few) of them are not really necessary
but they are reported because they clarify conceptual and historical
aspects of the problem (namely the statement of Nekhorossev theorem (\S
A1), the D' Alembert precession theory for the Earth (\S A6,\S A7), the
Jacobi map (\S A9), the bounds on the homoclinic scattering (\S A11)) and
they occupy a negligible amount of space.
%
\vskip1.truecm

\noindent{\bf Acknowledgments}: We are indebted to CNR-GNFM and to
the research funds of MPI for partial support. L.C. also acknowledges
support for a visit to Rutgers University under grant \# DMR 89--18903.
G.G. is grateful to J. Lebowitz for his encouragement and for his support
to this work which was partly developed at Rutgers University with the
support of the grants \# DMR 89--18903 during the years 1989 through 1991.

\vskip1.truecm
\vglue2.truecm

\penalty-200

{\bf\S2 A priori unstable systems. Regularity assumptions}

\penalty10000

\vskip0.5truecm\numsec=2\numfor=1

\penalty10000

Let $(\AA,\aa)$, $(I,\f)$ be canonical coordinates describing a
mechanical system with $l$ degrees of freedom.  We suppose $\AA\in V
\subset R^{l-1},\aa\in T^{l-1}$, $I\in R^1$ and $\f\in T^1$, where $V$
is the closure of some open bounded set and $T^s$ is the $s$-dimensional
torus.  We shall regard $T^s$ interchangeably as $[-\p,\p]^s$ with opposite
sides identified or we regard it as $C_1^s$=$\{$product of $s$ unit circles
in the $s$-dimensional complex space $C^s\}$ via the identification
$\ff=(\f_1,\ldots,\f_s)\in T^s\otto$ $\zz=(z_1,\ldots,z_s)\in C^s$ with
$z_j\equiv e^{i\f_j}$, ($j=1,\ldots,s$).

The {\it free} system will consist of $l-1$ rotators described by the
angles $\aa$ and their conjugate momenta $\AA$, and one pendulum
described by the angle $\f$ with conjugate momentum $I$.

The pendulum oscillates with energy:
%
$$P_0(I,\AA,\f)={1\over2}{I^2\over
J_0(\AA)}+g(\AA)^2J_0(\AA)(\cos\f-1)\Eq(2.1)$$
%
where $J_0(\AA)$ is a suitable {\it inertia moment} and $2\p g(\AA)^{-1}$ is
the characteristic period of the small oscillations or, as well, $g(\AA)$ is
the Lyapunov exponent of the unstable fixed point. We call \equ(2.1) a
{\it standard pendulum} hamiltonian.

The rotators will move without being affected by the pendulum
oscillations. A complete example hamiltonian will be:
%
$$h_0={1\over2}{\AA^2\over R}+P_0(I,\AA,\f)\Eq(2.2)$$
%
where $R$ is another inertia moment.

More generally we shall consider $\aa$-independent hamiltonians like:
%
$$H_0(I,\AA,\f,\m)=h(\AA,\m)+P(I,\AA,\f,\m)\Eq(2.3)$$
%
where $P$ is a real analytic hamiltonian depending on a parameter $\m$ and
describing a pendulum in the sense discussed below, and $h(\AA,\m)$ will
also be assumed real analytic.

To clarify what we mean by a {\it pendulum} hamiltonian $P$ we recall
the characteristics of the pendulum phase portrait. The isoenergy
lines in $(I,\f)$-space with $P=E$ are closed continuous curves with
topological properties that may change as $E$ varies. The lines of
separation between the regions covered by curves of the same type (\ie
curves which do not contain an equilibrium point and which can be
deformed into each other without crossing an equilibrium point) are
called separatrices and contain at least one equilibrium point, and at
most finitely many (as we are only considering analytic hamiltonians).

In our case we want to allow an explicit $(\m,\AA)$-dependence of $P$:
hence the above picture is $\m,\AA$ dependent. We shall require that,
for all values of $\AA$ of interest, the pendulum $P$ has a linearly
unstable fixed point $I_\m(\AA),\f_\m(\AA)$ which is the only such
point on the corresponding separatrix and, furthermore, we require that
$I_\m(\AA),\f_\m(\AA)$, together with its Lyapunov exponent
$g(\AA,\m)$ ($\ne0$ by assumption), depend analytically on $\AA,\m$.

Clearly the above is a very mild restriction, only exceptionally
false: it emerges from the analysis that all we really want is that
in the whole range of the $\AA$'s the unstable fixed point, which we
select for our analysis, depends analytically on $\AA,\m$ and does not
merge, as $\AA,\m$ vary, with other fixed points. We shall call
the above equilibrium point a {\it selected unstable equilibrium point
of $P$}.

In such a situation we shall say that \equ(2.3) describes an \ap unstable
free assembly of rotators witnessed in their rotations by a free pendulum
with a selected unstable point of equilibrium.

It is not restrictive, under the above circumstances, to assume that the
selected unstable point is the origin $I=0,\f=0$, and that its energy is
$P=0$. In fact one can always change coordinates by using the canonical map
generated by the function: $(\f-\f_\m(\AA'))I'+
I_\m(\AA')\sin(\f-\f_\m(\AA'))+\aa\cdot\AA'$, \ie:
%
$$\eqalignno{
I=&I'+I_\m(\AA')\cos(\f-\f_\m(\AA')),\quad\AA=\AA',\quad
\f'=\f-\f_\m(\AA')&\eq(2.4)\cr
\aa'=&\aa-\bigl(I'+I_\m(\AA')\cos(\f-\f_\m(\AA'))\bigr)
\dpr_{\AA}\f_\m(\AA')
+\dpr_{\AA}I_\m(\AA')\sin(\f-\f_\m(\AA'))\cr}$$
%
which is clearly well defined and which generates a new hamiltonian of
type \equ(2.3) which has $I=0,\f=0$ as selected unstable equilibrium
point. Furthermore if $P(\AA,\m)\=P(0,\AA,0,\m)$ we can always redefine $P$
as $P-P(\AA,\m)$ by accordingly changing $h$: hence the requirement
that also $P(0,\AA,0,\m)=0$ is not restrictive.

The aspects of the regularity properties that we use,  motivated by the
above descriptions, are as follows:

\vskip3.pt
\noindent{\it {\bf Assumption 1}: The unperturbed hamiltonian
$H_0$ has the form \equ(2.3) and
the pendulum energy $P$ has the origin $(I=0,\f=0)$ as a selected
unstable equilibrium point where $P$ takes the value $0$
(for all $\AA$ and $\m$ in the domain of definition of $H_0$);
the associated (non negative) Lyapunov exponent, $g(\AA,\m)$:
%
$$
g^2 \= [\big( \dpr^2_{I\f} P\big)^2- \dpr^2_{I} P \dpr^2_\f P]
|_{(I,\f)=(0,0)}
\Eq(2.5)$$
%
is bounded away from zero as $(\AA,\m)$ vary in their domain of definition.
}
\vskip3.pt

\noindent{\it {\bf Assumption 2}:
The functions $h$ and $P$ are real analytic in their
arguments. Hence they are holomorphic in their variables in a complex
domain $S_{\r',\r,\x',\x,\bar\m}$, described by
five parameters $\r',\r,\x',\x,\bar\m>0$ as:
%
$$\eqalign{
S_{\r',\r,\x',\x,\bar\m}=\Bigl\{&
I,\AA,\z,\zz,\m\Bigl|\ |I|\le\r',\  {\rm and\ there\ is\ }
\V a\in V,{\rm\ for\ which}\cr&
|A_i- a_i|\le\r\ {\rm and}\ e^{-\x'}<|\z|<e^{\x' },\
e^{-\x}<|z_j|<e^\x,\ {\rm and} \ |\m|\le \bar \m \Bigr\}\cr}\Eq(2.6)$$
%
with $z_j\=e^{i\a_j},\z=e^{i\f}$}.
\vskip3.pt

\noindent{\it {\bf Assumption 3}:
The following non degeneracy conditions:
%
$$\det({\dpr^2_{\AA}}h)\ne0,\qquad
\det(\dpr^2_{(I,\f)} P|_{I=0,\f=0})\ne0,\qquad
\dpr_\AA h\cdot(\dpr^2_{\AA}h)^{-1}{\dpr_\AA h}\ne0\Eq(2.7)$$
%
hold on $S_{\r',\r,\x',\x,\bar\m}$}.
\vskip3.pt
Then we set:
\vskip3.pt

\noindent{\it {\bf Definition}:
Hamiltonians verifying {\sl all} the above assumptions
1$\div$3 will be briefly referred  to as
regular {\sl anisochronous a priori unstable free hamiltonians}.}

They are called \ap unstable, because the instability assumption is
clearly built in the free system definition.
\vskip3.pt

Such hamiltonians are quite common in the theory of the resonances of
anisochronous systems.

For instance consider an $l$ degrees of freedom system with free
hamiltonian $h$ of the form $h(\AA,B)$ in action angle
coordinates $\AA,B,\aa,\l$ such that the equation of the resonance is simply
$\dpr_B h(\AA,B)=0$. Suppose that $B=B(\AA)$ is the consequent
resonance surface.  Then, if $\e f(\AA,\aa,B,\l)$ is a perturbation, one
can find canonical coordinates $(\AA',\aa',I,\f)$ apt to describe the
motions that take place near the resonance and in which the
hamiltonian takes the form \equ(2.3) (in square brackets in the following
expression) plus a {\it small} correction:
%
$$\eqalign{
&\left[h_p(\AA',I,\e)+\e
G_p(I,\AA',\f,\e)\right]+\e^{p}f_p(I,\AA',\aa',\f,\e)\cr
&h_p= h(\AA',I)+\e\lis f(\AA',I)+O(\e^2),\qquad
G_p=\tilde f(I,\AA',\f)+O(\e)\cr}\Eq(2.8)$$
%
with $\lis f$ equal to the average of $f$ over the angles $\aa,\,\l$ and
$\tilde f$ equal to the average of the function $f-\lis f$ over the $\aa$ alone;
here $p$ can be fixed arbitrarily and $\e$ is the strength of the
perturbation. But, the larger $p$ is, the harder it is to find the
functions $G_p,f_p$ and a coordinate system in which \equ(2.8) holds and
the smaller becomes the (tiny) region of phase space around the resonance
surface where the new coordinates can be used to describe the motion (this
is essentially the Nekhoroshev theorem, see [BG], and appendix A1).

We consider hamiltonians $H$ which are perturbations of regular free \ap
unstable hamiltonians $H_0$, defining the latter by the assumptions 1$\div$3
above:
%
$$H=H_0(I,\AA,\f,\m)+\m f(I,\AA,\f,\aa,\m)\Eq(2.9)$$
%
with $f$ holomorphic in the domain
$S_{\r',\r,\x',\x,\bar\m}$, see \equ(2.6).  We
shall often refer to the Fourier expansion of $f$ in the $\aa$ variables,
which we shall write as:
%
$$f(I,\AA,\f,\aa,\m)=\sum_{\nn\in Z^{l-1}}
f_\nn(I,\AA,\f,\m)\,e^{i\nn\cdot\aa}
\Eq(2.10)$$
%

The problem of phase space drift and diffusion will be posed as follows:
\vskip3.pt

\noindent{\it {\bf Diffusion problem}:
Given $\AA_1,\AA_2$, with $H_0(0,\AA_1,0,0)=
H_0(0,\AA_2,0,0)$ can one find for all $\m$ small enough, but {\sl non
zero}, initial data close (as $\m\to0$) to $(0,\AA_1,0)$ in the
$(I,\AA,\f)$-variables which, in due time ($\m$-dependent, of course)
evolve into data close to $(0,\AA_2,0)$? More bluntly can one realize a
displacement of $O(1)$ in the $\AA$ variables with a perturbation of
order $\m$ as small as we please?}
%
\vglue2.truecm

\penalty-200

{\bf\S3 The free system.  Diffusion paths and whisker ladders}

\penalty10000

\vskip0.5truecm\numsec=3\numfor=1

\penalty10000

To formulate our results we need several concepts.  The first is the notion
of {\it diffusion path} on an energy level $E$, whose value will be kept
fixed throughout this section, as well as the value $\m=0$.

Let $s\to\AA_s$ be a curve $\LL$, piecewise analytic in $s\in[s_1,s_2]$,
joining $\AA^{1}=\AA_{s_1}$ to $\AA^{2}=\AA_{s_2}$, such that, using the
notation in \equ(2.3), one can find two constants $\t,t$ for which:
\vskip.5truecm
\item{1) } $H_0(0,\AA_s,0,0)=h(\AA_s,0)\equiv E$,\ \ for $s\in[s_1,s_2]$,

\item{2) } if $\oo_{s}=\dpr_\AA h(\AA_s,0)$ and if we set:

$$\eqalign{ C(s)=&\sup_{\V\n\ne\V0,\,\V\n\in
Z^{l-1}}\,|\V\n|^{-\t}|\V\n\cdot
\oo_{s}|^{-1}\qquad\quad\hbox{\it ``non\ resonance\ constant"}\cr
\Si(C)=&\{ {\rm set\ of\ the\ } s\in[s_1,s_2] {\rm\
such\ that\ } C(s)<C\}\cr}\EQS(3.1)$$
%
then there is a $\LL$-dependent constant $\bar K>0$ such that:
%
$$(s_2-s_1)^{-1}\cdot({\rm measure\ of\ the\ set\ }\Si(C))\ge (1-
{\bar K(DC)^{-1/t}})\EQS(3.2)$$
%
if $D$ is the maximum of $|\dpr_\AA h(\AA,0)|$ in a neighborhood of the
curve $\LL$.

\vskip0.5truecm
\noindent{\it {\bf Definition}:
if $\LL$ is a curve with the properties 1),2) above
we call it a diffusion path.}
\vskip0.5truecm

Clearly under the genericity assumption \equ(2.7), $\det \dpr^2_{\AA}h
\ne0$, a diffusion path consists of just one point if $l=2$ (because
$h=E$ fixes $A$): no diffusion path exists between distinct points in
action space, if $l=2$. For this trivial reason our results, which
otherwise do not distinguish $l=2$ from $l>2$, will be occasionally
uninteresting if $l=2$.

{\it
In appendix A2 we show that under the genericity assumption \equ(2.7) the
constants $t,\t$ can be taken to be $t=l-1$ and $\t=(l-1)^2$.}
But on
special curves it could be possible to make better choices: for instance in
\S11 we discuss an application with $l=3$ in which $t=1$.

Note that the diffusion paths lie, by definition, in the space of the
$\AA$-variables which are the ``rotators" velocities (or {\it fast
action variables}, or {\it adiabatic invariants}: using the terminology
borrowed from the theory of resonances mentioned in connection with
\equ(2.8),\equ(2.9); see also appendix A1);
it is a notion depending solely on the free
system hamiltonian ($\m=0$) evaluated when the {\it pendulum} (or {\it
slow}, or {\it secular}) variables (\ie $(I,\f)$) are set to the
equilibrium position.

It is easy to see that if $l>2$ there are, under the non degeneracy
conditions \equ(2.7) many diffusion paths joining {\it any} two close
enough points $\AA^{1},\AA^{2}$ lying on a connected portion of the energy
shell $h(\AA,0)=E$, see appendix A2.  The argument is similar to the one
usually invoked to prove the abundance of diophantine irrationals (see, for
instance, [G]): the $l-1=2$ case is particularly easy and the condition
is fulfilled by any curve with non vanishing curvature; in the case $l-1=3$
one has to consider a curve joining $\AA^1,\AA^2$ with nowhere vanishing
curvature and torsion, \etc.

To see the connection between the torsion and the above mentioned values of
$\t,t$ one should recall that a smooth curve $s\to\V a(s)$ in
$d$-dimensions is said to have all its $d-1$ torsion coefficients non
vanishing if, for each fixed $s$, the first $d$ derivatives of $\V a(s)$
are linearly independent: the torsion coefficients are suitable orthogonal
invariants associated with the derivatives of order higher than the first
(hence their number is $d-1$).

The first non degeneracy condition in \equ(2.7) permits
us to conclude that any curve with all its $l-2$ torsion coefficients non zero
verifies \equ(3.2); the last non degeneracy condition in
\equ(2.7) implies that a curve which, in a local chart on the energy
surface, has all (the $l-3$) torsion coefficients non vanishing will also
have all the ($l-2$) torsion coefficients non vanishing when it is regarded
as lying on the $l-1$ dimensional action space.

The values of the exponents arise from the remark that if the curve has all
torsions non zero then a codimension one plane cutting it in a point cannot
have a contact of order higher than $l-1$ with the curve.  Thus a layer of
width $\d$ does not contain, locally, an arc length exceeding
$O(\d^{1/(l-1)})$.  Therefore the statement follows by choosing
$\d=1/(C|\V\n|^{\t+1})$ with $(\t+1)/(l-1)>l-1$, so that one can sum the
arc lengths over $\V\n$ (as it is clearly necessary); \ie the choice
$\t=(l-1)^2,\,t=l-1$ is sufficient, see appendix A2.

For every $\AA$ one can define the $(l-1)$-dimensional torus
invariant for the motion governed by $H_0$:
%
$$\TT_0(\AA)=\bigl\{\AA',\aa',I',\f'\bigl|\,
\AA'=\AA,\, I'=0,\, \f'=0,\,\aa'\in T^{l-1}\bigr\}\Eq(3.3)$$
%
Such tori represent data in which the $l-1$ rotators are mindlessly and
freely rotating while the pendulum {\it stands up} in its selected unstable
equilibrium position. The picture, hence the tori, is obviously unstable
and in fact the tori possess stable and unstable manifolds, called {\it
whiskers} by Arnold, [A], (for reasons that emerge as soon as one tries to
make a symbolic drawing of the situation). The whiskers correspond to data
in which the rotators continue to rotate freely witnessing the pendulum
falling from or climbing to the equilibrium position (respectively
describing the unstable or the stable whiskers) and performing one of the
two {\it separatrix} swings. More mathematically:
%
$$\eqalign{
W_{unstable}(\AA)=
&\bigl\{\aa\in T^{l-1},\,\sign I=\sign \f, P(I,\AA,\f,0)=0\bigr\}\cr
W_{stable}(\AA)=
&\bigl\{\aa\in T^{l-1},\,\sign I=-\sign \f, P(I,\AA,\f,0)=0\bigr\}
\cr}\EQ(3.4)$$
%
where, to fix the ideas, we have assumed that $I>0$ means $\dot\f>0$, while
$I<0$ means $\dot\f<0$ and each separatrix swing takes place over the
complete circle $\f\in [-\p,\p]$ (as in the standard pendulum case; in these
cases we shall speak of ``open separatrices").  Such
properties may fail in some pendula (\eg one of the separatrices could be
contractible to a point): in these cases \equ(3.4) has to be
changed in an obvious way.

It is always true, however, that the set $P(I,\AA,\f,0)=0$ will consist of
two branches which will be called the {\it separatrix swings}: in the case
of the standard pendulum they are the subsets of $W(\AA)$ with $I>0$ or
$I<0$. Furthermore the following well known accident happens:
%
$$\eqalign{
&W(\AA)=W_{stable}(\AA)\equiv W_{unstable}(\AA);\cr
&W(\AA)\cap W(\AA')=\emptyset\qquad{\rm if\ }\AA\ne\AA'\cr}\Eq(3.5)$$
%
hence in the general case both sets in \equ(3.4) will be equal and
coinciding with the separatrix data.  Given a diffusion path $\LL$ we can
associate to it, for $\m=0$, a one parameter family
$s\to\TT_{0}(s)\equiv\TT_0(\AA_s)$ of $(l-1)$-dimensional tori, invariant
with respect to the free evolution.

The family $s\to(\TT_0(s),W_{stable}(s),W_{unstable}(s))$ of the
above tori and of their whiskers will be said to form a {\it whisker
ladder}, {\it leaning} on $\LL$; again try a drawing for the word
motivation.

\vskip1.truecm
\vglue2.truecm

\penalty-200

{\bf\S4 Motion on the separatrices. Melnikov integrals}

\penalty10000

\vskip0.5truecm\numsec=4\numfor=1

\penalty10000

Suppose, for simplicity, a ({\it open}) separatrix encircling the circle,
with a monotonic motion taking place on it (\eg such that the sign of $I$
and that of $\dot\f$ coincide).  We shall write the parametric equations for
the branch $I<0,\f>0$ of $W(\AA)$ as:
%
$$I=i(\f,\AA),\quad\f\in(0,2\p),\quad\aa\in T^{l-1}\Eq(4.1)$$
%
where $i$ is the separatrix swing with $I<0$ (\ie the branch with $I<0$ of
the curve $P|_{\m=0}=0$ through the selected unstable equilibrium point,
(see \S2)).  In the general case (when the separatrix may be shorter than
the full circle, ``closed separatrix case'') one cannot use $\f$ to
parameterize a full separatrix swing, \ie a branch of $W(\AA)$: one would
have to use a different extra parameter to describe $W(\AA)$ at the cost of
conceptually uninteresting complications.

If $X(\AA,\f,\aa)=(i(\f,\AA),\AA,\f,\aa)$ is the point \equ(4.1), let us
denote with the symbol $X^0(\AA,\f,\aa,t)\equiv(I^0(t),\AA,\f^0(t),\aa^0(t))$
the point into which $X(\AA,\f,\aa)$ evolves at time $t$ in the motion
governed by the hamiltonian equations with hamiltonian \equ(2.3) with
$\m=0$.

The \equ(3.4),\equ(2.3) and our choice of coordinates (in which $I=0,\f=0$
is the selected unstable point) imply:
%
$$I^0(t),\f^0(t)=O(e^{\mp gt})\tende{t\to\pm\i}0\Eq(4.2)$$
%
where $g\=g(\AA)\=g(\AA,0)$ is the Lyapunov exponent of the selected
equilibrium point, \ie it is given by $g^2=-\det(\dpr^2P(0,\AA,0,0))$,
where $\dpr^2 P$ is the matrix of the second derivatives with respect to
$I,\f$.  Furthermore, denoting $\oo(\AA)=\dpr_\AA h(\AA,0)$:
%
$$\eqalign{\aa^0(t)=&\aa+\oo(\AA)t+\ig_0^t{\dpr_\AA P}
(I^0(\t),\AA,\f^0(\t),0)\,d\t\equiv\cr
\equiv&\aa+\oo(\AA)t+\tt(t;\AA,\f)\cr}\Eq(4.3)$$
%
where we have used that $P(0,\AA,0,0)\equiv0$, by our assumptions 1$\div$3,
\S2, so that the integrand tends to zero by \equ(4.2); the function
$\V\th$ is defined by \equ(4.3).

It is convenient to fix once and for all an origin on the separatrix
corresponding to the action $\AA$: we take it to be the point $\bar
I,\bar \f$ with $\bar \f$ such that the solution $I(\AA,\f)$ of the equation
$P(I,\AA,\f,0)=0$ for $I$, parameterized by $\AA$, reaches its
absolute maximum
value as a function of $\f$. We call this point {\it the origin of the
separatrix}.  In the case in which $P$ is a standard pendulum (\ie it is
given by
\equ(2.1)) the position $\bar \f$ is $\bar \f=\p$, where the pendulum
attains the maximum velocity.

Therefore we can define the asymptotic {\it phase shifts} $\V\th^\pm(\AA)$
equal to the limits as $t\to\pm\i$ of $\V\th(t;\AA,\bar \f)$. They depend on
the starting point, \ie on $\bar \f$, which however we keep fixed as above,
and on $\AA$; their difference $\V\th(\AA)$ is:
%
$$\V\th^+(\AA)-\V\th^-(\AA)
\equiv\V\th(\AA)=\ii_{-\io}^\io\dpr_\AA P(I^0(\t),\AA,\f^0(\t),0)d\t\EQ(4.4)$$
%
and $-\V\th(\AA)/2$ has the geometric interpretation of the
$\AA$-gradient of the area enclosed between the considered branch
($I<0,\f>0$) of separatrix and the $I=0$
axis (for closed separatrices it is the $\AA$ gradient of the area
enclosed by the considered separatrix swing).

We set the following definition in terms of the above concepts:
\vskip0.5truecm
\noindent {\it {\bf Definition 1}:
\sl\ The free system rotators and pendulum
are {\it independent} at $\AA$ if
$\V\th(\AA)\equiv0$.}
\vskip0.5truecm
The obviously interesting case \equ(2.1) with $\AA$ independent
$R(\AA),g(\AA)$ is clearly very special and it is an example of
independence in the above sense. If, on the other hand, in \equ(2.1), the
functions $g(\AA),R(\AA)$ are not constant the {\it phase shifts}
$\V\th(t;\AA,\bar \f)$ are easily computed:
%
$$\V\th(t;\AA,\bar \f)=\tt_0\,\tanh gt,\qquad \tt_0\=
-4\dpr_\AA(g R)\,\Eq(4.5)$$
%

We shall call $X^0(t)\=(I^0(t),\AA,\bar \f(t),\V\a^0(t))\=
X^0(\AA,\bar \f,\aa,t)$  the separatrix
motion corresponding to the initial point with $\f=\bar \f$ and some initial
$\AA,\aa$ (cf.\ the paragraph after \equ(4.1)).

Given a diffusion curve $\LL$, $s\to\AA_s$ we introduce the
following notations:
$\oo_s\=\oo(\AA_s)$ $\=$ $\dpr_\AA h(\AA_s,0)$,
$\tt_s(t)$ $=\tt(t;\AA_s,\bar \f)$,
and define (see also \equ(2.10)):
%
$$F(t;\aa,s)=-\sum_{\nn\ne\V0}{e^{i\aa\cdot\nn}\over i\oo_s\cdot\nn}
\dpr_t\Bigl[f_\nn(I^0(t),\AA_s,\bar \f(t),0)e^{i\tt_s(t)\cdot\nn}\Bigr]
\Eq(4.6)$$
%
which makes sense for $s\in\Si(\i)$, (which, in general, is a subset of
full measure of $\LL$) see \equ(3.1).

Clearly the function $F(t;\aa,s)\tende{t\to\pm\i}0$ exponentially fast (see
\equ(4.2)), and the following {\it Melnikov integral} is well defined, see
\equ(3.1), for $s\in\Si(\i)\subseteq[s_1,s_2]$:
%
$$M_f(\aa,s)=\ii_{-\i}^\i F(t;\aa+\oo_st,s)dt\Eq(4.7)$$
%
(similar quantities were considered by Poincar\'e in [P]; see also [A]).
Note that in the special case of a degenerate phase shift, \ie
of independence of the rotators and the pendulum,
the $M_f$ are defined for all $s\in[s_1,s_2]$ because the part involving the
small denominators in \equ(4.6) disappears by integration by parts.
In the latter case, in fact, it is:
%
$$M_f(\aa,s)=
c(\AA,s)+\ii_{-\i}^\i f(t;\aa+\oo_st,s)dt\Eq(4.8)$$
%
where $c(\AA,s)\equiv\ig_{-\io}^\io [f_{\V 0}(I^0,\AA_s,\bar \f,0)-
f_{\V 0}(0,\AA_s,0,0)]dt$ is a constant which shall play no role and
$f(t;\aa,s)=f(I^0(t),\AA_s,\bar \f(t),\aa,0)-f(0,\AA_s,0,\aa,0)$.

Such a case with $f(0,\AA_s,0,\aa,0)=0$ was considered by Arnold in [A].

For $s\in\Si(\i)$, see \equ(3.1), the equation:
%
$$\V\dpr_\aa M_f(\aa,s)=\V0\Eq(4.9)$$
%
admits necessarily at least two solutions (\eg one is at $\aa=\aa_s$
when $\aa_{s}$ is a minimum for $M_f$ and the other when $\aa_{s}$
is a maximum).

The following definition will be important:
\vskip3.truept

\noindent{\it {\bf Definition 2}:
We say that the arc of diffusion path
corresponding to $s\in[\bar s_1,\bar s_2]\subseteq[s_1,s_2]$ is {\sl
directly open for diffusion} under the perturbation $f$, see
\equ(2.9),\equ(2.10), if:
\item{1)} no $f$-resonance occurs for $s\in[\bar s_1,\bar s_2]$, in the
sense that ${f_{\V\n}(0,\AA_s,0,0)/ \oo_{s}\cdot\V\n}$
is analytic in $s\in[\bar s_1,\bar s_2]$ for all $\V\n$.
\item{2)} the equation \equ(4.9) admits a continuous solution
$s\to\aa_s$ for all $s\in[\bar s_1,\bar s_2]$ and such that:
%
$$\det\,\dpr^2_\aa  M_f(\aa_{s},s)\ne 0\qquad
s\in[\bar s_1,\bar s_2]\Eq(4.10)$$
%
More generally we say that an arc of a diffusion path is {\sl open for
diffusion} under the perturbation $f$ if it can be covered by finitely many
arcs directly open for diffusion.}
\vglue3.truept

Note that the non resonance condition is a very strong condition: except
for very special $f$ we can expect to find open diffusion paths only when
$f$ is a trigonometric polynomial. In the latter case, however, it is clear
that, in general, there will be many open, possibly very long, such paths.

Consider a diffusion path and assume that property 1) of the above
definition is verified because $f$ is a trigonometric polynomial with no
non vanishing coefficients $f_{\V\n}$ corresponding to $\V\n$'s for which
$\oo_{s}\cdot\V\n=0$ for some $s$.  Then given a point of parameter $s$ on
the path, it will be generically true that $s$ is inside some arc of $\LL$
directly open for diffusion under $f$: the genericity is with respect to
the choices of the non zero coefficients of the trigonometric polynomial
$f$.  This is a consequence of the explicit formula \equ(4.6) and of the
remark that one can change rather arbitrarily the function $M_f(\aa,s)$ by
changing $f$ and the change is effectively computable.


Our main result in the above anisochronous, \ap unstable, case is
the following.
\vskip0.5truecm

\noindent{\it {\bf Proposition}: Consider a hamiltonian like \equ(2.9)
with $H_0$ verifying the assumptions 1$\div$3 of \S2 and $f$ being a
trigonometric polynomial of degree $d$.

\noindent
Given a diffusion path $\LL$ directly open for diffusion, suppose that
$\oo(\AA)\cdot\V\n$
$\=$ $\dpr_\AA h(\AA,0)\cdot \nn$ $\ne0$
for $\AA$ in $\LL$ and for all $|\V\n|<c\,d$, for
some constant $c>0$.

If $c$ is large enough then one can find, for all $\m\ne0$ small
enough, initial data with
``fast action variables" (\ie $\AA$ variables) {\sl
close} to one extreme of $\LL$, $\AA^{1}$, and ``slow variables" (\ie
$(I,\f)$) {\sl close} to the selected unstable equilibrium position,
which evolve, {\sl drift},
into data with the $\AA$ variables {\sl
close} to the other extreme, $\AA^{2}$, of $\LL$.  And {\sl close} can
be taken to mean {\sl within a distance} $\d_\m\tende{\m\to0}0$.

One can find constants $T_1,c_1>0$ such that:
%
$$T(\m)\=T_1 e^{c_1|\m|^{-2}}\Eq(4.11)$$
%
provides {\sl an upper bound} to the minimum time necessary for the
drift from $\AA^1$ to $\AA^2$.

If the path $\LL$ is open for diffusion, but not directly open,
one can show the same result with a function $T(\m)$ whose expression
will depend on the structure of $\LL$: in particular, it will depend
on the number of segments directly open for diffusion.}
\vskip0.5truecm


The above proposition does not convey all the information that we gather by
proving it: the dimensional nature of our bounds makes them very flexible
and we use them in the later sections of this paper to cover a variety of
cases in which the non degeneracy conditions are not verified, and
eventually lead us to the result on the \ap stable heavenly problem
described in the introduction.
%
\vglue2.truecm

\penalty-200

{\bf\S5 Existence of ladders of whiskers.}

\penalty10000

\vskip0.5truecm\numsec=5\numfor=1

\penalty10000

In this section we consider a hamiltonian \equ(2.9) verifying the
assumptions 1$\div$3 of \S 2 and study the persistence of the unperturbed
whiskered tori and their regularity properties (see, also, [M],
[Gr], [Z]).

The basic technical facts concerning the existence of the $l-1$
dimensional invariant tori and the normal form of the flow in their
vicinity are stated in the following lemmata 1,1' and in lemma 2
(formulated after the proofs).

Since the theorem presented in the lemmata is a local theorem in the
vicinity of the unperturbed invariant tori, it is useful to introduce a
system of coordinates in which it is most conveniently studied. Thus we
introduce a new system of canonical coordinates $(I,\AA,\f,\aa)=
\lis \RR_\m(p_0,q_0,\AA_0,\aa_0)$
defined by a canonical transformation $\lis \RR_\m$
enjoying the properties explained in the following lemma.

Let $C_\x\=\{z\,\big|\, e^{-\x}<|z|<e^\x\}$, and consider the sets of the
points $I,\AA,\z,\zz,\m\in U$ and, respectively, $p,q,\AA,\zz,\m\in W$
with:
%
$$\eqalign{
U(\r',\r,\x',\x,\bar\m,\V a)\=&\bigl\{|I|\le\r',\,
|A_i- a_i|\le\r,\,\z\in C_{\x'},\,z_j\in C_\x,\,|\m|\le\bar\m\bigr\}\cr
W(\k,\r,\x,\bar\m,\V a)\=&\bigl\{|p|,|q|<\k,\, |A_{j}-a_j|<\r,\,
z_j\in C_\x,\,|\m|<\bar\m\bigr\}\cr}\Eq(5.1)$$
%
Recall the definition of $V$ (beginning of \S 2) and that $H$ in \equ(2.9)
is holomorphic in $U(\r', \r,$
$\x',$  $\x, $ $\bar \m,$ $\V a)$ (assumption 2, \S 2).
%
\vskip0.3truecm
\noindent{\it {\bf Lemma 0}:
For all $\V a\in V$ there exist positive constants
$\bar \k_0,\bar \r_0,\bar \x_0$
and a canonical transformation $(I,\f,\AA,\aa)=$
$\lis \RR_\m(p_0,q_0,\AA_0,\aa_0)$ defined and holomorphic in
$W(\bar \k_0,\bar \r_0,\bar \x_0,\bar \m,\V a)$ with values in a domain
$U(\r',\r,\x',\x,\bar\m,\V a)$ of holomorphy of \equ(2.9) and casting
$H$ in the form:
%
$$\eqalign{ &h_0(\AA_0,p_0q_0,\m)+f_0(\AA_0,\aa_0,p_0,q_0,\m),\cr
&h_0(\AA_0,J,\m)= h(\AA_0,\m)+G(J,\AA_0,\m),\qquad \dpr_J
G(0,\AA_0,\m)\=g(\AA_0,\m)\cr}\Eq(5.2)$$
%
where $f_0$ is divisible by $\m$ and $h_0,f_0$ are analytic in
$W(\bar \k_0,\bar \r_0,\bar \x_0,\bar \m,\V a)$.
Expressions for possible values of $\kb_0,\rb_0,\xb_0$
in terms of $\r',\r,\x',\x,\mb$ and of a few constants depending on $h,f$
can be found in appendix A3, see \equ(A3.39).}
\vskip0.5truecm
The {\bf proof} is given in appendix A3.

The map $\lis \RR_\m$ will have the form:
%
$$\eqalign{
I=&R(\AA_0,p_0,q_0,\m),\qquad\kern1.1truecm\f=S(\AA_0,p_0,q_0,\m)\cr
\aa=&\aa_0+\V\d(\AA_0,p_0,q_0,\m),\qquad\kern0.2truecm
\AA=\AA_0\cr}\Eq(5.3)$$
%
with $R,S,\V \d$ real--analytic in $W(\bar \k_0,\bar \r_0,\bar
\x_0,\bar \m,\V a)$ (often \equ(5.3) will be supposed to act also on
the variable $\m$, trivially changing $\m$ into itself).

The result in lemma 0 is well known: it extends a celebrated theorem by
Jacobi who proved the above lemma in a variety of cases, first of all
for the standard pendulum.  In the latter case the Jacobi map $\lis
\RR_\m$ can be constructed quite explicitly by using the theory of
the jacobian elliptic functions, see appendix A9.

Lemma 1 below gives us a {\it normal form} for the hamiltonian flow
near the unperturbed whiskers. It tells us that {\it most} of the
structure of unstable tori and of corresponding manifolds survives the
onset of the perturbation. In particular the tori are obtained by
setting suitable coordinates $p,q$ equal to $0$; and the whiskers, in
the vicinity of the tori, are obtained by setting $p=0$ (unstable
whisker) or $q=0$ (stable whisker). The whisker ladder still exists,
with a {\it few rounds missing} (where $s\not\in\Si_\m$, see below).
\vglue0.5truecm
%
\noindent{\it{\bf Lemma 1}: Consider a hamiltonian
\equ(2.9), verifying the
assumptions 1$\div$3 of \S2.  Let $\LL$ be a diffusion path $s\to\AA_s$ with
energy $E$ (see 1),2) of \S3), and let $s\to\TT_0(s)\=\TT_0(\AA_s)$ be the
family of $(l-1)$-dimensional tori, see \equ(3.3), associated with $\LL$.
Suppose that $\bigcup_{\V a\in\LL}U(\r',\r,\x',\x,\bar\m,\V a)$,
which is contained in the holomorphy domain of \equ(2.9),
is a region where the map $\lis {\RR}_\m$ can be defined
via lemma 0 above: \ie $U(\r',\r,\x',\x,\bar\m,\V a)\supset \lis \RR_\m
W(\kb_0,\rb_0,$ $\xb_0,$ $\mb,\V a)$.

For fixed $n>0$ and real $\m$, there exists, on the energy level $E$ of the
perturbed system, a family $s\to\TT_\m(s)$ of $(l-1)$-dimensional
``whiskered" tori,
$C^n$-close to the line of tori $s\to\TT_0(s)$ within $O(\m)$ as $\m\to0$,
which for $\m$ small enough verify the following properties:
\item{1) }
There exist positive constants $c,\bar c, K, k$ such that
the tori $\TT_\m(s)$ are invariant for
$s\in\Si_\m\subset[s_1,s_2]$ where: $\Si_\m\=\{s|\,C(s)<k|\m|^{-1/\cb}\}$
and:
%
$$(s_2-s_1)^{-1}\ii_{\Si_\m}ds\geq(1-K|\m|^{1/c})\Eq(5.4)$$
%
\item{2) } The tori $\TT_\m(s)$ are part of a family of
$l$--dimensional invariant surfaces
having energy $E$ and parameterized by $\V\ps\in T^{l-1}$, $|p|,|q|<
\bar \k$, for some $\bar \k>0$, as:
%
$$\eqalign{\AA=&\AA'+\V\X(\V\ps,p,q,s,\m)\quad
\kern2.2truecm\aa=\V\ps+\V\D(\V\ps,p,q,s,\m)+\V\d(\AA',p,q,\m)\cr
I=&R(\AA',p,q,\m)+\L(\V\ps,p,q,s,\m)\quad\kern0.15truecm\f=
S(\AA',p,q,\m)+\Th(\V\ps,p,q,s,\m)\cr}\Eq(5.5)$$
%
where $\AA'\=\AA_s(pq,\m)$ with $\AA_s(J,\m)$ analytic in $J$,
$C^n$--smooth in $J,s,\m$ and $\AA_s(0,0)$ coincides with the diffusion
curve $\AA_s$; $\V\X,\V\D,\L,\Th$ are analytic in $\V\psi,p,q$,
divisible by $\m$ and $C^n$-smooth in $\V\ps,p,q,s,\m$, and $R,S,\V\d$
are as in lemma 0 (hence depend on $s,\m,p,q$ only and are analytic in
their variables).
%
\item{3) } There are functions $\g'(J,s,\m),\g(J,s,\m)$ analytic in $J$
for $|J|<\bar \k^2$, $C^n$-smooth in $J,s,\m$ and
divisible by $\m$ if $J=0$ (and by $J$ if $\m=0$), such that the motion
on the invariant surfaces is simply:
%
$$\V\ps(t)=\V\ps+(1+\g)\oo_st,\qquad
p(t)=pe^{-g_s(1+\g')t},\qquad q(t)=q e^{+g_s(1+\g')t}\Eq(5.6)$$
%
where $\g=\g(pq,s,\m),\g'=\g'(pq,s,\m), g_s\equiv
g(\AA_s,0)$, see \equ(2.5), $\oo_s\equiv \oo(\AA_s)$,
see \equ(3.1).
Hence the tori $\TT_\m(s)$ and their stable/unstable whiskers $W_\m(s)$
are obtained by setting in \equ(5.5), respectively, $p=q=0$; $p\ne 0$, $q=0$;
and $p=0$, $q\ne 0$.

\item{4) } The smallness condition on $\m$ and the constants $k,K,\cb,c,
\bar \k$ can
be given an explicit dimensional form in terms of a few parameters
associated with $h,f$,
(see \equ(5.76), \equ(5.90), \equ(5.82),
\equ(5.67), \equ(5.18) below); similarly one can construct
explicit bounds on the smallness of $\V\X,\V\D,\L,\Th,\g,\g'$, (see lemma 2
and \equ(5.89),\equ(5.79) ,\equ(5.88) below).}
\vglue0.5truecm
Instead of fixing the energy $E$ of the invariant tori and the frequency
ratios of the corresponding quasi periodic motions one can fix the
frequencies (\ie $\g$ in \equ(5.6)) at the cost of leaving $E$ free.

Calling $s\to \AA_{s}$ the diffusion curve equation and
defining the two functions $\oo_s=\dpr_{\AA} h_0(\AA_s,0,0)$, and
$g_s=\dpr_J h_0(\AA_s,0,0)$,
see \equ(5.2), we introduce a real parameter $u$ and
consider the vectors:
%
$$\oo_{su}=(1+u)\oo_s,\quad u\ {\rm real}\ ,
\qquad \Big( \ \oo_s\=\dpr_\AA h_0(\AA_s,0,0) \ \Big)\Eq(5.7)$$
%
We define the {\it diffusion sheet} $\tilde\LL$: $(s,u)\to\AA_{su}$ by:
%
$$\dpr_\AA h_0(\AA_{su},0,0)=\oo_{su}\Eq(5.8)$$
%
This is well defined, taking into account the non degeneracy conditions
\equ(2.7), by the implicit function theorem, if $|u|$ is small
enough. We shall suppose that $u$ varies in an interval $[-\bar u,\bar
u]$ so small that:
%
$$
{\rm setting\ } \dpr_J
h_0(\AA_{su},0,0)=(1+u'_{su})g_s\equiv g_{su}\ \  {\rm it\  is\ }:
\ \ |u|,|u'_{su}|<4^{-1}\Eq(5.9)$$
%
More stringent requirements on $\bar u$ will be imposed later.

One then obtains
results similar to those described in lemma 1 with the basic difference
that all the main functions will be {\it analytic also in  $\m$
near $\m=0$}, and the energy of the motions on the invariant surfaces will
{\it no longer be  fixed}. More precisely one obtains the following
statement:

\vglue0.5truecm
\noindent{\it {\bf Lemma 1'}:
Consider, as in lemma 1, a hamiltonian \equ(2.9), verifying the
assumptions 1$\div$3 of \S2.  Let $\LL$ be a diffusion path $s\to\AA_s$ with
energy $E$, and let $s,u\to\AA_{su}$ be the diffusion sheet, defined in
\equ(5.7),\equ(5.8), and let $s,u\to\TT_0(s,u)$  be the family of
$(l-1)$-dimensional tori (see \equ(3.3) with $\AA=\AA_{su}$) associated
with $\LL$.  Suppose, as in lemma 1,
that $\bigcup_{\V a\in\LL}U(\r',\r,\x',\x,\bar\m,\V
a)$, is a region where a map $\lis \RR_\m$ can be defined via lemma 0.

Fix $n>0$, let  $u$ be real and small, and $\m$  complex.
Then there exists a family $s,u\to\TT_\m(s,u)$ of $(l-1)$-dimensional
``whiskered" tori, $C^n$-close to the
sheet of tori $s,u\to\TT_0(s,u)$ as $\m\to0$, which for $\m$ small enough
verify the following properties:
\item{1) } The tori $\TT_\m(s,u)$ are invariant for
$s\in\Si_\m\subset[s_1,s_2]$ and for $u$
$\in [-\bar u,\bar u]$ for a suitable $\bar u>0$:
here $\Si_\m$ is the same set defined in 1) of lemma 1 and verifies the
same bound \equ(5.4) (same constants).
%
\item{2) } The tori $\TT_\m(s,u)$ are part of a family of invariant
$l$--dimensional surfaces parameterized by $\V\ps\in T^{l-1}$, $|p|,|q|
<\bar \k$, ($\bar \k$ as in lemma 1), as:
%
$$\eqalign{\AA=&\AA'+\V\X(\V\ps,p,q,s,u,\m)\quad
\kern1.6truecm\aa=\V\ps+\V \d(\AA',p,q,\m)+\V\D(\V\ps,p,q,s,u,\m)\cr
I=&R(\AA',p,q,\m)+\L(\V\ps,p,q,s,u,\m)\kern0.1truecm\quad\f=
S(\AA',p,q,\m)+\Th(\V\ps,p,q,s,u,\m)\cr}\Eq(5.10)$$
%
where $\AA'\=\AA_{su}(pq,\m)$ with $\AA_{su}(J,\m)$ analytic in $J,\m$,
$C^n$--smooth in $J,\m,s,u$ and $\AA_{su}(0,$ $0)$
$=\AA_{su}$ (see \equ(5.8));
%
$\V\X,\V\D,\L,\Th$ are analytic in $\V\ps,p,q,\m$, divisible
by $\m$, and $C^n$-smooth in all their arguments, and $R,S,\V\d$,
which  depend on $s,\m,p,q$ only, are as in \equ(5.3).
%
\item{3) } There is a function $\g'(J,s,u,\m)$ analytic in $J,\m$
for $|J|<\bar \k^2$ and $\m$ small enough, $C^n$-smooth in $s,u,\m,J$ and
divisible by $\m$ if $J=0$ (and by $J$ if $\m=0$), such that the motion
on the invariant surfaces is simply:
%
$$\V\ps(t)=\V\ps+\oo_{su}t,\qquad
p(t)=pe^{-g_{su}(1+\g')t},\qquad q(t)=q e^{+g_{su}(1+\g')t}\Eq(5.11)$$
%
where $\g'=\g'(pq,s,u,\m), g_{su}\equiv g(\AA_{su},0),\oo_{su}\equiv
(1+u)\oo_s$ and $\g\equiv u$ is now fixed {\sl a priori}.
\item{4) }
The constants $k,K,\bar c,c,\bar \k$ are as in lemma 1 above; furthermore
the smallness condition on $|\m|$ and the
(new) functions $\V\X,\V\D,\L,\Th,\g'$  satisfy the same bounds
of the corresponding objects of lemma 1 (see point 4) of lemma 1).}

\vglue0.5truecm
In fact the strategy of our analysis will be to prove lemma 1' first and
deduce lemma 1 by showing that the parameter $u$ can be determined so that
the real part of the energy {\it maintains a prefixed value $E$}.
\vglue0.3truecm
\noindent{\bf Proof}: the first step is to change variables
$(I,\f,\AA,\aa)\to (p_0,q_0,\AA_0,\aa_0)$ using the canonical change of
coordinates of lemma 0 to put \equ(2.9) in the form \equ(5.2).

By our assumption this is possible and we call $\kb_0,\rb_0,\xb_0$
parameters such that $\lis \RR_\m W(\kb_0,$ $\rb_0,$ $\xb_0,$
$\mb,\V a)$ is, for all
$\V a\in\LL$ contained in the set $\bigcup_{\V a\in\LL}U(\r',\r,\x',\x,\mb,\V
a)$ where the hamiltonian is defined.

In this way we define $h_0,f_0$ on $W=W(\kb_0,\rb_0,\xb_0,\mb,\V a)$
for all $\V a\in\LL$. Let
$E_0,\h_0, \G_0$ be the suprema, in $W$ and $\V a\in \LL$,
of the functions $||\dpr h_0||$
and $||(\dpr^2_\AA h_0)^{-1}||,\,||(\dpr_J h_0)^{-1}||$, respectively.  The
norm of a vector or matrix will be, for simplicity, the maximum of the
components.

Consider the equation \equ(5.8). By a simple implicit function analysis
we see that if:
%
$$|u|<\tilde u\={1\over\bar B^2(E_0\h_0\rb_0^{-1})^2}\Eq(5.12)$$
%
for $\bar B$ large enough, it admits a solution $\AA_{su}$ such that,
(see appendix A4, \equ(A4.3)):
%
$$|\AA_{su}-\AA_s|<\tilde\r\={\rb_0\over (\bar B E_0\h_0\rb_0^{-1})}
<\rb_0/4\Eq(5.13)$$
%
We also consider the equation:
%
$$\dpr_\AA h_0(\AA,J,\m)=\oo_s(1+u)\=\oo_{su}\Eq(5.14)$$
%
and we see that if $|u|$ verifies \equ(5.12) and
$|J|<\tilde \k^2,\,|\m|<\tilde\m$ with:
%
$$\kt\={\kb_0\over\bar B(E_0\h_0\rb_0^{-1})},\qquad\mt\={\mb\over \bar
B^2(E_0\h_0\rb_0^{-1})^2}\Eq(5.15)$$
%
then the equation has a solution $\AA^0(s,u,J,\m)$ close to $\AA_{su}$
within $\rb_0/4$, (see appendix A4,
\equ(A4.5)) and, obviously, $\AA^0(s,u,0,0)\=\AA_{su}$.

Recalling that $g_{su}\=\dpr_J h_0(\AA_{su},0,0)$ and setting:
%
$$(1+u'(s,u,\AA,J,\m))g_{su}\=\dpr_J h_0(\AA,J,\m),\qquad
\quad\l_0\=\sup |u'|\Eq(5.16)$$
%
we find that in a domain $|\AA-\AA^0(s,u,J,\m)|<\r_0,\ |J|<\k_0^2,
|\m|<\m_0$ the following bound holds for a suitable constant $\hat B$:
%
$$\l_0=\sup|u'|\le2l \G_0
E_0\Bigl({\kt^2\over\kb_0^2}+{\rt\over\rb_0}+{\mt\over\mb}\Bigr)
\le \hat B (\G_0 E_0) \ (E_0\h_0\rb_0^{-1})^2\Eq(5.17)$$
%
see appendix A4, \equ(A4.6);
in such bounds we have used ``dimensional" (or ``Cauchy") estimates:
see below.

Therefore we can fix $\r_0,\k_0^2,\bar u$ so that
\equ(5.12) holds (hence $|u|<1/4$) and also
$4\l_0\G_0 E_0<1$
(hence $|u'|,$ $|u'_{su}|$ $<$ $1/4$,
because $|u'_{su}|$ can also be bounded by the r.h.s. of \equ(5.17) by
a similar estimate; see \equ(5.9) for the definition of $u'_{su}$).
A possible choice is:
%
$$\eqalign{
\bar u\=&{1\over \bar B^2(E_0\h_0\bar\r_0^{-1})^2},\qquad\kern2.truecm
\m_0\=\min\{
{\bar\m\over \bar B^2(E_0\h_0\rb_0^{-1})^2(E_0\G_0)^2}\ ,\ 1\}\cr
\k_0\=&{\kb_0\over\bar B\,(E_0\h_0\rb_0^{-1})}{1\over (E_0\G_0)},\qquad
\r_0\=\min\{
{\rb_0\over\bar B (E_0\h_0\rb_0^{-1})(E_0\G_0)^2}\ ,\ \k_0^2\}\ ,\qquad
\x_0\=\xb_0\cr} \Eq(5.18)$$
%
where $\r_0,\m_0$ are taken to be necessarily smaller than $\k^2_0$ and
$1$, respectively, for later convenience.
The constant $\bar B$ can be taken to be the same in all the above
formulae, possibly readjusting it (to avoid the introduction of too many
symbols, {\it a procedure that we shall use very often below}).

The functions $h_0,f_0$ will be holomorphic in the new coordinates in a
domain that we have, to some extent, tailored to our needs. They will, in
fact, be regarded as holomorphic in a domain containing:
%
$$\eqalignno{
&W_0\=W(\k_0,\r_0,\x_0,\m_0)\=&\eq(5.19)\cr
& \=\bigcup_{s,u\in\tilde\II_0}
\{ |p_0|, |q_0|<\k_0,\,
|A_{0j}-A^0_j(s,u,p_0q_0,\m)|<\r_0\ ,
e^{-\x_0}<|z_j|<e^{\x_0},\,
|\m|<\m_0\}\kern1.truecm\cr}$$
%
where the sheet $(s,u)\to \AA^0(s,u,J,\m)$ is defined by \equ(5.14) with:
%
$$(s,u)\in\tilde\II_0\=\II_0\times[-\bar u,\bar u]\ ,
\quad \II_0\=\Si(C_0)\quad {\rm for\  some\ } C_0>\G_0\Eq(5.20)$$
%
In the coming analysis the constant $C_0$ will be left as a {\it free
parameter} and will be chosen at the end in order to check \equ(5.4).
Thus, using
$|u|<1/4$,  in $\tilde \II_0$ it will be true that:
%
$$\eqalign{
|\oo_{su}\cdot\nn|^{-1}&\le C_0|\nn|^\t,\qquad \forall \nn\in
Z^{l-1},\,\nn\ne\V0\cr|g_{su}|^{-1}&\le\G_0\cr}\Eq(5.21)$$
%
where, see \equ(3.1), $\t$ is a diophantine constant.
Note that the just introduced parameters $\k_0,\r_0,\x_0,\m_0$ are not, in any
sense, the maximal ones compatible with the analyticity properties of
$h_0,f_0$.

All our arguments will have {\it dimensional nature} involving combinations
of the {\it sizes} of various functions, hence it is convenient to define
the size of a function $\V F$, holomorphic in a domain $W$, as:
%
$$||\V F|| \=||\V F||_W\=
\sup_{j,W}|F_j(p,q,\AA,\V z,\m)|\Eq(5.22)$$
%
where, of course, the symbol $||\cdot||$ is incomplete and (therefore) it
will be always accompanied by the specification of the domain $W$
considered in evaluating \equ(5.22), unless obvious from the context.

Let us collect here the positive parameters
$E_0,\G_0,\e_0,\h_0,\th_0$ that we use to measure the size of $h_0$, $f_0$
(compare \equ(2.7)):
%
$$\eqalign{
||\dpr h_0||\le E_0,\quad&||(\dpr_J h_0)^{-1} ||\le \G_0,
\quad\qquad||f_0||\le\e_0,\cr
||(\dpr^2_\AA h_0)^{-1}||\le \h_0,\quad&||[(\dpr^2_\AA h_0)^{-1}
\Dpr_\AA h_0\cdot\Dpr_\AA h_0]^{-1}||\le\th_0\cr}\Eq(5.23)$$
%
where $||\cdot||$ is considered in $W_0$,
see \equ(5.19). This is consistent with the
previous meaning and usage of the previously defined values of $E_0,\h_0,\G_0$.

The holomorphy of $h_0,f_0$ imposes restrictions on the relative values
of the above constants; namely there exists $B_0>0$ depending only on
the number $l$ of degrees of freedom and such that:
%
$$E_0C_0 >E_0\G_0\ge B_0,\quad\h_0 E_0\r^{-1}_0\ge B_0,\quad
\th_0 E^2_0\h_0\ge B_0\Eq(5.24)$$
%
which we will repeatedly use for the purpose of
simplifying bounds, at the expense of their sharpness; (one can take
$B_0=l^{-1}$, see appendix A4).

{\it The quantities in \equ(5.24) have the physical interpretation of
ratios of the various time scales relevant for our problem}.

Our basic tool (already used in obtaining \equ(5.17))
for bounds on a function $F$, of one variable,
holomorphic in a domain $\DD$ will be to restrict it to a smaller domain
$\DD'\subset \DD$ and to estimate the $n$-th derivative of $F$ in $\DD'$
by $n!$ times $r^{-n}$, with $r=$ distance between $\DD'$ and $\dpr\DD$,
times the supremum of $F$ in $\DD$. We call such a bound a {\it
dimensional estimate}: it is a consequence of (one among) Cauchy's
theorem(s).

In performing dimensional bounds it is convenient to deal with {\it
dimensionless combinations} of the main parameters \equ(5.23). Thus all our
bounds will naturally involve the following dimensionless combinations
of the parameters $E_0,\h_0,C_0,\G_0,\th_0,\e_0,\r_0,\k_0,\x_0,\l_0,\m_0$
that we have associated with our hamiltonian (see \equ(5.23),
\equ(5.18), \equ(5.20), \equ(5.17),\equ(5.19)):
%
$$\eqalign{
&E_0C_0,\ C_0\G_0^{-1},\ \h_0E_0\r_0^{-1},\ \th_0E_0^2\h_0,\
\k_0^2\r_0^{-1}, \ \xc_0^{-1}\=\x_0^{-1}(1+\x_0),\ \m_0,\ \l_0\cr
&\e_0E_0^{-1}\r_0^{-1}}\Eq(5.25)$$
%
and we see, from \equ(5.17),\equ(5.24),\equ(5.20), and from
\equ(5.18) and the comment following it, that all the elements of the
first line are $\ge B_0>0$; we shall impose, without loss of
generality, that the element of the second line is $\le1/2$.

To help reading the formulae we often enclose in parentheses the above
dimensionless combinations of parameters, even though they may not be
necessary.

Given a function $F$ holomorphic on $W_0$, see \equ(5.19), we introduce the
Fourier coefficients $F_\nn(\AA,p,q)$ and the Taylor coefficients
$F_{,hk}(\AA,\zz)$ of the expansions:
%
$$F(\AA,\zz,p,q)=\sum_{\nn\in Z^{l-1}} F_\nn(\AA,p,q)\zz^{\,\nn}=
\sum_{h,k=0}^\i F_{,hk}(\AA,\zz)p^hq^k\Eq(5.26)$$
%
where $z_j=\exp i\a_j$ and $\zz^{\,\nn}=\prod_jz_j^{\n_j}=\prod_j
e^{i\n_j\a_j}$, (the latter two notations will be used interchangeably).

Thus we can introduce the following functions ({\it truncations of $F$}),
for $N\ge0,|\nn|\equiv\sum_j|\n_j|$:
%
$$\eqalign{
F^{[\le N]}(\AA,\zz,p,q)=&\sum_{|\nn|\le N} F_\nn(\AA,p,q)
e^{i\nn\aa},\qquad\quad F^{[>N]}\equiv F-F^{[\le N]}\cr
F^D(\AA,\zz,w)=&\sum_{h\ge0} F_{,hh}(\AA,\zz) w^h\cr}\Eq(5.27)$$
%

We can now begin our sequence of estimates leading to the proof of
lemma 1' and lemma 1.

The following dimensional estimates hold for various truncations of
$f_0$; given $N_0,\d_0$:
%
$$\eqalign{
||f_0^{[\le N_0]}||\le&B_1\e_0\xc_0^{-\b_1}\d_0^{-\b_1},\qquad
||f_0^{[>N_0]}||\le B_1\e_0\xc_0^{-\b_1}\d_0^{-\b_1}e^{-\x_0\d_0N_0/2},\cr
||f_0^{[\le N_0]D}||\le&B_1\e_0\xc_0^{-\b_1}\d_0^{-\b_1},\qquad
||f_0^{[\le N_0]}-f_0^{[\le N_0]D}||\le B_1\e_0\xc_0^{-\b_1}\d_0^{-\b_1}\cr}
\Eq(5.28)$$
%
where the $||\cdot||$ is evaluated from \equ(5.22) on the domain
$W(\k_0e^{-\d_0},\r_0,\x_0e^{-\d_0},\m_0)$ and the inequalities express
simple dimensional estimates in the sense defined above: the constants
that arise have been adjusted so that only the two parameters
$B_1,\b_1>0$ are needed. Sharper bounds would require more constants;
but we are not interested in sharpness of the estimates (in this paper).

We assign, \ap, a sequence $\d_0>\d_1\ldots$ of positive numbers such that
$4\sum_{j=0}^\i \d_j<\log2$ and such that $\d_j$ does not approach zero too
fast (\eg $\d_j=(1+j^2)^{-1}2^{-4}\log2$): it will be a set of auxiliary
parameters that we shall use in our inductive construction.  Below we
introduce sequences of other parameters $B_1,B_2,B_3,\ldots$ and
$\b_1,\b_2,\b_3,\ldots$, depending only on the number of degrees of freedom
$l$
(and on the diophantine constant $\t$, see \equ(3.1)),
and we shall suppose the $B_j$'s and the $\b_j$'s increasing (there
will be, however, only finitely many such constants).

Let $N_0$ be such that \equ(5.28) implies $||f_0^{[> N_0]}||\le O(\e_0^2)$;
for instance, recalling that $\e_0 E^{-1}_0\r_0^{-1}<1/2$,
by the remark following \equ(5.25):
%
$$N_0=-2\x_0^{-1}\d_0^{-1}\log(\e_0 E_0^{-1}\r_0^{-1})
\quad\Rightarrow\quad||f_0^{[>N_0]}||\le
B_1\xc_0^{-\b_1}\d_0^{-\b_1}\e_0^2E_0^{-1}\r_0^{-1}
\Eq(5.29)$$

Calling $(\AA_0,\aa_0,p_0,q_0)$ the canonical coordinates in which we
describe our initial hamiltonian as in \equ(5.2), we consider the canonical
map defined via a generating function which, denoting the new variables
with a prime, is a function $\F(\AA',p',\aa_0,q_0,\m)$ given by:
%
$$\oo_0\cdot\Dpr_{\aa_0}\F+g_0\ [q_0\dpr_{q_0}\F-p'\dpr_{p'}\F]
=-f^{[\le N_0]}_0+\lis{f_0^{[\le N_0]}}^D\Eq(5.30)$$
%
where $g_0\equiv g_0(\AA',J,\m)\=\dpr_J h_0(\AA',J,\m)$,
$\oo_0\=\oo(\AA',J,\m)\=\dpr_{\AA'}h_0(\AA',J,\m)$ with $J\equiv p'q_0$
and the bar denotes average over the $\aa$-variables.

The function $\F$ can be written:
%
$$\F(\AA',\aa_0,p',q_0,\m)=\sum_{|h-k|+|\nn|>0\atop|\nn|\le N_0}
{f_{0\nn,hk}(\AA',\m)e^{i\nn\cdot\aa_0}{p'}^hq_0^k\over
-i\oo_0(\AA',p'q_0,\m)\cdot\nn-g_0(\AA',p'q_0,\m)(k-h)}\Eq(5.31)$$
%
The function $\F$ is defined in a domain:
%
$$W(\k_0e^{-\d_0},\tilde\r_0,\x_0e^{-\d_0},\m_0),\quad\tilde\r_0<\r_0
\Eq(5.32)$$
%
(hence smaller than the one where \equ(5.28) hold), where $\tilde \r_0$ is
so chosen to control the denominators in \equ(5.31). By dimensional
bounds one checks easily that if:
%
$$\tilde \r_0=\r_0 [4 l E_0C_0N_0^{\t+1}]^{-1},\qquad\l_0E_0\G_0<4^{-1}
\Eq(5.33)$$
%
(cfr. also \equ(5.18)) then, for $|A'_i - A^0_{i} (s,u,J,\m)| <
\tilde \r_0$, $0<|\nn| \leq N_0$ and $|J| < \k_0^2$ one has:
%
$$|-i\oo_0(\AA',J,\m)
\cdot\nn+g_0(\AA',J,\m)(h-k)|^{-1}\le 2C_0(|\nn|^\t+|h-k|)\Eq(5.34)$$
%
see appendix A5.

The last inequality can be combined with dimensional bounds to imply:
%
$$||\F||\leq B_2\xc_0^{-\b_2}\d_0^{-\b_2}\,\e_0C_0\Eq(5.35)$$
%
for suitably chosen $B_2,\b_2>0$, and in the domain \equ(5.32).

The canonical map associated with $\F$ is generated by the following
standard relations
(omitting the explicit $\m$--dependence):
%
$$\eqalign{\AA_0&=\AA'+\dpr_{\aa_0}\F(\AA',\aa_0,p',q_0),\quad
p_0=p'+\dpr_{q_0}\F(\AA',\aa_0,p',q_0),\cr
\aa'&=\aa_0 + \dpr_{\AA'}\F(\AA',\aa_0,p',q_0),\quad
q'=q_0+\dpr_{p'}\F(\AA',\aa_0,p',q_0)\cr}
\Eq(5.36)$$
%
which could be written in the more precise complex variables notation,
(see comment after \equ(5.26)),
by replacing $\aa_0$ by $\zz_0$ in the argument of $\F$, writing $i z_{0j}
\dpr_{z_{0j}}$ for $\dpr_{\a_{0j}}$ and replacing the third of \equ(5.36)
by:
%
$$z'_j=z_{0j} \exp [i \dpr_{A'_j} \F (\AA', \zz_0, p',q_0)]\Eq(5.37)$$
%

To obtain a map $\tilde \CC$ from \equ(5.36), one has to use the
implicit functions theorem: in so doing the domain of definition of
$\tilde \CC$ has to be taken somewhat smaller than the domain,
\equ(5.32), of
definition of $\F$.  If we want $\tilde \CC (\AA', \aa', p',q',\m)$ to
be defined on the domain:
%
$$\tilde W\=W(\k_0e^{-2 \d_0},\tilde\r_0 e^{-\d_0},\xc_0e^{-2\d_0},\m_0)
\Eq(5.38)$$
%
(\ie ``just giving up regularity" by an extra $\d_0$) we must impose a
condition implying that it is $\tilde\r_0^{-1}\xc_0^{-1}
\d_0^{-2}||\F||\ll 1$, \ie :
%
$$x\=B_3 \xc_0^{-\b_3}
\d_0^{-\b_3}(\e_0C_0 \r_0^{-1})(E_0C_0)N_0^{\t+1} < 1\Eq(5.39)$$
%
with $B_3,\b_3 $ conveniently large.

This follows from a trivial implicit function theorem.  After a moment of
thought one realizes that such a condition implies at the same time the
injectivity of the map \equ(5.36), the non vanishing of its jacobian and it
also imposes that the image of the boundary of the domain $W(\k_0
e^{-\d_0}, \tilde\r_0,\x_0 e^{-\d_0})$ where $||\F||$ is defined stays well
away from the boundary of $\tilde W$: in appendix A4
we have called such an argument an {\it
image of the boundary} lemma, (see, for instance, [G], \S 5.11).  Here
\equ(5.18), \ie $\r_0<\k_0^2$, has been used to eliminate $\k_0$ from the
condition.

The map $\tilde\CC$: $(\AA',\aa',p',q',\m)\in \tilde W
\to (\AA_0,\aa_0,p_0,q_0)$ will take the form:
%
$$\eqalign{
\AA_0=&\AA'+\V\X_0(\AA',\aa',p',q')\qquad\kern1.1truecm
p_0=p'+\L_0(\AA',\aa',p',q')\cr
\aa_0=&\aa'+\V\D_0(\AA',\aa',p',q')\qquad\qquad\kern0.2truecm
q_0=q'+\Th_0(\AA',\aa',p',q')\cr}\Eq(5.40)$$
%
and in the domain $\tilde W$ the bounds:
%
$$\eqalign{
||\V\X_0||<&x\tilde\r_0\d_0,\kern0.3truecm\qquad\quad
||\L_0||<x\k_0\d_0\cr
||\V\D_0||<&x\d_0\kern0.60truecm
\quad\qquad||\Th_0||<x\k_0\d_0\cr}\Eq(5.41)$$
%
are valid, with $x$ defined by \equ(5.39), and $\r_0<\k^2_0$ has been
again used.

The map $\tilde\CC$ will transform the Hamiltonian \equ(5.2) into:
%
$$h_1(\AA',p'q',\m) +f_1(\AA',\aa',p',q',\m)\Eq(5.42)$$
%
where:
%
$$\eqalign{
h_1&=h_0(\AA',p'q',\m)+\overline{f_0}^D (\AA',p'q',\m)\cr
\overline{f_0}^D(\AA', p'q',\m)&\=\sum_{k=0}^{\i} f_{0\V0,kk}(\AA',\m)
(p'q')^k\=\int f_0^D (\AA',\aa',p'q',\m){d\aa'\over (2 \pi )^{l-1}}\cr}
\Eq(5.43)$$
%
The functions $h_1, f_1$ are easily controlled (by ``just
giving up a bit $\d_0$ of regularity'' in each variable) in:
%
$$\lis W=W(\k_0 e^{-3\d_0}, \tilde\r_0
e^{-2 \d_0},\xc_0e^{-3\d_0},\m_0)\Eq(5.44)$$
%
by using dimensional estimates, from \equ(5.40),\equ(5.41) and
along a well known elementary scheme, see [G] \S 5.12; the result is:
%
$$\eqalign{
||\dpr h_1||_{\lis W}&\leq E_0(1+B_4\d_0^{-\b_4}\,(\e_0\r_0^{-1}E_0^{-1}))\cr
||f_1||_{\lis W}&\leq B_4\xc_0^{-\b_4}\d_0^{-\b_4}\e_0(\e_0 E_0^{-1}\r_0^{-1})
(E_0C_0)^2 N_0^{\t+1}\cr}\Eq(5.45)$$

The next step is to study the equations for $\V a\in C^{l-1}$ given by:
%
$$\dpr_{\AA'} h_1 (\AA^0+\V a,J,\m)=\oo_{su},\Eq(5.46)$$
%
with $(s,u)\in\tilde\II_0$, $\AA^0=\AA^0(s,u,J,\m)$,
see \equ(5.20).

By Taylor expansion this can be written, setting
$M_0=\dpr_{\AA\AA}h_0(\AA^0,J,\m)$, as:
%
$$\V a+M_0^{-1}\V n(\V a)=\V0\Eq(5.47)$$
%
and $M_0^{-1}\V n(\V a)\=-\V m(\V a)$ can be bounded by:
%
$$|M_0^{-1}\V n(\V a)|
\le4l^2\h_0(\e_0\r_0^{-1}+E_0\r^2\r_0^{-2})\ ,
\qquad {\rm if} \ \ |\V a|<\r<\tilde\r_0/2
\Eq(5.48)$$
%

The \equ(5.47) can be studied by applying the implicit function
theorem. The usual argument about the ``image of the boundary'' implies the
existence of a unique solution to the equation $\V a=\V m(\V a)$ with $|\V
a|<\r$ if $b||\V m||_\r$ $\r^{-1}<1$ for a suitably large $b$: for instance
[G, proposition 19, p.490]  shows that $b=2^8$ is sufficient (but
$b=2$ would also be sufficient).

Therefore we take:
%
$$\r\={\rt_0(\e_0E_0^{-1}\r_0^{-1})^{\ch}\d_0\over4l^2b(\h_0E_0\r_0^{-1})}
<{\rt_0\d_0(\e_0E_0^{-1}\r_0^{-1})^\ch\over 4}\Eq(5.49)$$
%}
where $\ch\in(0,1)$ is a free parameter that we eventually fix close
to $0$ (\eg $1/4$) and $b$ is as above (\eg $b=2$).

We deduce that a sufficient condition for
the existence of a solution to \equ(5.47) with $|\V a|<\r$ is:
%
$$\eqalign{
1>&4l^2\ b\ \h_0(\e_0\r_0^{-1}+E_0\r^2\r_0^{-2})\r^{-1}\=
4l^2b(\h_0E_0\r_0^{-1})\Bigl(\e_0E_0^{-1}\r_0^{-1}
{\r_0\over\r}+{\r\over\r_0}\Bigr)\Leftarrow\cr
&\Leftarrow
4l^2b(\e_0E_0^{-1}\r_0^{-1})^{1-\ch}{4l^3b(\h_0E_0\r_0^{-1})^2\over\d_0}
{\r_0\over\rt_0}+{1\over2}<1\cr}\Eq(5.50)$$
%
and the latter condition can be imposed by requiring:
%
$$B_5\xc_0^{-\b_5}\d_0^{-\b_5}(\e_0E_0^{-1}\r_0^{-1})^{1-\ch}
(\h_0E_0\r_0^{-1})^2(E_0C_0)
\bigl[-\log(\e_0E_0^{-1}\r_0^{-1})\bigr]^{(\t+1)}<1\Eq(5.51)$$
%
for suitably large $B_5,\b_5$.

Setting $\AA^1(s,u,J,\m)=\AA^0(s,u,J,\m)+\V a$ we get
(see \equ(5.48)):
%
$$|\AA^1(s,u,J,\m)-\AA^0(s,u,J,\m)|<\r<{1\over4}\tilde\r_0\d_0
(\e_0E_0^{-1}\r_0^{-1})^\ch\Eq(5.52)$$
%
The free constant $\ch$ could in fact be taken zero, at the price of
having no $\e_0$ dependence in the r.h.s. of \equ(5.52): a property that
we do not want in later estimates.

Therefore \equ(5.52) insures also that $(\AA,\aa,p,q,\m)
\equiv(\AA^1,\aa,0,0,0)$ lies very well inside the domain, $\overline W$, of
definition of $h_1+f_1$, (\ie of $f_1$).

Choosing suitably $B_6$ and $\b_6$
one easily checks that the two conditions:
%
$$\eqalign{
&B_6\xc_0^{-\b_6}\d_0^{-\b_6}(\e_0E_0^{-1}\r_0^{-1})^{1-\ch}
(\h_0E_0\r_0^{-1})^2(C_0E_0)
[\log(E_0\r_0\e_0^{-1})]^{\t+1}<1\cr
&\l_0\G_0E_0<4^{-1}\cr}\Eq(5.53)$$
%
imply all the conditions imposed so far (\ie imposed in
\equ(5.51),\equ(5.33),\equ(5.39)).

With the above defined $(s,u)\to\AA^1(s,u,J,\m)$
we can define, via \equ(5.19) with $0\to1$, the set:
%
$$W_1\=W(\k_1,\r_1,\x_1,\m_1)\Eq(5.54)$$
%
where:
%
$$\eqalign{
\k_1\=&\k_0e^{-4\d_0},\quad \x_1\=\xc_0e^{-4\d_0},\quad \m_1\=\m_0,\cr
\r_1\=&\r_0\bigl(B_7\xc_0^{-\b_7}\d_0^{-\b_7}(E_0C_0)(\log E_0\r_0/\e_0)^{\t+1}
\bigr)^{-1}\cr
C_1\=&C_0,\quad\G_1\=\G_0\cr}\Eq(5.55)$$
%
Note that for $B_7,\b_7$ large enough it follows that
$\r_1<\tilde\r_0/2$ so that the domain $W_1$ is strictly contained in
the domain $\overline W$, see \equ(5.44), of definition of $h_1,f_1$
and the above definitions, via dimensional estimates, allow us to control
{\it all the derivatives} of $h_1$ and $f_1$ in $W_1$.

The new parameters measuring the size of $h_1,f_1$
(cfr. \equ(5.23), \equ(5.16)) can be taken, by
\equ(5.45), to be any parameters $E_1,\e_1,\h_1,\l_1$ verifying the following
inequalities:
%
$$\eqalign{
&E_1\ge E_0(1+B_8
\d_0^{-\b_8} (\e_0\r_0^{-1}E_0^{-1}))\cr
&\e_1\r_1^{-1}E_1^{-1}\ge B_8\xc_0^{-\b_8}\d_0^{-\b_8}
(\e_0\r_0^{-1}E_0^{-1})^2(E_0C_0)^3(-\log \e_0E_0^{-1}\r_0^{-1})^{2(\t+1)}\cr
&\h_1\ge \h_0\bigl(1+B_8
\d_0^{-\b_8} (\h_0\r_0^{-1}E_0)(\e_0
E_0^{-1}\r_0^{-1})\bigr)\cr
&\l_1\ge \l_0+B_8(E_0\G_0)(\e_0\r_0^{-1}E_0^{-1})\cr}\Eq(5.56)$$
%
provided the conditions in \equ(5.53) hold.

Following the familiar pattern of KAM theory we are now going to
iterate the above scheme, \ie
{\it we shall label by indices $j=0,1,2...$ the
Hamiltonians $h_j+f_j$ together with their size parameters ($\e_j,
E_j,...$) obtained by sequentially applying the above scheme.}
%
This procedure makes sense {\it provided} the analogues of the conditions
\equ(5.53) are satisfied at each step of the construction.

We claim that one can find $B,\b$ depending only on $l$,
$\t$ and large enough so that:
%
$$B\xc_0^{-\b}(\e_0 \r_0^{-1}E_0^{-1})(E_0C_0)^6
(\h_0 E_0 \r_0^{-1})^3<1\Eq(5.57)$$
%
implies that the above scheme can be carried out an infinite number of
times.

To prove the claim we proceed by induction and to simplify the
discussion we introduce the following {\it dimensionless} parameters:
%
$$\bar\e_j=\e_j\r_j^{-1}E_j^{-1},\quad \bar E_j=E_jC_j,\quad
\bar\h_j=\h_jE_j\r_j^{-1}\Eq(5.58)$$
%
and a number $1^-$ which is any prefixed number less than $1$. Given
$1^-$ we fix the so far free $\ch$ so that $\ch>1-1^-\=0^+$ (one could
already say that $\ch$ is any prefixed number close to $0$ (\eg $1/4$)
and $1^-$ is a free parameter  to be eventually fixed slightly above
$1/2$: but we prefer to keep the parameters free as the inequalities
look probably more transparent in this way).

Furthermore we impose the following conditions which permit simple
bounds on the r.h.s.  of \equ(5.56) and \equ(5.53):
%
$$\eqalign{
&B_8\d_j^{-\b_8} \bar\e_j^{1^-}<1,\qquad
B_8\x_j^{-\b_8}\d_j^{-\b_8}
\bar\e_j^{1^-}(\log\bar\e_j^{-1})^{2(\t+1)}\bar E_j^3<1,\quad
B_8\d_j^{-\b_8}\bar\e_j^{1^-}\bar\h_j<1\cr
&B_6\x_j^{-\b_6}\d_j^{-\b_6}{\bar\e_j}^{1^-}\bar\h_j^2\bar
E_j(-\log\bar\e_j)^{\t+1}<1\cr}\Eq(5.59)$$
%
and in terms of this definition we fix the definition of the parameters
verifying the analogue of \equ(5.56) for general $j$ as follows:
%
$$\eqalign{
\bar E_{j+1}=&\bar E_j(1+\bar\e_j^{0^+}),\qquad\bar\e_{j+1}=\bar\e_j^{1^+},
\qquad\kern1.truecm\x_{j+1}=e^{-4\d_j}\x_j\cr
\h_{j+1}=&\h_j(1+\bar\e_j^{0^+}),\qquad
\l_{j+1}=\l_j+\bar\e_j^{0^+},\qquad \r_{j+1}={\r_j(\x_j\d_j)^{\b_7}\over
B_7\bar E_j\log\bar\e_j^{-1}}\cr
C_{j+1}=&C_j,\kern1.7truecm\qquad\G_{j+1}=\G_j\cr}\Eq(5.60)$$
%
where $1^+=2-1^-,\,0^+=1-1^-$ (and for $j=0$, $\x_{j=0}=$  $\hat \x_0$).

Hence if $\bar\e_0$ is small enough (depending on the value chosen for
$1^-$) we see that $\forall j$:
%
$$E_j\le\sqrt2E_0,\qquad\h_j\leq\sqrt2\h_0,\qquad\l_j<2\l_0,\qquad
\k_j\geq\k_0/2,\qquad\x_j\ge\xc_0/2 \Eq(5.61)$$
%
so that, if $\bar\e_0$ is small enough compared to $1$ (depending on the
choice of the number denoted $1^-$) and if $\l_0E_0\G_0$ is small enough
(\ie $<8^{-1}$), it will be $\l_jE_j\G_j<4^{-1}$ and:
%
$$\eqalign{
\r_{j+1}\ge&\r_j{\xc_0^{\b_7}\over2 B_7(E_0C_0)}{2^4(\log2)^{-1}\over
(1+j^2)^{\b_7}}
{1\over(1^+)^j\log\bar\e_0^{-1}}\ge\cr
\ge&\r_0\bigl(B'_7\xc_0^{-\b_7}(E_0C_0)\log\bar\e_0^{-1}\bigr)^{-j-1}
(1+j)^2!^{-\b_7}{(1^+)^{-j(j+1)/2}}\=\s_j^{-1}\cr\cr
\bar\h_j\le&4\bar\h_0\s_j\cr}\Eq(5.62)$$
%
for some $B'_7$.

Thus we see, by taking into account the rapidity of convergence to zero of
$\bar\e_j$ and if $\b_-,B_-$ are suitably large, that the conditions in
\equ(5.59) are equivalent to:
%
$$\eqalign{
&B_-\bar\e_0^{1^-}<1,\qquad
B_-\xc_0^{-\b_-}\bar\e_0^{1^{--}}\bar E_0^3<1\qquad\cr
&B_-\xc_0^{-\b_-}(\bar\e_0)^{(1^+)^j\cdot1^{--}}\bar E_0^{j}\s_j^2
\bar\h_0^2\bar E_0<1,\qquad j\ge0\cr}\Eq(5.63)$$
%
if $1^{--}$ is defined to be slightly smaller (by any prefixed amount)
than the value fixed for $1^-$ appearing in
\equ(5.59).

Choosing $1^-,1^{--}$ slightly larger than $1/2$, and taking into account
the expression in \equ(5.62) for $\s_j$, it follows that all conditions are
implied by the following:
%
$$B_9\xc_0^{-\b_9}\bar\e_0\bar E_0^{6}\bar\h_0^{3}<1\Eq(5.64)$$
%
where $B_9,\b_9$ are constants depending only on $l$.

The above discussion contains some ``hidden'' assumptions on the initial
data, namely \equ(5.20),\equ(5.9), and $8\l_0E_0\G_0<1$. They are verified
automatically if the parameters $\rb_0,\kb_0,\mb_0$ are chosen as
prescribed by \equ(5.18),\equ(5.20).

Hence we can say that \equ(5.64) together with \equ(5.9),\equ(5.20)
and $8\l_0E_0\G_0<1$ are implied by:
%
$$B_{9}\xc_0^{-\b_9}\bar\e_0\bar
E_0^{6}\bar\h_0^{3}<1\Eq(5.65)$$
%
possibly readjusting $B_{9},\b_9$ (recall, as well, that $\G_0<C_0$).
As we shall see below this is the final
condition under which lemma 1' holds.

This completes our check of the claim in \equ(5.57).

Thus we can construct, for all $j \geq 0$, canonical transformations
$\tilde \CC_j$ ($\tilde \CC_0=\tilde \CC$) mapping
$W_{j+1}$ into $W_j$ ($W_j$ is defined in \equ(5.54) with $1\to j$;
recall that $\m_{j}\equiv\m_0$).
Such maps are close to the
identity within $\| \F_j \| \r_{j+1}^{-1}$ in the $\AA$--variables and
within $\| \F_j \|\k_{j+1}^{-1}\d_{j+1}^{-1}$ in the $p,q$--variables and
within $||\F_j||\x_{j+1}^{-1}\d_{j+1}^{-1}$ in the $\aa$--variables.

Their derivatives of order $k$ in $\AA$'s, $h$ in the $\aa$'s, $z$
in the $p,q$ are bounded by multiplying the above bounds
by $\r_{j+1}^{-k}\k_{j+1}^{-z}\d_{j+1}^{-h-z}$.
Since $\| \F_j \| \leq B_2
\hat \x_j^{-\b_2}\d_j^{-\b_2} \e_jC_0$, see \equ(5.35),
\equ(5.60), we realize that the map $\tilde \CC_j$ approaches the
identity very quickly.

Taking into account the \equ(5.52) it also follows that
the sheets $\bar\LL^j$ defined by $(s,u)\in \tilde\II_0
\to\AA^j(s,u,0,\m)$ approach a limit sheet:
%
$$\bar\LL^\i \ {\rm  defined\  by\ }
\quad (s,u)\in\tilde\II_0\to\AA^\i(s,u,0,\m)\ , \quad
\AA^\io(s,u,0,0)\=\AA_{su}\Eq(5.66)$$
%
and control is kept on any prefixed number of derivatives of
$\bar\LL^\i$: here we have used that, see \equ(5.52), $\ch>0$.

Furthermore the domains of holomorphy of the maps $\tilde \CC_j$, hence of
$\ct_0 \ct_1 ... \ct_j=\ct^{(j)}$ do not shrink to zero in the $\aa,p,q,\m$
variables.

If we call $\tilde \F_j (\AA',\aa,p',q,\m)$ the generating function of the
composite map $\ct^{(j)}$, the above remarks imply that
$\tilde \F_j$  can be extended
to a $C^n$ function defined in the vicinity of the sets
$W_j$: the extension, which we still denote $\tilde \F_j$,
can be made in class $C^n$ for any $n$ so
that $\tilde \F_j$ converges in the $C^n$-norm to a limit $\tilde \F_\i$
(simply because the variations of the $\tilde \F_j$'s are basically bounded
as the $\F_j$, \ie by $\tilde B_2\xc_0^{-\b_2}\bar\e_0^{(3/2)^j}\bar
E_0\r_j$ see \equ(5.35),
\equ(5.58), \equ(5.61),\equ(5.62), in their analyticity domain;
hence they have their derivatives very small and therefore can be extended
remaining small), see [La], [Sv], [CG] and [P\"o] for similar constructions.

The limit $\tilde \F_\i$ will be uniquely
defined on $\bar\LL^\i \times {T}^{l-1} \times
\{ |p'| < \k_\i,\ |q|<\k_\i\}$, with (cfr. 2) of lemma 1):
%
$$\k_\io={\k_0\over 2}\= \bar \k \Eq(5.67)$$
%
and it will be real-analytic in the $\aa_0,p',q_0$ variables and
$C^n$--smooth in $\AA',\aa_0,p',q,\m$, for any
prefixed number $n$ of derivatives, if $\e_0$ is small enough.

Therefore $\tilde \F_\i$ generates a canonical map,
$\tilde \CC_\io$, which for
$\AA' \in\bar\LL^\i$ takes the form (cfr. \equ(5.5)):
%
$$\eqalign{\AA_0=&\AA'+\V\X'(\AA',\V\ps,p,q,\m)
\quad\kern2.2truecm\aa_0=\V\ps+\V\D'(\AA',\V\ps,p,q,\m)\cr
p_0=&p+\L'(\AA',\V\ps,p,q,\m)\kern2.5truecm\quad q_0=
q+\Th'(\AA',\V\ps,p,q,\m)\cr}\Eq(5.68)$$
%
and for  $\AA'=\AA^\io(s,u,pq,\m)$
the solutions of the motion equations take the form
\equ(5.11)
with $\oo_{su},g_{su}$ defined in \equ(5.7), \equ(5.9) and with
$\g'\=u_\i'(s,u,J,\m)$ defined by (cfr. \equ(5.16)):
%
$$\dpr_J h_\i(\AA^\i(s,u,J,\m),
J,\m)=(1+u_\i')g_{su}
\=(1+\g') g_{su}\Eq(5.69)$$
%
where $h_\i$ $\=\lim_{j\to \io} h_j$. Note that if we denote
by $H_\io(\AA',\V
\ps,p,q,\m)$ the original hamiltonian \equ(5.2) computed
in the new variables defined by $\tilde \F_\io$, it is:
%
$$h_\io(\AA^\io(s,u,pq,\m),pq,\m)=H_\io(\AA^\io(s,u,pq,\m),\V \ps,pq,\m)
\Eq(5.70)$$
%
Furthermore $u'_\io(s,u,pq,\m)$ is analytic in
$p,q,\m$, if $(s,u)$ are fixed in $\tilde\II_0$.
The parametric equations of the whiskers \equ(5.10) are now immediately
obtained in terms of \equ(5.68) and of the transformation \equ(5.3) of
lemma 0. Setting:
%
$$\eqalign{
& z\=(\AA^\io(s,u,pq,\m),\V \ps ,p,q,\m)\quad
\hat z \= (\AA^\io(s,u,pq,\m),p,q,\m) \cr
& \hat \z \= \hat z + \Big( \V \X'(z), \L'(z), \Th'(z),0\Big)\cr}
\Eq(5.71)$$
%
we find (cfr. \equ(5.10)):
%
$$\eqalign{
& \AA_{su}(J,\m)\= \AA^\io(s,u,J,\m)\ , \qquad (\Rightarrow
\ \AA_{su}(0,0)\=\AA_{su} )\cr
& \V \X(\V \ps,p,q,s,u,\m)\= \V \X'(z)\ , \quad
\V \D(\V \ps,p,q,s,u,\m)\= \V \D'(z) + \V \d(\hat \z) - \V \d(\hat z)\cr
& \L(\V \ps,p,q,s,u,\m)\= R(\hat \z)-R(\hat z)\ ,\quad
\Th(\V \ps,p,q,s,u,\m)\= S(\hat \z)-S(\hat z)\cr}
\Eq(5.72)$$
%
The linearity of the flow on the surfaces \equ(5.10)
follows because $f_j$ tends to zero very fast with all its derivatives,
including the $\AA$ derivatives in spite of the fact that the
$\AA$-domain shrinks: in fact the derivatives are bounded, for real
$\AA,\aa,p,q$, by $\e_j$ times some inverse power of $\r_j$ and $\e_j
\r_j^{-k} \rightarrow 0$ for all $k \geq 0$, by the inequality
\equ(5.62).

Note that the $H_\i$, by our construction, has derivatives with respect
to $\V\ps$ vanishing if $\AA'=\AA^\i(s,u,pq,\m)$,
$(s,u)\in\tilde\II_0$, see \equ(5.70);
at other points it depends non trivially on $\V\ps$.  Hence for $p=q=0,
\AA'=\AA^\io(s,u,0,\m)$ and $(s,u)\in\tilde\II_0$ the \equ(5.68)
describe invariant tori $\TT_\m(s,u)$, and their whiskers are obtained
by considering $p=0$, $q\ne 0$ or $q=0$ and $p\ne 0$.

We express \equ(5.65) in terms of the more fundamental parameters
$\kb_0,\rb_0,\xb_0$ of lemma 0; see \equ(5.18), \equ(5.25), \equ(5.58).
If we assume for simplicity that for a suitable constant $\bar B_0\ge 1$
one has:
%
$$\bar B_0 \rb_0 \le \kb_0^2\Eq(5.73)$$
%
so that (see \equ(5.18)):
%
$$\r_0={\rb_0\over\bar B (E_0\h_0\rb_0^{-1})(E_0\G_0)^2}\Eq(5.74)$$
%
then \equ(5.65) becomes:
%
$$B_{10}(\e_0E_0^{-1}\rb_0^{-1})(E_0C_0)^6(E_0\G_0)^8
(E_0\h_0\rb_0^{-1})^7\xc_0^{-\b_{10}}<1\Eq(5.75)$$
%
Finally, we see that, in the case of interest to us,
{\it $\e_0$ is of the form
$\m\widetilde \e_0$ for some $\widetilde \e_0$,}
so that the condition of
$\m$ small takes the form (see \equ(5.75)):
%
$$|\m|< \m_0\=
\big[ B_{11}\xc_0^{-\b_{11}}(\widetilde\e_0\rb_0^{-1} E_0^{-1})
(E_0C_0)^{6}(E_0\G_0)^8(\h_0E_0\rb_0^{-1})^{7}\big]^{-1}\Eq(5.76)$$
%
provided $2\G_0<C_0$ and for suitable constants $B_{11},\b_{11}$.

This is still not completely explicit as the values of
$\rb_0,\kb_0,\xb_0$ are not the analyticity parameters of the original
hamiltonian.  In fact they can be deduced from the latter via the
application of lemma 0.

Lemma 0 allows us to take (see appendix A3, \equ(A3.39)
and \equ(5.73)):
%
$$\eqalign{& \kb_0\ ={1\over2}
{\k \over B m^7}  \min \Big\{ {\r'\over \k^2}\ ,\
\x'\ ,\ {1\over (\EE \G \k^{-2}) \s_2^2\s_3}\ ,\ {\r\x\over \r'\hat \s}
\Big\} \cr& \rb_0 \= \min\{ {\r\over 2}\ , \ {\kb_0^2\over \bar B_0} \}\ ,
\qquad \xb_0\= {\x\over 2}\cr}
\Eq(5.77)$$
%
if $\r',\x',\r,\x$ are the original hamiltonian regularity parameters
(see \S 2), and $m,\G,{\cal E},B,\k,$ $\s_2,$ $\s_3,\hat \s$
are introduced in appendix A3,
see \equ(A3.3),\equ(A3.47), \equ(A3.49) and \equ(A3.53),\equ(A3.39).

We can also deduce, from the analyticity in $\m$, a simple bound on the
size of the variation $|\AA^\io(s,u,J,\m)-\AA^0(s,u,J,\m)|$ and of the
variation of the {\it whisker graphs}, \ie of the functions in the r.h.s.
of \equ(5.68) and, by dimensional estimates, consequent bounds on their
derivatives.  We see from the above analysis that the bounds
\equ(5.41),\equ(5.52) must hold, with different constants replacing
$\b_3,B_3$ for the corresponding functions in \equ(5.68).  Hence for
suitable constants $G_A,G_\CC$:
%
$$\eqalign{
&\bar \r_0^{-1}(||\AA^\i(s,u,\cdot)-\AA^0(s,u,\cdot)||)\le
G_A|\m|\m_0^{-1}\cr
&\bar \r_0^{-1}||\V\X'||
+\bar \x_0^{-1}||\V\D'||+{\r'}^{-1}||\L'||+{\x'}^{-1}||\Th'||<G_\CC
|\m|\m_0^{-1}\cr}\Eq(5.78)$$
%
where the norms are evaluated by fixing $(s,u)\in\bar\LL$;
here we have just bounded the
value at $z$ of a function holomorphic in a disk of
radius $z_0$ and vanishing at the center $z=0$ by its supremum times
$|z|/z_0$: we take $z=\m$ and use the holomorphy in $\m$.

And, using \equ(5.52) and \equ(5.35),\equ(5.41)
the constants $G_A,G_\CC$ can be easily expressed in terms of our
dimensionless constants:
%
$$\eqalign{
G_A=&B_{12}\bigl[
(\widetilde\e_0E_0^{-1}\rb_0^{-1})(\h_0E_0\rb_0^{-1})\bigr]^\ch\cr
G_\CC=&B_{12}(E_0\G_0)\xc_0^{-\b_{12}}\left[
(\widetilde\e_0E_0^{-1}\rb_0^{-1})
(E_0C_0)(\h_0E_0\rb_0^{-1})\right]^>\cr}\Eq(5.79)$$
%
for suitably chosen constants $B_{12},\b_{12}$ and having denoted $[x]^>$
the function $x\log x^{-1}$ for $x>1$.

The function $\g'$ in \equ(5.11) is the value $u'_\io$ in \equ(5.69):
it is analytic in $|J|<\k_0^2/2$ (see \equ(5.61)) and $|\m|<|\m_0|$
and it is bounded there, for all $s,u\in \tilde \II_0$
(see \equ(5.20), \equ(5.17), \equ(5.61)), by:
%
$$
|\g'(J,s,u,\m)|\le 2 \hat B (\G_0 E_0) (E_0\h_0 \bar \r_0^{-1})
\Big( {|J|\over \k_0^2} + {|\m|\over \m_0} \Big)
\Eq(5.80)$$
%
To check \equ(5.4) we simply use that the above proof has a free parameter
$C_0$.  The set $\Si(C_0)$, see \equ(3.1), has measure at least
$(s_2-s_1)[1- (\bar K/(DC_0)^{1/t})]$ by the assumption that $\LL$ is a
diffusion path, see \equ(3.2).  Therefore we choose, taking into account
that the constant $C_0$ appears to the power $6$ in the basic condition
\equ(5.75):
%
$$C_0\={\G_0|\m|^{-1/7}}\qquad \Rightarrow\ \Si_\m=\Si(C_0)\Eq(5.81)$$
%
Then we see that the constants
$k,K,\bar c$ and $c$ of lemma 1' can be taken:
%
$$ k=\G_0, \quad K={\bar K(E_0\G_0)^{-1/t}}
\ ,\quad \bar c\= 7\ ,\quad c\= 7\ t \Eq(5.82)$$
%
(where we have replaced $D$ by $E_0$: see 2) \S 3)
and, what is more important, the smallness condition on $\m$ can still
be met.

{\it This finishes the proof of lemma 1'}. Note that the smallness
condition on $|\m|$
(\ie \equ(5.75) with $C_0\=\G_0 |\m|^{-1/7}$) does not involve
$\th_0$ (defined in \equ(5.23)): such a quantity will appear in fixing
the energy in order to get lemma 1 as a  corollary of lemma 1'.

We now let $p,q$ be such that $|p|,|q|<\k_0/2$, $J\=pq$,
$\V \ps = \V 0$, $\AA^\io\=\AA^\io(s,u,J,\m)$ and, fixing $s\in \II_0$,
we try to find $u\=u(s,J,\m)$ so that the real part of the energy
$E(s,u,J,\m)$ associated to the initial data $(\AA^\io,\V 0,p,q,\m)$
coincide with the {\it prefixed value} $E\=h_0(\AA_s,0,0)$ (see 1) of \S
3). In view of the above construction, the energy $E(s,u,J,\m)$ is given by
(compare with \equ(5.69), \equ(5.70)):
%
$$
E(s,u,J,\m)= h_\io(\AA^\io(s,u,J,\m),J,\m)
\Eq(5.83)$$
%
and by Taylor expansion at $\m=0$, $u=0$ (see \equ(5.7),\equ(5.8),
\equ(5.66)):
%
$$\eqalign{
{\rm Re}\,E(s,u,J,\m)&=E+u
\Big(\dpr_\AA h_0(\AA_{s},0,0)\cdot[\dpr_u\AA_{su}]_{\m=0}\Big)
+\b \tilde G  \Big( {u^2\over \bar u^2}+{|\m|\over \m_0}+
{|J|\over \k_0^2}\Big)=\cr
&=E+u\,[\oo_{s}\cdot(\dpr^2_{\AA}h_0)^{-1}\oo_{s}]
+\b \tilde G  \Big( {u^2\over \bar u^2}+{|\m|\over \m_0}+
{|J|\over \k_0^2}\Big)\cr}
\Eq(5.84)$$
%
where the derivative $\dpr_u\AA_{su}$ is computed by differentiating
\equ(5.8) and $\dpr^2_\AA h_0\=\dpr^2_\AA h_0(\AA_s,0,0)$; $\b$ is
some $C^\i$ function (at $s$ fixed) with $|\b|\le1$ and the constant
$\tilde G$ can be taken to be proportional, via a constant depending
only on $l$, to $E_0 (E_0\h_0 \rb_0^{-1})^2$ (recall that the constant
$\bar u$, see \equ(5.9), can be taken to be a numerical constant
times $(\h_0E_0\bar \r_0^{-1})^{-2}$, see \equ(5.18)).

The first two derivatives of $\b$ with respect to $u,\m,J$
can be bounded by our dimensionless constants. Hence, recalling the
definition in \equ(5.23) of $\th_0$, we see, by the implicit function
theorem, that under the further condition:
%
$$B_{13}(|\m|\m_0^{-1})(\th_0 E_0\bar \r_0)(\h_0E_0\bar \r_0^{-1})^2
\equiv B_{13}(|\m|\m_0^{-1})(\th_0 E_0^2\h_0)(E_0\h_0\bar
\r_0^{-1})<1\Eq(5.85)$$
%
we can find $u=u(s,J,\m)$ as desired \ie so that (see \equ(5.83)):
%
$$
E(s,u(s,J,\m),J,\m)\= E\Eq(5.86)$$
%
Therefore condition \equ(5.85) together with \equ(5.75) with
$C_0\=\G_0 |\m|^{-1/7}$ are sufficient to yield lemma 1' {\it and} lemma 1.

The functions $\g',\V \X, \V \D, \L, \Th$ of lemma 1 are obviously related
to the corresponding (but different) functions of lemma 1': just set
$u\=u(s,J,\m)$ in \equ(5.72), \equ(5.71) and in the definition of $\g'$
(cfr. \equ(5.69)); \eg:
%
$$
\g'(J,s,\m)\=\g'(J,s,u(J,s,\m),\m)\ ,\quad
\V \X(\V \ps, p,q,s,\m)\=
\V \X(\V \ps, p,q,s,u(J,s,\m),\m)\ , \etc
\Eq(5.87)$$
%
and the function $\g(J,s,\m)$ is just $\g\=u(J,s,\m)$.
The functions $\g,\g'$ are easily seen to satisfy the bound (cfr.
\equ(5.69),\equ(5.17),\equ(5.18)):
%
$$
\sup_{ {s\in \II_0, \m\in[-\m_0,\m_0]}\atop {|J|<\k_0^2/2}}
|\g|, |\g'| \le \bar u \= \bar B^2 (E_0 \h_0 \bar \r_0^{-1})^2
\Eq(5.88)$$
%
Bounds on $|\V \X|, |\V \D|, |\L|, |\Th|$ are easily obtained by recalling
their definitions, \equ(5.71), \equ(5.72) (for the functions of lemma
1') and \equ(5.87) (for the functions of lemma 1),
the bounds \equ(5.78), \equ(5.79), and the bounds on $|R|,|S|,|\V \d|$
(see \equ(A3.54)):
%
$$
\bar \r_0^{-1}||\V\X||
+\bar \x_0^{-1}||\V\D||+{\r'}^{-1}||\L||+{\x'}^{-1}||\Th||<G_\CC
|\m|\m_0^{-1}
\Eq(5.89)$$
%
where $G_\CC$ is as in \equ(5.79) (actually increased by a factor 2)
and the norms are taken at $s\in \II_0$ fixed for the functions of lemma 1,
or at $(s,u)\in \bar \LL$ fixed for the functions of lemma 1'.

Finally, we remark that all the requirements
(\equ(5.75),\equ(5.76),\equ(5.81),\equ(5.85)) we needed to prove lemma
1',1 can be enforced by requiring the single condition:
%
$$|\m|<\m^*\=
\left[\bigl[B \xc_0^{-\b}(\h_0 E_0 \rb_0^{-1})^{7}
(\G_0E_0)^{14}(\widetilde\e_0\rb_0^{-1}E_0^{-1})\bigr]^7(\th_0E_0^2\h_0)
\right]^{-1}\Eq(5.90)$$
%
where $B,\b>0$ are suitable constants depending only on $l$ and $\t$
(see \equ(5.23), \equ(5.25), lemma 0 and \equ(5.76) to refresh the memory
about the various parameters involved).

{\it This completes the proof of lemma 1' and lemma 1}.
In fact we have proved:
\vskip0.5truecm
\noindent{\it {\bf Lemma 2}: There exists a canonical map
$\CC(p',q',\AA',\aa')=
(I,\AA,\f,\aa)$ of class $C^n$ and a line
$\LL_\m:\,s\to\AA_\m(s)$, contained in the energy surface of energy $E$ for
the perturbed hamiltonian \equ(2.9), of class $C^n$ with the properties:
\item{1) }$\CC$ is $C^n$-close to the identity as $\m \rightarrow 0$,
$\LL_\m$ is $C^n$-close to $\LL$ as $\m \rightarrow 0$ and the domain of
$\CC$ is a set of the form $V \times { T}^{l-1} \times { S}^2$ where $V$ is
a neighborhood of $\LL$ containing $\LL_\m$ and ${ S}^2$ is a neighborhood
of the origin in $R^2$.
\item{2) } for $s$ in a set of measure $\ge (1-K|\m|^{1/c}), K,c>0$,
the set $\CC (\AA_\m(s) \times { T}^{l-1} \times { S}^2)$ is
invariant for the flow generated by the perturbed hamiltonian \equ(2.9);
\item{3) } the derivatives in $\AA'$ of the hamiltonian \equ(2.9)
regarded as a function of the new coordinates $(p',q',\AA',\aa')$
%
as well as those in $p',q'$ at constant $p'q'$ vanish on the above set, so that
the flow is linear in the $\V\psi
\in { T}^{l-1}$ variables and hyperbolic in the $p,q$ variables.
\item{4) } explicit bounds on the parametric equations of the invariant
tori, on their whiskers and on the main dimensionless parameters involved
in the construction are provided by the bounds found in the course of the
above proof.}
\vskip0.5truecm

This lemma is a quick, if a little mysterious, way of summarizing the
analysis of this section.

Another important {\it corollary of the above lemmata is
that they can be shown to cover the case of a forced system}:
%
$$H\=\o B+H_0+\m f\=
\o B+ h(\tilde A,\m)+P(I,\tilde A,\f,\m)+\m
f(I,\tilde A,\f,\tilde\a,\l,\m) \Eq(5.91)$$
%
where $B,\l$ are a pair of conjugate action angle variables and
$(\tilde A,\tilde \a)$ $\in$ $R^{l-2}\times T^{l-2}$
are other action angle coordinates which will be supposed
anisochronous, \ie such that $||(\dpr^2_{\tilde A} h)^{-1}||=\h_0<+\io$;
to compare with the previous notations one should set
$\AA\=(B,\tilde A)$ and $\aa\=(\l,\tilde \a)$.

In this case the notion of diffusion path has to be suitably adapted. We
consider a curve in $\tilde A$ space, $\LL=\{s\to \tilde A_s\}$
and define $\Si(C)$ exactly as in \equ(3.1) with $\AA$ replaced by
$\tilde A$ and we say that $\LL$  is a diffusion path if
\equ(3.2) holds. In other words in forced systems {\it the action of the
``forcing reservoir" $ B$ does not enter into the definition}.

The following technique, invented by Poincar\'e, applies remarkably well to
this case, see [P] p. 118, tome I, ch. III. Note that, if
$H_0(I,\tilde A,\f)\=h(\tilde A,\m)+P(I,\tilde A,\f,\m)$
and $H$ is as in \equ(5.91), the hamiltonian:
%
$$\eqalign{
& H_2(\AA,\aa,\m)
\={H^2\over2 E}\=h_2+f_2\ ,\qquad \AA\=(B,\tilde A)\ ,\ \aa\=(\l,\tilde  \a)\cr
& h_2(I,B,\tilde A,\f)\={1\over2E}\bigl(
\o^2B^2+H_0^2+2 \o B H_0\bigr)\cr}\Eq(5.92)$$
%
where $E\ne0$ is fixed arbitrarily, is such that $h_2$ has the property
that  [see 1) of appendix A9]:
%
$$\det(\dpr_\AA^2h_2)\=
\det(\dpr_{B\tilde A}^2 h_2)={\o^2\over E} \Big({h_2\over E}\Big)^{l-2}
\det(\dpr^2_{\tilde A} h)\Eq(5.93)$$
%
where in \equ(5.93) we evaluate the derivatives at $I=0$, $\f=0$ (hence
$P=0$).  Thus $h_2$ is non degenerate and, furthermore, at $I=0$, $\f=0$:
%
$$\dpr_\AA h_2\=
(\dpr_B h_2,\dpr_{\tilde A} h_2)\=(\o,\tilde \o) \qquad{\rm if}\qquad
h_2|_{(I,\f)=(0,0)}\=\o B+h=E
\Eq(5.94)$$
%
and we see that the line $\LL_2$ obtained from $\LL$ by adding to each of its
points a coordinate $B_s$ computed from the equation
$\o B+h(\tilde A_s,0)=E$ is a
diffusion path for $h_2$ in the sense of \S2.

It is immediate to check that if $z(t)\=(\AA(t),\aa(t))$ is a motion for
\equ(5.92), then $\bar z(t)$ $\=$ $z({t/\s})$, with
$\s\=(H_2(\AA(0),\aa(0),\m)/E)$, is a motion for \equ(5.91).
We can thus construct, by using the above lemmata, whiskered tori
for $h_2+f_2$ (and hence for $H$).

For a proper usage of the bounds involved in the above lemmata,
one has to estimate the
basic dimensional quantities. Fixing the arbitrary parameter $E\=4 \bar
\r_0$ $\max\{||\dpr_{\tilde A} h||, |\o|\}$ we see that:
%
$$||{\o B + h(\tilde A)\over E}-1||\=
\sup_{{|B-B_s|\le \rb_0}\atop ||\tilde A-\tilde A_s||\le \rb_0}
|{\o B + h(\tilde A)\over E}-1|\le {1\over 2}\ ,\quad B_s\={E-h(\tilde A_s)
\over \o}\Eq(5.95)$$
%
and this allows us to bound the norms,
$||\dpr h_2||$, $||(\dpr_J h_2)^{-1}||$, $||f_2||$,
associated to $H_2$ in \equ(5.92)
{\it in terms of constants times the corresponding quantities
for \equ(5.91)}, while we can take $E_0\=\tilde \b (||\dpr_{\tilde A}
h||+|\o|)$ and
$\th_0\=\tilde \b(E_0\rb_0)^{-1}$ [see also 1) of appendix A9]
for a suitable constant $\tilde \b$;
of course the norms referring to \equ(5.92) are taken over the action
domain $|B-B_s|\le \rb_0$, $||\tilde A-\tilde A_s||\le \rb_0$ (see
\equ(5.95)).
Thus we see that the statements of lemma 1' and lemma 1
just carry over to the present case under a condition like \equ(5.90)
with:
%
$$\m^*=\left[\bigl[B \xc_0^{-\b}(\h_0 E_0 \r_0^{-1})^{7}
(\G_0E_0)^{14}(\widetilde\e_0\r_0^{-1}E_0^{-1})\bigr]^7(\h_0E_0\r^{-1}_0)
\right]^{-1}\Eq(5.96)$$
%
and with the same quantitative bounds established in the proofs, provided
we interpret the notion of diffusion path in the way described above.

Finally, we remark that the whiskered tori that we obtain for \equ(5.91)
via lemma 1' applied to \equ(5.92) and via the rescaling described after
\equ(5.94), have, {\it for all} $s\in \Si_\m$ {\it and} $u\in [-\bar u,\bar u]$,
the $\l$--frequency equal to $\o$ (as it should, since the clock velocity
$\o$ cannot change, just because it is a clock).
%
\vglue2.truecm

\penalty-200

{\bf\S6 Large whiskers. Homoclinic points and angles}

\penalty10000

\vskip0.5truecm\numsec=6\numfor=1

\penalty10000

In \S5 we have constructed invariant tori surviving the onset of a
perturbation as well as the parts of their whiskers in their immediate
vicinity. We now derive the equations of the whiskers away from the
invariant tori with the purpose of finding whether they contain homoclinic
intersections.

The whiskers can be continued to form a full invariant manifold by evolving
them with the solution map $(I,\AA,\f,\aa)\to S_t^\m(I,\AA,\f,\aa)$
associated with the perturbed Hamilton equations generated by   \equ(2.9).
We regard the map
$S^\m_t$ as defined in the original coordinates, which are {\it globally}
describing our system and we shall call {\it local} the part of the
whiskers constructed so far, via lemma 1', denoting it by $W^{loc}(s,u)$.

The full stable whisker will be:
%
$$W_{stable}(s,u)=\bigcup_{t\le0}S_t^\m\left\{(I,\AA,\f,\aa)\in
W^{loc}_{stable}(s,u)\right\}\Eq(6.1)$$
%
for values of $u$ small and $s\in\II_0\equiv\Si(C_0)$, see lemma 1' \S5
and \equ(5.20).  Lemmata 0,1' imply that this set can be described, for
$|\f|<\tilde\f$, and $|\m|$ small enough, by parametric equations:
%
$$I= I(\f,\aa,\m)\ ,\qquad
\AA(\f,\aa,\m),\qquad|\f|<\tilde\f,\,\aa\in T^{l-1}\Eq(6.2)$$
%
where $\tilde\f$ is \ap fixed
in the following discussion (to be $<2\p$ in the case
of an {\it open} (see \S4) separatrix while for {\it closed} separatrices,
$|\f|<\tilde\f$ should be replaced by $\f_{\min}+\d<\f<\f_{\max}-\d$ for some
$\d>0$).  To fix ideas and simplify notations we shall discuss here mainly
the {\it stable case}, the unstable one being completely analogous (see
also below); however, when needed, we shall attach to the above functions
\equ(6.2) superscripts to distinguish between the two different cases (such
superscripts should not be confused with the parameters $s,u$ in \equ(6.1)
and elsewhere).

Fixing $s\in\II_0$ and $u$ small, the functions
$\AA(\f,\aa,\m),I(\f,\aa,\m)$ have to be such that for any
$|\f'|<\tilde\f,\aa'\in T^{l-1}$ there are $\f,\aa$ such that:
%
$$S^\m_t(I(\f',\aa',\m),\f',\AA(\f',\aa',\m),\aa')=
(I(\f,\aa,\m),\f,\AA(\f,\aa,\m),\aa)\Eq(6.3)$$
%
and we know, from lemma 1', that for $|\f|,|\m|$ small enough
$I(\f,\aa,\m),\AA(\f,\aa,\m)$ are analytic functions in the perturbation
parameter $\m$.

We shall fix $(\f,\aa)$ and try to determine the functions
$I(\f,\aa),\AA(\f,\aa)$.

We begin by noting that $S^\m_t$ is close to $S^\m_t|_{\m=0}\equiv S^0_t$
and depends analytically on $t,\aa,\f,\AA,I,\m$.  Also $S^0_t$ expands any
$\f\ne0$ to a value larger (in absolute value) than $\tilde\f$ in a finite
(positive or negative) time.

Hence it is clear that, fixed $\tilde\f$ and given $W_{stable}^{loc}(s,u)$,
we can use \equ(6.3) with $|\f|<\d,\,\aa\in T^{l-1}$ and $|t|<t_\d$ to define
$I(\f,\aa),\AA(\f,\aa)$ for $|\f|<\tilde\f,\,\aa\in T^{l-1}$ and $\d$ can
be taken to be any prefixed small positive number and $t_\d$ a suitably
long (but finite) time.

And the remarked analyticity of $S^\m_t$ together with the analyticity of
$W^{loc}_{stable}(s,u)$, see lemma 1,1',2 of \S5, imply the analyticity of
$I(.),\AA(.)$ in their arguments, at fixed $s,u$.

From now on, in order to avoid confusion with upper {\it indices} $\s=s,u$,
indicating stable/unstable, we drop the dependence upon the {\it
parameters} $s,u$, which in this section will be kept fixed.

Given a hamiltonian $H=H_0+\m f$ as in \equ(2.9) we write the equations of
motion for the vector $(I,\AA,\f,\aa)=X$ as:
%
$$\dot{X}= G_0(X)+\m  G(X)\Eq(6.4)$$
%
and we remark that if $\f,\aa\to (I^s(\f,\aa),\AA^s(\f,\aa),\f,\aa)$, for
$|\f|<\tilde\f$
and for $\aa\in T^{l-1}$, are the equations of the stable whisker, then:
%
$$
I^s(\f,\aa)=I^0(\f)+\sum_{k=1}^\i \m^kv^{ks}(\f,\aa)\ , \qquad
\AA^s(\f,\aa)=\AA^0+\sum_{k=1}^\i\m^k{\V h}^{ks}(\f,\aa)\Eq(6.5)$$
%
Since in this section we shall mainly discuss the stable whiskers,
we shall also drop the suffix $s$ (for stable) when this does not lead to
confusion.

It will be useful to consider also the slightly more general case in which
the variable $\aa$ is a function of $\m$:
%
$$\aa \= \aa_\m \= \sum_{k\ge 0} \aa^k \m^k\Eq(6.6)$$
%
while the variable $\f$ will be fixed once and for all to be the value
$\bar \f$ corresponding to the point where $|I|$ is maximal for the
unperturbed hamiltonian (cfr. \S 4; $\bar \f=\p$ for the pendulum
\equ(2.1)).

Furthermore if $X^s(t)$ is the solution of the Hamilton equations with
initial data $X^s(0)=(I^s (\f,\aa),\AA^s (\f,\aa),\f,\aa)$ then, for large
enough $t$, $X^s (t)$ is inside the vicinity of the unperturbed torus
$\AA=\AA^0$, $I=\f=0$ where we can use the coordinates, described in \S5,
$(p,q,\V\ps)$.

Actually, by using the analyticity of the flow $S_t^\m$ and the analyticity
properties in $p,q,\V\ps$ discussed in \S 5, one can analytically continue
the functions $R(\AA',p,q,\m),\L(\V\ps,p,q,\m),...$ in \equ(5.10)
to a domain around the real $p,q$ such that $|pq|<\bar\k^2$ and
around the real $\V\psi$, so large to cover a vicinity of the points
$p=\bar p,q=\bar q, \V\psi\in T^{l-1}$ corresponding to $\bar I,\bar
\f,\aa\in T^{l-1}$, where $\bar I,\bar \f$ is the separatrix point chosen
as the origin. Therefore, after the analytic continuation, we can write
for $\Re t\ge0,|\Im t|\le\hat\x$ and $\hat\x$ small enough
(cfr. \equ(5.10), \equ(5.11) and recall that we are dropping the parameters
$s,u$ from the notation):
%
$$\eqalignno{
I^s (t)=&R(\AA',pe^{-(1+\g')gt},0,\m)+\L(\V\ps+\oo t,
pe^{-(1+\g')gt},0,\m)\cr
\AA^s (t)=&\AA+\V\X(\V\ps+\oo t,pe^{-(1+\g')gt},0,\m)&\eq(6.7)\cr
\f^s (t)=&S(\AA',pe^{-(1+\g')gt},0,\m)+\Th(\V\ps+\oo t,
pe^{-(1+\g')gt},0,\m)\cr
\aa^s (t)=&\V\ps+\oo t+\V\D(\V\ps+\oo t,pe^{-(1+\g')g t},0,\m)
+\V\d(\AA ,pe^{-(1+\g')g t},0,\m)\cr}$$
%
and the expressions of $\aa,\f$ in terms of $\V\ps,p$ are deduced from the
above relations with $t=0$. The constant $\hat \x$ is, of course, small and
cannot exceed the width of the holomorphy domain of $X^0(t)$.  The same
holds for the unstable whisker with the obvious changes (\ie exchanging the
roles of $p$ and $q$ and considering $\Re t\le 0$).

If:
%
$$X^s(t,\aa)=X^0(t,\aa)+\sum_{k=1}^\i\m^k X^{ks}(t,\aa)\=X^0(t,\aa)+
\bar X^s(t,\aa)\Eq(6.8)$$
%
denotes the evolution of the initial point in \equ(6.5),\equ(6.6), on the
stable whisker, it follows from \S5 that $X^{s}(t,\aa)$ has the form
$X^{s}(\oo t,t;\aa)$ for a suitable analytic function $X^s(\V\ps,t;\aa)$
periodic in $\V\ps,\aa$ converging at an exponential rate as $t\to \io$ to
$X^s(\V\ps,\io;\aa)$.

Analogously, if $X^{ks}(t,\aa_\m)$ denotes the $k^{\rm th}$ Taylor
coefficient in the $\m$ expansion of $X^s(t,\aa_\m)$, then
$X^{ks}(t,\aa_\m)=$ $X^{ks}(\oo t,t;\aa_\m)$.

Note that $X^{ks}$ depends only on the first $k+1$ coefficients of
$\aa_\m$; to stress this fact we shall sometimes write
$X^{ks}(\V\ps,t;\aa_\m^{[\le k]})$. Note also that $X^s(\V\ps,t,\aa)$ is
holomorphic in a domain $|\Im \ps_j|<\bar\x, |\m|<\bar\m_0$,
$\Re t\ge T$ and $\Im t$ arbitrary if $|\m|<\bar \m_0$ with
$\bar\m_0,\bar \x$ small enough  and if $T$ is large
enough (for instance so that $(\bar p+\bar\k)e^{-g T/2}<\k$, see lemma 1').

Recursive expressions for $v^k,\V h^k$ and the $X^{ks}(\V\ps,t;\aa_\m)$
could be deduced from \S 5, however it is more convenient to derive them
directly.

To do this we put \equ(6.8) into \equ(6.4) and introduce the following
notations.

If $G(I,\AA,\f,\aa,\m)$ is a function and if $p$, $\V
m=(m_1,\ldots,m_{2l})$ and  $k^i_j$ are integers, we denote:
%
$$\eqalign{
(G)^p_{\V m}(\cdot)\=&\Bigl(
{\dpr^{p}_\m\dpr^{m_1}_I\dpr^{m_2}_{A_1}
\ldots\dpr^{m_{l}}_{A_{l-1}}\dpr^{m_{l+1}}_\f\dpr^{m_{l+2}}_{\a_1}
\ldots\dpr^{m_{2l}}_{\a_{l-1}}\,G\over
p!\, m_1!\,m_2!\,m_3!\,\ldots m_{l+2}!\,\ldots m_{2l}!}\Bigr)(\cdot)\cr
(k^i_j)_{\V m,p}\=&(k^1_1,\ldots,k^1_{m_1},k^2_1,\ldots,k^2_{m_2},
\ldots,k^{2l}_1,\ldots,k^{2l}_{m_{2l}})\qquad {\rm
s.t.\ }\sum k^i_j=p\cr}\Eq(6.9)$$
%
where $k_j^i\ge1$ if $m_i>0$. Then, given $\aa_\m$, the \equ(6.4)
can be translated into a hierarchy of equations for the Taylor coefficients
$X^{ks}$ of $X(t,\aa_\m)\=X(t)$; it becomes
(omitting the stable index $s$):
%
$$\eqalign{
\dot X_r^k=&\sum_j(\dpr_j {G_0}_r)(X^0(t)) X^{k}_j
+\sum_{|\V m|+p>1}({G_0}_r)^p_{\V m}(X^0(t))\sum_{(k^i_j)_{\V m,k-p}}
\prod_{i=1}^{2l}\prod_{j=1}^{m_i}X^{k^i_j}_i+\cr
&+\sum_{|\V m|+p>0}
(G_r)^p_{\V m}(X^0(t))\sum_{(k^i_j)_{\V m,k-p-1}}
\prod_{i=1}^{2l}\prod_{j=1}^{m_i}X^{k^i_j}_i\cr}\Eq(6.10)$$
%
where $X^0(t)\=X^0(t,\aa_\m)$ and the first term in the r.h.s. is separated
from the others being the only one involving $X^h$ with $h=k$. We write
\equ(6.10) as:
%
$$\dot {X}^k=L X^k+F^k\Eq(6.11)$$
%
where $\dpr_j {G_0}_r\equiv L_{jr}$ and $F^k=F^{ks}$ is implicitly defined by
\equ(6.10) and \equ(6.11), so that:
%
$$L\=L(t)=\pmatrix{
-\dpr^2_{I\f}H_0&-\dpr^2_{\AA\f}H_0&-\dpr^2_{\f\f}H_0&0\cr
\noalign{\vskip2.pt}
0&0&0&0\cr
\dpr^2_{II}H_0&\dpr^2_{\AA I}H_0&\dpr^2_{I\f}H_0&0\cr
\dpr^2_{\AA I}H_0&\dpr^2_{\AA\AA}H_0&\dpr^2_{\AA\f}H_0&0\cr},\quad
F^1(t)=\pmatrix{
-\dpr_\f f-\dpr^2_{\f\m}H_0\cr
-\dpr_\aa f\cr
\dpr_I f+\dpr^2_{I\m}H_0\cr
\dpr_\AA f+\dpr^2_{\AA\m} H_0\cr}\Eq(6.12)$$
%
where the zeroes in the matrix $L$ appear because we make use of the form
\equ(2.3) of $H_0$ and all the derivatives in \equ(6.12) are evaluated at
the point $ X^0(t)$ $=$ $(I^0(t),\AA^0,\f^0(t),\aa_0+\oo t +\V \th(t))$ and
at $\m=0$ ($\f(0)=\bar \f$ and $\V \th$ is the phase shift introduced in \S
4).  And more generally $F^k(t)$ are defined by solving recursively
\equ(6.11), using \equ(6.10).

In fact we will check directly that \equ(6.10) can be solved for every
$k\ge1$ and that, for each $\aa^k$, the initial data:
%
$$X^k(0)=\pmatrix{v^k\cr\V h^k\cr0\cr\aa^k\cr}\Eq(6.13)$$
%
can be fixed (\ie $v^k$, $\V h^k$ can be fixed) for each $k\ge1$ so that
$X^k(t)$ has the asymptotic properties dictated by lemma 1' (in particular
that $X^k(t)$ is bounded).

The check can be done by studying with some care the wronskian of
\equ(6.11), \ie the solution to the equation:
%
$$\dot W=L W,\qquad  W(0)=1\Eq(6.14)$$
%

In fact the solution to \equ(6.11) can then be written:
%
$${X}(t)=W(t)\Bigl(X(0)+\ig_0^tW(\t)^{-1}F(\t)d\t\Bigr)\Eq(6.15)$$
%
where we drop the index $k$ on $X$ and $F$ to simplify the notation
while performing the $k$ independent algebra that follows.

As mentioned above,
we also suppose, for convenience, that the fixed $\bar \f$ is so chosen that
the solution $i=i(\f,\AA^0)$ of the separatrix equation
$P(\AA^0,i,\f,0)=0$ is maximal for $\f=\bar \f$, see \equ(4.1).

The general properties of the wronskian matrix $W(t)$, solution of
\equ(6.14), can be found easily from lemma 0, \S5. Let us write
(cfr. \equ(5.3), \equ(5.2)):
%
$$\pmatrix{I\cr \AA\cr \f\cr\aa}=\pmatrix{R(p,\V a,q)\cr
\V a\cr S(p,\V a,q)\cr \V\psi+\V\d(p,\V a,q)}
\= V^0(p,q,\V\ps,\V a)
\qquad\quad
H_0\=h_0(\V a,pq,0)\=h(\V a) + E(pq,\V a) \Eq(6.16)$$
%
the canonical map of lemma 0, for $\m=0$ (and dropped from the notation),
reducing to normal form  the free part of the hamiltonian:
$E(pq,\V a)=P(\V a,I,\f)$  (if the free pendulum is an ordinary
pendulum like \equ(2.1), the \equ(6.16) is the well known
Jacobi map, and $R,S$ are suitable Jacobian elliptic functions, see
appendix A9; note that here we use a different order of the variables
from that used in \S 5).

If we replace:
%
$$\eqalign{
&p\to pe^{-gt}, \quad q\to qe^{+gt},\quad\V \psi \to\V\psi+\V\O t\cr
&g\=\dpr_JE(J,\V a), \qquad J\=pq\cr
&\V\O\=\dpr_\AA h(\V a) +\dpr_{\V a} E(J,\V a)
%\t\dpr_E h(\V A,\t E_{\V A}(J))\dpr_{\V A} E_\AA(J)
\cr}\Eq(6.17)$$
%
the map is still canonical (as \equ(6.17) is the solution of the
Hamilton equations in normal form for the free hamiltonian). Hence its
jacobian is a canonical matrix that can be written:
%
$$
U(t)=\pmatrix{\dpr_p R&{\dpr_{\V a}R}&\dpr_q R&0\cr
0&1&0&0\cr
\dpr_p S&{\dpr_{\V a}S}&\dpr_q S&0\cr
{\dpr_p \V\d}&{{\dpr_{\V a}\V\d}}
&{\dpr_q\V\d}&1\cr}\cdot\pmatrix{e^{-gt}&0&0&0\cr
0&1&0&0\cr
0&0&e^{gt}&0\cr
0&t\,\dpr_{\V a} \V \O&0&1\cr}\= U_0(t) \bar U(t)\Eq(6.18)$$
%
where the derivatives of $R,S,\V \d,\V \O$ are evaluated at
$(pe^{-gt},q e^{gt},\V a)$.

If $(\bar p,0)$ and $(0,\bar q)$ denote the points corresponding to $\bar
\f$, we denote by $U_\cdot^s(t),U_\cdot^u(t)$ the above matrices evaluated,
respectively, at the points $(\bar p e^{-gt},0)$, $(0,\bar q e^{gt})$; in
both cases $\dpr_{\V a} \V \O\= \bar H \= \dpr^2_\AA h$, $g=\dpr_J E(0,\V
a)$. Note also that the entries involving the $\V a$-- derivatives and all
the $\dpr
\V \d$ vanish as $t \to \pm \io$ ($+$ for the stable case and $-$ for the
unstable one).

The representation \equ(6.18) is symbolic as the $1$'s are in fact
$(l-1)\times(l-1)$ identity matrices, the $\dpr_{\V a}R$, $\dpr_{\V a}S$
are row vectors (or $1 \times (l-1)$ matrices), the $\dpr\V\d$ are column
vectors (or $(l-1) \times 1$ matrices) while $\bar H, \dpr_{\V a}\V\d$ are
$(l-1)\times(l-1)$ matrices. Since, however, the notation is (after a moment
of thought) self evident we shall use it also in the following without
describing the obvious meaning of the matrix elements of $U(t)$ and of the
corresponding ones of $U(t)^{-1}$.

The inverse of the matrix $U(t)$ is immediately computed (because $U(t)$ is
canonical):
%
$$
U(t)^{-1}=\bar U(-t) U_0(t)^{-1}=\bar U(-t)
\pmatrix{
\dpr_q S&{\dpr_q \V\d}&-\dpr_q R&0\cr
0&1&0&0\cr
-\dpr_p S&-{{\dpr_p \V \d}}&\dpr_p R&0\cr
-{\dpr_{\V a}S}&-(\dpr_{\V a}\V \d)^T&
{\dpr_{\V a} R}&1\cr}\Eq(6.19)$$
%

The $U(t)$ is the jacobian of a family of solutions of the equations of
motion, hence it verifies \equ(6.14) except for the initial condition;
so that:
%
$$W(t)\=U(t)U(0)^{-1}\Eq(6.20)$$
%

We proceed to investigate the functions $X^k(t)$ with the aim of
finding explicit conditions which can be used to determine the initial
data $v^k,\V h^k$, from the boundedness at $+\io$ of the $X^k$ and,
more in general, to determine recursive equations for the $X^k(\V\ps,t;\aa)$.

Let us write \equ(5.10) as:
%
$$\eqalign{
I=&R(p,{\V a},q)+ V_+(p,q,\V\psi,\m)\cr
\AA=&\V a+\V V_\su(p,q,\V\psi,\m)\cr}\qquad\eqalign{
\f=&S(p,{\V a},q)+V_-(p,q,\V\psi,\m)\cr
\aa=&\V\ps+\V\d(p,{\V a},q)+\V V_\giu(p,q,\V\psi,\m)\cr}\Eq(6.21)$$
%
where $\V a\in \Si_\m$ (defined in 1) of lemma 1, \S 5)
will be fixed throughout the analysis and it
will be dropped from the notations.  As discussed in \S5, the functions $V$
are analytic in $p,q,\V\psi,\m$ for $pq,\Im p, \Im q,\Im\ps_j$ small, say
$|pq|<\bar \k^2$, $|\Im p|,|\Im q|<\bar \k$, $|\Im \ps_j|<\bar \x$
and $|\m|<\m_0$, for
some $\bar \x,\bar \k,\m_0$ suitably chosen as functions of the hamiltonian
parameters; furthermore the $R,S$ are analytic in the same domain and the
$V$ are divisible by $\m$.

If we define $p_0(\aa),\V\psi^s(\aa)$ as the solution of the
equations:
%
$$\eqalign{
\bar\f=&S(p_0,0)+V_-(p_0,0,\V\ps^s,\m)\cr
\aa=&\V\psi^s+
\V \d(p_0,0)+
\V V_\giu(p_0,0,\V\ps^s,\m)\cr}\Eq(6.22)$$
%
we can remark that the above equations can be solved at $\m=0$ with a non
zero jacobian $\dpr_p S(p,0)$ (because at $(\bar p,0)$ it is $\f=\bar\f$
and therefore $\dot\f$ is, by the above definition of $\bar \f$, maximal so
that (since $q=0$) $S_p(\bar p,0)= - (g \bar p)^{-1} \dot \f$ $\neq 0$ and
$R_p(\bar p,0)=0$). The functions $p_0,\V\ps^s$ will be analytic in $\aa$
for $|\Im\aa_j|<\bar \x$, and $|\m|<\m_0$ imagining to redefine $\bar \x,\m_0$
so that they are the same here and in the analyticity domain of $V$, to avoid
introducing too many parameters.

We shall also use the following notations:
%
$$\eqalign{ & Z(p,q,\V\ps,\m) \=
V^0(p,q,\V\ps) + V(p,q,\V\ps,\m)\cr & X^s(\V\ps,t;\aa,\m) =
Z(p_0(\aa)e^{-g(1+\g')t},0,\V\ps +\V \ps^s(\aa),\m) \cr
& X^u(\V\ps,t;\aa,\m) = Z(0,q_0(\aa)e^{g(1+\g')t},\V\ps +
\V \ps^u(\aa),\m) \cr}\Eq(6.23)$$
%
where the functions $q_0,\V \ps^u$ are defined as in \equ(6.22)
exchanging the roles of $p$ and $q$.


Thus we can define:
%
$$\eqalign{
& X^0(\V\ps,t;\aa)= V^0(\bar p e^{-gt},0,\V \ps+\V\ps^{0s})\cr
& \bar X^s(\V\ps,t;\V\a)=V(p_0e^{-g(1+\g')t},0,\V\ps+\V\ps^s,\m)
-V^0(p_0e^{-gt},0,\V\ps+\V\ps^s,\m) \cr}\Eq(6.24)$$
%
$\g'$ being the correction to the Lyapunov exponent in lemma 1' so that
$X(t)=$ $X(\oo t,t;\aa)$ $=X^0(\oo t,t;\aa)+\bar X(\oo t,t;\aa)$; $\V
\ps^{0s}$ is simply $\aa- \V\d(\bar p,0,\V a)$ while $\V \ps^{0u}$ is $\aa-
\V\d(0,\bar q,\V a)$. Furthermore, all the functions $t\to X(\V\ps+\oo
t,t;\aa)$ are orbits on the stable whiskers.

The analyticity in $p$ implies that all the functions $ X(\V\ps,t;\aa)$
converge as $t\to\io$ at exponential rate.  The functions $X(\V\ps,t;\aa)$
are analytic in $t,\aa,\V\ps,\m$ in a domain:
%
$$\bar \DD=\{|\Im \a_j|<\bar \x,|\Im \ps_j|<\bar \x,\,
|\Im t|<\bar \x g^{-1}/2\, {\rm or}\, |\Re t|>Kg^{-1},
|\m|<\m_0\}\Eq(6.25)$$
%
for a suitable $K$ (so that the point $(p_0+\bar \k)e^{-gt}$ is inside the
analyticity domain for the $X$ functions), again by lemma 1' and having
once more redefined $\bar \k,\bar \x$ to avoid introducing too many symbols.

If $\r',\r,\x',\x$ are the analyticity parameters of the original
hamiltonian, we shall use, to measure the size of the vectors
$X=(X_+,\V X_\su,X_-,\V X_\giu)$ the dimensionless norm:
%
$$|X|={\r'}^{-1}|X_+|+\r^{-1}|\V X_\su|+{\x'}^{-1}|X_-|+\x^{-1}|\V
X_\giu|\Eq(6.26)$$
%
and the above statements can be summarized by the first of:
%
$$||\bar X||_{\bar \DD} \=\sup_{\bar \DD} |\bar X|
\le \bar v,\qquad|\g'|<\g_0\Eq(6.27)$$
%
where $\bar v,\g_0$ are suitable constants proportional to $|\m|\m_0^{-1}$
(see \S 5); the second bound
provides a further property and also comes from lemma 1'; of course, all
the above constants can be derived explicitly (if one wishes) from the
dimensional bounds in the proof of lemma 1' (\ie the above statement is
``constructive"); the second inequality is a quantitative bound on the
Lyapunov exponent, also part of lemma 1'.

The above remarks can be used to bound $\bar X^k_\nn$, the Fourier
transform with respect to the $\V\ps$ variables of the $k^{\rm th}$--
Taylor coefficient (in $\m$) of $\bar X$, defined in \equ(6.24); in fact
one immediately gets:
%
$$\eqalign{
|\bar X^k_\nn(t;\aa)|<&\bar v \m_0^{-k} e^{-\bar \x|\nn|}\cr
|\bar X^k_\nn(t;\aa)-\bar X^k_\nn(+\io;\aa)|<&
\bar v \m_0^{-k} e^{-\bar \x|\nn|}
e^{-g(1-\g_0)\Re t}\cr}\qquad {\rm in\ }\DD\Eq(6.28)$$
%
in a domain $\DD$ defined as in \equ(6.25) by omitting the
condition on the $\V\ps$ variables.

A further consequence of the above remarks is that if we define:
%
$$F(\V\ps,t;\aa,\m)=E\,[\m \dpr f(X^0+\bar X)+\dpr H_0(X^0+\bar X)-
\Bigl(\dpr H_0(X^0)+\dpr^2 H_0(X^0)\bar X\Bigr)]
\Eq(6.29)$$
%
where $E$ is the standard $2l\times2l$ matrix which in block form
looks like $\pmatrix{0&-1\cr1&0\cr}$ and $X^0,\bar X$ denote as above
$V^0(\bar p e^{-gt},0,\V\ps+\V\ps^{0s}),\,
\bar X(\V\ps,t;\aa)$, then:
%
$$
%F^k(\V\ps,t;\aa)=F^{[k]}(\V\ps,\aa_\m,t)\qquad{\it rm with}\qquad
||F||_{\bar \DD}<
%\bigl({|\m|\over\m_0}\bigr)
\bar f\Eq(6.30)$$
%
for a suitable $\bar f$; again here we possibly redefine the constants
$\m_0,\bar \x,\bar \k$.
In the notations of \equ(6.8) $\div$ \equ(6.12), it is $F^k(t)=
F(\oo t, t; \aa,\m)$.

The \equ(6.30) implies the bounds:
%
$$\eqalign{
|F^k_\nn(t;\aa)|<&\bar f \m_0^{-k} e^{-\bar \x|\nn|}\cr
|F^k_\nn(t;\aa)-F^k_\nn(+\io;\aa)|<&\bar f \m_0^{-k} e^{-\bar\x|\nn|}
e^{-g(1-\g_0)\Re t}\cr}\Eq(6.31)$$
%
in $\DD$.

If $\aa$ is replaced by $\aa_\m$, all the above estimates \equ(6.31),
\equ(6.28) hold provided $|\Im \aa_\m|<\bar\x$ for $|\m|<\m_0$ and since $F^k$
depends in such a case only on the first $k-1$ coefficients of $\aa_\m$,
the proper notation will be $F^k(\V\ps,t;\aa_\m^{[<k]})$.

{\it Remark}: in the case in which $P_0(I,\AA,\f)$ is an ordinary pendulum
hamiltonian the matrix elements of $U_0(t),W(t)$ can be computed
essentially explicitly (see appendix A9): they have some remarkable
analyticity and symmetry properties in $t$; namely the matrix elements of
$U_0(t)$ are holomorphic in the domain $|\Im t|<(1-\e)\p/(2g_0)$, for
$\e>0$, and are bounded there by $\bar u\e^{-2}$ for some $\bar u$, see
\equ(A9.8); furthermore the $W(t)$ matrix elements of the block
$(+,\su)\times(+,\su)$ and those of the block $(-,\giu)\times(-,\giu)$ are
even in $t$ while those of the other two blocks are odd.

In general the ``rows"
of the matrix $U_0(t)^{-1}$, \equ(6.19), will be denoted
$\x_+,\x_{\su}^0,\x_-,\x_\giu+\x^0_\giu$, where $\x^0_{\su j}=(0,\V e_j
,0,0)$, $\x^0_{\giu j}=(0,0,0,\V e_j)$ with $\V e_j$ being the unit
$(l-1)$-vector with the $j$-th component equal to $1$.  The splitting
$\x_\giu+\x_\giu^0$ is performed so that all the matrices $\x_j$ have
the last $l-1$ components zero.

With the above notations for $\bar X,F,U(t)^{-1}$ we deduce immediately
an explicit expression for $X^k$ in terms of $F^k$; it is
$X^k(t)=U_0(t)Y^k(t)$ with:
%
$$Y^k(t)=\bar U(t)\left(
U_0(0)^{-1} X^k(0) + \ii_0^t \bar U(-\t) U_0(\t)^{-1} F^k(\t)d\t \right)
\Eq(6.32)$$
%
so that one finds:
%
$$\eqalignno{
Y^k_+(t)=&e^{-gt}\Bigl(\x_+(0) X^k(0)+\ig_0^t e^{g\t}\x_+(\t)F^k
\,d\t\Bigr)\cr
Y^k_\su(t)=&\V h^k +\ig_0^t\V F^k_\su\,d\t&\eq(6.33)\cr
Y^k_-(t)=&e^{gt}\Bigl(\x_-(0) X^k(0)+\ig_0^te^{-g\t}\x_-(\t)F^k\,
d\t\Bigr)\cr
Y^k_\giu(t)=&t\,\bar H\Bigl(\V h^k +\ig_0^t\V F^k_\su\,d\t\Bigr)+
\x_\giu(0) X^k(0)+X^k_\giu (0)+\cr
&+\ig_0^t [-\t\bar H F^k_\su+\x_\giu(\t)F^k+F^k_\giu]\,d\t\cr}$$
%
where the arguments of $F^k$ are $(\oo\t,\t;\aa_\m^{[< k]})$;
when needed we regard $m$ vectors as $1\times m$ or $m\times 1$ matrices
and use the standard rules for matrix multiplication (\eg in the first of
\equ(6.33) $\x_+$ is a $(1\times 2l)$ matrix while $X^k$ is a $(2l\times
1)$ matrix and their product is a scalar); in the
following formulae we shall drop the explicit dependence on $\aa$ as such
dependence plays here no role. Hence, since the boundedness as $t\to+\io$
of $X^k(t)$ is equivalent to the boundedness of $Y^k(t)$, a straightforward
asymptotic analysis based on the bounds \equ(6.28),\equ(6.31) shows
that the latter corresponds to the following conditions on the initial
data $X^k(0)$ for $k\ge1$:
%
$$\eqalignno{
&\media{\V F^k_\su(\cdot,\io)}=0\cr
&\x_-(0) X^k(0)+\ig_0^\io
e^{-g\t}\x_-(\t)F^k(\oo\t,\t)\,d\t=0&\eq(6.34)\cr
&\media{\V F^k_\giu(\cdot,\io)}+
\bar H\Bigl(\V h^k+\ig_0^\io\bigl[\V F^k_\su(\oo\t,\t)-
\V F^k_\su(\oo\t,\io)\bigr]\,d\t
-(\oo\cdot\V\dpr)^{-1}\V F^k_\su(\V0,\io)\Bigr)=\V0\cr}$$
%
where to derive the third equality we have used the first identity and the
fact that $\x_\giu(\io)=0$, see the comment following \equ(6.18);
$\langle \cdot\rangle$ denotes average over $\V \ps$,
$(\oo\cdot \dpr)^{-1}$ acts by dividing the $\nn$--Fourier coefficient
(with respect to $\V\ps$) by $i \oo\cdot \nn$: notice that it is possible
to apply  $(\oo\cdot \dpr)^{-1}$ to $\V F_\su(\cdot,\io)$ because of the
first identity.
The first co