%INSTRUCTIONS:
%This paper needs to be tex'ed twice.
%The first time it generates an auxiliary called d.aux
%which contains references etc.
%ENDINSTRUCTIONS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Page geometry.  \magnification has to come first: the plain TeX macro
% resets \hsize and \vsize, so the explicit sizes below must follow it.
\magnification=\magstep1
\hoffset=0.cm
\voffset=-0.5truecm
\hsize=16.5truecm
\vsize=24.truecm
% Line and paragraph spacing.
\baselineskip=14pt plus0.1pt minus0.1pt
\parindent=12pt
\lineskip=4pt
\lineskiplimit=0.1pt
\parskip=0.1pt plus1pt
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%%%%%GRECO%%%%%%%%%
%
% Short names for the Greek letters, one alias per line.
% NOTE: \b, \c, \d, \t (accents) and \o, \O, \l, \L, \P (text symbols) are
% plain TeX macros; their original meanings are overwritten here.
%
% Lowercase.
\let\a=\alpha
\let\b=\beta
\let\g=\gamma
\let\d=\delta
\let\e=\varepsilon
\let\z=\zeta
\let\h=\eta
\let\th=\vartheta
\let\k=\kappa
\let\l=\lambda
\let\m=\mu
\let\n=\nu
\let\x=\xi
\let\p=\pi
\let\r=\rho
\let\s=\sigma
\let\t=\tau
\let\iu=\upsilon
\let\f=\varphi
\let\c=\chi
\let\ps=\psi
\let\o=\omega
% \y is a second name for \upsilon (same as \iu).
\let\y=\upsilon
% Uppercase.
\let\G=\Gamma
\let\D=\Delta
\let\Th=\Theta
\let\L=\Lambda
\let\X=\Xi
\let\P=\Pi
\let\Si=\Sigma
\let\F=\Phi
\let\Ps=\Psi
\let\O=\Omega
\let\U=\Upsilon
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ---------------------------------------------------------------------------
% Symbolic equation labels and cross references.
%
% A label NAME attached to a formula is stored in the control sequence
% "eNAME" during the current run, and written to the auxiliary file (stream
% 15) as a line "\FU (NAME){...}".  When that file is read back on the next
% run, \FU stores the value in "fuNAME", which is what makes forward
% references work.  \equ(NAME) is the command used in the text to cite a label.
% ---------------------------------------------------------------------------
%
% Counters: \numsec is the current section number, \numfor the number of the
% next formula (they are printed through \veroparagrafo and \veraformula).
\global\newcount\numsec\global\newcount\numfor
% Depth of the standard strut box (used by the draft-mode margin label).
\gdef\profonditastruttura{\dp\strutbox}
% \senondefinito{NAME} ("if not defined") opens a conditional that is true
% when \csname NAME\endcsname is \relax, i.e. not yet defined.  The caller
% supplies the matching \else and \fi.
\def\senondefinito#1{\expandafter\ifx\csname#1\endcsname\relax}
% \SIA PREFIX,NAME,VALUE<space>  ("let it be"): globally defines the control
% sequence PREFIX+NAME as the full expansion (\xdef) of VALUE if it is still
% undefined; otherwise it \write's a warning to stream 16 (the Italian text
% says "#1,#2 has already been defined").  The third argument is delimited by
% a space.
\def\SIA #1,#2,#3 {\senondefinito{#1#2}
\expandafter\xdef\csname #1#2\endcsname{#3} \else
\write16{???? ma #1,#2 e' gia' stato definito !!!!} \fi}
% \etichetta(NAME) ("label"): typesets "(section.formula)", records it under
% "eNAME", advances the formula counter globally, writes the \FU line to
% stream 15 and a trace line to stream 16.  The \write's are not \immediate,
% so they are carried out when the page is shipped out.
\def\etichetta(#1){(\veroparagrafo.\veraformula)
\SIA e,#1,(\veroparagrafo.\veraformula)
 \global\advance\numfor by 1
 \write15{\string\FU (#1){\equ(#1)}}
 \write16{ EQ \equ(#1) == #1  }}
% \FU(NAME){VALUE}: called from the auxiliary file; stores VALUE in "fuNAME".
\def \FU(#1)#2{\SIA fu,#1,#2 }
% \etichettaa(NAME): same as \etichetta, but the number is prefixed by "A"
% (appendix numbering).
\def\etichettaa(#1){(A\veroparagrafo.\veraformula)
 \SIA e,#1,(A\veroparagrafo.\veraformula)
 \global\advance\numfor by 1
 \write15{\string\FU (#1){\equ(#1)}}
 \write16{ EQ \equ(#1) == #1  }}
% \BOZZA ("draft"): when called, redefines \alato so that the symbolic label
% name is printed in script size, shifted left by \hsize plus 1.2truecm.
\def\BOZZA{\def\alato(##1){
 {\vtop to \profonditastruttura{\baselineskip
 \profonditastruttura\vss
 \rlap{\kern-\hsize\kern-1.2truecm{$\scriptstyle##1$}}}}}}
% Default \alato ("at the side"): prints nothing.
\def\alato(#1){}
% Printed forms of the section and formula counters.
\def\veroparagrafo{\number\numsec}\def\veraformula{\number\numfor}
% User-level labelling commands:
%   \Eq(NAME)  label placed with \eqno (for use in $$...$$ displays);
%   \eq(NAME)  label without \eqno (e.g. inside \eqalignno);
%   \Eqa, \eqa the same with the "A" (appendix) numbering.
\def\Eq(#1){\eqno{\etichetta(#1)\alato(#1)}}
\def\eq(#1){\etichetta(#1)\alato(#1)}
\def\Eqa(#1){\eqno{\etichettaa(#1)\alato(#1)}}
\def\eqa(#1){\etichettaa(#1)\alato(#1)}
% \eqv(NAME): value read from the auxiliary file ("fuNAME").  If it is not
% defined, a club-suit sign followed by NAME is printed and a message is
% written to stream 16.
\def\eqv(#1){\senondefinito{fu#1}$\clubsuit$#1\write16{No translation for #1}%
\else\csname fu#1\endcsname\fi}
% (Earlier variant of \eqv without the log message, kept for reference.)
%\def\eqv(#1){\senondefinito{fu#1}$\clubsuit$#1\else\csname fu#1\endcsname\fi}
% \equ(NAME): reference to a label.  Uses the value defined in this run
% ("eNAME") if there is one, otherwise falls back to \eqv.
\def\equ(#1){\senondefinito{e#1}\eqv(#1)\else\csname e#1\endcsname\fi}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Alternative names for the equation-label commands defined above.
% Display labels (placed with \eqno):
\let\EQS=\Eq
\let\EQ=\Eq
% Label without \eqno:
\let\eqs=\eq
% Appendix ("A"-numbered) variants:
\let\Eqas=\Eqa
\let\eqas=\eqa
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \V X: vector notation (applies \vec to the next token);
% \dpr: partial-derivative sign; \ciao: alias for \bye (ends the job).
\def\V#1{\vec#1}\let\dpr=\partial\let\ciao=\bye
% \io and \i both stand for \infty.
% NOTE: \i overwrites the plain TeX dotless-i macro.
\let\io=\infty\let\i=\infty
% \ii and \ig both stand for \int.
\let\ii=\int\let\ig=\int
% \media{X}: X enclosed in angle brackets, \langle X \rangle.
\def\media#1{\langle{#1}\rangle}
% \guida: row of dots (one per 1em box) used as leaders to fill a line.
\def\guida{\leaders\hbox to 1em{\hss.\hss}\hfill}
% \tende{X}: a stretchable right arrow with X set in script style on the row
% beneath it; built as a two-row \ialign inside a \vtop.
\def\tende#1{\vtop{\ialign{##\crcr\rightarrowfill\crcr
              \noalign{\kern-1pt\nointerlineskip}
              \hglue3.pt${\scriptstyle #1}$\hglue3.pt\crcr}}}
% \otto: \leftarrow and \to pulled together by negative kerns, giving an
% arrow with a head at each end (math mode).
\def\otto{{\kern-1.truept\leftarrow\kern-5.truept\to\kern-1.truept}}
% \pagina: fill and eject the current page; \acapo: forced line break.
\def\pagina{\vfill\eject}\def\acapo{\hfill\break}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%LATINORUM
% Latin abbreviations, set in italics inside an unbreakable \hbox.
% All but \etc end with an explicit control space.
\def\etc{\hbox{\it etc}}
\def\eg{\hbox{\it e.g.\ }}
\def\ie{\hbox{\it i.e.\ }}
% \ap is defined again, without the \hbox, just before the title.
\def\ap{\hbox{\it a priori\ }}
\def\aps{\hbox{\it a posteriori\ }}
% \fiat expands to an empty group.
\def\fiat{{}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%DEFINIZIONI LOCALI
% Paper-specific notation, grouped by kind.  Every macro is defined exactly
% once, with the meaning it had at the end of the earlier, partly repeated
% list (\ff, \zz, \nn, \q, \mb and \lis used to be defined more than once).
% NOTE: \AA, \aa, \tt, \=, \Im and \Re overwrite the plain TeX macros of the
% same names.
%
% Vectors (\V applies \vec to the next token).
\def\AA{{\V A}}\def\FF{{\V F}}\def\mm{{\V m}}
\def\xx{{\V x}}\def\yy{{\V y}}\def\zz{{\V z}}
\def\aa{{\V\a}}\def\dd{{\V\d}}\def\ff{{\V\f}}
\def\nn{{\V\n}}\def\oo{{\V\o}}\def\tt{{\V\th}}
\def\pps{{\V\ps{\,}}}\def\0{{\V0}}\def\Dpr{{\V\dpr}}
% Three names for the same vector \beta.
\def\bv{{\V\b}}\def\bB{{\V\b}}\def\bb{{\V\b}}
% Calligraphic letters.
\def\UU{{\cal U}}\def\BB{{\cal B}}\def\DD{{\cal D}}\def\CC{{\cal C}}
\def\II{{\cal I}}\def\EE{{\cal E}}\def\MM{{\cal M}}\def\LL{{\cal L}}
\def\Sol{{\cal S}}\def\TT{{\cal T}}\def\RR{{\cal R}}\def\PP{{\cal P}}
% Overlined, barred, hatted and tilded symbols.
\def\lis#1{{\overline #1}}
\def\mb{{\bar\m}}\def\kb{{\bar\k}}\def\rb{{\bar\r}}\def\xb{{\bar\x}}
\def\cb{{\bar c}}
\def\thb{{{\bar \th}}}\def\fb{{{\bar \f}}}\def\psb{{{\bar \ps}}}
\def\xc{{\hat\x}}\def\hB{{\hat B}}
\def\ct{{\tilde \CC}}
\def\rt{{\tilde\r}}\def\mt{{\tilde\m}}\def\kt{{\tilde\k}}
% Whisker symbols W_+ and W_-.
\def\WI{{W_+}}\def\WS{{W_-}}
% Upright operator names.
\def\sign{{\rm sign\,}}\def\atan{{\,\rm arctg\,}}
\def\Im{{\rm\,Im\,}}\def\Re{{\rm\,Re\,}}
% Relations and arrows.
\def\={{ \; \equiv \; }}\def\su{{\uparrow}}\def\giu{{\downarrow}}
\let\ch=\chi
% Recurring expressions.
\def\q{{{q_0/2}}}
\def\BAK{{{{\lis A}^2\over\lis K}}}
\def\bak{{\bar A^2\over\bar K}}
\def\bul{{l\bar u\e^{-2}}}
%%%%%%%%%%%%%%%%%%
% Read the cross references written by the previous run, if d.aux exists.
% Input stream 14 is used only for the existence test (\ifeof is true when
% the file could not be opened); it is closed on both branches so that it
% is no longer attached to d.aux when the file is \input and then opened
% for output.
\openin14=d.aux
\ifeof14 \closein14 \else \closein14 \input d.aux \fi
% The \FU lines produced by the equation labels go to d.aux via stream 15.
\openout15=d.aux
%%%%%%%%%%%%%%%%%%%%%%
% Redefine \ap without the \hbox used in its earlier definition.
\vskip0.pt\def\ap{{\it a priori\ }}
\vglue1.truecm
% Title of the paper.
{\centerline {\bf DRIFT AND DIFFUSION IN PHASE SPACE} }
% Initialise the section and formula counters used by the equation labels.
\vskip.5truecm\numsec=1\numfor=1

\centerline{
Luigi Chierchia\footnote{${}^1$}{Dip. di Matematica, $II^a$
Universit\`a di Roma, ``Tor Vergata",  via della Ricerca Scientifica,
00133 Roma, Italia},
Giovanni Gallavotti\footnote{${}^2$}{Dip. di Fisica,
Universit\`a di Roma, ``La Sapienza", P. Moro 5, 00185 Roma, Italia}
}
\vskip1.truecm

\noindent
{\bf Abstract:}
{\it The problem of stability of the action variables (\ie of the adiabatic
invariants) in perturbations
of completely integrable (real analytic) hamiltonian systems with more than
two degrees of freedom is considered. Extending the analysis of {\rm [A]},
we work out a general quantitative theory, from the point of view of
{\sl dimensional analysis}, for {\sl a priori unstable systems}
(\ie systems for which
the unperturbed integrable part possesses separatrices), proving, in
general, the existence of the so--called Arnold's diffusion
and establishing upper bounds on the time needed for
the perturbed action variables to {\sl drift} by an amount of $O(1)$.

\noindent
The above theory can be extended so as to cover cases of {\sl a priori
stable systems} (\ie systems for which separatrices are generated
near the resonances by the perturbation).
As an example we consider the ``D'Alembert precession problem
in Celestial Mechanics"
(a planet modelled by a rigid rotational ellipsoid with small
``flatness" $\h$, revolving on a given Keplerian orbit of eccentricity
$e=\h^c$, $c>1$, around a fixed star and subject only to Newtonian
gravitational forces) proving in such a case the existence of
Arnold's drift and diffusion; this means that
there exist initial data for which, for any $\h\neq 0$ small enough,
the planet changes, in due ($\h$--dependent) time, the inclination of the
precession cone by an amount of $O(1)$. The homo/heteroclinic angles
(introduced in general and discussed in detail together with homoclinic
splittings  and scatterings) in the D'Alembert problem are not
exponentially small with $\h$ (in spite of first order predictions based
upon Melnikov type  integrals).
}

\vskip1.truecm
\noindent {\bf Key words:}
{\it perturbed hamiltonian systems, stability theory, Arnold's diffusion,
homoclinic splitting, heteroclinic trajectories, KAM theory, whiskered
tori, dimensional estimates, Celestial Mechanics, D'Alembert Equinox
Precession problem.
}

\vfill\eject

\vglue2.truecm

\noindent{\bf CONTENTS} \vskip.5truecm

\line{\sl  {\bf\S1\phantom{2}}
\qquad Introduction and description of the results
\quad\dotfill\quad 3}
\line{\sl  {\bf\S2\phantom{2}}
\qquad A priori unstable systems. Regularity assumptions
\quad\dotfill\quad 8}
\line{\sl  {\bf\S3\phantom{2}}
\qquad The free system. Diffusion paths and whisker ladders
\quad\dotfill\quad 11}
\line{\sl  {\bf\S4\phantom{2}}
\qquad Motion on the separatrices. Melnikov integrals
\quad\dotfill\quad 14}
\line{\sl  {\bf\S5\phantom{2}}
\qquad Existence of ladders of whiskers
\quad\dotfill\quad 17}
\line{\sl  {\bf\S6\phantom{2}}
\qquad Large whiskers. Homoclinic points and their angles
\quad\dotfill\quad 36}
\line{\sl  {\bf\S7\phantom{2}}
\qquad Whisker ladders and rounds density
\quad\dotfill\quad 49}
\line{\sl  {\bf\S8\phantom{2}}
\qquad Heteroclinic intersections. Drift and diffusion along
directly open paths
\quad\dotfill\quad 52}
\line{\sl  {\bf\S9\phantom{2}}
\qquad A class of exactly soluble homoclines
\quad\dotfill\quad 60}
\line{\sl  {\bf\S10}
\qquad Homoclinic scattering. Large separatrix splitting
\quad\dotfill\quad 63}
\line{\sl  {\bf\S11}
\qquad Variable coefficients. Fast mode averaging
\quad\dotfill\quad 72}
\line{\sl  {\bf\S12}
\qquad Planetary precession. Existence of drift and diffusion
\quad\dotfill\quad 79}

\vskip.5truecm
\noindent{\bf Appendices:}
\vskip.3truecm

\line{\sl  {\bf\S A1\phantom{2}}
\qquad Resonances: Nekhorossev theorem
\quad\dotfill\quad 89}
\line{\sl  {\bf\S A2\phantom{2}}
\qquad Diffusion paths  and diophantine conditions
\quad\dotfill\quad 90}
\line{\sl  {\bf\S A3\phantom{2}}
\qquad Normal hyperbolic coordinates for a pendulum
\quad\dotfill\quad 92}
\line{\sl  {\bf\S A4\phantom{2}}
\qquad Diffusion sheets. Relative size of the time scales
\quad\dotfill\quad 99}
\line{\sl  {\bf\S A5\phantom{2}}
\qquad Divisor bounds
\quad\dotfill\quad 100}
\line{\sl  {\bf\S A6\phantom{2}}
\qquad The equinox precession
\quad\dotfill\quad 101}
\line{\sl  {\bf\S A7\phantom{2}}
\qquad Application to the Earth precession
\quad\dotfill\quad 101}
\line{\sl  {\bf\S A8\phantom{2}}
\qquad Trigonometry of the Andoyer-Deprit angles
\quad\dotfill\quad 103}
\line{\sl  {\bf\S A9\phantom{2}}
\qquad Determinants, wronskians, Jacobi's map
\quad\dotfill\quad 103}
\line{\sl  {\bf\S A10}
\qquad High order perturbation theory and averaging
\quad\dotfill\quad 111}
\line{\sl  {\bf\S A11}
\qquad Scattering phase shifts and intrinsic angles
\quad\dotfill\quad 118}
\line{\sl  {\bf\S A12}
\qquad Compatibility. Homoclinic identities
\quad\dotfill\quad 120}
\line{\sl  {\bf\S A13}
\qquad Second (and third) order whiskers and phase shifts
\quad\dotfill\quad 123}
\line{\sl  {\bf\S A14}
\qquad Development of the perturbatrix
\quad\dotfill\quad 131}
\vskip.3truecm

\line{\sl  \phantom{{\bf\S A14}}
\qquad References \quad\dotfill\quad 134}

\vfill\eject

{\bf\S1 Introduction and description of the results}
\vskip1.truecm\numsec=1\numfor=1
A typical question about diffusion in phase space is the following:
could the Earth axis tilt? To put the question in mathematical form we
consider a model for the Earth precession, well known since D' Alembert,
[L].

Let a planet $\EE$ be a homogeneous rigid body with rotational symmetry
about its $N$-$S$ axis and with polar and equatorial inertia moments
$J_3,J_1$: hence with {\it mechanical} polar {\it flattening}
$\h=(J_3-J_1)/J_3$, which is supposed to be small. Let the planet move on a
keplerian orbit $t\to\V r_T(t)$, with eccentricity $e$, about a fixed
heavenly body $\Sol$ with mass $m_\Sol$; also $e$ will be supposed to be small
and in fact we shall assume that {\it the eccentricity and the flattening
coefficient are related by a power law}: $e=\h^c$ for some positive
constant $c$.  Wishing to be closer to reality
one could also assume that $\EE$ had a satellite $\MM$: what follows could
be adapted to this stranger situation (in the case of the $\EE$arth this is
particularly relevant as the $\MM$oon accounts for $2/3$ of the lunisolar
precession).  But here, as will be all too clear below, we are
addressing a purely conceptual question and we have no pretension that our
results apply directly to the solar system or subsystems thereof.

Regarding the flattening $\h$ (and hence the eccentricity $e$)
as a (non vanishing)
parameter, we consider initial conditions close to those in which the planet
is rotating around its symmetry axis, at a daily angular velocity $\o_D$,
and precessing around the normal to the orbit, at an angular velocity
denoted $\o_p\equiv-\h\o$, on a cone with inclination $i$.  And we ask
whether, {\it no matter how small the flattening coefficient $\h$ may be}
(below some $\h_0$), there is an
initial condition such that, after due time, one can find the planet
precessing on a cone with inclination $i'\ne i$, with $i,i'$ fixed {\it a
priori, independent of} $\h$.  Such a phenomenon will be called {\it
drift} in phase space.

We have not worried, above, about fine points like the distinction between
the symmetry axis of $\EE$ and the angular momentum or the angular velocity
axes: such a distinction is not a minor one and is of course relevant to a
rigorous analysis of the problem which we defer to \S12.

Closely related to the drift in phase space is the {\it diffusion}: we
shall see that the same mechanism that we discuss to show the existence of
drift also shows the existence of orbits along which the inclination does
not increase monotonically (on average) from $i$ to $i'$ but rather it
evolves, on a suitably large scale of time, so as to either increase or
decrease the inclination by an amount $O(\e)$ according to a prefixed
pattern at least for a number of time steps of order $\gg O(\e^{-1})$, for
some $\e$ small compared to $\h$.  If one chooses the initial datum
randomly and with equal distribution among the initial data of the above
orbits, one will see the inclination change as a brownian motion, at least
as long as it takes to reach the target value $i'$ (or its symmetric value
with respect to $i$).

This work is a generalization of the well known example
given by Arnold, [A]. The basic feature of Arnold's example was that the
drift took place around invariant tori of dimension $l-1$ if $l$ is the
number of degrees of freedom of the system and that the system considered had a
very special form: the tori around which the diffusion took place
were explicit exact solutions of the equations of motion.  This is a
property which does not hold in general and a fraction of the work in this
paper is devoted to a detailed construction of the tori and of the flow
around them (an analysis started in [M]). Furthermore the
instability of the tori is also explicit in the model in [A].  The general
system, however, will be such that most of the tori will have dimension $l$
and the unstable tori arise near resonances.

Some details of the mechanism generating unstable tori of dimension $l-1$
along which diffusion takes place may be quite involved, in general.

The point of view of this work has been to see if, starting with the ideas
in the well known example of Arnold, one could develop the theory to a
point to make it applicable to the above celestial problem (for which the
invariant tori arise only near resonances).  We felt that
such a precise goal, if pursued without further simplifying hypotheses,
would provide a natural selection of possible assumptions
(which could, otherwise, appear as {\it ad hoc} to the reader).

To achieve such a goal several intermediate problems had to be solved.

1) In \S2 we define precisely a class of systems that we study: it is a
system of $l-1$ rotators coupled to a pendulum. Arnold's example is in
this class, but not so the D' Alembert model for the Earth precession.
The simplifying aspect of the systems in such a class is that it is obvious from
their definition that they are unstable (the instability simply occurs near
the pendulum separatrix): thus we call them {\it \ap unstable}. A
detailed theory of such systems is necessary to attack the far harder \ap
{\it stable} systems (defined below).

2) In \S3 we point out the main (easy) properties of the
uncoupled ({\it free}) systems of a pendulum and several rotators.

3) In \S4 we introduce the key notion of {\it diffusion path}: it is a
curve in the rotator action space, along which the free rotators angular
velocities form a vector with suitable diophantine properties. It will play
the role of marking the projection in action space of a drifting or
diffusing motion.

4) In \S5 we prove that the points of the diffusion curves can be
interpreted as $l-1$ dimensional invariant tori: most of them persist after
the perturbation (\ie the coupling between the pendulum and the rotators)
is switched on. The stability of low dimensional tori has been studied in
the literature by various authors: we present it from scratch because we
need very detailed bounds and analyticity properties of the perturbed tori
equations and a simple {\it normal form} for the motion of a large
class ($l+1$ dimensional) of nearby points. The bounds must be general and
at the same time simple enough to be applicable
to the harder cases that we analyze later (like the D' Alembert model).
Hence we need results stated in terms of the few really important features
of the hamiltonian. We therefore proceed by identifying the relevant
parameters (basically ratios of the independent time scales that govern the
motions) and produce a proof in which the only ingredient is the use of
the Cauchy theorem to bound the derivative of a holomorphic function by the
ratio between the maximum modulus, in the considered analyticity domain, and
the distance to the boundary of the analyticity domain. We call, for
obvious reasons, such bounds {\it dimensional bounds}, see lemmata 1,1'
of \S 5. The normal coordinates that we describe are a generalization of the
celebrated Jacobi coordinates near the unstable equilibrium point of the
pendulum (see lemma 0 of \S 5, and appendix 9 for a description of the classical
Jacobi map).

5) In \S6 we develop the perturbation theory of the asymptotic
manifolds of the invariant tori constructed in \S5; following Arnold,
we call such manifolds {\it whiskers}.
The theory is discussed to arbitrary order of
perturbation theory: such a generality is necessary only if one has in
sight applications to {\it a priori} stable systems (such as
the celestial one of D' Alembert).
Such analysis requires establishing, for the purpose of a consistency check,
some remarkable {\it homoclinic identities}, established in appendix A12.
\hfill\break      %
For the models in the class of the \ap unstable systems the theory to
first order is sufficient and we deduce that the homoclinic angles (\ie
the angles between tangent vectors to the stable and to the unstable
whiskers) are, no wonder, described by a tensor (that we call the {\it
intersection tensor}) related to the Melnikov integrals, reproducing
results of Melnikov which are well known, [Me].

6) In \S7 we show that, given a diffusion path, if the perturbation has
suitable properties (expressed in terms of some explicit condition of
absence of low order resonant harmonics in the Fourier development of the
perturbation at path points) then the set of points along the path
representing invariant tori (for the full hamiltonian) is so dense that one
can find a sequence of them spaced by an amount far smaller than the size
of the homoclinic angles.

7) In \S8, using the normal form described in \S5 in a very essential way,
we show that in the assumptions of \S7 the diffusion path is {\it open for
diffusion} and show the existence of initial conditions which evolve in
time so that the projection of the motion in action space follows the
diffusion path. We also  find an {\it explicit} estimate of the time
needed by the drifting motions to reach the other extreme of the
diffusion path. The path is independent of the size $\m$ of the perturbation
and it is non trivial (\ie not a single point) if $l>3$
(no diffusion or drift are possible if $l=2$ by the KAM stability).

The time it takes is of the order $O(\exp-k\m^{-2})$: Arnold's example
is covered by the theorem, but our result is less general than Arnold's one
as it can be applied to diffusion paths which are segments of length of $O(1)$
but not arbitrarily placed on the action axis: this is the price that we
have to pay to get concrete bounds on the drift time (and not only a
finiteness result). We do not know if this restriction would also be
present by using Arnold's method (\ie whether Arnold's method could give,
in his example, actual constructive upper bounds on the diffusion time).

8) In \S9 we begin to worry about the fact that the above analysis does
not cover \ap unstable systems in which the pendulum Lyapunov exponent
(\ie in physical terms the gravity acceleration), that we call here
$\h$, is not fixed but it is linked to the perturbation size (usually
much smaller) that we call $\m$.  The reason is that in such cases the
first order of perturbation theory is ``degenerate" in the sense that it
predicts homoclinic splitting with some angles of size
$O(\m\exp-k\h^{-1/2})$, for some $k>0$.  This leads essentially to a
situation in which the first order perturbation theory is not
sufficient, even to establish the existence of the homoclinic
splitting, not to speak of the existence of drift: it is well known
that there are examples in which the situation does not improve by
going to higher order (see \eg [La2]).

In fact the problem is already quite hard in the case of a forced pendulum
(\ie $l=2$) and with the rotator being a clock model, perfectly
isochronous; this means that the rotator action $B$ appears in the form of
an additive term in the hamiltonian equal to $\o B$ and the rest of the
hamiltonian depends only on the pendulum coordinates $I,\f$ and on the
conjugate angle $\l$, ``position of the clock arm".  If the perturbation
size is supposed $\m=O(\h^d)$ for some $d>0$ the problem is non trivial (a
case reducible to the ones treated in \S6,\S7,\S8 would be if
$\m=O(\exp-{c\h^{-b})}$ with $b>1/2$: but this is, unfortunately, a case of
little interest in view of the expected size of $\m$ in the applications).

If $l>2$ the angles are in general rather hard to describe:
we find some rather implicit expressions for them, in general, but we can
make use of them in the one case with $l=3$ which motivated our work
(\ie the D' Alembert equinox precession model). Actually we point out an
ambiguity about what one defines to be the homoclinic angles of splitting
as there are at least two different interesting sets of coordinates that
can be considered. To relate them we introduce the concept of homoclinic
phase shift (a quite remarkable notion in itself: see item 13) below for
a qualitative description of it).

In general, in the cases with an exponentially small splitting to first
order, we {\it do
not} discuss a proof of the existence of a homoclinic point: although
the results that we have developed are probably sufficient for
constructing a proof.  The reason is not only to cut a little shorter
this paper but mainly because the theory is, nevertheless, not empty:
in fact we can apply it to a special but wide class of models for which
the homoclinic point problem is (well known to be) exactly soluble (in
the sense that one can show the existence, and locate exactly the
position, of the homoclinic point).  We call such class the {\it even
models}: as the property is based on a symmetry of such hamiltonians.
Many models of forced pendula fall in this class that we introduce and
treat, for completeness, in \S9.

9) In \S10 we discuss in more detail the notion of homoclinic phase
shift particularly in the case of even models with $l=3$, in which one
of the rotators is a clock and the other is ``slow", \ie its free
angular velocity is of order $\h$ while also the pendulum gravity
constant is of order $\h$.  The introduced formalism allows us to show
that the phenomenon of {\it large homoclinic splitting} takes place
even in presence of fast rotations, {\it as
long as there is at least one slow one among them}: this property
holds only in systems with $l\ge3$ (and generically it does happen,
as we show) and in spite of first order (Melnikov type) computations
(which predict exponentially small splittings).
Some detailed calculations are performed in appendix A13
and they are interesting by themselves.

The existence of one fast rotation and other slow ones looks very
special but we show in \S12 that the D' Alembert precession model,
which is \ap stable, is reducible to such a case: this is due to the
extra degeneracies present in all celestial problems.

10) The actual application of the theory to even models with $l=3$, relevant
for the precession problem, requires some extra work performed in \S11 and
the technique is also an illustration of a rigorous application of the
usually qualitative {\it averaging methods}.

11) In \S12 we finally study the \ap stable D' Alembert precession
model.  The original D' Alembert model took the planet orbit to be
circular: in this case the model has $l=2$ and diffusion is not
possible.  Therefore we take the orbit to be keplerian with
eccentricity $e>0$; this leads to a large class of models obtained by
truncating the eccentricity series to order $k$; we study for
simplicity only the case $k=2$: the general case ($k$ arbitrary), does
not seem to offer more difficulties, except notational ones.  The work
having been organized in order to treat this case, the discussion is
rather simple.

We choose in our example as diffusion path a line which has the
physical interpretation of a $1:2$ resonance between the ``day" period
and the ``year" period, and is such that a motion along it has the
interpretation of changing the size of the angle between the ecliptic
and the angular momentum of the planet (``inclination").  We just have
to check that the model can be reduced, by a suitable change of
coordinates, to a $l=3$ system of a pendulum with small gravity of
order $\h$ forced by a fast clock and by a slow anisochronous rotator;
the perturbation parameter is the eccentricity $e$ of the orbit, which
we have to take small with $\h$, \eg $e=\h^c$, for some convenient
$c>0$.  The model is even, in the sense of \S9,\S10, and the theory of
\S9,\S11 fully applies at least to portions of $O(1)$ of the diffusion
path: for many of them we thus get the existence of drift (and
diffusion).

13) The notion of {\it homoclinic scattering} and {\it phase shifts}
arises naturally as a byproduct of the analysis performed to describe
the phase shifts occurring on the homoclinic motion and near it.
Calling $\aa$ the rotators angular coordinates and $\f$ the pendulum
angle suppose that at some arbitrarily fixed reference angle
$\f=\bar\f$ there is a homoclinic point at $\aa=\aa_0$.  Two points
starting at $t=0,\f=\bar\f$, one on the stable whisker and one on the
unstable whisker of some invariant torus with the same position
coordinates $\aa$, will evolve towards the invariant torus
(respectively forward and backward in time) so that their asymptotic
motion gives two points which move quasi periodically keeping a time
independent {\it phase} with respect to the homoclinic motion.  It will
be a function of the distance of the initial points to the homoclinic
point, \ie of $\aa$.  The difference $\V\s[\aa]$ between such phases
evaluated at $t=\pm\io$ will be the {\it phase shift}.  The
``scattering" will be the family of derivatives of $\V\s[\aa]$ at
$\aa=\aa_0$.  In other words we use the homoclinic point as a gauge to
fix the origin of the angles on the standard torus on which the quasi
periodic motion is linear and we look at the trajectory starting on the
unstable whisker at $t=-\io$ infinitesimally close to the invariant
torus and evolving into a point with $\f=\bar \f$ and some $\aa$ at
$t=0$, ``jump" on the stable whisker (keeping the values of $\aa,\f$),
and evolve towards the invariant torus again.  The trajectory will be
asymptotically lagging behind the homoclinic trajectory by an amount
$\pps$, say, at $t=-\io$ and by an amount $\pps+\V\s[\aa]$ at $t=+\io$.
The notion of $\V\s[\aa]$ is intrinsic as the coordinates on which the
motion on the torus appears as linear and which are ``close" to the
corresponding unperturbed ones are uniquely defined.

In presence of perturbations the phase shift is a non trivial function
of the distance to the homoclinic point.  We define analytically the
phase shifts in \S10 and briefly discuss, in \S10 and in appendix A11,
how they are related to the homoclinic splitting.

We present all details in a self contained way.  Some of the details are,
however, exposed in a series of appendices.  Some of the appendices also
contain classical results not so easy to find in the literature in the form
in which we need them. Some (very few) of them are not really necessary
but they are reported because they clarify conceptual and historical
aspects of the problem (namely the statement of Nekhorossev theorem (\S
A1), the D' Alembert precession theory for the Earth (\S A6,\S A7), the
Jacobi map (\S A9), the bounds on the homoclinic scattering (\S A11)) and
they occupy a negligible amount of space.
%
\vskip1.truecm

\noindent{\bf Acknowledgments}: We are indebted to CNR-GNFM and to
the research funds of MPI for partial support. L.C. also acknowledges
support for a visit to Rutgers University under grant \# DMR 89--18903.
G.G. is grateful to J. Lebowitz for his encouragement and for his support
of this work, which was partly developed at Rutgers University with the
support of grant \# DMR 89--18903 during the years 1989 through 1991.

\vskip1.truecm
\vglue2.truecm

\penalty-200

{\bf\S2 A priori unstable systems. Regularity assumptions}

\penalty10000

\vskip0.5truecm\numsec=2\numfor=1

\penalty10000

Let $(\AA,\aa)$, $(I,\f)$ be canonical coordinates describing a
mechanical system with $l$ degrees of freedom.  We suppose $\AA\in V
\subset R^{l-1},\aa\in T^{l-1}$, $I\in R^1$ and $\f\in T^1$, where $V$
is the closure of some open bounded set and $T^s$ is the $s$-dimensional
torus.  We shall regard $T^s$ interchangeably as $[-\p,\p]^s$ with opposite
sides identified or we regard it as $C_1^s$=$\{$product of $s$ unit circles
in the $s$-dimensional complex space $C^s\}$ via the identification
$\ff=(\f_1,\ldots,\f_s)\in T^s\otto$ $\zz=(z_1,\ldots,z_s)\in C^s$ with
$z_j\equiv e^{i\f_j}$, ($j=1,\ldots,s$).

The {\it free} system will consist of $l-1$ rotators described by the
angles $\aa$ and their conjugate momenta $\AA$, and one pendulum
described by the angle $\f$ with conjugate momentum $I$.

The pendulum oscillates with energy:
%
$$P_0(I,\AA,\f)={1\over2}{I^2\over
J_0(\AA)}+g(\AA)^2J_0(\AA)(\cos\f-1)\Eq(2.1)$$
%
where $J_0(\AA)$ is a suitable {\it inertia moment} and $2\p g(\AA)^{-1}$ is
the characteristic period of the small oscillations or, as well, $g(\AA)$ is
the Lyapunov exponent of the unstable fixed point. We call \equ(2.1) a
{\it standard pendulum} hamiltonian.

The rotators will move without being affected by the pendulum
oscillations. A complete example hamiltonian will be:
%
$$h_0={1\over2}{\AA^2\over R}+P_0(I,\AA,\f)\Eq(2.2)$$
%
where $R$ is another inertia moment.

More generally we shall consider $\aa$-independent hamiltonians like:
%
$$H_0(I,\AA,\f,\m)=h(\AA,\m)+P(I,\AA,\f,\m)\Eq(2.3)$$
%
where $P$ is a real analytic hamiltonian depending on a parameter $\m$ and
describing a pendulum in the sense discussed below, and $h(\AA,\m)$ will
also be assumed real analytic.

To clarify what we mean by a {\it pendulum} hamiltonian $P$ we recall
the characteristics of the pendulum phase portrait. The isoenergy
lines in $(I,\f)$-space with $P=E$ are closed continuous curves with
topological properties that may change as $E$ varies. The lines of
separation between the regions covered by curves of the same type (\ie
curves which do not contain an equilibrium point and which can be
deformed into each other without crossing an equilibrium point) are
called separatrices and contain at least one equilibrium point, and at
most finitely many (as we are only considering analytic hamiltonians).

In our case we want to allow an explicit $(\m,\AA)$-dependence of $P$:
hence the above picture is $\m,\AA$ dependent. We shall require that,
for all values of $\AA$ of interest, the pendulum $P$ has a linearly
unstable fixed point $I_\m(\AA),\f_\m(\AA)$ which is the only such
point on the corresponding separatrix and, furthermore, we require that
$I_\m(\AA),\f_\m(\AA)$, together with its Lyapunov exponent
$g(\AA,\m)$ ($\ne0$ by assumption), depend analytically on $\AA,\m$.

Clearly the above is a very mild restriction, only exceptionally
false: it emerges from the analysis that all we really want is that
in the whole range of the $\AA$'s the unstable fixed point, which we
select for our analysis, depends analytically on $\AA,\m$ and does not
merge, as $\AA,\m$ vary, with other fixed points. We shall call
the above equilibrium point a {\it selected unstable equilibrium point
of $P$}.

In such a situation we shall say that \equ(2.3) describes an \ap unstable
free assembly of rotators witnessed in their rotations by a free pendulum
with a selected unstable point of equilibrium.

It is not restrictive, under the above circumstances, to assume that the
selected unstable point is the origin $I=0,\f=0$, and that its energy is
$P=0$. In fact one can always change coordinates by using the canonical map
generated by the function: $(\f-\f_\m(\AA'))I'+
I_\m(\AA')\sin(\f-\f_\m(\AA'))+\aa\cdot\AA'$, \ie:
%
$$\eqalignno{
I=&I'+I_\m(\AA')\cos(\f-\f_\m(\AA')),\quad\AA=\AA',\quad
\f'=\f-\f_\m(\AA')&\eq(2.4)\cr
\aa'=&\aa-\bigl(I'+I_\m(\AA')\cos(\f-\f_\m(\AA'))\bigr)
\dpr_{\AA}\f_\m(\AA')
+\dpr_{\AA}I_\m(\AA')\sin(\f-\f_\m(\AA'))\cr}$$
%
which is clearly well defined and which generates a new hamiltonian of
type \equ(2.3) which has $I=0,\f=0$ as selected unstable equilibrium
point. Furthermore if $P(\AA,\m)\=P(0,\AA,0,\m)$ we can always redefine $P$
as $P-P(\AA,\m)$ by accordingly changing $h$: hence the requirement
that also $P(0,\AA,0,\m)=0$ is not restrictive.

The aspects of the regularity properties that we use,  motivated by the
above descriptions, are as follows:

\vskip3.pt
\noindent{\it {\bf Assumption 1}: The unperturbed hamiltonian
$H_0$ has the form \equ(2.3) and
the pendulum energy $P$ has the origin $(I=0,\f=0)$ as a selected
unstable equilibrium point where $P$ takes the value $0$
(for all $\AA$ and $\m$ in the domain of definition of $H_0$);
the associated (non negative) Lyapunov exponent, $g(\AA,\m)$:
%
$$
g^2 \= [\big( \dpr^2_{I\f} P\big)^2- \dpr^2_{I} P \dpr^2_\f P]
|_{(I,\f)=(0,0)}
\Eq(2.5)$$
%
is bounded away from zero as $(\AA,\m)$ vary in their domain of definition.
}
\vskip3.pt

\noindent{\it {\bf Assumption 2}:
The functions $h$ and $P$ are real analytic in their
arguments. Hence they are holomorphic in their variables in a complex
domain $S_{\r',\r,\x',\x,\bar\m}$, described by
five parameters $\r',\r,\x',\x,\bar\m>0$ as:
%
$$\eqalign{
S_{\r',\r,\x',\x,\bar\m}=\Bigl\{&
I,\AA,\z,\zz,\m\Bigl|\ |I|\le\r',\  {\rm and\ there\ is\ }
\V a\in V,{\rm\ for\ which}\cr&
|A_i- a_i|\le\r\ {\rm and}\ e^{-\x'}<|\z|<e^{\x' },\
e^{-\x}<|z_j|<e^\x,\ {\rm and} \ |\m|\le \bar \m \Bigr\}\cr}\Eq(2.6)$$
%
with $z_j\=e^{i\a_j},\z=e^{i\f}$}.
\vskip3.pt

\noindent{\it {\bf Assumption 3}:
The following non degeneracy conditions:
%
$$\det({\dpr^2_{\AA}}h)\ne0,\qquad
\det(\dpr^2_{(I,\f)} P|_{I=0,\f=0})\ne0,\qquad
\dpr_\AA h\cdot(\dpr^2_{\AA}h)^{-1}{\dpr_\AA h}\ne0\Eq(2.7)$$
%
hold on $S_{\r',\r,\x',\x,\bar\m}$}.
\vskip3.pt
Then we set:
\vskip3.pt

\noindent{\it {\bf Definition}:
Hamiltonians verifying {\sl all} the above assumptions
1$\div$3 will be briefly referred  to as
regular {\sl anisochronous a priori unstable free hamiltonians}.}

They are called \ap unstable, because the instability assumption is
clearly built in the free system definition.
\vskip3.pt

Such hamiltonians are quite common in the theory of the resonances of
anisochronous systems.

For instance consider an $l$ degrees of freedom system with free
hamiltonian $h$ of the form $h(\AA,B)$ in action angle
coordinates $\AA,B,\aa,\l$ such that the equation of the resonance is simply
$\dpr_B h(\AA,B)=0$. Suppose that $B=B(\AA)$ is the consequent
resonance surface.  Then, if $\e f(\AA,\aa,B,\l)$ is a perturbation, one
can find canonical coordinates $(\AA',\aa',I,\f)$ apt to describe the
motions that take place near the resonance and in which the
hamiltonian takes the form \equ(2.3) (in square brackets in the following
expression) plus a {\it small} correction:
%
$$\eqalign{
&\left[h_p(\AA',I,\e)+\e
G_p(I,\AA',\f,\e)\right]+\e^{p}f_p(I,\AA',\aa',\f,\e)\cr
&h_p= h(\AA',I)+\e\lis f(\AA',I)+O(\e^2),\qquad
G_p=\tilde f(I,\AA',\f)+O(\e)\cr}\Eq(2.8)$$
%
with $\lis f$ equal to the average of $f$ over the angles $\aa,\,\l$ and
$\tilde f$ equal to the average of the function $f-\lis f$ over the $\aa$ alone;
here $p$ can be fixed arbitrarily and $\e$ is the strength of the
perturbation. But, the larger $p$ is, the harder it is to find the
functions $G_p,f_p$ and a coordinate system in which \equ(2.8) holds and
the smaller becomes the (tiny) region of phase space around the resonance
surface where the new coordinates can be used to describe the motion, (this
is essentially the Nekhoroshev theorem, see [BG], and appendix A1).

We consider hamiltonians $H$ which are perturbations of regular free \ap
unstable hamiltonians $H_0$, defining the latter by the assumptions 1$\div$3
above:
%
$$H=H_0(I,\AA,\f,\m)+\m f(I,\AA,\f,\aa,\m)\Eq(2.9)$$
%
with $f$ holomorphic in the domain
$S_{\r',\r,\x',\x,\bar\m}$, see \equ(2.6).  We
shall often refer to the Fourier expansion of $f$ in the $\aa$ variables,
which we shall write as:
%
$$f(I,\AA,\f,\aa,\m)=\sum_{\nn\in Z^{l-1}}
f_\nn(I,\AA,\f,\m)\,e^{i\nn\cdot\aa}
\Eq(2.10)$$
%

The problem of phase space drift and diffusion will be posed as follows:
\vskip3.pt

\noindent{\it {\bf Diffusion problem}:
Given $\AA_1,\AA_2$, with $H_0(0,\AA_1,0,0)=
H_0(0,\AA_2,0,0)$ can one find for all $\m$ small enough, but {\sl non
zero}, initial data close (as $\m\to0$) to $(0,\AA_1,0)$ in the
$(I,\AA,\f)$-variables which, in due time ($\m$-dependent, of course)
evolve into data close to $(0,\AA_2,0)$? More bluntly can one realize a
displacement of $O(1)$ in the $\AA$ variables with a perturbation of
order $\m$ as small as we please?}
%
\vglue2.truecm

\penalty-200

{\bf\S3 The free system.  Diffusion paths and whisker ladders}

\penalty10000

\vskip0.5truecm\numsec=3\numfor=1

\penalty10000

To formulate our results we need several concepts.  The first is the notion
of {\it diffusion path} on an energy level $E$, whose value will be kept
fixed throughout this section, as well as the value $\m=0$.

Let $s\to\AA_s$ be a curve $\LL$, piecewise analytic in $s\in[s_1,s_2]$,
joining $\AA^{1}=\AA_{s_1}$ to $\AA^{2}=\AA_{s_2}$, such that, using the
notation in \equ(2.3), one can find two constants $\t,t$ for which:
\vskip.5truecm
\item{1) } $H_0(0,\AA_s,0,0)=h(\AA_s,0)\equiv E$,\ \ for $s\in[s_1,s_2]$,

\item{2) } if $\oo_{s}=\dpr_\AA h(\AA_s,0)$ and if we set:

$$\eqalign{ C(s)=&\sup_{\V\n\ne\V0,\,\V\n\in
Z^{l-1}}\,|\V\n|^{-\t}|\V\n\cdot
\oo_{s}|^{-1}\qquad\quad\hbox{\it ``non\ resonance\ constant"}\cr
\Si(C)=&\{ {\rm set\ of\ the\ } s\in[s_1,s_2] {\rm\
such\ that\ } C(s)<C\}\cr}\EQS(3.1)$$
%
then there is a $\LL$-dependent constant $\bar K>0$ such that:
%
$$(s_2-s_1)^{-1}\cdot({\rm measure\ of\ the\ set\ }\Si(C))\ge (1-
{\bar K(DC)^{-1/t}})\EQS(3.2)$$
%
if $D$ is the maximum of $|\dpr_\AA h(\AA,0)|$ in a neighborhood of the
curve $\LL$.

\vskip0.5truecm
\noindent{\it {\bf Definition}:
if $\LL$ is a curve with the properties 1),2) above
we call it a diffusion path.}
\vskip0.5truecm

Clearly under the genericity assumption \equ(2.7), $\det \dpr^2_{\AA}h
\ne0$, a diffusion path consists of just one point if $l=2$ (because
$h=E$ fixes $A$): no diffusion path exists between distinct points in
action space, if $l=2$. For this trivial reason our results, which
otherwise do not distinguish $l=2$ from $l>2$, will be occasionally
uninteresting if $l=2$.

{\it
In appendix A2 we show that under the genericity assumption \equ(2.7) the
constants $t,\t$ can be taken to be $t=l-1$ and $\t=(l-1)^2$.}
But on
special curves it could be possible to make better choices: for instance in
\S11 we discuss an application with $l=3$ in which $t=1$.

Note that the diffusion paths lie, by definition, in the space of the
$\AA$-variables which are the ``rotators" velocities (or {\it fast
action variables}, or {\it adiabatic invariants}: using the terminology
borrowed from the theory of resonances mentioned in connection with
\equ(2.8),\equ(2.9); see also appendix A1);
it is a notion depending solely on the free
system hamiltonian ($\m=0$) evaluated when the {\it pendulum} (or {\it
slow}, or {\it secular}) variables (\ie $(I,\f)$) are set to the
equilibrium position.

It is easy to see that if $l>2$ there are, under the non degeneracy
conditions \equ(2.7), many diffusion paths joining {\it any} two close
enough points $\AA^{1},\AA^{2}$ lying on a connected portion of the energy
shell $h(\AA,0)=E$, see appendix A2.  The argument is similar to the one
usually invoked to prove the abundance of diophantine irrationals (see, for
instance, [G]): the $l-1=2$ case is particularly easy and the condition
is fulfilled by any curve with non vanishing curvature; in the case $l-1=3$
one has to consider a curve joining $\AA^1,\AA^2$ with nowhere vanishing
curvature and torsion, \etc.

To see the connection between the torsion and the above mentioned values of
$\t,t$ one should recall that a smooth curve $s\to\V a(s)$ in
$d$-dimensions is said to have all its $d-1$ torsion coefficients non
vanishing if, for each fixed $s$, the first $d$ derivatives of $\V a(s)$
are linearly independent: the torsion coefficients are suitable orthogonal
invariants associated with the derivatives of order higher than the first
(hence their number is $d-1$).

The first non degeneracy condition of the second line of \equ(2.7) permits
us to conclude that any curve with all its $l-2$ torsion coefficients non zero
verifies \equ(3.2); the last non degeneracy condition in the second line of
\equ(2.7) implies that a curve which, in a local chart on the energy
surface, has all (the $l-3$) torsion coefficients non vanishing will also
have all the ($l-2$) torsion coefficients non vanishing when it is regarded
as lying on the $l-1$ dimensional action space.

The values of the exponents arise from the remark that if the curve has all
torsions non zero then a codimension one plane cutting it in a point cannot
have a contact of order higher than $l-1$ with the curve.  Thus a layer of
width $\d$ does not contain, locally, an arc length exceeding
$O(\d^{1/(l-1)})$.  Therefore the statement follows by choosing
$\d=1/(C|\V\n|^{\t+1})$ with $(\t+1)/(l-1)>l-1$, so that one can sum the
arc lengths over $\V\n$ (as it is clearly necessary); \ie the choice
$\t=(l-1)^2,\,t=l-1$ is sufficient, see appendix A2.

For every $\AA$ one can define the $(l-1)$-dimensional torus
invariant for the motion governed by $H_0$:
%
$$\TT_0(\AA)=\bigl\{\AA',\aa',I',\f'\bigl|\,
\AA'=\AA,\, I'=0,\, \f'=0,\,\aa'\in T^{l-1}\bigr\}\Eq(3.3)$$
%
Such tori represent data in which the $l-1$ rotators are mindlessly and
freely rotating while the pendulum {\it stands up} in its selected unstable
equilibrium position. The picture, hence the tori, is obviously unstable
and in fact the tori possess stable and unstable manifolds, called {\it
whiskers} by Arnold, [A], (for reasons that emerge as soon as one tries to
make a symbolic drawing of the situation). The whiskers correspond to data
in which the rotators continue to rotate freely witnessing the pendulum
falling from or climbing to the equilibrium position (respectively
describing the unstable or the stable whiskers) and performing one of the
two {\it separatrix} swings. More mathematically:
%
$$\eqalign{
W_{unstable}(\AA)=
&\bigl\{\aa\in T^{l-1},\,\sign I=\sign \f, P(I,\AA,\f,0)=0\bigr\}\cr
W_{stable}(\AA)=
&\bigl\{\aa\in T^{l-1},\,\sign I=-\sign \f, P(I,\AA,\f,0)=0\bigr\}
\cr}\EQ(3.4)$$
%
where, to fix the ideas, we have assumed that $I>0$ means $\dot\f>0$, while
$I<0$ means $\dot\f<0$ and each separatrix swing takes place over the
complete circle $\f\in [-\p,\p]$ (as in the standard pendulum case; in these
cases we shall speak of ``open separatrices").  Such
properties may fail in some pendula (\eg one of the separatrices could be
contractible to a point): in these cases \equ(3.4) has to be
changed in an obvious way.

It is always true, however, that the set $P(I,\AA,\f,0)=0$ will consist of
two branches which will be called the {\it separatrix swings}: in the case
of the standard pendulum they are the subsets of $W(\AA)$ with $I>0$ or
$I<0$. Furthermore the following well known accident happens:
%
$$\eqalign{
&W(\AA)=W_{stable}(\AA)\equiv W_{unstable}(\AA);\cr
&W(\AA)\cap W(\AA')=\emptyset\qquad{\rm if\ }\AA\ne\AA'\cr}\Eq(3.5)$$
%
hence in the general case both sets in \equ(3.4) will be equal and
will coincide with the separatrix data.  Given a diffusion path $\LL$ we can
associate to it, for $\m=0$, a one parameter family
$s\to\TT_{0}(s)\equiv\TT_0(\AA_s)$ of $(l-1)$-dimensional tori, invariant
with respect to the free evolution.

The family $s\to(\TT_0(s),W_{stable}(s),W_{unstable}(s))$ of the
above tori and of their whiskers will be said to form a {\it whisker
ladder}, {\it leaning} on $\LL$; again, a drawing helps to understand the
motivation for the name.

\vskip1.truecm
\vglue2.truecm

\penalty-200

{\bf\S4 Motion on the separatrices. Melnikov integrals}

\penalty10000

\vskip0.5truecm\numsec=4\numfor=1

\penalty10000

Suppose, for simplicity, an ({\it open}) separatrix encircling the circle,
with a monotonic motion taking place on it (e.g. such that the sign of $I$
and that of $\dot\f$ coincide).  We shall write the parametric equations for
the branch $I<0,\f>0$ of $W(\AA)$ as:
%
$$I=i(\f,\AA),\quad\f\in(0,2\p),\quad\aa\in T^{l-1}\Eq(4.1)$$
%
where $i$ is the separatrix swing with $I<0$ (\ie the branch with $I<0$ of
the curve $P|_{\m=0}=0$ through the selected unstable equilibrium point,
(see \S2)).  In the general case (when the separatrix may be shorter than
the full circle, ``closed separatrix case'') one cannot use $\f$ to
parameterize a full separatrix swing, \ie a branch of $W(\AA)$: one would
have to use a different extra parameter to describe $W(\AA)$ at the cost of
conceptually uninteresting complications.

If $X(\AA,\f,\aa)=(i(\f,\AA),\AA,\f,\aa)$ is the point \equ(4.1), let us
denote with the symbol $X^0(\AA,\f,\aa,t)\equiv(I^0(t),\AA,\f^0(t),\aa^0(t))$
the point into which $X(\AA,\f,\aa)$ evolves at time $t$ in the motion
governed by the hamiltonian equations with hamiltonian \equ(2.3) with
$\m=0$.

The \equ(3.4),\equ(2.3) and our choice of coordinates (in which $I=0,\f=0$
is the selected unstable point) imply:
%
$$I^0(t),\f^0(t)=O(e^{\mp gt})\tende{t\to\pm\i}0\Eq(4.2)$$
%
where $g\=g(\AA)\=g(\AA,0)$ is the Lyapunov exponent of the selected
equilibrium point, \ie it is given by $g^2=-\det(\dpr^2P(0,\AA,0,0))$,
where $\dpr^2 P$ is the matrix of the second derivatives with respect to
$I,\f$.  Furthermore, denoting $\oo(\AA)=\dpr_\AA h(\AA,0)$:
%
$$\eqalign{\aa^0(t)=&\aa+\oo(\AA)t+\ig_0^t{\dpr_\AA P}
(I^0(\t),\AA,\f^0(\t),0)\,d\t\equiv\cr
\equiv&\aa+\oo(\AA)t+\tt(t;\AA,\f)\cr}\Eq(4.3)$$
%
where we have used that $P(0,\AA,0,0)\equiv0$, by our assumptions 1$\div$3,
\S2, so that the integrand tends to zero by \equ(4.2); the function
$\V\th$ is defined by \equ(4.3).

It is convenient to fix once and for all an origin on the separatrix
corresponding to the action $\AA$: we take it to be the point $\bar
I,\bar \f$ with $\bar \f$ such that the solution $I(\AA,\f)$ of the equation
$P(I,\AA,\f,0)=0$ for $I$, parameterized by $\AA$, reaches its
absolute maximum
value as a function of $\f$. We call this point {\it the origin of the
separatrix}.  In the case in which $P$ is a standard pendulum (\ie it is
given by
\equ(2.1)) the position $\bar \f$ is $\bar \f=\p$, where the pendulum
attains the maximum velocity.

Therefore we can define the asymptotic {\it phase shifts} $\V\th^\pm(\AA)$
equal to the limits as $t\to\pm\i$ of $\V\th(t;\AA,\bar \f)$. They depend on
the starting point, \ie on $\bar \f$, which however we keep fixed as above,
and on $\AA$; their difference $\V\th(\AA)$ is:
%
$$\V\th^+(\AA)-\V\th^-(\AA)
\equiv\V\th(\AA)=\ii_{-\io}^\io\dpr_\AA P(I^0(\t),\AA,\f^0(\t),0)d\t\EQ(4.4)$$
%
and $-\V\th(\AA)/2$ has the geometric interpretation of the
$\AA$-gradient of the area enclosed between the considered branch
($I<0,\f>0$) of separatrix and the $I=0$
axis (for closed separatrices it is the $\AA$ gradient of the area
enclosed by the considered separatrix swing).

We set the following definition in terms of the above concepts:
\vskip0.5truecm
\noindent {\it {\bf Definition 1}:
\sl\ The free system rotators and pendulum
are {\it independent} at $\AA$ if
$\V\th(\AA)\equiv0$.}
\vskip0.5truecm
The obviously interesting case \equ(2.1) with $\AA$ independent
$R(\AA),g(\AA)$ is clearly very special and it is an example of
independence in the above sense. If, on the other hand, in \equ(2.1), the
functions $g(\AA),R(\AA)$ are not constant the {\it phase shifts}
$\V\th(t,\AA;\bar \f)$ are easily computed:
%
$$\V\th(t,\AA;\bar \f)=\tt_0\,\tanh gt,\qquad \tt_0\=
-4\dpr_\AA(g R)\,\Eq(4.5)$$
%

We shall call $X^0(t)\=(I^0(t),\AA,\bar \f(t),\V\a^0(t))\=
X^0(\AA,\bar \f,\aa,t)$  the separatrix
motion corresponding to the initial point with $\f=\bar \f$ and some initial
$\AA,\aa$ (cfr. paragraph after \equ(4.1)).

Given a diffusion curve $\LL$, $s\to\AA_s$ we introduce the
following notations:
$\oo_s\=\oo(\AA_s)$ $\=$ $\dpr_\AA h(\AA_s,0)$,
$\tt_s(t)$ $=\tt(t;\AA_s,\bar \f)$,
and define (see also \equ(2.10)):
%
$$F(t;\aa,s)=-\sum_{\nn\ne\V0}{e^{i\aa\cdot\nn}\over i\oo_s\cdot\nn}
\dpr_t\Bigl[f_\nn(I^0(t),\AA_s,\bar \f(t),0)e^{i\tt_s(t)\cdot\nn}\Bigr]
\Eq(4.6)$$
%
which makes sense for $s\in\Si(\i)$, (which, in general, is a subset of
full measure of $\LL$) see \equ(3.1).

Clearly the function $F(t;\aa,s)\tende{t\to\pm\i}0$ exponentially fast (see
\equ(4.2)), and the following {\it Melnikov integral} is well defined, see
\equ(3.1), for $s\in\Si(\i)\subseteq[s_1,s_2]$:
%
$$M_f(\aa,s)=\ii_{-\i}^\i F(t;\aa+\oo_st,s)dt\Eq(4.7)$$
%
(similar quantities were considered by Poincar\'e in [P]; see also [A]).
Note that in the special case of a degenerate phase shift, \ie
of independence of the rotators and the pendulum,
the $M_f$ are defined for all $s\in[s_1,s_2]$ because the part involving the
small denominators in \equ(4.6) disappears by integration by parts.
In the latter case, in fact, it is:
%
$$M_f(\aa,s)=
c(\AA,s)+\ii_{-\i}^\i f(t;\aa+\oo_st,s)dt\Eq(4.8)$$
%
where $c(\AA,s)\equiv\ig_{-\io}^\io [f_{\V 0}(I^0,\AA_s,\bar \f,0)-
f_{\V 0}(0,\AA_s,0,0)]dt$ is a constant which shall play no role and
$f(t;\aa,s)=f(I^0(t),\AA_s,\bar \f(t),\aa,0)-f(0,\AA_s,0,\aa,0)$.

Such a case with $f(0,\AA_s,0,\aa,0)=0$ was considered by Arnold in [A].

For $s\in\Si(\i)$, see \equ(3.1), the equation:
%
$$\V\dpr_\aa M_f(\aa,s)=\V0\Eq(4.9)$$
%
admits necessarily at least two solutions (\eg one is at $\aa=\aa_s$
when $\aa_{s}$ is a minimum for $M_f$ and the other when $\aa_{s}$
is a maximum).

The following definition will be important:
\vskip3.truept

\noindent{\it {\bf Definition 2}:
We say that the arc of diffusion path
corresponding to $s\in[\bar s_1,\bar s_2]\subseteq[s_1,s_2]$ is {\sl
directly open for diffusion} under the perturbation $f$, see
\equ(2.9),\equ(2.10), if:
\item{1)} no $f$-resonance occurs for $s\in[\bar s_1,\bar s_2]$, in the
sense that ${f_{\V\n}(0,\AA_s,0,0)/ \oo_{s}\cdot\V\n}$
is analytic in $s\in[\bar s_1,\bar s_2]$ for all $\V\n$.
\item{2)} the equation \equ(4.9) admits a continuous solution
$s\to\aa_s$ for all $s\in[\bar s_1,\bar s_2]$ and such that:
%
$$\det\,\dpr^2_\aa  M_f(\aa_{s},s)\ne 0\qquad
s\in[\bar s_1,\bar s_2]\Eq(4.10)$$
%
More generally we say that an arc of a diffusion path is {\sl open for
diffusion} under the perturbation $f$ if it can be covered by finitely many
arcs directly open for diffusion.}
\vglue3.truept

Note that the non resonance condition is a very strong condition: except
for very special $f$ we can expect to find open diffusion paths only when
$f$ is a trigonometric polynomial. In the latter case, however, it is clear
that, in general, there will be many open, possibly very long, such paths.

Consider a diffusion path and assume that property 1) of the above
definition is verified because $f$ is a trigonometric polynomial with no
non vanishing coefficients $f_{\V\n}$ corresponding to $\V\n$'s for which
$\oo_{s}\cdot\V\n=0$ for some $s$.  Then given a point of parameter $s$ on
the path, it will be generically true that $s$ is inside some arc of $\LL$
directly open for diffusion under $f$: the genericity is with respect to
the choices of the non zero coefficients of the trigonometric polynomial
$f$.  This is a consequence of the explicit formula \equ(4.6) and of the
remark that one can change rather arbitrarily the function $M_f(\aa,s)$ by
changing $f$ and the change is effectively computable.


Our main result in the above anisochronous, \ap unstable, case is
the following.
\vskip0.5truecm

\noindent{\it {\bf Proposition}: Consider a hamiltonian like \equ(2.9)
with $H_0$ verifying the assumptions 1$\div$3 of \S2 and $f$ being a
trigonometric polynomial of degree $d$.

\noindent
Given a diffusion path $\LL$ directly open for diffusion, suppose that
$\oo(\AA)\cdot\V\n$
$\=$ $\dpr_\AA h(\AA,0)\cdot \nn$ $\ne0$
for $\AA$ in $\LL$ and for all $|\V\n|<c\,d$, for
some constant $c>0$.

If $c$ is large enough then one can find, for all $\m\ne0$ small
enough, initial data with
``fast action variables" (\ie $\AA$ variables) {\sl
close} to one extreme of $\LL$, $\AA^{1}$, and ``slow variables" (\ie
$(I,\f)$) {\sl close} to the selected unstable equilibrium position,
which evolve, {\sl drift},
into data with the $\AA$ variables {\sl
close} to the other extreme, $\AA^{2}$, of $\LL$.  And {\sl close} can
be taken to mean {\sl within a distance} $\d_\m\tende{\m\to0}0$.

One can find constants $T_1,c_1>0$ such that:
%
$$T(\m)\=T_1 e^{c_1|\m|^{-2}}\Eq(4.11)$$
%
provides {\sl an upper bound} to the minimum time necessary for the
drift from $\AA^1$ to $\AA^2$.

If the path $\LL$ is open for diffusion, but not directly open,
one can show the same result with a function $T(\m)$ whose expression
will depend on the structure of $\LL$: in particular, it will depend
on the number of segments directly open for diffusion.}
\vskip0.5truecm


The above proposition does not convey all the information that we gather by
proving it: the dimensional nature of our bounds makes them very flexible
and we use them in the later sections of this paper to cover a variety of
cases in which the non degeneracy conditions are not verified, and
eventually lead us to the result on the \ap stable heavenly problem
described in the introduction.
%
\vglue2.truecm

\penalty-200

{\bf\S5 Existence of ladders of whiskers.}

\penalty10000

\vskip0.5truecm\numsec=5\numfor=1

\penalty10000

In this section we consider a hamiltonian \equ(2.9) verifying the
assumptions 1$\div$3 of \S 2 and study the persistence of the unperturbed
whiskered tori and their regularity properties (see, also, [M],
[Gr], [Z]).

The basic technical facts concerning the existence of the $l-1$
dimensional invariant tori and the normal form of the flow in their
vicinity are stated in the following lemmata 1,1' and in lemma 2
(formulated after the proofs).

Since the theorem presented in the lemmata is a local theorem in the
vicinity of the unperturbed invariant tori, it is useful to introduce a
system of coordinates in which it is most conveniently studied. Thus we
introduce a new system of canonical coordinates $(I,\AA,\f,\aa)=
\lis \RR_\m(p_0,q_0,\AA_0,\aa_0)$
defined by a canonical transformation $\lis \RR_\m$
enjoying the properties explained in the following lemma.

Let $C_\x\=\{z\,\big|\, e^{-\x}<|z|<e^\x\}$, and consider the sets of the
points $I,\AA,\z,\zz,\m\in U$ and, respectively, $p,q,\AA,\zz,\m\in W$
with:
%
$$\eqalign{
U(\r',\r,\x',\x,\bar\m,\V a)\=&\bigl\{|I|\le\r',\,
|A_i- a_i|\le\r,\,\z\in C_{\x'},\,z_j\in C_\x,\,|\m|\le\bar\m\bigr\}\cr
W(\k,\r,\x,\bar\m,\V a)\=&\bigl\{|p|,|q|<\k,\, |A_{j}-a_j|<\r,\,
z_j\in C_\x,\,|\m|<\bar\m\bigr\}\cr}\Eq(5.1)$$
%
Recall the definition of $V$ (beginning of \S 2) and that $H$ in \equ(2.9)
is holomorphic in $U(\r', \r,$
$\x',$  $\x, $ $\bar \m,$ $\V a)$ (assumption 2, \S 2).
%
\vskip0.3truecm
\noindent{\it {\bf Lemma 0}:
For all $\V a\in V$ there exist positive constants
$\bar \k_0,\bar \r_0,\bar \x_0$
and a canonical transformation $(I,\f,\AA,\aa)=$
$\lis \RR_\m(p_0,q_0,\AA_0,\aa_0)$ defined and holomorphic in
$W(\bar \k_0,\bar \r_0,\bar \x_0,\bar \m,\V a)$ with values in a domain
$U(\r',\r,\x',\x,\bar\m,\V a)$ of holomorphy of \equ(2.9) and casting
$H$ in the form:
%
$$\eqalign{ &h_0(\AA_0,p_0q_0,\m)+f_0(\AA_0,\aa_0,p_0,q_0,\m),\cr
&h_0(\AA_0,J,\m)= h(\AA_0,\m)+G(J,\AA_0,\m),\qquad \dpr_J
G(0,\AA_0,\m)\=g(\AA_0,\m)\cr}\Eq(5.2)$$
%
where $f_0$ is divisible by $\m$ and $h_0,f_0$ are analytic in
$W(\bar \k_0,\bar \r_0,\bar \x_0,\bar \m,\V a)$.
Expressions for possible values of $\kb_0,\rb_0,\xb_0$
in terms of $\r',\r,\x',\x,\mb$ and of a few constants depending on $h,f$
can be found in appendix A3, see \equ(A3.39).}
\vskip0.5truecm
The {\bf proof} is given in appendix A3.

The map $\lis \RR_\m$ will have the form:
%
$$\eqalign{
I=&R(\AA_0,p_0,q_0,\m),\qquad\kern1.1truecm\f=S(\AA_0,p_0,q_0,\m)\cr
\aa=&\aa_0+\V\d(\AA_0,p_0,q_0,\m),\qquad\kern0.2truecm
\AA=\AA_0\cr}\Eq(5.3)$$
%
with $R,S,\V \d$ real--analytic in $W(\bar \k_0,\bar \r_0,\bar
\x_0,\bar \m,\V a)$ (often \equ(5.3) will be supposed to act also on
the variable $\m$, trivially changing $\m$ into itself).

The result in lemma 0 is well known: it extends a celebrated theorem by
Jacobi who proved the above lemma in a variety of cases, first of all
for the standard pendulum.  In the latter case the Jacobi map $\lis
\RR_\m$ can be constructed quite explicitly by using the theory of
the jacobian elliptic functions, see appendix A9.

Lemma 1 below gives us a {\it normal form} for the hamiltonian flow
near the unperturbed whiskers. It tells us that {\it most} of the
structure of unstable tori and of corresponding manifolds survives the
onset of the perturbation. In particular the tori are obtained by
setting suitable coordinates $p,q$ equal to $0$; and the whiskers, in
the vicinity of the tori, are obtained by setting $p=0$ (unstable
whisker) or $q=0$ (stable whisker). The whisker ladder still exists,
with a {\it few rounds missing} (where $s\not\in\Si_\m$, see below).
\vglue0.5truecm
%
\noindent{\it{\bf Lemma 1}: Consider a hamiltonian
\equ(2.9), verifying the
assumptions 1$\div$3 of \S2.  Let $\LL$ be a diffusion path $s\to\AA_s$ with
energy $E$ (see 1),2) of \S3), and let $s\to\TT_0(s)\=\TT_0(\AA_s)$ be the
family of $(l-1)$-dimensional tori, see \equ(3.3), associated with $\LL$.
Suppose that $\bigcup_{\V a\in\LL}U(\r',\r,\x',\x,\bar\m,\V a)$,
which is contained in the holomorphy domain of \equ(2.9),
is a region where the map $\lis {\RR}_\m$ can be defined
via lemma 0 above: \ie $U(\r',\r,\x',\x,\bar\m,\V a)\supset \lis \RR_\m
W(\kb_0,\rb_0,$ $\xb_0,$ $\mb,\V a)$.

Fix $n>0$ and let $\m$ be real. Then there exists, on the energy level $E$ of the
perturbed system, a family $s\to\TT_\m(s)$ of $(l-1)$-dimensional
``whiskered" tori,
$C^n$-close to the line of tori $s\to\TT_0(s)$ within $O(\m)$ as $\m\to0$,
which for $\m$ small enough verify the following properties:
\item{1) }
There exist positive constants $c,\bar c, K, k$ such that
the tori $\TT_\m(s)$ are invariant for
$s\in\Si_\m\subset[s_1,s_2]$ where: $\Si_\m\=\{s|\,C(s)<k|\m|^{-1/\cb}\}$
and:
%
$$(s_2-s_1)^{-1}\ii_{\Si_\m}ds\geq(1-K|\m|^{1/c})\Eq(5.4)$$
%
\item{2) } The tori $\TT_\m(s)$ are part of a family of
$l$--dimensional invariant surfaces
having energy $E$ and parameterized by $\V\ps\in T^{l-1}$, $|p|,|q|<
\bar \k$, for some $\bar \k>0$, as:
%
$$\eqalign{\AA=&\AA'+\V\X(\V\ps,p,q,s,\m)\quad
\kern2.2truecm\aa=\V\ps+\V\D(\V\ps,p,q,s,\m)+\V\d(\AA',p,q,\m)\cr
I=&R(\AA',p,q,\m)+\L(\V\ps,p,q,s,\m)\quad\kern0.15truecm\f=
S(\AA',p,q,\m)+\Th(\V\ps,p,q,s,\m)\cr}\Eq(5.5)$$
%
where $\AA'\=\AA_s(pq,\m)$ with $\AA_s(J,\m)$ analytic in $J$,
$C^n$--smooth in $J,s,\m$ and $\AA_s(0,0)$ coincides with the diffusion
curve $\AA_s$; $\V\X,\V\D,\L,\Th$ are analytic in $\V\psi,p,q$,
divisible by $\m$ and $C^n$-smooth in $\V\ps,p,q,s,\m$, and $R,S,\V\d$
are as in lemma 0 (hence depend on $s,\m,p,q$ only and are analytic in
their variables).
%
\item{3) } There are functions $\g'(J,s,\m),\g(J,s,\m)$ analytic in $J$
for $|J|<\bar \k^2$, $C^n$-smooth in $J,s,\m$ and
divisible by $\m$ if $J=0$ (and by $J$ if $\m=0$), such that the motion
on the invariant surfaces is simply:
%
$$\V\ps(t)=\V\ps+(1+\g)\oo_st,\qquad
p(t)=pe^{-g_s(1+\g')t},\qquad q(t)=q e^{+g_s(1+\g')t}\Eq(5.6)$$
%
where $\g=\g(pq,s,\m),\g'=\g'(pq,s,\m), g_s\equiv
g(\AA_s,0)$, see \equ(2.5), $\oo_s\equiv \oo(\AA_s)$,
see \equ(3.1).
Hence the tori $\TT_\m(s)$ and their stable/unstable whiskers $W_\m(s)$
are obtained by setting in \equ(5.5), respectively, $p=q=0$; $p\ne 0$, $q=0$;
and $p=0$, $q\ne 0$.

\item{4) } The smallness condition on $\m$ and the constants $k,K,\cb,c,
\bar \k$ can
be given an explicit dimensional form in terms of a few parameters
associated with $h,f$,
(see \equ(5.76), \equ(5.90), \equ(5.82),
\equ(5.67), \equ(5.18) below); similarly one can construct
explicit bounds on the smallness of $\V\X,\V\D,\L,\Th,\g,\g'$, (see lemma 2
and \equ(5.89),\equ(5.79) ,\equ(5.88) below).}
\vglue0.5truecm
Instead of fixing the energy $E$ of the invariant tori and the frequency
ratios of the corresponding quasi periodic motions one can fix the
frequencies (\ie $\g$ in \equ(5.6)) at the cost of leaving $E$ free.

Calling $s\to \AA_{s}$ the diffusion curve equation and
defining the two functions $\oo_s=\dpr_{\AA} h_0(\AA_s,0,0)$, and
$g_s=\dpr_J h_0(\AA_s,0,0)$,
see \equ(5.2), we introduce a real parameter $u$ and
consider the vectors:
%
$$\oo_{su}=(1+u)\oo_s,\quad u\ {\rm real}\ ,
\qquad \Big( \ \oo_s\=\dpr_\AA h_0(\AA_s,0,0) \ \Big)\Eq(5.7)$$
%
We define the {\it diffusion sheet} $\tilde\LL$: $(s,u)\to\AA_{su}$ by:
%
$$\dpr_\AA h_0(\AA_{su},0,0)=\oo_{su}\Eq(5.8)$$
%
This is well defined, taking into account the non degeneracy conditions
\equ(2.7), by the implicit function theorem, if $|u|$ is small
enough. We shall suppose that $u$ varies in an interval $[-\bar u,\bar
u]$ so small that:
%
$$
{\rm setting\ } \dpr_J
h_0(\AA_{su},0,0)=(1+u'_{su})g_s\equiv g_{su}\ \  {\rm it\  is\ }:
\ \ |u|,|u'_{su}|<4^{-1}\Eq(5.9)$$
%
More stringent requirements on $\bar u$ will be imposed later.

One then obtains
results similar to those described in lemma 1 with the basic difference
that all the main functions will be {\it analytic also in  $\m$
near $\m=0$}, and the energy of the motions on the invariant surfaces will
{\it no longer be  fixed}. More precisely one obtains the following
statement:

\vglue0.5truecm
\noindent{\it {\bf Lemma 1'}:
Consider, as in lemma 1, a hamiltonian \equ(2.9), verifying the
assumptions 1$\div$3 of \S2.  Let $\LL$ be a diffusion path $s\to\AA_s$ with
energy $E$, and let $s,u\to\AA_{su}$ be the diffusion sheet, defined in
\equ(5.7),\equ(5.8), and let $s,u\to\TT_0(s,u)$  be the family of
$(l-1)$-dimensional tori (see \equ(3.3) with $\AA=\AA_{su}$) associated
with $\LL$.  Suppose, as in lemma 1,
that $\bigcup_{\V a\in\LL}U(\r',\r,\x',\x,\bar\m,\V
a)$, is a region where a map $\lis \RR_\m$ can be defined via lemma 0.

Fix $n>0$, let  $u$ be real and small, and $\m$  complex.
Then there exists a family $s,u\to\TT_\m(s,u)$ of $(l-1)$-dimensional
``whiskered" tori, $C^n$-close to the
sheet of tori $s,u\to\TT_0(s,u)$ as $\m\to0$, which for $\m$ small enough
verify the following properties:
\item{1) } The tori $\TT_\m(s,u)$ are invariant for
$s\in\Si_\m\subset[s_1,s_2]$ and for $u$
$\in [-\bar u,\bar u]$ for a suitable $\bar u>0$:
here $\Si_\m$ is the same set defined in 1) of lemma 1 and verifies the
same bound \equ(5.4) (same constants).
%
\item{2) } The tori $\TT_\m(s,u)$ are part of a family of invariant
$l$--dimensional surfaces parameterized by $\V\ps\in T^{l-1}$, $|p|,|q|
<\bar \k$, ($\bar \k$ as in lemma 1), as:
%
$$\eqalign{\AA=&\AA'+\V\X(\V\ps,p,q,s,u,\m)\quad
\kern1.6truecm\aa=\V\ps+\V \d(\AA',p,q,\m)+\V\D(\V\ps,p,q,s,u,\m)\cr
I=&R(\AA',p,q,\m)+\L(\V\ps,p,q,s,u,\m)\kern0.1truecm\quad\f=
S(\AA',p,q,\m)+\Th(\V\ps,p,q,s,u,\m)\cr}\Eq(5.10)$$
%
where $\AA'\=\AA_{su}(pq,\m)$ with $\AA_{su}(J,\m)$ analytic in $J,\m$,
$C^n$--smooth in $J,\m,s,u$ and $\AA_{su}(0,$ $0)$
$=\AA_{su}$ (see \equ(5.8));
%
$\V\X,\V\D,\L,\Th$ are analytic in $\V\ps,p,q,\m$, divisible
by $\m$, and $C^n$-smooth in all their arguments, and $R,S,\V\d$,
which  depend on $s,\m,p,q$ only, are as in \equ(5.3).
%
\item{3) } There is a function $\g'(J,s,u,\m)$ analytic in $J,\m$
for $|J|<\bar \k^2$ and $\m$ small enough, $C^n$-smooth in $s,u,\m,J$ and
divisible by $\m$ if $J=0$ (and by $J$ if $\m=0$), such that the motion
on the invariant surfaces is simply:
%
$$\V\ps(t)=\V\ps+\oo_{su}t,\qquad
p(t)=pe^{-g_{su}(1+\g')t},\qquad q(t)=q e^{+g_{su}(1+\g')t}\Eq(5.11)$$
%
where $\g'=\g'(pq,s,u,\m), g_{su}\equiv g(\AA_{su},0),\oo_{su}\equiv
(1+u)\oo_s$ and $\g\equiv u$ is now fixed {\sl a priori}.
\item{4) }
The constants $k,K,\bar c,c,\bar \k$ are as in lemma 1 above; furthermore
the smallness condition on $|\m|$ and the
(new) functions $\V\X,\V\D,\L,\Th,\g'$  satisfy the same bounds
of the corresponding objects of lemma 1 (see point 4) of lemma 1).}

\vglue0.5truecm
In fact the strategy of our analysis will be to prove lemma 1' first and
deduce lemma 1 by showing that the parameter $u$ can be determined so that
the real part of the energy {\it maintains a prefixed value $E$}.
\vglue0.3truecm
\noindent{\bf Proof}: the first step is to change variables
$(I,\f,\AA,\aa)\to (p_0,q_0,\AA_0,\aa_0)$ using the canonical change of
coordinates of lemma 0 to put \equ(2.9) in the form \equ(5.2).

By our assumption this is possible and we call $\kb_0,\rb_0,\xb_0$
parameters such that $\lis \RR_\m W(\kb_0,$ $\rb_0,$ $\xb_0,$
$\mb,\V a)$ is, for all
$\V a\in\LL$, contained in the set $\bigcup_{\V a\in\LL}U(\r',\r,\x',\x,\mb,\V
a)$ where the hamiltonian is defined.

In this way we define $h_0,f_0$ on $W=W(\kb_0,\rb_0,\xb_0,\mb,\V a)$
for all $\V a\in\LL$. Let
$E_0,\h_0, \G_0$ be the suprema, in $W$ and $\V a\in \LL$,
of the functions $||\dpr h_0||$
and $||(\dpr^2_\AA h_0)^{-1}||,\,||(\dpr_J h_0)^{-1}||$, respectively.  The
norm of a vector or matrix will be, for simplicity, the maximum of the
components.

Consider the equation \equ(5.8). By a simple implicit function analysis
we see that if:
%
$$|u|<\tilde u\={1\over\bar B^2(E_0\h_0\rb_0^{-1})^2}\Eq(5.12)$$
%
for $\bar B$ large enough, it admits a solution $\AA_{su}$ such that,
(see appendix A4, \equ(A4.3)):
%
$$|\AA_{su}-\AA_s|<\tilde\r\={\rb_0\over (\bar B E_0\h_0\rb_0^{-1})}
<\rb_0/4\Eq(5.13)$$
%
We also consider the equation:
%
$$\dpr_\AA h_0(\AA,J,\m)=\oo_s(1+u)\=\oo_{su}\Eq(5.14)$$
%
and we see that if $|u|$ verifies \equ(5.12) and
$|J|<\tilde \k^2,\,|\m|<\tilde\m$ with:
%
$$\kt\={\kb_0\over\bar B(E_0\h_0\rb_0^{-1})},\qquad\mt\={\mb\over \bar
B^2(E_0\h_0\rb_0^{-1})^2}\Eq(5.15)$$
%
then the equation has a solution $\AA^0(s,u,J,\m)$ close to $\AA_{su}$
within $\rb_0/4$, (see appendix A4,
\equ(A4.5)) and, obviously, $\AA^0(s,u,0,0)\=\AA_{su}$.

Recalling that $g_{su}\=\dpr_J h_0(\AA_{su},0,0)$ and setting:
%
$$(1+u'(s,u,\AA,J,\m))g_{su}\=\dpr_J h_0(\AA,J,\m),\qquad
\quad\l_0\=\sup |u'|\Eq(5.16)$$
%
we find that in a domain $|\AA-\AA^0(s,u,J,\m)|<\r_0,\ |J|<\k_0^2,
|\m|<\m_0$ the following bound holds for a suitable constant $\hat B$:
%
$$\l_0=\sup|u'|\le2l \G_0
E_0\Bigl({\kt^2\over\kb_0^2}+{\rt\over\rb_0}+{\mt\over\mb}\Bigr)
\le \hat B (\G_0 E_0) \ (E_0\h_0\rb_0^{-1})^2\Eq(5.17)$$
%
see appendix A4, \equ(A4.6);
in such bounds we have used ``dimensional" (or ``Cauchy") estimates:
see below.

Therefore we can fix $\r_0,\k_0^2,\bar u$ so that
\equ(5.12) holds (hence $|u|<1/4$) and also
$4\l_0\G_0 E_0<1$
(hence $|u'|,$ $|u'_{su}|$ $<$ $1/4$,
because $|u'_{su}|$ can also be bounded by the r.h.s. of \equ(5.17) by
a similar estimate; see \equ(5.9) for the definition of $u'_{su}$).
A possible choice is:
%
$$\eqalign{
\bar u\=&{1\over \bar B^2(E_0\h_0\bar\r_0^{-1})^2},\qquad\kern2.truecm
\m_0\=\min\{
{\bar\m\over \bar B^2(E_0\h_0\rb_0^{-1})^2(E_0\G_0)^2}\ ,\ 1\}\cr
\k_0\=&{\kb_0\over\bar B\,(E_0\h_0\rb_0^{-1})}{1\over (E_0\G_0)},\qquad
\r_0\=\min\{
{\rb_0\over\bar B (E_0\h_0\rb_0^{-1})(E_0\G_0)^2}\ ,\ \k_0^2\}\ ,\qquad
\x_0\=\xb_0\cr} \Eq(5.18)$$
%
where $\r_0,\m_0$ are taken to be necessarily smaller than $\k^2_0$ and
$1$, respectively, for later convenience.
The constant $\bar B$ can be taken to be the same in all the above
formulae, possibly readjusting it (to avoid the introduction of too many
symbols, {\it a procedure that we shall use very often below}).

The functions $h_0,f_0$ will be holomorphic in the new coordinates in a
domain that we have, to some extent, tailored to our needs. They will, in
fact, be regarded as holomorphic in a domain containing:
%
$$\eqalignno{
&W_0\=W(\k_0,\r_0,\x_0,\m_0)\=&\eq(5.19)\cr
& \=\bigcup_{s,u\in\tilde\II_0}
\{ |p_0|, |q_0|<\k_0,\,
|A_{0j}-A^0_j(s,u,p_0q_0,\m)|<\r_0\ ,
e^{-\x_0}<|z_j|<e^{\x_0},\,
|\m|<\m_0\}\kern1.truecm\cr}$$
%
where the sheet $(s,u)\to \AA^0(s,u,J,\m)$ is defined by \equ(5.14) with:
%
$$(s,u)\in\tilde\II_0\=\II_0\times[-\bar u,\bar u]\ ,
\quad \II_0\=\Si(C_0)\quad {\rm for\  some\ } C_0>\G_0\Eq(5.20)$$
%
In the coming analysis the constant $C_0$ will be left as a {\it free
parameter} and will be chosen at the end in order to check \equ(5.4).
Thus, using
$|u|<1/4$,  in $\tilde \II_0$ it will be true that:
%
$$\eqalign{
|\oo_{su}\cdot\nn|^{-1}&\le C_0|\nn|^\t,\qquad \forall \nn\in
Z^{l-1},\,\nn\ne\V0\cr|g_{su}|^{-1}&\le\G_0\cr}\Eq(5.21)$$
%
where, see \equ(3.1), $\t$ is a diophantine constant.
Note that the just introduced parameters $\k_0,\r_0,\x_0,\m_0$ are not, in any
sense, the maximal ones compatible with the analyticity properties of
$h_0,f_0$.

All our arguments will have {\it dimensional nature} involving combinations
of the {\it sizes} of various functions, hence it is convenient to define
the size of a function $\V F$, holomorphic in a domain $W$, as:
%
$$||\V F|| \=||\V F||_W\=
\sup_{j,W}|F_j(p,q,\AA,\V z,\m)|\Eq(5.22)$$
%
where, of course, the symbol $||\cdot||$ is incomplete and (therefore) it
will be always accompanied by the specification of the domain $W$
considered in evaluating \equ(5.22), unless obvious from the context.

Let us collect here the positive parameters
$E_0,\G_0,\e_0,\h_0,\th_0$ that we use to measure the size of $h_0$, $f_0$
(compare \equ(2.7)):
%
$$\eqalign{
||\dpr h_0||\le E_0,\quad&||(\dpr_J h_0)^{-1} ||\le \G_0,
\quad\qquad||f_0||\le\e_0,\cr
||(\dpr^2_\AA h_0)^{-1}||\le \h_0,\quad&||[(\dpr^2_\AA h_0)^{-1}
\Dpr_\AA h_0\cdot\Dpr_\AA h_0]^{-1}||\le\th_0\cr}\Eq(5.23)$$
%
where $||\cdot||$ is considered in $W_0$,
see \equ(5.19). This is consistent with the
previous meaning and usage of the previously defined values of $E_0,\h_0,\G_0$.

The holomorphy of $h_0,f_0$ imposes restrictions on the relative values
of the above constants; namely there exists $B_0>0$ depending only on
the number $l$ of degrees of freedom and such that:
%
$$E_0C_0 >E_0\G_0\ge B_0,\quad\h_0 E_0\r^{-1}_0\ge B_0,\quad
\th_0 E^2_0\h_0\ge B_0\Eq(5.24)$$
%
which we will repeatedly use for the purpose of
simplifying bounds, at the expense of their sharpness; (one can take
$B_0=l^{-1}$, see appendix A4).

{\it The quantities in \equ(5.24) have the physical interpretation of
ratios of the various time scales relevant for our problem}.

Our basic tool (already used in obtaining \equ(5.17))
for bounds on a function $F$, of one variable,
holomorphic in a domain $\DD$ will be to restrict it to a smaller domain
$\DD'\subset \DD$ and to estimate the $n$-th derivative of $F$ in $\DD'$
by $n!$ times $r^{-n}$, with $r=$ distance between $\DD'$ and $\dpr\DD$,
times the supremum of $F$ in $\DD$. We call such a bound a {\it
dimensional estimate}: it is a consequence of (one among) Cauchy's
theorem(s).

In performing dimensional bounds it is convenient to deal with {\it
dimensionless combinations} of the main parameters \equ(5.23). Thus all our
bounds will naturally involve the following dimensionless combinations
of the parameters $E_0,\h_0,C_0,\G_0,\th_0,\e_0,\r_0,\k_0,\x_0,\l_0,\m_0$
that we have associated with our hamiltonian (see \equ(5.23),
\equ(5.18), \equ(5.20), \equ(5.17),\equ(5.19)):
%
$$\eqalign{
&E_0C_0,\ C_0\G_0^{-1},\ \h_0E_0\r_0^{-1},\ \th_0E_0^2\h_0,\
\k_0^2\r_0^{-1}, \ \xc_0^{-1}\=\x_0^{-1}(1+\x_0),\ \m_0,\ \l_0\cr
&\e_0E_0^{-1}\r_0^{-1}}\Eq(5.25)$$
%
and we see, from \equ(5.17),\equ(5.24),\equ(5.20), and from
\equ(5.18) and the comment following it, that all the elements of the
first line are $\ge B_0>0$; we shall impose, without loss of
generality, that the element of the second line is $\le1/2$.

To help reading the formulae we often enclose in parentheses the above
dimensionless combinations of parameters, even though the parentheses may
not be necessary.

Given a function $F$ holomorphic on $W_0$, see \equ(5.19), we introduce the
Fourier coefficients $F_\nn(\AA,p,q)$ and the Taylor coefficients
$F_{,hk}(\AA,\zz)$ of the expansions:
%
$$F(\AA,\zz,p,q)=\sum_{\nn\in Z^{l-1}} F_\nn(\AA,p,q)\zz^{\,\nn}=
\sum_{h,k=0}^\i F_{,hk}(\AA,\zz)p^hq^k\Eq(5.26)$$
%
where $z_j=\exp i\a_j$ and $\zz^{\,\nn}=\prod_jz_j^{\n_j}=\prod_j
e^{i\n_j\a_j}$, (the latter two notations will be used interchangeably).

Thus we can introduce the following functions ({\it truncations of $F$}),
for $N\ge0,|\nn|\equiv\sum_j|\n_j|$:
%
$$\eqalign{
F^{[\le N]}(\AA,\zz,p,q)=&\sum_{|\nn|\le N} F_\nn(\AA,p,q)
e^{i\nn\aa},\qquad\quad F^{[>N]}\equiv F-F^{[\le N]}\cr
F^D(\AA,\zz,w)=&\sum_{h\ge0} F_{,hh}(\AA,\zz) w^h\cr}\Eq(5.27)$$
%

We can now begin our sequence of estimates leading to the proof of
lemma 1' and lemma 1.

The following dimensional estimates hold for various truncations of
$f_0$; given $N_0,\d_0$:
%
$$\eqalign{
||f_0^{[\le N_0]}||\le&B_1\e_0\xc_0^{-\b_1}\d_0^{-\b_1},\qquad
||f_0^{[>N_0]}||\le B_1\e_0\xc_0^{-\b_1}\d_0^{-\b_1}e^{-\x_0\d_0N_0/2},\cr
||f_0^{[\le N_0]D}||\le&B_1\e_0\xc_0^{-\b_1}\d_0^{-\b_1},\qquad
||f_0^{[\le N_0]}-f_0^{[\le N_0]D}||\le B_1\e_0\xc_0^{-\b_1}\d_0^{-\b_1}\cr}
\Eq(5.28)$$
%
where the $||\cdot||$ is evaluated from \equ(5.22) on the domain
$W(\k_0e^{-\d_0},\r_0,\x_0e^{-\d_0},\m_0)$ and the inequalities express
simple dimensional estimates in the sense defined above: the constants
that arise have been adjusted so that only the two parameters
$B_1,\b_1>0$ are needed. Sharper bounds would require more constants;
but we are not interested in sharpness of the estimates (in this paper).

We assign, \ap, a sequence $\d_0>\d_1\ldots$ of positive numbers such that
$4\sum_{j=0}^\i \d_j<\log2$ and such that $\d_j$ does not approach zero too
fast (\eg $\d_j=(1+j^2)^{-1}2^{-4}\log2$): it will be a set of auxiliary
parameters that we shall use in our inductive construction.  Below we
introduce sequences of other parameters $B_1,B_2,B_3,\ldots$ and
$\b_1,\b_2,\b_3,\ldots$, depending only on the number of degrees of freedom
$l$
(and on the diophantine constant $\t$, see \equ(3.1)),
and we shall suppose the $B_j$'s and the $\b_j$'s increasing (there
will be, however, only finitely many such constants).

Let $N_0$ be such that \equ(5.28) implies $||f_0^{[> N_0]}||\le O(\e_0^2)$;
for instance, recalling that $\e_0 E^{-1}_0\r_0^{-1}<1/2$,
by the remark following \equ(5.25):
%
$$N_0=-2\x_0^{-1}\d_0^{-1}\log(\e_0 E_0^{-1}\r_0^{-1})
\quad\Rightarrow\quad||f_0^{[>N_0]}||\le
B_1\xc_0^{-\b_1}\d_0^{-\b_1}\e_0^2E_0^{-1}\r_0^{-1}
\Eq(5.29)$$

Calling $(\AA_0,\aa_0,p_0,q_0)$ the canonical coordinates in which we
describe our initial hamiltonian as in \equ(5.2), we consider the canonical
map defined via a generating function which, denoting the new variables
with a prime, is a function $\F(\AA',p',\aa_0,q_0,\m)$ given by:
%
$$\oo_0\cdot\Dpr_{\aa_0}\F+g_0\ [q_0\dpr_{q_0}\F-p'\dpr_{p'}\F]
=-f^{[\le N_0]}_0+\lis{f_0^{[\le N_0]}}^D\Eq(5.30)$$
%
where $g_0\equiv g_0(\AA',J,\m)\=\dpr_J h_0(\AA',J,\m)$,
$\oo_0\=\oo(\AA',J,\m)\=\dpr_{\AA'}h_0(\AA',J,\m)$ with $J\equiv p'q_0$
and the bar denotes average over the $\aa$-variables.

The function $\F$ can be written:
%
$$\F(\AA',\aa_0,p',q_0,\m)=\sum_{|h-k|+|\nn|>0\atop|\nn|\le N_0}
{f_{0\nn,hk}(\AA',\m)e^{i\nn\cdot\aa_0}{p'}^hq_0^k\over
-i\oo_0(\AA',p'q_0,\m)\cdot\nn-g_0(\AA',p'q_0,\m)(k-h)}\Eq(5.31)$$
%
The function $\F$ is defined in a domain:
%
$$W(\k_0e^{-\d_0},\tilde\r_0,\x_0e^{-\d_0},\m_0),\quad\tilde\r_0<\r_0
\Eq(5.32)$$
%
(hence smaller than the one where \equ(5.28) hold), where $\tilde \r_0$ is
so chosen to control the denominators in \equ(5.31). By dimensional
bounds one checks easily that if:
%
$$\tilde \r_0=\r_0 [4 l E_0C_0N_0^{\t+1}]^{-1},\qquad\l_0E_0\G_0<4^{-1}
\Eq(5.33)$$
%
(cfr. also \equ(5.18)) then, for $|A'_i - A^0_{i} (s,u,J,\m)| <
\tilde \r_0$, $0<|\nn| \leq N_0$ and $|J| < \k_0^2$ one has:
%
$$|-i\oo_0(\AA',J,\m)
\cdot\nn+g_0(\AA',J,\m)(h-k)|^{-1}\le 2C_0(|\nn|^\t+|h-k|)\Eq(5.34)$$
%
see appendix A5.

The last inequality can be combined with dimensional bounds to imply:
%
$$||\F||\leq B_2\xc_0^{-\b_2}\d_0^{-\b_2}\,\e_0C_0\Eq(5.35)$$
%
for suitably chosen $B_2,\b_2>0$, and in the domain \equ(5.32).

The canonical map associated with $\F$ is generated by the following
standard relations
(omitting the explicit $\m$--dependence):
%
$$\eqalign{\AA_0&=\AA'+\dpr_{\aa_0}\F(\AA',\aa_0,p',q_0),\quad
p_0=p'+\dpr_{q_0}\F(\AA',\aa_0,p',q_0),\cr
\aa'&=\aa_0 + \dpr_{\AA'}\F(\AA',\aa_0,p',q_0),\quad
q'=q_0+\dpr_{p'}\F(\AA',\aa_0,p',q_0)\cr}
\Eq(5.36)$$
%
which could be written in the more precise complex variables notation,
(see comment after \equ(5.26)),
by replacing $\aa_0$ by $\zz_0$ in the argument of $\F$, writing $i z_{0j}
\dpr_{z_{0j}}$ for $\dpr_{\a_{0j}}$ and replacing the third of \equ(5.36)
by:
%
$$z'_j=z_{0j} \exp [i \dpr_{A'_j} \F (\AA', \zz_0, p',q_0)]\Eq(5.37)$$
%

To obtain a map $\tilde \CC$ from \equ(5.36), one has to use the
implicit functions theorem: in so doing the domain of definition of
$\tilde \CC$ has to be taken somewhat smaller than the domain,
\equ(5.32), of
definition of $\F$.  If we want $\tilde \CC (\AA', \aa', p',q',\m)$ to
be defined on the domain:
%
$$\tilde W\=W(\k_0e^{-2 \d_0},\tilde\r_0 e^{-\d_0},\xc_0e^{-2\d_0},\m_0)
\Eq(5.38)$$
%
(\ie ``just giving up regularity" by an extra $\d_0$) we must impose a
condition implying that it is $\tilde\r_0^{-1}\xc_0^{-1}
\d_0^{-2}||\F||\ll 1$, \ie :
%
$$x\=B_3 \xc_0^{-\b_3}
\d_0^{-\b_3}(\e_0C_0 \r_0^{-1})(E_0C_0)N_0^{\t+1} < 1\Eq(5.39)$$
%
with $B_3,\b_3 $ conveniently large.

This follows from a trivial implicit function theorem.  After a moment of
thought one realizes that such a condition implies at the same time the
injectivity of the map \equ(5.36), the non vanishing of its jacobian and it
also imposes that the image of the boundary of the domain $W(\k_0
e^{-\d_0}, \tilde\r_0,\x_0 e^{-\d_0})$ where $||\F||$ is defined stays well
away from the boundary of $\tilde W$: in appendix A4
we have called such an argument an {\it
image of the boundary} lemma, (see, for instance, [G], \S 5.11).  Here
\equ(5.18), \ie $\r_0<\k_0^2$, has been used to eliminate $\k_0$ from the
condition.

The map $\tilde\CC$: $(\AA',\aa',p',q',\m)\in \tilde W
\to (\AA_0,\aa_0,p_0,q_0)$ will take the form:
%
$$\eqalign{
\AA_0=&\AA'+\V\X_0(\AA',\aa',p',q')\qquad\kern1.1truecm
p_0=p'+\L_0(\AA',\aa',p',q')\cr
\aa_0=&\aa'+\V\D_0(\AA',\aa',p',q')\qquad\qquad\kern0.2truecm
q_0=q'+\Th_0(\AA',\aa',p',q')\cr}\Eq(5.40)$$
%
and in the domain $\tilde W$ the bounds:
%
$$\eqalign{
||\V\X_0||<&x\tilde\r_0\d_0,\kern0.3truecm\qquad\quad
||\L_0||<x\k_0\d_0\cr
||\V\D_0||<&x\d_0\kern0.60truecm
\quad\qquad||\Th_0||<x\k_0\d_0\cr}\Eq(5.41)$$
%
are valid, with $x$ defined by \equ(5.39), and $\r_0<\k^2_0$ has been
again used.

The map $\tilde\CC$ will transform the Hamiltonian \equ(5.2) into:
%
$$h_1(\AA',p'q',\m) +f_1(\AA',\aa',p',q',\m)\Eq(5.42)$$
%
where:
%
$$\eqalign{
h_1&=h_0(\AA',p'q',\m)+\overline{f_0}^D (\AA',p'q',\m)\cr
\overline{f_0}^D(\AA', p'q',\m)&\=\sum_{k=0}^{\i} f_{0\V0,kk}(\AA',\m)
(p'q')^k\=\int f_0^D (\AA',\aa',p'q',\m){d\aa'\over (2 \pi )^{l-1}}\cr}
\Eq(5.43)$$
%
The functions $h_1, f_1$ are easily controlled (by ``just
giving up a bit $\d_0$ of regularity'' in each variable) in:
%
$$\lis W=W(\k_0 e^{-3\d_0}, \tilde\r_0
e^{-2 \d_0},\xc_0e^{-3\d_0},\m_0)\Eq(5.44)$$
%
by using dimensional estimates, from \equ(5.40),\equ(5.41) and
along a well known elementary scheme, see [G] \S 5.12; the result is:
%
$$\eqalign{
||\dpr h_1||_{\lis W}&\leq E_0(1+B_4\d_0^{-\b_4}\,(\e_0\r_0^{-1}E_0^{-1}))\cr
||f_1||_{\lis W}&\leq B_4\xc_0^{-\b_4}\d_0^{-\b_4}\e_0(\e_0 E_0^{-1}\r_0^{-1})
(E_0C_0)^2 N_0^{\t+1}\cr}\Eq(5.45)$$

The next step is to study the equations for $\V a\in C^{l-1}$ given by:
%
$$\dpr_{\AA'} h_1 (\AA^0+\V a,J,\m)=\oo_{su},\Eq(5.46)$$
%
with $(s,u)\in\tilde\II_0$, $\AA^0=\AA^0(s,u,J,\m)$,
see \equ(5.20).

By Taylor expansion this can be written, setting
$M_0=\dpr_{\AA\AA}h_0(\AA^0,J,\m)$, as:
%
$$\V a+M_0^{-1}\V n(\V a)=\V0\Eq(5.47)$$
%
and $M_0^{-1}\V n(\V a)\=-\V m(\V a)$ can be bounded by:
%
$$|M_0^{-1}\V n(\V a)|
\le4l^2\h_0(\e_0\r_0^{-1}+E_0\r^2\r_0^{-2})\ ,
\qquad {\rm if} \ \ |\V a|<\r<\tilde\r_0/2
\Eq(5.48)$$
%

The \equ(5.47) can be studied by applying the implicit function
theorem. The usual argument about the ``image of the boundary'' implies the
existence of a unique solution to the equation $\V a=\V m(\V a)$ with $|\V
a|<\r$ if $b||\V m||_\r$ $\r^{-1}<1$ for a suitably large $b$: for instance
[G, proposition 19, p.490]  shows that $b=2^8$ is sufficient (but
$b=2$ would also be sufficient).

Therefore we take:
%
$$\r\={\rt_0(\e_0E_0^{-1}\r_0^{-1})^{\ch}\d_0\over4l^2b(\h_0E_0\r_0^{-1})}
<{\rt_0\d_0(\e_0E_0^{-1}\r_0^{-1})^\ch\over 4}\Eq(5.49)$$
%}
where $\ch\in(0,1)$ is a free parameter that we eventually fix close
to $0$ (\eg $1/4$) and $b$ is as above (\eg $b=2$).

We deduce that a sufficient condition for
the existence of a solution to \equ(5.47) with $|\V a|<\r$ is:
%
$$\eqalign{
1>&4l^2\ b\ \h_0(\e_0\r_0^{-1}+E_0\r^2\r_0^{-2})\r^{-1}\=
4l^2b(\h_0E_0\r_0^{-1})\Bigl(\e_0E_0^{-1}\r_0^{-1}
{\r_0\over\r}+{\r\over\r_0}\Bigr)\Leftarrow\cr
&\Leftarrow
4l^2b(\e_0E_0^{-1}\r_0^{-1})^{1-\ch}{4l^3b(\h_0E_0\r_0^{-1})^2\over\d_0}
{\r_0\over\rt_0}+{1\over2}<1\cr}\Eq(5.50)$$
%
and the latter condition can be imposed by requiring:
%
$$B_5\xc_0^{-\b_5}\d_0^{-\b_5}(\e_0E_0^{-1}\r_0^{-1})^{1-\ch}
(\h_0E_0\r_0^{-1})^2(E_0C_0)
\bigl[-\log(\e_0E_0^{-1}\r_0^{-1})\bigr]^{(\t+1)}<1\Eq(5.51)$$
%
for suitably large $B_5,\b_5$.

Setting $\AA^1(s,u,J,\m)=\AA^0(s,u,J,\m)+\V a$ we get
(see \equ(5.48)):
%
$$|\AA^1(s,u,J,\m)-\AA^0(s,u,J,\m)|<\r<{1\over4}\tilde\r_0\d_0
(\e_0E_0^{-1}\r_0^{-1})^\ch\Eq(5.52)$$
%
The free constant $\ch$ could in fact be taken zero, at the price of
having no $\e_0$ dependence in the r.h.s. of \equ(5.52): a property that
we do not want in later estimates.

Therefore \equ(5.52) insures also that $(\AA,\aa,p,q,\m)
\equiv(\AA^1,\aa,0,0,0)$ lies very well inside the domain, $\overline W$, of
definition of $h_1+f_1$, (\ie of $f_1$).

Choosing suitably $B_6$ and $\b_6$
one easily checks that the two conditions:
%
$$\eqalign{
&B_6\xc_0^{-\b_6}\d_0^{-\b_6}(\e_0E_0^{-1}\r_0^{-1})^{1-\ch}
(\h_0E_0\r_0^{-1})^2(C_0E_0)
[\log(E_0\r_0\e_0^{-1})]^{\t+1}<1\cr
&\l_0\G_0E_0<4^{-1}\cr}\Eq(5.53)$$
%
imply all the conditions imposed so far (\ie imposed in
\equ(5.51),\equ(5.33),\equ(5.39)).

With the above defined $(s,u)\to\AA^1(s,u,J,\m)$
we can define, via \equ(5.19) with $0\to1$, the set:
%
$$W_1\=W(\k_1,\r_1,\x_1,\m_1)\Eq(5.54)$$
%
where:
%
$$\eqalign{
\k_1\=&\k_0e^{-4\d_0},\quad \x_1\=\xc_0e^{-4\d_0},\quad \m_1\=\m_0,\cr
\r_1\=&\r_0\bigl(B_7\xc_0^{-\b_7}\d_0^{-\b_7}(E_0C_0)(\log E_0\r_0/\e_0)^{\t+1}
\bigr)^{-1}\cr
C_1\=&C_0,\quad\G_1\=\G_0\cr}\Eq(5.55)$$
%
Note that for $B_7,\b_7$ large enough it follows that
$\r_1<\tilde\r_0/2$ so that the domain $W_1$ is strictly contained in
the domain $\overline W$, see \equ(5.44), of definition of $h_1,f_1$
and the above definitions, via dimensional estimates, allow us to control
{\it all the derivatives} of $h_1$ and $f_1$ in $W_1$.

The new parameters measuring the size of $h_1,f_1$
(cfr. \equ(5.23), \equ(5.16)) can be taken, by
\equ(5.45), to be any parameters $E_1,\e_1,\h_1,\l_1$ verifying the following
inequalities:
%
$$\eqalign{
&E_1\ge E_0(1+B_8
\d_0^{-\b_8} (\e_0\r_0^{-1}E_0^{-1}))\cr
&\e_1\r_1^{-1}E_1^{-1}\ge B_8\xc_0^{-\b_8}\d_0^{-\b_8}
(\e_0\r_0^{-1}E_0^{-1})^2(E_0C_0)^3(-\log \e_0E_0^{-1}\r_0^{-1})^{2(\t+1)}\cr
&\h_1\ge \h_0\bigl(1+B_8
\d_0^{-\b_8} (\h_0\r_0^{-1}E_0)(\e_0
E_0^{-1}\r_0^{-1})\bigr)\cr
&\l_1\ge \l_0+B_8(E_0\G_0)(\e_0\r_0^{-1}E_0^{-1})\cr}\Eq(5.56)$$
%
provided the conditions in \equ(5.53) hold.

Following the familiar pattern of KAM theory we are now going to
iterate the above scheme, \ie
{\it we shall label by indices $j=0,1,2...$ the
Hamiltonians $h_j+f_j$ together with their size parameters ($\e_j,
E_j,...$) obtained by sequentially applying the above scheme.}
%
This procedure makes sense {\it provided} the analogues of conditions
\equ(5.53) are satisfied at each step of the construction.

We claim that one can find $B,\b$ depending only on $l$,
$\t$ and large enough so that:
%
$$B\xc_0^{-\b}(\e_0 \r_0^{-1}E_0^{-1})(E_0C_0)^6
(\h_0 E_0 \r_0^{-1})^3<1\Eq(5.57)$$
%
implies that the above scheme can be carried out an infinite number of
times.

To prove the claim we proceed by induction and to simplify the
discussion we introduce the following {\it dimensionless} parameters:
%
$$\bar\e_j=\e_j\r_j^{-1}E_j^{-1},\quad \bar E_j=E_jC_j,\quad
\bar\h_j=\h_jE_j\r_j^{-1}\Eq(5.58)$$
%
and a number $1^-$ which is any prefixed number less than $1$. Given
$1^-$ we fix the so far free $\ch$ so that $\ch>1-1^-\=0^+$ (one could
already say that $\ch$ is any prefixed number close to $0$ (\eg $1/4$)
and $1^-$ is a free parameter  to be eventually fixed slightly above
$1/2$: but we prefer to keep the parameters free as the inequalities
look probably more transparent in this way).

Furthermore we impose the following conditions which permit simple
bounds on the r.h.s.  of \equ(5.56) and \equ(5.53):
%
$$\eqalign{
&B_8\d_j^{-\b_8} \bar\e_j^{1^-}<1,\qquad
B_8\x_j^{-\b_8}\d_j^{-\b_8}
\bar\e_j^{1^-}(\log\bar\e_j^{-1})^{2(\t+1)}\bar E_j^3<1,\quad
B_8\d_j^{-\b_8}\bar\e_j^{1^-}\bar\h_j<1\cr
&B_6\x_j^{-\b_6}\d_j^{-\b_6}{\bar\e_j}^{1^-}\bar\h_j^2\bar
E_j(-\log\bar\e_j)^{\t+1}<1\cr}\Eq(5.59)$$
%
and in terms of this definition we fix the definition of the parameters
verifying the analogue of \equ(5.56) for general $j$ as follows:
%
$$\eqalign{
\bar E_{j+1}=&\bar E_j(1+\bar\e_j^{0^+}),\qquad\bar\e_{j+1}=\bar\e_j^{1^+},
\qquad\kern1.truecm\x_{j+1}=e^{-4\d_j}\x_j\cr
\h_{j+1}=&\h_j(1+\bar\e_j^{0^+}),\qquad
\l_{j+1}=\l_j+\bar\e_j^{0^+},\qquad \r_{j+1}={\r_j(\x_j\d_j)^{\b_7}\over
B_7\bar E_j\log\bar\e_j^{-1}}\cr
C_{j+1}=&C_j,\kern1.7truecm\qquad\G_{j+1}=\G_j\cr}\Eq(5.60)$$
%
where $1^+=2-1^-,\,0^+=1-1^-$ (and for $j=0$, $\x_{j=0}=$  $\hat \x_0$).

Hence if $\bar\e_0$ is small enough (depending on the value chosen for
$1^-$) we see that $\forall j$:
%
$$E_j\le\sqrt2E_0,\qquad\h_j\leq\sqrt2\h_0,\qquad\l_j<2\l_0,\qquad
\k_j\geq\k_0/2,\qquad\x_j\ge\xc_0/2 \Eq(5.61)$$
%
so that, if $\bar\e_0$ is small enough compared to $1$ (depending on the
choice of the number denoted $1^-$) and if $\l_0E_0\G_0$ is small enough
(\ie $<8^{-1}$), it will be $\l_jE_j\G_j<4^{-1}$ and:
%
$$\eqalign{
\r_{j+1}\ge&\r_j{\xc_0^{\b_7}\over2 B_7(E_0C_0)}{2^4(\log2)^{-1}\over
(1+j^2)^{\b_7}}
{1\over(1^+)^j\log\bar\e_0^{-1}}\ge\cr
\ge&\r_0\bigl(B'_7\xc_0^{-\b_7}(E_0C_0)\log\bar\e_0^{-1}\bigr)^{-j-1}
\bigl[(1+j)!\bigr]^{-2\b_7}{(1^+)^{-j(j+1)/2}}\=\s_j^{-1}\cr\cr
\bar\h_j\le&4\bar\h_0\s_j\cr}\Eq(5.62)$$
%
for some $B'_7$.

Thus we see, by taking into account the rapidity of convergence to zero of
$\bar\e_j$ and if $\b_-,B_-$ are suitably large, that the conditions in
\equ(5.59) are equivalent to:
%
$$\eqalign{
&B_-\bar\e_0^{1^-}<1,\qquad
B_-\xc_0^{-\b_-}\bar\e_0^{1^{--}}\bar E_0^3<1\qquad\cr
&B_-\xc_0^{-\b_-}(\bar\e_0)^{(1^+)^j\cdot1^{--}}\bar E_0^{j}\s_j^2
\bar\h_0^2\bar E_0<1,\qquad j\ge0\cr}\Eq(5.63)$$
%
if $1^{--}$ is defined to be slightly smaller (by any prefixed amount)
than the value fixed for $1^-$ appearing in
\equ(5.59).

Choosing $1^-,1^{--}$ slightly larger than $1/2$, and taking into account
the expression in \equ(5.62) for $\s_j$, it follows that all conditions are
implied by the following:
%
$$B_9\xc_0^{-\b_9}\bar\e_0\bar E_0^{6}\bar\h_0^{3}<1\Eq(5.64)$$
%
where $B_9,\b_9$ are constants depending only on $l$.

The above discussion contains some ``hidden'' assumptions on the initial
data, namely \equ(5.20),\equ(5.9), and $8\l_0E_0\G_0<1$. They are verified
automatically if the parameters $\rb_0,\kb_0,\mb_0$ are chosen as
prescribed by \equ(5.18),\equ(5.20).

Hence we can say that \equ(5.64) together with \equ(5.9),\equ(5.20)
and $8\l_0E_0\G_0<1$ are implied by:
%
$$B_{9}\xc_0^{-\b_9}\bar\e_0\bar
E_0^{6}\bar\h_0^{3}<1\Eq(5.65)$$
%
possibly readjusting $B_{9},\b_9$ (recall, as well, that $\G_0<C_0$).
As we shall see below this is the final
condition under which lemma 1' holds.

This completes our check of the claim in \equ(5.57).

Thus we can construct, for all $j \geq 0$, canonical transformations
$\tilde \CC_j$ ($\tilde \CC_0=\tilde \CC$) mapping
$W_{j+1}$ into $W_j$ ($W_j$ is defined in \equ(5.54) with $1\to j$;
recall that $\m_{j}\equiv\m_0$).
Such maps are close to the
identity within $\| \F_j \| \r_{j+1}^{-1}$ in the $\AA$--variables and
within $\| \F_j \|\k_{j+1}^{-1}\d_{j+1}^{-1}$ in the $p,q$--variables and
within $||\F_j||\x_{j+1}^{-1}\d_{j+1}^{-1}$ in the $\aa$--variables.

Their derivatives of order $k$ in $\AA$'s, $h$ in the $\aa$'s, $z$
in the $p,q$ are bounded by multiplying the above bounds
by $\r_{j+1}^{-k}\k_{j+1}^{-z}\d_{j+1}^{-h-z}$.
Since $\| \F_j \| \leq B_2
\hat \x_j^{-\b_2}\d_j^{-\b_2} \e_jC_0$, see \equ(5.35),
\equ(5.60), we realize that the map $\tilde \CC_j$ approaches the
identity very quickly.

Taking into account the \equ(5.52) it also follows that
the sheets $\bar\LL^j$ defined by $(s,u)\in \tilde\II_0
\to\AA^j(s,u,0,\m)$ approach a limit sheet:
%
$$\bar\LL^\i \ {\rm  defined\  by\ }
\quad (s,u)\in\tilde\II_0\to\AA^\i(s,u,0,\m)\ , \quad
\AA^\io(s,u,0,0)\=\AA_{su}\Eq(5.66)$$
%
and control is kept on any prefixed number of derivatives of
$\bar\LL^\i$: here we have used that, see \equ(5.52), $\ch>0$.

Furthermore the domains of holomorphy of the maps $\tilde \CC_j$, hence of
$\ct_0 \ct_1 ... \ct_j=\ct^{(j)}$ do not shrink to zero in the $\aa,p,q,\m$
variables.

If we call $\tilde \F_j (\AA',\aa,p',q,\m)$ the generating function of the
composite map $\ct^{(j)}$, the above remarks imply that
$\tilde \F_j$  can be extended
to a $C^n$ function defined in the vicinity of the sets
$W_j$: the extension, which we still denote $\tilde \F_j$,
can be made in class $C^n$ for any $n$ so
that $\tilde \F_j$ converges in the $C^n$-norm to a limit $\tilde \F_\i$
(simply because the variations of the $\tilde \F_j$'s are basically bounded
as the $\F_j$, \ie by $\tilde B_2\xc_0^{-\b_2}\bar\e_0^{(3/2)^j}\bar
E_0\r_j$ see \equ(5.35),
\equ(5.58), \equ(5.61),\equ(5.62), in their analyticity domain;
hence they have their derivatives very small and therefore can be extended
remaining small), see [La], [Sv], [CG] and [P\"o] for similar constructions.

The limit $\tilde \F_\i$ will be uniquely
defined on $\bar\LL^\i \times {T}^{l-1} \times
\{ |p'| < \k_\i,\ |q|<\k_\i\}$, with (cfr. 2) of lemma 1):
%
$$\k_\io={\k_0\over 2}\= \bar \k \Eq(5.67)$$
%
and it will be real-analytic in the $\aa_0,p',q_0$ variables,
$C^n$--smooth in $\AA',\aa_0,p',q,\m$ for any
prefixed number $n$ of derivatives, if $\e_0$ is small enough.

Therefore $\tilde \F_\i$ generates a canonical map,
$\tilde \CC_\io$, which for
$\AA' \in\bar\LL^\i$ takes the form (cfr. \equ(5.5)):
%
$$\eqalign{\AA_0=&\AA'+\V\X'(\AA',\V\ps,p,q,\m)
\quad\kern2.2truecm\aa_0=\V\ps+\V\D'(\AA',\V\ps,p,q,\m)\cr
p_0=&p+\L'(\AA',\V\ps,p,q,\m)\kern2.5truecm\quad q_0=
q+\Th'(\AA',\V\ps,p,q,\m)\cr}\Eq(5.68)$$
%
and for  $\AA'=\AA^\io(s,u,pq,\m)$
the solutions of the motion equations take the form
\equ(5.11)
with $\oo_{su},g_{su}$ defined in \equ(5.7), \equ(5.9) and with
$\g'\=u_\i'(s,u,J,\m)$ defined by (cfr. \equ(5.16)):
%
$$\dpr_J h_\i(\AA^\i(s,u,J,\m),
J,\m)=(1+u_\i')g_{su}
\=(1+\g') g_{su}\Eq(5.69)$$
%
where $h_\i$ $\=\lim_{j\to \io} h_j$. Note that if we denote
by $H_\io(\AA',\V
\ps,p,q,\m)$ the original hamiltonian \equ(5.2) computed
in the new variables defined by $\tilde \F_\io$, it is:
%
$$h_\io(\AA^\io(s,u,pq,\m),pq,\m)=H_\io(\AA^\io(s,u,pq,\m),\V \ps,pq,\m)
\Eq(5.70)$$
%
Furthermore $u'_\io(s,u,pq,\m)$ is analytic in
$p,q,\m$, if $(s,u)$ are fixed in $\tilde\II_0$.
The parametric equations of the whiskers \equ(5.10) are now immediately
obtained in terms of \equ(5.68) and of the transformation \equ(5.3) of
lemma 0. Setting:
%
$$\eqalign{
& z\=(\AA^\io(s,u,pq,\m),\V \ps ,p,q,\m)\quad
\hat z \= (\AA^\io(s,u,pq,\m),p,q,\m) \cr
& \hat \z \= \hat z + \Big( \V \X'(z), \L'(z), \Th'(z),0\Big)\cr}
\Eq(5.71)$$
%
we find (cfr. \equ(5.10)):
%
$$\eqalign{
& \AA_{su}(J,\m)\= \AA^\io(s,u,J,\m)\ , \qquad (\Rightarrow
\ \AA_{su}(0,0)\=\AA_{su} )\cr
& \V \X(\V \ps,p,q,s,u,\m)\= \V \X'(z)\ , \quad
\V \D(\V \ps,p,q,s,u,\m)\= \V \D'(z) + \V \d(\hat \z) - \V \d(\hat z)\cr
& \L(\V \ps,p,q,s,u,\m)\= R(\hat \z)-R(\hat z)\ ,\quad
\Th(\V \ps,p,q,s,u,\m)\= S(\hat \z)-S(\hat z)\cr}
\Eq(5.72)$$
%
The linearity of the flow on the surfaces \equ(5.10)
follows because $f_j$ tends to zero very fast with all its derivatives,
including the $\AA$ derivatives in spite of the fact that the
$\AA$-domain shrinks: in fact the derivatives are bounded, for real
$\AA,\aa,p,q$, by $\e_j$ times some inverse power of $\r_j$ and $\e_j
\r_j^{-k} \rightarrow 0$ for all $k \geq 0$, by the inequality
\equ(5.62).

Note that the $H_\i$, by our construction, has derivatives with respect
to $\V\ps$ vanishing if $\AA'=\AA^\i(s,u,pq,\m)$,
$(s,u)\in\tilde\II_0$, see \equ(5.70); but at other
points it depends non trivially on $\V\ps$.  Hence for $p=q=0,
\AA'=\AA^\io(s,u,0,\m)$ and $(s,u)\in\tilde\II_0$ the \equ(5.68)
describe invariant tori $\TT_\m(s,u)$, and their whiskers are obtained
by considering $p=0$, $q\ne 0$ or $q=0$ and $p\ne 0$.

We express \equ(5.65) in terms of the more fundamental parameters
$\kb_0,\rb_0,\xb_0$ of lemma 0; see \equ(5.18), \equ(5.25), \equ(5.58).
If we assume for simplicity that for a suitable constant $\bar B_0\ge 1$
one has:
%
$$\bar B_0 \rb_0 \le \kb_0^2\Eq(5.73)$$
%
so that (see \equ(5.18)):
%
$$\r_0={\rb_0\over\bar B (E_0\h_0\rb_0^{-1})(E_0\G_0)^2}\Eq(5.74)$$
%
then \equ(5.65) becomes:
%
$$B_{10}(\e_0E_0^{-1}\rb_0^{-1})(E_0C_0)^6(E_0\G_0)^8
(E_0\h_0\rb_0^{-1})^7\xc_0^{-\b_{10}}<1\Eq(5.75)$$
%
Finally, we see that, in the case of interest to us,
{\it $\e_0$ is of the form
$\m\widetilde \e_0$ for some $\widetilde \e_0$,}
so that the condition of
$\m$ small takes the form (see \equ(5.75)):
%
$$|\m|< \m_0\=
\big[ B_{11}\xc_0^{-\b_{11}}(\widetilde\e_0\rb_0^{-1} E_0^{-1})
(E_0C_0)^{6}(E_0\G_0)^8(\h_0E_0\rb_0^{-1})^{7}\big]^{-1}\Eq(5.76)$$
%
provided $2\G_0<C_0$ and for suitable constants $B_{11},\b_{11}$.

This is still not completely explicit as the values of
$\rb_0,\kb_0,\xb_0$ are not the analyticity parameters of the original
hamiltonian.  In fact they can be deduced from the latter via the
application of lemma 0.

Lemma 0 allows us to take (see appendix A3, \equ(A3.39)
and \equ(5.73)):
%
$$\eqalign{& \kb_0\ ={1\over2}
{\k \over B m^7}  \min \Big\{ {\r'\over \k^2}\ ,\
\x'\ ,\ {1\over (\EE \G \k^{-2}) \s_2^2\s_3}\ ,\ {\r\x\over \r'\hat \s}
\Big\} \cr& \rb_0 \= \min\{ {\r\over 2}\ , \ {\kb_0^2\over \bar B_0} \}\ ,
\qquad \xb_0\= {\x\over 2}\cr}
\Eq(5.77)$$
%
if $\r',\x',\r,\x$ are the original hamiltonian regularity parameters
(see \S 2), and $m,\G,{\cal E},B,\k,$ $\s_2,$ $\s_3,\hat \s$
are introduced in appendix A3,
see \equ(A3.3),\equ(A3.47), \equ(A3.49) and \equ(A3.53),\equ(A3.39).

We can also deduce, from the analyticity in $\m$, a simple bound on the
size of the variation $|\AA^\io(s,u,J,\m)-\AA^0(s,u,J,\m)|$ and of the
variation of the {\it whisker graphs}, \ie of the functions in the r.h.s.
of \equ(5.68) and, by dimensional estimates, consequent bounds on their
derivatives.  We see from the above analysis that the bounds
\equ(5.41),\equ(5.52) must hold, with different constants replacing
$\b_3,B_3$ for the corresponding functions in \equ(5.68).  Hence for
suitable constants $G_A,G_\CC$:
%
$$\eqalign{
&\bar \r_0^{-1}(||\AA^\i(s,u,\cdot)-\AA^0(s,u,\cdot)||)\le
G_A|\m|\m_0^{-1}\cr
&\bar \r_0^{-1}||\V\X'||
+\bar \x_0^{-1}||\V\D'||+{\r'}^{-1}||\L'||+{\x'}^{-1}||\Th'||<G_\CC
|\m|\m_0^{-1}\cr}\Eq(5.78)$$
%
where the norms are evaluated by fixing $(s,u)\in\bar\LL$;
here we have just bounded the
value at $z$ of a function holomorphic in a disk of
radius $z_0$ and vanishing at the center $z=0$ by its supremum times
$|z|/z_0$: we take $z=\m$ and use the holomorphy in $\m$.

And, using \equ(5.52) and \equ(5.35),\equ(5.41)
the constants $G_A,G_\CC$ can be easily expressed in terms of our
dimensionless constants:
%
$$\eqalign{
G_A=&B_{12}\bigl[
(\widetilde\e_0E_0^{-1}\rb_0^{-1})(\h_0E_0\rb_0^{-1})\bigr]^\ch\cr
G_\CC=&B_{12}(E_0\G_0)\xc_0^{-\b_{12}}\left[
(\widetilde\e_0E_0^{-1}\rb_0^{-1})
(E_0C_0)(\h_0E_0\rb_0^{-1})\right]^>\cr}\Eq(5.79)$$
%
for suitably chosen constants $B_{12},\b_{12}$ and having denoted $[x]^>$
the function $x\log x^{-1}$ for $x>1$.

The function $\g'$ in \equ(5.11) is the value $u'_\io$ in \equ(5.69):
it is analytic in $|J|<\k_0^2/2$ (see \equ(5.61)) and $|\m|<|\m_0|$
and it is bounded there, for all $s,u\in \tilde \II_0$
(see \equ(5.20), \equ(5.17), \equ(5.61)), by:
%
$$
|\g'(J,s,u,\m)|\le 2 \hat B (\G_0 E_0) (E_0\h_0 \bar \r_0^{-1})
\Big( {|J|\over \k_0^2} + {|\m|\over \m_0} \Big)
\Eq(5.80)$$
%
To check \equ(5.4) we simply use that the above proof has a free parameter
$C_0$.  The set $\Si(C_0)$, see \equ(3.1), has measure at least
$(s_2-s_1)[1- (\bar K/(DC_0)^{1/t})]$ by the assumption that $\LL$ is a
diffusion path, see \equ(3.2).  Therefore we choose, taking into account
that the constant $C_0$ appears to the power $6$ in the basic condition
\equ(5.75):
%
$$C_0\={\G_0|\m|^{-1/7}}\qquad \Rightarrow\ \Si_\m=\Si(C_0)\Eq(5.81)$$
%
Then we see that the constants
$k,K,\bar c$ and $c$ of lemma 1' can be taken:
%
$$ k=\G_0, \quad K={\bar K(E_0\G_0)^{-1/t}}
\ ,\quad \bar c\= 7\ ,\quad c\= 7\ t \Eq(5.82)$$
%
(where we have replaced $D$ by $E_0$: see 2) \S 3)
and, what is more important, the smallness condition on $\m$ can still
be met.

{\it This finishes the proof of lemma 1'}. Note that the smallness
condition on $|\m|$
(\ie \equ(5.75) with $C_0\=\G_0 |\m|^{-1/7}$) does not involve
$\th_0$ (defined in \equ(5.23)): such a quantity will appear in fixing
the energy in order to get lemma 1 as a  corollary of lemma 1'.

We now let $p,q$ be such that $|p|,|q|<\k_0/2$, $J\=pq$,
$\V \ps = \V 0$, $\AA^\io\=\AA^\io(s,u,J,\m)$ and, fixing $s\in \II_0$,
we try to find $u\=u(s,J,\m)$ so that the real part of the energy
$E(s,u,J,\m)$ associated to the initial data $(\AA^\io,\V 0,p,q,\m)$
coincides with the {\it prefixed value} $E\=h_0(\AA_s,0,0)$ (see 1) of \S
3). In view of the above construction, the energy $E(s,u,J,\m)$ is given by
(compare with \equ(5.69), \equ(5.70)):
%
$$
E(s,u,J,\m)= h_\io(\AA^\io(s,u,J,\m),J,\m)
\Eq(5.83)$$
%
and by Taylor expansion at $\m=0$, $u=0$ (see \equ(5.7),\equ(5.8),
\equ(5.66)):
%
$$\eqalign{
{\rm Re}\,E(s,u,J,\m)&=E+u
\Big(\dpr_\AA h_0(\AA_{s},0,0)\cdot[\dpr_u\AA_{su}]_{\m=0}\Big)
+\b \tilde G  \Big( {u^2\over \bar u^2}+{|\m|\over \m_0}+
{|J|\over \k_0^2}\Big)=\cr
&=E+u\,[\oo_{s}\cdot(\dpr^2_{\AA}h_0)^{-1}\oo_{s}]
+\b \tilde G  \Big( {u^2\over \bar u^2}+{|\m|\over \m_0}+
{|J|\over \k_0^2}\Big)\cr}
\Eq(5.84)$$
%
where the derivative $\dpr_u\AA_{su}$ is computed by differentiating
\equ(5.8) and $\dpr^2_\AA h_0\=\dpr^2_\AA h_0(\AA_s,0,0)$; $\b$ is
some $C^\i$ function (at $s$ fixed) with $|\b|\le1$ and the constant
$\tilde G$ can be taken to be proportional, via a constant depending
only on $l$, to $E_0 (E_0\h_0 \rb_0^{-1})^2$ (recall that the constant
$\bar u$, see \equ(5.9), can be taken to be a numerical constant
times $(\h_0E_0\bar \r_0^{-1})^{-2}$, see \equ(5.18)).

The first two derivatives of $\b$ with respect to $u,\m,J$
can be bounded by our dimensionless constants. Hence, recalling the
definition in \equ(5.23) of $\th_0$, we see, by the implicit function
theorem, that under the further condition:
%
$$B_{13}(|\m|\m_0^{-1})(\th_0 E_0\bar \r_0)(\h_0E_0\bar \r_0^{-1})^2
\equiv B_{13}(|\m|\m_0^{-1})(\th_0 E_0^2\h_0)(E_0\h_0\bar
\r_0^{-1})<1\Eq(5.85)$$
%
we can find $u=u(s,J,\m)$ as desired \ie so that (see \equ(5.83)):
%
$$
E(s,u(s,J,\m),J,\m)\= E\Eq(5.86)$$
%
Therefore condition \equ(5.85) together with \equ(5.75) with
$C_0\=\G_0 |\m|^{-1/7}$ are sufficient to yield lemma 1' {\it and} lemma 1.

The functions $\g',\V \X, \V \D, \L, \Th$ of lemma 1 are obviously related
to the corresponding (but different) functions of lemma 1': just set
$u\=u(s,J,\m)$ in \equ(5.72), \equ(5.71) and in the definition of $\g'$
(cfr. \equ(5.69)); \eg:
%
$$
\g'(J,s,\m)\=\g'(J,s,u(J,s,\m),\m)\ ,\quad
\V \X(\V \ps, p,q,s,\m)\=
\V \X(\V \ps, p,q,s,u(J,s,\m),\m)\ , \etc
\Eq(5.87)$$
%
and the function $\g(J,s,\m)$ is just $\g\=u(J,s,\m)$.
The functions $\g,\g'$ are easily seen to satisfy the bound (cfr.
\equ(5.69),\equ(5.17),\equ(5.18)):
%
$$
\sup_{ {s\in \II_0, \m\in[-\m_0,\m_0]}\atop {|J|<\k_0^2/2}}
|\g|, |\g'| \le \bar u \= \bar B^2 (E_0 \h_0 \bar \r_0^{-1})^2
\Eq(5.88)$$
%
Bounds on $|\V \X|, |\V \D|, |\L|, |\Th|$ are easily obtained by recalling
their definitions, \equ(5.71), \equ(5.72) (for the functions of lemma
1') and \equ(5.87) (for the functions of lemma 1),
the bounds \equ(5.78), \equ(5.79), and the bounds on $|R|,|S|,|\V \d|$
(see \equ(A3.54)):
%
$$
\bar \r_0^{-1}||\V\X||
+\bar \x_0^{-1}||\V\D||+{\r'}^{-1}||\L||+{\x'}^{-1}||\Th||<G_\CC
|\m|\m_0^{-1}
\Eq(5.89)$$
%
where $G_\CC$ is as in \equ(5.79) (actually increased by a factor 2)
and the norms are taken at $s\in \II_0$ fixed for the functions of lemma 1,
or at $(s,u)\in \bar \LL$ fixed for the functions of lemma 1'.

Finally, we remark that all the requirements
(\equ(5.75),\equ(5.76),\equ(5.81),\equ(5.85)) we needed to prove lemma
1',1 can be enforced by requiring the single condition:
%
$$|\m|<\m^*\=
\left[\bigl[B \xc_0^{-\b}(\h_0 E_0 \rb_0^{-1})^{7}
(\G_0E_0)^{14}(\widetilde\e_0\rb_0^{-1}E_0^{-1})\bigr]^7(\th_0E_0^2\h_0)
\right]^{-1}\Eq(5.90)$$
%
where $B,\b>0$ are suitable constants depending only on $l$ and $\t$
(see \equ(5.23), \equ(5.25), lemma 0 and \equ(5.76) to refresh the memory
about the various parameters involved).

{\it This completes the proof of lemma 1' and lemma 1}.
In fact we have proved:
\vskip0.5truecm
\noindent{\it {\bf Lemma 2}: There exists a canonical map
$\CC(p',q',\AA',\aa')=
(I,\AA,\f,\aa)$ of class $C^n$ and a line
$\LL_\m:\,s\to\AA_\m(s)$, contained in the energy surface of energy $E$ for
the perturbed hamiltonian \equ(2.9), of class $C^n$ with the properties:
\item{1) }$\CC$ is $C^n$-close to the identity as $\m \rightarrow 0$,
$\LL_\m$ is $C^n$-close to $\LL$ as $\m \rightarrow 0$ and the domain of
$\CC$ is a set of the form $V \times { T}^{l-1} \times { S}^2$ where $V$ is
a neighborhood of $\LL$ containing $\LL_\m$ and ${ S}^2$ is a neighborhood
of the origin in $R^2$.
\item{2) } for $s$ in a set of measure $\ge (1-K|\m|^{1/c}), K,c>0$,
the set $\CC (\AA_\m(s) \times { T}^{l-1} \times { S}^2)$ is
invariant for the flow generated by the perturbed hamiltonian \equ(2.9);
\item{3) } the derivatives in $\AA'$ of the hamiltonian \equ(2.9)
regarded as a function of the new coordinates $(p',q',\AA',\aa')$
%
as well as those in $p',q'$ at constant $p'q'$ vanish on the above set, so that
the flow is linear in the $\V\psi
\in { T}^{l-1}$ variables and hyperbolic in the $p,q$ variables.
\item{4) } explicit bounds on the parametric equations of the invariant
tori, on their whiskers and on the main dimensionless parameters involved
in the construction are provided by the bounds found in the course of the
above proof.}
\vskip0.5truecm

This lemma is a quick, if a little mysterious, way of summarizing the
analysis of this section.

Another important {\it corollary of the above lemmata is
that they can be shown to cover the case of a forced system}:
%
$$H\=\o B+H_0+\m f\=
\o B+ h(\tilde A,\m)+P(I,\tilde A,\f,\m)+\m
f(I,\tilde A,\f,\tilde\a,\l,\m) \Eq(5.91)$$
%
where $B,\l$ are a pair of conjugate action angle variables and
$(\tilde A,\tilde \a)$ $\in$ $R^{l-2}\times T^{l-2}$
are other action angle coordinates which will be supposed
anisochronous, \ie such that $||(\dpr^2_{\tilde A} h)^{-1}||=\h_0<+\io$;
to compare with the previous notations one should set
$\AA\=(B,\tilde A)$ and $\aa\=(\l,\tilde \a)$.

In this case the notion of diffusion path has to be suitably adapted. We
consider a curve in $\tilde A$ space, $\LL=\{s\to \tilde A_s\}$
and define $\Si(C)$ exactly as in \equ(3.1) with $\AA$ replaced by
$\tilde A$ and we say that $\LL$  is a diffusion path if
\equ(3.2) holds. In other words in forced systems {\it the action of the
``forcing reservoir" $ B$ does not enter into the definition}.

The following technique, invented by Poincar\'e, applies remarkably well to
this case, see [P] p. 118, tome I, ch. III. Note that, if
$H_0(I,\tilde A,\f)\=h(\tilde A,\m)+P(I,\tilde A,\f,\m)$
and $H$ is as in \equ(5.91), the hamiltonian:
%
$$\eqalign{
& H_2(\AA,\aa,\m)
\={H^2\over2 E}\=h_2+f_2\ ,\qquad \AA\=(B,\tilde A)\ ,\ \aa\=(\l,\tilde  \a)\cr
& h_2(I,B,\tilde A,\f)\={1\over2E}\bigl(
\o^2B^2+H_0^2+2 \o B H_0\bigr)\cr}\Eq(5.92)$$
%
where $E\ne0$ is fixed arbitrarily, is such that $h_2$ has the property
that  [see 1) of appendix A9]:
%
$$\det(\dpr_\AA^2h_2)\=
\det(\dpr_{B\tilde A}^2 h_2)={\o^2\over E} \Big({h_2\over E}\Big)^{l-2}
\det(\dpr^2_{\tilde A} h)\Eq(5.93)$$
%
where in \equ(5.93) we evaluate the derivatives at $I=0$, $\f=0$ (hence
$P=0$).  Thus $h_2$ is non degenerate and, furthermore, at $I=0$, $\f=0$:
%
$$\dpr_\AA h_2\=
(\dpr_B h_2,\dpr_{\tilde A} h_2)\=(\o,\tilde \o) \qquad{\rm if}\qquad
h_2|_{(I,\f)=(0,0)}\=\o B+h=E
\Eq(5.94)$$
%
and we see that the line $\LL_2$ obtained from $\LL$ by adding to each of its
points a coordinate $B_s$ computed from the equation
$\o B+h(\tilde A_s,0)=E$ is a
diffusion path for $h_2$ in the sense of \S3.

It is immediate to check that if $z(t)\=(\AA(t),\aa(t))$ is a motion for
\equ(5.92), then $\bar z(t)$ $\=$ $z({t/\s})$, with
$\s\=(H_2(\AA(0),\aa(0),\m)/E)$, is a motion for \equ(5.91).
We can thus construct, by using the above lemmata, whiskered tori
for $h_2+f_2$ (and hence for $H$).

For a proper usage of the bounds involved in the above lemmata,
one has to estimate the
basic dimensional quantities. Fixing the arbitrary parameter $E\=4 \bar
\r_0$ $\max\{||\dpr_{\tilde A} h||, |\o|\}$ we see that:
%
$$||{\o B + h(\tilde A)\over E}-1||\=
\sup_{{|B-B_s|\le \rb_0}\atop ||\tilde A-\tilde A_s||\le \rb_0}
|{\o B + h(\tilde A)\over E}-1|\le {1\over 2}\ ,\quad B_s\={E-h(\tilde A_s)
\over \o}\Eq(5.95)$$
%
and this allows us to bound the norms,
$||\dpr h_2||$, $||(\dpr_J h_2)^{-1}||$, $||f_2||$,
associated to $H_2$ in \equ(5.92)
{\it in terms of constants times the corresponding quantities
for \equ(5.91)}, while we can take $E_0\=\tilde \b (||\dpr_{\tilde A}
h||+|\o|)$ and
$\th_0\=\tilde \b(E_0\rb_0)^{-1}$ [see also 1) of appendix A9]
for a suitable constant $\tilde \b$;
of course the norms referring to \equ(5.92) are taken over the action
domain $|B-B_s|\le \rb_0$, $||\tilde A-\tilde A_s||\le \rb_0$ (see
\equ(5.95)).
Thus we see that the statements of lemma 1' and lemma 1
just carry over to the present case under a condition like \equ(5.90)
with:
%
$$\m^*=\left[\bigl[B \xc_0^{-\b}(\h_0 E_0 \r_0^{-1})^{7}
(\G_0E_0)^{14}(\widetilde\e_0\r_0^{-1}E_0^{-1})\bigr]^7(\h_0E_0\r^{-1}_0)
\right]^{-1}\Eq(5.96)$$
%
and with the same quantitative bounds established in the proofs, provided
we interpret the notion of diffusion path in the way described above.

Finally, we remark that the whiskered tori that we obtain for \equ(5.91)
via lemma 1' applied to \equ(5.92) and via the rescaling described after
\equ(5.94), have, {\it for all} $s\in \Si_\m$ {\it and} $u\in [-\bar u,\bar u]$,
the $\l$--frequency equal to $\o$ (as it should, since the clock velocity
$\o$ cannot change, just because it is a clock).
%
\vglue2.truecm

\penalty-200

{\bf\S6 Large whiskers. Homoclinic points and angles}

\penalty10000

\vskip0.5truecm\numsec=6\numfor=1

\penalty10000

In \S5 we have constructed invariant tori surviving the onset of a
perturbation as well as the parts of their whiskers in their immediate
vicinity. We now derive the equations of the whiskers away from the
invariant tori with the purpose of finding whether they contain homoclinic
intersections.

The whiskers can be continued to form a full invariant manifold by evolving
them with the solution map $(I,\AA,\f,\aa)\to S_t^\m(I,\AA,\f,\aa)$
associated with the perturbed Hamilton equations generated by   \equ(2.9).
We regard the map
$S^\m_t$ as defined in the original coordinates, which are {\it globally}
describing our system and we shall call {\it local} the part of the
whiskers constructed so far, via lemma 1', denoting it by $W^{loc}(s,u)$.

The full stable whisker will be:
%
$$W_{stable}(s,u)=\bigcup_{t\le0}S_t^\m\left\{(I,\AA,\f,\aa)\in
W^{loc}_{stable}(s,u)\right\}\Eq(6.1)$$
%
for values of $u$ small and $s\in\II_0\equiv\Si(C_0)$, see lemma 1' \S5
and \equ(5.20).  Lemmata 0,1' imply that this set can be described, for
$|\f|<\tilde\f$, and $|\m|$ small enough, by parametric equations:
%
$$I= I(\f,\aa,\m)\ ,\qquad
\AA=\AA(\f,\aa,\m),\qquad|\f|<\tilde\f,\,\aa\in T^{l-1}\Eq(6.2)$$
%
where $\tilde\f$ is \ap fixed
in the following discussion (to be $<2\p$ in the case
of an {\it open} (see \S4) separatrix while for {\it closed} separatrices,
$|\f|<\tilde\f$ should be replaced by $\f_{\min}+\d<\f<\f_{\max}-\d$ for some
$\d>0$).  To fix ideas and simplify notations we shall discuss here mainly
the {\it stable case}, the unstable one being completely analogous (see
also below); however, when needed, we shall attach to the above functions
\equ(6.2) superscripts to distinguish among the two different cases (such
superscripts should not be confused with the parameters $s,u$ in \equ(6.1)
and elsewhere).

Fixing $s\in\II_0$ and $u$ small, the functions
$\AA(\f,\aa,\m),I(\f,\aa,\m)$ have to be such that for any
$|\f'|<\tilde\f,\aa'\in T^{l-1}$ there are $\f,\aa$ such that:
%
$$S^\m_t(I(\f',\aa',\m),\AA(\f',\aa',\m),\f',\aa')=
(I(\f,\aa,\m),\AA(\f,\aa,\m),\f,\aa)\Eq(6.3)$$
%
and we know, from lemma 1', that for $|\f|,|\m|$ small enough
$I(\f,\aa,\m),\AA(\f,\aa,\m)$ are analytic functions in the perturbation
parameter $\m$.

We shall fix $(\f,\aa)$ and try to determine the functions
$I(\f,\aa),\AA(\f,\aa)$.

We begin by noting that $S^\m_t$ is close to $S^\m_t|_{\m=0}\equiv S^0_t$
and depends analytically on $t,\aa,\f,\AA,I,\m$.  Also $S^0_t$ expands any
$\f\ne0$ to a value larger (in absolute value) than $\tilde\f$ in a finite
(positive or negative) time.

Hence it is clear that, fixed $\tilde\f$ and given $W_{stable}^{loc}(s,u)$,
we can use \equ(6.3) with $|\f|<\d,\,\aa\in T^{l-1}$ and $|t|<t_\d$ to define
$I(\f,\aa),\AA(\f,\aa)$ for $|\f|<\tilde\f,\,\aa\in T^{l-1}$ and $\d$ can
be taken to be any prefixed small positive number and $t_\d$ a suitably
long (but finite) time.

And the remarked analyticity of $S^\m_t$ together with the analyticity of
$W^{loc}_{stable}(s,u)$, see lemma 1,1',2 of \S5, imply the analyticity of
$I(.),\AA(.)$ in their arguments, at fixed $s,u$.

From now on, in order to avoid confusion with upper {\it indices} $\s=s,u$,
indicating stable/unstable, we drop the dependence upon the {\it
parameters} $s,u$, which in this section will be kept fixed.

Given a hamiltonian $H=H_0+\m f$ as in \equ(2.9) we write the equations of
motion for the vector $(I,\AA,\f,\aa)=X$ as:
%
$$\dot{X}= G_0(X)+\m  G(X)\Eq(6.4)$$
%
and we remark that if $\f,\aa\to (I^s(\f,\aa),\AA^s(\f,\aa),\f,\aa)$, for
$|\f|<\tilde\f$
and for $\aa\in T^{l-1}$, are the equations of the stable whisker, then:
%
$$
I^s(\f,\aa)=I^0(\f)+\sum_{k=1}^\i \m^kv^{ks}(\f,\aa)\ , \qquad
\AA^s(\f,\aa)=\AA^0+\sum_{k=1}^\i\m^k{\V h}^{ks}(\f,\aa)\Eq(6.5)$$
%
Since in this section we shall mainly discuss the stable whiskers,
we shall also drop the suffix $s$ (for stable) when this does not lead to
confusion.

It will be useful to consider also the slightly more general case in which
the variable $\aa$ is a function of $\m$:
%
$$\aa \= \aa_\m \= \sum_{k\ge 0} \aa^k \m^k\Eq(6.6)$$
%
while the variable $\f$ will be fixed once for all to be the value
$\bar \f$ corresponding to the point where $|I|$ is maximal for the
unperturbed hamiltonian (cfr. \S 4; $\bar \f=\p$ for the pendulum
\equ(2.1)).

Furthermore if $X^s(t)$ is the solution of the Hamilton equations with
initial data $X^s(0)=(I^s (\f,\aa),\AA^s (\f,\aa),\f,\aa)$ then, for large
enough $t$, $X^s (t)$ is inside the vicinity of the unperturbed torus
$\AA=\AA^0$, $I=\f=0$ where we can use the coordinates, described in \S5,
$(p,q,\V\ps)$.

Actually, by using the analyticity of the flow $S_t^\m$ and the analyticity
properties in $p,q,\V\ps$ discussed in \S 5, one can analytically continue
the functions $R(\AA',p,q,\m),\L(\V\ps,p,q,\m),...$ in \equ(5.10)
to a domain around the real $p,q$ such that $|pq|<\bar\k^2$ and
around the real $\V\psi$, so large to cover a vicinity of the points
$p=\bar p,q=\bar q, \V\psi\in T^{l-1}$ corresponding to $\bar I,\bar
\f,\aa\in T^{l-1}$, where $\bar I,\bar \f$ is the separatrix point chosen
as the origin. Therefore, after the analytic continuation, we can write
for $\Re t\ge0,|\Im t|\le\hat\x$ and $\hat\x$ small enough
(cfr. \equ(5.10), \equ(5.11) and recall that we are dropping the parameters
$s,u$ from the notation):
%
$$\eqalignno{
I^s (t)=&R(\AA',pe^{-(1+\g')gt},0,\m)+\L(\V\ps+\oo t,
pe^{-(1+\g')gt},0,\m)\cr
\AA^s (t)=&\AA+\V\X(\V\ps+\oo t,pe^{-(1+\g')gt},0,\m)&\eq(6.7)\cr
\f^s (t)=&S(\AA',pe^{-(1+\g')gt},0,\m)+\Th(\V\ps+\oo t,
pe^{-(1+\g')gt},0,\m)\cr
\aa^s (t)=&\V\ps+\oo t+\V\D(\V\ps+\oo t,pe^{-(1+\g')g t},0,\m)
+\V\d(\AA ,pe^{-(1+\g')g t},0,\m)\cr}$$
%
and the expressions of $\aa,\f$ in terms of $\V\ps,p$ are deduced from the
above relations with $t=0$. The constant $\hat \x$ is, of course, small and
cannot exceed the width of the holomorphy domain of $X^0(t)$.  The same
holds for the unstable whisker with the obvious changes (\ie exchanging the
roles of $p$ and $q$ and considering $\Re t\le 0$).

If:
%
$$X^s(t,\aa)=X^0(t,\aa)+\sum_{k=1}^\i\m^k X^{ks}(t,\aa)\=X^0(t,\aa)+
\bar X^s(t,\aa)\Eq(6.8)$$
%
denotes the evolution of the initial point in \equ(6.5),\equ(6.6), on the
stable whisker, it follows from \S5 that $X^{s}(t,\aa)$ has the form
$X^{s}(\oo t,t;\aa)$ for a suitable analytic function $X^s(\V\ps,t;\aa)$
periodic in $\V\ps,\aa$ converging at an exponential rate as $t\to \io$ to
$X^s(\V\ps,\io;\aa)$.

Analogously, if $X^{ks}(t,\aa_\m)$ denotes the $k^{\rm th}$ Taylor
coefficient in the $\m$ expansion of $X^s(t,\aa_\m)$, then
$X^{ks}(t,\aa_\m)=$ $X^{ks}(\oo t,t;\aa_\m)$.

Note that $X^{ks}$ depends only on the first $k+1$ coefficients of
$\aa_\m$; to stress this fact we shall sometimes write
$X^{ks}(\V\ps,t;\aa_\m^{[\le k]})$. Note also that $X^s(\V\ps,t,\aa)$ is
holomorphic in a domain $|\Im \ps_j|<\bar\x, |\m|<\bar\m_0$,
$\Re t\ge T$ and $\Im t$ arbitrary if $|\m|<\bar \m_0$ with
$\bar\m_0,\bar \x$ small enough  and if $T$ is large
enough (for instance so that $(\bar p+\bar\k)e^{-g T/2}<\k$, see lemma 1').

Recursive expressions for $v^k,\V h^k$ and the $X^{ks}(\V\ps,t;\aa_\m)$
could be deduced from \S 5, however it is more convenient to derive them
directly.

To do this we put \equ(6.8) into \equ(6.4) and introduce the following
notations.

If $G(I,\AA,\f,\aa,\m)$ is a function and if $p$, $\V
m=(m_1,\ldots,m_{2l})$ and  $k^i_j$ are integers, we denote:
%
$$\eqalign{
(G)^p_{\V m}(\cdot)\=&\Bigl(
{\dpr^{p}_\m\dpr^{m_1}_I\dpr^{m_2}_{A_1}
\ldots\dpr^{m_{l}}_{A_{l-1}}\dpr^{m_{l+1}}_\f\dpr^{m_{l+2}}_{\a_1}
\ldots\dpr^{m_{2l}}_{\a_{l-1}}\,G\over
p!\, m_1!\,m_2!\,m_3!\,\ldots m_{l+2}!\,\ldots m_{2l}!}\Bigr)(\cdot)\cr
(k^i_j)_{\V m,p}\=&(k^1_1,\ldots,k^1_{m_1},k^2_1,\ldots,k^2_{m_2},
\ldots,k^{2l}_1,\ldots,k^{2l}_{m_{2l}})\qquad {\rm
s.t.\ }\sum k^i_j=p\cr}\Eq(6.9)$$
%
where $k_j^i\ge1$ if $m_i>0$. Then, given $\aa_\m$, the \equ(6.4)
can be translated into a hierarchy of equations for the Taylor coefficients
$X^{ks}$ of $X(t,\aa_\m)\=X(t)$; it becomes
(omitting the stable index $s$):
%
$$\eqalign{
\dot X_r^k=&\sum_j(\dpr_j {G_0}_r)(X^0(t)) X^{k}_j
+\sum_{|\V m|+p>1}({G_0}_r)^p_{\V m}(X^0(t))\sum_{(k^i_j)_{\V m,k-p}}
\prod_{i=1}^{2l}\prod_{j=1}^{m_i}X^{k^i_j}_i+\cr
&+\sum_{|\V m|+p>0}
(G_r)^p_{\V m}(X^0(t))\sum_{(k^i_j)_{\V m,k-p-1}}
\prod_{i=1}^{2l}\prod_{j=1}^{m_i}X^{k^i_j}_i\cr}\Eq(6.10)$$
%
where $X^0(t)\=X^0(t,\aa_\m)$ and the first term in the r.h.s. is separated
from the others being the only one involving $X^h$ with $h=k$. We write
\equ(6.10) as:
%
$$\dot {X}^k=L X^k+F^k\Eq(6.11)$$
%
where $\dpr_j {G_0}_r\equiv L_{rj}$ and $F^k=F^{ks}$ is implicitly defined by
\equ(6.10) and \equ(6.11), so that:
%
$$L\=L(t)=\pmatrix{
-\dpr^2_{I\f}H_0&-\dpr^2_{\AA\f}H_0&-\dpr^2_{\f\f}H_0&0\cr
\noalign{\vskip2.pt}
0&0&0&0\cr
\dpr^2_{II}H_0&\dpr^2_{\AA I}H_0&\dpr^2_{I\f}H_0&0\cr
\dpr^2_{\AA I}H_0&\dpr^2_{\AA\AA}H_0&\dpr^2_{\AA\f}H_0&0\cr},\quad
F^1(t)=\pmatrix{
-\dpr_\f f-\dpr^2_{\f\m}H_0\cr
-\dpr_\aa f\cr
\dpr_I f+\dpr^2_{I\m}H_0\cr
\dpr_\AA f+\dpr^2_{\AA\m} H_0\cr}\Eq(6.12)$$
%
where the zeroes in the matrix $L$ appear because we make use of the form
\equ(2.3) of $H_0$ and all the derivatives in \equ(6.12) are evaluated at
the point $ X^0(t)$ $=$ $(I^0(t),\AA^0,\f^0(t),\aa_0+\oo t +\V \th(t))$ and
at $\m=0$ ($\f(0)=\bar \f$ and $\V \th$ is the phase shift introduced in \S
4).  And more generally $F^k(t)$ are defined by solving recursively
\equ(6.11), using \equ(6.10).

In fact we will check directly that \equ(6.10) can be solved for every
$k\ge1$ and that, for each $\aa^k$, the initial data:
%
$$X^k(0)=\pmatrix{v^k\cr\V h^k\cr0\cr\aa^k\cr}\Eq(6.13)$$
%
can be fixed (\ie $v^k$, $\V h^k$ can be fixed) for each $k\ge1$ so that
$X^k(t)$ has the asymptotic properties dictated by lemma 1' (in particular
that $X^k(t)$ is bounded).

The check can be done by studying with some care the wronskian of
\equ(6.11), \ie the solution to the equation:
%
$$\dot W=L W,\qquad  W(0)=1\Eq(6.14)$$
%

In fact the solution to \equ(6.11) can then be written:
%
$${X}(t)=W(t)\Bigl(X(0)+\ig_0^tW(\t)^{-1}F(\t)d\t\Bigr)\Eq(6.15)$$
%
where we drop the index $k$ on $X$ and $F$ to simplify the notation
while performing the $k$ independent algebra that follows.

As mentioned above,
we also suppose, for convenience, that the fixed $\bar \f$ is so chosen that
the solution $i=i(\f,\AA^0)$ of the separatrix equation
$P(\AA^0,i,\f,0)=0$ is maximal for $\f=\bar \f$, see \equ(4.1).

The general properties of the wronskian matrix $W(t)$, solution of
\equ(6.14), can be found easily from lemma 0, \S5. Let us write
(cfr. \equ(5.3), \equ(5.2)):
%
$$\pmatrix{I\cr \AA\cr \f\cr\aa}=\pmatrix{R(p,\V a,q)\cr
\V a\cr S(p,\V a,q)\cr \V\psi+\V\d(p,\V a,q)}
\= V^0(p,q,\V\ps,\V a)
\qquad\quad
H_0\=h_0(\V a,pq,0)\=h(\V a) + E(pq,\V a) \Eq(6.16)$$
%
the canonical map of lemma 0, for $\m=0$ (and dropped from the notation),
reducing to normal form  the free part of the hamiltonian:
$E(pq,\V a)=P(\V a,I,\f)$  (if the free pendulum is an ordinary
pendulum like \equ(2.1), the \equ(6.16) is the well known
Jacobi map, and $R,S$ are suitable Jacobian elliptic functions, see
appendix A9; note that here we use a different order of the variables
from that used in \S 5).

If we replace:
%
$$\eqalign{
&p\to pe^{-gt}, \quad q\to qe^{+gt},\quad\V \psi \to\V\psi+\V\O t\cr
&g\=\dpr_JE(J,\V a), \qquad J\=pq\cr
&\V\O\=\dpr_\AA h(\V a) +\dpr_{\V a} E(J,\V a)
%\t\dpr_E h(\V A,\t E_{\V A}(J))\dpr_{\V A} E_\AA(J)
\cr}\Eq(6.17)$$
%
the map is still canonical (as \equ(6.17) is the solution of the
Hamilton equations in normal form for the free hamiltonian). Hence its
jacobian is a canonical matrix that can be written:
%
$$
U(t)=\pmatrix{\dpr_p R&{\dpr_{\V a}R}&\dpr_q R&0\cr
0&1&0&0\cr
\dpr_p S&{\dpr_{\V a}S}&\dpr_q S&0\cr
{\dpr_p \V\d}&{{\dpr_{\V a}\V\d}}
&{\dpr_q\V\d}&1\cr}\cdot\pmatrix{e^{-gt}&0&0&0\cr
0&1&0&0\cr
0&0&e^{gt}&0\cr
0&t\,\dpr_{\V a} \V \O&0&1\cr}\= U_0(t) \bar U(t)\Eq(6.18)$$
%
where the derivatives of $R,S,\V \d,\V \O$ are evaluated at
$(pe^{-gt},q e^{gt},\V a)$.

If $(\bar p,0)$ and $(0,\bar q)$ denote the points corresponding to $\bar
\f$, we denote by $U_\cdot^s(t),U_\cdot^u(t)$ the above matrices evaluated,
respectively, at the points $(\bar p e^{-gt},0)$, $(0,\bar q e^{gt})$; in
both cases $\dpr_{\V a} \V \O\= \bar H \= \dpr^2_\AA h$, $g=\dpr_J E(0,\V
a)$. Note also that the entries involving the $\V a$--derivatives and all
the $\dpr
\V \d$ vanish as $t \to \pm \io$ ($+$ for the stable case and $-$ for the
unstable one).

The representation \equ(6.18) is symbolic as the $1$'s are in fact
$(l-1)\times(l-1)$ identity matrices, the $\dpr_{\V a}R$, $\dpr_{\V a}S$
are row vectors (or $1 \times (l-1)$ matrices), the $\dpr\V\d$ are column
vectors (or $(l-1) \times 1$ matrices) while $\bar H, \dpr_{\V a}\V\d$ are
$(l-1)\times(l-1)$ matrices. Since, however, the notation is (after a moment
of thought) self evident we shall use it also in the following without
describing the obvious meaning of the matrix elements of $U(t)$ and of the
corresponding ones of $U(t)^{-1}$.

The inverse of the matrix $U(t)$ is immediately computed (because $U(t)$ is
canonical):
%
$$
U(t)^{-1}=\bar U(-t) U_0(t)^{-1}=\bar U(-t)
\pmatrix{
\dpr_q S&{\dpr_q \V\d}&-\dpr_q R&0\cr
0&1&0&0\cr
-\dpr_p S&-{{\dpr_p \V \d}}&\dpr_p R&0\cr
-{\dpr_{\V a}S}&-(\dpr_{\V a}\V \d)^T&
{\dpr_{\V a} R}&1\cr}\Eq(6.19)$$
%

The $U(t)$ is the jacobian of a family of solutions of the equations of
motion, hence it verifies \equ(6.14) except for the initial condition;
so that:
%
$$W(t)\=U(t)U(0)^{-1}\Eq(6.20)$$
%

We proceed to investigate the functions $X^k(t)$ with the aim of
finding explicit conditions which can be used to determine the initial
data $v^k,\V h^k$, from the boundedness at $+\io$ of the $X^k$ and,
more in general, to determine recursive equations for the $X^k(\V\ps,t;\aa)$.

Let us write \equ(5.10) as:
%
$$\eqalign{
I=&R(p,{\V a},q)+ V_+(p,q,\V\psi,\m)\cr
\AA=&\V a+\V V_\su(p,q,\V\psi,\m)\cr}\qquad\eqalign{
\f=&S(p,{\V a},q)+V_-(p,q,\V\psi,\m)\cr
\aa=&\V\ps+\V\d(p,{\V a},q)+\V V_\giu(p,q,\V\psi,\m)\cr}\Eq(6.21)$$
%
where $\V a\in \Si_\m$ (defined in 1) of lemma 1, \S 5)
will be fixed throughout the analysis and it
will be dropped from the notations.  As discussed in \S5, the functions $V$
are analytic in $p,q,\V\psi,\m$ for $pq,\Im p, \Im q,\Im\ps_j$ small, say
$|pq|<\bar \k^2$, $|\Im p|,|\Im q|<\bar \k$, $|\Im \ps_j|<\bar \x$
and $|\m|<\m_0$, for
some $\bar \x,\bar \k,\m_0$ suitably chosen as functions of the hamiltonian
parameters; furthermore the $R,S$ are analytic in the same domain and the
$V$ are divisible by $\m$.

If we define $p_0(\aa),\V\psi^s(\aa)$ as the solution of the
equations:
%
$$\eqalign{
\bar\f=&S(p_0,0)+V_-(p_0,0,\V\ps^s,\m)\cr
\aa=&\V\psi^s+
\V \d(p_0,0)+
\V V_\giu(p_0,0,\V\ps^s,\m)\cr}\Eq(6.22)$$
%
we can remark that the above equations can be solved at $\m=0$ with a non
zero jacobian $\dpr_p S(p,0)$ (because at $(\bar p,0)$ it is $\f=\bar\f$
and therefore $\dot\f$ is, by the above definition of $\bar \f$, maximal so
that (since $q=0$) $S_p(\bar p,0)= - (g \bar p)^{-1} \dot \f$ $\neq 0$ and
$R_p(\bar p,0)=0$). The functions $p_0,\V\ps^s$ will be analytic in $\aa$
for $|\Im\aa_j|<\bar \x$, and $|\m|<\m_0$ imagining to redefine $\bar \x,\m_0$
so that they are the same here and in the analyticity domain of $V$, to avoid
introducing too many parameters.

We shall also use the following notations:
%
$$\eqalign{ & Z(p,q,\V\ps,\m) \=
V^0(p,q,\V\ps) + V(p,q,\V\ps,\m)\cr & X^s(\V\ps,t;\aa,\m) =
Z(p_0(\aa)e^{-g(1+\g')t},0,\V\ps +\V \ps^s(\aa),\m) \cr
& X^u(\V\ps,t;\aa,\m) = Z(0,q_0(\aa)e^{g(1+\g')t},\V\ps +
\V \ps^u(\aa),\m) \cr}\Eq(6.23)$$
%
where the functions $q_0,\V \ps^u$ are defined as in \equ(6.22)
exchanging the roles of $p$ and $q$.


Thus we can define:
%
$$\eqalign{
& X^0(\V\ps,t;\aa)= V^0(\bar p e^{-gt},0,\V \ps+\V\ps^{0s})\cr
& \bar X^s(\V\ps,t;\V\a)=V(p_0e^{-g(1+\g')t},0,\V\ps+\V\ps^s,\m)
-V^0(p_0e^{-gt},0,\V\ps+\V\ps^s,\m) \cr}\Eq(6.24)$$
%
$\g'$ being the correction to the Lyapunov exponent in lemma 1' so that
$X(t)=$ $X(\oo t,t;\aa)$ $=X^0(\oo t,t;\aa)+\bar X(\oo t,t;\aa)$; $\V
\ps^{0s}$ is simply $\aa- \V\d(\bar p,0,\V a)$ while $\V \ps^{0u}$ is $\aa-
\V\d(0,\bar q,\V a)$. Furthermore, all the functions $t\to X(\V\ps+\oo
t,t;\aa)$ are orbits on the stable whiskers.

The analyticity in $p$ implies that all the functions $ X(\V\ps,t;\aa)$
converge as $t\to\io$ at exponential rate.  The functions $X(\V\ps,t;\aa)$
are analytic in $t,\aa,\V\ps,\m$ in a domain:
%
$$\bar \DD=\{|\Im \a_j|<\bar \x,|\Im \ps_j|<\bar \x,\,
|\Im t|<\bar \x g^{-1}/2\, {\rm or}\, |\Re t|>Kg^{-1},
|\m|<\m_0\}\Eq(6.25)$$
%
for a suitable $K$ (so that the point $(p_0+\bar \k)e^{-gt}$ is inside the
analyticity domain for the $X$ functions), again by lemma 1' and having
once more redefined $\bar \k,\bar \x$ to avoid introducing too many symbols.

If $\r',\r,\x',\x$ are the analyticity parameters of the original
hamiltonian, we shall use, to measure the size of the vectors
$X=(X_+,\V X_\su,X_-,\V X_\giu)$ the dimensionless norm:
%
$$|X|={\r'}^{-1}|X_+|+\r^{-1}|\V X_\su|+{\x'}^{-1}|X_-|+\x^{-1}|\V
X_\giu|\Eq(6.26)$$
%
and the above statements can be summarized by the first of:
%
$$||\bar X||_{\bar \DD} \=\sup_{\bar \DD} |\bar X|
\le \bar v,\qquad|\g'|<\g_0\Eq(6.27)$$
%
where $\bar v,\g_0$ are suitable constants proportional to $|\m|\m_0^{-1}$
(see \S 5); the second bound
provides a further property and also comes from lemma 1'; of course, all
the above constants can be derived explicitly (if one wishes) from the
dimensional bounds in the proof of lemma 1' (\ie the above statement is
``constructive"); the second inequality is a quantitative bound on the
Lyapunov exponent, also part of lemma 1'.

The above remarks can be used to bound $\bar X^k_\nn$, the Fourier
transform with respect to the $\V\ps$ variables of the $k^{\rm th}$
Taylor coefficient (in $\m$) of $\bar X$, defined in \equ(6.24); in fact
one immediately gets:
%
$$\eqalign{
|\bar X^k_\nn(t;\aa)|<&\bar v \m_0^{-k} e^{-\bar \x|\nn|}\cr
|\bar X^k_\nn(t;\aa)-\bar X^k_\nn(+\io;\aa)|<&
\bar v \m_0^{-k} e^{-\bar \x|\nn|}
e^{-g(1-\g_0)\Re t}\cr}\qquad {\rm in\ }\DD\Eq(6.28)$$
%
in a domain $\DD$ defined as in \equ(6.25) by omitting the
condition on the $\V\ps$ variables.

A further consequence of the above remarks is that if we define:
%
$$F(\V\ps,t;\aa,\m)=E\,[\m \dpr f(X^0+\bar X)+\dpr H_0(X^0+\bar X)-
\Bigl(\dpr H_0(X^0)+\dpr^2 H_0(X^0)\bar X\Bigr)]
\Eq(6.29)$$
%
where $E$ is the standard $2l\times2l$ matrix which in block form
looks like $\pmatrix{0&-1\cr1&0\cr}$ and $X^0,\bar X$ denote as above
$V^0(\bar p e^{-gt},0,\V\ps+\V\ps^{0s}),\,
\bar X(\V\ps,t;\aa)$, then:
%
$$
%F^k(\V\ps,t;\aa)=F^{[k]}(\V\ps,\aa_\m,t)\qquad{\it rm with}\qquad
||F||_{\bar \DD}<
%\bigl({|\m|\over\m_0}\bigr)
\bar f\Eq(6.30)$$
%
for a suitable $\bar f$; again here we possibly redefine the constants
$\m_0,\bar \x,\bar \k$.
In the notations of \equ(6.8) $\div$ \equ(6.12), it is $F^k(t)=
F(\oo t, t; \aa,\m)$.

The \equ(6.30) implies the bounds:
%
$$\eqalign{
|F^k_\nn(t;\aa)|<&\bar f \m_0^{-k} e^{-\bar \x|\nn|}\cr
|F^k_\nn(t;\aa)-F^k_\nn(+\io;\aa)|<&\bar f \m_0^{-k} e^{-\bar\x|\nn|}
e^{-g(1-\g_0)\Re t}\cr}\Eq(6.31)$$
%
in $\DD$.

If $\aa$ is replaced by $\aa_\m$, all the above estimates \equ(6.31),
\equ(6.28) hold provided $|\Im \aa_\m|<\bar\x$ for $|\m|<\m_0$ and since $F^k$
depends in such a case only on the first $k$ coefficients of $\aa_\m$,
the proper notation will be $F^k(\V\ps,t;\aa_\m^{[<k]})$.

{\it Remark}: in the case in which $P_0(I,\AA,\f)$ is an ordinary pendulum
hamiltonian the matrix elements of $U_0(t),W(t)$ can be computed
essentially explicitly (see appendix A9): they have some remarkable
analyticity and symmetry properties in $t$; namely the matrix elements of
$U_0(t)$ are holomorphic in the domain $|\Im t|<(1-\e)\p/(2g_0)$, for
$\e>0$, and are bounded there by $\bar u\e^{-2}$ for some $\bar u$, see
\equ(A9.8); furthermore the $W(t)$ matrix elements of the block
$(+,\su)\times(+,\su)$ and those of the block $(-,\giu)\times(-,\giu)$ are
even in $t$ while those of the other two blocks are odd.

In general the ``rows"
of the matrix $U_0(t)^{-1}$, \equ(6.19), will be denoted
$\x_+,\x_{\su}^0,\x_-,\x_\giu+\x^0_\giu$, where $\x^0_{\su j}=(0,\V e_j
,0,0)$, $\x^0_{\giu j}=(0,0,0,\V e_j)$ with $\V e_j$ being the unit
$(l-1)$-vector with the $j$-th component equal to $1$.  The splitting
$\x_\giu+\x_\giu^0$ is performed so that all the matrices $\x_j$ have
the last $l-1$ components zero.

With the above notations for $\bar X,F,U(t)^{-1}$ we deduce immediately
an explicit expression for $X^k$ in terms of $F^k$; it is
$X^k(t)=U_0(t)Y^k(t)$ with:
%
$$Y^k(t)=\bar U(t)\left(
U_0(0)^{-1} X^k(0) + \ii_0^t \bar U(-\t) U_0(\t)^{-1} F^k(\t)d\t \right)
\Eq(6.32)$$
%
so that one finds:
%
$$\eqalignno{
Y^k_+(t)=&e^{-gt}\Bigl(\x_+(0) X^k(0)+\ig_0^t e^{g\t}\x_+(\t)F^k
\,d\t\Bigr)\cr
Y^k_\su(t)=&\V h^k +\ig_0^t\V F^k_\su\,d\t&\eq(6.33)\cr
Y^k_-(t)=&e^{gt}\Bigl(\x_-(0) X^k(0)+\ig_0^te^{-g\t}\x_-(\t)F^k\,
d\t\Bigr)\cr
Y^k_\giu(t)=&t\,\bar H\Bigl(\V h^k +\ig_0^t\V F^k_\su\,d\t\Bigr)+
\x_\giu(0) X^k(0)+X^k_\giu (0)+\cr
&+\ig_0^t [-\t\bar H F^k_\su+\x_\giu(\t)F^k+F^k_\giu]\,d\t\cr}$$
%
where the arguments of $F^k$ are $(\oo\t,\t;\aa_\m^{[< k]})$;
when needed we regard $m$ vectors as $1\times m$ or $m\times 1$ matrices
and use the standard rules for matrix multiplication (\eg in the first of
\equ(6.33) $\x_+$ is a $(1\times 2l)$ matrix while $X^k$ is a $(2l\times
1)$ matrix and their product is a scalar); in the
following formulae we shall drop the explicit dependence on $\aa$ as such
dependence plays here no role. Hence, since the boundedness as $t\to+\io$
of $X^k(t)$ is equivalent to the boundedness of $Y^k(t)$, a straightforward
asymptotic analysis based on the bounds \equ(6.28),\equ(6.31) shows
that the latter corresponds to the following conditions on the initial
data $X^k(0)$ for $k\ge1$:
%
$$\eqalignno{
&\media{\V F^k_\su(\cdot,\io)}=0\cr
&\x_-(0) X^k(0)+\ig_0^\io
e^{-g\t}\x_-(\t)F^k(\oo\t,\t)\,d\t=0&\eq(6.34)\cr
&\media{\V F^k_\giu(\cdot,\io)}+
\bar H\Bigl(\V h^k+\ig_0^\io\bigl[\V F^k_\su(\oo\t,\t)-
\V F^k_\su(\oo\t,\io)\bigr]\,d\t
-(\oo\cdot\V\dpr)^{-1}\V F^k_\su(\V0,\io)\Bigr)=\V0\cr}$$
%
where to derive the third equality we have used the first identity and the
fact that $\x_\giu(\io)=0$, see the comment following \equ(6.18);
$\langle \cdot\rangle$ denotes average over $\V \ps$,
$(\oo\cdot \dpr)^{-1}$ acts by dividing the $\nn$--Fourier coefficient
(with respect to $\V\ps$) by $i \oo\cdot \nn$: notice that it is possible
to apply  $(\oo\cdot \dpr)^{-1}$ to $\V F_\su(\cdot,\io)$ because of the
first identity.
The first condition must be an identity, as it does not involve the initial
conditions which we suppose to have already determined for $X^h(0)$,
$h=1,\ldots,k-1$, (otherwise lemma 1' could not possibly hold). If one
feels uneasy with such a boldly indirect proof one can check the statement
directly (see appendix 12).

The third condition fixes $\V h^k(0)$, while $\V X_\giu^k(0)$ has to be
$\V\a^k$, by definition, and finally the second condition fixes
$v^k=X^k_+(0)$, because $X^k_-(0)\=0$ (by definition) and :
%
$$\eqalign{
&\x_-(0) X^k(0)=-S_p(\bar p,0) X_+^k - (\dpr_{\V a}\d)^T \V h^k\=\cr
&\= (g\bar p)^{-1}\bigl(X_+^k(0)\dpr_I P+X^k_\su(0)
\cdot\dpr_\AA P\bigr)\cr}\Eq(6.35)$$
%
and more generally:
%
$$e^{-gt}\x_-(t)\=(g\bar p)^{-1}w(t),
\quad w(t)\=(\dpr_I P,\dpr_\AA P,\dpr_\f P,0)\Eq(6.36)$$
%
where the derivatives are evaluated at $X^0(t)$.
Notice that $w(t)$ in \equ(6.36) is such that $Ew=\dot X^0(t)$,
if $E$ denotes the standard symplectic matrix.
Furthermore:
%
$$e^{-gt}\x_-(t)=(g\bar p)^{-1}w(t)\=
e^{gt}\x_+(t)\ ,\qquad{\rm for\ all \ \ }-\io<t<\io
\Eq(6.37)$$
%
and $w(t)$ is holomorphic in $t$, for all real values of $t$: the above
statements reflect the degeneracy of the unperturbed whiskers.

It is also useful to rewrite the vector $Y^k(t)$ after the above
\equ(6.34) are taken into account, leading to a few cancellations which
make clear the asymptotic boundedness as $t\to\io$ of $Y^k$ (imposed,
indeed, by \equ(6.34)). We find:
%
$$\eqalignno{
Y^k_+(t)=&e^{-gt}\r^k +e^{-gt}\ig_0^t e^{g\t}\x_+(\t)F^k(\oo\t,\t)
\,d\t\cr
\V Y^k_\su(t)=&\V h^k+\ig_0^t\V F^k_\su(\oo\t,\t)\,d\t\cr
Y^k_-(t)=&-e^{gt}\ig_t^\io e^{-g\t}\x_-(\t)F^k(\oo\t,\t)
\,d\t&\eq(6.38)\cr
\V Y^k_\giu(t)=&\V\ch^k -\ig_t^\io\Bigl[\x_\giu F^k(\oo\t,\t)+
[\V F_\giu^k(\oo\t,\t)-\V F^k_\giu(\oo\t,\io)]\Bigr]\,d\t+\cr
&+(\oo\cdot\V\dpr)^{-1}\PP \V F_\giu^k(\oo t,\io)-\bar H\ig_t^\io(t-\t)
[\V F_\su^k(\oo t,\t)-\V F^k_\su(\oo t,\io)]\,d\t +\cr
&+ \bar H (\oo \cdot \dpr)^{-2} \V F_\su^k( \oo t, \io)\cr
}$$
%
where $(\oo\cdot\V\dpr)$, $\PP$, $\media\cdot$ are operators acting on
the $\V\ps$ dependence of the $F^k$- functions: they multiply the Fourier
transforms of $F^k$ by $i\oo\cdot\nn$, $\d_{|\nn|\ne0}$ and $\d_{\nn=\V0}$
respectively (hence $\media\cdot$ is the average over $\V\ps$),
$\delta_\cdot$ being the Kronecker $\d$. The
constants $\r^k,\V\ch^k$ are:
%
$$\eqalignno{
\r^k =& (\dpr_q S)_0 v^k+(\dpr_q \V \d)_0\cdot\V h^k\cr
\V\ch^k=&\aa_k-(\dpr_{\V a}S)_0 v^k-(\dpr_{\V a}\V\d)^T_0\V
h^k+\ig_0^\io\x_\giu(\t)
F^k(\oo\t,\t)\,d\t+\cr
&+\ig_0^\io[\V F^k_\giu(\oo\t,\t)-\V F^k_\giu(\oo\t,\io)]\,d\t
-(\oo\cdot\V\dpr)^{-1}\PP\V F^k_\giu(\V0,\io)&\eq(6.39)\cr
&-(\oo\cdot\V\dpr)^{-2}\bar H \V F^k_\su(\V0,\io) -\bar H\ig_0^\io\t
[\V F^k_\su(\oo\t,\t)-\V F^k_\su(\oo\t,\io)]\,d\t\cr}$$
%
where $(\cdot)_0$ means ``at $t=0$"; in the cases in which there is no
coupling between the pendulum and the rotators in the free system (see \S4)
the $\r^k$ and also the coefficients
of $v^k$ and $\V h^k$ in $\V\ch^k$ vanish.

Note also that, in the case $\aa=\aa_\m$, the dependence on the
coefficients of $\aa_\m$ (as it follows by inspection from
\equ(6.15),\equ(6.20),\equ(6.29)) is:
%
$$\eqalign{
X^k(\V\ps,t;\aa_\m^{[\le k]}) =& \pmatrix{0\cr 0\cr0\cr\aa^k\cr}
+ X^k(\V \ps,t;\aa^{[<k]}) \cr
F^k(\V\ps,t;\aa_\m^{[\le k-1]}) =& E \dpr^2 f(X^0)
\pmatrix{0\cr 0\cr0\cr\aa^{k-1}\cr}
+ F^k(\V \ps,t;\aa^{[<k-1]}) \cr}
\Eq(6.40)$$
%
The theory of the unstable whisker is identical, with the obvious changes which
essentially consist in taking $\Re t$ negative, replacing $\io$ with $-\io$
and exchanging the role of $p$ and $q$ and of the components $+,-$ of
$Y^k$. Note that, in general, the functions $F^k,X^k$ for the unstable
whisker {\it are not} the analytic continuation of those of the stable
whisker with the same
initial conditions (which is a fact that would mean that all stable whisker
orbits are also orbits on the unstable whisker, so that the homoclinic
points would be completely degenerate).

As remarked above, see \equ(6.37), $W(t)$ (but not $U(t)$) is analytic
for all $t$ because of the degeneracy of the free system. Hence in what
follows we should append a superscript $s$ or $u$ (or equivalently $+$ or
$-$) to $F,X,Y,U,U_0$ depending on whether we consider them for a stable or
an unstable whisker. When, as usual, we do not follow this convention {\it we
shall assume that the functions being considered are associated with the
stable whisker if their time argument has $\Re t>0$ and with the unstable
if $\Re t<0$}. Of course we label the initial conditions $X^\s(0)$ with
$\s=s$ or $\s=u$, depending on the whisker to which they refer.

We now look at the {\it homoclinic conditions}. The homoclinic points that we
study will be \ap supposed to have the form:
%
$$\f=\bar\f,\qquad \aa_\m=\V\a^0+\m\, \aa^1+\m^2\aa^2+\ldots\Eq(6.41)$$
%
with the series in \equ(6.41) being at least asymptotic as $\m\to0$.
Such points correspond to  initial conditions $X^{k\s}(0)=(v^{k\s},\V
h^{k\s},0,\aa^k)$.

Remarking that the definition of $X(\V\ps,t;\aa,\m)$, \equ(6.23), implies that
$X^s(\V\ps,\io;\aa,\m)=X^u(\V\ps+\V\s(\aa,\m),-\io;\aa,\m)$ with
$\V\s=\V\ps^s(\aa)-\V\ps^u(\aa)$ and also
$F^s(\V\ps,\io;\aa,\m)=F^u(\V\ps+\V\s,-\io;\aa,\m)$ so that for each $k$:
%
$$\media{X^{ks}(\cdot,+\io;\aa_\m)}=\media{X^{ku}
(\cdot,-\io;\aa_\m)}\ ,\qquad
\media{F^{ks}(\cdot,+\io;\aa_\m)}=\media{F^{ku}
(\cdot,-\io;\aa_\m)}\Eq(6.42)$$
%
we see that, in general, the homoclinic conditions $\V h^{ks}=\V
h^{ku}$ become, (if $\s_t=\sign(t)$):
%
$$\eqalign{
-\ig_{-\io}^\io \bigl[\V F^k_{\su }(\oo \t,\t;\aa_\m^{[<k]})
-\V F^k_{\su }(\oo \t,\s_\t\io;\aa_\m^{[<k]})\bigr]\,d\t+
\sum_{{\nn\ne0}\atop{\s=\pm}}\s{\V F^k_{\su \nn}(\s\io;\aa_\m^{[<k]})\over
i\oo\cdot\nn}=0\cr}\Eq(6.43)$$
%
which we call ``the conditions associated with the $\AA$ variables".
Note that \equ(6.43) is immediately derived from \equ(6.34) in the
anisochronous case (\ie in the case $\bar H$ is invertible); however
it is possible  to deduce it directly also in the important case of
partially isochronous systems (\ie periodically time--dependent systems)
associated to hamiltonians of the form:
%
$$\o B+ h(\tilde A)+ P(I,\tilde  A,\f)+\m f(I,\tilde  A,\f,\l,\tilde \a)
\ ,\qquad (B,\tilde A)\= \AA\ ,(\l,\tilde \a)\=\aa
\Eq(6.44)$$
%
with $\det \dpr^2_{\tilde A}h\neq 0$.
To check \equ(6.43) in such a case, consider the second of
\equ(6.38) observing that $\V
Y^k_\su\=\V X^k_\su$; add and subtract to the l.h.s. $\V X^k_\su(\oo t,\s_t
\io)$ and to the r.h.s. $\ii_0^t \V F_\su^k(\oo \t,\s_\t \io)d\t$; take the
quasi--periodic average $\lim_{T\to \s \io} (1/T)\ii_0^T\cdot$ using
\equ(6.28), \equ(6.31) and the first of \equ(6.34); finally use \equ(6.42).
Hence we conclude that a {\it unified treatment of anisochronous
systems and of forced systems is possible}.

To derive the other homoclinic condition, $v^{ks}=v^{ku}$,
we use the \equ(6.35), \equ(6.36) to find an expression of
$v^{ks}-v^{ku}$ which is symmetric for the two whiskers contributions, and,
by the second of \equ(6.34)  we find:
%
$$\left[ w_\su(0) (\V h^{ks} - \V h^{ku}) +
\ig_{-\io}^\io w(\t) F^k(\oo \t,\t;\aa_\m^{[<k]})d\t \right]=0\Eq(6.45)$$
%
($w_\su\=\dpr_\AA P$ and notice that $w\to 0$ exponentially fast as $t\to
\pm\io$).
We call the above equation ``the homoclinic condition corresponding to the
$I$ variable".

Hence \equ(6.43),\equ(6.45) is the complete set of homoclinic conditions.
It is clear that the $l$ equations in \equ(6.43),\equ(6.45) cannot be
independent: and in fact one can check that either the \equ(6.43)
or the \equ(6.45) together with any
$l-2$ of the \equ(6.43) imply the remaining one: this expresses the fact
that the energy of the two whiskers is the same (because they are
asymptotic to the same torus).

The \equ(6.43) is not yet very explicit, as a recursive
equation for $\aa^k$. But it is easy to make it clearer.

We begin by considering it for $k=1$.  The function $F$ of order $1$,
$F^1(\V\ps,t;\aa^{[<1]})$, is in this case:
%
$$F^1(\V \ps,t;\aa^0)
=\pmatrix{-\dpr_\f f(I^0(t),\V a,\f^0(t),\V\ps+\aa^0+\V\th(t),0)\cr
-\dpr_\aa f(I^0(t),\V a,\f^0(t),\V\ps+\aa^0+\V\th(t),0)\cr
\dpr_I f(I^0(t),\V a,\f^0(t),\V\ps+\aa^0+\V\th(t),0)\cr
\dpr_\AA f(I^0(t),\V a,\f^0(t),\V\ps+\aa^0+\V\th(t),0)\cr}\Eq(6.46)$$
%
where $\V\th(t)$ is the phase shift introduced in \S4. And the
homoclinic conditions for the $\AA$ variables become (cfr.
\equ(4.6)$\div$\equ(4.8))
%
$$\eqalign{
&\ig_{-\io}^\io[\dpr_\aa f|_t
-\dpr_\aa f|_{\s_t\io}]
\,d\t-
\sum_{{\nn\ne\V0}\atop{\s=\pm}}{\s\over i\oo\cdot\nn}\dpr_\aa f_\nn(0,\V
a,0,0)e^{i(\nn\cdot\aa^0+\nn\cdot\V\th(\s\io))}=\cr
&=\ig_{-\io}^\io\sum_{\nn\ne\V0}i\nn
e^{i(\oo\cdot\nn\t+\nn\cdot\aa^0)}\dpr_\t\Bigl(e^{i\nn\cdot\V\th(\t)}
f_\nn(I^0(\t),\V a,\f^0(\t),0)\Bigr)\,d\t=\V0\cr}\Eq(6.47)$$
%
where $f|_t\=f$ evaluated at $(I^0(t),\V a,\f^0(t),\aa^0+\oo
t+\V\th(t),0)$ while $f|_{\s_t\io}$ denotes $f$ evaluated at
$(0,\V a,0,\aa^0+\oo t+\V\th(\s_t\io),0)$ if $\s_t={\rm sign\,}t$.
And we see that the \equ(6.47) always has at least two
solutions, namely the critical points of the periodic function:
%
$$M_f(\aa)=\ig_{-\io}^\io \sum_{\nn\ne\V0}
e^{i(\oo\cdot\nn\t+\nn\cdot\aa)}\dpr_\t\bigl(e^{i\nn\cdot\V\th(\t)}
f_\nn(I^0(\t),\V a,\f^0(\t),0)\bigr)\,d\t\Eq(6.48)$$
%
The equality of the whiskers energies then implies that the homoclinic
equation relative to $I$ is also satisfied.

Therefore the equations for $k=1$ determine the zeroth order
approximation $\aa^0$ to the homoclinic point. Let $M_0$ be the
jacobian matrix (with respect to $\aa$) of the $l-1$ functions in
\equ(6.43) evaluated at a solution point $\aa^0$ (equivalently: $M_0\=
\dpr_\aa^2 M_f(\aa_0)$) and {\it assume that $M_0$ is non degenerate}, see
\equ(4.10).

To determine the higher orders $\aa_k,\, k\ge1$, we remark that
\equ(6.40) shows that the dependence of $F^k$ on $\aa_{k-1}$ is rather
simple. If we call $\V D_k$ the l.h.s. of \equ(6.43) evaluated by
replacing $\aa_\m^{[<k]}$ with $\aa_\m^{[<k-1]}$, we see
that, by \equ(6.40), the \equ(6.43) take the form:
%
$$M_0\aa_{k-1}=\V D_k\Eq(6.49)$$
%
Therefore the non degeneracy of $M_0$, the hessian of \equ(6.48), at the
solution point $\aa^0$ is all one needs to perform the perturbation
theory to arbitrary order.

But the first order calculation is usually sufficient to prove the
existence of the homoclinic point. Its exact equation is in fact:
%
$$\V H(\aa)\=\sum_{k=0}^\io \m^{k-1} (\V H^{k+}(\aa)-\V H^{k-}(\aa))
=0\Eq(6.50)$$
%
where $\V H^{k\s}(\aa)$ is just the l.h.s. of \equ(6.43).

We see that the equation \equ(6.50) admits, for $\V a$ on a diffusion
path open for diffusion (in the sense of \S4, see \equ(4.10)),
a non degenerate solution for
$\m=0$, by our assumption on $\aa^0$: furthermore the function in the
l.h.s. of \equ(6.50) is analytic in $\m$ for $|\m|<\m_0$ by lemma 1':
therefore for all the invariant tori associated with the diffusion path,
by lemma 1', the equation has an analytic solution $\aa_\m$ whose Taylor
expansion coefficients have already been determined in \equ(6.49).

We also need some information about the non degeneracy of the intersection
between the two whiskers at $\aa_\m$.  They are provided by the Taylor
coefficients of the expansion in $\aa-\aa_\m$ of the functions $\V H(\aa)$
around $\aa_\m$.  In particular the first order coefficients will define a
$(l-1)\times(l-1)$ matrix that we call {\it the homoclinic intersection
tensor} or the {\it homoclinic angles matrix}: it is clear that such a matrix
is just $M_0$: hence we see the physical meaning of the matrix $M_0$.  The
name is slightly improper as the matrix $M_0$ has the dimension of an
action and (the trigonometric tangent of) the physical angles between the
tangent vectors to the two whiskers will not be the eigenvalues of $M_0$
but proportional to them via a constant bearing the dimension of an action.

We conclude that, {\it at a homoclinic point,
there is a uniform lower bound to the angles between
pairs of vectors tangent to the whiskers associated
with the invariant tori of a diffusion path open for diffusion}.
%
\vglue2.truecm

\penalty-200

{\bf\S7 Whisker ladders and rounds density}

\penalty10000

\vskip0.5truecm\numsec=7\numfor=1

\penalty10000

Given a diffusion path $\LL$, we see from lemma 1, \S5, that we can
expect that there are gaps on it, in which whiskered tori are missing
(\ie ladder segments with no {\it rounds}), which are quite large and,
in fact they can be as large as $K\m^{1/c}$; see \equ(5.4) and the comment
after \equ(5.90): so that $c=7t$.

However the rule is that {\it the gaps are much narrower if the path $\LL$ is
not too badly placed in the action space}: this depends of course on the
perturbation $f$. The situation is clearest if $f$ is a trigonometric
polynomial. We give the following, general, definition:
\vskip3.pt
{\it Definition: Given a free hamiltonian $h$ as in \equ(2.3),
a region $V\subset R^{l-1}$ of the action
variables $\AA$ is {\sl free of resonances} to order $N$ if it is:
%
$$\oo(\AA)\cdot\nn\ne0 \qquad \forall|\nn|\le N\EQS(7.1)$$
%
having set, as usual, $\oo(\AA)\equiv\dpr_\AA h(\AA,0,0)$.

If $f$ is a trigonometric polynomial of degree $N_f$:
%
$$f(I,\AA,\f,\aa,\m)=\sum_{|\nn|\le N_f}
f_\nn(I,\AA,\f,\m)e^{i\nn\cdot\aa}\EQS(7.2)$$
%
we say that $h$ and $f$ {\sl do not resonate to degree $p$} in the region $V$
in action space if the region $V$ is free of resonances to order
$N=pN_f$.}
\vskip3.pt
Recalling \equ(3.1),\equ(3.2),\equ(5.4), \equ(5.82) we can prove
the following lemma:

\vskip3.pt
\noindent{\it {\bf Lemma 3}:
Let $f$ be a trigonometric polynomial which does not
resonate to degree $p$ with the free hamiltonian $h$ in the phase space
domain $V$. And let $\LL$ be a diffusion path contained in $V$. Then the
set $\Si_\m$ in \equ(5.4) can be taken to verify:
%
$$(s_2-s_1)^{-1}\ii_{\Si_\m}ds\ge(1-K |\m|^{(p+1)/c})\EQS(7.3)$$
%
and all the remaining statements of lemma 1 and 1' stay unchanged.}
\vskip0.5truecm

So in particular, if $p>2c-1$ any interval of length $O(\m^2)$ on $\LL$
will necessarily contain rounds of the whisker ladder, if $\m$ is small
enough.  The analysis in \S5 allows us to say that $c$ can be
taken, for instance, $c=7(l-1)$ hence for $p=14(l-1)$ we have gaps of
relative size of $O(\m^2)$.

\vskip0.3truecm
\noindent{\bf Proof}: The assumptions are just what  one needs to
perform with no troubles perturbation theory to order $p$.  We shall
take for simplicity $f,h$ to be $\m$ independent.

Let $\F$ be a generating function for a canonical map: it will depend on
variables $(\AA',\aa_0,p',q_0)\in V\times T^{l-1}\times S^2_{\tilde \k_0}$,
assuming to have already done the first change of coordinates
considered in the proof of lemma 1, \S5, to change $I,\f$ into the more
natural (but local) coordinates $p_0,q_0$ of the pendulum (and
$S_\k=\{p:\  |p|<\k\}$), see lemma 0, \S5.

We take $\F$ to be a polynomial of order $p$ in $\m$:
%
$$\F=\m\F^{(1)}+\m^2\F^{(2)}+\ldots+\m^p\F^{(p)}\EQS(7.4)$$
%
which we impose to be a solution to order $O(\m^{p+1})$
of the Hamilton-Jacobi equation:
%
$$\eqalign{
&h_0(\AA'+\dpr_{\aa_0}\F,(p'+\dpr_q\F)q_0)+\m f(
\AA'+\dpr_{\aa_0}\F,\aa_0,p'+\dpr_q\F,q_0)=\cr
&\kern2.truecm=\tilde h(\AA',p'(q_0+\dpr_{p'}\F),\m)+O(\m^{p+1})\cr
&\tilde h(\AA',J)=h_0(\AA',J)+\m h^{(1)}(\AA',J)+\ldots+\m^p
h^{(p)}(\AA',J)\cr}\EQS(7.5)$$
%
where $\F, h^{(1)},\ldots,h^{(p)}$ are regarded as unknown.

The equations for $\F^{(1)},\ldots,\F^{(p)},h^{(1)},\ldots,h^{(p)}$
generated by \equ(7.5) can be recursively solved if the non resonance
property \equ(7.1) holds for $N=p N_f$ in a domain around the diffusion
curve. The domain of definition of the solution can be given the form
$V\times T^{l-1}\times S^2_{\tilde\k}$, with $\tilde\k$ small enough. In
fact, assuming to have solved the equations for $i=1,\ldots,k$ and to have
determined $\F^{(i)}, h^{(i)}$, $i\le k$, as analytic functions on
$V\times T^{l-1}\times S^2_{\tilde\k}$, with $\F^{(i)}$ being a
trigonometric polynomial of degree $iN_f$, we see that \equ(7.5) says:
%
$$\eqalign{
&\oo_0(\AA',p'q_0)\cdot\dpr_\aa\F^{(k+1)}+g_0(\AA',p'q_0)
\bigl(q_0\dpr_{q_0}\F^{(k+1)}-p'\dpr_{p'}\F^{(k+1)}\bigr)+\cr
&+P^{(k+1)}(\AA',\aa_0,p',q_0)=h^{(k+1)}(\AA',p'q_0)\cr}\EQS(7.6)$$
%
where $\oo_0(\AA',J)\equiv\dpr_{\AA'} h_0(\AA',J)$, $g_0(\AA',J)\equiv
\dpr_J h_0(\AA',J)$ and $P^{(k+1)}$ is a polynomial in the derivatives
of $f$ of order
$\le k$ and in $\dpr\F^{(1)},\ldots,\dpr\F^{(k)}$, $\dpr
h^{(1)},\ldots,\dpr h^{(k)}$ and its monomials:
%
$$(\dpr\F^{(1)})^{n_1}\ldots(\dpr\F^{(k)})^{n_k}(\dpr h^{(1)})^{m_1}
\ldots(\dpr h^{(k)})^{m_k}\EQS(7.7)$$
%
must be such that their order verifies:
%
$$\sum_{j=1}^k j(n_j+m_j)\le k+1\EQS(7.8)$$
%
Hence we see that $P^{(k+1)}$ is a trigonometric polynomial of degree
$N_{k+1}\le (k+1)N_f$, and \equ(7.6) can be solved, using the notations
introduced in \equ(5.27), by:
%
$$\F^{(k+1)}_{\nn, hh'}=P^{(k+1)}_{\nn,hh'}(\AA')\bigl[
-i\oo_0(\AA',p'q_0)\cdot\nn-(h'-h)g_0(\AA',p'q_0)\bigr]^{-1}\EQS(7.9)$$
%
for $|h'-h|+|\nn|\ne0,|\nn|\le(k+1)N_f$, and:
%
$$h^{(k+1)}(\AA',p'q_0)=\sum_{h=0}^\io
P^{(k+1)}_{\V0,hh}(\AA')(p'q_0)^h\EQS(7.10)$$
%

The induction construction will work until $k\le p$, as it is clear that
$\F^{(k+1)},h^{(k+1)}$ have the same analyticity domains as
$\F^{(1)},\ldots,\F^{(k)},h^{(1)},\ldots,h^{(k)}$.

The canonical map generated by $\F$ will have a somewhat smaller domain
$V'\times T^{l-1}\times S^2_{\k'}$, with $V'$ differing from $V$ and
$\k'$ differing from $\tilde\k$ only by an amount of order $O(\m)$ (which
means that the boundaries of $V$ and $V'$ are close within a distance
$O(\m)$).

This completes the proof of lemma 3 because, in the new coordinates, the
hamiltonian takes the form:
%
$$
h'(\AA',p'q',\m)+\m^{p+1} f'(\AA',\aa',p',q',\m)\EQS(7.11)$$
%
where the difference $h'-h$ is divisible by $\m$ and $h',f'$ are
analytic in $V'\times T^{l-1}\times S^2_{\k'}$.

Under the above circumstances lemma 1 applies: but this time $\m$ is
replaced by $\m^{p+1}$, and therefore $1/c$, in \equ(5.4),
is replaced by $(p+1)/c$.

Lemma 3 above has some rather obvious extensions to the cases in which
$f$ is not a trigonometric polynomial.  In such cases, if $\bar N$ is
the maximum value such that no resonance of $h$ occurs for $|\nn|\le
\bar N$ we can write $f=f^{[\le \bar N/c]}+f^{[>\bar N/c]}$ and if
$\m||\bar f_0|| e^{-\x \bar N/c}\ll\m_*$ and, at the same time,
$|\m|\ll\m_*$ of lemma 1, see \equ(5.90).  Hence we can hope to reach
the same conclusion of lemma 3 in various particular cases.  But we see
that, unless $f$ is a trigonometric polynomial, the improvement of the
density estimate of the whisker is a delicate matter.  Since
the ideas are quite clear, we refrain from formulating
precise results on the cases when $f$ is not a trigonometric
polynomial.
%
\vglue2.truecm

\penalty-200

{\bf\S8 Heteroclinic intersections. Drift and diffusion along
directly open paths}

\penalty10000

\vskip0.5truecm\numsec=8\numfor=1

\penalty10000

We now want to study intersections between stable whiskers of
tori corresponding to some value of $s$,  and unstable whiskers
of tori corresponding to a different $s$ value: such intersections are
called {\it heteroclinic} intersections. Then, by means of {\it chains}
of heteroclinic intersections (cfr. [A]; see also [D])
corresponding to tori related to directly open diffusion paths,
we will show how one can construct
orbits {\it shadowing} all the heteroclinic orbits of the chain. In
particular, we will estimate the time needed by the diffusion orbit
to drift from one end of the chain to the other end.

We consider a line of tori (and corresponding whiskers) with a given
energy $E$ associated with a diffusion line $\LL$ {\it directly open for
diffusion}; see definition 2 of \S 4. Given $\LL$ the existence of
such a line of tori is a consequence of lemma 1 of \S5.
We consider also the associated diffusion sheet $\tilde \LL$ (see
\equ(5.8)) and the relative whiskered tori constructed by lemma 1'.

The equations for the whiskers associated to $\tilde \LL$,
deduced in \S 6, take the form, at
the point corresponding to the value $s,u$ of the diffusion sheet
parameters:
%
$$\AA^\pm(\f,\aa;s,u)=\AA_{su}+\m\{\dpr_\aa M^\pm_{f} (\f,\aa;s,u)+
\V\x^\io_{ su}\}+O(\m^2)\Eq(8.1)$$
%
where the function $M^\pm_{f}$ is defined in terms of the Melnikov function
$F$, see \equ(4.6), \equ(6.48) and (see \equ(6.42)):
%
$$
\V \x^\io_{su}=\langle X^{1+}(\cdot,+\io;\aa)\rangle=
\langle X^{1-}(\cdot,-\io;\aa)\rangle
\Eq(8.2)$$
%
In this section ``$+/-$" will mean ``stable$/$unstable" as we reserve
the labels $u,s$ to have the meaning they have in \S5: so that $s$ is a
parameter describing points on $\LL$ and $u$ describes the variation of
the quasi periodic motions frequencies.

The results of \S5 show that we can imagine that the functions $X,\V\x$ are
defined for {\it all} $(s,u)\in[s_1,s_2]\times [-\bar u,\bar u]$
and for $|\m|\le\m^*$, $\aa\in
T^{l-1}$ and $|\f|<\tilde\f$ and there they are of class $C^p$ where $p$ is
any prefixed integer.  Below we imagine to have fixed such an extension
with $p=2$. Of course this is just
a convenient way of expressing the regularity properties of functions that
are defined on sets with a lot of holes: the values of the functions in
points with $s\not\in\Si_\m$ (cfr. 1) of lemma 1, \S 5)
are not interesting and all that is being said
is that the interesting values of our functions can be smoothly
interpolated. At a point with a prefixed $\f$ coordinate,
say $\f=\bar \f$ (as in \S 4 and elsewhere) the condition
for heteroclinic intersection between $W_{stable}(s)$ and
$W_{unstable}(s')$, in the prefixed energy surface,
becomes to first order in $\m$:
%
$$\eqalign{
& \m\left\{\dpr_\aa[M^+_{f}(\bar\f,\aa;s,u)-M^-_{f}(\bar\f,\aa;s',u')]+
\V\x^{\io}_{su}-\V\x^{\io}_{ s'u'}\right\}+
\AA_{su}-\AA_{s'u'}+\m^2 R=\V0\cr
&E^+(s,u,\m)\=E\=E^-(s',u',\m)\cr}
\Eq(8.3)$$
%
where $E^\pm$ denotes the value of the hamiltonian $H$ on the whiskers
$W^\pm$, $E\=H_0(0,\AA_s,0,0)$ (see \S 3) and
the remainder $R\=R(\aa,s,s',u,u',\m)$ is a $C^2$ function of
its arguments.

We regard \equ(8.3) as an implicit function equation determining $\aa,u,u'$ at
fixed $\bar\f$ in terms of the parameters $s,s',\m$.

Note that finding solutions of \equ(8.3) means that the whiskers contain a
point with equal $\aa,\f,\AA$ coordinates: hence the whiskers  have a point in
common as their energy is the same and hence also the $I$
coordinate has to be the same (notice, in fact, that
the derivative of the hamiltonian with respect to $I$
cannot vanish, at $\m=0$ and  at the $\f$ value that we have chosen).

We first determine $u^+(s,\m)$ and $u^-(s',\m)$ so that the energy
constraint is satisfied: that this is possible follows from the non
degeneracy hypothesis \equ(2.7) (see also the last of \equ(5.23)) and from
the condition \equ(5.85). The precise argument is basically a repetition of
the argument used to deduce lemma 1 from lemma 1', see \equ(5.83) $\div$
\equ(5.85) and we shall not repeat it here.
We then substitute $u=u^+$ and $u'=u^-$ in the first of
\equ(8.3) and observe that $u^\pm(\cdot,0)=0$.

Hence we are interested in the jacobian matrix of the first of \equ(8.3)
at the non-degenerate solution point $s'=s$, $u=u'=0$.
Here, if the diffusion curve
$\LL$ is open for direct diffusion (as we suppose throughout this
section), the above \equ(8.3) has the solution $\aa'_s=\aa_s+O(\m)$,
($\f = \bar \f$), described in the definition following \equ(4.9).  In
fact by \equ(4.9) at $\aa=\aa_s$, $u=u'=0$:
%
$$\dpr_\aa (M^+_{f}-M^-_{f})\equiv\dpr_\aa M_f(\aa;s,0)=\V0\Eq(8.4)$$
%
and by \equ(4.10) the jacobian of \equ(8.3) at
$\aa_s$, $s=s'$, $u=u'=0$,  is:
%
$$\m\,\dpr_{\aa\aa}(M^+_{f}-M^-_{f})
+O(\m^2)\equiv \m\,\dpr_{\aa\aa} M_f(\aa_s;s,0)+O(\m^2)\Eq(8.5)$$
%
so that by the implicit function theorem, for $\m$ small and $\f = \bar\f$,
we have the solution $\aa_s'$ of \equ(8.3) when $s=s'$.

Again, the implicit function theorem implies, giving up control of the
values of the constants (for simplicity rather than by necessity), that
there is a constant $G_1>0$ such that for:
%
$$|s-s'|<G_1\m^2\Eq(8.6)$$
%
and for $\m$ small enough, the equation \equ(8.3) admits a non degenerate
solution $\aa(s,s')=$ $\aa_s'+O(\m)$. Here we do not find how the
constant $G_1$ depends on dimensionless quantities but, of course, this
could be done if necessary.

Hence if $s,s'\in\Si_\m$ and verify \equ(8.6) there is a heteroclinic
intersection $H_{ss'}$ between the whiskers $W_{stable}(s)$ and
$W_{unstable}(s')$ at energy level $E$.

As an application we consider the case in which $f$ is a trigonometric
polynomial of degree $N_f$ and we suppose that the path $\LL$ is contained
in a region free of resonances (see definition following
\equ(7.1)) to order $p=2c$ for the perturbing function $f$, where $c$
is the constant appearing in lemma 1, \S5, see \equ(5.4) and the comment
after lemma 3 of \S 7.

Then  by \equ(7.3) we can find a sequence
$s_1=\s_1<\ldots<\s_N=s_2$ with:
%
$$
|\s_{i+1}-\s_i|<G_1|\m|^2,\qquad i=1,\ldots,N-1\qquad {\rm and}\
\s_i\in\Si_\m, \Eq(8.7)$$
%
provided $|\m|$ is small enough, \ie provided $|\m|<\m^{*'}$ so that the
results of \S5,\S6,\S7 can be used to infer the existence of heteroclinic
intersections: it appears that $N< G_2|\m|^{-2}$, with $G_2>0$ conveniently
chosen.

Therefore for each $i=1,\ldots,N-1$ we have an invariant torus
$\TT_i\equiv\TT(\s_i)$ with two whiskers $W_{stable}^i$ and
$W_{unstable}^i$ and for $i=1,\ldots,N-1$ we consider the heteroclinic
intersections of $W_{unstable}^i$ with $W_{stable}^{i+1}$. Such
intersection contains a curve: it is an orbit ``spiraling" onto $\TT_i$ as
$t \rightarrow -\infty$ and onto $\TT_{i+1}$ as $t \rightarrow +\infty$. On
each of such orbits we fix a unique point:
%
$$H_i\in W_{unstable}^{i}\cap W_{stable}^{i+1} \Eq(8.8)$$
%
with a $\f$ coordinate prefixed and equal to $\bar \f$ (independently of
$i$) as discussed in the previous sections (in particular $\bar \f$ is
distinct from the equilibrium positions on the separatrix and for the
standard pendulum \equ(2.2) it is $\bar\f=\p$).

The motion on the tori is quasi periodic with $l-1$ frequencies and, by
lemmata 1,3, we can suppose that the frequencies of such quasi periodic
motions  have a non resonance constant $C(\s_i)$ verifying \equ(5.81):
%
$$C(\s_i)<G_3 |\m|^{-G_4}\equiv C_\m\Eq(8.9)$$
%
(one can take $G_4=(p+1)/7$ by the discussion of \S7,\S5).

We imagine to draw around each of the tori $\TT_i$ a small vicinity
$U_i$ of radius $\hat r$, $i$ independent but so small that inside it
the tori and their whiskers can be described by parametric equations,
analytic in some standard coordinates $(p,\AA',q,\V\psi)$ with:
$|\AA'-\AA'_i(0)| < \hat r$, $|p|<\hat
r,|q|<\hat r,\V\psi\in T^{l-1}$, where:
%
$$\AA'_i(J)\equiv
\AA^\io(s_i,u(s_i,J,\m),J,\m)
\Eq(8.10)$$
where (consistently with the use of the
symbol in \S5) we call $\AA^\io(s,u,J,\m)$ the solution to \equ(5.14) with
$h_0$ replaced by $h_\io$ (see also the comment following \equ(5.69)
and \equ(8.16) below).

In this section we shall refer to such coordinates as to ``normal
coordinates". We suppose $\hat r$ so small that the heteroclinic points
$H_j$ are outside the sets $U_i$.  We shall proceed in an asymmetric
fashion, treating differently the stable and the unstable directions (but
we could, everywhere below, interchange their roles).  The following
analysis can be followed rather easily if one tries to draw a picture (which
after some thought becomes not too hard) of the various geometrical
concepts that we need and introduce below.

For $i=1,\ldots,N-1$ we fix in $U_i$ a point $E^u_i\in W^i_{unstable}$
which is on the heteroclinic orbit of $H_i$ and with normal coordinates
given by:
%
$$E^u_i\in W^i_{unstable},\qquad E^u_i=(0,\AA'_i(0),\hat r/2,\V\ps_i),
\quad i=1,\ldots,N-1\Eq(8.11)$$
%

Here $\V\psi_i$ is uniquely determined: just evolve with the solution
of the equations of motion, $t\to S_tH_i$, the datum $H_i$ backwards in
time until its $q$-coordinate becomes (meaningful and) equal to $\hat r/2$:
%
$$E^u_i=S_{-T^u_i}H_i\Eq(8.12)$$
%
The times $T^u_i$ are all bounded by some $T'$ which is
$i$-independent, which can be estimated at small enough $\m$ only in
terms of $\hat r$ and of the free part of the hamiltonian: $T^u_i \leq
T' \equiv G_5$.

We now define the surface element
$\tilde \D^s_i\equiv\bigl(W^{i+1}_{stable}\cap M_{\bar r}
(H_i)\bigr)^*$ where $M_{\bar r}(H_i)$ is the ball of radius $\bar r$
around $H_i$ and the $*$ signifies that:
\item{1) } we consider the connected
component of the intersection containing the center of $M_{\bar r}$
($\bar r$, at the moment, is rather  arbitrary,
for instance take it so that $M_{\bar
r}(H_i)\cap U_i=\emptyset$ and $\bar r<\hat r$);
\item{2) } inside the connected component we select the points with $\f$
coordinate equal to $\bar \f $ (\ie equal to the $\f$ coordinate of the
heteroclinic point).

\noindent
The set $\tilde\D^s_i$ is an $(l-1)$-dimensional regular submanifold
of the $2(l-1)$-dimensional manifold $\tilde M_i$ obtained by
intersecting  $M_{\bar r}$ with the energy level $E$ ($E$ being the energy
of the diffusion curve and, therefore, of the whiskers) and with the points
with $\f = \bar \f$:
%
$$\tilde M_i = M_{\bar r} \cap \{H=E\} \cap \{\f = \bar \f \}.
\Eq(8.13)$$
%
The same is true for the analogous surface element
$\tilde\D^u_i \equiv (W^i_{unstable} \cap M_{\bar r})^*$. Moreover,
the nondegeneracy condition \equ(4.10), \ie [see \equ(8.1), \equ(8.4),
\equ(8.5)]:
%
$$
\det \dpr _\aa (\AA^i_{unstable} - \AA^{i+1}_{stable})
|_{(\aa,\f)=(\aa_i,\bar \f)} \neq 0,
\Eq(8.14)$$
%
(where $\aa_i \equiv \aa_{\s_i,\s_{i+1}}(\bar \f,\m)$
is the locally unique nondegenerate
solution of $\AA^i_{unstable} = \AA^{i+1}_{stable}$ whose existence has
been proved in \S7), implies
that $\tilde\D_i^s$ and $\tilde\D_i^u$ {\it intersect transversally}
at $H_i$. This means that any pair of tangents to
the two surfaces form an angle bounded away from $0$.

Note also that the determinants in \equ(8.14) are $O(\m)$ so that also
the angle between $\tilde \D_i^s$ and $\tilde \D_i^u$ (\ie the smallest
angle between corresponding tangent vectors) is bounded below by a
quantity of the same order.

We transport the above two surface elements inside the region $U_i$ by
using the hamiltonian flow $S_t$, solving the Hamilton equations,
as follows.

Consider a $2(l-1)$-dimensional neighborhood, $M_i$, of $E_i^u$, obtained by
considering points with $q-$coordinate equal to $\hat r/2$ and having
energy equal to $E$.  By taking $\bar r$ small enough we can construct
a (smooth) diffeomorphism, $F_i$, of $\tilde M_i$ into $M_i$ as
follows.  For each $x \in \tilde M_i$ we can find a (smooth) real
function $\t=\t(x)$ so that $S_{-(T_i^u+\t)}(x)$ has $q-$coordinate
exactly equal to $\hat r/2$ (in particular: $\t(H_i) = 0$) and then we
set $F_i(x) = S_{-(T_i^u+\t)}(x)$.

Hence we can define $\D_i^s \equiv F_i \tilde \D_i^s$, $\D_i^u \equiv F_i
\tilde \D_i^u$ and observe that, since transversality is preserved
under diffeomorphisms, $\D_i^s$ and $\D_i^u$ intersect transversally at
$E_i^u$ (with angle of order $\m$).

Furthermore $\D_i^s$ and $\D_i^u$ can be represented, in normal
coordinates, as regular {\it graphs} over the angles $\V\psi$'s for
$\V\psi$ varying in some open $(l-1)$-dimensional set, $D_i$, whose
size is independent of $\m$.  For simplicity (and without loss of
generality) we let $D_i$ be a $(l-1)$-ball around $\V\psi_i$: $D_i
\equiv \{ \V\psi \in T^{l-1}: |\V\psi - \V\psi_i|< \d \}$ for some
positive $\d$.  Then the above construction and lemma 1 of \S5 imply
that $\D_i^u$ is simply
$\{(p,\AA',q,\V\ps)=(0,\AA'_i(0),\hat r/2,\V\ps) | \V\psi \in D_i \}$,
while $\D_i^s$ will have the form:
%
$$\{(\AA',\V\psi,p,q)=(\V{{\bar A}}_i
(\V\psi),\V\psi, p_i(\V\psi),\hat r/2)|\,\V\psi \in D_i \}\Eq(8.15)$$
%
for suitable (smooth) functions $\V{{\bar A}}_i$ and $p_i$ $\d-$close,
respectively, to $\AA_i'(0)$ and $0$.

Note that, since the energy $E$ is fixed, the $p_i(\V\ps)$ is actually
computable in terms of $\V\psi$ and $\bar A_i$; in fact the hamiltonian
$H$, in normal coordinates, takes the form:
%
$$H_\infty \equiv h_\infty(\AA',pq,\m) + f_\infty(\AA',\V\psi,p,q,\m)
\Eq(8.16)$$
%
with $h_\infty - h_0$ and $f_\infty$ of $O(\m)$ and with $f_\infty$
vanishing, together with all its derivatives, when $\AA' = \AA'_i(pq)$.
This is in fact the content of the results of \S5, see lemma 2; therefore
(see \equ(2.7) and use $J=pq,\, r=\hat r/2$):
%
$$\dpr _p H_\infty|_{(\AA'_i(0),\V\psi_i,0,\hat r/2)}
= {\hat r\over2}\dpr_J h_0 + O(\m) > 0, \Eq(8.17)$$
%
which, by the implicit function theorem, allows us to express $p$ in
terms of $\AA=\V{{\bar A}}(\V\ps)$, provided $\d$ is small enough.

In fact, recalling that on $\D^s_1$ the energy is fixed, we realize that by
taking the $\V\ps$-gradient of the relation
$H_\io(\AA_1(\V\ps),\V\ps,p_i(\V\ps),\hat r/2)\equiv E$ and evaluating the
result at the center $\V\ps_i$ (see \equ(8.16) and the comments after it)
one obtains: $\dpr_{\V\ps} p_i(\V\ps_1)=2(\hat r \dpr_J
h_\io)^{-1}\dpr_{\V\ps}\V{{\bar A}}_1\cdot\oo_1$ with
$\oo_1=\oo_s(1+u)\ne\V0$, $(\dpr_J h_\io)^{-1}=(1+u')g_{su}\ne\io$, see
\equ(5.6),\equ(5.21); thus the vector $\dpr_{\V\ps} p_i(\V\ps_1)\ne0$ and
its length is of size $O(\m)$, see \equ(8.18).

In the normal coordinates, transversality of $\D_i^s$ and $\D_i^u$ reads
simply:
%
$$\det \dpr_{\V\psi} \V{{\bar A}}_i |_{\V\psi_i} \ne 0\Eq(8.18)$$
%
and since such a determinant is of $O(\m)$ one sees that there exists a
constant $G_6>0$ such that the set $\{p=$ value determined by the energy
conservation; $\V{{\bar A}}_i(\V\ps),\, q=0,\,\V\ps\in D_i\}$ contains a
$(l-1)$- ball of radius which is not too small, namely $G_6 |\m|$, around
$\V{{\bar A}}_i (\V\psi_i) = \AA'_i(0)$.  This property is important in the
following construction.

Let $\e^i_{||}>\e^i_\perp>0$ and let $\x_i\in\D^s_i$ be a point with
$\V\ps$-coordinate equal to some $\V\ch_{i},\,|\V\ch_{i}-\V\ps_i|<\d/ 2$
and $p$-coordinate $p_{i}=p(\V\ch_i)> 2 \hat r\e^i_\perp$.  Let $B_i$
be the set:
%
$$B_i=\Bigl\{|\V\ps-\V\ch_{i}|<\e^i_\parallel,\,|q-{\hat r\over2}|<
\e^i_\perp {\hat r\over2},\ |p-p_{i}|<\e^i_\perp\hat r,\,|\AA-\V{{\bar A}}_i
(\V\ch_{i})|<\hat r^2\e^i_\perp\Bigr\}\EQ(8.19)$$
%

It is clear that if $\e^i_\parallel \le \d/C_1$, for $C_1$ large
enough, and $\e^i_\perp$ is small enough $B_i\subset U_i\cap S_{
-T^u_i}\,M_{\bar r}(H_i)$ and in a suitable time, denoted $T^u_i+T^s_i$ and
bounded uniformly in $i$, {\it it evolves into a set containing the point}
$\x'_i\=(\AA'_{i+1}(0),\V\ch'_i,\hat r/2,0)\in(W^{i+1}_s\cap U_{i+1})^{cc}$
(where ${}^{cc}\equiv$ connected component) {\it as well as a set}:
%
$$B'_i=\Bigl\{|\V\ps-\V\ch_{i}'|<{\e^i_\parallel\over C_2},\,|q|< \e^i_\perp
\hat r/2C_2,\,|p-{\hat r\over2}|<{\e^i_\perp\hat r\over C_2},\,
|\AA-\AA_{i+1}'(0)| <{\hat r^2\e^i_\perp \over C_2 }\Bigr\}\EQ(8.20)$$
%
where $C_2>1$ is a suitable constant of $O(1)$ (more precisely of order
$O(\exp g_i (T^u_i+T^s_i))$, uniformly bounded in $i$).

All that the latter statement is saying is that the flow $S_t$ takes a
finite time to carry a point on the heteroclinic orbit (or close to it)
and at distance $\sim\hat r$ from the torus $\TT_i$ to a point
close within the same distance to the torus $\TT_{i+1}$. During such finite
time nothing bad can really happen; all expansions and contractions in
phase space being bounded by a suitably large constant (basically depending
only on the size of $\hat r$).

We take $\e^1_\parallel=\d/C_1,\,\e^1_\perp\le G_7\m$ and suppose $\m$
small enough: this guarantees that not only $B_1$ has the above mentioned
inclusion property but also that as $\V\ps$ varies around $\V\ps_1$ then
$p_1(\V\ps)$ becomes different from $0$ and spans an interval of $O(\m)$ by
\equ(8.17),\equ(8.18) (thus implying the existence of the point $\V\ch_1$
with the above properties).

Having constructed $B_i$ and hence $B'_i$ we consider the evolution of the
set $B'_i$: we shall see that it evolves in time and crosses $\D^s_{i+1}$
at a time $T_i$ that can be chosen so that its image still contains a set
$B_{i+1}$ around some point $\x_{i+1}\in
\D^s_{i+1}$ described by \equ(8.19) with a suitable $\V\ch_{i+1}$ and
with $\e^{i+1}_\perp,\,\e^{i+1}_\parallel$ such that:
%
$$\e^{i+1}_\perp=(C_3^{-1}\e^i_\perp)^a\quad\e^{i+1}_\parallel
=C_3^{-1}\e^i_\parallel,\quad T_i\le 2 g^{-1}\log(C_3/2\e^i_\perp)
\EQ(8.21)$$
%
for suitable constants $a,C_3>1$ (determined below).

To simplify the analysis we take $\e^1_\perp$ exponentially small with
respect to $\m$ otherwise the above expression for $T_i$ would be more
involved (see \equ(8.29)). Hence we define $\e_\perp$ by:
%
$$\e^1_\parallel \= {\d\over C_1},\qquad
\e^1_\perp \= {C_2\over 2} \exp \left[-\bar g \th\Bigl({C_2\over|\m|
\e^1_{||} }\Bigr)^{G_{11}}\right]\EQ(8.22)$$
%
where $C_2$ is the constant fixed above (cfr. \equ(8.20)), $\bar g$ is a
($i-$independent) upper bound on $g_i$ and $\th$ and $G_{11}$ are suitable
constants related to diophantine properties of the motion on the invariant
tori (see below).

Assuming \equ(8.21),\equ(8.22) it will follow that the time of drift
can be bounded by $T^*\le \sum_{i=1}^N(T_i+2T_0)$ if $T_0$ is an
$i$-independent bound on $T^u_i+T^s_i$.  Recalling that $N\le G_2\m^{-2}$,
we see that \equ(8.21) implies (given the choice of $\e_\parallel^1$ and
$\e^1_\perp$ in
\equ(8.22)):
%
$$T^*\le C_4 \th e^{C_5/\m^2}\EQ(8.23)$$
%
for $\m$ small enough and suitable $C_4,C_5$.

It remains to check the above recursion in \equ(8.21). Let $\V\ch_i'$ be the
$\V\ps$-coordinate of $\x_i'$. In $B'_i$ we consider, for
$|q|<\e^i_\perp\hat r/2 C_2$ and $\m$ small enough the points
$z(q,\V\ps)=(\hat r/2,\AA'_{i+1}(q\hat r/2),q,\V\ps)$ with
$|\V\ps-\V\ch_i'|<\e^i_\parallel/C_2$: such points are indeed in $B'_i$
because the function $J\to \AA'_{i+1}(J)$ is differentiable with derivative
of order $\m$, and we see that, see \equ(8.20):
%
$$|\AA'_{i+1}(q\hat r/2)-\AA'_{i+1}(0)|< G_8|\m| q\hat r/2< \hat
r^2\e^i_\perp/ C_2\EQ(8.24)$$
%
if $C_2G_8 q|\m|\hat r^{-1}<\e^i_\perp$, \ie if $|\m|$ is small enough.

The evolution of $z$, which for all $q,\V\ps$ has energy $E$, is simply
(by lemma 1, \S5):
%
$$S_T\,z(q,\V\ps)=( q e^{g_i T},\,\AA'_{i+1}(q\hat r/2),\,
\hat r e^{-g_i T}/2,\,\V\ps+\oo_i T)\EQ(8.25)$$
%
with $\oo_i,g_i$  depending on $\m$ and on the product $q\hat r/2$, see
\equ(5.6). Therefore we can define $T(q)$ by:
%
$$q e^{g_i T(q)}=\hat r /2\EQ(8.26)$$
%
and $T(q)\tende{q\to0}\io$.

Let $T_\e$ be the time necessary in order that a trajectory on the torus
$T^{l-1}$ fills, running quasi periodically with velocity $\oo_i$, the
torus within a distance $\e/2$, \ie such that no point of $T^{l-1}$ has
distance $\ge \e/2$ from the set $\{\oo_i t|\,0\le t\le T_\e\}$.
Let $\h(\e)\=e^{-\bar g T_\e}$ if $\bar g,g$ are such that $\bar g\ge g_i
\ge g>0$ for all $i=1,\ldots,N-1$.

Call $q_{\max}\=\hat r \e^i_\perp/C_2$ and $q_{\min}\=q_{\max}
\exp(-g_i T_{\e^i_\parallel/C_2})$ so that $q_{\min}\ge q_{\max}
\h(\e^i_\parallel/C_2)$ and $T(q_{\min})-T(q_{\max})=T_{\e^i_\parallel
/C_2}$.

Then it is clear that as $q$ varies between $q_{\max}$
and $q_{\min}$ there is a value $\bar q$ such that $T(\bar q)$ verifies:
%
$$|\V\ch_i^{\prime}+\oo_i T(\bar q)-\V\ps_{i+1}|<{\e^i_\parallel\over 2C_2},
\qquad T(\bar q)\le T(q_{\max})+T_{\e^i_\parallel/C_2}\EQ(8.27)$$
%

From the theory of quasi periodic motions it follows that $T_\e$ is
bounded above in terms of the non resonance constants $C_0,\t$ of the
frequencies $\oo_i$, see \equ(3.1), by $G'_9C_0\e^{-G_9}$ for some
$G'_9,G_9>0$ (one can take $G_9=\t+2$). And in our application it is, see
\equ(8.9), $C_0\le G_3|\m|^{-G_4}$, so that:
%
$$T_\e\le \th\, (\e|\m|)^{-G_{11}}\=\bar T(\e)\EQ(8.28)$$
%
for suitable $\th,G_{11}>0$.

Hence \equ(8.27) implies that:
%
$$T(\bar q)\le \th\,[ C_2(\m\e_\parallel^i)^{-1}]^{G_{11}}+
g^{-1}\log [C_2(2 \e_\perp^i)^{-1}]\= T_i\EQ(8.29)$$
%

At this time the trajectory of $z(\bar q,\V\ch_i')$ is in a point $\bar
\x_{i+1}$ which has $q$-coordinate equal to $\hat r/2$ and $\AA$
coordinate still equal to the original value, while the $p$ coordinate
is:
%
$$p={\hat r\over2} e^{-g_i T(\bar q)}
\ge {\hat r\over 2} e^{-\bar g \hat T_i}>0\EQ(8.30)$$
%

The set image of $B'_i$ has dimensions constant in the $\V\ps$ variables
(because the quasi periodic motion is rigid), while it contracts by at most
$e^{-\bar g \hat T_i}G_{12}$ in the $\AA$ variables (recall \equ(8.29) and
that $\bar g$ is an upper bound for the expansion rates), for some $G_{12}$
that we suppose $>1$, and also by at most $G_{12}e^{-\bar g \hat T_i}$ in
the $p$-variables.

The $\AA_{i+1}$-coordinate, being equal to its initial value, has distance
from the point $\V {{\bar A}}_{i+1} (\V\ps_{i+1})\={\AA}'_{i+1}(0)$ bounded
by \equ(8.24), \ie by $G_8|\m|\hat r\bar q/2\le G_8|\m|\hat r^2\e^i_\perp/
2C_2$.  This is {\it far less than} $|\m|\e^i_\parallel/2C_2$ if, as we shall
suppose (see below), $\e^i_\parallel\gg G_8\hat r^2\e^i_\perp$.  Hence we
can find a point $\V{{\tilde\ps}}$ such that:
%
$$\V {{\bar A}}_{i+1}
(\V{{\tilde\ps}})=\AA'_{i+1}(\bar q\hat r/2),\qquad
|\V{{\tilde\ps}}-\V\ps_{i+1}|<\e_\parallel^i/4C_2\EQ(8.31)$$
%
In fact such $\V\ps$ can be estimated by the implicit function theorem
by:
%
$$|\V {{\tilde \ps}}-\V\ps_{i+1}|<|\AA'_{i+1}(q\hat
r/2)-\AA'_{i+1}(0)|O(|\m|^{-1})\le {G_8|\m|\hat r^2\e^i_\perp\over 2 C_2
G_{13}|\m|}\EQ(8.32)$$
%
provided:
%
$$G_8{\hat r^2\e^i_\perp\over 2 G_{13}}< {\e^i_\parallel\over4C_2}
\EQ(8.33)$$
%

By energy conservation it must be that $\bar p_{i+1}(\V {{\tilde \ps}})
\=\exp[-g_i T(\bar q)] \hat r/2$ is such that the point
$(\V {{\bar A}}_{i+1}(\V{{\tilde\ps}}),\V{{\tilde\ps}},
\bar p_{i+1}(\V {{\tilde \ps}}),\hat r/2)$ will be
inside $W^u_{i+1}$ and it is such that the set $B_{i+1}$ defined by
\equ(8.19) with $i$ replaced by $i+1$, $\V\ch_{i+1}\=\V{{\tilde\ps}}$,
$p_{1\,i+1}\= \bar p_{i+1}(\V {{\tilde \ps}})$ and parameters:
%
$$\e_\perp^{i+1}\le {\e^i_\perp\,e^{-\bar g \hat T_i}\over2G_{12}},\qquad
\e^{i+1}_\parallel\le {\e^i_\parallel\over4C_2}\EQ(8.34)$$
%
is contained in $M_{i+1}$ and $S_{-T(\bar q)}(B_{i+1})\subset B_i'$.

It is now easy to check that letting $C_3\=C_2^2 G_{12}/2$,
$a\=C_3^{G_{11}}$ ($>3$) and defining the $\e^{i+1}_\cdot$'s as in
\equ(8.21),\equ(8.22),\equ(8.19) we see (inductively)
that for all $i$'s the second term in \equ(8.29) dominates over the first
and \equ(8.34) holds for all $i$'s together with $T_i\le \hat T_i \le 2
g^{-1} \log(C_3/2 \e^i_\perp)$ for $|\m|$ small enough; and, finally, also
\equ(8.33) will be readily verified for small $|\m|$.

Hence we conclude that drift takes place on a time scale $T^*$ {\it bounded
above} by \equ(8.23).

Clearly instead of trying to go systematically forward along the ladder of
whiskers we could have chosen an arbitrary up/down pattern and found the
existence of an initial datum which would have followed the prescribed
pattern (taking essentially the same time). In this way we can construct
a collection of $2^{N}$ sets of initial data ($N$ being of the order of
$|\m|^{-2}$), each of (very small but positive) measure, collected according to the
up/down pattern that they follow in their evolution along the ladder of
whiskers. If we give equal probability to data corresponding to each
pattern and choose one of them at random we shall see that it climbs
brownianly the ladder: \ie the existence of what we have called drift and of
diffusion are essentially the same phenomenon.

Finally, we remark that the analysis of this section {\it extends to the
case of forced systems} \equ(5.91). Recall from the discussion at the end
of \S 5, that one can construct whiskered tori $W^\pm(s,u)$ for \equ(5.91)
(with the right $\l$--frequency $\o$) for all $s\in \Si_\m$ and $u\in
[-\bar u,\bar u]$. The second of \equ(8.3) is trivially solved (as
\equ(5.91) is linear in the clock action $B$) and the analysis of the first
of \equ(8.3) is then carried out as discussed above (of course various
notions,
such as the diffusion path, have now to be properly reinterpreted: see end
of \S 5).
%
\def\atan{{\,\rm arctg\,}} \def\0{{\V0}}\def\pps{{\V\ps{\,}}}
\def\bul{{\bar u\e^{-2}}} \def\hB{{\hat B}}\def\xb{{\bar \x}}\def\ch{{\chi}}
\def\db{{\bar \d}}\def\zb{{\bar \z}}\def\NN{{\cal N}}
\vglue2.truecm

\penalty-200

{\bf\S9 A class of exactly soluble homoclines}

\penalty10000

\vskip0.5truecm\numsec=9\numfor=1

\penalty10000

After reduction to normal form, the motion of a quasi integrable
hamiltonian system near a resonance is described by a hamiltonian with
two perturbation parameters [N,BG]:
%
$$H=\oo\cdot\AA+{\AA^2\over2J}+{I^2\over2J_0}+v(\f)+f'(I,\AA,\f,\aa)
\Eq(9.1)$$
%
with:
%
$$\oo=\oo_0\h^{-1/2},\qquad f'=\m f(\f,\aa)\Eq(9.2)$$
%
where $\h>0,\m>0$, the function $J$ is analytic in $\h^{1/2}\V A,\,\h$; and
the functions $J_0,v,f_j$ are regular analytic in the variables
$\h^{1/2}I,\h^{1/2}\AA,\h$ while $\m$ is generally much smaller than $\h$
{\it but related} to it: \eg $\m=\h^Q$ with $Q$ large (in fact as large as
wished, at the expense of the complexity of the $f$'s). All functions are
analytic in the angles on which they depend.

If the model comes from the perturbation theory of a degenerate system,
like a celestial mechanics system or a forced system with clock variables
$(B,\l)\=(A_1,\a_1)$, the \equ(9.1) may contain some additional features
and some variations, see \S 11, \S 12.

In this section we  look at a more special class of systems which we
shall call the {\it even class}:
%
$$H_{even}=\oo\cdot\AA+h(\AA)+{I^2\over2J_0}+J_0g_0^2(\cos\f-1)
+ \m \sum_{\n} f_{\n}\cos(\nn\cdot\aa+n\f)\Eq(9.3)$$
%
where $J_0,g^2_0,f$ depend, in general, on $I,\AA$ and
$\n\=(n,\nn),\,\nn\ne\0$.

The hamiltonian \equ(9.3) is remarkable because the homoclinic points at
$\f=\p$ can be, often, computed exactly (namely they are at $\aa=\V0$,
modulo some convergence questions).

An important class of examples, with $l=2$, is:
%
$$H_{p}=
\o B+{I^2\over 2J_0}+J_0g_0^2(\cos\f-1)+\m f(\h^{1/2}I,\f,\l)\Eq(9.4)$$
%
with $J_0,g_0$ positive constants, $\o=\o_1/\h^{1/2}$. And for $l=3$:
%
$$H_c=\o B+\h {A^2\over 2J}+{I^2\over 2J_0}+J_0g_0^2(\cos\f-1)+
\m f(\h^{1/2}I,\h^{1/2}A,\f,\aa)\Eq(9.5)$$
%
with $J=J(\h^{1/2}A,\h)$, $J_0,g_0$ positive constants, $\o=\o_1/\h^{1/2}$,
and, typically, $A=O(\h^{-1/2})$.  The functions $f$ may also depend on
$\h,\m$ and they will be supposed analytic in $\h,\m$ near $0$ and in
$\aa$ for $|\Im \a_j|<\x$, $\h e^{|\Im\f|}<\x$, and averageless and
uniformly bounded in this domain (as $\h\to0$).  We make the
identification $\a_1\=\l$, $A_1\=B$, where $\l,B$ are the clock angle
and the clock action.

The model \equ(9.4) is a classical forced pendulum
(compare [La2, HMS, ACKR, DS, Ge]) and the model
\equ(9.5) is a system arising in some celestial mechanics problems, see
\S12.

The determination of the location of the homoclinic point is based on a
very simple {\it symmetry argument}. Therefore we present it in the simple
case of \equ(9.3) with $J,J_0,f_{\nn,m}$ action independent (\ie
constants) and all the phases are isochronous (quite simple, of course,
but useful for illustration purposes).

{\it We shall show that for all the models in the even class the homoclinic
equations at $\f=\p$ (\equ(6.47)) can be solved to all orders of
perturbation theory and have $\aa=\V0$ as solution.  Since the difference
between the two whiskers at $\f=\p$ and at any $\aa$ is analytic in
$\aa,\m$, by lemma 1', we see that $\aa=\V0$ is an actual solution of the
equation, as long as $\m$ is small enough.}

We begin by remarking that from the explicit form of $W(t)$, see appendix
9, the wronskian can be thought as formed with four $l\times l$ blocks ($l$
is arbitrary): the action action and the phase phase blocks are even
functions of $t$, while the other two blocks are odd functions of $t$. If
$p$ denotes an even function (of $t$) and $d$ denotes an odd function, we
can write symbolically $W(t)=\pmatrix{p&d\cr d&p\cr}$.  Similarly the
vectors $F^k$ and $X^k$ in \equ(6.8) can be thought as column vectors with
two $l$ dimensional columns.

Set right away $\aa=\V0$: then the blocks of $F^1$ are of parity
$\pmatrix{d\cr p\cr}$, thus we see that, regarding the initial data for
$X^1$ as (constant) even or odd functions:
%
$$X^1(t)=\pmatrix{p&d\cr d&p\cr}\Bigl[\pmatrix{p\cr 0\cr}+\ig_0^t
\pmatrix{\tilde p&\tilde d\cr \tilde d&\tilde p\cr}
\pmatrix{d'\cr p'\cr} d\t\Bigr]=\pmatrix{p''\cr d''\cr}\Eq(9.6)$$
%
and we see that the homoclinic conditions have the form, see \equ(6.43):
%
$$(p_\io-p_{-\io})+\ig_{-\io}^\io (pd+dp)=0\Eq(9.7)$$
%
because one can see that also the contributions from $t=\pm\io$ vanish as
$F^1$ is even in the phase components. Thus \equ(9.7) is automatically
satisfied. In this case one sees by direct calculation that
$p_\io-p_{-\io}=0$.

However in general functions $p_\io-p_{-\io}$ vanish, if constructed in the
way they are in the homoclinic equation. In fact, see \equ(6.43), they are
generated by an odd function $F(\oo t,t)$ which as $t\to\pm\io$ converges
exponentially to $F(\oo t,\pm\io)$ in the sense of our functions, so that
we can proceed to computing the harmonics of $F$ via the formula:
%
$$\eqalign{
F_\nn(+\io)=&\lim_{T\to+\io}T^{-1}\ig_0^T F(\oo t,t) e^{-i\oo\cdot\nn
t}dt=\cr
=&\lim_{T\to+\io}-T^{-1}\ig_0^T F(-\oo t,-t) e^{-i\oo\cdot\nn t}
dt=-F_{-\nn}(-\io)\cr}\Eq(9.8)$$
%
which immediately implies that the expression outside the integral in the
homoclinic equations for the $\AA$ variables, \ie $\sum_\nn(F_\nn(+\io)-
F_\nn(-\io))(\oo\cdot\nn)^{-1}$, denoted symbolically $p_\io-p_{-\io}$ in
\equ(9.7) does vanish.

The above formula implies also immediately that $F(\oo t,\io)$
$=-F(-\oo t,-\io)$ so that the integrand in \equ(6.43) is odd
and therefore the homoclinic condition holds.
%

We now assume inductively that the block structure of $F^k$ and $X^k$ is
respectively $d,p$ and $p,d$: if so, the homoclinic conditions will be
satisfied to all orders, for the same reason they were satisfied at $k=1$.
Clearly if the assumption is verified for $h=1,\ldots,k-1$ and if we prove
that as a consequence the $F^k$ has the correct parity property, also $X^k$
will have the correct parity property.

The parity of $F^k$ is immediately checked by inspection of \equ(6.29)
\ie:
%
$$F^k(\oo t,t)=
{ \dpr^k_\m\over k!}
E\,[\m \dpr f(X_0+\bar X^k)+\dpr H_0(X_0+\bar X^k)-
\Bigl(\dpr H_0(X_0)+\dpr^2 H_0(X_0)\bar X\Bigr)]|_{\m=0}
\Eq(9.9)$$
%
where $\bar X^k$ is the order $k$ truncation of $\bar X$: just observe that
the matrix $E$ inverts parities, that $\dpr b(X)$ is of type $\pmatrix{p\cr
d}$ for any $X=\pmatrix{p\cr d\cr}$ and any function $b$ even in $\aa$ and
$\f$, and finally that $\dpr^2H_0(X^0)$ $=\pmatrix{ p & d\cr d& p \cr}$ so
that $\dpr^2H_0 X = \pmatrix{p\cr d\cr}$.
%

One easily checks that the argument is neither affected by action
dependence of the various coefficients nor by lack of isochrony: in fact
the unperturbed motion is such that the action dependence on $t$ is even
and this is all one really needs: the only change is that not all the
matrix elements of $F^k$ relative to the angle block (\ie lower) are zero
(but they are still even).
%
\vglue2.truecm

\penalty-200

{\bf\S10 Homoclinic scattering. Large separatrix splitting}

\penalty10000

\vskip0.5truecm\numsec=10\numfor=1

\penalty10000

\def\MM{{\cal M}}\def\mm{{\V\m}}

The concept of {\it homoclinic scattering} arises naturally if one
compares the homoclinic splitting as seen in the original $\aa$
coordinates or in the {\it intrinsic} coordinate $\pps$ associated with
the whiskers normal forms of \S5.  The first part of this section is
devoted to it.

The second part will deal with the theory of the homoclinic splitting
in systems with more than $2$ degrees of freedom: the main point in the
analysis will be that when the unperturbed frequencies depend on a
parameter $\h$ and one of them becomes large (we shall say {\it fast})
as $\h\to0$ {\it it is not, in general, true that the homoclinic
splitting is smaller than any power in $\h$}, \ie the determinant of the
intersection matrix is not exponentially small with some inverse power
of $\h$ as $\h\to0$ (see below for a formal definition).
{\it Unless} all the frequencies are fast.

Suppose that there is a homoclinic point at $\aa=\aa_0,\f=\p$.  We can
regard such point either as a point on the stable whisker, or as a
point on the unstable whisker.  In this way the point receives, from
the parametrization in lemma 1', the coordinates $p=p_0,q=0$,
$\V\ps=\V\ps_0^s$ or $p=0,q=q_0$, $\V\ps=\V\ps^u_0$, so that:
%
$$\eqalign{
Z_-(p_0,0,\V\ps^s_0)=&\p=Z_-(0,q_0,\V\ps^u_0)\cr
\V Z_\giu(p_0,0,\V\ps^s_0)=&\aa_0=\V Z_\giu(0,q_0,\V\ps^u_0)\cr}\Eq(10.1)$$
%
with the notations of \equ(6.23), \ie $Z$ is the r.h.s. of \equ(6.21);
of course all the above functions $(\cdot)_0$ are analytic functions of
$\m$.

We can consider the point on the stable manifold with coordinates
$p=p_{\V\ps},q=0$ with $p_{\V\ps}$ such that:
%
$$Z_-(p_{\V\ps},0,\V\ps^s_0+\V\ps)=\p\Eq(10.2)$$
%
We can also consider the point on the unstable manifold with coordinates
$q_{\V\ps{\,}'},\V\ps^u_0+\V\ps{\,}'$ such that:
%
$$Z_-(0,q_{\V\ps{\,}'},\V\ps^u_0+\pps{\,}')=\p\Eq(10.3)$$
%
and  lemma 1' guarantees that $p_\pps,q_\pps$ are analytic in $\pps,\m$ for
$\m$ small; note that $p_{\V 0}=p_0$, $q_{\V 0}=q_0$.

We shall establish a correspondence between $\V\ps$ and $\V\ps{\,}'$,
if they describe the same $\aa$, so
that:
%
$$\V Z_\giu(p_{\V\ps},0,\V\ps^s_0+\V\ps)=\V Z_\giu(0,q_{\pps{\,}'},\V\ps^u_0+
\pps{\,}')
\= \aa
\Eq(10.4)$$
%
and we denote it as:
%
$$\V\ps{\,}'=\V\ps+\V\s(\V\ps)\Eq(10.5)$$
%
calling the function $\V\s$ the {\it scattering phase shift};
here we think of $\ps,\ps'$ as functions of the independent variable
$\aa$. We also
introduce the same function regarded as a function of the common value
$\aa$ of the two sides of \equ(10.4):
%
$$\V\s[\aa]\=\V\s(\pps)\Eq(10.6)$$
%
if $\aa,\pps$ are coordinates of the same points.  The functions $\V\s$
are, by lemma 1', analytic on $T^{l-1}$ and in $\m$ for small $\m$, and
$\V\s(\0)=\0,\V\s[\aa_0]=\0$, by definition.

The scattering measures the degree of interaction between the pendulum
and the rotators. If the rotators are not isochronous the scattering is an
interesting homoclinic property: of course if the $j$-th rotator is a
clock (\ie it is isochronous) then $\s_j\=0$ (as it should because it
is a clock).

The equations for the homoclinic point can be written:
%
$$\eqalign{
Q_+(\aa)=&Z_+(p_{\V\ps},0,\V\ps_0^s+\V\ps)-Z_+(0,q_{\pps+\V\s(\pps)},
\V\ps_0^u+\V\ps+\V\s(\V\ps))=0\cr
\V Q_\su(\aa)=&\V Z_\su(p_{\V\ps},0,\V\ps_0^s+\V\ps)-\V Z_\su
(0,q_{\V\ps+\V\s(\V\ps)},
\V\ps_0^u+\V\ps+\V\s(\V\ps))=\0\cr}\EQ(10.7)$$
%
or also as:
%
$$\eqalign{
Q^0_+(\pps)=&Z_+(p_{\V\ps},0,\V\ps_0^s+\V\ps)-Z_+(0,q_{\pps},
\V\ps_0^u+\V\ps)=0\cr
\V Q^0_\su(\pps)=&\V Z_\su(p_{\V\ps},0,\V\ps_0^s+\V\ps)-\V Z_\su
(0,q_{\V\ps},
\V\ps_0^u+\V\ps)=\0\cr}\EQ(10.8)$$
%

Hence the {\it homoclinic splitting} can be measured as a
function of $\aa$ by
$\V Q(\aa)$ or as a function of $\pps$ by $\V Q^0(\pps)$.
Note that if $X^\s(t;\aa)$ denote the evolutions of the stable/unstable
motions with initial angle coordinates $(\p,\aa)$, it is
$X^s(0;\aa)-X^u(0;\aa)\=Q(\aa)$.

Therefore there are two interesting sets of homoclinic angles.  One set
is described by the $\pps$ derivatives of $\V Q^0_\su$ defined by
\equ(10.8) which we could call the {\it intrinsic} homoclinic angles.
The other set is described by the $\aa$ derivatives of the $\V
Q_\su$ at $\pps=\0$ or, respectively, at $\aa=\aa_0\=\aa_{hom}$ (the
latter derivatives are proportional, via the matrix $\dpr_\aa \pps$
(close to the identity), to the $\pps$ derivatives of $\V Q_\su$ in
\equ(10.7), phase shift included). The latter will be called the {\it
natural} homoclinic angles. Note that $Q^0_+,Q_+$ do not appear in the
definitions of the intersection matrix as they can be computed from the
$Q^0_\su,Q_\su$ by using the fact that the energy of the whiskers can be
supposed fixed (and equal for both).

The higher $\aa$ or $\pps$ derivatives of $\V Q_\su,\V Q^0_\su$ in $\aa$
or $\pps$, respectively, will define the {\it intersection tensors}.
The {\it homoclinic angles} will be the eigenvalues of the {\it
intersection matrix}, \ie of the matrix of the first derivatives of the
$\V Q_\su$, or $\V Q^0_\su$.

More precisely the trigonometric tangent of the homoclinic angles is
proportional to the mentioned eigenvalues. The latter have the dimension
of an action and, therefore, a normalization constant with the dimension
of an inverse of an action has to be introduced to really define the
tangents of the angles. A natural normalization could be
$(J_0|\oo|)^{-1}$, see \equ(9.1).

\noindent{\it {\bf Definition}:
In the case of hamiltonians depending on a parameter
$\h$, \eg \equ(9.4),\equ(9.5), we shall say, that the homoclinic
splitting is ``smaller than any power" in the parameter $\h$ if there
exists $c>0$ such that when the perturbation constant $\m$ is $\m=\h^c$
the determinant of the intersection matrix tends to $0$ as $\h\to0$
faster than any power in $\h$.  Likewise in the same situation we say
that the homoclinic splitting is ``exponentially small" if the
determinant of the intersection matrix is asymptotically equal to the
determinant of its first order approximation (in $\m$) and the latter
tends to $0$ as an exponential of some inverse power of the parameter
$\h$.}
\vskip0.3pt
If $l=2$ the results of [Nei] imply, for the model
\equ(9.4), that the scattering phase shifts are smaller than any power,
together with their derivatives at the homoclinic point.  The same
holds if $l>2$ and all the angles rotate at fast speed (\ie with $\oo$
as in \equ(9.2)).  The intersection matrix and all its derivatives are,
under the same conditions, smaller than any power.

The relationship between the two notions of homoclinic angles and their
connection with the scattering phase shifts is outlined in Appendix A11.

If $l>2$ with mixed fast and slow rotators, like \equ(9.4),\equ(9.5),
more analysis is necessary to understand such cases.

In the remaining part of this section we study some more detailed
results concerning:

\item{1) } the relation between the intersection tensors in the two systems of
coordinates ($\aa$ or $\pps$). In fact we shall prove
that for even hamiltonians all the odd derivatives of $\V\s[\aa]$ vanish at
the homoclinic point $\aa=\aa_0\=\0$. Hence {\it the homoclinic angles are
the same} in the natural and in the intrinsic coordinates (in general even
models).\hfill\break
%
The even derivatives do not vanish, in general, but they are bounded in
a useful way only if $l=2$ (or if $l>2$ and $\oo$ has non resonant and
large components, see \equ(9.2) and take $\h$ small).  In such cases,
as mentioned above, in fact the homoclinic splitting is smaller than
any power in $\h$ and therefore (by the discussion in appendix A11) one
can see that generically the scattering phase shifts turn out to be
also smaller than any power.

\item{2) } We consider even hamiltonians, depending on a parameter
$\h>0$, of the type \equ(9.3) or \equ(9.4),\equ(9.5) and the vector
$\oo$ will be supposed to have one of the two forms:
%
$$\oo=\oo_0\h^{-1/2},\quad
\oo=(\h^{-1/2}\o_1,\h^{1/2}\o_2,\ldots,\h^{1/2}\o_{l-1})\Eq(10.9)$$
%
and to verify a diophantine condition $|\oo\cdot\nn|^{-1}\le \h^{-b}
C_0|\nn|^\t$ for some $b,C_0,\t>0$:
in the first case take $b=1/2$ and $\oo_0$ diophantine with constants
$C_0,\t$; in the second case
let $(\o_2,...,\o_{l-1})$ verify a diophantine
condition with constants $C_0$ and $\t>l-2$, then given $a>1/2$,
it is easy to see that there exists
a set $\O_1$ $\subset [\lis \o,\io)$ with:
$$
{\rm meas.} \{ [\lis \o,\io)\backslash \O_1\}\le
{K\over C_0} \h^a \Big( \sum_{i=2}^{l-1} |\o_i/\lis \o|\Big)^{\t-l+2}
\Eq(10.10)$$
%
such  that if $\o_1/\h\in \O_1$ then the above estimate on $|\oo\cdot\nn|$
holds with $b\=a-1/2$.

We shall study the whiskers of an
invariant torus run quasi periodically with rotation spectrum $\oo$.
\hfill\break
The first of \equ(10.9) will be called the {\it fast rotation case} and the
second will be called the {\it mixed fast-slow case}.
The angles whose rotation velocity is $O(\h^{-1/2})$ will be called {\it
fast angles} or {\it fast modes}; the others {\it slow}. Thus in the
first of the cases in \equ(10.9) all the angles (or modes) are fast
while in the second case the first angle $\a_1$ is fast and the others
are slow.

\noindent{We} shall usually add the hypothesis that $f$ is a
trigonometric polynomial of degree $N$ in the $\aa$'s.  The
results a),b) below will be derived under the additional assumption that
$g_0,J_0,J,f$ depend on the parameter $\h$ and are uniformly bounded
and holomorphic in:
%
$$\DD=\{|\h^{1/2}\AA|,\,|\h^{1/2} I|< r,\ |\Im\a_j|<\x_0,
|\h(\cos \f-1)|<\x_0\}\Eq(10.11)$$
%
while the results c),d) require the same properties for $g_0,J_0,J$ but
put on $f$ only the requirement of boundedness
and holomorphy in $\AA,I$ as above and for $|\Im \a_j|,|\Im \f|<\x_0$.

The above $\h$ dependence will be recorded by appending a subscript $\h$
to the hamiltonian as in $H_\h$.

The hamiltonians we are considering
are {\it even} in the sense of \S9. Therefore if $\oo$
is as above, the invariant tori constructed by using lemma 1' of \S5
will have, for $\m$ small enough (depending on $\h$, in the $\h$
dependent cases), whiskers homoclinic at $\f=\p,\aa=\0$.

The following theorem summarizes our main results about the {\it homoclinic
splitting for even models}:

\vskip3.pt
\noindent{\it {\bf Theorem 3}:
a) The odd derivatives of the scattering phase shifts
vanish at the homoclinic point. Hence the homoclinic angles will be the
same in both systems of coordinates (hence smaller than any power in the fast
rotation cases, \ie if $l=2$ or if $l>2$ and $\oo$ is given by the first of
\equ(10.9)). This shows that the difference between the two notions of
splitting of the whiskers (at the homoclinic point) is a {\it higher order}
effect.

b) In the fast rotation cases, \ie if $l=2$ or if $l>2$ and $\oo$ is given
by the first of \equ(10.9) all the even derivatives of the phase shifts
and the odd derivatives of the homoclinic splitting
are smaller than any power.

c) The jacobian determinant of the derivatives of the scattering phase
shifts is not, in general, smaller than any power in the mixed cases,
(\ie if $l>2$ and $\oo$ is given by the second of \equ(10.9)).  The
same can be said of the jacobian determinant of the derivatives with
respect to $\aa$ or to $\pps$ of the homoclinic intersection tensors.

d) If $l>2$ the second order value of the
determinant of the intersection matrix is not
smaller than any power
as $\h\to0$, in general, for the mixed rotation cases.}

\vskip3.pt
\noindent{\bf Remark}:
Hence one should not be led erroneously to believe that the
homoclinic splitting is, as a rule, exponentially small when there is
one or more rapidly rotating angles (unless all of them do rotate at
fast speed).  This is particularly striking in the case d).
\vskip.3truecm

Here we prove a); the part of the statement b) concerning the
connection between the homoclinic splitting and the homoclinic
scattering is briefly discussed in appendix A11 while the statement
about the size smaller than any power is not analyzed here as it is
well known ([N], [Nei]) and we do not really need it; the proof of
c),d) is an explicit check and, to set an example, the calculation is
performed in appendix A13 for the statement d).
\vskip3.pt

\noindent{\bf Proof}:
We shall take $J,J_0,g_0>0$ constants and $f$ depending
only on $\aa,\f$.  Most arguments being based on symmetry properties,
the general case is identical.  We make the above simplifying
assumptions only to have a lighter notation and to exhibit the essence
of the argument (as we did in the analogous situation in \S9).

The wronskian matrix for such a case is simply related to \equ(A9.8):
%
$$W(t)=\pmatrix{w_{11}(t)&0&w_{12}(t)&0\cr
0&1&0&0\cr
w_{21}(t)&0&w_{22}(t)&0\cr
0&J^{-1} t&0&1\cr}
\ ,\quad
W(t)^{-1}=\pmatrix{w_{22}(t)&0&-w_{12}(t)&0\cr
0&1&0&0\cr
-w_{21}(t)&0&w_{11}(t)&0\cr
0&-J^{-1} t&0&1\cr}
\Eq(10.12)$$
%
where $w_{ij},\,i,j=1,2$, is the matrix in \equ(A9.8).  And we shall
write, if $g\=g_0$:
%
$$\eqalign{
w_{11}(t)=&{c'}(x^{-1}+x)/2+C^{11}(x)+g\,\s t\,\bar C^{11}(x)\cr
w_{21}(t)=&{c}\s (x^{-1}-x)/2+\s \,g\, \s t\,\bar
C^{21}(x)\=c\,\s \ (x^{-1}-x)/2+w^0_{21}(t)\cr
w_{12}(t)= &\s\,C^{12}(x),\qquad w_{22}(t)= C^{22}(x)\cr}\Eq(10.13)$$
%
where $\s\=\s_t\=
\sign(\Re t)$, $x=e^{-g\s t}$, and $c,c'$ are constants and the $C$
functions are analytic in $x$ at $x=0$. The radius of convergence of the
series defining $C^{ij},\bar C^{ij}$ is $1$, but all the above functions of
$x$ can be perfectly continued beyond, as their singularities are poles at
$x=\pm i$.

As functions of $t$ the $\s^{i+j}C^{ij},\s^{i+j} \bar C^{ij}$ are
holomorphic in $t$ with poles, at most double, at $i(2n+1)\p/2g_0$,
where $n$ is an integer.

We shall consider functions of $t$ which can be represented as:
%
$$M(t)=\sum_{j=0}^s{(\s t g)^j\over j!} M_j^\s(x,\oo t)
\ ,\quad  x\=e^{-\s gt}\ ,\quad \s\=\sign(\Re t)
\Eq(10.14)$$
%
with $s<\io$, $M_j^\s(x,\pps)$ holomorphic,
{\it at $\s$ fixed equal to $+$ or $-$},
in the $x$-plane in a strip
$|\Im x|<1$ except, possibly, for a polar singularity at $x=0$.
We restrict also $M_j$ to be trigonometric polynomials in the $\pps$
variables. We call $\MM$ such class of functions. We call $\MM_0$ the
class obtained by requiring that no $M_j^\s(x,\pps)$ in  \equ(10.14) is
$\pps$ {\it and} $x$ independent: \ie we ``quotient" $\MM$ with respect to
polynomials in $t$.

Note that a function $M(t)$ can admit at most one representation like
\equ(10.14) with the above mentioned analyticity properties: \ie given
$M(t)$ one can compute $M_j(x,\pps,\s)$.
In fact, assuming for simplicity that $x\to M_j(x,\pps)$
are analytic at $x=0$, then:
$$
\lim_{T\to\io} {(s+1)!\over (gT)^{s+1}} \ig_0^T M^\s(t) e^{-i \oo\cdot \nn t}
d(gt)  = M_{s,0\nn}^\s
\Eq(10.15)$$
%
where we write $M_j^\s\=\sum_{\nn,k\ge 0} M_{j,k\nn}^\s x^k e^{i \pps
\cdot \nn}$; to compute $M^\s_{s,1\nn}$ substitute in the above equation
$M^\s(t)$ with $(M^\s(t)-\sum_\nn M^\s_{s,0\nn} e^{i \oo\cdot\nn t})e^{\s
gt}$; and so on; having computed $M^\s_s(x,\pps)$ repeat the procedure with
$s$ replaced by $(s-1)$ to $M_{s-1}(t)\=$
$M^\s(t)- {(gt)^s\over s!} M^\s_s $.

We define a linear operation $\II$ on the functions $M\in \MM_0$ by
defining its action on the monomials:
%
$$M(t)={(g\s t)^h\over h!} x^k \s^\th e^{i\r\oo\cdot\nn t}\Eq(10.16)$$
%
with $h,k$ integers, $\th=0,1$, $\r=\pm1$ and
$gk \pm i\oo\cdot \nn\ne 0$:
%
$$\II M(t)=- g^{-1}  \s^{\th +1} x^k e^{i\r\oo\cdot\nn t}
\sum_{p=0}^h{ (g\s t)^{h-p}
\over(h-p)!}
{1 \over(k- i \r \s g^{-1} \oo\cdot\nn)^{p+1}}
\Eq(10.17)$$
%
Note that
the $\II$ is {\it not defined} on the polynomials of $t,\s$, \ie if
$k=0$ and $\oo\cdot\nn=0$ (so that no exponentials are present in the
monomial defining $M$).

The operation $\II$ yields,
{\it at fixed $\s$} a special primitive of $M$, in fact:
%
$$\dpr_t\II M\=M\Eq(10.18)$$
%
A few further features of $\II$ are the following:
\item{1) } if $M$ is odd in $t$ then $\II M$ is even; if $M$ is even
then $\II M$ is odd.
\item{2) } if $M$ is analytic in $t$ and odd then $\II M$  is analytic
and even.
\item{3) } if $M$ is analytic in $t$ and even then $\II M(t)$ can be
continued analytically from $t>0$ (or $t<0$) to a function $\II^+M(t)$
(or, respectively, to $\II^-M(t)$) defined for all $t$'s and
$\II^+M(t)-\II^+M(0)$ is odd (or, respectively, $\II^- M(t)-\II^-M(0)$
is odd). In general $\II^+M(t)\ne \II^-M(t)$ unless $\II^+ M(0)=0$ (or
$\II^-M(0)=0$). In the latter case $\II^+ M(0)=\II^-M(0)=0$.
\item{4) } the function $\II_R M(t)\=\ig_{\s\io}^te^{-Rg\s\t} M(\t)d\t$
is defined for $\Re R$ large enough and it admits an analytic
continuation to $\Re R<0$ and:
%
$$\II M(t)\=\II_0 M(t)
\Eq(10.19)$$
%
\item{5) } if $M$ is such that $M(t)\=M(\oo t,\s)$ for some $M(\pps,\s)$
defined on the torus, then:
%
$$\II M(t)=(\oo\cdot\V\dpr_\pps)^{-1}M(\oo t,\s)\Eq(10.20)$$
%
and $\II M$ is analytic if $M$ is analytic in $t$ and the functions $\II M$
and $M$ have the opposite parity, if $M$ has well defined parity in $t$.
\item{7) } if $M$ depends on other $l-1$ dimensional angles $\aa$ as a
linear combination of monomials:
%
$${(g\s t)^h\over h!} x^k\s^\th\cos_{\th'}(\oo\cdot\nn
t+\aa\cdot\mm)
\= {(g\s t)^h\over h!} x^k\s^\th
{(-1)^{[\th'/2]} \over 2 i^{\th'}} \sum_{\r=\pm1}
\r^{\th'} e^{i \r (\oo\cdot \nn t +\mm\cdot \aa)}
\Eq(10.21)$$
%
with $\th,\th'=0,1$ and $\cos_{\th'} y=\cos y$ if $\th'=0$ and
$\cos_{\th'} y=\sin y$ if $\th'=1$, the $\II M$ has the same form. We
shall say that $M$ is {\it time-angle even} if $\th+\th'=$ even for all
monomials of $M$. If, instead, $\th+\th'=$ odd for all monomials we say
that $M$ is {\it time-angle odd}.
It then follows that the time angle parities of $M$ and $\II M$ are
opposite (when either is well defined).

\item{8) } $\II$ does not change the trigonometric degree of $M$: \ie
if $M_j(t,\pps,\s)$ had a maximum trigonometric degree $N$ in
the $\pps$ variables also the functions representing $\II M$ will have
trigonometric degree $\le N$. And the operator $\II$ does not increase the
degree in $t$.

\item{9) } we extend the operation $\II$ to $\MM$ by setting $\II
t^n=t^{n+1}/(n+1)$: the above parity properties remain valid.  Property
4) holds for $\II_R F(t)-\II_R F(0)$ in general.  Property 8) changes
as the degree in $t$ of the ``non exponential" monomials of the form
$(g\s t)^h\s^\th$ is increased by $1$. {\it However, such a rather
arbitrary extension will play no role in the following, as in our
context $\II$ is applied only to functions in $\MM_0$}.

After the above remarks we make the inductive assumption that
$F^{h\s}(t,\aa)$ has action components $(+,\su)$, denoted symbolically
$d$, of odd time-angle parity in the above sense (different from the one
used in \S9) and angle components $(-,\giu)$, denoted $p$, of even time
angle parity. Opposite parity assumptions will be made for
$X^{h\s}(t,\aa)$. We shall write:
%
$$F^h=\pmatrix {d\cr p\cr},\quad X^h=\pmatrix{p\cr d\cr}\Eq(10.22)$$
%
dropping the label $\s$ from $F$ and $X$. In fact the main goal of the
above formalism is to treat simultaneously the stable and the unstable
whiskers: for $t>0$ it is $\s=1$ and $F^h,X^h$ represent $F^{h+},X^{h+}$
while for $t<0$, $\s=-1$ and $F^h,X^h$ represent $F^{h-},X^{h-}$. Hence
we can symbolically write:
%
$$\eqalign{
F^h=&\sum\,\d\,x^k\,(g\s t)^{k'}\s^\th\cos_{\th'}(\oo\cdot\nn
t+\aa\cdot\mm)\cr
X^h=&\sum\,\x\,x^k\,(g\s t)^{k'}\s^\th\cos_{\th'}(\oo\cdot\nn
t+\aa\cdot\mm)\cr}\Eq(10.23)$$
%
with suitable $\s$-independent coefficients $\d,\x$ and $\th+\th'=$ even
for the $(+,\su)$ components and odd for the $(-,\giu)$ components in
the case of $X$, and with reversed parities in the case of $F$.

\noindent{\bf Remark:}
It will be useful to use also complex notation: in this case
the $(t,\aa)$ parities reflect into $(\s,\nn,\mm)$ parities. More precisely
if $\g$ is either $\d$ or $\xi$ and if $\l\=(\nn,\mm)$ we can rewrite the
r.h.s. of \equ(10.23) as:
$$
\sum i^{\th'} \hat \g^{\th'}_{\l,k,k'} x^k (\s gt)^{k'} \s^\th
e^{i \nn\cdot \oo t} e^{i \aa\cdot \mm}\ ,\quad  \l\=(\nn,\mm)
\Eq(10.24)$$
%
where $\hat \g\in {R}$ and $\hat \g_{-\l}^{\th'}= (-1)^{\th'} \hat
\g_\l^{\th'}$; here, as usual, we use the convention that the sum over
$\l$ runs over vectors with non--negative first component
(so as not to repeat identical terms).
From \equ(10.24) follow immediately the usual parity rules:
$p\cdot d= d$ and $p\cdot p=d \cdot d= p$; $p\=$ time/angle even,
$d\=$ time/angle odd.

\vskip.3truecm
Having set the above definitions we deduce immediately from the wronskian
structure \equ(10.13), from \equ(6.15) and from the above property of $\II$
that $X^h$ will have the {\it opposite} structure to $F^h$ (\ie if
$F^h=\pmatrix{d\cr p\cr}$ then $X^h=\pmatrix{p\cr d}$). Explicit
expressions of $X$ in terms of the $\II$ operators can be found in
appendix A13, see \equ(A13.3)$\div$\equ(A13.5), for the cases
$J,J_0,g_0,f_\n$ constant.

The above remark and \equ(6.10)
imply that if $X^{h'}$ has the structure $\pmatrix{p\cr
d}$ for $h'<h$ then $F^h$ has $\pmatrix{d\cr p}$ structure. And since it is
obvious that $F^1$ has $\pmatrix{d\cr p\cr}$ structure, the \equ(10.22)
follows by induction.

To establish a connection between the above remarks and the scattering
theory (and to prove theorem 3) we consider the functions $X^\s(t,\aa)\=
X^\s(\0,\aa,t)$, see \equ(6.24), defined in \S6 and describing the $\s$
whiskers orbits with initial data at angles $(\p,\aa)$. We can write:
%
$$\eqalign{
X^s(t,\aa)=&Z(p(\aa) e^{-gt},0,\pps^s_\aa+\oo t)\cr
X^u(t,\aa)=&Z(0,q(\aa) e^{gt},\pps^u_\aa+\oo t)\cr}\Eq(10.25)$$
%
for suitable $p(\aa),q(\aa)$.
We write \equ(10.25), at $t=0$, for the $\giu$ components as:
%
$$\aa=\pps^s_\aa+\V\D^s_\aa,\qquad \aa=\pps^u_\aa+\V\D^u_\aa\Eq(10.26)$$
%
where $\pps^\s_\aa$ are the $\pps$ coordinates, see lemma 1', of the
point on the $\s$-whisker with angle coordinates $(\p,\aa)$. Then the
lemma 1' statement that :
%
$$\V Z_{\giu}(p,q,\pps)\=\pps+\V\z_\giu(p,q,\pps)\Eq(10.27)$$
%
with $\V\z_\giu$ analytic in its arguments (for $p,q$ small) and
\equ(10.26) imply that:
%
$$\V\D^s(\aa)=\V\z_\giu(p(\aa),0,\pps^s_\aa),\qquad
\V\D^u(\aa)=\V\z_\giu(0,q(\aa),\pps^u_\aa)\Eq(10.28)$$
%
Hence:
%
$$\eqalign{
\V X_\giu(t,\aa)=&\pps_\aa+\oo t+\V\z_\giu(p(\aa) e^{-gt},0,\pps_\aa+\oo
t)=\cr=&\aa+\oo t+\V\z_\giu(p(\aa) e^{-gt},0,\aa+\V\D^s(\aa)+\oo
t)-\V\z_\giu(p(\aa),0,\pps_\aa)\cr}\Eq(10.29)$$
%
and therefore we conclude that:
%
$$T^{-1}\ig_0^T (X^s_\giu(t,\aa)-\aa-\oo t) dt\tende{T\to\io}
-\V\z_\giu(p(\aa),0,\pps_\aa)+const\Eq(10.30)$$
%

The scattering phase shifts $\V\s[\aa]\=\pps^u_\aa-\pps^s_\aa
=\V\D^s_\aa-\V\D^u_\aa$, by \equ(10.26),\equ(10.28) will be:
%
$$\V\s[\aa]=\V\z_\giu(p(\aa),0,\pps_\aa)-\V\z_\giu(0,q(\aa),\pps_\aa)
\Eq(10.31)$$
%
and ``all it remains to do" is to find expressions for $\V\z_\giu$ via
\equ(10.30). Note that \equ(10.30) and \equ(10.31) are quite general and
could be used also for non even hamiltonians. But we keep concentrating
on the even case, for simplicity.

To find more concrete expressions we use \equ(10.23): if we set
$t=0,x=1$ we find in fact $\V\z_\giu(p(\aa),0,\pps^s_\aa)$ when $\s=+$ and
$\V\z_\giu(0,q(\aa),\pps^u_\aa)$ when $\s=-$ from the $\giu$ components of
$X$.

Hence we see that only the terms with $k'=0,\th=1,\th'=0$ can contribute
to $\V\s[\aa]$ as the time-angle parity must be odd (and as the
monomials in the expansion of $X$ have no discontinuity at $t=0$ if
$\th=0$). Therefore:
%
$$\V\s^h[\aa]=2\sum\,\x^h\,(\cos\aa\cdot\mm -1)\Eq(10.32)$$
%
where the $-1$ has been introduced recalling the convention that
$\V\s[\0]=\0$.

Hence in the even models the odd $\aa$-derivatives of the scattering phase
shifts vanish, at $\aa=\0$ (\ie at the symmetric homoclinic point). More
general expansions for the scattering phase shifts and for the splitting
are derived in appendix A13.

Although we have always referred to hamiltonians like \equ(9.3) with
$J,J_0,g_0,f_{\nn\, m}$ constants, we have only used parity properties
which remain unchanged if $J$ is allowed to depend on $\AA$ and if
$J_0,g_0,f_{\nn\,m}$ are allowed to depend on $I,\AA,1-\cos\f$.  The
only difference will be a more complicated wronskian, still with the
even time parity properties for its action-action or angle-angle matrix
elements and with odd parity for the action-angle and angle-action
elements.  The matrix elements will still have the property of being
expressible as power series in $x=e^{-g\s t}$ and $\s gt$ with $\s$
independent coefficients up to some parity fixing factors $\s$ as in
\equ(10.13), and $X^h$ will be expressed in terms of $F^h$ via the $\II$
operations through suitable extensions of the formulae in appendix A13.

This proves a) of the theorem 3 showing the coincidence of the
homoclinic angles in the intrinsic coordinates $\pps$ and in the natural
$\aa$ coordinates. Part b) is a simple corollary of the results in \S9,
(we allude to its check in Appendix A11).

The proof of part c),d) simply consists in exhibiting an explicit example
as we ``just" have to show that {\it in general} the homoclinic splitting is
large if $l\ge3$.

It emerges, from the example, that for the $l=2$ systems it is possible to
think that the homoclinic intersection tensors are all
exponentially small because in such cases a special property holds. Namely
that an expression like $\sum_i \nn_i\cdot\oo$ is either $0$ or it is
necessarily $\ge|\oo|$.

This property is no longer true if $l>2$ and we can obtain slow non zero
velocity $\oo\cdot\sum_i\nn_i$, much smaller than $\o_1$, even by combining
modes $\nn_i$ which have fast velocity $\oo\cdot\nn_i$ (unless of course
all the angles rotate at fast velocity).

It will be sufficient to show that, in a model, the second order
contribution to the first derivatives of the homoclinic splitting $\V
Q_\su(\aa)$ (with respect to $\aa$) define a matrix (called above the
{\it intersection matrix}) with determinant which is not exponentially
small. This means that it is not bounded by an exponential of an inverse
power of the parameter $\h$ in \equ(10.9), as $\h\to0$ at second order
(it is easy to see that if $\a_1$ is the fast angle then, to first
order, one still has exponentially small splitting at least in the
$\a_1$ direction).

The analogous analysis for the scattering phase shifts is essentially
identical and in appendix A13 we only derive the expression of the
second order phase shift without actually computing it.  In fact we
shall not really need, in the application analyzed in \S12, the
homoclinic angles in the intrinsic coordinates and, therefore, we shall
not really need the part of theorem 3 concerning the phase shifts.

As a final comment we point out that the inductive check of \equ(10.23)
yields a somewhat stronger result if one examines it more carefully. In
fact one can check, inductively, that the terms with $k=0$ have $k'=0$ as
well; and furthermore the terms with $k\ge1$ have $k'\le h-1$ for $F^h$ and
$k'\le h$ for $X^h$:
%
$$\eqalign{
F^{h\s}=& F^{h(\io)}(\oo t,\s)+\sum_{p=0}^{h-1}{(g\,\s t)^p\over p!}
e^{-g\,\s t} F^{h(p)}(e^{-g\,\s t},\oo t,\s)\cr
X^{h\s}=& X^{h(\io)}(\oo t,\s)+\sum_{p=0}^{h}{(g\,\s t)^p\over p!}
e^{-g\,\s t} X^{h(p)}(e^{-g\,\s t},\oo t,\s)\cr}\Eq(10.33)$$
%
and, of course, $\pps\to X^{h(\io)}(\pps,\pm)$ yield (different)
parametrizations of the invariant torus.
%
\vglue2.truecm

\penalty-200

{\bf\S11 Variable coefficients. Fast mode averaging}

\penalty10000

\vskip0.5truecm\numsec=11\numfor=1

\penalty10000

\def\2{{1\over2}}
\def\Im{{\,\rm Im\,}}\def\Re{{\,\rm
Re\,}}\def\bb{\V\b}\def\atan{{\,\rm arctg\,}}\def\md{{\h^{-1/2}}}
\def\pd{{\h^{1/2}}}
Let $l=3$ and consider a hamiltonian $H_\m\=H_\m(I,\AA,\f,\aa;\h)$
dependent on a small parameter  $\h$ having  the form:
%
$$\md\o B+h(\pd A)+{I^2\over 2J_0}+
J_0g_0^2(\cos\f-1)+\m \sum_{\n}\,f_{\n}\cos(\nn\cdot\aa+n\f)\Eq(11.1)$$
%
where $\AA\=(B,A)$, $\aa\=(\l,\a)$ are canonically conjugated variables;
$\n\=(\nn,n)$ is an integer vector; and where  $J\= (h'')^{-1}$,
$J_0,g_0,f_\n$ are not constants.  In fact we shall
allow $J_0,g_0,f_{\n}$ to be functions of $\h^{1/2}A,\h^{1/2}I, \h
(\cos\f -1)$. We shall call
$\bar J,\bar J_0,\bar g_0, \bar f_\n$ the values of such functions at zero
arguments, assuming them to be $\neq 0$.  This implies that when
$J_0,J,g_0$ are replaced by their values at $0$ in \equ(11.1) (\ie they
are given the barred values) then the theory of \S9 applies.
It is not restrictive to suppose
that $J$ is $I$ independent and that $J_0$ is $\f$ independent and we
shall do so.
{\it Here, the choice of the origin $\pd A=0$
is arbitrary and in fact, later,
we shall replace the center of the rescaled action variables
by an arbitrary point on the diffusion path} (of course $\pd I=0$
is instead fixed, being related to the unstable equilibrium of
the pendulum).

The $\f$-dependence of $J_0^{-1}I^2$ can be put together with the
$J_0g_0^2(\cos\f-1)$ part of the pendulum hamiltonian; and the $I$
dependence of $J$ can be removed by shifting the origin of the $I$
variables by a suitably chosen, $\AA$ dependent, quantity $G$: this can
be achieved (up to corrections in $(1-\cos\f)$ that can be included in
the $g_0$) by a canonical transformation generated by:
%
$$I'\f+A'\a-G(A'\h^{1/2})\sin\f\Eq(11.2)$$
%
and we can take $G(a)\=-\2\h^{1/2}a^2J_0(a,0)\dpr_i J^{-1}(a,i)|_{i=0}$.
Thus, we shall consider \equ(11.1) with $J,J_0$ functions of
$(\h^{1/2}A,\h^{1/2}I)$ and $g_0, f_\n$ functions of
$(\h^{1/2}A,\h^{1/2}I,\h(\cos \f -1))$. Such
%
functions will be supposed analytic in their arguments and
admitting holomorphic extensions ``by $\r_0$" in the variables
$\h^{1/2}A,\h^{1/2}I$ (for $I$ near $I=0$ and for $A$ near a real
interval $\D=\md[a_1,a_2]$) and ``by $\x_0$" in the angles.  The
functions will be supposed to admit upper bounds uniform in $\h$ as $\h\to0$
(this is only slightly more general than the assumption that they are
in fact fixed functions of $\pd I,\pd A$, which is what we really
need).  In fact we could, in most of what follows, permit a $\h$
dependence on the bounds of $f$ proportional to $\h^{-b}$ for some
$b\ge 0$: but we require boundedness to simplify the formulation of the
results, occasionally commenting on extensions of the latter type.
Uniform and positive lower bounds on $|J|$, $|J_0|$, $|g_0|$,
$|\dpr_A h|$ will be supposed to hold, as well.

The $f$ will be supposed a trigonometric polynomial in the $\aa$ angles
with degree $\le N$ for some $N>0$. In fact if we want $I$-independence of
$J$ we see from \equ(11.2) that $f$ cannot, without loss of generality, be
considered a trigonometric polynomial in the $\f$ variables even if the
original $f$ in \equ(11.1) was such.

The above hamiltonian \equ(11.1) is taken as an example because of its
relevance for the applications of \S12: and essentially all the results
of \S10 extend to the cases of variable coefficients (in the above
sense).

The dependence on the action variables through their values scaled by a
small parameter $\h^{1/2}$ is natural. At least if one thinks that
\equ(11.1) arises from the change of variables $a=\h^{1/2}A,
i=\h^{1/2}I,b=\h^{1/2}B$ accompanied by a multiplication
of the hamiltonian by a factor $\h^{-1/2}$ (which is a transformation
leaving the Hamilton equations invariant) followed by a rescaling
$\bar t=\h^{-1/2}t$ of the time (which divides the hamiltonian by another
factor $\h^{1/2}$: here $t$ is the time for \equ(11.1) while $\bar t$
is the unscaled time for \equ(11.3) below),
and starting from a hamiltonian $\bar H_\m$:
%
$$\bar\o_1 b+ \h h(a) +{i^2\over 2J_0}+\h  J_0g_0^2(\cos\f-1)+
\m\h\sum_{\nn,n}f_{\nn,n}\cos(\aa\cdot\nn+n\f) ,\quad \bar \o_1\=\o
\Eq(11.3)$$
%
with  $h$ analytic in $a$ (and $h''\neq 0$);
$J_0\neq 0$ analytic in $a,i$; $g_0^2\neq 0$ analytic in
$a,i,\h(\cos\f-1)$; and $f_{\nn,n}$ analytic in $a,i,\h(\cos \f -1)$
and vanishing if  $|\nn|+|n|>N$ for some $N>0$.

The \equ(11.3) is a natural form in which the hamiltonian appears after
the first basic approximations, in many Celestial Mechanics problems, as
the three body example discussed in \S12.

We call $\oo$ an {\it
admissible} velocity vector if $\o_1=\md\o$ and $\o_2\in\pd
[\bar\o,\tilde\o]$ varies in an interval covering the values taken
by $\dpr_A h(\pd A)$  as $A$ varies in the interval
$\D=\md(a_1,a_2)$ around
which $H$ is defined, see above.  The set $\D(C)$ of the $A$'s such
that $|\oo\cdot\nn|^{-1}< C|\nn|^2$ has relative measure $>(1-K\h^{-1/2}
C^{-1})$, for some suitable $K>0$ and $\h$ small;
(such a straightforward bound, obtained, as usual, by summing up
over all $\nn\neq 0$ the resonant intervals of length
$\sim (C \h |\nn|^2 \n_1)^{-1}$, could be improved by
taking into account that the centers of the above resonant intervals
have to be in $\D$: this observation leads to a relative measure
$> (1-K'\pd C^{-1})$ with any $C>\bar C \md$ for a suitable $\bar C$).

We fix $\oo=(\o_1,\o_2)$ admissible and verifying a diophantine
condition, $|\oo\cdot\nn|^{-1}<C|\nn|^2$, for some $C>0$ and we
consider the invariant torus constructed by lemma 1', \S5, with
rotation velocity $\oo$, (when existing).

A precise description of what we have in mind by saying ``constructed via
lemma 1'$\,$" is as follows.

We are in a situation considered already in the corollary
to lemma 1' described in \S5, see \equ(5.91) and following.  The
parameters in \equ(5.96) are, of course, $\h$ dependent in the present
case.  And we easily see that $\x_0$ (hence $\hat\x_0$), can be
taken $\h$ independent, while $E_0,\h_0,\G_0,\r_0,\tilde\e_0$
can be taken proportional to $\md,\h^{-1},1,\h^{-1/2},1$ respectively.

Then \equ(5.96) shows that we shall be able to construct a family of
invariant tori with rotation velocities $\oo$ and Lyapunov exponent
$g'$ with $\o_1\=\md\o$, with the second angular velocity given by
$\o_2=\dpr_A h(A)$ for $A$ in $\D(\m^{-1/7})$, \ie for a set
of $a$'s with relative measure $(1-K\h^{-1/2}\m^{1/7})$ if:
%
$$|\m|<\m^*\=B^*\Big[[(\h^{-1}\md\pd)^7(\md)^{14}\h]^7
(\h^{-1})\Big]^{-1}
<B^* \h^{92}
\Eq(11.4)$$
%
where $92$ is what comes out of a blind application of the general
results of \S5 (see \equ(5.96)).
This constant can be greatly improved by taking into
account the special properties of our particular case.
Already by using more carefully the estimates of \S 5,
\ie using the weaker (but sufficient) conditions \equ(5.76) and \equ(5.85)
with $C=\h^{-(\d+1/2)}$ one would obtain a condition like
$|\m|<\bar B^* \h^{16+6\d}$ with the  set $\D(\h^{-(\d+1/2)})$
having relative measure $>(1-K\h^\d)$.

However, for simplicity, we shall
use \equ(11.4).
If $f$ is
supposed to be bounded proportionally to
$\h^{-b}$,
for some $b\ge0$, instead
of being uniformly bounded, the results of the theorem change
by suitably increasing $92$ to a (linearly $b$--dependent) new constant.

The above invariant tori for the hamiltonian \equ(11.1) are run quasi
periodically with angular velocity
$\oo$ and Lyapunov exponent $g$ which have the form:
%
$$\o_1=\bar \o_1\h^{-1/2},\quad \o_2=\bar\o_2 \h^{1/2},\quad
g=g_0(1+\g'(\h))\Eq(11.5)$$
%
with $\g'\to0$ as $\h\to0$, $\bar\o_2\in[\bar\o,\tilde\o]$.

Given any admissible $\oo$ verifying a diophantine condition
$|\oo\cdot\nn|^{-1}<C |\nn|^2$ with $C<|\m|^{-1/7}$, there will
be an invariant torus run quasi periodically with angular velocities
$\oo$, if $\h$ is small enough and if $\m$ verifies \equ(11.4).

In what follows we suppose that $\h$ is small enough, that $\m$ verifies
\equ(11.4) and {\it study one of the above invariant tori}, with  prefixed
angular velocities and Lyapunov exponents given by $\oo,g$ like in
\equ(11.5). And we want to estimate the whiskers splitting at the
symmetric homoclinic point $\f=\p,\aa=\0$.

\noindent{\bf Remark:}
It is convenient to fix $a\in  \lis \D \=\pd \D$
as the origin of the unscaled variables so that $\o_2\= \dpr_a h(a)$
(obviously the condition on $\m$ will not be affected by such a choice
and we shall assume that the associated invariant torus is persistent).
{\it Hence, the values of $\bar J,\bar J_0, \bar g_0, \bar f_\n$ are now
the values of the corresponding functions evaluated at $\pd A= a$,
$\pd I=0$.}

\vskip.3truecm
Consider first the case $\m=0$.  In this case one can perform an
elementary discussion of the separatrix quadratures.  For instance if
$\m=0$ and the pendulum and rotators are independent in the sense of
\S4 (\ie $J_0,g_0$ are $A$ independent) one finds:
%
$$
\dot\f={I\over J_0}-{I^2\over 2J_0}{\dpr_I J_0\over J_0}- 2\dpr_I(J_0
g_0^2)\sin^2{\f\over2},\qquad
I=\pm 2 J_0 g_0\sin{\f\over2}\Eq(11.6)$$
%
One can check that the pendulum wronskian matrix elements verify, for
$\h$ small:
%
$$|w_{ij}|\le \bar u\h^{-1},\qquad{\rm if}\quad |\Im t|\le {\p\over 2
\bar g_0}(1-\k\h^{1/2})
,\quad |\Re t|\le \bar g_0^{-1}
\Eq(11.7)$$
%
Here the constants
$\k,\bar u$ have
to be taken large enough, depending on the
functions $J,J_0,g_0$. Furthermore the functions $w_{ij}(t)$ admit
expansions \equ(10.13): this is now true also for the components that
were zero in the cases considered in \S9,\S10. In appendix A9, part
4, we have studied the full wronskian in a rather general pendulum
system. Also the parity properties in $t$ of the full  wronskian are the same
(\ie even in the action-action or angle-\-angle blocks and odd in
the other, mixed, blocks). The above ``large" domain bounds \equ(11.7),
are useful in the fast rotation cases (\ie when both frequencies have
size of $O(\md)$, or $\bar\o_2=O(\h^{-1})$), discussed in \S10 but they
will not be really necessary in what follows (boundedness in a finite
strip being sufficient).

The parity properties and the analyticity together with the bounds
\equ(11.7) are the only ingredients necessary to perform the analysis of
\S9,\S10 as we have repeatedly claimed and as it is easy to check.

Therefore the same conclusions about the homoclinic angles at the
symmetric homocline $\aa=\0$ hold.  In particular we consider the
intersection matrix to the lowest non trivial order (\ie to the lowest
order that makes its determinant not exponentially small as $\h\to0$,
(namely the second)).  By the final result of appendix A13, see
\equ(A13.22),\equ(A13.23):
%
$$\eqalign{
M=&\pmatrix{0&\d\cr\d& \bar\g \m\cr},\qquad \d\=
\m^2\sum_{\V\m\,fast\,\atop\nn\,slow}
nm \m_1\n_2K_{n} (-1)^m{\bar f_{\nn,n} \bar
f_{\V\m,m}\over \bar J_0\bar g_0^2}
{\oo\cdot\nn\over(\oo\cdot\V\m)^2}\cr}
\Eq(11.8)$$
%
is the leading part of the intersection matrix
$\dpr_\aa \V Q_\su(\V 0)$
(as $\h\to0$ and at
second order in $\m$);
see \S 10, \equ(10.7), for the definition of $Q$; $\d$ denotes here
the $\d_{12}=\d_{21}$ of appendix A13; $\V \o$ is defined in
\equ(11.5),
$\bar\g$ is a constant at fixed
$\bar\o_1,\bar\o_2$ and the first matrix element is exponentially small
(to second order); $K_n,\G_{mn}$ are defined by the integrals in
\equ(A13.21),\equ(A13.23); and, finally, see the remark after
\equ(11.5) for the values of $\bar f_\n,\bar J_0, \bar g_0$.

The \equ(11.8), therefore, shows that
generically $\d=O(\m^2\h^{3/2})$, and hence if $\m<\h^{1/2}$
(consequence of \equ(11.5)) this dominates the terms of order $\m^3$
and higher and the splitting is not exponentially small as $\h\to0$,
but of the order of $\det$ $M$ $=$ $-\d^2$.

The $\d$ in \equ(11.8) has to be multiplied by $\h^{1/2}$ if one wants
to regard it as the intersection tensor for \equ(11.3), (the reason
being that with our definitions the ``homoclinic angles" have the
dimensions of an action and scale as such upon coordinates rescalings).

A more formal statement of the above conclusions is:
\vskip0.3truecm
\noindent{\it {\bf Lemma 4}: Consider the hamiltonian
\equ(11.1) near the segment $\D$ where $\dpr_A h$ varies, as
$A\in \D$, in an interval $\pd[\bar \o,\tilde\o]$ with $\bar\o>0$.
There is $c>0$ such that if $\m=\h^c$  then \equ(11.1) admits
invariant tori which, if $\h$ is small enough, have whiskers with a
``homoclinic splitting"
($\=$ determinant of the above intersection matrix
$\dpr_\aa \V Q_\su (\V 0)$) of $O((\h^{3/2}\m^2)^2)$ as $\h\to0$, provided the
sum in \equ(11.8) does not vanish accidentally.}
\vskip0.3truecm
As a second important extension of the results of \S9,\S10 we consider a
situation also met in some applications, (\eg see \S12). Namely a
hamiltonian obtained by adding to $H_\m$ in \equ(11.1) a further
perturbation:
%
$$F=\sum_{0<\n<N} F_\n\cos\n\, \nn_0\cdot\aa\Eq(11.9)$$
%
where $F$ is analytic in $\h^{1/2}A,\h^{1/2}I$ and in $(\cos\f-1)$
{\it but it is not small}.

We suppose that $F$, which by assumption contains only harmonics that are
{\it multiples of a given mode} $\nn_0$, depends on the fast variable
$\l\=\a_1$: \ie $\n_{01}\ne0$. We say that $F$ is {\it unimodal} on a fast
mode, with mode $\nn_0$.

Hence the hamiltonian that we consider is:
%
$$H\=H_{\m,F}=H_\m+F,\qquad{\rm
or}\qquad \bar H\=\bar H_{\m,F}=\bar H_\m+\h \bar F\Eq(11.10)$$
%
depending on whether we regard it as a function of $A,B,I$ or of
$a=\h^{1/2}A,b=\h^{1/2}B,i=\h^{1/2}I$.

We shall refer to the above two equivalent representations of the same
mechanical problem as the {\it scaled representation} ($H$) and as the
{\it unscaled} or {\it natural} representation ($\bar H$).

In the case \equ(11.10) we cannot apply directly the results of
\S9,\S10. But one can remark that the angle $\aa\cdot\nn_0$ is a ``fast
angle", \ie it rotates (if unperturbed) at speed $\h^{-1/2}\o_1$
compared to the speed $\o_2\h^{1/2}$ of the ``slow mode"
$\a=\a_2$.

The idea of the {\it averaging method} is just the remark that {\it
quickly oscillating} perturbing forces of order $1$ can, in fact, for many
purposes, be treated as small. However
the method does not consist in the brutal setting of $F=0$, so
familiar in heuristic treatments, but rather consists in treating $F$
as a perturbation by putting a formal parameter $\b$ in front of the
total perturbing terms $\b(F+\m f)$, and by taking as many orders in
$\b$ as it might be necessary to match the precision required, when
$\b=1$ (and eventually setting $\b=1$).

We consider the \equ(11.1) perturbed by \equ(11.9) \ie we consider
\equ(11.10):
%
$$H=\md\o B+\pd\bar\o_2 A+\h{A^2\over 2J}+{I^2\over 2J_0}+
J_0g_0^2(\cos\f-1)+\b(F+\m f)\Eq(11.11)$$
%
where we have expanded $h(A)= \pd\bar\o_2 A+\h{A^2\over 2J}$
for a $J$ analytic in $\pd A$ ($J(0)\neq 0$) and
with the functions $J,J_0,g_0,F,f$ having the analyticity and
boundedness properties described above around a line:
%
$$\LL=\{\h^{-1/2}[a_1,a_2]\}\times \{I=0\}\=\D\times\{I=0\}\Eq(11.12)$$
%
In addition $F,f$ will be supposed to be trigonometric polynomials of
degree $\le N$
in $(\a,\f)$, allowing also an analytic dependence of the Fourier
coefficients on the variable $z\=\h (1 - \cos \f)$.

In the present case we show that the method allows one to establish the
existence of the invariant tori and to compute the leading order
expressions of the homoclinic angles, as $\h\to0$, $|\m|=\h^c$,
with $c>0$ large enough.
\vskip0.3truecm
\noindent{\it {\bf Lemma 5}:
Fix $x>0$ and $0<\s<1/2$. There exist constants $\h_0$,
$B^*$ $>0$ such that if  $|\m|$ $< \h^c$,  $c>10$,  and $|\b|$
$<$ $B^*$  $\h^{-\s}$ then one can construct
a holomorphic canonical map casting the hamiltonian $H$ \equ(11.11), for
all $0<\h<\h_0$, in a form:
%
$${\o B\over\h^{1/2}}+\h^{1/2}\bar\o_2
A+{\h A^2\over 2\hat J (\h^{1/2} A)}+
g(\h^{1/2}A,pq)pq+\h^x\hat  f(p,q,A,\l,\a) \Eq(11.13)$$
%
with $\hat J,g,\hat f$ (depending also on $\b$) bounded and
holomorphic in the complex domain:
%
$$\big\{|p|,|q|<\k,\,
|A|<\r\h^{-1/2},\,|\Im\a_j|<\x,\,|\m|<\h^c\big\}
\ \times\ \{|\b|<B^*\h^{-\s} \}
\Eq(11.14)$$
%
for suitable $\h$--independent  $\k,\r,\x>0$. The smallness condition on
$\h_0$ can be taken to be
$\h^{1/2}_0 \log \h_0^{-1} < D x$ for  a suitable $D>0$
}.
\vskip0.3truecm

The reason for the validity of \equ(11.13) is simply that in $H_{0,F}$,
see \equ(11.10), no strong resonances occur with $|\nn|\le O(\h^{-1})$,
all denominators being bounded below by $O(\pd)$. Hence
we are in an essentially  better situation compared to that in \S7,
as we can proceed to perturbation theory of much larger order,
essentially $O(\h^{-1/2})$, after taking advantage in the first step
of {\it large} denominators (of order $O(\h^{-1/2})$) to reduce the
size of $F$.

The method used to deduce \equ(11.13) is the usual method developed in
the Nekhoroshev resonance theory. Note, however, that in \equ(11.13) the
angles $\aa$ are in the remainder term (while, perhaps, one would expect
them to remain of order $\m$): the mechanism for this is essentially the
same as the one used in \S7.

{\it The estimates leading to \equ(11.13) are carried out in detail in
appendix A10}, using the scaled variables form $H_{\m,F}$ of the
hamiltonian.

Once the  hamiltonian has been put in the above form we are in a
situation in which the theory of \S5 becomes applicable, at least if
$c$ and $x$ are chosen large enough.
Assuming that the value $c=92$ (\ie the value dictated by the ``blind"
bound discussed above: see \equ(11.4))
is also large enough for Lemma 4 to hold, and taking $|\m|=\h^c$
we see that the methods of \S 2 $\div$ \S 8 are applicable.

In fact we see that for a set of $A\in \Si_\h\subset\D$ of relative
measure $\le K\md\h^{x/7}$, with $K$ being a suitable constant, it is (if
$\o_2=\dpr_A h(A)$ $\=$ $\h^{1/2}\bar \o_2$ $+$
$\dpr_A[\h A^2/(2J(A))]$; see also the definition of ``admissible" after
\equ(11.3)):
%
$$|\h^{-1/2}\bar\o_1\n_1+\o_2\n_2
|^{-1}\le \bar g_0^{-1}|\nn|^{2}
\h^{-x/7}
\Eq(11.15)$$
%
and for each such $A$ there is an invariant whiskered torus run quasi
periodically with angular velocities $\oo=(\h^{-1/2}\bar \o_1,\o_2)$.
Thus if Lemma 4 holds and if $x>2597$ (so that the round spacing
$\sim O(\h^{x/7})$ is smaller than the ``homoclinic splitting" $\sim
O((\h^{3/2} \m^2)^2)$)
we see that drift and diffusion take place along $\LL$.
\vskip0.3truecm
\noindent{\bf Remark 1)}:
Thus we see that along the line $\LL$ there is a whiskers
ladder with very small rounds spacing, as $\h\to0$: \ie of order
$O(\h^{x/7})$.  We see that this is so {\it in spite} of the presence
of $F$ (which is of $O(1)$) and of $\m f$ (which is smaller than $F$ but
still very large, of $O(\h^c)=O(\m)$, compared to the spacing in the
ladder).
\vskip0.3truecm

\noindent{\bf Remark 2)}:
The holomorphy and uniform boundedness in $\b$ is very
important: it allows us to conclude that the tori equations as well as
those of their whiskers can be computed as power series in $\b$.  And
since $\b=1$ is inside the radius of convergence
($B^*\h^{-\s}$) we get
immediately that the various orders in $\b$, ({\it note the distinction
between orders in $\b$ and orders in $\m$ or in $\h$}), give
contributions to the whiskers parametric equations or to the size of
the homoclinic angles whose size decreases with the order $k$ in $\b$
as $\h^{\s k}$ at least.  Hence if to some order some contribution has a
size of some power of $\h$ {\it it becomes a matter of a calculation to
finite order to check if it is the dominant contribution to the
quantity being calculated}. See remark 5) below.
\vskip0.3truecm

\noindent{\bf Remark 3)}:
All the above invariant tori will have whiskers homoclinic
at $\f=\p,\aa=\0$ because all the above hamiltonians are even in the
sense of \S9 (it is easy to see, although not necessary, that all the
canonical changes of coordinates that we use (in appendix A10) to
perform the perturbation theory construction of $\hat f$ do not change
the even nature of the hamiltonians).  Hence it makes sense to ask
about their homoclinic angles or tensors.
\vskip0.3truecm

\noindent{\bf Remark 4)}:
The above analysis shows that if we introduce an artificial
parameter $\b$ that we put in front of both $\h F$ and $\h\m f$, then
for $\h$ small, we can compute the whiskers for $\b=1$ in power
series of $\b$.  In other words we can apply perturbation theory to
compute the intersections tensors.  We have to push perturbation theory
up to an order $n$ (in $\b$) such that the exactly computed terms are
larger than the remainder (which is of order $\h^{-n/2}$).  In concrete
cases this might mean just the second order (never the first as we have
seen that to first order the intersection tensors are degenerate).
\vskip0.3truecm

\noindent{\bf Remark 5)}:
And drift or diffusion will follow along the line $\LL$ by
the theory of \S8, for most choices of $\b$ around $\b=1$
(and possibly $\b$ {\it exactly} equal to $1$), provided
there is an order at which one sees that the homoclinic intersection
tensor is not exponentially small.  Because in this case the rounds
spacing in the ladder of whiskers is, by the averaging phenomenon,
always smaller than any power of $\h$ (being $O(\h^{x/7})$ for a prefixed $x$,
if $\h$ is small enough) and the splitting cannot be exponentially
small unless there is a cancellation between finitely many orders in
$\b$ on a segment of order $O(1)$ of $\LL$.  By the analyticity in $\b$,
this can only be for exceptional values of $\b$.  Of course in a given
problem one has to exclude that $\b=1$ is an exceptional value
(unless $\b$ happens to be a natural parameter in the problem and one
is just interested in showing existence of drift or diffusion for some
values of $\b$).  The check of the latter property is reduced in
general, by the above analysis, to a finite order calculation which, in
concrete cases, could be conceivably performed with the help of an
electronic device.

\vskip.3truecm
The analysis is thus concluded and one can try to apply it to some concrete
problem. This is better than trying to continue proceeding in general
because in this way we can avoid formulating too abstract results, and
apparently unphysical hypotheses on the perturbations.
%
\vglue2.truecm

\penalty-200

{\bf\S12 Planetary precession. Existence of drift and diffusion}

\penalty10000

\vskip0.5truecm\numsec=12\numfor=1

\penalty10000

\def\BMK{{\bar M^2\over \bar K}}\def\arctg{{\rm arctg\,}}
\def\4{{1\over4}}\def\2{{1\over2}}\def\8{{1\over8}}
\def\pd{{\h^{1/2}}}\def\md{{\h^{-1/2}}}
%
Imagine a planet $\EE$ as a homogeneous rigid body with cylindrical
symmetry. The body surface will be described in polar coordinates by
$\r=R h(\cos\th)$ for some $R$ and some $h$, $R>0,\,0<h\le1$, \eg for a
rotation ellipsoid with equatorial radius $R$ and polar radius
$R/(1+2\h)^{1/2}$ it is $h(z)=(1+2\h z^2)^{-1/2}$.

We suppose the planet center $T$ to revolve on a keplerian orbit
$t\to\V r_T(t)$: the orbit plane will be called the {\it ecliptic}
plane and $\V{{\bar k}}$ will denote its unit normal vector which sees
the planet rotating counterclockwise.

The longitude $\l_T$ of $\V r_T$ on the ecliptic will be reckoned from
the major semiaxis of the ellipse; hence $\l_T=0$ is the {\it aphelion}
position \ie when $r_T \equiv |\V r_T|$ is maximal: $r_T(0) = a (1
+e)$, $a$ being the major semiaxis of the Keplerian ellipse and $e$ its
eccentricity.

With these conventions, $r_T$ and $\l_T$ are related by the {\it focal
equation} (see, e.g. [G] p.304):
%
$$r_T \equiv |\V r_T| = {p \over 1- e \cos \l_T}, \quad p \equiv a (1-e^2).
\Eq(12.1)$$
%

{\it In this section we shall always denote by $e$ the eccentricity of
the orbit and to avoid confusion with the Neper constant we denote the
exponential of a number $\a$ by $\exp\a$, while $e^\a$ will denote
everywhere the $\a$-th power of the eccentricity $e$}.

Kepler's law, ${\dot \l}_T r_T^2 = const$, and \equ(12.1)
imply that if $\l$ is the keplerian {\it average anomaly}:
%
$$\l \= (1-e^2)^{3/2}\ii_0^{\l_T}{d\b\over(1-e\cos\b)^2}
=\l_T + 2e\sin \l_T +{3\over 4} e^2 \sin 2 \l_T +\ldots,\Eq(12.2)$$
%
then:
%
$$\l_T=\l-2e \sin\l+(5/4)e^2\sin2\l+
\ldots,\ \quad {a \over r_T} = 1-e\cos\l+e^2\cos2\l+ \ldots\Eq(12.3)$$
%
and the motion is $\l \rightarrow \l+\o_Tt$, where $2\p/\o_T = 2\p a^{3/2}
g_N^{-1/2}$ is the year of the planet, $g_N\=k(m_S+m_T)$ if $k$ is Newton's
constant and $m_T,m_S$ are the masses of the planet and of its star.

The unit vector $\V{{\bar \imath}}$ pointing from the focus towards the
aphelion will be used together with $\V{{\bar k}}$ and a third vector
$\V{{\bar \jmath}}$ to form an orthonormal triad $(\V{{\bar
\imath}},\V{{\bar \jmath}},\V{{\bar k}})$ of fixed directions in space.

A comoving frame $(T;\V\imath_1,\V\imath_2,\V\imath_3)$ will be attached to
the planet with $\V\imath_3$ axis coinciding with the symmetry axis ({\it
polar axis}) of the planet and $\V\imath_1$ is arbitrarily chosen on the
{\it equatorial plane}, (\ie the plane orthogonal to $\V\imath_3$).

The position of $(T;\V\imath_1,\V\imath_2,\V\imath_3)$ referred to
$(T;\V{{\bar \imath}},\V{{\bar \jmath}},\V{{\bar k}})$ will be determined
by the three Euler angles $\thb,\fb,\psb$ with $\thb$ being the angle
between $\V{{\bar k}}$ and $\V\imath_3$, $\fb$ being the angle on the
ecliptic between $\V{{\bar \imath}}$ and the ecliptic -- equator node
$\V{{\bar n}}$, while $\psb$ is the angle on the equator between
$\V{{\bar n}}$ and $\V\imath_1$, (drawings with the above and the
following notations can be found in [G, p.318 $\div$ 321]).

In the coordinates $(\thb,\fb,\psb)$ the motion of the planet $\EE$
is described by the Euler-Lagrange equation associated to the lagrangian:
%
$$\LL\equiv
{1\over2}J_3(\dot{{\bar \f}}\cos{{\bar \th}}+\dot{{\bar \ps}})^2+
{1\over2}J_1({\dot{{\bar \th}}}^2+{\dot{{\bar\f}}}^2\sin^2\thb)+
\ii_\EE{k m_T m_S\over|\V r_T+\V x|}{d\V x\over|\EE|}\Eq(12.4)$$
%
where  $J_3, J_1\=J_2$
are the inertia moments of $\EE$, $m_T$ its mass, $|\EE|$
its volume, $m_S$ is the mass of the heavenly body keeping the planet
$\EE$ on its celestial path, $t\to\V r_T(t)$, and $k$ is Newton's
constant.

Very remarkable is a theorem by Andoyer-Deprit, see [G, p.318 $\div$ 321],
which produces canonically conjugate variables casting the Hamiltonian
corresponding to $\LL$ in a simple form.  To describe such variables we
consider the unit vector $\V k$ parallel to the angular momentum $\V
K_T \equiv M\V k,\, M=|\V K_T|$ and call {\it angular momentum plane}
the plane orthogonal to $\V k$.  We define the angles $\d$ and $\th$
between $\V{{\bar k}}$ and $\V k$ and, respectively, $\V k$ and
$\V\imath_3$, so that the components of the angular momentum on
$\V{{\bar k}}$ and on $\V\imath_3$ will be, respectively:
%
$$K=M\cos \d,\qquad L=M\cos \th\Eq(12.5)$$
%

We also associate with $\V K_T$ two more remarkable angles: in fact the
angular momentum plane has a node $\V m$ on the ecliptic plane and one
$\V n$ on the equator plane. We call $\g$ the angle on the ecliptic
between  $\V m$ and $\V{{\bar\imath}}$ and $\f$ the angle on the angular
momentum plane between the node $\V m$ and the node $\V n$. Finally we let
$\psi$ denote the angle on the equator between $\V n$ and $\V \imath_1$.

Deprit's theorem states that  the variables $(K,\g),(M,\f),(L,\psi)$
are canonically conjugate for the hamiltonian $H$ associated to $\LL$,
and that $H$ in such variables takes the form:
%
$$H={M^2\over2 J_3}+{J_3-J_1\over 2J_1J_3}{(M^2-L^2)}+\o_TB+V\Eq(12.6)$$
%
where $V$ is the integral in \equ(12.4) changed in sign, and $(B,\l)$ is
a fourth pair of canonical coordinates with $\l$ being the average
anomaly of the planet in its revolution about the ellipse focus, see
\equ(12.2). The pair $(B,\l)$ has been introduced in order to eliminate
the explicit time dependence from the hamiltonian.

It is convenient to bear in mind that $\o_T B$ has a simple physical
interpretation: it is the energy stored in the device providing the
external force that keeps the heavenly body $\EE$ on its keplerian
celestial path, $t\to\V r_T(t)$.

By symmetry considerations it is clear that $V$ is a function of the
angle $\l$ (or of $\l_T$), and of the angle $\a$ between the position
vector $\V r_T$ and the axis $\V\imath_3$ of the planet.  In fact, it
is easy to find out an expression for $V=V(\a,\l)$.  Recalling the
relation between the Legendre polynomials $P_l(z)$ and their generating
function $(1+x^2-2xz)^{-1/2}$, one finds:
%
$$\eqalignno{
V=&{-k m_S m_T\over|\V r_T|}\ii{d\V x\over|\EE|}\left(
1+({|\V x|\over r_T})^2
+2{|\V x|\over r_T}({\V x\over|\V x|}\cdot{\V r_T\over
r_T})\right)^{-1/2}=\cr
=&{-k m_S m_T\over|\V r_T|}\sum_{l=0}^\i\ii_\EE {d\V x \over |\EE|}
({-|\V x|\over
r_T})^l P_l({\V x\over|\V x|}\cdot{\V r_T\over r_T})&\eq(12.7)\cr}$$
%
and the above expression can be used to compute the series expansion of
the potential energy in the eccentricity.

If we perform the calculation neglecting the terms in \equ(12.7) which
come from $l\ge4$ (which roughly means neglecting $O(R/a)^2$ compared
to $1$, with $R$ being the planet radius and $a$ being the major
semiaxis of its orbit, because the odd orders in $l$ vanish by
symmetry), it is well known, (see [L]), that the only properties of
the rigid body that matter are the inertia moments.  It is also clear
that the hamiltonian must be expressible in terms of the physical
quantities that establish the orders of magnitude of the problem.  Thus
we expect the hamiltonian to be a function depending, besides on the
angles and their conjugate moments,  on the daily rotation of the
planet $\o_D$, on the yearly rotation $\o_T$ and on the inertia moments
$J_i$. The physical periods are introduced into the problem through the
initial data, which we denote $\bar K$, $\bar M$, $\bar L$,
around which we want to set up a perturbation theory.
Denoting $\h\=(J_3-J_1)/J_3$, $\h'\=(J_3-J_1)/J_1$,
$\o_D\=\bar M/J_1$,  $\cos i_0\=\bar K\bar M^{-1}$ (the cosine of the
planet inclination $i_0$ over the ecliptic), and:
%
$$\o=\o_T^2\o_D^{-1}\cos i_0\Eq(12.8)$$
%
it is a classical calculation to check that the exact form of the
hamiltonian is, to order $k$ in the eccentricity $e$ and denoting
$[\cdot]^{[\le k]}$ the truncation to power $k$ of a series in $e$:
%
$$H={M^2\over2J_1}- \h'{L^2\over2J_3}+\o_TB+
\h\o\BMK\bigl[{(1-e\cos\l_T)^3\over(1-e^2)^3}\cos^2\a\bigr]^{[\le
k]}\Eq(12.9)$$
%
The model thus obtained will be called the {\it D' Alembert
precession-nutation} model. The reason for the above definition
[especially \equ(12.8)] is that $-\h\o$ has the
physical meaning of the average angular velocity
of {\it precession of the equinoxes}, as it appears also from the
following analysis: for more details see Appendices 6,7 where we discuss
this celebrated result of D'Alembert using canonical formalism.

Concerning the approximations involved in passing from \equ(12.6) to
\equ(12.9) we note that the terms of $O(\h (R/a)^4)$ are believed to be
really negligible for all practical purposes in many astronomy problems
while, for the truncation approximation, D' Alembert did not have data
on the Moon mass accurate enough to wish to consider orders $k>0$ in
his theory of lunisolar precession.  Here we consider only the case $k=2$:
but it is clear that what follows does not really require
either the truncation or neglecting the higher orders in $\h(R/a)^2$.
Considering such more general problems would only lead to some (minor)
modifications, except in the case $k=0$, where the result is simply
false (\ie no
drift or diffusion can take place) and the case $k=1$ which cannot
be decided by a ``lowest order'' perturbation theory as, instead, the
cases $k\ge2$ are, (at least if the initial data are chosen as we are
going to do).

To compute the D' Alembert hamiltonian \equ(12.9) we have, of course, to
find how $\cos^2\a$ depends on the canonically conjugated variables
$(K,M,L,B,\g,\f,\ps,\l)$.

Simple spherical trigonometry arguments, see appendix A8, lead to:
%
$$\eqalignno{
\cos\a=&\sin(\l_T-\g)\,\bigl(\cos\f\sin\th\cos\d+\sin\d\cos\th\bigr)
-\cos(\l_T-\g)
\sin\th\sin\f=\cr
=&\sin(\l_T-\g)\left((K/M)\left(1-(L/M)^2\right)^{1/2}\cos\f
+(L/M)\left(1-(K/M)^2\right)^{1/2}\right)-\cr
&-\left(1-(L/M)^2\right)^{1/2}\sin\f\cos(\l_T-\g)
\equiv s(\k\n c_\f+\m\s)-\n  s_\f c&\eq(12.10)\cr}$$
%
where:
%
$$\matrix{\m \= L/M, \cr \k \= K/M, \cr} \quad \matrix{\n^2 \equiv
1-\m^2, \cr \s^2\equiv 1-\k^2, \cr} \quad \matrix{s
\equiv\sin(\l_T-\g), \cr c\equiv\cos(\l_T-\g),\cr} \quad \matrix{s_\f
\= \sin\f, \cr c_\f \= \cos\f.  \cr}\Eq(12.11)$$
%
Hence we see that \equ(12.9), as well as the full \equ(12.7), does not
contain $\ps$. Therefore $L$ is a constant of motion and it will be
regarded as a parameter. It has the physical interpretation that
$\bar\n=(1-L^2/\bar M^2)^{1/2}$ is the sine of the angle between the spin axis and the
symmetry axis and in the theory of nutation it is called the {\it eulerian
nutation constant}, at the initial {\it epoch}, \ie at a prefixed reference
time, when $\bar M,\bar L,\bar K,\bar\f,\bar\ps,\bar\g$ are the values of
the canonical variables.

Therefore setting:
%
$$V\=\bigl[{(1-e\cos\l_T)^3\over(1-e^2)^3}\cos^2\a\bigr]^{[\le
2]}\=V_0+eV_1+e^2V_2\Eq(12.12)$$
%
and $\g_0\=\g-\l,\,\ch_0\=\f,
\l_0\=\l$ and using:
%
$$
{(1-e\cos\l_T)^3\over(1-e^2)^3}=1+{3\over2}e^2-3e\cos\l+{9\over2}e^2\cos2\l
+\ldots\Eq(12.13)$$
%
one finds that:
%
$$V=\sum_{h=0}^2 e^h\sum_{r,p,j\atop r,\,p+h=even}
\bar B^h_{rpj}\cos(r\g_0+p\l_0+j\ch_0)\Eq(12.14)$$
%
where $\bar B^h_{rpj}$ are suitable coefficients depending on $M,K$,
listed in appendix A14. For  instance:
%
$$\bar B^0_{000}\=c_0\={1\over 4} [2 \s^2 \m^2 +
(1+\k^2)\n^2] \ , \qquad
\bar B^0_{201} \= d_1 \= - {(1+\k)\m\s\over 2} \n \Eq(12.15)$$
%

Thus, setting $\bar E\= \o \BMK$ and  dropping from \equ(12.9) the
additive constant $\h' L^2/(2J_3)$,
the full (``order 2") D' Alembert hamiltonian, in the
canonical variables $(\g_0,K_0)$ $\=$ $(\g-\l,K)$,
$(\chi_0,M_0)\=(\f,M)$, $(\l_0,B_0)\=(\l,K+B)$, takes the form:
%
$$\o_T B_0 + h_0(K_0,M_0;\h) + \h f_0(K_0,M_0,\g_0,\chi_0,\l_0;e) \ ,
\Eq(12.16)$$
%
where:
%
$$\eqalign{ h_0& \= -\o_T K_0 + {M_0^2\over 2J_1} +
\h \bar E \ c_0(K_0,M_0)\cr
f_0& \= \bar E \ [V_0 - c_0 + e V_1 +
e^2 V_2] , \qquad \langle f_0 \rangle = {3 \over 2} e^2 c_0\ ,\cr}
\Eq(12.17)$$
%
with $V_i=V_i(K_0,M_0,2\g_0,\chi_0,\l_0)$,
$c_0=c_0(K_0,M_0)$, and $\langle\cdot\rangle$ denotes average over the
angles.

{\it Note that,
taking into account the coefficients calculated in appendix A14 and
neglecting terms of $O(\n^2)$, the integrable part
of the hamiltonian
becomes:  $c_0= \s^2/2= (1- K_0^2/M_0^2)/2$
leading to the standard ``D' Alembert equinox precession"
$\dot \g = - \h \o$}, see \equ(12.8) and appendix A6,A7.

To analyze the motions of the D' Alembert hamiltonian we shall consider
particular regions of phase space that we single out also for
convenience and for the sake of concreteness.  Fix $\bar M, \bar K >0$,
such that $\bar K/\bar M\le 1/8$; the hamiltonian \equ(12.16) will be
studied in the vicinity of the set:
%
$$\U_0=\{ M_0=\bar M,\ |K_0-\bar K|\le {\bar M\over 8} \ \}\Eq(12.18)$$
%
where $\bar M,\bar K$ are values around which
drift or diffusion will take
place; the role of $B_0$ is trivial: see, however, end of \S 5 [especially
\equ(5.95)] and the final remark of \S 8.
From now on $(\m,\n,\k,\s)$ {\it denote the functions in \equ(12.11)
evaluated  at}  $(K,M)=(K_0,M_0)$.
%
The condition $|\bar K/\bar M|\le 1/8$
implies that, on $\U_0$, $|\k|\le 1/4$, so that $\s$ is well defined
and $15/16 \le \s^2\le 1$.

The physical meaning of a drift along $\U_0$ is a variation of
$K$ at $M,L$ fixed: hence it represents a change in the inclination of
the spin axis, see \S 1.

Furthermore, the hamiltonian \equ(12.16) is holomorphic in a complex
neighborhood of $\U_0$.  To be more precise, let $\bar L\= L$ be such
that $15/16< \bar \m <1$, ($\bar \m \= \bar L/\bar M$), let $\r_0>0$
and define:
%
$$ \U_{0,\r_0}\= \bigcup_{(K_0',M_0')\in \U_0} \{ (K_0,M_0)
\in C^2 \ :\ |K_0-K_0'|<\r_0\ ,\ |M_0-M_0'|<\r_0 \}\ .  \Eq(12.19)$$
%
Then, we can choose $\h_0,e_0<1/4$, $\r_0>0$ and an arbitrary $\x_0$
such that \equ(12.16) is holomorphic on:
%
$$ \{ B_0\in C\}\times
\U_{0,\r_0} \times \{(\g_0,\chi_0,\l_0) \in S^3_{\x_0}\} \times
\{|\h|<\h_0\}\times \{|e|<e_0\}\ , \Eq(12.20)$$
%
with $S^n_\x\= \{\aa\in C^n: \exp{(-\x)}<|\exp{i\a_j}|< \exp{\x} \}$
being the standard complex neighborhood of $T^n$, and so that, for
$(K_0,M_0)\in \U_{0,\r_0}$, one has:
%
$$ |{M_0\over \bar M} - 1|<{1\over 4}\ , \quad |\n-\bar \n|<
{\bar \n\over 4}\ ,\quad |\m-\bar \m|<{1\over 4}\ ,\quad
|\k|<{1\over 2}\ ,\quad {1\over 2} \le |\s|\le {3\over 2}\ .
\Eq(12.21)$$

If we suppose that $p_0\bar M/J_1\=p_0\o_D=q_0\o_T$ for
some integers $p_0,q_0$, the set $\U_0$ is clearly a resonance for our
hamiltonian at $\h=0$.

We shall fix, in the example that we treat here, $p_0=1,q_0=2$; thus we
set $\bar M\= 2 \o_T J_1$, so that $\bar E\=J_1\o_T^2$.

The condition $\o_D=2\o_T$, \ie a day/year simple resonance $2:1$, is a
condition far from the ones relevant for the Earth nutation, but it might
be more realistic for other situations (\eg for Mercury there is a similar
simple resonance which is relevant, namely the $3:2$). In Celestial
Mechanics there is however a rather general feature in the data: usually
the bodies are almost spherical and the symmetry axis and the spin axis are
very close. In fact the angle between such axes, measured by
$\bar\n=(1-L^2/\bar M^2)^{1/2}$ is usually much smaller than the parameter
$\h$ (in the Earth case $\bar\n<\h^2$), as it has to be according to various
models of planet formation by accretion. But, unfortunately, we must
require also that the initial value of $L$, which is a constant of the
motion, verifies $\bar\n\ge\bar\n_0>0$ for some $\bar\n_0$, no matter how
small $\h$ is. This is a feature that makes our model somewhat unrealistic:
it is a necessary requirement to guarantee that the function
$\n =(1-L^2/M_0^2)^{1/2}$ does not become singular in the domain in which
we consider it (note that the $M_0$ derivative causes problems even in
writing down the equations for $\dot\chi_0$, if we do not impose that $\n$
stays away from zero). With our choices above, we can take $\bar \n_0\=
\bar \n \ 3/4$.

We shall perform a few (trivial) changes of coordinates and rescalings to
put the hamiltonian in a standard form to which the theory of \S 11
can be easily applied.

Because of our selection of the resonance, the harmonic $(2\g_0
+\chi_0)$ will be the angle of the pendulum--part of the hamiltonian.
Therefore, we perform the following linear (canonical) change of
variables,
%
$$(K_0,\g_0),\, (M_0,\chi_0),\, (B_0,\l_0)\, \to (I_0 ,\f_0),\,
(A_0 ,\a_0 ),\,(B'_0,\l_0)\Eq(12.22)$$
%
defined by $B_0=B'_0-
(A_0-a_0)$, with $a_0\=\bar K - 2 \bar M$ and:
%
$$\eqalign{
& \g_0= -(\a_0 +\l_0+\p/2)\cr
& \chi_0 = 2(\a_0+\l_0)+\f_0\cr}
\qquad
\eqalign{&K_0=2I_0 -
(A_0-a_0)+4\o_T J_1 \= 2 I_0 - A_0 + \bar K
\cr&M_0 = I_0  +2 \o_T J_1\= I_0+\bar M \cr}\Eq(12.23)$$
%
where the shifts have been introduced so that the unstable point
of the pendulum is $\h$--close to $(I_0,\f_0 )=(0,0)$
and so that the initial datum $(\bar K, \bar M)$ corresponds to
$(\bar I_0,\bar A_0)\=(0,0)$.
%
The hamiltonian
\equ(12.16), in the canonical variables $(I_0,\f_0) $, $(A_0,\a_0)$,
$(B_0' ,\l_0 )$, takes the form (up to a neglected constant):
%
$$\big(\o_TB_0'+ {I_0^2\over 2 J_1} +\h J_1 g^2\
(\cos\f_0 -1)+\h h\big) + \h
\bar E \Big[ V_0^{(1)} + eV_1^{(1)} + e^2 V_2^{(1)}\Big]\Eq(12.24)$$
%
where $h=h(A_0,I_0)$, $g=g(A_0,I_0)$
%
and the functions $V^{(1)}_j$ are simply defined in terms
of the $\bar B^h_{rpj}$ functions above evaluated in the new coordinates
with $V^{(1)}_0$ being derived
from $V_0-c_0$ by extracting from it the $J_1g^2
\cos \f_0$ term which we call the ``pendulum term".
Using the appendix A14 one sees that:
%
$$
h\= \bar E \Big( {\s_0^2\m_0^2\over 2} +{(1+\k_0)\m_0\s_0\over2}
\n_0 + {1+\k_0^2\over 4} \n_0^2 \Big)\ ,\quad
g^2 \= \o_T^2 {(1+\k_0) \m_0 \s_0 \over 2} \n_0
\Eq(12.25)$$
%
where:
$$(\m_0,\n_0,\k_0,\s_0) \= (\m,\n,\k,\s)|_{\{(K_0,M_0)=
(2I_0-A_0+\bar K, I_0 + \bar M)\}}
\Eq(12.26)$$
Analogously, one finds:
%
$$V^{(1)}_h=\sum_{rpj\ :\  r,p+h=even
\atop |r|\le 6,|p|\le 6+h,|j|\le 2} B^h_{rpj}
\cos{(r \a_0+p\l_0+j\f_0)}\Eq(12.27)$$
%
and a simple calculation yields the values of the coefficients
$B^h\=B^h(A_0,I_0)$ in
terms of the functions in \equ(12.11) evaluated at $K_0=2I_0-A_0+\bar K$
and $M_0=I_0+\bar M$. The results are in appendix A14, where all the
coefficients are derived.
Here we just remark that all the non--trivial modes are {\it fast}
\ie $p\neq 0$ (see \S 11 and below) with the {\it only exception}
of the two {\it slow} modes:
%
$$
B^2_{-20-1}\=  {9\over 2} c^0_{-1}\= {\k_0 \s_0 \m_0 \over 2} \n_0\ ,
\qquad  B^2_{202} \= - {17\over 2} d^0_2 \= {17\over 2}
{(1+\k_0)^2\over 8} \n_0^2
\Eq(12.28)$$
%
{\it This is the reason for having kept the second order in the
$e$--expansion}.

The initial datum $(\bar K,\bar M)$ becomes, in the new variables,
$(\bar I_0,\bar A_0)\=(0,0)$ so that the resonance $\U_0$ gets mapped into:
%
$$\U_1\= \{ I_0=0\ , \ |A_0| \le {\bar M \over 8} \}\ ,
\Eq(12.29)$$
%
and we can easily find a $\r_1$, $\x_1$ such that \equ(12.23) is analytic
on $\U_{1,\r_1}\times S^3_{\x_1}$ and the image of such domain under the
canonical transformation \equ(12.23) is contained in $\U_{0,\r_0}\times
S^3_{\x_0}$.

To see that \equ(12.24) can be put in the form \equ(11.3) so that the theory
of \S 11 can be applied, we perform a change of variables setting the
unstable equilibrium of the pendulum {\it exactly} in the origin. Let:
%
$$
h(A_0,I_0) \= h_0(A_0)+ h_1(A_0,I_0) I_0\ ,\qquad h_0(A_0)\=
h(A_0,0)
\Eq(12.30)$$
%
with $h_1$ analytic and define (via the implicit function theorem)
$G(A',I';\h)=-J_1 h_1(A',I')$ $+O(\h)$ as the solution of:
%
$$
{(I'+\h G)^2\over 2 J_1} + \h h(A',I'+\h G) = {{I'}^2\over 2 J_1} +
\h h_0(A')
\Eq(12.31)$$
%
Then it is easy to check that the canonical transformation
$(A_0,\a_0)$, $(I_0,\f_0)$ $\to$ $(A',\a')$, $(I',\f')$ and
$(B_0',\l_0)$ $\=$ $(B',\l')$, generated by:
%
$$
B'\l_0 + I' \f_0 + A' \a_0 + \h G(A',I';\h) \sin \f_0
\Eq(12.32)$$
%
transforms the hamiltonian \equ(12.24) into a hamiltonian like
\equ(11.3), \ie into:
%
$$
\o_T B' + \h h_0(A') + {{I'}^2\over 2 J_1} + \h J_1 g_0^2 \
(\cos \f' - 1)  + \h  \bar E [ v_0 + e v_1 + e^2 v_2]
\Eq(12.33)$$
%
where (cfr. \equ(12.25) $\div$ \equ(12.27)):
%
$$
h_0(a)\= h(a,0)\ ,\qquad v_h \= \sum_{rpj\ :\  r,p+h=even
\atop |r|\le 6,|p|\le 6+h,|j|\le 2}b^h_{rpj}
\cos{(r \a'+p\l'+j\f')}\Eq(12.34)$$
%
with $g_0\=g_0(A',I',\h (\cos \f -1);\h)$ and
$b^h_{rpj}\=b^h_{rpj}  (A',I',\h (\cos \f -1);\h)$
being analytic functions of their variables $A', I'$,
$z\= \h (\cos \f -1)$, $\h$ and:
%
$$
g_0^2(A',0,0;0)=g^2(A',0)\ ,\qquad
b^h_{rpj}(A',0,0;0) \= B^h_{rpj}(A',0)\Eq(12.35)$$
%
Furthermore along the resonance $\U_1$ (see \equ(12.29) and
replace $(I_0,A_0)$ with $(I',A')$) one has:
%
$$
\dpr_a h_0(a) = \o_T {\hat \k(a) \over 2} [1 + O(\bar \n)]\ ,
\qquad \big( \dpr_a^2 h_0 (a) \big)^{-1} = - 4 J_1 [1 + O(\bar \n)]
\Eq(12.36)$$
%
where (see \equ(12.26)):
%
$$
\hat \k(a)\= \k |_{(K_0,M_0)=(\bar K-a, \bar M)}\ ,
\quad \bar \n \= (1-\bar \m^2)^{1/2}\ ,
\quad \bar \m\= {\bar L\over \bar M}\Eq(12.37)$$
%
To conform with the analysis of \S 11 we use also the scaled
form of \equ(12.33); setting:
%
$$
\a'\=\a\ ,\quad \l'\=\l\ ,\quad \f'\=\f\ ,\quad
A'\=\pd A\ , \quad B' \= \pd B\ , \quad I' \=\pd I
\Eq(12.38)$$
%
and multiplying the hamiltonian by a factor $\h^{-1}$
(see remark before \equ(11.3)) we get from \equ(12.33),
{\it introducing also the auxiliary  parameter} $\b$
(eventually to be set equal to $1$):
%
$$\md \o_T B + h_0(\pd A) +
{I^2\over2 J_1}+ J_1 g_0^2 \ (\cos\f -1) +\b \bar E
[v_0 +ev_1+ e^2 v_2]\Eq(12.39)$$
%
where $g_0$ and $v_h$ are now evaluated at $\a'\=\a$,...,
$I'\=\pd I$: see \equ(12.38).

Note that the ``fast" term $F$ in \equ(11.9), \equ(11.10) corresponds
here to $v_0$ with $N=6$ and $\nn_0=2(1,1)$. Given the final form
\equ(12.39), we fix a  diffusion curve $\h$--close to the resonance
$\U_1$ (cfr. \equ(11.12)):
%
$$\LL\=  \{ |A|<\md {\bar M \over 8} \} \times \{ I=0\}\= \D \times
\{ I=0\}
\Eq(12.40)$$
%
We take $e=\h^c$, with $c$ large enough to apply the theory of \S11
(\eg $c>92 $).  If we check that the determinant of the intersection
matrix given by the generalization of \equ(11.8) is not exponentially
small as $\h\to0$, then it will follow that the homoclinic angles are
not exponentially small but have the size of a power of $\h$, while
their spacing has exponentially small size (by the averaging properties
discussed in \S11: see remarks 1 and 5). Recall that on a portion
$\Si_\h$ $\times$ $\{I=0\}$ of the diffusion curve, with $\Si_\h$
of relative measure $\le$ $K \h^{-1/2}$ $\h^{x/7}$ (see the discussion
around \equ(11.2)), the diophantine property \equ(11.15) holds
(with $\bar \o_1$ $=$ $\o_T$ and $\o_2$ $=$ $\dpr_A h_0(\h^{1/2}A)$).

We write the leading terms: they can arise only in the second order of
perturbation theory in the auxiliary parameter $\b$, from the
``interference" between $v_0$ and $v_2$.  Recall that such a perturbation
theory has a radius of convergence
$B^* \h^{-s}$  with an $s$ arbitrarily close to $1/2$
(here $s$ corresponds to the constant $\s$ of lemma 5 of \S 11),
so that its term sizes
decrease with the order $p$ at least as
$\h^{2s}$.
With the notations of \equ(11.8) (but note that here $\nn\otto (p,r)$)
we see that $\nn$ fast corresponds to $p\neq 0$ and that
$\nn$ slow corresponds to $p=0,$ $r\neq 0$; thus
the definitions in \S 11 (cfr. \equ(11.8): $-\d^2 \=$ leading term of the
determinant of the intersection matrix $\dpr_\aa \V Q_\su(\V 0)$) and
the computations in appendix A14 yield
{\it to leading terms} (as $\h \to 0$ with $e=\h^c$):
%
$$
\d=\h^{3/2}e^2\
{4 \bar \o_2\over \o_T^2 J_1 \bar g_0^2}\
\bar E^2\ \big( \sum_{r\neq 0,j\neq 0} (-1)^j {j\over r} B^0_{2r,2r,j}\big)
\ \big(2 K_2 B^2_{202}+ K_1 B^2_{-20-1}\big)
\Eq(12.41)$$
%
where  (see also \equ(12.36), \equ(12.35), \equ(12.25) )
$\bar \o_2\= \dpr_a h_0(a)$, $\bar g_0^2\= g^2(a,0)$
and the coefficients
$B^h$ are computed on the image of the diffusion curve $\LL$:
%
$$\eqalign{
& B^h_{rpj} \= B^h_{rpj}(a,0)\ ,\qquad |a|<{\bar M\over 8}\ ; \qquad
K_j=\ig_{-\io}^{+\io}{u\sin j\tilde \f(u)\over 2 \cosh u} du \cr
& \tilde \f(u)
\= \f_0(u/g) =4\arctg e^{-u}\ ,\quad K_1=2\ , \quad K_2={10 \p\over 3}\cr
}\Eq(12.42)$$
%
On the basis of the results of \S11 we know that the order $p$
contributions to the intersection matrix (in the expansion in the
auxiliary parameter $\b$) will be bounded at least by
$O(\h^{2s})$:
but they might be much smaller.  In fact if $p=1$ they are essentially
exponentially small in $\md$ as $\h\to0$.  If $p=2$ we see from
\equ(12.41) that they are of order $\h^{3/2}e^2$.

To order $p$ we can get non exponentially small contributions only from
terms like $v_0^{p-1}v_2$ or $v_1^2v_0^{p-2}$ or by terms involving at
least $v_1^3$ or $v_1v_2$ which contribute corrections of size $e^3$ at
least (hence negligible with respect to \equ(12.41)): this
can be seen directly by inspection of the Fourier transform structure of
the $v_0,v_1,v_2$ in appendix A14. But the terms $v_0^{p-1}v_2$ or
$v_1^2v_0^{p-2}$ contribute $\h^{s  p} e^2\pd$ where the last $\pd$
arises because if $\bar \o_2=0$ we would have corrections
smaller than any power
coming even from the above terms
(note that the order $p$ is {\it analytic} in $\bar \o_2$
{\it and} in $e^2$).

Hence  we can set $\b=1$ and
the leading term is actually \equ(12.41), if it does not vanish:
and the determinant of the intersection matrix is of order $\h^3
e^4=\h^{3+4c}$.

It is easy to check that the sum in \equ(12.41) does not vanish on
the diffusion curve $\LL$: in fact from \equ(12.41),\equ(A14.10),
\equ(A14.6), \equ(12.37), \equ(12.36), \equ(12.35), \equ(12.25)
it follows immediately that
on $\LL$ it is, to leading order in $\h$ and up to a factor
$[1+O(\bar \n)]$:
%
$$\eqalign{
& \d=  {9\over 2}
\h^{3/2} e^2\ (J_1 \o_T) \ \hat\d_a\qquad {\rm with:}\cr
& \hat \d_a\=
{1-\hat \k(a) \over 1+ \hat \k(a)}\  \hat \k(a)^2\  \hat \s(a) \
\bar \n \= \tan^2\Big({i_a\over 2}\Big) \cos^2 i_a\ \sin i_a\  \sin\th\cr}
\Eq(12.43)$$
%
where $|a|<\bar M/8$; $\hat \s(a)$ is defined analogously to $\hat \k$
(see \equ(12.37)); $\th$ is defined in \equ(12.5) and $i_a$ is defined here
and represents the planet inclination over the ecliptic (cfr. \equ(12.8)).
Equation \equ(12.43) holds, of course, for values of the coordinate $a$ in
$\Si_\h$ (\ie for all points outside a family of gaps of size smaller than
any prefixed power in $\h$: see comment after \equ(12.40))
where the appropriate diophantine inequality holds. One could also check
that \equ(12.43) holds uniformly on paths $\LL$ with $\D$ (see
\equ(12.40)) replaced by any closed interval such that $\hat \k(a)$
$\ne$ $\pm 1$ or equivalently $i_a\ne$ $0,\p$ (which means that the spin
axis is not parallel to the normal to the ecliptic).

This implies that on the diffusion path $\LL$ the homoclinic
angles are much larger than the whiskered tori spacing, so that we shall have
(by the analysis of \S8)
heteroclinic ladders along which Arnold's drift and diffusion will take
place on a time scale proportional to $\exp{(b\h^{-d})}$ for suitable
positive constants $b,d$.
%
\vglue2.truecm

\penalty-200

{\bf Appendix A1  Resonances: Nekhorossev theorem}

\penalty10000

\vskip0.5truecm\numsec=1\numfor=1

\penalty10000

Let $h(\AA)$ be an anisochronous hamiltonian (\ie $\det \dpr_\AA^2 h\neq
0$), analytic on $V_R\times
T^l$, where $V_R=\{\AA=(A_1,\ldots,A_l),\,|A_j|\le R\}$ and
holomorphic in:
%
$$\eqalign{
W(V_R;\r,\x)=\{\AA,\V z\ \ |&\,(\AA,\V z)\in C^{2l};\,|A_j-A_{0j}|<\r\cr
&{\rm for\ some\ }\AA_0\in V_R,\ e^{-\x}<|z_j|<e^\x\}\cr}\Eqa(A1.1)
$$
%
see [BG], p. 296: it is convenient to regard $h$ as defined on $W$ even
though it is independent of the angles $\f_j$, ($z_j\= e^{i\f_j}$).

Let $\nn_1\in Z^l$ and let $\cal M$ be the line parallel to $\nn_1$; define
the {\it resonance surface} $\Si_{\cal M}$ as: $\Si_{\cal
M}=\{\AA|\oo(\AA)\cdot\V\n_1=0\}$, with $\oo(\AA)\=\dpr_{\AA}h(\AA)$.  There
is no loss of generality if one takes $\V\n_1=(0,0,\ldots,0,1)$, (see [BG]
proposition 3 and p.303, for the obvious change of coordinates).

So we suppose that $\Si_{\cal M}$ is defined by:
%
$$\dpr_{A_l}h(\AA)=0\Eqa(A1.2)$$
%
and rename $\V F=(A_1,\ldots,A_{l-1})$, $S=A_l$, and $\V\f,\s$ the
conjugate angles; the notation is motivated by the fact that $\s$ is
a {\it slow  angle} (indeed on the resonance $\s(t)$ does not move at all)
in opposition to $\V\f$ which is {\it fast} as it evolves on a time scale of
order $1$.

Let $f(S,\V F,\s,\ff)$ be a perturbation and consider the hamiltonian:
%
$$H_\e(S,\V F,\s,\ff)=h(\V F,S)+\e f(S,\V F,\s,\V\f)\Eqa(A1.3)$$
%

We shall assume that $\dpr^2_{S}h(\V F,S)\ne0$ on $\Si_{\cal M}$ and
let $\V F\to s(\V F)$ be the equation for $\Si_{\cal M}$, (\ie $\dpr_S$
$h(\V F$, $s(\V F))\equiv0$).

Let $\cal U_{\cal M}$ be a resonant region of order $1$ with parameters
$b,\s_1,\s_2,\l^0_1,\l_2^0$, defined by:
%
$$\eqalign{
|\oo(\V F,S)\cdot\nn_1|&<\l^0_1\e^{\s_1}\cr
|\oo(\V F,S)\cdot\nn|&>\l_2^0\e^{\s_2},\qquad\forall\ |\nn|<\e^{-b},
\quad\V\n\ {\rm not\ parallel\ to\ }\V\n_1\cr}
\Eqa(A1.4)$$
%
where, see [BG], (3.1)$\div$(3.6):
%
$$\eqalign{
b&={(8l(l+1))}^{-1},\qquad\kern2.truecm \s_1=8^{-1}\cr
\s_2&=(1-1/l(l+1))8^{-1},\qquad \l^0_i=E2^{i-1}\bigl(m/8lM\bigr)^{-l+i}\cr
M&=\{{\rm maximum\ of\ the\ absolute\ values\ of\ the\ eigenvalues
\ of\ }\dpr^2_{\AA\AA}h\}\cr
m&=\{{\rm minimum\ of\ the\ absolute\ values\ of\ the\ eigenvalues
\ of\ }\dpr^2_{\AA\AA}h\}\cr
E&=\{{\rm maximum\ of\ }|\dpr_\AA h|\}\cr}\Eqa(A1.5)$$
%
where the $\max$ and $\min$ are considered in the holomorphy domain,
\equ(A1.1).

Let $W_\e\equiv W({\cal U}_{\cal M};\r',\x')$ be a vicinity of
$\cal U_{\cal M}$, with:
%
$$\r'=\e^{1/4}\l^0_1(8M)^{-1},\qquad\x'=\x/8\Eqa(A1.6)$$
%

Then, see [BG] proposition 2, ii), if $\e$ is small enough, (\eg
$|\e|<\e_c$ with $\e_c$ defined in (3.6) of [BG]), for all $1\le p\le
\e^{-b/3}$ one can find a change
of coordinates changing the hamiltonian into:
%
$$h_p(\V F,S,\e)+\e G_p(\V F,S,\s,\e)+\e^p f_p(\V F,S,\ff,\s,\e)
\Eqa(A1.7)$$
%
where the new $(\V F,S,\V\f,\s)$ coordinates vary in $W_\e$ and describe at
least all the points which are in $W_{\e/4}$ in terms of the original
coordinates.

Furthermore the change of coordinates is analytic and in the whole domain
$W_\e$:
%
$$\eqalign{
h_p(\V F,S,\e)\equiv &h(\V F,S)+\e\bar f(\V F,S)+O(\e^2)\cr
G_p(\V F,S,\s,\e)\equiv &\tilde f(\V F,S,\s)+O(\e),\kern3.truecm
|f_p(\V F,S,\ff,\s,\e)|\le O(1)\cr}
\Eqa(A1.8)$$
%
where $\bar f$ is the average of $f$ over both $\ff$ and $\s$ and
$\tilde f(\V F,S,\s)$ is the average of $f-\bar f$ over $\ff$ alone.

Finally if one is interested in a fixed, $\e$-independent, value of $p$
then one can fix $\r'$ in \equ(A1.6) to be $\e$-independent, see also the
following appendices A10, A11.
%
\vglue2.truecm

\penalty-200

{\bf Appendix A2 Diffusion paths and diophantine conditions}

\penalty10000

\vskip0.5truecm\numsec=2\numfor=1

\penalty10000

Here we prove the claims in \S 3.
Fix $E$ and $\AA_0$ such that $h(\AA_0,0)=E$; see \equ(2.3) and assumptions
1$\div$3 of \S 2. We simplify the notation
in this appendix by replacing $h(\AA,0)$ with $h(\AA)$. We consider
first the case $h(\AA)=\AA^2/2$, so that $\oo(\AA)$ $\equiv$
$\dpr_\AA h(\AA)$ $\=$ $\AA$.
We consider a small vicinity $U_\e$ of $\AA_0$ with diameter $\e$.
Given $\AA^1,\AA^2$ in $U_\e$ we define the curve $\LL_0$ as:
%
$$s\to\AA^1+\sum_{j=1}^{l-1} s^j(A^2_{j}-A^1_{j})_j\V u_j=\AA(s)
\equiv\AA_s\qquad s\in[0,1]\Eqa(A2.1)$$
%
where $\V u_1,\ldots,\V u_{l-1}$ form the natural basis of $R^{l-1}$.
Without loss of generality we suppose that $\AA^1$ and $\AA^2$ have
different corresponding coordinates: $A^1_{i}\ne A^2_{i}$. Let
$\oo_s=\oo(\AA_s)$.

At every point of $\LL_0$ the derivatives of $\AA_s$ of orders
$1,\ldots,k\le l-1$ span a $k$ dimensional space, \ie the curve has ``full
torsion''.

No codimension $1$ plane can have a contact with $\LL_0$ of order
higher than $l-1$.  Therefore, given $\V\n\in Z^l,\,\V\n\ne\V0$, the
set of the values of $s$ for which
$|\oo_s\cdot\V\n|/|\V\n|\le\h_{\V\n}$ has a measure that can be bounded
by $const\,|\h_{\V\n}|^{1/(l-1)}$.  It follows that the measure of the
set of values of $s$ for which $|\oo_s\cdot\V\n|<\h_{\V\n}|\V\n|$ for at
least one $\V\n\ne\V0$ will be bounded by
$const\,\sum_{\V\n}|\h_{\V\n}|^{1/(l-1)}$.

Thus, if we choose $|\h_\nn|=1/C|\nn|^{(l-1)^2+1}$ it is:
%
$${|\oo_s\cdot\nn|\,|\nn|^{-1}}>{C^{-1}|\nn|^{-(l-1)^2-1}}\Eqa(A2.2)$$
%
on a set $\Si$ of values of $s$ with measure of order:
%
$$1-const\sum_\nn(C|\nn|^{(l-1)^2+1})^{-1/(l-1)}\ge1-{const\,
C^{-1/(l-1)}}\Eqa(A2.3)$$
%
Thus the curve $s\to \AA(s)$ has the property \equ(3.1),\equ(3.2), but it does
not necessarily verify $h(\AA(s))=E$.

Therefore we modify $\LL_0$ into $\LL$ defined by: $s\to\AA(s)+\dd(s)$.
We determine the correction $\dd(s)$ together with an auxiliary
parameter $\g(s)$ by the equations:
%
$$\eqalign{
\V\dpr h(\AA+\dd)=&\oo(\AA)(1+\g)\cr
h(\AA+\dd)=&E\cr}\Eqa(A2.4)$$
%
with $\AA=\AA(s)$.

The latter equations, in linearized form, look like:
%
$$\eqalign{
\dpr^2_\AA h(\AA)\dd=&\g\oo(\AA)+\ldots\qquad\kern0.3truecm
\dd(\AA)=\g(\dpr_\AA^2
h)^{-1}(\AA)\oo(\AA)+\ldots\cr
\oo(\AA)\cdot\dd=&E-h(\AA)+\ldots\quad\g(\AA)={E-h(\AA)\over
\oo(\AA)\cdot(\dpr^2 h(\AA))^{-1}\oo(\AA)}+\ldots\cr}
\Eqa(A2.5)$$
%
which show that \equ(A2.4) can be solved expressing $\dd,\g$ as analytic
functions of $\AA,h-E$ at least if the size $\e$ of $U_\e$ is small
enough.  Hence also the first property of the diffusion paths holds for
$\LL$ which is therefore an example of a (short) diffusion path.

The general case in which $h(\AA)$ is not $\AA^2/2$, but still
$\det\dpr_{\AA\AA}h\ne0$, can be reduced to the above by changing
variables $\AA\otto\oo=\oo(\AA)=\dpr_\AA h(\AA)$ and by drawing the
curves in $\oo$ coordinates.

If one gives up full constructivity one can produce a somewhat
different class of examples: they are even better, as far as the
exponents in \equ(3.1), \equ(3.2) are concerned.  But they are
constructed with the help of measure theoretic lemmata and, therefore,
are not {\it really} constructive examples.
%
\vglue2.truecm

\penalty-200

{\bf Appendix A3: normal hyperbolic coordinates for a pendulum}

\penalty10000

\vskip0.5truecm\numsec=3\numfor=1

\penalty10000

\def\EE{{\cal E}}\def\DD{{\cal D}}
%
Here we prove lemma 0 of \S 5. Although the following proof is elementary,
we report it here in detail to establish the values of several constants
needed in the main text. The proof is based on an iteration method in the
style of \S 5, but it is clear that softer methods could also be used.

Let $P(I,\f)\= P(I,\AA,\f,\m)$ be a pendulum hamiltonian holomorphic for
$|I|<\r',e^{-\x'}<|e^{i\f}|<e^{\x'}$, see \equ(2.3) and
assumptions 1,2 of \S 2. We fix $\V a\in V$ and we regard $\AA,\m$ in:
%
$$
(\AA,\m)\in \DD \=\{ \AA\in C^{l-1}, \m \in C : \ |\AA-\V a|\le \r\ , \
|\m|\le \bar \m \}
\Eqa(A3.1)$$
%
as parameters, which often will be omitted from the notation. The following
analysis is local (near $(I,\f)=(0,0)$) therefore we shall consider only
$|\f|<\x'$.

By assumption 1, \S 2, $(I,\f)=(0,0)$ is an
unstable equilibrium point; hence $\dpr_{(I,\f)} P(0,0)$
$=$ $0$ and the matrix $M\=\dpr^2P(0,0)$ can be put in an off--diagonal
form via a canonical transformation $\RR\=\RR(\AA,\m)$:
%
$$\RR^TM\RR=g\pmatrix{0&1\cr1&0\cr},\quad{\rm with\ }g^2\=(\dpr^2_{I\f}
P)^2 -\dpr^2_{II}P\dpr^2_{\f\f}P\Eqa(A3.2)$$
%
with the derivatives evaluated at $(I,\AA,\f,\m)=(0,\AA,0,\m)$: in fact
$\RR$ is one of the ($\io^1$-many) matrices with determinant $1$
(and hence symplectic, since we are in dimension two) that diagonalize
$E M$ where $E\=\left(\matrix{0&-1\cr 1&0\cr}\right)$
(given one of such matrices  one obtains the others by right
multiplication by $\left(\matrix{\s^{-1}&0\cr 0& \s\cr}\right)$, for any
$\s\neq 0$).
We select one among the above  canonical maps as follows. Let:
%
$$\k^2\=
(16)^{-1}\cdot {\rm \ [\ area\  enclosed\  by\  the\  two\  separatrices\
swings\ ]}\Eqa(A3.3)$$
this is a natural unit of measure for the pendulum action $I$
(\eg, for the standard pendulum \equ(2.1) it is $\k^2=J_0g$);
define the
``dimensionless energy'' $P_0(x_1,x_2)\=P(x_1\k^2,x_2)/(g\k^2)$ and
$M_0\=\dpr^2 P_0|_{(x_1,x_2)=(0,0)}$, so that:
%
$$M=g\,D\,M_0\,D,\quad D\=\pmatrix{\k^{-1}&0\cr0&\k\cr}\Eqa(A3.4)$$
%
Now let $\pm \l^{\pm 1}$, $\l>0$, be the eigenvalues of $M_0$ (recall that
$\det M=-g^2$ so that $\det M_0=$ $-1$), and let $U,V$ be the unitary matrices
with determinant $1$ that diagonalize, respectively, $M_0$ and
$\left(\matrix{0&1\cr 1&0\cr}\right)$:
%
$$
U^T M_0 U = \pmatrix{\l & 0\cr 0 & -\l^{-1} \cr}\ ,
\quad V^T \pmatrix{0 & 1\cr 1 & 0\cr} V = \pmatrix{1 & 0 \cr 0 & -1\cr}\ ,
\quad V\= {1\over \sqrt{2}} \pmatrix{1 & -1 \cr 1 & 1 \cr}
\Eqa(A3.5)$$
%
then it is immediate to check that (see \equ(A3.4)):
$$
\RR= D^{-1} U \L V^T \ , \qquad \L \= \pmatrix{ \l^{-1/2} & 0 \cr
0 & \l^{1/2} \cr}
\Eqa(A3.6)$$
%
We also set:
%
$$\RR_0\=D\RR,\quad \RR_0=\pmatrix{a&b\cr
c&d\cr},\quad m^2\=\sup_\DD [|a|^2+|b|^2+|c|^2+|d|^2]=
\sup_\DD (\l+\l^{-1})\Eqa(A3.7)$$
%
(to check the last equality, note that $UV^T$ is unitary so that the sum
of the squared absolute values of the entries is equal to $2$).

We define a first canonical map via:
%
$$\pmatrix{I\cr\f}=\pmatrix{\RR_{11}&\RR_{12}\cr \RR_{21}&\RR_{22}\cr}
\pmatrix{p\cr q}\Eqa(A3.8)$$
%
For instance in the case of the standard pendulum in \equ(2.1) it is
$\k^2=J_0 g$ and:
%
$$\RR={1\over\sqrt2}\pmatrix{{\k}&{\k}\cr
{-\k^{-1}}&{\k^{-1}}\cr},\qquad
\RR^{-1}={1\over\sqrt2}\pmatrix{{\k^{-1}}&{-\k}\cr
{\k^{-1}}&{\k}}\Eqa(A3.9)$$
%
For later convenience it is useful to write explicitly the generating
function of \equ(A3.8):
%
$$
F(p,\f)\= F(p,\f;\AA,\m) \= \RR_{22}^{-1} (\RR_{12} {\f^2 \over 2} + p \f -
\RR_{21} {p^2 \over 2} )\ ,\  I=F_\f\ ,\  q=F_p
\Eqa(A3.10)$$
%
provided:
%
$$
\sup_\DD |\RR_{22}|^{-1} \= \hat \k < \io
\Eqa(A3.11)$$
%
(clearly \equ(A3.11) can be achieved, by taking $\r,\bar \m$ small enough,
if $\RR_{22}(\V a, 0)\neq 0$, otherwise we would have to choose
different independent ``mixed variables"; see [G] for general information
on canonical transformations).
%

In the new coordinates $p,q$ it is clear that $P(I,\f)$ becomes:
%
$$P(I,\f)=G_0(pq)+Q_0(p,q)\equiv K_0(p,q)\Eqa(A3.12)$$
%
where $Q_0,G_0$ are holomorphic for $|p|,|q|<\tilde \k_0$,
if $\tilde \k_0>0$ is suitably  small:
to guarantee that the image under \equ(A3.8) of the complex domain
$\{|p|,|q|<\tilde \k_0 \}$ is contained in $|I|< \r'$, $|\f|<\x'$,
we can take (see \equ(A3.7)):
%
$$
\tilde \k_0 \le {\k \over 2m} \min\{ \r' \k^{-2}, \x' \}
\Eqa(A3.13)$$
%
The Taylor series of $Q_0$ starts at third order:
%
$$Q_0(p,q)=\sum_{h+k\ge3\atop h\ne k} Q^0_{hk}p^hq^k,
\qquad(p,q)\in W_{\tilde \k_0}\equiv\{|p|,|q|<\tilde \k_0\}\Eqa(A3.14)$$
%
and $G_0(J)$ is holomorphic for $|J|<\tilde \k_0^2$:
%
$$G_0(J)=\sum_{h=1}^\i g_h J^h,\quad g_1\= g\ ,
\quad J\in \tilde W_{\tilde \k_0}\equiv
\{|J|<\tilde \k_0^2\}\Eqa(A3.15)$$
%
Suppose that:
%
$$||G_0||_{\tilde \k_0}< E_0\tilde \k_0^2,\qquad
||(\dpr_J G_0)^{-1}||_{\tilde \k_0}<\g_0,\qquad
||Q_0||_{\tilde \k_0}<\e_0\tilde \k_0^2\Eqa(A3.16)$$
%
where $||\cdot||_{\tilde \k}\= ||\cdot ||_{\tilde \k,\r,\bar \m}$
denotes the maximum in $\tilde W_{\tilde \k}$ (or $W_{\tilde \k}$,
whichever makes sense) times $\DD$, see \equ(A3.1).
Let $\d_j=\d_0/2^j$, $\d_0={(1/8)}\log2$, be a convenient sequence of
positive numbers.
We define a canonical map $(p,q)=\CC_0(p',q')$ via a generating
function:
%
$$\F_0(p',q)=\sum_{h+k\ge3\atop h\ne k}{{p'}^h q^k Q^0_{hk}\over
-(k-h)\dpr_J G_0(p'q)}\Eqa(A3.17)$$
%
The function $\F_0$ can be bounded, together with its derivatives,
in $W_{\tilde \k_0 e^{-\d_0/2}}$ by:
%
$$\tilde \k_0||\dpr\F_0||_{\tilde \k_0e^{-\d_0/2}},
||\F_0||_{\tilde \k_0e^{-\d_0/2}}\le
B_1\g_0\e_0\tilde \k_0^2\d_0^{-1}\Eqa(A3.18)$$
%
if $B_1$ is a suitable constant (\eg $B_1=\sup_\d \d\sum_{0\ne h\ne
k}|h-k|^{-1}e^{-\d(h+k)/2}$).
To estimate the domain of definition of the map generated by $\F_0$, \ie
defined by the relations:
%
$$p=p'+\dpr_q \F_0(p',q)\qquad q'=q+\dpr_{p'}\F_0(p',q)\Eqa(A3.19)$$
%{
we use the implicit function theorem.  The latter (see for instance,
[G, p.490]) will guarantee that $\CC_0$ and $\CC_0^{-1}$ have a
domain containing $W_{\tilde \k_0e^{-\d_0}}$ with images contained in
$W_{\tilde \k_0e^{-\d_0/2}}$ provided:
%
$$B_2\g_0\e_0\d_0^{-3}<1\Eqa(A3.20)$$
%
for $B_2$ large enough (we simply use that the image of the boundary of a
set under a holomorphic map is the boundary of the image; this gives, for
instance, $B_2=16 B_1$).
Notice that $\CC_0,\CC_0^{-1}$ have the form: identity $+$
second order polynomial (in the $p,q$ variables).
Assuming \equ(A3.20) valid we can write the hamiltonian \equ(A3.12) in the new
coordinates, writing it as:
%
$$K_1(p',q')=G_1(p'q')+Q_1(p',q')\Eqa(A3.21)$$
%
where $Q_1$ is defined in terms of $Q'$:
%
$$Q'(p',q')=G_0(pq)-G_0(p'q')+Q_0(p,q)\=
\sum_{h+k\ge3}Q'_{hk}{p'}^h{q'}^k\Eqa(A3.22)$$
%
by setting:
%
$$Q_1(p',q')\=\sum_{h+k\ge3\atop h\ne k}Q'_{hk}{p'}^h{q'}^k,\quad
G_1(J)\=G_0(J)+\sum_{h\ge 2}Q'_{hh}J^h\equiv G_0+\D_0 \Eqa(A3.23)$$
%
The estimate of the size of $Q_1$ can be performed by taking into
account that $\F_0$ has been chosen so as to verify
the {\it first order Hamilton-Jacobi
equation}:
%
$$\dpr_JG_0(p'q)\bigl(
q{\dpr_q\F_0}(p',q)-p'\dpr_{p'}\F_0(p',q)\bigr)+Q_0(p',q)=0\Eqa(A3.24)$$
%
so that, using \equ(A3.16) and $\g_0 E_0\ge1$, we find, for a suitable $B_3$:
%
$$||Q'||_{\tilde \k_0 e^{-2\d_0}}\le
B_3\,E_0\tilde \k_0^2\,\g_0^2\e_0^2\,\d_0^{-6}\Eqa(A3.25)$$
%
Therefore for a suitable $B_4$:
%
$$\eqalign{
||Q_1||_{\tilde \k_0 e^{-3\d_0}}\le &
B_4\,E_0\tilde \k_0^2(\g_0\,\e_0)^2\,\d_0^{-7}\cr
||\D_0||_{\tilde \k_0 e^{-3\d_0}}\le &
B_4\,E_0\tilde \k_0^2\,(\g_0\,\e_0)^2\,\d_0^{-7}\cr}\Eqa(A3.26)$$
%
This, in turn implies, for a suitable $B_5$:
%
$$\eqalign{
||G_1||_{\tilde \k_0 e^{-4\d_0}}\le
&E_0\tilde \k_0^2+ B_5\,E_0\tilde \k_0^2\g_0^2\e_0^2\,\d_0^{-7}\cr
||(\dpr_J G_1)^{-1}||_{\tilde \k_0 e^{-4\d_0}}\le &
\g_0(1+B_5\,E_0\g_0\e_0^2\g_0^2\,\d_0^{-8})\qquad
{\rm if\ }B_5\,E_0\g_0\e_0^2\g_0^2\d_0^{-8}<1\cr}\Eqa(A3.27)$$
%
Hence if we suppose (see \equ(A3.20), \equ(A3.27)), that for some $B_6$ large
enough it is:
%
$$B_6\,E_0\g_0(\e_0\g_0)^{1/2}\d_0^{-8}<1\Eqa(A3.28)$$
%
we see that both conditions in \equ(A3.20), \equ(A3.27), are satisfied;
and the hamiltonian $K_1$ is defined in terms of the functions
$G_1,Q_1$ which can be bounded as in \equ(A3.16) with constants
$\e_1,\g_1,E_1,\tilde \k_1$ which can be taken as given by:
%
$$\eqalign{\tilde \k_1=&\tilde
\k_0 e^{-4\d_0},\quad\g_1=\g_0(1+(\e_0\g_0)^{1/2}),\cr
E_1=&E_0(1+(\e_0\g_0)^{1/2}),\quad\g_1\e_1=(\g_0\e_0)^{3/2}\cr}
\Eqa(A3.29)$$
%
Hence if we disregard \equ(A3.28) and define $(\g_j,E_j,\tilde \k_j,\e_j)$ by
iterating \equ(A3.29) we see that, if $\g_0\e_0$ is small enough, the
sequence verifies:
%{
$$\tilde \k_j\ge{1\over2}\tilde \k_0,\qquad E_j\le2E_0,\qquad\g_j\le2\g_0,\qquad
\g_j\e_j\le(\g_0\e_0)^{(3/2)^j}\Eqa(A3.30)$$
%}
This allows us to infer that \equ(A3.28) with $j$ replacing $0$ will be
automatically verified for all but a finite number of values of $j$ if
$\e_0\g_0$ is sufficiently smaller than $1$. Therefore, under a condition like:
%
$$B_7(E_0\g_0)^2(\e_0\g_0)<1\Eqa(A3.31)$$
%
with $B_7$ suitably chosen, we see that \equ(A3.28), with $j$
instead of $0$, is verified for all $j\ge0$.
Thus we conclude that $\CC_j\to$ identity very fast with all its
derivatives, in the slightly smaller domain $W_{\k'}$ with $\k'=\tilde \k_0
\exp-\sum_{j=0}^\io4\d_j\=\tilde \k_0/2$.  The composition:
%
$$(p,q)=\CC(p_0,q_0)\= \CC(p_0,q_0;\AA_0,\m)\=
\lim_{n\to\i}\CC_n\CC_{n-1}\ldots\CC_0(p_0,q_0)\ ,
\quad (p_0,q_0)\in W_{\tilde \k_0/2}\Eqa(A3.32)$$
%
is clearly a canonical
map casting $K_0$ in the form $K_\i(p_0q_0)$, for a suitable function
$K_\i$, and defining {\it normal hyperbolic coordinates}.  Finally we
remark that in our case, since $Q_0$ has a third order zero it is not
restrictive to suppose that:
%
$$\e_0=\lis Q\  \tilde \k_0 \quad  {\rm for \ some \ constant}\quad \lis Q>0
\Eqa(A3.33)$$
%
hence \equ(A3.31) can be fulfilled for
$\tilde \k_0$ small enough, \ie:
%
$$
\tilde \k_0 < \k [B_7 (E_0 \g_0)^2 (\lis Q \k \g_0)]^{-1}
\Eqa(A3.34)$$
It is clear that this conclusion is what we need to establish the claims
from which this appendix is called.
In fact the map $\lis \RR_\m$ we are after (see \equ(5.3)) is given by:
%
$$\eqalign{
& \pmatrix{R(\AA_0,p_0,q_0,\m) \cr S(\AA_0,p_0,q_0,\m) \cr}
\= \RR(\AA_0,\m) \CC(p_0,q_0;\AA_0,\m) \cr
& \aa= \aa_0 - \dpr_\AA F(p,S;\AA_0,\m)- \dpr_{\AA_0} \F (p_0,q;\AA_0,\m)
\= \aa_0 + \V \d(\AA_0,p_0,q_0,\m)\cr}
\Eqa(A3.35)$$
where $(p,q)$ is as in \equ(A3.32) and $p_0q+\F(p_0,q;\AA_0,\m)$ denotes
the generating function associated to $\CC$.


The above condition \equ(A3.31) can be easily used to infer
explicit values
$\bar \r_0,\bar \k_0,\bar \x_0$ which are needed in the first step of the
proof of lemmata 1,1'. From the definition of $F$, \equ(A3.10), and the
fact that (see \equ(A3.7)):
%
$$\sup_{W_{\tilde \k_0\over 2}\times \DD} |S| \le m {\tilde \k_0\over \k}
\Eqa(A3.36)$$
%
it follows for a suitable constant $B_8>1$:
%
$$
\sup_{W_{\tilde \k_0\over 2}\times \DD} |F(p,S,\AA,\m)|
\le B_8 \ m^3 \tilde \k_0^2 {\hat \k\over\k}
\Eqa(A3.37)$$
%
where we have used that $m>1$ and \equ(A3.11). Thus if $\DD'$ is defined
as in \equ(A3.1) with $\r$ replaced by $\r/2$ (so as to be able to perform
dimensional bounds) we obtain easily:
%
$$
\sup_{W_{\tilde \k_0\over 2}\times \DD'} |\V \d|\le B_9 m^3 \big(1+ {\hat
\k\over \k}\big) {\tilde \k_0^2\over \r}\ ,\quad
\DD'\=\{|\AA-\V a|\le {\r\over 2}, |\m|\le \bar \m\}
\Eqa(A3.38)$$
%
for a suitable constant $B_9>1$ (we have also used $\e_0\g_0<1$ in view of
\equ(A3.31)). Thus we see that we can take:
%
$$\bar \k_0\= {\tilde \k_0\over 2}\ ,\quad
\bar \r_0\= {\r\over 2}\ ,\quad
\bar \x_0\= {\x\over 2}
\Eqa(A3.39)$$
%
provided $\tilde \k_0$ satisfies \equ(A3.13), \equ(A3.34)
(or \equ(A3.31)) and:
%
$$\tilde \k_0^2< \x \r\  [2B_9\  m^3\  (1+\hat \k/\k)]^{-1}
\Eqa(A3.40)$$
%
a condition that guarantees that if $|\Im \a_{0j}|<\xb_0$ then
$|\Im \a_{j}|<\x$  (see \equ(A3.38)). As already pointed out,
the choice of $\rb_0$ allowed us to perform dimensional bounds and get
\equ(A3.38).


As an example consider the case of a standard pendulum hamiltonian, see
\equ(2.1), with $g,J_0$ being $\AA$ independent. Then, $\k=\sqrt{J_0g}$,
$m=\sqrt{2}$, $\hat \k=(2J_0g)^{-1}$ and, by \equ(A3.9):
%
$$P=P\Bigl((p+q)(J_0g/2)^{1/2},{q-p\over(2J_0g)^{1/2}}\Bigr)\Eqa(A3.41)$$
%
and we see that:
%
$$\eqalign{ G_0=&gpq+J_0g^2\sum_{k=2}^\io\pmatrix{2k\cr
k\cr}\bigl({pq\over2J_0g}\bigr)^k{1\over(2k)!}\cr
Q_0=&\sum_{k=2}^\io{(-1)^k\over(2k)!}\Bigl({p-q\over(2J_0g)^{1/2}}
\Bigr)_{\ne}^{2k}\cr}\Eqa(A3.42)$$
%
where the subscript $\ne$ means that the expansion of the binomial has to
be carried out omitting the terms with $p^kq^k$.
So that setting:
%
$$\tilde \k_0=x\sqrt{J_0g},\qquad x<1/4\Eqa(A3.43)$$
%
where the restriction $x<1/4$ is imposed to simplify the analysis, we
easily find that we can take:
%
$$E_0=b_1 g,\qquad \e_0=b_2 gx^2,\qquad \g_0=b_3 g^{-1}\Eqa(A3.44)$$
%
(\eg $b_1=\cosh\sqrt2$, $b_2=4b_1$, $b_3=2$). The condition \equ(A3.31)
becomes simply: $x<b_4^{-1}$ for a suitable $b_4>1$
and to match also \equ(A3.13), \equ(A3.34) we see that we can
take $\bar\r_0=\r/2,$ $\bar\x_0=\x/2$ and:
%
$$\tilde \k_0\=b_4^{-1} \sqrt{J_0g}  \min\bigl(\x',\ {\r'\over{J_0g}},\ \x'
{\r\over {J_0g}}\bigr)\Eqa(A3.45)$$
%
(Note that in the present case $\V \d$ is actually identically zero and in
\equ(A3.45) one can drop the third argument in the minimum).

In the $\AA,\m$ dependent case, if $\k_{\min}=\min\sqrt{J_0g}$ and
$\k_{\max}=\max\sqrt{J_0g}$ with the extrema evaluated as $\AA,\m$ vary in
$\DD$, the \equ(A3.45) is replaced by:
%
$$\tilde \k_0\=b_4^{-1} \k_{\min} \min\bigl(\x',\ {\r'\over{\k_{\max}}},\ \x'
{\r\over {\k_{\max}}}\bigr)\Eqa(A3.46)$$
%
and we can still take $\rb_0=\r/2,\xb_0=\x/2$.

In general it is possible to express more explicitly the conditions
\equ(A3.13), \equ(A3.31) and \equ(A3.40).
Note that the domain of definition of the dimensionless energy
$P_0(x_1,x_2)$, defined before \equ(A3.4), is $|x_1|<\r'/\k^2,|\f|<\x'$.
Recall the definitions of $m,\hat \k$ (\equ(A3.7), \equ(A3.11));
define the parameters $\EE,\G,m$
by the following suprema in the latter domain of definition times $\DD$
(see \equ(A3.1)):
%
$$\EE=\sup|P_0|,\quad\G=\sup|g^{-1}|,
\quad m^2=\sup(\l+\l^{-1})\Eqa(A3.47)$$
%
and observe that from the construction of the map $\CC$ it follows
(see \equ(A3.35), \equ(A3.2)):
%
$$
P(I,\f) \= {1\over 2} M \pmatrix{I\cr\f\cr} \cdot\pmatrix{I\cr\f\cr}
+\sum_{h+k\ge 3} P_{hk} I^h \f^k
= g\ pq + \sum_{h+k\ge 3} P_{hk} I^h \f^k
\Eqa(A3.48)$$
%
Set $\b\=\k^2/\r'$ and introduce the following parameters:
%
$$
\hat \s\=\max\{1,{\hat \k\over \k}\} \ ,\quad
\s_2\=\max\{1,\b^2\ ,{\b\over \x'}\ ,
{1\over {\x'}^2}\}\ ,\quad \s_3\=\max\{1,\b^3\ ,\b^2
{1\over \x'}\ , {\b \over {\x'}^2}\ ,
{1\over {\x'}^3}\}
\Eqa(A3.49)$$
%
and observe that (see \equ(A3.7)):
%
$$
\sup|I|\le m\k \tilde \k_0\ ,\quad \sup |\f|\le m{\tilde \k_0\over \k}\ ,\quad
|P_{hk}|\le {\EE\over {\r'}^h{\x'}^k}\ ,\quad\sup|g|\le 4{\EE\over\r'\x'}
\Eqa(A3.50)$$
%
(where $(I,\f)=\CC(p,q)$ and the suprema are taken over the usual domain
$W_{\tilde\k_0/2}\times \DD$). Then one checks easily that, for a suitable
constant $B_{10}>1$ the parameters in \equ(A3.16) can be taken to be (cfr.
\equ(A3.47), \equ(A3.33)):
%
$$
E_0\=B_{10} (\EE\k^{-2}) m^2 \s_2\ ,\quad \g_0\=2\G\ ,\quad
\e_0\=B_{10}(\EE \k^{-2})m^3\s_3 {\tilde \k_0\over \k} \= \lis Q \tilde
\k_0\Eqa(A3.51)$$
%
provided:
%
$$ B_{10} (\G \EE \k^{-2}) {\tilde \k_0\over \k} \s_3<1
\Eqa(A3.52)$$
%
a condition which is needed in bounding $(\dpr_J G_0)^{-1}$ in terms of
$\G$. Finally we see that all the smallness requirements on $\tilde \k_0$
(\ie \equ(A3.13), \equ(A3.34), \equ(A3.40), \equ(A3.52)) are enforced by
taking, for a suitable $B>1$:
%
$$
\tilde \k_0< {\k\over B m^7} \min\{ {\r'\over \k^2}\ ,
\x'\ ,{1\over (\EE\G \k^{-2})\s_2^2 \s_3}\ ,{\r\x\over\r'\hat\s}\}
\Eqa(A3.53)$$
%
determining the range of $\tilde \k_0$ in terms of the analyticity radii,
$\r',\r,\x',\x$ and the parameters $\EE,\G,m,\hat \k$ defined in
\equ(A3.47),\equ(A3.7), \equ(A3.11).

This means that the map $\lis \RR_\m$ \equ(A3.35) can be defined in a domain
$\lis W\=W(\kb_0,\rb_0,\xb_0,\mb,\V a)$ with $\kb_0,\rb_0,\xb_0$ as in
\equ(A3.39)
and a $\tilde \k_0$ verifying \equ(A3.53); moreover such a map satisfies
on  $\lis W$ the bounds (see \equ(A3.50),
\equ(A3.39), \equ(A3.38), \equ(A3.49), \equ(A3.53)):
%
$$
\sup_{\lis W} |R|<{\r'\over 4}\ ,\quad \sup_{\lis W} |S|<{\x'\over 4}\ ,
\quad \sup_{\lis W} |\V \d|<{\x\over 4}
\Eqa(A3.54)$$
%
\vglue2.truecm

\penalty-200

{\bf Appendix A4 Diffusion sheets. Relative size of the time scales}

\penalty10000

\vskip0.5truecm\numsec=4\numfor=1

\penalty10000

\def\rb{{\bar\r}}\def\mb{{\bar\m}}\def\kb{{\bar\k}}
\def\rt{{\tilde\r}}\def\mt{{\tilde\m}}\def\kt{{\tilde\k}}
%
The following 1),2),3) are the estimates needed to construct the diffusion
sheet discussed in the proof of lemma 1'. The 4),5),6) describe the relative
size of the various time scales involved in lemma 1'.

1) We write \equ(5.8), if $\AA\=\AA_s+\V a$ and $M_s\=\dpr^2_\AA
h_0(\AA_s,0,0)$ as:
%
$$M_s\V a+\left[\dpr_\AA h_0(\AA_s+\V a,0,0)-\dpr_\AA h_0(\AA_s,0,0)-M_s\V
a-u\oo_s\right]=\V0\Eqa(A4.1)$$
%
or $\V a=\V m(\V a)$, with $\V m(\V a)$ defined as $M_s^{-1}$ applied to
the term in square brackets.

Hence, using the holomorphy of $h_0$ and Cauchy's theorem, we see that if
$\tilde\r\le\bar\r_0/2$, we can bound the $\V m$ as:
%
$$||\V m||\le|u|\h_0E_0+4\h_0E_0{\tilde\r^2\over\bar\r_0^2}\Eqa(A4.2)$$
%
Fixing $\bar B$ large (\eg $\bar B>20 l^2$) and:
%
$$\tilde u={1\over\bar B^2(E_0\h_0\bar\r_0^{-1})^2},\quad\rt={\rb_0\over
\bar B(E_0\h_0\rb_0^{-1})}\Eqa(A4.3)$$
%
we see that $\tilde u<1/4,\rt<\rb_0/4$ and $4||\V m||\rt^{-1}<1$ for $|\V
a|<\rt$, $|u|<\tilde u$. (The constant $\bar B$ can be taken depending only
on the dimension $l$ because $E_0\h_0\rb_0^{-1}\ge l^{-1}$, see item 5) below).

Thus the equation $\V a=\V m(\V a)$ has a solution with $|\V a|<\rt<\rb_0/4$
because $4||\V m||\rt^{-1}<1$. The implicit functions theorem used here is
the essentially obvious argument ``on the image of the boundary'': it is the
same used in appendix A3, see \equ(A3.20).

2) The \equ(5.14) can be similarly written, setting $\AA=\AA_{su}+\V a$ as
$\V a=\V n(\V a)$ with $\V n$ verifying:
%
$$||\V n||\le
2l^2E_0\h_0\Bigl({\rt^2\over\rb_0^2}+{\kt^2\over\kb_0^2}+{\mt\over\mb_0}\Bigr),
\qquad |\V a|<\rt,\,|J|<\kt^2,\,|\m|<\mt\Eqa(A4.4)$$
%
Hence if:
%
$$\kt={\kb_0\over\bar B(E_0\h_0\rb_0^{-1})},\qquad\mt={\mb_0\over\bar
B^2(E_0\h_0\rb_0^{-1})^2}
\Eqa(A4.5)$$
%
we see that, possibly readjusting $\bar B$, it is $4\rt^{-1}||\V n||<1$.
Therefore, there is a solution with $|\V a|<\rt<\rb_0/4$.

3) Considering \equ(5.16) and recalling that
$g_{su}\=\dpr_Jh_0(\AA_{su},0,0)$ (defined after \equ(5.9)) we see that:
%
$$\eqalign{
|u'_{su}|\=&|g_{su}^{-1}(g_{su}-g_s)|< 2\G_0lE_0\rt\rb_0^{-1}\cr
|u'|\le&\G_0\Bigl|\dpr_J h_0(\AA_{su}+\V a,J,\m)-\dpr_J
h_0(\AA_{su},0,0)\Bigr|\le2l\G_0E_0\Bigl({\kt^2\over\kb_0^2}+{\rt\over\rb_0
}+{\mt\over\mb}\Bigr)\cr}\Eqa(A4.6)$$
%
for $|J|<\k_0<\kt,\,|\m|<\mb<\mt,\,|\V a|<\r_0<\rt$.

4) From $|\oo_0\cdot\nn|\ge C_0^{-1}|\nn|^{(l-1)^2}$ and choosing
$\nn=(1,\ldots,0)$ one finds:
%
$$C_0|\oo_{01}|\ge1\Rightarrow C_0E_0\ge1\Eqa(A4.7)$$
%
and a similar argument yields also $\G_0E_0\ge1$. The relation
$E_0C_0>E_0\G_0$ is simply our assumption \equ(5.33).

5) From $[(\dpr^2_\AA h_0)^{-1}(\dpr^2_\AA h_0)]_{11}=1$ one finds:
%
$$1\le \sum_j|[(\dpr^2_\AA h_0)^{-1}]_{1j}[(\dpr^2_\AA h_0)]_{j1}|\le
E_0\h_0\r_0^{-1} l\Eqa(A4.8)$$
%

6) From $[(\dpr^2_\AA h_0)^{-1}\oo_0\cdot\oo_0]^{-1}
[(\dpr^2_\AA h_0)^{-1}\oo_0\cdot\oo_0]=1$ one deduces $\th_0\h_0
E_0^2\ge1$

Hence we can take $B_0=l^{-1}$.
%
\vglue2.truecm

\penalty-200

{\bf Appendix A5: Divisor bounds}

\penalty10000

\vskip0.5truecm\numsec=5\numfor=1

\penalty10000

If $k-h=0$ the denominator in \equ(5.31) becomes, using also
\equ(5.21),\equ(5.23) and a dimensional bound (recall also that
$\oo_{su}\=\dpr_\AA h_0(\AA^0(s,u,0,\m)$ by the definition \equ(5.14)
of $\AA^0(s,u,J,\m)$):
%
$$\eqalign{
|\oo_0(\AA,J,\m)\cdot\nn|=&|\oo_{su}\cdot\nn+(\oo_0(\AA,J,\m)-
\oo_{su})\cdot\nn|
\ge\cr
\ge&|\oo_{su}\cdot\nn|(1-|\oo_{su}\cdot\nn|^{-1}|(\oo_0(\AA,J,\m)-\oo_{su})
\cdot\nn|)\ge\quad\cr
\ge&C_0^{-1}|\nn|^{-\t}(1- l N_0^{\t+1}C_0E_0\tilde\r_0/\r_0)\ge
(2C_0)^{-1}|\nn|^{-\t}\cr}\Eqa(A5.1)$$
%
valid if $|A_i-A^0(s,u,J,\m)_{i}|\le\tilde\r_0$ and $\tilde\r_0$ verifies:
%
$$\tilde\r_0<\r_0/(2 l E_0C_0N_0^{\t+1})\Eqa(A5.2)$$
%

If $k-h=p$ is a non zero integer, then, assuming $g_s>0$ for
definiteness, and recalling that $\oo_{su}$ is real and
$g_0(\AA,J,\m)\=(1+u')g_{su}$, see \equ(5.16),\equ(5.30):
%
$$\eqalign{
&|i\oo_0(\AA,J,\m)\cdot\nn+g_0(\AA,J,\m)p|\ge|Re\,
[i(\oo_0(\AA,J,\m)-\oo_{su})\cdot\nn+g_0(\AA,J,\m)p]|\ge\cr
&\ge|p|(g_{su}-E_0\l_0-{N_0E_0\tilde\r_0\over\r_0})\ge|p|
\G_0^{-1}(1-\G_0E_0\l_0-{N_0E_0\G_0\tilde\r_0\over\r_0})\ge{|p|\over2\G_0}
\cr}\Eqa(A5.3)$$
%
as, recalling \equ(5.20):
%
$$\tilde\r_0<\r_0(4lE_0C_0N_0^{\t+1})^{-1},\qquad C_0>\G_0,
\qquad 4\l_0E_0\G_0<1\Eqa(A5.4)$$
%
so that \equ(5.34) follows (for $|J|<\k_0$, \ie if the above quantities
make sense).

Note that \equ(A5.3) holds trivially in the case $\nn=\V0$; hence:
%
$$|i\oo_0\cdot\nn+g_0(h-k)|^{-1}<2\bigl(C_0|\nn|^\t+\G_0|h-k|\bigr)\Eqa(A5.5)$$
%
for all $|\nn|+|h-k|>0$.
%
\vglue2.truecm

\penalty-200

{\bf Appendix A6 The equinox precession}

\penalty10000

\vskip0.5truecm\numsec=6\numfor=1

\penalty10000

\def\ww{\vglue0.4truecm}
\def\rr{{\V r}}\def\bfi{{\bar\f}}\def\bp{{\bar\psi}}\def\bt{{\bar\th}}
\def\iii{{\V\imath}}\def\jjj{{\V\jmath}}\def\kkk{{\V k}}
%
Consider the D'Alembert Lagrangian \equ(12.4) and the associated
hamiltonian \equ(12.6) $\div$ \equ(12.15).
Suppose that the eccentricity of the planet orbit is neglected (\ie that
the orbit is taken circular with radius $a$ equal to the major
semiaxis of the keplerian ellipse), then the average of the hamiltonian $H$
over the angles $\g,\f,\ps$ and over $\l\=\o_T t$  is:
%
$$\eqalign{
\bar H_p\=&{M^2\over2J_3}+\h\Bigl({M^2-L^2\over2J_1}+{3km_Tm_S\over5a}({R\over
a})^2\cr
&[{K^2\over M^2}(1-{L^2\over M^2}){1\over4}
+{1\over2}(1-{K^2\over M^2}){L^2\over M^2}+{1\over4}(1-{L^2\over M^2})]
\Bigr)\cr}\Eqa(A6.1)$$
%
with an error of order $O(\h e)$.

Suppose also that $M=L$ (\ie neglect the non alignment between the
planet axis and the angular momentum), so that $\g=\bfi$. And, furthermore,
assume that the hamiltonian $H$ can be replaced by $\bar H_p$ for the
purpose of evaluating the average motion over many periods of revolutions
(see Ch. 5, of [G] \S10 $\div$ 12 for a more rigorous treatment). Then
the precession angular velocity would be $\dpr_K \bar H_p$:
%
$$\dot\g\=\l_p^S\=-\h{3km_Tm_S\over 5a}({R\over a})^2{K\over M^2}+O(\h
e^2)\Eqa(A6.2)$$
%
\vglue2.truecm

\penalty-200

{\bf Appendix A7 Application to the Earth precession}

\penalty10000

\vskip0.5truecm\numsec=7\numfor=1

\penalty10000

From \equ(A6.2) and neglecting the small variations of the average
inclination, $i_0$, of the planet axis and denoting by $\o_D$ the angular
velocity of the daily rotation, and $T$ the period of revolution, the solar
precession rate is:
%
$$
\l^S_p=-{3\over2}\h{g_N\over a}({R\over a})^2{\cos i_0\over
R^2\o_D}=-{6\p^2\h\over \o_D T^2}
{\cos i_0}=-{3\over2}\h{\o_T^2\over\o_D}\cos i_0\Eqa(A7.1)$$
%
having used the third Kepler law to eliminate the gravitational constant
(\ie having used that $T=\p(2a)^{3/2}(2k m_S)^{-1/2}$); the fact that the
precession is negative is often referred to as a {\it retrograde} precession.
This shows also that the period of precession is $T_p^S=-2\p/\l_p^S=T(
\o_DT)/(3\p\h\cos i_0)$, or since $T=1.\,$year $=3.15581495\,10^7\,s$
and $\h\simeq0.003$, $T_p^S\sim7.94\,10^4$ years.

A rough analysis of the lunar precession can be made assuming that the Moon
is on the ecliptic and that its orbit is circular. One easily checks that
the solar precession analysis can then be applied to the Moon influence and
that the lunar precession would be, if $m_L,a_L$ denote respectively the
Moon mass and the radius of its orbit:
%
$$\dot\g=\l_p^L=-\h{3km_Tm_L\over 5a_L}({R\over a_L})^2{K\over A^2}+O(\h
e^2)=\l_p^S({a\over a_L})^3{m_L\over m_S}\Eqa(A7.2)$$
%
so that, taking also into account that the Moon orbit forms an angle
$i_L$ with the ecliptic and that the orbit eccentricity $e_L$ is quite
large, the total luni-solar precession would be:
%
$$
\l_p=\l^S_p+\l_p^L=\l^S_p\left((1+{3\over2}e_T^2)
+({a\over a_L})^3{m_L\over m_S}
(1+{3\over2}e_L^2)(1-{3\over2}\sin^2 i_L)\right)
\sim 3\l^S_p\Eqa(A7.3)$$
%
where the eccentricity corrections are obtained by remarking that the above
theory with $e=0$ has taken $(a/r_T)^2\equiv1$: but $1+(3/2)e^2$ is the
actual average of $a^2/r_T^2$ over the period with a time evolution based
on the Kepler laws; in a similar way one takes into account the inclination
of the Moon orbit to find the second correcting factor.  Of course one
could do also the latter corrections in a less empirical way by using the
canonical formalism, but we do not reproduce the details.

Using the data:
%
$$\eqalign{
a=&1.496\,10^8\,Km,\qquad a_L=3.844\,10^5\,Km,\qquad i_L=5^o\,1'\cr
e_L=&0.0549\cr
m_L=&m_T/81.3,\qquad m_S=1.99\,10^{30}\,Kg_N,\qquad
m_T=5.98\,10^{24}\,Kg_N\cr}\Eqa(A7.4)$$
%
the total rate of lunisolar precession in the above approximation gives,
after a small correction for the Moon inclination over the ecliptic is
taken into account, $T_p\sim2.51\,10^4$ years, or a yearly precession of
the equinoxes of $\sim51''$ per (sidereal) year.  So that only $1/3$ of
the luni-solar precession is due to the Sun.

Even assuming that Jupiter gravitated around the Earth on a circular orbit
its contribution to the precession would be much smaller (as, with obvious
notations, it would be a fraction of the order of $({a/a_J})^3{m_J/m_S}$,
\ie $O(10^{-5})$ of the solar precession).

A more fundamental formula is obtained if the Earth is not supposed an
homogeneous ellipsoid, but is supposed only to be rigid. In this case one
finds that \equ(A7.3) remains the same if $\h$ is defined in terms of the
inertia moments as $\h=(2J_3-J_1-J_2)/2J_3$; the analysis is unchanged and
the constant $\h$ thus defined is called the {\it mechanical flattening}
and it is independent of the Earth shape and mass distribution, as long as
it can be supposed rigid: hence it is this quantity that can be really
deduced from the observed rate of the precession of the equinoxes, and it
is $\h=1/304=0.00329$, (while the observed polar radius of the Earth is by
$0.0035$ shorter than the equatorial radius, showing that the ellipsoidal
model is, to some extent, not satisfactory).

The above calculation, due to D'Alembert (who did not use the canonical
formalism) (see [L]: vol II, book V, \S6, fourth formula to the last,
where $l=\l_p$, $m=\o_T$, $n=\o_D$, $h=i_0$, $\l=({a/ a_L})^3({m_L/ m_S})$,
and $e_T,e_L,i_L$ are neglected) was in fact {\it used} to determine $\h$
from the known precession rate, in terms of the masses of the Sun and of
the Moon.
%
\vglue2.truecm

\penalty-200

{\bf Appendix A8 Trigonometry of the Andoyer--Deprit angles}

\penalty10000

\vskip0.5truecm\numsec=8\numfor=1

\penalty10000

We refer here to fig. 4.11, 4.12, 4.10 of [G, p. 321$\div$323]
and to the well known spherical trigonometry identities:
%
$$\eqalign{
{\sin A\over \sin\a}=&{\sin B\over \sin \b}={\sin C\over\sin \g}\cr
\cos A=&\cos B\cos C+\sin B\sin C\cos \a\cr
\sin C\cos\b=&\cos B\sin A-\sin B\cos A\cos\g\cr
\cos A\cos\g=&\sin A\cot B-\sin\g\cot\b\cr}\Eqa(A8.1)$$
%
the inversion  can be actually performed via the relations:
%
$$\eqalign{\cos\d=&{K_z\over A},\kern1.truecm\cos\th={L\over A}\cr
\cot(\bfi-\g)=&(\cos\f\cos\d+\sin\d\cot\th)/\sin\f\cr
\cot(\bp-\ps)=&(-\cos\f\cos\g+\sin\f\cot\d)/\sin\th\cr
\sin\bt=&\sin\th{\sin\f\over\sin(\bfi-\g)}\cr}\Eqa(A8.2)$$
%
which follow immediately from the definitions, see [G, p. 323],
and the result is, after some algebra, \equ(12.10).
%
\vglue2.truecm

\penalty-200

{\bf Appendix A9: Determinants, wronskians, Jacobi's map}

\penalty10000

\vskip0.5truecm\numsec=9\numfor=1

\penalty10000

\def\KJ{{\bf K}}\def\dn{{\,{\rm dn}\,}}\def\sn{{\,{\rm sn}\,}}
\def\cn{{\,{\rm cn}\,}}\def\am{{\,{\rm am}\,}}\def\atan{{\,{\rm arctg}\,}}
%
{\bf 1)} Consider (see \equ(5.92) with $(I,\f)=(0,0)$):
%
$$h_2(\AA)\= {[\o B + h(\tilde A)]^2\over 2E}\ ,\qquad \AA\=(B,\tilde A)\in
R\times R^{l-2}\ ,\qquad E\ne 0
\Eqa(A9.1)$$
%
Then, setting $\tilde \o\=\dpr_{\tilde A} h$ and assuming $\tilde \o_i\ne
0$ for $i=1,...,l-2$, one has:
%
$$\det\dpr_{\AA}^2 h_2\=\det {1\over E} \pmatrix{\o^2&\o\tilde \o\cr
\o\tilde \o^T& (h_2/ E)  \dpr^2_{\tilde A} h_2+\tilde \o^T\times
\tilde \o\cr}=
{\o^2\over E} \Big({h_2\over E}\Big)^{l-2}
\det\dpr^2_{\tilde A} h_2\Eqa(A9.2)$$
%
as it follows by multiplying, for $i=1,...,l-2$, the first row
by $\tilde\o_i/\o$ and subtracting it from the $i$-th
following row: this proves \equ(5.93).
Furthermore the following general identity is  valid for any
$n\times n$ matrix $H$ and row vector $\oo$:
%
$$\det\pmatrix{0&\oo\cr
\oo^T&H\cr}=-(\oo\cdot H^{-1}\oo)\,\det H\Eqa(A9.3)$$
%
where if $H$ is not invertible the right hand side has to be interpreted
as $-(\oo \cdot \tilde H \oo)$, with $\tilde H_{ij}$ $\=$
$(i,j)^{th}-$cofactor of $H$ $\=$ $(-1)^{i+j}$ $\times$
the determinant of the matrix obtained by deleting the $i^{th}$
row and the $j^{th}$ column.

\vskip.5truecm \noindent
{\bf 2)} The standard pendulum: $P_0=I^2/2J_0+g_0^2J_0(\cos\f-1)$ has a
separatrix motion
$t\to\f^0(t)$ which is easily computable. Starting at $\f=\p$ at
$t=0$ one finds:
%
$$\eqalign{
\sin\f^0(t)/2=&1/\cosh g_0t,\cr
\cos\f^0(t)/2=&\tanh g_0t,\cr}\qquad
\eqalign{
\sin\f^0(t)=&2 \sinh g_0t\,(\cosh g_0t)^{-2}\cr
\cos\f^0(t)=&1-2\,(\cosh g_0t)^{-2}\cr}\Eqa(A9.4)$$
%

A further elementary discussion of the pendulum quadratures near $E=0$,
allows us to find the $E$ derivatives of the separatrix motion and leads
to:
%
$$\eqalign{
I^0={-2g_0J_0\over{\,\rm cosh\,}g_0t}=-2g_0J_0
\sin{\f^0\over2}\kern1.truecm&\qquad
\dpr_EI^0=J_0(I^0)^{-1}\Bigl(1+J_0g_0^2(\dpr_E\f^0)\sin\f^0\Bigr)\cr
\f^0=4{\,\rm arctg\,}e^{-g_0t}\kern3.truecm&\kern1.truecm
\dpr_E \f^0={-1\over8g_0^2J_0}\,\bigl(2g_0t+{\rm\, sinh\,}2g_0t\bigr)
\sin{\f^0\over2}\cr}\Eqa(A9.5)$$
%
exhibiting the analyticity properties in the complex $t$ plane that are
useful in discussing the size of the homoclinic angles. The \equ(A9.5)
allows us to compute the wronskian matrix of the above separatrices, \ie
the solution of the pendulum equation, linearized on the separatrices:
%
$$\dot{\lis W}=\lis L(t)\lis W,\quad\lis W(0)=1,\quad \lis
L(t)=\pmatrix{0&J_0g_0^2\cos\f^0(t)\cr J_0^{-1}&0\cr}\Eqa(A9.6)$$
%
and we get:
%
$$\lis W(t)=\pmatrix{
\dpr_EI^0/c_1&\dot I^0/c_2\cr
\dpr_E\f^0/c_1&\dot\f^0/c_2\cr},
\qquad\matrix{c_1=&\dpr_EI^0(0)\cr
c_2=&\dot\f^0(0)\cr}\Eqa(A9.7)$$
%
where the $E$ derivative is computed by imagining motions close to the
separatrix (which has energy $E=0$) and with the same initial $\f=\p$.
This becomes:
%
$$\lis W(t)=\pmatrix{ (1-{{\cal F}\over4}{\sinh g_0t\over\cosh^2g_0t})\cosh
g_0t&-J_0g_0{\sinh g_0t\over\cosh^2 g_0t}\cr {{\cal F}\over4J_0g_0}&{1\over\cosh
g_0t}\cr},\qquad{\cal F}\={2g_0t+\sinh 2g_0t\over\cosh g_0t}\Eqa(A9.8)$$
%
We are also interested in the matrices $U^s(t), U^u(t)$ of \S6.  If we
write $\f=S(p_0,0)$ and if $\bar p$ is the value of $p_0$ such that $S(\bar
p,0)=\p$, we see that:
%
$$\eqalign{
\f^0(t)=&S(\bar p e^{-g_0t},0)=4\,{\rm arctg}\, e^{-g_0t}\cr
I^0(t)=&R(\bar p e^{-g_0t},0)=J_0\dot\f^0(t)=-J_0\bar p g_0
e^{-g_0t}\dpr_p S(\bar p e^{-g_0t},0)\cr}\Eqa(A9.9)$$
%
and it can be seen that $\bar p=(32J_0g_0)^{1/2}$; so that, noting that at
$q_0=0,E=0$ it is $\dpr_q=p_0 g_0\dpr_E$ at $p_0=const$:
%
$$U^s(0)=\pmatrix{\dpr_p I&\dpr_q I\cr\dpr_p \f&\dpr_q \f}=
\pmatrix{0&-\bar p/2\cr2/\bar p&0\cr}\Eqa(A9.10)$$
%
and, similarly:
%
$$U^u(0)=\pmatrix{\bar p/2&0\cr0&2/\bar p\cr}\Eqa(A9.11)$$

{\bf 3)} The theory of the jacobian elliptic functions shows how to
perform a complete calculation of the functions $R,S$, see [GR]
(8.198),(8.153), (8.146), (8.128), (8.197).  The result (a celebrated
theorem by Jacobi, and a strongly instructive exercise in Mechanics) is
reported here for completeness and is discussed in terms of the pendulum
energy:
%
$${J_0\dot\f^2\over2}+J_0 g_0^2(1-\cos\f)=E\Eqa(A9.12)$$
%
where the origin in $\f$ is set at the stable equilibrium, to adhere to
the notations in the theory of elliptic functions.

Setting $u=t(E/2J_0)^{1/2}\=\e^{1/2} g_0t$, $k^2={2J_0 g_0^2/E}=\e^{-1}$
where $\e$ is the {\it dimensionless} energy so that $\e=1$ is the
separatrix, and:
%
$$\KJ(k)=\ig_0^{\p/2} {d\a\over (1-k^2\sin^2\a)^{1/2}}\Eqa(A9.13)$$
%
One sets, using whenever possible, the standard notations for the jacobian
elliptic integrals except for $x(.)$ which is usually denoted $q(.)$ but
which we would confuse with the canonical variable $q$ of lemma 0:
%
$$\eqalign{
k'=&(1-k^2)^{1/2},\qquad g_J=g_0{\p\over2k\KJ(k')},\qquad
\l\={1\over2}{1-k^{1/2}\over1+k^{1/2}}\cr
x(k')=&e^{-\p\KJ(k)/\KJ(k')}=
\l+2\l^5+15\l^9+150\l^{13}+1707\l^{17}+
\ldots\cr}\Eqa(A9.14)$$
%

In terms of the above conventions we have, directly from the definitions
(\ie from the equations of motion):
%
$$\eqalign{
I(t)=&J_0\dot \f=-2J_0g_0\e^{1/2}\dn(u,k)\cr
\f(t)=&2\am(tg_0\e^{1/2})\cr}\Eqa(A9.15)$$
%
which yield, changing at this point the origin for $\f$ to the unstable
point to conform with our notations (\ie obtaining
$\f(t)=2(\am(tg_0\e^{1/2})+\p/2)$):
%
$$R=-2J_0g_0\e^{1/2}{\dn(iu,k')\over\cn(iu,k')},\qquad
\sin {S\over2}={1\over \cn(iu,k')},\qquad\cos {S\over2}=
i{\sn(iu,k')\over\cn(iu,k')}
\Eqa(A9.16)$$
%
which, using also $R(p,q)=g_J J_0(-p\dpr_p+q\dpr_q)S(p,q)$ to evaluate $S$
from $R$, imply immediately the Jacobi map:
%
$$\eqalignno{
R(p,q)=&-2J_0g_J
\Bigl[{p\over1+p^2}+{q\over 1+q^2}-\sum_{n=1}^\io(-1)^n{1+x^{2n-1}\over
1-x^{2n-1}}(p^{2n-1}+q^{2n-1})\Bigr]\cr
S(p,q)=&2\left[\atan p-\atan q-\sum_{n=1}^\io(-1)^n{1+x^{2n-1}\over
1-x^{2n-1}}{(p^{2n-1}-q^{2n-1})\over 2n-1}\right]&\eqa(A9.17)\cr
\sin {S(p,q)\over2}=&{\p\over 2 k \KJ(k')}\Bigl[
{p\over 1+p^2}-{q\over 1+q^2}-\sum_{n=1}^\io(-1)^n{1-x^{2n-1}\over
1+x^{2n-1}}(p^{2n-1}-q^{2n-1})\Bigr]\cr
\cos {S(p,q)\over2}=&{-\p\over 4 k \KJ(k')}\Bigl[
{1-p^2\over 1+p^2}+{1-q^2\over 1+q^2}+2
\sum_{n=1}^\io(-1)^n{1-x^{2n}\over
1+x^{2n}}(p^{2n}+q^{2n})\Bigr]\cr}$$
%
with $x\=pq$.

Note that $g_J$ depends on $k$, and so do $k',k$: hence the coefficients of
the first and of the last two of \equ(A9.17) are also functions of $x=pq$.

Furthermore the (dimensionless) energy becomes a function of
$x=pq$ defined by inverting the map:
%
$$\e\to x(k')\=x((1-\e^{-1})^{1/2})\Eqa(A9.18)$$
%
and the point corresponding to $\f=\p$ and to a
dimensionless energy $\e$, has coordinates:
%
$$p\=1,\quad q\=x(k')\Eqa(A9.19)$$
%
(a rearrangement of the above series showing the convergence for $p=1$ and
$|x|<1$ is exhibited below).

However the derivative of the energy with respect to $pq$ is not
proportional to $g$, defined above: this would mean that the map
\equ(A9.17) between $I,\f$ and $p,q$ would be a canonical map.  The above
implies, nevertheless, that the jacobian determinant
$D(x)=\det(\dpr(p,q)/\dpr(p_J,q_J))$ with respect to the (yet unknown)
canonical coordinates of lemma 0 is a function of the $x$
variable identical to the determinant of the jacobian matrix
$\dpr(p,q)/\dpr(I,\f)\=D(x)$ (which has to be a function of $x$ and can be
computed from \equ(A9.17)).

To be precise, $(p_J,q_J)$ denote here {\it some} canonical variables
which transform the pendulum hamiltonian into a function of the product
$(p_Jq_J)$ (but clearly there is a large ambiguity in the construction of
such  variables and therefore $(p_J,q_J)$ need not coincide with the
variables constructed in lemma 0). It is easy to check that with the above
definitions it is:
%
$$
g_J = 2 J_0 g_0^2 {d\e \over dx} D\Eqa(A9.20)$$
%
which (using (8.197) of [GR]) yields at once $D(0)= (32 J_0g_0)^{-1}$.
%

Setting $p=p_J$, $q=f(x)q_J$ we see that the jacobian determinant
$\dpr(p,q)/\dpr(p_J,q_J)\=f+(1-xf'/f)^{-1}xf'$. Hence, if $f$ is the
solution of the differential equation:
%
$$f+{xf'\over1-xf'/f}=D(x),\qquad f(0)=D(0)=1/(32 J_0g_0)\Eqa(A9.21)$$
%
regular at $x=0$, then the map $(I,\f)\to(p_J,q_J)$ is canonical;
%
in fact, one finds: $f(x)=x (\ig_0^x D^{-1}(y)dy)^{-1}$.

Therefore a canonical Jacobi map is, in terms of the Jacobi map
\equ(A9.17), simply obtained by substituting $p,q$ with
$p_J,\,q_J F(p_Jq_J)$, where $x_J\= p_J q_J$ and $F(x_J)$ is implicitly
defined by $x=x_Jf(x)=x_J F(x_J)$, \ie $f(x)=F(x_J)$.

In lemma 0 we did not use dimensionless $p,q$ coordinates: it is easy
to check that with the conventions of \S5, and appendix 3 the $p,q$
coordinates above are related to canonical (dimensional) ones, which we
henceforth denote $p_0,q_0$, coinciding with the coordinates constructed
by lemma 0 up to first order in $p_0,q_0$, by:
%
$$p={-p_0\over (32J_0g_0)^{1/2}},\quad q={-q_0\over
(32J_0g_0)^{1/2}}\Eqa(A9.22)$$
%
so that $p_0,q_0$ have the dimension of the square root of an action.

The above \equ(A9.17) is written in the form in which it is easily recognized
in the elliptic functions tables.  However, once derived, it can be
rewritten in the following form:
%
\def\txt{\textstyle}
$$\eqalign{\txt
R(p,q)=&\txt-4J_0g_J\left[\sum_{m=0}^\io\bigl({x^mp\over 1+x^{2m}p^2}
+{x^mq\over 1+x^{2m}q^2}\bigr)\right]\cr\txt
S(p,q)=&\txt4\left[\sum_{m=0}^\io\bigl(\atan x^mp-\atan
x^mq\bigr)\right]\cr
\txt
\sin {S(p,q)\over2}=&\txt{\p\over k\KJ(k')}\left[\sum_{m=0}^\io
(-1)^m\bigl(
{x^m p\over 1+ x^{2m} p^2}-{x^m q\over 1+x^{2m}q^2}\bigr)\right]\cr\txt
\cos {S(p,q)\over2}=&\txt{-\p\over 2k\KJ(k')}\left[
1-2\sum_{m=0}^{\io}(-1)^m\bigl({x^{2m}p^2\over 1+x^{2m}p^2}+
{x^{2m}q^2\over 1+x^{2m}q^2}\bigr)\right]\cr}\Eqa(A9.23)$$
%
exhibiting some of the properties of the Jacobi map in a better way.

{\bf 5)} In general the wronskian $W$ in \equ(6.14) [and hence
the solution of \equ(6.15)] can be computed quite
explicitly.  It is however convenient, for computational purposes, to
rearrange its rows and columns by writing them in the order
$(I,\f,\AA,\aa)$ instead of $(I,\AA,\f,\aa)$ used in \S6.

Consider the equation for $I'$: $P(\AA,I',\f',0)=e$ and let
$I'=i(\AA,e,\f')$ be a solution. Then define the functions $\F(t,e)$ and
$I(t,e)=i(\AA,e,\F(t,e))$, with $\AA,e$ regarded as parameters,
solutions of:
%
$$\dot\F={\dpr_I}P(\AA,i(\AA,e,\F,0),\F,0),\qquad\F(0,e)
=\f,\,\qquad I(0,e)=i(\AA,e,\f)\Eqa(A9.24)$$
%

The above functions $t\to(\F(t,e),I(t,e))$ will be a family of motions of
the pendulum with energy $e$ close to the separatrix motion, ($e=0$).
The functions:
%
$$t\to S_1(t)=
\left(\eqalign{\dpr_e I(t)&\cr\dpr_e \F(t)&\cr}\right)_{e=0}
{1\over\dpr_e I(0)_{e=0}},\qquad
t\to S_2(t)= \left(\eqalign{\dot I(t)&\cr\dot\F(t)&\cr}\right)_{e=0}
{1\over\dot\F(0)_{e=0}}\Eqa(A9.25)$$
%
verify the equations of motion linearized around the separatrix motion:
%
$$
\dot{\V S}_j=\lis L(t)\V S_j,\qquad\lis L(t)=\pmatrix{-\dpr^2_{I\f}
H_0&-\dpr^2_{\f\f}H_0\cr
\dpr^2_{II} H_0&\dpr^2_{I\f} H_0\cr}\Eqa(A9.26)$$
%
here $H_0=h(\AA,0)+P(\AA,I,\f,0)$ and all the derivatives are
evaluated at the point $X^0_{su}(t)$ and at $\m=0$, see also
\equ(6.12); note that $\dpr_e I(0)_{e=0}=\dpr_e
i(\AA,0,\f)=1/\dpr_IP(i(\AA,0,\f))>0$.

Furthermore $S_{12}(0)=S_{21}(0)=0$: in fact $\F(0)\equiv\f$ so that
$S_{12}(0)\equiv0$, and $\dot I(0)=0$ (because $\dot I(0,0)=\dpr_\f
i(\AA,0,\f)\,\dot\F(0,0)=0$ by our choice of $\f$ as the point where $i$ has a
maximum), hence $S_{21}(0)=0$.

Hence the matrix $\overline W(t)=(S_1(t),S_2(t))$ verifies the
equation:
%
$$\dot{\overline W}=\overline L(t)\overline W,\qquad \overline
W(0)=1\Eqa(A9.27)$$
%
and we realize that $\lis W$ is the wronskian for the separatrix motion
of the pendulum $h(\AA)+P(\AA,i,\f,0)$, with $\AA=\AA_{su}$.
For the standard pendulum it is given by \equ(A9.8).

The wronskian $W(t)$ can be expressed in terms of $\lis W$ as follows:
%
$$W(t)=\pmatrix{\lis W(t)&x(t)&0\cr0&1&0\cr
y(t)^T&H(t)+R(t)&1\cr}\Eqa(A9.28)$$
%
where $x,y$ are $2\times(l-1)$ matrices and $R,H$ are $(l-1)\times(l-1)$
matrices: we shall think $x,y$ as rows of $2$-vectors
$x=(\xx_1,\ldots,\xx_{l-1})$, $y=(\yy_1,\ldots,\yy_{l-1})$, with
$\xx_i,\yy_i$ being column $2$-vectors; or, alternatively, as one column of
two $(l-1)$-vectors: $x=\pmatrix{\V x^{1}\cr\V x^{2}\cr}$, $y=\pmatrix{
\V y^{1}\cr\V y^{2}\cr}$; by the matrix multiplication rules we
have:
%
$$(y^Tx)_{ij}=\yy_i\cdot\xx_j,  \quad (i,j=1,...,l-1),\quad (y
x^T)_{ij}=\yy^{(i)}\cdot\xx^{(j)},\quad (i,j=1,2)\Eqa(A9.29)$$
%

The conditions that \equ(A9.28) verifies $\dot W=L W$, $W(0)=1$ are:
%
$$\eqalign{
\dot\xx_i=&\lis L\xx_i+\V\x_i\quad \kern6.0truecm\xx_i(0)=0\cr
\dot\yy_i=&\s\lis W^{\,-1}\V\x_i\quad\kern6.005truecm \yy_i(0)=0\cr
\dot R=&(\s\x)^T x\quad\kern6.2truecm R(0)=0\cr
\dot H=&(\dpr^2_{\AA\AA}H_0)(\AA,I(t),\f(t),0)\equiv
M\quad\kern2.truecm H(0)=0\cr
\V\x_i(t)=&\pmatrix{-\dpr_{A_i\f} H_0(\AA,I(t),\f(t),0)\cr
\dpr_{A_i I}H_0(\AA,I(t),\f(t),0)\cr}\kern3.0truecm
\s\equiv\pmatrix{0&1\cr-1&0\cr}\cr}\Eqa(A9.30)$$
%
because $L$ can be written:
%
$$L=\pmatrix{\lis L&\x&0\cr0&0&0\cr
(\s\x)^T&M&0\cr}\Eqa(A9.31)$$
%
and $\s{\lis W}^T\s=-\lis W^{\,-1}$, as $\det \lis W\equiv 1$, and
$\s^2=-1$. Hence:
%
$$\eqalign{
\xx_i(t)=&\lis W(t)\ii_0^t\lis W(\t)^{-1}\V\x_i(\t)d\t\equiv\lis W(t)
\zz_i(t)\cr
\yy_i(t)=&\s\ii_0^t\lis W(\t)^{-1}\V\x_i(\t)d\t\equiv\s\zz_i(t)\cr
R_{ij}(t)=&\ii_0^t(\s\V\x_i(\t))^T\cdot\xx_j(\t)d\t,\qquad
H(t)=\ii_0^t M(\t)d\t\cr}\Eqa(A9.32)$$
%
where $\V z_i(t)$ is defined by the second equality in the first line of
\equ(A9.32).

It is important to find the asymptotic expansion of $\lis W, x, y, z$ as
$t\to+\i$. It can be derived from the expansion of $S_{1}(t)$ and
from:
%
$$\x_i(t)=\lis\x^1_i e^{-gt}+\lis\x^2_i e^{-2g t}+\ldots,\qquad
S_i(t)=\lis S^1_i e^{-gt}+\lis S^2_i e^{-2g t}+\ldots\Eqa(A9.33)$$
%
where $g$ is the Lyapunov exponent of the selected unstable equilibrium
point of the pendulum (to be consistent with \S3 we should replace
everywhere below $g$ with $\k g$). The expansions of $S_1$ and $S_2$ are
deduced from the corresponding quadratures; for instance that of $S_1$
is derived from the quadrature:
%
$$t=\ii_\f^{\F(t,e)}(\dpr_I P)(\AA,i(\AA,e,\ps,0),\ps,0)^{-1}d\ps
\Eqa(A9.34)$$
%
by differentiating with respect to $e$ and setting $e=0$.

For suitably chosen constants $\g,\g_0,\g',\g'_0$ one easily finds:
%
$$\lis W(t)=\pmatrix{\g e^{gt}+\s_{11}(t)&S_{21}(t)\cr
\g' e^{gt}+\s_{12}(t)&S_{22}(t)\cr}\Eqa(A9.35)$$
%
where $\s_{11},\s_{12}$ converge to $\g_0,\g'_0$ at speed $O(te^{-gt})$.

Therefore, see \equ(A9.32):
%
$$\eqalign{
\lis W(t)^{-1}=&\pmatrix{S_{22}(t)&-S_{21}(t)\cr
-\g'e^{gt}-\s_{12}(t)&\g e^{gt}+\s_{11}(t)\cr}\cr
\zz_i(t)=&\ii_0^t\lis W(\t)^{-1}\V \x_i
(\t)d\t=\lis\z_it\bigl({0\atop1}\bigr)
+\pmatrix{\z_i^1\cr\z_i^2\cr}
+\pmatrix{\z^{r1}_i(t)\cr\z^{r2}_i(t)\cr}}\Eqa(A9.36)$$
%
where, setting $\hat\x_i=\bar\x^2_i e^{-gt}+\bar\x^3_ie^{-2gt}+\ldots$
and $\D(t)=-\g'\hat\x_{i1}(t)+\g\hat\x_{i2}(t)$:
%
$$\eqalign{
& \lis\z_i=-\g'\,\lis\x_{i1}+\g\lis\x_{i2}\cr
&\V\z_{i}=\pmatrix{\z^1_i\cr\z_i^2\cr}=\ii_0^{+\i} d\t
\pmatrix{\x_{i1}(\t)S_{22}(\t)-\x_{i2}(\t)S_{21}(\t)\cr
\D(\t)-\x_{i1}(\t)\s_{12}(\t)+\x_{i2}(\t)\s_{11}(\t)\cr}\cr
&{\V\z\,}_i^{\,r}(t)=\ig_t^\i d\t\pmatrix{
\x_{i1}(\t)S_{22}(\t)-\x_{i2}(\t)S_{21}(\t)\cr
\D(\t)-\x_{i1}(\t)\s_{12}(\t)+\x_{i2}(\t)\s_{11}(\t)}=
\pmatrix{O(e^{-2gt})\cr O(e^{-gt})\cr}\cr}
\Eqa(A9.37)$$
%
and we shall denote $\V{{\bar\z}}$ the $(l-1)$-vector with components
$\bar\z_i$, $i=1,\ldots,l-1$, thinking also, see \equ(A9.29):
%
$${\V\z\,}=\pmatrix{{\V\z\,}^{1}\cr{\V\z\,}^{2}\cr},\quad
\V\x=\pmatrix{\V\x^{1}\cr\V\x^{2}\cr},\quad etc\Eqa(A9.38)$$
%

Therefore the complete wronskians $W(t)$ and $W(t)^{-1}$ are:
%
$$\pmatrix{\lis W(t)&\lis W(t)\zz(t)&0\cr
0&1&0\cr
(\s\zz(t))^T&H+R&1\cr},\ {\rm and}\
\pmatrix{\lis W(t)^{-1}&-\V z(t)&0\cr
0&1&0\cr
-(\s\V z)^T\lis W^{\,-1}&-H-R+(\s\V z)^T\V z&1\cr}\Eqa(A9.39)$$
%
respectively, the inverse matrix being computed by the general formula:
%
$$W=\pmatrix{\lis W&B&0\cr0&1&0\cr
C^T&K&1\cr}\,\,\otto\,\,W^{\,-1}=\pmatrix{\lis W^{\,-1}&-\lis W^{\,-1} B&0\cr
0&1&0\cr
-C^T\lis W^{\,-1}&-K+C^T\lis W^{\,-1}B&1\cr}
\Eqa(A9.40)$$
%
and one finds for $W(t)W(\t)^{-1}$, shortening $(H(t)+R(t))$ into
$(H+R)_t$:
%
$$
\pmatrix{\lis W(t)\lis W(\t)^{-1}&
\lis W(t)(\zz(t)-\zz(\t))&0\cr
0&1&0\cr
(\s(\zz(t)-\zz(\t)))^T\lis W^{\,-1}(\t)&(H+R)_t-(H+R)_\t
-(\s(\zz(t)-\zz(\t))^T\zz(\t))&1\cr}\Eqa(A9.41)$$
%
\vglue2.truecm

\penalty-200

{\bf Appendix 10: High order perturbation theory and averaging}

\penalty10000

\vskip0.5truecm\numsec=10\numfor=1

\penalty10000
\def\hb{{\bar\h}}\def\xt{{\tilde \x}}
\def\pd{{\h^{1/2}}}\def\md{{\h^{-1/2}}}
%
We consider the hamiltonian, see \equ(11.11):
%
$$H=\md\o B+ h(\h^{1/2}A)+{I^2\over 2J_0}+J_0g_0^2(\cos\f-1)+\b(F+
\m f)\Eqa(A10.1)$$
%
where  $h\=$ constant $+\h^{1/2} \bar \o_2 A + \h (A^2/ 2J(A))$;
$J_0$ depends on $A$ and $I$ while $g_0$ depends on
$A,I,z\=(\cos\f-1)$; possibly such functions depend also on $\h,\m $.
{\it The parameter $\b$ is an auxiliary complex parameter that will be
eventually set  equal to $1$}.

We assume that each function in \equ(A10.1) is holomorphic for
$|\h|<\hb_0,|\m|<\mb_0$ and in a domain obtained by complexifying, by an
amount $\rb_0=\md\rb$ ($\rb$ being some given positive action) the
actions and by an amount $\xb_0$ the angles, around the real domain:
%
$$\U=\bigl\{I=0,\ A\in\md[\bar A,\tilde A]\bigr\}\Eqa(A10.2)$$
%
for some $\bar A,\tilde A,\ 0<\bar A<\tilde A$, and we call the latter
domain $\U(\rb_0,\xb_0)$. As usual we suppose for simplicity that all
$\x_.$ variables are $<1$ (no loss of generality). The functions will be
supposed to verify uniform bounds (with respect to $\h$) in the above
holomorphy domain.

Note that the ``large" size, $O(\md)$ of the analyticity domains in the $A,I$
variables simply reflects the assumptions in \S10 that the dependence of
$J,J_0,F,f$ on the $A,I$ is via $\pd A,\pd I$. This assumption implies,
together with the boundedness assumption, that the rotation vector
$\oo=(\md\o,\dpr_A h( \h^{1/2} A))$ is a vector $\oo=(\o_1,\o_2)$ with
$\o_2$ varying between $\pd(\bar\o,\tilde\o)$ as $A$ varies in the
interval $[\bar A,\tilde A]$: which we assume to exclude the origin
(this takes into account the fact that \equ(A10.1) has to come from
\equ(11.3) with $\bar\o_2>0$).

In the following discussion the assumption that the functions $F,f$
depend on $\h z$, \ie that they have a large analyticity domain in the
$z$ variable is not necessary (although it is part of the assumptions of
lemma 5, to simplify the formulation).

We also assume that the functions $f,F$, whose Fourier transforms will
be denoted by affixing a label $\nn,n$, are trigonometric polynomials in
the $\aa\=(\a_1,\a_2)\=(\l,\a)$ variables: \ie their Fourier transforms
vanish if $|\nn|>N$, for some $N>0$:
this hypothesis, as we shall see, is not really necessary and is made
only for simplicity.

Finally we suppose that $F$ has zero average in the $\aa$ variables
and that it contains only harmonics multiples
of a fixed $\nn_0$ (\eg $\nn_0=(1,1)$)  which is a ``fast mode", (\ie
$\n_{01}\ne0$, see \equ(11.9)).

We shall show that, if $c$ is a large enough constant, {\it for any}
$x>0$  and any $0<\s<1/2$
there exist constants $\k,\r,\x>0$ such that, for all $\h>0$ small
enough, in the domain:
%
$$  \eqalign{\O(\k,\r,\x,\mb)=\bigl\{&|p|,|q|<\k, |\Im A|<\r,\cr
    &\Re A\in\md[\bar A,\tilde A],\,|\Im
    \ps_j|<\x,|\m|<\mb\bigr\}\cr}
\qquad\mb=\h^{c}\Eqa(A10.3)$$
%
there exist functions $\d,R,S,\L,\Th,\X,\D$ holomorphic in \equ(A10.3)
defining a map:
%
$$\eqalign{
I=&R(p,a,q)+\L(p,a,q,\V\ps),\qquad A=a+\X(p,a,q,\V\ps),\qquad\l=\ps_1\cr
\f=&S(p,a,q)+\Th(p,a,q,\V\ps),\qquad \a=
\ps_2+\d(p,a,q,\V\ps)+\D(p,a,q,\V\ps)\cr}\Eqa(A10.4)$$
%
with the $\m,\h,\b$ dependence of the above functions not explicitly shown,
and with $\D,\X,\L,\Th$ of order $\b\sqrt\h$. The functions are uniformly
bounded in $|\b|<B^*\h^{-\s} $. And the map is canonical and
changes the hamiltonian \equ(A10.1) (up to a trivial constant) into:
%
$$H=\md\o b+ \h^{1/2} \bar \o_2 a+{\h a^2\over2\bar J(a)}+\bar g(a,pq)+
\b^{2x}\h^x \bar f(p,a,q,\V\ps)\Eqa(A10.5)$$
%
with $\bar J,\bar g,\bar f$ depending on $\h,\m,\b$. The size of $\r,\x,\k$
and how small $\h$ should be depend on $x$ and can be easily deduced from the
proof below. The constant $c$ can be
taken a suitably large number (\eg $c=10 $ is proposed in the proof).

\vskip.3truecm
\noindent{\bf Proof:}
We begin by performing the (generalization of the) Jacobi
map of lemma 0, \S5.  This gives a canonical map defined on
$W(\kt_0,\rt_0, \xt_0,\mt_0)$ for some $\kt_0,\rt_0\=\md\tilde\r,
\xt_0,\mt_0>0$ (where we use the notations of \S5) like \equ(A10.4)
with $\D,\X,\Th,\L=0$, and we take for simplicity $\kt_0^2\=\rt$.
The parameters $\kt_0,\rt_0,\ldots$ are larger than the corresponding
barred ones in \equ(A10.3).

The map transforms the original hamiltonian into:
%
$$\md\o B+h(\h^{1/2}A)+\tilde g(A,pq)+\b( \tilde F
+\m\tilde f)\Eqa(A10.6)$$
%
where $\tilde F,\,\tilde f$ are evaluated at $(p,q,A,\aa)$.  The
various functions depend also on $\h,\m$ and are analytic in the above
domain and in $\pd$, for $\h$ small enough.

The variables in \equ(A10.6) should be primed as they are different from
the original ones, but we do not do so.

The main property of $\tilde F$ is that in the new variables it is still
a trigonometric polynomial with the same modes in $\aa$ as $F$ (\ie only
finitely many multiples of a given fast mode), hence with zero average.
This happens because the variation of $\aa$ in the transformation of lemma
0, \S5, is $\aa$-independent, see \equ(5.3).

As a preliminary step we apply perturbation theory to remove the $\tilde
F$ by performing a perturbation expansion to first order in the
auxiliary variable $\b$.  This leads to the Hamilton Jacobi equation
($J\=pq$):
%
$$\md\o\dpr_\l\F+\pd\o_2\,\dpr_\a\F+\dpr_J\tilde g\,
(q_0\dpr_{q_0}\F-p\dpr_p\F)+ \tilde F=0\Eqa(A10.7)$$
%
where $\pd\o_2=\dpr_A h+\dpr_A\tilde g(A,J)=O(\pd)$, see also
\equ(5.31).

The hypothesis that $F$, hence $\tilde F$, has only one fast mode
is easily seen to imply that $\F$ exists, is
holomorphic in a suitable domain and it generates a canonical
transformation, close to the identity within $O(\b\pd)$, on a domain
$W(\kb_0,\rb_0,\xb_0,\mt_0)$ for some $\kb_0,\rb_0\=\md\rb,\xb_0,
\mt_0>0$, $\rb>0$. Of course this holds if $\b\h^{1/2}$ is small enough.

The finite modality of $F$ is not really necessary: it gives easily the
property that the divisors found in solving \equ(A10.7) cannot vanish
even for complex phase space points (which could in general happen as
discussed in Appendix A5). But such divisors can arise only if $w\=\n
i\oo\cdot\nn_0+g p$ (with $g\=\dpr_J \tilde g$) vanishes for some $p,\n$
different from zero: this is impossible for $\h $ small as one can easily
check that $|w|>\,const\,\md$ if $\n$ and $p$ do not vanish.

The canonical map transforms \equ(A10.6) into:
%
$$\md\o B_0+\hat h(\h^{1/2}A)+\hat g(A,p_0q_0)+\b\m\hat f+
\b^2\pd \hat F\Eqa(A10.8)$$
%
as the largest ``second order term" comes from $\tilde F$  itself and
therefore it has size $O(\b\dpr\F)=O(\b^2\pd)$.

Note that the fact that $\F$ has size of order $O(\b\pd)$ is a consequence
of the fast mode assumption on $F$, forbidding the appearance of
divisors of order $\pd$ or even of order $1$, in the solution of
\equ(A10.7), see also \equ(5.31).  But what said so far would
hold rather generally if we only assumed that $F$ contained just fast
modes, not necessarily parallel to a fixed one $\nn_0$.

We proceed by remarking that the assumption that $F$ contains only modes
parallel to $\nn_0$ has the simple consequence that also $\tilde F$
{\it and} $\hat F$ have the same property. The $\hat F$ might have non
zero average over the angles $\aa$: by a further canonical change of
coordinates of the type of  lemma 0, \S5, we can transform \equ(A10.8)
back into  a hamiltonian of the same form with $\hat F$ with zero
average (and new $\hat h$, $\hat g$). In this way we see that we can
assume that the canonical map transforms \equ(A10.6) into \equ(A10.8)
with $\hat F$ unimodal, fast, and with zero average over the $\aa$
angles.

{\it The latter properties would fail if $F$ had not been assumed
unimodal (but just fast).}

Hence we can repeat the above argument and further reduce, in suitable
new coordinates the size of $F$ to $\b^{2^n}\h^{(2^n-1)/2}$, after $n$
steps.  No small divisors problems can arise, again by our unimodality
assumption (which makes the system, for the purposes of the present
discussion, effectively one dimensional).  If $\m=\h^c$ we continue
until $2^n-1>2c$. At each step we must put a requirement on
the size of $\b\pd$ in order to solve the implicit functions problems
that arise at each step in passing from the generating functions $\F$ to
the actual map. However, see the remark 1) after lemma 1', \S5, we
could continue indefinitely and build a canonical transformation casting
\equ(A10.6) into the same form with $\tilde F\=0$. The quadratic
decrease of the size of $F$ is such that the successive conditions on
the size of $\b\pd$ or the decrease in the analyticity domains (\ie of
the constants measuring their size) become essentially negligible: but it
is sufficient to stop when the size of the new $F$ has become of order
$\h^c$ for all $|\b|\pd$ small enough. This happens if one considers
an order $n_0$ such that $n_0\sim\log\big((\log \h^{c+1/2})(\log
\b\pd)^{-1}\big)$.
However, we shall prefer, in order to have {\it analyticity}
in $\b$, to consider a smaller domain, say,
$|\b|<B^* \h^{-\s}$ with $0<\s<1/2$, in which case
it will be sufficient to take $n_0\ge (\log 2)^{-1} \log[(2c+1)/
(1-2\s)]$.
%

At this point we have put the original hamiltonian in the form
\equ(A10.6) with $\tilde F=0$ and a new $\tilde f$, as $\m\le\h^{c}$ is
supposed to hold.  However the new $\tilde f$ will in general have all
harmonics (\ie it will no longer be a trigonometric polynomial in the
$\aa$).

Thus we see the ``averaging" phenomenon: the problem of casting the
\equ(A10.1) into the form \equ(A10.5) is equivalent (if $\b\pd$ is small
enough) to the same problem with $F=0,\,\b=1,\,\m=\h^c$ and (another) $f$ with
the same analyticity properties and with the (minor as we shall see)
difference that it is not a trigonometric polynomial but ``only''
$\bar\x_0$ analytic in the $\aa$ variables with some $\bar\x_0$.

Hence we look at the same problem with $F=0,\b=1$, and at first with
$f$ being a trigonometric polynomial of degree $N$: and we perform the
Jacobi map to put the hamiltonian in the form \equ(A10.6); this time
with $\tilde F=0$ and $\b=1$.

We denote $A_0,p_0,q_0,\aa_0$ the canonical coordinates describing our
problem after the Jacobi map and in a domain $W(\k_0,\r_0,\x_0,\mb)$,
with $\mb=\h^c$.  By the assumption that $f$ is a trigonometric
polynomial {\it we can fix $\x_0$ arbitrarily}, ($\le1$); and $\r_0$ is
of order $\md$.

The hamiltonian will be written $H\= H_0+\e x$ with $x\=\m \tilde f$
and with $\e$ being a formal parameter to be set eventually equal to
$1$.

The function $x\=\m f$ can be treated perturbatively up to order $s$
(to be fixed later), at least formally, in the sense that one can
define:
%
$$\F=\e\F^1+\e^2\F^2+\ldots+\e^s\F^s\Eqa(A10.9)$$
%
recursively so that, if $H=H_0+\e x$, it verifies in the sense of formal
series in $\e$:
%
$$\eqalign{
&H_0(B+\dpr_{\l_0} \F,A+\dpr_{\a_0}\F,p+\dpr_{q_0}\F,q_0)+
\e x(A+\dpr_\a\F,p+\dpr_{q_0}\F,q_0,\l_0,\a_0)=\cr&=
H_0(B,A,p\cdot(q_0+\dpr_{p}\F),q_0)+\sum_{k=1}^s\e^k
H_k(A,p\cdot(q_0+\dpr_{p}\F))+O(\e^{s+1})\cr}\Eqa(A10.10)$$
%
with suitable functions $H_k$, up to order $s$ in powers of $\e$.

The \equ(A10.10) gives the following set of recursive equations
($\aa_0\=(\l_0,\a_0)$):
%
$$\oo\cdot\dpr_{\aa_0}\F^k+\bar g\,(q_0\dpr_{q_0}\F^k-p\dpr_p\F^k)+
x^k(p,q_0,A,\aa_0)-\media{x^k}^D(p\,q_0,A)=0\Eqa(A10.11)$$
%
for $k=1,\ldots,s$; here $\oo=\oo(A,pq_0)=(\md\o,\dpr_A(\,\h A^2/(2\bar
J)+\bar g))$, $\bar g=\bar g(A,pq_0)$ and the $D$ superscript denotes
the ``diagonal cut'' operation, defined for any function
$f(p,q)=\sum_{r,s=0}^\io f_{rs}p^rq^s$ as the map:
%
$$f(p,q)\to f^D(p\cdot q)=\sum_{r=0}^\io (pq)^rf_{rr}\Eqa(A10.12)$$
%
By assumption (see comment following \equ(A10.2)) $\oo$ has the form
$(\md\o,\pd\o')$ with $0<\bar \o<\o'<\tilde \o$ in the real part of the
definition domains.

If we label with $i=1,2,3,4,5$ the five variables
conjugated to $(A,p,q_0,\a_0,\l_0)$ (\ie $\a_0$, $q_0$, $p, A, B$)
%
it is, with the notations of \equ(6.10):
%
$$\eqalignno{
x^k(p,A,q_0,\aa)=&\sum_{{\V m}\atop m_3=0,|{\V m}|>1} H^{{\V m}}_0(A,p,q_0)
\sum_{(k^i_j)_{{\V m},k}}\prod_{i=1}^5\prod_{j=1}^{m_i}\dpr_i\F^{k^i_j}+\cr
+&\sum_{{\V m}\atop m_3,m_4=0,|{\V m}|\ge1} \m f^{{\V m}}(A,p,q_0,\aa_0)
\sum_{(k^i_j)_{{\V m},k-1}}\prod_{i=1}^5\prod_{j=1}^{m_i}\dpr_i\F^{k^i_j}+\cr
-&
\widetilde{\sum_{m_j\=0, j\ne3\atop r=0,...,k-1}}
H_k^{{\V m}}(A,pq_0)
\sum_{(k^3_j)_{m_3,k-r}}\prod_{j=1}^{m_3}\dpr_3\F^{k^3_j}&\eqa(A10.13)\cr
H_k(A,pq)=&\media{x^{k}}^D\=x^{kD}\cr}$$
%
where $\widetilde\Si$ means that $m_3>1$ if $r=0$ and $m_3\ge 1$ for $r\ge 1$.
%

{\it The key remark is that the above equation can be solved recursively,
producing $\F^k,x^k$ which are trigonometric polynomials of degree $\le
kN$ for all $k\le s_0$, provided $s_0$ is such that
$|\oo\cdot\nn|>\pd
\o/2$ for $0<|\n_i|\le s_0N$}. Since
$\oo=(\md\o,\pd\o_2)$, with $\bar\o<\o_2<\tilde\o$ by assumption, this
holds if $s_0=\o/(2N\h \tilde \o)\=b/N\h$.

We need, however, also some bounds on $\F^k,H_k,x^k$, for $k\le s_0$.
We set, for $\d>0$ to be chosen later:
%
$$\r_h=\r_0(1-h\d),\qquad\x_h=\x_0(1-h\d),\quad\k_h=\k_0(1-h\d)\Eqa(A10.14)$$
%
and $\r'_h=\r_{h-1/2},\,\k'_h=\k_{h-1/2},\x'_h=\x_{h-1/2}$, so that
$\r_h<\r'_h<\r_{h-1}$, {\it etc}.

To simplify the analysis we shall often replace, in the following,
$\r_0$ with $\k_0^2$, in spite of the fact that $\r_0=O(\md)$ while
$\k_0=O(1)$.

Note that, for $k=1$ and suitable $D_X,D_H,D_\F$ it is ($x^1=\m f$):
%
$$|x^1|_{\r_1,\x_1,\k_1,\m_0}<D_X,\quad
|H_1|_{\r_1,\x_1,\k_1,\m_0}<D_H,\quad
|\F^1|_{\r_1,\x_1,\k_1,\m_0}<D_\F\Eqa(A10.15)$$
%

Possibly reducing by a factor $2$ the size of the original analyticity
domains, (an operation which we may and shall assume as unnecessary,
possibly by redefining the analyticity parameters $\k_0,\x_0$), we see
immediately that in our case we can take, for some $\g'>\g>1$,
$D_X=\g\m,\,D_H= \g'\m$.

We can also take $D_\F=D_X \md\g^{\prime\prime}$ for some
$\g^{\prime\prime}>\g$.  This is because, in general for $k\le s_0$, we
can bound $\dpr \F$ in terms of $x$ by $D_X K\,(\x\d)^{-4}\md$ if $K>1$ is
a suitable constant and if $\d$ denotes the analyticity loss in the
domain of $\F$ with respect to that of $x$ (\ie if $\k,\r,\x$ are the
analyticity parameters of $x$ and $\k(1-\d)$, $\r(1-\d)$, $\x(1-\d)$
are those of $\F$).

The point being that for $k\le s_0$ the smallest divisors are bounded
below by $O(\pd)$ and the sum giving $\F$ runs over four integer
indices, see \equ(5.31), so that $\dpr_\a\F$ requires a bound ``of
dimension'' $4$ (in fact there are only two angle variables and we
could get a better bound of the order $\x^{-3}\d^{-4}$; but here and
below we do not do so, for simplicity).

Suppose also that for $1\le h\le k-1<s_0$ it is:
%
$$\eqalign{
|x^h|_{\r_h,\x_h,\k_h,\m_0}<&D_X B^{h-1},\qquad
|H_h|_{\r_h,\x_h,\k_h,\m_0}<D_HB^{h-1},\cr
|\F^h|_{\r_h,\x_h,\k_h,\m_0}<&D_\F B^{h-1}\cr}\Eqa(A10.16)$$
%
for a suitably chosen $B$. This holds for $h=1$ (with {\it any} $B$) by
the above comments.

Then we see that \equ(A10.13) can be used to find a bound on $x^k$ in
$W_{\r'_k,\x'_k,\k'_k,\m_0}$
%
$$\eqalign{
|x^k|_{\r'_k,\x'_k,\k'_k,\m_0}\le& \sum_{m\ge2}
E_0 B^k \bigl(\k_0^{-2}d\d^{-1}\x_0^{-1}D_\F B^{-1}\bigr)^m \pmatrix{k-1\cr
m-1}+\cr
&+\sum_{m\ge1}
D_X B^{k-1}
\bigl(\k_0^{-2}d\d^{-1}\x_0^{-1} D_\F B^{-1}\bigr)^m \pmatrix{k-1\cr
m-1}+\cr
&+\sum_{r=1}^{k-1}\sum_{m=1}^{k-r}
D_H B^{k-r}\bigl(\k_0^{-2}d\d^{-1}\x_0^{-1}D_\F B^{-1}\bigr)^m
\pmatrix{k-r-1\cr m-1}\cr}\Eqa(A10.17)$$
%
where the factor $d$ is a numerical constant arising from
various bounds (for instance from
bounding from
below $\d/2$ and $(1-e^{-\x_0\d/4})$ by
a constant times
$\d$ and $\x_0\d$, respectively,
in the dimensional bounds leading to \equ(A10.17));
the term with $r=0$ in the third line of \equ(A10.17)
is bounded here by the first line (having absorbed numerical constants in
the definition of $d$).
%

This implies that $|x^k|_{\r'_k,\x'_k,\k'_k,\m_0}$ is bounded above by:
%
$$B^{k-1}\Bigl(1+{d D_\F\over\k_0^2\x_0 B\d}\Bigr)^k
\,\Big[E_0B({d D_\F\over \k_0^2\x_0 B\d})^2+
D_X({d D_\F\over \k_0^2\d\x_0 B}) + D_H ({d D_\F\over \k_0^2\x_0
B\d})\Bigr]\Eqa(A10.18)$$
%

Let $\bar\g>1$ be fixed large enough so that the diagonal cut of the function
$x$ can be bounded by $\bar\g\d^{-2}$ times \equ(A10.18) on the smaller
domain $W(\r_k,\x_k,\k_k,\m_0)$.
Note that the function $\F_k$ can be bounded dimensionally in the
domain $W(\r_k,\x_k,\k_k,\m_0)$ by $2^4 K (\x_0\d)^{-4}\md$
times the bound \equ(A10.18) of $x_k$ in the domain
$W(\r'_k,\x'_k,\k'_k,\m_0)$.
We shall proceed by choosing $B$ so large that
$(1+d D_\F/(\k_0^2\x_0 B\d))^k<e$  and so that
each of the three terms in square brackets is bounded by
$D_X (\bar \g^{-1} \x_0^2 \d^2)^2/(3e)$.

This is achieved if we
suppose
that $\d=1/4k$, so that $\x_h>\x_0/2$, for $h\le k$, that
$\bar\g>2^4 K$ and if we impose:
%
$$\d={1\over4k},\qquad \bar \g^{-1}\x_h^4 \d^2<1\quad {\rm for\ } h\le k
\Eqa(A10.19)$$
%
and:
%
$$\eqalign{
B>&{d k D_\F\over \k_0^2\x_0 \d},\qquad
B>{\bar\g^2
\over\d^4\x_0^4}3eE_0{D_\F^2\over D_X}{d^2\over (\k_0^2\x_0 \d)^2},\cr
B>&{\bar\g^2 \over\d^4\x_0^4}3e{d D_\F\over \k_0^2\x_0 \d },\qquad
B >{\bar\g^2\over\x_0^4\d^4} 3e {D_H D_\F\over D_X}
{d \over \k_0^2\x_0 \d}\cr}\Eqa(A10.20)$$
%
We see that:
%
$$|x^{k}|_{\r_k,\x_k,\k_k,\m_0}\le D_X B^{k-1}  (\bar
\g^{-1}\x_0^2\d^2)^2 \Eqa(A10.21)$$
%
%

Giving up explicit control of the constants (for simplicity of
notation)
we see that \equ(A10.20)
can be implied by the stronger conditions, having chosen
$\d=1/{4k}$ for an arbitrarily fixed $k$, and replacing $D_\F$ by
$D_X\g^{\prime\prime}\md$, $D_H$ by $\g'\m$ and $D_X$ by $\g \m$
(see the comment after
\equ(A10.15)):
%
$$\eqalign{
B>& c_0 \m\md k^2,\qquad B>c_0 \m k^6 \h^{-1}
\cr B>& c_0 \m
\h^{-1/2} k^5,\qquad B> \m k^5\md\cr}\Eqa(A10.22)$$
%
for some $c_0>0$.

Therefore, if $h\le k$, we see that $B$ can be taken
$B=B_0 k^6 \m\h^{-1} $ for some constant $B_0>1$.
And also, recalling that $\g',\g''$ have been chosen larger than $\g$,
that $\bar \g^2>2^4K$ and \equ(A10.19), we see that:
%
$$\eqalign{
|\F_k|_{\r_k,\x_k,\k_k,\m_0}&\le D_X
B^{k-1}(\bar \g^{-1}\x_h^2\d^2)^2{2^4K\over(\x_0\d)^4\pd}\le B^{k-1}D_\F\cr
|x^{kD}|_{\r_k,\x_k,\k_k,\m_0}
&\le B^{k-1} D_X\le B^{k-1} D_H\cr}\Eqa(A10.23)$$

Hence the inductive proof works
and we get:
%
$$\eqalign{
|x^k|_{\r_k,\x_k,\k_k,\m_0}<&c_1\m(\m\h^{-1}B_0)^{k-1}
(k-1)!^6,\cr
|H_k|_{\r_k,\x_k,\k_k,\m_0}<&    c_1\m(\m\h^{-1}B_0)^{k-1}(k-1)!^6\cr
|\F^k|_{\r_k,\x_k,\k_k,\m_0}<&c_1\m\md(\m\h^{-1}B_0)^{k-1}(k-1)!^6\cr}
\Eqa(A10.24)$$
%
for some constants $B_0,c_1$ depending on the maximum of the
coefficients in $F,f,J,$ $J_0,$ $g_0$
in their analyticity domains as well as
on the sizes $\rb_0,\xb_0$ of the domains.

We see that the above results \equ(A10.4),\equ(A10.5) follow immediately,
{\it under the present assumption that $f$ is a trigonometric
polynomial}: and in fact we get a better bound as the remainder will be
of order
$O(e^{-b/(\h N)})$ (recall the definition of $b$ in the comment after
\equ(A10.13))
%
\ie much
smaller than what is declared in \equ(A10.5). This is obtained by pushing
the perturbation analysis up to an order $s_0=b/N\h$.
%
The remainders are estimated via the
analyticity. They are of order $e^{-b/N\h}$, if
$\m=\h^c$ and $\h^{c-7} B_0 (b/N)^6<e^{-1}$.

But we still have to relax the trigonometric polynomial assumption.  We
follow the usual cut off technique to exploit the fast decay as
$|\nn|\to\io$ of the Fourier transform, which allows us to regard $f$
``almost'' as a trigonometric polynomial.

More precisely let $N_0$ be a cut off parameter so that if $f$ is
$\rb_0,\xb_0$ analytic then:
%
$$||f^{[>N_0]}||_{{\rb_0\over2}{\xb_0\over2}}<c_3
\xb_0^{-1}||f||_{\rb_0\xb_0} e^{-\xb_0 N_0/2},\qquad
||f^{[\le N_0]}||_{{\rb_0\over2}{\xb_0\over2}}<c_3 \xb_0^{-1}
||f||_{\rb_0\xb_0}\Eqa(A10.25)$$
%
for a suitable constant $c_3$.

We fix, therefore, $N_0=\h^{-1/2}$ and apply the above argument to the
hamiltonian with $f^{[\le N_0]}$ replacing $f$. Then we can perform
perturbation theory up to $s_0=b/N_0\h=b\h^{-1/2}$.
%
We construct
in this way a canonical transformation casting the hamiltonian $H_0+\m
f^{[\le N_0]}$ exactly in the form \equ(A10.5), provided
$\h^{c-10} B_0 b^6<e^{-1}$ and  $e^{-b/\sqrt{\h}}<\h^x$.
%

The same transformation
will cast the total hamiltonian (\ie with $f$ rather than $f^{[\le N_0]}$)
in the form \equ(A10.5) with a remainder which will be $O(e^{-\xb_0
N_0/2})$ $=O(e^{-c'/\sqrt{\h}})$
by \equ(A10.25). This yields \equ(A10.5).

Therefore we see that the above analysis can be carried out if $c>10$ and
if $|\b|<B^*\h^{-\s}$, $0<\s<1/2$ (which was necessary in the first part of
our discussion).

Thus, by the analyticity in $\b$,
%
the invariant tori and their whiskers, constructed for \equ(A10.1)
via lemma 1', can be expanded in powers of $\b$ and, if $\b=1$, the
power series terms are bounded, at order $k$, proportionally to
$(\bar b\h^{-\s})^{k}$, for some $\bar b>0$. The round spacing in the
whiskers ladders will be smaller than $\h^y$ for some $y>0$ provided $x$
is large enough: and by taking $x$ large enough and $\h$ small enough
we can make $y$ larger than any prefixed amount.

{\it In other words the $F$ in \equ(A10.1)  can be regarded as formally of
order $\pd$ (actually $\h^\s$ with any $\s<1/2$)
and along the line $A\in\md[\bar A,\tilde A]$ the whiskers
form a ladder with round spacing that is smaller than any power of $\h$
as $\h\to0$}.
%
\vglue1.5truecm


{\bf Appendix 11: Scattering phase shifts and intrinsic angles}

\penalty10000

\vskip0.5truecm\numsec=11\numfor=1

\penalty10000

In this section we show that if the homoclinic splitting is exponentially
small, also the scattering phase shifts are such. A fact checked, for some
even models, by explicit estimates in \S11.

To express the homoclinic angles in the intrinsic coordinates, we
consider the derivatives of $Q$, see \equ(10.7), with respect to $\aa$
at $\aa=\0$.  This means that we consider:
%
$$\eqalign{
\dpr_{\a_j}Q\=&\dpr_\pps Q\cdot\dpr_{\a_j}\pps=\cr
=&\Big[\Bigl(\dpr_p Z(p_\0,0,\pps^s_0)\,\dpr_\pps p_\pps-
\dpr_q Z(0,q_\0,\pps^u_0)\,\dpr_\pps
q_\pps\,(1+\dpr_\pps\V\s)\Bigr)+\cr
+&\sum_\nn\,i\nn\Bigl(
X^{s}_\nn-X^{u}_\nn(1+\dpr_\pps\V\s)\Bigr)\Big]\cdot\dpr_{\a_j}\pps
\cr}\Eqa(A11.1)$$
%
where $X^s=X^{sk}=X^{sk}(p_0,0,\pps^s_0),\,X^u=X^{uk}=X^{uk}
(0,q_0,\pps^u_0)$, and all the derivatives are evaluated at $\pps=\0$.

The above \equ(A11.1) is written symbolically: various indices are
omitted as the contraction rules are obvious.

For instance the derivatives with respect to $\pps$ of the $Q^0$
functions, which give what we have just called the intrinsic angles,
are given by \equ(A11.1) with $\V\s\=\0$. The second line in \equ(A11.1), with
$\V\s\=\0$ can be studied by remarking that \equ(10.2),\equ(10.3) imply:
%
$$\eqalign{
&\dpr_p Z_-(p_\0,0,\pps^s_0)\,\dpr_\pps p_\pps+\dpr_\pps
Z_-(p_\0,0,\pps^s_0)=0\cr
&\dpr_q Z_-(0,q_\0,\pps^u_0)\,\dpr_\pps q_\pps+\dpr_\pps
Z_-(0,q_\0,\pps^u_0)=0\cr}\Eqa(A11.2)$$
%
Furthermore considering:
%
$$\z_s(t)=Z(p_\0e^{-gt},0,\pps_0^s+\oo t),\qquad\z_u(t)=Z(0,q_\0e^{gt},
\pps^u_0+\oo t)\Eqa(A11.3)$$
%
we get:
%
$$\dot\z_s(t)=-e^{-gt} gp_\0 \dpr_p Z +\oo\cdot\V\dpr_{\V \ps} Z,
\qquad \dot\z_u(t)=e^{gt} gq_\0 \dpr_q Z +\oo\cdot\V\dpr_{\V \ps} Z
\Eqa(A11.4)$$
%
Denoting $\dpr_{\cdot}Z|_p$ the derivatives of $Z$ at
$p=p_0,q=0,\pps=\0$ and $\dpr_{\cdot} Z|_q$ the derivative at
$q=q_0,p=0,\pps=\0$ we add and subtract terms and use \equ(A11.4) to
transform the part of the second line in \equ(A11.1) not containing the
terms proportional to $\dpr_\pps\V\s$ into:
%
\def\txt{\textstyle}
$$\eqalignno{ \txt&\txt{\dpr_p Z\over\dpr_p Z_-}\,\dpr_\pps Z_-|_p
-{\dpr_q Z\over\dpr_q Z_-}\,\dpr_\pps Z_-|_q\=\cr
\txt&\txt\=\Bigl[{\dpr_p Z\over\dpr_p Z_-}-{\dpr_q Z\over\dpr_q
Z_-}\Bigr]\, \dpr_\pps Z_-|_p+{\dpr_q Z\over \dpr_q Z_-}\,
\Bigl[\dpr_\pps Z_-|_p-\dpr_\pps Z_-|_q\Bigr] \=&\txt\eqa(A11.5)\cr
\txt&\txt\=-\Big[{\dot\z_s-\oo\cdot\V\dpr_\pps Z|_p\over
\dot\z_{s-}-\oo\cdot\V\dpr_\pps Z_-|_p}- {\dot\z_u-\oo\cdot\V\dpr_\pps
Z|_q\over \dot\z_{u-}-\oo\cdot\V\dpr_\pps Z_-|_q}\Big]\,\dpr_\pps
Z_-|_p +{\dpr_q Z\over\dpr_q Z_-}\,\Big[\dpr_\pps Z_-|_p-\dpr_\pps
Z_-|_q\Big]\cr}$$
%
which simply are the derivatives of the above introduced functions
$Q^0$ instead of the $Q$.

We can remark that $\dot \z_s=\dot\z_u$, at $t=0$, because we are at a
homoclinic point. Furthermore the denominators in \equ(A11.5) are $(-2g)$
for $\m=0$, hence they are bounded away from zero if $\m$ is small enough.
And the derivatives $\dpr_\pps Z_-|_p$ and $\dpr_q Z$ are small of order
$\m$ as $\m\to0$ (as they vanish if $\m=0$).
Hence we see that the derivatives of the homoclinic equation in the
$\aa$ coordinates are related to the derivatives in the $\pps$
coordinates by terms proportional either to the $\pps$ derivatives of the
scattering phase shifts or to the $\pps$ derivatives of the splitting itself
multiplied by $O(\m)$.

We see that this implies that if $\dpr_{\ps_i}\s_j(\0)=\0$, as in fact
we show in \S10 for even models, and if the intrinsic intersection tensor
is exponentially small then also the natural intersection tensor is
exponentially small. One can, likewise check that also the converse
holds: in general, for even models and to leading order in
$\m$, the two notions of angles coincide.

We can also deduce, in general, from the knowledge that the splitting is
exponentially small for all $\aa$, as it is the case in even models
if $l=2$ or $l\ge3$
and all rotations fast, as shown in \S11, that the scattering phase
shifts must be exponentially small (as we can ``compute'' them by
difference from \equ(A11.1), and as we can infer, by a dimensional bound,
that exponentially small in $\aa$ implies exponentially small in
$\pps$, because the $\aa$ are analytic in the $\pps$ and vice versa).
%
\vglue2.truecm

\penalty-200

{\bf Appendix 12: Compatibility. Homoclinic identities}

\penalty10000

\vskip0.5truecm\numsec=12\numfor=1

\penalty10000

\def\thth{{\V \theta}}\def\2{{1\over2}}
%
{\bf 1}) Here we want to check directly that $\V F^k_{\su\i}$ has
vanishing mean value, which is a crucial fact in the derivation of the
main equations of \S6.

More precisely, {\it assume} that, for $1\le j\le k-1$,
$\V X^j(t)$ in \equ(6.11) has the form  $\V X^j(t)= \V X^j(\oo_{su} t,t)$,
with $\V X^j(\pps,t)$ periodic in $\pps$ and $\dpr_t \V X^j(\pps,t)$
converging to $0$
exponentially fast as $t \to \i$ and  recall that, if $\aa$ is the point
over which we construct the whisker, then  $\V X^0$ $=$
$(I^0(t),\f^0(t),\AA,\aa+\V \th(t))$ so that:
%
$$\V X^0(\pps,\io)\=
\V X^0_\i(\pps) = (0,0,\AA,\pps+\V \th(\i))\Eqa(A12.1)$$
%
(the limit $\V \th(\i)$ being reached at an exponential rate).
Then, for $1\le j\le k$, $\V F^j(t)$, which is defined in terms of the
$\V X^i$, $0\le i\le j-1$ [see \equ(6.10)], has also the form
$\V F^j(t)=\V F^j(\pps+\oo_{su}t,t)$ (with
$\dpr_t \V F^j(\pps,t) \to 0$ as $t\to\i$ exponentially fast) and
the limits $\V X^j(\pps,\i) \= \V X^j_\i(\pps)$ ($j\le k-1$)  and
$\V F^j(\pps,\i) \= \V F^j_\i(\pps)$ ($1\le j\le k$) are well
defined.

We then show that {\it from the above assumptions it follows} that:
%
$$\ii \V F^k_{\i\su}(\pps)  d\pps = \0\Eqa(A12.2)$$
%
(recall that the subscripts $(\cdot)_+,(\cdot)_-,(\cdot)_\su,
(\cdot)_\giu$ denote projections onto, respectively, $I$,$\f$,
$\AA$,$\aa$-coordinates). The argument is adapted from the similar
argument in [CZ].

From the definitions of \S6 it is clear that the function $\V F^k_\io(\aa)$
is given by:
%
$$\V F^k_{\io\su}\=-[\Dpr f(\sum_{j=0}^{k-1}\m^j \V X^j_\io)]_{k-1}=
[\Dpr H(\sum_{j=0}^{k-1}\m^j \V X^j_\io)]_k\Eqa(A12.3)$$
%
where $[\cdot]_k$ denotes the $k$-th order coefficient of a power series in
$\m$; $\V \dpr$ $\=$ $\dpr_\aa$; $H=H_0+\m f$ is the hamiltonian and the
sums are the arguments of the functions $\Dpr f,\Dpr H$.

On the other hand the function $Y(\pps)=\sum_{j=0}^{k-1}\m^j
X^j_\io(\pps)$ is such that $Y(\oo_{su}t)$ verifies the Hamilton
equations up to order $k-1$ in $\m$, \ie if $D$ is the operator
$D=\oo_{su}\cdot\Dpr$ (where, of course, $\V \dpr$ stands now for
$\dpr_{\V \ps}$):
%
$$\dot Y\=D Y=E\dpr H(Y)\qquad{\rm up\ to\ order\ }
k-1\Eqa(A12.4)$$
%
where $\dpr=(\dpr_I,\dpr_\f,\dpr_\AA,\Dpr)$ and $E$ is the
obvious symplectic matrix (so that $E^2=-1$): $\dpr H(Y)$ is
$\dpr H$ evaluated at $Y(\pps)$ and $D Y$ as well as $Y$ are
evaluated at $\pps$, with $\pps=\oo t$.

We rewrite \equ(A12.4) as:
%
$$E\,D Y+\dpr H(Y)=0\qquad{\rm up\ to\ order\ }k-1
\Eqa(A12.5)$$
%
and we note the identities:
%
$$\ig \dpr Y(\pps)\cdot ED Y(\pps)\,d\pps\=0,\qquad \ig \dpr
Y(\pps)\cdot \dpr H(Y(\pps))\,d\pps=\V0 \Eqa(A12.6)$$
%
valid for {\it any} periodic function $Y(\pps)$ of $\pps$; hence in our
case they are valid to {\it all} orders in $\m$. They are trivial
consequences of the periodicity in $\pps$ of $Y(\pps)$ and of the peculiar
structure of $E$ or,  in the case of the second, of the remark that the
integrand is just the $\Dpr$ of $H(Y(\pps))$.

Therefore we have:
%
$$\ig \dpr Y\cdot(EDY+\dpr H(Y))\,d\pps=0
\Eqa(A12.7)$$
%
to all orders in $\m$. We shall write it explicitly to order $k$: remarking
that $Y$ has order $k-1$ and that \equ(A12.5) holds up to order $k-1$,
this gives:
%
$$\ig[\dpr Y]_0\cdot[\dpr H(Y)]_k=0\Eqa(A12.8)$$
%
but $[Y(\pps)]_0=(0,0,\AA,\pps+\V\th_\io)$ so that \equ(A12.8)
becomes simply \equ(A12.2).

\vskip.3truecm
{\bf 2}) In fact, the above method can be used to prove other
interesting {\it homoclinic identities}, namely:
%
$$\eqalignno{
-2 \ig_0^\io&  F^k_{\su \V 0 i}(t) dt=
\sum_{j=1}^{k-1}\media{ X^j_+(\cdot,\io)
\dpr_{\ps_i} X^{k-j}_-(\cdot,\io) +\V X^j_\su(\cdot,\io) \cdot
\dpr_{\ps_i}X^{k-j}_\giu(\cdot,\io)} + \cr
&-\sum_{j=1}^{k-1}\media{ X^j_+(\cdot,0)
\dpr_{\ps_i} X^{k-j}_-(\cdot,0) +\V X^j_\su(\cdot,0) \cdot
\dpr_{\ps_i}X^{k-j}_\giu(\cdot,0)}\cr
2\ig_0^{+\io}&w(\t)F^k_{\0}(\t)=w_\su(0)\cdot X^k_{\su\0}(0)&
\eqa(A12.9)\cr}$$
%
where $\V F^k_{\su \V 0} \= (F^k_{\su \V 0 1},\ldots,F^k_{\su \V 0
l-1})$, and $w(t)$ is defined in \equ(6.36).

To check this identity we use that $X(\pps+\oo t;t,\aa)\=X(\pps+\oo
t,t)$ describes for all $\pps$ a motion on the stable
whisker and in particular satisfies the Hamilton equation:
%
$${d\over dt}\ X^j(\pps+ \oo t, t) = L(t) X^j(\pps+ \oo t, t) + F^j(\pps+
\oo t,t)\Eqa(A12.10)$$
%
which, performing the $t$-derivative and using the arbitrariness
of $\pps$, can be rewritten as:
%
$$E\  D X^j(\pps,t) + E \ \dpr_t X^j(\pps,t) + \left[
\dpr H \big(\sum_{i=0}^j \m^i
X^i(\pps,t)\big)\right]_j = 0\ ,\qquad (0\le j\le k-1)\Eqa(A12.11)$$
%
Similarly to above, let $Y(\pps,t) \= \sum_{j=0}^{k-1} \m^j
X^j(\pps,t)$, then by \equ(A12.6) we see that:
%
$$\ig \dpr_{\ps_i} Y \cdot \big( E \ DY + E\ \dpr_t Y + \dpr H(Y)\big)
d \pps = \ig \dpr_{\ps_i} Y \cdot E \ \dpr_t Y d \pps\Eqa(A12.12)$$
%
to all orders in $\m$; and taking the order $k$ in $\m$, using \equ(A12.10),
\equ(A12.11), one recognizes:
%
$$-F^k_{\su \V 0 i} (t) = \sum_{j=1}^{k-1} \langle
\dpr_{\ps_i} X^j \cdot E \ \dpr_t X^{k-j} \rangle\Eqa(A12.13)$$
%
Finally one integrates the latter identity between $0$ and $\io$:
the integral can be performed by parts first in $t$ and then in $\ps_i$ and,
after changing $j$ to $k-j$ and using $E^T=-E$, the \equ(A12.9)
is easily obtained.

The second identity is obtained in the same way by multiplying
by $\dot Y(t)$ the expression $E\dot Y(t)+\dpr H(Y(t))$, which vanishes
up to order $k-1$, and by
integrating from $0$ to $t$. One obtains the variation $H(Y(t))-H(Y(0))$.
Writing the above identity to order $k$ and using that the energy $H(Y)$
is conserved up to order $k-1$ we get:
%
$$\ig_0^t\dot X^0(\t)\big[\dpr H(Y(\t))\big]_kd\t=\Big[\dpr
H_0(X^0(\t))\,X^k(\t)\Big]\big|_0^t\Eqa(A12.14)$$
%
We can write the Hamilton equations in the form $\dot X=E\dpr H$ so that
the above relation is (using $\dot X^0=E w+ E\dpr h$, and \equ(6.36)
and $\big[E\dpr H(Y)\big]_k\=F^k$ and $\dpr H_0 X^k\=w X^k+\oo\cdot
X^k_\su$):
%
$$-\ig_0^t w(\t)F^k_\0(\t)d\t=\media{w(t)
X^k(t)-w(0)X^k(0)}+\oo\cdot\media{X^k_\su(t)-X^k_\su(0)}\Eqa(A12.15)$$
%
(which would hold even without the averages over $\pps$).
Hence, using $\dot X^k_\su=F^k_\su$ and the second of \equ(6.34) and
\equ(6.36) [which tell us that $w(0)X^k(0)=-\ig_0^\io w(\t)F^k(\t)$] we
get \equ(A12.9). In the special case considered in \S9 it is
$w_\su(\t)\=\0,\,w_\giu(\t)\=\0,$  $w_+(\t)\=\dot\f_0(\t)=\dot\f_0(0)
w_{-}(\t)$ and we get:
%
$$2\ig_0^\io w_{-}(\t)F^k_{+\0}(\t)d\t=-2\dot\f_0(0)^{-1}
\ig_0^\io\oo\cdot F^k_{\su\0}(\t)\Eqa(A12.16)$$
%
\vglue2.truecm

\penalty-200

{\bf Appendix A13: Second (and third) order whiskers and phase shifts.}

\penalty10000

\vskip0.5truecm\numsec=13\numfor=1

\penalty10000

% Shorthands local to Appendix 13.
% \II, \OO, \FFF: calligraphic I, O, F (\II and \OO are used below as operator symbols).
% \IId: \lis applied to \II^{\,2}; \lis is defined outside this section (presumably an overbar -- confirm).
\def\II{{\cal I}}\def\OO{{\cal O}}\def\IId{{\lis\II^{\,2}}}
\def\FFF{{\cal F}}
% \mm: \V applied to \m;  \eln{x}: the exponential e^{i\l\nn\cdot\oo x}.
\def\mm{{\V\m}}\def\eln#1{{e^{i\l\nn\cdot\oo\,#1}}}
% \erm{x}: the same exponential with (\r,\V\m) in place of (\l,\nn).
\def\erm#1{{e^{i\r\V\m\cdot\oo\,#1}}}
% \Eln, \Erm: the symbols E^\l_\n and E^\r_\m.
\def\Eln{{E^\l_\n}}\def\Erm{{E^\r_\m}}
% \el{j}{x} and \El{j}: indexed versions, with \l_j, \nn_j (resp. \n_j) in place of \l, \nn (resp. \n).
\def\el#1#2{{e^{i\l_#1\nn_#1\cdot\oo\,#2}}}
\def\El#1{{E^{\l_#1}_{\n_#1}}}
%
{\bf 1)} Here we prove $c)$ of theorem 3, \S 10.

Consider the hamiltonian:
%
$$H=\oo\cdot\V
A+{1\over2}J^{-1}\AA\cdot\AA+{I^2\over2J_0}+g_0^2J_0(\cos\f-1)+\m\sum_\n
f_\n\cos(\aa\cdot\nn+n\f)\Eqa(A13.1)$$
%
with $\n\=(n,\nn), \,\nn\ne\0$, $J_0,g_0,f_\n$ constants and $J^{-1}$ being
a constant diagonal matrix.

Then $F^h_-,F^h_\giu$ vanish identically and:
%
$$\eqalign{
F^1_+=&-\dpr_\f f,\qquad F^1_\su=-\dpr_\aa f\cr
F^2_+=&-{J_0g_0^2\over2}\sin\f\,
(X^1_-)^2-\dpr_{\f^2} f X^1_- -\dpr_{\f\aa} f X^1_\giu\cr
F^2_\su=&-\dpr_{\aa\f}f X^1_--\dpr_{\aa^2}f X^1_\giu\cr
F^3_+=&-{J_0g^2_0\over2}\,2X_-^1X_-^2\sin\f-{J_0g_0^2\over
3!}\cos\f(X^1_-)^3-\dpr_{\f^2}f\,X_-^2+\cr
&-\dpr_{\aa\f} f
X^2_\giu-{1\over2}\dpr_{\f\aa^2}fX^1_\giu X^1_\giu-\dpr_{\f^2\aa}f
X^1_\giu X^1_--{1\over2}\dpr_{\f^3}f\,(X^1_-)^2\cr
F^3_\su=&-\dpr_{\aa\f}fX^2_--\dpr_{\aa^2}f
X_\giu^2-{1\over2}\dpr_{\aa^3}f X^1_\giu
X^1_\giu-\dpr_{\f\aa^2}f\,X^1_\giu X^1_--
{1\over2}\dpr_{\aa\f^2}f X^1_-X^1_-\cr}
\Eqa(A13.2)$$
%
where $\f$ in $\cos \f,\,\sin\f$ is $\f\=\f(t)\=\f_0(t)$, see
\equ(A9.5), \ie it is the unperturbed separatrix motion.

To compute the $X^h$ we define the operator $\II$ as in \S10 and
we introduce the following operators:
%
$$\eqalign{
\IId F(t)=&\II^2F(t)-\II^2 F(0^\s)\cr
\OO F(t)= &w_{21}(t)\bigl(\II \,w_{22}(\t)
F(\t)\bigr)(t)-w_{22}(t)\bigl(\II
w_{21}(\t)F(\t)\bigr)|^t_{0^\s}\cr
\OO_+ F(t)=& w_{11}(t)\bigl(\II \,w_{22}(\t)
F(\t)\bigr)(t)-w_{12}(t)\bigl(\II
w_{21}(\t)F(\t)\bigr)|^t_{0^\s}\cr}\Eqa(A13.3)$$
%
where the notation of \equ(10.13) is used, and $\s=\sign(t)$. Then:
%
$$\eqalignno{
X^1_-(t)=&w_{21}(t)X^1_+(0)+w_{21}(t)\ig_0^t w_{22}(\t) F^1_+(\t)
d\t-w_{22}(t)\ig_0^t w_{21}(\t) F^1_+(\t)\,d\t=\cr
=&w_{21}(t)\ig_{\s\io}^t w_{22}(\t) F^1_+(\t)d\t-w_{22}(t)\ig_0^t w_{21}(\t)
F^1_+(\t)d\t=\OO F_+^1(t)\cr
X^1_+=&w_{11}(t)\ig_{\s\io}^t
w_{22}(\t)F^1_+(\t)d\t-w_{12}(t)\ig_0^t w_{21}(\t)
F^1_+(\t)d\t=\OO_+ F^1_+(t)&\eqa(A13.4)\cr
X^1_\giu(t)=& J^{-1}\IId F^1_\su(t),\qquad
\qquad X^1_\su(t)=\II F_\su^1(t)\cr}$$
%
where we used the boundedness criterion of \S6 to eliminate the
exponentially, or linearly (in the case of $X_\giu$), divergent terms
and to find the initial conditions.

In fact \equ(A13.4)
{\it can be immediately generalized to arbitrary order}:
%
$$\eqalign{X^h_-(t)=&\OO F^h_+(t),\qquad\qquad X^h_+(t)=\OO_+ F^h_+\cr
X^h_\giu(t)=&J^{-1}\IId F^h_\su(t),\qquad X^h_\su(t)=\II F^h_\su(t)\cr}
\Eqa(A13.5)$$
%

More explicitly:
%
$$\eqalign{
X^1_-=&\OO(-\dpr_\f f),\qquad X^1_\giu=J^{-1}\IId (-\dpr_\aa f)\cr
X^2_-=&\OO\Big(-{J_0g_0^2\over2}\sin\f\,(X_-^1)^2\Big)+\OO\Big(-\dpr_{\f^2} f
X^1_--\dpr_{\aa\f} f\cdot X^1_\giu\Big)\cr
X^2_\giu=&J^{-1}\IId\Big(-\dpr_{\aa^2}f X^1_\giu-\dpr_{\f\aa}f X^1_-\Big)
\cr}\Eqa(A13.6)$$
%
leading to:
%
$$\eqalignno{
X_\su^1(t)=&-\II(\dpr_\aa f)\cr
X^2_\su(t)=&-\II\Big(\dpr_{\aa\f} f\,\OO(-\dpr_\f f)\Big)-
\II\Big(\dpr_{\aa^2} f
J^{-1}\IId(-\dpr_\aa f)\Big)&\eqa(A13.7)\cr
X^3_\su(t)=&-\II\Big(\dpr_{\aa\f} f\OO\big(
-{J_0 g_0^2\over2}\sin\f\OO(\dpr_\f
f)\OO(\dpr_\f f)\big)\Big)+\cr
&-\II\Big(\dpr_{\aa\f} f\OO\big(\dpr_{\f^2} f\OO(\dpr_\f f)\big)\Big)
-\II\Big(\dpr_{\aa\f}f\OO\big(\dpr_{\aa\f} f\, J^{-1}\IId
(\dpr_\aa f)\big)\Big)+\cr
&-\II\Big(\dpr_{\aa^2}fJ^{-1}\IId\big(\dpr_{\aa^2} f J^{-1}\IId (\dpr_\aa
f)\big)\Big)-
\II\Big(\dpr_{\aa^2}f\,J^{-1}\IId\big(\dpr_{\aa\f}
f \OO(\dpr_\f f)\big)\Big)+\cr
&-{1\over2}\II\Big(\dpr_{\aa^3}\,fJ^{-1}\IId(\dpr_\aa f) J^{-1}
\IId(\dpr_\aa f)\Big)
-\II\Big(\dpr_{\f\aa^2}f J^{-1}\IId(\dpr_\aa f)\OO(\dpr_\f f)\Big)+\cr
&-{1\over2}\II\Big(\dpr_{\aa\f^2} f\OO(\dpr_\f f)\OO(\dpr_\f f)\Big)\cr}$$
%

We shall see that, setting $\Eln(t)\=\eln{t}e^{i\l n\f(t)}$, with
$\l=\pm1$, the splitting and the phase shifts can be expressed in terms
of the matrices:
%
$$\eqalignno{
&H^1_{\l\n}(t)={1\over2i\l}\II(\Eln)(t)\cr
&H^2_{\l\n\r\m}={1\over2^2i\r}\bigl(nm\II\Eln\OO(\Erm)+\mm\cdot J^{-1}
\nn\,\II(\Eln\IId\Erm)\bigr)\cr
&H^3_{\l_1\n_1\l_2\n_2\l_3\n_3}={\nn_1\cdot J^{-1}
\nn_2\,\nn_2\cdot J^{-1}
\nn_3\over 2^3 i\l_3}\II(\El1 \IId(\El2\IId\El3))+&\eqa(A13.8)\cr
&+{1\over2}{\nn_1\cdot J^{-1}\nn_2\,\nn_1\cdot J^{-1}
\nn_3\over2^3i\l_1\l_2\l_3}
\II\Big(\El1(\IId\El2)\,(\IId
\El3)\Big)+\cr
&+{n_2n_3\nn_1\cdot J^{-1}\nn_2\over2^3i\l_3}
\II\Big(\El1\IId(\El2\OO\El3)\Big)
+{n_1n_2\nn_2\cdot J^{-1}\nn_3\over2^3i\l_3}\II\Big(\El1\OO(\El2
\IId\El3)\Big)+\cr
&+{n_1n_3\nn_1\cdot J^{-1}\nn_2\over2^3i\l_1\l_2\l_3}\II\Big(\El1
\IId(\El2\OO(\El3))\Big)+\cr
&-{n_1n_2n_3\over
2^3\l_2\l_3}\II\Big(\El1\OO(-{J_0g_0^2\over2}\sin\f\,\OO(\El2)\OO(\El3))
\Big)+\cr
&+{n_1n_2^2n_3\over2^3 i\l_3}\II\Big(\El1\OO(\El2\OO(\El3))\Big)+
{n_1^2n_2n_3\over2}{1\over2^3i\l_1\l_2\l_3}\II\Big(
\El1\OO(\El2)\OO(\El3)\Big)\cr}$$
%
where $\m\=(m,\V\m)$; each of the addends above arises from a
corresponding one in \equ(A13.7): if we label $12345678$ the addends in
\equ(A13.7) then they generate the r.h.s of \equ(A13.8) in the order
$46537128$.

In fact one finds:
%
$$\eqalignno{
\V Q_\su^1(\aa)=&\sum_\s\s\sum_{\l\n}\nn f_\n e^{i\l\nn\cdot\aa}
H^1_{\l\n}(0^\s)\cr
\V Q_\su^2(\aa)=&\sum_\s\s\sum_{\l\n\r\m}\nn f_\n f_\m e^{i(\l\nn+\r\mm)
\cdot\aa}H^2_{\l\n\r\m}(0^\s)&\eqa(A13.9)\cr
\V Q_\su^3(\aa)=&\sum_\s\s\sum_{\l_i\n_i}\nn_1(\prod f_{\n_i}) e^{i\sum
\l_i\nn_i\cdot\aa}H^3_{\l_1\n_1\l_2\n_2\l_3\n_3}(0^\s)\cr}$$
%
Note that the $\V Q_\su$ functions are naturally expressed in terms of
the $\aa$ variables in \equ(A13.9), and the $\pps$ variables of the
definition \equ(10.7) do not appear explicitly.

The scattering phase shifts are related to the time average of $-\V X^h_\giu$,
see \equ(10.30),\equ(10.31), so that $\V\s^h[\aa]\=-J^{-1}\sum_\s
(\II^2 F_\su)(0^\s)\s$:
%
$$\eqalign{
\V\s^1[\aa]=&-J^{-1}\sum_\s\s\sum_{\l\n}\nn f_\n e^{i\l\nn\cdot\aa}
\II(H^1_{\l\n})(0^\s)\cr
\V\s^2[\aa]=&-J^{-1}
\sum_\s\s\sum_{\l\n\r\m}\nn f_\n f_\m e^{i(\l\nn+\r\mm)
\cdot\aa}\II(H^2_{\l\n\r\m})(0^\s)\cr
\V\s^3[\aa]=&-J^{-1}\sum_\s\s\sum_{\l_i\n_i}\nn_1(\prod f_{\n_i})
e^{i\sum\l_i\nn_i\cdot\aa}\II(H^3_{\l_1\n_1\l_2\n_2\l_3\n_3})(0^\s)\cr}
\Eqa(A13.10)$$
%

To study the $H$ matrices we introduce:
%
$$\eqalign{
\e^{n\l}(t)=&e^{i\l\f(t)n}-1,\qquad\kern1.truecm
\sin n\f(t)\,\sinh gt=2n+\g_n(t)\cr
\bar\e^n(t)=&\e^{n\l}(t)-i\l\sin n\f(t)\=\cos n\f(t)-1\cr
}\Eqa(A13.11)$$
%
with $\bar\e^n,\g_n\tende{t\to\pm\io}0$ faster than $e^{-g\,\s t}$ by
$\simeq e^{-g\,\s t}$. And we see that:
%
$$\sum_\s H^1_{\l\n}(0^\s)\s=
\sum_\s{1\over2i\l}\II\big(\eln\t(1+\e^{n\l}(\t))\big)(0^\s)={1\over2i\l}
\ig_{+\io}^{-\io}\e^{n\l}(\t)\eln\t d\t\Eqa(A13.12)$$
%

Also $4i\r\sum_\s\s H^2_{\l\n\r\m}$ is given by:
%
$$\eqalign{
(\nn J^{-1}\cdot\mm)&\ig_{+\io}^{-\io}\Bigl({\e^{n\l}(t)\over(i\r\oo\cdot\mm)^2}
+{\e^{m\r}(t)\over(i\l\oo\cdot\nn)^2}\Bigr)e^{i(\l\nn+\r\mm)\cdot\oo t}dt+\cr
-(\nn J^{-1}\cdot\mm)&\ig_{+\io}^{-\io}\Bigl({\e^{n\l}(t)e^{i\l\nn\cdot\oo t}
\over(i\r\oo\cdot\mm)^2}
+{\e^{m\r}(t)e^{i\r\mm
\cdot\oo t}\over(i\l\oo\cdot\nn)^2}\Bigr)dt+\cr
+(\nn J^{-1}\cdot\mm)&(\ig_{+\io}^{-\io}
\e^{n\l}(t)\eln{t} dt)(\ig_{+\io}^0\e^{m\r}(t)\erm{t} t\,dt)+\cr
+(\nn J^{-1}\cdot\mm)&(\ig_{+\io}^{-\io}
\e^{m\r}(t)\erm{t} dt)(\ig_{-\io}^0\e^{n\l}(t)\eln{t} t\,dt)+\cr
+(\nn J^{-1}\cdot\mm)&\ig_{+\io}^{-\io}\e^{n\l}(t)\eln{t}\ig_{+\io}^t\erm\t
(t-\t)\e^{m\r}(\t)\,d\t+ 4i\r\V \D^2\cr}\Eqa(A13.13)$$
%
where $\V\D^2$ is the same expression evaluated at $J^{-1}=0$.

If $\tilde w^n_\l(t)\=w^0_{21}(t)e^{in\l\f(t)}+ic\l\g_n(t)+\bar\e^n(t)
c\sinh gt$ (with $w^0_{21}\=c gt(\cosh gt)^{-1}$ and $c=(2J_0g_0)^{-1}$, see
\equ(10.13) and \equ(A9.8)), $\V\D^2$ is:
%
$$\eqalignno{&
{4i\r\V\D^2\over nm}=\ig_{+\io}^{-\io}\ig_{+\io}^t
\bigl(\eln{t}\tilde w_\l^n(t)w_{22}(\t)\Erm(\t)-\erm\t\tilde w^m_\r(\t)
w_{22}(t)\Eln(t)\bigr)\,d\t d t+\cr&
-(\ig_{+\io}^{-\io}
w_{22}(\t)\Erm(\t)d\t)(\ig_{0}^{-\io}\eln{t}\tilde w_\l^n(t) dt)+
\cr&-(\ig_{+\io}^{-\io}
w_{22}(\t)\Eln(\t)d\t)(\ig_{0}^{+\io}\erm{t}\tilde w^m_\r(t) dt)+\cr
&-{c\over2}\ig_{+\io}^{-\io}\Bigl[
\bigl({\eln{t}e^{gt}-1\over i\l\oo\cdot\nn+g}
-{\eln{t}e^{-gt}-1\over i\l\oo\cdot\nn-g}
\bigr)w_{22}(t)\Erm(t)\Bigr]\, dt+\cr&
-{c\over2}\ig_{+\io}^{-\io}\Bigl[
\bigl({\erm{t}e^{gt}-1\over i\r\oo\cdot\mm+g}
-{\erm{t}e^{-gt}-1\over i\r\oo\cdot\mm-g}
\bigr)w_{22}(t)\Eln(t)\Bigr]\, dt+&\eqa(A13.14)\cr&
-\ig_{+\io}^{-\io}
w_{22}(t)
\Bigl({2ni\l c\over i\l\nn\cdot\oo}\Erm(t)(\eln{t}-1)
+{2mi\r c\over i\r\mm\cdot\oo}\Eln(t)(\erm{t}-1)\Bigr)\,dt\cr}
$$
%
It is easy to see that the above expressions \equ(A13.13),\equ(A13.14)
{\it are symmetric in the exchange} $\l\,\n\otto\r\,\m$. Furthermore
the terms with $e^{\pm gt}$ are slightly improperly written as the $\II$
operation is not exactly an ordinary integral as such terms contain
quantities which oscillate at $\io$. To write them correctly we introduce
$e^{in\l\f(t)}\tanh gt\=\th_n^\l(t)+\tanh gt$. Recalling that $w_{22}=1/\cosh
gt$ and setting $\O\=(\l\nn+\r\mm)\cdot\oo$, we see that the (four) just
mentioned terms can be written as:
%
$$\eqalign{&
-c\ig_{+\io}^{-\io} e^{i\O t}\Bigl(-
{i\r\mm\cdot\oo\over\oo\cdot\mm^2+g^2}\th_n^\l(t)+{g(e^{i\f(t)n\l}-1)\over
\oo\cdot\mm^2+g^2}+\,(\l\n\otto\r\m)\Bigr)+\cr
&+c\sum_\s \s \II\Big( e^{i\O t}\Bigl(
{i\r\mm\cdot\oo\over\oo\cdot\mm^2+g^2}+
{i\l\nn\cdot\oo\over\oo\cdot\nn^2+g^2}\Bigr)\tanh
gt\Bigr)(0^\s)\cr}\Eqa(A13.15)$$
%
and the last row can be explicitly computed in terms of the
coefficients of the series: $(1-x^2)(1+x^2)^{-1}\=\sum_{k=0}^\io t_k
x^{2k}$:
%
$$-2c\Bigl({i\r\mm\cdot\oo\over\oo\cdot\mm^2+g^2}+
{i\l\nn\cdot\oo\over\oo\cdot\nn^2+g^2}\Bigr)\sum_{k=1}^\io t_k
{2i\O\over\O^2+4g^2k^2} \Eqa(A13.16)$$
%

The above formulae show that, if $l=2$, $\dpr_\aa^p \V Q_\su^2(\0)$ is
exponentially small if $\o\to\io$. This, together with the results of \S9,
implies immediately that also $\dpr^p\V\s^2|_{\a=0}$ are exponentially
small, see also appendix A11.

If $l=2$ the homoclinic splitting has first derivative exponentially
small to all orders and higher derivatives exponentially small at least
to second order; the same holds for $l>2$ if $\oo$ is given by the
first of \equ(10.9). In fact, by \S9 this holds to all orders.

Before proceeding to a higher order calculation we examine a case with
$l>2$ and $\oo$ given by the second of \equ(10.9): \ie a mixed case in
which $\o_1$ is fast and $\o_j,\,j>1$ are slow.

We fix our attention on the simple model:
%
$$f(\aa,\f)=f_1\cos(\n_1\a_1+n_1\f)+f_2\cos(\n_2\a_2+n_2\f)\Eqa(A13.17)$$
%
which has only two {\it modes} $\nn$: $\nn=\V\th_1=\n_1(1,0)$ and
$\nn=\V\th_2=\n_2(0,1)$.

In the latter case, assuming
$\o_1=\bar\o_1\h^{-1/2},\,\o_2=\bar\o_2\h^{1/2}, g=O(1)$, we study the first
derivatives of $\V Q_\su(\aa)$ to second order.

If we study the $\a_1$ derivatives of $\V Q_\su(\aa)$ we see that we
must consider in \equ(A13.9) only modes $\mm,\nn$ such that
$\l\n_1+\r\m_1\ne0$.  Since the case $\nn=\mm=\V\th_1$ has to be
discarded because it gives exponentially small contributions to the
\equ(A13.14) we see that the only terms that are not obviously
contributing exponentially small quantities are pairs $\nn,\mm$ with
$\l\n_1+\r\m_1\ne0$, $\nn\cdot\mm=0$.  So that only the part
$\V\D^2$ can contribute to the $\a_1$ derivatives of $\V
Q_\su^2(\aa)$ in \equ(A13.14), at $\aa=\0$.

If we consider \equ(A13.14),\equ(A13.15) we see that many terms
are exponentially small as $\o_1\to\io$ if $\l\n_1+\r\m_1\ne0$.  The
part $\tilde\D^2$ of $\V\D^2$ which is not obviously exponentially
small as $\o_1\to\io$ corresponds to:
%
$$\eqalign{
{4i\r\tilde \D^2_{\l\nn\r\mm}\over nm}
=&-(\ig_{+\io}^{-\io}
w_{22}(t)\Erm(t)dt)(\ig_0^{-\io}\eln\t\tilde w_\l^n(\t)d\t)+\cr
&-(\ig_{+\io}^{-\io}w_{22}(t)\Eln(t)dt)(\ig_0^{+\io}\erm\t\tilde
w^m_\r(\t)d\t)+\cr
&+cg\ig_{+\io}^{-\io}w_{22}(t)({\Erm(t)\over \oo\cdot\nn^2+g^2}
+{\Eln(t)\over \oo\cdot\mm^2+g^2})\,dt+\cr
&+2c\ig_{+\io}^{-\io}w_{22}(t)({n\Erm(t)\over \oo\cdot\nn}
+{m\Eln(t)\over \oo\cdot\mm})\,dt+\cr
&-2c\Bigl({i\r\mm\cdot\oo\over\oo\cdot\mm^2+g^2}+
{i\l\nn\cdot\oo\over\oo\cdot\nn^2+g^2}\Bigr)\sum_{k=1}^\io t_k
{2i\O\over\O^2+4g^2k^2}\cr}\Eqa(A13.18)$$
%

We look for $\dpr_{\a_h}Q^2_{\su k}(\aa)|_{\aa=\0}\=\d_{hk}$ and we see that
\equ(A13.18) contributes, taking into account the parity properties of
the integrals, $\sum 2^{-2}\n_k\m_h4i\r\tilde\D^2_{\l\nn\r\mm}$.  Hence
for $h=1$ it must be $\mm=\V\th_1$ and $\nn=\V\th_2$, otherwise we see
that \equ(A13.18) contributes exponentially small quantities.
Therefore up to exponentially small terms $\d_{12}$ is given by:
%
$$\eqalignno{
\d_{12}=&-\n_1\n_2f_1f_2\Big(n_2\ig_{+\io}^{-\io} w_{22}\cos(\n_2\o_2
t+n_2\f(t))\Big)\cdot&\eqa(A13.19)\cr
&n_1\Big[\ig_0^{+\io}2^{-1}\sum_\r
e^{i\o_1\n_1\r t}\tilde w^{n_1}_\r(t)\,dt
-c{g\over\n_1^2\o_1^2+g^2}-{2n_1c\over\n_1\o_1}\Big]=\cr
=&-\n_1\n_2f_1f_2c\Big(\ig_{+\io}^{-\io}n_2
w_{22}\cos(\n_2\o_2 t+n_2\f(t)) dt\Big)\cdot n_1
\Big[-{g\over\n_1^2\o_1^2+g^2}-{2n_1\over\n_1\o_1}-\cr
&-\ig_{+\io}^0
\big({gt\over \cosh gt}\cos(\n_1\o_1t+n_1\f)-\g_{n_1}(t)\sin\n_1\o_1 t+
\bar\e^{n_1}(t)\sinh gt\cos\n_1\o_1t\big)dt\Big]=\cr
=&\d_{21}=2(-1)^{n_1}
\,c f_1f_2\,{\n_2\o_2\over g\n_1^2\o_1^2}\n_1n_1\n_2n_2K_{n_2}\cr}$$
%
where $c\=1/(2 g_0 J_0)$ and the equalities hold up to exponentially
small terms as $\o_1\to\io$ except the last which holds up to a factor
$1+O(g/\o_1+\o_2/g)$, and:
%
$$K_n\=\ig_{-\io}^\io t{\sin n\f_0(t)\over\cosh t} dt,\qquad
\f_0(t)\=4\atan e^{-t}\Eqa(A13.20)$$
%
The symmetry $\d_{hk}=\d_{kh}$ is a direct consequence of the symmetry
remarked after \equ(A13.14) above; the asymptotic analysis is made
easier if one remarks that:
%
$$\eqalign{
\g_n(0)=&-2n,\qquad \ig_{-\io}^{+\io}(\cos n\f)(\cosh gt)^{-1}dt=0\cr
\ig_{-\io}^{+\io}&t{\sin \f_0(t)\over\cosh t}dt=2,\qquad
\ig_{-\io}^{+\io}t{\sin 2\f_0(t)\over\cosh t}dt={10\p\over3}\cr}
\Eqa(A13.21)$$
%
where the last two formulae are useful in the applications of \S13.

The above formulae show that, if $l=2$, $\dpr_\aa^p \V Q_\su^2(\0)$ is
exponentially small if $\o\to\io$. This, together with the results of \S9,
implies immediately that also $\dpr^p\V\s^2|_{\a=0}$ are exponentially
small, see also appendix A11.

If $l=2$ the homoclinic splitting has first derivative exponentially
small to all orders and higher derivatives exponentially small at least
to second order; the same holds for $l>2$ if $\oo$ is given by the
first of \equ(10.9). In fact, by \S9 this holds to all orders.

Before proceeding to a higher order calculation we examine a case with
$l=3$ and $\oo$ given by the second of \equ(10.9): \ie a mixed case in
which $\o_1$ is fast and $\o_2$ is slow, but we do not suppose the
orthogonality between the slow and fast modes.  More precisely we
assume that $\o_1=\bar \o_1 \h^{-1/2}$, $\o_2=\bar \o_2 \h^{1/2}$,
$g=O(1)$; we shall also assume here, for simplicity, that $f$ is a
trigonometric polynomial \ie $f_\n=0$ if $|\n|> N$ for a suitable
$N>0$.

Calling $\FFF=\{\m:\mm\cdot\oo=O(\h^{-1/2})\}$ $=$ ``fast modes" and
${\cal S}=\{\m:\mm\cdot\oo=O(\h^{1/2})\}$ $=$ ``slow modes", the above
formulae yield easily that the terms in \equ(A13.13),\equ(A13.14) with $\m,\n$
both fast give an exponentially small contribution and that the leading
contribution to $\d_{12}$ is given by:
%
$$ \d_{12}=\d_{21}=\sum_{\m\in
\FFF,\nn\in{\cal S}} {\m_1 \n_2 (-1)^m\over g \mm\cdot\oo^2} [-(\nn\cdot
J^{-1} \mm) \hat K_n + 2 nm c K_n \nn\cdot \oo] \Eqa(A13.22)$$
%
where $c\=(2g_0J_0)^{-1}$, and:
%
$$ \hat K_n\= \ii_{-\io}^\io (\cos n \tilde
\f -1)du\ ,\quad K_n\= \ii_{-\io}^\io { u \sin n \tilde \f\over \cosh
u} du\ ,\quad \tilde \f(u)\= 4 \arctan e^{-u} \Eqa(A13.23)$$
%
Note also that $\d_{11}=0$ up to exponentially small terms (in the
general case).  This proves c) for what concerns the homoclinic
splitting.  The check concerning the phase shifts is a similar
calculation (and it works for the above considered example,
\equ(A13.17)), but we omit the details.

Since the first order yields exponentially small contributions to the
$\a_1$-derivatives of both components of $\V Q_\su$ as well as to
$\dpr_{\a_2}Q_1$, we see that \equ(A13.22) gives that the
intersection matrix determinant is, to leading order, equal to
$-\d_{12}\d_{21}$ and this proves d).

\vskip.3truecm
{\bf 2)} We study, now,  the third order for the simple model:
%
$$f(\a,\f)= f\cos(\a+\f)\Eqa(A13.24)$$
%
with the purpose of performing an instructive calculation showing quite
clearly one among several cancellation mechanisms behind the
smallness of the homoclinic angles when $l=2$.

We shall study the contributions to $\D^3(\aa)$ of order $J^{-2}$ as
$J\to0$. Note that $\D^3(\aa)$ is a polynomial of degree $2$ in
$J^{-1}$.

We see from \equ(A13.8) that in this case we must consider:
%
$$\sum_{\l_1\l_2\l_3}\Big[{2^{-3}\over
i\l_3}\II\Big(E^{\l_1}\IId\big(E^{\l_2}\IId E^{\l_3}\big)\Big)+
{2^{-3}\over2i\l_1\l_2\l_3}\II\Big(E^{\l_1}\big(\IId
E^{\l_2}\big)\big(\IId E^{\l_3}\big)\Big)\Big]\Eqa(A13.25)$$
%
Each $E^\l$ is split as $e^{i\l\o t}$ plus $e^{i\l\o t}\e^\l$ and each
addend in \equ(A13.25) generates eight terms. To avoid considering
improper integrals we shall consider only the terms
obtained by making the second choice for each of the three factors.

The same mechanism, once understood, applies equally well
to the other seven terms (and actually it is more convenient not to
separate them and to operate with the improper integrals).
We hope to examine the general theory elsewhere.

Setting
$\oo\cdot\nn_j\=\o_j$, and omitting writing the summation symbol over
$\l_1,\l_2,\l_3$, we get a contribution denoted $C^\s_3$ given by:
%
$$\eqalignno{
&{2^{-3}\over i\l_3}
\ig_{\s\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\ig_{\s\io}^td\t(t-\t) e^{i\l_2\o_2\t}
\e^{\l_2}\ig_{\s\io}^\t d\th\,(\t-\th) e^{i\l_3\o_3\th} \e^{\l_3}+
                                  &\eqa(A13.26)\cr
&+{2^{-3}\over2i\l_1\l_2\l_3}
\ig_{\s\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\Big(\ig_{\s\io}^td\t(t-\t)\e^{\l_2}e^{i\l_2\o_2\t}
\Big)\big(\ig_{\s\io}^t d\th\, (t-\th) e^{i\l_3\o_3\th} \e^{\l_3}\Big)-
\cr
&-{2^{-3}\over i\l_3}
\ig_{\s\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\ig_{\s\io}^td\t(t-\t) e^{i\l_2\o_2\t}
\e^{\l_2}\ig_{\s\io}^0 d\th\,(-\th) e^{i\l_3\o_3\th} \e^{\l_3}+\cr
&-{2^{-3}\over2i\l_1\l_2\l_3}
\ig_{\s\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\Big(\ig_{\s\io}^td\t(t-\t)\e^{\l_2}e^{i\l_2\o_2\t}
\Big)\big(\ig_{\s\io}^0d\th\, (-\th) e^{i\l_3\o_3\th} \e^{\l_3}\Big)-\cr
&-{2^{-3}\over i\l_3}
\Big(\ig_{\s\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\Big)\Big(\ig_{\s\io}^0d\t\,(-\t) e^{i\l_2\o_2\t}
\e^{\l_2}\ig_{\s\io}^\t d\th\,(\t-\th) e^{i\l_3\o_3\th} \e^{\l_3}\Big)+\cr
&-{2^{-3}\over2i\l_1\l_2\l_3}
\Big(\ig_{\s\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\Big(\ig_{\s\io}^0d\t(-\t)\e^{\l_2}e^{i\l_2\o_2\t}
\Big)\,\ig_{\s\io}^t d\th\, (t-\th) e^{i\l_3\o_3\th} \e^{\l_3}\Big)-\cr
&+{2^{-3}\over i\l_3}
\big(\ig_{\s\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\big)\big(\ig_{\s\io}^0d\t(-\t) e^{i\l_2\o_2\t}
\e^{\l_2}\big)\big(\ig_{\s\io}^0 d\th\,(-\th) e^{i\l_3\o_3\th}
\e^{\l_3}\big)+\cr
&+{2^{-3}\over2i\l_1\l_2\l_3}
\Big(\ig_{\s\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\Big)
\Big(\ig_{\s\io}^0d\t(-\t)\e^{\l_2}e^{i\l_2\o_2\t}
\Big)\Big(\ig_{\s\io}^0 d\th\, (-\th) e^{i\l_3\o_3\th}
\e^{\l_3}\Big)\cr}$$
%
and, computing $\sum \s C_3^\s\= C_3$ up to terms giving, obviously,
exponentially small contributions as $\h\to0$, we see that
$C_3$ is given by:
%
$$\eqalignno{
&-{2^{-3}\over i\l_3}
\ig_{-\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\ig_{-\io}^{+\io}d\t(t-\t) e^{i\l_2\o_2\t}
\e^{\l_2}\ig_{+\io}^\t d\th\,(\t-\th) e^{i\l_3\o_3\th} \e^{\l_3}+\cr
&-{2^{-3}\over i\l_3}
\ig_{+\io}^{-\io} d t
e^{i\l_1\o_1 t}\e^{\l_1}\ig_{+\io}^td\t(t-\t) e^{i\l_2\o_2\t}
\e^{\l_2}\ig_{+\io}^0 d\th\,(-\th) e^{i\l_3\o_3\th} \e^{\l_3}+
                               &\eqa(A13.27)\cr
&-{2^{-3}\over2i\l_1\l_2\l_3}
\ig_{+\io}^{-\io} d t
e^{i\l_1\o_1 t}\e^{\l_1}\ig_{+\io}^td\t(t-\t)\e^{\l_2}e^{i\l_2\o_2\t}
\ig_{+\io}^0 d\th\, (-\th) e^{i\l_3\o_3\th} \e^{\l_3}-\cr
&+{2^{-3}\over i\l_3}
\ig_{-\io}^0 d t
e^{i\l_1\o_1 t}\e^{\l_1}\ig_{-\io}^{+\io} d\t(-\t) e^{i\l_2\o_2\t}
\e^{\l_2}\ig_{+\io}^\t d\th\,(\t-\th) e^{i\l_3\o_3\th} \e^{\l_3}+\cr
&-{2^{-3}\over2i\l_1\l_2\l_3}
\ig_{+\io}^{-\io} d t
e^{i\l_1\o_1 t}\e^{\l_1}\ig_{+\io}^td\th(t-\th)\e^{\l_3}e^{i\l_3\o_3\th}
\ig_{+\io}^0 d\t\, (-\t) e^{i\l_2\o_2\t} \e^{\l_2}\cr}
$$
%
and we easily see that only the cases $\l_1+\l_2+\l_3=\pm1$ can give
non exponentially small contributions to $C_3$.  But in such cases the
sum of the second, third and fifth terms vanishes identically
(exhibiting the mentioned cancellation mechanism); while the
sum over the $\l$'s of the first and fourth terms is also exponentially
small.
%
\vglue2.truecm

\penalty-200

{\bf Appendix A14: Development of the perturbatrix}

\penalty10000

\vskip0.5truecm\numsec=14\numfor=1

\penalty10000

% Shorthands local to Appendix A14; s_x and c_x abbreviate \sin x and \cos x (see the text just below).
% \sg, \sch, \slm: s with subscript \g, \ch, \l.
\def\sg{{s_\g}}\def\sch{{s_\ch}}\def\slm{{s_\l}}
% \sgx, \schx, \slmx: the same with the doubled angle (subscripts 2\g, 2\ch, 2\l).
\def\sgx{{s_{2\g}}}\def\schx{{s_{2\ch}}}\def\slmx{{s_{2\l}}}
% \cg, \cch, \clm: c with subscript \g, \ch, \l.
\def\cg{{c_\g}}\def\cch{{c_\ch}}\def\clm{{c_\l}}
% \cgx, \cchx, \clmx: the same with the doubled angle.
\def\cgx{{c_{2\g}}}\def\cchx{{c_{2\ch}}}\def\clmx{{c_{2\l}}}
% \4, \2, \8: the fractions 1/4, 1/2, 1/8.
\def\4{{1\over4}}\def\2{{1\over2}}\def\8{{1\over8}}
%
We study here the perturbation $V$ in the D'Alembert model of \S 12.
We set, for a general angle $x$, $s_x\=\sin x, \,c_x\=\cos x$ and:
%
$$\eqalign{
a&\=\k\n,\,b\=\m\s,\,d\=-\n,\qquad \g_0=\g-\l\cr
\bar s&=\sin(\l_T-\g),\quad\kern2.3truecm \bar c=\cos(\l_T-\g)\cr
c_{\ch_0}&\=\cos\ch_0,\quad s_{\ch_0}\=\sin\ch_0,\quad
s_{\g_0}=\sin\g_0,\quad c_{\g_0}=\cos\g_0\cr
\cos\a&\=c_\a=a\bar s c_{\ch_0}+b\bar s +d \bar c
s_{\ch_0}\cr}\Eqa(A14.1)$$
%
and, dropping everywhere below the subscript $._0$ from the angles,
we have, see \equ(12.3):
%
$$\eqalign{
\bar s=&-\sg (1-2 e^2\slm^2)+\cg\,(-2e \slm+{5\over4} e^2\slmx)\cr
\bar c=&\cg (1-2 e^2\slm^2)+\sg\,(-2e \slm+{5\over4}
e^2\slmx)\cr}\Eqa(A14.2)$$
%
up to $O(e^3)$. It follows immediately that if:
%
$$A\= a\sg\cch+b\sg-d\cg\sch,\qquad B\=a\cg\cch+b\cg+d\sg\sch
\Eqa(A14.3)$$
%
it is (again up to $O(e^3)$):
%
$$ c_\a^2=A^2+4e \slm
AB+4e^2\big((B^2-A^2)\slm^2-{5\over8}AB\slmx\big)\Eqa(A14.4)$$
%
More explicitly we find:
%
$$\eqalignno{
A^2=&\4(a^2+2b^2+d^2)+{1\over4}(a^2-d^2)\cchx+
\4(-a^2-2b^2+d^2)\cgx+\cr
&-\4(a^2+d^2)\cgx\cchx+ab\cch-ab\cgx\cch+\cr
&+\4ad(c_{2\g+2\ch}-c_{2\g-2\ch})+\2bd(c_{2\g+\ch}-
c_{2\g-\ch})\cr
B^2=&\4(a^2+2b^2+d^2)+{1\over4}(a^2-d^2)\cchx-
\4(-a^2-2b^2+d^2)\cgx+\cr
&+\4(a^2+d^2)\cgx\cchx+ab\cch+ab\cgx\cch+&\eqa(A14.5)\cr
&-\4ad(c_{2\g+2\ch}-c_{2\g-2\ch})-\2bd(c_{2\g+\ch}-c_{2\g-\ch})\cr
B^2-A^2=&\2(a^2+2b^2-d^2)\cchx+\4(a^2+d^2)(c_{2\g+2\ch}+c_{2\g-2\ch})+\cr
&+ab(c_{2\g+\ch}+c_{2\g-\ch})-\2 ad(c_{2\g+2\ch}
-c_{2\g-2\ch})-bd(c_{2\g+\ch}-c_{2\g-\ch})\cr
4AB\slm=&-\2(a^2+2b^2-d^2)(c_{2\g+\l}-c_{2\g-\l})
-\2(a^2+d^2)\cchx(c_{2\g+\l}-c_{2\g-\l})+\cr
&-2ab\cch(c_{2\g+\l}-c_{2\g-\l})
+ad\cgx(c_{\l+2\ch}-c_{\l-2\ch})+2bd\cgx(c_{\l+\ch}-c_{\l-\ch})\cr}$$
%
Defining $c_j\=c_j(K_0,M_0), \ d_j\=d_j(K_0,M_0)$ by:
%
$$\eqalign{
&c_0\=\4(a^2+2b^2+d^2)= {1\over 4} [2 \s^2 \m^2 + (1+\k^2)\n^2]\cr
& d_0\=\4(-a^2-2b^2+d^2)=-{\s^2\over 4} (2 \m^2 - \n^2)\cr
&c_{\pm1}\=\2ab={\k\s\m\over2}\n\ ,\quad
c_{\pm2}=\8(a^2-d^2)= -{\s^2 \over 8} \n^2\cr
& d_{\pm1}\=-\2ab\pm\2bd= \mp {(1 \pm \k) \m \s\over2} \n\cr
& d_{\pm2}=-\8(a^2+d^2)\pm\4ad=-{(1\pm\k)^2 \over 8} \n^2
\cr}\Eqa(A14.6)$$
%
we see from \equ(A14.4) that:
%
$$\eqalign{
c^2_\a\=&\bar V_0+e\bar V_1+e^2\bar V_2\cr
\bar V_0\=&A^2=\sum_{j=-2}^2 c_j\cos j\ch+d_j\cos(j\ch+2\g)\cr
\bar V_1\=&4\slm AB=
\sum_{j=-2}^2 2d_j\cos(2\g+\l+j\ch)-2d_j\cos(2\g-\l+j\ch)\cr
\bar V_2\=&-{5\over8}4\slmx AB+4\slm^2(B^2-A^2)=\cr
=&\sum_{j=-2}^2d_j\Big[-{5\over4}
\cos(2\g+2\l+j\ch)+{5\over4}\cos(2\g-2\l+j\ch)+\cr
&-4\cos(2\g+j\ch)+2\cos(2\g+2\l+j\ch)+
2\cos(2\g-2\l+j\ch)\Big]=\cr
=&\sum_{j=-2}^2d_j\Big[{3\over4}\cos(2\g+2\l+j\ch)
+{13\over4}\cos(2\g-2\l+j\ch)-4\cos(2\g+j\ch)\Big]\cr}\Eqa(A14.7)$$
%
so that (up to order $O(e^2)$):
%
$$\eqalign{
&(\bar V_0+e\bar V_1+e^2\bar
V_2)(1-3ec_\l+{3\over2}e^2+{9\over2}e^2c_{2\l})=\cr
&\bar V_0+e(\bar V_1-3c_\l \bar V_0)+ e^2(\bar V_2-3c_\l\bar
V_1+{3\over2}\bar V_0+{9\over2}c_{2\l}\bar V_0)\cr}\Eqa(A14.8)$$
%
and, reinserting the lower index $0$:
%
$$\eqalignno{
V_0=&\sum_{j=-2}^2 c_j\cos j\ch_0 +d_j \cos(2\g_0+j\ch_0)\cr
V_1=&\sum_{j=-2}^2\Big(-3c_j\cos(\l_0+j\ch_0)
+\cr&+\2d_j\big(\cos(2\g_0+\l_0+j\ch_0)-7\cos(2\g_0-\l_0+j\ch_0)\big)\Big)\cr
V_2=&\sum_{j=-2}^2\Big[
c_j\Big({3\over2}\cos j\ch_0+{9\over2}\cos(2\l_0+j\ch_0)\Big)+
&\eqa(A14.9)\cr&+d_j\Big({17\over2}\cos(2\g_0-2\l_0+j\ch_0)
-{5\over2}\cos(2\g_0+j\ch_0)\Big)\Big]\cr}$$
%
and the coefficients $\bar B^h_{rpj}$ (cfr. \equ(12.14)) vanish unless
they belong to the following list where $|j|\le 2$:
$$\eqalign{
& \bar B^0_{00j}\= c_j\ ,\quad \bar B^0_{20j}\= d_j\cr
& \bar B^1_{01j}\= -3 c_j\ ,\quad \bar B^1_{21j} \= {d_j\over 2}\ ,
\quad \bar B^1_{2-1j}\=-{7\over 2} d_j\cr
&\bar B^2_{00j}\= {3\over 2} c_j\ ,\quad \bar B^2_{02j}\={9\over 2} c_j\ ,
\bar B^2_{20j}\= -{5\over 2} d_j\ , \bar B^2_{2-2j}\={17\over 2} d_j\cr}
\Eqa(A14.10)$$
Next, after the linear change of variables \equ(12.22), \equ(12.23) one
gets \equ(12.24) with the $B^h_{rpj}$ vanishing unless they belong
to the following list where $|j|\le 2$ and where
$c^0_j,d^0_j$ are the functions in \equ(A14.6) evaluated at
$K_0=2I_0-A_0+\lis K$, $M_0=I_0+2\o_T J_3$:
$$\eqalign{
& B^0_{2j,2j,j}\=c^0_j\quad (j\neq 0)\ ,\quad
B^0_{2(j-1),2(j-1),j}\= -d^0_j\quad (j\neq 1)\cr
& B^1_{2j,2j+1,j}\=-3 c^0_j\ ,\quad B^1_{2(j-1),2j-1,j}\=-{d^0_j\over 2}\ ,
\quad B^1_{2(j-1),2j-3,j}\={7\over 2} d^0_j\cr
& B^2_{2j,2j,j}\={3\over 2} c^0_j\ ,\quad B^2_{2j,2(j+1),j}\=
{9\over 2} c^0_j\cr
& B^2_{2(j-1),2(j-2),j}\=-{17\over 2} d^0_j\ ,\quad B^2_{2(j-1),2(j-1),j}\=
{5\over 2} d^0_j\cr}
\Eqa(A14.11)$$
\pagina
%
\vglue2.truecm

\penalty-200

{\bf References}

\penalty10000

\vskip0.5truecm\numsec=0\numfor=1

\penalty10000

\item{[A] } Arnold, V.: {\it Instability of dynamical systems with several
degrees of freedom}, Sov. Mathematical Dokl., 5, 581-585, 1966.

\item{[ACKR] } Amick C., Ching E.S.C., Kadanoff L.P., Rom--Kedar V.:
{\it Beyond All Orders: Singular Perturbations in a Mapping}
J. Nonlinear Sci. 2, 9--67, 1992.

\item{[BG] } Benettin, G., Gallavotti, G.: {\it Stability of motions near
resonances in quasi-integrable hamiltonian systems}, J. Statistical Physics,
44, 293-338, 1986.

\item{[CG] } Chierchia, L., Gallavotti, G.: {\it Smooth prime integrals for
quasi-integrable Hamiltonian systems} Il Nuovo Cimento, 67 B, 277-295,
1982.

\item{[CZ] } Chierchia, L., Zehnder, E.: {\it Asymptotic expansions of
quasi-periodic motions}, Annali della Scuola Normale Superiore di Pisa,
Serie IV Vol XVI Fasc.2 (1989).

\item{[D] } Douady, R.: {\it Stabilit\'e ou instabilit\'e des points fixes
elliptiques}, Annales Scientifiques de l' Ecole Normale Superieure, 21,
1-46, 1988.

\item{[DS]} Delshams, A., Seara, M.T.:
{\it An asymptotic expression for the splitting of separatrices of rapidly
forced pendulum}, preprint 1991.

\item{[G] } Gallavotti, G.: {\it The elements of Mechanics}, Springer, 1983.

\item{[Ge]} Gelfreich, V. G.: {\it Separatrices splitting for the rapidly
forced pendulum}, Preprint, 1992.

\item{[GLT] } Gelfreich, V. G., Lazutkin, V.F., Tabanov, M.B.:{\it
Exponentially small splitting in Hamiltonian systems}, Chaos, 1 (2),
1991.

\item{[Gr] } Graff, S. M.: {\it On the conservation of hyperbolic invariant
tori for Hamiltonian systems}, J. Differential Equations 15, 1-69, 1974.

\item{[HMS] } Holmes, P., Marsden, J., Scheurle,J: {\it Exponentially Small
Splittings of Separatrices in KAM Theory and Degenerate Bifurcations},
Preprint, 1989.

\item{[L] } de la Place, S.: {\it M\'ecanique C\'eleste}, tome II, book 5,
ch. I, 1799, English translation by Bowditch, E., reprinted by Chelsea,
1966.

\item{[La1]} Lazutkin, V. F.: {\it The existence of caustics for a billiard
problem in a convex domain}, Izv. Akad. Nauk. SSSR, 37 (1), 1973

\item{[La2] } Lazutkin, V.F.: {\it Separatrices splitting for standard and
semistandard mappings}, Pre\-pr\-int, 1989.

\item{[LW] } de la Llave, R., Wayne, E.: {\it Whiskered Tori},
preprint 1990

\item{[M] } Moser, J.: {\it Convergent series expansions for quasi-periodic
motions}, Mathematische Annalen, 169, 136-176, 1967.

\item{[Me] } Melnikov, V.K.: {\it On the stability of the center for
time periodic perturbations}, Trans. Moscow Math. Soc., 12, 1-57,
1963.

\item{[N] } Nekhorossev, N.: {\it An exponential estimate of the time of
stability of nearly integrable hamiltonian systems}, Russian Mathematical
Surveys, 32, 1-65, 1975.

\item{[Nei] } Neishtadt, A.I.: {\it The separation of motions in systems
with rapidly rotating phase}, PMM U.S.S.R.  48, 133--139, 1984.

\item{[P] } Poincar\'e, H.: {\it Les M\'ethodes nouvelles de la m\'ecanique
c\'eleste}, 1892, reprinted by Blanchard, Paris, 1987.

\item{[P\"o] } P\"oschel, J.: {\it Integrability of Hamiltonian systems on
Cantor sets}, Communications Pure Appl. Math, 35, 653-696, 1982.

\item{[Sv]} Svanidze, N.V.: {\it Small perturbations of an integrable
dynamical system with an integral invariant}, Proceed. Steklov
Institute of Math., 2, 1981

\item{[Z] } Zehnder, E.: {\it Generalized implicit function theorems with
applications to some small divisor problems} I,II, Communications Pure
Applied Mathematics, 28, 91-140, 1975.
\end