% Page setup.  Both \magnification requests below ask for the same scale
% (\magstep1 is 1200), so the second one is redundant.
\magnification 1200

\magnification=\magstep1
%
%The original version of these macros is due to J.P.  Eckmann
%
%\magnification \magstep1
% Text block size and horizontal offset, given in true (unmagnified) units.
\vsize=22 truecm
\hsize=16 truecm
\hoffset=0.8 truecm
% Line spacing and the extra skip inserted between paragraphs.
\normalbaselineskip=5.25mm
\baselineskip=5.25mm
\parskip=10pt
% Open output stream 1 on the file named `key'; \dowrite sends every
% section, claim and equation label to this stream.
\immediate\openout1=key
% Fonts used by the title page, the headings and the footline.
\font\titlefont=cmbx10 scaled\magstep1
\font\authorfont=cmcsc10 
\font\footfont=cmr7 
\font\sectionfont=cmbx10 scaled\magstep1
\font\subsectionfont=cmbx10
\font\small=cmr7
\font\smaller=cmr5
%%%%%constant subscript positions%%%%%
% \fontdimen16 and \fontdimen17 of the math symbol font are the subscript
% drops used without and with a superscript; giving them one value keeps
% subscripts at the same depth in both cases.  \fontdimen14 is one of the
% superscript shift-up parameters (see The TeXbook, Appendix G).
\fontdimen16\tensy=2.7pt
\fontdimen17\tensy=2.7pt
\fontdimen14\tensy=2.7pt
%%%%%%%%%%%%%%%%%%%%%%
%%% macros  %%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
% \dowrite{<text>}: write <text> immediately both to stream 16 (terminal and
% log file) and to output stream 1 (the label file opened at the top of this
% file).  The body deliberately contains no space tokens, so calling the
% macro in horizontal mode does not add stray interword glue to the text.
\def\dowrite#1{\immediate\write16{#1}\immediate\write1{#1}}
%\headline={\ifnum\pageno>1 {\hss\tenrm-\ \folio\ -\hss} \else {\hfill}\fi}
% Running counters behind the automatic numbering of equations, sections,
% appendices, claims and subsections, with their initial values.
\newcount\EQNcount \EQNcount=1
\newcount\SECTIONcount \SECTIONcount=0
\newcount\APPENDIXcount \APPENDIXcount=0
\newcount\CLAIMcount \CLAIMcount=1
\newcount\SUBSECTIONcount \SUBSECTIONcount=1
% \SECTIONHEAD is the prefix printed in front of equation and claim numbers;
% it stays "X" until \SECTION or \APPENDIX redefines it.
\def\SECTIONHEAD{X}
% \undertext{<text>}: underline <text>; \smash hides its height and depth so
% the rule sits at a fixed distance below the baseline.
\def\undertext#1{$\underline{\smash{\hbox{#1}}}$}
% \QED: end-of-proof mark -- a solid rule box pushed to the right of a full
% line (followed by six control spaces), then a \bigskip.
\def\QED{\hfill\smallskip
         \line{\hfill\vrule height 1.8ex width 2ex depth +.2ex
               \ \ \ \ \ \ }
         \bigskip}
% These ones cannot be used in amstex
%
% Number sets and the torus, typeset as bold capitals.
\def\real{{\bf R}}
\def\rational{{\bf Q}}
\def\natural{{\bf N}}
\def\complex{{\bf C}}
\def\integer{{\bf Z}}
\def\torus{{\bf T}}
%
%  These ones can only be used in amstex
%
%\def\real{{\Bbb R}}
%\def\rational{{\Bbb Q}}
%\def\natural{{\Bbb N}}
%\def\complex{{\Bbb C}}
%\def\integer{{\Bbb Z}}
%\def\torus{{\Bbb T}}
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Real and imaginary part in roman type followed by a thin space; these
% definitions replace the existing meanings of \Re and \Im.
\def\Re{{\rm Re\,}}
\def\Im{{\rm Im\,}}
% Run-in bold paragraph headers (English, then Spanish variants), each
% preceded by a \medskip and set without indentation.
\def\PROOF{\medskip\noindent{\bf Proof.\ }}
\def\REMARK{\medskip\noindent{\bf Remark.\ }}
\def\NOTATION{\medskip\noindent{\bf Notation.\ }}
\def\PRUEBA{\medskip\noindent{\bf Demostraci\'on.\ }}
\def\NOTA{\medskip\noindent{\bf Nota.\ }}
\def\NOTACION{\medskip\noindent{\bf Notaci\'on.\ }}
% \ifundefined{<name>}: opens an \ifx test that is true when the control
% sequence \<name> equals \relax (which is what \csname yields for a name
% that has no definition).  The caller supplies the matching \else and \fi.
\def\ifundefined#1{\expandafter\ifx\csname#1\endcsname\relax}
% \equ(<label>): print the number stored for equation <label> (the macro
% \e<label> created by \JPtag).  If it does not exist, print a spade symbol
% followed by the label and report the problem through \dowrite.
\def\equ(#1){\ifundefined{e#1}$\spadesuit$#1 \dowrite{undefined equation #1}
\else\csname e#1\endcsname\fi}
% \clm(<label>): the same for claims (the macro \c<label> created by
% \CLAIM), using a club symbol as the marker for an undefined label.
\def\clm(#1){\ifundefined{c#1}$\clubsuit$#1 \dowrite{undefined claim #1}
\else\csname c#1\endcsname\fi}
% \EQ(<label>): number a displayed equation with \leqno and record the
% number under <label>.
\def\EQ(#1){\leqno\JPtag(#1)}
% \NR(<label>): the corresponding tag for a row of an alignment; it supplies
% the column separator and the closing \cr.
\def\NR(#1){&\JPtag(#1)\cr}  %the same as &\tag(xx)\cr in eqalignno
% \JPtag(<label>): typeset the tag "(<SECTIONHEAD>.<EQNcount>)", globally
% define \e<label> as that fully expanded text (\xdef), log the pair
% number:label through \dowrite, and globally advance \EQNcount.
\def\JPtag(#1){(\SECTIONHEAD.
              \number\EQNcount)
    \expandafter\xdef\csname
e#1\endcsname{(\SECTIONHEAD.\number\EQNcount)}
    \dowrite{ EQ \equ(#1):#1  }
    \global\advance\EQNcount by 1
    }
% \CLAIM <kind>(<label>) <statement>\par : numbered claim.
%   #1  kind of claim, printed in bold in front of the number
%   #2  label under which the number is stored (retrieved with \clm)
%   #3  statement, set in slanted type; it extends to the next blank line
% The macro prints "<kind> <SECTIONHEAD>.<CLAIMcount>." and the statement,
% globally defines \c<label> as "<kind>\ <SECTIONHEAD>.<CLAIMcount>", logs
% the claim through \dowrite and globally advances \CLAIMcount.  If the last
% skip is smaller than \medskipamount it is then replaced by \penalty55 and
% a \medskip.
\def\CLAIM #1(#2) #3\par{
\vskip.1in\medbreak\noindent
{\bf #1~\SECTIONHEAD.\number\CLAIMcount.} {\sl #3}\par
\expandafter\xdef\csname c#2\endcsname{#1\
\SECTIONHEAD.\number\CLAIMcount}
%\immediate \write16{ CLAIM #1 (\number\SECTIONcount.\number\CLAIMcount) :#2}
%\immediate \write1{ CLAIM #1 (\number\SECTIONcount.\number\CLAIMcount) :#2}
\dowrite{ CLAIM #1 (\SECTIONHEAD.\number\CLAIMcount) :#2}
\global\advance\CLAIMcount by 1
\ifdim\lastskip<\medskipamount
\removelastskip\penalty55\medskip\fi}

% \CLAIMNONR <kind>(<text>) <statement>\par : claim without an automatic
% number.  <text> is printed verbatim after <kind> in bold; nothing is
% stored for \clm and nothing is logged.
% NOTE(review): \CLAIMcount is still advanced although no automatic number
% is printed -- confirm that this is intended.
\def\CLAIMNONR #1(#2) #3\par{
\vskip.1in\medbreak\noindent
{\bf #1~#2} {\sl #3}\par
\global\advance\CLAIMcount by 1
\ifdim\lastskip<\medskipamount
\removelastskip\penalty55\medskip\fi}
% \SECTION <title>\par : numbered section heading; <title> extends to the
% next blank line.  The opening stretch / penalty / negative-stretch
% sequence is the same page-break idiom that plain TeX's \beginsection uses.
% The macro globally advances \SECTIONcount, makes \SECTIONHEAD expand to
% that number, logs the heading through \dowrite, prints "<number>. <title>"
% flush left in \sectionfont, and resets the equation, claim and subsection
% counters to 1 (these resets and the \SECTIONHEAD definition are not
% \global).  It ends with \noindent, so the following text starts unindented.
\def\SECTION#1\par{\vskip0pt plus.3\vsize\penalty-75
    \vskip0pt plus -.3\vsize\bigskip\bigskip
    \global\advance\SECTIONcount by 1
    \def\SECTIONHEAD{\number\SECTIONcount}
    \immediate\dowrite{ SECTION \SECTIONHEAD:#1}\leftline
     {{\sectionfont \SECTIONHEAD.}\ {\sectionfont #1} }
    \EQNcount=1
    \CLAIMcount=1
    \SUBSECTIONcount=1
    \nobreak\smallskip\noindent}
% \APPENDIX <title>\par : start a lettered appendix on a new page; <title>
% extends to the next blank line.  \SECTIONHEAD is redefined to expand to
% the letter selected by \APPENDIXcount (A for the first appendix, up to F
% for the sixth; nothing beyond that), and \APPENDIXcount is then advanced
% globally.  After \vfill\eject the heading is logged through \dowrite,
% "APPENDIX <letter>: " is printed flush left in \titlefont, and the title
% follows in \sectionfont.  The equation, claim and subsection counters are
% reset to 1.
\def\APPENDIX#1\par{\vskip0pt plus.3\vsize\penalty-75
    \vskip0pt plus -.3\vsize\bigskip\bigskip
    % No space may follow any letter in the list below: it would become part
    % of \SECTIONHEAD and show up inside labels such as "F .1".
    \def\SECTIONHEAD{\ifcase \number\APPENDIXcount X\or A\or B\or C\or D\or E\or F\fi}
    \global\advance\APPENDIXcount by 1
    \vfill \eject
    \immediate\dowrite{ APPENDIX \SECTIONHEAD:#1}\leftline
     {\titlefont APPENDIX \SECTIONHEAD: }
     {\sectionfont  #1}
    \EQNcount=1
    \CLAIMcount=1
    \SUBSECTIONcount=1
    \nobreak\smallskip\noindent}
% \SECTIONNONR <title>\par : section heading printed without a number.  It
% still advances \SECTIONcount globally and resets the equation, claim and
% subsection counters to 1, but it does not redefine \SECTIONHEAD.
\def\SECTIONNONR#1\par{\vskip0pt plus.3\vsize\penalty-75
    \vskip0pt plus -.3\vsize\bigskip\bigskip
    \global\advance\SECTIONcount by 1
    \immediate\dowrite{SECTION:#1}\leftline
     {\sectionfont  #1}
     \EQNcount=1
     \CLAIMcount=1
     \SUBSECTIONcount=1
     \nobreak\smallskip\noindent}
% \SUBSECTION <title>\par : numbered subsection heading.  It defines
% \SUBSECTIONHEAD as the current subsection number, logs the heading through
% \dowrite, prints "<SECTIONHEAD>.<number>. <title>" flush left in
% \subsectionfont and globally advances \SUBSECTIONcount.
\def\SUBSECTION#1\par{\vskip0pt plus.2\vsize\penalty-75
    \vskip0pt plus -.2\vsize\bigskip\bigskip
    \def\SUBSECTIONHEAD{\number\SUBSECTIONcount}
    \immediate\dowrite{    SUBSECTION \SECTIONHEAD.\SUBSECTIONHEAD :#1}\leftline
    {\subsectionfont
    \SECTIONHEAD.\number\SUBSECTIONcount.\ #1}
    \global\advance\SUBSECTIONcount by 1
    \nobreak\smallskip\noindent}
% \SUBSECTIONNONR <title>\par : subsection heading printed without a number;
% it logs the title through \dowrite and changes no counter.
\def\SUBSECTIONNONR#1\par{\vskip0pt plus.2\vsize\penalty-75
    \vskip0pt plus -.2\vsize\bigskip\bigskip
    \immediate\dowrite{SUBSECTION:#1}\leftline{\subsectionfont
     #1}
    \nobreak\smallskip\noindent}
%%%%%%%%%%%%%TITLE PAGE%%%%%%%%%%%%%%%%%%%%
% Title-page machinery.  \endarg holds "what finishes the current title
% field"; it starts out as \par.  \start runs the current \endarg and opens
% a new group; \finish redefines \endarg (inside that group) so that it ends
% the paragraph and closes the group.  \TITLE, \AUTHOR and \FROM each begin
% a field in this way, and \ENDTITLE runs \endarg to finish the last one.
\let\endarg=\par
\def\finish{\def\endarg{\par\endgroup}}
\def\start{\endarg\begingroup}
% \getNORMAL takes one undelimited argument (a single token unless it is
% braced) and puts it inside a group.
\def\getNORMAL#1{{#1}}
% \TITLE: \titlefont with enlarged line spacing, ragged right, unindented;
% finishing the field adds .35in of vertical space.
\def\TITLE{\beginTITLE\getTITLE}
 \def\beginTITLE{\start
   \titlefont\baselineskip=1.728
   \normalbaselineskip\rightskip=0pt plus1fil
   \noindent
   \def\endarg{\par\vskip.35in\endgroup}}
 \def\getTITLE{\getNORMAL}
% \AUTHOR: .25in of space, then roman type, unindented.
\def\AUTHOR{\beginAUTHOR\getAUTHOR}
 \def\beginAUTHOR{\start
   \vskip .25in\rm\noindent\finish}
 \def\getAUTHOR{\getNORMAL}
% \FROM: affiliation block in slanted type with 3mm line spacing;
% \obeylines keeps the line breaks of the source.
\def\FROM{\beginFROM\getFROM}
 \def\beginFROM{\start\baselineskip=3.0mm\normalbaselineskip=3.0mm
  \obeylines\sl\finish}
 \def\getFROM{\getNORMAL}
\def\ENDTITLE{\endarg}
% \ABSTRACT <text>\par : after a 2em skip, print "Abstract." in \sectionfont
% followed by <text>, which extends to the next blank line.
\def\ABSTRACT#1\par{
\vskip 2em {\noindent\sectionfont Abstract.} #1 \par}
% \ENDABSTRACT: fill the rest of the page and break it.
\def\ENDABSTRACT{\vfill\break}
% \TODAY: the current date in the form "<day> <Month> <year>".  The day is
% tied to the month name; each month name is followed by exactly one space
% (the end of its source line) before the year.
\def\TODAY{\number\day~\ifcase\month
  \or January
  \or February
  \or March
  \or April
  \or May
  \or June
  \or July
  \or August
  \or September
  \or October
  \or November
  \or December
  \fi
\number\year}
% \timecount: \time (minutes since midnight) divided by 60, i.e. the hour
% at which the run started.
\newcount\timecount
\timecount=\number\time
\divide\timecount by 60
% \DRAFT: reload \footfont as cmti7 and set a footline showing the job name,
% the date from \TODAY and the hour from \timecount.
\def\DRAFT{\font\footfont=cmti7
\footline={{\footfont \hfil File:\jobname, \TODAY,  \number\timecount h}}
}
%%%%%%%%%%%%%%%%BIBLIOGRAPHY%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \period: remove any preceding glue, print a period, set the space factor
% to 3000 and then emit a space, so that the space is an end-of-sentence one.
\def\period{\unskip.\spacefactor3000 { }}
%
% ...invisible stuff
%
% One box register per bibliography field; the field macros below fill
% them and \endref unpacks them.
\newbox\noboxJPE
\newbox\byboxJPE
\newbox\paperboxJPE
\newbox\yrboxJPE
\newbox\jourboxJPE
\newbox\pagesboxJPE
\newbox\volboxJPE
\newbox\preprintboxJPE
\newbox\toappearboxJPE
\newbox\bookboxJPE
\newbox\bybookboxJPE
\newbox\publisherboxJPE
% \refclearJPE: empty every field box and globally set every "field
% present" flag (\is...JPE) to F.  \book and \inbook have separate flags but
% share \bookboxJPE.  The blank lines inside the body are \par tokens, so
% calling the macro in the middle of a paragraph also ends that paragraph.
\def\refclearJPE{
   \setbox\noboxJPE=\null             \gdef\isnoJPE{F}
   \setbox\byboxJPE=\null             \gdef\isbyJPE{F}
   \setbox\paperboxJPE=\null          \gdef\ispaperJPE{F}
   \setbox\yrboxJPE=\null             \gdef\isyrJPE{F}
   \setbox\jourboxJPE=\null           \gdef\isjourJPE{F}
   \setbox\pagesboxJPE=\null          \gdef\ispagesJPE{F}
   \setbox\volboxJPE=\null            \gdef\isvolJPE{F}
   \setbox\preprintboxJPE=\null       \gdef\ispreprintJPE{F}
   \setbox\toappearboxJPE=\null       \gdef\istoappearJPE{F}
   \setbox\bookboxJPE=\null           \gdef\isbookJPE{F}  \gdef\isinbookJPE{F}

   \setbox\bybookboxJPE=\null         \gdef\isbybookJPE{F}
   \setbox\publisherboxJPE=\null      \gdef\ispublisherJPE{F}

}
% \ref starts a bibliography entry: it clears all fields and opens a group.
% Each field macro closes whatever group or box is currently open, globally
% sets its own flag to T and starts collecting the following text in its
% \hbox, which stays open until the next field macro or \endref closes it.
% \vol collects in bold; \book and \inbook collect in italic and both use
% \bookboxJPE.
\def\ref{\refclearJPE\bgroup}
\def\no   {\egroup\gdef\isnoJPE{T}\setbox\noboxJPE=\hbox\bgroup}
\def\by   {\egroup\gdef\isbyJPE{T}\setbox\byboxJPE=\hbox\bgroup}
\def\paper{\egroup\gdef\ispaperJPE{T}\setbox\paperboxJPE=\hbox\bgroup}
\def\yr{\egroup\gdef\isyrJPE{T}\setbox\yrboxJPE=\hbox\bgroup}
\def\jour{\egroup\gdef\isjourJPE{T}\setbox\jourboxJPE=\hbox\bgroup}
\def\pages{\egroup\gdef\ispagesJPE{T}\setbox\pagesboxJPE=\hbox\bgroup}
\def\vol{\egroup\gdef\isvolJPE{T}\setbox\volboxJPE=\hbox\bgroup\bf}
\def\preprint{\egroup\gdef
\ispreprintJPE{T}\setbox\preprintboxJPE=\hbox\bgroup}
\def\toappear{\egroup\gdef
\istoappearJPE{T}\setbox\toappearboxJPE=\hbox\bgroup}
\def\book{\egroup\gdef\isbookJPE{T}\setbox\bookboxJPE=\hbox\bgroup\it}
\def\publisher{\egroup\gdef
\ispublisherJPE{T}\setbox\publisherboxJPE=\hbox\bgroup}
\def\inbook{\egroup\gdef\isinbookJPE{T}\setbox\bookboxJPE=\hbox\bgroup\it}
\def\bybook{\egroup\gdef\isbybookJPE{T}\setbox\bybookboxJPE=\hbox\bgroup}
% \endref: close the last field and typeset the entry.  Only fields whose
% flag is T are printed, always in this order: [no] as the \item label,
% by:, paper., ``book'' or In ``inbook'', (bybook)., publisher,
% (To appear)., Preprint., jour, vol, pages, (yr).
% \sfcode`.=1000 stops periods inside the fields from producing sentence
% spacing; \period is used where a real sentence end is wanted.  The
% punctuation after book, inbook and publisher depends on which fields
% follow.  The blank line before the closing brace is a \par that ends the
% entry.
\def\endref{\egroup \sfcode`.=1000
 \if T\isnoJPE  \item{[\unhbox\noboxJPE\unskip]}
     \else     \item{} \fi
 \if T\isbyJPE    \unhbox\byboxJPE\unskip: \fi
 \if T\ispaperJPE \unhbox\paperboxJPE\unskip\period \fi
 \if T\isbookJPE ``\unhbox\bookboxJPE\unskip''\if T\ispublisherJPE, \else.
\fi\fi
 \if T\isinbookJPE In ``\unhbox\bookboxJPE\unskip''\if T\isbybookJPE,
\else\period \fi\fi
 \if T\isbybookJPE  (\unhbox\bybookboxJPE\unskip)\period \fi
 \if T\ispublisherJPE \unhbox\publisherboxJPE\unskip \if T\isjourJPE, \else\if
T\isyrJPE \  \else\period \fi\fi\fi
 \if T\istoappearJPE (To appear)\period \fi
 \if T\ispreprintJPE Preprint\period \fi
 \if T\isjourJPE    \unhbox\jourboxJPE\unskip\ \fi
 \if T\isvolJPE     \unhbox\volboxJPE\unskip, \fi
 \if T\ispagesJPE   \unhbox\pagesboxJPE\unskip\  \fi
 \if T\isyrJPE      (\unhbox\yrboxJPE\unskip)\period \fi

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% SOME SMALL TRICKS%%%%%%%%%%
% \breakline: a zero vertical skip (ends the current paragraph when used in
% horizontal mode).  \script is an alias for \bf.  \norm and \endnorm both
% print a pair of vertical bars.
\def \breakline{\vskip 0em}
\def \script{\bf}
\def \norm{\vert \vert}
\def \endnorm{\vert \vert}
% \cite{<key>} prints [<key>] in roman type; \bref ignores its argument and
% prints an empty bracket as a placeholder.
	\def\cite#1{{\rm [#1]}}
	\def\bref#1{{\rm [~\enspace~]}} 	% blank ref cite 
% Roman operator names without limits, a bold T, a two-argument fraction
% built on \over, and a norm sign with a blank slot.
	\def\degree{\mathop{\rm degree}\nolimits} 
	\def\mod{\mathop{\rm mod}\nolimits} 
	\def\bT{{\bf T}} 
	\def\frac#1#2{{#1\over#2}} 
	\def\Norm{{\|~\enspace~\|}} 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% NOTE(review): in plain TeX, \magnification also resets \hsize and \vsize
% to 6.5truein and 8.9truein, so this third call overrides the \vsize and
% \hsize values chosen at the top of the file -- confirm that is intended.
\magnification=\magstep1

% NOTE(review): this replaces the bibliography field macro \no defined
% earlier in this file, so \ref ... \no ... \endref entries can no longer
% set a reference label after this point -- confirm that is intended.
\def\no{\noindent}


\TITLE 
Nekhoroshev stability of non-linear normal modes 
near an elliptic fixed point of a
Hamiltonian system with symmetry 

\ENDTITLE
\AUTHOR
Panayotis Panayotaros$^*$
\FROM

FENOMEC, I.I.M.A.S. - U.N.A.M. 
Apdo. Postal 20-726
Depto. Matem\'aticas y Mec\'anica
01000 M\'exico D.F. 
M\'exico

\ENDTITLE
\vskip 5 em
\bigskip


\ABSTRACT
We consider  
Hamiltonian systems of resonantly coupled harmonic oscillators
with a physically motivated $U(1)$ symmetry, and identify 
a class of small amplitude 
periodic orbits (the non-linear normal modes) of approximating 
systems. We show that under some additional conditions on the
quartic part of the Birkhoff normal form Hamiltonian
that are in many cases weaker than integrability and convexity,
the non-linear normal modes are Nekhoroshev stable.   

\bigskip

$^*${\it e-mail: panos@uxmym1.iimas.unam.mx}

\ENDABSTRACT


\SECTION Introduction 

We present sufficient conditions for the long time stability
of quasi-monochromatic oscillations in an 
analytic Hamiltonian system of resonant
weakly coupled harmonic oscillators with a $U(1)$ symmetry. 
By quasi-monochromatic
oscillations we here denote motions 
in which the amplitude of 
all but one of the oscillators is very  small.
Using the resonance and symmetry properties of the Hamiltonian 
we will see that such motions are in the neighborhood of families 
of periodic orbits (the ``non-linear normal modes'')
of Hamiltonian systems that approximate the 
original one. We will show that, 
under some additional conditions, trajectories of the original 
system that start near
such periodic orbits stay nearby for a very long time. 
%(such motions are also frequently referred to as non-linear normal modes).
The frequencies of the oscillators are assumed to be equal
(the symmetry is best described in the next section), 
and the stability conditions involve 
the quartic part of the Birkhoff normal form 
Hamiltonian.  


Although our results concern finite dimensional systems, 
the motivation and the symmetry condition we impose come 
from a problem  of non-linear water waves in spherical 
geometry, where analogous motions were observed 
numerically to be stable (see [P]). 
The application of the present work to the 
water wave problem and other wave equations on the sphere 
is outlined in the last section, 
where we also point out some heuristic 
connections with other problems in non-linear wave propagation. 


The stability of periodic and quasi-periodic orbits near 
an elliptic fixed point of a Hamiltonian system has been studied 
by several authors.  
Of particular relevance here are  
results of [FGB] and [N] on the Nekhoroshev stability of tori, 
and especially of [B] on the 
Nekhoroshev stability of finite dimensional tori
in perturbed non-linear Schr\"odinger equations.
Note that following the work of [L] we can study the 
stability of tori by looking at 
neighboring periodic orbits.   
In the above works the key properties for stability 
are integrability and convexity 
(possibly on an invariant set) 
of the quartic part of the Birkhoff normal form
around the origin. In the problem 
we consider here, the assumed 
resonance and symmetry conditions lead formally to normal forms
that possess near-monochromatic periodic orbits to all orders,
but are too  
weak to guarantee integrability or convexity 
of the quartic normal form around the origin.
Nevertheless, we can still use a strategy 
similar to the one used by [B] to 
show that, under two additional 
conditions on the quartic terms of the normal form,
trajectories starting in a neighborhood of a 
near-monochromatic periodic orbit of a suitably defined 
approximating system
will stay nearby for an exponentially long time. 
The first condition is that certain  
non-integrable quartic terms that we will call  
Benjamin-Feir terms are absent. The second condition is that    
certain integrable quartic terms be absent. This second condition 
is neither stronger nor weaker than convexity 
of the (integrable) quartic terms, while both 
conditions allow for the presence of non-integrable terms.
Some slightly weaker versions of the above assumptions, 
as well as some special cases will also be pointed out. 

The paper is organized as follows: 
In section 2 we describe the  
resonance and symmetry properties of the 
Hamiltonian, 
and use Birkhoff normal form arguments 
to show the existence of 
periodic orbits in approximating systems. 
This section serves mainly as a set-up and motivation 
for the stability result of the next section.
In section 3 we state a stability theorem for 
near-monochromatic 
motions, and sketch the plan of the
proof. The proof is given in the next three sections. In sections 4 and 5
we perform normal forms around the origin and 
periodic orbits respectively, and in section 6 we conclude the 
proof. In section 7 we discuss applications and some
heuristic connections of the present work with Stokes waves
in non-linear wave equations.


\vfill\eject


\SECTION Resonance and symmetry properties of the Hamiltonian 


We consider 
$ \real^{2n} $ with the standard 
symplectic structure 
$\Omega = \sum_{\gamma} dq_\gamma \wedge dp_\gamma$. 
The coordinates $ q_\gamma$, $p_\gamma$ are 
labeled by the indices
$\gamma \in {\cal J}$,  
where $ {\cal J} $ is either 
the set of integers between $-{{n-1}\over{2}}$ and ${{n-1}\over{2}}$, for 
$ n$ odd; or the set of non-zero integers 
between $-{{n}\over{2}}$ and ${{n}\over{2}}$, for $n$ even.
For any smooth functions $f$, $g$, 
the Poisson bracket induced by $\Omega$ will be denoted by 
$ [f,g]$. 

Also, we consider 
Hamiltonians $ h$ 
that have an elliptic fixed point at the origin. 
For the purposes of this section it will suffice to 
consider Hamiltonians that are given as formal power series 
in the variables $ q_\gamma$, $p_\gamma$.
Thus, defining the variables 
$ a_\gamma$, $ a^*_\gamma$ by 
$ a_\gamma = q_\gamma + i p_\gamma $, $ a^*_\gamma = q_\gamma - i p_\gamma $,
we may assume that the  
Hamiltonians $h$ have the form 
$$h = h_0 + \sum^{\infty}_{j = 1} \epsilon^j h_j, \quad\hbox{with} $$ 
$$ h_0 = \sum_{\gamma \in {\cal J }} \omega_\gamma |a_\gamma|^2, \qquad     
h_j = \sum_{ {\kappa + \overline \kappa = j} \atop
{ } }
\sum_{\gamma_1, \ldots, \gamma_{\kappa + \overline \kappa } \in {\cal J}  } 
I^{(\kappa, \overline \kappa)}_{\gamma_1, 
\ldots, \gamma_{\kappa + \overline \kappa}  } 
a_{\gamma_1} \ldots a_{\gamma_{\kappa}} 
a^*_{\gamma_{\kappa+1}} \ldots 
a^*_{\gamma_{\kappa + \overline \kappa}},  \EQ(2.1)$$
where $ \kappa, \overline \kappa \in \integer^{+} \cup \{0\} $.

We are interested in Hamiltonians $ h$ given by \equ(2.1) 
that also satisfy three additional properties:

{\bf Property (i)} The frequencies $ \omega_\gamma $ in \equ(2.1) 
satisfy the resonance condition 
$$ \omega_\gamma = \omega \neq 0, 
\quad \forall \gamma \in {\cal J}. \EQ(2.2) $$

{\bf Property (ii)}
Defining the linear circle action 
$ \varrho:S^1 \times \real^{2n} \rightarrow \real^{2n}$
by $ \varrho_{\phi} a_\gamma = e^{i \gamma \phi} a_\gamma$ and 
$ \varrho_{\phi} a^*_\gamma = e^{- i \gamma \phi} a^*_\gamma$,
the Hamiltonian $h$ of \equ(2.1) satisfies 
$$ h(\varrho_\phi x) = h(x), 
\quad \forall \phi \in S^1, \quad \forall x \in \real^{2n}. \EQ(2.3) $$
Note that requirement \equ(2.3) is equivalent to 
$$  I^{(\kappa, \overline \kappa)}_{\gamma_1, 
\ldots, \gamma_{ \kappa + \overline \kappa } } 
\neq 0 \Rightarrow \sum_{ \mu= \gamma_1}^{\gamma_{\kappa}} \mu -
\sum_{ \mu=\gamma_{\kappa +1}}^{\gamma_{\kappa +\overline \kappa}} \mu
= 0, \quad 
\forall \kappa, \overline \kappa \in \integer^{+} \cup \{0\}, \quad 
\forall \gamma_1, \ldots, \gamma_{\kappa +\overline \kappa} \in {\cal J}. 
\EQ(2.4) $$

Before stating the third property we note
some of the implications of properties (i) and (ii). 
To see these we bring the Hamiltonian $h$
to a Birkhoff normal form
by performing formal canonical transformations that eliminate 
non-resonant terms order by order.
We use the normalization procedure  
described by Dragt and Finn [DF], in which 
the transformed Hamiltonian 
$h^{(r)}$ obtained after $r$ steps is given by 
$h^{(r)} = \exp(Ad_{\epsilon^r \chi_r}) \ldots \exp(Ad_{\epsilon \chi_1})h=
h_0 + N_r + R_r$, where $N_r$ is in normal form 
and $R_r$ is the remainder, i.e. the $O(\epsilon^{r+1})$ 
yet unnormalized terms of the transformed Hamiltonian. 
The functions $ \chi_i$ are computed so as to 
eliminate the non-resonant terms of order $ \epsilon^{i}$ at the
$i$-th stage of the computation (see e.g. [FB] for 
an example of a detailed calculation using this method). 
Other ways of performing the normal form, e.g. the method of 
Deprit [D], lead to the same order $r$ normal form part $N_r$.
The calculations lead to the 
following formal result: 

{\bf Proposition 2.1} 
Consider a Birkhoff normal form 
Hamiltonian $h^{(r)} = h_0 +N_r +R_r$ 
with $r \in \integer^{+}$ and $[N_r, h_0]=0$,
obtained as described above from  
a Hamiltonian $h$ given by \equ(2.1) and satisfying (i) and (ii). 
Also let $ {\overline h}^{(r)} = h_0 + N_r $ and 
let $\Gamma $ be any index in ${\cal J} $. 
Then the planes $ M^{(r)}_\Gamma $ of points with
$ a_\gamma = 0 $, $\forall \gamma \neq \Gamma$ are invariant 
under the Hamiltonian flow of $ {\overline h}^{(r)}$.

 
{\it Proof:} The proof will be sketchy, as details can be filled in
easily using material from the next sections. 
We start with two observations.
First, $ N_r $ can not have terms of odd 
order in $ \epsilon$, since by property (i) the resonance condition
for odd terms is $ \omega = 0$. 
Second, all monomials of $h^{(r)}$, and hence $h^{(r)}$ itself,
satisfy the symmetry condition (ii).
We can see this fact 
recursively. The orbits of the circle action $\varrho$ coincide 
with the orbits of the Hamiltonian flow of the
function $L = \sum_{\gamma \in {\cal J} } \gamma |a_\gamma|^2$. 
The function $L$ Poisson-commutes with all the monomials of the 
$h_j $ of \equ(2.1). Then $ [\chi_1,L] = 0 $ so that by the
linearity of $ Ad_{\epsilon \chi_1}$ and the Jacobi identity 
the monomials of $h^{(1)} = \exp( Ad_{\epsilon \chi_1})h$ will 
also Poisson-commute with $ L$. The argument works for 
all the stages of the normalization, up to $h^{(r)}$.

To show the proposition, 
it is enough to show that
the rate of change $ \dot a_\gamma$, $\gamma \neq \Gamma $,
does not contain monomials of the form 
$ (a_\Gamma)^n (a^*_\Gamma)^m$ for any $n$, $m >0$. 
Since $ N_r$ is in normal form, property  
(i) implies that such monomials 
must be of the form $ (a_\Gamma)^{m} (a^*_\Gamma)^{m-1}$.
Moreover, such terms can only arise from the monomials 
$ (a_\Gamma)^{m} (a^*_\Gamma)^{m-1} a^*_\gamma$ of the Hamiltonian. 
However, the symmetry property (ii), and the fact that it also holds 
for the monomials of $ h^{(r)}$ imply that the 
coefficients of the monomials 
$ (a_\Gamma)^{m} (a^*_\Gamma)^{m-1} a^*_\gamma$ of $ h^{(r)}$
vanish, unless $m \Gamma - (m-1)\Gamma - \gamma = 0 $, 
i.e. unless $\gamma = \Gamma$. \QED


The above formal argument works for any order $r$, however 
the remainder $R_r $ of the Birkhoff normal form generally diverges 
as $r$ increases and we can not 
conclude that the planes $ M^{(r)}_\Gamma$ (i.e. as $r \rightarrow \infty$)
are invariant under the Hamiltonian flow of $h$. 
On the other hand we can make the argument rigorous for 
analytic Hamiltonians
by defining the consecutive canonical transformations in 
suitable neighborhoods of the origin, and we can also see that 
for given $\epsilon$ sufficiently small we can 
choose $r$ so that $R_r$ is minimized.
Thus the calculation of ${\overline h}^{(r)}$
should be thought of as an asymptotic procedure 
for approximating periodic orbits of the 
Hamiltonian flow of $h$.
Also, since the transformations are near-identity, the planes 
$M^{(r)}_\Gamma$ are near the planes $M^{(0)}_{\Gamma}$, i.e.
the sets with  
$a_\gamma = 0$, $\forall \gamma \neq \Gamma$ for the original variables.

{\bf Remark 2.1} The question of whether the periodic orbits of the
${\overline h}^{(r)}$ approximate periodic orbits of $h$
will not play any role in our analysis. We point out however 
work on a similar problem by [MRS], who use a variational method to 
show that periodic orbits of the normal form do approximate 
periodic orbits of the system. 

Thus it is reasonable to study the behavior of 
trajectories starting near 
the $M^{(r)}_\Gamma $, and in 
the next section we give sufficient conditions for
trajectories of the Hamiltonian flow of $h$ to stay near 
periodic orbits of the ${\overline h}^{(r)}$ on the $M^{(r)}_\Gamma $
for a long time. 
Note that the existence of two constants of motion, 
$h$ and $L$, does not give any stability information 
in the particular problem
and more detailed knowledge of $ h$ is required.


To state the stability conditions we examine more closely the 
second and third order normal form Hamiltonians $  h^{(2)} $
and $  h^{(3)} $. We apply the Dragt-Finn algorithm three times, 
first to eliminate non-resonant cubic terms, then 
to eliminate the non-resonant quartic terms, and finally to eliminate
non-resonant quintic terms.
We arrive at the Hamiltonian $ H = h^{(3)} $, which
by (i) and (ii) has the general form 
$$  H = h_0 + \epsilon^2 {\hat h}_2  + f_2  , \quad\hbox{with} \EQ(2.5) $$
$$  {\hat h}_2 = \sum_{\gamma_1, \gamma_2, \gamma_3, \gamma_4 \in {\cal J} }
I_{\gamma_1, \gamma_2, \gamma_3, \gamma_4 } 
a_{\gamma_1} a_{\gamma_2}a^*_{\gamma_3} a^*_{\gamma_4}, \EQ(2.6) $$
and $f_2 $ the $O(\epsilon^4)$ remainder.
The coefficients 
$ I_{\gamma_1, \gamma_2, \gamma_3, \gamma_4 } $ 
can be computed from the coefficients of the cubic and quartic parts
of $ h$. Note that the Hamiltonian $H$ also satisfies 
properties (i) and (ii). 

In the next section we will discuss 
the stability of periodic orbits on the planes $ M^{(3)}_\Gamma$  that 
are invariant under the Hamiltonian flow of $ H$ above.
Note that our results can be easily modified 
to deal with periodic orbits of $  h^{(2)} $ on the planes $ M^{(2)}_\Gamma$.
The only difference is that  
$  h^{(3)} $ has a smaller remainder, and this leads to 
slightly weaker stability conditions that can also be checked without
much additional effort.  
 
 
Fixing some $ \Gamma \in {\cal J} $ we now assume that 
the Hamiltonian $ H $ of \equ(2.5) (and hence $h$) satisfies: 

{\bf Property (iii)} $ {\hat h}_2 $ contains a term 
$ C_\Gamma |a_\Gamma|^4 $ for some $C_\Gamma \in \real$, 
with $C_\Gamma \neq 0$. 


Property (iii) is specific to a given $ \Gamma$. Also note 
that by (i) and (ii) the monomial $  |a_\Gamma|^4 $
is the only monomial of the form $(a_{\Gamma})^n (a^*_{\Gamma})^m $
in $ {\hat h}_2 $. 


We now decompose $ {\hat h_2}$ as
$$ {\hat h_2} = {\hat h_{2,\Gamma}} + {\hat h_{2,(\gamma,\Gamma)}} + 
{\hat h_{2,\gamma}},  \EQ(3.01)$$
where
$ {\hat h_{2,\Gamma}} $ contains all monomials with 
products of $ a_\Gamma$ and $ a^*_\Gamma$,
$ {\hat h_{2,\gamma}} $  contains all monomials with 
products of $ a_\gamma$ and $ a^*_\gamma$ with $\gamma \neq \Gamma$, and
$ {\hat h_{2,(\gamma,\Gamma)}} =  {\hat h_2} -{\hat h_{2,\Gamma}}
-{\hat h_{2,\gamma}} $.


In ${\hat h_{2,(\gamma,\Gamma)}}$ we identify two 
further types of terms. We let 
$\hat h_{BF}$ be the sum of monomials proportional to 
$ a_{\gamma_1} a_{\gamma_2}a^*_{\Gamma} a^*_{\Gamma} $ 
with $ \gamma_1 $, $\gamma_2 \neq \Gamma$, 
and complex conjugates.
Also, we let $\hat h_{I}$ be the 
sum of monomials proportional to 
$ |a_\gamma|^2|a_\Gamma|^2$ with 
$\gamma \neq \Gamma$. 
Finally, we let $ h_N = 
{\hat h_{2,(\gamma,\Gamma)}} - \hat h_{I} -\hat h_{BF}$,
and note that $h_N $ is in general non-zero, containing
terms $a_\Gamma a_{\gamma_1} a^*_{\gamma_2}a^*_{\gamma_3}$ with 
$\Gamma + \gamma_1 - \gamma_2 -\gamma_3 = 0$, and complex conjugates.
The terms $\hat h_{BF}$ are the ``Benjamin-Feir'' terms (see the
discussion in section 7 for this terminology).

\SECTION A stability theorem 

To state and prove the main theorem we assume that 
$H = h_0 + \epsilon^2 {\hat h_2} + f_2$ as in \equ(2.5), \equ(2.6)
has already been computed from a Hamiltonian $h$ 
satisfying properties (i)-(iii) of the previous 
section. Moreover we assume that $h$ is real analytic
in a neighborhood of the origin, so that 
$H$ is also real analytic in a smaller neighborhood.

We work in the following domains:
consider the coordinates
$q_\gamma $, $p_\gamma $, $\gamma \in {\cal J}$ as complex  
and define the domain $D_\rho \subset \complex^{2n}$ to be the set of
points satisfying $ |q_\gamma| \leq \rho$, $ |p_\gamma| \leq \rho$,
$ \forall \gamma \in {\cal J}$.
Also, for functions $g:D_\rho \rightarrow \complex$
we denote the supremum norm in  $D_\rho $ by 
$ ||g||_{D_\rho}$.
By the assumption of real analyticity,
the Hamiltonian $H$, defined as a power series 
in $q_\gamma $, $p_\gamma $
in \equ(2.1) and 
extended to $\complex^{2n}$
by the complexification of $q_\gamma $, $p_\gamma $,
is analytic in some domain   $D_R \subset \complex^{2n}$.
A second type of domain we will need is defined as follows:
consider $\real^{2n}$ with the coordinates
$q_\gamma $, $p_\gamma $, $\gamma \in {\cal J}$.
Fix $ \nu > 0 $ and some $\Gamma \in {\cal J}$,
and let  $J_\Gamma = |a_\Gamma|^{2} - \nu $
(the dependence of $J_\Gamma$ on $\nu$ will be suppressed 
from the notation).
We complexify $q_\gamma $, $p_\gamma $, 
$\forall \gamma \neq \Gamma$
and the angle $\theta_\Gamma = \arg(a_\Gamma)$,
and we let $D^{\nu, R_1}_{\rho}$ be the set of points 
of $\complex^{2n}$ satisfying
$ |q_\gamma| \leq \rho$, $ |p_\gamma| \leq \rho$,
$ \forall \gamma \neq \Gamma $, and 
$ {{| J_\Gamma|}\over {R_1}} + R_1 | \Im(\theta_\Gamma)| \leq \rho$.
For functions $g:D^{\nu, R_1}_{\rho} \rightarrow \complex$
we denote the supremum norm in  $D^{\nu, R_1}_{\rho} $ by 
$ ||g||_{D^{\nu, R_1}_{\rho} }$.
By the definition of $D_{\rho}$, $D^{\nu,R_1}_{\rho}  $
the meaning of $ \real^{2n} \cap D_{\rho}$, 
$ \real^{2n} \cap D^{\nu,R_1}_{\rho}$ 
is clear.

The theorem below involves $O(1)$ constants 
$\alpha$, $\beta$, $C_{NBF}$, $\tilde C_I$, $\tilde C_{BF}$, 
$\tilde H_1$ and $\tilde F_1$. Possible values of these constants
are given immediately after the statement.
Note that $C_{NBF}$, $\tilde C_I$, $\tilde C_{BF}$, 
$\tilde H_1$ and $\tilde F_1$ 
are specific to a particular $\Gamma$ since 
they involve the $\hat h_{I}$, $\hat h_{N}$, $\hat h_{BF}$
of the previous section that are specific to a given $\Gamma$.


{\bf Theorem 3.1} Consider the real analytic 
Hamiltonian $H: \real^{2n} \cap D_R  \rightarrow \complex $, $R>0$,
satisfying properties (i)-(iii)
of section 2. Consider a mode $\Gamma$ with $|a_\Gamma|^2 =\nu'$,  
fix $\sigma \in (0,{1\over 2})$, and let $\epsilon>0$ 
satisfy 
$$ {{8 e^2 {\tilde c} \pi} \over {\omega R^2}}
\epsilon^2(2\alpha + 10 \epsilon^2 \beta) < 1,  \EQ(3.2)$$
$$ {{4 e^{{{2}\over{ 5 e^2 }} }}\over{e^2 }}
{{ \beta}\over{\alpha}} R \epsilon^{2-\sigma} \leq  {{\xi}\over{16}},
\quad 
{{8 e^{{{2}\over{ 5 e^2 }} }}\over{e^2 }}
{{ \beta}\over{\alpha}} R^2 \epsilon^{2-2\sigma}  \leq  {{\xi}\over{16}}, 
\EQ(3.21) $$
$$   {{2 e^{{{2}\over{ 5 e^2 }} }}\over{e^2 }}
{{\tilde F_1}\over{\tilde H_1}} \epsilon^{1-2\sigma} +
 {{32  {\tilde c_1} \pi} \over {C_\Gamma}\sqrt{\nu'}}
C_{NBF} \epsilon^{\sigma}  \leq {{\xi}\over{16}}, 
\quad\hbox{with}\quad
\xi = {{4}\over{n}}\left(\sqrt{1+{{n}\over{64}}}-1 \right), \EQ(3.22) $$
and 
$$ \epsilon^{\sigma} {{16 e^2 {\tilde c_1} } \over {C_\Gamma \nu'}} 
(2{\tilde H_1} + 10 \epsilon^{\sigma}{\tilde F_1} ) < 1. \EQ(3.4) $$
Also, let $\rho_2 = \epsilon^{\sigma}$ and assume that 
$$ \nu' + {{3}\over{2}}(\rho_2)^2 \leq  {{R^2}\over{9}}, \quad
\hbox{and} \quad \nu' \geq {{3}\over{2}} (\rho_2)^2. \EQ(3.1)$$
Then if $x(t)$ is a trajectory of the Hamiltonian flow of $H$
with initial condition $x(0) \in \real^{2n} \cap D^{\nu', \rho_2}_{\rho_0}$,
where $0<\rho_0 \leq {{\xi \rho_2}\over {32}}$, we have that 
$$ x(t) \in \real^{2n} \cap D^{\nu',\rho_2}_{{{\rho_2}\over{2}}}, \quad 
\forall t \in  [-T_m,T_m],   \EQ(3.5) $$
where
$$ T_m = {{1 } \over { 2^{9} \tilde c_1 \epsilon^2 }}
(M_2 +{\tilde M_1} + \tilde C_{BF} +  \epsilon^{\sigma} \tilde C_I)^{-1},  
\EQ(3.6) $$
and
$$ M_2 = 
e^2 \epsilon^{2 + 4\sigma}{\tilde F_1}\exp \left( 
- \sqrt{ {{ 3 \nu'C_\Gamma } \over  { 8 e^2 \tilde c_1 } } }
(2{\tilde H_1} + 10 \epsilon^{\sigma}{\tilde F_1} )^{-{1 \over 2}}
\epsilon^{- {{\sigma}\over{2}}}
\right),  \EQ(3.7) $$
$$ \tilde M_1 =  3 e^2 (2n+2) 
\epsilon^{4}{\beta}
\exp \left(- \sqrt{ { {2  R^2 \omega } \over  { e^2 \pi \tilde c }  } }
(2\alpha + 10 \epsilon^2 \beta )^{-{1 \over 2}}
 \epsilon^{-1} \right).  \EQ(3.8) $$


{\bf Constants:}
The constants $\alpha$, $\beta$ are such that 
$$  ||\epsilon^2 {\hat h_2} ||_{D_R} \leq \epsilon^2 \alpha, \quad
||f_2 ||_{D_R} \leq  \epsilon^4 \beta \EQ(3.10) $$
and $C_{NBF}$, $\tilde H_1$, $\tilde F_1$, $\tilde C_I$ 
and $\tilde C_{BF}$ are defined by
$$ C_{NBF} = 2^5 e^2 \sqrt{3\over2}
||
\hat h_N ||_1, \EQ(3.11) $$
$$ \tilde H_1 = 2^5 \sqrt{3\over2}  \sqrt{\nu'} C_{NBF} + 
\epsilon^{2\sigma}C_\Gamma + 
2^4 \epsilon^{2\sigma}||{\hat h_{2,\gamma}}||_1,
\EQ(3.12) $$
$$\tilde F_1 = 2  \epsilon^{2-4\sigma} \beta + 
3\cdot 2^5 \sqrt{3\over2} {{\pi \tilde c_1}\over{\nu' C_\Gamma}}  C_{NBF}
(2^6 \sqrt{3\over2} C_{NBF} + \tilde H_1  ), 
\EQ(3.13) $$
$$ \tilde C_I = 6 \nu' \left( {{\tilde F_1} \over {\tilde H_1}} +
\epsilon^{\sigma} {{\pi \tilde c_1}\over{4 C_\Gamma \nu'}}
(1+ \tilde F_1 {\tilde H_1}^{-1}   ) \right) ||\hat h_{I}||_1 \EQ(3.14) $$
$$  \tilde C_{BF} = 18 \nu' e^{2 \epsilon^{\sigma}}
\left(1 + \epsilon^{\sigma} 
{{\pi \tilde c_1}\over{4 C_\Gamma \nu'}} C_{NBF}\right) 
||\hat h_{BF}||_1.
\EQ(3.15) $$
Here, for any quartic polynomial $P = 
\sum_{(j_1, j_2, j_3, j_4) \in {\cal P} }
I_{j_1, j_2, j_3, j_4 } 
a_{j_1} a_{j_2}a^*_{j_3} a^*_{j_4}$
with $ {\cal P} $ a set of index quartets we let
$ ||P||_1 = \sum_{(j_1, j_2, j_3, j_4) \in {\cal P} }
|I_{j_1, j_2, j_3, j_4 }|   $.


{\bf Remark 3.1} In the case where the quartic terms $h_I$ and $h_{BF}$
both vanish we have from \equ(3.14) and \equ(3.15) 
that $\tilde C_I = \tilde C_{BF} =0 $. Then, 
\equ(3.6) implies that the time of stability is exponentially long. 

If either of $h_I$ and $h_{BF}$ does not vanish our statement is
not optimal. 
If $h_{BF}$ does not vanish, we can not in general 
obtain an exponentially long stability time. 
In the special case where the size of $h_{BF}$ is 
comparable to $\epsilon^{\sigma}$ an  
exponentially long stability time can be recovered 
by slightly modifying the present proof. 
If $h_{BF}$ vanishes and $h_I$ does not vanish several 
improvements can be made in special cases. 
If $\hat h_2$ is integrable and convex we can invoke the more
general theorems of [N], [FGB] to prove stability for 
exponentially long time. In the case where the terms of $h_I$ 
can be absorbed in a quartic term that can be expressed as 
a function of $h_0$, stability for exponentially long time  
can be shown with a slight modification of the proof we give here. 
Note that convexity of the integrable part of $\hat h_2$
neither implies the condition $h_I \equiv 0$
nor is implied by it. Also, even if  
$h_I$ and $h_{BF}$ vanish, $\hat h_2$ can still contain 
non-integrable terms.


{\bf Remark 3.2} The theorem is local, in the sense that
the assumptions are not uniform in $\nu'$. On the other hand, here 
we are mainly interested in the case where $\nu'$ is of $O(1)$.


The plan of the proof is to estimate the rates of change of 
$J_\Gamma$ and $h_0$, showing that they are slow, approximately 
$\sim T_m^{-1}$. To see this we perform a sequence of 
analytic canonical 
transformations to a new set of coordinates in which  
the slow evolution of $J_\Gamma$ and $h_0$
becomes manifest. 
In the first sequence of transforms we apply the Birkhoff
normal form algorithm around the origin $\kappa$ times, 
with $\kappa$ chosen so
that the unnormalized remainder is minimized. Denoting the 
composition of these transforms by $\tau_1$, 
the transformed Hamiltonian has the form  
$ H \circ \tau_1 = h_0 + N_1 + R_1$, with $N_1$ in normal 
form (i.e. $ [h_0,N_1] = 0$) and $R_1$ the remainder.
Thus in the new coordinates, $\dot h_0 =[h_0,R_1] $, which will be small.
The domain of the transform $\tau_1$ will be $D_{{{R}\over{2}}}$, 
and this step is described by Lemma 4.1.

Next, we consider normal forms around monochromatic periodic orbits
of $  {\overline {H \circ \tau_1}} = h_0 + N_1$. We fix 
a mode $\Gamma$ and consider sets 
with $a_\Gamma a^*_\Gamma = \nu $ and 
$a_\gamma =0 $, $\forall \gamma\neq  \Gamma$.  
Expressing the Hamiltonian $ H \circ \tau_1 $ in terms of
the coordinates $J_\Gamma$, $\theta_\Gamma$ and 
$a_\gamma$, $a^*_\gamma$
$\forall \gamma\neq  \Gamma$ we may write 
$$ H\circ \tau_1 = h_\nu + 
(\hat h_2 - h_\nu) + (N_1 - \hat h_2) + R_1 + h_0, $$  
where $h_\nu = 2C_\Gamma \nu J_\Gamma$, i.e. 
$ h_\nu$ is the linear (in $ J_\Gamma$) part of the quartic 
self-interaction terms of the mode $\Gamma$.
Our goal is to perform a second sequence of 
normal form transformations, in which the role of the unperturbed Hamiltonian
is played by $h_\nu$. First we consider the case where 
the quartic terms 
$h_{BF}$ and $h_{I}$ vanish. 
Then, the ``non-linear part'' (i.e. the part to be averaged)
of $  H\circ \tau_1$, specifically 
$  H\circ \tau_1 -  h_\nu-  h_0$, 
is of size $O(\epsilon^{2+3\sigma})$ 
while $ h_\nu$ is of size $O(\epsilon^{2+2\sigma})$, and we can perform the 
Birkhoff normal form algorithm   
obtaining an exponentially small remainder.
An important feature of the argument is that 
$ h_0 =  \sum_{\gamma \neq \Gamma} \omega |a_\gamma|^2 + \omega J_\Gamma$
is invariant under this
second sequence of transformations.
We therefore have that both $h_\nu$ 
(and hence $ J_\Gamma$), and $ h_0 $ evolve at an 
exponentially small rate.

When the quartic terms $h_{BF}$ and $h_{I}$ are present
the time of stability is considerably shorter. 
This is because 
$\epsilon^2 h_{BF}$ and $\epsilon^2 h_{I}$ 
are in general comparable to $h_\nu$ and we can not obtain 
an exponentially small remainder by averaging them. Thus we 
subtract them from $  H\circ \tau_1 -  h_\nu-  h_0$, 
add them at the end of $  H\circ \tau_1$, and simply 
apply the transformations on them. 
Their size is almost unchanged by the transformations 
and since they don't commute with 
$h_\nu$ they give the dominant contribution 
to the rate of change of $J_\Gamma$.
 
The normal form around the periodic orbits is described by Lemmas 5.1
and 5.2, and the result is used to estimate the drift 
of $ J_\Gamma$ and the $a_\gamma$, for $\gamma\neq  \Gamma$ in Lemma 6.1.
The results are translated to the original variables in the proof of the 
theorem, in section 6. 


\SECTION Normal form around the origin

The first step of the proof is a Birkhoff normal form around
the origin, described by the next lemma.

{\bf Lemma 4.1} Consider the real analytic Hamiltonian 
$H=h_0+\epsilon^2 {\hat h_2}+ f_2$ defined 
in the previous sections, 
and its analytic extension $ H:D_R \rightarrow \complex$.
Let $ A_0 $, $B_0$ be such that 
$$ ||\epsilon^2 {\hat h_2} ||_{D_R} \leq A_0, \quad
||f_2 ||_{D_R} \leq B_0 \EQ(4.01)$$ 
and assume that 
for  some $ \Delta \in (0,R)$ we have 
$$ {{ e^2 {\tilde c} T} \over {\Delta^2}} (2A_0 + 10 B_0) < 1,  \EQ(4.2) $$
where $T = {{2 \pi}\over{\omega}}$. 
Then there exists an analytic canonical transformation
$ \tau_1: D_{R-\Delta} \rightarrow  D_{R} $ for which we have 
$$ H \circ \tau_1 = h_0 + N_1 + R_1,  \EQ(4.3) $$ 
where $N_1$ is in normal form, i.e. $[N_1,h_0] = 0$, 
and $R_1 $ is the remainder, satisfying 
$$  || R_1 ||_{D_{R-\Delta}} \leq e^2B_0 \exp \left( 
- {{2 } \over { ({ e^2 {{\tilde c  T}
\over{\Delta^2}}})^{{1}\over{2}}   }  }   
(2A_0+10B_0)^{- {1\over2}}
\right). \EQ(4.4) $$ 
Moreover, for any analytic function $g:D_R \rightarrow \complex$ we have 
$$ ||g \circ \tau_1  ||_{D_{R-\Delta}} \leq 
||g ||_{D_{R}} \exp \left( {{ 4B_0 } \over  { e^2 }  } 
(2A_0+10B_0)^{-1}  \right) \EQ(4.5) $$ 
and 
$$ ||g \circ \tau_1 - g ||_{D_{R-\Delta}} \leq 
||g ||_{D_{R}} 
{ { 4B_0 } \over  { e^2 (2A_0+10B_0)}  }
\exp \left( {{ 4B_0 } \over  { e^2 }  } 
(2A_0+10B_0)^{-1}  \right).  \EQ(4.6) $$

{\it Proof of Lemma 4.1:} The transformation 
$ \tau_1 $ will be the composition of 
$ \kappa $ canonical transformations
$ \tau_{(j) } $, $ j = 1, 2, \ldots, \kappa$, 
with $\kappa$ determined in the course of the proof.

We start by formally constructing normalizing 
canonical transformations $\tau_{(n+1) }$ and 
Hamiltonians $ H^{(n+1)} =  H^{(n)} \circ \tau_{(n+1)}$ recursively
for $ n \in \integer$, $ n \geq 0$.
The operations we describe below can be thought of as operations
on formal power series, and we want that
at each step the Hamiltonian have the form
$$ H^{(n)} = h_0 + {\hat h_{(n)}  } + f_{(n)}.  \EQ(4.61)$$
Setting $ {\hat h_{(0)}} = \epsilon^2 {\hat h_2}$ , 
$f_{(0)} = f_2$, and $ H^{(0)} = H$, we want to define 
$ {\hat h_{(n+1)}}  $, $f_{(n+1)}$ and $ H^{(n+1)}$ from 
$ {\hat h_{(n)}}  $, $f_{(n)}$ and $ H^{(n)}$.

To do this we define the time-average of functions $ g:M \rightarrow \real $ 
along the Hamiltonian flow $\Phi^t_0$ of $h_0$ by 
$$ <g>_0 = {1 \over T } \int_0^T g(\Phi^t_0(x)) dt. $$ 
Also we want functions $ \chi_{(n+1)} $ satisfying
$$ [\chi_{(n+1)}, h_0  ]  + f_{(n)}  = <f_{(n)}>_0, \EQ(4.7)$$ 
$\forall n \in \integer, n \geq 0$, and we check that the solution of \equ(4.7) is
$$ \chi_{(n+1)} = {1 \over T } \int_0^T t 
\left( f_{(n)}(\Phi^t_0(x)) - <f_{(n)}>_0(\Phi^t_0(x)) \right) dt. \EQ(4.8) $$

With these ingredients we define 
$$ H^{(n+1)} =  H^{(n)} \circ \tau_{(n+1)} = 
\exp Ad_{ \chi_{(n+1)} } H^{(n)}, \EQ(4.9) $$ 
where $ {Ad_f} g = [f,g] $ for $f$, $g$ smooth functions on $\real^{2n}$, 
and $ \exp Ad_f g = g + \sum_{j=1}^{\infty} {1\over{j!}} (Ad_f)^j g $.   
The transformations $\tau_{(n+1)} $ thus formally defined are canonical.
From the definition of $ H^{(n+1)}$ and $\exp Ad_f g $ 
we have
$$ H^{(n+1)} = h_0 + {\hat h_{(n)}} + f_{(n)} + [\chi_{(n+1)},h_0] + 
\EQ(4.10) $$ 
$$ + \left( h_0 \circ \tau_{(n+1)} - h_0 - [\chi_{(n+1)},h_0]\right) + 
\left( \hat h_{(n)} \circ \tau_{(n+1)} - \hat h_{(n)} \right) + 
\left( f_{(n)} \circ \tau_{(n+1)} - f_{(n)} \right), $$
thus, defining $ {\hat h_{(n+1)}}$ and $ f_{(n+1)}  $ by 
$$ {\hat h_{(n+1)}} = {\hat h_{(n)}} + <f_{(n)}>_0  \EQ(4.11) $$ 
and 
$$ f_{(n+1)} = \left(h_0 \circ \tau_{(n+1)} - h_0 - [\chi_{(n+1)},h_0]\right)
+ \EQ(4.12) $$
$$\left( {\hat h_{(n)}} \circ \tau_{(n+1)} - {\hat h_{(n)}} \right) + 
\left( f_{(n)} \circ \tau_{(n+1)} - f_{(n)} \right), $$
the Hamiltonian $ H^{(n+1)}$ also 
has the desired decomposition of \equ(4.61).

The fact that $ {\hat h_{(0)}} $ is in normal form 
implies that the $ {\hat h_{(n)}} $, $n>0$ are also in normal 
form. For suppose that $ [h_0, {\hat h_{(n)}} ] = 0$.
The average $ <g>_0 $ of any smooth function $g$
Poisson-commutes with $h_0$ and therefore, by the definition
of ${\hat h_{(n+1)}}$ in \equ(4.11), we also have 
$  [h_0, {\hat h_{(n+1)}} ] = 0$.

Thus we have constructed formal Birkhoff normal form Hamiltonians
$ H^{(n)}$ and we now proceed 
to define the transforms $ \tau_{(n)} $ for 
analytic functions in subsets of $ D_R$.
The goal is to have well defined analytic canonical transformations
$ \tau_{(n+1)}: D_{R- (n+1)\delta}  \rightarrow D_{R- n\delta}$
for $n+1 \leq \kappa$, with appropriate $\kappa$ and $\delta $.
To achieve this we need to estimate 
$ { \chi_{(n+1)}}$ and hence $ {\hat h_{(n+1)}}$ and 
$ f_{(n+1)} $ recursively. From now on we will consider
extension of these functions to $\complex^{2n}$.

Let $ A_n$, $B_n$ satisfy 
$$ || {\hat h_{(n)}   }||_{D_{R-n\delta} }  \leq A_n, \quad 
|| f_{(n)}   ||_{D_{R-n\delta} }  \leq B_n, \quad \forall n>0. $$ 
For $ A_0$, $B_0$ we have \equ(4.01). From the definition of 
$ {\hat h_{(n)}}$, $<f_{(n)}>_0$ and $ \chi_{(n+1)} $ we have that 
$$ ||<f_{(n)}>_0  ||_{D_{R-(n+1)\delta} } \leq B_n, \quad\hbox{and}\EQ(4.13) $$ 
$$ ||   \chi_{(n+1)}  ||_{D_{R-(n+1)\delta} } \leq 2TB_n.   \EQ(4.14) $$
In \equ(4.14) we have used the fact that $ \Phi^t_0(x) \in D_{\rho}$ for 
$x \in   D_{\rho}$. 
From the definition of $  { \hat h_{(n+1)} }  $ we also have 
$$ ||  { \hat h_{(n+1)} } ||_{D_{R-(n+1)\delta} } \leq A_n + B_n $$ 
and we can therefore choose the $ A_n$, $B_n$ so that 
$$ A_{n+1} \leq A_n + B_n, \quad \forall n\geq 0. \EQ(4.15) $$ 
To estimate $ f_{(n+1)} $ we use \equ(4.12), Lemma A1 and 
the bound on $ { \chi_{(n+1)}}$ in \equ(4.14). 
For the second and third parentheses in \equ(4.12) we have 
$$ || f_{(n)} \circ \tau_{(n+1)} -  f_{(n)}  ||_{D_{R-(n+1)\delta} } \leq 
 \int^1_0 
\bigl|\bigl| [\chi_{(n+1)}, f_{(n)} ]  \bigr|\bigr|_{D_{R-(n+1)\delta} } ds
\leq 2  {{ \tilde c } \over { \delta^2}  }T B^2_n,  \EQ(4.16) $$ 
and 
$$|| {\hat h_{(n)}} \circ \tau_{(n+1)} - {\hat h_{(n)}}||_{D_{R-(n+1)\delta} }
\leq 2  {{ \tilde c } \over { \delta^2}  }T A_n B_n. \EQ(4.17) $$

To estimate the first parenthesis in \equ(4.12) we note that,
using the definition of $ \chi_{(n+1)}$ and letting 
$ g\circ \tau^s_{n+1} = \exp s Ad_{\chi_{(n+1)}  } g $, we have 
$$ l = h_0 \circ \tau_{(n+1)} - h_0 - [\chi_{(n+1)},h_0] = 
h_0 \circ \tau_{(n+1)} - h_0 + f_{(n)} - <f_{(n)}>_0 $$ 
$$ = \int_0^1 \left(  [\chi_{(n+1)}, h_0  ](\tau^s_{(n+1)}(x))   + 
( f_{(n)} - <f_{(n)}>_0)(x)  \right)ds =  $$
$$ = - \int_0^1  \left( (f_{(n)} - <f_{(n)}>_0)( \tau^s_{(n+1)}(x))- 
(f_{(n)} - <f_{(n)}>_0) (x)  \right)ds,  $$ 
hence 
$$ || h_0 \circ \tau_{(n+1)} - h_0 - [\chi_{(n+1)},h_0] ||_{D_{R-(n+1)\delta} }
\leq \bigl|\bigl| [\chi_{(n+1)}, f_{(n)} - 
<f_{(n)}>_0 ]\bigl|\bigl|_{D_{R-(n+1)\delta} } 
\leq  4  {{ \tilde c } \over { \delta^2}  }T B^2_n \EQ(4.18) $$
by Lemma A1. Then, from \equ(4.12) and by using  
\equ(4.16), \equ(4.17), \equ(4.18), and \equ(4.15) for $A_n$ we obtain
$$ || f_{(n+1)} ||_{D_{R-(n+1)\delta} } \leq 
{{ \tilde c T} \over { \delta^2}  } (2 A_n + 6 B_n) B_n  
 \leq {{ \tilde c T} \over { \delta^2}  }
\left(2A_0 + 6B_n +2\sum^{n-1}_{j=0} B_j \right) B_n.   \EQ(4.19)  $$

To simplify \equ(4.19), we let
$$ \kappa = \left[ {{1}\over {\lambda}}\right], \quad\hbox{with}\quad 
  {\lambda}^2 = e^2 {{ \tilde c T} \over { \Delta^2}  }
(2 A_0 + 10 B_0)  \EQ(4.22) $$
for some $ \Delta \in (0,R)$,
and choose $\delta = { {\Delta} \over {\kappa} } $.
From \equ(4.2) we have that $\kappa \geq 1$, and we can use this
$\delta$ to perform at least $\kappa$ transformations.
Also let 
$$ E = {{ \tilde c T} \over { \delta^2}  }
(2 A_0 + 10 B_0)  <  { 1 \over 2 }.  \EQ(4.21) $$
The inequality is satisfied by the hypothesis \equ(4.2).
Also, from \equ(4.19) we have that $B_1 \leq E B_0$. 
Suppose now that for some $n<\kappa$ we have $B_j \leq E^j B_0$,
for all $j = 1, 2, \ldots, n$.
Then, from \equ(4.19) we obtain 
$$ B_{n+1} \leq  {{ \tilde c T} \over { \delta^2}  }
\left(2 A_0 + 6 E^n B_0 + 2 B_0 \sum^{n-1}_{j=0} E^j \right) B_n  
\leq {{ \tilde c T} \over { \delta^2}  }
(2 A_0 + 10 B_0) E^n B_0, \EQ(4.191) $$
and therefore by \equ(4.21) we have  $ B_{n+1} \leq E^{n+1} B_0 $.


With this information we see that the 
transformations 
$ \tau_{(n+1)}: D_{R-(n+1)\delta} \rightarrow D_{R-n\delta} $,
with $n+1\leq \kappa$ are well 
defined, since 
from \equ(4.14), and using the $\delta$ and $\kappa$ of \equ(4.22) we have  
$$ {{ e^2 \tilde c } \over { \delta^2}  }
||   \chi_{(n+1)}  ||_{D_{R-(n+1)\delta} } \leq 
{{2 e^2 \tilde c T} \over { \delta^2}  } E^n B_0 \leq
{ { 2 B_0 } \over {2A_0+10 B_0 }} < 1, $$
which, by Lemma A3 is the condition for the 
power series defining the transformations $\tau_{(n)}$ to converge.


Performing the normal form $\kappa$ times, we define 
the remainder $R_1$ by $R_1 = f_{(\kappa)}$. Then 
combining $ B_{\kappa} \leq E^{\kappa} B_0$ with 
\equ(4.22) and \equ(4.21) we obtain
$$ B_\kappa \leq 
\left(  {{ e^2 \tilde c T} \over { \Delta^2}  }(2 A_0 + 10 B_0)
{{ \kappa^2 }\over { e^2}} \right)^{\kappa} B_0, $$
or by \equ(4.22) 
$$ B_\kappa
\leq \left( { {\lambda \kappa} \over {e} }\right)^{2\kappa} B_0, $$ 
and therefore 
$$ ||R_1||_{D_{R-\Delta}}\leq B_{\kappa}\leq 
e^2 e^{- { {2} \over {\lambda}}}B_0, $$
which by the definition of $\lambda $ gives us conclusion \equ(4.4).

To show conclusion \equ(4.5) we use 
$$ g \circ \tau_{(n+1)}(x) = g \circ \tau_{(n)}(x) + 
\int_0^1 [ \chi_{(n+1)}, g \circ \tau_{(n)}](x(s)) ds,  \EQ(4.26) $$
which implies 
$$ ||  g \circ \tau_{(n+1)}   ||_{D_{R-(n+1)\delta} } \leq 
||  g \circ \tau_{(n)}   ||_{D_{R-(n+1)\delta} } 
\left(1 +  {{  \tilde c } \over { \delta^2}  } 
||\chi_{(n+1)}   ||_{D_{R-(n)\delta} } \right) \EQ(4.27) $$
for $n+1 \leq \kappa $. 
Then \equ(4.14), and $B_n \leq E^n B_0$, for $n\leq\kappa$, 
with $E < {1 \over2}$ imply 
 $$ ||  g \circ \tau_{(n+1)}   ||_{D_{R-(n+1)\delta} } \leq 
||  g  ||_{D_{R} } \prod^n_{j=0}
\left( 1+ {{ 2 \tilde c } \over { \delta^2}  } T E^j B_0 \right) \leq 
||  g  ||_{D_{R} } \exp\left({{ 4 \tilde c T  } \over { \delta^2}}B_0 \right).
\EQ(4.28) $$
From $ \delta = {{\Delta} \over {\kappa}} $ and $ \kappa$ as in \equ(4.22),
we see that \equ(4.28) implies conclusion \equ(4.5) in the statement.

Finally, to show conclusion \equ(4.6) we use 
$$ ||  g \circ \tau_{(\kappa)} - g   ||_{D_{R-\Delta} } \leq 
||  g \circ \tau_{(\kappa)} -  g \circ \tau_{(\kappa-1)}   ||_{D_{R-\Delta} } +
||g\circ \tau_{(\kappa-1)}-g \circ\tau_{(\kappa-2)}||_{D_{R-(\kappa-1)\delta}}
$$ 
$$ +\ldots + ||g \circ \tau_{(1)} -  g ||_{D_{R-\delta} }, \EQ(4.29) $$
with $ \delta = { {\Delta} \over { \kappa} }$, so that by \equ(4.26) 
and \equ(4.14) we have 
$$ ||  g \circ \tau_{(\kappa)} -g   ||_{D_{R-\Delta} } \leq 
 {{ 2 \tilde c T} \over { \Delta^2}  } \kappa^2 
( B_{\kappa-1} ||  g \circ \tau_{(\kappa-1)}   ||_{D_{R-(\kappa-1)\delta} }
+ \ldots + B_0  ||  g  ||_{D_{R }} ).  \EQ(4.30)  $$
Combining \equ(4.30) with \equ(4.28) we therefore have 
$$  ||  g \circ \tau_{(\kappa)} -g   ||_{D_{R-\Delta} } \leq
{{ 2 \tilde c T} \over { \Delta^2}  } \kappa^2 
\left(\sum^{\infty}_{j=0}E^j \right)B_0 ||g||_{D_{R}} 
\exp\left( {{4 \tilde c T}\over  {\delta^2} } B_0\right),  \EQ(4.31)$$
and from $ \delta = { {\Delta} \over { \kappa} }$ and $ E < {1\over2}$, 
we see that \equ(4.31) implies \equ(4.6) in the statement. \QED

{\bf Remark 4.1} Note that for given $\epsilon$
the choice to perform $n=\kappa$
transformations  minimizes the remainder estimate,
in the sense that if we consider $E$ in \equ(4.21)
as a function of $n$, i.e. with $\delta = {{\Delta}\over{n}}$,
the minimum of $E^n$ is achieved for $ n ={{1}\over{\lambda}}  $.


\SECTION Normal form around periodic orbits

We now consider the Hamiltonian 
$ H \circ \tau_1 = h_0 + N_1  + R_1 $ obtained in Lemma 4.1. 
We want to compute a new Birkhoff normal form,
this time around 
sets of points with $|a_\Gamma|^2 = \nu$ and $a_\gamma = 0$, 
$\gamma \neq \Gamma$.
(By Proposition 2.1 these sets correspond to periodic orbits of 
the Hamiltonian flow of $ h_0 + N_1 $.)


Consider $ \nu > 0 $ and define the action 
$J_\Gamma = |a_\Gamma|^2 - \nu$,
and the angle $ \theta_\Gamma = \arg(a_\Gamma)$. 
(The dependence of $J_\Gamma$ on $\nu$ will be suppressed from the notation.)
The Poisson bracket between two functions $f$, $g$ is 
$$ [f,g] = 2\left({{\partial f}\over{ \partial J_\Gamma}} 
{{\partial g}\over{ \partial \theta_\Gamma}} - 
{{\partial f}\over{ \partial \theta_\Gamma}}
{{\partial g}\over{ \partial J_\Gamma}}\right) + \sum_{\gamma \neq \Gamma}
\left( {{\partial f}\over{ \partial q_\gamma}}
{{\partial g}\over{ \partial p_\gamma}} -
{{\partial g}\over{ \partial q_\gamma}} 
{{\partial f}\over{ \partial p_\gamma}}  \right).  \EQ(5.01) $$


The Hamiltonian $H \circ \tau_1 $ of the previous lemma 
may be written as 
$$ H \circ \tau_1 = \epsilon^2 {\hat h_2} + (N_1 - \epsilon^2 {\hat h_2}) + 
R_1 + h_0,  \EQ(5.1)$$ 
with $ \epsilon^2 {\hat h_2} $ the quartic part of $ N_1$ 
(it coincides with the quartic part of $H$ so that the 
notation is consistent). 
Using the decomposition of $ {\hat h_2}$ introduced in \equ(3.01),
and the variables $J_\Gamma $ and $\theta_\Gamma $, we may write 
${\hat h_{2,\Gamma}}$ as
$$  {\hat h_{2,\Gamma}} = 2C_\Gamma \nu J_\Gamma + C_\Gamma J^2_\Gamma + 
\hbox{const.}, \EQ(5.2) $$
and we may also write the Hamiltonian $H \circ \tau_1$ as
$$ H \circ \tau_1 = h_\nu + {\tilde h_{(0)}} + 
\epsilon^2 {\hat h_N}
+{\tilde f_{(0)} } + R_1 + h_0 + \epsilon^2 {\hat h_{BF}}
+ \epsilon^2 {\hat h_I}, \EQ(5.3) $$ 
with
$$  h_\nu = 2C_\Gamma \nu J_\Gamma, \quad
{\tilde h_{(0)}} = \epsilon^2  C_{\Gamma} J^2_{\Gamma}  + 
\epsilon^2 {\hat h_{2,\gamma}}, \EQ(5.4) $$
$$ \hat h_N =  \hat h_{2,(\gamma,\Gamma)} - {\hat h_{BF}}- {\hat h_I}, \quad
{\tilde f_{(0)} } = N_1 - {\epsilon^2} {\hat h_2}.  $$

We now perform a Birkhoff normal form in some domain 
$ D^{\nu,R_1}_{\rho}$ (see section 3 for this notation).
The role of the unperturbed Hamiltonian will be 
played by $ h_\nu$. 

{\bf Lemma 5.1 } 
Fix $ \nu$, $\rho_2$ satisfying $ \rho^2_2 < \nu $ and 
$ 8( \nu^2 +2 \nu \rho_2 +2 \rho_2 )^{1\over2}< R^2 $, and 
let $D^{\nu}_{{\rho}}$ denote $D^{\nu,\rho_2}_{{\rho}}$.
Consider the real analytic Hamiltonian 
$ H \circ \tau_1 $ defined in Lemma 4.1, and its complex extension
$  H \circ \tau_1: D^{\nu}_{{\rho}_2} \rightarrow \complex$.
Let $ \delta_2 \in (0,\rho_2) $ and suppose that 
$$  {{ 2 e^2 {\tilde c_1} {\tilde T}} \over { \delta^2_2}  }
|| 
 \epsilon^2 {\hat h_N}
||_{D^{\nu}_{\rho_2} } < 1, \EQ(5.5) $$
with $ {\tilde T} = { {\pi } \over {\epsilon^2 C_\Gamma \nu} }$.
Then there exists an analytic canonical transformation  
$ \tau_2:D^{\nu}_{\rho_2-\delta_2} \rightarrow D^{\nu}_{\rho_2}$ 
satisfying
$$ h_0 \circ \tau_2 = h_0 \EQ(5.6) $$
and for which $ H \circ \tau_1 \circ \tau_2 $ takes the form 
$$ H \circ \tau_1 \circ \tau_2 = h_{\nu} + {\tilde h_{(1)} } + 
{\tilde f_{(1)} } + R^{(1)}_1 + h_0 
+ \epsilon^2 {\hat h_{BF}}\circ \tau_2 
+ \epsilon^2 {\hat h_{I}}\circ \tau_2  \EQ(5.7) $$ 
with ${\tilde h_{(1)} } $ in normal form, i.e. 
$[ h_{\nu}, {\tilde h_{(1)} }]=0 $, and ${\tilde h_{(1)} }$, 
${\tilde f_{(1)} }$ satisfying
$$ [ h_0, {\tilde h_{(1)} }]= [ h_0, {\tilde f_{(1)} }]=0,  \EQ(5.8)$$
and
$$ || {\tilde h_{(1)} }   ||_{D^{\nu}_{\rho_2-\delta_2} } \leq 
|| 
{\tilde h_{(0)} } + \epsilon^2 {\hat h_N}
||_{D^{\nu}_{\rho_2} }, \EQ(5.9) $$
$$  || {\tilde f_{(1)}}   ||_{D^{\nu}_{\rho_2-\delta_2} } \leq 
2||   {\tilde f_{(0)} }  ||_{D^{\nu}_{\rho_2} } + \EQ(5.10)$$
$$ + {{ 2 {\tilde c_1} {\tilde T}  } \over { \delta^2_2}  } 
||  
 \epsilon^2  {\hat h_N}
||_{D^{\nu}_{\rho_2}} 
( 3 ||   
 \epsilon^2  {\hat h_N}
||_{D^{\nu}_{\rho_2}} + 
||  {\tilde h_{(0)} }   ||_{D^{\nu}_{\rho_2} } ).  $$ 
Moreover, for any analytic function 
$g: D^{\nu}_{\rho_2} \rightarrow \complex $ we have 
$$  ||g \circ \tau_2  ||_{D^{\nu}_{\rho_2-\delta_2} } \leq
\left(1+{{ 2 {\tilde c_1} {\tilde T}} \over { \delta^2_2}  } 
||  
\epsilon^2  {\hat h_N}
||_{D^{\nu}_{\rho_2}} \right)
|| g||_{D^{\nu}_{\rho_2} }   \EQ(5.11) $$  
$$ ||g \circ \tau_2 - g ||_{D^{\nu}_{\rho_2-\delta_2} } \leq
{{ 2 {\tilde c_1} {\tilde T}} \over { \delta^2_2}  } 
||  
\epsilon^2  {\hat h_N}||_{D^{\nu}_{\rho_2}}
|| g||_{D^{\nu}_{\rho_2}}. \EQ(5.12) $$
{\bf Remark 5.1} The condition on $\nu$ and $\rho_2$ guarantees 
that $D^{\nu}_{\rho_2} \subset D_{{R}\over{2}}$.

{\it Proof of Lemma 5.1:} Let 
$  \Phi^t_{\nu}$ be the time-$t$ map of 
the Hamiltonian flow of $h_\nu$, $ \tilde T$ the period, and 
$<g>_{\nu}(x) = { {1} \over {\tilde T} } \int_0^{\tilde T}
g(\Phi^t_{\nu}(x)) dt$, for any function $g$. 
The canonical transformation $\tau_2$ will be the 
time-$1$ map of the Hamiltonian flow of a function 
$ \tilde \chi_{(1)} $ satisfying 
$$ \epsilon^2 {\hat h_N}
+ [\tilde \chi_{(1)},  h_\nu ]  = 
<  \epsilon^2 
{\hat h_N}>_{\nu}. \EQ(5.13) $$

The solution of \equ(5.13) is given by an expression analogous to
\equ(4.8), and as in \equ(4.14) we obtain
$$  || \tilde \chi_{(1)}    ||_{D^{\nu}_{\rho_2-\delta_2} } \leq
2 {\tilde T } || \epsilon^2 
{\hat h_N} ||_{D^{\nu}_{\rho_2}}. \EQ(5.14)$$
From Lemma A3 and \equ(5.14), condition \equ(5.5) guarantees the convergence 
of the series for $ \exp Ad_{ \tilde \chi_{(1)}  } $, 
and the transform $ \tau_2$ is well defined on $D^{\nu}_{\rho_2-\delta_2}$.  
Also, the estimate on $ \tilde \chi_{(1)}$ in  \equ(5.14) gives us
conclusions \equ(5.11) and \equ(5.12) of the statement.

The transformed Hamiltonian $  H \circ \tau_1 \circ \tau_2 $
has the form  
$$  H \circ \tau_1 \circ \tau_2  = h_\nu + {\tilde h_{(0)} } +
\epsilon^2 {\hat h_N}
+ [\tilde \chi_{(1)},  h_\nu ] + 
{\tilde f_{(0)} } \circ \tau_2 + 
(  h_\nu \circ \tau_2 - h_\nu - [\tilde \chi_{(1)}, h_\nu]) + $$ 
$$ + (  {\tilde h_{(0)} } \circ \tau_2 -    {\tilde h_{(0)} }  ) + 
( \epsilon^2 
{\hat h_N} \circ \tau_2 - 
\epsilon^2 
{\hat h_N}) + R_1  \circ \tau_2 +  h_0 \circ \tau_2
+ \epsilon^2{\hat h_{BF}}\circ \tau_2 + 
\epsilon^2{\hat h_{I}}\circ \tau_2, $$ 
and we set 
$$ {\tilde h_{(1)} } ={\tilde h_{(0)} }+
<  \epsilon^2 
{\hat h_N} >_{\nu}, \quad
R^{(1)}_1 =  R_1  \circ \tau_2,  \EQ(5.15)$$
and 
$$  {\tilde f_{(1)} } =   {\tilde f_{(0)} } \circ \tau_2 +
\left(  h_\nu \circ \tau_2 - h_\nu - [\tilde \chi_{(1)}, h_\nu]\right) +
\EQ(5.16)$$
$$ + \left(  {\tilde h_{(0)} } \circ \tau_2 -    {\tilde h_{(0)} }  \right) + 
\left( \epsilon^2 
{\hat h_N}\circ \tau_2 - 
\epsilon^2 
{\hat h_N} \right).  $$
Then \equ(5.9) follows immediately from the definitions of the 
time-average and $   {\tilde h_{(1)} } $. To obtain
\equ(5.10) we estimate the parentheses in \equ(5.16)
as in Lemma 4.1, and calculate
$$ || {\tilde f_{(1)} }||_{D^{\nu}_{\rho_2-\delta_2} } \leq 
2|| {\tilde f_{(0)} }||_{D^{\nu}_{\rho_2} } + 
{{ 2 \tilde c_1 } \over  { \delta^2_2} }  
|| {\tilde \chi_{(1)} }||_{D^{\nu}_{\rho_2} }
\left( 3 || \epsilon^2 
{\hat h_N} ||_{D^{\nu}_{\rho_2}} +
|| {\tilde h_{(0)} } ||_{D^{\nu}_{\rho_2}} \right), \EQ(5.161) $$ 
which together with \equ(5.14) is what we want. 

To see that $ h_0 \circ \tau_2 = h_0$ we note that 
$ [{\hat h_N} , h_0 ] = 0 $ and 
$ h_0(\Phi^t_{\nu}(x)) = h_0(x) $, and hence 
$ [< {\hat h_N}>_{\nu} , h_0 ] = 0 $ .
From the definition of $ {\tilde \chi_{(1)} }$ we then have that 
$ [  {\tilde \chi_{(1)} } , h_0 ] = 0 $, and therefore 
$ \exp Ad_{ {\tilde \chi_{(1)} }  } h_0 = h_0 $.  
From the definition of $ {\tilde h_{(1)} }$ and the fact that 
$[h_0,{\hat h_N}]=0$ we also see
that  $[h_0,{\tilde h_{(1)} }]=0$. For 
${\tilde f_{(1)} }$, we examine \equ(5.16),
and since ${\tilde f_{(0)} }$, $ {\tilde \chi_{(1)}}$, $h_\nu$
and ${\hat h_N}$ Poisson-commute with
$h_0$, and $\tau_2$ is canonical we must also have
$ [h_0,{\tilde f_{(1)} }]=0$. \QED

In the next lemma the Birkhoff normal form around the 
set of points with $ J_\Gamma = 0$ and 
$a_\gamma = 0$ for $\gamma \neq \Gamma$ is iterated. The goal is to
minimize the remainder (unnormalized) part of the Hamiltonian, 
and the construction is very much along the lines of Lemma 4.1.
An important feature of the new sequence of transforms is that they leave 
$h_0$ invariant. 

{\bf Lemma 5.2} Consider the analytic Hamiltonian 
$ H \circ \tau_1 \circ \tau_2: D^{\nu}_{{\rho}_3} \rightarrow \complex$ 
defined as in Lemmas 4.1, 5.1, with  
$ \rho_3 \in (0, \rho_2 - \delta_2] $.
Let $ \tilde A_1 $, $\tilde B_1 $ be such that 
$$  ||  {\tilde h_{(1)}} ||_{ D^{\nu}_{{\rho}_3} }   \leq {\tilde A_1}, \quad
||  {\tilde f_{(1)}} ||_{ D^{\nu}_{{\rho}_3} }   \leq {\tilde B_1}, 
\EQ(5.16) $$ 
and assume that for some $ \delta_3 \in (0,\rho_3)$ we have 
$$  {{ e^2 {\tilde c_1} \tilde T} \over {\delta_3^2}} 
(2 {\tilde A_1} + 10 {\tilde B_1}) < 1. \EQ(2.46) $$
Then there exists an 
analytic canonical transformation 
$ \tau_3: D^{\nu}_{\rho_3-\delta_3} \rightarrow  D^{\nu}_{\rho_3} $, 
satisfying 
$$ h_0 \circ \tau_3 = h_0 \EQ(4.47) $$
for which we have 
$$ H \circ \tau_1 \circ \tau_2 \circ \tau_3 = 
h_\nu + N_2 + R_2 + { \tilde R_1} + h_0  + 
\epsilon^2 {\hat h_{BF}} \circ  \tau_2 \circ  \tau_3 +
\epsilon^2 {\hat h_{I}} \circ  \tau_2 \circ  \tau_3
\EQ(2.48) $$
with $N_2 $ in normal form, i.e. $ [N_2  , h_\nu  ] =  0 $.
The $N_2 $ and $R_2 $ also satisfy
$$ [N_2 , h_0]=[R_2, h_0]=0,  \EQ(2.481)$$
and we have
$$ || R_2 ||_{D^{\nu}_{\rho_3-\delta_3}} \leq e^2 {\tilde B_1} \exp \left( 
- {{2 } \over  { ({e^2 {{\tilde c_1 \tilde T} 
\over{\delta_3^2}}} )}^{{1}\over{2}}   } 
(2{\tilde A_1}+10{\tilde B_1})^{-{1\over2}}  \right), \EQ(4.49) $$
$$ || \tilde R_1 ||_{D^{\nu}_{\rho_3-\delta_3}} \leq 
|| R^{(1)}_1 ||_{D^{\nu}_{\rho_3}}
\exp\left({{4{\tilde B_1}}\over{e^2 } } 
(2{\tilde A_1}+10{\tilde B_1})^{-1} \right). \EQ(4.50) $$
Moreover, for any analytic function $g:D^{\nu}_{\rho_3} \rightarrow \complex$ 
we have 
$$ ||g \circ \tau_3  ||_{D^{\nu}_{\rho_3-\delta_3}} \leq 
||g ||_{D^{\nu}_{\rho_3}} \exp \left( {{ 4{\tilde B_1} } 
\over  { e^2 (2{\tilde A_1}+10{\tilde B_1})}  } \right), \EQ(4.51) $$ 
and 
$$ ||g \circ \tau_3 - g ||_{D^{\nu}_{\rho_3-\delta_3}} \leq 
||g ||_{D^{\nu}_{\rho_3}} 
{ { 4{\tilde B_1} } \over  { e^2 (2{\tilde A_1}+10{\tilde B_1})}  }
\exp \left( {{ 4{\tilde B_1} } \over  
{ e^2 (2{\tilde A_1}+10{\tilde B_1})}  } \right).  \EQ(4.52) $$


{\it Proof of Lemma 5.2:} The method of proof is that of Lemma 4.1.
Letting $ H_2 =  H \circ \tau_1 \circ \tau_2 $, we will define 
a sequence of canonical transformations $ \tilde \tau_{(n)}$, $n\geq2$, and 
Hamiltonians $ H_2^{(n+1)} = H_2^{(n)} \circ \tilde \tau_{(n+1)} $, $n\geq1$,
and let $ \tau_3 $ be the composition of $ \tilde \kappa $ such 
transforms for appropriate $ \tilde \kappa$. At the 
$n$-th iteration the Hamiltonian will have the decomposition
$$   H_2^{(n)} = h_\nu  + {\tilde h_{(n)}  } + \tilde f_{(n)} + 
R^{(n)}_1 + {h_0}^{(n)}
+ \epsilon^2 {\hat h}_{BF}^{(n)}
+ \epsilon^2 {\hat h}_{I}^{(n)},  \EQ(4.53) $$ 
The $  {\tilde h_{(1)}  }$, $\tilde f_{(1)}$ and $R^{(1)}_1$ are the 
ones of Lemma 5.1 (i.e. the notation is consistent), and 
$ h^{(1)}_0 = h_0$, $H_2^{(1)}=H_2$, 
${\hat h}_{I}^{(1)}= {\hat h}_{I} \circ \tau_2$, 
${\hat h}_{BF}^{(1)}= {\hat h}_{BF}\circ \tau_2 $. 
Also we define functions $ \tilde \chi_{(n+1)} $ satisfying
$$ [\tilde \chi_{(n+1)}, h_0  ]  + \tilde f_{(n)}  = <\tilde f_{(n)}>_\nu
,\quad n \geq 1. \EQ(4.54)$$
The $ \tilde \chi_{(n+1)} $ are given by an expression analogous 
to \equ(4.8). To describe the $  {\tilde h_{(n)}  }$, $\tilde f_{(n)}$
recursively, we let 
$ H_2^{(n+1)} =  H_2^{(n)} \circ {\tilde \tau_{(n+1)}} = 
\exp Ad_{ \tilde \chi_{(n+1)} } H_2^{(n)} $. We have 
$$ H_2^{(n+1)} = h_\nu + {\tilde  h_{(n)}} + {\tilde f_{(n)}} + 
[\tilde \chi_{(n+1)}, h_\nu] + 
( h_\nu \circ { \tilde \tau_{(n+1)}}- h_\nu-[\tilde \chi_{(n+1)},h_\nu])+ 
\EQ(4.541)$$ 
$$ +( \tilde  h_{(n)} \circ {\tilde \tau_{(n+1)}} - \tilde h_{(n)} ) + 
( \tilde f_{(n)} \circ {\tilde  \tau_{(n+1)}} - \tilde f_{(n)} )
+ R^{(n)}_1 \circ {\tilde \tau_{(n+1)} } + $$
$$ + h_0 \circ \tilde \tau_{(n+1)}
+ \epsilon^2{\hat h}^{(n+1)}_{BF} + \epsilon^2{\hat h}^{(n+1)}_{I}. $$
Defining $  {\tilde  h_{(n+1)}}$, $ R^{(n+1)}_1 $, $ {h_0}^{(n+1)} $ 
and $  {\tilde  f_{(n+1)}}$ by 
$$  {\tilde  h_{(n+1)}} = {\tilde  h_{(n)}} + 
< {\tilde  f_{(n)}} >_\nu, \quad
R^{(n+1)}_1 =  R^{(n)}_1 \circ {\tilde \tau_{(n+1)}  }, \quad
{h_0}^{(n+1)} = {h_0}^{(n)}  \circ {\tilde \tau_{(n+1)}  }, \EQ(4.531)$$ 
$$ {\hat h}_{BF}^{(n+1)} = {\hat h}_{BF}^{(n)}\circ {\tilde \tau}_{(n+1)}, 
\quad {\hat h}_{I}^{(n+1)} = {\hat h}_{I}^{(n)}\circ {\tilde \tau}_{(n+1)},
\EQ(4.532) $$
$$ \quad  {\tilde  f_{(n+1)}} = H_2^{(n+1)} - h_\nu - {\tilde  h_{(n+1)}} - 
R^{(n+1)}_1 - {h_0}^{(n+1)} \EQ(4.533)$$
the Hamiltonian $ H_2^{(n+1)}$ has the decomposition of \equ(4.53).

The $ {\tilde h_{(n+1)}  }$, $ {\tilde f_{(n+1)}  }$ and 
$ {\tilde \chi_{(n+1)}  }$ are analogous to the respective 
$ {\hat h_{(n)}  }$, $ {\hat f_{(n)}  }$ and 
$ {\chi_{(n)}  }$ of Lemma 4.1, and we can similarly define bounds
$ \tilde A_n$, $ \tilde B_n$ on the supremum
norm of the $ {\tilde h_{(n)}  }$, $ {\tilde f_{(n)}  }$ respectively
in the domains $D^{\nu} _{\rho_3-(n-1){\tilde \delta}} $, for
$n \geq 1$.
The iterative estimates for $ {\tilde h_{(n+1)}  }$, $ {\tilde f_{(n+1)}  }$ 
and $ {\tilde \chi_{(n+1)}  }$ readily follow from the arguments of 
Lemma 4.1: $ \tilde \delta$, $\delta_3 $, $\tilde \kappa$ and 
$ \tilde \lambda $ are analogous to $ \delta$, $\Delta $, 
$\kappa$ and $\lambda $ respectively in Lemma 4.1.
Thus $ \tilde A_1$, $ \tilde B_1$ are analogous to 
$ A_0$, $  B_0$ in Lemma 4.1, so that by assumption \equ(2.46)
we can apply the iteration $ \tilde \kappa \geq 1$ times, where
$$ \tilde \kappa = \left[ {{1}\over {\tilde \lambda}}\right], 
\quad\hbox{with}\quad 
  { \tilde \lambda} = e^2 {{ {\tilde c_1} {\tilde T}} \over { \delta_3^2}  }
(2 \tilde A_1  + 10 \tilde B_1 ).  \EQ(4.55) $$ 
Following the argument of Lemma 4.1, by letting 
$ \tilde \delta = { {\delta_3 } \over {\tilde \kappa} } $
we have well defined analytic canonical transformations 
$ \tilde \tau_{(n)}: D^{\nu} _{\rho_3-(n-1){\tilde \delta}} 
\rightarrow  D^{\nu} _{\rho_3-(n-2){\tilde \delta}} $, for $n \geq 2$.


The iterative estimates for $ {\tilde h_{(n+1)}  }$, $ {\tilde f_{(n+1)}  }$ 
and $ {\tilde \chi_{(n+1)}  }$ readily follow from the arguments of 
Lemma 4.1: $ \tilde \delta$, $\delta_3 $, $\tilde \kappa$ and 
$ \tilde \lambda $ are analogous to $ \delta$, $\Delta $, 
$\kappa$ and $\lambda $ respectively in Lemma 4.1.

Setting $N_2 = {\tilde h_{\tilde\kappa}  }$ and 
$R_2 = {\tilde f_{\tilde\kappa}  }$ and 
comparing with Lemma 4.1 we thus immediately obtain 
conclusions \equ(4.49), \equ(4.51) and \equ(4.52), while 
conclusion \equ(4.50) is \equ(4.51) with $g$ replaced by $ R_1^{(1)}$. 

Finally, from $[ \tilde f_{(1)}  , h_0  ] = 0 $ shown in 
Lemma 5.1, the inductive step $[ \tilde f_{(n)}  , h_0  ] = 0$ 
for some $n>1$,  
and the definition of the $ \tilde \chi_{(n+1)} $,
we have that  $[ \tilde \chi_{(n+1)}  , h_0  ] = 0$.
Therefore $  h_0 \circ {\tilde \tau_{(n+1)}} = h_0  $, 
$2 \leq n \leq \tilde\kappa$, and $ h_0 \circ \tau_3 = h_0  $.
To show \equ(2.481) we first note that 
$[ \tilde h_{(1)}  , h_0  ] =[ \tilde f_{(1)}  , h_0  ]= 0 $ by 
Lemma 5.1. Supposing that 
$[ \tilde h_{(n)}  , h_0  ] =[ \tilde f_{(n)}  , h_0  ]= 0 $
for some $n>1$, we easily see from the definition of 
$\tilde h_{(n+1)}$ that it also Poisson-commutes with $h_0$.
Also, examining the individual terms of $\tilde f_{(n+1)}$, $n>1$,
i.e. \equ(4.541) and \equ(4.532), we see that they 
Poisson-commute with $h_0$. Hence both
$ \tilde h_{(\tilde\kappa)}$ and  $\tilde f_{(\tilde\kappa)}$ 
Poisson-commute with $h_0$. \QED


\SECTION Proof of the stability theorem

We now use the transformed Hamiltonian
$ H \circ \tau_1 \circ \tau_2 \circ \tau_3 $ defined by the previous 
lemma to study the rate of change of $ J_\Gamma $ and 
$ h_0 $ in $ \real^{2n} \cap D^{\nu}_{\rho_3-\delta_3} $. 

{\bf Lemma 6.1} Consider the real analytic Hamiltonian 
$ H \circ \tau_1 \circ \tau_2 \circ \tau_3:
\real^{2n} \cap D^{\nu}_{\rho_3-\delta_3}\rightarrow \real $ 
as in Lemma 5.2, and let  
$ \rho_4 $, $\delta_4$, $ \rho_5$ be positive reals satisfying 
$ \rho_4 \leq \rho_3 - \delta_3$, $ \delta_4 < \rho_4$, and 
$ \rho_5  \leq \rho_4 - \delta_4$.
Also suppose that $ \rho_2 \leq 8\rho_5$, with $ \rho_2$ 
as in Lemmas 4.1, 5.1.
Then for initial conditions $x(0)$ 
such that 
$$ x(0) \in \real^{2n} \cap D^{\nu,\rho_2}_{\xi\rho_5}, \quad\hbox{with} \quad
\xi = {{4}\over{n}}\left(\sqrt{1+{{n}\over{64}}}-1 \right), \EQ(6.1) $$ 
the trajectories $x(t)$ of the Hamiltonian flow of 
$  H \circ \tau_1 \circ \tau_2 \circ \tau_3$ satisfy 
$$ x(t) \in\real^{2n} \cap D^{\nu,\rho_2}_{\rho_5} \EQ(6.2) $$ 
for all times $t \in [-T_D,T_D ]$, where 
$$ T_D \leq {{\delta^2_4} \over {2 \tilde c_1}  }
\left({{\rho_5} \over {\rho_4} } \right)^2 
(m_2 + (2n+2) \tilde m_1 + m_I + m_{BF})^{-1},  \EQ(6.3) $$ 
and $ m_2 $, $ \tilde m_1$, $m_I$, $m_{BF}$ are numbers satisfying 
$$ || R_2 ||_{D^{\nu}_{\rho_4}} \leq m_2, \quad 
|| \tilde R_1 ||_{D^{\nu}_{\rho_4}} \leq \tilde m_1, \EQ(6.4) $$
$$ || \epsilon^2 {\hat h}_{BF}\circ  
{ \tau}_2 \circ { \tau}_3 ||_{D^{\nu}_{\rho_4}} \leq m_{BF}, \quad
||\epsilon^2 {\hat h}_{I}\circ  { \tau}_2 \circ { \tau}_3  
- {\hat h}_{I} ||_{D^{\nu}_{\rho_4}} \leq m_{I}.   $$ 

{\bf Remark 6.1} Condition $ \rho_2 \leq 8\rho_5$ will be satisfied for 
an appropriate choice of $\rho_j$, $\delta_j$, $j= 2, 3, 4$. 
This choice will be made in the proof of Theorem 3.1 below.

{\it Proof of Lemma 6.1:} By Lemma 5.2, the rate of change of
$h_\nu$ and $h_0$ under the Hamiltonian flow of 
$ H \circ \tau_1 \circ \tau_2 \circ \tau_3 $ in 
$ D^{\nu}_{\rho_3-\delta_3} $ is given by  
$$ {{d h_\nu} \over {dt  }  } =  [ h_\nu ,R_2 + \tilde R_1 
+ \epsilon^2 \hat h_{BF}\circ  {\tilde \tau}
+ \epsilon^2 ({\hat h}_{I}\circ  {\tilde \tau} -
{\hat h}_{I})],   \EQ(6.5) $$
$$ {{d h_0} \over {dt  }  } =  [ h_0 , \tilde R_1  ],   \EQ(6.6) $$
where $ {\hat h}_{BF}\circ \tilde \tau = 
{\hat h}_{BF}\circ  { \tau}_2 \circ { \tau}_3$ and 
$ {\hat h}_{I}\circ \tilde \tau = 
{\hat h}_{I} \circ { \tau}_2 \circ {\tau}_3$.


Hence, using Lemma A2 in $D^{\nu}_{\rho_4-\delta_4} $
with $\delta_4 $, $\rho_4   $ satisfying 
$ 0< \delta_4 < \rho_4 \leq \rho_3 - \delta_3$,
we have that 
$$ \sup_{ x \in D^{\nu}_{\rho_4-\delta_4} }  
\left| {{d h_\nu} \over {dt  }  } \right| 
\leq  {{ {\tilde c_1} } \over {\delta_4^2}} 
|| h_\nu ||_{D^{\nu}_{\rho_4}}
\left(|| R_2 + \tilde R_1 ||_{D^{\nu}_{\rho_4}} +
\epsilon^2|| {\hat h}_{BF}\circ  {\tilde \tau} ||_{D^{\nu}_{\rho_4}}
+ \epsilon^2||{\hat h}_{I}\circ  {\tilde \tau}  
- {\hat h}_{I} ||_{D^{\nu}_{\rho_4}}
\right), \EQ(6.7) $$
$$ \sup_{ x \in D^{\nu}_{\rho_4-\delta_4} }  
\left| {{d h_0} \over {dt  }  } \right| 
\leq  {{ {\tilde c_1} } \over {\delta_4^2}} 
|| h_0 ||_{D^{\nu}_{\rho_4}}
||  \tilde R_1 ||_{D^{\nu}_{\rho_4}}. \EQ(6.8) $$
Consequently we have 
$$ |h_\nu(t)|\leq |h_\nu(0)|+{\tilde T_D} Q_\nu \EQ(6.9) $$
with 
$$ Q_\nu = {{ {\tilde c_1} } \over {\delta_4^2}} 
|| h_\nu ||_{D^{\nu}_{\rho_4}}
\left(|| R_2 + \tilde R_1 ||_{D^{\nu}_{\rho_4}} +
\epsilon^2|| {\hat h}_{BF}\circ  {\tilde \tau} ||_{D^{\nu}_{\rho_4}}
+ \epsilon^2 ||{\hat h}_{I}\circ  {\tilde \tau}  
- {\hat h}_{I} ||_{D^{\nu}_{\rho_4}} \right), $$
and
$$ |h_0(t)| \leq |h_0(0)|+ {\tilde T_D} Q_0, \EQ(6.10) $$
with 
$$ Q_0 = {{ {\tilde c_1} } \over {\delta_4^2}} 
|| h_0 ||_{D^{\nu}_{\rho_4}}
||  \tilde R_1 ||_{D^{\nu}_{\rho_4}}, $$
where $ |t| \leq \tilde T_D $, and $\tilde T_D $ is such that trajectories 
are in $ D^{\nu}_{\rho_4-\delta_4}$, $\forall |t| \leq \tilde T_D$. 
From the definitions of $ h_\nu $ and $ h_0$, \equ(6.9) and \equ(6.10) imply 
that for $ |t| \leq \tilde T_D$ we have 
$$  |J_\Gamma(t)| \leq |J_\Gamma(0)| + 
{ {\tilde T_D} \over {2 \epsilon^2 C_\Gamma \nu}} Q_\nu,  \EQ(6.11) $$ 
and 
$$ |a_\gamma(t)|^2 \leq |a_\gamma(0)|^2 + 
\sum_{ {\gamma \in {\cal J} } \atop {\gamma \neq \Gamma} } |a_\gamma(0)|^2
+ |J_\Gamma(t)| + |J_\Gamma(0)| + 
{ {\tilde T_D} \over {\omega} } Q_0,
\quad \forall \gamma \neq \Gamma. \EQ(6.12) $$
From assumption \equ(6.1) on the initial conditions, 
\equ(6.11) becomes
$$  |J_\Gamma(t)| \leq 
\xi \rho_2\rho_5 + 
{ {\tilde T_D} \over {2 \epsilon^2 C_\Gamma \nu}} Q_\nu, \EQ(6.13)  $$
so that from \equ(6.12) and \equ(6.13) we have
$$  |a_\gamma(t)|^2 \leq 
2n\xi^2\rho^2_5 + 2\xi\rho_5\rho_2 +
{ {\tilde T_D} \over { 2 \epsilon^2 C_\Gamma \nu}} Q_\nu + 
{ {\tilde T_D} \over {\omega} } Q_0, \quad \forall \gamma \neq \Gamma. 
\EQ(6.14)$$ 
By our choice of $\xi$ in \equ(6.1), we have 
$2n\xi^2\rho^2_5 + 2\xi\rho_5\rho_2 \leq {{\rho^2_5}\over{2}}$. 
Then, if $ \tilde T_D$ is such that 
$$ {\tilde T_D } Q_\Gamma = 
{ {\tilde T_D} \over  {2 \epsilon^2 C_\Gamma \nu}} Q_\nu + 
{ {\tilde T_D} \over {\omega}} Q_0 \leq {{{\rho_5}^2} \over {2}},\EQ(6.15)$$ 
\equ(6.13) and \equ(6.14) imply the bounds of \equ(6.2) for
$ |t| \leq \tilde T_D$. On the other hand,
estimating the size of $h_\nu$ and $h_0$ in $ D^{\nu}_{\rho_4}$
we have 
$$ Q_\Gamma \leq {{ {\tilde c_1} \rho_4^2} \over  { \delta_4^2} }
\left(|| R_2  ||_{D^{\nu}_{\rho_4}} + 
(2n+2) ||\tilde R_1 ||_{D^{\nu}_{\rho_4}}
 +
\epsilon^2 || {\hat h}_{BF}\circ  {\tilde \tau} ||_{D^{\nu}_{\rho_4}}
+ \epsilon^2 ||{\hat h}_{I}\circ  {\tilde \tau} - 
{\hat h}_{I} ||_{D^{\nu}_{\rho_4}}
\right), \EQ(6.16)$$ 
so that \equ(6.15) holds for any
$\tilde T_D \leq T_D$, since $T_D$ satisfies \equ(6.3). 
Suppose now that some trajectory 
with initial conditions as in \equ(6.1) 
crosses the boundary of $ D^{\nu}_{\rho_4-\delta_4}$ for the first time
at some time $t_0$, with $ t_0 < T_D $. But then we have a contradiction
since by \equ(6.2) we must have that 
$x(t_0) \in  D^{\nu}_{\rho_5} \subset D^{\nu}_{\rho_4-\delta_4}$.
Hence choosing some $T_D $ satisfying 
assumption \equ(6.3),  $ \tilde T_D$ can be as large as
$T_D $, and the lemma follows.  \QED  

{\it Proof of Theorem 3.1:} We want
to estimate the drift of $J_\Gamma$ and $a_\gamma$, 
$\gamma \neq \Gamma$ using the transformed Hamiltonian
$H\circ \tau_1 \circ \tau_2  \circ \tau_3$.
Setting $\Delta = {{R}\over{2}}$ in Lemma 4.1
and using assumption \equ(3.2) of the theorem we see that the 
hypothesis of Lemma 4.1 is satisfied and therefore
$ H\circ \tau_1$ is well defined in 
$D_{{R}\over{2}}$. Now let $\nu =  \tau_1(\nu')$.
From the conditions \equ(3.1) on $\nu'$,
\equ(3.21) of the statement,
and the estimate for the difference between $ \nu'$ and $ \nu$ 
implied by \equ(4.6) in Lemma 4.1, we can see that 
$ D^{\nu}_{\rho_2}  \subset D_{{R}\over{2}} $. 

To specify the domains of $ \tau_2 $ and $ \tau_3$ 
we set 
$ \rho_2 = \epsilon^{\sigma}$, $ \delta_2 = {1\over 2} \rho_2$ 
in Lemma 5.1, and 
$ \rho_3 = {1 \over 2} \rho_2$, $ \delta_3 = {1\over 4} \rho_2$
in Lemma 5.2. 
Then, by the definition of $ C_{NBF} $ we find that 
condition \equ(3.22) of the theorem implies \equ(5.5) 
in the hypothesis of Lemma 5.1.
To see this we note that 
$$ {\hat h_{2,(\gamma,\Gamma)}} =  {\hat h_2} -{\hat h_{2,\Gamma}}
- {\hat h_{2,\gamma}} = 
\sum_{(\gamma_1, \gamma_2, \gamma_3, \gamma_4) \in {\cal Q} }
I_{\gamma_1, \gamma_2, \gamma_3, \gamma_4 }
a_{\gamma_1} a_{\gamma_2} a^*_{\gamma_3} a^*_{\gamma_4}, \EQ(6.17)$$
for some set $ {\cal Q} $ of index quartets for which exactly 
one of them is $ \Gamma$.
Using the variables
$a_{\gamma}$, $a^*_{\gamma}$ for $\gamma\neq \Gamma$, and 
$J_\Gamma$, $\theta_\Gamma$, 
the sum of \equ(6.17) consists of monomials
$ \sqrt{\nu+J_\Gamma}e^{\pm i\theta_\Gamma}
a_{\phi}a^*_{\chi} a^*_{\psi}$, 
for some $\phi$, $\chi$, $\psi \in {\cal J}$,
so that 
$$ ||
{\hat h_N} ||_{D^{\nu}_{\epsilon^{\sigma}}} \leq
2^4(\sqrt{\nu}+\epsilon^{\sigma})\epsilon^{3 \sigma}
\left(\sum_{(\gamma_1, \gamma_2, \gamma_3, \gamma_4) \in {\cal Q} }
|I_{\gamma_1, \gamma_2, \gamma_3, \gamma_4 }| \right).  \EQ(6.18)$$
Then, from \equ(3.1) 
and the estimate for $| \nu' -\nu|  $, we have 
$$ ||
{\hat h_N}  ||_{D^{\nu}_{\epsilon^{\sigma}}} \leq
\sqrt{\nu'} {{3}\over{2}}2^5 \epsilon^{3\sigma}
||
{\hat h_N} ||_1.  \EQ(6.19)$$
From the definition of $C_{NBF}$ in \equ(3.11), 
the hypothesis of Lemma 5.1 follows, and therefore the Hamiltonian 
$ H \circ \tau_1 \circ \tau_2 $ is well defined in 
$ D^{\nu}_{ {{\rho_2} \over {2 } }} $.

Also, bounding $\tilde h_{(1)}$ and  $\tilde f_{(1)}$,
along the same lines in $D^{\nu}_{\rho_2}$
we find that condition \equ(3.4) implies hypothesis \equ(2.46)
of Lemma 5.2. To see this we use the definition of 
$\tilde h_{(1)}$ in \equ(5.15), to find that 
$$ ||\tilde h_{(1)}  ||_{D^{\nu}_{\epsilon^{\sigma}}} \leq
||\tilde h_{(0)}  ||_{D^{\nu}_{\epsilon^{\sigma}}} +
||
{\hat h_N}||_{D^{\nu}_{\epsilon^{\sigma}}} \leq  $$
$$ 
2^5 \sqrt{{3}\over{2}} \epsilon^{2+3\sigma}C_{NBF} +
\epsilon^{2+4\sigma}C_\Gamma + 
2^4 \epsilon^{2+4\sigma}||{\hat h_{2,\gamma}}||_1 =
\epsilon^{2+3\sigma} \tilde H_1, 
\EQ(4.72)$$
while for $\tilde f_{(1)}$ we use \equ(5.161) to see that 
$$ ||\tilde f_{(1)}  ||_{D^{\nu}_{\epsilon^{\sigma}}} \leq 
2 \epsilon^{4}\beta + 
3 \cdot 2^5 \sqrt{3 \over 2} {{\pi  {\tilde c_1}}\over{\nu' C_\Gamma}} 
\epsilon^{2+4\sigma} C_{NBF}
(2^6 \sqrt{3 \over 2} C_{NBF} + \tilde H_1) = \epsilon^{2+4\sigma}\tilde F_1.
\EQ(6.21)$$
Thus choosing $ \tilde H_1 $ and $ \tilde F_1$
as in \equ(3.12) and \equ(3.13),
condition \equ(3.4) of the theorem implies assumption
\equ(5.2) in Lemma 5.1. Thus the Hamiltonian 
$ H \circ \tau_1 \circ \tau_2 \circ \tau_3 $ is well defined 
in $ D^{\nu}_{ {{\rho_2} \over {4} }} $.

To estimate the drifts of $ J_\Gamma$ and $ a_\gamma$, $ \gamma \neq \Gamma$
we use Lemma 6.1 with $ \rho_4 = {{\rho_2}\over{4}} $,
$ \delta_4 = {{\rho_2} \over  {8}  }$ and 
$ \rho_5 = {{\rho_2}\over{8}} $ so that we consider 
the Hamiltonian flow  of 
$ H \circ \tau_1 \circ \tau_2 \circ \tau_3 $ in a subset of 
$  \real^{2n} \cap D^{\nu}_{ {{\rho_2} \over {8} }} $. 
We will be using the fact that the transformations and the 
Hamiltonian flow map real vectors to real vectors.
Let now $y'= (\tau_2 \circ \tau_3)^{-1}(y)$ and suppose that 
$y'(0) \in \real^{2n} \cap D^{\nu}_{ {{\xi \rho_5} \over {2} }}$. We can use 
\equ(5.12) and \equ(4.52) to estimate the difference between 
$y'$ and $y$, obtaining
$$ |q_\gamma \circ \tau_2 \circ \tau_3 - q_\gamma| \leq
||q_\gamma \circ \tau_2 \circ \tau_3 - 
q_\gamma \circ \tau_2||_{D^{\nu}_{\rho_2 - \delta_2 -\delta_3}} +
||q_\gamma \circ \tau_2  -
q_\gamma ||_{D^{\nu}_{\rho_2 - \delta_2 }} \leq
\rho_2(C_3 + C_2), $$  
$$ |J_\gamma \circ \tau_2 \circ \tau_3 - J_\gamma| \leq \rho^2_2(C_3 + C_2), $$
where 
$$  C_3 = {{2 e^{{{2}\over{ 5 e^2 }} }}\over{e^2 }}
{{\tilde F_1}\over{\tilde H_1}} \epsilon^{1-2\sigma}, \quad
 C_2 = {{32  {\tilde c_1} \pi} \over {C_\Gamma}\sqrt{\nu'}}
C_{NBF} \epsilon^{\sigma}. $$
Using assumption \equ(3.22) of the theorem, 
and $y'(0) \in  \real^{2n} \cap D^{\nu}_{ {{\xi \rho_5} \over {2} }}$,
we therefore have that 
$$ |q_\gamma \circ \tau_2 \circ \tau_3| \leq \xi \rho_5 , \quad 
|J_\gamma \circ \tau_2 \circ \tau_3 | \leq \xi \rho_2 \rho_5, $$ 
i.e. $y(0) \in  \real^{2n} \cap D^{\nu}_{ \xi \rho_5}$. Then, from  
Lemma 6.1 we have that $y(t) \in  \real^{2n} \cap D^{\nu}_{ \rho_5}$, $\forall 
|t| \leq T_D$, and using \equ(5.12) and \equ(4.52) and assumption 
\equ(3.21) of the theorem we see that 
$y'(t) \in \real^{2n} \cap D^{\nu}_{{ {\rho_2}\over {4} }} $, i.e. 
$y'(t)$ is in the domain of $  \tau_2 \circ \tau_3$, $\forall |t| \leq T_D$. 

To pass to the original variables we let 
$y'' = (\tau_1)^{-1}(y')$, and use \equ(4.6) and assumption
\equ(3.2) to estimate the 
distance between $y''$ and $y'$ (and $|\nu'-\nu|$). 
Arguing as previously, we have that 
$y''(0) \in   \real^{2n} \cap D^{\nu'}_{{ {\xi \rho_5}\over {4} }} $ 
implies that 
$y'(0) \in  \real^{2n} \cap D^{\nu'}_{{ {\xi \rho_5}\over {2} }} $. 
We can then apply the
argument of the previous paragraph to see that
$y'(t) \in  \real^{2n} \cap D^{\nu}_{{ {\rho_2}\over {4} }} $, 
$\forall |t| \leq T_D$. 
Using \equ(4.6) and assumption \equ(3.21) again 
we see that this implies that 
$y''(t) \in  \real^{2n} \cap D^{\nu'}_{{ {\rho_2}\over {2} }}$,
$\forall |t| \leq T_D$. We can also check 
that assumption \equ(3.1) on $\nu'$ guarantees that
$\nu $ satisfies the requirements of Lemma 5.1. 

To obtain the theorem for the time $T_m$ as in the statement
we bound $Q_\Gamma$ in \equ(6.16) using the estimates
\equ(4.49) and \equ(4.50) of $R_2$ and $\tilde R_1$ in Lemma 5.2
and the definitions of 
$\alpha$, $\beta$, $C_{NBF}$, $\tilde H_1$ and $\tilde F_1$.
Also, we easily estimate possible values for
the constants of $m_{BF}$ and  $m_{I}$,
obtaining $m_{BF} = \epsilon^{2+2\sigma} \tilde C_{BF}$ and
$m_{I} = \epsilon^{2+2\sigma} \tilde C_{I}$, with 
$ \tilde C_{I}$ and $ \tilde C_{BF}$ given by \equ(3.14) and
\equ(3.15) respectively. 
We then have that $T_M \leq T_D$ and the theorem follows. \QED


\SECTION Discussion


We conclude with a brief discussion of the physical motivation,
applications, and possible extensions 
of the present work.

Our main motivation comes from a Hamiltonian model of
small amplitude gravity water waves 
on the sphere. The model was developed in [LP], where it was 
shown that the 
equations of motion of gravity water waves can be written as a
Hamiltonian system of an infinite set of coupled harmonic 
oscillators. The oscillators (modes) are in one to one correspondence
with the spherical harmonics $Y_{[l,m]}$, with $l \in \integer^{+}$
and $m = -l, -l+1, \ldots, l$. It was also shown in the same work that  
the water wave system possesses families of approximate
periodic orbits for which the amplitude of all but 
one of the modes is very small (i.e. quasi-monochromatic 
approximate solutions). 
In [P] we have seen numerically that   
trajectories starting near these sets 
stay nearby for long times. Moreover, we proved that the transfer
of energy between modes with different wavenumber $l$ is slow,
in particular we showed that the sums $I_l$ of amplitudes of 
modes with the same wavenumber $l$ Poisson-commute 
with the quartic part of the 
second order Birkhoff normal form Hamiltonian.   
Thus the only possible way for near-monochromatic motions to decay 
in a time scale smaller than $O(\epsilon^{-2})$ is
through interaction with modes with the same wavenumber $l$,
and we are led to study the projection of the full system to 
the space of the modes with a given wavenumber $l$. 
Such a projection preserves the Hamiltonian structure,  
moreover it leads to a Hamiltonian satisfying
the resonance and symmetry properties (i) and (ii) 
of section 2, i.e. it belongs to the class of systems we considered here. 
Note that properties (i) and (ii) follow from the rotational symmetry 
of the water wave model, and will therefore hold for a variety of 
other dispersive wave equations on the sphere. 

Application of the method we used to the water wave model studied 
in [P] is quite straightforward. Note   
that here we have aimed for a stability time scale 
that is much longer than the time scale 
needed to understand the numerical observations. The strategy however 
of performing the normal form once around the origin and then around a 
periodic orbit can still be applied.  
In the case where $h_{BF}$ and $h_{I}$ vanish for a particular 
mode $\Gamma$, we can 
start with the second order Birkhoff normal form of the water wave model
around the origin, 
and then apply the transformation 
generated by $\tilde \chi_{(1)}$ of Lemma 5.1. 
It is then easy to see that, up to a remainder of $O(\epsilon^{2+4 \sigma})$,
the action $J_\Gamma$ and 
the quantities $I_l$ Poisson-commute with the Hamiltonian. 
Thus near-monochromatic trajectories stay $O(\epsilon^{\sigma})$ close to 
periodic near monochromatic trajectories of the truncated quartic
normal form for an $O(\epsilon^{-2-4 \sigma})$ time.
When $h_{BF}$ and $h_{I}$ are present we can reach a similar stability 
time provided that an additional bound on the size of $h_{BF}$ is satisfied.
A more detailed account of applications to the water wave problem 
will appear elsewhere. 

The motions we studied here are also related 
to weakly non-linear traveling waves on the line.
In the context of waves, a near-monochromatic solution 
corresponds to a traveling wave with an almost sinusoidal periodic profile
and constant amplitude-dependent speed.
Stokes derived an expansion for such solutions in surface water waves
using a perturbation method that is similar 
to the Poincar\'e-Lindstedt method
for constructing periodic orbits in dynamical systems (see e.g. [KC]). 
In the approach we took here, secular terms are avoided by the
normal form procedure, while the dependence of the frequency 
(or wave speed) $\omega_\epsilon$ on the amplitude is 
$\omega_\epsilon = \omega + \epsilon^2 C_\Gamma |a_\Gamma|^2 + O(\epsilon^3)$.
Also, the symmetry we imposed is analogous to translation invariance
in non-linear wave equations,
although the particular combination of
resonance and symmetry properties 
(i) and (ii) of Section 2 is quite untypical, 
leading to the absence of odd order resonant
terms. Clearly, the analogies hold at a formal level since the convergence
properties of the expansions may be different, for instance the 
normal form argument we used does not suffice for convergence. 

The linear stability of Stokes waves was studied by [BF], who 
assumed that the wave is given by the first terms in  
the Stokes expansion. This is analogous to considering the 
stability of the monochromatic periodic orbits 
of the truncated normal forms ${\overline h}^{(r)}$.
[BF] found that mode interactions that have the form of the terms belonging to 
$\hat h_{BF}$ discussed here can lead to linear instability. 
Such terms force the modes with index $\gamma \neq \Gamma$ 
parametrically, with a forcing frequency that is subharmonic
(with an $O(\epsilon^2)$ detuning). 
As we noted, in the case where the Benjamin-Feir quadratic part
$\hat h_{BF}$ is present the stability time is not optimal,  
it can be prolonged by a factor $\epsilon^{4 \sigma}$, provided that 
an additional bound of the ratio of $\tilde C_{BF}$ (i.e.
the size of $\hat h_{BF}$) to $\nu' C_\Gamma$ (i.e. the detuning)
is assumed. We hope to present these extensions in a future work. 
Note that a condition involving the 
ratio of coupling to detuning for avoiding the 
instability due to the 
Benjamin-Feir terms 
is easily obtained using the theory of the Mathieu equation,
and is sometimes referred to as ``Lighthill condition'' (see e.g. [RT]).
Our approach suggests that such a condition extends the time of 
(non-linear) stability, but is not enough for Nekhoroshev stability. 

\SECTION Acknowledgments

I would like to thank R. de la Llave, A. Minzoni and A. Olvera 
for helpful discussions and comments.


\SECTION Appendix

The following technical lemmas are versions of well known
consequences of the Cauchy estimates for analytic functions.

{\bf Lemma A1} (Poisson bracket estimate in Cartesian coordinates)
Let $ F$, $G$ be analytic functions on  
$D_{\rho}  $, $D_{\rho-\delta}$ respectively,
and $ 0 < \delta, \delta' < \rho$. Then 
$$ || [F,G] ||_{D_{\rho -\delta - \delta'}} \leq 
{ {\tilde c} \over { \delta(\delta+\delta')}}
|| F  ||_{D_{\rho- \delta'}} 
|| G ||_{D_{\rho}} , \quad\hbox{with}\quad
{\tilde c} = 2n (\kappa_1)^2, $$
and $\kappa_1=1$.


{\bf Lemma A2} (Poisson bracket estimate in mixed Cartesian, 
action-angle coordinates, see \equ(5.01))
Let $ F$, $G$ be analytic functions on
$D^{\nu,R_1}_{\rho}$, $D^{\nu,R_1}_{\rho-\delta}$
respectively,
and $ 0 < \delta, \delta' < \rho$, $R_1 >0$. Then 
$$ || [F,G] ||_{D^{\nu,R_1}_{\rho -\delta - \delta'}} \leq 
{ {\tilde c_1} \over { \delta(\delta+\delta')}}
|| F  ||_{D^{\nu,R_1}_{\rho- \delta'}}  
|| G ||_{D^{\nu,R_1}_{\rho}} , \quad\hbox{with}\quad
{\tilde c_1} = 4n (\kappa_1)^2 , $$
and $\kappa_1$ as in Lemma $A1$.

The following lemma holds by majorizing the Lie-series, using 
either Lemma A1 
for Cartesian coordinates 
or Lemma A2 for mixed 
Cartesian, action-angle coordinates.
The domain $ \tilde D_{\rho}$ below is either 
$ D_{\rho}$, or $D^{\nu,R_1}_{\rho}$ (for fixed $\nu, R_1$). 

{\bf Lemma A3} (convergence of the Lie-series)
Let $ S, H $ be analytic functions on $\tilde D_{\rho}$,
and define $G_0 = H$,
$ G_r = {1 \over r} [ S, G_{r-1}]$ for $r \geq 1$. Then 
$$ || G_r ||_{\tilde D_{\rho - \delta}} \leq B_r, \quad\hbox{with}\quad
B_r = \left( { {\kappa_2} \over {\delta^2}} 
|| S||_{\tilde D_{\rho}} \right)^{r}
|| H ||_{\tilde D_{\rho}}, $$
where $\kappa_2 = e^2 {\tilde c} $ when $ \tilde D_{\rho}$ denotes $ D_{\rho}$,
and $\kappa_2 = 2e^2 {\tilde c} $ when $ \tilde D_{\rho}$ 
denotes $D^{\nu,R_1}_{\rho}$. The constants
${\tilde c}$, ${\tilde c_1}$ are as in Lemmas $A1$, $A2$. If
$ { {\kappa_2} \over {\delta^2}} || S||_{\tilde D_{\rho}} < 1 $ then 
$\exp(Ad_S H) $ is analytic in 
${\tilde D}_{\rho - \delta}$.


\SECTION REFERENCES 

\parindent  = 0 pt

[B] D. Bambusi: Nekhoroshev theorem for small amplitude 
solutions in nonlinear \break 
Schr\"odinger equations, preprint (1998)

[BF] T.B. Benjamin, J. E. Feir: The disintegration of 
wavetrains on deep water, J. Fluid Mech. 27, 417-430 (1967)

[D] A. Deprit: Canonical transformations depending on a small parameter,
Cel. Mech. 20, 1-12 (1967)

[DF] A.J. Dragt, J.M. Finn: Lie series and invariant functions
for analytic symplectic maps, J. Math. Phys. 17, 
2215-2227 (1976)

[FB] F. Fass\`o, G. Benettin: Composition of Lie transforms with 
rigorous estimates and applications to Hamiltonian perturbation theory, 
J. Appl. Math. Phys. (ZAMP), 140, 307-329 (1989)

[FGB] F. Fass\`o, M. Guzzo, G. Benettin: 
On the stability of elliptic equilibria,
Math. Phys. Electron. J. 4, 16 pp.,
http://www.ma.utexas.edu/mpej/MPEJ.html (1998) 

[L] P. Lochak: Canonical perturbation theory via simultaneous 
approximation, Russ. Math. Surv. 47, 57-133 (1992)

[LP] R. de la Llave, P. Panayotaros: Water waves on the surface of the 
sphere, J. Nonlin. Sci. 6, 147-167 (1996) 

[KC] J. Kevorkian, J.D. Cole: Multiple scale and singular perturbation
methods, Springer, New York (1996) 

[MRS] J. Montaldi, M. Roberts, I. Stewart: Existence of 
nonlinear normal modes of symmetric Hamiltonian systems, Nonlinearity 3,
695-730 (1990)

[N] L. Niederman: Nonlinear stability around an elliptic equilibrium point
in a Hamiltonian system, Nonlinearity 11, 1465-1479 (1998)

[P] P. Panayotaros: Near-monochromatic water waves on the sphere, 
Physica D, to appear (1998)  

[RT] M.I. Rabinovich, D.I. Trubetskov: Oscillations and waves, Kluwer, 
Dordrecht (1989)

\end