% version  Nov 30
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{filecontents*}{llave-fig1.eps}
%!PS-Adobe-3.0 EPSF-3.0
%%Creator: Adobe Illustrator(r) 6.0
%%For: (combs) (Dept of Math, Univ of Texas )
%%Title: (llave-fig1.eps)
%%CreationDate: (5/6/99) (8:19 AM)
%%BoundingBox: 144 465 449 615
%%HiResBoundingBox: 144 465 448.7293 615
%%DocumentProcessColors: Black
%%DocumentFonts: Helvetica
%%+ Symbol
%%+ Times-Italic
%%DocumentSuppliedResources: procset Adobe_level2_AI5 1.0 0
%%+ procset Adobe_typography_AI5 1.0 0
%%+ procset Adobe_Illustrator_AI6_vars Adobe_Illustrator_AI6
%%+ procset Adobe_Illustrator_AI5 1.0 0
%AI5_FileFormat 2.0
%AI3_ColorUsage: Black&White
%%AI6_ColorSeparationSet: 1 1 (AI6 Default Color Separation Set)
%%+ Options: 1 16 0 1 0 1 1 1 0 1 1 1 1 18 0 0 0 0 0 0 0 0 -1 -1
%%+ PPD: 1 21 0 0 60 45 2 2 1 0 0 1 0 0 0 0 0 0 0 0 0 0 ()
%AI3_TemplateBox: 306 396 306 396
%AI3_TileBox: 30 31 582 761
%AI3_DocumentPreview: Macintosh_ColorPic
%AI5_ArtSize: 612 792
%AI5_RulerUnits: 0
%AI5_ArtFlags: 1 0 0 1 0 0 1 1 0
%AI5_TargetResolution: 800
%AI5_NumLayers: 1
%AI5_OpenToView: 2 684 1.5 794 557 58 1 1 3 40
%AI5_OpenViewLayers: 7
%%EndComments
%%BeginProlog
%%BeginResource: procset Adobe_level2_AI5 1.2 0
%%Title: (Adobe Illustrator (R) Version 5.0 Level 2 Emulation)
%%Version: 1.2
%%CreationDate: (04/10/93) ()
%%Copyright: ((C) 1987-1993 Adobe Systems Incorporated All Rights Reserved)
% Create a 23-entry dictionary, register it in userdict as
% /Adobe_level2_AI5 and open it, so the definitions below are stored in it.
userdict /Adobe_level2_AI5 23 dict dup begin
 put
 % When the interpreter has no packedarray operator, install stand-ins in
 % userdict: packedarray builds a read-only ordinary array, setpacking
 % discards its operand, and currentpacking is the constant false.
 /packedarray where not
 {
  userdict begin
  /packedarray
  {
   array astore readonly
  } bind def
  /setpacking /pop load def
  /currentpacking false def
  end
  % Dummy operand: balances the dictionary that `where` leaves behind
  % when packedarray does exist, so the pop below is always valid.
  0
 } if
 pop
 % Remember the caller's packing mode (restored when this procset closes)
 % and switch array packing on for the procedures defined below.
 userdict /defaultpacking currentpacking put true setpacking
 % initialize: push this procset's dictionary on the dictionary stack.
 /initialize
 {
  Adobe_level2_AI5 begin
 } bind def
 % terminate: pop this procset's dictionary, but only if it is on top.
 /terminate
 {
  currentdict Adobe_level2_AI5 eq
  {
   end
  } if
 } bind def
 % The mark collects the dictionaries that successful `where` tests leave
 % on the operand stack from here on; a later cleartomark discards them.
 mark
 % Custom (spot) colour emulation, defined only when setcustomcolor is absent.
 /setcustomcolor where not
 {
  % findcmykcustomcolor: c m y k name -> 5-element packed array
  /findcmykcustomcolor
  {
   5 packedarray
  } bind def
  % setcustomcolor: array tint -> -
  % Unpacks c m y k (dropping the array and the name), multiplies each
  % component by tint, discards tint and sets the resulting CMYK colour.
  /setcustomcolor
  {
   exch aload pop pop
   4
   {
    4 index mul 4 1 roll
   } repeat
   5 -1 roll pop
   setcmykcolor
  }
  def
 } if

 % gt38?: true when the interpreter version, read as a number, is above 38.
 % If converting/executing the version string fails, the result is true.
 /gt38? mark {version cvr cvx exec} stopped {cleartomark true} {38 gt exch pop} ifelse def
 % deviceDPI: device-space length of a 72-unit horizontal vector under the
 % default matrix.
 userdict /deviceDPI 72 0 matrix defaultmatrix dtransform dup mul exch dup mul add sqrt put
 % level2?: true when systemdict defines languagelevel with a value >= 2.
 userdict /level2?
 systemdict /languagelevel known dup
 {
  pop systemdict /languagelevel get 2 ge
 } if
 put
% level2ScreenFreq: halftonedict -> frequency
% Opens the halftone dictionary and returns its screen frequency:
% Frequency for HalftoneType 1, GrayFrequency for type 2, and for type 5
% the result of recursing into the Default sub-dictionary.
% Any other type yields the initial value 60.
/level2ScreenFreq
{
 begin
  60
  HalftoneType 1 eq
  {
   pop Frequency
  } if
  HalftoneType 2 eq
  {
   pop GrayFrequency
  } if
  HalftoneType 5 eq
  {
   pop Default level2ScreenFreq
  } if
 end
} bind def
% currentScreenFreq: screen frequency sampled once, while the prolog loads.
% Level 2 reads it from the current halftone dictionary; otherwise it is the
% frequency operand returned by currentscreen (angle and spot proc dropped).
userdict /currentScreenFreq
 level2? {currenthalftone level2ScreenFreq} {currentscreen pop pop} ifelse put
% On interpreters that are not Level 2, define any of the following
% operators that are missing. Each successful `where` leaves a dictionary
% on the operand stack; the cleartomark at the end removes them.
level2? not
 {
  /setcmykcolor where not
  {
   % setcmykcolor: c m y k -> -
   % Gray approximation: 1 - (k + .11 y + .59 m + .3 c).
   /setcmykcolor
   {
    exch .11 mul add exch .59 mul add exch .3 mul add
    1 exch sub setgray
   } def
  } if
  /currentcmykcolor where not
  {
   % currentcmykcolor: - -> 0 0 0 k, with k = 1 - currentgray.
   /currentcmykcolor
   {
    0 0 0 1 currentgray sub
   } def
  } if
  /setoverprint where not
  {
   % setoverprint becomes a no-op that discards its operand.
   /setoverprint /pop load def
  } if
  /selectfont where not
  {
   % selectfont: name size|matrix -> -
   % findfont followed by makefont (array operand) or scalefont (number).
   /selectfont
   {
    exch findfont exch
    dup type /arraytype eq
    {
     makefont
    }
    {
     scalefont
    } ifelse
    setfont
   } bind def
  } if
  /cshow where not
  {
   % cshow: proc string -> -
   % Builds a procedure "0 0 <proc body>" and runs it for every character
   % code via forall, so proc sees (charcode 0 0): widths are always zero.
   /cshow
   {
    [
    0 0 5 -1 roll aload pop
    ] cvx bind forall
   } bind def
  } if
 } if
 % Discard everything accumulated since the matching mark.
 cleartomark
 % anyColor?: c m y k -> bool ; true when the four components do not sum to 0.
 /anyColor?
 {
  add add add 0 ne
 } bind def
 % testColor: c m y k -> c' m' y' k'
 % Sets the CMYK colour inside gsave/grestore and returns what
 % currentcmykcolor reports back for it.
 /testColor
 {
  gsave
  setcmykcolor currentcmykcolor
  grestore
 } bind def
 % testCMYKColorThrough: c m y k -> bool
 % True when the colour read back after setting it is not all zero.
 /testCMYKColorThrough
 {
  testColor anyColor?
 } bind def
 % composite?: evaluated once while the prolog loads.
 % Level 2: true when 1 1 1 1 setcmykcolor reads back with components
 % summing to 4. Otherwise: true when each of the four pure process
 % colours reads back as non-zero.
 userdict /composite?
 level2?
 {
  gsave 1 1 1 1 setcmykcolor currentcmykcolor grestore
  add add add 4 eq
 }
 {
  1 0 0 0 testCMYKColorThrough
  0 1 0 0 testCMYKColorThrough
  0 0 1 0 testCMYKColorThrough
  0 0 0 1 testCMYKColorThrough
  and and and
 } ifelse
 put
 % When the device is not composite, record in userdict which process
 % colours pass through (cyan? magenta? yellow? black?), whether any of
 % them does (isCMYKSep?), and its negation (customColor?).
 composite? not
 {
  userdict begin
  gsave
  /cyan? 1 0 0 0 testCMYKColorThrough def
  /magenta? 0 1 0 0 testCMYKColorThrough def
  /yellow? 0 0 1 0 testCMYKColorThrough def
  /black? 0 0 0 1 testCMYKColorThrough def
  grestore
  /isCMYKSep? cyan? magenta? yellow? black? or or or def
  /customColor? isCMYKSep? not def
  end
 } if
 % Close the procset dictionary and restore the packing mode saved earlier.
 end defaultpacking setpacking
%%EndResource
%%BeginResource: procset Adobe_typography_AI5 1.0 1
%%Title: (Typography Operators)
%%Version: 1.0
%%CreationDate:(03/26/93) ()
%%Copyright: ((C) 1987-1993 Adobe Systems Incorporated All Rights Reserved)
% Leave the previous packing mode on the operand stack (consumed by the
% setpacking that closes this procset) and enable array packing.
currentpacking true setpacking
% Create the 54-entry typography dictionary, register it in userdict and
% open it for the definitions that follow.
userdict /Adobe_typography_AI5 54 dict dup begin
put
% initialize: dict1 dict2 -> -
% Temporarily pushes the two operand dictionaries plus this procset on the
% dictionary stack, applies bind to every executable entry of this procset,
% pops all three again, and finally leaves this procset open.
/initialize
{
 begin
 begin
 Adobe_typography_AI5 begin
 Adobe_typography_AI5
 {
  dup xcheck
  {
   bind
  } if
  pop pop
 } forall
 end
 end
 end
 Adobe_typography_AI5 begin
} def
% terminate: pop this procset's dictionary, but only if it is on top.
/terminate
{
 currentdict Adobe_typography_AI5 eq
 {
  end
 } if
} def
% modifyEncoding: mark item1 ... itemN array -> array
% Applies the items between the mark and the array to the array, oldest
% item first. An item that is a name is stored at index _pntr, which is then
% incremented; any other item (apart from the mark) becomes the new _pntr.
% The loop ends when the mark itself is reached; the mark is consumed.
/modifyEncoding
{
 /_tempEncode exch ddef
 /_pntr 0 ddef
 {
  % Bring the item nearest the mark to the top of the stack.
  counttomark -1 roll
  dup type dup /marktype eq
  {
   pop pop exit
  }
  {
   /nametype eq
   {
    % _tempEncode[_pntr] = name, then _pntr = _pntr + 1
    _tempEncode /_pntr dup load dup 3 1 roll 1 add ddef 3 -1 roll
    put
   }
   {
    /_pntr exch ddef
   } ifelse
  } ifelse
 } loop
 _tempEncode
} def
% TE: mark item1 ... itemN -> -
% Copies StandardEncoding into a fresh 256-element array, applies the items
% with modifyEncoding and stores the result as _nativeEncoding.
/TE
{
 StandardEncoding 256 array copy modifyEncoding
 /_nativeEncoding exch def
} def
%
% TZ: mark [encoding items] newname basename n1 n2 flag [array] -> -
% Defines a copy of the font basename under newname.
%  - An optional array on top is kept in _wv and later handed to
%    makeblendedfont; without it _wv is 0.
%  - flag is stored in _useNativeEncoding; n1 and n2 are discarded.
%  - Every entry of the base font except FID is copied into a new dictionary.
%  - With no encoding items above the mark, the Encoding is replaced by
%    _nativeEncoding when flag is 1 and left as copied otherwise. With
%    items, a copy of the font's Encoding is patched by modifyEncoding.
/TZ
{
 dup type /arraytype eq
 {
  /_wv exch def
 }
 {
  /_wv 0 def
 } ifelse
 /_useNativeEncoding exch def
 pop pop
 findfont _wv type /arraytype eq
 {
  _wv makeblendedfont
 } if
 % Two spare slots beyond the entries of the source font dictionary.
 dup length 2 add dict
 begin
 % Copy key/value pairs; the mark/cleartomark pair drops the skipped FID
 % entry without disturbing the operands below.
 mark exch
 {
  1 index /FID ne
  {
   def
  } if
  cleartomark mark
 } forall
 pop
 /FontName exch def
 counttomark 0 eq
 {
  1 _useNativeEncoding eq
  {
   /Encoding _nativeEncoding def
  } if
  cleartomark
 }
 {
  /Encoding load 256 array copy
  modifyEncoding /Encoding exch def
 } ifelse
 FontName currentdict
 end
 definefont pop
} def
% tr: string -> _ax _ay string
% Puts the two per-character spacing values under the string.
/tr
{
 _ax _ay 3 2 roll
} def
% trj: string -> _cx _cy _sp _ax _ay string
% As tr, additionally inserting the extra spacing (_cx _cy) that is applied
% to the character code _sp.
/trj
{
 _cx _cy _sp _ax _ay 6 5 roll
} def
% a0 and a1 install the text-show procedures Tx (plain) and Tj (with
% per-space adjustment) for two text render modes; Tr selects them through
% the _renderStart table (indices 2 and 6).
% a0: paint the string with the fill show procedure (_psf/_pjsf), return to
% the starting point, then paint it again with the stroke show procedure
% (_pss/_pjss) using the matrix _ctm.
/a0
{
 /Tx
 {
  dup
  currentpoint 3 2 roll
  tr _psf
  newpath moveto
  tr _ctm _pss
 } ddef
 /Tj
 {
  dup
  currentpoint 3 2 roll
  trj _pjsf
  newpath moveto
  trj _ctm _pjss
 } ddef
} def
% a1: same fill-then-stroke painting, performed inside gsave/grestore;
% afterwards the glyph outlines are appended to the current path from the
% original starting point by sp/jsp.
/a1
{
 /Tx
 {
  dup currentpoint 4 2 roll gsave
  dup currentpoint 3 2 roll
  tr _psf
  newpath moveto
  tr _ctm _pss
  grestore 3 1 roll moveto tr sp
 } ddef
 /Tj
 {
  dup currentpoint 4 2 roll gsave
  dup currentpoint 3 2 roll
  trj _pjsf
  newpath moveto
  trj _ctm _pjss
  grestore 3 1 roll moveto tr jsp
 } ddef
} def
% e0 and e1 install Tx/Tj for the render modes at indices 0 and 4 of
% _renderStart.
% e0: paint the string with the fill show procedure only.
/e0
{
 /Tx
 {
  tr _psf
 } ddef
 /Tj
 {
  trj _pjsf
 } ddef
} def
% e1: paint with the fill show procedure inside gsave/grestore, then go back
% to the starting point and append the glyph outlines to the current path
% with sp/jsp.
/e1
{
 /Tx
 {
  dup currentpoint 4 2 roll gsave
  tr _psf
  grestore 3 1 roll moveto tr sp
 } ddef
 /Tj
 {
  dup currentpoint 4 2 roll gsave
  trj _pjsf
  grestore 3 1 roll moveto tr jsp
 } ddef
} def
% i0 (index 7 of _renderStart): Tx/Tj only append glyph outlines to the
% current path via sp/jsp; nothing is painted.
/i0
{
 /Tx
 {
  tr sp
 } ddef
 /Tj
 {
  trj jsp
 } ddef
} def
% i1: entry used in _renderEnd for modes 4-7; runs W then N, i.e. requests
% a clip and lets N apply it to the accumulated path.
/i1
{
 W N
} def
% o0 (index 3 of _renderStart): Tx/Tj paint nothing and just advance the
% current point by the width computed by sw/swj.
/o0
{
 /Tx
 {
  tr sw rmoveto
 } ddef
 /Tj
 {
  trj swj rmoveto
 } ddef
} def
% r0 and r1 install Tx/Tj for the render modes at indices 1 and 5 of
% _renderStart.
% r0: paint the string with the stroke show procedure (_pss/_pjss),
% passing the matrix _ctm.
/r0
{
 /Tx
 {
  tr _ctm _pss
 } ddef
 /Tj
 {
  trj _ctm _pjss
 } ddef
} def
% r1: stroke inside gsave/grestore starting from a fresh path at the current
% point, then return to that point and append the glyph outlines to the
% current path with sp/jsp.
/r1
{
 /Tx
 {
  dup currentpoint 4 2 roll currentpoint gsave newpath moveto
  tr _ctm _pss
  grestore 3 1 roll moveto tr sp
 } ddef
 /Tj
 {
  dup currentpoint 4 2 roll currentpoint gsave newpath moveto
  trj _ctm _pjss
  grestore 3 1 roll moveto tr jsp
 } ddef
} def
% To: any -> -
% Discards its operand and captures the current CTM into _ctm.
/To
{
 pop _ctm currentmatrix pop
} def
% TO: - -> -
% Runs the end-of-render hook (iTe), reinstates _ctm and clears the path.
/TO
{
 iTe _ctm setmatrix newpath
} def
% Tp: a b c d tx ty any -> -
% Discards the top operand, stores the six numbers in _tm, reinstates _ctm
% and opens _tDict, in which W and h are redefined as no-ops until TP.
/Tp
{
 pop _tm astore pop _ctm setmatrix
 _tDict begin
 /W
 {
 } def
 /h
 {
 } def
} def
% TP: - -> -
% Closes the dictionary opened by Tp, applies the text matrix (iTm) and
% moves to its origin.
/TP
{
 end
 iTm 0 0 moveto
} def
% Tr: mode -> -
% Selects the text render mode.
%  - If the previous mode was 3 or less, the current path is cleared while
%    the current point is kept.
%  - Mode 8 is treated as 0 and mode 9 as 1.
%  - The mode is stored in _render and the matching installer named in
%    _renderStart is executed, which redefines Tx and Tj.
/Tr
{
 _render 3 le
 {
  currentpoint newpath moveto
 } if
 dup 8 eq
 {
  pop 0
 }
 {
  dup 9 eq
  {
   pop 1
  } if
 } ifelse
 dup /_render exch ddef
 _renderStart exch get load exec
} def
% iTm: - -> -
% Rebuilds the text coordinate system: _ctm, then the text matrix _tm, then
% a vertical shift by _rise and a horizontal scale by _hs.
/iTm
{
 _ctm setmatrix _tm concat 0 _rise translate _hs 1 scale
} def
% Tm: a b c d tx ty -> -
% Stores the six numbers in _tm, applies it and moves to the text origin.
/Tm
{
 _tm astore pop iTm 0 0 moveto
} def
% Td: tx ty -> -
% Pre-multiplies _tm by a translation of (tx, ty), storing the product back
% in _tm, then applies it and moves to the text origin.
/Td
{
 _mtx translate _tm _tm concatmatrix pop iTm 0 0 moveto
} def
% iTe: - -> -
% End-of-render hook. Unless _render is -1, looks up the entry for the
% current mode in _renderEnd and executes it when it is not null.
% In all cases _render is reset to -1 afterwards.
/iTe
{
 _render -1 eq
 {
 }
 {
  _renderEnd _render get dup null ne
  {
   load exec
  }
  {
   pop
  } ifelse
 } ifelse
 /_render -1 ddef
} def
% Text-state operators. Several of them only discard their operands.
% Ta: any -> - (operand ignored)
/Ta
{
 pop
} def
% Tf: fontname size -> -
% Records size/1000 in _fScl and selects the font at that size.
/Tf
{
 dup 1000 div /_fScl exch ddef
%
 selectfont
} def
% Tl: a b -> -
% Discards b and stores [0 a] in the _leading array.
/Tl
{
 pop
 0 exch _leading astore pop
} def
% Tt: any -> - (operand ignored)
/Tt
{
 pop
} def
% TW: three operands ignored
/TW
{
 3 npop
} def
% Tw: n -> - ; stores n in _cx (extra x spacing used by trj).
/Tw
{
 /_cx exch ddef
} def
% TC: three operands ignored
/TC
{
 3 npop
} def
% Tc: n -> - ; stores n in _ax (per-character x spacing used by tr/trj).
/Tc
{
 /_ax exch ddef
} def
% Ts: n -> -
% Stores n in _rise and rebuilds the text matrix; the current point
% coordinates captured beforehand are re-used for the moveto afterwards.
/Ts
{
 /_rise exch ddef
 currentpoint
 iTm
 moveto
} def
% Ti: three operands ignored
/Ti
{
 3 npop
} def
% Tz: percent -> - ; stores percent/100 in _hs and rebuilds the text matrix.
/Tz
{
 100 div /_hs exch ddef
 iTm
} def
% TA: any -> - (operand ignored)
/TA
{
 pop
} def
% Tq: any -> - (operand ignored)
/Tq
{
 pop
} def
% Th: five operands ignored
/Th
{
 pop pop pop pop pop
} def
% TX: any -> - (operand ignored)
/TX
{
 pop
} def
% Tk: a b -> -
% Discards a and moves the current point left by b * _fScl.
/Tk
{
 exch pop _fScl mul neg 0 rmoveto
} def
% TK: two operands ignored
/TK
{
 2 npop
} def
% T*: - -> - ; Td with the two _leading values, the second one negated.
/T*
{
 _leading aload pop neg Td
} def
% T*-: - -> - ; Td with the two _leading values as stored.
/T*-
{
 _leading aload pop Td
} def
% T-: - -> - ; steps back by _ax horizontally, then shows the _hyphen string.
/T-
{
 _ax neg 0 rmoveto
 _hyphen Tx
} def
% T+: no-op.
/T+
{
} def
% TR: a b c d tx ty -> -
% Captures the current CTM into _ctm, stores the six numbers in _tm,
% applies the text matrix and moves to its origin.
/TR
{
 _ctm currentmatrix pop
 _tm astore pop
 iTm 0 0 moveto
} def
% TS: string flag -> -
% Shows string in the font named /_Symbol_ at the current size
% (_fScl * 1000), using Tx when flag is 0 and Tj otherwise, and then
% restores the font that was current on entry.
% NOTE(review): the setup section of this file registers a font named
% /_Symbol (no trailing underscore) -- confirm that /_Symbol_ is available
% wherever TS is actually used.
/TS
{
 currentfont 3 1 roll
 /_Symbol_ _fScl 1000 mul selectfont

 0 eq
 {
  Tx
 }
 {
  Tj
 } ifelse
 setfont
} def
% Xb / Tb: two operands ignored.
/Xb
{
 pop pop
} def
/Tb /Xb load def
% Xe / Te: four operands ignored.
/Xe
{
 pop pop pop pop
} def
/Te /Xe load def
% XB / TB: no-ops.
/XB
{
} def
/TB /XB load def
% Make the typography dictionary read-only, close it, and restore the
% packing mode that was left on the operand stack when the procset opened.
currentdict readonly pop
end
setpacking
%%EndResource
%%BeginProcSet: Adobe_ColorImage_AI6 1.0 0
% Create the Adobe_ColorImage_AI6 dictionary (17 entries) unless userdict
% already has one, then open it for the definitions that follow.
userdict /Adobe_ColorImage_AI6 known not
{
 userdict /Adobe_ColorImage_AI6 17 dict put
} if
userdict /Adobe_ColorImage_AI6 get begin

 % initialize: opens this procset and applies bind to every entry that is
 % an executable array. The dictionary is left on the dictionary stack.
 /initialize
 {
  Adobe_ColorImage_AI6 begin
  Adobe_ColorImage_AI6
  {
   dup type /arraytype eq
   {
    dup xcheck
    {
     bind
    } if
   } if
   pop pop
  } forall
 } def
 % terminate: pops the top dictionary from the dictionary stack.
 /terminate { end } def

 % Working variables live in a nested dictionary, created once.
 currentdict /Adobe_ColorImage_AI6_Vars known not
 {
  /Adobe_ColorImage_AI6_Vars 14 dict def
 } if

 % Initial values; channelcount/sourcecount/sourcearray/plateindex are set
 % by colorimage and its helpers, the XI* entries by XI.
 Adobe_ColorImage_AI6_Vars begin
  /channelcount 0 def
  /sourcecount 0 def
  /sourcearray 4 array def
  /plateindex -1 def
  /XIMask 0 def
  /XIBinary 0 def
  /XIChannelCount 0 def
  /XIBitsPerPixel 0 def
  /XIImageHeight 0 def
  /XIImageWidth 0 def
  /XIImageMatrix null def
  /XIBuffer null def
  /XIDataProc null def
 end

 % Placeholder entries; each name is given its real procedure further down.
 /WalkRGBString null def
 /WalkCMYKString null def

 /StuffRGBIntoGrayString null def
 /RGBToGrayImageProc null def
 /StuffCMYKIntoGrayString null def
 /CMYKToGrayImageProc null def
 /ColorImageCompositeEmulator null def

 /SeparateCMYKImageProc null def

 /FourEqual null def
 /TestPlateIndex null def

 % Remember the interpreter's own colorimage (if any) as _colorimage before
 % this procset defines its replacement; null means none was found.
 currentdict /_colorimage known not
 {
  /colorimage where
  {
   /colorimage get /_colorimage exch def
  }
  {
   /_colorimage null def
  } ifelse
 } if

 % Private handle on systemdict's currenttransfer.
 /_currenttransfer systemdict /currenttransfer get def

 /colorimage null def
 /XI null def

 % WalkRGBString: dest src proc dest -> dest
 % Walks src three bytes at a time. For every triple, proc is executed with
 % (dest index b1 b2 b3) on the stack and must leave (dest index');
 % index starts at 0. All working operands are popped at the end, so only
 % the bottom dest remains.
 /WalkRGBString
 {
  0 3 index

  dup length 1 sub 0 3 3 -1 roll
  {
   3 getinterval { } forall

   5 index exec

   3 index
  } for

   5 { pop } repeat

 } def


 % WalkCMYKString: dest src proc dest -> dest
 % Same traversal as WalkRGBString, but in steps of four bytes: proc is
 % executed with (dest index b1 b2 b3 b4) and must leave (dest index').
 /WalkCMYKString
 {
  0 3 index

  dup length 1 sub 0 4 3 -1 roll
  {
   4 getinterval { } forall

   6 index exec

   3 index

  } for

  5 { pop } repeat

 } def


 % StuffRGBIntoGrayString: dest index r g b -> dest index+1
 % Stores the integer part of .3 r + .59 g + .11 b at dest[index].
 /StuffRGBIntoGrayString
 {
  .11 mul exch

  .59 mul add exch

  .3 mul add

  cvi 3 copy put

  pop 1 add
 } def


 % RGBToGrayImageProc: - -> graystring
 % Data procedure: executes sourcearray[0] to obtain a block of 3-byte
 % samples, allocates a string one third as long and fills it through
 % WalkRGBString/StuffRGBIntoGrayString.
 /RGBToGrayImageProc
 {
  Adobe_ColorImage_AI6_Vars begin
   sourcearray 0 get exec
   dup length 3 idiv string
   dup 3 1 roll

   /StuffRGBIntoGrayString load exch
   WalkRGBString
  end
 } def


 % StuffCMYKIntoGrayString: dest index c m y k -> dest index+1
 % Computes k + .11 y + .59 m + .3 c, limits it to 255, and stores the
 % integer part of 255 minus that value at dest[index].
 /StuffCMYKIntoGrayString
 {
  exch .11 mul add

  exch .59 mul add

  exch .3 mul add

  dup 255 gt { pop 255 } if

  255 exch sub cvi 3 copy put

  pop 1 add
 } def


 % CMYKToGrayImageProc: - -> graystring
 % Data procedure: executes sourcearray[0] to obtain a block of 4-byte
 % samples, allocates a string one quarter as long and fills it through
 % WalkCMYKString/StuffCMYKIntoGrayString.
 /CMYKToGrayImageProc
 {
  Adobe_ColorImage_AI6_Vars begin
   sourcearray 0 get exec
   dup length 4 idiv string
   dup 3 1 roll

   /StuffCMYKIntoGrayString load exch
   WalkCMYKString
  end
 } def


 % ColorImageCompositeEmulator: w h bits matrix src... multi ncomp -> -
 % Stand-in used by colorimage when no native colorimage was found.
 %  - ncomp is discarded (channelcount already holds it).
 %  - multi true: the image is not drawn; sourcecount + 5 operands are
 %    simply removed from the stack.
 %  - multi false: for more than one channel the data source is saved in
 %    sourcearray[0] and replaced by the RGB (3 channels) or CMYK (otherwise)
 %    to-gray data procedure; the result is rendered with `image`.
 /ColorImageCompositeEmulator
 {
  pop true eq
  {
   Adobe_ColorImage_AI6_Vars /sourcecount get 5 add { pop } repeat
  }
  {
   Adobe_ColorImage_AI6_Vars /channelcount get 1 ne
   {
    Adobe_ColorImage_AI6_Vars begin
     sourcearray 0 3 -1 roll put

     channelcount 3 eq
     {
      /RGBToGrayImageProc
     }
     {
      /CMYKToGrayImageProc
     } ifelse
     load
    end
   } if
   image
  } ifelse
 } def


 % SeparateCMYKImageProc: - -> string
 % Data procedure producing the samples of the plate selected by plateindex.
 %  - sourcecount != 0: executes the data source stored at
 %    sourcearray[plateindex] and returns its result unchanged.
 %  - sourcecount == 0: executes sourcearray[0] to get 4-byte interleaved
 %    samples and builds a string a quarter as long, holding 255 minus the
 %    byte found at offset plateindex of each group of four.
 /SeparateCMYKImageProc
 {
  Adobe_ColorImage_AI6_Vars begin

   sourcecount 0 ne
   {
    sourcearray plateindex get exec
   }
   {
    sourcearray 0 get exec

    dup length 4 idiv string

    0 2 index

    % Loop over source offsets plateindex, plateindex+4, ...
    plateindex 4 2 index length 1 sub
    {
     get 255 exch sub

     3 copy put pop 1 add

     2 index
    } for

    % Drop the working operands, leaving only the new string.
    pop pop exch pop
   } ifelse
  end
 } def


 % FourEqual: g0 g1 g2 g3 a b c d -> g0 g1 g2 g3 bool
 % Compares the four candidate values a b c d with the four reference
 % values g0..g3 lying beneath them (a with g0, b with g1, c with g2,
 % d with g3). The candidates are consumed, the references stay on the
 % stack, and bool is true only when all four pairs are equal.
 /FourEqual
 {
  % Running result; starts out true and is and-ed with each comparison.
  true
  4
  {
   % Bring the next candidate above the running result, fetch the
   % reference lying five places below it, compare, and accumulate.
   exch 5 index eq and
  } repeat
 } def


 % TestPlateIndex: - -> plateindex
 % Probes the device to find out which plate is being rendered.
 % When setcmykcolor exists, each pure process colour (C, M, Y, K) is set in
 % turn and 1 - currentgray is recorded. The four recorded values are then
 % compared with fixed patterns:
 %   1 0 0 0 -> 0    0 1 0 0 -> 1    0 0 1 0 -> 2    0 0 0 1 -> 3
 %   0 0 0 0 -> 5
 % Anything else (or no setcmykcolor at all) leaves plateindex at -1.
 /TestPlateIndex
 {
  Adobe_ColorImage_AI6_Vars begin
   /plateindex -1 def

   /setcmykcolor where
   {
    pop
    gsave
    1 0 0 0 setcmykcolor systemdict /currentgray get exec 1 exch sub
    0 1 0 0 setcmykcolor systemdict /currentgray get exec 1 exch sub
    0 0 1 0 setcmykcolor systemdict /currentgray get exec 1 exch sub
    0 0 0 1 setcmykcolor systemdict /currentgray get exec 1 exch sub
    grestore

    1 0 0 0 FourEqual
    {
     /plateindex 0 def
    }
    {
     0 1 0 0 FourEqual
     {
      /plateindex 1 def
     }
     {
      0 0 1 0 FourEqual
      {
       /plateindex 2 def
      }
      {
       0 0 0 1 FourEqual
       {
        /plateindex 3 def
       }
       {
        0 0 0 0 FourEqual
        {
         /plateindex 5 def
        } if
       } ifelse
      } ifelse
     } ifelse
    } ifelse
    % Remove the four probe results that FourEqual left in place.
    pop pop pop pop
   } if
   plateindex
  end
 } def


 % colorimage: w h bits matrix src0 [.. srcN-1] multi ncomp -> -
 % Replacement for the colorimage operator.
 %  - channelcount is set to ncomp. sourcecount is ncomp - 1 when multi
 %    indicates separate data sources and 0 otherwise. The multi operand is
 %    a boolean, so it is tested against true; the integer 1 is still
 %    accepted for callers that relied on the previous comparison.
 %    (Comparing a boolean with the integer 1 is never equal, which made
 %    sourcecount 0 for every multi-source image and pointed the
 %    bits-per-sample lookup below at the wrong operand.)
 %  - bits other than 8 or 1: forwarded to the native colorimage
 %    (_colorimage) when there is one, otherwise all operands are discarded.
 %  - Otherwise TestPlateIndex decides how to render:
 %      3 channels, plate index -1 or plate index 5:
 %        no native colorimage -> ColorImageCompositeEmulator;
 %        ncomp 1 -> plain image;
 %        else native colorimage, with the transfer function temporarily
 %        forced to a constant when the plate index is 5.
 %      a specific plate (0-3):
 %        ncomp 1 -> plain image;
 %        else the data sources are stored in sourcearray and the plate is
 %        rendered with image through SeparateCMYKImageProc.
 /colorimage
 {
  Adobe_ColorImage_AI6_Vars begin
   /channelcount 1 index def
   /sourcecount 2 index dup true eq exch 1 eq or { channelcount 1 sub } { 0 } ifelse def

   % Fetch bits-per-sample from below the data sources and the two flags.
   4 sourcecount add index dup
   8 eq exch 1 eq or not
  end

  {
   /_colorimage load null ne
   {
    _colorimage
   }
   {
    % No native operator: drop w h bits matrix, the sources, multi, ncomp.
    Adobe_ColorImage_AI6_Vars /sourcecount get
    7 add { pop } repeat
   } ifelse
  }
  {
   dup 3 eq
   TestPlateIndex
   dup -1 eq exch 5 eq or or
   {
    /_colorimage load null eq
    {
     ColorImageCompositeEmulator
    }
    {
     dup 1 eq
     {
      pop pop image
     }
     {
      Adobe_ColorImage_AI6_Vars /plateindex get 5 eq
      {
       gsave

       % Pick a constant transfer function: {pop 0} or {pop 1}, depending
       % on how the current transfer function maps 0 and 1.
       0 _currenttransfer exec
       1 _currenttransfer exec
       eq
       { 0 _currenttransfer exec 0.5 lt }
       { 0 _currenttransfer exec 1 _currenttransfer exec gt } ifelse

       { { pop 0 } } { { pop 1 } } ifelse
       systemdict /settransfer get exec
      } if

      _colorimage

      Adobe_ColorImage_AI6_Vars /plateindex get 5 eq
      {
       grestore
      } if
     } ifelse
    } ifelse
   }
   {
    dup 1 eq
    {
     pop pop
     image
    }
    {
     pop pop

     Adobe_ColorImage_AI6_Vars begin
      % Move the data sources from the operand stack into sourcearray,
      % last source first.
      sourcecount -1 0
      {
       exch sourcearray 3 1 roll put
      } for

      /SeparateCMYKImageProc load
     end

     systemdict /image get exec
    } ifelse
   } ifelse
  } ifelse
 } def

 % XI: image operator used by the Illustrator body.
 % Operands, top first: mask flag, binary flag, two ignored values, channel
 % count, bits per pixel, image height, image width, four ignored values,
 % image matrix.
 % Sample data is read from currentfile: raw bytes when the binary flag is
 % non-zero (after skipping one line), hex digits otherwise.
 % The whole operation is wrapped in gsave/grestore.
 /XI
 {
  Adobe_ColorImage_AI6_Vars begin
   gsave
   /XIMask exch 0 ne def
   /XIBinary exch 0 ne def
   pop
   pop
   /XIChannelCount exch def
   /XIBitsPerPixel exch def
   /XIImageHeight exch def
   /XIImageWidth exch def
   pop pop pop pop
   /XIImageMatrix exch def

   % One scan line of data: ceil(width / 8) bytes for 1-bit images,
   % width * channels bytes otherwise.
   XIBitsPerPixel 1 eq
   {
    XIImageWidth 8 div ceiling cvi
   }
   {
    XIImageWidth XIChannelCount mul
   } ifelse
   /XIBuffer exch string def

   XIBinary
   {
    /XIDataProc { currentfile XIBuffer readstring pop } def
    % Skip the remainder of the current line before the binary data.
    currentfile 128 string readline pop pop
   }
   {
    /XIDataProc { currentfile XIBuffer readhexstring pop } def
   } ifelse

   0 0 moveto
   XIImageMatrix concat
   XIImageWidth XIImageHeight scale

   XIMask
   {
    XIImageWidth XIImageHeight
    false
    [ XIImageWidth 0 0 XIImageHeight neg 0 0 ]
    /XIDataProc load

    % Reset the last-paint marker so that _fc re-applies the fill colour,
    % then mark the graphics state as used by imagemask.
    /_lp /null ddef
    _fc
    /_lp /imagemask ddef

    imagemask
   }
   {
    XIImageWidth XIImageHeight
    XIBitsPerPixel
    [ XIImageWidth 0 0 XIImageHeight neg 0 0 ]
    /XIDataProc load

    XIChannelCount 1 eq
    {

     gsave
     0 setgray

     image

     grestore
    }
    {
     % Single data source (multi = false) with XIChannelCount components.
     false
     XIChannelCount
     colorimage
    } ifelse
   } ifelse
   grestore
  end
 } def

% Close the Adobe_ColorImage_AI6 dictionary.
end
%%EndProcSet
%%BeginResource: procset Adobe_Illustrator_AI5 1.1 0
%%Title: (Adobe Illustrator (R) Version 5.0 Full Prolog)
%%Version: 1.1
%%CreationDate: (3/7/1994) ()
%%Copyright: ((C) 1987-1994 Adobe Systems Incorporated All Rights Reserved)
% Leave the previous packing mode on the operand stack (consumed by the
% setpacking that closes this procset) and enable array packing.
currentpacking true setpacking
% Adobe_Illustrator_AI5_vars: mutable state shared by the Illustrator and
% typography operators. ddef writes into this dictionary.
userdict /Adobe_Illustrator_AI5_vars 81 dict dup begin
put
% Fill rule flag (true selects eofill/eoclip) and the last paint kind set up.
/_eo false def
/_lp /none def
% Paint hooks, empty until a colour operator (g G k K x X) installs them:
% _pf fill path, _ps stroke path, _psf/_pjsf filled text, _pss/_pjss
% stroked text.
/_pf
{
} def
/_ps
{
} def
/_psf
{
} def
/_pss
{
} def
/_pjsf
{
} def
/_pjss
{
} def
% Compound-path nesting depth and pending-clip flag.
/_pola 0 def
/_doClip 0 def
% Flatness in effect while the prolog loads; used by `i` for operand 0.
/cf currentflat def
% Text matrix.
/_tm matrix def
% Installer procedure names indexed by text render mode (see Tr), and the
% end-of-render procedure names indexed the same way (see iTe).
/_renderStart
[
/e0 /r0 /a0 /o0 /e1 /r1 /a1 /i0
] def
/_renderEnd
[
null null null null /i1 /i1 /i1 /i1
] def
% Current text render mode; -1 means none active.
/_render -1 def
% Text state: rise, per-character spacing (_ax _ay), per-space spacing
% (_cx _cy), leading, saved CTM, scratch matrix, space character code,
% hyphen string, font scale, space counter, horizontal scale.
/_rise 0 def
/_ax 0 def
/_ay 0 def
/_cx 0 def
/_cy 0 def
/_leading
[
0 0
] def
/_ctm matrix def
/_mtx matrix def
/_sp 16#020 def
/_hyphen (-) def
/_fScl 0 def
/_cnt 0 def
/_hs 1 def
% Font re-encoding state used by TE, TZ and modifyEncoding.
/_nativeEncoding 0 def
/_useNativeEncoding 0 def
/_tempEncode 0 def
/_pntr 0 def
% Dictionary opened by Tp to hold temporary overrides of W and h.
/_tDict 2 dict def
/_wv 0 def
% Text show procedures, replaced by the render mode installers.
/Tx
{
} def
/Tj
{
} def
% Deferred paint operation of a compound path (see *u / *U).
/CRender
{
} def
/_AI3_savepage
{
} def
% Fill colour state (_gf gray, _cf CMYK, _if custom colour, _of overprint)
% and the procedure _fc that applies it.
/_gf null def
/_cf 4 array def
/_if null def
/_of false def
/_fc
{
} def
% Stroke colour state (_gs, _cs, _is, _os) and the procedure _sc that
% applies it.
/_gs null def
/_cs 4 array def
/_is null def
/_os false def
/_sc
{
} def
/_pd 1 dict def
/_ed 15 dict def
/_pm matrix def
/_fm null def
/_fd null def
/_fdd null def
/_sm null def
/_sd null def
/_sdd null def
% Save object taken by the backtick operator and restored by ~.
/_i null def
% State for the `discard` machinery that skips parts of the file.
/discardSave null def
/buffer 256 string def
/beginString null def
/endString null def
/endStringLength null def
/layerCnt 1 def
/layerCount 1 def
/perCent (%) 0 get def
/perCentSeen? false def
/newBuff null def
/newBuffButFirst null def
/newBuffLast null def
/clipForward? false def
end
% Create the Adobe_Illustrator_AI5 dictionary (91 entries) unless userdict
% already has one, then open it for the definitions that follow.
userdict /Adobe_Illustrator_AI5 known not {
 userdict /Adobe_Illustrator_AI5 91 dict put
} if
userdict /Adobe_Illustrator_AI5 get begin
% initialize: - -> -
% Opens Adobe_Illustrator_AI5 and then Adobe_Illustrator_AI5_vars (both
% stay on the dictionary stack), applies bind to every value in
% discardDict, and, with the nc dictionary temporarily on top, applies
% bind to every executable non-operator entry of Adobe_Illustrator_AI5.
% Finishes by clearing the current path.
/initialize
{
 Adobe_Illustrator_AI5 dup begin
 Adobe_Illustrator_AI5_vars begin
 discardDict
 {
  bind pop pop
 } forall
 dup /nc get begin
 {
  dup xcheck 1 index type /operatortype ne and
  {
   bind
  } if
  pop pop
 } forall
 end
 newpath
} def
% terminate: pops the two dictionaries that initialize left open.
/terminate
{
 end
 end
} def
/_
null def
% ddef: key value -> -
% Stores the pair in Adobe_Illustrator_AI5_vars regardless of which
% dictionary is currently on top of the dictionary stack.
/ddef
{
 Adobe_Illustrator_AI5_vars 3 1 roll put
} def
% xput: key value dictname -> -
% Defines key/value in the dictionary bound to dictname. If that dictionary
% is full (length equals maxlength) it is first replaced by a copy with
% twice the capacity, defined under the same name in the current dictionary.
/xput
{
 dup load dup length exch maxlength eq
 {
  dup dup load dup
  length 2 mul dict copy def
 } if
 load begin
 def
 end
} def
% npop: any1 ... anyN n -> - ; pops n operands.
/npop
{
 {
  pop
 } repeat
} def
% sw: ax ay string -> wx wy
% Total advance of string when ax/ay is added after every character:
% stringwidth plus length * (ax, ay).
/sw
{
 dup length exch stringwidth
 exch 5 -1 roll 3 index mul add
 4 1 roll 3 1 roll mul add
} def
% swj: cx cy char ax ay string -> wx wy
% As sw, plus (cx, cy) for every occurrence of the character code char in
% string. _cnt is used as the occurrence counter.
/swj
{
 dup 4 1 roll
 dup length exch stringwidth
 exch 5 -1 roll 3 index mul add
 4 1 roll 3 1 roll mul add
 6 2 roll /_cnt 0 ddef
 {
  1 index eq
  {
   /_cnt _cnt 1 add ddef
  } if
 } forall
 pop
 exch _cnt mul exch _cnt mul 2 index add 4 1 roll 2 index add 4 1 roll pop pop
} def
% ss: ax ay string matrix -> -
% Strokes the outline of string one character at a time. For each
% character (delivered by cshow, whose width operands are discarded) the
% outline is built with charpath and stroked under `matrix` inside
% gsave/grestore; the current point then advances to the end of the glyph
% plus (ax, ay).
/ss
{
 4 1 roll
 {
  2 npop
  % Build a one-character string from the character code.
  (0) exch 2 copy 0 exch put pop
  gsave
  false charpath currentpoint
  4 index setmatrix
  stroke
  grestore
  moveto
  2 copy rmoveto
 } exch cshow
 3 npop
} def
% jss: cx cy char ax ay string matrix -> -
% As ss, except that a character whose code equals _sp is shown with
% widthshow using (cx cy char) rather than stroked as an outline.
/jss
{
 4 1 roll
 {
  2 npop
  (0) exch 2 copy 0 exch put
  gsave
  _sp eq
  {
   exch 6 index 6 index 6 index 5 -1 roll widthshow
   currentpoint
  }
  {
   false charpath currentpoint
   4 index setmatrix stroke
  } ifelse
  grestore
  moveto
  2 copy rmoveto
 } exch cshow
 6 npop
} def
% sp: ax ay string -> -
% Appends the outline of every character of string to the current path
% (charpath), moving by (ax, ay) after each one. Nothing is painted.
/sp
{
 {
  2 npop (0) exch
  2 copy 0 exch put pop
  false charpath
  2 copy rmoveto
 } exch cshow
 2 npop
} def
% jsp: cx cy char ax ay string -> -
% As sp, except that a character whose code equals _sp is passed to
% widthshow with (cx cy char) and adds no outline to the path.
/jsp
{
 {
  2 npop
  (0) exch 2 copy 0 exch put
  _sp eq
  {
   exch 5 index 5 index 5 index 5 -1 roll widthshow
  }
  {
   false charpath
  } ifelse
  2 copy rmoveto
 } exch cshow
 5 npop
} def
% pl: x y -> x' y'
% Snaps a user-space point to the device grid: the point is taken to device
% space, each coordinate c is replaced by round(c - 0.25) + 0.25, and the
% result is mapped back to user space.
/pl
{
 transform
 % Same adjustment for both coordinates; the exch inside the loop swaps
 % them so that each is treated once and the original order is restored.
 2
 {
  0.25 sub round 0.25 add exch
 } repeat
 itransform
} def
% Path construction operators c C v V y Y l L m.
% When setstrokeadjust exists it is switched on and the operators map
% straight onto curveto/lineto/moveto. Otherwise the end point of every
% segment is first snapped to the device grid with pl.
/setstrokeadjust where
{
 pop true setstrokeadjust
 % c: x1 y1 x2 y2 x3 y3 -> - ; curveto
 /c
 {
  curveto
 } def
 /C
 /c load def
 % v: x2 y2 x3 y3 -> - ; curve whose first control point is the current point
 /v
 {
  currentpoint 6 2 roll curveto
 } def
 /V
 /v load def
 % y: x1 y1 x3 y3 -> - ; curve whose second control point is the end point
 /y
 {
  2 copy curveto
 } def
 /Y
 /y load def
 % l: x y -> - ; lineto
 /l
 {
  lineto
 } def
 /L
 /l load def
 % m: x y -> - ; moveto
 /m
 {
  moveto
 } def
}
{
 % Same operators with the end point adjusted by pl first.
 /c
 {
  pl curveto
 } def
 /C
 /c load def
 /v
 {
  currentpoint 6 2 roll pl curveto
 } def
 /V
 /v load def
 /y
 {
  pl 2 copy curveto
 } def
 /Y
 /y load def
 /l
 {
  pl lineto
 } def
 /L
 /l load def
 /m
 {
  pl moveto
 } def
} ifelse
% Graphics state operators; most are one-to-one aliases.
% d: array offset -> - ; setdash
/d
{
 setdash
} def
% cf: empty procedure in this dictionary. Adobe_Illustrator_AI5_vars also
% defines cf (as a flatness value); which one a lookup finds depends on the
% dictionary stack at that moment.
/cf
{
} def
% i: flatness -> - ; setflat, with 0 replaced by whatever cf yields.
/i
{
 dup 0 eq
 {
  pop cf
 } if
 setflat
} def
% j: setlinejoin
/j
{
 setlinejoin
} def
% J: setlinecap
/J
{
 setlinecap
} def
% M: setmiterlimit
/M
{
 setmiterlimit
} def
% w: setlinewidth
/w
{
 setlinewidth
} def
% XR: n -> - ; sets the fill rule flag _eo to (n != 0).
/XR
{
 0 ne
 /_eo exch ddef
} def
% H: no-op.
/H
{
} def
% h: closepath
/h
{
 closepath
} def
% N: - -> -
% Ends the current path without painting. Outside a compound path
% (_pola = 0) a pending clip (_doClip = 1) is applied first, using eoclip or
% clip according to _eo, and the path is cleared. Inside a compound path
% the operation is deferred by storing it as CRender.
/N
{
 _pola 0 eq
 {
  _doClip 1 eq
  {
   _eo {eoclip} {clip} ifelse /_doClip 0 ddef
  } if
  newpath
 }
 {
  /CRender
  {
   N
  } ddef
 } ifelse
} def
% n: same as N.
/n
{
 N
} def
% F: - -> -
% Fills the current path through the _pf hook. With a pending clip the fill
% is done inside gsave/grestore, the path then becomes the clip, the path is
% cleared and the fill colour is re-applied (_lp reset, _fc). Inside a
% compound path (_pola != 0) the operation is deferred as CRender.
/F
{
 _pola 0 eq
 {
  _doClip 1 eq
  {
   gsave _pf grestore _eo {eoclip} {clip} ifelse newpath /_lp /none ddef _fc
   /_doClip 0 ddef
  }
  {
   _pf
  } ifelse
 }
 {
  /CRender
  {
   F
  } ddef
 } ifelse
} def
% f: close the path, then F.
/f
{
 closepath
 F
} def
% S: - -> -
% Strokes the current path through the _ps hook. With a pending clip the
% stroke is done inside gsave/grestore, the path then becomes the clip, the
% path is cleared and the stroke colour is re-applied (_lp reset, _sc).
% Inside a compound path (_pola != 0) the operation is deferred as CRender.
/S
{
 _pola 0 eq
 {
  _doClip 1 eq
  {
   gsave _ps grestore _eo {eoclip} {clip} ifelse newpath /_lp /none ddef _sc
   /_doClip 0 ddef
  }
  {
   _ps
  } ifelse
 }
 {
  /CRender
  {
   S
  } ddef
 } ifelse
} def
% s: close the path, then S.
/s
{
 closepath
 S
} def
% B: - -> -
% Fills and then strokes the current path. Inside a compound path
% (_pola != 0) the operation is deferred as CRender.
% Note the statement order: the clip test result is pushed first, the fill
% (gsave F grestore) runs while that boolean waits on the operand stack,
% and only then does ifelse choose between the clipping and the plain
% stroke branch.
/B
{
 _pola 0 eq
 {
  _doClip 1 eq
  gsave F grestore
  {
   gsave S grestore _eo {eoclip} {clip} ifelse newpath /_lp /none ddef _sc
   /_doClip 0 ddef
  }
  {
   S
  } ifelse
 }
 {
  /CRender
  {
   B
  } ddef
 } ifelse
} def
% b: close the path, then B.
/b
{
 closepath
 B
} def
% W: marks the next painting operator as a clip (sets _doClip to 1).
/W
{
 /_doClip 1 ddef
} def
% *: pops the top operand if it is a string (nothing is popped from an
% empty stack), then clears the current path.
/*
{
 count 0 ne
 {
  dup type /stringtype eq
  {
   pop
  } if
 } if
 newpath
} def
% u / U: no-ops.
/u
{
} def
/U
{
} def
% q / Q: gsave / grestore, skipped while inside a compound path.
/q
{
 _pola 0 eq
 {
  gsave
 } if
} def
/Q
{
 _pola 0 eq
 {
  grestore
 } if
} def
% *u: enters a compound path by incrementing the nesting counter _pola.
/*u
{
 _pola 1 add /_pola exch ddef
} def
% *U: leaves a compound path; when the counter is back at 0 the deferred
% paint operation stored in CRender is executed.
/*U
{
 _pola 1 sub /_pola exch ddef
 _pola 0 eq
 {
  CRender
 } if
} def
% D: any -> - (operand ignored)
/D
{
 pop
} def
% *w / *W: no-ops.
/*w
{
} def
/*W
{
} def
% ` (backtick): begins an embedded section.
% Takes a VM snapshot into _i, switches to nulldevice while clipForward? is
% set, consumes its operands (a matrix is kept and concatenated to the CTM;
% the others are discarded), opens userdict with showpage disabled, and
% resets gray level, line cap, width, join, miter limit, dash, stroke
% adjustment, path and overprint to fixed defaults.
/`
{
 /_i save ddef
 clipForward?
 {
  nulldevice
 } if
 6 1 roll 4 npop
 concat pop
 userdict begin
 /showpage
 {
 } def
 0 setgray
 0 setlinecap
 1 setlinewidth
 0 setlinejoin
 10 setmiterlimit
 [] 0 setdash
 /setstrokeadjust where {pop false setstrokeadjust} if
 newpath
 0 setgray
 false setoverprint
} def
% ~: ends the embedded section: closes userdict and restores the snapshot
% taken by the backtick operator.
/~
{
 end
 _i restore
} def
% O: n -> -
% Fill overprint: records (n != 0) in _of and resets the last-paint marker
% _lp so that the colour setup procedure runs again before the next paint.
/O
{
 /_of exch 0 ne ddef
 /_lp /none ddef
} def
% R: n -> -
% Stroke overprint: records (n != 0) in _os and resets _lp likewise.
/R
{
 /_os exch 0 ne ddef
 /_lp /none ddef
} def
% g: gray -> -
% Selects a gray fill. Stores the level in _gf and installs:
%   _fc   applies overprint and the gray level unless the last paint set up
%         was already a fill (_lp = /fill);
%   _pf   fills the path (eofill or fill according to _eo);
%   _psf  shows text with ashow;
%   _pjsf shows text with awidthshow.
/g
{
 /_gf exch ddef
 /_fc
 {
  _lp /fill ne
  {
   _of setoverprint
   _gf setgray
   /_lp /fill ddef
  } if
 } ddef
 /_pf
 {
  _fc
  _eo {eofill} {fill} ifelse
 } ddef
 /_psf
 {
  _fc
  ashow
 } ddef
 /_pjsf
 {
  _fc
  awidthshow
 } ddef
 /_lp /none ddef
} def
% G: gray -> -
% Selects a gray stroke. Stores the level in _gs and installs:
%   _sc   applies overprint and the gray level unless the last paint set up
%         was already a stroke (_lp = /stroke);
%   _ps   strokes the path;
%   _pss  strokes text outlines with ss;
%   _pjss strokes text outlines with jss.
/G
{
 /_gs exch ddef
 /_sc
 {
  _lp /stroke ne
  {
   _os setoverprint
   _gs setgray
   /_lp /stroke ddef
  } if
 } ddef
 /_ps
 {
  _sc
  stroke
 } ddef
 /_pss
 {
  _sc
  ss
 } ddef
 /_pjss
 {
  _sc
  jss
 } ddef
 /_lp /none ddef
} def
% k: c m y k -> -
% Selects a CMYK fill. Stores the components in the _cf array and installs
% the fill hooks (_fc, _pf, _psf, _pjsf); _fc applies overprint and the
% CMYK colour unless the last paint set up was already a fill.
/k
{
 _cf astore pop
 /_fc
 {
  _lp /fill ne
  {
   _of setoverprint
   _cf aload pop setcmykcolor
   /_lp /fill ddef
  } if
 } ddef
 /_pf
 {
  _fc
  _eo {eofill} {fill} ifelse
 } ddef
 /_psf
 {
  _fc
  ashow
 } ddef
 /_pjsf
 {
  _fc
  awidthshow
 } ddef
 /_lp /none ddef
} def
% K: c m y k -> -
% Selects a CMYK stroke. Stores the components in the _cs array and installs
% the stroke hooks (_sc, _ps, _pss, _pjss); _sc applies overprint and the
% CMYK colour unless the last paint set up was already a stroke.
/K
{
 _cs astore pop
 /_sc
 {
  _lp /stroke ne
  {
   _os setoverprint
   _cs aload pop setcmykcolor
   /_lp /stroke ddef
  } if
 } ddef
 /_ps
 {
  _sc
  stroke
 } ddef
 /_pss
 {
  _sc
  ss
 } ddef
 /_pjss
 {
  _sc
  jss
 } ddef
 /_lp /none ddef
} def
% x: c m y k name value -> -
% Selects a custom-colour fill. The top operand is kept in _gf, the
% remaining five go to findcmykcustomcolor and the result is kept in _if.
% _fc applies the colour with setcustomcolor, passing 1 - _gf as its second
% operand; the other fill hooks are the same as for g and k.
/x
{
 /_gf exch ddef
 findcmykcustomcolor
 /_if exch ddef
 /_fc
 {
  _lp /fill ne
  {
   _of setoverprint
   _if _gf 1 exch sub setcustomcolor
   /_lp /fill ddef
  } if
 } ddef
 /_pf
 {
  _fc
  _eo {eofill} {fill} ifelse
 } ddef
 /_psf
 {
  _fc
  ashow
 } ddef
 /_pjsf
 {
  _fc
  awidthshow
 } ddef
 /_lp /none ddef
} def
% X: c m y k name value -> -
% Selects a custom-colour stroke. The top operand is kept in _gs, the
% remaining five go to findcmykcustomcolor and the result is kept in _is.
% _sc applies the colour with setcustomcolor, passing 1 - _gs as its second
% operand; the other stroke hooks are the same as for G and K.
/X
{
 /_gs exch ddef
 findcmykcustomcolor
 /_is exch ddef
 /_sc
 {
  _lp /stroke ne
  {
   _os setoverprint
   _is _gs 1 exch sub setcustomcolor
   /_lp /stroke ddef
  } if
 } ddef
 /_ps
 {
  _sc
  stroke
 } ddef
 /_pss
 {
  _sc
  ss
 } ddef
 /_pjss
 {
  _sc
  jss
 } ddef
 /_lp /none ddef
} def
% A: any -> - (operand ignored)
/A
{
 pop
} def
% annotatepage: runs userdict's annotatepage entry when there is one and
% does nothing otherwise.
/annotatepage
{
userdict /annotatepage 2 copy known {get exec} {pop pop} ifelse
} def
% XT: two operands ignored.
/XT {
 pop pop
} def
% discard: [beginString] n endString -> -
% Skips input from currentfile up to endString by running one of the
% numbered procedures in discardDict: entry n, or entry n + 2 when gt38? is
% true. The procedure runs under `stopped` (it terminates itself with
% `stop`), and the whole operation is bracketed by save/restore so its
% side effects on VM are undone.
/discard
{
 save /discardSave exch store
 discardDict begin
 /endString exch store
 gt38?
 {
  2 add
 } if
 load
 stopped
 pop
 end
 discardSave restore
} bind def
% discardDict: the skipping strategies used by `discard`.
% Entries 0 and 1 scan currentfile byte by byte through a sliding window;
% entries 2 and 3 scan it line by line with readline.
% Entries 1 and 3 additionally take a beginString and keep a nesting count,
% so nested begin/end pairs are skipped as a whole.
userdict /discardDict 7 dict dup begin
put
% pre38Initialize: sets up the sliding window. newBuff is a view of
% `buffer` as long as endString; newBuffButFirst is that view without its
% first byte; newBuffLast is its last byte.
/pre38Initialize
{
 /endStringLength endString length store
 /newBuff buffer 0 endStringLength getinterval store
 /newBuffButFirst newBuff 1 endStringLength 1 sub getinterval store
 /newBuffLast newBuff endStringLength 1 sub 1 getinterval store
} def
% shiftBuffer: slides the window one byte forward and reads the next byte
% of currentfile into its last position; stops at end of file.
/shiftBuffer
{
 newBuff 0 newBuffButFirst putinterval
 newBuffLast 0
 currentfile read not
 {
 stop
 } if
 put
} def
% Entry 0: byte-wise scan until the window equals endString.
0
{
 pre38Initialize
 mark
 currentfile newBuff readstring exch pop
 {
  {
   newBuff endString eq
   {
    cleartomark stop
   } if
   shiftBuffer
  } loop
 }
 {
 stop
 } ifelse
} def
% Entry 1: beginString -> - ; byte-wise scan with nesting. layerCount goes
% up on beginString and down on endString; the scan stops when it reaches 0.
1
{
 pre38Initialize
 /beginString exch store
 mark
 currentfile newBuff readstring exch pop
 {
  {
   newBuff beginString eq
   {
    /layerCount dup load 1 add store
   }
   {
    newBuff endString eq
    {
     /layerCount dup load 1 sub store
     layerCount 0 eq
     {
      cleartomark stop
     } if
    } if
   } ifelse
   shiftBuffer
  } loop
 } if
} def
% Entry 2: line-wise scan until a line equals endString (or end of file).
2
{
 mark
 {
  currentfile buffer readline not
  {
  stop
  } if
  endString eq
  {
   cleartomark stop
  } if
 } loop
} def
% Entry 3: beginString -> - ; line-wise scan with nesting. layerCnt starts
% at 1, goes up on beginString lines, and the scan stops at the endString
% line seen while layerCnt is 1.
3
{
 /beginString exch store
 /layerCnt 1 store
 mark
 {
  currentfile buffer readline not
  {
  stop
  } if
  dup beginString eq
  {
   pop /layerCnt dup load 1 add store
  }
  {
   endString eq
   {
    layerCnt 1 eq
    {
     cleartomark stop
    }
    {
     /layerCnt dup load 1 sub store
    } ifelse
   } if
  } ifelse
 } loop
} def
end
% clipRenderOff: overrides that Lb pushes on the dictionary stack so that
% painting is suppressed while clips still take effect.
userdict /clipRenderOff 15 dict dup begin
put
% Define n N s S f F b B all with the same body: apply a pending clip
% (eoclip or clip according to _eo), then clear the path without painting.
{
 /n /N /s /S /f /F /b /B
}
{
 {
  _doClip 1 eq
  {
   /_doClip 0 ddef _eo {eoclip} {clip} ifelse
  } if
  newpath
 } def
} forall
% The remaining entries discard their operands: Tr and BB one, Bg twelve,
% Bm and Bc six, Bh four; Bb takes none.
/Tr /pop load def
/Bb {} def
/BB /pop load def
/Bg {12 npop} def
/Bm {6 npop} def
/Bc /Bm load def
/Bh {4 npop} def
end
% Lb: begins a layer. Takes ten operands; only two are examined, the others
% are discarded. Numbering the operands 1..10 from the bottom, the fourth is
% tested first and the sixth second.
% NOTE(review): presumably these are the layer's printing flag and its
% multi-layer-mask flag as defined by the AI5 file format -- confirm.
%  - fourth = 0 and sixth = 0: the whole layer is skipped with `discard`
%    (nesting-aware, up to the matching end-of-layer comment).
%  - fourth = 0 and sixth != 0: clipForward? is set, Tx and Tj become pop,
%    and clipRenderOff is inserted on the dictionary stack just beneath the
%    current dictionary, so paths clip but do not paint.
%  - fourth != 0 and sixth = 0: a VM snapshot is stored in discardSave.
/Lb
{
 4 npop
 6 1 roll
 pop
 4 1 roll
 pop pop pop
 0 eq
 {
  0 eq
  {
   (%AI5_BeginLayer) 1 (%AI5_EndLayer--) discard
  }
  {

   /clipForward? true def

   /Tx /pop load def
   /Tj /pop load def

   currentdict end clipRenderOff begin begin
  } ifelse
 }
 {
  0 eq
  {
   save /discardSave exch store
  } if
 } ifelse
} bind def
% LB: ends a layer.
% If discardSave holds a save object, it is restored. Otherwise, when
% clipForward? is set, the dictionary beneath the current one (inserted by
% Lb) is removed from the dictionary stack and clipForward? is cleared.
/LB
{
 discardSave dup null ne
 {
  restore
 }
 {
  pop
  clipForward?
  {
   currentdict
   end
   end
   begin

   /clipForward? false ddef
  } if
 } ifelse
} bind def
% Pb: a b -> - ; discards its operands and skips the file up to the
% end-of-palette comment.
/Pb
{
 pop pop
 0 (%AI5_EndPalette) discard
} bind def
% Np: skips the file up to the end-of-non-printing comment.
/Np
{
 0 (%AI5_End_NonPrinting--) discard
} bind def
% Ln, Ap: one operand ignored.
/Ln /pop load def
/Ap
/pop load def
% Ar: resolution -> -
% Sets the flatness to the device-space length of 72/resolution user units,
% but never below 1.
/Ar
{
 72 exch div
 0 dtransform dup mul exch dup mul add sqrt
 dup 1 lt
 {
  pop 1
 } if
 setflat
} def
% Mb / MB: bracket a section with q / Q; Md is a no-op.
/Mb
{
 q
} def
/Md
{
} def
/MB
{
 Q
} def
% nc: colour operators replaced by versions that only discard their
% operands. `initialize` puts this dictionary on the dictionary stack while
% it binds the procset's procedures.
/nc 3 dict def
nc begin
/setgray
{
 pop
} bind def
/setcmykcolor
{
 4 npop
} bind def
/setcustomcolor
{
 2 npop
} bind def
currentdict readonly pop
end
% Close Adobe_Illustrator_AI5 and restore the packing mode that was left on
% the operand stack when this procset opened.
end
setpacking
%%EndResource
%%EndProlog
%%BeginSetup
%%IncludeFont: Helvetica
%%IncludeFont: Symbol
%%IncludeFont: Times-Italic
% Run each procset's initialize entry. The typography initialize receives
% the Illustrator vars and operator dictionaries as operands.
Adobe_level2_AI5 /initialize get exec
Adobe_Illustrator_AI5_vars Adobe_Illustrator_AI5 Adobe_typography_AI5 /initialize get exec
Adobe_ColorImage_AI6 /initialize get exec
Adobe_Illustrator_AI5 /initialize get exec
% Encoding changes handed to TE: a number sets the next index, each name is
% stored at the current index, which then advances by one.
[
39/quotesingle 96/grave 128/Adieresis/Aring/Ccedilla/Eacute/Ntilde/Odieresis
/Udieresis/aacute/agrave/acircumflex/adieresis/atilde/aring/ccedilla/eacute
/egrave/ecircumflex/edieresis/iacute/igrave/icircumflex/idieresis/ntilde
/oacute/ograve/ocircumflex/odieresis/otilde/uacute/ugrave/ucircumflex
/udieresis/dagger/degree/cent/sterling/section/bullet/paragraph/germandbls
/registered/copyright/trademark/acute/dieresis/.notdef/AE/Oslash
/.notdef/plusminus/.notdef/.notdef/yen/mu/.notdef/.notdef
/.notdef/.notdef/.notdef/ordfeminine/ordmasculine/.notdef/ae/oslash
/questiondown/exclamdown/logicalnot/.notdef/florin/.notdef/.notdef
/guillemotleft/guillemotright/ellipsis/.notdef/Agrave/Atilde/Otilde/OE/oe
/endash/emdash/quotedblleft/quotedblright/quoteleft/quoteright/divide
/.notdef/ydieresis/Ydieresis/fraction/currency/guilsinglleft/guilsinglright
/fi/fl/daggerdbl/periodcentered/quotesinglbase/quotedblbase/perthousand
/Acircumflex/Ecircumflex/Aacute/Edieresis/Egrave/Iacute/Icircumflex
/Idieresis/Igrave/Oacute/Ocircumflex/.notdef/Ograve/Uacute/Ucircumflex
/Ugrave/dotlessi/circumflex/tilde/macron/breve/dotaccent/ring/cedilla
/hungarumlaut/ogonek/caron
TE
% Define the document fonts with TZ: new name, base font, two ignored
% numbers, and a flag (1 = replace the Encoding with the one built by TE).
%AI3_BeginEncoding: _Helvetica Helvetica
[/_Helvetica/Helvetica 0 0 1 TZ
%AI3_EndEncoding AdobeType
%AI3_BeginEncoding: _Symbol Symbol
[/_Symbol/Symbol 0 0 0 TZ
%AI3_EndEncoding TrueType
%AI3_BeginEncoding: _Times-Italic Times-Italic
[/_Times-Italic/Times-Italic 0 0 1 TZ
%AI3_EndEncoding TrueType
%AI5_Begin_NonPrinting
Np
8 Bn
%AI5_BeginGradient: (Black & White)
(Black & White) 0 2 Bd
[
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
0 %_Br
[
0 0 50 100 %_Bs
1 0 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Green & Blue)
(Green & Blue) 0 2 Bd
[
<
99999A9A9B9B9B9C9C9D9D9D9E9E9F9F9FA0A0A1A1A1A2A2A3A3A3A4A4A5A5A5A6A6A7A7A7A8A8A9
A9A9AAAAABABABACACADADADAEAEAFAFAFB0B0B1B1B1B2B2B3B3B3B4B4B5B5B5B6B6B7B7B7B8B8B9
B9B9BABABBBBBBBCBCBDBDBDBEBEBFBFBFC0C0C1C1C1C2C2C3C3C3C4C4C5C5C5C6C6C7C7C7C8C8C9
C9C9CACACBCBCBCCCCCDCDCDCECECFCFCFD0D0D1D1D1D2D2D3D3D3D4D4D5D5D5D6D6D7D7D7D8D8D9
D9D9DADADBDBDBDCDCDDDDDDDEDEDFDFDFE0E0E1E1E1E2E2E3E3E3E4E4E5E5E5E6E6E7E7E7E8E8E9
E9E9EAEAEBEBEBECECEDEDEDEEEEEFEFEFF0F0F1F1F1F2F2F3F3F3F4F4F5F5F5F6F6F7F7F7F8F8F9
F9F9FAFAFBFBFBFCFCFDFDFDFEFEFFFF
>
<
000102020304050506070808090A0B0B0C0D0E0E0F101111121314141516171718191A1A1B1C1D1D
1E1F20202122232324252626272829292A2B2C2C2D2E2F2F303132323334353536373838393A3B3B
3C3D3E3E3F404141424344444546474748494A4A4B4C4D4D4E4F5050515253535455565657585959
5A5B5C5C5D5E5F5F606162626364656566676868696A6B6B6C6D6E6E6F7071717273747475767777
78797A7A7B7C7D7D7E7F80808182828384858586878888898A8B8B8C8D8E8E8F9091919293949495
96979798999A9A9B9C9D9D9E9FA0A0A1A2A3A3A4A5A6A6A7A8A9A9AAABACACADAEAFAFB0B1B2B2B3
B4B5B5B6B7B8B8B9BABBBBBCBDBEBEBF
>
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
0
1 %_Br
[
1 0.75 0 0 1 50 100 %_Bs
0.6 0 1 0 1 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Pink, Yellow, Green)
(Pink, Yellow, Green) 0 3 Bd
[
<
00000000000000000000000000000000000000010101010101010101010101010101010101010101
01010101010202020202020202020202020202020202020202020203030303030303030303030303
03030303030303030404040404040404040404040404040404040404050505050505050505050505
05050505050505060606060606060606060606060606060606060707070707070707070707070707
07070707080808080808080808080808080808080809090909090909090909090909090909090A0A
0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0C0C0C0C0C0C0C0C0C
0C0C0C0C0C0C0C0D0D0D0D0D
>
<
050506060606070708080809090A0A0A0B0B0C0C0D0D0E0E0F0F1010111112121313141415151617
17181819191A1A1B1C1C1D1D1E1F1F202021222223232425252626272828292A2A2B2C2C2D2D2E2F
2F3031313233333435353637373839393A3B3B3C3D3E3E3F4040414242434445454647474849494A
4B4C4C4D4E4F4F505151525354545556575758595A5A5B5C5C5D5E5F5F6061626363646566666768
69696A6B6C6C6D6E6F707071727373747576777778797A7B7B7C7D7E7F7F80818283838485868787
88898A8B8B8C8D8E8F8F9091929394949596979898999A9B9C9D9D9E9FA0A1A2A2A3A4A5A6A7A7A8
A9AAABACADADAEAFB0B1B2B2
>
<
CCCCCBCBCBCACACAC9C9C8C8C7C7C6C6C5C5C4C4C3C2C2C1C1C0C0BFBEBEBDBDBCBBBBBAB9B9B8B7
B7B6B6B5B4B4B3B2B1B1B0AFAFAEADADACABAAAAA9A8A8A7A6A5A5A4A3A2A2A1A0A09F9E9D9C9C9B
9A999998979696959493929291908F8E8E8D8C8B8A8A8988878686858483828181807F7E7D7C7C7B
7A7978777776757473727171706F6E6D6C6B6A6A69686766656463636261605F5E5D5C5B5B5A5958
5756555453525151504F4E4D4C4B4A49484746464544434241403F3E3D3C3B3A3938383736353433
3231302F2E2D2C2B2A29282726252423222221201F1E1D1C1B1A191817161514131211100F0E0D0C
0B0A09080706050403020100
>
0
1 %_Br
<
737271706F6E6D6C6B6A696867666564636261605F5E5D5C5B5B5A59585756555453525150504F4E
4D4C4B4A4949484746454443434241403F3E3E3D3C3B3A3A393837363635343333323130302F2E2D
2D2C2B2A2A29282827262525242323222121201F1F1E1D1D1C1C1B1A1A1918181717161615141413
1312121111100F0F0E0E0D0D0C0C0C0B0B0A0A090908080807070606060505050404040303030202
020201010101010000000000
>
<
00000000000000000000000001010101010101010101010101010101010101010101010102020202
02020202020202020202020202020202020202020202030303030303030303030303030303030303
03030303030303030303030303040404040404040404040404040404040404040404040404040404
04040404040404040404050505050505050505050505050505050505050505050505050505050505
050505050505050505050505
>
<
BFBFBFC0C0C0C0C0C0C0C0C0C1C1C1C1C1C1C1C1C1C2C2C2C2C2C2C2C2C2C2C3C3C3C3C3C3C3C3C3
C3C4C4C4C4C4C4C4C4C4C4C5C5C5C5C5C5C5C5C5C5C5C6C6C6C6C6C6C6C6C6C6C6C6C7C7C7C7C7C7
C7C7C7C7C7C7C8C8C8C8C8C8C8C8C8C8C8C8C8C9C9C9C9C9C9C9C9C9C9C9C9C9C9C9CACACACACACA
CACACACACACACACACACACBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCC
>
0
1 %_Br
[
0.05 0.7 0 0 1 50 100 %_Bs
0 0.02 0.8 0 1 57 36 %_Bs
0.45 0 0.75 0 1 37 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Purple, Red & Yellow)
(Purple, Red & Yellow) 0 3 Bd
[
0
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A
>
<
CCCCCCCDCDCDCDCDCECECECECECFCFCFCFD0D0D0D0D0D1D1D1D1D1D2D2D2D2D2D3D3D3D3D3D4D4D4
D4D5D5D5D5D5D6D6D6D6D6D7D7D7D7D7D8D8D8D8D8D9D9D9D9DADADADADADBDBDBDBDBDCDCDCDCDC
DDDDDDDDDDDEDEDEDEDFDFDFDFDFE0E0E0E0E0E1E1E1E1E1E2E2E2E2E2E3E3E3E3E4E4E4E4E4E5E5
E5E5E5E6E6E6E6E6E7E7E7E7E7E8E8E8E8E9E9E9E9E9EAEAEAEAEAEBEBEBEBEBECECECECECEDEDED
EDEEEEEEEEEEEFEFEFEFEFF0F0F0F0F0F1F1F1F1F1F2F2F2F2F3F3F3F3F3F4F4F4F4F4F5F5F5F5F5
F6F6F6F6F6F7F7F7F7F8F8F8F8F8F9F9F9F9F9FAFAFAFAFAFBFBFBFBFBFCFCFCFCFDFDFDFDFDFEFE
FEFEFEFFFFFF
>
0
1 %_Br
<
E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBE
BDBCBBBAB9B8B7B6B5B4B3B2B1B0AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A99989796
9594939291908F8E8D8C8B8A898887868584838281807F7E7D7C7B7A797877767574737271706F6E
6D6C6B6A696867666564636261605F5E5D5C5B5A595857565554535251504F4E4D4C4B4A49484746
4544434241403F3E3D3C3B3A393837363534333231302F2E2D2C2B2A292827262524232221201F1E
1D1C1B1A191817161514131211100F0E0D0C0B0A09080706050403020100
>
<
E5E6E6E6E6E6E6E6E6E7E7E7E7E7E7E7E7E7E8E8E8E8E8E8E8E8E8E9E9E9E9E9E9E9E9E9EAEAEAEA
EAEAEAEAEAEBEBEBEBEBEBEBEBEBECECECECECECECECECEDEDEDEDEDEDEDEDEDEEEEEEEEEEEEEEEE
EEEFEFEFEFEFEFEFEFEFF0F0F0F0F0F0F0F0F0F1F1F1F1F1F1F1F1F1F2F2F2F2F2F2F2F2F2F3F3F3
F3F3F3F3F3F3F4F4F4F4F4F4F4F4F4F5F5F5F5F5F5F5F5F5F6F6F6F6F6F6F6F6F6F7F7F7F7F7F7F7
F7F7F8F8F8F8F8F8F8F8F8F9F9F9F9F9F9F9F9F9FAFAFAFAFAFAFAFAFAFBFBFBFBFBFBFBFBFBFCFC
FCFCFCFCFCFCFCFDFDFDFDFDFDFDFDFDFEFEFEFEFEFEFEFEFEFFFFFFFFFF
>
<
00010203040405060708090A0B0C0C0D0E0F10111213141415161718191A1B1C1D1D1E1F20212223
242525262728292A2B2C2D2D2E2F30313233343535363738393A3B3C3D3D3E3F4041424344454546
4748494A4B4C4D4E4E4F50515253545556565758595A5B5C5D5E5E5F60616263646566666768696A
6B6C6D6E6E6F70717273747576767778797A7B7C7D7E7F7F80818283848586878788898A8B8C8D8E
8F8F90919293949596979798999A9B9C9D9E9F9FA0A1A2A3A4A5A6A7A7A8A9AAABACADAEAFAFB0B1
B2B3B4B5B6B7B8B8B9BABBBCBDBEBFC0C0C1C2C3C4C5C6C7C8C8C9CACBCC
>
0
1 %_Br
[
0 0.04 1 0 1 50 100 %_Bs
0 1 0.8 0 1 50 50 %_Bs
0.9 0.9 0 0 1 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Rainbow)
(Rainbow) 0 6 Bd
[
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
1
0
0
1 %_Br
1
<
0708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E
2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F50515253545556
5758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E
7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9FA0A1A2A3A4A5A6
A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7C8C9CACBCCCDCE
CFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDFE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEFF0F1F2F3F4F5F6
F7F8F9FAFBFCFDFEFF
>
0
0
1 %_Br
1
<
00000000000000000000000000000000000001010101010101010101010101010101010101010101
01010101010101010101010101010202020202020202020202020202020202020202020202020202
02020202020202020202030303030303030303030303030303030303030303030303030303030303
03030303030304040404040404040404040404040404040404040404040404040404040404040404
04040505050505050505050505050505050505050505050505050505050505050505050505050606
06060606060606060606060606060606060606060606060606060606060606060606070707070707
07070707070707070707070707070707
>
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
0
1 %_Br
<
000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F2021222324252627
28292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F
505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F7071727374757677
78797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9F
A0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDFE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF
F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF
>
0
1
0
1 %_Br
0
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
1
0
1 %_Br
[
0 1 0 0 1 50 100 %_Bs
1 1 0 0 1 50 80 %_Bs
1 0.0279 0 0 1 50 60 %_Bs
1 0 1 0 1 50 40 %_Bs
0 0 1 0 1 50 20 %_Bs
0 1 1 0 1 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Steel Bar)
(Steel Bar) 0 3 Bd
[
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
0 %_Br
<
000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F2021222324252627
28292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F
505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F7071727374757677
78797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9F
A0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDFE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF
F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF
>
0 %_Br
[
0 0 50 100 %_Bs
1 0 50 70 %_Bs
0 0 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Yellow & Orange Radial)
(Yellow & Orange Radial) 1 2 Bd
[
0
<
0001010203040506060708090A0B0C0C0D0E0F10111213131415161718191A1B1C1D1D1E1F202122
232425262728292A2B2B2C2D2E2F303132333435363738393A3B3C3D3E3E3F404142434445464748
494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F60606162636465666768696A6B6C6D6E6F
707172737475767778797A7B7C7D7E7F808182838485868788898A8B8C
>
<
FFFFFFFFFEFEFEFEFEFEFEFDFDFDFDFDFDFCFCFCFCFCFCFBFBFBFBFBFBFAFAFAFAFAFAF9F9F9F9F9
F9F8F8F8F8F8F8F7F7F7F7F7F7F6F6F6F6F6F6F5F5F5F5F5F5F4F4F4F4F4F3F3F3F3F3F3F2F2F2F2
F2F2F1F1F1F1F1F0F0F0F0F0F0EFEFEFEFEFEFEEEEEEEEEEEDEDEDEDEDEDECECECECECEBEBEBEBEB
EBEAEAEAEAEAE9E9E9E9E9E9E8E8E8E8E8E8E7E7E7E7E7E6E6E6E6E6E5
>
0
1 %_Br
[
0 0 1 0 1 52 19 %_Bs
0 0.55 0.9 0 1 50 100 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Yellow & Purple Radial)
(Yellow & Purple Radial) 1 2 Bd
[
<
000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F2021222324252627
28292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F
505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F7071727374757677
78797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9F
A0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDFE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF
F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF
>
<
1415161718191A1B1C1D1E1F1F202122232425262728292A2A2B2C2D2E2F30313233343536363738
393A3B3C3D3E3F40414142434445464748494A4B4C4D4D4E4F50515253545556575858595A5B5C5D
5E5F60616263646465666768696A6B6C6D6E6F6F707172737475767778797A7B7B7C7D7E7F808182
83848586868788898A8B8C8D8E8F90919292939495969798999A9B9C9D9D9E9FA0A1A2A3A4A5A6A7
A8A9A9AAABACADAEAFB0B1B2B3B4B4B5B6B7B8B9BABBBCBDBEBFC0C0C1C2C3C4C5C6C7C8C9CACBCB
CCCDCECFD0D1D2D3D4D5D6D7D7D8D9DADBDCDDDEDFE0E1E2E2E3E4E5E6E7E8E9EAEBECEDEEEEEFF0
F1F2F3F4F5F6F7F8F9F9FAFBFCFDFEFF
>
<
ABAAAAA9A8A7A7A6A5A5A4A3A3A2A1A1A09F9F9E9D9D9C9B9B9A9999989797969595949393929191
908F8F8E8D8D8C8B8B8A8989888787868585848383828181807F7F7E7D7D7C7B7B7A797978777776
7575747373727171706F6F6E6D6D6C6B6B6A6969686767666565646362626160605F5E5E5D5C5C5B
5A5A5958585756565554545352525150504F4E4E4D4C4C4B4A4A4948484746464544444342424140
403F3E3E3D3C3C3B3A3A3938383736363534343332323130302F2E2E2D2C2C2B2A2A292828272626
25242423222121201F1F1E1D1D1C1B1B1A1919181717161515141313121111100F0F0E0D0D0C0B0B
0A090908070706050504030302010100
>
0
1 %_Br
[
0 0.08 0.67 0 1 50 14 %_Bs
1 1 0 0 1 50 100 %_Bs
BD
%AI5_EndGradient
%AI5_End_NonPrinting--
%AI5_BeginPalette
0 2 Pb
Pn
Pc
1 g
Pc
0 g
Pc
0 0 0 0 k
Pc
0.75 g
Pc
0.5 g
Pc
0.25 g
Pc
0 g
Pc
Bb
2 (Black & White) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.25 0 0 0 k
Pc
0.5 0 0 0 k
Pc
0.75 0 0 0 k
Pc
1 0 0 0 k
Pc
0.25 0.25 0 0 k
Pc
0.5 0.5 0 0 k
Pc
0.75 0.75 0 0 k
Pc
1 1 0 0 k
Pc
Bb
2 (Pink, Yellow, Green) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0 0.25 0 0 k
Pc
0 0.5 0 0 k
Pc
0 0.75 0 0 k
Pc
0 1 0 0 k
Pc
0 0.25 0.25 0 k
Pc
0 0.5 0.5 0 k
Pc
0 0.75 0.75 0 k
Pc
0 1 1 0 k
Pc
Bb
0 0 0 0 Bh
2 (Yellow & Purple Radial) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0 0 0.25 0 k
Pc
0 0 0.5 0 k
Pc
0 0 0.75 0 k
Pc
0 0 1 0 k
Pc
0.25 0 0.25 0 k
Pc
0.5 0 0.5 0 k
Pc
0.75 0 0.75 0 k
Pc
1 0 1 0 k
Pc
Bb
2 (Rainbow) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.25 0.125 0 0 k
Pc
0.5 0.25 0 0 k
Pc
0.75 0.375 0 0 k
Pc
1 0.5 0 0 k
Pc
0.125 0.25 0 0 k
Pc
0.25 0.5 0 0 k
Pc
0.375 0.75 0 0 k
Pc
0.5 1 0 0 k
Pc
Bb
2 (Steel Bar) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0 0.25 0.125 0 k
Pc
0 0.5 0.25 0 k
Pc
0 0.75 0.375 0 k
Pc
0 1 0.5 0 k
Pc
0 0.125 0.25 0 k
Pc
0 0.25 0.5 0 k
Pc
0 0.375 0.75 0 k
Pc
0 0.5 1 0 k
Pc
Bb
2 (Purple, Red & Yellow) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.125 0 0.25 0 k
Pc
0.25 0 0.5 0 k
Pc
0.375 0 0.75 0 k
Pc
0.5 0 1 0 k
Pc
0.25 0 0.125 0 k
Pc
0.5 0 0.25 0 k
Pc
0.75 0 0.375 0 k
Pc
1 0 0.5 0 k
Pc
Bb
2 (Green & Blue) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.25 0.125 0.125 0 k
Pc
0.5 0.25 0.25 0 k
Pc
0.75 0.375 0.375 0 k
Pc
1 0.5 0.5 0 k
Pc
0.25 0.25 0.125 0 k
Pc
0.5 0.5 0.25 0 k
Pc
0.75 0.75 0.375 0 k
Pc
1 1 0.5 0 k
Pc
Bb
0 0 0 0 Bh
2 (Yellow & Orange Radial) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.125 0.25 0.125 0 k
Pc
0.25 0.5 0.25 0 k
Pc
0.375 0.75 0.375 0 k
Pc
0.5 1 0.5 0 k
Pc
0.125 0.25 0.25 0 k
Pc
0.25 0.5 0.5 0 k
Pc
0.375 0.75 0.75 0 k
Pc
0.5 1 1 0 k
Pc
0 0 0 0 k
Pc
0.125 0.125 0.25 0 k
Pc
0.25 0.25 0.5 0 k
Pc
0.375 0.375 0.75 0 k
Pc
0.5 0.5 1 0 k
Pc
0.25 0.125 0.25 0 k
Pc
0.5 0.25 0.5 0 k
Pc
0.75 0.375 0.75 0 k
Pc
1 0.5 1 0 k
Pc
PB
%AI5_EndPalette
%%EndSetup
%AI5_BeginLayer
% Layer 1 of llave-fig1: line art. m / l / c are the prolog's moveto / lineto / curveto
% wrappers and J / j / M / d set line cap, line join, miter limit and dash pattern.
% The remaining operators (Lb, Ln, A, O, g, Ar, w, D, XR, F, S, R, G) are defined in parts
% of the Illustrator procset not shown here -- presumably layer, gray, line-width, fill and
% stroke operators; confirm against the prolog before relying on that.
1 1 1 1 0 0 0 79 128 255 Lb
(Layer 1) Ln
0 A
0 O
0 g
800 Ar
0 J 0 j 1 w 4 M []0 d
%AI3_Note:
0 D
0 XR
% Two vertical segments at x = 144 and x = 433.5, each painted with F.
144 615 m
144 468 l
F
433.5 612 m
433.5 465 l
F
0 R
0 G
% First Bezier curve, running from the left edge (x = 145.3) to x = 432.
145.3333 553.3333 m
280.3333 547.3333 220.3201 477.904 289.5 480 c
339 481.5 350.8751 579 432 579 c
S
% Second Bezier curve, running from the right edge (x = 433.3) back to x = 145.5.
433.3333 494.6667 m
298.5948 504.9709 382.5 550.5 264 547.5 c
214.4936 546.2465 226.5828 511.9116 145.5 514.5 c
S
% Six short oblique segments in the right-hand part of the figure.
384 550.5 m
421.5 571.5 l
S
390.875 540.4375 m
428.375 561.4375 l
S
371.3125 522.5625 m
408.8125 543.5625 l
S
390.875 507.4375 m
428.375 528.4375 l
S
400.5625 502.625 m
426 516 l
S
357.5312 528.6875 m
382.9687 542.0625 l
S
% Four vertical segments of height 19.5 in the left-hand part.
153 546 m
153 526.5 l
S
166.5 543 m
166.5 523.5 l
S
186 544.5 m
186 525 l
S
175.5 541.5 m
175.5 522 l
S
% Four horizontal segments of length 63 in the middle part.
249 535.5 m
312 535.5 l
S
262.5 526.5 m
325.5 526.5 l
S
246 516 m
309 516 l
S
255 501 m
318 501 l
S
% Text objects of Layer 1. Each object is bracketed by To ... TO; Tp receives the text
% matrix (the six numbers before the trailing flag), Tf selects font and size, and Tx
% shows a string. TX discards its operand in the typography procset.
% Label "+" in 36 pt Helvetica, placed at (394.6667, 520).
0 To
1 0 0 1 394.6667 520 0 Tp
TP
0 Tr
0 O
0 g
/_Helvetica 36 Tf
0 Ts
100 Tz
0 Tt
1 TA
%_ 0 XL
36 0 Xb
XB
0 0 5 TC
100 100 200 TW
0 0 0 Ti
0 Ta
0 0 2 2 3 Th
0 Tq
0 0 Tl
0 Tc
0 Tw
(+) Tx
(\r) TX
TO
% Label "-" at (285, 495); the font set above is still in effect.
0 To
1 0 0 1 285 495 0 Tp
TP
0 Tr
(-) Tx
(\r) TX
TO
% Label "T(g)" at (358, 576) in 18 pt type: T in Times-Italic, parentheses in Helvetica
% and g in Symbol (a Greek gamma under the standard Symbol encoding).
0 To
1 0 0 1 358 576 0 Tp
TP
0 Tr
/_Times-Italic 18 Tf
(T) Tx
/_Helvetica 18 Tf
(\() Tx
/_Symbol 18 Tf
(g) Tx
/_Helvetica 18 Tf
(\)) Tx
(\r) TX
TO
% Label "g" in 18 pt Symbol at (441.3333, 493.3333), just right of the x = 433.5 line.
0 To
1 0 0 1 441.3333 493.3333 0 Tp
TP
0 Tr
/_Symbol 18 Tf
(g) Tx
(\r) TX
TO
LB
%AI5_EndLayer--
%%PageTrailer
% Emit the page. annotatepage is defined elsewhere in the prolog (not shown here).
gsave annotatepage grestore showpage
%%Trailer
% Shut the four procsets down by running the terminate procedure stored in each dictionary.
Adobe_Illustrator_AI5 /terminate get exec
Adobe_ColorImage_AI6 /terminate get exec
Adobe_typography_AI5 /terminate get exec
Adobe_level2_AI5 /terminate get exec
%%EOF
\end{filecontents*}
\begin{filecontents*}{llave-fig2.eps}
%!PS-Adobe-3.0 EPSF-3.0
%%Creator: Adobe Illustrator(r) 6.0
%%For: (combs) (Dept of Math, Univ of Texas )
%%Title: (llave-fig2.eps)
%%CreationDate: (5/13/99) (3:52 PM)
%%BoundingBox: 18 120 553 717
%%HiResBoundingBox: 18 120.5485 552.2673 716.4407
%%DocumentProcessColors: Black
%%DocumentFonts: Symbol
%%+ Times-Italic
%%+ Times-Roman
%%DocumentSuppliedResources: procset Adobe_level2_AI5 1.0 0
%%+ procset Adobe_typography_AI5 1.0 0
%%+ procset Adobe_Illustrator_AI6_vars Adobe_Illustrator_AI6
%%+ procset Adobe_Illustrator_AI5 1.0 0
%AI5_FileFormat 2.0
%AI3_ColorUsage: Black&White
%%AI6_ColorSeparationSet: 1 1 (AI6 Default Color Separation Set)
%%+ Options: 1 16 0 1 0 1 1 1 0 1 1 1 1 18 0 0 0 0 0 0 0 0 -1 -1
%%+ PPD: 1 21 0 0 60 45 2 2 1 0 0 1 0 0 0 0 0 0 0 0 0 0 ()
%AI3_TemplateBox: 306 396 306 396
%AI3_TileBox: 30 31 582 761
%AI3_DocumentPreview: Macintosh_ColorPic
%AI5_ArtSize: 612 792
%AI5_RulerUnits: 0
%AI5_ArtFlags: 1 0 0 1 0 0 1 1 0
%AI5_TargetResolution: 800
%AI5_NumLayers: 1
%AI5_OpenToView: 66 620 1.5 790 553 58 1 1 3 40
%AI5_OpenViewLayers: 7
%%EndComments
%%BeginProlog
%%BeginResource: procset Adobe_level2_AI5 1.2 0
%%Title: (Adobe Illustrator (R) Version 5.0 Level 2 Emulation)
%%Version: 1.2
%%CreationDate: (04/10/93) ()
%%Copyright: ((C) 1987-1993 Adobe Systems Incorporated All Rights Reserved)
% Create the Adobe_level2_AI5 dictionary (room for 23 entries), register it in userdict
% and leave it open so the definitions below are stored in it.
userdict /Adobe_level2_AI5 23 dict dup begin
 put
 % packedarray emulation. `where` leaves a dictionary on the stack when the name exists;
 % the 0 pushed in the fallback branch gives the trailing pop something to discard when
 % it does not.
 /packedarray where not
 {
  userdict begin
  % Stand-in that builds an ordinary read-only array from n operands.
  /packedarray
  {
   array astore readonly
  } bind def
  % No real packing mode here: setpacking drops its operand, currentpacking is false.
  /setpacking /pop load def
  /currentpacking false def
  end
  0
 } if
 pop
 % Remember the caller's packing mode in userdict, then switch packing on for this procset.
 userdict /defaultpacking currentpacking put true setpacking
 % initialize: push this dictionary onto the dictionary stack.
 /initialize
 {
  Adobe_level2_AI5 begin
 } bind def
 % terminate: pop this dictionary again, but only if it is the current dictionary.
 /terminate
 {
  currentdict Adobe_level2_AI5 eq
  {
   end
  } if
 } bind def
 % The mark collects whatever the `where` probes below leave on the stack; a later
 % cleartomark in this procset removes all of it in one go.
 mark
 % Custom (spot) colour emulation, installed only when setcustomcolor is missing.
 /setcustomcolor where not
 {
  % c m y k name findcmykcustomcolor -> 5-element packed array describing the colour.
  /findcmykcustomcolor
  {
   5 packedarray
  } bind def
  % array tint setcustomcolor: unpack the array, drop the name, scale each of the four
  % components by the tint, drop the tint and set the result as a CMYK colour.
  /setcustomcolor
  {
   exch aload pop pop
   4
   {
    4 index mul 4 1 roll
   } repeat
   5 -1 roll pop
   setcmykcolor
  }
  def
 } if

 % gt38?: true when the interpreter version, read as a number, is greater than 38; also
 % true when the version string cannot be converted (the attempt runs under `stopped`).
 /gt38? mark {version cvr cvx exec} stopped {cleartomark true} {38 gt exch pop} ifelse def
 % deviceDPI: length in device space of a 72-unit horizontal vector under the default
 % matrix, i.e. device units per 72 default user-space units.
 userdict /deviceDPI 72 0 matrix defaultmatrix dtransform dup mul exch dup mul add sqrt put
 % level2?: true when systemdict knows languagelevel and its value is at least 2.
 userdict /level2?
 systemdict /languagelevel known dup
 {
  pop systemdict /languagelevel get 2 ge
 } if
 put
% dict level2ScreenFreq -> frequency
% Reads a halftone dictionary. Starts from a default of 60 and replaces it with Frequency
% (HalftoneType 1), GrayFrequency (type 2) or, for type 5, the result of recursing into
% the Default sub-dictionary.
/level2ScreenFreq
{
 begin
  60
  HalftoneType 1 eq
  {
   pop Frequency
  } if
  HalftoneType 2 eq
  {
   pop GrayFrequency
  } if
  HalftoneType 5 eq
  {
   pop Default level2ScreenFreq
  } if
 end
} bind def
% currentScreenFreq is evaluated once, here: from the current halftone dictionary on
% Level 2, otherwise from the frequency operand returned by currentscreen.
userdict /currentScreenFreq
 level2? {currenthalftone level2ScreenFreq} {currentscreen pop pop} ifelse put
% Level 1 fallbacks: each operator below is defined only when `where` cannot find it.
% When an operator does exist, `where` leaves its dictionary on the stack; the final
% cleartomark removes those leftovers together with the mark pushed earlier.
level2? not
 {
  /setcmykcolor where not
  {
   % c m y k setcmykcolor -> gray = 1 - (k + .11 y + .59 m + .3 c), set with setgray.
   /setcmykcolor
   {
    exch .11 mul add exch .59 mul add exch .3 mul add
    1 exch sub setgray
   } def
  } if
  /currentcmykcolor where not
  {
   % Report the current gray level as pure black ink: 0 0 0 (1 - gray).
   /currentcmykcolor
   {
    0 0 0 1 currentgray sub
   } def
  } if
  /setoverprint where not
  {
   % No overprint support: the flag is simply discarded.
   /setoverprint /pop load def
  } if
  /selectfont where not
  {
   % key scale|matrix selectfont: findfont followed by scalefont (number) or makefont
   % (array), then setfont.
   /selectfont
   {
    exch findfont exch
    dup type /arraytype eq
    {
     makefont
    }
    {
     scalefont
    } ifelse
    setfont
   } bind def
  } if
  /cshow where not
  {
   % proc string cshow: builds a new procedure that pushes 0 0 and then runs the body
   % of proc, and applies it to every character code of string with forall. The
   % callback therefore always sees 0 0 in place of the glyph width.
   /cshow
   {
    [
    0 0 5 -1 roll aload pop
    ] cvx bind forall
   } bind def
  } if
 } if
 cleartomark
 % a b c d anyColor? -> bool: true when the sum of the four numbers is non-zero.
 /anyColor?
 {
  add add add 0 ne
 } bind def
 % c m y k testColor -> c' m' y' k': sets the colour inside gsave/grestore and returns
 % what currentcmykcolor reports for it, leaving the graphics state untouched.
 /testColor
 {
  gsave
  setcmykcolor currentcmykcolor
  grestore
 } bind def
 % c m y k testCMYKColorThrough -> bool: true when any component survives the round trip.
 /testCMYKColorThrough
 {
  testColor anyColor?
 } bind def
 % composite?: on Level 2, true when 1 1 1 1 setcmykcolor reads back with a component sum
 % of 4; on Level 1, true when each of the four single-ink test colours reads back non-zero.
 userdict /composite?
 level2?
 {
  gsave 1 1 1 1 setcmykcolor currentcmykcolor grestore
  add add add 4 eq
 }
 {
  1 0 0 0 testCMYKColorThrough
  0 1 0 0 testCMYKColorThrough
  0 0 1 0 testCMYKColorThrough
  0 0 0 1 testCMYKColorThrough
  and and and
 } ifelse
 put
 % Not composite: record in userdict which single inks read back non-zero, whether any
 % of them does (isCMYKSep?), and the negation of that (customColor?).
 composite? not
 {
  userdict begin
  gsave
  /cyan? 1 0 0 0 testCMYKColorThrough def
  /magenta? 0 1 0 0 testCMYKColorThrough def
  /yellow? 0 0 1 0 testCMYKColorThrough def
  /black? 0 0 0 1 testCMYKColorThrough def
  grestore
  /isCMYKSep? cyan? magenta? yellow? black? or or or def
  /customColor? isCMYKSep? not def
  end
 } if
 % Close Adobe_level2_AI5 and restore the packing mode that was saved in defaultpacking.
 end defaultpacking setpacking
%%EndResource
%%BeginResource: procset Adobe_typography_AI5 1.0 1
%%Title: (Typography Operators)
%%Version: 1.0
%%CreationDate:(03/26/93) ()
%%Copyright: ((C) 1987-1993 Adobe Systems Incorporated All Rights Reserved)
% Leave the caller's packing mode on the operand stack (the setpacking that closes this
% procset consumes it) and enable packing while the procedures are defined.
currentpacking true setpacking
% Create the Adobe_typography_AI5 dictionary (room for 54 entries) and open it.
userdict /Adobe_typography_AI5 54 dict dup begin
put
% dict1 dict2 initialize: with the two operand dictionaries and this one on the
% dictionary stack, bind every executable value stored in this dictionary; then pop all
% three and push only this dictionary again.
/initialize
{
 begin
 begin
 Adobe_typography_AI5 begin
 Adobe_typography_AI5
 {
  dup xcheck
  {
   bind
  } if
  pop pop
 } forall
 end
 end
 end
 Adobe_typography_AI5 begin
} def
% terminate: pop this dictionary, but only if it is the current dictionary.
/terminate
{
 currentdict Adobe_typography_AI5 eq
 {
  end
 } if
} def
% mark item1 ... itemN array modifyEncoding -> array
% Applies an edit list to an encoding array. Items are taken oldest first (counttomark
% -1 roll). An integer sets the write position _pntr; a name is stored at _pntr, which is
% then advanced by one. Reaching the mark ends the loop and removes the mark.
/modifyEncoding
{
 /_tempEncode exch ddef
 /_pntr 0 ddef
 {
  counttomark -1 roll
  dup type dup /marktype eq
  {
   pop pop exit
  }
  {
   /nametype eq
   {
    _tempEncode /_pntr dup load dup 3 1 roll 1 add ddef 3 -1 roll
    put
   }
   {
    /_pntr exch ddef
   } ifelse
  } ifelse
 } loop
 _tempEncode
} def
% TE: apply the pending edit list to a fresh 256-entry copy of StandardEncoding and keep
% the result as _nativeEncoding.
/TE
{
 StandardEncoding 256 array copy modifyEncoding
 /_nativeEncoding exch def
} def
%
% TZ: define a derived font.
% Operands, consumed from the top of the stack: an optional array (kept as _wv and passed
% to makeblendedfont), the native-encoding flag, two values that are discarded, the name
% of the base font and the name for the new font. Anything between a mark and those
% operands is treated as an encoding edit list for modifyEncoding.
/TZ
{
 dup type /arraytype eq
 {
  /_wv exch def
 }
 {
  /_wv 0 def
 } ifelse
 /_useNativeEncoding exch def
 pop pop
 findfont _wv type /arraytype eq
 {
  _wv makeblendedfont
 } if
 % Copy every entry of the base font except FID into a new dictionary.
 dup length 2 add dict
 begin
 mark exch
 {
  1 index /FID ne
  {
   def
  } if
  cleartomark mark
 } forall
 pop
 /FontName exch def
 % No edit list: use _nativeEncoding when the flag is 1. Otherwise apply the edit list
 % to a copy of the font's own Encoding.
 counttomark 0 eq
 {
  1 _useNativeEncoding eq
  {
   /Encoding _nativeEncoding def
  } if
  cleartomark
 }
 {
  /Encoding load 256 array copy
  modifyEncoding /Encoding exch def
 } ifelse
 FontName currentdict
 end
 definefont pop
} def
% string tr -> _ax _ay string: slips the per-character adjustment under the top operand.
/tr
{
 _ax _ay 3 2 roll
} def
% string trj -> _cx _cy _sp _ax _ay string: as tr, preceded by the extra adjustment
% (_cx, _cy) and the character code _sp it applies to.
/trj
{
 _cx _cy _sp _ax _ay 6 5 roll
} def
% a0: entry 2 of _renderStart. Installs Tx and Tj variants that run _psf (or _pjsf) on
% the string and then _pss (or _pjss) on a copy of it from the same start point.
% _psf/_pss/_pjsf/_pjss are empty placeholders in the vars dictionary; presumably they
% are replaced by fill and stroke show procedures elsewhere -- confirm.
/a0
{
 /Tx
 {
  dup
  currentpoint 3 2 roll
  tr _psf
  newpath moveto
  tr _ctm _pss
 } ddef
 /Tj
 {
  dup
  currentpoint 3 2 roll
  trj _pjsf
  newpath moveto
  trj _ctm _pjss
 } ddef
} def
% a1: entry 6 of _renderStart. Same painting as a0 inside gsave/grestore; afterwards the
% glyph outlines are appended to the current path with sp (or jsp) from the start point.
/a1
{
 /Tx
 {
  dup currentpoint 4 2 roll gsave
  dup currentpoint 3 2 roll
  tr _psf
  newpath moveto
  tr _ctm _pss
  grestore 3 1 roll moveto tr sp
 } ddef
 /Tj
 {
  dup currentpoint 4 2 roll gsave
  dup currentpoint 3 2 roll
  trj _pjsf
  newpath moveto
  trj _ctm _pjss
  grestore 3 1 roll moveto tr jsp
 } ddef
} def
% e0: entry 0 of _renderStart. Tx and Tj hand the string, with its adjustment operands,
% to _psf and _pjsf respectively.
/e0
{
 /Tx
 {
  tr _psf
 } ddef
 /Tj
 {
  trj _pjsf
 } ddef
} def
% e1: entry 4 of _renderStart. Same painting as e0 inside gsave/grestore; afterwards the
% glyph outlines are appended to the current path with sp (or jsp) from the start point.
/e1
{
 /Tx
 {
  dup currentpoint 4 2 roll gsave
  tr _psf
  grestore 3 1 roll moveto tr sp
 } ddef
 /Tj
 {
  dup currentpoint 4 2 roll gsave
  trj _pjsf
  grestore 3 1 roll moveto tr jsp
 } ddef
} def
% i0: entry 7 of _renderStart. Nothing is painted; Tx and Tj only append the glyph
% outlines to the current path.
/i0
{
 /Tx
 {
  tr sp
 } ddef
 /Tj
 {
  trj jsp
 } ddef
} def
% i1: the _renderEnd action for entries 4 to 7. Runs W then N, both defined elsewhere
% (presumably clip and end-path -- confirm).
/i1
{
 W N
} def
% o0: entry 3 of _renderStart. Nothing is painted; the current point just advances by
% the width computed by sw (or swj).
/o0
{
 /Tx
 {
  tr sw rmoveto
 } ddef
 /Tj
 {
  trj swj rmoveto
 } ddef
} def
% r0: entry 1 of _renderStart. Tx and Tj pass the string and the saved matrix _ctm to
% _pss and _pjss respectively.
/r0
{
 /Tx
 {
  tr _ctm _pss
 } ddef
 /Tj
 {
  trj _ctm _pjss
 } ddef
} def
% r1: entry 5 of _renderStart. Same painting as r0 inside gsave/grestore on a fresh path;
% afterwards the glyph outlines are appended to the current path with sp (or jsp).
/r1
{
 /Tx
 {
  dup currentpoint 4 2 roll currentpoint gsave newpath moveto
  tr _ctm _pss
  grestore 3 1 roll moveto tr sp
 } ddef
 /Tj
 {
  dup currentpoint 4 2 roll currentpoint gsave newpath moveto
  trj _ctm _pjss
  grestore 3 1 roll moveto tr jsp
 } ddef
} def
% x To: begin a text object. The operand is discarded and the current matrix is saved
% in _ctm.
/To
{
 pop _ctm currentmatrix pop
} def
% TO: end a text object. Runs the pending render-end action (iTe), restores the matrix
% saved by To and clears the path.
/TO
{
 iTe _ctm setmatrix newpath
} def
% a b c d tx ty x Tp: store the six matrix numbers in _tm (the last operand is dropped),
% restore the saved matrix, then open _tDict and define W and h there as no-ops. The
% dictionary stays open until TP.
/Tp
{
 pop _tm astore pop _ctm setmatrix
 _tDict begin
 /W
 {
 } def
 /h
 {
 } def
} def
% TP: close _tDict, install the text matrix and move to its origin.
/TP
{
 end
 iTm 0 0 moveto
} def
% mode Tr: select the text render mode.
% When the previous mode is 3 or less the path is reset, keeping the current point.
% Modes 8 and 9 are mapped to 0 and 1. The mode is stored in _render and the matching
% _renderStart procedure is executed to install Tx and Tj.
/Tr
{
 _render 3 le
 {
  currentpoint newpath moveto
 } if
 dup 8 eq
 {
  pop 0
 }
 {
  dup 9 eq
  {
   pop 1
  } if
 } ifelse
 dup /_render exch ddef
 _renderStart exch get load exec
} def
% iTm: rebuild the text coordinate system: saved matrix _ctm, then the text matrix _tm,
% then a vertical shift by _rise and a horizontal scale by _hs.
/iTm
{
 _ctm setmatrix _tm concat 0 _rise translate _hs 1 scale
} def
% a b c d tx ty Tm: store a new text matrix, install it and move to its origin.
/Tm
{
 _tm astore pop iTm 0 0 moveto
} def
% tx ty Td: prepend a translation to the text matrix (built in _mtx and concatenated into
% _tm), install the result and move to its origin.
/Td
{
 _mtx translate _tm _tm concatmatrix pop iTm 0 0 moveto
} def
% iTe: finish the current render mode. If a mode is active (_render is not -1) and its
% _renderEnd entry is non-null, that procedure is executed. _render is reset to -1
% in every case.
/iTe
{
 _render -1 eq
 {
 }
 {
  _renderEnd _render get dup null ne
  {
   load exec
  }
  {
   pop
  } ifelse
 } ifelse
 /_render -1 ddef
} def
% Text-state operators. Several of them only discard their operands in this procset.
% x Ta: operand discarded.
/Ta
{
 pop
} def
% name size Tf: remember size/1000 in _fScl and select the font.
/Tf
{
 dup 1000 div /_fScl exch ddef
%
 selectfont
} def
% a b Tl: b is discarded; _leading becomes [0 a].
/Tl
{
 pop
 0 exch _leading astore pop
} def
% x Tt: operand discarded.
/Tt
{
 pop
} def
% a b c TW: three operands discarded.
/TW
{
 3 npop
} def
% x Tw: store x in _cx.
/Tw
{
 /_cx exch ddef
} def
% a b c TC: three operands discarded.
/TC
{
 3 npop
} def
% x Tc: store x in _ax.
/Tc
{
 /_ax exch ddef
} def
% x Ts: store x in _rise, rebuild the text matrix and move to the coordinates the
% current point had before the rebuild.
/Ts
{
 /_rise exch ddef
 currentpoint
 iTm
 moveto
} def
% a b c Ti: three operands discarded.
/Ti
{
 3 npop
} def
% pct Tz: store pct/100 in _hs (horizontal scale) and rebuild the text matrix.
/Tz
{
 100 div /_hs exch ddef
 iTm
} def
% x TA: operand discarded.
/TA
{
 pop
} def
% x Tq: operand discarded.
/Tq
{
 pop
} def
% a b c d e Th: five operands discarded.
/Th
{
 pop pop pop pop pop
} def
% string TX: operand discarded.
/TX
{
 pop
} def
% a b Tk: a is discarded; the current point moves left by b times _fScl.
/Tk
{
 exch pop _fScl mul neg 0 rmoveto
} def
% a b TK: two operands discarded.
/TK
{
 2 npop
} def
% T*: translate the text matrix by the _leading pair with its second element negated.
/T*
{
 _leading aload pop neg Td
} def
% T*-: translate the text matrix by the _leading pair as stored.
/T*-
{
 _leading aload pop Td
} def
% T-: step back by _ax and show the _hyphen string with Tx.
/T-
{
 _ax neg 0 rmoveto
 _hyphen Tx
} def
% T+: no-op.
/T+
{
} def
% a b c d tx ty TR: save the current matrix in _ctm, store the operands in _tm, install
% the text matrix and move to its origin.
/TR
{
 _ctm currentmatrix pop
 _tm astore pop
 iTm 0 0 moveto
} def
% string flag TS: show string in the font /_Symbol_ at the current size (_fScl * 1000),
% using Tx when flag is 0 and Tj otherwise, then restore the previous font.
/TS
{
 currentfont 3 1 roll
 /_Symbol_ _fScl 1000 mul selectfont

 0 eq
 {
  Tx
 }
 {
  Tj
 } ifelse
 setfont
} def
% a b Xb: two operands discarded. Tb is an alias for the same procedure.
/Xb
{
 pop pop
} def
/Tb /Xb load def
% a b c d Xe: four operands discarded. Te is an alias for the same procedure.
/Xe
{
 pop pop pop pop
} def
/Te /Xe load def
% XB: no-op. TB is an alias for the same procedure.
/XB
{
} def
/TB /XB load def
% Make the dictionary read-only, close it and restore the packing mode that was left on
% the stack when this procset started.
currentdict readonly pop
end
setpacking
%%EndResource
%%BeginProcSet: Adobe_ColorImage_AI6 1.0 0
% Create the Adobe_ColorImage_AI6 dictionary (room for 17 entries) unless userdict
% already has one, then open it for the definitions below.
userdict /Adobe_ColorImage_AI6 known not
{
 userdict /Adobe_ColorImage_AI6 17 dict put
} if
userdict /Adobe_ColorImage_AI6 get begin

 % initialize: push this dictionary and bind every executable array stored in it.
 /initialize
 {
  Adobe_ColorImage_AI6 begin
  Adobe_ColorImage_AI6
  {
   dup type /arraytype eq
   {
    dup xcheck
    {
     bind
    } if
   } if
   pop pop
  } forall
 } def
 % terminate: pop the dictionary pushed by initialize.
 /terminate { end } def

 % Working variables live in a separate 14-entry dictionary, created once.
 currentdict /Adobe_ColorImage_AI6_Vars known not
 {
  /Adobe_ColorImage_AI6_Vars 14 dict def
 } if

 % Initial values: the first four are used by colorimage and its helpers, the XI*
 % entries hold the image parameters parsed by XI.
 Adobe_ColorImage_AI6_Vars begin
  /channelcount 0 def
  /sourcecount 0 def
  /sourcearray 4 array def
  /plateindex -1 def
  /XIMask 0 def
  /XIBinary 0 def
  /XIChannelCount 0 def
  /XIBitsPerPixel 0 def
  /XIImageHeight 0 def
  /XIImageWidth 0 def
  /XIImageMatrix null def
  /XIBuffer null def
  /XIDataProc null def
 end

 % The procedure names are entered as null first; the real definitions follow below.
 /WalkRGBString null def
 /WalkCMYKString null def

 /StuffRGBIntoGrayString null def
 /RGBToGrayImageProc null def
 /StuffCMYKIntoGrayString null def
 /CMYKToGrayImageProc null def
 /ColorImageCompositeEmulator null def

 /SeparateCMYKImageProc null def

 /FourEqual null def
 /TestPlateIndex null def

 % _colorimage keeps whatever colorimage the interpreter already provides (null when
 % there is none), captured before this dictionary defines its own colorimage.
 currentdict /_colorimage known not
 {
  /colorimage where
  {
   /colorimage get /_colorimage exch def
  }
  {
   /_colorimage null def
  } ifelse
 } if

 % Direct reference to systemdict's currenttransfer.
 /_currenttransfer systemdict /currenttransfer get def

 /colorimage null def
 /XI null def


 % dst src proc dst WalkRGBString -> dst
 % Steps through src three bytes at a time. For each triple the stack is
 % `dst index b0 b1 b2` when proc runs; proc must consume the three bytes and leave the
 % updated index. The five working operands are dropped at the end, leaving dst.
 /WalkRGBString
 {
  0 3 index

  dup length 1 sub 0 3 3 -1 roll
  {
   3 getinterval { } forall

   5 index exec

   3 index
  } for

   5 { pop } repeat

 } def


 % dst src proc dst WalkCMYKString -> dst
 % Same walk as WalkRGBString, taking four bytes per step.
 /WalkCMYKString
 {
  0 3 index

  dup length 1 sub 0 4 3 -1 roll
  {
   4 getinterval { } forall

   6 index exec

   3 index

  } for

  5 { pop } repeat

 } def


 % dst index r g b StuffRGBIntoGrayString -> dst index+1
 % Stores the integer part of .3 r + .59 g + .11 b at dst[index].
 /StuffRGBIntoGrayString
 {
  .11 mul exch

  .59 mul add exch

  .3 mul add

  cvi 3 copy put

  pop 1 add
 } def


 % Data procedure: runs the first source procedure and returns a string one third as
 % long, holding the gray value of each RGB triple.
 /RGBToGrayImageProc
 {
  Adobe_ColorImage_AI6_Vars begin
   sourcearray 0 get exec
   dup length 3 idiv string
   dup 3 1 roll

   /StuffRGBIntoGrayString load exch
   WalkRGBString
  end
 } def


 % dst index c m y k StuffCMYKIntoGrayString -> dst index+1
 % Computes k + .11 y + .59 m + .3 c, limits it to 255 and stores 255 minus that value
 % (integer part) at dst[index].
 /StuffCMYKIntoGrayString
 {
  exch .11 mul add

  exch .59 mul add

  exch .3 mul add

  dup 255 gt { pop 255 } if

  255 exch sub cvi 3 copy put

  pop 1 add
 } def


 % Data procedure: runs the first source procedure and returns a string one quarter as
 % long, holding the gray value of each CMYK quadruple.
 /CMYKToGrayImageProc
 {
  Adobe_ColorImage_AI6_Vars begin
   sourcearray 0 get exec
   dup length 4 idiv string
   dup 3 1 roll

   /StuffCMYKIntoGrayString load exch
   WalkCMYKString
  end
 } def


 % Emulates colorimage with the gray-only image operator; takes colorimage's operands.
 % The component count on top is dropped. If the next operand equals true, all remaining
 % operands (sourcecount + 5 of them) are discarded and nothing is drawn. Otherwise, for
 % a channel count other than 1, the data procedure is stored in sourcearray[0] and
 % replaced by the matching gray conversion procedure before image is called.
 /ColorImageCompositeEmulator
 {
  pop true eq
  {
   Adobe_ColorImage_AI6_Vars /sourcecount get 5 add { pop } repeat
  }
  {
   Adobe_ColorImage_AI6_Vars /channelcount get 1 ne
   {
    Adobe_ColorImage_AI6_Vars begin
     sourcearray 0 3 -1 roll put

     channelcount 3 eq
     {
      /RGBToGrayImageProc
     }
     {
      /CMYKToGrayImageProc
     } ifelse
     load
    end
   } if
   image
  } ifelse
 } def


 % Data procedure that delivers a single plate of a CMYK image.
 % With separate sources (sourcecount non-zero) it runs the source procedure stored for
 % plateindex. Otherwise it reads interleaved data from the first source and returns a
 % string one quarter as long, holding 255 minus every fourth byte starting at offset
 % plateindex.
 /SeparateCMYKImageProc
 {
  Adobe_ColorImage_AI6_Vars begin

   sourcecount 0 ne
   {
    sourcearray plateindex get exec
   }
   {
    sourcearray 0 get exec

    dup length 4 idiv string

    0 2 index

    plateindex 4 2 index length 1 sub
    {
     get 255 exch sub

     3 copy put pop 1 add

     2 index
    } for

    pop pop exch pop
   } ifelse
  end
 } def


 % a b c d w x y z FourEqual -> a b c d bool
 % Compares the top four values with the four beneath them, last pair first, and stops at
 % the first mismatch. The top four are consumed; the lower four stay on the stack.
 /FourEqual
 {
  4 index ne
  {
   pop pop pop false
  }
  {
   4 index ne
   {
    pop pop false
   }
   {
    4 index ne
    {
     pop false
    }
    {
     4 index eq
    } ifelse
   } ifelse
  } ifelse
 } def


 % TestPlateIndex -> plateindex
 % Works out which plate, if any, the device is rendering. Each of the four pure inks is
 % set in turn and 1 - currentgray is recorded. A response pattern of 1 0 0 0, 0 1 0 0,
 % 0 0 1 0 or 0 0 0 1 gives index 0, 1, 2 or 3; all zeros gives 5. Any other pattern, or
 % a missing setcmykcolor, leaves -1. The result is also stored in plateindex.
 /TestPlateIndex
 {
  Adobe_ColorImage_AI6_Vars begin
   /plateindex -1 def

   /setcmykcolor where
   {
    pop
    gsave
    1 0 0 0 setcmykcolor systemdict /currentgray get exec 1 exch sub
    0 1 0 0 setcmykcolor systemdict /currentgray get exec 1 exch sub
    0 0 1 0 setcmykcolor systemdict /currentgray get exec 1 exch sub
    0 0 0 1 setcmykcolor systemdict /currentgray get exec 1 exch sub
    grestore

    1 0 0 0 FourEqual
    {
     /plateindex 0 def
    }
    {
     0 1 0 0 FourEqual
     {
      /plateindex 1 def
     }
     {
      0 0 1 0 FourEqual
      {
       /plateindex 2 def
      }
      {
       0 0 0 1 FourEqual
       {
        /plateindex 3 def
       }
       {
        0 0 0 0 FourEqual
        {
         /plateindex 5 def
        } if
       } ifelse
      } ifelse
     } ifelse
    } ifelse
    % Drop the four measured responses.
    pop pop pop pop
   } if
   plateindex
  end
 } def


 % Replacement for colorimage; takes the same operands as the standard operator.
 % Dispatch:
 %  - bits per sample other than 8 or 1: hand over to the saved _colorimage, or discard
 %    the operands when there is none;
 %  - 3 components, or plate index -1 or 5: composite path (gray emulation when there is
 %    no native colorimage, plain image for 1 component, otherwise _colorimage);
 %  - plate index 0..3: render a single plate through SeparateCMYKImageProc.
 /colorimage
 {
  Adobe_ColorImage_AI6_Vars begin
   /channelcount 1 index def
   % NOTE(review): the multi-source operand is compared with the integer 1. A boolean
   % operand never equals 1 under PostScript eq, so sourcecount is 0 whenever a boolean
   % is passed; XI in this procset only ever passes false.
   /sourcecount 2 index 1 eq { channelcount 1 sub } { 0 } ifelse def

   % Fetch the bits-per-sample operand and test that it is neither 8 nor 1.
   4 sourcecount add index dup
   8 eq exch 1 eq or not
  end

  {
   /_colorimage load null ne
   {
    _colorimage
   }
   {
    Adobe_ColorImage_AI6_Vars /sourcecount get
    7 add { pop } repeat
   } ifelse
  }
  {
   dup 3 eq
   TestPlateIndex
   dup -1 eq exch 5 eq or or
   {
    /_colorimage load null eq
    {
     ColorImageCompositeEmulator
    }
    {
     dup 1 eq
     {
      pop pop image
     }
     {
      % Plate index 5: draw under a temporary constant transfer function (always 0 or
      % always 1, chosen from how the current transfer function maps 0 and 1).
      Adobe_ColorImage_AI6_Vars /plateindex get 5 eq
      {
       gsave

       0 _currenttransfer exec
       1 _currenttransfer exec
       eq
       { 0 _currenttransfer exec 0.5 lt }
       { 0 _currenttransfer exec 1 _currenttransfer exec gt } ifelse

       { { pop 0 } } { { pop 1 } } ifelse
       systemdict /settransfer get exec
      } if

      _colorimage

      Adobe_ColorImage_AI6_Vars /plateindex get 5 eq
      {
       grestore
      } if
     } ifelse
    } ifelse
   }
   {
    dup 1 eq
    {
     pop pop
     image
    }
    {
     pop pop

     % Move the data source(s) into sourcearray and draw with the plate extractor.
     Adobe_ColorImage_AI6_Vars begin
      sourcecount -1 0
      {
       exch sourcearray 3 1 roll put
      } for

      /SeparateCMYKImageProc load
     end

     systemdict /image get exec
    } ifelse
   } ifelse
  } ifelse
 } def

 % XI: draw an embedded raster image whose data follows in the file.
 % Operands, taken from the top of the stack: mask flag, binary flag, two discarded
 % values, channel count, bits per pixel, height, width, four discarded values and the
 % image matrix. Data is read from currentfile one row buffer at a time, raw when the
 % binary flag is set and hex-encoded otherwise.
 /XI
 {
  Adobe_ColorImage_AI6_Vars begin
   gsave
   /XIMask exch 0 ne def
   /XIBinary exch 0 ne def
   pop
   pop
   /XIChannelCount exch def
   /XIBitsPerPixel exch def
   /XIImageHeight exch def
   /XIImageWidth exch def
   pop pop pop pop
   /XIImageMatrix exch def

   % Row buffer: ceil(width / 8) bytes for 1-bit data, otherwise width * channels bytes.
   XIBitsPerPixel 1 eq
   {
    XIImageWidth 8 div ceiling cvi
   }
   {
    XIImageWidth XIChannelCount mul
   } ifelse
   /XIBuffer exch string def

   XIBinary
   {
    /XIDataProc { currentfile XIBuffer readstring pop } def
    % Read and discard one line (up to 128 bytes) before the binary data starts.
    currentfile 128 string readline pop pop
   }
   {
    /XIDataProc { currentfile XIBuffer readhexstring pop } def
   } ifelse

   0 0 moveto
   XIImageMatrix concat
   XIImageWidth XIImageHeight scale

   XIMask
   {
    XIImageWidth XIImageHeight
    false
    [ XIImageWidth 0 0 XIImageHeight neg 0 0 ]
    /XIDataProc load

    % Run _fc with _lp set to /null, then set _lp to /imagemask; _fc is defined
    % elsewhere (presumably it applies the current fill colour -- confirm).
    /_lp /null ddef
    _fc
    /_lp /imagemask ddef

    imagemask
   }
   {
    XIImageWidth XIImageHeight
    XIBitsPerPixel
    [ XIImageWidth 0 0 XIImageHeight neg 0 0 ]
    /XIDataProc load

    XIChannelCount 1 eq
    {

     gsave
     0 setgray

     image

     grestore
    }
    {
     false
     XIChannelCount
     colorimage
    } ifelse
   } ifelse
   grestore
  end
 } def

end
%%EndProcSet
%%BeginResource: procset Adobe_Illustrator_AI5 1.1 0
%%Title: (Adobe Illustrator (R) Version 5.0 Full Prolog)
%%Version: 1.1
%%CreationDate: (3/7/1994) ()
%%Copyright: ((C) 1987-1994 Adobe Systems Incorporated All Rights Reserved)
% Leave the caller's packing mode on the operand stack and enable packing.
currentpacking true setpacking
% Adobe_Illustrator_AI5_vars (room for 81 entries) holds the mutable state of the
% Illustrator procset; ddef writes into this dictionary.
userdict /Adobe_Illustrator_AI5_vars 81 dict dup begin
put
/_eo false def
/_lp /none def
% Paint and show hooks, empty until something installs real procedures.
/_pf
{
} def
/_ps
{
} def
/_psf
{
} def
/_pss
{
} def
/_pjsf
{
} def
/_pjss
{
} def
/_pola 0 def
/_doClip 0 def
% Flatness in effect when the prolog is loaded.
/cf currentflat def
% Text state: text matrix, render-mode dispatch tables (indexed by the Tr mode), current
% render mode (-1 = none), rise, spacing adjustments and leading.
/_tm matrix def
/_renderStart
[
/e0 /r0 /a0 /o0 /e1 /r1 /a1 /i0
] def
/_renderEnd
[
null null null null /i1 /i1 /i1 /i1
] def
/_render -1 def
/_rise 0 def
/_ax 0 def
/_ay 0 def
/_cx 0 def
/_cy 0 def
/_leading
[
0 0
] def
/_ctm matrix def
/_mtx matrix def
% Character code 32 (space), written in radix notation.
/_sp 16#020 def
/_hyphen (-) def
/_fScl 0 def
/_cnt 0 def
/_hs 1 def
% Scratch variables used by the font re-encoding procedures.
/_nativeEncoding 0 def
/_useNativeEncoding 0 def
/_tempEncode 0 def
/_pntr 0 def
/_tDict 2 dict def
/_wv 0 def
% Tx / Tj are empty until a render mode installs them.
/Tx
{
} def
/Tj
{
} def
/CRender
{
} def
/_AI3_savepage
{
} def
% Fill-side and stroke-side state (_g*, _c*, _i*, _o* entries plus the _fc / _sc hooks);
% the code that reads and writes them lies outside this excerpt.
/_gf null def
/_cf 4 array def
/_if null def
/_of false def
/_fc
{
} def
/_gs null def
/_cs 4 array def
/_is null def
/_os false def
/_sc
{
} def
/_pd 1 dict def
/_ed 15 dict def
/_pm matrix def
/_fm null def
/_fd null def
/_fdd null def
/_sm null def
/_sd null def
/_sdd null def
/_i null def
% Scratch state; the names suggest layer skipping and scanning of the input file, but the
% code that uses them is not part of this excerpt.
/discardSave null def
/buffer 256 string def
/beginString null def
/endString null def
/endStringLength null def
/layerCnt 1 def
/layerCount 1 def
% Character code of the percent sign.
/perCent (%) 0 get def
/perCentSeen? false def
/newBuff null def
/newBuffButFirst null def
/newBuffLast null def
/clipForward? false def
end
% Create the Adobe_Illustrator_AI5 dictionary (room for 91 entries) unless userdict
% already has one, then open it for the definitions below.
userdict /Adobe_Illustrator_AI5 known not {
 userdict /Adobe_Illustrator_AI5 91 dict put
} if
userdict /Adobe_Illustrator_AI5 get begin
% initialize: push the procset and its vars dictionary, bind every value in discardDict,
% then, with the nc dictionary temporarily on the dictionary stack, bind every executable
% non-operator entry of the procset. Ends with an empty current path. discardDict and nc
% are defined elsewhere in the prolog.
/initialize
{
 Adobe_Illustrator_AI5 dup begin
 Adobe_Illustrator_AI5_vars begin
 discardDict
 {
  bind pop pop
 } forall
 dup /nc get begin
 {
  dup xcheck 1 index type /operatortype ne and
  {
   bind
  } if
  pop pop
 } forall
 end
 newpath
} def
% terminate: pop the two dictionaries pushed by initialize.
/terminate
{
 end
 end
} def
/_
null def
/ddef
{
 Adobe_Illustrator_AI5_vars 3 1 roll put
} def
% key value dictname xput -
% Stores key/value in the dictionary bound to dictname.  If that dictionary
% is full (length equals maxlength) it is first copied into a new dictionary
% of twice the length, which is bound to dictname in the current dictionary.
/xput
{
 dup load dup length exch maxlength eq
 {
  dup dup load dup
  length 2 mul dict copy def
 } if
 load begin
 def
 end
} def
% n npop -
% Pops n objects off the operand stack.
/npop
{
 {
  pop
 } repeat
} def
% ax ay string sw wx wy
% Returns the stringwidth of string plus (ax, ay) multiplied by the number
% of characters in it.
/sw
{
 dup length exch stringwidth
 % wx + ax * length
 exch 5 -1 roll 3 index mul add
 % wy + ay * length
 4 1 roll 3 1 roll mul add
} def
% cx cy char ax ay string swj wx wy
% Same computation as sw, and additionally adds (cx, cy) once for every
% character of string whose code equals char.
/swj
{
 % Keep a second copy of the string for the counting loop.
 dup 4 1 roll
 dup length exch stringwidth
 exch 5 -1 roll 3 index mul add
 4 1 roll 3 1 roll mul add
 % Bring cx cy char string back to the top and count matches in _cnt.
 6 2 roll /_cnt 0 ddef
 {
  1 index eq
  {
   /_cnt _cnt 1 add ddef
  } if
 } forall
 pop
 % Add _cnt * (cx, cy) to the width computed above.
 exch _cnt mul exch _cnt mul 2 index add 4 1 roll 2 index add 4 1 roll pop pop
} def
% ax ay string matrix ss -
% Strokes the outline of each character of string with matrix installed as
% the CTM for the stroke, then advances by the character's own advance
% plus (ax, ay).
/ss
{
 4 1 roll
 {
  % cshow supplies code wx wy; drop the width and build a one-character string.
  2 npop
  (0) exch 2 copy 0 exch put pop
  gsave
  % Remember where the character ends before the path is consumed.
  false charpath currentpoint
  4 index setmatrix
  stroke
  grestore
  moveto
  2 copy rmoveto
 } exch cshow
 3 npop
} def
% cx cy char ax ay string matrix jss -
% Like ss, except that a character whose code equals _sp is painted with
% widthshow (using cx cy char) instead of being stroked.
/jss
{
 4 1 roll
 {
  2 npop
  (0) exch 2 copy 0 exch put
  gsave
  _sp eq
  {
   % Fetch copies of cx cy char and move the one-character string to the top.
   exch 6 index 6 index 6 index 5 -1 roll widthshow
   currentpoint
  }
  {
   false charpath currentpoint
   4 index setmatrix stroke
  } ifelse
  grestore
  moveto
  2 copy rmoveto
 } exch cshow
 6 npop
} def
% ax ay string sp -
% Appends the outline of each character of string to the current path,
% moving by (ax, ay) after every character.
/sp
{
 {
  2 npop (0) exch
  2 copy 0 exch put pop
  false charpath
  2 copy rmoveto
 } exch cshow
 2 npop
} def
% cx cy char ax ay string jsp -
% Like sp, except that a character whose code equals _sp is painted with
% widthshow (using cx cy char) instead of being added to the path.
/jsp
{
 {
  2 npop
  (0) exch 2 copy 0 exch put
  _sp eq
  {
   exch 5 index 5 index 5 index 5 -1 roll widthshow
  }
  {
   false charpath
  } ifelse
  2 copy rmoveto
 } exch cshow
 5 npop
} def
% x y pl x' y'
% Moves a user-space point so that each of its device-space coordinates
% becomes an integer plus 0.25.
/pl
{
 transform
 0.25 sub round 0.25 add exch
 0.25 sub round 0.25 add exch
 itransform
} def
% Path construction operators c C v V y Y l L m.
% If the interpreter knows setstrokeadjust, stroke adjustment is switched on
% and the operators map straight onto curveto/lineto/moveto.  Otherwise each
% operator first passes its topmost coordinate pair through pl.
/setstrokeadjust where
{
 pop true setstrokeadjust
 /c
 {
  curveto
 } def
 /C
 /c load def
 % v: the current point serves as the first control point.
 /v
 {
  currentpoint 6 2 roll curveto
 } def
 /V
 /v load def
 % y: the end point doubles as the second control point.
 /y
 {
  2 copy curveto
 } def
 /Y
 /y load def
 /l
 {
  lineto
 } def
 /L
 /l load def
 /m
 {
  moveto
 } def
}
{
 /c
 {
  pl curveto
 } def
 /C
 /c load def
 /v
 {
  currentpoint 6 2 roll pl curveto
 } def
 /V
 /v load def
 /y
 {
  pl 2 copy curveto
 } def
 /Y
 /y load def
 /l
 {
  pl lineto
 } def
 /L
 /l load def
 /m
 {
  pl moveto
 } def
} ifelse
% array offset d -
% Sets the dash pattern (setdash).
/d
{
 setdash
} def
% cf is an empty procedure in this dictionary; Adobe_Illustrator_AI5_vars
% separately defines cf as a flatness number, and initialize pushes that
% dictionary on top of this one.
/cf
{
} def
% flatness i -
% Sets the flatness; an operand of 0 is replaced by whatever cf yields.
/i
{
 dup 0 eq
 {
  pop cf
 } if
 setflat
} def
% Line attribute operators: j = line join, J = line cap, M = miter limit,
% w = line width.  Each passes its single operand to the matching
% PostScript operator.
/j
{
 setlinejoin
} def
/J
{
 setlinecap
} def
/M
{
 setmiterlimit
} def
/w
{
 setlinewidth
} def
% int XR -
% Sets _eo to true when the operand is non-zero, false otherwise.
/XR
{
 0 ne
 /_eo exch ddef
} def
% H does nothing; h closes the current subpath.
/H
{
} def
/h
{
 closepath
} def
% N: discard the current path without painting it.
% While _pola is non-zero the action is not performed; {N} is stored in
% CRender instead (executed later by *U).  A pending clip request
% (_doClip = 1) is honoured before the path is dropped.
/N
{
 _pola 0 eq
 {
  _doClip 1 eq
  {
   _eo {eoclip} {clip} ifelse /_doClip 0 ddef
  } if
  newpath
 }
 {
  /CRender
  {
   N
  } ddef
 } ifelse
} def
% n is a synonym for N.
/n
{
 N
} def
% F: fill the current path through _pf.
% While _pola is non-zero, {F} is stored in CRender instead.
/F
{
 _pola 0 eq
 {
  _doClip 1 eq
  {
   % Fill inside gsave/grestore so the path survives, then clip with it.
   % _lp is reset and _fc run again after the grestore.
   gsave _pf grestore _eo {eoclip} {clip} ifelse newpath /_lp /none ddef _fc
   /_doClip 0 ddef
  }
  {
   _pf
  } ifelse
 }
 {
  /CRender
  {
   F
  } ddef
 } ifelse
} def
% f: close the current subpath, then F.
/f
{
 closepath
 F
} def
% S: stroke the current path through _ps.
% While _pola is non-zero, {S} is stored in CRender instead.
/S
{
 _pola 0 eq
 {
  _doClip 1 eq
  {
   % Stroke inside gsave/grestore so the path survives, then clip with it.
   % _lp is reset and _sc run again after the grestore.
   gsave _ps grestore _eo {eoclip} {clip} ifelse newpath /_lp /none ddef _sc
   /_doClip 0 ddef
  }
  {
   _ps
  } ifelse
 }
 {
  /CRender
  {
   S
  } ddef
 } ifelse
} def
% s: close the current subpath, then S.
/s
{
 closepath
 S
} def
% B: fill, then stroke, the current path.
% While _pola is non-zero, {B} is stored in CRender instead.
/B
{
 _pola 0 eq
 {
  % The clip test is evaluated before F runs: F's clip branch sets _doClip
  % back to 0, and ddef changes are not undone by grestore.
  _doClip 1 eq
  gsave F grestore
  {
   gsave S grestore _eo {eoclip} {clip} ifelse newpath /_lp /none ddef _sc
   /_doClip 0 ddef
  }
  {
   S
  } ifelse
 }
 {
  /CRender
  {
   B
  } ddef
 } ifelse
} def
% b: close the current subpath, then B.
/b
{
 closepath
 B
} def
% W: request that the next N/F/S/B also installs the path as the clip path.
/W
{
 /_doClip 1 ddef
} def
% [string] * -
% Pops the top operand if the stack is non-empty and that operand is a
% string, then clears the current path.
/*
{
 count 0 ne
 {
  dup type /stringtype eq
  {
   pop
  } if
 } if
 newpath
} def
% u and U are defined as empty procedures: they have no effect when executed.
/u
{
} def
/U
{
} def
% q / Q: gsave / grestore, performed only while _pola is 0.
/q
{
 _pola 0 eq
 {
  gsave
 } if
} def
/Q
{
 _pola 0 eq
 {
  grestore
 } if
} def
% *u: increments _pola.  While it is non-zero, N/F/S/B only record
% themselves in CRender.
/*u
{
 _pola 1 add /_pola exch ddef
} def
% *U: decrements _pola and, when it reaches 0, executes CRender.
/*U
{
 _pola 1 sub /_pola exch ddef
 _pola 0 eq
 {
  CRender
 } if
} def
% D discards its single operand; *w and *W do nothing.
/D
{
 pop
} def
/*w
{
} def
/*W
{
} def
% Backquote operator: takes six operands and opens a protected context that
% the ~ operator closes.
/`
{
 % Snapshot the VM; ~ restores it.
 /_i save ddef
 clipForward?
 {
  nulldevice
 } if
 % Of the six operands only the deepest one is used, as the matrix for
 % concat; the other five are discarded.
 6 1 roll 4 npop
 concat pop
 userdict begin
 % Defined in userdict (just opened): showpage becomes a no-op.
 /showpage
 {
 } def
 % Reset the graphics state parameters to fixed values.
 0 setgray
 0 setlinecap
 1 setlinewidth
 0 setlinejoin
 10 setmiterlimit
 [] 0 setdash
 /setstrokeadjust where {pop false setstrokeadjust} if
 newpath
 0 setgray
 false setoverprint
} def
% ~ : pops userdict again and restores the snapshot taken by the
% backquote operator.
/~
{
 end
 _i restore
} def
% int O -   sets the fill overprint flag _of (true when non-zero).
% int R -   sets the stroke overprint flag _os (true when non-zero).
% Both reset _lp so the next paint re-installs its colour.
/O
{
 0 ne
 /_of exch ddef
 /_lp /none ddef
} def
/R
{
 0 ne
 /_os exch ddef
 /_lp /none ddef
} def
% gray g -
% Selects a gray fill colour: stores the level in _gf and installs the fill
% procedures _fc, _pf, _psf and _pjsf.
/g
{
 /_gf exch ddef
 % _fc installs overprint and gray unless the fill colour is already active.
 /_fc
 {
  _lp /fill ne
  {
   _of setoverprint
   _gf setgray
   /_lp /fill ddef
  } if
 } ddef
 % _pf fills the current path using the rule selected by _eo.
 /_pf
 {
  _fc
  _eo {eofill} {fill} ifelse
 } ddef
 % _psf / _pjsf paint text with ashow / awidthshow in the fill colour.
 /_psf
 {
  _fc
  ashow
 } ddef
 /_pjsf
 {
  _fc
  awidthshow
 } ddef
 /_lp /none ddef
} def
% gray G -
% Selects a gray stroke colour: stores the level in _gs and installs the
% stroke procedures _sc, _ps, _pss and _pjss.
/G
{
 /_gs exch ddef
 % _sc installs overprint and gray unless the stroke colour is already active.
 /_sc
 {
  _lp /stroke ne
  {
   _os setoverprint
   _gs setgray
   /_lp /stroke ddef
  } if
 } ddef
 /_ps
 {
  _sc
  stroke
 } ddef
 % _pss / _pjss stroke text outlines through ss / jss.
 /_pss
 {
  _sc
  ss
 } ddef
 /_pjss
 {
  _sc
  jss
 } ddef
 /_lp /none ddef
} def
% c m y k_ k -
% Selects a CMYK fill colour: stores the four components in the array _cf
% and installs the fill procedures _fc, _pf, _psf and _pjsf.
/k
{
 _cf astore pop
 % _fc installs overprint and the CMYK colour unless the fill colour is
 % already active.
 /_fc
 {
  _lp /fill ne
  {
   _of setoverprint
   _cf aload pop setcmykcolor
   /_lp /fill ddef
  } if
 } ddef
 /_pf
 {
  _fc
  _eo {eofill} {fill} ifelse
 } ddef
 /_psf
 {
  _fc
  ashow
 } ddef
 /_pjsf
 {
  _fc
  awidthshow
 } ddef
 /_lp /none ddef
} def
% c m y k_ K -
% Selects a CMYK stroke colour: stores the four components in the array _cs
% and installs the stroke procedures _sc, _ps, _pss and _pjss.
/K
{
 _cs astore pop
 % _sc installs overprint and the CMYK colour unless the stroke colour is
 % already active.
 /_sc
 {
  _lp /stroke ne
  {
   _os setoverprint
   _cs aload pop setcmykcolor
   /_lp /stroke ddef
  } if
 } ddef
 /_ps
 {
  _sc
  stroke
 } ddef
 /_pss
 {
  _sc
  ss
 } ddef
 /_pjss
 {
  _sc
  jss
 } ddef
 /_lp /none ddef
} def
% c m y k_ name tint x -
% Selects a custom fill colour.  The top operand is stored in _gf, the
% remaining five go to findcmykcustomcolor and the result is kept in _if.
/x
{
 /_gf exch ddef
 findcmykcustomcolor
 /_if exch ddef
 /_fc
 {
  _lp /fill ne
  {
   _of setoverprint
   % setcustomcolor receives 1 minus the stored value.
   _if _gf 1 exch sub setcustomcolor
   /_lp /fill ddef
  } if
 } ddef
 /_pf
 {
  _fc
  _eo {eofill} {fill} ifelse
 } ddef
 /_psf
 {
  _fc
  ashow
 } ddef
 /_pjsf
 {
  _fc
  awidthshow
 } ddef
 /_lp /none ddef
} def
% c m y k_ name tint X -
% Selects a custom stroke colour.  The top operand is stored in _gs, the
% remaining five go to findcmykcustomcolor and the result is kept in _is.
/X
{
 /_gs exch ddef
 findcmykcustomcolor
 /_is exch ddef
 /_sc
 {
  _lp /stroke ne
  {
   _os setoverprint
   % setcustomcolor receives 1 minus the stored value.
   _is _gs 1 exch sub setcustomcolor
   /_lp /stroke ddef
  } if
 } ddef
 /_ps
 {
  _sc
  stroke
 } ddef
 /_pss
 {
  _sc
  ss
 } ddef
 /_pjss
 {
  _sc
  jss
 } ddef
 /_lp /none ddef
} def
% A discards its single operand.
/A
{
 pop
} def
% annotatepage: executes userdict's annotatepage entry if one exists,
% otherwise does nothing.
/annotatepage
{
userdict /annotatepage 2 copy known {get exec} {pop pop} ifelse
} def
% XT discards its two operands.
/XT {
 pop pop
} def
% [beginString] selector endString discard -
% Skips a section of the input by running one of the integer-keyed
% procedures of discardDict, which read from currentfile until endString is
% found.  beginString is only consumed by the nesting-aware procedures
% (selector 1, or 3 after the adjustment below).
/discard
{
 % Snapshot the VM; restored at the end of this procedure.
 save /discardSave exch store
 discardDict begin
 /endString exch store
 % gt38? is defined outside this excerpt.  When it is true the selector is
 % raised by 2, which picks procedure 2 or 3 instead of 0 or 1.
 gt38?
 {
  2 add
 } if
 load
 % The selected procedure terminates through stop; stopped catches that and
 % its boolean result is dropped.
 stopped
 pop
 end
 discardSave restore
} bind def
% discardDict holds the helpers and the integer-keyed scanning procedures
% that discard selects from.
userdict /discardDict 7 dict dup begin
put
% pre38Initialize: sets up views into buffer for the byte-wise scanners:
% newBuff is its first endStringLength bytes, newBuffButFirst is newBuff
% without its first byte, newBuffLast is the last byte of newBuff.
/pre38Initialize
{
 /endStringLength endString length store
 /newBuff buffer 0 endStringLength getinterval store
 /newBuffButFirst newBuff 1 endStringLength 1 sub getinterval store
 /newBuffLast newBuff endStringLength 1 sub 1 getinterval store
} def
% shiftBuffer: shifts newBuff left by one byte and reads the next byte of
% currentfile into its last position; executes stop at end of file.
/shiftBuffer
{
 newBuff 0 newBuffButFirst putinterval
 newBuffLast 0
 currentfile read not
 {
 stop
 } if
 put
} def
% Procedure 0: byte-wise scan without nesting.  Fills newBuff from
% currentfile, then slides it one byte at a time until it equals endString.
% Always leaves through stop, including when the initial read fails.
0
{
 pre38Initialize
 mark
 currentfile newBuff readstring exch pop
 {
  {
   newBuff endString eq
   {
    cleartomark stop
   } if
   shiftBuffer
  } loop
 }
 {
 stop
 } ifelse
} def
% Procedure 1: byte-wise scan with nesting.  Takes beginString from the
% operand stack.  layerCount is incremented on every beginString match and
% decremented on every endString match; the scan stops when it reaches 0.
% Unlike procedure 0 there is no else branch: if the initial readstring
% fails, the procedure returns without executing stop.
1
{
 pre38Initialize
 /beginString exch store
 mark
 currentfile newBuff readstring exch pop
 {
  {
   newBuff beginString eq
   {
    /layerCount dup load 1 add store
   }
   {
    newBuff endString eq
    {
     /layerCount dup load 1 sub store
     layerCount 0 eq
     {
      cleartomark stop
     } if
    } if
   } ifelse
   shiftBuffer
  } loop
 } if
} def
% Procedure 2: line-wise scan without nesting.  Reads lines from
% currentfile into buffer until one equals endString; executes stop on a
% match or when readline reports end of file.
2
{
 mark
 {
  currentfile buffer readline not
  {
  stop
  } if
  endString eq
  {
   cleartomark stop
  } if
 } loop
} def
% Procedure 3: line-wise scan with nesting.  Takes beginString from the
% operand stack.  layerCnt starts at 1, is incremented on every line equal
% to beginString, and on a line equal to endString either ends the scan
% (layerCnt is 1) or is decremented.
3
{
 /beginString exch store
 /layerCnt 1 store
 mark
 {
  currentfile buffer readline not
  {
  stop
  } if
  dup beginString eq
  {
   pop /layerCnt dup load 1 add store
  }
  {
   endString eq
   {
    layerCnt 1 eq
    {
     cleartomark stop
    }
    {
     /layerCnt dup load 1 sub store
    } ifelse
   } if
  } ifelse
 } loop
} def
% Close discardDict.
end
% clipRenderOff: replacement operators that paint nothing.  Lb pushes this
% dictionary in one of its branches and LB removes it again.
userdict /clipRenderOff 15 dict dup begin
put
% Every path painting operator listed here gets the same body: honour a
% pending clip request, then drop the path.
{
 /n /N /s /S /f /F /b /B
}
{
 {
  _doClip 1 eq
  {
   /_doClip 0 ddef _eo {eoclip} {clip} ifelse
  } if
  newpath
 } def
} forall
% The remaining entries only discard operands: Tr and BB pop one, Bg pops
% twelve, Bm and Bc pop six, Bh pops four; Bb does nothing.
/Tr /pop load def
/Bb {} def
/BB /pop load def
/Bg {12 npop} def
/Bm {6 npop} def
/Bc /Bm load def
/Bh {4 npop} def
end
% a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 Lb -
% Begin-layer operator.  Only the fourth and sixth operands are examined;
% the other eight are discarded.
% NOTE(review): presumably a4 is the layer's "printing" flag and a6 its
% "has multi-layer masks" flag -- confirm against the AI file format spec.
/Lb
{
 % Reduce the ten operands to: a6 a4 (a4 on top).
 4 npop
 6 1 roll
 pop
 4 1 roll
 pop pop pop
 0 eq
 {
  % a4 is 0.
  0 eq
  {
   % a6 is 0 as well: skip the input up to the matching end-of-layer line.
   (%AI5_BeginLayer) 1 (%AI5_EndLayer--) discard
  }
  {

   % a6 is non-zero: keep executing the layer, but with text and painting
   % operators neutralised.
   /clipForward? true def

   /Tx /pop load def
   /Tj /pop load def

   % Slip clipRenderOff underneath the current dictionary on the dict stack.
   currentdict end clipRenderOff begin begin
  } ifelse
 }
 {
  % a4 is non-zero: when a6 is 0, take a VM snapshot that LB restores.
  0 eq
  {
   save /discardSave exch store
  } if
 } ifelse
} bind def
% LB: end-layer operator.  Restores the snapshot in discardSave if there is
% one; otherwise, if clipForward? is set, removes the dictionary sitting
% directly below the current one on the dict stack and clears the flag.
/LB
{
 discardSave dup null ne
 {
  restore
 }
 {
  pop
  clipForward?
  {
   % Pop two dictionaries and push the former top one back.
   currentdict
   end
   end
   begin

   /clipForward? false ddef
  } if
 } ifelse
} bind def
% a b Pb -   discards two operands, then skips the input up to the
%            end-of-palette marker.
/Pb
{
 pop pop
 0 (%AI5_EndPalette) discard
} bind def
% Np: skips the input up to the end-of-non-printing marker.
/Np
{
 0 (%AI5_End_NonPrinting--) discard
} bind def
% Ln and Ap each discard one operand.
/Ln /pop load def
/Ap
/pop load def
% resolution Ar -
% Sets the flatness to the device-space length of the user-space vector
% (72 / resolution, 0), with a lower bound of 1.
/Ar
{
 72 exch div
 0 dtransform dup mul exch dup mul add sqrt
 dup 1 lt
 {
  pop 1
 } if
 setflat
} def
% Mb executes q, MB executes Q, and Md does nothing.
/Mb
{
 q
} def
/Md
{
} def
/MB
{
 Q
} def
% nc: stand-ins for the three colour-setting operators that only discard
% their operands.  initialize puts this dictionary on the dict stack while
% it binds the procedures of Adobe_Illustrator_AI5.
/nc 3 dict def
nc begin
/setgray
{
 pop
} bind def
/setcmykcolor
{
 4 npop
} bind def
/setcustomcolor
{
 2 npop
} bind def
currentdict readonly pop
% Close nc, then Adobe_Illustrator_AI5.
end
end
% Consumes the packing mode that was left on the operand stack when this
% resource started.
setpacking
%%EndResource
%%EndProlog
%%BeginSetup
%%IncludeFont: Symbol
%%IncludeFont: Times-Italic
%%IncludeFont: Times-Roman
% Run the initialize procedure of each procset.  The typography procset
% receives the two Illustrator dictionaries as operands; its initialize
% procedure is defined outside this excerpt.
Adobe_level2_AI5 /initialize get exec
Adobe_Illustrator_AI5_vars Adobe_Illustrator_AI5 Adobe_typography_AI5 /initialize get exec
Adobe_ColorImage_AI6 /initialize get exec
Adobe_Illustrator_AI5 /initialize get exec
[
39/quotesingle 96/grave 128/Adieresis/Aring/Ccedilla/Eacute/Ntilde/Odieresis
/Udieresis/aacute/agrave/acircumflex/adieresis/atilde/aring/ccedilla/eacute
/egrave/ecircumflex/edieresis/iacute/igrave/icircumflex/idieresis/ntilde
/oacute/ograve/ocircumflex/odieresis/otilde/uacute/ugrave/ucircumflex
/udieresis/dagger/degree/cent/sterling/section/bullet/paragraph/germandbls
/registered/copyright/trademark/acute/dieresis/.notdef/AE/Oslash
/.notdef/plusminus/.notdef/.notdef/yen/mu/.notdef/.notdef
/.notdef/.notdef/.notdef/ordfeminine/ordmasculine/.notdef/ae/oslash
/questiondown/exclamdown/logicalnot/.notdef/florin/.notdef/.notdef
/guillemotleft/guillemotright/ellipsis/.notdef/Agrave/Atilde/Otilde/OE/oe
/endash/emdash/quotedblleft/quotedblright/quoteleft/quoteright/divide
/.notdef/ydieresis/Ydieresis/fraction/currency/guilsinglleft/guilsinglright
/fi/fl/daggerdbl/periodcentered/quotesinglbase/quotedblbase/perthousand
/Acircumflex/Ecircumflex/Aacute/Edieresis/Egrave/Iacute/Icircumflex
/Idieresis/Igrave/Oacute/Ocircumflex/.notdef/Ograve/Uacute/Ucircumflex
/Ugrave/dotlessi/circumflex/tilde/macron/breve/dotaccent/ring/cedilla
/hungarumlaut/ogonek/caron
TE
%AI3_BeginEncoding: _Symbol Symbol
[/_Symbol/Symbol 0 0 0 TZ
%AI3_EndEncoding TrueType
%AI3_BeginEncoding: _Times-Italic Times-Italic
[/_Times-Italic/Times-Italic 0 0 1 TZ
%AI3_EndEncoding TrueType
%AI3_BeginEncoding: _Times-Roman Times-Roman
[/_Times-Roman/Times-Roman 0 0 1 TZ
%AI3_EndEncoding TrueType
%AI5_Begin_NonPrinting
Np
8 Bn
%AI5_BeginGradient: (Black & White)
(Black & White) 0 2 Bd
[
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
0 %_Br
[
0 0 50 100 %_Bs
1 0 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Green & Blue)
(Green & Blue) 0 2 Bd
[
<
99999A9A9B9B9B9C9C9D9D9D9E9E9F9F9FA0A0A1A1A1A2A2A3A3A3A4A4A5A5A5A6A6A7A7A7A8A8A9
A9A9AAAAABABABACACADADADAEAEAFAFAFB0B0B1B1B1B2B2B3B3B3B4B4B5B5B5B6B6B7B7B7B8B8B9
B9B9BABABBBBBBBCBCBDBDBDBEBEBFBFBFC0C0C1C1C1C2C2C3C3C3C4C4C5C5C5C6C6C7C7C7C8C8C9
C9C9CACACBCBCBCCCCCDCDCDCECECFCFCFD0D0D1D1D1D2D2D3D3D3D4D4D5D5D5D6D6D7D7D7D8D8D9
D9D9DADADBDBDBDCDCDDDDDDDEDEDFDFDFE0E0E1E1E1E2E2E3E3E3E4E4E5E5E5E6E6E7E7E7E8E8E9
E9E9EAEAEBEBEBECECEDEDEDEEEEEFEFEFF0F0F1F1F1F2F2F3F3F3F4F4F5F5F5F6F6F7F7F7F8F8F9
F9F9FAFAFBFBFBFCFCFDFDFDFEFEFFFF
>
<
000102020304050506070808090A0B0B0C0D0E0E0F101111121314141516171718191A1A1B1C1D1D
1E1F20202122232324252626272829292A2B2C2C2D2E2F2F303132323334353536373838393A3B3B
3C3D3E3E3F404141424344444546474748494A4A4B4C4D4D4E4F5050515253535455565657585959
5A5B5C5C5D5E5F5F606162626364656566676868696A6B6B6C6D6E6E6F7071717273747475767777
78797A7A7B7C7D7D7E7F80808182828384858586878888898A8B8B8C8D8E8E8F9091919293949495
96979798999A9A9B9C9D9D9E9FA0A0A1A2A3A3A4A5A6A6A7A8A9A9AAABACACADAEAFAFB0B1B2B2B3
B4B5B5B6B7B8B8B9BABBBBBCBDBEBEBF
>
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
0
1 %_Br
[
1 0.75 0 0 1 50 100 %_Bs
0.6 0 1 0 1 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Pink, Yellow, Green)
(Pink, Yellow, Green) 0 3 Bd
[
<
00000000000000000000000000000000000000010101010101010101010101010101010101010101
01010101010202020202020202020202020202020202020202020203030303030303030303030303
03030303030303030404040404040404040404040404040404040404050505050505050505050505
05050505050505060606060606060606060606060606060606060707070707070707070707070707
07070707080808080808080808080808080808080809090909090909090909090909090909090A0A
0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0C0C0C0C0C0C0C0C0C
0C0C0C0C0C0C0C0D0D0D0D0D
>
<
050506060606070708080809090A0A0A0B0B0C0C0D0D0E0E0F0F1010111112121313141415151617
17181819191A1A1B1C1C1D1D1E1F1F202021222223232425252626272828292A2A2B2C2C2D2D2E2F
2F3031313233333435353637373839393A3B3B3C3D3E3E3F4040414242434445454647474849494A
4B4C4C4D4E4F4F505151525354545556575758595A5A5B5C5C5D5E5F5F6061626363646566666768
69696A6B6C6C6D6E6F707071727373747576777778797A7B7B7C7D7E7F7F80818283838485868787
88898A8B8B8C8D8E8F8F9091929394949596979898999A9B9C9D9D9E9FA0A1A2A2A3A4A5A6A7A7A8
A9AAABACADADAEAFB0B1B2B2
>
<
CCCCCBCBCBCACACAC9C9C8C8C7C7C6C6C5C5C4C4C3C2C2C1C1C0C0BFBEBEBDBDBCBBBBBAB9B9B8B7
B7B6B6B5B4B4B3B2B1B1B0AFAFAEADADACABAAAAA9A8A8A7A6A5A5A4A3A2A2A1A0A09F9E9D9C9C9B
9A999998979696959493929291908F8E8E8D8C8B8A8A8988878686858483828181807F7E7D7C7C7B
7A7978777776757473727171706F6E6D6C6B6A6A69686766656463636261605F5E5D5C5B5B5A5958
5756555453525151504F4E4D4C4B4A49484746464544434241403F3E3D3C3B3A3938383736353433
3231302F2E2D2C2B2A29282726252423222221201F1E1D1C1B1A191817161514131211100F0E0D0C
0B0A09080706050403020100
>
0
1 %_Br
<
737271706F6E6D6C6B6A696867666564636261605F5E5D5C5B5B5A59585756555453525150504F4E
4D4C4B4A4949484746454443434241403F3E3E3D3C3B3A3A393837363635343333323130302F2E2D
2D2C2B2A2A29282827262525242323222121201F1F1E1D1D1C1C1B1A1A1918181717161615141413
1312121111100F0F0E0E0D0D0C0C0C0B0B0A0A090908080807070606060505050404040303030202
020201010101010000000000
>
<
00000000000000000000000001010101010101010101010101010101010101010101010102020202
02020202020202020202020202020202020202020202030303030303030303030303030303030303
03030303030303030303030303040404040404040404040404040404040404040404040404040404
04040404040404040404050505050505050505050505050505050505050505050505050505050505
050505050505050505050505
>
<
BFBFBFC0C0C0C0C0C0C0C0C0C1C1C1C1C1C1C1C1C1C2C2C2C2C2C2C2C2C2C2C3C3C3C3C3C3C3C3C3
C3C4C4C4C4C4C4C4C4C4C4C5C5C5C5C5C5C5C5C5C5C5C6C6C6C6C6C6C6C6C6C6C6C6C7C7C7C7C7C7
C7C7C7C7C7C7C8C8C8C8C8C8C8C8C8C8C8C8C8C9C9C9C9C9C9C9C9C9C9C9C9C9C9C9CACACACACACA
CACACACACACACACACACACBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCC
>
0
1 %_Br
[
0.05 0.7 0 0 1 50 100 %_Bs
0 0.02 0.8 0 1 57 36 %_Bs
0.45 0 0.75 0 1 37 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Purple, Red & Yellow)
(Purple, Red & Yellow) 0 3 Bd
[
0
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A
>
<
CCCCCCCDCDCDCDCDCECECECECECFCFCFCFD0D0D0D0D0D1D1D1D1D1D2D2D2D2D2D3D3D3D3D3D4D4D4
D4D5D5D5D5D5D6D6D6D6D6D7D7D7D7D7D8D8D8D8D8D9D9D9D9DADADADADADBDBDBDBDBDCDCDCDCDC
DDDDDDDDDDDEDEDEDEDFDFDFDFDFE0E0E0E0E0E1E1E1E1E1E2E2E2E2E2E3E3E3E3E4E4E4E4E4E5E5
E5E5E5E6E6E6E6E6E7E7E7E7E7E8E8E8E8E9E9E9E9E9EAEAEAEAEAEBEBEBEBEBECECECECECEDEDED
EDEEEEEEEEEEEFEFEFEFEFF0F0F0F0F0F1F1F1F1F1F2F2F2F2F3F3F3F3F3F4F4F4F4F4F5F5F5F5F5
F6F6F6F6F6F7F7F7F7F8F8F8F8F8F9F9F9F9F9FAFAFAFAFAFBFBFBFBFBFCFCFCFCFDFDFDFDFDFEFE
FEFEFEFFFFFF
>
0
1 %_Br
<
E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBE
BDBCBBBAB9B8B7B6B5B4B3B2B1B0AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A99989796
9594939291908F8E8D8C8B8A898887868584838281807F7E7D7C7B7A797877767574737271706F6E
6D6C6B6A696867666564636261605F5E5D5C5B5A595857565554535251504F4E4D4C4B4A49484746
4544434241403F3E3D3C3B3A393837363534333231302F2E2D2C2B2A292827262524232221201F1E
1D1C1B1A191817161514131211100F0E0D0C0B0A09080706050403020100
>
<
E5E6E6E6E6E6E6E6E6E7E7E7E7E7E7E7E7E7E8E8E8E8E8E8E8E8E8E9E9E9E9E9E9E9E9E9EAEAEAEA
EAEAEAEAEAEBEBEBEBEBEBEBEBEBECECECECECECECECECEDEDEDEDEDEDEDEDEDEEEEEEEEEEEEEEEE
EEEFEFEFEFEFEFEFEFEFF0F0F0F0F0F0F0F0F0F1F1F1F1F1F1F1F1F1F2F2F2F2F2F2F2F2F2F3F3F3
F3F3F3F3F3F3F4F4F4F4F4F4F4F4F4F5F5F5F5F5F5F5F5F5F6F6F6F6F6F6F6F6F6F7F7F7F7F7F7F7
F7F7F8F8F8F8F8F8F8F8F8F9F9F9F9F9F9F9F9F9FAFAFAFAFAFAFAFAFAFBFBFBFBFBFBFBFBFBFCFC
FCFCFCFCFCFCFCFDFDFDFDFDFDFDFDFDFEFEFEFEFEFEFEFEFEFFFFFFFFFF
>
<
00010203040405060708090A0B0C0C0D0E0F10111213141415161718191A1B1C1D1D1E1F20212223
242525262728292A2B2C2D2D2E2F30313233343535363738393A3B3C3D3D3E3F4041424344454546
4748494A4B4C4D4E4E4F50515253545556565758595A5B5C5D5E5E5F60616263646566666768696A
6B6C6D6E6E6F70717273747576767778797A7B7C7D7E7F7F80818283848586878788898A8B8C8D8E
8F8F90919293949596979798999A9B9C9D9E9F9FA0A1A2A3A4A5A6A7A7A8A9AAABACADAEAFAFB0B1
B2B3B4B5B6B7B8B8B9BABBBCBDBEBFC0C0C1C2C3C4C5C6C7C8C8C9CACBCC
>
0
1 %_Br
[
0 0.04 1 0 1 50 100 %_Bs
0 1 0.8 0 1 50 50 %_Bs
0.9 0.9 0 0 1 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Rainbow)
(Rainbow) 0 6 Bd
[
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
1
0
0
1 %_Br
1
<
0708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E
2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F50515253545556
5758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E
7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9FA0A1A2A3A4A5A6
A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7C8C9CACBCCCDCE
CFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDFE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEFF0F1F2F3F4F5F6
F7F8F9FAFBFCFDFEFF
>
0
0
1 %_Br
1
<
00000000000000000000000000000000000001010101010101010101010101010101010101010101
01010101010101010101010101010202020202020202020202020202020202020202020202020202
02020202020202020202030303030303030303030303030303030303030303030303030303030303
03030303030304040404040404040404040404040404040404040404040404040404040404040404
04040505050505050505050505050505050505050505050505050505050505050505050505050606
06060606060606060606060606060606060606060606060606060606060606060606070707070707
07070707070707070707070707070707
>
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
0
1 %_Br
<
000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F2021222324252627
28292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F
505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F7071727374757677
78797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9F
A0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDFE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF
F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF
>
0
1
0
1 %_Br
0
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
1
0
1 %_Br
[
0 1 0 0 1 50 100 %_Bs
1 1 0 0 1 50 80 %_Bs
1 0.0279 0 0 1 50 60 %_Bs
1 0 1 0 1 50 40 %_Bs
0 0 1 0 1 50 20 %_Bs
0 1 1 0 1 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Steel Bar)
(Steel Bar) 0 3 Bd
[
<
FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8
D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0BFBEBDBCBBBAB9B8B7B6B5B4B3B2B1B0
AFAEADACABAAA9A8A7A6A5A4A3A2A1A09F9E9D9C9B9A999897969594939291908F8E8D8C8B8A8988
87868584838281807F7E7D7C7B7A797877767574737271706F6E6D6C6B6A69686766656463626160
5F5E5D5C5B5A595857565554535251504F4E4D4C4B4A494847464544434241403F3E3D3C3B3A3938
37363534333231302F2E2D2C2B2A292827262524232221201F1E1D1C1B1A19181716151413121110
0F0E0D0C0B0A09080706050403020100
>
0 %_Br
<
000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F2021222324252627
28292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F
505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F7071727374757677
78797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9F
A0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDFE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF
F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF
>
0 %_Br
[
0 0 50 100 %_Bs
1 0 50 70 %_Bs
0 0 50 0 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Yellow & Orange Radial)
(Yellow & Orange Radial) 1 2 Bd
[
0
<
0001010203040506060708090A0B0C0C0D0E0F10111213131415161718191A1B1C1D1D1E1F202122
232425262728292A2B2B2C2D2E2F303132333435363738393A3B3C3D3E3E3F404142434445464748
494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F60606162636465666768696A6B6C6D6E6F
707172737475767778797A7B7C7D7E7F808182838485868788898A8B8C
>
<
FFFFFFFFFEFEFEFEFEFEFEFDFDFDFDFDFDFCFCFCFCFCFCFBFBFBFBFBFBFAFAFAFAFAFAF9F9F9F9F9
F9F8F8F8F8F8F8F7F7F7F7F7F7F6F6F6F6F6F6F5F5F5F5F5F5F4F4F4F4F4F3F3F3F3F3F3F2F2F2F2
F2F2F1F1F1F1F1F0F0F0F0F0F0EFEFEFEFEFEFEEEEEEEEEEEDEDEDEDEDEDECECECECECEBEBEBEBEB
EBEAEAEAEAEAE9E9E9E9E9E9E8E8E8E8E8E8E7E7E7E7E7E6E6E6E6E6E5
>
0
1 %_Br
[
0 0 1 0 1 52 19 %_Bs
0 0.55 0.9 0 1 50 100 %_Bs
BD
%AI5_EndGradient
%AI5_BeginGradient: (Yellow & Purple Radial)
(Yellow & Purple Radial) 1 2 Bd
[
<
000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F2021222324252627
28292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F
505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F7071727374757677
78797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9F
A0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDFE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF
F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF
>
<
1415161718191A1B1C1D1E1F1F202122232425262728292A2A2B2C2D2E2F30313233343536363738
393A3B3C3D3E3F40414142434445464748494A4B4C4D4D4E4F50515253545556575858595A5B5C5D
5E5F60616263646465666768696A6B6C6D6E6F6F707172737475767778797A7B7B7C7D7E7F808182
83848586868788898A8B8C8D8E8F90919292939495969798999A9B9C9D9D9E9FA0A1A2A3A4A5A6A7
A8A9A9AAABACADAEAFB0B1B2B3B4B4B5B6B7B8B9BABBBCBDBEBFC0C0C1C2C3C4C5C6C7C8C9CACBCB
CCCDCECFD0D1D2D3D4D5D6D7D7D8D9DADBDCDDDEDFE0E1E2E2E3E4E5E6E7E8E9EAEBECEDEEEEEFF0
F1F2F3F4F5F6F7F8F9F9FAFBFCFDFEFF
>
<
ABAAAAA9A8A7A7A6A5A5A4A3A3A2A1A1A09F9F9E9D9D9C9B9B9A9999989797969595949393929191
908F8F8E8D8D8C8B8B8A8989888787868585848383828181807F7F7E7D7D7C7B7B7A797978777776
7575747373727171706F6F6E6D6D6C6B6B6A6969686767666565646362626160605F5E5E5D5C5C5B
5A5A5958585756565554545352525150504F4E4E4D4C4C4B4A4A4948484746464544444342424140
403F3E3E3D3C3C3B3A3A3938383736363534343332323130302F2E2E2D2C2C2B2A2A292828272626
25242423222121201F1F1E1D1D1C1B1B1A1919181717161515141313121111100F0F0E0D0D0C0B0B
0A090908070706050504030302010100
>
0
1 %_Br
[
0 0.08 0.67 0 1 50 14 %_Bs
1 1 0 0 1 50 100 %_Bs
BD
%AI5_EndGradient
%AI5_End_NonPrinting--
%AI5_BeginPalette
0 2 Pb
Pn
Pc
1 g
Pc
0 g
Pc
0 0 0 0 k
Pc
0.75 g
Pc
0.5 g
Pc
0.25 g
Pc
0 g
Pc
Bb
2 (Black & White) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.25 0 0 0 k
Pc
0.5 0 0 0 k
Pc
0.75 0 0 0 k
Pc
1 0 0 0 k
Pc
0.25 0.25 0 0 k
Pc
0.5 0.5 0 0 k
Pc
0.75 0.75 0 0 k
Pc
1 1 0 0 k
Pc
Bb
2 (Pink, Yellow, Green) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0 0.25 0 0 k
Pc
0 0.5 0 0 k
Pc
0 0.75 0 0 k
Pc
0 1 0 0 k
Pc
0 0.25 0.25 0 k
Pc
0 0.5 0.5 0 k
Pc
0 0.75 0.75 0 k
Pc
0 1 1 0 k
Pc
Bb
0 0 0 0 Bh
2 (Yellow & Purple Radial) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0 0 0.25 0 k
Pc
0 0 0.5 0 k
Pc
0 0 0.75 0 k
Pc
0 0 1 0 k
Pc
0.25 0 0.25 0 k
Pc
0.5 0 0.5 0 k
Pc
0.75 0 0.75 0 k
Pc
1 0 1 0 k
Pc
Bb
2 (Rainbow) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.25 0.125 0 0 k
Pc
0.5 0.25 0 0 k
Pc
0.75 0.375 0 0 k
Pc
1 0.5 0 0 k
Pc
0.125 0.25 0 0 k
Pc
0.25 0.5 0 0 k
Pc
0.375 0.75 0 0 k
Pc
0.5 1 0 0 k
Pc
Bb
2 (Steel Bar) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0 0.25 0.125 0 k
Pc
0 0.5 0.25 0 k
Pc
0 0.75 0.375 0 k
Pc
0 1 0.5 0 k
Pc
0 0.125 0.25 0 k
Pc
0 0.25 0.5 0 k
Pc
0 0.375 0.75 0 k
Pc
0 0.5 1 0 k
Pc
Bb
2 (Purple, Red & Yellow) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.125 0 0.25 0 k
Pc
0.25 0 0.5 0 k
Pc
0.375 0 0.75 0 k
Pc
0.5 0 1 0 k
Pc
0.25 0 0.125 0 k
Pc
0.5 0 0.25 0 k
Pc
0.75 0 0.375 0 k
Pc
1 0 0.5 0 k
Pc
Bb
2 (Green & Blue) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.25 0.125 0.125 0 k
Pc
0.5 0.25 0.25 0 k
Pc
0.75 0.375 0.375 0 k
Pc
1 0.5 0.5 0 k
Pc
0.25 0.25 0.125 0 k
Pc
0.5 0.5 0.25 0 k
Pc
0.75 0.75 0.375 0 k
Pc
1 1 0.5 0 k
Pc
Bb
0 0 0 0 Bh
2 (Yellow & Orange Radial) -4014 4716 0 0 1 0 0 1 0 0 Bg
0 BB
Pc
0.125 0.25 0.125 0 k
Pc
0.25 0.5 0.25 0 k
Pc
0.375 0.75 0.375 0 k
Pc
0.5 1 0.5 0 k
Pc
0.125 0.25 0.25 0 k
Pc
0.25 0.5 0.5 0 k
Pc
0.375 0.75 0.75 0 k
Pc
0.5 1 1 0 k
Pc
0 0 0 0 k
Pc
0.125 0.125 0.25 0 k
Pc
0.25 0.25 0.5 0 k
Pc
0.375 0.375 0.75 0 k
Pc
0.5 0.5 1 0 k
Pc
0.25 0.125 0.25 0 k
Pc
0.5 0.25 0.5 0 k
Pc
0.75 0.375 0.75 0 k
Pc
1 0.5 1 0 k
Pc
PB
%AI5_EndPalette
%%EndSetup
%AI5_BeginLayer
1 1 1 1 0 0 0 79 128 255 Lb
(Layer 1) Ln
0 A
1 Ap
0 O
0.5 g
0 R
0 G
800 Ar
0 J 0 j 1 w 4 M []0 d
%AI3_Note:
0 D
0 XR
346.8577 544.1069 m
364.3316 564.6893 L
197.3859 706.4224 L
179.912 685.8401 L
346.8577 544.1069 L
b
0 Ap
[4 ]0 d
261.0277 631.2915 m
297.1186 601.7046 l
B
u
0 g
[]0 d
247.9361 520.771 m
364.4269 657.993 l
B
365.1742 660.4569 m
363.7237 659.5196 362.6775 658.9981 361.4132 658.3314 c
365.2587 655.0669 l
365.3943 655.5746 366.056 657.5396 366.7456 659.1229 c
367.4831 660.8178 368.2428 662.2776 368.8168 663.1555 c
368.0438 662.4465 366.7268 661.4599 365.1742 660.4569 c
f
U
0 R
0 G
[3 ]0 d
246.3539 522.7701 m
224.5695 541.9192 l
S
u
[]0 d
238.075 541.6039 m
286.6128 598.7797 l
S
0 O
0 g
287.3601 601.2436 m
285.9096 600.3063 284.8635 599.7848 283.5991 599.1181 c
287.4446 595.8536 l
287.5802 596.3613 288.2419 598.3264 288.9316 599.9096 c
289.669 601.6045 290.4287 603.0643 291.0027 603.9421 c
290.2297 603.2332 288.9127 602.2466 287.3601 601.2436 c
f
237.3277 539.1399 m
238.7783 540.0772 239.8244 540.5988 241.0887 541.2654 c
237.2432 544.53 l
237.1076 544.0223 236.4459 542.0572 235.7563 540.4739 c
235.0188 538.779 234.2592 537.3192 233.6851 536.4414 c
234.4581 537.1503 235.7751 538.1369 237.3277 539.1399 c
f
U
1 Ap
0 0 0 0 k
0 R
0 0 0 0 K
377.8748 567.2164 m
367.5832 575.9532 L
335.5482 538.2171 L
345.8399 529.4803 L
377.8748 567.2164 L
b
214.8266 706.9992 m
204.535 715.736 L
172.5 678 L
182.7917 669.2632 L
214.8266 706.9992 L
b
0.5 g
0 G
417.4107 321.4586 m
434.5617 342.3108 L
265.4271 481.4242 L
248.2761 460.572 L
417.4107 321.4586 L
b
382.2276 274.2196 m
399.3786 295.0718 L
230.244 434.1852 L
213.093 413.333 L
382.2276 274.2196 L
b
270.6572 139.6125 m
287.8081 160.4647 L
118.6735 299.5781 L
101.5226 278.7259 L
270.6572 139.6125 L
b
307.4424 187.422 m
324.5934 208.2743 L
155.4588 347.3877 L
138.3078 326.5354 L
307.4424 187.422 L
b
344.7969 229.6922 m
361.9479 250.5444 L
192.8133 389.6579 L
175.6623 368.8056 L
344.7969 229.6922 L
b
0 0 0 0 k
0 0 0 0 K
457.6706 362.222 m
449.561 368.8917 L
251.3761 127.922 L
259.4856 121.2523 L
457.6706 362.222 L
b
283.3214 495.97 m
275.2118 502.6397 L
77.0269 261.67 L
85.1364 255.0003 L
283.3214 495.97 L
b
u
0 Ap
0 g
0 G
305.2162 518.7115 m
328.5144 546.1559 l
B
329.2617 548.6199 m
327.8111 547.6825 326.765 547.161 325.5006 546.4943 c
329.3461 543.2298 l
329.4817 543.7375 330.1434 545.7026 330.8331 547.2858 c
331.5705 548.9808 332.3302 550.4406 332.9042 551.3184 c
332.1312 550.6095 330.8142 549.6228 329.2617 548.6199 c
b
U
u
358.5653 581.0398 m
384.3173 611.3748 l
B
357.818 578.5759 m
359.2685 579.5132 360.3146 580.0347 361.579 580.7014 c
357.7335 583.9659 l
357.5979 583.4582 356.9362 581.4931 356.2466 579.9099 c
355.5091 578.2149 354.7494 576.7551 354.1754 575.8773 c
354.9484 576.5862 356.2654 577.5729 357.818 578.5759 c
f
U
u
0 R
0 G
231.9761 268.2405 m
279.6167 326.1659 l
S
0 O
0 g
280.3255 328.6412 m
278.8897 327.6814 277.8518 327.1436 276.598 326.4573 c
280.494 323.2531 l
280.6216 323.7629 281.2526 325.738 281.9175 327.3318 c
282.6284 329.0381 283.3653 330.5095 283.9255 331.3962 c
283.1637 330.6753 281.8622 329.6683 280.3255 328.6412 c
f
231.2673 265.7652 m
232.703 266.725 233.7409 267.2628 234.9947 267.9491 c
231.0988 271.1533 l
230.9711 270.6435 230.3401 268.6684 229.6752 267.0746 c
228.9643 265.3684 228.2274 263.8969 227.6672 263.0102 c
228.429 263.7311 229.7305 264.7382 231.2673 265.7652 c
f
U
0 R
0 G
[3 ]0 d
401.4348 276.2838 m
379.2945 294.4931 l
S
364.4732 230.5885 m
342.393 249.3957 l
S
u
[]0 d
363.6595 242.5869 m
389.4913 273.9954 l
S
0 O
0 g
390.2001 276.4706 m
388.7643 275.5108 387.7265 274.9731 386.4727 274.2868 c
390.3686 271.0826 l
390.4962 271.5923 391.1272 273.5675 391.7921 275.1613 c
392.5031 276.8675 393.2399 278.339 393.8002 279.2256 c
393.0383 278.5047 391.7368 277.4977 390.2001 276.4706 c
f
362.9507 240.1116 m
364.3864 241.0714 365.4243 241.6092 366.6781 242.2955 c
362.7822 245.4997 l
362.6545 244.9899 362.0235 243.0148 361.3587 241.421 c
360.6477 239.7148 359.9109 238.2433 359.3506 237.3566 c
360.1125 238.0775 361.4139 239.0846 362.9507 240.1116 c
f
U
u
u
0 R
0 G
393.3086 313.2054 m
400.9311 322.4735 l
S
0 O
0 g
401.6399 324.9487 m
400.2041 323.9889 399.1663 323.4512 397.9125 322.7648 c
401.8084 319.5607 l
401.9361 320.0704 402.567 322.0456 403.2319 323.6394 c
403.9429 325.3456 404.6797 326.8171 405.24 327.7037 c
404.4781 326.9828 403.1766 325.9758 401.6399 324.9487 c
f
U
U
u
0 R
0 G
441.2524 370.4499 m
430.6656 357.5776 l
B
429.9568 355.1023 m
431.3925 356.0621 432.4304 356.5998 433.6842 357.2862 c
429.7883 360.4904 l
429.6606 359.9806 429.0296 358.0054 428.3647 356.4116 c
427.6538 354.7054 426.9169 353.234 426.3567 352.3473 c
427.1185 353.0682 428.42 354.0752 429.9568 355.1023 c
f
U
0 To
1 0 0 1 120 469.5 0 Tp
TP
0 Tr
/_Times-Roman 18 Tf
0 Ts
100 Tz
0 Tt
1 TA
%_ 0 XL
36 0 Xb
XB
0 0 5 TC
100 100 200 TW
0 0 0 Ti
0 Ta
0 0 2 2 3 Th
0 Tq
0 0 Tl
0 Tc
0 Tw
(OR) Tx
(\r) TX
TO
0 To
1 0 0 1 367.5 667.5 0 Tp
TP
0 Tr
/_Times-Italic 21 Tf
(k) Tx
/_Times-Roman 21 Tf
(/|) Tx
/_Times-Italic 21 Tf
(k ) Tx
/_Times-Roman 21 Tf
(|) Tx
(\r) TX
TO
0 To
1 0 0 1 366 556.5 0 Tp
TP
0 Tr
(2 ) Tx
/_Symbol 21 Tf
(W) Tx
/_Times-Roman 21 Tf
(\(|) Tx
/_Times-Italic 21 Tf
(k) Tx
/_Times-Roman 21 Tf
-6 Ts
( ) Tx
0 Ts
(|\)) Tx
/_Times-Roman 18 Tf
6 Ts
(-1) Tx
/_Times-Roman 21 Tf
0 Ts
(|) Tx
/_Times-Italic 21 Tf
(k) Tx
/_Times-Roman 21 Tf
( |) Tx
/_Times-Roman 18 Tf
8 Ts
(-1) Tx
(\r) TX
TO
0 To
1 0 0 1 18 606 0 Tp
TP
0 Tr
/_Symbol 21 Tf
0 Ts
(\r) TX
TO
0 To
1 0 0 1 430.5 325.5 0 Tp
TP
0 Tr
/_Times-Roman 21 Tf
(2 ) Tx
/_Symbol 21 Tf
(W) Tx
/_Times-Roman 21 Tf
(\(|) Tx
/_Times-Italic 21 Tf
(k) Tx
/_Times-Roman 21 Tf
-6 Ts
( ) Tx
0 Ts
(|\)) Tx
/_Times-Roman 18 Tf
6 Ts
(-1) Tx
/_Times-Roman 21 Tf
0 Ts
(|) Tx
/_Times-Italic 21 Tf
(k) Tx
/_Times-Roman 21 Tf
( |) Tx
/_Times-Roman 18 Tf
8 Ts
(-1) Tx
(\r) TX
TO
u
0 To
1 0 0 1 383 240 0 Tp
TP
0 Tr
/_Times-Roman 21 Tf
0 Ts
(  /|) Tx
/_Times-Italic 21 Tf
(k) Tx
/_Times-Roman 21 Tf
( |) Tx
(\r) TX
TO
0 R
0 G
386.7399 237.4968 m
380.0649 257.5196 398.4184 266.9485 383.0087 239.0954 c
S
U
u
0 To
1 0 0 1 216.3484 574.3812 0 Tp
TP
0 Tr
0 O
0 g
(  /|) Tx
/_Times-Italic 21 Tf
(k) Tx
/_Times-Roman 21 Tf
( |) Tx
(\r) TX
TO
0 R
0 G
220.0883 571.878 m
213.4132 591.9008 231.7667 601.3297 216.3571 573.4766 c
S
U
0 O
0 g
115.3333 466.6667 m
157.3333 466.6667 l
F
LB
%AI5_EndLayer--
%%PageTrailer
gsave annotatepage grestore showpage
%%Trailer
Adobe_Illustrator_AI5 /terminate get exec
Adobe_ColorImage_AI6 /terminate get exec
Adobe_typography_AI5 /terminate get exec
Adobe_level2_AI5 /terminate get exec
%%EOF
%\end{filecontents*}
\documentclass[11pt]{amsart}
\usepackage{amssymb}
\input epsf
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
% \newtheorem{xca}[theorem]{Exercise}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{exercise}[theorem]{Exercise}
\numberwithin{equation}{section}
%%%%  Author macros  %%%%%%%%%%%%%%%%%%%
\def\ds{\displaystyle}
\def\ep{\varepsilon}
\def\A{{\mathcal A}}
\def\B{{\mathcal B}}
\def\C{{\mathcal C}}
\def\D{{\mathcal D}}
\def\E{{\mathcal E}}
\def\F{{\mathcal F}}
\def\G{{\mathcal G}}
\def\H{{\mathcal H}}
\def\I{{\mathcal I}}
\def\K{{\mathcal K}}
\def\L{{\mathcal L}}
\def\M{{\mathcal M}}
\def\N{{\mathcal N}}
\def\S{{\mathcal S}}
\def\U{{\mathcal U}}
\def\Tau{{\mathcal T}}
\def\cee{{\mathbf C}}
\def\PP{{\mathbf P}}

\def\que{{\mathbf Q}}
\def\RR{{\mathbf R}}
\def\TT{{\mathbf T}}
\def\zed{{\mathbf Z}}

\def\nat{{\mathbf N}}
\def\integer{{\mathbf Z}}
\def\rational{{\mathbf Q}}
\def\real{{\mathbf R}}
\def\complex{{\mathbf C}}
\def\torus{{\mathbf T}}

\def\dist{\operatorname{dist}}
\def\div{\operatorname{div}}
\def\Id{\operatorname{Id}}
\def\Spec{\operatorname{Spec}}
\def\Lip{\operatorname{Lip}}
\def\Im{\operatorname{Im}}
\def\Area{\operatorname{Area}}
\def\NORM{{\|\enspace\|}}
\def\modulo#1{\,({\rm mod}\, #1)}
\def\fhat{\hat{f}}
\def\hhat{\hat{h}}
\def\sigmap{{\sigma'}}

\def\d{{\rm d}}
\def\i{{\rm i}}
\def\RHS{ R.H.S.\ }
%\renewcommand{\kappa}{k}


%%%%  Temporary commands - N. Petrov  %%%%%%%%
%\newcommand{\mar}[1]{{\marginpar{\small #1}}}
\newcommand{\mar}[1]{}
%
%\newcommand{\NPnote}[1]{\footnote{#1}}
%\newcommand{\NPnote}[1]{}
%
%\renewcommand{\cite}[1]{{[#1]}}
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%  Body of paper  %%%%%%%%%%%%%%%%%%%

\begin{document}

\title{A tutorial on KAM theory}

%   Information for first author
\author{Rafael de la Llave}
\address{Department of Mathematics, The University of Texas at Austin, 
Austin, TX 78712-1082}
\email{llave@math.utexas.edu}

\subjclass{Primary 37J40; }
\keywords{KAM theory, stability, Perturbation theory, quasiperiodic orbits, Hamiltonian systems}
\begin{abstract}

This is a tutorial on some of the main ideas in KAM theory. 
The goal is to present the background and to explain and 
compare somewhat informally some of the main methods of proof.

It is an expanded version of the lectures given by the author 
in the Summer Research Institute on {\sl Smooth Ergodic Theory}, 
Seattle, 1999.   The style is pedagogical and expository 
and it only aims to be an introduction to the primary literature.
It does not aim to be a systematic survey nor to present 
full proofs.
\end{abstract} 


\maketitle

\tableofcontents

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Introduction}

The goal of these lectures is to present 
an introduction to some of  the main ideas involved in 
KAM theory on the persistence of quasiperiodic motions under 
perturbations. The name comes  from the initials of 
A. N. Kolmogorov, V. I.  Arnol'd and J. Moser who initiated 
the theory. See \cite{Kolmogorov79},  \cite{Arnold63a},
\cite{Arnold63b}, \cite{Moser62}, \cite{Moser66a},
\cite{Moser66b} for the original papers.

By now, it is a full-fledged theory and it 
provides a systematic tool for the analysis of many 
dynamical systems and it also has relations with 
other areas of analysis.

 The conclusions of the theory are, roughly, that 
in $C^k$ -- $k$ rather high depending on the dimension -- 
open sets  
of dynamical systems  satisfying some geometric properties 
-- e.g. Hamiltonian, volume preserving, reversible, etc. --  
there are sets of positive measure covered by
invariant tori.  In particular, since the existence of a set of 
positive measure covered by invariant tori is incompatible 
with ergodicity, we conclude that  for the systems 
mentioned above, ergodicity 
cannot be  a $C^k$ generic property
\cite{MarkusM74}.

Of course, the existence of 
the quasiperiodic orbits has many other 
consequences besides preventing  ergodicity. 
The invariant tori are
important landmarks 
that guide the motion. 

Besides its applications to mechanics, dynamical systems 
and ergodic theory, KAM theory 
has grown enormously and has very interesting ramifications
in dynamical systems and in Analysis.
For example, averaging theory gives rise to 
estimates for very long times valid for all
initial conditions, one can use partially 
hyperbolic tori to show existence of orbits that escape. 
On the analytical side, the theory
leads to functional analysis methods that can be used to 
solve a variety of functional equations, many of which have 
interest in ergodic theory and in related disciplines
such as differential geometry.


There already exist excellent surveys, 
systematic expositions 
and tutorials of KAM theory. 

We quote in chronological order:
\cite{Arnold63b}, 
\cite{Moser66a,Moser66b,Moser73}, 
\cite{ArnoldA68},
\cite{Russman70},
\cite{Russman72},
\cite{Zehnder75,Zehnder76}, 
\cite{Douady82a},
\cite{Bost86}, 
\cite{Salamon86}, 
\cite{Poschel}, 
\cite{Poschel92},
\cite{Yoccoz92b}, 
\cite{ArnoldKN93},
\cite{Llave93}, 
\cite{BroerHS96}, 
\cite{Gallavotti1}, 
\cite{Gallavotti2}, 
\cite{Wayne96},
\cite{Russmann98},
and
\cite{Marmi99}.


Hence, one has to justify the effort in writing
and reading yet another exposition.

I decided that each of the surveys above has picked up 
a particular point of view and tried to either present 
a large part of KAM theory from this point of 
view  or to provide a particularly enlightening 
example. 

Given the high quality of all  (but one) of 
the above  surveys and tutorials, there seems to be little 
point in trying to achieve the same goals. 
Therefore, rather than presenting 
a point of view with full 
proofs, this tutorial will have only the more
modest goal of summarizing some of the
main ideas entering into KAM theory and 
describing and comparing
the main points of view.
Therefore,
it is not a substitute for the 
full papers we reference.

One of the disadvantages of covering 
such wide ground is that the presentation will have to be 
sketchy at some points.  Hopefully, we have flagged 
a good fraction of these sketchy points and 
referred to the relevant literature.
I would be happy if these lectures provide a road map
(necessarily omitting important details) 
of a fraction of  the literature 
that encourages somebody to enter into the field. Needless to say, 
this is not a survey and we have not made any  attempt to 
be systematic nor to reach the forefront of research.


It should be kept in mind that KAM theory has 
experienced   spectacular pro\-gress in recent  years
and that it is a very active area of research. 

\subsection{Some recent developments} \label{newdevelopments}

Let me mention some of these new developments (in no particular order, 
with no claim of completeness of the list, and omitting classical results
-- i.e. more than 15 years old --).
They will not be covered in the lectures, which will be 
concerned only with the most classical results. 

The novice who is reading the paper to
get initiated to KAM theory is encouraged to skip this subsection for the moment 
and only come back to it as a source of suggestions for future reading. 
Of course, the experts will notice  many omissions. The only point 
we are trying to make is that the theory is still finding exciting
results and that there is work to do.


\begin{itemize}
\item The ``lack of parameters'' problem, which was 
considered inaccessible, 
has been solved very elegantly
\cite{JorbaS92}, 
\cite{Eliasson88a}.
(See \cite{BroerHS96} for a recent survey,  and also
\cite{BroerHS96b}, 
\cite{Sevryuk99}.)
This has led to remarkable progress in the 
existence of lower dimensional tori, especially elliptic 
tori -- a theory of hyperbolic tori has been known for 
a long time --
(see e.g.  \cite{JorbaV97a}, \cite{JorbaV97b}).
\item As a corollary of this, one can get a 
reasonable KAM theory for volume preserving systems 
getting tori of codimension one. Hence blocking diffusion in many 
problems in hydrodynamics, etc. 
(See \cite{ChengS89}, \cite{BroerHS96}, \cite{DelshamsL90}, 
\cite{Xia92}, \cite{Yoccoz92b}.)

\item
The
KAM theory for infinite dimensional systems has made remarkable progress.

Note that in infinite dimensional systems,  the most
interesting tori are of lower dimension than the number of 
degrees of freedom.  

The subject of infinite dimensional KAM by itself 
would require a  review of its own  longer than these 
notes.
We just refer to \cite{CraigW93}, \cite{CraigW94},
\cite{Poschel96},
\cite{Bourgain95},
\cite{Bourgain00},
and \cite{Kuksin93} as representative references,
where the interested reader can find further references.


\item  Many systems in applications -- e.g. in statistical 
mechanics -- have the structure that they consist of 
arrays of systems connected by local couplings.   
\footnote{
These systems, under the name of {\sl coupled lattice maps}
have also been the  subject of very intense research 
when they have hyperbolicity properties, in some sense 
opposite to the situation considered in KAM theory.}
For these systems one can take advantage of this 
structure and develop a more efficient KAM theory  
than the simple  application of the general results 
\cite{Wayne84}, \cite{Poschel90},
\cite{FrolichSW86}.  Other KAM 
methods for these systems are 
developed in \cite{AlbaneseFS88a} 
and \cite{albaneseFS88b}, \cite{AlbaneseF91}
which consider the existence of periodic solutions.
See also \cite{Wayne86} for
a Nekhoroshev theorem for these systems.
Conjectures and preliminary estimates (a challenge for 
rigorous proofs)  on these systems can 
be found in 
\cite{BenettinGG85a}, 
\cite{BenettinGG85b}, \cite{HaroL00},\cite{CassettiCPC97}.


\item The non-degeneracy conditions needed for KAM theorems
have been greatly weakened \cite{Russmann90}, \cite{ChengS94},
\cite{Russmann98}.
See also \cite{BroerHS96},  \cite{BroerHS96b},\cite{Sevryuk95}, \cite{Sevryuk96}.

\item Modern techniques of PDE's such as viscosity 
solutions have been used to study the 
Hamilton-Jacobi equation \cite{Lions82}, \cite{CrandallL83}, 
\cite{CrandallEL84},
leading to a weak version of KAM theory 
that has deep relations with Aubry-Mather theory
\cite{Fathi97a}, \cite{Fathi97b}.

\item There has been 
quite spectacular progress in the  
problem of {\sl reducibility} of linear equations 
with quasiperiodic  coefficients 
(that is, the study of whether an equation of the form 
$\dot x = A( \phi + \omega t) x $, 
where $A \colon \torus^d \rightarrow \M_{n \times n}$ and 
$\omega \in \real^d$ is an irrational vector,
can be transformed into an equation with constant coefficients). 
After the original work of \cite{DinaburgS75},
two important  recent developments were 
\cite{MoserP84} which introduced the deep idea of 
using transformations which are not close to the identity
to eliminate small terms and \cite{Rychlik92} which introduced
a renormalization mechanism.
After that, many more new important refinements 
were introduced in several works (one needs to 
find ways to combine perturbative steps with non-perturbative 
ones). This is still a very active area and progress is
being made constantly. We refer to the lectures of 
Prof.\ Eliasson in this volume for up to date references. 
See also \cite{Eliasson98},\cite{Krikorian99a}.

\item The problem of reducibility is related to the problem of 
existence of pure point spectrum of one-dimensional 
Schr\"odinger operators with quasiperiodic coefficients.
This area has experienced quite significant progress. 
Besides some of the papers mentioned in the previous 
paragraphs, let us mention 
\cite{ChulaevskyS91},
\cite{FrolichSW90},
\cite{Eliasson97}.

\item For Schr\"odinger operators in higher dimensions
with random or
quasi-periodic potential the theory of localization also 
has advanced greatly thanks to a multi-scale analysis which is 
quite reminiscent of KAM theory \cite{FrolichS83},\cite{FrolichS84}.
Indeed, this analogy has been pursued quite fruitfully
\cite{Albanese93}.
\item Even if the symplectic forms that appear in mechanical
systems 
admit a primitive (see later in Section \ref{Geometric_structures}),
 there are other symplectic forms 
without this feature. 
For such forms without a primitive,
one has the possibility of finding 
persistent tori of more dimension than the degrees
of freedom. This has important consequences and 
leads to very interesting examples in ergodic theory. 
See \cite{Yoccoz92b}, \cite{Herman91}. 
(See also \cite{Parasyuk84}, \cite{Parasyuk89},
\cite{FuzhongY98}.)

\item KAM methods have been extended to 
elliptic PDE's -- they are not evolution equations. 
The role of time in KAM has been taken by 
spatial variables. (See \cite{Kozlov83}, \cite{Moser88}, \cite{Moser95}.)
This has also been related to a variational 
structure of the equations \cite{Moser86b}, \cite{Bangert89}.

\item There are some proofs of KAM type theorems based on 
different principles, notably renormalization group, 
\cite{BricmontGK99}, \cite{Koch}, \cite{Kosygin91}. 
This is perhaps related to some recent proofs that 
do not even use Fourier analysis
\cite{KhaninS86},
\cite{SinaiK87}, \cite{SinaiK89},
\cite{KatznelsonO93}, \cite{KatznelsonO89a}, \cite{KatznelsonO89b},
\cite{Stark88}, \cite{Haydn90},
\cite{Stirnemann94}.

\item More interestingly, renormalization group has 
been used to describe the breakdown of 
invariant circles, starting with \cite{McKay82} -- which 
includes a beautiful picture in terms of fixed points 
and manifolds of operators and makes very 
detailed predictions about scalings at breakdown -- or \cite{EscandeD81}, 
which contains a simpler approach that gives less
detailed predictions.
Much of what is known at this level remains 
at the level of numerically well-founded conjectures. 
Indeed, there are still quite important issues that 
are not even known at this level. 
Among the rigorous work in this area, we 
mention \cite{Stirnemann93}, \cite{Stirnemann97}.


\item KAM theory has started to become a tool of applied 
mathematics with the advent of constructive methods 
to assess the reliability of numerical computations
\cite{CellettiC95}, \cite{LlaveR90}, \cite{Schmidt95}, \cite{Jorba99}. 

\item For some special cases of KAM theory, there has 
also  been very important progress examining the limits of validity; 
the role of the arithmetic 
conditions has been clarified for complex mappings -- especially 
quadratic -- \cite{Yoccoz95}. 
See also \cite{Perezmarco00}.
The study 
of the radius of convergence of the linearization 
in the same mappings
\cite{MarmiMY97} has also been quite well understood.

In some twist mappings, there have been very significant advances in the 
study of non-existence of tori 
\cite{Mather88}, \cite{McKayP85}, 
\cite{Jungreis91}. The domains of convergence 
of the  perturbative expansions have been analyzed using 
tools similar to  those used for analytic  complex mappings starting 
in \cite{Davie94}  -- a map which has features between 
those  of a complex analytic map and those of 
a twist map -- and then in \cite{MarmiS92}, \cite{BerrettiM95}, 
\cite{BerrettiG98}.


\item  
Two different techniques to study 
quasiperiodic orbits on twist maps
are the variational 
methods of Mather \cite{MatherF91} and  the
renormalization group \cite{Koch}. 

In many cases, these theories have ranges of 
validity much greater than those covered by 
KAM theory and, therefore provide some glimpse into 
what happens at the breakdown of KAM theory.

\item There has been great progress 
in using ``direct methods'', which are based on 
writing a perturbative expansion and  showing it 
converges by studying more deeply the 
structure of small denominators.

In the study of iterations of 
analytic functions, these methods led
to the original proof of 
Siegel \cite{Siegel42}, which was the first problem in 
which small denominators were understood.
They were also used in the first proof
of the optimal arithmetic conditions
\cite{Brjuno71}. 

In the study of Lindstedt series (see Section \ref{linstedt}),
the proof of convergence by exhibiting 
explicitly cancellations of the series 
was accomplished in \cite{Eliasson96}
(the preprint circulated much earlier).
The proof of the convergence of the 
Lindstedt series  in \cite{Eliasson96} is much more subtle 
than that of \cite{Siegel42}.
Contrary to the terms in the expansions considered 
in \cite{Siegel42}, the terms in  the 
Lindstedt series do grow very fast and one 
cannot establish convergence by just bounding sizes 
but one needs to exhibit cancellations
in the terms.

Expositions  and simplifications of  this work
relating it also to  techniques  of 
perturbative Quantum Field Theory
can be found in \cite{Gallavotti94b},
\cite{GallavottiG95},
\cite{ChierchiaF94} and  extensions 
to some PDE's
in \cite{ChierchiaF96}.

Direct methods not only provide 
alternative proofs of known facts, 
but also  have been used to prove several
results, which at the moment do not seem
to have proofs using  rapidly convergent methods.
To my knowledge, the following results 
established using  direct methods do
not have rapidly convergent proofs:
The existence of 
some invariant manifolds contained in 
center manifolds in \cite{Poschel86} 
was proved using cancellations similar to those
of Siegel. 
It seems that there are no rapidly convergent 
proofs of these results
(however, see \cite{Stolovitch94a},
\cite{Stolovitch94b} which  solve a very related 
problem.)

The deeper cancellations of 
\cite{Eliasson96} have been used to
give a proof of the Gallavotti conjectures 
(which imply, among other consequences, the 
amusing result that an analytic Hamiltonian
near an elliptic fixed point is the sum of 
two integrable systems -- of course integrated  in 
different coordinates.)
\cite{Eliasson88}
and  to prove the 
existence of quasi-flat intersections in \cite{Gallavotti94}.
A problem that remains open is the fact that the Lindstedt
series for lower dimensional KAM tori involve fewer
small divisor conditions than the KAM proof. 
(See \cite{JorbaLZ00} for a discussion of this problem.)
\item Subjects closely related to KAM theory such 
as averaging and Nekhoroshev theory have also 
experienced a great deal of development. 


\item
Even if this is somewhat out of the line of 
topics to be discussed here, we note 
that related fields such as  averaging theory
and Nekhoroshev estimates have also experienced 
very important 
developments. Let us just mention very 
quickly:  An elegant proof of the 
theorem based on approximation by periodic orbits 
\cite{Lochak92}, the proof 
of what are conjectured to be the optimal exponents
\cite{LochakN92},  \cite{Poschel93}, 
\cite{DelshamsG96} -- the latter paper contains a 
unified point of view for KAM and Nekhoroshev theorems --
and the proof of Nekhoroshev estimates in a
neighborhood of an elliptic fixed point 
\cite{GuzzoFB98b}, \cite{FassoGB98a},
\cite{Niederman98}, \cite{Poschel99}.
In a more innovative direction, Nekhoroshev 
type theorems for PDE's have been 
established \cite{BambusiN98}, \cite{Nekhoroshev99},
\cite{Bambusi99a}, \cite{Bambusi99b}.
\item The list could (perhaps should)  be continued, with other topics
that are related to KAM theory and connecting it to 
other theories of mechanics,
such as averaging theory, Aubry-Mather theory, quantum versions of 
KAM theory, rigidity theory, exponential asymptotics or 
Arnol'd diffusion and many others which are not even  mentioned 
mainly because of the ignorance of the lecturer, which he is the 
first to regret.  
\end{itemize}

Needless to say, in this tutorial we cannot hope to do 
justice to all the topics above.
(Indeed, I have little hope that the above 
list of topics and references is complete.) 
The only goal is to provide an entry point to the main 
ideas that will need to be read from the literature
and, possibly, to convey some of the excitement and the beauty
of this area of research.

Clearly, I  cannot (and I do not)  make any claim of 
originality or completeness.  This is not 
a systematic survey of 
topics of current research. The modest goal I set 
for  these notes is to 
help some readers to get started in the
beautiful and active subject of KAM theory
by giving a crude road map. I just 
hope  that the many deficiencies of 
this tutorial will incense somebody into writing a proper review or 
a better tutorial. 
In the mean time, I will be happy to receive 
comments, corrections and suggestions for improvement
of this tutorial
which I will make available electronically.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Some motivating examples}

\mar{linstedt}
\subsection{Lindstedt series for twist maps} \label{linstedt}

One of the original motivations of KAM theory was the study of 
quasi-periodic solutions of Hamiltonian systems.
In this Section we will cover some elementary and well-known examples.

One particularly motivating example is the so-called {\em standard map}.
This is a map from $\real \times \torus$ to itself.
We denote the real coordinate by $p$ and the angle 
one by $q$. Denoting by $p_n$, $q_n$ the values of 
these coordinates at  the discrete time $n$, the 
map can be written as:
\mar{standardhamiltonian}
\begin{equation}\label{standardhamiltonian}
\begin{array}{rcl}
p_{n+1} & = & p_n - \ep V' (q_n)\\
q_{n+1} & = & (q_n + p_{n+1}) \mod 1 ,
\end{array}
\end{equation}
where $V(x) = V(x+1)$ is a smooth
(for our purposes in this Section,  analytic) function.
We will also use a more explicit 
expression for the map.
\mar{standardmap}
\begin{equation}\label{standardmap}
T_\ep(p,q) = \left( p - \ep V'(q), \, q + p - \ep V'(q) \right) \ .
\end{equation}
Substituting the expression for $p_{n+1}$ given in the second
equation of \eqref{standardhamiltonian} into the first,
we see that the system \eqref{standardhamiltonian} is 
equivalent to the second order equation
\mar{standardlagrangian}
\begin{equation}
\label{standardlagrangian}
q_{n+1} + q_{n-1} - 2q_n = -\ep V' (q_n) \ .
\end{equation}

The first, ``Hamiltonian'', formulation 
\eqref{standardhamiltonian} appears naturally in some mechanical 
systems (e.g., the kicked pendulum). 
The second, ``Lagrangian'', one \eqref{standardlagrangian} 
appears naturally from a variational principle, 
namely, it is equivalent to 
the equations 
\begin{equation}\label{EulerLagrange}
\partial\L/\partial q_n =0 
\end{equation}
with
\mar{Frenkel-Kontorova}
\begin{equation}\label{Frenkel-Kontorova}
\L(q) = - \sum_n \left[\frac12 (q_{n+1} - q_n -a)^2 + \ep V(q_n)\right] \ .
\end{equation}
The equations \eqref{EulerLagrange}  -- often called 
Euler-Lagrange equations --
express that $\{q_n\}$ is  a critical point for the action
\eqref{Frenkel-Kontorova}.

The model \eqref{Frenkel-Kontorova} has appeared in solid state physics 
under the name {\em Frenkel-Kontorova model}. (See e.g. \cite{AubryD83}.) 
One physical interpretation (not the only 
possible one) that has led to 
many  heuristic insights
is that $q_n$ is the position of the 
$n^{\rm th}$ atom in a chain. 
These atoms interact with their nearest neighbors 
by the quadratic potential energy $\frac12 (q_{n+1} -q_n -a)^2$ 
(corresponding to springs connecting the nearest neighbors)
and with a substratum by the potential energy $\ep V(q_n)$. 
The parameter $a$ is the equilibrium length of each spring. 
Note that $a$ drops from the equilibrium equations 
\eqref{standardlagrangian} but affects which among all the equilibria
corresponds  to a minimum of the energy.

Another interpretation,  of more interest for the 
theme of these lectures,  is  
that $q_n$ are the positions at consecutive 
times of a one-degree of freedom twist  map. 
The general term in the sum, 
$S(q_{n+1},q_n)$, 
is the 
generating  function of the map. 
(See Section \ref{generating}.)
Then, the Euler-Lagrange equations for 
critical points of the functional are
equivalent to the sequence $\{q_n\}$ being the 
projection of an orbit.


The first formulation \eqref{standardhamiltonian} 
is area preserving  whenever $V'$ is a periodic 
function of the cylinder  -- not necessarily the derivative
of a periodic function
(i.e., the Jacobian of the transformation 
$(p_n, q_n) \mapsto (p_{n+1}, q_{n+1})$ is equal to~1). 
When, as we have indicated, $V'$ is indeed the derivative
of a periodic function, then the map 
is {\sl exact}, a concept that we will discuss in greater
detail in Section \ref{Geometric_structures} and that has 
great importance for KAM theory.

If we look at the map \eqref{standardhamiltonian} 
for $\ep=0$, we note that it becomes 
\mar{integrable}
\begin{equation}\label{integrable}
\begin{array}{rcl}
p_{n+1} & = & p_n\\
q_{n+1} & = & q_n + p_n \ ,
\end{array}
\end{equation}
so that the ``horizontal'' circles $\{p_n={\rm const},\,n\in\zed\}$ 
in the cylinder are preserved and the motion of each $q_n$ 
in each circle is a rigid rotation that is faster 
in the circles with larger $p_n$. 
Note that when $p_0$ is an irrational number, 
a classical elementary theorem in number theory shows 
that the  orbit  is dense on the circle. 
(A deeper theorem due to Weyl shows that it is actually 
equidistributed in the circle.)

We are interested in finding whether, when we turn on the perturbation 
$\ep$, some of this behavior persists. 
More concretely, we are interested in knowing whether there are 
quasi-periodic orbits that persist and that fill a circle densely. 

Problems that are qualitatively similar to \eqref{standardhamiltonian} 
appear in celestial mechanics \cite{SiegelM95} 
and the role of these quasi-periodic orbits 
has been appreciated for many years. 
One can already find a rather systematic study in \cite{Poincare} 
and the treatment there refers to many older works. 


We note that the existence of quasi-periodic orbits 
is hopeless if one allows general perturbations of 
\eqref{integrable}.
For example, if we take a map of the form 
\mar{example1}
\begin{equation}\label{example1}
\begin{array}{rcl}
p_{n+1} & = & p_n - \ep p_n\\
q_{n+1} & = & q_n + p_{n+1} \ ,
\end{array}
\end{equation}
we see that
applying repeatedly 
\eqref{example1},
we have 
$$
p_n = (1 -\ep)^n p_0
$$
so that, when $0 < \ep < 2$,
all orbits concentrate on the very small set $p=0$ and 
we get at most one frequency.  When $\ep < 0$ or $\ep > 2$, 
all the orbits, except those in $p = 0$, blow up to infinity.
Hence, we can have maps with radically different dynamical 
behavior by making arbitrarily small perturbations.

More subtly, the orbits of 
\mar{example2}
\begin{equation}\label{example2}
\begin{array}{rcl}
p_{n+1} & = & p_n +\ep\\
q_{n+1} & = & q_n + p_{n+1} 
\end{array}
\end{equation}
escape towards infinity and never come  back to themselves 
(in particular, can never be quasi-periodic). 

The first example is not area preserving and the motion is concentrated 
in a smaller area (in particular, it does not come back to itself). 
The second example is area preserving but 
has non-zero ``flux''.
\mar{fig1}
\begin{figure}
\centerline{\epsfysize=1.5truein\epsfbox{llave-fig1.eps}}
\caption{The flux is the oriented area between a circle and its image.}
\label{fig1}
\end{figure}

\begin{definition}\label{flux}
The ``flux'' of an area preserving map $T$ of the cylinder 
is defined as follows: 
given a continuous circle $\gamma$ on the cylinder, 
the flux of $T$ is the oriented area between $T(\gamma)$, 
the image of the circle, and~$\gamma$ --- 
see Figure 1. 
\end{definition}

The  fact that the map is area preserving implies 
easily that this flux is 
independent of the circle (hence it is an invariant of the map). 
Clearly, if the map $T$ had a continuous invariant circle, 
the flux should be zero, 
so we cannot find an invariant circle in \eqref{example2} for 
$\ep\ne0$ since the flux is $\ep$.

\begin{remark}
Note that if a map has a homotopically nontrivial invariant curve, then 
the flux is zero (compute it for the curve). Conversely, if the flux is 
zero, any  homotopically non-trivial curve has
 to have an intersection with its image. 
(If it did not  have any intersection, by Rolle's theorem, then the 
image would always be above or below the curve.) 
The property that every curve intersects its image plays an important 
role in KAM theory and is sometimes called {\sl intersection property}.
Besides area preserving and zero flux, there are other geometric assumptions 
that imply  the intersection property, notably, reversibility
of the map (see \cite{ArnoldS86}).
\end{remark}


As a simple calculation shows, since the perturbation in 
\eqref{standardhamiltonian} is of the form $V' (q_n)$, with $V$ 
1-periodic --- and therefore $\int_0^1 V' (q_n) \, dq_n = V(1) - V(0) = 0$ 
--- the flux  of \eqref{standardhamiltonian}
is zero.

We see that even the possibility that there exist these quasi-periodic 
orbits filling an invariant circle depends on geometric invariants. 

Indeed, when we consider higher dimensional mechanical systems, 
the analogue of area preservation is the preservation of a symplectic form, 
the analogue of the flux is the Calabi invariant \cite{Calabi} 
and the systems with zero Calabi invariant are called exact.

We point out, however, that the relation of the geometry to KAM theory 
is somewhat subtle. Even if the above considerations 
show that some amount of geometry is necessary, 
they by no means show what the geometric structure is, 
and much less hint on how it is  to be incorporated in the proof. 

The first widely used and generally 
applicable method to study numerically 
quasi-periodic orbits seems to have been the method of Lindstedt. 
(We follow in this exposition \cite{FalcoliniL92b}.)

The basic idea of Lindstedt's method is  to  consider a  family of
quasiperiodic functions depending on the parameter $\ep$ and to impose that 
it becomes a solution of our equations of motion.
The resulting equation is solved -- in the sense of 
power series in $\ep$ -- by equating terms with same powers
of $\ep$ on both sides of the equation.  We will see how to 
apply this  procedure to
\eqref{standardhamiltonian} or \eqref{standardlagrangian}. 

In the Hamiltonian formulation \eqref{standardhamiltonian},
\eqref{standardmap}
we  seek $K_\ep :\TT^1 \to \RR \times \TT^1$ 
in such a way that 
\mar{linstedthamilton}
\begin{equation}\label{linstedthamilton}
T_\ep \circ K_\ep (\theta) = K_\ep (\theta +\omega) \ .
\end{equation}
We set:
\begin{equation}\label{TaylorK}
K_\ep (\theta) = \sum_{n=0}^\infty \ep^n K_n (\theta)
\end{equation}
and try to solve by matching powers  of $\ep$
on both sides of \eqref{linstedthamilton},
(after expanding $T_\ep\circ K_\ep(\theta)$
as much as possible in $\ep$ using Taylor's theorem). 
\footnote{The notation is somewhat unfortunate since 
$K_n$ could  mean both the $n^{\rm th}$ term in the Taylor expansion 
and $K_\ep$ evaluated for $\ep = n$. 
In the discussion that follows, $K_1$, $K_2$, etc. will always refer to 
the Taylor expansion. Note that $K_0$ is the same in both meanings. }
That is,
\begin{eqnarray*}
T_\ep \circ K_\ep (\theta) 
& = & T_0\circ K_0 + \ep [T_1 \circ K_0 + (DT_0 \circ K_0) K_1]\\
&&\quad + \ep^2 [T_2 \circ K_0 + (DT_0\circ K_0) K_2 \\
&&\qquad\qquad + (DT_1\circ K_0) K_1 
+ \frac{1}{2}(D^2 T_0\circ K_0) K_1^{\otimes2}] + \ldots \ .
\end{eqnarray*}

In the Lagrangian formulation \eqref{standardlagrangian} 
we seek $g_\ep:\RR \to \RR$ satisfying 
$g_\ep(\theta+1) = g_\ep(\theta)+1$ 
--- or, equivalently, $g_\ep(\theta) = \theta+\ell_\ep (\theta)$ 
with $\ell_\ep (\theta+1)=\ell_\ep(\theta)$, 
i.e., $\ell_\ep:\TT^1 \to \RR$ --- 
in such a way that 
\mar{linstedtlagrange}
\begin{equation}
\label{linstedtlagrange}
\ell_\ep (\theta+\omega) + \ell_\ep(\theta-\omega) - 2\ell_\ep (\theta) 
       = - \ep V' (\theta+\ell_\ep(\theta))\ .
\end{equation}

If we find solutions of  \eqref{linstedtlagrange},  we can
ensure that some orbits $q_n$ solving \eqref{standardlagrangian} 
can be written as 
$$q_n = n\omega + \ell_\ep (n\omega)\ .$$

Note that the fact that, when we choose coordinates on the circle, 
we can put the origin at any place, implies that 
$K_\ep(\cdot+\sigma)$ is a solution of \eqref{linstedthamilton} 
if $K_\ep$ is, 
and that $\ell_\ep (\cdot +\sigma)+\sigma$ is a solution of 
\eqref{linstedtlagrange} if $\ell_\ep$ is. 
Hence, we can -- and will -- always  assume that 
\begin{equation}
\label{normalization}
\int_0^1 \ell_\ep (\theta) \,d\theta=0 \ . 
\end{equation}
This assumption will not interfere with existence questions, since it 
can always be adjusted, but will ensure uniqueness.


If we now write 
\footnote{The same remark about the unfortunate notation we made in 
\eqref{TaylorK} also applies here.}
$$\ell_\ep(\theta) = \sum_{n=0}^\infty \ell_n (\theta) \ep^n$$
and start matching powers, we see that matching the zero order terms yields
\mar{zeroorder}
\begin{equation} \label{zeroorder}
\begin{array}{c}
L_\omega \ell_0 (\theta) 
\equiv \ell_0 (\theta+\omega) + \ell_0 (\theta-\omega) 
- 2\ell_0 (\theta) = 0 \ , \\
\noalign{\vskip6pt}
\ds \int_0^1 \ell_0 (\theta) \,d\theta =0 \ .
\end{array}
\end{equation}


The operator $L_\omega$ in 
\eqref{zeroorder}, which will appear repeatedly 
in KAM theory, can be conveniently analyzed by using Fourier 
coefficients.  Note that
$$
L_\omega e^{2\pi ik\theta} = 2(\cos 2\pi k\omega-1) \, 
e^{2\pi ik\theta} \ .
$$ 
Hence, if $\eta (\theta) = \sum_k \hat\eta_k e^{2\pi ik\theta}$,
then the equation 
$$
L_\omega \varphi (\theta) = \eta (\theta)
$$
reduces formally  to
$$
2 (\cos 2\pi k\omega-1) \, \hat \varphi_k = \hat\eta_k \ .
$$
We see that if $\omega \notin \que$, the equation \eqref{zeroorder} can be 
solved formally in Fourier coefficients and $\ell_0 =0$. 
(Later we will develop an analytic theory and describe precisely 
conditions under which these solutions can indeed be 
interpreted as functions.)

When $\omega \notin\que$, we see that $\cos 2\pi k\omega \ne1$ 
except when $k=0$. 
Hence, even to write a solution we need $\hat\eta_0 = 0$,  
and then we can write the formal solutions as 
\mar{solution}
\begin{equation}\label{solution}
\hat\varphi_k = \frac{\hat \eta_k}{2(\cos 2\pi k\omega -1)} \ , 
\qquad k\ne0
\end{equation}
Note, however, that the status of the solution
\eqref{solution} is somewhat complicated 
since $2\pi k\omega$ is dense on the circle and, hence, the denominator 
in \eqref{solution} becomes arbitrarily small. 
Nevertheless, provided that  $\eta$ is a 
trigonometric polynomial 
(see  Exercise~\ref{trigpol}, where this is established under certain 
circumstances)
and  $\omega$ is irrational, we can solve 
the equation \eqref{zeroorder}. In case that the \RHS is analytic 
and that the number $\omega$ satisfies certain number
theoretic properties, 
in Exercise~\ref{secondorder}, we can show that the solution is 
analytic.


The equation obtained by matching $\ep^1$  is:
\mar{firstorder}
\begin{equation}\label{firstorder}
L_\omega\ell_1 (\theta) = - V' (\theta) \ ; \qquad 
\int_0^1 \ell_1 (\theta)\,d\theta=0 \ .
\end{equation}
Since $\int_0^1 V' (\theta)\,d\theta =0$, we see that \eqref{firstorder} 
admits a formal solution. 
(Again, we note that the fact that $\int_0^1 V' (\theta)\,d\theta=0$ 
has a geometric interpretation as zero flux.) 

Matching the $\ep^2$ terms, we obtain 
\mar{secondorder}
\begin{equation}\label{secondorder}
L_\omega \ell_2 (\theta) = -V''(\theta) \ell_1 (\theta) \ ; \qquad 
\int_0^1 \ell_2 (\theta) \,d\theta=0 \ , 
\end{equation}
and, more generally, 
\mar{norder}
\begin{equation}\label{norder}
L_\omega \ell_n (\theta) = S_n (\theta) \ ; \qquad
\int_0^1 \ell_n (\theta) \, d\theta=0 \ , 
\end{equation}
where $S_n$ is an expression  which involves derivatives of $V$ and terms 
previously computed. 
It is true (but by no means obvious) that 
\begin{equation} \label{cancellation}
\int_0^1 S_n (\theta)\,d\theta=0,
\end{equation}
so that we can solve \eqref{norder} and proceed to 
compute the series to all orders (when $\omega$ is 
irrational and $S$ is a trigonometric polynomial or 
when  $\omega$ is Diophantine (see later) 
and $S$ is analytic).
The fact that \eqref{cancellation} holds was already pointed out in 
Vol II of \cite{Poincare}.

We will establish (\ref{cancellation}) 
directly  by a seemingly miraculous 
calculation, whose meaning will become clear 
when we study the geometry of the problem. 
(We hope that going through the messy calculation 
first will give an appreciation for the geometric methods.
Similar calculations will appear in Section~\ref{Lagrangianmethod}.)

The desired result \eqref{cancellation} follows 
if we realize that denoting 
$\ell_\ep^{[\le n]}(\theta) = \sum_{i\le n} \ep^i \ell_i(\theta)$, 
we have: 
\begin{equation}\label{nordersum}
L_\omega \ell_\ep^{[\le n]} = \ep^n S_n
\end{equation}


Hence, multiplying \eqref{nordersum} by 
$\left[1+\ell_\ep^{[\le n]}{}'(\theta)\right]$ 
and integrating, we obtain 
\begin{equation}\label{totalintegral}
\begin{split}
 0 &= \int_0^1 L_\omega \ell_\ep^{[\le n]}(\theta) \,d\theta
+ \int_0^1 L_\omega \ell_\ep^{[\le n]}(\theta) \, 
  \ell_\ep^{[\le n]}{}'(\theta)\,d\theta \\
&+ \int_0^1 V' (\theta + \ell_\ep^{[\le n]}(\theta)) 
  \left[1 + \ell_\ep^{[\le n]}{}' (\theta) \right] \,d\theta \\
&-\ep^n \int_0^1 S_n (\theta) \, 
  \ell_\ep^{[\le n]}{}' (\theta) \,d\theta \\
&- \ep^n \int_0^1 S_n (\theta) \,d\theta  \\
&+ O(\ep^{n+1}) \ .
\end{split}
\end{equation}

Now, we are going to use different arguments to show 
that all the terms in \eqref{totalintegral}
except $\int S_n(\theta) \, d\theta$ vanish. 
This will establish the desired result.

By changing variables in the integral we have:
\begin{equation}\label{integralV}
\int_0^1 V'(\theta + \ell_\ep^{[\le n]}(\theta))
\left[1+\ell^{[\le n]}_\ep{}'(\theta)\right] \,d\theta = 0.
\end{equation}
Furthermore, it is clear that 
$\int_0^1 L_\omega \ell_\ep^{[\le n]}(\theta)\,d\theta=0$
because, for any periodic function  $f$,
$\int_0^1 f(\theta)\,d\theta = 
\int_0^1 f(\theta +\omega)\,d\theta =
\int_0^1 f(\theta -\omega)\,d\theta$.

Noting that 
$$
\int_0^1 \ell_\ep^{[\le n]}(\theta) \, \ell_\ep^{[\le n]}{}'(\theta) \,d\theta
= \int_0^1 \frac12 
\biggl(\left[\ell_\ep^{[\le n]}(\theta)\right]^2\biggr)^{'}\,d\theta = 0 
$$
and that 
\begin{eqnarray*}
\int_0^1 \ell_\ep^{[\le n]} (\theta+\omega) \, 
\ell_\ep^{[\le n]}{}'(\theta)\,d\theta 
&=& -\int_0^1 \ell_\ep^{[\le n]}{}'(\theta+\omega) \, 
\ell_\ep^{[\le n]}(\theta)\,d\theta\\
&=& - \int_0^1 \ell_\ep^{[\le n]}{}'(\theta) \, 
\ell_\ep^{[\le n]}(\theta-\omega)\,d\theta \ ,
\end{eqnarray*}
we obtain that
$$
\int_0^1 L_\omega \ell_\ep^{[\le n]}(\theta) \, 
  \ell_\ep^{[\le n]}{}'(\theta)\,d\theta  = 0.
$$

It is also clear  that, because $\ell_0$ is a constant,
\begin{equation}\label{ishighorder}
\ep^n S_n(\theta) \ell_\ep^{[\le n]}{}' (\theta)  = O(\ep^{n+1})
\end{equation}

Hence,
putting together \eqref{totalintegral}
and the subsequent identities, we obtain the desired conclusion that 
$\int_0^1 S_n(\theta)\,d\theta$ vanishes. 

\qed

\begin{remark}
There is a geometric interpretation for the vanishing of this integral. 
One can compute the flux over the curve in the Hamiltonian formalism 
predicted by $\ell_\ep^{[\le n]}(\theta)$. The fact that the flux vanishes is 
equivalent to the  fact that the integral vanishes.
\end{remark} 


\begin{remark}
Note that it is rather remarkable that for every irrational frequency 
we can find formal solutions (when the perturbation is a trigonometric polynomial), or 
for Diophantine frequencies for analytic perturbations.
Heuristically, this can be explained by the fact that, in area preserving 
systems, we do not have small parts of the system controlling the long 
term behavior (as it is the case in dissipative systems) and, hence, 
perturbations still have to leave open many 
possibilities for motion of the system.

When one applies the Lindstedt method to dissipative systems, 
\cite{RandA87}, typically  one sees that, except for a few 
frequencies, the perturbation equations do not have a solution.
\end{remark}

\begin{remark}
The Lindstedt method can be used for dissipative systems 
\cite{RandA87}. (Code for easy to use, 
general purpose implementations is available from 
\cite{RandAProg}.)
Then, one considers 
$$
T_\ep \circ K_\ep (\theta) = K_\ep (\theta + \omega_\ep)
$$
with $\omega_\ep = \sum \ep^n \omega_n$.
One has to choose the terms $\omega_0,\ldots,\omega_n$, so that 
the equations \eqref{norder} have solutions. 
It is a practical and easily implementable 
method to compute limit cycles.
\end{remark}

\begin{exercise}\label{trigpol}
Show that if $V$ is a trigonometric polynomial, 
then $\ell_n$ is also a trigonometric polynomial.
Moreover, $\deg(\ell_n) \le A n + B$ where $A$ and 
$B$ are constants that depend only on  the 
degree of $V$. (For a trigonometric polynomial,
$V(\theta) = \sum_{|k| \le M} \hat V_k \exp(2\pi i k \theta)$,
the degree is $M$ when $\hat V_M \ne 0$ or 
$\hat V_{-M} \ne 0.$) 

As a consequence, if $V$ is a trigonometric polynomial 
and $\omega$ is irrational, then the  Lindstedt procedure 
can be carried out to all orders.
\end{exercise}

\begin{remark}
The above procedure can be
carried out  even in the case that
the function $V(x)$ is $e^{2\pi i x}$. 

In this case, we obtain the so-called semi-standard map. 
It can be easily shown that the  trigonometric polynomials 
that appear in the series  only contain terms with
positive frequencies. 
This makes the terms in the Lindstedt series
easier to analyze than those of the 
case $V(x) = e^{2\pi i x} + e^{ - 2\pi i x}$. 
Indeed,  the analytical properties 
of  the terms of the series 
for $V(x) = e^{2\pi i x}$
are very  similar to those of
the normalization problem  for a 
polynomial. 

We refer to 
\cite{GreeneP81} for numerical 
explorations,  to \cite{Davie94} for rigorous upper 
bounds of the radius of convergence and to
\cite{BerrettiM95}, 
\cite{BerrettiG98} for  a method to transfer results from this 
complex case to the real one.
\end{remark}


The convergence of the expansions obtained remains, 
at this stage of the argument we
have presented,  highly problematic.
Note that, at every stage, \eqref{norder} involves small divisors. 
Worse still, the $S_n$'s are formed by multiplying terms obtained 
through solving small divisor equations.  Hence, the $S_n$
could be much bigger than the individual terms.

Poincar\'e undertook in 
\cite{Poincare},  Paragraph  148
a study of the convergence of these series. 
He obtained negative results for uniform convergence in a parameter 
that also forced the frequency to change. 
His conclusions are reproduced below. 
(I transcribe the French as an example of the extremely nuanced
way in which Poincar\'e formulated the result.)
Roughly, he says that one can conclude that the series 
does not converge, then points out that this has not been 
proved rigorously and that there are cases that could be 
left open, including quadratic irrationals. 
The conclusion is that, even if the divergence has not 
been proved, convergence is quite improbable.

\begin{quote}

Il semble donc permis de conclure que les s\'eries (2)
ne convergent pas. 

Toutefois le raisonnement qui pr\'ec\`ede ne suffit pas 
pour \'etablir ce point avec une rigueur compl\`ete.

En effet, ce que nous avons d\'emontr\'e au ${\rm n}^{\rm o}$~42
c'est  qu'il ne peut  pas arriver que, pour toutes les 
valeurs de $\mu$ inf\'erieures \`a une certaine limite, il y ait une
double infinit\'e de solutions p\'eriodiques, et il nous suffirait 
ici que cette double infinit\'e exist\^at pour une valeur de 
$\hat \mu$ d\'etermin\'ee, diff\'erente de $0$ et 
g\'en\'eralement tr\`es petite.

[....]

Ne peut-il pas arriver que les s\'eries (2) convergent 
quand on donne aux $x^0_i$ certaines valeurs convenablement choisies?

Supposons, pour simplifier, qu'il y ait deux degr\'es de libert\'e;
les s\'eries ne pourraient-elles pas, par exemple, converger
quand $x^0_1$ et  $x^0_2$  ont \'et\'e  choisis  de telle sorte que 
le rapport $\frac{n_1}{n_2}$ soit incommensurable, et que  son carr\'e 
soit au contraire commensurable
(ou quand le rapport  $\frac{n_1}{n_2}$
est assujetti \`a une autre condition analogue \`a celle 
que je viens d'\'enoncer un peu au hasard)?

Les raisonnements de ce Chapitre ne me permettent pas
d'affirmer que ce fait ne se pr\'esentera pas. 
Tout ce qu'il m'est permis de dire, c'est qu'il
est fort invraisemblable.
\end{quote}


This was remarkably prescient since indeed the series do converge 
for Diophantine numbers and, in particular, 
for algebraic irrationals 
(see Section~\ref{Diophantine_properties}, Theorem~\ref{Liouville}). 

It is not difficult to show that, 
for Diophantine frequencies,  these series satisfy 
estimates that fall short of  
showing analyticity
\begin{equation}\label{Gevrey}
\|\ell_n\|_\sigma \le (n!)^{\nu}
\end{equation}
where $\nu$ is a  positive number. 
These estimates are sometimes called Gevrey estimates 
and they appear very frequently in asymptotic analysis. 

It is not difficult to construct examples 
(indeed we present one in Exercise \ref{withoutgroup})
which have a similar structure and in which the linearized
equations that we have to solve at each step satisfy 
similar estimates. Nevertheless 
they saturate  \eqref{Gevrey}. Indeed, in many
apparently similar problems with a very similar 
structure
(e.g. Birkhoff normal forms near a 
fixed point, normal 
forms near a torus, jets of center manifolds)
the bounds  
\eqref{Gevrey} are saturated. We will not have 
time to discuss these problems in these notes.


The proof of convergence of Lindstedt series was obtained in 
\cite{Moser67} in a somewhat indirect way. Using the KAM 
theory, it is shown that the solutions produced 
by the KAM theory are analytic in  the perturbation parameter.
It follows that the coefficients of the expansion have
to be the terms of the Lindstedt series and, therefore, 
that the Lindstedt series are convergent. 

The example in Exercise \ref{withoutgroup} shows that the 
convergence that one finds in 
KAM theory  has to depend on the existence  of
massive cancellations.


The direct study of the Lindstedt series was tackled successfully 
in \cite{Eliasson96}. One needs to exhibit remarkable cancellations. 
The papers \cite{Gallavotti94b} and \cite{ChierchiaF94} 
contain another version of the cancellations above relating it 
to methods of quantum field theory.

We note that the  transformations that 
reduce a map to its
Birkhoff normal form either near 
a fixed point or near a torus have long been known 
to diverge.
(See \cite{Siegel54}, \cite{Moser60}.)

Examples of divergence of asymptotic series 
were constructed in \cite{Poincare}. To justify their
empirically observed usefulness, the same reference 
developed a theory of asymptotic series, which
has a great importance even today.


It should be remarked that, at the moment of this
writing,
the convergence of Lindstedt series in 
slightly different situations (lower dimensional tori 
\cite{JorbaLZ00} or 
the jets for center manifolds of positive definite systems
\cite{Mielke91}, p. 39)
is still an open problem.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\subsection{Siegel disks} \label{Siegel}

The following example is interesting because the geometry is reduced to a 
minimum and only the analytical difficulties remain. 
Not surprisingly, it was the first small divisors 
problem to be solved \cite{Siegel42}, albeit with a 
technique very different from KAM. 
(Even if we will not discuss the original Siegel 
technique in these notes, we point out that, 
besides the original paper, there are 
more modern expositions and extensions 
\cite{Brjuno71}, \cite{Poschel86}.)


This problem is quite paradigmatic both for 
KAM theory and for the theory of holomorphic dynamics. 
In these lectures, we will discuss only the KAM 
aspects and not the  holomorphic dynamics. 
A very good introduction to the problems 
connected with Siegel's theorem, 
including both the KAM 
aspects and the holomorphic dynamics
aspects, is \cite{Herman87}. More up to date references are
\cite{Perezmarco92},
\cite{Yoccoz95}. The lectures \cite{Marmi99} 
contain a great deal of material on the Siegel problem.


We consider analytic maps $f:\cee \to \cee$, $f(z) = az+N(z)$ with 
$N(0) = 0$, $N'(0) = 0$, 
and we are interested in studying their dynamics near the origin. 
\medskip

When $|a|\ne1$, it is easy to show that the dynamics, up to an 
analytic change of variables is that of $az$. 
More precisely, there exists an $h: U \subset \cee\to\cee$, $h(0)=0$, $h'(0)=1$ and 
\mar{semiconjugacy}
\begin{equation}\label{semiconjugacy}
f\circ h=h(az)
\end{equation} in a neighborhood of the origin.

The proof for $|a|>1$ can be easily obtained as follows (the case 
$ 0 < |a|<1$ follows by considering $f^{-1}$ in place of $f$). 

We seek a fixed point of $h \mapsto f\circ h\circ a^{-1}$ 
on a space of functions 
$h(z) = z+\Delta (z)$ with $\Delta (z)=O(z^2)$. 

That is, we seek fixed points of the operator 
$$
\Tau (\Delta) = a\, \Delta \circ a^{-1} 
+ N\circ (\Id +\Delta)\circ a^{-1}\ .
$$
We note that, on a space of functions with 
$\|\Delta\|_r = \sup_{|z|\le r} |\Delta (z)/z^2|$, 
the operator $\Tau$ is a contraction if $r$ is sufficiently small. 
Note that then $\Tau^n(0)$ converges uniformly on a ball as $n\to\infty$ 
and the limit is analytic. 

\begin{remark}
Note that the previous argument works without any significant change when 
$f:\cee^d \to \cee^d$ and $a$ is a matrix all eigenvalues of which 
have modulus less than 1. Indeed,  a very 
similar result for flows  already appears
in Poincar\'e's thesis \cite{Poincare78}, where it was 
established using the majorant method. (Remember that the concept of Banach
spaces had not been yet formalized, so that 
fixed point proofs were unthinkable).
The method in \cite{Poincare78} can be adapted 
without too much difficulty to cover the theorem 
stated above.  Hence, the situation 
when all the eigenvalues are smaller than one is sometimes 
called  {\em  the Poincar\'e domain}. 
\end{remark}

The situation that remains to be settled is that when $|a|=1$. 

\begin{remark}
Building on the case  $|a| < 1 $, there is 
a lovely proof by Yoccoz \cite{Herman87}  using complex function 
theory that one can extend the conjugacies for $|a| < 1$  to 
a positive measure set 
with $|a| = 1$.  Several elements of this proof 
can be used to obtain a very fast algorithm to 
compute  the so called {\sl Siegel radius}.
(See the definition in Proposition~\ref{univalent}.)

Another cute proof of  a  particular case of Siegel's theorem is in 
\cite{Llave83} adapting a method of \cite{Herman86}. This method
can be applied to a variety of one-dimensional problems.

The method of \cite{Siegel42} has been quite refined 
and extended in \cite{Brjuno71}, \cite{Brjuno72}. 

We will not discuss the above proofs here,
because, in contrast with KAM ideas that have a
wide range of applications, they  seem to be 
rather restricted.


\end{remark}

It is typical of complex dynamics that there are very few possibilities 
for the dynamics. 
Either it is very unstable or it is a rigid rotation (up to a change of 
variables).

We will prove something more general.
\mar{rigid}

\begin{lemma}\label{rigid}
Let $f:\cee^d\to\cee^d$ be analytic in a neighborhood of the origin and 
$$
f(0)=0 \ , \qquad Df(0)=A \ ,
$$ 
where $A$ is a diagonal matrix with all the diagonal elements 
of unit modulus (hence $\|A^{-n}\| = 1 \ \forall n \in \integer$).

Assume that there is a domain $U$, $0\in U$ and 
a constant $K > 0$  such that 
for all $n \in \nat$
\mar{equibounded1}
\begin{equation}\label{equibounded1}
\sup_{z\in U} |f^n(z)| \le K\ .
\end{equation}
Then there exists an analytic function $h:U\to \cee^d$ such that $h(0)=0$, 
\begin{equation}\label{conclusion}
h'(0)=\Id\ , \qquad h\circ f=A\circ h.
\end{equation}
\end{lemma}

Of course, by the implicit function theorem 
a solution of \eqref{conclusion} 
implies that there is a solution of 
\eqref{semiconjugacy} ($h$ in \eqref{semiconjugacy}
is the inverse of $h$  in \eqref{conclusion}).

Note also that the assumption \eqref{equibounded1} implies, 
by Cauchy estimates that $|Df^n(0)| \le K'$, hence, 
that all the eigenvalues are inside the closed unit 
circle and that the eigenvalues on the unit circle 
have trivial Jordan blocks.  If rather than assuming 
\eqref{equibounded1} for $n \in \nat$, we assumed 
it for $n \in \integer$, this would imply the assumption 
that $A$ is diagonal  and has the eigenvalues on 
the unit circle.

\begin{proof}
Consider 
$$
h^{(n)} (z) = \frac{1}{n} \sum_{i=0}^{n-1} A^{-i} f^i(z)\ .
$$
Note that, using the definition of $A$ and 
\eqref{equibounded1}, we have:
\mar{normalization2}
\mar{equibounded}
\mar{intertwine}
\begin{eqnarray}
&&h^{(n)} (0) =0\ ,\quad h^{(n)}{}' (0) =\Id\ , \label{normalization2}\\
&&\sup_{z\in U} |h^{(n)} (z) |\le K\ , \label{equibounded} \\ 
&&h^{(n)}\circ f(z) = Ah^{(n)} (z) + (1/n) A [A^{-n} f^{n} (z)-z]\ . 
\label{intertwine}
\end{eqnarray}
By \eqref{equibounded}, $h^{(n)}$ restricted to $U$ is a normal family and we 
can find a subsequence converging uniformly on compact sets to a function 
$h$. 
Using \eqref{normalization2}, we obtain that $h(0)=0$, $h' (0)=\Id$.

Note also that, 
since $\left| f^n (z)\right|$ is bounded independently of $n$, 
by \eqref{equibounded1}
and so is $z$ for  $z \in U$, we have that 
$$
\frac{1}{n} A [A^{-n} f^{n} (z)-z]
$$ 
converges  to zero uniformly on any compact set
contained in $U$ as $n\to\infty$.  
Therefore,
taking the limit $n\to\infty$ of \eqref{intertwine}
along this subsequence, 
we obtain $h\circ f=A\circ h$.
\end{proof}

\begin{exercise} 
Show that one can always assume that  $U$  is
simply connected.
(Somewhat imprecisely, but 
pictorially, if we are given a $U$ with holes, 
we can always consider $\tilde U$ obtained by filling the holes 
of $U$.  The maximum modulus principle shows that $f^n$ is 
uniformly bounded in $\tilde U$.)

In one dimension, show that the Riemann mapping that 
sends $U$ into the unit disk and $0$ to itself 
should satisfy \eqref{conclusion} except the normalization of 
the derivative.
\end{exercise}

\begin{proposition}\label{uniqueness}
If  no product of two or more eigenvalues of $A$ 
(repetitions allowed) is again an eigenvalue of $A$, then 
the function $\tilde h$ satisfying 
\eqref{conclusion} is unique even in the sense of formal power series.
\end{proposition}

Note that, when $d = 1$ the condition of 
Proposition \ref{uniqueness} reduces to the fact that
$A$ is not a root of unity. In particular, it is satisfied 
when the modulus of $A$ is not equal to one.
When the modulus  is equal to $1$, the hypothesis of 
Proposition \ref{uniqueness} reduces to $a$ not being 
a root of unity, which is the same as 
$a = \exp( 2 \pi i \theta) $ with $\theta \in \real -\rational$.


\begin{proof}
If we expand using the standard Taylor formula 
for multi-variable functions,
$$
f(z) = \sum_{n=0}^\infty f_nz^{\otimes n}
$$
(where $f_n$ is a symmetric $n$-linear form taking values in $\cee^d$) 
and seek a similar expansion for $\tilde h$, we notice that 
$$
A\tilde h_n - \tilde h_n A^{\otimes n} = S_n \ ,
$$ 
where $S_n$ is a polynomial expression involving only the coefficients 
of $f$ and $\tilde h_1 = \Id,\ldots,\tilde h_{n-1}$.

As it turns out, the spectrum of the operator $\L_A$
acting on $n$-multilinear forms  by
\begin{equation} \label{action}
\tilde h_n \mapsto A \tilde h_n - \tilde h_n A^{\otimes n}
\end{equation}
is:
 
\begin{equation}\label{spectrumis}
\Spec(\L_A) = 
\bigl\{ a_i - a_{\sigma_1} \cdots a_{\sigma_n} \ : \ 
i\in \{ 1,\ldots,d \}, \ 
\sigma_1,\ldots,\sigma_n\in \{1,\ldots,d\} \bigr\},
\end{equation}
where $a_i$ denotes the eigenvalues of $A$.


See, e.g.,~\cite{Nelson69} for a detailed computation 
which also leads to interesting algorithms.
We just indicate that the result can be obtained very
easily when the matrix is diagonalizable since one 
can construct a complete set of eigenvectors of 
\eqref{action} by taking tensor products of eigenvectors of 
$A$. The set  of diagonalizable 
matrices is dense in the space of 
matrices. Hence the desired 
identity between the  spectrum of 
\eqref{action} and the set described in 
\eqref{spectrumis} holds in a dense set of matrices.
We also note that the spectrum is continuous
with respect to the linear operator.

\end{proof}

When $d=1$ and $|a|=1$,  as 
we mentioned before, the condition  
for Proposition \ref{uniqueness}
(usually referred to as {\sl  non-resonance condition})
reduces to:
$$
a= e^{2\pi i\omega}\ ,\qquad \omega\in \RR -\que\ .
$$
We note that, even if $a(a^{n-1}-1)\ne0$, it can be arbitrarily close 
to zero, because $e^{2\pi i\omega (n-1)}$ is dense in the unit circle. 
Hence, we also have small divisors in the computation of the $\tilde h_n$'s. 

We note that when $d>1$, we can have small divisors if there is 
some $|a_i|>1$,  $|a_j|<1$ even if they are real. 
When all $|a_j| =1$, $a_j = e^{2\pi i\omega_j}$, the non-resonance 
condition amounts to 
\mar{nonresonanceanalytic}
\begin{equation}\label{nonresonanceanalytic}
\sum_j k_j \omega_j - \omega_i \notin \integer \ , \qquad
\forall\, i, \ \forall\, k_j \in \nat, \ \sum_j k_j \ge 2 \ .
\end{equation}

We now investigate a few of the analyticity properties of $h$. 
Of course, the power series expansion converges in a disk (perhaps
of zero radius) but we could worry about whether it is possible 
to perform analytic continuation and obtain $h$ defined on 
a larger domain.

\mar{univalent}
\begin{proposition} \label{univalent}
If $f$ is entire, the maximal domain of definition 
of $h$ is  invariant under
$A$. 

In particular, when $d = 1$, $|a| = 1$, $a^n \ne 1$, the domain of 
convergence is a disk. (The radius of the disk of convergence 
of the  function $h$ such that $h'(0) = 1$ is called the 
Siegel radius.)

Moreover,  when $d = 1$, $|a| \le 1$, $a^n \ne 1$, 
the function $h$ is univalent in the domain of convergence.
\end{proposition}

\begin{proof}
To prove the first point, we just observe that if $f$ is entire and $h$
is analytic in the neighborhood of a point $z_0$, we can use the
functional equation \eqref{semiconjugacy} to define  the function $h$ in
a neighborhood of $A\, z_0$. 

Hence, if $h$ was defined in a domain $D$
and $z_0 \in D$ was connected to the origin by a path $\gamma \subset
D$, we see that $A z_0$ is connected to the origin by $A \gamma
\subset A D$. We conclude that $h$ is defined in $A D \cup D$ and that
the analytic continuation is unique. If we take $D$ to be the maximal
domain of definition, we obtain $A D \cup D \subset D$. Hence $A D = D$.

The second statement follows by observing that the only domains invariant
under an irrational rotation are disks.

To prove univalence, we assume  that 
$h(z_1) = h(z_2)$ 
and that one of them -- say $z_2$ -- is different from $0$.
We want to conclude that $z_1 = z_2$.

Using \eqref{semiconjugacy}, we obtain 
$h( a z_1) = h(a z_2)$. Repeating the process, 
$h( a^n z_1 ) = h(a^n z_2) $. 

Hence, when 
$z \in \{ a^n z_2\}$, we have 
\mar{contradiction}
\begin{equation} \label{contradiction}
 h(z) = h( z \alpha)
\end{equation} 
with $\alpha =  z_1/z_2$. Since the  set 
where \eqref{contradiction} holds has an accumulation point
(when $|a| < 1 $, it accumulates to $0$; when $|a| = 1$, since 
it is an irrational rotation, the orbits are dense on circles), we 
conclude that it holds all over the unit disk. 
Taking derivatives at $z = 0$, using $h'(0) = 1$, we obtain 
$ \alpha = 1$.
\end{proof}

\mar{highd}
\begin{exercise}\label{highd}
Show that the conclusions of 
Proposition \ref{univalent}
remain true if we consider 
$d > 1$ and $A$ 
a diagonalizable matrix
with all eigenvalues in the unit disc 
and satisfying \eqref{nonresonanceanalytic}.
Namely:
\begin{itemize}
\item[i')] The domain of definition is a polydisk.
\item[ii')] The function is univalent in its domain of 
definition.
\end{itemize}
\end{exercise}

\begin{exercise}
Once we know that the domain of the function $h$ in 
\eqref{semiconjugacy} is
a disk, the question is to obtain estimates of the radius. 

Lower bounds are obtained from KAM theory. 

Obtain upper bounds also using the fact that
by the Bieberbach-De Branges theorem, 
the Taylor coefficients of a univalent function
satisfy upper bounds that depend on the radius of the disk. 
On the other hand, we know the coefficients explicitly. 

Also obtain upper bounds when $f(z) = a z + z^2 $ using the area formula
for univalent functions 
$\Area h( B_r(0) ) = \pi \sum_{i=1}^\infty i\, |h_i|^2 r^{2 i}$ 
knowing that the  
range of $h$ -- orbits that are bounded -- cannot include any 
point outside of the disk of radius $2$ and that we 
know the coefficients $h_k$. 

This exercise is carried out in great detail in 
\cite{Rana87}, which established upper and lower bounds of 
the radius for rotation  by the golden mean.
\end{exercise}


It turns out to be very easy to produce examples where the series diverges. 
We will discuss what we think is
the oldest one \cite{Cremer28} (reproduced  in 
\cite{Blanchard84}). Other examples  of \cite{Cremer38} can be found in 
\cite{SiegelM95}  Chapter 25  in a more modern form. 
A different line of argument appears in 
\cite{Ilyashenko79}, using more 
complex analysis. This argument has been recently extended considerably
\cite{Perezmarco00}.

Consider $f(z) = az+z^2$ with $a=e^{2\pi i\omega}$, 
then its $n^{\rm th}$ iteration is 
$$
f^n (z) = a^n z+ \cdots + z^{2^n}\ .
$$
If we seek fixed points of $f^n$, different from zero, they satisfy 
$(a^n -1) +\cdots + z^{2^n-1} =0$. 
The product of the $2^n-1$ roots of this equation is $a^n-1$. 
Hence, there is at least one root with modulus 
smaller or equal to 
$|a^n-1|^{1/(2^n-1)}$. 
It is possible to find numbers $\omega \in \RR-\que$ such that 
$$
\liminf_{n\to\infty} \,[\dist (n\omega,\nat)]^{1/(2^n-1)} =0\ .
$$
Hence, the $f$ above has periodic orbits different from zero in any 
neighborhood of the origin. 
This is a contradiction with $f$ being conjugate to an irrational rotation 
in any neighborhood of the origin. 
This shows that the perturbation expansions may diverge if the rotations 
are very well approximated by rational numbers. 

For complex polynomials in one variable it has been shown 
\cite{Yoccoz95}, (see also \cite{Perezmarco92}) 
that if $\omega$ does not satisfy the 
Brjuno conditions  \eqref{Brjunocondition} below, the series for
the quadratic polynomial diverges. 
The Theorem \ref{Siegel1} which we will prove 
later will establish that if the condition 
is met,  then the series for all the non-linearities converges.


We say that $\omega$ satisfies  
a Brjuno condition when there exists an 
$\Omega$ increasing and log convex (the latter properties are 
just for convenience and can always be adjusted) such that
\mar{Brjunocondition} 
\begin{eqnarray}
&&\Omega (n)  \ge \sup_{k\le n} |a^k -1|^{-1} \nonumber\\
&&\sum_n \frac{\log \Omega (2^n)}{2^n} <\infty\  \iff 
\sum_n \frac{\log \Omega (n)}{n^2} <\infty\   
 \label{Brjunocondition} 
\end{eqnarray}  
The equivalence of the two forms of the condition is very easy from 
Cauchy test for the convergence of series.
An example of 
functions  $\Omega(n)$ satisfying  \eqref{Brjunocondition} is:
\[
\Omega(n) =
\exp( A n/(\log(n) \log\log(n) \cdots [\log^k(n)]^{1+\epsilon}) )
\] 
for large enough $n$, 
where by $\log^k$ we denote the function $\log$ applied $k$ times.

Indeed, \cite{Yoccoz95} shows that if
$\omega$ fails to satisfy the condition \eqref{Brjunocondition}
then $f(z) = e^{2 \pi i \omega} z  + z^2$ is not linearizable 
in any neighborhood of the origin. 

\begin{remark}
In \cite{Yoccoz95} one can find the result that if 
a function $f(z)$  with $f(0) = 0$, 
$f'(0) = a$, with $|a| = 1$, is not
linearizable near $0$, then the 
quadratic function
$a z + z^2$  is not linearizable.

See also \cite{Perezmarco00}.
\end{remark}

In the case of one dimensional  variables, 
one can use the powerful theory of 
continued fractions to express the Brjuno condition in an equivalent 
form.

If $\omega \in \RR - \que$ can be written 
$ \omega = [a_0,a_1,a_2,\cdots,a_n,\cdots]$ 
with $a_i \in \nat^+$,
we call $[a_0,a_1,\cdots,a_n] = p_n/q_n$ 
the convergents. 

Brjuno condition is equivalent to 
\mar{brjuno2}
\begin{equation}\label{brjuno2}
\B(\omega) \equiv \sum_n (\log q_{n+1})/q_n < \infty
\end{equation}

A very similar condition 
\mar{Perez-Marco2}
\begin{equation}\label{Perez-Marco2}
\sum_n (\log \log q_{n+1})/q_n < \infty
\end{equation}
has been found in \cite{Perezmarco91} \cite{Perezmarco93}
to be necessary and sufficient for the existence of the 
Cremer's phenomenon of accumulation of periodic 
orbits near the origin in the sense that if condition
\eqref{Perez-Marco2}  is satisfied, then, all non-linearizable
functions 
have a sequence of periodic orbits accumulating at the
origin. If condition \eqref{Perez-Marco2} is not satisfied,
there exists a non-linearizable germ with no periodic 
orbits other than zero in a neighborhood of zero.

\begin{remark}
We note that the formula \eqref{brjuno2} has very interesting 
covariance properties under modular transformations. 
They have been used quite successfully in 
\cite{MarmiMY97}. 

Without entering in many details,  
we point out that another 
function very  closely related to the one 
we have defined 
satisfies (setting $\tilde \B(x) = +\infty$ when  $x \in \rational$)

\begin{equation}\nonumber
\begin{split}
\tilde \B(x) &= -\log(x) + x \tilde \B( 1/x) \quad x \in (0,1/2) \\
\tilde \B(-x)&  = \tilde \B(x) \quad x \in (-1/2,0) \\
\tilde \B(x+1) & = \tilde \B(x)
\end{split}
\end{equation}

Similar invariance properties are true 
for  the sum appearing in \eqref{Perez-Marco2}.
Nevertheless, it does not seem to have been investigated 
as extensively.


Unfortunately, this one dimensional 
theory does not have analogues in higher dimensions. 
Some preliminary numerical explorations   for the higher 
dimensional case were done in  \cite{Tompaidis96}.
\end{remark}

\begin{remark}
There is a very similar theory of changes 
of variables that reduce the problem to linear
-- or some canonical -- form 
for differential equations.  

Of course, these normalizations resemble 
the normalizations of singularity theory
and are basic for many applied questions such 
as {\sl bifurcation theory}.


Similarly, there is a theory of these questions 
in the $C^\infty$ or $C^r$ categories 
under assumptions, which  typically include  that there are no 
eigenvalues of unit length. This theory 
usually goes under the name of 
Sternberg theory. 

The reduction of maps and 
differential equations  to {\sl normal form} 
by means of changes of variables can also 
be done when the map is required to preserve a 
symplectic  -- or another geometric -- structure 
and one requires that the change of variables 
preserve the same structure.

We will not discuss much of these interesting theories. 
For more information on many of these topics
we refer to \cite{Brjuno89}, \cite{Bibikov79}.
\end{remark}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Preliminaries}

In this Section, we will collect some background in analysis,
number theory and  (symplectic and volume preserving) geometry.
Experts will presumably be familiar with most of the material
and will only need to read this as it is referenced in the 
following text. 
Of course, this chapter is not  a substitute for manuals in 
geometry or on analysis. 
I have found  \cite{Thirring97}, 
\cite{AbrahamMarsden}, \cite{GuilleminP74}
useful for geometry
and \cite{Stein70} \cite{Katznelson76} useful for analysis. Many of the 
techniques are discussed in other papers in KAM theory
which we will mention as we  proceed.
Specially the papers \cite{Moser66a}, \cite{Moser66b} 
contain an excellent background in many of the analytical 
techniques.


In the previous discussion of Lindstedt series we saw that we had 
to  consider
repeatedly equations of the form 
$$L_\omega\varphi =\eta.$$ 
(The formal solution was given in \eqref{solution}.)


In this Section, we  will also  study equations 
$$ D_\omega\varphi = \eta, \ \ {\rm where \ } \ \ 
D_\omega = \sum_i \omega_i \frac{\partial}{\partial\theta_i} \ , 
$$
which also appears in KAM theory. 

A first step  towards obtaining  proofs of the KAM theorem
is to devise a theory of these equations. 
That is, find conditions in $\omega$ and $\eta$ so that the function 
defined by \eqref{solution} has a precise meaning. 

The guiding heuristic principles are very simple: 
\begin{itemize}
\item[1)] 
The smoother the function $\eta$, the faster its Fourier coefficients 
$\hat\eta_k$ decay.
\item[2)]
Some numbers $\omega$ are such that the 
inverses of the denominators  appearing in 
the solution \eqref{solution} do not grow very fast with $k$. 
\item[3)]
Hence, for the numbers alluded to in 2), we 
will be able to 
make sense of the formal solutions \eqref{solution}
when the function considered is smooth.
\end{itemize}

We devote Sections \ref{prelimanalysis},
\ref{Diophantine_properties}, \ref{linear_estimates}
to making precise the points above. 
We will need to discuss number theoretic properties
(usually called Diophantine properties) that 
quantify how small the denominators  can be as a
function of $k$. We will also need to study 
characterizations of regularity in terms of 
Fourier coefficients.

Since the result in KAM theory depends on the geometric properties 
of the map -- as illustrated in \eqref{example1} and 
\eqref{example2} --  it is clear that we will need to understand
which geometric properties enter in the conclusions.
Moreover, many of the traditional proofs indeed use a geometric formalism.
Hence, we have devoted a Section  \ref{Geometric_structures}
to collect the facts we will need from differential geometry.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\mar{prelimanalysis}
\subsection{Preliminaries in analysis} \label{prelimanalysis}

In modern analysis, it is customary to measure the regularity of a function  
by saying that it belongs to some space in a certain scale of spaces.
Some scales that are widely used on  compact manifolds are: 
$$
\begin{array}{rcl}
C^r &\equiv& \Big\{\eta \,\Big|\, 
D^r\eta \mbox{ is continuous, } \|\eta \|_{C^r} 
\equiv \max \, \big(\sup_x |\eta (x)|,\ldots,\\
&&\qquad\qquad \sup_x |D^r\eta (x)| \big) \Big\}\ ,\\
%
\noalign{\vskip6pt}
C^{r+\alpha}& \equiv& \bigg\{\eta \,\Big|\, \|\eta\|_{C^{r+\alpha}}
\equiv \max \biggl(\sup_x |\eta (x)|,\ldots,\sup_x |D^r\eta (x)|,\\[4mm]
&&\qquad\qquad
\sup_{x\ne y}\ds \frac{|D^r\eta (x) -D^r\eta(y)|}{|x-y|^\alpha}\biggr)\bigg\}\ , \\

%
\noalign{\vskip6pt}
A_\delta &\equiv& \Big\{\eta \,\Big|\,
\eta\mbox{ analytic on } |\Im \theta|<\delta,
\mbox{ continuous on } |\Im \theta|\le\delta, \\
&&\qquad\qquad \|\eta\|_\delta \equiv 
\sup_{|\Im\theta|\le\delta}|\eta(\theta)|\Big\}\ , \\
%
\noalign{\vskip6pt}
H^s &\equiv& \Big\{\eta \,\Big|\, \eta\in L^2,\  
(-\Delta +1)^{s/2} \eta\in L^2,\ 
\|\eta\|_{H^s} \equiv \|(-\Delta +1)^{s/2}\eta\|_{L^2}\Big\}\ ,
\end{array}
$$
where
$r=0,1,2,\ldots$, $\alpha\in [0,1]$, $\delta\in \RR^+$, $s\in\RR$
and  we have used $\Delta$ to denote the Laplacian.

Note that this notation (even if in wide usage) has certain ugly points. 
$C^{r+0}$ and $C^{r+1}$ are ambiguous and can be 
considered according to the first or the second definition.
Indeed, $C^{r+0}$ considered according to 
the two definitions agrees as a space 
(that is, the functions in one are functions in the 
other and the topologies are the same), but the norms differ
(they are equivalent). On the other hand, $C^{r+1}$ differs 
when we interpret it in the first or in the second 
sense. To avoid that, we will use $C^{r + \Lip}$ 
to identify the second definition.

All the above scales of spaces have advantages and disadvantages. 
Against  $C^{r+\alpha}$ we  note that,
even if for $r = 0$,
these are the H\"older spaces which can
be defined in great generality (e.g.
metric spaces), when $r  \ge 1$, the definition needs to be 
done in a differentiable system of coordinates. This is because, for $r \ge 1$,
$D^r\eta (x)$ and $D^r\eta (y)$ are 
multilinear operators in $T_xM$ and $T_yM$, 
so that the differences in the definition are comparing 
operators in different spaces. 
Even though the different choices of coordinates lead to equivalent norms, 
some of the geometric considerations are somehow cumbersome. 
Also the composition operator --- ubiquitous  in KAM theory --- 
has properties which are cumbersome to trace in $C^{r+\alpha}$. 
For example, the mapping $x\to f(x+ \cdot)$ can be discontinuous in 
$C^{r+\alpha}$ when $f$ is $C^{r+\alpha}$. 

It is somewhat unfortunate that the notations $C^r$ ($r \in \nat$)
and $C^{r +\alpha}$ ($r \in \nat$, $\alpha \in [0,1) \cup \{\Lip \}$)
suggest that one can consider perhaps $C^s$ ($ s \in \real$) 
which includes both. 
If one proceeds in this way, 
one obtains very bad properties for the scale of spaces. 
In colorful words, ``the limit of the space $C^{k+\alpha}$ 
as $\alpha\to 0$ is not $C^{k}$''.  More precisely, 
several important inequalities such as interpolation 
inequalities which relate the different norms in a scale 
fail to hold. Many characterizations -- e.g. in terms of 
approximations by analytic functions -- break down 
for the case that $r$ is an integer.

A possible way of breaking up the unfortunate $C^r$ vs.\ $C^{r+\alpha}$ 
notation is to introduce the spaces called $\Lambda_r$ in \cite{Stein70}, 
or $\hat C^r$ in
\cite{Zehnder75}, \cite{Moser66a}, \cite{Moser66b}.
In general we define:
\begin{equation} \label{Lambdar}
\begin{split}
&\Lambda_0 = C^0\\
&\Lambda_1 = \Big\{ f\,\Big|\, \sup_{\substack{1>|h|>0\\ x\in\RR}}   
\frac{|f(x+h)+f(x-h)-2f(x)|}{|h|} 
\equiv \|f\|_{\Lambda_1}<\infty\Big\} \\
&\Lambda_r = \Big\{f \,\Big|\,  Df\in\Lambda_{r-1}, \ \|f\|_{\Lambda_r} 
= \max (\|f\|_{C^0},\|Df\|_{\Lambda_{r-1}})\Big\}\   \ r \in \nat \\
&\Lambda_{r+\alpha} = C^{r +\alpha} \qquad r+\alpha \not\in \nat
\end{split}
\end{equation}
Here $[r]$ means the integer part of $r$ and 
$\{r\}$ means the fractional part of $r$.

There are many reasons why the $\Lambda_\alpha$
spaces are the natural scale of spaces to 
consider when one is considering 
a space that includes the usual $C^{r + \alpha}$.
For example, one can obtain very nice approximation theory, interpolation 
inequalities, and generalize naturally to several variables. 
Note that 
$$
C^1 \varsubsetneq C^{0+\Lip}\varsubsetneq \Lambda_1\ .
$$
Again, we point out that it is not easy to define these spaces on 
manifolds except through patches. 
Choosing different patches leads to different norms.
Fortunately, all of them are  equivalent and, hence
define the same topology in the spaces. 


Note that $C^r$ norms can be defined naturally on any smooth 
Riemannian  manifold.  
(The norm of derivatives can be defined since it is the norm of 
multilinear operators in the tangent bundle.)

The main inconvenience of $C^r$ ($r$ is, by assumption,
an integer) is that the characterization 
by Fourier series is rather cumbersome. 
It is easy to show integrating by parts that 
\begin{eqnarray*}
\hat\eta_k & \equiv & \int_0^1 \eta (\theta) e^{-2\pi ik\theta}\,d\theta\\
&=& (2\pi i k)^{-r} \int_0^1 D^r \eta (\theta)e^{-2\pi ik\theta}\, 
d\theta\ , \qquad k \ne 0\ .
\end{eqnarray*} 

Hence, if $\eta \in C^r$, we have
\mar{inequality1}
\begin{equation}\label{inequality1}
\sup_k \bigl( |\hat\eta_k|\, |k|^r \bigr) \le C_r \|\eta\|_{C^r}\ ,
\end{equation}
where $C_r$ is a constant that depends  only on $r$. 

In the other direction, we have  for 
any $\delta > 0$
\mar{inequality2}
\begin{equation}
\begin{split}
\label{inequality2}
\|\eta\|_{C^r} 
& \le  \tilde C_r \sum_k |\hat\eta_k|\, |k|^r 
 = \tilde C_r \sum_k \frac{1}{|k|^{1+\delta}} \, 
            |\hat\eta_k|\, |k|^{r+1 +\delta} \\
& \le  \tilde C_r \left( \sum_{k} \frac{1}{|k|^{1+\delta}} \right) \, 
            \sup_k \bigl(|\hat\eta_k|\, |k|^{r+1+\delta}\bigr)  \\
&\le \tilde{\tilde C}_{r,\delta} 
\sup_k \bigl(|\hat\eta_k|\, |k|^{r+1+\delta}\bigr) \ .
\end{split}
\end{equation}

Both inequalities \eqref{inequality1}, \eqref{inequality2} 
are essentially optimal in the following sense.
Inequality \eqref{inequality1} is saturated by trigonometric polynomials,
while
the usual square wave --- or iterated integrals of it --- shows that 
it is impossible to reduce the exponent on the right hand side of 
\eqref{inequality2} to $r+1$. 
This discrepancy is worse
when we consider functions on $\TT^d$, $d>1$. 
In that case, to obtain convergence of the series 
in \eqref{inequality2}, one 
needs to take  $\delta > d - 1$.
This shows that studying regularity in terms of just the size of 
the coefficients will lead to less than optimal results.

\begin{exercise} 
Show that given any sequence $a_n$ of positive numbers converging to 
zero, the set of continuous functions $f$
with $\limsup_k |\hat f_k| / a_k = \infty $ is 
residual in $C^0$.
\end{exercise}


The spaces of analytic functions $A_\delta$ are better behaved 
in  respect of characterizations of norms of the function 
in terms of its Fourier coefficients.
Integrating over an appropriate contour, we have Cauchy inequality 
\mar{inequality1analytic}
\begin{equation}\label{inequality1analytic}
|\hat\eta_k| \le e^{-2\pi \delta |k|} \|\eta \|_{A_\delta}\ .
\end{equation}
On the other hand, 
\mar{inequality2analytic}
\begin{equation}\label{inequality2analytic}
\begin{array}{rcl}
\|\eta\|_{A_{\delta-\sigma}}
& \le & \ds{\sum_{k\in\zed} e^{2\pi (\delta-\sigma)|k|} |\hat\eta_k|}\\[5mm]
&\le & \ds{\biggl( \sum_{k\in\zed} e^{-2\pi\sigma|k|}\biggr) \sup_k 
e^{2\pi \delta |k|} |\hat\eta_k|}  \\[5mm]
&\le & \ds{C\sigma^{-1} \sup_k e^{2\pi \delta|k|} |\hat\eta_k|}
\end{array}
\end{equation} 

Of course, for Sobolev spaces, the characterization in terms of Fourier 
coefficients is extremely clean: 
$$\|\eta\|_{H^s} = \biggl( \sum_{k\in\zed} 
(|k|^2 +1)^s |\hat\eta_k|^2\biggr)^{1/2}\ .$$

Sobolev spaces have other advantages. 
For example, they are very well suited for numerical work and they also 
work nicely with partial differential operators. 
Many of the  tools that we  used in 
$\Lambda_\alpha$ spaces also carry through to Sobolev spaces.


For example, we have  the interpolation inequalities:
\mar{Sobolev-interp}
\begin{equation} \label{Sobolev-interp}
\|u\|_{H^{j}} \le K \| u \|_{H^m}^{j/m} \|u\|_{H^0}^{1 - j/m} \ .
\end{equation}

This inequality is a particular case of the 
following Nirenberg inequality
\mar{nirenberg}
\begin{equation}\label{nirenberg}
\| D^i u \|_{L^r(\RR^n)} \le
C \| u \|_{L^p(\RR^n)}^{1 - i/m} \cdot \|D^m u \|_{L^q(\RR^n)}^{i/m}\ ,
\end{equation}
where $1/r = ( 1 - i/m)(1/p) + (i/m)(1/q)$.
We refer to \cite{Adams75}, p.~79. 

These interpolation inequalities both for $\Lambda_\alpha$ 
and for Sobolev spaces are part of the more general 
``complex interpolation method'' and the scales of 
spaces are ``interpolation spaces''.  Even if this is 
quite important for certain problems of  analysis in these spaces, we will
not go into these matters here.


As we will see later, some of the abstract versions of KAM as an implicit 
function theorem work perfectly well for Sobolev spaces. 
I think it is mainly a historical anomaly that these spaces are not 
used more  frequently in  the KAM theory of dynamical 
systems.  (Notable exceptions are
\cite{Herman86}, \cite{KatznelsonO89a}.) Of course, for the applications of 
Nash-Moser theory to PDE's or geometric problems, Sobolev spaces are used 
quite often.

One of the most useful tools in the study of $C^{r+\alpha}$ spaces 
is that they can be 
characterized by their approximation properties by analytic functions. 

The following characterization of $\Lambda_r$ spaces (remember that they 
agree with the H\"older spaces $C^{[r]+\{r\}}$ when $\{r\}\ne0$) comes 
{from} \cite{Moser66a,Moser66b} 
(see also \cite{Zehnder75}, Lemma~2.2).


\mar{characterization}
\begin{lemma}\label{characterization}
Let $h\in C^0(\TT^1)$. 
Then $h\in\Lambda_r$ if and only if for some $\sigma >0$ we can find a 
sequence $h_i\in A_{\sigma2^{-i}}$ such that 
\begin{itemize}

\item[i)] $\|h_i-h\|_{C^0}\to 0$ 
\vspace{2mm}
\item[ii)] $\sup_{i\ge1} (2^{ir}\|h_i-h_{i-1}\|_{A_{\sigma 2^{-i}}})<\infty$

\end{itemize}
\end{lemma} 

Moreover, it is possible to arrange that the sup in ii) is 
equivalent to $\|h\|_{\Lambda_r}$  if one chooses the $h_i$ appropriately.

If we denote  the sup in ii) by $M$, we have that for $h\in\Lambda_r$ it is 
possible to  find a sequence 
$h_i$ in such a way that $M\le C_{\sigma,r}\|h\|_{\Lambda_r}$.
Conversely, for any 
sequence $h_i$ as above, we have $\|h\|_{\Lambda_r}
\le \tilde C_{\sigma,r}M$.
Given a function $h\in\Lambda_r$ there are canonical ways of producing 
the desired $h_j$. 
For example, in \cite{Stein70} and \cite{Krantz83} 
it is shown that one can use
convolution with the Poisson kernel  to produce 
the $h_j$.
In that case, the sup in ii) can be taken to define a norm equivalent 
to $\NORM_{\Lambda_r}$. 

Another important feature of the $\Lambda_\alpha$ spaces is 
that they admit a very efficient approximation theory.

The first naive idea that occurs to one 
when trying to approximate a function by a smoother one is just 
to  expand  in Fourier series and to keep only a finite number of terms 
 corresponding to the harmonics of small degree.
Indeed, for some methods of proof of the KAM theorem 
that emphasize geometry this is the method of choice. 
(See Section \ref{Arnoldmethod}.) 
Unfortunately, keeping only a 
finite number of the low order  Fourier terms
is a  much  less efficient method of approximation
(from the point of view of the number of derivatives 
required)
than convolving with a 
smooth kernel. 
Recall that summing a Fourier series is just convolution with the 
Dirichlet kernel, 
\begin{eqnarray*}
\sum_{k=-N}^N \hat\eta_k e^{2\pi ik\theta} 
& = & \int_0^1 \eta (\varphi)\D_N (\theta-\varphi)\,d\varphi
= (\eta*\D_N)(\theta)\\
\D_N(\theta) & = & \frac{\sin(2N+1)\pi\theta}{\sin\pi\theta} \ , 
\end{eqnarray*}
which is large and oscillatory and hence generates more oscillations 
upon convolution than smooth kernels.

Hence the method of choice of approximating functions 
by smoother ones is to 
choose a  positive analytic function
$K: \real^d \rightarrow \real$ 
decaying at infinity rather fast and
with integral $1$ and define 
$K_t(x) \equiv t^d K(t x)$.

We define smoothing  operators 
$S_t$ by  convoluting with the kernels $K_t$. That is:
$$
S_t \phi = K_t * \phi.
$$


The properties of these smoothing  operators that are 
useful in KAM theory are (we express them in terms 
of the $\Lambda_r$  spaces introduced in 
\eqref{Lambdar}):

\mar{smoothing}
\begin{equation}\label{smoothing}
\begin{array}{ll}
\mbox{i)}&\quad 
\lim_{t\to\infty} \|S_t u-u\|_{\Lambda_0} = 0\ , \quad u\in\Lambda_0\\
\noalign{\vskip6pt}
\mbox{ii)}&\quad 
\|S_t u\|_{\Lambda_\mu} \le t^{\mu-\lambda} C_{\lambda\mu} \|u\|_{\Lambda_\lambda}\ ,
\quad u\in\Lambda_\lambda,\  0\le\lambda\le \mu\\
\noalign{\vskip6pt}
\mbox{iii)}&\quad 
\|(S_t-1)u\|_{\Lambda_\lambda} 
\le t^{-(\mu-\lambda)} C_{\lambda\mu} \|u\|_{\Lambda_\mu}\ ,
\quad u\in\Lambda_\mu\ , 0\le \lambda\le\mu
\end{array}
\end{equation}
We note that a slightly weaker version of these properties is:
\mar{smoothinganalytic}
\begin{equation}\label{smoothinganalytic}
\begin{array}{ll}
\mbox{ii$'$)}&\quad \|S_tu \|_{A_{t^{-1}}}
 \le k(\ell) \|u\|_{\Lambda_\ell}\qquad 
t\ge 1\\
\noalign{\vskip6pt}
\mbox{iii$'$)}&\quad \|(S_\tau-S_t) u\|_{A_{\tau^{-1}}} 
\le t^{-\ell} k(\ell)\|u\|_{\Lambda_\ell}
\qquad u\in\Lambda_\ell\quad \tau \ge t\ge1
\end{array}
\end{equation}
Note that it is easy to show that ii) $\Rightarrow$ ii$'$), 
iii) $\Rightarrow$ iii$'$). 
In \cite{Zehnder75} operators $S_t$ satisfying \eqref{smoothing} are said to 
constitute a $C^\infty$ smoothing and those satisfying i), ii$'$), iii$'$) 
a $C^\omega$ smoothing.  

There are other smoothing operators and other 
scales of spaces that satisfy the same  inequalities.
Indeed, in the most abstract version of KAM theory,
which we discuss in  Section \ref{Implicitfunction}, one 
can even abstract these properties and obtain a general 
proof which also applies to many other situations.

\medskip
One important consequence of 
the existence of smoothing operators is 
the existence of interpolation inequalities
(see \cite{Zehnder75}).
Even if these inequalities were  proved directly a long
time ago, and can be obtained by different methods, 
it is interesting to note that they are a consequence of 
the existence of smoothing operators. As we mentioned, this 
happens in other situations and for other spaces than 
$\Lambda_r$.
In the following, we denote
$\| u \|_{r}  \equiv \| u \|_{\Lambda_r}  $.


\begin{lemma}\label{lem:interpolation}
For any $0 \le \lambda \le \mu$,
$0 \le \alpha \le 1$,
denoting 
$$
\nu = (1-\alpha)\lambda +\alpha\mu
$$
we have for any $u \in \Lambda_\mu$:
\mar{interpolation}
\begin{equation} \label{interpolation}
\|u\|_{\nu}  \le
C_{\alpha,\lambda,\mu} \|u\|_\lambda^{1-\alpha}\,
\|u\|_\mu^{\alpha}
\end{equation}
\end{lemma}

\proof
We clearly have:

$$\|u\|_\nu \le \|S_tu \|_\nu + \|(\Id - S_t)u\|_\nu\ .$$
Applying ii) of 
\eqref{smoothing} to the first
term  and iii) to the second, we obtain: 
$$\|u\|_\nu \le t^{\nu-\lambda} C_{\lambda\nu}\|u\|_\lambda 
+ t^{-(\mu-\nu)} C_{\nu\mu} \|u\|_\mu$$
and we obtain \eqref{interpolation} 
by optimizing the right hand side in $t$.

\qed

These inequalities are descendants of inequalities for derivatives 
of functions which were proved, in different versions,
by Hadamard and Kolmogorov and others.
For $\Lambda_r$, $r\notin\nat$ and for $C^r$, $r\in\nat$, the proofs can 
be done by elementary methods and extend even to functions defined 
in Banach spaces \cite{LlaveO99}.  For analytic spaces, these interpolation 
inequalities  are classical in complex analysis and 
are a consequence of the fact that $\log | f(z)| $ is 
sub-harmonic when $f(z)$ is analytic
\cite{Rudin87}.

In KAM theory the interpolation inequalities
\eqref{interpolation} are useful because if we have a smooth norm 
$(\NORM_\mu)$ blowing up and a not so smooth one $(\NORM_\lambda)$ going 
to zero, we can still get that other norms smoother than $\lambda$ 
still converge.  


All the above results about 
$\Lambda_\alpha$ spaces 
of functions on the real 
line  can be generalized 
to spaces of functions on $\RR^n$.
Indeed, one of the nicest
things of these spaces is that the theory 
for them can be reduced to the study of one 
dimensional restrictions of the function. 
We refer to \cite{Stein70, Krantz83} for more details.

For analytic spaces, the theory can be 
also extended with minor modifications. 
In KAM theory we often have to 
consider functions defined in 
$\torus^m \times \real^n$ (often $n = m = d$).
In such a case, it is very convenient to 
use expansions  which are Taylor expansions in the real 
variables and Fourier expansions in the angle:
\begin{equation} 
\label{FourierTaylor}
f(\theta, I)  = \sum_{j \in \nat^n, k \in \integer^m}
f_{j,k} I^j \exp( 2 \pi i k \cdot \theta) 
\end{equation} 

For these functions, it is convenient to define norms
\begin{equation}\label{analyticnorm}
\| f\|_\sigma =
\sup_{|I | \le e^{2 \pi \sigma}, |\Im(\theta)| \le \sigma}  
|f(\theta, I)|
\end{equation}

With this definition, we have the Cauchy bounds 
\begin{equation}  
\label{Cauchyn} 
\begin{split}
|f_{j,k} | &\le \exp( -2 \pi \sigma( |j| + |k|) ) \|f\|_\sigma\\
\left|\left|\frac{\partial^{|r| +|s|}}
{ \partial I^r \partial \theta^s} f \right|\right|_{\sigma - \delta} 
& \le  C_{r,s,n,m} \delta^{-|r| -|s|} 
\| f\|_\sigma  
\end{split}
\end{equation}

The proof of these inequalities is quite standard
in complex analysis and will not be given in detail here.
It suffices to express the derivatives as integrals 
over  an $n+m$ dimensional torus which is close to 
the boundary of the domain  in which 
$f(\theta, I)$ is controlled by $\|f\|_\sigma$. 
The only subtlety is that, 
when for some $l \in \{ 1,\ldots, m\}$ we have 
$k_l > 0 $, one needs to choose 
the torus $\Im(\theta_l) = -\sigma$. 
(Similarly for the case when $k_l < 0$
one needs to choose the torus
$\Im(\theta_l) = \sigma$.)

It is also obvious that, under  these supremum norms, 
the spaces constitute a Banach  algebra, 
that is:
\begin{equation}\label{banachalgebra}
\| f g \|_\sigma \le \| f\|_\sigma \|g\|_\sigma  \ .
\end{equation}
Therefore, if $\|f\|_\sigma<1$, then 
$\| (1+f)^{-1} \|_\sigma \le (1-\|f\|_\sigma)^{-1}$.

\subsection{Regularity of functions defined in closed sets. The 
Whitney extension theorem}\label{Whitney}


In KAM theory, we often have to study functions defined in 
Cantor sets. In particular, sets with empty interior.
In this situation, the concept of 
Whitney differentiability plays an important role. 

A reasonable notion of smooth  functions 
in closed sets is that they are the restriction 
of smooth functions in open sets that contain them.
This definition is somewhat unsatisfactory since 
the extension is not unique. 

In the paper \cite{Whitney34a}, one can find an 
intrinsic characterization of smooth functions in 
a closed set.   

\begin{definition}
We say that a function $f$ is $C^k$ in the sense of 
Whitney in a compact set $F \subset \real^d$ 
when for every point  $x \in F$ we can find 
a polynomial $P_x$ of  degree at most  $k$ 
such that 
\begin{equation} \label{whitneybounds}
\begin{split}
& f(x) = P_x(x)  \quad x \in F \\
& | D^i P_x(y) - D^i P_y(y)| \le |x -y|^{k-i}  \sigma(|x -y|) \quad x, y \in F, \ 0 \le i \le k
\end{split}
\end{equation}
where $\sigma$ is a function that tends to zero as its argument tends to zero. 
\end{definition}

It is clear that if a function is the restriction of 
a $C^k$ function the  Taylor polynomials will do. 

The deep theorem of \cite{Whitney34a} is that the converse is 
true. That is,
\begin{theorem}\label{thm:Whitney}
Let $F \subset \real^d$ be a compact set.

If for a function $f$  we can find  
polynomials satisfying \eqref{whitneybounds} and 
such that $f(x) = P_x(x)$
then the function $f$ can be extended to 
a $C^k$ function in $\real^d$.
\end{theorem}

Note that if a function is $C^k$ in $\real^d$, then 
one can find polynomials satisfying 
\eqref{whitneybounds} by taking just the Taylor expansions
of $f$. 


Contrary to what happens  with ordinary 
derivatives, the polynomials satisfying \eqref{whitneybounds}
may not be unique.  (For example, if we take  $F$ to be the 
$x$-axis in $\real^2$, we can take polynomials with 
a very different behavior in the $y$ direction.)

There are other variants of the definitions in which 
rather than using  $D^i P_x$ one introduces another   polynomial
$P^i_x$ which is then required to satisfy compatibility 
conditions with the other polynomials. 

Another variant useful for KAM theory appears in 
\cite{LlaveV00}. It roughly states that, for Cantor sets with 
a certain geometric structure, one just needs to verify 
\eqref{whitneybounds} for $i = 0$.


The assumption  that $F$ is compact can be removed. 
It suffices to require \eqref{whitneybounds} in each 
compact subset of $F$, allowing $\sigma$ to depend 
on the compact subset.

In \cite{Stein70} one can find a version of this 
theorem in which the extensions can be implemented
via a linear extension operator. 
(There is a different extension operator $\E_k$ for each  $k$.)
In \cite{Stein70}, one can also find versions for $C^{k+ \alpha}$. 
The $C^\infty$ version can be found in \cite{Whitney34b}.

Even if adapting Whitney's theorem from real valued functions to 
functions taking values in a Banach space is well known 
(e.g., \cite{Federer69}, p.~225 ff.),
I do not know  how to prove a similar result when
$F$ lies in an infinite dimensional space.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\mar{Diophantineproperties}
\subsection{Diophantine properties} \label{Diophantine_properties}

In this Section, we want to study the existence of vectors $\omega\in\RR^n$ 
so that we can obtain upper bounds of $[\dist(\omega\cdot k,\nat)]^{-1}$
and of 
$|\omega\cdot k|^{-1}$ when $k\in\zed^n - \{0\}$. 
These are the small divisors that appear respectively 
in the solution of the equations 
\eqref{diff}, \eqref{der},
which appear often in KAM theory. 

When we are studying problems such as those in Section~\ref{Siegel}, 
we need only to consider $k\in\nat^n$. 

When $n=1$ for \eqref{diff} (and for $n=2$ for \eqref{der})
one can get quite good results using classical tools of number theory, 
notably continued fractions, which we will not review here, in spite of their 
importance in 1-dimensional dynamics. 

For example, the classical result of Liouville states 

\mar{Liouville}
\begin{theorem}\label{Liouville}
Let $\omega\in \RR-\que$ satisfy $P(\omega) =0$ with $P$ a polynomial 
of degree $\ell$ with integer coefficients. 
Assume that $P'(\omega)=0,\ldots,P^{(j)}(\omega)=0$, $P^{(j+1)}(\omega)\ne0$. 
Then for some $C>0$
\begin{equation}\label{Diophantinebounds}
\Big| \omega -\frac{m}n\Big| \ge Cn^{-\ell/(j+1)} \qquad \forall\ m,n\in\zed\ .
\end{equation}
\end{theorem}


\begin{proof} 
The zeroes of polynomials are isolated, hence $P(\frac{m}{n})\ne0$ 
when $\frac{m}{n}$ is close enough to $\omega$. 
This together with the fact that $n^\ell P(\frac{m}{n})\in\zed$ 
implies that $|n^\ell P(\frac{m}{n})|\ge1$ 
and, therefore, $|P(\frac{m}{n})-P(\omega)| \ge n^{-\ell}$. 
On the other hand, by Taylor's theorem,
$$
\Big|P\Big(\frac{m}n\Big)-P(\omega)\Big| 
\le C\Big|\omega-\frac{m}n\Big|^{j+1} 
$$
for some $C>0$.
(The \RHS is the remainder of Taylor's theorem.)
This yields the desired result for $\frac{m}{n}$ close to $\omega$. 
For $\frac{m}{n}$ far from $\omega$, the result is obvious.
\end{proof} 

Theorem~\ref{Liouville} was significantly improved by Roth,
 who showed that, if $\omega$ is 
an algebraic irrational,
$|\omega - \frac{m}n|\ge C_\ep n^{-2-\ep}$ for every $\ep > 0$. 

The numbers that satisfy the equation \eqref{Diophantinebounds}
in  the conclusions of 
Theorem~\ref{Liouville} are quite important 
in number theory and in KAM theory and are 
called Diophantine. As we will see in Lemma~\ref{Diophantine},
Diophantine
numbers occupy positive measure, hence, there
are some of them which do not satisfy the 
hypothesis of Theorem \ref{Liouville}.


\mar{def:Diophcond}
\begin{definition}\label{def:Diophcond}
A number $\omega$ is called 
{\em Diophantine} of type $(K,\nu)$
for $K>0$ and $\nu\geq1$, if 
\mar{diophantinebound}
\begin{equation}\label{diophantinebound}
\left| \omega - \frac pq \right| > K \, |q|^{-1-\nu}
\end{equation} 
for all $\frac pq \in \que$. 
We will denote by $\D_{K,\nu}$ the set of
numbers that satisfy 
\eqref{diophantinebound}.
We denote by $\D_\nu = \cup_{K> 0} \D_{K,\nu}$.
 
A number which is not Diophantine 
is called a {\em Liouville number}.
\end{definition}


The numbers $\omega$ for which 
$|\omega-\frac{m}n|\ge Cn^{-2}$ are called ``constant type'' 
and the previous result shows that quadratic irrationals are constant type. 
It is an open problem to decide whether $\root 3 \of 2$ is constant 
type or not.  Indeed, it would be quite interesting 
to produce any non-quadratic algebraic number 
which is of constant type.

In higher dimensions, there are two types of 
Diophantine conditions that appear in 
KAM theory, namely:
\begin{eqnarray}
&|\omega \cdot k |^{-1} \le C |k|^\nu \quad  \forall k \in \integer^n - \{0\}  
\label{diophantineflow} \\
&|\omega \cdot k - \ell |^{-1} \le C |k|^\nu \quad 
 \forall (k, \ell) \in \integer^n \times \integer - \{ (0,0) \}
\label{diophantinemap}
\end{eqnarray}

The first condition \eqref{diophantineflow}
 appears when we consider the KAM theory
for flows, the second one \eqref{diophantinemap} when we consider KAM theory 
for maps. As we will see the arguments are very similar in both 
cases.
\begin{remark}
One important difference  between these Diophantine 
conditions is that the first 
condition  \eqref{diophantineflow} is maintained
-- with only different constants -- 
if the  vector $\omega$ is multiplied by a constant. 
Nevertheless, the second one is not.  Indeed, if we 
take advantage of this to set one of the coordinates to 1,
then, we see that \eqref{diophantineflow} becomes 
\eqref{diophantinemap} for the vector in one dimension less
obtained by keeping the coordinates not set to $1$.

The arguments that study geometry of these Diophantine conditions 
are identical.
Nevertheless, we point out that 
 the scale invariance of \eqref{diophantineflow} 
will have some consequences later, namely
that KAM tori for flows often appear 
in smooth one-dimensional families, whereas those for maps
are isolated.
\end{remark}


For us, the most important result is 

\mar{Diophantine}
\begin{lemma}\label{Diophantine}
Let $\Omega :\RR\to\RR$ be an increasing function satisfying 
\mar{summability}
\begin{equation}\label{summability}
\sum_{r=1}^\infty \Omega(r)^{-1} r^{n-1} <a(n)
\end{equation} 
where $a(n)$ is an explicit function of the dimension $n$. 
Then the set $\D_\Omega$ of $\omega\in\RR^n$ such that
\mar{denbound}
\begin{equation}\label{denbound}
\biggl( \inf_{\ell\in\nat} |\omega\cdot k-\ell|\biggr)^{-1} 
\le \Omega(|k|)\ \qquad \forall\ k\in\zed^n \setminus \{0\} 
\end{equation}
has the property that, given any unit cube $\C$ 
\mar{unitcube}
\begin{equation}\label{unitcube}
|\C \cap \D_\Omega| \ge 1- a(n)^{-1}  
\sum_{r=1}^\infty \Omega (r)^{-1} r^{n-1}
\end{equation} 
where $|\enspace|$ denotes the Lebesgue measure.

\end{lemma} 

Note that when we take $\Omega(|k|) = K^{-1} |k|^\nu$,
\eqref{denbound} reduces to \eqref{diophantinebound}.
The condition \eqref{summability}  is satisfied for $\nu > n$ and 
for $K$ sufficiently small.
This shows that the set of 
Diophantine numbers 
$\D_\nu$ has full measure for $\nu > n$.
Indeed
\mar{density}
\begin{equation}\label{density}
| \C \cap \D_{K,\nu} | \ge 1 - K b(\nu,n)
\end{equation}
  

\begin{proof} 
We will denote by $\sigma_n$ constants that depend only on the 
dimension~$n$. The same symbol can be used for 
different constants.

For $k\in \zed^n\setminus\{0\}$, $\ell\in\zed$ we consider the set 
$$
\B_{k,\ell} = \{\omega\in\RR^n \ | \ |\omega\cdot k-\ell| 
\le \Omega (|k|)^{-1}\}
$$ 
consisting of the $\omega$'s for which the desired inequality 
\eqref{denbound} fails precisely for $k,\ell$. 
The desired set will be the intersection of the complements of these sets. 

Geometrically $\B_{k,\ell}$ is a strip bounded by parallel planes which 
are at a distance $2\Omega(|k|)^{-1}|k|^{-1}$ apart 
(see Figure~\ref{parallel-fig}).
%
\mar{parallel-fig}
\begin{figure}
\centerline{\epsfysize=4.0truein\epsfbox{llave-fig2.eps}}
\caption{}
\label{parallel-fig}
\end{figure}
%
Thus, given a unit cube $\C\subset\RR^n$, the measure of $\C\cap 
\B_{k,\ell}$ cannot exceed $\sigma_n\Omega (|k|)^{-1}|k|^{-1}$.

We also observe that given $k\in \zed^n - \{0\}$, there is only a finite 
number of $\ell$ such that $\C\cap \B_{k,\ell} \ne\emptyset$. 
Indeed, this number can be bounded by $\sigma_n|k|$. 

Therefore, for any $k\in\zed^n\setminus\{0\}$ 
$$
\sum_{\ell\in\zed} |\B_{k,\ell} \cap \C| \le \sigma_n \Omega(|k|)^{-1} \ ,
$$
hence, 
\begin{equation} 
\begin{split}
1 - |\C \cap \D_\Omega| 
 & \le \sum_{k\in\zed^n\setminus\{0\}} \sum_{\ell\in\zed} 
|\B_{k,\ell} \cap \C| \\
& \le  \sigma_n \sum_{k\in\zed^n\setminus \{0\}} \Omega(|k|)^{-1}  \\
 & \le  \sigma_n \sum_{r=1}^{\infty} \Omega(r)^{-1} r^{n-1} \ .
\end{split}
\end{equation}

Under the hypothesis that  the \RHS of the above 
equation is smaller than $1$, the conclusions hold.
\end{proof} 

An important generalization of the above argument 
\cite{Pjartli69} leads to the conclusion 
that, if a submanifold of Euclidean space
has curvature (or torsion or any other 
higher order condition) in such a way that planes
cannot have a high order tangency to it
(see below or see the references),
then the submanifold  has to contain Diophantine numbers.
Even if the proof is relatively simple, the abundance of 
Diophantine numbers in lower dimensional curves has 
very deep consequences since it allows  one to
reduce the number of free parameters needed in 
KAM proofs.

\begin{lemma}\label{pjartli}
Let $\Sigma$ be a compact  $C^{l+1}$ submanifold of 
$\real^n$. 

Assume that at every point $x \in \Sigma$ of the manifold
\begin{equation}\label{torsion}
T_x\Sigma + T^2_x \Sigma + \cdots + T^l_x \Sigma = T_x \real^n
\end{equation}
where by $T^j_x \Sigma$  
we denote the $j$ tangent plane to $\Sigma$.

Then, we can find a constant $C_\Sigma$ that depends only on 
the manifold  such that:
$$
| \Sigma - \D_\Omega| \le  C_\Sigma \sum_{r=1}^\infty \Omega(r)^{-1/l} r^{n -1}
$$
where by $| \cdot |$ we denote the Riemannian volume of the  
manifold.
\end{lemma} 

The geometric meaning of the  hypothesis 
\eqref{torsion} is that the manifold is 
not too flat and that it has  curvature 
and torsion  (or torsion of 
high order) so that every neighborhood of a  point  has to 
explore all the directions in space.
In particular, we will have a lower bound  on the area of 
the portion of the manifold that can be 
trapped in a resonant region, which in the 
space of $\omega$ is  a flat plane.

The remaining details of the
proof are left as an exercise for  the interested reader. 
See also the lectures on number theory in this volume.
The proof follows by noting that,  because of \eqref{torsion},
we can bound the measure of the 
regions: $ \sum_{\ell \in \integer} |\Sigma \cap \B_{k,\ell}| \le C_\Sigma
\Omega(|k|)^{-1/l}$.
The worst case happens when the manifold is tangent to a very high order 
to one of the resonant regions. Since the order of tangency 
-- as well as the constants involved -- are uniformly bounded, 
we obtain the desired result.

\begin{remark} 
Notice that the formulation of 
the Diophantine properties 
\eqref{diophantineflow} and \eqref{diophantinemap}
also makes sense if we allow $\omega$ to take 
complex values. This sometimes appears when 
we study complex maps and it is a useful 
tool. Notice that  the argument we have presented 
works very similarly for the case of $\omega$ taking 
complex values. Indeed, the  norm of the inverse 
can be bounded by the norm of inverse of the real 
part (or the norm of the inverse of the imaginary 
part) so, when the real or imaginary parts 
of an $\omega$ vector are Diophantine, the vector
is Diophantine.

Sometimes, when studying problems with polynomials 
we will also need the inequalities  
only for $k \in \nat^n$. Needless to say, these 
are much easier to satisfy since the signs 
have fewer possibilities to compensate and lead to 
small numbers.
\end{remark}

\begin{exercise}
Construct a complex vector which is Diophantine, 
but whose imaginary and real parts are not 
Diophantine.
\end{exercise}

\begin{remark}
The same simple minded  argument used in the proof of 
Lemma \ref{Diophantine} can be used to obtain 
estimates not only on the Lebesgue measure of 
the set of Diophantine numbers but also other 
geometric properties (for example Hausdorff measure),
of sets satisfying Diophantine properties,
and that are 
forced to belong to a manifold, have a resonance, 
etc.
\end{remark}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\subsection{Estimates for the linearized equation} \label{linear_estimates}


In this subsection, we will consider estimates for the
following equations  \eqref{der}, \eqref{diff} that occur
very frequently in KAM theory. 
We have encountered them  already in the 
study of Lindstedt series and we will encounter them 
again as linearized equations.

We will consider equations of the form:
\mar{der}\mar{diff}
\begin{gather}
D_\omega \varphi =\eta\qquad \left( D_\omega \equiv \omega_1 
\frac{\partial}{\partial\theta_1} + \cdots 
+ \omega_n\frac{\partial}{\partial\theta_n}\right) \label{der}\\[2mm]
\begin{array}{rcl}
L_\omega\varphi =\eta\quad&&\Bigl(L_\omega \varphi (\theta_1,\ldots,\theta_n) 
\\[2mm]
&&\quad \equiv \varphi (\theta_1 +\omega_1,\ldots,\theta_n+\omega_n) 
- \varphi (\theta_1,\ldots,\theta_n)\Bigr)\ ,
\end{array}\label{diff}
\end{gather}
where $\eta :\TT^n\to\RR$ is given and  the unknown function 
to be found is  $\varphi$. 

For the sake of simplicity we will only discuss in detail \eqref{der}. 
The same considerations apply for \eqref{diff} and we will indicate 
the minor differences -- in fact simplifications  -- that 
enter in the discussion of \eqref{diff}.

Recall that these equations have a formal solution in terms of Fourier series. 
Namely, if 
$$\eta (\theta) = \sum_{k\in\zed^n} \hat\eta_k 
e^{2\pi i k\cdot\theta}\ ,
\qquad \hat\eta_0=0\ ,$$
then any reasonable solution  of \eqref{der}
for which one can define unique Fourier coefficients 
(e.g. any distribution) has 
to satisfy: 
$$
\hat\varphi_k 2\pi i k\cdot\omega = \hat\eta_k\ .
$$
Hence, if $k\cdot\omega\ne0$, then\mar{solution2}
\begin{equation}
\label{solution2}
\hat\varphi_k = \frac{\hat{\eta}_k}{2\pi i k\cdot\omega}\ .
\end{equation}
We restrict our attention to cases when $k\cdot\omega\ne0$ for any 
$k\in\zed^n-\{0\}$. 
In that case $\varphi$ is determined by \eqref{solution2} up to an additive 
constant since we can take any $\hat\varphi_0$. 
To avoid unnecessary complications, we will set $\hat\varphi_0=0$.


It is not difficult to see that, unless we impose some quantitative 
restriction on how fast 
$|k \cdot\omega|^{-1}$ 
can grow, the solutions given by (\ref{solution2}) may fail to be even 
distributions. 
E.g., take $\hat\eta_k = e^{-|k|}$ and arrange that there are 
infinitely many $k$ for which 
$|k \cdot\omega|^{-1} \ge e^{e^{|k|}}$.

\begin{exercise}
Given any sequence $a_n$ of positive terms tending to infinity
construct an $\omega \in  \real^n - \rational^n $ 
such that, for infinitely many $k \in \integer^n $
\begin{equation}
|\omega \cdot k |^{-1} \ge a_{|k|}.
\end{equation}

Show that the $\omega$ constructed above are dense
(even if, as we have shown,  they will be of measure zero for 
sequences $a_n$ which grow fast enough).
\end{exercise}


We will consider $\omega$ which satisfy\mar{Diophantinecond}
\begin{equation}
\label{Diophantinecond}
|k \cdot\omega|^{-1} \le \gamma |k|^\nu \ .
\end{equation}
These numbers were studied in Section~\ref{Diophantine_properties}. 

It is not difficult to obtain some crude bounds for analytic or finite 
differentiable functions (we will do better later). 
Recall that for $\eta\in A_\delta$ 
$$|\hat\eta_k| \le e^{-2\pi\delta|k|} \|\eta\|_{A_\delta}\ ,$$
while for $\eta\in C^r$ 
$$|\hat\eta_k| \le (2\pi)^{-r} |k|^{-r} \|\eta\|_{C^r}\ .$$
Hence, if $\omega$ satisfies \eqref{Diophantinecond}, we have for 
$\eta\in A_\delta$ 
$$|\hat\varphi_k| \le (2\pi)^{-1}\gamma |k|^\nu 
e^{-2\pi \delta|k|} \|\eta\|_{A_\delta}\ ,$$
and for $\eta\in C^r$ 
$$|\hat\varphi_k| \le (2\pi)^{-r-1} 
\gamma |k|^{\nu-r}\|\eta\|_{C^r}\ .$$
These estimates do not allow us to conclude that $\varphi$ belongs to the 
same space as $\eta$, but allow us to conclude that it belongs to a 
slightly weaker space. 

As mentioned before, the characterization of the analytic
spaces in terms of their Fourier series  is very 
clean, so that we can obtain 
estimates of the solutions in these spaces. 
Then,   we will use 
Lemma \ref{characterization} 
to obtain the results for $\Lambda_r$ spaces.

Since  for $0 < \sigma < \delta$ we have:
$$\|e^{2\pi i k\cdot\theta}\|_{\delta-\sigma} 
\le e^{2\pi (\delta-\sigma)|k|}\ ,$$
we have\mar{analytic}
\begin{equation}
\label{analytic}
\begin{array}{rcl}
\|\varphi\|_{A_{\delta-\sigma}} 
&\le &\ds \sum_{k\in\zed^n\setminus\{0\}} |\hat\varphi_k| 
e^{2\pi |k| (\delta-\sigma)} \\
\noalign{\vskip6pt}
&\le &\ds \sum_{k\in\zed^n\setminus\{0\}} 
\frac1{2\pi |k\cdot\omega|} \|\eta\|_{A_\delta}
e^{-2\pi \sigma|k|}\\
\noalign{\vskip6pt}
&\le &\ds \frac{1}{2\pi}\gamma\|\eta\|_{A_\delta} \sum_{k\in\zed^n\setminus\{0\}} 
|k|^\nu e^{-2\pi \sigma|k|}\\
\noalign{\vskip6pt}
&\le &\ds C\gamma\|\eta\|_{A_\delta} \sum_{\ell\in\nat} 
\ell^{\nu  +n-1} e^{-2\pi \sigma\ell}\\
\noalign{\vskip6pt}
&\le &\ds C\gamma\sigma^{-(\nu+n)} \|\eta\|_{A_\delta}\ ,
\end{array}
\end{equation}
where in the fourth inequality we have just used that we do first the sum 
in the $k$ with $|k|=\ell$ (the number of terms in this sum can 
be bounded by $C\ell^{n-1}$). 
We denote by $C$ constants that depend only on $\nu$ and the dimension $n$ 
and are independent of $\gamma,k$, etc. 

Similarly, using that 
$$\|e^{2\pi i k\cdot\theta}\|_{C^s} \le C|k|^s\ ,$$
we have
\footnote{Here, $C$ depends on $s$ even if it is independent of 
$k$. We, however do  not include  the $s$
dependence in the notation to avoid clutter.}
\begin{eqnarray*}
\|\varphi\|_{C^s} 
&\le & C\gamma \|\eta\|_{C^r} \sum_{k\in\zed^n\setminus\{0\}} |k|^{\nu-r+s}\\
&\le & C\gamma \|\eta\|_{C^r} \sum_{\ell\in\nat} 
\ell^{\nu -r+s+n-1}\ .
\end{eqnarray*}
The sum in the \RHS converges provided that 
$$r>\nu +s +n\ .$$

Actually, one can do significantly better than these 
crude bounds if one notices that 
the small divisors have to appear rather infrequently
(see \cite{Russmann75,Russmann76}).

Note that $\omega \cdot(k + \ell) = \omega \cdot k + \omega \cdot \ell$.
Hence, if $\omega \cdot k$ happens to be very small, 
$\omega \cdot(k + \ell) \approx  \omega \cdot \ell$,
so that if $|\ell| \ll |k|$, 
$\omega\cdot (k  + \ell)$ is bounded away from zero by \eqref{diophantineflow} applied to $\ell$.

In other words, the really bad small divisors appear 
surrounded by a ball on which the divisors are not that 
small.  Hence,  if instead of estimating the size 
as in \eqref{analytic} using the estimates \eqref{diophantineflow}
in the third step we use a Cauchy--Schwarz inequality,
which  takes into account  the sum of 
terms, not just the 
sup, and that can profit from the 
fact that \eqref{diophantineflow} cannot 
be saturated very often, we obtain the result of 
\cite{Russmann75,Russmann76}, which reads:


\begin{lemma}\label{linearestimates}
Assume that
$\omega$ satisfies \eqref{diophantineflow}, 
with $\nu  \ge n-1$
and that 
$\tilde \omega$ satisfies \eqref{diophantinemap}.
Let $\eta,\tilde \eta$ be analytic functions 
with zero average.

Then, we can find $\varphi, \tilde \varphi$ solving 
\eqref{der}, \eqref{diff}.
Namely 
\begin{equation}
\begin{split}
&D_\omega \varphi = \eta \\
& L_{\tilde \omega} \tilde \varphi =  \tilde \eta.
\end{split}
\end{equation}
and $\varphi$, $\tilde \varphi$ have zero average.

Moreover, we have for all $\delta > 0$:
\begin{equation} \label{analyticcohomology}
\begin{split}
&\| \varphi \|_{\sigma - \delta} \le C \delta^{-\nu}   K_{\nu, n} \| \eta\|_\sigma \\
&\|  \tilde \varphi \|_{\sigma - \delta} \le C  \delta^{-\nu} K_{\nu, n} \|\tilde \eta\|_\sigma 
\end{split}
\end{equation}
Where the $C$ are the same constants that appear in \eqref{diophantineflow},
\eqref{diophantinemap} and $K$ are constants that depend
(in a very explicit formula) only 
on the exponent in \eqref{diophantineflow}, \eqref{diophantinemap}
and the dimension of the space.

If we assume that $\eta$, $\tilde \eta$ are in $\Lambda_r$, 
$r > \nu$, we obtain:
\begin{equation} \label{differentiablecohomology}
\begin{split}
& \| \varphi\|_{\Lambda_{r -\nu}} \le
 C K_{\nu,n}  \|\eta\|_{\Lambda_r} \\
& \| \tilde \varphi\|_{\Lambda_{r -\nu}} \le
 C K_{\nu,n} \| \tilde \eta\|_{\Lambda_r} 
\end{split}
\end{equation}
\end{lemma}

We just note that the part \eqref{differentiablecohomology}
is a consequence of \eqref{analyticcohomology} using the 
characterization of 
differentiable functions by properties of the approximation 
by analytic functions in 
Lemma \ref{characterization}.  

When studying analytic problems, one can be sloppy with the 
exponents obtained and still arrive at the same result. 
However,
as \eqref{differentiablecohomology} shows, taking care of 
the exponents is crucial if we are studying finitely differentiable 
problems and want to obtain regularity which is close to optimal.


\begin{exercise} 
Read the argument in \cite{Russmann76}. Do you obtain 
some improvement using the  H\"older inequality in place 
of Cauchy--Schwarz? 
\end{exercise}


\begin{exercise}  \label{secondordereq}
In the study of Lindstedt series 
(e.g. \eqref{norder})
we encountered second order 
equations for $\varphi$
given $\eta$ of the form:
\begin{equation} \label{eq:secondordereq}
\varphi(x + \omega) + \varphi(x - \omega) - 2 \varphi(x)  = \eta(x)
\end{equation}
where $\varphi$ and $\eta$ are periodic and 
$\omega$ is a Diophantine number. 

Develop a theory of the equation 
\eqref{eq:secondordereq}
along the lines of the theory developed
in Lemma~\ref{linearestimates}.

Do it either by treating it directly in 
Fourier series or by 
factoring it as two 
equations:
\begin{equation}
\begin{split}
& w(x) - w(x- \omega) = \eta(x) \\
& \varphi(x +\omega) - \varphi(x) = w(x) 
\end{split}
\end{equation}

Are there any differences between the estimates or 
the solvability conditions you get by the two methods?

What happens if instead of using the naive estimates
presented in the text you use the estimates of 
\cite{Russmann76}? 
\end{exercise}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\mar{Geometricstructures}
\subsection{Geometric structures} \label{Geometric_structures}

There are several structures that play an important role in KAM theory. 
In this Section, we will discuss symplectic and, more briefly,
 volume preserving and 
reversible systems 
(there are other geometric structures that have come to play a role 
in KAM theory, but we will not discuss them here).

In this Section, the emphasis will be on the geometric structures and not on 
the differentiability properties, so we will assume that vector fields 
generate flows, for which variational equations are valid, 
etc. (i.e., that they have some mild differentiability properties). 

Here we will use Cartan calculus of differential forms rather than the 
old-fashioned notation. 
Since Cartan calculus uses only geometrically natural operations, it is 
conceptually simpler. 
This is a great advantage in mechanics, where one frequently uses changes 
of variables, restriction to submanifolds given 
by regular values of the  integrals of motion, etc. 

The traditional notation --- in which one writes functions as functions of  
the coordinates, e.g., $H(p,q)$ --- is perfectly adequate when the 
coordinates are fixed. 
On the other hand, when one changes coordinates, one has to decide whether 
$H(p',q')$ denotes the same function of new arguments or whether $H(p',q')$ 
is a different function of $p'$ and $q'$ which produces the same numerical 
value as the old function $H$ produced with the old variables $p$ and $q$. 
The ambiguity increases enormously when one needs to compute partial 
derivatives --- a great deal of the complications in traditional books and 
papers on mechanics and thermodynamics arises from this.

For KAM theory these considerations are not so crucial because many of the 
operations one has to perform require using Fourier coefficients and the like, 
which forces the fixing of a certain system of coordinates. 
Nevertheless, we think the conceptual simplification  
provided by the geometric notation is worth the effort required in 
introducing it.

Now let us start with some important definitions. 

\begin{definition}
A {\em symplectic structure\/} in a manifold is given by a 2-form 
$\omega_2$ satisfying the conditions 
\begin{itemize}
\item[i)] $\omega_2$ is non degenerate 
\item[ii)] $\omega_2$ is closed, i.e., $\d\omega_2=0$.
\end{itemize}
A {\em volume form\/} in a manifold of dimension $n$ is an $n$-form 
$\omega_n$ that satisfies 
\begin{itemize}
\item[i$'$)] $\omega_n$ is non degenerate.
\end{itemize}
\end{definition}

Naturally, an $n$-form $\omega_n$ in an $n$-dimensional manifold 
automatically satisfies 
\begin{itemize}
\item[ii$'$)] $\d\omega_n=0$.
\end{itemize}

Much of the geometric theory goes through just under the conditions i) and 
ii) --- or i$'$) and ii$'$). 
When we do not need to distinguish between the symplectic and the volume 
preserving cases, we will use $\omega$ to denote either 
$\omega_2$  or $\omega_n$. 

Properties i) and i$'$) allow us to identify a vector field $v$ with a $1$- 
and $(n-1)$-form, respectively, by \mar{identification}
\begin{equation}
\label{identification}
\i_v \omega_2:=\omega_2(v,\cdot)=\gamma_1\ ,\qquad \i_v \omega_n =\gamma_{n-1}\ .
\end{equation}
We will denote the identifications \eqref{identification} 
by $\I_{\omega_2}$ and $\I_{\omega_n}$, respectively. 

Fundamental examples of a symplectic  form $\omega_2$ on $\RR^k\times \RR^k$ 
and a volume form $\omega_n$ on $\RR^n$ are \mar{examples}
\begin{equation}
\label{examples}
\begin{array}{rcl}
\omega_2 & = &\ds \sum_{i=1}^k \d p_i \wedge \d q_i\ ,\\
\noalign{\vskip6pt}
\omega_n & = &\ds \d x_1 \wedge \ldots \wedge \d x_n \ .
\end{array}
\end{equation}

\begin{remark} 
The name symplectic seems to have been originated as a pun
on the name complex. Indeed, there is a sense in which 
symplectic geometry is a  complexification of 
Riemannian geometry. This is actually quite deep 
and there is a wonderful new area of research using 
methods of  complex analysis in symplectic topology. 

Since these notes are focused on 
KAM theory, it suffices to note that 
in mechanics one often finds
the matrix $J  \equiv
\left( 
\begin{array}{cc}
 0 & \Id_d \\
 -\Id_d  & 0
\end{array} 
\right) 
$
which satisfies $J^2 = -1$ and which, therefore,
is  quite analogous  to multiplication by
$i$ in complex analysis. 
\end{remark}


The identification of vector fields with forms plays a very important role 
because it allows us to describe the vector fields whose flow preserves the 
structure. 
Denote by $\Phi_t$ a family of diffeomorphisms of the manifold 
generated by the time-dependent vector field $v_t$, i.e., 
$$
\frac{d}{dt} \, \Phi_t = v_t \circ \Phi_t\ ,\qquad \Phi_0 =\Id\ .
$$
In particular, if $v_t$ is independent of $t$, $\Phi_t$ is a flow: 
$\Phi_{t+s} = \Phi_t\circ \Phi_s$. 
(Again we recall that in this Section we are assuming the objects to be 
differentiable enough, in this case $v_t$ to be $C^1$.)

Using the definition of Lie derivative, Cartan's so called  ``magic formula''
to express the Lie derivative
\begin{equation}\label{magicformula}
L_X \gamma = d (\i_X  \gamma) + \i_X ( d \gamma), 
\end{equation} the 
closedness of $\omega$ and the definition of $\I_\omega$ we obtain:
$$
\frac{d}{ds}|_{s = 0} \Phi_{t+s}^{*}\omega  
= \Phi_t^* L_{v_t} \omega 
= \Phi_t^* ( \d \,\i_{v_t}\omega + \i_{v_t}\,\d\omega ) 
= \Phi_t^* \, \d \, \I_\omega v_t \ .
$$
Thus, if $\omega$ is invariant under the flow $\Phi_t$ (i.e., 
$\Phi_{t}^*\omega = \omega$), we conclude that $\I_\omega v_t$ is closed. 

The above result is quite interesting because the $\Phi_t$-invariance 
of $\omega$ seems at first sight to be a non-linear and non-local 
constraint for the flow $\Phi_t$. 
The corresponding condition on the vector field $v_t$ (closedness of $\I_\omega v_t$) is, in contrast, perfectly linear and local. 

Of particular importance for KAM 
theory  are the vector fields (called {\em exact 
symplectic}, resp.\ {\em exact volume preserving}) 
for which $\I_\omega v_t$ is exact, i.e., 
$$\I_\omega v_t = \d\gamma_t$$
with $\gamma_t$ a function (symplectic case) or an $(n-2)$-form 
(volume preserving case). Sometimes these are called 
{\sl globally Hamiltonian} vector fields to indicate 
that one can find a Hamiltonian that generates them 
globally and not only locally. All
the flows that preserve the symplectic or volume 
structure can be expressed locally 
as a Hamiltonian flow, but perhaps not globally. We will come back to this in more
detail when we consider some extra structure of the space.


Of course, when we are considering local problems, by Poincar\'e's lemma, 
we do not need to distinguish between symplectic and exact symplectic 
vector fields. 

In the symplectic case, for \eqref{examples}, 
we have that $\I_{\omega_2}v \equiv \i_v\omega_2 = - \d H$ 
reduces to the standard Hamilton's equations
$$
v_{p_i} = - \frac{\partial H}{\partial q_i}\ ,\qquad 
v_{q_i} = \frac{\partial H}{\partial p_i}\ .
$$
The function $H$ is called the {\em Hamiltonian} 
of the vector field $v$. Vector fields satisfying locally 
$\i_v\omega_2 = - \d H$ for some function $H$ 
are called {\em locally Hamiltonian vector fields}. 
If the function $H$ can be defined globally, 
the vector field $v$ is called {\em globally Hamiltonian}. 

An important consequence
of the preservation of symplectic or volume form 
is that if a diffeomorphism 
$f$ preserves the form $\omega$ and $\i_X\omega = - \d H$, 
we have
\begin{equation}
\label{canonicaltransformation}
\i_{f_*X}\omega  = \i_{f_*X}f_*\omega = f_* (\i_X \omega) = 
- f_* \d H= - \d f_* H = - \d(H\circ f^{-1}) \ ,
\end{equation}
so that $f_*X$ is also a Hamiltonian flow for $H\circ f^{-1}$. 

In old fashioned language, this was described as saying that ``canonical 
transformations preserve the form of Hamilton's equations'' or some similar 
sentence.   (In old fashioned books the name canonical 
transformations referred to diffeomorphisms preserving 
the symplectic form, or sometimes to what we have 
referred to as exact symplectic.)

The importance of the
formula \eqref{canonicaltransformation}
is that to make canonical changes of variables 
to  a Hamiltonian vector field, it suffices to make changes of variables in 
the Hamiltonian functions. 
This is conceptually much simpler and computationally more efficient. 
As we will see, canonical perturbation theory owes its success to this remark. 
Note that this calculation goes through both for symplectic and volume forms. 
Using Cartan calculus, it is possible to develop perturbation theories for 
symplectic and volume preserving flows which are completely analogous. 

Notice that in 2 dimensions the volume form and the symplectic structure 
are the same and that, when $n=2k$,
$$\omega_2^{\wedge k} := 
\omega_2\wedge \omega_2\wedge\cdots\wedge \omega_2\qquad 
\mbox{($k$ times)}$$ 
is a volume form. 

Clearly, a flow that preserves $\omega_2$ also 
preserves $\omega_2^{\wedge k}$. 
This fact is usually referred to in mechanics
as {\em Liouville's theorem} and is of 
fundamental importance since it makes a connection of 
mechanics with ergodic theory. Indeed, ergodic theory 
was introduced in the study of the relations of this observation 
with statistical mechanics.

In the  study of Hamiltonian flows, 
it is also of interest to  study the form
$\mu_E$ defined in the regular
energy surfaces $\Sigma_E = \{ H = E\} $  --
assuming that $d H $ does not vanish on $\Sigma_E$, so that it is 
a smooth manifold --
by $\omega^{\wedge k} = \mu_E \wedge d H $. 
Since $H$ is invariant under the flow, so is 
$d H$ and $\mu_E$ is invariant.


The intermediate forms, $\omega_2\wedge\cdots\wedge\omega_2$ ($\ell$ times, 
$\ell<k$) are also invariant. 
It seems that not much use has been made of them 
(\cite{Poincare}, Chapters~XXII--XXVII is devoted to this question). 

One of the first consequences of the identification 
between vector fields and forms \eqref{identification}
is a simple proof of the Darboux theorem. (See
\cite{McDuffS95}. The original proof along these 
lines was done for the volume case in 
\cite{Moser65}.)

\begin{theorem}\label{thm:darboux}
Given a symplectic or volume form $\omega$ and a point $x_0$, 
there exists a local diffeomorphism $f$ on a neighborhood of $x_0$ to 
$\RR^n$ such that $f^* \omega$ is of the form in \eqref{examples}.
\end{theorem}


Note that the Darboux theorem implies that there are no local symplectic 
or volume invariants 
(so that the recent but already very rich theory of symplectic 
invariants and obstructions is eminently global).
Moreover, an argument of \cite{Moser65}
shows that for volume preserving geometry 
in a compact manifold
the only invariant is the total volume. 
This is in great contrast with Riemannian geometry where the 
``theorema egregium'' of Gauss shows that there are local invariants for 
isometry \footnote{ Note that the condition
$d \omega = 0$ is some sort of curvature condition, 
so that  perhaps it is fairer to compare 
symplectic geometry to a sort of Riemannian 
geometry of flat manifolds.}.

Of particular importance for KAM theory will be the study of exact 
transformations. 
They can only be defined on manifolds where $\omega$ is exact, i.e., 
manifolds for which
$$
\omega = \d\theta\ .
$$
One important example is 
$$
\omega_2 = \sum_{i=1}^k \d p_i\wedge \d q_i\ ,\qquad 
\theta = \sum_{i=1}^k p_i\,\d q_i
$$ 
with $q\in \TT^k$, $p\in \RR^k$, so that $M= \TT^k\times \RR^k$. 

More generally, if $M= T^* N$ is the cotangent bundle of the $k$-dimensional 
manifold $N$, and $\pi :T^*N\to N$ is the projection, one can define 
$\theta$ intrinsically as the only 1-form in $T^*N$ with the property 
$$\gamma^*\theta =\gamma$$
for all 1-forms $\gamma$ on $N$ \cite{AbrahamMarsden}, Prop.~3.2.11. 
(Here, $\gamma$ is considered as a map from $N$ to $T^*N$, satisfying 
$\pi\circ\gamma=\Id$, so that $\gamma^*$ maps the 1-forms in $T^*N$ into 
1-forms in $N$.) 
One can easily check that this is equivalent to the standard prescription 
of taking a local trivialization of $T^*N$ with coordinates $(p,q)$ 
and then setting $\theta = \sum_{i=1}^k p_i\,\d q_i$. 
One needs to check that the definition is independent 
of the system of coordinates chosen.

For volume preserving maps, our main example will be 
$$
M= \TT^{n-1}\times\RR\ ,\qquad \theta 
= p\,\d q_1 \wedge \cdots \wedge \d q_{n-1}\ ,
$$ 
where $(q_1,\ldots,q_{n-1})\in \TT^{n-1}$, $p\in \RR$. 
We note that given $\omega$, $\theta$ is determined up to a closed form. 

When $\omega$ is exact (i.e., $\omega=\d\theta$) we use that 
$f^*\d=\d f^*$ to obtain that for any diffeomorphism $f$ 
preserving $\omega$ 
$$
\d (f^*\theta - \theta) =0\ .
$$
We say that the $\omega$-preserving diffeomorphism $f$ 
is {\em exact\/} when 
\begin{equation}
\label{exact}
f^*\theta -\theta = \d S\ .
\end{equation}

Once we fix $\theta$, $S$ is defined up to a form of zero exterior 
derivative; in the symplectic case, this means up to a constant.


Conversely, it turns out that the function 
$S$ determines to a large extent the diffeomorphism.
If we know $S$ in the whole manifold 
and the diffeomorphism restricted to a {\sl Lagrangian}
submanifold, it is possible to reconstruct the 
diffeomorphism in the whole manifold.
(See \cite{Haro00}.)

In exact symplectic  (or volume) manifolds ($\omega = d \theta$), there is 
a very close relationship between exact families and Hamiltonian 
flows.   Families of exact diffeomorphisms are 
generated by a Hamiltonian flow and vice versa. 

To show the first statement, note that 
if $f_t$ is a smooth family, we have that 
${f_t}^* \theta - \theta = \d S_t$ and we can choose $S_t $ 
smooth in $t$. 
If we take derivatives of this relation with respect to $t$ 
and introduce the vector field $\F_t$ generating $f_t$ by 
$\frac{d}{dt}f_t = \F_t \circ f_t $, we have
\begin{equation}
  {f_t}^* L_{\F_t} \theta  = \d \dot S_t   \ ,
\end{equation}
where $L$ denotes the Lie derivative and $\dot S_t = \frac{d}{dt} S_t$.
Using Cartan's formula for the Lie derivative, we have 
\begin{equation}
  {f_t}^* [ \d \i_{\F_t} \theta  + \i_{\F_t}  d \theta ]
  = \d \dot S_t \ .
\end{equation}
Therefore,
\begin{equation} 
  \i_{\F_t} \omega = \d \, [  ({f_t}^*)^{-1}\dot S_t - \i_{\F_t}\theta] \ .
\end{equation}
Hence, we conclude that a family of exact 
maps is generated by a Hamiltonian flow of 
Hamiltonian given by the formula:
\begin{equation} 
  H_t =  \i_{\F_t}\theta - ({f_t}^*)^{-1} \dot S_t  \ .
\end{equation}
In the exact symplectic case, the last formula reads 
$$
H_t =  \i_{\F_t}\theta - \dot S_t \circ f_t^{-1} \ .
$$

The converse is proved by a very similar calculation. 
Note that if we are given $H_t$ and an exact $\omega$-preserving 
diffeomorphism $f_0$ with an initial primitive $S_0$, 
and $f_t$ is generated by the Hamiltonian flow of $H_t$, 
then the deformation $f_t$ is also exact, 
and the primitive $S_t$ satisfies the differential equation 
\begin{equation} 
  \dot S_t = f_t^* ( \i_{\F_t}\theta - H_t )  \ .
\end{equation}


Notice that the obstruction for a symplectic or volume preserving 
diffeomorphism $f$ to be exact is just the 
cohomology class with real 
coefficients of $f^{*}\theta-\theta$. 
For example, in the map we considered before, 
$$
f(q,p) = (q,p+a)
$$ 
with $a$ a constant ($a\in\RR^k$ in the symplectic case, $a\in\RR$ in the 
volume preserving case), we have
$$
\begin{array}{rcl}
f^{*}\theta_1-\theta_1 & = &\ds \sum_{i=1}^k a_i\,\d q_i\ ,\\
f^{*} \theta_{n-1} - \theta_{n-1} 
& = & a\,\d q_1\wedge\cdots\wedge \d q_{n-1}\ .
\end{array}$$
In this case, one can see that the cohomology obstruction vanishes if and 
only if the flux that we considered in 
Definition \ref{flux} vanishes.


If $f$ is a diffeomorphism close to the identity in the $C^r$ ($r=1,2,\ldots,
\infty$) topology, it is not hard to show for $M$ as in the example that 
there is an exact family of vector fields interpolating with the identity. 

This can also be proved for analytic functions; 
however, it is far from trivial. See \cite{KuksinP94}.

The reason why exactness plays an important role in KAM theory can be 
understood from the simple example
(already mentioned) in $\RR\times\TT$, 
$$
f(p,q) = (p+\ep,q+p)\ , \qquad f^n(p,q) = (p+n\ep,q+np)\ ,
$$
which does not admit any quasiperiodic orbits for $\ep\ne0$ 
(all the orbits escape to infinity).

A consequence of great importance for us later is that if we choose 
a function, resp.\ an $(n-2)$-form, $\gamma$ 
and form a vector field by 
$$
v= \I_\omega\,\d\gamma\ ,
$$
then the time one map of the vector field, $\Phi_1$, is exact. 
This gives a convenient way to generate transformations close to the identity. 


Since commutators of vector fields are an
ingredient of the variational equations, it 
is quite interesting to study how 
commutators interact with the geometric 
structures (volume forms and symplectic).

Recall that the commutator of two vector fields 
can be considered  as the commutator of the vector fields
considered as differential operators.  
That is, the commutator of $C^1$ vector fields is 
defined as  
\begin{equation}\label{commutatorformula}
[ X, Y ] = XY - YX,
\end{equation}
when we consider the vector fields as first order differential 
operators in a manifold 
without boundary. (It is somewhat surprising, but of course true, 
that the commutator is a 
first order operator, even though the \RHS of
\eqref{commutatorformula} looks like 
a second order operator!)
The commutator can also be defined 
as 
$$
[X,Y] = \lim_{t \to 0} t^{-2}
( Y_{-t}\circ X_{-t}  \circ Y_{t} \circ X_t - \Id )
$$
where $X_t$ denotes  the flow generated by 
$X$ and similarly for $Y$. We  have also taken the usual 
liberty of employing additive notation rather than a more geometric one
to denote comparisons.


The following well known  result 
relates the commutators to geometry. 
We have followed the presentation of 
\cite{BanyagaLW96}.


\begin{lemma}
\label{commutator}
Let $\omega$ be a non-degenerate closed form as before.
\begin{itemize}
\item[(i)] 
If $X$, $Y$ are locally Hamiltonian vector fields, 
then $[X,Y]$ is a globally Hamiltonian vector field 
with Hamiltonian  $\i_Y (\i_X\omega) = \omega(X,Y)$.
\item[(ii)] 
If $X$ has $H$ as a Hamiltonian and $Y$ is locally Hamiltonian,
then $-L_Y H$ is a Hamiltonian for $[X,Y]$.
\item[(iii)] 
If $Y$ has $F$ as a Hamiltonian and $X$ is locally Hamiltonian,
then $L_X F$ is a Hamiltonian of $[X,Y]$.
\end{itemize}
\end{lemma}

\proof
First recall the identities $L_X\d\alpha=\d L_X\alpha$ and 
$$
\i_{[X,Y]}\alpha = L_X\i_Y\alpha - \i_Y L_X\alpha 
$$
which are valid for each $m$-form $\alpha$ and vector fields $X$ and $Y$. 
Also, observe that a locally Hamiltonian vector field $X$ satisfies
$$
L_X\omega =0
$$
which follows easily from Cartan's ``magic formula'' \eqref{magicformula}.

To prove (i), compute:
\begin{eqnarray*}
\i_{[X,Y]}\omega &=& L_X \i_Y\omega - \i_Y L_X\omega = L_X \i_Y\omega \\
&=& \i_X\d\,\i_Y\omega + \d\,\i_X\i_Y\omega = \d\,\i_X\i_Y\omega \\
&=& \d(\omega(Y,X)) = -\d(\omega(X,Y)) \ .
\end{eqnarray*}

For (ii), we know that $\i_X\omega = - \d H$ and use (i):
\begin{eqnarray*}
-\d(-L_Y H) &=& L_Y(\d H) = - L_Y\i_X\omega 
= -\i_Y\d\,\i_X\omega - \d\,\i_Y\i_X\omega \\
&=& \i_Y\d (\d H) - \d(\omega(X,Y)) = \i_{[X,Y]}\omega \ .
\end{eqnarray*}

The proof of (iii) is analogous to that of (ii): 
from $\i_Y\omega = -\d F$ we obtain
\begin{equation*}
-\d(L_X F) = -L_X(\d F) = L_X\i_Y\omega 
= \i_{[X,Y]}\omega \ .
\end{equation*}
\qed


Let $X$ and $Y$ be Hamiltonian vector fields with Hamiltonians 
$H$ and $F$, respectively 
(i.e., $\i_X\omega=-\d H$, $\i_Y\omega=-\d F$). 
The {\em Poisson bracket} of $H$ and $F$ is defined as
\mar{Poisson-def}
\begin{equation}
\label{Poisson-def}
\{H,F\} := - L_X F \ , 
\end{equation}
or, equivalently, as
$$
\{H,F\} = - \d F (X) = (\i_Y\omega)(X) = \omega(Y,X) \ .
$$
The antisymmetry of $\omega$ yields
\begin{equation} \label{Poisson}
\{H,F\} = - \{F,H\} 
\end{equation}
as well as the formula $\{H,F\} = L_Y H$. 

In coordinates, 
\begin{equation}
\label{Poisson-coordinates}
\{H,F\} = \sum_{i=1}^{k} 
\left( \frac{\partial H}{\partial q_i} \, \frac{\partial F}{\partial p_i} - 
  \frac{\partial H}{\partial p_i} \, \frac{\partial F}{\partial q_i} 
  \right) \ .
\end{equation}

Using \eqref{Poisson-coordinates}, one can easily check that 
the Poisson bracket satisfies the Jacobi identity,
$$
\{H,\{F,G\}\} + \{F,\{G,H\}\}  + \{G,\{H,F\}\} = 0 \ , 
$$
which, together with the linearity and the antisymmetry of the Poisson 
bracket, implies that the functions on the phase space of 
a dynamical system with the Poisson bracket are a Lie algebra. 
Moreover, the Poisson bracket is a derivation with respect to 
the product of functions. 

This means:
\begin{equation}
\{H,FG\} = \{H,F\} G + F \{H,G\}.
\end{equation}


The property (ii) (or (iii)) of Lemma~\ref{commutator} implies that 
the Hamiltonian vector field corresponding to $\{H,F\}$ 
is equal to $-[X,Y]$:
$$
\i_{[X,Y]}\omega = \d \{H,F\} \ .
$$
This means that the map from the functions on the phase space 
to their Hamiltonian vector fields 
(i.e., $H\mapsto X$ such that $\i_X\omega=-\d H$) is a morphism 
of Lie algebras (the Lie-algebraic operations being respectively 
the Poisson bracket and the commutator of vector fields).


Note that $L_X F $ means the Lie derivative of 
$F$ along the flow of 
$X$
and, similarly, 
$L_Y H$ is the Lie derivative of 
$H$ along the flow of $Y$.

By the identities above, 
$$
L_X F = - L_Y H,
$$
which indicates
that the derivative of 
a Hamiltonian  form along the Hamiltonian flow
of another Hamiltonian form is related 
by  a sign change to the situations when the roles
are reversed. This is a somewhat surprising property
of Hamiltonian systems.


One way to look at the above calculations is to realize that the exact 
transformations are a group and that the vector fields $\I_\omega\,\d\gamma$ 
are a Lie algebra. 
(In the old fashioned  language, the vector fields of the form 
$\I_\omega\,\d\gamma$ were called ``infinitesimal transformations'' or, 
given that ``infinitesimal'' is a somewhat dirty word in some circles, 
``transformations close to the identity.'') 

Unfortunately, even if this point of view is heuristically correct, 
it is not without problems. 
First of all, composition of transformations is not a differentiable 
operation in almost any precise sense. 
Indeed, note that $f\circ (g+\Delta) - f\circ g\approx f'\circ g\Delta$, 
so that the derivative of composition should be a multiplication by 
$f'\circ g$. 
Hence, if we consider composition in a space as $C^r$, $\Lambda_\delta$, etc., 
then $f'\circ g$ may not belong to this space; if we consider 
$C^{r+\alpha}$ spaces, then the composition is not even continuous!

More importantly, the exponential of the Lie algebra does not cover an 
open neighborhood of the identity. 
That is, in any arbitrarily small neighborhood 
of the identity in $\Lambda_\delta$, 
there exist exact maps that cannot be written as time one maps of a 
differentiable vector field. 

Another important geometric structure that plays a role
in KAM theory is that of the so called {\em reversible} systems. 
They appear in any applied problem in which time can be ``run backwards'' 
(i.e., if $\gamma(t) $ is a trajectory, then $\gamma(T-t)$ also is). 
This happens in mechanical problems without friction 
or in electric circuits without resistors and in other problems. 
Examples of reversible systems also appear in finite dimensional truncations 
of fluid mechanics problems when there is no viscosity. 
In general, physical problems in which there is no 
dissipation are often reversible. 
When the systems are not mechanical, there is no reason 
why we should have also a symplectic structure.  
In particular, in the example of circuits, it is 
possible to find interesting examples with odd
dimensions.

A map $A$ is said to be {\em reversible\/} when there exists an involution $R$ 
(that is, $R^2=\Id$) for which $A^{-1} = R^{-1}AR$, i.e., $A$ is 
conjugate by $R$ to its own inverse. 

Since $R^{-1}=R$, the above condition  can be expressed as 
$A^{-1} = RAR = RAR^{-1}$. 
Note also that reversibility implies that $S=AR$ is an involution. 
Hence, $A$ is a product of two involutions, $A=SR$.
One can also check that the product of two involutions is reversible with 
respect to either of them, so that one can just as well define 
a reversible map as the product of two involutions, even if this obscures 
the physical interpretation and the origin of the name. 

Sometimes one does not require that $R$ is an involution. 
These systems are sometimes called {\em weakly reversible\/}. 
The KAM theory only needs weak reversibility. 
(Actually, in many occasions that KAM theory applies, 
we can use KAM theory to show that the systems  are 
actually reversible.)

For flows, the definition is similar: the flow $f_t$ is {\em reversible\/} if 
there exists an involution $R$ such that $R^{-1}f_tR = f_t^{-1}=f_{-t}$. 
Taking derivatives, we obtain the reversibility condition in terms of the 
vector field $\F_t$ generating the flow: $R_* \F_t = -\F_t$.

One very important example of a reversible system is a mechanical 
system without friction whose forces depend only on the position of the 
particles. 
If we reverse the velocities and keep the positions the same, the system 
runs backwards.  Hence we can take $R(x,v) = (x,-v)$.
Clearly, $R$ is an involution.  
Reversible mappings have 
recently received a great deal of interest 
in the context of statistical mechanics since many 
slightly dissipative models are reversible. 
This reversibility leads to very amusing consequences
such as pairing rules for Lyapunov exponents.
See \cite{BonettoCP98} for some applications to Statistical Mechanics
and references.

Good  surveys of reversible systems in general are
\cite{Sevryuk86} and \cite{ArnoldS86} and recent developments in 
the  KAM theory for reversible systems are covered in 
\cite{Sevryuk98}. 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\subsection{Canonical perturbation theory}\label{canonicalperturbation}

The goal of perturbation theory is to understand the dynamics of a 
``perturbed'' system which is close to another well understood system.
Usually  these well understood systems are chosen among
``integrable'' systems but this is not necessarily the case.  
As we will see in later proofs of KAM theorem, sometimes 
we want to take as unperturbed systems systems of 
a particular kind that have an interesting feature. 
In the case of integrable systems, the feature of interest 
for the study is quasi-periodic orbits.

The most naive approach to perturbation theory is to develop the solutions 
in powers of the perturbation parameter. 
That is, if we have a vector field 
$$X_\ep = X_0 + \ep X_1 + \ep^2 X_2 +\cdots \ , $$ 
we try to find solutions of 
$$
\dot x_\ep = X_\ep(x_\ep) \ , \qquad  x_\ep(0)=a(\ep) \ ,
$$
by setting 
\begin{equation}\label{expansion}
\begin{split}
& x_\ep (t) = x_0 (t) + \ep x_1 (t) + \cdots \ ,  \\
& a(\ep) = a_0 + \ep a_1 + \ep^2 a_2 + \cdots \ ,
\end{split}
\end{equation}
substituting in the equation and solving. 

That is,
\begin{equation}\label{hierarchy}
\begin{array}{rcl}
\dot x_0 & = & X_0(x_0)\quad\hskip1.23truein ,\qquad x_0(0) =a_0\\
\noalign{\vskip6pt}
\dot x_1 & = & X_1(x_0) + DX_0(x_0)x_1\quad\qquad ,\qquad x_1 (0) = a_1\\
\noalign{\vskip6pt}
\dot x_2 & = & X_2(x_0) + DX_1(x_0)x_1 \quad\quad\quad ,\qquad x_2(0)=a_2\\
\noalign{\vskip6pt}
&& + DX_0(x_0)x_2 + \frac12 D^2X_0(x_0) x_1^{\otimes 2} \\
&& \cdots \ \quad .
\end{array}
\end{equation}

Provided that $X_\ep$ is analytic in $\ep$ and its argument,
this series was shown to converge by Cauchy 
(but before that, it was used regularly by Newton). 

Note that all the equations in the hierarchy have the form 
$$\dot x_n - DX_0(x_0) x_n = R_n \ ,\qquad x_n(0)=a_n \ , $$ 
where $R_n$ is a polynomial expression involving only terms $x_0,\ldots, x_{n-1}$
and (known!) derivatives of $X_i$.

In spite of its ancient pedigree and the theorems of convergence this method 
has shortcomings. 

It is an easy exercise that taking the second order problem 
$$\ddot x_\ep = - (1+2\ep + \ep^2) x_\ep \ , \qquad 
x_\ep (0) = 1\ ,\ \dot x_\ep (0) =0 \ , $$
the solution is 
$$\begin{array}{rcl}
x_0(t) & = & \cos t\\
\noalign{\vskip6pt}
x_1(t) & = & - t \sin t\\
\noalign{\vskip6pt}
x_2(t) & = & - \frac{t^2}{2} \cos t \\
 & \cdots &  \ . 
\end{array}$$
This series indeed converges to the right solution $x_\ep(t) =\cos((1+\ep)t)$ 
as well as one can expect (it is entire in $\ep$ and in $t$) but, if one 
truncates, one can see that the approximate solution thus obtained blows up. 
Indeed, the more terms one takes, the more severe the blow~up is. 
On the other hand, the true solution remains bounded for all times. 

Hence, these series are unable to predict long term behavior, even in 
those extremely favorable examples where the function is linear and the 
solutions are entire. 
Of course, this phenomenon only becomes worse
if one considers other  more complicated non-linear problems. 

This phenomenon caused consternation when the phenomena above appeared 
in the study of the solar system and the instability of the solar system was 
confirmed to all orders in perturbation theory. 
The terms with powers in $t$ became dominant for $t$ of order of centuries, 
which gave them the name ``secular'' terms 
(in Latin ``saeculum'' means  -- among other things --  century). 

A more careful examination of the convergence proof and the quantitative 
estimates that lead to it, shows that one cannot trust this perturbation 
theory to order $n$ except when $\ep^n t\ll 1$. 
For non-entire perturbations, one should not use  this 
naive perturbation method except when $\ep t\ll 1$.
(Fortunately, we will be using some more effective methods 
that can give information on perturbations over longer time scales.)

The Lindstedt series we have seen in Section~\ref{linstedt} originated 
with the goal of obtaining a perturbation series that produced series which
were always periodic or quasi-periodic (that is, free of secular terms).

A much more effective method to ascertain the long term behavior of systems 
is the following:

We try to find transformations $g_\ep$ in such a way that 
\begin{equation}\label{transformation}
g_{\ep *} X_\ep = X_0 \ . 
\end{equation}
This method is not restricted to Hamiltonian systems. 
Indeed, the very influential book 
\cite{BogoliubovMitropolsky} 
develops many applications to non-Hamiltonian systems 
(one can also find there Lindstedt series for dissipative systems). 

This method is, however, very well suited for Hamiltonian systems 
because it is very easy to keep track of families of transformations 
of Hamiltonian systems and vector fields. 

In the case where $g_\ep$ are canonical transformations and $X_\ep$ are 
Hamiltonian vector fields (i.e., $\i_{X_\ep} \omega = - \d H_\ep$), 
as we saw in (\ref{canonicaltransformation}), the equation 
(\ref{transformation}) reduces to 
\begin{equation}\label{perturbative}
H_\ep \circ g_\ep = H_0 \ .
\end{equation}

One should emphasize that in contrast with the more elementary ``secular
method'', the validity of this method is not limited by the length of the 
orbit but rather by whether the orbit leaves the region where the 
transformation $g_\ep$ is defined. 

In some cases, especially when there is some contraction 
(of course, this never happens for Hamiltonian systems), 
one can use the perturbation theory itself to show that 
this region is never left by the trajectories. 

Note that if \eqref{perturbative} is solved, then we have 
$$g_\ep^{-1} \circ \Phi_t^\ep \circ g_\ep = \Phi_t^0 \ , $$ 
where $\Phi_t^\ep$ and $\Phi_t^0$ denote the flows of $H_\ep$ and $H_0$, 
respectively. 

To solve \eqref{perturbative}, it is of paramount importance to parameterize 
the families $g_\ep$ in such a way that \eqref{perturbative} can be solved order 
by order.

One possibility followed in old fashioned books
(but not very practical in many applications) 
is to parameterize $g_\ep$ by their generating functions
(see Section \ref{generating}).
One shortcoming of generating functions is that
one needs to assume existence of a system of global coordinates
which are mixed variables
(or work in patches).
Another shortcoming of this method is that the rule of composition is awkward
and it involves solving implicit equations 
(see Section \ref{generating}).


Another alternative, which is more geometrical is that of the Lie series.
The basic idea is that we try to consider transformations as time one maps 
of Hamiltonian vector fields.  Some more detailed tutorials 
on Lie series are \cite{MeyerH92},\cite{Meyer91},\cite{DragtFinn}. 
Some reviews of canonical perturbation 
theory  from the point of view of Physicists including a
variety of applications are \cite{Cary81}, \cite{Omohundro86}.


It is customary to write the time one maps of a vector field $\L$ as 
$\exp (\L)$. 
This notation is motivated by the remark that the space of vector fields can be 
considered as the Lie algebra of the space of diffeomorphisms. 
Also, if we identify $g$ with the operator 
$$
U_g : L^2(M) \to L^2 (M) \ : \varphi \mapsto U_g \varphi = \varphi \circ g \ , 
$$
then $\exp (\L) = U_g$ in the usual sense of operator theory when $\L$ 
is a complete flow preserving volume. 

This notation is very suggestive and one would also like to use tools of Lie 
group theory such as the Baker--Campbell--Hausdorff formula 
\begin{equation} \label{BCH}
\begin{split}
\exp (\ep \L) \exp (\ep \L^1) 
&= \exp \Bigl( \ep (\L+\L^1) + \ep^2 \frac12 [\L,\L^1] + \ep^3 \Tau_3 \\ 
& \qquad +\cdots + \ep^n\Tau_n+\cdots \Bigr) \ , 
\end{split}
\end{equation}
where $[\ ,\ ]$ denotes the commutator and $\Tau_n$  is a sum of iterated 
commutators of $\L,\L^1$. 

Even if the sums in \eqref{BCH} cannot be considered as convergent, the
formula can be justified in an appropriate weak sense (\cite{LlaveMM86}), 
and it is true that when applying the formula up to order $N$, we have, for 
sufficiently differentiable vector fields, 
\begin{equation}\label{asymptotic}
\begin{split}
&\left\|\left[\exp (\ep\L) \exp (\ep\L^1) - \exp 
\biggl( \sum_{n=1}^N \ep^n \Tau_n\biggr) \right] \varphi\right\|_{C^r} \le \\
&\qquad\qquad\qquad\qquad \le \ep^{N+1} C_{\L,\L^1,N}\|\varphi\|_{C^{N+r+2}} \ . 
\end{split}
\end{equation}
In spite of \eqref{asymptotic}, it is not true that 
$\exp\left(\sum_{n=1}^N \ep^n \Tau_n\right) \varphi$ 
converges as $N\to\infty$ even for an analytic $\varphi$ (a sketch of a proof 
will be given later). 
It is, however, not difficult to obtain bounds 
 $C_{\L,\L^1,N} \le (N!)^k$ for some $k>0$, 
so that \eqref{asymptotic} can be used quite quantitatively. 

In connection with \eqref{BCH} it is interesting to note that the commutator 
of two locally Hamiltonian vector fields is globally Hamiltonian 
(see Lemma~\ref{commutator}).
Hence, even if $\L$, $\L^1$ are only locally Hamiltonian, all the $\Tau_n$'s are 
globally Hamiltonian and can, therefore, be described by the Hamiltonian 
function. 

There are several variants of the method of Lie transforms that have been 
considered in the literature depending on how we write our candidate map 
in terms of exponentials (time-one maps) of Hamiltonian vector fields. 
In order of historical appearance some of the methods proposed in the 
literature are: 
\begin{align}
g_\ep & = \exp (\ep \L_1 + \ep^2 \L_2 + \cdots + \ep^n\L_n+\cdots) \ , 
\label{Deprit}\\[3mm]
g_\ep & = \cdots \exp (\ep^n\L_n) \cdots \exp (\ep^2\L_2)\exp (\ep\L_1) \ , 
\label{Dragt}\\[3mm]
g_\ep & = \cdots \exp\biggl( \sum_{i=2^n}^{2^{n+1}-1} \ep^i \L_i\biggr) 
\cdots \exp (\ep^3 \L_3 + \ep^2 \L_2) \exp (\ep \L_1) \ . 
\label{LMM}
\end{align}
(See \cite{Deprit69},\cite{DragtFinn},\cite{LlaveMM86} respectively.)

The recursive equation for the perturbation expansions can be computed rather 
straightforwardly if we use with abandon --- we can if we interpret the 
formulas  in the 
asymptotic sense ---
the formulas 
$\exp \L = \sum_{n=0}^\infty \frac1{n!} \L^n$,
think of the $\L^n$ as differential operators  and 
rearrange the expressions according to the rules 
of non-commutative algebra.

For example, in \eqref{Deprit} we obtain:
\begin{gather*}
\exp (\ep \L_1 + \cdots + \ep^n\L_n) H_\ep = H_0\\[2mm]
\L_1H_0 + H_1 = 0\\[1mm]
\left(\tfrac12 \L_1^2 + \L_2 \right) H_0 + \L_1 H_1 + H_2 = 0\\[2mm]
\left[ \tfrac16 \L_1^3 + \tfrac12 (\L_1\L_2+\L_2\L_1) 
  + \L_3 \right] H_0 \\[1mm]
  \hspace{20mm}+ \left(\tfrac12\L_1^2+\L_2\right) H_1 
  + \L_1 H_2 + H_3 = 0 \ .
\end{gather*}

A point that we would like to  emphasize is that the equation that we obtain 
in the three schemes \eqref{Deprit},
\eqref{Dragt}, \eqref{LMM} for Lie series 
is always
\begin{equation}\label{cohomology}
\L_n H_0 + H_n = R_n \ , 
\end{equation}
where $R_n$ is an expression that depends only on previously computed terms. 

Using \eqref{Poisson},
we can transform \eqref{cohomology} into
\begin{equation}
\label{cohomology2}
-\H_0 L_n + H_n = R_n \ . 
\end{equation}

Note that, if we have a theory for the solutions of equations of the form 
\eqref{cohomology2}, we can proceed along the perturbation schemes above. 


Note that if we take
$$H_0(p,q) = \omega\cdot p\ ,$$
then \eqref{cohomology2} reduces to the equation \eqref{diff} 
that we have studied (under Diophantine assumptions on $\omega$) in 
Section~\ref{linear_estimates}.  Both the data and the unknown 
in \eqref{cohomology2} have an extra variable, but since it enters 
as a parameter, we can discuss the regularity of the equation 
in terms of the theory that we have developed.

Perhaps more importantly, we note that if we have a good theory of 
approximate solutions of \eqref{cohomology2} we can solve the hierarchies of 
equations approximately. 
This is important in practice as well as in some proofs of the KAM theorem. 

We also note that an integrable system $H_0(p)$ can be written using 
the Taylor expansion 
$$H_0(p) = H_0(0) + \omega\cdot p +O(p^2)$$
Hence, we can solve very approximately \eqref{cohomology2} in a sufficiently 
small neighborhood of $\{p=0\}$. 
This is what is actually used in KAM theory. 

These algorithms are also practical tools
that can and have been implemented numerically.
The next  two remarks  are concerned with some issues about 
numerical implementations.

In \cite{LlaveMM86} one can find an appendix
where it is shown that  the theories based in 
the three schemes above (and in others) 
are equivalent in the sense that they give 
results which are equivalent in the sense of 
asymptotic series.

\begin{remark}
We emphasize that although all the schemes 
\eqref{Deprit}, \eqref{Dragt}, \eqref{LMM}
are formally equivalent in the sense that 
they require solving the same equations, they are not at all equivalent from 
the point of view of
efficiency and 
stability of the  numerical implementation
or from the point of view of 
detailed estimates or even convergence. 

As we pointed out, the exponential
of vector fields  does not cover any neighborhood of the 
origin in the group of 
diffeomorphisms so that \eqref{Deprit}
 does not provide a good parameterization  
of a neighborhood of the identity and,
perhaps relatedly, it is known to be outperformed in stability etc.\ 
by (\ref{Dragt})~\cite{DragtFinn}. 

The method (\ref{LMM}) \cite{LlaveMM86} is actually convergent in many cases. 
Indeed, the KAM theorem asserts it does converge in certain cases
as we will see.  For example, it is convergent for the 
perturbation series that are based on Kolmogorov's method
that will be discussed in Section \ref{Kolmogorovmethod}.


The only numerical implementations of \eqref{LMM} that I know of are some 
tentative ones carried out by A.~Delshams and the author, but it seems 
that  the scheme \eqref{LMM}
has a very good chance to be  very efficient  and stable.
Indeed, it seems to be the only method for which it is possible to 
establish convergence. 
\end{remark}


\begin{remark}
Sometimes in the numerical solution of the equations 
\eqref{Deprit}, \eqref{Dragt}, \eqref{LMM}
it is advantageous -- both from the point of 
view of speed and of reliability -- 
not to proceed order by order but rather to 
take groups  of 
orders $[2^n,  2^ {n+1} -1]$. 


This is tantamount to solving the equations by a Newton method in 
the space of families.  It has the disadvantage over the 
order by order algorithm that at every stage one has to 
solve a different equation. This inconvenience is sometimes offset 
by the advantage that one linear equation  allows one to study many orders
and because the  equations that need 
to be solved may be more stable than those of other methods.


These quadratic algorithms can be used for all the three methods 
described above. Nevertheless, they are somewhat easier to implement 
in \eqref{LMM} which has some quadratic convergence already in  place.

We emphasize that all the methods can be studied either order 
by order or quadratically. 

I think that it would be quite important to have a better theory of 
these algorithms.
\end{remark}


One lemma that we will be using later is that it is 
possible to approximate the action of 
the Lie transform on functions  by just the first term
in the series of the exponential.

\begin{lemma}\label{leadingorder}
Let $H,G$ be  functions on $\torus^n\times \real^n$
endowed with the  canonical symplectic structure.
We use  the notation of \eqref{analyticnorm} for the 
analytic norms of functions.

Assume that:
\begin{itemize}
\item[ i)]
$\|H\|_\sigma $ is finite.
\item[ii)] For a constant $C$ which depends only on the dimension,
we have for $\delta > 0$
\begin{equation} \label{doesnotflow}
\delta^2 >  C \|G\|_\sigma \ .
\end{equation}
\end{itemize}

Then, for another constant $\tilde C$ depending only on the dimension, 
we have:
\begin{equation}\label{eq:leadingorder}
\| H \circ \exp{ \L_G} - H - \{ H, G\} \|_{\sigma -\delta}  \le
\tilde{C} \delta^{-4} \|G\|_\sigma^2 \|H\|_\sigma
\end{equation}
\end{lemma}

\begin{proof}
By Cauchy estimates, \eqref{Cauchyn}, we have:
\begin{equation}\label{doesnotflow2}
\|\nabla G\|_{\sigma -\delta/2} \le \hat C \delta^{-1} \|G\|_\sigma.
\end{equation}
with $\hat C$ a constant that depends only on the dimension.

The constant in \eqref{doesnotflow} is chosen so
that the \RHS of \eqref{doesnotflow2} is smaller than $\delta/2$. 

Therefore, all the trajectories of the  Hamiltonian flow 
generated by $G$ which  start in the 
region 
$$ 
\D_{\sigma - \delta} \equiv \{ |I| \le e^{2\pi (\sigma - \delta)},
|\Im(\phi)| \le  \sigma - \delta \}
$$
do not leave the region  $\D_{\sigma -\delta/2}$ for a time 
smaller than one (note that they are moving at 
a speed that does not allow them to traverse the region 
separating the domains in a  unit of time).
Hence,
$$
\exp(\L_G) (\D_{\sigma - \delta})  \subset \D_{\sigma - \delta/2}\ .
$$
In particular, we can define the composition $H \circ \exp(\L_G)$
in $\D_{\sigma -\delta}$.

For any point $(I,\phi)$, we can estimate the difference 
by using the Taylor theorem with remainder
along the trajectory starting at that point. 
It suffices to estimate the second derivative of 
$H$  and the  square of the displacement.
The second derivative of $H$ can be estimated 
by Cauchy estimates \eqref{Cauchyn} 
$ \|\nabla^2 H \|_{\sigma - \delta/2} \le
\tilde{C} \delta^{-2} \|H \|_\sigma$.

The displacement
can be estimated by $\|\nabla G\|_{\sigma - \delta/2}$, 
which by Cauchy estimates \eqref{Cauchyn} 
can be estimated by $ \tilde{C} \delta^{-1} \|G\|_\sigma$.

Putting these two estimates together, one obtains the desired
result. 

\end{proof}

\begin{remark}
Analogues of 
Lemma~\ref{leadingorder} are true in any analytic
symplectic manifold. 
One just needs to define appropriately norms
of analytic functions, Cauchy inequalities, etc. 
In the versions of KAM theory
that we will cover in this tutorial, the version we have stated is
enough, but the reader  is encouraged 
to formulate and prove the more general versions. 
\end{remark}


\bigskip

It is also possible to develop a canonical perturbation theory for maps. 
Again, the main idea is to change variables so that the system becomes 
close to the system which is ``well understood''. 

The perturbative equation in this case becomes  
\begin{equation}\label{perturbationmap}
g_\ep^{-1} \circ f_\ep \circ g_\ep = f_0 \ . 
\end{equation}
We should think of those equations as equations for $g_\ep$ given $f_\ep$. 

These equations have been dealt with traditionally by parameterizing 
$f_\ep$ using the generating functions method, and similarly for the $g_\ep$. 

A more geometric method to use in perturbation theory 
is the method of deformations which was introduced in singularity theory. 
(In the book \cite{MeyerH92}, one can also find this method
introduced in the Lie transform method.)
It seems particularly well suited to discuss conjugacy equations of a 
geometric nature. (See \cite{LlaveMM86, BanyagaLW96} for 
some global geometric applications.)
We write 
$$
\frac{d}{d\ep} f_\ep = \F_\ep \circ f_\ep \ , \qquad
\F_\ep = \I_\omega (\d F_\ep) \ .
$$
We refer to $f_\ep$ as a family, $\F_\ep$ as the generator and to $F_\ep$ 
as the Hamiltonian and adopt the typographical convention of
using the same letter to denote the  objects associated 
with the same family but 
using lowercase to denote the family,
calligraphic font to  denote the 
generator and capital  to denote the Hamiltonian.

We note that, under  the assumption that $\F_\ep$ is $C^1$, given the generator 
and the initial point $f_0$ of the family, we can reconstruct $f_\ep$ 
in a unique way. 
Hence, given $F_\ep \in C^2$, and $f_0$ we can reconstruct $f_\ep$. 

If we express equation \eqref{perturbationmap} in terms of the 
generators, it becomes 
\begin{equation}\label{perturbationmap2}
-\G_\ep + \F_\ep + f_{\ep *} \G_\ep = 0 \ . 
\end{equation}
Expressed in terms of Hamiltonians, it reads 
\begin{equation}\label{perturbationmap3}
-G_\ep + F_\ep + f_{\ep *} G_\ep = 0 \ . 
\end{equation}
(In the Hamiltonian case, we recall $f_{\ep*} G_\ep = G_\ep \circ f_\ep$.) 

There are several advantages in expressing equation 
\eqref{perturbationmap} in terms of the generators and the Hamiltonians: 
\begin{itemize}
\item
The equations in terms of the generators are linear. 
This is natural if we think that the vector fields are infinitesimal 
quantities which can, therefore, enter only linearly.

\item
The geometric structures --- not only symplectic, but also volume preserving 
and contact (which we have not and will not discuss in these lectures) ---
are taken care of without any extra constraint.

\item
These equations are geometrically natural and can be formulated globally.
\end{itemize}

The proof that \eqref{perturbationmap2} and \eqref{perturbationmap3} are 
equivalent to \eqref{perturbationmap} follows easily from the 
observation that 
\begin{equation}\label{deformationcalculus}
\begin{array}{rcl}
k_\ep & = & f_\ep \circ g_\ep \\[3mm]
\Longleftrightarrow \K_\ep & = & \F_\ep + f_{\ep*} \G_\ep\quad ;
\quad k_0 = f_0\circ g_0\\[3mm]
\Longleftrightarrow K_\ep & = & F_\ep + f_{\ep*} G_\ep \quad ;
\quad k_0 = f_0\circ g_0 \ .
\end{array}
\end{equation}

Even if the equation \eqref{perturbationmap3} is linear in the 
Hamiltonian $F_\ep$, we should keep in mind that $f_\ep$ depends on 
$F_\ep$ through the very non-linear process of solving the corresponding ODE.

Nevertheless, one can approximate \eqref{perturbationmap3} by 
\begin{equation}\label{perturbationapproximate}
F_\ep - G_\ep + f_{0*} G_\ep = 0 \ . 
\end{equation}
When 
$f_0 (I,\phi) = (I,\phi + \omega)$, 
this equation -- for a fixed $I$ -- 
has the form of \eqref{diff} 
the difference equations which were studied in 
Section~\ref{linear_estimates}. Since $I$ can be considered 
as just a parameter in the data for the equation, we can 
use the regularity theory derived for 
\eqref{diff}.

If $G_\ep$ is a solution of \eqref{perturbationapproximate}, we note that 
\begin{equation}\label{remainder}
F_\ep - G_\ep + f_{\ep*} G_\ep = (f_{\ep*} - f_{0*}) G_\ep \ .
\end{equation}

The intuition is that if $F_\ep$ is small, we can think that $G_\ep$ 
(obtained by solving a linear equation with $F_\ep$ as \RHS) is small 
and that $f_{\ep*} -f_{0*}$ 
(obtained by solving a differential equation which involves 
derivatives of $F_\ep$) is also small. 
Hence, the term in  the \RHS of 
\eqref{remainder} is ``quadratically'' small. 

Using the estimates in Lemma~\ref{linearestimates} and mean value theorem etc., we 
can prove the estimate in the analytic spaces
$$
\|(f_{\ep*} -f_{0*})G_\ep\|_{\sigma -\delta}
\le C \delta^{-2 \nu -4} \|F_\ep\|_\sigma^2 \ . 
$$ 
Similarly, for the finitely differentiable case,
$$
\|(f_{\ep*} -f_{0*})G_\ep\|_{\Lambda^r} 
\le  C \|F_\ep\|_{\Lambda^{r+\nu+4}}^2 \ . 
$$

Note also that if we write 
$$
F_\ep = \ep F_1 + \ep^2 F_2 + \cdots
$$
and try to find 
$$
G_\ep = \ep G_1 + \ep^2 G_2 + \cdots \ , 
$$ 
then \eqref{perturbationmap3} 
can be turned into a hierarchy of equations for the $G_n$'s. 
All the equations are of the form 
$$G_n - f_{0*} G_n + F_n = R_n \ , $$
where $R_n$ is an expression involving previously computed terms. 

\begin{remark}
For later developments, it is important to note that 
both \eqref{perturbationmap} and 
\eqref{perturbationmap3}  (and \eqref{perturbationmap2}, \eqref{perturbationmap3}) 
have a ``group structure''.

This means that if we can find an approximate solution $g_\ep$ (e.g., by 
solving the first order equations
\eqref{perturbationapproximate}, 
\eqref{cohomology2}), we can perform the 
change of variables and set 
\begin{equation}\label{firststep}
\begin{array}{rcl}
\tilde f_\ep & = & g_\ep^{-1}\circ  f_\ep \circ g_\ep\\[2mm]
\tilde H_\ep & = & H_\ep \circ g_\ep\ .
\end{array}
\end{equation}

If we solve the problem for $\tilde{f}_\ep$, $\tilde H_\ep$, i.e.,
\begin{equation}\label{exact2}
\begin{array}{rcl}
\tilde g_\ep^{-1} \circ\tilde f_\ep\circ\tilde g_\ep & = & f_0\\[2mm]
\tilde H_\ep \circ \tilde g_\ep & = & H_0 \ , 
\end{array}
\end{equation}
then, we have solved the original problem since joining 
\eqref{firststep} and \eqref{exact2}, we obtain 
\begin{equation*}
\begin{array}{rcl}
(g_\ep \circ\tilde g_\ep)^{-1}\circ f_\ep \circ g_\ep \circ\tilde g_\ep 
& = & f_0\\[2mm]
H_\ep \circ g_\ep \circ \tilde g_\ep & = & H_0 \ . 
\end{array}
\end{equation*}


The importance of the above observation, which will be 
appreciated later, is that, 
by making successive changes of variables,
we can eliminate all the linear 
terms of the error by solving 
an equation which is just the linearized 
equation {\sl at the integrable system}.

This is an important difference with the standard Newton method
since the standard  Newton method requires that we solve
the linearized equation in a 
neighborhood.

The fact that we can obtain a method that, for all purposes
is like a Newton method but which nevertheless only requires that 
we know how to solve one linearized equation depends crucially 
on the fact that the equations that we are studying have a 
particular structure which is called {\sl group structure} 
and that will be discussed much more in 
Section \ref{Implicitfunction}, in particular,
Remark~\ref{groupstructure} and Exercise~\ref{withoutgroup}.


\end{remark}


\subsection{Generating functions} \label{generating}

One of the reasons why Hamiltonian mechanics is so practical is 
because of the ease with which one can generate enough canonical 
transformations.

In old fashioned books (\cite{Whittaker}, \cite{Goldstein}, \cite{Landau}) 
one can find that canonical transformations are described 
in terms of generating functions. We will describe those briefly 
and only for purposes of comparing with older books.  It should be 
remarked however, that generating functions, even if 
not so useful from the point of view of transformation theory
(there are better tools such as Lie transforms) 
are still quite useful tools in the variational 
formulation of Hamiltonian mechanics, providing thus a valuable link to
Lagrangian mechanics. Moreover, some of the constructions 
that appear in generating functions are quite natural in optics.
See \cite{BornWolf65}.

The equation 
$$
f^* \theta - \theta = \d S 
$$
is written in old fashioned notations as 
\mar{pushforward}
\begin{equation}
\label{pushforward}
p' \, \d q' - p \, \d q = \d S \ ,
\end{equation}
where $p \, \d q := \sum_{i=1}^k p_i \, \d q_i$, etc. 
This should be interpreted as saying that we consider 
the coordinate functions $p_i$, $q_i$ 
and the transformed functions $p'_i = p_i \circ f$, 
$q'_i = q_i \circ f$. Then, $\theta = p \,\d q$ 
and $f^* \theta = p' \,\d q'$; $S$ is a function on the manifold.

When  $q$, $q'$ are a good coordinate system 
(i.e.\ $p$ can be expressed as a function of $q$ and $q'$, 
$p=p(q,q')$), 
we can define a function $\S:\RR^n \times \RR^n \to \RR$ 
by setting  $\S(q, q'):=S(q,p(q,q'))$. Usually, in old fashioned notations, 
this is described as ``expressing $S$ in terms of $q$ and $q'$'' 
or simply by writing ``$S = S(q, q')$'' or something to that effect. 
Very often the same letter is used for $S$, $\S$. 

\begin{remark}
In old fashioned notation in mechanics, the same letter is
used for the functions that give the same result 
irrespective of the arguments. Of course,
even if this is almost  manageable and one understands 
what is meant by $S(q,p)$, $S(q,q')$,
by paying attention to the arguments,
this notation
wreaks havoc when one  tries to
evaluate at concrete points. 
For example, what is meant by $S(2, \pi)$ when one 
is considering at the same time  $S(q,p)$, $S(q,q')$?
\end{remark}


Note however, that the assumption that $q$, $q'$ 
is a system of coordinates is far from trivial. 
To begin with, it is not obvious that the manifold 
on which we are working admits a system of coordinates. 
Even if it does, or if we work just 
on a neighborhood so that we have local coordinates, 
there are other conditions to be imposed.
For example, it is false for the identity and, 
for transformations close to the identity, 
it may be a system of coordinates with undesirable properties. 
It is, however, true for $(p,q)\mapsto (p, q+p)$ 
and small perturbations. 

In that case, when we compute the differential in \eqref{pushforward}, 
we have 
$$
\d S = \partial_1 \S(q, q') \, \d q + \partial_2 \S(q, q') \, \d q' \ ,
$$
hence 
\mar{graph}
\begin{equation}
\label{graph}
p =  - \partial_1 \S(q, q') \ , \qquad 
p' =  \partial_2 \S(q, q') \ .
\end{equation}
We think of \eqref{graph} as an equation for $p'$, $q'$ 
in terms of $p$, $q$. If the implicit function theorem applies 
(for which it suffices that $q,q'$ provide a good 
system of coordinates on the manifold)
and indeed the  equations \eqref{graph} can be solved differentiably,  $\S$ 
determines the transformation. 
Note that the implicit function theorem will apply 
in a $C^2$ open set of functions $\S$, so that we can think 
of this procedure as giving a chart of some subset of the
space of symplectic mappings. Also  note that we 
parameterize the transformation 
by one scalar function.
Moreover, the changes of variables 
given by \eqref{graph} are automatically symplectic. 
Keeping track of transformations -- in an open set --
which satisfy some non-linear and non-local constraints
(preserving the symplectic structure) 
by just keeping track of a function is a great simplification.

However, one important shortcoming of 
these generating functions is that for the identity transformation,
$q,q'$ is not a good system of coordinates on the manifold and 
we cannot use \eqref{graph} to represent the identity or near 
identity transformations. As we have seen, near identity 
transformations play an important role in canonical
perturbation theory, so, it is necessary to devise 
variants of the method to incorporate them.


In the case that the coordinate functions $p$, $q$ are global 
(or that we just work on a neighborhood), we can write 
$$
p \, \d q = - q \,\d p + \d (pq) \ .
$$
Hence \eqref{pushforward} reads
\mar{pushforward2}
\begin{equation}
\label{pushforward2}
p' \, \d q' + q \, \d p = \d (S + pq) \ .
\end{equation}
In the case that $p$, $q'$ is a good system of coordinates 
(as happens in a neighborhood of the identity), we can write 
$$
S + p q = \tilde{\S} (p, q') 
$$
and from \eqref{pushforward2} we see that 
$$
q = \partial_1 \tilde{\S} (p, q') \ , \qquad 
p' = \partial_2 \tilde{\S} (p, q')  \ .
$$
Again, we can consider this as a system of implicit equations 
defining $p'$, $q'$ in terms of $p$, $q$. 

Note that if $q$ is an angle, then 
$\S(q+k, q'+\ell) = \S(q, q')$ 
for all $k, \ell \in \zed^n$. On the other hand, 
$\tilde{\S} (p, q'+\ell) = \tilde{\S}(p, q') + p\,\ell$. 
Even if this generating function works 
in neighborhoods of the identity, 
it does not work at all for the map $(p,q) \mapsto (-q, p)$. 

One can use similar procedures to obtain 
many other generating functions. 

For example, one can use for a  partition of 
$\{1,\ldots,d\} $ into two sets $\A$ and $\B$
the formula:
$$
- \sum_{i\in\B} p'_i \, \d q'_i + \sum_{i\in\A} p_i \, \d q_i 
=     \sum_{i\in\B} q'_i \, \d p'_i 
      - \sum_{i\in\A} q_i \, \d p_i 
    + \d \left(
        - \sum_{i\in\B} p'_i q'_i
        + \sum_{i\in\A} p_i q_i 
        \right) 
$$
to change some of the $p_i$'s for $q_i$'s 
in the push-forward.

Even if these procedures are quite customary in old fashioned 
mechanics treatises, they will not be very useful for us. 
Again, we emphasize that even if the $q,q'$ generating function 
can be defined in any exact manifold, the others seem to require
some extra structure, which can be arranged in small neighborhoods.

We note however, that the function $S$ has a well defined 
intrinsic meaning as evidenced in \cite{BornWolf65} --- this is sometimes 
described as Hamilton-Jacobi equation or ``the action as a function 
of coordinates'' depending on what interpretation one gives. 
We refer to \cite{Haro00} for much more information on this 
primitive function.

In Hamiltonian optics \cite{BornWolf65}, $S$ represents the phase of the wave.
Indeed, Hamiltonian mechanics was developed 
as a byproduct of Hamiltonian optics. 
This explains why so much of Hamiltonian mechanics, 
especially in earlier treatises is based on studying 
$S$ and its relatives.

More modern treatments (\cite{Arnold-MathMethods}, \cite{AbrahamMarsden}) 
prefer to start from the symplectic geometry and postulate it 
without any other motivation than that it eventually works. 
This is certainly expeditious.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\section{Two KAM  proofs in a model problem}

In this section we will discuss one of
the technically simplest applications of 
the KAM methodology:
the Siegel center theorem. 

The main goal of this application is to show
in action perhaps the most basic heuristic principle
of the KAM method:

\begin{quote}
{\large \sf Quadratic convergence can overcome small divisors.}
\end{quote}

Roughly speaking this means that if we have a method of
improvement that reduces the error to something that is
quadratic in the original error, even if 
the solution requires solving an equation which involves 
small denominators, we can still obtain convergence.

The fact that the convergence does indeed take place 
is rather subtle.
In our opinion, the only way to appreciate 
the subtlety of the  convergence achieved
by KAM theory  is to give a serious 
try to  several other 
seemingly reasonable schemes and see them fail. 
At the end of the proof, we have suggested several of these schemes 
as exercises.

Besides those exercises, 
we have also included some exercises which admit easy 
solutions and provide extensions to the material in the text. 

We also emphasize that the fact that one can get a 
quadratically convergent method solving only one 
 small denominator equation is 
far from trivial and it requires that the 
equations we consider have some special
structure. 
This will be elaborated in more
detail in Section \ref{Implicitfunction}
and in  particular in Remark~\ref{groupstructure}.

In this section, we will
present two versions of the Siegel theorem 
-- one using just Diophantine conditions in one dimension, 
and another using approximation functions and decomposition 
in scales in higher dimensions.

The second proof will be formulated as a set of 
exercises. 
The main ideas of this section 
follow \cite{Moser66b}, \cite{Zehnder77} and \cite{Arnold88}. 
Indeed, we  follow these references rather closely.

These two proofs will illustrate the main features of KAM 
proofs and contain the essential analytic and 
number theoretic difficulties even if they do not 
involve any geometry.

We will start with a one dimensional problem.
See \cite{Moser66b} for more details on the proof 
we present and \cite{Zehnder77} 
for a higher dimensional version.

\begin{theorem}\mar{Siegel1}
\label{Siegel1}
Let $f: U \subset \cee \to \cee$ be an analytic  function of the form\mar{fhat}
\begin{equation}
\label{fhat}
f(z) = a z + \fhat (z) 
\end{equation}
with $\fhat(z)=O(z^2)$.

Assume that\mar{diophantines}
\begin{equation}
\label{diophantines}
| ( a^n - 1) ^{-1} | \leq n^\nu K 
\end{equation}
and that\mar{smallness}
\begin{equation}
\label{smallness}
\| \fhat \|_1 \leq \rho(\nu) K^2 \ , 
\end{equation}
where $\rho(\nu)$ is an explicit function.

Then there exists a unique function 
$$
h(z) = z + \hhat(z)
$$
with $\hhat(z)$ analytic in a disc of radius 
$$
\sigma=1-2\rho(\nu)
$$
such that\mar{conjugacys}
\begin{equation}
\label{conjugacys}
f \circ h(z) = h ( a z) \ .
\end{equation}

Moreover, we have\mar{solutionestimates}
\begin{equation}
\label{solutionestimates}
\| \hhat \|_\sigma \leq \|\fhat \|_1 C \ .
\end{equation}
\end{theorem}

\begin{remark}
The uniqueness  for $h$ claimed in Theorem 
\ref{Siegel1} means that if there are two functions 
satisfying this they have to agree in an open neighborhood of 
the origin. As we have seen already, the conditions 
\eqref{diophantines} and \eqref{conjugacys} determine 
the jet of $h$ uniquely.
\end{remark}

\begin{remark}
Condition \eqref{diophantines} is 
automatic when $|a| \ne 1$. In that case, we have presented 
a  simple proof already. So we will restrict ourselves to the 
case  when $|a| = 1$.
\end{remark}


\begin{remark}
It is a standard observation that, 
assuming that $f$ is defined in a ball of radius~1 and small 
is the same as considering a small neighborhood.

Heuristically, in a small neighborhood, 
the linear part is the dominant  term and it is 
natural to try to describe the behavior  of the whole system 
in terms of the behavior of the linear one.

More precisely, given $f$, consider for $\lambda$ small
$$
f_\lambda(z) = \lambda^{-1} f(\lambda z) \ .
$$
Notice that $f_\lambda$ has the same linear part and is defined in 
$\lambda^{-1} B_r$ if $f$ is defined on $B_r$. 
Since $| \fhat(z) | = O(|z|^2)$, 
we have $\| \fhat_\lambda \|_{B_1} \leq C\lambda$.

If we apply Theorem~\ref{Siegel1} to $f_\lambda$, 
we obtain an~$h_\lambda$ satisfying $f_\lambda \circ h_\lambda(z) = h_\lambda(a z)$. 
Then $h(z) = \lambda h_\lambda(\lambda^{-1} z)$ will satisfy \eqref{conjugacys}.
\end{remark}


\begin{remark}
Condition \eqref{diophantines} is not optimal.
Later we  will discuss how to obtain the same 
result when the  arithmetic condition 
\eqref{diophantines} is replaced by  the Brjuno condition, 
which is  indeed  optimal as shown in
\cite{Yoccoz95},\cite{Perezmarco92}.

The fact that if Brjuno condition
fails one can construct counterexamples is 
considerably deeper and out of the scope of these notes. See the references 
above.
\end{remark}

Before embarking on the proof, we note that all 
the methods are based on estimates for 
the equation  
\begin{equation} \label{siegelcohomology}
\varphi (a z) - a \varphi(z) = \eta ; \quad \varphi(0) = 0
\end{equation}
in which we consider $\eta$ and $a$ as given and we are 
to determine $\varphi$.

The analysis of this equation is very similar 
to the analysis of \eqref{diff} in 
Section  \ref{linear_estimates}.
Since it is not completely identical, 
we need to start by revising slightly 
the definitions of norms and the setup.


We define the norm of an analytic function by
\footnote{ These norms are slightly inconsistent 
with those in Section \ref{prelimanalysis} 
in which we took $\|f\|_\sigma = \sup_{ |z| \le e^\sigma} |f(z)|$. 
The convention of Section \ref{prelimanalysis} is more natural when one is 
using at the same time  Fourier series and Taylor series. 
For the present section, the convention we  now take is more 
natural.}
$$
\| f\|_r =  \sup_{ |z| \le r} |f(z)|.
$$


\begin{lemma}  \label{Siegelanalytic}
Assume that $a$ satisfies \eqref{diophantines}.
Then, if $ \eta(0) \equiv \eta_0 = 0$ 
we can find a solution of 
\eqref{siegelcohomology}. 
Moreover  
\begin{equation}
\| \varphi\|_{r e^{-\delta}} \le C K |\delta|^{-\tau} \| \eta\|_r
\end{equation}
\end{lemma}

\begin{proof}
This follows from the  
results in Section \ref{linear_estimates}. 

It suffices to write $z = \exp( 2 \pi i \theta)$. 
Then, the result stated is a particular case of 
Lemma \ref{linearestimates} applied to 
a Fourier series which only has positive 
terms.
\end{proof}


\begin{exercise} 
Give a direct proof of Lemma~\ref{Siegelanalytic}.

One can  follow
 the sketch in the beginning of 
Section \ref{linear_estimates}. 

Start by observing that the 
solution of 
\eqref{siegelcohomology}  is $\varphi_k = \eta_k (a^k -a)^{-1} $.
Estimate
$|\varphi_k|$ using the above formula, 
\eqref{diophantines}, and the estimates for 
$|\eta_k|$ in terms of $\|\eta\|_r$ 
obtained using Cauchy estimates.

Estimate $\|\varphi\|_{r e^{-\delta}}$
by the sup of the coefficients.
Then, one ends up  with the desired 
result with $\tau = \nu +1$. 

Since we are dealing with analytic estimates, 
this is enough to get through the proof. The ambitious reader 
is invited to carry out an analysis similar to 
that in \cite{Russmann76} and obtain the optimal 
exponent.
\end{exercise}


Now, we proceed to the proof of 
Theorem \ref{Siegel1}. The proof we present here 
follows \cite{Zehnder77} -- it is a particular case of 
the results of that paper. 

\begin{proof}
Proceeding heuristically for the moment, we can think of \eqref{conjugacys} 
as an implicit equation in a space of functions
$$
0 = \Tau(f,h) \equiv f \circ h - h \circ a 
$$
(by $a$ we denote either the constant or the function $a(z)=az$). 
Note that $\Tau (a,\Id) = 0$. 

We consider $f$ fixed (but close to $a$) 
and we are given an approximate solution $h$\mar{initialsituation}
\begin{equation}
\label{initialsituation}
\Tau(f,h) \equiv f \circ h - h \circ a = R \ , 
\end{equation}
where $R$ is the remainder which we would like to think of as small 
(the precise sense in which it is small will not be made explicit 
in this heuristic discussion).

We would like to obtain a $\Delta$ that eliminates 
most of $R$ so that  $\Tau(f,h+\Delta) \ll R$. 
This amounts to a Newton's method. 
Since 
$$
\Tau(f,h+\Delta) \approx \Tau(f,h) + D_2\Tau(f,h) \Delta \ , 
$$
we are led to consider the equation for $\Delta$
\mar{newtongeneral}
\begin{equation}\label{newtongeneral}
R + D_2\Tau(f,h) \Delta = 0 \ .
\end{equation}

In our case -- remember that we are, for the moment, 
just proceeding heuristically, but this step is
not difficult to justify -- we have that the derivatives
will be:
$$
D_2\Tau(f,h) \Delta = (f' \circ h) \Delta - \Delta \circ a \ . 
$$
Hence, in our case \eqref{newtongeneral}  becomes:
\begin{equation}
\label{tosolve}
(f'\circ h) \Delta - \Delta \circ a = - R \ .
\end{equation}

If the factor $f'\circ h = a + \fhat'\circ h$ were just $a$, 
the equation \eqref{tosolve} would reduce 
to those considered in Lemma~\ref{Siegelanalytic}.

One way that  succeeds in reducing  the annoying
$\fhat'\circ h $  to a constant is the following:
(in  the exercises we examine 
several seemingly natural methods 
which do not work).


Take derivatives with respect to $z$ of \eqref{initialsituation} 
and obtain the identity\mar{derivativeremainder}
\begin{equation} \label{derivativeremainder}
f'\circ h \, h' - a  h' \circ a = R' \ .
\end{equation}

If rather than looking for $\Delta$, we look for $w$ defined by 
$\Delta = h'\, w$ (remember $h$ is close to the identity, so 
that indeed $1/h'$ is an analytic function so that
looking for $\Delta$ and  for $w$  is equivalent), equation
\eqref{tosolve} becomes
\begin{equation}
\label{tosolve2}
f'\circ h \, h' \, w  - h'\circ a \, w \circ a = - R \ .
\end{equation}


Substituting \eqref{derivativeremainder} in \eqref{tosolve2}, 
we are led to 
\begin{equation}
\label{magic}
a \, h'\circ a \, w - h' \circ a \, w \circ a = - R - R' \, w 
\end{equation}
or
\begin{equation}
\label{tosolve3}
a \, w - w \circ a = - (h'\circ a)^{-1} R - (h'\circ a)^{-1} R' \, w \ .
\end{equation}

If we ignore the term $(h'\circ a)^{-1} R' \, w$ 
(the intuition, which we will later turn into 
rigorous estimates, 
says that $h'\circ a$ is of order one, $R$ and $R'$ are small, 
hence $w$ is small and $R'w$ is much smaller), 
we simplify the problem 
to studying\mar{constantcoefficients}
\begin{equation}
\label{constantcoefficients}
a w - w \circ a = -(h'\circ a)^{-1} R \ ,
\end{equation}
which indeed is an equation of the type we considered 
in Lemma~\ref{Siegelanalytic}.

Hence, the prescription 
that we have derived heuristically
to obtain a more approximate solution 
is:
\begin{enumerate}
\item
Take $w$ solving \eqref{constantcoefficients};
\item
Form $\Delta = h' w$;
\item Then,
$h+\Delta$ should be a better solution to the problem.
\end{enumerate} 

Now, we turn to making all the previous ideas 
rigorous. We will need to show that the procedure
improves (that is, show estimates for the 
remainder after one step given estimates on the 
remainder before starting). We will also need to
show that the procedure can be repeated infinitely
often and that it leads to a convergent procedure.

If we are given a system with a remainder and 
run the procedure outlined above, 
the following lemma will establish bounds for the new remainder
in terms of the original one.

We will follow standard practice in KAM theory and denote by $C$ 
throughout the proof constants that depend only on the dimension 
and other parameters which are fixed in our proof.
In our case, since we are paying special attention 
to the dependence of the domain loss parameter on the size 
of the Diophantine constants and the smallness assumptions, 
$C$ will not depend on them. 
Other KAM proofs which emphasize other features may allow $C$  
to stand for constants that could also depend on the 
Diophantine constants.


\begin{lemma}\mar{iterativeSiegel1}
\label{iterativeSiegel1}
Let $f$ be as in Theorem \ref{Siegel1}, $h(z) = z + \hhat(z)$,
($\hhat(z) = O(|z|^2)$) defined in a ball of radius 
$\frac12 < \sigma < 1$ satisfy
\mar{hypothesis2}
\begin{equation}\label{hypothesis2}
\| \hhat' \| _\sigma \leq M  \le 1/2 \ .
\end{equation}
with\mar{hypothesis1}
\begin{equation}
\label{hypothesis1}
\sigma + M < 1 \ , 
\end{equation}
\mar{initialremainder}
\begin{equation}\label{initialremainder}
\| f \circ h - h \circ a \|_\sigma \leq \ep \ .
\end{equation}
Assume furthermore that $\delta > 0 $ is such that 
\begin{equation}
\label{hypothesis3}
K C \delta^{-\nu-1} \ep + \sigma e^{-\delta} < \sigma \ . 
\end{equation}
Then, the prescription 
above can be carried out and we have:
\mar{conclusion1}
\begin{equation}
\label{conclusion1}
\| f \circ (h+\Delta) - (h+ \Delta) \circ a \|_{\sigma e^{-\delta}} 
\leq K C \delta^{-\nu-1} \ep^2 + 2 \| f \|_1 (KC\delta^{-\nu-1}\ep M)^2 \ .
\end{equation}
\end{lemma}


\begin{remark}
Notice that since for $0 \le \delta \le 1$,
$
\sigma (1-e^{-\delta}) \geq \sigma \delta/2 \ ,
$
condition \eqref{hypothesis3} is implied (after adjusting the constant $C$) by
\begin{equation}
\label{hypothesis2new}
 \sigma \delta \geq C K \delta^{-\nu-1} \ep \ . 
\end{equation}
which, once we have $\sigma$, just tells us that $\delta$
cannot be smaller than a power of $\ep$.
\end{remark}


\begin{remark}
Note that if we assume without loss of generality that 
$\|\fhat\|_1\leq 2$, $K\leq K^2$, $\delta<1$, 
the \RHS of \eqref{conclusion1} is less than or equal to
\begin{equation}
\label{conclusion2}
C K^2 \ep^2 \delta^{-2(\nu+1)} \ . 
\end{equation}
\end{remark}


\begin{proof}
To check that the prescription can indeed be carried out,
we just need to check  that the  function 
$f\circ(h + \Delta)$ 
can be defined.
Hence, our 
first goal will be to obtain estimates on $\Delta$ 
and show that  the image of the ball of 
radius $\sigma e^{-\delta} $ under $h + \Delta$ is 
contained in the domain of $f$. Indeed, the estimates
for  the range will allow us also to obtain estimates 
for the derivative of $f$ via the Cauchy theorem 
which will later prove to be useful. 

Then, we will obtain the estimates in 
\eqref{conclusion1} and \eqref{conclusion2} provided that 
we have suitable estimates on $\|\Delta\|_{\sigma e^{-\delta}}$.

To obtain the estimates on $\|\Delta\|_{\sigma e^{-\delta}}$,
we note that using the Banach algebra property
of the norms and the inductive assumption 
\eqref{hypothesis2}, we can bound the \RHS of 
\eqref{constantcoefficients} by
$$
\|(h'\circ a) ^{-1} R\|_\sigma \le (1 - 1/2)^{-1} \| R\|_\sigma \ .
$$

By Lemma~\ref{Siegelanalytic} we have that
$$
\| w \|_{\sigma e^{-\delta}} \leq K C \delta^{-\nu} \|R\|_\sigma \ . 
$$
By Cauchy estimates, (see Lemma~\ref{Cauchyn}, but take into 
account that now we are in a slightly different situation), we have:
\mar{hcauchy}
\begin{eqnarray}
\| h' \circ a \|_{\sigma e^{-\delta}} &\leq& K \delta^{-1} \|h\|_\sigma\ ,
\label{hcauchy}\\
\| R' \|_{\sigma e^{-\delta}}         &\leq& K \delta^{-1} \ep \ .
\label{Rcauchy}
\end{eqnarray}
Hence, taking into account 
\eqref{banachalgebra},  and that  we had called 
$\|R\|_\sigma = \ep$, we obtain from the previous results:
\begin{eqnarray}
\| \Delta \|_{\sigma e^{-\delta}} &\leq& K C \delta^{-\nu-1} \ep M \ , 
\label{deltaestimates}\\
\| R' w \|_{\sigma e^{-\delta}}   &\leq& K C \delta^{-\nu-1} \ep^2 \ .
\label{firstterm}
\end{eqnarray}

Note that, by the assumption~\eqref{hypothesis2}, we have
$$
\| h + \Delta \|_{\sigma e^{-\delta}} < 1 \ , 
$$
so that, as claimed, the composition in~\eqref{conclusion1} indeed makes sense. 

To obtain the estimates in \eqref{conclusion1},
we consider  the 
term to be estimated in 
\eqref{conclusion1}
and the obvious identity obtained
just by adding and subtracting terms to
it and grouping the result 
conveniently.

\mar{newremainder}
\begin{equation}
\label{newremainder}
\begin{split}
 f \circ (h+\Delta) &- (h+ \Delta) \circ a \\
&  = f \circ h - h \circ a 
         + f'\circ h \, \Delta - \Delta\circ a  \\
& + [ f \circ (h+\Delta) 
         - f \circ h - f' \circ h \, \Delta ]  \ .
\end{split}
\end{equation}

The first four terms in  the \RHS of \eqref{newremainder},
using \eqref{derivativeremainder} and \eqref{tosolve2} amount to: 
$$
R + a \, h'\circ a \, w + R' w - h'\circ a \, w \circ a  = 
R' \, w \ .
$$
The term in brackets in \eqref{newremainder} can be estimated because, 
by a calculus identity (Taylor theorem with the integral form of 
the remainder)
\begin{equation}
\begin{split}
\label{taylorformula}
f(h(z)+\Delta(z))& - f(h(z)) - f'(h(z)) \Delta(z) = \\
& \qquad = - \int_0^1 (s-1) 
         \,f''(h(z)+s\Delta(z))\,\Delta^2(z)\,ds \ .
\end{split}
\end{equation}

Since, again by Cauchy bounds and \eqref{hypothesis2} we have
$$
\| f''(h(z) + s \Delta(z)) \|_{\sigma e^{-\delta}} \leq
C \delta^{-2} \|f\|_1 \ ,
$$
we can bound the $\| \cdot \|_{\sigma e^{-\delta}}$ norm of 
\eqref{taylorformula} by
\begin{equation}
\label{secondterm}
1/2 \| f \|_1 (K C \delta^{-\nu-1} \ep M)^2
\end{equation}


If we estimate \eqref{newremainder}
putting together \eqref{firstterm} and \eqref{secondterm}, 
and remembering the standing assumptions on $M, \|f\|_1$, 
we obtain~\eqref{conclusion1}.

\end{proof}


To finish the proof of Theorem~\ref{Siegel1}, 
we just need to show that if $\| \fhat \|_1$ is sufficiently small, 
we can repeat the iterative procedure arbitrarily often 
and that we converge to a limit which satisfies~\eqref{solutionestimates}.

We will denote by subindices $n$ the objects 
after $n$ steps of the iterative process 
(assuming that it can be carried out this far). 
For example, $\sigma_n$ will be the domain of definition of $h_n$ 
and we have $\sigma_{n+1}=\sigma_n e^{-\delta_n}$. 
To simplify the discussion, 
we will use the condition~\eqref{hypothesis2new} 
which implies \eqref{hypothesis3} and the bounds \eqref{conclusion2}.

The main thing that we have to do is to choose the $\delta_n$'s. 
Notice that if we choose $\delta_n$ going to zero slowly, 
we lose more domain than needed  and end up with a weaker theorem 
--of course, if we lose too fast, we end up with an empty domain.
On the other hand, the smaller that we choose $\delta_n$, 
the worse \eqref{conclusion2} becomes. 

A reasonable compromise that is neither too fast so 
that we end up with no domain nor too slow so that we 
can still converge is to choose an exponential rate of 
decay. In the exercises, we will explore other choices.

We will choose
\begin{equation}
\label{deltachoice}
\delta_n = \delta_0 2^{-n} \ , 
\end{equation}
and then, will show how to choose $\delta_0$. 

With this choice of $\delta_n$, \eqref{conclusion2} implies easily
\begin{equation}
\label{recurrence}
\ep_{n+1} \leq C K^2 \ep_n^2 \delta_0^{-2\mu} A^{n} 
\end{equation}
where $\mu = \nu+1$, $A = 2^{2\mu}$. 

We assume by induction that the iterative step can be carried out $n$ times 
(i.e., that hypothesis \eqref{hypothesis2new} is verified for the first 
$n$ steps). 
We will show that, under certain assumptions 
on the size of $\delta_0$, $\ep_0$, which will be independent of $n$, 
hypothesis \eqref{hypothesis2new} will be verified for $n+1$. 
Moreover, we will show that $\ep_{n+1}$ decreases very fast. 
Then, by repeated application of 
\eqref{recurrence} we have:
\begin{equation} \label{recurrencen}
\begin{split}
\ep_{n+1} &\leq C K^2 \delta_0^{-2\mu} \ep_n^2 A^n \\
      &\leq (C K^2 \delta_0^{-2\mu})^{1+2} 
             \, A^{n+2(n-1)} \,\ep_{n-1}^{2\cdot2} \\
      &\leq \cdots \,\,\, \\
      &\leq (C K^2 \delta_0^{-2\mu})^{1+2+2^2+\cdots+2^{n}} 
             \, A^{n+2(n-1)+\cdots+2^{n-1}\cdot 1} \, \ep_0^{2^{n+1}} \ .
\end{split}
\end{equation}

Note that $1+2+2^2+\cdots+2^{n} \leq 2^{n+1}$ 
and without loss of generality, 
we can assume that $C K^2 \delta_0^{-2\mu} > 1$.
Similarly, 
\begin{eqnarray*}
&& n + 2(n-1) + \cdots + 2^{n-1}\cdot 1 \\[1mm]
&& \qquad \qquad = 2^n [n2^{-n} + (n-1) 2^{-(n-1)} + \cdots + 2^{-1}\cdot 1] 
   \\[1mm]
&& \qquad \qquad \leq 2^n \sum_{k=1}^{\infty} k 2^{-k} = 2^n\cdot 2 
= 2^{n+1} \ , 
\end{eqnarray*}
hence
\begin{equation}
\label{iteratedbound}
\ep_{n+1} \leq \left( C K^2 \delta_0^{-2\mu} \ep_0 A \right)^{2^{n+1}} \ .
\end{equation}
Notice that if 
$\rho \equiv C K^2 \delta_0^{-2\mu} A \ep_0 < 1$, then 
\eqref{iteratedbound} converges to zero extremely fast 
(faster than any exponential). 

The condition that we need to satisfy 
to be able to perform the next step is 
$$
 \delta_{n+1} \equiv \delta_0 2^{-(n+1)} \geq 
C K \delta_0^{-\mu} 2^{(n+1)\mu} \ep_{n+1} \ ,
$$
which, in view of \eqref{iteratedbound}, is implied by
\begin{equation}
\label{iteratedcondition}
C K \delta_0^{-\mu-1} \leq 2^{-(n+1)(\mu+1)} \rho^{-2^{n+1}} \ .
\end{equation}

By now, it should be clear that if we take $\delta_0=\frac12$
(so that $\sigma_n\geq e^{-1}$), if we assume that 
$\ep_0$ is sufficiently small, we can satisfy \eqref{iteratedcondition}.

Moreover, since by \eqref{deltaestimates},
$$
\| \Delta_n \|_{e^{-1}} \leq K C \delta_0 2^{\mu n} \rho^{2^n} \ ,
$$
we see that 
$
\sum \| \Delta_n \|_{e^{-1}} < \infty
$.
Hence
$$
 \Delta \equiv \sum \Delta_n
$$
converges uniformly  in the space of
functions in the disk of radius $e^{-1}$
and  we can  easily  bound
$\| \Delta \|_{e^{-1}}$.


\end{proof}
At the end of this subsection, we have collected 
some exercises that explore alternatives for the present proof
and for another that will be presented.

Let us highlight some of the remarkable points of the proof.


\begin{remark} \label{wardidentities}
We call attention to the remarkable fact that the 
derivatives of 
\eqref{tosolve}  
could be used to transform the 
equation \eqref{derivativeremainder} 
into a much simpler equation
(with an error which is small if the remainder 
is small and of  {\sl quadratic order}).

This is what allowed us to solve the 
step with quadratic error.  In turn, this
quadratic error was crucial in being 
able to deal with the small divisors
(see the following remark).
See exercise \ref{withoutgroup} for  an 
example of a problem with very similar 
analytical properties but without group structure 
for which the result is false.


The possibility of performing this
remarkable simplification comes
from the group structure of the equations, 
as was emphasized in \cite{Zehnder76}. 

This remarkable cancellation has other justifications,
for example, in the context of Lagrangian principles. 
Indeed, one can see that it is related to the symmetry 
that we used in \eqref{cancellation}. With a bit of 
hindsight we can see that the factor 
$(1 + {\ell^{[<n]}}')$ used there is really an {\sl infinitesimal 
translation} on the right for the 
data of the problem and that the 
cancellation is just a reflection of the 
fact that the original problem is invariant 
under translations (see the classical Noether 
theorem about  variational principles with
continuous symmetries).
In Quantum Field Theory the identities 
that come from changes of variables are called 
Ward identities.  The relation between Ward identities 
and the identities used in Lindstedt series has 
been emphasized in \cite{Gallavotti94}, 
\cite{BricmontGK99}, which are papers designed to 
bridge the gap between the language of Quantum Field Theory and 
KAM theory. Of course, in QFT one often does not consider
the objects as defined per se, but rather as 
formal power series. 
\end{remark}

\begin{remark}
Once we have the iterative step and the estimates that give quadratic 
convergence, the rest is (even if miraculous and quite remarkable)
by now well understood.  

Indeed, there are several abstract formulations, some of 
which we will discuss later. See Section \ref{Implicitfunction}.

In what follows, we will emphasize the steps 
required to reach the quadratic convergence and 
leave to the reader the checking that the convergence indeed 
takes place. 

Experience shows that, once one has worked out 
a few quadratically converging 
arguments it becomes faster and more reliable 
to work out a proof by oneself than to read the proofs
by others. It is certainly more instructive for the reader
and more comfortable for the writer. In this case, 
the reader should be assured by the existence of 
properly written papers that we reference where 
he/she is encouraged to look for extra details.

Obtaining the quadratically convergent 
algorithm in classical KAM theory is  not obvious since it depends on 
cancellations given by the geometry or the structure of 
the problem which eliminate some terms which would result in
a linearly convergent method. 

Note that in the classical KAM theory, we are constrained
by the fact that we  know only how to solve one 
linearized equation (in contrast with the usual 
Newton method, where we can solve the linearized
equation in a whole neighborhood).

\end{remark}


In the remainder of this 
subsection, we will present  a proof of the multidimensional 
case of Theorem \ref{Siegel1} following 
\cite{Arnold88} chapter 28. 
The one-dimensional  version of this proof is
covered in 
\cite{SiegelM95} chapter 25.

\begin{theorem}\label{Siegel2}
Let $f: U \subset \complex^d \rightarrow \complex^d$, 
be analytic in a polydisk. 
$f(0) = 0$. Denote $Df(0) = A$ and 
assume that $A$ is diagonal and 
$\sigma = (\sigma_1, \sigma_2, \ldots, \sigma_d)$,
the spectrum of $A$ 
 satisfies:
\begin{equation} \label{diophantineproduct}
|\sigma^k - \sigma_i    |^{-1} 
\le C |k|^{\nu}, \quad k \in \nat^d,  \quad |k| \ge 2, \quad i \in \{1,\ldots, d\}
\end{equation}
(where we use the customary multi-index notation 
$\sigma^k = \sigma_1^{k_1} \cdot 
 \sigma_2^{k_2} \cdot \, \cdots \, \cdot \sigma_d^{k_d}$, 
$|k| = k_1 + \cdots + k_d$).

Then, we can find an $h:  V \subset \complex^d \rightarrow \complex^d$,
$h(0) = 0$, $D h(0) = \Id$ such that in a neighborhood 
we have:
\begin{equation} \label{conjugacy}
h^{-1} \circ f \circ h  = A
\end{equation}
\end{theorem}

The conclusion of \eqref{conjugacy} is again that 
$f$ is just the linear map in other coordinates. 

\begin{remark}
Notice that we are not assuming that the  
$\sigma_i$ have modulus $1$. The multidimensional case 
can have several interesting examples in which 
some of the $\sigma_i$ are smaller than $1$ and 
others are greater than $1$. 
In the case that there are no eigenvalues equal to 
$1$ and that no product of eigenvalues is 
another eigenvalue, Sternberg theorem will guarantee
us  that there exists a $C^\infty$ change of variables 
that reduces the system to  a linear one. To obtain that the 
change of variables is analytic, we need 
not only that the products are not eigenvalues but also some 
quantitative estimates on how far they are such as 
\eqref{diophantineproduct}. Note also that the 
$C^\infty$ changes of variables produced 
by Sternberg theorem are not unique, whereas, as pointed 
out before, the analytic ones are unique.
\end{remark}

\begin{remark}
Note that, implicitly,  condition 
\eqref{diophantineproduct} requires that there is no
eigenvalue $0$, hence $A$ is invertible.

It is very easy to show that if one eigenvalue is 
$0$ one should not expect the conclusion to be true.
\end{remark}

\begin{remark}
If  we write $\sigma_j = \exp( 2 \pi i \omega_j)$ 
-- with $\omega_j$ possibly complex numbers -- 
we see that  \eqref{diophantineproduct} 
is equivalent to  the fact that 
$\omega$ satisfies \eqref{diophantinemap} 
but we only need it for  $k \in \nat^d $ rather 
than $k \in \integer^d$.
\end{remark}


We will discuss the different stages of the proof
but leave many details to the reader since this will 
provide some training and, moreover, it can be 
found in the references indicated.

Proceeding heuristically, we will 
note that if $h(z) = z + \hat h (z)$, 
we have $h^{-1}(z) = z - \hat h (z) + O([\hat h]^2)$.
(Here in the $O$ notation 
 we allow to include derivatives.  For example,
$ \hat h' \hat h''$ will be a term allowed in $ O([\hat h]^2)$.)

If we assume that $f(z) = Az + \hat f(z) $ and that 
$\hat f$ is small, if we want to make the changes of 
variables that reduce  $f$ to linear with a smaller 
error, we have
\begin{equation}
h^{-1} \circ f \circ h(z)   = A(z)
+ \hat f(z)  - \hat h \circ A(z)  + A \hat h(z) +
O( [\hat h ]^2, [\hat f] [ \hat h] )
\end{equation}

This suggests the following iterative step 
\begin{itemize}
\item[1)] Solve the following  equation  for $\hat h$
\begin{equation} \label{tosolvemult}
\hat f(z) =    \hat h \circ A(z)  - A \hat h (z)
\end{equation} 
\item[2)] Consider  now the 
map 
$$
 \tilde f = h^{-1} \circ f \circ h(z) 
$$ 
If all works according to plan, $\tilde f$ will be much 
closer to the linear map $A$.
\end{itemize}

The approximations we have taken can be 
readily estimated by adding and subtracting
as follows.  
(Ignore for the moment questions of domains of 
definition. Suffice it to say that the simple minded 
identities we obtain are supposed to hold in 
a domain near the origin. Later we will need 
to worry about how big we can choose the domain.)


\begin{equation} \label{rest1}
f\circ h(z) =  A z +  A \hat h(z) + \hat f(z) + R_1(z)
\end{equation}  
where $R_1(z) = \hat f \circ h (z)  - \hat f(z) $.

\begin{equation} \label{rest2}
(\Id - \hat h )\circ f\circ h(z) =  
A z + A \hat h(z) + \hat f(z) + R_1(z)  -  \hat h(A z)
+ R_2(z) 
\end{equation}
where $R_2(z) = \hat h(A z) - \hat h \circ f\circ h(z)$. 

\begin{equation} \label{rest3}
h^{-1} \circ f \circ h(z) =  (\Id - \hat h )\circ f\circ h(z) 
+ R_3(z)
\end{equation}
where $R_3(z) = \left( h^{-1} - (\Id - \hat h) \right) \circ f \circ h(z)$.


Hence, from \eqref{rest1}, \eqref{rest2}, \eqref{rest3}, 
we obtain:

\begin{equation} 
h^{-1} \circ f \circ h(z)    = 
 Az  - \hat h \circ  (A z) + \hat f(z) + A \hat h(z)
+  R_1(z) + R_2(z) + R_3(z) \ . 
\end{equation}

Now we turn to  the task of obtaining estimates that 
quantify how this step indeed improves the 
situation and how we  can use it repeatedly to converge
to a solution. We highlight the main arguments.


\begin{itemize}
\item[i)] Estimates   on $\hat h$ obtained solving 
\eqref{tosolvemult}.

By carrying out exactly the same procedure indicated 
before estimating the sizes of the Taylor 
coefficients by the size of the function, solving the small 
divisor equation for the coefficients and 
then, estimating the size in a slightly smaller domain 
we obtain:
\begin{equation} \label{goodform}
\| \hat h \|_{r e^{-\delta}} \le C \delta^{-\tau} \| \hat f\|_r
\end{equation}


Note that using \eqref{tosolvemult} we immediately 
obtain estimates 
\begin{equation} \label{goodform2}
\| \hat h \circ A \|_{r e^{-\delta}} \le C \delta^{-\tau} \| \hat f\|_r
\end{equation}
Having control of $\hat h$ both in the polydisk and in its
image under $A$ will be quite important to be able to check 
that compositions, etc. make sense.


\item[ ii) ] Estimates obtained using
\eqref{goodform} and the implicit function theorem.

Note that this requires that we assume some smallness condition in 
$\hat h$ that ensures that we can indeed define the compositions.

\begin{equation}\label{implict}
\|h^{-1} - (\Id - \hat h ) \|_{r e^{-2\delta} } 
\le C \delta^{- 2 \tau - 1} \| \hat h \|_{r e^{-\delta}}^2
\end{equation}
and, similarly using \eqref{goodform2} and the implicit
function theorem  (again, we need some conditions 
that ensure that we can define the compositions needed to
apply the implicit function theorem).
\begin{equation}\label{implict2}
\| h^{-1} - (\Id - \hat h ) \|_{r e^{-2\delta}  }
\le C \delta^{- 2 \tau - 1} \| \hat h \circ A \|_{r e^{-\delta}}^2
\end{equation}

For the conditions that allow us to use the implicit function theorem, 
it is enough to assume that 
we have:
\begin{equation}
\begin{split}
& \|\hat  h\|_{r e^{-\delta} } \le K  \delta   \\
& \|\hat  h\circ A\|_{r e^{-\delta} } \le K  \delta   
\end{split}
\end{equation}
which, in view of  \eqref{goodform} \eqref{goodform2} 
are implied by: 
\begin{equation} \label{iterativeeq}
\begin{split}
& \|\hat  f\|_{r e^{-\delta} } \le K  \delta^{\tau + 1}   \\
& \|\hat  f\circ A\|_{r e^{-\delta} } \le K  \delta^{\tau + 1}  
\end{split}
\end{equation}

\item[iii)] Easy estimates using the mean value theorem. 

Note that we can estimate 
\begin{equation}
\| f \circ h_1(z) - f \circ h_2 (z)  \|_r \le 
\sup_{z \in \Sigma} |f'(z)| \| h_1 - h_2 \|_r   \ ,
\end{equation} 
where $\Sigma$ is a convex domain that includes the image of 
the polydisk of radius $r$ under $h_1$ and $h_2$. 
In particular, we can take $\Sigma$ to be the polydisk 
of radius $r + \max( \| h_1\|_r, \|h_2\|_r) $. 

If we use Cauchy estimates, we can obtain 
\begin{equation}
\| f \circ h_1(z) - f \circ h_2 (z)  \|_r \le 
\delta^{-1}\|f\|_{r'} \| h_1 - h_2 \|_r
\end{equation}  
where $r' = \left[r + \max( \| h_1\|_r, \|h_2\|_r) \right]e^\delta $.

As it turns out, to be able to make sure that the domains match 
we need a  condition of the same form as \eqref{iterativeeq}.
\end{itemize}

Hence, one can prove a lemma that ensures that, provided 
\eqref{iterativeeq} holds, we can perform the step and 
obtain estimates 
\begin{equation}
\| \tilde f  - A \|_{r e^{-3 \delta}} 
         \le C K^2 \delta^{-\tau'} \| f - A\|_r^2 \ , 
\end{equation}
where, as usual, we have denoted by $C$ a constant that 
depends only on the dimension, $K$ is the constant in 
the Diophantine inequality \eqref{diophantines}
and $\tau'$ is an exponent related to the Diophantine
exponent (roughly twice, since we are squaring
the result of Lemma \ref{Siegelanalytic} and 
we are applying Cauchy bounds twice).

This statement is usually called the {\sl iterative lemma}. 

Once we have an iterative lemma, we need to show 
\begin{itemize} 
\item[iv)]  Choose $\delta_n = \delta_0 2^{-n} $. 
If you assume that $r_0 \ge 3 \delta_0$ and 
that  $\| f - A \|_{r_0}$ is sufficiently 
small, then the iterative lemma can be applied 
repeatedly  to obtain  a sequence 
$\{ f_n \}$ defined on 
$r_n = r_{n -1} e^{- \delta_{n-1} } $. 
This sequence satisfies 
\begin{equation} 
\|f_n - A \|_{r_n} \le C \alpha^{2^n} 
\end{equation} 
for some $0 < \alpha < 1 $, which can be made arbitrarily 
small by assuming $\|f_0 - A \|_{r_0} $ is sufficiently small. 

\item[v)] 
We need to show that the
compositions 
$ h^{(n)} \equiv h_n \circ h_{n -1 } \circ \cdots h_0 $ 
converge on a non-trivial domain. 

This follows  because 
$h^{(n)} - h^{(n-1)} = \hat h_n \circ  h_{n -1 } \circ \cdots \circ h_0$
and we can estimate
\[
\|\hat h_n \|_{r_{n} e^{-2 \delta_n}} \le \|\hat f\|_{r_n} 2^{n \tau'}
\]
and using \eqref{iterativeeq} show 
that 
\[
\|\hat h_n  \circ h_{n -1 } \circ \cdots h_0\|_{r_{n} e^{-3 \delta_n}} \le 
\|\hat h_n \|_{r_{n} e^{-2 \delta_n} }
\]
\end{itemize}

Besides the fact that the quadratic convergence is 
allowing us to dominate the small divisors, we want to highlight 
some features of the algorithm. 

Note again that we can only solve the linearized equation at
precisely the identity. Nevertheless, the progress that we 
are making, allows us to reduce the problem closer 
to the identity so that we are starting at a problem which is 
even more favorable. Again, this is the group structure of 
the problem.  The successive changes of variables 
have been applied very often in the proofs involving Hamiltonian 
systems with preference to the proofs that involve 
just solving functional equations. 
This is due, in part to the fact that Hamiltonian systems 
have a very nice transformation theory. It is also 
true that reducing to normal forms, even if only approximately 
has very interesting byproducts. For example, the Nekhoroshev theorem.

Note that the analytic part of convergence was extremely similar. 
We obtained the  estimates which are {\sl quadratic} 
but which contain the bad term which has to grow unbounded. 
All these estimates were proved under some inductive assumptions that 
allow one to perform the algorithm.  
The quadratic nature of the estimates can be used to show 
that if we start with small enough error, 
the growing terms due to the 
solution of the linearized equation  do not spoil the convergence and 
that indeed we recover the inductive assumption that allows us to
keep on improving  our linearization.
Once we obtain that the remainder goes to zero 
extremely fast, it is possible to show that the 
composition of the transformations converges.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{exercise}
When $A$ has a non-trivial Jordan block
and the spectrum satisfies
\eqref{diophantineproduct}, show that  the cohomology equation 
$A \hat h - \hat h \circ A = \hat f$
is solvable as a formal power series.

What type of estimates do you obtain? 

Are the estimates you obtain enough to 
prove Theorem \ref{Siegel2} 
without the assumption that $A$ is diagonalizable? 

If not, can you construct a counterexample? 

Note: Answers to this exercise are known in the literature. 
A recent paper that the reader can consult is \cite{DelatteG00}.
\end{exercise}


\begin{exercise}
\label{russmananalytic}

Obtain optimal estimates in the R\"ussmann style for the linear equation 
for analytic functions in the several 
variables case. The case when $A$ is a diagonalizable matrix
with all eigenvalues equal to $1$ 
is very similar to the one we have discussed so far.
Much more interesting are the cases when the 
matrix has eigenvalues of modulus $1$ and non-trivial Jordan 
blocks.

In preparation of arguments to come, note that, when
$A$ has eigenvalues of modulus different from $1$, 
if the domain of 
$\varphi$ is a polydisk, the domain of 
$\varphi\circ A$ is a different set.
\end{exercise}


\begin{exercise}
Formulate the improved estimates of Exercise~\ref{russmananalytic} 
in the language of approximation functions $\Omega$. 
Do they lead to some improvement in Brjuno conditions?
\end{exercise}

\begin{exercise}

Some of the estimates in the proof
of Theorem~\ref{Siegel1}
we have presented 
are rather wasteful. 

Notice in particular that we estimated in~\eqref{Rcauchy} 
$$
\| R' \|_{\sigma e^{-\delta}} \leq K \delta^{-1} \ep \ .
$$
We can observe that, as we iterate, 
the remainder vanishes at higher and higher orders.
This will allow us to use sharper Cauchy estimates,
which we detail below.

Note that if a function $f(z) = z^N g(z)$, 
we have $f'(z) = N z^{N-1} g(z) + z^N g'(z)$.
Also, we have $\|f\|_1 = \|g\|_1$. Hence
\begin{equation}
\begin{split}
\|f'\|_r & \le  N r^{N-1} \|g\|_r + C r^N r^{-1} \|g\|_1   \\
& \le C N r^{N-1} \|f\|_1 
\end{split}
\end{equation}

Carry on the proof using these improved estimates 
and see if one obtains something better. 
\end{exercise}


\begin{exercise}
There is a certain arbitrariness in the speed 
at which domain is lost in the proofs. 

What happens if you take $\delta_n = \delta_0 n^{-\alpha^n}$ 
with $\alpha>0$? 

What happens with $\delta_n = \delta_0 n^{-\alpha}$, $\alpha>1$, 
or $\delta_n = \delta_0 n^{-1} (\log n)^{-\alpha}$, $\alpha>1$?
\end{exercise}


\begin{exercise}
Fix $a = \exp\left\{2\pi i \frac{\sqrt{5}-1}{2}\right\}$
and consider 
$$
f_N(z) = a z + z ^N  \ .
$$
What are the asymptotics of the Siegel radius as $N\to \infty$?
\end{exercise}


\begin{exercise}
\label{withoutgroup}

In the classical Newton method, we use the fact that 
if  the derivative $D_2{\Tau}(a,\Id) $ is invertible, 
then $D_2\Tau(f,h)$ is 
invertible when 
$(f,h)$ are in a neighborhood 
of $(a, \Id)$
and, moreover, the norm of the inverse is
bounded. 

We can try to apply the same ideas involved
in the proof that in the classical 
case the invertibility of the derivative is an open condition
$ (A +  B )^{-1} = A^{-1}\sum_{i = 0}^{\infty} (- BA^{-1} )^i $
(sometimes called the Neumann series)
to
solve the equation
$$
f'\circ h \Delta - \Delta \circ a = -R
$$
by  iterating the solution
of
$$
a \Delta - \Delta \circ a = -R - {\hat f}'\circ h \Delta
$$

Try to carry out the procedure and decide whether 
it can be applied as an ingredient in a KAM proof. 
(e.g., one can try to take more stages in the proof 
as one progresses, etc.).


To the best of the knowledge of
the author it cannot be made to work
(unless one uses cancellations similar to those used in the 
quadratically convergent methods or those of the direct methods)
but attempting this will give an 
appreciation of the cleverness of the use of 
rapidly convergent methods. 

Of course, if there is a proof that  succeeds in accomplishing this, 
the result will be quite interesting.
\end{exercise}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\section{Hard implicit function theorems.} \label{Implicitfunction}


Before proceeding to more geometric considerations,
it will be convenient to 
abstract some of the properties that made the 
previous argument work  and isolate them in 
an abstract implicit function theorem. 
This will streamline a good deal of the arguments and 
illustrate quite strikingly the principle that the 
quadratic convergence can dominate the small divisors.

Even if implicit function theorems take care very 
nicely of the analysis of the convergence,
they ignore the geometric considerations  and 
particularities of the problem at hand.
These particularities are crucial to obtain the general
framework of the implicit function theorem. 
Nevertheless, it  is useful to introduce the 
difficulties one at a time.

Later we will have  to spend time making sure 
that we can fit a problem  or an algorithm to solve 
a problem into the functional framework of a
theorem.

We emphasize however that the usefulness  of these implicit 
function theorems is not restricted to 
KAM theory and they have been used in a variety 
of problems in geometry, PDE, etc. 
and that in any case,  they are a very useful strategic 
guide on how to organize the proofs of the problem at hand.


There are different versions of implicit function theorems 
adapted to work in KAM theory. We just mention 
\cite{Zehnder75}, \cite{Hamilton82}, 
\cite{Hormander90}. (See also \cite{Hormander85}.) The main variation we have included
is that we have used the approximation functions 
(introduced seemingly  in \cite{Russmann80} ) in the implicit 
function theorem.  Some parts of the 
exposition are based on \cite{LlaveV00}. 
A very good recent exposition -- regretfully, not easy 
to obtain --   of Nash-Moser theorems including 
detailed comparisons and examples of applications, 
especially to PDE's, is \cite{HounieM94}. 
Also very important for the relation with PDE's are
\cite{AlinhacG91}, \cite{Hormander90}.
(Of course, one should also consider the work of 
\cite{CraigW93}, \cite{Bourgain00}, even if it has not 
been formulated as an abstract implicit function theorem
and I am not sure it fits easily into the existing ones.)

The theorem that most closely models the problem 
we have discussed so far (and those that we will 
discuss later) is  that of \cite{Zehnder75}, 
which he calls {\sl analytic smoothing}
and which we now reproduce 
with a small improvement to  deal with
the R\"ussmann conditions  rather than just the Diophantine conditions.

\begin{remark}
In the case of one-dimensional dynamics, one can  use the theory 
of continued fractions to show that the R\"ussmann conditions
that we use in the implicit function theorem 
(expressed in terms of solvability of 
equations) are equivalent to the conditions used in 
\cite{Brjuno71} (expressed in terms of number 
theoretic properties). I do not know if this equivalence is 
true in higher dimensions.
\end{remark}


Note that to abstract the spaces of analytic 
functions defined on balls of different radius, 
we will consider not just a single Banach space, 
but rather a family of Banach spaces.  
In the following, it will be good to keep in mind
the proof of Theorem \ref{Siegel1} as motivation
for the definitions and the assumptions.

\begin{theorem}\label{Zehnder}
We will consider  scales of Banach 
spaces $ \{ X_{\sigma}\}_{\sigma \in [0,1]}$
such that  for $
 0 \le  \sigmap \le \sigma  \le 1 $ 
we have:
\begin{eqnarray}\label{spaceinclusion}
&X_0 \supseteq X_{\sigmap} \supseteq X_\sigma \supseteq X_1 \\
&\| x\|_{X_{\sigmap}} \le \| x\|_{X_\sigma}   \label{normdominated} \ ,
\end{eqnarray}
and analogously for $ \{ Y_{\sigma}\}_{\sigma \in [0,1]}$, 
$ \{ Z_{\sigma}\}_{\sigma \in [0,1]}$.

Assume that we have $F: X_0 \times Y_0  \to Z_0$ 
\begin{itemize}
\item[1)] $F(f_0, u_0) = 0 $ for some $f_0 \in X_1, u_0 \in Y_1$. 

\item[2)] The domain of $F$ contains the sets 
$$
\B_\sigma = \{ (f, u) \in X_\sigma \times Y_\sigma 
\  \big| \  \|f - f_0\|_{X_\sigma} \le A, 
\ \|u  - u_0\|_{Y_\sigma} \le B   \}   \ .
$$

\item[3)]  $F(\B_\sigma)   \subset Z_\sigma$ and it is 
continuous when the range and the domain are given 
the natural topologies.
\end{itemize}

In what follows,
$M \ge 1, \gamma > 0, \alpha \ge 0$ 
will denote 
fixed constants.


Assume furthermore: 
\begin{itemize} 
\item[H1)] $F$ satisfies a  so called  ``{\sl Taylor estimate}''. 
More precisely: 
\begin{itemize}
\item[H1.1)] The mapping
\[
F(f,\cdot): Y_\sigma \cap B_\sigma \rightarrow Z_\sigmap
\]
is Frechet differentiable for every $\sigmap < \sigma$. 

Denote by $D_2F(f,u)$ the Frechet derivative 
and 
\begin{equation} \label{Qdefined}
Q(f; u, v) \equiv F(f,u) - F(f,v) - D_2F(f,v)(u-v)
\end{equation} 
\item[H1.2)] We have the bounds:
\begin{equation}\label{Taylorestimate}
\| Q(f; u,v)\|_\sigmap \le \Upsilon(\sigma -\sigmap) \|u - v\|_\sigma^2
\end{equation} 
where $\Upsilon$ is a decreasing 
function. (We will assume without loss of generality 
and to avoid complications in algebraic 
expressions,  that $\Upsilon > 1$.) 
The function $\Upsilon$ is called an {\sl approximation function}.
It will also enter in subsequent hypotheses and in (H4) it will be required to 
satisfy certain conditions.

\end{itemize}
%\item[H2)] $F$ satisfies a uniform {\sl Lipschitz condition} in its 
%first argument. That is, for every $0 < \sigma \le 1$, 
%$(f,u), (g,u) \in  \B_\sigma $
%\begin{equation}\label{Lipschitzcondition}
%\| F(f,u) - F(g,u)\|_\sigma \le  M \| f  - g \|_\sigma
%\end{equation}

\item[H3)] {\sl Approximate right inverse}
We can find an approximate right inverse 
for the derivative. 

That is, we can find a linear operator 
$\eta = \eta(f,u)$ that maps $Z_\sigma $ into 
$Y_\sigmap$ for all $ \sigmap < \sigma$ and that 
satisfies:
\begin{equation} \label{remainderbounds}
\begin{split}
&\| \eta(f,u) z \|_\sigmap \le \Upsilon(\sigma - \sigmap) \| z \|_\sigma \\
&\| D_2 F(f,u) \eta(f,u) z - z \|_\sigmap \le 
\Upsilon( \sigma - \sigmap) \|F(f,u)\|_\sigma \| z \|_\sigma
\end{split}
\end{equation}


\item[H4)]  The approximation function satisfies the 
{\sl Brjuno-R\"ussmann conditions}: 

The function $\Upsilon$ in 
\eqref{remainderbounds} satisfies 
that there is a sequence $\delta_n > 0$ 
such that $\sum_n \delta_n = 1/2$,
$\sum 2^{-n} |\log( \delta_n/2 )| <  \infty$
and such that
\begin{equation} \label{BrjunoRussmann}
\sum_n 2^{-n} \log( \Upsilon( \delta_n) ) < \infty
\end{equation}
\end{itemize}

Then,  there exists  a constant $C$, depending only on 
$M, \alpha$ and  $\Upsilon$ such that 
if $u_0$ is an approximate solution, that is, if
\begin{equation}\label{approximatesolution}
\|F(f,u_0)\|_1 \equiv \ep
\end{equation}
is sufficiently small, then
we can find $u^* \in Y_{1/2}$ solving exactly
the equation
$$ F(f,u^*) = 0 . $$
Moreover,
\begin{equation} \label{notmoved}
\|u_0 - u^*\|_{1/2} \le C \|F(f,u_0)\|_1 .
\end{equation}
\end{theorem}


\begin{remark}
The theorem in \cite{Zehnder75} included also 
a hypothesis H2 that allowed  one to obtain information on the dependence
of the solutions $u$ in terms of  $f$.

We have eliminated the dependence  of $u$ on  $f$ 
from  the conclusions of the main theorem
and relegated it to remarks 
(see Remark \ref{dependence}). Hence, we suppressed H2 from the main 
theorem, but kept the 
numbering to allow easy comparisons. On the other hand, the 
hypothesis H4 here is different from that of \cite{Zehnder75},
but it plays the same role.
\end{remark}

\begin{remark}
There are several equivalent formulations of 
hypothesis H4). For all practical purposes, it suffices 
to take  $\delta_n$ a fixed exponential sequence. 
See the exercises.
\end{remark}


The proof of this theorem is very simple  
since we have abstracted away many of the complications 
of the previous theorem. 
We will present it and then, we will highlight some of the 
subtle points and indicate some of the applications.

\begin{remark}
One of the important features of
the proof in \cite{Zehnder75}, which we have
eliminated for this pedagogical presentation,
is that the final result is
expressed in a form which is independent of the
space considered. This requires one to
assume that
$\Upsilon(t) = C t^{-\alpha}$ for some positive $C$, $\alpha$.
This case has 
very important consequences such as
the finitely differentiable case.
We will develop these improvements in the
exercises.
\end{remark}


\begin{proof}
We use a quasi-Newton method defined by the iterative procedure

\begin{equation}\label{iterativestep}
u_{n+1} = u_n - \eta(f,u_n) F(f,u_n)
\end{equation}
in which $\eta$ takes the place of the 
inverse of the derivative  in the regular Newton method.

We set $\sigma_{n +1 } = \sigma_n - \delta_n$  and  $\sigma_0 = 1$.
We will obtain recursively estimates of $\|F(f,u_n)\|_{\sigma_n}$,
 $\|u_n\|_{\sigma_{n} } $, 
and of $\| u_n - u_{n+1}\|_{\sigma_{n+1}}$. 

Since $\sigma_n \ge 1/2$, the latter estimates will 
imply that $u_n$ converges in $Y_{1/2}$.


Adding and subtracting, we have: 
\begin{equation} 
\label{newtonestimate}
\begin{split} 
F( f, u_{n+1} ) &= 
F(f, u_{n+1}) - F(f, u_n)  + D_2F(f,u_n) \eta(f,u_n) F(f,u_n) \\
                     & + F(f,u_n) - D_2 F(f, u_n)\eta(f,u_n) F(f,u_n)  \ .
\end{split}
\end{equation}


We can estimate  the  terms in
the second line in \eqref{newtonestimate} using the 
second part of   \eqref{remainderbounds}
\[
\| F(f,u_n)  - D_2 F(f, u_n)\eta (f,u_n) F(f,u_n)\|_{\sigma_{n+1} }
\le
\Upsilon(\delta_n  ) \|F(f,u_n)\|_{\sigma_n}^2   \ .
\]

Using the first part of \eqref{remainderbounds}, we obtain:
for  $\tau_n = (\sigma_n + \sigma_{n+1} ) / 2$
\begin{equation} \label{incrementestimate}
\|\eta(f, u_n) F(f,u_n) \|_{\tau_n} 
\le \Upsilon(\delta_n/2) \| F(f,u_n)\|_{\sigma_n}  \ .
\end{equation}
This estimate allows us to  apply \eqref{Taylorestimate}
to the terms in the first line of \eqref{newtonestimate}. 

Hence, we obtain 
(bounding $\Upsilon( \delta_n/2 ) > \Upsilon(\delta_n) $)
\begin{equation}\label{stepestimate}
\|F( f, u_{n+1} ) \|_{\sigma_{n+1}} \le 
2 \Upsilon( \delta_n/2)^2 \|F(f,u_n)\|_{\sigma_n}^2   \ .
\end{equation}

If we iterate \eqref{stepestimate}, 
we obtain 
\begin{equation} 
\label{nestimate}
\begin{split}
\|F( f, u_{n+1} ) \|_{\sigma_{n+1}} \le & 
2 \Upsilon( \delta_n/2)^2\times (2 \Upsilon(\delta_{n-1}/2)^2 )^2 
\times \cdots \times \\
& \times (2 \Upsilon( \delta_0/2 )^2)^{2^{n} }
\|F(f,u_0)\|_{\sigma_0}^{2^{n+1}} \\
= & 2^{1+ 2+ \cdots + 2^n}
\Upsilon( \delta_n/2)^2 \times
 \Upsilon((1/2) \delta_{n-1})^{2^2} \times  \cdots \times \\
 & \times \Upsilon((1/2) \delta_0)^{2^{n+1}} 
\|F(f,u_0)\|_{\sigma_0}^{2^{n+1}}   \ . 
\end{split}
\end{equation}

We can estimate the logarithm of the factor of 
$\|F(f,u_0)\|_{\sigma_0}^{2^{n+1}}$ 
in the \RHS of \eqref{nestimate} by:
\[
\begin{split}
2^{n+1}\big[ \log(2) +
\log &\Upsilon( (1/2) \delta_n ) 2^{-n} + 
\log \Upsilon( (1/2) \delta_{n-1} ) 2^{-(n -1)} \\
&
+ 
\cdots
+ 
 \log \Upsilon( (1/2) \delta_{0} ) 2^{0} \big]   \ .
\end{split}
\]
We see that under our assumption H4) 
(see \eqref{BrjunoRussmann} ),
the term in brackets can be bounded by a constant 
(the sum of the series). Hence 
\eqref{nestimate} yields 
\begin{equation} \label{quadraticconvergence}
\|F( f, u_{n+1} ) \|_{\sigma_{n+1}} \le 
(A \|F(f,u_0) \|_{\sigma_0} )^{2^{n+1}}
\end{equation}
where $A$ is a constant depending on the 
properties of the approximation  function and 
the other constants involved in the set up 
of the problem.

We see that, if 
$\|F(f,u_0)\|_{\sigma_0} $ is 
sufficiently small, the right hand 
side of \eqref{quadraticconvergence} 
converges to zero extremely fast.

Using \eqref{incrementestimate}, 
we have:
\begin{equation} \label{nincrement}
\|u_n - u_{n+1} \|_{\sigma_{n+1}} \le
\Upsilon( \delta_n/2) 
 (A \|F(f,u_0) \|_{\sigma_0} )^{2^{n}}
\end{equation}
where $A$ is also a constant depending 
only on the properties of the approximation
function and the other constants involved in the set 
up of the problem. (It will be different from the 
$A$ in \eqref{quadraticconvergence}, but we
follow the standard practice of denoting all such 
constants by the same letter.)

The \RHS of  \eqref{nincrement}  
is the general term of a convergent series. Indeed,
since the series in our assumption  \eqref{BrjunoRussmann} 
converges, its general term is bounded.
Therefore,
$\log \Upsilon( \delta_n/2) 2^{-n}   \le B$
(where, again, $B$ is another constant depending only
on the constants of the problem and the approximation 
function). 

When $A \|F(f,u_0) \|_{\sigma_0}  < 1$,
the second factor converges to zero faster than 
any exponential.

Note also that, if  $\|F(f,u_0) \|_{\sigma_0} $ is small enough,
the series obtained summing \eqref{nincrement} has a sum 
as small as desired. In particular, we can verify that the 
limit is close to $u_0$ in $Y_{1/2}$.
 
Hence:
\[
\Upsilon( 1/2 \delta_n) 
 (A \|F(f,u_0) \|_{\sigma_0} )^{2^{n}} \le
 (A e^B \|F(f,u_0) \|_{\sigma_0} )^{2^{n}}
\]

This establishes the claim.
\end{proof}

\begin{exercise}
Many classical proofs of the 
classical implicit function theorem are based not on the 
Newton method, which is quadratically convergent, 
but rather on a contraction mapping principle
(which is called linearly convergent since 
the remainder after one step is only a 
fixed factor smaller than the remainder 
before the step).


Can one base a method that beats small denominators 
on a linearly convergent procedure? 

Similarly, one can get algorithms whose convergence is
faster than 
quadratic. (For example, solving the equation given by the
second order Taylor expansion or interpolating several 
of the previous steps of the algorithm.) Can one 
base a  hard implicit function theorem on these algorithms?
\end{exercise}


It is interesting to check how the  
previous result compares with the 
proof we have presented of Theorem \ref{Siegel1}. 
The scales of spaces are just spaces of 
analytic functions on balls of different 
radii. The approximate inverse corresponds to 
the solving of the linearized equation 
by comparing it with the equation obtained 
by taking derivatives of the remainder. 
Checking that the scales map into each other 
is roughly the same as our inductive hypothesis. 

In the presentation of  Theorem \ref{Siegel1}, 
we have, of course taken 
\begin{equation}\label{powercase}
\Upsilon(\delta) = M \delta^{-\tau} 
\end{equation}
This is a very important particular  case of the whole theorem 
since it not only appears in interesting situations 
but also leads to further consequences which we will discuss 
in the following remarks. (Of course,  the reader  
should also consult \cite{Zehnder75} and the other references.)


The choice of a general $\Upsilon$ satisfying 
\eqref{BrjunoRussmann} corresponds to the small 
divisors satisfying \eqref{Brjunocondition},
whereas \eqref{powercase} corresponds to 
Diophantine conditions.   
(For more details see \cite{Russmann90},
\cite{DeLatte97}, \cite{LlaveV00}.)

\begin{remark}\label{groupstructure}
The existence of approximate inverses is a 
general feature of conjugacy problems
or of problems having a group structure. 

As pointed out in \cite{Zehnder75} p. 133 ff.,
the existence of
approximate inverses 
assuming only the existence of an inverse in the 
trivial case
is a general feature of conjugacy problems,
at least at the heuristic level.
This indeed gives a guiding principle for the cancellations that
we found e.g.\ in  the proof of 
Theorem \ref{Siegel1}, in which we used 
that,
comparing it with the derivative of
the remainder, the linearized
equation  suggested by the 
heuristic Newton method can be reduced to constant coefficients up to 
quadratically small errors.

Notice that the functionals we are solving are 
conjugacy equations. 
Hence they satisfy the identity
\begin{equation}\label{conjugacyformula}
F( f, u\circ v) = F( F(f,u), v )
\end{equation}
If we take $v = \Id + \hat v$ and we think of $\hat v$ as 
infinitesimal, we obtain
\begin{equation}\label{infinitesimalgroup}
D_2 F(f, u) u' \hat v  =  D_2 F( F(f,u), \Id) \hat v
\end{equation}
If we assume that $\eta  D_2 F( f_0, \Id ) = \Id $, 
we obtain that:
\begin{equation}\label{infinitesimalgroup2}
\eta D_2 F(f, u) u' \hat v =
 \hat v + \eta[ D_2 F( F(f,u), \Id) - D_2F ( f_0, \Id)] \hat v
\end{equation}
Notice that we can expect that, if $D_2F$
satisfies some Lipschitz conditions on the 
first argument, the term in brackets in the \RHS 
of \eqref{infinitesimalgroup2} satisfies the bounds we 
wanted for an approximate inverse provided 
that $\eta$ satisfies the desired bounds. 

The importance of this remark is 
that by knowing the existence of 
$\eta$, which is just an inverse 
of $D_2F(f_0,\Id) $ we can deduce, for
functionals with a group structure, 
the existence of approximate inverse in a 
whole neighborhood, which is the hypothesis 
needed by Theorem \ref{Zehnder}.

Of course, $F$ satisfies  assumption \eqref{conjugacyformula}
when $F(f,u) = u^{-1} \circ f \circ u$ but it could 
also be the action by $u$ on vector fields or more complicated 
objects and indeed it happens quite frequently when one is considering
geometrical problems.

\end{remark}

\begin{remark}


Kolmogorov's strategy for KAM (discussed in more 
detail in Section  \ref{Kolmogorovmethod})
can be formulated as reducing the 
Hamiltonian to a Hamiltonian of a 
particular kind. 

Hence, we are not interested in 
just solving the equation $F(f,u) = 0 $ 
but rather $F(f,u)  \in N$, where $N$ is 
a submanifold of infinite codimension.
Indeed, this is the problem that is considered
in \cite{Zehnder75} and especially in 
\cite{Zehnder76}.
\end{remark}


\begin{remark}
Even if  most  of the classical KAM problems 
(certainly all that will be discussed in these notes)
are conjugacy  problems and, therefore, have the group 
structure, this is not completely 
necessary to have a quadratic scheme.

A review of problems in geometry which are 
not conjugacy problems can be found in 
\cite{Hamilton82}.


A very interesting recent development is 
the observation that variational problems with symmetry
also present another general  structure that allows 
one to obtain quadratic convergence.
See, for example \cite{Kozlov83}, \cite{Moser88}
for PDE's
or, in the context of KAM \cite{SalamonZ89}. 
(We will present an account of that work in 
Section \ref{Lagrangianmethod}.)

Much more interesting is the fact that in 
\cite{CraigW93}, \cite{CraigW94}, another
mechanism to obtain  quadratic convergence was 
introduced. At the moment, I do not know of 
a functional analytic framework  that encompasses
these remarkable results.

\end{remark}

\begin{remark}\label{normalform}
In the applications of 
the implicit function theorems to problems of
persistence of tori -- and to some geometric problems --
we are not interested in the equation
$F(f,u) = 0$ 
but rather in the equation 
$F(f,u) \in N$ where 
$N$ is an appropriate submanifold.

See exercises \ref{ex:normalform}, 
\ref{ex:Moriyon}
\end{remark}


\begin{remark} \label{consequences}
Note that
the structure of Theorem \ref{Zehnder} is that the input is 
just an approximate solution  (with some 
extra mild requirements) and that the output is 
an exact solution not too far from the original 
approximate solution. 

In the most commonly quoted applications, 
the input is the exact solution for 
an integrable system, which is an approximate 
solution for a quasi-integrable system. Nevertheless, 
other applications are possible.  Among them, 
we mention:

1) Numerical algorithms:

If carefully implemented 
and successful, numerical algorithms produce 
approximate solutions (i.e.\ something that, when 
plugged into the equations satisfies them approximately).

Hence, using a theorem with the structure of 
Theorem \ref{Zehnder}, one 
can justify that the approximate solutions 
produced by a computer algorithm indeed correspond to 
a true solution nearby. In numerical analysis, this is
sometimes called a-posteriori bounds.  
(See \cite{BraessZ82}, \cite{LlaveR90}, \cite{CellettiC95}.)
We discuss some numerical issues involved in 
Section \ref{compassisted}.


2) Justification of asymptotic expansions (e.g.\ Lindstedt series) 

These expansions produce objects that satisfy the 
equations approximately. Hence, a theorem
similar to Theorem \ref{Zehnder},
can be used to justify asymptotic expansions.
That is, show that one can indeed find tori
which are not far away from the truncations of 
the Lindstedt series.  For the KAM tori, 
one can find this type of arguments in 
\cite{Moser67}. In this case, it is also 
shown that the Lindstedt series converge
(since the  torus should be analytic as a 
function of the parameter). 
We emphasize that to justify the asymptotic nature of 
the  series one just needs that the series 
produce objects that satisfy the equation with
smaller errors and that are not too complicated. 
The Lindstedt series of lower dimensional 
tori  are studied by this method in 
\cite{JorbaLZ00}.  In that case, we do 
not know whether the series converges or
not, but following the argument sketched here, it is 
possible  to show that they are asymptotic in  
a certain complex domain.

3) Establishing continuity or 
Whitney regularity of the solutions with respect to 
parameters -- assuming that $F$ is more regular in
both its arguments --.

This  application is
worked out explicitly in  \cite{LlaveV00}.
The latter arguments require a certain amount of 
uniqueness, which is not provided by the theorem 
in the way we have stated and proved it, but which we 
obtain in Exercise \ref{ex:uniqueness}.

4) Obtaining a result for finitely differentiable 
problems out of the analytic ones.

An application that can already be found in 
\cite{Moser66a}, \cite{Moser66b} is that, 
as we saw in Lemma \ref{characterization}, 
we can characterize finitely differentiable functions 
by their approximation properties by analytic 
functions. We just sketch the argument.

Given a smooth $f$, we study the problem 
$F(f,u) = 0$ by considering a sequence 
of problems $F(f_n, u_n) = 0$ where $f_n$
are constructed approximating the smooth 
function $f$ by analytic functions.

Using that $||f_n - f_{n+1} ||_{2^{-(n+1)}} \le C 2^{-(n+1) r}$
it is often possible (using the structure of $F$)
to show that 
$$
||F(f_n,u_n) - F(f_{n+1}, u_n) ||_{2^{-(n+1)}}  = 
||F(f_{n+1}, u_n) ||_{2^{-(n+1)}}   \le  C 2^{-(n+1) r'}
$$
We consider $u_n$ as an approximate solution for the problem 
with $f_{n+1}$. 
In the case that $\Upsilon$ is a power, it follows 
that 
$$
||u_n - u_{n+1} ||_{2^{-(n+2)}}  \le  C 2^{-(n+1) r''}
$$
from which, appealing again to Lemma \ref{characterization},
we obtain that there exists $u = \lim u_n$ which solves 
the desired equation and which is finitely differentiable.

This method has the advantage that one always works
with analytic functions for which estimates are 
often easier and, as we have seen sharper 
if one needs to use Fourier coefficients.

We refer to \cite{Moser66a}, \cite{Moser66b},
\cite{Zehnder75}, \cite{Zehnder76} for more details
(such as how to get the induction started), 
somewhat different versions of the argument,
and applications to concrete problems.


The quantitative estimates needed to carry out this 
strategy are explained in Exercise~\ref{improvements}.


5) Bootstrapping the regularity.

A solution which is moderately smooth, if approximated 
by an analytic one is an analytic approximate solution. 

Applying a theorem of this sort, one can conclude that 
given an analytic problem,
if there is a sufficiently smooth solution
(so that the smoothings are indeed very good approximations),
then there is an analytic one.  Of course, if one does 
have uniqueness of the problem, one obtains that any 
solution that has a certain regularity, is analytic. 

Of course, if we start with a problem that is very 
regular, we can also show that given a solution which is 
beyond a certain critical regularity, there will be another 
one which is as smooth as the problem allows, and if there is 
uniqueness, we conclude that all the solutions beyond 
a certain regularity are as smooth as the problem allows.

Arguments of this type are worked out explicitly in 
\cite{SalamonZ89}.
Again, we refer to Exercise~\ref{improvements} for some 
of the quantitative estimates 
needed.

\end{remark}

\begin{remark} 
Notice that Theorem \ref{Zehnder} only assumes the existence of 
an approximate right inverse. 

One should not expect the solution one produces   in the theorem
to be unique. Indeed, in some problems 
such as the Nash embedding theorem which motivated a 
good deal of the original research
one only has an approximate right inverse and, indeed the solution is 
not unique.  In many geometric problems, the results we seek 
are in any case invariant under diffeomorphisms, so that it 
is to be expected that the solution is not unique.

Under moderate assumptions -- e.g. under the existence of 
an approximate left inverse -- one gets uniqueness. 
See the remarks in \cite{Zehnder75} and see 
Exercise  \ref{ex:uniqueness}.
These assumptions are often satisfied in KAM theory 
or uniqueness  of the objects we 
are interested in can be obtained by other means.
(Often one seeks geometrical objects in coordinate 
systems, so that the geometric objects may be unique
even if their coordinate representation is not.)

One situation when these considerations play a
role is the proof of the KAM theorem following 
Kolmogorov's strategy. (See Section \ref{Kolmogorovmethod}.)


In this method, we 
seek a change of variables in which the resulting 
system  manifestly has an invariant torus. 
That is, we try to reduce the system to
the  Kolmogorov normal form \eqref{easysystem}. 
Such change of variables is manifestly not unique since the 
normal form does not specify what are the higher order terms and 
one can make changes of variables that only  depend to 
higher order in the actions. Therefore, one cannot expect 
uniqueness in the change of variables nor in the term of 
the normal form and a formulation of the theorem based 
on this formalism cannot aspire to obtain uniqueness. 
Nevertheless, it is true that, under moderate non-degeneracy 
assumptions, the torus  that has a prescribed frequency is unique.
\end{remark}


\begin{remark} \label{dependence}
The theorem of \cite{Zehnder75} has an 
extra hypothesis H2 that requires that $F$
is Lipschitz in the first argument.
Then, one obtains Lipschitz dependence of the solution
on the function $f$ (in some appropriate spaces).

We note that, in the case that there is no uniqueness, 
the only claim made is that the algorithm  \eqref{iterativestep}
leads to a solution that depends in a Lipschitz manner on $f$. 
Clearly, when there is no uniqueness
one  could make different choices of solutions 
for different $f$  and end with a $u$ that depends discontinuously on 
$f$.

A  detailed treatment  of these ideas can be found in 
\cite{Zehnder76b}.

We point out that there are other methods to obtain
smooth dependence with respect to parameters that do 
not involve following the proof of Theorem \ref{Zehnder}
and checking the differentiability with respect to parameters
of all the steps.

1) One can also obtain quickly higher regularity with respect 
to parameters  by applying Theorem \ref{Zehnder} 
in spaces that consist of smooth  families of
functions. Of course, one needs that the approximate 
inverse also maps smooth families into smooth families. 
This is somewhat tricky since approximate inverses are 
not uniquely defined, so one could make different 
choices for different values of the parameter and 
spoil even continuity.

Nevertheless, for problems with group 
structure, the prescription  given by \eqref{infinitesimalgroup}
gives a way of accomplishing the solution in spaces of 
smooth families of functions.
Arguments of this sort are carried out in detail 
in \cite{LlaveO00} to solve a problem in differential geometry.

2) When there is uniqueness, one can follow other sorts of 
arguments such as finding formal derivatives 
for the solution and then showing that these 
formal derivatives satisfy the hypotheses of the
Whitney theorem \cite{LlaveV00}. 
\end{remark}

\begin{remark} 
When one has some  regularity -- at least Lipschitz -- 
with respect to the parameters, 
one can start discussing 
issues -- important in the applications -- such as the measure 
of the set of parameters covered. 
\end{remark}

\begin{exercise}
Write precisely the reduction of  
Theorem 
\ref{Siegel1} to Theorem \ref{Zehnder} 
by making explicit choices of spaces, etc. 
\end{exercise}

\begin{exercise} 
A challenging variant of the previous exercise 
is to show that, if the number $\omega$  satisfies the 
conditions \eqref{Brjunocondition}, the approximate 
inverse we constructed in the proof
of Theorem \ref{Siegel1} satisfies \eqref{BrjunoRussmann}.


If independent study fails, 
see \cite{Russmann90}, \cite{DeLatte97} for 
estimates that go from arithmetic conditions 
to approximation functions.
\end{exercise}

\begin{remark} 
In practical applications,  
e.g.\ when one is computing numerically solutions 
to a problem defined implicitly
one of course, does not compute the inverse of 
the matrix, but rather solves numerically the system. 

In numerical practice, this usually entails
a factorization of the matrix. 
Traditionally, one uses the LU factorization
(Gaussian elimination), even if in KAM theorems
that tend to be ill conditioned one should, perhaps,
prefer the SVD decomposition.

In any case, it is convenient not to have to 
recompute these factorizations -- which may be much more 
costly than the application to a function --. 
Of course, we would not like to lose the quadratic convergence
which, e.g.\ in continuation methods that require great precision, is 
much more practical than a method that converges more slowly.


The following two schemes,
which avoid having to 
recompute factorizations 
but which get convergence 
faster than linear are studied in 
\cite{Moser73} p. 151. The second one 
comes from \cite{Hald75}. A geometric
interpretation of these methods 
as a Newton method in the space of 
jets is discussed in \cite{McGehee90}.

\begin{equation}
\label{quasihald}
\begin{split}
u_{n+1} &= u_n - \eta_{n}F(f, u_n) \\
\eta_{n+1} &= \eta_n + \eta_n(\Id -  D_2 F(f, u_{n}) \eta_n) \\
\end{split}
\end{equation}

\begin{equation} \label{hald}
\begin{split}
u_{n+1} &= u_n - \eta_{n}F(f, u_n) \\
\eta_{n+1} &= \eta_n + \eta_n(\Id -  D_2 F(f, u_{n+1}) \eta_n) \\
\end{split}
\end{equation}

(In numerical applications, one does not compute the product of 
matrices in \eqref{quasihald}, \eqref{hald}. Note that it 
suffices to apply the matrices to vectors.)
\end{remark}

\begin{exercise}

Show in finite dimensions that, under smoothness assumptions 
and smallness assumptions:
\eqref{quasihald} 
leads to 
$$
\|F(f,u_{n+1} )\| \le C \|F(f,u_n)\|^{ (\sqrt{5} + 1)/2}
$$
and  \eqref{hald}  leads to
$$
\|F(f,u_{n+1} )\| \le C \|F(f,u_n)\|^2.
$$
\end{exercise}

Applications  of these schemes  to hard implicit function theorems
and other modifications of the basic algorithm  will be 
developed in  the following exercises.


The following exercises are designed to show that the quadratic 
convergence is rather forgiving and that there are many variants
that also work. We have also included some variants in which 
the results fail so that the reader can start to develop a feeling 
for the range of applicability of the techniques.


\begin{exercise} \label{improvements}
Consider the following improvements to 
Theorem \ref{Zehnder}  
(either separately or several at the same time, for 
the most ambitious reader).

As we will note in the exercises some of them have important 
consequences, beyond serving as training.
\begin{itemize}


\item
Modify the hypothesis and the conclusions so
that 
the approximate solution is assumed to satisfy 
$$
\|F(f,u_0)\|_{\delta_0} \equiv \ep
$$
instead of \eqref{approximatesolution}
and 
the conclusion about $u^*$
reads 
\begin{equation} \label{notmoved2}
\|u_0 - u^*\|_{\delta_0/2} \le C(\delta_0) \|F(f,u_0)\|_{\delta_0}
\end{equation}
instead of \eqref{notmoved}.

Hint: This result can be deduced from the 
statement of the theorem just by a relabeling of the spaces.

\item 
Show that in case that we take 
$\Upsilon(t) = C t^{-\alpha}$
for some $C, \alpha > 0$, we have 
$C(\delta_0) = C' \delta_0^{-\alpha'} $
for some $C', \alpha' > 0$. 

\begin{remark}
The previous two items are quite important 
since they allow one to obtain finite differentiability
out of the analytic result.  The strategy to 
obtain that is explained in Remark~\ref{consequences}.

They are worked out in \cite{Zehnder75}. It can also 
be worked out from the statement that we have given 
by a rescaling argument.
\end{remark}


\item 
Show that in case that we take 
$\Upsilon(t) = s \Upsilon_0(t)$
where we consider $\Upsilon_0$ as a fixed function and 
$s$ as a variable, the
smallness conditions required in 
\eqref{approximatesolution} 
are 
$$
\epsilon s^2 < K
$$
and that the  conclusions  \eqref{notmoved}
read 
$$
\|u_0 - u^*\|_{1/2} \le  K s \|F(f,u_0)\|_1
$$
where now $K$ is a constant depending on all the other properties of 
the hypothesis, but independent of $s$.

\begin{remark}
In applications to the KAM theorem, the meaning of the 
parameter $s$ is the allowed size of the Diophantine constant.

This improvement is worked out in \cite{Zehnder76b}. 

It leads rather directly to estimates on the measure of the
set of tori covered by KAM theorem.  (See \cite{LlaveV00}.)
\end{remark}


\item 
Consider that in \eqref{Taylorestimate}, 
\eqref{remainderbounds} we have three different 
$\Upsilon$ functions. For example, three different 
powers. 


(This appears in practice. Some of the 
powers come from the Diophantine approximations 
whereas others come from the differentiation of composition and the like.)

\item Modify the second equation  of \eqref{remainderbounds} 
to read 
\[
\| D_2 F(f,u) \eta(f,u) z - z \|_\sigmap \le
\Upsilon( \sigma - \sigmap) \|F(f,u)\|_\sigma^{\kappa'} \| z \|_\sigma
\]
for some $\kappa' > 0$. 

\item
Modify \eqref{Taylorestimate} to read
\begin{equation}\label{weaktaylor}
\| Q(f; u,v)\|_\sigmap \le 
\Upsilon(\sigma - \sigmap)  \|u - v\|_\sigma^{1 +\kappa}
\end{equation}
for some $\kappa > 0$.

\item One can also have a different 
approximate inverse during the iteration.
\[
\begin{split}
\| D_2 F(f,u_n) \eta_n(f,u_n) z - z \|_\sigmap \le&
\Upsilon( \sigma - \sigmap) \|F(f,u_n)\|_\sigma\| z \|_\sigma \\
& + \exp( - a (1 + \kappa'')^n ) 
\end{split}
\] 
for some $\kappa'' > 0$. 

A variant is to choose
\begin{equation}\label{extraquadratic}
\begin{split}
\| D_2 F(f,u_n) \eta_n(f,u_n) z - z \|_\sigmap \le&
\Upsilon( \sigma - \sigmap) \|F(f,u_n)\|_\sigma\| z \|_\sigma \\
& + \exp( - 4^n (\sigma - \sigmap)) 
\end{split}
\end{equation}

This appears in some proofs  (e.g. in Arnol'd type proofs )
when one tries to do some truncation of the problem. 
This improvement is not too tricky to do by 
itself, but  it is not so easy to understand 
how it works with some of the others.
It is quite enlightening to understand how it works
with the method of obtaining finite differentiability.

\end{itemize}

Under these modifications, one has to modify slightly the 
conditions \eqref{BrjunoRussmann}.
\end{exercise}

\begin{exercise}
Formulate precisely the assumptions of domain loss etc. 
to obtain a proof of the implicit function theorem using 
an iteration as in \eqref{hald}. 
\end{exercise}

\begin{exercise}
Taking into account the improvement suggested in 
\eqref{weaktaylor}
give a proof of the theorem 
using the scheme of \eqref{quasihald}.
\end{exercise}

\begin{exercise}\label{ex:uniqueness}
Show that if one supplements the 
assumption H3 of Theorem \ref{Zehnder} 
with the existence of  a left approximate 
inverse satisfying the  same 
estimates, one obtains that the solution is 
unique in an appropriate sense. 

Formulate a precise theorem in which the  
domains in which uniqueness holds are 
explicitly specified. 


(Some version of this is done in \cite{Zehnder75}.)
\end{exercise}


\begin{exercise}  \label{ex:normalform}
State and prove an implicit function theorem 
in which we do not attempt to solve
$F(f,u) = 0 $ but rather 
$F(f,u) \in N$ as explained in 
Remark \ref{normalform}.

In that generality, one should not expect uniqueness, 
hence, continuity and differentiability with respect to 
parameters is presumably not very clean.
\end{exercise}

\begin{exercise} \label{ex:Moriyon}
When considering the normal form problem 
one should also modify the
assumption H3 of Theorem~\ref{Zehnder}
to be:
$$
\| dF(f,u) \eta(f,u) z - z \|_\sigmap \le
\Upsilon( \sigma - \sigmap) d_\sigma(N, F(f,u)) \| z \|_\sigma
$$
where $d_\sigma$ denotes the distance between sets
measured with the norm $\| \cdot \|_\sigma$.

This observation appears in \cite{Moriyon82}.
\end{exercise}


\begin{exercise} 
A classical theorem in KAM theory is 
the theorem of \cite{Arnold61}
which states that given a diffeomorphism 
of the circle with a rotation number $\rho$, 
which is Diophantine and sufficiently close to 
the rotation by $\rho$ in an analytic topology, 
then, there is an analytic change of 
variables that transforms it in the 
rotation by $\rho$. 

Formulate it in terms of an abstract implicit
function theorem. 

The main difficulty is that, when we 
start proving this theorem, we do 
not know that the set of diffeomorphisms 
with rotation number $\rho$ is a manifold. 
(We know it after we prove the theorem!)

Note also that the conjugacy is not unique
since all rotations conjugate a rotation to itself.

I know several ways to do it, but all of them 
require some dirty tricks. (A 
good source for those -- and for almost anything having to 
do with circle maps --  is 
\cite{Herman79} and  \cite{Herman83}). 

Note also that for this problem there are proofs  that do 
not use KAM. Besides the renormalization proofs
and \cite{KatznelsonO89b}, already mentioned
in Section  \ref{newdevelopments}, we mention 
\cite{Herman85} and, for constant type numbers (those that have
a bounded continued fraction expansion) \cite{Herman83a}.
\end{exercise}


\begin{remark}
Note that, the estimates we have made to prove 
Theorem~\ref{Zehnder} do not use that $\| \cdot \|_\sigma$ 
is a norm. They would have worked just as well if 
$\| \cdot \|_\sigma$ had been a semi-norm.

Of course, in order that the result is meaningful, 
we would need that the family of seminorms 
$\{\| \cdot \|_\sigma\}_{\sigma\in(0,1/2)}$ 
defines a useful space, i.e., they define a Fr\'{e}chet space.
See \cite{Hamilton82} for more details about such improvement 
and also for applications. 
\end{remark}


The original proofs of KAM theorems for finite differentiability 
were based on different schemes than the proof we have presented. 

Note that, for example, the proof of Theorem~\ref{Siegel2} 
follows a different scheme. 
At every step, the linear operator we have to solve 
does have an inverse (not just an approximate inverse). 
The problem is that the operator is unbounded 
and, hence, simple-minded iterations such as those 
of the classical Newton method do not work. 

This situation happens also in PDE's. 
A notable example was the celebrated Nash embedding 
theorem \cite{Nash63}.

The method used in \cite{Moser66a} and \cite{Moser66b} 
was to combine steps of the linearized operator with smoothings. 
The method allows  a  norm -- in a space of 
somewhat smooth functions -- to blow up,
whereas a norm   -- in a space of rougher functions --  decreases. 
By using interpolation inequalities, 
one can recover good behavior 
of some intermediate norms. 
(The decrease may not be exactly quadratic, 
but it is still faster than exponential.) 
This technique has been highly formalized in \cite{Hamilton82}, 
which also includes a wealth of applications, 
mainly to geometry. 
See \cite{Hamilton82}, Section 3, for a comparison 
with the methods of Zehnder \cite{Zehnder75}. 

In the following, we present a proof along these lines, 
which follows rather closely \cite{Schwartz69}.
This book also contains a very nice discussion 
of the Nash embedding theorem and on other problems 
of nonlinear functional analysis.

In the sequel, we shall refer to a certain range 
$m-\alpha \le r \le m+10\alpha$ 
of spaces $\Lambda_r$ (defined in Section~\ref{prelimanalysis}), 
and to a certain constant $M>1$. 
We suppose that $M$ is sufficiently large so that the smoothing 
operators $S_t$ satisfy 
\begin{equation}
\label{smoothingproperties1}
\begin{split}
\| S_t u \|_\rho &\le M t^{\rho - r} \| u \|_r   \quad u \in \Lambda_r \\
\| (\Id - S_t) u \|_r &\le M t^{r-\rho} \| u \|_\rho   \quad u \in \Lambda_\rho 
\end{split}
\end{equation}
for $m-\alpha \le r \le \rho \le m+10\alpha$ 
($\| \cdot \|_r$ stands for the norm in $\Lambda_r$). 

\begin{theorem} 
Let $B_m$ be the unit ball in $\Lambda_m$ 
and $f: B_m \to \Lambda_{m-\alpha}$ 
be a map that satisfies:
\begin{itemize}
\item[(i)] $f(B_m\cap \Lambda_r) \subset \Lambda_{r-\alpha}$, 
for $m \le r \le m+10\alpha$;   
\item[(ii)] $f_{\mid B_m\cap \Lambda_r}: B_m\cap \Lambda_r \to 
\Lambda_{r-\alpha}$ has two continuous Fr\'{e}chet derivatives, 
both bounded by $M$, for $m \le r \le m+10\alpha$; 
\item[(iii)] There exists a map 
$L:B_m \to {\mathcal B}(\Lambda_m,\Lambda_{m-\alpha})$, where 
${\mathcal B}(\Lambda_m,\Lambda_{m-\alpha})$ is the space of 
bounded linear operators from $\Lambda_m$ to $\Lambda_{m-\alpha}$, such that:
\begin{itemize}
\item[(iii.a)] $\| L(u) h\|_{m-\alpha} \le M \| h\|_m, 
	\quad u\in B_m, h\in\Lambda_m$;
\item[(iii.b)] $df(u) L(u) h= h, \quad u\in B_m, h\in\Lambda_{m+\alpha}$;
\item[(iii.c)] $\| L(u) f(u) \|_{m+9\alpha} \le M(1+ \| u \|_{m+10\alpha}), 
\quad u\in B_m\cap\Lambda_{m+10\alpha}$.
\end{itemize}
\end{itemize}
Then, if $E:= \| f(0) \|_{m+9\alpha}$ is sufficiently small, there exists
$u\in \Lambda_m$ such that $f(u)= 0$.
\end{theorem}

\begin{proof}
Let $\kappa  > 1$, $\beta,\mu,\nu >0$  be real numbers to be specified later.
We will need that they satisfy a finite set of inequalities 
relating them and the constants appearing in the assumptions of 
the problem. 

We construct a sequence $\{ u_n \}_{n\ge 1} \subset \Lambda_m$ 
by taking $u_0= 0$ and
\[
	u_{n+1} = u_n - S_n L(u_n) f(u_n) \ ,
\]
where $S_n= S_{t_n}$ and $t_n= e^{\beta\kappa^n}$. Later on, we will prove 
that this sequence satisfies, for $n\ge 1$:
\begin{itemize}
\item[(p1;n)] $\displaystyle u_{n-1} \in B_m$, 
\item[(p2;n)] $\displaystyle \| u_n - u_{n-1} \|_m 
	\le e^{-\mu\alpha\beta \kappa^n}$,
\item[(p3;n)] $\displaystyle u_n \in \Lambda_{m+10\alpha}$ and 
$\displaystyle 1 + \| u_n \|_{m+10\alpha} \le e^{\nu\alpha\beta \kappa^n}$.
\end{itemize}

Notice that, then, $\{ u_n \}_{n\ge 1}\subset \Lambda_m$ converges to some 
$u\in\Lambda_m$ and, moreover:
\begin{equation}\label{stepestimate2}
\begin{split}
\| f(u_n) \|_{m-\alpha} = &
\| df(u_n) (u_{n+1}-u_n) - df(u_n) (\Id - S_n) L(u_n) f(u_n) \|_{m-\alpha} \\
\le &
M \| (u_{n+1}-u_n) \|_m	+ 
M^2 t_n^{-9\alpha} \|  L(u_n) f(u_n) \|_{m+9\alpha} \\
\le &
M e^{-\mu\alpha\beta \kappa^{n+1}} + M^2 e^{(\nu-9) \alpha\beta\kappa^{n}}.
\end{split}
\end{equation}

Hence, the \RHS of the previous inequality  \eqref{stepestimate2}
converges to zero when 
$n$ goes to infinity, provided that 
\begin{equation}\label{condition1}
\nu<9.
\end{equation}

We are going to prove by induction the three properties satisfied by 
the sequence 
$\{ u_n \}_{n\ge 1}$. For $n= 1$, condition (p1;1) is trivial. 

Condition
(p2;1) reads
\begin{equation}
\begin{split}
\| u_1 - u_0 \|_m = & \| S_0 L(0) f(0) \|_m 
        \le  M t_0^\alpha \|	 L(0) f(0) \|_{m-\alpha} \\
       \le & M^2 e^{\alpha\beta} E \\ 
       \le & e^{-\mu\alpha\beta\kappa}, 
\end{split}
\end{equation}
where the last inequality holds if 
\begin{equation} \label{condition2}
E \le M^{-2} e^{-(1+\mu\kappa)\alpha\beta}.
\end{equation}

Condition (p3;1) reads
\[
\begin{split}
1 + \| u_1 \|_{m+10\alpha} = & 1 + \| S_0 L(0) f(0) \|_{m+10\alpha} 
\le 1 + M t_0^\alpha \|L(0) f(0) \|_{m+ 9\alpha} \\ \le &
	 1 + M^2 e^{\alpha\beta} \le 2 M^2 e^{\alpha\beta} \\
	\le & e^{\nu\alpha\beta\kappa},
\end{split}
\]
where the last inequality holds if 
\begin{equation}\label{condition3}
1 \le \frac 12 e^{\alpha\beta(\nu\kappa-1)} M^{-2},
\end{equation}
that is, if 
$\nu\kappa > 1 $ and $\beta$ is sufficiently large.  
	
Suppose now that conditions (p1;j),(p2;j) and (p3;j) are true for $j\le n$.
Then, 
\begin{equation}\label{intermediatebound}
\displaystyle \|u_n\|_m \le \sum_{j=1}^{\infty} e^{-\mu \alpha\beta \kappa^j }
\le
\sum_{j=1}^{\infty} e^{-\mu \alpha\beta (\kappa-1) j }= 
\frac{e^{-\mu\alpha\beta (\kappa-1)}}{1-e^{-\mu \alpha\beta (\kappa-1)}} .
\end{equation}

If we require that 
\begin{equation} \label{condition4}
\frac{e^{-\mu\alpha\beta (\kappa-1)}}{1-e^{-\mu \alpha\beta (\kappa-1)}} <1
\end{equation}
which holds when
$$
\mu\beta \gg 1
$$
we obtain that the \RHS of  \eqref{intermediatebound} is 
bounded from above by $1$ and, therefore, we recover (p1;n+1).


To prove (p3;n+1) note that
\[
\begin{split}
 1 + \| u_{n+1} \|_{m+10\alpha} &\le 
	1 + \sum_{j=0}^n \| S_j L(u_j) f(u_j) \|_{m+10\alpha} \\
       & \le 1 + M^2 \sum_{j=0}^n e^{(1+\nu) \alpha\beta \kappa^j} \ .
\end{split}
\]
Hence, 
\[
\begin{split}
(1 + \| u_{n+1} \|_{m+10\alpha}) e^{-\nu\alpha\beta\kappa^{n+1}} &\le
	e^{-\nu\alpha\beta\kappa^{n+1}} + 
	M^2 \sum_{j=0}^n e^{(1+\nu-\nu\kappa) \alpha\beta \kappa^j} \\
       & \leq 1 \ ,
\end{split}
\]
where the last inequality holds (and so (p3;n+1)) if $\nu>\frac{1}{\kappa-1}$
and $\beta$ is sufficiently large.

Finally, we come to  the proof of (p2;n+1).  We have:
\[
\begin{split}
\| (u_{n+1}-u_n) \|_m = & \| S_n L(u_n) f(u_n) \|_m \le M^2 
	e^{\alpha\beta\kappa^n} \| f(u_n) \|_m \\
\le & M^2 e^{\alpha\beta\kappa^n} 
(\| f(u_{n-1}) - df(u_{n-1}) S_{n-1} L(u_{n-1}) f(u_{n-1}) \|_m \\
& \qquad + 
	M \| (u_{n}-u_{n-1}) \|_m^2 ) \\
	\le & M^5 (e^{(\nu-9+\kappa)\alpha\beta\kappa^{n-1}} + 
	         e^{(1-2\mu)\alpha\beta \kappa^n})
\end{split}
\]
Therefore, if 
\begin{equation}\label{condition5}
M^5 (e^{(\nu-9+\kappa)\alpha\beta\kappa^{n-1}} + 
	         e^{(1-2\mu)\alpha\beta \kappa^n})
	\le e^{-\mu\alpha\beta\kappa^{n+1}} ,
\end{equation}
we recover (p2;n+1). 

The condition \eqref{condition5}  is true when 
$\kappa<2$, $\mu> \frac 1{2-\kappa}$, $\nu < 9 - \kappa -\mu\kappa^2$ 
and $\beta$ is sufficiently large. 

Therefore, we have established 
that, when the   parameters $\mu,\nu,\kappa$ satisfy
\eqref{condition1},
\eqref{condition2},
\eqref{condition3},
\eqref{condition4},
\eqref{condition5} then we can carry out the induction
and establish the theorem.


This is satisfied if we take
$1<\kappa< 2$, $\mu>\frac 1{2-\kappa}$ and 
$\frac 1\kappa <\frac 1{\kappa-1} < \nu <9-\kappa-\mu\kappa^2 < 9$
(for instance, $\kappa= \frac 32$, $\mu= \frac{20}{9}$ and $\nu= \frac 94$)
and then choose $\beta$ sufficiently large. 
\end{proof}

\begin{remark}
The above methods of proof can also produce results for 
$C^\infty$ functions. This is significantly more complicated than the 
ideas used so far and we will not discuss them.
\end{remark}

\begin{remark} 
In many applications the embeddings of scales of 
spaces considered are not just continuous but
also compact. This allows  one to improve several of 
the steps. See \cite{Hormander90} which also includes 
very nice ideas on how to use paradifferential calculus and  
several interesting new ideas to obtain very sharp results
on the differentiability.
\end{remark}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\section{Persistence of invariant tori for quasi-integrable systems.}


In this section, we will present several 
proofs of the theorem  that made KAM theory famous.
This theorem is very useful in mechanics and in ergodic theory.


Basically the theorem says that an integrable system
which is not degenerate
(see below for precise definitions) and 
sufficiently differentiable has the property that many 
of the quasi-periodic orbits persist under small perturbations. 

The theorem has versions for  Hamiltonian flows and for 
exact symplectic maps. 

The simple minded versions that we will discuss 
can be stated as follows:

\begin{theorem}\label{KAMflows}
Consider the symplectic manifold
$M = \real^n \times \torus^n $
endowed with the canonical 
symplectic form. 

Let $H: M \rightarrow \real$ 
be an analytic function such that:
\begin{equation}  \label{hamiltonianperturbed}
H(I, \phi) = h(I) + R(I, \phi)
\end{equation}

Let $\omega \in \real^n$ satisfy \eqref{diophantineflow}, 
and
$\omega  = \nabla h(I_0)$ for some $I_0$. 

Assume that for $I$ 
in a neighborhood of $I_0$ we have:
\begin{equation}
\left|\det  \frac{ \partial^2 } {\partial  I_i \partial I_j } h(I)\right| 
\ge \kappa > 0.
\end{equation}

Then, if $\|R\|_\sigma$ is sufficiently small, 
the Hamilton equations for 
\eqref{hamiltonianperturbed} 
admit a quasiperiodic solution of 
frequency $\omega$.  

This solution lies on an analytic torus $\Tau$,
which it fills densely. Moreover, 
if 
$\|R\|_\sigma$ is  sufficiently small
$\Tau$ is arbitrarily close
to the torus $\{I_0\} \times \torus^n$.
\end{theorem}

The version for exact symplectic maps reads as follows:

\begin{theorem}\label{KAMmaps}
Consider the symplectic manifold
$M = \real^n \times \torus^n $
endowed with the canonical 
symplectic form. 

Consider the map 
$F_0: M \rightarrow M $ 
given by:
\begin{equation}
F_0(I, \phi) = (I ,  \phi + \Delta(I)) 
\end{equation}
where $\Delta: \real^n \rightarrow \real^n $ 
is an analytic function, 
\begin{equation}\label{gradient}
\Delta_j(I) = \frac{\partial}{ \partial I_j } \Phi(I)
\end{equation}

Assume that $\omega \in \real^n$
satisfies \eqref{diophantinemap}
and 
$$
\omega  = \Delta(I_0) 
$$ for some $I_0$ and that 
\begin{equation}\label{twistcondition}
\left|\det\frac{\partial}{\partial I_j}\Delta_i(I)\right|\geq\kappa>0
\end{equation}
in a neighborhood of $I_0$.

Let $F: M \rightarrow M$ be an analytic, exact symplectic 
map.  

If $ \| F - F_0\|_\sigma$ is sufficiently small, then 
the map $F$ admits a quasiperiodic 
orbit of frequency $\omega$. This orbit is dense in an 
analytic torus which (if 
$ \| F - F_0\|_\sigma$ is sufficiently small) 
is arbitrarily close in the analytic topology 
to the torus $\{I_0\} \times \torus^n$ which is 
filled densely by the orbit of frequency $\omega$ 
of $F_0$.
\end{theorem}

\begin{remark} 
The condition \eqref{gradient} is imposed so that the 
unperturbed map is exact symplectic.

An obvious consequence of \eqref{gradient} is that the matrix
in  \eqref{twistcondition} is symmetric. 
\end{remark}


In what follows we will indicate several proofs of 
the above theorems.

The ideas and techniques  of the proofs in both cases are roughly 
the same. Moreover, one can pass from one to the other 
by an ingenious construction \cite{Douady82}, so that they 
are indeed equivalent in a precise sense.


Since proofs of these theorems have been in the literature
for several decades,
and many of the estimates have been covered 
in the previous sections, we will leave many of the details to the reader, 
indicating the most interesting ones as exercises.

Of course, the theorems, as stated  above are quite far from the state of 
the art, but we hope that they still contain enough difficulty 
to illustrate the techniques of the theory and to fulfill the pedagogical 
goal of these notes.
We will also present references to places  in the 
literature where more elaborate arguments, which 
we will try to sketch,  lead to 
sharper results.


\subsection{Kolmogorov's method.} \label{Kolmogorovmethod}

The original paper has been translated in 
\cite{Kolmogorov79}. 
A translation of a  much less detailed account can be found in 
an appendix of \cite{AbrahamMarsden}.
Very good modern implementations of the method
can be found in 
\cite{BenettinGGS84}, \cite{Barrar70}.
A generalized discussion of the ideas, 
putting them in a much wider context
can be found in \cite{Moser67}, \cite{Zehnder76}.
(See Remark \ref{generalities}.)


We observe that a Hamiltonian system of the form
\footnote{
We use the notation $O(I^2)$ 
to denote functions $A(I,\phi)$ such that 
$A(0,\phi) = 0$, $\frac{\partial}{\partial I} A(0,\phi) = 0$,
and similarly for other orders.
}
\begin{equation}\label{easysystem}
H(I,\phi) = \omega I +  O(I^2)
\end{equation}
has Hamiltonian equations of motion
\begin{eqnarray*}
\dot{\phi} &=& \omega + O(I) \\
\dot I &=& O(I^2)
\end{eqnarray*}
Hence 
$\phi = \phi_0 + \omega t$, $I=0$ 
is a solution.

A quasi-integrable system has the form 
$$
H(I,\phi) = h(I) + R(I,\phi)
$$
with $R(I,\phi)$ ``small'' in some sense that 
will be made precise later. 

Clearly we can consider Hamiltonians defined up to constants. 

%Notice also that the distinction between $h$ and $R$ is not 
%uniquely defined:
%$$
%h(I) + R(I,\phi) = h'(I) + \{[h(I) - h'(I)] + R(I,\phi)\} =: 
                        %h'(I) + R'(I,\phi)   \ .
%$$

We will write 
$$
h(I) = \omega I + h_2 I^2 + \cdots + h_n I^n + h_{[>n]} (I)
$$
where $h_i I^i$ stands for the homogeneous  polynomial of 
degree $i$ in the Taylor expansion of $h$
(think of $I^i$ as standing for 
all the monomials of degree $i$ ),
and $h_{[>n]} (I) = o(I^n)$. 

Similarly, we will write,  performing a Taylor expansion in 
the variable $I$,
$$
R(I,\phi) = R_0 (\phi) + R_1 (\phi) I + \cdots + R_n (\phi) I^n
                + R_{[>n]} (I,\phi)  \ .
$$

Then, we can write the quasi-integrable Hamiltonian as
\begin{equation}
\label{breakup}
H(I,\phi) =  R_0 (\phi)+ \omega I + R_1 (\phi) I 
        + h_2 I^2 + R_2 (\phi) I^2 + H_{[>2]} (I,\phi)
\end{equation}

We observe that, if  
$R_0 (\phi)$ and $R_1(\phi)$ were zero, 
we would be in the situation described
in  \eqref{easysystem}. To add a bit of color to 
the description of the proof, we will refer to these terms 
as the ``{\sl bad}'' terms since their presence spoils the 
easy argument for existence of quasi-periodic orbits.

The idea of the proof  of Theorem \ref{KAMflows} 
by this method 
is to 
find a canonical transformation $C$ -- which will be close to the identity 
-- in such a way that $H\circ C^{-1}$ will not have the
bad terms.

The canonical transformation will be constructed as 
the limit of a sequence of canonical transformations $C^{(n}$
defined recursively by:
$$
C^{(n+1} = \exp\left(-\L_{G^{(n}}\right)\circ (T^{(n})^{-1}\circ C^{(n} \ ,
$$ where $T^{(n}$ is a canonical transformation of the form 
\begin{equation}
\label{translation}
T^{(n}(I,\phi) = (I+k_n, \phi) \ , \qquad (k_n \ {\rm constant}) \ .
\end{equation}
and $\exp\left(-\L_{G^{(n}}\right)$ is the 
time one map of the Hamiltonian flow corresponding to the Hamiltonian 
$-G^{(n}$. The  theory of these canonical transformations
was developed in Section \ref{canonicalperturbation}.
\footnote{
Notice that the canonical transformation in \eqref{translation} 
cannot be generated by a time one map of 
a vector field generated by a 
Hamiltonian function since it is 
not an exact symplectic transformation.
One can develop the proof considering the 
exponential of a locally Hamiltonian vector
field that combines $\exp\left(-\L_{G^{(n}}\right) \circ (T^{(n})^{-1}$.
(See \cite{BenettinGGS84}.) We prefer to keep the 
translations separate with a view to proving 
translated curve theorems later.}


We will denote by $H^{(n}$ the Hamiltonian expressed in the 
coordinates given by $C^{(n}$. That is, 
$H^{(n}\circ C^{(n} = H$. Hence, 
$H^{(n+1}  = H^{(n} \circ T^{(n} \circ \exp( \L_{G^{(n}}) $. 

We will choose the $G^{(n}$ and the $T^{(n}$ in such a way that they 
reduce as much as possible the bad terms  of the 
Hamiltonian $H^{(n}$.

We will try to find  the Hamiltonians of these 
transformations among linear functions in $I$
\begin{equation}\label{Gform}
G^{(n} (I,\phi) = G_0^{(n} (\phi) + G_1^{(n}(\phi) I  \ . 
\end{equation}

This is a reasonable choice to try  first since this
is the form of the terms that we want to eliminate. 
As we will see rather quickly, it works. 
(If not, we would have gone back and chosen 
 a more  complicated $G^{(n}$.) 

Even if for the proof that we have
discussed here it is enough to verify that the 
above form works, the reader that plans to study 
new problems may be interested in the fact that
there is a
theory to predict what terms will work and 
we have sketched it in Remark~\ref{generalities}.
See also \cite{Moser73} p. 138.

We first describe semi-formally the step to construct 
the transformation $G^{(n}$. Since
\begin{equation}\label{transformedhamiltonian}
H^{(n} \circ \exp( \L_{G^{(n}}) = H^{(n} + \{ H^{(n}, G^{(n} \} 
        + \mbox{``second order in $G^{(n}$''}      \ ,
\end{equation}
we try to eliminate the bad terms in the main part of 
\eqref{transformedhamiltonian}.
Expanding \eqref{transformedhamiltonian} more explicitly,
taking into account \eqref{Gform} and \eqref{breakup},
we have:
\begin{equation}
\begin{split}
\label{toeliminate}
H^{(n} + \{ H^{(n}, G^{(n} \} 
        =&\,\,\, \omega I  \\
        &+ \{ \omega I, G_0^{(n} \} + R_0^{(n} (\phi)  \\
        &+ \{ \omega I, G_1^{(n}(\phi) I \}  
             + \{ h_2^{(n} I^2, G_0^{(n} (\phi) \} + R_1^{(n} (\phi) I  \\
        & + \{H_{[>1]}^{(n} (I,\phi) , G_1^{(n} (\phi) I \}   \\
        & + \{H_{[>2]}^{(n} (I,\phi) , G_0^{(n} (\phi) \}  
                + H_{[>1]}^{(n} (I,\phi)  \\
        & + \mbox{``second order in $G^{(n}$''} 
                \ .
\end{split}
\end{equation}
Notice that the ``bad'' terms  of
\eqref{toeliminate}
(i.e.\ those that do not include $I$ or include it 
only to the first power)
are precisely those on the 
second and third lines 
(up to ``second order in $G^{(n}$'' terms).  

The goal will be to choose $G^{(n}$ in such 
a way that the bad terms in the 
resulting Hamiltonian 
are  much smaller than those in the
original system.   If 
we manage to 
eliminate the  bad terms in 
the main part of 
\eqref{toeliminate}, the 
Hamiltonian in  \eqref{transformedhamiltonian}
will only have bad terms which are ``second order in $G^{(n}$''.


We claim that it is always possible to find a $G_0^{(n}$ 
in such a way that we eliminate the bad term with no
powers of $I$. 
Equating the second line of \eqref{toeliminate} to zero, we 
obtain the following equation for $G_0^{(n}$:
\begin{equation}\label{toeliminate0}
\{ \omega I, G_0^{(n} \} + R_0^{(n} (\phi) = 0
\end{equation}

Equation \eqref{toeliminate0}
is of the form \eqref{der}, for which we have developed a theory
in Lemma \ref{linearestimates}.
(Note that $\{ \omega I, G_0^{(n} \} = L_\omega G_0^{(n} (\phi)$
where $L_\omega$ is defined in \eqref{der}.)

The main conclusion of the theory of Lemma \ref{linearestimates}
is that the equation \eqref{toeliminate0} can 
always be solved (with a slightly less regular function), if 
the \RHS has average zero.
Notice that, since Hamiltonians are defined only up to the addition 
of a constant, we can always ensure that $R_0^{(n}$ has average 
zero and, hence, that equation \eqref{toeliminate0} can be 
solved for $G_0^{(n}$.


Eliminating the second bad term in \eqref{toeliminate} is more subtle. 
The equation to eliminate this term is 
\begin{equation}
\label{secondbad}
\{ \omega I, G_1^{(n}(\phi) I \} 
        + \{ h_2^{(n} I^2, G_0^{(n} (\phi) \} + R_1^{(n} (\phi) I = 0
\end{equation}

The  $G_0^{(n}$ appearing in \eqref{secondbad} is known since we found it 
by solving \eqref{toeliminate0} so that the equation 
\eqref{secondbad} is only an equation for $G_1^{(n}$
and all the other terms in it are known.

Noting that all the terms have the structure of the dot product 
of a vector (depending on $\phi$) with $I$ 
and eliminating this vector, 
we can write the equation \eqref{secondbad} as 
\begin{equation}
\label{secondbadvector}
L_\omega G_1^{(n} (\phi) = - 2 h^{(n}_2 \nabla G_0^{(n} (\phi) 
                                - R_1^{(n} (\phi)       \ .
\end{equation}
The equation for each of the components of \eqref{secondbadvector} 
is just one equation of the form \eqref{der}. 

We see that \eqref{secondbadvector} will have 
a solution when and only when the average of 
the right-hand side is equal to zero. 
The average of the term $h^{(n}_2 \nabla G_0^{(n} (\phi)$ 
is automatically zero. 
Hence, we conclude that if 
\begin{equation}
\label{inductiveK}
\int_0^1 R_1^{(n} (\phi) \, d\phi = 0       \ ,
\end{equation}
we can indeed solve \eqref{secondbadvector} and, hence, 
eliminate the second part of bad terms. 

Of course, \eqref{inductiveK} is very restrictive. 
It is very easy to construct perturbations that 
do not satisfy the condition.  
Here is when the translations $T^{(n}$ come into play. 
Given any Hamiltonian of the form 
\eqref{breakup}, provided that the 
$h_2^{(n}$ satisfies the non-degeneracy assumptions,
it is possible to choose a translation 
$T^{(n}$ of the form \eqref{translation} in such a way 
that the average of $R_1^{(n}$ vanishes. 
This is an application of the implicit function theorem 
provided that $h_2^{(n}$ is an invertible matrix
and that, of course, all the $R$ terms are small. 
Notice that the ``vertical'' translation by $k_n$ 
is roughly given by 
$k_n \approx - \frac 12 \left(h_2^{(n}\right)^{-1} {\bar R}_1^{(n}$.
(We call attention to the fact that the number of conditions that 
need to be adjusted in \eqref{inductiveK} is
exactly the number of parameters at our disposal 
when we apply a translation.)

The magnitude of the translation required to adjust 
the average of $R_1^{(n}$ can be bounded by a constant times 
the size of $R_1^{(n}$ (provided that $h_2^{(n}$ is invertible 
and  that the other terms are small, so that we 
can apply the implicit function theorem).

\medskip 

Hence, the algorithm for the iterative proof is: 
\begin{enumerate}
\item
To determine the translation so that 
$H^{(n} \circ T^{(n}$ satisfies the normalization 
$$
\int_0^1 \frac{\partial}{\partial I} H^{(n} \circ T^{(n} \Big|_{I=0}
                \, d\phi = \omega  \ .
$$
\item
For the ``new'' $H^{(n}$ (i.e.\ for $H^{(n} \circ T^{(n}$) 
find $G_0^{(n}$ and $G_1^{(n}$ in such a way that we eliminate 
the two ``bad terms'' in \eqref{expansion} 
up to quadratic error. 
\end{enumerate}

We have already seen that step~2 
involves small divisors and unbounded operators. 
Nevertheless, we have also seen several times that 
the quadratic convergence can overcome the effect 
of small denominators (for Diophantine numbers). 
Compared with the  previous cases we have 
dealt with,  the only new complication of 
the present algorithm is that
we have to adjust the translation so that 
\eqref{secondbadvector} becomes solvable.

The main complication of the  translation is  
that terms that were high order generate lower order terms. 
For example, a ``good term'' $H(I,\phi) = f(\phi) I^2$, 
with $f(\phi)$ a $\phi$-dependent quadratic form becomes upon 
translation 
\begin{equation}
\label{translated} 
H\circ T = f(\phi) I^2 + 2 f(\phi) I k + f(\phi) k^2    \ .
\end{equation}
The last two terms of \eqref{translated} are ``bad''. 

The fact that we can find a 
translation to eliminate the average in 
\eqref{secondbadvector}
depends on the  fact that the  quadratic 
term  $h_2^{(n}$ is invertible. We need to keep track of 
the fact that this remains so under the successive changes of 
variables.
This is not so difficult since the condition is 
an open condition.

From the analytic point of view, we note that the 
procedure involves solving (twice) equations of 
the form \eqref{der} and applying  the implicit function theorem. 
As we did in Theorem~\ref{Siegel1}, the {\sl second order} terms 
can be estimated in analyticity domains using 
Cauchy  estimates.


In summary, we have sketched  a procedure that 
given a perturbation  
that satisfies certain non-degeneracy conditions,
makes a change of variable that reduces 
the bad terms and 
whose resulting error  is smaller.
More precisely, given estimates of the bad 
terms in a domain, we can obtain estimates of the 
resulting bad terms in a slightly smaller domain.
The estimates will be of the form 
$ \| \mbox{New} \|_{\sigma e^{-\delta}  } \le C \delta^{-\tau} \| \mbox{Original} \|_{\sigma}^2$.
Note also that, in order to match domains etc. we need that 
$\delta$ and the size of the remainder are suitably related.


The proof consists in showing that 
if the original error is sufficiently small, 
then we can  carry out indefinitely
the iterative  procedure sketched above
and it  converges 
in a non-trivial domain. 

Here we sketch the main considerations that need 
to be taken into account converting the above 
remarks into a proof.  The reader is urged to 
either work them out alone or to use this 
as a reading guide for excellent 
expositions in the literature (some of them are
discussed below).

\begin{itemize}
\item[A)]
We start by deciding that 
we consider domain losses of the
form 
$ \delta_0 2^{-n}$, 
and that we will do estimates 
on domains parametrized by a 
$r_n$ defined by
$r_{n+1} = r_n - \delta_0 2^{-n}$.


\item[B)]
We will need to assume inductively that 
\begin{itemize}
\item[B.1)] We have bounds:
$$
\| {( h_2^{(n})} ^{-1} \| \leq C_1       \ ,
$$
and that the derivatives of $R$ are sufficiently small 
so that they do not affect the application of the  implicit 
function theorem (to ensure the existence 
of the translation $T^{(n}$). 

We take $C_1$ to be twice the initial constant: 
$C_1 := 2\,  \| {(h_2^{(0})} ^{-1} \|$. 
We will need to check that, if the initial error
is small enough, the iterative procedure keeps the assumption being 
valid.


\item[ B.2)]
Assume inductively that:
$$
\| R^{(n} \|_{r_n} \leq C_2    \ ,
$$
with $C_2$ being twice the initial value: 
$C_2 := 2 \, \| R^{(0} \|_{r_0} $.

\item[B.3)]
We will also assume that we have bounds similar to those 
in the study of the Siegel problem 
\begin{equation}
\label{domainmatch}
\| \nabla G^{(n} \|_{r_n} \leq \delta_0 2^{-n\tau} 
\end{equation}

The goal of the latter bounds  \eqref{domainmatch}
is to ensure that when we perform the composition of 
$H^{(n} \circ T^{(n} \circ \exp (\L_ {G^{(n}}) $, 
the composition is still defined in the smaller domain. 
\end{itemize}

\item[C)] 
Using assumption B.1, we are able to control the size 
of the translation by $\| R^{(n}\|_{r_n}$ 
times a universal constant. 

Given B.2, we see that the size of the remainder of 
$H^{(n} \circ T^{(n}$ is still of the same order of magnitude 
as~$\|R^{(n}\|_{r_n}$. 
(The new lower order terms generated are bounded 
by the size of the translation.) 

\item[D)]
Solving the small divisors equation, we obtain 
$G_1^{(n}$, $G_0^{(n}$. We can bound 
$$
\| G_1^{(n} \|_{r_{n+1}} + \| G_0^{(n} \|_{r_{n+1}} 
        \leq C K^2 \, \frac{2^{n\tau'}}{\delta_0^{\tau'}}  
        \left( \| R_0^{(n} \|_{r_n} + \| R_1^{(n} \|_{r_n} 
                \right)       \ .
$$
The factor $2^{n\tau'}/\delta_0^{\tau'}$ is 
the usual small divisor factor when we take domain losses
as in A).


\item[E)]
The heuristics can be justified by adding and subtracting 
and applying the mean value theorem pretty much in the same way 
that we did in the proof of Siegel theorem 
but using  the estimates we developed 
in Section~\ref{canonicalperturbation}. 

We obtain:
\begin{equation}
\label{quadraticestimates}
\| R_0^{(n+1} \|_{r_{n+1}} + \| R_1^{(n+1} \|_{r_{n+1}} 
        \leq C K^2 \, \frac{2^{n\tau'}}{\delta_0^{\tau'}}  
        \left( \| R_0^{(n} \|_{r_n} + \| R_1^{(n} \|_{r_n} \right)^2 \ .
\end{equation}


\item[F)]
The rest is essentially mopping up:
\begin{itemize}

\item[F.1)]
We need to show that the quadratic convergence implied 
by \eqref{quadraticestimates} implies that the inductive assumptions 
in B) remain valid (if we start with a small enough error). 
This is accomplished in a similar manner as that 
in the Siegel theorem (the only delicate one is 
\eqref{domainmatch} and this is exactly the same 
as in the Siegel domain). 

\item[F.2)]
We need to show that the accumulated transformations converge. 

Again, this is not very delicate since the quadratic convergence 
implies that $C^{(n}$ are converging to the identity 
extremely rapidly. 
\end{itemize}
\end{itemize}


We urge the reader to compare the above 
sketch with  the papers
\cite{BenettinGGS84} and with \cite{Barrar70}
which contain very readable full proofs.

The main difference in the strategies of those papers
with the presentation here is that
\cite{Barrar70} uses generating functions to deal 
with canonical transformations. 
Neither \cite{BenettinGGS84} nor \cite{Barrar70}
makes a distinction 
between the translations and the exact transformations;
they use just one locally hamiltonian 
transformation that accomplishes the effect of the two steps
that we discussed.
This is, of course, perfectly fine for the problem at hand. 
We  have, however, preferred to keep the two  types of 
transformations 
separate with a view to translated curve theorems. 

A very pedagogical proof of a particular case of the result
(that nevertheless contains the most essential difficulties) is 
\cite{Thirring97}. The paper \cite{Zehnder76} contains 
a detailed reduction of the proof based on the Kolmogorov
method to an abstract implicit 
function theorem very similar to Theorem \ref{Zehnder}.


\begin{remark}
The Kolmogorov method of proof has the advantage 
that it is quite direct and very well suited to functional analysis. 
We always deal with the same linearized equation 
with the same frequency. 
In particular, it leads to very good regularity results.


The main disadvantages arise from the fact that different 
frequencies require different transformations. 
Moreover, the form \eqref{easysystem} is not unique.

Natural questions, which are important for applications, 
but that do not follow directly from the results are
what is the measure covered by the tori and determining 
whether tori of similar frequencies are close together. 
(Indeed, so far, we have not shown that there is only one torus
with a given frequency. Note that there are 
many hamiltonians with the same form \eqref{easysystem}.)

The question about the measure occupied by tori
can be answered by showing that 
the mapping which associates to a  frequency $\omega$ satisfying 
\eqref{diophantineflow}
the torus with frequency $\omega$  produced   
in the Theorem \ref{KAMflows} is Lipschitz. 

Moreover, the tori  can be  expressed as the graph 
of a function of $\phi$, 
\begin{equation}
I=W_\omega(\phi). 
\end{equation}
Clearly, given one torus, there is only one 
function $W$, whereas, given one torus, there 
will be several hamiltonians of the form 
\eqref{easysystem} and several transformations reducing 
the original flow to them.

It is true that $W_\omega(\phi)$ turns out to be Lipschitz 
with Lipschitz constant close to $\|h_2^{-1}\|$. 

The proof of these Lipschitz properties can be obtained 
rather easily if we note that the system of the form 
\eqref{easysystem} is also an approximate solution 
to the equation for $\tilde{\omega}$ in place of $\omega$. 
Hence, if $\omega$ and $\tilde\omega$ are close enough, 
we can consider the torus for frequency $\omega$ as
an approximate solution for the equation 
that would produce a torus of frequency 
$\tilde \omega$. The error of the approximation is 
controlled by $\omega - \tilde \omega$. 
Hence, applying the procedure, we see that we produce a 
solution which differs not more than something that 
can be controlled by $\omega - \tilde \omega$.
This type of argument also leads to uniqueness 
results of the torus with a given frequency.
More details on this type of argument can be found 
in \cite{Douady88}, \cite{Sevryuk95}.


As we will see later, it is true that the map 
$(\omega,\phi)\mapsto W_\omega(\phi)$ 
introduced above is differentiable in the sense of Whitney.
\end{remark}


\begin{remark}
Another aspect in which the method of proof we have discussed 
is not optimal is that it requires very strong 
non-degeneracy conditions. 

Notice that we want to ensure that 
the size of the 
translation required to 
adjust the error to zero average 
is commensurate with the error. 
In a degenerate situation, the size of the translation 
would be a root of the size of the error and then, 
the method as we have presented it, would collapse.

As a matter of fact, one can get a better non-degeneracy 
condition if one does not fix the frequency, 
but fixes it up to a multiple. 
Hence, the only thing that we require is that 
${\rm Span}(\omega)+{\rm Range}(h_2)=\RR^n$.

One can also use clever tricks to reduce degenerate situations 
to non-degenerate ones. For example,
in \cite{BroerH91}.

As we will see later, one can do significantly better than that 
by using other methods. For example, \cite{Sevryuk95}.
\end{remark}

\begin{remark} \label{generalities}
There is an interesting interpretation of the 
method  of  proof  we have presented above in terms of 
geometry in infinite dimensional spaces. 

This interpretation can certainly serve as a heuristic 
guide and many KAM theorems can be fit 
into this form. It was proposed in \cite{Moser67}
and developed quite forcefully in 
\cite{Zehnder76}, which developed 
in this language the main KAM theorems. 
In \cite{Hamilton82}, a similar philosophy is applied 
to many geometric problems. 


The idea is to think of \eqref{easysystem} as defining 
a manifold $\N$ in the space of Hamiltonians $\H$. All the elements of 
this manifold have a feature that we are interested in 
studying. In this case, having 
an invariant torus of frequency $\omega$. 

We also have an action $\Psi$ of a group. In this case, the action by 
canonical transformations. 
The proof we have sketched shows that given a neighborhood $\U$ 
of 
$\N$ in $\H$ all  the elements of $\U$ have an orbit under 
$\Psi$ that  intersects $\N$.

Even if this  is not completely trivial to make precise, 
(one has to define the topologies of the spaces of hamiltonians 
and mappings, check
that they are manifolds, check the properties of the action of the group
of transformations on it, etc.)
it can serve as a heuristic principle to decide which theorems 
are possible. (Note that, if we were considering a 
finite dimensional problem, we could just decide what 
was true by deciding whether the tangent spaces of $\N$
and  of the orbits of the action span the tangent space of 
$\H$. )

We note that this  line of reasoning and these 
heuristic principles  apply to other problems outside mechanics.
Indeed, a good part of singularity theory can be formulated in this 
way. Similarly,
many problems in geometry and PDE can be reduced
to implicit function theorems by applying this heuristic picture.
(See \cite{Hamilton82}.)
\footnote{
Incidentally, in singularity theory one has a very 
powerful  implicit function theorem \cite{Mather69}, which allows
one to deal in some cases with operators that lose a fraction of
the derivatives. 
The method of \cite{Mather69} has, to my knowledge, not been used in 
KAM theory, even if \cite{LlaveMM86}, which 
considers perturbation theories for Hamiltonian systems that 
were previously done using KAM theory,
was very much inspired by it.}

The idea of deciding which theorems in KAM theory could be true 
by just looking at when the tangent spaces span leads very 
quickly to  the problem of counting parameters.
(See the discussion in \cite{Moser67}.)  
Roughly one needs that the normal form $\N$ and the 
group acting contain enough free parameters to overcome 
all the obstructions imposed by the geometry.  

One of the important developments of later years is 
that in this counting of parameters, one should include 
the frequency \cite{Eliasson88a} or 
the perturbation parameter \cite{JorbaS92}. 
One  reason why this is not obvious is 
that these extra parameters have a Cantor structure, hence 
at first sight, notions based on the geometry of tangent 
spaces etc. do not seem workable. 
Nevertheless, it turns out to be true that 
one can use these Cantor parameters 
very much in the same way as continuous families
supplementing the standard geometric arguments based on 
implicit function theorems with measure theoretic estimates.
Indeed, the next method of proof which we discuss can be used 
to cope with  this type of problems.
We refer to \cite{Sevryuk99} for an account of recent
developments in the lack of parameters problem and, 
relatedly, in the problem of studying degenerate systems.

\end{remark}

\begin{exercise}
Try to carry out the proof choosing 
the translation $T^{(n}$ given by 
$k_n = - \frac 12 \left(h_2^{(n}\right)^{-1} {\bar R}_1^{(n}$. 
Notice that in such a choice we kill the average of $R_1^{(n}$ 
up to second order terms in~$R^{(n}$.
\end{exercise}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\subsection{Arnol'd method.} \label{Arnoldmethod}

In \cite{Arnold63a}, V. I. Arnol'd introduced a 
method to prove the persistence of  
quasi-periodic solutions
quite different from the method of  proof 
by Kolmogorov that we have discussed in 
the previous Section \ref{Kolmogorovmethod}.

Rather than trying to perform a change of variables that produces 
one torus, the method of \cite{Arnold63a} produces changes of 
variables that  reduce the system to approximately integrable in 
a region of space.  Hence, the method of \cite{Arnold63a}
produces all the tori at the same time.

The main complication that arises 
with respect to the method of Kolmogorov 
is that the intermediate steps require 
studying transformations 
that are defined in 
rather
complicated regions.  The first transformation is 
defined in a  domain that 
excludes the low order resonances. 
(The places where $\partial_I  H(I) \cdot k \ll 1 $ for 
$|k|$ not big.) 
In successive steps of the iterative procedure, one performs another 
transformation that reduces the system much more closely to integrable, 
but in a more complicated region since we need to take into 
account more resonances. 
At the end of the process one ends up with a transformation 
defined on a Cantor family of invariant tori. 
(A set which is locally  diffeomorphic to  the Cartesian 
product of a Cantor set and a torus. Each of the tori
in a connected component of the set is invariant. )


An alternative way to describe the whole process is 
to say that we have a smooth canonical 
transformation defined in the whole space which reduces 
the perturbed differential equation  to integrable in a smaller set.
At intermediate steps of the iteration we just keep 
estimates of how the system differs from integrable in 
a smaller and smaller set with increasingly complicated 
geometry. In the limit, we obtain control 
on just  a Cantor family of tori, on which the system 
can be considered as integrable.


The basic strategy, which we will detail later, 
has several  advantages with respect to the
Kolmogorov one:

One of them is
that one obtains more information on the 
way that the tori are organized.
For example, it follows rather naturally
that the tori constitute a family that is 
differentiable in the sense of Whitney. 
(This was observed in \cite{ChierchiaG82}
\cite{Gallavotti1}. Similar results 
were obtained by other methods in \cite{Poschel}.)

Another advantage is that if 
we stop the process after a finite number of steps,
we may still have quite good information about the 
system. For example, under the assumption that 
$\frac{\partial^2 }{\partial I_i \partial I_j } H(I) $ 
is a positive definite matrix,  in \cite{Nekhoroshev77}
(as a matter of fact, the assumption in \cite{Nekhoroshev77} is 
sharper but more complicated to 
state  than the positive definiteness,
which is enough in many applications and which is the assumption 
used in many more modern proofs.)
one can find the  result that, denoting
by $\epsilon = \| R\|_\sigma $ in \eqref{hamiltonianperturbed}
we have, 
for times $ t \le \exp(A \epsilon^{-a})$, {\bf all} the orbits of 
the  perturbed system \eqref{hamiltonianperturbed}  
remain at a distance  not more than $\epsilon^b$ of those of 
the unperturbed system.  

The method of exclusion of parameters  
near the resonances and continuing the transformation
in the rest of the space, has had many applications in other 
KAM problems. For example, in the problem of changing 
a system with quasi-periodic coefficients 
into constant coefficients, usually 
called the reducibility problem, most of 
the papers (see especially the early ones \cite{DinaburgS75})
are quite influenced by the method. We refer to the lectures 
of Prof. Eliasson in this meeting for an up to date 
review of this problem.  The strategy of 
\cite{Arnold63a} was also employed in the first 
proofs that started to study the problem of lack of 
parameters and the related problem of studying systems which are
rather degenerate.


{From} the point of 
view of the regularity assumptions needed
the main  shortcoming of the method is that the 
analytic part of the proof is based on truncating the Fourier series
of the perturbation
which produces bad results in finitely differentiable 
systems.  Even if it is not too difficult, 
I know of no place in the literature where the 
Arnol'd strategy is implemented  for finitely differentiable
systems. (I wrote some very preliminary 
notes on that for a graduate course.)

Another shortcoming arises from the 
fact that one of the elements of the iterative 
step is the domain of the definition on which the 
changes of variables are defined. 
Keeping track of this domain is much more complicated 
than keeping track of the sizes of the functions. 
Hence, the proofs are more complicated 
and  often one obtains worse estimates on the 
sizes of perturbations allowed and other quantitative results.
(Nevertheless the method was used in the first proofs of
several sharp estimates such as \cite{Neishtadt81}
\cite{Wayne84}.)

I do not think that the method of \cite{Arnold63a} has been formalized
in such a way that it leads to an abstract implicit 
function theorem in the style of Theorem \ref{Zehnder}
which takes care of the detailed estimates in applications or,
at least provided with a detailed strategy to carry them out.


Besides \cite{Arnold63a}, a  very pleasant
and instructive modern 
exposition  of this method of proof is 
\cite{Gallavotti1} (see also \cite{ChierchiaG82}.)
The Nekhoroshev theorem 
proved by this method is nicely explained in 
\cite{BenettinGG85} and a unified exposition  of 
Nekhoroshev and KAM theorems is 
in \cite{DelshamsG96}. Other proofs of 
Nekhoroshev theorems are covered in 
\cite{Poschel93}, \cite{Lochak92}.


In somewhat more, but still insufficient, detail: 
At the $n$th step of Arnold's method, 
we keep track of:
\begin{itemize}
\item[1)]
An excluded set, on which we do not expect to define the 
transformation.
\item[2)]
In the complement of the excluded set we have defined 
a transformation $C^{(n}$ in such a way that 
$$
H\circ C^{(n} = \bar{H}^{(n}(I) + R^{(n} \ ,
$$
\item[3)]
We keep track of 
$\| \nabla H^{(0} - \nabla \bar{H}^{(n} \|_{\sigma_n}$ 
and $\| R^{(n} \|_{\sigma_n}$. 
We assume by induction that 
$\| \nabla H^{(0} - \nabla \bar{H}^{(n} \|_{\sigma_n}$ 
remains bounded and that $\| R^{(n} \|_{\sigma_n}$ 
is bounded by a superexponentially decreasing function. 
(The $\| \cdot \|_{\sigma_n}$ norms will refer to 
complex extensions of the excluded set, not a fixed set.)
\end{itemize}

Filling in more details about 1):
The excluded set consists of bands given by 
\begin{equation}\label{excludeddefined}
\Bigl| \frac{\partial \bar{H}^{(i} }{\partial I} (I) \cdot k \Bigr| 
                  \ge C_{n,i} |k|^{-\nu} \ , \quad 
2^{i-1} < |k| \le 2^i \ . 
\end{equation}
In particular, it is a set with piecewise smooth boundary 
and the angles of the corners are bounded from below 
by $C4^{-n}$ (where, $C$ again is a constant  that depends on the 
inductive assumptions).

This lower bound on the angles comes from the fact that 
a bound of this sort is what one would get for planes 
whose normals are integer vectors of total length $2^n$ 
and the fact that $\bar{H}^{(i}$ are 
uniform diffeomorphisms and, therefore only change the angles
by a  factor which remains uniformly bounded through all
the iteration.

We denote the excluded set by ${\mathcal E}_n$ and 
\begin{equation}\nonumber
\begin{split}
{\mathcal D}_{n,\sigma} &:= \{ \, z \in \cee^d \times \cee^d/\zed^d \, 
                         | \, d(z,{\mathcal E}_n) \le \sigma \, \} \\
\| f \|_{n,\sigma} &:= \sup_{z\in {\mathcal D}_{n,\sigma}} |f(z)| \ .
\end{split}
\end{equation}
Once we fix a sequence $\{\sigma_n\}$ (we will take 
$\sigma_n = \sigma_0 (1 - \frac 12 \sum_{j=1}^{n} (\frac 13)^j )$), 
we denote the norm $\| \cdot \|_{n,\sigma_n}$ 
by $\| \cdot \|_{n}$. 

The main difference between these norms and the regular ones 
is that, due to the small angles, the Cauchy estimates are worse. 
Nevertheless, given the lower bound on the angles, 
they do not get too much worse:
$$
\| \nabla f \|_{\sigma_n e^{-\delta_n}} \le 
        C \left(\delta_n e^{-4^n}\right)^{-1} \| f \|_{\sigma_n} \ .
$$
To go from one step to the next, we exclude a slightly larger region 
and define a new transformation 
$C^{(n+1} = C^{(n} \circ \exp(\L_{G^{(n}})$
so that the new remainder is much smaller 
(here, we will need to make a small modification 
to our usual notion of smaller, 
meaning quadratic times powers of the domain loss).

We see that 
\begin{equation}\nonumber
\begin{split}
H^{(n} \circ \exp(\L_{G^{(n}}) = 
   & \,\,\bar{H}^{(n} + R^{(n} + \{ \bar{H}^{(n}(I), \, G^{(n} \} \\
   & + \{ R^{(n}(I), \, G^{(n} \} + O ((G^{(n})^2) 
\end{split}
\end{equation}
(a precise estimate for $O ((G^{(n})^2)$ 
appears in Lemma~\ref{leadingorder}). 

A new idea of the method is to modify the prescription 
of Newton method by restricting only to a finite number 
of frequencies and include a truncation of the Fourier series 
so that, at every stage, we only have to deal with a finite 
(but growing) number of denominators. 
The error incurred by the truncation can be estimated 
if we increase the  order of truncation at the right speed. 

We write:
\begin{equation}\nonumber
\begin{split}
R^{(n\,[\le 2^n]} (I,\phi) &= \sum_{|k|\le 2^n} 
                     \hat{R}^{(n}_k e^{2\pi ik \phi} \\
R^{(n\,[> 2^n]} (I,\phi) &= \sum_{|k|>2^n} 
                     \hat{R}^{(n}_k e^{2\pi ik \phi} \\
\end{split}
\end{equation}
Hence, we solve:
\begin{equation}\label{linearizedarnold}
\{ \bar{H}^{(n} (I) , \, G^{(n} \} + R^{(n\,[\le 2^n]} (I,\phi) 
                  = \Delta^{(n} (I) \ .
\end{equation}

The equation  \eqref{linearizedarnold} can be solved by setting 
\begin{equation}\label{arnoldsolution}
\hat{G}^{(n}_k (I) = {\hat{R}^{(n}_k (I)}/
               ({\frac{\partial \bar{H}^{(n}}{\partial I} \cdot k}) \ , 
               \quad |k| \le 2^n \ .
\end{equation}
By the definition of the excluded set, 
we can bound the denominators \eqref{arnoldsolution} 
over the complement of the excluded set.

Notice also that we can bound 
$$
\| R^{(n\,[> 2^n]} \|_{\sigma_n e^{-\delta_n}} \le
\| R^{(n} \|_{\sigma_n} e^{-\delta_n 2^n} \ .
$$
This allows us to define the generator of the transformation 
that eliminates $R^{(n\,[\le 2^n]}$ (up to quadratic orders). 

We have estimates 
$$
\| G^{(n} \|_{\sigma_n e^{-\delta}} \le 
              C \delta^{-\tau} \| R^{(n} \|_{\sigma_n} \ ,
$$
where, as usual, $\tau$ is roughly $\nu$ plus something depending 
on the dimension. 
We use the letter $\tau$ to denote similarly constants that 
depend only on the Diophantine exponent and the dimension. 

To study the domain of $\exp(\L_{G^{(n}})$, 
we note that if we set
$$
C_{n+1,\,i} = \delta^{-\tau} 2^n \| R^{(n} \|_{\sigma_n} + C_{n,\,i} \ ,
$$
we can define the transformation from the set 
\begin{equation}\label{excludednew}
\Bigl| \frac{\partial \bar{H}^{(i}}{\partial I} \cdot k \Bigr| \ge 
      C_{n+1,\,i} |k|^{-\nu} \ , \quad 
2^{i-1} < |k| \le 2^i \ , \quad i = 1,2,\ldots,n 
\end{equation}
to the set
$$
\Bigl| \frac{\partial \bar{H}^{(i}}{\partial I} \cdot k \Bigr| \ge 
      C_{n,\,i} |k|^{-\nu} \ , \quad 
2^{i-1} < |k| \le 2^i \ , \quad i = 1,2,\ldots,n \ .
$$
In that case, we have 
$$
\bar{H}^{(n+1} (I) = \bar{H}^{(n} (I) + \bar{\Delta}^{(n+1} (I) \ ,
$$
from which it is clear that 
$$
\| \bar{H}^{(n+1} \|_{\sigma_{n+1},\,{\mathcal D}_n} \le 
        \| \bar{H}^{(n} \|_{\sigma_n} + \| R^{(n} \|_{\sigma_n} 
$$
and 
$$
\| \nabla \bar{H}^{(n+1} - \nabla \bar{H}^{(n} 
                                 \|_{\sigma_{n+1},\,{\mathcal D}_n} 
          \le C 2^\tau \| R^{(n} \|_{\sigma_n} \ .
$$
Most importantly, we have:
\begin{equation}\label{importantrecursion}
\| R^{(n+1} \|_{\sigma_{n+1},\,{\mathcal D}_{n+1}} 
          \le C 2^{n\tau} \| R^{(n} \|_{\sigma_n, {\mathcal D}_{n}}
              + 2^{-\delta_n 2^n}    \ .
\end{equation}


To define the next excluded set, 
the only thing we have to do is to add to the excluded regions 
corresponding to 
$$
\Bigl| \frac{\partial \bar{H}^{(n+1}}{\partial I} \cdot k \Bigr| \ge 
      C |k|^{-\nu} \ , \quad 
                              2^n < |k| \le 2^{n+1} \ .
$$
Of course, excluding more regions makes the suprema 
in the left-hand side of \eqref{importantrecursion} 
and all the other estimates even smaller. 

The recursion \eqref{importantrecursion} 
leads still to superexponential convergence 
choosing $\delta_n = \delta_0 (2/3)^n$.
Establishing this  was proposed in Exercise~\ref{improvements},
see \eqref{extraquadratic}. 

Once we have the superexponential 
convergence of the remainders,   we obtain  that the $C_{n,\,i}$'s 
remain bounded and  so does $\|\nabla \bar{H}^{(n}\|_{\sigma_n}$.
Indeed, 
$\|\nabla \bar{H}^{(0}- \nabla \bar{H}^{(n}\|_{\sigma_n}$ 
is small (arbitrarily small if we assume that 
$\|R^{(0}\|_{\sigma_0}$ is sufficiently 
small). Similarly, it is easy to 
check that $\|(\nabla^2 \bar{H}^{(n})^{-1}\|_{\sigma_n}$
remains bounded and that the bound is close to 
the one for $\|(\nabla^2 \bar{H}^{(0})^{-1}\|_{\sigma_n}$
if $\|R^{(0}\|_{\sigma_0}$ is sufficiently small.
Hence, under the assumption that  $\|R^{(0}\|_{\sigma_0}$
is sufficiently small, we can verify 
the  inductive assumption 
on $\|(\nabla^2 \bar{H}^{(n})^{-1}\|_{\sigma_n}$.


The passage to the limit in this procedure 
is somewhat subtle. 

In the original coordinates, we have to study 
the sets
$(C^{(n})^{-1} {\mathcal E}_n$. 
These sets will be dense. 
By increasing slightly the excluded sets
at each stage so that we exclude also the mismatches
of the domain,
we can arrange that $(C^{(n})^{-1} {\mathcal E}_n$ 
are increasing
(note that this extra exclusion will be decreasing 
superexponentially since the transformations 
that we need to carry out in each step are 
decreasing superexponentially).
Hence, 
$(C^{(n})^{-1} (\TT^d\times \RR^d - {\mathcal E}_n) $ 
is a decreasing sequence of compact sets. 
On the other hand, their measure remains bounded away from zero 
(this follows from the fact that
$\|(\nabla^2 \bar{H}^{(n})^{-1}\|_{\sigma_n}$
remains uniformly bounded
so that we can use the same arguments as in Section~\ref{Diophantine}). 

It is slightly more subtle, but we can also estimate the derivatives 
of the transformations $C^{(n}$ to show that the derivatives 
remain bounded 
(it follows by an argument very similar to that used
in the proof of Theorem \ref{Siegel2} part v)). 
This shows that the sets 
$(C^{(n})^{-1} (\TT^d\times \RR^d - {\mathcal E}_n) $ 
get closer and closer to being invariant. 
The limiting set will be invariant. 


If one keeps track of all the derivatives in the closed sets, 
one can show that the limiting transformation $C^{(\infty}$ 
is differentiable in the sense of Whitney 
(see \cite{ChierchiaG82} or \cite{Gallavotti83}). 
An interesting remark \cite{Valdinoci98}
is that one can use the fact that the gaps between the sets 
are much larger than the corrections to show directly 
the Whitney extension theorem. 
This remark could be important when studying infinite 
dimensional systems (e.g.\ PDE's).
In infinite dimensions, the Whitney extension theorem 
is not available, but the method of \cite{Valdinoci98} 
could still work to produce tori that lie in a smooth family. 

For more details of this method of proof 
we refer to the original paper \cite{Arnold63a},
and the more expository paper \cite{Arnold63b}, which also contains 
applications to celestial mechanics.

An early development of the method with several improvements is 
\cite{Svanidze80}.
More modern expositions (including the Whitney differentiability) 
of Arnol'd method are \cite{ChierchiaG82} and \cite{Gallavotti83}. 
An exposition of the  Arnol'd method that, 
at the same time proves Nekhoroshev's theorem 
and clarifies the geometry of the domains, is \cite{DelshamsG96}. 

The method also lies at the heart of several other papers. 
One paper that incorporates the exclusion of parameters 
but is free of many geometric complications is 
\cite{DinaburgS75}.
This paper also shows that the method can allow 
some frequencies that are not Diophantine 
(they  allow  
$|\omega\cdot k|^{-1} \le \exp{\frac{A|k|}{\log|k|^{1+\ep}}}$).

The method of transformations and exclusion 
of parameters is the basis of many modern developments 
in KAM theory related to lower dimensional tori, 
e.g.\ \cite{Eliasson88}, \cite{JorbaS92}, 
\cite{JorbaV97b}.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\subsection{Lagrangian proof.} \label{Lagrangianmethod}

In this section, we study a proof 
of Theorem 
\ref{KAMmaps} which has a different 
flavor from the proofs already presented.
We will present the proof
only in the case $d=1$ and only for the 
particular case of the map 
given in \eqref{standardmap}. Similar proofs 
in any dimension and for more general maps 
are in the literature and we refer to that.

The proof differs substantially from the previous proofs
of Theorem \ref{KAMflows} in 
that it does not use compositions.
Of course, the proof we presented of 
Theorem \ref{Siegel1} does 
not require compositions either, even if 
the proofs we have presented so far for 
Theorem \ref{KAMflows} do rely on transformation theory.
More interesting is that it is based on  Lagrangian formalism. 
(That is,  on second order equations rather than on systems 
of first order equations. The structure that is 
used is the fact that the equations solve a 
Lagrange variational principle, not that they come from 
a Hamiltonian formalism.)

The proof we present is based on unpublished notes of 
J. Moser for a course he gave in Z\"urich. 
A generalization of these results  is included in the paper 
\cite{SalamonZ89}. We follow very closely the presentation 
in one of the chapters of \cite{Rana87} 
(which in turn followed  the presentation of 
Moser's course.) 

In \cite{Rana87}, one can also find 
the  implementation of computer assisted proofs
based on this method. In particular the result that the map 
given in
\eqref{standardmap} for $V(x) = \ep \frac{1}{2 \pi} \sin( 2 \pi x)$
has an invariant circle with golden mean rotation 
for  $\ep = .93$ 
(this was later improved to $\ep = 0.935$ ).  This is very close 
to the values for which \cite{Jungreis91} showed that there 
can be no invariant circle. 
We discuss some of these issues in 
Section \ref{compassisted}.

\footnote{The conjecture in 
\cite{Greene79}, given a theoretical --  but not yet rigorous -- basis in 
\cite{McKay82} is  that there are smooth invariant circles
with rotation golden mean 
when $\ep < \ep^*$ and not when $\ep > \ep^*$. For 
$\ep = \ep^*$ there is an invariant circle which is not very 
differentiable. It is believed that $\ep^* \approx .971635$. 
Of course, in other families it could -- probably does -- happen
that the set of parameters  for which one can find 
a smooth invariant circle is a more complicated set, perhaps 
with infinitely many components. 
}

The Lagrangian formalism for KAM theory has several other 
applications. For example, many elliptic PDE's have 
a very natural Lagrangian formalism but not a 
simple Hamiltonian one.   (Note that in this case,
the independent variable is multidimensional, while 
in Mechanics, the independent variable is the time, which is 
one-dimensional.)
There is no easy  canonical transformation theory for
elliptic  PDE's.


We will try to find solutions to 
\eqref{linstedtlagrange}
which read
\begin{equation}
\label{linstedtlagrange2}
\ell_\ep (\theta+\omega) + \ell_\ep(\theta-\omega) - 2\ell_\ep (\theta)
       = - \ep V' (\theta+\ell_\ep(\theta))\ .
\end{equation}
We refer to Section \ref{linstedt} for the interpretation of 
this equation as a parameterization of a set in 
which the motion is quasiperiodic of frequency $\omega$. 

Somewhat informally,
what we will do is to show that 
there is a procedure that, given 
an approximate solution
of \eqref{linstedtlagrange2}
(which is not too badly behaved)
we can produce another function that solves
the equation even more approximately. 
Then, we will have to show that the whole process can be 
iterated indefinitely and that it converges to a solution. 

Of course, making precise the notion of close, will involve
introducing 
analytic norms. The statement that the result of the algorithm is 
closer  to being a solution 
 will mean to prove  that in a slightly smaller domain, we will have
the usual bounds which are quadratic in the previous error and 
have powers of the loss of analyticity. 
The fact that the iterative step can be performed 
and that it will lead to the desired improvement will require that
certain expressions are not too large. 
(This is what we alluded to when mentioning
that the solution is well behaved.)  Of course, we will need to 
check that, if the initial error is small enough, the quadratic 
convergence that ensues, allows us  to recover the inductive hypothesis
indefinitely.

The theorem whose proof we will sketch is:

\begin{theorem}\label{KAMlagrangian}

Let $\ell_0: \torus^1 \to \torus^1 \times \real^1$
be such that $\| \ell_0\|_\sigma < \infty$.
Assume that
\begin{equation} \label{1apm}
\begin{split}
&\| \ell^\prime_0 + 1 \|_\sigma \leq M_+, \\ 
&\| (\ell^\prime_0 +1)^{-1} \|_\sigma \leq M_- 
\end{split}
\end{equation}


\begin{equation} \label{1b}
\frac{1}{2} 
\| V^{\prime\prime} \|_{\sigma e^{( M_+ + \frac{3} {M_-})} } \leq D
\end{equation}
and
\begin{equation}
\| \ell_0 (x + \omega) - 2 \ell_0 (x) + \ell_0 (x - \omega) -  V'(\ell_0 (x) + x)
 \|_{\sigma} \leq \epsilon
\end{equation}
where $M_+, M_-, D$, and $\epsilon$ are finite positive constants.

Let  $\Gamma(M_+,M_-, D, K, \nu)$ be a
function  which will be  made 
rather explicit during the proof, where $K$ and $\nu$
are the constant and the exponent appearing in the 
Diophantine  properties of $\omega$.

If
$$
\epsilon \le \Gamma(M_+,M_-, D, K, \nu)
$$
then, 
there is a periodic function $\ell, \ \ \ell (x+1) = \ell (x)$, \ solving
\eqref{linstedtlagrange2}.

Moreover
$$
\| \ell - \ell_0 \|_{\sigma/2}\leq C \epsilon
$$
where $C$ is a constant that depends on $M_+,M_-,D,K,\nu$. 
\end{theorem}


The proof will be done using a quasi-Newton method. 
The method will be rather similar to the proof of 
Theorem \ref{Siegel1}.
We try to solve the infinitesimal equation
suggested by the Newton method. 
This will lead to an equation which is not immediately 
solvable with the method of Section~\ref{linear_estimates}. 
Nevertheless, by manipulating the equation with the remainder, 
we will arrive at a factorization of the equation that 
will be solvable by applying repeatedly the 
theory for the equation \eqref{diff}.

Denote
\begin{equation} \label{lagrangianfunctional}
\Tau(\ell)(x) \equiv 
\ell (x + \omega) - 2 \ell (x) + \ell (x - \omega) -  V'(\ell(x) + x)
\end{equation}

We assume that we are  given an approximate solution $\ell$ such that
\begin{equation}\label{lagrangianremainder}
\Tau(\ell) = R
\end{equation}
where $R$ is small. 

The prescription of the Newton method would be to 
find a periodic $\Delta$ solving
\begin{equation}\label{newtonlagrangian}
\Delta(x + \omega) 
+ \Delta(x - \omega) -  (2 +  V''(\ell(x) + x) ) \Delta(x) = -R(x)
\end{equation}


This equation \eqref{newtonlagrangian} 
is not readily solvable in terms of Fourier coefficients 
(as indicated in Exercise \ref{secondordereq})
since the term  $( 2 + V''(\ell(x) + x)) \Delta(x) $ is not diagonal 
in Fourier coefficients. Our next task is to manipulate the 
equation so that it becomes solvable using the 
Fourier methods. The manipulations that will follow, 
even if rather straightforward and indeed convenient 
for numerical work
will perhaps look mysterious, but in later sections
we will argue that the success is due to natural 
geometric reasons.

If we take derivatives  with respect to $x$ of 
\eqref{lagrangianremainder}, we obtain, 
denoting by $T_\omega(x) = x + \omega$  
and  by $g(x) = \ell(x) + x$ (so that $g'(x) = \ell'(x) + 1$),
\begin{equation}\label{derivativeremainder2}
g'\circ T_\omega + g'\circ T_{-\omega} - (2 + V''\circ g) g' = R'
\end{equation}

Substituting  the expression for $2 + V''\circ g$ 
from \eqref{derivativeremainder2} into \eqref{newtonlagrangian}, 
we obtain 
\begin{equation}\label{step3}
 g' \Delta\circ T_\omega
+ g'  \Delta \circ T_{-\omega} -
[-R'  + g'\circ T_\omega + g'\circ T_{-\omega}] \Delta = - g' R
\end{equation}

Ignoring the term
$R^\prime \Delta$, which is quadratic in the error,
yields the system of equations:
\begin{equation} \label{substitution1}
\left( \frac{\Delta} {\ell^\prime + 1} \right) \circ T_{-\omega} - 
\left( 
\frac{\Delta }{\ell^\prime+1} \right) =
\frac{W}{(\ell^\prime +1) 
(\ell^\prime +1) \circ T_{-\omega}}
\end{equation}
with:
\begin{equation} \label{substitution2}
W \circ T_\omega - W = (\ell^\prime +1) R 
\end{equation}

The above system of equations consists of  equations of the form
\eqref{diff} which can be studied using 
Lemma~\ref{linearestimates}. We first solve 
\eqref{substitution2} for $W$ and we take the solution 
and substitute it in the \RHS of
\eqref{substitution1}. We then solve 
\eqref{substitution1} for $\frac{\Delta }{\ell^\prime+1}$, 
out of which $\Delta$ is obtained just multiplying by ${\ell^\prime+1}$.

Of course, in order to carry out the above plan, we need to 
check that the equations we plan to solve are indeed 
solvable (i.e. that their \RHS has average zero).
Later, we will have to worry about obtaining estimates of 
the solution thus obtained.

The fact that \eqref{substitution2} can be solved
is a calculation which we have done in 
Section~\ref{linstedt} when we wanted to 
show the solvability of 
equation \eqref{norder}. Once we have that  the
equation \eqref{substitution2} is solvable up to an additive constant, 
we can determine the additive constant in $W$ in such 
a way that the \RHS of \eqref{substitution1}  
has average zero. 
(Note that adding a constant $\bar W$  to  $W$ changes the average of 
the \RHS of \eqref{substitution1} 
by  $\bar W \cdot  \int 
( (\ell^\prime +1) 
(\ell^\prime +1) \circ T_{-\omega})^{-1} $. 
The integral is not zero, since we assumed by  induction that
the denominator in the integrand is bounded away from zero and 
hence, it is positive. 
Indeed, we can have a  bound for it under the assumptions given  
in our inductive hypothesis.)

The fact that this procedure works can be 
shown using the familiar method. 

Adding and subtracting, we show that given some inductive
assumptions on bounds on $||\ell' + 1||_\sigma$, 
$||(\ell' + 1)^{-1}||_\sigma$, the above procedure leads to a
new quadratic remainder. That is, as usual,  we have
$$
||R^{n+1} ||_{\sigma_n - \delta_n } \le  
||R^{n} ||_{\sigma_n}^2 C K^2 \delta_n^{-\tau}
$$
The bounds we assumed on the derivatives deteriorate slightly, but again 
the quadratic convergence ensures that they remain bounded during the 
iteration.

\begin{remark}
The remarkable cancellations 
(see \eqref{derivativeremainder2}, \eqref{newtonlagrangian}) 
between the derivative of the remainder and the linearized 
equation which allowed us to obtain a quadratically convergent 
method solving only the linear equation (and performing 
easy multiplication and divisions by known functions) 
are not 
a coincidence. In \cite{SalamonZ89} one can find how 
they work for twist maps if one uses the equations given by the 
generating functions, linking them to a Lagrangian formalism. 

Indeed there are deeper reasons. For example, the cancellations 
apply to partial differential equations. See \cite{Kozlov83}, 
\cite{Moser88}.
\end{remark}

\begin{remark}
If one is interested in obtaining the existence of
invariant circles for numerical values that are as close 
to the optimal value as possible, one should be 
prepared to cope with the difficulty of having the quality of 
the solution get worse and worse. 
Indeed, the domains of analyticity shrink and the functions
involved come closer and closer to having zeros. 
Indeed, there are precise predictions -- not rigorous
but supported by numerical evidence -- that at the breakdown 
of the invariant circle  for 
the map given in \eqref{standardmap},
all the difficulties happen at the same time 
and indeed all the quantities that need to be estimated blow 
up as powers of the distance of the parameter to the critical 
one.

See  \cite{BerrettiCCF92} for numerical 
results and \cite{Llave92} for a non-rigorous explanation 
and precise conjectures. 
\end{remark}


\subsection{Proof without changes of variables.} \label{nochangemethod}


In this section, we will present another proof 
of Theorem~\ref{KAMmaps}.
This proof is based on \cite{JorbaLV00}. 
A version of the method for
lower dimensional tori was presented in 
\cite{JorbaLZ00}.  

The proof actually proves something more general since 
the main result does not require that the map is 
exact. Of course, without 
assuming exactness, we  cannot expect to have invariant tori 
as was shown in the examples.
The conclusion of the main theorem is that 
for symplectic maps that satisfy 
all the other assumptions of 
Theorem \ref{KAMmaps}
there is a  torus that gets translated rigidly
in the direction of the actions.
The points of the torus are, roughly, rotated.

 
This is a generalization to higher dimensions of 
the translated curve theorem of 
\cite{Russmann76}.

If we assume that the map is exact, it will be 
very easy to show that the translation has to vanish 
and that the torus is indeed invariant and that the motion on 
it is conjugated to a rigid rotation. 


We will consider the symplectic manifold 
$\torus^d \times \real^d$ endowed with  the standard symplectic 
structure. 

We will consider a map $F:\torus^d \times \real^d \to \torus^d \times \real^d$ 
which is symplectic (not necessarily exact)
analytic (and other conditions somewhat 
weaker than those of Theorem \ref{KAMmaps}
which we will formulate when  the 
heuristic discussion motivates them). 
We fix $\omega$ Diophantine
and seek a mapping 
$K: \torus^d \rightarrow \torus^d \times \real^d$ 
and a vector $a \in \real^d$ 
in such a way  
that 
\begin{equation}  \label{parametrization}
F\circ K(\theta) = K( \theta + \omega) + (0, a)
\end{equation} 


Note that, of course, the equation 
\eqref{parametrization} expresses that the image of the 
torus is translated in the direction of the action by 
a rigid displacement $a$. 

In the case that $F$ is an integrable map, all the tori
given by a parameterization
\begin{equation} \label{integrableexample}
K_0(\theta) =  (\theta, I_0) \ 
\end{equation}
are ``vertically'' translated.
So, we expect that the functions we will have 
to consider will be close to that.

Later, we will show that if $F$ is exact (and there are other
conditions), then, $a$ should vanish. This is very similar to 
the line of argument in \cite{Russmann76}.  
When $d = 1$, it can be seen that the zero flux condition indeed
implies that $a = 0$. 
We note that, even if the proof does not use the exactness of the
symplectic structure, it uses the symplectic structure. 
Under appropriate redefinition of translation, one can have similar
theorems in other symplectic manifolds.

We will sketch the proof of the following theorem:

\begin{theorem}\label{translatedcurve} 
Assume that $F$ is an analytic symplectic map of  
$\torus^d \times \real^d$ endowed with the 
canonical symplectic structure and that $\omega$ is a Diophantine 
number.

Assume that $F$ is close to an integrable map and that it satisfies
the hypothesis of non-degeneracy of Theorem \ref{KAMmaps}. 
Assume that we can find an approximate $(K, a)$
solution of  \eqref{parametrization}. 

If the residual of  \eqref{parametrization} is small enough
(depending on properties of $F$ and  on $\|K  - K_0\|$,
where $K_0$ is the solution in \eqref{integrableexample}),
then we can find
an exact solution of \eqref{parametrization}.

In particular,  if we take as approximate solution  $K_0$ 
as in \eqref{integrableexample}
the hypotheses are satisfied when $F$ is sufficiently close 
to an integrable map.
\end{theorem}

The fact that $F$ is close to integrable is not
really necessary as it will transpire from the proof.
At this stage it is only introduced to 
avoid using a more complicated notion of 
non-degeneracy than that used in 
Theorem \ref{KAMmaps}. Like the proof in 
Section \ref{Lagrangianmethod}, it can apply to 
all the maps of the form \eqref{standardmap}.
Even that can be generalized by formulating 
appropriately the non-degeneracy condition.
See \cite{JorbaLV00}.


A very simple calculation
(a more general version appears in \cite{JorbaLZ00})
shows that  if 
we have an exact system, then the translation $a$ 
for a true solution has to be zero.

\begin{proposition}\label{vanishing} If the $K$ solving \eqref{parametrization}
is close to $K_0$ in an analytic norm and 
$F$ is exact, then $a = 0$.
\end{proposition}

Before starting the discussion of the  Theorem~\ref{translatedcurve}, 
we discuss the proof of Proposition~\ref{vanishing}.


\begin{proof}
Let $\alpha$ be  the symplectic potential form 
$\alpha= \sum_i I_i \ d \phi_i$. Assume also  
that $F^* \alpha= \alpha + d S$.

We  consider the loops in the $i$th angle coordinate given by 
$$
L_{\theta_1, \cdots, \theta_{i -1}, \theta_{i+1} ,\cdots,\theta_d} (\theta) :=
K(\theta_1, \cdots, \theta_{i -1},  \theta, \theta_{i+1} , \cdots ,\theta_d ),
$$
where 
$\theta_1, \cdots, \theta_{i -1}, \theta_{i+1} , \cdots ,\theta_d 
\in \torus$.

Because  of \eqref{parametrization} and the exactness of the map, 
we have
$$
\int_
{L_{\theta_1, \cdots, \theta_{i -1}, \theta_{i+1} , \cdots ,\theta_d}  }
\alpha  = 
a_i + 
\int_
{L_{\theta_1 +\omega_1, \cdots, \theta_{i -1} + \omega_{i-1}, 
\theta_{i+1}+\omega_{i+1} , \cdots ,\theta_d +\omega_d}} \alpha
$$

And, integrating over the variables 
$
\theta_1, \cdots, \theta_{i -1}, \theta_{i+1} , \cdots, \theta_d 
$, we obtain 
$$
a_i = 0
$$
\end{proof}


\begin{remark} 
This Theorem (and the Proposition)
are  much weaker than what can be proved by the method. 

For example, the hypothesis that the system is close to 
integrable can be replaced by several quantitative statements 
about the approximate solution.  This is quite important  
for several applications.  We note that approximate 
embeddings can be obtained with the computer. One can try 
to solve a discretized Fourier series or, a big advantage for
the present method, just compute orbits and compute the 
Fourier transform.  
This improvement is discussed in more detail 
in Remark~\ref{rem:alternative}.

Once this improvement is in place, it should be apparent that the 
way that the torus is embedded does not play any role.
We do not need that the system is close to integrable 
or that the tori are close to integrable.
(This is the main difference with the exposition of 
\cite{Bost86}.) 

In particular, 
we can justify the existence of tori which have 
 different  topology than
the tori of the unperturbed system. 
(Recently there has been some interest in these 
secondary tori since there are numerical 
experiments that suggest that secondary tori are very important for the 
statistical properties of coupled systems \cite{HaroL00}.)


Also, an important advantage of the method is that it allows one
to deal with more degenerate situations than the twist mapping. 
Indeed, one can use it to deal with non-twist maps and with 
even more degenerate situations.


We refer the reader to \cite{JorbaLV00} for these precisions as well
as for more details about the proof. 
We also refer to \cite{JorbaLZ00} for another application 
of similar techniques to discuss lower dimensional tori.
\end{remark}


Now, we  start describing the main ideas of the proof.
Again, we refer to  \cite{JorbaLV00} for more details.


The method of proof will be an iterative procedure 
in which we start  from \eqref{parametrization} being satisfied 
with a certain error and return a solution that satisfies the equation 
with a smaller error. Of course, as usual in this theory, what 
we mean by smaller  error is that the size of the new error 
will be bounded (in a smaller domain than the original 
one) by the square of the  size of the original error 
times a factor that is the domain loss parameter to 
a negative power.  Of course, by now the convergence
of the procedure should be well understood. Actually, since 
we do not need to make changes of variables and we do not 
need to keep  track of much of the geometric structure, 
the inductive hypothesis  will be very mild.

We will begin with a heuristic discussion. 

We start with an approximate solution of 
\eqref{parametrization}, that is:
\begin{equation}\label{approximatepar}
F\circ K(\theta) - K(\theta + \omega) - (0, a) = R(\theta)
\end{equation}
where $R$ is small in some appropriate norm that we will make 
precise later.

The Newton method prescription would be to change 
$K$ into $K + \Delta$, $a$ into $a + \alpha$ in such a way
that 
\begin{equation}\label{newtonpar}
DF\circ K(\theta)\ \Delta(\theta) - \Delta(\theta + \omega) 
- (0,\alpha) = - R(\theta) \ .
\end{equation}

Unfortunately, this equation is not readily solvable
by easy methods such as comparing Fourier coefficients
since it involves the non-constant coefficient
factor $DF\circ K(\theta)$. 

Hence, we try to compare it 
with the equation obtained taking derivatives 
of \eqref{approximatepar}:
\begin{equation} \label{approximateparder}
DF\circ K(\theta) \partial_\theta K(\theta) -
\partial_\theta K(\theta + \omega )  =  \partial_\theta R(\theta) .
\end{equation}

At this point, we are going to introduce some notation
(which is not completely necessary but which will make the 
geometry more concrete). We define 
$D(\theta) := DF \circ K(\theta)$
and let
$K_1(\theta)$
be an orthogonal basis 
for $\partial_\theta K(\theta)$. 
The  previous equation  \eqref{approximateparder}
reads $D(\theta) K_1(\theta) - K_1(\theta + \omega) = R_1(\theta)$.
As usual, we define the matrix 
$$ J = 
\left( 
\begin{array}{cc}
 0 & \Id_d \\
 -\Id_d  & 0
\end{array} 
\right) \ ,
$$
which is the representation in 
coordinates of the symplectic form.


We define then the symplectic matrices.
\begin{equation}\label{Mmatrix}
M(\theta) = [ K_1(\theta), J K_1(\theta)] 
\end{equation}

Notice that  from the fact that 
$K_1$ is almost invariant under $D(\theta)$ 
we obtain that:
\begin{equation} \label{invariantform}
D(\theta) M(\theta) = M(\theta + \omega) 
\left( 
\begin{array}{cc}
\Id & A(\theta) \\
 0  & B(\theta) 
\end{array} 
\right) \,  + O( R ) 
\end{equation}


We will introduce the assumption that 
$M(\theta)$ is invertible for all 
$\theta$.

This is a reasonable assumption in view of 
the fact that, for integrable  systems%
\footnote{
Indeed, this is the only reason why we assumed 
that $F$ was close to integrable. If  we had
formulated the theorem assuming that 
$M$ is invertible, we could have eliminated the 
assumption  of close to integrability. 
Later, we will need to formulate the non-degeneracy 
assumption using this matrix $M$.
}
(this is explained in more detail in 
Remark~\ref{rem:alternative} and  
in the references quoted there),
using \eqref{integrableexample}
we have: 
$$
M(\theta) = 
\left( 
\begin{array}{cc}
\Id_d &0 \\
0 & \Id_d
\end{array} 
\right) \ .
$$


Recall also that 
the assumption that the map $F$
preserves the symplectic form 
is equivalent to
\begin{equation} \label{preservation}
J DF(x) = \left [  DF(x)^t \right]^{-1} J \ .
\end{equation}
This  gives that  $B(\theta) = \Id_d$.


We note that in the integrable case, the matrix
$A(\theta)$ will be a constant $d \times d$ matrix $A$ and, 
the twist condition implies that $A$ is invertible. 
Hence, in the proof of the theorem, we will  assume that 
$A(\theta)$ is not very far 
from a constant, invertible matrix
in the sense that its average $\bar A$ is an invertible matrix. 
Indeed, this is the only non-degeneracy condition that we will 
assume. 

We call attention to the fact that the non-degeneracy assumption
only amounts to the invertibility of $M$ and the 
fact that  the average $\bar A$ of $A(\theta)$ is invertible. 
These assumptions could be checked a  posteriori on 
a numerically computed solution or on an approximate 
solution produced by any other means. Other than that, 
we do not need any property of the map $F$.
See Remark \ref{rem:alternative} and the references quoted there
for an explanation of this alternative approach.

\begin{remark}
It is an easy exercise to show that, under Diophantine conditions 
we can reduce the block $A(\theta)$ to a constant, so that the matrix
is indeed reducible. Nevertheless, for the applications that 
we have in mind, this does not help. Indeed, by doing it, we incur
extra small denominator estimates, which can worsen the result. 
\end{remark}


\begin{remark}
A more geometric interpretation of the previous 
calculations is to say that 
$DF \circ K(\theta)$ is a reducible matrix
whenever $K$ is a parameterization of 
an invariant torus  by a rigid rotation. 

We want to give a geometric argument that shows 
that the linearization of the equations around
an invariant torus is reducible.
The argument will show that for an approximate 
solution, the equation will be approximately 
reducible and, hence that one can start 
an iterative procedure in which in the iterative 
step we improve the solution of the main equation and 
its reducibility.


That is,  our goal is to find a system of coordinates on the tangent 
of the torus so that the matrix representing 
$DF \circ K(\theta)$  has constant coefficients.

Since the vectors along the direction of $\theta$ are 
moved just by a rotation  in the torus, this is an invariant 
field that can be lifted to the space by the embedding. 
By the preservation of the symplectic structure, 
we also have that the plane spanned by the 
vector and its symplectic conjugate is
also preserved. 
We can see that in the plane spanned 
by a vector and its symplectic conjugate the matrix 
has to be upper triangular (one vector is preserved).
The dilation along the symplectic conjugate has to be the 
inverse of the dilation along the preserved direction 
due to the requirement that the two-area in the plane is 
preserved. This gives us the diagonal blocks of the 
matrix. The upper off-diagonal block causes no trouble.

This system of coordinates 
provides us with a system in which the derivative is 
upper triangular.

Once we have that the diagonal blocks are constant,
then it is easy  to see that the linearized equation can be 
solved by using equations of the form \eqref{diff}.


The above geometric interpretation makes it clear 
that we do not need  the symplectic form
 to be constant. Moreover, it is clear that it 
does not require that the symplectic form has 
action-angle variables and that it can 
accommodate certain singularities.

\end{remark}


The algorithm is now very easy. 
If we write $\Delta(\theta) = M(\theta) w(\theta) $
and substitute in \eqref{newtonpar}
we obtain 
\begin{equation}
D(\theta) M(\theta) w(\theta) - M(\theta + \omega) w(\theta + \omega) 
- (0,\alpha) = - R(\theta)
\end{equation}
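which, using \eqref{invariantform} with $B(\theta) = \Id_d$ and 
denoting by $N(\theta)$ the $O(R)$ term in \eqref{invariantform},
becomes: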
\begin{equation} \label{transformed}
M(\theta + \omega) \left[
\left( 
\begin{array}{cc}
\Id_d  &A(\theta)\\
0 &  \Id_d
\end{array}
\right) w(\theta) - w (\theta + \omega) \right]
-(0,\alpha)  = - R(\theta) - N(\theta) w(\theta) \ .
\end{equation}
Therefore, ignoring the 
last term of the \RHS of  \eqref{transformed}, which 
is quadratic, we are led to the 
study of the equation:

\begin{equation}\label{tosolvepar}
\left( 
\begin{array}{cc}
\Id_d  &A(\theta)\\
0 &  \Id_d
\end{array}
\right) w(\theta) - w (\theta + \omega)
= - M(\theta + \omega)^{-1} \left[ R(\theta) - (0,\alpha) \right]
\end{equation}

We claim that this equation for $w$, $\alpha$ 
can be studied using the methods that we have 
developed in Section \ref{linear_estimates}. 
This will constitute our iterative step. 
Of course, after this heuristic derivation, we will 
need to go back and justify the estimates of the step 
and show that it can be iterated. 
This, even if being the essential part of the proof, 
we hope will bring no surprises anymore for the reader.

If we write \eqref{tosolvepar} in components,
 denoting the components of 
$w(\theta) = (w_\phi(\theta), w_I(\theta) ) $
and by $\Pi_\phi$,$\Pi_I$ the projections over the
components,
we have:

\begin{equation}\label{tosolveparcomp}
\begin{split}
w_\phi(\theta) + A(\theta) w_I(\theta) - 
w_\phi(\theta + \omega)  & = 
- \Pi_\phi \left( M(\theta + \omega)^{-1}
 \left[ R(\theta) - (0,\alpha) \right] \right) \\
w_I (\theta)  - w_I (\theta + \omega)  & = 
- \Pi_I \left( M(\theta + \omega)^{-1}
\left[ R(\theta) - (0,\alpha) \right]
\right)
\end{split}
\end{equation}

If we look at the second equation  in
\eqref{tosolveparcomp} 
(recall that it is an equation for $w_I$ and $\alpha$) 
we see that it is an equation of the form 
\eqref{diff} which we have already studied.
We choose $\alpha$ in such a way that the \RHS 
has average  $0$. (This can be done if 
$M$ is close to the identity, but otherwise 
it can be made into an assumption to be checked 
a posteriori on the approximate solution.)

Note that we have bounds 
\begin{equation}\label{alphabounds}
|\alpha| \le C\|R\|_\sigma \|M^{-1}\|_\sigma \| 
        (M^{-1} - \Id)^{-1}\|_\sigma\end{equation}

We assume for convenience 
(somewhat sharper assumptions could also 
work, see Remark \ref{rem:alternative})  that the 
factors  in the \RHS of \eqref{alphabounds} 
satisfy:
\begin{equation}\label{inductive1par}
\| M^{-1}\|_\sigma   \| (M^{-1} - \Id)^{-1}\|_\sigma \le 333 \ .
\end{equation}

Then we can apply Lemma \ref{linearestimates} to 
obtain  $w_I$ up to a constant,
which we will determine in the next equation.

We have, denoting by $\tilde w_I$ the solution with 
zero average:
\begin{equation} \label{wIestimates}
\| \tilde w_I\|_{\sigma -\delta} \le C \delta^{-\nu} \|R\|_\sigma \ .
\end{equation}

If we look at the first equation of 
\eqref{tosolveparcomp} 
we see that, at this stage of the argument, it is 
an equation only for $w_\phi$ and the average 
of $w_I$. 
Hence, we write it as 
\begin{equation}\label{firsteqtransformed}
w_\phi(\theta) - w_\phi(\theta + \omega)  = 
- \Pi_\phi \left( M(\theta + \omega)^{-1} \left[ R(\theta) - (0,\alpha) \right] \right) 
-  A(\theta) w_I(\theta) \ . 
\end{equation}

If we assume that 
\begin{equation} \label{inductive2par}
\| (\bar A)^{-1} \| \le 333 \ ,
\end{equation}
we can determine
$\bar w_I$ so that the terms in the \RHS of 
\eqref{firsteqtransformed} have average 
$0$. We have:
\[
| \bar w_I | \le C\|R\|_\sigma
\]
We will furthermore assume that 
\begin{equation} \label{inductive3par}
\|  A \|_\sigma \le C
\end{equation}

Hence, we can apply Lemma \ref{linearestimates} 
and obtain a $w_\phi$ with zero average 
which satisfies:
\begin{equation}\label{wphiestimates}
\|w_\phi \|_{\sigma - 2\delta } \le C\delta^{-2\nu} \|R\|_\sigma \ .
\end{equation}
Note that the power  of $\delta$ in this case is twice as high 
as that in the previous one since the \RHS of
\eqref{firsteqtransformed} involves the solution of the previous one.


From \eqref{wIestimates}, \eqref{wphiestimates}, using the inductive 
assumptions on the size of $M$, we obtain 
$$
\|\Delta\|_{\sigma - 2 \delta } \le  C \delta^{-\tau} \|R\|_\sigma .
$$

From this, the rest of the proof of the translated 
tori theorem is very similar to the 
previous proofs, in particular to  the proof of 
Theorem \ref{Siegel1}. 

Under the assumption that 
\begin{equation}\label{inductive0par}
\|K \|_\sigma + \| \Delta\|_{\sigma - 4 \delta } \le   \Sigma  - \delta
\end{equation} 
where $\Sigma$ denotes the size of the domain of analyticity 
of $F$, we can define the composition  $F\circ( K + \Delta)$
and indeed the range of $K +\Delta$ is at least a distance 
$\delta$ from the boundary of the domain of definition 
of $F$.

Note that adding and subtracting and using Taylor's theorem 
to control the  terms neglected to derive \eqref{newtonpar},
(and  Cauchy bounds to control the size of the derivatives
involved)
we get: 
\begin{equation}
\|\tilde R\|_{\sigma -4 \delta} \le C \delta^{-\tau'} \| R\|_\sigma^2 \ .
\end{equation}

From this, we  can conclude as in the previous cases that if
the original remainder is small, then the iteration can be carried 
out an arbitrarily  large
number  of times, moreover,  the final remainder in its domain 
of definition  keeps decreasing.

Note also that this proof -- in contrast with those based on 
composition -- does not require any subtle inductive  hypothesis 
to ensure that the domains of the composition match.
These assumptions, that we had  to consider in the proofs based on 
composition are subtle because they
require that the errors decrease faster than 
the analyticity losses.

In this case, the only assumptions that we have to check 
are
\eqref{inductive0par}, 
\eqref{inductive1par}, 
\eqref{inductive2par}, 
\eqref{inductive3par}. 

We can see that if we start with a small enough residual,
the iterative procedure does not change $A$  or $M$ much, 
so that using in the step bounds which are twice the ones at the 
start, the estimates of the step remain valid if 
the original error is small enough.

\begin{remark}
We  emphasize that the only thing that we need to get the proof
started is an approximate solution of the functional 
equation. 

This can be obtained in a variety of ways. For 
example if the system was close to integrable, 
one could take as an initial guess the 
parameterization of the integrable system. 

Other choices are possible. One could 
use a few steps of the Lindstedt series. 
In such a case, the proof will establish that the 
Lindstedt series is asymptotic.

More audaciously, one could use 
the results of a non-rigorous, numerical 
algorithm. Provided that one can verify
rigorously that one has an approximate solution, 
one  then obtains a rigorous proof of the existence of 
these circles.  These issues will be explored in 
more detail in Section \ref{compassisted}.

We also note that the present proof does not require 
much from the function except that it gives 
a parameterization of an invariant torus. 
In particular, it can apply to tori of topological 
types not present in the original system.
\end{remark}


\begin{remark}
Another proof without changes of variables 
can be found in \cite{Bost86} 
which is based on  unpublished work of 
M. Herman. This proof contains 
also a translated curve theorem. 

The main difference with the proof presented here is
that that method parameterizes the curves by the graph of 
a function. When studying tori that are not graphs, 
it requires that one performs a preliminary change of 
variables. 
\end{remark}

\begin{remark}
The twist hypothesis in this method of proof
can be bargained away considerably.  
\end{remark}

\begin{remark} \label{rem:alternative}
A variant of the method that is useful in the study of 
lower dimensional tori or for some degenerate situations, 
is  to take as  a starting point of the procedure not just the $K$
but the $K$ and the $M$, which,  respectively, 
almost solve the equation and almost reduce the equation 
to constant coefficients.

The  iterative step, uses the $M$ to solve the 
equation and then updates the $M$ so that it reduces 
the new linearized equation to an even higher approximation. 

By intertwining the improvement in $K$ and in $M$ it is 
possible to achieve quadratic convergence. 

One advantage of this improvement is that, if one studies this 
for lower dimensional tori, both the $K$ and the $M$ 
can be computed perturbatively. 
The approximate $K$ is a polynomial in the perturbation 
parameter, nevertheless, the $M$ is a polynomial in the 
square root. Hence, the iterative method based on both 
approximations can capture the 
singularity structure much better than the 
approximation we have discussed here.


We refer to \cite{JorbaLZ00} for more details about the method
for lower dimensional tori.
\end{remark}


\begin{remark} 
One feels that these methods of reducing the equation 
to constant coefficients are a bit of overkill. 
When one tries to invert an operator, 
diagonalizing it is rather more than 
what is needed. 

Indeed, the great advances in KAM for PDE's
started when the emphasis went  from 
diagonalizing the linear operator as was done 
usually in KAM theory to just using estimates 
from the inverse.  (See \cite{CraigW93}, \cite{Bourgain95}.)
Even if we will not discuss it in these notes, when one 
considers elliptic lower dimensional  tori some of the
resonances that appear in some proofs are obstructions 
to the diagonalization of the operator, not to the 
invertibility. Therefore, they can be 
eliminated from the proofs of the existence of the torus
if one relies on inverting the operator rather than just 
diagonalizing it. (See \cite{Bourgain97}.)
Related to this issue we call attention to 
the lectures of Prof.~Eliasson in this meeting. 
He shows that even if it could happen that the 
tori are not reducible, using his non-perturbative
results, they are arbitrarily close to reducible. 
This is enough to continue the iterative procedure.
\end{remark}

\begin{remark}
One issue that still is quite puzzling to me is that 
if one performs the Lindstedt series for lower 
dimensional tori, one  encounters only small denominators
related to the frequencies of the motion on the torus. 
This is significantly fewer  small denominators 
than those appearing in the proofs 
mentioned above in which one 
needs to take into account denominators which 
happen when harmonics of the intrinsic frequencies of 
the torus are close to a normal frequency.
The proofs in which one also proves 
reducibility of the lower dimensional 
torus, require even more small denominators
conditions. In them,
one has to take into account the 
cases when differences  of two normal 
frequencies become a combination of the frequencies of 
motion in the torus.

In \cite{JorbaLZ00}, one can find a proof of the fact that the 
Lindstedt series is asymptotic and defines an analytic function in 
a large sector. (One has to exclude an exponentially thin wedge.)
Nevertheless, the convergence or not of these series has not 
been settled.

Note that at the same time that one develops the series 
for the torus, one develops also a series for the reducing matrix
which also does not present other small divisors than those of 
the intrinsic frequencies. The convergence of this series 
has not been settled either.
\end{remark}

\section{ Some remarks on computer assisted proofs}\label{compassisted}

The existence or non-existence of invariant tori in a system 
appearing in a concrete application could have 
enormous practical importance. 

For example, there are many systems such as 
accelerators or plasma devices that are modeled
rather well  by Hamiltonian systems. 
The existence of tori in these systems has
very drastic effects in their long term 
behavior. For example, if the system is 
a two dimensional map, the existence of 
invariant circles, will imply that one region of 
phase space will remain trapped for ever. 
This is of great interest for plasma devices 
whose goal is to confine a plasma or for 
accelerators that  try to keep a beam of particles in place.
Indeed, many of these devices are designed in such a
way that they maximize the {\sl abundance }
and robustness of invariant tori. One hopes that, 
even if the Hamiltonian approximation is not 
completely accurate, the KAM tori will survive somehow.

In celestial mechanics, one is interested also in 
finding regions with invariant tori since they are 
suitable for parking orbits. 

The judicious numerical experimentation with 
dynamical systems has been a great source of 
insight and inspiration, even if, of course, 
much of the work is non-rigorous and, hence, does 
not fit well with this tutorial.
We refer the reader to  \cite{Henon83}, \cite{Simo98}
for some study of the issues involved in numerical 
computations and to \cite{Meiss92} for
a point of view closer to the physical applications

Of course, in these applications, one also wants to get, 
besides the existence, information about the shape of 
the torus and more details about its properties.
What we want to discuss in this section  is how some of
these non-rigorous  calculations can be turned into theorems.


The basic observation is that 
some of the KAM  proofs
we have presented here  have
the structure that they formulate a 
functional equation and show that, given an approximate solution
which is not too bad from the analytic point of view, then 
there is a true solution, which, moreover is not too 
far from the approximate solutions.
These constructive  methods do not require that the system 
is close to integrable.

Note that these proofs do not care about how we have produced 
the  approximate solutions. The only thing that we need 
to  verify rigorously is  that these approximate 
solutions  indeed solve the functional equation
 up to a small error
and that  their analyticity properties are adequate.
Hence the problem of justifying that these computed 
solutions correspond to a true one 
reduces to showing rigorously  that these numerically 
specified functions indeed solve the 
desired functional equation  with a good accuracy
and verifying  rigorously their 
analyticity properties. 

Of course, given one polynomial with a few coefficients 
one could imagine studying its 
properties  with respect to an easy 
equation such as \eqref{lagrangianremainder} 
with a pencil and a  notepad.  
(See \cite{Herman86} for an example of
these verifications with pencil and paper.)
Nevertheless, if the number of coefficients approaches 
those needed for what is considered good accuracy in numerical 
calculations (this is often a few  hundred or a few thousand coefficients),
using a notepad becomes impossible. 

One would like to use a computer. 
The problem with using a computer is that, as they are used most 
commonly, computers do not deal with real numbers
and they do not perform on them the mathematical
arithmetic operations.
In their normal mode of working, computers 
deal only 
with a finite set of numbers, the {\sl representable} numbers
\begin{footnote}
In modern computers, there  are almost 
universally  around $2^{64}$ representable numbers, 
those which can be written in 8 bytes -- there are a few 
delicate  and complicated issues such as denormalized numbers. 
Most computers also use for certain calculations 
numbers with $80$ bits, that is, $2^{80}$ numbers. 

There is a rather detailed standard by IEEE  
\cite{IEEE85}
on how to perform arithmetic in numbers.  It does 
not only specify the precision to be used, but also 
rounding and how to report troubles such as attempted 
division by zero or overflow.
This standard is now almost universally implemented 
in the chips and the languages 
(rather inexplicably Java did not include it)  and there are good tests of 
compliance so that one can assess one's arithmetic. 
See \cite{Kahan96}.
\end{footnote}
On these representable numbers, we perform arithmetic operations 
which are approximations of the arithmetic operations 
among real numbers.

These operations produce an approximation to the true answer 
if at all possible
\begin{footnote}
The process of taking the true result and
producing a representable number is called {\sl rounding}. 
Returning a representable number that is larger than the 
true result is called rounding up, similarly 
rounding down,  rounding to nearest, rounding towards zero
etc. 
The IEEE standard mentioned above specifies that the 
user can control 
the properties of the rounding and of the exceptions 
by setting a control word.
\end{footnote}
or if it is impossible to give 
a reasonable answer in terms of representable 
numbers (e.g. if you ask to multiply by 10 the largest representable 
number or to divide by 
zero) they do not return an answer, but they {\sl 
raise an exception} which typically does something drastic
such as  causing the program to terminate abruptly, perhaps 
copying  the state of the memory to a file 
({\sl dumping a core}) that can be examined to
trace the problem. 
(A good discussion of the subtleties involved in the 
implementation of floating point arithmetic is 
\cite{Knuth97}.)


One problem with this approximate way of proceeding
is that approximate of 
approximate  may not be approximate enough. 
Much less if one repeats the process  of approximation 
a large number of times.  Of course, given that a computer 
nowadays produces over one hundred million operations in 
a second, we have to worry about the effect that 
performing millions of approximations may lead 
us away from a good approximation.

As every good  numerical analyst knows, producing numbers is not too 
difficult. Unless the computer catches fire, you will get 
numbers. The really difficult issue is to produce numbers that can 
be trusted.  More difficult even is to devise methods
that ensure the numbers produced can be trusted. 
One should keep in mind that most of the technology 
and research
happens  at the borderline regions  when the algorithms are about 
to break. (If the problems we are 
studying were safely solvable, we would fix the situation 
going to a more challenging problem.)


The problem of reliability of arithmetic 
calculations  is significantly more pressing for the problems 
involving small divisors. We have seen already that the 
Lindstedt series  manage to converge only through massive 
amounts of cancellations. Cancellations are  one of 
the worst enemies of accuracy in floating point 
calculations. Since computers 
keep a fixed number of digits, adding numbers that  
cancel almost exactly, will lead to a catastrophic lack 
of precision (e.g.  if  we have 1.00001 and  1.00000 
exact up to six digits, their difference will only have
one exact digit.) 
Many of the problems with small divisors are such that the
numerics deteriorates in a complicated way  until 
the algorithms blow up or start behaving erratically.


\begin{exercise} 
One of the standard programs to assess the characteristics
of a computer is 

\begin{verbatim}
epsilon = 1.0; 
oneplus = 1.0 + epsilon;
count = 0;
while (oneplus > 1.0){ 
	epsilon /= 2.0;
	oneplus = 1.0 + epsilon;
	count++;
}
printf("%d", count); 

\end{verbatim}

Run  it on your computer. 

Run also

	
\begin{verbatim}
epsilon = 1.0; 
count = 0;
while ( 1.0 + epsilon > 1.0){ 
	epsilon /= 2.0;
	count++;
}
printf("%d", count); 
\end{verbatim}

Chances are that the results will  be quite different.
Explain why.

\end{exercise}

\begin{exercise} 
The computer program Mathematica uses a 
numerical scheme in which high precision numbers drop
precision if the last figures cannot be kept. 

This leads to some 
unexpected effects. 

Run
\begin{verbatim}
a = N[Pi,40]
Do[ a = 2*a -a , {100}]
\end{verbatim}
and discuss the results.
\end{exercise}

One way of obtaining reliable results from a computer
without sacrificing too much performance is to 
use {\sl interval arithmetic}
(see \cite{Moore79}, \cite{KaucherM84}).
The idea is that a real variable is represented 
by two representable numbers which are supposed to 
mean  an upper and a lower bound for  the value of the 
variable we are interested in.

Once one has bounds for the values of 
a variable, one can operate on these bounds 
in such a way that one always keeps obtaining  bounds. 
The only subtlety is that when adding upper bounds, one 
has to round up, adding lower bounds, one has to round down,
etc.  This can be done by reprogramming pieces of the 
arithmetic, or, in systems that conform to the IEEE standard
by setting appropriately the control word.
This quickly leads to an arithmetic among intervals that 
can produce bounds of arithmetic expressions given bounds 
on the variables.


One can pass from bounds on arithmetic expressions 
to bounds on sets in functional spaces. For example, 
one can specify a set in 
function space.  For instance, 
we can specify a set of analytic functions by 
\begin{equation} \label{representablesets}
U_{v_0, \ldots, v_N; \ep}  = \{ f(z) \quad |\quad f(z) =
\sum_{i = 0}^N f_i z^i + f_e(z) , f_i \in v_i, || f_e||_1\le \ep \}
\end{equation}
where $v_i$ are intervals (i.e. pairs of representable 
numbers) and $\ep$ is a representable number.  
(There are, of course, many variants. One can 
for example, take into account that some errors 
are high order, use other norm for the error 
or even several norms at the same time.)

It is reasonably easy to imagine how one can 
define operations on sets of the type  in \eqref{representablesets}
such that the numerical operations bound the real operations on 
sets.  With a bit more of imagination, one can do compositions, 
integrals, and other operations. In particular, one can implement
the operations involved in the evaluation of the terms 
in \eqref{linstedtlagrange2}.

If starting with the numerically produced non-rigorous guess
one can use the rigorous interval arithmetic to verify 
the hypothesis of Theorem \ref{KAMlagrangian} -- or some other 
theorem enjoying a similar structure -- then, one can guarantee 
that there is a true solution near the computed one.
This strategy has been implemented in \cite{Rana87}, \cite{LlaveR90}. 
Similar ideas have been implemented in 
\cite{CellettiC95}. 

Indeed, by now, starting with the inspiring proof of 
\cite{Lanford82}
(it relied on the usual contraction mapping 
theorem rather than on the  hard implicit function theorems)
there have been a number of 
significant theorems proved with similar techniques. 
A survey of these  developments is \cite{KochSW96}.

One of the main difficulties of the method is that it requires 
spending a great deal of time 
coding the problems carefully. One 
can hope that some of the tasks could be automated
but there are difficulties. Even if automatic 
translation of arithmetic expressions produces a valid answer, 
arithmetic expressions that 
are equivalent under the ordinary rules of arithmetic
are not equivalent under interval arithmetic. 
For example in intervals 
\begin{equation} \label{subdistributive}
(a + b) \times c  \subset  a \times c + b \times c
\end{equation}
and the inclusion can be strict. 
A classic problem in interval arithmetic is to 
find fast  algorithms  to compute accurately  the image of 
the unit disk under a polynomial.

\begin{exercise} 
Give a proof of  \eqref{subdistributive} and find 
examples when it is strict. 
\end{exercise}

I personally think that  computer assisted proofs 
are a very interesting area in which 
it is possible to find a meaningful collaboration between 
Mathematicians (proving theorems of the right kind), 
Computer Scientists (developing good software tools that relieve
the tedium of programming the variants required) and 
applied scientists that have challenging real life problems.


\section{Acknowledgements}
The work of the author was supported in part 
by NSF grants.  I received substantial  assistance in the 
preparation of these notes from A. Haro, N. Petrov, and J. Vano.
Parts of  this 
work are based on unpublished joint work  
with other people that we intend to publish in fuller 
versions.
Comments from H. Eliasson, T. Gramchev,
and many other participants in the SRI and by A. Jorba, M. Sevryuk, R.
Perez-Marco
removed many 
mistakes and typos. Needless to say, they 
are not to be blamed for those that escaped 
their eye or for missing those that were introduced 
in ulterior revisions which they did not see.
I also want 
to express my appreciation of the great 
amount of work put by 
the AMS staff, especially W. Drady 
and by  A. Katok, Y. Pesin and, especially, H. 
Weiss to organize this Summer  Institute.
It was a privilege and a humbling experience  
to witness their dedication.
The enthusiasm of the participants in the 
SRI was contagious.


%\bibliographystyle{alpha}
%\bibliography{llave99}
\begin{thebibliography}{GJdlLV00}

\bibitem[AA68]{ArnoldA68}
V.~I. Arnol'd and A.~Avez.
\newblock {\em Ergodic Problems of Classical Mechanics}.
\newblock W. A. Benjamin, New York-Amsterdam, 1968.

\bibitem[Ada75]{Adams75}
R.~A. Adams.
\newblock {\em Sobolev Spaces}.
\newblock Academic Press, New York-London, 1975.

\bibitem[AF88]{albaneseFS88b}
C.~Albanese and J.~Fr{\"o}hlich.
\newblock Periodic solutions of some infinite-dimensional {H}amiltonian systems
  associated with nonlinear partial differential equations. {I}.
\newblock {\em Comm. Math. Phys.}, 116(3):475--502, 1988.

\bibitem[AF91]{AlbaneseF91}
Claudio Albanese and J{\"u}rg Fr{\"o}hlich.
\newblock Perturbation theory for periodic orbits in a class of
  infinite-dimensional {H}amiltonian systems.
\newblock {\em Comm. Math. Phys.}, 138(1):193--205, 1991.

\bibitem[AFS88]{AlbaneseFS88a}
Claudio Albanese, J{\"u}rg Fr{\"o}hlich, and Thomas Spencer.
\newblock Periodic solutions of some infinite-dimensional {H}amiltonian systems
  associated with nonlinear partial difference equations. {I}{I}.
\newblock {\em Comm. Math. Phys.}, 119(4):677--699, 1988.

\bibitem[AG91]{AlinhacG91}
Serge Alinhac and Patrick G{\'e}rard.
\newblock {\em Op\'erateurs pseudo-diff\'erentiels et th\'eor\`eme de
  {N}ash-{M}oser}.
\newblock InterEditions, Paris, 1991.

\bibitem[AKN93]{ArnoldKN93}
V.~I. Arnol'd, V.~V. Kozlov, and A.~I. Neishtadt.
\newblock Mathematical aspects of classical and celestial mechanics.
\newblock In {\em Dynamical Systems, III}, pages vii--xiv, 1--291. Springer,
  Berlin, 1993.

\bibitem[Alb93]{Albanese93}
Claudio Albanese.
\newblock K{A}{M} theory in momentum space and quasiperiodic {S}chr\"odinger
  operators.
\newblock {\em Ann. Inst. H. Poincar\'e Anal. Non Lin\'eaire}, 10(1):1--97,
  1993.

\bibitem[AM78]{AbrahamMarsden}
R.~Abraham and J.~E. Marsden.
\newblock {\em Foundations of Mechanics}.
\newblock Benjamin/Cummings, Reading, Mass., 1978.

\bibitem[Arn61]{Arnold61}
V.~I. Arnol'd.
\newblock Small denominators. {I}. {M}apping the circle onto itself.
\newblock {\em Izv. Akad. Nauk SSSR Ser. Mat.}, 25:21--86, 1961.
\newblock English translation: {\em Amer. Math. Soc. Transl. (2)},
  46:\-213--284, 1965.

\bibitem[{A}rn63a]{Arnold63a}
V.~I. {A}rnol'd.
\newblock Proof of a theorem of {A}. {N}. {K}olmogorov on the invariance of
  quasi-periodic motions under small perturbations.
\newblock {\em Russian Math. Surveys}, 18(5):9--36, 1963.

\bibitem[{A}rn63b]{Arnold63b}
V.~I. {A}rnol'd.
\newblock Small denominators and problems of stability of motion in classical
  and celestial mechanics.
\newblock {\em Russian Math. Surveys}, 18(6):85--191, 1963.

\bibitem[Arn88]{Arnold88}
V.~I. Arnol'd.
\newblock {\em Geometrical Methods in the Theory of Ordinary Differential
  Equations}.
\newblock Springer-Verlag, New York, second edition, 1988.

\bibitem[Arn89]{Arnold-MathMethods}
V.~I. Arnol'd.
\newblock {\em Mathematical Methods of Classical Mechanics}.
\newblock Springer-Verlag, New York, second edition, 1989.

\bibitem[AS86]{ArnoldS86}
V.I. Arnol'd and M.~Sevryuk.
\newblock Oscillations and bifurcations in reversible systems.
\newblock In R.~Z. Sagdeev, editor, {\em Nonlinear Phenomena in Plasma Physics
  and Hydrodynamics}, pages 31--64. Mir, Moscow, 1986.

\bibitem[Aub83]{AubryD83}
S.~Aubry.
\newblock The twist map, the extended {F}renkel-{K}ontorova model and the
  devil's staircase.
\newblock {\em Phys. D}, 7(3):240--258, 1983.

\bibitem[Bam99a]{Bambusi99b}
D.~Bambusi.
\newblock Nekhoroshev theorem for small amplitude solutions in nonlinear
  {S}chr\"odinger equations.
\newblock {\em Math. Z.}, 230(2):345--387, 1999.

\bibitem[Bam99b]{Bambusi99a}
D.~Bambusi.
\newblock On long time stability in {H}amiltonian perturbations of non-resonant
  linear {P}{D}{E}s.
\newblock {\em Nonlinearity}, 12(4):823--850, 1999.

\bibitem[Ban89]{Bangert89}
V.~Bangert.
\newblock On minimal laminations of the torus.
\newblock {\em Ann. Inst. H. Poincar\'e Anal. Non Lin\'eaire}, 6(2):95--138,
  1989.

\bibitem[Bar70]{Barrar70}
R.~B. Barrar.
\newblock Convergence of the von {Z}eipel procedure.
\newblock {\em Celestial Mech.}, 2(4):494--504, 1970.

\bibitem[BCCF92]{BerrettiCCF92}
A.~Berretti, A.~Celletti, L.~Chierchia, and C.~Falcolini.
\newblock Natural boundaries for area-preserving twist maps.
\newblock {\em J. Statist. Phys.}, 66(5-6):1613--1630, 1992.

\bibitem[BCP98]{BonettoCP98}
F.~Bonetto, E.~G.~D. Cohen, and C.~Pugh.
\newblock On the validity of the conjugate pairing rule for {L}yapunov
  exponents.
\newblock {\em J. Statist. Phys.}, 92(3-4):587--627, 1998.

\bibitem[BdlLW96]{BanyagaLW96}
A.~Banyaga, R.~de~la Llave, and C.~E. Wayne.
\newblock Cohomology equations near hyperbolic points and geometric versions of
  {S}ternberg linearization theorem.
\newblock {\em J. Geom. Anal.}, 6(4):613--649 (1997), 1996.

\bibitem[BG99]{BerrettiG98}
A.~Berretti and G.~Gentile.
\newblock Scaling properties for the radius of convergence of {L}indstedt
  series: Generalized standard maps.
\newblock {\em {\tt http://www.ma.\-utexas.edu/mp\_arc }}, 99--377, 1999.

\bibitem[BGG85a]{BenettinGG85b}
G.~Benettin, L.~Galgani, and A.~Giorgilli.
\newblock Classical perturbation theory for systems of weakly coupled rotators.
\newblock {\em Nuovo Cimento B (11)}, 89(2):89--102, 1985.

\bibitem[BGG85b]{BenettinGG85a}
G.~Benettin, L.~Galgani, and A.~Giorgilli.
\newblock Numerical investigations on a chain of weakly coupled rotators in the
  light of classical perturbation theory.
\newblock {\em Nuovo Cimento B (11)}, 89(2):103--119, 1985.

\bibitem[BGG85c]{BenettinGG85}
G.~Benettin, L.~Galgani, and A.~Giorgilli.
\newblock A proof of {N}ekhoroshev's theorem for the stability times in nearly
  integrable {H}amiltonian systems.
\newblock {\em Celestial Mech.}, 37(1):1--25, 1985.

\bibitem[BGGS84]{BenettinGGS84}
G.~Benettin, L.~Galgani, A.~Giorgilli, and J.-M. Strelcyn.
\newblock A proof of {K}olmogorov's theorem on invariant tori using canonical
  transformations defined by the {L}ie method.
\newblock {\em Nuovo Cimento B (11)}, 79(2):201--223, 1984.

\bibitem[BGK99]{BricmontGK99}
J.~Bricmont, K.~Gaw{\c{e}}dzki, and A.~Kupiainen.
\newblock K{A}{M} theorem and quantum field theory.
\newblock {\em Comm. Math. Phys.}, 201(3):699--727, 1999.

\bibitem[BH91]{BroerH91}
H.~W. Broer and G.~B. Huitema.
\newblock A proof of the isoenergetic {K}{A}{M}-theorem from the ``ordinary''
  one.
\newblock {\em J. Differential Equations}, 90(1):52--60, 1991.

\bibitem[BHS96a]{BroerHS96b}
H.~W. Broer, G.~B. Huitema, and M.~B. Sevryuk.
\newblock Families of quasi-periodic motions in dynamical systems depending on
  parameters.
\newblock In {\em Nonlinear dynamical systems and chaos (Groningen, 1995)},
  pages 171--211. Birkh\"auser, Basel, 1996.

\bibitem[BHS96b]{BroerHS96}
H.~W. Broer, G.~B. Huitema, and M.~B. Sevryuk.
\newblock {\em Quasi-Periodic Motions in Families of Dynamical Systems. {\rm
  Order Amidst Chaos}}.
\newblock Springer-Verlag, Berlin, 1996.

\bibitem[Bib79]{Bibikov79}
Yu.~N. Bibikov.
\newblock {\em Local Theory of Nonlinear Analytic Ordinary Differential
  Equations}.
\newblock Springer-Verlag, Berlin, 1979.

\bibitem[Bla84]{Blanchard84}
P.~Blanchard.
\newblock Complex analytic dynamics on the {R}iemann sphere.
\newblock {\em Bull. Amer. Math. Soc. (N.S.)}, 11(1):85--141, 1984.

\bibitem[BM61]{BogoliubovMitropolsky}
N.~N. Bogoliubov and Y.~A. Mitropolsky.
\newblock {\em Asymptotic Methods in the Theory of Non-Linear Oscillations}.
\newblock Hindustan Publ. Corp., Delhi, Gordon and Breach, New York, 1961.

\bibitem[BM95]{BerrettiM95}
A.~Berretti and S.~Marmi.
\newblock Scaling, perturbative renormalization and analyticity for the
  standard map and some generalizations.
\newblock {\em Chaos Solitons Fractals}, 5(2):257--269, 1995.

\bibitem[BN98]{BambusiN98}
D.~Bambusi and N.~N. Nekhoroshev.
\newblock A property of exponential stability in nonlinear wave equations near
  the fundamental linear mode.
\newblock {\em Phys. D}, 122(1-4):73--104, 1998.

\bibitem[Bos86]{Bost86}
J.-B. Bost.
\newblock Tores invariants des syst\`emes dynamiques hamiltoniens (d'apr\`es
  {K}olmogorov, {A}rnold, {M}oser, {R}\"ussmann, {Z}ehnder, {H}erman,
  {P}\"oschel,$\,\ldots$).
\newblock {\em Ast\'erisque}, No. 133--134:113--157, 1986.
\newblock Seminar Bourbaki, Vol.\ 1984/85.

\bibitem[Bou97]{Bourgain97}
J.~Bourgain.
\newblock On {M}elnikov's persistency problem.
\newblock {\em Math. Res. Lett.}, 4(4):445--458, 1997.

\bibitem[Bou99a]{Bourgain95}
J.~Bourgain.
\newblock Nonlinear {S}chr\"odinger equations.
\newblock In {\em Hyperbolic Equations and Frequency Interactions (Park City,
  UT, 1995)}, pages 3--157. Amer. Math. Soc., Providence, RI, 1999.

\bibitem[Bou99b]{Bourgain00}
Jean Bourgain.
\newblock Periodic solutions of nonlinear wave equations.
\newblock In {\em Harmonic analysis and partial differential equations
  (Chicago, IL, 1996)}, pages 69--97. Univ. Chicago Press, Chicago, IL, 1999.

\bibitem[Brj71]{Brjuno71}
A.~D. Brjuno.
\newblock Analytic form of differential equations. {I}.
\newblock {\em Trudy Moskov. Mat. Ob\v s\v c.}, 25:119--262, 1971.
\newblock English translation: {\em Trans. Moscow Math. Soc.},
  25(1971):\-131--288, 1973.

\bibitem[Brj72]{Brjuno72}
A.~D. Brjuno.
\newblock Analytic form of differential equations. {I}{I}.
\newblock {\em Trudy Moskov. Mat. Ob\v s\v c.}, 26:199--239, 1972.
\newblock English translation: {\em Trans. Moscow Math. Soc.},
  26(1972):\-199--239, 1974.

\bibitem[Bru89]{Brjuno89}
A.~D. Bruno.
\newblock {\em Local Methods in Nonlinear Differential Equations}.
\newblock Springer-Verlag, Berlin, 1989.

\bibitem[BW65]{BornWolf65}
M.~Born and E.~Wolf.
\newblock {\em Principles of Optics: {E}lectromagnetic Theory of Propagation,
  Interference and Diffraction of Light}.
\newblock Pergamon Press, Oxford, revised edition, 1965.

\bibitem[BZ82]{BraessZ82}
D.~Braess and E.~Zehnder.
\newblock On the numerical treatment of a small divisor problem.
\newblock {\em Numer. Math.}, 39(2):269--292, 1982.

\bibitem[Cal70]{Calabi}
E.~Calabi.
\newblock On the group of automorphisms of a symplectic manifold.
\newblock In {\em Problems in {A}nalysis (Lectures at the Symposium in {H}onor
  of {S}alomon {B}ochner, Princeton Univ., Princeton, N.J., 1969)}, pages
  1--26. Princeton Univ. Press, Princeton, N.J., 1970.

\bibitem[Car81]{Cary81}
John~R. Cary.
\newblock Lie transform perturbation theory for {H}amiltonian systems.
\newblock {\em Phys. Rep.}, 79(2):129--159, 1981.

\bibitem[CC95]{CellettiC95}
A.~Celletti and L.~Chierchia.
\newblock A constructive theory of {L}agrangian tori and computer-assisted
  applications.
\newblock In {\em Dynamics Reported}, pages 60--129. Springer, Berlin, 1995.

\bibitem[CCSPC97]{CassettiCPC97}
L.~Casetti, M.~Cerruti-Sola, M.~Pettini, and E.~G.~D. Cohen.
\newblock The {F}ermi-{P}asta-{U}lam problem revisited: stochasticity
  thresholds in nonlinear {H}amiltonian systems.
\newblock {\em Phys. Rev. E (3)}, 55(6, part A):6566--6574, 1997.

\bibitem[CEL84]{CrandallEL84}
M.~G. Crandall, L.~C. Evans, and P.-L. Lions.
\newblock Some properties of viscosity solutions of {H}amilton-- {J}acobi
  equations.
\newblock {\em Trans. Amer. Math. Soc.}, 282(2):487--502, 1984.

\bibitem[CF94]{ChierchiaF94}
L.~Chierchia and C.~Falcolini.
\newblock A direct proof of a theorem by {K}olmogorov in {H}amiltonian systems.
\newblock {\em Ann. Scuola Norm. Sup. Pisa Cl. Sci. (4)}, 21(4):541--593, 1994.

\bibitem[CF96]{ChierchiaF96}
L.~Chierchia and C.~Falcolini.
\newblock A note on quasi-periodic solutions of some elliptic systems.
\newblock {\em Z. Angew. Math. Phys.}, 47(2):210--220, 1996.

\bibitem[CG82]{ChierchiaG82}
L.~Chierchia and G.~Gallavotti.
\newblock Smooth prime integrals for quasi-integrable {H}amiltonian systems.
\newblock {\em Nuovo Cimento B (11)}, 67(2):277--295, 1982.

\bibitem[CL83]{CrandallL83}
M.~G. Crandall and P.-L. Lions.
\newblock Viscosity solutions of {H}amilton-{J}a\-co\-bi equations.
\newblock {\em Trans. Amer. Math. Soc.}, 277(1):1--42, 1983.

\bibitem[Cre28]{Cremer28}
H.~Cremer.
\newblock Zum {Z}entrumproblem.
\newblock {\em Math. Ann.}, 98:151--163, 1928.

\bibitem[Cre38]{Cremer38}
H.~Cremer.
\newblock {\"U}ber die {H}{\"a}ufigkeit der {N}ichtzentren.
\newblock {\em Math. Ann.}, 115:573--580, 1938.

\bibitem[CS91]{ChulaevskyS91}
V.~A. Chulaevsky and Ya.~G. Sina{\u\i}.
\newblock The exponential localization and structure of the spectrum for $1${D}
  quasi-periodic discrete {S}chr\"odinger operators.
\newblock {\em Rev. Math. Phys.}, 3(3):241--284, 1991.

\bibitem[CS94]{ChengS94}
Chong~Qing Cheng and Yi~Sui Sun.
\newblock Existence of {K}{A}{M} tori in degenerate {H}amiltonian systems.
\newblock {\em J. Differential Equations}, 114(1):288--335, 1994.

\bibitem[CS90]{ChengS89}
Chong~Qing Cheng and Yi~Sui Sun.
\newblock Existence of invariant tori in three-dimensional measure-preserving
  mappings.
\newblock {\em Celestial Mech. Dynam. Astronom.}, 47(3):275--292, 1989/90.

\bibitem[CW93]{CraigW93}
W.~Craig and C.~E. Wayne.
\newblock Newton's method and periodic solutions of nonlinear wave equations.
\newblock {\em Comm. Pure Appl. Math.}, 46(11):1409--1498, 1993.

\bibitem[CW94]{CraigW94}
W.~Craig and C.~E. Wayne.
\newblock Periodic solutions of nonlinear {S}chr\"odinger equations and the
  {N}ash-{M}oser method.
\newblock In {\em {H}amiltonian Mechanics (Toru\'n, 1993)}, pages 103--122.
  Plenum, New York, 1994.

\bibitem[Dav94]{Davie94}
A.~M. Davie.
\newblock The critical function for the semistandard map.
\newblock {\em Nonlinearity}, 7(1):219--229, 1994.

\bibitem[DdlL90]{DelshamsL90}
A.~Delshams and R.~de~la Llave.
\newblock Existence of quasi-periodic orbits and absence of transport for
  volume preserving transformations and flows.
\newblock {\em Preprint}, 1990.

\bibitem[DeL97]{DeLatte97}
D.~DeLatte.
\newblock Diophantine conditions for the linearization of commuting holomorphic
  functions.
\newblock {\em Discrete Contin. Dynam. Systems}, 3(3):317--332, 1997.

\bibitem[Dep70]{Deprit69}
A.~Deprit.
\newblock Canonical transformations depending on a small parameter.
\newblock {\em Celestial Mech.}, 1:12--30, 1969/1970.

\bibitem[DF76]{DragtFinn}
A.~J. Dragt and J.~M. Finn.
\newblock Lie series and invariant functions for analytic symplectic maps.
\newblock {\em J. Mathematical Phys.}, 17(12):2215--2227, 1976.

\bibitem[DG96]{DelshamsG96}
A.~Delshams and P.~Guti{\'e}rrez.
\newblock Effective stability and {K}{A}{M} theory.
\newblock {\em J. Differential Equations}, 128(2):415--490, 1996.

\bibitem[DG00]{DelatteG00}
D.~Delatte and T.~Gramchev.
\newblock Normal forms of maps with linear parts having {J}ordan blocks.
\newblock {\em Preprint}, 2000.

\bibitem[dlL83]{Llave83}
R.~de~la Llave.
\newblock A simple proof of a particular case of {C}. {S}iegel's center
  theorem.
\newblock {\em J. Math. Phys.}, 24(8):2118--2121, 1983.

\bibitem[dlL92]{Llave92}
R.~de~la Llave.
\newblock A renormalization group explanation of numerical observations of
  analyticity domains.
\newblock {\em J. Statist. Phys.}, 66(5-6):1631--1634, 1992.

\bibitem[dlL93]{Llave93}
R.~de~la Llave.
\newblock Introduction to {K}.{A}.{M}.\ theory.
\newblock In {\em Computational Physics (Almu\~{n}\'{e}car, 1992)}, pages
  73--105. World Sci. Publishing, River Edge, NJ, 1993.

\bibitem[dlLMM86]{LlaveMM86}
R.~de~la Llave, J.~M. Marco, and R.~Moriy{\'o}n.
\newblock Canonical perturbation theory of {A}nosov systems and regularity
  results for the {L}iv\v sic cohomology equation.
\newblock {\em Ann. of Math. (2)}, 123(3):537--611, 1986.

\bibitem[dlLO99]{LlaveO99}
R.~de~la Llave and R.~Obaya.
\newblock Regularity of the composition operator in spaces of {H}\"older
  functions.
\newblock {\em Discrete Contin. Dynam. Systems}, 5(1):157--184, 1999.

\bibitem[dlLO00]{LlaveO00}
R.~de~la Llave and R.~Obaya.
\newblock Decomposition theorems for groups of diffeomorphisms in the sphere.
\newblock {\em Trans. Amer. Math. Soc.}, 352(3):1005--1020, 2000.

\bibitem[dlLR91]{LlaveR90}
R.~de~la Llave and D.~Rana.
\newblock Accurate strategies for {K}.{A}.{M}.\ bounds and their
  implementation.
\newblock In {\em Computer Aided Proofs in Analysis (Cincinnati, OH, 1989)},
  pages 127--146. Springer, New York, 1991.

\bibitem[dlLV00]{LlaveV00}
R.~de~la Llave and J.~Vano.
\newblock A {W}hitney-{Z}ehnder implicit function theorem.
\newblock {\em Manuscript}, 2000.

\bibitem[Dou82a]{Douady82a}
R.~Douady.
\newblock Applications du th\'eor\`eme des tores invariants.
\newblock {\em Univ. Paris VII, These 3 cycle}, 1982.

\bibitem[Dou82b]{Douady82}
R.~Douady.
\newblock Une d\'emonstration directe de l'\'equivalence des th\'eor\`emes de
  tores invariants pour diff\'eomorphismes et champs de vecteurs.
\newblock {\em C. R. Acad. Sci. Paris S\'er. I Math.}, 295(2):201--204, 1982.

\bibitem[Dou88]{Douady88}
R.~Douady.
\newblock Regular dependence of invariant curves and {A}ubry-{M}ather sets of
  twist maps of an annulus.
\newblock {\em Ergodic Theory Dynamical Systems}, 8(4):555--584, 1988.

\bibitem[DS75]{DinaburgS75}
E.~I. Dinaburg and Ja.~G. Sinai.
\newblock The one-dimensional {S}chr\"odinger equation with quasiperiodic
  potential.
\newblock {\em Funkcional. Anal. i Prilo\v zen.}, 9(4):8--21, 1975.
\newblock English translation: {\em Functional Anal. Appl.},
  9(1975):\-279--289, 1976.

\bibitem[ED81]{EscandeD81}
D.~F. Escande and F.~Doveil.
\newblock Renormalization method for computing the threshold of the large-scale
  stochastic instability in two degrees of freedom {H}amiltonian systems.
\newblock {\em J. Statist. Phys.}, 26(2):257--284, 1981.

\bibitem[Eli]{Eliasson98}
L.~H. Eliasson.
\newblock Reducibility and point spectrum for linear quasi-periodic
  skew-products.
\newblock In {\em Proceedings of the International Congress of Mathematicians,
  Vol. II (Berlin, 1998)}, pages 779--787 (electronic).

\bibitem[Eli88]{Eliasson88a}
L.~H. Eliasson.
\newblock Perturbations of stable invariant tori for {H}amiltonian systems.
\newblock {\em Ann. Scuola Norm. Sup. Pisa Cl. Sci. (4)}, 15(1):115--147
  (1989), 1988.

\bibitem[Eli89]{Eliasson88}
L.~H. Eliasson.
\newblock {H}amiltonian systems with linear normal form near an invariant
  torus.
\newblock In {\em Nonlinear Dynamics (Bologna, 1988)}, pages 11--29. World Sci.
  Publishing, Teaneck, NJ, 1989.

\bibitem[Eli96]{Eliasson96}
L.~H. Eliasson.
\newblock Absolutely convergent series expansions for quasi periodic motions.
\newblock {\em Math. Phys. Electron. J.}, 2:Paper 4, 33 pp.\ (electronic),
  1996.

\bibitem[Eli97]{Eliasson97}
L.~H. Eliasson.
\newblock Discrete one-dimensional quasi-periodic {S}chr\"odinger operators
  with pure point spectrum.
\newblock {\em Acta Math.}, 179(2):153--196, 1997.

\bibitem[Fat97a]{Fathi97b}
A.~Fathi.
\newblock Solutions {K}{A}{M} faibles conjugu\'ees et barri\`eres de {P}eierls.
\newblock {\em C. R. Acad. Sci. Paris S\'er. I Math.}, 325(6):649--652, 1997.

\bibitem[Fat97b]{Fathi97a}
A.~Fathi.
\newblock Th\'eor\`eme {K}{A}{M} faible et th\'eorie de {M}ather sur les
  syst\`emes lagrangiens.
\newblock {\em C. R. Acad. Sci. Paris S\'er. I Math.}, 324(9):1043--1046, 1997.

\bibitem[FdlL92]{FalcoliniL92b}
C.~Falcolini and R.~de~la Llave.
\newblock Numerical calculation of domains of analyticity for perturbation
  theories in the presence of small divisors.
\newblock {\em J. Statist. Phys.}, 67(3-4):645--666, 1992.

\bibitem[Fed69]{Federer69}
H.~Federer.
\newblock {\em Geometric Measure Theory}.
\newblock Springer-Verlag New York Inc., New York, 1969.

\bibitem[FGB98]{FassoGB98a}
F.~Fass{\`o}, M.~Guzzo, and G.~Benettin.
\newblock Nekhoroshev-stability of elliptic equilibria of {H}amiltonian systems.
\newblock {\em Comm. Math. Phys.}, 197(2):347--360, 1998.

\bibitem[FS83]{FrolichS83}
J.~Fr{\"o}hlich and T.~Spencer.
\newblock Absence of diffusion in the {A}nderson tight binding model for large
  disorder or low energy.
\newblock {\em Comm. Math. Phys.}, 88(2):151--184, 1983.

\bibitem[FS84]{FrolichS84}
J.~Fr{\"o}hlich and T.~Spencer.
\newblock A rigorous approach to {A}nderson localization.
\newblock {\em Phys. Rep.}, 103(1-4):9--25, 1984.

\bibitem[FSW86]{FrolichSW86}
J.~Fr{\"o}hlich, T.~Spencer, and C.~E. Wayne.
\newblock Localization in disordered, nonlinear dynamical systems.
\newblock {\em J. Statist. Phys.}, 42(3-4):247--274, 1986.

\bibitem[FSW90]{FrolichSW90}
J.~Fr{\"o}hlich, T.~Spencer, and P.~Wittwer.
\newblock Localization for a class of one-dimensional quasi-periodic
  {S}chr\"odinger operators.
\newblock {\em Comm. Math. Phys.}, 132(1):5--25, 1990.

\bibitem[FY98]{FuzhongY98}
Cong Fuzhong and Li~Yong.
\newblock Existence of higher-dimensional invariant tori for {H}amiltonian
  systems.
\newblock {\em J. Math. Anal. Appl.}, 222(1):255--267, 1998.

\bibitem[Gal83a]{Gallavotti1}
G.~Gallavotti.
\newblock Perturbation theory for classical {H}amiltonian systems.
\newblock In {\em Scaling and Self-Similarity in Physics (Bures-sur-Yvette,
  1981/1982)}, pages 359--426. Birkh\"auser, Boston, Mass., 1983.

\bibitem[Gal83b]{Gallavotti83}
G.~Gallavotti.
\newblock Perturbation theory for classical {H}amiltonian systems.
\newblock In {\em Scaling and Self-Similarity in Physics (Bures-sur-Yvette,
  1981/1982)}, pages 359--426. Birkh\"auser Boston, Boston, Mass., 1983.

\bibitem[Gal86]{Gallavotti2}
G.~Gallavotti.
\newblock Quasi-integrable mechanical systems.
\newblock In {\em Ph\'enom\`enes critiques, syst\`emes al\'eatoires, th\'eories
  de jauge, Part I, II (Les Houches, 1984)}, pages 539--624. North-Holland,
  Amsterdam, 1986.

\bibitem[Gal94a]{Gallavotti94}
G.~Gallavotti.
\newblock Twistless {K}{A}{M} tori.
\newblock {\em Comm. Math. Phys.}, 164(1):145--156, 1994.

\bibitem[Gal94b]{Gallavotti94b}
G.~Gallavotti.
\newblock Twistless {K}{A}{M} tori, quasi flat homoclinic intersections, and
  other cancellations in the perturbation series of certain completely
  integrable {H}amiltonian systems. {A} review.
\newblock {\em Rev. Math. Phys.}, 6(3):343--411, 1994.

\bibitem[GFB98]{GuzzoFB98b}
M.~Guzzo, F.~Fass{\`o}, and G.~Benettin.
\newblock On the stability of elliptic equilibria.
\newblock {\em Math. Phys. Electron. J.}, 4:Paper 1, 16 pp.\ (electronic),
  1998.

\bibitem[GG95]{GallavottiG95}
G.~Gallavotti and G.~Gentile.
\newblock Majorant series convergence for twistless {K}{A}{M} tori.
\newblock {\em Ergodic Theory Dynam. Systems}, 15(5):857--869, 1995.

\bibitem[GJdlLV00]{JorbaLV00}
A.~Gonz{\'a}lez, {\`A}.~Jorba, R.~de~la Llave, and J.~Villanueva.
\newblock {K}{A}{M} theory for non action-angle {H}amiltonian systems.
\newblock {\em Manuscript}, 2000.

\bibitem[Gol80]{Goldstein}
H.~Goldstein.
\newblock {\em Classical Mechanics}.
\newblock Addison-Wesley, Reading, Mass., second edition, 1980.

\bibitem[GP74]{GuilleminP74}
Victor Guillemin and Alan Pollack.
\newblock {\em Differential topology}.
\newblock Prentice-Hall Inc., Englewood Cliffs, N.J., 1974.

\bibitem[GP81]{GreeneP81}
J.~M. Greene and I.~C. Percival.
\newblock {H}amiltonian maps in the complex plane.
\newblock {\em Phys. D}, 3(3):530--548, 1981.

\bibitem[Gre79]{Greene79}
J.~M. Greene.
\newblock A method for determining a stochastic transition.
\newblock {\em Jour. Math. Phys.}, 20:1183--1201, 1979.

\bibitem[Hal75]{Hald75}
O.~H. Hald.
\newblock On a {N}ewton-{M}oser type method.
\newblock {\em Numer. Math.}, 23:411--426, 1975.

\bibitem[Ham82]{Hamilton82}
R.~S. Hamilton.
\newblock The inverse function theorem of {N}ash and {M}oser.
\newblock {\em Bull. Amer. Math. Soc. (N.S.)}, 7(1):65--222, 1982.

\bibitem[Har99]{Haro00}
A.~Haro.
\newblock Interpolation of an exact symplectomorphism by a {H}amiltonian flow.
\newblock {\em {\tt http://www.ma.\-utexas.edu/mp\_arc}}, 99-100, 1999.

\bibitem[Hay90]{Haydn90}
N.~T.~A. Haydn.
\newblock On invariant curves under renormalisation.
\newblock {\em Nonlinearity}, 3(3):887--912, 1990.

\bibitem[HdlL00]{HaroL00}
A.~Haro and R.~de~la Llave.
\newblock New mechanisms for lack of equipartition of energy.
\newblock {\em Phys. Rev. Lett.}, 89(7):1859--1862, 2000.

\bibitem[H{\'e}n83]{Henon83}
Michel H{\'e}non.
\newblock Numerical exploration of {H}amiltonian systems.
\newblock In {\em Chaotic Behavior of Deterministic Systems (Les Houches,
  1981)}, pages 53--170. North-Holland, Amsterdam, 1983.

\bibitem[Her79]{Herman79}
M.-R. Herman.
\newblock Sur la conjugaison diff\'erentiable des diff\'eomorphismes du cercle
  \`a des rotations.
\newblock {\em Inst. Hautes \'Etudes Sci. Publ. Math.}, (49):5--233, 1979.

\bibitem[Her83a]{Herman83}
M.-R. Herman.
\newblock {\em Sur les courbes invariantes par les diff\'eomorphismes de
  l'anneau. {V}ol. 1}.
\newblock Soci\'et\'e Math\'ematique de France, Paris, 1983.

\bibitem[Her83b]{Herman83a}
M.-R. Herman.
\newblock Sur les diff\'eomorphismes du cercle de nombre de rotation de type
  constant.
\newblock In {\em Conference on Harmonic Analysis in Honor of Antoni Zygmund,
  Vol. I, II (Chicago, Ill., 1981)}, pages 708--725. Wadsworth, Belmont, CA,
  1983.

\bibitem[Her85]{Herman85}
M.-R. Herman.
\newblock Simple proofs of local conjugacy theorems for diffeomorphisms of the
  circle with almost every rotation number.
\newblock {\em Bol. Soc. Brasil. Mat.}, 16(1):45--83, 1985.

\bibitem[Her86]{Herman86}
M.-R. Herman.
\newblock Sur les courbes invariantes par les diff\'eomorphismes de l'anneau.
  {V}ol.\ 2.
\newblock {\em Ast\'erisque}, (144):248, 1986.
\newblock With a correction to: {\it On the curves invariant under
  diffeomorphisms of the annulus, Vol.\ 1} (French) [Ast\'erisque No. 103-104,
  Soc.\ Math.\ France, Paris, 1983; MR 85m:58062].

\bibitem[Her87]{Herman87}
M.-R. Herman.
\newblock Recent results and some open questions on {S}iegel's linearization
  theorem of germs of complex analytic diffeomorphisms of ${\mathbf{C}}\sp n$
  near a fixed point.
\newblock In {\em VIIIth International Congress on Mathematical Physics
  (Marseille, 1986)}, pages 138--184. World Sci. Publishing, Singapore, 1987.

\bibitem[Her91]{Herman91}
M.-R. Herman.
\newblock Exemples de flots hamiltoniens dont aucune perturbation en topologie
  ${C}\sp \infty$ n'a d'orbites p\'eriodiques sur un ouvert de surfaces
  d'\'energies.
\newblock {\em C. R. Acad. Sci. Paris S\'er. I Math.}, 312(13):989--994, 1991.

\bibitem[HM94]{HounieM94}
J.~Hounie and P.~Malagutti.
\newblock O teorema de {N}ash-{M}oser e aplica\c{c}\~oes.
\newblock {\em Coloquio Matematico Brasileiro}, 19, 1994.

\bibitem[H{\"o}r85]{Hormander85}
L.~H{\"o}rmander.
\newblock On the {N}ash--{M}oser implicit function theorem.
\newblock {\em Ann. Acad. Sci. Fenn. Ser. A I Math.}, 10:255--259, 1985.

\bibitem[H{\"o}r90]{Hormander90}
L.~H{\"o}rmander.
\newblock The {N}ash-{M}oser theorem and paradifferential operators.
\newblock In {\em Analysis, et cetera}, pages 429--449. Academic Press, Boston,
  MA, 1990.

\bibitem[IEE85]{IEEE85}
IEEE.
\newblock IEEE standard no.:754-1985 pdf, standard for binary floating-point
  arithmetic, 1985.

\bibitem[Ily79]{Ilyashenko79}
Ju.~S. Ilyashenko.
\newblock Divergence of series that reduce an analytic differential equation to
  linear normal form at a singular point.
\newblock {\em Funktsional. Anal. i Prilozhen.}, 13(3):87--88, 1979.

\bibitem[JdlLZ99]{JorbaLZ00}
{\`A}.~Jorba, R.~de~la Llave, and M.~Zou.
\newblock {L}indstedt series for lower-dimensional tori.
\newblock In {\em {H}amiltonian Systems with Three or More Degrees of Freedom
  (S'Agar\'o, 1995)}, pages 151--167. Kluwer Acad. Publ., Dordrecht, 1999.

\bibitem[Jor99]{Jorba99}
{\`A}.~Jorba.
\newblock A methodology for the numerical computation of normal forms, centre
  manifolds and first integrals of {H}amiltonian systems.
\newblock {\em Experiment. Math.}, 8(2):155--195, 1999.

\bibitem[JS92]{JorbaS92}
{\`A}.~Jorba and C.~Sim{\'o}.
\newblock On the reducibility of linear differential equations with
  quasiperiodic coefficients.
\newblock {\em J. Differential Equations}, 98(1):111--124, 1992.

\bibitem[Jun91]{Jungreis91}
I.~Jungreis.
\newblock A method for proving that monotone twist maps have no invariant
  circles.
\newblock {\em Ergodic Theory Dynamical Systems}, 11(1):79--84, 1991.

\bibitem[JV97a]{JorbaV97b}
{\`A}.~Jorba and J.~Villanueva.
\newblock On the normal behaviour of partially elliptic lower-dimensional tori
  of {H}amiltonian systems.
\newblock {\em Nonlinearity}, 10(4):783--822, 1997.

\bibitem[JV97b]{JorbaV97a}
{\`A}.~Jorba and J.~Villanueva.
\newblock On the persistence of lower-dimensional invariant tori under
  quasi-periodic perturbations.
\newblock {\em J. Nonlinear Sci.}, 7(5):427--473, 1997.

\bibitem[Kah96]{Kahan96}
W.~Kahan.
\newblock Lecture notes on the status of IEEE standard for binary floating
  point arithmetic, 1996.
\newblock {\tt http://www.cs.berkeley.edu/\~{}wkahan/}.

\bibitem[Kat76]{Katznelson76}
Yitzhak Katznelson.
\newblock {\em An introduction to harmonic analysis}.
\newblock Dover Publications Inc., New York, corrected edition, 1976.

\bibitem[KM84]{KaucherM84}
E.~W. Kaucher and W.~L. Miranker.
\newblock {\em Self-validating Numerics for Function Space Problems}.
\newblock Academic Press Inc., Orlando, Fla., 1984.

\bibitem[Knu97]{Knuth97}
Donald~E. Knuth.
\newblock {\em The art of computer programming. {V}ol. 2: {S}eminumerical
  algorithms}.
\newblock Addison-Wesley Publishing Co., Reading, Mass.-London-Don Mills, Ont.,
  third revised edition, 1997.

\bibitem[KO89a]{KatznelsonO89b}
Y.~Katznelson and D.~Ornstein.
\newblock The absolute continuity of the conjugation of certain diffeomorphisms
  of the circle.
\newblock {\em Ergodic Theory Dynamical Systems}, 9(4):681--690, 1989.

\bibitem[KO89b]{KatznelsonO89a}
Y.~Katznelson and D.~Ornstein.
\newblock The differentiability of the conjugation of certain diffeomorphisms
  of the circle.
\newblock {\em Ergodic Theory Dynamical Systems}, 9(4):643--680, 1989.

\bibitem[KO93]{KatznelsonO93}
Y.~Katznelson and D.~S. Ornstein.
\newblock A new method for twist theorems.
\newblock {\em J. Anal. Math.}, 60:157--208, 1993.

\bibitem[Koc99]{Koch}
H.~Koch.
\newblock A renormalization group for {H}amiltonians, with applications to
  {K}{A}{M} tori.
\newblock {\em Ergodic Theory Dynam. Systems}, 19:1--47, 1999.

\bibitem[Kol54]{Kolmogorov79}
A.~N. Kolmogorov.
\newblock On conservation of conditionally periodic motions for a small change
  in {H}amilton's function.
\newblock {\em Dokl. Akad. Nauk SSSR (N.S.)}, 98:527--530, 1954.
\newblock English translation in {\it Stochastic Behavior in Classical and
  Quantum Hamiltonian Systems (Volta Memorial Conf., Como, 1977)}, Lecture
  Notes in Phys., 93, pages 51--56. Springer, Berlin, 1979.

\bibitem[Kos91]{Kosygin91}
D.~V. Kosygin.
\newblock Multidimensional {K}{A}{M} theory from the renormalization group
  viewpoint.
\newblock In {\em Dynamical {S}ystems and {S}tatistical {M}echanics (Moscow,
  1991)}, pages 99--129. Amer. Math. Soc., Providence, RI, 1991.

\bibitem[Koz83]{Kozlov83}
S.~M. Kozlov.
\newblock Reducibility of quasiperiodic differential operators and averaging.
\newblock {\em Trudy Moskov. Mat. Obshch.}, 46:99--123, 1983.
\newblock English translation: {\em Trans. Moscow Math. Soc.}, Issue
  2:\-101--126, 1984.

\bibitem[KP94]{KuksinP94}
Sergei Kuksin and J{\"u}rgen P{\"o}schel.
\newblock On the inclusion of analytic symplectic maps in analytic
  {H}amiltonian flows and its applications.
\newblock In {\em Seminar on Dynamical Systems (St.\ Petersburg, 1991)}, pages
  96--116. Birkh\"auser, Basel, 1994.

\bibitem[Kra83]{Krantz83}
S.~G. Krantz.
\newblock Lipschitz spaces, smoothness of functions, and approximation theory.
\newblock {\em Exposition. Math.}, 1(3):193--260, 1983.

\bibitem[Kri99]{Krikorian99a}
Rapha{\"e}l Krikorian.
\newblock R\'eductibilit\'e des syst\`emes produits-crois\'es \`a valeurs dans
  des groupes compacts.
\newblock {\em Ast\'erisque}, (259):vi+216, 1999.

\bibitem[KS86]{KhaninS86}
K.~Khanin and Ya.~G. Sinai.
\newblock Renormalization group method and {K}olmogorov-{A}rnold-{M}oser
  theory.
\newblock In R.~Z. Sagdeev, editor, {\em Nonlinear Phenomena in Plasma Physics
  and Hydrodynamics}, pages 31--64. Mir, Moscow, 1986.

\bibitem[KS87]{SinaiK87}
K.~M. Khanin and Ya.~G. Sinai.
\newblock A new proof of {M}. {H}erman's theorem.
\newblock {\em Comm. Math. Phys.}, 112(1):89--101, 1987.

\bibitem[KSW96]{KochSW96}
H.~Koch, A.~Schenkel, and P.~Wittwer.
\newblock Computer-assisted proofs in analysis and programming in logic: a case
  study.
\newblock {\em SIAM Rev.}, 38(4):565--604, 1996.

\bibitem[Kuk93]{Kuksin93}
S.~B. Kuksin.
\newblock {\em Nearly Integrable Infinite-Dimensional {H}amiltonian Systems}.
\newblock Springer-Verlag, Berlin, 1993.

\bibitem[Lan82]{Lanford82}
O.~E. Lanford, III.
\newblock A computer-assisted proof of the {F}eigenbaum conjectures.
\newblock {\em Bull. Amer. Math. Soc. (N.S.)}, 6(3):427--434, 1982.

\bibitem[Lio82]{Lions82}
P.-L. Lions.
\newblock {\em Generalized Solutions of {H}amilton--{J}acobi Equations}.
\newblock Pitman (Advanced Publishing Program), Boston, Mass., 1982.

\bibitem[LL76]{Landau}
L.~D. Landau and E.~M. Lifshitz.
\newblock {\em Course of Theoretical Physics. {V}ol. 1. Mechanics}.
\newblock Pergamon Press, Oxford, third edition, 1976.

\bibitem[LN92]{LochakN92}
P.~Lochak and A.~I. Neishtadt.
\newblock Estimates of stability time for nearly integrable systems with a
  quasiconvex {H}amiltonian.
\newblock {\em Chaos}, 2(4):495--499, 1992.

\bibitem[Loc92]{Lochak92}
P.~Lochak.
\newblock Canonical perturbation theory: an approach based on joint
  approximations.
\newblock {\em Uspekhi Mat. Nauk}, 47(6(288)):59--140, 1992.
\newblock English translation: {\em Russian Math. Surveys}, 47(6):\-57--133,
  1992.

\bibitem[Mar00]{Marmi99}
Stefano Marmi.
\newblock {\em An introduction to small divisors problems}.
\newblock Istituti Editoriali e Poligrafici Internazionali Pisa-Roma, 2000.
\newblock Available from {\tt www.ma.utexas.edu/mp\_arc}.

\bibitem[Mat69]{Mather69}
J.~N. Mather.
\newblock Stability of ${C}\sp{\infty }$ mappings. {I}{I}. {I}nfinitesimal
  stability implies stability.
\newblock {\em Ann. of Math. (2)}, 89:254--291, 1969.

\bibitem[Mat88]{Mather88}
J.~N. Mather.
\newblock Destruction of invariant circles.
\newblock {\em Ergodic Theory Dynamical Systems}, 8(Charles Conley Memorial
  Issue):199--214, 1988.

\bibitem[McG90]{McGehee90}
R.~McGehee.
\newblock A note on the {M}oser-{H}ald variation of {N}ewton's method.
\newblock In {\em Analysis, et cetera}, pages 495--499. Academic Press, Boston,
  MA, 1990.

\bibitem[McK82]{McKay82}
R.~S. MacKay.
\newblock {\em Renormalisation in Area Preserving Maps}.
\newblock PhD thesis, Princeton University, 1982.

\bibitem[Mei92]{Meiss92}
J.~D. Meiss.
\newblock Symplectic maps, variational principles, and transport.
\newblock {\em Rev. Modern Phys.}, 64(3):795--848, 1992.

\bibitem[Mey91]{Meyer91}
K.~R. Meyer.
\newblock Lie transform tutorial. {I}{I}.
\newblock In {\em Computer Aided Proofs in Analysis (Cincinnati, OH, 1989)},
  pages 190--210. Springer, New York, 1991.

\bibitem[MF94]{MatherF91}
J.~N. Mather and G.~Forni.
\newblock Action minimizing orbits in {H}amiltonian systems.
\newblock In {\em Transition to Chaos in Classical and Quantum Mechanics
  (Montecatini Terme, 1991)}, pages 92--186. Springer, Berlin, 1994.

\bibitem[MH92]{MeyerH92}
K.~R. Meyer and G.~R. Hall.
\newblock {\em Introduction to {H}amiltonian Dynamical Systems and the
  ${N}$-body Problem}.
\newblock Springer-Verlag, New York, 1992.

\bibitem[Mie91]{Mielke91}
A.~Mielke.
\newblock {\em {H}amiltonian and {L}agrangian Flows on Center Manifolds. {\rm
  With Applications to Elliptic Variational Problems}}.
\newblock Springer-Verlag, Berlin, 1991.

\bibitem[MM74]{MarkusM74}
L.~Markus and K.~R. Meyer.
\newblock {\em Generic {H}amiltonian dynamical systems are neither integrable
  nor ergodic}.
\newblock American Mathematical Society, Providence, R.I., 1974.
\newblock Memoirs of the American Mathematical Society, No. 144.

\bibitem[MMY97]{MarmiMY97}
S.~Marmi, P.~Moussa, and J.-C. Yoccoz.
\newblock The {B}rjuno functions and their regularity properties.
\newblock {\em Comm. Math. Phys.}, 186(2):265--293, 1997.

\bibitem[Moo79]{Moore79}
R.~E. Moore.
\newblock {\em Methods and Applications of Interval Analysis}.
\newblock Society for Industrial and Applied Mathematics (SIAM), Philadelphia,
  Pa., 1979.

\bibitem[Mor82]{Moriyon82}
R.~Moriy{\'o}n.
\newblock Regularity of the {D}irichlet problem for the degenerate complex
  {M}onge-{A}mp\`ere equation.
\newblock {\em Comm. Pure Appl. Math.}, 35(1):1--27, 1982.

\bibitem[Mos60]{Moser60}
J.~Moser.
\newblock On the integrability of area preserving {C}remona mappings near an
  elliptic fixed point.
\newblock {\em Bol. Soc. Mat. Mexicana (2)}, 5:176--180, 1960.

\bibitem[Mos62]{Moser62}
J.~Moser.
\newblock On invariant curves of area-preserving mappings of an annulus.
\newblock {\em Nachr. Akad. Wiss. G\"ottingen Math.-Phys. Kl. II}, 1962:1--20,
  1962.

\bibitem[Mos65]{Moser65}
J.~Moser.
\newblock On the volume elements on a manifold.
\newblock {\em Trans. Amer. Math. Soc.}, 120:286--294, 1965.

\bibitem[Mos66a]{Moser66b}
J.~Moser.
\newblock A rapidly convergent iteration method and non-linear differential
  equations. {I}{I}.
\newblock {\em Ann. Scuola Norm. Sup. Pisa (3)}, 20:499--535, 1966.

\bibitem[Mos66b]{Moser66a}
J.~Moser.
\newblock A rapidly convergent iteration method and non-linear partial
  differential equations. {I}.
\newblock {\em Ann. Scuola Norm. Sup. Pisa (3)}, 20:265--315, 1966.

\bibitem[Mos67]{Moser67}
J.~Moser.
\newblock Convergent series expansions for quasi-periodic motions.
\newblock {\em Math. Ann.}, 169:136--176, 1967.

\bibitem[Mos73]{Moser73}
J.~Moser.
\newblock {\em Stable and Random Motions in Dynamical Systems}.
\newblock Princeton University Press, Princeton, N. J., 1973.

\bibitem[Mos86]{Moser86b}
J.~Moser.
\newblock Minimal solutions of variational problems on a torus.
\newblock {\em Ann. Inst. H. Poincar\'e Anal. Non Lin\'eaire}, 3(3):229--272,
  1986.

\bibitem[Mos88]{Moser88}
J.~Moser.
\newblock A stability theorem for minimal foliations on a torus.
\newblock {\em Ergodic Theory Dynamical Systems}, 8$\sp *$(Charles Conley
  Memorial Issue):251--281, 1988.

\bibitem[Mos95]{Moser95}
J.~Moser.
\newblock On the persistence of pseudo-holomorphic curves on an almost complex
  torus (with an appendix by {J}\"urgen {P}\"oschel).
\newblock {\em Invent. Math.}, 119(3):401--442, 1995.

\bibitem[MP84]{MoserP84}
J{\"u}rgen Moser and J{\"u}rgen P{\"o}schel.
\newblock An extension of a result by {D}inaburg and {S}inai on quasiperiodic
  potentials.
\newblock {\em Comment. Math. Helv.}, 59(1):39--85, 1984.

\bibitem[MP85]{McKayP85}
R.~S. MacKay and I.~C. Percival.
\newblock Converse {K}{A}{M}: theory and practice.
\newblock {\em Comm. Math. Phys.}, 98(4):469--512, 1985.

\bibitem[MS92]{MarmiS92}
Stefano Marmi and Jaroslav Stark.
\newblock On the standard map critical function.
\newblock {\em Nonlinearity}, 5(3):743--761, 1992.

\bibitem[MS95]{McDuffS95}
D.~McDuff and D.~Salamon.
\newblock {\em Introduction to Symplectic Topology}.
\newblock The Clarendon Press Oxford University Press, New York, 1995.

\bibitem[Nas56]{Nash63}
J.~Nash.
\newblock The imbedding problem for {R}iemannian manifolds.
\newblock {\em Ann. of Math. (2)}, 63:20--63, 1956.

\bibitem[Neh77]{Nekhoroshev77}
N.~N. Nehoro{\v{s}}ev.
\newblock An exponential estimate of the time of stability of nearly integrable
  {H}amiltonian systems.
\newblock {\em Uspehi Mat. Nauk}, 32(6(198)):5--66, 287, 1977.
\newblock English translation: {\em Russian Math. Surveys}, 32(6):\-1--65,
  1977.

\bibitem[Ne{\u\i}81]{Neishtadt81}
A.~I. Ne{\u\i}shtadt.
\newblock Estimates in the {K}olmogorov theorem on conservation of
  conditionally periodic motions.
\newblock {\em Prikl. Mat. Mekh.}, 45(6):1016--1025, 1981.

\bibitem[Nek99]{Nekhoroshev99}
N.~N. Nekhoroshev.
\newblock Exponential stability of the approximate fundamental mode of a
  nonlinear wave equation.
\newblock {\em Funktsional. Anal. i Prilozhen.}, 33(1):80--83, 1999.
\newblock English translation will be published in {\em Funct. Anal. Appl.}

\bibitem[Nel69]{Nelson69}
E.~Nelson.
\newblock {\em Topics in Dynamics. {I}: {F}lows}.
\newblock Princeton University Press, Princeton, N.J., 1969.

\bibitem[Nie98]{Niederman98}
Laurent Niederman.
\newblock Nonlinear stability around an elliptic equilibrium point in a
  {H}amiltonian system.
\newblock {\em Nonlinearity}, 11(6):1465--1479, 1998.

\bibitem[Omo86]{Omohundro86}
Stephen~M. Omohundro.
\newblock {\em Geometric perturbation theory in physics}.
\newblock World Scientific Publishing Co., Singapore, 1986.

\bibitem[Par84]{Parasyuk84}
I.~O. Parasyuk.
\newblock Preservation of multidimensional invariant tori of {H}amiltonian
  systems.
\newblock {\em Ukrain. Mat. Zh.}, 36(4):467--473, 1984.

\bibitem[Par89]{Parasyuk89}
I.~O. Parasyuk.
\newblock Deformations of a symplectic structure, and co-isotropic invariant
  tori of {H}amiltonian systems.
\newblock {\em Mat. Fiz. Nelinein. Mekh.}, (12(46)):35--37, 94, 1989.

\bibitem[Pja69]{Pjartli69}
A.~S. Pjartli.
\newblock Diophantine approximations of submanifolds of a {E}uclidean space.
\newblock {\em Funkcional. Anal. i Prilo\v zen.}, 3(4):59--62, 1969.

\bibitem[PM91]{Perezmarco91}
R.~P{\'e}rez~Marco.
\newblock Sur la structure des germes holomorphes non lin\'earisables.
\newblock {\em C. R. Acad. Sci. Paris S\'er. I Math.}, 312(7):533--536, 1991.

\bibitem[PM92]{Perezmarco92}
R.~P{\'e}rez~Marco.
\newblock Solution compl\`ete au probl\`eme de {S}iegel de lin\'earisation
  d'une application holomorphe au voisinage d'un point fixe (d'apr\`es
  {J}.-{C}.\ {Y}occoz).
\newblock {\em Ast\'erisque}, 206:Exp.\ No.\ 753, 4, 273--310, 1992.
\newblock S\'eminaire Bourbaki, Vol.\ 1991/92.

\bibitem[PM93]{Perezmarco93}
R.~P{\'e}rez~Marco.
\newblock Sur les dynamiques holomorphes non lin\'earisables et une conjecture
  de {V}. {I}. {A}rnol'd.
\newblock {\em Ann. Sci. \'Ecole Norm. Sup. (4)}, 26(5):565--644, 1993.

\bibitem[PM00]{Perezmarco00}
R.~P{\'e}rez~Marco.
\newblock Total convergence or general divergence in small divisors.
\newblock {\em Preprint}, 2000.

\bibitem[Poi78]{Poincare78}
H.~Poincar{\'e}.
\newblock Note sur les propri\'et\'es des fonctions d\'efinies par les \'equations
  diff\'erentielles.
\newblock {\em Jour. de la Ec. Polyth.}, pages 13--26, 1878.

\bibitem[Poi93]{Poincare}
H.~Poincar{\'e}.
\newblock {\em New Methods of Celestial Mechanics. {V}ols. 1--3}.
\newblock American Institute of Physics, New York, 1993.

\bibitem[P{\"o}s99]{Poschel99}
J{\"u}rgen P{\"o}schel.
\newblock On {N}ekhoroshev's estimate at an elliptic equilibrium.
\newblock {\em Internat. Math. Res. Notices}, 1999(4):203--215, 1999.

\bibitem[P{\"o}s82]{Poschel}
J.~P{\"o}schel.
\newblock Integrability of {H}amiltonian systems on {C}antor sets.
\newblock {\em Comm. Pure Appl. Math.}, 35(5):653--696, 1982.

\bibitem[P{\"o}s86]{Poschel86}
J.~P{\"o}schel.
\newblock On invariant manifolds of complex analytic mappings near fixed
  points.
\newblock {\em Exposition. Math.}, 4(2):97--109, 1986.

\bibitem[P{\"o}s90]{Poschel90}
J.~P{\"o}schel.
\newblock Small divisors with spatial structure in infinite-dimensional
  {H}amiltonian systems.
\newblock {\em Comm. Math. Phys.}, 127(2):351--393, 1990.

\bibitem[P{\"o}s92]{Poschel92}
J{\"u}rgen P{\"o}schel.
\newblock A lecture on the classical {K}{A}{M} theorem.
\newblock 1992.

\bibitem[P{\"o}s93]{Poschel93}
J.~P{\"o}schel.
\newblock Nekhoroshev estimates for quasi-convex {H}amiltonian systems.
\newblock {\em Math. Z.}, 213(2):187--216, 1993.

\bibitem[P{\"o}s96]{Poschel96}
J.~P{\"o}schel.
\newblock A {K}{A}{M}-theorem for some nonlinear partial differential
  equations.
\newblock {\em Ann. Scuola Norm. Sup. Pisa Cl. Sci. (4)}, 23(1):119--148, 1996.

\bibitem[RA87]{RandA87}
R.~H. Rand and D.~Armbruster.
\newblock {\em Perturbation Methods, Bifurcation Theory and Computer Algebra}.
\newblock Springer-Verlag, New York, 1987.

\bibitem[RA92]{RandAProg}
R.~Rand and D.~Armbruster.
\newblock Programs from the book ``{P}erturbation {M}ethods, {B}ifurcation
  {T}heory and {C}omputer {A}lgebra''.
\newblock {\em {\tt http://www.ma.\-utexas.edu/mp\_arc}}, 92-113, 1992.

\bibitem[Ran87]{Rana87}
D.~Rana.
\newblock {\em Proof of Accurate Upper and Lower Bounds to Stability Domains in
  Small Denominator Problems}.
\newblock PhD thesis, Princeton University, 1987.

\bibitem[Rud87]{Rudin87}
W.~Rudin.
\newblock {\em Real and Complex Analysis}.
\newblock McGraw-Hill, New York, third edition, 1987.

\bibitem[R{\"u}s72]{Russman72}
H.~R{\"u}ssmann.
\newblock Kleine {N}enner. {I}{I}. {B}emerkungen zur {N}ewtonschen {M}ethode.
\newblock {\em Nachr. Akad. Wiss. G\"ottingen Math.-Phys. Kl. II}, 1972:1--10,
  1972.

\bibitem[R{\"u}s70]{Russman70}
H.~R{\"u}ssmann.
\newblock Kleine {N}enner. {I}. \"{U}ber invariante {K}urven differenzierbarer
  {A}bbildungen eines {K}reisringes.
\newblock {\em Nachr. Akad. Wiss. G\"ottingen Math.-Phys. Kl. II},
  1970:67--105, 1970.

\bibitem[R{\"u}s75]{Russmann75}
H.~R{\"u}ssmann.
\newblock On optimal estimates for the solutions of linear partial differential
  equations of first order with constant coefficients on the torus.
\newblock In {\em Dynamical Systems, Theory and Applications (Battelle
  Rencontres, Seattle, Wash., 1974)}, pages 598--624. Lecture Notes in Phys.,
  Vol. 38. Springer, Berlin, 1975.

\bibitem[R{\"u}s76a]{Russman76}
H.~R{\"u}ssmann.
\newblock On a new proof of {M}oser's twist mapping theorem.
\newblock In {\em Proceedings of the Fifth Conference on Mathematical Methods
  in Celestial Mechanics (Oberwolfach, 1975), Part I}, volume~14, pages 19--31,
  1976.

\bibitem[R{\"u}s76b]{Russmann76}
H.~R{\"u}ssmann.
\newblock On optimal estimates for the solutions of linear difference equations
  on the circle.
\newblock {\em Celestial Mech.}, 14(1):33--37, 1976.

\bibitem[R{\"u}s80]{Russmann80}
H.~R{\"u}ssmann.
\newblock On the one-dimensional {S}chr\"odinger equation with a quasiperiodic
  potential.
\newblock In {\em Nonlinear Dynamics (Internat. Conf., New York, 1979)}, pages
  90--107. New York Acad. Sci., New York, 1980.

\bibitem[R{\"u}s90]{Russmann90}
H.~R{\"u}ssmann.
\newblock Nondegeneracy in the perturbation theory of integrable dynamical
  systems.
\newblock In {\em Stochastics, Algebra and Analysis in Classical and Quantum
  Dynamics (Marseille, 1988)}, pages 211--223. Kluwer Acad. Publ., Dordrecht,
  1990.

\bibitem[R{\"u}s98]{Russmann98}
H.~R{\"u}ssmann.
\newblock Invariant tori in the perturbation theory of weakly non-degenerate
  integrable {H}amiltonian systems.
\newblock {\em Mainz preprint}, 14, 1998.

\bibitem[Ryc92]{Rychlik92}
Marek Rychlik.
\newblock Renormalization of cocycles and linear {O}{D}{E} with almost-periodic
  coefficients.
\newblock {\em Invent. Math.}, 110(1):173--206, 1992.

\bibitem[Sal86]{Salamon86}
D.~Salamon.
\newblock The {K}olmogorov-{A}rnold-{M}oser theorem.
\newblock {\em Z\"urich preprint}, 1986.

\bibitem[Sch69]{Schwartz69}
J.~T. Schwartz.
\newblock {\em Nonlinear Functional Analysis}.
\newblock Gordon and Breach, New York, 1969.

\bibitem[Sch95]{Schmidt95}
D.~S. Schmidt.
\newblock Computing the motion of the moon accurately.
\newblock In {\em Hamiltonian Dynamical Systems (Cincinnati, OH, 1992)}, pages
  341--361. Springer, New York, 1995.

\bibitem[Sev86]{Sevryuk86}
M.~B. Sevryuk.
\newblock {\em Reversible Systems}.
\newblock Springer-Verlag, Berlin, 1986.

\bibitem[Sev95]{Sevryuk95}
M.~B. Sevryuk.
\newblock K{A}{M}-stable {H}amiltonians.
\newblock {\em J. Dynam. Control Systems}, 1(3):351--366, 1995.

\bibitem[Sev96]{Sevryuk96}
M.~B. Sevryuk.
\newblock Invariant tori of {H}amiltonian systems that are nondegenerate in the
  sense of {R}\"ussmann.
\newblock {\em Dokl. Akad. Nauk}, 346(5):590--593, 1996.

\bibitem[Sev98]{Sevryuk98}
M.~B. Sevryuk.
\newblock The finite-dimensional reversible {K}{A}{M} theory.
\newblock {\em Phys. D}, 112(1-2):132--147, 1998.

\bibitem[Sev99]{Sevryuk99}
M.~B. Sevryuk.
\newblock The lack-of-parameters problem in the {K}{A}{M} theory revisited.
\newblock In {\em {H}amiltonian Systems with Three or More Degrees of Freedom
  (S'Agar\'o, 1995)}, pages 568--572. Kluwer Acad. Publ., Dordrecht, 1999.

\bibitem[Sie42]{Siegel42}
C.~L. Siegel.
\newblock Iteration of analytic functions.
\newblock {\em Ann. of Math. (2)}, 43:607--612, 1942.

\bibitem[Sie54]{Siegel54}
Carl~Ludwig Siegel.
\newblock \"{U}ber die {E}xistenz einer {N}ormalform analytischer
  {H}amiltonscher {D}ifferentialgleichungen in der {N}\"ahe einer
  {G}leichgewichtsl\"osung.
\newblock {\em Math. Ann.}, 128:144--170, 1954.

\bibitem[Sim98]{Simo98}
C.~Sim{\'o}.
\newblock Effective computations in celestial mechanics and astrodynamics.
\newblock In {\em Modern Methods of Analytical Mechanics and Their Applications
  (Udine, 1997)}, pages 55--102. Springer, Vienna, 1998.

\bibitem[SK89]{SinaiK89}
Ya.~G. Sinai and K.~M. Khanin.
\newblock Smoothness of conjugacies of diffeomorphisms of the circle with
  rotations.
\newblock {\em Uspekhi Mat. Nauk}, 44(1(265)):57--82, 247, 1989.
\newblock English translation: {\em Russian Math. Surveys}, 44(1):69--99, 1989.

\bibitem[SM95]{SiegelM95}
C.~L. Siegel and J.~K. Moser.
\newblock {\em Lectures on Celestial Mechanics}.
\newblock Springer-Verlag, Berlin, 1995.
\newblock Reprint of the 1971 translation.

\bibitem[Sta88]{Stark88}
J.~Stark.
\newblock Smooth conjugacy and renormalisation for diffeomorphisms of the
  circle.
\newblock {\em Nonlinearity}, 1(4):541--575, 1988.

\bibitem[Ste70]{Stein70}
E.~M. Stein.
\newblock {\em Singular Integrals and Differentiability Properties of
  Functions}.
\newblock Princeton University Press, Princeton, N.J., 1970.

\bibitem[Sti93]{Stirnemann93}
Andreas Stirnemann.
\newblock Renormalization for golden circles.
\newblock {\em Comm. Math. Phys.}, 152(2):369--431, 1993.

\bibitem[Sti94]{Stirnemann94}
Andreas Stirnemann.
\newblock A renormalization proof of {S}iegel's theorem.
\newblock {\em Nonlinearity}, 7(3):943--958, 1994.

\bibitem[Sti97]{Stirnemann97}
Andreas Stirnemann.
\newblock Towards an existence proof of {M}ac{K}ay's fixed point.
\newblock {\em Comm. Math. Phys.}, 188(3):723--735, 1997.

\bibitem[Sto94a]{Stolovitch94a}
Laurent Stolovitch.
\newblock Sur un th\'eor\`eme de {D}ulac.
\newblock {\em Ann. Inst. Fourier (Grenoble)}, 44(5):1397--1433, 1994.

\bibitem[Sto94b]{Stolovitch94b}
Laurent Stolovitch.
\newblock Sur un th\'eor\`eme de {D}ulac.
\newblock {\em C. R. Acad. Sci. Paris S\'er. I Math.}, 319(3):253--256, 1994.

\bibitem[Sva80]{Svanidze80}
N.~V. Svanidze.
\newblock Small perturbations of an integrable dynamical system with an
  integral invariant.
\newblock {\em Trudy Mat. Inst. Steklov.}, 147:124--146, 204, 1980.
\newblock English translation: {\em Proc. Steklov Inst. Math.}, 1981, no. 2.

\bibitem[SZ89]{SalamonZ89}
D.~Salamon and E.~Zehnder.
\newblock K{A}{M} theory in configuration space.
\newblock {\em Comment. Math. Helv.}, 64(1):84--132, 1989.

\bibitem[Thi97]{Thirring97}
W.~Thirring.
\newblock {\em Classical Mathematical Physics}.
\newblock Springer-Verlag, New York, third edition, 1997.

\bibitem[Tom96]{Tompaidis96}
S.~Tompaidis.
\newblock Numerical study of invariant sets of a quasiperiodic perturbation of
  a symplectic map.
\newblock {\em Experiment. Math.}, 5(3):211--230, 1996.

\bibitem[Val98]{Valdinoci98}
E.~Valdinoci.
\newblock Poster presented in {A}ussois.
\newblock 1998.

\bibitem[Way84]{Wayne84}
C.~E. Wayne.
\newblock The {K}{A}{M} theory of systems with short range interactions. {I},
  {I}{I}.
\newblock {\em Comm. Math. Phys.}, 96(3):311--329, 331--344, 1984.

\bibitem[Way86]{Wayne86}
C.~E. Wayne.
\newblock Bounds on the trajectories of a system of weakly coupled rotators.
\newblock {\em Comm. Math. Phys.}, 104(1):21--36, 1986.

\bibitem[Way96]{Wayne96}
C.~E. Wayne.
\newblock An introduction to {K}{A}{M} theory.
\newblock In {\em Dynamical Systems and Probabilistic Methods in Partial
  Differential Equations (Berkeley, CA, 1994)}, pages 3--29. Amer. Math. Soc.,
  Providence, RI, 1996.

\bibitem[Whi34a]{Whitney34a}
H.~Whitney.
\newblock Analytic extensions of differentiable functions defined in closed
  sets.
\newblock {\em Trans. Amer. Math. Soc.}, 36(1):63--89, 1934.

\bibitem[Whi34b]{Whitney34b}
H.~Whitney.
\newblock Differentiable functions defined in closed sets. {I}.
\newblock {\em Trans. Amer. Math. Soc.}, 36(2):369--387, 1934.

\bibitem[Whi88]{Whittaker}
E.~T. Whittaker.
\newblock {\em A Treatise on the Analytical Dynamics of Particles and Rigid
  Bodies. {\rm With an Introduction to the Problem of Three Bodies}}.
\newblock Cambridge University Press, Cambridge, 1988.

\bibitem[Xia92]{Xia92}
Zhihong Xia.
\newblock Existence of invariant tori in volume-preserving diffeomorphisms.
\newblock {\em Ergodic Theory Dynamical Systems}, 12(3):621--631, 1992.

\bibitem[Yoc92]{Yoccoz92b}
J.-C. Yoccoz.
\newblock Travaux de {H}erman sur les tores invariants.
\newblock {\em Ast\'erisque}, 206:Exp.\ No.\ 754, 4, 311--344, 1992.
\newblock S\'eminaire Bourbaki, Vol.\ 1991/92.

\bibitem[Yoc95]{Yoccoz95}
J.-C. Yoccoz.
\newblock Th\'eor\`eme de {S}iegel, nombres de {B}runo et polyn\^{o}mes
  quadratiques.
\newblock {\em Ast\'erisque}, 231:3--88, 1995.

\bibitem[Zeh75]{Zehnder75}
E.~Zehnder.
\newblock Generalized implicit function theorems with applications to some
  small divisor problems. {I}.
\newblock {\em Comm. Pure Appl. Math.}, 28:91--140, 1975.

\bibitem[Zeh76a]{Zehnder76}
E.~Zehnder.
\newblock Generalized implicit function theorems with applications to some
  small divisor problems. {I}{I}.
\newblock {\em Comm. Pure Appl. Math.}, 29(1):49--111, 1976.

\bibitem[Zeh76b]{Zehnder76b}
Eduard Zehnder.
\newblock Moser's implicit function theorem in the framework of analytic
  smoothing.
\newblock {\em Math. Ann.}, 219(2):105--121, 1976.

\bibitem[Zeh77]{Zehnder77}
E.~Zehnder.
\newblock A simple proof of a generalization of a theorem by {C}. {L}.
  {S}iegel.
\newblock In {\em Geometry and Topology (Proc. III Latin Amer. School of Math.,
  Inst. Mat. Pura Aplicada CNPq, Rio de Janeiro, 1976)}, pages 855--866.
  Lecture Notes in Math., Vol. 597. Springer, Berlin, 1977.

\end{thebibliography}
\end{document}