<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:p="urn:schemas-microsoft-com:office:powerpoint" xmlns:a="urn:schemas-microsoft-com:office:access" xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882" xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882" xmlns:rs="urn:schemas-microsoft-com:rowset" xmlns:z="#RowsetSchema" xmlns:b="urn:schemas-microsoft-com:office:publisher" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet" xmlns:odc="urn:schemas-microsoft-com:office:odc" xmlns:oa="urn:schemas-microsoft-com:office:activation" xmlns:html="http://www.w3.org/TR/REC-html40" xmlns:q="http://schemas.xmlsoap.org/soap/envelope/" xmlns:D="DAV:" xmlns:x2="http://schemas.microsoft.com/office/excel/2003/xml" xmlns:ois="http://schemas.microsoft.com/sharepoint/soap/ois/" xmlns:dir="http://schemas.microsoft.com/sharepoint/soap/directory/" xmlns:ds="http://www.w3.org/2000/09/xmldsig#" xmlns:dsp="http://schemas.microsoft.com/sharepoint/dsp" xmlns:udc="http://schemas.microsoft.com/data/udc" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:sub="http://schemas.microsoft.com/sharepoint/soap/2002/1/alerts/" xmlns:ec="http://www.w3.org/2001/04/xmlenc#" xmlns:sp="http://schemas.microsoft.com/sharepoint/" xmlns:sps="http://schemas.microsoft.com/sharepoint/soap/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:udcxf="http://schemas.microsoft.com/data/udc/xmlfile" xmlns:wf="http://schemas.microsoft.com/sharepoint/soap/workflow/" xmlns:mver="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns:mrels="http://schemas.openxmlformats.org/package/2006/relationships" xmlns:ex12t="http://schemas.microsoft.com/exchange/services/2006/types" 
xmlns:ex12m="http://schemas.microsoft.com/exchange/services/2006/messages" xmlns:Z="urn:schemas-microsoft-com:" xmlns:st="" xmlns="http://www.w3.org/TR/REC-html40">

<head>
<meta http-equiv=Content-Type content="text/html; charset=us-ascii">
<meta name=Generator content="Microsoft Word 12 (filtered medium)">
<!--[if !mso]>
<style>
v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style>
<![endif]-->
<title>Summary of today's meeting</title>
<style>
<!--
 /* Font Definitions */
 @font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:Tahoma;
        panose-1:2 11 6 4 3 5 4 4 2 4;}
 /* Style Definitions */
 p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
p
        {mso-style-priority:99;
        mso-margin-top-alt:auto;
        margin-right:0in;
        mso-margin-bottom-alt:auto;
        margin-left:0in;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";}
span.EmailStyle17
        {mso-style-type:personal;
        font-family:"Calibri","sans-serif";
        color:#1F497D;}
span.EmailStyle19
        {mso-style-type:personal-reply;
        font-family:"Calibri","sans-serif";
        color:#1F497D;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page Section1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.Section1
        {page:Section1;}
 /* List Definitions */
 @list l0
        {mso-list-id:437024744;
        mso-list-template-ids:-2036855040;}
@list l0:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1
        {mso-list-id:773288165;
        mso-list-template-ids:-1789259314;}
@list l1:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2
        {mso-list-id:959188578;
        mso-list-template-ids:82582192;}
@list l2:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l3
        {mso-list-id:1581523520;
        mso-list-template-ids:1976729270;}
@list l3:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l4
        {mso-list-id:1648820226;
        mso-list-template-ids:-390269798;}
@list l4:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l5
        {mso-list-id:1850096045;
        mso-list-template-ids:-1431802212;}
@list l5:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
ol
        {margin-bottom:0in;}
ul
        {margin-bottom:0in;}
-->
</style>
<!--[if gte mso 9]><xml>
 <o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
 <o:shapelayout v:ext="edit">
  <o:idmap v:ext="edit" data="1" />
 </o:shapelayout></xml><![endif]-->
</head>

<body lang=EN-US link=blue vlink=purple>

<div class=Section1>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Thanks Kannan,<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>In the bullet list below you probably meant that system-level
C/R requires some quiescence hooks (not application-level).<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>When I look at the restore requirements on MPI as described
below, they seem quite extensive, including re-pinning and reopening any previously
opened communication handles.<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<div>

<div style='border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in'>

<p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>From:</span></b><span
style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'> mpi3-ft-bounces@lists.mpi-forum.org
[mailto:mpi3-ft-bounces@lists.mpi-forum.org] <b>On Behalf Of </b>Narasimhan,
Kannan<br>
<b>Sent:</b> Thursday, October 23, 2008 12:53 PM<br>
<b>To:</b> MPI 3.0 Fault Tolerance and Dynamic Process Control working Group<br>
<b>Subject:</b> Re: [Mpi3-ft] Summary of today's meeting<o:p></o:p></span></p>

</div>

</div>

<p class=MsoNormal><o:p> </o:p></p>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Arial","sans-serif";
color:blue'>Some more notes from our discussion on the topic of MPI standard
 support for</span><span style='font-size:10.0pt;font-family:"Calibri","sans-serif";
color:blue'> "</span><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:blue'>checkpoint/restart": </span><o:p></o:p></p>

<ul type=disc>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l5 level1 lfo1'><span style='font-size:10.0pt;font-family:"Arial","sans-serif";
     color:blue'>We grouped C/R under two categories: application-directed and
     system-level. </span><o:p></o:p></li>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l5 level1 lfo1'><span style='font-size:10.0pt;font-family:"Arial","sans-serif";
     color:blue'>System-level C/R can be accomplished via many techniques:
     intercepting every level of system stack, using virtualization techniques,
     etc.</span><o:p></o:p></li>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l5 level1 lfo1'><span style='font-size:10.0pt;font-family:"Arial","sans-serif";
     color:blue'>Application-directed C/R will still require some quiescence
     hooks from the MPI layer (e.g., asynchronous progression by the MPI layer).
     There was some discussion on this.</span><o:p></o:p></li>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l5 level1 lfo1'><span style='font-size:10.0pt;font-family:"Arial","sans-serif";
     color:blue'>The MPI requirements for System-level checkpointing cannot be
     formulated until we get more data to define a "quiet state"</span><o:p></o:p></li>
</ul>

<p class=MsoNormal> <o:p></o:p></p>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Arial","sans-serif";
color:blue'>I queried Mike Hefner on the semantics of freeze/unfreeze in their
(Evergrid/Librato) transparent C/R approach, and here is his
response :</span><o:p></o:p></p>

<p class=MsoNormal> <o:p></o:p></p>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Arial","sans-serif";
color:blue'>Question 1: What is your definition of a quiet state (after the
freeze call)? Do you expect the MPI to unpin memory? free resources? or just
quiet the message traffic? We need to explicitly state the semantics here ...</span><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif"'><o:p></o:p></span></p>

<p><span style='font-size:10.0pt;font-family:"Arial","sans-serif";color:purple'>We
defined it as a state that will provide a consistent state of the application
across all processes. From the MPI standpoint, this would mean a state whereby
all processes in the "freeze" state would be able to continue
communication if a restart were invoked.</span><span style='font-size:11.0pt;
font-family:"Calibri","sans-serif"'><o:p></o:p></span></p>

<p><span style='font-size:10.0pt;font-family:"Arial","sans-serif";color:purple'>In
terms of particular resources, our CP/R software manages storing all
application and, optionally, all MPI memory. This includes memory that has been
allocated by either a malloc(3) call or a mmap/mremap call. If that memory has
been pinned by the IB driver, we will store it to disk as well. We also store
the primary process resources in use: IPCs, shared memory, file handles and
file rollback state, etc.</span><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'><o:p></o:p></span></p>

<p><span style='font-size:10.0pt;font-family:"Arial","sans-serif";color:purple'>These
memory regions and other resources are recorded after each process returns from
the freeze API</span><span style='font-size:10.0pt;font-family:"Arial","sans-serif";
color:blue'>.</span><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'><o:p></o:p></span></p>

<p><span style='font-size:10.0pt;font-family:"Arial","sans-serif";color:blue'>Question
2: The same goes for restore. What is expected to be there, and what is
expected to be supplied as the context....</span><span style='font-size:11.0pt;
font-family:"Calibri","sans-serif"'><o:p></o:p></span></p>

<p><span style='font-size:10.0pt;font-family:"Arial","sans-serif";color:purple'>On
a restore all of the memory (and other resources such as IPCs, open files,
etc.) will be recreated and reloaded with the state that was recorded at
checkpoint time *before* the restart API is called. On the restart, it is
expected that the MPI stack reinitialize the interconnect card, recreate
necessary handles for fabric communication, and re-pin all previously pinned
memory regions in use by the fabric's card.</span><span style='font-size:11.0pt;
font-family:"Calibri","sans-serif"'><o:p></o:p></span></p>

<p><span style='font-size:10.0pt;font-family:"Arial","sans-serif";color:blue'>-Kannan-</span><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif"'><o:p></o:p></span></p>

<div class=MsoNormal align=center style='text-align:center'>

<hr size=2 width="100%" align=center>

</div>

<p class=MsoNormal style='margin-bottom:12.0pt'><b><span style='font-size:10.0pt;
font-family:"Tahoma","sans-serif"'>From:</span></b><span style='font-size:10.0pt;
font-family:"Tahoma","sans-serif"'> mpi3-ft-bounces@lists.mpi-forum.org
[mailto:mpi3-ft-bounces@lists.mpi-forum.org] <b>On Behalf Of </b>Erez Haba<br>
<b>Sent:</b> Wednesday, October 22, 2008 8:53 PM<br>
<b>To:</b> MPI 3.0 Fault Tolerance and Dynamic Process Control working Group<br>
<b>Subject:</b> Re: [Mpi3-ft] Summary of today's meeting</span><o:p></o:p></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Thanks for capturing this.<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>My comments inline…<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<div>

<div style='border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in'>

<p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>From:</span></b><span
style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>
mpi3-ft-bounces@lists.mpi-forum.org
[mailto:mpi3-ft-bounces@lists.mpi-forum.org] <b>On Behalf Of </b>Richard Graham<br>
<b>Sent:</b> Tuesday, October 21, 2008 9:03 PM<br>
<b>To:</b> MPI 3.0 Fault Tolerance and Dynamic Process Control working Group<br>
<b>Subject:</b> [Mpi3-ft] Summary of today's meeting<o:p></o:p></span></p>

</div>

</div>

<p class=MsoNormal><o:p> </o:p></p>

<p class=MsoNormal style='margin-bottom:12.0pt'><span style='font-size:11.0pt;
font-family:"Calibri","sans-serif"'>Here is a summary of what I think that we
agreed to today.  Please correct any errors, and add what I am missing.</span><o:p></o:p></p>

<ul type=disc>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l3 level1 lfo2'><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'>We
     need to be able to restore MPI_COMM_WORLD (and its derivatives) to
     a usable state when a process fails. </span><o:p></o:p></li>
</ul>

<p class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto'><b><i><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>[erezh]
I think that we discussed this with reference to the comment that MPI is not
usable once it returned an error. we need to address that in the current
standard. (I think that this should be the first item on the list)<o:p></o:p></span></i></b></p>

<p class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto'><b><i><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>[erezh]
as I recall the second item on the list, is returning errors per call site (per
the Error Reporting Rules proposal)<o:p></o:p></span></i></b></p>

<p class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto'><b><i><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>[erezh]
as for this specific item, I think that the wording should be
“repair” rather than restore (where repair is either making a
“hole” in the communicator or “filling” the hole with
a new process).</span></i></b><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p></o:p></span></p>

<ul type=disc>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l2 level1 lfo3'><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'>Restoration
     may involve having MPI_PROC_NULL replace the lost process, or may replace
     the lost process with a new process (we have not specified how this would
     happen) </span><o:p></o:p></li>
</ul>

<p class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto'><b><i><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>[erezh]
again I would replace “restoration” with “repair”<o:p></o:p></span></i></b></p>

<p class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto'><b><i><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>[erezh]
We said that we can use MPI_PROC_NULL for making a “hole”. i.e.,
the communicator will not be in the error state anymore (thus you can receive
from MPI_ANY_SOURCE or use a collective) however any direct communication with
the “hole” rank is like using MPI_PROC_NULL.<o:p></o:p></span></i></b></p>

<p class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto'><b><i><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>[erezh]
We also said that replacing the lost process with a new one only applies to
MPI_COMM_WORLD.<o:p></o:p></span></i></b></p>

<ul type=disc>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l0 level1 lfo4'><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'>Processes
     communicating directly with the failed processes will be notified via a
     returned error code about the failure. </span><o:p></o:p></li>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l0 level1 lfo4'><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'>When
     a process is notified of the failure, comm_repair() must be called.
      Comm_repair() is not a collective call, and is what will initiate
     the communicator repair associated with the failed process. </span><o:p></o:p></li>
</ul>

<p class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto'><b><i><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>[erezh]
we also discussed “generation” or “revision” of a
process rank to identify if a process was recycled. I think that we ended up
saying that it’s not really required and it’s the application’s
responsibility to identify a restored process where there might be a dependency
on previous communication (with other ranks)</span></i></b><span style='font-size:
11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'><o:p></o:p></span></p>

<ul type=disc>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l4 level1 lfo5'><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'>If
     a process wants to be notified of process failure even if it is not
     communicating directly with this process, it must register for this
     notification. </span><o:p></o:p></li>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l4 level1 lfo5'><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'>We
     don’t have enough information to know how to continue with support
     for checkpoint/restart. </span><o:p></o:p></li>
</ul>

<p class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto'><b><i><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>[erezh]
we discussed system-level checkpoint/restart versus application-aware
checkpoint/restart</span></i></b><span style='font-size:11.0pt;font-family:
"Calibri","sans-serif";color:#1F497D'><o:p></o:p></span></p>

<ul type=disc>
 <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
     mso-list:l1 level1 lfo6'><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'>We
     need to discuss what needs to be done with respect to failure of collective
     communications.</span> <o:p></o:p></li>
</ul>

<p class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto'><b><i><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>[erezh]
we raised the issue of identifying an asymmetric view of the communicator after a
“hole” repair and its impact on collectives (e.g., the link between
ranks 2 and 3 is broken but they can both communicate with rank 1). Furthermore, we
explored a solution by adding information to the collective message(s) to
identify that the communicator view is consistent. (we said that it requires
further exploration)</span></i></b><span style='font-size:11.0pt;font-family:
"Calibri","sans-serif";color:#1F497D'><o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif"'><br>
There are several issues that came up with respect to these, which will be
detailed later on.<br>
<br>
Rich</span><o:p></o:p></p>

</div>

</body>

</html>