<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:p="urn:schemas-microsoft-com:office:powerpoint" xmlns:a="urn:schemas-microsoft-com:office:access" xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882" xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882" xmlns:rs="urn:schemas-microsoft-com:rowset" xmlns:z="#RowsetSchema" xmlns:b="urn:schemas-microsoft-com:office:publisher" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet" xmlns:odc="urn:schemas-microsoft-com:office:odc" xmlns:oa="urn:schemas-microsoft-com:office:activation" xmlns:html="http://www.w3.org/TR/REC-html40" xmlns:q="http://schemas.xmlsoap.org/soap/envelope/" xmlns:rtc="http://microsoft.com/officenet/conferencing" xmlns:D="DAV:" xmlns:Repl="http://schemas.microsoft.com/repl/" xmlns:mt="http://schemas.microsoft.com/sharepoint/soap/meetings/" xmlns:x2="http://schemas.microsoft.com/office/excel/2003/xml" xmlns:ppda="http://www.passport.com/NameSpace.xsd" xmlns:ois="http://schemas.microsoft.com/sharepoint/soap/ois/" xmlns:dir="http://schemas.microsoft.com/sharepoint/soap/directory/" xmlns:ds="http://www.w3.org/2000/09/xmldsig#" xmlns:dsp="http://schemas.microsoft.com/sharepoint/dsp" xmlns:udc="http://schemas.microsoft.com/data/udc" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:sub="http://schemas.microsoft.com/sharepoint/soap/2002/1/alerts/" xmlns:ec="http://www.w3.org/2001/04/xmlenc#" xmlns:sp="http://schemas.microsoft.com/sharepoint/" xmlns:sps="http://schemas.microsoft.com/sharepoint/soap/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:udcs="http://schemas.microsoft.com/data/udc/soap" xmlns:udcxf="http://schemas.microsoft.com/data/udc/xmlfile" xmlns:udcp2p="http://schemas.microsoft.com/data/udc/parttopart" xmlns:wf="http://schemas.microsoft.com/sharepoint/soap/workflow/" 
xmlns:dsss="http://schemas.microsoft.com/office/2006/digsig-setup" xmlns:dssi="http://schemas.microsoft.com/office/2006/digsig" xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" xmlns:mver="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns:mrels="http://schemas.openxmlformats.org/package/2006/relationships" xmlns:spwp="http://microsoft.com/sharepoint/webpartpages" xmlns:ex12t="http://schemas.microsoft.com/exchange/services/2006/types" xmlns:ex12m="http://schemas.microsoft.com/exchange/services/2006/messages" xmlns:pptsl="http://schemas.microsoft.com/sharepoint/soap/SlideLibrary/" xmlns:spsl="http://microsoft.com/webservices/SharePointPortalServer/PublishedLinksService" xmlns:Z="urn:schemas-microsoft-com:" xmlns:st="" xmlns="http://www.w3.org/TR/REC-html40">

<head>
<meta http-equiv=Content-Type content="text/html; charset=us-ascii">
<meta name=Generator content="Microsoft Word 12 (filtered medium)">
<!--[if !mso]>
<style>
v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style>
<![endif]-->
<style>
<!--
 /* Font Definitions */
 @font-face
        {font-family:Helvetica;
        panose-1:2 11 6 4 2 2 2 2 2 4;}
@font-face
        {font-family:Helvetica;
        panose-1:2 11 6 4 2 2 2 2 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:Tahoma;
        panose-1:2 11 6 4 3 5 4 4 2 4;}
@font-face
        {font-family:Verdana;
        panose-1:2 11 6 4 3 5 4 4 2 4;}
 /* Style Definitions */
 p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
p
        {mso-style-priority:99;
        mso-margin-top-alt:auto;
        margin-right:0in;
        mso-margin-bottom-alt:auto;
        margin-left:0in;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";}
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0in;
        margin-right:0in;
        margin-bottom:0in;
        margin-left:.5in;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";}
p.ecmsonormal, li.ecmsonormal, div.ecmsonormal
        {mso-style-name:ec_msonormal;
        mso-margin-top-alt:auto;
        margin-right:0in;
        mso-margin-bottom-alt:auto;
        margin-left:0in;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";}
p.ecmsochpdefault, li.ecmsochpdefault, div.ecmsochpdefault
        {mso-style-name:ec_msochpdefault;
        mso-margin-top-alt:auto;
        margin-right:0in;
        mso-margin-bottom-alt:auto;
        margin-left:0in;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";}
span.ecmsohyperlink
        {mso-style-name:ec_msohyperlink;}
span.ecmsohyperlinkfollowed
        {mso-style-name:ec_msohyperlinkfollowed;}
span.ecemailstyle20
        {mso-style-name:ec_emailstyle20;}
p.ecmsonormal1, li.ecmsonormal1, div.ecmsonormal1
        {mso-style-name:ec_msonormal1;
        mso-margin-top-alt:auto;
        margin-right:0in;
        margin-bottom:0in;
        margin-left:0in;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";}
span.ecmsohyperlink1
        {mso-style-name:ec_msohyperlink1;
        color:blue;
        text-decoration:underline;}
span.ecmsohyperlinkfollowed1
        {mso-style-name:ec_msohyperlinkfollowed1;
        color:purple;
        text-decoration:underline;}
span.ecemailstyle201
        {mso-style-name:ec_emailstyle201;
        font-family:"Calibri","sans-serif";
        color:#1F497D;}
p.ecmsochpdefault1, li.ecmsochpdefault1, div.ecmsochpdefault1
        {mso-style-name:ec_msochpdefault1;
        mso-margin-top-alt:auto;
        margin-right:0in;
        mso-margin-bottom-alt:auto;
        margin-left:0in;
        font-size:10.0pt;
        font-family:"Times New Roman","serif";}
span.ececapple-style-span
        {mso-style-name:ec_ecapple-style-span;}
span.ececapple-converted-space
        {mso-style-name:ec_ecapple-converted-space;}
span.EmailStyle30
        {mso-style-type:personal-reply;
        font-family:"Calibri","sans-serif";
        color:#1F497D;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page Section1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.Section1
        {page:Section1;}
 /* List Definitions */
 @list l0
        {mso-list-id:148597673;
        mso-list-type:hybrid;
        mso-list-template-ids:534549390 67698705 67698713 67698715 67698703 67698713 67698715 67698703 67698713 67698715;}
@list l0:level1
        {mso-level-text:"%1\)";
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
ol
        {margin-bottom:0in;}
ul
        {margin-bottom:0in;}
-->
</style>
<!--[if gte mso 9]><xml>
 <o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
 <o:shapelayout v:ext="edit">
  <o:idmap v:ext="edit" data="1" />
 </o:shapelayout></xml><![endif]-->
</head>

<body lang=EN-US link=blue vlink=purple>

<div class=Section1>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>I agree that the issue is fundamental on some networks, but it
is not going to be fundamental across all networks.  I’m more
worried about the “not broken” networks ;-)  Actually, I’m
more worried about the ability to build a not-broken network for the purpose.    And,
having a collective allocation fixes most of the issues, but it does leave some
open questions.  I would like us to update the design goal to acknowledge
that collective allocation is allowed, but not required. <o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Anyway, as far as what I’m worried about, I’ll use a
very specialized example to make the point...<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>So, I’ll start by assuming shmem, Portals 3.3, a
lightweight kernel, and hardware optimized to try to do that.  I apologize
to those not versed in Portals ;-)<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Let’s say that your shmem uses Portals to expose the entire
virtual address space with an ME.  The ME is persistent (never changes)
and is the only item on a given portal table entry.  The hardware takes
the head of that list and caches it in an associative matching structure. 
Now you can process a message from the network every cycle…  Oh, the
hardware has to do virtual to physical address translation and it had better do
protection based on whether a virtual page is physically backed or not, but,
wait, I can constrain the kernel to always guarantee that ;-)<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>At the transmitter, let’s say that you can push a (PE, portal
table entry, offset) tuple as the target address directly to the hardware
somehow… the T3E did something like this using E-registers, a centrifuge,
and a machine partitioning approach.  Ok, that takes a change to the
Portals API, but we did that for Portals 4.  Now you can push messages
into the network very quickly too (PE is in the shmem call, portal table entry
is a constant, offset is in virtual address space and is symmetric across nodes,
so it is one or two arithmetic instructions).<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>And, of course, there are platforms like the Cray X1 that had
even better hardware support for this type of operation.<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Ok, so, how does the proposal we are discussing for MPI differ?<o:p></o:p></span></p>

<p class=MsoListParagraph style='text-indent:-.25in;mso-list:l0 level1 lfo1'><![if !supportLists]><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'><span
style='mso-list:Ignore'>1)<span style='font:7.0pt "Times New Roman"'>     
</span></span></span><![endif]><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>There is a communicator context.  How much work this is
depends on how much you need from that context.<o:p></o:p></span></p>

<p class=MsoListParagraph style='text-indent:-.25in;mso-list:l0 level1 lfo1'><![if !supportLists]><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'><span
style='mso-list:Ignore'>2)<span style='font:7.0pt "Times New Roman"'>     
</span></span></span><![endif]><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>There is a “memory object”.  How many of these
are there?  Is there anything that strongly encourages the user to keep it
relatively small?  If not, how do I build good hardware support?  I
can’t have an unlimited number of unique system wide identifiers.<o:p></o:p></span></p>

<p class=MsoListParagraph style='text-indent:-.25in;mso-list:l0 level1 lfo1'><![if !supportLists]><span
style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'><span
style='mso-list:Ignore'>3)<span style='font:7.0pt "Times New Roman"'>     
</span></span></span><![endif]><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>We have the nagging issue of non-coherent architectures and
heterogeneous architectures.  Ok, that one is probably workable, since vendor
specific MPI implementations may drop both if their architecture is coherent
and homogeneous.  Of course, if it causes us to do something weird with
the completion semantics (ala the ability to defer all actual data transfer to
the end of a window in the current MPI-2 one sided operations), that could be
an issue.<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>So, the telling issue will be:  if you put this proposal on
a Cray X1 (does ORNL still have one of those running?), how does it compare to the
message rate of shmem on the same platform?   Perhaps doing such a prototype
would help us advance the discussion more than anything.  We would have a
baseline where we could say “what would it take to get competitive?”. 
Unfortunately, I don’t know of many other places that currently support shmem
well enough to make a good comparison.<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Keith<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>

<div style='border:none;border-left:solid blue 1.5pt;padding:0in 0in 0in 4.0pt'>

<div>

<div style='border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in'>

<p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>From:</span></b><span
style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'> mpi3-rma-bounces@lists.mpi-forum.org
[mailto:mpi3-rma-bounces@lists.mpi-forum.org] <b>On Behalf Of </b>Vinod
tipparaju<br>
<b>Sent:</b> Friday, September 04, 2009 9:41 AM<br>
<b>To:</b> MPI 3.0 Remote Memory Access working group<br>
<b>Subject:</b> Re: [Mpi3-rma] MPI3 RMA Design Goals<o:p></o:p></span></p>

</div>

</div>

<p class=MsoNormal><o:p> </o:p></p>

<p class=MsoNormal><span lang=EN style='font-size:10.0pt;font-family:"Verdana","sans-serif"'>hence
the word "similar" to MDBind. MDBind can potentially do a lot more as
far as preparing the network. Binding is for reducing latency, for no other
reason. I agree that in your example of random target communication, it is not
useful.<br>
 <br>
You are right and I have always agreed that when you do need such a table of
mem-object data structures or a table of pointers under the hood of the
implementation of collective mem-object data structures, random accesses will
incur cache misses and latency. I get that. My points are (please let me know if
you disagree with any of the bullets):<o:p></o:p></span></p>

<p class=MsoNormal style='margin-left:.5in'><span lang=EN style='font-size:
10.0pt;font-family:"Verdana","sans-serif"'>1) the problem doesn't exist if collective
memobjs are used and the memobjs can internally allocate symmetric memory (same
pointers and same "keys" across the system).<br>
2) This is a more fundamental problem associated with if it is possible to
allocate symmetric memory and corresponding symmetric key-ids on networks that
require them. <br>
3) this problem is the same for shmem and these interfaces. <br>
4) this problem of cache misses for random target communication call will occur
if: a) the implementation of a collective object requires an array of keys or
memory region identifiers for the sake of communication and creation of a single key
is not possible, b) an array of pointers is required on this system instead of
1 symmetric pointer, and, c) if, say, ofed will have different keys on each
node and the user cannot pass a number that he/she wants to be the key value
-- there is nothing you, me, or any interface (the
"users") can do to fix this.<o:p></o:p></span></p>

<p class=MsoNormal><span lang=EN style='font-size:10.0pt;font-family:"Verdana","sans-serif"'>To
me it seems like we are discussing a case that is a fundamental problem
that we don’t have control over. I cannot see how you would define an
interface that will not have cache misses for random target
communication in case of 4).<br>
 <br>
<br>
So the question is are these interfaces causing cache misses when they can be
avoided otherwise? I don’t think so. Do you? If you agree, we have
concluded the discussion.<br>
</span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'> <br>
--<br>
Vinod Tipparaju ^ http://ft.ornl.gov/~vinod ^ 1-865-241-1802<br>
<br>
<br>
 <o:p></o:p></span></p>

<div class=MsoNormal align=center style='text-align:center'><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'>

<hr size=2 width="100%" align=center id=stopSpelling>

</span></div>

<p class=MsoNormal style='margin-bottom:12.0pt'><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'>From: keith.d.underwood@intel.com<br>
To: mpi3-rma@lists.mpi-forum.org<br>
Date: Fri, 4 Sep 2009 08:51:12 -0600<br>
Subject: Re: [Mpi3-rma] MPI3 RMA Design Goals<o:p></o:p></span></p>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Bill’s proposal was for a persistent handle for transmit
operations that included things like target rank.  I think there is some
merit in that, though we need to evaluate the trade-offs.  Unfortunately,
that does not really do anything for the random communication
scenario.   In the non-random communication scenarios, I’m a
lot less worried about things like random lookups of local data structures
getting in the way.  </span><span style='font-size:10.0pt;font-family:
"Verdana","sans-serif"'><o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><br>
Bill’s proposal was nothing like MDBind in Portals, and MDBind does
nothing to help the issue I was concerned about.  Specifically, MDBind
only associates local options and a local memory region with a handle.  It
says nothing about the target node or the target memory region.  It is the
lookup of information associated with the target node and target memory region
that I am worried about and that is addressed by Bill’s proposal for the
non-random access case.</span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Keith</span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

<div style='border:none;border-left:solid blue 1.5pt;padding:0in 0in 0in 4.0pt'>

<div>

<div style='border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in'>

<p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>From:</span></b><span
style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>
mpi3-rma-bounces@lists.mpi-forum.org
[mailto:mpi3-rma-bounces@lists.mpi-forum.org] <b>On Behalf Of </b>Vinod
tipparaju<br>
<b>Sent:</b> Thursday, September 03, 2009 10:39 PM<br>
<b>To:</b> MPI 3.0 Remote Memory Access working group<br>
<b>Subject:</b> Re: [Mpi3-rma] MPI3 RMA Design Goals</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'> <o:p></o:p></span></p>

<p class=MsoNormal style='margin-bottom:12.0pt'><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'>I forgot to include an important parameter
(communicator) in the pseudo interface below:<br>
<span class=ececapple-style-span><span style='color:black'>Create_memobj_collective(IN
user_ptr, IN size, IN communicator, OUT mem_obj)</span></span><span
style='color:black'><br>
<br>
<span class=ececapple-style-span>In addition to this, Bill suggested a Bind
interface (something similar to MDBind in portals) that would help reduce
latency for commonly re-used RMAs.</span><br>
<br>
<br>
</span>Vinod Tipparaju ^ http://ft.ornl.gov/~vinod ^ 1-865-241-1802<br>
<br>
<o:p></o:p></span></p>

<div class=MsoNormal align=center style='text-align:center'><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'>

<hr size=2 width="100%" align=center>

</span></div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'>From:
wgropp@illinois.edu<br>
To: mpi3-rma@lists.mpi-forum.org<br>
Date: Thu, 3 Sep 2009 13:20:38 -0500<br>
Subject: Re: [Mpi3-rma] MPI3 RMA Design Goals<br>
<br>
Design goal one allows collective creation of objects; it's there because many
important algorithms don't have collective (over MPI_COMM_WORLD) allocation
semantics, and a design that *requires* collective creation of memory objects
will also limit the use of the interface.<o:p></o:p></span></p>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'> <o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'>Bill<o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'> <o:p></o:p></span></p>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'>On
Sep 3, 2009, at 10:49 AM, Underwood, Keith D wrote:<o:p></o:p></span></p>

</div>

<blockquote style='margin-top:5.0pt;margin-bottom:5.0pt'>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'> <o:p></o:p></span></p>

<div>

<div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>My commentary was on the design goals…  if we<span
class=ececapple-converted-space> </span><b><i>allow</i></b><span
class=ececapple-converted-space> </span>collective creation of memory
objects, and design goal #1 simply says we don’t<span
class=ececapple-converted-space> </span><b><i>require</i></b><span
class=ececapple-converted-space> </span>it, that may be ok.  Design
goal #1 could be interpreted to mean that you wouldn’t have collective
creation in the semantic at all.  Do you really anticipate one data type
for an object that is either collectively or non-collectively created? </span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>I strongly disagree with your assertion that you can communicate
with no cache misses for the non-collectively allocated memory object.  In
a non-collectively allocated case, you will have to keep an array of these on
every process, right?  i.e. one for every process you are communicating
with?  Randomly indexing that array is going to pound on your cache.</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>We need to make sure that we don’t ignore the overhead of
having multiple communicators and heterogeneity.  Yes, I think there are
ways around this, but we should at least consider what is practical and likely
rather than just what is possible.</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Keith</span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div style='border:none;border-left:solid blue 1.5pt;padding:0in 0in 0in 4.0pt'>

<div>

<div style='border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in'>

<div>

<p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Tahoma","sans-serif";
color:black'>From:</span></b><span class=ececapple-converted-space><span
style='font-size:10.0pt;font-family:"Tahoma","sans-serif";color:black'> </span></span><span
style='font-size:10.0pt;font-family:"Tahoma","sans-serif";color:black'><a
href="mailto:mpi3-rma-bounces@lists.mpi-forum.org">mpi3-rma-bounces@lists.mpi-forum.org</a><span
class=ececapple-converted-space> </span>[<a
href="mailto:mpi3-rma-bounces@lists.mpi-forum.org">mailto:mpi3-rma-bounces@lists.mpi-forum.org</a>]<span
class=ececapple-converted-space> </span><b>On Behalf Of<span
class=ececapple-converted-space> </span></b>Vinod tipparaju<br>
<b>Sent:</b><span class=ececapple-converted-space> </span>Tuesday,
September 01, 2009 10:15 AM<br>
<b>To:</b><span class=ececapple-converted-space> </span>MPI 3.0 Remote
Memory Access working group<br>
<b>Subject:</b><span class=ececapple-converted-space> </span>Re:
[Mpi3-rma] MPI3 RMA Design Goals</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>You are correct in trying to look at the best possible case and
estimating cache-misses/performance-bottlenecks. However, I personally don't
see any difference between this and shmem. When you cannot really allocate
symmetric memory underneath, the amount of bookkeeping is same in both cases.
When there is no heterogeneity, the check for this can be disabled at MPI
startup. When there is heterogeneity we cannot compare with shmem.</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>I cannot imagine not having symmetric/collective memory object
creation to support these RMA interfaces, I think it is a must-have. Sorry I
have only been saying we should have these interfaces but haven't given any
example for this yet. Given how many times this same issue is coming up, I will
do it now.</span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Consider the creation interfaces:</span><span style='font-size:
10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Create_memobj(IN user_ptr, IN size, OUT mem_obj)</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Create_memobj_collective(user_ptr, size, OUT mem_obj)</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Assign_memobj(IN/OUT mem_obj, IN user_address, IN size) </span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>There will be more details on how a mem object which is a result
of create_memobj on process A will be exchanged with process B. When it is
exchanged explicitly, the heterogeneity information can be created at process
B. </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Now take the example with symmetric object:</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Process A                
                     
                       
        </span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>myptr = allocate(mysize);</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Create_memobj_collective(myptr,mysize, all_obj);</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Do all kinds of RMA_Xfers</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>and an example without symmetric object:</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>myptr = allocate(mysize);</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Create_memobj(myptr,mysize,my_obj);</span><span style='font-size:
10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> ----exchange objects here----</span><span style='font-size:
10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>do all kinds of RMA_Xfers</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>In both cases, I can see being able to communicate without any
cache misses for mem_obj.</span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<p class=MsoNormal style='margin-bottom:12.0pt'><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif";color:black'><br>
Vinod Tipparaju ^<span class=ececapple-converted-space> </span><a
href="http://ft.ornl.gov/~vinod">http://ft.ornl.gov/~vinod</a><span
class=ececapple-converted-space> </span>^ 1-865-241-1802</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

<div class=MsoNormal align=center style='text-align:center'><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif";color:black'>

<hr size=2 width="100%" align=center>

</span></div>

<p class=MsoNormal style='margin-bottom:12.0pt'><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif";color:black'>From:<span
class=ececapple-converted-space> </span><a
href="mailto:keith.d.underwood@intel.com">keith.d.underwood@intel.com</a><br>
To:<span class=ececapple-converted-space> </span><a
href="mailto:mpi3-rma@lists.mpi-forum.org">mpi3-rma@lists.mpi-forum.org</a><br>
Date: Tue, 1 Sep 2009 09:07:41 -0600<br>
Subject: Re: [Mpi3-rma] MPI3 RMA Design Goals</span><span style='font-size:
10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

<div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>If we take the SINGLE_RMA_INTERFACE_DRAFT_PROPOSAL as an example,
and combine it with the draft design goal #1:<span
class=ececapple-converted-space> </span></span><span style='font-size:
10.0pt;font-family:"Verdana","sans-serif";color:black'>In order to support RMA
to arbitrary locations, no constraints on memory, such as symmetric allocation
or collective window creation, can be required</span><span style='font-size:
10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>We get an interesting view on how difficult it can be to get
“close to the metal”.  So, for MPI_RMA_xfer, we have to assume
that the user has some array of target_mem data items.  That means the
sequence of steps in user space is:</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>target_mem = ranks[dest];</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>MPI_RMA_xfer(… target_mem, dest…);</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>If we assume that the message sizes are small and the destinations
randomly selected and the machine is large… every access to ranks is a
cache miss, and we cannot prevent that by providing fancy hardware.  This
actually leads me to believe that we may need to reconsider design goal #1, or
at least clarify what it means in a way that makes the access more efficient.</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>MPI_RMA_xfer itself is no picnic either.  If we take the
draft design goal #5: The RMA model must support non-cache-coherent and
heterogeneous environments, then MPI is required to maintain a data structure
for every rank (ok, it has to do this anyway, but we are trying to get close to
the metal) and do a lookup into that data structure with every MPI_RMA_xfer to
find out if the target is heterogeneous relative to the target rank –
another cache miss.  Now, nominally, since this is inside MPI, a lower
layer could absorb that check… or, a given MPI could refuse to support
heterogeneity or… but, you get the idea. </span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>So, we’ve got two cache line loads for every transfer. 
One in the application and one in the MPI library.  One is impossible to
move to the hardware and the other is simply very difficult to move. </span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>For a contrast, look at SHMEM.  Assume homogeneous, only one
communicator context, and hardware mapping of ranks to physical
locations.  A shmem_put() of a short item could literally be turned into a
few instructions and a processor store (on machines that supported such
things).  Personally, I think we will have done well if we can get to the
point that a reasonable hardware implementation can get MPI RMA to within 2x of
a reasonable SHMEM implementation.  I think we have a long way to go to
get there.</span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Keith</span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div style='border:none;border-left:solid blue 1.5pt;padding:0in 0in 0in 4.0pt'>

<div>

<div style='border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in'>

<div>

<p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Tahoma","sans-serif";
color:black'>From:</span></b><span class=ececapple-converted-space><span
style='font-size:10.0pt;font-family:"Tahoma","sans-serif";color:black'> </span></span><span
style='font-size:10.0pt;font-family:"Tahoma","sans-serif";color:black'><a
href="mailto:mpi3-rma-bounces@lists.mpi-forum.org">mpi3-rma-bounces@lists.mpi-forum.org</a><span
class=ececapple-converted-space> </span>[<a
href="mailto:mpi3-rma-bounces@lists.mpi-forum.org">mailto:mpi3-rma-bounces@lists.mpi-forum.org</a>]<span
class=ececapple-converted-space> </span><b>On Behalf Of<span
class=ececapple-converted-space> </span></b>Vinod tipparaju<br>
<b>Sent:</b><span class=ececapple-converted-space> </span>Tuesday,
September 01, 2009 5:23 AM<br>
<b>To:</b><span class=ececapple-converted-space> </span>MPI 3.0 Remote
Memory Access working group<br>
<b>Subject:</b><span class=ececapple-converted-space> </span>Re:
[Mpi3-rma] MPI3 RMA Design Goals</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Good points! RMA interfaces should do nothing to prevent utilizing
a high message rate (or low overhead communication) that the underlying
hardware may offer. To ensure this happens, there should always be an
unrestricted path (let's call it this for now, people have called it a
"thin layer", "direct access") to the network. </span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>This means, despite the fact that the RMA interface has features
that abstract out complexity by providing useful characteristics such as
ordering and atomicity, it (the RMA interface) should always have this
unrestricted, close to the heart of the hardware, path. To achieve this, the
unrestricted path should not require any book keeping (from implementation
perspective) in relation to the feature-rich path or vice-versa.  </span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>I believe this is what we have demonstrated with the example
interfaces hence the null set isn't the case here :-). I will distribute an
example implementation very soon so people can get a feel.</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>---</span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<div>

<div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";
color:black'>Vinod Tipparaju ^<span class=ececapple-converted-space> </span><a
href="http://ft.ornl.gov/~vinod">http://ft.ornl.gov/~vinod</a><span
class=ececapple-converted-space> </span>^ 1-865-241-1802<br>
<br>
<br>
<br>
> From:<span class=ececapple-converted-space> </span><a
href="mailto:keith.d.underwood@intel.com">keith.d.underwood@intel.com</a><br>
> To:<span class=ececapple-converted-space> </span><a
href="mailto:mpi3-rma@lists.mpi-forum.org">mpi3-rma@lists.mpi-forum.org</a><br>
> Date: Mon, 31 Aug 2009 16:17:28 -0600<br>
> Subject: Re: [Mpi3-rma] MPI3 RMA Design Goals<br>
><span class=ececapple-converted-space> </span><br>
> There has been stunning silence since this email, so I will go ahead and
toss out a thought...<br>
><span class=ececapple-converted-space> </span><br>
> In the draft design goals, I don't see two issues that I see as key. The
first is "support for high message rate/low overhead communications to
random targets". As best I can tell, this is one of the key places where
the existing one-sided operations are perceived as falling down for existing
customers of SHMEM/PGAS. The second is "elimination of the access epoch
requirement". This one may be, um, more controversial, but I believe it is
part and parcel with the first one. That is, the first one is not that valuable
if the programming model requires an excessive amount of access epoch opens and
closes just to force the global visibility of the operations. Unfortunately,
the intersection of this solution space with the solution space for the current
draft design goal #5 (support non-cache-coherent and heterogeneous
environments) may be the null set... I will hold out hope that this isn't the
case ;-)<br>
><span class=ececapple-converted-space> </span><br>
> Keith<span class=ececapple-converted-space> </span><br>
><span class=ececapple-converted-space> </span><br>
> > -----Original Message-----<br>
> > From: <a href="mailto:mpi3-rma-bounces@lists.mpi-forum.org">mpi3-rma-bounces@lists.mpi-forum.org</a>
[<a href="mailto:mpi3-rma-">mailto:mpi3-rma-</a><br>
> ><span class=ececapple-converted-space> </span><a
href="mailto:bounces@lists.mpi-forum.org">bounces@lists.mpi-forum.org</a>] On
Behalf Of William Gropp<br>
> > Sent: Wednesday, August 05, 2009 12:37 PM<br>
> > To:<span class=ececapple-converted-space> </span><a
href="mailto:mpi3-rma@lists.mpi-forum.org">mpi3-rma@lists.mpi-forum.org</a><br>
> > Subject: [Mpi3-rma] MPI3 RMA Design Goals<br>
> ><span class=ececapple-converted-space> </span><br>
> > I've added versions of the RMA design goals that we discussed at the<br>
> > Forum meeting last week to the wiki page for our group (<br>
> ><span class=ececapple-converted-space> </span><a
href="https://svn.mpi-forum.org/trac/mpi-forum-web/wiki/RmaWikiPage">https://svn.mpi-forum.org/trac/mpi-forum-web/wiki/RmaWikiPage</a><br>
> > ). This is a draft; let's discuss these. Also, feel free to add to<br>
> > the discussion, particularly in the background section.<br>
> ><span class=ececapple-converted-space> </span><br>
> > Bill<br>
> ><span class=ececapple-converted-space> </span><br>
> > William Gropp<br>
> > Deputy Director for Research<br>
> > Institute for Advanced Computing Applications and Technologies<br>
> > Paul and Cynthia Saylor Professor of Computer Science<br>
> > University of Illinois Urbana-Champaign<br>
> ><span class=ececapple-converted-space> </span><br>
> ><span class=ececapple-converted-space> </span><br>
> ><span class=ececapple-converted-space> </span><br>
> ><span class=ececapple-converted-space> </span><br>
> > _______________________________________________<br>
> > mpi3-rma mailing list<br>
> ><span class=ececapple-converted-space> </span><a
href="mailto:mpi3-rma@lists.mpi-forum.org">mpi3-rma@lists.mpi-forum.org</a><br>
> ><span class=ececapple-converted-space> </span><a
href="http://lists.mpi-forum.org/mailman/listinfo.cgi/mpi3-rma">http://lists.mpi-forum.org/mailman/listinfo.cgi/mpi3-rma</a><br>
><span class=ececapple-converted-space> </span><br>
> _______________________________________________<br>
> mpi3-rma mailing list<br>
><span class=ececapple-converted-space> </span><a
href="mailto:mpi3-rma@lists.mpi-forum.org">mpi3-rma@lists.mpi-forum.org</a><br>
><span class=ececapple-converted-space> </span><a
href="http://lists.mpi-forum.org/mailman/listinfo.cgi/mpi3-rma">http://lists.mpi-forum.org/mailman/listinfo.cgi/mpi3-rma</a></span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

</div>

</div>

</div>

</div>

</div>

</div>

</div>

<p class=MsoNormal><span style='font-size:13.5pt;font-family:"Helvetica","sans-serif";
color:black'>&lt;ATT00001.txt&gt;</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

</blockquote>

</div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'> <o:p></o:p></span></p>

<div>

<div>

<div>

<div>

<p class=MsoNormal><span style='font-size:9.0pt;font-family:"Helvetica","sans-serif";
color:black'>William Gropp</span><span style='font-size:10.0pt;font-family:
"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:9.0pt;font-family:"Helvetica","sans-serif";
color:black'>Deputy Director for Research</span><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:9.0pt;font-family:"Helvetica","sans-serif";
color:black'>Institute for Advanced Computing Applications and Technologies</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:9.0pt;font-family:"Helvetica","sans-serif";
color:black'>Paul and Cynthia Saylor Professor of Computer Science</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:9.0pt;font-family:"Helvetica","sans-serif";
color:black'>University of Illinois Urbana-Champaign</span><span
style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

<div>

<p class=MsoNormal><span style='font-size:9.0pt;font-family:"Helvetica","sans-serif";
color:black'> </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'><o:p></o:p></span></p>

</div>

</div>

<p class=MsoNormal style='margin-bottom:12.0pt'><span style='font-size:10.0pt;
font-family:"Verdana","sans-serif"'><o:p> </o:p></span></p>

</div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'> <o:p></o:p></span></p>

</div>

<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif"'> <o:p></o:p></span></p>

</div>

</div>

</div>

</div>

</div>

</body>

</html>