6#ifndef BRICK_BRICK_MPI_H 
    7#define BRICK_BRICK_MPI_H 
   10#include <unordered_map> 
   49template<
typename T, 
unsigned dim, 
unsigned d>
 
   52template<
typename T, 
unsigned dim>
 
   64template<
typename T, 
unsigned dim, 
unsigned d>
 
   86  typedef std::vector<std::pair<int, void *>> 
Dest;
 
   97    std::vector<MPI_Request> requests(
seclen.size() * 2);
 
   98    std::vector<MPI_Status> stats(requests.size());
 
  100#ifdef BARRIER_TIMESTEP 
  104    double st = omp_get_wtime(), ed;
 
  106    for (
int i = 0; i < 
seclen.size(); ++i) {
 
  109                &(requests[i << 1]));
 
  112                &(requests[(i << 1) + 1]));
 
  115    ed = omp_get_wtime();
 
  119    MPI_Waitall(
static_cast<int>(requests.size()), requests.data(), stats.data());
 
  121    ed = omp_get_wtime();
 
  146#ifdef BARRIER_TIMESTEP 
  150    double st = omp_get_wtime(), wtime = 0;
 
  151    for (
long i = 0; i < 
send.size(); ++i) {
 
  152      std::vector<MPI_Request> requests(
send[i].size() + 
recv[i].size());
 
  153      for (
long j = 0; j < 
recv[i].size(); ++j)
 
  154        MPI_Irecv(
recv[i][j].buf, 
recv[i][j].len, MPI_CHAR, 
recv[i][j].rank, j, 
comm, &(requests[j]));
 
  155      for (
long j = 0; j < 
send[i].size(); ++j)
 
  156        MPI_Isend(
send[i][j].buf, 
send[i][j].len, MPI_CHAR, 
send[i][j].rank, j, 
comm, &(requests[
recv[i].size() + j]));
 
  157      std::vector<MPI_Status> stats(requests.size());
 
  158      double stime = omp_get_wtime();
 
  159      MPI_Waitall(
static_cast<int>(requests.size()), requests.data(), stats.data());
 
  160      wtime += omp_get_wtime() - stime;
 
  178template<
unsigned dim,
 
  210  template<
typename T, 
unsigned di, 
unsigned d>
 
  225    if (region.
get(d + 1)) {
 
  229    if (region.
get(-d - 1)) {
 
  234      for (
unsigned i = 0; i < 
g_depth[d]; ++i)
 
  237      for (
unsigned i = 2 * 
g_depth[d]; i < 
dims[d]; ++i)
 
  245    for (
long d = 0; d < dim; ++d) {
 
  246      if (owner.
get(d + 1))
 
  249      if (owner.
get(-d - 1))
 
  258    for (
long d = 1; d <= dim; ++d)
 
  259      if (region.
get(d) || region.
get(-d))
 
  274      for (
int i = 0; i < 3; ++i) {
 
  275        if (i + cur < 1 || i + cur > 
t_dims[d] || ref < 0)
 
  278          adj[idx + i] = 
grid[ref + i - 1];
 
  281      for (
int i = 0; i < 3; ++i)
 
  282        if (i + cur < 1 || i + cur > 
t_dims[d] || ref < 0)
 
  306    assert(
dims.size() == dim);
 
  307    std::vector<unsigned> bdims = {BDims ...};
 
  309    std::reverse(bdims.begin(), bdims.end());
 
  311    for (
int i = 0; i < dim; ++i) {
 
  312      assert(depth % bdims[i] == 0);
 
  313      g_depth.emplace_back(depth / bdims[i]);
 
  314      this->dims[i] /= bdims[i];
 
  327    unsigned grid_size = 1;
 
  333    for (
int i = 0; i < dim; ++i) {
 
  340    grid = 
new unsigned[grid_size];
 
  342    int pagesize = sysconf(_SC_PAGESIZE);
 
  345    if (std::max(bSize, pagesize) % std::min(bSize, pagesize) != 0)
 
  346      throw std::runtime_error(
"brick size must be a factor/multiple of pagesize.");
 
  349    if (bSize < pagesize)
 
  350      factor = pagesize / bSize;
 
  353    unsigned pos = factor;
 
  355    auto mypop = [&pos, &factor, 
this](
BitSet owner, 
BitSet region) {
 
  357#ifndef DECOMP_PAGEUNALIGN 
  358      pos = (pos + factor - 1) / factor * factor;
 
  362    auto calc_pad = [&factor, 
this](
const BitSet ®ion) -> 
long {
 
  363#ifdef DECOMP_PAGEUNALIGN 
  370    std::vector<unsigned> st_pos;  
 
  371    std::vector<bool> pad_first;
 
  374    for (
long i = 0; i < 
skinlist.size(); ++i) {
 
  378      pad_first.push_back(((last & 
skinlist[i]).size() < (
skinlist[i] & next).size()));
 
  385    st_pos.emplace_back(pos);
 
  390    for (
long i = 0; i < 
skinlist.size(); ++i) {
 
  396      st_pos.emplace_back(pos);
 
  404    std::vector<BitSet> neighbors;
 
  408    for (
auto n: neighbors) {
 
  418      for (
int l = 0; l < 
skinlist.size(); ++l)
 
  422            i.skin_st = g.
skin_st = (unsigned) last;
 
  425            i.pos = st_pos[last];
 
  430        } 
else if (last >= 0) {
 
  432          i.last_pad = g.
last_pad = pad_first[last - 1] ? 0 : calc_pad(
skinlist[last - 1]);
 
  435          ghost.emplace_back(g);
 
  436          i.len = st_pos[last] - i.pos;
 
  437          i.skin_ed = (unsigned) last;
 
  438          skin.emplace_back(i);
 
  443        i.last_pad = g.
last_pad = pad_first[last - 1] ? 0 : calc_pad(
skinlist[last - 1]);
 
  446        ghost.emplace_back(g);
 
  447        i.len = st_pos[
skinlist.size()] - i.pos;
 
  448        i.skin_ed = (unsigned) last;
 
  449        skin.emplace_back(i);
 
  456    if (
bInfo == 
nullptr)
 
  458    for (
unsigned i = 0; i < grid_size; ++i)
 
  467    std::vector<MPI_Request> requests(
ghost.size() * 2);
 
  468    std::vector<MPI_Status> stats(requests.size());
 
  470#ifdef BARRIER_TIMESTEP 
  474    double st = omp_get_wtime(), ed;
 
  476    for (
int i = 0; i < 
ghost.size(); ++i) {
 
  478      MPI_Irecv(&(bStorage.
dat.get()[(
ghost[i].pos + 
ghost[i].first_pad) * bStorage.
step]),
 
  482      MPI_Isend(&(bStorage.
dat.get()[(
skin[i].pos + 
skin[i].first_pad) * bStorage.
step]),
 
  484                MPI_CHAR, 
rank_map[
skin[i].neighbor.set], i, 
comm, &(requests[(i << 1) + 1]));
 
  487    ed = omp_get_wtime();
 
  491    MPI_Waitall(
static_cast<int>(requests.size()), requests.data(), stats.data());
 
  493    ed = omp_get_wtime();
 
  530    std::vector<BitSet> neighbors;
 
  534    std::vector<size_t> seclen;
 
  535    std::vector<size_t> first_pad;
 
  543    for (
auto n: neighbors) {
 
  548      std::vector<size_t> packing;
 
  550      long first_pad_v = -1;
 
  552      for (
auto g: 
ghost) {
 
  553        if (g.neighbor.set == n.set) {
 
  555            first_pad_v = g.first_pad * bStorage.
step * 
sizeof(
bElem);
 
  556          last_pad = g.last_pad * bStorage.
step * 
sizeof(
bElem);
 
  557          packing.push_back(g.pos * bStorage.
step * 
sizeof(
bElem));
 
  558          size_t l = g.len * bStorage.
step * 
sizeof(
bElem);
 
  559          packing.push_back(l);
 
  563      recv.push_back(std::make_pair(
rank_map[n.set], memfd->packed_pointer(packing)));
 
  566      first_pad.emplace_back(first_pad_v);
 
  567      seclen.push_back(len);
 
  572        if (s.neighbor.set == in.
set && s.len) {
 
  573          packing.push_back(s.pos * bStorage.
step * 
sizeof(
bElem));
 
  574          packing.push_back(s.len * bStorage.
step * 
sizeof(
bElem));
 
  576      send.push_back(std::make_pair(
rank_map[in.
set], memfd->packed_pointer(packing)));
 
  592    std::vector<MultiStageExchangeView::Stage> send, recv;
 
  596    std::vector<BitSet> neighbors;
 
  604    for (
int d = 1; d <= dim; ++d) {
 
  605      BitSet toexchange = exchanged;
 
  609      toexchange = toexchange | exchanging;
 
  613      for (
auto &n: neighbors)
 
  614        if (n <= exchanging && n.
set != 0) {  
 
  617          std::vector<size_t> rpacking;
 
  618          std::vector<size_t> spacking;
 
  621            if (g.neighbor.set == n.set) {
 
  622              rpacking.push_back(g.pos * bStorage.
step * 
sizeof(
bElem));
 
  624              rpacking.push_back(l);
 
  629            if (s.neighbor.set == Sn.
set) {
 
  630              spacking.push_back(s.pos * bStorage.
step * 
sizeof(
bElem));
 
  631              spacking.push_back(s.len * bStorage.
step * 
sizeof(
bElem));
 
  634          BitSet n2 = n | exchanged;
 
  635          for (
auto &g: 
ghost) {
 
  636            if (g.neighbor <= n2 && !(g.neighbor <= exchanged) && !(g.neighbor.set == n.set)) {
 
  638              rpacking.push_back(g.pos * bStorage.
step * 
sizeof(
bElem));
 
  639              size_t l = g.len * bStorage.
step * 
sizeof(
bElem);
 
  640              rpacking.push_back(l);
 
  645              for (
int sec = g.skin_st; sec < g.skin_ed; ++sec)
 
  648                  if (g2.neighbor.set == s.
set)
 
  649                    if (sec >= g2.skin_st && sec < g2.skin_ed) {
 
  651                      for (
int j = g2.skin_st; j < sec; ++j)
 
  653                      int last = spacking.size();
 
  654                      size_t fst = pos * bStorage.
step * 
sizeof(
bElem);
 
  656                      if (spacking[last - 2] + spacking[last - 1] == fst)
 
  657                        spacking[last - 1] += seclen;
 
  659                        spacking.push_back(fst);
 
  660                        spacking.push_back(seclen);
 
  666          recvpkg.
buf = memfd->packed_pointer(rpacking);
 
  668          recv.back().emplace_back(recvpkg);
 
  671          for (
int i = 0; i < spacking.size(); i += 2)
 
  672            ll += spacking[i + 1];
 
  674          sendpkg.
buf = memfd->packed_pointer(spacking);
 
  676          send.back().emplace_back(sendpkg);
 
  678      exchanged = toexchange;
 
  689    double st = omp_get_wtime(), ed;
 
  691    MPI_Win_fence(0, win);
 
  693    ed = omp_get_wtime();
 
  697    for (
int i = 0; i < 
ghost.size(); ++i) {
 
  701              skin[i].pos * bStorage.
step * 
sizeof(
bElem), len, MPI_CHAR, win);
 
  704    ed = omp_get_wtime();
 
  708    MPI_Win_fence(0, win);
 
  710    ed = omp_get_wtime();
 
  730template<
unsigned dim, 
unsigned ...BDims>
 
  734    MPI_Cart_rank(comm, coo, &rank);
 
  743  populate(comm, bDecomp, neighbor, d + 1, coo);
 
  747  populate(comm, bDecomp, neighbor, d + 1, coo);
 
  751  populate(comm, bDecomp, neighbor, d + 1, coo);
 
  759  double min, max, 
avg, sigma;
 
  770  if (comm == MPI_COMM_NULL)
 
  773  MPI_Comm_size(comm, &size);
 
  774  MPI_Reduce(&stats, &ret.
avg, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
 
  776  MPI_Reduce(&stats, &ret.
max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
 
  777  MPI_Reduce(&stats, &ret.
min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
 
  778  stats = stats * stats;
 
  779  MPI_Reduce(&stats, &ret.
sigma, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
 
  782  ret.
sigma /= std::max(size - 1, 1);
 
  791  os << 
"[" << stats.
min << 
", " << stats.
avg << 
", " << stats.
max << 
"]" << 
" (σ: " << stats.
sigma << 
")";
 
void allneighbors(BitSet cur, long idx, long dim, std::vector< BitSet > &neighbors)
Enumerate all neighbors.
Definition: brick-mpi.cpp:9
void populate(MPI_Comm &comm, BrickDecomp< dim, BDims... > &bDecomp, BitSet neighbor, int d, int *coo)
Populate neighbor-rank map for BrickDecomp using MPI_Comm.
Definition: brick-mpi.h:731
mpi_stats mpi_statistics(double stats, MPI_Comm comm)
Definition: brick-mpi.h:768
double waittime
Definition: brick-mpi.h:23
std::vector< BitSet > skin3d_bad
Definition: brick-mpi.h:804
double calltime
Definition: brick-mpi.h:23
std::vector< BitSet > skin3d_good
Optimized surface ordering for 3D.
Definition: brick-mpi.cpp:25
double movetime
Definition: brick-mpi.h:23
std::ostream & operator<<(std::ostream &os, const mpi_stats &stats)
pretty print an mpi_stats object
Definition: brick-mpi.h:790
std::vector< BitSet > skin3d_normal
Definition: brick-mpi.cpp:54
double packtime
Definition: brick-mpi.cpp:7
double calctime
Definition: brick-mpi.h:23
Decomposition for MPI communication.
Definition: brick-mpi.h:180
std::vector< unsigned > g_depth
The depth of ghostzone in bricks.
Definition: brick-mpi.h:204
unsigned * grid
Grid indices.
Definition: brick-mpi.h:206
std::vector< g_region > skin
surface regions record
Definition: brick-mpi.h:195
void exchange(BrickStorage &bStorage)
Minimal PUT exchange without mmap.
Definition: brick-mpi.h:466
void initialize(const std::vector< BitSet > &skinlist)
initialize the decomposition using skinlist
Definition: brick-mpi.h:322
void populate(BitSet owner, BitSet region, unsigned &pos)
Definition: brick-mpi.h:241
BrickInfo< dim > * bInfo
Associated BrickInfo.
Definition: brick-mpi.h:208
BrickDecomp(const std::vector< unsigned > &dims, const unsigned depth, unsigned numfield=1)
MPI decomposition for bricks.
Definition: brick-mpi.h:304
MPI_Comm comm
MPI communicator it is attached to.
Definition: brick-mpi.h:294
MultiStageExchangeView multiStageExchangeView(BrickStorage bStorage)
Create a view for SHIFT exchange (mmap)
Definition: brick-mpi.h:587
grid_access< mytype, dim, dim - 1 > operator[](int i)
Accessing grid indices using []
Definition: brick-mpi.h:502
std::vector< BitSet > skinlist
the order of skin
Definition: brick-mpi.h:197
BrickDecomp< dim, BDims... > mytype
shorthand for type of this instance
Definition: brick-mpi.h:200
std::vector< unsigned > dims
dimension of internal in bricks
Definition: brick-mpi.h:202
void _populate(BitSet region, long ref, int d, unsigned &pos)
Definition: brick-mpi.h:218
ExchangeView exchangeView(BrickStorage bStorage)
Create a view for PUT exchange (mmap)
Definition: brick-mpi.h:525
~BrickDecomp()
Definition: brick-mpi.h:515
std::vector< long > skin_size
the size of skin
Definition: brick-mpi.h:198
std::vector< unsigned > t_dims
dimension including ghosts in bricks
Definition: brick-mpi.h:203
std::vector< g_region > ghost
ghost regions record
Definition: brick-mpi.h:194
void adj_populate(unsigned i, unsigned *adj)
Definition: brick-mpi.h:289
unsigned numfield
Number of fields that are interleaved.
Definition: brick-mpi.h:207
unsigned sep_pos[3]
seperation points internal-surface-ghost
Definition: brick-mpi.h:196
BrickInfo< dim > getBrickInfo()
Access the associated metadata.
Definition: brick-mpi.h:511
std::vector< unsigned > stride
stride in bricks
Definition: brick-mpi.h:205
void exchange(BrickStorage bStorage, MPI_Win &win)
Exchange using MPI_Win (don't use)
Definition: brick-mpi.h:688
long get_region_size(BitSet region)
Definition: brick-mpi.h:256
std::unordered_map< uint64_t, int > rank_map
Mapping from neighbor to each neighbor's rank.
Definition: brick-mpi.h:296
void _adj_populate(long ref, unsigned d, unsigned idx, unsigned *adj)
Definition: brick-mpi.h:266
Helper data structure for memory file.
Definition: __init__.py:1
Set using bitfield.
Definition: bitset.h:18
uint64_t set
The bitfield of this set.
Definition: bitset.h:33
BitSet & flip(long pos)
Flipping an element.
Definition: bitset.h:64
bool get(long pos) const
Return whether a number is in the set.
Definition: bitset.h:75
Record start and end of each region.
Definition: brick-mpi.h:185
unsigned skin_st
starting elements in the skin list
Definition: brick-mpi.h:187
unsigned first_pad
Definition: brick-mpi.h:191
unsigned len
ending at which brick (not included)
Definition: brick-mpi.h:190
unsigned last_pad
Definition: brick-mpi.h:192
unsigned pos
starting from which brick
Definition: brick-mpi.h:189
unsigned skin_ed
ending index in the skin list (not included)
Definition: brick-mpi.h:188
BitSet neighbor
The set for the neighbor.
Definition: brick-mpi.h:186
Metadata related to bricks.
Definition: brick.h:97
adjlist adj
Adjacency list.
Definition: brick.h:101
Initializing and holding the storage of bricks.
Definition: brick.h:53
std::shared_ptr< bElem > dat
Pointer holding brick data.
Definition: brick.h:55
void * mmap_info
MMAP data structure when using mmap as allocator.
Definition: brick.h:65
size_t step
Size of a chunk in number of elements.
Definition: brick.h:63
PUT view of the ghost and surface region using mmap.
Definition: brick-mpi.h:82
std::vector< size_t > first_pad
Definition: brick-mpi.h:85
void exchange()
Exchange all ghost zones.
Definition: brick-mpi.h:96
ExchangeView(MPI_Comm comm, std::vector< size_t > seclen, std::vector< size_t > first_pad, Dest send, Dest recv)
Definition: brick-mpi.h:89
std::vector< size_t > seclen
Definition: brick-mpi.h:84
Dest send
Definition: brick-mpi.h:87
MPI_Comm comm
Definition: brick-mpi.h:83
std::vector< std::pair< int, void * > > Dest
Definition: brick-mpi.h:86
Dest recv
Definition: brick-mpi.h:87
Definition: brick-mpi.h:133
size_t len
Definition: brick-mpi.h:135
int rank
Definition: brick-mpi.h:134
void * buf
Definition: brick-mpi.h:136
SHIFT view of the ghost and surface region using mmap.
Definition: brick-mpi.h:131
MultiStageExchangeView(MPI_Comm comm, std::vector< Stage > send, std::vector< Stage > recv)
Definition: brick-mpi.h:141
MPI_Comm comm
Definition: brick-mpi.h:132
std::vector< Stage > recv
Definition: brick-mpi.h:139
void exchange()
Definition: brick-mpi.h:144
std::vector< Package > Stage
Definition: brick-mpi.h:138
std::vector< Stage > send
Definition: brick-mpi.h:139
Generic base template for Calculate the product of n numbers in a template.
Definition: brick.h:143
unsigned operator[](int i)
Definition: brick-mpi.h:59
grid_access(T *self, unsigned ref)
Definition: brick-mpi.h:57
T * self
Definition: brick-mpi.h:54
unsigned ref
Definition: brick-mpi.h:55
Generic base template for Accessing grid indices using [].
Definition: brick-mpi.h:65
grid_access< T, dim, d - 1 > operator[](int i)
Definition: brick-mpi.h:71
unsigned ref
Definition: brick-mpi.h:67
T * self
Definition: brick-mpi.h:66
grid_access(T *self, unsigned ref)
Definition: brick-mpi.h:69
Statistics collection for MPI programs.
Definition: brick-mpi.h:758
double sigma
Definition: brick-mpi.h:759
double avg
Definition: brick-mpi.h:759
double min
Definition: brick-mpi.h:759
double max
Definition: brick-mpi.h:759
#define bElem
Basic datatype for all brick elements.
Definition: vecscatter.h:13