6#ifndef BRICK_BRICK_MPI_H
7#define BRICK_BRICK_MPI_H
10#include <unordered_map>
49template<
typename T,
unsigned dim,
unsigned d>
52template<
typename T,
unsigned dim>
64template<
typename T,
unsigned dim,
unsigned d>
86 typedef std::vector<std::pair<int, void *>>
Dest;
97 std::vector<MPI_Request> requests(
seclen.size() * 2);
98 std::vector<MPI_Status> stats(requests.size());
100#ifdef BARRIER_TIMESTEP
104 double st = omp_get_wtime(), ed;
106 for (
int i = 0; i <
seclen.size(); ++i) {
109 &(requests[i << 1]));
112 &(requests[(i << 1) + 1]));
115 ed = omp_get_wtime();
119 MPI_Waitall(
static_cast<int>(requests.size()), requests.data(), stats.data());
121 ed = omp_get_wtime();
146#ifdef BARRIER_TIMESTEP
150 double st = omp_get_wtime(), wtime = 0;
151 for (
long i = 0; i <
send.size(); ++i) {
152 std::vector<MPI_Request> requests(
send[i].size() +
recv[i].size());
153 for (
long j = 0; j <
recv[i].size(); ++j)
154 MPI_Irecv(
recv[i][j].buf,
recv[i][j].len, MPI_CHAR,
recv[i][j].rank, j,
comm, &(requests[j]));
155 for (
long j = 0; j <
send[i].size(); ++j)
156 MPI_Isend(
send[i][j].buf,
send[i][j].len, MPI_CHAR,
send[i][j].rank, j,
comm, &(requests[
recv[i].size() + j]));
157 std::vector<MPI_Status> stats(requests.size());
158 double stime = omp_get_wtime();
159 MPI_Waitall(
static_cast<int>(requests.size()), requests.data(), stats.data());
160 wtime += omp_get_wtime() - stime;
178template<
unsigned dim,
210 template<
typename T,
unsigned di,
unsigned d>
225 if (region.
get(d + 1)) {
229 if (region.
get(-d - 1)) {
234 for (
unsigned i = 0; i <
g_depth[d]; ++i)
237 for (
unsigned i = 2 *
g_depth[d]; i <
dims[d]; ++i)
245 for (
long d = 0; d < dim; ++d) {
246 if (owner.
get(d + 1))
249 if (owner.
get(-d - 1))
258 for (
long d = 1; d <= dim; ++d)
259 if (region.
get(d) || region.
get(-d))
274 for (
int i = 0; i < 3; ++i) {
275 if (i + cur < 1 || i + cur >
t_dims[d] || ref < 0)
278 adj[idx + i] =
grid[ref + i - 1];
281 for (
int i = 0; i < 3; ++i)
282 if (i + cur < 1 || i + cur >
t_dims[d] || ref < 0)
306 assert(
dims.size() == dim);
307 std::vector<unsigned> bdims = {BDims ...};
309 std::reverse(bdims.begin(), bdims.end());
311 for (
int i = 0; i < dim; ++i) {
312 assert(depth % bdims[i] == 0);
313 g_depth.emplace_back(depth / bdims[i]);
314 this->dims[i] /= bdims[i];
327 unsigned grid_size = 1;
333 for (
int i = 0; i < dim; ++i) {
340 grid =
new unsigned[grid_size];
342 int pagesize = sysconf(_SC_PAGESIZE);
345 if (std::max(bSize, pagesize) % std::min(bSize, pagesize) != 0)
346 throw std::runtime_error(
"brick size must be a factor/multiple of pagesize.");
349 if (bSize < pagesize)
350 factor = pagesize / bSize;
353 unsigned pos = factor;
355 auto mypop = [&pos, &factor,
this](
BitSet owner,
BitSet region) {
357#ifndef DECOMP_PAGEUNALIGN
358 pos = (pos + factor - 1) / factor * factor;
362 auto calc_pad = [&factor,
this](
const BitSet ®ion) ->
long {
363#ifdef DECOMP_PAGEUNALIGN
370 std::vector<unsigned> st_pos;
371 std::vector<bool> pad_first;
374 for (
long i = 0; i <
skinlist.size(); ++i) {
378 pad_first.push_back(((last &
skinlist[i]).size() < (
skinlist[i] & next).size()));
385 st_pos.emplace_back(pos);
390 for (
long i = 0; i <
skinlist.size(); ++i) {
396 st_pos.emplace_back(pos);
404 std::vector<BitSet> neighbors;
408 for (
auto n: neighbors) {
418 for (
int l = 0; l <
skinlist.size(); ++l)
422 i.skin_st = g.
skin_st = (unsigned) last;
425 i.pos = st_pos[last];
430 }
else if (last >= 0) {
432 i.last_pad = g.
last_pad = pad_first[last - 1] ? 0 : calc_pad(
skinlist[last - 1]);
435 ghost.emplace_back(g);
436 i.len = st_pos[last] - i.pos;
437 i.skin_ed = (unsigned) last;
438 skin.emplace_back(i);
443 i.last_pad = g.
last_pad = pad_first[last - 1] ? 0 : calc_pad(
skinlist[last - 1]);
446 ghost.emplace_back(g);
447 i.len = st_pos[
skinlist.size()] - i.pos;
448 i.skin_ed = (unsigned) last;
449 skin.emplace_back(i);
456 if (
bInfo ==
nullptr)
458 for (
unsigned i = 0; i < grid_size; ++i)
467 std::vector<MPI_Request> requests(
ghost.size() * 2);
468 std::vector<MPI_Status> stats(requests.size());
470#ifdef BARRIER_TIMESTEP
474 double st = omp_get_wtime(), ed;
476 for (
int i = 0; i <
ghost.size(); ++i) {
478 MPI_Irecv(&(bStorage.
dat.get()[(
ghost[i].pos +
ghost[i].first_pad) * bStorage.
step]),
482 MPI_Isend(&(bStorage.
dat.get()[(
skin[i].pos +
skin[i].first_pad) * bStorage.
step]),
484 MPI_CHAR,
rank_map[
skin[i].neighbor.set], i,
comm, &(requests[(i << 1) + 1]));
487 ed = omp_get_wtime();
491 MPI_Waitall(
static_cast<int>(requests.size()), requests.data(), stats.data());
493 ed = omp_get_wtime();
530 std::vector<BitSet> neighbors;
534 std::vector<size_t> seclen;
535 std::vector<size_t> first_pad;
543 for (
auto n: neighbors) {
548 std::vector<size_t> packing;
550 long first_pad_v = -1;
552 for (
auto g:
ghost) {
553 if (g.neighbor.set == n.set) {
555 first_pad_v = g.first_pad * bStorage.
step *
sizeof(
bElem);
556 last_pad = g.last_pad * bStorage.
step *
sizeof(
bElem);
557 packing.push_back(g.pos * bStorage.
step *
sizeof(
bElem));
558 size_t l = g.len * bStorage.
step *
sizeof(
bElem);
559 packing.push_back(l);
563 recv.push_back(std::make_pair(
rank_map[n.set], memfd->packed_pointer(packing)));
566 first_pad.emplace_back(first_pad_v);
567 seclen.push_back(len);
572 if (s.neighbor.set == in.
set && s.len) {
573 packing.push_back(s.pos * bStorage.
step *
sizeof(
bElem));
574 packing.push_back(s.len * bStorage.
step *
sizeof(
bElem));
576 send.push_back(std::make_pair(
rank_map[in.
set], memfd->packed_pointer(packing)));
592 std::vector<MultiStageExchangeView::Stage> send, recv;
596 std::vector<BitSet> neighbors;
604 for (
int d = 1; d <= dim; ++d) {
605 BitSet toexchange = exchanged;
609 toexchange = toexchange | exchanging;
613 for (
auto &n: neighbors)
614 if (n <= exchanging && n.
set != 0) {
617 std::vector<size_t> rpacking;
618 std::vector<size_t> spacking;
621 if (g.neighbor.set == n.set) {
622 rpacking.push_back(g.pos * bStorage.
step *
sizeof(
bElem));
624 rpacking.push_back(l);
629 if (s.neighbor.set == Sn.
set) {
630 spacking.push_back(s.pos * bStorage.
step *
sizeof(
bElem));
631 spacking.push_back(s.len * bStorage.
step *
sizeof(
bElem));
634 BitSet n2 = n | exchanged;
635 for (
auto &g:
ghost) {
636 if (g.neighbor <= n2 && !(g.neighbor <= exchanged) && !(g.neighbor.set == n.set)) {
638 rpacking.push_back(g.pos * bStorage.
step *
sizeof(
bElem));
639 size_t l = g.len * bStorage.
step *
sizeof(
bElem);
640 rpacking.push_back(l);
645 for (
int sec = g.skin_st; sec < g.skin_ed; ++sec)
648 if (g2.neighbor.set == s.
set)
649 if (sec >= g2.skin_st && sec < g2.skin_ed) {
651 for (
int j = g2.skin_st; j < sec; ++j)
653 int last = spacking.size();
654 size_t fst = pos * bStorage.
step *
sizeof(
bElem);
656 if (spacking[last - 2] + spacking[last - 1] == fst)
657 spacking[last - 1] += seclen;
659 spacking.push_back(fst);
660 spacking.push_back(seclen);
666 recvpkg.
buf = memfd->packed_pointer(rpacking);
668 recv.back().emplace_back(recvpkg);
671 for (
int i = 0; i < spacking.size(); i += 2)
672 ll += spacking[i + 1];
674 sendpkg.
buf = memfd->packed_pointer(spacking);
676 send.back().emplace_back(sendpkg);
678 exchanged = toexchange;
689 double st = omp_get_wtime(), ed;
691 MPI_Win_fence(0, win);
693 ed = omp_get_wtime();
697 for (
int i = 0; i <
ghost.size(); ++i) {
701 skin[i].pos * bStorage.
step *
sizeof(
bElem), len, MPI_CHAR, win);
704 ed = omp_get_wtime();
708 MPI_Win_fence(0, win);
710 ed = omp_get_wtime();
730template<
unsigned dim,
unsigned ...BDims>
734 MPI_Cart_rank(comm, coo, &rank);
743 populate(comm, bDecomp, neighbor, d + 1, coo);
747 populate(comm, bDecomp, neighbor, d + 1, coo);
751 populate(comm, bDecomp, neighbor, d + 1, coo);
759 double min, max,
avg, sigma;
770 if (comm == MPI_COMM_NULL)
773 MPI_Comm_size(comm, &size);
774 MPI_Reduce(&stats, &ret.
avg, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
776 MPI_Reduce(&stats, &ret.
max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
777 MPI_Reduce(&stats, &ret.
min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
778 stats = stats * stats;
779 MPI_Reduce(&stats, &ret.
sigma, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
782 ret.
sigma /= std::max(size - 1, 1);
791 os <<
"[" << stats.
min <<
", " << stats.
avg <<
", " << stats.
max <<
"]" <<
" (σ: " << stats.
sigma <<
")";
void allneighbors(BitSet cur, long idx, long dim, std::vector< BitSet > &neighbors)
Enumerate all neighbors.
Definition: brick-mpi.cpp:9
void populate(MPI_Comm &comm, BrickDecomp< dim, BDims... > &bDecomp, BitSet neighbor, int d, int *coo)
Populate neighbor-rank map for BrickDecomp using MPI_Comm.
Definition: brick-mpi.h:731
mpi_stats mpi_statistics(double stats, MPI_Comm comm)
Definition: brick-mpi.h:768
double waittime
Definition: brick-mpi.h:23
std::vector< BitSet > skin3d_bad
Definition: brick-mpi.h:804
double calltime
Definition: brick-mpi.h:23
std::vector< BitSet > skin3d_good
Optimized surface ordering for 3D.
Definition: brick-mpi.cpp:25
double movetime
Definition: brick-mpi.h:23
std::ostream & operator<<(std::ostream &os, const mpi_stats &stats)
pretty print an mpi_stats object
Definition: brick-mpi.h:790
std::vector< BitSet > skin3d_normal
Definition: brick-mpi.cpp:54
double packtime
Definition: brick-mpi.cpp:7
double calctime
Definition: brick-mpi.h:23
Decomposition for MPI communication.
Definition: brick-mpi.h:180
std::vector< unsigned > g_depth
The depth of ghostzone in bricks.
Definition: brick-mpi.h:204
unsigned * grid
Grid indices.
Definition: brick-mpi.h:206
std::vector< g_region > skin
surface regions record
Definition: brick-mpi.h:195
void exchange(BrickStorage &bStorage)
Minimal PUT exchange without mmap.
Definition: brick-mpi.h:466
void initialize(const std::vector< BitSet > &skinlist)
initialize the decomposition using skinlist
Definition: brick-mpi.h:322
void populate(BitSet owner, BitSet region, unsigned &pos)
Definition: brick-mpi.h:241
BrickInfo< dim > * bInfo
Associated BrickInfo.
Definition: brick-mpi.h:208
BrickDecomp(const std::vector< unsigned > &dims, const unsigned depth, unsigned numfield=1)
MPI decomposition for bricks.
Definition: brick-mpi.h:304
MPI_Comm comm
MPI communicator it is attached to.
Definition: brick-mpi.h:294
MultiStageExchangeView multiStageExchangeView(BrickStorage bStorage)
Create a view for SHIFT exchange (mmap)
Definition: brick-mpi.h:587
grid_access< mytype, dim, dim - 1 > operator[](int i)
Accessing grid indices using []
Definition: brick-mpi.h:502
std::vector< BitSet > skinlist
the order of skin
Definition: brick-mpi.h:197
BrickDecomp< dim, BDims... > mytype
shorthand for type of this instance
Definition: brick-mpi.h:200
std::vector< unsigned > dims
dimension of internal in bricks
Definition: brick-mpi.h:202
void _populate(BitSet region, long ref, int d, unsigned &pos)
Definition: brick-mpi.h:218
ExchangeView exchangeView(BrickStorage bStorage)
Create a view for PUT exchange (mmap)
Definition: brick-mpi.h:525
~BrickDecomp()
Definition: brick-mpi.h:515
std::vector< long > skin_size
the size of skin
Definition: brick-mpi.h:198
std::vector< unsigned > t_dims
dimension including ghosts in bricks
Definition: brick-mpi.h:203
std::vector< g_region > ghost
ghost regions record
Definition: brick-mpi.h:194
void adj_populate(unsigned i, unsigned *adj)
Definition: brick-mpi.h:289
unsigned numfield
Number of fields that are interleaved.
Definition: brick-mpi.h:207
unsigned sep_pos[3]
seperation points internal-surface-ghost
Definition: brick-mpi.h:196
BrickInfo< dim > getBrickInfo()
Access the associated metadata.
Definition: brick-mpi.h:511
std::vector< unsigned > stride
stride in bricks
Definition: brick-mpi.h:205
void exchange(BrickStorage bStorage, MPI_Win &win)
Exchange using MPI_Win (don't use)
Definition: brick-mpi.h:688
long get_region_size(BitSet region)
Definition: brick-mpi.h:256
std::unordered_map< uint64_t, int > rank_map
Mapping from neighbor to each neighbor's rank.
Definition: brick-mpi.h:296
void _adj_populate(long ref, unsigned d, unsigned idx, unsigned *adj)
Definition: brick-mpi.h:266
Helper data structure for memory file.
Definition: __init__.py:1
Set using bitfield.
Definition: bitset.h:18
uint64_t set
The bitfield of this set.
Definition: bitset.h:33
BitSet & flip(long pos)
Flipping an element.
Definition: bitset.h:64
bool get(long pos) const
Return whether a number is in the set.
Definition: bitset.h:75
Record start and end of each region.
Definition: brick-mpi.h:185
unsigned skin_st
starting elements in the skin list
Definition: brick-mpi.h:187
unsigned first_pad
Definition: brick-mpi.h:191
unsigned len
ending at which brick (not included)
Definition: brick-mpi.h:190
unsigned last_pad
Definition: brick-mpi.h:192
unsigned pos
starting from which brick
Definition: brick-mpi.h:189
unsigned skin_ed
ending index in the skin list (not included)
Definition: brick-mpi.h:188
BitSet neighbor
The set for the neighbor.
Definition: brick-mpi.h:186
Metadata related to bricks.
Definition: brick.h:97
adjlist adj
Adjacency list.
Definition: brick.h:101
Initializing and holding the storage of bricks.
Definition: brick.h:53
std::shared_ptr< bElem > dat
Pointer holding brick data.
Definition: brick.h:55
void * mmap_info
MMAP data structure when using mmap as allocator.
Definition: brick.h:65
size_t step
Size of a chunk in number of elements.
Definition: brick.h:63
PUT view of the ghost and surface region using mmap.
Definition: brick-mpi.h:82
std::vector< size_t > first_pad
Definition: brick-mpi.h:85
void exchange()
Exchange all ghost zones.
Definition: brick-mpi.h:96
ExchangeView(MPI_Comm comm, std::vector< size_t > seclen, std::vector< size_t > first_pad, Dest send, Dest recv)
Definition: brick-mpi.h:89
std::vector< size_t > seclen
Definition: brick-mpi.h:84
Dest send
Definition: brick-mpi.h:87
MPI_Comm comm
Definition: brick-mpi.h:83
std::vector< std::pair< int, void * > > Dest
Definition: brick-mpi.h:86
Dest recv
Definition: brick-mpi.h:87
Definition: brick-mpi.h:133
size_t len
Definition: brick-mpi.h:135
int rank
Definition: brick-mpi.h:134
void * buf
Definition: brick-mpi.h:136
SHIFT view of the ghost and surface region using mmap.
Definition: brick-mpi.h:131
MultiStageExchangeView(MPI_Comm comm, std::vector< Stage > send, std::vector< Stage > recv)
Definition: brick-mpi.h:141
MPI_Comm comm
Definition: brick-mpi.h:132
std::vector< Stage > recv
Definition: brick-mpi.h:139
void exchange()
Definition: brick-mpi.h:144
std::vector< Package > Stage
Definition: brick-mpi.h:138
std::vector< Stage > send
Definition: brick-mpi.h:139
Generic base template for Calculate the product of n numbers in a template.
Definition: brick.h:143
unsigned operator[](int i)
Definition: brick-mpi.h:59
grid_access(T *self, unsigned ref)
Definition: brick-mpi.h:57
T * self
Definition: brick-mpi.h:54
unsigned ref
Definition: brick-mpi.h:55
Generic base template for Accessing grid indices using [].
Definition: brick-mpi.h:65
grid_access< T, dim, d - 1 > operator[](int i)
Definition: brick-mpi.h:71
unsigned ref
Definition: brick-mpi.h:67
T * self
Definition: brick-mpi.h:66
grid_access(T *self, unsigned ref)
Definition: brick-mpi.h:69
Statistics collection for MPI programs.
Definition: brick-mpi.h:758
double sigma
Definition: brick-mpi.h:759
double avg
Definition: brick-mpi.h:759
double min
Definition: brick-mpi.h:759
double max
Definition: brick-mpi.h:759
#define bElem
Basic datatype for all brick elements.
Definition: vecscatter.h:13