Brick Library 0.1
Performance-portable stencil datalayout & codegen
brick-mpi.h
Go to the documentation of this file.
1
6#ifndef BRICK_BRICK_MPI_H
7#define BRICK_BRICK_MPI_H
8
9#include <vector>
10#include <unordered_map>
11#include <algorithm>
12#include <cassert>
13#include <cstring>
14#include <cmath>
15#include <iostream>
16#include <mpi.h>
17#include <omp.h>
18#include <unistd.h>
19#include "brick.h"
20#include "bitset.h"
21#include "memfd.h"
22
24
32void allneighbors(BitSet cur, long idx, long dim, std::vector<BitSet> &neighbors);
33
49template<typename T, unsigned dim, unsigned d>
50struct grid_access;
51
/**
 * @brief Base case of the recursive grid accessor: the last (contiguous) dimension.
 *
 * The final [] resolves directly into the owner's flattened grid array and
 * returns the brick index stored there.
 */
template<typename T, unsigned dim>
struct grid_access<T, dim, 1> {
  T *self;      // owner object (e.g. BrickDecomp) providing the grid array
  unsigned ref; // flattened offset accumulated from the higher dimensions

  grid_access(T *self, unsigned ref) : self(self), ref(ref) {}

  // Final subscript: read the brick index at the accumulated offset plus i.
  inline unsigned operator[](int i) {
    return self->grid[ref + i];
  }
};
63
/**
 * @brief Generic recursive accessor for indexing the brick grid with repeated [].
 *
 * NOTE(review): the `struct grid_access {` header line (doc line 65) is missing
 * from this extraction; the members below belong to the primary template.
 */
template<typename T, unsigned dim, unsigned d>
  T *self;      // owner object providing grid and stride arrays
  unsigned ref; // flattened offset accumulated so far

  grid_access(T *self, unsigned ref) : self(self), ref(ref) {}

  // Each subscript peels one dimension: advance by i strides of dimension d-1.
  inline grid_access<T, dim, d - 1> operator[](int i) {
    return grid_access<T, dim, d - 1>(self, ref + i * self->stride[d - 1]);
  }
};
  // NOTE(review): the `struct ExchangeView {` header (doc line 82) and the
  // `Dest send, recv;` member declarations (doc line 87) are missing from
  // this extraction; the members below belong to ExchangeView.
  MPI_Comm comm;                  // communicator used for all exchanges
  std::vector<size_t> seclen;     // payload length (bytes) per neighbor section
  std::vector<size_t> first_pad;  // leading pad (bytes) to skip in each section
  typedef std::vector<std::pair<int, void *>> Dest; // (neighbor rank, mapped buffer)

  ExchangeView(MPI_Comm comm, std::vector<size_t> seclen, std::vector<size_t> first_pad, Dest send, Dest recv) :
      comm(comm), seclen(std::move(seclen)), first_pad(std::move(first_pad)),
      send(std::move(send)), recv(std::move(recv)) {}
92
96 void exchange() {
97 std::vector<MPI_Request> requests(seclen.size() * 2);
98 std::vector<MPI_Status> stats(requests.size());
99
100#ifdef BARRIER_TIMESTEP
101 MPI_Barrier(comm);
102#endif
103
104 double st = omp_get_wtime(), ed;
105
106 for (int i = 0; i < seclen.size(); ++i) {
107 // receive to ghost[i]
108 MPI_Irecv((uint8_t *) recv[i].second + first_pad[i], seclen[i], MPI_CHAR, recv[i].first, i, comm,
109 &(requests[i << 1]));
110 // send from skin[i]
111 MPI_Isend((uint8_t *) send[i].second + first_pad[i], seclen[i], MPI_CHAR, send[i].first, i, comm,
112 &(requests[(i << 1) + 1]));
113 }
114
115 ed = omp_get_wtime();
116 calltime += ed - st;
117 st = ed;
118
119 MPI_Waitall(static_cast<int>(requests.size()), requests.data(), stats.data());
120
121 ed = omp_get_wtime();
122 waittime += ed - st;
123 }
124};
125
  // NOTE(review): the `struct MultiStageExchangeView {` header (doc line 131)
  // is missing from this extraction; the members below belong to it.
  MPI_Comm comm; // communicator used for all stages
  // One message to or from one neighbor within a stage.
  typedef struct {
    int rank;   // neighbor rank
    size_t len; // message length in bytes
    void *buf;  // mmap-packed pointer covering the regions to transfer
  } Package;
  typedef std::vector<Package> Stage; // all messages of one exchange stage
  std::vector<Stage> send, recv;      // one stage per axis of the exchange

  MultiStageExchangeView(MPI_Comm comm, std::vector<Stage> send, std::vector<Stage> recv) :
      comm(comm), send(std::move(send)), recv(std::move(recv)) {}
143
144 void exchange() {
145
146#ifdef BARRIER_TIMESTEP
147 MPI_Barrier(comm);
148#endif
149
150 double st = omp_get_wtime(), wtime = 0;
151 for (long i = 0; i < send.size(); ++i) {
152 std::vector<MPI_Request> requests(send[i].size() + recv[i].size());
153 for (long j = 0; j < recv[i].size(); ++j)
154 MPI_Irecv(recv[i][j].buf, recv[i][j].len, MPI_CHAR, recv[i][j].rank, j, comm, &(requests[j]));
155 for (long j = 0; j < send[i].size(); ++j)
156 MPI_Isend(send[i][j].buf, send[i][j].len, MPI_CHAR, send[i][j].rank, j, comm, &(requests[recv[i].size() + j]));
157 std::vector<MPI_Status> stats(requests.size());
158 double stime = omp_get_wtime();
159 MPI_Waitall(static_cast<int>(requests.size()), requests.data(), stats.data());
160 wtime += omp_get_wtime() - stime;
161 }
162 calltime += omp_get_wtime() - st - wtime;
163 waittime += wtime;
164 }
165};
166
/**
 * @brief Decomposition for MPI communication over bricks.
 *
 * NOTE(review): the `class BrickDecomp {` line (doc line 180) and the
 * `BitSet neighbor;` member of g_region (doc line 186) are missing from this
 * extraction; the declarations below belong to that class.
 */
template<unsigned dim,
    unsigned ... BDims>
public:
  // Record start and end of each (ghost or surface) region.
  typedef struct {
    unsigned skin_st;   // starting index in the skin list
    unsigned skin_ed;   // ending index in the skin list (not included)
    unsigned pos;       // starting from which brick
    unsigned len;       // region length in bricks (see g.len = pos - g.pos)
    unsigned first_pad; // alignment padding before the region, in bricks
    unsigned last_pad;  // alignment padding after the region, in bricks
  } g_region;
  std::vector<g_region> ghost;   // ghost-region records
  std::vector<g_region> skin;    // surface-region records
  unsigned sep_pos[3];           // separation points: internal | surface | ghost
  std::vector<BitSet> skinlist;  // the ordering of the surface regions
  std::vector<long> skin_size;   // size of each skinlist entry, in bricks
private:
  typedef BrickDecomp<dim, BDims...> mytype;

  std::vector<unsigned> dims;    // per-dimension brick counts (without ghost)
  std::vector<unsigned> t_dims;  // per-dimension totals including ghost
  std::vector<unsigned> g_depth; // ghost depth per dimension, in bricks
  std::vector<unsigned> stride;  // flattened strides over t_dims
  unsigned *grid;                // grid cell -> brick index mapping
  unsigned numfield;             // number of interleaved fields per brick
  BrickInfo<dim> *bInfo;         // adjacency metadata, built by initialize()

  template<typename T, unsigned di, unsigned d>
  friend
  struct grid_access;
214
  /* Regions can be identified with:
   * Own/neighbor(ghost zone) + which region
   */
  /**
   * @brief Recursively assign brick indices to one region of the grid.
   *
   * @param region region selector: bit +d means the upper skin of dimension d,
   *        bit -d the lower skin; no bit set means the interior
   * @param ref flattened offset into grid accumulated so far
   * @param d dimension being expanded (counts down; -1 writes one cell)
   * @param pos running brick index, incremented for every cell visited
   */
  void _populate(BitSet region, long ref, int d, unsigned &pos) {
    if (d == -1) {
      grid[ref] = pos++;
      return;
    }
    long sec = 0;
    long st = 0;
    // +d set: this region lies in the upper skin of dimension d
    if (region.get(d + 1)) {
      sec = 1;
      st = dims[d];
    }
    // -d set: lower skin, starting right after the ghost layer
    if (region.get(-d - 1)) {
      sec = -1;
      st = g_depth[d];
    }
    if (sec != 0)
      // Skin region: only g_depth[d] cells thick along this dimension
      for (unsigned i = 0; i < g_depth[d]; ++i)
        _populate(region, ref + (st + i) * stride[d], d - 1, pos);
    else
      // Interior along this dimension: skip ghost and skin on both ends
      for (unsigned i = 2 * g_depth[d]; i < dims[d]; ++i)
        _populate(region, ref + i * stride[d], d - 1, pos);
  }
240
  /**
   * @brief Number the bricks of one region, either ours or a neighbor's copy.
   *
   * @param owner which neighbor owns the region (0 = ourselves); the base
   *        offset shifts one whole domain up/down per set dimension
   * @param region the skin/ghost region to number
   * @param pos running brick index, advanced by the recursion
   */
  void populate(BitSet owner, BitSet region, unsigned &pos) {
    // For myself
    long ref = 0;
    // For neighbor owner
    for (long d = 0; d < dim; ++d) {
      if (owner.get(d + 1))
        // Shift up
        ref += dims[d] * stride[d];
      if (owner.get(-d - 1))
        // Shift down
        ref -= dims[d] * stride[d];
    }
    _populate(region, ref, dim - 1, pos);
  }
255
256 long get_region_size(BitSet region) {
257 long ret = 1;
258 for (long d = 1; d <= dim; ++d)
259 if (region.get(d) || region.get(-d))
260 ret *= g_depth[d - 1];
261 else
262 ret *= dims[d - 1] - 2 * g_depth[d - 1];
263 return ret;
264 }
265
  /**
   * @brief Recursively fill one brick's adjacency list (3 entries per dimension).
   *
   * @param ref flattened grid offset of the current cell, or negative to mark
   *        "out of bounds" so the whole subtree gets adjacency 0
   * @param d dimension being expanded (counts down to 0)
   * @param idx position within the adjacency list built so far (base-3 digits)
   * @param adj output adjacency array
   */
  void _adj_populate(long ref, unsigned d, unsigned idx, unsigned *adj) {
    long cur = ref;
    if (cur >= 0) {
      // Coordinate of this cell along dimension d
      cur = ref / stride[d];
      cur = cur % (t_dims[d]);
    }
    idx *= 3;
    if (d == 0) {
      for (int i = 0; i < 3; ++i) {
        // Neighbors falling outside the grid map to brick 0
        if (i + cur < 1 || i + cur > t_dims[d] || ref < 0)
          adj[idx + i] = 0;
        else
          adj[idx + i] = grid[ref + i - 1];
      }
    } else {
      for (int i = 0; i < 3; ++i)
        if (i + cur < 1 || i + cur > t_dims[d] || ref < 0)
          _adj_populate(-1, d - 1, idx + i, adj);
        else
          _adj_populate(ref + (i - 1) * (long) stride[d], d - 1, idx + i, adj);
    }
  }
288
  /// Fill the full adjacency list of grid cell i (delegates to the recursion).
  void adj_populate(unsigned i, unsigned *adj) {
    _adj_populate(i, dim - 1, 0, adj);
  }
292
public:
  MPI_Comm comm; // MPI communicator this decomposition is attached to

  std::unordered_map<uint64_t, int> rank_map; // neighbor bitset -> neighbor rank

304 BrickDecomp(const std::vector<unsigned> &dims, const unsigned depth, unsigned numfield = 1)
305 : dims(dims), numfield(numfield), bInfo(nullptr), grid(nullptr) {
306 assert(dims.size() == dim);
307 std::vector<unsigned> bdims = {BDims ...};
308 // Arrays needs to be kept contiguous first
309 std::reverse(bdims.begin(), bdims.end());
310
311 for (int i = 0; i < dim; ++i) {
312 assert(depth % bdims[i] == 0);
313 g_depth.emplace_back(depth / bdims[i]);
314 this->dims[i] /= bdims[i];
315 }
316 };
317
  /**
   * @brief Initialize the decomposition using skinlist.
   *
   * Numbers every brick (interior first, then surface, then ghost) so that
   * regions exchanged together are contiguous and — unless DECOMP_PAGEUNALIGN
   * is defined — page aligned; records the ghost/skin region tables and the
   * sep_pos separation points; finally builds the adjacency metadata (bInfo)
   * from the numbered grid.
   *
   * @param skinlist ordering of the surface regions; assumed identical on all
   *        participating ranks
   * @throws std::runtime_error when brick size and page size do not divide
   */
  void initialize(const std::vector<BitSet> &skinlist) {
    calltime = waittime = 0;
    this->skinlist = skinlist;

    // All space is dimensions + ghostzone + 1
    unsigned grid_size = 1;
    stride.clear();
    stride.resize(dim);
    t_dims.clear();
    t_dims.resize(dim);

    for (int i = 0; i < dim; ++i) {
      stride[i] = grid_size;
      t_dims[i] = dims[i] + 2 * g_depth[i];
      grid_size *= t_dims[i];
    }

    // Global reference to grid Index
    grid = new unsigned[grid_size];

    int pagesize = sysconf(_SC_PAGESIZE);
    int bSize = cal_size<BDims ...>::value * sizeof(bElem) * numfield;

    // Page alignment only works when one size divides the other
    if (std::max(bSize, pagesize) % std::min(bSize, pagesize) != 0)
      throw std::runtime_error("brick size must be a factor/multiple of pagesize.");

    // factor = bricks per page (1 when a brick spans one or more pages)
    int factor = 1;
    if (bSize < pagesize)
      factor = pagesize / bSize;

    // This assumes all domains are aligned and allocated with same "skinlist"
    unsigned pos = factor;

    // Number one region, then round pos up to the next page boundary
    auto mypop = [&pos, &factor, this](BitSet owner, BitSet region) {
      populate(owner, region, pos);
#ifndef DECOMP_PAGEUNALIGN
      pos = (pos + factor - 1) / factor * factor;
#endif
    };

    // Padding (in bricks) needed to keep a region page aligned
    auto calc_pad = [&factor, this](const BitSet &region) -> long {
#ifdef DECOMP_PAGEUNALIGN
      return 0;
#else
      return factor - (get_region_size(region) + factor - 1) % factor - 1;
#endif
    };

    std::vector<unsigned> st_pos; // The starting position of a segment
    std::vector<bool> pad_first;

    // Decide per segment whether its alignment padding goes before or after
    // it, based on which adjacent segment it shares the larger surface with
    BitSet last = 0;
    for (long i = 0; i < skinlist.size(); ++i) {
      BitSet next = 0;
      if (i < skinlist.size() - 1)
        next = skinlist[i + 1];
      pad_first.push_back(((last & skinlist[i]).size() < (skinlist[i] & next).size()));
      last = skinlist[i];
    }

    // Allocating inner region
    mypop(0, 0);

    st_pos.emplace_back(pos);
    sep_pos[0] = pos;

    skin_size.clear();
    // Allocating skinlist
    for (long i = 0; i < skinlist.size(); ++i) {
      long ppos = pos;
      if (pad_first[i])
        pos += calc_pad(skinlist[i]);
      if (skinlist[i].set != 0)
        mypop(0, skinlist[i]);
      st_pos.emplace_back(pos);
      skin_size.emplace_back(pos - ppos);
    }
    sep_pos[1] = pos;

    /* Allocating ghost
     * A ghost region contains every region that is a superset of the "inverse"
     */
    std::vector<BitSet> neighbors;
    allneighbors(0, 1, dim, neighbors);
    ghost.clear();
    skin.clear();
    for (auto n: neighbors) {
      if (n.set == 0)
        continue;
      BitSet in = !n;
      g_region g, i;
      int last = -1;
      g.neighbor = n;
      i.neighbor = in;
      // Following the order of the skinlist
      // Record contiguousness
      for (int l = 0; l < skinlist.size(); ++l)
        if (in <= skinlist[l]) {
          if (last < 0) {
            // First matching segment: open a new contiguous run
            last = l;
            i.skin_st = g.skin_st = (unsigned) last;
            i.first_pad = g.first_pad = pad_first[last] ? calc_pad(skinlist[last]) : 0;
            g.pos = pos;
            i.pos = st_pos[last];
          }
          if (pad_first[l])
            pos += calc_pad(skinlist[l]);
          mypop(n, skinlist[l]);
        } else if (last >= 0) {
          // Run ended: record the ghost region and its matching skin region
          last = l;
          i.last_pad = g.last_pad = pad_first[last - 1] ? 0 : calc_pad(skinlist[last - 1]);
          g.skin_ed = (unsigned) last;
          g.len = pos - g.pos;
          ghost.emplace_back(g);
          i.len = st_pos[last] - i.pos;
          i.skin_ed = (unsigned) last;
          skin.emplace_back(i);
          last = -1;
        }
      // Close a run that extends to the end of the skinlist
      if (last >= 0) {
        last = skinlist.size();
        i.last_pad = g.last_pad = pad_first[last - 1] ? 0 : calc_pad(skinlist[last - 1]);
        g.skin_ed = (unsigned) last;
        g.len = pos - g.pos;
        ghost.emplace_back(g);
        i.len = st_pos[skinlist.size()] - i.pos;
        i.skin_ed = (unsigned) last;
        skin.emplace_back(i);
      }
    }
    sep_pos[2] = pos;

    // Convert grid to adjlist
    int size = pos;
    if (bInfo == nullptr)
      bInfo = new BrickInfo<dim>(size);
    for (unsigned i = 0; i < grid_size; ++i)
      adj_populate(i, bInfo->adj[grid[i]]);
  }
461
466 void exchange(BrickStorage &bStorage) {
467 std::vector<MPI_Request> requests(ghost.size() * 2);
468 std::vector<MPI_Status> stats(requests.size());
469
470#ifdef BARRIER_TIMESTEP
471 MPI_Barrier(comm);
472#endif
473
474 double st = omp_get_wtime(), ed;
475
476 for (int i = 0; i < ghost.size(); ++i) {
477 // receive to ghost[i]
478 MPI_Irecv(&(bStorage.dat.get()[(ghost[i].pos + ghost[i].first_pad) * bStorage.step]),
479 (ghost[i].len - ghost[i].first_pad - ghost[i].last_pad) * bStorage.step * sizeof(bElem),
480 MPI_CHAR, rank_map[ghost[i].neighbor.set], i, comm, &(requests[i << 1]));
481 // send from skin[i]
482 MPI_Isend(&(bStorage.dat.get()[(skin[i].pos + skin[i].first_pad) * bStorage.step]),
483 (skin[i].len - skin[i].first_pad - skin[i].last_pad) * bStorage.step * sizeof(bElem),
484 MPI_CHAR, rank_map[skin[i].neighbor.set], i, comm, &(requests[(i << 1) + 1]));
485 }
486
487 ed = omp_get_wtime();
488 calltime += ed - st;
489 st = ed;
490
491 MPI_Waitall(static_cast<int>(requests.size()), requests.data(), stats.data());
492
493 ed = omp_get_wtime();
494 waittime += ed - st;
495 }
496
502 grid_access<mytype, dim, dim - 1> operator[](int i) {
503 auto ga = grid_access<mytype, dim, dim>(this, 0);
504 return ga[i];
505 }
506
    // NOTE(review): the signatures `BrickInfo<dim> getBrickInfo() {` (doc
    // line 511) and `~BrickDecomp() {` (doc line 515) are missing from this
    // extraction; the two bodies below belong to them.
    // getBrickInfo(): access the adjacency metadata built by initialize().
    return *bInfo;
  }

  // Destructor body: release the grid map and the adjacency metadata.
    delete[] grid;
    delete bInfo;
  }
519
    // NOTE(review): the `ExchangeView exchangeView(BrickStorage bStorage) {`
    // signature (doc line 525) and the local `ExchangeView::Dest send, recv;`
    // declarations (doc lines 536-537) are missing from this extraction.
    // All Brick Storage are initialized with mmap for exchanging using views
    assert(bStorage.mmap_info != nullptr);

    // Generate all neighbor bitset
    std::vector<BitSet> neighbors;
    allneighbors(0, 1, dim, neighbors);

    // Each section is one pair of exchanges with a single neighbor
    std::vector<size_t> seclen;
    std::vector<size_t> first_pad;

    // memfd that backs the canonical view
    auto memfd = (MEMFD *) bStorage.mmap_info;

    // Iterating over neighbors
    for (auto n: neighbors) {
      // Skip
      if (n.set == 0)
        continue;
      // Receive buffer + buffer length
      std::vector<size_t> packing;
      size_t len = 0;
      long first_pad_v = -1;
      long last_pad = 0;
      // Gather every ghost region belonging to this neighbor into one packed view
      for (auto g: ghost) {
        if (g.neighbor.set == n.set) {
          if (first_pad_v < 0)
            first_pad_v = g.first_pad * bStorage.step * sizeof(bElem);
          last_pad = g.last_pad * bStorage.step * sizeof(bElem);
          packing.push_back(g.pos * bStorage.step * sizeof(bElem));
          size_t l = g.len * bStorage.step * sizeof(bElem);
          packing.push_back(l);
          len += l;
        }
      }
      recv.push_back(std::make_pair(rank_map[n.set], memfd->packed_pointer(packing)));
      // Trim the alignment padding off the transferred length.
      // NOTE(review): if a neighbor matched no ghost region, first_pad_v stays
      // -1 here and len underflows — confirm every neighbor has a ghost entry.
      len -= first_pad_v;
      len -= last_pad;
      first_pad.emplace_back(first_pad_v);
      seclen.push_back(len);
      packing.clear();
      // Send buffer
      BitSet in = !n;
      for (auto s: skin)
        if (s.neighbor.set == in.set && s.len) {
          packing.push_back(s.pos * bStorage.step * sizeof(bElem));
          packing.push_back(s.len * bStorage.step * sizeof(bElem));
        }
      send.push_back(std::make_pair(rank_map[in.set], memfd->packed_pointer(packing)));
    }

    return ExchangeView(comm, seclen, first_pad, send, recv);
  }
581
    // NOTE(review): the signature
    // `MultiStageExchangeView multiStageExchangeView(BrickStorage bStorage) {`
    // (doc line 587) is missing from this extraction.
    // All Brick Storage are initialized with mmap for exchanging using views
    assert(bStorage.mmap_info != nullptr);

    // Stages are exchanges along one axis.
    std::vector<MultiStageExchangeView::Stage> send, recv;

    // Generate all neighbor bitset
    // The skin_list is no longer important however it is useful to compose the messages.
    std::vector<BitSet> neighbors;
    allneighbors(0, 1, dim, neighbors);

    // memfd that backs the canonical view
    auto memfd = (MEMFD *) bStorage.mmap_info;

    // Directions whose data has already been exchanged in earlier stages
    BitSet exchanged = 0;

    for (int d = 1; d <= dim; ++d) {
      BitSet toexchange = exchanged;
      BitSet exchanging = 0;
      exchanging.flip(d);
      exchanging.flip(-d);
      toexchange = toexchange | exchanging;
      send.emplace_back();
      recv.emplace_back();
      // Exchanging in one direction at a time
      for (auto &n: neighbors)
        if (n <= exchanging && n.set != 0) { // This neighbor is the one exchanging with
          MultiStageExchangeView::Package sendpkg, recvpkg;
          size_t len = 0;
          std::vector<size_t> rpacking;
          std::vector<size_t> spacking;
          // Receiving the stuff from the current neighbor
          for (auto g: ghost)
            if (g.neighbor.set == n.set) {
              rpacking.push_back(g.pos * bStorage.step * sizeof(bElem));
              size_t l = g.len * bStorage.step * sizeof(bElem);
              rpacking.push_back(l);
              len += l;
            }
          BitSet Sn = !n;
          // Our own skin that mirrors that neighbor's ghost
          for (auto s: skin)
            if (s.neighbor.set == Sn.set) {
              spacking.push_back(s.pos * bStorage.step * sizeof(bElem));
              spacking.push_back(s.len * bStorage.step * sizeof(bElem));
            }
          // Receiving from neighbor's neighbor, assuming neighbor is ordered much like our own.
          BitSet n2 = n | exchanged;
          for (auto &g: ghost) {
            if (g.neighbor <= n2 && !(g.neighbor <= exchanged) && !(g.neighbor.set == n.set)) {
              // this need to be exchanged
              rpacking.push_back(g.pos * bStorage.step * sizeof(bElem));
              size_t l = g.len * bStorage.step * sizeof(bElem);
              rpacking.push_back(l);
              len += l;
              // The corresponding us part also needs to be exchanged
              // Find the corresponding neighbor
              BitSet s = g.neighbor ^n;
              for (int sec = g.skin_st; sec < g.skin_ed; ++sec)
                // looking for the corresponding skin part
                for (auto g2: ghost)
                  if (g2.neighbor.set == s.set)
                    if (sec >= g2.skin_st && sec < g2.skin_ed) {
                      // Walk from the region start to the wanted skin segment
                      auto pos = g2.pos;
                      for (int j = g2.skin_st; j < sec; ++j)
                        pos += skin_size[j];
                      int last = spacking.size();
                      size_t fst = pos * bStorage.step * sizeof(bElem);
                      size_t seclen = skin_size[sec] * bStorage.step * sizeof(bElem);
                      // Coalesce with the previous packed range when adjacent.
                      // NOTE(review): indexes spacking[last - 2] without
                      // checking spacking is non-empty — confirm the skin loop
                      // above always contributes entries before this path.
                      if (spacking[last - 2] + spacking[last - 1] == fst)
                        spacking[last - 1] += seclen;
                      else {
                        spacking.push_back(fst);
                        spacking.push_back(seclen);
                      }
                    }
            }
          }
          recvpkg.len = len;
          recvpkg.buf = memfd->packed_pointer(rpacking);
          recvpkg.rank = rank_map[n.set];
          recv.back().emplace_back(recvpkg);
          // Sending my stuff
          size_t ll = 0;
          for (int i = 0; i < spacking.size(); i += 2)
            ll += spacking[i + 1];
          // NOTE(review): ll totals the send packing but is then discarded;
          // sendpkg.len is set to the receive length `len` — verify this
          // send/recv symmetry is intentional.
          sendpkg.len = len;
          sendpkg.buf = memfd->packed_pointer(spacking);
          sendpkg.rank = rank_map[Sn.set];
          send.back().emplace_back(sendpkg);
        }
      exchanged = toexchange;
    }
    return MultiStageExchangeView(comm, send, recv);
  }
682
  /**
   * @brief Exchange using MPI_Win one-sided GET inside a fenced epoch (don't use).
   *
   * Gets each ghost region from the owner's skin region at the same index.
   * NOTE(review): takes BrickStorage by value (copies the shared_ptr) and
   * assumes ghost[i]/skin[i] offsets are identical across ranks — confirm.
   *
   * @param bStorage brick storage exposed through the window
   * @param win MPI window covering the storage
   */
  void exchange(BrickStorage bStorage, MPI_Win &win) {
    double st = omp_get_wtime(), ed;

    // Open the access epoch; time spent here counts as wait time
    MPI_Win_fence(0, win);

    ed = omp_get_wtime();
    waittime += ed - st;
    st = ed;

    for (int i = 0; i < ghost.size(); ++i) {
      size_t len = ghost[i].len * bStorage.step * sizeof(bElem);
      // receive from remote
      MPI_Get(&(bStorage.dat.get()[ghost[i].pos * bStorage.step]), len, MPI_CHAR, rank_map[ghost[i].neighbor.set],
              skin[i].pos * bStorage.step * sizeof(bElem), len, MPI_CHAR, win);
    }

    ed = omp_get_wtime();
    calltime += ed - st;
    st = ed;

    // Close the epoch; all gets complete here
    MPI_Win_fence(0, win);

    ed = omp_get_wtime();
    waittime += ed - st;
  }
713};
714
/**
 * @brief Populate neighbor-rank map for BrickDecomp using a Cartesian MPI_Comm.
 *
 * Recursively enumerates every neighbor offset (3 choices per dimension):
 * bit +d with coordinate - 1, no bit with the coordinate unchanged, and bit
 * -d with coordinate + 1. At the leaves MPI_Cart_rank resolves the shifted
 * coordinate to a rank, stored under the neighbor's bitset.
 *
 * @param comm Cartesian communicator
 * @param bDecomp decomposition whose rank_map is filled
 * @param neighbor bitset accumulated along the recursion
 * @param d current dimension (1-based); recursion stops past dim
 * @param coo scratch coordinate array, restored before each return
 */
template<unsigned dim, unsigned ...BDims>
void populate(MPI_Comm &comm, BrickDecomp<dim, BDims...> &bDecomp, BitSet neighbor, int d, int *coo) {
  if (d > dim) {
    int rank;
    MPI_Cart_rank(comm, coo, &rank);
    bDecomp.rank_map[neighbor.set] = rank;
    return;
  }

  // Coordinates are stored in reverse dimension order
  int dd = dim - d;
  int c = coo[dd];
  // Picked +
  neighbor.flip(d);
  coo[dd] = c - 1;
  populate(comm, bDecomp, neighbor, d + 1, coo);
  neighbor.flip(d);
  // Not picked
  coo[dd] = c;
  populate(comm, bDecomp, neighbor, d + 1, coo);
  // Picked -
  neighbor.flip(-d);
  coo[dd] = c + 1;
  populate(comm, bDecomp, neighbor, d + 1, coo);
  coo[dd] = c;
}
754
/**
 * @brief Statistics collection for MPI programs.
 *
 * Aggregated per-rank samples (e.g. timings) filled in by mpi_statistics().
 * Idiomatic C++ `struct` replaces the C-style `typedef struct`.
 */
struct mpi_stats {
  double min, max, avg, sigma;
};
761
768inline mpi_stats mpi_statistics(double stats, MPI_Comm comm) {
769 mpi_stats ret = {0, 0, 0, 0};
770 if (comm == MPI_COMM_NULL)
771 return ret;
772 int size;
773 MPI_Comm_size(comm, &size);
774 MPI_Reduce(&stats, &ret.avg, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
775 ret.avg = ret.avg;
776 MPI_Reduce(&stats, &ret.max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
777 MPI_Reduce(&stats, &ret.min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
778 stats = stats * stats;
779 MPI_Reduce(&stats, &ret.sigma, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
780 ret.sigma -= ret.avg * ret.avg / size;
781 ret.avg /= size;
782 ret.sigma /= std::max(size - 1, 1);
783 ret.sigma = std::sqrt(ret.sigma);
784 return ret;
785}
786
790inline std::ostream &operator<<(std::ostream &os, const mpi_stats &stats) {
791 os << "[" << stats.min << ", " << stats.avg << ", " << stats.max << "]" << " (σ: " << stats.sigma << ")";
792 return os;
793}
794
803extern std::vector<BitSet> skin3d_good;
804extern std::vector<BitSet> skin3d_normal, skin3d_bad;
805
806#endif //BRICK_BRICK_MPI_H
Set using bitfield.
void allneighbors(BitSet cur, long idx, long dim, std::vector< BitSet > &neighbors)
Enumerate all neighbors.
Definition: brick-mpi.cpp:9
void populate(MPI_Comm &comm, BrickDecomp< dim, BDims... > &bDecomp, BitSet neighbor, int d, int *coo)
Populate neighbor-rank map for BrickDecomp using MPI_Comm.
Definition: brick-mpi.h:731
mpi_stats mpi_statistics(double stats, MPI_Comm comm)
Definition: brick-mpi.h:768
double waittime
Definition: brick-mpi.h:23
std::vector< BitSet > skin3d_bad
Definition: brick-mpi.h:804
double calltime
Definition: brick-mpi.h:23
std::vector< BitSet > skin3d_good
Optimized surface ordering for 3D.
Definition: brick-mpi.cpp:25
double movetime
Definition: brick-mpi.h:23
std::vector< BitSet > skin3d_normal
Definition: brick-mpi.cpp:54
double packtime
Definition: brick-mpi.cpp:7
double calctime
Definition: brick-mpi.h:23
Main header for bricks.
Decomposition for MPI communication.
Definition: brick-mpi.h:180
std::vector< g_region > skin
surface regions record
Definition: brick-mpi.h:195
void exchange(BrickStorage &bStorage)
Minimal PUT exchange without mmap.
Definition: brick-mpi.h:466
void initialize(const std::vector< BitSet > &skinlist)
initialize the decomposition using skinlist
Definition: brick-mpi.h:322
BrickDecomp(const std::vector< unsigned > &dims, const unsigned depth, unsigned numfield=1)
MPI decomposition for bricks.
Definition: brick-mpi.h:304
MPI_Comm comm
MPI communicator it is attached to.
Definition: brick-mpi.h:294
MultiStageExchangeView multiStageExchangeView(BrickStorage bStorage)
Create a view for SHIFT exchange (mmap)
Definition: brick-mpi.h:587
grid_access< mytype, dim, dim - 1 > operator[](int i)
Accessing grid indices using []
Definition: brick-mpi.h:502
std::vector< BitSet > skinlist
the order of skin
Definition: brick-mpi.h:197
ExchangeView exchangeView(BrickStorage bStorage)
Create a view for PUT exchange (mmap)
Definition: brick-mpi.h:525
~BrickDecomp()
Definition: brick-mpi.h:515
std::vector< long > skin_size
the size of skin
Definition: brick-mpi.h:198
std::vector< g_region > ghost
ghost regions record
Definition: brick-mpi.h:194
unsigned sep_pos[3]
separation points internal-surface-ghost
Definition: brick-mpi.h:196
BrickInfo< dim > getBrickInfo()
Access the associated metadata.
Definition: brick-mpi.h:511
void exchange(BrickStorage bStorage, MPI_Win &win)
Exchange using MPI_Win (don't use)
Definition: brick-mpi.h:688
std::unordered_map< uint64_t, int > rank_map
Mapping from neighbor to each neighbor's rank.
Definition: brick-mpi.h:296
Definition: memfd.h:19
Helper data structure for memory file.
i
Definition: 7pt.py:5
tuple j
Definition: dag_opt.py:297
Definition: __init__.py:1
Set using bitfield.
Definition: bitset.h:18
uint64_t set
The bitfield of this set.
Definition: bitset.h:33
BitSet & flip(long pos)
Flipping an element.
Definition: bitset.h:64
bool get(long pos) const
Return whether a number is in the set.
Definition: bitset.h:75
std::ostream & operator<<(std::ostream &os, const BitSet &bs)
Print a bit set.
Definition: bitset.h:139
Record start and end of each region.
Definition: brick-mpi.h:185
unsigned skin_st
starting elements in the skin list
Definition: brick-mpi.h:187
unsigned first_pad
Definition: brick-mpi.h:191
unsigned len
length of the region in bricks
Definition: brick-mpi.h:190
unsigned last_pad
Definition: brick-mpi.h:192
unsigned pos
starting from which brick
Definition: brick-mpi.h:189
unsigned skin_ed
ending index in the skin list (not included)
Definition: brick-mpi.h:188
BitSet neighbor
The set for the neighbor.
Definition: brick-mpi.h:186
adjlist adj
Adjacency list.
Definition: brick.h:101
Initializing and holding the storage of bricks.
Definition: brick.h:53
std::shared_ptr< bElem > dat
Pointer holding brick data.
Definition: brick.h:55
void * mmap_info
MMAP data structure when using mmap as allocator.
Definition: brick.h:65
size_t step
Size of a chunk in number of elements.
Definition: brick.h:63
PUT view of the ghost and surface region using mmap.
Definition: brick-mpi.h:82
std::vector< size_t > first_pad
Definition: brick-mpi.h:85
void exchange()
Exchange all ghost zones.
Definition: brick-mpi.h:96
ExchangeView(MPI_Comm comm, std::vector< size_t > seclen, std::vector< size_t > first_pad, Dest send, Dest recv)
Definition: brick-mpi.h:89
std::vector< size_t > seclen
Definition: brick-mpi.h:84
Dest send
Definition: brick-mpi.h:87
MPI_Comm comm
Definition: brick-mpi.h:83
std::vector< std::pair< int, void * > > Dest
Definition: brick-mpi.h:86
Dest recv
Definition: brick-mpi.h:87
Definition: brick-mpi.h:133
size_t len
Definition: brick-mpi.h:135
int rank
Definition: brick-mpi.h:134
void * buf
Definition: brick-mpi.h:136
SHIFT view of the ghost and surface region using mmap.
Definition: brick-mpi.h:131
MultiStageExchangeView(MPI_Comm comm, std::vector< Stage > send, std::vector< Stage > recv)
Definition: brick-mpi.h:141
MPI_Comm comm
Definition: brick-mpi.h:132
std::vector< Stage > recv
Definition: brick-mpi.h:139
void exchange()
Definition: brick-mpi.h:144
std::vector< Package > Stage
Definition: brick-mpi.h:138
std::vector< Stage > send
Definition: brick-mpi.h:139
Generic base template for Calculate the product of n numbers in a template.
Definition: brick.h:143
unsigned operator[](int i)
Definition: brick-mpi.h:59
grid_access(T *self, unsigned ref)
Definition: brick-mpi.h:57
T * self
Definition: brick-mpi.h:54
unsigned ref
Definition: brick-mpi.h:55
Generic base template for Accessing grid indices using [].
Definition: brick-mpi.h:65
grid_access< T, dim, d - 1 > operator[](int i)
Definition: brick-mpi.h:71
unsigned ref
Definition: brick-mpi.h:67
T * self
Definition: brick-mpi.h:66
grid_access(T *self, unsigned ref)
Definition: brick-mpi.h:69
Statistics collection for MPI programs.
Definition: brick-mpi.h:758
double sigma
Definition: brick-mpi.h:759
double avg
Definition: brick-mpi.h:759
double min
Definition: brick-mpi.h:759
double max
Definition: brick-mpi.h:759
#define bElem
Basic datatype for all brick elements.
Definition: vecscatter.h:13