MPI-AMRVAC 3.2
The MPI - Adaptive Mesh Refinement - Versatile Advection Code (development version)
Loading...
Searching...
No Matches
mod_space_filling_curve.t
Go to the documentation of this file.
2 implicit none
3
4contains
5
6 !> build Morton space filling curve for level 1 grid
8 ! use Morton curve to connect level 1 grid blocks
9 use mod_forest
11
12 integer, allocatable :: gsq_sfc(:^D&),seq_sfc(:),seq_ig^D(:)
13 integer :: ig^D, ngsq^D, isq, total_number
14 !integer(kind=8), external :: mortonEncode
15 logical, allocatable :: in_domain(:)
16
17 ! use the smallest square/cube to cover the full domain
18 ngsq^d=2**ceiling(log(real(ng^d(1)))/log(2.0));
19 {^nooned
20 {ngsq^d=max(ngsq^dd) \}
21 }
22 total_number={ngsq^d|*}
23 ! Morton number acquired by block numbers
24 allocate(gsq_sfc(ngsq^d))
25 ! Morton number in sequence
26 allocate(seq_sfc(total_number))
27 ! block numbers in sequence
28 {allocate(seq_ig^d(total_number))\}
29 allocate(in_domain(total_number))
30 in_domain=.true.
31 ! get Morton-order numbers in the square/cube
32 {do ig^db=1,ngsq^db\}
33 gsq_sfc(ig^d)=int(mortonencode(ig^d-1,ndim))+1
34 seq_sfc(gsq_sfc(ig^d))=gsq_sfc(ig^d)
35 {seq_ig^d(gsq_sfc(ig^dd))=ig^d \}
36 {end do\}
37 ! mark blocks that are out of the domain and change Morton number
38 do isq=1,total_number
39 if (seq_ig^d(isq)>ng^d(1)|.or.) then
40 seq_sfc(isq:total_number)=seq_sfc(isq:total_number)-1
41 in_domain(isq)=.false.
42 end if
43 end do
44 ! copy the modified Morton numbers to the blocks in the domain
45 if(.not. allocated(iglevel1_sfc)) allocate(iglevel1_sfc(ng^d(1)))
46 if(.not. allocated(sfc_iglevel1)) allocate(sfc_iglevel1(ndim,nglev1))
47 do isq=1,total_number
48 if(in_domain(isq)) then
49 iglevel1_sfc(seq_ig^d(isq))=seq_sfc(isq)
50 {sfc_iglevel1(^d,seq_sfc(isq))=seq_ig^d(isq) \}
51 end if
52 end do
53
54 deallocate(gsq_sfc,seq_sfc,seq_ig^d,in_domain)
55
56 end subroutine level1_morton_order
57
58 integer(kind=8) function mortonencode(ig^D,ndim)
59 use iso_fortran_env, only : int64
60 implicit none
61 integer(kind=8) :: answer, lg^d
62 integer(kind=4), intent(in) :: ig^d,ndim
63 integer(kind=4) :: i
64
65 ! Create a 64-bit version of ig^D
66 lg^d=ig^d;
67 answer = 0
68
69 do i=0,64/ndim
70 {^ifoned answer=ig1}
71 {^iftwod
72 answer=ior(answer,ior(ishft(iand(lg1,ishft(1_int64,i)),i),&
73 ishft(iand(lg2,ishft(1_int64,i)),i+1)))
74 \}
75 {^ifthreed
76 answer=ior(answer,ior(ishft(iand(lg1,ishft(1_int64,i)),2*i),ior(ishft(&
77 iand(lg2,ishft(1_int64,i)),2*i+1),ishft(iand(lg3,ishft(1_int64,i)),2*i+2))))
78 \}
79 end do
80 mortonencode=answer
81 return
82 end function mortonencode
83
84 !> Construct Morton-order as a global recursive lexicographic ordering.
86 use mod_forest
88 use mod_comm_lib, only: mpistop
89
90 integer :: ig^D, Morton_no, isfc
91
92 morton_no=0
93 nglev1={ng^d(1)*}
94 do isfc=1,nglev1
95 ig^d=sfc_iglevel1(^d,isfc)\
97 end do
98
99 if (morton_no/=nleafs) then
100 call mpistop("error in amr_Morton_order: Morton_no/=nleafs")
101 end if
102
103 contains
104
105 recursive subroutine get_morton_number(tree)
106
107 type(tree_node_ptr) :: tree
108
109 integer :: ic^d
110
111 if (tree%node%leaf) then
112 morton_no=morton_no+1
113 sfc(1,morton_no)=tree%node%igrid
114 sfc(2,morton_no)=tree%node%ipe
115 if (tree%node%active) then
116 sfc(3,morton_no)=1
117 else
118 sfc(3,morton_no)=0
119 end if
120 if(tree%node%ipe==mype) igrid_to_sfc(tree%node%igrid)=morton_no
121 else
122 {do ic^db=1,2\}
123 call get_morton_number(tree%node%child(ic^d))
124 {end do\}
125 end if
126
127 end subroutine get_morton_number
128
129 end subroutine amr_morton_order
130
131 !> Set the Morton range for each processor
133 use mod_forest
135
136 integer :: ipe, blocks_left, procs_left, num_blocks
137
138 if (allocated(sfc_to_igrid)) deallocate(sfc_to_igrid)
139 {#IFDEF EVOLVINGBOUNDARY
140 if (allocated(sfc_phybound)) deallocate(sfc_phybound)
141 }
142
143 blocks_left = nleafs
144
145 do ipe = 0, npe-1
146 if (ipe == 0) then
147 morton_start(ipe) = 1
148 else
149 morton_start(ipe) = morton_stop(ipe-1) + 1
150 end if
151
152 ! Compute how many blocks this cpu should take
153 procs_left = npe - ipe
154 num_blocks = ceiling(blocks_left / dble(procs_left))
155 morton_stop(ipe) = morton_start(ipe) + num_blocks - 1
156 blocks_left = blocks_left - num_blocks
157 end do
158
160 {#IFDEF EVOLVINGBOUNDARY
161 allocate(sfc_phybound(nleafs))
163 }
164
165 end subroutine get_morton_range
166
168 use mod_forest
170
171 ! Cut the sfc based on weighted decision.
172 ! Oliver Porth, 02.02.2012
173
174 !!!Here you choose the weithts:!!
175 integer, parameter :: wa=3, wp=1
176 ! wp : Weight for passive block
177 ! wa : Weight for active block
178 ! wp=0 : balance load (active blocks) exactly and
179 ! don't care about memory imbalance
180 ! wp=wa : balance memory exactly and don't care about load
181 ! Maximum possible memory imbalance is X=wa/wp.
182
183 ! If you run into memory issues, decrease this ratio.
184 ! Scaling should be better if you allow for higher ratio.
185 ! Note also that passive cells still do regridding and boundary swap.
186 ! I have best results with a ratio 2:1, but it is problem dependent.
187 ! It can't do magic though...
188 ! Best to make sure that the sfc is properly aligned with your problem.
189
190 integer :: ipe, Morton_no
191 integer :: Mtot, Mstop, Mcurr
192 ! For debugging:
193 integer :: nactive(0:npe-1),npassive(0:npe-1)
194 !double precision, save :: ptasum=0
195
196 if (allocated(sfc_to_igrid)) deallocate(sfc_to_igrid)
197 {#IFDEF EVOLVINGBOUNDARY
198 if (allocated(sfc_phybound)) deallocate(sfc_phybound)
199 }
200
201 mtot = nleafs_active*wa+(nleafs-nleafs_active)*wp
202 ipe = 0
203 mcurr = 0
204
205 nactive=0
206 npassive=0
207
208 morton_start(0) = 1
209 do morton_no=1,nleafs
210 ! This is where we ideally would like to make the cuts:
211 mstop = (ipe+1)*int(mtot/npe)+min(ipe+1,mod(mtot,npe))
212 ! Build up mass:
213 mcurr = mcurr + (wa*sfc(3,morton_no)+wp*(1-sfc(3,morton_no)))
214
215 if (sfc(3,morton_no)==1) then
216 nactive(ipe) = nactive(ipe) +1
217 else
218 npassive(ipe) = npassive(ipe) +1
219 end if
220
221 if (mcurr >= mstop) then
222 morton_stop(ipe) = morton_no
223 ipe = ipe +1
224 if (ipe>=npe) exit
225 morton_start(ipe) = morton_no + 1
226 end if
227 end do
228
229 xmemory=dble(maxval(npassive+nactive))/&
230 dble(minval(npassive+nactive))
231 xload=dble(maxval(nactive))/&
232 dble(minval(nactive))
233
234 !ptasum = ptasum +dble(nleafs-nleafs_active)/dble(nleafs_active)
235
236 !if (mype == 0) print*, 'nleafs_passive:',nleafs-nleafs_active, 'nleafs_active:',nleafs_active,'ratio:',dble(nleafs-nleafs_active)/dble(nleafs_active),'mean ratio:',ptasum/it
237
238 if (morton_stop(mype)>=morton_start(mype)) then
240 {#IFDEF EVOLVINGBOUNDARY
241 allocate(sfc_phybound(nleafs))
243 }
244 end if
245
246 end subroutine get_morton_range_active
247
248 !> Cost-weighted SFC partition. Reduces the per-rank block_cost array
249 !> into a global Morton-indexed cost, EWMA-blends with the persistent
250 !> costlist, and partitions the Morton order so each rank's cumulative
251 !> cost is balanced. Falls back to equal-block-count if all costs are
252 !> zero (cold start before the first measured step).
254 use mod_forest
256
257 integer :: ipe, Morton_no, igrid, ix
258 double precision :: cost_total, cost_target, cost_cum
259 double precision :: maxcount, mincount, maxcost, meancost
260 double precision, allocatable :: cost_local(:)
261 integer :: nblocks_per(0:npe-1)
262
263 if (allocated(sfc_to_igrid)) deallocate(sfc_to_igrid)
264 {#IFDEF EVOLVINGBOUNDARY
265 if (allocated(sfc_phybound)) deallocate(sfc_phybound)
266 }
267
268 ! 1. Each rank fills its measured per-step block cost into the
269 ! Morton-indexed cost_local at its own slots, zeros elsewhere.
270 ! Morton numbering is invariant across load_balance migration, so
271 ! costlist values built up via EWMA below stay meaningful when
272 ! blocks change rank. Refinement events insert/remove Morton
273 ! indices; the EWMA recovers within ~5 cycles for affected entries.
274 allocate(cost_local(nleafs))
275 cost_local = 0.0d0
276 do morton_no = 1, nleafs
277 if (sfc(2,morton_no) == mype) then
278 igrid = sfc(1,morton_no)
279 if (igrid > 0 .and. igrid <= max_blocks) then
280 cost_local(morton_no) = block_cost(igrid)
281 end if
282 end if
283 end do
284
285 ! 2. Reduce so every rank has the global per-Morton cost vector. Since
286 ! each Morton index is owned by exactly one rank in the pre-balance
287 ! state, the SUM is just the value contributed by that one rank.
288 if (npe > 1) then
289 call mpi_allreduce(mpi_in_place, cost_local, nleafs, &
290 mpi_double_precision, mpi_sum, icomm, ierrmpi)
291 end if
292
293 ! 3. EWMA blend the measurement into the persistent global costlist.
294 ! Skip slots with zero measurement (block didn't run advect this
295 ! cycle for some reason — keep the prior estimate). This blend at
296 ! the GLOBAL Morton level (not per-igrid) is what makes the
297 ! partition migration-safe.
298 do morton_no = 1, nleafs
299 if (cost_local(morton_no) > 0.0d0) then
300 costlist(morton_no) = lb_alpha * costlist(morton_no) &
301 + (1.0d0 - lb_alpha) * cost_local(morton_no)
302 end if
303 end do
304 deallocate(cost_local)
305
306 cost_total = sum(costlist(1:nleafs))
307 if (cost_total <= 0.0d0) then
308 ! Fully cold: fall back to equal-block partition.
310 return
311 end if
312
313 ! 4. Greedy cumulative-cost cut along the Morton-sorted leaves. Aim for
314 ! each rank to receive cost_total / npe of cumulative work; cut at
315 ! the leaf where the running sum first exceeds the per-rank target.
316 nblocks_per = 0
317 ipe = 0
318 cost_cum = 0.0d0
319 morton_start(0) = 1
320 do morton_no = 1, nleafs
321 cost_cum = cost_cum + costlist(morton_no)
322 cost_target = (dble(ipe) + 1.0d0) * cost_total / dble(npe)
323 nblocks_per(ipe) = nblocks_per(ipe) + 1
324 if (cost_cum >= cost_target .and. ipe < npe-1) then
325 morton_stop(ipe) = morton_no
326 ipe = ipe + 1
327 morton_start(ipe) = morton_no + 1
328 end if
329 end do
330 morton_stop(npe-1) = nleafs
331
332 ! 5. Diagnostics (also drive the existing log columns Xload/Xmemory).
333 maxcount = dble(maxval(nblocks_per))
334 mincount = dble(max(1,minval(nblocks_per)))
335 xmemory = maxcount / mincount
336 maxcost = 0.0d0
337 do ix = 0, npe-1
338 maxcost = max(maxcost, sum(costlist(morton_start(ix):morton_stop(ix))))
339 end do
340 meancost = cost_total / dble(npe)
341 xload = maxcost / max(meancost, tiny(1.0d0))
342
343 if (morton_stop(mype) >= morton_start(mype)) then
345 {#IFDEF EVOLVINGBOUNDARY
346 allocate(sfc_phybound(nleafs))
347 sfc_phybound = 0
348 }
349 end if
350
351 end subroutine get_morton_range_costed
352
recursive subroutine get_morton_number(tree)
subroutine, public mpistop(message)
Exit MPI-AMRVAC with an error message.
Module with basic grid data structures.
Definition mod_forest.t:2
integer, dimension(:), allocatable, save sfc_to_igrid
Go from a Morton number to an igrid index (for a single processor)
Definition mod_forest.t:53
integer nleafs_active
Definition mod_forest.t:78
integer, dimension(:), allocatable, save sfc_phybound
Space filling curve used for physical boundary blocks.
Definition mod_forest.t:59
integer, dimension(:), allocatable, save morton_start
First Morton number per processor.
Definition mod_forest.t:62
integer, save nleafs
Number of leaf block.
Definition mod_forest.t:76
integer, dimension(:), allocatable, save igrid_to_sfc
Go from a grid index to Morton number (for a single processor)
Definition mod_forest.t:56
integer nglev1
Definition mod_forest.t:78
integer, dimension(:,:), allocatable, save sfc_iglevel1
Space filling curve for level 1 grid. sfc_iglevel1(^D, MN) gives ig^D (the spatial index of the grid)
Definition mod_forest.t:47
integer, dimension(:), allocatable, save morton_stop
Last Morton number per processor.
Definition mod_forest.t:65
type(tree_node_ptr), dimension(:^d &), allocatable, save tree_root
Pointers to the coarse grid.
Definition mod_forest.t:29
integer, dimension(:,:), allocatable, save sfc
Array to go from a Morton number to an igrid and processor index. Sfc(1:3, MN) contains [igrid,...
Definition mod_forest.t:43
This module contains definitions of global parameters and variables and some generic functions/subrou...
double precision xload
Stores the memory and load imbalance, used in printlog.
double precision, dimension(:), allocatable costlist
Persistent global per-Morton-leaf EWMA cost. Sized to max_blocks*npe (the upper bound on nleafs); onl...
double precision, dimension(:), allocatable block_cost
Per-step per-block (per-rank, indexed by igrid) cost accumulator. Reset at start of each advance call...
integer, parameter ndim
Number of spatial dimensions for grid variables.
integer icomm
The MPI communicator.
integer, dimension(:), allocatable ng
number of grid blocks in domain per dimension, in array over levels
integer mype
The rank of the current MPI task.
integer ierrmpi
A global MPI error return code.
integer npe
The number of MPI tasks.
integer max_blocks
The maximum number of grid blocks in a processor.
double precision lb_alpha
Exponential-moving-average decay for the per-block cost. costlist <- lb_alpha*costlist + (1-lb_alpha)...
subroutine get_morton_range_costed
Cost-weighted SFC partition. Reduces the per-rank block_cost array into a global Morton-indexed cost,...
integer(kind=8) function mortonencode(igd, ndim)
subroutine get_morton_range
Set the Morton range for each processor.
subroutine amr_morton_order
Construct Morton-order as a global recursive lexicographic ordering.
subroutine level1_morton_order
build Morton space filling curve for level 1 grid
Pointer to a tree_node.
Definition mod_forest.t:6