1 c---------------------------------------------------------------------
2 c---------------------------------------------------------------------
4 subroutine blts ( ldmx, ldmy, ldmz,
9 > ist, iend, jst, jend,
12 c---------------------------------------------------------------------
13 c---------------------------------------------------------------------
15 c---------------------------------------------------------------------
17 c compute the regular-sparse, block lower triangular solution:
21 c---------------------------------------------------------------------
25 c---------------------------------------------------------------------
27 c---------------------------------------------------------------------
28 integer ldmx, ldmy, ldmz
31 double precision omega
32 double precision v( 5, -1:ldmx+2, -1:ldmy+2, *),
33 > ldz( 5, 5, ldmx, ldmy),
34 > ldy( 5, 5, ldmx, ldmy),
35 > ldx( 5, 5, ldmx, ldmy),
36 > d( 5, 5, ldmx, ldmy)
42 c---------------------------------------------------------------------
44 c---------------------------------------------------------------------
45 integer i, j, m, l, istp, iendp
47 double precision tmp, tmp1
48 double precision tmat(5,5)
51 c---------------------------------------------------------------------
52 c receive data from north and west
53 c---------------------------------------------------------------------
55 call exchange_1( v,k,iex )
62 v( m, i, j, k ) = v( m, i, j, k )
63 > - omega * ( ldz( m, 1, i, j ) * v( 1, i, j, k-1 )
64 > + ldz( m, 2, i, j ) * v( 2, i, j, k-1 )
65 > + ldz( m, 3, i, j ) * v( 3, i, j, k-1 )
66 > + ldz( m, 4, i, j ) * v( 4, i, j, k-1 )
67 > + ldz( m, 5, i, j ) * v( 5, i, j, k-1 ) )
74 do l = ist+jst, iend+jend
75 istp = max(l - jend, ist)
76 iendp = min(l - jst, iend)
83 ! manually unroll the loop
86 v( 1, i, j, k ) = v( 1, i, j, k )
87 > - omega * ( ldy( 1, 1, i, j ) * v( 1, i, j-1, k )
88 > + ldx( 1, 1, i, j ) * v( 1, i-1, j, k )
89 > + ldy( 1, 2, i, j ) * v( 2, i, j-1, k )
90 > + ldx( 1, 2, i, j ) * v( 2, i-1, j, k )
91 > + ldy( 1, 3, i, j ) * v( 3, i, j-1, k )
92 > + ldx( 1, 3, i, j ) * v( 3, i-1, j, k )
93 > + ldy( 1, 4, i, j ) * v( 4, i, j-1, k )
94 > + ldx( 1, 4, i, j ) * v( 4, i-1, j, k )
95 > + ldy( 1, 5, i, j ) * v( 5, i, j-1, k )
96 > + ldx( 1, 5, i, j ) * v( 5, i-1, j, k ) )
97 v( 2, i, j, k ) = v( 2, i, j, k )
98 > - omega * ( ldy( 2, 1, i, j ) * v( 1, i, j-1, k )
99 > + ldx( 2, 1, i, j ) * v( 1, i-1, j, k )
100 > + ldy( 2, 2, i, j ) * v( 2, i, j-1, k )
101 > + ldx( 2, 2, i, j ) * v( 2, i-1, j, k )
102 > + ldy( 2, 3, i, j ) * v( 3, i, j-1, k )
103 > + ldx( 2, 3, i, j ) * v( 3, i-1, j, k )
104 > + ldy( 2, 4, i, j ) * v( 4, i, j-1, k )
105 > + ldx( 2, 4, i, j ) * v( 4, i-1, j, k )
106 > + ldy( 2, 5, i, j ) * v( 5, i, j-1, k )
107 > + ldx( 2, 5, i, j ) * v( 5, i-1, j, k ) )
108 v( 3, i, j, k ) = v( 3, i, j, k )
109 > - omega * ( ldy( 3, 1, i, j ) * v( 1, i, j-1, k )
110 > + ldx( 3, 1, i, j ) * v( 1, i-1, j, k )
111 > + ldy( 3, 2, i, j ) * v( 2, i, j-1, k )
112 > + ldx( 3, 2, i, j ) * v( 2, i-1, j, k )
113 > + ldy( 3, 3, i, j ) * v( 3, i, j-1, k )
114 > + ldx( 3, 3, i, j ) * v( 3, i-1, j, k )
115 > + ldy( 3, 4, i, j ) * v( 4, i, j-1, k )
116 > + ldx( 3, 4, i, j ) * v( 4, i-1, j, k )
117 > + ldy( 3, 5, i, j ) * v( 5, i, j-1, k )
118 > + ldx( 3, 5, i, j ) * v( 5, i-1, j, k ) )
119 v( 4, i, j, k ) = v( 4, i, j, k )
120 > - omega * ( ldy( 4, 1, i, j ) * v( 1, i, j-1, k )
121 > + ldx( 4, 1, i, j ) * v( 1, i-1, j, k )
122 > + ldy( 4, 2, i, j ) * v( 2, i, j-1, k )
123 > + ldx( 4, 2, i, j ) * v( 2, i-1, j, k )
124 > + ldy( 4, 3, i, j ) * v( 3, i, j-1, k )
125 > + ldx( 4, 3, i, j ) * v( 3, i-1, j, k )
126 > + ldy( 4, 4, i, j ) * v( 4, i, j-1, k )
127 > + ldx( 4, 4, i, j ) * v( 4, i-1, j, k )
128 > + ldy( 4, 5, i, j ) * v( 5, i, j-1, k )
129 > + ldx( 4, 5, i, j ) * v( 5, i-1, j, k ) )
130 v( 5, i, j, k ) = v( 5, i, j, k )
131 > - omega * ( ldy( 5, 1, i, j ) * v( 1, i, j-1, k )
132 > + ldx( 5, 1, i, j ) * v( 1, i-1, j, k )
133 > + ldy( 5, 2, i, j ) * v( 2, i, j-1, k )
134 > + ldx( 5, 2, i, j ) * v( 2, i-1, j, k )
135 > + ldy( 5, 3, i, j ) * v( 3, i, j-1, k )
136 > + ldx( 5, 3, i, j ) * v( 3, i-1, j, k )
137 > + ldy( 5, 4, i, j ) * v( 4, i, j-1, k )
138 > + ldx( 5, 4, i, j ) * v( 4, i-1, j, k )
139 > + ldy( 5, 5, i, j ) * v( 5, i, j-1, k )
140 > + ldx( 5, 5, i, j ) * v( 5, i-1, j, k ) )
144 c---------------------------------------------------------------------
145 c diagonal block inversion
147 c forward elimination
148 c---------------------------------------------------------------------
150 ! manually unroll the loop
152 tmat( 1, 1 ) = d( 1, 1, i, j )
153 tmat( 1, 2 ) = d( 1, 2, i, j )
154 tmat( 1, 3 ) = d( 1, 3, i, j )
155 tmat( 1, 4 ) = d( 1, 4, i, j )
156 tmat( 1, 5 ) = d( 1, 5, i, j )
157 tmat( 2, 1 ) = d( 2, 1, i, j )
158 tmat( 2, 2 ) = d( 2, 2, i, j )
159 tmat( 2, 3 ) = d( 2, 3, i, j )
160 tmat( 2, 4 ) = d( 2, 4, i, j )
161 tmat( 2, 5 ) = d( 2, 5, i, j )
162 tmat( 3, 1 ) = d( 3, 1, i, j )
163 tmat( 3, 2 ) = d( 3, 2, i, j )
164 tmat( 3, 3 ) = d( 3, 3, i, j )
165 tmat( 3, 4 ) = d( 3, 4, i, j )
166 tmat( 3, 5 ) = d( 3, 5, i, j )
167 tmat( 4, 1 ) = d( 4, 1, i, j )
168 tmat( 4, 2 ) = d( 4, 2, i, j )
169 tmat( 4, 3 ) = d( 4, 3, i, j )
170 tmat( 4, 4 ) = d( 4, 4, i, j )
171 tmat( 4, 5 ) = d( 4, 5, i, j )
172 tmat( 5, 1 ) = d( 5, 1, i, j )
173 tmat( 5, 2 ) = d( 5, 2, i, j )
174 tmat( 5, 3 ) = d( 5, 3, i, j )
175 tmat( 5, 4 ) = d( 5, 4, i, j )
176 tmat( 5, 5 ) = d( 5, 5, i, j )
179 tmp1 = 1.0d+00 / tmat( 1, 1 )
180 tmp = tmp1 * tmat( 2, 1 )
181 tmat( 2, 2 ) = tmat( 2, 2 )
182 > - tmp * tmat( 1, 2 )
183 tmat( 2, 3 ) = tmat( 2, 3 )
184 > - tmp * tmat( 1, 3 )
185 tmat( 2, 4 ) = tmat( 2, 4 )
186 > - tmp * tmat( 1, 4 )
187 tmat( 2, 5 ) = tmat( 2, 5 )
188 > - tmp * tmat( 1, 5 )
189 v( 2, i, j, k ) = v( 2, i, j, k )
190 > - v( 1, i, j, k ) * tmp
192 tmp = tmp1 * tmat( 3, 1 )
193 tmat( 3, 2 ) = tmat( 3, 2 )
194 > - tmp * tmat( 1, 2 )
195 tmat( 3, 3 ) = tmat( 3, 3 )
196 > - tmp * tmat( 1, 3 )
197 tmat( 3, 4 ) = tmat( 3, 4 )
198 > - tmp * tmat( 1, 4 )
199 tmat( 3, 5 ) = tmat( 3, 5 )
200 > - tmp * tmat( 1, 5 )
201 v( 3, i, j, k ) = v( 3, i, j, k )
202 > - v( 1, i, j, k ) * tmp
204 tmp = tmp1 * tmat( 4, 1 )
205 tmat( 4, 2 ) = tmat( 4, 2 )
206 > - tmp * tmat( 1, 2 )
207 tmat( 4, 3 ) = tmat( 4, 3 )
208 > - tmp * tmat( 1, 3 )
209 tmat( 4, 4 ) = tmat( 4, 4 )
210 > - tmp * tmat( 1, 4 )
211 tmat( 4, 5 ) = tmat( 4, 5 )
212 > - tmp * tmat( 1, 5 )
213 v( 4, i, j, k ) = v( 4, i, j, k )
214 > - v( 1, i, j, k ) * tmp
216 tmp = tmp1 * tmat( 5, 1 )
217 tmat( 5, 2 ) = tmat( 5, 2 )
218 > - tmp * tmat( 1, 2 )
219 tmat( 5, 3 ) = tmat( 5, 3 )
220 > - tmp * tmat( 1, 3 )
221 tmat( 5, 4 ) = tmat( 5, 4 )
222 > - tmp * tmat( 1, 4 )
223 tmat( 5, 5 ) = tmat( 5, 5 )
224 > - tmp * tmat( 1, 5 )
225 v( 5, i, j, k ) = v( 5, i, j, k )
226 > - v( 1, i, j, k ) * tmp
230 tmp1 = 1.0d+00 / tmat( 2, 2 )
231 tmp = tmp1 * tmat( 3, 2 )
232 tmat( 3, 3 ) = tmat( 3, 3 )
233 > - tmp * tmat( 2, 3 )
234 tmat( 3, 4 ) = tmat( 3, 4 )
235 > - tmp * tmat( 2, 4 )
236 tmat( 3, 5 ) = tmat( 3, 5 )
237 > - tmp * tmat( 2, 5 )
238 v( 3, i, j, k ) = v( 3, i, j, k )
239 > - v( 2, i, j, k ) * tmp
241 tmp = tmp1 * tmat( 4, 2 )
242 tmat( 4, 3 ) = tmat( 4, 3 )
243 > - tmp * tmat( 2, 3 )
244 tmat( 4, 4 ) = tmat( 4, 4 )
245 > - tmp * tmat( 2, 4 )
246 tmat( 4, 5 ) = tmat( 4, 5 )
247 > - tmp * tmat( 2, 5 )
248 v( 4, i, j, k ) = v( 4, i, j, k )
249 > - v( 2, i, j, k ) * tmp
251 tmp = tmp1 * tmat( 5, 2 )
252 tmat( 5, 3 ) = tmat( 5, 3 )
253 > - tmp * tmat( 2, 3 )
254 tmat( 5, 4 ) = tmat( 5, 4 )
255 > - tmp * tmat( 2, 4 )
256 tmat( 5, 5 ) = tmat( 5, 5 )
257 > - tmp * tmat( 2, 5 )
258 v( 5, i, j, k ) = v( 5, i, j, k )
259 > - v( 2, i, j, k ) * tmp
263 tmp1 = 1.0d+00 / tmat( 3, 3 )
264 tmp = tmp1 * tmat( 4, 3 )
265 tmat( 4, 4 ) = tmat( 4, 4 )
266 > - tmp * tmat( 3, 4 )
267 tmat( 4, 5 ) = tmat( 4, 5 )
268 > - tmp * tmat( 3, 5 )
269 v( 4, i, j, k ) = v( 4, i, j, k )
270 > - v( 3, i, j, k ) * tmp
272 tmp = tmp1 * tmat( 5, 3 )
273 tmat( 5, 4 ) = tmat( 5, 4 )
274 > - tmp * tmat( 3, 4 )
275 tmat( 5, 5 ) = tmat( 5, 5 )
276 > - tmp * tmat( 3, 5 )
277 v( 5, i, j, k ) = v( 5, i, j, k )
278 > - v( 3, i, j, k ) * tmp
282 tmp1 = 1.0d+00 / tmat( 4, 4 )
283 tmp = tmp1 * tmat( 5, 4 )
284 tmat( 5, 5 ) = tmat( 5, 5 )
285 > - tmp * tmat( 4, 5 )
286 v( 5, i, j, k ) = v( 5, i, j, k )
287 > - v( 4, i, j, k ) * tmp
289 c---------------------------------------------------------------------
291 c---------------------------------------------------------------------
292 v( 5, i, j, k ) = v( 5, i, j, k )
295 v( 4, i, j, k ) = v( 4, i, j, k )
296 > - tmat( 4, 5 ) * v( 5, i, j, k )
297 v( 4, i, j, k ) = v( 4, i, j, k )
300 v( 3, i, j, k ) = v( 3, i, j, k )
301 > - tmat( 3, 4 ) * v( 4, i, j, k )
302 > - tmat( 3, 5 ) * v( 5, i, j, k )
303 v( 3, i, j, k ) = v( 3, i, j, k )
306 v( 2, i, j, k ) = v( 2, i, j, k )
307 > - tmat( 2, 3 ) * v( 3, i, j, k )
308 > - tmat( 2, 4 ) * v( 4, i, j, k )
309 > - tmat( 2, 5 ) * v( 5, i, j, k )
310 v( 2, i, j, k ) = v( 2, i, j, k )
313 v( 1, i, j, k ) = v( 1, i, j, k )
314 > - tmat( 1, 2 ) * v( 2, i, j, k )
315 > - tmat( 1, 3 ) * v( 3, i, j, k )
316 > - tmat( 1, 4 ) * v( 4, i, j, k )
317 > - tmat( 1, 5 ) * v( 5, i, j, k )
318 v( 1, i, j, k ) = v( 1, i, j, k )
325 c---------------------------------------------------------------------
326 c send data to east and south
327 c---------------------------------------------------------------------
329 call exchange_1( v,k,iex )