1 c---------------------------------------------------------------------
2 c---------------------------------------------------------------------
4 subroutine buts( ldmx, ldmy, ldmz,
9 > ist, iend, jst, jend,
10 > nx0, ny0, ipt, jpt )
12 c---------------------------------------------------------------------
13 c---------------------------------------------------------------------
15 c---------------------------------------------------------------------
17 c compute the regular-sparse, block upper triangular solution:
21 c---------------------------------------------------------------------
25 c---------------------------------------------------------------------
27 c---------------------------------------------------------------------
28 integer ldmx, ldmy, ldmz
31 double precision omega
32 double precision v( 5, -1:ldmx+2, -1:ldmy+2, *),
34 > d( 5, 5, ldmx, ldmy),
35 > udx( 5, 5, ldmx, ldmy),
36 > udy( 5, 5, ldmx, ldmy),
37 > udz( 5, 5, ldmx, ldmy )
43 c---------------------------------------------------------------------
45 c---------------------------------------------------------------------
46 integer i, j, m, l, istp, iendp
48 double precision tmp, tmp1
49 double precision tmat(5,5)
52 c---------------------------------------------------------------------
53 c receive data from south and east
54 c---------------------------------------------------------------------
56 call exchange_1( v,k,iex )
62 > omega * ( udz( m, 1, i, j ) * v( 1, i, j, k+1 )
63 > + udz( m, 2, i, j ) * v( 2, i, j, k+1 )
64 > + udz( m, 3, i, j ) * v( 3, i, j, k+1 )
65 > + udz( m, 4, i, j ) * v( 4, i, j, k+1 )
66 > + udz( m, 5, i, j ) * v( 5, i, j, k+1 ) )
72 do l = iend+jend, ist+jst, -1
73 istp = max(l - jend, ist)
74 iendp = min(l - jst, iend)
81 ! manually unroll the loop
83 tv( 1, i, j ) = tv( 1, i, j )
84 > + omega * ( udy( 1, 1, i, j ) * v( 1, i, j+1, k )
85 > + udx( 1, 1, i, j ) * v( 1, i+1, j, k )
86 > + udy( 1, 2, i, j ) * v( 2, i, j+1, k )
87 > + udx( 1, 2, i, j ) * v( 2, i+1, j, k )
88 > + udy( 1, 3, i, j ) * v( 3, i, j+1, k )
89 > + udx( 1, 3, i, j ) * v( 3, i+1, j, k )
90 > + udy( 1, 4, i, j ) * v( 4, i, j+1, k )
91 > + udx( 1, 4, i, j ) * v( 4, i+1, j, k )
92 > + udy( 1, 5, i, j ) * v( 5, i, j+1, k )
93 > + udx( 1, 5, i, j ) * v( 5, i+1, j, k ) )
94 tv( 2, i, j ) = tv( 2, i, j )
95 > + omega * ( udy( 2, 1, i, j ) * v( 1, i, j+1, k )
96 > + udx( 2, 1, i, j ) * v( 1, i+1, j, k )
97 > + udy( 2, 2, i, j ) * v( 2, i, j+1, k )
98 > + udx( 2, 2, i, j ) * v( 2, i+1, j, k )
99 > + udy( 2, 3, i, j ) * v( 3, i, j+1, k )
100 > + udx( 2, 3, i, j ) * v( 3, i+1, j, k )
101 > + udy( 2, 4, i, j ) * v( 4, i, j+1, k )
102 > + udx( 2, 4, i, j ) * v( 4, i+1, j, k )
103 > + udy( 2, 5, i, j ) * v( 5, i, j+1, k )
104 > + udx( 2, 5, i, j ) * v( 5, i+1, j, k ) )
105 tv( 3, i, j ) = tv( 3, i, j )
106 > + omega * ( udy( 3, 1, i, j ) * v( 1, i, j+1, k )
107 > + udx( 3, 1, i, j ) * v( 1, i+1, j, k )
108 > + udy( 3, 2, i, j ) * v( 2, i, j+1, k )
109 > + udx( 3, 2, i, j ) * v( 2, i+1, j, k )
110 > + udy( 3, 3, i, j ) * v( 3, i, j+1, k )
111 > + udx( 3, 3, i, j ) * v( 3, i+1, j, k )
112 > + udy( 3, 4, i, j ) * v( 4, i, j+1, k )
113 > + udx( 3, 4, i, j ) * v( 4, i+1, j, k )
114 > + udy( 3, 5, i, j ) * v( 5, i, j+1, k )
115 > + udx( 3, 5, i, j ) * v( 5, i+1, j, k ) )
116 tv( 4, i, j ) = tv( 4, i, j )
117 > + omega * ( udy( 4, 1, i, j ) * v( 1, i, j+1, k )
118 > + udx( 4, 1, i, j ) * v( 1, i+1, j, k )
119 > + udy( 4, 2, i, j ) * v( 2, i, j+1, k )
120 > + udx( 4, 2, i, j ) * v( 2, i+1, j, k )
121 > + udy( 4, 3, i, j ) * v( 3, i, j+1, k )
122 > + udx( 4, 3, i, j ) * v( 3, i+1, j, k )
123 > + udy( 4, 4, i, j ) * v( 4, i, j+1, k )
124 > + udx( 4, 4, i, j ) * v( 4, i+1, j, k )
125 > + udy( 4, 5, i, j ) * v( 5, i, j+1, k )
126 > + udx( 4, 5, i, j ) * v( 5, i+1, j, k ) )
127 tv( 5, i, j ) = tv( 5, i, j )
128 > + omega * ( udy( 5, 1, i, j ) * v( 1, i, j+1, k )
129 > + udx( 5, 1, i, j ) * v( 1, i+1, j, k )
130 > + udy( 5, 2, i, j ) * v( 2, i, j+1, k )
131 > + udx( 5, 2, i, j ) * v( 2, i+1, j, k )
132 > + udy( 5, 3, i, j ) * v( 3, i, j+1, k )
133 > + udx( 5, 3, i, j ) * v( 3, i+1, j, k )
134 > + udy( 5, 4, i, j ) * v( 4, i, j+1, k )
135 > + udx( 5, 4, i, j ) * v( 4, i+1, j, k )
136 > + udy( 5, 5, i, j ) * v( 5, i, j+1, k )
137 > + udx( 5, 5, i, j ) * v( 5, i+1, j, k ) )
140 c---------------------------------------------------------------------
141 c diagonal block inversion
142 c---------------------------------------------------------------------
144 ! manually unroll the loop
146 tmat( 1, 1 ) = d( 1, 1, i, j )
147 tmat( 1, 2 ) = d( 1, 2, i, j )
148 tmat( 1, 3 ) = d( 1, 3, i, j )
149 tmat( 1, 4 ) = d( 1, 4, i, j )
150 tmat( 1, 5 ) = d( 1, 5, i, j )
151 tmat( 2, 1 ) = d( 2, 1, i, j )
152 tmat( 2, 2 ) = d( 2, 2, i, j )
153 tmat( 2, 3 ) = d( 2, 3, i, j )
154 tmat( 2, 4 ) = d( 2, 4, i, j )
155 tmat( 2, 5 ) = d( 2, 5, i, j )
156 tmat( 3, 1 ) = d( 3, 1, i, j )
157 tmat( 3, 2 ) = d( 3, 2, i, j )
158 tmat( 3, 3 ) = d( 3, 3, i, j )
159 tmat( 3, 4 ) = d( 3, 4, i, j )
160 tmat( 3, 5 ) = d( 3, 5, i, j )
161 tmat( 4, 1 ) = d( 4, 1, i, j )
162 tmat( 4, 2 ) = d( 4, 2, i, j )
163 tmat( 4, 3 ) = d( 4, 3, i, j )
164 tmat( 4, 4 ) = d( 4, 4, i, j )
165 tmat( 4, 5 ) = d( 4, 5, i, j )
166 tmat( 5, 1 ) = d( 5, 1, i, j )
167 tmat( 5, 2 ) = d( 5, 2, i, j )
168 tmat( 5, 3 ) = d( 5, 3, i, j )
169 tmat( 5, 4 ) = d( 5, 4, i, j )
170 tmat( 5, 5 ) = d( 5, 5, i, j )
173 tmp1 = 1.0d+00 / tmat( 1, 1 )
174 tmp = tmp1 * tmat( 2, 1 )
175 tmat( 2, 2 ) = tmat( 2, 2 )
176 > - tmp * tmat( 1, 2 )
177 tmat( 2, 3 ) = tmat( 2, 3 )
178 > - tmp * tmat( 1, 3 )
179 tmat( 2, 4 ) = tmat( 2, 4 )
180 > - tmp * tmat( 1, 4 )
181 tmat( 2, 5 ) = tmat( 2, 5 )
182 > - tmp * tmat( 1, 5 )
183 tv( 2, i, j ) = tv( 2, i, j )
184 > - tv( 1, i, j ) * tmp
186 tmp = tmp1 * tmat( 3, 1 )
187 tmat( 3, 2 ) = tmat( 3, 2 )
188 > - tmp * tmat( 1, 2 )
189 tmat( 3, 3 ) = tmat( 3, 3 )
190 > - tmp * tmat( 1, 3 )
191 tmat( 3, 4 ) = tmat( 3, 4 )
192 > - tmp * tmat( 1, 4 )
193 tmat( 3, 5 ) = tmat( 3, 5 )
194 > - tmp * tmat( 1, 5 )
195 tv( 3, i, j ) = tv( 3, i, j )
196 > - tv( 1, i, j ) * tmp
198 tmp = tmp1 * tmat( 4, 1 )
199 tmat( 4, 2 ) = tmat( 4, 2 )
200 > - tmp * tmat( 1, 2 )
201 tmat( 4, 3 ) = tmat( 4, 3 )
202 > - tmp * tmat( 1, 3 )
203 tmat( 4, 4 ) = tmat( 4, 4 )
204 > - tmp * tmat( 1, 4 )
205 tmat( 4, 5 ) = tmat( 4, 5 )
206 > - tmp * tmat( 1, 5 )
207 tv( 4, i, j ) = tv( 4, i, j )
208 > - tv( 1, i, j ) * tmp
210 tmp = tmp1 * tmat( 5, 1 )
211 tmat( 5, 2 ) = tmat( 5, 2 )
212 > - tmp * tmat( 1, 2 )
213 tmat( 5, 3 ) = tmat( 5, 3 )
214 > - tmp * tmat( 1, 3 )
215 tmat( 5, 4 ) = tmat( 5, 4 )
216 > - tmp * tmat( 1, 4 )
217 tmat( 5, 5 ) = tmat( 5, 5 )
218 > - tmp * tmat( 1, 5 )
219 tv( 5, i, j ) = tv( 5, i, j )
220 > - tv( 1, i, j ) * tmp
224 tmp1 = 1.0d+00 / tmat( 2, 2 )
225 tmp = tmp1 * tmat( 3, 2 )
226 tmat( 3, 3 ) = tmat( 3, 3 )
227 > - tmp * tmat( 2, 3 )
228 tmat( 3, 4 ) = tmat( 3, 4 )
229 > - tmp * tmat( 2, 4 )
230 tmat( 3, 5 ) = tmat( 3, 5 )
231 > - tmp * tmat( 2, 5 )
232 tv( 3, i, j ) = tv( 3, i, j )
233 > - tv( 2, i, j ) * tmp
235 tmp = tmp1 * tmat( 4, 2 )
236 tmat( 4, 3 ) = tmat( 4, 3 )
237 > - tmp * tmat( 2, 3 )
238 tmat( 4, 4 ) = tmat( 4, 4 )
239 > - tmp * tmat( 2, 4 )
240 tmat( 4, 5 ) = tmat( 4, 5 )
241 > - tmp * tmat( 2, 5 )
242 tv( 4, i, j ) = tv( 4, i, j )
243 > - tv( 2, i, j ) * tmp
245 tmp = tmp1 * tmat( 5, 2 )
246 tmat( 5, 3 ) = tmat( 5, 3 )
247 > - tmp * tmat( 2, 3 )
248 tmat( 5, 4 ) = tmat( 5, 4 )
249 > - tmp * tmat( 2, 4 )
250 tmat( 5, 5 ) = tmat( 5, 5 )
251 > - tmp * tmat( 2, 5 )
252 tv( 5, i, j ) = tv( 5, i, j )
253 > - tv( 2, i, j ) * tmp
257 tmp1 = 1.0d+00 / tmat( 3, 3 )
258 tmp = tmp1 * tmat( 4, 3 )
259 tmat( 4, 4 ) = tmat( 4, 4 )
260 > - tmp * tmat( 3, 4 )
261 tmat( 4, 5 ) = tmat( 4, 5 )
262 > - tmp * tmat( 3, 5 )
263 tv( 4, i, j ) = tv( 4, i, j )
264 > - tv( 3, i, j ) * tmp
266 tmp = tmp1 * tmat( 5, 3 )
267 tmat( 5, 4 ) = tmat( 5, 4 )
268 > - tmp * tmat( 3, 4 )
269 tmat( 5, 5 ) = tmat( 5, 5 )
270 > - tmp * tmat( 3, 5 )
271 tv( 5, i, j ) = tv( 5, i, j )
272 > - tv( 3, i, j ) * tmp
276 tmp1 = 1.0d+00 / tmat( 4, 4 )
277 tmp = tmp1 * tmat( 5, 4 )
278 tmat( 5, 5 ) = tmat( 5, 5 )
279 > - tmp * tmat( 4, 5 )
280 tv( 5, i, j ) = tv( 5, i, j )
281 > - tv( 4, i, j ) * tmp
283 c---------------------------------------------------------------------
285 c---------------------------------------------------------------------
286 tv( 5, i, j ) = tv( 5, i, j )
289 tv( 4, i, j ) = tv( 4, i, j )
290 > - tmat( 4, 5 ) * tv( 5, i, j )
291 tv( 4, i, j ) = tv( 4, i, j )
294 tv( 3, i, j ) = tv( 3, i, j )
295 > - tmat( 3, 4 ) * tv( 4, i, j )
296 > - tmat( 3, 5 ) * tv( 5, i, j )
297 tv( 3, i, j ) = tv( 3, i, j )
300 tv( 2, i, j ) = tv( 2, i, j )
301 > - tmat( 2, 3 ) * tv( 3, i, j )
302 > - tmat( 2, 4 ) * tv( 4, i, j )
303 > - tmat( 2, 5 ) * tv( 5, i, j )
304 tv( 2, i, j ) = tv( 2, i, j )
307 tv( 1, i, j ) = tv( 1, i, j )
308 > - tmat( 1, 2 ) * tv( 2, i, j )
309 > - tmat( 1, 3 ) * tv( 3, i, j )
310 > - tmat( 1, 4 ) * tv( 4, i, j )
311 > - tmat( 1, 5 ) * tv( 5, i, j )
312 tv( 1, i, j ) = tv( 1, i, j )
315 v( 1, i, j, k ) = v( 1, i, j, k ) - tv( 1, i, j )
316 v( 2, i, j, k ) = v( 2, i, j, k ) - tv( 2, i, j )
317 v( 3, i, j, k ) = v( 3, i, j, k ) - tv( 3, i, j )
318 v( 4, i, j, k ) = v( 4, i, j, k ) - tv( 4, i, j )
319 v( 5, i, j, k ) = v( 5, i, j, k ) - tv( 5, i, j )
325 c---------------------------------------------------------------------
326 c send data to north and west
327 c---------------------------------------------------------------------
329 call exchange_1( v,k,iex )