1 /*************************************************************************
3 * N A S P A R A L L E L B E N C H M A R K S 3.3 *
7 *************************************************************************
9 * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. *
11 * Permission to use, copy, distribute and modify this software *
12 * for any purpose with or without fee is hereby granted. We *
13 * request, however, that all derived work reference the NAS *
14 * Parallel Benchmarks 3.3. This software is provided "as is" *
15 * without express or implied warranty. *
17 * Information on NPB 3.3, including the technical report, the *
18 * original specifications, source code, results and information *
19 * on how to submit new results, is available at: *
21 * http: www.nas.nasa.gov/Software/NPB *
23 * Send comments or suggestions to npb@nas.nasa.gov *
24 * Send bug reports to npb-bugs@nas.nasa.gov *
26 * NAS Parallel Benchmarks Group *
27 * NASA Ames Research Center *
29 * Moffett Field, CA 94035-1000 *
31 * E-mail: npb@nas.nasa.gov *
32 * Fax: (650) 604-3957 *
34 *************************************************************************
36 * Author: M. Frumkin * *
38 *************************************************************************/
45 #include "nas_common.h"
46 #include "simgrid/instr.h" //TRACE_
49 int timer_on=0,timers_tot=64;
50 double start[64], elapsed[64];
58 static int verify(char *bmname,double rnm2){
59 double verify_value=0.0;
60 double epsilon=1.0E-8;
64 if(strstr(bmname,"BH")){
65 verify_value=30892725.0;
66 }else if(strstr(bmname,"WH")){
67 verify_value=67349758.0;
68 }else if(strstr(bmname,"SH")){
69 verify_value=58875767.0;
71 fprintf(stderr,"No such benchmark as %s.\n",bmname);
74 }else if(class=='W') {
75 if(strstr(bmname,"BH")){
76 verify_value = 4102461.0;
77 }else if(strstr(bmname,"WH")){
78 verify_value = 204280762.0;
79 }else if(strstr(bmname,"SH")){
80 verify_value = 186944764.0;
82 fprintf(stderr,"No such benchmark as %s.\n",bmname);
85 }else if(class=='A') {
86 if(strstr(bmname,"BH")){
87 verify_value = 17809491.0;
88 }else if(strstr(bmname,"WH")){
89 verify_value = 1289925229.0;
90 }else if(strstr(bmname,"SH")){
91 verify_value = 610856482.0;
93 fprintf(stderr,"No such benchmark as %s.\n",bmname);
96 }else if(class=='B') {
97 if(strstr(bmname,"BH")){
98 verify_value = 4317114.0;
99 }else if(strstr(bmname,"WH")){
100 verify_value = 7877279917.0;
101 }else if(strstr(bmname,"SH")){
102 verify_value = 1836863082.0;
104 fprintf(stderr,"No such benchmark as %s.\n",bmname);
107 }else if(class=='C') {
108 if(strstr(bmname,"BH")){
110 }else if(strstr(bmname,"WH")){
112 }else if(strstr(bmname,"SH")){
115 fprintf(stderr,"No such benchmark as %s.\n",bmname);
118 }else if(class=='D') {
119 if(strstr(bmname,"BH")){
121 }else if(strstr(bmname,"WH")){
123 }else if(strstr(bmname,"SH")){
126 fprintf(stderr,"No such benchmark as %s.\n",bmname);
130 fprintf(stderr,"No such class as %c.\n",class);
132 fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2);
134 fprintf(stderr," No verification was performed.\n");
135 }else if( rnm2 - verify_value < epsilon &&
136 rnm2 - verify_value > -epsilon) { /* abs here does not work on ALTIX */
138 fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value));
141 fprintf(stderr," The correct verification value = %f\n",verify_value);
142 fprintf(stderr," Got value = %f\n",rnm2);
150 static int ipowMod(int a,long long int n,int md){
153 fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n);
158 fprintf(stderr,"ipowMod: module must be positive mod=%d",md);
179 static DGraph *buildSH(const char cls){
181 Nodes of the graph must be topologically sorted
182 to avoid MPI deadlock.
185 int numSources=num_sources; /* must be power of 2 */
186 int numOfLayers=0,tmpS=numSources>>1;
187 int firstLayerNode=0;
190 int mask=0x0,ndid=0,ndoff=0;
194 sprintf(nm,"DT_SH.%c",cls);
201 for(i=0;i<numSources;i++){
202 sprintf(nm,"Source.%d",i);
206 for(j=0;j<numOfLayers;j++){
208 for(i=0;i<numSources;i++){
209 sprintf(nm,"Comparator.%d",(i+j*firstLayerNode));
213 ndid=firstLayerNode+ndoff;
214 ar=newArc(dg->node[ndid],nd);
217 ndid=firstLayerNode+ndoff;
218 ar=newArc(dg->node[ndid],nd);
221 firstLayerNode+=numSources;
223 mask=0x00000001<<numOfLayers;
224 for(i=0;i<numSources;i++){
225 sprintf(nm,"Sink.%d",i);
229 ndid=firstLayerNode+ndoff;
230 ar=newArc(dg->node[ndid],nd);
233 ndid=firstLayerNode+ndoff;
234 ar=newArc(dg->node[ndid],nd);
239 static DGraph *buildWH(const char cls){
240 /* Nodes of the graph must be topologically sorted to avoid MPI deadlock. */
242 int numSources=num_sources,maxInDeg=4;
243 int numLayerNodes=numSources,firstLayerNode=0;
244 int totComparators=0;
245 int numPrevLayerNodes=numLayerNodes;
248 DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL;
252 sprintf(nm,"DT_WH.%c",cls);
255 for(i=0;i<numSources;i++){
256 sprintf(nm,"Sink.%d",i);
261 numPrevLayerNodes=numLayerNodes;
262 while(numLayerNodes>maxInDeg){
263 numLayerNodes=numLayerNodes/maxInDeg;
264 if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
265 for(i=0;i<numLayerNodes;i++){
266 sprintf(nm,"Comparator.%d",totComparators);
269 id=AttachNode(dg,nd);
270 for(j=0;j<maxInDeg;j++){
272 if(sid>=numPrevLayerNodes) break;
273 snd=dg->node[firstLayerNode+sid];
274 ar=newArc(dg->node[id],snd);
278 firstLayerNode+=numPrevLayerNodes;
279 numPrevLayerNodes=numLayerNodes;
281 source=newNode((char*)"Source");
282 AttachNode(dg,source);
283 for(i=0;i<numPrevLayerNodes;i++){
284 nd=dg->node[firstLayerNode+i];
285 ar=newArc(source,nd);
289 for(i=0;i<dg->numNodes/2;i++){ /* Topological sorting */
291 dg->node[i]=dg->node[dg->numNodes-1-i];
293 dg->node[dg->numNodes-1-i]=tmp;
294 dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i;
298 static DGraph *buildBH(const char cls){
299 /* Nodes of the graph must be topologically sorted to avoid MPI deadlock.*/
301 int numSources=num_sources,maxInDeg=4;
302 int numLayerNodes=numSources,firstLayerNode=0;
304 DGNode *nd=NULL, *snd=NULL, *sink=NULL;
306 int totComparators=0;
307 int numPrevLayerNodes=numLayerNodes;
311 sprintf(nm,"DT_BH.%c",cls);
314 for(i=0;i<numSources;i++){
315 sprintf(nm,"Source.%d",i);
319 while(numLayerNodes>maxInDeg){
320 numLayerNodes=numLayerNodes/maxInDeg;
321 if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
322 for(i=0;i<numLayerNodes;i++){
323 sprintf(nm,"Comparator.%d",totComparators);
326 id=AttachNode(dg,nd);
327 for(j=0;j<maxInDeg;j++){
329 if(sid>=numPrevLayerNodes) break;
330 snd=dg->node[firstLayerNode+sid];
331 ar=newArc(snd,dg->node[id]);
335 firstLayerNode+=numPrevLayerNodes;
336 numPrevLayerNodes=numLayerNodes;
338 sink=newNode((char*)"Sink");
340 for(i=0;i<numPrevLayerNodes;i++){
341 nd=dg->node[firstLayerNode+i];
353 static Arr *newArr(int len){
354 Arr *arr=(Arr *)malloc(sizeof(Arr)); //Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr));
356 arr->val=(double *)malloc(len*sizeof(double)); //arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double));
360 static void arrShow(Arr* a){
361 if(!a) fprintf(stderr,"-- NULL array\n");
363 fprintf(stderr,"-- length=%d\n",a->len);
367 static double CheckVal(Arr *feat){
370 for(i=0;i<feat->len;i++){
371 csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since result will be 0 for large len */
376 static int GetFNumDPar(int* mean, int* stdev){
382 static int GetFeatureNum(char *mbname,int id){
383 double tran=314159265.0;
385 double denom=randlc(&tran,&A);
387 int mean=num_samples,stdev=128;
389 GetFNumDPar(&mean,&stdev);
390 rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev);
391 if(rtfs<0) rtfs=-rtfs;
396 static Arr* RandomFeatures(char *bmname,int fdim,int id){
397 int len=GetFeatureNum(bmname,id)*fdim;
398 Arr* feat=newArr(len);
399 int nxg=2,nyg=2,nzg=2,nfg=5;
400 int nx=421,ny=419,nz=1427,nf=3527;
401 long long int expon=(len*(id+1))%3141592;
402 int seedx=ipowMod(nxg,expon,nx),
403 seedy=ipowMod(nyg,expon,ny),
404 seedz=ipowMod(nzg,expon,nz),
405 seedf=ipowMod(nfg,expon,nf);
411 for(i=0;i<len;i+=fdim){
412 seedx=(seedx*nxg)%nx;
413 seedy=(seedy*nyg)%ny;
414 seedz=(seedz*nzg)%nz;
415 seedf=(seedf*nfg)%nf;
417 feat->val[i+1]=seedy;
418 feat->val[i+2]=seedz;
419 feat->val[i+3]=seedf;
423 fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1));
428 static void Resample(Arr *a,int blen){
429 long long int i=0,j=0,jlo=0,jhi=0;
431 double *nval=(double *)malloc(blen*sizeof(double));
432 //double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double));
433 for(i=0;i<blen;i++) nval[i]=0.0;
434 for(i=1;i<a->len-1;i++){
435 jlo=(int)(0.5*(2*i-1)*(blen/a->len));
436 jhi=(int)(0.5*(2*i+1)*(blen/a->len));
438 avval=a->val[i]/(jhi-jlo+1);
439 for(j=jlo;j<=jhi;j++){
444 nval[blen-1]=a->val[a->len-1];
445 free(a->val); //SMPI_SHARED_FREE(a->val);
451 static Arr* WindowFilter(Arr *a, Arr* b,int w){
453 double rms0=0.0,rms1=0.0,rmsm1=0.0;
454 double weight=((double) (w+1))/(w+2);
461 if(a->len<b->len) Resample(a,b->len);
462 if(a->len>b->len) Resample(b,a->len);
463 for(i=fielddim;i<a->len-fielddim;i+=fielddim){
464 rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i])
465 +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1])
466 +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2])
467 +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]);
469 rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
470 +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
471 +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
472 +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
474 rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
475 +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
476 +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
477 +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
486 a->val[i]=weight*b->val[i];
487 a->val[i+1]=weight*b->val[i+1];
488 a->val[i+2]=weight*b->val[i+2];
489 a->val[i+3]=weight*b->val[i+3];
492 a->val[i]=weight*b->val[j];
493 a->val[i+1]=weight*b->val[j+1];
494 a->val[i+2]=weight*b->val[j+2];
495 a->val[i+3]=weight*b->val[j+3];
496 }else { /*if(k==-1)*/
498 a->val[i]=weight*b->val[j];
499 a->val[i+1]=weight*b->val[j+1];
500 a->val[i+2]=weight*b->val[j+2];
501 a->val[i+3]=weight*b->val[j+3];
506 fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w));
511 static int SendResults(DGraph *dg,DGNode *nd,Arr *feat){
516 TRACE_smpi_set_category ("SendResults");
517 for(i=0;i<nd->outDegree;i++){
519 if(ar->tail!=nd) continue;
522 if(head->address!=nd->address){
523 MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD);
524 MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD);
527 TRACE_smpi_set_category (NULL);
530 static Arr* CombineStreams(DGraph *dg,DGNode *nd){
531 Arr *resfeat=newArr(num_samples*fielddim);
536 Arr *feat=NULL,*featp=NULL;
538 if(nd->inDegree==0) return NULL;
539 for(i=0;i<nd->inDegree;i++){
541 if(ar->head!=nd) continue;
543 if(tail->address!=nd->address){
546 MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
548 MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
549 resfeat=WindowFilter(resfeat,feat,nd->id);
550 free(feat);//SMPI_SHARED_FREE(feat);
552 featp=(Arr *)tail->feat;
553 feat=newArr(featp->len);
554 memcpy(feat->val,featp->val,featp->len*sizeof(double));
555 resfeat=WindowFilter(resfeat,feat,nd->id);
556 free(feat);//SMPI_SHARED_FREE(feat);
559 for(i=0;i<resfeat->len;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree;
564 static double Reduce(Arr *a,int w){
570 retv=(int)(w*CheckVal(a));/* The casting needed for node and array dependent verifcation */
573 fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w));
578 static double ReduceStreams(DGraph *dg,DGNode *nd){
586 TRACE_smpi_set_category ("ReduceStreams");
588 for(i=0;i<nd->inDegree;i++){
590 if(ar->head!=nd) continue;
592 if(tail->address!=nd->address){
596 MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
598 MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
599 csum+=Reduce(feat,(nd->id+1));
600 free(feat);//SMPI_SHARED_FREE(feat);
602 csum+=Reduce(tail->feat,(nd->id+1));
605 if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree);
606 retv=(nd->id+1)*csum;
610 static int ProcessNodes(DGraph *dg,int me){
613 int i=0,verified=0,tag;
618 TRACE_smpi_set_category ("ProcessNodes");
620 for(i=0;i<dg->numNodes;i++){
622 if(nd->address!=me) continue;
623 if(strstr(nd->name,"Source")){
624 nd->feat=RandomFeatures(dg->name,fielddim,nd->id);
625 SendResults(dg,nd,nd->feat);
626 }else if(strstr(nd->name,"Sink")){
627 chksum=ReduceStreams(dg,nd);
628 tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
629 MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD);
631 feat=CombineStreams(dg,nd);
632 SendResults(dg,nd,feat);
636 TRACE_smpi_set_category ("ProcessNodes");
638 if(me==0){ /* Report node */
641 for(i=0;i<dg->numNodes;i++){
643 if(!strstr(nd->name,"Sink")) continue;
644 tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
645 MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status);
648 verified=verify(dg->name,chksum);
653 int main(int argc,char **argv ){
654 int my_rank,comm_size;
657 int verified=0, featnum=0;
658 double bytes_sent=2.0,tot_time=0.0;
660 MPI_Init( &argc, &argv );
661 MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
662 MPI_Comm_size( MPI_COMM_WORLD, &comm_size );
664 TRACE_smpi_set_category ("begin");
665 get_info(argc, argv, &nprocs, &class);
666 check_info(DT, nprocs, class);
668 if (class == 'S') { num_samples=1728; deviation=128; num_sources=4; }
669 else if (class == 'W') { num_samples=1728*8; deviation=128*2; num_sources=4*2; }
670 else if (class == 'A') { num_samples=1728*64; deviation=128*4; num_sources=4*4; }
671 else if (class == 'B') { num_samples=1728*512; deviation=128*8; num_sources=4*8; }
672 else if (class == 'C') { num_samples=1728*4096; deviation=128*16; num_sources=4*16; }
673 else if (class == 'D') { num_samples=1728*4096*8; deviation=128*32; num_sources=4*32; }
675 printf("setparams: Internal error: invalid class type %c\n", class);
680 if(argc!=2|| ( strncmp(argv[1],"BH",2)!=0 && strncmp(argv[1],"WH",2)!=0 &&strncmp(argv[1],"SH",2)!=0)){
682 fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n");
683 fprintf(stderr,"** Where \n - N is integer number of MPI processes\n");
684 fprintf(stderr," - S is the class S, W, or A \n");
685 fprintf(stderr," - GraphName is the communication graph name BH, WH, or SH.\n");
686 fprintf(stderr," - the number of MPI processes N should not be be less than \n");
687 fprintf(stderr," the number of nodes in the graph\n");
692 if(strncmp(argv[1],"BH",2)==0){
694 }else if(strncmp(argv[1],"WH",2)==0){
696 }else if(strncmp(argv[1],"SH",2)==0){
700 if(timer_on&&dg->numNodes+1>timers_tot){
703 fprintf(stderr,"Not enough timers. Node timeing is off. \n");
705 if(dg->numNodes>comm_size){
707 fprintf(stderr,"** The number of MPI processes should not be less than \n");
708 fprintf(stderr,"** the number of nodes in the graph\n");
709 fprintf(stderr,"** Number of MPI processes = %d\n",comm_size);
710 fprintf(stderr,"** Number nodes in the graph = %d\n",dg->numNodes);
715 for(i=0;i<dg->numNodes;i++){
716 dg->node[i]->address=i;
719 printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" );
724 verified=ProcessNodes(dg,my_rank);
725 TRACE_smpi_set_category ("end");
727 featnum=num_samples*fielddim;
728 bytes_sent=featnum*dg->numArcs;
732 tot_time=timer_read(0);
733 c_print_results( dg->name, class, featnum, 0, 0, dg->numNodes, 0, comm_size, tot_time, bytes_sent/tot_time,
734 "bytes transmitted", verified);