examples/smpi/gemm/gemm.c

   1 /*==================================================================================================*/
   2 /*# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.*/
   3 /*# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).*/
   4 /*#*/
   5 /*# Author(s):*/
   6 /*#   Valeriu Codreanu <valeriu.codreanu@surfsara.nl>*/
   7 /*#*/
   8 /*# ==================================================================================================*/
   9
  10 #include "stdio.h"
  11 #include "mpi.h"
  12
  13 void multiply(float* a, float* b, float* c, int istart, int iend, int size);
  14 void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size);
  15
  16
  17 void multiply(float* a, float* b, float* c, int istart, int iend, int size)
  18 {
  19     for (int i = istart; i <= iend; ++i) {
  20         for (int j = 0; j < size; ++j) {
  21             for (int k = 0; k < size; ++k) {
  22                 c[i*size+j] += a[i*size+k] * b[k*size+j];
  23             }
  24         }
  25     }
  26 }
  27
  28 void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size)
  29 {
  30     //for (int i = istart; i <= iend; ++i) {
  31     SMPI_SAMPLE_GLOBAL (int i = istart, i <= iend, ++i, 10, 0.005){
  32         for (int j = 0; j < size; ++j) {
  33             for (int k = 0; k < size; ++k) {
  34                 c[i*size+j] += a[i*size+k] * b[k*size+j];
  35             }
  36         }
  37     }
  38 }
  39
  40 int main(int argc, char* argv[])
  41 {
  42     int rank, nproc;
  43     int istart, iend;
  44     double start, end;
  45
  46     MPI_Init(&argc, &argv);
  47     MPI_Comm_size(MPI_COMM_WORLD, &nproc);
  48     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  49
  50     if(argc<2){
  51       if (rank == 0)
  52         printf("Usage : gemm size \"native/sampling\"\n");
  53       exit(-1);
  54     }
  55
  56     int size=0;
  57     int read = sscanf(argv[1], "%d", &size);
  58     if(read==0){
  59       if (rank == 0)
  60         printf("Invalid argument %s\n", argv[1]);
  61       exit(-1);
  62     }else{
  63       if (rank == 0)
  64         printf("Matrix Size : %dx%d\n",size,size);
  65     }
  66
  67     float *a = (float*)malloc(sizeof(float)*size*size);
  68     float *b = (float*)malloc(sizeof(float)*size*size);
  69     float *c = (float*)malloc(sizeof(float)*size*size);
  70
  71
  72     MPI_Barrier(MPI_COMM_WORLD);
  73     start = MPI_Wtime();
  74
  75     if (rank == 0) {
  76         // Initialize buffers.
  77         for (int i = 0; i < size; ++i) {
  78             for (int j = 0; j < size; ++j) {
  79                 a[i*size+j] = (float)i + j;
  80                 b[i*size+j] = (float)i - j;
  81                 c[i*size+j] = 0.0f;
  82             }
  83         }
  84     }
  85
  86     // Broadcast matrices to all workers.
  87     MPI_Bcast(a, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  88     MPI_Bcast(b, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  89     MPI_Bcast(c, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  90
  91     // Partition work by i-for-loop.
  92     istart = (size / nproc) * rank;
  93     iend = (size / nproc) * (rank + 1) - 1;
  94
  95     // Compute matrix multiplication in [istart,iend]
  96     // of i-for-loop.
  97     // C <- C + A x B
  98     if (strcmp(argv[2], "sampling")){
  99       if (rank == 0)
 100         printf ("Native mode\n");
 101       multiply(a, b, c, istart, iend, size);
 102     }else{
 103       if (rank == 0)
 104         printf ("Sampling mode\n");
 105       multiply_sampled(a, b, c, istart, iend, size);
 106     }
 107
 108     // Gather computed results.
 109     MPI_Gather(c + (size/nproc*rank),
 110                size*size/nproc,
 111                MPI_FLOAT,
 112                c + (size/nproc*rank),
 113                size*size/nproc,
 114                MPI_FLOAT,
 115                0,
 116                MPI_COMM_WORLD);
 117
 118     if (rank == 0) {
 119         // Compute remaining multiplications
 120         // when size % nproc > 0.
 121         if (size % nproc > 0) {
 122             if (strcmp(argv[2], "sampling"))
 123                 multiply(a, b, c, (size/nproc)*nproc, size-1, size);
 124             else
 125                 multiply_sampled(a, b, c, (size/nproc)*nproc, size-1, size);
 126         }
 127     }
 128
 129     MPI_Barrier(MPI_COMM_WORLD);
 130     end = MPI_Wtime();
 131
 132     MPI_Finalize();
 133     free(a);
 134     free(b);
 135     free(c);
 136     if (rank == 0) { /* use time on master node */
 137         //float msec_total = 0.0f;
 138
 139         // Compute and print the performance
 140         float sec_per_matrix_mul = end-start;
 141         double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
 142         double giga_flops = (flops_per_matrix_mul * 1.0e-9f) / (sec_per_matrix_mul / 1000.0f);
 143         printf(
 144             "Performance= %.2f GFlop/s, Time= %.3f sec, Size= %.0f Ops\n",
 145             giga_flops,
 146             sec_per_matrix_mul,
 147             flops_per_matrix_mul);
 148     }
 149
 150
 151     return 0;
 152 }