% Style for the memory boxes: a rounded rectangle with a black outline, a
% vertical shading from white (top) to 50% black (bottom) and a drop shadow.
% The label is centred inside a fixed 3cm text width.
% `drop shadow` is provided by the TikZ `shadows` library.
% Defined with \tikzset because \tikzstyle is deprecated; the style name and
% its keys are unchanged, so existing uses of [iblock] keep working.
\tikzset{
  iblock/.style={
    rectangle, draw=black, rounded corners,
    top color=white, bottom color=black!50,
    drop shadow,
    text centered, anchor=north, text width=3cm
  }
}

% Diagram of four memory boxes on a 2x2 grid: two "Device memory" boxes on the
% top row and two "Host memory" boxes below them.
% Every path gets a stealth' arrow tip from the picture-wide `->` option, so
% the individual \draw commands do not repeat it.
\begin{tikzpicture}[->,>=stealth',shorten >=1pt,thick]

  % Boxes. GPU1mem is the reference node; the others are placed relative to
  % it (and to CPU1mem) with a 2cm gap.
  \node[iblock]                        (GPU1mem) { Device memory };
  \node[iblock,right=2cm of GPU1mem]   (GPU2mem) { Device memory };
  \node[iblock,below=2cm of GPU1mem]   (CPU1mem) { Host memory };
  \node[iblock,right=2cm of CPU1mem]   (CPU2mem) { Host memory };

  % Self-loops labelled "kernel" on the outer side of each device-memory box.
  \draw[loop left]  (GPU1mem.west) to node {kernel} (GPU1mem.west);
  \draw[loop right] (GPU2mem.east) to node {kernel} (GPU2mem.east);

  % Labelled arrows: left device memory down to left host memory (PCIe),
  % left host memory across to right host memory (MPI), and right host
  % memory up to right device memory (PCIe).
  \draw (GPU1mem.south) to node[auto] {PCIe} (CPU1mem.north);
  \draw (CPU1mem.east)  to node[auto] {MPI}  (CPU2mem.west);
  \draw (CPU2mem.north) to node[auto] {PCIe} (GPU2mem.south);

\end{tikzpicture}