% Style for the memory boxes: rounded, shaded rectangle with a drop shadow and
% centred text in a 3cm-wide box.
% Defined with \tikzset (the \tikzstyle form is deprecated).
\tikzset{
  iblock/.style={
    rectangle,
    draw=black,
    rounded corners,
    top color=white,
    bottom color=black!50,
    drop shadow,
    text centered,
    anchor=north,
    text width=3cm
  }
}
% Diagram: two "Device memory" boxes placed above two "Host memory" boxes.
% Arrows: a "kernel" self-loop on each device box, "PCIe" between each
% device/host pair, and "MPI" between the two host boxes.
% NOTE(review): presumably illustrates the data path of a GPU-to-GPU transfer
% staged through host memory -- confirm against the surrounding text.
\begin{tikzpicture}[
    ->,                   % every path gets an arrow tip, so \draw needs no [->]
    >=stealth',
    shorten >=1pt,
    thick,
    node distance=2.0cm   % single source for the spacing used by "=of" below
  ]
  % Top row: device memories.
  \node (GPU1mem) [iblock]                  {Device memory};
  \node (GPU2mem) [iblock,right=of GPU1mem] {Device memory};
  % Bottom row: host memories.
  \node (CPU1mem) [iblock,below=of GPU1mem] {Host memory};
  \node (CPU2mem) [iblock,right=of CPU1mem] {Host memory};
  % Self-loops on the outer side of each device-memory box.
  \draw[loop left]  (GPU1mem.west) to node {kernel} (GPU1mem.west);
  \draw[loop right] (GPU2mem.east) to node {kernel} (GPU2mem.east);
  % Transfer chain: device 1 -> host 1 -> host 2 -> device 2.
  \draw (GPU1mem.south) to node[auto] {PCIe} (CPU1mem.north);
  \draw (CPU1mem.east)  to node[auto] {MPI}  (CPU2mem.west);
  \draw (CPU2mem.north) to node[auto] {PCIe} (GPU2mem.south);
\end{tikzpicture}