% Diagram of data paths between two GPUs: each device memory has a "kernel"
% self-loop, and data can travel from the left device memory to the right one
% either via PCIe -> host memory -> Network -> host memory -> PCIe, or over
% the direct curved "GPUDirect" edge.
%
% Preamble requirements (not visible in this file): tikz with the libraries
% positioning (for "right=... of"), shadows (for "drop shadow") and arrows
% (for the stealth' tip), plus graphicx for \resizebox.

% Box style shared by all memory nodes.
% \tikzset{.../.style} replaces the deprecated \tikzstyle syntax.
\tikzset{
  iblock/.style={
    rectangle,
    draw=black,
    rounded corners,
    top color=white,
    bottom color=black!50,
    drop shadow,
    text centered,
    anchor=north,
    text width=3cm
  }
}

% Target width of the scaled picture.
% NOTE(review): \newcommand fails if \gridscale is already defined, e.g. when
% this file is input more than once -- confirm it is only included one time.
\newcommand*{\gridscale}{0.85\linewidth}

% The trailing % after the opening brace and after \end{tikzpicture} keep
% stray spaces out of the scaled box; otherwise they are typeset next to the
% picture and slightly shift and shrink it.
\resizebox{\gridscale}{!}{%
  \begin{tikzpicture}[->,>=stealth',shorten >=1pt,thick]
    % Top row: device (GPU) memories.
    \node (GPU1mem) [iblock] {Device memory};
    \node (GPU2mem) [iblock,right=2.0cm of GPU1mem] {Device memory};

    % Bottom row: host (CPU) memories, placed below the device memories.
    \node (CPU1mem) [iblock,below=2.0cm of GPU1mem] {Host memory};
    \node (CPU2mem) [iblock,right=2.0cm of CPU1mem] {Host memory};

    % Self-loops on the outer sides of the device memories.
    \draw[->,loop left]  (GPU1mem.west) to node {kernel} (GPU1mem.west);
    \draw[->,loop right] (GPU2mem.east) to node {kernel} (GPU2mem.east);

    % Staged path: device -> host -> network -> host -> device.
    \draw[->] (GPU1mem.south) to node[left]  {PCIe}    (CPU1mem.north);
    \draw[->] (CPU1mem.east)  to node[auto]  {Network} (CPU2mem.west);
    \draw[->] (CPU2mem.north) to node[right] {PCIe}    (GPU2mem.south);

    % Direct device-to-device path; leaves and enters pointing downwards
    % (out=-90, in=-90), so the curve bows below the device row.
    \draw[->] (GPU1mem.south)
      edge[out=-90,in=-90,->] node[auto] {GPUDirect} (GPU2mem.south);
  \end{tikzpicture}%
}