% Style for one memory block: a rounded rectangle, shaded white-to-grey from
% top to bottom, with a drop shadow and centred text wrapped at 3cm.
% Defined with \tikzset because \tikzstyle is deprecated.
% NOTE(review): `drop shadow` requires \usetikzlibrary{shadows} -- confirm it
% is loaded in the preamble.
\tikzset{
  iblock/.style={
    rectangle, draw=black, rounded corners,
    top color=white, bottom color=black!50,
    drop shadow,
    text centered, anchor=north, text width=3cm,
  },
}
% Target width handed to \resizebox for the whole diagram.
\newcommand*{\gridscale}{0.85\linewidth}
3 \resizebox{\gridscale}{!}{
5 \begin{tikzpicture}[->,>=stealth',shorten >=1pt,thick]
% Top row: two "Device memory" blocks, 2cm apart; GPU1mem is the reference
% node from which every other block is positioned.
% NOTE(review): the `right=... of` / `below=... of` syntax requires
% \usetikzlibrary{positioning} -- confirm it is loaded in the preamble.
\node[iblock]                          (GPU1mem) { Device memory };
\node[iblock, right=2.0cm of GPU1mem]  (GPU2mem) { Device memory };
% Bottom row: two "Host memory" blocks, the first placed 2cm below GPU1mem.
\node[iblock, below=2.0cm of GPU1mem]  (CPU1mem) { Host memory };
\node[iblock, right=2.0cm of CPU1mem]  (CPU2mem) { Host memory };
% All arrow tips come from the `->` option set on the enclosing tikzpicture,
% so the individual paths do not repeat it.

% Self-loops labelled "kernel" on the outer side of each device-memory block.
\draw[loop left]  (GPU1mem.west) to node {kernel} (GPU1mem.west);
\draw[loop right] (GPU2mem.east) to node {kernel} (GPU2mem.east);

% Chain of arrows: device 1 -> host 1 (PCIe), host 1 -> host 2 (Network),
% host 2 -> device 2 (PCIe).
\draw (GPU1mem.south) to node[left]  {PCIe}    (CPU1mem.north);
\draw (CPU1mem.east)  to node[auto]  {Network} (CPU2mem.west);
\draw (CPU2mem.north) to node[right] {PCIe}    (GPU2mem.south);

% Curved arrow labelled "GPUDirect" from device 1 to device 2: it leaves the
% first block downwards and enters the second block from below. Written with
% `to` like the other connections, so the curve is a single path.
\draw (GPU1mem.south) to[out=-90,in=-90] node[auto] {GPUDirect} (GPU2mem.south);