-\tikzstyle{iblock}=[rectangle, draw=black, rounded corners, top color=white, bottom color=black!50, drop shadow, text centered, anchor=north, text width=3cm]\r
-\r
-\begin{tikzpicture}[->,>=stealth',shorten >=1pt,thick]\r
- \r
-\node (GPU1mem) [iblock] { Device memory };\r
-\node (GPU2mem) [iblock,right=2.0cm of GPU1mem] { Device memory };\r
-\node (CPU1mem) [iblock,below=2.0cm of GPU1mem] { Host memory };\r
-\node (CPU2mem) [iblock,right=2.0cm of CPU1mem] { Host memory };\r
-\r
-\draw[->,loop left] (GPU1mem.west) to node {kernel} (GPU1mem.west);\r
-\draw[->,loop right] (GPU2mem.east) to node {kernel} (GPU2mem.east); \r
-\draw[->] (GPU1mem.south) to node[auto] {PCIe} (CPU1mem.north);\r
-\draw[->] (CPU1mem.east) to node[auto] {MPI} (CPU2mem.west);\r
-\draw[->] (CPU2mem.north) to node[auto] {PCIe} (GPU2mem.south);\r
-\r
-\end{tikzpicture}\r
+% Style shared by every memory box in the diagram: a rounded, shaded rectangle
+% with a drop shadow and centred text in a 3cm-wide text box.
+% Defined with \tikzset (the supported replacement for the deprecated
+% \tikzstyle); the key name `iblock' and its options are unchanged.
+% NOTE(review): `drop shadow' presumably relies on the TikZ shadows library
+% being loaded in the preamble, which is not visible here -- confirm.
+\tikzset{iblock/.style={rectangle, draw=black, rounded corners, top color=white, bottom color=black!50, drop shadow, text centered, anchor=north, text width=3cm}}
+% Target width of the finished diagram; \resizebox derives the height from the
+% picture's aspect ratio (the `!' argument).
+\newcommand*{\gridscale}{0.85\linewidth}
+% The % after the opening brace and after \end{tikzpicture} stops the line ends
+% from turning into space tokens inside the box that \resizebox measures, which
+% would otherwise add stray horizontal space around the scaled picture.
+\resizebox{\gridscale}{!}{%
+\begin{tikzpicture}[->,>=stealth',shorten >=1pt,thick]
+
+% Memory boxes: the two device memories sit side by side, each host memory
+% is placed 2cm below its device memory.
+% NOTE(review): the `right=... of' / `below=... of' syntax presumably relies on
+% the TikZ positioning library from the preamble -- confirm.
+\node (GPU1mem) [iblock] { Device memory };
+\node (GPU2mem) [iblock,right=2.0cm of GPU1mem] { Device memory };
+\node (CPU1mem) [iblock,below=2.0cm of GPU1mem] { Host memory };
+\node (CPU2mem) [iblock,right=2.0cm of CPU1mem] { Host memory };
+
+% Self-loops labelled `kernel' on the outer side of each device memory box.
+\draw[->,loop left] (GPU1mem.west) to node {kernel} (GPU1mem.west);
+\draw[->,loop right] (GPU2mem.east) to node {kernel} (GPU2mem.east);
+% Staged route: left device -> left host (PCIe), left host -> right host
+% (Network), right host -> right device (PCIe).
+\draw[->] (GPU1mem.south) to node[left] {PCIe} (CPU1mem.north);
+\draw[->] (CPU1mem.east) to node[auto] {Network} (CPU2mem.west);
+\draw[->] (CPU2mem.north) to node[right] {PCIe} (GPU2mem.south);
+
+% Direct device-to-device route: leaves and enters both boxes at -90 degrees,
+% so the curve bends below the device memory boxes.
+\draw[->] (GPU1mem.south) edge[out=-90,in=-90,->] node[auto] {GPUDirect} (GPU2mem.south);
+
+\end{tikzpicture}%
+}