[9017] | 1 | <!doctype html> |
---|
| 2 | <html lang="en"> |
---|
| 3 | <head> |
---|
| 4 | <meta charset="utf-8"> |
---|
| 5 | <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> |
---|
| 6 | <style> |
---|
| 7 | h1, |
---|
| 8 | h2, |
---|
| 9 | h3, |
---|
| 10 | h4, |
---|
| 11 | h5, |
---|
| 12 | h6, |
---|
| 13 | p, |
---|
| 14 | blockquote { |
---|
| 15 | margin: 0; |
---|
| 16 | padding: 0; |
---|
| 17 | } |
---|
| 18 | body { |
---|
| 19 | font-family: "Helvetica Neue", Helvetica, "Hiragino Sans GB", Arial, sans-serif; |
---|
| 20 | font-size: 13px; |
---|
| 21 | line-height: 18px; |
---|
| 22 | color: #737373; |
---|
| 23 | background-color: white; |
---|
| 24 | margin: 10px 13px 10px 13px; |
---|
| 25 | } |
---|
| 26 | table { |
---|
| 27 | margin: 10px 0 15px 0; |
---|
| 28 | border-collapse: collapse; |
---|
| 29 | } |
---|
| 30 | td,th { |
---|
| 31 | border: 1px solid #ddd; |
---|
| 32 | padding: 3px 10px; |
---|
| 33 | } |
---|
| 34 | th { |
---|
| 35 | padding: 5px 10px; |
---|
| 36 | } |
---|
| 37 | |
---|
| 38 | a { |
---|
| 39 | color: #0069d6; |
---|
| 40 | } |
---|
| 41 | a:hover { |
---|
| 42 | color: #0050a3; |
---|
| 43 | text-decoration: none; |
---|
| 44 | } |
---|
| 45 | a img { |
---|
| 46 | border: none; |
---|
| 47 | } |
---|
| 48 | p { |
---|
| 49 | margin-bottom: 9px; |
---|
| 50 | } |
---|
| 51 | h1, |
---|
| 52 | h2, |
---|
| 53 | h3, |
---|
| 54 | h4, |
---|
| 55 | h5, |
---|
| 56 | h6 { |
---|
| 57 | color: #404040; |
---|
| 58 | line-height: 36px; |
---|
| 59 | } |
---|
| 60 | h1 { |
---|
| 61 | margin-bottom: 18px; |
---|
| 62 | font-size: 30px; |
---|
| 63 | } |
---|
| 64 | h2 { |
---|
| 65 | font-size: 24px; |
---|
| 66 | } |
---|
| 67 | h3 { |
---|
| 68 | font-size: 18px; |
---|
| 69 | } |
---|
| 70 | h4 { |
---|
| 71 | font-size: 16px; |
---|
| 72 | } |
---|
| 73 | h5 { |
---|
| 74 | font-size: 14px; |
---|
| 75 | } |
---|
| 76 | h6 { |
---|
| 77 | font-size: 13px; |
---|
| 78 | } |
---|
| 79 | hr { |
---|
| 80 | margin: 0 0 19px; |
---|
| 81 | border: 0; |
---|
| 82 | border-bottom: 1px solid #ccc; |
---|
| 83 | } |
---|
| 84 | blockquote { |
---|
| 85 | padding: 13px 13px 21px 15px; |
---|
| 86 | margin-bottom: 18px; |
---|
| 87 | font-family:georgia,serif; |
---|
| 88 | font-style: italic; |
---|
| 89 | } |
---|
| 90 | blockquote:before { |
---|
| 91 | content:"\201C"; |
---|
| 92 | font-size:40px; |
---|
| 93 | margin-left:-10px; |
---|
| 94 | font-family:georgia,serif; |
---|
| 95 | color:#eee; |
---|
| 96 | } |
---|
| 97 | blockquote p { |
---|
| 98 | font-size: 14px; |
---|
| 99 | font-weight: 300; |
---|
| 100 | line-height: 18px; |
---|
| 101 | margin-bottom: 0; |
---|
| 102 | font-style: italic; |
---|
| 103 | } |
---|
| 104 | code, pre { |
---|
| 105 | font-family: Monaco, Andale Mono, Courier New, monospace; |
---|
| 106 | } |
---|
| 107 | code { |
---|
| 108 | background-color: #fee9cc; |
---|
| 109 | color: rgba(0, 0, 0, 0.75); |
---|
| 110 | padding: 1px 3px; |
---|
| 111 | font-size: 12px; |
---|
| 112 | -webkit-border-radius: 3px; |
---|
| 113 | -moz-border-radius: 3px; |
---|
| 114 | border-radius: 3px; |
---|
| 115 | } |
---|
| 116 | pre { |
---|
| 117 | display: block; |
---|
| 118 | padding: 14px; |
---|
| 119 | margin: 0 0 18px; |
---|
| 120 | line-height: 16px; |
---|
| 121 | font-size: 11px; |
---|
| 122 | border: 1px solid #d9d9d9; |
---|
| 123 | white-space: pre-wrap; |
---|
| 124 | word-wrap: break-word; |
---|
| 125 | } |
---|
| 126 | pre code { |
---|
| 127 | background-color: #fff; |
---|
| 128 | color:#737373; |
---|
| 129 | font-size: 11px; |
---|
| 130 | padding: 0; |
---|
| 131 | } |
---|
| 132 | sup { |
---|
| 133 | font-size: 0.83em; |
---|
| 134 | vertical-align: super; |
---|
| 135 | line-height: 0; |
---|
| 136 | } |
---|
| 137 | * { |
---|
| 138 | -webkit-print-color-adjust: exact; |
---|
| 139 | } |
---|
| 140 | @media screen and (min-width: 914px) { |
---|
| 141 | body { |
---|
| 142 | width: 854px; |
---|
| 143 | margin:10px auto; |
---|
| 144 | } |
---|
| 145 | } |
---|
| 146 | @media print { |
---|
| 147 | body,code,pre code,h1,h2,h3,h4,h5,h6 { |
---|
| 148 | color: black; |
---|
| 149 | } |
---|
| 150 | table, pre { |
---|
| 151 | page-break-inside: avoid; |
---|
| 152 | } |
---|
| 153 | } |
---|
| 154 | </style> |
---|
| 155 | <title>This is the GPU based ANUGA</title> |
---|
| 164 | <script type="text/x-mathjax-config">MathJax.Hub.Config({tex2jax:{inlineMath:[['$$$','$$$']]}});</script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> |
---|
| 165 | </head> |
---|
| 166 | <body> |
---|
| 167 | <h1>This is the GPU based ANUGA </h1> |
---|
| 168 | |
---|
| 169 | <h2>Documentation</h2> |
---|
| 170 | |
---|
| 171 | <p>Documentation is under the docs directory, and the HTML version generated by <strong>Sphinx</strong> is under the directory <em>docs/sphinx/build/html/</em></p> |
---|
| 172 | |
---|
| 173 | <h2>Install Guide</h2> |
---|
| 174 | |
---|
| 175 | <ol> |
---|
| 176 | <li>Original <a href="http://anuga.anu.edu.au">ANUGA</a> is required</li> |
---|
| 177 | <li>For CUDA version, <a href="http://documen.tician.de/pycuda">PyCUDA</a> is required</li> |
---|
| 178 | <li>For OpenHMPP version, current implementation is based on the <a href="http://www.caps-entreprise.com">CAPS OpenHMPP Compiler</a> |
---|
| 179 | |
---|
| 180 | <ul> |
---|
| 181 | <li>When compiling the code with <strong>Makefile</strong>, the NVIDIA device architecture and compute capability need to be specified |
---|
| 182 | |
---|
| 183 | <ul> |
---|
| 184 | <li>For example, GTX480 with 2.0 compute capability <br> |
---|
| 185 | <code>HMPP_FLAGS13 = -e --nvcc-options -Xptxas=-v,-arch=sm_20 -c --force</code></li> |
---|
| 186 | <li>For GTX680 with 3.0 compute capability <br> |
---|
| 187 | <code>HMPP_FLAGS13 = -e --nvcc-options -Xptxas=-v,-arch=sm_30 -c --force</code></li> |
---|
| 188 | </ul> |
---|
| 189 | </li> |
---|
| 190 | <li>Also the path for <strong>python</strong>, <strong>numpy</strong>, and <strong>ANUGA/utilities</strong> packages need to be specified</li> |
---|
| 191 | <li>Defining the macro <strong>USING_MIRROR_DATA</strong> in <strong>hmpp_fun.h</strong> <br> |
---|
| 192 | <code>#define USING_MIRROR_DATA</code> |
---|
| 193 | <br>will enable the advanced version, which uses OpenHMPP Mirrored Data technology so that data transmission costs can be effectively cut down; otherwise the basic version is enabled.</li> |
---|
| 194 | </ul> |
---|
| 195 | </li> |
---|
| 196 | </ol> |
---|
| 197 | |
---|
| 198 | |
---|
| 199 | <h2>Environment Vars</h2> |
---|
| 200 | |
---|
| 201 | <p>Please add the following variables to your <strong>.bashrc</strong> or <strong>.bash_profile</strong> file</p> |
---|
| 202 | |
---|
| 203 | <pre><code>export ANUGA_CUDA=/where_the_anuga-cuda/src |
---|
| 204 | |
---|
| 205 | export PYTHONPATH=$PYTHONPATH:$ANUGA_CUDA |
---|
| 206 | </code></pre> |
---|
| 207 | |
---|
| 208 | <h2>Basic Code Structure</h2> |
---|
| 209 | |
---|
| 210 | <p><img src="docs/codeStructure.pdf" title="anuga-cuda code structure" alt="Code Structure" /></p> |
---|
| 211 | |
---|
| 212 | <ul> |
---|
| 213 | <li><strong>README</strong> (What you are reading)</li> |
---|
| 214 | <li><strong>docs/</strong> Documentation directory |
---|
| 215 | |
---|
| 216 | <ul> |
---|
| 217 | <li><strong>codeStructure.pdf</strong> The diagram above</li> |
---|
| 218 | <li><strong>Evolve workflow.pdf</strong> The overall workflow of the evolve procedure. This includes all the function dependencies and function interfaces, which is helpful for understanding the evolve procedure of ANUGA</li> |
---|
| 219 | <li><strong>device_spe/</strong> Some device specifications of our working station</li> |
---|
| 220 | <li><strong>profiling/</strong> Profiling results |
---|
| 221 | |
---|
| 222 | <ul> |
---|
| 223 | <li><strong>CUDA/</strong> All the profiling results on CUDA implementation</li> |
---|
| 224 | </ul> |
---|
| 225 | </li> |
---|
| 226 | <li><strong>sphinx/</strong> The <a href="http://sphinx-doc.org/">Sphinx</a> generated documentation |
---|
| 227 | |
---|
| 228 | <ul> |
---|
| 229 | <li><strong>source/</strong> The source files for Sphinx based documents</li> |
---|
| 230 | <li><strong>build/</strong> The generated documents |
---|
| 231 | |
---|
| 232 | <ul> |
---|
| 233 | <li><strong>html/</strong> HTML version documentation</li> |
---|
| 234 | </ul> |
---|
| 235 | </li> |
---|
| 236 | </ul> |
---|
| 237 | </li> |
---|
| 238 | </ul> |
---|
| 239 | </li> |
---|
| 240 | <li><strong>src/</strong> Source code directory |
---|
| 241 | |
---|
| 242 | <ul> |
---|
| 243 | <li><strong>anuga_cuda/</strong> The CUDA implementation |
---|
| 244 | |
---|
| 245 | <ul> |
---|
| 246 | <li><strong>config.py</strong> Detail configuration for the CUDA implementation, including the path for all the kernel functions, optimal CUDA thread block configuration, etc.</li> |
---|
| 247 | <li><strong>gpu_domain_advanced.py</strong> Python Class for CUDA implementation in advanced version</li> |
---|
| 248 | <li><strong>gpu_domain_basic.py</strong> Python Class for CUDA implementation in basic version</li> |
---|
| 249 | </ul> |
---|
| 250 | </li> |
---|
| 251 | <li><strong>anuga_HMPP/</strong> The OpenHMPP implementation |
---|
| 252 | |
---|
| 253 | <ul> |
---|
| 254 | <li><strong>Makefile</strong> The Makefile</li> |
---|
| 255 | <li><strong>hmpp_domain.py</strong> Python Class for OpenHMPP implementation</li> |
---|
| 256 | <li><strong>hmpp_python_glue.c</strong> The Python/C API to set up communication between Python ANUGA and OpenHMPP.</li> |
---|
| 257 | <li><strong>sw_domain.h</strong> The C Struct type <strong>domain</strong> used in C implementation to access mesh information generated in Python ANUGA</li> |
---|
| 258 | <li><strong>sw_domain_fun.h</strong> Connect C Struct type <strong>domain</strong> to all mesh information</li> |
---|
| 259 | <li><strong>hmpp_fun.h</strong> All function declarations.</li> |
---|
| 260 | <li><strong>evolve.c</strong> The evolve procedure</li> |
---|
| 261 | </ul> |
---|
| 262 | </li> |
---|
| 263 | <li><strong>scripts/</strong> Some useful bash scripts</li> |
---|
| 264 | <li><strong>utilities/</strong> Utilities for sorting mesh information, checking results, etc.</li> |
---|
| 265 | </ul> |
---|
| 266 | </li> |
---|
| 267 | <li><strong>test/</strong> Testing cases |
---|
| 268 | |
---|
| 269 | <ul> |
---|
| 270 | <li><strong>CUDA/</strong> Testing cases for CUDA implementation |
---|
| 271 | |
---|
| 272 | <ul> |
---|
| 273 | <li><strong>merimbula/</strong> Merimbula testing case directory |
---|
| 274 | |
---|
| 275 | <ul> |
---|
| 276 | <li><strong>merimbula.py</strong> Merimbula testing case</li> |
---|
| 277 | </ul> |
---|
| 278 | </li> |
---|
| 279 | </ul> |
---|
| 280 | </li> |
---|
| 281 | <li><strong>OpenHMPP/</strong> Testing cases for OpenHMPP implementation |
---|
| 282 | |
---|
| 283 | <ul> |
---|
| 284 | <li><strong>merimbula.py</strong> Merimbula testing case</li> |
---|
| 285 | </ul> |
---|
| 286 | </li> |
---|
| 287 | </ul> |
---|
| 288 | </li> |
---|
| 289 | </ul> |
---|
| 290 | |
---|
| 291 | |
---|
| 292 | <h2>Examples</h2> |
---|
| 293 | |
---|
| 294 | <p>Running Merimbula model with CUDA implementation.</p> |
---|
| 295 | |
---|
| 296 | <p><code>$ python merimbula.py -gpu</code></p> |
---|
| 297 | |
---|
| 298 | <p>With pair-testing.</p> |
---|
| 299 | |
---|
| 300 | <p><code>$ python merimbula.py -gpu -test</code></p> |
---|
| 301 | |
---|
| 302 | <p>With rearranged mesh information.</p> |
---|
| 303 | |
---|
| 304 | <p><code>$ python merimbula.py -gpu -rg</code></p> |
---|
| 305 | |
---|
| 306 | <h2>Author</h2> |
---|
| 307 | |
---|
| 308 | <p>Mail to <a href="mailto:wengcsyz@gmail.com">Zhe Weng (John)</a></p> |
---|
| 309 | </body> |
---|
| 310 | </html> |
---|