Context Navigation

utility.py

Last change on this file was 9017, checked in by steve, 11 years ago
Adding in Zhe (John) Weng's anuga_cuda code as obtained from googlecode https://code.google.com/p/anuga-cuda
File size: 10.6 KB

Line
1	#!/usr/bin/env python
2
def get_kernel_function_info(a, W1=0, W2=1, W3=1):
    """Print resource usage and occupancy information for a kernel.

    The report includes
        1. max #threads per block,
        2. active warps per MP,
        3. thread block per MP,
        4. usage of shared memory,
        5. const memory,
        6. local memory,
        7. registers,
        8. hardware occupancy,
        9. limitation of the hardware occupancy.

    Parameters:
        a          -- kernel object; its max_threads_per_block,
                      shared_size_bytes, const_size_bytes, local_size_bytes
                      and num_regs attributes are read (presumably a
                      pycuda.driver.Function -- confirm against callers).
        W1, W2, W3 -- thread block dimensions.  A falsy W1 (the default 0)
                      is replaced by a.max_threads_per_block.

    Returns None; the report is written to standard output.  The device
    limits shown next to the kernel figures come from dri.Device(0) and
    tl.DeviceData().
    """

    import pycuda.tools as tl
    import pycuda.driver as dri

    dev = dri.Device(0)
    td = tl.DeviceData()
    if not W1:
        W1 = a.max_threads_per_block
    # OccupancyRecord expects the total number of threads in a block, i.e.
    # the product of the three block dimensions.
    to = tl.OccupancyRecord(
        td, W1 * W2 * W3, a.shared_size_bytes, a.num_regs)

    # Each print takes one parenthesised expression, so the output is the
    # same under Python 2 and the code also parses under Python 3.
    print("***************************************")
    print(" Function Info ")
    print(" -> max threads per block: %d / %d / %d" % (
        a.max_threads_per_block,
        dev.max_threads_per_block,
        dev.max_threads_per_multiprocessor))
    print(" -> shared mem : %d / %d" % (
        a.shared_size_bytes, td.shared_memory))
    print(" -> const mem : %d" % a.const_size_bytes)
    print(" -> local mem : %d" % a.local_size_bytes)
    print(" -> register : %d / %d" % (a.num_regs, td.registers))
    print(" -> thread block per MP %d / %d" % (
        to.tb_per_mp, td.thread_blocks_per_mp))
    print(" -> warps per MP %d / %d" % (to.warps_per_mp, td.warps_per_mp))
    print(" -> occupancy %f" % to.occupancy)
    print(" -> limitation %s" % to.limited_by)
    print(" Block size : %dx%dx%d" % (W1, W2, W3))
    print("***************************************")
44
45
46
47
def get_sourceModule(k_dir, k_name, rearranged_domain=False):
    """Compile a CUDA kernel source file and return the compiled module.

    Parameters:
        k_dir             -- directory holding the kernel source.  It is
                             concatenated directly with k_name, so it must
                             already end with a path separator.  It is also
                             passed to the compiler as an include directory.
        k_name            -- file name of the kernel source inside k_dir.
        rearranged_domain -- when true, "#define REARRANGED_DOMAIN" is
                             prepended to the source before compilation.

    Returns the pycuda.compiler.SourceModule built from the source.
    """

    from pycuda.compiler import SourceModule
    # NOTE(review): archM and codeM are imported but never used; arch and
    # code below are hard-coded.  The import is kept so that importing
    # anuga_cuda still happens here -- confirm whether they were meant to
    # replace the literals.
    from anuga_cuda import archM, codeM

    if rearranged_domain:
        define_macro = "#define REARRANGED_DOMAIN\n"
    else:
        define_macro = ""

    # Read through a context manager so the file handle is always closed.
    with open(k_dir + k_name, "r") as kernel_file:
        kernel_source = kernel_file.read()

    return SourceModule(
        define_macro + kernel_source,
        arch='compute_20',
        code='sm_20',
        options=['-use_fast_math', '--compiler-options', '-O3'],
        include_dirs=[k_dir]
        )
65
66
67
def get_page_locked_array(a):
    """Return a page-locked copy of the pageable array a.

    A new array is allocated with drv.pagelocked_zeros_like (using the
    DEVICEMAP host allocation flag), filled with the contents of a, and
    checked against a with numpy.allclose before being returned.  The
    input array itself is not modified.

    Raises AssertionError if the copy does not match the input.
    """

    # numpy is imported here because the check below needs it and the
    # module does not import it at top level.
    import numpy
    import pycuda.driver as drv

    temp_page_lock_p = drv.pagelocked_zeros_like(
        a, mem_flags=drv.host_alloc_flags.DEVICEMAP)
    # Ellipsis assignment copies element-wise for any number of dimensions.
    temp_page_lock_p[...] = a
    assert numpy.allclose(a, temp_page_lock_p)
    return temp_page_lock_p
81
82
def get_device_array(a):
    """Reserve a device buffer as large as the host array a.

    Only a.nbytes is read; the contents of a are not transferred.
    Returns whatever pycuda.driver.mem_alloc returns for that size.
    """

    import pycuda.driver as cuda_driver

    size_in_bytes = a.nbytes
    return cuda_driver.mem_alloc(size_in_bytes)
89
90
91
def asy_cpy(a, a_gpu, auto_init_context=True):
    """Copy the host array a into the device buffer a_gpu.

    With auto_init_context true (the default) the copy is issued with
    memcpy_htod_async on a newly created stream, and that stream is
    returned.  Otherwise an ordinary memcpy_htod is performed and the
    function returns None.
    """

    import pycuda.driver as cuda_driver

    if not auto_init_context:
        # Plain blocking transfer; nothing to hand back.
        cuda_driver.memcpy_htod(a_gpu, a)
        return None

    stream = cuda_driver.Stream()
    cuda_driver.memcpy_htod_async(a_gpu, a, stream)
    return stream
113
114
115
def cpy_back(a, a_gpu, auto_init_context=True):
    """Copy the device buffer a_gpu back into the host array a.

    With auto_init_context true (the default) the copy is issued with
    memcpy_dtoh_async on a newly created stream, and that stream is
    returned.  Otherwise an ordinary memcpy_dtoh is performed and the
    function returns None.
    """

    import pycuda.driver as cuda_driver

    if not auto_init_context:
        # Plain blocking transfer; nothing to hand back.
        cuda_driver.memcpy_dtoh(a, a_gpu)
        return None

    stream = cuda_driver.Stream()
    cuda_driver.memcpy_dtoh_async(a, a_gpu, stream)
    return stream
131
132
133
# Maps each supported value_type to
#   (attribute name on the compared objects,
#    third argument handed to check_rearranged_array when rg is set, or
#    None when that value is always compared with numpy.allclose).
# The device-side buffer of an attribute is stored under "<attribute>_gpu".
_CMP_FIELDS = {
    "centroid_values": ("centroid_values", None),
    "vertex_values": ("vertex_values", 3),
    "boundary_values": ("boundary_values", None),
    "edge_values": ("edge_values", 3),
    "x_gradient_values": ("x_gradient", None),
    "y_gradient_values": ("y_gradient", None),
    "explicit_update": ("explicit_update", None),
    "semi_implicit_update": ("semi_implicit_update", None),
    "areas": ("areas", None),
    "surrogate_neighbours": ("surrogate_neighbours", 3),
    "number_of_boundaries": ("number_of_boundaries", None),
    "centroid_coordinates": ("centroid_coordinates", 2),
    "vertex_coordinates": ("vertex_coordinates", 32),
    "edge_coordinates": ("edge_coordinates", 32),
}


def cpy_back_and_cmp(a, b, value_type, gpu = True, rg = False):
    """Download mesh information and check result.

    Parameters:
        a          -- object whose values are checked.  When gpu is true
                      its "<attribute>_gpu" buffer is first copied back
                      into the matching host attribute with cpy_back.
        b          -- reference object holding the expected values.
        value_type -- name of the quantity to compare; one of the keys of
                      _CMP_FIELDS.
        gpu        -- when true, copy the data back from the device before
                      comparing.
        rg         -- when true (and gpu is true), quantities that have a
                      rearranged layout are compared through
                      check_rearranged_array instead of numpy.allclose.
                      Ignored when gpu is false.

    Returns the result of the comparison.

    Raises Exception if value_type is not a known quantity name.
    """

    import numpy

    # Look the name up by equality; identity ("is") only matches strings
    # that happen to be the same interned object.
    try:
        attr, rearranged_arg = _CMP_FIELDS[value_type]
    except (KeyError, TypeError):
        raise Exception('Unknown value_type %s' % value_type)

    host_values = getattr(a, attr)

    if gpu:
        stream = cpy_back(host_values, getattr(a, attr + "_gpu"))
        # cpy_back returns a stream when it issued an asynchronous copy;
        # wait for it so the host array is complete before it is read.
        if stream is not None:
            stream.synchronize()

        if rg and rearranged_arg is not None:
            # NOTE(review): check_rearranged_array is not defined in this
            # module; it must be provided elsewhere -- confirm.
            return check_rearranged_array(
                getattr(b, attr), host_values, rearranged_arg)

    return numpy.allclose(host_values, getattr(b, attr))
244
245
246
def number_domain_method(domain):
    """Convert mesh information stored in text string to integer.

    This is used in hmpp_python_glue.

    The timestepping_method, flow_algorithm and compute_fluxes_method
    attributes of domain are each translated to an integer code; a name
    that is not listed in the corresponding table maps to that table's
    fallback code.  Each translation is also printed to standard output.

    Return value:
        (compute_fluxes_method, flow_algorithm, timestepping_method)
    """

    timestepping_codes = {'euler': 1, 'rk2': 2, 'rk3': 3}
    flow_algorithm_codes = {'tsunami': 1, 'yusuke': 2}
    compute_fluxes_codes = {
        'original': 0,
        'wb_1': 1,
        'wb_2': 2,
        'wb_3': 3,
        'tsunami': 4,
    }

    # Each print takes one parenthesised expression, so the output is the
    # same under Python 2 and the code also parses under Python 3.
    timestepping_method = timestepping_codes.get(
        domain.timestepping_method, 4)
    print(" The timestepping_method is '%s' %d" % (
        domain.timestepping_method, timestepping_method))

    flow_algorithm = flow_algorithm_codes.get(domain.flow_algorithm, 3)
    print(" The flow_algorithm is '%s' %d" % (
        domain.flow_algorithm, flow_algorithm))

    compute_fluxes_method = compute_fluxes_codes.get(
        domain.compute_fluxes_method, 5)
    print(" The compute_fluxes_method is '%s' %d" % (
        domain.compute_fluxes_method, compute_fluxes_method))

    return (compute_fluxes_method, flow_algorithm, timestepping_method)
292
293
294
295

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/anuga_work/anuga_cuda/src/utilities/utility.py

Download in other formats: