Der Goopax Compiler: GPU-Programmierung in C++
AMD R9 290X: 5.6 TFLOPS (SP MulAdd)
Programmierung ~10000 Threads Entwicklungsumgebungen
Entwicklungsumgebungen: CUDA, OpenCL. Compiler: kernel → GPU-Maschinencode. Trennung CPU/GPU: CPU: C/C++; GPU: __kernel void hello(__global int* out) { int tid = get_global_id(0); out[tid] = tid; }
Hello World: #include <goopax.h> using namespace goopax; struct hello_world : public kernel { void impl() { gpu_ostream out(std::cout); out << "Hello from thread " << global_id() << std::endl; } }; int main(int argc, char** argv) { easy_env Env(argc, argv); hello_world P; P(); } Einbindung als Bibliothek; GPU-Kernel als Klasse; Parsing durch C++-Compiler; Programmierung durch spezielle Datentypen: gpu_int, gpu_float, ...
Mandelbrot: #include <goopax.h> #include <goopax/gl_env.h> #include <complex> using namespace goopax; using std::complex; struct mandelbrot : public kernel { const unsigned int width; const unsigned int height; buffer<float> params; void impl() { const resource<float> params(this->params); resource<uint32_t> image; const complex<gpu_float> top_left(params[0], params[1]); const gpu_float scale = params[2]; gpu_for_global(k, 0, width*height) { const complex<gpu_float> c = top_left + scale * complex<gpu_float>(gpu_float(k%width), gpu_float(k/width)); complex<gpu_float> z(0, 0); gpu_int escape = 0; gpu_for(i, 0, 256) { z = z*z + c; escape.cmov(norm(z) < 4.0, i); } image[k] = color(escape); } } static gpu_uint color(const gpu_int escape)
Mandelbrot: void impl() { const resource<float> params(this->params); resource<uint32_t> image; const complex<gpu_float> top_left(params[0], params[1]); const gpu_float scale = params[2]; gpu_for_global(k, 0, width*height) { const complex<gpu_float> c = top_left + scale * complex<gpu_float>(gpu_float(k%width), gpu_float(k/width)); complex<gpu_float> z(0, 0); gpu_int escape = 0; gpu_for(i, 0, 256) { z = z*z + c; escape.cmov(norm(z) < 4.0, i); } image[k] = color(escape); } } static gpu_uint color(const gpu_int escape)...
Mandelbrot: void impl() { const resource<float> params(this->params); resource<uint32_t> image; const complex<gpu_float> top_left(params[0], params[1]); const gpu_float scale = params[2]; gpu_for_global(k, 0, width*height) { const complex<gpu_float> c = top_left + scale * complex<gpu_float>(gpu_float(k%width), gpu_float(k/width)); complex<gpu_float> z(0, 0); gpu_int escape = 0; gpu_for(i, 0, 256) { z = z*z + c; escape.cmov(norm(z) < 4.0, i); } image[k] = color(escape); } } static gpu_uint color(const gpu_int escape)...
Mandelbrot: void impl() { const resource<float> params(this->params); resource<uint32_t> image; const complex<gpu_float> top_left(params[0], params[1]); const gpu_float scale = params[2]; gpu_for_global(k, 0, width*height) { const complex<gpu_float> c = top_left + scale * complex<gpu_float>(gpu_float(k%width), gpu_float(k/width)); complex<gpu_float> z(0, 0); gpu_int escape = 0; gpu_for(i, 0, 256) { z = z*z + c; escape.cmov(norm(z) < 4.0, i); } image[k] = color(escape); } } static gpu_uint color(const gpu_int escape)...
Mandelbrot: void impl() { const resource<float> params(this->params); resource<uint32_t> image; const complex<gpu_float> top_left(params[0], params[1]); const gpu_float scale = params[2]; gpu_for_global(k, 0, width*height) { const complex<gpu_float> c = top_left + scale * complex<gpu_float>(gpu_float(k%width), gpu_float(k/width)); complex<gpu_float> z(0, 0); gpu_int escape = 0; gpu_for(i, 0, 256) { z = z*z + c; escape.cmov(norm(z) < 4.0, i); } image[k] = color(escape); } } static gpu_uint color(const gpu_int escape)...
Mandelbrot: void impl() { const resource<float> params(this->params); resource<uint32_t> image; const complex<gpu_float> top_left(params[0], params[1]); const gpu_float scale = params[2]; gpu_for_global(k, 0, width*height) { const complex<gpu_float> c = top_left + scale * complex<gpu_float>(gpu_float(k%width), gpu_float(k/width)); complex<gpu_float> z(0, 0); gpu_int escape = 0; gpu_for(i, 0, 256) { z = z*z + c; escape.cmov(norm(z) < 4.0, i); } image[k] = color(escape); } } static gpu_uint color(const gpu_int escape)...
Anwendungsgebiete: HPC, Simulationen, Mathematik, Maschinelles Lernen, Bitcoins, Grafik, Computerspiele, Virtual Reality, Videoschnitt
Goopax: Vorteile – OO-Programmierung in C++; CPU/GPU-Code bestens integriert; zusätzliche Optimierungen durch maßgeschneiderte Programme; automatisierte Fehlersuche: Array overflows, uninitialisierte Variablen, Race conditions