Commit 53ac98b1 authored by bbguimaraes's avatar bbguimaraes
Browse files

hcwocl: import original code

parent e05f0885
......@@ -5,3 +5,5 @@ Repository for exercises from books/websites I've read.
- sicp: "Structure and Interpretation of Computer Programs" by Harold Abelson,
Gerald Jay Sussman, and Julie Sussman (https://mitpress.mit.edu/sicp, ISBN
0262510871).
- hcwocl: "Heterogeneous Computing with OpenCL" by Benedict R. Gaster, Lee
Howes, David R. Kaeli, Perhaad Mistry, and Dana Schaa (ISBN 9780123877666).
/*
2011 Takahiro Harada
*/
#ifndef CL_ERROR_H
#define CL_ERROR_H
#ifdef DX11RENDER
#include <windows.h>
#endif
// CLASSERT(x): debug-only runtime assertion.
// With _DEBUG defined, a false condition triggers __debugbreak().
// Without _DEBUG the condition is still evaluated inside an empty `if`, so any
// side effects of `x` happen in both configurations.
// NOTE(review): both expansions are bare `if` statements; placing the macro
// directly before an `else` would make that `else` bind to the macro's `if`.
#ifdef _DEBUG
#include <assert.h>
#define CLASSERT(x) if(!(x)){__debugbreak(); }
#else
#define CLASSERT(x) if(x){}
#endif
// COMPILE_TIME_ASSERT(x): with _DEBUG defined, expands to a block that declares
// a local int array with `x` elements and references its first element; an `x`
// that evaluates to 0 therefore asks for a zero-length array, which the
// compiler is expected to reject. Without _DEBUG the macro expands to nothing,
// so the check is not performed at all.
#ifdef _DEBUG
#define COMPILE_TIME_ASSERT(x) {int compileTimeAssertFailed[x]; compileTimeAssertFailed[0];}
#else
#define COMPILE_TIME_ASSERT(x)
#endif
#ifdef _DEBUG
#include <stdarg.h>
#include <stdio.h>
/*
    debugPrintf: printf-style diagnostic output, active only when _DEBUG is defined.

    fmt : printf format string, followed by the matching variadic arguments.

    With DX11RENDER defined the message is formatted into a 256-byte buffer and
    sent to OutputDebugString (converted to wide characters first when UNICODE
    is defined). Otherwise it is written to stdout through vprintf.
*/
__inline
void debugPrintf(const char *fmt, ...)
{
    va_list arg;
    va_start(arg, fmt);
#ifdef DX11RENDER
    char buf[256];
    // _TRUNCATE clips messages longer than the buffer (always NUL-terminated).
    // vsprintf_s would instead invoke the CRT invalid-parameter handler, which
    // turns an over-long debug message into a crash.
    _vsnprintf_s( buf, sizeof(buf), _TRUNCATE, fmt, arg );
#ifdef UNICODE
    WCHAR wbuf[256];
    // First call only queries the required wide-character count (including the
    // terminator); it cannot exceed 256 because buf holds at most 256 bytes.
    int sizeWide = MultiByteToWideChar(0,0,buf,-1,wbuf,0);
    MultiByteToWideChar(0,0,buf,-1,wbuf,sizeWide);
    OutputDebugString( wbuf );
#else
    OutputDebugString( buf );
#endif
#else
    vprintf(fmt, arg);
#endif
    va_end(arg);
}
#else
/*
    debugPrintf: release stub. Accepts the same arguments as the debug version
    and does nothing, so call sites compile unchanged.
*/
__inline
void debugPrintf(const char *fmt, ...)
{
}
#endif
// WARN(msg): prints "WARNING: <msg>" followed by a newline through debugPrintf.
// `msg` is consumed by a %s conversion, so it must be a C string.
// NOTE(review): the expansion already ends with ';', so `WARN(x);` produces an
// extra empty statement and the macro cannot sit directly between `if` and `else`.
#define WARN(msg) debugPrintf("WARNING: %s\n", msg);
#endif
/*
2011 Takahiro Harada
*/
#ifndef SYNC_OBJ_H
#define SYNC_OBJ_H
#include <Windows.h>
template<typename T>
T atomAdd(const T* ptr, int value)
{
return (T)InterlockedExchangeAdd((LONG*)ptr, value);
}
template<typename T>
T atomCmpxhg(const T* ptr, int cmp, int value)
{
return (T)InterlockedCompareExchange((LONG*)ptr, value, cmp);
}
#endif
/*
2011 Takahiro Harada
*/
#include <Common/Base/ThreadPool.h>
// Builds a pool of nThreads workers with an empty task queue.
// The critical section is initialised before any worker is created because
// each worker is launched from Thread::init().
ThreadPool::ThreadPool(int nThreads)
    : m_nThreads( nThreads )
    , m_deleteSignal( false )
    , m_taskHead( 0 )
    , m_taskTail( 0 )
{
    InitializeCriticalSection(&m_cs);

    m_threads = new Thread[nThreads];
    for(int workerIdx=0; workerIdx<nThreads; workerIdx++)
    {
        Thread& worker = m_threads[workerIdx];
        worker.init(workerIdx, this);
    }

    resetThreadTimer();
}
// Shuts the pool down: raises the delete flag, wakes every worker so it can
// observe the flag, waits for all finish signals, then releases the critical
// section and the worker array.
ThreadPool::~ThreadPool()
{
    m_deleteSignal = true;
    start(true);
    wait();

    DeleteCriticalSection(&m_cs);
    delete [] m_threads;

    // Leave the object in a cleared state.
    m_threads = 0;
    m_nThreads = 0;
    m_taskHead = m_taskTail = 0;
}
// Re-reads the performance-counter frequency, records the current counter value
// as the pool's time origin, and clears every worker's timestamp list.
void ThreadPool::resetThreadTimer()
{
    QueryPerformanceFrequency( &m_frequency );
    QueryPerformanceCounter( &m_startTime );

    int workerIdx = 0;
    while( workerIdx < m_nThreads )
    {
        m_threads[workerIdx].timestampReset();
        ++workerIdx;
    }
}
// Wakes every worker so it starts draining the task queue.
// resetTimestamp: when true, the timer origin and all per-thread timestamps are
// reset before the workers are signalled.
void ThreadPool::start(bool resetTimestamp)
{
    if( resetTimestamp )
    {
        resetThreadTimer();
    }

    int workerIdx = 0;
    while( workerIdx < m_nThreads )
    {
        m_threads[workerIdx].start();
        ++workerIdx;
    }
}
void ThreadPool::wait()
{
HANDLE* finSignals = new HANDLE[m_nThreads];
for(int i=0; i<m_nThreads; i++) finSignals[i] = m_threads[i].m_finSignal;
WaitForMultipleObjects( m_nThreads, finSignals, true, INFINITE );
for(int i=0; i<m_nThreads; i++)
{
ResetEvent( m_threads[i].m_finSignal );
}
delete [] finSignals;
}
// Appends a task at the tail of the ring buffer, under the pool's critical
// section. A full queue is only detected through CLASSERT; the write proceeds
// regardless.
void ThreadPool::pushBack(Task* task)
{
    EnterCriticalSection(&m_cs);

    const int nextTail = (m_taskTail+1) & TASK_MASK;
    CLASSERT( m_taskHead != nextTail ); // full
    m_tasks[m_taskTail] = task;
    m_taskTail = nextTail;

    LeaveCriticalSection(&m_cs);
}
// Removes and returns the task at the head of the ring buffer, under the pool's
// critical section. Returns NULL when the queue is empty.
ThreadPool::Task* ThreadPool::pop()
{
    Task* front = NULL;

    EnterCriticalSection(&m_cs);
    const bool queueEmpty = ( m_taskHead == m_taskTail );
    if( !queueEmpty )
    {
        front = m_tasks[ m_taskHead ];
        m_taskHead = (m_taskHead+1)&TASK_MASK;
    }
    LeaveCriticalSection(&m_cs);

    return front;
}
// Puts every member into a defined state. The thread and its events are only
// created later by init(); until then the handles are NULL, the thread id is 0
// and the timestamp list is empty, rather than holding indeterminate values.
ThreadPool::Thread::Thread()
    : m_threadIdx( 0 )
    , m_runSignal( NULL )
    , m_finSignal( NULL )
    , m_nTimestamps( 0 )
{
    m_args.m_threadPool = NULL;
    m_args.m_idx = 0;
}
// Prepares this worker and launches its OS thread.
// idx        : index of this worker inside the owning pool's thread array.
// threadPool : owning pool; the worker pops its tasks from it.
void ThreadPool::Thread::init(int idx, ThreadPool* threadPool)
{
    m_args.m_threadPool = threadPool;
    m_args.m_idx = idx;

    // Create both manual-reset events BEFORE starting the thread: run() waits on
    // m_runSignal as soon as it is scheduled, so launching the thread first
    // could let it wait on a handle that has not been created yet.
    m_runSignal = CreateEvent(NULL, TRUE, FALSE, NULL);
    m_finSignal = CreateEvent(NULL, TRUE, FALSE, NULL);

    // NOTE(review): the handle returned by _beginthreadex is not kept, so it is
    // never closed; only the thread id is stored in m_threadIdx.
    _beginthreadex(NULL, 0, run, &m_args, 0, &m_threadIdx);
}
// Destructor; intentionally performs no work.
// NOTE(review): the event handles created in init() (m_runSignal, m_finSignal)
// are not closed here or anywhere else in the visible code - confirm whether
// that is handled elsewhere.
ThreadPool::Thread::~Thread()
{
}
void ThreadPool::Thread::start()
{
SetEvent( m_runSignal );
}
void ThreadPool::Thread::timestampReset()
{
m_nTimestamps = 0;
}
// Worker thread entry point.
// args: pointer to the worker's ThreadArgs (owning pool and worker index).
//
// Loop: wait for the run signal, clear it, drain the pool's task queue, then
// raise the finish signal. Each task is timed with the performance counter
// relative to the pool's start time, recorded as a timestamp, and deleted after
// it has run. The loop ends once the pool's delete flag is observed; the finish
// signal is raised one last time before the thread exits.
u32 __stdcall ThreadPool::Thread::run(void* args)
{
    ThreadArgs* startArgs = (ThreadArgs*)args;
    ThreadPool* pool = startArgs->m_threadPool;
    int workerIdx = startArgs->m_idx;
    Thread* self = &pool->m_threads[workerIdx];
    volatile bool& shutdownRequested = pool->m_deleteSignal;

    for(;;)
    {
        if( shutdownRequested )
            break;

        WaitForSingleObject(self->m_runSignal, INFINITE);
        ResetEvent( self->m_runSignal );

        // The wake-up may have been issued only to announce shutdown.
        if( shutdownRequested )
            break;

        for(Task* job = pool->pop(); job; job = pool->pop())
        {
            LARGE_INTEGER ticksBefore, ticksAfter;
            QueryPerformanceCounter( &ticksBefore );
            job->run( workerIdx );
            QueryPerformanceCounter( &ticksAfter );

            // Counter ticks since the pool's start time, scaled by 1000 and
            // divided by the counter frequency.
            float startTime = (float)(1000*(ticksBefore.QuadPart - pool->m_startTime.QuadPart))/pool->m_frequency.QuadPart;
            float endTime = (float)(1000*(ticksAfter.QuadPart - pool->m_startTime.QuadPart))/pool->m_frequency.QuadPart;
            self->pushBackTimeStamp( job->getType(), startTime, endTime );

            delete job;
        }

        SetEvent( self->m_finSignal );
    }

    SetEvent( self->m_finSignal );
    _endthreadex(0);
    return 0;
}
// Records one (type, start, end) entry in this worker's timestamp list.
// Entries are silently dropped once MAX_TIMESTAMPS-1 of them are stored.
void ThreadPool::Thread::pushBackTimeStamp(u16 type, float s, float e)
{
    if( m_nTimestamps >= MAX_TIMESTAMPS-1 )
        return;

    Timestamp& slot = m_timestamps[ m_nTimestamps ];
    slot.m_type = type;
    slot.m_start = s;
    slot.m_end = e;
    m_nTimestamps++;
}
/*
2011 Takahiro Harada
*/
#ifndef THREAD_POOL_H
#define THREAD_POOL_H
#pragma warning( disable : 4996 )
#include <windows.h>
#include <process.h>
#include <Common/Math/Math.h>
/*
    ThreadPool: a fixed set of Win32 worker threads fed from a ring buffer of
    Task pointers guarded by a critical section.

    Usage as shown by the implementation: pushBack() tasks, start() the workers,
    wait() for them to drain the queue. Workers delete each task after running it.
*/
class ThreadPool
{
    public:
        // Unit of work executed by a worker thread.
        struct Task
        {
            // Workers delete tasks through a Task* after running them, so the
            // destructor must be virtual for derived tasks to be destroyed correctly.
            virtual ~Task() {}
            // Type tag stored with the task's timestamp.
            virtual u16 getType() = 0;
            // Executes the task; tIdx is the index of the worker running it.
            virtual void run(int tIdx) = 0;
        };

        // Creates nThreads workers.
        ThreadPool(int nThreads);
        // Signals shutdown, waits for the workers, and frees pool resources.
        ~ThreadPool();

        // Resets the time origin and every worker's timestamp list.
        void resetThreadTimer();
        // Wakes all workers; optionally resets the timers first.
        void start(bool resetTimestamp = true);
        // Blocks until all workers have signalled completion.
        void wait();

        // Enqueues a task at the tail of the queue.
        void pushBack(Task* task);
        // Dequeues the task at the head of the queue, or returns NULL if empty.
        Task* pop();

    public:
        // Start-up arguments handed to a worker thread.
        struct ThreadArgs
        {
            ThreadPool* m_threadPool;
            int m_idx;
        };

        // One worker: its events, start-up arguments and recorded timestamps.
        class Thread
        {
            public:
                Thread();
                ~Thread();

                // Creates the worker's events and launches its thread.
                void init(int idx, ThreadPool* threadPool);
                // Signals the worker to begin processing tasks.
                void start();
                // Clears the recorded timestamps.
                void timestampReset();

                // Thread entry point; args points to a ThreadArgs.
                static
                u32 __stdcall run(void* args);

                // Timing record for one executed task.
                struct Timestamp
                {
                    float m_start;
                    float m_end;
                    u16 m_type;
                };

                // Appends a timing record if there is room.
                void pushBackTimeStamp(u16 type, float s, float e);

            public:
                u32 m_threadIdx;        // thread id written by _beginthreadex
                HANDLE m_runSignal;     // set by start() to wake the worker
                HANDLE m_finSignal;     // set by the worker when the queue is drained
                ThreadArgs m_args;

                enum
                {
                    MAX_TIMESTAMPS = 256,
                };

                Timestamp m_timestamps[MAX_TIMESTAMPS];
                int m_nTimestamps;
        };

        int m_nThreads;
        bool m_deleteSignal;            // set by the destructor to stop the workers
        CRITICAL_SECTION m_cs;          // guards the task queue
        Thread* m_threads;

        LARGE_INTEGER m_startTime;      // performance-counter value at the last timer reset
        LARGE_INTEGER m_frequency;      // performance-counter frequency

        enum
        {
            MAX_TASKS = 256,
            TASK_MASK = MAX_TASKS-1,    // wraps ring-buffer indices
        };

        Task* m_tasks[MAX_TASKS];       // ring buffer; empty when head == tail
        int m_taskHead;
        int m_taskTail;
};
#endif
/*
2011 Takahiro Harada
*/
#ifndef DEVICE_DRAW_H
#define DEVICE_DRAW_H
#include <Common/Math/Math.h>
#include <Common/DeviceUtils/DeviceUtils.h>
#include <Common/Geometry/Aabb.h>
#include <glut.h>
#include <Common/DeviceUtils/DeviceDrawGL.inl>
// px* drawing API mapped onto the helper functions pulled in from
// DeviceDrawGL.inl. Most macros forward their arguments unchanged.
#define pxDrawLine(a,b,color) drawLine(a, b, color)
#define pxDrawLineList(vtx,idx,nVtx,nIdx,color) drawLineList(vtx,idx,nVtx,nIdx,color)
#define pxDrawPoint(a,color) drawPoint(a, color)
// NOTE(review): this expansion ends with ';', unlike most of the other macros.
#define pxDrawPointList(vtx,color,nVtx) drawPointList(vtx,color,nVtx);
// The radius argument is ignored: sprites are drawn as plain points.
#define pxDrawPointSprite(vtx,color,radius, nVtx) drawPointList(vtx,color,nVtx)
// NOTE(review): this expansion ends with ';', unlike most of the other macros.
#define pxDrawPointListTransformed(vtx,color,nVtx,translation,quaternion) drawPointListTransformed(vtx,color,nVtx,translation,quaternion);
#define pxDrawTriangle(a,b,c,color) drawTriangle(a,b,c,color)
#define pxDrawTriangleList(vtx,idx,nVtx,nIdx,color) drawTriangleList(vtx,idx,nVtx,nIdx,color)
#define pxDrawTriangleList1(vtx,idx,nVtx,nIdx,color) drawTriangleList1(vtx,idx,nVtx,nIdx,color)
// Calls drawTriangleList with the extra vtxNormal argument.
#define pxDrawTriangleListNormal(vtx,vtxNormal,idx,nVtx,nIdx,color) drawTriangleList(vtx,vtxNormal,idx,nVtx,nIdx,color)
// The four shader arguments are ignored; this is the same call as
// pxDrawTriangleListTransformed.
#define pxDrawTriangleListTessellated(vtx,vtxNormal,idx,nVtx,nIdx,color,translation,quaternion,vtxShader,hShader,dShader,pShader) drawTriangleListTransformed(vtx,vtxNormal,idx,nVtx,nIdx,color,translation,quaternion)
#define pxDrawTriangleListTransformed(vtx,vtxNormal,idx,nVtx,nIdx,color,translation,quaternion) drawTriangleListTransformed(vtx,vtxNormal,idx,nVtx,nIdx,color,translation,quaternion)
#define pxDrawText(txt,pos) glDraw3DStrings(txt, pos)
#define pxDrawAabb(aabb, c) drawAabb(aabb, c)
// Pixel-shader handles are plain ints here, and the shader macros below expand
// to blocks that only evaluate one of their arguments.
#define DevicePSShader int
#define pxCreatePixelShader(deviceData, shaderPath, profile, shaderOut) {shaderOut;}
#define pxDeleteShader(shader) {shader;}
#define pxSetPixelShader(pShader) {pShader;}
// Clears only the depth buffer bit.
#define pxClearDepthStencil glClear( GL_DEPTH_BUFFER_BIT )
#endif
/*
2011 Takahiro Harada
*/
#include <common/Math/Quaternion.h>
#include <common/Math/Matrix3x3.h>
__inline
void glVertexFloat4( const float4& v )
{
glVertex3f( v.x, v.y, v.z );
}
// Draws a single line segment from a to b.
// color: its first three floats are passed to glColor3fv.
__inline
void drawLine(const float4& a, const float4& b, const float4& color)
{
    float* rgb = (float*)&color;
    glColor3fv( rgb );

    glBegin(GL_LINES);
    {
        glVertexFloat4( a );
        glVertexFloat4( b );
    }
    glEnd();
}
// Draws indexed line segments in one colour: the nIdx entries of idx are sent
// in order as GL_LINES vertices looked up in vtx. nVtx is not used.
__inline
void drawLineList(float4* vtx, u32* idx, int nVtx, int nIdx, const float4& color)
{
    float* rgb = (float*)&color;
    glColor3fv( rgb );

    glBegin(GL_LINES);
    for(int k=0; k<nIdx; k++)
    {
        const float4& vertex = vtx[idx[k]];
        glVertexFloat4( vertex );
    }
    glEnd();
}
// Draws a single point at a.
// color: its first three floats are passed to glColor3fv.
__inline
void drawPoint(const float4& a, const float4& color)
{
    float* rgb = (float*)&color;
    glColor3fv( rgb );

    glBegin(GL_POINTS);
    {
        glVertexFloat4( a );
    }
    glEnd();
}
// Draws nVtx points, each with its own colour.
// vtx   : point positions (x, y, z are used).
// color : per-point colours; x, y, z are used as red, green, blue and alpha is
//         fixed at 1.
// nVtx  : number of points; both arrays must hold at least this many entries.
__inline
void drawPointList(float4* vtx, const float4* color, int nVtx)
{
    glBegin(GL_POINTS);
    for(int i=0; i<nVtx; i++)
    {
        const float4& c = color[i];
        // Blue comes from c.z, matching drawPointListTransformed; it used to be
        // passed as c.x, which duplicated the red channel.
        glColor4f(c.x, c.y, c.z, 1.f );
        glVertexFloat4( vtx[i] );
    }
    glEnd();
}
__inline
void drawPointListTransformed(const float4* vtx, const float4* color, int nVtx, const float4& translation, const Quaternion& quat)
{
glPushMatrix();
Matrix3x3 rotMat = mtTranspose( qtGetRotationMatrix( quat ) );
float transformMat[16] =
{
rotMat.m_row[0].x, rotMat.m_row[0].y, rotMat.m_row[0].z, 0,
rotMat.m_row[1].x, rotMat.m_row[1].y, rotMat.m_row[1].z, 0,
rotMat.m_row[2].x, rotMat.m_row[2].y, rotMat.m_row[2].z, 0,
translation.x, translation.y, translation.z,1
};
glMultMatrixf( transformMat );
glBegin(GL_POINTS);
for(int i=0; i<nVtx; i++)
{
const float4& c = color[i];
glColor4f(c.x, c.y, c.z, 1);
glVertexFloat4( vtx[i] );
}
glEnd();
glPopMatrix();
}
// Draws one triangle with corners a, b, c.
// color: its first three floats are passed to glColor3fv.
__inline
void drawTriangle(const float4& a, const float4& b, const float4& c, const float4& color)
{
    float* rgb = (float*)&color;
    glColor3fv( rgb );

    glBegin(GL_TRIANGLES);
    {
        glVertexFloat4( a );
        glVertexFloat4( b );
        glVertexFloat4( c );
    }
    glEnd();
}
// Draws indexed triangles in one colour: the nIdx entries of idx are sent in
// order as GL_TRIANGLES vertices looked up in vtx. nVtx is not used.
__inline
void drawTriangleList(float4* vtx, u32* idx, int nVtx, int nIdx, const float4& color)
{
    float* rgb = (float*)&color;
    glColor3fv( rgb );

    glBegin(GL_TRIANGLES);
    for(int k=0; k<nIdx; k++)
    {
        const float4& vertex = vtx[ idx[k] ];
        glVertexFloat4( vertex );
    }
    glEnd();
}
// Draws indexed triangles with per-vertex colours: for every entry of idx the
// colour and the position at that index are sent to GL. nVtx is not used.
__inline
void drawTriangleList1(float4* vtx, u32* idx, int nVtx, int nIdx, const float4* color)
{
    glBegin(GL_TRIANGLES);
    for(int k=0; k<nIdx; k++)
    {
        const u32 vertexIdx = idx[k];
        glColor3fv( (float*)&color[ vertexIdx ] );
        glVertexFloat4( vtx[ vertexIdx ] );
    }
    glEnd();
}
__inline
void drawTriangleList(const float4* vtx, const float4* vtxNormal, u32* idx, int nVtx, int nIdx, const float4& color)
{
glColor3fv( (float*)&color );
glBegin(GL_TRIANGLES);
for(int i=0; i<nIdx; i++)
{
glNormal3f( vtxNormal[idx[i]].x, vtxNormal[idx[i]].y, vtxNormal[idx[i]].z );
glVertexFloat4( vtx[ idx[i] ] );
}
glEnd();
}
__inline
void drawTriangleListTransformed(const float4* vtx, const float4* vtxNormal, u32* idx, int nVtx, int nIdx, const float4& color, const float4& translation, const Quaternion& quat)
{
glPushMatrix();
Matrix3x3 rotMat = mtTranspose( qtGetRotationMatrix( quat ) );
float transformMat[16] =
{
rotMat.m_row[0].x, rotMat.m_row[0].y, rotMat.m_row[0].z, 0,
rotMat.m_row[1].x, rotMat.m_row[1].y, rotMat.m_row[1].z, 0,
rotMat.m_row[2].x, rotMat.m_row[2].y, rotMat.m_row[2].z, 0,
translation.x, translation.y, translation.z,1
};
glMultMatrixf( transformMat );
glColor3fv( (float*)&color );
glBegin(GL_TRIANGLES);
for(int i=0; i<nIdx; i++)
{
glNormal3f( vtxNormal[idx[i]].x, vtxNormal[idx[i]].y, vtxNormal[idx[i]].z );
glVertexFloat4( vtx[ idx[i] ] );
}
glEnd();
glPopMatrix();
}
// Draws a NUL-terminated string with the GLUT Helvetica 12 bitmap font, starting
// at the raster position given by pos (x, y, z).
__inline
void glDraw3DStrings(const char* str, const float4& pos)
{
    glRasterPos3f(pos.x, pos.y, pos.z);

    const char* cursor = str;
    while( *cursor != '\0' )
    {
        glutBitmapCharacter( GLUT_BITMAP_HELVETICA_12, *cursor );
        cursor++;
    }
}
__inline
void glDraw3DStrings(float value, const float4& pos)
{
glDisable(GL_LIGHTING);
char valueChar[128];
sprintf_s(valueChar, "%3.2f", value);
glDraw3DStrings(valueChar, pos);
glEnable(GL_LIGHTING);
}
// Placeholder for drawing an axis-aligned bounding box: the body is empty, so
// nothing is drawn and both arguments are ignored.
__inline
void drawAabb(const Aabb& a, const float4& color)
{
}
/*
2011 Takahiro Harada
*/
#ifndef DX11UTILS_H
#define DX11UTILS_H
// Polymorphic base for device data; stores a tag identifying the kind of
// device the object belongs to.
struct DeviceDataBase
{
// Device kinds - presumably OpenCL, DirectX 11 and CPU, going by the names.
enum Type
{
TYPE_CL,
TYPE_DX11,
TYPE_CPU,
};
// Records the device kind. NOTE(review): not `explicit`, so a Type value
// converts implicitly to DeviceDataBase.
DeviceDataBase( Type type ) : m_type( type ) {}
// Virtual so objects can be destroyed through a DeviceDataBase pointer.
virtual ~DeviceDataBase(){}
// Device kind given at construction.
Type m_type;
};
struct DeviceBufferBase
{
virtual ~DeviceBufferBase(){}
enum Type
{