diff --git a/hart/inferno-hart-cpu/src/kdtree.cpp b/hart/inferno-hart-cpu/src/kdtree.cpp new file mode 100644 index 0000000..b74d238 --- /dev/null +++ b/hart/inferno-hart-cpu/src/kdtree.cpp @@ -0,0 +1,5 @@ +#include "kdtree.hpp" + +#include +#include + diff --git a/hart/inferno-hart-cpu/src/kdtree.hpp b/hart/inferno-hart-cpu/src/kdtree.hpp index 40a0f01..dc98bb7 100644 --- a/hart/inferno-hart-cpu/src/kdtree.hpp +++ b/hart/inferno-hart-cpu/src/kdtree.hpp @@ -1,203 +1,135 @@ -#include - -#include +#pragma once #include #include #include -#include -#include -#include + +#include + +#include +#include using namespace inferno; -inline bool AABBIntersection(glm::vec3 min, glm::vec3 max, const Ray* r) +struct Triangle { - float tmin = 0.0, tmax = INFINITY; - glm::vec3 invDir = 1.0f / r->Direction; + unsigned int indices[3]; +}; - for (int i = 0; i < 3; ++i) { - float t1 = (min[i] - r->Origin[i]) * invDir[i]; - float t2 = (max[i] - r->Origin[i]) * invDir[i]; +struct AABB +{ + glm::vec3 min; + glm::vec3 max; +}; - tmin = std::max(tmin, std::min(t1, t2)); - tmax = std::min(tmax, std::max(t1, t2)); +struct KDNode +{ + AABB aabb; + unsigned int splitAxis; + std::vector triangles; + KDNode *left; + KDNode *right; +}; + +inline AABB createAABB(const std::vector& vertices, const std::vector& triangles) +{ + glm::vec3 min(std::numeric_limits::max()); + glm::vec3 max(std::numeric_limits::min()); + + for (const auto& triangle : triangles) + { + for (int i = 0; i < 3; ++i) + { + min = glm::min(min, vertices[triangle.indices[i]]); + max = glm::max(max, vertices[triangle.indices[i]]); + } } - bool hit = (tmin <= tmax); - //if (hit) { - //std::cout << "Ray hits AABB: " << tmin << ", " << tmax << std::endl; - //} else { - //std::cout << "Ray misses AABB" << std::endl; - //} - return hit; + return { min, max }; } -struct KDNode { - uint32_t TriIdx; - glm::vec3 MinBounds; - glm::vec3 MaxBounds; - KDNode* LeftChild; - KDNode* RightChild; +inline bool intersectAABB(const AABB& aabb, const glm::vec3& origin, const glm::vec3& direction, float& tNear, float& tFar) +{ + for (int i = 0; i < 3; ++i) + { + float invDirection = 1.0f / direction[i]; + float t0 = (aabb.min[i] - origin[i]) * invDirection; + float t1 = (aabb.max[i] - origin[i]) * invDirection; - KDNode(uint32_t triIdx, glm::vec3 minBounds, glm::vec3 maxBounds) - : TriIdx(triIdx), MinBounds(minBounds), MaxBounds(maxBounds), LeftChild(nullptr), RightChild(nullptr) {} - - ~KDNode() { - delete LeftChild; - delete RightChild; - } - - void setLeftChild(KDNode* child) { - LeftChild = child; - updateBounds(); - } - - void setRightChild(KDNode* child) { - RightChild = child; - updateBounds(); - } - - void updateBounds() { - if (LeftChild && RightChild) { - MinBounds = glm::min(LeftChild->MinBounds, RightChild->MinBounds); - MaxBounds = glm::max(LeftChild->MaxBounds, RightChild->MaxBounds); - } - } -}; - -class KDTree { - public: - KDTree(float* vertices, uint32_t* indices, std::vector& indicesToProcess, uint32_t startIdx, uint32_t endIdx, uint32_t depthLimit) - : mVertices(vertices), mIndices(indices), mDepthLimit(depthLimit), mRoot(nullptr) + if (invDirection < 0.0f) { - if (indicesToProcess.size() == 0) { - return; - } - - mRoot = buildNode(indicesToProcess, startIdx, endIdx, 0); + std::swap(t0, t1); } - ~KDTree() { - delete mRoot; + tNear = t0 > tNear ? t0 : tNear; + tFar = t1 < tFar ? t1 : tFar; + + if (tNear > tFar) + { + return false; } + } - void intersect(const Ray* ray, std::vector& outIndices) { - intersect(mRoot, ray, outIndices); - } + return true; +} - KDNode* getRoot() const { - return mRoot; - } +inline KDNode* buildKDTree(const std::vector& vertices, std::vector& triangles, unsigned int depth = 0) +{ + if (triangles.empty()) + { + return nullptr; + } - void printTree(KDNode* node, int depth) const { - if (!node) { - return; - } + unsigned int splitAxis = depth % 3; + std::sort(triangles.begin(), triangles.end(), [&](const Triangle& a, const Triangle& b) { + return vertices[a.indices[0]][splitAxis] < vertices[b.indices[0]][splitAxis]; + }); - for (int i = 0; i < depth; i++) { - std::cout << "-"; - } - std::cout << " " << glm::to_string(node->MinBounds) << " " << glm::to_string(node->MaxBounds) << ": " << node->TriIdx << "\n"; + size_t midIdx = triangles.size() / 2; + KDNode* node = new KDNode; + node->splitAxis = splitAxis; + node->aabb = createAABB(vertices, triangles); + node->triangles.push_back(triangles[midIdx]); - printTree(node->LeftChild, depth + 1); - printTree(node->RightChild, depth + 1); - } + std::vector leftTriangles(triangles.begin(), triangles.begin() + midIdx); + std::vector rightTriangles(triangles.begin() + midIdx + 1, triangles.end()); - private: - KDNode* buildNode(std::vector& indicesToProcess, uint32_t startIdx, uint32_t endIdx, uint32_t depth) { - if (startIdx >= endIdx || depth >= mDepthLimit) { - return nullptr; - } + node->left = buildKDTree(vertices, leftTriangles, depth + 1); + node->right = buildKDTree(vertices, rightTriangles, depth + 1); - if (endIdx - startIdx == 1) { - return new KDNode(indicesToProcess[startIdx], getVertexBounds(mIndices[indicesToProcess[startIdx] * 3]), getVertexBounds(mIndices[indicesToProcess[startIdx] * 3])); - } + return node; +} - glm::vec3 minBounds(INFINITY), maxBounds(-INFINITY); - for (uint32_t i = startIdx; i < endIdx; ++i) { - const glm::vec3& v0 = getVertexBounds(mIndices[indicesToProcess[i] * 3]); - const glm::vec3& v1 = getVertexBounds(mIndices[indicesToProcess[i] * 3 + 1]); - const glm::vec3& v2 = getVertexBounds(mIndices[indicesToProcess[i] * 3 + 2]); - minBounds = glm::min(minBounds, glm::min(v0, glm::min(v1, v2))); - maxBounds = glm::max(maxBounds, glm::max(v0, glm::max(v1, v2))); - } +inline void intersectKDTree(KDNode* node, const glm::vec3& origin, const glm::vec3& direction, std::vector& hitCandidates) { + if (!node) + { + return; + } - uint32_t axis = depth % 3; - uint32_t median = partition(indicesToProcess, startIdx, endIdx, axis); - bool isPartitionValid = checkPartition(indicesToProcess, startIdx, endIdx, axis, median); - if (!isPartitionValid) { - std::cout << "Partition failed!" << std::endl; - } + float tNear = -std::numeric_limits::max(); + float tFar = std::numeric_limits::max(); + if (!intersectAABB(node->aabb, origin, direction, tNear, tFar)) + { + return; + } - KDNode* node = new KDNode(0, minBounds, maxBounds); + if (!node->left && !node->right) + { + hitCandidates.insert(hitCandidates.end(), node->triangles.begin(), node->triangles.end()); + return; + } - std::vector leftIndices(indicesToProcess.begin() + startIdx, indicesToProcess.begin() + median); - std::vector rightIndices(indicesToProcess.begin() + median, indicesToProcess.begin() + endIdx); + intersectKDTree(node->left, origin, direction, hitCandidates); + intersectKDTree(node->right, origin, direction, hitCandidates); +} - node->setLeftChild(buildNode(leftIndices, startIdx, median, depth + 1)); - node->setRightChild(buildNode(rightIndices, 0, endIdx - median, depth + 1)); +inline void deleteKDTree(KDNode* node) +{ + if (node) + { + deleteKDTree(node->left); + deleteKDTree(node->right); + delete node; + } +} - return node; - } - - void intersect(const KDNode* node, const Ray* ray, std::vector& outIndices) { - if (!node) { - return; - } - - //std::cout << "Checking node bounds: " << glm::to_string(node->MinBounds) << " " << glm::to_string(node->MaxBounds) << std::endl; - - if (AABBIntersection(node->MinBounds, node->MaxBounds, ray)) { - //std::cout << "Ray intersects node, num tris: " << (node->LeftChild || node->RightChild ? -1 : 1) << std::endl; - if (node->LeftChild || node->RightChild) { - intersect(node->LeftChild, ray, outIndices); - intersect(node->RightChild, ray, outIndices); - } - else { - //std::cout << "Ray hit leaf node with triangle index: " << node->TriIdx << std::endl; - outIndices.push_back(node->TriIdx); - } - } - else { - //std::cout << "Ray does not intersect node" << std::endl; - } - //std::cout << std::endl; - //exit(0); - } - - glm::vec3 getVertexBounds(uint32_t index) const { - return { mVertices[index * 3], mVertices[index * 3 + 1], mVertices[index * 3 + 2] }; - } - - // TODO: this could definately be more advanced, at the moment is is a split down the middle - uint32_t partition(std::vector& indicesToProcess, uint32_t startIdx, uint32_t endIdx, uint32_t axis) { - uint32_t medianIdx = (startIdx + endIdx) / 2; - glm::vec3 pivot = getVertexBounds(mIndices[indicesToProcess[medianIdx] * 3]); - std::nth_element(indicesToProcess.begin() + startIdx, indicesToProcess.begin() + medianIdx, indicesToProcess.begin() + endIdx, - [this, &pivot, axis](uint32_t a, uint32_t b) { return getVertexBounds(mIndices[a * 3])[axis] < getVertexBounds(mIndices[b * 3])[axis]; }); - return medianIdx; - } - - bool checkPartition(std::vector& indicesToProcess, uint32_t startIdx, uint32_t endIdx, uint32_t axis, uint32_t median) { - for (uint32_t i = startIdx; i < median; ++i) { - if (getVertexBounds(mIndices[indicesToProcess[i] * 3])[axis] > getVertexBounds(mIndices[indicesToProcess[median] * 3])[axis]) { - return false; - } - } - - for (uint32_t i = median + 1; i < endIdx; ++i) { - if (getVertexBounds(mIndices[indicesToProcess[i] * 3])[axis] < getVertexBounds(mIndices[indicesToProcess[median] * 3])[axis]) { - return false; - } - } - - return true; - } - - private: - float* mVertices; - uint32_t* mIndices; - uint32_t mDepthLimit; - KDNode* mRoot; -}; diff --git a/hart/inferno-hart-cpu/src/main.cpp b/hart/inferno-hart-cpu/src/main.cpp index 644c04f..a7d1683 100644 --- a/hart/inferno-hart-cpu/src/main.cpp +++ b/hart/inferno-hart-cpu/src/main.cpp @@ -16,158 +16,166 @@ using namespace inferno; class HARTCPU : public HARTModule { -public: - HARTCPU() - { - mMasterWorker = std::thread(&HARTCPU::intersectMasterWorker, this); - mLogModule = yolo::registerModule("hartcpu", "\u001b[35;1m"); - } - - ~HARTCPU() - { - this->stop(true); - mMasterWorker.detach(); - } - - void submitTris(void* vert, - void* norm, - int vc, - void* indices, - int ic) override - { - std::lock_guard lock(_mData); - - mState = EModuleState::Build; - mVert = (float*)vert; mNorm = (float*)norm; mVc = vc; mIndices = (uint32_t*)indices; mIc = ic; - yolo::info(mLogModule, "Recieved {} verticies ({}) and {} indicies ({})", vc / 3, vert, ic / 3, indices); - - std::vector indicesToProcess(ic / 3); - for (uint32_t i = 0; i < ic / 3; ++i) + public: + HARTCPU() { - indicesToProcess[i] = i; + mMasterWorker = std::thread(&HARTCPU::intersectMasterWorker, this); + mLogModule = yolo::registerModule("hartcpu", "\u001b[35;1m"); + rootNode = nullptr; } - mKdTree = new KDTree(mVert, mIndices, indicesToProcess, 0, indicesToProcess.size() - 1, 10); - mKdTree->printTree(mKdTree->getRoot(), 1); - yolo::info(mLogModule, "Accelerator ready.."); - - mState = EModuleState::Idle; - } - - void updateTris() override {} - - void start() override - { - std::lock_guard signalLock(_mSignalMut); - mIsRunning = true; - mState = EModuleState::Trace; - _mSignalCv.notify_all(); - - yolo::info(mLogModule, "Signal master to start"); - + ~HARTCPU() { - std::unique_lock doneLock(_mDoneMut); - _mDoneCv.wait(doneLock, [this] { return mState == EModuleState::Idle; }); + this->stop(true); + mMasterWorker.detach(); + if (rootNode) { + deleteKDTree(rootNode); + } } - } - void stop(bool interrupt) override - { - if (!interrupt) + void submitTris(void* vert, + void* norm, + int vc, + void* indices, + int ic) override { - mIsRunning = false; - return; - } - // TODO: Find a way to force the thread to hault - } + std::lock_guard lock(_mData); - void intersectMasterWorker() - { - for (;;) - { - std::unique_lock lock(_mData); - if (!mIsRunning) - { - _mSignalCv.wait(lock, [this]{ return (mIsRunning || mState == EModuleState::Trace); }); + mState = EModuleState::Build; + mVert = (float*)vert; mNorm = (float*)norm; mVc = vc; mIndices = (uint32_t*)indices; mIc = ic; + yolo::info(mLogModule, "Recieved {} verticies ({}) and {} indicies ({})", vc / 3, vert, ic / 3, indices); + + std::vector vertices(mVc / 3); + for (size_t i = 0; i < vertices.size(); ++i) { + vertices[i] = glm::vec3(mVert[i * 3], mVert[i * 3 + 1], mVert[i * 3 + 2]); } - if (mToTrace.size() == 0) - { - lock.unlock(); - mState = EModuleState::Idle; - _mDoneCv.notify_all(); - continue; + std::vector triangles(mIc / 3); + for (size_t i = 0; i < triangles.size(); ++i) { + triangles[i].indices[0] = mIndices[i * 3]; + triangles[i].indices[1] = mIndices[i * 3 + 1]; + triangles[i].indices[2] = mIndices[i * 3 + 2]; } + + rootNode = buildKDTree(vertices, triangles); + yolo::info(mLogModule, "Accelerator ready.."); + + mState = EModuleState::Idle; + } + + void updateTris() override {} + + void start() override + { + std::lock_guard signalLock(_mSignalMut); + mIsRunning = true; mState = EModuleState::Trace; + _mSignalCv.notify_all(); - Ray* ray = mToTrace.front(); - int bestIdx = -1; - glm::vec2 coords; - glm::vec2 bestTexcoord; - float bestDist = INFINITY; - float dist; + yolo::info(mLogModule, "Signal master to start"); - // Traverse the K-D tree to identify the set of triangles that may intersect the ray. - std::vector candidateIndices; - mKdTree->intersect(ray, candidateIndices); - - for (uint32_t idx : candidateIndices) { - uint32_t ind1 = mIndices[idx * 3]; - uint32_t ind2 = mIndices[idx * 3 + 1]; - uint32_t ind3 = mIndices[idx * 3 + 2]; - - const glm::vec3 a = { mVert[ind1], mVert[ind1 + 1], mVert[ind1 + 2] }; - const glm::vec3 b = { mVert[ind2], mVert[ind2 + 1], mVert[ind2 + 2] }; - const glm::vec3 c = { mVert[ind3], mVert[ind3 + 1], mVert[ind3 + 2] }; - - // Perform intersection test... - if (!glm::intersectRayTriangle(ray->Origin, ray->Direction, a, b, c, coords, dist)) { continue; } - if (dist > bestDist || dist < 0.0f) { continue; } - - bestIdx = idx; - bestDist = dist; - bestTexcoord = coords; + std::unique_lock doneLock(_mDoneMut); + _mDoneCv.wait(doneLock, [this] { return mState == EModuleState::Idle; }); } - - HitInfo hit; - hit.Caller = ray; - // If no hit, we still need to inform the HHM - if (bestIdx < 0) - { - mToTrace.pop(); - continue; - } - - hit.Distance = bestDist; - hit.UV = bestTexcoord; - - Hit(mCtx, &hit); - - mToTrace.pop(); } - } -private: - // Signaling Stuffs - std::atomic mIsRunning; - std::thread mMasterWorker; - std::mutex _mSignalMut; - std::mutex _mDoneMut; - std::condition_variable _mSignalCv; - std::condition_variable _mDoneCv; + void stop(bool interrupt) override + { + if (!interrupt) + { + mIsRunning = false; + return; + } + // TODO: Find a way to force the thread to hault + } -private: - // Scene Data - KDTree* mKdTree; + void intersectMasterWorker() + { + for (;;) + { + std::unique_lock lock(_mData); + if (!mIsRunning) + { + _mSignalCv.wait(lock, [this]{ return (mIsRunning || mState == EModuleState::Trace); }); + } - float* mVert; - float* mNorm; - int mVc; - uint32_t* mIndices; - int mIc; + if (mToTrace.size() == 0) + { + lock.unlock(); + mState = EModuleState::Idle; + _mDoneCv.notify_all(); + continue; + } + mState = EModuleState::Trace; - uint8_t mLogModule; + Ray* ray = mToTrace.front(); + int bestIdx = -1; + glm::vec2 coords; + glm::vec2 bestTexcoord; + float bestDist = INFINITY; + float dist; + // ... + // (Keep the existing implementation of intersectMasterWorker, but replace the KDTree intersection part) + + std::vector candidateTriangles; + intersectKDTree(rootNode, ray->Origin, ray->Direction, candidateTriangles); + + for (const Triangle& triangle : candidateTriangles) + { + uint32_t ind1 = triangle.indices[0]; + uint32_t ind2 = triangle.indices[1]; + uint32_t ind3 = triangle.indices[2]; + + const glm::vec3 a = { mVert[ind1 * 3], mVert[ind1 * 3 + 1], mVert[ind1 * 3 + 2] }; + const glm::vec3 b = { mVert[ind2 * 3], mVert[ind2 * 3 + 1], mVert[ind2 * 3 + 2] }; + const glm::vec3 c = { mVert[ind3 * 3], mVert[ind3 * 3 + 1], mVert[ind3 * 3 + 2] }; + + // Perform intersection test... + if (!glm::intersectRayTriangle(ray->Origin, ray->Direction, a, b, c, coords, dist)) { continue; } + if (dist > bestDist || dist < 0.0f) { continue; } + + //bestIdx = ; + bestDist = dist; + bestTexcoord = coords; + } + + HitInfo hit; + hit.Caller = ray; + // If no hit, we still need to inform the HHM + if (bestIdx < 0) + { + mToTrace.pop(); + continue; + } + + hit.Distance = bestDist; + hit.UV = bestTexcoord; + + Hit(mCtx, &hit); + + mToTrace.pop(); + } + } + private: + // Signaling Stuffs + std::atomic mIsRunning; + std::thread mMasterWorker; + std::mutex _mSignalMut; + std::mutex _mDoneMut; + std::condition_variable _mSignalCv; + std::condition_variable _mDoneCv; + + private: + KDNode* rootNode; + + float* mVert; + float* mNorm; + int mVc; + uint32_t* mIndices; + int mIc; + + uint8_t mLogModule; }; HART_INTERFACE void* _GET() @@ -185,10 +193,11 @@ HART_INTERFACE void* _CREDIT() { return new ModuleCredit { .ModuleName = "HART_CPU", - .AuthorName = "Ben Kyd", - .ModuleDesc = "Accelerating inferno raytracing with CPU", - .VersionMajor = 0, - .VersionMinor = 0, - .VersionBuild = 1, + .AuthorName = "Ben Kyd", + .ModuleDesc = "Accelerating inferno raytracing with CPU", + .VersionMajor = 0, + .VersionMinor = 0, + .VersionBuild = 1, }; } + diff --git a/libhart/inferno_hart.hpp b/libhart/inferno_hart.hpp index 1ff670f..0a4cc94 100644 --- a/libhart/inferno_hart.hpp +++ b/libhart/inferno_hart.hpp @@ -38,7 +38,7 @@ class HARTModule public: class HARTViz { - + }; enum class EModuleState : uint8_t