WIP: Kerr, interpolation still needs a lot of work

Co-Authored-By: Wilhelm Björkström <143391787+Grantallkotten@users.noreply.github.com>
Emil Wallberg
2025-04-24 12:48:42 +02:00
parent cecdf8fdf1
commit fc74833d6b
4 changed files with 148 additions and 60 deletions


@@ -1,6 +1,7 @@
#include "kerr.h"
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include "vector_functions.h"
#include <cmath>
#include <cstdio>
#include <vector>
@@ -17,17 +18,50 @@
// Device constants (set at compile time; you may also update via cudaMemcpyToSymbol)
__constant__ float c_a = 0.99f;
__constant__ float c_rs = 1;
-__constant__ unsigned int c_num_steps = 1000;
+__constant__ unsigned int c_num_steps = 15000;
__constant__ unsigned int c_layers = 1;
__constant__ float c_M = 1.0f; // Mass parameter
__constant__ float c_epsilon = 1e-10; // Numerical tolerance
+__constant__ float3 worldUp = { 0.0f, 1.0f, 0.0f };
// Additional simulation parameters
-__constant__ float c_h = 0.1f; // Integration step size
+__constant__ float c_h = 0.01f; // Integration step size
// ---------------------------------------------------------------------
// Coordinate conversion functions
+// helper math (as before)
+__device__ float3 crossf3(const float3& a, const float3& b) {
+return make_float3(
+a.y * b.z - a.z * b.y,
+a.z * b.x - a.x * b.z,
+a.x * b.y - a.y * b.x
+);
+}
+__device__ float3 normalizef3(const float3& v) {
+float len2 = v.x * v.x + v.y * v.y + v.z * v.z;
+float invLen = 1.0f / sqrtf(len2);
+return make_float3(v.x * invLen,
+v.y * invLen,
+v.z * invLen);
+}
+__device__ inline float3 operator*(const float3& v, float s) {
+return make_float3(v.x * s, v.y * s, v.z * s);
+}
+__device__ inline float3 operator*(float s, const float3& v) {
+return make_float3(v.x * s, v.y * s, v.z * s);
+}
+__device__ inline float3 operator+(const float3& a, const float3& b) {
+return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
__device__ float3 spherical_to_cartesian(float r, float theta, float phi) {
return make_float3(
r * sinf(theta) * cosf(phi),
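A note on the helper math added above: normalizef3 divides by sqrtf(len2) with no guard, so a zero-length input yields inf/NaN components, and the same hazard resurfaces below when crossf3(forward, worldUp) degenerates. A defensive variant, sketched here as host-side C++ with a stand-in vec3 type (on the device it would be a __device__ float3 function):

#include <cmath>

struct vec3 { float x, y, z; };

// Normalize with a guard: near-zero vectors map to (0,0,0) instead of NaN.
vec3 safeNormalize(const vec3& v, float eps = 1e-12f) {
    float len2 = v.x * v.x + v.y * v.y + v.z * v.z;
    if (len2 < eps) {
        return vec3{0.0f, 0.0f, 0.0f};
    }
    float invLen = 1.0f / std::sqrt(len2);
    return vec3{v.x * invLen, v.y * invLen, v.z * invLen};
}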
@@ -40,16 +74,16 @@ __device__ void cartesian_to_boyer_lindquist(float x, float x_vel,
float y, float y_vel,
float z, float z_vel,
float A, float* out) {
-float r2 = x * x + y * y + z * z;
-float A2 = A * A;
-float root = sqrtf(A2 * (A2 - 2.0f * (x * x + y * y) + 2.0f * z * z) + r2 * r2);
-float radius = sqrtf((-A2 + r2 + root) * 0.5f);
+double r2 = x * x + y * y + z * z;
+double A2 = A * A;
+double root = sqrt(A2 * (A2 - 2.0 * (x * x + y * y) + 2.0 * z * z) + r2 * r2);
+double radius = sqrt((-A2 + r2 + root) * 0.5);
float azimuthal_angle = atan2f(y, x);
float polar_angle = acosf(z / radius);
-float denom = 2.0f * radius * radius + A2 - r2;
-float radius_velocity = (radius * (x * x_vel + y * y_vel + z * z_vel)) / denom +
+double denom = 2.0 * radius * radius + A2 - r2;
+double radius_velocity = (radius * (x * x_vel + y * y_vel + z * z_vel)) / denom +
A2 * z * z_vel / (radius * denom);
float polar_denom = radius * sqrtf(radius * radius - z * z);
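The float-to-double promotion in this hunk matters because the radius expression subtracts nearly equal quantities: with rho2 = x*x + y*y + z*z, the code's root/radius pair is algebraically r^2 = ((rho2 - A^2) + sqrt((rho2 - A^2)^2 + 4*A^2*z^2)) / 2, and far from the hole the spin-dependent correction falls below single precision. A standalone C++ sketch of the comparison (illustrative only):

#include <cmath>
#include <cstdio>

// Boyer-Lindquist radius from Cartesian coordinates for spin parameter a:
//   r^2 = ((rho2 - a^2) + sqrt((rho2 - a^2)^2 + 4 a^2 z^2)) / 2
template <typename T>
T blRadius(T x, T y, T z, T a) {
    T rho2 = x * x + y * y + z * z;
    T d = rho2 - a * a;
    return std::sqrt((d + std::sqrt(d * d + T(4) * a * a * z * z)) * T(0.5));
}

int main() {
    // For rho >> a the spin correction is smaller than one float ulp of rho2,
    // so the float result quietly loses it while the double result keeps it.
    float  rf = blRadius(2.0e5f, 3.0f, 4.0f, 0.99f);
    double rd = blRadius(2.0e5, 3.0, 4.0, 0.99);
    std::printf("float:  %.9g\ndouble: %.17g\n", rf, rd);
    return 0;
}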
@@ -208,6 +242,7 @@ __device__ void rk4(float* y, float h, float E, float L, float k_val) {
// The output trajectory (state vector per step) and the number of steps per ray
// are stored in contiguous device memory.
__global__ void simulateRayKernel(float3 pos, size_t num_rays_per_dim, float* lookup_table) {
//printf("%.2f %.2f %.2f \n", pos.x ,pos.y, pos.z);
int const idx = blockIdx.x * blockDim.x + threadIdx.x;
int const num_rays = num_rays_per_dim * num_rays_per_dim;
if (idx >= num_rays) return;
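The rk4 routine named in the hunk header above is not included in this diff. For orientation, a classic fixed-step RK4 update for the 5-component state [r, theta, phi, p_r, p_theta] follows the standard pattern below; geodesicRHS is a hypothetical stand-in for the Kerr geodesic right-hand side, which this commit does not show:

// Classic fourth-order Runge-Kutta step for a 5-component state vector.
// geodesicRHS(y, dydt, E, L, k) is assumed to fill dydt with the geodesic
// derivatives; its actual form is not part of this diff.
typedef void (*RHS)(const float* y, float* dydt, float E, float L, float k);

void rk4Step(float y[5], float h, float E, float L, float k, RHS geodesicRHS) {
    float k1[5], k2[5], k3[5], k4[5], tmp[5];
    geodesicRHS(y, k1, E, L, k);
    for (int i = 0; i < 5; ++i) tmp[i] = y[i] + 0.5f * h * k1[i];
    geodesicRHS(tmp, k2, E, L, k);
    for (int i = 0; i < 5; ++i) tmp[i] = y[i] + 0.5f * h * k2[i];
    geodesicRHS(tmp, k3, E, L, k);
    for (int i = 0; i < 5; ++i) tmp[i] = y[i] + h * k3[i];
    geodesicRHS(tmp, k4, E, L, k);
    for (int i = 0; i < 5; ++i) {
        y[i] += (h / 6.0f) * (k1[i] + 2.0f * k2[i] + 2.0f * k3[i] + k4[i]);
    }
}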
@@ -215,11 +250,29 @@ __global__ void simulateRayKernel(float3 pos, size_t num_rays_per_dim, float* lo
int const idx_theta = idx / num_rays_per_dim;
int const idx_phi = idx % num_rays_per_dim;
-float theta = 0.1f + (M_PI - 0.1f) * idx_theta / (num_rays_per_dim - 1);
+float theta = (M_PI * idx_theta) / num_rays_per_dim;
float phi = (2.0f * M_PI * idx_phi) / num_rays_per_dim;
+// @TODO (investigate): might need to rotate outgoing dirs to account for camera orientation
-float3 const dir = spherical_to_cartesian(1.0f, theta, phi);
+float3 camPos = make_float3(pos.x, pos.y, pos.z); // camera world pos
+float3 forward = normalizef3(make_float3(
+-camPos.x, // since modelCenter == (0,0,0)
+-camPos.y,
+-camPos.z
+));
+float3 right = normalizef3(crossf3(forward, worldUp));
+float3 upVec = crossf3(right, forward);
+// now build the ray as before:
+float sinT = sinf(theta), cosT = cosf(theta);
+float sinP = sinf(phi), cosP = cosf(phi);
+float3 dir =
+sinT * ( cosP * right + sinP * upVec )
++ cosT * forward;
+dir = normalizef3(dir);
float const x_vel = M_C * dir.x;
float const y_vel = M_C * dir.y;
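One caveat with the basis construction above: crossf3(forward, worldUp) collapses to the zero vector whenever the camera sits straight above or below the model center, and normalizef3 then returns NaNs for every ray. A guarded variant (a sketch only, not part of this commit; plain C++ with stand-in helpers) falls back to another reference axis:

#include <cmath>

struct vec3 { float x, y, z; };

vec3 cross(const vec3& a, const vec3& b) {
    return vec3{a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
float dot(const vec3& a, const vec3& b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
vec3 normalize(const vec3& v) {
    float s = 1.0f / std::sqrt(dot(v, v));
    return vec3{v.x * s, v.y * s, v.z * s};
}

// Build the camera's right vector; when forward is (anti)parallel to worldUp
// the cross product degenerates, so fall back to the world x-axis instead.
vec3 makeRight(const vec3& forward, const vec3& worldUp) {
    vec3 r = cross(forward, worldUp);
    if (dot(r, r) < 1e-12f) {
        r = cross(forward, vec3{1.0f, 0.0f, 0.0f});
    }
    return normalize(r);
}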
@@ -250,6 +303,7 @@ __global__ void simulateRayKernel(float3 pos, size_t num_rays_per_dim, float* lo
// Set up the initial state vector: [r, theta, phi, p_r, p_theta]
float y[5];
y[0] = r0; y[1] = theta0; y[2] = phi0; y[3] = p_r0; y[4] = p_theta0;
//printf("%.2f, %.2f\n", theta0, phi0);
// Pointer to this ray's lookup data. @TODO Correct index calculation, still in the old trajectory format
float* entry = &lookup_table[idx * (1 + c_layers) * 2];
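The stride used here pins down the per-ray record layout that the GLSL side reads back with record_stride = 2 + layerCount * 2: two floats for the launch angles, then one warped (theta, phi) pair per layer. A small sketch of the arithmetic, with helper names invented here for illustration:

#include <cstddef>

// Per-ray record:
//   [theta_in, phi_in, theta_out_0, phi_out_0, ..., theta_out_{L-1}, phi_out_{L-1}]
// so the stride is (1 + layers) * 2 floats on both the CUDA and GLSL sides.
std::size_t recordBase(std::size_t rayIdx, std::size_t layers) {
    return rayIdx * (1 + layers) * 2; // == idx * record_stride in the shader
}

std::size_t layerOffset(std::size_t layer) {
    return 2 + layer * 2;             // == `offset` in the shader
}
// entry[layerOffset(l)] and entry[layerOffset(l) + 1] are then the warped
// theta/phi for layer l of this ray.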


@@ -20,7 +20,7 @@
#include <filesystem>
#include <vector>
-#define M_Kerr false
+#define M_Kerr true
#if M_Kerr
#include <modules/blackhole/cuda/kerr.h>
#else
@@ -114,7 +114,7 @@ namespace openspace {
bool RenderableBlackHole::isReady() const {
return _program != nullptr;
}
+bool rerender = true;
void RenderableBlackHole::update(const UpdateData& data) {
if (data.modelTransform.translation != _chachedTranslation) {
_chachedTranslation = data.modelTransform.translation;
@@ -124,10 +124,23 @@ namespace openspace {
_viewport.updateViewGrid(global::renderEngine->renderingResolution());
#if M_Kerr
-glm::dvec3 cameraPosition = global::navigationHandler->camera()->positionVec3() - _chachedTranslation;
-if (glm::distance(cameraPosition, _chacedCameraPos) > 0.01f) {
-kerr(cameraPosition.x, cameraPosition.y, cameraPosition.z, _rs, 0.99f, _rayCount, _stepsCount, _schwarzschildWarpTable);
+// world-space camera
+glm::dvec3 camW = global::navigationHandler->camera()->positionVec3();
+// 1) Translate into model-centered space
+glm::dvec3 v = camW - data.modelTransform.translation;
+// 2) Remove the rotation: for an orthonormal matrix, inverse == transpose
+glm::dvec3 v_rot = glm::transpose(data.modelTransform.rotation) * v;
+// 3) Remove scaling (component-wise)
+glm::dvec3 cameraPosition = v_rot / data.modelTransform.scale;
+if (glm::distance(cameraPosition, _chacedCameraPos) > 0.01f && rerender) {
+//kerr(2e11, 0, 0, _rs, 0.99f, _rayCount, _stepsCount, _schwarzschildWarpTable);
+kerr(cameraPosition.x, cameraPosition.y, cameraPosition.z, _rs, 0.5f, _rayCount, _stepsCount, _schwarzschildWarpTable);
_chacedCameraPos = cameraPosition;
+//rerender = false;
}
#else
glm::dvec3 cameraPosition = global::navigationHandler->camera()->positionVec3();
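The three numbered steps in the M_Kerr branch invert the model transform M = T * R * S one factor at a time, using the fact that an orthonormal rotation satisfies R^-1 == R^T. Collected into a helper (a sketch under that same orthonormality assumption; whether the engine's scale is a scalar or per-axis is not visible here, so it is shown per-axis):

#include <glm/glm.hpp>

// World -> model-local: undo translation, then rotation (inverse == transpose
// for an orthonormal matrix), then scale, i.e. invert M = T * R * S in reverse.
glm::dvec3 worldToModel(const glm::dvec3& worldPos,
                        const glm::dvec3& translation,
                        const glm::dmat3& rotation,
                        const glm::dvec3& scale)
{
    glm::dvec3 v = worldPos - translation;
    glm::dvec3 vRot = glm::transpose(rotation) * v;
    return vRot / scale; // component-wise division in GLM
}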
@@ -185,13 +198,13 @@ namespace openspace {
_program->setUniform(
_uniformCache.cameraRotationMatrix,
-glm::mat4(glm::mat4_cast(camRot.localRotation)) * CameraPlaneRotation
+glm::mat4(glm::mat4_cast(camRot.localRotation)) * CameraPlaneRotation
);
-_program->setUniform(
-_uniformCache.worldRotationMatrix,
-glm::mat4(glm::mat4_cast(camRot.globalRotation))
-);
+//_program->setUniform(
+// _uniformCache.worldRotationMatrix,
+// glm::mat4(glm::mat4_cast(camRot.globalRotation))
+// );
#if !M_Kerr
if (_uniformCache.r_0 != -1) {
_program->setUniform(
@@ -299,8 +312,8 @@ namespace openspace {
}
void RenderableBlackHole::loadEnvironmentTexture() {
-//const std::string texturePath = "${MODULE_BLACKHOLE}/rendering/uv.png";
-const std::string texturePath = "${BASE}/sync/http/milkyway_textures/2/DarkUniverse_mellinger_8k.jpg";
+const std::string texturePath = "${MODULE_BLACKHOLE}/rendering/uv.png";
+//const std::string texturePath = "${BASE}/sync/http/milkyway_textures/2/DarkUniverse_mellinger_8k.jpg";
//const std::string texturePath = "${MODULE_BLACKHOLE}/rendering/skybox.jpg";
_environmentTexture = ghoul::io::TextureReader::ref().loadTexture(absPath(texturePath), 2);


@@ -46,7 +46,7 @@ namespace openspace {
std::unique_ptr<ghoul::opengl::ProgramObject> _cullProgram = nullptr;
glm::dvec3 _chachedTranslation{};
glm::dvec3 _chacedCameraPos{};
-static constexpr size_t _rayCount{ 500 };
+static constexpr size_t _rayCount{ 100 };
static constexpr size_t _stepsCount = 100000;
static constexpr float _stepLength = 0.0001f;
static constexpr size_t _mapCount = 3;


@@ -39,9 +39,7 @@ const int num_rays = schwarzschildWarpTable.length() / (layerCount + 1);
const float max_theta = PI;
-const float delta_theta = 0.1f;
const float max_phi = 2*PI;
-const float delta_phi = 0;
const int num_rays_per_dim = 100;
/**********************************************************
Math
@@ -91,48 +89,73 @@ vec2 sphericalToUV(vec2 sphereCoords){
***********************************************************/
vec2 getInterpolatedWarpAngles(float input_theta, float input_phi, int layer) {
-// Convert angles to floating-point grid indices
-float theta_f = (input_theta - delta_theta) * float(num_rays_per_dim - 1) / (max_theta - delta_theta);
-float phi_f = input_phi * float(num_rays_per_dim) / (2.0 * PI);
+// 1) Convert to float grid indices
+float theta_f = (input_theta)
+* float(num_rays_per_dim - 1)
+/ (max_theta);
+float phi_f = input_phi
+* float(num_rays_per_dim -1)
+/ (2.0 * PI);
-// Clamp theta indices (no wrap)
-int theta_low = int(clamp(floor(theta_f), 0.0, float(num_rays_per_dim - 2)));
-int theta_high = theta_low + 1;
+// 2) Integer bounds
+int theta_low = int(clamp(floor(theta_f), 0.0, float(num_rays_per_dim-1)));
+int theta_high = int(clamp(theta_low + 1, 0, num_rays_per_dim-1));
// Wrap phi indices
-int phi_low = int(floor(phi_f)) % num_rays_per_dim;
+int phi_low = int(floor(phi_f)) % num_rays_per_dim;
int phi_high = (phi_low + 1) % num_rays_per_dim;
+// 3) Flat indices
+int N = num_rays_per_dim;
+int idx00 = theta_low * N + phi_low;
+int idx10 = theta_high * N + phi_low;
+int idx01 = theta_low * N + phi_high;
+int idx11 = theta_high * N + phi_high;
+// 4) Record stride & per-layer offset
int record_stride = 2 + layerCount * 2;
-int offset = 2 + layer * 2;
+int offset = 2 + layer * 2;
-// Flat index computation
-int idx00 = theta_low * num_rays_per_dim + phi_low;
-int idx10 = theta_high * num_rays_per_dim + phi_low;
-int idx01 = theta_low * num_rays_per_dim + phi_high;
-int idx11 = theta_high * num_rays_per_dim + phi_high;
-// Base offsets into warp table
+// 5) Base pointers into the table
int base00 = idx00 * record_stride;
int base10 = idx10 * record_stride;
int base01 = idx01 * record_stride;
int base11 = idx11 * record_stride;
-// Interpolation weights
-float t = (input_theta - schwarzschildWarpTable[base00]) / (schwarzschildWarpTable[base10] - schwarzschildWarpTable[base00]);
-float u = (input_phi - schwarzschildWarpTable[base00+1]) / (schwarzschildWarpTable[base10+1] - schwarzschildWarpTable[base00+1]);
+// 6) Extract the original theta/phi stored in the table
+float theta00 = schwarzschildWarpTable[base00 ];
+float phi00 = schwarzschildWarpTable[base00 + 1];
+float theta10 = schwarzschildWarpTable[base10 ];
+float phi01 = schwarzschildWarpTable[base01 + 1];
-// Fetch surrounding values
-vec2 v00 = vec2(schwarzschildWarpTable[base00 + offset], schwarzschildWarpTable[base00 + offset + 1]);
-vec2 v10 = vec2(schwarzschildWarpTable[base10 + offset], schwarzschildWarpTable[base10 + offset + 1]);
-vec2 v01 = vec2(schwarzschildWarpTable[base01 + offset], schwarzschildWarpTable[base01 + offset + 1]);
-vec2 v11 = vec2(schwarzschildWarpTable[base11 + offset], schwarzschildWarpTable[base11 + offset + 1]);
+// 7) Interpolation weights
+float t = (input_theta - theta00) / (theta10 - theta00);
+float u = (input_phi - phi00) / (phi01 - phi00);
+// 8) Fetch the four target warp-angle pairs
+vec2 v00 = vec2(
+schwarzschildWarpTable[base00 + offset ],
+schwarzschildWarpTable[base00 + offset + 1]
+);
+vec2 v10 = vec2(
+schwarzschildWarpTable[base10 + offset ],
+schwarzschildWarpTable[base10 + offset + 1]
+);
+vec2 v01 = vec2(
+schwarzschildWarpTable[base01 + offset ],
+schwarzschildWarpTable[base01 + offset + 1]
+);
+vec2 v11 = vec2(
+schwarzschildWarpTable[base11 + offset ],
+schwarzschildWarpTable[base11 + offset + 1]
+);
-return v00;
-// Bilinear interpolation
-vec2 interp = mix(mix(v00, v10, t), mix(v01, v11, t), u);
-return interp;
+// 9) Bilinear interpolation
+vec2 interp_low = mix(v00, v10, t);
+vec2 interp_high = mix(v01, v11, t);
+return mix(interp_low, interp_high, u);
}
vec2 applyBlackHoleWarp(vec2 cameraOutAngles, int layer){
float theta = cameraOutAngles.x;
float phi = cameraOutAngles.y;
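Two pitfalls remain in getInterpolatedWarpAngles above, presumably the interpolation the commit message says still needs work. First, the weights t and u are derived from angles read back out of the table, so across the phi = 2*pi seam phi01 - phi00 wraps negative and u leaves [0, 1]. Second, the CUDA kernel lays rays out at theta = pi*i/N and phi = 2*pi*i/N (spacing pi/N and 2*pi/N), while theta_f and phi_f divide by (N - 1)-based spacings, so the fetched cells drift off the true neighbors. A host-side C++ sketch that takes the weights straight from the fractional grid coordinates instead (an alternative approach, not the shader's current method; fetch is a hypothetical table read and the grid convention is stated in the comments):

#include <algorithm>
#include <cmath>
#include <functional>

struct Vec2 { float x, y; };

static Vec2 mix(Vec2 a, Vec2 b, float t) {
    return Vec2{a.x + (b.x - a.x) * t, a.y + (b.y - a.y) * t};
}

// Bilinear lookup on an N x N grid sampled at theta_i = maxTheta * i / (N - 1)
// (clamped) and phi_j = 2*pi * j / N (wrapping). fetch(i, j) stands in for
// reading the warp-table record at grid cell (i, j).
Vec2 bilinearWrapPhi(float theta, float phi, int N, float maxTheta,
                     const std::function<Vec2(int, int)>& fetch)
{
    const float twoPi = 6.28318530718f;
    float tf = theta * float(N - 1) / maxTheta; // grid-space theta
    float pf = phi * float(N) / twoPi;          // grid-space phi
    int t0 = std::clamp(int(std::floor(tf)), 0, N - 2);
    int t1 = t0 + 1;
    int p0 = ((int(std::floor(pf)) % N) + N) % N; // wrap, stay non-negative
    int p1 = (p0 + 1) % N;
    float t = std::clamp(tf - float(t0), 0.0f, 1.0f); // weights come from the
    float u = pf - std::floor(pf);                    // grid coords, not the table
    Vec2 low  = mix(fetch(t0, p0), fetch(t1, p0), t);
    Vec2 high = mix(fetch(t0, p1), fetch(t1, p1), t);
    return mix(low, high, u);
}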
@@ -278,7 +301,6 @@ Fragment getFragment() {
vec4 rotatedViewCoords = cameraRotationMatrix * viewCoords;
vec2 sphericalCoords;
-vec2 envMapSphericalCoords;
vec4 accumulatedColor = vec4(0.0f);
float accumulatedWeight = 0.0f; // Track total weight of blending
@@ -286,20 +308,19 @@ Fragment getFragment() {
// Apply black hole warping to spherical coordinates
for (int l = 0; l < layerCount; ++l) {
sphericalCoords = cartesianToSpherical(rotatedViewCoords.xyz);
-envMapSphericalCoords = applyBlackHoleWarp(sphericalCoords, l);
-if (isnan(envMapSphericalCoords.x)) {
+sphericalCoords = applyBlackHoleWarp(sphericalCoords, l);
+if (isnan(sphericalCoords.x)) {
// If inside the event horizon
frag.color = vec4(.5f);
return frag;
}
-vec4 envMapCoords = vec4(sphericalToCartesian(envMapSphericalCoords.x, envMapSphericalCoords.y), 0.0f);
+// vec4 envMapCoords = vec4(sphericalToCartesian(sphericalCoords.x, sphericalCoords.y), 0.0f);
// Apply the user's world input rotation of the black hole
-envMapCoords = worldRotationMatrix * envMapCoords;
+// envMapCoords = worldRotationMatrix * envMapCoords;
-sphericalCoords = cartesianToSpherical(envMapCoords.xyz);
+// sphericalCoords = cartesianToSpherical(envMapCoords.xyz);
// vec4 starColor = searchNearestStar(vec3(0.0f, sphericalCoords.x, sphericalCoords.y), l);
// if (starColor.a > 0.0) {
@@ -315,7 +336,7 @@ Fragment getFragment() {
vec4 texColor = texture(environmentTexture, uv);
// frag.color.rgb = accumulatedColor.rgb * accumulatedWeight + texColor.rgb * (1.0 - accumulatedWeight);
-frag.color.a = 1.0;
frag.color.rgb = texColor.rgb;
+frag.color.a = 1.0;
return frag;
}