GodotShade/compute_samples.gd
2026-02-12 21:22:17 +01:00

236 lines
8.8 KiB
GDScript

extends Node3D
## Runs two compute shaders on a local RenderingDevice and turns their output
## into an ArrayMesh (see compute_mesh()).

## Local rendering device; assigned by create_device().
var rd: RenderingDevice
## Shader resource loaded from "res://sdf_shader.glsl" in create_device().
var shader_file1: Resource
## Shader resource loaded from "res://sdf_mesh_generation.glsl" in create_device().
var shader_file2: Resource
## SPIR-V obtained from shader_file1.get_spirv().
var shader_spirv1: RDShaderSPIRV
## SPIR-V obtained from shader_file2.get_spirv().
var shader_spirv2: RDShaderSPIRV
## Shader RID built from shader_spirv1; used for the first dispatch in compute_mesh().
var shader_pass1: RID
## Shader RID built from shader_spirv2; used for the second dispatch in compute_mesh().
var shader_pass2: RID
## Tick time in seconds captured when this script instance is initialized.
## compute_mesh() subtracts it from the current time to get the elapsed time
## it uploads to the shaders.
var start_time := Time.get_ticks_msec() / 1000.0
# NOTE(review): `color` is not referenced anywhere in the visible part of this
# file - confirm whether other code reads it before removing.
var color = Color.CORAL
## Overwritten by compute_mesh() with the raw float readback of the surface
## buffer (a PackedFloat32Array, 3 floats per voxel), replacing the default [].
@export var iout_surface_points = []
## Creates the local RenderingDevice and builds both compute shaders.
##
## On success `rd`, `shader_file*`, `shader_spirv*` and `shader_pass*` are all
## populated. On failure an error is reported with push_error() and the
## function returns early, leaving the members that were not reached untouched.
func create_device():
	# create_local_rendering_device() returns null when no RenderingDevice
	# backend is available (e.g. the OpenGL/Compatibility driver or headless
	# mode), so check before calling anything on it.
	rd = RenderingServer.create_local_rendering_device()
	if rd == null:
		push_error("create_device: could not create a local RenderingDevice (unsupported renderer?).")
		return

	# Pass 1 shader.
	shader_file1 = load("res://sdf_shader.glsl")
	if shader_file1 == null:
		push_error("create_device: failed to load res://sdf_shader.glsl")
		return
	shader_spirv1 = shader_file1.get_spirv()
	if shader_spirv1 == null or shader_spirv1.compile_error_compute != "":
		push_error("create_device: res://sdf_shader.glsl did not compile: %s"
				% ("" if shader_spirv1 == null else shader_spirv1.compile_error_compute))
		return
	shader_pass1 = rd.shader_create_from_spirv(shader_spirv1)

	# Pass 2 shader.
	shader_file2 = load("res://sdf_mesh_generation.glsl")
	if shader_file2 == null:
		push_error("create_device: failed to load res://sdf_mesh_generation.glsl")
		return
	shader_spirv2 = shader_file2.get_spirv()
	if shader_spirv2 == null or shader_spirv2.compile_error_compute != "":
		push_error("create_device: res://sdf_mesh_generation.glsl did not compile: %s"
				% ("" if shader_spirv2 == null else shader_spirv2.compile_error_compute))
		return
	shader_pass2 = rd.shader_create_from_spirv(shader_spirv2)
## Returns the bytes of `count` zero-valued 32-bit floats, used as the initial
## contents of a GPU storage buffer.
func _zeroed_float_bytes(count: int) -> PackedByteArray:
	var floats := PackedFloat32Array()
	floats.resize(count)
	return floats.to_byte_array()


## Wraps an existing storage buffer in an RDUniform for the given binding slot.
func _make_storage_uniform(buffer: RID, binding: int) -> RDUniform:
	var uniform := RDUniform.new()
	uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER
	uniform.binding = binding # must match the "binding" in the shader file
	uniform.add_id(buffer)
	return uniform


## True when `voxel_index` addresses a full xyz triple inside both readback
## arrays and its vertex is not flagged empty (first component below -0.5).
func _is_usable_vertex(voxel_index: int, verts: PackedFloat32Array, norms: PackedFloat32Array) -> bool:
	if voxel_index < 0:
		return false
	var last_component := voxel_index * 3 + 2
	if last_component >= verts.size() or last_component >= norms.size():
		return false
	return not (verts[voxel_index * 3] < -0.5)


## Runs both compute passes over a world_size^3 voxel grid and builds a mesh
## from the vertex, normal and index data they write.
##
## world_size: number of voxels along each axis; must be positive.
## threshold:  uploaded unchanged to the shaders in the params uniform buffer.
##
## Returns an ArrayMesh with a single PRIMITIVE_TRIANGLES surface, or an empty
## ArrayMesh when no vertices survive (or when a precondition fails, in which
## case an error is also reported). As a side effect, `iout_surface_points` is
## set to the raw float readback of the surface buffer.
func compute_mesh(world_size: int, threshold: float) -> ArrayMesh:
	if rd == null or not shader_pass1.is_valid() or not shader_pass2.is_valid():
		push_error("compute_mesh: rendering device or shaders missing; call create_device() first.")
		return ArrayMesh.new()
	if world_size <= 0:
		push_error("compute_mesh: world_size must be positive, got %d." % world_size)
		return ArrayMesh.new()

	var total := world_size * world_size * world_size

	# Binding 0: one float per voxel. Written by the shaders, never read back here.
	var field_bytes := _zeroed_float_bytes(total)
	var buffer := rd.storage_buffer_create(field_bytes.size(), field_bytes)
	var uniform_buf := _make_storage_uniform(buffer, 0)

	# Binding 2: surface points (3 floats per voxel).
	var surface_bytes := _zeroed_float_bytes(total * 3)
	var surface_buffer := rd.storage_buffer_create(surface_bytes.size(), surface_bytes)
	var surface_uniform_buf := _make_storage_uniform(surface_buffer, 2)

	# Binding 3: normals (3 floats per voxel).
	var normal_bytes := _zeroed_float_bytes(total * 3)
	var normal_buffer := rd.storage_buffer_create(normal_bytes.size(), normal_bytes)
	var normal_uniform := _make_storage_uniform(normal_buffer, 3)

	# Binding 4: UVs (vec2 = 2 floats per voxel). Bound for the shaders only.
	var uv_bytes := _zeroed_float_bytes(total * 2)
	var uv_buffer := rd.storage_buffer_create(uv_bytes.size(), uv_bytes)
	var uv_uniform := _make_storage_uniform(uv_buffer, 4)

	# Binding 5: index buffer, budgeted at 6 indices per voxel
	# (18 is the absolute max, 6 is usually enough).
	var index_init := PackedInt32Array()
	index_init.resize(total * 6)
	var index_bytes := index_init.to_byte_array()
	var idx_buffer := rd.storage_buffer_create(index_bytes.size(), index_bytes)
	var idx_uniform := _make_storage_uniform(idx_buffer, 5)

	# Binding 6: atomic counter (a single 32-bit int starting at 0).
	var counter_bytes := PackedInt32Array([0]).to_byte_array()
	var counter_buffer := rd.storage_buffer_create(4, counter_bytes)
	var counter_uniform := _make_storage_uniform(counter_buffer, 6)

	# Binding 1: parameter block {int world_size, float threshold, float time, int 0}.
	# The trailing zero pads the block to 16 bytes.
	var u_time := Time.get_ticks_msec() / 1000.0 - start_time
	var peer := StreamPeerBuffer.new()
	peer.put_32(world_size)
	peer.put_float(threshold)
	peer.put_float(u_time)
	peer.put_32(0)
	var bytes := peer.data_array
	var params_buffer := rd.uniform_buffer_create(bytes.size(), bytes)
	var uniform_params := RDUniform.new()
	uniform_params.uniform_type = RenderingDevice.UNIFORM_TYPE_UNIFORM_BUFFER
	uniform_params.binding = 1
	uniform_params.add_id(params_buffer)

	var uniforms: Array[RDUniform] = [
		uniform_buf, uniform_params, surface_uniform_buf, normal_uniform,
		uv_uniform, idx_uniform, counter_uniform,
	]
	# The last argument (0) needs to match the "set" in the shader files.
	var uniform_set1 := rd.uniform_set_create(uniforms, shader_pass1, 0)
	var uniform_set2 := rd.uniform_set_create(uniforms, shader_pass2, 0)

	# NOTE(review): dividing by 4 assumes a local workgroup size of 4 per axis
	# in both shaders - confirm against the GLSL sources.
	var dispatch_count := int(ceil(world_size / 4.0))

	# 1. Dispatch PASS 1 (calculate points).
	var pipeline1 := rd.compute_pipeline_create(shader_pass1)
	var compute_list := rd.compute_list_begin()
	rd.compute_list_bind_compute_pipeline(compute_list, pipeline1)
	rd.compute_list_bind_uniform_set(compute_list, uniform_set1, 0)
	rd.compute_list_dispatch(compute_list, dispatch_count, dispatch_count, dispatch_count)
	rd.compute_list_end()

	# Make pass 1's writes to the surface buffer visible before pass 2 reads
	# them (this is what fixed the stripes/holes).
	# NOTE(review): recent Godot releases document barrier() as deprecated
	# because barriers are inserted automatically - confirm for the target version.
	rd.barrier(RenderingDevice.BARRIER_MASK_ALL_BARRIERS)

	# 2. Dispatch PASS 2 (generate indices).
	var pipeline2 := rd.compute_pipeline_create(shader_pass2)
	compute_list = rd.compute_list_begin()
	rd.compute_list_bind_compute_pipeline(compute_list, pipeline2)
	rd.compute_list_bind_uniform_set(compute_list, uniform_set2, 0)
	rd.compute_list_dispatch(compute_list, dispatch_count, dispatch_count, dispatch_count)
	rd.compute_list_end()

	# Submit to the GPU and block until it has finished.
	rd.submit()
	rd.sync()

	# Read back the results. The surface buffer is fetched once and shared
	# between mesh building and iout_surface_points.
	var out_verts := rd.buffer_get_data(surface_buffer).to_float32_array()
	var out_norms := rd.buffer_get_data(normal_buffer).to_float32_array()
	var out_indices := rd.buffer_get_data(idx_buffer).to_int32_array()
	var final_count: int = rd.buffer_get_data(counter_buffer).to_int32_array()[0]

	# Release every per-call GPU object. Pipelines and uniform sets go first:
	# pipelines would otherwise live as long as the shaders and pile up on
	# every call, and uniform sets must still be valid when freed explicitly.
	rd.free_rid(pipeline1)
	rd.free_rid(pipeline2)
	rd.free_rid(uniform_set1)
	rd.free_rid(uniform_set2)
	rd.free_rid(buffer)
	rd.free_rid(params_buffer)
	rd.free_rid(surface_buffer)
	rd.free_rid(normal_buffer)
	rd.free_rid(uv_buffer)
	rd.free_rid(idx_buffer)
	rd.free_rid(counter_buffer)

	iout_surface_points = out_verts

	# 3. Build the mesh. Voxel indices emitted by the shader are remapped to a
	# compact vertex list so only referenced voxels become vertices.
	var active_map := {}
	var packed_verts := PackedVector3Array()
	var packed_normals := PackedVector3Array()
	var packed_indices := PackedInt32Array()
	# Re-center the vertices so the mesh isn't floating in the corner.
	var offset := Vector3(world_size / 2.0, world_size / 2.0, world_size / 2.0)

	# Never trust the counter beyond what the index buffer can hold, and only
	# consume whole triangles.
	var usable_count := clampi(final_count, 0, out_indices.size())
	usable_count -= usable_count % 3

	for tri_start in range(0, usable_count, 3):
		# Drop the whole triangle if any corner is empty or out of range;
		# skipping a single index would shift every later triangle.
		var triangle_ok := true
		for corner in 3:
			if not _is_usable_vertex(out_indices[tri_start + corner], out_verts, out_norms):
				triangle_ok = false
				break
		if not triangle_ok:
			continue

		for corner in 3:
			var old_idx: int = out_indices[tri_start + corner]
			if not active_map.has(old_idx):
				var v_base := old_idx * 3
				active_map[old_idx] = packed_verts.size()
				packed_verts.append(
						Vector3(out_verts[v_base], out_verts[v_base + 1], out_verts[v_base + 2]) - offset)
				packed_normals.append(
						Vector3(out_norms[v_base], out_norms[v_base + 1], out_norms[v_base + 2]))
			packed_indices.append(active_map[old_idx])

	var mesh := ArrayMesh.new()
	if packed_verts.size() > 0:
		var arrays := []
		arrays.resize(Mesh.ARRAY_MAX)
		arrays[Mesh.ARRAY_VERTEX] = packed_verts
		arrays[Mesh.ARRAY_NORMAL] = packed_normals
		arrays[Mesh.ARRAY_INDEX] = packed_indices
		mesh.add_surface_from_arrays(Mesh.PRIMITIVE_TRIANGLES, arrays)
	return mesh
## Converts a flat, x-fastest buffer of per-voxel floats into a Dictionary
## keyed by voxel coordinate.
##
## world_size:  number of voxels along each axis.
## flat_buffer: one float per voxel, laid out as x + y * world_size
##              + z * world_size * world_size.
##
## Returns { Vector3i(x, y, z): float }, inserted in flat-buffer order.
func build_sample_dict(world_size: int, flat_buffer: PackedFloat32Array) -> Dictionary:
	var samples := {}
	# Walking z, then y, then x visits the voxels in ascending flat index, so a
	# running cursor replaces the divide/modulo decoding.
	var cursor := 0
	for z in world_size:
		for y in world_size:
			for x in world_size:
				samples[Vector3i(x, y, z)] = flat_buffer[cursor]
				cursor += 1
	return samples
## Converts a flat buffer of per-voxel xyz triples into a Dictionary keyed by
## voxel coordinate.
##
## world_size:  number of voxels along each axis.
## flat_buffer: 3 floats per voxel; voxel order is x + y * world_size
##              + z * world_size * world_size.
##
## Returns { Vector3i(x, y, z): Vector3 }, inserted in flat-buffer order.
func build_surface_dict(world_size: int, flat_buffer: PackedFloat32Array) -> Dictionary:
	var surface_by_voxel := {}
	# The cursor advances by one xyz triple per voxel while the nested loops
	# enumerate voxels in the same order as the flat layout.
	var cursor := 0
	for z in world_size:
		for y in world_size:
			for x in world_size:
				surface_by_voxel[Vector3i(x, y, z)] = Vector3(
						flat_buffer[cursor], flat_buffer[cursor + 1], flat_buffer[cursor + 2])
				cursor += 3
	return surface_by_voxel