
641 lines
22 KiB

#include "voxel_data.h"
#include "../util/dstack.h"
#include "../util/math/conv.h"
#include "voxel_data_grid.h"
namespace zylann::voxel {
// Out-of-line default constructor: no work is done beyond what the members' own initializers do.
VoxelData::VoxelData() = default;
// Out-of-line destructor: nothing is released explicitly, members clean up after themselves.
VoxelData::~VoxelData() = default;
// Sets the number of LODs stored in `_lod_count`.
// `p_lod_count` is asserted to be in the range [1, constants::MAX_LOD).
// An `RWLockWrite` guard on `_rw_lock` is held while `_lod_count` is compared and assigned.
// NOTE(review): only part of this body is visible here. As shown, the equality test opens a branch that runs
// straight into the assignment; presumably an early-out for the unchanged case sits in between — confirm.
void VoxelData::set_lod_count(unsigned int p_lod_count) {
ZN_ASSERT(p_lod_count < constants::MAX_LOD);
ZN_ASSERT(p_lod_count >= 1);
RWLockWrite wlock(_rw_lock);
if (p_lod_count == _lod_count) {
_lod_count = p_lod_count;
// Not entirely required, but changing LOD count at runtime is rarely needed
// Locking entry point for resetting the per-LOD maps: holds an `RWLockWrite` guard on `_rw_lock`.
// NOTE(review): the rest of the body is not visible here; presumably it forwards to `reset_maps_no_lock()` — confirm.
void VoxelData::reset_maps() {
RWLockWrite wlock(_rw_lock);
// Visits every entry of `_lods` and branches on whether its index is below `_lod_count`.
// No lock is taken in the visible code; presumably the caller already holds `_rw_lock` — confirm.
void VoxelData::reset_maps_no_lock() {
for (unsigned int lod_index = 0; lod_index < _lods.size(); ++lod_index) {
Lod &data_lod = _lods[lod_index];
// Instance new maps if we have more lods, or clear them otherwise
if (lod_index < _lod_count) {
// NOTE(review): the statements of both branches are not visible in this view.
} else {
// Replaces `_bounds_in_voxels` with `bounds` while holding an `RWLockWrite` guard on `_rw_lock`.
void VoxelData::set_bounds(Box3i bounds) {
RWLockWrite wlock(_rw_lock);
_bounds_in_voxels = bounds;
// Replaces the stored generator reference (`_generator`) while holding an `RWLockWrite` guard on `_rw_lock`.
void VoxelData::set_generator(Ref<VoxelGenerator> generator) {
RWLockWrite wlock(_rw_lock);
_generator = generator;
// Replaces the stored stream reference (`_stream`) while holding an `RWLockWrite` guard on `_rw_lock`.
void VoxelData::set_stream(Ref<VoxelStream> stream) {
RWLockWrite wlock(_rw_lock);
_stream = stream;
// Stores the streaming flag in `_streaming_enabled`.
// Unlike `set_bounds`, `set_generator` and `set_stream`, no guard on `_rw_lock` is taken in the visible code.
void VoxelData::set_streaming_enabled(bool enabled) {
_streaming_enabled = enabled;
// Reads one voxel from `vb` at `pos` on `channel` while holding an `RWLockRead` guard on the buffer's own lock.
// The SDF channel is read with `get_voxel_f` into `v.f`; any other channel is read with `get_voxel` into `v.i`.
// Returns the filled `VoxelSingleValue`.
inline VoxelSingleValue get_voxel_with_lock(VoxelBufferInternal &vb, Vector3i pos, unsigned int channel) {
VoxelSingleValue v;
if (channel == VoxelBufferInternal::CHANNEL_SDF) {
RWLockRead rlock(vb.get_lock());
v.f = vb.get_voxel_f(pos.x, pos.y, pos.z, channel);
} else {
RWLockRead rlock(vb.get_lock());
v.i = vb.get_voxel(pos, channel);
return v;
// TODO Piggyback on `copy`? The implementation is quite complex, and it's not supposed to be an efficient use case
// Returns the value of a single voxel at `pos` on channel `channel_index`.
// `defval` is returned when `pos` is outside `_bounds_in_voxels`, and on the other fallback paths below.
// When `try_get_voxel_buffer_with_lock` yields no buffer, the visible code asks the generator for a single value
// (`generate_single`) and, for the SDF channel only, runs that value through `_modifiers.apply`.
// NOTE(review): closing braces are not visible in this view, so the exact nesting of the fallbacks should be
// confirmed against the full file.
VoxelSingleValue VoxelData::get_voxel(Vector3i pos, unsigned int channel_index, VoxelSingleValue defval) const {
if (!_bounds_in_voxels.contains(pos)) {
return defval;
// Block coordinates at LOD0: voxel position shifted by the block size exponent.
Vector3i block_pos = pos >> get_block_size_po2();
// Passed to `try_get_voxel_buffer_with_lock`, which may modify it; it is tested in the LOD loop further down.
bool generate = false;
if (_streaming_enabled) {
// Streaming path: only LOD0 is consulted.
const Lod &data_lod0 = _lods[0];
std::shared_ptr<VoxelBufferInternal> voxels = try_get_voxel_buffer_with_lock(data_lod0, block_pos, generate);
if (voxels == nullptr) {
// TODO We should be able to get a value if modifiers are used but not a base generator
Ref<VoxelGenerator> generator = get_generator();
if (generator.is_valid()) {
VoxelSingleValue value = generator->generate_single(pos, channel_index);
if (channel_index == VoxelBufferInternal::CHANNEL_SDF) {
// Modifiers are only applied to SDF values here.
float sdf = value.f;
_modifiers.apply(sdf, to_vec3(pos));
value.f = sdf;
return value;
} else {
// A buffer was found: convert to block-local coordinates and read under the buffer's lock.
const Vector3i rpos = data_lod0.map.to_local(pos);
return get_voxel_with_lock(*voxels, rpos, channel_index);
return defval;
} else {
// We might hit places where data isn't loaded, in this case we try to fallback on higher LOD indices
Vector3i voxel_pos = pos;
Ref<VoxelGenerator> generator = get_generator();
for (unsigned int lod_index = 0; lod_index < _lod_count; ++lod_index) {
const Lod &data_lod = _lods[lod_index];
std::shared_ptr<VoxelBufferInternal> voxels = try_get_voxel_buffer_with_lock(data_lod, block_pos, generate);
if (voxels != nullptr) {
return get_voxel_with_lock(*voxels, data_lod.map.to_local(voxel_pos), channel_index);
} else if (generate) {
// TODO We should be able to get a value if modifiers are used but not a base generator
if (generator.is_valid()) {
// The generator is queried at the original `pos`, not at the per-LOD `voxel_pos`.
VoxelSingleValue value = generator->generate_single(pos, channel_index);
if (channel_index == VoxelBufferInternal::CHANNEL_SDF) {
float sdf = value.f;
_modifiers.apply(sdf, to_vec3(pos));
value.f = sdf;
return value;
} else {
return defval;
// Fallback on lower LOD
// Both the block and voxel coordinates are halved for the next LOD index.
block_pos = block_pos >> 1;
voxel_pos = voxel_pos >> 1;
return defval;
// TODO Piggyback on `paste`? The implementation is quite complex, and it's not supposed to be an efficient use case
// Writes `value` into the voxel at `pos` on channel `channel_index` of LOD0.
// Returns true when the voxel was written. Returns false when streaming is enabled and the block can be neither
// found nor generated, or when another thread inserted the block while this call was preparing one.
// If no buffer exists for the block and a generator is available, a new buffer is prepared for the whole block
// and inserted into the LOD0 map before the voxel is written.
// NOTE(review): closing braces and some statements (e.g. the buffer allocation and the generator call that
// consumes `q`) are not visible in this view — confirm against the full file.
bool VoxelData::try_set_voxel(uint64_t value, Vector3i pos, unsigned int channel_index) {
const Vector3i block_pos_lod0 = pos >> get_block_size_po2();
Lod &data_lod0 = _lods[0];
const Vector3i block_pos = data_lod0.map.voxel_to_block(pos);
bool can_generate = false;
std::shared_ptr<VoxelBufferInternal> voxels = try_get_voxel_buffer_with_lock(data_lod0, block_pos, can_generate);
if (voxels == nullptr) {
if (_streaming_enabled && !can_generate) {
return false;
Ref<VoxelGenerator> generator = get_generator();
if (generator.is_valid()) {
voxels = make_shared_instance<VoxelBufferInternal>();
// The buffer is stored below as the whole block at `block_pos_lod0`, so the query origin must be the block's
// origin in voxels, not the edited voxel. Passing `pos` would shift the generated content by the voxel's offset
// inside the block.
VoxelGenerator::VoxelQueryData q{ *voxels, block_pos_lod0 << get_block_size_po2(), 0 };
// Modifiers are applied over the same block-aligned box as the query.
_modifiers.apply(q.voxel_buffer, AABB(q.origin_in_voxels, q.voxel_buffer.get_size()));
RWLockWrite wlock(data_lod0.map_lock);
if (data_lod0.map.has_block(block_pos_lod0)) {
// A block was loaded by another thread, cancel our edit.
return false;
data_lod0.map.set_block_buffer(block_pos_lod0, voxels, true);
// If it turns out to be a problem, use CoW?
RWLockWrite lock(voxels->get_lock());
voxels->set_voxel(value, data_lod0.map.to_local(pos), channel_index);
// We don't update mips, this must be done by the caller
return true;
// Float convenience wrapper over `get_voxel`: returns the `.f` member of its result.
// The default passed to `get_voxel` has `.f` set to 1.f.
float VoxelData::get_voxel_f(Vector3i pos, unsigned int channel_index) const {
VoxelSingleValue defval;
defval.f = 1.f;
return get_voxel(pos, channel_index, defval).f;
// Float convenience wrapper over `try_set_voxel`: converts `value` with `snorm_to_s16` and forwards the result.
// Returns whatever `try_set_voxel` returns.
bool VoxelData::try_set_voxel_f(real_t value, Vector3i pos, unsigned int channel_index) {
// TODO Handle format instead of hardcoding 16-bits
return try_set_voxel(snorm_to_s16(value), pos, channel_index);
// Copies voxels of the channels selected by `channels_mask` from the LOD0 map into `dst_buffer`, starting at
// `min_pos`. The LOD0 map is read under an `RWLockRead` guard on its `map_lock`.
// When streaming is enabled and a generator is valid, the map copy is given a callback plus a context holding
// the generator and the modifier stack; otherwise the plain map copy is used.
// NOTE(review): closing tokens and possibly the generator call inside the callback are not visible in this view.
void VoxelData::copy(Vector3i min_pos, VoxelBufferInternal &dst_buffer, unsigned int channels_mask) const {
const Lod &data_lod0 = _lods[0];
const VoxelModifierStack &modifiers = _modifiers;
Ref<VoxelGenerator> generator = get_generator();
if (is_streaming_enabled() && generator.is_valid()) {
// Context handed to the capture-less callback through a `void *`.
struct GenContext {
VoxelGenerator &generator;
const VoxelModifierStack &modifiers;
GenContext gctx{ **generator, modifiers };
RWLockRead rlock(data_lod0.map_lock);
data_lod0.map.copy(min_pos, dst_buffer, channels_mask, &gctx,
[](void *callback_data, VoxelBufferInternal &voxels, Vector3i pos) {
// Suffixed with `2` because GCC warns it shadows a previous local...
GenContext *gctx2 = reinterpret_cast<GenContext *>(callback_data);
VoxelGenerator::VoxelQueryData q{ voxels, pos, 0 };
gctx2->modifiers.apply(voxels, AABB(pos, voxels.get_size()));
} else {
RWLockRead rlock(data_lod0.map_lock);
// TODO Apply modifiers
data_lod0.map.copy(min_pos, dst_buffer, channels_mask);
// Forwards all arguments unchanged to the LOD0 map's `paste`.
// No guard on `map_lock` is taken in the visible code.
// NOTE(review): presumably the caller is expected to handle locking — confirm.
void VoxelData::paste(Vector3i min_pos, const VoxelBufferInternal &src_buffer, unsigned int channels_mask,
bool use_mask, uint64_t mask_value, bool create_new_blocks) {
Lod &data_lod0 = _lods[0];
data_lod0.map.paste(min_pos, src_buffer, channels_mask, use_mask, mask_value, create_new_blocks);
// Tells whether the given voxel box is available at LOD0.
// Always returns true when streaming is disabled. Otherwise the box is clipped to `get_bounds()` and the result
// of the LOD0 map's `is_area_fully_loaded` is returned, queried under an `RWLockRead` guard on `map_lock`.
bool VoxelData::is_area_loaded(const Box3i p_voxels_box) const {
if (is_streaming_enabled() == false) {
return true;
const Box3i voxel_box = p_voxels_box.clipped(get_bounds());
const Lod &data_lod0 = _lods[0];
RWLockRead rlock(data_lod0.map_lock);
const bool all_blocks_present = data_lod0.map.is_area_fully_loaded(voxel_box);
return all_blocks_present;
// Prepares voxel buffers for blocks intersecting `voxel_box` on the first `lod_count` entries of `lods`.
// Works in three passes, as the inline comments mark: find empty slots (under read guards), generate buffers
// (no map lock held), then populate the maps (under write guards).
// `streaming` changes which slots count as empty: with streaming, only blocks that exist but hold no voxels;
// without it, blocks that are absent or hold no voxels.
// NOTE(review): closing braces and some statements (e.g. buffer allocation and the generator call consuming `q`)
// are not visible in this view — confirm against the full file.
void VoxelData::pre_generate_box(Box3i voxel_box, Span<Lod> lods, unsigned int data_block_size, bool streaming,
unsigned int lod_count, Ref<VoxelGenerator> generator, VoxelModifierStack &modifiers) {
//ERR_FAIL_COND_MSG(_full_load_mode == false, nullptr, "This function can only be used in full load mode");
// One unit of work: a block position at a given LOD, and the buffer produced for it.
struct Task {
Vector3i block_pos;
uint32_t lod_index;
std::shared_ptr<VoxelBufferInternal> voxels;
// TODO Optimize: thread_local pooling?
std::vector<Task> todo;
// We'll pack tasks per LOD so we'll have less locking to do
// TODO Optimize: thread_local pooling?
std::vector<unsigned int> count_per_lod;
// Find empty slots
for (unsigned int lod_index = 0; lod_index < lod_count; ++lod_index) {
// Box in block coordinates for this LOD: the divisor doubles with each LOD index.
const Box3i block_box = voxel_box.downscaled(data_block_size << lod_index);
//ZN_PRINT_VERBOSE(format("Preloading box {} at lod {} synchronously", block_box, lod_index));
Lod &data_lod = lods[lod_index];
// Remembered so the number of tasks added for this LOD can be computed afterwards.
const unsigned int prev_size = todo.size();
RWLockRead rlock(data_lod.map_lock);
block_box.for_each_cell([&data_lod, lod_index, &todo, streaming](Vector3i block_pos) {
// We don't check "loading blocks", because this function wants to complete the task right now.
const VoxelDataBlock *block = data_lod.map.get_block(block_pos);
if (streaming) {
// Non-resident blocks must not be touched because we don't know what's in them.
// We can generate caches if resident ones have no voxel data.
if (block != nullptr && !block->has_voxels()) {
todo.push_back(Task{ block_pos, lod_index, nullptr });
} else {
// We can generate anywhere voxel data is not in memory
if (block == nullptr || !block->has_voxels()) {
todo.push_back(Task{ block_pos, lod_index, nullptr });
count_per_lod.push_back(todo.size() - prev_size);
const Vector3i block_size = Vector3iUtil::create(data_block_size);
// Generate
for (unsigned int i = 0; i < todo.size(); ++i) {
Task &task = todo[i];
task.voxels = make_shared_instance<VoxelBufferInternal>();
// TODO Format?
if (generator.is_valid()) {
// Query origin is the block position scaled to voxels at this LOD.
VoxelGenerator::VoxelQueryData q{ //
*task.voxels, task.block_pos * (data_block_size << task.lod_index), task.lod_index
modifiers.apply(q.voxel_buffer, AABB(q.origin_in_voxels, q.voxel_buffer.get_size() << q.lod));
// Populate slots
// Tasks were appended LOD by LOD, so `count_per_lod` lets each LOD's run be consumed under one write guard.
unsigned int task_index = 0;
for (unsigned int lod_index = 0; lod_index < lod_count; ++lod_index) {
ZN_ASSERT(lod_index < count_per_lod.size());
const unsigned int count = count_per_lod[lod_index];
if (count > 0) {
const unsigned int end_task_index = task_index + count;
Lod &data_lod = lods[lod_index];
RWLockWrite wlock(data_lod.map_lock);
for (; task_index < end_task_index; ++task_index) {
Task &task = todo[task_index];
ZN_ASSERT(task.lod_index == lod_index);
// Re-check the slot: the map was not locked while buffers were being produced.
const VoxelDataBlock *prev_block = data_lod.map.get_block(task.block_pos);
if (prev_block != nullptr && prev_block->has_voxels()) {
// Sorry, that block has been set in the meantime by another thread.
// We'll assume the block we just generated is redundant and discard it.
data_lod.map.set_block_buffer(task.block_pos, task.voxels, true);
// Member convenience overload: reads the block size, streaming flag, LOD count, generator and modifier stack
// from this instance and forwards them, with `_lods` as a span, to the parameterised `pre_generate_box`.
void VoxelData::pre_generate_box(Box3i voxel_box) {
const unsigned int data_block_size = get_block_size();
const bool streaming = is_streaming_enabled();
const unsigned int lod_count = get_lod_count();
pre_generate_box(voxel_box, to_span(_lods), data_block_size, streaming, lod_count, get_generator(), _modifiers);
// Visits, for every LOD, the blocks intersecting `p_voxel_box`, under an `RWLockRead` guard on that LOD's
// `map_lock`. Blocks that are missing, edited or modified are singled out by the condition below.
// NOTE(review): what happens to the remaining blocks is not visible in this view; going by the function name,
// presumably their cached voxels are cleared — confirm.
void VoxelData::clear_cached_blocks_in_voxel_area(Box3i p_voxel_box) {
const unsigned int lod_count = get_lod_count();
for (unsigned int lod_index = 0; lod_index < lod_count; ++lod_index) {
Lod &lod = _lods[lod_index];
RWLockRead rlock(lod.map_lock);
// Box in block coordinates for this LOD.
const Box3i blocks_box = p_voxel_box.downscaled(lod.map.get_block_size() << lod_index);
blocks_box.for_each_cell_zxy([&lod](const Vector3i bpos) {
VoxelDataBlock *block = lod.map.get_block(bpos);
if (block == nullptr || block->is_edited() || block->is_modified()) {
// Visits every LOD0 block intersecting `p_voxel_box`, under an `RWLockRead` guard on the LOD0 `map_lock`.
// Missing blocks and blocks without voxels are special-cased, as is the `get_needs_lodding()` flag.
// `lod0_new_blocks_to_lod` is optional (checked against nullptr before use).
// NOTE(review): the statements inside each condition are not visible in this view; going by the names,
// presumably blocks are flagged as modified and newly flagged positions are appended to the output — confirm.
void VoxelData::mark_area_modified(Box3i p_voxel_box, std::vector<Vector3i> *lod0_new_blocks_to_lod) {
const Box3i bbox = p_voxel_box.downscaled(get_block_size());
Lod &data_lod0 = _lods[0];
RWLockRead rlock(data_lod0.map_lock);
bbox.for_each_cell([this, &data_lod0, lod0_new_blocks_to_lod](Vector3i block_pos_lod0) {
VoxelDataBlock *block = data_lod0.map.get_block(block_pos_lod0);
// We can get null blocks due to the added padding...
//ERR_FAIL_COND(block == nullptr);
if (block == nullptr) {
// We can get blocks without voxels in them due to the added padding...
if (!block->has_voxels()) {
//RWLockWrite wlock(block->get_voxels_shared()->get_lock());
// TODO call `set_edited(true)` as well? Apparently it wasn't needed so far, but it's a bit confusing
// TODO That boolean is also modified by the threaded update task (always set to false)
if (!block->get_needs_lodding()) {
// This is what indirectly causes remeshing
if (lod0_new_blocks_to_lod != nullptr) {
// Stores `buffer` as the block at `block_position` in the map of LOD `lod_index`.
// Returns false, after printing an error, when the buffer's size differs from the cubic block size given by
// `get_block_size()`. Otherwise inserts the buffer under an `RWLockWrite` guard on the LOD's `map_lock`, with
// `false` passed as the last argument of `set_block_buffer` (see the TODO about `overwrite`), and returns true.
// NOTE(review): the `edited` parameter is not used in the visible lines.
bool VoxelData::try_set_block_buffer(
Vector3i block_position, unsigned int lod_index, std::shared_ptr<VoxelBufferInternal> buffer, bool edited) {
Lod &data_lod = _lods[lod_index];
if (buffer->get_size() != Vector3iUtil::create(get_block_size())) {
// Voxel block size is incorrect, drop it
ERR_PRINT("Block is different from expected size");
return false;
// Store buffer
RWLockWrite wlock(data_lod.map_lock);
// TODO Expose `overwrite` as parameter?
VoxelDataBlock *block = data_lod.map.set_block_buffer(block_position, buffer, false);
// The map is expected to always hand back a block here.
CRASH_COND(block == nullptr);
return true;
// Registers an empty block at `block_position` in the map of LOD `lod_index`, under an `RWLockWrite` guard on
// that LOD's `map_lock`. `false` is passed as the last argument of `set_empty_block` (see the TODO about
// `overwrite`). Asserts that the map returned a block.
void VoxelData::set_empty_block_buffer(Vector3i block_position, unsigned int lod_index) {
Lod &data_lod = _lods[lod_index];
RWLockWrite wlock(data_lod.map_lock);
// TODO Expose `overwrite` as parameter?
VoxelDataBlock *block = data_lod.map.set_empty_block(block_position, false);
ZN_ASSERT(block != nullptr);
// Returns whether the map of LOD `lod_index` has a block at `bpos`, queried under an `RWLockRead` guard on
// that LOD's `map_lock`.
bool VoxelData::has_block(Vector3i bpos, unsigned int lod_index) const {
const Lod &data_lod = _lods[lod_index];
RWLockRead rlock(data_lod.map_lock);
return data_lod.map.has_block(bpos);
// Returns the sum of `get_block_count()` over the maps of the first `get_lod_count()` LODs.
// Each map is read under its own `RWLockRead` guard, taken one LOD at a time.
unsigned int VoxelData::get_block_count() const {
unsigned int sum = 0;
const unsigned int lod_count = get_lod_count();
for (unsigned int lod_index = 0; lod_index < lod_count; ++lod_index) {
const Lod &lod = _lods[lod_index];
RWLockRead rlock(lod.map_lock);
sum += lod.map.get_block_count();
return sum;
// Cascades edits made on LOD0 blocks to the other LODs.
// `modified_lod0_blocks` lists the LOD0 block positions to start from. `out_updated_blocks` is optional; when
// non-null, every processed block location (LOD0 and the destination LODs) is appended to it.
// Work proceeds in pairs of consecutive LODs (source = destination - 1), using per-thread scratch lists.
// NOTE(review): closing braces and several statements are not visible in this view — confirm against the full file.
void VoxelData::update_lods(Span<const Vector3i> modified_lod0_blocks, std::vector<BlockLocation> *out_updated_blocks) {
// Propagates edits performed so far to other LODs.
// These LODs must be currently in memory, otherwise terrain data will miss it.
// This is currently ensured by the fact we load blocks in a "pyramidal" way,
// i.e there is no way for a block to be loaded if its parent LOD isn't loaded already.
// In the future we may implement storing of edits to be applied later if blocks can't be found.
const int data_block_size = get_block_size();
const int data_block_size_po2 = get_block_size_po2();
const unsigned int lod_count = get_lod_count();
const bool streaming_enabled = is_streaming_enabled();
Ref<VoxelGenerator> generator = get_generator();
// Per-thread scratch lists, one per LOD, kept alive across calls.
static thread_local FixedArray<std::vector<Vector3i>, constants::MAX_LOD> tls_blocks_to_process_per_lod;
// Make sure LOD0 gets updates even if _lod_count is 1
std::vector<Vector3i> &dst_lod0 = tls_blocks_to_process_per_lod[0];
// NOTE(review): the byte count comes from `dst_lod0.size()`, and no resize of `dst_lod0` to
// `modified_lod0_blocks.size()` is visible in this view — confirm it is sized before this copy.
memcpy(dst_lod0.data(), modified_lod0_blocks.data(), dst_lod0.size() * sizeof(Vector3i));
Lod &data_lod0 = _lods[0];
RWLockRead rlock(data_lod0.map_lock);
std::vector<Vector3i> &blocks_pending_lodding_lod0 = tls_blocks_to_process_per_lod[0];
for (unsigned int i = 0; i < blocks_pending_lodding_lod0.size(); ++i) {
const Vector3i data_block_pos = blocks_pending_lodding_lod0[i];
VoxelDataBlock *data_block = data_lod0.map.get_block(data_block_pos);
ERR_CONTINUE(data_block == nullptr);
if (out_updated_blocks != nullptr) {
out_updated_blocks->push_back(BlockLocation{ data_block_pos, 0 });
// A source block covers half of its destination block along each axis.
const int half_bs = data_block_size >> 1;
// Process downscales upwards in pairs of consecutive LODs.
// This ensures we don't process multiple times the same blocks.
// Only LOD0 is editable at the moment, so we'll downscale from there
for (uint8_t dst_lod_index = 1; dst_lod_index < lod_count; ++dst_lod_index) {
const uint8_t src_lod_index = dst_lod_index - 1;
std::vector<Vector3i> &src_lod_blocks_to_process = tls_blocks_to_process_per_lod[src_lod_index];
std::vector<Vector3i> &dst_lod_blocks_to_process = tls_blocks_to_process_per_lod[dst_lod_index];
//VoxelLodTerrainUpdateData::Lod &dst_lod = state.lods[dst_lod_index];
Lod &src_data_lod = _lods[src_lod_index];
RWLockRead src_data_lod_map_rlock(src_data_lod.map_lock);
Lod &dst_data_lod = _lods[dst_lod_index];
// TODO Could take long locking this, we may generate things first and assign to the map at the end.
// Besides, in per-block streaming mode, it is not needed because blocks are supposed to be present
// A write guard is required: when streaming is disabled, missing destination blocks are inserted into this map
// through `set_block_buffer` below, which must not run under a shared (read) guard.
RWLockWrite wlock(dst_data_lod.map_lock);
for (unsigned int i = 0; i < src_lod_blocks_to_process.size(); ++i) {
const Vector3i src_bpos = src_lod_blocks_to_process[i];
// Parent block position in the destination LOD.
const Vector3i dst_bpos = src_bpos >> 1;
VoxelDataBlock *src_block = src_data_lod.map.get_block(src_bpos);
VoxelDataBlock *dst_block = dst_data_lod.map.get_block(dst_bpos);
if (dst_block == nullptr) {
if (!streaming_enabled) {
// TODO Doing this on the main thread can be very demanding and cause a stall.
// We should find a way to make it asynchronous, not need mips, or not edit outside viewers area.
std::shared_ptr<VoxelBufferInternal> voxels = make_shared_instance<VoxelBufferInternal>();
VoxelGenerator::VoxelQueryData q{ //
*voxels, //
dst_bpos << (dst_lod_index + data_block_size_po2), //
if (generator.is_valid()) {
q.voxel_buffer, AABB(q.origin_in_voxels, q.voxel_buffer.get_size() << dst_lod_index));
dst_block = dst_data_lod.map.set_block_buffer(dst_bpos, voxels, true);
} else {
ERR_PRINT(String("Destination block {0} not found when cascading edits on LOD {1}")
.format(varray(dst_bpos, dst_lod_index)));
// The block and its lower LOD indices are expected to be available.
// Otherwise it means the function was called too late?
ZN_ASSERT(src_block != nullptr);
//ZN_ASSERT(dst_block != nullptr);
// The block should have voxels if it has been edited or mipped.
if (out_updated_blocks != nullptr) {
out_updated_blocks->push_back(BlockLocation{ dst_bpos, dst_lod_index });
if (dst_lod_index != lod_count - 1 && !dst_block->get_needs_lodding()) {
// Position (0 or 1 per axis) of the source block inside its parent.
const Vector3i rel = src_bpos - (dst_bpos << 1);
// Update lower LOD
// This must always be done after an edit before it gets saved, otherwise LODs won't match and it will look
// ugly.
// TODO Optimization: try to narrow to edited region instead of taking whole block
RWLockRead rlock(src_block->get_voxels().get_lock());
dst_block->get_voxels(), Vector3i(), src_block->get_voxels_const().get_size(), rel * half_bs);
// No need to clear the last list because we never add blocks to it
// uint64_t time_spent = profiling_clock.restart();
// if (time_spent > 10) {
// print_line(String("Took {0} us to update lods").format(varray(time_spent)));
// }
// Checks each position of `block_positions` against the map of LOD `lod_index`, under an `RWLockRead` guard on
// that LOD's `map_lock`, singling out positions for which `has_block` is false.
// NOTE(review): the statement run for a missing block is not visible in this view; presumably the position is
// appended to `out_missing` — confirm.
void VoxelData::get_missing_blocks(
Span<const Vector3i> block_positions, unsigned int lod_index, std::vector<Vector3i> &out_missing) const {
const Lod &lod = _lods[lod_index];
RWLockRead rlock(lod.map_lock);
for (const Vector3i &pos : block_positions) {
if (!lod.map.has_block(pos)) {
// Box variant: clips `p_blocks_box` to the bounds expressed in blocks, then checks every cell against the map
// of LOD `lod_index`, under an `RWLockRead` guard on that LOD's `map_lock`.
// NOTE(review): the bounds are downscaled by `get_block_size()` without taking `lod_index` into account —
// confirm this is intended for LODs other than 0.
// NOTE(review): the statement run for a missing block is not visible in this view; presumably the position is
// appended to `out_missing` — confirm.
void VoxelData::get_missing_blocks(
Box3i p_blocks_box, unsigned int lod_index, std::vector<Vector3i> &out_missing) const {
const Lod &data_lod = _lods[lod_index];
const Box3i bounds_in_blocks = _bounds_in_voxels.downscaled(get_block_size());
const Box3i blocks_box = p_blocks_box.clipped(bounds_in_blocks);
RWLockRead rlock(data_lod.map_lock);
blocks_box.for_each_cell_zxy([&data_lod, &out_missing](Vector3i bpos) {
if (!data_lod.map.has_block(bpos)) {
// Collects the shared voxel buffers of blocks inside `p_blocks_box` at LOD `lod_index` into `out_blocks`,
// visiting cells in ZXY order under an `RWLockRead` guard on that LOD's `map_lock`.
// Only blocks that exist and have voxels are written. `out_blocks` is asserted to be at least as large as the
// box's volume. Returns `count`.
// NOTE(review): no increment of `count` is visible in this view; presumably it follows the store into
// `out_blocks[count]` — confirm, otherwise every store lands in slot 0 and 0 is returned.
unsigned int VoxelData::get_blocks_with_voxel_data(
Box3i p_blocks_box, unsigned int lod_index, Span<std::shared_ptr<VoxelBufferInternal>> out_blocks) const {
ZN_ASSERT(int64_t(out_blocks.size()) >= Vector3iUtil::get_volume(p_blocks_box.size));
const Lod &data_lod = _lods[lod_index];
RWLockRead rlock(data_lod.map_lock);
unsigned int count = 0;
// Iteration order matters for thread access.
p_blocks_box.for_each_cell_zxy([&count, &data_lod, &out_blocks](Vector3i data_block_pos) {
const VoxelDataBlock *nblock = data_lod.map.get_block(data_block_pos);
// The block can actually be null on some occasions. Not sure yet if it's that bad
//CRASH_COND(nblock == nullptr);
if (nblock != nullptr && nblock->has_voxels()) {
out_blocks[count] = nblock->get_voxels_shared();
return count;
// Asks `grid` to reference the area `box_in_voxels` of the map of LOD `lod_index`, under an `RWLockRead` guard
// on that LOD's `map_lock`.
void VoxelData::get_blocks_grid(VoxelDataGrid &grid, Box3i box_in_voxels, unsigned int lod_index) const {
const Lod &data_lod = _lods[lod_index];
RWLockRead rlock(data_lod.map_lock);
grid.reference_area(data_lod.map, box_in_voxels);
} // namespace zylann::voxel