From 864c1dc14a641c74272ce012e12622e022c23922 Mon Sep 17 00:00:00 2001 From: Nick Fisher Date: Sun, 10 Jul 2022 17:50:58 +1000 Subject: [PATCH] add additional C++ source files needed for Android build --- android/src/main/cpp/JobSystem.cpp | 610 ++++++++++++++++++ android/src/main/cpp/KtxReader1.cpp | 259 ++++++++ android/src/main/cpp/StbProvider.cpp | 261 ++++++++ ...{filament_api.cpp => filament_android.cpp} | 35 +- android/src/main/cpp/filament_api.h | 13 - 5 files changed, 1149 insertions(+), 29 deletions(-) create mode 100644 android/src/main/cpp/JobSystem.cpp create mode 100644 android/src/main/cpp/KtxReader1.cpp create mode 100644 android/src/main/cpp/StbProvider.cpp rename android/src/main/cpp/{filament_api.cpp => filament_android.cpp} (85%) delete mode 100644 android/src/main/cpp/filament_api.h diff --git a/android/src/main/cpp/JobSystem.cpp b/android/src/main/cpp/JobSystem.cpp new file mode 100644 index 00000000..7ce947ed --- /dev/null +++ b/android/src/main/cpp/JobSystem.cpp @@ -0,0 +1,610 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Note: The overhead of SYSTRACE_TAG_JOBSYSTEM is not negligible especially with parallel_for(). 
+#ifndef SYSTRACE_TAG +//#define SYSTRACE_TAG SYSTRACE_TAG_JOBSYSTEM +#define SYSTRACE_TAG SYSTRACE_TAG_NEVER +#endif + +// when SYSTRACE_TAG_JOBSYSTEM is used, enables even heavier systraces +#define HEAVY_SYSTRACE 0 + +// enable for catching hangs waiting on a job to finish +static constexpr bool DEBUG_FINISH_HANGS = false; + +#include + +#include +#include +#include +#include + +#include + +#include + +#if !defined(WIN32) +# include +#endif + +#ifdef __ANDROID__ +# include +# include +# ifndef ANDROID_PRIORITY_URGENT_DISPLAY +# define ANDROID_PRIORITY_URGENT_DISPLAY -8 // see include/system/thread_defs.h +# endif +# ifndef ANDROID_PRIORITY_DISPLAY +# define ANDROID_PRIORITY_DISPLAY -4 // see include/system/thread_defs.h +# endif +# ifndef ANDROID_PRIORITY_NORMAL +# define ANDROID_PRIORITY_NORMAL 0 // see include/system/thread_defs.h +# endif +#elif defined(__linux__) +// There is no glibc wrapper for gettid on linux so we need to syscall it. +# include +# include +# define gettid() syscall(SYS_gettid) +#endif + +#if HEAVY_SYSTRACE +# define HEAVY_SYSTRACE_CALL() SYSTRACE_CALL() +# define HEAVY_SYSTRACE_NAME(name) SYSTRACE_NAME(name) +# define HEAVY_SYSTRACE_VALUE32(name, v) SYSTRACE_VALUE32(name, v) +#else +# define HEAVY_SYSTRACE_CALL() +# define HEAVY_SYSTRACE_NAME(name) +# define HEAVY_SYSTRACE_VALUE32(name, v) +#endif + +namespace utils { + +void JobSystem::setThreadName(const char* name) noexcept { +#if defined(__linux__) + pthread_setname_np(pthread_self(), name); +#elif defined(__APPLE__) + pthread_setname_np(name); +#else +// TODO: implement setting thread name on WIN32 +#endif +} + +void JobSystem::setThreadPriority(Priority priority) noexcept { +#ifdef __ANDROID__ + int androidPriority = 0; + switch (priority) { + case Priority::NORMAL: + androidPriority = ANDROID_PRIORITY_NORMAL; + break; + case Priority::DISPLAY: + androidPriority = ANDROID_PRIORITY_DISPLAY; + break; + case Priority::URGENT_DISPLAY: + androidPriority = 
ANDROID_PRIORITY_URGENT_DISPLAY; + break; + } + setpriority(PRIO_PROCESS, 0, androidPriority); +#endif +} + +void JobSystem::setThreadAffinityById(size_t id) noexcept { +#if defined(__linux__) + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(id, &set); + sched_setaffinity(gettid(), sizeof(set), &set); +#endif +} + +JobSystem::JobSystem(const size_t userThreadCount, const size_t adoptableThreadsCount) noexcept + : mJobPool("JobSystem Job pool", MAX_JOB_COUNT * sizeof(Job)), + mJobStorageBase(static_cast(mJobPool.getAllocator().getCurrent())) +{ + SYSTRACE_ENABLE(); + + int threadPoolCount = userThreadCount; + if (threadPoolCount == 0) { + // default value, system dependant + int hwThreads = std::thread::hardware_concurrency(); + if (UTILS_HAS_HYPER_THREADING) { + // For now we avoid using HT, this simplifies profiling. + // TODO: figure-out what to do with Hyper-threading + // since we assumed HT, always round-up to an even number of cores (to play it safe) + hwThreads = (hwThreads + 1) / 2; + } + // one of the thread will be the user thread + threadPoolCount = hwThreads - 1; + } + // make sure we have at least one thread in the thread pool + threadPoolCount = std::max(1, threadPoolCount); + // and also limit the pool to 32 threads + threadPoolCount = std::min(UTILS_HAS_THREADING ? 
32 : 0, threadPoolCount); + + mThreadStates = aligned_vector(threadPoolCount + adoptableThreadsCount); + mThreadCount = uint16_t(threadPoolCount); + mParallelSplitCount = (uint8_t)std::ceil((std::log2f(threadPoolCount + adoptableThreadsCount))); + + static_assert(std::atomic::is_always_lock_free); + static_assert(std::atomic::is_always_lock_free); + + std::random_device rd; + const size_t hardwareThreadCount = mThreadCount; + auto& states = mThreadStates; + + #pragma nounroll + for (size_t i = 0, n = states.size(); i < n; i++) { + auto& state = states[i]; + state.rndGen = default_random_engine(rd()); + state.id = (uint32_t)i; + state.js = this; + if (i < hardwareThreadCount) { + // don't start a thread of adoptable thread slots + state.thread = std::thread(&JobSystem::loop, this, &state); + } + } +} + +JobSystem::~JobSystem() { + requestExit(); + + #pragma nounroll + for (auto &state : mThreadStates) { + // adopted threads are not joinable + if (state.thread.joinable()) { + state.thread.join(); + } + } +} + +inline void JobSystem::incRef(Job const* job) noexcept { + // no action is taken when incrementing the reference counter, therefore we can safely use + // memory_order_relaxed. + job->refCount.fetch_add(1, std::memory_order_relaxed); +} + +UTILS_NOINLINE +void JobSystem::decRef(Job const* job) noexcept { + + // We must ensure that accesses from other threads happen before deleting the Job. + // To accomplish this, we need to guarantee that no read/writes are reordered after the + // dec-ref, because ANOTHER thread could hold the last reference (after us) and that thread + // needs to see all accesses completed before it deletes the object. This is done + // with memory_order_release. + // Similarly, we need to guarantee that no read/write are reordered before the last decref, + // or some other thread could see a destroyed object before the ref-count is 0. This is done + // with memory_order_acquire. 
+ auto c = job->refCount.fetch_sub(1, std::memory_order_acq_rel); + assert(c > 0); + if (c == 1) { + // This was the last reference, it's safe to destroy the job. + mJobPool.destroy(job); + } +} + +void JobSystem::requestExit() noexcept { + mExitRequested.store(true); + std::lock_guard lock(mWaiterLock); + mWaiterCondition.notify_all(); +} + +inline bool JobSystem::exitRequested() const noexcept { + // memory_order_relaxed is safe because the only action taken is to exit the thread + return mExitRequested.load(std::memory_order_relaxed); +} + +inline bool JobSystem::hasActiveJobs() const noexcept { + return mActiveJobs.load(std::memory_order_relaxed) > 0; +} + +inline bool JobSystem::hasJobCompleted(JobSystem::Job const* job) noexcept { + return job->runningJobCount.load(std::memory_order_acquire) <= 0; +} + +void JobSystem::wait(std::unique_lock& lock, Job* job) noexcept { + if constexpr (!DEBUG_FINISH_HANGS) { + mWaiterCondition.wait(lock); + } else { + do { + // we use a pretty long timeout (4s) so we're very confident that the system is hung + // and nothing else is happening. + std::cv_status status = mWaiterCondition.wait_for(lock, + std::chrono::milliseconds(4000)); + if (status == std::cv_status::no_timeout) { + break; + } + + // hang debugging... + + // we check of we had active jobs or if the job we're waiting on had completed already. + // there is the possibility of a race condition, but our long timeout gives us some + // confidence that we're in an incorrect state. 
+ + auto id = getState().id; + auto activeJobs = mActiveJobs.load(); + + if (job) { + auto runningJobCount = job->runningJobCount.load(); + ASSERT_POSTCONDITION(runningJobCount > 0, + "JobSystem(%p, %d): waiting while job %p has completed and %d jobs are active!", + this, id, job, activeJobs); + } + + ASSERT_POSTCONDITION(activeJobs <= 0, + "JobSystem(%p, %d): waiting while %d jobs are active!", + this, id, activeJobs); + + } while (true); + } +} + +void JobSystem::wakeAll() noexcept { + HEAVY_SYSTRACE_CALL(); + std::lock_guard lock(mWaiterLock); + // this empty critical section is needed -- it guarantees that notify_all() happens + // after the condition's variables are set. + mWaiterCondition.notify_all(); +} + +void JobSystem::wakeOne() noexcept { + HEAVY_SYSTRACE_CALL(); + std::lock_guard lock(mWaiterLock); + // this empty critical section is needed -- it guarantees that notify_one() happens + // after the condition's variables are set. + mWaiterCondition.notify_one(); +} + +inline JobSystem::ThreadState& JobSystem::getState() noexcept { + std::lock_guard lock(mThreadMapLock); + auto iter = mThreadMap.find(std::this_thread::get_id()); + ASSERT_PRECONDITION(iter != mThreadMap.end(), "This thread has not been adopted."); + return *iter->second; +} + +JobSystem::Job* JobSystem::allocateJob() noexcept { + return mJobPool.make(); +} + +void JobSystem::put(WorkQueue& workQueue, Job* job) noexcept { + assert(job); + size_t index = job - mJobStorageBase; + assert(index >= 0 && index < MAX_JOB_COUNT); + + // put the job into the queue first + workQueue.push(uint16_t(index + 1)); + // then increase our active job count + uint32_t oldActiveJobs = mActiveJobs.fetch_add(1, std::memory_order_relaxed); + // but it's possible that the job has already been picked-up, so oldActiveJobs could be + // negative for instance. We signal only if that's not the case. + if (oldActiveJobs >= 0) { + wakeOne(); // wake-up a thread if needed... 
+ } +} + +JobSystem::Job* JobSystem::pop(WorkQueue& workQueue) noexcept { + // decrement mActiveJobs first, this is to ensure that if there is only a single job left + // (and we're about to pick it up), other threads don't loop trying to do the same. + mActiveJobs.fetch_sub(1, std::memory_order_relaxed); + + size_t index = workQueue.pop(); + assert(index <= MAX_JOB_COUNT); + Job* job = !index ? nullptr : &mJobStorageBase[index - 1]; + + // if our guess was wrong, i.e. we couldn't pick-up a job (b/c our queue was empty), we + // need to correct mActiveJobs. + if (!job) { + if (mActiveJobs.fetch_add(1, std::memory_order_relaxed) >= 0) { + // and if there are some active jobs, then we need to wake someone up. We know it + // can't be us, because we failed taking a job and we know another thread can't + // have added one in our queue. + wakeOne(); + } + } + return job; +} + +JobSystem::Job* JobSystem::steal(WorkQueue& workQueue) noexcept { + // decrement mActiveJobs first, this is to ensure that if there is only a single job left + // (and we're about to pick it up), other threads don't loop trying to do the same. + mActiveJobs.fetch_sub(1, std::memory_order_relaxed); + + size_t index = workQueue.steal(); + assert(index <= MAX_JOB_COUNT); + Job* job = !index ? nullptr : &mJobStorageBase[index - 1]; + + // if we failed taking a job, we need to correct mActiveJobs + if (!job) { + if (mActiveJobs.fetch_add(1, std::memory_order_relaxed) >= 0) { + // and if there are some active jobs, then we need to wake someone up. We know it + // can't be us, because we failed taking a job and we know another thread can't + // have added one in our queue. 
+ wakeOne(); + } + } + return job; +} + +inline JobSystem::ThreadState* JobSystem::getStateToStealFrom(JobSystem::ThreadState& state) noexcept { + auto& threadStates = mThreadStates; + // memory_order_relaxed is okay because we don't take any action that has data dependency + // on this value (in particular mThreadStates, is always initialized properly). + uint16_t adopted = mAdoptedThreads.load(std::memory_order_relaxed); + uint16_t const threadCount = mThreadCount + adopted; + + JobSystem::ThreadState* stateToStealFrom = nullptr; + + // don't try to steal from someone else if we're the only thread (infinite loop) + if (threadCount >= 2) { + do { + // this is biased, but frankly, we don't care. it's fast. + uint16_t index = uint16_t(state.rndGen() % threadCount); + assert(index < threadStates.size()); + stateToStealFrom = &threadStates[index]; + // don't steal from our own queue + } while (stateToStealFrom == &state); + } + return stateToStealFrom; +} + +JobSystem::Job* JobSystem::steal(JobSystem::ThreadState& state) noexcept { + HEAVY_SYSTRACE_CALL(); + Job* job = nullptr; + do { + ThreadState* const stateToStealFrom = getStateToStealFrom(state); + if (UTILS_LIKELY(stateToStealFrom)) { + job = steal(stateToStealFrom->workQueue); + } + // nullptr -> nothing to steal in that queue either, if there are active jobs, + // continue to try stealing one. 
+ } while (!job && hasActiveJobs()); + return job; +} + +bool JobSystem::execute(JobSystem::ThreadState& state) noexcept { + HEAVY_SYSTRACE_CALL(); + + Job* job = pop(state.workQueue); + if (UTILS_UNLIKELY(job == nullptr)) { + // our queue is empty, try to steal a job + job = steal(state); + } + + if (job) { + assert(job->runningJobCount.load(std::memory_order_relaxed) >= 1); + + if (UTILS_LIKELY(job->function)) { + HEAVY_SYSTRACE_NAME("job->function"); + job->function(job->storage, *this, job); + } + finish(job); + } + return job != nullptr; +} + +void JobSystem::loop(ThreadState* state) noexcept { + setThreadName("JobSystem::loop"); + setThreadPriority(Priority::DISPLAY); + + // set a CPU affinity on each of our JobSystem thread to prevent them from jumping from core + // to core. On Android, it looks like the affinity needs to be reset from time to time. + setThreadAffinityById(state->id); + + // record our work queue + mThreadMapLock.lock(); + bool inserted = mThreadMap.emplace(std::this_thread::get_id(), state).second; + mThreadMapLock.unlock(); + ASSERT_PRECONDITION(inserted, "This thread is already in a loop."); + + // run our main loop... + do { + if (!execute(*state)) { + std::unique_lock lock(mWaiterLock); + while (!exitRequested() && !hasActiveJobs()) { + wait(lock); + setThreadAffinityById(state->id); + } + } + } while (!exitRequested()); +} + +UTILS_NOINLINE +void JobSystem::finish(Job* job) noexcept { + HEAVY_SYSTRACE_CALL(); + + bool notify = false; + + // terminate this job and notify its parent + Job* const storage = mJobStorageBase; + do { + // std::memory_order_release here is needed to synchronize with JobSystem::wait() + // which needs to "see" all changes that happened before the job terminated. 
+ auto runningJobCount = job->runningJobCount.fetch_sub(1, std::memory_order_acq_rel); + assert(runningJobCount > 0); + if (runningJobCount == 1) { + // no more work, destroy this job and notify its parent + notify = true; + Job* const parent = job->parent == 0x7FFF ? nullptr : &storage[job->parent]; + decRef(job); + job = parent; + } else { + // there is still work (e.g.: children), we're done. + break; + } + } while (job); + + // wake-up all threads that could potentially be waiting on this job finishing + if (notify) { + wakeAll(); + } +} + +// ----------------------------------------------------------------------------------------------- +// public API... + + +JobSystem::Job* JobSystem::create(JobSystem::Job* parent, JobFunc func) noexcept { + parent = (parent == nullptr) ? mRootJob : parent; + Job* const job = allocateJob(); + if (UTILS_LIKELY(job)) { + size_t index = 0x7FFF; + if (parent) { + // add a reference to the parent to make sure it can't be terminated. + // memory_order_relaxed is safe because no action is taken at this point + // (the job is not started yet). 
+ auto parentJobCount = parent->runningJobCount.fetch_add(1, std::memory_order_relaxed); + + // can't create a child job of a terminated parent + assert(parentJobCount > 0); + + index = parent - mJobStorageBase; + assert(index < MAX_JOB_COUNT); + } + job->function = func; + job->parent = uint16_t(index); + } + return job; +} + +void JobSystem::cancel(Job*& job) noexcept { + finish(job); + job = nullptr; +} + +JobSystem::Job* JobSystem::retain(JobSystem::Job* job) noexcept { + JobSystem::Job* retained = job; + incRef(retained); + return retained; +} + +void JobSystem::release(JobSystem::Job*& job) noexcept { + decRef(job); + job = nullptr; +} + +void JobSystem::signal() noexcept { + wakeAll(); +} + +void JobSystem::run(Job*& job) noexcept { + HEAVY_SYSTRACE_CALL(); + + ThreadState& state(getState()); + + put(state.workQueue, job); + + // after run() returns, the job is virtually invalid (it'll die on its own) + job = nullptr; +} + +JobSystem::Job* JobSystem::runAndRetain(Job* job) noexcept { + JobSystem::Job* retained = retain(job); + run(job); + return retained; +} + +void JobSystem::waitAndRelease(Job*& job) noexcept { + SYSTRACE_CALL(); + + assert(job); + assert(job->refCount.load(std::memory_order_relaxed) >= 1); + + ThreadState& state(getState()); + do { + if (!execute(state)) { + // test if job has completed first, to possibly avoid taking the lock + if (hasJobCompleted(job)) { + break; + } + + // the only way we can be here is if the job we're waiting on it being handled + // by another thread: + // - we returned from execute() which means all queues are empty + // - yet our job hasn't completed yet + // ergo, it's being run in another thread + // + // this could take time however, so we will wait with a condition, and + // continue to handle more jobs, as they get added. 
+ + std::unique_lock lock(mWaiterLock); + if (!hasJobCompleted(job) && !hasActiveJobs() && !exitRequested()) { + wait(lock, job); + } + } + } while (!hasJobCompleted(job) && !exitRequested()); + + if (job == mRootJob) { + mRootJob = nullptr; + } + + release(job); +} + +void JobSystem::runAndWait(JobSystem::Job*& job) noexcept { + runAndRetain(job); + waitAndRelease(job); +} + +void JobSystem::adopt() { + const auto tid = std::this_thread::get_id(); + + std::unique_lock lock(mThreadMapLock); + auto iter = mThreadMap.find(tid); + ThreadState* const state = iter == mThreadMap.end() ? nullptr : iter->second; + lock.unlock(); + + if (state) { + // we're already part of a JobSystem, do nothing. + ASSERT_PRECONDITION(this == state->js, + "Called adopt on a thread owned by another JobSystem (%p), this=%p!", + state->js, this); + return; + } + + // memory_order_relaxed is safe because we don't take action on this value. + uint16_t adopted = mAdoptedThreads.fetch_add(1, std::memory_order_relaxed); + size_t index = mThreadCount + adopted; + + ASSERT_POSTCONDITION(index < mThreadStates.size(), + "Too many calls to adopt(). No more adoptable threads!"); + + // all threads adopted by the JobSystem need to run at the same priority + JobSystem::setThreadPriority(JobSystem::Priority::DISPLAY); + + // This thread's queue will be selectable immediately (i.e.: before we set its TLS) + // however, it's not a problem since mThreadState is pre-initialized and valid + // (e.g.: the queue is empty). + + lock.lock(); + mThreadMap[tid] = &mThreadStates[index]; +} + +void JobSystem::emancipate() { + const auto tid = std::this_thread::get_id(); + std::lock_guard lock(mThreadMapLock); + auto iter = mThreadMap.find(tid); + ThreadState* const state = iter == mThreadMap.end() ? 
nullptr : iter->second; + ASSERT_PRECONDITION(state, "this thread is not an adopted thread"); + ASSERT_PRECONDITION(state->js == this, "this thread is not adopted by us"); + mThreadMap.erase(iter); +} + +io::ostream& operator<<(io::ostream& out, JobSystem const& js) { + for (auto const& item : js.mThreadStates) { + out << size_t(item.id) << ": " << item.workQueue.getCount() << io::endl; + } + return out; +} + +} // namespace utils diff --git a/android/src/main/cpp/KtxReader1.cpp b/android/src/main/cpp/KtxReader1.cpp new file mode 100644 index 00000000..9148d861 --- /dev/null +++ b/android/src/main/cpp/KtxReader1.cpp @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +namespace ktxreader { +namespace Ktx1Reader { + +Texture* createTexture(Engine* engine, const Ktx1Bundle& ktx, bool srgb, + Callback callback, void* userdata) { + using Sampler = Texture::Sampler; + const auto& ktxinfo = ktx.getInfo(); + const uint32_t nmips = ktx.getNumMipLevels(); + const auto cdatatype = toCompressedPixelDataType(ktxinfo); + const auto datatype = toPixelDataType(ktxinfo); + const auto dataformat = toPixelDataFormat(ktxinfo); + + auto texformat = toTextureFormat(ktxinfo); + +#ifndef NDEBUG + if (srgb && !isSrgbTextureFormat(texformat)) { + utils::slog.w << "Requested sRGB format but KTX contains a linear format. 
" + << utils::io::endl; + } else if (!srgb && isSrgbTextureFormat(texformat)) { + utils::slog.w << "Requested linear format but KTX contains a sRGB format. " + << utils::io::endl; + } +#endif + + Texture* texture = Texture::Builder() + .width(ktxinfo.pixelWidth) + .height(ktxinfo.pixelHeight) + .levels(static_cast(nmips)) + .sampler(ktx.isCubemap() ? Sampler::SAMPLER_CUBEMAP : Sampler::SAMPLER_2D) + .format(texformat) + .build(*engine); + + struct Userdata { + uint32_t remainingBuffers; + Callback callback; + void* userdata; + }; + + Userdata* cbuser = new Userdata({nmips, callback, userdata}); + + PixelBufferDescriptor::Callback cb = [](void*, size_t, void* cbuserptr) { + Userdata* cbuser = (Userdata*) cbuserptr; + if (--cbuser->remainingBuffers == 0) { + if (cbuser->callback) { + cbuser->callback(cbuser->userdata); + } + delete cbuser; + } + }; + + uint8_t* data; + uint32_t size; + + if (isCompressed(ktxinfo)) { + if (ktx.isCubemap()) { + for (uint32_t level = 0; level < nmips; ++level) { + ktx.getBlob({level, 0, 0}, &data, &size); + PixelBufferDescriptor pbd(data, size * 6, cdatatype, size, cb, cbuser); + texture->setImage(*engine, level, std::move(pbd), Texture::FaceOffsets(size)); + } + return texture; + } + for (uint32_t level = 0; level < nmips; ++level) { + ktx.getBlob({level, 0, 0}, &data, &size); + PixelBufferDescriptor pbd(data, size, cdatatype, size, cb, cbuser); + texture->setImage(*engine, level, std::move(pbd)); + } + return texture; + } + + if (ktx.isCubemap()) { + for (uint32_t level = 0; level < nmips; ++level) { + ktx.getBlob({level, 0, 0}, &data, &size); + PixelBufferDescriptor pbd(data, size * 6, dataformat, datatype, cb, cbuser); + texture->setImage(*engine, level, std::move(pbd), Texture::FaceOffsets(size)); + } + return texture; + } + + for (uint32_t level = 0; level < nmips; ++level) { + ktx.getBlob({level, 0, 0}, &data, &size); + PixelBufferDescriptor pbd(data, size, dataformat, datatype, cb, cbuser); + texture->setImage(*engine, level, 
std::move(pbd)); + } + return texture; +} + +Texture* createTexture(Engine* engine, Ktx1Bundle* ktx, bool srgb) { + auto freeKtx = [] (void* userdata) { + Ktx1Bundle* ktx = (Ktx1Bundle*) userdata; + delete ktx; + }; + return createTexture(engine, *ktx, srgb, freeKtx, ktx); +} + +CompressedPixelDataType toCompressedPixelDataType(const KtxInfo& info) { + return toCompressedFilamentEnum(info.glInternalFormat); +} + +PixelDataType toPixelDataType(const KtxInfo& info) { + switch (info.glType) { + case Ktx1Bundle::UNSIGNED_BYTE: return PixelDataType::UBYTE; + case Ktx1Bundle::UNSIGNED_SHORT: return PixelDataType::USHORT; + case Ktx1Bundle::HALF_FLOAT: return PixelDataType::HALF; + case Ktx1Bundle::FLOAT: return PixelDataType::FLOAT; + case Ktx1Bundle::R11F_G11F_B10F: return PixelDataType::UINT_10F_11F_11F_REV; + } + return (PixelDataType) 0xff; +} + +PixelDataFormat toPixelDataFormat(const KtxInfo& info) { + switch (info.glFormat) { + case Ktx1Bundle::LUMINANCE: + case Ktx1Bundle::RED: return PixelDataFormat::R; + case Ktx1Bundle::RG: return PixelDataFormat::RG; + case Ktx1Bundle::RGB: return PixelDataFormat::RGB; + case Ktx1Bundle::RGBA: return PixelDataFormat::RGBA; + // glFormat should NOT be a sized format according to the spec + // however cmgen was generating incorrect files until after Filament 1.8.0 + // so we keep this line here to preserve compatibility with older assets + case Ktx1Bundle::R11F_G11F_B10F: return PixelDataFormat::RGB; + } + return (PixelDataFormat) 0xff; +} + +bool isCompressed(const KtxInfo& info) { + return info.glFormat == 0; +} + +bool isSrgbTextureFormat(TextureFormat format) { + switch(format) { + // Non-compressed + case Texture::InternalFormat::RGB8: + case Texture::InternalFormat::RGBA8: + return false; + + // ASTC + case Texture::InternalFormat::RGBA_ASTC_4x4: + case Texture::InternalFormat::RGBA_ASTC_5x4: + case Texture::InternalFormat::RGBA_ASTC_5x5: + case Texture::InternalFormat::RGBA_ASTC_6x5: + case 
Texture::InternalFormat::RGBA_ASTC_6x6: + case Texture::InternalFormat::RGBA_ASTC_8x5: + case Texture::InternalFormat::RGBA_ASTC_8x6: + case Texture::InternalFormat::RGBA_ASTC_8x8: + case Texture::InternalFormat::RGBA_ASTC_10x5: + case Texture::InternalFormat::RGBA_ASTC_10x6: + case Texture::InternalFormat::RGBA_ASTC_10x8: + case Texture::InternalFormat::RGBA_ASTC_10x10: + case Texture::InternalFormat::RGBA_ASTC_12x10: + case Texture::InternalFormat::RGBA_ASTC_12x12: + return false; + + // ETC2 + case Texture::InternalFormat::ETC2_RGB8: + case Texture::InternalFormat::ETC2_RGB8_A1: + case Texture::InternalFormat::ETC2_EAC_RGBA8: + return false; + + // DXT + case Texture::InternalFormat::DXT1_RGB: + case Texture::InternalFormat::DXT1_RGBA: + case Texture::InternalFormat::DXT3_RGBA: + case Texture::InternalFormat::DXT5_RGBA: + return false; + + default: + return true; + } +} + +TextureFormat toTextureFormat(const KtxInfo& info) { + switch (info.glInternalFormat) { + case Ktx1Bundle::RED: return TextureFormat::R8; + case Ktx1Bundle::RG: return TextureFormat::RG8; + case Ktx1Bundle::RGB: return TextureFormat::RGB8; + case Ktx1Bundle::RGBA: return TextureFormat::RGBA8; + case Ktx1Bundle::LUMINANCE: return TextureFormat::R8; + case Ktx1Bundle::LUMINANCE_ALPHA: return TextureFormat::RG8; + case Ktx1Bundle::R8: return TextureFormat::R8; + case Ktx1Bundle::R8_SNORM: return TextureFormat::R8_SNORM; + case Ktx1Bundle::R8UI: return TextureFormat::R8UI; + case Ktx1Bundle::R8I: return TextureFormat::R8I; + case Ktx1Bundle::STENCIL_INDEX8: return TextureFormat::STENCIL8; + case Ktx1Bundle::R16F: return TextureFormat::R16F; + case Ktx1Bundle::R16UI: return TextureFormat::R16UI; + case Ktx1Bundle::R16I: return TextureFormat::R16I; + case Ktx1Bundle::RG8: return TextureFormat::RG8; + case Ktx1Bundle::RG8_SNORM: return TextureFormat::RG8_SNORM; + case Ktx1Bundle::RG8UI: return TextureFormat::RG8UI; + case Ktx1Bundle::RG8I: return TextureFormat::RG8I; + case Ktx1Bundle::RGB565: return 
TextureFormat::RGB565; + case Ktx1Bundle::RGB9_E5: return TextureFormat::RGB9_E5; + case Ktx1Bundle::RGB5_A1: return TextureFormat::RGB5_A1; + case Ktx1Bundle::RGBA4: return TextureFormat::RGBA4; + case Ktx1Bundle::DEPTH_COMPONENT16: return TextureFormat::DEPTH16; + case Ktx1Bundle::RGB8: return TextureFormat::RGB8; + case Ktx1Bundle::SRGB8: return TextureFormat::SRGB8; + case Ktx1Bundle::RGB8_SNORM: return TextureFormat::RGB8_SNORM; + case Ktx1Bundle::RGB8UI: return TextureFormat::RGB8UI; + case Ktx1Bundle::RGB8I: return TextureFormat::RGB8I; + case Ktx1Bundle::R32F: return TextureFormat::R32F; + case Ktx1Bundle::R32UI: return TextureFormat::R32UI; + case Ktx1Bundle::R32I: return TextureFormat::R32I; + case Ktx1Bundle::RG16F: return TextureFormat::RG16F; + case Ktx1Bundle::RG16UI: return TextureFormat::RG16UI; + case Ktx1Bundle::RG16I: return TextureFormat::RG16I; + case Ktx1Bundle::R11F_G11F_B10F: return TextureFormat::R11F_G11F_B10F; + case Ktx1Bundle::RGBA8: return TextureFormat::RGBA8; + case Ktx1Bundle::SRGB8_ALPHA8: return TextureFormat::SRGB8_A8; + case Ktx1Bundle::RGBA8_SNORM: return TextureFormat::RGBA8_SNORM; + case Ktx1Bundle::RGB10_A2: return TextureFormat::RGB10_A2; + case Ktx1Bundle::RGBA8UI: return TextureFormat::RGBA8UI; + case Ktx1Bundle::RGBA8I: return TextureFormat::RGBA8I; + case Ktx1Bundle::DEPTH24_STENCIL8: return TextureFormat::DEPTH24_STENCIL8; + case Ktx1Bundle::DEPTH32F_STENCIL8: return TextureFormat::DEPTH32F_STENCIL8; + case Ktx1Bundle::RGB16F: return TextureFormat::RGB16F; + case Ktx1Bundle::RGB16UI: return TextureFormat::RGB16UI; + case Ktx1Bundle::RGB16I: return TextureFormat::RGB16I; + case Ktx1Bundle::RG32F: return TextureFormat::RG32F; + case Ktx1Bundle::RG32UI: return TextureFormat::RG32UI; + case Ktx1Bundle::RG32I: return TextureFormat::RG32I; + case Ktx1Bundle::RGBA16F: return TextureFormat::RGBA16F; + case Ktx1Bundle::RGBA16UI: return TextureFormat::RGBA16UI; + case Ktx1Bundle::RGBA16I: return TextureFormat::RGBA16I; + case 
Ktx1Bundle::RGB32F: return TextureFormat::RGB32F; + case Ktx1Bundle::RGB32UI: return TextureFormat::RGB32UI; + case Ktx1Bundle::RGB32I: return TextureFormat::RGB32I; + case Ktx1Bundle::RGBA32F: return TextureFormat::RGBA32F; + case Ktx1Bundle::RGBA32UI: return TextureFormat::RGBA32UI; + case Ktx1Bundle::RGBA32I: return TextureFormat::RGBA32I; + } + return toCompressedFilamentEnum(info.glInternalFormat); +} + +} // namespace Ktx1Reader +} // namespace ktxreader diff --git a/android/src/main/cpp/StbProvider.cpp b/android/src/main/cpp/StbProvider.cpp new file mode 100644 index 00000000..5f796599 --- /dev/null +++ b/android/src/main/cpp/StbProvider.cpp @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <gltfio/TextureProvider.h>
+
+#include <atomic>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <utils/JobSystem.h>
+
+#include <filament/Engine.h>
+#include <filament/Texture.h>
+#define STB_IMAGE_IMPLEMENTATION
+#include <stb_image.h>
+
+using namespace filament;
+using namespace utils;
+
+using std::atomic;
+using std::vector;
+using std::unique_ptr;
+
+namespace filament::gltfio {
+
+/**
+ * TextureProvider implementation that decodes images with stb_image.
+ *
+ * pushTexture() builds the Texture object immediately and copies the encoded bytes; the texels
+ * are decoded either by a JobSystem job (threaded builds) or one texture per updateQueue() call
+ * (non-threaded builds). updateQueue() uploads decoded texels and generates mipmaps, after which
+ * the texture can be retrieved with popTexture().
+ */
+class StbProvider final : public TextureProvider {
+public:
+    explicit StbProvider(Engine* engine);
+    ~StbProvider();
+
+    Texture* pushTexture(const uint8_t* data, size_t byteCount,
+            const char* mimeType, uint64_t flags) final;
+
+    Texture* popTexture() final;
+    void updateQueue() final;
+    void waitForCompletion() final;
+    void cancelDecoding() final;
+    const char* getPushMessage() const final;
+    const char* getPopMessage() const final;
+    size_t getPushedCount() const final { return mPushedCount; }
+    size_t getPoppedCount() const final { return mPoppedCount; }
+    size_t getDecodedCount() const final { return mDecodedCount; }
+
+private:
+    enum class TextureState {
+        DECODING, // Texture has been pushed, mipmap levels are not yet complete.
+        READY,    // Mipmap levels are available but texture has not been popped yet.
+        POPPED,   // Client has popped the texture from the queue.
+    };
+
+    // Bookkeeping for one pushed texture.
+    struct TextureInfo {
+        Texture* texture;
+        TextureState state;
+        // Either one of the sentinels below or the address of the decoded RGBA texels.
+        atomic<intptr_t> decodedTexelsBaseMipmap;
+        // Private copy of the encoded image; released as soon as decoding has run.
+        vector<uint8_t> sourceBuffer;
+        // Null when decoding is performed from updateQueue() instead of a job.
+        JobSystem::Job* decoderJob;
+    };
+
+    // Declare some sentinel values for the "decodedTexelsBaseMipmap" field.
+    // Note that the "state" field can be modified only on the foreground thread.
+    static const intptr_t DECODING_NOT_READY = 0x0;
+    static const intptr_t DECODING_ERROR = 0x1;
+
+    void decodeSingleTexture();
+
+    size_t mPushedCount = 0;
+    size_t mPoppedCount = 0;
+    size_t mDecodedCount = 0;
+    vector<unique_ptr<TextureInfo> > mTextures;
+    JobSystem::Job* mDecoderRootJob;
+    std::string mRecentPushMessage;
+    std::string mRecentPopMessage;
+    Engine* const mEngine;
+};
+
+/**
+ * Creates a Texture sized from the image header and queues the image for decoding.
+ *
+ * The encoded bytes are copied, so the caller's buffer is not referenced after this returns.
+ * The mimeType argument is not consulted. If the sRGB flag is set the texture uses SRGB8_A8,
+ * otherwise RGBA8.
+ *
+ * Returns nullptr (and records a push message) if the header cannot be parsed or the Texture
+ * cannot be built.
+ */
+Texture* StbProvider::pushTexture(const uint8_t* data, size_t byteCount,
+            const char* mimeType, FlagBits flags) {
+    int width, height, numComponents;
+    if (!stbi_info_from_memory(data, byteCount, &width, &height, &numComponents)) {
+        mRecentPushMessage = std::string("Unable to parse texture: ") + stbi_failure_reason();
+        return nullptr;
+    }
+
+    using InternalFormat = Texture::InternalFormat;
+    const FlagBits sRGB = FlagBits(Flags::sRGB);
+
+    Texture* texture = Texture::Builder()
+            .width(width)
+            .height(height)
+            .levels(0xff)
+            .format((flags & sRGB) ? InternalFormat::SRGB8_A8 : InternalFormat::RGBA8)
+            .build(*mEngine);
+
+    if (texture == nullptr) {
+        mRecentPushMessage = "Unable to build Texture object.";
+        return nullptr;
+    }
+
+    mRecentPushMessage.clear();
+    TextureInfo* info = mTextures.emplace_back(new TextureInfo).get();
+    ++mPushedCount;
+
+    info->texture = texture;
+    info->state = TextureState::DECODING;
+    info->sourceBuffer.assign(data, data + byteCount);
+    info->decodedTexelsBaseMipmap.store(DECODING_NOT_READY);
+
+    // On single threaded systems, it is usually fine to create jobs because the job system will
+    // simply execute serially. However in our case, we wish to amortize the decoder cost across
+    // several frames, so we instead use the updateQueue() method to perform decoding.
+    if constexpr (!UTILS_HAS_THREADING) {
+        info->decoderJob = nullptr;
+        return texture;
+    }
+
+    JobSystem* js = &mEngine->getJobSystem();
+    info->decoderJob = jobs::createJob(*js, mDecoderRootJob, [this, info] {
+        auto& source = info->sourceBuffer;
+        int width, height, comp;
+
+        // Test asynchronous loading by uncommenting this line.
+        // std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 10000));
+
+        stbi_uc* texels = stbi_load_from_memory(source.data(), source.size(),
+                &width, &height, &comp, 4);
+        // The encoded bytes are no longer needed once decoding has been attempted.
+        source.clear();
+        source.shrink_to_fit();
+        // Publishing the pointer (or the error sentinel) is what updateQueue() polls for.
+        info->decodedTexelsBaseMipmap.store(texels ? intptr_t(texels) : DECODING_ERROR);
+    });
+
+    js->runAndRetain(info->decoderJob);
+    return texture;
+}
+
+/**
+ * Returns the first texture in the READY state and marks it POPPED, or nullptr if none is ready.
+ * The pop message is set to "Texture is incomplete" when the texture's decode did not produce
+ * texels, and cleared otherwise.
+ */
+Texture* StbProvider::popTexture() {
+    // We don't bother shrinking the mTextures vector here, instead we periodically clean it up in
+    // the updateQueue method, since popTexture is typically called more frequently. Textures
+    // can become ready in non-deterministic order due to concurrency.
+    for (auto& texture : mTextures) {
+        if (texture->state == TextureState::READY) {
+            texture->state = TextureState::POPPED;
+            ++mPoppedCount;
+            const intptr_t ptr = texture->decodedTexelsBaseMipmap.load();
+            if (ptr == DECODING_ERROR || ptr == DECODING_NOT_READY) {
+                mRecentPopMessage = "Texture is incomplete";
+            } else {
+                mRecentPopMessage.clear();
+            }
+            return texture->texture;
+        }
+    }
+    return nullptr;
+}
+
+/**
+ * Promotes every texture whose decode has finished from DECODING to READY: uploads the texels
+ * to mip level 0, generates mipmaps, and bumps the decoded count. Decode failures are also
+ * promoted to READY (without an upload) so that popTexture() can report them. Finally, leading
+ * POPPED entries are removed from the queue.
+ */
+void StbProvider::updateQueue() {
+    if (!UTILS_HAS_THREADING) {
+        decodeSingleTexture();
+    }
+    JobSystem* js = &mEngine->getJobSystem();
+    for (auto& info : mTextures) {
+        if (info->state != TextureState::DECODING) {
+            continue;
+        }
+        Texture* texture = info->texture;
+        if (intptr_t data = info->decodedTexelsBaseMipmap.load()) {
+            if (info->decoderJob) {
+                js->waitAndRelease(info->decoderJob);
+            }
+            if (data == DECODING_ERROR) {
+                info->state = TextureState::READY;
+                ++mDecodedCount;
+                continue;
+            }
+            // Ownership of the texels moves to the descriptor; its callback frees them.
+            Texture::PixelBufferDescriptor pbd((uint8_t*) data,
+                    texture->getWidth() * texture->getHeight() * 4, Texture::Format::RGBA,
+                    Texture::Type::UBYTE, [](void* mem, size_t, void*) { stbi_image_free(mem); });
+            texture->setImage(*mEngine, 0, std::move(pbd));
+
+            // Call generateMipmaps unconditionally to fulfill the promise of the TextureProvider
+            // interface. Providers of hierarchical images (e.g. KTX) call this only if needed.
+            texture->generateMipmaps(*mEngine);
+
+            info->state = TextureState::READY;
+            ++mDecodedCount;
+        }
+    }
+
+    // Here we periodically clean up the "queue" (which is really just a vector) by removing unused
+    // items from the front. This might ignore a popped texture that occurs in the middle of the
+    // vector, but that's okay, it will be cleaned up eventually.
+    decltype(mTextures)::iterator last = mTextures.begin();
+    while (last != mTextures.end() && (*last)->state == TextureState::POPPED) ++last;
+    mTextures.erase(mTextures.begin(), last);
+}
+
+// Blocks until every outstanding decoder job has finished, releasing each job.
+void StbProvider::waitForCompletion() {
+    JobSystem& js = mEngine->getJobSystem();
+    for (auto& info : mTextures) {
+        if (info->decoderJob) {
+            js.waitAndRelease(info->decoderJob);
+        }
+    }
+}
+
+void StbProvider::cancelDecoding() {
+    // TODO: Currently, StbProvider runs jobs eagerly and JobSystem does not allow cancellation of
+    // in-flight jobs. We should consider throttling the number of simultaneous decoder jobs, which
+    // would allow for actual cancellation.
+    waitForCompletion();
+}
+
+// Returns the message recorded by the most recent pushTexture(), or nullptr if there is none.
+const char* StbProvider::getPushMessage() const {
+    return mRecentPushMessage.empty() ? nullptr : mRecentPushMessage.c_str();
+}
+
+// Returns the message recorded by the most recent popTexture(), or nullptr if there is none.
+const char* StbProvider::getPopMessage() const {
+    return mRecentPopMessage.empty() ? nullptr : mRecentPopMessage.c_str();
+}
+
+// Non-threaded builds only: decodes the first texture still in the DECODING state, so that the
+// decoding cost is spread over successive updateQueue() calls.
+void StbProvider::decodeSingleTexture() {
+    assert_invariant(!UTILS_HAS_THREADING);
+    for (auto& info : mTextures) {
+        if (info->state == TextureState::DECODING) {
+            auto& source = info->sourceBuffer;
+            int width, height, comp;
+            stbi_uc* texels = stbi_load_from_memory(source.data(), source.size(),
+                    &width, &height, &comp, 4);
+            source.clear();
+            source.shrink_to_fit();
+            info->decodedTexelsBaseMipmap.store(texels ? intptr_t(texels) : DECODING_ERROR);
+            break;
+        }
+    }
+}
+
+StbProvider::StbProvider(Engine* engine) : mEngine(engine) {
+    mDecoderRootJob = mEngine->getJobSystem().createJob();
+}
+
+StbProvider::~StbProvider() {
+    cancelDecoding();
+
+    // All decoder jobs have been joined above, so the queue can be inspected without racing.
+    // Texels that were decoded but never handed to a PixelBufferDescriptor by updateQueue()
+    // are still owned by us and would otherwise leak.
+    for (auto& info : mTextures) {
+        if (info->state != TextureState::DECODING) {
+            continue;
+        }
+        const intptr_t texels = info->decodedTexelsBaseMipmap.load();
+        if (texels != DECODING_NOT_READY && texels != DECODING_ERROR) {
+            stbi_image_free(reinterpret_cast<void*>(texels));
+        }
+    }
+
+    mEngine->getJobSystem().release(mDecoderRootJob);
+}
+
+// Factory for the stb_image based provider; the caller owns the returned object.
+TextureProvider* createStbProvider(Engine* engine) {
+    return new StbProvider(engine);
+}
+
+} // namespace filament::gltfio
diff --git a/android/src/main/cpp/filament_api.cpp b/android/src/main/cpp/filament_android.cpp similarity index 85% rename from android/src/main/cpp/filament_api.cpp rename to android/src/main/cpp/filament_android.cpp index c044df09..35f19094 100644 --- a/android/src/main/cpp/filament_api.cpp +++ b/android/src/main/cpp/filament_android.cpp @@ -10,7 +10,7 @@ using namespace std; static AAssetManager* am; -vector _assets; +static vector _assets; uint64_t id = -1; static polyvox::ResourceBuffer loadResource(const char* name) { @@ -25,21 +25,29 @@ static polyvox::ResourceBuffer loadResource(const char* name) { __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Loading asset [ %s ]", name); off_t length = AAsset_getLength(asset); const void * buffer = AAsset_getBuffer(asset); + + uint8_t *buf = new uint8_t[length ]; + memcpy(buf,buffer, length); __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Read [ %lu ] bytes into buffer", length); _assets.push_back(asset); __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Loaded asset [ %s ] of length %zu", name, length); - return ResourceBuffer(buffer, length, id); + return ResourceBuffer(buf, length, id); } static void freeResource(ResourceBuffer rb) { - AAsset_close(_assets.at(rb.id)); + __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Freeing loaded resource at index [ %d ] ", rb.id); + AAsset* asset = _assets[rb.id]; + if(asset) { + AAsset_close(asset); + } + _assets[rb.id] = nullptr; } extern "C" { void load_skybox(void* viewer, const char* skyboxPath, const char* iblPath) { -
((FilamentViewer*)viewer)->loadSkybox(skyboxPath, iblPath, am); + ((FilamentViewer*)viewer)->loadSkybox(skyboxPath, iblPath); } void load_glb(void* viewer, const char* assetPath) { @@ -61,7 +69,6 @@ extern "C" { ) { ANativeWindow* layer = ANativeWindow_fromSurface(env, surface); am = AAssetManager_fromJava(env, assetManager); - return new FilamentViewer((void*)layer, loadResource, freeResource); } @@ -104,7 +111,7 @@ extern "C" { } void grab_update(void* viewer, int x, int y) { - __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Grab update at %d %d %d", x, y); + __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Grab update at %d %d", x, y); ((FilamentViewer*)viewer)->manipulator->grabUpdate(x, y); } @@ -132,29 +139,25 @@ extern "C" { char** names_c; names_c = new char*[names->size()]; for(int i = 0; i < names->size(); i++) { - __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Memcpy %d bytes ", names->at(i).size()); names_c[i] = (char*) malloc(names->at(i).size() +1); strcpy(names_c[i], names->at(i).c_str()); - __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Alloced animation name %s ", (char*) names_c[i]); } (*countPtr) = names->size(); return names_c; } char** get_target_names(void* viewer, char* meshName, int* countPtr ) { - StringList names = ((FilamentViewer*)viewer)->getTargetNames(meshName); + unique_ptr<vector<string>> names = ((FilamentViewer*)viewer)->getTargetNames(meshName); - __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Got %d names", names.count); + __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Got %zu names", names->size()); - *countPtr = names.count; + *countPtr = names->size(); char** retval; - retval = new char*[names.count]; + retval = new char*[names->size()]; - __android_log_print(ANDROID_LOG_VERBOSE, "filament_api", "Allocated char* array of size %d", names.count); - - for(int i =0; i < names.count; i++) { - retval[i] = (char*)names.strings[i]; + for(int i =0; i < names->size(); i++) { + retval[i] = 
strdup(names->at(i).c_str()); /* malloc'd copy, to be free()d by the caller: `names` is destroyed on return, so its c_str() pointers would dangle */ } return retval; } diff --git a/android/src/main/cpp/filament_api.h b/android/src/main/cpp/filament_api.h deleted file mode 100644 index bf9db778..00000000 --- a/android/src/main/cpp/filament_api.h +++ /dev/null @@ -1,13 +0,0 @@ -#include "FilamentViewer.hpp" -#include -#include -#include -#include - -void load_skybox(void* viewer, const char* skyboxPath, const char* iblPath); - -void* filament_viewer_new( - void* layer, - void* assetManager -); -} \ No newline at end of file