From a9c2bd1ec5dc00a770d825a7b4f935c2425163a3 Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 12 Jul 2021 19:04:17 +0200 Subject: [PATCH] New settings to compress cache (meta) objects via btrfs compression (cherry picked from commit 989e60f639e7add19c0a72667efa4fa9219f0b6a) # Conflicts: # Interface/BackupFileSystem.h # btrfs/btrfsplugin/BackupFileSystem.h # clouddrive/CloudFile.cpp # clouddrive/KvStoreBackendAzure.cpp # clouddrive/KvStoreBackendAzure.h # clouddrive/KvStoreBackendLocal.cpp # clouddrive/KvStoreBackendLocal.h # clouddrive/KvStoreBackendS3.cpp # clouddrive/KvStoreBackendS3.h # clouddrive/KvStoreFrontend.cpp # clouddrive/OnlineKvStore.cpp # clouddrive/OnlineKvStore.h # clouddrive/PassThroughFileSystem.cpp # clouddrive/dllmain.cpp --- clouddrive/CloudFile.cpp | 15899 +++++++++++++------------- clouddrive/CloudFile.h | 3 +- clouddrive/ClouddriveFactory.cpp | 4 +- clouddrive/CompressEncrypt.cpp | 25 + clouddrive/IClouddriveFactory.h | 6 +- clouddrive/ICompressEncrypt.h | 2 + clouddrive/IKvStoreBackend.h | 7 +- clouddrive/IOnlineKvStore.h | 8 +- clouddrive/KvStoreBackendS3.cpp | 71 +- clouddrive/KvStoreBackendS3.h | 6 +- clouddrive/KvStoreFrontend.cpp | 232 +- clouddrive/KvStoreFrontend.h | 12 +- clouddrive/TransactionalKvStore.cpp | 130 +- clouddrive/TransactionalKvStore.h | 24 +- 14 files changed, 8264 insertions(+), 8165 deletions(-) diff --git a/clouddrive/CloudFile.cpp b/clouddrive/CloudFile.cpp index 72821389..c76ad7d7 100644 --- a/clouddrive/CloudFile.cpp +++ b/clouddrive/CloudFile.cpp @@ -14,5918 +14,5919 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . -**************************************************************************/ -#ifndef _WIN32 -#include -#endif -#include "CloudFile.h" -#include "../stringtools.h" -#include "../common/data.h" -#include "../urbackupcommon/os_functions.h" -#include "KvStoreFrontend.h" -#ifdef HAS_LOCAL_BACKEND -#include "KvStoreBackendLocal.h" -#endif -#include "../Interface/ThreadPool.h" -#include "../Interface/Pipe.h" -#include "../urbackupcommon/events.h" -#include "../common/data.h" -#ifdef HAS_MOUNT_SERVICE -#include "MountServiceClient.h" -#endif -#include "../Interface/SettingsReader.h" -#include "ICompressEncrypt.h" -#ifdef HAS_LOCAL_CACHEFS -#include "PassThroughFileSystem.h" -#endif -#include "Auto.h" -#ifdef WITH_MIGRATION -#include "KvStoreBackendS3.h" -#include "KvStoreBackendAzure.h" -#endif -#include "../urbackupcommon/os_functions.h" -#ifdef WITH_SLOG -#include "../common/crc.h" -#endif -#include "../urbackupcommon/json.h" -#ifdef HAS_ASYNC -#include "fuse_kernel.h" -#endif -#ifdef HAS_LINUX_MEMORY_FILE -#include "LinuxMemFile.h" -#endif - -#if !defined(NO_JEMALLOC) && !defined(_WIN32) -#include -#endif - -#ifndef _WIN32 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#else -typedef int64 ssize_t; -#define aligned_alloc(allign, size) malloc(size) -int fcntl(int fd, int cmd, int val) -{ - return -1; -} -#define F_SETPIPE_SZ 1 -int pipe2(int p[2], int flags) -{ - return -1; -} -#define O_NONBLOCK 1 -#define O_CLOEXEC 2 -int eventfd(unsigned int initval, int flags) -{ - return -1; -} -#define EFD_CLOEXEC 1 -#define EFD_SEMAPHORE 2 -int eventfd_write(int fd, int value) { - return -1; -} -#define POSIX_FADV_DONTNEED 1 -#endif - -#define EXTENSIVE_DEBUG_LOG(x) - -#ifndef PR_SET_IO_FLUSHER -#define PR_SET_IO_FLUSHER 57 -#endif - -#include -#include -#include -#include -#include -#include - -const int64 min_cachesize = 1LL * 1024* 1024* 1024LL; // 1GB -const int64 min_free_size = 20LL * 1024* 1024* 1024LL; // 20GB -const int64 critical_free_size = 1000LL * 1024 * 1024LL; //1GB -const int64 throttle_free_size = 5000LL * 1024 * 1024LL; //5GB - -const int64 comp_start_limit = 20LL * 1024* 1024* 1024LL; // 20GB -const float comp_percent = 0.5; // 50% - -const int64 big_block_size = 20 *1024 *1024; // 20MB -const int64 small_block_size = 512 *1024; // 512KB - -const int64 block_size = 4096; - -const int64 slog_kernel_buffer_size = 16 * 1024 * 1024; - -std::string pbkdf2_sha256(const std::string& key); -bool is_automount_finished(); - -#ifndef _WIN32 -bool flush_dev(std::string dev_fn) -{ - int fd = open64(dev_fn.c_str(), O_RDWR | O_LARGEFILE | O_CLOEXEC); - - if (fd == -1) - { - Server->Log("Error opening device file " + dev_fn + ". " + os_last_error_str(), LL_ERROR); - return false; - } - - if (fsync(fd) == -1) - { - Server->Log("Error fsyncing device file " + dev_fn + ". " + os_last_error_str(), LL_ERROR); - close(fd); - return false; - } - - if (ioctl(fd, BLKFLSBUF, 0) != 0) - { - Server->Log("Error flushing device file " + dev_fn + ". " + os_last_error_str(), LL_ERROR); - close(fd); - return false; - } - - close(fd); - return true; -} -int get_file_fd(IFsFile::os_file_handle h) -{ - return h; -} -#else -bool flush_dev(std::string dev_fn) -{ - return false; -} -#define SPLICE_F_NONBLOCK 256 -#define SPLICE_F_MOVE 128 -int get_file_fd(IFsFile::os_file_handle h) -{ - return reinterpret_cast(h); -} -#endif - -namespace -{ - const char queue_cmd_resize = 1; - const char queue_cmd_get_size = 2; - const char queue_cmd_meminfo = 3; - - enum { - IOPRIO_CLASS_NONE, - IOPRIO_CLASS_RT, - IOPRIO_CLASS_BE, - IOPRIO_CLASS_IDLE, - }; - -#define IOPRIO_CLASS_SHIFT 13 - const _u16 io_prio_val = 2 | IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT; - - //divide rounding up - int64 div_up(int64 num, int64 div) - { - if(num%div == 0) - { - return num/div; - } - else - { - return num/div + 1; - } - } - - int64 roundUp(int64 numToRound, int64 multiple) - { - return ((numToRound + multiple - 1) / multiple) * multiple; - } - - int64 roundDown(int64 numToRound, int64 multiple) - { - return ((numToRound / multiple) * multiple); - } - - void retryWait(size_t n) - { - unsigned int waittime = (std::min)(static_cast(1000.*pow(2., static_cast(n))), (unsigned int)30*60*1000); //30min - if(n>20) - { - waittime = (unsigned int)30*60*1000; - } - Server->Log("Waiting "+PrettyPrintTime(waittime)); - Server->wait(waittime); - } - - bool writeZeroes(int64 offset, IFile* file, int64 num, int set_val) - { - static char buf[4096] = {}; - - while(num>0) - { - int64 towrite = (std::min)((int64)4096, num); - if(file->Write(offset, buf, static_cast<_u32>(towrite))!=towrite) - { - return false; - } - num-=towrite; - offset+=towrite; - } - return true; - } - -#ifndef _WIN32 - void doublefork_writestring(const std::string& str, const std::string& file) - { - pid_t pid1; - pid1 = fork(); - if (pid1 == 0) - { - setsid(); - pid_t pid2; - pid2 = fork(); - if (pid2 == 0) - { - writestring(str, file); - exit(0); - } - else - { - exit(1); - } - } - else - { - int status; - waitpid(pid1, &status, 0); - } - } -#else - void doublefork_writestring(const std::string& str, const std::string& file) - { - writestring(str, file); - } -#endif - - class FileBitmap - { - public: - FileBitmap(IFile* backing_file, int64 n, bool init_set) - : backing_file(backing_file) - { - resize(n, init_set); - } - - void resize(int64 n, bool init_set) - { - total_size=n; - bitmap_size = static_cast(n/8 + (n%8==0 ? 0 : 1)); - - if(backing_file->Size()(bitmap_size)) - { - while(!writeZeroes(backing_file->Size(), backing_file, bitmap_size-backing_file->Size(), init_set?255:0)) - { - Server->Log("Error resizing bitmap file. Retrying...", LL_ERROR); - Server->wait(1000); - } - } - - cache.resize(bitmap_size); - - backing_file->Seek(0); - size_t pos=0; - while(posRead(reinterpret_cast(&cache[pos]), static_cast<_u32>(toread))!=toread) - { - throw std::runtime_error("Error reading from backing bitmap file"); - } - - pos+=toread; - } - } - - void set(int64 i, bool v) - { - size_t bitmap_byte=(size_t)(i/8); - size_t bitmap_bit=i%8; - - unsigned char b = cache[bitmap_byte]; - - if(v==true) - b=b|(1<<(7-bitmap_bit)); - else - b=b&(~(1<<(7-bitmap_bit))); - - cache[bitmap_byte] = b; - } - - size_t set_range(int64 start, int64 end, bool v) - { - size_t set_bits = 0; - for(;start0); - - return has_bit; - } - - bool flush() - { - while(backing_file->Write(0, reinterpret_cast(cache.data()), - static_cast<_u32>(cache.size()))!=cache.size()) - { - Server->Log("Error writing to backing bitmap file. Retrying...", LL_WARNING); - Server->wait(1000); - } - - return true; - } - - size_t count_bits() - { - size_t set_count = 0; - for(int64 i=0;i cache; - }; - - const size_t bitmap_block_size=4096; - - int64 bitmap_assert_limit = 1*1024*1024; - - class SparseFileBitmap - { - - public: - SparseFileBitmap(IFsFile* backing_file, int64 n, bool init_set, - size_t bitmap_cache_items) - : backing_file(backing_file), - bitmap_cache_items(bitmap_cache_items), - async_evict_skip_block(std::string::npos) - { - resize(n, init_set); - } - - ~SparseFileBitmap() - { - flush(); - } - - void resize(int64 n, bool init_set) - { - flush(); - - total_size=n; - bitmap_size = static_cast(n/8 + (n%8==0 ? 0 : 1)); - - bitmap_size += bitmap_block_size - bitmap_size%bitmap_block_size; - - if(backing_file->Size()(bitmap_size)) - { - while(!writeZeroes(backing_file->Size(), backing_file, bitmap_size-backing_file->Size(), init_set?255:0)) - { - Server->Log("Error resizing bitmap file", LL_ERROR); - Server->wait(1000); - } - } - } - - void set(int64 i, bool v) - { - size_t bitmap_byte=(size_t)(i/8); - size_t bitmap_bit=i%8; - - char* entry = cache_entry(bitmap_byte); - - unsigned char b = entry[bitmap_byte%bitmap_block_size]; - - if(v==true) - b=b|(1<<(7-bitmap_bit)); - else - b=b&(~(1<<(7-bitmap_bit))); - - entry[bitmap_byte%bitmap_block_size] = b; - } - -#ifdef HAS_ASYNC - fuse_io_context::io_uring_task set_async(fuse_io_context& io, int64 i, bool v) - { - size_t bitmap_byte = (size_t)(i / 8); - size_t bitmap_bit = i % 8; - - char* entry = co_await cache_entry_async(io, bitmap_byte); - - unsigned char b = entry[bitmap_byte % bitmap_block_size]; - - if (v == true) - b = b | (1 << (7 - bitmap_bit)); - else - b = b & (~(1 << (7 - bitmap_bit))); - - entry[bitmap_byte % bitmap_block_size] = b; - - co_return 0; - } -#endif - - size_t set_range(int64 start, int64 end, bool v) - { - size_t set_bits = 0; - for(;start set_range_async(fuse_io_context& io, int64 start, int64 end, bool v) - { - size_t set_bits = 0; - for (; start < end; ++start) - { - if (co_await get_async(io, start) != v) - { - co_await set_async(io, start, v); - ++set_bits; - } - } - co_return set_bits; - } -#endif - - char getb(int64 i) - { - size_t bitmap_byte = (size_t)(i / 8); - char* entry = cache_entry(bitmap_byte); - return entry[bitmap_byte%bitmap_block_size]; - } - - char* getPtr(int64 i, size_t& bsize) - { - size_t bitmap_byte = (size_t)(i / 8); - char* entry = cache_entry(bitmap_byte); - bsize = bitmap_block_size - bitmap_byte%bitmap_block_size; - return &entry[bitmap_byte%bitmap_block_size]; - } - -#ifdef HAS_ASYNC - fuse_io_context::io_uring_task > getPtrAsync(fuse_io_context& io, int64 i) - { - size_t bitmap_byte = (size_t)(i / 8); - char* entry = co_await cache_entry_async(io, bitmap_byte); - size_t bsize = bitmap_block_size - bitmap_byte % bitmap_block_size; - co_return std::make_pair(&entry[bitmap_byte % bitmap_block_size], bsize); - } -#endif - - bool get(int64 i) - { - size_t bitmap_byte=(size_t)(i/8); - size_t bitmap_bit=i%8; - - char* entry = cache_entry(bitmap_byte); - - unsigned char b = entry[bitmap_byte%bitmap_block_size]; - - bool has_bit=((b & (1<<(7-bitmap_bit)))>0); - - return has_bit; - } - -#ifdef HAS_ASYNC - fuse_io_context::io_uring_task get_async(fuse_io_context& io, int64 i) - { - size_t bitmap_byte = (size_t)(i / 8); - size_t bitmap_bit = i % 8; - - char* entry = co_await cache_entry_async(io, bitmap_byte); - - unsigned char b = entry[bitmap_byte % bitmap_block_size]; - - bool has_bit = ((b & (1 << (7 - bitmap_bit))) > 0); - - co_return has_bit; - } -#endif - - bool flush() - { - while(!cache.empty()) - { - evict_one(); - } - - return true; - } - -#ifdef HAS_ASYNC - fuse_io_context::io_uring_task flush_async(fuse_io_context& io) - { - while (!cache.empty()) - { - if (!co_await evict_one_async(io)) - { - co_return; - } - } - } -#endif - - size_t count_bits_old() - { - size_t set_count = 0; - for (int64 i = 0; i < total_size;++i) - { - if (get(i)) - { - ++set_count; - } - } - return set_count; - } - - size_t count_bits() - { - size_t set_count = 0; - for(int64 i=0;ibitmap_assert_limit || set_count == count_bits_old()); - - return set_count; - } - - bool get_range_old( int64 start, int64 end ) - { - for(;start get_range_old_async(fuse_io_context& io, int64 start, int64 end) - { - for (; start < end; ++start) - { - if (co_await get_async(io, start)) - { - co_return true; - } - } - co_return false; - } -#endif - - bool get_range( int64 start, int64 end ) - { - if(end-start<=8) - { - return get_range_old(start, end); - } - - int64 orig_start = start; - - for(;startbitmap_assert_limit || get_range_old(orig_start, end)); - return true; - } - } - - for(;start+8bitmap_assert_limit || get_range_old(orig_start, end)); - return true; - } - start+=8; - ++ptr; - } - } - - for(;startbitmap_assert_limit || get_range_old(orig_start, end)); - return true; - } - } - - assert(total_size>bitmap_assert_limit || !get_range_old(orig_start, end)); - return false; - } - -#ifdef HAS_ASYNC - fuse_io_context::io_uring_task get_range_async(fuse_io_context& io, int64 start, int64 end) - { - if (end - start <= 8) - { - co_return co_await get_range_old_async(io, start, end); - } - - int64 orig_start = start; - - for (; start < end && start % 8 != 0; ++start) - { - if (co_await get_async(io, start)) - { - co_return true; - } - } - - for (; start + 8 < end;) - { - auto [ptr, bsize] = co_await getPtrAsync(io, start); - char* eptr = ptr + bsize; - for (; ptr < eptr && start + 8 < end;) - { - if (*ptr != 0) - { - co_return true; - } - start += 8; - ++ptr; - } - } - - for (; start < end; ++start) - { - if (co_await get_async(io, start)) - { - co_return true; - } - } - - co_return false; - } -#endif - - size_t meminfo() - { - return cache.size()*bitmap_block_size; - } - - size_t get_max_cache_items() - { - return bitmap_cache_items; - } - - void clear_cache_no_flush() - { - while (!cache.empty()) - { - std::pair evicted = cache.evict_one(); - delete[] evicted.second; - } - } - - private: - - char* cache_entry(size_t bitmap_byte) - { - size_t block = bitmap_byte/bitmap_block_size; - - char** res = cache.get(block); - - if(res!=nullptr) - { - return *res; - } - - return fill_cache(block); - } - -#ifdef HAS_ASYNC - fuse_io_context::io_uring_task cache_entry_async(fuse_io_context& io, size_t bitmap_byte) - { - size_t block = bitmap_byte / bitmap_block_size; - - char** res = cache.get(block); - - if (res != NULL) - { - co_return *res; - } - - co_return co_await fill_cache_async(io, block); - } -#endif - - char* fill_cache(size_t block) - { - char* np = new char[bitmap_block_size]; - - backing_file->Seek(block*bitmap_block_size); - - if(backing_file->Read(np, bitmap_block_size)!=bitmap_block_size) - { - throw std::runtime_error("Error reading from backing bitmap file"); - } - - cache.put(block, np); - - evict_from_cache(); - - return np; - } - -#ifdef HAS_ASYNC - struct BlockAwaitersCo - { - BlockAwaitersCo* next; - std::coroutine_handle<> awaiter; - }; - - BlockAwaitersCo* block_awaiters_head = nullptr; - - struct BlockAwaiters - { - BlockAwaiters(SparseFileBitmap& bitmap) - : bitmap(bitmap) {} - BlockAwaiters(BlockAwaiters const&) = delete; - BlockAwaiters(BlockAwaiters&& other) = delete; - BlockAwaiters& operator=(BlockAwaiters&&) = delete; - BlockAwaiters& operator=(BlockAwaiters const&) = delete; - - bool await_ready() const noexcept - { - return false; - } - - void await_suspend(std::coroutine_handle<> p_awaiter) noexcept - { - awaiter.awaiter = p_awaiter; - awaiter.next = bitmap.block_awaiters_head; - bitmap.block_awaiters_head = &awaiter; - } - - void await_resume() const noexcept - { - } - - private: - BlockAwaitersCo awaiter; - SparseFileBitmap& bitmap; - }; - - fuse_io_context::io_uring_task fill_cache_async(fuse_io_context& io, size_t block) - { - if (async_retrieval.find(block) != async_retrieval.end()) - { - while (async_retrieval.find(block) != async_retrieval.end()) - { - co_await BlockAwaiters(*this); - } - - char** op = cache.get(block); - - if (op != nullptr) - co_return *op; - } - - async_retrieval.insert(block); - - char* np = new char[bitmap_block_size]; - - io_uring_sqe* sqe = io.get_sqe(); - - io_uring_prep_read(sqe, get_file_fd(backing_file->getOsHandle()), np, - bitmap_block_size, block * bitmap_block_size); - - int rc = co_await io.complete(sqe); - - if (rc < 0 || rc != bitmap_block_size) - { - Server->Log("Error reading from backing bitmap file rc="+convert(rc), LL_ERROR); - throw std::runtime_error("Error reading from backing bitmap file"); - } - -#ifndef NDEBUG - char** op = cache.get(block); - assert(op == nullptr); -#endif - cache.put(block, np); - - async_retrieval.erase(block); - - bool has_skip_evict = false; - bool can_evict = false; - if (async_evict_skip_block == std::string::npos) - { - async_evict_skip_block = block; - can_evict = true; - } - else if (block_awaiters_head!=nullptr) - { - skip_evict_blocks.insert(block); - has_skip_evict = true; - } - - resume_retrieval_waiters(); - - if (can_evict) - { - co_await evict_from_cache_async(io); - async_evict_skip_block = std::string::npos; - } - - if (has_skip_evict) - { - skip_evict_blocks.erase(block); - } - - co_return np; - } - - void resume_retrieval_waiters() - { - BlockAwaitersCo* block_curr = block_awaiters_head; - block_awaiters_head = nullptr; - - while (block_curr != nullptr) - { - BlockAwaitersCo* next = block_curr->next; - block_curr->awaiter.resume(); - block_curr = next; - } - } -#endif - - void evict_one() - { - std::pair evicted = cache.evict_one(); - - while(backing_file->Write((int64)evicted.first*bitmap_block_size, - reinterpret_cast(evicted.second), - static_cast<_u32>(bitmap_block_size))!=bitmap_block_size) - { - Server->Log("Error writing to backing bitmap file. Retrying...", LL_WARNING); - Server->wait(1000); - } - - delete[] evicted.second; - } - -#ifdef HAS_ASYNC - fuse_io_context::io_uring_task evict_one_async(fuse_io_context& io) - { - std::pair evicted = cache.evict_one(); - - if (evicted.first == async_evict_skip_block - || skip_evict_blocks.find(evicted.first) != skip_evict_blocks.end()) - { - cache.put(evicted.first, evicted.second); - co_return false; - } - - async_retrieval.insert(evicted.first); - - int rc; - do - { - io_uring_sqe* sqe = io.get_sqe(); - - io_uring_prep_write(sqe, get_file_fd(backing_file->getOsHandle()), - evicted.second, bitmap_block_size, evicted.first * bitmap_block_size); - - rc = co_await io.complete(sqe); - - if (rc < 0 || rc != bitmap_block_size) - { - Server->Log("Error writing to backing bitmap file. Retrying...", LL_WARNING); - co_await io.sleep_ms(1000); - } - - } while (rc < 0 || rc != bitmap_block_size); - - delete[] evicted.second; - async_retrieval.erase(evicted.first); - resume_retrieval_waiters(); - - co_return true; - } -#endif - - void evict_from_cache() - { - while(cache.size()>bitmap_cache_items) - { - evict_one(); - } - } - -#ifdef HAS_ASYNC - fuse_io_context::io_uring_task evict_from_cache_async(fuse_io_context& io) - { - while (cache.size() > bitmap_cache_items) - { - if (!co_await evict_one_async(io)) - { - co_return; - } - } - } -#endif - - IFsFile* backing_file; - size_t bitmap_size; - int64 total_size; - common::lrucache cache; - size_t bitmap_cache_items; - std::set async_retrieval; - size_t async_evict_skip_block; - std::set skip_evict_blocks; - }; - - class PreloadThread : public IThread - { - IPipe* pipe; - CloudFile* cf; - public: - PreloadThread(IPipe* pipe, CloudFile* cf) - : pipe(pipe), cf(cf) {} - - void operator()() - { - std::string msg; - char buffer[block_size]; - while (pipe->Read(&msg)) - { - if (msg.empty()) - { - pipe->Write(msg); - break; - } - - CRData data(msg.data(), msg.size()); - int64 pos; - data.getInt64(&pos); - - cf->Read(pos, buffer, block_size); - } - - delete this; - } - }; - - class UpdateMissingChunksThread : public IThread - { - CloudFile* cf; - bool do_stop; - std::unique_ptr mutex; - std::unique_ptr cond; - public: - UpdateMissingChunksThread(CloudFile* cf) - : mutex(Server->createMutex()), - cond(Server->createCondition()), - do_stop(false), cf(cf) {} - void operator()() - { - IScopedLock lock(mutex.get()); - bool no_missing = true; - while (!do_stop) - { - if (no_missing) - { - cond->wait(&lock); - } - - if (do_stop) - break; - - int64 starttime = Server->getTimeMS(); - while (Server->getTimeMS() - starttime < 10000 - && !do_stop) - cond->wait(&lock, 1000); - - if (do_stop) - break; - - no_missing = cf->update_missing_fs_chunks(); - } - } - - void stop() { - IScopedLock lock(mutex.get()); - do_stop = true; - cond->notify_all(); - } - - void notify() { - cond->notify_all(); - } - }; - - class MigrationCoordThread : public IThread - { - bool has_error; - CloudFile* cf; - std::string conf_fn; - bool continue_migration; - public: - MigrationCoordThread(CloudFile* cf, std::string conf_fn, bool continue_migration) - : has_error(false), cf(cf), conf_fn(conf_fn), continue_migration(continue_migration) {} - void operator()() - { - if (!cf->migrate(conf_fn, continue_migration)) - has_error = true; - - delete this; - } - }; - - class ShareWithUpdater : public IThread - { - int64 get_mount_total_space(const std::string& mount) - { - int64 total_size; - if (os_directory_exists(mount) - && (total_size = os_total_space(mount))>0) - { - std::unique_ptr zero_shrink_file(Server->openFile(ExtractFilePath(mount) + os_file_sep() + "zero.shrink.file", MODE_READ)); - if (zero_shrink_file.get() != nullptr) - { - total_size -= zero_shrink_file->Size(); - } - - return total_size; - } - - return -1; - } - - public: - ShareWithUpdater(std::string own_mount, const std::string& mounts, TransactionalKvStore& kv_store) - :own_mount(std::move(own_mount)), kv_store(kv_store), do_stop(false), - stop_cond(Server->createCondition()), stop_mutex(Server->createMutex()) - { - Tokenize(mounts, share_mounts, ";"); - } - - void operator()() - { - IScopedLock lock(stop_mutex.get()); - while (!do_stop) - { - stop_cond->wait(&lock, 10 * 60 * 1000); - - if (do_stop) - break; - - lock.relock(nullptr); - - if (!is_automount_finished()) - { - lock.relock(stop_mutex.get()); - continue; - } - - int64 own_total_space = get_mount_total_space(own_mount); - - if (own_total_space <= 0) - continue; - - - int64 sum_total_space = 0; - for (const std::string& mount : share_mounts) - { - int64 total_space = get_mount_total_space(mount); - - if (total_space <= 0) - continue; - - sum_total_space += total_space; - } - - double prop = static_cast(own_total_space) / (sum_total_space + own_total_space); - - int64 cache_total_space = kv_store.cache_total_space(); - - if (cache_total_space > 0) - { - int64 new_max_cachesize = static_cast(static_cast(cache_total_space)*prop); - Server->Log("Setting max cachesize to " + PrettyPrintBytes(new_max_cachesize) + " cause own file system size " + PrettyPrintBytes(own_total_space) + " other file system size " + PrettyPrintBytes(sum_total_space), LL_INFO); - kv_store.set_max_cachesize(new_max_cachesize); - } - - lock.relock(stop_mutex.get()); - } - } - private: - std::vector share_mounts; - std::string own_mount; - TransactionalKvStore& kv_store; - std::unique_ptr stop_cond; - std::unique_ptr stop_mutex; - bool do_stop; - }; - - bool syncDir(const std::string& fp) - { -#ifndef _WIN32 - int fd = open(fp.c_str(), O_RDONLY | O_CLOEXEC); - if (fd == -1) - return false; - - if (fsync(fd) != 0) - { - close(fd); - return false; - } - - close(fd); - return true; -#else - return false; -#endif - } -} // namespace {} - -std::string getCdInterfacePath(); - -void setMountStatus(const std::string& data) -{ -#ifdef _WIN32 - writestring(data, getCdInterfacePath() + "/mount.status"); -#else - int fd = open((getCdInterfacePath() + "/mount.status").c_str(), O_CREAT|O_WRONLY|O_CLOEXEC, S_IRWXU | S_IRGRP | S_IROTH); - if (fd != -1) - { - flock(fd, LOCK_EX); - ftruncate(fd, 0); - write(fd, data.data(), data.size()); - flock(fd, LOCK_UN); - close(fd); - } -#endif -} - -void setMountStatusErr(const std::string & err) -{ - std::string last_logs = extractLastLogErrors(5, std::string(), true); - std::vector lines; - Tokenize(last_logs, lines, "\n"); - - JSON::Object jerr; - jerr.set("state", "error"); - jerr.set("err", err); - - JSON::Array jlast_logs; - for (std::string& line : lines) - jlast_logs.add(line); - - jerr.set("last_logs", jlast_logs); - - setMountStatus(jerr.stringify(true)); -} - - -CloudFile::CloudFile(const std::string& cache_path, - IBackupFileSystem* cachefs, - int64 p_cloudfile_size, - int64 max_cloudfile_size, - IOnlineKvStore* online_kv_store, - const std::string& encryption_key, - ICompressEncryptFactory* compress_encrypt_factory, bool verify_cache, - float cpu_multiplier, - bool background_compress, - size_t no_compress_mult, - int64 reserved_cache_device_space, - int64 min_metadata_cache_free, - bool with_prev_link, - bool allow_evict, - bool with_submitted_files, - float resubmit_compressed_ratio, - int64 max_memfile_size, - std::string memcache_path, - float memory_usage_factor, - bool only_memfiles, - std::string mount_path, - std::string share_with_mount_paths, - unsigned int background_comp_method, - const std::string& slog_path, - int64 slog_max_size, - bool is_async) - : online_kv_store(online_kv_store), - kv_store(cachefs, min_cachesize, min_free_size + reserved_cache_device_space, - critical_free_size + reserved_cache_device_space, throttle_free_size + reserved_cache_device_space, - min_metadata_cache_free, background_compress ? comp_percent : 0, comp_start_limit, - online_kv_store, encryption_key, compress_encrypt_factory, verify_cache, - cpu_multiplier, no_compress_mult, with_prev_link, allow_evict, with_submitted_files, - resubmit_compressed_ratio, max_memfile_size, memcache_path, memory_usage_factor, only_memfiles, - background_comp_method), - cf_pos(0), active_big_block(-1), - last_stat_update_time(0), used_bytes(0), last_flush_check(0), - mutex(Server->createMutex()), - new_big_blocks_bitmap_file(nullptr), - cloudfile_size(p_cloudfile_size), - writeback_count(0), io_alignment(512), - locked_extents_max_alive(0), exit_thread(false), thread_cond(Server->createCondition()), - wait_for_exclusive(0), memory_usage_factor(memory_usage_factor), bitmaps_file_size(0), - flush_enabled(0), cache_path(cache_path), - chunks_mutex(Server->createSharedMutex()), - last_fs_chunks_update(0), updating_fs_chunks(false), - update_missing_chunks_thread_ticket(ILLEGAL_THREADPOOL_TICKET), - migrate_to_cf(nullptr), - migration_ticket(ILLEGAL_THREADPOOL_TICKET), - in_write_retrieval_cond(Server->createCondition()), - read_bytes(0), - written_bytes(0), - slog_path(slog_path), slog_max_size(slog_max_size), - enable_raid_freespace_stats(false), - is_flushing(false), - is_async(is_async), - msg_queue_mutex(Server->createMutex()), - msg_queue_id(0), - msg_queue_cond(Server->createCondition()), - fracture_big_blogs_ticket(ILLEGAL_THREADPOOL_TICKET), - cachefs(cachefs) -{ - setMountStatus("{\"state\": \"get_size\"}"); - - IFile* f_cloudfile_size = kv_store.get("cloudfile_size", TransactionalKvStore::BitmapInfo::Unknown, - TransactionalKvStore::Flag::disable_fd_cache|TransactionalKvStore::Flag::disable_throttling, -1); - - if (f_cloudfile_size == nullptr) - { - throw std::runtime_error("Cannot open cloudfile_size"); - } - - -#ifdef HAS_ASYNC - zero_file.reset(Server->openTemporaryFile()); - if (zero_file.get() == nullptr) - { - throw std::runtime_error("Error opening /dev/zero. " + os_last_error_str()); - } - if(!zero_file->Resize(100*1024*1024)) - { - throw std::runtime_error("Error resizing /dev/zero. " + os_last_error_str()); - } -#endif //HAS_ASYNC - - /*zero_file.reset(Server->openFile("/dev/zero", MODE_RW)); - if (zero_file.get() == nullptr) - { - throw std::runtime_error("Error opening /dev/zero. " + os_last_error_str()); - }*/ - -#ifdef HAS_ASYNC - null_file.reset(Server->openFile("/dev/null", MODE_RW)); - if (null_file.get()==nullptr) - { - throw std::runtime_error("Error opening /dev/null. " + os_last_error_str()); - } - - queue_in_eventfd = eventfd(0, EFD_CLOEXEC); - if (queue_in_eventfd == -1) - { - throw std::runtime_error("Error creating queue in eventfd. " + os_last_error_str()); - } - - update_fs_chunks_eventfd = eventfd(0, EFD_CLOEXEC); - if (update_fs_chunks_eventfd == -1) - { - throw std::runtime_error("Error creating update_fs_chunks_eventfd. " + os_last_error_str()); - } -#endif - - int64 read_cloudfile_size; - bool write_new = false; - if (f_cloudfile_size->Read(0, reinterpret_cast(&read_cloudfile_size), sizeof(read_cloudfile_size)) == sizeof(read_cloudfile_size)) - { - if (cloudfile_size != -1) - { - if (read_cloudfile_size != cloudfile_size) - { - write_new = true; - } - - if (read_cloudfile_size > cloudfile_size) - { - Server->Log("Cloud drive size is bigger than configured size. Configured=" + PrettyPrintBytes(cloudfile_size) + " actual=" + PrettyPrintBytes(read_cloudfile_size) + ". Using actual size.", LL_WARNING); - cloudfile_size = read_cloudfile_size; - } - } - else - { - cloudfile_size = read_cloudfile_size; - } - } - else - { - write_new = true; - } - - if (write_new) - { -#ifdef HAS_LOCAL_BACKEND - if (cloudfile_size == -1) - { - auto frontend = dynamic_cast(online_kv_store); - if (frontend != nullptr) - { - auto local = dynamic_cast(frontend->getBackend()); - if (local != nullptr) - { - int64 free_space; - local->get_space_info(cloudfile_size, free_space); - } - } - } -#endif - - if (max_cloudfile_size > 0) - { - cloudfile_size = (std::min)(cloudfile_size, max_cloudfile_size); - } - - if (f_cloudfile_size->Write(0, reinterpret_cast(&cloudfile_size), sizeof(cloudfile_size)) != sizeof(cloudfile_size)) - { - Server->Log("Error writing new cloudfile size. " + os_last_error_str(), LL_ERROR); - } - } - - kv_store.release("cloudfile_size"); - - setMountStatus("{\"state\": \"open_bitmaps\"}"); - - open_bitmaps(); - - setMountStatus("{\"state\": \"delete_objects\"}"); - - bool submit_need_flush = false; - if (online_kv_store->submit_del(this, kv_store.get_transid(), submit_need_flush) && - submit_need_flush) - { - if (!close_bitmaps()) - throw std::runtime_error("Error closing bitmaps after submit_del"); - - if (!kv_store.checkpoint(true, 0)) - throw std::runtime_error("Error checkpointing after submit_del"); - - open_bitmaps(); - - online_kv_store->submit_del_post_flush(); - } - - update_missing_chunks_thread.reset(new UpdateMissingChunksThread(this)); - update_missing_chunks_thread_ticket = Server->getThreadPool()->execute(update_missing_chunks_thread.get(), "mchunk update"); - - - IFsFile* migration_settings_f = kv_store.get("clouddrive_migration_settings", TransactionalKvStore::BitmapInfo::Unknown, - TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); - - if (migration_settings_f != nullptr - && migration_settings_f->Size()>0) - { - setMountStatus("{\"state\": \"continue_migration\"}"); - - Server->Log("Continuing cloud drive migration", LL_WARNING); - - std::unique_ptr tmpf(Server->openTemporaryFile()); - std::string tmpfn = tmpf->getFilename(); - - if (tmpf != nullptr - && copy_file(migration_settings_f, tmpf.get())) - { - tmpf.reset(); - - migration_ticket = Server->getThreadPool()->execute(new MigrationCoordThread(this, tmpfn, true), "migration coord"); - } - kv_store.release("clouddrive_migration_settings"); - } - else if (migration_settings_f != nullptr) - { - kv_store.release("clouddrive_migration_settings"); - kv_store.del("clouddrive_migration_settings"); - } - else - { - kv_store.release("clouddrive_migration_settings"); - } - - if (!share_with_mount_paths.empty()) - { - share_with_updater.reset(new ShareWithUpdater(mount_path, share_with_mount_paths, kv_store)); - share_with_updater_ticket = Server->getThreadPool()->execute(share_with_updater.get(), "share updt"); - } - - setMountStatus("{\"state\": \"ready\"}"); - - if (FileExists("/var/urbackup/no_fallocate_block_file")) - { - fallocate_block_file = false; - } -} - -CloudFile::~CloudFile() -{ - if (update_missing_chunks_thread.get() != nullptr) - update_missing_chunks_thread->stop(); - - kv_store.set_num_second_chances_callback(nullptr); - - { - IScopedLock lock(mutex.get()); - exit_thread = true; - thread_cond->notify_all(); - } - - Server->getThreadPool()->waitFor(fracture_big_blogs_ticket); - Server->getThreadPool()->waitFor(update_missing_chunks_thread_ticket); - - if (migration_ticket != ILLEGAL_THREADPOOL_TICKET) - Server->getThreadPool()->waitFor(migration_ticket); -} - -std::string CloudFile::Read( _u32 tr, bool* has_error) -{ - std::string ret; - ret.resize(tr); - - _u32 read = Read(&ret[0], tr, has_error); - - if(read==0) - { - return std::string(); - } - - ret.resize(read); - return ret; -} - -std::string CloudFile::Read( int64 pos, _u32 tr, bool* has_error) -{ - std::string ret; - ret.resize(tr); - - _u32 read = Read(pos, &ret[0], tr, has_error); - - if(read==0) - { - return std::string(); - } - - ret.resize(read); - return ret; -} - -_u32 CloudFile::Read(char* buffer, _u32 bsize, bool* has_error) -{ - _u32 ret = Read(cf_pos, buffer, bsize, has_error); - cf_pos+=ret; - return ret; -} - -_u32 CloudFile::Read(int64 pos, char* buffer, _u32 bsize, bool* has_error) -{ - return ReadInt(pos, buffer, bsize, nullptr, - TransactionalKvStore::Flag::prioritize_read| - TransactionalKvStore::Flag::read_random, has_error); -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::ReadAsync(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io, - int64 pos, _u32 bsize, unsigned int flags, bool ext_lock) -{ - fuse_in_header* fheader = reinterpret_cast(fuse_io.header_buf); - fuse_out_header* out_header = reinterpret_cast(fuse_io.scratch_buf); - out_header->error = 0; - out_header->len = sizeof(fuse_out_header) + bsize; - out_header->unique = fheader->unique; - - if(pos>cloudfile_size) - { - Server->Log("Read position " + convert(pos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_INFO); - - std::vector read_tasks; - read_tasks.push_back(ReadTask(get_file_fd(zero_file->getOsHandle()), zero_file.get(), std::string(), 0, bsize)); - - int ret = co_await complete_read_tasks(io, fuse_io, read_tasks, 0, - pos, bsize, flags); - - co_return ret; - } - - EXTENSIVE_DEBUG_LOG(Server->Log("Reading " + convert(bsize) + " bytes at pos " + convert(pos), LL_DEBUG);) - - EXTENSIVE_DEBUG_LOG(int64 starttime = Server->getTimeMS();) - - int64 target = pos + bsize; - - if (target>cloudfile_size) - { - target = cloudfile_size; - bsize = static_cast<_u32>(target - pos); - } - - size_t lock_idx; - int64 orig_pos = pos; - if(!ext_lock) - { - lock_idx = co_await lock_extent_async(orig_pos, bsize, false); - } - - std::vector read_tasks; - - size_t flush_mem_n_tasks = 0; - - bool io_error = false; - while(posget_async(io, pos/block_size); - - if(!has_block) - { - int64 tozero = (std::min)((block_size - pos%block_size), target-pos); - if (!read_tasks.empty() && read_tasks.back().file == zero_file.get()) - { - read_tasks.back().size += tozero; - } - else - { - read_tasks.push_back(ReadTask(get_file_fd(zero_file->getOsHandle()), zero_file.get(), std::string(), 0, static_cast(tozero))); - } - Server->Log("Adding " + convert(tozero) + " zero bytes at " + convert(pos), LL_INFO); - EXTENSIVE_DEBUG_LOG(Server->Log("Adding "+convert(tozero)+" zero bytes at "+convert(pos), LL_DEBUG);) - pos+=tozero; - continue; - } - - //TODO: FIXME: Currently it reads the whole block without checking the bitmap further - //(and returning zeroes), i.e. discard_zeroes_data=0 - - bool in_big_block = co_await big_blocks_bitmap->get_async(io, pos/big_block_size); - - int64 curr_block_size; - std::string key; - if(in_big_block) - { - curr_block_size = big_block_size; - key = big_block_key(pos/curr_block_size); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(pos/curr_block_size); - } - - int64 toread = (std::min)((curr_block_size - pos%curr_block_size), target-pos); - - unsigned int curr_flags = flags; - if (co_await is_metadata_async(io, pos, key)) - { - curr_flags |= TransactionalKvStore::Flag::disable_memfiles; - } - - IFsFile* block = co_await kv_store.get_async(io, key, TransactionalKvStore::BitmapInfo::Present, - curr_flags|TransactionalKvStore::Flag::read_only, curr_block_size); - - bool has_read_error; - if(block!=NULL) - { - int64 block_pos = pos % curr_block_size; - read_tasks.push_back(ReadTask(get_file_fd(block->getOsHandle()), block, key, block_pos, static_cast<_u32>(toread))); - - if (block->getFilename()!=key) - { - read_tasks.back().flush_mem = true; - ++flush_mem_n_tasks; - } - } - else - { - addSystemEvent("cache_err", - "Error reading", - "Cannot get block for volume offset " + convert(pos) + "" - " at block positition " + convert(pos % curr_block_size) + " len " + convert(toread) + ". ", LL_ERROR); - io_error = true; - break; - } - - pos+= toread; - read_bytes += toread; - } - - if (io_error) - { - if (!ext_lock) - { - unlock_extent_async(orig_pos, bsize, false, lock_idx); - } - - out_header->error = -EIO; - out_header->len = sizeof(fuse_out_header); - - io_uring_sqe* header_sqe = io.get_sqe(2); - io_uring_prep_write_fixed(header_sqe, fuse_io.pipe[1], - fuse_io.scratch_buf, sizeof(fuse_out_header), - -1, fuse_io.scratch_buf_idx); - header_sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; - - io_uring_sqe* splice_sqe = io.get_sqe(); - io_uring_prep_splice(splice_sqe, fuse_io.pipe[0], - -1, io.fuse_ring.fd, -1, out_header->len, - SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); - splice_sqe->flags |= IOSQE_FIXED_FILE; - - auto [rc1, rc2] = co_await io.complete(std::make_pair(header_sqe, splice_sqe)); - - if (rc1 < 0) - { - co_return -1; - } - - if (rc2 < 0 || rc2 < out_header->len) - { - co_return -1; - } - - co_return 0; - } - - if (read_tasks.empty()) - { - Server->Log("No read_tasks in ReadAsync", LL_WARNING); - abort(); - co_return -1; - } - - //Server->Log("flush_mem_n_tasks=" + convert(flush_mem_n_tasks)); - - int ret = co_await complete_read_tasks(io, fuse_io, read_tasks, flush_mem_n_tasks, - orig_pos, bsize, flags); - //Maybe needs loop for short splice - - for (ReadTask& task : read_tasks) - { - if(!task.key.empty()) - co_await kv_store.release_async(io, task.key); - } - - if(!ext_lock) - { - unlock_extent_async(orig_pos, bsize, false, lock_idx); - } - - EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS() - starttime) + "ms", LL_DEBUG);) - - co_return ret; -} -#endif //HAS_ASYNC - -_u32 CloudFile::ReadInt(int64 pos, char* buffer, _u32 bsize, IScopedLock* ext_lock, - unsigned int flags, bool* has_error) -{ - if (is_async) - abort(); - - IScopedLock lock(nullptr); - - if(ext_lock==nullptr) - { - lock.relock(mutex.get()); - } - - if (pos > cloudfile_size) - { - Server->Log("Read position " + convert(pos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_INFO); - memset(buffer, 0, bsize); - return bsize; - } - - EXTENSIVE_DEBUG_LOG(Server->Log("Reading " + convert(bsize) + " bytes at pos " + convert(pos), LL_DEBUG);) - - EXTENSIVE_DEBUG_LOG(int64 starttime = Server->getTimeMS();) - - int64 target = pos + bsize; - - if (target>cloudfile_size) - { - target = cloudfile_size; - bsize = static_cast<_u32>(target - pos); - } - - int64 orig_pos=pos; - if(ext_lock==nullptr) - { - lock_extent(lock, orig_pos, bsize, false); - } - - while(posget(pos/block_size); - - if(!has_block) - { - int64 tozero = (std::min)((block_size - pos%block_size), target-pos); - memset(buffer, 0, static_cast(tozero)); - Server->Log("Adding " + convert(tozero) + " zero bytes at " + convert(pos), LL_INFO); - EXTENSIVE_DEBUG_LOG(Server->Log("Adding "+convert(tozero)+" zero bytes at "+convert(pos), LL_DEBUG);) - buffer+=tozero; - pos+=tozero; - continue; - } - - bool in_big_block = big_blocks_bitmap->get(pos/big_block_size); - - int64 curr_block_size; - std::string key; - if(in_big_block) - { - curr_block_size = big_block_size; - key = big_block_key(pos/curr_block_size); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(pos/curr_block_size); - } - - int64 toread = (std::min)((curr_block_size - pos%curr_block_size), target-pos); - - if (ext_lock == nullptr) - { - lock.relock(nullptr); - } - else - { - ext_lock->relock(nullptr); - } - - unsigned int curr_flags = flags; - if (is_metadata(pos, key)) - { - curr_flags |= TransactionalKvStore::Flag::disable_memfiles; - } - - IFile* block = kv_store.get(key, TransactionalKvStore::BitmapInfo::Present, - curr_flags |TransactionalKvStore::Flag::read_only, curr_block_size); - - _u32 read=0; - - bool has_read_error; - if(block!=nullptr) - { - _u32 last_read; - do - { - int64 block_pos = pos%curr_block_size + read; - /* If Direct-IO is enabled TODO: Fix alignment of buffer - if (block_pos%io_alignment == 0 - && toread%io_alignment == 0) - { - last_read = block->Read(block_pos, buffer+read, static_cast<_u32>(toread)-read, &has_read_error); - } - else - { - last_read = ReadAligned(block, block_pos, buffer+read, static_cast<_u32>(toread)-read, &has_read_error); - }*/ - - has_read_error = false; - last_read = block->Read(block_pos, buffer + read, static_cast<_u32>(toread) - read, &has_read_error); - read += last_read; - - if (read < toread) - { - Server->Log("Short read ("+convert(read)+"/"+convert(toread)+") while reading from file " + block->getFilename() + " (size "+convert(block->Size())+")" - " at positition " + convert(pos%curr_block_size) + " block device pos " + convert(pos) + " len " + convert(toread) + " errno "+convert((int64)errno)+". Continuing...", LL_WARNING); - } - - } while (!has_read_error && read 0); - } - - if (has_read_error) - { - std::string syserr = os_last_error_str(); - Server->Log("Read error while reading from file "+block->getFilename()+"" - " at positition "+convert(pos%curr_block_size)+" block device pos "+convert(pos)+" len "+convert(toread)+". " - + syserr, LL_ERROR); - - addSystemEvent("cache_err", - "Error reading from block file on cache", - "Read error while reading from file " + block->getFilename() + "" - " at positition " + convert(pos%curr_block_size) + " block device pos " + convert(pos) + " len " + convert(toread) + ". " - + syserr, LL_ERROR); - } - - - if(readLog("Adding " + convert(toread - read) + " zero bytes at " + convert(pos+read)+" (2)", LL_INFO); - memset(buffer+read, 0, static_cast(toread-read)); - read=static_cast<_u32>(toread); - } - - if(block!=nullptr) - { - kv_store.release(key); - } - - if(read!=toread) - { - if (has_error) *has_error = true; - Server->Log("Error reading from block file (read only "+convert(read)+" of "+convert(toread)+" bytes)", LL_ERROR); - if (!has_read_error) - { - addSystemEvent("cache_err", - "Error reading from block file on cache", - "Error reading from block file at " + cachefs->getName() + " (read only " + convert(read) + " of " + convert(toread) + " bytes)", LL_ERROR); - } - if (ext_lock==nullptr) - { - lock.relock(mutex.get()); - unlock_extent(lock, orig_pos, bsize, false); - } - else - { - ext_lock->relock(mutex.get()); - } - pos += read; - return static_cast<_u32>(pos-orig_pos); - } - - - buffer+=read; - pos+=read; - read_bytes += read; - - if(ext_lock==nullptr) - { - lock.relock(mutex.get()); - } - else - { - ext_lock->relock(mutex.get()); - } - } - - if(ext_lock==nullptr) - { - unlock_extent(lock, orig_pos, bsize, false); - } - - EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS() - starttime) + "ms", LL_DEBUG);) - - return bsize; -} - -_u32 CloudFile::ReadAligned(IFile* block, int64 pos, char* buffer, _u32 toread, bool* has_read_error) -{ - EXTENSIVE_DEBUG_LOG(Server->Log("Fixing alignment for read at pos "+convert(pos)+" size "+convert(toread), LL_DEBUG);) - - int64 startpos = (pos/io_alignment)*io_alignment; - _u32 alignstartadd = static_cast<_u32>(pos-startpos); - _u32 alignendadd = io_alignment - (toread+alignstartadd)%io_alignment; - _u32 alignread = toread + alignstartadd + alignendadd; - char* buf = reinterpret_cast(aligned_alloc(io_alignment, alignread) ); - _u32 r = block->Read(startpos, buf, alignread, has_read_error); - - if(rtoread) - { - r=toread; - } - - memcpy(buffer, buf+alignstartadd, r); - free(buf); - - return r; -} - -_u32 CloudFile::Write( const std::string &tw, bool* has_error) -{ - return Write(tw.c_str(), static_cast<_u32>(tw.size()), has_error); -} - -_u32 CloudFile::Write(int64 pos, const std::string &tw, bool* has_error) -{ - return Write(pos, tw.c_str(), static_cast<_u32>(tw.size()), has_error); -} - -_u32 CloudFile::Write( const char* buffer, _u32 bsize, bool* has_error) -{ - _u32 ret = WriteInt(cf_pos, buffer, bsize, false, nullptr, true, has_error); - cf_pos += ret; - return ret; -} - -_u32 CloudFile::Write( int64 pos, const char* buffer, _u32 bsize, bool* has_error) -{ - _u32 rc = WriteInt(pos, buffer, bsize, false, nullptr, true, has_error); - return rc; -} - -_u32 CloudFile::WriteNonBlocking( int64 pos, const char* buffer, _u32 bsize, bool* has_error) -{ - EXTENSIVE_DEBUG_LOG(Server->Log("Non-blocking write at pos "+convert(pos)+" size "+convert(bsize));) - _u32 rc = WriteInt(pos, buffer, bsize, false, nullptr, false, has_error); - return rc; -} - -namespace -{ - class PreloadItemsThread : public IThread - { - CloudFile& cf; - IPipe* pipe; - int tag; - bool disable_memfiles; - bool load_only; - public: - PreloadItemsThread(CloudFile& cf, IPipe* pipe, int tag, bool disable_memfiles, bool load_only) - : cf(cf), pipe(pipe), tag(tag), disable_memfiles(disable_memfiles), - load_only(load_only) {} - - void operator()() - { - while (true) - { - std::string data; - pipe->Read(&data); - - if (data.empty()) - { - pipe->Write(data); - break; - } - - CRData rdata(data.data(), data.size()); - - std::string key; - int64 offset, len; - if (rdata.getStr2(&key) - && rdata.getVarInt(&offset) - && rdata.getVarInt(&len)) - { - cf.preload_key(key, offset, len, tag, disable_memfiles, load_only); - } - } - - delete this; - } - }; - - class UpdateFsChunksThread : public IThread - { - std::string path; - int notify_eventfd; - IBackupFileSystem* fs; - public: - struct SOutput - { - std::vector chunks; - std::atomic done; - }; - - UpdateFsChunksThread(IBackupFileSystem* fs, - std::shared_ptr output, - std::string path, int notify_eventfd) : - fs(fs), output(output), path(path), - notify_eventfd(notify_eventfd) {} - - void operator()() { -#ifdef HAS_MOUNT_SERVICE - if (!btrfs_get_chunks_via_service(path, output->chunks)) - { - output->chunks = btrfs_chunks::get_chunks(path); - } -#else - output->chunks = fs->getChunks(); -#endif - output->done.store(true, std::memory_order_release); - if (notify_eventfd != -1) - { - eventfd_write(notify_eventfd, 1); - } - delete this; - } - - private: - std::shared_ptr output; - }; -} - -void CloudFile::preload_items(const std::string & fn, size_t n_threads, int tag, bool disable_memfiles, bool load_only) -{ - if (is_async) - abort(); - - std::unique_ptr f(Server->openFile(fn, MODE_READ)); - - if (f.get() == nullptr) - return; - - Server->Log("Starting preloading fn=" + fn + " size=" + PrettyPrintBytes(f->Size()) + - " items="+convert(f->Size()/(2*sizeof(int64)))+ - " n_threads=" + convert(n_threads) + " tag=" + convert(tag) + - " disable_memfiles=" + convert(disable_memfiles) + " load_only=" + convert(load_only), LL_INFO); - - std::vector tickets; - std::unique_ptr pipe(Server->createMemoryPipe()); - for (size_t i = 0; i < n_threads; ++i) - { - tickets.push_back(Server->getThreadPool()->execute(new PreloadItemsThread(*this, pipe.get(), tag, disable_memfiles, load_only), - "preload items")); - } - - IScopedLock lock(mutex.get()); - - char buf[1024]; - _u32 rc; - while ((rc = f->Read(buf, sizeof(buf))) > 0) - { - for (size_t i = 0; i+1 < rc / sizeof(int64); i+=2) - { - int64 offset_start; - memcpy(&offset_start, &buf[i * sizeof(offset_start)], sizeof(offset_start)); - - int64 offset_end; - memcpy(&offset_end, &buf[(i+1) * sizeof(offset_end)], sizeof(offset_end)); - - if (offset_start > cloudfile_size) - { - continue; - } - - if (offset_end > cloudfile_size) - offset_end = cloudfile_size; - - while (offset_start < offset_end) - { - Server->Log("Preloading offset " + convert(offset_start) + " size " + convert(offset_end - offset_start), LL_INFO); - bool has_block = bitmap->get(offset_start / block_size); - - if (!has_block) - { - int64 tozero = (std::min)((block_size - offset_start%block_size), offset_end - offset_start); - offset_start += tozero; - continue; - } - - bool in_big_block = big_blocks_bitmap->get(offset_start / big_block_size); - - int64 curr_block_size; - std::string key; - if (in_big_block) - { - curr_block_size = big_block_size; - key = big_block_key(offset_start / curr_block_size); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(offset_start / curr_block_size); - } - - int64 toread = (std::min)((curr_block_size - offset_start%curr_block_size), offset_end - offset_start); - - CWData data; - data.addString2(key); - data.addVarInt((offset_start / curr_block_size)*curr_block_size); - data.addVarInt(curr_block_size); - pipe->Write(std::string(data.getDataPtr(), data.getDataSize())); - - while (pipe->getNumElements() > n_threads * 3) - { - lock.relock(nullptr); - Server->wait(100); - lock.relock(mutex.get()); - } - - offset_start += toread; - } - } - } - - lock.relock(nullptr); - - pipe->Write(std::string()); - - Server->getThreadPool()->waitFor(tickets); -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::preload_items_async(fuse_io_context& io, const std::string& fn, size_t n_threads, int tag, bool disable_memfiles, bool load_only) -{ - std::unique_ptr f(Server->openFile(fn, MODE_READ)); - - if (f.get() == nullptr) - co_return 0; - - Server->Log("Starting async preloading fn=" + fn + " size=" + PrettyPrintBytes(f->Size()) + - " items=" + convert(f->Size() / (2 * sizeof(int64))) + - " n_threads=" + convert(n_threads) + " tag=" + convert(tag) + - " disable_memfiles=" + convert(disable_memfiles) + " load_only=" + convert(load_only), LL_INFO); - - auto available_workers = std::make_unique(n_threads); - auto worker_waiters_head = std::make_unique(nullptr); - - char buf[1024]; - _u32 rc; - while ((rc = f->Read(buf, sizeof(buf))) > 0) - { - for (size_t i = 0; i + 1 < rc / sizeof(int64); i += 2) - { - int64 offset_start; - memcpy(&offset_start, &buf[i * sizeof(offset_start)], sizeof(offset_start)); - - int64 offset_end; - memcpy(&offset_end, &buf[(i + 1) * sizeof(offset_end)], sizeof(offset_end)); - - if (offset_start > cloudfile_size) - { - continue; - } - - if (offset_end > cloudfile_size) - offset_end = cloudfile_size; - - while (offset_start < offset_end) - { - size_t lock_idx = co_await lock_extent_async(0, 0, false); - - Server->Log("Preloading offset " + convert(offset_start) + " size " + convert(offset_end - offset_start), LL_INFO); - bool has_block = co_await bitmap->get_async(io, offset_start / block_size); - - if (!has_block) - { - int64 tozero = (std::min)((block_size - offset_start % block_size), offset_end - offset_start); - offset_start += tozero; - unlock_extent_async(0, 0, false, lock_idx); - continue; - } - - bool in_big_block = co_await big_blocks_bitmap->get_async(io, offset_start / big_block_size); - - unlock_extent_async(0, 0, false, lock_idx); - - int64 curr_block_size; - std::string key; - if (in_big_block) - { - curr_block_size = big_block_size; - key = big_block_key(offset_start / curr_block_size); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(offset_start / curr_block_size); - } - - int64 toread = (std::min)((curr_block_size - offset_start % curr_block_size), offset_end - offset_start); - - - preload_items_single_async(io, *available_workers, *worker_waiters_head, key, - (offset_start / curr_block_size) * curr_block_size, curr_block_size, - tag, disable_memfiles, load_only); - - offset_start += toread; - } - } - } - - while (*available_workers != n_threads) - { - co_await io.sleep_ms(100); - } - - co_return 0; -} -#endif //HAS_ASYNC - -bool CloudFile::has_preload_item(int64 offset_start, int64 offset_end) -{ - if (is_async) - abort(); - - IScopedLock lock(mutex.get()); - - if (offset_start > cloudfile_size) - { - return false; - } - - if (offset_end > cloudfile_size) - offset_end = cloudfile_size; - - while (offset_start < offset_end) - { - bool has_block = bitmap->get(offset_start / block_size); - - if (!has_block) - { - int64 tozero = (std::min)((block_size - offset_start%block_size), offset_end - offset_start); - offset_start += tozero; - continue; - } - - bool in_big_block = big_blocks_bitmap->get(offset_start / big_block_size); - - int64 curr_block_size; - std::string key; - if (in_big_block) - { - curr_block_size = big_block_size; - key = big_block_key(offset_start / curr_block_size); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(offset_start / curr_block_size); - } - - int64 toread = (std::min)((curr_block_size - offset_start%curr_block_size), offset_end - offset_start); - - lock.relock(nullptr); - - if (!kv_store.has_item_cached(key)) - { - return false; - } - - lock.relock(mutex.get()); - - offset_start += toread; - } - - return true; -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::has_preload_item_async(fuse_io_context& io, int64 offset_start, int64 offset_end) -{ - if (offset_start > cloudfile_size) - { - co_return false; - } - - if (offset_end > cloudfile_size) - offset_end = cloudfile_size; - - size_t lock_idx = co_await lock_extent_async(0, 0, false); - - while (offset_start < offset_end) - { - bool has_block = co_await bitmap->get_async(io, offset_start / block_size); - - if (!has_block) - { - int64 tozero = (std::min)((block_size - offset_start % block_size), offset_end - offset_start); - offset_start += tozero; - continue; - } - - bool in_big_block = co_await big_blocks_bitmap->get_async(io, offset_start / big_block_size); - - int64 curr_block_size; - std::string key; - if (in_big_block) - { - curr_block_size = big_block_size; - key = big_block_key(offset_start / curr_block_size); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(offset_start / curr_block_size); - } - - int64 toread = (std::min)((curr_block_size - offset_start % curr_block_size), offset_end - offset_start); - - if (!kv_store.has_item_cached(key)) - { - unlock_extent_async(0, 0, false, lock_idx); - co_return false; - } - - offset_start += toread; - } - - unlock_extent_async(0, 0, false, lock_idx); - - co_return true; -} -#endif //HAS_ASYNC - -int64 CloudFile::min_block_size() -{ - return small_block_size; -} - -void CloudFile::set_is_mounted(const std::string & p_mount_path, IBackupFileSystem* fs) -{ - std::vector new_fs_chunks; -#ifdef HAS_MOUNT_SERVICE - if (!btrfs_get_chunks_via_service(p_mount_path, new_fs_chunks)) - { - new_fs_chunks = btrfs_chunks::get_chunks(p_mount_path); - } -#else - topfs = fs; - new_fs_chunks = fs->getChunks(); -#endif - - for (auto& it : new_fs_chunks) - { - it.offset += 8192; - } - - { - IScopedWriteLock lock(chunks_mutex.get()); - fs_chunks = new_fs_chunks; - mount_path = p_mount_path; - } - - - kv_store.set_num_second_chances_callback(this); -} - -void CloudFile::update_fs_chunks(bool update_time, IScopedWriteLock& lock) -{ - lock.relock(chunks_mutex.get()); - -#ifndef HAS_MOUNT_SERVICE - if (topfs == nullptr) - return; -#endif - - if (updating_fs_chunks) - { - return; - } - - std::string local_mount_path; - updating_fs_chunks = true; - local_mount_path = mount_path; - lock.relock(nullptr); - - std::shared_ptr new_fs_chunks = std::make_shared(); - THREADPOOL_TICKET ticket = Server->getThreadPool()->execute( - new UpdateFsChunksThread(topfs, new_fs_chunks, local_mount_path, -1), "updt fs chunks"); - - if (Server->getThreadPool()->waitFor(ticket, 1000)) - { - lock.relock(chunks_mutex.get()); - - fs_chunks = new_fs_chunks->chunks; - for (auto& it : fs_chunks) - { - it.offset += 8192; - } - } - else - { - lock.relock(chunks_mutex.get()); - - if (update_time) - last_fs_chunks_update += 60000; - - update_time = false; - } - - - if(update_time) - last_fs_chunks_update = Server->getTimeMS()-10*60*1000; - - updating_fs_chunks = false; -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::update_fs_chunks_async(fuse_io_context& io, bool update_time) -{ - IScopedWriteLock lock(chunks_mutex.get()); - - if (updating_fs_chunks) - { - co_return; - } - - std::string local_mount_path; - updating_fs_chunks = true; - local_mount_path = mount_path; - lock.relock(nullptr); - - std::shared_ptr new_fs_chunks = std::make_shared(); - Server->getThreadPool()->execute(new UpdateFsChunksThread(new_fs_chunks, local_mount_path, update_fs_chunks_eventfd), "updt fs chunks"); - - int64 starttime = Server->getTimeMS(); - std::vector buf(sizeof(int64)); - - bool thread_done = false; - - io_uring_sqe* sqe = io.get_sqe(2); - io_uring_prep_read(sqe, update_fs_chunks_eventfd, buf.data(), buf.size(), 0); - sqe->flags |= IOSQE_IO_LINK; - - io_uring_sqe* sqe_timeout = io.get_sqe(); - __kernel_timespec waitt = io.msec_timespec(1000); - io_uring_prep_link_timeout(sqe_timeout, &waitt, 0); - - auto [rc1, rc2] = co_await io.complete(std::make_pair(sqe, sqe_timeout)); - - if (new_fs_chunks->done.load(std::memory_order_acquire)) - { - lock.relock(chunks_mutex.get()); - - fs_chunks = new_fs_chunks->chunks; - for (auto& it : fs_chunks) - { - it.offset += 8192; - } - } - else - { - lock.relock(chunks_mutex.get()); - - if (update_time) - last_fs_chunks_update += 60000; - - update_time = false; - } - - - if (update_time) - last_fs_chunks_update = Server->getTimeMS() - 10 * 60 * 1000; - - updating_fs_chunks = false; -} -#endif //HAS_ASYNC - -namespace -{ -#ifdef HAS_ONLINE_BACKEND - class TokenRefreshCallback : public ITokenRefreshCallback - { - std::string conf_fn; - public: - TokenRefreshCallback(const std::string& conf_fn) - :conf_fn(conf_fn) - { - } - - virtual std::string getNewToken() - { - std::unique_ptr settings(Server->createFileSettingsReader( - conf_fn)); - - if (settings.get() != NULL) - { - std::string password; - if (settings->getValue("auth_token", &password)) - { - return password; - } - } - - return std::string(); - } - }; -#endif - - class MigrationThread : public IThread - { - IPipe* pipe; - CloudFile* cf; - std::atomic& has_error; - public: - MigrationThread(IPipe* pipe, CloudFile* cf, std::atomic& has_error) - : pipe(pipe), cf(cf), has_error(has_error) {} - void operator()() - { - std::vector buf; - buf.resize(block_size); - - std::string msg; - while (pipe->Read(&msg) > 0) - { - CRData data(msg.data(), msg.size()); - - int64 offset; - int64 len; - if (data.getVarInt(&offset) - && data.getVarInt(&len)) - { - if (!cf->migrate(buf, offset, len)) - { - has_error = true; - break; - } - } - } - - pipe->Write(std::string()); - delete this; - } - }; -} - -bool CloudFile::migrate(const std::string & conf_fn, bool continue_migration) -{ - std::unique_ptr settings(Server->createMemorySettingsReader(getFile(conf_fn))); - - if (!continue_migration) - { - if (os_directory_exists(cache_path + "/migration")) - { - os_remove_nonempty_dir(cache_path + "/migration"); - } - - if (os_directory_exists(cache_path + "/migration")) - { - Server->Log(cache_path + "/migration still exists", LL_ERROR); - return false; - } - - if (!os_create_dir_recursive(cache_path + "/migration")) - { - Server->Log("Error creating " + cache_path + "/migration . " + os_last_error_str(), LL_ERROR); - return false; - } - } - - return migrate(conf_fn, settings.get()); -} - -std::string CloudFile::migration_info() -{ - IScopedLock lock(mutex.get()); - - if (migrate_to_cf == nullptr) - return "{\"migration\": false}"; - - int done_pc = static_cast((migration_copy_done * 100) / cloudfile_size); - - JSON::Object ret; - - ret.set("done_pc", done_pc); - ret.set("migration", true); - ret.set("num_dirty_items", migrate_to_cf->getNumDirtyItems()); - ret.set("num_memfile_items", migrate_to_cf->getNumMemfileItems()); - ret.set("submitted_bytes", migrate_to_cf->getSubmittedBytes()); - ret.set("used_bytes", migrate_to_cf->getSubmittedBytes()); - ret.set("cache_size", migrate_to_cf->getCacheSize()); - ret.set("comp_bytes", migrate_to_cf->getCompBytes()); - ret.set("congested", migrate_to_cf->Congested()); - ret.set("raid_groups", migrate_to_cf->get_raid_groups()); - ret.set("disk_error_info", migrate_to_cf->disk_error_info()); - ret.set("memfile_bytes", migrate_to_cf->get_memfile_bytes()); - ret.set("submitted_memfile_bytes", migrate_to_cf->get_submitted_memfile_bytes()); - ret.set("conf_fn", migration_conf_fn); - - return ret.stringify(false); -} - -void CloudFile::preload_key(const std::string & key, int64 offset, int64 len, int tag, bool disable_memfiles, bool load_only) -{ - if (is_async) - abort(); - - IScopedLock lock(mutex.get()); - - lock_extent(lock, offset, len, false); - - bool in_big_block = big_blocks_bitmap->get(offset / big_block_size); - - int64 curr_block_size; - std::string curr_key; - bool has_block; - if (in_big_block) - { - curr_block_size = big_block_size; - curr_key = big_block_key(offset / curr_block_size); - has_block = bitmap_has_big_block(offset / curr_block_size); - } - else - { - curr_block_size = small_block_size; - curr_key = small_block_key(offset / curr_block_size); - has_block = bitmap_has_small_block(offset / curr_block_size); - } - - if (key == curr_key - && has_block) - { - lock.relock(nullptr); - - unsigned int flags; - if (disable_memfiles - && load_only) - { - flags = TransactionalKvStore::Flag::read_only | TransactionalKvStore::Flag::prioritize_read | - TransactionalKvStore::Flag::disable_memfiles; - } - else - { - flags = TransactionalKvStore::Flag::read_only | TransactionalKvStore::Flag::prioritize_read | - TransactionalKvStore::Flag::preload_once; - - if(disable_memfiles) - flags |= TransactionalKvStore::Flag::disable_memfiles; - } - - IFile* block = kv_store.get(key, TransactionalKvStore::BitmapInfo::Present, - flags, curr_block_size, tag); - - if (block != nullptr) - { - kv_store.release(key); - } - - lock.relock(mutex.get()); - } - - unlock_extent(lock, offset, len, false); -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::preload_key_async(fuse_io_context& io, const std::string& key, - int64 offset, int64 len, int tag, bool disable_memfiles, bool load_only) -{ - size_t lock_idx = co_await lock_extent_async(offset, len, false); - - bool in_big_block = big_blocks_bitmap->get(offset / big_block_size); - - int64 curr_block_size; - std::string curr_key; - bool has_block; - if (in_big_block) - { - curr_block_size = big_block_size; - curr_key = big_block_key(offset / curr_block_size); - has_block = co_await bitmap_has_big_block_async(io, offset / curr_block_size); - } - else - { - curr_block_size = small_block_size; - curr_key = small_block_key(offset / curr_block_size); - has_block = co_await bitmap_has_small_block_async(io, offset / curr_block_size); - } - - if (key == curr_key - && has_block) - { - unsigned int flags; - if (disable_memfiles - && load_only) - { - flags = TransactionalKvStore::Flag::read_only | TransactionalKvStore::Flag::prioritize_read | - TransactionalKvStore::Flag::disable_memfiles; - } - else - { - flags = TransactionalKvStore::Flag::read_only | TransactionalKvStore::Flag::prioritize_read | - TransactionalKvStore::Flag::preload_once; - - if (disable_memfiles) - flags |= TransactionalKvStore::Flag::disable_memfiles; - } - - IFile* block = co_await kv_store.get_async(io, key, TransactionalKvStore::BitmapInfo::Present, - flags, curr_block_size, tag); - - if (block != nullptr) - { - co_await kv_store.release_async(io, key); - } - } - - unlock_extent_async(offset, len, false, lock_idx); - - co_return 0; -} -#endif //HAS_ASYNC - -int64 CloudFile::get_uploaded_bytes() -{ - return online_kv_store->get_uploaded_bytes(); -} - -int64 CloudFile::get_downloaded_bytes() -{ - return online_kv_store->get_downloaded_bytes(); -} - -int64 CloudFile::get_written_bytes() -{ - if (is_async) - { - return written_bytes; - } - - IScopedLock lock(mutex.get()); - return written_bytes; -} - -int64 CloudFile::get_read_bytes() -{ - if (is_async) - { - read_bytes; - } - - IScopedLock lock(mutex.get()); - return read_bytes; -} - - -std::string CloudFile::get_raid_io_stats() -{ -#ifdef HAS_LOCAL_BACKEND - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - return backend->io_stats(); - } - } -#endif - - return std::string(); -} - -namespace -{ -#pragma pack(1) - struct SLogHeader - { - _u32 size; - int64 offset; - _u32 crc; - }; -#pragma pack() - - const std::string slog_magic = "TCDSLOG#1.0"; -} - -bool CloudFile::replay_slog() -{ -#ifdef WITH_SLOG - if (slog_path.empty()) - return true; - - if (!FileExists(slog_path)) - { - slog.reset(Server->openFile(slog_path, MODE_WRITE)); - - if (slog.get() == nullptr) - { - Server->Log("Error creating slog file at \"" + slog_path + "\". " + os_last_error_str(), LL_ERROR); - return false; - } - - if (slog->Write(slog_magic) != slog_magic.size()) - { - Server->Log("Error writing magic to slog file at \"" + slog_path + "\". " + os_last_error_str(), LL_ERROR); - return false; - } - - int64 transid = kv_store.get_basetransid(); - - if (slog->Write(reinterpret_cast(&transid), sizeof(transid)) != sizeof(transid)) - { - Server->Log("Error writing transid to slog at \""+slog_path+"\". " + os_last_error_str(), LL_ERROR); - return false; - } - - slog->Sync(); - - slog_size = slog_magic.size() + sizeof(transid); - slog_last_sync = slog_magic.size() + sizeof(transid); - - return true; - } - - slog.reset(Server->openFile(slog_path, MODE_READ_SEQUENTIAL)); - - if (slog.get() == nullptr) - { - Server->Log("Error opening slog file at \"" + slog_path + "\". " + os_last_error_str(), LL_ERROR); - return false; - } - - std::string magic = slog->Read(static_cast<_u32>(slog_magic.size())); - - if (magic != slog_magic) - { - Server->Log("Slog magic wrong", LL_ERROR); - return false; - } - - slog_size = slog_magic.size(); - - - int64 transid = 0; - - if (slog->Read(reinterpret_cast(&transid), sizeof(transid)) != sizeof(transid)) - { - Server->Log("Error reading transid", LL_ERROR); - return false; - } - - if (kv_store.get_basetransid() != transid) - { - Server->Log("Transid in slog wrong expected " + convert(kv_store.get_basetransid()) + " got " + convert(transid), LL_ERROR); - return false; - } - - slog_size += sizeof(transid); - - std::vector buf; - - Server->Log("Replaying slog size " + PrettyPrintBytes(slog->Size()), LL_INFO); - - do - { - bool has_read_error = false; - SLogHeader slog_header = {}; - if (slog->Read(reinterpret_cast(&slog_header), sizeof(slog_header), &has_read_error) != sizeof(slog_header)) - { - if (has_read_error) - { - Server->Log("Error reading slog header. " + os_last_error_str(), LL_ERROR); - return false; - } - - break; - } - - if (buf.size() < slog_header.size) - { - buf.resize(slog_header.size); - } - - if (slog->Read(buf.data(), slog_header.size, &has_read_error) != slog_header.size) - { - if (has_read_error) - { - Server->Log("Error reading slog data. " + os_last_error_str(), LL_ERROR); - return false; - } - - Server->Log("Error reading enough slog data", LL_ERROR); - return false; - } - - _u32 crc = slog_header.crc; - slog_header.crc = 0; - - cryptopp_crc::CRC32C crc2; - crc2.Update(reinterpret_cast(&slog_header), sizeof(slog_header)); - crc2.Update(buf.data(), slog_header.size); - _u32 r2 = crc2.Final(); - - if (r2 != crc) - { - Server->Log("slog crc32c wrong. " + convert(r2) + "!=" + convert(crc), LL_ERROR); - return false; - } - - if (Write(slog_header.offset, buf.data(), slog_header.size) != slog_header.size) - { - Server->Log("Error writing slog at offset " + convert(slog_header.offset) + " size " + convert(slog_header.size), LL_ERROR); - return false; - } - - slog_size += sizeof(slog_header) + slog_header.size; - } while (true); - - slog.reset(Server->openFile(slog_path, MODE_RW_READNONE)); - - if (slog.get() == nullptr) - { - Server->Log("Error opening slog file at \"" + slog_path + "\" (2). " + os_last_error_str(), LL_ERROR); - return false; - } - - slog_last_sync = slog_size.load(); - -#endif //WITH_SLOG - - return true; -} - -std::string CloudFile::get_mirror_stats() -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->mirror_stats(); - } - - return std::string(); -} - -std::string CloudFile::get_raid_freespace_stats() -{ -#ifdef HAS_LOCAL_BACKEND - { - IScopedLock lock(mutex.get()); - - if (!enable_raid_freespace_stats) - return std::string(); - } - - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - return backend->freespace_stats(); - } - } -#endif - - return std::string(); -} - -bool CloudFile::slog_write(int64 pos, const char * buffer, _u32 bsize, bool& needs_reset) -{ -#ifdef WITH_SLOG - needs_reset = false; - - if (slog.get() == nullptr) - return true; - - SLogHeader slog_header = {}; - slog_header.offset = pos; - slog_header.size = bsize; - - cryptopp_crc::CRC32C crc2; - crc2.Update(reinterpret_cast(&slog_header), sizeof(slog_header)); - crc2.Update(buffer, bsize); - slog_header.crc = crc2.Final(); - - int64 slog_pos = slog_size.fetch_add(sizeof(slog_header) + bsize); - - if (slog->Write(slog_pos, reinterpret_cast(&slog_header), sizeof(slog_header)) != sizeof(slog_header)) - { - Server->Log("Error writing to slog file " + slog_path + " (1). " + os_last_error_str(), LL_ERROR); - return false; - } - - if (slog->Write(slog_pos + sizeof(slog_header), buffer, bsize) != bsize) - { - Server->Log("Error writing to slog file " + slog_path + " (2). " + os_last_error_str(), LL_ERROR); - return false; - } - - int64 slog_pos_end = slog_pos + static_cast(sizeof(slog_header)) + bsize; - - if (slog_pos < slog_max_size - && slog_pos_end >= slog_max_size) - { - needs_reset = true; - } - else if (slog_last_sync + slog_kernel_buffer_size < slog_pos - && slog_pos_end >= slog_last_sync + slog_kernel_buffer_size) - { -#ifndef _WIN32 - sync_file_range(slog->getOsHandle(), 0, slog_pos_end - slog_kernel_buffer_size, - SYNC_FILE_RANGE_WRITE); - posix_fadvise64(slog->getOsHandle(), 0, slog_pos_end - slog_kernel_buffer_size, POSIX_FADV_DONTNEED); -#endif - slog_last_sync = slog_pos_end; - } - -#endif //WITH_SLOG - - return true; -} - -bool CloudFile::migrate(const std::string& conf_fn, ISettingsReader * settings) -{ -#ifdef WITH_MIGRATION - if (!os_directory_exists(cache_path + "/migration")) - { - Server->Log(cache_path + "/migration does not exist", LL_ERROR); - return false; - } - - std::string key; - if (!settings->getValue("key", &key)) - { - Server->Log("Migration setting missing", LL_ERROR); - return false; - } - - std::string aes_key = pbkdf2_sha256(key); - - IScopedLock lock(mutex.get()); - - int64 orig_cloudfile_size = cloudfile_size; - ++wait_for_exclusive; - lock_extent(lock, 0, orig_cloudfile_size, true); - --wait_for_exclusive; - - migration_conf_fn = conf_fn; - - IBackupFileSystem* migrate_cachefs = new PassThroughFileSystem(cache_path + "/migration"); - - IOnlineKvStore* migrate_online_kv_store = create_migration_endpoint(conf_fn, settings, - migrate_cachefs); - - int64 reserved_cache_device_space = 1LL * 1024 * 1024 * 1024; - int64 min_metadata_cache_free = 1LL * 1024 * 1024 * 1024; - - int64 total_ram = get_total_ram_kb(); - - if (total_ram > 0) - { - total_ram *= 1024LL; - } - - int64 cache_size = 2LL * 1024 * 1024 * 1024; - if (total_ram < 4LL * 1024 * 1024 * 1024) - { - cache_size = total_ram / 4; - } - - std::string background_compression_method = settings->getValue("background_compression_method", "zstd_19"); - - migrate_to_cf.reset(new CloudFile(cache_path + "/migration", - migrate_cachefs, cloudfile_size, cloudfile_size, migrate_online_kv_store, - aes_key, get_compress_encrypt_factory(), false, 1, false, 1, - reserved_cache_device_space, min_metadata_cache_free, false, true, true, 0.8f, cache_size, std::string(), 1, true, - std::string(), std::string(), CompressionMethodFromString(background_compression_method), - std::string(), 0, is_async)); - - migration_has_error = false; - migration_copy_max = 0; - migration_copy_done = 0; - - std::string last_transid = settings->getValue("transid"); - if (!last_transid.empty() - && watoi64(last_transid) != migrate_to_cf->kv_store.get_basetransid()) - { - Server->Log("Last transid " + last_transid + " does not equal current base transid " + convert(migrate_to_cf->kv_store.get_basetransid()) + ". Not continuing migration", LL_ERROR); - migration_has_error = true; - } - - size_t num_threads = os_get_num_cpus(); - - migration_copy_done_lag = (num_threads + 2)*big_block_size; - - migration_thread_pool.reset(Server->createThreadPool(num_threads, 2, "idle migration thread")); - - IPipe* migration_pipe = Server->createMemoryPipe(); - - for (size_t i = 0; i < num_threads; ++i) - { - migration_thread_pool->execute(new MigrationThread(migration_pipe, this, migration_has_error), "migration"); - } - - std::string settings_data; - std::vector settings_keys = settings->getKeys(); - - for (std::string& key : settings_keys) - { - if (key != "transid") - { - settings_data += key + "=" + settings->getValue(key) + "\n"; - } - } - - settings_data += "transid=" + convert(migrate_to_cf->kv_store.get_transid()) + "\n"; - - IFsFile* settings_f = kv_store.get("clouddrive_migration_settings", TransactionalKvStore::BitmapInfo::Unknown, - TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); - - if (settings_f != nullptr) - { - if (settings_f->Write(0, settings_data) != settings_data.size()) - { - migration_has_error = true; - } - - if (!settings_f->Resize(settings_data.size(), false)) - { - migration_has_error = true; - } - - kv_store.release("clouddrive_migration_settings"); - } - else - { - migration_has_error = true; - } - - unlock_extent(lock, 0, orig_cloudfile_size, true); - - if (migration_has_error) - { - migration_pipe->Write(std::string()); - return false; - } - - int64 pos = settings->getValue("done", 0LL); - - migration_copy_done = pos; - - for (; pos < cloudfile_size; pos += big_block_size) - { - CWData data; - data.addVarInt(pos); - data.addVarInt(big_block_size); - - migration_copy_max = pos + big_block_size; - - migration_pipe->Write(data.getDataPtr(), data.getDataSize()); - - if (migration_has_error) - { - break; - } - - while (migration_pipe->getNumElements() > 100) - { - Server->wait(100); - } - } - - migration_pipe->Write(std::string()); - - return !migration_has_error; -#else //WITH_MIGRATION - return false; -#endif //WITH_MIGRATION -} - -bool CloudFile::update_migration_settings() -{ - IFsFile* settings_f = kv_store.get("clouddrive_migration_settings", TransactionalKvStore::BitmapInfo::Unknown, - TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); - - if (settings_f != nullptr) - { - std::string curr_settings = settings_f->Read(static_cast<_u32>(settings_f->Size())); - - std::unique_ptr settings(Server->createMemorySettingsReader(curr_settings)); - - std::string settings_data; - std::vector settings_keys = settings->getKeys(); - - for (std::string& key : settings_keys) - { - if (key != "transid" - && key!="done") - { - settings_data += key + "=" + settings->getValue(key) + "\n"; - } - } - - settings_data += "transid=" + convert(migrate_to_cf->kv_store.get_transid()) + "\n"; - settings_data += "done=" + convert(migration_copy_done) + "\n"; - - if (settings_f->Write(0, settings_data) != settings_data.size()) - { - kv_store.release("clouddrive_migration_settings"); - return false; - } - - if (!settings_f->Resize(settings_data.size(), false)) - { - kv_store.release("clouddrive_migration_settings"); - return false; - } - - kv_store.release("clouddrive_migration_settings"); - } - else - { - return false; - } - - return true; -} - -IOnlineKvStore * CloudFile::create_migration_endpoint(const std::string& conf_fn, ISettingsReader* settings, - IBackupFileSystem* migrate_cachefs) -{ -#ifdef WITH_MIGRATION - std::string endpoint; - std::string key; - if (!settings->getValue("endpoint", &endpoint) - || !settings->getValue("key", &key)) - { - Server->Log("Migration setting missing", LL_ERROR); - return nullptr; - } - - std::string aes_key = pbkdf2_sha256(key); - - if (endpoint == "urbackup") - { - std::string auth_token; - std::string api; - if (!settings->getValue("auth_token", &auth_token) - || !settings->getValue("api", &api)) - { - Server->Log("Migration setting missing (1)", LL_ERROR); - return nullptr; - } - - return new OnlineKvStore(api, auth_token, aes_key, - cache_path + "/migration/generation", new TokenRefreshCallback(conf_fn), - get_compress_encrypt_factory(), - CompressionMethodFromString(settings->getValue("compression_method", "zstd_9")), - migrate_cachefs); - } - else if (endpoint == "s3") - { - std::string access_key; - std::string secret_access_key; - - if (!settings->getValue("access_key", &access_key) || - !settings->getValue("secret_access_key", &secret_access_key)) - { - Server->Log("access_key and secret_access_key not set correctly", LL_ERROR); - return nullptr; - } - - std::string bucket_name; - if (!settings->getValue("bucket_name", &bucket_name)) - { - Server->Log("S3 bucket name not set correctly", LL_ERROR); - return nullptr; - } - std::string s3_endpoint; - settings->getValue("s3_endpoint", &s3_endpoint); - std::string s3_region; - settings->getValue("s3_region", &s3_region); - std::string storage_class; - settings->getValue("storage_class", &storage_class); - -#ifndef _WIN32 - IKvStoreBackend* backend = new KvStoreBackendS3(aes_key, access_key, - secret_access_key, bucket_name, get_compress_encrypt_factory(), s3_endpoint, - s3_region, storage_class, - CompressionMethodFromString(settings->getValue("compression_method", "zstd_9")), - CompressionMethodFromString(settings->getValue("metadata_compression_method", "zstd_9")) - migrate_cachefs); - - return new KvStoreFrontend(cache_path + "/migration/objects.db", - backend, true, std::string(), std::string(), nullptr, std::string(), false, - false, migrate_cachefs); -#else - return nullptr; -#endif - } - else if (endpoint == "azure") - { - std::string account_name; - std::string account_key; - - if (!settings->getValue("account_name", &account_name) || - !settings->getValue("account_key", &account_key)) - { - Server->Log("account_name and account_key not set correctly", LL_ERROR); - return nullptr; - } - - std::string container_name; - if (!settings->getValue("container_name", &container_name)) - { - Server->Log("Azure container name not set correctly", LL_ERROR); - return nullptr; - } - -#ifndef _WIN32 - IKvStoreBackend* backend = new KvStoreBackendAzure(aes_key, account_name, - account_key, container_name, get_compress_encrypt_factory(), - CompressionMethodFromString(settings->getValue("compression_method", "zstd_9")), - CompressionMethodFromString(settings->getValue("metadata_compression_method", "zstd_9")) - migrate_cachefs); - - return new KvStoreFrontend(cache_path + "/migration/objects.db", - backend, true, std::string(), std::string(), nullptr, std::string(), false, - false, migrate_cachefs); -#else - return nullptr; -#endif - } - - Server->Log("Endpoint " + endpoint + " not implemented", LL_ERROR); -#endif //WITH_MIGRATION - return nullptr; -} - -bool CloudFile::is_metadata(int64 offset, const std::string& key) -{ - assert(fuse_io_context::is_sync_thread()); - - if (offset <= 5*1024*1024) - { - return true; - } - - { - IScopedReadLock lock(chunks_mutex.get()); - - if (mount_path.empty()) - return false; - - int64 timems = Server->getTimeMS(); - - if (timems - last_fs_chunks_update>60*60*1000 - && !updating_fs_chunks) - { - lock.relock(nullptr); - { - IScopedWriteLock lock(nullptr); - update_fs_chunks(true, lock); - } - lock.relock(chunks_mutex.get()); - } - - auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), IBackupFileSystem::SChunk(offset, 0, false)); - - if (it != fs_chunks.begin()) - --it; - - if (it != fs_chunks.end() - && it->offset <= offset && it->offset + it->len >= offset) - return it->metadata; - - if (timems - last_fs_chunks_update < 1000) - { - lock.relock(nullptr); - - IScopedWriteLock wlock(chunks_mutex.get()); - if (!key.empty()) - { - missing_chunk_keys.push_back(std::make_pair(key, timems)); - update_missing_chunks_thread->notify(); - } - return false; - } - - while (updating_fs_chunks - && Server->getTimeMS() - timems < 60000) - { - lock.relock(nullptr); - Server->wait(100); - lock.relock(chunks_mutex.get()); - } - - if (updating_fs_chunks) - { - Server->Log("Cannot find chunk for offset " + convert(offset) + " in fs chunk data. Fs chunk data currently updating.", LL_INFO); - return false; - } - } - - IScopedWriteLock lock(nullptr); - - update_fs_chunks(false, lock); - - auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), IBackupFileSystem::SChunk(offset, 0, false)); - - if (it != fs_chunks.begin()) - --it; - - if (it != fs_chunks.end() - && it->offset <= offset && it->offset + it->len >= offset) - return it->metadata; - - last_fs_chunks_update = Server->getTimeMS(); - - if (!key.empty()) - { - assert(lock.getLock() != nullptr); - missing_chunk_keys.push_back(std::make_pair(key, last_fs_chunks_update)); - update_missing_chunks_thread->notify(); - } - Server->Log("Cannot find chunk for offset " + convert(offset) + " in fs chunk data. Fs chunk data size " + convert(fs_chunks.size()), LL_INFO); - return false; -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::is_metadata_async(fuse_io_context& io, int64 offset, const std::string& key) -{ - if (offset <= 5 * 1024 * 1024) - { - co_return true; - } - - { - IScopedReadLock lock(chunks_mutex.get()); - - if (mount_path.empty()) - co_return false; - - int64 timems = Server->getTimeMS(); - - if (timems - last_fs_chunks_update > 60 * 60 * 1000 - && !updating_fs_chunks) - { - lock.relock(NULL); - { - co_await update_fs_chunks_async(io, true); - } - lock.relock(chunks_mutex.get()); - } - - auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), btrfs_chunks::SChunk(offset, 0, false)); - - if (it != fs_chunks.begin()) - --it; - - if (it != fs_chunks.end() - && it->offset <= offset && it->offset + it->len >= offset) - co_return it->metadata; - - if (timems - last_fs_chunks_update < 1000) - { - lock.relock(nullptr); - - IScopedWriteLock wlock(chunks_mutex.get()); - if (!key.empty()) - { - missing_chunk_keys.push_back(std::make_pair(key, timems)); - update_missing_chunks_thread->notify(); - } - co_return false; - } - - while (updating_fs_chunks - && Server->getTimeMS() - timems < 60000) - { - lock.relock(nullptr); - - co_await io.sleep_ms(100); - - lock.relock(chunks_mutex.get()); - } - - if (updating_fs_chunks) - { - Server->Log("Cannot find chunk for offset " + convert(offset) + " in fs chunk data. Fs chunk data currently updating.", LL_INFO); - co_return false; - } - } - - co_await update_fs_chunks_async(io, false); - - IScopedReadLock lock(chunks_mutex.get()); - - auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), btrfs_chunks::SChunk(offset, 0, false)); - - if (it != fs_chunks.begin()) - --it; - - if (it != fs_chunks.end() - && it->offset <= offset && it->offset + it->len >= offset) - co_return it->metadata; - - last_fs_chunks_update = Server->getTimeMS(); - - if (!key.empty()) - { - assert(lock.getLock() != nullptr); - missing_chunk_keys.push_back(std::make_pair(key, last_fs_chunks_update)); - update_missing_chunks_thread->notify(); - } - Server->Log("Cannot find chunk for offset " + convert(offset) + " in fs chunk data. Fs chunk data size " + convert(fs_chunks.size()), LL_INFO); - - co_return false; -} -#endif //HAS_ASYNC - -const unsigned int def_metadata_second_chances = 10; - -unsigned int CloudFile::get_num_second_chances(const std::string & key) -{ - int64 offset = key_offset(key); - - if (offset < 0) - return 0; - - if (is_metadata(offset, key)) - { - return def_metadata_second_chances; - } - - return 0; -} - -bool CloudFile::is_metadata(const std::string & key) -{ - int64 offset = key_offset(key); - - if (offset < 0) - return false; - - return is_metadata(offset, key); -} - -bool CloudFile::update_missing_fs_chunks() -{ - IScopedWriteLock lock(nullptr); - update_fs_chunks(false, lock); - - Server->Log("Retrying update missing chunks. Missing: " + convert(missing_chunk_keys.size())); - - int64 timems = Server->getTimeMS(); - bool first = true; - - std::vector new_metadata_keys; - assert(lock.getLock() != nullptr); - std::vector > new_missing_chunk_keys; - for (std::pair key : missing_chunk_keys) - { - int64 offset = key_offset(key.first); - - if (offset < 0) - continue; - - if (first) - { - Server->Log("First missing chunk offset: "+convert(offset)); - first = false; - } - - auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), IBackupFileSystem::SChunk(offset, 0, false)); - - if (it != fs_chunks.begin()) - --it; - - if (it != fs_chunks.end() - && it->offset <= offset && it->offset + it->len >= offset) - { - Server->Log("Chunk for offset " + convert(offset) + " found now.", LL_INFO); - - if (it->metadata) - { - new_metadata_keys.push_back(key.first); - } - } - else if(timems - key.second<60*1000) - { - new_missing_chunk_keys.push_back(key); - } - else - { - Server->Log("Giving up on finding chunk for offset " + convert(offset) + ".", LL_INFO); - } - } - - missing_chunk_keys = new_missing_chunk_keys; - - lock.relock(nullptr); - - for (std::string& key : new_metadata_keys) - { - kv_store.set_second_chances(key, def_metadata_second_chances); - } - - return new_missing_chunk_keys.empty(); -} - -bool CloudFile::migrate(std::vector& buf, int64 offset, int64 len) -{ - IScopedLock lock(mutex.get()); - - lock_extent(lock, offset, len, true); - - IScopedLock migrate_lock(migrate_to_cf->mutex.get()); - migrate_to_cf->lock_extent(migrate_lock, offset, len, true); - - bool ret = true; - - for (int64 curr_offset = offset; curr_offset < offset + len;) - { - bool has_block = bitmap->get(curr_offset / block_size); - - if (!has_block) - { - int64 tozero = (std::min)((block_size - curr_offset%block_size), offset + len - curr_offset); - curr_offset += tozero; - continue; - } - - int64 toread = (std::min)(static_cast(buf.size()), offset + len - curr_offset); - - if (ReadInt(curr_offset, buf.data(), static_cast<_u32>(toread), &lock, 0, nullptr) != toread) - { - ret = false; - break; - } - - if (migrate_to_cf->WriteInt(curr_offset, buf.data(), static_cast<_u32>(toread), false, &migrate_lock, true, nullptr) != toread) - { - ret = false; - break; - } - - curr_offset += toread; - } - - migrate_to_cf->unlock_extent(migrate_lock, offset, len, true); - - unlock_extent(lock, offset, len, true); - - if (ret - && offset+len- migration_copy_done_lag > migration_copy_done) - { - migration_copy_done = offset + len; - } - - return ret; -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::WriteAsync(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io, - int64 pos, _u32 bsize, bool new_block, bool ext_lock, bool can_block) -{ - if (!fuse_io.write_fuse) - { - Server->Log("Write not from fuse"); - } - - fuse_in_header* fheader = reinterpret_cast(fuse_io.header_buf); - - if (pos > cloudfile_size) - { - Server->Log("Write position " + convert(pos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_ERROR); - co_return 0; - } - - EXTENSIVE_DEBUG_LOG(int64 starttime = Server->getTimeMS();) - - int64 target = pos + bsize; - - if (target > cloudfile_size) - { - target = cloudfile_size; - Server->Log("Write beyond target size. Reducing write size to " + convert(target - pos) + " from " + convert(bsize), LL_WARNING); - bsize = static_cast<_u32>(target - pos); - } - - int64 orig_pos = pos; - size_t lock_idx; - if (!ext_lock) - { - lock_idx = co_await lock_extent_async(orig_pos, bsize, false); - } - - EXTENSIVE_DEBUG_LOG(Server->Log("Writing " + convert(bsize) + " bytes at pos " + convert(pos), LL_DEBUG);) - - size_t set_bits = 0; - - struct WriteTask - { - WriteTask(IFsFile* fd, std::string key, uint64_t off, size_t size) - : fd(fd), key(std::move(key)), off(off), size(size) {} - IFsFile* fd; - std::string key; - int64 off; - _u32 size; - }; - - std::vector write_tasks; - - while (pos < target) - { - int64 big_block_num = pos / big_block_size; - bool has_big_block = co_await big_blocks_bitmap->get_async(io, big_block_num); - - int64 block_diff = big_block_num - active_big_block; - if (has_big_block && active_big_block >= 0 && (block_diff < -1 || block_diff>1) - && old_big_blocks_bitmap->get(big_block_num)) - { - add_fracture_big_block(big_block_num); - } - - int64 curr_block_size; - std::string key; - bool has_block; - if (has_big_block) - { - active_big_block = big_block_num; - curr_block_size = big_block_size; - key = big_block_key(pos / curr_block_size); - has_block = co_await bitmap_has_big_block_async(io, pos / curr_block_size); - - new_big_blocks_bitmap->set(big_block_num, true); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(pos / curr_block_size); - has_block = co_await bitmap_has_small_block_async(io, pos / curr_block_size); - } - - int64 towrite = (std::min)((curr_block_size - pos % curr_block_size), target - pos); - - if (!new_block && !has_block) - { - bool waited = false; - while (in_write_retrieval.find(key) != in_write_retrieval.end()) - { - co_await WriteRetrievalAwaiter(*this); - waited = true; - } - - if (waited) - { - //Update bitmap info (has_block) - continue; - } - - in_write_retrieval.insert(key); - } - - unsigned int flags = TransactionalKvStore::Flag::read_random; - - if (!can_block) - { - flags |= TransactionalKvStore::Flag::disable_throttling; - } - - if (co_await is_metadata_async(io, pos, key)) - { - flags |= TransactionalKvStore::Flag::disable_memfiles; - } - - IFsFile* block = co_await kv_store.get_async(io, key, - new_block ? TransactionalKvStore::BitmapInfo::NotPresent - : (has_block ? TransactionalKvStore::BitmapInfo::Present - : TransactionalKvStore::BitmapInfo::NotPresent), - flags, curr_block_size); - - assert(block != NULL); - - if (fallocate_block_file) - { - int64 file_block_size = block->Size(); - if (file_block_size < pos % curr_block_size) - { - io_uring_sqe* sqe = io.get_sqe(); - - io_uring_prep_fallocate(sqe, get_file_fd(block->getOsHandle()), - 0, 0, curr_block_size); - - int rc = co_await io.complete(sqe); - - if (rc == 0) - block->increaseCachedSize(curr_block_size); - } - } - - int64 block_pos = pos % curr_block_size; - write_tasks.push_back(WriteTask(block, - key, block_pos, towrite)); - - set_bits += co_await bitmap->set_range_async(io, pos / block_size, div_up(pos + towrite, block_size), true); - - if (!new_block && !has_block) - { - in_write_retrieval.erase(key); - resume_write_retrieval_awaiters(); - } - - pos += towrite; - } - - fuse_out_header* out_header = reinterpret_cast(fuse_io.scratch_buf); - out_header->error = 0; - out_header->len = sizeof(fuse_out_header) + sizeof(fuse_write_out); - out_header->unique = fheader->unique; - - fuse_write_out* write_out = reinterpret_cast(fuse_io.scratch_buf + sizeof(fuse_out_header)); - write_out->size = bsize; - write_out->padding = 0; - - std::vector done_write_tasks; - - size_t ret = 0; - size_t sqe_written = 0; - size_t tries = 10; - while (tries > 0) - { - size_t n_peek = write_tasks.size(); - - if (fuse_io.write_fuse) - n_peek += 2; - - std::vector sqes; - sqes.reserve(n_peek); - for (WriteTask& task : write_tasks) - { - io_uring_sqe* write_sqe = io.get_sqe(n_peek); - if (write_sqe == nullptr) - co_return -1; - --n_peek; - - io_uring_prep_splice(write_sqe, fuse_io.pipe[0], - -1, get_file_fd(task.fd->getOsHandle()), task.off, task.size, - SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); - write_sqe->flags |= IOSQE_IO_LINK; - - sqes.push_back(write_sqe); - } - - int expected_last_rc =-1; - - if (fuse_io.write_fuse) - { - io_uring_sqe* sqe_write_response = io.get_sqe(n_peek); - if (sqe_write_response == nullptr) - co_return -1; - --n_peek; - - io_uring_prep_write_fixed(sqe_write_response, fuse_io.pipe[1], - fuse_io.scratch_buf, out_header->len, - 0, fuse_io.scratch_buf_idx); - sqe_write_response->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; - - sqes.push_back(sqe_write_response); - - assert(n_peek == 1); - io_uring_sqe* sqe_splice_response = io.get_sqe(); - if (sqe_splice_response == nullptr) - co_return -1; - - io_uring_prep_splice(sqe_splice_response, fuse_io.pipe[0], - -1, io.fuse_ring.fd, -1, out_header->len, - SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); - sqe_splice_response->flags |= IOSQE_FIXED_FILE; - - sqes.push_back(sqe_splice_response); - - expected_last_rc = out_header->len; - } - else - { - assert(n_peek == 0); - sqes.back()->flags &= ~IOSQE_IO_LINK; - - expected_last_rc = write_tasks.back().size; - } - - std::vector rcs = co_await io.complete(sqes); - - int last_rc = rcs[rcs.size() - 1]; - - if ( (last_rc < 0 && last_rc == -ECANCELED) || - (expected_last_rc>=0 && last_rc!= expected_last_rc) ) - { - std::string rcs_str; - for (int lrc : rcs) - { - if (!rcs_str.empty()) - rcs_str += ", "; - rcs_str += convert(lrc); - } - - //Maybe needs short write splice handling here (last_rc>0 && last_rcLog("Write error (ECANCELED) rcs=" + rcs_str, LL_ERROR); - else if(last_rc!= expected_last_rc) - Server->Log("Write error (last_rc="+convert(last_rc)+" expected="+convert(expected_last_rc)+") rcs="+ rcs_str, LL_ERROR); - - for (size_t i = 0; i < write_tasks.size();) - { - WriteTask& task = write_tasks[i]; - if (rcs[i] >= 0 && rcs[i] == task.size) - { - sqe_written += task.size; - task.fd->increaseCachedSize(task.off + task.size); - done_write_tasks.push_back(task); - write_tasks.erase(write_tasks.begin() + i); - } - else if (rcs[i] > 0) - { - sqe_written += rcs[i]; - task.fd->increaseCachedSize(task.off + rcs[i]); - - Server->Log("Short write writing to block " + bytesToHex(reinterpret_cast(task.key.c_str()), - task.key.size()) + " at " + task.fd->getFilename()+" off "+convert(task.off)+" size "+convert(task.size)+ - ". Rc " + convert(rcs[i]) + ". Retrying...", LL_WARNING); - - co_await io.sleep_ms(1000); - - write_tasks[i].off += rcs[i]; - write_tasks[i].size -= rcs[i]; - ++tries; - ++i; - } - else if (rcs[i] <= 0 && rcs[i] != -ECANCELED) - { - struct stat stbuf; - int rc = fstat(get_file_fd(task.fd->getOsHandle()), &stbuf); - - Server->Log("Error writing to block " + bytesToHex(reinterpret_cast(task.key.c_str()), - task.key.size()) + " at " + task.fd->getFilename() + " off " + convert(task.off) + " size " + convert(task.size) + - ". Rc " + convert(rcs[i]) + " file_size="+convert(rc!=0 ? (int64)-1 : (int64)stbuf.st_size)+" cached_file_size="+convert(task.fd->Size())+ - ". Errno " + convert(rcs[i]) + "." + (tries>0 ? " Retrying..." : ""), - tries>0 ? LL_WARNING:LL_ERROR); - - if (tries == 0) - { - addSystemEvent("cache_err_retry", - "Error writing to block on cache", - "Error writing to block " + bytesToHex(reinterpret_cast(task.key.c_str()), - task.key.size()) + " at " + task.fd->getFilename() + " off " + convert(task.off) + " size " + convert(task.size) + - ". Rc " + convert(rcs[i]) + ". Errno " + convert(rcs[i]), LL_ERROR); - } - else - { - co_await io.sleep_ms(1000); - } - ++i; - } - else - { - ++i; - } - } - --tries; - - if (tries == 0) - { - if (fuse_io.write_fuse) - { - out_header->error = -EIO; - out_header->len = sizeof(fuse_out_header) + sizeof(fuse_write_out); - write_out->size = sqe_written; - - io_uring_sqe* sqe_write_response = io.get_sqe(2); - if (sqe_write_response == nullptr) - co_return -1; - - io_uring_prep_write_fixed(sqe_write_response, fuse_io.pipe[1], - fuse_io.scratch_buf, out_header->len, - 0, fuse_io.scratch_buf_idx); - sqe_write_response->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; - - io_uring_sqe* sqe_splice_response = io.get_sqe(); - if (sqe_splice_response == nullptr) - co_return -1; - - io_uring_prep_splice(sqe_splice_response, fuse_io.pipe[0], - -1, io.fuse_ring.fd, -1, out_header->len, - SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); - sqe_splice_response->flags |= IOSQE_FIXED_FILE; - - auto [rc1, rc2] = co_await io.complete(std::make_pair(sqe_write_response, sqe_splice_response)); - - ret = 0; - } - else - { - ret = -1; - } - break; - } - - continue; - } - else if (last_rc != expected_last_rc) - { - Server->Log("Error sending response " + convert(last_rc) - + " expected " + convert(expected_last_rc), LL_ERROR); - ret = -1; - } - else - { - sqe_written = bsize; - for (WriteTask& task : write_tasks) - { - task.fd->increaseCachedSize(task.off + task.size); - } - } - - break; - } - - for (WriteTask& task : write_tasks) - { - co_await kv_store.release_async(io, task.key); - } - - for (WriteTask& task : done_write_tasks) - { - co_await kv_store.release_async(io, task.key); - } - - written_bytes += bsize; - used_bytes += set_bits * block_size; - - EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS() - starttime) + "ms", LL_DEBUG);) - - if (!ext_lock) - { - unlock_extent_async(orig_pos, bsize, false, lock_idx); - } - - if (ret == 0) - co_return sqe_written; - - co_return ret; -} -#endif //HAS_ASYNC - -_u32 CloudFile::WriteInt( int64 pos, const char* buffer, _u32 bsize, bool new_block, IScopedLock* ext_lock, bool can_block, bool* has_error) -{ - if (is_async) - abort(); - - IScopedLock lock(nullptr); - - if(ext_lock==nullptr) - { - lock.relock(mutex.get()); - } - - if (pos > cloudfile_size) - { - Server->Log("Write position " + convert(pos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_ERROR); - return 0; - } - - EXTENSIVE_DEBUG_LOG(int64 starttime=Server->getTimeMS();) - - int64 target = pos + bsize; - - if (target>cloudfile_size) - { - target = cloudfile_size; - Server->Log("Write beyond target size. Reducing write size to " + convert(target - pos) + " from " + convert(bsize), LL_WARNING); - bsize = static_cast<_u32>(target - pos); - } - - const char* orig_buffer = buffer; - int64 orig_pos = pos; - if(ext_lock==nullptr) - { - lock_extent(lock, orig_pos, bsize, false); - } - - IScopedLock migration_lock(nullptr); - if (migrate_to_cf != nullptr) - { - migration_lock.relock(migrate_to_cf->mutex.get()); - migrate_to_cf->lock_extent(migration_lock, orig_pos, bsize, false); - } - - bool slog_needs_reset = false; - if (ext_lock == nullptr) - { - if (!slog_write(pos, buffer, bsize, slog_needs_reset)) - { - unlock_extent(lock, orig_pos, bsize, false); - return 0; - } - } - - EXTENSIVE_DEBUG_LOG(Server->Log("Writing " + convert(bsize) + " bytes at pos " + convert(pos), LL_DEBUG);) - - size_t set_bits = 0; - - while(posget(big_block_num); - - int64 block_diff = big_block_num - active_big_block; - if(has_big_block && active_big_block>=0 && ( block_diff<-1 || block_diff>1) - && old_big_blocks_bitmap->get(big_block_num) ) - { - add_fracture_big_block(big_block_num); - } - - int64 curr_block_size; - std::string key; - bool has_block; - if(has_big_block) - { - active_big_block = big_block_num; - curr_block_size = big_block_size; - key = big_block_key(pos/curr_block_size); - has_block = bitmap_has_big_block(pos/curr_block_size); - - new_big_blocks_bitmap->set(big_block_num, true); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(pos/curr_block_size); - has_block = bitmap_has_small_block(pos/curr_block_size); - } - - int64 towrite = (std::min)((curr_block_size - pos%curr_block_size), target-pos); - - if (!new_block && !has_block) - { - bool waited = false; - while (in_write_retrieval.find(key) != in_write_retrieval.end()) - { - if (ext_lock == nullptr) - { - in_write_retrieval_cond->wait(&lock); - } - else - { - in_write_retrieval_cond->wait(ext_lock); - } - waited = true; - } - - if (waited) - { - //Update bitmap info (has_block) - continue; - } - - in_write_retrieval.insert(key); - } - - if(ext_lock==nullptr) - { - lock.relock(nullptr); - } - else - { - ext_lock->relock(nullptr); - } - - unsigned int flags = TransactionalKvStore::Flag::read_random; - - if (!can_block) - { - flags |= TransactionalKvStore::Flag::disable_throttling; - } - - if (is_metadata(pos, key)) - { - flags |= TransactionalKvStore::Flag::disable_memfiles; - } - - IFsFile* block = kv_store.get(key, - new_block ? TransactionalKvStore::BitmapInfo::NotPresent - : (has_block ? TransactionalKvStore::BitmapInfo::Present - : TransactionalKvStore::BitmapInfo::NotPresent) , - flags, curr_block_size); - - assert(block!=nullptr); - - int64 file_block_size = block->Size(); - if(file_block_size Resize(curr_block_size); - } - - _u32 written = 0; - size_t tries = 0; - while(written(towrite)) - { - int64 block_pos = pos%curr_block_size+written; - _u32 curr_towrite = static_cast<_u32>(towrite)-written; - - - /* Enable for Direct-IO TODO: buffer needs to be aligned - if(block_pos%io_alignment==0 - && curr_towrite%io_alignment==0) - { - written += block->Write(block_pos, buffer+written, curr_towrite); - } - else - { - written += WriteAligned(block, block_pos, buffer+written, curr_towrite); - }*/ - written += block->Write(block_pos, buffer + written, curr_towrite); - - if(written(towrite)) - { - std::string syserr = os_last_error_str(); - if (tries==4) - { - addSystemEvent("cache_err_retry", - "Error writing to block on cache", - "Error writing to block " + bytesToHex(reinterpret_cast(key.c_str()), - key.size()) + " at "+ block->getFilename()+". " + syserr, LL_ERROR); - } - Server->Log("Error writing to block "+bytesToHex(reinterpret_cast(key.c_str()), - key.size())+" at "+ block->getFilename()+". "+syserr+". Retrying...", LL_WARNING); - Server->wait(1000); - ++tries; - } - } - - kv_store.release(key); - - if(written!=towrite) - { - if (has_error) *has_error = true; - Server->Log("Error writing to block file", LL_ERROR); - addSystemEvent("cache_err", - "Error writing to block on cache", - "Error writing to block " + bytesToHex(reinterpret_cast(key.c_str()), - key.size()) + " at "+cachefs->getName()+".", LL_ERROR); - if (migrate_to_cf != nullptr) - { - migrate_to_cf->unlock_extent(migration_lock, orig_pos, bsize, false); - } - if(ext_lock==nullptr) - { - lock.relock(mutex.get()); - unlock_extent(lock, orig_pos, bsize, false); - } - else - { - ext_lock->relock(mutex.get()); - } - - if (!new_block && !has_block) - { - in_write_retrieval.erase(key); - in_write_retrieval_cond->notify_all(); - } - - return 0; - } - - if(ext_lock==nullptr) - { - lock.relock(mutex.get()); - } - else - { - ext_lock->relock(mutex.get()); - } - - set_bits += bitmap->set_range(pos / block_size, div_up(pos + written, block_size), true); - - if (!new_block && !has_block) - { - in_write_retrieval.erase(key); - in_write_retrieval_cond->notify_all(); - } - - buffer += written; - pos += written; - - written_bytes += written; - } - - used_bytes+=set_bits*block_size; - - EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS()-starttime)+"ms", LL_DEBUG);) - - if (migrate_to_cf != nullptr) - { - if (orig_pos < migration_copy_max) - { - bool local_has_error = false; - if (migrate_to_cf->WriteInt(orig_pos, orig_buffer, bsize, false, &migration_lock, can_block, &local_has_error) != bsize - || local_has_error) - { - migration_has_error = true; - } - } - migrate_to_cf->unlock_extent(migration_lock, orig_pos, bsize, false); - } - - if(ext_lock==nullptr) - { - unlock_extent(lock, orig_pos, bsize, false); - } - - if (slog_needs_reset) - { - if (!Flush(false, true)) - { - abort(); - } - } - - return bsize; -} - -_u32 CloudFile::WriteAligned(IFile* block, int64 pos, const char* buffer, _u32 towrite) -{ - EXTENSIVE_DEBUG_LOG(Server->Log("Fixing alignment for write at pos "+convert(pos)+" size "+convert(towrite), LL_DEBUG);) - - int64 startpos = (pos/io_alignment)*io_alignment; - _u32 alignstartadd = static_cast<_u32>(pos-startpos); - _u32 alignendadd = io_alignment - (towrite+alignstartadd)%io_alignment; - _u32 alignwrite = towrite + alignstartadd + alignendadd; - char *buf = reinterpret_cast(aligned_alloc(io_alignment, alignwrite)); - if(buf==nullptr) - { - return 0; - } - _u32 r = block->Read(startpos, buf, alignwrite); - - if(rWrite(startpos, buf, alignwrite); - - free(buf); - - if(wtowrite) - { - w=towrite; - } - - return w; -} - -bool CloudFile::Seek( _i64 spos ) -{ - if (is_async) - abort(); - - cf_pos = spos; - return true; -} - -_i64 CloudFile::Size( void ) -{ -#ifdef HAS_ASYNC - if (is_async) - { - CWData wdata; - wdata.addChar(queue_cmd_get_size); - - CRData rdata; - get_async_msg(wdata, rdata); - - int64 ret; - bool b = rdata.getVarInt(&ret); - assert(b); - return ret; - } -#endif - - IScopedLock lock(mutex.get()); - - lock_extent(lock, 0, 0, false); - - int64 ret = cloudfile_size; - - unlock_extent(lock, 0, 0, false); - - return ret; -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task<_i64> CloudFile::SizeAsync(void) -{ - size_t lock_idx = co_await lock_extent_async(0, 0, false); - - int64 ret = cloudfile_size; - - unlock_extent_async(0, 0, false, lock_idx); - - co_return ret; -} -#endif - -_i64 CloudFile::RealSize(void) -{ - return getUsedBytes(); -} - -bool CloudFile::PunchHole( _i64 spos, _i64 size ) -{ - if (is_async) - abort(); - - IScopedLock lock(mutex.get()); - - EXTENSIVE_DEBUG_LOG(int64 starttime=Server->getTimeMS();) - - if (spos > cloudfile_size) - { - Server->Log("Punch start position " + convert(spos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_ERROR); - return false; - } - - int64 target = spos + size; - if (target>cloudfile_size) - { - target = cloudfile_size; - size = target - spos; - } - - int64 orig_spos = spos; - lock_extent(lock, orig_spos, size, true); - - EXTENSIVE_DEBUG_LOG(Server->Log("Punching hole " + convert(size) + " bytes at pos " + convert(spos), LL_DEBUG);) - - std::vector > del_blocks; - - int64 lock_start = cloudfile_size; - int64 lock_end = 0; - while(sposget(big_block_num); - - int64 curr_block_size; - std::string key; - bool has_block; - if(has_big_block) - { - curr_block_size = big_block_size; - key = big_block_key(spos/curr_block_size); - has_block = bitmap_has_big_block(spos/curr_block_size); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(spos/curr_block_size); - has_block = bitmap_has_small_block(spos/curr_block_size); - } - - int64 towrite = (std::min)((curr_block_size - spos%curr_block_size), target-spos); - - if(has_block) - { - size_t set_bits = bitmap->set_range(div_up(spos, block_size), (spos+towrite)/block_size, false); - - used_bytes-=set_bits*block_size; - - if(has_big_block) - { - has_block = bitmap_has_big_block(spos/curr_block_size); - } - else - { - has_block = bitmap_has_small_block(spos/curr_block_size); - } - - if(!has_block) - { - lock_start = (std::min)(lock_start, roundDown(spos, curr_block_size)); - lock_end = (std::max)(lock_end, spos%curr_block_size==0 ? spos+curr_block_size : roundUp(spos, curr_block_size)); - del_blocks.push_back(std::make_pair(spos / curr_block_size, has_big_block)); - } - } - - spos+=towrite; - } - - unlock_extent(lock, orig_spos, size, true); - - - if (!del_blocks.empty()) - { - //We need to lock the whole big/small block before deleting it - assert(lock_end > lock_start); - lock_extent(lock, lock_start, lock_end - lock_start, true); - for (auto& it : del_blocks) - { - bool has_big_block = it.second; - - //If small/big block change it got fractured during unlock and has data - if (has_big_block && !big_blocks_bitmap->get(it.first)) - { - Server->Log("Block " + convert(it.first) + " changed to small block while trying to delete it"); - continue; - } - - std::string key; - bool has_block; - int64 big_block_num; - if (has_big_block) - { - key = big_block_key(it.first); - has_block = bitmap_has_big_block(it.first); - big_block_num = it.first; - - assert(lock_start <= it.first*big_block_size); - assert(lock_end >= (it.first + 1)*big_block_size); - } - else - { - key = small_block_key(it.first); - has_block = bitmap_has_small_block(it.first); - big_block_num = (it.first*small_block_size) / big_block_size; - - assert(lock_start <= it.first*small_block_size); - assert(lock_end >= (it.first + 1)*small_block_size); - } - - if (!has_block) - { - { - lock.relock(nullptr); - - kv_store.del(key); - - lock.relock(mutex.get()); - } - if (!has_big_block && - !big_blocks_bitmap->get(big_block_num) ) - { - consider_big_blocks.insert(big_block_num); - } - } - else - { - Server->Log("Block " + convert(it.first) + " present when trying to delete it big_block=" + convert(has_big_block)); - } - } - for (int64 it : consider_big_blocks) - { - if (!bitmap_has_big_block(it)) - { - old_big_blocks_bitmap->set(it, false); - big_blocks_bitmap->set(it, true); - new_big_blocks_bitmap->set(it, false); - } - } - consider_big_blocks.clear(); - unlock_extent(lock, lock_start, lock_end - lock_start, true); - } - - EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS()-starttime)+"ms", LL_DEBUG);) - - return true; -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::PunchHoleAsync(fuse_io_context& io, _i64 spos, _i64 size) -{ - EXTENSIVE_DEBUG_LOG(int64 starttime = Server->getTimeMS();) - - if (spos > cloudfile_size) - { - Server->Log("Punch start position " + convert(spos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_ERROR); - co_return 1; - } - - int64 target = spos + size; - if (target > cloudfile_size) - { - target = cloudfile_size; - size = target - spos; - } - - int64 orig_spos = spos; - size_t lock_idx = co_await lock_extent_async(orig_spos, size, true); - - EXTENSIVE_DEBUG_LOG(Server->Log("Punching hole " + convert(size) + " bytes at pos " + convert(spos), LL_DEBUG);) - - std::vector > del_blocks; - - int64 lock_start = cloudfile_size; - int64 lock_end = 0; - while (spos < target) - { - int64 big_block_num = spos / big_block_size; - bool has_big_block = co_await big_blocks_bitmap->get_async(io, big_block_num); - - int64 curr_block_size; - std::string key; - bool has_block; - if (has_big_block) - { - curr_block_size = big_block_size; - key = big_block_key(spos / curr_block_size); - has_block = co_await bitmap_has_big_block_async(io, spos / curr_block_size); - } - else - { - curr_block_size = small_block_size; - key = small_block_key(spos / curr_block_size); - has_block = co_await bitmap_has_small_block_async(io, spos / curr_block_size); - } - - int64 towrite = (std::min)((curr_block_size - spos % curr_block_size), target - spos); - - if (has_block) - { - size_t set_bits = co_await bitmap->set_range_async(io, div_up(spos, block_size), (spos + towrite) / block_size, false); - - used_bytes -= set_bits * block_size; - - if (has_big_block) - { - has_block = co_await bitmap_has_big_block_async(io, spos / curr_block_size); - } - else - { - has_block = co_await bitmap_has_small_block_async(io, spos / curr_block_size); - } - - if (!has_block) - { - lock_start = (std::min)(lock_start, roundDown(spos, curr_block_size)); - lock_end = (std::max)(lock_end, spos % curr_block_size == 0 ? spos + curr_block_size : roundUp(spos, curr_block_size)); - del_blocks.push_back(std::make_pair(spos / curr_block_size, has_big_block)); - } - } - - spos += towrite; - } - - unlock_extent_async(orig_spos, size, true, lock_idx); - - - if (!del_blocks.empty()) - { - std::set consider_big_blocks; - //We need to lock the whole big/small block before deleting it - assert(lock_end > lock_start); - lock_idx = co_await lock_extent_async(lock_start, lock_end - lock_start, true); - for (auto it : del_blocks) - { - bool has_big_block = it.second; - int64 block = it.first; - - //If small/big block change we didn't lock enough/wrong - if (has_big_block && !(co_await big_blocks_bitmap->get_async(io, block))) - { - Server->Log("Block " + convert(block) + " changed to small block while trying to delete it"); - continue; - } - - std::string key; - bool has_block; - int64 big_block_num; - if (has_big_block) - { - key = big_block_key(block); - has_block = co_await bitmap_has_big_block_async(io, block); - big_block_num = block; - - assert(lock_start <= block * big_block_size); - assert(lock_end >= (block + 1) * big_block_size); - } - else - { - key = small_block_key(block); - has_block = co_await bitmap_has_small_block_async(io, block); - big_block_num = (block * small_block_size) / big_block_size; - - assert(lock_start <= block * small_block_size); - assert(lock_end >= (block + 1) * small_block_size); - } - - if (!has_block) - { - { - co_await kv_store.del_async(io, key); - } - - if (!has_big_block && - !(co_await big_blocks_bitmap->get_async(io, big_block_num)) ) - { - if (curr_consider_big_blocks.find(big_block_num) - == curr_consider_big_blocks.end()) - { - consider_big_blocks.insert(big_block_num); - } - } - } - else - { - Server->Log("Block " + convert(block) + " present when trying to delete it big_block=" + convert(has_big_block)); - } - } - - if (!consider_big_blocks.empty()) - { - curr_consider_big_blocks = std::move(consider_big_blocks); - consider_big_blocks = std::set(); - for (int64 it : curr_consider_big_blocks) - { - if (!(co_await bitmap_has_big_block_async(io, it))) - { - old_big_blocks_bitmap->set(it, false); - co_await big_blocks_bitmap->set_async(io, it, true); - new_big_blocks_bitmap->set(it, false); - } - } - curr_consider_big_blocks.clear(); - } - - unlock_extent_async(lock_start, lock_end - lock_start, true, lock_idx); - } - - EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS() - starttime) + "ms", LL_DEBUG);) - - co_return 0; -} -#endif //HAS_ASYNC - -std::string CloudFile::getFilename( void ) -{ - return "CloudFile"; -} - -std::string CloudFile::block_key(int64 blocknum) -{ - std::string key; - if(blocknum(blocknum); - memcpy(&key[1], &bnum, sizeof(bnum)); - } - else if(blocknum(blocknum); - memcpy(&key[1], &bnum, sizeof(bnum)); - } - else if(blocknum(blocknum); - memcpy(&key[1], &bnum, sizeof(bnum)); - } - else - { - key.resize(sizeof(int64)+1); - memcpy(&key[1], &blocknum, sizeof(blocknum)); - } - return key; -} - -int64 CloudFile::block_key_rev(const std::string & data) -{ - if (data.size() == sizeof(unsigned char) + 1) - { - return data[1]; - } - else if (data.size() == sizeof(unsigned short) + 1) - { - unsigned short ret; - memcpy(&ret, &data[1], sizeof(ret)); - return ret; - } - else if (data.size() == sizeof(unsigned int) + 1) - { - unsigned int ret; - memcpy(&ret, &data[1], sizeof(ret)); - return ret; - } - else if (data.size() == sizeof(int64) + 1) - { - int64 ret; - memcpy(&ret, &data[1], sizeof(ret)); - return ret; - } - else - { - return -1; - } -} - -std::string CloudFile::big_block_key( int64 blocknum ) -{ - std::string key = block_key(blocknum); - key[0]='b'; - return key; -} - -std::string CloudFile::small_block_key( int64 blocknum ) -{ - std::string key = block_key(blocknum); - key[0]='s'; - return key; -} - -std::string CloudFile::hex_big_block_key(int64 bytenum) -{ - return bytesToHex(big_block_key(bytenum/big_block_size)); -} - -std::string CloudFile::hex_small_block_key(int64 bytenum) -{ - return bytesToHex(small_block_key(bytenum/small_block_size)); -} - -int CloudFile::Congested() -{ - int ret = 0; - if (kv_store.is_congested()) - ret |= 1; - - IScopedLock lock(mutex.get()); - if (is_flushing) - ret |= 2; - return ret; -} - -#ifdef HAS_ASYNC -int CloudFile::CongestedAsync() -{ - int ret = 0; - if (kv_store.is_congested_async()) - ret |= 1; - if (is_flushing) - ret |= 2; - return ret; -} -#endif - -std::string CloudFile::get_raid_groups() -{ -#ifdef HAS_LOCAL_BACKEND - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - return backend->get_raid_groups(); - } - } -#endif - - return std::string(); -} - -std::string CloudFile::scrub_stats() -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->scrub_stats(); - } - - return std::string(); -} - -void CloudFile::start_scrub(ScrubAction action) -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - frontend->start_scrub(action, std::string()); - } -} - -void CloudFile::stop_scrub() -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - frontend->stop_scrub(); - } -} - -bool CloudFile::add_disk(const std::string & path) -{ -#ifdef HAS_LOCAL_BACKEND - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - return backend->add_location(path); - } - } -#endif - - return false; -} - -bool CloudFile::remove_disk(const std::string & path, bool completely) -{ -#ifdef HAS_LOCAL_BACKEND - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - return backend->remove_location(path, completely); - } - } -#endif - - return false; -} - -std::string CloudFile::disk_error_info() -{ -#ifdef HAS_LOCAL_BACKEND - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - return backend->disk_error_info(); - } - } -#endif - - return std::string(); -} - -int64 CloudFile::current_total_size() -{ -#ifdef HAS_LOCAL_BACKEND - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - int64 total_space, free_space; - if (backend->get_space_info(total_space, free_space)) - { - return total_space; - } - } - } -#endif - - return -1; -} - -int64 CloudFile::current_free_space() -{ -#ifdef HAS_LOCAL_BACKEND - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - int64 total_space, free_space; - if (backend->get_space_info(total_space, free_space)) - { - return free_space; - } - } - } -#endif - - return -1; -} - -bool CloudFile::set_target_failure_probability(double t) -{ -#ifdef HAS_LOCAL_BACKEND - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - return backend->set_target_failure_probability(t); - } - } -#endif - - return false; -} - -bool CloudFile::set_target_overhead(double t) -{ -#ifdef HAS_LOCAL_BACKEND - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - return backend->set_target_overhead(t); - } - } -#endif - - return false; -} - -std::string CloudFile::get_scrub_position() -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->scrub_position(); - } - return std::string(); -} - -int64 CloudFile::get_memfile_bytes() -{ - return kv_store.get_memfile_bytes(); -} - -int64 CloudFile::get_submitted_memfile_bytes() -{ - return kv_store.get_submitted_memfile_bytes(); -} - -void CloudFile::operator()() -{ - if (is_async) - abort(); - - std::vector buf; - buf.resize(small_block_size); - - IScopedLock lock(mutex.get()); - while (!exit_thread) - { - int64 wtime = 1000; - int64 ctime = Server->getTimeMS(); - for (auto it = fracture_big_blogs.begin(); it != fracture_big_blogs.end();) - { - if (it->second <= ctime) - { - int64 big_block_num = it->first; - Server->Log("Fracturing big block " + convert(big_block_num) + "...", LL_INFO); - - int64 orig_start = big_block_num*big_block_size; - int64 stop = big_block_num*big_block_size + big_block_size; - - lock_extent(lock, orig_start, stop - orig_start, true); - - bool has_big_block = big_blocks_bitmap->get(big_block_num); - if (has_big_block - && bitmap_has_big_block(big_block_num) - && old_big_blocks_bitmap->get(big_block_num)) - { - bool success = true; - for (int64 start = orig_start; start < stop; start += small_block_size) - { - _u32 toread = static_cast<_u32>((std::min)(small_block_size, cloudfile_size - start)); - - if (!bitmap_has_small_block(start / small_block_size)) - { - continue; - } - - bool has_error; - big_blocks_bitmap->set(big_block_num, true); - if (ReadInt(start, buf.data(), toread, &lock, 0, &has_error) != toread) - { - Server->Log("Error reading while fracturing big block", LL_ERROR); - success = false; - break; - } - - big_blocks_bitmap->set(big_block_num, false); - if (WriteInt(start, buf.data(), toread, true, &lock, true, &has_error) != toread) - { - Server->Log("Error writing while fracturing big block", LL_ERROR); - success = false; - break; - } - - if (toread < small_block_size) - { - break; - } - } - - big_blocks_bitmap->set(big_block_num, !success); - - if (success) - { - lock.relock(nullptr); - - kv_store.del(big_block_key(big_block_num)); - - lock.relock(mutex.get()); - } - - unlock_extent(lock, orig_start, stop - orig_start, true); - } - else - { - unlock_extent(lock, orig_start, stop - orig_start, true); - } - - auto it_curr = it; - ++it; - fracture_big_blogs.erase(it_curr); - - ctime = Server->getTimeMS(); - } - else - { - wtime = (std::min)(wtime, it->second - ctime); - ++it; - } - } - - thread_cond->wait(&lock, static_cast(wtime)); - } -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task_discard CloudFile::run_async(fuse_io_context& io) -{ - run_async_queue(io); - - if (!io.has_fuse_io()) - co_return -1; - - fuse_io_context::FuseIoVal fuse_io = io.get_fuse_io(); - fuse_io->write_fuse = false; - - { - fuse_io_context::ScratchBufInit* scratch_init = reinterpret_cast(fuse_io->scratch_buf); - - Server->Log("Setting pipe fd " + convert(scratch_init->pipe_out) + " size " + PrettyPrintBytes(small_block_size)); - int rc = fcntl(scratch_init->pipe_out, F_SETPIPE_SZ, small_block_size); - if (rc < 0) - { - Server->Log("Error setting pipe size to " + std::to_string(small_block_size) + ". " + os_last_error_str(), LL_ERROR); - co_return -1; - } - } - - while (!exit_thread) - { - int64 wtime = 1000; - int64 ctime = Server->getTimeMS(); - for (auto it = fracture_big_blogs.begin(); it != fracture_big_blogs.end();) - { - if (it->second <= ctime) - { - int64 big_block_num = it->first; - Server->Log("Fracturing big block " + convert(big_block_num) + "...", LL_INFO); - - int64 orig_start = big_block_num * big_block_size; - int64 stop = big_block_num * big_block_size + big_block_size; - - size_t lock_idx = co_await lock_extent_async(orig_start, stop - orig_start, true); - - bool has_big_block = co_await big_blocks_bitmap->get_async(io, big_block_num); - if (has_big_block - && (co_await bitmap_has_big_block_async(io, big_block_num)) - && old_big_blocks_bitmap->get(big_block_num)) - { - bool success = true; - for (int64 start = orig_start; start < stop; start += small_block_size) - { - _u32 toread = static_cast<_u32>((std::min)(small_block_size, cloudfile_size - start)); - - if (!(co_await bitmap_has_small_block_async(io, start / small_block_size))) - { - continue; - } - - co_await big_blocks_bitmap->set_async(io, big_block_num, true); - int rc = co_await ReadAsync(io, fuse_io.get(), start, toread, 0, true); - if (rc<0 || rc != toread) - { - Server->Log("Error reading while fracturing big block. Rc=" + convert(rc) + " Expected=" + convert(toread), LL_ERROR); - co_await empty_pipe(io, fuse_io.get()); - success = false; - break; - } - - co_await big_blocks_bitmap->set_async(io, big_block_num, false); - rc = co_await WriteAsync(io, fuse_io.get(), start, toread, true, true, true); - if (rc<0 || rc != toread) - { - co_await empty_pipe(io, fuse_io.get()); - Server->Log("Error writing while fracturing big block. Rc="+convert(rc)+" Expected="+convert(toread), LL_ERROR); - success = false; - break; - } - - if (toread < small_block_size) - { - break; - } - } - - big_blocks_bitmap->set(big_block_num, !success); - - if (success) - { - co_await kv_store.del_async(io, big_block_key(big_block_num)); - } - - unlock_extent_async(orig_start, stop - orig_start, true, lock_idx); - } - else - { - unlock_extent_async(orig_start, stop - orig_start, true, lock_idx); - } - - auto it_curr = it; - ++it; - fracture_big_blogs.erase(it_curr); - - ctime = Server->getTimeMS(); - } - else - { - wtime = (std::min)(wtime, it->second - ctime); - ++it; - } - } - - co_await io.sleep_ms(wtime); - } - - co_return 0; -} -#endif //HAS_ASYNC - -void CloudFile::run_cd_fracture() -{ - fracture_big_blogs_ticket = Server->getThreadPool()->execute(this, "cd fracture"); -} - -bool CloudFile::start_raid_defrag(const std::string& settings) -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->start_defrag(settings); - } - - return false; -} - -bool CloudFile::is_background_worker_enabled() -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->is_background_worker_enabled(); - } - return false; -} - -bool CloudFile::is_background_worker_running() -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->is_background_worker_running(); - } - return false; -} - -void CloudFile::enable_background_worker(bool b) -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->enable_background_worker(b); - } +**************************************************************************/ +#ifndef _WIN32 +#include +#endif +#include "CloudFile.h" +#include "../stringtools.h" +#include "../common/data.h" +#include "../urbackupcommon/os_functions.h" +#include "KvStoreFrontend.h" +#ifdef HAS_LOCAL_BACKEND +#include "KvStoreBackendLocal.h" +#endif +#include "../Interface/ThreadPool.h" +#include "../Interface/Pipe.h" +#include "../urbackupcommon/events.h" +#include "../common/data.h" +#ifdef HAS_MOUNT_SERVICE +#include "MountServiceClient.h" +#endif +#include "../Interface/SettingsReader.h" +#include "ICompressEncrypt.h" +#ifdef HAS_LOCAL_CACHEFS +#include "PassThroughFileSystem.h" +#endif +#include "Auto.h" +#ifdef WITH_MIGRATION +#include "KvStoreBackendS3.h" +#include "KvStoreBackendAzure.h" +#endif +#include "../urbackupcommon/os_functions.h" +#ifdef WITH_SLOG +#include "../common/crc.h" +#endif +#include "../urbackupcommon/json.h" +#ifdef HAS_ASYNC +#include "fuse_kernel.h" +#endif +#ifdef HAS_LINUX_MEMORY_FILE +#include "LinuxMemFile.h" +#endif + +#if !defined(NO_JEMALLOC) && !defined(_WIN32) +#include +#endif + +#ifndef _WIN32 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#else +typedef int64 ssize_t; +#define aligned_alloc(allign, size) malloc(size) +int fcntl(int fd, int cmd, int val) +{ + return -1; +} +#define F_SETPIPE_SZ 1 +int pipe2(int p[2], int flags) +{ + return -1; +} +#define O_NONBLOCK 1 +#define O_CLOEXEC 2 +int eventfd(unsigned int initval, int flags) +{ + return -1; +} +#define EFD_CLOEXEC 1 +#define EFD_SEMAPHORE 2 +int eventfd_write(int fd, int value) { + return -1; +} +#define POSIX_FADV_DONTNEED 1 +#endif + +#define EXTENSIVE_DEBUG_LOG(x) + +#ifndef PR_SET_IO_FLUSHER +#define PR_SET_IO_FLUSHER 57 +#endif + +#include +#include +#include +#include +#include +#include + +const int64 min_cachesize = 1LL * 1024* 1024* 1024LL; // 1GB +const int64 min_free_size = 20LL * 1024* 1024* 1024LL; // 20GB +const int64 critical_free_size = 1000LL * 1024 * 1024LL; //1GB +const int64 throttle_free_size = 5000LL * 1024 * 1024LL; //5GB + +const int64 comp_start_limit = 20LL * 1024* 1024* 1024LL; // 20GB +const float comp_percent = 0.5; // 50% + +const int64 big_block_size = 20 *1024 *1024; // 20MB +const int64 small_block_size = 512 *1024; // 512KB + +const int64 block_size = 4096; + +const int64 slog_kernel_buffer_size = 16 * 1024 * 1024; + +std::string pbkdf2_sha256(const std::string& key); +bool is_automount_finished(); + +#ifndef _WIN32 +bool flush_dev(std::string dev_fn) +{ + int fd = open64(dev_fn.c_str(), O_RDWR | O_LARGEFILE | O_CLOEXEC); + + if (fd == -1) + { + Server->Log("Error opening device file " + dev_fn + ". " + os_last_error_str(), LL_ERROR); + return false; + } + + if (fsync(fd) == -1) + { + Server->Log("Error fsyncing device file " + dev_fn + ". " + os_last_error_str(), LL_ERROR); + close(fd); + return false; + } + + if (ioctl(fd, BLKFLSBUF, 0) != 0) + { + Server->Log("Error flushing device file " + dev_fn + ". " + os_last_error_str(), LL_ERROR); + close(fd); + return false; + } + + close(fd); + return true; +} +int get_file_fd(IFsFile::os_file_handle h) +{ + return h; +} +#else +bool flush_dev(std::string dev_fn) +{ + return false; +} +#define SPLICE_F_NONBLOCK 256 +#define SPLICE_F_MOVE 128 +int get_file_fd(IFsFile::os_file_handle h) +{ + return reinterpret_cast(h); +} +#endif + +namespace +{ + const char queue_cmd_resize = 1; + const char queue_cmd_get_size = 2; + const char queue_cmd_meminfo = 3; + + enum { + IOPRIO_CLASS_NONE, + IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE, + IOPRIO_CLASS_IDLE, + }; + +#define IOPRIO_CLASS_SHIFT 13 + const _u16 io_prio_val = 2 | IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT; + + //divide rounding up + int64 div_up(int64 num, int64 div) + { + if(num%div == 0) + { + return num/div; + } + else + { + return num/div + 1; + } + } + + int64 roundUp(int64 numToRound, int64 multiple) + { + return ((numToRound + multiple - 1) / multiple) * multiple; + } + + int64 roundDown(int64 numToRound, int64 multiple) + { + return ((numToRound / multiple) * multiple); + } + + void retryWait(size_t n) + { + unsigned int waittime = (std::min)(static_cast(1000.*pow(2., static_cast(n))), (unsigned int)30*60*1000); //30min + if(n>20) + { + waittime = (unsigned int)30*60*1000; + } + Server->Log("Waiting "+PrettyPrintTime(waittime)); + Server->wait(waittime); + } + + bool writeZeroes(int64 offset, IFile* file, int64 num, int set_val) + { + static char buf[4096] = {}; + + while(num>0) + { + int64 towrite = (std::min)((int64)4096, num); + if(file->Write(offset, buf, static_cast<_u32>(towrite))!=towrite) + { + return false; + } + num-=towrite; + offset+=towrite; + } + return true; + } + +#ifndef _WIN32 + void doublefork_writestring(const std::string& str, const std::string& file) + { + pid_t pid1; + pid1 = fork(); + if (pid1 == 0) + { + setsid(); + pid_t pid2; + pid2 = fork(); + if (pid2 == 0) + { + writestring(str, file); + exit(0); + } + else + { + exit(1); + } + } + else + { + int status; + waitpid(pid1, &status, 0); + } + } +#else + void doublefork_writestring(const std::string& str, const std::string& file) + { + writestring(str, file); + } +#endif + + class FileBitmap + { + public: + FileBitmap(IFile* backing_file, int64 n, bool init_set) + : backing_file(backing_file) + { + resize(n, init_set); + } + + void resize(int64 n, bool init_set) + { + total_size=n; + bitmap_size = static_cast(n/8 + (n%8==0 ? 0 : 1)); + + if(backing_file->Size()(bitmap_size)) + { + while(!writeZeroes(backing_file->Size(), backing_file, bitmap_size-backing_file->Size(), init_set?255:0)) + { + Server->Log("Error resizing bitmap file. Retrying...", LL_ERROR); + Server->wait(1000); + } + } + + cache.resize(bitmap_size); + + backing_file->Seek(0); + size_t pos=0; + while(posRead(reinterpret_cast(&cache[pos]), static_cast<_u32>(toread))!=toread) + { + throw std::runtime_error("Error reading from backing bitmap file"); + } + + pos+=toread; + } + } + + void set(int64 i, bool v) + { + size_t bitmap_byte=(size_t)(i/8); + size_t bitmap_bit=i%8; + + unsigned char b = cache[bitmap_byte]; + + if(v==true) + b=b|(1<<(7-bitmap_bit)); + else + b=b&(~(1<<(7-bitmap_bit))); + + cache[bitmap_byte] = b; + } + + size_t set_range(int64 start, int64 end, bool v) + { + size_t set_bits = 0; + for(;start0); + + return has_bit; + } + + bool flush() + { + while(backing_file->Write(0, reinterpret_cast(cache.data()), + static_cast<_u32>(cache.size()))!=cache.size()) + { + Server->Log("Error writing to backing bitmap file. Retrying...", LL_WARNING); + Server->wait(1000); + } + + return true; + } + + size_t count_bits() + { + size_t set_count = 0; + for(int64 i=0;i cache; + }; + + const size_t bitmap_block_size=4096; + + int64 bitmap_assert_limit = 1*1024*1024; + + class SparseFileBitmap + { + + public: + SparseFileBitmap(IFsFile* backing_file, int64 n, bool init_set, + size_t bitmap_cache_items) + : backing_file(backing_file), + bitmap_cache_items(bitmap_cache_items), + async_evict_skip_block(std::string::npos) + { + resize(n, init_set); + } + + ~SparseFileBitmap() + { + flush(); + } + + void resize(int64 n, bool init_set) + { + flush(); + + total_size=n; + bitmap_size = static_cast(n/8 + (n%8==0 ? 0 : 1)); + + bitmap_size += bitmap_block_size - bitmap_size%bitmap_block_size; + + if(backing_file->Size()(bitmap_size)) + { + while(!writeZeroes(backing_file->Size(), backing_file, bitmap_size-backing_file->Size(), init_set?255:0)) + { + Server->Log("Error resizing bitmap file", LL_ERROR); + Server->wait(1000); + } + } + } + + void set(int64 i, bool v) + { + size_t bitmap_byte=(size_t)(i/8); + size_t bitmap_bit=i%8; + + char* entry = cache_entry(bitmap_byte); + + unsigned char b = entry[bitmap_byte%bitmap_block_size]; + + if(v==true) + b=b|(1<<(7-bitmap_bit)); + else + b=b&(~(1<<(7-bitmap_bit))); + + entry[bitmap_byte%bitmap_block_size] = b; + } + +#ifdef HAS_ASYNC + fuse_io_context::io_uring_task set_async(fuse_io_context& io, int64 i, bool v) + { + size_t bitmap_byte = (size_t)(i / 8); + size_t bitmap_bit = i % 8; + + char* entry = co_await cache_entry_async(io, bitmap_byte); + + unsigned char b = entry[bitmap_byte % bitmap_block_size]; + + if (v == true) + b = b | (1 << (7 - bitmap_bit)); + else + b = b & (~(1 << (7 - bitmap_bit))); + + entry[bitmap_byte % bitmap_block_size] = b; + + co_return 0; + } +#endif + + size_t set_range(int64 start, int64 end, bool v) + { + size_t set_bits = 0; + for(;start set_range_async(fuse_io_context& io, int64 start, int64 end, bool v) + { + size_t set_bits = 0; + for (; start < end; ++start) + { + if (co_await get_async(io, start) != v) + { + co_await set_async(io, start, v); + ++set_bits; + } + } + co_return set_bits; + } +#endif + + char getb(int64 i) + { + size_t bitmap_byte = (size_t)(i / 8); + char* entry = cache_entry(bitmap_byte); + return entry[bitmap_byte%bitmap_block_size]; + } + + char* getPtr(int64 i, size_t& bsize) + { + size_t bitmap_byte = (size_t)(i / 8); + char* entry = cache_entry(bitmap_byte); + bsize = bitmap_block_size - bitmap_byte%bitmap_block_size; + return &entry[bitmap_byte%bitmap_block_size]; + } + +#ifdef HAS_ASYNC + fuse_io_context::io_uring_task > getPtrAsync(fuse_io_context& io, int64 i) + { + size_t bitmap_byte = (size_t)(i / 8); + char* entry = co_await cache_entry_async(io, bitmap_byte); + size_t bsize = bitmap_block_size - bitmap_byte % bitmap_block_size; + co_return std::make_pair(&entry[bitmap_byte % bitmap_block_size], bsize); + } +#endif + + bool get(int64 i) + { + size_t bitmap_byte=(size_t)(i/8); + size_t bitmap_bit=i%8; + + char* entry = cache_entry(bitmap_byte); + + unsigned char b = entry[bitmap_byte%bitmap_block_size]; + + bool has_bit=((b & (1<<(7-bitmap_bit)))>0); + + return has_bit; + } + +#ifdef HAS_ASYNC + fuse_io_context::io_uring_task get_async(fuse_io_context& io, int64 i) + { + size_t bitmap_byte = (size_t)(i / 8); + size_t bitmap_bit = i % 8; + + char* entry = co_await cache_entry_async(io, bitmap_byte); + + unsigned char b = entry[bitmap_byte % bitmap_block_size]; + + bool has_bit = ((b & (1 << (7 - bitmap_bit))) > 0); + + co_return has_bit; + } +#endif + + bool flush() + { + while(!cache.empty()) + { + evict_one(); + } + + return true; + } + +#ifdef HAS_ASYNC + fuse_io_context::io_uring_task flush_async(fuse_io_context& io) + { + while (!cache.empty()) + { + if (!co_await evict_one_async(io)) + { + co_return; + } + } + } +#endif + + size_t count_bits_old() + { + size_t set_count = 0; + for (int64 i = 0; i < total_size;++i) + { + if (get(i)) + { + ++set_count; + } + } + return set_count; + } + + size_t count_bits() + { + size_t set_count = 0; + for(int64 i=0;ibitmap_assert_limit || set_count == count_bits_old()); + + return set_count; + } + + bool get_range_old( int64 start, int64 end ) + { + for(;start get_range_old_async(fuse_io_context& io, int64 start, int64 end) + { + for (; start < end; ++start) + { + if (co_await get_async(io, start)) + { + co_return true; + } + } + co_return false; + } +#endif + + bool get_range( int64 start, int64 end ) + { + if(end-start<=8) + { + return get_range_old(start, end); + } + + int64 orig_start = start; + + for(;startbitmap_assert_limit || get_range_old(orig_start, end)); + return true; + } + } + + for(;start+8bitmap_assert_limit || get_range_old(orig_start, end)); + return true; + } + start+=8; + ++ptr; + } + } + + for(;startbitmap_assert_limit || get_range_old(orig_start, end)); + return true; + } + } + + assert(total_size>bitmap_assert_limit || !get_range_old(orig_start, end)); + return false; + } + +#ifdef HAS_ASYNC + fuse_io_context::io_uring_task get_range_async(fuse_io_context& io, int64 start, int64 end) + { + if (end - start <= 8) + { + co_return co_await get_range_old_async(io, start, end); + } + + int64 orig_start = start; + + for (; start < end && start % 8 != 0; ++start) + { + if (co_await get_async(io, start)) + { + co_return true; + } + } + + for (; start + 8 < end;) + { + auto [ptr, bsize] = co_await getPtrAsync(io, start); + char* eptr = ptr + bsize; + for (; ptr < eptr && start + 8 < end;) + { + if (*ptr != 0) + { + co_return true; + } + start += 8; + ++ptr; + } + } + + for (; start < end; ++start) + { + if (co_await get_async(io, start)) + { + co_return true; + } + } + + co_return false; + } +#endif + + size_t meminfo() + { + return cache.size()*bitmap_block_size; + } + + size_t get_max_cache_items() + { + return bitmap_cache_items; + } + + void clear_cache_no_flush() + { + while (!cache.empty()) + { + std::pair evicted = cache.evict_one(); + delete[] evicted.second; + } + } + + private: + + char* cache_entry(size_t bitmap_byte) + { + size_t block = bitmap_byte/bitmap_block_size; + + char** res = cache.get(block); + + if(res!=nullptr) + { + return *res; + } + + return fill_cache(block); + } + +#ifdef HAS_ASYNC + fuse_io_context::io_uring_task cache_entry_async(fuse_io_context& io, size_t bitmap_byte) + { + size_t block = bitmap_byte / bitmap_block_size; + + char** res = cache.get(block); + + if (res != NULL) + { + co_return *res; + } + + co_return co_await fill_cache_async(io, block); + } +#endif + + char* fill_cache(size_t block) + { + char* np = new char[bitmap_block_size]; + + backing_file->Seek(block*bitmap_block_size); + + if(backing_file->Read(np, bitmap_block_size)!=bitmap_block_size) + { + throw std::runtime_error("Error reading from backing bitmap file"); + } + + cache.put(block, np); + + evict_from_cache(); + + return np; + } + +#ifdef HAS_ASYNC + struct BlockAwaitersCo + { + BlockAwaitersCo* next; + std::coroutine_handle<> awaiter; + }; + + BlockAwaitersCo* block_awaiters_head = nullptr; + + struct BlockAwaiters + { + BlockAwaiters(SparseFileBitmap& bitmap) + : bitmap(bitmap) {} + BlockAwaiters(BlockAwaiters const&) = delete; + BlockAwaiters(BlockAwaiters&& other) = delete; + BlockAwaiters& operator=(BlockAwaiters&&) = delete; + BlockAwaiters& operator=(BlockAwaiters const&) = delete; + + bool await_ready() const noexcept + { + return false; + } + + void await_suspend(std::coroutine_handle<> p_awaiter) noexcept + { + awaiter.awaiter = p_awaiter; + awaiter.next = bitmap.block_awaiters_head; + bitmap.block_awaiters_head = &awaiter; + } + + void await_resume() const noexcept + { + } + + private: + BlockAwaitersCo awaiter; + SparseFileBitmap& bitmap; + }; + + fuse_io_context::io_uring_task fill_cache_async(fuse_io_context& io, size_t block) + { + if (async_retrieval.find(block) != async_retrieval.end()) + { + while (async_retrieval.find(block) != async_retrieval.end()) + { + co_await BlockAwaiters(*this); + } + + char** op = cache.get(block); + + if (op != nullptr) + co_return *op; + } + + async_retrieval.insert(block); + + char* np = new char[bitmap_block_size]; + + io_uring_sqe* sqe = io.get_sqe(); + + io_uring_prep_read(sqe, get_file_fd(backing_file->getOsHandle()), np, + bitmap_block_size, block * bitmap_block_size); + + int rc = co_await io.complete(sqe); + + if (rc < 0 || rc != bitmap_block_size) + { + Server->Log("Error reading from backing bitmap file rc="+convert(rc), LL_ERROR); + throw std::runtime_error("Error reading from backing bitmap file"); + } + +#ifndef NDEBUG + char** op = cache.get(block); + assert(op == nullptr); +#endif + cache.put(block, np); + + async_retrieval.erase(block); + + bool has_skip_evict = false; + bool can_evict = false; + if (async_evict_skip_block == std::string::npos) + { + async_evict_skip_block = block; + can_evict = true; + } + else if (block_awaiters_head!=nullptr) + { + skip_evict_blocks.insert(block); + has_skip_evict = true; + } + + resume_retrieval_waiters(); + + if (can_evict) + { + co_await evict_from_cache_async(io); + async_evict_skip_block = std::string::npos; + } + + if (has_skip_evict) + { + skip_evict_blocks.erase(block); + } + + co_return np; + } + + void resume_retrieval_waiters() + { + BlockAwaitersCo* block_curr = block_awaiters_head; + block_awaiters_head = nullptr; + + while (block_curr != nullptr) + { + BlockAwaitersCo* next = block_curr->next; + block_curr->awaiter.resume(); + block_curr = next; + } + } +#endif + + void evict_one() + { + std::pair evicted = cache.evict_one(); + + while(backing_file->Write((int64)evicted.first*bitmap_block_size, + reinterpret_cast(evicted.second), + static_cast<_u32>(bitmap_block_size))!=bitmap_block_size) + { + Server->Log("Error writing to backing bitmap file. Retrying...", LL_WARNING); + Server->wait(1000); + } + + delete[] evicted.second; + } + +#ifdef HAS_ASYNC + fuse_io_context::io_uring_task evict_one_async(fuse_io_context& io) + { + std::pair evicted = cache.evict_one(); + + if (evicted.first == async_evict_skip_block + || skip_evict_blocks.find(evicted.first) != skip_evict_blocks.end()) + { + cache.put(evicted.first, evicted.second); + co_return false; + } + + async_retrieval.insert(evicted.first); + + int rc; + do + { + io_uring_sqe* sqe = io.get_sqe(); + + io_uring_prep_write(sqe, get_file_fd(backing_file->getOsHandle()), + evicted.second, bitmap_block_size, evicted.first * bitmap_block_size); + + rc = co_await io.complete(sqe); + + if (rc < 0 || rc != bitmap_block_size) + { + Server->Log("Error writing to backing bitmap file. Retrying...", LL_WARNING); + co_await io.sleep_ms(1000); + } + + } while (rc < 0 || rc != bitmap_block_size); + + delete[] evicted.second; + async_retrieval.erase(evicted.first); + resume_retrieval_waiters(); + + co_return true; + } +#endif + + void evict_from_cache() + { + while(cache.size()>bitmap_cache_items) + { + evict_one(); + } + } + +#ifdef HAS_ASYNC + fuse_io_context::io_uring_task evict_from_cache_async(fuse_io_context& io) + { + while (cache.size() > bitmap_cache_items) + { + if (!co_await evict_one_async(io)) + { + co_return; + } + } + } +#endif + + IFsFile* backing_file; + size_t bitmap_size; + int64 total_size; + common::lrucache cache; + size_t bitmap_cache_items; + std::set async_retrieval; + size_t async_evict_skip_block; + std::set skip_evict_blocks; + }; + + class PreloadThread : public IThread + { + IPipe* pipe; + CloudFile* cf; + public: + PreloadThread(IPipe* pipe, CloudFile* cf) + : pipe(pipe), cf(cf) {} + + void operator()() + { + std::string msg; + char buffer[block_size]; + while (pipe->Read(&msg)) + { + if (msg.empty()) + { + pipe->Write(msg); + break; + } + + CRData data(msg.data(), msg.size()); + int64 pos; + data.getInt64(&pos); + + cf->Read(pos, buffer, block_size); + } + + delete this; + } + }; + + class UpdateMissingChunksThread : public IThread + { + CloudFile* cf; + bool do_stop; + std::unique_ptr mutex; + std::unique_ptr cond; + public: + UpdateMissingChunksThread(CloudFile* cf) + : mutex(Server->createMutex()), + cond(Server->createCondition()), + do_stop(false), cf(cf) {} + void operator()() + { + IScopedLock lock(mutex.get()); + bool no_missing = true; + while (!do_stop) + { + if (no_missing) + { + cond->wait(&lock); + } + + if (do_stop) + break; + + int64 starttime = Server->getTimeMS(); + while (Server->getTimeMS() - starttime < 10000 + && !do_stop) + cond->wait(&lock, 1000); + + if (do_stop) + break; + + no_missing = cf->update_missing_fs_chunks(); + } + } + + void stop() { + IScopedLock lock(mutex.get()); + do_stop = true; + cond->notify_all(); + } + + void notify() { + cond->notify_all(); + } + }; + + class MigrationCoordThread : public IThread + { + bool has_error; + CloudFile* cf; + std::string conf_fn; + bool continue_migration; + public: + MigrationCoordThread(CloudFile* cf, std::string conf_fn, bool continue_migration) + : has_error(false), cf(cf), conf_fn(conf_fn), continue_migration(continue_migration) {} + void operator()() + { + if (!cf->migrate(conf_fn, continue_migration)) + has_error = true; + + delete this; + } + }; + + class ShareWithUpdater : public IThread + { + int64 get_mount_total_space(const std::string& mount) + { + int64 total_size; + if (os_directory_exists(mount) + && (total_size = os_total_space(mount))>0) + { + std::unique_ptr zero_shrink_file(Server->openFile(ExtractFilePath(mount) + os_file_sep() + "zero.shrink.file", MODE_READ)); + if (zero_shrink_file.get() != nullptr) + { + total_size -= zero_shrink_file->Size(); + } + + return total_size; + } + + return -1; + } + + public: + ShareWithUpdater(std::string own_mount, const std::string& mounts, TransactionalKvStore& kv_store) + :own_mount(std::move(own_mount)), kv_store(kv_store), do_stop(false), + stop_cond(Server->createCondition()), stop_mutex(Server->createMutex()) + { + Tokenize(mounts, share_mounts, ";"); + } + + void operator()() + { + IScopedLock lock(stop_mutex.get()); + while (!do_stop) + { + stop_cond->wait(&lock, 10 * 60 * 1000); + + if (do_stop) + break; + + lock.relock(nullptr); + + if (!is_automount_finished()) + { + lock.relock(stop_mutex.get()); + continue; + } + + int64 own_total_space = get_mount_total_space(own_mount); + + if (own_total_space <= 0) + continue; + + + int64 sum_total_space = 0; + for (const std::string& mount : share_mounts) + { + int64 total_space = get_mount_total_space(mount); + + if (total_space <= 0) + continue; + + sum_total_space += total_space; + } + + double prop = static_cast(own_total_space) / (sum_total_space + own_total_space); + + int64 cache_total_space = kv_store.cache_total_space(); + + if (cache_total_space > 0) + { + int64 new_max_cachesize = static_cast(static_cast(cache_total_space)*prop); + Server->Log("Setting max cachesize to " + PrettyPrintBytes(new_max_cachesize) + " cause own file system size " + PrettyPrintBytes(own_total_space) + " other file system size " + PrettyPrintBytes(sum_total_space), LL_INFO); + kv_store.set_max_cachesize(new_max_cachesize); + } + + lock.relock(stop_mutex.get()); + } + } + private: + std::vector share_mounts; + std::string own_mount; + TransactionalKvStore& kv_store; + std::unique_ptr stop_cond; + std::unique_ptr stop_mutex; + bool do_stop; + }; + + bool syncDir(const std::string& fp) + { +#ifndef _WIN32 + int fd = open(fp.c_str(), O_RDONLY | O_CLOEXEC); + if (fd == -1) + return false; + + if (fsync(fd) != 0) + { + close(fd); + return false; + } + + close(fd); + return true; +#else + return false; +#endif + } +} // namespace {} + +std::string getCdInterfacePath(); + +void setMountStatus(const std::string& data) +{ +#ifdef _WIN32 + writestring(data, getCdInterfacePath() + "/mount.status"); +#else + int fd = open((getCdInterfacePath() + "/mount.status").c_str(), O_CREAT|O_WRONLY|O_CLOEXEC, S_IRWXU | S_IRGRP | S_IROTH); + if (fd != -1) + { + flock(fd, LOCK_EX); + ftruncate(fd, 0); + write(fd, data.data(), data.size()); + flock(fd, LOCK_UN); + close(fd); + } +#endif +} + +void setMountStatusErr(const std::string & err) +{ + std::string last_logs = extractLastLogErrors(5, std::string(), true); + std::vector lines; + Tokenize(last_logs, lines, "\n"); + + JSON::Object jerr; + jerr.set("state", "error"); + jerr.set("err", err); + + JSON::Array jlast_logs; + for (std::string& line : lines) + jlast_logs.add(line); + + jerr.set("last_logs", jlast_logs); + + setMountStatus(jerr.stringify(true)); +} + + +CloudFile::CloudFile(const std::string& cache_path, + IBackupFileSystem* cachefs, + int64 p_cloudfile_size, + int64 max_cloudfile_size, + IOnlineKvStore* online_kv_store, + const std::string& encryption_key, + ICompressEncryptFactory* compress_encrypt_factory, bool verify_cache, + float cpu_multiplier, + bool background_compress, + size_t no_compress_mult, + int64 reserved_cache_device_space, + int64 min_metadata_cache_free, + bool with_prev_link, + bool allow_evict, + bool with_submitted_files, + float resubmit_compressed_ratio, + int64 max_memfile_size, + std::string memcache_path, + float memory_usage_factor, + bool only_memfiles, + std::string mount_path, + std::string share_with_mount_paths, + unsigned int background_comp_method, + const std::string& slog_path, + int64 slog_max_size, + bool is_async, + unsigned int cache_comp, unsigned int meta_cache_comp) + : online_kv_store(online_kv_store), + kv_store(cachefs, min_cachesize, min_free_size + reserved_cache_device_space, + critical_free_size + reserved_cache_device_space, throttle_free_size + reserved_cache_device_space, + min_metadata_cache_free, background_compress ? comp_percent : 0, comp_start_limit, + online_kv_store, encryption_key, compress_encrypt_factory, verify_cache, + cpu_multiplier, no_compress_mult, with_prev_link, allow_evict, with_submitted_files, + resubmit_compressed_ratio, max_memfile_size, memcache_path, memory_usage_factor, only_memfiles, + background_comp_method, cache_comp, meta_cache_comp), + cf_pos(0), active_big_block(-1), + last_stat_update_time(0), used_bytes(0), last_flush_check(0), + mutex(Server->createMutex()), + new_big_blocks_bitmap_file(nullptr), + cloudfile_size(p_cloudfile_size), + writeback_count(0), io_alignment(512), + locked_extents_max_alive(0), exit_thread(false), thread_cond(Server->createCondition()), + wait_for_exclusive(0), memory_usage_factor(memory_usage_factor), bitmaps_file_size(0), + flush_enabled(0), cache_path(cache_path), + chunks_mutex(Server->createSharedMutex()), + last_fs_chunks_update(0), updating_fs_chunks(false), + update_missing_chunks_thread_ticket(ILLEGAL_THREADPOOL_TICKET), + migrate_to_cf(nullptr), + migration_ticket(ILLEGAL_THREADPOOL_TICKET), + in_write_retrieval_cond(Server->createCondition()), + read_bytes(0), + written_bytes(0), + slog_path(slog_path), slog_max_size(slog_max_size), + enable_raid_freespace_stats(false), + is_flushing(false), + is_async(is_async), + msg_queue_mutex(Server->createMutex()), + msg_queue_id(0), + msg_queue_cond(Server->createCondition()), + fracture_big_blogs_ticket(ILLEGAL_THREADPOOL_TICKET), + cachefs(cachefs) +{ + setMountStatus("{\"state\": \"get_size\"}"); + + IFile* f_cloudfile_size = kv_store.get("cloudfile_size", TransactionalKvStore::BitmapInfo::Unknown, + TransactionalKvStore::Flag::disable_fd_cache|TransactionalKvStore::Flag::disable_throttling, -1); + + if (f_cloudfile_size == nullptr) + { + throw std::runtime_error("Cannot open cloudfile_size"); + } + + +#ifdef HAS_ASYNC + zero_file.reset(Server->openTemporaryFile()); + if (zero_file.get() == nullptr) + { + throw std::runtime_error("Error opening /dev/zero. " + os_last_error_str()); + } + if(!zero_file->Resize(100*1024*1024)) + { + throw std::runtime_error("Error resizing /dev/zero. " + os_last_error_str()); + } +#endif //HAS_ASYNC + + /*zero_file.reset(Server->openFile("/dev/zero", MODE_RW)); + if (zero_file.get() == nullptr) + { + throw std::runtime_error("Error opening /dev/zero. " + os_last_error_str()); + }*/ + +#ifdef HAS_ASYNC + null_file.reset(Server->openFile("/dev/null", MODE_RW)); + if (null_file.get()==nullptr) + { + throw std::runtime_error("Error opening /dev/null. " + os_last_error_str()); + } + + queue_in_eventfd = eventfd(0, EFD_CLOEXEC); + if (queue_in_eventfd == -1) + { + throw std::runtime_error("Error creating queue in eventfd. " + os_last_error_str()); + } + + update_fs_chunks_eventfd = eventfd(0, EFD_CLOEXEC); + if (update_fs_chunks_eventfd == -1) + { + throw std::runtime_error("Error creating update_fs_chunks_eventfd. " + os_last_error_str()); + } +#endif + + int64 read_cloudfile_size; + bool write_new = false; + if (f_cloudfile_size->Read(0, reinterpret_cast(&read_cloudfile_size), sizeof(read_cloudfile_size)) == sizeof(read_cloudfile_size)) + { + if (cloudfile_size != -1) + { + if (read_cloudfile_size != cloudfile_size) + { + write_new = true; + } + + if (read_cloudfile_size > cloudfile_size) + { + Server->Log("Cloud drive size is bigger than configured size. Configured=" + PrettyPrintBytes(cloudfile_size) + " actual=" + PrettyPrintBytes(read_cloudfile_size) + ". Using actual size.", LL_WARNING); + cloudfile_size = read_cloudfile_size; + } + } + else + { + cloudfile_size = read_cloudfile_size; + } + } + else + { + write_new = true; + } + + if (write_new) + { +#ifdef HAS_LOCAL_BACKEND + if (cloudfile_size == -1) + { + auto frontend = dynamic_cast(online_kv_store); + if (frontend != nullptr) + { + auto local = dynamic_cast(frontend->getBackend()); + if (local != nullptr) + { + int64 free_space; + local->get_space_info(cloudfile_size, free_space); + } + } + } +#endif + + if (max_cloudfile_size > 0) + { + cloudfile_size = (std::min)(cloudfile_size, max_cloudfile_size); + } + + if (f_cloudfile_size->Write(0, reinterpret_cast(&cloudfile_size), sizeof(cloudfile_size)) != sizeof(cloudfile_size)) + { + Server->Log("Error writing new cloudfile size. " + os_last_error_str(), LL_ERROR); + } + } + + kv_store.release("cloudfile_size"); + + setMountStatus("{\"state\": \"open_bitmaps\"}"); + + open_bitmaps(); + + setMountStatus("{\"state\": \"delete_objects\"}"); + + bool submit_need_flush = false; + if (online_kv_store->submit_del(this, kv_store.get_transid(), submit_need_flush) && + submit_need_flush) + { + if (!close_bitmaps()) + throw std::runtime_error("Error closing bitmaps after submit_del"); + + if (!kv_store.checkpoint(true, 0)) + throw std::runtime_error("Error checkpointing after submit_del"); + + open_bitmaps(); + + online_kv_store->submit_del_post_flush(); + } + + update_missing_chunks_thread.reset(new UpdateMissingChunksThread(this)); + update_missing_chunks_thread_ticket = Server->getThreadPool()->execute(update_missing_chunks_thread.get(), "mchunk update"); + + + IFsFile* migration_settings_f = kv_store.get("clouddrive_migration_settings", TransactionalKvStore::BitmapInfo::Unknown, + TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); + + if (migration_settings_f != nullptr + && migration_settings_f->Size()>0) + { + setMountStatus("{\"state\": \"continue_migration\"}"); + + Server->Log("Continuing cloud drive migration", LL_WARNING); + + std::unique_ptr tmpf(Server->openTemporaryFile()); + std::string tmpfn = tmpf->getFilename(); + + if (tmpf != nullptr + && copy_file(migration_settings_f, tmpf.get())) + { + tmpf.reset(); + + migration_ticket = Server->getThreadPool()->execute(new MigrationCoordThread(this, tmpfn, true), "migration coord"); + } + kv_store.release("clouddrive_migration_settings"); + } + else if (migration_settings_f != nullptr) + { + kv_store.release("clouddrive_migration_settings"); + kv_store.del("clouddrive_migration_settings"); + } + else + { + kv_store.release("clouddrive_migration_settings"); + } + + if (!share_with_mount_paths.empty()) + { + share_with_updater.reset(new ShareWithUpdater(mount_path, share_with_mount_paths, kv_store)); + share_with_updater_ticket = Server->getThreadPool()->execute(share_with_updater.get(), "share updt"); + } + + setMountStatus("{\"state\": \"ready\"}"); + + if (FileExists("/var/urbackup/no_fallocate_block_file")) + { + fallocate_block_file = false; + } +} + +CloudFile::~CloudFile() +{ + if (update_missing_chunks_thread.get() != nullptr) + update_missing_chunks_thread->stop(); + + kv_store.set_num_second_chances_callback(nullptr); + + { + IScopedLock lock(mutex.get()); + exit_thread = true; + thread_cond->notify_all(); + } + + Server->getThreadPool()->waitFor(fracture_big_blogs_ticket); + Server->getThreadPool()->waitFor(update_missing_chunks_thread_ticket); + + if (migration_ticket != ILLEGAL_THREADPOOL_TICKET) + Server->getThreadPool()->waitFor(migration_ticket); +} + +std::string CloudFile::Read( _u32 tr, bool* has_error) +{ + std::string ret; + ret.resize(tr); + + _u32 read = Read(&ret[0], tr, has_error); + + if(read==0) + { + return std::string(); + } + + ret.resize(read); + return ret; +} + +std::string CloudFile::Read( int64 pos, _u32 tr, bool* has_error) +{ + std::string ret; + ret.resize(tr); + + _u32 read = Read(pos, &ret[0], tr, has_error); + + if(read==0) + { + return std::string(); + } + + ret.resize(read); + return ret; +} + +_u32 CloudFile::Read(char* buffer, _u32 bsize, bool* has_error) +{ + _u32 ret = Read(cf_pos, buffer, bsize, has_error); + cf_pos+=ret; + return ret; +} + +_u32 CloudFile::Read(int64 pos, char* buffer, _u32 bsize, bool* has_error) +{ + return ReadInt(pos, buffer, bsize, nullptr, + TransactionalKvStore::Flag::prioritize_read| + TransactionalKvStore::Flag::read_random, has_error); +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::ReadAsync(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io, + int64 pos, _u32 bsize, unsigned int flags, bool ext_lock) +{ + fuse_in_header* fheader = reinterpret_cast(fuse_io.header_buf); + fuse_out_header* out_header = reinterpret_cast(fuse_io.scratch_buf); + out_header->error = 0; + out_header->len = sizeof(fuse_out_header) + bsize; + out_header->unique = fheader->unique; + + if(pos>cloudfile_size) + { + Server->Log("Read position " + convert(pos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_INFO); + + std::vector read_tasks; + read_tasks.push_back(ReadTask(get_file_fd(zero_file->getOsHandle()), zero_file.get(), std::string(), 0, bsize)); + + int ret = co_await complete_read_tasks(io, fuse_io, read_tasks, 0, + pos, bsize, flags); + + co_return ret; + } + + EXTENSIVE_DEBUG_LOG(Server->Log("Reading " + convert(bsize) + " bytes at pos " + convert(pos), LL_DEBUG);) + + EXTENSIVE_DEBUG_LOG(int64 starttime = Server->getTimeMS();) + + int64 target = pos + bsize; + + if (target>cloudfile_size) + { + target = cloudfile_size; + bsize = static_cast<_u32>(target - pos); + } + + size_t lock_idx; + int64 orig_pos = pos; + if(!ext_lock) + { + lock_idx = co_await lock_extent_async(orig_pos, bsize, false); + } + + std::vector read_tasks; + + size_t flush_mem_n_tasks = 0; + + bool io_error = false; + while(posget_async(io, pos/block_size); + + if(!has_block) + { + int64 tozero = (std::min)((block_size - pos%block_size), target-pos); + if (!read_tasks.empty() && read_tasks.back().file == zero_file.get()) + { + read_tasks.back().size += tozero; + } + else + { + read_tasks.push_back(ReadTask(get_file_fd(zero_file->getOsHandle()), zero_file.get(), std::string(), 0, static_cast(tozero))); + } + Server->Log("Adding " + convert(tozero) + " zero bytes at " + convert(pos), LL_INFO); + EXTENSIVE_DEBUG_LOG(Server->Log("Adding "+convert(tozero)+" zero bytes at "+convert(pos), LL_DEBUG);) + pos+=tozero; + continue; + } + + //TODO: FIXME: Currently it reads the whole block without checking the bitmap further + //(and returning zeroes), i.e. discard_zeroes_data=0 + + bool in_big_block = co_await big_blocks_bitmap->get_async(io, pos/big_block_size); + + int64 curr_block_size; + std::string key; + if(in_big_block) + { + curr_block_size = big_block_size; + key = big_block_key(pos/curr_block_size); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(pos/curr_block_size); + } + + int64 toread = (std::min)((curr_block_size - pos%curr_block_size), target-pos); + + unsigned int curr_flags = flags; + if (co_await is_metadata_async(io, pos, key)) + { + curr_flags |= TransactionalKvStore::Flag::disable_memfiles; + } + + IFsFile* block = co_await kv_store.get_async(io, key, TransactionalKvStore::BitmapInfo::Present, + curr_flags|TransactionalKvStore::Flag::read_only, curr_block_size); + + bool has_read_error; + if(block!=NULL) + { + int64 block_pos = pos % curr_block_size; + read_tasks.push_back(ReadTask(get_file_fd(block->getOsHandle()), block, key, block_pos, static_cast<_u32>(toread))); + + if (block->getFilename()!=key) + { + read_tasks.back().flush_mem = true; + ++flush_mem_n_tasks; + } + } + else + { + addSystemEvent("cache_err", + "Error reading", + "Cannot get block for volume offset " + convert(pos) + "" + " at block positition " + convert(pos % curr_block_size) + " len " + convert(toread) + ". ", LL_ERROR); + io_error = true; + break; + } + + pos+= toread; + read_bytes += toread; + } + + if (io_error) + { + if (!ext_lock) + { + unlock_extent_async(orig_pos, bsize, false, lock_idx); + } + + out_header->error = -EIO; + out_header->len = sizeof(fuse_out_header); + + io_uring_sqe* header_sqe = io.get_sqe(2); + io_uring_prep_write_fixed(header_sqe, fuse_io.pipe[1], + fuse_io.scratch_buf, sizeof(fuse_out_header), + -1, fuse_io.scratch_buf_idx); + header_sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; + + io_uring_sqe* splice_sqe = io.get_sqe(); + io_uring_prep_splice(splice_sqe, fuse_io.pipe[0], + -1, io.fuse_ring.fd, -1, out_header->len, + SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); + splice_sqe->flags |= IOSQE_FIXED_FILE; + + auto [rc1, rc2] = co_await io.complete(std::make_pair(header_sqe, splice_sqe)); + + if (rc1 < 0) + { + co_return -1; + } + + if (rc2 < 0 || rc2 < out_header->len) + { + co_return -1; + } + + co_return 0; + } + + if (read_tasks.empty()) + { + Server->Log("No read_tasks in ReadAsync", LL_WARNING); + abort(); + co_return -1; + } + + //Server->Log("flush_mem_n_tasks=" + convert(flush_mem_n_tasks)); + + int ret = co_await complete_read_tasks(io, fuse_io, read_tasks, flush_mem_n_tasks, + orig_pos, bsize, flags); + //Maybe needs loop for short splice + + for (ReadTask& task : read_tasks) + { + if(!task.key.empty()) + co_await kv_store.release_async(io, task.key); + } + + if(!ext_lock) + { + unlock_extent_async(orig_pos, bsize, false, lock_idx); + } + + EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS() - starttime) + "ms", LL_DEBUG);) + + co_return ret; +} +#endif //HAS_ASYNC + +_u32 CloudFile::ReadInt(int64 pos, char* buffer, _u32 bsize, IScopedLock* ext_lock, + unsigned int flags, bool* has_error) +{ + if (is_async) + abort(); + + IScopedLock lock(nullptr); + + if(ext_lock==nullptr) + { + lock.relock(mutex.get()); + } + + if (pos > cloudfile_size) + { + Server->Log("Read position " + convert(pos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_INFO); + memset(buffer, 0, bsize); + return bsize; + } + + EXTENSIVE_DEBUG_LOG(Server->Log("Reading " + convert(bsize) + " bytes at pos " + convert(pos), LL_DEBUG);) + + EXTENSIVE_DEBUG_LOG(int64 starttime = Server->getTimeMS();) + + int64 target = pos + bsize; + + if (target>cloudfile_size) + { + target = cloudfile_size; + bsize = static_cast<_u32>(target - pos); + } + + int64 orig_pos=pos; + if(ext_lock==nullptr) + { + lock_extent(lock, orig_pos, bsize, false); + } + + while(posget(pos/block_size); + + if(!has_block) + { + int64 tozero = (std::min)((block_size - pos%block_size), target-pos); + memset(buffer, 0, static_cast(tozero)); + Server->Log("Adding " + convert(tozero) + " zero bytes at " + convert(pos), LL_INFO); + EXTENSIVE_DEBUG_LOG(Server->Log("Adding "+convert(tozero)+" zero bytes at "+convert(pos), LL_DEBUG);) + buffer+=tozero; + pos+=tozero; + continue; + } + + bool in_big_block = big_blocks_bitmap->get(pos/big_block_size); + + int64 curr_block_size; + std::string key; + if(in_big_block) + { + curr_block_size = big_block_size; + key = big_block_key(pos/curr_block_size); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(pos/curr_block_size); + } + + int64 toread = (std::min)((curr_block_size - pos%curr_block_size), target-pos); + + if (ext_lock == nullptr) + { + lock.relock(nullptr); + } + else + { + ext_lock->relock(nullptr); + } + + unsigned int curr_flags = flags; + if (is_metadata(pos, key)) + { + curr_flags |= TransactionalKvStore::Flag::disable_memfiles; + } + + IFile* block = kv_store.get(key, TransactionalKvStore::BitmapInfo::Present, + curr_flags |TransactionalKvStore::Flag::read_only, curr_block_size); + + _u32 read=0; + + bool has_read_error; + if(block!=nullptr) + { + _u32 last_read; + do + { + int64 block_pos = pos%curr_block_size + read; + /* If Direct-IO is enabled TODO: Fix alignment of buffer + if (block_pos%io_alignment == 0 + && toread%io_alignment == 0) + { + last_read = block->Read(block_pos, buffer+read, static_cast<_u32>(toread)-read, &has_read_error); + } + else + { + last_read = ReadAligned(block, block_pos, buffer+read, static_cast<_u32>(toread)-read, &has_read_error); + }*/ + + has_read_error = false; + last_read = block->Read(block_pos, buffer + read, static_cast<_u32>(toread) - read, &has_read_error); + read += last_read; + + if (read < toread) + { + Server->Log("Short read ("+convert(read)+"/"+convert(toread)+") while reading from file " + block->getFilename() + " (size "+convert(block->Size())+")" + " at positition " + convert(pos%curr_block_size) + " block device pos " + convert(pos) + " len " + convert(toread) + " errno "+convert((int64)errno)+". Continuing...", LL_WARNING); + } + + } while (!has_read_error && read 0); + } + + if (has_read_error) + { + std::string syserr = os_last_error_str(); + Server->Log("Read error while reading from file "+block->getFilename()+"" + " at positition "+convert(pos%curr_block_size)+" block device pos "+convert(pos)+" len "+convert(toread)+". " + + syserr, LL_ERROR); + + addSystemEvent("cache_err", + "Error reading from block file on cache", + "Read error while reading from file " + block->getFilename() + "" + " at positition " + convert(pos%curr_block_size) + " block device pos " + convert(pos) + " len " + convert(toread) + ". " + + syserr, LL_ERROR); + } + + + if(readLog("Adding " + convert(toread - read) + " zero bytes at " + convert(pos+read)+" (2)", LL_INFO); + memset(buffer+read, 0, static_cast(toread-read)); + read=static_cast<_u32>(toread); + } + + if(block!=nullptr) + { + kv_store.release(key); + } + + if(read!=toread) + { + if (has_error) *has_error = true; + Server->Log("Error reading from block file (read only "+convert(read)+" of "+convert(toread)+" bytes)", LL_ERROR); + if (!has_read_error) + { + addSystemEvent("cache_err", + "Error reading from block file on cache", + "Error reading from block file at " + cachefs->getName() + " (read only " + convert(read) + " of " + convert(toread) + " bytes)", LL_ERROR); + } + if (ext_lock==nullptr) + { + lock.relock(mutex.get()); + unlock_extent(lock, orig_pos, bsize, false); + } + else + { + ext_lock->relock(mutex.get()); + } + pos += read; + return static_cast<_u32>(pos-orig_pos); + } + + + buffer+=read; + pos+=read; + read_bytes += read; + + if(ext_lock==nullptr) + { + lock.relock(mutex.get()); + } + else + { + ext_lock->relock(mutex.get()); + } + } + + if(ext_lock==nullptr) + { + unlock_extent(lock, orig_pos, bsize, false); + } + + EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS() - starttime) + "ms", LL_DEBUG);) + + return bsize; +} + +_u32 CloudFile::ReadAligned(IFile* block, int64 pos, char* buffer, _u32 toread, bool* has_read_error) +{ + EXTENSIVE_DEBUG_LOG(Server->Log("Fixing alignment for read at pos "+convert(pos)+" size "+convert(toread), LL_DEBUG);) + + int64 startpos = (pos/io_alignment)*io_alignment; + _u32 alignstartadd = static_cast<_u32>(pos-startpos); + _u32 alignendadd = io_alignment - (toread+alignstartadd)%io_alignment; + _u32 alignread = toread + alignstartadd + alignendadd; + char* buf = reinterpret_cast(aligned_alloc(io_alignment, alignread) ); + _u32 r = block->Read(startpos, buf, alignread, has_read_error); + + if(rtoread) + { + r=toread; + } + + memcpy(buffer, buf+alignstartadd, r); + free(buf); + + return r; +} + +_u32 CloudFile::Write( const std::string &tw, bool* has_error) +{ + return Write(tw.c_str(), static_cast<_u32>(tw.size()), has_error); +} + +_u32 CloudFile::Write(int64 pos, const std::string &tw, bool* has_error) +{ + return Write(pos, tw.c_str(), static_cast<_u32>(tw.size()), has_error); +} + +_u32 CloudFile::Write( const char* buffer, _u32 bsize, bool* has_error) +{ + _u32 ret = WriteInt(cf_pos, buffer, bsize, false, nullptr, true, has_error); + cf_pos += ret; + return ret; +} + +_u32 CloudFile::Write( int64 pos, const char* buffer, _u32 bsize, bool* has_error) +{ + _u32 rc = WriteInt(pos, buffer, bsize, false, nullptr, true, has_error); + return rc; +} + +_u32 CloudFile::WriteNonBlocking( int64 pos, const char* buffer, _u32 bsize, bool* has_error) +{ + EXTENSIVE_DEBUG_LOG(Server->Log("Non-blocking write at pos "+convert(pos)+" size "+convert(bsize));) + _u32 rc = WriteInt(pos, buffer, bsize, false, nullptr, false, has_error); + return rc; +} + +namespace +{ + class PreloadItemsThread : public IThread + { + CloudFile& cf; + IPipe* pipe; + int tag; + bool disable_memfiles; + bool load_only; + public: + PreloadItemsThread(CloudFile& cf, IPipe* pipe, int tag, bool disable_memfiles, bool load_only) + : cf(cf), pipe(pipe), tag(tag), disable_memfiles(disable_memfiles), + load_only(load_only) {} + + void operator()() + { + while (true) + { + std::string data; + pipe->Read(&data); + + if (data.empty()) + { + pipe->Write(data); + break; + } + + CRData rdata(data.data(), data.size()); + + std::string key; + int64 offset, len; + if (rdata.getStr2(&key) + && rdata.getVarInt(&offset) + && rdata.getVarInt(&len)) + { + cf.preload_key(key, offset, len, tag, disable_memfiles, load_only); + } + } + + delete this; + } + }; + + class UpdateFsChunksThread : public IThread + { + std::string path; + int notify_eventfd; + IBackupFileSystem* fs; + public: + struct SOutput + { + std::vector chunks; + std::atomic done; + }; + + UpdateFsChunksThread(IBackupFileSystem* fs, + std::shared_ptr output, + std::string path, int notify_eventfd) : + fs(fs), output(output), path(path), + notify_eventfd(notify_eventfd) {} + + void operator()() { +#ifdef HAS_MOUNT_SERVICE + if (!btrfs_get_chunks_via_service(path, output->chunks)) + { + output->chunks = btrfs_chunks::get_chunks(path); + } +#else + output->chunks = fs->getChunks(); +#endif + output->done.store(true, std::memory_order_release); + if (notify_eventfd != -1) + { + eventfd_write(notify_eventfd, 1); + } + delete this; + } + + private: + std::shared_ptr output; + }; +} + +void CloudFile::preload_items(const std::string & fn, size_t n_threads, int tag, bool disable_memfiles, bool load_only) +{ + if (is_async) + abort(); + + std::unique_ptr f(Server->openFile(fn, MODE_READ)); + + if (f.get() == nullptr) + return; + + Server->Log("Starting preloading fn=" + fn + " size=" + PrettyPrintBytes(f->Size()) + + " items="+convert(f->Size()/(2*sizeof(int64)))+ + " n_threads=" + convert(n_threads) + " tag=" + convert(tag) + + " disable_memfiles=" + convert(disable_memfiles) + " load_only=" + convert(load_only), LL_INFO); + + std::vector tickets; + std::unique_ptr pipe(Server->createMemoryPipe()); + for (size_t i = 0; i < n_threads; ++i) + { + tickets.push_back(Server->getThreadPool()->execute(new PreloadItemsThread(*this, pipe.get(), tag, disable_memfiles, load_only), + "preload items")); + } + + IScopedLock lock(mutex.get()); + + char buf[1024]; + _u32 rc; + while ((rc = f->Read(buf, sizeof(buf))) > 0) + { + for (size_t i = 0; i+1 < rc / sizeof(int64); i+=2) + { + int64 offset_start; + memcpy(&offset_start, &buf[i * sizeof(offset_start)], sizeof(offset_start)); + + int64 offset_end; + memcpy(&offset_end, &buf[(i+1) * sizeof(offset_end)], sizeof(offset_end)); + + if (offset_start > cloudfile_size) + { + continue; + } + + if (offset_end > cloudfile_size) + offset_end = cloudfile_size; + + while (offset_start < offset_end) + { + Server->Log("Preloading offset " + convert(offset_start) + " size " + convert(offset_end - offset_start), LL_INFO); + bool has_block = bitmap->get(offset_start / block_size); + + if (!has_block) + { + int64 tozero = (std::min)((block_size - offset_start%block_size), offset_end - offset_start); + offset_start += tozero; + continue; + } + + bool in_big_block = big_blocks_bitmap->get(offset_start / big_block_size); + + int64 curr_block_size; + std::string key; + if (in_big_block) + { + curr_block_size = big_block_size; + key = big_block_key(offset_start / curr_block_size); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(offset_start / curr_block_size); + } + + int64 toread = (std::min)((curr_block_size - offset_start%curr_block_size), offset_end - offset_start); + + CWData data; + data.addString2(key); + data.addVarInt((offset_start / curr_block_size)*curr_block_size); + data.addVarInt(curr_block_size); + pipe->Write(std::string(data.getDataPtr(), data.getDataSize())); + + while (pipe->getNumElements() > n_threads * 3) + { + lock.relock(nullptr); + Server->wait(100); + lock.relock(mutex.get()); + } + + offset_start += toread; + } + } + } + + lock.relock(nullptr); + + pipe->Write(std::string()); + + Server->getThreadPool()->waitFor(tickets); +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::preload_items_async(fuse_io_context& io, const std::string& fn, size_t n_threads, int tag, bool disable_memfiles, bool load_only) +{ + std::unique_ptr f(Server->openFile(fn, MODE_READ)); + + if (f.get() == nullptr) + co_return 0; + + Server->Log("Starting async preloading fn=" + fn + " size=" + PrettyPrintBytes(f->Size()) + + " items=" + convert(f->Size() / (2 * sizeof(int64))) + + " n_threads=" + convert(n_threads) + " tag=" + convert(tag) + + " disable_memfiles=" + convert(disable_memfiles) + " load_only=" + convert(load_only), LL_INFO); + + auto available_workers = std::make_unique(n_threads); + auto worker_waiters_head = std::make_unique(nullptr); + + char buf[1024]; + _u32 rc; + while ((rc = f->Read(buf, sizeof(buf))) > 0) + { + for (size_t i = 0; i + 1 < rc / sizeof(int64); i += 2) + { + int64 offset_start; + memcpy(&offset_start, &buf[i * sizeof(offset_start)], sizeof(offset_start)); + + int64 offset_end; + memcpy(&offset_end, &buf[(i + 1) * sizeof(offset_end)], sizeof(offset_end)); + + if (offset_start > cloudfile_size) + { + continue; + } + + if (offset_end > cloudfile_size) + offset_end = cloudfile_size; + + while (offset_start < offset_end) + { + size_t lock_idx = co_await lock_extent_async(0, 0, false); + + Server->Log("Preloading offset " + convert(offset_start) + " size " + convert(offset_end - offset_start), LL_INFO); + bool has_block = co_await bitmap->get_async(io, offset_start / block_size); + + if (!has_block) + { + int64 tozero = (std::min)((block_size - offset_start % block_size), offset_end - offset_start); + offset_start += tozero; + unlock_extent_async(0, 0, false, lock_idx); + continue; + } + + bool in_big_block = co_await big_blocks_bitmap->get_async(io, offset_start / big_block_size); + + unlock_extent_async(0, 0, false, lock_idx); + + int64 curr_block_size; + std::string key; + if (in_big_block) + { + curr_block_size = big_block_size; + key = big_block_key(offset_start / curr_block_size); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(offset_start / curr_block_size); + } + + int64 toread = (std::min)((curr_block_size - offset_start % curr_block_size), offset_end - offset_start); + + + preload_items_single_async(io, *available_workers, *worker_waiters_head, key, + (offset_start / curr_block_size) * curr_block_size, curr_block_size, + tag, disable_memfiles, load_only); + + offset_start += toread; + } + } + } + + while (*available_workers != n_threads) + { + co_await io.sleep_ms(100); + } + + co_return 0; +} +#endif //HAS_ASYNC + +bool CloudFile::has_preload_item(int64 offset_start, int64 offset_end) +{ + if (is_async) + abort(); + + IScopedLock lock(mutex.get()); + + if (offset_start > cloudfile_size) + { + return false; + } + + if (offset_end > cloudfile_size) + offset_end = cloudfile_size; + + while (offset_start < offset_end) + { + bool has_block = bitmap->get(offset_start / block_size); + + if (!has_block) + { + int64 tozero = (std::min)((block_size - offset_start%block_size), offset_end - offset_start); + offset_start += tozero; + continue; + } + + bool in_big_block = big_blocks_bitmap->get(offset_start / big_block_size); + + int64 curr_block_size; + std::string key; + if (in_big_block) + { + curr_block_size = big_block_size; + key = big_block_key(offset_start / curr_block_size); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(offset_start / curr_block_size); + } + + int64 toread = (std::min)((curr_block_size - offset_start%curr_block_size), offset_end - offset_start); + + lock.relock(nullptr); + + if (!kv_store.has_item_cached(key)) + { + return false; + } + + lock.relock(mutex.get()); + + offset_start += toread; + } + + return true; +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::has_preload_item_async(fuse_io_context& io, int64 offset_start, int64 offset_end) +{ + if (offset_start > cloudfile_size) + { + co_return false; + } + + if (offset_end > cloudfile_size) + offset_end = cloudfile_size; + + size_t lock_idx = co_await lock_extent_async(0, 0, false); + + while (offset_start < offset_end) + { + bool has_block = co_await bitmap->get_async(io, offset_start / block_size); + + if (!has_block) + { + int64 tozero = (std::min)((block_size - offset_start % block_size), offset_end - offset_start); + offset_start += tozero; + continue; + } + + bool in_big_block = co_await big_blocks_bitmap->get_async(io, offset_start / big_block_size); + + int64 curr_block_size; + std::string key; + if (in_big_block) + { + curr_block_size = big_block_size; + key = big_block_key(offset_start / curr_block_size); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(offset_start / curr_block_size); + } + + int64 toread = (std::min)((curr_block_size - offset_start % curr_block_size), offset_end - offset_start); + + if (!kv_store.has_item_cached(key)) + { + unlock_extent_async(0, 0, false, lock_idx); + co_return false; + } + + offset_start += toread; + } + + unlock_extent_async(0, 0, false, lock_idx); + + co_return true; +} +#endif //HAS_ASYNC + +int64 CloudFile::min_block_size() +{ + return small_block_size; +} + +void CloudFile::set_is_mounted(const std::string & p_mount_path, IBackupFileSystem* fs) +{ + std::vector new_fs_chunks; +#ifdef HAS_MOUNT_SERVICE + if (!btrfs_get_chunks_via_service(p_mount_path, new_fs_chunks)) + { + new_fs_chunks = btrfs_chunks::get_chunks(p_mount_path); + } +#else + topfs = fs; + new_fs_chunks = fs->getChunks(); +#endif + + for (auto& it : new_fs_chunks) + { + it.offset += 8192; + } + + { + IScopedWriteLock lock(chunks_mutex.get()); + fs_chunks = new_fs_chunks; + mount_path = p_mount_path; + } + + + kv_store.set_num_second_chances_callback(this); +} + +void CloudFile::update_fs_chunks(bool update_time, IScopedWriteLock& lock) +{ + lock.relock(chunks_mutex.get()); + +#ifndef HAS_MOUNT_SERVICE + if (topfs == nullptr) + return; +#endif + + if (updating_fs_chunks) + { + return; + } + + std::string local_mount_path; + updating_fs_chunks = true; + local_mount_path = mount_path; + lock.relock(nullptr); + + std::shared_ptr new_fs_chunks = std::make_shared(); + THREADPOOL_TICKET ticket = Server->getThreadPool()->execute( + new UpdateFsChunksThread(topfs, new_fs_chunks, local_mount_path, -1), "updt fs chunks"); + + if (Server->getThreadPool()->waitFor(ticket, 1000)) + { + lock.relock(chunks_mutex.get()); + + fs_chunks = new_fs_chunks->chunks; + for (auto& it : fs_chunks) + { + it.offset += 8192; + } + } + else + { + lock.relock(chunks_mutex.get()); + + if (update_time) + last_fs_chunks_update += 60000; + + update_time = false; + } + + + if(update_time) + last_fs_chunks_update = Server->getTimeMS()-10*60*1000; + + updating_fs_chunks = false; +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::update_fs_chunks_async(fuse_io_context& io, bool update_time) +{ + IScopedWriteLock lock(chunks_mutex.get()); + + if (updating_fs_chunks) + { + co_return; + } + + std::string local_mount_path; + updating_fs_chunks = true; + local_mount_path = mount_path; + lock.relock(nullptr); + + std::shared_ptr new_fs_chunks = std::make_shared(); + Server->getThreadPool()->execute(new UpdateFsChunksThread(new_fs_chunks, local_mount_path, update_fs_chunks_eventfd), "updt fs chunks"); + + int64 starttime = Server->getTimeMS(); + std::vector buf(sizeof(int64)); + + bool thread_done = false; + + io_uring_sqe* sqe = io.get_sqe(2); + io_uring_prep_read(sqe, update_fs_chunks_eventfd, buf.data(), buf.size(), 0); + sqe->flags |= IOSQE_IO_LINK; + + io_uring_sqe* sqe_timeout = io.get_sqe(); + __kernel_timespec waitt = io.msec_timespec(1000); + io_uring_prep_link_timeout(sqe_timeout, &waitt, 0); + + auto [rc1, rc2] = co_await io.complete(std::make_pair(sqe, sqe_timeout)); + + if (new_fs_chunks->done.load(std::memory_order_acquire)) + { + lock.relock(chunks_mutex.get()); + + fs_chunks = new_fs_chunks->chunks; + for (auto& it : fs_chunks) + { + it.offset += 8192; + } + } + else + { + lock.relock(chunks_mutex.get()); + + if (update_time) + last_fs_chunks_update += 60000; + + update_time = false; + } + + + if (update_time) + last_fs_chunks_update = Server->getTimeMS() - 10 * 60 * 1000; + + updating_fs_chunks = false; +} +#endif //HAS_ASYNC + +namespace +{ +#ifdef HAS_ONLINE_BACKEND + class TokenRefreshCallback : public ITokenRefreshCallback + { + std::string conf_fn; + public: + TokenRefreshCallback(const std::string& conf_fn) + :conf_fn(conf_fn) + { + } + + virtual std::string getNewToken() + { + std::unique_ptr settings(Server->createFileSettingsReader( + conf_fn)); + + if (settings.get() != NULL) + { + std::string password; + if (settings->getValue("auth_token", &password)) + { + return password; + } + } + + return std::string(); + } + }; +#endif + + class MigrationThread : public IThread + { + IPipe* pipe; + CloudFile* cf; + std::atomic& has_error; + public: + MigrationThread(IPipe* pipe, CloudFile* cf, std::atomic& has_error) + : pipe(pipe), cf(cf), has_error(has_error) {} + void operator()() + { + std::vector buf; + buf.resize(block_size); + + std::string msg; + while (pipe->Read(&msg) > 0) + { + CRData data(msg.data(), msg.size()); + + int64 offset; + int64 len; + if (data.getVarInt(&offset) + && data.getVarInt(&len)) + { + if (!cf->migrate(buf, offset, len)) + { + has_error = true; + break; + } + } + } + + pipe->Write(std::string()); + delete this; + } + }; +} + +bool CloudFile::migrate(const std::string & conf_fn, bool continue_migration) +{ + std::unique_ptr settings(Server->createMemorySettingsReader(getFile(conf_fn))); + + if (!continue_migration) + { + if (os_directory_exists(cache_path + "/migration")) + { + os_remove_nonempty_dir(cache_path + "/migration"); + } + + if (os_directory_exists(cache_path + "/migration")) + { + Server->Log(cache_path + "/migration still exists", LL_ERROR); + return false; + } + + if (!os_create_dir_recursive(cache_path + "/migration")) + { + Server->Log("Error creating " + cache_path + "/migration . " + os_last_error_str(), LL_ERROR); + return false; + } + } + + return migrate(conf_fn, settings.get()); +} + +std::string CloudFile::migration_info() +{ + IScopedLock lock(mutex.get()); + + if (migrate_to_cf == nullptr) + return "{\"migration\": false}"; + + int done_pc = static_cast((migration_copy_done * 100) / cloudfile_size); + + JSON::Object ret; + + ret.set("done_pc", done_pc); + ret.set("migration", true); + ret.set("num_dirty_items", migrate_to_cf->getNumDirtyItems()); + ret.set("num_memfile_items", migrate_to_cf->getNumMemfileItems()); + ret.set("submitted_bytes", migrate_to_cf->getSubmittedBytes()); + ret.set("used_bytes", migrate_to_cf->getSubmittedBytes()); + ret.set("cache_size", migrate_to_cf->getCacheSize()); + ret.set("comp_bytes", migrate_to_cf->getCompBytes()); + ret.set("congested", migrate_to_cf->Congested()); + ret.set("raid_groups", migrate_to_cf->get_raid_groups()); + ret.set("disk_error_info", migrate_to_cf->disk_error_info()); + ret.set("memfile_bytes", migrate_to_cf->get_memfile_bytes()); + ret.set("submitted_memfile_bytes", migrate_to_cf->get_submitted_memfile_bytes()); + ret.set("conf_fn", migration_conf_fn); + + return ret.stringify(false); +} + +void CloudFile::preload_key(const std::string & key, int64 offset, int64 len, int tag, bool disable_memfiles, bool load_only) +{ + if (is_async) + abort(); + + IScopedLock lock(mutex.get()); + + lock_extent(lock, offset, len, false); + + bool in_big_block = big_blocks_bitmap->get(offset / big_block_size); + + int64 curr_block_size; + std::string curr_key; + bool has_block; + if (in_big_block) + { + curr_block_size = big_block_size; + curr_key = big_block_key(offset / curr_block_size); + has_block = bitmap_has_big_block(offset / curr_block_size); + } + else + { + curr_block_size = small_block_size; + curr_key = small_block_key(offset / curr_block_size); + has_block = bitmap_has_small_block(offset / curr_block_size); + } + + if (key == curr_key + && has_block) + { + lock.relock(nullptr); + + unsigned int flags; + if (disable_memfiles + && load_only) + { + flags = TransactionalKvStore::Flag::read_only | TransactionalKvStore::Flag::prioritize_read | + TransactionalKvStore::Flag::disable_memfiles; + } + else + { + flags = TransactionalKvStore::Flag::read_only | TransactionalKvStore::Flag::prioritize_read | + TransactionalKvStore::Flag::preload_once; + + if(disable_memfiles) + flags |= TransactionalKvStore::Flag::disable_memfiles; + } + + IFile* block = kv_store.get(key, TransactionalKvStore::BitmapInfo::Present, + flags, curr_block_size, tag); + + if (block != nullptr) + { + kv_store.release(key); + } + + lock.relock(mutex.get()); + } + + unlock_extent(lock, offset, len, false); +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::preload_key_async(fuse_io_context& io, const std::string& key, + int64 offset, int64 len, int tag, bool disable_memfiles, bool load_only) +{ + size_t lock_idx = co_await lock_extent_async(offset, len, false); + + bool in_big_block = big_blocks_bitmap->get(offset / big_block_size); + + int64 curr_block_size; + std::string curr_key; + bool has_block; + if (in_big_block) + { + curr_block_size = big_block_size; + curr_key = big_block_key(offset / curr_block_size); + has_block = co_await bitmap_has_big_block_async(io, offset / curr_block_size); + } + else + { + curr_block_size = small_block_size; + curr_key = small_block_key(offset / curr_block_size); + has_block = co_await bitmap_has_small_block_async(io, offset / curr_block_size); + } + + if (key == curr_key + && has_block) + { + unsigned int flags; + if (disable_memfiles + && load_only) + { + flags = TransactionalKvStore::Flag::read_only | TransactionalKvStore::Flag::prioritize_read | + TransactionalKvStore::Flag::disable_memfiles; + } + else + { + flags = TransactionalKvStore::Flag::read_only | TransactionalKvStore::Flag::prioritize_read | + TransactionalKvStore::Flag::preload_once; + + if (disable_memfiles) + flags |= TransactionalKvStore::Flag::disable_memfiles; + } + + IFile* block = co_await kv_store.get_async(io, key, TransactionalKvStore::BitmapInfo::Present, + flags, curr_block_size, tag); + + if (block != nullptr) + { + co_await kv_store.release_async(io, key); + } + } + + unlock_extent_async(offset, len, false, lock_idx); + + co_return 0; +} +#endif //HAS_ASYNC + +int64 CloudFile::get_uploaded_bytes() +{ + return online_kv_store->get_uploaded_bytes(); +} + +int64 CloudFile::get_downloaded_bytes() +{ + return online_kv_store->get_downloaded_bytes(); +} + +int64 CloudFile::get_written_bytes() +{ + if (is_async) + { + return written_bytes; + } + + IScopedLock lock(mutex.get()); + return written_bytes; +} + +int64 CloudFile::get_read_bytes() +{ + if (is_async) + { + read_bytes; + } + + IScopedLock lock(mutex.get()); + return read_bytes; +} + + +std::string CloudFile::get_raid_io_stats() +{ +#ifdef HAS_LOCAL_BACKEND + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + return backend->io_stats(); + } + } +#endif + + return std::string(); +} + +namespace +{ +#pragma pack(1) + struct SLogHeader + { + _u32 size; + int64 offset; + _u32 crc; + }; +#pragma pack() + + const std::string slog_magic = "TCDSLOG#1.0"; +} + +bool CloudFile::replay_slog() +{ +#ifdef WITH_SLOG + if (slog_path.empty()) + return true; + + if (!FileExists(slog_path)) + { + slog.reset(Server->openFile(slog_path, MODE_WRITE)); + + if (slog.get() == nullptr) + { + Server->Log("Error creating slog file at \"" + slog_path + "\". " + os_last_error_str(), LL_ERROR); + return false; + } + + if (slog->Write(slog_magic) != slog_magic.size()) + { + Server->Log("Error writing magic to slog file at \"" + slog_path + "\". " + os_last_error_str(), LL_ERROR); + return false; + } + + int64 transid = kv_store.get_basetransid(); + + if (slog->Write(reinterpret_cast(&transid), sizeof(transid)) != sizeof(transid)) + { + Server->Log("Error writing transid to slog at \""+slog_path+"\". " + os_last_error_str(), LL_ERROR); + return false; + } + + slog->Sync(); + + slog_size = slog_magic.size() + sizeof(transid); + slog_last_sync = slog_magic.size() + sizeof(transid); + + return true; + } + + slog.reset(Server->openFile(slog_path, MODE_READ_SEQUENTIAL)); + + if (slog.get() == nullptr) + { + Server->Log("Error opening slog file at \"" + slog_path + "\". " + os_last_error_str(), LL_ERROR); + return false; + } + + std::string magic = slog->Read(static_cast<_u32>(slog_magic.size())); + + if (magic != slog_magic) + { + Server->Log("Slog magic wrong", LL_ERROR); + return false; + } + + slog_size = slog_magic.size(); + + + int64 transid = 0; + + if (slog->Read(reinterpret_cast(&transid), sizeof(transid)) != sizeof(transid)) + { + Server->Log("Error reading transid", LL_ERROR); + return false; + } + + if (kv_store.get_basetransid() != transid) + { + Server->Log("Transid in slog wrong expected " + convert(kv_store.get_basetransid()) + " got " + convert(transid), LL_ERROR); + return false; + } + + slog_size += sizeof(transid); + + std::vector buf; + + Server->Log("Replaying slog size " + PrettyPrintBytes(slog->Size()), LL_INFO); + + do + { + bool has_read_error = false; + SLogHeader slog_header = {}; + if (slog->Read(reinterpret_cast(&slog_header), sizeof(slog_header), &has_read_error) != sizeof(slog_header)) + { + if (has_read_error) + { + Server->Log("Error reading slog header. " + os_last_error_str(), LL_ERROR); + return false; + } + + break; + } + + if (buf.size() < slog_header.size) + { + buf.resize(slog_header.size); + } + + if (slog->Read(buf.data(), slog_header.size, &has_read_error) != slog_header.size) + { + if (has_read_error) + { + Server->Log("Error reading slog data. " + os_last_error_str(), LL_ERROR); + return false; + } + + Server->Log("Error reading enough slog data", LL_ERROR); + return false; + } + + _u32 crc = slog_header.crc; + slog_header.crc = 0; + + cryptopp_crc::CRC32C crc2; + crc2.Update(reinterpret_cast(&slog_header), sizeof(slog_header)); + crc2.Update(buf.data(), slog_header.size); + _u32 r2 = crc2.Final(); + + if (r2 != crc) + { + Server->Log("slog crc32c wrong. " + convert(r2) + "!=" + convert(crc), LL_ERROR); + return false; + } + + if (Write(slog_header.offset, buf.data(), slog_header.size) != slog_header.size) + { + Server->Log("Error writing slog at offset " + convert(slog_header.offset) + " size " + convert(slog_header.size), LL_ERROR); + return false; + } + + slog_size += sizeof(slog_header) + slog_header.size; + } while (true); + + slog.reset(Server->openFile(slog_path, MODE_RW_READNONE)); + + if (slog.get() == nullptr) + { + Server->Log("Error opening slog file at \"" + slog_path + "\" (2). " + os_last_error_str(), LL_ERROR); + return false; + } + + slog_last_sync = slog_size.load(); + +#endif //WITH_SLOG + + return true; +} + +std::string CloudFile::get_mirror_stats() +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->mirror_stats(); + } + + return std::string(); +} + +std::string CloudFile::get_raid_freespace_stats() +{ +#ifdef HAS_LOCAL_BACKEND + { + IScopedLock lock(mutex.get()); + + if (!enable_raid_freespace_stats) + return std::string(); + } + + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + return backend->freespace_stats(); + } + } +#endif + + return std::string(); +} + +bool CloudFile::slog_write(int64 pos, const char * buffer, _u32 bsize, bool& needs_reset) +{ +#ifdef WITH_SLOG + needs_reset = false; + + if (slog.get() == nullptr) + return true; + + SLogHeader slog_header = {}; + slog_header.offset = pos; + slog_header.size = bsize; + + cryptopp_crc::CRC32C crc2; + crc2.Update(reinterpret_cast(&slog_header), sizeof(slog_header)); + crc2.Update(buffer, bsize); + slog_header.crc = crc2.Final(); + + int64 slog_pos = slog_size.fetch_add(sizeof(slog_header) + bsize); + + if (slog->Write(slog_pos, reinterpret_cast(&slog_header), sizeof(slog_header)) != sizeof(slog_header)) + { + Server->Log("Error writing to slog file " + slog_path + " (1). " + os_last_error_str(), LL_ERROR); + return false; + } + + if (slog->Write(slog_pos + sizeof(slog_header), buffer, bsize) != bsize) + { + Server->Log("Error writing to slog file " + slog_path + " (2). " + os_last_error_str(), LL_ERROR); + return false; + } + + int64 slog_pos_end = slog_pos + static_cast(sizeof(slog_header)) + bsize; + + if (slog_pos < slog_max_size + && slog_pos_end >= slog_max_size) + { + needs_reset = true; + } + else if (slog_last_sync + slog_kernel_buffer_size < slog_pos + && slog_pos_end >= slog_last_sync + slog_kernel_buffer_size) + { +#ifndef _WIN32 + sync_file_range(slog->getOsHandle(), 0, slog_pos_end - slog_kernel_buffer_size, + SYNC_FILE_RANGE_WRITE); + posix_fadvise64(slog->getOsHandle(), 0, slog_pos_end - slog_kernel_buffer_size, POSIX_FADV_DONTNEED); +#endif + slog_last_sync = slog_pos_end; + } + +#endif //WITH_SLOG + + return true; +} + +bool CloudFile::migrate(const std::string& conf_fn, ISettingsReader * settings) +{ +#ifdef WITH_MIGRATION + if (!os_directory_exists(cache_path + "/migration")) + { + Server->Log(cache_path + "/migration does not exist", LL_ERROR); + return false; + } + + std::string key; + if (!settings->getValue("key", &key)) + { + Server->Log("Migration setting missing", LL_ERROR); + return false; + } + + std::string aes_key = pbkdf2_sha256(key); + + IScopedLock lock(mutex.get()); + + int64 orig_cloudfile_size = cloudfile_size; + ++wait_for_exclusive; + lock_extent(lock, 0, orig_cloudfile_size, true); + --wait_for_exclusive; + + migration_conf_fn = conf_fn; + + IBackupFileSystem* migrate_cachefs = new PassThroughFileSystem(cache_path + "/migration"); + + IOnlineKvStore* migrate_online_kv_store = create_migration_endpoint(conf_fn, settings, + migrate_cachefs); + + int64 reserved_cache_device_space = 1LL * 1024 * 1024 * 1024; + int64 min_metadata_cache_free = 1LL * 1024 * 1024 * 1024; + + int64 total_ram = get_total_ram_kb(); + + if (total_ram > 0) + { + total_ram *= 1024LL; + } + + int64 cache_size = 2LL * 1024 * 1024 * 1024; + if (total_ram < 4LL * 1024 * 1024 * 1024) + { + cache_size = total_ram / 4; + } + + std::string background_compression_method = settings->getValue("background_compression_method", "zstd_19"); + + migrate_to_cf.reset(new CloudFile(cache_path + "/migration", + migrate_cachefs, cloudfile_size, cloudfile_size, migrate_online_kv_store, + aes_key, get_compress_encrypt_factory(), false, 1, false, 1, + reserved_cache_device_space, min_metadata_cache_free, false, true, true, 0.8f, cache_size, std::string(), 1, true, + std::string(), std::string(), CompressionMethodFromString(background_compression_method), + std::string(), 0, is_async)); + + migration_has_error = false; + migration_copy_max = 0; + migration_copy_done = 0; + + std::string last_transid = settings->getValue("transid"); + if (!last_transid.empty() + && watoi64(last_transid) != migrate_to_cf->kv_store.get_basetransid()) + { + Server->Log("Last transid " + last_transid + " does not equal current base transid " + convert(migrate_to_cf->kv_store.get_basetransid()) + ". Not continuing migration", LL_ERROR); + migration_has_error = true; + } + + size_t num_threads = os_get_num_cpus(); + + migration_copy_done_lag = (num_threads + 2)*big_block_size; + + migration_thread_pool.reset(Server->createThreadPool(num_threads, 2, "idle migration thread")); + + IPipe* migration_pipe = Server->createMemoryPipe(); + + for (size_t i = 0; i < num_threads; ++i) + { + migration_thread_pool->execute(new MigrationThread(migration_pipe, this, migration_has_error), "migration"); + } + + std::string settings_data; + std::vector settings_keys = settings->getKeys(); + + for (std::string& key : settings_keys) + { + if (key != "transid") + { + settings_data += key + "=" + settings->getValue(key) + "\n"; + } + } + + settings_data += "transid=" + convert(migrate_to_cf->kv_store.get_transid()) + "\n"; + + IFsFile* settings_f = kv_store.get("clouddrive_migration_settings", TransactionalKvStore::BitmapInfo::Unknown, + TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); + + if (settings_f != nullptr) + { + if (settings_f->Write(0, settings_data) != settings_data.size()) + { + migration_has_error = true; + } + + if (!settings_f->Resize(settings_data.size(), false)) + { + migration_has_error = true; + } + + kv_store.release("clouddrive_migration_settings"); + } + else + { + migration_has_error = true; + } + + unlock_extent(lock, 0, orig_cloudfile_size, true); + + if (migration_has_error) + { + migration_pipe->Write(std::string()); + return false; + } + + int64 pos = settings->getValue("done", 0LL); + + migration_copy_done = pos; + + for (; pos < cloudfile_size; pos += big_block_size) + { + CWData data; + data.addVarInt(pos); + data.addVarInt(big_block_size); + + migration_copy_max = pos + big_block_size; + + migration_pipe->Write(data.getDataPtr(), data.getDataSize()); + + if (migration_has_error) + { + break; + } + + while (migration_pipe->getNumElements() > 100) + { + Server->wait(100); + } + } + + migration_pipe->Write(std::string()); + + return !migration_has_error; +#else //WITH_MIGRATION + return false; +#endif //WITH_MIGRATION +} + +bool CloudFile::update_migration_settings() +{ + IFsFile* settings_f = kv_store.get("clouddrive_migration_settings", TransactionalKvStore::BitmapInfo::Unknown, + TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); + + if (settings_f != nullptr) + { + std::string curr_settings = settings_f->Read(static_cast<_u32>(settings_f->Size())); + + std::unique_ptr settings(Server->createMemorySettingsReader(curr_settings)); + + std::string settings_data; + std::vector settings_keys = settings->getKeys(); + + for (std::string& key : settings_keys) + { + if (key != "transid" + && key!="done") + { + settings_data += key + "=" + settings->getValue(key) + "\n"; + } + } + + settings_data += "transid=" + convert(migrate_to_cf->kv_store.get_transid()) + "\n"; + settings_data += "done=" + convert(migration_copy_done) + "\n"; + + if (settings_f->Write(0, settings_data) != settings_data.size()) + { + kv_store.release("clouddrive_migration_settings"); + return false; + } + + if (!settings_f->Resize(settings_data.size(), false)) + { + kv_store.release("clouddrive_migration_settings"); + return false; + } + + kv_store.release("clouddrive_migration_settings"); + } + else + { + return false; + } + + return true; +} + +IOnlineKvStore * CloudFile::create_migration_endpoint(const std::string& conf_fn, ISettingsReader* settings, + IBackupFileSystem* migrate_cachefs) +{ +#ifdef WITH_MIGRATION + std::string endpoint; + std::string key; + if (!settings->getValue("endpoint", &endpoint) + || !settings->getValue("key", &key)) + { + Server->Log("Migration setting missing", LL_ERROR); + return nullptr; + } + + std::string aes_key = pbkdf2_sha256(key); + + if (endpoint == "urbackup") + { + std::string auth_token; + std::string api; + if (!settings->getValue("auth_token", &auth_token) + || !settings->getValue("api", &api)) + { + Server->Log("Migration setting missing (1)", LL_ERROR); + return nullptr; + } + + return new OnlineKvStore(api, auth_token, aes_key, + cache_path + "/migration/generation", new TokenRefreshCallback(conf_fn), + get_compress_encrypt_factory(), + CompressionMethodFromString(settings->getValue("compression_method", "zstd_9")), + migrate_cachefs); + } + else if (endpoint == "s3") + { + std::string access_key; + std::string secret_access_key; + + if (!settings->getValue("access_key", &access_key) || + !settings->getValue("secret_access_key", &secret_access_key)) + { + Server->Log("access_key and secret_access_key not set correctly", LL_ERROR); + return nullptr; + } + + std::string bucket_name; + if (!settings->getValue("bucket_name", &bucket_name)) + { + Server->Log("S3 bucket name not set correctly", LL_ERROR); + return nullptr; + } + std::string s3_endpoint; + settings->getValue("s3_endpoint", &s3_endpoint); + std::string s3_region; + settings->getValue("s3_region", &s3_region); + std::string storage_class; + settings->getValue("storage_class", &storage_class); + +#ifndef _WIN32 + IKvStoreBackend* backend = new KvStoreBackendS3(aes_key, access_key, + secret_access_key, bucket_name, get_compress_encrypt_factory(), s3_endpoint, + s3_region, storage_class, + CompressionMethodFromString(settings->getValue("compression_method", "zstd_9")), + CompressionMethodFromString(settings->getValue("metadata_compression_method", "zstd_9")) + migrate_cachefs); + + return new KvStoreFrontend(cache_path + "/migration/objects.db", + backend, true, std::string(), std::string(), nullptr, std::string(), false, + false, migrate_cachefs); +#else + return nullptr; +#endif + } + else if (endpoint == "azure") + { + std::string account_name; + std::string account_key; + + if (!settings->getValue("account_name", &account_name) || + !settings->getValue("account_key", &account_key)) + { + Server->Log("account_name and account_key not set correctly", LL_ERROR); + return nullptr; + } + + std::string container_name; + if (!settings->getValue("container_name", &container_name)) + { + Server->Log("Azure container name not set correctly", LL_ERROR); + return nullptr; + } + +#ifndef _WIN32 + IKvStoreBackend* backend = new KvStoreBackendAzure(aes_key, account_name, + account_key, container_name, get_compress_encrypt_factory(), + CompressionMethodFromString(settings->getValue("compression_method", "zstd_9")), + CompressionMethodFromString(settings->getValue("metadata_compression_method", "zstd_9")) + migrate_cachefs); + + return new KvStoreFrontend(cache_path + "/migration/objects.db", + backend, true, std::string(), std::string(), nullptr, std::string(), false, + false, migrate_cachefs); +#else + return nullptr; +#endif + } + + Server->Log("Endpoint " + endpoint + " not implemented", LL_ERROR); +#endif //WITH_MIGRATION + return nullptr; +} + +bool CloudFile::is_metadata(int64 offset, const std::string& key) +{ + assert(fuse_io_context::is_sync_thread()); + + if (offset <= 5*1024*1024) + { + return true; + } + + { + IScopedReadLock lock(chunks_mutex.get()); + + if (mount_path.empty()) + return false; + + int64 timems = Server->getTimeMS(); + + if (timems - last_fs_chunks_update>60*60*1000 + && !updating_fs_chunks) + { + lock.relock(nullptr); + { + IScopedWriteLock lock(nullptr); + update_fs_chunks(true, lock); + } + lock.relock(chunks_mutex.get()); + } + + auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), IBackupFileSystem::SChunk(offset, 0, false)); + + if (it != fs_chunks.begin()) + --it; + + if (it != fs_chunks.end() + && it->offset <= offset && it->offset + it->len >= offset) + return it->metadata; + + if (timems - last_fs_chunks_update < 1000) + { + lock.relock(nullptr); + + IScopedWriteLock wlock(chunks_mutex.get()); + if (!key.empty()) + { + missing_chunk_keys.push_back(std::make_pair(key, timems)); + update_missing_chunks_thread->notify(); + } + return false; + } + + while (updating_fs_chunks + && Server->getTimeMS() - timems < 60000) + { + lock.relock(nullptr); + Server->wait(100); + lock.relock(chunks_mutex.get()); + } + + if (updating_fs_chunks) + { + Server->Log("Cannot find chunk for offset " + convert(offset) + " in fs chunk data. Fs chunk data currently updating.", LL_INFO); + return false; + } + } + + IScopedWriteLock lock(nullptr); + + update_fs_chunks(false, lock); + + auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), IBackupFileSystem::SChunk(offset, 0, false)); + + if (it != fs_chunks.begin()) + --it; + + if (it != fs_chunks.end() + && it->offset <= offset && it->offset + it->len >= offset) + return it->metadata; + + last_fs_chunks_update = Server->getTimeMS(); + + if (!key.empty()) + { + assert(lock.getLock() != nullptr); + missing_chunk_keys.push_back(std::make_pair(key, last_fs_chunks_update)); + update_missing_chunks_thread->notify(); + } + Server->Log("Cannot find chunk for offset " + convert(offset) + " in fs chunk data. Fs chunk data size " + convert(fs_chunks.size()), LL_INFO); + return false; +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::is_metadata_async(fuse_io_context& io, int64 offset, const std::string& key) +{ + if (offset <= 5 * 1024 * 1024) + { + co_return true; + } + + { + IScopedReadLock lock(chunks_mutex.get()); + + if (mount_path.empty()) + co_return false; + + int64 timems = Server->getTimeMS(); + + if (timems - last_fs_chunks_update > 60 * 60 * 1000 + && !updating_fs_chunks) + { + lock.relock(NULL); + { + co_await update_fs_chunks_async(io, true); + } + lock.relock(chunks_mutex.get()); + } + + auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), btrfs_chunks::SChunk(offset, 0, false)); + + if (it != fs_chunks.begin()) + --it; + + if (it != fs_chunks.end() + && it->offset <= offset && it->offset + it->len >= offset) + co_return it->metadata; + + if (timems - last_fs_chunks_update < 1000) + { + lock.relock(nullptr); + + IScopedWriteLock wlock(chunks_mutex.get()); + if (!key.empty()) + { + missing_chunk_keys.push_back(std::make_pair(key, timems)); + update_missing_chunks_thread->notify(); + } + co_return false; + } + + while (updating_fs_chunks + && Server->getTimeMS() - timems < 60000) + { + lock.relock(nullptr); + + co_await io.sleep_ms(100); + + lock.relock(chunks_mutex.get()); + } + + if (updating_fs_chunks) + { + Server->Log("Cannot find chunk for offset " + convert(offset) + " in fs chunk data. Fs chunk data currently updating.", LL_INFO); + co_return false; + } + } + + co_await update_fs_chunks_async(io, false); + + IScopedReadLock lock(chunks_mutex.get()); + + auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), btrfs_chunks::SChunk(offset, 0, false)); + + if (it != fs_chunks.begin()) + --it; + + if (it != fs_chunks.end() + && it->offset <= offset && it->offset + it->len >= offset) + co_return it->metadata; + + last_fs_chunks_update = Server->getTimeMS(); + + if (!key.empty()) + { + assert(lock.getLock() != nullptr); + missing_chunk_keys.push_back(std::make_pair(key, last_fs_chunks_update)); + update_missing_chunks_thread->notify(); + } + Server->Log("Cannot find chunk for offset " + convert(offset) + " in fs chunk data. Fs chunk data size " + convert(fs_chunks.size()), LL_INFO); + + co_return false; +} +#endif //HAS_ASYNC + +const unsigned int def_metadata_second_chances = 10; + +unsigned int CloudFile::get_num_second_chances(const std::string & key) +{ + int64 offset = key_offset(key); + + if (offset < 0) + return 0; + + if (is_metadata(offset, key)) + { + return def_metadata_second_chances; + } + + return 0; +} + +bool CloudFile::is_metadata(const std::string & key) +{ + int64 offset = key_offset(key); + + if (offset < 0) + return false; + + return is_metadata(offset, key); +} + +bool CloudFile::update_missing_fs_chunks() +{ + IScopedWriteLock lock(nullptr); + update_fs_chunks(false, lock); + + Server->Log("Retrying update missing chunks. Missing: " + convert(missing_chunk_keys.size())); + + int64 timems = Server->getTimeMS(); + bool first = true; + + std::vector new_metadata_keys; + assert(lock.getLock() != nullptr); + std::vector > new_missing_chunk_keys; + for (std::pair key : missing_chunk_keys) + { + int64 offset = key_offset(key.first); + + if (offset < 0) + continue; + + if (first) + { + Server->Log("First missing chunk offset: "+convert(offset)); + first = false; + } + + auto it = std::upper_bound(fs_chunks.begin(), fs_chunks.end(), IBackupFileSystem::SChunk(offset, 0, false)); + + if (it != fs_chunks.begin()) + --it; + + if (it != fs_chunks.end() + && it->offset <= offset && it->offset + it->len >= offset) + { + Server->Log("Chunk for offset " + convert(offset) + " found now.", LL_INFO); + + if (it->metadata) + { + new_metadata_keys.push_back(key.first); + } + } + else if(timems - key.second<60*1000) + { + new_missing_chunk_keys.push_back(key); + } + else + { + Server->Log("Giving up on finding chunk for offset " + convert(offset) + ".", LL_INFO); + } + } + + missing_chunk_keys = new_missing_chunk_keys; + + lock.relock(nullptr); + + for (std::string& key : new_metadata_keys) + { + kv_store.set_second_chances(key, def_metadata_second_chances); + } + + return new_missing_chunk_keys.empty(); +} + +bool CloudFile::migrate(std::vector& buf, int64 offset, int64 len) +{ + IScopedLock lock(mutex.get()); + + lock_extent(lock, offset, len, true); + + IScopedLock migrate_lock(migrate_to_cf->mutex.get()); + migrate_to_cf->lock_extent(migrate_lock, offset, len, true); + + bool ret = true; + + for (int64 curr_offset = offset; curr_offset < offset + len;) + { + bool has_block = bitmap->get(curr_offset / block_size); + + if (!has_block) + { + int64 tozero = (std::min)((block_size - curr_offset%block_size), offset + len - curr_offset); + curr_offset += tozero; + continue; + } + + int64 toread = (std::min)(static_cast(buf.size()), offset + len - curr_offset); + + if (ReadInt(curr_offset, buf.data(), static_cast<_u32>(toread), &lock, 0, nullptr) != toread) + { + ret = false; + break; + } + + if (migrate_to_cf->WriteInt(curr_offset, buf.data(), static_cast<_u32>(toread), false, &migrate_lock, true, nullptr) != toread) + { + ret = false; + break; + } + + curr_offset += toread; + } + + migrate_to_cf->unlock_extent(migrate_lock, offset, len, true); + + unlock_extent(lock, offset, len, true); + + if (ret + && offset+len- migration_copy_done_lag > migration_copy_done) + { + migration_copy_done = offset + len; + } + + return ret; +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::WriteAsync(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io, + int64 pos, _u32 bsize, bool new_block, bool ext_lock, bool can_block) +{ + if (!fuse_io.write_fuse) + { + Server->Log("Write not from fuse"); + } + + fuse_in_header* fheader = reinterpret_cast(fuse_io.header_buf); + + if (pos > cloudfile_size) + { + Server->Log("Write position " + convert(pos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_ERROR); + co_return 0; + } + + EXTENSIVE_DEBUG_LOG(int64 starttime = Server->getTimeMS();) + + int64 target = pos + bsize; + + if (target > cloudfile_size) + { + target = cloudfile_size; + Server->Log("Write beyond target size. Reducing write size to " + convert(target - pos) + " from " + convert(bsize), LL_WARNING); + bsize = static_cast<_u32>(target - pos); + } + + int64 orig_pos = pos; + size_t lock_idx; + if (!ext_lock) + { + lock_idx = co_await lock_extent_async(orig_pos, bsize, false); + } + + EXTENSIVE_DEBUG_LOG(Server->Log("Writing " + convert(bsize) + " bytes at pos " + convert(pos), LL_DEBUG);) + + size_t set_bits = 0; + + struct WriteTask + { + WriteTask(IFsFile* fd, std::string key, uint64_t off, size_t size) + : fd(fd), key(std::move(key)), off(off), size(size) {} + IFsFile* fd; + std::string key; + int64 off; + _u32 size; + }; + + std::vector write_tasks; + + while (pos < target) + { + int64 big_block_num = pos / big_block_size; + bool has_big_block = co_await big_blocks_bitmap->get_async(io, big_block_num); + + int64 block_diff = big_block_num - active_big_block; + if (has_big_block && active_big_block >= 0 && (block_diff < -1 || block_diff>1) + && old_big_blocks_bitmap->get(big_block_num)) + { + add_fracture_big_block(big_block_num); + } + + int64 curr_block_size; + std::string key; + bool has_block; + if (has_big_block) + { + active_big_block = big_block_num; + curr_block_size = big_block_size; + key = big_block_key(pos / curr_block_size); + has_block = co_await bitmap_has_big_block_async(io, pos / curr_block_size); + + new_big_blocks_bitmap->set(big_block_num, true); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(pos / curr_block_size); + has_block = co_await bitmap_has_small_block_async(io, pos / curr_block_size); + } + + int64 towrite = (std::min)((curr_block_size - pos % curr_block_size), target - pos); + + if (!new_block && !has_block) + { + bool waited = false; + while (in_write_retrieval.find(key) != in_write_retrieval.end()) + { + co_await WriteRetrievalAwaiter(*this); + waited = true; + } + + if (waited) + { + //Update bitmap info (has_block) + continue; + } + + in_write_retrieval.insert(key); + } + + unsigned int flags = TransactionalKvStore::Flag::read_random; + + if (!can_block) + { + flags |= TransactionalKvStore::Flag::disable_throttling; + } + + if (co_await is_metadata_async(io, pos, key)) + { + flags |= TransactionalKvStore::Flag::disable_memfiles; + } + + IFsFile* block = co_await kv_store.get_async(io, key, + new_block ? TransactionalKvStore::BitmapInfo::NotPresent + : (has_block ? TransactionalKvStore::BitmapInfo::Present + : TransactionalKvStore::BitmapInfo::NotPresent), + flags, curr_block_size); + + assert(block != NULL); + + if (fallocate_block_file) + { + int64 file_block_size = block->Size(); + if (file_block_size < pos % curr_block_size) + { + io_uring_sqe* sqe = io.get_sqe(); + + io_uring_prep_fallocate(sqe, get_file_fd(block->getOsHandle()), + 0, 0, curr_block_size); + + int rc = co_await io.complete(sqe); + + if (rc == 0) + block->increaseCachedSize(curr_block_size); + } + } + + int64 block_pos = pos % curr_block_size; + write_tasks.push_back(WriteTask(block, + key, block_pos, towrite)); + + set_bits += co_await bitmap->set_range_async(io, pos / block_size, div_up(pos + towrite, block_size), true); + + if (!new_block && !has_block) + { + in_write_retrieval.erase(key); + resume_write_retrieval_awaiters(); + } + + pos += towrite; + } + + fuse_out_header* out_header = reinterpret_cast(fuse_io.scratch_buf); + out_header->error = 0; + out_header->len = sizeof(fuse_out_header) + sizeof(fuse_write_out); + out_header->unique = fheader->unique; + + fuse_write_out* write_out = reinterpret_cast(fuse_io.scratch_buf + sizeof(fuse_out_header)); + write_out->size = bsize; + write_out->padding = 0; + + std::vector done_write_tasks; + + size_t ret = 0; + size_t sqe_written = 0; + size_t tries = 10; + while (tries > 0) + { + size_t n_peek = write_tasks.size(); + + if (fuse_io.write_fuse) + n_peek += 2; + + std::vector sqes; + sqes.reserve(n_peek); + for (WriteTask& task : write_tasks) + { + io_uring_sqe* write_sqe = io.get_sqe(n_peek); + if (write_sqe == nullptr) + co_return -1; + --n_peek; + + io_uring_prep_splice(write_sqe, fuse_io.pipe[0], + -1, get_file_fd(task.fd->getOsHandle()), task.off, task.size, + SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); + write_sqe->flags |= IOSQE_IO_LINK; + + sqes.push_back(write_sqe); + } + + int expected_last_rc =-1; + + if (fuse_io.write_fuse) + { + io_uring_sqe* sqe_write_response = io.get_sqe(n_peek); + if (sqe_write_response == nullptr) + co_return -1; + --n_peek; + + io_uring_prep_write_fixed(sqe_write_response, fuse_io.pipe[1], + fuse_io.scratch_buf, out_header->len, + 0, fuse_io.scratch_buf_idx); + sqe_write_response->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; + + sqes.push_back(sqe_write_response); + + assert(n_peek == 1); + io_uring_sqe* sqe_splice_response = io.get_sqe(); + if (sqe_splice_response == nullptr) + co_return -1; + + io_uring_prep_splice(sqe_splice_response, fuse_io.pipe[0], + -1, io.fuse_ring.fd, -1, out_header->len, + SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); + sqe_splice_response->flags |= IOSQE_FIXED_FILE; + + sqes.push_back(sqe_splice_response); + + expected_last_rc = out_header->len; + } + else + { + assert(n_peek == 0); + sqes.back()->flags &= ~IOSQE_IO_LINK; + + expected_last_rc = write_tasks.back().size; + } + + std::vector rcs = co_await io.complete(sqes); + + int last_rc = rcs[rcs.size() - 1]; + + if ( (last_rc < 0 && last_rc == -ECANCELED) || + (expected_last_rc>=0 && last_rc!= expected_last_rc) ) + { + std::string rcs_str; + for (int lrc : rcs) + { + if (!rcs_str.empty()) + rcs_str += ", "; + rcs_str += convert(lrc); + } + + //Maybe needs short write splice handling here (last_rc>0 && last_rcLog("Write error (ECANCELED) rcs=" + rcs_str, LL_ERROR); + else if(last_rc!= expected_last_rc) + Server->Log("Write error (last_rc="+convert(last_rc)+" expected="+convert(expected_last_rc)+") rcs="+ rcs_str, LL_ERROR); + + for (size_t i = 0; i < write_tasks.size();) + { + WriteTask& task = write_tasks[i]; + if (rcs[i] >= 0 && rcs[i] == task.size) + { + sqe_written += task.size; + task.fd->increaseCachedSize(task.off + task.size); + done_write_tasks.push_back(task); + write_tasks.erase(write_tasks.begin() + i); + } + else if (rcs[i] > 0) + { + sqe_written += rcs[i]; + task.fd->increaseCachedSize(task.off + rcs[i]); + + Server->Log("Short write writing to block " + bytesToHex(reinterpret_cast(task.key.c_str()), + task.key.size()) + " at " + task.fd->getFilename()+" off "+convert(task.off)+" size "+convert(task.size)+ + ". Rc " + convert(rcs[i]) + ". Retrying...", LL_WARNING); + + co_await io.sleep_ms(1000); + + write_tasks[i].off += rcs[i]; + write_tasks[i].size -= rcs[i]; + ++tries; + ++i; + } + else if (rcs[i] <= 0 && rcs[i] != -ECANCELED) + { + struct stat stbuf; + int rc = fstat(get_file_fd(task.fd->getOsHandle()), &stbuf); + + Server->Log("Error writing to block " + bytesToHex(reinterpret_cast(task.key.c_str()), + task.key.size()) + " at " + task.fd->getFilename() + " off " + convert(task.off) + " size " + convert(task.size) + + ". Rc " + convert(rcs[i]) + " file_size="+convert(rc!=0 ? (int64)-1 : (int64)stbuf.st_size)+" cached_file_size="+convert(task.fd->Size())+ + ". Errno " + convert(rcs[i]) + "." + (tries>0 ? " Retrying..." : ""), + tries>0 ? LL_WARNING:LL_ERROR); + + if (tries == 0) + { + addSystemEvent("cache_err_retry", + "Error writing to block on cache", + "Error writing to block " + bytesToHex(reinterpret_cast(task.key.c_str()), + task.key.size()) + " at " + task.fd->getFilename() + " off " + convert(task.off) + " size " + convert(task.size) + + ". Rc " + convert(rcs[i]) + ". Errno " + convert(rcs[i]), LL_ERROR); + } + else + { + co_await io.sleep_ms(1000); + } + ++i; + } + else + { + ++i; + } + } + --tries; + + if (tries == 0) + { + if (fuse_io.write_fuse) + { + out_header->error = -EIO; + out_header->len = sizeof(fuse_out_header) + sizeof(fuse_write_out); + write_out->size = sqe_written; + + io_uring_sqe* sqe_write_response = io.get_sqe(2); + if (sqe_write_response == nullptr) + co_return -1; + + io_uring_prep_write_fixed(sqe_write_response, fuse_io.pipe[1], + fuse_io.scratch_buf, out_header->len, + 0, fuse_io.scratch_buf_idx); + sqe_write_response->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; + + io_uring_sqe* sqe_splice_response = io.get_sqe(); + if (sqe_splice_response == nullptr) + co_return -1; + + io_uring_prep_splice(sqe_splice_response, fuse_io.pipe[0], + -1, io.fuse_ring.fd, -1, out_header->len, + SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); + sqe_splice_response->flags |= IOSQE_FIXED_FILE; + + auto [rc1, rc2] = co_await io.complete(std::make_pair(sqe_write_response, sqe_splice_response)); + + ret = 0; + } + else + { + ret = -1; + } + break; + } + + continue; + } + else if (last_rc != expected_last_rc) + { + Server->Log("Error sending response " + convert(last_rc) + + " expected " + convert(expected_last_rc), LL_ERROR); + ret = -1; + } + else + { + sqe_written = bsize; + for (WriteTask& task : write_tasks) + { + task.fd->increaseCachedSize(task.off + task.size); + } + } + + break; + } + + for (WriteTask& task : write_tasks) + { + co_await kv_store.release_async(io, task.key); + } + + for (WriteTask& task : done_write_tasks) + { + co_await kv_store.release_async(io, task.key); + } + + written_bytes += bsize; + used_bytes += set_bits * block_size; + + EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS() - starttime) + "ms", LL_DEBUG);) + + if (!ext_lock) + { + unlock_extent_async(orig_pos, bsize, false, lock_idx); + } + + if (ret == 0) + co_return sqe_written; + + co_return ret; +} +#endif //HAS_ASYNC + +_u32 CloudFile::WriteInt( int64 pos, const char* buffer, _u32 bsize, bool new_block, IScopedLock* ext_lock, bool can_block, bool* has_error) +{ + if (is_async) + abort(); + + IScopedLock lock(nullptr); + + if(ext_lock==nullptr) + { + lock.relock(mutex.get()); + } + + if (pos > cloudfile_size) + { + Server->Log("Write position " + convert(pos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_ERROR); + return 0; + } + + EXTENSIVE_DEBUG_LOG(int64 starttime=Server->getTimeMS();) + + int64 target = pos + bsize; + + if (target>cloudfile_size) + { + target = cloudfile_size; + Server->Log("Write beyond target size. Reducing write size to " + convert(target - pos) + " from " + convert(bsize), LL_WARNING); + bsize = static_cast<_u32>(target - pos); + } + + const char* orig_buffer = buffer; + int64 orig_pos = pos; + if(ext_lock==nullptr) + { + lock_extent(lock, orig_pos, bsize, false); + } + + IScopedLock migration_lock(nullptr); + if (migrate_to_cf != nullptr) + { + migration_lock.relock(migrate_to_cf->mutex.get()); + migrate_to_cf->lock_extent(migration_lock, orig_pos, bsize, false); + } + + bool slog_needs_reset = false; + if (ext_lock == nullptr) + { + if (!slog_write(pos, buffer, bsize, slog_needs_reset)) + { + unlock_extent(lock, orig_pos, bsize, false); + return 0; + } + } + + EXTENSIVE_DEBUG_LOG(Server->Log("Writing " + convert(bsize) + " bytes at pos " + convert(pos), LL_DEBUG);) + + size_t set_bits = 0; + + while(posget(big_block_num); + + int64 block_diff = big_block_num - active_big_block; + if(has_big_block && active_big_block>=0 && ( block_diff<-1 || block_diff>1) + && old_big_blocks_bitmap->get(big_block_num) ) + { + add_fracture_big_block(big_block_num); + } + + int64 curr_block_size; + std::string key; + bool has_block; + if(has_big_block) + { + active_big_block = big_block_num; + curr_block_size = big_block_size; + key = big_block_key(pos/curr_block_size); + has_block = bitmap_has_big_block(pos/curr_block_size); + + new_big_blocks_bitmap->set(big_block_num, true); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(pos/curr_block_size); + has_block = bitmap_has_small_block(pos/curr_block_size); + } + + int64 towrite = (std::min)((curr_block_size - pos%curr_block_size), target-pos); + + if (!new_block && !has_block) + { + bool waited = false; + while (in_write_retrieval.find(key) != in_write_retrieval.end()) + { + if (ext_lock == nullptr) + { + in_write_retrieval_cond->wait(&lock); + } + else + { + in_write_retrieval_cond->wait(ext_lock); + } + waited = true; + } + + if (waited) + { + //Update bitmap info (has_block) + continue; + } + + in_write_retrieval.insert(key); + } + + if(ext_lock==nullptr) + { + lock.relock(nullptr); + } + else + { + ext_lock->relock(nullptr); + } + + unsigned int flags = TransactionalKvStore::Flag::read_random; + + if (!can_block) + { + flags |= TransactionalKvStore::Flag::disable_throttling; + } + + if (is_metadata(pos, key)) + { + flags |= TransactionalKvStore::Flag::disable_memfiles; + } + + IFsFile* block = kv_store.get(key, + new_block ? TransactionalKvStore::BitmapInfo::NotPresent + : (has_block ? TransactionalKvStore::BitmapInfo::Present + : TransactionalKvStore::BitmapInfo::NotPresent) , + flags, curr_block_size); + + assert(block!=nullptr); + + int64 file_block_size = block->Size(); + if(file_block_size Resize(curr_block_size); + } + + _u32 written = 0; + size_t tries = 0; + while(written(towrite)) + { + int64 block_pos = pos%curr_block_size+written; + _u32 curr_towrite = static_cast<_u32>(towrite)-written; + + + /* Enable for Direct-IO TODO: buffer needs to be aligned + if(block_pos%io_alignment==0 + && curr_towrite%io_alignment==0) + { + written += block->Write(block_pos, buffer+written, curr_towrite); + } + else + { + written += WriteAligned(block, block_pos, buffer+written, curr_towrite); + }*/ + written += block->Write(block_pos, buffer + written, curr_towrite); + + if(written(towrite)) + { + std::string syserr = os_last_error_str(); + if (tries==4) + { + addSystemEvent("cache_err_retry", + "Error writing to block on cache", + "Error writing to block " + bytesToHex(reinterpret_cast(key.c_str()), + key.size()) + " at "+ block->getFilename()+". " + syserr, LL_ERROR); + } + Server->Log("Error writing to block "+bytesToHex(reinterpret_cast(key.c_str()), + key.size())+" at "+ block->getFilename()+". "+syserr+". Retrying...", LL_WARNING); + Server->wait(1000); + ++tries; + } + } + + kv_store.release(key); + + if(written!=towrite) + { + if (has_error) *has_error = true; + Server->Log("Error writing to block file", LL_ERROR); + addSystemEvent("cache_err", + "Error writing to block on cache", + "Error writing to block " + bytesToHex(reinterpret_cast(key.c_str()), + key.size()) + " at "+cachefs->getName()+".", LL_ERROR); + if (migrate_to_cf != nullptr) + { + migrate_to_cf->unlock_extent(migration_lock, orig_pos, bsize, false); + } + if(ext_lock==nullptr) + { + lock.relock(mutex.get()); + unlock_extent(lock, orig_pos, bsize, false); + } + else + { + ext_lock->relock(mutex.get()); + } + + if (!new_block && !has_block) + { + in_write_retrieval.erase(key); + in_write_retrieval_cond->notify_all(); + } + + return 0; + } + + if(ext_lock==nullptr) + { + lock.relock(mutex.get()); + } + else + { + ext_lock->relock(mutex.get()); + } + + set_bits += bitmap->set_range(pos / block_size, div_up(pos + written, block_size), true); + + if (!new_block && !has_block) + { + in_write_retrieval.erase(key); + in_write_retrieval_cond->notify_all(); + } + + buffer += written; + pos += written; + + written_bytes += written; + } + + used_bytes+=set_bits*block_size; + + EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS()-starttime)+"ms", LL_DEBUG);) + + if (migrate_to_cf != nullptr) + { + if (orig_pos < migration_copy_max) + { + bool local_has_error = false; + if (migrate_to_cf->WriteInt(orig_pos, orig_buffer, bsize, false, &migration_lock, can_block, &local_has_error) != bsize + || local_has_error) + { + migration_has_error = true; + } + } + migrate_to_cf->unlock_extent(migration_lock, orig_pos, bsize, false); + } + + if(ext_lock==nullptr) + { + unlock_extent(lock, orig_pos, bsize, false); + } + + if (slog_needs_reset) + { + if (!Flush(false, true)) + { + abort(); + } + } + + return bsize; +} + +_u32 CloudFile::WriteAligned(IFile* block, int64 pos, const char* buffer, _u32 towrite) +{ + EXTENSIVE_DEBUG_LOG(Server->Log("Fixing alignment for write at pos "+convert(pos)+" size "+convert(towrite), LL_DEBUG);) + + int64 startpos = (pos/io_alignment)*io_alignment; + _u32 alignstartadd = static_cast<_u32>(pos-startpos); + _u32 alignendadd = io_alignment - (towrite+alignstartadd)%io_alignment; + _u32 alignwrite = towrite + alignstartadd + alignendadd; + char *buf = reinterpret_cast(aligned_alloc(io_alignment, alignwrite)); + if(buf==nullptr) + { + return 0; + } + _u32 r = block->Read(startpos, buf, alignwrite); + + if(rWrite(startpos, buf, alignwrite); + + free(buf); + + if(wtowrite) + { + w=towrite; + } + + return w; +} + +bool CloudFile::Seek( _i64 spos ) +{ + if (is_async) + abort(); + + cf_pos = spos; + return true; +} + +_i64 CloudFile::Size( void ) +{ +#ifdef HAS_ASYNC + if (is_async) + { + CWData wdata; + wdata.addChar(queue_cmd_get_size); + + CRData rdata; + get_async_msg(wdata, rdata); + + int64 ret; + bool b = rdata.getVarInt(&ret); + assert(b); + return ret; + } +#endif + + IScopedLock lock(mutex.get()); + + lock_extent(lock, 0, 0, false); + + int64 ret = cloudfile_size; + + unlock_extent(lock, 0, 0, false); + + return ret; +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task<_i64> CloudFile::SizeAsync(void) +{ + size_t lock_idx = co_await lock_extent_async(0, 0, false); + + int64 ret = cloudfile_size; + + unlock_extent_async(0, 0, false, lock_idx); + + co_return ret; +} +#endif + +_i64 CloudFile::RealSize(void) +{ + return getUsedBytes(); +} + +bool CloudFile::PunchHole( _i64 spos, _i64 size ) +{ + if (is_async) + abort(); + + IScopedLock lock(mutex.get()); + + EXTENSIVE_DEBUG_LOG(int64 starttime=Server->getTimeMS();) + + if (spos > cloudfile_size) + { + Server->Log("Punch start position " + convert(spos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_ERROR); + return false; + } + + int64 target = spos + size; + if (target>cloudfile_size) + { + target = cloudfile_size; + size = target - spos; + } + + int64 orig_spos = spos; + lock_extent(lock, orig_spos, size, true); + + EXTENSIVE_DEBUG_LOG(Server->Log("Punching hole " + convert(size) + " bytes at pos " + convert(spos), LL_DEBUG);) + + std::vector > del_blocks; + + int64 lock_start = cloudfile_size; + int64 lock_end = 0; + while(sposget(big_block_num); + + int64 curr_block_size; + std::string key; + bool has_block; + if(has_big_block) + { + curr_block_size = big_block_size; + key = big_block_key(spos/curr_block_size); + has_block = bitmap_has_big_block(spos/curr_block_size); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(spos/curr_block_size); + has_block = bitmap_has_small_block(spos/curr_block_size); + } + + int64 towrite = (std::min)((curr_block_size - spos%curr_block_size), target-spos); + + if(has_block) + { + size_t set_bits = bitmap->set_range(div_up(spos, block_size), (spos+towrite)/block_size, false); + + used_bytes-=set_bits*block_size; + + if(has_big_block) + { + has_block = bitmap_has_big_block(spos/curr_block_size); + } + else + { + has_block = bitmap_has_small_block(spos/curr_block_size); + } + + if(!has_block) + { + lock_start = (std::min)(lock_start, roundDown(spos, curr_block_size)); + lock_end = (std::max)(lock_end, spos%curr_block_size==0 ? spos+curr_block_size : roundUp(spos, curr_block_size)); + del_blocks.push_back(std::make_pair(spos / curr_block_size, has_big_block)); + } + } + + spos+=towrite; + } + + unlock_extent(lock, orig_spos, size, true); + + + if (!del_blocks.empty()) + { + //We need to lock the whole big/small block before deleting it + assert(lock_end > lock_start); + lock_extent(lock, lock_start, lock_end - lock_start, true); + for (auto& it : del_blocks) + { + bool has_big_block = it.second; + + //If small/big block change it got fractured during unlock and has data + if (has_big_block && !big_blocks_bitmap->get(it.first)) + { + Server->Log("Block " + convert(it.first) + " changed to small block while trying to delete it"); + continue; + } + + std::string key; + bool has_block; + int64 big_block_num; + if (has_big_block) + { + key = big_block_key(it.first); + has_block = bitmap_has_big_block(it.first); + big_block_num = it.first; + + assert(lock_start <= it.first*big_block_size); + assert(lock_end >= (it.first + 1)*big_block_size); + } + else + { + key = small_block_key(it.first); + has_block = bitmap_has_small_block(it.first); + big_block_num = (it.first*small_block_size) / big_block_size; + + assert(lock_start <= it.first*small_block_size); + assert(lock_end >= (it.first + 1)*small_block_size); + } + + if (!has_block) + { + { + lock.relock(nullptr); + + kv_store.del(key); + + lock.relock(mutex.get()); + } + if (!has_big_block && + !big_blocks_bitmap->get(big_block_num) ) + { + consider_big_blocks.insert(big_block_num); + } + } + else + { + Server->Log("Block " + convert(it.first) + " present when trying to delete it big_block=" + convert(has_big_block)); + } + } + for (int64 it : consider_big_blocks) + { + if (!bitmap_has_big_block(it)) + { + old_big_blocks_bitmap->set(it, false); + big_blocks_bitmap->set(it, true); + new_big_blocks_bitmap->set(it, false); + } + } + consider_big_blocks.clear(); + unlock_extent(lock, lock_start, lock_end - lock_start, true); + } + + EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS()-starttime)+"ms", LL_DEBUG);) + + return true; +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::PunchHoleAsync(fuse_io_context& io, _i64 spos, _i64 size) +{ + EXTENSIVE_DEBUG_LOG(int64 starttime = Server->getTimeMS();) + + if (spos > cloudfile_size) + { + Server->Log("Punch start position " + convert(spos) + " bigger than cloud drive file " + convert(cloudfile_size), LL_ERROR); + co_return 1; + } + + int64 target = spos + size; + if (target > cloudfile_size) + { + target = cloudfile_size; + size = target - spos; + } + + int64 orig_spos = spos; + size_t lock_idx = co_await lock_extent_async(orig_spos, size, true); + + EXTENSIVE_DEBUG_LOG(Server->Log("Punching hole " + convert(size) + " bytes at pos " + convert(spos), LL_DEBUG);) + + std::vector > del_blocks; + + int64 lock_start = cloudfile_size; + int64 lock_end = 0; + while (spos < target) + { + int64 big_block_num = spos / big_block_size; + bool has_big_block = co_await big_blocks_bitmap->get_async(io, big_block_num); + + int64 curr_block_size; + std::string key; + bool has_block; + if (has_big_block) + { + curr_block_size = big_block_size; + key = big_block_key(spos / curr_block_size); + has_block = co_await bitmap_has_big_block_async(io, spos / curr_block_size); + } + else + { + curr_block_size = small_block_size; + key = small_block_key(spos / curr_block_size); + has_block = co_await bitmap_has_small_block_async(io, spos / curr_block_size); + } + + int64 towrite = (std::min)((curr_block_size - spos % curr_block_size), target - spos); + + if (has_block) + { + size_t set_bits = co_await bitmap->set_range_async(io, div_up(spos, block_size), (spos + towrite) / block_size, false); + + used_bytes -= set_bits * block_size; + + if (has_big_block) + { + has_block = co_await bitmap_has_big_block_async(io, spos / curr_block_size); + } + else + { + has_block = co_await bitmap_has_small_block_async(io, spos / curr_block_size); + } + + if (!has_block) + { + lock_start = (std::min)(lock_start, roundDown(spos, curr_block_size)); + lock_end = (std::max)(lock_end, spos % curr_block_size == 0 ? spos + curr_block_size : roundUp(spos, curr_block_size)); + del_blocks.push_back(std::make_pair(spos / curr_block_size, has_big_block)); + } + } + + spos += towrite; + } + + unlock_extent_async(orig_spos, size, true, lock_idx); + + + if (!del_blocks.empty()) + { + std::set consider_big_blocks; + //We need to lock the whole big/small block before deleting it + assert(lock_end > lock_start); + lock_idx = co_await lock_extent_async(lock_start, lock_end - lock_start, true); + for (auto it : del_blocks) + { + bool has_big_block = it.second; + int64 block = it.first; + + //If small/big block change we didn't lock enough/wrong + if (has_big_block && !(co_await big_blocks_bitmap->get_async(io, block))) + { + Server->Log("Block " + convert(block) + " changed to small block while trying to delete it"); + continue; + } + + std::string key; + bool has_block; + int64 big_block_num; + if (has_big_block) + { + key = big_block_key(block); + has_block = co_await bitmap_has_big_block_async(io, block); + big_block_num = block; + + assert(lock_start <= block * big_block_size); + assert(lock_end >= (block + 1) * big_block_size); + } + else + { + key = small_block_key(block); + has_block = co_await bitmap_has_small_block_async(io, block); + big_block_num = (block * small_block_size) / big_block_size; + + assert(lock_start <= block * small_block_size); + assert(lock_end >= (block + 1) * small_block_size); + } + + if (!has_block) + { + { + co_await kv_store.del_async(io, key); + } + + if (!has_big_block && + !(co_await big_blocks_bitmap->get_async(io, big_block_num)) ) + { + if (curr_consider_big_blocks.find(big_block_num) + == curr_consider_big_blocks.end()) + { + consider_big_blocks.insert(big_block_num); + } + } + } + else + { + Server->Log("Block " + convert(block) + " present when trying to delete it big_block=" + convert(has_big_block)); + } + } + + if (!consider_big_blocks.empty()) + { + curr_consider_big_blocks = std::move(consider_big_blocks); + consider_big_blocks = std::set(); + for (int64 it : curr_consider_big_blocks) + { + if (!(co_await bitmap_has_big_block_async(io, it))) + { + old_big_blocks_bitmap->set(it, false); + co_await big_blocks_bitmap->set_async(io, it, true); + new_big_blocks_bitmap->set(it, false); + } + } + curr_consider_big_blocks.clear(); + } + + unlock_extent_async(lock_start, lock_end - lock_start, true, lock_idx); + } + + EXTENSIVE_DEBUG_LOG(Server->Log("Done. " + convert(Server->getTimeMS() - starttime) + "ms", LL_DEBUG);) + + co_return 0; +} +#endif //HAS_ASYNC + +std::string CloudFile::getFilename( void ) +{ + return "CloudFile"; +} + +std::string CloudFile::block_key(int64 blocknum) +{ + std::string key; + if(blocknum(blocknum); + memcpy(&key[1], &bnum, sizeof(bnum)); + } + else if(blocknum(blocknum); + memcpy(&key[1], &bnum, sizeof(bnum)); + } + else if(blocknum(blocknum); + memcpy(&key[1], &bnum, sizeof(bnum)); + } + else + { + key.resize(sizeof(int64)+1); + memcpy(&key[1], &blocknum, sizeof(blocknum)); + } + return key; +} + +int64 CloudFile::block_key_rev(const std::string & data) +{ + if (data.size() == sizeof(unsigned char) + 1) + { + return data[1]; + } + else if (data.size() == sizeof(unsigned short) + 1) + { + unsigned short ret; + memcpy(&ret, &data[1], sizeof(ret)); + return ret; + } + else if (data.size() == sizeof(unsigned int) + 1) + { + unsigned int ret; + memcpy(&ret, &data[1], sizeof(ret)); + return ret; + } + else if (data.size() == sizeof(int64) + 1) + { + int64 ret; + memcpy(&ret, &data[1], sizeof(ret)); + return ret; + } + else + { + return -1; + } +} + +std::string CloudFile::big_block_key( int64 blocknum ) +{ + std::string key = block_key(blocknum); + key[0]='b'; + return key; +} + +std::string CloudFile::small_block_key( int64 blocknum ) +{ + std::string key = block_key(blocknum); + key[0]='s'; + return key; +} + +std::string CloudFile::hex_big_block_key(int64 bytenum) +{ + return bytesToHex(big_block_key(bytenum/big_block_size)); +} + +std::string CloudFile::hex_small_block_key(int64 bytenum) +{ + return bytesToHex(small_block_key(bytenum/small_block_size)); +} + +int CloudFile::Congested() +{ + int ret = 0; + if (kv_store.is_congested()) + ret |= 1; + + IScopedLock lock(mutex.get()); + if (is_flushing) + ret |= 2; + return ret; +} + +#ifdef HAS_ASYNC +int CloudFile::CongestedAsync() +{ + int ret = 0; + if (kv_store.is_congested_async()) + ret |= 1; + if (is_flushing) + ret |= 2; + return ret; +} +#endif + +std::string CloudFile::get_raid_groups() +{ +#ifdef HAS_LOCAL_BACKEND + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + return backend->get_raid_groups(); + } + } +#endif + + return std::string(); +} + +std::string CloudFile::scrub_stats() +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->scrub_stats(); + } + + return std::string(); +} + +void CloudFile::start_scrub(ScrubAction action) +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + frontend->start_scrub(action, std::string()); + } +} + +void CloudFile::stop_scrub() +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + frontend->stop_scrub(); + } +} + +bool CloudFile::add_disk(const std::string & path) +{ +#ifdef HAS_LOCAL_BACKEND + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + return backend->add_location(path); + } + } +#endif + + return false; +} + +bool CloudFile::remove_disk(const std::string & path, bool completely) +{ +#ifdef HAS_LOCAL_BACKEND + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + return backend->remove_location(path, completely); + } + } +#endif + + return false; +} + +std::string CloudFile::disk_error_info() +{ +#ifdef HAS_LOCAL_BACKEND + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + return backend->disk_error_info(); + } + } +#endif + + return std::string(); +} + +int64 CloudFile::current_total_size() +{ +#ifdef HAS_LOCAL_BACKEND + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + int64 total_space, free_space; + if (backend->get_space_info(total_space, free_space)) + { + return total_space; + } + } + } +#endif + + return -1; +} + +int64 CloudFile::current_free_space() +{ +#ifdef HAS_LOCAL_BACKEND + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + int64 total_space, free_space; + if (backend->get_space_info(total_space, free_space)) + { + return free_space; + } + } + } +#endif + + return -1; +} + +bool CloudFile::set_target_failure_probability(double t) +{ +#ifdef HAS_LOCAL_BACKEND + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + return backend->set_target_failure_probability(t); + } + } +#endif + + return false; +} + +bool CloudFile::set_target_overhead(double t) +{ +#ifdef HAS_LOCAL_BACKEND + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + return backend->set_target_overhead(t); + } + } +#endif + + return false; +} + +std::string CloudFile::get_scrub_position() +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->scrub_position(); + } + return std::string(); +} + +int64 CloudFile::get_memfile_bytes() +{ + return kv_store.get_memfile_bytes(); +} + +int64 CloudFile::get_submitted_memfile_bytes() +{ + return kv_store.get_submitted_memfile_bytes(); +} + +void CloudFile::operator()() +{ + if (is_async) + abort(); + + std::vector buf; + buf.resize(small_block_size); + + IScopedLock lock(mutex.get()); + while (!exit_thread) + { + int64 wtime = 1000; + int64 ctime = Server->getTimeMS(); + for (auto it = fracture_big_blogs.begin(); it != fracture_big_blogs.end();) + { + if (it->second <= ctime) + { + int64 big_block_num = it->first; + Server->Log("Fracturing big block " + convert(big_block_num) + "...", LL_INFO); + + int64 orig_start = big_block_num*big_block_size; + int64 stop = big_block_num*big_block_size + big_block_size; + + lock_extent(lock, orig_start, stop - orig_start, true); + + bool has_big_block = big_blocks_bitmap->get(big_block_num); + if (has_big_block + && bitmap_has_big_block(big_block_num) + && old_big_blocks_bitmap->get(big_block_num)) + { + bool success = true; + for (int64 start = orig_start; start < stop; start += small_block_size) + { + _u32 toread = static_cast<_u32>((std::min)(small_block_size, cloudfile_size - start)); + + if (!bitmap_has_small_block(start / small_block_size)) + { + continue; + } + + bool has_error; + big_blocks_bitmap->set(big_block_num, true); + if (ReadInt(start, buf.data(), toread, &lock, 0, &has_error) != toread) + { + Server->Log("Error reading while fracturing big block", LL_ERROR); + success = false; + break; + } + + big_blocks_bitmap->set(big_block_num, false); + if (WriteInt(start, buf.data(), toread, true, &lock, true, &has_error) != toread) + { + Server->Log("Error writing while fracturing big block", LL_ERROR); + success = false; + break; + } + + if (toread < small_block_size) + { + break; + } + } + + big_blocks_bitmap->set(big_block_num, !success); + + if (success) + { + lock.relock(nullptr); + + kv_store.del(big_block_key(big_block_num)); + + lock.relock(mutex.get()); + } + + unlock_extent(lock, orig_start, stop - orig_start, true); + } + else + { + unlock_extent(lock, orig_start, stop - orig_start, true); + } + + auto it_curr = it; + ++it; + fracture_big_blogs.erase(it_curr); + + ctime = Server->getTimeMS(); + } + else + { + wtime = (std::min)(wtime, it->second - ctime); + ++it; + } + } + + thread_cond->wait(&lock, static_cast(wtime)); + } +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task_discard CloudFile::run_async(fuse_io_context& io) +{ + run_async_queue(io); + + if (!io.has_fuse_io()) + co_return -1; + + fuse_io_context::FuseIoVal fuse_io = io.get_fuse_io(); + fuse_io->write_fuse = false; + + { + fuse_io_context::ScratchBufInit* scratch_init = reinterpret_cast(fuse_io->scratch_buf); + + Server->Log("Setting pipe fd " + convert(scratch_init->pipe_out) + " size " + PrettyPrintBytes(small_block_size)); + int rc = fcntl(scratch_init->pipe_out, F_SETPIPE_SZ, small_block_size); + if (rc < 0) + { + Server->Log("Error setting pipe size to " + std::to_string(small_block_size) + ". " + os_last_error_str(), LL_ERROR); + co_return -1; + } + } + + while (!exit_thread) + { + int64 wtime = 1000; + int64 ctime = Server->getTimeMS(); + for (auto it = fracture_big_blogs.begin(); it != fracture_big_blogs.end();) + { + if (it->second <= ctime) + { + int64 big_block_num = it->first; + Server->Log("Fracturing big block " + convert(big_block_num) + "...", LL_INFO); + + int64 orig_start = big_block_num * big_block_size; + int64 stop = big_block_num * big_block_size + big_block_size; + + size_t lock_idx = co_await lock_extent_async(orig_start, stop - orig_start, true); + + bool has_big_block = co_await big_blocks_bitmap->get_async(io, big_block_num); + if (has_big_block + && (co_await bitmap_has_big_block_async(io, big_block_num)) + && old_big_blocks_bitmap->get(big_block_num)) + { + bool success = true; + for (int64 start = orig_start; start < stop; start += small_block_size) + { + _u32 toread = static_cast<_u32>((std::min)(small_block_size, cloudfile_size - start)); + + if (!(co_await bitmap_has_small_block_async(io, start / small_block_size))) + { + continue; + } + + co_await big_blocks_bitmap->set_async(io, big_block_num, true); + int rc = co_await ReadAsync(io, fuse_io.get(), start, toread, 0, true); + if (rc<0 || rc != toread) + { + Server->Log("Error reading while fracturing big block. Rc=" + convert(rc) + " Expected=" + convert(toread), LL_ERROR); + co_await empty_pipe(io, fuse_io.get()); + success = false; + break; + } + + co_await big_blocks_bitmap->set_async(io, big_block_num, false); + rc = co_await WriteAsync(io, fuse_io.get(), start, toread, true, true, true); + if (rc<0 || rc != toread) + { + co_await empty_pipe(io, fuse_io.get()); + Server->Log("Error writing while fracturing big block. Rc="+convert(rc)+" Expected="+convert(toread), LL_ERROR); + success = false; + break; + } + + if (toread < small_block_size) + { + break; + } + } + + big_blocks_bitmap->set(big_block_num, !success); + + if (success) + { + co_await kv_store.del_async(io, big_block_key(big_block_num)); + } + + unlock_extent_async(orig_start, stop - orig_start, true, lock_idx); + } + else + { + unlock_extent_async(orig_start, stop - orig_start, true, lock_idx); + } + + auto it_curr = it; + ++it; + fracture_big_blogs.erase(it_curr); + + ctime = Server->getTimeMS(); + } + else + { + wtime = (std::min)(wtime, it->second - ctime); + ++it; + } + } + + co_await io.sleep_ms(wtime); + } + + co_return 0; +} +#endif //HAS_ASYNC + +void CloudFile::run_cd_fracture() +{ + fracture_big_blogs_ticket = Server->getThreadPool()->execute(this, "cd fracture"); +} + +bool CloudFile::start_raid_defrag(const std::string& settings) +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->start_defrag(settings); + } + + return false; +} + +bool CloudFile::is_background_worker_enabled() +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->is_background_worker_enabled(); + } + return false; +} + +bool CloudFile::is_background_worker_running() +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->is_background_worker_running(); + } + return false; +} + +void CloudFile::enable_background_worker(bool b) +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->enable_background_worker(b); + } } bool CloudFile::start_background_worker() { - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->start_background_worker(); + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->start_background_worker(); } return false; } void CloudFile::set_background_worker_result_fn(const std::string& result_fn) { - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->set_background_worker_result_fn(result_fn); + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->set_background_worker_result_fn(result_fn); } } bool CloudFile::has_background_task() { - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->has_background_task(); + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->has_background_task(); } return false; -} - -void CloudFile::preload(int64 start, int64 stop, size_t n_threads) -{ - if (is_async) - abort(); - - std::unique_ptr preload_pipe(Server->createMemoryPipe()); - std::vector preload_tickets; - for (size_t i = 0; i < n_threads; ++i) - { - preload_tickets.push_back(Server->getThreadPool()->execute(new PreloadThread(preload_pipe.get(), this), "preload")); - } - - { - IScopedLock lock(mutex.get()); - while (start < stop - && startget(start / block_size); - - if (!has_block) - { - start += block_size; - continue; - } - - bool in_big_block = big_blocks_bitmap->get(start / big_block_size); - - int64 curr_block_size; - if (in_big_block) - { - curr_block_size = big_block_size; - } - else - { - curr_block_size = small_block_size; - } - - int64 toread = (std::min)((curr_block_size - start%curr_block_size), stop - start); - - CWData data; - data.addInt64(start); - - preload_pipe->Write(std::string(data.getDataPtr(), data.getDataSize())); - - while (preload_pipe->getNumElements() > 1000) - { - lock.relock(nullptr); - Server->wait(1000); - lock.relock(mutex.get()); - } - - start += toread; - } - } - - preload_pipe->Write(std::string()); - - Server->getThreadPool()->waitFor(preload_tickets); -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task_discard CloudFile::preload_async_read(fuse_io_context& io, - std::vector >& ios, AwaiterCoList*& ios_waiters_head, int64 start, _u32 size) -{ - std::unique_ptr fuse_io = co_await IosAwaiter(ios, ios_waiters_head); - - co_await ReadAsync(io, *fuse_io.get(), start, size, 0, false); - - co_await empty_pipe(io, *fuse_io.get()); - - ios.push_back(std::move(fuse_io)); - - iosWaitersResume(ios, ios_waiters_head); - - co_return 0; -} -#endif //HAS_ASYNC - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task_discard CloudFile::preload_items_single_async(fuse_io_context& io, - size_t& available_workers, AwaiterCoList*& worker_waiters_head, std::string key, int64 offset, _u32 len, - int tag, bool disable_memfiles, bool load_only) -{ - co_await WorkerAwaiter(available_workers, worker_waiters_head); - - co_await preload_key_async(io, key, offset, len, tag, disable_memfiles, load_only); - - ++available_workers; - - workerWaitersResume(available_workers, worker_waiters_head); - - co_return 0; -} -#endif - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::complete_read_tasks(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io, - const std::vector& read_tasks, const size_t flush_mem_n_tasks, const int64 orig_pos, - const _u32 bsize, const unsigned int flags) -{ - fuse_out_header* out_header = reinterpret_cast(fuse_io.scratch_buf); - - _u32 n_peek = static_cast<_u32>(read_tasks.size()) + flush_mem_n_tasks; - - if (fuse_io.write_fuse) - n_peek += 2; - - std::vector sqes; - sqes.reserve(n_peek); - - if (fuse_io.write_fuse) - { - assert(co_await verify_pipe_empty(io, fuse_io)); - - io_uring_sqe* header_sqe = io.get_sqe(n_peek); - --n_peek; - sqes.push_back(header_sqe); - - io_uring_prep_write_fixed(header_sqe, fuse_io.pipe[1], - fuse_io.scratch_buf, sizeof(fuse_out_header), - -1, fuse_io.scratch_buf_idx); - header_sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; - } - - for (const ReadTask& task : read_tasks) - { - io_uring_sqe* sqe = io.get_sqe(n_peek); - --n_peek; - sqes.push_back(sqe); - - io_uring_prep_splice(sqe, task.fd, - task.off, fuse_io.pipe[1], -1, task.size, - SPLICE_F_MOVE | SPLICE_F_NONBLOCK); - sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; - - if (flags & TransactionalKvStore::Flag::prioritize_read) - { - sqe->ioprio = io_prio_val; - } - } - - if (fuse_io.write_fuse) - { - io_uring_sqe* splice_sqe = io.get_sqe(n_peek); - --n_peek; - io_uring_prep_splice(splice_sqe, fuse_io.pipe[0], - -1, io.fuse_ring.fd, -1, out_header->len, - SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); - splice_sqe->flags |= IOSQE_FIXED_FILE; - splice_sqe->ioprio = io_prio_val; - sqes.push_back(splice_sqe); - } - else - { - sqes.back()->flags &= ~IOSQE_IO_LINK; - } - - io_uring_sqe* sqe_rm_mem = nullptr; - for (const ReadTask& task : read_tasks) - { - if (task.flush_mem) - { - if (sqe_rm_mem == nullptr) - { - sqes.back()->flags |= IOSQE_IO_LINK; - } - - sqe_rm_mem = io.get_sqe(n_peek); - --n_peek; - - io_uring_prep_fadvise(sqe_rm_mem, - task.fd, task.off, task.size, POSIX_FADV_DONTNEED); - sqe_rm_mem->flags |= IOSQE_IO_LINK; - - io.submit(sqe_rm_mem); - } - } - - if (sqe_rm_mem != nullptr) - { - sqe_rm_mem->flags &= ~IOSQE_IO_LINK; - } - - assert(n_peek==0); - - std::vector rcs = co_await io.complete(sqes); - - int last_rc = rcs[sqes.size() - 1]; - - size_t need_fuse_write = 0; - int ret = bsize; - if ((last_rc == -ECANCELED && - (rcs[0] == sizeof(fuse_out_header) || !fuse_io.write_fuse)) || - (!fuse_io.write_fuse && last_rc >= 0 && last_rc < read_tasks.back().size)) - { - Server->Log("Last rc=" + convert(last_rc) + " Retrying (short) reads... write_fuse=" + convert(fuse_io.write_fuse), LL_WARNING); - need_fuse_write += sizeof(fuse_out_header); - - bool submit = false; - for (size_t i = 0; i < read_tasks.size();) - { - const ReadTask& task = read_tasks[i]; - size_t task_rc_idx = fuse_io.write_fuse ? i + 1 : i; - const int rc = rcs[task_rc_idx]; - - const std::string src_fn = task.file->getFilename() == task.key ? ("mem:" + bytesToHex(task.key)) : task.file->getFilename(); - - if (rc < 0 && rc != -ECANCELED) - { - addSystemEvent("cache_err", - "Error reading from block file on cache", - "Read error while reading from file " + src_fn + "" - " at positition " + convert(task.off) + " block device pos " + convert(orig_pos) + " len " + convert(task.size) + " " - + " write_fuse=" + convert(fuse_io.write_fuse) + ". Errno " - + convert(rc * -1), LL_ERROR); - ret = -1; - break; - } - else if (rc > 0 && rc == task.size) - { - need_fuse_write += task.size; - ++i; - } - else if (rc >= 0 && rc < task.size) - { - struct stat stbuf; - fstat(task.fd, &stbuf); - - Server->Log("Short read " + convert(i) + " (" + convert(rc) + "/" + convert(task.size) + ") while reading from file " + src_fn + - " at positition " + convert(task.off) + " block file size " + convert(task.file->Size()) + " non cached " + convert((int64)stbuf.st_size) + ". Continuing with zeros...", LL_WARNING); - - if (task.file->Size() >= task.off + task.size) - { - Server->Log("Short read offset below (cached) file size (see warning)", LL_ERROR); - abort(); - } - - submit = true; - - need_fuse_write += rc; - - size_t towrite = task.size - rc; - - while (towrite > 0) - { - io_uring_sqe* zero_sqe = io.get_sqe(); - io_uring_prep_splice(zero_sqe, get_file_fd(zero_file->getOsHandle()), - 0, fuse_io.pipe[1], -1, towrite, - SPLICE_F_MOVE | SPLICE_F_NONBLOCK); - zero_sqe->flags |= IOSQE_FIXED_FILE; - - int zero_rc = co_await io.complete(zero_sqe); - - if (zero_rc <= 0) - { - Server->Log("Error reading zeroes. Rc=" + convert(zero_rc), LL_ERROR); - ret = -1; - break; - } - else - { - towrite -= zero_rc; - need_fuse_write += zero_rc; - } - } - - if (ret == -1) - break; - - ++i; - continue; - } - else if (submit) - { - io_uring_sqe* sqe = io.get_sqe(); - - io_uring_prep_splice(sqe, task.fd, - task.off, fuse_io.pipe[1], -1, task.size, - SPLICE_F_MOVE | SPLICE_F_NONBLOCK); - sqe->flags |= IOSQE_FIXED_FILE; - - int submit_rc = co_await io.complete(sqe); - - if (submit_rc < 0) - { - Server->Log("Error submitting to pipe after short read rc=" + convert(submit_rc), LL_ERROR); - ret = -1; - break; - } - - need_fuse_write += submit_rc; - - if (submit_rc < task.size) - { - Server->Log("Short read " + convert(i) + " while recovering from previous short read (" + convert(submit_rc) + "/" + convert(task.size) + ") while reading from file " + src_fn + - " at positition " + convert(task.off) + " block file size " + convert(task.file->Size()) + ". Looping...", LL_WARNING); - rcs[task_rc_idx] = submit_rc; - continue; - } - - ++i; - } - else - { - assert(false); - ++i; - } - } - } - else if (last_rc < 0) - { - Server->Log("Read error errno=" + convert(last_rc * -1) + " write_fuse=" + convert(fuse_io.write_fuse), LL_ERROR); - ret = -1; - } - else if (fuse_io.write_fuse && - last_rc < out_header->len) - { - Server->Log("Last_rc smaller than size in header (" + convert(out_header->len) + "). Last_rc=" + convert(last_rc) + " write_fuse=" + convert(fuse_io.write_fuse), LL_ERROR); - ret = -1; - } - - if (need_fuse_write > 0 && - fuse_io.write_fuse) - { - io_uring_sqe* splice_sqe = io.get_sqe(); - io_uring_prep_splice(splice_sqe, fuse_io.pipe[0], - -1, io.fuse_ring.fd, -1, need_fuse_write, - SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); - splice_sqe->flags |= IOSQE_FIXED_FILE; - - int rc = co_await io.complete(splice_sqe); - - if (rc < 0 || rc != need_fuse_write) - { - Server->Log("Fuse write failed in ReadAsync error recovery. rc=" + convert(rc), LL_ERROR); - ret = -1; - } - } - - co_return ret; -} -#endif //HAS_ASYNC - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task_discard CloudFile::run_async_queue(fuse_io_context& io) -{ - IScopedLock lock(msg_queue_mutex.get()); - std::vector buf(sizeof(int64)); - - while (true) - { - if (msg_queue.empty()) - { - lock.relock(nullptr); - - io_uring_sqe* sqe = io.get_sqe(); - io_uring_prep_read(sqe, queue_in_eventfd, buf.data(), buf.size(), 0); - - int rc = co_await io.complete(sqe); - - if (rc != buf.size()) - { - Server->Log("Error reading from queue_in_eventfd. Rc=" + convert(rc), LL_ERROR); - } - - lock.relock(msg_queue_mutex.get()); - } - - if (msg_queue.empty()) - continue; - - std::vector msg = msg_queue.front(); - msg_queue.pop(); - - lock.relock(nullptr); - - CRData rdata(msg.data(), msg.size()); - - int64 id; - bool b = rdata.getVarInt(&id); - assert(b); - char cmd; - b = rdata.getChar(&cmd); - assert(b); - - CWData resp; - - if (cmd == queue_cmd_resize) - { - int64 new_size;; - b = rdata.getVarInt(&new_size); - assert(b); - - b = co_await ResizeAsync(io, new_size); - resp.addChar(b ? 1 : 0); - } - else if (cmd == queue_cmd_get_size) - { - int64 size = co_await SizeAsync(); - resp.addVarInt(size); - } - else if (cmd == queue_cmd_meminfo) - { - size_t lock_idx = co_await lock_extent_async(0, 0, false); - - resp.addVarInt(async_locked_extents.capacity()); - resp.addVarInt(bitmap->meminfo()); - resp.addVarInt(bitmap->get_max_cache_items()); - resp.addVarInt(big_blocks_bitmap->meminfo()); - resp.addVarInt(big_blocks_bitmap->get_max_cache_items()); - resp.addVarInt(old_big_blocks_bitmap->meminfo()); - resp.addVarInt(new_big_blocks_bitmap->meminfo()); - resp.addVarInt(fracture_big_blogs.size()); - resp.addVarInt(in_write_retrieval.size()); - - unlock_extent_async(0, 0, false, lock_idx); - } - else - { - assert(false); - } - - lock.relock(msg_queue_mutex.get()); - - msg_queue_responses[id].assign(resp.getDataPtr(), resp.getDataPtr()+resp.getDataSize()); - - msg_queue_cond->notify_all(); - } - - co_return 0; -} -#endif //HAS_ASYNC - -#ifdef HAS_ASYNC -bool CloudFile::get_async_msg(CWData& msg, CRData& resp) -{ - CWData int_msg; - - IScopedLock lock(msg_queue_mutex.get()); - - int64 id = msg_queue_id++; - - int_msg.addVarInt(id); - int_msg.addBuffer(msg.getDataPtr(), msg.getDataSize()); - - std::vector data; - data.assign(int_msg.getDataPtr(), int_msg.getDataPtr()+int_msg.getDataSize()); - - msg_queue.push(data); - - eventfd_write(queue_in_eventfd, 1); - - std::map >::iterator it; - while ((it=msg_queue_responses.find(id)) == msg_queue_responses.end()) - { - msg_queue_cond->wait(&lock); - } - - resp.set(it->second.data(), it->second.size(), true); - - msg_queue_responses.erase(it); - - return true; +} + +void CloudFile::preload(int64 start, int64 stop, size_t n_threads) +{ + if (is_async) + abort(); + + std::unique_ptr preload_pipe(Server->createMemoryPipe()); + std::vector preload_tickets; + for (size_t i = 0; i < n_threads; ++i) + { + preload_tickets.push_back(Server->getThreadPool()->execute(new PreloadThread(preload_pipe.get(), this), "preload")); + } + + { + IScopedLock lock(mutex.get()); + while (start < stop + && startget(start / block_size); + + if (!has_block) + { + start += block_size; + continue; + } + + bool in_big_block = big_blocks_bitmap->get(start / big_block_size); + + int64 curr_block_size; + if (in_big_block) + { + curr_block_size = big_block_size; + } + else + { + curr_block_size = small_block_size; + } + + int64 toread = (std::min)((curr_block_size - start%curr_block_size), stop - start); + + CWData data; + data.addInt64(start); + + preload_pipe->Write(std::string(data.getDataPtr(), data.getDataSize())); + + while (preload_pipe->getNumElements() > 1000) + { + lock.relock(nullptr); + Server->wait(1000); + lock.relock(mutex.get()); + } + + start += toread; + } + } + + preload_pipe->Write(std::string()); + + Server->getThreadPool()->waitFor(preload_tickets); +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task_discard CloudFile::preload_async_read(fuse_io_context& io, + std::vector >& ios, AwaiterCoList*& ios_waiters_head, int64 start, _u32 size) +{ + std::unique_ptr fuse_io = co_await IosAwaiter(ios, ios_waiters_head); + + co_await ReadAsync(io, *fuse_io.get(), start, size, 0, false); + + co_await empty_pipe(io, *fuse_io.get()); + + ios.push_back(std::move(fuse_io)); + + iosWaitersResume(ios, ios_waiters_head); + + co_return 0; +} +#endif //HAS_ASYNC + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task_discard CloudFile::preload_items_single_async(fuse_io_context& io, + size_t& available_workers, AwaiterCoList*& worker_waiters_head, std::string key, int64 offset, _u32 len, + int tag, bool disable_memfiles, bool load_only) +{ + co_await WorkerAwaiter(available_workers, worker_waiters_head); + + co_await preload_key_async(io, key, offset, len, tag, disable_memfiles, load_only); + + ++available_workers; + + workerWaitersResume(available_workers, worker_waiters_head); + + co_return 0; +} +#endif + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::complete_read_tasks(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io, + const std::vector& read_tasks, const size_t flush_mem_n_tasks, const int64 orig_pos, + const _u32 bsize, const unsigned int flags) +{ + fuse_out_header* out_header = reinterpret_cast(fuse_io.scratch_buf); + + _u32 n_peek = static_cast<_u32>(read_tasks.size()) + flush_mem_n_tasks; + + if (fuse_io.write_fuse) + n_peek += 2; + + std::vector sqes; + sqes.reserve(n_peek); + + if (fuse_io.write_fuse) + { + assert(co_await verify_pipe_empty(io, fuse_io)); + + io_uring_sqe* header_sqe = io.get_sqe(n_peek); + --n_peek; + sqes.push_back(header_sqe); + + io_uring_prep_write_fixed(header_sqe, fuse_io.pipe[1], + fuse_io.scratch_buf, sizeof(fuse_out_header), + -1, fuse_io.scratch_buf_idx); + header_sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; + } + + for (const ReadTask& task : read_tasks) + { + io_uring_sqe* sqe = io.get_sqe(n_peek); + --n_peek; + sqes.push_back(sqe); + + io_uring_prep_splice(sqe, task.fd, + task.off, fuse_io.pipe[1], -1, task.size, + SPLICE_F_MOVE | SPLICE_F_NONBLOCK); + sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; + + if (flags & TransactionalKvStore::Flag::prioritize_read) + { + sqe->ioprio = io_prio_val; + } + } + + if (fuse_io.write_fuse) + { + io_uring_sqe* splice_sqe = io.get_sqe(n_peek); + --n_peek; + io_uring_prep_splice(splice_sqe, fuse_io.pipe[0], + -1, io.fuse_ring.fd, -1, out_header->len, + SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); + splice_sqe->flags |= IOSQE_FIXED_FILE; + splice_sqe->ioprio = io_prio_val; + sqes.push_back(splice_sqe); + } + else + { + sqes.back()->flags &= ~IOSQE_IO_LINK; + } + + io_uring_sqe* sqe_rm_mem = nullptr; + for (const ReadTask& task : read_tasks) + { + if (task.flush_mem) + { + if (sqe_rm_mem == nullptr) + { + sqes.back()->flags |= IOSQE_IO_LINK; + } + + sqe_rm_mem = io.get_sqe(n_peek); + --n_peek; + + io_uring_prep_fadvise(sqe_rm_mem, + task.fd, task.off, task.size, POSIX_FADV_DONTNEED); + sqe_rm_mem->flags |= IOSQE_IO_LINK; + + io.submit(sqe_rm_mem); + } + } + + if (sqe_rm_mem != nullptr) + { + sqe_rm_mem->flags &= ~IOSQE_IO_LINK; + } + + assert(n_peek==0); + + std::vector rcs = co_await io.complete(sqes); + + int last_rc = rcs[sqes.size() - 1]; + + size_t need_fuse_write = 0; + int ret = bsize; + if ((last_rc == -ECANCELED && + (rcs[0] == sizeof(fuse_out_header) || !fuse_io.write_fuse)) || + (!fuse_io.write_fuse && last_rc >= 0 && last_rc < read_tasks.back().size)) + { + Server->Log("Last rc=" + convert(last_rc) + " Retrying (short) reads... write_fuse=" + convert(fuse_io.write_fuse), LL_WARNING); + need_fuse_write += sizeof(fuse_out_header); + + bool submit = false; + for (size_t i = 0; i < read_tasks.size();) + { + const ReadTask& task = read_tasks[i]; + size_t task_rc_idx = fuse_io.write_fuse ? i + 1 : i; + const int rc = rcs[task_rc_idx]; + + const std::string src_fn = task.file->getFilename() == task.key ? ("mem:" + bytesToHex(task.key)) : task.file->getFilename(); + + if (rc < 0 && rc != -ECANCELED) + { + addSystemEvent("cache_err", + "Error reading from block file on cache", + "Read error while reading from file " + src_fn + "" + " at positition " + convert(task.off) + " block device pos " + convert(orig_pos) + " len " + convert(task.size) + " " + + " write_fuse=" + convert(fuse_io.write_fuse) + ". Errno " + + convert(rc * -1), LL_ERROR); + ret = -1; + break; + } + else if (rc > 0 && rc == task.size) + { + need_fuse_write += task.size; + ++i; + } + else if (rc >= 0 && rc < task.size) + { + struct stat stbuf; + fstat(task.fd, &stbuf); + + Server->Log("Short read " + convert(i) + " (" + convert(rc) + "/" + convert(task.size) + ") while reading from file " + src_fn + + " at positition " + convert(task.off) + " block file size " + convert(task.file->Size()) + " non cached " + convert((int64)stbuf.st_size) + ". Continuing with zeros...", LL_WARNING); + + if (task.file->Size() >= task.off + task.size) + { + Server->Log("Short read offset below (cached) file size (see warning)", LL_ERROR); + abort(); + } + + submit = true; + + need_fuse_write += rc; + + size_t towrite = task.size - rc; + + while (towrite > 0) + { + io_uring_sqe* zero_sqe = io.get_sqe(); + io_uring_prep_splice(zero_sqe, get_file_fd(zero_file->getOsHandle()), + 0, fuse_io.pipe[1], -1, towrite, + SPLICE_F_MOVE | SPLICE_F_NONBLOCK); + zero_sqe->flags |= IOSQE_FIXED_FILE; + + int zero_rc = co_await io.complete(zero_sqe); + + if (zero_rc <= 0) + { + Server->Log("Error reading zeroes. Rc=" + convert(zero_rc), LL_ERROR); + ret = -1; + break; + } + else + { + towrite -= zero_rc; + need_fuse_write += zero_rc; + } + } + + if (ret == -1) + break; + + ++i; + continue; + } + else if (submit) + { + io_uring_sqe* sqe = io.get_sqe(); + + io_uring_prep_splice(sqe, task.fd, + task.off, fuse_io.pipe[1], -1, task.size, + SPLICE_F_MOVE | SPLICE_F_NONBLOCK); + sqe->flags |= IOSQE_FIXED_FILE; + + int submit_rc = co_await io.complete(sqe); + + if (submit_rc < 0) + { + Server->Log("Error submitting to pipe after short read rc=" + convert(submit_rc), LL_ERROR); + ret = -1; + break; + } + + need_fuse_write += submit_rc; + + if (submit_rc < task.size) + { + Server->Log("Short read " + convert(i) + " while recovering from previous short read (" + convert(submit_rc) + "/" + convert(task.size) + ") while reading from file " + src_fn + + " at positition " + convert(task.off) + " block file size " + convert(task.file->Size()) + ". Looping...", LL_WARNING); + rcs[task_rc_idx] = submit_rc; + continue; + } + + ++i; + } + else + { + assert(false); + ++i; + } + } + } + else if (last_rc < 0) + { + Server->Log("Read error errno=" + convert(last_rc * -1) + " write_fuse=" + convert(fuse_io.write_fuse), LL_ERROR); + ret = -1; + } + else if (fuse_io.write_fuse && + last_rc < out_header->len) + { + Server->Log("Last_rc smaller than size in header (" + convert(out_header->len) + "). Last_rc=" + convert(last_rc) + " write_fuse=" + convert(fuse_io.write_fuse), LL_ERROR); + ret = -1; + } + + if (need_fuse_write > 0 && + fuse_io.write_fuse) + { + io_uring_sqe* splice_sqe = io.get_sqe(); + io_uring_prep_splice(splice_sqe, fuse_io.pipe[0], + -1, io.fuse_ring.fd, -1, need_fuse_write, + SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); + splice_sqe->flags |= IOSQE_FIXED_FILE; + + int rc = co_await io.complete(splice_sqe); + + if (rc < 0 || rc != need_fuse_write) + { + Server->Log("Fuse write failed in ReadAsync error recovery. rc=" + convert(rc), LL_ERROR); + ret = -1; + } + } + + co_return ret; +} +#endif //HAS_ASYNC + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task_discard CloudFile::run_async_queue(fuse_io_context& io) +{ + IScopedLock lock(msg_queue_mutex.get()); + std::vector buf(sizeof(int64)); + + while (true) + { + if (msg_queue.empty()) + { + lock.relock(nullptr); + + io_uring_sqe* sqe = io.get_sqe(); + io_uring_prep_read(sqe, queue_in_eventfd, buf.data(), buf.size(), 0); + + int rc = co_await io.complete(sqe); + + if (rc != buf.size()) + { + Server->Log("Error reading from queue_in_eventfd. Rc=" + convert(rc), LL_ERROR); + } + + lock.relock(msg_queue_mutex.get()); + } + + if (msg_queue.empty()) + continue; + + std::vector msg = msg_queue.front(); + msg_queue.pop(); + + lock.relock(nullptr); + + CRData rdata(msg.data(), msg.size()); + + int64 id; + bool b = rdata.getVarInt(&id); + assert(b); + char cmd; + b = rdata.getChar(&cmd); + assert(b); + + CWData resp; + + if (cmd == queue_cmd_resize) + { + int64 new_size;; + b = rdata.getVarInt(&new_size); + assert(b); + + b = co_await ResizeAsync(io, new_size); + resp.addChar(b ? 1 : 0); + } + else if (cmd == queue_cmd_get_size) + { + int64 size = co_await SizeAsync(); + resp.addVarInt(size); + } + else if (cmd == queue_cmd_meminfo) + { + size_t lock_idx = co_await lock_extent_async(0, 0, false); + + resp.addVarInt(async_locked_extents.capacity()); + resp.addVarInt(bitmap->meminfo()); + resp.addVarInt(bitmap->get_max_cache_items()); + resp.addVarInt(big_blocks_bitmap->meminfo()); + resp.addVarInt(big_blocks_bitmap->get_max_cache_items()); + resp.addVarInt(old_big_blocks_bitmap->meminfo()); + resp.addVarInt(new_big_blocks_bitmap->meminfo()); + resp.addVarInt(fracture_big_blogs.size()); + resp.addVarInt(in_write_retrieval.size()); + + unlock_extent_async(0, 0, false, lock_idx); + } + else + { + assert(false); + } + + lock.relock(msg_queue_mutex.get()); + + msg_queue_responses[id].assign(resp.getDataPtr(), resp.getDataPtr()+resp.getDataSize()); + + msg_queue_cond->notify_all(); + } + + co_return 0; +} +#endif //HAS_ASYNC + +#ifdef HAS_ASYNC +bool CloudFile::get_async_msg(CWData& msg, CRData& resp) +{ + CWData int_msg; + + IScopedLock lock(msg_queue_mutex.get()); + + int64 id = msg_queue_id++; + + int_msg.addVarInt(id); + int_msg.addBuffer(msg.getDataPtr(), msg.getDataSize()); + + std::vector data; + data.assign(int_msg.getDataPtr(), int_msg.getDataPtr()+int_msg.getDataSize()); + + msg_queue.push(data); + + eventfd_write(queue_in_eventfd, 1); + + std::map >::iterator it; + while ((it=msg_queue_responses.find(id)) == msg_queue_responses.end()) + { + msg_queue_cond->wait(&lock); + } + + resp.set(it->second.data(), it->second.size(), true); + + msg_queue_responses.erase(it); + + return true; } #endif @@ -5967,2057 +5968,2057 @@ bool CloudFile::hasKey(const std::string& key) int64 CloudFile::get_transid() { return kv_store.get_transid(); -} - - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::preload_async(fuse_io_context& io, int64 start, int64 stop, size_t n_threads) -{ - const _u32 rsize = 4096; - auto ios = std::make_unique< std::vector > >(); - - for (size_t i = 0; i < n_threads; ++i) - { - //needs special handling for non-fixed pipe - abort(); - std::unique_ptr fuse_io = std::make_unique(); - int rc = pipe2(fuse_io->pipe, O_CLOEXEC | O_NONBLOCK); - if (rc != 0) - { - Server->Log("Error creating pipe for preload. " + os_last_error_str(), LL_ERROR); - co_return -1; - } - rc = fcntl(fuse_io->pipe[0], F_SETPIPE_SZ, rsize); - if (rc < 0) - { - Server->Log("Error setting pipe size to " + std::to_string(rsize) + " in preload_async. " + os_last_error_str(), LL_ERROR); - co_return -1; - } - fuse_io->write_fuse = false; - ios->push_back(std::move(fuse_io)); - } - - auto ios_waiters_head = std::make_unique(nullptr); - - while (start < stop - && start < cloudfile_size) - { - size_t lock_idx = co_await lock_extent_async(0, 0, false); - - bool has_block = co_await bitmap->get_async(io, start / block_size); - - if (!has_block) - { - start += block_size; - unlock_extent_async(0, 0, false, lock_idx); - continue; - } - - bool in_big_block = co_await big_blocks_bitmap->get_async(io, start / big_block_size); - - unlock_extent_async(0, 0, false, lock_idx); - - int64 curr_block_size; - if (in_big_block) - { - curr_block_size = big_block_size; - } - else - { - curr_block_size = small_block_size; - } - - int64 toread = (std::min)((curr_block_size - start % curr_block_size), stop - start); - - preload_async_read(io, *ios, *ios_waiters_head, start, rsize); - - start += toread; - } - - while (ios->size() != n_threads) - { - co_await io.sleep_ms(100); - } - - for (auto& it : *ios) - { - io_uring_sqe* sqe = io.get_sqe(); - io_uring_prep_close(sqe, it->pipe[0]); - io.submit(sqe); - - io_uring_sqe* sqe2 = io.get_sqe(); - io_uring_prep_close(sqe2, it->pipe[1]); - io.submit(sqe2); - } -} -#endif //HAS_ASYNC - -void CloudFile::cmd(const std::string & c) -{ - if (is_async) - abort(); - - str_map params; - ParseParamStrHttp(c, ¶ms); - - std::string action = params["a"]; - if (action == "preload") - { - auto it_start = params.find("start"); - auto it_stop = params.find("stop"); - auto it_n_threads = params.find("n_threads"); - - if (it_start == params.end()) - { - Server->Log("Insufficient arguments for preload. 'start' missing.", LL_WARNING); - return; - } - - if (it_stop == params.end()) - { - Server->Log("Insufficient arguments for preload. 'stop' missing.", LL_WARNING); - return; - } - - size_t n_threads = 8; - if (it_n_threads!=params.end()) - { - n_threads = watoi(it_n_threads->second); - } - - preload(watoi64(it_start->second), watoi64(it_stop->second), n_threads); - } - else if (action == "scrub_sync_test1") - { - KvStoreFrontend::start_scrub_sync_test1(); - } - else if (action == "scrub_sync_test2") - { - KvStoreFrontend::start_scrub_sync_test2(); - } - else if (action == "reset_disk_errors") - { - auto frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { -#ifdef HAS_LOCAL_BACKEND - auto local = dynamic_cast(frontend->getBackend()); - if (local != nullptr) - { - if (!local->reset_disk_errors()) - { - Server->Log("Resetting disk error info failed", LL_WARNING); - } - } - else -#endif //HAS_LOCAL_BACKEND - { - Server->Log("Wrong kv store backend type (not KvStoreBackendLocal)", LL_WARNING); - } - } - else - { - Server->Log("Wrong kv store type (not KvStoreFrontend)", LL_WARNING); - } - } - else if (action == "dirty_all") - { - kv_store.dirty_all(); - } - else if (action == "retry_all_deletion") - { - auto frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - frontend->retry_all_deletion(); - } - } - else if (action == "disable_compression") - { - kv_store.disable_compression(watoi64(params["time"])); - } - else if (action == "disable_flush") - { - IScopedLock lock(mutex.get()); - ++flush_enabled; - } - else if (action == "enable_flush") - { - IScopedLock lock(mutex.get()); - if (flush_enabled > 0) - --flush_enabled; - } - else if (action == "preload_items") - { - auto it_n_threads = params.find("n_threads"); - - size_t n_threads = os_get_num_cpus()*3; - if (it_n_threads != params.end()) - { - n_threads = watoi(it_n_threads->second); - } - - int tag = watoi(params["tag"]); - bool enable_memfiles = params["enable_memfiles"] == "1"; - bool load_only = params["load_only"] == "1"; - - preload_items(params["fn"], n_threads, tag, !enable_memfiles, load_only); - } - else if (action == "preload_remove") - { - int tag = watoi(params["tag"]); - kv_store.remove_preload_items(tag); - } - else if (action == "set_loglevel") - { - int loglevel = LL_INFO; - if (params["loglevel"] == "debug") - loglevel = LL_DEBUG; - else if (params["loglevel"] == "warn") - loglevel = LL_WARNING; - else if (params["loglevel"] == "error") - loglevel = LL_ERROR; - else if (params["loglevel"] == "info") - loglevel = LL_INFO; - else - loglevel = watoi(params["loglevel"]); - - Server->setLogLevel(loglevel); - } - else if (action == "migrate") - { - if (migration_ticket != ILLEGAL_THREADPOOL_TICKET) - { - Server->Log("Waiting for existing migration to finish", LL_WARNING); - Server->getThreadPool()->waitFor(migration_ticket); - } - - std::string conf_fn = params["conf"]; - migration_ticket = Server->getThreadPool()->execute(new MigrationCoordThread(this, conf_fn, false), "migrate coord"); - } - else if (action == "stop_defrag") - { - auto frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - frontend->stop_defrag(); - } - } - else if (action == "disable_read_memfiles") - { - kv_store.set_disable_read_memfiles(true); - } - else if (action == "disable_write_memfiles") - { - kv_store.set_disable_write_memfiles(true); - } - else if (action == "enable_read_memfiles") - { - kv_store.set_disable_read_memfiles(false); - } - else if (action == "enable_write_memfiles") - { - kv_store.set_disable_write_memfiles(false); - } - else if (action == "set_background_throttle_limit") - { - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { -#ifdef HAS_LOCAL_BACKEND - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - backend->set_background_throttle_limit(atof(params["limit"].c_str())); - } -#endif - } - } - else if (action == "set_all_mirrored") - { - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - frontend->set_all_mirrored(true); - } - } - else if (action == "set_all_unmirrored") - { - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - frontend->set_all_mirrored(false); - } - } - else if (action == "enable_raid_freespace_stats") - { - IScopedLock lock(mutex.get()); - enable_raid_freespace_stats = true; - } - else if (action == "disable_raid_freespace_stats") - { - IScopedLock lock(mutex.get()); - enable_raid_freespace_stats = false; - } - else if (action == "purge_jemalloc") - { - purge_jemalloc(); - } - else if (action == "set_jemalloc_dirty_decay_ms") - { - set_jemalloc_dirty_decay(watoi64(params["timems"])); - } - else - { - Server->Log("Unknown cloud file action '" + action + "'", LL_WARNING); - } -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::cmd_async(fuse_io_context& io, const std::string& c) -{ - str_map params; - ParseParamStrHttp(c, ¶ms); - - std::string action = params["a"]; - if (action == "preload") - { - auto it_start = params.find("start"); - auto it_stop = params.find("stop"); - auto it_n_threads = params.find("n_threads"); - - if (it_start == params.end()) - { - Server->Log("Insufficient arguments for preload. 'start' missing.", LL_WARNING); - co_return 0; - } - - if (it_stop == params.end()) - { - Server->Log("Insufficient arguments for preload. 'stop' missing.", LL_WARNING); - co_return 0; - } - - size_t n_threads = 8; - if (it_n_threads != params.end()) - { - n_threads = watoi(it_n_threads->second); - } - - co_await preload_async(io, watoi64(it_start->second), watoi64(it_stop->second), n_threads); - - co_return 0; - } - else if (action == "scrub_sync_test1") - { - co_return co_await io.run_in_threadpool([]() { - KvStoreFrontend::start_scrub_sync_test1(); - return 0; - }, "test1"); - } - else if (action == "scrub_sync_test2") - { - co_return co_await io.run_in_threadpool([]() { - KvStoreFrontend::start_scrub_sync_test2(); - return 0; - }, "test2"); - } - else if (action == "reset_disk_errors") - { - auto frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - auto local = dynamic_cast(frontend->getBackend()); - if (local != nullptr) - { - co_return co_await io.run_in_threadpool([local]() { - if (!local->reset_disk_errors()) - { - Server->Log("Resetting disk error info failed", LL_WARNING); - } - return 0; - }, "rest disk errors"); - } - else - { - Server->Log("Wrong kv store backend type (not KvStoreBackendLocal)", LL_WARNING); - } - } - else - { - Server->Log("Wrong kv store type (not KvStoreFrontend)", LL_WARNING); - } - } - else if (action == "dirty_all") - { - co_return co_await io.run_in_threadpool([this]() { - this->kv_store.dirty_all(); - return 0; - }, "dirty all"); - } - else if (action == "retry_all_deletion") - { - auto frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - co_return co_await io.run_in_threadpool([frontend]() { - frontend->retry_all_deletion(); - return 0; - }, "del retry all"); - } - } - else if (action == "disable_compression") - { - int64 timems = watoi64(params["time"]); - co_return co_await io.run_in_threadpool([this, timems]() { - this->kv_store.disable_compression(timems); - return 0; - }, "disable compression"); - } - else if (action == "disable_flush") - { - IScopedLock lock(mutex.get()); - ++flush_enabled; - } - else if (action == "enable_flush") - { - IScopedLock lock(mutex.get()); - if (flush_enabled > 0) - --flush_enabled; - } - else if (action == "preload_items") - { - auto it_n_threads = params.find("n_threads"); - - size_t n_threads = os_get_num_cpus() * 3; - if (it_n_threads != params.end()) - { - n_threads = watoi(it_n_threads->second); - } - - int tag = watoi(params["tag"]); - bool enable_memfiles = params["enable_memfiles"] == "1"; - bool load_only = params["load_only"] == "1"; - - co_return co_await preload_items_async(io, params["fn"], n_threads, tag, !enable_memfiles, load_only); - } - else if (action == "preload_remove") - { - int tag = watoi(params["tag"]); - co_return co_await io.run_in_threadpool([this, tag]() { - this->kv_store.remove_preload_items(tag); - return 0; - }, "remove preload items"); - } - else if (action == "set_loglevel") - { - int loglevel = LL_INFO; - if (params["loglevel"] == "debug") - loglevel = LL_DEBUG; - else if (params["loglevel"] == "warn") - loglevel = LL_WARNING; - else if (params["loglevel"] == "error") - loglevel = LL_ERROR; - else if (params["loglevel"] == "info") - loglevel = LL_INFO; - else - loglevel = watoi(params["loglevel"]); - - Server->setLogLevel(loglevel); - } - else if (action == "migrate") - { - abort(); - - if (migration_ticket != ILLEGAL_THREADPOOL_TICKET) - { - Server->Log("Waiting for existing migration to finish", LL_WARNING); - Server->getThreadPool()->waitFor(migration_ticket); - } - - std::string conf_fn = params["conf"]; - migration_ticket = Server->getThreadPool()->execute(new MigrationCoordThread(this, conf_fn, false), "migrate coord"); - } - else if (action == "stop_defrag") - { - auto frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - co_return co_await io.run_in_threadpool([frontend]() { - frontend->stop_defrag(); - return 0; - }, "stop defrag"); - } - } - else if (action == "disable_read_memfiles") - { - co_return co_await io.run_in_threadpool([this]() { - kv_store.set_disable_read_memfiles(true); - return 0; - }, "disable read memfiles"); - } - else if (action == "disable_write_memfiles") - { - co_return co_await io.run_in_threadpool([this]() { - kv_store.set_disable_write_memfiles(true); - return 0; - }, "disable_write_memfiles"); - } - else if (action == "enable_read_memfiles") - { - co_return co_await io.run_in_threadpool([this]() { - kv_store.set_disable_read_memfiles(false); - return 0; - }, "enable_read_memfiles"); - } - else if (action == "enable_write_memfiles") - { - co_return co_await io.run_in_threadpool([this]() { - kv_store.set_disable_write_memfiles(false); - return 0; - }, "enable_write_memfiles"); - } - else if (action == "set_background_throttle_limit") - { - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); - if (backend != NULL) - { - double limit = atof(params["limit"].c_str()); - co_return co_await io.run_in_threadpool([backend, limit]() { - backend->set_background_throttle_limit(limit); - return 0; - }, "set bg throttle limit"); - } - } - } - else if (action == "set_all_mirrored") - { - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - co_return co_await io.run_in_threadpool([frontend]() { - frontend->set_all_mirrored(true); - return 0; - }, "set mirrored"); - } - } - else if (action == "set_all_unmirrored") - { - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != NULL) - { - co_return co_await io.run_in_threadpool([frontend]() { - frontend->set_all_mirrored(false); - return 0; - }, "set unmirrored"); - } - } - else if (action == "enable_raid_freespace_stats") - { - enable_raid_freespace_stats = true; - } - else if (action == "disable_raid_freespace_stats") - { - enable_raid_freespace_stats = false; - } - else if (action == "purge_jemalloc") - { - io.run_in_threadpool_nowait([this]() { - this->purge_jemalloc(); - }); - } - else if (action == "set_jemalloc_dirty_decay_ms") - { - int64 timems = watoi64(params["timems"]); - io.run_in_threadpool_nowait([this, timems]() { - this->set_jemalloc_dirty_decay(timems); - }); - } - else - { - Server->Log("Unknown cloud file action '" + action + "'", LL_WARNING); - } - - co_return 0; -} -#endif //HAS_ASYNC - -int64 CloudFile::key_offset_hex(const std::string & key) -{ - std::string bkey = hexToBytes(key); - -#ifndef NDEBUG - if (strlower(bytesToHex(bkey))!=strlower(key)) - { - Server->Log("Key is not hex", LL_ERROR); - return -1; - } -#endif - - return key_offset(bkey); -} - -int64 CloudFile::key_offset(const std::string& key) -{ - bool big_block; - return key_offset(key, big_block); -} - -int64 CloudFile::key_offset(const std::string & key, bool& big_block) -{ - if (key.empty()) - return -1; - - if (key[0] == 's') - { - big_block = false; - return block_key_rev(key)*small_block_size; - } - else if (key[0] == 'b') - { - big_block = true; - return block_key_rev(key)*big_block_size; - } - else - { - return -1; - } -} - -std::string CloudFile::meminfo() -{ - std::string ret; - - if (is_async) - { -#ifdef HAS_ASYNC - CWData wdata; - wdata.addChar(queue_cmd_meminfo); - CRData rdata; - get_async_msg(wdata, rdata); - - int64 locked_extents_info; - rdata.getVarInt(&locked_extents_info); - int64 bitmap_info; - rdata.getVarInt(&bitmap_info); - int64 bitmap_max; - rdata.getVarInt(&bitmap_max); - int64 big_blocks_bitmap_info; - rdata.getVarInt(&big_blocks_bitmap_info); - int64 big_blocks_bitmap_max; - rdata.getVarInt(&big_blocks_bitmap_max); - int64 old_big_blocks_bitmap_info; - rdata.getVarInt(&old_big_blocks_bitmap_info); - int64 new_big_blocks_bitmap_info; - rdata.getVarInt(&new_big_blocks_bitmap_info); - int64 fracture_big_blogs_info; - rdata.getVarInt(&fracture_big_blogs_info); - int64 in_write_retrieval_info; - rdata.getVarInt(&in_write_retrieval_info); - - ret += "##CloudFile:\n"; - ret += " locked_extents: " + convert(locked_extents_info) + " * " + PrettyPrintBytes(sizeof(SExtent)) + "\n"; - ret += " bitmap: " + PrettyPrintBytes(bitmap_info) + "/" + PrettyPrintBytes(bitmap_max * 4096) + "\n"; - ret += " big_blocks_bitmap: " + PrettyPrintBytes(big_blocks_bitmap_info) + "/" + PrettyPrintBytes(big_blocks_bitmap_max * 4096) + "\n"; - if (old_big_blocks_bitmap.get() != nullptr) - { - ret += " old_big_blocks_bitmap: " + PrettyPrintBytes(old_big_blocks_bitmap_info) + "\n"; - } - if (new_big_blocks_bitmap.get() != nullptr) - { - ret += " new_big_blocks_bitmap: " + PrettyPrintBytes(new_big_blocks_bitmap_info) + "\n"; - } - ret += " fracture_big_blogs: " + convert(fracture_big_blogs_info) + " * " + PrettyPrintBytes(sizeof(int64) * 2) + "\n"; - ret += " in_write_retrieval: " + convert(in_write_retrieval_info) + " * " + PrettyPrintBytes(sizeof(std::string)) + "\n"; -#else - assert(false); -#endif - } - else - { - IScopedLock lock(mutex.get()); - - ret += "##CloudFile:\n"; - ret += " locked_extents: " + convert(locked_extents.capacity()) + " * " + PrettyPrintBytes(sizeof(SExtent))+"\n"; - ret += " bitmap: " + PrettyPrintBytes(bitmap->meminfo())+"/"+PrettyPrintBytes(bitmap->get_max_cache_items()*4096)+ "\n"; - ret += " big_blocks_bitmap: " + PrettyPrintBytes(big_blocks_bitmap->meminfo()) +"/"+PrettyPrintBytes(big_blocks_bitmap->get_max_cache_items()*4096)+ "\n"; - if (old_big_blocks_bitmap.get() != nullptr) - { - ret += " old_big_blocks_bitmap: " + PrettyPrintBytes(old_big_blocks_bitmap->meminfo()) + "\n"; - } - if (new_big_blocks_bitmap.get() != nullptr) - { - ret += " new_big_blocks_bitmap: " + PrettyPrintBytes(new_big_blocks_bitmap->meminfo()) + "\n"; - } - ret += " fracture_big_blogs: " + convert(fracture_big_blogs.size()) + " * " + PrettyPrintBytes(sizeof(int64) * 2) + "\n"; - ret += " in_write_retrieval: " + convert(in_write_retrieval.size()) + " * " + PrettyPrintBytes(sizeof(std::string)) + "\n"; - } - - { - IScopedReadLock lock(chunks_mutex.get()); - ret += " fs_chunks: " + convert(fs_chunks.capacity()) + " * " + PrettyPrintBytes(sizeof(IBackupFileSystem::SChunk)) + " = "+PrettyPrintBytes(sizeof(IBackupFileSystem::SChunk)*fs_chunks.capacity())+"\n"; - } - - ret += kv_store.meminfo(); - - return ret; -} - -void CloudFile::shrink_mem() -{ - if (is_async) - abort(); - - { - IScopedLock lock(mutex.get()); - if(bitmap.get()!=nullptr) - bitmap->flush(); - if (big_blocks_bitmap.get() != nullptr) - big_blocks_bitmap->flush(); - } - - kv_store.shrink_mem(); -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::shrink_mem_async(fuse_io_context& io) -{ - Server->Log("Shrinking memory (async)", LL_WARNING); - - size_t lock_idx = co_await lock_extent_async(0, 0, false); - - if (bitmap.get() != nullptr) - co_await bitmap->flush_async(io); - if (big_blocks_bitmap.get() != nullptr) - co_await big_blocks_bitmap->flush_async(io); - - unlock_extent_async(0, 0, false, lock_idx); - - co_await io.run_in_threadpool([this]() { - this->kv_store.shrink_mem(); - return 0; - }, "shrink kvstore mem"); - - co_return 0; -} -#endif - -int64 CloudFile::get_total_hits() -{ - return kv_store.get_total_hits(); -} - -int64 CloudFile::get_total_hits_async() -{ - return kv_store.get_total_hits_async(); -} - -int64 CloudFile::get_total_memory_hits() -{ - return kv_store.get_total_memory_hits(); -} - -int64 CloudFile::get_total_memory_hits_async() -{ - return kv_store.get_total_memory_hits_async(); -} - -int64 CloudFile::get_total_cache_miss_backend() -{ - return kv_store.get_total_cache_miss_backend(); -} - -int64 CloudFile::get_total_cache_miss_decompress() -{ - return kv_store.get_total_cache_miss_decompress(); -} - -int64 CloudFile::get_total_dirty_ops() -{ - return kv_store.get_total_dirty_ops(); -} - -int64 CloudFile::get_total_balance_ops() -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->get_total_balance_ops(); - } - return -1; -} - -int64 CloudFile::get_total_del_ops() -{ - KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); - if (frontend != nullptr) - { - return frontend->get_total_del_ops(); - } - return -1; -} - -int64 CloudFile::get_total_put_ops() -{ - return kv_store.get_total_put_ops(); -} - -int64 CloudFile::get_total_compress_ops() -{ - return kv_store.get_total_compress_ops(); -} - -bool CloudFile::Flush(bool do_submit, bool for_slog) -{ - if (is_async) - abort(); - - if (!bdev_name.empty()) - { - Server->Log("Switching bcache device " + bdev_name + " to writethrough mode...", LL_INFO); - - { - IScopedLock lock(mutex.get()); - - if (writeback_count == 0) - { - std::unique_ptr writeback_percent_f(Server->openFile("/sys/block/" + bdev_name + "/bcache/writeback_percent", MODE_READ)); - bcache_writeback_percent = trim(writeback_percent_f->Read(512)); - } - - ++writeback_count; - } - - doublefork_writestring("writethrough", "/sys/block/" + bdev_name + "/bcache/cache_mode"); - - //disables writeback throttling - writestring("0", "/sys/block/" + bdev_name + "/bcache/writeback_percent"); - - while (true) - { - Server->wait(100); - std::unique_ptr state_f(Server->openFile("/sys/block/" + bdev_name + "/bcache/state", MODE_READ)); - std::string state = trim(state_f->Read(512)); - - if (!state.empty() && state != "dirty") - { - Server->Log("Bcache device is in state '"+state+"'", LL_INFO); - break; - } - } - -#ifndef _WIN32 - /* - This makes Linux deadlock in some cases. Not sure if even necessary... - if (!bdev_name.empty()) - { - Server->Log("Flushing /dev/" + bdev_name + "..."); - flush_dev("/dev/" + bdev_name); - } - - - if (!ldev_name.empty()) - { - Server->Log("Flushing /dev/" + ldev_name + "..."); - flush_dev("/dev/" + ldev_name); - }*/ -#endif - } - - IScopedLock lock(mutex.get()); - - is_flushing = true; - - Auto(is_flushing = false); - - while (flush_enabled>0) - { - lock.relock(nullptr); - Server->wait(1000); - lock.relock(mutex.get()); - } - - int64 orig_cloudfile_size = cloudfile_size; - ++wait_for_exclusive; - lock_extent(lock, 0, orig_cloudfile_size, true); - --wait_for_exclusive; - - if (for_slog - && slog.get() != nullptr - && slog->Size() < slog_max_size) - { - return true; - } - - bool ret = true; - - IScopedLock migrate_lock(nullptr); - if (migrate_to_cf != nullptr) - { - migrate_lock.relock(migrate_to_cf->mutex.get()); - ++migrate_to_cf->wait_for_exclusive; - migrate_to_cf->lock_extent(migrate_lock, 0, orig_cloudfile_size, true); - --migrate_to_cf->wait_for_exclusive; - - if (!update_migration_settings()) - { - Server->Log("Error updating migration settings", LL_ERROR); - migrate_to_cf->unlock_extent(migrate_lock, 0, orig_cloudfile_size, true); - unlock_extent(lock, 0, orig_cloudfile_size, true); - return false; - } - - if (!migrate_to_cf->close_bitmaps()) - { - migrate_to_cf->unlock_extent(migrate_lock, 0, orig_cloudfile_size, true); - unlock_extent(lock, 0, orig_cloudfile_size, true); - return false; - } - - size_t retry_n = 0; - while (migrate_to_cf->kv_store.checkpoint(do_submit, retry_n)) - { - Server->Log("Error checkpointing migrate_to KVStore. Retrying...", LL_WARNING); - retryWait(retry_n++); - } - - try - { - migrate_to_cf->open_bitmaps(); - } - catch (const std::runtime_error& e) - { - Server->Log(std::string("Error while opening migrate_to bitmaps: ") + e.what(), LL_ERROR); - ret = false; - } - } - - if(!close_bitmaps()) - { - if (migrate_to_cf != nullptr) - { - migrate_to_cf->unlock_extent(migrate_lock, 0, orig_cloudfile_size, true); - } - unlock_extent(lock, 0, orig_cloudfile_size, true); - if (!bdev_name.empty()) - { - Server->Log("Switching bcache device " + bdev_name + " to writeback mode...", LL_INFO); - - --writeback_count; - - if (writeback_count == 0) - { - doublefork_writestring("writeback", "/sys/block/" + bdev_name + "/bcache/cache_mode"); - } - } - return false; - } - - size_t retry_n=0; - while(!kv_store.checkpoint(do_submit, retry_n)) - { - Server->Log("Error checkpointing KVStore. Retrying...", LL_WARNING); - retryWait(retry_n++); - } - - try - { - open_bitmaps(); - } - catch(const std::runtime_error& e) - { - Server->Log(std::string("Error while opening bitmaps: ") + e.what(), LL_ERROR); - ret = false; - } - - if (migrate_to_cf != nullptr) - { - migrate_to_cf->unlock_extent(migrate_lock, 0, orig_cloudfile_size, true); - } - - unlock_extent(lock, 0, orig_cloudfile_size, true); - - if (!bdev_name.empty()) - { - Server->Log("Switching bcache device " + bdev_name + " to writeback mode...", LL_INFO); - - --writeback_count; - - if (writeback_count == 0) - { - lock.relock(nullptr); - - doublefork_writestring("writeback", "/sys/block/" + bdev_name + "/bcache/cache_mode"); - - writestring(bcache_writeback_percent, "/sys/block/" + bdev_name + "/bcache/writeback_percent"); - } - } - - if (!slog_open()) - { - ret = false; - } - - return ret; -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::FlushAsync(fuse_io_context& io, bool do_submit) -{ - if (!bdev_name.empty()) - { - Server->Log("Switching bcache device " + bdev_name + " to writethrough mode...", LL_INFO); - - { - IScopedLock lock(mutex.get()); - - if (writeback_count == 0) - { - std::unique_ptr writeback_percent_f(Server->openFile("/sys/block/" + bdev_name + "/bcache/writeback_percent", MODE_READ)); - bcache_writeback_percent = trim(writeback_percent_f->Read(512)); - } - - ++writeback_count; - } - - doublefork_writestring("writethrough", "/sys/block/" + bdev_name + "/bcache/cache_mode"); - - //disables writeback throttling - writestring("0", "/sys/block/" + bdev_name + "/bcache/writeback_percent"); - - while (true) - { - Server->wait(100); - std::unique_ptr state_f(Server->openFile("/sys/block/" + bdev_name + "/bcache/state", MODE_READ)); - std::string state = trim(state_f->Read(512)); - - if (!state.empty() && state != "dirty") - { - Server->Log("Bcache device is in state '" + state + "'", LL_INFO); - break; - } - } - -#ifndef _WIN32 - /* - This makes Linux deadlock in some cases. Not sure if even necessary... - if (!bdev_name.empty()) - { - Server->Log("Flushing /dev/" + bdev_name + "..."); - flush_dev("/dev/" + bdev_name); - } - - - if (!ldev_name.empty()) - { - Server->Log("Flushing /dev/" + ldev_name + "..."); - flush_dev("/dev/" + ldev_name); - }*/ -#endif - } - - is_flushing = true; - Auto(is_flushing = false); - - while (flush_enabled > 0) - { - co_await io.sleep_ms(1000); - } - - int64 orig_cloudfile_size = cloudfile_size; - ++wait_for_exclusive; - size_t lock_idx = co_await lock_extent_async(0, orig_cloudfile_size, true); - --wait_for_exclusive; - resume_exclusive_awaiters(); - - bool ret = true; - - size_t migrate_lock_idx; - if (migrate_to_cf != nullptr) - { - ++migrate_to_cf->wait_for_exclusive; - migrate_lock_idx = co_await migrate_to_cf->lock_extent_async(0, orig_cloudfile_size, true); - --migrate_to_cf->wait_for_exclusive; - migrate_to_cf->resume_exclusive_awaiters(); - - if (!update_migration_settings()) - { - Server->Log("Error updating migration settings", LL_ERROR); - migrate_to_cf->unlock_extent_async(0, orig_cloudfile_size, true, migrate_lock_idx); - unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); - co_return false; - } - - if (!migrate_to_cf->close_bitmaps()) - { - migrate_to_cf->unlock_extent_async(0, orig_cloudfile_size, true, migrate_lock_idx); - unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); - co_return false; - } - - size_t retry_n = 0; - while (migrate_to_cf->kv_store.checkpoint(do_submit, retry_n)) - { - Server->Log("Error checkpointing migrate_to KVStore. Retrying...", LL_WARNING); - retryWait(retry_n++); - } - - try - { - migrate_to_cf->open_bitmaps(); - } - catch (const std::runtime_error& e) - { - Server->Log(std::string("Error while opening migrate_to bitmaps: ") + e.what(), LL_ERROR); - ret = false; - } - } - - if (!close_bitmaps()) - { - unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); - if (!bdev_name.empty()) - { - Server->Log("Switching bcache device " + bdev_name + " to writeback mode...", LL_INFO); - - --writeback_count; - - if (writeback_count == 0) - { - doublefork_writestring("writeback", "/sys/block/" + bdev_name + "/bcache/cache_mode"); - } - } - co_return false; - } - - bool b = co_await io.run_in_threadpool([this, do_submit]() { - size_t retry_n = 0; - while (!this->kv_store.checkpoint(do_submit, retry_n)) - { - Server->Log("Error checkpointing KVStore. Retrying...", LL_WARNING); - retryWait(retry_n++); - } - - try - { - this->open_bitmaps(); - } - catch (const std::runtime_error& e) - { - Server->Log(std::string("Error while opening bitmaps: ") + e.what(), LL_ERROR); - return false; - } - return true; - - }, "kv checkpoint"); - - if (!b) - ret = false; - - - if (migrate_to_cf != nullptr) - { - migrate_to_cf->unlock_extent_async(0, orig_cloudfile_size, true, migrate_lock_idx); - } - - unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); - - if (!bdev_name.empty()) - { - Server->Log("Switching bcache device " + bdev_name + " to writeback mode...", LL_INFO); - - --writeback_count; - - if (writeback_count == 0) - { - doublefork_writestring("writeback", "/sys/block/" + bdev_name + "/bcache/cache_mode"); - - writestring(bcache_writeback_percent, "/sys/block/" + bdev_name + "/bcache/writeback_percent"); - } - } - - if (!slog_open()) - { - ret = false; - } - - co_return ret; -} -#endif //HAS_ASYNC - -bool CloudFile::slog_open() -{ - if (slog.get() == nullptr) - return true; - - slog.reset(); - Server->deleteFile(slog_path); - - syncDir(ExtractFilePath(slog_path, os_file_sep())); - - slog.reset(Server->openFile(slog_path, MODE_WRITE)); - - if (slog.get() == nullptr) - { - Server->Log("Error opening slog file at " + slog_path + ". " + os_last_error_str(), LL_ERROR); - return false; - } - - if (slog->Size() != 0) - return false; - - if (slog->Write(slog_magic) != slog_magic.size()) - { - Server->Log("Error writing slog magic. " + os_last_error_str(), LL_ERROR); - return false; - } - - int64 transid = kv_store.get_basetransid(); - - if (slog->Write(reinterpret_cast(&transid), sizeof(transid)) != sizeof(transid)) - { - Server->Log("Error writing transid to slog. " + os_last_error_str(), LL_ERROR); - return false; - } - - slog->Sync(); - - slog_size = slog_magic.size() + sizeof(transid); - slog_last_sync = slog_magic.size() + sizeof(transid);; - - return true; -} - -void CloudFile::purge_jemalloc() -{ -#if !defined(NO_JEMALLOC) && !defined(_WIN32) - unsigned narenas = 0; - size_t sz = sizeof(unsigned); - if (!mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) - { - std::string purge_cmd = "arena."+convert(narenas)+".purge"; - if(mallctl(purge_cmd.c_str(), NULL, 0, NULL, 0)) - { - Server->Log("Purging jemalloc arenas failed", LL_WARNING); - } - } -#endif -} - -void CloudFile::set_jemalloc_dirty_decay(int64 timems) -{ -#if !defined(NO_JEMALLOC) && !defined(_WIN32) - ssize_t dirty_decay_old; - ssize_t dirty_decay_new = timems; - size_t sz = sizeof(dirty_decay_old); - if (!mallctl("arenas.dirty_decay_ms", &dirty_decay_old, &sz, &dirty_decay_new, sizeof(dirty_decay_new))) - { - Server->Log("Setting dirty decay ms failed", LL_WARNING); - } -#endif -} - -bool CloudFile::Flush() -{ - return Flush(true); -} - - -void CloudFile::open_bitmaps() -{ - bitmaps_file_size = 0; - - big_blocks_bitmap_file = kv_store.get("big_blocks_bitmap", TransactionalKvStore::BitmapInfo::Unknown, - TransactionalKvStore::Flag::disable_fd_cache|TransactionalKvStore::Flag::disable_throttling| - TransactionalKvStore::Flag::disable_memfiles, -1); - - if(big_blocks_bitmap_file==nullptr) - { - throw std::runtime_error("Cannot open big_blocks_bitmap"); - } - - bitmaps_file_size += big_blocks_bitmap_file->Size(); - - if(new_big_blocks_bitmap_file!=nullptr) - { - std::string fn = new_big_blocks_bitmap_file->getFilename(); - Server->destroy(new_big_blocks_bitmap_file); - Server->deleteFile(fn); - } - - new_big_blocks_bitmap_file = Server->openTemporaryFile(); - - if(new_big_blocks_bitmap_file==nullptr) - { - throw std::runtime_error("Cannot open new_big_blocks_bitmap"); - } - - old_big_blocks_bitmap_file = kv_store.get("old_big_blocks_bitmap", TransactionalKvStore::BitmapInfo::Unknown, - TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling | - TransactionalKvStore::Flag::disable_memfiles, -1); - - if(old_big_blocks_bitmap_file==nullptr) - { - throw std::runtime_error("Cannot open old_big_blocks_bitmap"); - } - - bitmaps_file_size += old_big_blocks_bitmap_file->Size(); - - int64 big_blocks = div_up(cloudfile_size, big_block_size); - - size_t bitmap_cache_items = static_cast((70 * 1024 * 1024*memory_usage_factor) / bitmap_block_size); - - size_t big_blocks_bitmap_cache_items = (std::max)(static_cast(bitmap_cache_items / (big_block_size / block_size)), - static_cast(100)); - - big_blocks_bitmap.reset(new SparseFileBitmap(big_blocks_bitmap_file, big_blocks, true, big_blocks_bitmap_cache_items)); - - new_big_blocks_bitmap.reset(new FileBitmap(new_big_blocks_bitmap_file, big_blocks, false)); - old_big_blocks_bitmap.reset(new FileBitmap(old_big_blocks_bitmap_file, big_blocks, false)); - - bitmap_file = kv_store.get("bitmap", TransactionalKvStore::BitmapInfo::Unknown, - TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling| - TransactionalKvStore::Flag::disable_memfiles, -1); - - if(bitmap_file==nullptr) - { - throw std::runtime_error("Cannot open bitmap"); - } - - bitmaps_file_size += bitmap_file->Size(); - - int64 bitmap_blocks = div_up(div_up(cloudfile_size, big_block_size)*big_block_size, block_size); - - bitmap_cache_items = (std::max)(bitmap_cache_items, static_cast(100)); - - bitmap.reset(new SparseFileBitmap(bitmap_file, bitmap_blocks, false, bitmap_cache_items)); - - if (used_bytes == 0) - { - used_bytes = bitmap->count_bits()*block_size; - bitmap->clear_cache_no_flush(); - } - - Server->Log("Cloud drive size: " + PrettyPrintBytes(used_bytes)); -} - -bool CloudFile::bitmap_has_big_block( int64 blocknum ) -{ - return bitmap->get_range((blocknum*big_block_size)/block_size, ((blocknum+1)*big_block_size)/block_size); -} - -bool CloudFile::bitmap_has_small_block( int64 blocknum ) -{ - return bitmap->get_range((blocknum*small_block_size)/block_size, ((blocknum+1)*small_block_size)/block_size); -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::bitmap_has_big_block_async(fuse_io_context& io, int64 blocknum) -{ - co_return co_await bitmap->get_range_async(io, (blocknum * big_block_size) / block_size, ((blocknum + 1) * big_block_size) / block_size); -} - -fuse_io_context::io_uring_task CloudFile::bitmap_has_small_block_async(fuse_io_context& io, int64 blocknum) -{ - co_return co_await bitmap->get_range_async(io, (blocknum * small_block_size) / block_size, ((blocknum + 1) * small_block_size) / block_size); -} -#endif - -bool CloudFile::Sync() -{ - if (!slog_path.empty()) - { - IScopedLock lock(mutex.get()); - lock_extent(lock, 1, 0, false); - bool ret; - if (slog.get() == nullptr) - { - ret = false; - } - else - { - ret = slog->Sync(); - } - unlock_extent(lock, 1, 0, false); - return ret; - } - return true; -} - -int64 CloudFile::getDirtyBytes() -{ - return kv_store.get_dirty_bytes() - bitmaps_file_size; -} - -int64 CloudFile::getSubmittedBytes() -{ - return kv_store.get_submitted_bytes(); -} - -int64 CloudFile::getTotalSubmittedBytes() -{ - return kv_store.get_total_submitted_bytes(); -} - -int64 CloudFile::getUsedBytes() -{ - IScopedLock lock(mutex.get()); - - return used_bytes; -} - -std::string CloudFile::getNumDirtyItems() -{ - std::string dirty_items; - std::map di = kv_store.get_num_dirty_items(); - for(std::map::iterator it=di.begin(); - it!=di.end();++it) - { - dirty_items+=convert(it->first)+": "+convert(it->second)+"\n"; - } - - return dirty_items; -} - -std::string CloudFile::getNumMemfileItems() -{ - std::string memfile_items; - std::map di = kv_store.get_num_memfile_items(); - for (std::map::iterator it = di.begin(); - it != di.end(); ++it) - { - memfile_items += convert(it->first) + ": " + convert(it->second) + "\n"; - } - - return memfile_items; -} - -int64 CloudFile::getCacheSize() -{ - return kv_store.get_cache_size(); -} - -void CloudFile::Reset() -{ - if (is_async) - abort(); - - IScopedLock lock(mutex.get()); - - close_bitmaps(); - - kv_store.reset(); - - open_bitmaps(); -} - -bool CloudFile::close_bitmaps() -{ - IScopedLock lock(mutex.get()); - - if(!big_blocks_bitmap->flush()) - { - return false; - } - - if(!bitmap->flush()) - { - return false; - } - - for(int64 i=0;isize();++i) - { - if(new_big_blocks_bitmap->get(i)) - { - old_big_blocks_bitmap->set(i, true); - } - } - - if(!old_big_blocks_bitmap->flush()) - { - return false; - } - - big_blocks_bitmap.reset(); - bitmap.reset(); - old_big_blocks_bitmap.reset(); - - kv_store.release("big_blocks_bitmap"); - kv_store.release("bitmap"); - kv_store.release("old_big_blocks_bitmap"); - - return true; -} - -void CloudFile::lock_extent(IScopedLock& lock, int64 start, int64 length, bool exclusive) -{ - if (is_async) - abort(); - - while (!exclusive - && wait_for_exclusive>0) - { - lock.relock(nullptr); - Server->wait(1); - lock.relock(mutex.get()); - } - - size_t first_dead; - bool retry=true; - while(retry) - { - retry=false; - first_dead = std::string::npos; - for(size_t i=0;i=extent.start+extent.length) - || (start>=extent.start && startextent.start && start+length<=extent.start+extent.length) ) - { - if(extent.cond!=nullptr) - { - retry=true; - ++extent.refcount; - ICondition* cond = extent.cond; - cond->wait(&lock); - - for (size_t j = 0; j < locked_extents.size(); ++j) - { - if (locked_extents[j].cond == cond) - { - --locked_extents[j].refcount; - if (locked_extents[j].refcount <= 0) - { - assert(!locked_extents[j].alive); - Server->destroy(locked_extents[j].cond); - locked_extents[j].cond = nullptr; - if (j == locked_extents_max_alive - && locked_extents_max_alive>0) - { - --locked_extents_max_alive; - while (locked_extents_max_alive > 0 - && !locked_extents[locked_extents_max_alive].alive - && locked_extents[locked_extents_max_alive].refcount <= 0) - { - --locked_extents_max_alive; - } - } - } - break; - } - } - break; - } - else if(exclusive) - { - retry=true; - lock.relock(nullptr); - Server->wait(1); - lock.relock(mutex.get()); - break; - } - } - } - } - - if (first_dead == std::string::npos) - { - for (size_t i = locked_extents_max_alive+1; - i < locked_extents.size(); ++i) - { - if (!locked_extents[i].alive - && locked_extents[i].refcount <= 0) - { - first_dead = i; - break; - } - } - } - - if (first_dead != std::string::npos) - { - SExtent& extent = locked_extents[first_dead]; - extent.start = start; - extent.length = length; - if (exclusive) - { - extent.cond = Server->createCondition(); - } - else - { - extent.cond = nullptr; - } - extent.refcount = 1; - extent.alive = true; - - if (first_dead > locked_extents_max_alive) - { - locked_extents_max_alive = first_dead; - } - return; - } - - SExtent new_extent; - new_extent.start = start; - new_extent.length = length; - if(exclusive) - { - new_extent.cond = Server->createCondition(); - } - else - { - new_extent.cond = nullptr; - } - new_extent.refcount = 1; - new_extent.alive = true; - - locked_extents.push_back(new_extent); - locked_extents_max_alive = locked_extents.size() - 1; -} - -void CloudFile::unlock_extent(IScopedLock& lock, int64 start, int64 length, bool exclusive) -{ - if (is_async) - abort(); - - for(size_t i=0;idestroy(extent.cond); - extent.cond = nullptr; - extent.alive = false; - if (i == locked_extents_max_alive - && locked_extents_max_alive>0) - { - --locked_extents_max_alive; - while (locked_extents_max_alive > 0 - && !locked_extents[locked_extents_max_alive].alive - && locked_extents[locked_extents_max_alive].refcount <= 0) - { - --locked_extents_max_alive; - } - } - } - else if (extent.cond != nullptr) - { - extent.cond->notify_all(); - } - - break; - } - } -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::lock_extent_async(int64 start, int64 length, bool exclusive) -{ - while (!exclusive - && wait_for_exclusive>0) - { - co_await ExclusiveAwaiter(*this); - } - - size_t first_dead; - bool retry=true; - while(retry) - { - retry=false; - first_dead = std::string::npos; - for(size_t i=0;i=extent.start+extent.length) - || (start>=extent.start && startextent.start && start+length<=extent.start+extent.length) ) - { - if(extent.exclusive || exclusive) - { - retry=true; - co_await ExtentAwaiter(extent); - break; - } - } - } - } - - if (first_dead == std::string::npos) - { - for (size_t i = locked_extents_max_alive+1; - i < async_locked_extents.size(); ++i) - { - if (async_locked_extents[i].refcount <= 0) - { - first_dead = i; - break; - } - } - } - - if (first_dead != std::string::npos) - { - SExtentAsync& extent = async_locked_extents[first_dead]; - assert(extent.awaiters == nullptr - && extent.refcount<=0); - extent.start = start; - extent.length = length; - extent.exclusive = exclusive; - extent.refcount = 1; - - if (first_dead > locked_extents_max_alive) - { - locked_extents_max_alive = first_dead; - } - co_return first_dead; - } - - SExtentAsync new_extent; - new_extent.start = start; - new_extent.length = length; - new_extent.exclusive = exclusive; - new_extent.refcount = 1; - new_extent.awaiters = nullptr; - - async_locked_extents.push_back(new_extent); - locked_extents_max_alive = async_locked_extents.size() - 1; - co_return async_locked_extents.size() - 1; -} -#endif //HAS_ASYNC - -#ifdef HAS_ASYNC -void CloudFile::unlock_extent_async(int64 start, int64 length, bool exclusive, size_t lock_idx) -{ - SExtentAsync& extent = async_locked_extents[lock_idx]; - assert(extent.start == start); - assert(extent.length == length); - assert(!exclusive || extent.exclusive); - - --extent.refcount; - assert(extent.refcount >= 0); - if(extent.refcount<=0) - { - extent.exclusive = false; - if (lock_idx == locked_extents_max_alive - && locked_extents_max_alive>0) - { - --locked_extents_max_alive; - while (locked_extents_max_alive > 0 - && async_locked_extents[locked_extents_max_alive].refcount <= 0) - { - --locked_extents_max_alive; - } - } - - AwaiterCoList* curr_awaiters = extent.awaiters; - extent.awaiters = nullptr; - while (curr_awaiters != nullptr) - { - AwaiterCoList* next = curr_awaiters->next; - curr_awaiters->awaiter.resume(); - curr_awaiters = next; - } - } -} -#endif //HAS_ASYNC - -void CloudFile::add_fracture_big_block(int64 b) -{ - if (fracture_big_blogs.size() > 100) - { - return; - } - - auto it = fracture_big_blogs.find(b); - if (it != fracture_big_blogs.end()) - { - return; - } - - fracture_big_blogs[b] = Server->getTimeMS()+60*1000; -} - -bool CloudFile::Resize(int64 nsize) -{ - if (is_async) - { -#ifdef HAS_ASYNC - CWData wdata; - wdata.addChar(queue_cmd_resize); - wdata.addVarInt(nsize); - - CRData rdata; - get_async_msg(wdata, rdata); - - char ch; - bool b = rdata.getChar(&ch); - assert(b); - return ch==1; -#else - assert(false); -#endif - } - - IScopedLock lock(mutex.get()); - - int64 orig_cloudfile_size = cloudfile_size; - ++wait_for_exclusive; - lock_extent(lock, 0, cloudfile_size, true); - --wait_for_exclusive; - - if (!close_bitmaps()) - { - unlock_extent(lock, 0, cloudfile_size, true); - return false; - } - bool ret = true; - - IFile* f_cloudfile_size = kv_store.get("cloudfile_size", TransactionalKvStore::BitmapInfo::Unknown, - TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); - - if (f_cloudfile_size == nullptr) - { - Server->Log("Cannot open cloudfile_size", LL_ERROR); - unlock_extent(lock, 0, orig_cloudfile_size, true); - return false; - } - - cloudfile_size = nsize; - - if (f_cloudfile_size->Write(0, reinterpret_cast(&cloudfile_size), sizeof(cloudfile_size)) != sizeof(cloudfile_size)) - { - Server->Log("Error writing new cloudfile size. " + os_last_error_str(), LL_ERROR); - ret = false; - } - - kv_store.release("cloudfile_size"); - - try - { - open_bitmaps(); - } - catch (const std::runtime_error&) - { - ret = false; - } - - unlock_extent(lock, 0, orig_cloudfile_size, true); - - return ret; -} - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::ResizeAsync(fuse_io_context& io, int64 nsize) -{ - int64 orig_cloudfile_size = cloudfile_size; - ++wait_for_exclusive; - size_t lock_idx = co_await lock_extent_async(0, cloudfile_size, true); - --wait_for_exclusive; - resume_exclusive_awaiters(); - - if (!close_bitmaps()) - { - unlock_extent_async(0, cloudfile_size, true, lock_idx); - co_return false; - } - bool ret = true; - - std::string cloudfile_size_str("cloudfile_size"); - - IFile* f_cloudfile_size = co_await kv_store.get_async(io, cloudfile_size_str, TransactionalKvStore::BitmapInfo::Unknown, - TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); - - if (f_cloudfile_size == NULL) - { - Server->Log("Cannot open cloudfile_size", LL_ERROR); - unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); - co_return false; - } - - cloudfile_size = nsize; - - if (f_cloudfile_size->Write(0, reinterpret_cast(&cloudfile_size), sizeof(cloudfile_size)) != sizeof(cloudfile_size)) - { - Server->Log("Error writing new cloudfile size. " + os_last_error_str(), LL_ERROR); - ret = false; - } - - co_await kv_store.release_async(io, cloudfile_size_str); - - bool b = co_await io.run_in_threadpool([this]() { - try - { - this->open_bitmaps(); - } - catch (const std::runtime_error&) - { - return false; - } - return true; - }, "resize open bm"); - - if (!b) - ret = false; - - unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); - - co_return ret; -} -#endif //HAS_ASYNC - -std::string CloudFile::getStats() -{ - return online_kv_store->get_stats(); -} - -int64 CloudFile::getCompBytes() -{ - return kv_store.get_comp_bytes(); -} - -void CloudFile::setDevNames(std::string bdev, std::string ldev) -{ - bdev_name = bdev; - ldev_name = ldev; -} - -void task_set_less_throttle() -{ -#ifndef _WIN32 - static bool warned = false; - if (prctl(PR_SET_IO_FLUSHER, 1, 0, 0, 0) != 0) - { - if (!warned) - { - warned = true; - Server->Log("Setting PR_SET_IO_FLUSHER failed. " + os_last_error_str(), LL_WARNING); - } - } -#endif -} - -void task_unset_less_throttle() -{ -#ifndef _WIN32 - prctl(PR_SET_IO_FLUSHER, 0, 0, 0, 0); -#endif -} - -/*_u32 CloudFile::prepare_zero_n_sqes(_u32 towrite) -{ - return towrite /zero_memfd_size + (towrite%zero_memfd_size ==0 ? 0 : 1); -} - -std::vector CloudFile::prepare_zeros(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io, _u32 towrite, _u32 peek_add) -{ - _u32 nwrite = prepare_zero_n_sqes(towrite); - std::vector sqes; - sqes.reserve(nwrite); - while(nwrite>0) - { - io_uring_sqe* sqe = io.get_sqe(nwrite + peek_add); - --nwrite; - - _u32 c_to_write = (std::min)(towrite, zero_memfd_size); - io_uring_prep_splice(sqe, zero_memfd, - 0, fuse_io.pipe[1], -1, c_to_write, - SPLICE_F_MOVE| SPLICE_F_NONBLOCK); - sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; - sqes.push_back(sqe); - towrite-=c_to_write; - } - assert(towrite==0); - return sqes; -}*/ - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::empty_pipe(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io) -{ - int rc; - do - { - io_uring_sqe* write_sqe = io.get_sqe(); - if (write_sqe == nullptr) - co_return -1; - - io_uring_prep_splice(write_sqe, fuse_io.pipe[0], - -1, get_file_fd(null_file->getOsHandle()), 0, 512, - SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); - - rc = co_await io.complete(write_sqe); - } while (rc > 0); - co_return 0; -} -#endif //HAS_ASYNC - -#ifdef HAS_ASYNC -fuse_io_context::io_uring_task CloudFile::verify_pipe_empty(fuse_io_context& io, - fuse_io_context::FuseIo& fuse_io) -{ - struct io_uring_sqe *sqe; - sqe = io.get_sqe(); - if(sqe==nullptr) - co_return false; - - char ch; - io_uring_prep_read(sqe, fuse_io.pipe[0], - &ch, 1, 0); - sqe->flags |= IOSQE_FIXED_FILE; - - int rc = co_await io.complete(sqe); - - co_return rc!=1; -} -#endif //HAS_ASYNC +} + + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::preload_async(fuse_io_context& io, int64 start, int64 stop, size_t n_threads) +{ + const _u32 rsize = 4096; + auto ios = std::make_unique< std::vector > >(); + + for (size_t i = 0; i < n_threads; ++i) + { + //needs special handling for non-fixed pipe + abort(); + std::unique_ptr fuse_io = std::make_unique(); + int rc = pipe2(fuse_io->pipe, O_CLOEXEC | O_NONBLOCK); + if (rc != 0) + { + Server->Log("Error creating pipe for preload. " + os_last_error_str(), LL_ERROR); + co_return -1; + } + rc = fcntl(fuse_io->pipe[0], F_SETPIPE_SZ, rsize); + if (rc < 0) + { + Server->Log("Error setting pipe size to " + std::to_string(rsize) + " in preload_async. " + os_last_error_str(), LL_ERROR); + co_return -1; + } + fuse_io->write_fuse = false; + ios->push_back(std::move(fuse_io)); + } + + auto ios_waiters_head = std::make_unique(nullptr); + + while (start < stop + && start < cloudfile_size) + { + size_t lock_idx = co_await lock_extent_async(0, 0, false); + + bool has_block = co_await bitmap->get_async(io, start / block_size); + + if (!has_block) + { + start += block_size; + unlock_extent_async(0, 0, false, lock_idx); + continue; + } + + bool in_big_block = co_await big_blocks_bitmap->get_async(io, start / big_block_size); + + unlock_extent_async(0, 0, false, lock_idx); + + int64 curr_block_size; + if (in_big_block) + { + curr_block_size = big_block_size; + } + else + { + curr_block_size = small_block_size; + } + + int64 toread = (std::min)((curr_block_size - start % curr_block_size), stop - start); + + preload_async_read(io, *ios, *ios_waiters_head, start, rsize); + + start += toread; + } + + while (ios->size() != n_threads) + { + co_await io.sleep_ms(100); + } + + for (auto& it : *ios) + { + io_uring_sqe* sqe = io.get_sqe(); + io_uring_prep_close(sqe, it->pipe[0]); + io.submit(sqe); + + io_uring_sqe* sqe2 = io.get_sqe(); + io_uring_prep_close(sqe2, it->pipe[1]); + io.submit(sqe2); + } +} +#endif //HAS_ASYNC + +void CloudFile::cmd(const std::string & c) +{ + if (is_async) + abort(); + + str_map params; + ParseParamStrHttp(c, ¶ms); + + std::string action = params["a"]; + if (action == "preload") + { + auto it_start = params.find("start"); + auto it_stop = params.find("stop"); + auto it_n_threads = params.find("n_threads"); + + if (it_start == params.end()) + { + Server->Log("Insufficient arguments for preload. 'start' missing.", LL_WARNING); + return; + } + + if (it_stop == params.end()) + { + Server->Log("Insufficient arguments for preload. 'stop' missing.", LL_WARNING); + return; + } + + size_t n_threads = 8; + if (it_n_threads!=params.end()) + { + n_threads = watoi(it_n_threads->second); + } + + preload(watoi64(it_start->second), watoi64(it_stop->second), n_threads); + } + else if (action == "scrub_sync_test1") + { + KvStoreFrontend::start_scrub_sync_test1(); + } + else if (action == "scrub_sync_test2") + { + KvStoreFrontend::start_scrub_sync_test2(); + } + else if (action == "reset_disk_errors") + { + auto frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { +#ifdef HAS_LOCAL_BACKEND + auto local = dynamic_cast(frontend->getBackend()); + if (local != nullptr) + { + if (!local->reset_disk_errors()) + { + Server->Log("Resetting disk error info failed", LL_WARNING); + } + } + else +#endif //HAS_LOCAL_BACKEND + { + Server->Log("Wrong kv store backend type (not KvStoreBackendLocal)", LL_WARNING); + } + } + else + { + Server->Log("Wrong kv store type (not KvStoreFrontend)", LL_WARNING); + } + } + else if (action == "dirty_all") + { + kv_store.dirty_all(); + } + else if (action == "retry_all_deletion") + { + auto frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + frontend->retry_all_deletion(); + } + } + else if (action == "disable_compression") + { + kv_store.disable_compression(watoi64(params["time"])); + } + else if (action == "disable_flush") + { + IScopedLock lock(mutex.get()); + ++flush_enabled; + } + else if (action == "enable_flush") + { + IScopedLock lock(mutex.get()); + if (flush_enabled > 0) + --flush_enabled; + } + else if (action == "preload_items") + { + auto it_n_threads = params.find("n_threads"); + + size_t n_threads = os_get_num_cpus()*3; + if (it_n_threads != params.end()) + { + n_threads = watoi(it_n_threads->second); + } + + int tag = watoi(params["tag"]); + bool enable_memfiles = params["enable_memfiles"] == "1"; + bool load_only = params["load_only"] == "1"; + + preload_items(params["fn"], n_threads, tag, !enable_memfiles, load_only); + } + else if (action == "preload_remove") + { + int tag = watoi(params["tag"]); + kv_store.remove_preload_items(tag); + } + else if (action == "set_loglevel") + { + int loglevel = LL_INFO; + if (params["loglevel"] == "debug") + loglevel = LL_DEBUG; + else if (params["loglevel"] == "warn") + loglevel = LL_WARNING; + else if (params["loglevel"] == "error") + loglevel = LL_ERROR; + else if (params["loglevel"] == "info") + loglevel = LL_INFO; + else + loglevel = watoi(params["loglevel"]); + + Server->setLogLevel(loglevel); + } + else if (action == "migrate") + { + if (migration_ticket != ILLEGAL_THREADPOOL_TICKET) + { + Server->Log("Waiting for existing migration to finish", LL_WARNING); + Server->getThreadPool()->waitFor(migration_ticket); + } + + std::string conf_fn = params["conf"]; + migration_ticket = Server->getThreadPool()->execute(new MigrationCoordThread(this, conf_fn, false), "migrate coord"); + } + else if (action == "stop_defrag") + { + auto frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + frontend->stop_defrag(); + } + } + else if (action == "disable_read_memfiles") + { + kv_store.set_disable_read_memfiles(true); + } + else if (action == "disable_write_memfiles") + { + kv_store.set_disable_write_memfiles(true); + } + else if (action == "enable_read_memfiles") + { + kv_store.set_disable_read_memfiles(false); + } + else if (action == "enable_write_memfiles") + { + kv_store.set_disable_write_memfiles(false); + } + else if (action == "set_background_throttle_limit") + { + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { +#ifdef HAS_LOCAL_BACKEND + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + backend->set_background_throttle_limit(atof(params["limit"].c_str())); + } +#endif + } + } + else if (action == "set_all_mirrored") + { + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + frontend->set_all_mirrored(true); + } + } + else if (action == "set_all_unmirrored") + { + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + frontend->set_all_mirrored(false); + } + } + else if (action == "enable_raid_freespace_stats") + { + IScopedLock lock(mutex.get()); + enable_raid_freespace_stats = true; + } + else if (action == "disable_raid_freespace_stats") + { + IScopedLock lock(mutex.get()); + enable_raid_freespace_stats = false; + } + else if (action == "purge_jemalloc") + { + purge_jemalloc(); + } + else if (action == "set_jemalloc_dirty_decay_ms") + { + set_jemalloc_dirty_decay(watoi64(params["timems"])); + } + else + { + Server->Log("Unknown cloud file action '" + action + "'", LL_WARNING); + } +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::cmd_async(fuse_io_context& io, const std::string& c) +{ + str_map params; + ParseParamStrHttp(c, ¶ms); + + std::string action = params["a"]; + if (action == "preload") + { + auto it_start = params.find("start"); + auto it_stop = params.find("stop"); + auto it_n_threads = params.find("n_threads"); + + if (it_start == params.end()) + { + Server->Log("Insufficient arguments for preload. 'start' missing.", LL_WARNING); + co_return 0; + } + + if (it_stop == params.end()) + { + Server->Log("Insufficient arguments for preload. 'stop' missing.", LL_WARNING); + co_return 0; + } + + size_t n_threads = 8; + if (it_n_threads != params.end()) + { + n_threads = watoi(it_n_threads->second); + } + + co_await preload_async(io, watoi64(it_start->second), watoi64(it_stop->second), n_threads); + + co_return 0; + } + else if (action == "scrub_sync_test1") + { + co_return co_await io.run_in_threadpool([]() { + KvStoreFrontend::start_scrub_sync_test1(); + return 0; + }, "test1"); + } + else if (action == "scrub_sync_test2") + { + co_return co_await io.run_in_threadpool([]() { + KvStoreFrontend::start_scrub_sync_test2(); + return 0; + }, "test2"); + } + else if (action == "reset_disk_errors") + { + auto frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + auto local = dynamic_cast(frontend->getBackend()); + if (local != nullptr) + { + co_return co_await io.run_in_threadpool([local]() { + if (!local->reset_disk_errors()) + { + Server->Log("Resetting disk error info failed", LL_WARNING); + } + return 0; + }, "rest disk errors"); + } + else + { + Server->Log("Wrong kv store backend type (not KvStoreBackendLocal)", LL_WARNING); + } + } + else + { + Server->Log("Wrong kv store type (not KvStoreFrontend)", LL_WARNING); + } + } + else if (action == "dirty_all") + { + co_return co_await io.run_in_threadpool([this]() { + this->kv_store.dirty_all(); + return 0; + }, "dirty all"); + } + else if (action == "retry_all_deletion") + { + auto frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + co_return co_await io.run_in_threadpool([frontend]() { + frontend->retry_all_deletion(); + return 0; + }, "del retry all"); + } + } + else if (action == "disable_compression") + { + int64 timems = watoi64(params["time"]); + co_return co_await io.run_in_threadpool([this, timems]() { + this->kv_store.disable_compression(timems); + return 0; + }, "disable compression"); + } + else if (action == "disable_flush") + { + IScopedLock lock(mutex.get()); + ++flush_enabled; + } + else if (action == "enable_flush") + { + IScopedLock lock(mutex.get()); + if (flush_enabled > 0) + --flush_enabled; + } + else if (action == "preload_items") + { + auto it_n_threads = params.find("n_threads"); + + size_t n_threads = os_get_num_cpus() * 3; + if (it_n_threads != params.end()) + { + n_threads = watoi(it_n_threads->second); + } + + int tag = watoi(params["tag"]); + bool enable_memfiles = params["enable_memfiles"] == "1"; + bool load_only = params["load_only"] == "1"; + + co_return co_await preload_items_async(io, params["fn"], n_threads, tag, !enable_memfiles, load_only); + } + else if (action == "preload_remove") + { + int tag = watoi(params["tag"]); + co_return co_await io.run_in_threadpool([this, tag]() { + this->kv_store.remove_preload_items(tag); + return 0; + }, "remove preload items"); + } + else if (action == "set_loglevel") + { + int loglevel = LL_INFO; + if (params["loglevel"] == "debug") + loglevel = LL_DEBUG; + else if (params["loglevel"] == "warn") + loglevel = LL_WARNING; + else if (params["loglevel"] == "error") + loglevel = LL_ERROR; + else if (params["loglevel"] == "info") + loglevel = LL_INFO; + else + loglevel = watoi(params["loglevel"]); + + Server->setLogLevel(loglevel); + } + else if (action == "migrate") + { + abort(); + + if (migration_ticket != ILLEGAL_THREADPOOL_TICKET) + { + Server->Log("Waiting for existing migration to finish", LL_WARNING); + Server->getThreadPool()->waitFor(migration_ticket); + } + + std::string conf_fn = params["conf"]; + migration_ticket = Server->getThreadPool()->execute(new MigrationCoordThread(this, conf_fn, false), "migrate coord"); + } + else if (action == "stop_defrag") + { + auto frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + co_return co_await io.run_in_threadpool([frontend]() { + frontend->stop_defrag(); + return 0; + }, "stop defrag"); + } + } + else if (action == "disable_read_memfiles") + { + co_return co_await io.run_in_threadpool([this]() { + kv_store.set_disable_read_memfiles(true); + return 0; + }, "disable read memfiles"); + } + else if (action == "disable_write_memfiles") + { + co_return co_await io.run_in_threadpool([this]() { + kv_store.set_disable_write_memfiles(true); + return 0; + }, "disable_write_memfiles"); + } + else if (action == "enable_read_memfiles") + { + co_return co_await io.run_in_threadpool([this]() { + kv_store.set_disable_read_memfiles(false); + return 0; + }, "enable_read_memfiles"); + } + else if (action == "enable_write_memfiles") + { + co_return co_await io.run_in_threadpool([this]() { + kv_store.set_disable_write_memfiles(false); + return 0; + }, "enable_write_memfiles"); + } + else if (action == "set_background_throttle_limit") + { + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + KvStoreBackendLocal* backend = dynamic_cast(frontend->getBackend()); + if (backend != NULL) + { + double limit = atof(params["limit"].c_str()); + co_return co_await io.run_in_threadpool([backend, limit]() { + backend->set_background_throttle_limit(limit); + return 0; + }, "set bg throttle limit"); + } + } + } + else if (action == "set_all_mirrored") + { + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + co_return co_await io.run_in_threadpool([frontend]() { + frontend->set_all_mirrored(true); + return 0; + }, "set mirrored"); + } + } + else if (action == "set_all_unmirrored") + { + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != NULL) + { + co_return co_await io.run_in_threadpool([frontend]() { + frontend->set_all_mirrored(false); + return 0; + }, "set unmirrored"); + } + } + else if (action == "enable_raid_freespace_stats") + { + enable_raid_freespace_stats = true; + } + else if (action == "disable_raid_freespace_stats") + { + enable_raid_freespace_stats = false; + } + else if (action == "purge_jemalloc") + { + io.run_in_threadpool_nowait([this]() { + this->purge_jemalloc(); + }); + } + else if (action == "set_jemalloc_dirty_decay_ms") + { + int64 timems = watoi64(params["timems"]); + io.run_in_threadpool_nowait([this, timems]() { + this->set_jemalloc_dirty_decay(timems); + }); + } + else + { + Server->Log("Unknown cloud file action '" + action + "'", LL_WARNING); + } + + co_return 0; +} +#endif //HAS_ASYNC + +int64 CloudFile::key_offset_hex(const std::string & key) +{ + std::string bkey = hexToBytes(key); + +#ifndef NDEBUG + if (strlower(bytesToHex(bkey))!=strlower(key)) + { + Server->Log("Key is not hex", LL_ERROR); + return -1; + } +#endif + + return key_offset(bkey); +} + +int64 CloudFile::key_offset(const std::string& key) +{ + bool big_block; + return key_offset(key, big_block); +} + +int64 CloudFile::key_offset(const std::string & key, bool& big_block) +{ + if (key.empty()) + return -1; + + if (key[0] == 's') + { + big_block = false; + return block_key_rev(key)*small_block_size; + } + else if (key[0] == 'b') + { + big_block = true; + return block_key_rev(key)*big_block_size; + } + else + { + return -1; + } +} + +std::string CloudFile::meminfo() +{ + std::string ret; + + if (is_async) + { +#ifdef HAS_ASYNC + CWData wdata; + wdata.addChar(queue_cmd_meminfo); + CRData rdata; + get_async_msg(wdata, rdata); + + int64 locked_extents_info; + rdata.getVarInt(&locked_extents_info); + int64 bitmap_info; + rdata.getVarInt(&bitmap_info); + int64 bitmap_max; + rdata.getVarInt(&bitmap_max); + int64 big_blocks_bitmap_info; + rdata.getVarInt(&big_blocks_bitmap_info); + int64 big_blocks_bitmap_max; + rdata.getVarInt(&big_blocks_bitmap_max); + int64 old_big_blocks_bitmap_info; + rdata.getVarInt(&old_big_blocks_bitmap_info); + int64 new_big_blocks_bitmap_info; + rdata.getVarInt(&new_big_blocks_bitmap_info); + int64 fracture_big_blogs_info; + rdata.getVarInt(&fracture_big_blogs_info); + int64 in_write_retrieval_info; + rdata.getVarInt(&in_write_retrieval_info); + + ret += "##CloudFile:\n"; + ret += " locked_extents: " + convert(locked_extents_info) + " * " + PrettyPrintBytes(sizeof(SExtent)) + "\n"; + ret += " bitmap: " + PrettyPrintBytes(bitmap_info) + "/" + PrettyPrintBytes(bitmap_max * 4096) + "\n"; + ret += " big_blocks_bitmap: " + PrettyPrintBytes(big_blocks_bitmap_info) + "/" + PrettyPrintBytes(big_blocks_bitmap_max * 4096) + "\n"; + if (old_big_blocks_bitmap.get() != nullptr) + { + ret += " old_big_blocks_bitmap: " + PrettyPrintBytes(old_big_blocks_bitmap_info) + "\n"; + } + if (new_big_blocks_bitmap.get() != nullptr) + { + ret += " new_big_blocks_bitmap: " + PrettyPrintBytes(new_big_blocks_bitmap_info) + "\n"; + } + ret += " fracture_big_blogs: " + convert(fracture_big_blogs_info) + " * " + PrettyPrintBytes(sizeof(int64) * 2) + "\n"; + ret += " in_write_retrieval: " + convert(in_write_retrieval_info) + " * " + PrettyPrintBytes(sizeof(std::string)) + "\n"; +#else + assert(false); +#endif + } + else + { + IScopedLock lock(mutex.get()); + + ret += "##CloudFile:\n"; + ret += " locked_extents: " + convert(locked_extents.capacity()) + " * " + PrettyPrintBytes(sizeof(SExtent))+"\n"; + ret += " bitmap: " + PrettyPrintBytes(bitmap->meminfo())+"/"+PrettyPrintBytes(bitmap->get_max_cache_items()*4096)+ "\n"; + ret += " big_blocks_bitmap: " + PrettyPrintBytes(big_blocks_bitmap->meminfo()) +"/"+PrettyPrintBytes(big_blocks_bitmap->get_max_cache_items()*4096)+ "\n"; + if (old_big_blocks_bitmap.get() != nullptr) + { + ret += " old_big_blocks_bitmap: " + PrettyPrintBytes(old_big_blocks_bitmap->meminfo()) + "\n"; + } + if (new_big_blocks_bitmap.get() != nullptr) + { + ret += " new_big_blocks_bitmap: " + PrettyPrintBytes(new_big_blocks_bitmap->meminfo()) + "\n"; + } + ret += " fracture_big_blogs: " + convert(fracture_big_blogs.size()) + " * " + PrettyPrintBytes(sizeof(int64) * 2) + "\n"; + ret += " in_write_retrieval: " + convert(in_write_retrieval.size()) + " * " + PrettyPrintBytes(sizeof(std::string)) + "\n"; + } + + { + IScopedReadLock lock(chunks_mutex.get()); + ret += " fs_chunks: " + convert(fs_chunks.capacity()) + " * " + PrettyPrintBytes(sizeof(IBackupFileSystem::SChunk)) + " = "+PrettyPrintBytes(sizeof(IBackupFileSystem::SChunk)*fs_chunks.capacity())+"\n"; + } + + ret += kv_store.meminfo(); + + return ret; +} + +void CloudFile::shrink_mem() +{ + if (is_async) + abort(); + + { + IScopedLock lock(mutex.get()); + if(bitmap.get()!=nullptr) + bitmap->flush(); + if (big_blocks_bitmap.get() != nullptr) + big_blocks_bitmap->flush(); + } + + kv_store.shrink_mem(); +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::shrink_mem_async(fuse_io_context& io) +{ + Server->Log("Shrinking memory (async)", LL_WARNING); + + size_t lock_idx = co_await lock_extent_async(0, 0, false); + + if (bitmap.get() != nullptr) + co_await bitmap->flush_async(io); + if (big_blocks_bitmap.get() != nullptr) + co_await big_blocks_bitmap->flush_async(io); + + unlock_extent_async(0, 0, false, lock_idx); + + co_await io.run_in_threadpool([this]() { + this->kv_store.shrink_mem(); + return 0; + }, "shrink kvstore mem"); + + co_return 0; +} +#endif + +int64 CloudFile::get_total_hits() +{ + return kv_store.get_total_hits(); +} + +int64 CloudFile::get_total_hits_async() +{ + return kv_store.get_total_hits_async(); +} + +int64 CloudFile::get_total_memory_hits() +{ + return kv_store.get_total_memory_hits(); +} + +int64 CloudFile::get_total_memory_hits_async() +{ + return kv_store.get_total_memory_hits_async(); +} + +int64 CloudFile::get_total_cache_miss_backend() +{ + return kv_store.get_total_cache_miss_backend(); +} + +int64 CloudFile::get_total_cache_miss_decompress() +{ + return kv_store.get_total_cache_miss_decompress(); +} + +int64 CloudFile::get_total_dirty_ops() +{ + return kv_store.get_total_dirty_ops(); +} + +int64 CloudFile::get_total_balance_ops() +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->get_total_balance_ops(); + } + return -1; +} + +int64 CloudFile::get_total_del_ops() +{ + KvStoreFrontend* frontend = dynamic_cast(online_kv_store.get()); + if (frontend != nullptr) + { + return frontend->get_total_del_ops(); + } + return -1; +} + +int64 CloudFile::get_total_put_ops() +{ + return kv_store.get_total_put_ops(); +} + +int64 CloudFile::get_total_compress_ops() +{ + return kv_store.get_total_compress_ops(); +} + +bool CloudFile::Flush(bool do_submit, bool for_slog) +{ + if (is_async) + abort(); + + if (!bdev_name.empty()) + { + Server->Log("Switching bcache device " + bdev_name + " to writethrough mode...", LL_INFO); + + { + IScopedLock lock(mutex.get()); + + if (writeback_count == 0) + { + std::unique_ptr writeback_percent_f(Server->openFile("/sys/block/" + bdev_name + "/bcache/writeback_percent", MODE_READ)); + bcache_writeback_percent = trim(writeback_percent_f->Read(512)); + } + + ++writeback_count; + } + + doublefork_writestring("writethrough", "/sys/block/" + bdev_name + "/bcache/cache_mode"); + + //disables writeback throttling + writestring("0", "/sys/block/" + bdev_name + "/bcache/writeback_percent"); + + while (true) + { + Server->wait(100); + std::unique_ptr state_f(Server->openFile("/sys/block/" + bdev_name + "/bcache/state", MODE_READ)); + std::string state = trim(state_f->Read(512)); + + if (!state.empty() && state != "dirty") + { + Server->Log("Bcache device is in state '"+state+"'", LL_INFO); + break; + } + } + +#ifndef _WIN32 + /* + This makes Linux deadlock in some cases. Not sure if even necessary... + if (!bdev_name.empty()) + { + Server->Log("Flushing /dev/" + bdev_name + "..."); + flush_dev("/dev/" + bdev_name); + } + + + if (!ldev_name.empty()) + { + Server->Log("Flushing /dev/" + ldev_name + "..."); + flush_dev("/dev/" + ldev_name); + }*/ +#endif + } + + IScopedLock lock(mutex.get()); + + is_flushing = true; + + Auto(is_flushing = false); + + while (flush_enabled>0) + { + lock.relock(nullptr); + Server->wait(1000); + lock.relock(mutex.get()); + } + + int64 orig_cloudfile_size = cloudfile_size; + ++wait_for_exclusive; + lock_extent(lock, 0, orig_cloudfile_size, true); + --wait_for_exclusive; + + if (for_slog + && slog.get() != nullptr + && slog->Size() < slog_max_size) + { + return true; + } + + bool ret = true; + + IScopedLock migrate_lock(nullptr); + if (migrate_to_cf != nullptr) + { + migrate_lock.relock(migrate_to_cf->mutex.get()); + ++migrate_to_cf->wait_for_exclusive; + migrate_to_cf->lock_extent(migrate_lock, 0, orig_cloudfile_size, true); + --migrate_to_cf->wait_for_exclusive; + + if (!update_migration_settings()) + { + Server->Log("Error updating migration settings", LL_ERROR); + migrate_to_cf->unlock_extent(migrate_lock, 0, orig_cloudfile_size, true); + unlock_extent(lock, 0, orig_cloudfile_size, true); + return false; + } + + if (!migrate_to_cf->close_bitmaps()) + { + migrate_to_cf->unlock_extent(migrate_lock, 0, orig_cloudfile_size, true); + unlock_extent(lock, 0, orig_cloudfile_size, true); + return false; + } + + size_t retry_n = 0; + while (migrate_to_cf->kv_store.checkpoint(do_submit, retry_n)) + { + Server->Log("Error checkpointing migrate_to KVStore. Retrying...", LL_WARNING); + retryWait(retry_n++); + } + + try + { + migrate_to_cf->open_bitmaps(); + } + catch (const std::runtime_error& e) + { + Server->Log(std::string("Error while opening migrate_to bitmaps: ") + e.what(), LL_ERROR); + ret = false; + } + } + + if(!close_bitmaps()) + { + if (migrate_to_cf != nullptr) + { + migrate_to_cf->unlock_extent(migrate_lock, 0, orig_cloudfile_size, true); + } + unlock_extent(lock, 0, orig_cloudfile_size, true); + if (!bdev_name.empty()) + { + Server->Log("Switching bcache device " + bdev_name + " to writeback mode...", LL_INFO); + + --writeback_count; + + if (writeback_count == 0) + { + doublefork_writestring("writeback", "/sys/block/" + bdev_name + "/bcache/cache_mode"); + } + } + return false; + } + + size_t retry_n=0; + while(!kv_store.checkpoint(do_submit, retry_n)) + { + Server->Log("Error checkpointing KVStore. Retrying...", LL_WARNING); + retryWait(retry_n++); + } + + try + { + open_bitmaps(); + } + catch(const std::runtime_error& e) + { + Server->Log(std::string("Error while opening bitmaps: ") + e.what(), LL_ERROR); + ret = false; + } + + if (migrate_to_cf != nullptr) + { + migrate_to_cf->unlock_extent(migrate_lock, 0, orig_cloudfile_size, true); + } + + unlock_extent(lock, 0, orig_cloudfile_size, true); + + if (!bdev_name.empty()) + { + Server->Log("Switching bcache device " + bdev_name + " to writeback mode...", LL_INFO); + + --writeback_count; + + if (writeback_count == 0) + { + lock.relock(nullptr); + + doublefork_writestring("writeback", "/sys/block/" + bdev_name + "/bcache/cache_mode"); + + writestring(bcache_writeback_percent, "/sys/block/" + bdev_name + "/bcache/writeback_percent"); + } + } + + if (!slog_open()) + { + ret = false; + } + + return ret; +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::FlushAsync(fuse_io_context& io, bool do_submit) +{ + if (!bdev_name.empty()) + { + Server->Log("Switching bcache device " + bdev_name + " to writethrough mode...", LL_INFO); + + { + IScopedLock lock(mutex.get()); + + if (writeback_count == 0) + { + std::unique_ptr writeback_percent_f(Server->openFile("/sys/block/" + bdev_name + "/bcache/writeback_percent", MODE_READ)); + bcache_writeback_percent = trim(writeback_percent_f->Read(512)); + } + + ++writeback_count; + } + + doublefork_writestring("writethrough", "/sys/block/" + bdev_name + "/bcache/cache_mode"); + + //disables writeback throttling + writestring("0", "/sys/block/" + bdev_name + "/bcache/writeback_percent"); + + while (true) + { + Server->wait(100); + std::unique_ptr state_f(Server->openFile("/sys/block/" + bdev_name + "/bcache/state", MODE_READ)); + std::string state = trim(state_f->Read(512)); + + if (!state.empty() && state != "dirty") + { + Server->Log("Bcache device is in state '" + state + "'", LL_INFO); + break; + } + } + +#ifndef _WIN32 + /* + This makes Linux deadlock in some cases. Not sure if even necessary... + if (!bdev_name.empty()) + { + Server->Log("Flushing /dev/" + bdev_name + "..."); + flush_dev("/dev/" + bdev_name); + } + + + if (!ldev_name.empty()) + { + Server->Log("Flushing /dev/" + ldev_name + "..."); + flush_dev("/dev/" + ldev_name); + }*/ +#endif + } + + is_flushing = true; + Auto(is_flushing = false); + + while (flush_enabled > 0) + { + co_await io.sleep_ms(1000); + } + + int64 orig_cloudfile_size = cloudfile_size; + ++wait_for_exclusive; + size_t lock_idx = co_await lock_extent_async(0, orig_cloudfile_size, true); + --wait_for_exclusive; + resume_exclusive_awaiters(); + + bool ret = true; + + size_t migrate_lock_idx; + if (migrate_to_cf != nullptr) + { + ++migrate_to_cf->wait_for_exclusive; + migrate_lock_idx = co_await migrate_to_cf->lock_extent_async(0, orig_cloudfile_size, true); + --migrate_to_cf->wait_for_exclusive; + migrate_to_cf->resume_exclusive_awaiters(); + + if (!update_migration_settings()) + { + Server->Log("Error updating migration settings", LL_ERROR); + migrate_to_cf->unlock_extent_async(0, orig_cloudfile_size, true, migrate_lock_idx); + unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); + co_return false; + } + + if (!migrate_to_cf->close_bitmaps()) + { + migrate_to_cf->unlock_extent_async(0, orig_cloudfile_size, true, migrate_lock_idx); + unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); + co_return false; + } + + size_t retry_n = 0; + while (migrate_to_cf->kv_store.checkpoint(do_submit, retry_n)) + { + Server->Log("Error checkpointing migrate_to KVStore. Retrying...", LL_WARNING); + retryWait(retry_n++); + } + + try + { + migrate_to_cf->open_bitmaps(); + } + catch (const std::runtime_error& e) + { + Server->Log(std::string("Error while opening migrate_to bitmaps: ") + e.what(), LL_ERROR); + ret = false; + } + } + + if (!close_bitmaps()) + { + unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); + if (!bdev_name.empty()) + { + Server->Log("Switching bcache device " + bdev_name + " to writeback mode...", LL_INFO); + + --writeback_count; + + if (writeback_count == 0) + { + doublefork_writestring("writeback", "/sys/block/" + bdev_name + "/bcache/cache_mode"); + } + } + co_return false; + } + + bool b = co_await io.run_in_threadpool([this, do_submit]() { + size_t retry_n = 0; + while (!this->kv_store.checkpoint(do_submit, retry_n)) + { + Server->Log("Error checkpointing KVStore. Retrying...", LL_WARNING); + retryWait(retry_n++); + } + + try + { + this->open_bitmaps(); + } + catch (const std::runtime_error& e) + { + Server->Log(std::string("Error while opening bitmaps: ") + e.what(), LL_ERROR); + return false; + } + return true; + + }, "kv checkpoint"); + + if (!b) + ret = false; + + + if (migrate_to_cf != nullptr) + { + migrate_to_cf->unlock_extent_async(0, orig_cloudfile_size, true, migrate_lock_idx); + } + + unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); + + if (!bdev_name.empty()) + { + Server->Log("Switching bcache device " + bdev_name + " to writeback mode...", LL_INFO); + + --writeback_count; + + if (writeback_count == 0) + { + doublefork_writestring("writeback", "/sys/block/" + bdev_name + "/bcache/cache_mode"); + + writestring(bcache_writeback_percent, "/sys/block/" + bdev_name + "/bcache/writeback_percent"); + } + } + + if (!slog_open()) + { + ret = false; + } + + co_return ret; +} +#endif //HAS_ASYNC + +bool CloudFile::slog_open() +{ + if (slog.get() == nullptr) + return true; + + slog.reset(); + Server->deleteFile(slog_path); + + syncDir(ExtractFilePath(slog_path, os_file_sep())); + + slog.reset(Server->openFile(slog_path, MODE_WRITE)); + + if (slog.get() == nullptr) + { + Server->Log("Error opening slog file at " + slog_path + ". " + os_last_error_str(), LL_ERROR); + return false; + } + + if (slog->Size() != 0) + return false; + + if (slog->Write(slog_magic) != slog_magic.size()) + { + Server->Log("Error writing slog magic. " + os_last_error_str(), LL_ERROR); + return false; + } + + int64 transid = kv_store.get_basetransid(); + + if (slog->Write(reinterpret_cast(&transid), sizeof(transid)) != sizeof(transid)) + { + Server->Log("Error writing transid to slog. " + os_last_error_str(), LL_ERROR); + return false; + } + + slog->Sync(); + + slog_size = slog_magic.size() + sizeof(transid); + slog_last_sync = slog_magic.size() + sizeof(transid);; + + return true; +} + +void CloudFile::purge_jemalloc() +{ +#if !defined(NO_JEMALLOC) && !defined(_WIN32) + unsigned narenas = 0; + size_t sz = sizeof(unsigned); + if (!mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) + { + std::string purge_cmd = "arena."+convert(narenas)+".purge"; + if(mallctl(purge_cmd.c_str(), NULL, 0, NULL, 0)) + { + Server->Log("Purging jemalloc arenas failed", LL_WARNING); + } + } +#endif +} + +void CloudFile::set_jemalloc_dirty_decay(int64 timems) +{ +#if !defined(NO_JEMALLOC) && !defined(_WIN32) + ssize_t dirty_decay_old; + ssize_t dirty_decay_new = timems; + size_t sz = sizeof(dirty_decay_old); + if (!mallctl("arenas.dirty_decay_ms", &dirty_decay_old, &sz, &dirty_decay_new, sizeof(dirty_decay_new))) + { + Server->Log("Setting dirty decay ms failed", LL_WARNING); + } +#endif +} + +bool CloudFile::Flush() +{ + return Flush(true); +} + + +void CloudFile::open_bitmaps() +{ + bitmaps_file_size = 0; + + big_blocks_bitmap_file = kv_store.get("big_blocks_bitmap", TransactionalKvStore::BitmapInfo::Unknown, + TransactionalKvStore::Flag::disable_fd_cache|TransactionalKvStore::Flag::disable_throttling| + TransactionalKvStore::Flag::disable_memfiles, -1); + + if(big_blocks_bitmap_file==nullptr) + { + throw std::runtime_error("Cannot open big_blocks_bitmap"); + } + + bitmaps_file_size += big_blocks_bitmap_file->Size(); + + if(new_big_blocks_bitmap_file!=nullptr) + { + std::string fn = new_big_blocks_bitmap_file->getFilename(); + Server->destroy(new_big_blocks_bitmap_file); + Server->deleteFile(fn); + } + + new_big_blocks_bitmap_file = Server->openTemporaryFile(); + + if(new_big_blocks_bitmap_file==nullptr) + { + throw std::runtime_error("Cannot open new_big_blocks_bitmap"); + } + + old_big_blocks_bitmap_file = kv_store.get("old_big_blocks_bitmap", TransactionalKvStore::BitmapInfo::Unknown, + TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling | + TransactionalKvStore::Flag::disable_memfiles, -1); + + if(old_big_blocks_bitmap_file==nullptr) + { + throw std::runtime_error("Cannot open old_big_blocks_bitmap"); + } + + bitmaps_file_size += old_big_blocks_bitmap_file->Size(); + + int64 big_blocks = div_up(cloudfile_size, big_block_size); + + size_t bitmap_cache_items = static_cast((70 * 1024 * 1024*memory_usage_factor) / bitmap_block_size); + + size_t big_blocks_bitmap_cache_items = (std::max)(static_cast(bitmap_cache_items / (big_block_size / block_size)), + static_cast(100)); + + big_blocks_bitmap.reset(new SparseFileBitmap(big_blocks_bitmap_file, big_blocks, true, big_blocks_bitmap_cache_items)); + + new_big_blocks_bitmap.reset(new FileBitmap(new_big_blocks_bitmap_file, big_blocks, false)); + old_big_blocks_bitmap.reset(new FileBitmap(old_big_blocks_bitmap_file, big_blocks, false)); + + bitmap_file = kv_store.get("bitmap", TransactionalKvStore::BitmapInfo::Unknown, + TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling| + TransactionalKvStore::Flag::disable_memfiles, -1); + + if(bitmap_file==nullptr) + { + throw std::runtime_error("Cannot open bitmap"); + } + + bitmaps_file_size += bitmap_file->Size(); + + int64 bitmap_blocks = div_up(div_up(cloudfile_size, big_block_size)*big_block_size, block_size); + + bitmap_cache_items = (std::max)(bitmap_cache_items, static_cast(100)); + + bitmap.reset(new SparseFileBitmap(bitmap_file, bitmap_blocks, false, bitmap_cache_items)); + + if (used_bytes == 0) + { + used_bytes = bitmap->count_bits()*block_size; + bitmap->clear_cache_no_flush(); + } + + Server->Log("Cloud drive size: " + PrettyPrintBytes(used_bytes)); +} + +bool CloudFile::bitmap_has_big_block( int64 blocknum ) +{ + return bitmap->get_range((blocknum*big_block_size)/block_size, ((blocknum+1)*big_block_size)/block_size); +} + +bool CloudFile::bitmap_has_small_block( int64 blocknum ) +{ + return bitmap->get_range((blocknum*small_block_size)/block_size, ((blocknum+1)*small_block_size)/block_size); +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::bitmap_has_big_block_async(fuse_io_context& io, int64 blocknum) +{ + co_return co_await bitmap->get_range_async(io, (blocknum * big_block_size) / block_size, ((blocknum + 1) * big_block_size) / block_size); +} + +fuse_io_context::io_uring_task CloudFile::bitmap_has_small_block_async(fuse_io_context& io, int64 blocknum) +{ + co_return co_await bitmap->get_range_async(io, (blocknum * small_block_size) / block_size, ((blocknum + 1) * small_block_size) / block_size); +} +#endif + +bool CloudFile::Sync() +{ + if (!slog_path.empty()) + { + IScopedLock lock(mutex.get()); + lock_extent(lock, 1, 0, false); + bool ret; + if (slog.get() == nullptr) + { + ret = false; + } + else + { + ret = slog->Sync(); + } + unlock_extent(lock, 1, 0, false); + return ret; + } + return true; +} + +int64 CloudFile::getDirtyBytes() +{ + return kv_store.get_dirty_bytes() - bitmaps_file_size; +} + +int64 CloudFile::getSubmittedBytes() +{ + return kv_store.get_submitted_bytes(); +} + +int64 CloudFile::getTotalSubmittedBytes() +{ + return kv_store.get_total_submitted_bytes(); +} + +int64 CloudFile::getUsedBytes() +{ + IScopedLock lock(mutex.get()); + + return used_bytes; +} + +std::string CloudFile::getNumDirtyItems() +{ + std::string dirty_items; + std::map di = kv_store.get_num_dirty_items(); + for(std::map::iterator it=di.begin(); + it!=di.end();++it) + { + dirty_items+=convert(it->first)+": "+convert(it->second)+"\n"; + } + + return dirty_items; +} + +std::string CloudFile::getNumMemfileItems() +{ + std::string memfile_items; + std::map di = kv_store.get_num_memfile_items(); + for (std::map::iterator it = di.begin(); + it != di.end(); ++it) + { + memfile_items += convert(it->first) + ": " + convert(it->second) + "\n"; + } + + return memfile_items; +} + +int64 CloudFile::getCacheSize() +{ + return kv_store.get_cache_size(); +} + +void CloudFile::Reset() +{ + if (is_async) + abort(); + + IScopedLock lock(mutex.get()); + + close_bitmaps(); + + kv_store.reset(); + + open_bitmaps(); +} + +bool CloudFile::close_bitmaps() +{ + IScopedLock lock(mutex.get()); + + if(!big_blocks_bitmap->flush()) + { + return false; + } + + if(!bitmap->flush()) + { + return false; + } + + for(int64 i=0;isize();++i) + { + if(new_big_blocks_bitmap->get(i)) + { + old_big_blocks_bitmap->set(i, true); + } + } + + if(!old_big_blocks_bitmap->flush()) + { + return false; + } + + big_blocks_bitmap.reset(); + bitmap.reset(); + old_big_blocks_bitmap.reset(); + + kv_store.release("big_blocks_bitmap"); + kv_store.release("bitmap"); + kv_store.release("old_big_blocks_bitmap"); + + return true; +} + +void CloudFile::lock_extent(IScopedLock& lock, int64 start, int64 length, bool exclusive) +{ + if (is_async) + abort(); + + while (!exclusive + && wait_for_exclusive>0) + { + lock.relock(nullptr); + Server->wait(1); + lock.relock(mutex.get()); + } + + size_t first_dead; + bool retry=true; + while(retry) + { + retry=false; + first_dead = std::string::npos; + for(size_t i=0;i=extent.start+extent.length) + || (start>=extent.start && startextent.start && start+length<=extent.start+extent.length) ) + { + if(extent.cond!=nullptr) + { + retry=true; + ++extent.refcount; + ICondition* cond = extent.cond; + cond->wait(&lock); + + for (size_t j = 0; j < locked_extents.size(); ++j) + { + if (locked_extents[j].cond == cond) + { + --locked_extents[j].refcount; + if (locked_extents[j].refcount <= 0) + { + assert(!locked_extents[j].alive); + Server->destroy(locked_extents[j].cond); + locked_extents[j].cond = nullptr; + if (j == locked_extents_max_alive + && locked_extents_max_alive>0) + { + --locked_extents_max_alive; + while (locked_extents_max_alive > 0 + && !locked_extents[locked_extents_max_alive].alive + && locked_extents[locked_extents_max_alive].refcount <= 0) + { + --locked_extents_max_alive; + } + } + } + break; + } + } + break; + } + else if(exclusive) + { + retry=true; + lock.relock(nullptr); + Server->wait(1); + lock.relock(mutex.get()); + break; + } + } + } + } + + if (first_dead == std::string::npos) + { + for (size_t i = locked_extents_max_alive+1; + i < locked_extents.size(); ++i) + { + if (!locked_extents[i].alive + && locked_extents[i].refcount <= 0) + { + first_dead = i; + break; + } + } + } + + if (first_dead != std::string::npos) + { + SExtent& extent = locked_extents[first_dead]; + extent.start = start; + extent.length = length; + if (exclusive) + { + extent.cond = Server->createCondition(); + } + else + { + extent.cond = nullptr; + } + extent.refcount = 1; + extent.alive = true; + + if (first_dead > locked_extents_max_alive) + { + locked_extents_max_alive = first_dead; + } + return; + } + + SExtent new_extent; + new_extent.start = start; + new_extent.length = length; + if(exclusive) + { + new_extent.cond = Server->createCondition(); + } + else + { + new_extent.cond = nullptr; + } + new_extent.refcount = 1; + new_extent.alive = true; + + locked_extents.push_back(new_extent); + locked_extents_max_alive = locked_extents.size() - 1; +} + +void CloudFile::unlock_extent(IScopedLock& lock, int64 start, int64 length, bool exclusive) +{ + if (is_async) + abort(); + + for(size_t i=0;idestroy(extent.cond); + extent.cond = nullptr; + extent.alive = false; + if (i == locked_extents_max_alive + && locked_extents_max_alive>0) + { + --locked_extents_max_alive; + while (locked_extents_max_alive > 0 + && !locked_extents[locked_extents_max_alive].alive + && locked_extents[locked_extents_max_alive].refcount <= 0) + { + --locked_extents_max_alive; + } + } + } + else if (extent.cond != nullptr) + { + extent.cond->notify_all(); + } + + break; + } + } +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::lock_extent_async(int64 start, int64 length, bool exclusive) +{ + while (!exclusive + && wait_for_exclusive>0) + { + co_await ExclusiveAwaiter(*this); + } + + size_t first_dead; + bool retry=true; + while(retry) + { + retry=false; + first_dead = std::string::npos; + for(size_t i=0;i=extent.start+extent.length) + || (start>=extent.start && startextent.start && start+length<=extent.start+extent.length) ) + { + if(extent.exclusive || exclusive) + { + retry=true; + co_await ExtentAwaiter(extent); + break; + } + } + } + } + + if (first_dead == std::string::npos) + { + for (size_t i = locked_extents_max_alive+1; + i < async_locked_extents.size(); ++i) + { + if (async_locked_extents[i].refcount <= 0) + { + first_dead = i; + break; + } + } + } + + if (first_dead != std::string::npos) + { + SExtentAsync& extent = async_locked_extents[first_dead]; + assert(extent.awaiters == nullptr + && extent.refcount<=0); + extent.start = start; + extent.length = length; + extent.exclusive = exclusive; + extent.refcount = 1; + + if (first_dead > locked_extents_max_alive) + { + locked_extents_max_alive = first_dead; + } + co_return first_dead; + } + + SExtentAsync new_extent; + new_extent.start = start; + new_extent.length = length; + new_extent.exclusive = exclusive; + new_extent.refcount = 1; + new_extent.awaiters = nullptr; + + async_locked_extents.push_back(new_extent); + locked_extents_max_alive = async_locked_extents.size() - 1; + co_return async_locked_extents.size() - 1; +} +#endif //HAS_ASYNC + +#ifdef HAS_ASYNC +void CloudFile::unlock_extent_async(int64 start, int64 length, bool exclusive, size_t lock_idx) +{ + SExtentAsync& extent = async_locked_extents[lock_idx]; + assert(extent.start == start); + assert(extent.length == length); + assert(!exclusive || extent.exclusive); + + --extent.refcount; + assert(extent.refcount >= 0); + if(extent.refcount<=0) + { + extent.exclusive = false; + if (lock_idx == locked_extents_max_alive + && locked_extents_max_alive>0) + { + --locked_extents_max_alive; + while (locked_extents_max_alive > 0 + && async_locked_extents[locked_extents_max_alive].refcount <= 0) + { + --locked_extents_max_alive; + } + } + + AwaiterCoList* curr_awaiters = extent.awaiters; + extent.awaiters = nullptr; + while (curr_awaiters != nullptr) + { + AwaiterCoList* next = curr_awaiters->next; + curr_awaiters->awaiter.resume(); + curr_awaiters = next; + } + } +} +#endif //HAS_ASYNC + +void CloudFile::add_fracture_big_block(int64 b) +{ + if (fracture_big_blogs.size() > 100) + { + return; + } + + auto it = fracture_big_blogs.find(b); + if (it != fracture_big_blogs.end()) + { + return; + } + + fracture_big_blogs[b] = Server->getTimeMS()+60*1000; +} + +bool CloudFile::Resize(int64 nsize) +{ + if (is_async) + { +#ifdef HAS_ASYNC + CWData wdata; + wdata.addChar(queue_cmd_resize); + wdata.addVarInt(nsize); + + CRData rdata; + get_async_msg(wdata, rdata); + + char ch; + bool b = rdata.getChar(&ch); + assert(b); + return ch==1; +#else + assert(false); +#endif + } + + IScopedLock lock(mutex.get()); + + int64 orig_cloudfile_size = cloudfile_size; + ++wait_for_exclusive; + lock_extent(lock, 0, cloudfile_size, true); + --wait_for_exclusive; + + if (!close_bitmaps()) + { + unlock_extent(lock, 0, cloudfile_size, true); + return false; + } + bool ret = true; + + IFile* f_cloudfile_size = kv_store.get("cloudfile_size", TransactionalKvStore::BitmapInfo::Unknown, + TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); + + if (f_cloudfile_size == nullptr) + { + Server->Log("Cannot open cloudfile_size", LL_ERROR); + unlock_extent(lock, 0, orig_cloudfile_size, true); + return false; + } + + cloudfile_size = nsize; + + if (f_cloudfile_size->Write(0, reinterpret_cast(&cloudfile_size), sizeof(cloudfile_size)) != sizeof(cloudfile_size)) + { + Server->Log("Error writing new cloudfile size. " + os_last_error_str(), LL_ERROR); + ret = false; + } + + kv_store.release("cloudfile_size"); + + try + { + open_bitmaps(); + } + catch (const std::runtime_error&) + { + ret = false; + } + + unlock_extent(lock, 0, orig_cloudfile_size, true); + + return ret; +} + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::ResizeAsync(fuse_io_context& io, int64 nsize) +{ + int64 orig_cloudfile_size = cloudfile_size; + ++wait_for_exclusive; + size_t lock_idx = co_await lock_extent_async(0, cloudfile_size, true); + --wait_for_exclusive; + resume_exclusive_awaiters(); + + if (!close_bitmaps()) + { + unlock_extent_async(0, cloudfile_size, true, lock_idx); + co_return false; + } + bool ret = true; + + std::string cloudfile_size_str("cloudfile_size"); + + IFile* f_cloudfile_size = co_await kv_store.get_async(io, cloudfile_size_str, TransactionalKvStore::BitmapInfo::Unknown, + TransactionalKvStore::Flag::disable_fd_cache | TransactionalKvStore::Flag::disable_throttling, -1); + + if (f_cloudfile_size == NULL) + { + Server->Log("Cannot open cloudfile_size", LL_ERROR); + unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); + co_return false; + } + + cloudfile_size = nsize; + + if (f_cloudfile_size->Write(0, reinterpret_cast(&cloudfile_size), sizeof(cloudfile_size)) != sizeof(cloudfile_size)) + { + Server->Log("Error writing new cloudfile size. " + os_last_error_str(), LL_ERROR); + ret = false; + } + + co_await kv_store.release_async(io, cloudfile_size_str); + + bool b = co_await io.run_in_threadpool([this]() { + try + { + this->open_bitmaps(); + } + catch (const std::runtime_error&) + { + return false; + } + return true; + }, "resize open bm"); + + if (!b) + ret = false; + + unlock_extent_async(0, orig_cloudfile_size, true, lock_idx); + + co_return ret; +} +#endif //HAS_ASYNC + +std::string CloudFile::getStats() +{ + return online_kv_store->get_stats(); +} + +int64 CloudFile::getCompBytes() +{ + return kv_store.get_comp_bytes(); +} + +void CloudFile::setDevNames(std::string bdev, std::string ldev) +{ + bdev_name = bdev; + ldev_name = ldev; +} + +void task_set_less_throttle() +{ +#ifndef _WIN32 + static bool warned = false; + if (prctl(PR_SET_IO_FLUSHER, 1, 0, 0, 0) != 0) + { + if (!warned) + { + warned = true; + Server->Log("Setting PR_SET_IO_FLUSHER failed. " + os_last_error_str(), LL_WARNING); + } + } +#endif +} + +void task_unset_less_throttle() +{ +#ifndef _WIN32 + prctl(PR_SET_IO_FLUSHER, 0, 0, 0, 0); +#endif +} + +/*_u32 CloudFile::prepare_zero_n_sqes(_u32 towrite) +{ + return towrite /zero_memfd_size + (towrite%zero_memfd_size ==0 ? 0 : 1); +} + +std::vector CloudFile::prepare_zeros(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io, _u32 towrite, _u32 peek_add) +{ + _u32 nwrite = prepare_zero_n_sqes(towrite); + std::vector sqes; + sqes.reserve(nwrite); + while(nwrite>0) + { + io_uring_sqe* sqe = io.get_sqe(nwrite + peek_add); + --nwrite; + + _u32 c_to_write = (std::min)(towrite, zero_memfd_size); + io_uring_prep_splice(sqe, zero_memfd, + 0, fuse_io.pipe[1], -1, c_to_write, + SPLICE_F_MOVE| SPLICE_F_NONBLOCK); + sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; + sqes.push_back(sqe); + towrite-=c_to_write; + } + assert(towrite==0); + return sqes; +}*/ + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::empty_pipe(fuse_io_context& io, fuse_io_context::FuseIo& fuse_io) +{ + int rc; + do + { + io_uring_sqe* write_sqe = io.get_sqe(); + if (write_sqe == nullptr) + co_return -1; + + io_uring_prep_splice(write_sqe, fuse_io.pipe[0], + -1, get_file_fd(null_file->getOsHandle()), 0, 512, + SPLICE_F_MOVE | SPLICE_F_FD_IN_FIXED | SPLICE_F_NONBLOCK); + + rc = co_await io.complete(write_sqe); + } while (rc > 0); + co_return 0; +} +#endif //HAS_ASYNC + +#ifdef HAS_ASYNC +fuse_io_context::io_uring_task CloudFile::verify_pipe_empty(fuse_io_context& io, + fuse_io_context::FuseIo& fuse_io) +{ + struct io_uring_sqe *sqe; + sqe = io.get_sqe(); + if(sqe==nullptr) + co_return false; + + char ch; + io_uring_prep_read(sqe, fuse_io.pipe[0], + &ch, 1, 0); + sqe->flags |= IOSQE_FIXED_FILE; + + int rc = co_await io.complete(sqe); + + co_return rc!=1; +} +#endif //HAS_ASYNC diff --git a/clouddrive/CloudFile.h b/clouddrive/CloudFile.h index fd704372..c5f38849 100644 --- a/clouddrive/CloudFile.h +++ b/clouddrive/CloudFile.h @@ -62,7 +62,8 @@ public: unsigned int background_comp_method, const std::string& slog_path, int64 slog_max_size, - bool is_async); + bool is_async, + unsigned int cache_comp, unsigned int meta_cache_comp); ~CloudFile(); diff --git a/clouddrive/ClouddriveFactory.cpp b/clouddrive/ClouddriveFactory.cpp index 5d25ba3a..5fc9b5fc 100644 --- a/clouddrive/ClouddriveFactory.cpp +++ b/clouddrive/ClouddriveFactory.cpp @@ -88,7 +88,9 @@ IFile* ClouddriveFactory::createCloudFile(IBackupFileSystem* cachefs, CloudSetti settings.resubmit_compressed_ratio, settings.memcache_size, std::string(), settings.memory_usage_factor, settings.only_memfiles, std::string(), std::string(), static_cast(settings.background_compression), - std::string(), -1, false); + std::string(), -1, false, + static_cast(settings.cache_object_compression), + static_cast(settings.metadata_cache_object_compression)); } IKvStoreBackend* ClouddriveFactory::createBackend(IBackupFileSystem* cachefs, diff --git a/clouddrive/CompressEncrypt.cpp b/clouddrive/CompressEncrypt.cpp index 86edbf11..7cbde481 100644 --- a/clouddrive/CompressEncrypt.cpp +++ b/clouddrive/CompressEncrypt.cpp @@ -52,10 +52,35 @@ unsigned int CompressionMethodFromString(const std::string & str) return CompressionNone; else if (str == "zstd_7") return CompressionZstd7; + else if (str == "lzo" ) + return CompressionLzo; else return CompressionZstd3; } +std::string CompressionMethodToBtrfsString(unsigned int cmeth) +{ + switch (cmeth) + { + case CompressionZlib5: + return "zlib:5"; + case CompressionZstd3: + return "zstd:3"; + case CompressionZstd19: + return "zstd:15"; + case CompressionZstd9: + return "zstd:9"; + case CompressionZstd7: + return "zstd:7"; + case CompressionNone: + return "none"; + case CompressionLzo: + return "lzo"; + default: + return "zstd:3"; + } +} + ICompressEncryptFactory* get_compress_encrypt_factory() { return compress_encrypt_factory; diff --git a/clouddrive/IClouddriveFactory.h b/clouddrive/IClouddriveFactory.h index 95915d21..1745f9c4 100644 --- a/clouddrive/IClouddriveFactory.h +++ b/clouddrive/IClouddriveFactory.h @@ -21,7 +21,9 @@ public: zstd_3 = 2, zstd_19 = 3, zstd_9 = 4, - none = 5 + none = 5, + zstd7 = 6, + lzo = 7 }; struct CloudSettingsS3 @@ -48,6 +50,8 @@ public: CompressionMethod submit_compression = CompressionMethod::none; CompressionMethod metadata_submit_compression = CompressionMethod::zstd_3; CompressionMethod background_compression = CompressionMethod::lzma_5; + CompressionMethod cache_object_compression = CompressionMethod::none; + CompressionMethod metadata_cache_object_compression = CompressionMethod::lzo; bool multi_trans_delete = true; int64 reserved_cache_device_space = 100 * 1024 * 1024; float memory_usage_factor = 0.1f; diff --git a/clouddrive/ICompressEncrypt.h b/clouddrive/ICompressEncrypt.h index 9921b07c..8d0150aa 100644 --- a/clouddrive/ICompressEncrypt.h +++ b/clouddrive/ICompressEncrypt.h @@ -34,9 +34,11 @@ namespace const unsigned int CompressionZstd9 = 4; const unsigned int CompressionNone = 5; const unsigned int CompressionZstd7 = 6; + const unsigned int CompressionLzo = 7; } unsigned int CompressionMethodFromString(const std::string& str); +std::string CompressionMethodToBtrfsString(unsigned int cmeth); class ICompressEncryptFactory : public IObject { diff --git a/clouddrive/IKvStoreBackend.h b/clouddrive/IKvStoreBackend.h index e6e199c2..ee0c1835 100644 --- a/clouddrive/IKvStoreBackend.h +++ b/clouddrive/IKvStoreBackend.h @@ -57,15 +57,16 @@ public: const static unsigned int GetStatusEnospc = 2; const static unsigned int GetStatusNotFound = 4; const static unsigned int GetStatusRepairError = 8; + const static unsigned int GetStatusSkipped = 16; - virtual bool get( const std::string& key, const std::string& path, const std::string& md5sum, - unsigned int flags, bool allow_error_event, IFsFile*& ret_file, std::string& ret_md5sum, unsigned int& get_status) = 0; + virtual bool get( const std::string& key, const std::string& md5sum, + unsigned int flags, bool allow_error_event, IFsFile* ret_file, std::string& ret_md5sum, unsigned int& get_status) = 0; virtual bool list(IListCallback* callback) = 0; const static unsigned int PutAlreadyCompressedEncrypted = 1; const static unsigned int PutMetadata = 2; - virtual bool put( const std::string& key, IFsFile* src, const std::string& path, + virtual bool put( const std::string& key, IFsFile* src, unsigned int flags, bool allow_error_event, std::string& md5sum, int64& size) = 0; enum class key_next_action_t diff --git a/clouddrive/IOnlineKvStore.h b/clouddrive/IOnlineKvStore.h index aee2f7f2..a6631474 100644 --- a/clouddrive/IOnlineKvStore.h +++ b/clouddrive/IOnlineKvStore.h @@ -1,12 +1,12 @@ #pragma once #include "../Interface/File.h" -#include "../Interface/Object.h" -#include "../Interface/SharedMutex.h" +#include "../Interface/Object.h" +#include "../Interface/SharedMutex.h" class IOnlineKvStore : public IObject { public: - virtual IFsFile* get(const std::string& key, int64 transid, const std::string& path, + virtual IFsFile* get(const std::string& key, int64 transid, bool prioritize_read, IFsFile* tmpl_file, bool allow_error_event, bool& not_found, int64* get_transid=nullptr) = 0; @@ -19,7 +19,7 @@ public: const static unsigned int PutMetadata = 2; virtual bool put(const std::string& key, int64 transid, IFsFile* src, - const std::string& path, unsigned int flags, + unsigned int flags, bool allow_error_event, int64& compressed_size) = 0; virtual int64 new_transaction(bool allow_error_event) = 0; diff --git a/clouddrive/KvStoreBackendS3.cpp b/clouddrive/KvStoreBackendS3.cpp index 368ad4ac..e8f86a52 100644 --- a/clouddrive/KvStoreBackendS3.cpp +++ b/clouddrive/KvStoreBackendS3.cpp @@ -339,9 +339,10 @@ void KvStoreBackendS3::init_mutex() Aws::InitAPI(options); } -bool KvStoreBackendS3::get( const std::string& key, const std::string& path, const std::string& md5sum, - unsigned int flags, bool allow_error_event, IFsFile*& ret_file, std::string& ret_md5sum, unsigned int& get_status) +bool KvStoreBackendS3::get( const std::string& key, const std::string& md5sum, + unsigned int flags, bool allow_error_event, IFsFile* ret_file, std::string& ret_md5sum, unsigned int& get_status) { + assert(ret_file!=nullptr); get_status=0; size_t idx0 = 0; @@ -444,37 +445,11 @@ bool KvStoreBackendS3::get( const std::string& key, const std::string& path, con downloaded_bytes+=tmpfile->Size(); - std::unique_ptr res_src; - IFsFile* res; - if(ret_file==nullptr) - { - res_src.reset(cachefs->openFile(path, MODE_RW_CREATE)); - - if(res_src.get()==nullptr) - { - std::string syserr = os_last_error_str(); - if (allow_error_event) - { - addSystemEvent("s3_backend", - "Error opening result file", - "Error opening result file at " + path + ". " + syserr, LL_ERROR); - } - Server->Log("Error opening result file at "+path+". "+syserr, LL_ERROR); - return false; - } - - res = res_src.get(); - } - else - { - res = ret_file; - } - std::unique_ptr decrypt_and_decompress; if(flags & IKvStoreBackend::GetDecrypted) { - decrypt_and_decompress.reset(compress_encrypt_factory->createDecryptAndDecompress(encryption_key, res)); + decrypt_and_decompress.reset(compress_encrypt_factory->createDecryptAndDecompress(encryption_key, ret_file)); } std::vector buffer; @@ -505,7 +480,7 @@ bool KvStoreBackendS3::get( const std::string& key, const std::string& path, con else if(decrypt_and_decompress.get()==nullptr) { md_check.update(reinterpret_cast(buffer.data()), read); - if(res->Write(buffer.data(), read)!=read) + if(ret_file->Write(buffer.data(), read)!=read) { std::string syserr = os_last_error_str(); Server->Log("Error writing to result file. "+syserr, LL_ERROR); @@ -557,10 +532,6 @@ bool KvStoreBackendS3::get( const std::string& key, const std::string& path, con return false; } - if(ret_file==nullptr) - { - ret_file = res_src.release(); - } return true; } else @@ -691,36 +662,10 @@ bool KvStoreBackendS3::list( IListCallback* callback ) return true; } -bool KvStoreBackendS3::put( const std::string& key, IFsFile* src, const std::string& path, +bool KvStoreBackendS3::put( const std::string& key, IFsFile* src, unsigned int flags, bool allow_error_event, std::string& md5sum, int64& compressed_size) { - std::unique_ptr tsourcefile; - - if(src==nullptr) - { - tsourcefile.reset(cachefs->openFile(path, MODE_READ)); - if(tsourcefile.get()==nullptr) - { - std::string syserr = os_last_error_str(); - Server->Log("Error opening source file "+ path+". "+syserr, LL_ERROR); - - cachefs->openFile(path, MODE_READ); - - if(allow_error_event) - { - addSystemEvent("s3_backend", - "Error opening source file", - "Error opening source file "+ path+". "+syserr, LL_ERROR); - } - - return false; - } - src = tsourcefile.get(); - } - else - { - src->Seek(0); - } + src->Seek(0); unsigned int curr_comp_method = (flags & IKvStoreBackend::GetMetadata) > 0 ? comp_method_metadata : comp_method; @@ -786,8 +731,6 @@ bool KvStoreBackendS3::put( const std::string& key, IFsFile* src, const std::str local_md5 = compress_encrypt->md5sum(); - tsourcefile.reset(); - tmpfile_delete.release(); offset_buffer.reset(new offset_buf(0, tmpfile, this, true)); } diff --git a/clouddrive/KvStoreBackendS3.h b/clouddrive/KvStoreBackendS3.h index dd16098a..5670c8b7 100644 --- a/clouddrive/KvStoreBackendS3.h +++ b/clouddrive/KvStoreBackendS3.h @@ -22,12 +22,12 @@ public: static void init_mutex(); - virtual bool get( const std::string& key, const std::string& path, const std::string& md5sum, - unsigned int flags, bool allow_error_event, IFsFile*& ret_file, std::string& ret_md5sum, unsigned int& get_status); + virtual bool get( const std::string& key, const std::string& md5sum, + unsigned int flags, bool allow_error_event, IFsFile* ret_file, std::string& ret_md5sum, unsigned int& get_status); virtual bool list( IListCallback* callback ); - virtual bool put( const std::string& key, IFsFile* src, const std::string& path, + virtual bool put( const std::string& key, IFsFile* src, unsigned int flags, bool allow_error_event, std::string& md5sum, int64& compressed_size) override; diff --git a/clouddrive/KvStoreFrontend.cpp b/clouddrive/KvStoreFrontend.cpp index 82626d87..0880839b 100644 --- a/clouddrive/KvStoreFrontend.cpp +++ b/clouddrive/KvStoreFrontend.cpp @@ -41,6 +41,7 @@ #include "../urbackupcommon/events.h" #include #include +#include "Auto.h" #ifdef HAS_MIRROR #include "../urbackupserver/server_settings.h" #endif @@ -264,7 +265,7 @@ KvStoreFrontend::KvStoreFrontend(const std::string& db_path, IFsFile* ret_file = tmp_f; std::string ret_md5sum; unsigned int get_status; - if (!backend->get("cd_magic_file", std::string(), std::string(), IKvStoreBackend::GetDecrypted, + if (!backend->get("cd_magic_file", std::string(), IKvStoreBackend::GetDecrypted, false, ret_file, ret_md5sum, get_status)) { KvStoreDao::CdSingleObject obj = dao.getSingleObject(); @@ -274,7 +275,7 @@ KvStoreFrontend::KvStoreFrontend(const std::string& db_path, Server->Log("Retrieving " + prefixKey(encodeKey(obj.tkey, obj.trans_id)) + " (first object in cache database)"); if (!backend->get(prefixKey(encodeKey(obj.tkey, obj.trans_id)), - std::string(), obj.md5sum, IKvStoreBackend::GetDecrypted, + obj.md5sum, IKvStoreBackend::GetDecrypted, false, ret_file, ret_md5sum, get_status)) { Server->Log("Retrieving " + prefixKey(encodeKey(obj.tkey, obj.trans_id)) @@ -298,7 +299,7 @@ KvStoreFrontend::KvStoreFrontend(const std::string& db_path, if (has_item.has_item && !backend->get(has_item.item_key, - std::string(), std::string(), IKvStoreBackend::GetDecrypted, + std::string(), IKvStoreBackend::GetDecrypted, true, ret_file, ret_md5sum, get_status)) { std::string err = "Error getting item " + has_item.item_key + " from cloud drive. Encryption key may be wrong."; @@ -330,7 +331,7 @@ KvStoreFrontend::KvStoreFrontend(const std::string& db_path, std::string md5sum; int64 size; - if (!backend->put("cd_magic_file", new_f, std::string(), 0, true, md5sum, size)) + if (!backend->put("cd_magic_file", new_f, 0, true, md5sum, size)) { std::string err = "Error uploading magic file."; setMountStatusErr(err); @@ -469,12 +470,14 @@ KvStoreFrontend::KvStoreFrontend(const std::string& db_path, if (!scrub_obj.empty()) { - std::unique_ptr tmpf(Server->openTemporaryFile()); - IFsFile* ret_file = nullptr; - ScopedFreeObjRef free_get_file(ret_file); + IFsFile* tmpf = Server->openTemporaryFile(); + ScopedDeleteFile del_tmpf(tmpf); + if(tmpf==nullptr) + abort(); + std::string ret_md5sum; unsigned int get_status; - if (!backend->get(trim(scrub_obj), "tmp://"+tmpf->getFilename(), std::string(), IKvStoreBackend::GetScrub, true, ret_file, ret_md5sum, get_status)) + if (!backend->get(trim(scrub_obj), std::string(), IKvStoreBackend::GetScrub, true, tmpf, ret_md5sum, get_status)) { Server->Log("Get scrub of object " + scrub_obj + " failed", LL_ERROR); } @@ -509,23 +512,12 @@ KvStoreFrontend::~KvStoreFrontend() } IFsFile* KvStoreFrontend::get(int64 cd_id, const std::string& key, int64 transid, - const std::string& path, bool prioritize_read, IFsFile* tmpl_file, + bool prioritize_read, IFsFile* tmpl_file, bool allow_error_event, bool& not_found, int64* get_transid) { not_found=false; - if(tmpl_file==nullptr - && !cachefs->directoryExists(ExtractFilePath(path))) - { - if(!cachefs->createDir(ExtractFilePath(path))) - { - std::string syserr = os_last_error_str(); - Server->Log("Error creating directory for GET-file with key "+bytesToHex(key)+" path "+ ExtractFilePath(path)+". "+ syserr, LL_ERROR); - addSystemEvent("cache_err_retry", - "Error creating directory for get file on cache", - "Error creating directory for GET-file with key " + bytesToHex(key) + " path " + ExtractFilePath(path) + ". " + syserr, LL_ERROR); - return nullptr; - } - } + if (tmpl_file == nullptr) + return nullptr; bool is_unsynced = false; SUnsyncedKey unsynced_key; @@ -636,10 +628,8 @@ IFsFile* KvStoreFrontend::get(int64 cd_id, const std::string& key, int64 transid || cd_object.size==-1) { not_found=true; - if (tmpl_file!=nullptr) - return tmpl_file; dao.getDb()->freeMemory(); - return cachefs->openFile(path, MODE_RW_CREATE); + return tmpl_file; } if (cd_object.md5sum.empty() @@ -674,13 +664,13 @@ IFsFile* KvStoreFrontend::get(int64 cd_id, const std::string& key, int64 transid bool from_mirror = false; std::string bkey = prefixKey(encodeKey(cd_id, key, cd_object.trans_id)); - if (!backend->get(bkey, path, cd_object.md5sum, + if (!backend->get(bkey, cd_object.md5sum, flags, allow_error_event, ret, ret_md5sum, get_status)) { not_found = (get_status & IKvStoreBackend::GetStatusNotFound)>0; bool backend_not_found = not_found; if (backend_mirror == nullptr - || !backend_mirror->get(bkey, path, + || !backend_mirror->get(bkey, get_md5sum(cd_object.md5sum), flags, allow_error_event, ret, ret_md5sum, get_status)) { @@ -690,9 +680,7 @@ IFsFile* KvStoreFrontend::get(int64 cd_id, const std::string& key, int64 transid not_found = backend_not_found && not_found; if (not_found) { - if (tmpl_file != nullptr) - return tmpl_file; - return cachefs->openFile(path, MODE_RW_CREATE); + return tmpl_file; } return nullptr; } @@ -811,7 +799,7 @@ bool KvStoreFrontend::reset(const std::string & key, int64 transid) } bool KvStoreFrontend::put(int64 cd_id, const std::string& key, int64 transid, - int64 generation, IFsFile* src, const std::string& path, unsigned int cflags, + int64 generation, IFsFile* src, unsigned int cflags, bool allow_error_event, int64& compressed_size) { IScopedReadLock read_lock(put_shared_mutex.get()); @@ -906,7 +894,7 @@ bool KvStoreFrontend::put(int64 cd_id, const std::string& key, int64 transid, put_flags |= IKvStoreBackend::PutMetadata; std::string md5sum; compressed_size = 0; - bool ret = backend->put(tkey, src, path, put_flags, allow_error_event, md5sum, compressed_size); + bool ret = backend->put(tkey, src, put_flags, allow_error_event, md5sum, compressed_size); if(ret) { @@ -1006,8 +994,16 @@ bool KvStoreFrontend::transaction_finalize(int64 cd_id, int64 transid, bool comp else tkey = convert(cd_id)+"_"+convert(transid) + (complete ? "_complete" : "_finalized"); + std::unique_ptr src(cachefs->openFile(empty_file_path, MODE_READ)); + + if(!src) + { + Server->Log("Error opening empty file. "+cachefs->lastError(), LL_ERROR); + return false; + } + if (backend->put(prefixKey(tkey), - nullptr, empty_file_path, 0, allow_error_event, md5sum, size)) + src.get(), 0, allow_error_event, md5sum, size)) { tries = -2; break; @@ -1061,6 +1057,14 @@ bool KvStoreFrontend::set_active_transactions(int64 cd_id, const std::vectorLog("Setting active transactions... (cd_id=" + convert(cd_id) + ")", LL_INFO); + std::unique_ptr empty_f(cachefs->openFile(empty_file_path, MODE_READ)); + + if(!empty_f) + { + Server->Log("Error opening empty file -2. "+cachefs->lastError(), LL_ERROR); + return false; + } + IDatabase* db = getDatabase(); KvStoreDao dao(db); DBScopedSynchronous synchronous(dao.getDb()); @@ -1095,7 +1099,7 @@ bool KvStoreFrontend::set_active_transactions(int64 cd_id, const std::vectorput(prefixKey(tkey), nullptr, empty_file_path, 0, true, md5sum, size)) + if (backend->put(prefixKey(tkey), empty_f.get(), 0, true, md5sum, size)) { tries = -2; break; @@ -1673,16 +1677,15 @@ bool KvStoreFrontend::importFromBackend( KvStoreDao& dao ) return false; } ScopedDeleteFile tmp_f_del(tmp_f); - IFsFile* ret_file = tmp_f; std::string ret_md5sum; unsigned int get_status; int64 import_total_num_files = -1; - if (backend->get("cd_num_file", std::string(), std::string(), IKvStoreBackend::GetDecrypted, - false, ret_file, ret_md5sum, get_status)) + if (backend->get("cd_num_file", std::string(), IKvStoreBackend::GetDecrypted, + false, tmp_f, ret_md5sum, get_status)) { - if (ret_file->Size() < 1 * 1024 * 1024) + if (tmp_f->Size() < 1 * 1024 * 1024) { - std::string num_data = ret_file->Read(0LL, static_cast<_u32>(ret_file->Size())); + std::string num_data = tmp_f->Read(0LL, static_cast<_u32>(tmp_f->Size())); CRData rnumdata(num_data.data(), num_data.size()); @@ -1703,7 +1706,7 @@ bool KvStoreFrontend::importFromBackend( KvStoreDao& dao ) } else { - Server->Log("cd_num_file too large ("+PrettyPrintBytes(ret_file->Size())+")", LL_WARNING); + Server->Log("cd_num_file too large ("+PrettyPrintBytes(tmp_f->Size())+")", LL_WARNING); } } else @@ -1731,17 +1734,14 @@ bool KvStoreFrontend::importFromBackend( KvStoreDao& dao ) { Server->Log("Retrieving last object to get current generation...", LL_INFO); - std::unique_ptr temp_file(Server->openTemporaryFile()); - std::string temp_file_path = temp_file->getFilename(); - temp_file.reset(); + IFsFile* temp_file = Server->openTemporaryFile(); + ScopedDeleteFile delete_last_f(temp_file); - IFsFile* last_f = nullptr; std::string md5sum_ret; unsigned int get_status; - bool b = backend->get(import_callback.lastModifiedKey(), "tmp://"+temp_file_path, import_callback.lastModifiedMd5sum(), 0, true, last_f, md5sum_ret, get_status); - ScopedDeleteFile delete_last_f(last_f); + bool b = backend->get(import_callback.lastModifiedKey(), import_callback.lastModifiedMd5sum(), 0, true, temp_file, md5sum_ret, get_status); - if(!b || last_f==nullptr) + if(!b) { Server->Log("Error retrieving last modified file", LL_ERROR); return false; @@ -1749,7 +1749,7 @@ bool KvStoreFrontend::importFromBackend( KvStoreDao& dao ) int64 generation=0; - if(!read_generation(last_f, 0, generation)) + if(!read_generation(temp_file, 0, generation)) { return false; } @@ -1985,11 +1985,10 @@ bool KvStoreFrontend::reupload(int64 transid_start, int64 transid_stop, IKvStore std::string ret_md5sum; unsigned int get_status; - IFsFile* ret_file = tmp_f; int flags = IKvStoreBackend::GetDecrypted; - if (!old_backend->get(fkey, std::string(), md5sum, flags, false, ret_file, ret_md5sum, get_status)) + if (!old_backend->get(fkey, md5sum, flags, false, tmp_f, ret_md5sum, get_status)) { - if (!backend->get(fkey, std::string(), md5sum, flags, false, ret_file, ret_md5sum, get_status)) + if (!backend->get(fkey, md5sum, flags, false, tmp_f, ret_md5sum, get_status)) { Server->Log("Error reading " + fkey + ". status=" + convert(get_status), LL_ERROR); ret = false; @@ -2000,7 +1999,7 @@ bool KvStoreFrontend::reupload(int64 transid_start, int64 transid_stop, IKvStore { int64 size = 0; std::string new_md5sum; - if (backend->put(fkey, ret_file, "tmp://"+ret_file->getFilename(), 0, false, ret_md5sum, size)) + if (backend->put(fkey, tmp_f, 0, false, ret_md5sum, size)) { Server->Log("Successfully rewritten " + fkey); KvStoreDao::CdIterObject new_obj; @@ -2377,7 +2376,7 @@ bool KvStoreFrontend::update_total_num(int64 num) std::string md5sum; int64 size; - if (!backend->put("cd_num_file", new_f, std::string(), 0, true, md5sum, size)) + if (!backend->put("cd_num_file", new_f, 0, true, md5sum, size)) { std::string err = "Error uploading cd_num_file file."; Server->Log(err, LL_ERROR); @@ -4257,7 +4256,16 @@ void KvStoreFrontend::PutDbWorker::worker_abort() void KvStoreFrontend::ScrubThread::operator()() { - std::string unused_tmp_file; + Auto(delete this); + IFsFile* tmpf = Server->openTemporaryFile(); + ScopedDeleteFile del_tmpf(tmpf); + + if (tmpf == nullptr) + { + Server->Log("Error opening temporary file in scrub. " + os_last_error_str(), LL_ERROR); + scrub_queue.set_error(true); + return; + } while (true) { @@ -4267,23 +4275,9 @@ void KvStoreFrontend::ScrubThread::operator()() { break; } - - if (unused_tmp_file.empty()) - { - std::unique_ptr tmpf(Server->openTemporaryFile()); - if (tmpf.get() == nullptr) - { - Server->Log("Error opening temporary file in scrub. " + os_last_error_str(), LL_ERROR); - scrub_queue.set_error(true); - return; - } - unused_tmp_file = tmpf->getFilename(); - } - ScopedDeleteFn delete_fn(unused_tmp_file); - IFsFile* get_file = nullptr; + std::string ret_md5sum; unsigned int get_status; - ScopedFreeObjRef free_get_file(get_file); int flags = IKvStoreBackend::GetBackground; if (scrub_action == ScrubAction::Balance) @@ -4321,8 +4315,8 @@ void KvStoreFrontend::ScrubThread::operator()() } bool b = backend->get(object_fn, - "tmp://"+unused_tmp_file, item.md5sum, flags, - false, get_file, ret_md5sum, get_status); + item.md5sum, flags, + false, tmpf, ret_md5sum, get_status); size_t tries = 3; size_t retry_n = 0; @@ -4342,9 +4336,9 @@ void KvStoreFrontend::ScrubThread::operator()() } Server->wait(1000); b = backend->get(object_fn, - "tmp://"+unused_tmp_file, item.md5sum, flags, + item.md5sum, flags, false, - get_file, ret_md5sum, get_status); + tmpf, ret_md5sum, get_status); --tries; ++retry_n; @@ -4360,8 +4354,8 @@ void KvStoreFrontend::ScrubThread::operator()() { if (backend_mirror == nullptr || !backend_mirror->get(object_fn, - "tmp://"+unused_tmp_file, item.md5sum, flags, - false, get_file, ret_md5sum, get_status)) + item.md5sum, flags, + false, tmpf, ret_md5sum, get_status)) { Server->Log("Error while " + strScrubAction(scrub_action) + " key " + object_fn+" Has_newer = "+convert(has_newer), LL_ERROR); curr_has_error = true; @@ -4376,14 +4370,24 @@ void KvStoreFrontend::ScrubThread::operator()() Server->Log("Repairing. Adding empty file...", LL_WARNING); int64 size; std::string md5sum; - if (!backend->put(object_fn, - nullptr, frontend->empty_file_path, 0, false, md5sum, size)) + std::unique_ptr empty_file(frontend->cachefs->openFile("empty.file", MODE_READ)); + + if(!empty_file) { + Server->Log("Error while opening empty file. "+frontend->cachefs->lastError(), LL_ERROR); ++scrub_queue.scrub_errors; } else { - ++scrub_queue.scrub_repaired; + if (!backend->put(object_fn, + empty_file.get(), 0, false, md5sum, size)) + { + ++scrub_queue.scrub_errors; + } + else + { + ++scrub_queue.scrub_repaired; + } } } else @@ -4391,8 +4395,8 @@ void KvStoreFrontend::ScrubThread::operator()() ++scrub_queue.scrub_errors; } } - else if (backend_mirror != nullptr - && get_file != nullptr) + else if (backend_mirror != nullptr && + !(get_status & IKvStoreBackend::GetStatusSkipped)) { bool put_success = false; size_t tries = 0; @@ -4400,7 +4404,7 @@ void KvStoreFrontend::ScrubThread::operator()() { std::string put_md5sum; int64 put_size = 0; - if (backend->put(object_fn, get_file, "tmp://"+unused_tmp_file, IKvStoreBackend::PutAlreadyCompressedEncrypted, true, put_md5sum, put_size)) + if (backend->put(object_fn, tmpf, IKvStoreBackend::PutAlreadyCompressedEncrypted, true, put_md5sum, put_size)) { put_success = true; item.md5sum = put_md5sum; @@ -4429,7 +4433,7 @@ void KvStoreFrontend::ScrubThread::operator()() ++scrub_queue.scrub_errors; } } - else if (scrub_action != ScrubAction::Scrub || get_file != nullptr) + else { if (get_status & IKvStoreBackend::GetStatusRepaired) ++scrub_queue.scrub_repaired; @@ -4437,20 +4441,16 @@ void KvStoreFrontend::ScrubThread::operator()() ++scrub_queue.scrub_oks; } - if (scrub_action == ScrubAction::Scrub && get_file == nullptr && !curr_has_error) + if(!(get_status & IKvStoreBackend::GetStatusSkipped)) { - Server->Log("No file while "+ strScrubAction(scrub_action)+" key "+ - object_fn+" has_newer="+convert(has_newer), LL_ERROR); - ++scrub_queue.scrub_errors; - } - if (b && get_file == nullptr) - { - delete_fn.release(); - } - else - { - unused_tmp_file.clear(); + if(!tmpf->Resize(0)) + { + Server->Log("Error resizing temporary file in scrub. " + os_last_error_str(), LL_ERROR); + scrub_queue.set_error(true); + return; + } } + if (b && !ret_md5sum.empty()) { @@ -4466,19 +4466,12 @@ void KvStoreFrontend::ScrubThread::operator()() } done_size += item.size; - if ( (!b || get_file != nullptr) && !has_changes) + if ( !b && !has_changes) { IScopedLock lock(new_md5sums_mutex); has_changes = true; } } - - if (!unused_tmp_file.empty()) - { - Server->deleteFile(unused_tmp_file); - } - - delete this; } void KvStoreFrontend::MirrorWorker::operator()() @@ -4639,10 +4632,18 @@ void KvStoreFrontend::MirrorWorker::operator()() continue; } + std::unique_ptr empty_file(frontend->cachefs->openFile(frontend->empty_file_path, MODE_READ)); + + if(!empty_file) + { + Server->Log("Cannot open empty file: "+frontend->cachefs->lastError(), LL_ERROR); + abort(); + } + int64 size; std::string md5sum; if (frontend->backend_mirror->put(frontend->prefixKey(convert(trans.id) + suffix), - nullptr, frontend->empty_file_path, 0, false, md5sum, size)) + empty_file.get(), 0, false, md5sum, size)) { dao.setTransactionMirrored(trans.id); --frontend->mirror_items; @@ -4721,12 +4722,11 @@ void KvStoreFrontend::MirrorWorker::operator()() void KvStoreFrontend::MirrorThread::operator()() { - std::string tmp_fn; - { - std::unique_ptr f(Server->openTemporaryFile()); - tmp_fn = f->getFilename(); - } - ScopedDeleteFn delete_tmp_fn(tmp_fn); + IFsFile* tmpf = Server->openTemporaryFile(); + ScopedDeleteFile del_tmpf(tmpf); + + if(tmpf==nullptr) + abort(); std::string cdata; while (!has_error @@ -4745,27 +4745,29 @@ void KvStoreFrontend::MirrorThread::operator()() KvStoreDao::CdIterObject2& obj = objs[i]; std::string fn = frontend->prefixKey(frontend->encodeKey(obj.tkey, obj.trans_id)); - IFsFile* ret_file = nullptr; - ScopedFreeObjRef tmp_f_del(ret_file); std::string ret_md5sum; unsigned int get_status; - Server->deleteFile(tmp_fn); size_t retry_n = 0; bool success = false; while (!success && retry_n < 10) { - if (backend->get(fn, "tmp://"+tmp_fn, obj.md5sum, IKvStoreBackend::GetPrependMd5sum, false, ret_file, ret_md5sum, get_status)) + if(!tmpf->Resize(0) || !tmpf->Seek(0)) + { + abort(); + } + + if (backend->get(fn, obj.md5sum, IKvStoreBackend::GetPrependMd5sum, false, tmpf, ret_md5sum, get_status)) { size_t retry_n_put = 0; while (!success && retry_n_put < 10) { - ret_file->Seek(0); + tmpf->Seek(0); int64 ret_size; - if (backend_mirror->put(fn, ret_file, "tmp://"+tmp_fn, IKvStoreBackend::PutAlreadyCompressedEncrypted, false, ret_md5sum, ret_size)) + if (backend_mirror->put(fn, tmpf, IKvStoreBackend::PutAlreadyCompressedEncrypted, false, ret_md5sum, ret_size)) { frontend->mirror_curr_pos += obj.size; success = true; diff --git a/clouddrive/KvStoreFrontend.h b/clouddrive/KvStoreFrontend.h index d45170e1..fb78f331 100644 --- a/clouddrive/KvStoreFrontend.h +++ b/clouddrive/KvStoreFrontend.h @@ -50,13 +50,13 @@ public: bool background_worker_manual_run, bool background_worker_multi_trans_delete, IBackupFileSystem* cachefs); ~KvStoreFrontend(); - virtual IFsFile* get(const std::string& key, int64 transid, const std::string& path, + virtual IFsFile* get(const std::string& key, int64 transid, bool prioritize_read, IFsFile* tmpl_file, bool allow_error_event, bool& not_found, int64* get_transid = nullptr) { - return get(0, key, transid, path, prioritize_read, tmpl_file, allow_error_event, not_found, get_transid); + return get(0, key, transid, prioritize_read, tmpl_file, allow_error_event, not_found, get_transid); } - IFsFile* get(int64 cd_id, const std::string& key, int64 transid, const std::string& path, + IFsFile* get(int64 cd_id, const std::string& key, int64 transid, bool prioritize_read, IFsFile* tmpl_file, bool allow_error_event, bool& not_found, int64* get_transid=nullptr); @@ -69,15 +69,15 @@ public: virtual bool reset(const std::string& key, int64 transid); virtual bool put(const std::string& key, int64 transid, IFsFile* src, - const std::string& path, unsigned int flags, + unsigned int flags, bool allow_error_event, int64& compressed_size) { - return put(0, key, transid, 0, src, path, flags, + return put(0, key, transid, 0, src, flags, allow_error_event, compressed_size); } bool put(int64 cd_id, const std::string& key, int64 transid, int64 generation, IFsFile* src, - const std::string& path, unsigned int flags, + unsigned int flags, bool allow_error_event, int64& compressed_size); virtual int64 new_transaction(bool allow_error_event) diff --git a/clouddrive/TransactionalKvStore.cpp b/clouddrive/TransactionalKvStore.cpp index 9e761a8e..69628493 100644 --- a/clouddrive/TransactionalKvStore.cpp +++ b/clouddrive/TransactionalKvStore.cpp @@ -463,7 +463,24 @@ public: && kv_store.num_second_chances_cb->is_metadata(item->key)) flags |= IOnlineKvStore::PutMetadata; - while(!kv_store.online_kv_store->put(item->key, item->transid, memf_file, path, flags, n>retry_log_n, compressed_size) ) + IFsFile* putf = memf_file; + while(putf==nullptr) + { + putf = kv_store.cachefs->openFile(path, MODE_READ); + + if(putf==nullptr) + { + std::string syserr = kv_store.cachefs->lastError(); + Server->Log("Cannot open file to submit: " + path+". "+ syserr, LL_ERROR); + addSystemEvent("online_kv_store", + "Cannot open file to submit", + "Cannot open file to submit: " + path + ". " + syserr, LL_ERROR); + + retryWait(n++); + } + } + + while(!kv_store.online_kv_store->put(item->key, item->transid, memf_file, flags, n>retry_log_n, compressed_size) ) { Server->Log("Error submitting dirty data block "+kv_store.hex(item->key)+" transid "+convert(item->transid)+". Retrying...", LL_ERROR); if (kv_store.online_kv_store->fast_write_retry()) @@ -2201,9 +2218,57 @@ IFsFile* TransactionalKvStore::get_retrieve(const std::string& key, BitmapInfo b do { not_found = false; - online_file = online_kv_store->get(key, transid, keypath2(key, transid), - prioritize_read, memfile, retry_n > retry_log_n, not_found); - assert(online_file == nullptr || memfile == nullptr || online_file == memfile); + + IFsFile* online_file_tmpl = memfile; + if (online_file_tmpl == nullptr) + { + std::string online_file_path = keypath2(key, transid); + bool path_err = false; + if (!cachefs->directoryExists(ExtractFilePath(online_file_path))) + { + if (!cachefs->createDir(ExtractFilePath(online_file_path))) + { + std::string syserr = os_last_error_str(); + Server->Log("Error creating directory for GET-file with key " + bytesToHex(key) + " path " + ExtractFilePath(online_file_path) + ". " + syserr, LL_ERROR); + addSystemEvent("cache_err_retry", + "Error creating directory for get file on cache", + "Error creating directory for GET-file with key " + bytesToHex(key) + " path " + ExtractFilePath(online_file_path) + ". " + syserr, LL_ERROR); + path_err = true; + } + } + + if (!path_err) + { + online_file_tmpl = cachefs->openFile(online_file_path, CREATE_DIRECT); + + if (online_file_tmpl == nullptr) + { + std::string syserr = os_last_error_str(); + Server->Log("Error opening output file " + online_file_path + ". " + syserr, LL_ERROR); + addSystemEvent("online_kv_store", + "Error opening output file", + "Error opening output file " + online_file_path + ". " + syserr, LL_ERROR); + } + else + { + set_cache_file_compression(key, online_file_path); + } + } + } + + if (online_file_tmpl != nullptr) + { + online_file = online_kv_store->get(key, transid, + prioritize_read, online_file_tmpl, retry_n > retry_log_n, not_found); + assert(online_file == nullptr || memfile == nullptr || online_file == memfile); + + if (online_file == nullptr && + online_file_tmpl != memfile) + { + Server->destroy(online_file_tmpl); + cachefs->deleteFile(keypath2(key, transid)); + } + } if (online_file != nullptr) { @@ -2392,6 +2457,8 @@ IFsFile* TransactionalKvStore::get_retrieve(const std::string& key, BitmapInfo b retryWait(++retry_n); } FILE_SIZE_CACHE(online_file->setCachedSize(0)); + + set_cache_file_compression(key, path); } } while (online_file == nullptr); @@ -3045,7 +3112,8 @@ TransactionalKvStore::TransactionalKvStore(IBackupFileSystem* cachefs, int64 min bool verify_cache, float cpu_multiplier, size_t no_compress_mult, bool with_prev_link, bool allow_evict, bool with_submitted_files, float resubmit_compressed_ratio, int64 max_memfile_size, std::string memcache_path, float memory_usage_factor, - bool only_memfiles, unsigned int background_comp_method) + bool only_memfiles, unsigned int background_comp_method, + unsigned int cache_comp, unsigned int meta_cache_comp) : min_cachesize(min_cachesize), min_free_size(min_free_size), critical_free_size(critical_free_size), comp_percent(comp_percent), comp_start_limit(comp_start_limit), throttle_free_size(throttle_free_size), do_stop(false), @@ -3074,7 +3142,8 @@ TransactionalKvStore::TransactionalKvStore(IBackupFileSystem* cachefs, int64 min total_submitted_bytes(0), retrieval_waiters_async(0), retrieval_waiters_sync(0), - cachefs(cachefs) + cachefs(cachefs), + cache_comp(cache_comp), meta_cache_comp(meta_cache_comp) { g_cache_mutex = &cache_mutex; @@ -3958,8 +4027,22 @@ void TransactionalKvStore::read_keys(std::unique_lock& cache_lock } std::string tmpfn = "verify.file"; + IFsFile* tmpl_file = cachefs->openFile(tmpfn, CREATE_DIRECT); + if (tmpl_file == nullptr) + { + Server->Log("Could not verify.file on cache fs. "+cachefs->lastError(), LL_ERROR); + assert(false); + throw std::runtime_error("Could not verify.file on cache fs. " + cachefs->lastError()); + } + bool not_found = false; - IFile* online_file = online_kv_store->get(key, transid, tmpfn, false, nullptr, true, not_found); + IFile* online_file = online_kv_store->get(key, transid, false, tmpl_file, true, not_found); + + if (online_file == nullptr) + { + Server->destroy(tmpl_file); + cachefs->deleteFile(tmpfn); + } if (online_file == nullptr || not_found) { @@ -5730,7 +5813,20 @@ bool TransactionalKvStore::read_from_dirty_file(std::unique_lock& bool not_found; int64 get_transid = 0; - IFsFile* f = online_kv_store->get(key, transaction_id, "submit.test.file", false, nullptr, true, not_found, &get_transid); + + std::string tmpfn = "submit.test.file"; + IFsFile* tmpl_file = cachefs->openFile(tmpfn, CREATE_DIRECT); + + if (tmpl_file == nullptr) + { + Server->Log("Cannot open submit.test.file "+cachefs->lastError(), LL_ERROR); + addSystemEvent("cache_err_fatal", + "Cannot open file", + "Cannot open submit.test.file " + cachefs->lastError(), LL_ERROR); + abort(); + } + + IFsFile* f = online_kv_store->get(key, transaction_id, false, tmpl_file, true, not_found, &get_transid); if (f == nullptr) { Server->Log("Cannot get file not_found="+convert(not_found), LL_ERROR); @@ -5742,7 +5838,7 @@ bool TransactionalKvStore::read_from_dirty_file(std::unique_lock& else { Server->destroy(f); - cachefs->deleteFile("submit.test.file"); + cachefs->deleteFile(tmpfn); Server->Log("Get ok", LL_WARNING); } @@ -9035,4 +9131,20 @@ void TransactionalKvStore::MemfdDelThread::operator()() lock.lock(); } +} + +bool TransactionalKvStore::set_cache_file_compression(const std::string& key, const std::string& fpath) +{ + if (meta_cache_comp != CompressionNone && + num_second_chances_cb != nullptr && + num_second_chances_cb->is_metadata(key)) + { + return cachefs->setXAttr(fpath, "btrfs.compression", CompressionMethodToBtrfsString(meta_cache_comp)); + } + else if (cache_comp != CompressionNone) + { + return cachefs->setXAttr(fpath, "btrfs.compression", CompressionMethodToBtrfsString(cache_comp)); + } + + return true; } \ No newline at end of file diff --git a/clouddrive/TransactionalKvStore.h b/clouddrive/TransactionalKvStore.h index bedd9701..1b48097d 100644 --- a/clouddrive/TransactionalKvStore.h +++ b/clouddrive/TransactionalKvStore.h @@ -8,12 +8,12 @@ #include "../Interface/BackupFileSystem.h" #include #include "../Interface/Condition.h" -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include "IOnlineKvStore.h" #include "../urbackupcommon/os_functions.h" #ifdef HAS_ASYNC @@ -206,7 +206,8 @@ public: IOnlineKvStore* online_kv_store, const std::string& encryption_key, ICompressEncryptFactory* compress_encrypt_factory, bool verify_cache, float cpu_multiplier, size_t no_compress_mult, bool with_prev_link, bool allow_evict, bool with_submitted_files, float resubmit_compressed_ratio, int64 max_memfile_size, std::string memcache_path, - float memory_usage_factor, bool only_memfiles, unsigned int background_comp_method); + float memory_usage_factor, bool only_memfiles, unsigned int background_comp_method, + unsigned int cache_comp, unsigned int meta_cache_comp); ~TransactionalKvStore(); @@ -413,8 +414,8 @@ private: #endif void wait_for_del_file(const std::string& fn); - - bool read_dirty_items(std::unique_lock& cache_lock, int64 transaction_id, int64 attibute_to_trans_id); + + bool read_dirty_items(std::unique_lock& cache_lock, int64 transaction_id, int64 attibute_to_trans_id); bool read_submitted_evicted_files(const std::string& fn, std::set& sub_evict); bool read_from_dirty_file(std::unique_lock& cache_lock, const std::string& fn, int64 transaction_id, bool do_submit, int64 nosubmit_transid); @@ -528,6 +529,8 @@ private: TransactionalKvStore::SCacheVal cache_val(const std::string& key, bool dirty); TransactionalKvStore::SCacheVal cache_val_nc(bool dirty); + bool set_cache_file_compression(const std::string& key, const std::string& fpath); + std::list::iterator submission_queue_add(SSubmissionItem& item, bool memfile); std::list::iterator submission_queue_insert(SSubmissionItem& item, bool memfile, std::list::iterator it); @@ -800,4 +803,7 @@ private: std::string memcache_path; std::unique_ptr cache_lock_file; + + unsigned int cache_comp; + unsigned int meta_cache_comp; };