// Mirror of https://github.com/uroni/hs5.git (synced 2025-10-26 11:17:18 +00:00) - C++ header, 591 lines, 14 KiB.
#pragma once
|
|
#include <folly/io/IOBuf.h>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <set>
|
|
#include <map>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <memory>
|
|
#include <queue>
|
|
#include <memory.h>
|
|
#include "lmdb/lmdb.h"
|
|
#include <assert.h>
|
|
#include "relaxed_atomic.h"
|
|
#include <sys/types.h>
|
|
#include <mutex>
|
|
#include <condition_variable>
|
|
#include <shared_mutex>
|
|
#include <folly/File.h>
|
|
#include <iostream>
|
|
#include <stdint.h>
|
|
#include <filesystem>
|
|
#include <thread>
|
|
#include <folly/io/IOBufQueue.h>
|
|
|
|
|
|
// Thread identifier type used for the `tid` parameters in this header; an alias for POSIX pid_t.
using THREAD_ID = pid_t;
|
|
|
|
class SingleFileStorage
|
|
{
|
|
public:
|
|
|
|
/// Offset/length pair that sorts by its offset.
/// NOTE(review): the name suggests a byte range queued for hole punching - confirm against the implementation.
struct SPunchItem
{
    /// Creates an unset item (offset -1, length 0) via the member initializers below.
    SPunchItem() = default;

    /// Creates an item covering `len` units starting at `offset`.
    SPunchItem(int64_t offset, int64_t len)
        : offset(offset), len(len) {}

    /// Strict ordering by offset only; `len` does not take part in the comparison.
    bool operator<(const SPunchItem& other) const
    {
        return offset < other.offset;
    }

    int64_t offset = -1;
    int64_t len = 0;
};
|
|
|
|
/// One extent: an object offset, a data file offset and a length.
/// NOTE(review): presumably maps `len` bytes at `obj_offset` inside an object to
/// `data_file_offset` in the data file - confirm against the implementation.
struct Ext
{
    /// Creates an unset extent (obj_offset 0, data_file_offset -1, len 0) via the member initializers below.
    Ext() = default;

    Ext(int64_t obj_offset, int64_t data_file_offset, int64_t len)
        : obj_offset(obj_offset), data_file_offset(data_file_offset), len(len) {}

    /// Strict ordering by obj_offset only; the other fields are ignored.
    bool operator<(const Ext& other) const
    {
        return obj_offset < other.obj_offset;
    }

    int64_t obj_offset = 0;
    int64_t data_file_offset = -1;
    int64_t len = 0;
};
|
|
|
|
/// Action selector passed as the `da` argument of del().
/// The numeric values are spelled out explicitly; keep them stable.
/// NOTE(review): whether these values are persisted or sent over the wire is not visible here.
enum class DelAction
{
    Del = 0,
    DelOld = 1,
    DelWithQueued = 2,
    Queue = 3,
    Unqueue = 4,
    AssertQueueEmpty = 5
};
|
|
|
|
/// Options bundle taken by value by the SingleFileStorage(SFSOptions) constructor.
/// Every non-string field has an in-class default; strings default to empty.
/// NOTE(review): the exact meaning of each option is defined by the implementation file.
struct SFSOptions
{
    std::string data_path;
    std::string db_path;
    std::string freespace_cache_path;
    std::string dm_cache_path;
    int64_t dm_cache_size = 0;
    bool use_direct_io = false;
    int64_t data_file_size_limit_mb = 0;
    // 512 MiB
    int64_t alloc_chunk_size = 512 * 1024 * 1024;
    std::string runtime_id;
    bool manual_commit = false;
    bool stop_on_error = false;
    bool punch_holes = true;
};
|
|
|
|
SingleFileStorage(SFSOptions options);
|
|
|
|
//Start with dead SFS
|
|
SingleFileStorage();
|
|
|
|
void operator=(const SingleFileStorage&) = delete;
|
|
SingleFileStorage(SingleFileStorage&) = delete;
|
|
|
|
~SingleFileStorage();
|
|
|
|
static void init_mutex();
|
|
|
|
static void handle_mmap_read_error(void* addr);
|
|
|
|
struct WritePrepareResult
|
|
{
|
|
int err;
|
|
std::vector<Ext> extents;
|
|
};
|
|
|
|
WritePrepareResult write_prepare(const std::string& fn, size_t data_size, size_t max_data_fragments);
|
|
|
|
int write_ext(const Ext& ext, const void* data, size_t data_size);
|
|
|
|
int write_finalize(const std::string& fn, const std::vector<Ext>& extents, int64_t last_modified, const std::string& md5sum,
|
|
bool no_del_old, bool is_fragment);
|
|
|
|
int write(const std::string& fn,
|
|
const char* data, size_t data_size, int64_t last_modified, const std::string& md5sum,
|
|
bool no_del_old, bool is_fragment, size_t max_data_fragments);
|
|
|
|
const static unsigned int ReadWithReadahead = 1;
|
|
const static unsigned int ReadUnsynced = 2;
|
|
const static unsigned int ReadMetaOnly = 4;
|
|
|
|
struct ReadPrepareResult
|
|
{
|
|
int err;
|
|
std::vector<Ext> extents;
|
|
int64_t total_len;
|
|
};
|
|
|
|
ReadPrepareResult read_prepare(const std::string& fn, unsigned int flags);
|
|
|
|
struct ReadExtResult
|
|
{
|
|
int err;
|
|
std::unique_ptr<folly::IOBuf> buf;
|
|
};
|
|
|
|
ReadExtResult read_ext(const Ext& ext, const unsigned int flags, const size_t bufsize, folly::IOBufQueue& buf);
|
|
|
|
int read_finalize(const std::string& fn, const std::vector<Ext>& extents, unsigned int flags);
|
|
|
|
bool del(const std::string& fn, DelAction da,
|
|
bool background_queue);
|
|
|
|
bool restore_old(const std::string& fn);
|
|
|
|
|
|
bool commit(bool background_queue, int64_t transid) {
|
|
return commit(background_queue, transid, 0);
|
|
}
|
|
|
|
bool commit(bool background_queue, int64_t transid, int64_t disk_id);
|
|
|
|
bool empty_queue(bool background_queue);
|
|
|
|
struct IterData
|
|
{
|
|
MDB_txn* iter_txn;
|
|
MDB_cursor* iter_cur;
|
|
MDB_val iter_key;
|
|
MDB_val iter_val;
|
|
};
|
|
|
|
bool iter_start(int64_t disk_id, bool compressed, IterData& iter_data);
|
|
|
|
bool iter_start(bool compressed, IterData& iter_data);
|
|
|
|
bool iter_start(std::string fn, bool compressed, IterData& iter_data);
|
|
|
|
void start_debug();
|
|
|
|
void iter_stop(IterData& iter_data);
|
|
|
|
bool iter_next(IterData& iter_data);
|
|
|
|
bool iter_curr_val(std::string& fn, int64_t& offset, int64_t& size, std::vector<SPunchItem>& exta_exts, int64_t& last_modified, std::string& md5sum, IterData& iter_data);
|
|
|
|
bool iter_curr_val(std::string& fn, std::string& data, IterData& iter_data);
|
|
|
|
virtual void operator()();
|
|
|
|
int64_t get_free_space_in_data_file();
|
|
|
|
int64_t get_free_space_real();
|
|
|
|
int64_t get_total_space();
|
|
|
|
int64_t get_data_file_size();
|
|
|
|
int64_t max_free_extent(int64_t& len);
|
|
|
|
int64_t get_free_space_slow(bool verbose, int64_t& freespace_extents, std::vector<SPunchItem>* items);
|
|
|
|
bool check_len_idx();
|
|
|
|
using str_map = std::map<std::string, std::string>;
|
|
|
|
void defrag(str_map& params, relaxed_atomic<int64_t>& defrag_items);
|
|
|
|
std::string get_db_path() { return db_path; }
|
|
|
|
std::string get_cache_path() { return freespace_cache_path; }
|
|
|
|
bool is_write_offline() { return write_offline; }
|
|
|
|
bool start_thread(int64_t transid);
|
|
|
|
int64_t get_transid() {
|
|
std::scoped_lock lock(mutex);
|
|
return curr_transid;
|
|
}
|
|
|
|
int64_t get_transid(int64_t disk_id);
|
|
|
|
std::string meminfo();
|
|
|
|
bool set_write_offline(bool b);
|
|
|
|
bool get_is_dead() { return is_dead; }
|
|
|
|
bool set_allow_defrag(bool b) {
|
|
return set_allow_defrag(b, 0);
|
|
}
|
|
|
|
bool set_allow_defrag(bool b, int64_t disk_id);
|
|
|
|
bool set_stop_defrag(bool b) { stop_defrag = b; return true; }
|
|
|
|
bool reset_del_log(int64_t disk_id, int64_t reset_transid);
|
|
|
|
bool reset_del_queue(int64_t disk_id, int64_t reset_transid);
|
|
|
|
int64_t get_disk_id(const std::string& uuid);
|
|
|
|
void migrate_thread();
|
|
|
|
bool start_migrate();
|
|
|
|
void reference();
|
|
|
|
void unreference();
|
|
|
|
static std::string decompress_filename(const std::string& fn);
|
|
|
|
static int64_t get_fn_disk_id(const std::string& fn);
|
|
|
|
static std::string remove_disk_id(const std::string& fn, size_t disk_id_size);
|
|
|
|
std::string freespace_stats();
|
|
|
|
virtual void wait_for_startup_finish();
|
|
|
|
std::string get_runtime_id() {
|
|
return runtime_id;
|
|
}
|
|
|
|
bool get_manual_commit() {
|
|
return manual_commit;
|
|
}
|
|
|
|
private:
|
|
|
|
int write_int(const std::string& fn, const char* data, size_t data_size,
|
|
int64_t last_modified, const std::string& md5sum, bool allow_defrag_lock, bool no_del_old,
|
|
size_t max_data_fragments);
|
|
|
|
int64_t remove_fn(const std::string& fn,
|
|
MDB_txn* txn, MDB_txn* freespace_txn, bool del_from_main, bool del_old, THREAD_ID tid);
|
|
|
|
int64_t restore_fn(const std::string& fn,
|
|
MDB_txn* txn, MDB_txn* freespace_txn, THREAD_ID tid);
|
|
|
|
int64_t log_fn(const std::string& fn,
|
|
MDB_txn* txn, THREAD_ID tid, int64_t transid);
|
|
|
|
int64_t add_tmp(int64_t idx, MDB_txn* txn, THREAD_ID tid, int64_t offset, int64_t len);
|
|
|
|
int64_t rm_tmp(int64_t idx, MDB_txn* txn, THREAD_ID tid);
|
|
|
|
void wait_queue(std::unique_lock<std::mutex>& lock, bool background_queue, bool defrag_check);
|
|
|
|
bool add_freemap_ext(MDB_txn* txn, int64_t offset, int64_t len, bool used_in_curr_trans, THREAD_ID tid);
|
|
|
|
bool add_freemap_ext_simple(MDB_txn* txn, int64_t offset, int64_t len, THREAD_ID tid);
|
|
|
|
bool find_freemap_ext(MDB_txn* txn, THREAD_ID tid, int64_t& start, int64_t& len);
|
|
|
|
void lock_defrag(const std::string& fn);
|
|
|
|
bool is_defrag_skip_item(const std::string& fn);
|
|
|
|
void unlock_defrag(const std::string& fn);
|
|
|
|
void wait_defrag(const std::string& fn, std::unique_lock<std::mutex>& lock);
|
|
|
|
void setup_mmap_read_error(THREAD_ID tid);
|
|
|
|
bool clear_mmap_read_error(THREAD_ID tid);
|
|
|
|
bool has_mmap_read_error_reset(THREAD_ID tid);
|
|
|
|
int64_t reset_del_log_fn(MDB_txn* txn, MDB_txn* freespace_txn, THREAD_ID tid, int64_t disk_id, int64_t transid);
|
|
|
|
int64_t reset_holes(MDB_txn* txn, MDB_txn* freespace_txn, THREAD_ID tid);
|
|
|
|
int64_t reset_del_queue(MDB_txn* txn, MDB_txn* freespace_txn, THREAD_ID tid, int64_t disk_id, int64_t transid);
|
|
|
|
void wait_startup_finished(std::unique_lock<std::mutex>& lock);
|
|
|
|
void free_extents(const std::vector<Ext>& extents);
|
|
|
|
int64_t get_really_min_space(int64_t& index_file_size);
|
|
|
|
int64_t get_burn_in_data_size();
|
|
|
|
bool do_free_minspace(MDB_txn* txn, MDB_txn* freespace_txn, THREAD_ID tid);
|
|
|
|
static std::mutex mmap_read_error_mutex;
|
|
|
|
static std::unordered_map<THREAD_ID, std::pair<bool, std::vector<uintptr_t> > > mmap_read_error_jmp;
|
|
static std::vector<MDB_env*> mmap_dbs;
|
|
|
|
std::vector<uintptr_t> mmap_cleanup_addrs;
|
|
|
|
int64_t queue_del(const std::string& fn, MDB_txn* txn, THREAD_ID tid, int64_t transid);
|
|
|
|
int64_t unqueue_del(const std::string& fn, MDB_txn* txn, THREAD_ID tid);
|
|
|
|
void add_defrag_skip_items_queue();
|
|
|
|
bool open_cache_db(int64_t current_txn_id, int64_t mapsize, bool use_other, bool del_create, MDB_txn*& freespace_txn);
|
|
|
|
bool generate_freespace_cache(MDB_txn* source_txn, MDB_txn* dst_txncs, bool fast_gen);
|
|
|
|
bool freespace_check(MDB_txn* source_txn, MDB_txn* freespace_txn, bool fast_check);
|
|
|
|
bool clear_freespace_cache(MDB_txn* txn);
|
|
|
|
bool regen_datafile_free(MDB_txn* freespace_txn);
|
|
|
|
bool regen_free_len_idx(MDB_txn* freespace_txn);
|
|
|
|
class TmpMmapedPgIds
|
|
{
|
|
public:
|
|
TmpMmapedPgIds();
|
|
|
|
~TmpMmapedPgIds();
|
|
|
|
|
|
void add_pgid(size_t pgid)
|
|
{
|
|
if (n_pgids * sizeof(pgid) + sizeof(pgid) >= mmap_size)
|
|
{
|
|
std::cerr << "pgid mmap too small" << std::endl;
|
|
abort();
|
|
}
|
|
memcpy(mmap_ptr + n_pgids * sizeof(pgid), &pgid, sizeof(pgid));
|
|
++n_pgids;
|
|
}
|
|
|
|
size_t size()
|
|
{
|
|
return n_pgids;
|
|
}
|
|
|
|
size_t* begin()
|
|
{
|
|
return reinterpret_cast<size_t*>(mmap_ptr);
|
|
}
|
|
|
|
size_t* end()
|
|
{
|
|
return reinterpret_cast<size_t*>(mmap_ptr + n_pgids * sizeof(size_t));
|
|
}
|
|
|
|
size_t& get(size_t idx)
|
|
{
|
|
return *(begin() + idx);
|
|
}
|
|
|
|
private:
|
|
folly::File backing_file;
|
|
size_t n_pgids;
|
|
size_t mmap_size;
|
|
char* mmap_ptr;
|
|
};
|
|
|
|
/// NOTE(review): presumably collects the page ids of database `dbi` into `mmap_pg_ids` - confirm.
bool read_pgids(MDB_txn* txn, MDB_dbi dbi, THREAD_ID tid, TmpMmapedPgIds& mmap_pg_ids);
|
|
|
|
/// Action tag stored in SFragInfo::action.
/// Enumerators are numbered implicitly from 0 in declaration order; add new ones
/// at the end to keep the existing values stable.
enum class FragAction
{
    Add,
    Del,
    Commit,
    FindFree,
    AddNoDelOld,
    DelOld,
    RestoreOld,
    EmptyQueue,
    ReadFragInfo,
    FreeExtents,
    ResetDelLog,
    GetDiskId,
    QueueDel,
    UnqueueDel,
    DelWithQueued,
    ResetDelQueue,
    AssertDelQueueEmpty
};
|
|
|
|
struct SFragInfo;

/// State attached to a commit request; SFragInfo::commit_info points to one of these.
/// NOTE(review): `commit_done` is presumably signalled when the commit finishes - confirm.
struct SCommitInfo
{
    SCommitInfo()
        : commit_errors(0),
        new_datafile_offset(-1),
        new_datafile_offset_end(-1),
        frag_info(nullptr)
    {}

    int64_t commit_errors;
    std::condition_variable commit_done;
    // Both offsets start at -1, the same "unset" value the default SPunchItem/SFragInfo
    // offsets use; they used to be left indeterminate.
    int64_t new_datafile_offset;
    int64_t new_datafile_offset_end;
    SFragInfo* frag_info;
};
|
|
|
|
struct SFragInfo
|
|
{
|
|
SFragInfo() : offset(-1), len(0),
|
|
last_modified(0), commit_info(nullptr) {
|
|
}
|
|
SFragInfo(int64_t offset, int64_t len)
|
|
: offset(offset), len(len),
|
|
last_modified(0), commit_info(nullptr) {}
|
|
|
|
FragAction action;
|
|
std::string fn;
|
|
int64_t offset;
|
|
int64_t len;
|
|
int64_t last_modified;
|
|
std::string md5sum;
|
|
SCommitInfo* commit_info;
|
|
std::vector<SPunchItem> extra_exts;
|
|
};
|
|
|
|
std::string compress_filename(const std::string& fn);
|
|
|
|
SFragInfo get_frag_info(MDB_txn* txn, const std::string& fn);
|
|
|
|
bool generate_free_len_idx(MDB_txn* txn);
|
|
|
|
int64_t get_disk_id(MDB_txn * txn, THREAD_ID tid, const std::string& uuid);
|
|
|
|
int64_t get_disk_trans_id(MDB_txn * txn, THREAD_ID tid, int64_t disk_id);
|
|
|
|
bool set_disk_trans_id(MDB_txn * txn, THREAD_ID tid, int64_t disk_id, int64_t trans_id);
|
|
|
|
bool rewrite_npages(MDB_txn* txn, MDB_cursor* mc, THREAD_ID tid, size_t npages);
|
|
|
|
int put_with_rewrite(MDB_txn* txn, MDB_dbi dbi, MDB_val* tkey, MDB_val* tval, THREAD_ID tid, size_t npages);
|
|
|
|
void add_reading_item(const SFragInfo& fi);
|
|
|
|
void remove_reading_item(const std::vector<Ext>& extents);
|
|
|
|
void do_stop_on_error();
|
|
|
|
bool with_rewrite;
|
|
|
|
std::unordered_set<std::string> defrag_skip_items;
|
|
bool is_defragging;
|
|
int defrag_restart;
|
|
std::atomic<bool> stop_defrag;
|
|
bool allow_defrag;
|
|
std::set<int64_t> disallow_defrag_disk_id;
|
|
std::deque<SFragInfo> commit_queue;
|
|
std::deque<SFragInfo> commit_background_queue;
|
|
std::thread commit_thread_h;
|
|
std::unordered_map<size_t, size_t> commit_items;
|
|
|
|
bool do_quit;
|
|
|
|
bool startup_finished;
|
|
|
|
int64_t data_file_max_size;
|
|
int64_t data_file_offset;
|
|
int64_t data_file_offset_end;
|
|
int64_t data_file_free;
|
|
std::map<int64_t, int64_t> reserved_extents;
|
|
folly::File data_file;
|
|
folly::File data_file_dio;
|
|
folly::File new_data_file;
|
|
folly::File new_data_file_dio;
|
|
MDB_env* db_env;
|
|
MDB_dbi dbi_main;
|
|
MDB_dbi dbi_free;
|
|
MDB_dbi dbi_free_len;
|
|
MDB_dbi dbi_size;
|
|
MDB_dbi dbi_old;
|
|
MDB_dbi dbi_holes;
|
|
MDB_dbi dbi_queue_del;
|
|
MDB_env* cache_db_env;
|
|
MDB_dbi dbi_cache_size;
|
|
std::set<int64_t> curr_new_free_extents;
|
|
std::set<int64_t> reading_free_skip_extents;
|
|
std::unordered_set<std::string> defrag_items;
|
|
std::vector<SPunchItem> curr_free_skip_extents;
|
|
|
|
/// Value type of the reading_items map: a reference count plus a flag.
/// NOTE(review): presumably tracks extents with active readers - confirm.
struct ReadingItem
{
    size_t refs = 0;
    bool free_skip = false;
};
|
|
|
|
std::map<int64_t, ReadingItem> reading_items;
|
|
|
|
std::mutex mutex;
|
|
std::condition_variable cond;
|
|
|
|
std::mutex datafileoffset_mutex;
|
|
std::mutex freespace_mutex;
|
|
|
|
int64_t min_free_space;
|
|
|
|
std::string db_path;
|
|
|
|
std::string freespace_cache_path;
|
|
|
|
relaxed_atomic<bool> is_dead;
|
|
relaxed_atomic<bool> write_offline;
|
|
|
|
int64_t curr_transid;
|
|
|
|
bool force_freespace_check;
|
|
|
|
bool regen_freespace_cache;
|
|
bool sync_freespace_cache;
|
|
|
|
int64_t next_disk_id;
|
|
|
|
int64_t data_file_copy_done;
|
|
int64_t data_file_copy_done_sync;
|
|
int64_t data_file_copy_max;
|
|
bool stop_data_file_copy;
|
|
std::shared_mutex data_file_copy_mutex;
|
|
std::thread migrate_thread_h;
|
|
relaxed_atomic<int> references;
|
|
|
|
bool mdb_curr_sync;
|
|
|
|
std::filesystem::path data_file_path;
|
|
|
|
int64_t data_file_size_limit;
|
|
int64_t alloc_chunk_size;
|
|
|
|
std::string runtime_id;
|
|
bool manual_commit;
|
|
bool stop_on_error;
|
|
bool punch_holes;
|
|
};
|
|
|
|
|
|
class ScopedSFSRef
|
|
{
|
|
SingleFileStorage* sfs;
|
|
public:
|
|
ScopedSFSRef(SingleFileStorage* sfs)
|
|
: sfs(sfs) {
|
|
if(sfs!=nullptr)
|
|
sfs->reference();
|
|
}
|
|
|
|
void reset(SingleFileStorage* nsfs)
|
|
{
|
|
if (sfs != nullptr) sfs->unreference();
|
|
sfs = nsfs;
|
|
if (sfs != nullptr) sfs->reference();
|
|
}
|
|
|
|
~ScopedSFSRef() {
|
|
if (sfs != nullptr)
|
|
sfs->unreference();
|
|
}
|
|
};
|