diff --git a/CMakeLists.txt b/CMakeLists.txt index 37e8443..0a2dee6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,33 @@ set(APIGEN_SCHEMAS SessionCheckResp # Must be last ) +# Checkout https://github.com/duckdb/duckdb-ui at revision 6cbef805772b2cc974b86ab01da3906a0d365fdb into subdir duckdb-ui, don't run CMake on it +set(DUCKDB_UI_REVISION "6cbef805772b2cc974b86ab01da3906a0d365fdb") +include(FetchContent) +FetchContent_Declare( + duckdb-ui + GIT_REPOSITORY https://github.com/duckdb/duckdb-ui.git + GIT_TAG ${DUCKDB_UI_REVISION} + SOURCE_DIR ${CMAKE_SOURCE_DIR}/duckdb-ui + CONFIGURE_COMMAND "" # Prevent running CMake on duckdb-ui + BUILD_COMMAND "" # Prevent building duckdb-ui + INSTALL_COMMAND "" # Prevent installing duckdb-ui +) +FetchContent_Populate(duckdb-ui) + + +set(DUCKDB_SRCS "duckdb-ui/src/event_dispatcher.cpp" + "duckdb-ui/src/http_server.cpp" + "duckdb-ui/src/settings.cpp" + "duckdb-ui/src/state.cpp" + "duckdb-ui/src/ui_extension.cpp" + "duckdb-ui/src/watcher.cpp" + "duckdb-ui/src/utils/encoding.cpp" + "duckdb-ui/src/utils/env.cpp" + "duckdb-ui/src/utils/helpers.cpp" + "duckdb-ui/src/utils/md_helpers.cpp" + "duckdb-ui/src/utils/serialization.cpp" +) add_custom_target(apigen) foreach(SCHEMA ${APIGEN_SCHEMAS}) @@ -99,7 +126,8 @@ add_executable(hs5 src/WalFile.cpp wwwgen/www_files.cpp src/DuckDbFs.cpp - ${SCHEMA_SOURCES}) + ${SCHEMA_SOURCES} + ${DUCKDB_SRCS}) set_property(TARGET hs5 PROPERTY CXX_STANDARD 20) @@ -125,6 +153,9 @@ target_link_libraries(hs5 PRIVATE $,zst proxygen::proxygen proxygen::proxygencurl proxygen::proxygenhttpserver expat::expat unofficial-sodium::sodium SqliteCppGen::SqliteCppGen nlohmann_json::nlohmann_json fmt::fmt $,duckdb,duckdb_static>) +# DuckDB UI +target_compile_definitions(hs5 PRIVATE UI_EXTENSION_SEQ_NUM="1" UI_EXTENSION_GIT_SHA="${DUCKDB_UI_REVISION}") +target_include_directories(hs5 PRIVATE ${CMAKE_SOURCE_DIR}/duckdb-ui/src/include ${CMAKE_SOURCE_DIR}/duckdb-ui/third_party/httplib) 
target_compile_features(hs5 PUBLIC cxx_std_20) diff --git a/README.md b/README.md index c740b52..0d62112 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ Table of Contents * [How the storage works](#how-the-storage-works) * [Durability guarantees](#durability-guarantees) * [Manual commit mode](#manual-commit-mode) + * [DuckDB UI](#duckdb-ui) # About HS5 @@ -116,3 +117,13 @@ The main object storage consists of (mostly) two files. One is an `index.lmdb` L Step 5 makes sure that we notice a restart of `hs5`. In that case, we have to re-upload the two objects since they might not be flushed to disk. If the comparison at step 5 fails, you could also abort, but `objA` and `objB` might be stored in the bucket. You might want to add some periodic task that checks for such orphaned objects or keep track of potentially orphaned objects somehow and clean them up regularly. +## DuckDB UI # + +HS5 integrates with DuckDB. If you run hs5 with the switch `--duckdb-ui`, it starts the DuckDB UI on port 4213 by default (use `--duckdb-http-port` to choose a different port). You can then query files (e.g. `parquet` files) in HS5 buckets directly using the `hs5://` scheme. Currently only reading is supported. For example, the 
following DuckDB query: + +```sql +SELECT MIN(arrival_time - departure_time) +FROM "hs5://test/train_services.parquet" + WHERE arrival_time > departure_time; +``` + diff --git a/src/cmd.cpp b/src/cmd.cpp index 846055c..0af24ec 100644 --- a/src/cmd.cpp +++ b/src/cmd.cpp @@ -227,6 +227,13 @@ int actionRun(std::vector args) TCLAP::SwitchArg manualCommitArg("", "manual-commit", "Manual commit mode (default false)", cmd); + TCLAP::SwitchArg duckDbUi("", "duckdb-ui", + "Run DuckDB UI", cmd); + + TCLAP::ValueArg duckDbUiPort("", "duckdb-http-port", + "Specifies on which port DuckDB UI will run (default 4213)", + false, 4213, "port number", cmd); + std::vector realArgs; realArgs.push_back(args[0]); @@ -316,6 +323,13 @@ int actionRun(std::vector args) realArgs.push_back(toFollyLoglevel(loglevelArg.getValue())); } + if(duckDbUi.getValue()) + { + realArgs.push_back("--run_duckdb"); + realArgs.push_back("--duckdb_port"); + realArgs.push_back(std::to_string(duckDbUiPort.getValue())); + } + return runRealMain(realArgs); } diff --git a/src/main.cpp b/src/main.cpp index 1de1f73..7629abb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -32,6 +32,7 @@ #include "config.h" #include "StaticHandler.h" #include "DuckDbFs.h" +#include "ui_extension.hpp" duckdb::DuckDB& getDuckDb() { @@ -61,6 +62,8 @@ DEFINE_bool(punch_holes, true, "Free up space if not enough free space is left b DEFINE_string(server_url, "serverurl", "URL of server"); DEFINE_bool(bucket_versioning, false, "Enable bucket versioning"); DEFINE_string(index_wal_path, "", "Path where to put the index WAL file. 
Disabled if empty"); +DEFINE_bool(run_duckdb, false, "Run DuckDB UI"); +DEFINE_int32(duckdb_port, 4213, "Port to listen on with DuckDB UI protocol"); namespace { std::unique_ptr server; @@ -219,33 +222,37 @@ int realMain(int argc, char* argv[]) server.reset(); }); - XLOGF(INFO, "Starting DuckDB..."); - - duckdb::Connection con(getDuckDb()); - - auto& fs =(getDuckDb().instance)->GetFileSystem(); - fs.RegisterSubSystem(duckdb::make_uniq(sfs, FLAGS_bucket_versioning)); - - auto res = con.Query("LOAD '/home/urpc/duckdb-ui/build/release/extension/ui/ui.duckdb_extension'"); - if(res->HasError()) + if(FLAGS_run_duckdb) { - XLOGF(ERR, "Failed to load UI extension: {}", res->GetError()); - return 1; - } - else - { - XLOGF(INFO, "UI extension loaded successfully {}", res->ToString()); - } + XLOGF(INFO, "Starting DuckDB..."); - res = con.Query("CALL start_ui_server()"); - if(res->HasError()) - { - XLOGF(ERR, "Failed to start UI server: {}", res->GetError()); - return 1; - } - else - { - XLOGF(INFO, "UI server started successfully {}", res->ToString()); + duckdb::Connection con(getDuckDb()); + + auto& fs =(getDuckDb().instance)->GetFileSystem(); + fs.RegisterSubSystem(duckdb::make_uniq(sfs, FLAGS_bucket_versioning)); + + getDuckDb().LoadExtension(); + + auto res = con.Query("SET ui_local_port = "+std::to_string(FLAGS_duckdb_port)); + if(res->HasError()) + { + XLOGF(ERR, "Failed setting duckdb port: {}", res->GetError()); + return 1; + } + + res = con.Query("CALL start_ui_server()"); + if(res->HasError()) + { + XLOGF(ERR, "Failed to start UI server: {}", res->GetError()); + return 1; + } + else + { + if(res->RowCount()==1) + XLOGF(INFO, "DuckDB UI server started successfully {}", res->GetValue(0, 0).ToString()); + else + XLOGF(INFO, "DuckDB UI server started successfully {}", res->ToString()); + } } t.join();