Statically link DuckDB UI extension and improve integration

This commit is contained in:
Martin Raiber 2025-06-16 23:42:21 +02:00
parent e9c5958bc3
commit 07e89dafed
4 changed files with 89 additions and 26 deletions

View File

@ -25,6 +25,33 @@ set(APIGEN_SCHEMAS
SessionCheckResp # Must be last
)
# Check out https://github.com/duckdb/duckdb-ui at a pinned revision into
# ${CMAKE_SOURCE_DIR}/duckdb-ui. Only the checkout is needed: the extension
# sources listed in DUCKDB_SRCS are compiled as part of this project, so the
# upstream project's own CMake build is never run.
set(DUCKDB_UI_REVISION "6cbef805772b2cc974b86ab01da3906a0d365fdb")

include(FetchContent)

# CONFIGURE_COMMAND / BUILD_COMMAND / INSTALL_COMMAND are ExternalProject
# options that FetchContent prohibits, so they are intentionally not passed.
FetchContent_Declare(
    duckdb-ui
    GIT_REPOSITORY https://github.com/duckdb/duckdb-ui.git
    GIT_TAG        ${DUCKDB_UI_REVISION}
    SOURCE_DIR     ${CMAKE_SOURCE_DIR}/duckdb-ui
)

# Populate only downloads the sources; it does not call add_subdirectory(),
# which is what keeps CMake from configuring or building duckdb-ui itself.
# NOTE(review): the single-argument FetchContent_Populate() form is deprecated
# as of CMake 3.30 (policy CMP0169) - confirm against the minimum CMake
# version this project supports.
FetchContent_Populate(duckdb-ui)

# DuckDB UI extension translation units that get statically linked.
set(DUCKDB_SRCS
    "duckdb-ui/src/event_dispatcher.cpp"
    "duckdb-ui/src/http_server.cpp"
    "duckdb-ui/src/settings.cpp"
    "duckdb-ui/src/state.cpp"
    "duckdb-ui/src/ui_extension.cpp"
    "duckdb-ui/src/watcher.cpp"
    "duckdb-ui/src/utils/encoding.cpp"
    "duckdb-ui/src/utils/env.cpp"
    "duckdb-ui/src/utils/helpers.cpp"
    "duckdb-ui/src/utils/md_helpers.cpp"
    "duckdb-ui/src/utils/serialization.cpp"
)
add_custom_target(apigen)
foreach(SCHEMA ${APIGEN_SCHEMAS})
@ -99,7 +126,8 @@ add_executable(hs5
src/WalFile.cpp
wwwgen/www_files.cpp
src/DuckDbFs.cpp
${SCHEMA_SOURCES})
${SCHEMA_SOURCES}
${DUCKDB_SRCS})
set_property(TARGET hs5 PROPERTY CXX_STANDARD 20)
@ -125,6 +153,9 @@ target_link_libraries(hs5 PRIVATE $<IF:$<TARGET_EXISTS:zstd::libzstd_shared>,zst
proxygen::proxygen proxygen::proxygencurl proxygen::proxygenhttpserver expat::expat unofficial-sodium::sodium SqliteCppGen::SqliteCppGen nlohmann_json::nlohmann_json fmt::fmt
$<IF:$<TARGET_EXISTS:duckdb>,duckdb,duckdb_static>)
# DuckDB UI: preprocessor definitions passed to the statically linked
# extension sources, plus the header search paths they need.
target_compile_definitions(hs5
    PRIVATE
        UI_EXTENSION_SEQ_NUM="1"
        UI_EXTENSION_GIT_SHA="${DUCKDB_UI_REVISION}"
)
target_include_directories(hs5
    PRIVATE
        ${CMAKE_SOURCE_DIR}/duckdb-ui/src/include
        ${CMAKE_SOURCE_DIR}/duckdb-ui/third_party/httplib
)
target_compile_features(hs5 PUBLIC cxx_std_20)

View File

@ -20,6 +20,7 @@ Table of Contents
* [How the storage works](#how-the-storage-works)
* [Durability guarantees](#durability-guarantees)
* [Manual commit mode](#manual-commit-mode)
* [DuckDB UI](#duckdb-ui)
# About HS5
@ -116,3 +117,13 @@ The main object storage consists of (mostly) two files. One is an `index.lmdb` L
Step 5 makes sure that we notice a restart of `hs5`. In that case, we have to re-upload the two objects since they might not be flushed to disk. If the comparison at step 5 fails, you could also abort, but `objA` and `objB` might be stored in the bucket. You might want to add some periodic task that checks for such orphaned objects or keep track of potentially orphaned objects somehow and clean them up regularly.
## DuckDB UI
HS5 integrates with DuckDB. If you run hs5 with the switch `--duckdb-ui`, it starts the DuckDB UI, by default on port 4213 (use `--duckdb-http-port` to choose a different port). You can then directly query files, e.g. `parquet` files, stored in HS5 buckets using the `hs5://` scheme. Currently, only reading is supported. For example, the following DuckDB query:
```sql
SELECT MIN(arrival_time - departure_time)
FROM "hs5://test/train_services.parquet"
WHERE arrival_time > departure_time;
```

View File

@ -227,6 +227,13 @@ int actionRun(std::vector<std::string> args)
TCLAP::SwitchArg manualCommitArg("", "manual-commit",
"Manual commit mode (default false)", cmd);
TCLAP::SwitchArg duckDbUi("", "duckdb-ui",
"Run DuckDB UI", cmd);
TCLAP::ValueArg<unsigned short> duckDbUiPort("", "duckdb-http-port",
"Specifies on which port DuckDB UI will run (default 4213)",
false, 4213, "port number", cmd);
std::vector<std::string> realArgs;
realArgs.push_back(args[0]);
@ -316,6 +323,13 @@ int actionRun(std::vector<std::string> args)
realArgs.push_back(toFollyLoglevel(loglevelArg.getValue()));
}
if(duckDbUi.getValue())
{
realArgs.push_back("--run_duckdb");
realArgs.push_back("--duckdb_port");
realArgs.push_back(std::to_string(duckDbUiPort.getValue()));
}
return runRealMain(realArgs);
}

View File

@ -32,6 +32,7 @@
#include "config.h"
#include "StaticHandler.h"
#include "DuckDbFs.h"
#include "ui_extension.hpp"
duckdb::DuckDB& getDuckDb()
{
@ -61,6 +62,8 @@ DEFINE_bool(punch_holes, true, "Free up space if not enough free space is left b
DEFINE_string(server_url, "serverurl", "URL of server");
DEFINE_bool(bucket_versioning, false, "Enable bucket versioning");
DEFINE_string(index_wal_path, "", "Path where to put the index WAL file. Disabled if empty");
// DuckDB UI flags: run_duckdb gates the DuckDB UI start-up in realMain, and
// duckdb_port is the value realMain applies as DuckDB's ui_local_port setting.
DEFINE_bool(run_duckdb, false, "Run DuckDB UI");
DEFINE_int32(duckdb_port, 4213, "Port to listen on with DuckDB UI protocol");
namespace {
std::unique_ptr<proxygen::HTTPServer> server;
@ -219,33 +222,37 @@ int realMain(int argc, char* argv[])
server.reset();
});
XLOGF(INFO, "Starting DuckDB...");
duckdb::Connection con(getDuckDb());
auto& fs =(getDuckDb().instance)->GetFileSystem();
fs.RegisterSubSystem(duckdb::make_uniq<DuckDbFs>(sfs, FLAGS_bucket_versioning));
auto res = con.Query("LOAD '/home/urpc/duckdb-ui/build/release/extension/ui/ui.duckdb_extension'");
if(res->HasError())
if(FLAGS_run_duckdb)
{
XLOGF(ERR, "Failed to load UI extension: {}", res->GetError());
return 1;
}
else
{
XLOGF(INFO, "UI extension loaded successfully {}", res->ToString());
}
XLOGF(INFO, "Starting DuckDB...");
res = con.Query("CALL start_ui_server()");
if(res->HasError())
{
XLOGF(ERR, "Failed to start UI server: {}", res->GetError());
return 1;
}
else
{
XLOGF(INFO, "UI server started successfully {}", res->ToString());
duckdb::Connection con(getDuckDb());
auto& fs =(getDuckDb().instance)->GetFileSystem();
fs.RegisterSubSystem(duckdb::make_uniq<DuckDbFs>(sfs, FLAGS_bucket_versioning));
getDuckDb().LoadExtension<duckdb::UiExtension>();
auto res = con.Query("SET ui_local_port = "+std::to_string(FLAGS_duckdb_port));
if(res->HasError())
{
XLOGF(ERR, "Failed setting duckdb port: {}", res->GetError());
return 1;
}
res = con.Query("CALL start_ui_server()");
if(res->HasError())
{
XLOGF(ERR, "Failed to start UI server: {}", res->GetError());
return 1;
}
else
{
if(res->RowCount()==1)
XLOGF(INFO, "DuckDB UI server started successfully {}", res->GetValue(0, 0).ToString());
else
XLOGF(INFO, "DuckDB UI server started successfully {}", res->ToString());
}
}
t.join();