Skip to content

Commit

Permalink
org files, fix table_function, and add copy function
Browse files Browse the repository at this point in the history
  • Loading branch information
grammaright committed Apr 12, 2024
1 parent a83658f commit 054e320
Show file tree
Hide file tree
Showing 10 changed files with 642 additions and 187 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 2.8.12)

# Set extension name here
set(TARGET_NAME quack)
set(TARGET_NAME array)

# DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then
# used in cmake with find_package. Feel free to remove or replace with other dependencies.
Expand All @@ -18,7 +18,7 @@ include_directories(src/include)
include_directories("$ENV{PREVISION_PATH}/tilestore/include")
include_directories("$ENV{PREVISION_PATH}/buffertile/include")

set(EXTENSION_SOURCES src/quack_extension.cpp)
set(EXTENSION_SOURCES src/array_extension.cpp src/read_array.cpp src/copy_array.cpp)

build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Configuration of extension
EXT_NAME=quack
EXT_NAME=array
EXT_CONFIG=${PROJ_DIR}extension_config.cmake

# Include the Makefile from extension-ci-tools
Expand Down
2 changes: 1 addition & 1 deletion extension_config.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This file is included by DuckDB's build system. It specifies which extension to load

# Extension from this repo
duckdb_extension_load(quack
duckdb_extension_load(array
SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
LOAD_TESTS
)
Expand Down
71 changes: 71 additions & 0 deletions src/array_extension.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#define DUCKDB_EXTENSION_MAIN

#include "duckdb.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/function/scalar_function.hpp"
#include "duckdb/main/extension_util.hpp"
#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
#include "duckdb/function/copy_function.hpp"
#include "duckdb/parser/parsed_data/copy_info.hpp"

#include "array_extension.hpp"

extern "C"
{
#include "bf.h"

Check failure on line 16 in src/array_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_amd64, x86_64, x64-osx)

'bf.h' file not found

Check failure on line 16 in src/array_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / DuckDB-Wasm (wasm_mvp, wasm32-emscripten)

'bf.h' file not found

Check failure on line 16 in src/array_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / DuckDB-Wasm (wasm_mvp, wasm32-emscripten)

'bf.h' file not found

Check failure on line 16 in src/array_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / DuckDB-Wasm (wasm_eh, wasm32-emscripten)

'bf.h' file not found

Check failure on line 16 in src/array_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / DuckDB-Wasm (wasm_eh, wasm32-emscripten)

'bf.h' file not found
}

namespace duckdb
{

static void LoadInternal(DatabaseInstance &instance)
{
}

void ArrayExtension::Load(DuckDB &db)
{
BF_Init();
BF_Attach();

// BF_Detach();
// BF_Free();
// LoadInternal(*db.instance);
// auto instance = ;
auto table_function = ArrayExtension::GetTableFunction();
// auto copy_function = ArrayExtension::GetCopyFunction();
auto res = ArrayExtension::GetCopyFunction();

ExtensionUtil::RegisterFunction(*db.instance, table_function);
ExtensionUtil::RegisterFunction(*db.instance, res);

std::cout << "ArrayExtension::Load()" << std::endl;
}
std::string ArrayExtension::Name()
{
std::cout << "ArrayExtension::Name()" << std::endl;
return "array";
}

} // namespace duckdb

extern "C"
{
DUCKDB_EXTENSION_API void quack_init(duckdb::DatabaseInstance &db)
{
std::cout << "quack_init()" << std::endl;

duckdb::DuckDB db_wrapper(db);
db_wrapper.LoadExtension<duckdb::ArrayExtension>();
}

DUCKDB_EXTENSION_API const char *quack_version()
{
std::cout << "quack_version()" << std::endl;
return duckdb::DuckDB::LibraryVersion();
}
}

#ifndef DUCKDB_EXTENSION_MAIN
#error DUCKDB_EXTENSION_MAIN not defined
#endif
283 changes: 283 additions & 0 deletions src/copy_array.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@

#include "duckdb.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/function/scalar_function.hpp"
#include "duckdb/main/extension_util.hpp"
#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>

#include "duckdb/common/bind_helpers.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/multi_file_reader.hpp"
#include "duckdb/common/serializer/memory_stream.hpp"
#include "duckdb/common/serializer/write_stream.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/types/column/column_data_collection.hpp"
#include "duckdb/common/types/string_type.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"

#include "duckdb/function/scalar/string_functions.hpp"
#include "duckdb/function/table/read_csv.hpp"
#include "duckdb/function/copy_function.hpp"
#include "duckdb/parser/parsed_data/copy_info.hpp"

#include <limits>

#include "array_extension.hpp"

extern "C"
{
#include "bf.h"
}

namespace duckdb
{
class WriteArrayData : public FunctionData
{
public:
WriteArrayData(string file_path, vector<uint32_t> tile_coords)
: tile_coords(tile_coords)
{
array_name = file_path.substr(0, file_path.find_last_of("."));

uint64_t **_dim_domains;
uint64_t *_tile_size;
uint64_t *_array_size_in_tile;
storage_util_get_dim_domains(array_name.c_str(), &_dim_domains, &dim_len);
storage_util_get_tile_extents(array_name.c_str(), &_tile_size, &dim_len);
storage_util_get_dcoord_lens(_dim_domains, _tile_size, dim_len, &_array_size_in_tile);

array_size_in_tile = vector<uint64_t>(_array_size_in_tile, _array_size_in_tile + dim_len);
tile_size = vector<uint64_t>(_tile_size, _tile_size + dim_len);

storage_util_free_dim_domains(&_dim_domains, dim_len);
storage_util_free_tile_extents(&_tile_size, dim_len);
storage_util_free_dcoord_lens(&_array_size_in_tile);
}

unique_ptr<FunctionData> Copy() const override;
bool Equals(const FunctionData &other) const override;

string array_name;
vector<uint32_t> tile_coords;

uint32_t dim_len;

vector<uint64_t> array_size_in_tile;
vector<uint64_t> tile_size;
};

unique_ptr<FunctionData> WriteArrayData::Copy() const
{
return nullptr;
}

bool WriteArrayData::Equals(const FunctionData &other_p) const
{
return false;
}

struct LocalWriteArrayData : public LocalFunctionData
{
};

struct GlobalWriteArrayData : public GlobalFunctionData
{
public:
GlobalWriteArrayData(ClientContext &context, FunctionData &bind_data,
const string &file_path)
{
auto &data = bind_data.Cast<WriteArrayData>();

// getbuffer
auto dcoords = make_uniq_array<uint64_t>(2);
dcoords[0] = data.tile_coords[0];
dcoords[1] = data.tile_coords[1];

auto arrname = data.array_name.c_str();
// why allocate newly?
arrname_char = new char[1024];
strcpy(arrname_char, arrname);

// TODO: Consider sparse tile in the future
key = {arrname_char, "a", dcoords.get(), data.dim_len, BF_EMPTYTILE_DENSE};

PFpage *page;
BF_GetBuf(key, &page);

buf_size = page->pagebuf_len / sizeof(double);
buf = (double *)bf_util_get_pagebuf(page);
}

~GlobalWriteArrayData()
{
// FIXME: why double free?
// BF_UnpinBuf(key);
// delete arrname_char;
}

void unpin()
{
BF_UnpinBuf(key);
delete arrname_char;
}

uint64_t buf_size;
double *buf;

uint64_t cur_idx = 0;

private:
array_key key;
char *arrname_char;

// vector<uint64_t> current_coords_in_tile;
// bool finished;
};

static unique_ptr<FunctionData>
WriteArrayBind(ClientContext &context, CopyFunctionBindInput &input,
const vector<string> &names, const vector<LogicalType> &sql_types)
{
uint32_t x, y;

// check all the options in the copy info
for (auto &option : input.info.options)
{
if (option.first == "COORD_X")
{
auto incoords = option.second;
for (auto incoord : incoords)
{
auto val = incoord.GetValue<uint32_t>();
x = val;
break; // I don't know why it gives a vector
}
}
else if (option.first == "COORD_Y")
{
auto incoords = option.second;
for (auto incoord : incoords)
{
auto val = incoord.GetValue<uint32_t>();
y = val;
break; // I don't know why it gives a vector
}
}
}

auto file_path = input.info.file_path;
vector<uint32_t> tile_coords = {x, y};
auto bind_data = make_uniq<WriteArrayData>(file_path, tile_coords);
return std::move(bind_data);
}

static unique_ptr<LocalFunctionData> WriteArrayInitializeLocal(ExecutionContext &context, FunctionData &bind_data)
{
return std::move(make_uniq<LocalWriteArrayData>());
}

static unique_ptr<GlobalFunctionData> WriteArrayInitializeGlobal(ClientContext &context, FunctionData &bind_data,
const string &file_path)
{
return std::move(make_uniq<GlobalWriteArrayData>(context, bind_data, file_path));
}

static void WriteArraySink(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate,
LocalFunctionData &lstate, DataChunk &input)
{
// NOTE: I assume that only one thread runs
auto array_gstate = gstate.Cast<GlobalWriteArrayData>();

// We don't know what vector type DuckDB will give
// So we need to convert it to unified vector format
// vector type ref: https://youtu.be/bZOvAKGkzpQ?si=ShnWtUDKNIm7ymo8&t=1265
input.data[0].Flatten(input.size()); // FIXME: Maybe performance panalty. exploit the vector type
auto vector = FlatVector::GetData<double>(input.data[0]);

D_ASSERT(array_gstate.cur_idx + input.size() <= array_gstate.buf_size);

for (idx_t i = 0; i < input.size(); i++)
{
array_gstate.cur_idx++;
array_gstate.buf[array_gstate.cur_idx] = vector[i];
}
}

//===--------------------------------------------------------------------===//
// Combine
//===--------------------------------------------------------------------===//
static void WriteArrayCombine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate,
LocalFunctionData &lstate)
{
}

//===--------------------------------------------------------------------===//
// Finalize
//===--------------------------------------------------------------------===//
void WriteArrayFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate)
{
auto array_gstate = gstate.Cast<GlobalWriteArrayData>();
array_gstate.unpin();
}

//===--------------------------------------------------------------------===//
// Execution Mode
//===--------------------------------------------------------------------===//
CopyFunctionExecutionMode WriteArrayExecutionMode(bool preserve_insertion_order, bool supports_batch_index)
{
return CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE;
}
//===--------------------------------------------------------------------===//
// Prepare Batch
//===--------------------------------------------------------------------===//
struct WriteArrayBatchData : public PreparedBatchData
{
};

unique_ptr<PreparedBatchData> WriteArrayPrepareBatch(ClientContext &context, FunctionData &bind_data,
GlobalFunctionData &gstate,
unique_ptr<ColumnDataCollection> collection)
{
return std::move(make_uniq<WriteArrayBatchData>());
}

//===--------------------------------------------------------------------===//
// Flush Batch
//===--------------------------------------------------------------------===//
void WriteArrayFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate,
PreparedBatchData &batch)
{
}

idx_t WriteArrayFileSize(GlobalFunctionData &gstate)
{
return 1;
}

CopyFunction ArrayExtension::GetCopyFunction()
{
CopyFunction info("tilestore");
info.copy_to_bind = WriteArrayBind;
info.copy_to_initialize_local = WriteArrayInitializeLocal;
info.copy_to_initialize_global = WriteArrayInitializeGlobal;
info.copy_to_sink = WriteArraySink;
info.copy_to_combine = WriteArrayCombine;
info.copy_to_finalize = WriteArrayFinalize;
info.execution_mode = WriteArrayExecutionMode;
// info.prepare_batch = WriteArrayPrepareBatch;
// info.flush_batch = WriteArrayFlushBatch;
info.file_size_bytes = WriteArrayFileSize;

info.plan = nullptr;

info.copy_from_bind = nullptr;
info.copy_from_function = ArrayExtension::GetTableFunction();

info.extension = "tilestore";

return info;
}

}
Loading

0 comments on commit 054e320

Please sign in to comment.