Skip to content

Commit

Permalink
chore: multiple files
Browse files Browse the repository at this point in the history
  • Loading branch information
hrbrmstr committed Aug 25, 2024
1 parent 8f24ee1 commit efb8537
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 16 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ This repository is based on https://github.com/duckdb/extension-template, check

---

This extension, Ppcap, allows you to read pcap files (well, _one_ file for now).
This extension, `ppcap`, allows you to read pcap files.

I still need to figure out how to name it just `pcap`, but enough symbols collide with `libpcap` that it may take me a while to sort that out.

Expand All @@ -14,7 +14,7 @@ So far, this is what you get:
(
./build/release/duckdb --json <<EOF
FROM
read_pcap('scans.pcap')
read_pcap('*.pcap')
WHERE
is_http(payload)
LIMIT 1
Expand Down
70 changes: 56 additions & 14 deletions src/ppcap_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
// OpenSSL linked through vcpkg
#include <openssl/opensslv.h>

#include <glob.h>

#include <arpa/inet.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
Expand All @@ -22,14 +24,16 @@
#include <cctype>
#include <iomanip>
#include <sstream>
#include <vector>

namespace duckdb {

struct PCAPData : public TableFunctionData {
vector<string> filenames;
size_t current_file_index;
pcap_t *handle;
string filename;

PCAPData(string filename) : filename(filename), handle(nullptr) {}
PCAPData() : current_file_index(0), handle(nullptr) {}

~PCAPData() {
if (handle) {
Expand Down Expand Up @@ -132,10 +136,30 @@ static void PCAPReaderFunction(ClientContext &context,

idx_t index = 0;
while (index < STANDARD_VECTOR_SIZE) {
result = pcap_next_ex(pcap_data.handle, &header, &packet);
if (!pcap_data.handle) {
// If we don't have an open file, try to open the next one
if (pcap_data.current_file_index >= pcap_data.filenames.size()) {
// No more files to process
break;
}
char errbuf[PCAP_ERRBUF_SIZE];
pcap_data.handle = pcap_open_offline(
pcap_data.filenames[pcap_data.current_file_index].c_str(), errbuf);
if (pcap_data.handle == nullptr) {
throw std::runtime_error("Failed to open PCAP file: " + string(errbuf));
}
pcap_data.current_file_index++;
}

struct pcap_pkthdr *header;
const u_char *packet;
int result = pcap_next_ex(pcap_data.handle, &header, &packet);

if (result == -2) {
// End of file
break;
// End of file, close current file and move to next
pcap_close(pcap_data.handle);
pcap_data.handle = nullptr;
continue;
} else if (result == -1) {
throw std::runtime_error(pcap_geterr(pcap_data.handle));
} else if (result == 0) {
Expand Down Expand Up @@ -231,6 +255,32 @@ static void PCAPReaderFunction(ClientContext &context,
static unique_ptr<FunctionData>
PCAPReaderBind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {
auto result = make_uniq<PCAPData>();

if (input.inputs[0].type().id() == LogicalTypeId::VARCHAR) {
// Single input: could be a filename or a wildcard
string file_pattern = input.inputs[0].GetValue<string>();
glob_t glob_result;
glob(file_pattern.c_str(), GLOB_TILDE, NULL, &glob_result);
for (unsigned int i = 0; i < glob_result.gl_pathc; ++i) {
result->filenames.push_back(string(glob_result.gl_pathv[i]));
}
globfree(&glob_result);
} else if (input.inputs[0].type().id() == LogicalTypeId::LIST) {
// List input: multiple filenames
auto file_list = ListValue::GetChildren(input.inputs[0]);
for (const auto &file : file_list) {
result->filenames.push_back(file.GetValue<string>());
}
} else {
throw InvalidInputException("Input must be either a string (filename or "
"wildcard) or a list of strings (filenames)");
}

if (result->filenames.empty()) {
throw InvalidInputException("No files found matching the input pattern");
}

return_types = {
LogicalType::TIMESTAMP, LogicalType::VARCHAR,
LogicalType::VARCHAR, LogicalType::INTEGER,
Expand All @@ -242,19 +292,11 @@ PCAPReaderBind(ClientContext &context, TableFunctionBindInput &input,
"dest_port", "length", "tcp_session", "source_mac",
"dest_mac", "protocols", "payload"};

auto result = make_uniq<PCAPData>(input.inputs[0].GetValue<string>());
char errbuf[PCAP_ERRBUF_SIZE];
result->handle = pcap_open_offline(result->filename.c_str(), errbuf);
if (result->handle == nullptr) {
throw std::runtime_error(errbuf);
}

return result;
}

static void LoadInternal(DatabaseInstance &instance) {
TableFunction pcap_reader("read_pcap", {LogicalType::VARCHAR},
PCAPReaderFunction, PCAPReaderBind);
TableFunction pcap_reader("read_pcap", {LogicalType::ANY}, PCAPReaderFunction, PCAPReaderBind);
ExtensionUtil::RegisterFunction(instance, pcap_reader);
ScalarFunction is_http_func("is_http", {LogicalType::BLOB},
LogicalType::BOOLEAN, IsHTTPFunction);
Expand Down

0 comments on commit efb8537

Please sign in to comment.