Skip to content

Commit

Permalink
Add gRPC support and Nlohmann JSON integration; update formatting set…
Browse files Browse the repository at this point in the history
…tings
  • Loading branch information
royshil committed Nov 25, 2024
1 parent 6cde59a commit 4f27ef8
Show file tree
Hide file tree
Showing 6 changed files with 342 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeColon
BreakStringLiterals: false # apparently unpredictable
ColumnLimit: 80
ColumnLimit: 100
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 8
Expand Down
18 changes: 17 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,22 @@ if(ENABLE_QT)
AUTORCC ON)
endif()

target_sources(${CMAKE_PROJECT_NAME} PRIVATE src/plugin-main.c)
include(cmake/BuildGRPCpp.cmake)
include(cmake/FetchNlohmannJSON.cmake)

# Run protoc to generate the protobuf and gRPC C++ sources for the Clova "nest" service.
#
# protoc emits four files for nest.proto: nest.pb.{h,cc} (messages) and
# nest.grpc.pb.{h,cc} (service stub). All four are declared as OUTPUT so the build
# system knows how to (re)create each of them, and both .cc files are compiled into
# the plugin: clova-realtime.cpp uses NestService::NewStub, which is defined in
# nest.grpc.pb.cc.
#
# NOTE(review): the files are still generated next to nest.proto in the source tree,
# because clova-realtime.cpp includes "cloud-providers/clova/nest.grpc.pb.h" by that
# path. Moving them to the binary dir would be cleaner but needs an include-path change.
set(CLOVA_PROTO_DIR ${CMAKE_SOURCE_DIR}/src/cloud-providers/clova)
set(CLOVA_PROTO_GENERATED
    ${CLOVA_PROTO_DIR}/nest.pb.cc
    ${CLOVA_PROTO_DIR}/nest.pb.h
    ${CLOVA_PROTO_DIR}/nest.grpc.pb.cc
    ${CLOVA_PROTO_DIR}/nest.grpc.pb.h)

# When gRPC is built from source (ExternalProject target `grpc`), protoc and the gRPC
# plugin only exist after that target has been built, so the rule must wait for it.
# With the prebuilt archive there is no such target and only the .proto is a dependency.
set(CLOVA_PROTO_DEPENDS ${CLOVA_PROTO_DIR}/nest.proto)
if(TARGET grpc)
  list(APPEND CLOVA_PROTO_DEPENDS grpc)
endif()

add_custom_command(
  OUTPUT ${CLOVA_PROTO_GENERATED}
  COMMAND
    ${PROTOC_EXECUTABLE}
    --cpp_out=${CLOVA_PROTO_DIR}
    --grpc_out=${CLOVA_PROTO_DIR}
    --plugin=protoc-gen-grpc=${GRPC_PLUGIN_EXECUTABLE}
    -I ${CLOVA_PROTO_DIR}
    ${CLOVA_PROTO_DIR}/nest.proto
  DEPENDS ${CLOVA_PROTO_DEPENDS}
  COMMENT "Generating protobuf/gRPC sources from nest.proto"
  VERBATIM)

target_sources(
  ${CMAKE_PROJECT_NAME}
  PRIVATE src/plugin-main.c
          src/cloud-providers/clova/clova-realtime.cpp
          ${CLOVA_PROTO_DIR}/nest.pb.cc
          ${CLOVA_PROTO_DIR}/nest.grpc.pb.cc)

set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
82 changes: 82 additions & 0 deletions cmake/BuildGRPCpp.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Provides gRPC (headers, static libraries, protoc and grpc_cpp_plugin) to
# ${CMAKE_PROJECT_NAME}.
#
#   Windows   : downloads a prebuilt MSVC archive with FetchContent.
#   elsewhere : builds gRPC from source, at build time, with ExternalProject.
#
# Sets GRPC_INCLUDE_DIR, GRPC_LIB_DIR, GRPC_LIBRARIES, PROTOC_EXECUTABLE and
# GRPC_PLUGIN_EXECUTABLE, then adds the include/link settings to the plugin target.
cmake_minimum_required(VERSION 3.14)

set(GRPC_VERSION v1.68.0)

# CMAKE_BUILD_TYPE is empty with multi-config generators (Visual Studio, Xcode) and
# when the user did not set one. An empty value would produce a dangling `--config`
# argument and `//` in the prebuilt paths, so fall back to Release in that case and use
# the same configuration for every gRPC configure/build/install step.
if(CMAKE_BUILD_TYPE)
  set(GRPC_BUILD_TYPE "${CMAKE_BUILD_TYPE}")
else()
  set(GRPC_BUILD_TYPE Release)
endif()

if(WIN32)
  # get the prebuilt version from https://github.com/thommyho/Cpp-gRPC-Windows-PreBuilts/releases/
  include(FetchContent)

  set(grpc_url
      "https://github.com/thommyho/Cpp-gRPC-Windows-PreBuilts/releases/download/${GRPC_VERSION}/MSVC143_64.zip")
  FetchContent_Declare(
    grpc
    URL ${grpc_url}
    DOWNLOAD_EXTRACT_TIMESTAMP 1)
  FetchContent_MakeAvailable(grpc)

  # NOTE(review): the archive is assumed to hold one sub-directory per configuration
  # (that is how the paths below were already built) - confirm against the release.
  # If there is no directory for the requested configuration (e.g. RelWithDebInfo),
  # use the Release binaries instead of pointing at a non-existent path.
  set(GRPC_PREBUILT_DIR "${grpc_SOURCE_DIR}/${GRPC_BUILD_TYPE}")
  if(NOT IS_DIRECTORY "${GRPC_PREBUILT_DIR}")
    set(GRPC_PREBUILT_DIR "${grpc_SOURCE_DIR}/Release")
  endif()

  # Specify include directories and link libraries for your project
  set(GRPC_INCLUDE_DIR ${GRPC_PREBUILT_DIR}/include)
  set(GRPC_LIB_DIR ${GRPC_PREBUILT_DIR}/lib)
  set(PROTOC_EXECUTABLE ${GRPC_PREBUILT_DIR}/bin/protoc.exe)
  set(GRPC_PLUGIN_EXECUTABLE ${GRPC_PREBUILT_DIR}/bin/grpc_cpp_plugin.exe)

  # get all .lib files in the lib directory
  file(GLOB GRPC_LIBRARIES ${GRPC_LIB_DIR}/*.lib)
  # Derived value, not a user setting: FORCE keeps the cache entry in sync with the glob.
  set(GRPC_LIBRARIES "${GRPC_LIBRARIES}" CACHE STRING "gRPC libraries" FORCE)
else()
  include(ExternalProject)

  set(EXTRA_CMAKE_ARGS "")
  set(EXTRA_CMAKE_BUILD_ARGS "")

  # Enable ccache if available
  find_program(CCACHE_PROGRAM ccache)
  if(CCACHE_PROGRAM)
    message(STATUS "ccache found: ${CCACHE_PROGRAM}")
    set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_PROGRAM})
    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM})
    # The external gRPC build is a separate CMake invocation and does not inherit the
    # variables above, so the launcher has to be passed to it explicitly.
    list(APPEND EXTRA_CMAKE_ARGS
         -DCMAKE_C_COMPILER_LAUNCHER=${CCACHE_PROGRAM}
         -DCMAKE_CXX_COMPILER_LAUNCHER=${CCACHE_PROGRAM})
  endif()

  # Define the external project for gRPC
  ExternalProject_Add(
    grpc
    PREFIX ${CMAKE_BINARY_DIR}/grpc
    GIT_REPOSITORY https://github.com/grpc/grpc.git
    GIT_TAG ${GRPC_VERSION}
    GIT_SHALLOW TRUE
    GIT_PROGRESS TRUE
    CMAKE_GENERATOR ${CMAKE_GENERATOR}
    CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
               -DCMAKE_BUILD_TYPE=${GRPC_BUILD_TYPE}
               -DgRPC_BUILD_GRPC_CSHARP_PLUGIN=OFF
               -DgRPC_BUILD_GRPC_NODE_PLUGIN=OFF
               -DgRPC_BUILD_GRPC_OBJECTIVE_C_PLUGIN=OFF
               -DgRPC_BUILD_GRPC_PHP_PLUGIN=OFF
               -DgRPC_BUILD_GRPC_PYTHON_PLUGIN=OFF
               -DgRPC_BUILD_GRPC_RUBY_PLUGIN=OFF
               -DgRPC_INSTALL=ON
               -DgRPC_BUILD_TESTS=OFF
               ${EXTRA_CMAKE_ARGS}
    # Build and install both operate on the gRPC *build tree* (<BINARY_DIR>) and use
    # the same configuration that was passed to the configure step.
    BUILD_COMMAND ${CMAKE_COMMAND} --build <BINARY_DIR> --config ${GRPC_BUILD_TYPE}
                  ${EXTRA_CMAKE_BUILD_ARGS}
    INSTALL_COMMAND ${CMAKE_COMMAND} --build <BINARY_DIR> --target install --config
                    ${GRPC_BUILD_TYPE}
    LOG_DOWNLOAD ON
    LOG_CONFIGURE ON
    LOG_BUILD ON
    LOG_INSTALL ON)

  # Specify include directories and link libraries for your project
  ExternalProject_Get_Property(grpc install_dir)
  set(GRPC_INCLUDE_DIR ${install_dir}/include)
  set(GRPC_LIB_DIR ${install_dir}/lib)
  set(PROTOC_EXECUTABLE ${install_dir}/bin/protoc)
  set(GRPC_PLUGIN_EXECUTABLE ${install_dir}/bin/grpc_cpp_plugin)

  # get all .a files in the lib directory
  # This glob runs at configure time, but the libraries are only installed when the
  # `grpc` target is built, so on a fresh build tree the list is empty.
  file(GLOB GRPC_LIBRARIES ${GRPC_LIB_DIR}/*.a)
  if(NOT GRPC_LIBRARIES)
    message(
      WARNING
        "No gRPC libraries found in ${GRPC_LIB_DIR} yet. Build the 'grpc' target once, "
        "then re-run CMake so the libraries are picked up for linking.")
  endif()
  # Derived value, not a user setting: FORCE replaces a stale (possibly empty) entry
  # left behind by an earlier configure run.
  set(GRPC_LIBRARIES "${GRPC_LIBRARIES}" CACHE STRING "gRPC libraries" FORCE)

  # The plugin needs the installed gRPC headers and libraries, so it must not start
  # building before the external project has finished.
  add_dependencies(${CMAKE_PROJECT_NAME} grpc)
endif()

# Add include directories
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE ${GRPC_INCLUDE_DIR})

# Link libraries
target_link_directories(${CMAKE_PROJECT_NAME} PRIVATE ${GRPC_LIB_DIR})
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE ${GRPC_LIBRARIES})
12 changes: 12 additions & 0 deletions cmake/FetchNlohmannJSON.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Fetches the header-only nlohmann/json library (v3.11.3) and makes it available to the
# plugin target.
include(FetchContent)

FetchContent_Declare(
  nlohmann_json
  GIT_REPOSITORY https://github.com/nlohmann/json.git
  GIT_TAG v3.11.3
  # Only the tagged commit is needed; skip downloading the full repository history.
  GIT_SHALLOW TRUE)

FetchContent_MakeAvailable(nlohmann_json)

# FetchContent_MakeAvailable() adds the library's own CMake project, which defines the
# nlohmann_json::nlohmann_json interface target. Linking it supplies the include
# directory (so "nlohmann/json.hpp" resolves) without hard-coding the repository layout.
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE nlohmann_json::nlohmann_json)
196 changes: 196 additions & 0 deletions src/cloud-providers/clova/clova-realtime.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#include <chrono>
#include <exception>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <numeric>
#include <string>
#include <thread>
#include <vector>

#include <grpcpp/grpcpp.h>

#include "cloud-providers/clova/nest.grpc.pb.h"
#include "nlohmann/json.hpp"

using grpc::Channel;
using grpc::ClientContext;
using grpc::ClientReaderWriter;
using grpc::Status;

using NestService = com::nbp::cdncp::nest::grpc::proto::v1::NestService;
using NestRequest = com::nbp::cdncp::nest::grpc::proto::v1::NestRequest;
using NestResponse = com::nbp::cdncp::nest::grpc::proto::v1::NestResponse;
using NestConfig = com::nbp::cdncp::nest::grpc::proto::v1::NestConfig;
using NestData = com::nbp::cdncp::nest::grpc::proto::v1::NestData;
using RequestType = com::nbp::cdncp::nest::grpc::proto::v1::RequestType;
using json = nlohmann::json;

class ResponseObserver {
public:
ResponseObserver(const std::string &file_path, const std::string &source_lang,
std::map<int, std::chrono::steady_clock::time_point> &chunk_start_times)
: file_path(file_path),
source_lang(source_lang),
chunk_start_times(chunk_start_times)
{
}
void OnNext(const NestResponse &response)
{
auto end_time = std::chrono::steady_clock::now();
json json_response_data = json::parse(response.contents());
if (json_response_data.contains("transcription") &&
json_response_data["transcription"].contains("text")) {
std::string text_value = json_response_data["transcription"]["text"];
int seq_id = json_response_data["transcription"].value("seqId", -1);
if (seq_id != -1 && chunk_start_times.count(seq_id)) {
auto start_time = chunk_start_times[seq_id];
double latency =
std::chrono::duration<double>(end_time - start_time).count();
chunk_latencies[seq_id] = latency;
std::cout << "Chunk " << seq_id << " latency: " << (latency * 1000)
<< " milliseconds" << std::endl;
}
if (text_value.empty() && !sentence_sofar.empty()) {
std::cout << "Complete sentence: " << sentence_sofar << std::endl;
transcription += sentence_sofar + "\n";
sentence_sofar.clear();
} else {
sentence_sofar += text_value;
std::cout << "Partial transcription: " << text_value << std::endl;
}
}
}

void OnError(const grpc::Status &status)
{
std::cerr << "Error received: " << status.error_message() << std::endl;
}

void OnCompleted()
{
std::cout << "Stream completed" << std::endl;
std::cout << "---------- Chunk Latencies ----------" << std::endl;
for (const auto &entry : chunk_latencies) {
int seq_id = entry.first;
double latency = entry.second;
std::cout << "Chunk " << seq_id << " latency: " << (latency * 1000)
<< " milliseconds" << std::endl;
}
double avg_latency = 0;
if (!chunk_latencies.empty()) {
avg_latency = std::accumulate(chunk_latencies.begin(),
chunk_latencies.end(), 0.0,
[](double sum, const auto &p) {
return sum + p.second;
}) /
chunk_latencies.size();
}
std::cout << "Average latency: " << avg_latency << " seconds" << std::endl;
std::cout << "Average latency: " << (avg_latency * 1000) << " milliseconds"
<< std::endl;
if (!sentence_sofar.empty()) {
std::cout << sentence_sofar << std::endl;
transcription += sentence_sofar + "\n";
}
/*
std::cout << "---------- transcription ----------" << std::endl;
std::ofstream outfile(file_path + ".transcript", std::ios::out | std::ios::trunc);
if (outfile.is_open()) {
outfile << transcription;
outfile.close();
} else {
std::cerr << "Unable to open file for writing transcription" << std::endl;
}
*/
}

private:
std::string file_path;
std::string source_lang;
std::string sentence_sofar;
std::string transcription;
std::map<int, std::chrono::steady_clock::time_point> &chunk_start_times;
std::map<int, double> chunk_latencies;
};

class RequestGenerator {
public:
RequestGenerator(const std::string &file_path, const std::string &source_lang)
: file_path(file_path),
source_lang(source_lang)
{
}
void
GenerateRequests(std::function<void(const NestRequest &)> yield,
std::map<int, std::chrono::steady_clock::time_point> &chunk_start_times)
{
NestRequest config_request;
config_request.set_type(RequestType::CONFIG);
config_request.mutable_config()->set_config("{\"transcription\":{\"language\":\"" +
source_lang + "\"}}");
yield(config_request);
std::ifstream file(file_path, std::ios::binary);
int chunk_id = 1;
std::vector<char> buffer(32000);
while (file) {
file.read(buffer.data(), buffer.size());
std::streamsize bytes_read = file.gcount();
if (bytes_read > 0) {
chunk_start_times[chunk_id] = std::chrono::steady_clock::now();
NestRequest data_request;
data_request.set_type(RequestType::DATA);
data_request.mutable_data()->set_chunk(buffer.data(), bytes_read);
data_request.mutable_data()->set_extra_contents(
"{\"seqId\": " + std::to_string(chunk_id) +
", \"epFlag\": true}");
yield(data_request);
chunk_id++;
}
}
// Wait for 5 seconds before completing
std::this_thread::sleep_for(std::chrono::seconds(5));
}

private:
std::string file_path;
std::string source_lang;
};

void RunRecognition(const std::string &file_path, const std::string &source_lang)
{
grpc::SslCredentialsOptions ssl_opts;
auto channel = grpc::CreateChannel("clovaspeech-gw.ncloud.com:50051",
grpc::SslCredentials(ssl_opts));
std::unique_ptr<NestService::Stub> stub = NestService::NewStub(channel);
ClientContext context;
context.AddMetadata("authorization", "Bearer xxx");
std::map<int, std::chrono::steady_clock::time_point> chunk_start_times;
ResponseObserver response_observer(file_path, source_lang, chunk_start_times);
RequestGenerator request_generator(file_path, source_lang);
auto reader_writer = stub->recognize(&context);
std::thread writer([&]() {
request_generator.GenerateRequests(
[&](const NestRequest &request) { reader_writer->Write(request); },
chunk_start_times);
reader_writer->WritesDone();
});
NestResponse response;
while (reader_writer->Read(&response)) {
response_observer.OnNext(response);
}
writer.join();
Status status = reader_writer->Finish();
if (!status.ok()) {
response_observer.OnError(status);
}
response_observer.OnCompleted();
}

// int main(int argc, char *argv[])
// {
// if (argc != 3) {
// std::cerr << "Usage: " << argv[0] << " <file_path> <source_language>" << std::endl;
// return 1;
// }
// RunRecognition(argv[1], argv[2]);
// return 0;
// }
34 changes: 34 additions & 0 deletions src/cloud-providers/clova/nest.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// gRPC/protobuf schema for the "nest" streaming recognition service.
syntax = "proto3";

package com.nbp.cdncp.nest.grpc.proto.v1;

// Discriminates the two kinds of NestRequest.
enum RequestType {
// The request carries a NestConfig.
CONFIG = 0;
// The request carries a NestData.
DATA = 1;
}

// Configuration payload.
message NestConfig {
// Configuration text. NOTE(review): the client in this commit sends a JSON document
// here ({"transcription":{"language":...}}) - confirm the full format with the service docs.
string config = 1;
}

// One chunk of input data.
message NestData {
// Raw bytes of the chunk.
bytes chunk = 1;
// Additional per-chunk text. NOTE(review): the client in this commit sends JSON with
// "seqId" and "epFlag" keys - confirm the accepted keys with the service docs.
string extra_contents = 2;
}

// A single message on the request stream.
message NestRequest {
// Tells which member of `part` is expected to be set.
RequestType type = 1;
// At most one of the following is set.
oneof part {
NestConfig config = 2;
NestData data = 3;
}
}

// A single message on the response stream.
message NestResponse {
// Response text. NOTE(review): the client in this commit parses it as JSON and reads
// transcription.text / transcription.seqId - confirm the schema with the service docs.
string contents = 1;
}

service NestService {
// Bidirectional streaming RPC: the client streams NestRequest messages and the server
// streams NestResponse messages.
rpc recognize(stream NestRequest) returns (stream NestResponse){};
}

0 comments on commit 4f27ef8

Please sign in to comment.