From 5977b30b3cb0a5ff5f07bf54038a9e68cf963a92 Mon Sep 17 00:00:00 2001 From: Takatoshi Kondo Date: Sat, 23 Dec 2023 23:21:55 +0900 Subject: [PATCH] Added UTF-8 checking. --- include/async_mqtt/packet/property.hpp | 11 ++-- include/async_mqtt/packet/v3_1_1_connect.hpp | 57 ++++++++++++------- include/async_mqtt/packet/v3_1_1_publish.hpp | 23 ++++++-- .../async_mqtt/packet/v3_1_1_subscribe.hpp | 18 +++++- .../async_mqtt/packet/v3_1_1_unsubscribe.hpp | 17 +++++- include/async_mqtt/packet/v5_connect.hpp | 57 ++++++++++++------- include/async_mqtt/packet/v5_publish.hpp | 22 +++++-- include/async_mqtt/packet/v5_subscribe.hpp | 18 +++++- include/async_mqtt/packet/v5_unsubscribe.hpp | 16 +++++- include/async_mqtt/util/utf8validate.hpp | 6 +- 10 files changed, 177 insertions(+), 68 deletions(-) diff --git a/include/async_mqtt/packet/property.hpp b/include/async_mqtt/packet/property.hpp index 22e6761f5..06ed47c42 100644 --- a/include/async_mqtt/packet/property.hpp +++ b/include/async_mqtt/packet/property.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -215,10 +216,12 @@ struct binary_property : private boost::totally_ordered { struct string_property : binary_property { string_property(property::id id, buffer buf) :binary_property{id, force_move(buf)} { -#if 0 // TBD - auto r = utf8string::validate_contents(this->val()); - if (r != utf8string::validation::well_formed) throw utf8string_contents_error(r); -#endif + if (!utf8string_check(this->val())) { + throw make_error( + errc::bad_message, + "string property invalid utf8" + ); + } } }; diff --git a/include/async_mqtt/packet/v3_1_1_connect.hpp b/include/async_mqtt/packet/v3_1_1_connect.hpp index 836b265c3..e0337dbc6 100644 --- a/include/async_mqtt/packet/v3_1_1_connect.hpp +++ b/include/async_mqtt/packet/v3_1_1_connect.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -136,14 +137,21 @@ class connect_packet { endian_store(keep_alive_sec, keep_alive_buf_.data()); endian_store(boost::numeric_cast(client_id_.size()), client_id_length_buf_.data()); -#if 0 // TBD - utf8string_check(client_id_); -#endif + if (!utf8string_check(client_id_)) { + throw make_error( + errc::bad_message, + "v3_1_1::connect_packet client_id invalid utf8" + ); + } + if (clean_session) connect_flags_ |= connect_flags::mask_clean_session; if (user_name) { -#if 0 // TBD - utf8string_check(*user_name); -#endif + if (!utf8string_check(*user_name)) { + throw make_error( + errc::bad_message, + "v3_1_1::connect_packet user name invalid utf8" + ); + } connect_flags_ |= connect_flags::mask_user_name_flag; user_name_ = force_move(*user_name); user_name_length_buf_ = endian_static_vector(boost::numeric_cast(user_name_.size())); @@ -159,10 +167,12 @@ class connect_packet { connect_flags_ |= connect_flags::mask_will_flag; if (w->get_retain() == pub::retain::yes) connect_flags_ |= connect_flags::mask_will_retain; connect_flags::set_will_qos(connect_flags_, w->get_qos()); - -#if 0 // TBD - utf8string_check(w->topic()); -#endif + if (!utf8string_check(w->topic())) { + throw make_error( + errc::bad_message, + "v3_1_1::connect_packet will topic invalid utf8" + ); + } will_topic_ = force_move(w->topic()); will_topic_length_buf_ = endian_static_vector(boost::numeric_cast(will_topic_.size())); if (w->message().size() > 0xffffL) { @@ -270,9 +280,12 @@ class connect_packet { ); } client_id_ = buf.substr(0, client_id_length); -#if 0 // TBD - utf8string_check(client_id_); -#endif + if (!utf8string_check(client_id_)) { + throw make_error( + errc::bad_message, + "v3_1_1::connect_packet client_id invalid utf8" + ); + } buf.remove_prefix(client_id_length); // will @@ -303,9 +316,12 @@ class connect_packet { ); } will_topic_ = buf.substr(0, will_topic_length); -#if 0 // TBD - utf8string_check(will_topic_); -#endif + if (!utf8string_check(will_topic_)) { + throw make_error( + errc::bad_message, + "v3_1_1::connect_packet will topic invalid utf8" + ); + } buf.remove_prefix(will_topic_length); // will_message_length @@ -362,9 +378,12 @@ class connect_packet { ); } user_name_ = buf.substr(0, user_name_length); -#if 0 // TBD - utf8string_check(user_name_); -#endif + if (!utf8string_check(user_name_)) { + throw make_error( + errc::bad_message, + "v3_1_1::connect_packet user name invalid utf8" + ); + } buf.remove_prefix(user_name_length); } diff --git a/include/async_mqtt/packet/v3_1_1_publish.hpp b/include/async_mqtt/packet/v3_1_1_publish.hpp index 7da2a3c66..21d49eff9 100644 --- a/include/async_mqtt/packet/v3_1_1_publish.hpp +++ b/include/async_mqtt/packet/v3_1_1_publish.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -104,9 +105,14 @@ class basic_publish_packet { remaining_length_ += payload.size(); payloads_.push_back(payload); } -#if 0 // TBD - utf8string_check(topic_name_); -#endif + + if (!utf8string_check(topic_name_)) { + throw make_error( + errc::bad_message, + "v3_1_1::publish_packet topic name invalid utf8" + ); + } + auto rb = val_to_variable_bytes(boost::numeric_cast(remaining_length_)); for (auto e : rb) { remaining_length_buf_.push_back(e); @@ -215,9 +221,14 @@ class basic_publish_packet { ); } topic_name_ = buf.substr(0, topic_name_length); -#if 0 // TBD - utf8string_check(topic_name_); -#endif + + if (!utf8string_check(topic_name_)) { + throw make_error( + errc::bad_message, + "v3_1_1::publish_packet topic name invalid utf8" + ); + } + buf.remove_prefix(topic_name_length); // packet_id diff --git a/include/async_mqtt/packet/v3_1_1_subscribe.hpp b/include/async_mqtt/packet/v3_1_1_subscribe.hpp index 9afd7f591..6425f1c91 100644 --- a/include/async_mqtt/packet/v3_1_1_subscribe.hpp +++ b/include/async_mqtt/packet/v3_1_1_subscribe.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -96,9 +97,12 @@ class basic_subscribe_packet { size + // topic filter 1; // opts -#if 0 // TBD - utf8string_check(e.all_topic()); -#endif + if (!utf8string_check(e.all_topic())) { + throw make_error( + errc::bad_message, + "v3_1_1::subscribe_packet topic filter invalid utf8" + ); + } } remaining_length_buf_ = val_to_variable_bytes(boost::numeric_cast(remaining_length_)); @@ -165,6 +169,14 @@ class basic_subscribe_packet { ); } auto topic = buf.substr(0, topic_length); + + if (!utf8string_check(topic)) { + throw make_error( + errc::bad_message, + "v3_1_1::subscribe_packet topic filter invalid utf8" + ); + } + buf.remove_prefix(topic_length); // opts diff --git a/include/async_mqtt/packet/v3_1_1_unsubscribe.hpp b/include/async_mqtt/packet/v3_1_1_unsubscribe.hpp index 1c5cf09a0..6c8abeb9e 100644 --- a/include/async_mqtt/packet/v3_1_1_unsubscribe.hpp +++ b/include/async_mqtt/packet/v3_1_1_unsubscribe.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -75,9 +76,13 @@ class basic_unsubscribe_packet { remaining_length_ += 2 + // topic filter length size; // topic filter -#if 0 // TBD - utf8string_check(e.all_topic()); -#endif + + if (!utf8string_check(e.all_topic())) { + throw make_error( + errc::bad_message, + "v3_1_1::unsubscribe_packet topic filter invalid utf8" + ); + } } remaining_length_buf_ = val_to_variable_bytes(boost::numeric_cast(remaining_length_)); @@ -144,6 +149,12 @@ class basic_unsubscribe_packet { ); } auto topic = buf.substr(0, topic_length); + if (!utf8string_check(topic)) { + throw make_error( + errc::bad_message, + "v3_1_1::unsubscribe_packet topic filter invalid utf8" + ); + } entries_.emplace_back(force_move(topic)); buf.remove_prefix(topic_length); } diff --git a/include/async_mqtt/packet/v5_connect.hpp b/include/async_mqtt/packet/v5_connect.hpp index d397a7033..5ecdf0cee 100644 --- a/include/async_mqtt/packet/v5_connect.hpp +++ b/include/async_mqtt/packet/v5_connect.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -144,14 +145,21 @@ class connect_packet { endian_store(keep_alive_sec, keep_alive_buf_.data()); endian_store(boost::numeric_cast(client_id_.size()), client_id_length_buf_.data()); -#if 0 // TBD - utf8string_check(client_id_); -#endif + if (!utf8string_check(client_id_)) { + throw make_error( + errc::bad_message, + "v5::connect_packet client_id invalid utf8" + ); + } + if (clean_start) connect_flags_ |= connect_flags::mask_clean_start; if (user_name) { -#if 0 // TBD - utf8string_check(*user_name); -#endif + if (!utf8string_check(*user_name)) { + throw make_error( + errc::bad_message, + "v5::connect_packet user name invalid utf8" + ); + } connect_flags_ |= connect_flags::mask_user_name_flag; user_name_ = force_move(*user_name); user_name_length_buf_ = endian_static_vector(boost::numeric_cast(user_name_.size())); @@ -185,10 +193,12 @@ class connect_packet { connect_flags_ |= connect_flags::mask_will_flag; if (w->get_retain() == pub::retain::yes) connect_flags_ |= connect_flags::mask_will_retain; connect_flags::set_will_qos(connect_flags_, w->get_qos()); - -#if 0 // TBD - utf8string_check(w->topic()); -#endif + if (!utf8string_check(w->topic())) { + throw make_error( + errc::bad_message, + "v5::connect_packet will topic invalid utf8" + ); + } will_topic_ = force_move(w->topic()); will_topic_length_buf_ = endian_static_vector(boost::numeric_cast(will_topic_.size())); if (w->message().size() > 0xffffL) { @@ -338,9 +348,12 @@ class connect_packet { ); } client_id_ = buf.substr(0, client_id_length); -#if 0 // TBD - utf8string_check(client_id_); -#endif + if (!utf8string_check(client_id_)) { + throw make_error( + errc::bad_message, + "v5::connect_packet client_id invalid utf8" + ); + } buf.remove_prefix(client_id_length); // will @@ -396,9 +409,12 @@ class connect_packet { ); } will_topic_ = buf.substr(0, will_topic_length); -#if 0 // TBD - utf8string_check(will_topic_); -#endif + if (!utf8string_check(will_topic_)) { + throw make_error( + errc::bad_message, + "v5::connect_packet will topic invalid utf8" + ); + } buf.remove_prefix(will_topic_length); // will_message_length @@ -455,9 +471,12 @@ class connect_packet { ); } user_name_ = buf.substr(0, user_name_length); -#if 0 // TBD - utf8string_check(user_name_); -#endif + if (!utf8string_check(user_name_)) { + throw make_error( + errc::bad_message, + "v5::connect_packet user name invalid utf8" + ); + } buf.remove_prefix(user_name_length); } diff --git a/include/async_mqtt/packet/v5_publish.hpp b/include/async_mqtt/packet/v5_publish.hpp index 62c60909a..def6ac012 100644 --- a/include/async_mqtt/packet/v5_publish.hpp +++ b/include/async_mqtt/packet/v5_publish.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -80,9 +81,13 @@ class basic_publish_packet { remaining_length_ += payload.size(); payloads_.push_back(payload); } -#if 0 // TBD - utf8string_check(topic_name_); -#endif + + if (!utf8string_check(topic_name_)) { + throw make_error( + errc::bad_message, + "v5::publish_packet topic name invalid utf8" + ); + } auto pb = val_to_variable_bytes(boost::numeric_cast(property_length_)); for (auto e : pb) { @@ -179,9 +184,14 @@ class basic_publish_packet { ); } topic_name_ = buf.substr(0, topic_name_length); -#if 0 // TBD - utf8string_check(topic_name_); -#endif + + if (!utf8string_check(topic_name_)) { + throw make_error( + errc::bad_message, + "v5::publish_packet topic name invalid utf8" + ); + } + buf.remove_prefix(topic_name_length); // packet_id diff --git a/include/async_mqtt/packet/v5_subscribe.hpp b/include/async_mqtt/packet/v5_subscribe.hpp index 6dd8f44d5..510768862 100644 --- a/include/async_mqtt/packet/v5_subscribe.hpp +++ b/include/async_mqtt/packet/v5_subscribe.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -132,9 +133,12 @@ class basic_subscribe_packet { size + // topic filter 1; // opts -#if 0 // TBD - utf8string_check(e.all_topic()); -#endif + if (!utf8string_check(e.all_topic())) { + throw make_error( + errc::bad_message, + "v5::subscribe_packet topic filter invalid utf8" + ); + } } remaining_length_buf_ = val_to_variable_bytes(boost::numeric_cast(remaining_length_)); @@ -224,6 +228,14 @@ class basic_subscribe_packet { ); } auto topic = buf.substr(0, topic_length); + + if (!utf8string_check(topic)) { + throw make_error( + errc::bad_message, + "v5::subscribe_packet topic filter invalid utf8" + ); + } + buf.remove_prefix(topic_length); // opts diff --git a/include/async_mqtt/packet/v5_unsubscribe.hpp b/include/async_mqtt/packet/v5_unsubscribe.hpp index 92671153f..d23280681 100644 --- a/include/async_mqtt/packet/v5_unsubscribe.hpp +++ b/include/async_mqtt/packet/v5_unsubscribe.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -99,9 +100,12 @@ class basic_unsubscribe_packet { 2 + // topic filter length size; // topic filter -#if 0 // TBD - utf8string_check(e.all_topic()); -#endif + if (!utf8string_check(e.all_topic())) { + throw make_error( + errc::bad_message, + "v5::unsubscribe_packet topic filter invalid utf8" + ); + } } remaining_length_buf_ = val_to_variable_bytes(boost::numeric_cast(remaining_length_)); @@ -191,6 +195,12 @@ class basic_unsubscribe_packet { ); } auto topic = buf.substr(0, topic_length); + if (!utf8string_check(topic)) { + throw make_error( + errc::bad_message, + "v5::unsubscribe_packet topic filter invalid utf8" + ); + } buf.remove_prefix(topic_length); entries_.emplace_back(force_move(topic)); } diff --git a/include/async_mqtt/util/utf8validate.hpp b/include/async_mqtt/util/utf8validate.hpp index 2ae559dac..e6b08557b 100644 --- a/include/async_mqtt/util/utf8validate.hpp +++ b/include/async_mqtt/util/utf8validate.hpp @@ -8,11 +8,13 @@ #define ASYNC_MQTT_UTIL_UTF8VALIDATE_HPP #include +#include namespace async_mqtt { -inline bool utf8string_check(std::string_view /*buf*/) { - return true; +inline bool utf8string_check(std::string_view buf) { + if (buf.empty()) return true; + return boost::beast::websocket::detail::check_utf8(buf.data(), buf.size()); } } // namespace async_mqtt