Skip to content

Commit

Permalink
[PyOV] Allow replacing of invalid bytes in string-based data (openvin…
Browse files Browse the repository at this point in the history
…otoolkit#21761)

* [PyOV] Allow replacing of invalid bytes in string-based data

* Fix linter
  • Loading branch information
Jan Iwaszkiewicz authored Dec 19, 2023
1 parent 27c9ca7 commit 712e374
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/bindings/python/src/pyopenvino/core/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ py::array string_array_from_tensor(ov::Tensor&& t) {
auto data = t.data<std::string>();
py::list _list;
for (size_t i = 0; i < t.get_size(); ++i) {
PyObject* _unicode_obj = PyUnicode_DecodeUTF8(&data[i][0], data[i].length(), "strict");
PyObject* _unicode_obj = PyUnicode_DecodeUTF8(&data[i][0], data[i].length(), "replace");
_list.append(_unicode_obj);
Py_XDECREF(_unicode_obj);
}
Expand Down
10 changes: 10 additions & 0 deletions src/bindings/python/tests/test_runtime/test_tensor_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,3 +254,13 @@ def test_empty_tensor_populate(init_type, init_shape, string_data, data_getter):
check_bytes_based(tensor, _string_data, to_flat=True)
# Decoded:
check_string_based(tensor, _string_data, to_flat=True)


def test_invalid_bytes_replaced():
    """Check that a tensor built from invalid UTF-8 bytes can still be decoded.

    The input is compared twice through the shared helpers: once against the
    original bytes and once against NumPy's own decoding of those bytes with
    ``errors="replace"``.
    """
    # 0xE2 opens a three-byte UTF-8 sequence; only one continuation byte
    # follows, so the sequence is incomplete and cannot be decoded strictly.
    string_data = np.array(b"\xe2\x80")
    tensor = ov.Tensor(string_data)

    # Encoded:
    check_bytes_based(tensor, string_data, to_flat=True)
    # Decoded:
    # Use the canonical codec name "utf-8" (the previous spelling "utf=8"
    # presumably resolved only through codec-name normalization).
    expected = np.char.decode(string_data, encoding="utf-8", errors="replace")
    check_string_based(tensor, expected, to_flat=True)

0 comments on commit 712e374

Please sign in to comment.