
Commit

Fixed pre-commit issues
Cyber-Var committed Jan 28, 2025
1 parent 3b016df commit 33f87c1
Showing 135 changed files with 684 additions and 624 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/causal_lm_cpp.yml
@@ -22,6 +22,18 @@ env:
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-17911-83c047443de/w_openvino_toolkit_windows_2025.1.0.dev20250116_x86_64.zip

jobs:
  code-quality-checks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v4
        with:
          python-version: 3.9
      - name: Install pre-commit
        run: pip install pre-commit
      - name: Run pre-commit (checks for trailing whitespaces, and non-ASCII symbols in filenames and file content)
        run: pre-commit run --all-files --show-diff-on-failure

cpp-multinomial-greedy_causal_lm-ubuntu:
runs-on: ubuntu-20.04-8-cores
defaults:
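For reference, the same checks can be reproduced locally before pushing; this is a minimal sketch using the two commands from the code-quality-checks job above (assumes Python and pip are available):

# Install pre-commit and run every configured hook against the whole repository,
# mirroring the CI job defined in the workflow above.
pip install pre-commit
pre-commit run --all-files --show-diff-on-failure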
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,20 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1
    hooks:
      - id: trailing-whitespace # checks for files with trailing whitespaces, excluding .md and Git-related hidden files
        exclude: '\.md$|.*\.git.*'
      - id: check-merge-conflict # checks for files that contain merge conflict strings (such as <<<<<<<, =======, and >>>>>>>)
      - id: check-json # Ensures that JSON files are syntactically correct
      - id: end-of-file-fixer # ensures that each file ends with one blank line, excluding Git-related hidden files
        exclude: '.*\.git.*'
  - repo: local
    hooks:
      - id: forbid-non-ascii-filenames # runs the script that prohibits non-ASCII characters in file names
        name: Prohibit non-ASCII characters in file names
        entry: ./pre_commit_scripts/check_non_ascii_filenames.sh
        language: script
      - id: forbid-non-ascii-in-files # checks for non-ASCII characters in files (excluding Markdown and hidden files), with characters ± and ? allowed
        name: Check for non-ASCII characters in files (excluding Markdown and hidden files), with characters ± and ? allowed
        entry: ./pre_commit_scripts/check_non_ascii_in_files.sh
        language: script
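As a usage note (not part of this diff), hooks declared in a .pre-commit-config.yaml like the one above are typically wired into a local clone with pre-commit's standard workflow:

# Register the hooks as a git pre-commit hook so they run on every local commit:
pre-commit install
# Optionally check the entire tree once, not just the staged files:
pre-commit run --all-files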
20 changes: 20 additions & 0 deletions pre_commit_scripts/check_non_ascii_filenames.sh
@@ -0,0 +1,20 @@
#!/bin/bash

# Store the command output:
empty_tree=$(git hash-object -t tree /dev/null)

# Get a list of new files that might have non-ASCII characters:
problem_files=$(git diff --name-only --diff-filter=A -z "$empty_tree" | LC_ALL=C grep -P "[^\x00-\x7F]")

# Count the number of problematic files:
count=$(echo "$problem_files" | wc -w)

# Print necessary info based on the result:
if [ "$count" -ne 0 ]; then
    echo "Error: Non-ASCII characters found in filenames of new files:"
    echo "$problem_files"
    exit 1
else
    echo "Success: No non-ASCII filenames found."
fi
exit 0
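A hypothetical way to exercise this hook locally, assuming the configuration above is installed in the repository (the file name below is illustrative only):

# Stage a file whose name contains a non-ASCII character, then run just this hook:
touch "résumé_example.txt" && git add "résumé_example.txt"
pre-commit run forbid-non-ascii-filenames --all-files
# Expected outcome: the hook exits non-zero and lists the offending file name.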
17 changes: 17 additions & 0 deletions pre_commit_scripts/check_non_ascii_in_files.sh
@@ -0,0 +1,17 @@
#!/bin/bash

# Define the list of files to check, excluding .md, hidden, and a number of specific files:
files_to_check=$(git ls-files | grep -vE "^\." | grep -vE "\.md$" | grep -vE "^(tests/python_tests|tools/who_what_benchmark/(tests|whowhatbench))" | grep -v "tools/llm_bench/llm_bench_utils/ov_model_classes.py")

# Run git grep to find non-ASCII characters in the selected files and store the results:
results=$(LC_ALL=C git grep -n "[^ -~±�“”]" -- $files_to_check)

# Print the results:
if [ -n "$results" ]; then
    echo "Error: Non-ASCII characters found in files:"
    echo "$results"
    exit 1
else
    echo "Success: No non-ASCII characters found in files."
fi
exit 0
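Similarly, an illustrative way to run the content check, either through the hook id defined above or with a roughly equivalent manual git grep query (the pathspec excluding Markdown is an assumption; the script itself applies more exclusions and allows a few extra characters):

# Run only the file-content check:
pre-commit run forbid-non-ascii-in-files --all-files
# Roughly equivalent manual query over tracked files, excluding Markdown:
LC_ALL=C git grep -n "[^ -~]" -- ':!*.md'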
2 changes: 1 addition & 1 deletion requirements-build.txt
@@ -1,3 +1,3 @@
cmake~=3.23.0; platform_system != 'Darwin' or platform_machine == 'x86_64'
cmake~=3.24.0; platform_system == 'Darwin' and platform_machine == 'arm64'
pybind11-stubgen==2.5.1
pybind11-stubgen==2.5.1
2 changes: 1 addition & 1 deletion samples/cpp/image_generation/CMakeLists.txt
@@ -107,4 +107,4 @@ set_target_properties(inpainting PROPERTIES
install(TARGETS inpainting
RUNTIME DESTINATION samples_bin/
COMPONENT samples_bin
EXCLUDE_FROM_ALL)
EXCLUDE_FROM_ALL)
2 changes: 1 addition & 1 deletion samples/cpp/text_generation/CMakeLists.txt
@@ -58,4 +58,4 @@ set_target_properties(benchmark_genai PROPERTIES
install(TARGETS benchmark_genai
RUNTIME DESTINATION samples_bin/
COMPONENT samples_bin
EXCLUDE_FROM_ALL)
EXCLUDE_FROM_ALL)
2 changes: 1 addition & 1 deletion samples/cpp/text_generation/beam_search_causal_lm.cpp
@@ -19,7 +19,7 @@ int main(int argc, char* argv[]) try {
config.num_beams = 15;
config.diversity_penalty = 1.0f;
config.num_return_sequences = config.num_beams;

// Since the streamer is set, the results will
// be printed each time a new token is generated.
auto beams = pipe.generate(prompts, config);
6 changes: 3 additions & 3 deletions samples/cpp/text_generation/chat_sample.cpp
@@ -12,14 +12,14 @@ int main(int argc, char* argv[]) try {

std::string device = "CPU"; // GPU, NPU can be used as well
ov::genai::LLMPipeline pipe(models_path, device);

ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
std::function<bool(std::string)> streamer = [](std::string word) {
std::function<bool(std::string)> streamer = [](std::string word) {
std::cout << word << std::flush;
// Return flag corresponds whether generation should be stopped.
// false means continue generation.
return false;
return false;
};

pipe.start_chat();
2 changes: 1 addition & 1 deletion samples/cpp/text_generation/encrypted_model_causal_lm.cpp
@@ -41,7 +41,7 @@ int main(int argc, char* argv[]) try {

auto [model_str, model_weights] = decrypt_model(models_path + "/openvino_model.xml", models_path + "/openvino_model.bin");
ov::genai::Tokenizer tokenizer = decrypt_tokenizer(models_path);

ov::genai::LLMPipeline pipe(model_str, model_weights, tokenizer, device);

std::string result = pipe.generate(prompt, ov::genai::max_new_tokens(100));
2 changes: 1 addition & 1 deletion samples/cpp/text_generation/prompt_lookup_decoding_lm.cpp
@@ -19,7 +19,7 @@ int main(int argc, char* argv[]) try {

std::string model_path = argv[1];
std::string prompt = argv[2];

std::string device = "CPU";

ov::genai::LLMPipeline pipe(
2 changes: 1 addition & 1 deletion samples/cpp/visual_language_chat/CMakeLists.txt
@@ -42,4 +42,4 @@ set_target_properties(benchmark_vlm PROPERTIES
install(TARGETS benchmark_vlm
RUNTIME DESTINATION samples_bin/
COMPONENT samples_bin
EXCLUDE_FROM_ALL)
EXCLUDE_FROM_ALL)
6 changes: 3 additions & 3 deletions samples/cpp/visual_language_chat/benchmark_vlm.cpp
@@ -42,15 +42,15 @@ int main(int argc, char* argv[]) try {
size_t num_warmup = result["num_warmup"].as<size_t>();
size_t num_iter = result["num_iter"].as<size_t>();
ov::Tensor image = utils::load_image(image_path);

ov::genai::GenerationConfig config;
config.max_new_tokens = result["max_new_tokens"].as<size_t>();

ov::genai::VLMPipeline pipe(models_path, device);

for (size_t i = 0; i < num_warmup; i++)
pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config));

auto res = pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config));
auto metrics = res.perf_metrics;
for (size_t i = 0; i < num_iter - 1; i++) {
2 changes: 1 addition & 1 deletion samples/export-requirements.txt
@@ -10,4 +10,4 @@ diffusers==0.32.2 # For image generation pipelines
timm==1.0.14 # For exporting InternVL2
torchvision # For visual language models
transformers>=4.43 # For Whisper
hf_transfer # for faster models download, should used with env var HF_HUB_ENABLE_HF_TRANSFER=1
hf_transfer # for faster models download, should used with env var HF_HUB_ENABLE_HF_TRANSFER=1
2 changes: 1 addition & 1 deletion samples/python/image_generation/text2image.py
@@ -29,4 +29,4 @@ def main():


if '__main__' == __name__:
main()
main()
12 changes: 6 additions & 6 deletions samples/python/text_generation/benchmark_genai.py
@@ -12,31 +12,31 @@ def main():
parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations")
parser.add_argument("-mt", "--max_new_tokens", type=int, default=20, help="Maximal number of new tokens")
parser.add_argument("-d", "--device", type=str, default="CPU", help="Device")

args = parser.parse_args()

# Perf metrics is stored in DecodedResults.
# Perf metrics is stored in DecodedResults.
# In order to get DecodedResults instead of a string input should be a list.
prompt = [args.prompt]
models_path = args.model
device = args.device
num_warmup = args.num_warmup
num_iter = args.num_iter

config = ov_genai.GenerationConfig()
config.max_new_tokens = args.max_new_tokens

pipe = ov_genai.LLMPipeline(models_path, device)

for _ in range(num_warmup):
pipe.generate(prompt, config)

res = pipe.generate(prompt, config)
perf_metrics = res.perf_metrics
for _ in range(num_iter - 1):
res = pipe.generate(prompt, config)
perf_metrics += res.perf_metrics

print(f"Load time: {perf_metrics.get_load_time():.2f} ms")
print(f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms")
print(f"Tokenization time: {perf_metrics.get_tokenization_duration().mean:.2f} ± {perf_metrics.get_tokenization_duration().std:.2f} ms")
26 changes: 13 additions & 13 deletions samples/python/text_generation/multinomial_causal_lm.py
@@ -11,18 +11,18 @@
class IterableStreamer(openvino_genai.StreamerBase):
"""
A custom streamer class for handling token streaming and detokenization with buffering.
Attributes:
tokenizer (Tokenizer): The tokenizer used for encoding and decoding tokens.
tokens_cache (list): A buffer to accumulate tokens for detokenization.
text_queue (Queue): A synchronized queue for storing decoded text chunks.
print_len (int): The length of the printed text to manage incremental decoding.
"""

def __init__(self, tokenizer):
"""
Initializes the IterableStreamer with the given tokenizer.
Args:
tokenizer (Tokenizer): The tokenizer to use for encoding and decoding tokens.
"""
@@ -38,35 +38,35 @@ def __iter__(self):
Returns the iterator object itself.
"""
return self

def __next__(self):
"""
Returns the next value from the text queue.
Returns:
str: The next decoded text chunk.
Raises:
StopIteration: If there are no more elements in the queue.
"""
value = self.text_queue.get() # get() will be blocked until a token is available.
if value is None:
raise StopIteration
return value

def get_stop_flag(self):
"""
Checks whether the generation process should be stopped.
Returns:
bool: Always returns False in this implementation.
"""
return False

def put_word(self, word: str):
"""
Puts a word into the text queue.
Args:
word (str): The word to put into the queue.
"""
Expand All @@ -75,10 +75,10 @@ def put_word(self, word: str):
def put(self, token_id: int) -> bool:
"""
Processes a token and manages the decoding buffer. Adds decoded text to the queue.
Args:
token_id (int): The token_id to process.
Returns:
bool: True if generation should be stopped, False otherwise.
"""
@@ -168,7 +168,7 @@ def token_printer():
config.top_p = 0.9
config.top_k = 30

# Since the streamer is set, the results will be printed
# Since the streamer is set, the results will be printed
# every time a new token is generated and put into the streamer queue.
pipe.generate(args.prompt, config, text_print_streamer)
printer_thread.join()
12 changes: 6 additions & 6 deletions samples/python/text_generation/prompt_lookup_decoding_lm.py
@@ -5,10 +5,10 @@
import argparse
import openvino_genai

def streamer(subword):
print(subword, end='', flush=True)
# Return flag corresponds whether generation should be stopped.
# False means continue generation.
def streamer(subword):
print(subword, end='', flush=True)
# Return flag corresponds whether generation should be stopped.
# False means continue generation.
return False

def main():
@@ -20,15 +20,15 @@ def main():
device = 'CPU'

pipe = openvino_genai.LLMPipeline(args.model_dir, device, prompt_lookup=True)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
# add parameter to enable prompt lookup decoding to generate `num_assistant_tokens` candidates per iteration
config.num_assistant_tokens = 5
# Define max_ngram_size
config.max_ngram_size = 3

# Since the streamer is set, the results will be printed
# Since the streamer is set, the results will be printed
# every time a new token is generated and put into the streamer queue.
pipe.generate(args.prompt, config, streamer)

6 changes: 3 additions & 3 deletions samples/python/text_generation/speculative_decoding_lm.py
@@ -8,7 +8,7 @@

def streamer(subword):
print(subword, end='', flush=True)
# Return flag corresponds whether generation should be stopped.
# Return flag corresponds whether generation should be stopped.
# False means continue generation.
return False

@@ -27,7 +27,7 @@ def main():
draft_model = openvino_genai.draft_model(args.draft_model_dir, draft_device)

pipe = openvino_genai.LLMPipeline(args.model_dir, main_device, draft_model=draft_model)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
# Speculative decoding generation parameters like `num_assistant_tokens` and `assistant_confidence_threshold` are mutually excluded
@@ -36,7 +36,7 @@
# add parameter to enable speculative decoding to generate candidates by draft_model while candidate probability is higher than `assistant_confidence_threshold`
# config.assistant_confidence_threshold = 0.4

# Since the streamer is set, the results will be printed
# Since the streamer is set, the results will be printed
# every time a new token is generated and put into the streamer queue.
pipe.generate(args.prompt, config, streamer)

4 changes: 2 additions & 2 deletions samples/python/whisper_speech_recognition/recorder.py
@@ -15,7 +15,7 @@
sample_format = pyaudio.paInt16 # 16 bits per sample
channels = 1
fs = 16000 # Record at 16k samples per second
seconds = 5
seconds = 5
filename = "output.wav"

p = pyaudio.PyAudio() # Create an interface to PortAudio
@@ -34,7 +34,7 @@
data = stream.read(chunk)
frames.append(data)

# Stop and close the stream
# Stop and close the stream
stream.stop_stream()
stream.close()
# Terminate the PortAudio interface
@@ -95,7 +95,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {

/**
* @brief Constructs a ContinuousBatchingPipeline from already existing model and tokenizer.
*
*
* This constructor allows for the creation of a ContinuousBatchingPipeline using an existing model
* represented as a string and a weights tensor, along with a manually initialized tokenizer.
* This is useful when the model and tokenizer are already loaded or created in memory and do not

0 comments on commit 33f87c1