Skip to content

Commit

Permalink
Use requests.get instead of wget
Browse files (browse the repository at this point in the history)
  • Loading branch information
alekszievr committed Jan 14, 2025
1 parent cefe7d8 commit 273b16c
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions evals/qa_dataset_utils.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
from cognee.root_dir import get_absolute_path
import json
import wget
import requests
from jsonschema import ValidationError, validate
from pathlib import Path

Expand Down Expand Up @@ -31,7 +31,7 @@
}


def download_qa_dataset(dataset_name: str, dir: str):
def download_qa_dataset(dataset_name: str, filepath: Path):
if dataset_name not in qa_datasets:
raise ValueError(f"{dataset_name} is not a supported dataset.")

Expand All @@ -44,7 +44,15 @@ def download_qa_dataset(dataset_name: str, dir: str):
and unzip it."
)

wget.download(url, out=dir)
response = requests.get(url, stream=True)

if response.status_code == 200:
with open(filepath, "wb") as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
print(f"Dataset {dataset_name} downloaded and saved to {filepath}")
else:
print(f"Failed to download {dataset_name}. Status code: {response.status_code}")


def load_qa_dataset(dataset_name_or_filename: str):
Expand All @@ -58,7 +66,7 @@ def load_qa_dataset(dataset_name_or_filename: str):

filepath = data_root_dir / Path(filename)
if not filepath.exists():
download_qa_dataset(dataset_name, data_root_dir)
download_qa_dataset(dataset_name, filepath)
else:
filename = dataset_name_or_filename
filepath = Path(filename)
Expand Down

0 comments on commit 273b16c

Please sign in to comment.