-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathpyproject.toml
50 lines (45 loc) · 3.46 KB
/
pyproject.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Black code-formatter settings.
[tool.black]
# Maximum line width, in characters.
line-length = 119
# Python version(s) whose syntax the formatted output should target.
target-version = ["py37"]
# Package metadata read by Poetry.
[tool.poetry]
name = "bsmetadata"
version = "0.1.0"
description = "Codebase for including metadata (e.g., URLs, timestamps, HTML tags) during language model pretraining."
# NOTE(review): this looks like a template placeholder rather than a real
# author entry — confirm and replace with the actual maintainers.
authors = ["Your Name <[email protected]>"]
repository = "https://github.com/bigscience-workshop/metadata.git"
# Runtime dependencies. Entries marked `optional = true` are only installed
# when an extra that lists them is requested.
[tool.poetry.dependencies]
# wikipedia2vec doesn't support Python 3.9.
python = ">=3.7.11,<3.9"

# Both needed for `setup.py`.
setuptools = "^62.0.0"
wheel = "^0.37.1"

# Our custom `Features` need >=6, yet 7 is probably better for bugfixes.
pyarrow = "^7.0.0"

# Version ranges are split on the interpreter version (below / from Python 3.8).
numpy = [
    { version = "<1.22", python = "<3.8" },
    { version = ">=1.22", python = ">=3.8" },
]
pandas = [
    { version = "<1.4", python = "<3.8" },
    { version = ">=1.4", python = ">=3.8" },
]

# For evaluation.py such that `Features` conversion can be more automatic.
datasets = ">=2.10.0"

lxml = "4.6.5"
htmlmin = "0.1.12"

# Technically an extra, but flair for REL only works with torch != 1.8.
torch = { version = "1.9.0", optional = true }

# 0.12.0 is a yanked version.
tokenizers = "!=0.12.0"

# It implies "huggingface-hub>=0.9.0,<1.0" since 4.22.
transformers = "^4.22"

# Poetry will likely update it to 0.13.2, but it is probably ok and good for bugfixes.
accelerate = ">=0.4.0,<1"

# Poetry will likely update it to 1.1.2, but it is probably ok and good for bugfixes.
hydra-core = ">=1.1,<1.2"

# Poetry will likely update it to 0.13.4, but it is probably ok and good for bugfixes.
wandb = ">=0.10.32,<1"

# Git-sourced optional packages.
REL = { git = "https://github.com/manandey/REL.git", branch = "main", optional = true }
bs-dateutil = { git = "https://github.com/cccntu/dateutil.git", tag = "2.8.5", optional = true }

# Technically secondary, but gensim>=4 breaks wikipedia2vec.
gensim = { version = "^3.8.3", optional = true }
wikipedia2vec = { version = "1.0.5", optional = true }
nltk = { version = "3.6.7", optional = true }

loguru = ">=0.6.0"
deepspeed = ">=0.6.1"

# For with_metadata_datasetv2_tf.py.
# NOTE(review): `>2.12.0` excludes 2.12.0 itself — confirm this is intended
# rather than `>=2.12.0`.
tensorflow = [
    { version = "2.11.0", python = "<3.8" },
    { version = ">2.12.0", python = ">=3.8" },
]

# For evaluation.py's debugging output.
rich = ">=13.4.1"
# Named extras; each lists packages declared in the dependencies table.
[tool.poetry.extras]
preprocessing = [
    "lxml",
    "htmlmin",
    "REL",
    "bs-dateutil",
    "gensim",
    "wikipedia2vec",
    "nltk",
]
torch = ["torch"]
# Development-only tooling: import sorting, formatting, linting and testing.
# NOTE(review): newer Poetry releases favour `[tool.poetry.group.dev.dependencies]`
# over this table — confirm which Poetry versions must be supported before renaming.
[tool.poetry.dev-dependencies]
isort = ">=5.5.4"
black = "^22.3.0" # For `"click>=8.1.0"`
flake8 = ">=3.8.3"
pytest = "6.2.4"
# PEP 517 build configuration: packages needed to build the project and the
# backend entry point that performs the build.
[build-system]
requires = [
    "poetry-core",
    "setuptools>=62.0.0",
    "wheel>=0.37.1",
]
build-backend = "poetry.core.masonry.api"