Merge pull request #2158 from dlt-hub/devel
master merge for 1.5.0 release
rudolfix authored Dec 17, 2024
2 parents f069071 + 38d0dab commit e8c5e9b
Showing 176 changed files with 5,384 additions and 2,077 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_athena.yml
@@ -67,7 +67,7 @@ jobs:

   - name: Install dependencies
     # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-    run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline,ibis

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
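The change repeated across the workflow files in this commit adds the new `ibis` dependency group next to `pipeline`, matching the ibis integration shipped in this release. A minimal sketch of what the extra group enables — assuming dlt >= 1.5.0 with the `ibis` group installed; the pipeline and table names are illustrative, and the `dataset()`/`ibis()` accessor names follow the dlt docs for this release line rather than this diff:

```python
# Minimal sketch, not from this commit: read a loaded table back through the
# dataset accessor, then hand it to ibis. Names below are illustrative.
import dlt

pipeline = dlt.pipeline(pipeline_name="demo", destination="duckdb", dataset_name="demo_data")
pipeline.run([{"id": 1}, {"id": 2}], table_name="items")

dataset = pipeline.dataset()        # dataset accessor over the destination
print(dataset["items"].df())        # materialize the table as a pandas DataFrame

con = dataset.ibis()                # ibis backend connection (enabled by the `ibis` group)
print(con.table("items").count().execute())
```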
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_athena_iceberg.yml
@@ -67,7 +67,7 @@ jobs:

   - name: Install dependencies
     # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-    run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline,ibis

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_bigquery.yml
@@ -66,7 +66,7 @@ jobs:

   - name: Install dependencies
     # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-    run: poetry install --no-interaction -E bigquery --with providers -E parquet --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E bigquery --with providers -E parquet --with sentry-sdk --with pipeline,ibis

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_clickhouse.yml
@@ -61,7 +61,7 @@ jobs:
       key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

   - name: Install dependencies
-    run: poetry install --no-interaction -E clickhouse --with providers -E parquet --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E clickhouse --with providers -E parquet --with sentry-sdk --with pipeline,ibis

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_databricks.yml
@@ -64,7 +64,7 @@ jobs:
       key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

   - name: Install dependencies
-    run: poetry install --no-interaction -E databricks -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E databricks -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_dremio.yml
@@ -65,7 +65,7 @@ jobs:
       key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

   - name: Install dependencies
-    run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

   - run: |
       poetry run pytest tests/load --ignore tests/load/sources
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_motherduck.yml
@@ -64,7 +64,7 @@ jobs:
       key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-motherduck

   - name: Install dependencies
-    run: poetry install --no-interaction -E motherduck -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E motherduck -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_mssql.yml
@@ -69,7 +69,7 @@ jobs:
       key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

   - name: Install dependencies
-    run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_snowflake.yml
@@ -64,7 +64,7 @@ jobs:
       key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

   - name: Install dependencies
-    run: poetry install --no-interaction -E snowflake -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E snowflake -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_synapse.yml
@@ -67,7 +67,7 @@ jobs:
       key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

   - name: Install dependencies
-    run: poetry install --no-interaction -E synapse -E parquet --with sentry-sdk --with pipeline
+    run: poetry install --no-interaction -E synapse -E parquet --with sentry-sdk --with pipeline,ibis

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
6 changes: 4 additions & 2 deletions .github/workflows/test_destinations.yml
@@ -77,8 +77,10 @@ jobs:
   #     key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-redshift

   - name: Install dependencies
     # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-    run: poetry install --no-interaction -E redshift -E postgis -E postgres -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline -E deltalake
+    run: poetry install --no-interaction -E redshift -E postgis -E postgres -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline,ibis -E deltalake -E pyiceberg
+
+  - name: Upgrade sqlalchemy
+    run: poetry run pip install sqlalchemy==2.0.18 # minimum version required by `pyiceberg`

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
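The extra `Upgrade sqlalchemy` step pins past the lockfile because `pyiceberg` declares `sqlalchemy>=2.0.18`, per the workflow comment. A hedged sanity check mirroring that constraint (assumes the `packaging` package, which is usually present in poetry-managed environments):

```python
# Sketch: verify the environment satisfies pyiceberg's sqlalchemy minimum
# (2.0.18, per the workflow comment above).
from importlib.metadata import version
from packaging.version import Version

assert Version(version("sqlalchemy")) >= Version("2.0.18"), "pyiceberg needs sqlalchemy>=2.0.18"
```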
5 changes: 4 additions & 1 deletion .github/workflows/test_local_destinations.yml
@@ -95,7 +95,10 @@ jobs:
       key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations

   - name: Install dependencies
-    run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline -E deltalake
+    run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline,ibis -E deltalake -E pyiceberg
+
+  - name: Upgrade sqlalchemy
+    run: poetry run pip install sqlalchemy==2.0.18 # minimum version required by `pyiceberg`

   - name: Start SFTP server
     run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d
2 changes: 1 addition & 1 deletion .github/workflows/test_sqlalchemy_destinations.yml
@@ -86,7 +86,7 @@ jobs:
       key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations

   - name: Install dependencies
-    run: poetry install --no-interaction -E parquet -E filesystem -E sqlalchemy -E cli --with sentry-sdk --with pipeline && poetry run pip install mysqlclient && poetry run pip install "sqlalchemy==${{ matrix.sqlalchemy }}"
+    run: poetry install --no-interaction -E parquet -E filesystem -E sqlalchemy -E cli --with sentry-sdk --with pipeline,ibis && poetry run pip install mysqlclient && poetry run pip install "sqlalchemy==${{ matrix.sqlalchemy }}"

   - name: create secrets.toml
     run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
5 changes: 2 additions & 3 deletions Makefile
@@ -44,7 +44,7 @@ has-poetry:
 	poetry --version

 dev: has-poetry
-	poetry install --all-extras --with docs,providers,pipeline,sources,sentry-sdk,airflow
+	poetry install --all-extras --with docs,providers,pipeline,sources,sentry-sdk

 lint:
 	./tools/check-package.sh
@@ -63,7 +63,6 @@ format:
 lint-snippets:
 	cd docs/tools && poetry run python check_embedded_snippets.py full

-
 lint-and-test-snippets: lint-snippets
 	poetry run mypy --config-file mypy.ini docs/website docs/tools --exclude docs/tools/lint_setup --exclude docs/website/docs_processed
 	poetry run flake8 --max-line-length=200 docs/website docs/tools --exclude docs/website/.dlt-repo
@@ -82,7 +81,7 @@ lint-security:
 	poetry run bandit -r dlt/ -n 3 -l

 test:
-	(set -a && . tests/.env && poetry run pytest tests)
+	poetry run pytest tests

 test-load-local:
 	DESTINATION__POSTGRES__CREDENTIALS=postgresql://loader:loader@localhost:5432/dlt_data DESTINATION__DUCKDB__CREDENTIALS=duckdb:///_storage/test_quack.duckdb poetry run pytest tests -k '(postgres or duckdb)'
2 changes: 0 additions & 2 deletions dlt/__init__.py
@@ -42,7 +42,6 @@
 )
 from dlt.pipeline import progress
 from dlt import destinations
-from dlt.destinations.dataset import dataset as _dataset

 pipeline = _pipeline
 current = _current
@@ -80,7 +79,6 @@
     "TCredentials",
     "sources",
     "destinations",
-    "_dataset",
 ]

 # verify that no injection context was created
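With the private `_dataset` re-export dropped from the package root, dataset access goes through the module it was re-exported from, or through the pipeline-bound accessor. A hedged migration sketch — the home-module import is grounded in the line this diff removes, while the `pipeline.dataset()` route is an assumption from this release line's public API:

```python
# Sketch: the dlt._dataset alias is gone; import from its home module instead,
# or use the pipeline-bound accessor. Pipeline names are illustrative.
import dlt
from dlt.destinations.dataset import dataset  # former home of the removed alias

pipeline = dlt.pipeline(pipeline_name="demo", destination="duckdb", dataset_name="demo_data")
ds = pipeline.dataset()  # pipeline-bound dataset accessor
```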
4 changes: 2 additions & 2 deletions dlt/cli/command_wrappers.py
@@ -43,14 +43,14 @@ def init_command_wrapper(
     destination_type: str,
     repo_location: str,
     branch: str,
-    omit_core_sources: bool = False,
+    eject_source: bool = False,
 ) -> None:
     init_command(
         source_name,
         destination_type,
         repo_location,
         branch,
-        omit_core_sources,
+        eject_source,
     )
31 changes: 18 additions & 13 deletions dlt/cli/init_command.py
@@ -157,7 +157,7 @@ def _list_core_sources() -> Dict[str, SourceConfiguration]:
     sources: Dict[str, SourceConfiguration] = {}
     for source_name in files_ops.get_sources_names(core_sources_storage, source_type="core"):
         sources[source_name] = files_ops.get_core_source_configuration(
-            core_sources_storage, source_name
+            core_sources_storage, source_name, eject_source=False
         )
     return sources

@@ -295,7 +295,7 @@ def init_command(
     destination_type: str,
     repo_location: str,
     branch: str = None,
-    omit_core_sources: bool = False,
+    eject_source: bool = False,
 ) -> None:
     # try to import the destination and get config spec
     destination_reference = Destination.from_reference(destination_type)
@@ -310,13 +310,9 @@ def init_command(

     # discover type of source
     source_type: files_ops.TSourceType = "template"
-    if (
-        source_name in files_ops.get_sources_names(core_sources_storage, source_type="core")
-    ) and not omit_core_sources:
+    if source_name in files_ops.get_sources_names(core_sources_storage, source_type="core"):
         source_type = "core"
     else:
-        if omit_core_sources:
-            fmt.echo("Omitting dlt core sources.")
         verified_sources_storage = _clone_and_get_verified_sources_storage(repo_location, branch)
         if source_name in files_ops.get_sources_names(
             verified_sources_storage, source_type="verified"
@@ -380,7 +376,7 @@ def init_command(
     else:
         if source_type == "core":
             source_configuration = files_ops.get_core_source_configuration(
-                core_sources_storage, source_name
+                core_sources_storage, source_name, eject_source
             )
             from importlib.metadata import Distribution

@@ -392,6 +388,9 @@ def init_command(

             if canonical_source_name in extras:
                 source_configuration.requirements.update_dlt_extras(canonical_source_name)
+
+            # create remote modified index to copy files when ejecting
+            remote_modified = {file_name: None for file_name in source_configuration.files}
         else:
             if not is_valid_schema_name(source_name):
                 raise InvalidSchemaName(source_name)
@@ -536,11 +535,17 @@ def init_command(
             "Creating a new pipeline with the dlt core source %s (%s)"
             % (fmt.bold(source_name), source_configuration.doc)
         )
-        fmt.echo(
-            "NOTE: Beginning with dlt 1.0.0, the source %s will no longer be copied from the"
-            " verified sources repo but imported from dlt.sources. You can provide the"
-            " --omit-core-sources flag to revert to the old behavior." % (fmt.bold(source_name))
-        )
+        if eject_source:
+            fmt.echo(
+                "NOTE: Source code of %s will be ejected. Remember to modify the pipeline "
+                "example script to import the ejected source." % (fmt.bold(source_name))
+            )
+        else:
+            fmt.echo(
+                "NOTE: Beginning with dlt 1.0.0, the source %s will no longer be copied from"
+                " the verified sources repo but imported from dlt.sources. You can provide the"
+                " --eject flag to revert to the old behavior." % (fmt.bold(source_name))
+            )
     elif source_configuration.source_type == "verified":
         fmt.echo(
             "Creating and configuring a new pipeline with the verified source %s (%s)"
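Taken together, the rename flips the old opt-out (`--omit-core-sources`) into an explicit opt-in: ejecting copies a core source's files into the project instead of importing it from `dlt.sources`. A hedged sketch of driving the new parameter programmatically — the signature follows the diff above, while the source, destination, and repo-location values are illustrative:

```python
# Sketch: calling init_command with the new eject behavior. Argument values
# are illustrative; only the signature comes from this diff.
from dlt.cli.init_command import init_command

init_command(
    "sql_database",  # a core source shipped in dlt.sources
    "duckdb",        # destination type
    "https://github.com/dlt-hub/verified-sources.git",  # repo_location (illustrative)
    branch=None,
    eject_source=True,  # copy the source files locally instead of importing dlt.sources
)
```

On the command line this corresponds to the `--eject` flag registered in the `plugins.py` change below.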
40 changes: 23 additions & 17 deletions dlt/cli/pipeline_files.py
@@ -226,19 +226,39 @@ def get_template_configuration(
     )


+def _get_source_files(sources_storage: FileStorage, source_name: str) -> List[str]:
+    """Get all files that belong to source `source_name`"""
+    files: List[str] = []
+    for root, subdirs, _files in os.walk(sources_storage.make_full_path(source_name)):
+        # filter unwanted files
+        for subdir in list(subdirs):
+            if any(fnmatch.fnmatch(subdir, ignore) for ignore in IGNORE_FILES):
+                subdirs.remove(subdir)
+        rel_root = sources_storage.to_relative_path(root)
+        files.extend(
+            [
+                os.path.join(rel_root, file)
+                for file in _files
+                if all(not fnmatch.fnmatch(file, ignore) for ignore in IGNORE_FILES)
+            ]
+        )
+    return files
+
+
 def get_core_source_configuration(
-    sources_storage: FileStorage, source_name: str
+    sources_storage: FileStorage, source_name: str, eject_source: bool
 ) -> SourceConfiguration:
     src_pipeline_file = CORE_SOURCE_TEMPLATE_MODULE_NAME + "/" + source_name + PIPELINE_FILE_SUFFIX
     dest_pipeline_file = source_name + PIPELINE_FILE_SUFFIX
+    files: List[str] = _get_source_files(sources_storage, source_name) if eject_source else []

     return SourceConfiguration(
         "core",
         "dlt.sources." + source_name,
         sources_storage,
         src_pipeline_file,
         dest_pipeline_file,
-        [".gitignore"],
+        files,
         SourceRequirements([]),
         _get_docstring_for_module(sources_storage, source_name),
         False,
@@ -259,21 +279,7 @@ def get_verified_source_configuration(
             f"Pipeline example script {example_script} could not be found in the repository",
             source_name,
         )
-    # get all files recursively
-    files: List[str] = []
-    for root, subdirs, _files in os.walk(sources_storage.make_full_path(source_name)):
-        # filter unwanted files
-        for subdir in list(subdirs):
-            if any(fnmatch.fnmatch(subdir, ignore) for ignore in IGNORE_FILES):
-                subdirs.remove(subdir)
-        rel_root = sources_storage.to_relative_path(root)
-        files.extend(
-            [
-                os.path.join(rel_root, file)
-                for file in _files
-                if all(not fnmatch.fnmatch(file, ignore) for ignore in IGNORE_FILES)
-            ]
-        )
+    files = _get_source_files(sources_storage, source_name)
     # read requirements
     requirements_path = os.path.join(source_name, utils.REQUIREMENTS_TXT)
     if sources_storage.has_file(requirements_path):
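The walk-and-filter logic is now shared: `get_verified_source_configuration` and the eject path of `get_core_source_configuration` both delegate to `_get_source_files`. A hedged usage sketch — the storage root below is illustrative:

```python
# Sketch: listing a source's files with the shared helper above.
# The storage root is illustrative.
from dlt.common.storages import FileStorage
from dlt.cli.pipeline_files import _get_source_files

storage = FileStorage("/tmp/verified-sources/sources")
files = _get_source_files(storage, "sql_database")
# -> paths relative to the storage root for every file under sql_database/,
#    with anything matching IGNORE_FILES pruned
```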
10 changes: 3 additions & 7 deletions dlt/cli/plugins.py
@@ -84,14 +84,10 @@ def configure_parser(self, parser: argparse.ArgumentParser) -> None:
         )

         parser.add_argument(
-            "--omit-core-sources",
+            "--eject",
             default=False,
             action="store_true",
-            help=(
-                "When present, will not create the new pipeline with a core source of the given"
-                " name but will take a source of this name from the default or provided"
-                " location."
-            ),
+            help="Ejects the source code of the core source like sql_database",
         )

     def execute(self, args: argparse.Namespace) -> None:
@@ -107,7 +103,7 @@ def execute(self, args: argparse.Namespace) -> None:
             args.destination,
             args.location,
             args.branch,
-            args.omit_core_sources,
+            args.eject,
         )
3 changes: 1 addition & 2 deletions dlt/cli/source_detection.py
@@ -29,8 +29,7 @@ def find_call_arguments_to_replace(
             if not isinstance(dn_node, ast.Constant) or not isinstance(dn_node.value, str):
                 raise CliCommandInnerException(
                     "init",
-                    f"The pipeline script {init_script_name} must pass the {t_arg_name} as"
-                    f" string to '{arg_name}' function in line {dn_node.lineno}",
+                    f"The pipeline script {init_script_name} must pass the {t_arg_name} as string to '{arg_name}' function in line {dn_node.lineno}",  # type: ignore[attr-defined]
                 )
             else:
                 transformed_nodes.append((dn_node, ast.Constant(value=t_value, kind=None)))
6 changes: 6 additions & 0 deletions dlt/common/configuration/providers/toml.py
@@ -124,6 +124,12 @@ def _read_google_colab_secrets(self, name: str, file_name: str) -> tomlkit.TOMLDocument:
         """Try to load the toml from google colab userdata object"""
         try:
             from google.colab import userdata
+            from dlt.common.runtime.exec_info import is_notebook
+
+            # make sure we work in interactive mode (get_ipython() is available)
+            # when dlt cli is run, userdata is available but without a kernel
+            if not is_notebook():
+                return None

             try:
                 return tomlkit.loads(userdata.get(file_name))
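The guard matters because `google.colab.userdata` imports cleanly when the dlt CLI runs in a Colab VM shell, but secret lookups need a live interactive kernel. The same pattern in isolation — `read_colab_secret` is an illustrative helper, not part of this diff:

```python
# Sketch of the guard added above: consult Colab userdata only when an
# interactive kernel is present. Helper name is illustrative.
from typing import Optional

from dlt.common.runtime.exec_info import is_notebook


def read_colab_secret(file_name: str) -> Optional[str]:
    try:
        from google.colab import userdata  # only importable on Colab
    except ImportError:
        return None
    if not is_notebook():  # dlt CLI in a Colab shell: userdata exists, no kernel
        return None
    return userdata.get(file_name)
```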
[Diff truncated — 176 files changed in total; the remaining file diffs are not shown.]
