Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mkdir to CLI #188

Merged
merged 1 commit into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import subprocess
import urllib
import urllib.parse
Expand Down Expand Up @@ -32,6 +33,8 @@ def minidfs():
output = child.stdout.readline().strip()
assert output == "Ready!", output

os.environ["HADOOP_CONF_DIR"] = "target/test"

yield "hdfs://127.0.0.1:9000"

try:
Expand Down
51 changes: 46 additions & 5 deletions python/hdfs_native/cli.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
import functools
import os
from argparse import ArgumentParser, Namespace
from typing import Optional, Sequence
from typing import List, Optional, Sequence
from urllib.parse import urlparse

from hdfs_native import Client


@functools.cache
def _get_client(connection_url: Optional[str] = None):
    """Build a ``Client`` for ``connection_url``, memoized per argument.

    ``functools.cache`` keys on the argument, so repeated calls with the same
    URL (or with no URL at all) hand back the same ``Client`` object instead
    of constructing a new one.
    """
    client = Client(connection_url)
    return client


def _client_for_url(url: str) -> Client:
    """Return the client that should be used to operate on ``url``.

    Args:
        url: A bare path (``/dir/file``) or a fully qualified URL
            (``scheme://host[:port]/dir/file``).

    Returns:
        A client obtained through the memoized ``_get_client`` helper. When
        ``url`` carries a scheme the client is requested for
        ``scheme://host[:port]``; otherwise ``_get_client`` is called with no
        URL and the choice of default is left to ``Client``.

    Raises:
        ValueError: If ``url`` names a host or port but has no scheme.
    """
    parsed = urlparse(url)

    if parsed.scheme:
        # Only scheme, host and port identify the connection; the path
        # component is handled separately by the callers.
        connection_url = f"{parsed.scheme}://{parsed.hostname}"
        if parsed.port:
            connection_url += f":{parsed.port}"
        return _get_client(connection_url)

    if parsed.hostname or parsed.port:
        raise ValueError(
            f"Cannot provide host or port without scheme: {parsed.hostname}"
        )

    return _get_client()


def _verify_nameservices_match(url: str, *urls: str) -> None:
Expand All @@ -37,6 +43,19 @@ def _path_for_url(url: str) -> str:
return urlparse(url).path


def _glob_path(client: Client, glob: str) -> List[str]:
# TODO: Actually implement this, for now just pretend we have multiple results
return [glob]


def mkdir(args: Namespace):
    """Create each directory named on the command line.

    Args:
        args: Parsed CLI arguments. ``args.path`` is the list of paths or
            URLs to create and ``args.parent`` says whether missing parent
            directories should be created as well.
    """
    create_parent = args.parent

    for url in args.path:
        client = _client_for_url(url)
        # The client is already bound to the URL's scheme/host/port, so pass
        # it only the path component, the same way ``mv`` does.
        client.mkdirs(_path_for_url(url), create_parent=create_parent)


def mv(args: Namespace):
_verify_nameservices_match(args.dst, *args.src)

Expand All @@ -49,12 +68,16 @@ def mv(args: Namespace):
except FileNotFoundError:
pass

if len(args.src) > 1 and not dst_isdir:
resolved_src = [
path for pattern in args.src for path in _glob_path(client, pattern)
]

if len(resolved_src) > 1 and not dst_isdir:
raise ValueError(
"destination must be a directory if multiple sources are provided"
)

for src in args.src:
for src in resolved_src:
src_path = _path_for_url(src)
if dst_isdir:
target_path = os.path.join(dst_path, os.path.basename(src_path))
Expand All @@ -72,6 +95,24 @@ def main(in_args: Optional[Sequence[str]] = None):

subparsers = parser.add_subparsers(title="Subcommands", required=True)

mkdir_parser = subparsers.add_parser(
"mkdir",
help="Create a directory",
description="Create a directory in a specified path",
)
mkdir_parser.add_argument(
"path",
nargs="+",
help="Path for the directory to create",
)
mkdir_parser.add_argument(
"-p",
"--parent",
action="store_true",
help="Create any missing parent directories",
)
mkdir_parser.set_defaults(func=mkdir)

mv_parser = subparsers.add_parser(
"mv",
help="Move files or directories",
Expand Down
27 changes: 17 additions & 10 deletions python/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,43 @@
def test_cli(minidfs: str):
client = Client(minidfs)

def qualify(path: str) -> str:
return f"{minidfs}{path}"
# mkdir
cli_main(["mkdir", "/testdir"])
assert client.get_file_info("/testdir").isdir

with pytest.raises(RuntimeError):
cli_main(["mkdir", "/testdir/nested/dir"])

cli_main(["mkdir", "-p", "/testdir/nested/dir"])
assert client.get_file_info("/testdir/nested/dir").isdir

client.delete("/testdir", True)

# mv
client.create("/testfile").close()
client.mkdirs("/testdir")

cli_main(["mv", qualify("/testfile"), qualify("/testfile2")])
cli_main(["mv", "/testfile", "/testfile2"])

client.get_file_info("/testfile2")

with pytest.raises(ValueError):
cli_main(["mv", qualify("/testfile2"), "hdfs://badnameservice/testfile"])
cli_main(["mv", "/testfile2", "hdfs://badnameservice/testfile"])

with pytest.raises(RuntimeError):
cli_main(["mv", qualify("/testfile2"), qualify("/nonexistent/testfile")])
cli_main(["mv", "/testfile2", "/nonexistent/testfile"])

cli_main(["mv", qualify("/testfile2"), qualify("/testdir")])
cli_main(["mv", "/testfile2", "/testdir"])

client.get_file_info("/testdir/testfile2")

client.rename("/testdir/testfile2", "/testfile1")
client.create("/testfile2").close()

with pytest.raises(ValueError):
cli_main(
["mv", qualify("/testfile1"), qualify("/testfile2"), qualify("/testfile3")]
)
cli_main(["mv", "/testfile1", "/testfile2", "/testfile3"])

cli_main(["mv", qualify("/testfile1"), qualify("/testfile2"), qualify("/testdir/")])
cli_main(["mv", "/testfile1", "/testfile2", "/testdir/"])

client.get_file_info("/testdir/testfile1")
client.get_file_info("/testdir/testfile2")
Loading