Skip to content

Commit

Permalink
Python api improvements (#132)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kimahriman authored Jun 17, 2024
1 parent 16f6feb commit eaf19d4
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 23 deletions.
28 changes: 22 additions & 6 deletions python/hdfs_native/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,46 +88,62 @@ def __exit__(self, *_args):

class Client:

def __init__(self, url: str, config: Optional[Dict[str, str]] = None):
def __init__(
self,
url: Optional[str] = None,
config: Optional[Dict[str, str]] = None,
):
self.inner = RawClient(url, config)

def get_file_info(self, path: str) -> "FileStatus":
"""Gets the file status for the file at `path`"""
return self.inner.get_file_info(path)

def list_status(self, path: str, recursive: bool) -> Iterator["FileStatus"]:
def list_status(self, path: str, recursive: bool = False) -> Iterator["FileStatus"]:
"""Gets the status of files rooted at `path`. If `recursive` is true, lists all files recursively."""
return self.inner.list_status(path, recursive)

def read(self, path: str) -> FileReader:
"""Opens a file for reading at `path`"""
return FileReader(self.inner.read(path))

def create(self, path: str, write_options: WriteOptions) -> FileWriter:
def create(
self,
path: str,
write_options: Optional[WriteOptions] = None,
) -> FileWriter:
"""Creates a new file and opens it for writing at `path`"""
if not write_options:
write_options = WriteOptions()

return FileWriter(self.inner.create(path, write_options))

def append(self, path: str) -> FileWriter:
"""Opens an existing file to append to at `path`"""
return FileWriter(self.inner.append(path))

def mkdirs(self, path: str, permission: int, create_parent: bool) -> None:
def mkdirs(
self,
path: str,
permission: int = 0o0755,
create_parent: bool = False,
) -> None:
"""
Creates a directory at `path` with unix permissions `permission`. If `create_parent` is true,
any parent directories that don't exist will also be created. Otherwise this will fail if
any parent directory doesn't already exist.
"""
return self.inner.mkdirs(path, permission, create_parent)

def rename(self, src: str, dst: str, overwrite: bool) -> None:
def rename(self, src: str, dst: str, overwrite: bool = False) -> None:
"""
Moves a file or directory from `src` to `dst`. If `overwrite` is True, the destination will be
overwritten if it already exists, otherwise the operation will fail if the destination
exists.
"""
return self.inner.rename(src, dst, overwrite)

def delete(self, path: str, recursive: bool) -> bool:
def delete(self, path: str, recursive: bool = False) -> bool:
"""
Deletes a file or directory at `path`. If `recursive` is True and the target is a directory,
this will delete all contents underneath the directory. If `recursive` is False and the target
Expand Down
6 changes: 5 additions & 1 deletion python/hdfs_native/_internal.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ class RawFileWriter:
"""Closes the file and saves the final metadata to the NameNode"""

class RawClient:
def __init__(self, url: str, config: Optional[Dict[str, str]]) -> None: ...
def __init__(
self,
url: Optional[str],
config: Optional[Dict[str, str]],
) -> None: ...
def get_file_info(self, path: str) -> FileStatus: ...
def list_status(self, path: str, recursive: bool) -> Iterator[FileStatus]: ...
def read(self, path: str) -> RawFileReader: ...
Expand Down
13 changes: 10 additions & 3 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,13 +206,20 @@ struct RawClient {
impl RawClient {
#[new]
#[pyo3(signature = (url, config))]
pub fn new(url: &str, config: Option<HashMap<String, String>>) -> PyResult<Self> {
pub fn new(url: Option<&str>, config: Option<HashMap<String, String>>) -> PyResult<Self> {
// Initialize logging, ignore errors if this is called multiple times
let _ = env_logger::try_init();

let config = config.unwrap_or_default();

let inner = if let Some(url) = url {
Client::new_with_config(url, config).map_err(PythonHdfsError::from)?
} else {
Client::default_with_config(config).map_err(PythonHdfsError::from)?
};

Ok(RawClient {
inner: Client::new_with_config(url, config.unwrap_or_default())
.map_err(PythonHdfsError::from)?,
inner,
rt: Arc::new(
tokio::runtime::Runtime::new()
.map_err(|err| PyRuntimeError::new_err(err.to_string()))?,
Expand Down
25 changes: 12 additions & 13 deletions rust/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,17 @@ impl Client {
Self::with_config(&parsed_url, Configuration::new_with_config(config)?)
}

/// Creates a client for the filesystem named by the `config::DEFAULT_FS` setting.
///
/// `config` is passed to `Configuration::new_with_config`; the resulting configuration
/// is searched for the default filesystem URL and then handed to the client.
///
/// # Errors
///
/// Returns `HdfsError::InvalidArgument` when the configuration has no
/// `config::DEFAULT_FS` entry. Failures from building the configuration, parsing the
/// URL, or constructing the client are propagated with `?`.
pub fn default_with_config(config: HashMap<String, String>) -> Result<Self> {
    let config = Configuration::new_with_config(config)?;
    let url = config.get(config::DEFAULT_FS).ok_or_else(|| {
        // Built lazily so the message is only formatted when the setting is missing.
        HdfsError::InvalidArgument(format!("No {} setting found", config::DEFAULT_FS))
    })?;
    Self::with_config(&Url::parse(&url)?, config)
}

fn with_config(url: &Url, config: Configuration) -> Result<Self> {
if !url.has_host() {
return Err(HdfsError::InvalidArgument(
Expand Down Expand Up @@ -493,19 +504,7 @@ impl Default for Client {
/// Creates a new HDFS Client based on the fs.defaultFS setting. Panics if the config files fail to load,
/// no defaultFS is defined, or the defaultFS is invalid.
fn default() -> Self {
let config = Configuration::new().expect("Failed to load configuration");
let url = config
.get(config::DEFAULT_FS)
.ok_or(HdfsError::InvalidArgument(format!(
"No {} setting found",
config::DEFAULT_FS
)))
.expect("No fs.defaultFS config defined");
Self::with_config(
&Url::parse(&url).expect("Failed to parse fs.defaultFS"),
config,
)
.expect("Failed to create default client")
Self::default_with_config(Default::default()).expect("Failed to create default client")
}
}

Expand Down

0 comments on commit eaf19d4

Please sign in to comment.