Skip to content

Commit

Permalink
feat: Use upload session urls for chunk upload
Browse files Browse the repository at this point in the history
Closes: SDK-3836
  • Loading branch information
lukaszsocha2 committed May 23, 2024
1 parent 5a7c767 commit e472942
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 74 deletions.
23 changes: 19 additions & 4 deletions boxsdk/object/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,20 @@ def preflight_check(self, size: int, name: Optional[str] = None) -> Optional[str
)

@api_call
def create_upload_session(self, file_size: int, file_name: Optional[str] = None) -> 'UploadSession':
def create_upload_session(
self, file_size: int, file_name: Optional[str] = None, use_upload_session_urls: bool = True
) -> 'UploadSession':
"""
Create a new chunked upload session for uploading a new version of the file.
:param file_size:
The size of the file in bytes that will be uploaded.
:param file_name:
The new name of the file version that will be uploaded.
:param use_upload_session_urls:
The parameter determining which URLs to use to perform the chunked upload.
If True, the urls returned by create_upload_session() endpoint response will be used.
If False, the base upload url will be used.
:returns:
A :class:`UploadSession` object.
"""
Expand All @@ -68,13 +74,18 @@ def create_upload_session(self, file_size: int, file_name: Optional[str] = None)
body_params['file_name'] = file_name
url = self.get_url('upload_sessions').replace(self.session.api_config.BASE_API_URL, self.session.api_config.UPLOAD_URL)
response = self._session.post(url, data=json.dumps(body_params)).json()
return self.translator.translate(
upload_session = self.translator.translate(
session=self._session,
response_object=response,
)
# pylint:disable=protected-access
upload_session._use_upload_session_urls = use_upload_session_urls
return upload_session

@api_call
def get_chunked_uploader(self, file_path: str, rename_file: bool = False) -> 'ChunkedUploader':
def get_chunked_uploader(
self, file_path: str, rename_file: bool = False, use_upload_session_urls: bool = True
) -> 'ChunkedUploader':
# pylint: disable=consider-using-with
"""
Instantiate the chunked upload instance and create upload session with path to file.
Expand All @@ -83,13 +94,17 @@ def get_chunked_uploader(self, file_path: str, rename_file: bool = False) -> 'Ch
The local path to the file you wish to upload.
:param rename_file:
Indicates whether the file should be renamed or not.
:param use_upload_session_urls:
The parameter determining which URLs to use to perform the chunked upload.
If True, the urls returned by create_upload_session() endpoint response will be used.
If False, the base upload url will be used.
:returns:
A :class:`ChunkedUploader` object.
"""
total_size = os.stat(file_path).st_size
content_stream = open(file_path, 'rb')
file_name = os.path.basename(file_path) if rename_file else None
upload_session = self.create_upload_session(total_size, file_name)
upload_session = self.create_upload_session(total_size, file_name, use_upload_session_urls)
return upload_session.get_chunked_uploader_for_stream(content_stream, total_size)

def _get_accelerator_upload_url_for_update(self) -> Optional[str]:
Expand Down
21 changes: 17 additions & 4 deletions boxsdk/object/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,18 @@ def preflight_check(self, size: int, name: str) -> Optional[str]:
)

@api_call
def create_upload_session(self, file_size: int, file_name: str) -> 'UploadSession':
def create_upload_session(self, file_size: int, file_name: str, use_upload_session_urls: bool = True) -> 'UploadSession':
"""
Creates a new chunked upload session for uploading a new file.
:param file_size:
The size of the file in bytes that will be uploaded.
:param file_name:
The name of the file that will be uploaded.
:param use_upload_session_urls:
The parameter determining which URLs to use to perform the chunked upload.
If True, the urls returned by create_upload_session() endpoint response will be used.
If False, the base upload url will be used.
:returns:
A :class:`UploadSession` object.
"""
Expand All @@ -133,13 +137,18 @@ def create_upload_session(self, file_size: int, file_name: str) -> 'UploadSessio
'file_name': file_name,
}
response = self._session.post(url, data=json.dumps(body_params)).json()
return self.translator.translate(
upload_session = self.translator.translate(
session=self._session,
response_object=response,
)
# pylint:disable=protected-access
upload_session._use_upload_session_urls = use_upload_session_urls
return upload_session

@api_call
def get_chunked_uploader(self, file_path: str, file_name: Optional[str] = None) -> 'ChunkedUploader':
def get_chunked_uploader(
self, file_path: str, file_name: Optional[str] = None, use_upload_session_urls: bool = True
) -> 'ChunkedUploader':
# pylint: disable=consider-using-with
"""
Instantiate the chunked upload instance and create upload session with path to file.
Expand All @@ -149,6 +158,10 @@ def get_chunked_uploader(self, file_path: str, file_name: Optional[str] = None)
:param file_name:
The name, including extension, of the file that will be uploaded, e.g. new_file_name.zip.
If not specified, the name from the local system is used.
:param use_upload_session_urls:
The parameter determining which URLs to use to perform the chunked upload.
If True, the urls returned by create_upload_session() endpoint response will be used.
If False, the base upload url will be used.
:returns:
A :class:`ChunkedUploader` object.
"""
Expand All @@ -157,7 +170,7 @@ def get_chunked_uploader(self, file_path: str, file_name: Optional[str] = None)
content_stream = open(file_path, 'rb')

try:
upload_session = self.create_upload_session(total_size, upload_file_name)
upload_session = self.create_upload_session(total_size, upload_file_name, use_upload_session_urls)
return upload_session.get_chunked_uploader_for_stream(content_stream, total_size)
except Exception:
content_stream.close()
Expand Down
26 changes: 21 additions & 5 deletions boxsdk/object/upload_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from boxsdk import BoxAPIException
from boxsdk.util.api_call_decorator import api_call
from boxsdk.util.chunked_uploader import ChunkedUploader
from boxsdk.session.session import Session
from .base_object import BaseObject
from ..pagination.limit_offset_based_dict_collection import LimitOffsetBasedDictCollection

Expand All @@ -20,10 +21,20 @@ class UploadSession(BaseObject):
_item_type = 'upload_session'
_parent_item_type = 'file'

def get_url(self, *args: Any) -> str:
def __init__(
self, session: Session, object_id: str, response_object: dict = None, use_upload_session_urls: bool = True
):
super().__init__(session, object_id, response_object)
self._use_upload_session_urls = use_upload_session_urls

def get_url(self, *args: Any, url_key: str = None) -> str:
"""
Base class override. Endpoint is a little different - it's /files/upload_sessions.
"""
session_endpoints = getattr(self, 'session_endpoints', {})
if self._use_upload_session_urls and url_key in session_endpoints:
return session_endpoints[url_key]

return self._session.get_url(
f'{self._parent_item_type}s/{self._item_type}s',
self._object_id,
Expand All @@ -44,7 +55,7 @@ def get_parts(self, limit: Optional[int] = None, offset: Optional[int] = None) -
"""
return LimitOffsetBasedDictCollection(
session=self.session,
url=self.get_url('parts'),
url=self.get_url('parts', url_key='list_parts'),
limit=limit,
offset=offset,
fields=None,
Expand Down Expand Up @@ -87,7 +98,7 @@ def upload_part_bytes(
'Content-Range': f'bytes {offset}-{range_end}/{total_size}',
}
response = self._session.put(
self.get_url(),
self.get_url(url_key='upload_part'),
headers=headers,
data=part_bytes,
)
Expand Down Expand Up @@ -131,7 +142,7 @@ def commit(

try:
response = self._session.post(
self.get_url('commit'),
self.get_url('commit', url_key='commit'),
headers=headers,
data=json.dumps(body),
)
Expand All @@ -154,7 +165,12 @@ def abort(self) -> bool:
:returns:
A boolean indicating whether the upload abort succeeded.
"""
return self.delete()

box_response = self._session.delete(
self.get_url(url_key='abort'),
expect_json_response=False
)
return box_response.ok

def get_chunked_uploader_for_stream(self, content_stream: IO[bytes], file_size: int) -> ChunkedUploader:
"""
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


CLASSIFIERS = [
'Development Status :: 5 - Production/Stable',
'Development Status :: 6 - Mature',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python',
Expand All @@ -18,6 +18,7 @@
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
'Operating System :: OS Independent',
Expand Down
19 changes: 17 additions & 2 deletions test/integration_new/object/folder_itest.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,24 @@ def test_manual_chunked_upload(parent_folder, large_file, large_file_name):
util.permanently_delete(uploaded_file)


def test_auto_chunked_upload(parent_folder, large_file, large_file_name):
def test_auto_chunked_upload_using_upload_session_urls(parent_folder, large_file, large_file_name):
total_size = os.stat(large_file.path).st_size
chunked_uploader = parent_folder.get_chunked_uploader(large_file.path)
chunked_uploader = parent_folder.get_chunked_uploader(large_file.path, use_upload_session_urls=True)

uploaded_file = chunked_uploader.start()

try:
assert uploaded_file.id
assert uploaded_file.name == large_file_name
assert uploaded_file.parent == parent_folder
assert uploaded_file.size == total_size
finally:
util.permanently_delete(uploaded_file)


def test_auto_chunked_upload_NOT_using_upload_session_urls(parent_folder, large_file, large_file_name):
total_size = os.stat(large_file.path).st_size
chunked_uploader = parent_folder.get_chunked_uploader(large_file.path, use_upload_session_urls=False)

uploaded_file = chunked_uploader.start()

Expand Down
18 changes: 14 additions & 4 deletions test/unit/object/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ def test_delete_file(test_file, mock_box_session, etag, if_match_header):
)


def test_create_upload_session(test_file, mock_box_session):
@pytest.mark.parametrize('use_upload_session_urls', [True, False])
def test_create_upload_session(test_file, mock_box_session, use_upload_session_urls):
expected_url = f'{API.UPLOAD_URL}/files/{test_file.object_id}/upload_sessions'
file_size = 197520
part_size = 12345
Expand All @@ -71,7 +72,9 @@ def test_create_upload_session(test_file, mock_box_session):
'total_parts': total_parts,
'part_size': part_size,
}
upload_session = test_file.create_upload_session(file_size, file_name)
upload_session = test_file.create_upload_session(
file_size, file_name, use_upload_session_urls=use_upload_session_urls
)
mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps(expected_data))
assert isinstance(upload_session, UploadSession)
assert upload_session._session == mock_box_session
Expand All @@ -80,9 +83,13 @@ def test_create_upload_session(test_file, mock_box_session):
assert upload_session.num_parts_processed == num_parts_processed
assert upload_session.type == upload_session_type
assert upload_session.id == upload_session_id
assert upload_session._use_upload_session_urls == use_upload_session_urls


def test_get_chunked_uploader(mock_box_session, mock_content_response, mock_file_path, test_file):
@pytest.mark.parametrize('use_upload_session_urls', [True, False])
def test_get_chunked_uploader(
mock_box_session, mock_content_response, mock_file_path, test_file, use_upload_session_urls
):
expected_url = f'{API.UPLOAD_URL}/files/{test_file.object_id}/upload_sessions'
mock_file_stream = BytesIO(mock_content_response.content)
file_size = 197520
Expand All @@ -105,14 +112,17 @@ def test_get_chunked_uploader(mock_box_session, mock_content_response, mock_file
with patch('os.stat') as stat:
stat.return_value.st_size = file_size
with patch('boxsdk.object.file.open', return_value=mock_file_stream):
chunked_uploader = test_file.get_chunked_uploader(mock_file_path)
chunked_uploader = test_file.get_chunked_uploader(
mock_file_path, use_upload_session_urls=use_upload_session_urls
)
mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps(expected_data))
upload_session = chunked_uploader._upload_session
assert upload_session.part_size == part_size
assert upload_session.total_parts == total_parts
assert upload_session.num_parts_processed == num_parts_processed
assert upload_session.type == upload_session_type
assert upload_session.id == upload_session_id
assert upload_session._use_upload_session_urls == use_upload_session_urls
assert isinstance(chunked_uploader, ChunkedUploader)


Expand Down
76 changes: 43 additions & 33 deletions test/unit/object/test_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ def get_response(limit, offset):
return get_response


def test_get_chunked_uploader(mock_box_session, mock_content_response, mock_file_path, test_folder):
@pytest.mark.parametrize('use_upload_session_urls', [True, False])
def test_get_chunked_uploader(
mock_box_session, mock_content_response, mock_file_path, test_folder, use_upload_session_urls
):
expected_url = f'{API.UPLOAD_URL}/files/upload_sessions'
mock_file_stream = BytesIO(mock_content_response.content)
file_size = 197520
Expand All @@ -102,17 +105,55 @@ def test_get_chunked_uploader(mock_box_session, mock_content_response, mock_file
with patch('os.stat') as stat:
stat.return_value.st_size = file_size
with patch('boxsdk.object.folder.open', return_value=mock_file_stream):
chunked_uploader = test_folder.get_chunked_uploader(mock_file_path)
chunked_uploader = test_folder.get_chunked_uploader(
mock_file_path, use_upload_session_urls=use_upload_session_urls
)
mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps(expected_data))
upload_session = chunked_uploader._upload_session
assert upload_session.part_size == part_size
assert upload_session.total_parts == total_parts
assert upload_session.num_parts_processed == num_parts_processed
assert upload_session.type == upload_session_type
assert upload_session.id == upload_session_id
assert upload_session._use_upload_session_urls is use_upload_session_urls
assert isinstance(chunked_uploader, ChunkedUploader)


@pytest.mark.parametrize('use_upload_session_urls', [True, False])
def test_create_upload_session(test_folder, mock_box_session, use_upload_session_urls):
expected_url = f'{API.UPLOAD_URL}/files/upload_sessions'
file_size = 197520
file_name = 'test_file.pdf'
upload_session_id = 'F971964745A5CD0C001BBE4E58196BFD'
upload_session_type = 'upload_session'
num_parts_processed = 0
total_parts = 16
part_size = 12345
expected_data = {
'folder_id': test_folder.object_id,
'file_size': file_size,
'file_name': file_name,
}
mock_box_session.post.return_value.json.return_value = {
'id': upload_session_id,
'type': upload_session_type,
'num_parts_processed': num_parts_processed,
'total_parts': total_parts,
'part_size': part_size,
}
upload_session = test_folder.create_upload_session(
file_size, file_name, use_upload_session_urls=use_upload_session_urls
)
mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps(expected_data))
assert isinstance(upload_session, UploadSession)
assert upload_session.part_size == part_size
assert upload_session.total_parts == total_parts
assert upload_session.num_parts_processed == num_parts_processed
assert upload_session.type == upload_session_type
assert upload_session.id == upload_session_id
assert upload_session._use_upload_session_urls == use_upload_session_urls


@pytest.fixture()
def mock_items_response_with_marker(mock_items):
# pylint:disable=redefined-outer-name
Expand Down Expand Up @@ -334,37 +375,6 @@ def test_upload_combines_preflight_and_accelerator_calls_if_both_are_requested(
mock_box_session.options.assert_called_once()


def test_create_upload_session(test_folder, mock_box_session):
expected_url = f'{API.UPLOAD_URL}/files/upload_sessions'
file_size = 197520
file_name = 'test_file.pdf'
upload_session_id = 'F971964745A5CD0C001BBE4E58196BFD'
upload_session_type = 'upload_session'
num_parts_processed = 0
total_parts = 16
part_size = 12345
expected_data = {
'folder_id': test_folder.object_id,
'file_size': file_size,
'file_name': file_name,
}
mock_box_session.post.return_value.json.return_value = {
'id': upload_session_id,
'type': upload_session_type,
'num_parts_processed': num_parts_processed,
'total_parts': total_parts,
'part_size': part_size,
}
upload_session = test_folder.create_upload_session(file_size, file_name)
mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps(expected_data))
assert isinstance(upload_session, UploadSession)
assert upload_session.part_size == part_size
assert upload_session.total_parts == total_parts
assert upload_session.num_parts_processed == num_parts_processed
assert upload_session.type == upload_session_type
assert upload_session.id == upload_session_id


def test_upload_stream_does_preflight_check_if_specified(
mock_box_session,
test_folder,
Expand Down
Loading

0 comments on commit e472942

Please sign in to comment.