Skip to content

Commit

Permalink
Merge pull request #205 from IQSS/feat/203-extra-param-html-to-markdown
Browse files Browse the repository at this point in the history
Add extra parameter to transform or not html to markdown
  • Loading branch information
ofahimIQSS authored Oct 29, 2024
2 parents 1913993 + f907dbb commit 38d68b7
Show file tree
Hide file tree
Showing 11 changed files with 211 additions and 45 deletions.
4 changes: 4 additions & 0 deletions docs/useCases.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ The optional `datasetVersionId` parameter can correspond to a numeric version id

There is an optional third parameter called `includeDeaccessioned`, which indicates whether to consider deaccessioned versions or not in the dataset search. If not set, the default value is `false`.

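There is an optional fourth parameter called `keepRawFields`, which indicates whether the metadata field values should be returned exactly as received (for example, as raw HTML) instead of being transformed from HTML to Markdown. If not set, the default value is `false`, meaning the values are transformed to Markdown.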

#### Get Dataset By Private URL Token

Returns a [Dataset](../src/datasets/domain/models/Dataset.ts) instance, given an associated Private URL Token.
Expand All @@ -307,6 +309,8 @@ getPrivateUrlDataset.execute(token).then((dataset: Dataset) => {

_See [use case](../src/datasets/domain/useCases/GetPrivateUrlDataset.ts)_ definition.

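There is an optional second parameter called `keepRawFields`, which indicates whether the metadata field values should be returned exactly as received (for example, as raw HTML) instead of being transformed from HTML to Markdown. If not set, the default value is `false`, meaning the values are transformed to Markdown.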

#### Get Dataset Citation Text

Returns the Dataset citation text.
Expand Down
5 changes: 3 additions & 2 deletions src/datasets/domain/repositories/IDatasetsRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@ export interface IDatasetsRepository {
getDataset(
datasetId: number | string,
datasetVersionId: string,
includeDeaccessioned: boolean
includeDeaccessioned: boolean,
keepRawFields: boolean
): Promise<Dataset>
getDatasetLocks(datasetId: number | string): Promise<DatasetLock[]>
getDatasetCitation(
datasetId: number,
datasetVersionId: string,
includeDeaccessioned: boolean
): Promise<string>
getPrivateUrlDataset(token: string): Promise<Dataset>
getPrivateUrlDataset(token: string, keepRawFields: boolean): Promise<Dataset>
getAllDatasetPreviews(
limit?: number,
offset?: number,
Expand Down
7 changes: 5 additions & 2 deletions src/datasets/domain/useCases/GetDataset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,20 @@ export class GetDataset implements UseCase<Dataset> {
* @param {number | string} [datasetId] - The dataset identifier, which can be a string (for persistent identifiers), or a number (for numeric identifiers).
* @param {string | DatasetNotNumberedVersion} [datasetVersionId=DatasetNotNumberedVersion.LATEST] - The dataset version identifier, which can be a version-specific numeric string (for example, 1.0) or a DatasetNotNumberedVersion enum value. If this parameter is not set, the default value is: DatasetNotNumberedVersion.LATEST
* @param {boolean} [includeDeaccessioned=false] - Indicates whether to consider deaccessioned versions in the dataset search or not. The default value is false
* @param {boolean} [keepRawFields=false] - Indicates whether or not the use case should keep the metadata fields as they are and avoid the transformation to markdown. The default value is false.
* @returns {Promise<Dataset>}
*/
async execute(
datasetId: number | string,
datasetVersionId: string | DatasetNotNumberedVersion = DatasetNotNumberedVersion.LATEST,
includeDeaccessioned = false
includeDeaccessioned = false,
keepRawFields = false
): Promise<Dataset> {
return await this.datasetsRepository.getDataset(
datasetId,
datasetVersionId,
includeDeaccessioned
includeDeaccessioned,
keepRawFields
)
}
}
5 changes: 3 additions & 2 deletions src/datasets/domain/useCases/GetPrivateUrlDataset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ export class GetPrivateUrlDataset implements UseCase<Dataset> {
* Returns a Dataset instance, given an associated Private URL Token.
*
* @param {string} [token] - A Private URL token.
* @param {boolean} [keepRawFields=false] - Indicates whether or not the use case should keep the metadata fields as they are and avoid the transformation to markdown. The default value is false.
* @returns {Promise<Dataset>}
*/
async execute(token: string): Promise<Dataset> {
return await this.datasetsRepository.getPrivateUrlDataset(token)
async execute(token: string, keepRawFields = false): Promise<Dataset> {
return await this.datasetsRepository.getPrivateUrlDataset(token, keepRawFields)
}
}
9 changes: 5 additions & 4 deletions src/datasets/infra/repositories/DatasetsRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ export class DatasetsRepository extends ApiRepository implements IDatasetsReposi
})
}

public async getPrivateUrlDataset(token: string): Promise<Dataset> {
public async getPrivateUrlDataset(token: string, keepRawFields: boolean): Promise<Dataset> {
return this.doGet(
this.buildApiEndpoint(this.datasetsResourceName, `privateUrlDatasetVersion/${token}`),
false,
{
returnOwners: true
}
)
.then((response) => transformVersionResponseToDataset(response))
.then((response) => transformVersionResponseToDataset(response, keepRawFields))
.catch((error) => {
throw error
})
Expand All @@ -50,7 +50,8 @@ export class DatasetsRepository extends ApiRepository implements IDatasetsReposi
public async getDataset(
datasetId: number | string,
datasetVersionId: string,
includeDeaccessioned: boolean
includeDeaccessioned: boolean,
keepRawFields: boolean
): Promise<Dataset> {
return this.doGet(
this.buildApiEndpoint(this.datasetsResourceName, `versions/${datasetVersionId}`, datasetId),
Expand All @@ -61,7 +62,7 @@ export class DatasetsRepository extends ApiRepository implements IDatasetsReposi
returnOwners: true
}
)
.then((response) => transformVersionResponseToDataset(response))
.then((response) => transformVersionResponseToDataset(response, keepRawFields))
.catch((error) => {
throw error
})
Expand Down
47 changes: 34 additions & 13 deletions src/datasets/infra/repositories/transformers/datasetTransformers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,18 @@ export const transformMetadataChildFieldValueToRequestPayload = (
return metadataChildFieldRequestPayload
}

export const transformVersionResponseToDataset = (response: AxiosResponse): Dataset => {
export const transformVersionResponseToDataset = (
response: AxiosResponse,
keepRawFields: boolean
): Dataset => {
const versionPayload = response.data.data
return transformVersionPayloadToDataset(versionPayload)
return transformVersionPayloadToDataset(versionPayload, keepRawFields)
}

export const transformVersionPayloadToDataset = (versionPayload: DatasetPayload): Dataset => {
export const transformVersionPayloadToDataset = (
versionPayload: DatasetPayload,
keepRawFields: boolean
): Dataset => {
const datasetModel: Dataset = {
id: versionPayload.datasetId,
versionId: versionPayload.id,
Expand All @@ -227,7 +233,10 @@ export const transformVersionPayloadToDataset = (versionPayload: DatasetPayload)
lastUpdateTime: new Date(versionPayload.lastUpdateTime),
releaseTime: new Date(versionPayload.releaseTime)
},
metadataBlocks: transformPayloadToDatasetMetadataBlocks(versionPayload.metadataBlocks),
metadataBlocks: transformPayloadToDatasetMetadataBlocks(
versionPayload.metadataBlocks,
keepRawFields
),
...(versionPayload.isPartOf && {
isPartOf: transformPayloadToOwnerNode(versionPayload.isPartOf)
})
Expand Down Expand Up @@ -260,25 +269,28 @@ const transformPayloadToDatasetLicense = (licensePayload: LicensePayload): Datas
}

const transformPayloadToDatasetMetadataBlocks = (
metadataBlocksPayload: MetadataBlocksPayload
metadataBlocksPayload: MetadataBlocksPayload,
keepRawFields: boolean
): DatasetMetadataBlocks => {
return Object.keys(metadataBlocksPayload).map((metadataBlockKey) => {
const metadataBlock = metadataBlocksPayload[metadataBlockKey]
return {
name: metadataBlock.name,
fields: transformPayloadToDatasetMetadataFields(metadataBlock.fields)
fields: transformPayloadToDatasetMetadataFields(metadataBlock.fields, keepRawFields)
}
}) as DatasetMetadataBlocks
}

const transformPayloadToDatasetMetadataFields = (
metadataFieldsPayload: MetadataFieldPayload[]
metadataFieldsPayload: MetadataFieldPayload[],
keepRawFields: boolean
): DatasetMetadataFields => {
return metadataFieldsPayload.reduce(
(datasetMetadataFieldsMap: DatasetMetadataFields, field: MetadataFieldPayload) => {
datasetMetadataFieldsMap[field.typeName] = transformPayloadToDatasetMetadataFieldValue(
field.value,
field.typeClass
field.typeClass,
keepRawFields
)
return datasetMetadataFieldsMap
},
Expand All @@ -288,7 +300,8 @@ const transformPayloadToDatasetMetadataFields = (

const transformPayloadToDatasetMetadataFieldValue = (
metadataFieldValuePayload: MetadataFieldValuePayload,
typeClass: string
typeClass: string,
keepRawFields: boolean
): DatasetMetadataFieldValue => {
function isArrayOfSubfieldValue(
array: (string | MetadataSubfieldValuePayload)[]
Expand All @@ -301,29 +314,37 @@ const transformPayloadToDatasetMetadataFieldValue = (
}

if (typeof metadataFieldValuePayload === 'string') {
if (keepRawFields) {
return metadataFieldValuePayload
}
return transformHtmlToMarkdown(metadataFieldValuePayload)
} else if (Array.isArray(metadataFieldValuePayload)) {
if (isArrayOfSubfieldValue(metadataFieldValuePayload)) {
return metadataFieldValuePayload.map((metadataSubfieldValuePayload) =>
transformPayloadToDatasetMetadataSubfieldValue(metadataSubfieldValuePayload)
transformPayloadToDatasetMetadataSubfieldValue(metadataSubfieldValuePayload, keepRawFields)
)
} else {
if (keepRawFields) {
return metadataFieldValuePayload
}
return metadataFieldValuePayload.map(transformHtmlToMarkdown)
}
} else {
return transformPayloadToDatasetMetadataSubfieldValue(
metadataFieldValuePayload as MetadataSubfieldValuePayload
metadataFieldValuePayload as MetadataSubfieldValuePayload,
keepRawFields
)
}
}

const transformPayloadToDatasetMetadataSubfieldValue = (
metadataSubfieldValuePayload: MetadataSubfieldValuePayload
metadataSubfieldValuePayload: MetadataSubfieldValuePayload,
keepRawFields: boolean
): DatasetMetadataSubField => {
const result: DatasetMetadataSubField = {}
Object.keys(metadataSubfieldValuePayload).forEach((key) => {
const subFieldValue = metadataSubfieldValuePayload[key].value
result[key] = transformHtmlToMarkdown(subFieldValue)
result[key] = keepRawFields ? subFieldValue : transformHtmlToMarkdown(subFieldValue)
})
return result
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export const transformFileResponseToFile = (
if (returnDatasetVersion) {
return [
transformFilePayloadToFile(filePayload),
transformVersionPayloadToDataset(filePayload.datasetVersion)
transformVersionPayloadToDataset(filePayload.datasetVersion, false)
]
}
return transformFilePayloadToFile(filePayload)
Expand Down
109 changes: 109 additions & 0 deletions test/functional/datasets/GetDataset.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import { ApiConfig, createDataset, getDataset, ReadError } from '../../../src'
import { DataverseApiAuthMechanism } from '../../../src/core/infra/repositories/ApiConfig'
import { DatasetDescription } from '../../../src/datasets/domain/models/Dataset'
import { deleteUnpublishedDatasetViaApi } from '../../testHelpers/datasets/datasetHelper'
import { TestConstants } from '../../testHelpers/TestConstants'

/**
 * Dataset fixture passed to `createDataset.execute` by the tests in this file.
 *
 * The description value deliberately contains an HTML tag (`<b>`), so the tests
 * can tell whether the returned metadata was converted to Markdown or kept raw.
 */
const testNewDataset = {
  metadataBlockValues: [
    {
      name: 'citation',
      fields: {
        title: 'Dataset created using the createDataset use case',
        author: [
          {
            authorName: 'Admin, Dataverse',
            authorAffiliation: 'Dataverse.org'
          },
          {
            authorName: 'Owner, Dataverse',
            authorAffiliation: 'Dataversedemo.org'
          }
        ],
        datasetContact: [
          {
            // Syntactically valid contact address: the fixture is sent to a live API.
            datasetContactEmail: 'finch@mailinator.com',
            datasetContactName: 'Finch, Fiona'
          }
        ],
        dsDescription: [
          {
            // HTML on purpose: expected back as 'Hello **world**' unless keepRawFields is true.
            dsDescriptionValue: 'Hello <b>world</b>'
          }
        ],
        subject: ['Medicine, Health and Life Sciences']
      }
    }
  ]
}

/**
 * Functional tests for the `getDataset` use case.
 *
 * Every test that creates a dataset deletes it in a `finally` block, so a
 * failed assertion or a rejected request does not leave the dataset behind.
 * Note that an error thrown by the cleanup itself replaces the original
 * failure in the test report.
 */
describe('execute', () => {
  // Re-initialise the API configuration before every test; nothing is awaited here.
  beforeEach(() => {
    ApiConfig.init(
      TestConstants.TEST_API_URL,
      DataverseApiAuthMechanism.API_KEY,
      process.env.TEST_API_KEY
    )
  })

  test('should successfully get a dataset when a valid id is sent', async () => {
    const createdDatasetIdentifiers = await createDataset.execute(testNewDataset)

    try {
      const dataset = await getDataset.execute(createdDatasetIdentifiers.numericId)
      expect(dataset).not.toBeNull()
      expect(dataset.id).toBe(createdDatasetIdentifiers.numericId)
    } finally {
      await deleteUnpublishedDatasetViaApi(createdDatasetIdentifiers.numericId)
    }
  })

  test('should successfully get a dataset when a valid persistent id is sent', async () => {
    const createdDatasetIdentifiers = await createDataset.execute(testNewDataset)

    try {
      const dataset = await getDataset.execute(createdDatasetIdentifiers.persistentId)
      expect(dataset).not.toBeNull()
      expect(dataset.id).toBe(createdDatasetIdentifiers.numericId)
    } finally {
      await deleteUnpublishedDatasetViaApi(createdDatasetIdentifiers.numericId)
    }
  })

  test('should throw an error when an invalid id is sent', async () => {
    const nonExistentTestDatasetId = 'non-existent-dataset'
    const expectedError = new ReadError(`[400] Bad dataset ID number: ${nonExistentTestDatasetId}.`)

    await expect(getDataset.execute(nonExistentTestDatasetId)).rejects.toThrow(expectedError)
  })

  test('should return metadata fields in markdown format when keepRawFields is false', async () => {
    const createdDatasetIdentifiers = await createDataset.execute(testNewDataset)

    try {
      // `undefined` selects the default dataset version; the last argument is keepRawFields.
      const dataset = await getDataset.execute(
        createdDatasetIdentifiers.numericId,
        undefined,
        false,
        false
      )

      expect(
        (dataset.metadataBlocks[0].fields.dsDescription[0] as DatasetDescription).dsDescriptionValue
      ).toBe('Hello **world**')
    } finally {
      await deleteUnpublishedDatasetViaApi(createdDatasetIdentifiers.numericId)
    }
  })

  test('should not return metadata fields in markdown format when keepRawFields is true', async () => {
    const createdDatasetIdentifiers = await createDataset.execute(testNewDataset)

    try {
      // `undefined` selects the default dataset version; the last argument is keepRawFields.
      const dataset = await getDataset.execute(
        createdDatasetIdentifiers.numericId,
        undefined,
        false,
        true
      )

      expect(
        (dataset.metadataBlocks[0].fields.dsDescription[0] as DatasetDescription).dsDescriptionValue
      ).toBe('Hello <b>world</b>')
    } finally {
      await deleteUnpublishedDatasetViaApi(createdDatasetIdentifiers.numericId)
    }
  })
})
Loading

0 comments on commit 38d68b7

Please sign in to comment.