Skip to content

Commit

Permalink
AICORE-568: no filter on find similar op
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrei Nechaev committed Jan 27, 2022
1 parent 0e36d07 commit 394da14
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import static org.nuxeo.ai.sdk.rest.Common.UID;
import static org.nuxeo.ai.sdk.rest.Common.XPATH_PARAM;
import static org.nuxeo.ai.similar.content.DedupConstants.DEDUPLICATION_FACET;
import static org.nuxeo.ai.similar.content.DedupConstants.NOT_DUPLICATE_TAG;
import static org.nuxeo.ai.similar.content.pipelines.IndexAction.INDEX_ACTION_NAME;
import static org.nuxeo.ai.similar.content.utils.PictureUtils.resize;
import static org.nuxeo.ecm.core.bulk.BulkServiceImpl.STATUS_PREFIX;
Expand All @@ -39,7 +38,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -208,17 +206,7 @@ public List<DocumentModel> findSimilar(CoreSession session, DocumentModel doc, S
parameters.put(XPATH_PARAM, xpath);

List<String> ids = client.api(API.Dedup.FIND).call(parameters, null);
if (ids == null || ids.isEmpty()) {
log.debug("No similar documents found");
return emptyList();
}

List<DocumentRef> refs = ids.stream().map(IdRef::new).filter(session::exists).collect(Collectors.toList());
if (refs.size() != ids.size()) {
log.warn("Deduplication found some nonexistent document, consider reindexing");
}

return session.getDocuments(refs.toArray(new DocumentRef[] {}));
return resolveDocuments(session, ids);
}

@Override
Expand Down Expand Up @@ -291,31 +279,11 @@ protected List<DocumentModel> resolveDocuments(CoreSession session, List<String>
return emptyList();
}

List<DocumentRef> refs = ids.stream().map(IdRef::new).filter(session::exists).collect(Collectors.toList());
if (refs.size() != ids.size()) {
DocumentRef[] refs = ids.stream().map(IdRef::new).filter(session::exists).toArray(DocumentRef[]::new);
if (refs.length != ids.size()) {
log.warn("Deduplication found some nonexistent document, consider reindexing");
}

return session.getDocuments(refs.toArray(new DocumentRef[] {}))
.stream()
.filter(this::tagNotDuplicateEmpty)
.collect(Collectors.toList());
}

protected boolean tagNotDuplicateEmpty(DocumentModel doc) {
@SuppressWarnings("unchecked")
List<Map<String, Serializable>> tags = (List<Map<String, Serializable>>) doc.getPropertyValue("nxtag:tags");
if (tags == null || tags.isEmpty()) {
return true;
}

for (Map<String, Serializable> tag : tags) {
if (tag.containsKey("label") && tag.get("label").equals(NOT_DUPLICATE_TAG)) {
return false;
}
}

return true;
return session.getDocuments(refs);
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,10 @@
import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.options;
import static org.assertj.core.api.Assertions.assertThat;
import static org.nuxeo.ai.pipes.functions.PropertyUtils.FILE_CONTENT;
import static org.nuxeo.ai.similar.content.DedupConstants.NOT_DUPLICATE_TAG;

import java.io.IOException;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.inject.Inject;
import org.junit.Rule;
import org.junit.Test;
Expand Down Expand Up @@ -117,32 +113,6 @@ public void shouldRunOperationOnBlob() throws OperationException {
assertThat(response).isNotEmpty();
}

/**
 * A document tagged with {@code NOT_DUPLICATE_TAG} must not be returned by the FindSimilar operation, even
 * when the (stubbed) dedup service reports its id as a match for the input blob.
 */
@Test
public void shouldRunOperationOnTaggedWithBlob() throws OperationException {
    Blob blob = Blobs.createBlob("this is a blob");

    // Build the tag entry that marks the document as "not a duplicate".
    Map<String, Serializable> notDuplicateTag = new HashMap<>();
    notDuplicateTag.put("label", NOT_DUPLICATE_TAG);
    notDuplicateTag.put("username", "test");

    // Create and persist a File document holding both the blob and the tag.
    DocumentModel taggedDoc = session.createDocumentModel("/", "TestFile", "File");
    taggedDoc.setPropertyValue(FILE_CONTENT, (Serializable) blob);
    taggedDoc.setPropertyValue("nxtag:tags", (Serializable) Collections.singletonList(notDuplicateTag));
    taggedDoc = session.createDocument(taggedDoc);
    session.save();

    // Stub the find endpoint so that it answers with the id of the tagged document.
    String matchingIdsJson = "[\"" + taggedDoc.getId() + "\"]";
    stubFor(WireMock.post("/api/v1/ai/dedup/mockTestProject/find?distance=0&xpath=file:content")
                    .willReturn(okJson(matchingIdsJson)));

    OperationContext context = new OperationContext(session);
    context.setInput(blob);
    context.put("xpath", FILE_CONTENT);

    @SuppressWarnings("unchecked")
    List<DocumentModel> similar = (List<DocumentModel>) automationService.run(context, FindSimilar.ID);
    assertThat(similar).isEmpty();
}

@Test
public void shouldRunOperationOnBlobFromBatchUpload() throws OperationException, IOException {
Blob textBlob = Blobs.createBlob("this is a blob");
Expand Down

0 comments on commit 394da14

Please sign in to comment.