From f0a221d1822e6a031ce94f1a66d01c3d5dc28e59 Mon Sep 17 00:00:00 2001 From: oU-Ua Date: Tue, 30 Jul 2024 11:51:06 +0900 Subject: [PATCH 1/4] =?UTF-8?q?feat:=20=EC=9D=B4=EB=AF=B8=EC=A7=80=20ocr?= =?UTF-8?q?=20=EA=B8=B0=EB=8A=A5=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle | 5 + .../easylead/common/error/ErrorCode.java | 11 +- .../easylead/easylead/config/S3Config.java | 28 ++++ .../domain/gpt/service/GptService.java | 57 ++++++++- .../domain/text/business/TextBusiness.java | 48 +++++++ .../text/controller/TextController.java | 25 ++++ .../domain/text/converter/TextConverter.java | 16 +++ .../domain/text/dto/TextFileResDTO.java | 13 ++ .../text/service/GoogleVisionService.java | 55 ++++++++ .../domain/text/service/S3Service.java | 120 ++++++++++++++++++ .../domain/text/service/TextService.java | 22 ++++ 11 files changed, 398 insertions(+), 2 deletions(-) create mode 100644 src/main/java/com/easylead/easylead/config/S3Config.java create mode 100644 src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java create mode 100644 src/main/java/com/easylead/easylead/domain/text/controller/TextController.java create mode 100644 src/main/java/com/easylead/easylead/domain/text/converter/TextConverter.java create mode 100644 src/main/java/com/easylead/easylead/domain/text/dto/TextFileResDTO.java create mode 100644 src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java create mode 100644 src/main/java/com/easylead/easylead/domain/text/service/S3Service.java create mode 100644 src/main/java/com/easylead/easylead/domain/text/service/TextService.java diff --git a/build.gradle b/build.gradle index a728d3f..be8e50d 100644 --- a/build.gradle +++ b/build.gradle @@ -39,6 +39,11 @@ dependencies { annotationProcessor 'org.projectlombok:lombok' testImplementation 'org.springframework.boot:spring-boot-starter-test' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' + + implementation platform('com.amazonaws:aws-java-sdk-bom:1.12.529') + implementation 'com.amazonaws:aws-java-sdk-s3' + + implementation 'com.google.cloud:google-cloud-vision:3.44.0' } tasks.named('test') { diff --git a/src/main/java/com/easylead/easylead/common/error/ErrorCode.java b/src/main/java/com/easylead/easylead/common/error/ErrorCode.java index a3c2824..c60c263 100644 --- a/src/main/java/com/easylead/easylead/common/error/ErrorCode.java +++ b/src/main/java/com/easylead/easylead/common/error/ErrorCode.java @@ -7,7 +7,16 @@ @AllArgsConstructor @Getter public enum ErrorCode { - SERVER_ERROR("G500",HttpStatus.INTERNAL_SERVER_ERROR, "요청 수행 중 서버 에러 발생"); + + SERVER_ERROR("G500",HttpStatus.INTERNAL_SERVER_ERROR, "요청 수행 중 서버 에러 발생"), + // S3 관련 에러 코드 + EMPTY_FILE_EXCEPTION("F500-1", HttpStatus.BAD_REQUEST, "빈 파일입니다."), + NO_FILE_EXTENTION("F500-2", HttpStatus.BAD_REQUEST, "확장자가 없습니다."), + INVALID_FILE_EXTENTION("F500-3", HttpStatus.BAD_REQUEST, "부적절한 확장자입니다."), + IO_EXCEPTION_ON_IMAGE_UPLOAD("F502-1", HttpStatus.INTERNAL_SERVER_ERROR, "이미지 업로드 중 에러가 발생했습니다."), + IO_EXCEPTION_ON_FILE_UPLOAD("F502-1", HttpStatus.INTERNAL_SERVER_ERROR, "파일 업로드 중 에러가 발생했습니다."), + PUT_OBJECT_EXCEPTION("F502-2", HttpStatus.INTERNAL_SERVER_ERROR, "S3에 이미지 업로드 중 에러가 발생했습니다."), + IO_EXCEPTION_ON_IMAGE_DELETE("F502-3", HttpStatus.INTERNAL_SERVER_ERROR, "이미지 삭제 중 에러가 발생했습니다."); private final String errorCode; private final HttpStatus httpStatusCode; diff --git a/src/main/java/com/easylead/easylead/config/S3Config.java b/src/main/java/com/easylead/easylead/config/S3Config.java new file mode 100644 index 0000000..5e842e3 --- /dev/null +++ b/src/main/java/com/easylead/easylead/config/S3Config.java @@ -0,0 +1,28 @@ +package com.easylead.easylead.config; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class S3Config { + @Value("${cloud.aws.credentials.access-key}") + private String accessKey; + @Value("${cloud.aws.credentials.secret-key}") + private String secretKey; + @Value("${cloud.aws.region.static}") + private String region; + + @Bean + public AmazonS3Client amazonS3Client() { + BasicAWSCredentials awsCredentials= new BasicAWSCredentials(accessKey, secretKey); + return (AmazonS3Client) AmazonS3ClientBuilder.standard() + .withRegion(region) + .withCredentials(new AWSStaticCredentialsProvider(awsCredentials)) + .build(); + } +} diff --git a/src/main/java/com/easylead/easylead/domain/gpt/service/GptService.java b/src/main/java/com/easylead/easylead/domain/gpt/service/GptService.java index 35323bd..f7575ba 100644 --- a/src/main/java/com/easylead/easylead/domain/gpt/service/GptService.java +++ b/src/main/java/com/easylead/easylead/domain/gpt/service/GptService.java @@ -205,6 +205,22 @@ public Flux askCustomStream(String text) throws JsonProcessingException List messages = new ArrayList<>(); + // 시스템 역할 설정 + messages.add(new Message("\"너는 입력받은 한국어를 쉬운 한국어로 변환해주는 도우미야. " + + "다음 조건들을 모두 충족하는 내용으로 변환해서 알려줘. \\\\n " + + "1. 입력받은 한국어 문장을 이해하기 쉬운 한국어 문장으로 변환해줘. " + + "2. 문장은 간결하게, 한 문장이 길어지면 두 문장으로 나눠서 변환해줘. " + + "3. 꾸미는 말 빼고, 이어진 문장은 두 개의 문장으로 변환해줘. " + + "4. 주어를 중심으로 알기 쉽게 변환해줘. " + + "5. 최대한 능동형 문장으로, 서술식의 구어체(-합니다, -입니다)로 변환해줘. " + + "6. 추상적 표현과 비유는 자제하도록 해. " + + "7. 이중부정 문장은 이해하기 쉬운 문장으로 바꿔. " + + "8. 한 문장에 한 줄씩 적어야해. " + + "9. 대화문은 문장 전후로 한 줄 띄어줘. " + + "10. 단어는 일상생활에서 자주 쓰는, 가능한 짧고 이해하기 쉬운 단어로 사용하도록 해. " + + "11. 한자어나 외국어를 풀어서 쉬운 말로 변환해. " + + "12. 약어가 있으면 다음 문장에 설명을 추가해.", "system")); + messages.add(new Message(text,"user")); ChatGPTRequestDTO chatGptRequest = new ChatGPTRequestDTO( @@ -228,9 +244,24 @@ public HttpRequest requestGPTCustom(String text) throws JsonProcessingException List messages = new ArrayList<>(); // Assistant API 사용할지 Prompt를 변경할지 선택하기 // 시스템 역할 설정 + messages.add(new Message("\"너는 입력받은 한국어를 쉬운 한국어로 변환해주는 도우미야. " + + "다음 조건들을 모두 충족하는 내용으로 변환해서 알려줘. \\\\n " + + "1. 입력받은 한국어 문장을 이해하기 쉬운 한국어 문장으로 변환해줘. " + + "2. 문장은 간결하게, 한 문장이 길어지면 두 문장으로 나눠서 변환해줘. " + + "3. 꾸미는 말 빼고, 이어진 문장은 두 개의 문장으로 변환해줘. " + + "4. 주어를 중심으로 알기 쉽게 변환해줘. " + + "5. 최대한 능동형 문장으로, 서술식의 구어체(-합니다, -입니다)로 변환해줘. " + + "6. 추상적 표현과 비유는 자제하도록 해. " + + "7. 이중부정 문장은 이해하기 쉬운 문장으로 바꿔. " + + "8. 한 문장에 한 줄씩 적어야해. " + + "9. 대화문은 문장 전후로 한 줄 띄어줘. " + + "10. 단어는 일상생활에서 자주 쓰는, 가능한 짧고 이해하기 쉬운 단어로 사용하도록 해. " + + "11. 한자어나 외국어를 풀어서 쉬운 말로 변환해. " + + "12. 약어가 있으면 다음 문장에 설명을 추가해.", "system")); + messages.add(new Message(text, "user")); - ChatGPTRequestDTO chatGptRequest = new ChatGPTRequestDTO("ft:gpt-3.5-turbo-0125:personal::9ldfWO0p", messages, 0.3,false); + ChatGPTRequestDTO chatGptRequest = new ChatGPTRequestDTO("ft:gpt-3.5-turbo-0613:personal::9prSIgJ8", messages, 0.3,false); String input = null; input = mapper.writeValueAsString(chatGptRequest); System.out.println(input); @@ -296,4 +327,28 @@ public String responseDalle(HttpRequest request) throws JsonProcessingException } + public HttpRequest requestImgPrompt(String reqText) throws JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + List messages = new ArrayList<>(); + // Assistant API 사용할지 Prompt를 변경할지 선택하기 + // 시스템 역할 설정 + messages.add(new Message("너는 동화책 삽화에 대해 잘알고, 내용에 중요한 부분을 삽화 프롬프트로 작성할 수 있는 전문가야. ", "system")); + + messages.add(new Message(reqText+"\n\n 이 내용을 토대로 동화책 삽화 1개만 그리고 싶어. 따뜻한 느낌의 동화책에 맞는 그림체로 삽화 만드는 프롬프트 작성해줘 ", "user")); + + ChatGPTRequestDTO chatGptRequest = new ChatGPTRequestDTO("gpt-4", messages, 0.3,false); + String input = null; + input = mapper.writeValueAsString(chatGptRequest); + System.out.println(input); + System.out.println("apikey : " + gptApiKey); + + HttpRequest request = HttpRequest.newBuilder() + .uri(URI.create("https://api.openai.com/v1/chat/completions")) + .header("Content-Type", "application/json") + .header("Authorization", "Bearer " + gptApiKey) + .POST(HttpRequest.BodyPublishers.ofString(input)) + .build(); + + return request; + } } diff --git a/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java b/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java new file mode 100644 index 0000000..2b05ccc --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java @@ -0,0 +1,48 @@ +package com.easylead.easylead.domain.text.business; + +import com.easylead.easylead.common.annotation.Business; +import com.easylead.easylead.common.error.ErrorCode; +import com.easylead.easylead.common.exception.ApiException; +import com.easylead.easylead.domain.gpt.service.GptService; +import com.easylead.easylead.domain.text.converter.TextConverter; +import com.easylead.easylead.domain.text.dto.TextFileResDTO; +import com.easylead.easylead.domain.text.service.TextService; +import com.fasterxml.jackson.core.JsonProcessingException; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.web.multipart.MultipartFile; + +import java.net.http.HttpRequest; +import java.util.Objects; + +@Business +@RequiredArgsConstructor +@Slf4j +public class TextBusiness { + + private final TextService textService; + private final GptService gptService; + private final TextConverter textConverter; + + + public TextFileResDTO easyToRead(MultipartFile file) throws JsonProcessingException { + if (file.isEmpty() || Objects.isNull(file.getOriginalFilename())) { + throw new ApiException(ErrorCode.EMPTY_FILE_EXCEPTION); + } + String fileUrl = textService.uploadFile(file); + + log.info("=========== fileUrl : "+fileUrl+"============"); + + String reqText = textService.detectTextImage(fileUrl); + + log.info("=========== reqText : "+reqText+"============"); + + HttpRequest request = gptService.requestGPTCustom(reqText); + +// HttpRequest requestImgPrompt = gptService.requestImgPrompt(reqText); +// String prompt = gptService.responseGPT(requestImgPrompt); +// HttpRequest requestImg = gptService.requestGPTImage(prompt); +// String imgUrl = gptService.responseDalle(requestImg); + return textConverter.toTextFileResDTO(gptService.responseGPT(request),"imgUrl"); + } +} diff --git a/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java b/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java new file mode 100644 index 0000000..9cbb7f7 --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java @@ -0,0 +1,25 @@ +package com.easylead.easylead.domain.text.controller; + +import com.easylead.easylead.domain.text.business.TextBusiness; +import com.easylead.easylead.domain.text.dto.TextFileResDTO; +import com.fasterxml.jackson.core.JsonProcessingException; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; +import org.springframework.web.multipart.MultipartFile; + +@Slf4j +@RestController +@RequiredArgsConstructor +@RequestMapping("/text") +@CrossOrigin(origins = "*", allowedHeaders = "*") +public class TextController { + private final TextBusiness textBusiness; + + @PostMapping("/multi") + public ResponseEntity easyToReadFile(@RequestPart(value = "file") MultipartFile file) throws JsonProcessingException { + return ResponseEntity.ok(textBusiness.easyToRead(file)); + } + +} diff --git a/src/main/java/com/easylead/easylead/domain/text/converter/TextConverter.java b/src/main/java/com/easylead/easylead/domain/text/converter/TextConverter.java new file mode 100644 index 0000000..af8554f --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/text/converter/TextConverter.java @@ -0,0 +1,16 @@ +package com.easylead.easylead.domain.text.converter; + +import com.easylead.easylead.common.annotation.Converter; +import com.easylead.easylead.domain.text.dto.TextFileResDTO; +import lombok.RequiredArgsConstructor; + +@Converter +@RequiredArgsConstructor +public class TextConverter { + public TextFileResDTO toTextFileResDTO(String resText, String imgUrl) { + return TextFileResDTO.builder() + .img(imgUrl) + .text(resText) + .build(); + } +} diff --git a/src/main/java/com/easylead/easylead/domain/text/dto/TextFileResDTO.java b/src/main/java/com/easylead/easylead/domain/text/dto/TextFileResDTO.java new file mode 100644 index 0000000..b93f962 --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/text/dto/TextFileResDTO.java @@ -0,0 +1,13 @@ +package com.easylead.easylead.domain.text.dto; + +import lombok.*; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Setter +@Builder +public class TextFileResDTO { + String text; + String img; +} diff --git a/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java b/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java new file mode 100644 index 0000000..18477fb --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java @@ -0,0 +1,55 @@ +package com.easylead.easylead.domain.text.service; + +import com.google.cloud.vision.v1.*; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Component; +import org.springframework.util.StopWatch; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +@Slf4j +@RequiredArgsConstructor +@Component +public class GoogleVisionService { + public String detechString(String url){ + StopWatch totalTime = new StopWatch(); + totalTime.start(); + + List requests = new ArrayList<>(); + + ImageSource imgSource = ImageSource.newBuilder().setImageUri(url).build(); + Image img = Image.newBuilder().setSource(imgSource).build(); + Feature feat = Feature.newBuilder().setType(Feature.Type.TEXT_DETECTION).build(); + AnnotateImageRequest request = + AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).build(); + requests.add(request); + + try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) { + BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests); + List responses = response.getResponsesList(); + + StringBuilder result = new StringBuilder(); + for (AnnotateImageResponse res : responses) { + if (res.hasError()) { + System.out.format("Error: %s%n", res.getError().getMessage()); + return null; + } + + for (EntityAnnotation annotation : res.getTextAnnotationsList()) { + result.append(annotation.getDescription()).append(" "); + } + } + + totalTime.stop(); + System.out.println("Total Time : " + totalTime.getTotalTimeMillis() + "ms"); + + return result.toString(); + } + catch (Exception exception) { + return exception.getMessage(); + } + } +} diff --git a/src/main/java/com/easylead/easylead/domain/text/service/S3Service.java b/src/main/java/com/easylead/easylead/domain/text/service/S3Service.java new file mode 100644 index 0000000..3ac7822 --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/text/service/S3Service.java @@ -0,0 +1,120 @@ +package com.easylead.easylead.domain.text.service; + + +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.util.IOUtils; +import com.easylead.easylead.common.error.ErrorCode; +import com.easylead.easylead.common.exception.ApiException; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; +import org.springframework.web.multipart.MultipartFile; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.UUID; + +@Slf4j +@RequiredArgsConstructor +@Component +public class S3Service { + + private final AmazonS3 amazonS3; + + @Value("${cloud.aws.s3.bucketName}") + private String bucketName; + + + public String uploadImage(MultipartFile image) { + this.validateImageFileExtention(image.getOriginalFilename()); + try { + return this.uploadImageToS3(image); + } catch (IOException e) { + throw new ApiException(ErrorCode.IO_EXCEPTION_ON_IMAGE_UPLOAD); + } + } + + public String uploadPDF(MultipartFile file) { + this.validatePDFExtention(file.getOriginalFilename()); + try { + return this.uploadImageToS3(file); + } catch (IOException e) { + throw new ApiException(ErrorCode.IO_EXCEPTION_ON_FILE_UPLOAD); + } + } + + /** + * 이미지 파일의 확장자 명이 올바른지 확인 + */ + private void validateImageFileExtention(String filename) { + int lastDotIndex = filename.lastIndexOf("."); + if (lastDotIndex == -1) { + throw new ApiException(ErrorCode.NO_FILE_EXTENTION); + } + + String extention = filename.substring(lastDotIndex + 1).toLowerCase(); + List allowedExtentionList = Arrays.asList("jpg", "jpeg", "png", "gif"); + + if (!allowedExtentionList.contains(extention)) { + throw new ApiException(ErrorCode.INVALID_FILE_EXTENTION); + } + } + + /** + * PDF 파일의 확장자 명이 올바른지 확인 + */ + private void validatePDFExtention(String filename) { + int lastDotIndex = filename.lastIndexOf("."); + if (lastDotIndex == -1) { + throw new ApiException(ErrorCode.NO_FILE_EXTENTION); + } + + String extension = filename.substring(lastDotIndex + 1).toLowerCase(); + if (!extension.equals("pdf")) { + throw new ApiException(ErrorCode.INVALID_FILE_EXTENTION); + } + } + + /** + * 실제 S3에 이미지 업로드하는 메서드 + */ + private String uploadImageToS3(MultipartFile image) throws IOException { + String originalFilename = image.getOriginalFilename(); //원본 파일 명 + String extension = originalFilename.substring(originalFilename.lastIndexOf(".")); //확장자 명 + + String s3FileName = + UUID.randomUUID().toString().substring(0, 10) + originalFilename; //실제 S3에 저장될 파일 명 + + InputStream is = image.getInputStream(); + byte[] bytes = IOUtils.toByteArray(is); // image를 byte 배열로 변환 + + ObjectMetadata metadata = new ObjectMetadata(); // metadata 생성 + metadata.setContentType("image/" + extension); + metadata.setContentLength(bytes.length); + + // S3에 요청할 때 사용할 byteInputStream 생성 + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(bytes); + + try { + PutObjectRequest putObjectRequest = + new PutObjectRequest(bucketName, s3FileName, byteArrayInputStream, metadata); + + // 실제 S3에 이미지 업로드하는 코드 + amazonS3.putObject(putObjectRequest); + } catch (Exception e) { + throw new ApiException(ErrorCode.PUT_OBJECT_EXCEPTION, e); + } finally { + byteArrayInputStream.close(); + is.close(); + } + + return amazonS3.getUrl(bucketName, s3FileName).toString(); + } +} \ No newline at end of file diff --git a/src/main/java/com/easylead/easylead/domain/text/service/TextService.java b/src/main/java/com/easylead/easylead/domain/text/service/TextService.java new file mode 100644 index 0000000..f5b58da --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/text/service/TextService.java @@ -0,0 +1,22 @@ +package com.easylead.easylead.domain.text.service; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +@Service +@Slf4j +@RequiredArgsConstructor +public class TextService { + private final S3Service s3Service; + private final GoogleVisionService googleVisionService; + + public String detectTextImage(String imgUrl){ + return googleVisionService.detechString(imgUrl); + } + + public String uploadFile(MultipartFile file) { + return s3Service.uploadImage(file); + } +} From 9bdec859b10feade3e9acc8f7430f506007460fd Mon Sep 17 00:00:00 2001 From: oU-Ua Date: Tue, 30 Jul 2024 19:37:02 +0900 Subject: [PATCH 2/4] =?UTF-8?q?feat:=20pdf=20ocr=20=EA=B8=B0=EB=8A=A5=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle | 6 +++ .../easylead/domain/books/entity/Book.java | 9 ++-- .../domain/content/entity/Content.java | 26 ++++++++++ .../domain/content/entity/ContentId.java | 19 ++++++++ .../easylead/domain/read/entity/Read.java | 43 +++++++++++++++++ .../easylead/domain/read/entity/ReadId.java | 19 ++++++++ .../domain/request/entity/Progress.java | 16 +++++++ .../domain/request/entity/Request.java | 46 ++++++++++++++++++ .../domain/request/entity/RequestId.java | 19 ++++++++ .../domain/text/business/TextBusiness.java | 28 +++++++---- .../text/controller/TextController.java | 22 ++++++++- .../domain/text/converter/TextConverter.java | 3 +- .../domain/text/dto/TextFileResDTO.java | 1 - .../text/service/GoogleStorageService.java | 48 +++++++++++++++++++ .../text/service/GoogleVisionService.java | 1 + .../domain/text/service/S3Service.java | 4 +- .../domain/text/service/TextService.java | 16 +++++-- .../easylead/domain/users/entity/Users.java | 9 ++-- src/main/resources/application.properties | 4 +- 19 files changed, 311 insertions(+), 28 deletions(-) create mode 100644 src/main/java/com/easylead/easylead/domain/content/entity/Content.java create mode 100644 src/main/java/com/easylead/easylead/domain/content/entity/ContentId.java create mode 100644 src/main/java/com/easylead/easylead/domain/read/entity/Read.java create mode 100644 src/main/java/com/easylead/easylead/domain/read/entity/ReadId.java create mode 100644 src/main/java/com/easylead/easylead/domain/request/entity/Progress.java create mode 100644 src/main/java/com/easylead/easylead/domain/request/entity/Request.java create mode 100644 src/main/java/com/easylead/easylead/domain/request/entity/RequestId.java create mode 100644 src/main/java/com/easylead/easylead/domain/text/service/GoogleStorageService.java diff --git a/build.gradle b/build.gradle index be8e50d..1f7e73e 100644 --- a/build.gradle +++ b/build.gradle @@ -40,10 +40,16 @@ dependencies { testImplementation 'org.springframework.boot:spring-boot-starter-test' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' + //이미지 저장을 위한 s3의존성 implementation platform('com.amazonaws:aws-java-sdk-bom:1.12.529') implementation 'com.amazonaws:aws-java-sdk-s3' + //ocr을 위한 goolge vision 의존성 implementation 'com.google.cloud:google-cloud-vision:3.44.0' + implementation 'com.google.cloud:google-cloud-storage:2.10.0' + + //health check를 위한 의존성 + implementation("org.springframework.boot:spring-boot-starter-actuator") } tasks.named('test') { diff --git a/src/main/java/com/easylead/easylead/domain/books/entity/Book.java b/src/main/java/com/easylead/easylead/domain/books/entity/Book.java index 49605bb..0f8aff9 100644 --- a/src/main/java/com/easylead/easylead/domain/books/entity/Book.java +++ b/src/main/java/com/easylead/easylead/domain/books/entity/Book.java @@ -1,21 +1,22 @@ package com.easylead.easylead.domain.books.entity; -import com.easylead.easylead.common.entity.BaseEntity; import jakarta.persistence.Entity; +import jakarta.persistence.Id; import lombok.*; -import lombok.experimental.SuperBuilder; import java.util.Date; import static lombok.AccessLevel.PROTECTED; @NoArgsConstructor(access = PROTECTED) -@SuperBuilder +@Builder @AllArgsConstructor @EqualsAndHashCode(callSuper = false) @Entity @Getter -public class Book extends BaseEntity { +public class Book { + @Id + private String ISBN; private String title; private String writer; private String publisher; diff --git a/src/main/java/com/easylead/easylead/domain/content/entity/Content.java b/src/main/java/com/easylead/easylead/domain/content/entity/Content.java new file mode 100644 index 0000000..7922055 --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/content/entity/Content.java @@ -0,0 +1,26 @@ +package com.easylead.easylead.domain.content.entity; + +import com.easylead.easylead.domain.books.entity.Book; +import jakarta.persistence.*; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; + +import static jakarta.persistence.FetchType.LAZY; + +@NoArgsConstructor() +@AllArgsConstructor +@Entity +@Getter +public class Content { + @EmbeddedId + private ContentId contentId; + + @ManyToOne(fetch = LAZY) + @MapsId("ISBN") + @JoinColumn(name = "ISBN") + private Book book; + + private String pageContent; + private String pageImg; +} diff --git a/src/main/java/com/easylead/easylead/domain/content/entity/ContentId.java b/src/main/java/com/easylead/easylead/domain/content/entity/ContentId.java new file mode 100644 index 0000000..0fb1fbe --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/content/entity/ContentId.java @@ -0,0 +1,19 @@ +package com.easylead.easylead.domain.content.entity; + +import jakarta.persistence.Embeddable; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serializable; + +@Embeddable +@Data +@AllArgsConstructor +@NoArgsConstructor +@Builder +public class ContentId implements Serializable { + private Long pageId; + private String ISBN; +} diff --git a/src/main/java/com/easylead/easylead/domain/read/entity/Read.java b/src/main/java/com/easylead/easylead/domain/read/entity/Read.java new file mode 100644 index 0000000..40a51be --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/read/entity/Read.java @@ -0,0 +1,43 @@ +package com.easylead.easylead.domain.read.entity; + +import com.easylead.easylead.domain.books.entity.Book; +import com.easylead.easylead.domain.users.entity.Users; +import jakarta.persistence.*; +import lombok.*; +import org.hibernate.annotations.CreationTimestamp; +import org.hibernate.annotations.UpdateTimestamp; + +import java.time.LocalDateTime; + +import static jakarta.persistence.FetchType.LAZY; +import static lombok.AccessLevel.PROTECTED; + +@Builder +@AllArgsConstructor +@EqualsAndHashCode +@NoArgsConstructor(access = PROTECTED) +@Getter +@Entity +public class Read { + @EmbeddedId + private ReadId readId; + + @ManyToOne(fetch = LAZY) + @MapsId("userId") + @JoinColumn(name = "read_user_id") + private Users readUser; + + @ManyToOne(fetch = LAZY) + @MapsId("ISBN") + @JoinColumn(name = "ISBN") + private Book book; + + private Long page; + + @CreationTimestamp + @Column(nullable = false, updatable = false) + private LocalDateTime createdAt; + + @UpdateTimestamp + private LocalDateTime updateAt; +} diff --git a/src/main/java/com/easylead/easylead/domain/read/entity/ReadId.java b/src/main/java/com/easylead/easylead/domain/read/entity/ReadId.java new file mode 100644 index 0000000..b4cd891 --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/read/entity/ReadId.java @@ -0,0 +1,19 @@ +package com.easylead.easylead.domain.read.entity; + +import jakarta.persistence.Embeddable; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serializable; + +@Embeddable +@Data +@AllArgsConstructor +@NoArgsConstructor +@Builder +public class ReadId implements Serializable { + private Long userId; + private String ISBN; +} diff --git a/src/main/java/com/easylead/easylead/domain/request/entity/Progress.java b/src/main/java/com/easylead/easylead/domain/request/entity/Progress.java new file mode 100644 index 0000000..26d5902 --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/request/entity/Progress.java @@ -0,0 +1,16 @@ +package com.easylead.easylead.domain.request.entity; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +@AllArgsConstructor +@Getter +public enum Progress { + P0("접수 완료"), + P1("담당자 확인 중"), + P2("글맞춤 중"), + P3("검수 중"), + P4("글맞춤 완료"); + + private final String description; +} diff --git a/src/main/java/com/easylead/easylead/domain/request/entity/Request.java b/src/main/java/com/easylead/easylead/domain/request/entity/Request.java new file mode 100644 index 0000000..d5ff9b1 --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/request/entity/Request.java @@ -0,0 +1,46 @@ +package com.easylead.easylead.domain.request.entity; + +import com.easylead.easylead.domain.books.entity.Book; +import com.easylead.easylead.domain.users.entity.Users; +import jakarta.persistence.*; +import lombok.*; +import lombok.experimental.SuperBuilder; +import org.hibernate.annotations.CreationTimestamp; +import org.hibernate.annotations.UpdateTimestamp; + +import java.time.LocalDateTime; + +import static jakarta.persistence.FetchType.LAZY; +import static lombok.AccessLevel.PROTECTED; + +@Builder +@AllArgsConstructor +@EqualsAndHashCode +@NoArgsConstructor(access = PROTECTED) +@Getter +@Entity +public class Request { + @EmbeddedId + private RequestId requestId; + + @ManyToOne(fetch = LAZY) + @MapsId("userId") + @JoinColumn(name = "user_id") + private Users readUser; + + @ManyToOne(fetch = LAZY) + @MapsId("ISBN") + @JoinColumn(name = "ISBN") + private Book book; + + @Enumerated(EnumType.STRING) + private Progress progress; + + @CreationTimestamp + @Column(nullable = false, updatable = false) + private LocalDateTime createdAt; + + @UpdateTimestamp + private LocalDateTime updateAt; + +} diff --git a/src/main/java/com/easylead/easylead/domain/request/entity/RequestId.java b/src/main/java/com/easylead/easylead/domain/request/entity/RequestId.java new file mode 100644 index 0000000..e22fa1f --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/request/entity/RequestId.java @@ -0,0 +1,19 @@ +package com.easylead.easylead.domain.request.entity; + +import jakarta.persistence.Embeddable; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serializable; + +@Embeddable +@Data +@AllArgsConstructor +@NoArgsConstructor +@Builder +public class RequestId implements Serializable { + private Long userId; + private String ISBN; +} diff --git a/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java b/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java index 2b05ccc..51dc06b 100644 --- a/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java +++ b/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java @@ -6,11 +6,14 @@ import com.easylead.easylead.domain.gpt.service.GptService; import com.easylead.easylead.domain.text.converter.TextConverter; import com.easylead.easylead.domain.text.dto.TextFileResDTO; +import com.easylead.easylead.domain.text.service.GoogleVisionService; +import com.easylead.easylead.domain.text.service.S3Service; import com.easylead.easylead.domain.text.service.TextService; import com.fasterxml.jackson.core.JsonProcessingException; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.web.multipart.MultipartFile; +import reactor.core.publisher.Flux; import java.net.http.HttpRequest; import java.util.Objects; @@ -22,6 +25,8 @@ public class TextBusiness { private final TextService textService; private final GptService gptService; + private final S3Service s3Service; + private final GoogleVisionService googleVisionService; private final TextConverter textConverter; @@ -29,20 +34,25 @@ public TextFileResDTO easyToRead(MultipartFile file) throws JsonProcessingExcept if (file.isEmpty() || Objects.isNull(file.getOriginalFilename())) { throw new ApiException(ErrorCode.EMPTY_FILE_EXCEPTION); } - String fileUrl = textService.uploadFile(file); - log.info("=========== fileUrl : "+fileUrl+"============"); - - String reqText = textService.detectTextImage(fileUrl); + String reqText = textService.detectTextPDF(file); log.info("=========== reqText : "+reqText+"============"); HttpRequest request = gptService.requestGPTCustom(reqText); -// HttpRequest requestImgPrompt = gptService.requestImgPrompt(reqText); -// String prompt = gptService.responseGPT(requestImgPrompt); -// HttpRequest requestImg = gptService.requestGPTImage(prompt); -// String imgUrl = gptService.responseDalle(requestImg); - return textConverter.toTextFileResDTO(gptService.responseGPT(request),"imgUrl"); + return textConverter.toTextFileResDTO(gptService.responseGPT(request)); + } + public Flux easyToReadImage(MultipartFile file) throws JsonProcessingException { + if (file.isEmpty() || Objects.isNull(file.getOriginalFilename())) { + throw new ApiException(ErrorCode.EMPTY_FILE_EXCEPTION); + } + + String reqText = textService.detectTextImage(file); + + log.info("=========== reqText : "+reqText+"============"); + + + return gptService.askCustomStream(reqText); } } diff --git a/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java b/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java index 9cbb7f7..f1887aa 100644 --- a/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java +++ b/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java @@ -3,11 +3,17 @@ import com.easylead.easylead.domain.text.business.TextBusiness; import com.easylead.easylead.domain.text.dto.TextFileResDTO; import com.fasterxml.jackson.core.JsonProcessingException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.*; import org.springframework.web.multipart.MultipartFile; +import reactor.core.publisher.Flux; + +import java.util.Locale; @Slf4j @RestController @@ -17,7 +23,21 @@ public class TextController { private final TextBusiness textBusiness; - @PostMapping("/multi") + @PostMapping(value = "/image", produces = MediaType.TEXT_EVENT_STREAM_VALUE) + public Flux easyToReadImage(Locale locale, + HttpServletRequest request, + HttpServletResponse response, + @RequestPart(value = "image") MultipartFile file) throws JsonProcessingException { + try{ + return textBusiness.easyToReadImage(file); + + }catch (JsonProcessingException je){ + log.error(je.getMessage()); + return Flux.empty(); + } + } + + @PostMapping("/file") public ResponseEntity easyToReadFile(@RequestPart(value = "file") MultipartFile file) throws JsonProcessingException { return ResponseEntity.ok(textBusiness.easyToRead(file)); } diff --git a/src/main/java/com/easylead/easylead/domain/text/converter/TextConverter.java b/src/main/java/com/easylead/easylead/domain/text/converter/TextConverter.java index af8554f..71a85ce 100644 --- a/src/main/java/com/easylead/easylead/domain/text/converter/TextConverter.java +++ b/src/main/java/com/easylead/easylead/domain/text/converter/TextConverter.java @@ -7,9 +7,8 @@ @Converter @RequiredArgsConstructor public class TextConverter { - public TextFileResDTO toTextFileResDTO(String resText, String imgUrl) { + public TextFileResDTO toTextFileResDTO(String resText) { return TextFileResDTO.builder() - .img(imgUrl) .text(resText) .build(); } diff --git a/src/main/java/com/easylead/easylead/domain/text/dto/TextFileResDTO.java b/src/main/java/com/easylead/easylead/domain/text/dto/TextFileResDTO.java index b93f962..456db9c 100644 --- a/src/main/java/com/easylead/easylead/domain/text/dto/TextFileResDTO.java +++ b/src/main/java/com/easylead/easylead/domain/text/dto/TextFileResDTO.java @@ -9,5 +9,4 @@ @Builder public class TextFileResDTO { String text; - String img; } diff --git a/src/main/java/com/easylead/easylead/domain/text/service/GoogleStorageService.java b/src/main/java/com/easylead/easylead/domain/text/service/GoogleStorageService.java new file mode 100644 index 0000000..c59cacf --- /dev/null +++ b/src/main/java/com/easylead/easylead/domain/text/service/GoogleStorageService.java @@ -0,0 +1,48 @@ +package com.easylead.easylead.domain.text.service; + +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Component; +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +import java.io.IOException; + +@Slf4j +@RequiredArgsConstructor +@Service +public class GoogleStorageService { + private final Storage storage = StorageOptions.getDefaultInstance().getService(); + private final String bucketName = "geulmatchum-file"; + + public String uploadFile(MultipartFile file) { + String fileName = file.getOriginalFilename(); + Blob blob = null; + try { + blob = storage.create( + Blob.newBuilder(bucketName, fileName).build(), + file.getBytes() + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + return blob.getSelfLink(); // Return the file's public URL + } + + public String getGcsSourcePath(MultipartFile file) { + // Upload the file and get its public URL + String publicUrl = uploadFile(file); + // Convert the public URL to gcsSourcePath + return publicUrl.replace("https://storage.googleapis.com/", "gs://"); + } + + public String getGcsDestinationPath(String fileName) { + // Define the GCS path for saving results + // For example, saving results to a directory named "results" + return String.format("gs://%s/results/%s", bucketName, fileName); + } + +} diff --git a/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java b/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java index 18477fb..56eecff 100644 --- a/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java +++ b/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java @@ -52,4 +52,5 @@ public String detechString(String url){ return exception.getMessage(); } } + } diff --git a/src/main/java/com/easylead/easylead/domain/text/service/S3Service.java b/src/main/java/com/easylead/easylead/domain/text/service/S3Service.java index 3ac7822..3ac546c 100644 --- a/src/main/java/com/easylead/easylead/domain/text/service/S3Service.java +++ b/src/main/java/com/easylead/easylead/domain/text/service/S3Service.java @@ -59,10 +59,10 @@ private void validateImageFileExtention(String filename) { throw new ApiException(ErrorCode.NO_FILE_EXTENTION); } - String extention = filename.substring(lastDotIndex + 1).toLowerCase(); + String extension = filename.substring(lastDotIndex + 1).toLowerCase(); List allowedExtentionList = Arrays.asList("jpg", "jpeg", "png", "gif"); - if (!allowedExtentionList.contains(extention)) { + if (!allowedExtentionList.contains(extension)) { throw new ApiException(ErrorCode.INVALID_FILE_EXTENTION); } } diff --git a/src/main/java/com/easylead/easylead/domain/text/service/TextService.java b/src/main/java/com/easylead/easylead/domain/text/service/TextService.java index f5b58da..9fcae97 100644 --- a/src/main/java/com/easylead/easylead/domain/text/service/TextService.java +++ b/src/main/java/com/easylead/easylead/domain/text/service/TextService.java @@ -1,5 +1,7 @@ package com.easylead.easylead.domain.text.service; +import com.easylead.easylead.common.error.ErrorCode; +import com.easylead.easylead.common.exception.ApiException; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; @@ -11,12 +13,18 @@ public class TextService { private final S3Service s3Service; private final GoogleVisionService googleVisionService; + private final GoogleStorageService googleStorageService; - public String detectTextImage(String imgUrl){ - return googleVisionService.detechString(imgUrl); + public String detectTextPDF(MultipartFile file){ + String gcsSourcePath = googleStorageService.getGcsSourcePath(file); + log.info("=========gcsSourcePath : "+gcsSourcePath+"=============="); + String gcsDestinationPath = googleStorageService.getGcsDestinationPath(file.getOriginalFilename()); + + return null; } - public String uploadFile(MultipartFile file) { - return s3Service.uploadImage(file); + public String detectTextImage(MultipartFile file) { + String imgUrl = s3Service.uploadImage(file); + return googleVisionService.detechString(imgUrl); } } diff --git a/src/main/java/com/easylead/easylead/domain/users/entity/Users.java b/src/main/java/com/easylead/easylead/domain/users/entity/Users.java index 7eb84a9..04310e6 100644 --- a/src/main/java/com/easylead/easylead/domain/users/entity/Users.java +++ b/src/main/java/com/easylead/easylead/domain/users/entity/Users.java @@ -1,19 +1,20 @@ package com.easylead.easylead.domain.users.entity; -import com.easylead.easylead.common.entity.BaseEntity; import jakarta.persistence.Entity; +import jakarta.persistence.Id; import lombok.*; -import lombok.experimental.SuperBuilder; import static lombok.AccessLevel.PROTECTED; @NoArgsConstructor(access = PROTECTED) -@SuperBuilder +@Builder @AllArgsConstructor @EqualsAndHashCode(callSuper = false) @Entity @Getter -public class Users extends BaseEntity { +public class Users { + @Id + private Long userId; private String name; private String email; private String password; diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 460f85d..828fadc 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,2 +1,4 @@ spring.application.name=easy-lead -spring.config.import=application-key.properties \ No newline at end of file +spring.config.import=application-key.properties +management.endpoints.web.exposure.include=health +management.endpoint.health.show-details=always From 11879e7095a4fc102439b25c3032570c2141dc99 Mon Sep 17 00:00:00 2001 From: oU-Ua Date: Tue, 30 Jul 2024 20:19:12 +0900 Subject: [PATCH 3/4] =?UTF-8?q?feat:=20pdf=20ocr=20=EA=B8=B0=EB=8A=A5=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain/text/business/TextBusiness.java | 2 +- .../text/controller/TextController.java | 2 +- .../text/service/GoogleStorageService.java | 9 +- .../text/service/GoogleVisionService.java | 114 +++++++++++++++++- .../domain/text/service/TextService.java | 7 +- 5 files changed, 123 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java b/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java index 51dc06b..7929760 100644 --- a/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java +++ b/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java @@ -30,7 +30,7 @@ public class TextBusiness { private final TextConverter textConverter; - public TextFileResDTO easyToRead(MultipartFile file) throws JsonProcessingException { + public TextFileResDTO easyToRead(MultipartFile file) throws Exception { if (file.isEmpty() || Objects.isNull(file.getOriginalFilename())) { throw new ApiException(ErrorCode.EMPTY_FILE_EXCEPTION); } diff --git a/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java b/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java index f1887aa..5d7d805 100644 --- a/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java +++ b/src/main/java/com/easylead/easylead/domain/text/controller/TextController.java @@ -38,7 +38,7 @@ public Flux easyToReadImage(Locale locale, } @PostMapping("/file") - public ResponseEntity easyToReadFile(@RequestPart(value = "file") MultipartFile file) throws JsonProcessingException { + public ResponseEntity easyToReadFile(@RequestPart(value = "file") MultipartFile file) throws Exception { return ResponseEntity.ok(textBusiness.easyToRead(file)); } diff --git a/src/main/java/com/easylead/easylead/domain/text/service/GoogleStorageService.java b/src/main/java/com/easylead/easylead/domain/text/service/GoogleStorageService.java index c59cacf..ea85580 100644 --- a/src/main/java/com/easylead/easylead/domain/text/service/GoogleStorageService.java +++ b/src/main/java/com/easylead/easylead/domain/text/service/GoogleStorageService.java @@ -5,7 +5,6 @@ import com.google.cloud.storage.StorageOptions; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.stereotype.Component; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; @@ -18,7 +17,7 @@ public class GoogleStorageService { private final Storage storage = StorageOptions.getDefaultInstance().getService(); private final String bucketName = "geulmatchum-file"; - public String uploadFile(MultipartFile file) { + public Blob uploadFile(MultipartFile file) { String fileName = file.getOriginalFilename(); Blob blob = null; try { @@ -29,14 +28,14 @@ public String uploadFile(MultipartFile file) { } catch (IOException e) { throw new RuntimeException(e); } - return blob.getSelfLink(); // Return the file's public URL + return blob; // Return the file's public URL } public String getGcsSourcePath(MultipartFile file) { // Upload the file and get its public URL - String publicUrl = uploadFile(file); + Blob blob = uploadFile(file); // Convert the public URL to gcsSourcePath - return publicUrl.replace("https://storage.googleapis.com/", "gs://"); + return "gs://" + blob.getBucket() + "/" + blob.getName(); } public String getGcsDestinationPath(String fileName) { diff --git a/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java b/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java index 56eecff..51018e5 100644 --- a/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java +++ b/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java @@ -1,6 +1,12 @@ package com.easylead.easylead.domain.text.service; +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; import com.google.cloud.vision.v1.*; +import com.google.protobuf.util.JsonFormat; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; @@ -9,12 +15,15 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; @Slf4j @RequiredArgsConstructor @Component public class GoogleVisionService { - public String detechString(String url){ + public String detechStringImage(String url){ StopWatch totalTime = new StopWatch(); totalTime.start(); @@ -52,5 +61,108 @@ public String detechString(String url){ return exception.getMessage(); } } + public String detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) + throws Exception { + + + AnnotateImageResponse annotateImageResponse = null; + try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) { + List requests = new ArrayList<>(); + + GcsSource gcsSource = GcsSource.newBuilder().setUri(gcsSourcePath).build(); + + InputConfig inputConfig = + InputConfig.newBuilder() + .setMimeType( + "application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff" + .setGcsSource(gcsSource) + .build(); + + GcsDestination gcsDestination = + GcsDestination.newBuilder().setUri(gcsDestinationPath).build(); + + OutputConfig outputConfig = + OutputConfig.newBuilder().setBatchSize(2).setGcsDestination(gcsDestination).build(); + + // Select the Feature required by the vision API + Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build(); + + // Build the OCR request + AsyncAnnotateFileRequest request = + AsyncAnnotateFileRequest.newBuilder() + .addFeatures(feature) + .setInputConfig(inputConfig) + .setOutputConfig(outputConfig) + .build(); + + requests.add(request); + + // Perform the OCR request + OperationFuture response = + client.asyncBatchAnnotateFilesAsync(requests); + + System.out.println("Waiting for the operation to finish."); + + // Wait for the request to finish. (The result is not used, since the API saves the result to + // the specified location on GCS.) + List result = + response.get(180, TimeUnit.SECONDS).getResponsesList(); + + // Once the request has completed and the System.output has been + // written to GCS, we can list all the System.output files. + Storage storage = StorageOptions.getDefaultInstance().getService(); + + // Get the destination location from the gcsDestinationPath + Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)"); + Matcher matcher = pattern.matcher(gcsDestinationPath); + + if (matcher.find()) { + String bucketName = matcher.group(1); + String prefix = matcher.group(2); + + // Get the list of objects with the given prefix from the GCS bucket + Bucket bucket = storage.get(bucketName); + com.google.api.gax.paging.Page pageList = bucket.list(Storage.BlobListOption.prefix(prefix)); + + Blob firstOutputFile = null; + + // List objects with the given prefix. + System.out.println("Output files:"); + for (Blob blob : pageList.iterateAll()) { + System.out.println(blob.getName()); + + // Process the first System.output file from GCS. + // Since we specified batch size = 2, the first response contains + // the first two pages of the input file. + if (firstOutputFile == null) { + firstOutputFile = blob; + } + } + + // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse + // object. If the Blob is small read all its content in one request + // (Note: the file is a .json file) + // Storage guide: https://cloud.google.com/storage/docs/downloading-objects + String jsonContents = new String(firstOutputFile.getContent()); + AnnotateFileResponse.Builder builder = AnnotateFileResponse.newBuilder(); + JsonFormat.parser().merge(jsonContents, builder); + + // Build the AnnotateFileResponse object + AnnotateFileResponse annotateFileResponse = builder.build(); + + // Parse through the object to get the actual response for the first page of the input file. + annotateImageResponse = annotateFileResponse.getResponses(0); + + // Here we print the full text from the first page. + // The response contains more information: + // annotation/pages/blocks/paragraphs/words/symbols + // including confidence score and bounding boxes + System.out.format("%nText: %s%n", annotateImageResponse.getFullTextAnnotation().getText()); + } else { + System.out.println("No MATCH"); + } + } + return annotateImageResponse.getFullTextAnnotation().getText(); + } } diff --git a/src/main/java/com/easylead/easylead/domain/text/service/TextService.java b/src/main/java/com/easylead/easylead/domain/text/service/TextService.java index 9fcae97..53cbfb1 100644 --- a/src/main/java/com/easylead/easylead/domain/text/service/TextService.java +++ b/src/main/java/com/easylead/easylead/domain/text/service/TextService.java @@ -15,16 +15,17 @@ public class TextService { private final GoogleVisionService googleVisionService; private final GoogleStorageService googleStorageService; - public String detectTextPDF(MultipartFile file){ + public String detectTextPDF(MultipartFile file) throws Exception { String gcsSourcePath = googleStorageService.getGcsSourcePath(file); log.info("=========gcsSourcePath : "+gcsSourcePath+"=============="); String gcsDestinationPath = googleStorageService.getGcsDestinationPath(file.getOriginalFilename()); + String reqtext = googleVisionService.detectDocumentsGcs(gcsSourcePath,gcsDestinationPath); - return null; + return reqtext; } public String detectTextImage(MultipartFile file) { String imgUrl = s3Service.uploadImage(file); - return googleVisionService.detechString(imgUrl); + return googleVisionService.detechStringImage(imgUrl); } } From 358094a526b5f80184aad63cddfc5133244d8d5b Mon Sep 17 00:00:00 2001 From: oU-Ua Date: Tue, 30 Jul 2024 21:56:14 +0900 Subject: [PATCH 4/4] =?UTF-8?q?feat:=20pdf=20ocr=20=EA=B8=B0=EB=8A=A5=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain/text/business/TextBusiness.java | 11 ++++-- .../text/service/GoogleVisionService.java | 36 ++++++++++++------- .../domain/text/service/TextService.java | 6 ++-- src/main/resources/application.properties | 1 + 4 files changed, 36 insertions(+), 18 deletions(-) diff --git a/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java b/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java index 7929760..ff674ed 100644 --- a/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java +++ b/src/main/java/com/easylead/easylead/domain/text/business/TextBusiness.java @@ -16,6 +16,7 @@ import reactor.core.publisher.Flux; import java.net.http.HttpRequest; +import java.util.List; import java.util.Objects; @Business @@ -35,13 +36,17 @@ public TextFileResDTO easyToRead(MultipartFile file) throws Exception { throw new ApiException(ErrorCode.EMPTY_FILE_EXCEPTION); } - String reqText = textService.detectTextPDF(file); + List reqText = textService.detectTextPDF(file); log.info("=========== reqText : "+reqText+"============"); - HttpRequest request = gptService.requestGPTCustom(reqText); + StringBuilder result = new StringBuilder(); + for(String text : reqText){ + HttpRequest request = gptService.requestGPTCustom(text); + result.append(gptService.responseGPT(request)); + } + return textConverter.toTextFileResDTO(result.toString()); - return textConverter.toTextFileResDTO(gptService.responseGPT(request)); } public Flux easyToReadImage(MultipartFile file) throws JsonProcessingException { if (file.isEmpty() || Objects.isNull(file.getOriginalFilename())) { diff --git a/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java b/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java index 51018e5..21197a3 100644 --- a/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java +++ b/src/main/java/com/easylead/easylead/domain/text/service/GoogleVisionService.java @@ -61,8 +61,10 @@ public String detechStringImage(String url){ return exception.getMessage(); } } - public String detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) + public List detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) throws Exception { + List reqList = new ArrayList<>(); + StringBuilder fullText = new StringBuilder(); AnnotateImageResponse annotateImageResponse = null; @@ -131,38 +133,46 @@ public String detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath for (Blob blob : pageList.iterateAll()) { System.out.println(blob.getName()); + String jsonContents = new String(blob.getContent()); + AnnotateFileResponse.Builder builder = AnnotateFileResponse.newBuilder(); + JsonFormat.parser().merge(jsonContents, builder); + + AnnotateFileResponse annotateFileResponse = builder.build(); + for (AnnotateImageResponse a : annotateFileResponse.getResponsesList()) { + reqList.add(a.getFullTextAnnotation().getText()); + } // Process the first System.output file from GCS. // Since we specified batch size = 2, the first response contains // the first two pages of the input file. - if (firstOutputFile == null) { - firstOutputFile = blob; - } +// if (firstOutputFile == null) { +// firstOutputFile = blob; +// } } // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse // object. If the Blob is small read all its content in one request // (Note: the file is a .json file) // Storage guide: https://cloud.google.com/storage/docs/downloading-objects - String jsonContents = new String(firstOutputFile.getContent()); - AnnotateFileResponse.Builder builder = AnnotateFileResponse.newBuilder(); - JsonFormat.parser().merge(jsonContents, builder); - - // Build the AnnotateFileResponse object - AnnotateFileResponse annotateFileResponse = builder.build(); +// String jsonContents = new String(firstOutputFile.getContent()); +// AnnotateFileResponse.Builder builder = AnnotateFileResponse.newBuilder(); +// JsonFormat.parser().merge(jsonContents, builder); +// +// // Build the AnnotateFileResponse object +// AnnotateFileResponse annotateFileResponse = builder.build(); // Parse through the object to get the actual response for the first page of the input file. - annotateImageResponse = annotateFileResponse.getResponses(0); +// annotateImageResponse = annotateFileResponse.getResponses(0); // Here we print the full text from the first page. // The response contains more information: // annotation/pages/blocks/paragraphs/words/symbols // including confidence score and bounding boxes - System.out.format("%nText: %s%n", annotateImageResponse.getFullTextAnnotation().getText()); + } else { System.out.println("No MATCH"); } } - return annotateImageResponse.getFullTextAnnotation().getText(); + return reqList; } } diff --git a/src/main/java/com/easylead/easylead/domain/text/service/TextService.java b/src/main/java/com/easylead/easylead/domain/text/service/TextService.java index 53cbfb1..5017a4a 100644 --- a/src/main/java/com/easylead/easylead/domain/text/service/TextService.java +++ b/src/main/java/com/easylead/easylead/domain/text/service/TextService.java @@ -7,6 +7,8 @@ import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; +import java.util.List; + @Service @Slf4j @RequiredArgsConstructor @@ -15,11 +17,11 @@ public class TextService { private final GoogleVisionService googleVisionService; private final GoogleStorageService googleStorageService; - public String detectTextPDF(MultipartFile file) throws Exception { + public List detectTextPDF(MultipartFile file) throws Exception { String gcsSourcePath = googleStorageService.getGcsSourcePath(file); log.info("=========gcsSourcePath : "+gcsSourcePath+"=============="); String gcsDestinationPath = googleStorageService.getGcsDestinationPath(file.getOriginalFilename()); - String reqtext = googleVisionService.detectDocumentsGcs(gcsSourcePath,gcsDestinationPath); + List reqtext = googleVisionService.detectDocumentsGcs(gcsSourcePath,gcsDestinationPath); return reqtext; } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 828fadc..cacfacb 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -2,3 +2,4 @@ spring.application.name=easy-lead spring.config.import=application-key.properties management.endpoints.web.exposure.include=health management.endpoint.health.show-details=always +spring.servlet.multipart.max-file-size=10GB \ No newline at end of file