diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..029c527 --- /dev/null +++ b/.gitignore @@ -0,0 +1,44 @@ +HELP.md +.gradle +build/ +!gradle/wrapper/gradle-wrapper.jar +!**/src/main/**/build/ +!**/src/test/**/build/ + +### STS ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache +bin/ +!**/src/main/**/bin/ +!**/src/test/**/bin/ + +### IntelliJ IDEA ### +.idea +*.iws +*.iml +*.ipr +out/ +!**/src/main/**/out/ +!**/src/test/**/out/ + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ + +### VS Code ### +.vscode/ + +### 업로드 파일 ### +uploads/ + +### 환경 설정 ### +.env +application-local.yml diff --git a/.run/ragone.run.xml b/.run/ragone.run.xml new file mode 100644 index 0000000..a467441 --- /dev/null +++ b/.run/ragone.run.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..c5a66d3 --- /dev/null +++ b/build.gradle @@ -0,0 +1,53 @@ +plugins { + id 'java' + id 'org.springframework.boot' version '3.2.5' + id 'io.spring.dependency-management' version '1.1.4' +} + +group = 'kr.co' +version = '0.0.1-SNAPSHOT' + +java { + sourceCompatibility = '17' +} + +configurations { + compileOnly { + extendsFrom annotationProcessor + } +} + +repositories { + mavenCentral() +} + +dependencies { + // Spring Boot + implementation 'org.springframework.boot:spring-boot-starter-web' + implementation 'org.springframework.boot:spring-boot-starter-data-jpa' + implementation 'org.springframework.boot:spring-boot-starter-validation' + + // PostgreSQL + pgvector + implementation 'org.postgresql:postgresql' + implementation 'com.pgvector:pgvector:0.1.4' + + // OpenAI + implementation 'com.theokanning.openai-gpt3-java:service:0.18.2' + + // 문서 파싱 (PDF, DOCX 등) + implementation 'org.apache.tika:tika-core:2.9.1' + implementation 'org.apache.tika:tika-parsers-standard-package:2.9.1' + + // 유틸리티 + compileOnly 'org.projectlombok:lombok' 
+ annotationProcessor 'org.projectlombok:lombok' + implementation 'org.mapstruct:mapstruct:1.5.5.Final' + annotationProcessor 'org.mapstruct:mapstruct-processor:1.5.5.Final' + + // 테스트 + testImplementation 'org.springframework.boot:spring-boot-starter-test' +} + +tasks.named('test') { + useJUnitPlatform() +} diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..05d3aa6 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,13 @@ + + + + + + + RAGone - AI 문서 질의응답 + + +
+ + + diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..f21d2c7 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,23 @@ +{ + "name": "ragone-frontend", + "version": "0.0.1", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview" + }, + "dependencies": { + "vue": "^3.4.21", + "vue-router": "^4.3.0", + "axios": "^1.6.8", + "marked": "^12.0.1", + "highlight.js": "^11.9.0" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^5.0.4", + "vite": "^5.2.8", + "sass": "^1.72.0" + } +} diff --git a/frontend/vite.config.js b/frontend/vite.config.js new file mode 100644 index 0000000..d2cd794 --- /dev/null +++ b/frontend/vite.config.js @@ -0,0 +1,15 @@ +import { defineConfig } from 'vite' +import vue from '@vitejs/plugin-vue' + +export default defineConfig({ + plugins: [vue()], + server: { + port: 3000, + proxy: { + '/api': { + target: 'http://localhost:8080', + changeOrigin: true + } + } + } +}) diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..8bdaf60 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..1af9e09 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100644 index 0000000..ef07e01 --- /dev/null +++ b/gradlew @@ -0,0 +1,251 @@ +#!/bin/sh + +# +# Copyright © 2015 the original authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. 
+# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH="\\\"\\\"" + + +# Determine the Java command to use to start the JVM. 
+if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
+ +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
+# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..f0443b7 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,90 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. 
+@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem having the _script_ exit with the return code. 
+rem Capture the exit code once and force it to be non-zero: :fail can be reached
+rem while ERRORLEVEL is still 0 (for example after the JAVA_HOME existence check).
+set EXIT_CODE=%ERRORLEVEL%
+if %EXIT_CODE% equ 0 set EXIT_CODE=1
+rem Quoted comparison so that an undefined GRADLE_EXIT_CONSOLE is not a cmd syntax error.
+if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
+exit /b %EXIT_CODE%
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/settings.gradle b/settings.gradle
new file mode 100644
index 0000000..d506978
--- /dev/null
+++ b/settings.gradle
@@ -0,0 +1 @@
+rootProject.name = 'ragone'
diff --git a/src/main/java/kr/co/ragone/RagoneApplication.java b/src/main/java/kr/co/ragone/RagoneApplication.java
new file mode 100644
index 0000000..95feb75
--- /dev/null
+++ b/src/main/java/kr/co/ragone/RagoneApplication.java
@@ -0,0 +1,12 @@
+package kr.co.ragone;
+
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+
+/**
+ * Spring Boot entry point of the RAGone application.
+ */
+@SpringBootApplication
+public class RagoneApplication {
+
+    /**
+     * Starts the Spring application context.
+     *
+     * @param args command-line arguments, passed through to Spring Boot
+     */
+    public static void main(String[] args) {
+        SpringApplication.run(RagoneApplication.class, args);
+    }
+}
diff --git a/src/main/java/kr/co/ragone/config/AsyncConfig.java b/src/main/java/kr/co/ragone/config/AsyncConfig.java
new file mode 100644
index 0000000..6993ad8
--- /dev/null
+++ b/src/main/java/kr/co/ragone/config/AsyncConfig.java
@@ -0,0 +1,10 @@
+package kr.co.ragone.config;
+
+import org.springframework.context.annotation.Configuration;
+import org.springframework.scheduling.annotation.EnableAsync;
+
+/**
+ * Marker configuration whose only purpose is to carry {@code @EnableAsync}.
+ */
+@Configuration
+@EnableAsync
+public class AsyncConfig {
+    // Enables asynchronous processing; intentionally has no members.
+}
diff --git a/src/main/java/kr/co/ragone/config/CorsConfig.java b/src/main/java/kr/co/ragone/config/CorsConfig.java
new file mode 100644
index 0000000..bac103f
--- /dev/null
+++ b/src/main/java/kr/co/ragone/config/CorsConfig.java
@@ -0,0 +1,30 @@
+package kr.co.ragone.config;
+
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.web.cors.CorsConfiguration;
+import org.springframework.web.cors.UrlBasedCorsConfigurationSource;
+import org.springframework.web.filter.CorsFilter;
+
+import java.util.Arrays;
+
+@Configuration
+public class CorsConfig {
+
+    @Bean
+
public CorsFilter corsFilter() {
+        // One CORS policy, applied to every path below.
+        CorsConfiguration policy = new CorsConfiguration();
+        policy.setAllowedOrigins(Arrays.asList("http://localhost:3000", "http://127.0.0.1:3000"));
+        policy.setAllowedMethods(Arrays.asList("GET", "POST", "PUT", "DELETE", "OPTIONS"));
+        policy.setAllowedHeaders(Arrays.asList("*"));
+        policy.setAllowCredentials(true);
+
+        UrlBasedCorsConfigurationSource routes = new UrlBasedCorsConfigurationSource();
+        routes.registerCorsConfiguration("/**", policy);
+        return new CorsFilter(routes);
+    }
+}
diff --git a/src/main/java/kr/co/ragone/config/OpenAiConfig.java b/src/main/java/kr/co/ragone/config/OpenAiConfig.java
new file mode 100644
index 0000000..94a09f1
--- /dev/null
+++ b/src/main/java/kr/co/ragone/config/OpenAiConfig.java
@@ -0,0 +1,20 @@
+package kr.co.ragone.config;
+
+import com.theokanning.openai.service.OpenAiService;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+import java.time.Duration;
+
+/**
+ * Exposes the OpenAI client as a Spring bean.
+ */
+@Configuration
+public class OpenAiConfig {
+
+    /** Timeout handed to the OpenAI client constructor. */
+    private static final Duration CLIENT_TIMEOUT = Duration.ofSeconds(60);
+
+    /** API key read from the {@code openai.api-key} property. */
+    @Value("${openai.api-key}")
+    private String apiKey;
+
+    /**
+     * Builds the OpenAI client from the configured key and timeout.
+     *
+     * @return a new {@link OpenAiService}
+     */
+    @Bean
+    public OpenAiService openAiService() {
+        return new OpenAiService(apiKey, CLIENT_TIMEOUT);
+    }
+}
diff --git a/src/main/java/kr/co/ragone/controller/ChatController.java b/src/main/java/kr/co/ragone/controller/ChatController.java
new file mode 100644
index 0000000..445f6e9
--- /dev/null
+++ b/src/main/java/kr/co/ragone/controller/ChatController.java
@@ -0,0 +1,34 @@
+package kr.co.ragone.controller;
+
+import kr.co.ragone.service.ChatService;
+import lombok.Data;
+import lombok.RequiredArgsConstructor;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.*;
+
+import java.util.List;
+
+/**
+ * REST endpoint under {@code /api/chat} that forwards questions to {@link ChatService}.
+ */
+@RestController
+@RequestMapping("/api/chat")
+@RequiredArgsConstructor
+public class ChatController {
+
+    private final ChatService chatService;
+
+
@PostMapping
+    /*
+     * Passes the question, topic ids and session key from the request body to
+     * ChatService.ask and returns its result with HTTP 200.
+     */
+    public ResponseEntity<ChatService.RagResponse> chat(@RequestBody ChatRequest request) {
+        ChatService.RagResponse response = chatService.ask(
+                request.getQuestion(),
+                request.getTopicIds(),
+                request.getSessionKey()
+        );
+        return ResponseEntity.ok(response);
+    }
+
+    /**
+     * JSON request body of the chat endpoint.
+     */
+    @Data
+    public static class ChatRequest {
+        private String question;
+        // Element type restored after the generic argument was lost; topic ids are Long elsewhere in the model.
+        private List<Long> topicIds;
+        private String sessionKey; // session key (per the original note, a new one is created when absent)
+    }
+}
diff --git a/src/main/java/kr/co/ragone/controller/DocumentController.java b/src/main/java/kr/co/ragone/controller/DocumentController.java
new file mode 100644
index 0000000..55e0412
--- /dev/null
+++ b/src/main/java/kr/co/ragone/controller/DocumentController.java
@@ -0,0 +1,150 @@
+package kr.co.ragone.controller;
+
+import kr.co.ragone.domain.DocInfo;
+import kr.co.ragone.repository.DocInfoRepository;
+import kr.co.ragone.service.DocumentIndexingService;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.core.io.Resource;
+import org.springframework.core.io.UrlResource;
+import org.springframework.http.HttpHeaders;
+import org.springframework.http.MediaType;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.*;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+/**
+ * REST endpoints under {@code /api} for uploading, listing, downloading and deleting documents.
+ */
+@Slf4j
+@RestController
+@RequestMapping("/api")
+@RequiredArgsConstructor
+public class DocumentController {
+
+    private final DocumentIndexingService documentIndexingService;
+    private final DocInfoRepository docInfoRepository;
+
+    /**
+     * Uploads a document to a topic and indexes it.
+     *
+     * @param topicId id of the topic the document belongs to
+     * @param file    uploaded multipart file
+     * @return 200 with the stored {@link DocInfo}, or 400 when the service throws
+     */
+    @PostMapping("/topics/{topicId}/documents/upload")
+    public ResponseEntity<DocInfo> uploadDocument(
+            @PathVariable Long topicId,
+            @RequestParam("file") MultipartFile file) {
+
+        log.info("문서 업로드 요청: topicId={}, fileName={}", topicId, file.getOriginalFilename());
+
+        try {
+            DocInfo docInfo =
documentIndexingService.uploadAndIndex(topicId, file);
+            return ResponseEntity.ok(docInfo);
+        } catch (Exception e) {
+            log.error("문서 업로드 실패", e);
+            return ResponseEntity.badRequest().build();
+        }
+    }
+
+    /**
+     * Lists the documents of one topic.
+     *
+     * @param topicId id of the topic
+     * @return 200 with the documents returned by the repository
+     */
+    @GetMapping("/topics/{topicId}/documents")
+    public ResponseEntity<List<DocInfo>> getDocuments(@PathVariable Long topicId) {
+        List<DocInfo> documents = docInfoRepository.findByTopicInfo_TopicId(topicId);
+        return ResponseEntity.ok(documents);
+    }
+
+    /**
+     * Returns a single document record.
+     *
+     * @param docId id of the document
+     * @return 200 with the record, or 404 when the id is unknown
+     */
+    @GetMapping("/documents/{docId}")
+    public ResponseEntity<DocInfo> getDocument(@PathVariable Long docId) {
+        return docInfoRepository.findById(docId)
+                .map(ResponseEntity::ok)
+                .orElse(ResponseEntity.notFound().build());
+    }
+
+    /**
+     * Streams the stored file of a document as an attachment.
+     *
+     * @param docId id of the document
+     * @return 200 with the file, 404 when the record or the file is missing,
+     *         400 when anything else fails
+     */
+    @GetMapping("/documents/{docId}/download")
+    public ResponseEntity<Resource> downloadDocument(@PathVariable Long docId) {
+        try {
+            // An unknown id is "not found" (as in getDocument), not a malformed request.
+            DocInfo docInfo = docInfoRepository.findById(docId).orElse(null);
+            if (docInfo == null) {
+                log.warn("문서를 찾을 수 없습니다: {}", docId);
+                return ResponseEntity.notFound().build();
+            }
+
+            Path filePath = Paths.get(docInfo.getFilePath());
+            Resource resource = new UrlResource(filePath.toUri());
+
+            if (!resource.exists()) {
+                log.error("파일이 존재하지 않습니다: {}", docInfo.getFilePath());
+                return ResponseEntity.notFound().build();
+            }
+
+            // original_name is a nullable column; fall back to the mandatory file name.
+            String downloadName = docInfo.getOriginalName() != null
+                    ? docInfo.getOriginalName()
+                    : docInfo.getFileName();
+
+            // Percent-encode the name (non-ASCII support); URLEncoder emits '+' for spaces.
+            String encodedFileName = URLEncoder.encode(downloadName, StandardCharsets.UTF_8)
+                    .replace("+", "%20");
+
+            // Pick the Content-Type from the stored file type.
+            String contentType = getContentType(docInfo.getFileType());
+
+            return ResponseEntity.ok()
+                    .contentType(MediaType.parseMediaType(contentType))
+                    .header(HttpHeaders.CONTENT_DISPOSITION,
+                            "attachment; filename=\"" + encodedFileName + "\"; filename*=UTF-8''" + encodedFileName)
+                    .body(resource);
+        } catch (Exception e) {
+            log.error("문서 다운로드 실패", e);
+            return ResponseEntity.badRequest().build();
+        }
+    }
+
+    /**
+     * Deletes one document through the indexing service.
+     *
+     * @param docId id of the document
+     * @return 200 on success, 400 when the service throws
+     */
+    @DeleteMapping("/documents/{docId}")
+    public ResponseEntity<Void> deleteDocument(@PathVariable Long docId) {
+        try {
+            documentIndexingService.deleteDocument(docId);
+
return ResponseEntity.ok().build();
+        } catch (Exception e) {
+            log.error("문서 삭제 실패", e);
+            return ResponseEntity.badRequest().build();
+        }
+    }
+
+    /**
+     * Deletes every document of a topic through the indexing service.
+     *
+     * @param topicId id of the topic
+     * @return 200 on success, 400 when the service throws
+     */
+    @DeleteMapping("/topics/{topicId}/documents")
+    public ResponseEntity<Void> deleteAllDocuments(@PathVariable Long topicId) {
+        try {
+            documentIndexingService.deleteAllByTopic(topicId);
+            return ResponseEntity.ok().build();
+        } catch (Exception e) {
+            log.error("전체 문서 삭제 실패", e);
+            return ResponseEntity.badRequest().build();
+        }
+    }
+
+    /**
+     * Maps a stored file type (extension) to a Content-Type value.
+     *
+     * @param fileType extension such as {@code pdf}; matched case-insensitively, may be null
+     * @return the matching media type, or {@code application/octet-stream} for null or unknown types
+     */
+    private String getContentType(String fileType) {
+        if (fileType == null) {
+            return "application/octet-stream";
+        }
+        return switch (fileType.toLowerCase()) {
+            case "pdf" -> "application/pdf";
+            case "docx" -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
+            case "doc" -> "application/msword";
+            case "txt" -> "text/plain; charset=UTF-8";
+            case "xlsx" -> "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
+            case "xls" -> "application/vnd.ms-excel";
+            case "hwp" -> "application/x-hwp";
+            default -> "application/octet-stream";
+        };
+    }
+}
diff --git a/src/main/java/kr/co/ragone/controller/TopicController.java b/src/main/java/kr/co/ragone/controller/TopicController.java
new file mode 100644
index 0000000..d8bc130
--- /dev/null
+++ b/src/main/java/kr/co/ragone/controller/TopicController.java
@@ -0,0 +1,57 @@
+package kr.co.ragone.controller;
+
+import kr.co.ragone.domain.TopicInfo;
+import kr.co.ragone.repository.TopicInfoRepository;
+import lombok.RequiredArgsConstructor;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.*;
+
+import java.util.List;
+
+/**
+ * CRUD endpoints for topics under {@code /api/topics}.
+ */
+@RestController
+@RequestMapping("/api/topics")
+@RequiredArgsConstructor
+public class TopicController {
+
+    private final TopicInfoRepository topicInfoRepository;
+
+    /**
+     * Lists the topics returned by {@code findByIsActiveTrue()}.
+     *
+     * @return 200 with the topic list
+     */
+    @GetMapping
+    public ResponseEntity<List<TopicInfo>> getTopics() {
+        List<TopicInfo> topics = topicInfoRepository.findByIsActiveTrue();
+        return
ResponseEntity.ok(topics);
+    }
+
+    /**
+     * Returns one topic.
+     *
+     * @param topicId id of the topic
+     * @return 200 with the topic, or 404 when the id is unknown
+     */
+    @GetMapping("/{topicId}")
+    public ResponseEntity<TopicInfo> getTopic(@PathVariable Long topicId) {
+        return topicInfoRepository.findById(topicId)
+                .map(ResponseEntity::ok)
+                .orElse(ResponseEntity.notFound().build());
+    }
+
+    /**
+     * Creates a new topic from the request body.
+     *
+     * @param topicInfo topic fields supplied by the client
+     * @return 200 with the saved topic
+     */
+    @PostMapping
+    public ResponseEntity<TopicInfo> createTopic(@RequestBody TopicInfo topicInfo) {
+        // Drop any client-supplied id so "create" can never overwrite an existing row.
+        topicInfo.setTopicId(null);
+        TopicInfo saved = topicInfoRepository.save(topicInfo);
+        return ResponseEntity.ok(saved);
+    }
+
+    /**
+     * Updates name, description, icon and active flag of an existing topic.
+     *
+     * @param topicId   id of the topic to update
+     * @param topicInfo new values; an omitted active flag keeps the stored one
+     * @return 200 with the saved topic, or 404 when the id is unknown
+     */
+    @PutMapping("/{topicId}")
+    public ResponseEntity<TopicInfo> updateTopic(
+            @PathVariable Long topicId,
+            @RequestBody TopicInfo topicInfo) {
+        return topicInfoRepository.findById(topicId)
+                .map(existing -> {
+                    existing.setTopicName(topicInfo.getTopicName());
+                    existing.setTopicDesc(topicInfo.getTopicDesc());
+                    existing.setTopicIcon(topicInfo.getTopicIcon());
+                    // A body without isActive must not null the flag: getTopics() only
+                    // lists rows where it is true, so the topic would silently disappear.
+                    if (topicInfo.getIsActive() != null) {
+                        existing.setIsActive(topicInfo.getIsActive());
+                    }
+                    return ResponseEntity.ok(topicInfoRepository.save(existing));
+                })
+                .orElse(ResponseEntity.notFound().build());
+    }
+
+    /**
+     * Deletes a topic by id.
+     *
+     * @param topicId id of the topic
+     * @return 200 with an empty body
+     */
+    @DeleteMapping("/{topicId}")
+    public ResponseEntity<Void> deleteTopic(@PathVariable Long topicId) {
+        topicInfoRepository.deleteById(topicId);
+        return ResponseEntity.ok().build();
+    }
+}
diff --git a/src/main/java/kr/co/ragone/domain/ChatMessage.java b/src/main/java/kr/co/ragone/domain/ChatMessage.java
new file mode 100644
index 0000000..8dc97ea
--- /dev/null
+++ b/src/main/java/kr/co/ragone/domain/ChatMessage.java
@@ -0,0 +1,48 @@
+package kr.co.ragone.domain;
+
+import jakarta.persistence.*;
+import lombok.*;
+import org.hibernate.annotations.JdbcTypeCode;
+import org.hibernate.type.SqlTypes;
+import java.time.LocalDateTime;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * JPA entity mapped to table {@code TB_CHAT_MESSAGE}.
+ */
+@Entity
+@Table(name = "TB_CHAT_MESSAGE")
+@Getter
+@Setter
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class ChatMessage {
+
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    @Column(name = "msg_id")
+    private Long msgId;
+
+    @ManyToOne(fetch = FetchType.LAZY)
+    @JoinColumn(name = "session_id")
+    private ChatSession
chatSession;
+
+    @Column(name = "msg_role", nullable = false, length = 20)
+    private String msgRole;
+
+    @Column(name = "msg_content", nullable = false, columnDefinition = "TEXT")
+    private String msgContent;
+
+    @Column(name = "topic_ids", columnDefinition = "BIGINT[]")
+    private Long[] topicIds;
+
+    // Stored as jsonb. Type arguments restored after they were lost in extraction;
+    // NOTE(review): confirm String/Object against the code that fills this list.
+    @JdbcTypeCode(SqlTypes.JSON)
+    @Column(name = "source_refs", columnDefinition = "jsonb")
+    private List<Map<String, Object>> sourceRefs;
+
+    @Column(name = "token_count")
+    private Integer tokenCount;
+
+    // Set when the object is constructed, not by the database.
+    @Column(name = "created_at")
+    @Builder.Default
+    private LocalDateTime createdAt = LocalDateTime.now();
+}
diff --git a/src/main/java/kr/co/ragone/domain/ChatSession.java b/src/main/java/kr/co/ragone/domain/ChatSession.java
new file mode 100644
index 0000000..4fdbcbb
--- /dev/null
+++ b/src/main/java/kr/co/ragone/domain/ChatSession.java
@@ -0,0 +1,37 @@
+package kr.co.ragone.domain;
+
+import jakarta.persistence.*;
+import lombok.*;
+import java.time.LocalDateTime;
+
+/**
+ * JPA entity mapped to table {@code TB_CHAT_SESSION}.
+ */
+@Entity
+@Table(name = "TB_CHAT_SESSION")
+@Getter
+@Setter
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class ChatSession {
+
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    @Column(name = "session_id")
+    private Long sessionId;
+
+    // Unique, mandatory key of the session (distinct from the generated id).
+    @Column(name = "session_key", nullable = false, unique = true, length = 100)
+    private String sessionKey;
+
+    @Column(name = "session_title", length = 255)
+    private String sessionTitle;
+
+    @Column(name = "user_id", length = 100)
+    private String userId;
+
+    // Both timestamps are set when the object is constructed.
+    // NOTE(review): nothing in this class refreshes updatedAt on later saves.
+    @Column(name = "created_at")
+    @Builder.Default
+    private LocalDateTime createdAt = LocalDateTime.now();
+
+    @Column(name = "updated_at")
+    @Builder.Default
+    private LocalDateTime updatedAt = LocalDateTime.now();
+}
diff --git a/src/main/java/kr/co/ragone/domain/DocChunk.java b/src/main/java/kr/co/ragone/domain/DocChunk.java
new file mode 100644
index 0000000..4dfe5a3
--- /dev/null
+++ b/src/main/java/kr/co/ragone/domain/DocChunk.java
@@ -0,0 +1,52 @@
+package kr.co.ragone.domain;
+
+import
jakarta.persistence.*;
+import lombok.*;
+import org.hibernate.annotations.JdbcTypeCode;
+import org.hibernate.type.SqlTypes;
+import java.time.LocalDateTime;
+import java.util.Map;
+
+/**
+ * JPA entity mapped to table {@code TB_DOC_CHUNK}: one text chunk of a document.
+ */
+@Entity
+@Table(name = "TB_DOC_CHUNK")
+@Getter
+@Setter
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class DocChunk {
+
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    @Column(name = "chunk_id")
+    private Long chunkId;
+
+    @ManyToOne(fetch = FetchType.LAZY)
+    @JoinColumn(name = "doc_id")
+    private DocInfo docInfo;
+
+    @ManyToOne(fetch = FetchType.LAZY)
+    @JoinColumn(name = "topic_id")
+    private TopicInfo topicInfo;
+
+    @Column(name = "chunk_content", nullable = false, columnDefinition = "TEXT")
+    private String chunkContent;
+
+    // The pgvector column is handled through native queries (per the original note);
+    // it is declared as a String here.
+    @Column(name = "chunk_embedding", columnDefinition = "vector(1536)")
+    private String chunkEmbedding;
+
+    @Column(name = "chunk_index")
+    private Integer chunkIndex;
+
+    @Column(name = "token_count")
+    private Integer tokenCount;
+
+    // Stored as jsonb. Parameterized instead of a raw Map;
+    // NOTE(review): confirm String/Object against the code that fills this map.
+    @JdbcTypeCode(SqlTypes.JSON)
+    @Column(name = "chunk_metadata", columnDefinition = "jsonb")
+    private Map<String, Object> chunkMetadata;
+
+    // Set when the object is constructed, not by the database.
+    @Column(name = "created_at")
+    @Builder.Default
+    private LocalDateTime createdAt = LocalDateTime.now();
+}
diff --git a/src/main/java/kr/co/ragone/domain/DocInfo.java b/src/main/java/kr/co/ragone/domain/DocInfo.java
new file mode 100644
index 0000000..62410f9
--- /dev/null
+++ b/src/main/java/kr/co/ragone/domain/DocInfo.java
@@ -0,0 +1,60 @@
+package kr.co.ragone.domain;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import jakarta.persistence.*;
+import lombok.*;
+import java.time.LocalDateTime;
+
+/**
+ * JPA entity mapped to table {@code TB_DOC_INFO}: metadata of one uploaded document.
+ */
+@Entity
+@Table(name = "TB_DOC_INFO")
+@Getter
+@Setter
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class DocInfo {
+
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    @Column(name = "doc_id")
+    private Long docId;
+
+    @ManyToOne(fetch = FetchType.LAZY)
+    @JoinColumn(name = "topic_id")
+
@JsonIgnoreProperties({"hibernateLazyInitializer", "handler"}) + private TopicInfo topicInfo; + + @Column(name = "file_name", nullable = false, length = 255) + private String fileName; + + @Column(name = "original_name", length = 255) + private String originalName; + + @Column(name = "file_path", length = 500) + private String filePath; + + @Column(name = "file_size") + private Long fileSize; + + @Column(name = "file_type", length = 50) + private String fileType; + + @Column(name = "chunk_count") + @Builder.Default + private Integer chunkCount = 0; + + @Column(name = "doc_status", length = 20) + @Builder.Default + private String docStatus = "PENDING"; + + @Column(name = "error_msg", columnDefinition = "TEXT") + private String errorMsg; + + @Column(name = "created_at") + @Builder.Default + private LocalDateTime createdAt = LocalDateTime.now(); + + @Column(name = "updated_at") + @Builder.Default + private LocalDateTime updatedAt = LocalDateTime.now(); +} diff --git a/src/main/java/kr/co/ragone/domain/TopicInfo.java b/src/main/java/kr/co/ragone/domain/TopicInfo.java new file mode 100644 index 0000000..78cb7d0 --- /dev/null +++ b/src/main/java/kr/co/ragone/domain/TopicInfo.java @@ -0,0 +1,44 @@ +package kr.co.ragone.domain; + +import jakarta.persistence.*; +import lombok.*; +import java.time.LocalDateTime; + +@Entity +@Table(name = "TB_TOPIC_INFO") +@Getter +@Setter +@NoArgsConstructor +@AllArgsConstructor +@Builder +public class TopicInfo { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "topic_id") + private Long topicId; + + @Column(name = "topic_code", nullable = false, unique = true, length = 50) + private String topicCode; + + @Column(name = "topic_name", nullable = false, length = 100) + private String topicName; + + @Column(name = "topic_desc", columnDefinition = "TEXT") + private String topicDesc; + + @Column(name = "topic_icon", length = 50) + private String topicIcon; + + @Column(name = "is_active") + @Builder.Default + private 
Boolean isActive = true; + + @Column(name = "created_at") + @Builder.Default + private LocalDateTime createdAt = LocalDateTime.now(); + + @Column(name = "updated_at") + @Builder.Default + private LocalDateTime updatedAt = LocalDateTime.now(); +} diff --git a/src/main/java/kr/co/ragone/repository/ChatMessageRepository.java b/src/main/java/kr/co/ragone/repository/ChatMessageRepository.java new file mode 100644 index 0000000..fe7c506 --- /dev/null +++ b/src/main/java/kr/co/ragone/repository/ChatMessageRepository.java @@ -0,0 +1,13 @@ +package kr.co.ragone.repository; + +import kr.co.ragone.domain.ChatMessage; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.stereotype.Repository; + +import java.util.List; + +@Repository +public interface ChatMessageRepository extends JpaRepository { + + List findByChatSession_SessionIdOrderByCreatedAtAsc(Long sessionId); +} diff --git a/src/main/java/kr/co/ragone/repository/ChatSessionRepository.java b/src/main/java/kr/co/ragone/repository/ChatSessionRepository.java new file mode 100644 index 0000000..f246c62 --- /dev/null +++ b/src/main/java/kr/co/ragone/repository/ChatSessionRepository.java @@ -0,0 +1,13 @@ +package kr.co.ragone.repository; + +import kr.co.ragone.domain.ChatSession; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.stereotype.Repository; + +import java.util.Optional; + +@Repository +public interface ChatSessionRepository extends JpaRepository { + + Optional findBySessionKey(String sessionKey); +} diff --git a/src/main/java/kr/co/ragone/repository/DocChunkRepository.java b/src/main/java/kr/co/ragone/repository/DocChunkRepository.java new file mode 100644 index 0000000..7375f21 --- /dev/null +++ b/src/main/java/kr/co/ragone/repository/DocChunkRepository.java @@ -0,0 +1,55 @@ +package kr.co.ragone.repository; + +import kr.co.ragone.domain.DocChunk; +import org.springframework.data.jpa.repository.JpaRepository; +import 
org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +import java.util.List; + +@Repository +public interface DocChunkRepository extends JpaRepository { + + List findByDocInfo_DocId(Long docId); + + void deleteByDocInfo_DocId(Long docId); + + /** + * 벡터 유사도 검색 (전체 주제) + */ + @Query(value = """ + SELECT c.chunk_id, c.doc_id, c.topic_id, c.chunk_content, + c.chunk_index, c.token_count, c.chunk_metadata, c.created_at, + 1 - (c.chunk_embedding <=> cast(:embedding as vector)) as similarity + FROM TB_DOC_CHUNK c + WHERE 1 - (c.chunk_embedding <=> cast(:embedding as vector)) > :threshold + ORDER BY c.chunk_embedding <=> cast(:embedding as vector) + LIMIT :limit + """, nativeQuery = true) + List findSimilarChunks( + @Param("embedding") String embedding, + @Param("threshold") double threshold, + @Param("limit") int limit + ); + + /** + * 벡터 유사도 검색 (특정 주제들) + */ + @Query(value = """ + SELECT c.chunk_id, c.doc_id, c.topic_id, c.chunk_content, + c.chunk_index, c.token_count, c.chunk_metadata, c.created_at, + 1 - (c.chunk_embedding <=> cast(:embedding as vector)) as similarity + FROM TB_DOC_CHUNK c + WHERE c.topic_id = ANY(cast(:topicIds as BIGINT[])) + AND 1 - (c.chunk_embedding <=> cast(:embedding as vector)) > :threshold + ORDER BY c.chunk_embedding <=> cast(:embedding as vector) + LIMIT :limit + """, nativeQuery = true) + List findSimilarChunksByTopics( + @Param("embedding") String embedding, + @Param("topicIds") Long[] topicIds, + @Param("threshold") double threshold, + @Param("limit") int limit + ); +} diff --git a/src/main/java/kr/co/ragone/repository/DocInfoRepository.java b/src/main/java/kr/co/ragone/repository/DocInfoRepository.java new file mode 100644 index 0000000..ca900df --- /dev/null +++ b/src/main/java/kr/co/ragone/repository/DocInfoRepository.java @@ -0,0 +1,15 @@ +package kr.co.ragone.repository; + +import kr.co.ragone.domain.DocInfo; +import 
org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.stereotype.Repository; + +import java.util.List; + +@Repository +public interface DocInfoRepository extends JpaRepository { + + List findByTopicInfo_TopicId(Long topicId); + + List findByDocStatus(String docStatus); +} diff --git a/src/main/java/kr/co/ragone/repository/TopicInfoRepository.java b/src/main/java/kr/co/ragone/repository/TopicInfoRepository.java new file mode 100644 index 0000000..dff5f6a --- /dev/null +++ b/src/main/java/kr/co/ragone/repository/TopicInfoRepository.java @@ -0,0 +1,16 @@ +package kr.co.ragone.repository; + +import kr.co.ragone.domain.TopicInfo; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.stereotype.Repository; + +import java.util.List; +import java.util.Optional; + +@Repository +public interface TopicInfoRepository extends JpaRepository { + + Optional findByTopicCode(String topicCode); + + List findByIsActiveTrue(); +} diff --git a/src/main/java/kr/co/ragone/service/ChatService.java b/src/main/java/kr/co/ragone/service/ChatService.java new file mode 100644 index 0000000..bc0d691 --- /dev/null +++ b/src/main/java/kr/co/ragone/service/ChatService.java @@ -0,0 +1,275 @@ +package kr.co.ragone.service; + +import com.theokanning.openai.completion.chat.ChatCompletionRequest; +import com.theokanning.openai.completion.chat.ChatCompletionResult; +import com.theokanning.openai.completion.chat.ChatMessage; +import com.theokanning.openai.service.OpenAiService; +import kr.co.ragone.domain.ChatSession; +import kr.co.ragone.repository.ChatMessageRepository; +import kr.co.ragone.repository.ChatSessionRepository; +import kr.co.ragone.repository.DocChunkRepository; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import 
java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.stream.Collectors; + +@Slf4j +@Service +@RequiredArgsConstructor +public class ChatService { + + private final OpenAiService openAiService; + private final EmbeddingService embeddingService; + private final DocChunkRepository docChunkRepository; + private final ChatSessionRepository chatSessionRepository; + private final ChatMessageRepository chatMessageRepository; + + @Value("${openai.model.chat}") + private String chatModel; + + @Value("${rag.retrieval.top-k}") + private int topK; + + @Value("${rag.retrieval.similarity-threshold}") + private double similarityThreshold; + + /** + * RAG 기반 질의응답 (세션 저장 포함) + */ + @Transactional + public RagResponse ask(String question, List topicIds, String sessionKey) { + // 1. 세션 조회 또는 생성 + ChatSession session = getOrCreateSession(sessionKey, question); + + // 2. 사용자 메시지 저장 + saveMessage(session, "user", question, topicIds, null); + + // 3. 질문 임베딩 + String questionEmbedding = embeddingService.createEmbeddingAsString(question); + log.info("[RAG] Question: {}", question); + log.info("[RAG] TopicIds: {}", topicIds); + log.info("[RAG] Threshold: {}, TopK: {}", similarityThreshold, topK); + + // 4. 
유사 문서 검색 + List chunks; + if (topicIds == null || topicIds.isEmpty()) { + log.info("[RAG] Searching ALL topics"); + chunks = docChunkRepository.findSimilarChunks( + questionEmbedding, similarityThreshold, topK); + } else { + log.info("[RAG] Searching specific topics: {}", topicIds); + chunks = docChunkRepository.findSimilarChunksByTopics( + questionEmbedding, topicIds.toArray(new Long[0]), + similarityThreshold, topK); + } + log.info("[RAG] Found {} relevant chunks", chunks.size()); + + // 청크 내용 로깅 + for (int i = 0; i < chunks.size(); i++) { + Object[] row = chunks.get(i); + String content = (String) row[3]; + Double similarity = ((Number) row[8]).doubleValue(); + log.info("[RAG] Chunk {}: similarity={}, content={}", + i + 1, String.format("%.3f", similarity), + content.substring(0, Math.min(100, content.length()))); + } + + // 5. 컨텍스트 구성 + String context = buildContext(chunks); + + // 6. 프롬프트 구성 및 GPT 호출 + String answer = generateAnswer(question, context, chunks.isEmpty()); + + // 7. AI 응답 메시지 저장 + List sources = extractSources(chunks); + saveMessage(session, "assistant", answer, topicIds, sources); + + // 8. 응답 구성 + return RagResponse.builder() + .sessionKey(session.getSessionKey()) + .answer(answer) + .sources(sources) + .build(); + } + + /** + * 세션 조회 또는 생성 + */ + private ChatSession getOrCreateSession(String sessionKey, String firstQuestion) { + if (sessionKey != null && !sessionKey.isBlank()) { + return chatSessionRepository.findBySessionKey(sessionKey) + .orElseGet(() -> createSession(sessionKey, firstQuestion)); + } + return createSession(UUID.randomUUID().toString(), firstQuestion); + } + + /** + * 새 세션 생성 + */ + private ChatSession createSession(String sessionKey, String title) { + String sessionTitle = title.length() > 50 ? title.substring(0, 50) + "..." 
: title; + + ChatSession session = ChatSession.builder() + .sessionKey(sessionKey) + .sessionTitle(sessionTitle) + .build(); + + return chatSessionRepository.save(session); + } + + /** + * 메시지 저장 + */ + private void saveMessage(ChatSession session, String role, String content, + List topicIds, List sources) { + kr.co.ragone.domain.ChatMessage message = kr.co.ragone.domain.ChatMessage.builder() + .chatSession(session) + .msgRole(role) + .msgContent(content) + .build(); + + chatMessageRepository.save(message); + + session.setUpdatedAt(LocalDateTime.now()); + chatSessionRepository.save(session); + } + + private String buildContext(List chunks) { + if (chunks.isEmpty()) { + return ""; + } + + StringBuilder sb = new StringBuilder(); + sb.append("=== 검색된 문서 내용 ===\n\n"); + + for (int i = 0; i < chunks.size(); i++) { + Object[] row = chunks.get(i); + String content = (String) row[3]; // chunk_content + Double similarity = ((Number) row[8]).doubleValue(); // similarity + + sb.append(String.format("【문서 %d】 (관련도: %.0f%%)\n", i + 1, similarity * 100)); + sb.append("─".repeat(40)).append("\n"); + sb.append(content.trim()); + sb.append("\n\n"); + } + return sb.toString(); + } + + private String generateAnswer(String question, String context, boolean noContext) { + String systemPrompt; + + if (noContext) { + systemPrompt = """ + 당신은 친절한 문서 기반 질의응답 어시스턴트입니다. + 현재 검색된 관련 문서가 없습니다. + + 사용자에게 다음을 안내해주세요: + 1. 해당 질문과 관련된 문서가 시스템에 등록되어 있지 않을 수 있습니다. + 2. 더 구체적인 키워드로 질문하면 도움이 될 수 있습니다. + 3. 관리자에게 관련 문서 등록을 요청할 수 있습니다. + + 단, 일반적인 상식이나 공개된 정보로 답변 가능한 경우 도움을 드릴 수 있습니다. + """; + } else { + systemPrompt = """ + 당신은 전문적인 데이터 분석 및 문서 기반 질의응답 어시스턴트입니다. + + 【역할】 + - 제공된 문서 내용을 깊이 있게 분석하여 답변합니다. + - 데이터를 요약, 비교, 분석하여 인사이트를 제공합니다. + - 사용자가 이해하기 쉽게 구조화된 답변을 합니다. + + 【답변 규칙】 + 1. 문서에 있는 정보를 최대한 활용하여 상세히 답변하세요. + 2. 숫자, 날짜, 이름 등 구체적인 정보가 있으면 반드시 포함하세요. + 3. 여러 문서의 정보를 종합하여 분석적인 답변을 제공하세요. + 4. 표나 목록 형태로 정리하면 좋은 내용은 구조화하세요. + 5. 문서에서 직접 확인되지 않는 내용은 추측하지 마세요. + 6. 답변 마지막에 참고한 문서 번호를 명시하세요. 
+ + 【분석 관점】 + - 진행 상황, 진척률, 일정 관련 질문: 구체적인 수치와 기간을 제시 + - 비교 질문: 표 형태로 비교 정리 + - 요약 질문: 핵심 포인트를 불릿으로 정리 + - 추세/변화 질문: 시간순 또는 단계별로 설명 + """; + } + + String userPrompt; + if (noContext) { + userPrompt = String.format(""" + [질문] + %s + + 관련 문서를 찾을 수 없었습니다. + 위 안내에 따라 사용자에게 도움이 되는 응답을 해주세요. + """, question); + } else { + userPrompt = String.format(""" + %s + + [질문] + %s + + 위 문서 내용을 분석하여 질문에 상세히 답변해주세요. + """, context, question); + } + + List messages = new ArrayList<>(); + messages.add(new ChatMessage("system", systemPrompt)); + messages.add(new ChatMessage("user", userPrompt)); + + ChatCompletionRequest request = ChatCompletionRequest.builder() + .model(chatModel) + .messages(messages) + .temperature(0.3) + .maxTokens(2000) // 더 긴 답변 허용 + .build(); + + ChatCompletionResult result = openAiService.createChatCompletion(request); + + return result.getChoices().get(0).getMessage().getContent(); + } + + private List extractSources(List chunks) { + return chunks.stream() + .map(row -> SourceInfo.builder() + .chunkId(((Number) row[0]).longValue()) + .docId(((Number) row[1]).longValue()) + .content(truncate((String) row[3], 150)) + .similarity(((Number) row[8]).doubleValue()) + .build()) + .collect(Collectors.toList()); + } + + private String truncate(String text, int maxLength) { + if (text.length() <= maxLength) { + return text; + } + return text.substring(0, maxLength) + "..."; + } + + @lombok.Data + @lombok.Builder + public static class RagResponse { + private String sessionKey; + private String answer; + private List sources; + } + + @lombok.Data + @lombok.Builder + public static class SourceInfo { + private Long chunkId; + private Long docId; + private String content; + private Double similarity; + } +} diff --git a/src/main/java/kr/co/ragone/service/ChunkingService.java b/src/main/java/kr/co/ragone/service/ChunkingService.java new file mode 100644 index 0000000..f936c3c --- /dev/null +++ b/src/main/java/kr/co/ragone/service/ChunkingService.java @@ -0,0 +1,202 @@ 
+package kr.co.ragone.service; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +@Slf4j +@Service +public class ChunkingService { + + @Value("${rag.chunk.size:1000}") + private int chunkSize; + + @Value("${rag.chunk.overlap:100}") + private int chunkOverlap; + + // 최소 청크 크기 (이보다 작으면 단일 청크로) + private static final int MIN_CHUNK_SIZE = 50; + + /** + * 텍스트를 청크로 분할 + */ + public List chunkText(String text) { + List chunks = new ArrayList<>(); + + if (text == null || text.isBlank()) { + log.warn("빈 텍스트가 입력되었습니다."); + return chunks; + } + + // 텍스트 정규화 + text = normalizeText(text); + + log.info("청킹 시작: 원본 {}자, 청크 크기 {}, 오버랩 {}", + text.length(), chunkSize, chunkOverlap); + + // 텍스트가 짧으면 단일 청크로 + if (text.length() <= chunkSize) { + chunks.add(createChunk(text, 0)); + log.info("텍스트가 짧아 단일 청크로 생성: {}자", text.length()); + return chunks; + } + + // 문장 단위로 분할 후 청크 구성 + List sentences = splitIntoSentences(text); + log.debug("문장 {} 개로 분할됨", sentences.size()); + + StringBuilder currentChunk = new StringBuilder(); + int chunkIndex = 0; + + for (String sentence : sentences) { + // 현재 청크에 문장 추가 시 크기 초과하면 저장 + if (currentChunk.length() + sentence.length() > chunkSize && currentChunk.length() >= MIN_CHUNK_SIZE) { + chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex++)); + + // 오버랩 처리 + String overlap = getOverlapText(currentChunk.toString()); + currentChunk = new StringBuilder(overlap); + } + + if (currentChunk.length() > 0 && !currentChunk.toString().endsWith(" ")) { + currentChunk.append(" "); + } + currentChunk.append(sentence.trim()); + } + + // 마지막 청크 저장 + if (currentChunk.length() >= MIN_CHUNK_SIZE) { + chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex)); + } else if (currentChunk.length() > 0 && !chunks.isEmpty()) { + // 너무 짧으면 이전 청크에 병합 + ChunkResult lastChunk = 
chunks.get(chunks.size() - 1); + String merged = lastChunk.getContent() + " " + currentChunk.toString().trim(); + chunks.set(chunks.size() - 1, createChunk(merged, lastChunk.getIndex())); + } else if (currentChunk.length() > 0) { + chunks.add(createChunk(currentChunk.toString().trim(), chunkIndex)); + } + + log.info("청킹 완료: {} 청크 생성", chunks.size()); + return chunks; + } + + /** + * 텍스트 정규화 + */ + private String normalizeText(String text) { + return text + // 연속 공백 제거 + .replaceAll("[ \\t]+", " ") + // 연속 줄바꿈 정리 + .replaceAll("\\n{2,}", "\n\n") + .trim(); + } + + /** + * 문장 단위로 분할 + */ + private List splitIntoSentences(String text) { + List sentences = new ArrayList<>(); + + // 한국어/영어 문장 종결 패턴 + // . ! ? 뒤에 공백이나 줄바꿈이 오는 경우 + Pattern sentencePattern = Pattern.compile("(?<=[.!?。])\\s+|(?<=\\n)"); + + String[] parts = sentencePattern.split(text); + + for (String part : parts) { + String trimmed = part.trim(); + if (!trimmed.isEmpty()) { + sentences.add(trimmed); + } + } + + // 문장 분할이 잘 안되면 (문장이 1개인 경우) 단어 수 기준으로 분할 + if (sentences.size() <= 1 && text.length() > chunkSize) { + sentences = splitByWords(text, chunkSize / 2); + } + + return sentences; + } + + /** + * 단어 수 기준으로 분할 (문장 분할 실패 시 폴백) + */ + private List splitByWords(String text, int wordsPerChunk) { + List chunks = new ArrayList<>(); + String[] words = text.split("\\s+"); + + StringBuilder current = new StringBuilder(); + int wordCount = 0; + + for (String word : words) { + if (wordCount >= wordsPerChunk && current.length() > 0) { + chunks.add(current.toString().trim()); + current = new StringBuilder(); + wordCount = 0; + } + + if (current.length() > 0) { + current.append(" "); + } + current.append(word); + wordCount++; + } + + if (current.length() > 0) { + chunks.add(current.toString().trim()); + } + + return chunks; + } + + private ChunkResult createChunk(String content, int index) { + return ChunkResult.builder() + .content(content) + .index(index) + .tokenCount(estimateTokenCount(content)) + .build(); + } 
+ + private String getOverlapText(String text) { + if (text.length() <= chunkOverlap) { + return text; + } + // 단어 경계에서 자르기 + String overlap = text.substring(text.length() - chunkOverlap); + int spaceIndex = overlap.indexOf(' '); + if (spaceIndex > 0) { + overlap = overlap.substring(spaceIndex + 1); + } + return overlap; + } + + private int estimateTokenCount(String text) { + // 대략적인 토큰 수 추정 + int koreanChars = 0; + int otherChars = 0; + + for (char c : text.toCharArray()) { + if (Character.UnicodeScript.of(c) == Character.UnicodeScript.HANGUL) { + koreanChars++; + } else { + otherChars++; + } + } + + // 한글은 약 1.5자당 1토큰, 영어는 4자당 1토큰 + return (int) (koreanChars / 1.5 + otherChars / 4); + } + + @lombok.Data + @lombok.Builder + public static class ChunkResult { + private String content; + private int index; + private int tokenCount; + } +} diff --git a/src/main/java/kr/co/ragone/service/DocumentIndexingService.java b/src/main/java/kr/co/ragone/service/DocumentIndexingService.java new file mode 100644 index 0000000..907dcd0 --- /dev/null +++ b/src/main/java/kr/co/ragone/service/DocumentIndexingService.java @@ -0,0 +1,235 @@ +package kr.co.ragone.service; + +import kr.co.ragone.domain.DocChunk; +import kr.co.ragone.domain.DocInfo; +import kr.co.ragone.domain.TopicInfo; +import kr.co.ragone.repository.DocChunkRepository; +import kr.co.ragone.repository.DocInfoRepository; +import kr.co.ragone.repository.TopicInfoRepository; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.scheduling.annotation.Async; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.web.multipart.MultipartFile; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import 
java.time.LocalDateTime; +import java.util.List; +import java.util.UUID; + +@Slf4j +@Service +@RequiredArgsConstructor +public class DocumentIndexingService { + + private final TopicInfoRepository topicInfoRepository; + private final DocInfoRepository docInfoRepository; + private final DocChunkRepository docChunkRepository; + private final DocumentParserService documentParserService; + private final ChunkingService chunkingService; + private final EmbeddingService embeddingService; + private final JdbcTemplate jdbcTemplate; + + @Value("${file.upload-dir:./uploads}") + private String uploadDir; + + /** + * 문서 업로드 및 인덱싱 + */ + @Transactional + public DocInfo uploadAndIndex(Long topicId, MultipartFile file) throws Exception { + // 1. 주제 확인 + TopicInfo topicInfo = topicInfoRepository.findById(topicId) + .orElseThrow(() -> new IllegalArgumentException("주제를 찾을 수 없습니다: " + topicId)); + + // 2. 파일 저장 + String savedFileName = saveFile(file); + String filePath = Paths.get(uploadDir, savedFileName).toString(); + + // 3. 문서 정보 저장 (PROCESSING 상태) + DocInfo docInfo = DocInfo.builder() + .topicInfo(topicInfo) + .fileName(savedFileName) + .originalName(file.getOriginalFilename()) + .filePath(filePath) + .fileSize(file.getSize()) + .fileType(getFileExtension(file.getOriginalFilename())) + .docStatus("PROCESSING") + .build(); + docInfo = docInfoRepository.save(docInfo); + + // 4. 
비동기로 인덱싱 처리 + processIndexingAsync(docInfo.getDocId(), topicInfo, file); + + return docInfo; + } + + /** + * 비동기 인덱싱 처리 + */ + @Async + public void processIndexingAsync(Long docId, TopicInfo topicInfo, MultipartFile file) { + try { + processIndexing(docId, topicInfo, file); + } catch (Exception e) { + log.error("인덱싱 실패: docId={}", docId, e); + updateDocStatus(docId, "FAILED", e.getMessage()); + } + } + + /** + * 실제 인덱싱 처리 + */ + private void processIndexing(Long docId, TopicInfo topicInfo, MultipartFile file) throws Exception { + log.info("인덱싱 시작: docId={}, fileName={}", docId, file.getOriginalFilename()); + + // 1. 문서 파싱 + String content = documentParserService.parseDocument(file); + if (content == null || content.isBlank()) { + throw new RuntimeException("문서 내용이 비어있습니다."); + } + + // 2. 청킹 + List chunks = chunkingService.chunkText(content); + if (chunks.isEmpty()) { + throw new RuntimeException("청크 생성 실패"); + } + log.info("청크 생성 완료: {} chunks", chunks.size()); + + // 3. 각 청크에 대해 임베딩 생성 및 저장 + DocInfo docInfo = docInfoRepository.findById(docId) + .orElseThrow(() -> new RuntimeException("문서를 찾을 수 없습니다.")); + + for (ChunkingService.ChunkResult chunk : chunks) { + // 임베딩 생성 + String embeddingVector = embeddingService.createEmbeddingAsString(chunk.getContent()); + + // Native Query로 벡터 저장 + saveChunkWithEmbedding(docInfo, topicInfo, chunk, embeddingVector); + + log.debug("청크 저장 완료: index={}", chunk.getIndex()); + } + + // 4. 문서 상태 업데이트 + updateDocStatus(docId, "INDEXED", null); + updateChunkCount(docId, chunks.size()); + + log.info("인덱싱 완료: docId={}, chunks={}", docId, chunks.size()); + } + + /** + * 청크 + 벡터 저장 (Native Query 사용) + */ + private void saveChunkWithEmbedding(DocInfo docInfo, TopicInfo topicInfo, + ChunkingService.ChunkResult chunk, String embedding) { + String sql = """ + INSERT INTO TB_DOC_CHUNK + (doc_id, topic_id, chunk_content, chunk_embedding, chunk_index, token_count, created_at) + VALUES (?, ?, ?, ?::vector, ?, ?, ?) 
+ """; + + jdbcTemplate.update(sql, + docInfo.getDocId(), + topicInfo.getTopicId(), + chunk.getContent(), + embedding, + chunk.getIndex(), + chunk.getTokenCount(), + LocalDateTime.now() + ); + } + + /** + * 파일 저장 + */ + private String saveFile(MultipartFile file) throws IOException { + Path uploadPath = Paths.get(uploadDir); + if (!Files.exists(uploadPath)) { + Files.createDirectories(uploadPath); + } + + String originalFilename = file.getOriginalFilename(); + String extension = getFileExtension(originalFilename); + String savedFileName = UUID.randomUUID().toString() + "." + extension; + + Path filePath = uploadPath.resolve(savedFileName); + Files.copy(file.getInputStream(), filePath); + + log.info("파일 저장: {}", filePath); + return savedFileName; + } + + private String getFileExtension(String filename) { + if (filename == null) return ""; + int lastDot = filename.lastIndexOf('.'); + return lastDot > 0 ? filename.substring(lastDot + 1).toLowerCase() : ""; + } + + private void updateDocStatus(Long docId, String status, String errorMsg) { + docInfoRepository.findById(docId).ifPresent(doc -> { + doc.setDocStatus(status); + doc.setErrorMsg(errorMsg); + doc.setUpdatedAt(LocalDateTime.now()); + docInfoRepository.save(doc); + }); + } + + private void updateChunkCount(Long docId, int count) { + docInfoRepository.findById(docId).ifPresent(doc -> { + doc.setChunkCount(count); + doc.setUpdatedAt(LocalDateTime.now()); + docInfoRepository.save(doc); + }); + } + + /** + * 문서 삭제 (청크 포함) + */ + @Transactional + public void deleteDocument(Long docId) { + DocInfo docInfo = docInfoRepository.findById(docId) + .orElseThrow(() -> new IllegalArgumentException("문서를 찾을 수 없습니다: " + docId)); + + // 파일 삭제 + try { + Path filePath = Paths.get(docInfo.getFilePath()); + Files.deleteIfExists(filePath); + } catch (IOException e) { + log.warn("파일 삭제 실패: {}", docInfo.getFilePath(), e); + } + + // DB 삭제 (CASCADE로 청크도 함께 삭제됨) + docInfoRepository.delete(docInfo); + log.info("문서 삭제 완료: docId={}", docId); 
+ } + + /** + * 주제별 전체 문서 삭제 + */ + @Transactional + public void deleteAllByTopic(Long topicId) { + List documents = docInfoRepository.findByTopicInfo_TopicId(topicId); + + log.info("전체 문서 삭제 시작: topicId={}, count={}", topicId, documents.size()); + + for (DocInfo docInfo : documents) { + // 파일 삭제 + try { + Path filePath = Paths.get(docInfo.getFilePath()); + Files.deleteIfExists(filePath); + } catch (IOException e) { + log.warn("파일 삭제 실패: {}", docInfo.getFilePath(), e); + } + } + + // DB 삭제 (CASCADE로 청크도 함께 삭제됨) + docInfoRepository.deleteAll(documents); + log.info("전체 문서 삭제 완료: topicId={}, count={}", topicId, documents.size()); + } +} diff --git a/src/main/java/kr/co/ragone/service/DocumentParserService.java b/src/main/java/kr/co/ragone/service/DocumentParserService.java new file mode 100644 index 0000000..29f5957 --- /dev/null +++ b/src/main/java/kr/co/ragone/service/DocumentParserService.java @@ -0,0 +1,120 @@ +package kr.co.ragone.service; + +import lombok.extern.slf4j.Slf4j; +import org.apache.tika.Tika; +import org.apache.tika.config.TikaConfig; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.pdf.PDFParserConfig; +import org.apache.tika.sax.BodyContentHandler; +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +import java.io.IOException; +import java.io.InputStream; + +@Slf4j +@Service +public class DocumentParserService { + + private final Tika tika = new Tika(); + + /** + * 파일에서 텍스트 추출 (PDF, DOCX, TXT 등 지원) + */ + public String parseDocument(MultipartFile file) throws IOException, TikaException { + String filename = file.getOriginalFilename(); + log.info("문서 파싱 시작: {}", filename); + + try (InputStream inputStream = file.getInputStream()) { + String content; + + // PDF인 경우 특별 처리 + if (filename != 
null && filename.toLowerCase().endsWith(".pdf")) { + content = parsePdfWithOptions(inputStream); + } else { + content = tika.parseToString(inputStream); + } + + // 텍스트 정제 + content = cleanText(content); + + log.info("문서 파싱 완료: {} chars", content.length()); + + // 텍스트가 너무 짧으면 경고 + if (content.length() < 100) { + log.warn("⚠️ 추출된 텍스트가 매우 짧습니다. PDF가 이미지 기반일 수 있습니다."); + log.warn("원본 파일: {}, 추출 길이: {} chars", filename, content.length()); + } + + return content; + } + } + + /** + * PDF 파싱 옵션 설정 + */ + private String parsePdfWithOptions(InputStream inputStream) throws IOException, TikaException { + try { + // PDF 파서 설정 + PDFParserConfig pdfConfig = new PDFParserConfig(); + pdfConfig.setExtractInlineImages(true); + pdfConfig.setExtractUniqueInlineImagesOnly(true); + pdfConfig.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.AUTO); // OCR 자동 시도 + + ParseContext parseContext = new ParseContext(); + parseContext.set(PDFParserConfig.class, pdfConfig); + + // 파서 설정 + Parser parser = new AutoDetectParser(); + parseContext.set(Parser.class, parser); + + // 메타데이터 및 콘텐츠 핸들러 + Metadata metadata = new Metadata(); + BodyContentHandler handler = new BodyContentHandler(-1); // 무제한 + + parser.parse(inputStream, handler, metadata, parseContext); + + // 메타데이터 로깅 + log.debug("PDF 메타데이터:"); + for (String name : metadata.names()) { + log.debug(" {}: {}", name, metadata.get(name)); + } + + return handler.toString(); + } catch (Exception e) { + log.error("PDF 파싱 실패, 기본 파싱으로 재시도", e); + // 기본 파싱으로 폴백 + return tika.parseToString(inputStream); + } + } + + /** + * 텍스트 정제 + */ + private String cleanText(String text) { + if (text == null) return ""; + + return text + // 연속 공백을 단일 공백으로 + .replaceAll("[ \\t]+", " ") + // 연속 줄바꿈을 2개로 제한 + .replaceAll("\\n{3,}", "\n\n") + // 앞뒤 공백 제거 + .trim(); + } + + /** + * 파일에서 텍스트 추출 (파일 경로로) + */ + public String parseDocument(java.io.File file) throws IOException, TikaException { + log.info("문서 파싱 시작: {}", file.getName()); + String content = tika.parseToString(file); + 
content = cleanText(content); + log.info("문서 파싱 완료: {} chars", content.length()); + return content; + } +} diff --git a/src/main/java/kr/co/ragone/service/EmbeddingService.java b/src/main/java/kr/co/ragone/service/EmbeddingService.java new file mode 100644 index 0000000..f7a9353 --- /dev/null +++ b/src/main/java/kr/co/ragone/service/EmbeddingService.java @@ -0,0 +1,55 @@ +package kr.co.ragone.service; + +import com.theokanning.openai.embedding.EmbeddingRequest; +import com.theokanning.openai.embedding.EmbeddingResult; +import com.theokanning.openai.service.OpenAiService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +@Slf4j +@Service +@RequiredArgsConstructor +public class EmbeddingService { + + private final OpenAiService openAiService; + + @Value("${openai.model.embedding}") + private String embeddingModel; + + /** + * 텍스트를 임베딩 벡터로 변환 + */ + public List createEmbedding(String text) { + EmbeddingRequest request = EmbeddingRequest.builder() + .model(embeddingModel) + .input(Collections.singletonList(text)) + .build(); + + EmbeddingResult result = openAiService.createEmbeddings(request); + + return result.getData().get(0).getEmbedding(); + } + + /** + * 벡터를 PostgreSQL vector 형식 문자열로 변환 + */ + public String toVectorString(List embedding) { + return "[" + embedding.stream() + .map(String::valueOf) + .collect(Collectors.joining(",")) + "]"; + } + + /** + * 텍스트를 PostgreSQL vector 형식으로 직접 변환 + */ + public String createEmbeddingAsString(String text) { + List embedding = createEmbedding(text); + return toVectorString(embedding); + } +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml new file mode 100644 index 0000000..f06e850 --- /dev/null +++ b/src/main/resources/application.yml @@ -0,0 +1,54 @@ +server: + 
port: 8080 + +spring: + application: + name: ragone + + profiles: + active: local + + datasource: + url: jdbc:postgresql://172.25.0.79:5432/turbosoft_rag_db + username: turbosoft + password: xjqhthvmxm123 + driver-class-name: org.postgresql.Driver + + jpa: + hibernate: + ddl-auto: validate + show-sql: true + properties: + hibernate: + format_sql: true + dialect: org.hibernate.dialect.PostgreSQLDialect + + servlet: + multipart: + max-file-size: 50MB + max-request-size: 50MB + +# OpenAI 설정 +openai: + api-key: ${OPENAI_API_KEY:your-api-key-here} + model: + embedding: text-embedding-3-small + chat: gpt-4o-mini + +# RAG 설정 +rag: + chunk: + size: 1000 + overlap: 100 + retrieval: + top-k: 10 + similarity-threshold: 0.3 # 더 낮춰서 검색 범위 확대 + +# 파일 저장 경로 +file: + upload-dir: ./uploads + +logging: + level: + kr.co.ragone: DEBUG + org.hibernate.SQL: DEBUG